sha256_generic.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. /*
  2. * Copyright Jean-Luc Cooke
  3. * Copyright Andrew McDonald
  4. * Copyright 2002 James Morris
  5. * SHA224 Support Copyright 2007 Intel Corporation (by Jonathan Lynch)
  6. * Copyright 2012-2013 Luke Dashjr
  7. *
  8. * This program is free software; you can redistribute it and/or modify it
  9. * under the terms of the GNU General Public License as published by the Free
  10. * Software Foundation; either version 2 of the License, or (at your option)
  11. * any later version.
  12. *
  13. */
  14. #include "config.h"
  15. #include <stdint.h>
  16. #include <stdbool.h>
  17. #include <stdlib.h>
  18. #include <string.h>
  19. #include "driver-cpu.h"
  20. #include "miner.h"
  21. typedef uint32_t u32;
  22. typedef uint8_t u8;
  23. static inline u32 ror32(u32 word, unsigned int shift)
  24. {
  25. return (word >> shift) | (word << (32 - shift));
  26. }
  27. static inline u32 Ch(u32 x, u32 y, u32 z)
  28. {
  29. return z ^ (x & (y ^ z));
  30. }
  31. static inline u32 Maj(u32 x, u32 y, u32 z)
  32. {
  33. return (x & y) | (z & (x | y));
  34. }
  35. #define e0(x) (ror32(x, 2) ^ ror32(x,13) ^ ror32(x,22))
  36. #define e1(x) (ror32(x, 6) ^ ror32(x,11) ^ ror32(x,25))
  37. #define s0(x) (ror32(x, 7) ^ ror32(x,18) ^ (x >> 3))
  38. #define s1(x) (ror32(x,17) ^ ror32(x,19) ^ (x >> 10))
  39. static inline void LOAD_OP(int I, u32 *W, const u8 *input)
  40. {
  41. /* byteswap is handled once in scanhash_c
  42. */
  43. W[I] = /* ntohl */ ( ((u32*)(input))[I] );
  44. }
  45. static inline void BLEND_OP(int I, u32 *W)
  46. {
  47. W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
  48. }
  49. static void sha256_transform(u32 *state, const u8 *input)
  50. {
  51. u32 a, b, c, d, e, f, g, h, t1, t2;
  52. u32 W[64];
  53. int i;
  54. /* load the input */
  55. for (i = 0; i < 16; i++)
  56. LOAD_OP(i, W, input);
  57. /* now blend */
  58. for (i = 16; i < 64; i++)
  59. BLEND_OP(i, W);
  60. /* load the state into our registers */
  61. a=state[0]; b=state[1]; c=state[2]; d=state[3];
  62. e=state[4]; f=state[5]; g=state[6]; h=state[7];
  63. /* now iterate */
  64. t1 = h + e1(e) + Ch(e,f,g) + 0x428a2f98 + W[ 0];
  65. t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
  66. t1 = g + e1(d) + Ch(d,e,f) + 0x71374491 + W[ 1];
  67. t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
  68. t1 = f + e1(c) + Ch(c,d,e) + 0xb5c0fbcf + W[ 2];
  69. t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
  70. t1 = e + e1(b) + Ch(b,c,d) + 0xe9b5dba5 + W[ 3];
  71. t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
  72. t1 = d + e1(a) + Ch(a,b,c) + 0x3956c25b + W[ 4];
  73. t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
  74. t1 = c + e1(h) + Ch(h,a,b) + 0x59f111f1 + W[ 5];
  75. t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
  76. t1 = b + e1(g) + Ch(g,h,a) + 0x923f82a4 + W[ 6];
  77. t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
  78. t1 = a + e1(f) + Ch(f,g,h) + 0xab1c5ed5 + W[ 7];
  79. t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
  80. t1 = h + e1(e) + Ch(e,f,g) + 0xd807aa98 + W[ 8];
  81. t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
  82. t1 = g + e1(d) + Ch(d,e,f) + 0x12835b01 + W[ 9];
  83. t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
  84. t1 = f + e1(c) + Ch(c,d,e) + 0x243185be + W[10];
  85. t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
  86. t1 = e + e1(b) + Ch(b,c,d) + 0x550c7dc3 + W[11];
  87. t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
  88. t1 = d + e1(a) + Ch(a,b,c) + 0x72be5d74 + W[12];
  89. t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
  90. t1 = c + e1(h) + Ch(h,a,b) + 0x80deb1fe + W[13];
  91. t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
  92. t1 = b + e1(g) + Ch(g,h,a) + 0x9bdc06a7 + W[14];
  93. t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
  94. t1 = a + e1(f) + Ch(f,g,h) + 0xc19bf174 + W[15];
  95. t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
  96. t1 = h + e1(e) + Ch(e,f,g) + 0xe49b69c1 + W[16];
  97. t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
  98. t1 = g + e1(d) + Ch(d,e,f) + 0xefbe4786 + W[17];
  99. t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
  100. t1 = f + e1(c) + Ch(c,d,e) + 0x0fc19dc6 + W[18];
  101. t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
  102. t1 = e + e1(b) + Ch(b,c,d) + 0x240ca1cc + W[19];
  103. t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
  104. t1 = d + e1(a) + Ch(a,b,c) + 0x2de92c6f + W[20];
  105. t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
  106. t1 = c + e1(h) + Ch(h,a,b) + 0x4a7484aa + W[21];
  107. t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
  108. t1 = b + e1(g) + Ch(g,h,a) + 0x5cb0a9dc + W[22];
  109. t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
  110. t1 = a + e1(f) + Ch(f,g,h) + 0x76f988da + W[23];
  111. t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
  112. t1 = h + e1(e) + Ch(e,f,g) + 0x983e5152 + W[24];
  113. t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
  114. t1 = g + e1(d) + Ch(d,e,f) + 0xa831c66d + W[25];
  115. t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
  116. t1 = f + e1(c) + Ch(c,d,e) + 0xb00327c8 + W[26];
  117. t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
  118. t1 = e + e1(b) + Ch(b,c,d) + 0xbf597fc7 + W[27];
  119. t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
  120. t1 = d + e1(a) + Ch(a,b,c) + 0xc6e00bf3 + W[28];
  121. t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
  122. t1 = c + e1(h) + Ch(h,a,b) + 0xd5a79147 + W[29];
  123. t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
  124. t1 = b + e1(g) + Ch(g,h,a) + 0x06ca6351 + W[30];
  125. t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
  126. t1 = a + e1(f) + Ch(f,g,h) + 0x14292967 + W[31];
  127. t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
  128. t1 = h + e1(e) + Ch(e,f,g) + 0x27b70a85 + W[32];
  129. t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
  130. t1 = g + e1(d) + Ch(d,e,f) + 0x2e1b2138 + W[33];
  131. t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
  132. t1 = f + e1(c) + Ch(c,d,e) + 0x4d2c6dfc + W[34];
  133. t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
  134. t1 = e + e1(b) + Ch(b,c,d) + 0x53380d13 + W[35];
  135. t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
  136. t1 = d + e1(a) + Ch(a,b,c) + 0x650a7354 + W[36];
  137. t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
  138. t1 = c + e1(h) + Ch(h,a,b) + 0x766a0abb + W[37];
  139. t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
  140. t1 = b + e1(g) + Ch(g,h,a) + 0x81c2c92e + W[38];
  141. t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
  142. t1 = a + e1(f) + Ch(f,g,h) + 0x92722c85 + W[39];
  143. t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
  144. t1 = h + e1(e) + Ch(e,f,g) + 0xa2bfe8a1 + W[40];
  145. t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
  146. t1 = g + e1(d) + Ch(d,e,f) + 0xa81a664b + W[41];
  147. t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
  148. t1 = f + e1(c) + Ch(c,d,e) + 0xc24b8b70 + W[42];
  149. t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
  150. t1 = e + e1(b) + Ch(b,c,d) + 0xc76c51a3 + W[43];
  151. t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
  152. t1 = d + e1(a) + Ch(a,b,c) + 0xd192e819 + W[44];
  153. t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
  154. t1 = c + e1(h) + Ch(h,a,b) + 0xd6990624 + W[45];
  155. t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
  156. t1 = b + e1(g) + Ch(g,h,a) + 0xf40e3585 + W[46];
  157. t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
  158. t1 = a + e1(f) + Ch(f,g,h) + 0x106aa070 + W[47];
  159. t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
  160. t1 = h + e1(e) + Ch(e,f,g) + 0x19a4c116 + W[48];
  161. t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
  162. t1 = g + e1(d) + Ch(d,e,f) + 0x1e376c08 + W[49];
  163. t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
  164. t1 = f + e1(c) + Ch(c,d,e) + 0x2748774c + W[50];
  165. t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
  166. t1 = e + e1(b) + Ch(b,c,d) + 0x34b0bcb5 + W[51];
  167. t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
  168. t1 = d + e1(a) + Ch(a,b,c) + 0x391c0cb3 + W[52];
  169. t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
  170. t1 = c + e1(h) + Ch(h,a,b) + 0x4ed8aa4a + W[53];
  171. t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
  172. t1 = b + e1(g) + Ch(g,h,a) + 0x5b9cca4f + W[54];
  173. t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
  174. t1 = a + e1(f) + Ch(f,g,h) + 0x682e6ff3 + W[55];
  175. t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
  176. t1 = h + e1(e) + Ch(e,f,g) + 0x748f82ee + W[56];
  177. t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2;
  178. t1 = g + e1(d) + Ch(d,e,f) + 0x78a5636f + W[57];
  179. t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2;
  180. t1 = f + e1(c) + Ch(c,d,e) + 0x84c87814 + W[58];
  181. t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2;
  182. t1 = e + e1(b) + Ch(b,c,d) + 0x8cc70208 + W[59];
  183. t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2;
  184. t1 = d + e1(a) + Ch(a,b,c) + 0x90befffa + W[60];
  185. t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2;
  186. t1 = c + e1(h) + Ch(h,a,b) + 0xa4506ceb + W[61];
  187. t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2;
  188. t1 = b + e1(g) + Ch(g,h,a) + 0xbef9a3f7 + W[62];
  189. t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2;
  190. t1 = a + e1(f) + Ch(f,g,h) + 0xc67178f2 + W[63];
  191. t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2;
  192. state[0] += a; state[1] += b; state[2] += c; state[3] += d;
  193. state[4] += e; state[5] += f; state[6] += g; state[7] += h;
  194. #if 0
  195. /* clear any sensitive info... */
  196. a = b = c = d = e = f = g = h = t1 = t2 = 0;
  197. memset(W, 0, 64 * sizeof(u32));
  198. #endif
  199. }
  200. static void runhash(void *state, const void *input, const void *init)
  201. {
  202. memcpy(state, init, 32);
  203. sha256_transform(state, input);
  204. }
  205. const uint32_t sha256_init_state[8] = {
  206. 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
  207. 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
  208. };
  209. /* suspiciously similar to ScanHash* from bitcoin */
  210. bool scanhash_c(struct thr_info * const thr, struct work * const work,
  211. uint32_t max_nonce, uint32_t *last_nonce,
  212. uint32_t n)
  213. {
  214. const uint8_t *midstate = work->midstate;
  215. uint8_t *data = work->data;
  216. uint8_t hash1[0x40];
  217. memcpy(hash1, hash1_init, sizeof(hash1));
  218. uint8_t * const hash = work->hash;
  219. uint32_t *hash32 = (uint32_t *) hash;
  220. uint32_t *nonce = (uint32_t *)(data + 76);
  221. unsigned long stat_ctr = 0;
  222. data += 64;
  223. // Midstate and data are stored in little endian
  224. LOCAL_swap32le(unsigned char, midstate, 32/4)
  225. LOCAL_swap32le(unsigned char, data, 64/4)
  226. uint32_t *nonce_w = (uint32_t *)(data + 12);
  227. while (1) {
  228. *nonce_w = n;
  229. // runhash expects int32 data preprocessed into native endian
  230. runhash(hash1, data, midstate);
  231. runhash(hash, hash1, sha256_init_state);
  232. stat_ctr++;
  233. if (unlikely(hash32[7] == 0))
  234. {
  235. *nonce = htole32(n);
  236. *last_nonce = n;
  237. return true;
  238. }
  239. if ((n >= max_nonce) || thr->work_restart) {
  240. *nonce = htole32(n);
  241. *last_nonce = n;
  242. return false;
  243. }
  244. n++;
  245. }
  246. }