findnonce.c 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. /*
  2. * Copyright 2011-2012 Con Kolivas
  3. * Copyright 2011 Nils Schneider
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License as published by the Free
  7. * Software Foundation; either version 3 of the License, or (at your option)
  8. * any later version. See COPYING for more details.
  9. */
  10. #include "config.h"
  11. #ifdef HAVE_OPENCL
  12. #include <stdio.h>
  13. #include <inttypes.h>
  14. #include <pthread.h>
  15. #include <string.h>
  16. #include "findnonce.h"
  17. #include "scrypt.h"
  /*
   * The 64 SHA-256 round constants, indexed by round number. Used by the R()
   * round macro invocations and by the precomputed "+K" fields filled in by
   * precalc_hash().
   */
  const uint32_t SHA256_K[64] = {
      0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  };
  /*
   * Bit rotations: rotate() rotates left, rotr() rotates right. The width is
   * taken from sizeof(x), so x must be an unsigned value of exactly the intended
   * width. y must be strictly between 0 and that width, because a shift by the
   * full width is undefined behaviour. Every argument is parenthesised so that
   * compound expressions such as rotr(a | b, n) expand correctly.
   */
  #define rotate(x, y) (((x) << (y)) | ((x) >> (sizeof(x) * 8 - (y))))
  #define rotr(x, y) (((x) >> (y)) | ((x) << (sizeof(x) * 8 - (y))))

  /*
   * One SHA-256 compression round on the working variables a..h with message
   * word w and round constant k. Only d and h are modified; the caller rotates
   * the argument order between rounds instead of shuffling the variables.
   * rotate() by 26/21/7 and 30/19/10 is the left-rotate form of the usual
   * right-rotates by 6/11/25 and 2/13/22.
   * Wrapped in do { } while (0) so the three statements behave as a single
   * statement (e.g. under an unbraced if). w is evaluated exactly once.
   */
  #define R(a, b, c, d, e, f, g, h, w, k) \
      do { \
          (h) = (h) + (rotate((e), 26) ^ rotate((e), 21) ^ rotate((e), 7)) + ((g) ^ ((e) & ((f) ^ (g)))) + (k) + (w); \
          (d) = (d) + (h); \
          (h) = (h) + (rotate((a), 30) ^ rotate((a), 19) ^ rotate((a), 10)) + (((a) & (b)) | ((c) & ((a) | (b)))); \
      } while (0)
  42. void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
  43. {
  44. cl_uint A, B, C, D, E, F, G, H;
  45. A = state[0];
  46. B = state[1];
  47. C = state[2];
  48. D = state[3];
  49. E = state[4];
  50. F = state[5];
  51. G = state[6];
  52. H = state[7];
  53. R(A, B, C, D, E, F, G, H, data[0], SHA256_K[0]);
  54. R(H, A, B, C, D, E, F, G, data[1], SHA256_K[1]);
  55. R(G, H, A, B, C, D, E, F, data[2], SHA256_K[2]);
  56. blk->cty_a = A;
  57. blk->cty_b = B;
  58. blk->cty_c = C;
  59. blk->cty_d = D;
  60. blk->D1A = D + 0xb956c25b;
  61. blk->cty_e = E;
  62. blk->cty_f = F;
  63. blk->cty_g = G;
  64. blk->cty_h = H;
  65. blk->ctx_a = state[0];
  66. blk->ctx_b = state[1];
  67. blk->ctx_c = state[2];
  68. blk->ctx_d = state[3];
  69. blk->ctx_e = state[4];
  70. blk->ctx_f = state[5];
  71. blk->ctx_g = state[6];
  72. blk->ctx_h = state[7];
  73. blk->merkle = data[0];
  74. blk->ntime = data[1];
  75. blk->nbits = data[2];
  76. blk->W16 = blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3));
  77. blk->W17 = blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000;
  78. blk->PreVal4 = blk->fcty_e = blk->ctx_e + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
  79. blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
  80. blk->PreVal4_2 = blk->PreVal4 + blk->T1;
  81. blk->PreVal0 = blk->PreVal4 + blk->ctx_a;
  82. blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
  83. blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3));
  84. blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10));
  85. blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10));
  86. blk->W2 = data[2];
  87. blk->W2A = blk->W2 + (rotr(blk->W16, 19) ^ rotr(blk->W16, 17) ^ (blk->W16 >> 10));
  88. blk->W17_2 = 0x11002000 + (rotr(blk->W17, 19) ^ rotr(blk->W17, 17) ^ (blk->W17 >> 10));
  89. blk->fW2 = data[2] + (rotr(blk->fW0, 17) ^ rotr(blk->fW0, 19) ^ (blk->fW0 >> 10));
  90. blk->fW3 = 0x11002000 + (rotr(blk->fW1, 17) ^ rotr(blk->fW1, 19) ^ (blk->fW1 >> 10));
  91. blk->fW15 = 0x00000280 + (rotr(blk->fW0, 7) ^ rotr(blk->fW0, 18) ^ (blk->fW0 >> 3));
  92. blk->fW01r = blk->fW0 + (rotr(blk->fW1, 7) ^ rotr(blk->fW1, 18) ^ (blk->fW1 >> 3));
  93. blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
  94. blk->T1substate0 = blk->ctx_a - blk->T1;
  95. blk->C1addK5 = blk->cty_c + SHA256_K[5];
  96. blk->B1addK6 = blk->cty_b + SHA256_K[6];
  97. blk->PreVal0addK7 = blk->PreVal0 + SHA256_K[7];
  98. blk->W16addK16 = blk->W16 + SHA256_K[16];
  99. blk->W17addK17 = blk->W17 + SHA256_K[17];
  100. blk->zeroA = blk->ctx_a + 0x98c7e2a2;
  101. blk->zeroB = blk->ctx_a + 0xfc08884d;
  102. blk->oneA = blk->ctx_b + 0x90bb1e3c;
  103. blk->twoA = blk->ctx_c + 0x50c6645b;
  104. blk->threeA = blk->ctx_d + 0x3ac42e24;
  105. blk->fourA = blk->ctx_e + SHA256_K[4];
  106. blk->fiveA = blk->ctx_f + SHA256_K[5];
  107. blk->sixA = blk->ctx_g + SHA256_K[6];
  108. blk->sevenA = blk->ctx_h + SHA256_K[7];
  109. }
  #if 0 // not used any more
  /*
   * Message-schedule expansion over a 16-word circular buffer W[]: computes
   * schedule word t from words t-16, t-15, t-7 and t-2, stores it in slot
   * (t & 0xF) and yields the stored value. The rotate() (left-rotate) amounts
   * 25/14 and 15/13 correspond to right-rotates by 7/18 and 17/19.
   */
  #define P(t) \
      (W[(t)&0xF] = W[(t-16)&0xF] \
          + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) \
          + W[(t-7)&0xF] \
          + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10)))

  /* Eight rounds n..n+7 taking the message words straight from W[n..n+7]. */
  #define IR(n) \
      R(A, B, C, D, E, F, G, H, W[n+0], SHA256_K[n+0]); \
      R(H, A, B, C, D, E, F, G, W[n+1], SHA256_K[n+1]); \
      R(G, H, A, B, C, D, E, F, W[n+2], SHA256_K[n+2]); \
      R(F, G, H, A, B, C, D, E, W[n+3], SHA256_K[n+3]); \
      R(E, F, G, H, A, B, C, D, W[n+4], SHA256_K[n+4]); \
      R(D, E, F, G, H, A, B, C, W[n+5], SHA256_K[n+5]); \
      R(C, D, E, F, G, H, A, B, W[n+6], SHA256_K[n+6]); \
      R(B, C, D, E, F, G, H, A, W[n+7], SHA256_K[n+7])

  /* Eight rounds n..n+7 whose message words are expanded on the fly by P(). */
  #define FR(n) \
      R(A, B, C, D, E, F, G, H, P(n+0), SHA256_K[n+0]); \
      R(H, A, B, C, D, E, F, G, P(n+1), SHA256_K[n+1]); \
      R(G, H, A, B, C, D, E, F, P(n+2), SHA256_K[n+2]); \
      R(F, G, H, A, B, C, D, E, P(n+3), SHA256_K[n+3]); \
      R(E, F, G, H, A, B, C, D, P(n+4), SHA256_K[n+4]); \
      R(D, E, F, G, H, A, B, C, P(n+5), SHA256_K[n+5]); \
      R(C, D, E, F, G, H, A, B, P(n+6), SHA256_K[n+6]); \
      R(B, C, D, E, F, G, H, A, P(n+7), SHA256_K[n+7])

  /* Partial IR: only rounds n+3..n+7 (the first three rounds are skipped). */
  #define PIR(n) \
      R(F, G, H, A, B, C, D, E, W[n+3], SHA256_K[n+3]); \
      R(E, F, G, H, A, B, C, D, W[n+4], SHA256_K[n+4]); \
      R(D, E, F, G, H, A, B, C, W[n+5], SHA256_K[n+5]); \
      R(C, D, E, F, G, H, A, B, W[n+6], SHA256_K[n+6]); \
      R(B, C, D, E, F, G, H, A, W[n+7], SHA256_K[n+7])

  /* Partial FR: only rounds n..n+5 (the last two rounds are omitted). */
  #define PFR(n) \
      R(A, B, C, D, E, F, G, H, P(n+0), SHA256_K[n+0]); \
      R(H, A, B, C, D, E, F, G, P(n+1), SHA256_K[n+1]); \
      R(G, H, A, B, C, D, E, F, P(n+2), SHA256_K[n+2]); \
      R(F, G, H, A, B, C, D, E, P(n+3), SHA256_K[n+3]); \
      R(E, F, G, H, A, B, C, D, P(n+4), SHA256_K[n+4]); \
      R(D, E, F, G, H, A, B, C, P(n+5), SHA256_K[n+5])
  #endif
  144. struct pc_data {
  145. struct thr_info *thr;
  146. struct work *work;
  147. uint32_t res[MAXBUFFERS];
  148. pthread_t pth;
  149. int found;
  150. };
  #if 0 // not used any more
  /*
   * Recompute the double SHA-256 for one candidate nonce on the host and submit
   * it only if the result passes the H check below; otherwise count a hardware
   * error.
   */
  static void send_sha_nonce(struct pc_data *pcd, cl_uint nonce)
  {
      struct work *work = pcd->work;
      struct thr_info *thr = pcd->thr;
      dev_blk_ctx *blk = &work->blk;
      cl_uint A, B, C, D, E, F, G, H;
      cl_uint W[16];
      int i;

      /* Resume from the working variables precalc_hash() saved after rounds 0..2. */
      A = blk->cty_a;
      B = blk->cty_b;
      C = blk->cty_c;
      D = blk->cty_d;
      E = blk->cty_e;
      F = blk->cty_f;
      G = blk->cty_g;
      H = blk->cty_h;

      /* First block: three stored words, the nonce, then fixed padding words. */
      W[0] = blk->merkle;
      W[1] = blk->ntime;
      W[2] = blk->nbits;
      W[3] = nonce;
      W[4] = 0x80000000;
      for (i = 5; i < 15; i++)
          W[i] = 0x00000000;
      W[15] = 0x00000280;

      /* Rounds 3..63 (rounds 0..2 were done in precalc_hash()). */
      PIR(0);
      IR(8);
      FR(16);
      FR(24);
      FR(32);
      FR(40);
      FR(48);
      FR(56);

      /* Second block: first result plus the input state, then fixed padding words. */
      W[0] = A + blk->ctx_a;
      W[1] = B + blk->ctx_b;
      W[2] = C + blk->ctx_c;
      W[3] = D + blk->ctx_d;
      W[4] = E + blk->ctx_e;
      W[5] = F + blk->ctx_f;
      W[6] = G + blk->ctx_g;
      W[7] = H + blk->ctx_h;
      W[8] = 0x80000000;
      for (i = 9; i < 15; i++)
          W[i] = 0x00000000;
      W[15] = 0x00000100;

      /* Fixed initial working variables for the second hash. */
      A = 0x6a09e667;
      B = 0xbb67ae85;
      C = 0x3c6ef372;
      D = 0xa54ff53a;
      E = 0x510e527f;
      F = 0x9b05688c;
      G = 0x1f83d9ab;
      H = 0x5be0cd19;

      /* Rounds 0..61; PFR() leaves out the final two rounds. */
      IR(0);
      IR(8);
      FR(16);
      FR(24);
      FR(32);
      FR(40);
      FR(48);
      PFR(56);

      /*
       * 0xa41f32e7 is the two's complement of the initial H value 0x5be0cd19,
       * so this test is equivalent to H + 0x5be0cd19 == 0.
       */
      if (likely(H == 0xa41f32e7)) {
          if (unlikely(submit_nonce(thr, work, nonce) == false))
              applog(LOG_ERR, "Failed to submit work, exiting");
      } else {
          applog(LOG_DEBUG, "No best_g found! Error in OpenCL code?");
          hw_errors++;
          thr->cgpu->hw_errors++;
      }
  }
  #endif
  196. static void send_scrypt_nonce(struct pc_data *pcd, uint32_t nonce)
  197. {
  198. struct thr_info *thr = pcd->thr;
  199. struct work *work = pcd->work;
  200. if (scrypt_test(work->data, work->target, nonce))
  201. submit_nonce(thr, pcd->work, nonce);
  202. else {
  203. applog(LOG_INFO, "Scrypt error, review settings");
  204. thr->cgpu->hw_errors++;
  205. }
  206. }
  207. static void *postcalc_hash(void *userdata)
  208. {
  209. struct pc_data *pcd = (struct pc_data *)userdata;
  210. struct thr_info *thr = pcd->thr;
  211. struct work *work = pcd->work;
  212. unsigned int entry = 0;
  213. pthread_detach(pthread_self());
  214. for (entry = 0; entry < pcd->res[FOUND]; entry++) {
  215. uint32_t nonce = pcd->res[entry];
  216. applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry);
  217. if (opt_scrypt)
  218. send_scrypt_nonce(pcd, nonce);
  219. else {
  220. if (unlikely(submit_nonce(thr, work, nonce) == false))
  221. applog(LOG_ERR, "Failed to submit work, exiting");
  222. }
  223. }
  224. free(pcd);
  225. return NULL;
  226. }
  227. void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
  228. {
  229. struct pc_data *pcd = malloc(sizeof(struct pc_data));
  230. if (unlikely(!pcd)) {
  231. applog(LOG_ERR, "Failed to malloc pc_data in postcalc_hash_async");
  232. return;
  233. }
  234. pcd->work = calloc(1, sizeof(struct work));
  235. if (unlikely(!pcd->work)) {
  236. applog(LOG_ERR, "Failed to malloc work in postcalc_hash_async");
  237. return;
  238. }
  239. pcd->thr = thr;
  240. memcpy(pcd->work, work, sizeof(struct work));
  241. memcpy(&pcd->res, res, BUFFERSIZE);
  242. if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) {
  243. applog(LOG_ERR, "Failed to create postcalc_hash thread");
  244. return;
  245. }
  246. }
  247. #endif /* HAVE_OPENCL */