DiabloMiner120210.cl 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. /*
  2. * DiabloMiner - OpenCL miner for BitCoin
  3. * Copyright (C) 2010, 2011, 2012 Patrick McFarland <diablod3@gmail.com>
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. #ifdef VECTORS4
  19. typedef uint4 z;
  20. #elif defined(VECTORS2)
  21. typedef uint2 z;
  22. #else
  23. typedef uint z;
  24. #endif
  25. #ifdef BITALIGN
  26. #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  27. #define Zrotr(a, b) amd_bitalign((z)a, (z)a, (z)(32 - b))
  28. #else
  29. #define Zrotr(a, b) rotate((z)a, (z)b)
  30. #endif
  31. #ifdef BFI_INT
  32. #define ZCh(a, b, c) amd_bytealign(a, b, c)
  33. #define ZMa(a, b, c) amd_bytealign((c ^ a), (b), (a))
  34. #else
  35. #define ZCh(a, b, c) bitselect((z)c, (z)b, (z)a)
  36. #define ZMa(a, b, c) bitselect((z)a, (z)b, (z)c ^ (z)a)
  37. #endif
  38. #define ZR25(n) ((Zrotr((n), 25) ^ Zrotr((n), 14) ^ ((n) >> 3U)))
  39. #define ZR15(n) ((Zrotr((n), 15) ^ Zrotr((n), 13) ^ ((n) >> 10U)))
  40. #define ZR26(n) ((Zrotr((n), 26) ^ Zrotr((n), 21) ^ Zrotr((n), 7)))
  41. #define ZR30(n) ((Zrotr((n), 30) ^ Zrotr((n), 19) ^ Zrotr((n), 10)))
  42. __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
  43. const uint base,
  44. const uint PreVal4_plus_state0, const uint PreVal4_plus_T1,
  45. const uint W18, const uint W19,
  46. const uint W16, const uint W17,
  47. const uint W31, const uint W32,
  48. const uint d1, const uint b1, const uint c1,
  49. const uint h1, const uint f1, const uint g1,
  50. const uint c1_plus_k5, const uint b1_plus_k6,
  51. const uint state0, const uint state1, const uint state2, const uint state3,
  52. const uint state4, const uint state5, const uint state6, const uint state7,
  53. __global uint * output)
  54. {
  55. z ZA[4];
  56. z ZB[4];
  57. z ZC[4];
  58. z ZD[4];
  59. z ZE[4];
  60. z ZF[4];
  61. z ZG[4];
  62. z ZH[4];
  63. z Znonce;
  64. #ifdef VECTORS4
  65. Znonce = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3);
  66. #elif defined VECTORS2
  67. Znonce = base + (get_global_id(0)<<1) + (uint2)(0, 1);
  68. #else
  69. Znonce = base + get_global_id(0);
  70. #endif
  71. ZA[0] = PreVal4_plus_state0 + Znonce;
  72. ZB[0] = PreVal4_plus_T1 + Znonce;
  73. ZC[0] = W18 + ZR25(Znonce);
  74. ZD[0] = W19 + Znonce;
  75. ZE[0] = 0x80000000U + ZR15(ZC[0]);
  76. ZF[0] = ZR15(ZD[0]);
  77. ZG[0] = 0x00000280U + ZR15(ZE[0]);
  78. ZH[0] = ZR15(ZF[0]) + W16;
  79. ZA[1] = ZR15(ZG[0]) + W17;
  80. ZB[1] = ZR15(ZH[0]) + ZC[0];
  81. ZC[1] = ZR15(ZA[1]) + ZD[0];
  82. ZD[1] = ZR15(ZB[1]) + ZE[0];
  83. ZE[1] = ZR15(ZC[1]) + ZF[0];
  84. ZF[1] = ZR15(ZD[1]) + ZG[0];
  85. ZG[1] = 0x00A00055U + ZR15(ZE[1]) + ZH[0];
  86. ZH[1] = W31 + ZR15(ZF[1]) + ZA[1];
  87. ZA[2] = W32 + ZR15(ZG[1]) + ZB[1];
  88. ZB[2] = d1 + ZCh(ZA[0], b1, c1) + ZR26(ZA[0]);
  89. ZC[2] = h1 + ZB[2];
  90. ZD[2] = ZB[2] + ZR30(ZB[0]) + ZMa(f1, g1, ZB[0]);
  91. ZE[2] = c1_plus_k5 + ZCh(ZC[2], ZA[0], b1) + ZR26(ZC[2]);
  92. ZF[2] = g1 + ZE[2];
  93. ZG[2] = ZE[2] + ZR30(ZD[2]) + ZMa(ZB[0], f1, ZD[2]);
  94. ZH[2] = b1_plus_k6 + ZCh(ZF[2], ZC[2], ZA[0]) + ZR26(ZF[2]);
  95. ZA[3] = f1 + ZH[2];
  96. ZB[3] = ZH[2] + ZR30(ZG[2]) + ZMa(ZD[2], ZB[0], ZG[2]);
  97. ZC[3] = ZA[0] + 0xab1c5ed5U + ZCh(ZA[3], ZF[2], ZC[2]) + ZR26(ZA[3]);
  98. ZD[3] = ZB[0] + ZC[3];
  99. ZE[3] = ZC[3] + ZR30(ZB[3]) + ZMa(ZG[2], ZD[2], ZB[3]);
  100. ZF[3] = ZC[2] + 0xd807aa98U + ZCh(ZD[3], ZA[3], ZF[2]) + ZR26(ZD[3]);
  101. ZG[3] = ZD[2] + ZF[3];
  102. ZH[3] = ZF[3] + ZR30(ZE[3]) + ZMa(ZB[3], ZG[2], ZE[3]);
  103. ZA[0] = ZF[2] + 0x12835b01U + ZCh(ZG[3], ZD[3], ZA[3]) + ZR26(ZG[3]);
  104. ZB[0] = ZG[2] + ZA[0];
  105. ZB[2] = ZA[0] + ZR30(ZH[3]) + ZMa(ZE[3], ZB[3], ZH[3]);
  106. ZC[2] = ZA[3] + 0x243185beU + ZCh(ZB[0], ZG[3], ZD[3]) + ZR26(ZB[0]);
  107. ZD[2] = ZB[3] + ZC[2];
  108. ZE[2] = ZC[2] + ZR30(ZB[2]) + ZMa(ZH[3], ZE[3], ZB[2]);
  109. ZF[2] = ZD[3] + 0x550c7dc3U + ZCh(ZD[2], ZB[0], ZG[3]) + ZR26(ZD[2]);
  110. ZG[2] = ZE[3] + ZF[2];
  111. ZH[2] = ZF[2] + ZR30(ZE[2]) + ZMa(ZB[2], ZH[3], ZE[2]);
  112. ZA[3] = ZG[3] + 0x72be5d74U + ZCh(ZG[2], ZD[2], ZB[0]) + ZR26(ZG[2]);
  113. ZB[3] = ZH[3] + ZA[3];
  114. ZC[3] = ZA[3] + ZR30(ZH[2]) + ZMa(ZE[2], ZB[2], ZH[2]);
  115. ZD[3] = ZB[0] + 0x80deb1feU + ZCh(ZB[3], ZG[2], ZD[2]) + ZR26(ZB[3]);
  116. ZE[3] = ZB[2] + ZD[3];
  117. ZF[3] = ZD[3] + ZR30(ZC[3]) + ZMa(ZH[2], ZE[2], ZC[3]);
  118. ZG[3] = ZD[2] + 0x9bdc06a7U + ZCh(ZE[3], ZB[3], ZG[2]) + ZR26(ZE[3]);
  119. ZH[3] = ZE[2] + ZG[3];
  120. ZA[0] = ZG[3] + ZR30(ZF[3]) + ZMa(ZC[3], ZH[2], ZF[3]);
  121. ZB[0] = ZG[2] + 0xc19bf3f4U + ZCh(ZH[3], ZE[3], ZB[3]) + ZR26(ZH[3]);
  122. ZB[2] = ZH[2] + ZB[0];
  123. ZC[2] = ZB[0] + ZR30(ZA[0]) + ZMa(ZF[3], ZC[3], ZA[0]);
  124. ZD[2] = ZB[3] + 0xe49b69c1U + W16 + ZCh(ZB[2], ZH[3], ZE[3]) + ZR26(ZB[2]);
  125. ZE[2] = ZC[3] + ZD[2];
  126. ZF[2] = ZD[2] + ZR30(ZC[2]) + ZMa(ZA[0], ZF[3], ZC[2]);
  127. ZG[2] = ZE[3] + 0xefbe4786U + W17 + ZCh(ZE[2], ZB[2], ZH[3]) + ZR26(ZE[2]);
  128. ZH[2] = ZF[3] + ZG[2];
  129. ZA[3] = ZG[2] + ZR30(ZF[2]) + ZMa(ZC[2], ZA[0], ZF[2]);
  130. ZB[3] = ZH[3] + 0x0fc19dc6U + ZC[0] + ZCh(ZH[2], ZE[2], ZB[2]) + ZR26(ZH[2]);
  131. ZC[3] = ZA[0] + ZB[3];
  132. ZD[3] = ZB[3] + ZR30(ZA[3]) + ZMa(ZF[2], ZC[2], ZA[3]);
  133. ZE[3] = ZB[2] + 0x240ca1ccU + ZD[0] + ZCh(ZC[3], ZH[2], ZE[2]) + ZR26(ZC[3]);
  134. ZF[3] = ZC[2] + ZE[3];
  135. ZG[3] = ZE[3] + ZR30(ZD[3]) + ZMa(ZA[3], ZF[2], ZD[3]);
  136. ZH[3] = ZE[2] + 0x2de92c6fU + ZE[0] + ZCh(ZF[3], ZC[3], ZH[2]) + ZR26(ZF[3]);
  137. ZA[0] = ZF[2] + ZH[3];
  138. ZB[0] = ZH[3] + ZR30(ZG[3]) + ZMa(ZD[3], ZA[3], ZG[3]);
  139. ZB[2] = ZH[2] + 0x4a7484aaU + ZF[0] + ZCh(ZA[0], ZF[3], ZC[3]) + ZR26(ZA[0]);
  140. ZC[2] = ZA[3] + ZB[2];
  141. ZD[2] = ZB[2] + ZR30(ZB[0]) + ZMa(ZG[3], ZD[3], ZB[0]);
  142. ZE[2] = ZC[3] + 0x5cb0a9dcU + ZG[0] + ZCh(ZC[2], ZA[0], ZF[3]) + ZR26(ZC[2]);
  143. ZF[2] = ZD[3] + ZE[2];
  144. ZG[2] = ZE[2] + ZR30(ZD[2]) + ZMa(ZB[0], ZG[3], ZD[2]);
  145. ZH[2] = ZF[3] + 0x76f988daU + ZH[0] + ZCh(ZF[2], ZC[2], ZA[0]) + ZR26(ZF[2]);
  146. ZA[3] = ZG[3] + ZH[2];
  147. ZB[3] = ZH[2] + ZR30(ZG[2]) + ZMa(ZD[2], ZB[0], ZG[2]);
  148. ZC[3] = ZA[0] + 0x983e5152U + ZA[1] + ZCh(ZA[3], ZF[2], ZC[2]) + ZR26(ZA[3]);
  149. ZD[3] = ZB[0] + ZC[3];
  150. ZE[3] = ZC[3] + ZR30(ZB[3]) + ZMa(ZG[2], ZD[2], ZB[3]);
  151. ZF[3] = ZC[2] + 0xa831c66dU + ZB[1] + ZCh(ZD[3], ZA[3], ZF[2]) + ZR26(ZD[3]);
  152. ZG[3] = ZD[2] + ZF[3];
  153. ZH[3] = ZF[3] + ZR30(ZE[3]) + ZMa(ZB[3], ZG[2], ZE[3]);
  154. ZA[0] = ZF[2] + 0xb00327c8U + ZC[1] + ZCh(ZG[3], ZD[3], ZA[3]) + ZR26(ZG[3]);
  155. ZB[0] = ZG[2] + ZA[0];
  156. ZB[2] = ZA[0] + ZR30(ZH[3]) + ZMa(ZE[3], ZB[3], ZH[3]);
  157. ZC[2] = ZA[3] + 0xbf597fc7U + ZD[1] + ZCh(ZB[0], ZG[3], ZD[3]) + ZR26(ZB[0]);
  158. ZD[2] = ZB[3] + ZC[2];
  159. ZE[2] = ZC[2] + ZR30(ZB[2]) + ZMa(ZH[3], ZE[3], ZB[2]);
  160. ZF[2] = ZD[3] + 0xc6e00bf3U + ZE[1] + ZCh(ZD[2], ZB[0], ZG[3]) + ZR26(ZD[2]);
  161. ZG[2] = ZE[3] + ZF[2];
  162. ZH[2] = ZF[2] + ZR30(ZE[2]) + ZMa(ZB[2], ZH[3], ZE[2]);
  163. ZA[3] = ZG[3] + 0xd5a79147U + ZF[1] + ZCh(ZG[2], ZD[2], ZB[0]) + ZR26(ZG[2]);
  164. ZB[3] = ZH[3] + ZA[3];
  165. ZC[3] = ZA[3] + ZR30(ZH[2]) + ZMa(ZE[2], ZB[2], ZH[2]);
  166. ZD[3] = ZB[0] + 0x06ca6351U + ZG[1] + ZCh(ZB[3], ZG[2], ZD[2]) + ZR26(ZB[3]);
  167. ZE[3] = ZB[2] + ZD[3];
  168. ZF[3] = ZD[3] + ZR30(ZC[3]) + ZMa(ZH[2], ZE[2], ZC[3]);
  169. ZG[3] = ZD[2] + 0x14292967U + ZH[1] + ZCh(ZE[3], ZB[3], ZG[2]) + ZR26(ZE[3]);
  170. ZH[3] = ZE[2] + ZG[3];
  171. ZA[0] = ZG[3] + ZR30(ZF[3]) + ZMa(ZC[3], ZH[2], ZF[3]);
  172. ZB[0] = ZG[2] + 0x27b70a85U + ZA[2] + ZCh(ZH[3], ZE[3], ZB[3]) + ZR26(ZH[3]);
  173. ZB[2] = ZH[2] + ZB[0];
  174. ZC[2] = ZB[0] + ZR30(ZA[0]) + ZMa(ZF[3], ZC[3], ZA[0]);
  175. ZD[2] = ZR15(ZH[1]) + ZR25(ZC[0]) + ZC[1] + W17;
  176. ZE[2] = ZB[3] + 0x2e1b2138U + ZD[2] + ZCh(ZB[2], ZH[3], ZE[3]) + ZR26(ZB[2]);
  177. ZF[2] = ZC[3] + ZE[2];
  178. ZG[2] = ZE[2] + ZR30(ZC[2]) + ZMa(ZA[0], ZF[3], ZC[2]);
  179. ZH[2] = ZR15(ZA[2]) + ZR25(ZD[0]) + ZD[1] + ZC[0];
  180. ZA[3] = ZE[3] + 0x4d2c6dfcU + ZH[2] + ZCh(ZF[2], ZB[2], ZH[3]) + ZR26(ZF[2]);
  181. ZB[3] = ZF[3] + ZA[3];
  182. ZC[3] = ZA[3] + ZR30(ZG[2]) + ZMa(ZC[2], ZA[0], ZG[2]);
  183. ZD[3] = ZR15(ZD[2]) + ZR25(ZE[0]) + ZE[1] + ZD[0];
  184. ZE[3] = ZH[3] + 0x53380d13U + ZD[3] + ZCh(ZB[3], ZF[2], ZB[2]) + ZR26(ZB[3]);
  185. ZF[3] = ZA[0] + ZE[3];
  186. ZG[3] = ZE[3] + ZR30(ZC[3]) + ZMa(ZG[2], ZC[2], ZC[3]);
  187. ZH[3] = ZR15(ZH[2]) + ZR25(ZF[0]) + ZF[1] + ZE[0];
  188. ZA[0] = ZB[2] + 0x650a7354U + ZH[3] + ZCh(ZF[3], ZB[3], ZF[2]) + ZR26(ZF[3]);
  189. ZB[0] = ZC[2] + ZA[0];
  190. ZC[0] = ZA[0] + ZR30(ZG[3]) + ZMa(ZC[3], ZG[2], ZG[3]);
  191. ZD[0] = ZR15(ZD[3]) + ZR25(ZG[0]) + ZG[1] + ZF[0];
  192. ZE[0] = ZF[2] + 0x766a0abbU + ZD[0] + ZCh(ZB[0], ZF[3], ZB[3]) + ZR26(ZB[0]);
  193. ZF[0] = ZG[2] + ZE[0];
  194. ZB[2] = ZE[0] + ZR30(ZC[0]) + ZMa(ZG[3], ZC[3], ZC[0]);
  195. ZC[2] = ZR15(ZH[3]) + ZR25(ZH[0]) + ZH[1] + ZG[0];
  196. ZE[2] = ZB[3] + 0x81c2c92eU + ZC[2] + ZCh(ZF[0], ZB[0], ZF[3]) + ZR26(ZF[0]);
  197. ZF[2] = ZC[3] + ZE[2];
  198. ZG[2] = ZE[2] + ZR30(ZB[2]) + ZMa(ZC[0], ZG[3], ZB[2]);
  199. ZA[3] = ZR15(ZD[0]) + ZR25(ZA[1]) + ZA[2] + ZH[0];
  200. ZB[3] = ZF[3] + 0x92722c85U + ZA[3] + ZCh(ZF[2], ZF[0], ZB[0]) + ZR26(ZF[2]);
  201. ZC[3] = ZG[3] + ZB[3];
  202. ZE[3] = ZB[3] + ZR30(ZG[2]) + ZMa(ZB[2], ZC[0], ZG[2]);
  203. ZF[3] = ZR15(ZC[2]) + ZR25(ZB[1]) + ZD[2] + ZA[1];
  204. ZG[3] = ZB[0] + 0xa2bfe8a1U + ZF[3] + ZCh(ZC[3], ZF[2], ZF[0]) + ZR26(ZC[3]);
  205. ZA[0] = ZC[0] + ZG[3];
  206. ZB[0] = ZG[3] + ZR30(ZE[3]) + ZMa(ZG[2], ZB[2], ZE[3]);
  207. ZC[0] = ZR15(ZA[3]) + ZR25(ZC[1]) + ZH[2] + ZB[1];
  208. ZE[0] = ZF[0] + 0xa81a664bU + ZC[0] + ZCh(ZA[0], ZC[3], ZF[2]) + ZR26(ZA[0]);
  209. ZF[0] = ZB[2] + ZE[0];
  210. ZG[0] = ZE[0] + ZR30(ZB[0]) + ZMa(ZE[3], ZG[2], ZB[0]);
  211. ZH[0] = ZR15(ZF[3]) + ZR25(ZD[1]) + ZD[3] + ZC[1];
  212. ZA[1] = ZF[2] + 0xc24b8b70U + ZH[0] + ZCh(ZF[0], ZA[0], ZC[3]) + ZR26(ZF[0]);
  213. ZB[1] = ZG[2] + ZA[1];
  214. ZC[1] = ZA[1] + ZR30(ZG[0]) + ZMa(ZB[0], ZE[3], ZG[0]);
  215. ZB[2] = ZR15(ZC[0]) + ZR25(ZE[1]) + ZH[3] + ZD[1];
  216. ZE[2] = ZC[3] + 0xc76c51a3U + ZB[2] + ZCh(ZB[1], ZF[0], ZA[0]) + ZR26(ZB[1]);
  217. ZF[2] = ZE[3] + ZE[2];
  218. ZG[2] = ZE[2] + ZR30(ZC[1]) + ZMa(ZG[0], ZB[0], ZC[1]);
  219. ZB[3] = ZR15(ZH[0]) + ZR25(ZF[1]) + ZD[0] + ZE[1];
  220. ZC[3] = ZA[0] + 0xd192e819U + ZB[3] + ZCh(ZF[2], ZB[1], ZF[0]) + ZR26(ZF[2]);
  221. ZE[3] = ZB[0] + ZC[3];
  222. ZG[3] = ZC[3] + ZR30(ZG[2]) + ZMa(ZC[1], ZG[0], ZG[2]);
  223. ZA[0] = ZR15(ZB[2]) + ZR25(ZG[1]) + ZC[2] + ZF[1];
  224. ZB[0] = ZF[0] + 0xd6990624U + ZA[0] + ZCh(ZE[3], ZF[2], ZB[1]) + ZR26(ZE[3]);
  225. ZE[0] = ZG[0] + ZB[0];
  226. ZF[0] = ZB[0] + ZR30(ZG[3]) + ZMa(ZG[2], ZC[1], ZG[3]);
  227. ZG[0] = ZR15(ZB[3]) + ZR25(ZH[1]) + ZA[3] + ZG[1];
  228. ZA[1] = ZB[1] + 0xf40e3585U + ZG[0] + ZCh(ZE[0], ZE[3], ZF[2]) + ZR26(ZE[0]);
  229. ZB[1] = ZC[1] + ZA[1];
  230. ZC[1] = ZA[1] + ZR30(ZF[0]) + ZMa(ZG[3], ZG[2], ZF[0]);
  231. ZD[1] = ZR15(ZA[0]) + ZF[3] + ZR25(ZA[2]) + ZH[1];
  232. ZE[1] = ZF[2] + 0x106aa070U + ZD[1] + ZCh(ZB[1], ZE[0], ZE[3]) + ZR26(ZB[1]);
  233. ZF[1] = ZG[2] + ZE[1];
  234. ZG[1] = ZE[1] + ZR30(ZC[1]) + ZMa(ZF[0], ZG[3], ZC[1]);
  235. ZH[1] = ZR15(ZG[0]) + ZC[0] + ZR25(ZD[2]) + ZA[2];
  236. ZA[2] = ZE[3] + 0x19a4c116U + ZH[1] + ZCh(ZF[1], ZB[1], ZE[0]) + ZR26(ZF[1]);
  237. ZE[2] = ZG[3] + ZA[2];
  238. ZF[2] = ZA[2] + ZR30(ZG[1]) + ZMa(ZC[1], ZF[0], ZG[1]);
  239. ZG[2] = ZR15(ZD[1]) + ZH[0] + ZR25(ZH[2]) + ZD[2];
  240. ZC[3] = ZE[0] + 0x1e376c08U + ZG[2] + ZCh(ZE[2], ZF[1], ZB[1]) + ZR26(ZE[2]);
  241. ZE[3] = ZF[0] + ZC[3];
  242. ZG[3] = ZC[3] + ZR30(ZF[2]) + ZMa(ZG[1], ZC[1], ZF[2]);
  243. ZB[0] = ZR15(ZH[1]) + ZB[2] + ZR25(ZD[3]) + ZH[2];
  244. ZE[0] = ZB[1] + 0x2748774cU + ZB[0] + ZCh(ZE[3], ZE[2], ZF[1]) + ZR26(ZE[3]);
  245. ZF[0] = ZC[1] + ZE[0];
  246. ZA[1] = ZE[0] + ZR30(ZG[3]) + ZMa(ZF[2], ZG[1], ZG[3]);
  247. ZB[1] = ZR15(ZG[2]) + ZB[3] + ZR25(ZH[3]) + ZD[3];
  248. ZC[1] = ZF[1] + 0x34b0bcb5U + ZB[1] + ZCh(ZF[0], ZE[3], ZE[2]) + ZR26(ZF[0]);
  249. ZE[1] = ZG[1] + ZC[1];
  250. ZF[1] = ZC[1] + ZR30(ZA[1]) + ZMa(ZG[3], ZF[2], ZA[1]);
  251. ZG[1] = ZR15(ZB[0]) + ZA[0] + ZR25(ZD[0]) + ZH[3];
  252. ZA[2] = ZE[2] + 0x391c0cb3U + ZG[1] + ZCh(ZE[1], ZF[0], ZE[3]) + ZR26(ZE[1]);
  253. ZD[2] = ZF[2] + ZA[2];
  254. ZE[2] = ZA[2] + ZR30(ZF[1]) + ZMa(ZA[1], ZG[3], ZF[1]);
  255. ZF[2] = ZR15(ZB[1]) + ZG[0] + ZR25(ZC[2]) + ZD[0];
  256. ZH[2] = ZE[3] + 0x4ed8aa4aU + ZF[2] + ZCh(ZD[2], ZE[1], ZF[0]) + ZR26(ZD[2]);
  257. ZC[3] = ZG[3] + ZH[2];
  258. ZD[3] = ZH[2] + ZR30(ZE[2]) + ZMa(ZF[1], ZA[1], ZE[2]);
  259. ZE[3] = ZR15(ZG[1]) + ZD[1] + ZR25(ZA[3]) + ZC[2];
  260. ZG[3] = ZF[0] + 0x5b9cca4fU + ZE[3] + ZCh(ZC[3], ZD[2], ZE[1]) + ZR26(ZC[3]);
  261. ZH[3] = ZA[1] + ZG[3];
  262. ZD[0] = ZG[3] + ZR30(ZD[3]) + ZMa(ZE[2], ZF[1], ZD[3]);
  263. ZE[0] = ZR15(ZF[2]) + ZH[1] + ZR25(ZF[3]) + ZA[3];
  264. ZF[0] = ZE[1] + 0x682e6ff3U + ZE[0] + ZCh(ZH[3], ZC[3], ZD[2]) + ZR26(ZH[3]);
  265. ZA[1] = ZF[1] + ZF[0];
  266. ZC[1] = ZF[0] + ZR30(ZD[0]) + ZMa(ZD[3], ZE[2], ZD[0]);
  267. ZE[1] = ZR15(ZE[3]) + ZG[2] + ZR25(ZC[0]) + ZF[3];
  268. ZF[1] = ZD[2] + 0x748f82eeU + ZE[1] + ZCh(ZA[1], ZH[3], ZC[3]) + ZR26(ZA[1]);
  269. ZA[2] = ZE[2] + ZF[1];
  270. ZC[2] = ZF[1] + ZR30(ZC[1]) + ZMa(ZD[0], ZD[3], ZC[1]);
  271. ZD[2] = ZR15(ZE[0]) + ZB[0] + ZR25(ZH[0]) + ZC[0];
  272. ZE[2] = ZC[3] + 0x78a5636fU + ZD[2] + ZCh(ZA[2], ZA[1], ZH[3]) + ZR26(ZA[2]);
  273. ZG[2] = ZD[3] + ZE[2];
  274. ZH[2] = ZE[2] + ZR30(ZC[2]) + ZMa(ZC[1], ZD[0], ZC[2]);
  275. ZA[3] = ZR15(ZE[1]) + ZB[1] + ZR25(ZB[2]) + ZH[0];
  276. ZC[3] = ZH[3] + 0x84c87814U + ZA[3] + ZCh(ZG[2], ZA[2], ZA[1]) + ZR26(ZG[2]);
  277. ZD[3] = ZD[0] + ZC[3];
  278. ZF[3] = ZC[3] + ZR30(ZH[2]) + ZMa(ZC[2], ZC[1], ZH[2]);
  279. ZG[3] = ZR15(ZD[2]) + ZG[1] + ZR25(ZB[3]) + ZB[2];
  280. ZH[3] = ZA[1] + 0x8cc70208U + ZG[3] + ZCh(ZD[3], ZG[2], ZA[2]) + ZR26(ZD[3]);
  281. ZB[0] = ZC[1] + ZH[3];
  282. ZC[0] = ZH[3] + ZR30(ZF[3]) + ZMa(ZH[2], ZC[2], ZF[3]);
  283. ZD[0] = ZR15(ZA[3]) + ZF[2] + ZR25(ZA[0]) + ZB[3];
  284. ZF[0] = ZA[2] + 0x90befffaU + ZD[0] + ZCh(ZB[0], ZD[3], ZG[2]) + ZR26(ZB[0]);
  285. ZH[0] = ZC[2] + ZF[0];
  286. ZA[1] = ZF[0] + ZR30(ZC[0]) + ZMa(ZF[3], ZH[2], ZC[0]);
  287. ZB[1] = ZR15(ZG[3]) + ZE[3] + ZR25(ZG[0]) + ZA[0];
  288. ZC[1] = ZG[2] + 0xa4506cebU + ZB[1] + ZCh(ZH[0], ZB[0], ZD[3]) + ZR26(ZH[0]);
  289. ZF[1] = ZH[2] + ZC[1];
  290. ZG[1] = ZC[1] + ZR30(ZA[1]) + ZMa(ZC[0], ZF[3], ZA[1]);
  291. ZA[2] = ZR15(ZD[0]) + ZR25(ZD[1]) + ZE[0] + ZG[0];
  292. ZB[2] = ZD[3] + 0xbef9a3f7U + ZA[2] + ZCh(ZF[1], ZH[0], ZB[0]) + ZR26(ZF[1]);
  293. ZC[2] = ZF[3] + ZB[2];
  294. ZD[2] = ZB[2] + ZR30(ZG[1]) + ZMa(ZA[1], ZC[0], ZG[1]);
  295. ZE[2] = ZR15(ZB[1]) + ZR25(ZH[1]) + ZE[1] + ZD[1];
  296. ZF[2] = ZB[0] + 0xc67178f2U + ZE[2] + ZCh(ZC[2], ZF[1], ZH[0]) + ZR26(ZC[2]);
  297. ZG[2] = ZC[0] + ZF[2];
  298. ZH[2] = ZF[2] + ZR30(ZD[2]) + ZMa(ZG[1], ZA[1], ZD[2]);
  299. ZA[3] = state0 + ZH[2];
  300. ZB[3] = state1 + ZD[2];
  301. ZC[3] = state2 + ZG[1];
  302. ZD[3] = state3 + ZA[1];
  303. ZE[3] = state4 + ZG[2];
  304. ZF[3] = state5 + ZC[2];
  305. ZG[3] = state6 + ZF[1];
  306. ZH[3] = state7 + ZH[0];
  307. ZD[0] = 0x98c7e2a2U + ZA[3];
  308. ZH[0] = 0xfc08884dU + ZA[3];
  309. ZA[1] = ZR25(ZB[3]) + ZA[3];
  310. ZB[1] = 0x90bb1e3cU + ZB[3] + ZCh(ZD[0], 0x510e527fU, 0x9b05688cU) + ZR26(ZD[0]);
  311. ZC[1] = 0x3c6ef372U + ZB[1];
  312. ZD[1] = ZB[1] + ZR30(ZH[0]) + ZMa(0x6a09e667U, 0xbb67ae85U, ZH[0]);
  313. ZE[1] = 0x50c6645bU + ZC[3] + ZCh(ZC[1], ZD[0], 0x510e527fU) + ZR26(ZC[1]);
  314. ZF[1] = 0xbb67ae85U + ZE[1];
  315. ZG[1] = ZE[1] + ZR30(ZD[1]) + ZMa(ZH[0], 0x6a09e667U, ZD[1]);
  316. ZH[1] = 0x00a00000U + ZR25(ZC[3]) + ZB[3];
  317. ZA[2] = ZR15(ZA[1]) + ZR25(ZD[3]) + ZC[3];
  318. ZB[2] = 0x3ac42e24U + ZD[3] + ZCh(ZF[1], ZC[1], ZD[0]) + ZR26(ZF[1]);
  319. ZC[2] = 0x6a09e667U + ZB[2];
  320. ZD[2] = ZB[2] + ZR30(ZG[1]) + ZMa(ZD[1], ZH[0], ZG[1]);
  321. ZE[2] = ZR15(ZH[1]) + ZR25(ZE[3]) + ZD[3];
  322. ZF[2] = ZA[3] + 0xd21ea4fdU + ZE[3] + ZCh(ZC[2], ZF[1], ZC[1]) + ZR26(ZC[2]);
  323. ZG[2] = ZH[0] + ZF[2];
  324. ZH[2] = ZF[2] + ZR30(ZD[2]) + ZMa(ZG[1], ZD[1], ZD[2]);
  325. ZA[3] = ZR15(ZA[2]) + ZR25(ZF[3]) + ZE[3];
  326. ZB[3] = ZC[1] + 0x59f111f1U + ZF[3] + ZCh(ZG[2], ZC[2], ZF[1]) + ZR26(ZG[2]);
  327. ZC[3] = ZD[1] + ZB[3];
  328. ZD[3] = ZB[3] + ZR30(ZH[2]) + ZMa(ZD[2], ZG[1], ZH[2]);
  329. ZE[3] = ZR15(ZE[2]) + ZR25(ZG[3]) + ZF[3];
  330. ZF[3] = ZF[1] + 0x923f82a4U + ZG[3] + ZCh(ZC[3], ZG[2], ZC[2]) + ZR26(ZC[3]);
  331. ZA[0] = ZG[1] + ZF[3];
  332. ZB[0] = ZF[3] + ZR30(ZD[3]) + ZMa(ZH[2], ZD[2], ZD[3]);
  333. ZC[0] = ZR15(ZA[3]) + 0x00000100U + ZR25(ZH[3]) + ZG[3];
  334. ZD[0] = ZC[2] + 0xab1c5ed5U + ZH[3] + ZCh(ZA[0], ZC[3], ZG[2]) + ZR26(ZA[0]);
  335. ZE[0] = ZD[2] + ZD[0];
  336. ZF[0] = ZD[0] + ZR30(ZB[0]) + ZMa(ZD[3], ZH[2], ZB[0]);
  337. ZG[0] = ZG[2] + 0x5807aa98U + ZCh(ZE[0], ZA[0], ZC[3]) + ZR26(ZE[0]);
  338. ZH[0] = ZH[2] + ZG[0];
  339. ZB[1] = ZG[0] + ZR30(ZF[0]) + ZMa(ZB[0], ZD[3], ZF[0]);
  340. ZC[1] = ZR15(ZE[3]) + ZA[1] + 0x11002000U + ZH[3];
  341. ZD[1] = ZR15(ZC[0]) + ZH[1] + 0x80000000U;
  342. ZE[1] = ZC[3] + 0x12835b01U + ZCh(ZH[0], ZE[0], ZA[0]) + ZR26(ZH[0]);
  343. ZF[1] = ZD[3] + ZE[1];
  344. ZG[1] = ZE[1] + ZR30(ZB[1]) + ZMa(ZF[0], ZB[0], ZB[1]);
  345. ZB[2] = ZA[0] + 0x243185beU + ZCh(ZF[1], ZH[0], ZE[0]) + ZR26(ZF[1]);
  346. ZC[2] = ZB[0] + ZB[2];
  347. ZD[2] = ZB[2] + ZR30(ZG[1]) + ZMa(ZB[1], ZF[0], ZG[1]);
  348. ZF[2] = ZR15(ZC[1]) + ZA[2];
  349. ZG[2] = ZR15(ZD[1]) + ZE[2];
  350. ZH[2] = ZE[0] + 0x550c7dc3U + ZCh(ZC[2], ZF[1], ZH[0]) + ZR26(ZC[2]);
  351. ZB[3] = ZF[0] + ZH[2];
  352. ZC[3] = ZH[2] + ZR30(ZD[2]) + ZMa(ZG[1], ZB[1], ZD[2]);
  353. ZD[3] = ZH[0] + 0x72be5d74U + ZCh(ZB[3], ZC[2], ZF[1]) + ZR26(ZB[3]);
  354. ZF[3] = ZB[1] + ZD[3];
  355. ZG[3] = ZD[3] + ZR30(ZC[3]) + ZMa(ZD[2], ZG[1], ZC[3]);
  356. ZH[3] = ZR15(ZF[2]) + ZA[3];
  357. ZA[0] = ZR15(ZG[2]) + ZE[3];
  358. ZB[0] = ZF[1] + 0x80deb1feU + ZCh(ZF[3], ZB[3], ZC[2]) + ZR26(ZF[3]);
  359. ZD[0] = ZG[1] + ZB[0];
  360. ZE[0] = ZB[0] + ZR30(ZG[3]) + ZMa(ZC[3], ZD[2], ZG[3]);
  361. ZF[0] = ZC[2] + 0x9bdc06a7U + ZCh(ZD[0], ZF[3], ZB[3]) + ZR26(ZD[0]);
  362. ZG[0] = ZD[2] + ZF[0];
  363. ZH[0] = ZF[0] + ZR30(ZE[0]) + ZMa(ZG[3], ZC[3], ZE[0]);
  364. ZB[1] = ZB[3] + 0xc19bf274 + ZCh(ZG[0], ZD[0], ZF[3]) + ZR26(ZG[0]);
  365. ZE[1] = ZC[3] + ZB[1];
  366. ZF[1] = ZB[1] + ZR30(ZH[0]) + ZMa(ZE[0], ZG[3], ZH[0]);
  367. ZG[1] = ZF[3] + 0xe49b69c1U + ZA[1] + ZCh(ZE[1], ZG[0], ZD[0]) + ZR26(ZE[1]);
  368. ZB[2] = ZG[3] + ZG[1];
  369. ZC[2] = ZG[1] + ZR30(ZF[1]) + ZMa(ZH[0], ZE[0], ZF[1]);
  370. ZD[2] = ZD[0] + 0xefbe4786U + ZH[1] + ZCh(ZB[2], ZE[1], ZG[0]) + ZR26(ZB[2]);
  371. ZH[2] = ZE[0] + ZD[2];
  372. ZB[3] = ZD[2] + ZR30(ZC[2]) + ZMa(ZF[1], ZH[0], ZC[2]);
  373. ZC[3] = ZG[0] + 0x0fc19dc6U + ZA[2] + ZCh(ZH[2], ZB[2], ZE[1]) + ZR26(ZH[2]);
  374. ZD[3] = ZH[0] + ZC[3];
  375. ZF[3] = ZC[3] + ZR30(ZB[3]) + ZMa(ZC[2], ZF[1], ZB[3]);
  376. ZG[3] = ZE[1] + 0x240ca1ccU + ZE[2] + ZCh(ZD[3], ZH[2], ZB[2]) + ZR26(ZD[3]);
  377. ZB[0] = ZF[1] + ZG[3];
  378. ZD[0] = ZG[3] + ZR30(ZF[3]) + ZMa(ZB[3], ZC[2], ZF[3]);
  379. ZE[0] = ZB[2] + 0x2de92c6fU + ZA[3] + ZCh(ZB[0], ZD[3], ZH[2]) + ZR26(ZB[0]);
  380. ZF[0] = ZC[2] + ZE[0];
  381. ZG[0] = ZE[0] + ZR30(ZD[0]) + ZMa(ZF[3], ZB[3], ZD[0]);
  382. ZH[0] = ZH[2] + 0x4a7484aaU + ZE[3] + ZCh(ZF[0], ZB[0], ZD[3]) + ZR26(ZF[0]);
  383. ZB[1] = ZB[3] + ZH[0];
  384. ZE[1] = ZH[0] + ZR30(ZG[0]) + ZMa(ZD[0], ZF[3], ZG[0]);
  385. ZF[1] = ZD[3] + 0x5cb0a9dcU + ZC[0] + ZCh(ZB[1], ZF[0], ZB[0]) + ZR26(ZB[1]);
  386. ZG[1] = ZF[3] + ZF[1];
  387. ZB[2] = ZF[1] + ZR30(ZE[1]) + ZMa(ZG[0], ZD[0], ZE[1]);
  388. ZC[2] = ZB[0] + 0x76f988daU + ZC[1] + ZCh(ZG[1], ZB[1], ZF[0]) + ZR26(ZG[1]);
  389. ZD[2] = ZD[0] + ZC[2];
  390. ZH[2] = ZC[2] + ZR30(ZB[2]) + ZMa(ZE[1], ZG[0], ZB[2]);
  391. ZB[3] = ZF[0] + 0x983e5152U + ZD[1] + ZCh(ZD[2], ZG[1], ZB[1]) + ZR26(ZD[2]);
  392. ZC[3] = ZG[0] + ZB[3];
  393. ZD[3] = ZB[3] + ZR30(ZH[2]) + ZMa(ZB[2], ZE[1], ZH[2]);
  394. ZF[3] = ZB[1] + 0xa831c66dU + ZF[2] + ZCh(ZC[3], ZD[2], ZG[1]) + ZR26(ZC[3]);
  395. ZG[3] = ZE[1] + ZF[3];
  396. ZB[0] = ZF[3] + ZR30(ZD[3]) + ZMa(ZH[2], ZB[2], ZD[3]);
  397. ZD[0] = ZG[1] + 0xb00327c8U + ZG[2] + ZCh(ZG[3], ZC[3], ZD[2]) + ZR26(ZG[3]);
  398. ZE[0] = ZB[2] + ZD[0];
  399. ZF[0] = ZD[0] + ZR30(ZB[0]) + ZMa(ZD[3], ZH[2], ZB[0]);
  400. ZG[0] = ZD[2] + 0xbf597fc7U + ZH[3] + ZCh(ZE[0], ZG[3], ZC[3]) + ZR26(ZE[0]);
  401. ZH[0] = ZH[2] + ZG[0];
  402. ZB[1] = ZG[0] + ZR30(ZF[0]) + ZMa(ZB[0], ZD[3], ZF[0]);
  403. ZE[1] = ZC[3] + 0xc6e00bf3U + ZA[0] + ZCh(ZH[0], ZE[0], ZG[3]) + ZR26(ZH[0]);
  404. ZF[1] = ZD[3] + ZE[1];
  405. ZG[1] = ZE[1] + ZR30(ZB[1]) + ZMa(ZF[0], ZB[0], ZB[1]);
  406. ZB[2] = ZR15(ZH[3]) + ZC[0];
  407. ZC[2] = ZG[3] + 0xd5a79147U + ZB[2] + ZCh(ZF[1], ZH[0], ZE[0]) + ZR26(ZF[1]);
  408. ZD[2] = ZB[0] + ZC[2];
  409. ZH[2] = ZC[2] + ZR30(ZG[1]) + ZMa(ZB[1], ZF[0], ZG[1]);
  410. ZB[3] = ZR15(ZA[0]) + 0x00400022U + ZC[1];
  411. ZC[3] = ZE[0] + 0x06ca6351U + ZB[3] + ZCh(ZD[2], ZF[1], ZH[0]) + ZR26(ZD[2]);
  412. ZD[3] = ZF[0] + ZC[3];
  413. ZF[3] = ZC[3] + ZR30(ZH[2]) + ZMa(ZG[1], ZB[1], ZH[2]);
  414. ZG[3] = ZR15(ZB[2]) + ZR25(ZA[1]) + ZD[1] + 0x00000100U;
  415. ZB[0] = ZH[0] + 0x14292967U + ZG[3] + ZCh(ZD[3], ZD[2], ZF[1]) + ZR26(ZD[3]);
  416. ZD[0] = ZB[1] + ZB[0];
  417. ZE[0] = ZB[0] + ZR30(ZF[3]) + ZMa(ZH[2], ZG[1], ZF[3]);
  418. ZF[0] = ZR15(ZB[3]) + ZR25(ZH[1]) + ZF[2] + ZA[1];
  419. ZG[0] = ZF[1] + 0x27b70a85U + ZF[0] + ZCh(ZD[0], ZD[3], ZD[2]) + ZR26(ZD[0]);
  420. ZH[0] = ZG[1] + ZG[0];
  421. ZA[1] = ZG[0] + ZR30(ZE[0]) + ZMa(ZF[3], ZH[2], ZE[0]);
  422. ZB[1] = ZR15(ZG[3]) + ZR25(ZA[2]) + ZG[2] + ZH[1];
  423. ZE[1] = ZD[2] + 0x2e1b2138U + ZB[1] + ZCh(ZH[0], ZD[0], ZD[3]) + ZR26(ZH[0]);
  424. ZF[1] = ZH[2] + ZE[1];
  425. ZG[1] = ZE[1] + ZR30(ZA[1]) + ZMa(ZE[0], ZF[3], ZA[1]);
  426. ZH[1] = ZR15(ZF[0]) + ZR25(ZE[2]) + ZH[3] + ZA[2];
  427. ZA[2] = ZD[3] + 0x4d2c6dfcU + ZH[1] + ZCh(ZF[1], ZH[0], ZD[0]) + ZR26(ZF[1]);
  428. ZC[2] = ZF[3] + ZA[2];
  429. ZD[2] = ZA[2] + ZR30(ZG[1]) + ZMa(ZA[1], ZE[0], ZG[1]);
  430. ZH[2] = ZR15(ZB[1]) + ZR25(ZA[3]) + ZA[0] + ZE[2];
  431. ZC[3] = ZD[0] + 0x53380d13U + ZH[2] + ZCh(ZC[2], ZF[1], ZH[0]) + ZR26(ZC[2]);
  432. ZD[3] = ZE[0] + ZC[3];
  433. ZF[3] = ZC[3] + ZR30(ZD[2]) + ZMa(ZG[1], ZA[1], ZD[2]);
  434. ZB[0] = ZR15(ZH[1]) + ZR25(ZE[3]) + ZB[2] + ZA[3];
  435. ZD[0] = ZH[0] + 0x650a7354U + ZB[0] + ZCh(ZD[3], ZC[2], ZF[1]) + ZR26(ZD[3]);
  436. ZE[0] = ZA[1] + ZD[0];
  437. ZG[0] = ZD[0] + ZR30(ZF[3]) + ZMa(ZD[2], ZG[1], ZF[3]);
  438. ZH[0] = ZR15(ZH[2]) + ZR25(ZC[0]) + ZB[3] + ZE[3];
  439. ZA[1] = ZF[1] + 0x766a0abbU + ZH[0] + ZCh(ZE[0], ZD[3], ZC[2]) + ZR26(ZE[0]);
  440. ZE[1] = ZG[1] + ZA[1];
  441. ZF[1] = ZA[1] + ZR30(ZG[0]) + ZMa(ZF[3], ZD[2], ZG[0]);
  442. ZG[1] = ZR15(ZB[0]) + ZR25(ZC[1]) + ZG[3] + ZC[0];
  443. ZA[2] = ZC[2] + 0x81c2c92eU + ZG[1] + ZCh(ZE[1], ZE[0], ZD[3]) + ZR26(ZE[1]);
  444. ZC[2] = ZD[2] + ZA[2];
  445. ZD[2] = ZA[2] + ZR30(ZF[1]) + ZMa(ZG[0], ZF[3], ZF[1]);
  446. ZE[2] = ZR15(ZH[0]) + ZR25(ZD[1]) + ZF[0] + ZC[1];
  447. ZA[3] = ZD[3] + 0x92722c85U + ZE[2] + ZCh(ZC[2], ZE[1], ZE[0]) + ZR26(ZC[2]);
  448. ZC[3] = ZF[3] + ZA[3];
  449. ZD[3] = ZA[3] + ZR30(ZD[2]) + ZMa(ZF[1], ZG[0], ZD[2]);
  450. ZE[3] = ZR15(ZG[1]) + ZR25(ZF[2]) + ZB[1] + ZD[1];
  451. ZF[3] = ZE[0] + 0xa2bfe8a1U + ZE[3] + ZCh(ZC[3], ZC[2], ZE[1]) + ZR26(ZC[3]);
  452. ZC[0] = ZG[0] + ZF[3];
  453. ZD[0] = ZF[3] + ZR30(ZD[3]) + ZMa(ZD[2], ZF[1], ZD[3]);
  454. ZE[0] = ZR15(ZE[2]) + ZR25(ZG[2]) + ZH[1] + ZF[2];
  455. ZG[0] = ZE[1] + 0xa81a664bU + ZE[0] + ZCh(ZC[0], ZC[3], ZC[2]) + ZR26(ZC[0]);
  456. ZA[1] = ZF[1] + ZG[0];
  457. ZC[1] = ZG[0] + ZR30(ZD[0]) + ZMa(ZD[3], ZD[2], ZD[0]);
  458. ZD[1] = ZR15(ZE[3]) + ZR25(ZH[3]) + ZH[2] + ZG[2];
  459. ZE[1] = ZC[2] + 0xc24b8b70U + ZD[1] + ZCh(ZA[1], ZC[0], ZC[3]) + ZR26(ZA[1]);
  460. ZF[1] = ZD[2] + ZE[1];
  461. ZA[2] = ZE[1] + ZR30(ZC[1]) + ZMa(ZD[0], ZD[3], ZC[1]);
  462. ZC[2] = ZR15(ZE[0]) + ZR25(ZA[0]) + ZB[0] + ZH[3];
  463. ZD[2] = ZC[3] + 0xc76c51a3U + ZC[2] + ZCh(ZF[1], ZA[1], ZC[0]) + ZR26(ZF[1]);
  464. ZF[2] = ZD[3] + ZD[2];
  465. ZG[2] = ZD[2] + ZR30(ZA[2]) + ZMa(ZC[1], ZD[0], ZA[2]);
  466. ZA[3] = ZR15(ZD[1]) + ZR25(ZB[2]) + ZH[0] + ZA[0];
  467. ZC[3] = ZC[0] + 0xd192e819U + ZA[3] + ZCh(ZF[2], ZF[1], ZA[1]) + ZR26(ZF[2]);
  468. ZD[3] = ZD[0] + ZC[3];
  469. ZF[3] = ZC[3] + ZR30(ZG[2]) + ZMa(ZA[2], ZC[1], ZG[2]);
  470. ZH[3] = ZR15(ZC[2]) + ZR25(ZB[3]) + ZG[1] + ZB[2];
  471. ZA[0] = ZA[1] + 0xd6990624U + ZH[3] + ZCh(ZD[3], ZF[2], ZF[1]) + ZR26(ZD[3]);
  472. ZC[0] = ZC[1] + ZA[0];
  473. ZD[0] = ZA[0] + ZR30(ZF[3]) + ZMa(ZG[2], ZA[2], ZF[3]);
  474. ZG[0] = ZR15(ZA[3]) + ZR25(ZG[3]) + ZE[2] + ZB[3];
  475. ZA[1] = ZF[1] + 0xf40e3585U + ZG[0] + ZCh(ZC[0], ZD[3], ZF[2]) + ZR26(ZC[0]);
  476. ZC[1] = ZA[2] + ZA[1];
  477. ZE[1] = ZA[1] + ZR30(ZD[0]) + ZMa(ZF[3], ZG[2], ZD[0]);
  478. ZF[1] = ZR15(ZH[3]) + ZR25(ZF[0]) + ZE[3] + ZG[3];
  479. ZA[2] = ZF[2] + 0x106aa070U + ZF[1] + ZCh(ZC[1], ZC[0], ZD[3]) + ZR26(ZC[1]);
  480. ZB[2] = ZG[2] + ZA[2];
  481. ZD[2] = ZA[2] + ZR30(ZE[1]) + ZMa(ZD[0], ZF[3], ZE[1]);
  482. ZF[2] = ZR15(ZG[0]) + ZR25(ZB[1]) + ZE[0] + ZF[0];
  483. ZG[2] = ZD[3] + 0x19a4c116U + ZF[2] + ZCh(ZB[2], ZC[1], ZC[0]) + ZR26(ZB[2]);
  484. ZB[3] = ZF[3] + ZG[2];
  485. ZC[3] = ZG[2] + ZR30(ZD[2]) + ZMa(ZE[1], ZD[0], ZD[2]);
  486. ZD[3] = ZR15(ZF[1]) + ZR25(ZH[1]) + ZD[1] + ZB[1];
  487. ZF[3] = ZC[0] + 0x1e376c08U + ZD[3] + ZCh(ZB[3], ZB[2], ZC[1]) + ZR26(ZB[3]);
  488. ZG[3] = ZD[0] + ZF[3];
  489. ZA[0] = ZF[3] + ZR30(ZC[3]) + ZMa(ZD[2], ZE[1], ZC[3]);
  490. ZC[0] = ZR15(ZF[2]) + ZC[2] + ZR25(ZH[2]) + ZH[1];
  491. ZD[0] = ZC[1] + 0x2748774cU + ZC[0] + ZCh(ZG[3], ZB[3], ZB[2]) + ZR26(ZG[3]);
  492. ZF[0] = ZE[1] + ZD[0];
  493. ZA[1] = ZD[0] + ZR30(ZA[0]) + ZMa(ZC[3], ZD[2], ZA[0]);
  494. ZB[1] = ZR15(ZD[3]) + ZA[3] + ZR25(ZB[0]) + ZH[2];
  495. ZC[1] = ZB[2] + 0x34b0bcb5U + ZB[1] + ZCh(ZF[0], ZG[3], ZB[3]) + ZR26(ZF[0]);
  496. ZE[1] = ZD[2] + ZC[1];
  497. ZH[1] = ZC[1] + ZR30(ZA[1]) + ZMa(ZA[0], ZC[3], ZA[1]);
  498. ZA[2] = ZR15(ZC[0]) + ZH[3] + ZR25(ZH[0]) + ZB[0];
  499. ZB[2] = ZB[3] + 0x391c0cb3U + ZA[2] + ZCh(ZE[1], ZF[0], ZG[3]) + ZR26(ZE[1]);
  500. ZD[2] = ZC[3] + ZB[2];
  501. ZG[2] = ZB[2] + ZR30(ZH[1]) + ZMa(ZA[1], ZA[0], ZH[1]);
  502. ZH[2] = ZR15(ZB[1]) + ZG[0] + ZR25(ZG[1]) + ZH[0];
  503. ZB[3] = ZG[3] + 0x4ed8aa4aU + ZH[2] + ZCh(ZD[2], ZE[1], ZF[0]) + ZR26(ZD[2]);
  504. ZC[3] = ZA[0] + ZB[3];
  505. ZF[3] = ZB[3] + ZR30(ZG[2]) + ZMa(ZH[1], ZA[1], ZG[2]);
  506. ZG[3] = ZR15(ZA[2]) + ZF[1] + ZR25(ZE[2]) + ZG[1];
  507. ZA[0] = ZF[0] + 0x5b9cca4fU + ZG[3] + ZCh(ZC[3], ZD[2], ZE[1]) + ZR26(ZC[3]);
  508. ZB[0] = ZA[1] + ZA[0];
  509. ZD[0] = ZA[0] + ZR30(ZF[3]) + ZMa(ZG[2], ZH[1], ZF[3]);
  510. ZF[0] = ZR15(ZH[2]) + ZF[2] + ZR25(ZE[3]) + ZE[2];
  511. ZG[0] = ZE[1] + 0x682e6ff3U + ZF[0] + ZCh(ZB[0], ZC[3], ZD[2]) + ZR26(ZB[0]);
  512. ZH[0] = ZH[1] + ZG[0];
  513. ZA[1] = ZG[0] + ZR30(ZD[0]) + ZMa(ZF[3], ZG[2], ZD[0]);
  514. ZC[1] = ZR15(ZG[3]) + ZR25(ZE[0]) + ZD[3] + ZE[3];
  515. ZE[1] = ZD[2] + 0x748f82eeU + ZC[1] + ZCh(ZH[0], ZB[0], ZC[3]) + ZR26(ZH[0]);
  516. ZF[1] = ZG[2] + ZE[1];
  517. ZG[1] = ZE[1] + ZR30(ZA[1]) + ZMa(ZD[0], ZF[3], ZA[1]);
  518. ZH[1] = ZR15(ZF[0]) + ZR25(ZD[1]) + ZC[0] + ZE[0];
  519. ZB[2] = ZF[3] + ZC[3] + 0x78a5636fU + ZH[1] + ZCh(ZF[1], ZH[0], ZB[0]) + ZR26(ZF[1]);
  520. ZD[2] = ZR15(ZC[1]) + ZR25(ZC[2]) + ZB[1] + ZD[1];
  521. ZE[2] = ZD[0] + ZB[0] + 0x84c87814U + ZD[2] + ZCh(ZB[2], ZF[1], ZH[0]) + ZR26(ZB[2]);
  522. ZF[2] = ZA[1] + ZH[0] + 0x8cc70208U + ZR15(ZH[1]) + ZR25(ZA[3]) + ZA[2] + ZC[2] + ZCh(ZE[2], ZB[2], ZF[1]) + ZR26(ZE[2]);
  523. ZG[2] = ZG[1] + ZF[1] + ZR26(ZF[2]) + ZCh(ZF[2], ZE[2], ZB[2]) + ZR15(ZD[2]) + ZH[2] + ZR25(ZH[3]) + ZA[3];
  524. #define FOUND (0x80)
  525. #define NFLAG (0x7F)
  526. #if defined(VECTORS4)
  527. ZG[2] ^= 0x136032EDU;
  528. bool result = ZG[2].x & ZG[2].y & ZG[2].z & ZG[2].w;
  529. if (!result) {
  530. if (!ZG[2].x)
  531. output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
  532. if (!ZG[2].y)
  533. output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
  534. if (!ZG[2].z)
  535. output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z;
  536. if (!ZG[2].w)
  537. output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w;
  538. }
  539. #elif defined(VECTORS2)
  540. ZG[2] ^= 0x136032EDU;
  541. bool result = ZG[2].x & ZG[2].y;
  542. if (!result) {
  543. if (!ZG[2].x)
  544. output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
  545. if (!ZG[2].y)
  546. output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
  547. }
  548. #else
  549. if (ZG[2] == 0x136032EDU)
  550. output[FOUND] = output[NFLAG & Znonce] = Znonce;
  551. #endif
  552. }