DiabloMiner120210.cl 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. /*
  2. * DiabloMiner - OpenCL miner for BitCoin
  3. * Copyright (C) 2010, 2011, 2012 Patrick McFarland <diablod3@gmail.com>
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. typedef uint z;
  19. #ifdef BITALIGN
  20. #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  21. #define Zrotr(a, b) amd_bitalign((z)a, (z)a, (z)(32 - b))
  22. #else
  23. #define Zrotr(a, b) rotate((z)a, (z)b)
  24. #endif
  25. #ifdef BFI_INT
  26. #define ZCh(a, b, c) amd_bytealign(a, b, c)
  27. #define ZMa(a, b, c) amd_bytealign((c ^ a), (b), (a))
  28. #else
  29. #define ZCh(a, b, c) bitselect((z)c, (z)b, (z)a)
  30. #define ZMa(a, b, c) bitselect((z)a, (z)b, (z)c ^ (z)a)
  31. #endif
  32. #define ZR25(n) ((Zrotr((n), 25) ^ Zrotr((n), 14) ^ ((n) >> 3U)))
  33. #define ZR15(n) ((Zrotr((n), 15) ^ Zrotr((n), 13) ^ ((n) >> 10U)))
  34. #define ZR26(n) ((Zrotr((n), 26) ^ Zrotr((n), 21) ^ Zrotr((n), 7)))
  35. #define ZR30(n) ((Zrotr((n), 30) ^ Zrotr((n), 19) ^ Zrotr((n), 10)))
  36. __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
  37. const uint base,
  38. const uint PreVal4_plus_state0, const uint PreVal4_plus_T1,
  39. const uint W18, const uint W19,
  40. const uint W16, const uint W17,
  41. const uint W31, const uint W32,
  42. const uint d1, const uint b1, const uint c1,
  43. const uint h1, const uint f1, const uint g1,
  44. const uint c1_plus_k5, const uint b1_plus_k6,
  45. const uint state0, const uint state1, const uint state2, const uint state3,
  46. const uint state4, const uint state5, const uint state6, const uint state7,
  47. __global uint * output)
  48. {
  49. z ZA[4];
  50. z ZB[4];
  51. z ZC[4];
  52. z ZD[4];
  53. z ZE[4];
  54. z ZF[4];
  55. z ZG[4];
  56. z ZH[4];
  57. z Znonce = base + get_global_id(0);
  58. ZA[0] = PreVal4_plus_state0 + Znonce;
  59. ZB[0] = PreVal4_plus_T1 + Znonce;
  60. ZC[0] = W18 + ZR25(Znonce);
  61. ZD[0] = W19 + Znonce;
  62. ZE[0] = 0x80000000U + ZR15(ZC[0]);
  63. ZF[0] = ZR15(ZD[0]);
  64. ZG[0] = 0x00000280U + ZR15(ZE[0]);
  65. ZH[0] = ZR15(ZF[0]) + W16;
  66. ZA[1] = ZR15(ZG[0]) + W17;
  67. ZB[1] = ZR15(ZH[0]) + ZC[0];
  68. ZC[1] = ZR15(ZA[1]) + ZD[0];
  69. ZD[1] = ZR15(ZB[1]) + ZE[0];
  70. ZE[1] = ZR15(ZC[1]) + ZF[0];
  71. ZF[1] = ZR15(ZD[1]) + ZG[0];
  72. ZG[1] = 0x00A00055U + ZR15(ZE[1]) + ZH[0];
  73. ZH[1] = W31 + ZR15(ZF[1]) + ZA[1];
  74. ZA[2] = W32 + ZR15(ZG[1]) + ZB[1];
  75. ZB[2] = d1 + ZCh(ZA[0], b1, c1) + ZR26(ZA[0]);
  76. ZC[2] = h1 + ZB[2];
  77. ZD[2] = ZB[2] + ZR30(ZB[0]) + ZMa(f1, g1, ZB[0]);
  78. ZE[2] = c1_plus_k5 + ZCh(ZC[2], ZA[0], b1) + ZR26(ZC[2]);
  79. ZF[2] = g1 + ZE[2];
  80. ZG[2] = ZE[2] + ZR30(ZD[2]) + ZMa(ZB[0], f1, ZD[2]);
  81. ZH[2] = b1_plus_k6 + ZCh(ZF[2], ZC[2], ZA[0]) + ZR26(ZF[2]);
  82. ZA[3] = f1 + ZH[2];
  83. ZB[3] = ZH[2] + ZR30(ZG[2]) + ZMa(ZD[2], ZB[0], ZG[2]);
  84. ZC[3] = ZA[0] + 0xab1c5ed5U + ZCh(ZA[3], ZF[2], ZC[2]) + ZR26(ZA[3]);
  85. ZD[3] = ZB[0] + ZC[3];
  86. ZE[3] = ZC[3] + ZR30(ZB[3]) + ZMa(ZG[2], ZD[2], ZB[3]);
  87. ZF[3] = ZC[2] + 0xd807aa98U + ZCh(ZD[3], ZA[3], ZF[2]) + ZR26(ZD[3]);
  88. ZG[3] = ZD[2] + ZF[3];
  89. ZH[3] = ZF[3] + ZR30(ZE[3]) + ZMa(ZB[3], ZG[2], ZE[3]);
  90. ZA[0] = ZF[2] + 0x12835b01U + ZCh(ZG[3], ZD[3], ZA[3]) + ZR26(ZG[3]);
  91. ZB[0] = ZG[2] + ZA[0];
  92. ZB[2] = ZA[0] + ZR30(ZH[3]) + ZMa(ZE[3], ZB[3], ZH[3]);
  93. ZC[2] = ZA[3] + 0x243185beU + ZCh(ZB[0], ZG[3], ZD[3]) + ZR26(ZB[0]);
  94. ZD[2] = ZB[3] + ZC[2];
  95. ZE[2] = ZC[2] + ZR30(ZB[2]) + ZMa(ZH[3], ZE[3], ZB[2]);
  96. ZF[2] = ZD[3] + 0x550c7dc3U + ZCh(ZD[2], ZB[0], ZG[3]) + ZR26(ZD[2]);
  97. ZG[2] = ZE[3] + ZF[2];
  98. ZH[2] = ZF[2] + ZR30(ZE[2]) + ZMa(ZB[2], ZH[3], ZE[2]);
  99. ZA[3] = ZG[3] + 0x72be5d74U + ZCh(ZG[2], ZD[2], ZB[0]) + ZR26(ZG[2]);
  100. ZB[3] = ZH[3] + ZA[3];
  101. ZC[3] = ZA[3] + ZR30(ZH[2]) + ZMa(ZE[2], ZB[2], ZH[2]);
  102. ZD[3] = ZB[0] + 0x80deb1feU + ZCh(ZB[3], ZG[2], ZD[2]) + ZR26(ZB[3]);
  103. ZE[3] = ZB[2] + ZD[3];
  104. ZF[3] = ZD[3] + ZR30(ZC[3]) + ZMa(ZH[2], ZE[2], ZC[3]);
  105. ZG[3] = ZD[2] + 0x9bdc06a7U + ZCh(ZE[3], ZB[3], ZG[2]) + ZR26(ZE[3]);
  106. ZH[3] = ZE[2] + ZG[3];
  107. ZA[0] = ZG[3] + ZR30(ZF[3]) + ZMa(ZC[3], ZH[2], ZF[3]);
  108. ZB[0] = ZG[2] + 0xc19bf3f4U + ZCh(ZH[3], ZE[3], ZB[3]) + ZR26(ZH[3]);
  109. ZB[2] = ZH[2] + ZB[0];
  110. ZC[2] = ZB[0] + ZR30(ZA[0]) + ZMa(ZF[3], ZC[3], ZA[0]);
  111. ZD[2] = ZB[3] + 0xe49b69c1U + W16 + ZCh(ZB[2], ZH[3], ZE[3]) + ZR26(ZB[2]);
  112. ZE[2] = ZC[3] + ZD[2];
  113. ZF[2] = ZD[2] + ZR30(ZC[2]) + ZMa(ZA[0], ZF[3], ZC[2]);
  114. ZG[2] = ZE[3] + 0xefbe4786U + W17 + ZCh(ZE[2], ZB[2], ZH[3]) + ZR26(ZE[2]);
  115. ZH[2] = ZF[3] + ZG[2];
  116. ZA[3] = ZG[2] + ZR30(ZF[2]) + ZMa(ZC[2], ZA[0], ZF[2]);
  117. ZB[3] = ZH[3] + 0x0fc19dc6U + ZC[0] + ZCh(ZH[2], ZE[2], ZB[2]) + ZR26(ZH[2]);
  118. ZC[3] = ZA[0] + ZB[3];
  119. ZD[3] = ZB[3] + ZR30(ZA[3]) + ZMa(ZF[2], ZC[2], ZA[3]);
  120. ZE[3] = ZB[2] + 0x240ca1ccU + ZD[0] + ZCh(ZC[3], ZH[2], ZE[2]) + ZR26(ZC[3]);
  121. ZF[3] = ZC[2] + ZE[3];
  122. ZG[3] = ZE[3] + ZR30(ZD[3]) + ZMa(ZA[3], ZF[2], ZD[3]);
  123. ZH[3] = ZE[2] + 0x2de92c6fU + ZE[0] + ZCh(ZF[3], ZC[3], ZH[2]) + ZR26(ZF[3]);
  124. ZA[0] = ZF[2] + ZH[3];
  125. ZB[0] = ZH[3] + ZR30(ZG[3]) + ZMa(ZD[3], ZA[3], ZG[3]);
  126. ZB[2] = ZH[2] + 0x4a7484aaU + ZF[0] + ZCh(ZA[0], ZF[3], ZC[3]) + ZR26(ZA[0]);
  127. ZC[2] = ZA[3] + ZB[2];
  128. ZD[2] = ZB[2] + ZR30(ZB[0]) + ZMa(ZG[3], ZD[3], ZB[0]);
  129. ZE[2] = ZC[3] + 0x5cb0a9dcU + ZG[0] + ZCh(ZC[2], ZA[0], ZF[3]) + ZR26(ZC[2]);
  130. ZF[2] = ZD[3] + ZE[2];
  131. ZG[2] = ZE[2] + ZR30(ZD[2]) + ZMa(ZB[0], ZG[3], ZD[2]);
  132. ZH[2] = ZF[3] + 0x76f988daU + ZH[0] + ZCh(ZF[2], ZC[2], ZA[0]) + ZR26(ZF[2]);
  133. ZA[3] = ZG[3] + ZH[2];
  134. ZB[3] = ZH[2] + ZR30(ZG[2]) + ZMa(ZD[2], ZB[0], ZG[2]);
  135. ZC[3] = ZA[0] + 0x983e5152U + ZA[1] + ZCh(ZA[3], ZF[2], ZC[2]) + ZR26(ZA[3]);
  136. ZD[3] = ZB[0] + ZC[3];
  137. ZE[3] = ZC[3] + ZR30(ZB[3]) + ZMa(ZG[2], ZD[2], ZB[3]);
  138. ZF[3] = ZC[2] + 0xa831c66dU + ZB[1] + ZCh(ZD[3], ZA[3], ZF[2]) + ZR26(ZD[3]);
  139. ZG[3] = ZD[2] + ZF[3];
  140. ZH[3] = ZF[3] + ZR30(ZE[3]) + ZMa(ZB[3], ZG[2], ZE[3]);
  141. ZA[0] = ZF[2] + 0xb00327c8U + ZC[1] + ZCh(ZG[3], ZD[3], ZA[3]) + ZR26(ZG[3]);
  142. ZB[0] = ZG[2] + ZA[0];
  143. ZB[2] = ZA[0] + ZR30(ZH[3]) + ZMa(ZE[3], ZB[3], ZH[3]);
  144. ZC[2] = ZA[3] + 0xbf597fc7U + ZD[1] + ZCh(ZB[0], ZG[3], ZD[3]) + ZR26(ZB[0]);
  145. ZD[2] = ZB[3] + ZC[2];
  146. ZE[2] = ZC[2] + ZR30(ZB[2]) + ZMa(ZH[3], ZE[3], ZB[2]);
  147. ZF[2] = ZD[3] + 0xc6e00bf3U + ZE[1] + ZCh(ZD[2], ZB[0], ZG[3]) + ZR26(ZD[2]);
  148. ZG[2] = ZE[3] + ZF[2];
  149. ZH[2] = ZF[2] + ZR30(ZE[2]) + ZMa(ZB[2], ZH[3], ZE[2]);
  150. ZA[3] = ZG[3] + 0xd5a79147U + ZF[1] + ZCh(ZG[2], ZD[2], ZB[0]) + ZR26(ZG[2]);
  151. ZB[3] = ZH[3] + ZA[3];
  152. ZC[3] = ZA[3] + ZR30(ZH[2]) + ZMa(ZE[2], ZB[2], ZH[2]);
  153. ZD[3] = ZB[0] + 0x06ca6351U + ZG[1] + ZCh(ZB[3], ZG[2], ZD[2]) + ZR26(ZB[3]);
  154. ZE[3] = ZB[2] + ZD[3];
  155. ZF[3] = ZD[3] + ZR30(ZC[3]) + ZMa(ZH[2], ZE[2], ZC[3]);
  156. ZG[3] = ZD[2] + 0x14292967U + ZH[1] + ZCh(ZE[3], ZB[3], ZG[2]) + ZR26(ZE[3]);
  157. ZH[3] = ZE[2] + ZG[3];
  158. ZA[0] = ZG[3] + ZR30(ZF[3]) + ZMa(ZC[3], ZH[2], ZF[3]);
  159. ZB[0] = ZG[2] + 0x27b70a85U + ZA[2] + ZCh(ZH[3], ZE[3], ZB[3]) + ZR26(ZH[3]);
  160. ZB[2] = ZH[2] + ZB[0];
  161. ZC[2] = ZB[0] + ZR30(ZA[0]) + ZMa(ZF[3], ZC[3], ZA[0]);
  162. ZD[2] = ZR15(ZH[1]) + ZR25(ZC[0]) + ZC[1] + W17;
  163. ZE[2] = ZB[3] + 0x2e1b2138U + ZD[2] + ZCh(ZB[2], ZH[3], ZE[3]) + ZR26(ZB[2]);
  164. ZF[2] = ZC[3] + ZE[2];
  165. ZG[2] = ZE[2] + ZR30(ZC[2]) + ZMa(ZA[0], ZF[3], ZC[2]);
  166. ZH[2] = ZR15(ZA[2]) + ZR25(ZD[0]) + ZD[1] + ZC[0];
  167. ZA[3] = ZE[3] + 0x4d2c6dfcU + ZH[2] + ZCh(ZF[2], ZB[2], ZH[3]) + ZR26(ZF[2]);
  168. ZB[3] = ZF[3] + ZA[3];
  169. ZC[3] = ZA[3] + ZR30(ZG[2]) + ZMa(ZC[2], ZA[0], ZG[2]);
  170. ZD[3] = ZR15(ZD[2]) + ZR25(ZE[0]) + ZE[1] + ZD[0];
  171. ZE[3] = ZH[3] + 0x53380d13U + ZD[3] + ZCh(ZB[3], ZF[2], ZB[2]) + ZR26(ZB[3]);
  172. ZF[3] = ZA[0] + ZE[3];
  173. ZG[3] = ZE[3] + ZR30(ZC[3]) + ZMa(ZG[2], ZC[2], ZC[3]);
  174. ZH[3] = ZR15(ZH[2]) + ZR25(ZF[0]) + ZF[1] + ZE[0];
  175. ZA[0] = ZB[2] + 0x650a7354U + ZH[3] + ZCh(ZF[3], ZB[3], ZF[2]) + ZR26(ZF[3]);
  176. ZB[0] = ZC[2] + ZA[0];
  177. ZC[0] = ZA[0] + ZR30(ZG[3]) + ZMa(ZC[3], ZG[2], ZG[3]);
  178. ZD[0] = ZR15(ZD[3]) + ZR25(ZG[0]) + ZG[1] + ZF[0];
  179. ZE[0] = ZF[2] + 0x766a0abbU + ZD[0] + ZCh(ZB[0], ZF[3], ZB[3]) + ZR26(ZB[0]);
  180. ZF[0] = ZG[2] + ZE[0];
  181. ZB[2] = ZE[0] + ZR30(ZC[0]) + ZMa(ZG[3], ZC[3], ZC[0]);
  182. ZC[2] = ZR15(ZH[3]) + ZR25(ZH[0]) + ZH[1] + ZG[0];
  183. ZE[2] = ZB[3] + 0x81c2c92eU + ZC[2] + ZCh(ZF[0], ZB[0], ZF[3]) + ZR26(ZF[0]);
  184. ZF[2] = ZC[3] + ZE[2];
  185. ZG[2] = ZE[2] + ZR30(ZB[2]) + ZMa(ZC[0], ZG[3], ZB[2]);
  186. ZA[3] = ZR15(ZD[0]) + ZR25(ZA[1]) + ZA[2] + ZH[0];
  187. ZB[3] = ZF[3] + 0x92722c85U + ZA[3] + ZCh(ZF[2], ZF[0], ZB[0]) + ZR26(ZF[2]);
  188. ZC[3] = ZG[3] + ZB[3];
  189. ZE[3] = ZB[3] + ZR30(ZG[2]) + ZMa(ZB[2], ZC[0], ZG[2]);
  190. ZF[3] = ZR15(ZC[2]) + ZR25(ZB[1]) + ZD[2] + ZA[1];
  191. ZG[3] = ZB[0] + 0xa2bfe8a1U + ZF[3] + ZCh(ZC[3], ZF[2], ZF[0]) + ZR26(ZC[3]);
  192. ZA[0] = ZC[0] + ZG[3];
  193. ZB[0] = ZG[3] + ZR30(ZE[3]) + ZMa(ZG[2], ZB[2], ZE[3]);
  194. ZC[0] = ZR15(ZA[3]) + ZR25(ZC[1]) + ZH[2] + ZB[1];
  195. ZE[0] = ZF[0] + 0xa81a664bU + ZC[0] + ZCh(ZA[0], ZC[3], ZF[2]) + ZR26(ZA[0]);
  196. ZF[0] = ZB[2] + ZE[0];
  197. ZG[0] = ZE[0] + ZR30(ZB[0]) + ZMa(ZE[3], ZG[2], ZB[0]);
  198. ZH[0] = ZR15(ZF[3]) + ZR25(ZD[1]) + ZD[3] + ZC[1];
  199. ZA[1] = ZF[2] + 0xc24b8b70U + ZH[0] + ZCh(ZF[0], ZA[0], ZC[3]) + ZR26(ZF[0]);
  200. ZB[1] = ZG[2] + ZA[1];
  201. ZC[1] = ZA[1] + ZR30(ZG[0]) + ZMa(ZB[0], ZE[3], ZG[0]);
  202. ZB[2] = ZR15(ZC[0]) + ZR25(ZE[1]) + ZH[3] + ZD[1];
  203. ZE[2] = ZC[3] + 0xc76c51a3U + ZB[2] + ZCh(ZB[1], ZF[0], ZA[0]) + ZR26(ZB[1]);
  204. ZF[2] = ZE[3] + ZE[2];
  205. ZG[2] = ZE[2] + ZR30(ZC[1]) + ZMa(ZG[0], ZB[0], ZC[1]);
  206. ZB[3] = ZR15(ZH[0]) + ZR25(ZF[1]) + ZD[0] + ZE[1];
  207. ZC[3] = ZA[0] + 0xd192e819U + ZB[3] + ZCh(ZF[2], ZB[1], ZF[0]) + ZR26(ZF[2]);
  208. ZE[3] = ZB[0] + ZC[3];
  209. ZG[3] = ZC[3] + ZR30(ZG[2]) + ZMa(ZC[1], ZG[0], ZG[2]);
  210. ZA[0] = ZR15(ZB[2]) + ZR25(ZG[1]) + ZC[2] + ZF[1];
  211. ZB[0] = ZF[0] + 0xd6990624U + ZA[0] + ZCh(ZE[3], ZF[2], ZB[1]) + ZR26(ZE[3]);
  212. ZE[0] = ZG[0] + ZB[0];
  213. ZF[0] = ZB[0] + ZR30(ZG[3]) + ZMa(ZG[2], ZC[1], ZG[3]);
  214. ZG[0] = ZR15(ZB[3]) + ZR25(ZH[1]) + ZA[3] + ZG[1];
  215. ZA[1] = ZB[1] + 0xf40e3585U + ZG[0] + ZCh(ZE[0], ZE[3], ZF[2]) + ZR26(ZE[0]);
  216. ZB[1] = ZC[1] + ZA[1];
  217. ZC[1] = ZA[1] + ZR30(ZF[0]) + ZMa(ZG[3], ZG[2], ZF[0]);
  218. ZD[1] = ZR15(ZA[0]) + ZF[3] + ZR25(ZA[2]) + ZH[1];
  219. ZE[1] = ZF[2] + 0x106aa070U + ZD[1] + ZCh(ZB[1], ZE[0], ZE[3]) + ZR26(ZB[1]);
  220. ZF[1] = ZG[2] + ZE[1];
  221. ZG[1] = ZE[1] + ZR30(ZC[1]) + ZMa(ZF[0], ZG[3], ZC[1]);
  222. ZH[1] = ZR15(ZG[0]) + ZC[0] + ZR25(ZD[2]) + ZA[2];
  223. ZA[2] = ZE[3] + 0x19a4c116U + ZH[1] + ZCh(ZF[1], ZB[1], ZE[0]) + ZR26(ZF[1]);
  224. ZE[2] = ZG[3] + ZA[2];
  225. ZF[2] = ZA[2] + ZR30(ZG[1]) + ZMa(ZC[1], ZF[0], ZG[1]);
  226. ZG[2] = ZR15(ZD[1]) + ZH[0] + ZR25(ZH[2]) + ZD[2];
  227. ZC[3] = ZE[0] + 0x1e376c08U + ZG[2] + ZCh(ZE[2], ZF[1], ZB[1]) + ZR26(ZE[2]);
  228. ZE[3] = ZF[0] + ZC[3];
  229. ZG[3] = ZC[3] + ZR30(ZF[2]) + ZMa(ZG[1], ZC[1], ZF[2]);
  230. ZB[0] = ZR15(ZH[1]) + ZB[2] + ZR25(ZD[3]) + ZH[2];
  231. ZE[0] = ZB[1] + 0x2748774cU + ZB[0] + ZCh(ZE[3], ZE[2], ZF[1]) + ZR26(ZE[3]);
  232. ZF[0] = ZC[1] + ZE[0];
  233. ZA[1] = ZE[0] + ZR30(ZG[3]) + ZMa(ZF[2], ZG[1], ZG[3]);
  234. ZB[1] = ZR15(ZG[2]) + ZB[3] + ZR25(ZH[3]) + ZD[3];
  235. ZC[1] = ZF[1] + 0x34b0bcb5U + ZB[1] + ZCh(ZF[0], ZE[3], ZE[2]) + ZR26(ZF[0]);
  236. ZE[1] = ZG[1] + ZC[1];
  237. ZF[1] = ZC[1] + ZR30(ZA[1]) + ZMa(ZG[3], ZF[2], ZA[1]);
  238. ZG[1] = ZR15(ZB[0]) + ZA[0] + ZR25(ZD[0]) + ZH[3];
  239. ZA[2] = ZE[2] + 0x391c0cb3U + ZG[1] + ZCh(ZE[1], ZF[0], ZE[3]) + ZR26(ZE[1]);
  240. ZD[2] = ZF[2] + ZA[2];
  241. ZE[2] = ZA[2] + ZR30(ZF[1]) + ZMa(ZA[1], ZG[3], ZF[1]);
  242. ZF[2] = ZR15(ZB[1]) + ZG[0] + ZR25(ZC[2]) + ZD[0];
  243. ZH[2] = ZE[3] + 0x4ed8aa4aU + ZF[2] + ZCh(ZD[2], ZE[1], ZF[0]) + ZR26(ZD[2]);
  244. ZC[3] = ZG[3] + ZH[2];
  245. ZD[3] = ZH[2] + ZR30(ZE[2]) + ZMa(ZF[1], ZA[1], ZE[2]);
  246. ZE[3] = ZR15(ZG[1]) + ZD[1] + ZR25(ZA[3]) + ZC[2];
  247. ZG[3] = ZF[0] + 0x5b9cca4fU + ZE[3] + ZCh(ZC[3], ZD[2], ZE[1]) + ZR26(ZC[3]);
  248. ZH[3] = ZA[1] + ZG[3];
  249. ZD[0] = ZG[3] + ZR30(ZD[3]) + ZMa(ZE[2], ZF[1], ZD[3]);
  250. ZE[0] = ZR15(ZF[2]) + ZH[1] + ZR25(ZF[3]) + ZA[3];
  251. ZF[0] = ZE[1] + 0x682e6ff3U + ZE[0] + ZCh(ZH[3], ZC[3], ZD[2]) + ZR26(ZH[3]);
  252. ZA[1] = ZF[1] + ZF[0];
  253. ZC[1] = ZF[0] + ZR30(ZD[0]) + ZMa(ZD[3], ZE[2], ZD[0]);
  254. ZE[1] = ZR15(ZE[3]) + ZG[2] + ZR25(ZC[0]) + ZF[3];
  255. ZF[1] = ZD[2] + 0x748f82eeU + ZE[1] + ZCh(ZA[1], ZH[3], ZC[3]) + ZR26(ZA[1]);
  256. ZA[2] = ZE[2] + ZF[1];
  257. ZC[2] = ZF[1] + ZR30(ZC[1]) + ZMa(ZD[0], ZD[3], ZC[1]);
  258. ZD[2] = ZR15(ZE[0]) + ZB[0] + ZR25(ZH[0]) + ZC[0];
  259. ZE[2] = ZC[3] + 0x78a5636fU + ZD[2] + ZCh(ZA[2], ZA[1], ZH[3]) + ZR26(ZA[2]);
  260. ZG[2] = ZD[3] + ZE[2];
  261. ZH[2] = ZE[2] + ZR30(ZC[2]) + ZMa(ZC[1], ZD[0], ZC[2]);
  262. ZA[3] = ZR15(ZE[1]) + ZB[1] + ZR25(ZB[2]) + ZH[0];
  263. ZC[3] = ZH[3] + 0x84c87814U + ZA[3] + ZCh(ZG[2], ZA[2], ZA[1]) + ZR26(ZG[2]);
  264. ZD[3] = ZD[0] + ZC[3];
  265. ZF[3] = ZC[3] + ZR30(ZH[2]) + ZMa(ZC[2], ZC[1], ZH[2]);
  266. ZG[3] = ZR15(ZD[2]) + ZG[1] + ZR25(ZB[3]) + ZB[2];
  267. ZH[3] = ZA[1] + 0x8cc70208U + ZG[3] + ZCh(ZD[3], ZG[2], ZA[2]) + ZR26(ZD[3]);
  268. ZB[0] = ZC[1] + ZH[3];
  269. ZC[0] = ZH[3] + ZR30(ZF[3]) + ZMa(ZH[2], ZC[2], ZF[3]);
  270. ZD[0] = ZR15(ZA[3]) + ZF[2] + ZR25(ZA[0]) + ZB[3];
  271. ZF[0] = ZA[2] + 0x90befffaU + ZD[0] + ZCh(ZB[0], ZD[3], ZG[2]) + ZR26(ZB[0]);
  272. ZH[0] = ZC[2] + ZF[0];
  273. ZA[1] = ZF[0] + ZR30(ZC[0]) + ZMa(ZF[3], ZH[2], ZC[0]);
  274. ZB[1] = ZR15(ZG[3]) + ZE[3] + ZR25(ZG[0]) + ZA[0];
  275. ZC[1] = ZG[2] + 0xa4506cebU + ZB[1] + ZCh(ZH[0], ZB[0], ZD[3]) + ZR26(ZH[0]);
  276. ZF[1] = ZH[2] + ZC[1];
  277. ZG[1] = ZC[1] + ZR30(ZA[1]) + ZMa(ZC[0], ZF[3], ZA[1]);
  278. ZA[2] = ZR15(ZD[0]) + ZR25(ZD[1]) + ZE[0] + ZG[0];
  279. ZB[2] = ZD[3] + 0xbef9a3f7U + ZA[2] + ZCh(ZF[1], ZH[0], ZB[0]) + ZR26(ZF[1]);
  280. ZC[2] = ZF[3] + ZB[2];
  281. ZD[2] = ZB[2] + ZR30(ZG[1]) + ZMa(ZA[1], ZC[0], ZG[1]);
  282. ZE[2] = ZR15(ZB[1]) + ZR25(ZH[1]) + ZE[1] + ZD[1];
  283. ZF[2] = ZB[0] + 0xc67178f2U + ZE[2] + ZCh(ZC[2], ZF[1], ZH[0]) + ZR26(ZC[2]);
  284. ZG[2] = ZC[0] + ZF[2];
  285. ZH[2] = ZF[2] + ZR30(ZD[2]) + ZMa(ZG[1], ZA[1], ZD[2]);
  286. ZA[3] = state0 + ZH[2];
  287. ZB[3] = state1 + ZD[2];
  288. ZC[3] = state2 + ZG[1];
  289. ZD[3] = state3 + ZA[1];
  290. ZE[3] = state4 + ZG[2];
  291. ZF[3] = state5 + ZC[2];
  292. ZG[3] = state6 + ZF[1];
  293. ZH[3] = state7 + ZH[0];
  294. ZD[0] = 0x98c7e2a2U + ZA[3];
  295. ZH[0] = 0xfc08884dU + ZA[3];
  296. ZA[1] = ZR25(ZB[3]) + ZA[3];
  297. ZB[1] = 0x90bb1e3cU + ZB[3] + ZCh(ZD[0], 0x510e527fU, 0x9b05688cU) + ZR26(ZD[0]);
  298. ZC[1] = 0x3c6ef372U + ZB[1];
  299. ZD[1] = ZB[1] + ZR30(ZH[0]) + ZMa(0x6a09e667U, 0xbb67ae85U, ZH[0]);
  300. ZE[1] = 0x50c6645bU + ZC[3] + ZCh(ZC[1], ZD[0], 0x510e527fU) + ZR26(ZC[1]);
  301. ZF[1] = 0xbb67ae85U + ZE[1];
  302. ZG[1] = ZE[1] + ZR30(ZD[1]) + ZMa(ZH[0], 0x6a09e667U, ZD[1]);
  303. ZH[1] = 0x00a00000U + ZR25(ZC[3]) + ZB[3];
  304. ZA[2] = ZR15(ZA[1]) + ZR25(ZD[3]) + ZC[3];
  305. ZB[2] = 0x3ac42e24U + ZD[3] + ZCh(ZF[1], ZC[1], ZD[0]) + ZR26(ZF[1]);
  306. ZC[2] = 0x6a09e667U + ZB[2];
  307. ZD[2] = ZB[2] + ZR30(ZG[1]) + ZMa(ZD[1], ZH[0], ZG[1]);
  308. ZE[2] = ZR15(ZH[1]) + ZR25(ZE[3]) + ZD[3];
  309. ZF[2] = ZA[3] + 0xd21ea4fdU + ZE[3] + ZCh(ZC[2], ZF[1], ZC[1]) + ZR26(ZC[2]);
  310. ZG[2] = ZH[0] + ZF[2];
  311. ZH[2] = ZF[2] + ZR30(ZD[2]) + ZMa(ZG[1], ZD[1], ZD[2]);
  312. ZA[3] = ZR15(ZA[2]) + ZR25(ZF[3]) + ZE[3];
  313. ZB[3] = ZC[1] + 0x59f111f1U + ZF[3] + ZCh(ZG[2], ZC[2], ZF[1]) + ZR26(ZG[2]);
  314. ZC[3] = ZD[1] + ZB[3];
  315. ZD[3] = ZB[3] + ZR30(ZH[2]) + ZMa(ZD[2], ZG[1], ZH[2]);
  316. ZE[3] = ZR15(ZE[2]) + ZR25(ZG[3]) + ZF[3];
  317. ZF[3] = ZF[1] + 0x923f82a4U + ZG[3] + ZCh(ZC[3], ZG[2], ZC[2]) + ZR26(ZC[3]);
  318. ZA[0] = ZG[1] + ZF[3];
  319. ZB[0] = ZF[3] + ZR30(ZD[3]) + ZMa(ZH[2], ZD[2], ZD[3]);
  320. ZC[0] = ZR15(ZA[3]) + 0x00000100U + ZR25(ZH[3]) + ZG[3];
  321. ZD[0] = ZC[2] + 0xab1c5ed5U + ZH[3] + ZCh(ZA[0], ZC[3], ZG[2]) + ZR26(ZA[0]);
  322. ZE[0] = ZD[2] + ZD[0];
  323. ZF[0] = ZD[0] + ZR30(ZB[0]) + ZMa(ZD[3], ZH[2], ZB[0]);
  324. ZG[0] = ZG[2] + 0x5807aa98U + ZCh(ZE[0], ZA[0], ZC[3]) + ZR26(ZE[0]);
  325. ZH[0] = ZH[2] + ZG[0];
  326. ZB[1] = ZG[0] + ZR30(ZF[0]) + ZMa(ZB[0], ZD[3], ZF[0]);
  327. ZC[1] = ZR15(ZE[3]) + ZA[1] + 0x11002000U + ZH[3];
  328. ZD[1] = ZR15(ZC[0]) + ZH[1] + 0x80000000U;
  329. ZE[1] = ZC[3] + 0x12835b01U + ZCh(ZH[0], ZE[0], ZA[0]) + ZR26(ZH[0]);
  330. ZF[1] = ZD[3] + ZE[1];
  331. ZG[1] = ZE[1] + ZR30(ZB[1]) + ZMa(ZF[0], ZB[0], ZB[1]);
  332. ZB[2] = ZA[0] + 0x243185beU + ZCh(ZF[1], ZH[0], ZE[0]) + ZR26(ZF[1]);
  333. ZC[2] = ZB[0] + ZB[2];
  334. ZD[2] = ZB[2] + ZR30(ZG[1]) + ZMa(ZB[1], ZF[0], ZG[1]);
  335. ZF[2] = ZR15(ZC[1]) + ZA[2];
  336. ZG[2] = ZR15(ZD[1]) + ZE[2];
  337. ZH[2] = ZE[0] + 0x550c7dc3U + ZCh(ZC[2], ZF[1], ZH[0]) + ZR26(ZC[2]);
  338. ZB[3] = ZF[0] + ZH[2];
  339. ZC[3] = ZH[2] + ZR30(ZD[2]) + ZMa(ZG[1], ZB[1], ZD[2]);
  340. ZD[3] = ZH[0] + 0x72be5d74U + ZCh(ZB[3], ZC[2], ZF[1]) + ZR26(ZB[3]);
  341. ZF[3] = ZB[1] + ZD[3];
  342. ZG[3] = ZD[3] + ZR30(ZC[3]) + ZMa(ZD[2], ZG[1], ZC[3]);
  343. ZH[3] = ZR15(ZF[2]) + ZA[3];
  344. ZA[0] = ZR15(ZG[2]) + ZE[3];
  345. ZB[0] = ZF[1] + 0x80deb1feU + ZCh(ZF[3], ZB[3], ZC[2]) + ZR26(ZF[3]);
  346. ZD[0] = ZG[1] + ZB[0];
  347. ZE[0] = ZB[0] + ZR30(ZG[3]) + ZMa(ZC[3], ZD[2], ZG[3]);
  348. ZF[0] = ZC[2] + 0x9bdc06a7U + ZCh(ZD[0], ZF[3], ZB[3]) + ZR26(ZD[0]);
  349. ZG[0] = ZD[2] + ZF[0];
  350. ZH[0] = ZF[0] + ZR30(ZE[0]) + ZMa(ZG[3], ZC[3], ZE[0]);
  351. ZB[1] = ZB[3] + 0xc19bf274 + ZCh(ZG[0], ZD[0], ZF[3]) + ZR26(ZG[0]);
  352. ZE[1] = ZC[3] + ZB[1];
  353. ZF[1] = ZB[1] + ZR30(ZH[0]) + ZMa(ZE[0], ZG[3], ZH[0]);
  354. ZG[1] = ZF[3] + 0xe49b69c1U + ZA[1] + ZCh(ZE[1], ZG[0], ZD[0]) + ZR26(ZE[1]);
  355. ZB[2] = ZG[3] + ZG[1];
  356. ZC[2] = ZG[1] + ZR30(ZF[1]) + ZMa(ZH[0], ZE[0], ZF[1]);
  357. ZD[2] = ZD[0] + 0xefbe4786U + ZH[1] + ZCh(ZB[2], ZE[1], ZG[0]) + ZR26(ZB[2]);
  358. ZH[2] = ZE[0] + ZD[2];
  359. ZB[3] = ZD[2] + ZR30(ZC[2]) + ZMa(ZF[1], ZH[0], ZC[2]);
  360. ZC[3] = ZG[0] + 0x0fc19dc6U + ZA[2] + ZCh(ZH[2], ZB[2], ZE[1]) + ZR26(ZH[2]);
  361. ZD[3] = ZH[0] + ZC[3];
  362. ZF[3] = ZC[3] + ZR30(ZB[3]) + ZMa(ZC[2], ZF[1], ZB[3]);
  363. ZG[3] = ZE[1] + 0x240ca1ccU + ZE[2] + ZCh(ZD[3], ZH[2], ZB[2]) + ZR26(ZD[3]);
  364. ZB[0] = ZF[1] + ZG[3];
  365. ZD[0] = ZG[3] + ZR30(ZF[3]) + ZMa(ZB[3], ZC[2], ZF[3]);
  366. ZE[0] = ZB[2] + 0x2de92c6fU + ZA[3] + ZCh(ZB[0], ZD[3], ZH[2]) + ZR26(ZB[0]);
  367. ZF[0] = ZC[2] + ZE[0];
  368. ZG[0] = ZE[0] + ZR30(ZD[0]) + ZMa(ZF[3], ZB[3], ZD[0]);
  369. ZH[0] = ZH[2] + 0x4a7484aaU + ZE[3] + ZCh(ZF[0], ZB[0], ZD[3]) + ZR26(ZF[0]);
  370. ZB[1] = ZB[3] + ZH[0];
  371. ZE[1] = ZH[0] + ZR30(ZG[0]) + ZMa(ZD[0], ZF[3], ZG[0]);
  372. ZF[1] = ZD[3] + 0x5cb0a9dcU + ZC[0] + ZCh(ZB[1], ZF[0], ZB[0]) + ZR26(ZB[1]);
  373. ZG[1] = ZF[3] + ZF[1];
  374. ZB[2] = ZF[1] + ZR30(ZE[1]) + ZMa(ZG[0], ZD[0], ZE[1]);
  375. ZC[2] = ZB[0] + 0x76f988daU + ZC[1] + ZCh(ZG[1], ZB[1], ZF[0]) + ZR26(ZG[1]);
  376. ZD[2] = ZD[0] + ZC[2];
  377. ZH[2] = ZC[2] + ZR30(ZB[2]) + ZMa(ZE[1], ZG[0], ZB[2]);
  378. ZB[3] = ZF[0] + 0x983e5152U + ZD[1] + ZCh(ZD[2], ZG[1], ZB[1]) + ZR26(ZD[2]);
  379. ZC[3] = ZG[0] + ZB[3];
  380. ZD[3] = ZB[3] + ZR30(ZH[2]) + ZMa(ZB[2], ZE[1], ZH[2]);
  381. ZF[3] = ZB[1] + 0xa831c66dU + ZF[2] + ZCh(ZC[3], ZD[2], ZG[1]) + ZR26(ZC[3]);
  382. ZG[3] = ZE[1] + ZF[3];
  383. ZB[0] = ZF[3] + ZR30(ZD[3]) + ZMa(ZH[2], ZB[2], ZD[3]);
  384. ZD[0] = ZG[1] + 0xb00327c8U + ZG[2] + ZCh(ZG[3], ZC[3], ZD[2]) + ZR26(ZG[3]);
  385. ZE[0] = ZB[2] + ZD[0];
  386. ZF[0] = ZD[0] + ZR30(ZB[0]) + ZMa(ZD[3], ZH[2], ZB[0]);
  387. ZG[0] = ZD[2] + 0xbf597fc7U + ZH[3] + ZCh(ZE[0], ZG[3], ZC[3]) + ZR26(ZE[0]);
  388. ZH[0] = ZH[2] + ZG[0];
  389. ZB[1] = ZG[0] + ZR30(ZF[0]) + ZMa(ZB[0], ZD[3], ZF[0]);
  390. ZE[1] = ZC[3] + 0xc6e00bf3U + ZA[0] + ZCh(ZH[0], ZE[0], ZG[3]) + ZR26(ZH[0]);
  391. ZF[1] = ZD[3] + ZE[1];
  392. ZG[1] = ZE[1] + ZR30(ZB[1]) + ZMa(ZF[0], ZB[0], ZB[1]);
  393. ZB[2] = ZR15(ZH[3]) + ZC[0];
  394. ZC[2] = ZG[3] + 0xd5a79147U + ZB[2] + ZCh(ZF[1], ZH[0], ZE[0]) + ZR26(ZF[1]);
  395. ZD[2] = ZB[0] + ZC[2];
  396. ZH[2] = ZC[2] + ZR30(ZG[1]) + ZMa(ZB[1], ZF[0], ZG[1]);
  397. ZB[3] = ZR15(ZA[0]) + 0x00400022U + ZC[1];
  398. ZC[3] = ZE[0] + 0x06ca6351U + ZB[3] + ZCh(ZD[2], ZF[1], ZH[0]) + ZR26(ZD[2]);
  399. ZD[3] = ZF[0] + ZC[3];
  400. ZF[3] = ZC[3] + ZR30(ZH[2]) + ZMa(ZG[1], ZB[1], ZH[2]);
  401. ZG[3] = ZR15(ZB[2]) + ZR25(ZA[1]) + ZD[1] + 0x00000100U;
  402. ZB[0] = ZH[0] + 0x14292967U + ZG[3] + ZCh(ZD[3], ZD[2], ZF[1]) + ZR26(ZD[3]);
  403. ZD[0] = ZB[1] + ZB[0];
  404. ZE[0] = ZB[0] + ZR30(ZF[3]) + ZMa(ZH[2], ZG[1], ZF[3]);
  405. ZF[0] = ZR15(ZB[3]) + ZR25(ZH[1]) + ZF[2] + ZA[1];
  406. ZG[0] = ZF[1] + 0x27b70a85U + ZF[0] + ZCh(ZD[0], ZD[3], ZD[2]) + ZR26(ZD[0]);
  407. ZH[0] = ZG[1] + ZG[0];
  408. ZA[1] = ZG[0] + ZR30(ZE[0]) + ZMa(ZF[3], ZH[2], ZE[0]);
  409. ZB[1] = ZR15(ZG[3]) + ZR25(ZA[2]) + ZG[2] + ZH[1];
  410. ZE[1] = ZD[2] + 0x2e1b2138U + ZB[1] + ZCh(ZH[0], ZD[0], ZD[3]) + ZR26(ZH[0]);
  411. ZF[1] = ZH[2] + ZE[1];
  412. ZG[1] = ZE[1] + ZR30(ZA[1]) + ZMa(ZE[0], ZF[3], ZA[1]);
  413. ZH[1] = ZR15(ZF[0]) + ZR25(ZE[2]) + ZH[3] + ZA[2];
  414. ZA[2] = ZD[3] + 0x4d2c6dfcU + ZH[1] + ZCh(ZF[1], ZH[0], ZD[0]) + ZR26(ZF[1]);
  415. ZC[2] = ZF[3] + ZA[2];
  416. ZD[2] = ZA[2] + ZR30(ZG[1]) + ZMa(ZA[1], ZE[0], ZG[1]);
  417. ZH[2] = ZR15(ZB[1]) + ZR25(ZA[3]) + ZA[0] + ZE[2];
  418. ZC[3] = ZD[0] + 0x53380d13U + ZH[2] + ZCh(ZC[2], ZF[1], ZH[0]) + ZR26(ZC[2]);
  419. ZD[3] = ZE[0] + ZC[3];
  420. ZF[3] = ZC[3] + ZR30(ZD[2]) + ZMa(ZG[1], ZA[1], ZD[2]);
  421. ZB[0] = ZR15(ZH[1]) + ZR25(ZE[3]) + ZB[2] + ZA[3];
  422. ZD[0] = ZH[0] + 0x650a7354U + ZB[0] + ZCh(ZD[3], ZC[2], ZF[1]) + ZR26(ZD[3]);
  423. ZE[0] = ZA[1] + ZD[0];
  424. ZG[0] = ZD[0] + ZR30(ZF[3]) + ZMa(ZD[2], ZG[1], ZF[3]);
  425. ZH[0] = ZR15(ZH[2]) + ZR25(ZC[0]) + ZB[3] + ZE[3];
  426. ZA[1] = ZF[1] + 0x766a0abbU + ZH[0] + ZCh(ZE[0], ZD[3], ZC[2]) + ZR26(ZE[0]);
  427. ZE[1] = ZG[1] + ZA[1];
  428. ZF[1] = ZA[1] + ZR30(ZG[0]) + ZMa(ZF[3], ZD[2], ZG[0]);
  429. ZG[1] = ZR15(ZB[0]) + ZR25(ZC[1]) + ZG[3] + ZC[0];
  430. ZA[2] = ZC[2] + 0x81c2c92eU + ZG[1] + ZCh(ZE[1], ZE[0], ZD[3]) + ZR26(ZE[1]);
  431. ZC[2] = ZD[2] + ZA[2];
  432. ZD[2] = ZA[2] + ZR30(ZF[1]) + ZMa(ZG[0], ZF[3], ZF[1]);
  433. ZE[2] = ZR15(ZH[0]) + ZR25(ZD[1]) + ZF[0] + ZC[1];
  434. ZA[3] = ZD[3] + 0x92722c85U + ZE[2] + ZCh(ZC[2], ZE[1], ZE[0]) + ZR26(ZC[2]);
  435. ZC[3] = ZF[3] + ZA[3];
  436. ZD[3] = ZA[3] + ZR30(ZD[2]) + ZMa(ZF[1], ZG[0], ZD[2]);
  437. ZE[3] = ZR15(ZG[1]) + ZR25(ZF[2]) + ZB[1] + ZD[1];
  438. ZF[3] = ZE[0] + 0xa2bfe8a1U + ZE[3] + ZCh(ZC[3], ZC[2], ZE[1]) + ZR26(ZC[3]);
  439. ZC[0] = ZG[0] + ZF[3];
  440. ZD[0] = ZF[3] + ZR30(ZD[3]) + ZMa(ZD[2], ZF[1], ZD[3]);
  441. ZE[0] = ZR15(ZE[2]) + ZR25(ZG[2]) + ZH[1] + ZF[2];
  442. ZG[0] = ZE[1] + 0xa81a664bU + ZE[0] + ZCh(ZC[0], ZC[3], ZC[2]) + ZR26(ZC[0]);
  443. ZA[1] = ZF[1] + ZG[0];
  444. ZC[1] = ZG[0] + ZR30(ZD[0]) + ZMa(ZD[3], ZD[2], ZD[0]);
  445. ZD[1] = ZR15(ZE[3]) + ZR25(ZH[3]) + ZH[2] + ZG[2];
  446. ZE[1] = ZC[2] + 0xc24b8b70U + ZD[1] + ZCh(ZA[1], ZC[0], ZC[3]) + ZR26(ZA[1]);
  447. ZF[1] = ZD[2] + ZE[1];
  448. ZA[2] = ZE[1] + ZR30(ZC[1]) + ZMa(ZD[0], ZD[3], ZC[1]);
  449. ZC[2] = ZR15(ZE[0]) + ZR25(ZA[0]) + ZB[0] + ZH[3];
  450. ZD[2] = ZC[3] + 0xc76c51a3U + ZC[2] + ZCh(ZF[1], ZA[1], ZC[0]) + ZR26(ZF[1]);
  451. ZF[2] = ZD[3] + ZD[2];
  452. ZG[2] = ZD[2] + ZR30(ZA[2]) + ZMa(ZC[1], ZD[0], ZA[2]);
  453. ZA[3] = ZR15(ZD[1]) + ZR25(ZB[2]) + ZH[0] + ZA[0];
  454. ZC[3] = ZC[0] + 0xd192e819U + ZA[3] + ZCh(ZF[2], ZF[1], ZA[1]) + ZR26(ZF[2]);
  455. ZD[3] = ZD[0] + ZC[3];
  456. ZF[3] = ZC[3] + ZR30(ZG[2]) + ZMa(ZA[2], ZC[1], ZG[2]);
  457. ZH[3] = ZR15(ZC[2]) + ZR25(ZB[3]) + ZG[1] + ZB[2];
  458. ZA[0] = ZA[1] + 0xd6990624U + ZH[3] + ZCh(ZD[3], ZF[2], ZF[1]) + ZR26(ZD[3]);
  459. ZC[0] = ZC[1] + ZA[0];
  460. ZD[0] = ZA[0] + ZR30(ZF[3]) + ZMa(ZG[2], ZA[2], ZF[3]);
  461. ZG[0] = ZR15(ZA[3]) + ZR25(ZG[3]) + ZE[2] + ZB[3];
  462. ZA[1] = ZF[1] + 0xf40e3585U + ZG[0] + ZCh(ZC[0], ZD[3], ZF[2]) + ZR26(ZC[0]);
  463. ZC[1] = ZA[2] + ZA[1];
  464. ZE[1] = ZA[1] + ZR30(ZD[0]) + ZMa(ZF[3], ZG[2], ZD[0]);
  465. ZF[1] = ZR15(ZH[3]) + ZR25(ZF[0]) + ZE[3] + ZG[3];
  466. ZA[2] = ZF[2] + 0x106aa070U + ZF[1] + ZCh(ZC[1], ZC[0], ZD[3]) + ZR26(ZC[1]);
  467. ZB[2] = ZG[2] + ZA[2];
  468. ZD[2] = ZA[2] + ZR30(ZE[1]) + ZMa(ZD[0], ZF[3], ZE[1]);
  469. ZF[2] = ZR15(ZG[0]) + ZR25(ZB[1]) + ZE[0] + ZF[0];
  470. ZG[2] = ZD[3] + 0x19a4c116U + ZF[2] + ZCh(ZB[2], ZC[1], ZC[0]) + ZR26(ZB[2]);
  471. ZB[3] = ZF[3] + ZG[2];
  472. ZC[3] = ZG[2] + ZR30(ZD[2]) + ZMa(ZE[1], ZD[0], ZD[2]);
  473. ZD[3] = ZR15(ZF[1]) + ZR25(ZH[1]) + ZD[1] + ZB[1];
  474. ZF[3] = ZC[0] + 0x1e376c08U + ZD[3] + ZCh(ZB[3], ZB[2], ZC[1]) + ZR26(ZB[3]);
  475. ZG[3] = ZD[0] + ZF[3];
  476. ZA[0] = ZF[3] + ZR30(ZC[3]) + ZMa(ZD[2], ZE[1], ZC[3]);
  477. ZC[0] = ZR15(ZF[2]) + ZC[2] + ZR25(ZH[2]) + ZH[1];
  478. ZD[0] = ZC[1] + 0x2748774cU + ZC[0] + ZCh(ZG[3], ZB[3], ZB[2]) + ZR26(ZG[3]);
  479. ZF[0] = ZE[1] + ZD[0];
  480. ZA[1] = ZD[0] + ZR30(ZA[0]) + ZMa(ZC[3], ZD[2], ZA[0]);
  481. ZB[1] = ZR15(ZD[3]) + ZA[3] + ZR25(ZB[0]) + ZH[2];
  482. ZC[1] = ZB[2] + 0x34b0bcb5U + ZB[1] + ZCh(ZF[0], ZG[3], ZB[3]) + ZR26(ZF[0]);
  483. ZE[1] = ZD[2] + ZC[1];
  484. ZH[1] = ZC[1] + ZR30(ZA[1]) + ZMa(ZA[0], ZC[3], ZA[1]);
  485. ZA[2] = ZR15(ZC[0]) + ZH[3] + ZR25(ZH[0]) + ZB[0];
  486. ZB[2] = ZB[3] + 0x391c0cb3U + ZA[2] + ZCh(ZE[1], ZF[0], ZG[3]) + ZR26(ZE[1]);
  487. ZD[2] = ZC[3] + ZB[2];
  488. ZG[2] = ZB[2] + ZR30(ZH[1]) + ZMa(ZA[1], ZA[0], ZH[1]);
  489. ZH[2] = ZR15(ZB[1]) + ZG[0] + ZR25(ZG[1]) + ZH[0];
  490. ZB[3] = ZG[3] + 0x4ed8aa4aU + ZH[2] + ZCh(ZD[2], ZE[1], ZF[0]) + ZR26(ZD[2]);
  491. ZC[3] = ZA[0] + ZB[3];
  492. ZF[3] = ZB[3] + ZR30(ZG[2]) + ZMa(ZH[1], ZA[1], ZG[2]);
  493. ZG[3] = ZR15(ZA[2]) + ZF[1] + ZR25(ZE[2]) + ZG[1];
  494. ZA[0] = ZF[0] + 0x5b9cca4fU + ZG[3] + ZCh(ZC[3], ZD[2], ZE[1]) + ZR26(ZC[3]);
  495. ZB[0] = ZA[1] + ZA[0];
  496. ZD[0] = ZA[0] + ZR30(ZF[3]) + ZMa(ZG[2], ZH[1], ZF[3]);
  497. ZF[0] = ZR15(ZH[2]) + ZF[2] + ZR25(ZE[3]) + ZE[2];
  498. ZG[0] = ZE[1] + 0x682e6ff3U + ZF[0] + ZCh(ZB[0], ZC[3], ZD[2]) + ZR26(ZB[0]);
  499. ZH[0] = ZH[1] + ZG[0];
  500. ZA[1] = ZG[0] + ZR30(ZD[0]) + ZMa(ZF[3], ZG[2], ZD[0]);
  501. ZC[1] = ZR15(ZG[3]) + ZR25(ZE[0]) + ZD[3] + ZE[3];
  502. ZE[1] = ZD[2] + 0x748f82eeU + ZC[1] + ZCh(ZH[0], ZB[0], ZC[3]) + ZR26(ZH[0]);
  503. ZF[1] = ZG[2] + ZE[1];
  504. ZG[1] = ZE[1] + ZR30(ZA[1]) + ZMa(ZD[0], ZF[3], ZA[1]);
  505. ZH[1] = ZR15(ZF[0]) + ZR25(ZD[1]) + ZC[0] + ZE[0];
  506. ZB[2] = ZF[3] + ZC[3] + 0x78a5636fU + ZH[1] + ZCh(ZF[1], ZH[0], ZB[0]) + ZR26(ZF[1]);
  507. ZD[2] = ZR15(ZC[1]) + ZR25(ZC[2]) + ZB[1] + ZD[1];
  508. ZE[2] = ZD[0] + ZB[0] + 0x84c87814U + ZD[2] + ZCh(ZB[2], ZF[1], ZH[0]) + ZR26(ZB[2]);
  509. ZF[2] = ZA[1] + ZH[0] + 0x8cc70208U + ZR15(ZH[1]) + ZR25(ZA[3]) + ZA[2] + ZC[2] + ZCh(ZE[2], ZB[2], ZF[1]) + ZR26(ZE[2]);
  510. ZG[2] = ZG[1] + ZF[1] + ZR26(ZF[2]) + ZCh(ZF[2], ZE[2], ZB[2]) + ZR15(ZD[2]) + ZH[2] + ZR25(ZH[3]) + ZA[3];
  511. #define FOUND (0x80)
  512. #define NFLAG (0x7F)
  513. #if defined(VECTORS4)
  514. ZG[2] ^= 0x136032EDU;
  515. bool result = ZG[2].x & ZG[2].y & ZG[2].z & ZG[2].w;
  516. if (!result) {
  517. if (!ZG[2].x)
  518. output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
  519. if (!ZG[2].y)
  520. output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
  521. if (!ZG[2].z)
  522. output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z;
  523. if (!ZG[2].w)
  524. output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w;
  525. }
  526. #elif defined(VECTORS2)
  527. ZG[2] ^= 0x136032EDU;
  528. bool result = ZG[2].x & ZG[2].y;
  529. if (!result) {
  530. if (!ZG[2].x)
  531. output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
  532. if (!ZG[2].y)
  533. output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
  534. }
  535. #else
  536. if (ZG[2] == 0x136032EDU)
  537. output[FOUND] = output[NFLAG & Znonce] = Znonce;
  538. #endif
  539. }