poclbm120213.cl 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. // -ck modified kernel taken from Phoenix taken from poclbm, with aspects of
  2. // phatk and others.
  3. // Modified version copyright 2011-2012 Con Kolivas
  4. // This file is taken and modified from the public-domain poclbm project, and
  5. // we have therefore decided to keep it public-domain in Phoenix.
  6. #ifdef VECTORS4 // Select u, the element type of W[] and nonce in search(). VECTORS4/VECTORS2 are presumably set by the host at build time -- TODO confirm.
  7. typedef uint4 u; // 4-wide vector of 32-bit unsigned words
  8. #elif defined VECTORS2
  9. typedef uint2 u; // 2-wide vector of 32-bit unsigned words
  10. #else
  11. typedef uint u; // scalar 32-bit unsigned word
  12. #endif
  13. __constant uint K[64] = { // The 64 SHA-256 round constants; search() adds K[i] in the step for round i.
  14. 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  15. 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  16. 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  17. 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  18. 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  19. 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  20. 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  21. 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  22. };
  23. // This part is not from the stock poclbm kernel. It is part of an optimization
  24. // added in the Phoenix Miner.
  25. // Some AMD devices have a BFI_INT opcode, which behaves exactly like the
  26. // SHA-256 ch function, but provides it in exactly one instruction. If
  27. // detected, use it for ch. Otherwise, construct ch out of simpler logical
  28. // primitives.
  29. #ifdef BITALIGN // AMD media-ops path: defines rotr/ch/Ma via cl_amd_media_ops or bitselect. BITALIGN is presumably set by the host -- TODO confirm.
  30. #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  31. #define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y) // rotate x right by y bits: bitalign of x concatenated with itself
  32. #ifdef BFI_INT
  33. // Well, slight problem... It turns out BFI_INT isn't actually exposed to
  34. // OpenCL (or CAL IL for that matter) in any way. However, there is
  35. // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
  36. // amd_bytealign, takes the same inputs, and provides the same output.
  37. // We can use that as a placeholder for BFI_INT and have the application
  38. // patch it after compilation.
  39. // This is the BFI_INT function (amd_bytealign is only the pre-patch placeholder).
  40. #define ch(x, y, z) amd_bytealign(x, y, z)
  41. // Ma can also be implemented in terms of BFI_INT: select y where z and x differ, x elsewhere.
  42. #define Ma(x, y, z) amd_bytealign( (z^x), (y), (x) )
  43. #else // BFI_INT
  44. // Later SDKs optimise this to BFI_INT without patching, and GCN
  45. // actually fails if manually patched with BFI_INT.
  46. #define ch(x, y, z) bitselect((u)z, (u)y, (u)x) // per bit: y where x is set, z where x is clear
  47. #define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x) // per bit: y where x and z differ, x where they agree (majority)
  48. #endif // BFI_INT
  49. #else // BITALIGN
  50. #define ch(x, y, z) (z ^ (x & (y ^ z))) // portable ch: y where x is set, z where x is clear
  51. #define Ma(x, y, z) ((x & z) | (y & (x | z))) // portable bitwise majority of x, y, z
  52. #define rotr(x, y) rotate((u)x, (u)(32 - y)) // rotate left by (32 - y), i.e. rotate right by y
  53. #endif // BITALIGN. NOTE(review): macro parameters are mostly unparenthesized; the call sites visible here pass only simple operands -- confirm before passing compound expressions.
  54. // AMD's KernelAnalyzer throws errors compiling the kernel if we use
  55. // amd_bytealign on constants with vectors enabled, so Ma2 below is used at those
  56. // call sites to avoid problems. (It is used 4 times, and likely optimized out by the compiler.)
  57. #define Ma2(x, y, z) ((y & z) | (x & (y | z))) // bitwise majority of x, y, z using plain and/or only. NOTE(review): parameters are unparenthesized; pass simple operands only.
  58. __kernel void search(const uint state0, const uint state1, const uint state2, const uint state3,
  59. const uint state4, const uint state5, const uint state6, const uint state7,
  60. const uint b1, const uint c1, const uint d1,
  61. const uint f1, const uint g1, const uint h1,
  62. const u base,
  63. const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r, const uint fcty_e, const uint fcty_e2,
  64. __global uint * output)
  65. {
  66. u W[24];
  67. //u Vals[8]; Now put at W[16] to be in same array
  68. #ifdef VECTORS4
  69. const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
  70. #elif defined VECTORS2
  71. const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
  72. #else
  73. const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
  74. #endif
  75. W[20] = fcty_e + nonce;
  76. W[16] = state0 + W[20];
  77. W[19] = d1 + (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], b1, c1) + K[ 4] + 0x80000000;
  78. W[23] = h1 + W[19];
  79. W[20] += fcty_e2;
  80. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma2(g1, W[20], f1);
  81. W[18] = c1 + (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], b1) + K[ 5];
  82. W[22] = g1 + W[18];
  83. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma2(f1, W[19], W[20]);
  84. W[17] = b1 + (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[ 6];
  85. W[21] = f1 + W[17];
  86. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  87. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[ 7];
  88. W[20] += W[16];
  89. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  90. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[ 8];
  91. W[19] += W[23];
  92. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  93. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[ 9];
  94. W[18] += W[22];
  95. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  96. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[10];
  97. W[17] += W[21];
  98. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  99. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[11];
  100. W[16] += W[20];
  101. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  102. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[12];
  103. W[23] += W[19];
  104. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  105. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[13];
  106. W[22] += W[18];
  107. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  108. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[14];
  109. W[21] += W[17];
  110. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  111. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[15] + 0x00000280U;
  112. W[20] += W[16];
  113. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  114. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[16] + fw0;
  115. W[19] += W[23];
  116. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  117. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[17] + fw1;
  118. W[18] += W[22];
  119. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  120. W[2] = (rotr(nonce, 7) ^ rotr(nonce, 18) ^ (nonce >> 3U)) + fw2;
  121. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[18] + W[2];
  122. W[17] += W[21];
  123. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  124. W[3] = nonce + fw3;
  125. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[19] + W[3];
  126. W[16] += W[20];
  127. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  128. W[4] = (rotr(W[2], 17) ^ rotr(W[2], 19) ^ (W[2] >> 10U)) + 0x80000000;
  129. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[20] + W[4];
  130. W[23] += W[19];
  131. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  132. W[5] = (rotr(W[3], 17) ^ rotr(W[3], 19) ^ (W[3] >> 10U));
  133. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[21] + W[5];
  134. W[22] += W[18];
  135. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  136. W[6] = (rotr(W[4], 17) ^ rotr(W[4], 19) ^ (W[4] >> 10U)) + 0x00000280U;
  137. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[22] + W[6];
  138. W[21] += W[17];
  139. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  140. W[7] = (rotr(W[5], 17) ^ rotr(W[5], 19) ^ (W[5] >> 10U)) + fw0;
  141. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[23] + W[7];
  142. W[20] += W[16];
  143. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  144. W[8] = (rotr(W[6], 17) ^ rotr(W[6], 19) ^ (W[6] >> 10U)) + fw1;
  145. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[24] + W[8];
  146. W[19] += W[23];
  147. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  148. W[9] = W[2] + (rotr(W[7], 17) ^ rotr(W[7], 19) ^ (W[7] >> 10U));
  149. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[25] + W[9];
  150. W[18] += W[22];
  151. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  152. W[10] = W[3] + (rotr(W[8], 17) ^ rotr(W[8], 19) ^ (W[8] >> 10U));
  153. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[26] + W[10];
  154. W[17] += W[21];
  155. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  156. W[11] = W[4] + (rotr(W[9], 17) ^ rotr(W[9], 19) ^ (W[9] >> 10U));
  157. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[27] + W[11];
  158. W[16] += W[20];
  159. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  160. W[12] = W[5] + (rotr(W[10], 17) ^ rotr(W[10], 19) ^ (W[10] >> 10U));
  161. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[28] + W[12];
  162. W[23] += W[19];
  163. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  164. W[13] = W[6] + (rotr(W[11], 17) ^ rotr(W[11], 19) ^ (W[11] >> 10U));
  165. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[29] + W[13];
  166. W[22] += W[18];
  167. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  168. W[14] = 0x00a00055U + W[7] + (rotr(W[12], 17) ^ rotr(W[12], 19) ^ (W[12] >> 10U));
  169. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[30] + W[14];
  170. W[21] += W[17];
  171. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  172. W[15] = fw15 + W[8] + (rotr(W[13], 17) ^ rotr(W[13], 19) ^ (W[13] >> 10U));
  173. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[31] + W[15];
  174. W[20] += W[16];
  175. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  176. W[0] = fw01r + W[9] + (rotr(W[14], 17) ^ rotr(W[14], 19) ^ (W[14] >> 10U));
  177. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[32] + W[0];
  178. W[19] += W[23];
  179. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  180. W[1] = fw1 + (rotr(W[2], 7) ^ rotr(W[2], 18) ^ (W[2] >> 3U)) + W[10] + (rotr(W[15], 17) ^ rotr(W[15], 19) ^ (W[15] >> 10U));
  181. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[33] + W[1];
  182. W[18] += W[22];
  183. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  184. W[2] += (rotr(W[3], 7) ^ rotr(W[3], 18) ^ (W[3] >> 3U)) + W[11] + (rotr(W[0], 17) ^ rotr(W[0], 19) ^ (W[0] >> 10U));
  185. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[34] + W[2];
  186. W[17] += W[21];
  187. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  188. W[3] += (rotr(W[4], 7) ^ rotr(W[4], 18) ^ (W[4] >> 3U)) + W[12] + (rotr(W[1], 17) ^ rotr(W[1], 19) ^ (W[1] >> 10U));
  189. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[35] + W[3];
  190. W[16] += W[20];
  191. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  192. W[4] += (rotr(W[5], 7) ^ rotr(W[5], 18) ^ (W[5] >> 3U)) + W[13] + (rotr(W[2], 17) ^ rotr(W[2], 19) ^ (W[2] >> 10U));
  193. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[36] + W[4];
  194. W[23] += W[19];
  195. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  196. W[5] += (rotr(W[6], 7) ^ rotr(W[6], 18) ^ (W[6] >> 3U)) + W[14] + (rotr(W[3], 17) ^ rotr(W[3], 19) ^ (W[3] >> 10U));
  197. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[37] + W[5];
  198. W[22] += W[18];
  199. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  200. W[6] += (rotr(W[7], 7) ^ rotr(W[7], 18) ^ (W[7] >> 3U)) + W[15] + (rotr(W[4], 17) ^ rotr(W[4], 19) ^ (W[4] >> 10U));
  201. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[38] + W[6];
  202. W[21] += W[17];
  203. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  204. W[7] += (rotr(W[8], 7) ^ rotr(W[8], 18) ^ (W[8] >> 3U)) + W[0] + (rotr(W[5], 17) ^ rotr(W[5], 19) ^ (W[5] >> 10U));
  205. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[39] + W[7];
  206. W[20] += W[16];
  207. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  208. W[8] += (rotr(W[9], 7) ^ rotr(W[9], 18) ^ (W[9] >> 3U)) + W[1] + (rotr(W[6], 17) ^ rotr(W[6], 19) ^ (W[6] >> 10U));
  209. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[40] + W[8];
  210. W[19] += W[23];
  211. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  212. W[9] += (rotr(W[10], 7) ^ rotr(W[10], 18) ^ (W[10] >> 3U)) + W[2] + (rotr(W[7], 17) ^ rotr(W[7], 19) ^ (W[7] >> 10U));
  213. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[41] + W[9];
  214. W[18] += W[22];
  215. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  216. W[10] += (rotr(W[11], 7) ^ rotr(W[11], 18) ^ (W[11] >> 3U)) + W[3] + (rotr(W[8], 17) ^ rotr(W[8], 19) ^ (W[8] >> 10U));
  217. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[42] + W[10];
  218. W[17] += W[21];
  219. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  220. W[11] += (rotr(W[12], 7) ^ rotr(W[12], 18) ^ (W[12] >> 3U)) + W[4] + (rotr(W[9], 17) ^ rotr(W[9], 19) ^ (W[9] >> 10U));
  221. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[43] + W[11];
  222. W[16] += W[20];
  223. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  224. W[12] += (rotr(W[13], 7) ^ rotr(W[13], 18) ^ (W[13] >> 3U)) + W[5] + (rotr(W[10], 17) ^ rotr(W[10], 19) ^ (W[10] >> 10U));
  225. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[44] + W[12];
  226. W[23] += W[19];
  227. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  228. W[13] += (rotr(W[14], 7) ^ rotr(W[14], 18) ^ (W[14] >> 3U)) + W[6] + (rotr(W[11], 17) ^ rotr(W[11], 19) ^ (W[11] >> 10U));
  229. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[45] + W[13];
  230. W[22] += W[18];
  231. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  232. W[14] += (rotr(W[15], 7) ^ rotr(W[15], 18) ^ (W[15] >> 3U)) + W[7] + (rotr(W[12], 17) ^ rotr(W[12], 19) ^ (W[12] >> 10U));
  233. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[46] + W[14];
  234. W[21] += W[17];
  235. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  236. W[15] += (rotr(W[0], 7) ^ rotr(W[0], 18) ^ (W[0] >> 3U)) + W[8] + (rotr(W[13], 17) ^ rotr(W[13], 19) ^ (W[13] >> 10U));
  237. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[47] + W[15];
  238. W[20] += W[16];
  239. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  240. W[0] += (rotr(W[1], 7) ^ rotr(W[1], 18) ^ (W[1] >> 3U)) + W[9] + (rotr(W[14], 17) ^ rotr(W[14], 19) ^ (W[14] >> 10U));
  241. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[48] + W[0];
  242. W[19] += W[23];
  243. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  244. W[1] += (rotr(W[2], 7) ^ rotr(W[2], 18) ^ (W[2] >> 3U)) + W[10] + (rotr(W[15], 17) ^ rotr(W[15], 19) ^ (W[15] >> 10U));
  245. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[49] + W[1];
  246. W[18] += W[22];
  247. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  248. W[2] += (rotr(W[3], 7) ^ rotr(W[3], 18) ^ (W[3] >> 3U)) + W[11] + (rotr(W[0], 17) ^ rotr(W[0], 19) ^ (W[0] >> 10U));
  249. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[50] + W[2];
  250. W[17] += W[21];
  251. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  252. W[3] += (rotr(W[4], 7) ^ rotr(W[4], 18) ^ (W[4] >> 3U)) + W[12] + (rotr(W[1], 17) ^ rotr(W[1], 19) ^ (W[1] >> 10U));
  253. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[51] + W[3];
  254. W[16] += W[20];
  255. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  256. W[4] += (rotr(W[5], 7) ^ rotr(W[5], 18) ^ (W[5] >> 3U)) + W[13] + (rotr(W[2], 17) ^ rotr(W[2], 19) ^ (W[2] >> 10U));
  257. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[52] + W[4];
  258. W[23] += W[19];
  259. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  260. W[5] += (rotr(W[6], 7) ^ rotr(W[6], 18) ^ (W[6] >> 3U)) + W[14] + (rotr(W[3], 17) ^ rotr(W[3], 19) ^ (W[3] >> 10U));
  261. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[53] + W[5];
  262. W[22] += W[18];
  263. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  264. W[6] += (rotr(W[7], 7) ^ rotr(W[7], 18) ^ (W[7] >> 3U)) + W[15] + (rotr(W[4], 17) ^ rotr(W[4], 19) ^ (W[4] >> 10U));
  265. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[54] + W[6];
  266. W[21] += W[17];
  267. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  268. W[7] += (rotr(W[8], 7) ^ rotr(W[8], 18) ^ (W[8] >> 3U)) + W[0] + (rotr(W[5], 17) ^ rotr(W[5], 19) ^ (W[5] >> 10U));
  269. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[55] + W[7];
  270. W[20] += W[16];
  271. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  272. W[8] += (rotr(W[9], 7) ^ rotr(W[9], 18) ^ (W[9] >> 3U)) + W[1] + (rotr(W[6], 17) ^ rotr(W[6], 19) ^ (W[6] >> 10U));
  273. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[56] + W[8];
  274. W[19] += W[23];
  275. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  276. W[9] += (rotr(W[10], 7) ^ rotr(W[10], 18) ^ (W[10] >> 3U)) + W[2] + (rotr(W[7], 17) ^ rotr(W[7], 19) ^ (W[7] >> 10U));
  277. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[57] + W[9];
  278. W[18] += W[22];
  279. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  280. W[10] += (rotr(W[11], 7) ^ rotr(W[11], 18) ^ (W[11] >> 3U)) + W[3] + (rotr(W[8], 17) ^ rotr(W[8], 19) ^ (W[8] >> 10U));
  281. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[58] + W[10];
  282. W[17] += W[21];
  283. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  284. W[11] += (rotr(W[12], 7) ^ rotr(W[12], 18) ^ (W[12] >> 3U)) + W[4] + (rotr(W[9], 17) ^ rotr(W[9], 19) ^ (W[9] >> 10U));
  285. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[59] + W[11];
  286. W[16] += W[20];
  287. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  288. W[12] += (rotr(W[13], 7) ^ rotr(W[13], 18) ^ (W[13] >> 3U)) + W[5] + (rotr(W[10], 17) ^ rotr(W[10], 19) ^ (W[10] >> 10U));
  289. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[60] + W[12];
  290. W[23] += W[19];
  291. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  292. W[13] += (rotr(W[14], 7) ^ rotr(W[14], 18) ^ (W[14] >> 3U)) + W[6] + (rotr(W[11], 17) ^ rotr(W[11], 19) ^ (W[11] >> 10U));
  293. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[61] + W[13];
  294. W[22] += W[18];
  295. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  296. W[14] += (rotr(W[15], 7) ^ rotr(W[15], 18) ^ (W[15] >> 3U)) + W[7] + (rotr(W[12], 17) ^ rotr(W[12], 19) ^ (W[12] >> 10U));
  297. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[62] + W[14];
  298. W[21] += W[17];
  299. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  300. W[15] += (rotr(W[0], 7) ^ rotr(W[0], 18) ^ (W[0] >> 3U)) + W[8] + (rotr(W[13], 17) ^ rotr(W[13], 19) ^ (W[13] >> 10U));
  301. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[63] + W[15];
  302. W[20] += W[16];
  303. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  304. W[0] = W[16] + state0;
  305. W[7] = W[23] + state7;
  306. W[23] = 0xb0edbdd0 + K[ 0] + W[0];
  307. W[3] = W[19] + state3;
  308. W[19] = 0xa54ff53a + W[23];
  309. W[23] += 0x08909ae5U;
  310. W[1] = W[17] + state1;
  311. W[6] = W[22] + state6;
  312. W[22] = 0x1f83d9abU + (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + (0x9b05688cU ^ (W[19] & 0xca0b3af3U)) + K[ 1] + W[1];
  313. W[2] = W[18] + state2;
  314. W[18] = 0x3c6ef372U + W[22];
  315. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma2(0xbb67ae85U, W[23], 0x6a09e667U);
  316. W[5] = W[21] + state5;
  317. W[21] = 0x9b05688cU + (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], 0x510e527fU) + K[ 2] + W[2];
  318. W[17] = 0xbb67ae85U + W[21];
  319. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma2(0x6a09e667U, W[22], W[23]);
  320. W[4] = W[20] + state4;
  321. W[20] = 0x510e527fU + (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[ 3] + W[3];
  322. W[16] = 0x6a09e667U + W[20];
  323. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  324. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[ 4] + W[4];
  325. W[23] += W[19];
  326. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  327. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[ 5] + W[5];
  328. W[22] += W[18];
  329. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  330. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[ 6] + W[6];
  331. W[21] += W[17];
  332. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  333. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[ 7] + W[7];
  334. W[20] += W[16];
  335. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  336. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[ 8] + 0x80000000;
  337. W[19] += W[23];
  338. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  339. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[ 9];
  340. W[18] += W[22];
  341. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  342. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[10];
  343. W[17] += W[21];
  344. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  345. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[11];
  346. W[16] += W[20];
  347. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  348. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[12];
  349. W[23] += W[19];
  350. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  351. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[13];
  352. W[22] += W[18];
  353. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  354. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[14];
  355. W[21] += W[17];
  356. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  357. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[15] + 0x00000100U;
  358. W[20] += W[16];
  359. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  360. W[0] += (rotr(W[1], 7) ^ rotr(W[1], 18) ^ (W[1] >> 3U));
  361. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[16] + W[0];
  362. W[19] += W[23];
  363. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  364. W[1] += (rotr(W[2], 7) ^ rotr(W[2], 18) ^ (W[2] >> 3U)) + 0x00a00000U;
  365. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[17] + W[1];
  366. W[18] += W[22];
  367. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  368. W[2] += (rotr(W[3], 7) ^ rotr(W[3], 18) ^ (W[3] >> 3U)) + (rotr(W[0], 17) ^ rotr(W[0], 19) ^ (W[0] >> 10U));
  369. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[18] + W[2];
  370. W[17] += W[21];
  371. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  372. W[3] += (rotr(W[4], 7) ^ rotr(W[4], 18) ^ (W[4] >> 3U)) + (rotr(W[1], 17) ^ rotr(W[1], 19) ^ (W[1] >> 10U));
  373. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[19] + W[3];
  374. W[16] += W[20];
  375. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  376. W[4] += (rotr(W[5], 7) ^ rotr(W[5], 18) ^ (W[5] >> 3U)) + (rotr(W[2], 17) ^ rotr(W[2], 19) ^ (W[2] >> 10U));
  377. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[20] + W[4];
  378. W[23] += W[19];
  379. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  380. W[5] += (rotr(W[6], 7) ^ rotr(W[6], 18) ^ (W[6] >> 3U)) + (rotr(W[3], 17) ^ rotr(W[3], 19) ^ (W[3] >> 10U));
  381. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[21] + W[5];
  382. W[22] += W[18];
  383. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  384. W[6] += (rotr(W[7], 7) ^ rotr(W[7], 18) ^ (W[7] >> 3U)) + 0x00000100U + (rotr(W[4], 17) ^ rotr(W[4], 19) ^ (W[4] >> 10U));
  385. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[22] + W[6];
  386. W[21] += W[17];
  387. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  388. W[7] += 0x11002000U + W[0] + (rotr(W[5], 17) ^ rotr(W[5], 19) ^ (W[5] >> 10U));
  389. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[23] + W[7];
  390. W[20] += W[16];
  391. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  392. W[8] = 0x80000000 + W[1] + (rotr(W[6], 17) ^ rotr(W[6], 19) ^ (W[6] >> 10U));
  393. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[24] + W[8];
  394. W[19] += W[23];
  395. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  396. W[9] = W[2] + (rotr(W[7], 17) ^ rotr(W[7], 19) ^ (W[7] >> 10U));
  397. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[25] + W[9];
  398. W[18] += W[22];
  399. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  400. W[10] = W[3] + (rotr(W[8], 17) ^ rotr(W[8], 19) ^ (W[8] >> 10U));
  401. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[26] + W[10];
  402. W[17] += W[21];
  403. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  404. W[11] = W[4] + (rotr(W[9], 17) ^ rotr(W[9], 19) ^ (W[9] >> 10U));
  405. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[27] + W[11];
  406. W[16] += W[20];
  407. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  408. W[12] = W[5] + (rotr(W[10], 17) ^ rotr(W[10], 19) ^ (W[10] >> 10U));
  409. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[28] + W[12];
  410. W[23] += W[19];
  411. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  412. W[13] = W[6] + (rotr(W[11], 17) ^ rotr(W[11], 19) ^ (W[11] >> 10U));
  413. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[29] + W[13];
  414. W[22] += W[18];
  415. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  416. W[14] = 0x00400022U + W[7] + (rotr(W[12], 17) ^ rotr(W[12], 19) ^ (W[12] >> 10U));
  417. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[30] + W[14];
  418. W[21] += W[17];
  419. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  420. W[15] = 0x00000100U + (rotr(W[0], 7) ^ rotr(W[0], 18) ^ (W[0] >> 3U)) + W[8] + (rotr(W[13], 17) ^ rotr(W[13], 19) ^ (W[13] >> 10U));
  421. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[31] + W[15];
  422. W[20] += W[16];
  423. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  424. W[0] += (rotr(W[1], 7) ^ rotr(W[1], 18) ^ (W[1] >> 3U)) + W[9] + (rotr(W[14], 17) ^ rotr(W[14], 19) ^ (W[14] >> 10U));
  425. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[32] + W[0];
  426. W[19] += W[23];
  427. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  428. W[1] += (rotr(W[2], 7) ^ rotr(W[2], 18) ^ (W[2] >> 3U)) + W[10] + (rotr(W[15], 17) ^ rotr(W[15], 19) ^ (W[15] >> 10U));
  429. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[33] + W[1];
  430. W[18] += W[22];
  431. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  432. W[2] += (rotr(W[3], 7) ^ rotr(W[3], 18) ^ (W[3] >> 3U)) + W[11] + (rotr(W[0], 17) ^ rotr(W[0], 19) ^ (W[0] >> 10U));
  433. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[34] + W[2];
  434. W[17] += W[21];
  435. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  436. W[3] += (rotr(W[4], 7) ^ rotr(W[4], 18) ^ (W[4] >> 3U)) + W[12] + (rotr(W[1], 17) ^ rotr(W[1], 19) ^ (W[1] >> 10U));
  437. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[35] + W[3];
  438. W[16] += W[20];
  439. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  440. W[4] += (rotr(W[5], 7) ^ rotr(W[5], 18) ^ (W[5] >> 3U)) + W[13] + (rotr(W[2], 17) ^ rotr(W[2], 19) ^ (W[2] >> 10U));
  441. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[36] + W[4];
  442. W[23] += W[19];
  443. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  444. W[5] += (rotr(W[6], 7) ^ rotr(W[6], 18) ^ (W[6] >> 3U)) + W[14] + (rotr(W[3], 17) ^ rotr(W[3], 19) ^ (W[3] >> 10U));
  445. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[37] + W[5];
  446. W[22] += W[18];
  447. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  448. W[6] += (rotr(W[7], 7) ^ rotr(W[7], 18) ^ (W[7] >> 3U)) + W[15] + (rotr(W[4], 17) ^ rotr(W[4], 19) ^ (W[4] >> 10U));
  449. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[38] + W[6];
  450. W[21] += W[17];
  451. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  452. W[7] += (rotr(W[8], 7) ^ rotr(W[8], 18) ^ (W[8] >> 3U)) + W[0] + (rotr(W[5], 17) ^ rotr(W[5], 19) ^ (W[5] >> 10U));
  453. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[39] + W[7];
  454. W[20] += W[16];
  455. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  456. W[8] += (rotr(W[9], 7) ^ rotr(W[9], 18) ^ (W[9] >> 3U)) + W[1] + (rotr(W[6], 17) ^ rotr(W[6], 19) ^ (W[6] >> 10U));
  457. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[40] + W[8];
  458. W[19] += W[23];
  459. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  460. W[9] += (rotr(W[10], 7) ^ rotr(W[10], 18) ^ (W[10] >> 3U)) + W[2] + (rotr(W[7], 17) ^ rotr(W[7], 19) ^ (W[7] >> 10U));
  461. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[41] + W[9];
  462. W[18] += W[22];
  463. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  464. W[10] += (rotr(W[11], 7) ^ rotr(W[11], 18) ^ (W[11] >> 3U)) + W[3] + (rotr(W[8], 17) ^ rotr(W[8], 19) ^ (W[8] >> 10U));
  465. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[42] + W[10];
  466. W[17] += W[21];
  467. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  468. W[11] += (rotr(W[12], 7) ^ rotr(W[12], 18) ^ (W[12] >> 3U)) + W[4] + (rotr(W[9], 17) ^ rotr(W[9], 19) ^ (W[9] >> 10U));
  469. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[43] + W[11];
  470. W[16] += W[20];
  471. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  472. W[12] += (rotr(W[13], 7) ^ rotr(W[13], 18) ^ (W[13] >> 3U)) + W[5] + (rotr(W[10], 17) ^ rotr(W[10], 19) ^ (W[10] >> 10U));
  473. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[44] + W[12];
  474. W[23] += W[19];
  475. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  476. W[13] += (rotr(W[14], 7) ^ rotr(W[14], 18) ^ (W[14] >> 3U)) + W[6] + (rotr(W[11], 17) ^ rotr(W[11], 19) ^ (W[11] >> 10U));
  477. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[45] + W[13];
  478. W[22] += W[18];
  479. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  480. W[14] += (rotr(W[15], 7) ^ rotr(W[15], 18) ^ (W[15] >> 3U)) + W[7] + (rotr(W[12], 17) ^ rotr(W[12], 19) ^ (W[12] >> 10U));
  481. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[46] + W[14];
  482. W[21] += W[17];
  483. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  484. W[15] += (rotr(W[0], 7) ^ rotr(W[0], 18) ^ (W[0] >> 3U)) + W[8] + (rotr(W[13], 17) ^ rotr(W[13], 19) ^ (W[13] >> 10U));
  485. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[47] + W[15];
  486. W[20] += W[16];
  487. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  488. W[0] += (rotr(W[1], 7) ^ rotr(W[1], 18) ^ (W[1] >> 3U)) + W[9] + (rotr(W[14], 17) ^ rotr(W[14], 19) ^ (W[14] >> 10U));
  489. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[48] + W[0];
  490. W[19] += W[23];
  491. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  492. W[1] += (rotr(W[2], 7) ^ rotr(W[2], 18) ^ (W[2] >> 3U)) + W[10] + (rotr(W[15], 17) ^ rotr(W[15], 19) ^ (W[15] >> 10U));
  493. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[49] + W[1];
  494. W[18] += W[22];
  495. W[22] += (rotr(W[23], 2) ^ rotr(W[23], 13) ^ rotr(W[23], 22)) + Ma(W[17], W[23], W[16]);
  496. W[2] += (rotr(W[3], 7) ^ rotr(W[3], 18) ^ (W[3] >> 3U)) + W[11] + (rotr(W[0], 17) ^ rotr(W[0], 19) ^ (W[0] >> 10U));
  497. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[50] + W[2];
  498. W[17] += W[21];
  499. W[21] += (rotr(W[22], 2) ^ rotr(W[22], 13) ^ rotr(W[22], 22)) + Ma(W[16], W[22], W[23]);
  500. W[3] += (rotr(W[4], 7) ^ rotr(W[4], 18) ^ (W[4] >> 3U)) + W[12] + (rotr(W[1], 17) ^ rotr(W[1], 19) ^ (W[1] >> 10U));
  501. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[51] + W[3];
  502. W[16] += W[20];
  503. W[20] += (rotr(W[21], 2) ^ rotr(W[21], 13) ^ rotr(W[21], 22)) + Ma(W[23], W[21], W[22]);
  504. W[4] += (rotr(W[5], 7) ^ rotr(W[5], 18) ^ (W[5] >> 3U)) + W[13] + (rotr(W[2], 17) ^ rotr(W[2], 19) ^ (W[2] >> 10U));
  505. W[19] += (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[52] + W[4];
  506. W[23] += W[19];
  507. W[19] += (rotr(W[20], 2) ^ rotr(W[20], 13) ^ rotr(W[20], 22)) + Ma(W[22], W[20], W[21]);
  508. W[5] += (rotr(W[6], 7) ^ rotr(W[6], 18) ^ (W[6] >> 3U)) + W[14] + (rotr(W[3], 17) ^ rotr(W[3], 19) ^ (W[3] >> 10U));
  509. W[18] += (rotr(W[23], 6) ^ rotr(W[23], 11) ^ rotr(W[23], 25)) + ch(W[23], W[16], W[17]) + K[53] + W[5];
  510. W[22] += W[18];
  511. W[18] += (rotr(W[19], 2) ^ rotr(W[19], 13) ^ rotr(W[19], 22)) + Ma(W[21], W[19], W[20]);
  512. W[6] += (rotr(W[7], 7) ^ rotr(W[7], 18) ^ (W[7] >> 3U)) + W[15] + (rotr(W[4], 17) ^ rotr(W[4], 19) ^ (W[4] >> 10U));
  513. W[17] += (rotr(W[22], 6) ^ rotr(W[22], 11) ^ rotr(W[22], 25)) + ch(W[22], W[23], W[16]) + K[54] + W[6];
  514. W[21] += W[17];
  515. W[17] += (rotr(W[18], 2) ^ rotr(W[18], 13) ^ rotr(W[18], 22)) + Ma(W[20], W[18], W[19]);
  516. W[7] += (rotr(W[8], 7) ^ rotr(W[8], 18) ^ (W[8] >> 3U)) + W[0] + (rotr(W[5], 17) ^ rotr(W[5], 19) ^ (W[5] >> 10U));
  517. W[16] += (rotr(W[21], 6) ^ rotr(W[21], 11) ^ rotr(W[21], 25)) + ch(W[21], W[22], W[23]) + K[55] + W[7];
  518. W[20] += W[16];
  519. W[16] += (rotr(W[17], 2) ^ rotr(W[17], 13) ^ rotr(W[17], 22)) + Ma(W[19], W[17], W[18]);
  520. W[8] += (rotr(W[9], 7) ^ rotr(W[9], 18) ^ (W[9] >> 3U)) + W[1] + (rotr(W[6], 17) ^ rotr(W[6], 19) ^ (W[6] >> 10U));
  521. W[23] += (rotr(W[20], 6) ^ rotr(W[20], 11) ^ rotr(W[20], 25)) + ch(W[20], W[21], W[22]) + K[56] + W[8];
  522. W[19] += W[23];
  523. W[23] += (rotr(W[16], 2) ^ rotr(W[16], 13) ^ rotr(W[16], 22)) + Ma(W[18], W[16], W[17]);
  524. W[9] += (rotr(W[10], 7) ^ rotr(W[10], 18) ^ (W[10] >> 3U)) + W[2] + (rotr(W[7], 17) ^ rotr(W[7], 19) ^ (W[7] >> 10U));
  525. W[22] += (rotr(W[19], 6) ^ rotr(W[19], 11) ^ rotr(W[19], 25)) + ch(W[19], W[20], W[21]) + K[57] + W[9];
  526. W[18] += W[22];
  527. W[10] += (rotr(W[11], 7) ^ rotr(W[11], 18) ^ (W[11] >> 3U)) + W[3] + (rotr(W[8], 17) ^ rotr(W[8], 19) ^ (W[8] >> 10U));
  528. W[21] += (rotr(W[18], 6) ^ rotr(W[18], 11) ^ rotr(W[18], 25)) + ch(W[18], W[19], W[20]) + K[58] + W[10];
  529. W[17] += W[21];
  530. W[11] += (rotr(W[12], 7) ^ rotr(W[12], 18) ^ (W[12] >> 3U)) + W[4] + (rotr(W[9], 17) ^ rotr(W[9], 19) ^ (W[9] >> 10U));
  531. W[20] += (rotr(W[17], 6) ^ rotr(W[17], 11) ^ rotr(W[17], 25)) + ch(W[17], W[18], W[19]) + K[59] + W[11];
  532. W[16] += W[20];
  533. W[12] += (rotr(W[13], 7) ^ rotr(W[13], 18) ^ (W[13] >> 3U)) + W[5] + (rotr(W[10], 17) ^ rotr(W[10], 19) ^ (W[10] >> 10U));
  534. W[23] += W[19] + (rotr(W[16], 6) ^ rotr(W[16], 11) ^ rotr(W[16], 25)) + ch(W[16], W[17], W[18]) + K[60] + W[12];
  535. #define FOUND (0x80)
  536. #define NFLAG (0x7F)
  537. #if defined(VECTORS4)
  538. W[23] ^= -0x5be0cd19U;
  539. bool result = W[23].x & W[23].y & W[23].z & W[23].w;
  540. if (!result) {
  541. if (!W[23].x)
  542. output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
  543. if (!W[23].y)
  544. output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
  545. if (!W[23].z)
  546. output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
  547. if (!W[23].w)
  548. output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
  549. }
  550. #elif defined(VECTORS2)
  551. W[23] ^= -0x5be0cd19U;
  552. bool result = W[23].x & W[23].y;
  553. if (!result) {
  554. if (!W[23].x)
  555. output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
  556. if (!W[23].y)
  557. output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
  558. }
  559. #else
  560. if (W[23] == -0x5be0cd19U)
  561. output[FOUND] = output[NFLAG & nonce] = nonce;
  562. #endif
  563. }