// poclbm.cl
//
// This file is taken and modified from the public-domain poclbm project, and
// we have therefore decided to keep it public-domain in Phoenix.
  3. #define VECTORS
  4. #ifdef VECTORS
  5. typedef uint4 u;
  6. #else
  7. typedef uint u;
  8. #endif
  9. __constant uint K[64] = {
  10. 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  11. 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  12. 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  13. 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  14. 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  15. 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  16. 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  17. 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  18. };
  19. #define BITALIGN
  20. #ifdef BITALIGN
  21. #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  22. #define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y)
  23. #else
  24. #define rotr(x, y) rotate((u)x, (u)(32-y))
  25. #endif
  26. // This part is not from the stock poclbm kernel. It's part of an optimization
  27. // added in the Phoenix Miner.
  28. // Some AMD devices have a BFI_INT opcode, which behaves exactly like the
  29. // SHA-256 Ch function, but provides it in exactly one instruction. If
  30. // detected, use it for Ch. Otherwise, construct Ch out of simpler logical
  31. // primitives.
  32. #define BFI_INT
  33. #ifdef BFI_INT
  34. // Well, slight problem... It turns out BFI_INT isn't actually exposed to
  35. // OpenCL (or CAL IL for that matter) in any way. However, there is
  36. // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
  37. // amd_bytealign, takes the same inputs, and provides the same output.
  38. // We can use that as a placeholder for BFI_INT and have the application
  39. // patch it after compilation.
  40. // This is the BFI_INT function
  41. #define Ch(x, y, z) amd_bytealign(x, y, z)
  42. // Ma can also be implemented in terms of BFI_INT...
  43. #define Ma(x, y, z) amd_bytealign((y), (x | z), (z & x))
  44. #else
  45. #define Ch(x, y, z) (z ^ (x & (y ^ z)))
  46. #define Ma(x, y, z) ((x & z) | (y & (x | z)))
  47. #endif
  48. // AMD's KernelAnalyzer throws errors compiling the kernel if we use
  49. // amd_bytealign on constants with vectors enabled, so we use this to avoid
  50. // problems. (this is used 4 times, and likely optimized out by the compiler.)
  51. #define Ma2(x, y, z) ((y & z) | (x & (y | z)))
  52. __kernel void search( const uint state0, const uint state1, const uint state2, const uint state3,
  53. const uint state4, const uint state5, const uint state6, const uint state7,
  54. const uint B1, const uint C1, const uint D1,
  55. const uint F1, const uint G1, const uint H1,
  56. const uint base,
  57. const uint fW0, const uint fW1, const uint fW2, const uint fW3, const uint fW15, const uint fW01r, const uint fcty_e, const uint fcty_e2,
  58. __global uint * output)
  59. {
  60. u W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15;
  61. u A,B,C,D,E,F,G,H;
  62. u nonce;
  63. uint it;
  64. #ifdef VECTORS
  65. nonce = ((base >> 2) + (get_global_id(0))<<2) + (uint4)(0, 1, 2, 3);
  66. #else
  67. nonce = base + get_global_id(0);
  68. #endif
  69. W3 = nonce + fW3;
  70. E = fcty_e + nonce; A = state0 + E; E = E + fcty_e2;
  71. D = D1 + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B1, C1) + K[ 4] + 0x80000000; H = H1 + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma2(G1, E, F1);
  72. C = C1 + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B1) + K[ 5]; G = G1 + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma2(F1, D, E);
  73. B = B1 + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[ 6]; F = F1 + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  74. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[ 7]; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  75. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[ 8]; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  76. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  77. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  78. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  79. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  80. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  81. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  82. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[15] + 0x00000280U; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  83. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[16] + fW0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  84. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[17] + fW1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  85. W2 = (rotr(nonce, 7) ^ rotr(nonce, 18) ^ (nonce >> 3U)) + fW2;
  86. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[18] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  87. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[19] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  88. W4 = (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U)) + 0x80000000;
  89. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[20] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  90. W5 = (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
  91. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[21] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  92. W6 = (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U)) + 0x00000280U;
  93. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[22] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  94. W7 = (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U)) + fW0;
  95. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[23] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  96. W8 = (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U)) + fW1;
  97. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[24] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  98. W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
  99. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[25] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  100. W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
  101. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  102. W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
  103. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  104. W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
  105. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  106. W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
  107. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  108. W14 = 0x00a00055U + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
  109. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  110. W15 = fW15 + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
  111. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  112. W0 = fW01r + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10U));
  113. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[32] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  114. W1 = fW1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10U));
  115. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[33] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  116. W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
  117. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[34] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  118. W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
  119. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[35] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  120. W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
  121. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[36] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  122. W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
  123. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[37] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  124. W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
  125. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[38] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  126. W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3U)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
  127. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[39] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  128. W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3U)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
  129. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[40] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  130. W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3U)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
  131. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[41] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  132. W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3U)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
  133. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  134. W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3U)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
  135. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  136. W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3U)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
  137. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  138. W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3U)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
  139. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  140. W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3U)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
  141. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  142. W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3U)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
  143. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  144. W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3U)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10U));
  145. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[48] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  146. W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10U));
  147. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[49] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  148. W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
  149. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[50] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  150. W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
  151. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[51] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  152. W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
  153. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[52] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  154. W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
  155. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[53] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  156. W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
  157. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[54] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  158. W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3U)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
  159. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[55] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  160. W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3U)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
  161. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[56] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  162. W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3U)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
  163. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[57] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  164. W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3U)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
  165. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[58] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  166. W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3U)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
  167. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[59] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  168. W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3U)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
  169. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[60] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  170. W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3U)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
  171. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[61] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  172. W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3U)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
  173. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[62] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  174. W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3U)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
  175. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[63] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  176. W0 = A + state0; W1 = B + state1;
  177. W2 = C + state2; W3 = D + state3;
  178. W4 = E + state4; W5 = F + state5;
  179. W6 = G + state6; W7 = H + state7;
  180. H = 0xb0edbdd0 + K[ 0] + W0; D = 0xa54ff53a + H; H = H + 0x08909ae5U;
  181. G = 0x1f83d9abU + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (0x9b05688cU ^ (D & 0xca0b3af3U)) + K[ 1] + W1; C = 0x3c6ef372U + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma2(0xbb67ae85U, H, 0x6a09e667U);
  182. F = 0x9b05688cU + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, 0x510e527fU) + K[ 2] + W2; B = 0xbb67ae85U + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma2(0x6a09e667U, G, H);
  183. E = 0x510e527fU + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[ 3] + W3; A = 0x6a09e667U + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  184. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[ 4] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  185. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[ 5] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  186. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[ 6] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  187. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[ 7] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  188. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[ 8] + 0x80000000; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  189. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  190. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  191. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  192. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  193. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  194. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  195. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[15] + 0x00000100U; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  196. W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3U));
  197. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[16] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  198. W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + 0x00a00000U;
  199. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[17] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  200. W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
  201. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[18] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  202. W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
  203. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[19] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  204. W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
  205. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[20] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  206. W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
  207. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[21] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  208. W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + 0x00000100U + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
  209. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[22] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  210. W7 = W7 + 0x11002000U + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
  211. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[23] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  212. W8 = 0x80000000 + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
  213. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[24] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  214. W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
  215. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[25] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  216. W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
  217. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  218. W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
  219. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  220. W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
  221. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  222. W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
  223. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  224. W14 = 0x00400022U + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
  225. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  226. W15 = 0x00000100U + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3U)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
  227. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  228. W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3U)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10U));
  229. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[32] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  230. W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10U));
  231. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[33] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  232. W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
  233. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[34] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  234. W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
  235. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[35] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  236. W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
  237. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[36] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  238. W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
  239. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[37] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  240. W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
  241. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[38] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  242. W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3U)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
  243. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[39] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  244. W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3U)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
  245. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[40] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  246. W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3U)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
  247. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[41] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  248. W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3U)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
  249. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  250. W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3U)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
  251. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  252. W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3U)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
  253. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  254. W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3U)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10U));
  255. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  256. W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3U)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10U));
  257. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  258. W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3U)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10U));
  259. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  260. W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3U)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10U));
  261. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[48] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  262. W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3U)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10U));
  263. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[49] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + Ma(B, H, A);
  264. W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3U)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10U));
  265. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[50] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + Ma(A, G, H);
  266. W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3U)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10U));
  267. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[51] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + Ma(H, F, G);
  268. W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3U)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10U));
  269. D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[52] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + Ma(G, E, F);
  270. W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3U)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10U));
  271. C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + Ch(H, A, B) + K[53] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + Ma(F, D, E);
  272. W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3U)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10U));
  273. B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + Ch(G, H, A) + K[54] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + Ma(E, C, D);
  274. W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3U)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10U));
  275. A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + Ch(F, G, H) + K[55] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + Ma(D, B, C);
  276. W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3U)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10U));
  277. H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + Ch(E, F, G) + K[56] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + Ma(C, A, B);
  278. W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3U)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10U));
  279. G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + Ch(D, E, F) + K[57] + W9; C = C + G;
  280. W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3U)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10U));
  281. F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + Ch(C, D, E) + K[58] + W10; B = B + F;
  282. W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3U)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10U));
  283. E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + Ch(B, C, D) + K[59] + W11; A = A + E;
  284. W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3U)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10U));
  285. H = H + D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + Ch(A, B, C) + K[60] + W12;
  286. H+=0x5be0cd19U;
  287. #ifdef VECTORS
  288. if (H.x == 0)
  289. {
  290. for (it = 0; it != 127; it++) {
  291. if (!output[it]) {
  292. output[it] = nonce.x;
  293. output[127] = 1;
  294. break;
  295. }
  296. }
  297. }
  298. if (H.y == 0)
  299. {
  300. for (it = 0; it != 127; it++) {
  301. if (!output[it]) {
  302. output[it] = nonce.y;
  303. output[127] = 1;
  304. break;
  305. }
  306. }
  307. }
  308. if (H.z == 0)
  309. {
  310. for (it = 0; it != 127; it++) {
  311. if (!output[it]) {
  312. output[it] = nonce.z;
  313. output[127] = 1;
  314. break;
  315. }
  316. }
  317. }
  318. if (H.w == 0)
  319. {
  320. for (it = 0; it != 127; it++) {
  321. if (!output[it]) {
  322. output[it] = nonce.w;
  323. output[127] = 1;
  324. break;
  325. }
  326. }
  327. }
  328. #else
  329. if (H == 0)
  330. {
  331. for (it = 0; it != 127; it++) {
  332. if (!output[it]) {
  333. output[it] = nonce;
  334. output[127] = 1;
  335. break;
  336. }
  337. }
  338. }
  339. #endif
  340. }