poclbm120213.cl 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294
  1. // -ck modified kernel taken from Phoenix taken from poclbm, with aspects of
  2. // phatk and others.
  3. // Modified version copyright 2011-2012 Con Kolivas
  4. // This file is taken and modified from the public-domain poclbm project, and
  5. // we have therefore decided to keep it public-domain in Phoenix.
  // `u` is the word type used for all per-work-item arithmetic in this file.
  // The build defines selects its width: VECTORS4 takes precedence over
  // VECTORS2, and with neither defined `u` is a plain scalar uint.
  #if defined(VECTORS4)
  typedef uint4 u; // four uint components
  #elif defined(VECTORS2)
  typedef uint2 u; // two uint components
  #else
  typedef uint u;  // scalar
  #endif
  // The 64 SHA-256 round constants, indexed by round number and placed in the
  // __constant address space. The trailing comment on each row gives the index
  // of the first entry in that row.
  __constant uint K[64] = {
      0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, //  0
      0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, //  4
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, //  8
      0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, // 12
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, // 16
      0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, // 20
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, // 24
      0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, // 28
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, // 32
      0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, // 36
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, // 40
      0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, // 44
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, // 48
      0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, // 52
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, // 56
      0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2  // 60
  };
  // This part is not from the stock poclbm kernel. It's part of an optimization
  // added in the Phoenix Miner.
  //
  // Some AMD devices have a BFI_INT opcode, which behaves exactly like the
  // SHA-256 ch function, but provides it in exactly one instruction. If
  // detected, use it for ch. Otherwise, construct ch out of simpler logical
  // primitives.
  //
  // Macros defined by this section (all operate bit by bit):
  //   rotr(x, y)  - x rotated right by y bit positions
  //   ch(x, y, z) - "choose": the bit of y where x is 1, the bit of z where x is 0
  //   Ma(x, y, z) - "majority": 1 where at least two of x, y, z are 1
  //
  // Every macro argument is wrapped in parentheses so that a compound
  // expression passed as an argument (e.g. `a | b`) is treated as one operand
  // instead of being regrouped by operator precedence or only partly cast.
  #ifdef BITALIGN
  #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  #define rotr(x, y) amd_bitalign((u)(x), (u)(x), (u)(y))
  #ifdef BFI_INT
  // Well, slight problem... It turns out BFI_INT isn't actually exposed to
  // OpenCL (or CAL IL for that matter) in any way. However, there is
  // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
  // amd_bytealign, takes the same inputs, and provides the same output.
  // We can use that as a placeholder for BFI_INT and have the application
  // patch it after compilation.
  // This is the BFI_INT function
  #define ch(x, y, z) amd_bytealign((x), (y), (z))
  // Ma can also be implemented in terms of BFI_INT...
  #define Ma(x, y, z) amd_bytealign(((z) ^ (x)), (y), (x))
  #else // BFI_INT
  // Later SDKs optimise this to BFI INT without patching and GCN
  // actually fails if manually patched with BFI_INT
  #define ch(x, y, z) bitselect((u)(z), (u)(y), (u)(x))
  #define Ma(x, y, z) bitselect((u)(x), (u)(y), (u)(z) ^ (u)(x))
  #endif
  #else // BITALIGN
  // Portable fallback built from plain logical operators and rotate().
  #define ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
  #define Ma(x, y, z) (((x) & (z)) | ((y) & ((x) | (z))))
  // rotate() rotates left, so a right rotation by y is a left rotation by 32 - y.
  #define rotr(x, y) rotate((u)(x), (u)(32 - (y)))
  #endif
  // AMD's KernelAnalyzer throws errors compiling the kernel if we use
  // amd_bytealign on constants with vectors enabled, so we use this to avoid
  // problems. (this is used 4 times, and likely optimized out by the compiler.)
  //
  // Ma2(x, y, z) is the bitwise majority of its three inputs (1 where at least
  // two of them are 1), written with plain AND/OR only. Each argument is
  // parenthesized so that a compound expression passed in is treated as a
  // single operand rather than being regrouped by operator precedence.
  #define Ma2(x, y, z) (((y) & (z)) | ((x) & ((y) | (z))))
  58. __kernel void search(const uint state0, const uint state1, const uint state2, const uint state3,
  59. const uint state4, const uint state5, const uint state6, const uint state7,
  60. const uint b1, const uint c1, const uint d1,
  61. const uint f1, const uint g1, const uint h1,
  62. const u base,
  63. const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r, const uint fcty_e, const uint fcty_e2,
  64. __global uint * output)
  65. {
  66. u W[24];
  67. u *Vals = &W[16]; // Now put at W[16] to be in same array
  68. #ifdef VECTORS4
  69. const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
  70. #elif defined VECTORS2
  71. const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
  72. #else
  73. const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
  74. #endif
  75. Vals[4]=fcty_e;
  76. Vals[4]+=nonce;
  77. Vals[0]=Vals[4];
  78. Vals[0]+=state0;
  79. Vals[3]=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  80. Vals[3]+=d1;
  81. Vals[3]+=ch(Vals[0],b1,c1);
  82. Vals[3]+=0xB956C25B;
  83. Vals[7]=Vals[3];
  84. Vals[7]+=h1;
  85. Vals[4]+=fcty_e2;
  86. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  87. Vals[2]=c1;
  88. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  89. Vals[2]+=ch(Vals[7],Vals[0],b1);
  90. Vals[2]+=K[5];
  91. Vals[6]=Vals[2];
  92. Vals[6]+=g1;
  93. Vals[3]+=Ma2(g1,Vals[4],f1);
  94. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  95. Vals[1]=b1;
  96. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  97. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  98. Vals[1]+=K[6];
  99. Vals[5]=Vals[1];
  100. Vals[5]+=f1;
  101. Vals[2]+=Ma2(f1,Vals[3],Vals[4]);
  102. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  103. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  104. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  105. Vals[0]+=K[7];
  106. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  107. Vals[4]+=Vals[0];
  108. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  109. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  110. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  111. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  112. Vals[7]+=K[8];
  113. Vals[3]+=Vals[7];
  114. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  115. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  116. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  117. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  118. Vals[6]+=K[9];
  119. Vals[2]+=Vals[6];
  120. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  121. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  122. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  123. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  124. Vals[5]+=K[10];
  125. Vals[1]+=Vals[5];
  126. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  127. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  128. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  129. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  130. Vals[4]+=K[11];
  131. Vals[0]+=Vals[4];
  132. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  133. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  134. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  135. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  136. Vals[3]+=K[12];
  137. Vals[7]+=Vals[3];
  138. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  139. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  140. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  141. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  142. Vals[2]+=K[13];
  143. Vals[6]+=Vals[2];
  144. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  145. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  146. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  147. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  148. Vals[1]+=K[14];
  149. Vals[5]+=Vals[1];
  150. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  151. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  152. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  153. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  154. Vals[0]+=0xC19BF3F4;
  155. Vals[4]+=Vals[0];
  156. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  157. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  158. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  159. Vals[7]+=K[16];
  160. Vals[7]+=fw0;
  161. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  162. Vals[3]+=Vals[7];
  163. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  164. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  165. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  166. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  167. Vals[6]+=K[17];
  168. Vals[6]+=fw1;
  169. Vals[2]+=Vals[6];
  170. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  171. W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U));
  172. W[2]+=fw2;
  173. Vals[5]+=W[2];
  174. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  175. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  176. Vals[5]+=K[18];
  177. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  178. Vals[1]+=Vals[5];
  179. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  180. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  181. W[3]=nonce;
  182. W[3]+=fw3;
  183. Vals[4]+=W[3];
  184. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  185. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  186. Vals[4]+=K[19];
  187. Vals[0]+=Vals[4];
  188. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  189. W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  190. W[4]+=0x80000000;
  191. Vals[3]+=W[4];
  192. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  193. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  194. Vals[3]+=K[20];
  195. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  196. Vals[7]+=Vals[3];
  197. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  198. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  199. W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  200. Vals[2]+=W[5];
  201. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  202. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  203. Vals[2]+=K[21];
  204. Vals[6]+=Vals[2];
  205. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  206. W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  207. W[6]+=0x00000280U;
  208. Vals[1]+=W[6];
  209. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  210. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  211. Vals[1]+=K[22];
  212. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  213. Vals[5]+=Vals[1];
  214. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  215. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  216. W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  217. W[7]+=fw0;
  218. Vals[0]+=W[7];
  219. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  220. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  221. Vals[0]+=K[23];
  222. Vals[4]+=Vals[0];
  223. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  224. W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  225. W[8]+=fw1;
  226. Vals[7]+=W[8];
  227. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  228. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  229. Vals[7]+=K[24];
  230. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  231. Vals[3]+=Vals[7];
  232. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  233. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  234. W[9]=W[2];
  235. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  236. Vals[6]+=W[9];
  237. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  238. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  239. Vals[6]+=K[25];
  240. Vals[2]+=Vals[6];
  241. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  242. W[10]=W[3];
  243. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  244. Vals[5]+=W[10];
  245. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  246. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  247. Vals[5]+=K[26];
  248. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  249. Vals[1]+=Vals[5];
  250. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  251. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  252. W[11]=W[4];
  253. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  254. Vals[4]+=W[11];
  255. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  256. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  257. Vals[4]+=K[27];
  258. Vals[0]+=Vals[4];
  259. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  260. W[12]=W[5];
  261. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  262. Vals[3]+=W[12];
  263. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  264. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  265. Vals[3]+=K[28];
  266. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  267. Vals[7]+=Vals[3];
  268. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  269. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  270. W[13]=W[6];
  271. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  272. Vals[2]+=W[13];
  273. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  274. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  275. Vals[2]+=K[29];
  276. Vals[6]+=Vals[2];
  277. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  278. W[14]=0x00a00055U;
  279. W[14]+=W[7];
  280. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  281. Vals[1]+=W[14];
  282. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  283. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  284. Vals[1]+=K[30];
  285. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  286. Vals[5]+=Vals[1];
  287. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  288. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  289. W[15]=fw15;
  290. W[15]+=W[8];
  291. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  292. Vals[0]+=W[15];
  293. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  294. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  295. Vals[0]+=K[31];
  296. Vals[4]+=Vals[0];
  297. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  298. W[0]=fw01r;
  299. W[0]+=W[9];
  300. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  301. Vals[7]+=W[0];
  302. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  303. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  304. Vals[7]+=K[32];
  305. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  306. Vals[3]+=Vals[7];
  307. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  308. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  309. W[1]=fw1;
  310. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  311. W[1]+=W[10];
  312. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  313. Vals[6]+=W[1];
  314. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  315. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  316. Vals[6]+=K[33];
  317. Vals[2]+=Vals[6];
  318. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  319. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  320. W[2]+=W[11];
  321. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  322. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  323. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  324. Vals[5]+=K[34];
  325. Vals[5]+=W[2];
  326. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  327. Vals[1]+=Vals[5];
  328. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  329. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  330. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  331. W[3]+=W[12];
  332. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  333. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  334. Vals[4]+=K[35];
  335. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  336. Vals[4]+=W[3];
  337. Vals[0]+=Vals[4];
  338. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  339. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  340. W[4]+=W[13];
  341. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  342. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  343. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  344. Vals[3]+=K[36];
  345. Vals[3]+=W[4];
  346. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  347. Vals[7]+=Vals[3];
  348. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  349. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  350. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  351. W[5]+=W[14];
  352. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  353. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  354. Vals[2]+=K[37];
  355. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  356. Vals[2]+=W[5];
  357. Vals[6]+=Vals[2];
  358. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  359. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  360. W[6]+=W[15];
  361. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  362. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  363. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  364. Vals[1]+=K[38];
  365. Vals[1]+=W[6];
  366. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  367. Vals[5]+=Vals[1];
  368. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  369. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  370. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  371. W[7]+=W[0];
  372. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  373. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  374. Vals[0]+=K[39];
  375. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  376. Vals[0]+=W[7];
  377. Vals[4]+=Vals[0];
  378. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  379. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  380. W[8]+=W[1];
  381. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  382. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  383. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  384. Vals[7]+=K[40];
  385. Vals[7]+=W[8];
  386. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  387. Vals[3]+=Vals[7];
  388. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  389. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  390. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  391. W[9]+=W[2];
  392. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  393. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  394. Vals[6]+=K[41];
  395. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  396. Vals[6]+=W[9];
  397. Vals[2]+=Vals[6];
  398. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  399. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  400. W[10]+=W[3];
  401. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  402. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  403. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  404. Vals[5]+=K[42];
  405. Vals[5]+=W[10];
  406. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  407. Vals[1]+=Vals[5];
  408. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  409. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  410. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  411. W[11]+=W[4];
  412. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  413. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  414. Vals[4]+=K[43];
  415. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  416. Vals[4]+=W[11];
  417. Vals[0]+=Vals[4];
  418. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  419. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  420. W[12]+=W[5];
  421. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  422. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  423. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  424. Vals[3]+=K[44];
  425. Vals[3]+=W[12];
  426. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  427. Vals[7]+=Vals[3];
  428. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  429. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  430. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  431. W[13]+=W[6];
  432. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  433. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  434. Vals[2]+=K[45];
  435. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  436. Vals[2]+=W[13];
  437. Vals[6]+=Vals[2];
  438. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  439. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  440. W[14]+=W[7];
  441. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  442. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  443. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  444. Vals[1]+=K[46];
  445. Vals[1]+=W[14];
  446. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  447. Vals[5]+=Vals[1];
  448. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  449. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  450. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  451. W[15]+=W[8];
  452. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  453. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  454. Vals[0]+=K[47];
  455. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  456. Vals[0]+=W[15];
  457. Vals[4]+=Vals[0];
  458. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  459. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  460. W[0]+=W[9];
  461. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  462. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  463. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  464. Vals[7]+=K[48];
  465. Vals[7]+=W[0];
  466. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  467. Vals[3]+=Vals[7];
  468. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  469. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  470. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  471. W[1]+=W[10];
  472. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  473. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  474. Vals[6]+=K[49];
  475. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  476. Vals[6]+=W[1];
  477. Vals[2]+=Vals[6];
  478. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  479. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  480. W[2]+=W[11];
  481. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  482. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  483. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  484. Vals[5]+=K[50];
  485. Vals[5]+=W[2];
  486. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  487. Vals[1]+=Vals[5];
  488. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  489. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  490. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  491. W[3]+=W[12];
  492. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  493. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  494. Vals[4]+=K[51];
  495. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  496. Vals[4]+=W[3];
  497. Vals[0]+=Vals[4];
  498. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  499. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  500. W[4]+=W[13];
  501. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  502. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  503. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  504. Vals[3]+=K[52];
  505. Vals[3]+=W[4];
  506. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  507. Vals[7]+=Vals[3];
  508. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  509. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  510. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  511. W[5]+=W[14];
  512. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  513. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  514. Vals[2]+=K[53];
  515. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  516. Vals[2]+=W[5];
  517. Vals[6]+=Vals[2];
  518. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  519. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  520. W[6]+=W[15];
  521. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  522. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  523. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  524. Vals[1]+=K[54];
  525. Vals[1]+=W[6];
  526. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  527. Vals[5]+=Vals[1];
  528. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  529. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  530. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  531. W[7]+=W[0];
  532. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  533. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  534. Vals[0]+=K[55];
  535. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  536. Vals[0]+=W[7];
  537. Vals[4]+=Vals[0];
  538. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  539. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  540. W[8]+=W[1];
  541. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  542. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  543. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  544. Vals[7]+=K[56];
  545. Vals[7]+=W[8];
  546. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  547. Vals[3]+=Vals[7];
  548. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  549. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  550. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  551. W[9]+=W[2];
  552. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  553. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  554. Vals[6]+=K[57];
  555. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  556. Vals[6]+=W[9];
  557. Vals[2]+=Vals[6];
  558. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  559. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  560. W[10]+=W[3];
  561. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  562. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  563. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  564. Vals[5]+=K[58];
  565. Vals[5]+=W[10];
  566. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  567. Vals[1]+=Vals[5];
  568. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  569. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  570. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  571. W[11]+=W[4];
  572. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  573. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  574. Vals[4]+=K[59];
  575. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  576. Vals[4]+=W[11];
  577. Vals[0]+=Vals[4];
  578. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  579. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  580. W[12]+=W[5];
  581. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  582. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  583. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  584. Vals[3]+=K[60];
  585. Vals[3]+=W[12];
  586. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  587. Vals[7]+=Vals[3];
  588. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  589. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  590. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  591. W[13]+=W[6];
  592. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  593. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  594. Vals[2]+=K[61];
  595. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  596. Vals[2]+=W[13];
  597. Vals[6]+=Vals[2];
  598. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  599. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  600. W[14]+=W[7];
  601. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  602. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  603. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  604. Vals[1]+=K[62];
  605. Vals[1]+=W[14];
  606. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  607. Vals[5]+=Vals[1];
  608. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  609. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  610. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  611. W[15]+=W[8];
  612. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  613. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  614. Vals[0]+=K[63];
  615. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  616. Vals[0]+=W[15];
  617. Vals[4]+=Vals[0];
  618. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  619. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  620. W[0]=Vals[0];
  621. W[7]=state7;
  622. W[7]+=Vals[7];
  623. Vals[7]=0xF377ED68;
  624. W[0]+=state0;
  625. Vals[7]+=W[0];
  626. W[3]=state3;
  627. W[3]+=Vals[3];
  628. Vals[3]=0xa54ff53a;
  629. Vals[3]+=Vals[7];
  630. W[1]=Vals[1];
  631. W[1]+=state1;
  632. W[6]=state6;
  633. W[6]+=Vals[6];
  634. Vals[6]=0x90BB1E3C;
  635. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  636. Vals[6]+=(0x9b05688cU^(Vals[3]&0xca0b3af3U));
  637. W[2]=state2;
  638. W[2]+=Vals[2];
  639. Vals[2]=0x3c6ef372U;
  640. Vals[6]+=W[1];
  641. Vals[2]+=Vals[6];
  642. Vals[7]+=0x08909ae5U;
  643. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  644. W[5]=state5;
  645. W[5]+=Vals[5];
  646. Vals[5]=0x150C6645B;
  647. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  648. Vals[5]+=ch(Vals[2],Vals[3],0x510e527fU);
  649. Vals[5]+=W[2];
  650. Vals[1]=0xbb67ae85U;
  651. Vals[1]+=Vals[5];
  652. Vals[6]+=Ma2(0xbb67ae85U,Vals[7],0x6a09e667U);
  653. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  654. W[4]=state4;
  655. W[4]+=Vals[4];
  656. Vals[4]=0x13AC42E24;
  657. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  658. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  659. Vals[4]+=W[3];
  660. Vals[0]=Vals[4];
  661. Vals[0]+=0x6a09e667U;
  662. Vals[5]+=Ma2(0x6a09e667U,Vals[6],Vals[7]);
  663. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  664. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  665. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  666. Vals[3]+=K[4];
  667. Vals[3]+=W[4];
  668. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  669. Vals[7]+=Vals[3];
  670. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  671. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  672. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  673. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  674. Vals[2]+=K[5];
  675. Vals[2]+=W[5];
  676. Vals[6]+=Vals[2];
  677. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  678. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  679. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  680. Vals[1]+=K[6];
  681. Vals[1]+=W[6];
  682. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  683. Vals[5]+=Vals[1];
  684. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  685. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  686. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  687. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  688. Vals[0]+=K[7];
  689. Vals[0]+=W[7];
  690. Vals[4]+=Vals[0];
  691. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  692. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  693. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  694. Vals[7]+=0x15807AA98;
  695. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  696. Vals[3]+=Vals[7];
  697. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  698. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  699. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  700. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  701. Vals[6]+=K[9];
  702. Vals[2]+=Vals[6];
  703. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  704. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  705. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  706. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  707. Vals[5]+=K[10];
  708. Vals[1]+=Vals[5];
  709. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  710. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  711. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  712. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  713. Vals[4]+=K[11];
  714. Vals[0]+=Vals[4];
  715. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  716. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  717. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  718. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  719. Vals[3]+=K[12];
  720. Vals[7]+=Vals[3];
  721. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  722. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  723. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  724. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  725. Vals[2]+=K[13];
  726. Vals[6]+=Vals[2];
  727. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  728. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  729. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  730. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  731. Vals[1]+=K[14];
  732. Vals[5]+=Vals[1];
  733. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  734. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  735. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  736. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  737. Vals[0]+=0xC19BF274;
  738. Vals[4]+=Vals[0];
  739. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  740. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  741. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  742. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  743. Vals[7]+=K[16];
  744. Vals[7]+=W[0];
  745. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  746. Vals[3]+=Vals[7];
  747. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  748. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  749. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  750. W[1]+=0x00a00000U;
  751. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  752. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  753. Vals[6]+=K[17];
  754. Vals[6]+=W[1];
  755. Vals[2]+=Vals[6];
  756. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  757. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  758. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  759. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  760. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  761. Vals[5]+=K[18];
  762. Vals[5]+=W[2];
  763. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  764. Vals[1]+=Vals[5];
  765. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  766. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  767. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  768. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  769. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  770. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  771. Vals[4]+=K[19];
  772. Vals[4]+=W[3];
  773. Vals[0]+=Vals[4];
  774. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  775. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  776. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  777. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  778. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  779. Vals[3]+=K[20];
  780. Vals[3]+=W[4];
  781. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  782. Vals[7]+=Vals[3];
  783. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  784. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  785. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  786. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  787. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  788. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  789. Vals[2]+=K[21];
  790. Vals[2]+=W[5];
  791. Vals[6]+=Vals[2];
  792. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  793. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  794. W[6]+=0x00000100U;
  795. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  796. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  797. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  798. Vals[1]+=K[22];
  799. Vals[1]+=W[6];
  800. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  801. Vals[5]+=Vals[1];
  802. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  803. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  804. W[7]+=0x11002000U;
  805. W[7]+=W[0];
  806. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  807. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  808. Vals[0]+=K[23];
  809. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  810. Vals[0]+=W[7];
  811. Vals[4]+=Vals[0];
  812. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  813. W[8]=0x80000000;
  814. W[8]+=W[1];
  815. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  816. Vals[7]+=W[8];
  817. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  818. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  819. Vals[7]+=K[24];
  820. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  821. Vals[3]+=Vals[7];
  822. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  823. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  824. W[9]=W[2];
  825. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  826. Vals[6]+=W[9];
  827. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  828. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  829. Vals[6]+=K[25];
  830. Vals[2]+=Vals[6];
  831. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  832. W[10]=W[3];
  833. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  834. Vals[5]+=W[10];
  835. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  836. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  837. Vals[5]+=K[26];
  838. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  839. Vals[1]+=Vals[5];
  840. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  841. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  842. W[11]=W[4];
  843. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  844. Vals[4]+=W[11];
  845. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  846. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  847. Vals[4]+=K[27];
  848. Vals[0]+=Vals[4];
  849. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  850. W[12]=W[5];
  851. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  852. Vals[3]+=W[12];
  853. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  854. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  855. Vals[3]+=K[28];
  856. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  857. Vals[7]+=Vals[3];
  858. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  859. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  860. W[13]=W[6];
  861. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  862. Vals[2]+=W[13];
  863. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  864. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  865. Vals[2]+=K[29];
  866. Vals[6]+=Vals[2];
  867. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  868. W[14]=0x00400022U;
  869. W[14]+=W[7];
  870. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  871. Vals[1]+=W[14];
  872. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  873. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  874. Vals[1]+=K[30];
  875. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  876. Vals[5]+=Vals[1];
  877. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  878. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  879. W[15]=0x00000100U;
  880. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  881. W[15]+=W[8];
  882. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  883. Vals[0]+=W[15];
  884. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  885. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  886. Vals[0]+=K[31];
  887. Vals[4]+=Vals[0];
  888. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  889. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  890. W[0]+=W[9];
  891. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  892. Vals[7]+=W[0];
  893. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  894. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  895. Vals[7]+=K[32];
  896. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  897. Vals[3]+=Vals[7];
  898. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  899. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  900. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  901. W[1]+=W[10];
  902. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  903. Vals[6]+=W[1];
  904. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  905. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  906. Vals[6]+=K[33];
  907. Vals[2]+=Vals[6];
  908. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  909. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  910. W[2]+=W[11];
  911. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  912. Vals[5]+=W[2];
  913. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  914. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  915. Vals[5]+=K[34];
  916. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  917. Vals[1]+=Vals[5];
  918. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  919. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  920. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  921. W[3]+=W[12];
  922. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  923. Vals[4]+=W[3];
  924. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  925. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  926. Vals[4]+=K[35];
  927. Vals[0]+=Vals[4];
  928. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  929. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  930. W[4]+=W[13];
  931. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  932. Vals[3]+=W[4];
  933. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  934. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  935. Vals[3]+=K[36];
  936. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  937. Vals[7]+=Vals[3];
  938. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  939. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  940. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  941. W[5]+=W[14];
  942. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  943. Vals[2]+=W[5];
  944. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  945. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  946. Vals[2]+=K[37];
  947. Vals[6]+=Vals[2];
  948. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  949. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  950. W[6]+=W[15];
  951. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  952. Vals[1]+=W[6];
  953. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  954. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  955. Vals[1]+=K[38];
  956. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  957. Vals[5]+=Vals[1];
  958. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  959. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  960. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  961. W[7]+=W[0];
  962. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  963. Vals[0]+=W[7];
  964. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  965. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  966. Vals[0]+=K[39];
  967. Vals[4]+=Vals[0];
  968. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  969. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  970. W[8]+=W[1];
  971. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  972. Vals[7]+=W[8];
  973. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  974. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  975. Vals[7]+=K[40];
  976. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  977. Vals[3]+=Vals[7];
  978. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  979. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  980. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  981. W[9]+=W[2];
  982. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  983. Vals[6]+=W[9];
  984. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  985. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  986. Vals[6]+=K[41];
  987. Vals[2]+=Vals[6];
  988. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  989. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  990. W[10]+=W[3];
  991. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  992. Vals[5]+=W[10];
  993. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  994. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  995. Vals[5]+=K[42];
  996. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  997. Vals[1]+=Vals[5];
  998. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  999. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  1000. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  1001. W[11]+=W[4];
  1002. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  1003. Vals[4]+=W[11];
  1004. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1005. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1006. Vals[4]+=K[43];
  1007. Vals[0]+=Vals[4];
  1008. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1009. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  1010. W[12]+=W[5];
  1011. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  1012. Vals[3]+=W[12];
  1013. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1014. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  1015. Vals[3]+=K[44];
  1016. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  1017. Vals[7]+=Vals[3];
  1018. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  1019. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  1020. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  1021. W[13]+=W[6];
  1022. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  1023. Vals[2]+=W[13];
  1024. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  1025. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  1026. Vals[2]+=K[45];
  1027. Vals[6]+=Vals[2];
  1028. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  1029. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  1030. W[14]+=W[7];
  1031. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  1032. Vals[1]+=W[14];
  1033. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  1034. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  1035. Vals[1]+=K[46];
  1036. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  1037. Vals[5]+=Vals[1];
  1038. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  1039. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  1040. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  1041. W[15]+=W[8];
  1042. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  1043. Vals[0]+=W[15];
  1044. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  1045. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  1046. Vals[0]+=K[47];
  1047. Vals[4]+=Vals[0];
  1048. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  1049. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  1050. W[0]+=W[9];
  1051. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  1052. Vals[7]+=W[0];
  1053. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1054. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  1055. Vals[7]+=K[48];
  1056. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  1057. Vals[3]+=Vals[7];
  1058. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  1059. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  1060. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  1061. W[1]+=W[10];
  1062. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  1063. Vals[6]+=W[1];
  1064. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1065. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  1066. Vals[6]+=K[49];
  1067. Vals[2]+=Vals[6];
  1068. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  1069. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  1070. W[2]+=W[11];
  1071. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  1072. Vals[5]+=W[2];
  1073. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  1074. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  1075. Vals[5]+=K[50];
  1076. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  1077. Vals[1]+=Vals[5];
  1078. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  1079. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  1080. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  1081. W[3]+=W[12];
  1082. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  1083. Vals[4]+=W[3];
  1084. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1085. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1086. Vals[4]+=K[51];
  1087. Vals[0]+=Vals[4];
  1088. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1089. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  1090. W[4]+=W[13];
  1091. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  1092. Vals[3]+=W[4];
  1093. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1094. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  1095. Vals[3]+=K[52];
  1096. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  1097. Vals[7]+=Vals[3];
  1098. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  1099. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  1100. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  1101. W[5]+=W[14];
  1102. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  1103. Vals[2]+=W[5];
  1104. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  1105. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  1106. Vals[2]+=K[53];
  1107. Vals[6]+=Vals[2];
  1108. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  1109. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  1110. W[6]+=W[15];
  1111. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  1112. Vals[1]+=W[6];
  1113. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  1114. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  1115. Vals[1]+=K[54];
  1116. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  1117. Vals[5]+=Vals[1];
  1118. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  1119. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  1120. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  1121. W[7]+=W[0];
  1122. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  1123. Vals[0]+=W[7];
  1124. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  1125. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  1126. Vals[0]+=K[55];
  1127. Vals[4]+=Vals[0];
  1128. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  1129. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  1130. W[8]+=W[1];
  1131. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  1132. Vals[7]+=W[8];
  1133. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1134. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  1135. Vals[7]+=K[56];
  1136. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  1137. Vals[3]+=Vals[7];
  1138. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  1139. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  1140. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  1141. W[9]+=W[2];
  1142. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  1143. Vals[6]+=W[9];
  1144. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1145. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  1146. Vals[6]+=K[57];
  1147. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  1148. W[10]+=W[3];
  1149. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  1150. Vals[5]+=W[10];
  1151. Vals[2]+=Vals[6];
  1152. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  1153. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  1154. Vals[5]+=K[58];
  1155. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  1156. W[11]+=W[4];
  1157. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  1158. Vals[4]+=W[11];
  1159. Vals[1]+=Vals[5];
  1160. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1161. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1162. Vals[4]+=K[59];
  1163. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  1164. W[12]+=W[5];
  1165. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  1166. Vals[7]+=W[12];
  1167. Vals[0]+=Vals[4];
  1168. Vals[7]+=Vals[3];
  1169. Vals[7]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1170. Vals[7]+=ch(Vals[0],Vals[1],Vals[2]);
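  1171. //Vals[7]+=K[60]; intentionally skipped: K[60] is pre-subtracted from the expected value 0xA41F32E7, which yields the 0x136032ED compared against below (assuming K[60] is the standard SHA-256 round constant 0x90BEFFFA).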
  1172. #define FOUND (0x80)
  1173. #define NFLAG (0x7F)
  1174. #if defined(VECTORS4)
  1175. Vals[7] ^= 0x136032ED;
  1176. bool result = Vals[7].x & Vals[7].y & Vals[7].z & Vals[7].w;
  1177. if (!result) {
  1178. if (!Vals[7].x)
  1179. output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
  1180. if (!Vals[7].y)
  1181. output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
  1182. if (!Vals[7].z)
  1183. output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
  1184. if (!Vals[7].w)
  1185. output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
  1186. }
  1187. #elif defined(VECTORS2)
  1188. Vals[7] ^= 0x136032ED;
  1189. bool result = Vals[7].x & Vals[7].y;
  1190. if (!result) {
  1191. if (!Vals[7].x)
  1192. output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
  1193. if (!Vals[7].y)
  1194. output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
  1195. }
  1196. #else
  1197. if (Vals[7] == 0x136032ED)
  1198. output[FOUND] = output[NFLAG & nonce] = nonce;
  1199. #endif
  1200. }