// poclbm.cl (43 KB)
  // -ck modified kernel taken from Phoenix taken from poclbm, with aspects of
  // phatk and others.
  // Modified version copyright 2011-2013 Con Kolivas
  // This file is taken and modified from the public-domain poclbm project, and
  // we have therefore decided to keep it public-domain in Phoenix.
  // kernel-interface: poclbm SHA256d
  // Working type `u`: the type the search kernel uses for its Vals[] array and
  // for the nonce. It is chosen at build time:
  //   VECTORS4 defined -> uint4
  //   VECTORS2 defined -> uint2
  //   neither defined  -> plain uint
  #if defined(VECTORS4)
  typedef uint4 u;
  #elif defined(VECTORS2)
  typedef uint2 u;
  #else
  typedef uint u;
  #endif
  // Constant table used throughout the kernel.
  //   K[0..63]  - the 64 SHA-256 round constants (see FIPS 180-4).
  //   K[64..86] - 23 further literals that the kernel refers to by name through
  //               the x<hex>U alias macros; their order here must stay in step
  //               with the indices used by those aliases.
  // NOTE(review): the K[64..86] values look like precomputed SHA-256
  // intermediates/initial-state words; their derivation is not shown in this file.
  __constant uint K[87] = {
      // K[0..63]
      0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
      // K[64..86]
      0xc19bf3f4U, // K[64]
      0x80000000U, // K[65]
      0x00000280U, // K[66]
      0x00a00055U, // K[67]
      0xf377ed68U, // K[68]
      0xa54ff53aU, // K[69]
      0x08909ae5U, // K[70]
      0x90bb1e3cU, // K[71]
      0x9b05688cU, // K[72]
      0xca0b3af3U, // K[73]
      0x3c6ef372U, // K[74]
      0xbb67ae85U, // K[75]
      0x6a09e667U, // K[76]
      0x50c6645bU, // K[77]
      0x510e527fU, // K[78]
      0x3ac42e24U, // K[79]
      0x5807aa98U, // K[80]
      0xc19bf274U, // K[81]
      0x00a00000U, // K[82]
      0x00000100U, // K[83]
      0x11002000U, // K[84]
      0x00400022U, // K[85]
      0x136032edU  // K[86]
  };
  // Named aliases for the extra literals stored in K[64..86].
  // Each x<hex>U macro expands to the K[] slot that holds the literal spelled in
  // its name, so the kernel body can write the constant by name while the value
  // itself is read from the __constant table.
  #define xc19bf3f4U K[64]
  #define x80000000U K[65]
  #define x00000280U K[66]
  #define x00a00055U K[67]
  #define xf377ed68U K[68]
  #define xa54ff53aU K[69]
  #define x08909ae5U K[70]
  #define x90bb1e3cU K[71]
  #define x9b05688cU K[72]
  #define xca0b3af3U K[73]
  #define x3c6ef372U K[74]
  #define xbb67ae85U K[75]
  #define x6a09e667U K[76]
  #define x50c6645bU K[77]
  #define x510e527fU K[78]
  #define x3ac42e24U K[79]
  #define x5807aa98U K[80]
  #define xc19bf274U K[81]
  #define x00a00000U K[82]
  #define x00000100U K[83]
  #define x11002000U K[84]
  #define x00400022U K[85]
  #define x136032edU K[86]
  // This part is not from the stock poclbm kernel. It's part of an optimization
  // added in the Phoenix Miner.
  // Some AMD devices have a BFI_INT opcode, which behaves exactly like the
  // SHA-256 ch function, but provides it in exactly one instruction. If
  // detected, use it for ch. Otherwise, construct ch out of simpler logical
  // primitives.
  // rotr(x, y): rotate x right by y bits, with both operands cast to the
  // working type `u`.
  //   BITALIGN defined: enables cl_amd_media_ops and calls amd_bitalign with x
  //                     as both source operands and y as the shift.
  //   otherwise:        uses the OpenCL built-in rotate(), which rotates left,
  //                     with a count of (32 - y).
  // Every macro argument is parenthesized so that compound expressions such as
  // `a + b` or `n + 1` expand with the intended grouping.
  #ifdef BITALIGN
  #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  #define rotr(x, y) amd_bitalign((u)(x), (u)(x), (u)(y))
  #else
  #define rotr(x, y) rotate((u)(x), (u)(32 - (y)))
  #endif
  // ch / Ma / Ma2: the SHA-256 "choose" and "majority" bit functions.
  //   ch(x, y, z)  - per bit: y where x is 1, z where x is 0.
  //   Ma(x, y, z)  - per bit: x where x and z agree, otherwise y (i.e. the
  //                  majority of the three inputs).
  //   Ma2(x, y, z) - same result as Ma; always built on bitselect so it can be
  //                  used with constant operands in the BFI_INT build.
  // Macro arguments are parenthesized wherever they take part in a cast or an
  // operator, so compound expressions expand with the intended grouping.
  #ifdef BFI_INT
  // Well, slight problem... It turns out BFI_INT isn't actually exposed to
  // OpenCL (or CAL IL for that matter) in any way. However, there is
  // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
  // amd_bytealign, takes the same inputs, and provides the same output.
  // We can use that as a placeholder for BFI_INT and have the application
  // patch it after compilation.
  // This is the BFI_INT function
  #define ch(x, y, z) amd_bytealign(x, y, z)
  // Ma can also be implemented in terms of BFI_INT...
  #define Ma(x, y, z) amd_bytealign(((z) ^ (x)), (y), (x))
  // AMD's KernelAnalyzer throws errors compiling the kernel if we use
  // amd_bytealign on constants with vectors enabled, so we use this to avoid
  // problems. (this is used 4 times, and likely optimized out by the compiler.)
  #define Ma2(x, y, z) bitselect((u)(x), (u)(y), (u)(z) ^ (u)(x))
  #else // BFI_INT
  // GCN actually fails if manually patched with BFI_INT
  #define ch(x, y, z) bitselect((u)(z), (u)(y), (u)(x))
  #define Ma(x, y, z) bitselect((u)(x), (u)(y), (u)(z) ^ (u)(x))
  #define Ma2(x, y, z) Ma(x, y, z)
  #endif
  103. __kernel
  104. __attribute__((vec_type_hint(u)))
  105. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  106. void search(const uint state0, const uint state1, const uint state2, const uint state3,
  107. const uint state4, const uint state5, const uint state6, const uint state7,
  108. const uint b1, const uint c1,
  109. const uint f1, const uint g1, const uint h1,
  110. #ifndef GOFFSET
  111. const u base,
  112. #endif
  113. const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r,
  114. const uint D1A, const uint C1addK5, const uint B1addK6,
  115. const uint W16addK16, const uint W17addK17,
  116. const uint PreVal4addT1, const uint Preval0,
  117. volatile __global uint * output)
  118. {
  119. u Vals[24];
  120. u *W = &Vals[8];
  121. #ifdef GOFFSET
  122. const u nonce = (uint)(get_global_id(0));
  123. #else
  124. const u nonce = base + (uint)(get_global_id(0));
  125. #endif
  126. Vals[5]=Preval0;
  127. Vals[5]+=nonce;
  128. Vals[0]=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  129. Vals[0]+=ch(Vals[5],b1,c1);
  130. Vals[0]+=D1A;
  131. Vals[2]=Vals[0];
  132. Vals[2]+=h1;
  133. Vals[1]=PreVal4addT1;
  134. Vals[1]+=nonce;
  135. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  136. Vals[6]=C1addK5;
  137. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  138. Vals[6]+=ch(Vals[2],Vals[5],b1);
  139. Vals[3]=Vals[6];
  140. Vals[3]+=g1;
  141. Vals[0]+=Ma2(g1,Vals[1],f1);
  142. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  143. Vals[6]+=Ma2(f1,Vals[0],Vals[1]);
  144. Vals[7]=B1addK6;
  145. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  146. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  147. Vals[4]=Vals[7];
  148. Vals[4]+=f1;
  149. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  150. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  151. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  152. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  153. Vals[5]+=K[7];
  154. Vals[1]+=Vals[5];
  155. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  156. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  157. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  158. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  159. Vals[2]+=K[8];
  160. Vals[0]+=Vals[2];
  161. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  162. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  163. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  164. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  165. Vals[3]+=K[9];
  166. Vals[6]+=Vals[3];
  167. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  168. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  169. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  170. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  171. Vals[4]+=K[10];
  172. Vals[7]+=Vals[4];
  173. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  174. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  175. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  176. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  177. Vals[1]+=K[11];
  178. Vals[5]+=Vals[1];
  179. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  180. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  181. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  182. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  183. Vals[0]+=K[12];
  184. Vals[2]+=Vals[0];
  185. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  186. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  187. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  188. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  189. Vals[6]+=K[13];
  190. Vals[3]+=Vals[6];
  191. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  192. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  193. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  194. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  195. Vals[7]+=K[14];
  196. Vals[4]+=Vals[7];
  197. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  198. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  199. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  200. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  201. Vals[5]+=xc19bf3f4U;
  202. Vals[1]+=Vals[5];
  203. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  204. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  205. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  206. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  207. Vals[2]+=W16addK16;
  208. Vals[0]+=Vals[2];
  209. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  210. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  211. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  212. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  213. Vals[3]+=W17addK17;
  214. Vals[6]+=Vals[3];
  215. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  216. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  217. W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U));
  218. W[2]+=fw2;
  219. Vals[4]+=W[2];
  220. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  221. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  222. Vals[4]+=K[18];
  223. Vals[7]+=Vals[4];
  224. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  225. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  226. W[3]=nonce;
  227. W[3]+=fw3;
  228. Vals[1]+=W[3];
  229. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  230. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  231. Vals[1]+=K[19];
  232. Vals[5]+=Vals[1];
  233. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  234. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  235. W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  236. W[4]+=x80000000U;
  237. Vals[0]+=W[4];
  238. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  239. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  240. Vals[0]+=K[20];
  241. Vals[2]+=Vals[0];
  242. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  243. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  244. W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  245. Vals[6]+=W[5];
  246. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  247. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  248. Vals[6]+=K[21];
  249. Vals[3]+=Vals[6];
  250. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  251. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  252. W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  253. W[6]+=x00000280U;
  254. Vals[7]+=W[6];
  255. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  256. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  257. Vals[7]+=K[22];
  258. Vals[4]+=Vals[7];
  259. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  260. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  261. W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  262. W[7]+=fw0;
  263. Vals[5]+=W[7];
  264. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  265. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  266. Vals[5]+=K[23];
  267. Vals[1]+=Vals[5];
  268. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  269. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  270. W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  271. W[8]+=fw1;
  272. Vals[2]+=W[8];
  273. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  274. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  275. Vals[2]+=K[24];
  276. Vals[0]+=Vals[2];
  277. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  278. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  279. W[9]=W[2];
  280. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  281. Vals[3]+=W[9];
  282. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  283. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  284. Vals[3]+=K[25];
  285. Vals[6]+=Vals[3];
  286. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  287. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  288. W[10]=W[3];
  289. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  290. Vals[4]+=W[10];
  291. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  292. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  293. Vals[4]+=K[26];
  294. Vals[7]+=Vals[4];
  295. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  296. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  297. W[11]=W[4];
  298. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  299. Vals[1]+=W[11];
  300. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  301. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  302. Vals[1]+=K[27];
  303. Vals[5]+=Vals[1];
  304. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  305. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  306. W[12]=W[5];
  307. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  308. Vals[0]+=W[12];
  309. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  310. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  311. Vals[0]+=K[28];
  312. Vals[2]+=Vals[0];
  313. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  314. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  315. W[13]=W[6];
  316. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  317. Vals[6]+=W[13];
  318. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  319. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  320. Vals[6]+=K[29];
  321. Vals[3]+=Vals[6];
  322. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  323. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  324. W[14]=x00a00055U;
  325. W[14]+=W[7];
  326. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  327. Vals[7]+=W[14];
  328. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  329. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  330. Vals[7]+=K[30];
  331. Vals[4]+=Vals[7];
  332. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  333. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  334. W[15]=fw15;
  335. W[15]+=W[8];
  336. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  337. Vals[5]+=W[15];
  338. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  339. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  340. Vals[5]+=K[31];
  341. Vals[1]+=Vals[5];
  342. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  343. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  344. W[0]=fw01r;
  345. W[0]+=W[9];
  346. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  347. Vals[2]+=W[0];
  348. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  349. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  350. Vals[2]+=K[32];
  351. Vals[0]+=Vals[2];
  352. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  353. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  354. W[1]=fw1;
  355. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  356. W[1]+=W[10];
  357. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  358. Vals[3]+=W[1];
  359. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  360. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  361. Vals[3]+=K[33];
  362. Vals[6]+=Vals[3];
  363. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  364. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  365. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  366. W[2]+=W[11];
  367. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  368. Vals[4]+=W[2];
  369. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  370. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  371. Vals[4]+=K[34];
  372. Vals[7]+=Vals[4];
  373. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  374. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  375. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  376. W[3]+=W[12];
  377. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  378. Vals[1]+=W[3];
  379. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  380. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  381. Vals[1]+=K[35];
  382. Vals[5]+=Vals[1];
  383. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  384. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  385. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  386. W[4]+=W[13];
  387. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  388. Vals[0]+=W[4];
  389. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  390. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  391. Vals[0]+=K[36];
  392. Vals[2]+=Vals[0];
  393. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  394. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  395. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  396. W[5]+=W[14];
  397. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  398. Vals[6]+=W[5];
  399. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  400. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  401. Vals[6]+=K[37];
  402. Vals[3]+=Vals[6];
  403. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  404. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  405. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  406. W[6]+=W[15];
  407. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  408. Vals[7]+=W[6];
  409. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  410. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  411. Vals[7]+=K[38];
  412. Vals[4]+=Vals[7];
  413. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  414. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  415. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  416. W[7]+=W[0];
  417. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  418. Vals[5]+=W[7];
  419. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  420. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  421. Vals[5]+=K[39];
  422. Vals[1]+=Vals[5];
  423. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  424. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  425. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  426. W[8]+=W[1];
  427. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  428. Vals[2]+=W[8];
  429. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  430. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  431. Vals[2]+=K[40];
  432. Vals[0]+=Vals[2];
  433. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  434. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  435. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  436. W[9]+=W[2];
  437. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  438. Vals[3]+=W[9];
  439. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  440. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  441. Vals[3]+=K[41];
  442. Vals[6]+=Vals[3];
  443. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  444. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  445. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  446. W[10]+=W[3];
  447. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  448. Vals[4]+=W[10];
  449. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  450. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  451. Vals[4]+=K[42];
  452. Vals[7]+=Vals[4];
  453. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  454. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  455. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  456. W[11]+=W[4];
  457. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  458. Vals[1]+=W[11];
  459. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  460. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  461. Vals[1]+=K[43];
  462. Vals[5]+=Vals[1];
  463. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  464. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  465. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  466. W[12]+=W[5];
  467. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  468. Vals[0]+=W[12];
  469. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  470. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  471. Vals[0]+=K[44];
  472. Vals[2]+=Vals[0];
  473. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  474. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  475. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  476. W[13]+=W[6];
  477. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  478. Vals[6]+=W[13];
  479. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  480. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  481. Vals[6]+=K[45];
  482. Vals[3]+=Vals[6];
  483. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  484. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  485. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  486. W[14]+=W[7];
  487. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  488. Vals[7]+=W[14];
  489. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  490. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  491. Vals[7]+=K[46];
  492. Vals[4]+=Vals[7];
  493. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  494. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  495. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  496. W[15]+=W[8];
  497. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  498. Vals[5]+=W[15];
  499. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  500. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  501. Vals[5]+=K[47];
  502. Vals[1]+=Vals[5];
  503. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  504. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  505. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  506. W[0]+=W[9];
  507. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  508. Vals[2]+=W[0];
  509. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  510. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  511. Vals[2]+=K[48];
  512. Vals[0]+=Vals[2];
  513. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  514. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  515. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  516. W[1]+=W[10];
  517. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  518. Vals[3]+=W[1];
  519. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  520. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  521. Vals[3]+=K[49];
  522. Vals[6]+=Vals[3];
  523. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  524. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  525. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  526. W[2]+=W[11];
  527. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  528. Vals[4]+=W[2];
  529. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  530. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  531. Vals[4]+=K[50];
  532. Vals[7]+=Vals[4];
  533. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  534. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  535. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  536. W[3]+=W[12];
  537. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  538. Vals[1]+=W[3];
  539. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  540. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  541. Vals[1]+=K[51];
  542. Vals[5]+=Vals[1];
  543. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  544. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  545. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  546. W[4]+=W[13];
  547. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  548. Vals[0]+=W[4];
  549. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  550. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  551. Vals[0]+=K[52];
  552. Vals[2]+=Vals[0];
  553. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  554. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  555. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  556. W[5]+=W[14];
  557. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  558. Vals[6]+=W[5];
  559. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  560. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  561. Vals[6]+=K[53];
  562. Vals[3]+=Vals[6];
  563. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  564. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  565. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  566. W[6]+=W[15];
  567. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  568. Vals[7]+=W[6];
  569. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  570. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  571. Vals[7]+=K[54];
  572. Vals[4]+=Vals[7];
  573. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  574. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  575. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  576. W[7]+=W[0];
  577. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  578. Vals[5]+=W[7];
  579. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  580. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  581. Vals[5]+=K[55];
  582. Vals[1]+=Vals[5];
  583. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  584. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  585. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  586. W[8]+=W[1];
  587. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  588. Vals[2]+=W[8];
  589. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  590. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  591. Vals[2]+=K[56];
  592. Vals[0]+=Vals[2];
  593. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  594. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  595. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  596. W[9]+=W[2];
  597. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  598. Vals[3]+=W[9];
  599. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  600. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  601. Vals[3]+=K[57];
  602. Vals[6]+=Vals[3];
  603. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  604. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  605. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  606. W[10]+=W[3];
  607. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  608. Vals[4]+=W[10];
  609. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  610. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  611. Vals[4]+=K[58];
  612. Vals[7]+=Vals[4];
  613. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  614. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  615. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  616. W[11]+=W[4];
  617. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  618. Vals[1]+=W[11];
  619. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  620. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  621. Vals[1]+=K[59];
  622. Vals[5]+=Vals[1];
  623. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  624. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  625. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  626. W[12]+=W[5];
  627. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  628. Vals[0]+=W[12];
  629. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  630. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  631. Vals[0]+=K[60];
  632. Vals[2]+=Vals[0];
  633. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  634. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  635. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  636. W[13]+=W[6];
  637. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  638. Vals[6]+=W[13];
  639. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  640. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  641. Vals[6]+=K[61];
  642. Vals[3]+=Vals[6];
  643. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  644. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  645. Vals[7]+=W[14];
  646. Vals[7]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  647. Vals[7]+=W[7];
  648. Vals[7]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  649. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  650. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  651. Vals[7]+=K[62];
  652. Vals[4]+=Vals[7];
  653. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  654. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  655. Vals[5]+=W[15];
  656. Vals[5]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  657. Vals[5]+=W[8];
  658. Vals[5]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  659. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  660. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  661. Vals[5]+=K[63];
  662. Vals[1]+=Vals[5];
  663. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  664. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  665. Vals[5]+=state0;
  666. W[7]=state7;
  667. W[7]+=Vals[2];
  668. Vals[2]=xf377ed68U;
  669. Vals[2]+=Vals[5];
  670. W[0]=Vals[5];
  671. Vals[5]=x6a09e667U;
  672. W[3]=state3;
  673. W[3]+=Vals[0];
  674. Vals[0]=xa54ff53aU;
  675. Vals[0]+=Vals[2];
  676. Vals[2]+=x08909ae5U;
  677. W[6]=state6;
  678. W[6]+=Vals[3];
  679. Vals[3]=x90bb1e3cU;
  680. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  681. Vals[3]+=(x9b05688cU^(Vals[0]&xca0b3af3U));
  682. Vals[7]+=state1;
  683. Vals[3]+=Vals[7];
  684. W[1]=Vals[7];
  685. Vals[7]=xbb67ae85U;
  686. W[2]=state2;
  687. W[2]+=Vals[6];
  688. Vals[6]=x3c6ef372U;
  689. Vals[6]+=Vals[3];
  690. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  691. Vals[3]+=Ma2(Vals[7],Vals[2],Vals[5]);
  692. W[5]=state5;
  693. W[5]+=Vals[4];
  694. Vals[4]=x50c6645bU;
  695. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  696. Vals[4]+=ch(Vals[6],Vals[0],x510e527fU);
  697. Vals[4]+=W[2];
  698. Vals[7]+=Vals[4];
  699. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  700. Vals[4]+=Ma2(Vals[5],Vals[3],Vals[2]);
  701. W[4]=state4;
  702. W[4]+=Vals[1];
  703. Vals[1]=x3ac42e24U;
  704. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  705. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  706. Vals[1]+=W[3];
  707. Vals[5]+=Vals[1];
  708. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  709. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  710. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  711. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  712. Vals[0]+=K[4];
  713. Vals[0]+=W[4];
  714. Vals[2]+=Vals[0];
  715. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  716. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  717. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  718. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  719. Vals[6]+=K[5];
  720. Vals[6]+=W[5];
  721. Vals[3]+=Vals[6];
  722. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  723. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  724. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  725. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  726. Vals[7]+=K[6];
  727. Vals[7]+=W[6];
  728. Vals[4]+=Vals[7];
  729. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  730. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  731. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  732. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  733. Vals[5]+=K[7];
  734. Vals[5]+=W[7];
  735. Vals[1]+=Vals[5];
  736. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  737. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  738. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  739. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  740. Vals[2]+=x5807aa98U;
  741. Vals[0]+=Vals[2];
  742. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  743. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  744. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  745. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  746. Vals[3]+=K[9];
  747. Vals[6]+=Vals[3];
  748. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  749. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  750. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  751. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  752. Vals[4]+=K[10];
  753. Vals[7]+=Vals[4];
  754. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  755. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  756. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  757. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  758. Vals[1]+=K[11];
  759. Vals[5]+=Vals[1];
  760. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  761. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  762. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  763. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  764. Vals[0]+=K[12];
  765. Vals[2]+=Vals[0];
  766. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  767. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  768. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  769. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  770. Vals[6]+=K[13];
  771. Vals[3]+=Vals[6];
  772. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  773. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  774. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  775. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  776. Vals[7]+=K[14];
  777. Vals[4]+=Vals[7];
  778. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  779. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  780. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  781. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  782. Vals[5]+=xc19bf274U;
  783. Vals[1]+=Vals[5];
  784. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  785. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  786. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  787. Vals[2]+=W[0];
  788. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  789. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  790. Vals[2]+=K[16];
  791. Vals[0]+=Vals[2];
  792. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  793. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  794. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  795. W[1]+=x00a00000U;
  796. Vals[3]+=W[1];
  797. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  798. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  799. Vals[3]+=K[17];
  800. Vals[6]+=Vals[3];
  801. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  802. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  803. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  804. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  805. Vals[4]+=W[2];
  806. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  807. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  808. Vals[4]+=K[18];
  809. Vals[7]+=Vals[4];
  810. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  811. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  812. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  813. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  814. Vals[1]+=W[3];
  815. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  816. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  817. Vals[1]+=K[19];
  818. Vals[5]+=Vals[1];
  819. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  820. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  821. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  822. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  823. Vals[0]+=W[4];
  824. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  825. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  826. Vals[0]+=K[20];
  827. Vals[2]+=Vals[0];
  828. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  829. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  830. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  831. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  832. Vals[6]+=W[5];
  833. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  834. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  835. Vals[6]+=K[21];
  836. Vals[3]+=Vals[6];
  837. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  838. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  839. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  840. W[6]+=x00000100U;
  841. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  842. Vals[7]+=W[6];
  843. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  844. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  845. Vals[7]+=K[22];
  846. Vals[4]+=Vals[7];
  847. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  848. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  849. W[7]+=x11002000U;
  850. W[7]+=W[0];
  851. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  852. Vals[5]+=W[7];
  853. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  854. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  855. Vals[5]+=K[23];
  856. Vals[1]+=Vals[5];
  857. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  858. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  859. W[8]=x80000000U;
  860. W[8]+=W[1];
  861. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  862. Vals[2]+=W[8];
  863. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  864. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  865. Vals[2]+=K[24];
  866. Vals[0]+=Vals[2];
  867. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  868. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  869. W[9]=W[2];
  870. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  871. Vals[3]+=W[9];
  872. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  873. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  874. Vals[3]+=K[25];
  875. Vals[6]+=Vals[3];
  876. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  877. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  878. W[10]=W[3];
  879. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  880. Vals[4]+=W[10];
  881. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  882. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  883. Vals[4]+=K[26];
  884. Vals[7]+=Vals[4];
  885. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  886. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  887. W[11]=W[4];
  888. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  889. Vals[1]+=W[11];
  890. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  891. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  892. Vals[1]+=K[27];
  893. Vals[5]+=Vals[1];
  894. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  895. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  896. W[12]=W[5];
  897. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  898. Vals[0]+=W[12];
  899. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  900. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  901. Vals[0]+=K[28];
  902. Vals[2]+=Vals[0];
  903. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  904. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  905. W[13]=W[6];
  906. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  907. Vals[6]+=W[13];
  908. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  909. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  910. Vals[6]+=K[29];
  911. Vals[3]+=Vals[6];
  912. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  913. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  914. W[14]=x00400022U;
  915. W[14]+=W[7];
  916. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  917. Vals[7]+=W[14];
  918. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  919. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  920. Vals[7]+=K[30];
  921. Vals[4]+=Vals[7];
  922. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  923. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  924. W[15]=x00000100U;
  925. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  926. W[15]+=W[8];
  927. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  928. Vals[5]+=W[15];
  929. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  930. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  931. Vals[5]+=K[31];
  932. Vals[1]+=Vals[5];
  933. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  934. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  935. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  936. W[0]+=W[9];
  937. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  938. Vals[2]+=W[0];
  939. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  940. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  941. Vals[2]+=K[32];
  942. Vals[0]+=Vals[2];
  943. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  944. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  945. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  946. W[1]+=W[10];
  947. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  948. Vals[3]+=W[1];
  949. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  950. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  951. Vals[3]+=K[33];
  952. Vals[6]+=Vals[3];
  953. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  954. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  955. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  956. W[2]+=W[11];
  957. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  958. Vals[4]+=W[2];
  959. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  960. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  961. Vals[4]+=K[34];
  962. Vals[7]+=Vals[4];
  963. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  964. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  965. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  966. W[3]+=W[12];
  967. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  968. Vals[1]+=W[3];
  969. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  970. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  971. Vals[1]+=K[35];
  972. Vals[5]+=Vals[1];
  973. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  974. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  975. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  976. W[4]+=W[13];
  977. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  978. Vals[0]+=W[4];
  979. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  980. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  981. Vals[0]+=K[36];
  982. Vals[2]+=Vals[0];
  983. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  984. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  985. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  986. W[5]+=W[14];
  987. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  988. Vals[6]+=W[5];
  989. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  990. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  991. Vals[6]+=K[37];
  992. Vals[3]+=Vals[6];
  993. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  994. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  995. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  996. W[6]+=W[15];
  997. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  998. Vals[7]+=W[6];
  999. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1000. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  1001. Vals[7]+=K[38];
  1002. Vals[4]+=Vals[7];
  1003. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  1004. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  1005. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  1006. W[7]+=W[0];
  1007. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  1008. Vals[5]+=W[7];
  1009. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1010. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  1011. Vals[5]+=K[39];
  1012. Vals[1]+=Vals[5];
  1013. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  1014. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  1015. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  1016. W[8]+=W[1];
  1017. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  1018. Vals[2]+=W[8];
  1019. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1020. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  1021. Vals[2]+=K[40];
  1022. Vals[0]+=Vals[2];
  1023. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1024. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  1025. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  1026. W[9]+=W[2];
  1027. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  1028. Vals[3]+=W[9];
  1029. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1030. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  1031. Vals[3]+=K[41];
  1032. Vals[6]+=Vals[3];
  1033. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  1034. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  1035. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  1036. W[10]+=W[3];
  1037. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  1038. Vals[4]+=W[10];
  1039. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  1040. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  1041. Vals[4]+=K[42];
  1042. Vals[7]+=Vals[4];
  1043. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  1044. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  1045. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  1046. W[11]+=W[4];
  1047. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  1048. Vals[1]+=W[11];
  1049. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  1050. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  1051. Vals[1]+=K[43];
  1052. Vals[5]+=Vals[1];
  1053. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  1054. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  1055. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  1056. W[12]+=W[5];
  1057. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  1058. Vals[0]+=W[12];
  1059. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  1060. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  1061. Vals[0]+=K[44];
  1062. Vals[2]+=Vals[0];
  1063. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  1064. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  1065. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  1066. W[13]+=W[6];
  1067. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  1068. Vals[6]+=W[13];
  1069. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  1070. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  1071. Vals[6]+=K[45];
  1072. Vals[3]+=Vals[6];
  1073. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  1074. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  1075. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  1076. W[14]+=W[7];
  1077. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  1078. Vals[7]+=W[14];
  1079. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1080. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  1081. Vals[7]+=K[46];
  1082. Vals[4]+=Vals[7];
  1083. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  1084. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  1085. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  1086. W[15]+=W[8];
  1087. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  1088. Vals[5]+=W[15];
  1089. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1090. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  1091. Vals[5]+=K[47];
  1092. Vals[1]+=Vals[5];
  1093. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  1094. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  1095. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  1096. W[0]+=W[9];
  1097. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  1098. Vals[2]+=W[0];
  1099. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1100. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  1101. Vals[2]+=K[48];
  1102. Vals[0]+=Vals[2];
  1103. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1104. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  1105. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  1106. W[1]+=W[10];
  1107. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  1108. Vals[3]+=W[1];
  1109. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1110. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  1111. Vals[3]+=K[49];
  1112. Vals[6]+=Vals[3];
  1113. Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  1114. Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
  1115. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  1116. W[2]+=W[11];
  1117. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  1118. Vals[4]+=W[2];
  1119. Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  1120. Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
  1121. Vals[4]+=K[50];
  1122. Vals[7]+=Vals[4];
  1123. Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  1124. Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
  1125. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  1126. W[3]+=W[12];
  1127. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  1128. Vals[1]+=W[3];
  1129. Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  1130. Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
  1131. Vals[1]+=K[51];
  1132. Vals[5]+=Vals[1];
  1133. Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  1134. Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
  1135. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  1136. W[4]+=W[13];
  1137. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  1138. Vals[0]+=W[4];
  1139. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  1140. Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
  1141. Vals[0]+=K[52];
  1142. Vals[2]+=Vals[0];
  1143. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  1144. Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
  1145. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  1146. W[5]+=W[14];
  1147. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  1148. Vals[6]+=W[5];
  1149. Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  1150. Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
  1151. Vals[6]+=K[53];
  1152. Vals[3]+=Vals[6];
  1153. Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  1154. Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
  1155. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  1156. W[6]+=W[15];
  1157. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  1158. Vals[7]+=W[6];
  1159. Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1160. Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
  1161. Vals[7]+=K[54];
  1162. Vals[4]+=Vals[7];
  1163. Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  1164. Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
  1165. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  1166. W[7]+=W[0];
  1167. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  1168. Vals[5]+=W[7];
  1169. Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1170. Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
  1171. Vals[5]+=K[55];
  1172. Vals[1]+=Vals[5];
  1173. Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  1174. Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
  1175. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  1176. W[8]+=W[1];
  1177. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  1178. Vals[2]+=W[8];
  1179. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1180. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  1181. Vals[2]+=K[56];
  1182. Vals[0]+=Vals[2];
  1183. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  1184. W[9]+=W[2];
  1185. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  1186. Vals[3]+=W[9];
  1187. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1188. Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
  1189. Vals[3]+=K[57];
  1190. Vals[3]+=Vals[6];
  1191. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  1192. W[10]+=W[3];
  1193. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  1194. Vals[4]+=W[10];
  1195. Vals[4]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1196. Vals[4]+=ch(Vals[3],Vals[0],Vals[1]);
  1197. Vals[4]+=K[58];
  1198. Vals[4]+=Vals[7];
  1199. Vals[1]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1200. Vals[1]+=ch(Vals[4],Vals[3],Vals[0]);
  1201. Vals[1]+=W[11];
  1202. Vals[1]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  1203. Vals[1]+=W[4];
  1204. Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  1205. Vals[1]+=K[59];
  1206. Vals[1]+=Vals[5];
  1207. Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
  1208. Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1209. Vals[2]+=W[12];
  1210. Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  1211. Vals[2]+=W[5];
  1212. Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  1213. Vals[2]+=Vals[0];
  1214. Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1215. Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
  /*
   * Result-reporting helpers used by the hash check that follows.
   *
   * FOUND    - index (0x0F) of the element of `output` that SETFOUND reads
   *            and post-increments; it acts as the running write position.
   * SETFOUND - stores Xnonce at output[output[FOUND]] and then increments
   *            output[FOUND], so successive calls write to successive
   *            elements. The expansion is a bare assignment expression with
   *            no trailing semicolon; each call site supplies its own.
   *
   * `output` must be in scope wherever SETFOUND is expanded; its declaration
   * is outside this part of the file.
   *
   * NOTE(review): the macro itself does no bounds checking on output[FOUND]
   * and the increment is a plain (non-atomic) read-modify-write. Whether that
   * is safe depends on how `output` is sized and shared - TODO confirm against
   * the kernel signature and the host-side reader.
   */
  1216. #define FOUND (0x0F)
  1217. #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
  1218. #if defined(VECTORS2) || defined(VECTORS4)
  1219. if (any(Vals[2] == x136032edU)) {
  1220. if (Vals[2].x == x136032edU)
  1221. SETFOUND(nonce.x);
  1222. if (Vals[2].y == x136032edU)
  1223. SETFOUND(nonce.y);
  1224. #if defined(VECTORS4)
  1225. if (Vals[2].z == x136032edU)
  1226. SETFOUND(nonce.z);
  1227. if (Vals[2].w == x136032edU)
  1228. SETFOUND(nonce.w);
  1229. #endif
  1230. }
  1231. #else
  1232. if (Vals[2] == x136032edU)
  1233. SETFOUND(nonce);
  1234. #endif
  1235. }