  // poclbm120214.cl
  // -ck modified kernel taken from Phoenix, which took it from poclbm, with
  // aspects of phatk and others.
  // Modified version copyright 2011-2012 Con Kolivas
  // This file is taken and modified from the public-domain poclbm project, and
  // we have therefore decided to keep it public-domain in Phoenix.
  // Working word type for the whole kernel. `u` is a 4-wide uint vector when
  // the build defines VECTORS4, a 2-wide uint vector when it defines VECTORS2,
  // and a scalar uint otherwise. VECTORS4 takes precedence if both are defined.
  #ifdef VECTORS4
  typedef uint4 u;
  #elif defined VECTORS2
  typedef uint2 u;
  #else
  typedef uint u;
  #endif
  // SHA-256 round constants K[0..63], kept in __constant memory and indexed
  // directly by round number in the kernel body.
  __constant uint K[64] = {
      0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  };
  // Bit-manipulation primitives used by every round of the kernel:
  //   rotr(x, y)  - rotate x right by y bits
  //   ch(x, y, z) - SHA-256 "choose": bits of y where x is set, bits of z elsewhere
  //   Ma(x, y, z) - bitwise majority of x, y and z
  //
  // All of them are macros, so each argument may be evaluated more than once;
  // pass only side-effect-free expressions. Every argument is parenthesized so
  // that compound expressions (e.g. `a ^ b`) keep their meaning after expansion.
  //
  // This part is not from the stock poclbm kernel. It's part of an optimization
  // added in the Phoenix Miner.
  // Some AMD devices have a BFI_INT opcode, which behaves exactly like the
  // SHA-256 ch function, but provides it in exactly one instruction. If
  // detected, use it for ch. Otherwise, construct ch out of simpler logical
  // primitives.
  #ifdef BITALIGN
  #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  // amd_bitalign with the same word in both halves is a rotate right.
  #define rotr(x, y) amd_bitalign((u)(x), (u)(x), (u)(y))
  #ifdef BFI_INT
  // Well, slight problem... It turns out BFI_INT isn't actually exposed to
  // OpenCL (or CAL IL for that matter) in any way. However, there is
  // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
  // amd_bytealign, takes the same inputs, and provides the same output.
  // We can use that as a placeholder for BFI_INT and have the application
  // patch it after compilation.
  // This is the BFI_INT function
  #define ch(x, y, z) amd_bytealign((x), (y), (z))
  // Ma can also be implemented in terms of BFI_INT...
  #define Ma(x, y, z) amd_bytealign(((z) ^ (x)), (y), (x))
  #else // BFI_INT
  // Later SDKs optimise this to BFI INT without patching and GCN
  // actually fails if manually patched with BFI_INT
  #define ch(x, y, z) bitselect((u)(z), (u)(y), (u)(x))
  #define Ma(x, y, z) bitselect((u)(x), (u)(y), (u)(z) ^ (u)(x))
  #endif
  #else // BITALIGN
  // Portable fallback built from plain logical operators and rotate().
  #define ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
  #define Ma(x, y, z) (((x) & (z)) | ((y) & ((x) | (z))))
  // rotate() rotates left, so a right rotation by y is a left rotation by 32 - y.
  #define rotr(x, y) rotate((u)(x), (u)(32 - (y)))
  #endif
  // Bitwise majority of x, y and z written with plain logical operators only.
  // AMD's KernelAnalyzer throws errors compiling the kernel if we use
  // amd_bytealign on constants with vectors enabled, so this variant is used
  // at the few call sites that pass constants (and is likely optimized out by
  // the compiler). Arguments are parenthesized so compound expressions expand
  // safely; each argument may be evaluated more than once.
  #define Ma2(x, y, z) (((y) & (z)) | ((x) & ((y) | (z))))
  58. __kernel void search(const uint state0, const uint state1, const uint state2, const uint state3,
  59. const uint state4, const uint state5, const uint state6, const uint state7,
  60. const uint b1, const uint c1,
  61. const uint f1, const uint g1, const uint h1,
  62. const u base,
  63. const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r,
  64. const uint fcty_e2,
  65. const uint D1A, const uint C1addK5, const uint B1addK6,
  66. const uint W16addK16, const uint W17addK17,
  67. const uint PreVal4addT1, const uint Preval0,
  68. __global uint * output)
  69. {
  70. u W[24];
  71. u *Vals = &W[16]; // Now put at W[16] to be in same array
  72. #ifdef VECTORS4
  73. const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
  74. #elif defined VECTORS2
  75. const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
  76. #else
  77. const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
  78. #endif
  79. Vals[0]=Preval0+nonce;
  80. Vals[3]=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  81. Vals[3]+=ch(Vals[0],b1,c1);
  82. Vals[3]+=D1A;
  83. Vals[7]=Vals[3];
  84. Vals[7]+=h1;
  85. Vals[4]=PreVal4addT1+nonce;
  86. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  87. Vals[2]=C1addK5;
  88. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  89. Vals[2]+=ch(Vals[7],Vals[0],b1);
  90. Vals[6]=Vals[2];
  91. Vals[6]+=g1;
  92. Vals[3]+=Ma2(g1,Vals[4],f1);
  93. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  94. Vals[1]=B1addK6;
  95. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  96. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  97. Vals[5]=Vals[1];
  98. Vals[5]+=f1;
  99. Vals[2]+=Ma2(f1,Vals[3],Vals[4]);
  100. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  101. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  102. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  103. Vals[0]+=K[7];
  104. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  105. Vals[4]+=Vals[0];
  106. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  107. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  108. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  109. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  110. Vals[7]+=K[8];
  111. Vals[3]+=Vals[7];
  112. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  113. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  114. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  115. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  116. Vals[6]+=K[9];
  117. Vals[2]+=Vals[6];
  118. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  119. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  120. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  121. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  122. Vals[5]+=K[10];
  123. Vals[1]+=Vals[5];
  124. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  125. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  126. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  127. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  128. Vals[4]+=K[11];
  129. Vals[0]+=Vals[4];
  130. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  131. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  132. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  133. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  134. Vals[3]+=K[12];
  135. Vals[7]+=Vals[3];
  136. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  137. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  138. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  139. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  140. Vals[2]+=K[13];
  141. Vals[6]+=Vals[2];
  142. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  143. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  144. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  145. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  146. Vals[1]+=K[14];
  147. Vals[5]+=Vals[1];
  148. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  149. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  150. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  151. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  152. Vals[0]+=0xC19BF3F4;
  153. Vals[4]+=Vals[0];
  154. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  155. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  156. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  157. Vals[7]+=W16addK16;
  158. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  159. Vals[3]+=Vals[7];
  160. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  161. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  162. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  163. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  164. Vals[6]+=W17addK17;
  165. Vals[2]+=Vals[6];
  166. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  167. W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U));
  168. W[2]+=fw2;
  169. Vals[5]+=W[2];
  170. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  171. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  172. Vals[5]+=K[18];
  173. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  174. Vals[1]+=Vals[5];
  175. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  176. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  177. W[3]=nonce;
  178. W[3]+=fw3;
  179. Vals[4]+=W[3];
  180. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  181. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  182. Vals[4]+=K[19];
  183. Vals[0]+=Vals[4];
  184. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  185. W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  186. W[4]+=0x80000000;
  187. Vals[3]+=W[4];
  188. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  189. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  190. Vals[3]+=K[20];
  191. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  192. Vals[7]+=Vals[3];
  193. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  194. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  195. W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  196. Vals[2]+=W[5];
  197. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  198. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  199. Vals[2]+=K[21];
  200. Vals[6]+=Vals[2];
  201. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  202. W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  203. W[6]+=0x00000280U;
  204. Vals[1]+=W[6];
  205. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  206. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  207. Vals[1]+=K[22];
  208. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  209. Vals[5]+=Vals[1];
  210. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  211. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  212. W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  213. W[7]+=fw0;
  214. Vals[0]+=W[7];
  215. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  216. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  217. Vals[0]+=K[23];
  218. Vals[4]+=Vals[0];
  219. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  220. W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  221. W[8]+=fw1;
  222. Vals[7]+=W[8];
  223. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  224. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  225. Vals[7]+=K[24];
  226. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  227. Vals[3]+=Vals[7];
  228. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  229. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  230. W[9]=W[2];
  231. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  232. Vals[6]+=W[9];
  233. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  234. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  235. Vals[6]+=K[25];
  236. Vals[2]+=Vals[6];
  237. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  238. W[10]=W[3];
  239. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  240. Vals[5]+=W[10];
  241. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  242. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  243. Vals[5]+=K[26];
  244. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  245. Vals[1]+=Vals[5];
  246. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  247. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  248. W[11]=W[4];
  249. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  250. Vals[4]+=W[11];
  251. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  252. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  253. Vals[4]+=K[27];
  254. Vals[0]+=Vals[4];
  255. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  256. W[12]=W[5];
  257. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  258. Vals[3]+=W[12];
  259. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  260. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  261. Vals[3]+=K[28];
  262. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  263. Vals[7]+=Vals[3];
  264. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  265. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  266. W[13]=W[6];
  267. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  268. Vals[2]+=W[13];
  269. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  270. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  271. Vals[2]+=K[29];
  272. Vals[6]+=Vals[2];
  273. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  274. W[14]=0x00a00055U;
  275. W[14]+=W[7];
  276. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  277. Vals[1]+=W[14];
  278. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  279. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  280. Vals[1]+=K[30];
  281. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  282. Vals[5]+=Vals[1];
  283. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  284. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  285. W[15]=fw15;
  286. W[15]+=W[8];
  287. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  288. Vals[0]+=W[15];
  289. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  290. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  291. Vals[0]+=K[31];
  292. Vals[4]+=Vals[0];
  293. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  294. W[0]=fw01r;
  295. W[0]+=W[9];
  296. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  297. Vals[7]+=W[0];
  298. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  299. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  300. Vals[7]+=K[32];
  301. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  302. Vals[3]+=Vals[7];
  303. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  304. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  305. W[1]=fw1;
  306. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  307. W[1]+=W[10];
  308. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  309. Vals[6]+=W[1];
  310. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  311. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  312. Vals[6]+=K[33];
  313. Vals[2]+=Vals[6];
  314. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  315. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  316. W[2]+=W[11];
  317. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  318. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  319. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  320. Vals[5]+=K[34];
  321. Vals[5]+=W[2];
  322. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  323. Vals[1]+=Vals[5];
  324. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  325. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  326. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  327. W[3]+=W[12];
  328. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  329. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  330. Vals[4]+=K[35];
  331. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  332. Vals[4]+=W[3];
  333. Vals[0]+=Vals[4];
  334. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  335. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  336. W[4]+=W[13];
  337. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  338. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  339. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  340. Vals[3]+=K[36];
  341. Vals[3]+=W[4];
  342. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  343. Vals[7]+=Vals[3];
  344. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  345. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  346. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  347. W[5]+=W[14];
  348. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  349. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  350. Vals[2]+=K[37];
  351. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  352. Vals[2]+=W[5];
  353. Vals[6]+=Vals[2];
  354. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  355. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  356. W[6]+=W[15];
  357. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  358. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  359. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  360. Vals[1]+=K[38];
  361. Vals[1]+=W[6];
  362. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  363. Vals[5]+=Vals[1];
  364. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  365. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  366. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  367. W[7]+=W[0];
  368. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  369. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  370. Vals[0]+=K[39];
  371. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  372. Vals[0]+=W[7];
  373. Vals[4]+=Vals[0];
  374. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  375. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  376. W[8]+=W[1];
  377. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  378. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  379. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  380. Vals[7]+=K[40];
  381. Vals[7]+=W[8];
  382. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  383. Vals[3]+=Vals[7];
  384. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  385. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  386. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  387. W[9]+=W[2];
  388. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  389. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  390. Vals[6]+=K[41];
  391. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  392. Vals[6]+=W[9];
  393. Vals[2]+=Vals[6];
  394. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  395. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  396. W[10]+=W[3];
  397. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  398. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  399. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  400. Vals[5]+=K[42];
  401. Vals[5]+=W[10];
  402. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  403. Vals[1]+=Vals[5];
  404. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  405. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  406. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  407. W[11]+=W[4];
  408. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  409. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  410. Vals[4]+=K[43];
  411. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  412. Vals[4]+=W[11];
  413. Vals[0]+=Vals[4];
  414. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  415. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  416. W[12]+=W[5];
  417. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  418. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  419. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  420. Vals[3]+=K[44];
  421. Vals[3]+=W[12];
  422. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  423. Vals[7]+=Vals[3];
  424. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  425. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  426. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  427. W[13]+=W[6];
  428. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  429. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  430. Vals[2]+=K[45];
  431. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  432. Vals[2]+=W[13];
  433. Vals[6]+=Vals[2];
  434. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  435. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  436. W[14]+=W[7];
  437. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  438. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  439. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  440. Vals[1]+=K[46];
  441. Vals[1]+=W[14];
  442. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  443. Vals[5]+=Vals[1];
  444. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  445. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  446. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  447. W[15]+=W[8];
  448. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  449. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  450. Vals[0]+=K[47];
  451. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  452. Vals[0]+=W[15];
  453. Vals[4]+=Vals[0];
  454. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  455. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  456. W[0]+=W[9];
  457. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  458. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  459. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  460. Vals[7]+=K[48];
  461. Vals[7]+=W[0];
  462. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  463. Vals[3]+=Vals[7];
  464. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  465. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  466. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  467. W[1]+=W[10];
  468. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  469. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  470. Vals[6]+=K[49];
  471. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  472. Vals[6]+=W[1];
  473. Vals[2]+=Vals[6];
  474. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  475. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  476. W[2]+=W[11];
  477. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  478. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  479. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  480. Vals[5]+=K[50];
  481. Vals[5]+=W[2];
  482. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  483. Vals[1]+=Vals[5];
  484. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  485. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  486. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  487. W[3]+=W[12];
  488. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  489. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  490. Vals[4]+=K[51];
  491. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  492. Vals[4]+=W[3];
  493. Vals[0]+=Vals[4];
  494. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  495. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  496. W[4]+=W[13];
  497. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  498. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  499. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  500. Vals[3]+=K[52];
  501. Vals[3]+=W[4];
  502. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  503. Vals[7]+=Vals[3];
  504. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  505. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  506. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  507. W[5]+=W[14];
  508. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  509. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  510. Vals[2]+=K[53];
  511. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  512. Vals[2]+=W[5];
  513. Vals[6]+=Vals[2];
  514. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  515. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  516. W[6]+=W[15];
  517. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  518. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  519. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  520. Vals[1]+=K[54];
  521. Vals[1]+=W[6];
  522. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  523. Vals[5]+=Vals[1];
  524. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  525. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  526. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  527. W[7]+=W[0];
  528. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  529. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  530. Vals[0]+=K[55];
  531. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  532. Vals[0]+=W[7];
  533. Vals[4]+=Vals[0];
  534. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  535. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  536. W[8]+=W[1];
  537. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  538. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  539. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  540. Vals[7]+=K[56];
  541. Vals[7]+=W[8];
  542. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  543. Vals[3]+=Vals[7];
  544. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  545. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  546. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  547. W[9]+=W[2];
  548. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  549. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  550. Vals[6]+=K[57];
  551. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  552. Vals[6]+=W[9];
  553. Vals[2]+=Vals[6];
  554. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  555. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  556. W[10]+=W[3];
  557. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  558. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  559. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  560. Vals[5]+=K[58];
  561. Vals[5]+=W[10];
  562. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  563. Vals[1]+=Vals[5];
  564. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  565. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  566. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  567. W[11]+=W[4];
  568. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  569. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  570. Vals[4]+=K[59];
  571. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  572. Vals[4]+=W[11];
  573. Vals[0]+=Vals[4];
  574. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  575. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  576. W[12]+=W[5];
  577. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  578. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  579. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  580. Vals[3]+=K[60];
  581. Vals[3]+=W[12];
  582. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  583. Vals[7]+=Vals[3];
  584. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  585. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  586. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  587. W[13]+=W[6];
  588. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  589. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  590. Vals[2]+=K[61];
  591. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  592. Vals[2]+=W[13];
  593. Vals[6]+=Vals[2];
  594. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  595. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  596. W[14]+=W[7];
  597. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  598. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  599. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  600. Vals[1]+=K[62];
  601. Vals[1]+=W[14];
  602. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  603. Vals[5]+=Vals[1];
  604. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  605. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  606. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  607. W[15]+=W[8];
  608. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  609. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  610. Vals[0]+=K[63];
  611. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  612. Vals[0]+=W[15];
  613. Vals[4]+=Vals[0];
  614. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  615. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  616. W[0]=Vals[0];
  617. W[7]=state7;
  618. W[7]+=Vals[7];
  619. Vals[7]=0xF377ED68;
  620. W[0]+=state0;
  621. Vals[7]+=W[0];
  622. W[3]=state3;
  623. W[3]+=Vals[3];
  624. Vals[3]=0xa54ff53a;
  625. Vals[3]+=Vals[7];
  626. W[1]=Vals[1];
  627. W[1]+=state1;
  628. W[6]=state6;
  629. W[6]+=Vals[6];
  630. Vals[6]=0x90BB1E3C;
  631. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  632. Vals[6]+=(0x9b05688cU^(Vals[3]&0xca0b3af3U));
  633. W[2]=state2;
  634. W[2]+=Vals[2];
  635. Vals[2]=0x3c6ef372U;
  636. Vals[6]+=W[1];
  637. Vals[2]+=Vals[6];
  638. Vals[7]+=0x08909ae5U;
  639. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  640. W[5]=state5;
  641. W[5]+=Vals[5];
  642. Vals[5]=0x150C6645B;
  643. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  644. Vals[5]+=ch(Vals[2],Vals[3],0x510e527fU);
  645. Vals[5]+=W[2];
  646. Vals[1]=0xbb67ae85U;
  647. Vals[1]+=Vals[5];
  648. Vals[6]+=Ma2(0xbb67ae85U,Vals[7],0x6a09e667U);
  649. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  650. W[4]=state4;
  651. W[4]+=Vals[4];
  652. Vals[4]=0x13AC42E24;
  653. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  654. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  655. Vals[4]+=W[3];
  656. Vals[0]=Vals[4];
  657. Vals[0]+=0x6a09e667U;
  658. Vals[5]+=Ma2(0x6a09e667U,Vals[6],Vals[7]);
  659. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  660. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  661. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  662. Vals[3]+=K[4];
  663. Vals[3]+=W[4];
  664. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  665. Vals[7]+=Vals[3];
  666. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  667. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  668. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  669. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  670. Vals[2]+=K[5];
  671. Vals[2]+=W[5];
  672. Vals[6]+=Vals[2];
  673. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  674. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  675. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  676. Vals[1]+=K[6];
  677. Vals[1]+=W[6];
  678. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  679. Vals[5]+=Vals[1];
  680. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  681. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  682. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  683. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  684. Vals[0]+=K[7];
  685. Vals[0]+=W[7];
  686. Vals[4]+=Vals[0];
  687. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  688. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  689. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  690. Vals[7]+=0x15807AA98;
  691. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  692. Vals[3]+=Vals[7];
  693. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  694. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  695. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  696. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  697. Vals[6]+=K[9];
  698. Vals[2]+=Vals[6];
  699. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  700. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  701. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  702. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  703. Vals[5]+=K[10];
  704. Vals[1]+=Vals[5];
  705. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  706. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  707. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  708. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  709. Vals[4]+=K[11];
  710. Vals[0]+=Vals[4];
  711. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  712. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  713. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  714. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  715. Vals[3]+=K[12];
  716. Vals[7]+=Vals[3];
  717. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  718. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  719. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  720. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  721. Vals[2]+=K[13];
  722. Vals[6]+=Vals[2];
  723. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  724. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  725. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  726. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  727. Vals[1]+=K[14];
  728. Vals[5]+=Vals[1];
  729. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  730. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  731. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  732. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  733. Vals[0]+=0xC19BF274;
  734. Vals[4]+=Vals[0];
  735. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  736. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  737. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  738. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  739. Vals[7]+=K[16];
  740. Vals[7]+=W[0];
  741. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  742. Vals[3]+=Vals[7];
  743. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  744. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  745. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  746. W[1]+=0x00a00000U;
  747. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  748. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  749. Vals[6]+=K[17];
  750. Vals[6]+=W[1];
  751. Vals[2]+=Vals[6];
  752. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  753. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  754. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  755. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  756. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  757. Vals[5]+=K[18];
  758. Vals[5]+=W[2];
  759. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  760. Vals[1]+=Vals[5];
  761. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  762. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  763. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  764. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  765. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  766. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  767. Vals[4]+=K[19];
  768. Vals[4]+=W[3];
  769. Vals[0]+=Vals[4];
  770. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  771. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  772. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  773. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  774. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  775. Vals[3]+=K[20];
  776. Vals[3]+=W[4];
  777. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  778. Vals[7]+=Vals[3];
  779. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  780. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  781. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  782. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  783. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  784. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  785. Vals[2]+=K[21];
  786. Vals[2]+=W[5];
  787. Vals[6]+=Vals[2];
  788. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  789. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  790. W[6]+=0x00000100U;
  791. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  792. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  793. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  794. Vals[1]+=K[22];
  795. Vals[1]+=W[6];
  796. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  797. Vals[5]+=Vals[1];
  798. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  799. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  800. W[7]+=0x11002000U;
  801. W[7]+=W[0];
  802. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  803. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  804. Vals[0]+=K[23];
  805. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  806. Vals[0]+=W[7];
  807. Vals[4]+=Vals[0];
  808. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  809. W[8]=0x80000000;
  810. W[8]+=W[1];
  811. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  812. Vals[7]+=W[8];
  813. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  814. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  815. Vals[7]+=K[24];
  816. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  817. Vals[3]+=Vals[7];
  818. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  819. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  820. W[9]=W[2];
  821. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  822. Vals[6]+=W[9];
  823. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  824. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  825. Vals[6]+=K[25];
  826. Vals[2]+=Vals[6];
  827. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  828. W[10]=W[3];
  829. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  830. Vals[5]+=W[10];
  831. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  832. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  833. Vals[5]+=K[26];
  834. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  835. Vals[1]+=Vals[5];
  836. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  837. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  838. W[11]=W[4];
  839. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  840. Vals[4]+=W[11];
  841. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  842. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  843. Vals[4]+=K[27];
  844. Vals[0]+=Vals[4];
  845. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  846. W[12]=W[5];
  847. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  848. Vals[3]+=W[12];
  849. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  850. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  851. Vals[3]+=K[28];
  852. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  853. Vals[7]+=Vals[3];
  854. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  855. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  856. W[13]=W[6];
  857. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  858. Vals[2]+=W[13];
  859. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  860. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  861. Vals[2]+=K[29];
  862. Vals[6]+=Vals[2];
  863. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  864. W[14]=0x00400022U;
  865. W[14]+=W[7];
  866. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  867. Vals[1]+=W[14];
  868. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  869. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  870. Vals[1]+=K[30];
  871. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  872. Vals[5]+=Vals[1];
  873. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  874. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  875. W[15]=0x00000100U;
  876. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  877. W[15]+=W[8];
  878. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  879. Vals[0]+=W[15];
  880. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  881. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  882. Vals[0]+=K[31];
  883. Vals[4]+=Vals[0];
  884. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  885. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  886. W[0]+=W[9];
  887. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  888. Vals[7]+=W[0];
  889. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  890. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  891. Vals[7]+=K[32];
  892. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  893. Vals[3]+=Vals[7];
  894. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  895. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  896. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  897. W[1]+=W[10];
  898. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  899. Vals[6]+=W[1];
  900. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  901. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  902. Vals[6]+=K[33];
  903. Vals[2]+=Vals[6];
  904. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  905. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  906. W[2]+=W[11];
  907. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  908. Vals[5]+=W[2];
  909. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  910. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  911. Vals[5]+=K[34];
  912. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  913. Vals[1]+=Vals[5];
  914. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  915. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  916. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  917. W[3]+=W[12];
  918. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  919. Vals[4]+=W[3];
  920. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  921. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  922. Vals[4]+=K[35];
  923. Vals[0]+=Vals[4];
  924. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  925. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  926. W[4]+=W[13];
  927. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  928. Vals[3]+=W[4];
  929. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  930. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  931. Vals[3]+=K[36];
  932. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  933. Vals[7]+=Vals[3];
  934. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  935. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  936. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  937. W[5]+=W[14];
  938. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  939. Vals[2]+=W[5];
  940. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  941. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  942. Vals[2]+=K[37];
  943. Vals[6]+=Vals[2];
  944. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  945. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  946. W[6]+=W[15];
  947. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  948. Vals[1]+=W[6];
  949. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  950. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  951. Vals[1]+=K[38];
  952. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  953. Vals[5]+=Vals[1];
  954. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  955. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  956. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  957. W[7]+=W[0];
  958. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  959. Vals[0]+=W[7];
  960. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  961. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  962. Vals[0]+=K[39];
  963. Vals[4]+=Vals[0];
  964. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  965. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  966. W[8]+=W[1];
  967. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  968. Vals[7]+=W[8];
  969. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  970. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  971. Vals[7]+=K[40];
  972. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  973. Vals[3]+=Vals[7];
  974. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  975. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  976. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  977. W[9]+=W[2];
  978. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  979. Vals[6]+=W[9];
  980. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  981. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  982. Vals[6]+=K[41];
  983. Vals[2]+=Vals[6];
  984. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  985. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  986. W[10]+=W[3];
  987. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  988. Vals[5]+=W[10];
  989. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  990. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  991. Vals[5]+=K[42];
  992. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  993. Vals[1]+=Vals[5];
  994. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  995. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  996. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  997. W[11]+=W[4];
  998. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  999. Vals[4]+=W[11];
  1000. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1001. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1002. Vals[4]+=K[43];
  1003. Vals[0]+=Vals[4];
  1004. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1005. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  1006. W[12]+=W[5];
  1007. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  1008. Vals[3]+=W[12];
  1009. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1010. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  1011. Vals[3]+=K[44];
  1012. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  1013. Vals[7]+=Vals[3];
  1014. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  1015. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  1016. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  1017. W[13]+=W[6];
  1018. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  1019. Vals[2]+=W[13];
  1020. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  1021. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  1022. Vals[2]+=K[45];
  1023. Vals[6]+=Vals[2];
  1024. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  1025. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  1026. W[14]+=W[7];
  1027. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  1028. Vals[1]+=W[14];
  1029. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  1030. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  1031. Vals[1]+=K[46];
  1032. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  1033. Vals[5]+=Vals[1];
  1034. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  1035. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  1036. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  1037. W[15]+=W[8];
  1038. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  1039. Vals[0]+=W[15];
  1040. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  1041. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  1042. Vals[0]+=K[47];
  1043. Vals[4]+=Vals[0];
  1044. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  1045. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  1046. W[0]+=W[9];
  1047. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  1048. Vals[7]+=W[0];
  1049. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1050. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  1051. Vals[7]+=K[48];
  1052. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  1053. Vals[3]+=Vals[7];
  1054. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  1055. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  1056. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  1057. W[1]+=W[10];
  1058. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  1059. Vals[6]+=W[1];
  1060. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1061. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  1062. Vals[6]+=K[49];
  1063. Vals[2]+=Vals[6];
  1064. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  1065. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  1066. W[2]+=W[11];
  1067. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  1068. Vals[5]+=W[2];
  1069. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  1070. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  1071. Vals[5]+=K[50];
  1072. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  1073. Vals[1]+=Vals[5];
  1074. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  1075. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  1076. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  1077. W[3]+=W[12];
  1078. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  1079. Vals[4]+=W[3];
  1080. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1081. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1082. Vals[4]+=K[51];
  1083. Vals[0]+=Vals[4];
  1084. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1085. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  1086. W[4]+=W[13];
  1087. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  1088. Vals[3]+=W[4];
  1089. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1090. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  1091. Vals[3]+=K[52];
  1092. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  1093. Vals[7]+=Vals[3];
  1094. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  1095. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  1096. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  1097. W[5]+=W[14];
  1098. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  1099. Vals[2]+=W[5];
  1100. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  1101. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  1102. Vals[2]+=K[53];
  1103. Vals[6]+=Vals[2];
  1104. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  1105. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  1106. W[6]+=W[15];
  1107. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  1108. Vals[1]+=W[6];
  1109. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  1110. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  1111. Vals[1]+=K[54];
  1112. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  1113. Vals[5]+=Vals[1];
  1114. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  1115. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  1116. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  1117. W[7]+=W[0];
  1118. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  1119. Vals[0]+=W[7];
  1120. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  1121. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  1122. Vals[0]+=K[55];
  1123. Vals[4]+=Vals[0];
  1124. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  1125. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  1126. W[8]+=W[1];
  1127. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  1128. Vals[7]+=W[8];
  1129. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1130. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  1131. Vals[7]+=K[56];
  1132. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  1133. Vals[3]+=Vals[7];
  1134. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  1135. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  1136. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  1137. W[9]+=W[2];
  1138. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  1139. Vals[6]+=W[9];
  1140. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1141. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  1142. Vals[6]+=K[57];
  1143. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  1144. W[10]+=W[3];
  1145. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  1146. Vals[5]+=W[10];
  1147. Vals[2]+=Vals[6];
  1148. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  1149. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  1150. Vals[5]+=K[58];
  1151. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  1152. W[11]+=W[4];
  1153. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  1154. Vals[4]+=W[11];
  1155. Vals[1]+=Vals[5];
  1156. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1157. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1158. Vals[4]+=K[59];
  1159. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  1160. W[12]+=W[5];
  1161. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  1162. Vals[7]+=W[12];
  1163. Vals[0]+=Vals[4];
  1164. Vals[7]+=Vals[3];
  1165. Vals[7]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1166. Vals[7]+=ch(Vals[0],Vals[1],Vals[2]);
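  1167. // Vals[7]+=K[60] is intentionally omitted: K[60] is folded into the comparison constant used below (0x136032ED, i.e. 0xA41F32E7 with K[60] subtracted, assuming K is the standard SHA-256 round-constant table).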
  1168. #define FOUND (0x80)
  1169. #define NFLAG (0x7F)
  1170. #if defined(VECTORS4)
  1171. Vals[7] ^= 0x136032ED;
  1172. bool result = Vals[7].x & Vals[7].y & Vals[7].z & Vals[7].w;
  1173. if (!result) {
  1174. if (!Vals[7].x)
  1175. output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
  1176. if (!Vals[7].y)
  1177. output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
  1178. if (!Vals[7].z)
  1179. output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
  1180. if (!Vals[7].w)
  1181. output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
  1182. }
  1183. #elif defined(VECTORS2)
  1184. Vals[7] ^= 0x136032ED;
  1185. bool result = Vals[7].x & Vals[7].y;
  1186. if (!result) {
  1187. if (!Vals[7].x)
  1188. output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
  1189. if (!Vals[7].y)
  1190. output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
  1191. }
  1192. #else
  1193. if (Vals[7] == 0x136032ED)
  1194. output[FOUND] = output[NFLAG & nonce] = nonce;
  1195. #endif
  1196. }