// poclbm120213.cl
  // -ck modified kernel, taken from Phoenix (which in turn took it from poclbm),
  // with aspects of phatk and others.
  // Modified version copyright 2011-2012 Con Kolivas
  // This file is taken and modified from the public-domain poclbm project, and
  // we have therefore decided to keep it public-domain in Phoenix.
  // `u` is the word type the kernel computes with. It is chosen at build time:
  //   VECTORS4 defined -> uint4
  //   VECTORS2 defined -> uint2
  //   neither defined  -> plain uint
  // VECTORS4 takes precedence when both are defined.
  #if defined(VECTORS4)
  typedef uint4 u;
  #elif defined(VECTORS2)
  typedef uint2 u;
  #else
  typedef uint u;
  #endif
  // SHA-256 round constants K[0]..K[63], stored in the __constant address space.
  // The kernel reads individual entries by round index (e.g. K[7], K[18]).
  // The index of the first entry on each row is given in the leading comment.
  __constant uint K[64] = {
      /*  0 */ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
      /*  4 */ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      /*  8 */ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
      /* 12 */ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      /* 16 */ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
      /* 20 */ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      /* 24 */ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
      /* 28 */ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      /* 32 */ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
      /* 36 */ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      /* 40 */ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
      /* 44 */ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      /* 48 */ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
      /* 52 */ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      /* 56 */ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
      /* 60 */ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
  };
  // This part is not from the stock poclbm kernel. It's part of an optimization
  // added in the Phoenix Miner.
  //
  // Three helper macros are defined here, one implementation per build
  // configuration (BITALIGN and BFI_INT are build-time defines):
  //   rotr(x, y)  - rotate x right by y bits
  //   ch(x, y, z) - SHA-256 "choose": per bit, y where x is 1 and z where x is 0
  //   Ma(x, y, z) - SHA-256 "majority": per bit, the value at least two of
  //                 x, y, z agree on
  //
  // Every macro parameter is parenthesized in the expansions so that an
  // expression argument (for example `a | b`) is substituted as one unit and
  // is not re-associated by operator precedence or by the (u) casts.
  //
  // Some AMD devices have a BFI_INT opcode, which behaves exactly like the
  // SHA-256 ch function, but provides it in exactly one instruction. If
  // detected, use it for ch. Otherwise, construct ch out of simpler logical
  // primitives.
  #ifdef BITALIGN
  #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  // Passing the same value as both source operands of amd_bitalign turns the
  // funnel shift into a right rotation.
  #define rotr(x, y) amd_bitalign((u)(x), (u)(x), (u)(y))
  #ifdef BFI_INT
  // Well, slight problem... It turns out BFI_INT isn't actually exposed to
  // OpenCL (or CAL IL for that matter) in any way. However, there is
  // a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
  // amd_bytealign, takes the same inputs, and provides the same output.
  // We can use that as a placeholder for BFI_INT and have the application
  // patch it after compilation.
  // This is the BFI_INT function
  #define ch(x, y, z) amd_bytealign(x, y, z)
  // Ma can also be implemented in terms of BFI_INT...
  #define Ma(x, y, z) amd_bytealign(((z) ^ (x)), (y), (x))
  #else // BFI_INT
  // Later SDKs optimise this to BFI INT without patching and GCN
  // actually fails if manually patched with BFI_INT
  #define ch(x, y, z) bitselect((u)(z), (u)(y), (u)(x))
  #define Ma(x, y, z) bitselect((u)(x), (u)(y), (u)(z) ^ (u)(x))
  #endif // BFI_INT
  #else // BITALIGN
  // Portable fallbacks built from plain bitwise operators.
  #define ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
  #define Ma(x, y, z) (((x) & (z)) | ((y) & ((x) | (z))))
  // OpenCL rotate() rotates left, so rotating left by (32 - y) is a right
  // rotation by y.
  #define rotr(x, y) rotate((u)(x), (u)(32 - (y)))
  #endif // BITALIGN
  // AMD's KernelAnalyzer throws errors compiling the kernel if we use
  // amd_bytealign on constants with vectors enabled, so we use this to avoid
  // problems. (this is used 4 times, and likely optimized out by the compiler.)
  //
  // Ma2(x, y, z) is the bitwise majority of x, y and z, written with plain
  // & and | only. Each parameter is parenthesized so that an expression
  // argument is substituted as one unit.
  #define Ma2(x, y, z) (((y) & (z)) | ((x) & ((y) | (z))))
  58. __kernel void search(const uint state0, const uint state1, const uint state2, const uint state3,
  59. const uint state4, const uint state5, const uint state6, const uint state7,
  60. const uint b1, const uint c1,
  61. const uint f1, const uint g1, const uint h1,
  62. const u base,
  63. const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r,
  64. const uint fcty_e, const uint fcty_e2,
  65. const uint D1A, const uint C1addK5, const uint B1addK6,
  66. const uint W16addK16, const uint W17addK17,
  67. __global uint * output)
  68. {
  69. u W[24];
  70. u *Vals = &W[16]; // Now put at W[16] to be in same array
  71. #ifdef VECTORS4
  72. const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
  73. #elif defined VECTORS2
  74. const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
  75. #else
  76. const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
  77. #endif
  78. Vals[4]=fcty_e;
  79. Vals[4]+=nonce;
  80. Vals[0]=Vals[4];
  81. Vals[0]+=state0;
  82. Vals[3]=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  83. Vals[3]+=ch(Vals[0],b1,c1);
  84. Vals[3]+=D1A;
  85. Vals[7]=Vals[3];
  86. Vals[7]+=h1;
  87. Vals[4]+=fcty_e2;
  88. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  89. Vals[2]=C1addK5;
  90. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  91. Vals[2]+=ch(Vals[7],Vals[0],b1);
  92. Vals[6]=Vals[2];
  93. Vals[6]+=g1;
  94. Vals[3]+=Ma2(g1,Vals[4],f1);
  95. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  96. Vals[1]=B1addK6;
  97. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  98. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  99. Vals[5]=Vals[1];
  100. Vals[5]+=f1;
  101. Vals[2]+=Ma2(f1,Vals[3],Vals[4]);
  102. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  103. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  104. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  105. Vals[0]+=K[7];
  106. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  107. Vals[4]+=Vals[0];
  108. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  109. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  110. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  111. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  112. Vals[7]+=K[8];
  113. Vals[3]+=Vals[7];
  114. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  115. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  116. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  117. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  118. Vals[6]+=K[9];
  119. Vals[2]+=Vals[6];
  120. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  121. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  122. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  123. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  124. Vals[5]+=K[10];
  125. Vals[1]+=Vals[5];
  126. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  127. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  128. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  129. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  130. Vals[4]+=K[11];
  131. Vals[0]+=Vals[4];
  132. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  133. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  134. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  135. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  136. Vals[3]+=K[12];
  137. Vals[7]+=Vals[3];
  138. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  139. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  140. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  141. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  142. Vals[2]+=K[13];
  143. Vals[6]+=Vals[2];
  144. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  145. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  146. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  147. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  148. Vals[1]+=K[14];
  149. Vals[5]+=Vals[1];
  150. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  151. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  152. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  153. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  154. Vals[0]+=0xC19BF3F4;
  155. Vals[4]+=Vals[0];
  156. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  157. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  158. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  159. Vals[7]+=W16addK16;
  160. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  161. Vals[3]+=Vals[7];
  162. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  163. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  164. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  165. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  166. Vals[6]+=W17addK17;
  167. Vals[2]+=Vals[6];
  168. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  169. W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U));
  170. W[2]+=fw2;
  171. Vals[5]+=W[2];
  172. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  173. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  174. Vals[5]+=K[18];
  175. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  176. Vals[1]+=Vals[5];
  177. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  178. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  179. W[3]=nonce;
  180. W[3]+=fw3;
  181. Vals[4]+=W[3];
  182. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  183. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  184. Vals[4]+=K[19];
  185. Vals[0]+=Vals[4];
  186. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  187. W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  188. W[4]+=0x80000000;
  189. Vals[3]+=W[4];
  190. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  191. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  192. Vals[3]+=K[20];
  193. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  194. Vals[7]+=Vals[3];
  195. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  196. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  197. W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  198. Vals[2]+=W[5];
  199. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  200. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  201. Vals[2]+=K[21];
  202. Vals[6]+=Vals[2];
  203. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  204. W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  205. W[6]+=0x00000280U;
  206. Vals[1]+=W[6];
  207. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  208. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  209. Vals[1]+=K[22];
  210. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  211. Vals[5]+=Vals[1];
  212. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  213. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  214. W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  215. W[7]+=fw0;
  216. Vals[0]+=W[7];
  217. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  218. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  219. Vals[0]+=K[23];
  220. Vals[4]+=Vals[0];
  221. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  222. W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  223. W[8]+=fw1;
  224. Vals[7]+=W[8];
  225. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  226. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  227. Vals[7]+=K[24];
  228. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  229. Vals[3]+=Vals[7];
  230. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  231. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  232. W[9]=W[2];
  233. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  234. Vals[6]+=W[9];
  235. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  236. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  237. Vals[6]+=K[25];
  238. Vals[2]+=Vals[6];
  239. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  240. W[10]=W[3];
  241. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  242. Vals[5]+=W[10];
  243. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  244. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  245. Vals[5]+=K[26];
  246. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  247. Vals[1]+=Vals[5];
  248. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  249. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  250. W[11]=W[4];
  251. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  252. Vals[4]+=W[11];
  253. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  254. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  255. Vals[4]+=K[27];
  256. Vals[0]+=Vals[4];
  257. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  258. W[12]=W[5];
  259. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  260. Vals[3]+=W[12];
  261. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  262. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  263. Vals[3]+=K[28];
  264. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  265. Vals[7]+=Vals[3];
  266. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  267. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  268. W[13]=W[6];
  269. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  270. Vals[2]+=W[13];
  271. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  272. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  273. Vals[2]+=K[29];
  274. Vals[6]+=Vals[2];
  275. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  276. W[14]=0x00a00055U;
  277. W[14]+=W[7];
  278. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  279. Vals[1]+=W[14];
  280. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  281. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  282. Vals[1]+=K[30];
  283. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  284. Vals[5]+=Vals[1];
  285. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  286. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  287. W[15]=fw15;
  288. W[15]+=W[8];
  289. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  290. Vals[0]+=W[15];
  291. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  292. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  293. Vals[0]+=K[31];
  294. Vals[4]+=Vals[0];
  295. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  296. W[0]=fw01r;
  297. W[0]+=W[9];
  298. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  299. Vals[7]+=W[0];
  300. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  301. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  302. Vals[7]+=K[32];
  303. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  304. Vals[3]+=Vals[7];
  305. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  306. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  307. W[1]=fw1;
  308. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  309. W[1]+=W[10];
  310. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  311. Vals[6]+=W[1];
  312. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  313. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  314. Vals[6]+=K[33];
  315. Vals[2]+=Vals[6];
  316. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  317. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  318. W[2]+=W[11];
  319. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  320. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  321. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  322. Vals[5]+=K[34];
  323. Vals[5]+=W[2];
  324. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  325. Vals[1]+=Vals[5];
  326. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  327. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  328. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  329. W[3]+=W[12];
  330. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  331. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  332. Vals[4]+=K[35];
  333. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  334. Vals[4]+=W[3];
  335. Vals[0]+=Vals[4];
  336. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  337. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  338. W[4]+=W[13];
  339. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  340. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  341. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  342. Vals[3]+=K[36];
  343. Vals[3]+=W[4];
  344. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  345. Vals[7]+=Vals[3];
  346. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  347. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  348. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  349. W[5]+=W[14];
  350. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  351. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  352. Vals[2]+=K[37];
  353. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  354. Vals[2]+=W[5];
  355. Vals[6]+=Vals[2];
  356. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  357. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  358. W[6]+=W[15];
  359. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  360. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  361. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  362. Vals[1]+=K[38];
  363. Vals[1]+=W[6];
  364. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  365. Vals[5]+=Vals[1];
  366. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  367. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  368. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  369. W[7]+=W[0];
  370. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  371. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  372. Vals[0]+=K[39];
  373. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  374. Vals[0]+=W[7];
  375. Vals[4]+=Vals[0];
  376. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  377. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  378. W[8]+=W[1];
  379. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  380. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  381. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  382. Vals[7]+=K[40];
  383. Vals[7]+=W[8];
  384. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  385. Vals[3]+=Vals[7];
  386. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  387. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  388. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  389. W[9]+=W[2];
  390. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  391. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  392. Vals[6]+=K[41];
  393. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  394. Vals[6]+=W[9];
  395. Vals[2]+=Vals[6];
  396. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  397. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  398. W[10]+=W[3];
  399. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  400. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  401. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  402. Vals[5]+=K[42];
  403. Vals[5]+=W[10];
  404. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  405. Vals[1]+=Vals[5];
  406. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  407. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  408. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  409. W[11]+=W[4];
  410. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  411. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  412. Vals[4]+=K[43];
  413. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  414. Vals[4]+=W[11];
  415. Vals[0]+=Vals[4];
  416. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  417. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  418. W[12]+=W[5];
  419. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  420. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  421. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  422. Vals[3]+=K[44];
  423. Vals[3]+=W[12];
  424. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  425. Vals[7]+=Vals[3];
  426. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  427. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  428. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  429. W[13]+=W[6];
  430. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  431. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  432. Vals[2]+=K[45];
  433. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  434. Vals[2]+=W[13];
  435. Vals[6]+=Vals[2];
  436. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  437. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  438. W[14]+=W[7];
  439. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  440. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  441. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  442. Vals[1]+=K[46];
  443. Vals[1]+=W[14];
  444. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  445. Vals[5]+=Vals[1];
  446. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  447. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  448. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  449. W[15]+=W[8];
  450. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  451. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  452. Vals[0]+=K[47];
  453. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  454. Vals[0]+=W[15];
  455. Vals[4]+=Vals[0];
  456. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  457. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  458. W[0]+=W[9];
  459. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  460. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  461. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  462. Vals[7]+=K[48];
  463. Vals[7]+=W[0];
  464. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  465. Vals[3]+=Vals[7];
  466. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  467. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  468. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  469. W[1]+=W[10];
  470. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  471. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  472. Vals[6]+=K[49];
  473. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  474. Vals[6]+=W[1];
  475. Vals[2]+=Vals[6];
  476. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  477. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  478. W[2]+=W[11];
  479. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  480. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  481. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  482. Vals[5]+=K[50];
  483. Vals[5]+=W[2];
  484. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  485. Vals[1]+=Vals[5];
  486. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  487. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  488. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  489. W[3]+=W[12];
  490. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  491. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  492. Vals[4]+=K[51];
  493. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  494. Vals[4]+=W[3];
  495. Vals[0]+=Vals[4];
  496. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  497. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  498. W[4]+=W[13];
  499. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  500. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  501. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  502. Vals[3]+=K[52];
  503. Vals[3]+=W[4];
  504. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  505. Vals[7]+=Vals[3];
  506. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  507. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  508. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  509. W[5]+=W[14];
  510. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  511. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  512. Vals[2]+=K[53];
  513. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  514. Vals[2]+=W[5];
  515. Vals[6]+=Vals[2];
  516. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  517. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  518. W[6]+=W[15];
  519. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  520. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  521. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  522. Vals[1]+=K[54];
  523. Vals[1]+=W[6];
  524. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  525. Vals[5]+=Vals[1];
  526. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  527. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  528. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  529. W[7]+=W[0];
  530. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  531. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  532. Vals[0]+=K[55];
  533. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  534. Vals[0]+=W[7];
  535. Vals[4]+=Vals[0];
  536. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  537. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  538. W[8]+=W[1];
  539. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  540. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  541. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  542. Vals[7]+=K[56];
  543. Vals[7]+=W[8];
  544. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  545. Vals[3]+=Vals[7];
  546. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  547. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  548. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  549. W[9]+=W[2];
  550. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  551. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  552. Vals[6]+=K[57];
  553. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  554. Vals[6]+=W[9];
  555. Vals[2]+=Vals[6];
  556. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  557. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  558. W[10]+=W[3];
  559. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  560. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  561. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  562. Vals[5]+=K[58];
  563. Vals[5]+=W[10];
  564. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  565. Vals[1]+=Vals[5];
  566. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  567. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  568. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  569. W[11]+=W[4];
  570. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  571. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  572. Vals[4]+=K[59];
  573. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  574. Vals[4]+=W[11];
  575. Vals[0]+=Vals[4];
  576. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  577. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  578. W[12]+=W[5];
  579. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  580. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  581. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  582. Vals[3]+=K[60];
  583. Vals[3]+=W[12];
  584. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  585. Vals[7]+=Vals[3];
  586. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  587. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  588. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  589. W[13]+=W[6];
  590. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  591. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  592. Vals[2]+=K[61];
  593. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  594. Vals[2]+=W[13];
  595. Vals[6]+=Vals[2];
  596. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  597. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  598. W[14]+=W[7];
  599. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  600. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  601. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  602. Vals[1]+=K[62];
  603. Vals[1]+=W[14];
  604. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  605. Vals[5]+=Vals[1];
  606. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  607. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  608. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  609. W[15]+=W[8];
  610. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  611. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  612. Vals[0]+=K[63];
  613. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  614. Vals[0]+=W[15];
  615. Vals[4]+=Vals[0];
  616. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  617. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  618. W[0]=Vals[0];
  619. W[7]=state7;
  620. W[7]+=Vals[7];
  621. Vals[7]=0xF377ED68;
  622. W[0]+=state0;
  623. Vals[7]+=W[0];
  624. W[3]=state3;
  625. W[3]+=Vals[3];
  626. Vals[3]=0xa54ff53a;
  627. Vals[3]+=Vals[7];
  628. W[1]=Vals[1];
  629. W[1]+=state1;
  630. W[6]=state6;
  631. W[6]+=Vals[6];
  632. Vals[6]=0x90BB1E3C;
  633. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  634. Vals[6]+=(0x9b05688cU^(Vals[3]&0xca0b3af3U));
  635. W[2]=state2;
  636. W[2]+=Vals[2];
  637. Vals[2]=0x3c6ef372U;
  638. Vals[6]+=W[1];
  639. Vals[2]+=Vals[6];
  640. Vals[7]+=0x08909ae5U;
  641. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  642. W[5]=state5;
  643. W[5]+=Vals[5];
  644. Vals[5]=0x150C6645B;
  645. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  646. Vals[5]+=ch(Vals[2],Vals[3],0x510e527fU);
  647. Vals[5]+=W[2];
  648. Vals[1]=0xbb67ae85U;
  649. Vals[1]+=Vals[5];
  650. Vals[6]+=Ma2(0xbb67ae85U,Vals[7],0x6a09e667U);
  651. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  652. W[4]=state4;
  653. W[4]+=Vals[4];
  654. Vals[4]=0x13AC42E24;
  655. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  656. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  657. Vals[4]+=W[3];
  658. Vals[0]=Vals[4];
  659. Vals[0]+=0x6a09e667U;
  660. Vals[5]+=Ma2(0x6a09e667U,Vals[6],Vals[7]);
  661. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  662. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  663. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  664. Vals[3]+=K[4];
  665. Vals[3]+=W[4];
  666. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  667. Vals[7]+=Vals[3];
  668. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  669. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  670. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  671. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  672. Vals[2]+=K[5];
  673. Vals[2]+=W[5];
  674. Vals[6]+=Vals[2];
  675. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  676. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  677. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  678. Vals[1]+=K[6];
  679. Vals[1]+=W[6];
  680. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  681. Vals[5]+=Vals[1];
  682. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  683. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  684. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  685. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  686. Vals[0]+=K[7];
  687. Vals[0]+=W[7];
  688. Vals[4]+=Vals[0];
  689. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  690. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  691. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  692. Vals[7]+=0x15807AA98;
  693. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  694. Vals[3]+=Vals[7];
  695. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  696. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  697. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  698. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  699. Vals[6]+=K[9];
  700. Vals[2]+=Vals[6];
  701. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  702. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  703. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  704. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  705. Vals[5]+=K[10];
  706. Vals[1]+=Vals[5];
  707. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  708. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  709. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  710. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  711. Vals[4]+=K[11];
  712. Vals[0]+=Vals[4];
  713. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  714. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  715. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  716. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  717. Vals[3]+=K[12];
  718. Vals[7]+=Vals[3];
  719. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  720. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  721. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  722. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  723. Vals[2]+=K[13];
  724. Vals[6]+=Vals[2];
  725. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  726. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  727. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  728. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  729. Vals[1]+=K[14];
  730. Vals[5]+=Vals[1];
  731. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  732. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  733. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  734. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  735. Vals[0]+=0xC19BF274;
  736. Vals[4]+=Vals[0];
  737. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  738. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  739. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  740. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  741. Vals[7]+=K[16];
  742. Vals[7]+=W[0];
  743. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  744. Vals[3]+=Vals[7];
  745. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  746. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  747. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  748. W[1]+=0x00a00000U;
  749. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  750. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  751. Vals[6]+=K[17];
  752. Vals[6]+=W[1];
  753. Vals[2]+=Vals[6];
  754. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  755. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  756. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  757. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  758. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  759. Vals[5]+=K[18];
  760. Vals[5]+=W[2];
  761. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  762. Vals[1]+=Vals[5];
  763. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  764. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  765. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  766. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  767. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  768. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  769. Vals[4]+=K[19];
  770. Vals[4]+=W[3];
  771. Vals[0]+=Vals[4];
  772. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  773. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  774. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  775. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  776. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  777. Vals[3]+=K[20];
  778. Vals[3]+=W[4];
  779. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  780. Vals[7]+=Vals[3];
  781. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  782. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  783. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  784. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  785. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  786. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  787. Vals[2]+=K[21];
  788. Vals[2]+=W[5];
  789. Vals[6]+=Vals[2];
  790. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  791. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  792. W[6]+=0x00000100U;
  793. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  794. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  795. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  796. Vals[1]+=K[22];
  797. Vals[1]+=W[6];
  798. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  799. Vals[5]+=Vals[1];
  800. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  801. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  802. W[7]+=0x11002000U;
  803. W[7]+=W[0];
  804. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  805. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  806. Vals[0]+=K[23];
  807. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  808. Vals[0]+=W[7];
  809. Vals[4]+=Vals[0];
  810. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  811. W[8]=0x80000000;
  812. W[8]+=W[1];
  813. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  814. Vals[7]+=W[8];
  815. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  816. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  817. Vals[7]+=K[24];
  818. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  819. Vals[3]+=Vals[7];
  820. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  821. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  822. W[9]=W[2];
  823. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  824. Vals[6]+=W[9];
  825. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  826. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  827. Vals[6]+=K[25];
  828. Vals[2]+=Vals[6];
  829. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  830. W[10]=W[3];
  831. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  832. Vals[5]+=W[10];
  833. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  834. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  835. Vals[5]+=K[26];
  836. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  837. Vals[1]+=Vals[5];
  838. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  839. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  840. W[11]=W[4];
  841. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  842. Vals[4]+=W[11];
  843. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  844. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  845. Vals[4]+=K[27];
  846. Vals[0]+=Vals[4];
  847. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  848. W[12]=W[5];
  849. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  850. Vals[3]+=W[12];
  851. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  852. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  853. Vals[3]+=K[28];
  854. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  855. Vals[7]+=Vals[3];
  856. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  857. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  858. W[13]=W[6];
  859. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  860. Vals[2]+=W[13];
  861. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  862. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  863. Vals[2]+=K[29];
  864. Vals[6]+=Vals[2];
  865. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  866. W[14]=0x00400022U;
  867. W[14]+=W[7];
  868. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  869. Vals[1]+=W[14];
  870. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  871. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  872. Vals[1]+=K[30];
  873. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  874. Vals[5]+=Vals[1];
  875. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  876. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  877. W[15]=0x00000100U;
  878. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  879. W[15]+=W[8];
  880. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  881. Vals[0]+=W[15];
  882. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  883. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  884. Vals[0]+=K[31];
  885. Vals[4]+=Vals[0];
  886. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  887. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  888. W[0]+=W[9];
  889. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  890. Vals[7]+=W[0];
  891. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  892. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  893. Vals[7]+=K[32];
  894. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  895. Vals[3]+=Vals[7];
  896. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  897. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  898. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  899. W[1]+=W[10];
  900. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  901. Vals[6]+=W[1];
  902. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  903. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  904. Vals[6]+=K[33];
  905. Vals[2]+=Vals[6];
  906. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  907. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  908. W[2]+=W[11];
  909. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  910. Vals[5]+=W[2];
  911. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  912. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  913. Vals[5]+=K[34];
  914. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  915. Vals[1]+=Vals[5];
  916. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  917. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  918. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  919. W[3]+=W[12];
  920. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  921. Vals[4]+=W[3];
  922. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  923. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  924. Vals[4]+=K[35];
  925. Vals[0]+=Vals[4];
  926. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  927. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  928. W[4]+=W[13];
  929. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  930. Vals[3]+=W[4];
  931. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  932. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  933. Vals[3]+=K[36];
  934. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  935. Vals[7]+=Vals[3];
  936. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  937. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  938. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  939. W[5]+=W[14];
  940. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  941. Vals[2]+=W[5];
  942. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  943. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  944. Vals[2]+=K[37];
  945. Vals[6]+=Vals[2];
  946. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  947. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  948. W[6]+=W[15];
  949. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  950. Vals[1]+=W[6];
  951. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  952. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  953. Vals[1]+=K[38];
  954. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  955. Vals[5]+=Vals[1];
  956. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  957. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  958. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  959. W[7]+=W[0];
  960. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  961. Vals[0]+=W[7];
  962. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  963. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  964. Vals[0]+=K[39];
  965. Vals[4]+=Vals[0];
  966. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  967. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  968. W[8]+=W[1];
  969. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  970. Vals[7]+=W[8];
  971. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  972. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  973. Vals[7]+=K[40];
  974. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  975. Vals[3]+=Vals[7];
  976. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  977. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  978. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  979. W[9]+=W[2];
  980. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  981. Vals[6]+=W[9];
  982. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  983. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  984. Vals[6]+=K[41];
  985. Vals[2]+=Vals[6];
  986. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  987. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  988. W[10]+=W[3];
  989. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  990. Vals[5]+=W[10];
  991. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  992. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  993. Vals[5]+=K[42];
  994. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  995. Vals[1]+=Vals[5];
  996. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  997. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  998. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  999. W[11]+=W[4];
  1000. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  1001. Vals[4]+=W[11];
  1002. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1003. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1004. Vals[4]+=K[43];
  1005. Vals[0]+=Vals[4];
  1006. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1007. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  1008. W[12]+=W[5];
  1009. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  1010. Vals[3]+=W[12];
  1011. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1012. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  1013. Vals[3]+=K[44];
  1014. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  1015. Vals[7]+=Vals[3];
  1016. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  1017. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  1018. W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
  1019. W[13]+=W[6];
  1020. W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
  1021. Vals[2]+=W[13];
  1022. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  1023. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  1024. Vals[2]+=K[45];
  1025. Vals[6]+=Vals[2];
  1026. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  1027. W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
  1028. W[14]+=W[7];
  1029. W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
  1030. Vals[1]+=W[14];
  1031. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  1032. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  1033. Vals[1]+=K[46];
  1034. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  1035. Vals[5]+=Vals[1];
  1036. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  1037. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  1038. W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
  1039. W[15]+=W[8];
  1040. W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
  1041. Vals[0]+=W[15];
  1042. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  1043. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  1044. Vals[0]+=K[47];
  1045. Vals[4]+=Vals[0];
  1046. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  1047. W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
  1048. W[0]+=W[9];
  1049. W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
  1050. Vals[7]+=W[0];
  1051. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1052. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  1053. Vals[7]+=K[48];
  1054. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  1055. Vals[3]+=Vals[7];
  1056. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  1057. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  1058. W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
  1059. W[1]+=W[10];
  1060. W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
  1061. Vals[6]+=W[1];
  1062. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1063. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  1064. Vals[6]+=K[49];
  1065. Vals[2]+=Vals[6];
  1066. Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
  1067. W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
  1068. W[2]+=W[11];
  1069. W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
  1070. Vals[5]+=W[2];
  1071. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  1072. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  1073. Vals[5]+=K[50];
  1074. Vals[6]+=Ma(Vals[1],Vals[7],Vals[0]);
  1075. Vals[1]+=Vals[5];
  1076. Vals[5]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
  1077. Vals[5]+=Ma(Vals[0],Vals[6],Vals[7]);
  1078. W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
  1079. W[3]+=W[12];
  1080. W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
  1081. Vals[4]+=W[3];
  1082. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1083. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1084. Vals[4]+=K[51];
  1085. Vals[0]+=Vals[4];
  1086. Vals[4]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
  1087. W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
  1088. W[4]+=W[13];
  1089. W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
  1090. Vals[3]+=W[4];
  1091. Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1092. Vals[3]+=ch(Vals[0],Vals[1],Vals[2]);
  1093. Vals[3]+=K[52];
  1094. Vals[4]+=Ma(Vals[7],Vals[5],Vals[6]);
  1095. Vals[7]+=Vals[3];
  1096. Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
  1097. Vals[3]+=Ma(Vals[6],Vals[4],Vals[5]);
  1098. W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
  1099. W[5]+=W[14];
  1100. W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
  1101. Vals[2]+=W[5];
  1102. Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
  1103. Vals[2]+=ch(Vals[7],Vals[0],Vals[1]);
  1104. Vals[2]+=K[53];
  1105. Vals[6]+=Vals[2];
  1106. Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
  1107. W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
  1108. W[6]+=W[15];
  1109. W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
  1110. Vals[1]+=W[6];
  1111. Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
  1112. Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
  1113. Vals[1]+=K[54];
  1114. Vals[2]+=Ma(Vals[5],Vals[3],Vals[4]);
  1115. Vals[5]+=Vals[1];
  1116. Vals[1]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
  1117. Vals[1]+=Ma(Vals[4],Vals[2],Vals[3]);
  1118. W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
  1119. W[7]+=W[0];
  1120. W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
  1121. Vals[0]+=W[7];
  1122. Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
  1123. Vals[0]+=ch(Vals[5],Vals[6],Vals[7]);
  1124. Vals[0]+=K[55];
  1125. Vals[4]+=Vals[0];
  1126. Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
  1127. W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
  1128. W[8]+=W[1];
  1129. W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
  1130. Vals[7]+=W[8];
  1131. Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
  1132. Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
  1133. Vals[7]+=K[56];
  1134. Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
  1135. Vals[3]+=Vals[7];
  1136. Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
  1137. Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
  1138. W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
  1139. W[9]+=W[2];
  1140. W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
  1141. Vals[6]+=W[9];
  1142. Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
  1143. Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
  1144. Vals[6]+=K[57];
  1145. W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
  1146. W[10]+=W[3];
  1147. W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
  1148. Vals[5]+=W[10];
  1149. Vals[2]+=Vals[6];
  1150. Vals[5]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
  1151. Vals[5]+=ch(Vals[2],Vals[3],Vals[4]);
  1152. Vals[5]+=K[58];
  1153. W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
  1154. W[11]+=W[4];
  1155. W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
  1156. Vals[4]+=W[11];
  1157. Vals[1]+=Vals[5];
  1158. Vals[4]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
  1159. Vals[4]+=ch(Vals[1],Vals[2],Vals[3]);
  1160. Vals[4]+=K[59];
  1161. W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
  1162. W[12]+=W[5];
  1163. W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
  1164. Vals[7]+=W[12];
  1165. Vals[0]+=Vals[4];
  1166. Vals[7]+=Vals[3];
  1167. Vals[7]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
  1168. Vals[7]+=ch(Vals[0],Vals[1],Vals[2]);
  1169. //Vals[7]+=K[60]; diffed from 0xA41F32E7
  1170. #define FOUND (0x80)
  1171. #define NFLAG (0x7F)
  1172. #if defined(VECTORS4)
  1173. Vals[7] ^= 0x136032ED;
  1174. bool result = Vals[7].x & Vals[7].y & Vals[7].z & Vals[7].w;
  1175. if (!result) {
  1176. if (!Vals[7].x)
  1177. output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
  1178. if (!Vals[7].y)
  1179. output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
  1180. if (!Vals[7].z)
  1181. output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
  1182. if (!Vals[7].w)
  1183. output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
  1184. }
  1185. #elif defined(VECTORS2)
  1186. Vals[7] ^= 0x136032ED;
  1187. bool result = Vals[7].x & Vals[7].y;
  1188. if (!result) {
  1189. if (!Vals[7].x)
  1190. output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
  1191. if (!Vals[7].y)
  1192. output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
  1193. }
  1194. #else
  1195. if (Vals[7] == 0x136032ED)
  1196. output[FOUND] = output[NFLAG & nonce] = nonce;
  1197. #endif
  1198. }