Browse Source

Hand optimise variable addition order.

Con Kolivas 14 years ago
parent
commit
81cb584586
1 changed file with 120 additions and 89 deletions
  1. poclbm120213.cl (+120 -89)

+ 120 - 89
poclbm120213.cl

@@ -194,10 +194,10 @@ W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
 
 W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U));
 W[2]+=fw2;
+W[21]+=W[2];
 W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
 W[21]+=ch(W[18],W[19],W[20]);
 W[21]+=K[18];
-W[21]+=W[2];
 W[22]+=Ma(W[17],W[23],W[16]);
 W[17]+=W[21];
 W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
@@ -205,38 +205,38 @@ W[21]+=Ma(W[16],W[22],W[23]);
 
 W[3]=nonce;
 W[3]+=fw3;
+W[20]+=W[3];
 W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
 W[20]+=ch(W[17],W[18],W[19]);
 W[20]+=K[19];
-W[20]+=W[3];
 W[16]+=W[20];
 W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
 
 W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
 W[4]+=0x80000000;
+W[19]+=W[4];
 W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
 W[19]+=ch(W[16],W[17],W[18]);
 W[19]+=K[20];
-W[19]+=W[4];
 W[20]+=Ma(W[23],W[21],W[22]);
 W[23]+=W[19];
 W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
 W[19]+=Ma(W[22],W[20],W[21]);
-W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
-W[18]+=ch(W[23],W[16],W[17]);
-W[18]+=K[21];
 
 W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
 W[18]+=W[5];
+W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
+W[18]+=ch(W[23],W[16],W[17]);
+W[18]+=K[21];
 W[22]+=W[18];
 W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
 
 W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
 W[6]+=0x00000280U;
+W[17]+=W[6];
 W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
 W[17]+=ch(W[22],W[23],W[16]);
 W[17]+=K[22];
-W[17]+=W[6];
 W[18]+=Ma(W[21],W[19],W[20]);
 W[21]+=W[17];
 W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
@@ -244,19 +244,20 @@ W[17]+=Ma(W[20],W[18],W[19]);
 
 W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
 W[7]+=fw0;
+W[16]+=W[7];
 W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
 W[16]+=ch(W[21],W[22],W[23]);
 W[16]+=K[23];
-W[16]+=W[7];
+
 W[20]+=W[16];
 W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
 
 W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
 W[8]+=fw1;
+W[23]+=W[8];
 W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
 W[23]+=ch(W[20],W[21],W[22]);
 W[23]+=K[24];
-W[23]+=W[8];
 W[16]+=Ma(W[19],W[17],W[18]);
 W[19]+=W[23];
 W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
@@ -264,19 +265,20 @@ W[23]+=Ma(W[18],W[16],W[17]);
 
 W[9]=W[2];
 W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
+W[22]+=W[9];
 W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
 W[22]+=ch(W[19],W[20],W[21]);
 W[22]+=K[25];
-W[22]+=W[9];
+
 W[18]+=W[22];
 W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
 
 W[10]=W[3];
 W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
+W[21]+=W[10];
 W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
 W[21]+=ch(W[18],W[19],W[20]);
 W[21]+=K[26];
-W[21]+=W[10];
 W[22]+=Ma(W[17],W[23],W[16]);
 W[17]+=W[21];
 W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
@@ -284,19 +286,19 @@ W[21]+=Ma(W[16],W[22],W[23]);
 
 W[11]=W[4];
 W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
+W[20]+=W[11];
 W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
 W[20]+=ch(W[17],W[18],W[19]);
 W[20]+=K[27];
-W[20]+=W[11];
 W[16]+=W[20];
 W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
 
 W[12]=W[5];
 W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
+W[19]+=W[12];
 W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
 W[19]+=ch(W[16],W[17],W[18]);
 W[19]+=K[28];
-W[19]+=W[12];
 W[20]+=Ma(W[23],W[21],W[22]);
 W[23]+=W[19];
 W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
@@ -304,20 +306,20 @@ W[19]+=Ma(W[22],W[20],W[21]);
 
 W[13]=W[6];
 W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
+W[18]+=W[13];
 W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
 W[18]+=ch(W[23],W[16],W[17]);
 W[18]+=K[29];
-W[18]+=W[13];
 W[22]+=W[18];
 W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
 
 W[14]=0x00a00055U;
 W[14]+=W[7];
+W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
+W[17]+=W[14];
 W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
 W[17]+=ch(W[22],W[23],W[16]);
-W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
 W[17]+=K[30];
-W[17]+=W[14];
 W[18]+=Ma(W[21],W[19],W[20]);
 W[21]+=W[17];
 W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
@@ -325,21 +327,21 @@ W[17]+=Ma(W[20],W[18],W[19]);
 
 W[15]=fw15;
 W[15]+=W[8];
+W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
+W[16]+=W[15];
 W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
 W[16]+=ch(W[21],W[22],W[23]);
 W[16]+=K[31];
-W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
-W[16]+=W[15];
 W[20]+=W[16];
 W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
 
 W[0]=fw01r;
 W[0]+=W[9];
+W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
+W[23]+=W[0];
 W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
 W[23]+=ch(W[20],W[21],W[22]);
-W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
 W[23]+=K[32];
-W[23]+=W[0];
 W[16]+=Ma(W[19],W[17],W[18]);
 W[19]+=W[23];
 W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
@@ -349,10 +351,10 @@ W[1]=fw1;
 W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
 W[1]+=W[10];
 W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
+W[22]+=W[1];
 W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
 W[22]+=ch(W[19],W[20],W[21]);
 W[22]+=K[33];
-W[22]+=W[1];
 W[18]+=W[22];
 W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
 W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
@@ -874,11 +876,11 @@ W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
 
 W[8]=0x80000000;
 W[8]+=W[1];
+W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
+W[23]+=W[8];
 W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
 W[23]+=ch(W[20],W[21],W[22]);
-W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
 W[23]+=K[24];
-W[23]+=W[8];
 W[16]+=Ma(W[19],W[17],W[18]);
 W[19]+=W[23];
 W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
@@ -886,19 +888,19 @@ W[23]+=Ma(W[18],W[16],W[17]);
 
 W[9]=W[2];
 W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
+W[22]+=W[9];
 W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
 W[22]+=ch(W[19],W[20],W[21]);
 W[22]+=K[25];
-W[22]+=W[9];
 W[18]+=W[22];
 W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
 
 W[10]=W[3];
 W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
+W[21]+=W[10];
 W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
 W[21]+=ch(W[18],W[19],W[20]);
 W[21]+=K[26];
-W[21]+=W[10];
 W[22]+=Ma(W[17],W[23],W[16]);
 W[17]+=W[21];
 W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
@@ -906,19 +908,19 @@ W[21]+=Ma(W[16],W[22],W[23]);
 
 W[11]=W[4];
 W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
+W[20]+=W[11];
 W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
 W[20]+=ch(W[17],W[18],W[19]);
 W[20]+=K[27];
-W[20]+=W[11];
 W[16]+=W[20];
 W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
 
 W[12]=W[5];
 W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
+W[19]+=W[12];
 W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
 W[19]+=ch(W[16],W[17],W[18]);
 W[19]+=K[28];
-W[19]+=W[12];
 W[20]+=Ma(W[23],W[21],W[22]);
 W[23]+=W[19];
 W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
@@ -926,20 +928,20 @@ W[19]+=Ma(W[22],W[20],W[21]);
 
 W[13]=W[6];
 W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
+W[18]+=W[13];
 W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
 W[18]+=ch(W[23],W[16],W[17]);
 W[18]+=K[29];
-W[18]+=W[13];
 W[22]+=W[18];
 W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
 
 W[14]=0x00400022U;
 W[14]+=W[7];
+W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
+W[17]+=W[14];
 W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
 W[17]+=ch(W[22],W[23],W[16]);
-W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
 W[17]+=K[30];
-W[17]+=W[14];
 W[18]+=Ma(W[21],W[19],W[20]);
 W[21]+=W[17];
 W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
@@ -949,295 +951,324 @@ W[15]=0x00000100U;
 W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
 W[15]+=W[8];
 W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
+W[16]+=W[15];
 W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
 W[16]+=ch(W[21],W[22],W[23]);
 W[16]+=K[31];
-W[16]+=W[15];
 W[20]+=W[16];
 W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
+
 W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
 W[0]+=W[9];
+W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
+W[23]+=W[0];
 W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
 W[23]+=ch(W[20],W[21],W[22]);
-W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
 W[23]+=K[32];
-W[23]+=W[0];
 W[16]+=Ma(W[19],W[17],W[18]);
 W[19]+=W[23];
 W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
 W[23]+=Ma(W[18],W[16],W[17]);
+
 W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
 W[1]+=W[10];
+W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
+W[22]+=W[1];
 W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
 W[22]+=ch(W[19],W[20],W[21]);
 W[22]+=K[33];
-W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
-W[22]+=W[1];
 W[18]+=W[22];
 W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
+
 W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
 W[2]+=W[11];
+W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
+W[21]+=W[2];
 W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
 W[21]+=ch(W[18],W[19],W[20]);
-W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
 W[21]+=K[34];
-W[21]+=W[2];
 W[22]+=Ma(W[17],W[23],W[16]);
 W[17]+=W[21];
 W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
 W[21]+=Ma(W[16],W[22],W[23]);
+
 W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
 W[3]+=W[12];
+W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
+W[20]+=W[3];
 W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
 W[20]+=ch(W[17],W[18],W[19]);
 W[20]+=K[35];
-W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
-W[20]+=W[3];
 W[16]+=W[20];
 W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
+
 W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
 W[4]+=W[13];
+W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
+W[19]+=W[4];
 W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
 W[19]+=ch(W[16],W[17],W[18]);
-W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
 W[19]+=K[36];
-W[19]+=W[4];
 W[20]+=Ma(W[23],W[21],W[22]);
 W[23]+=W[19];
 W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
 W[19]+=Ma(W[22],W[20],W[21]);
+
 W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
 W[5]+=W[14];
+W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
+W[18]+=W[5];
 W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
 W[18]+=ch(W[23],W[16],W[17]);
 W[18]+=K[37];
-W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
-W[18]+=W[5];
 W[22]+=W[18];
 W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
+
 W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
 W[6]+=W[15];
+W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
+W[17]+=W[6];
 W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
 W[17]+=ch(W[22],W[23],W[16]);
-W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
 W[17]+=K[38];
-W[17]+=W[6];
 W[18]+=Ma(W[21],W[19],W[20]);
 W[21]+=W[17];
 W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
 W[17]+=Ma(W[20],W[18],W[19]);
+
 W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
 W[7]+=W[0];
+W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
+W[16]+=W[7];
 W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
 W[16]+=ch(W[21],W[22],W[23]);
 W[16]+=K[39];
-W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
-W[16]+=W[7];
 W[20]+=W[16];
 W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
+
 W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
 W[8]+=W[1];
+W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
+W[23]+=W[8];
 W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
 W[23]+=ch(W[20],W[21],W[22]);
-W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
 W[23]+=K[40];
-W[23]+=W[8];
 W[16]+=Ma(W[19],W[17],W[18]);
 W[19]+=W[23];
 W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
 W[23]+=Ma(W[18],W[16],W[17]);
+
 W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
 W[9]+=W[2];
+W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
+W[22]+=W[9];
 W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
 W[22]+=ch(W[19],W[20],W[21]);
 W[22]+=K[41];
-W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
-W[22]+=W[9];
 W[18]+=W[22];
 W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
+
 W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
 W[10]+=W[3];
+W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
+W[21]+=W[10];
 W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
 W[21]+=ch(W[18],W[19],W[20]);
-W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
 W[21]+=K[42];
-W[21]+=W[10];
 W[22]+=Ma(W[17],W[23],W[16]);
 W[17]+=W[21];
 W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
 W[21]+=Ma(W[16],W[22],W[23]);
+
 W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
 W[11]+=W[4];
+W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
+W[20]+=W[11];
 W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
 W[20]+=ch(W[17],W[18],W[19]);
 W[20]+=K[43];
-W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
-W[20]+=W[11];
 W[16]+=W[20];
 W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
+
 W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
 W[12]+=W[5];
+W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
+W[19]+=W[12];
 W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
 W[19]+=ch(W[16],W[17],W[18]);
-W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
 W[19]+=K[44];
-W[19]+=W[12];
 W[20]+=Ma(W[23],W[21],W[22]);
 W[23]+=W[19];
 W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
 W[19]+=Ma(W[22],W[20],W[21]);
+
 W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
 W[13]+=W[6];
+W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
+W[18]+=W[13];
 W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
 W[18]+=ch(W[23],W[16],W[17]);
 W[18]+=K[45];
-W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
-W[18]+=W[13];
 W[22]+=W[18];
 W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
+
 W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
 W[14]+=W[7];
+W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
+W[17]+=W[14];
 W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
 W[17]+=ch(W[22],W[23],W[16]);
-W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
 W[17]+=K[46];
-W[17]+=W[14];
 W[18]+=Ma(W[21],W[19],W[20]);
 W[21]+=W[17];
 W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
 W[17]+=Ma(W[20],W[18],W[19]);
+
 W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
 W[15]+=W[8];
+W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
+W[16]+=W[15];
 W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
 W[16]+=ch(W[21],W[22],W[23]);
 W[16]+=K[47];
-W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
-W[16]+=W[15];
 W[20]+=W[16];
 W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
+
 W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
 W[0]+=W[9];
+W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
+W[23]+=W[0];
 W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
 W[23]+=ch(W[20],W[21],W[22]);
-W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
 W[23]+=K[48];
-W[23]+=W[0];
 W[16]+=Ma(W[19],W[17],W[18]);
 W[19]+=W[23];
 W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
 W[23]+=Ma(W[18],W[16],W[17]);
+
 W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
 W[1]+=W[10];
+W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
+W[22]+=W[1];
 W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
 W[22]+=ch(W[19],W[20],W[21]);
 W[22]+=K[49];
-W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
-W[22]+=W[1];
 W[18]+=W[22];
 W[22]+=(rotr(W[23],2)^rotr(W[23],13)^rotr(W[23],22));
+
 W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
 W[2]+=W[11];
+W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
+W[21]+=W[2];
 W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
 W[21]+=ch(W[18],W[19],W[20]);
-W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
 W[21]+=K[50];
-W[21]+=W[2];
 W[22]+=Ma(W[17],W[23],W[16]);
 W[17]+=W[21];
 W[21]+=(rotr(W[22],2)^rotr(W[22],13)^rotr(W[22],22));
 W[21]+=Ma(W[16],W[22],W[23]);
+
 W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
 W[3]+=W[12];
+W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
+W[20]+=W[3];
 W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
 W[20]+=ch(W[17],W[18],W[19]);
 W[20]+=K[51];
-W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
-W[20]+=W[3];
 W[16]+=W[20];
 W[20]+=(rotr(W[21],2)^rotr(W[21],13)^rotr(W[21],22));
+
 W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
 W[4]+=W[13];
+W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
+W[19]+=W[4];
 W[19]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
 W[19]+=ch(W[16],W[17],W[18]);
-W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
 W[19]+=K[52];
-W[19]+=W[4];
 W[20]+=Ma(W[23],W[21],W[22]);
 W[23]+=W[19];
 W[19]+=(rotr(W[20],2)^rotr(W[20],13)^rotr(W[20],22));
 W[19]+=Ma(W[22],W[20],W[21]);
+
 W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
 W[5]+=W[14];
+W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
+W[18]+=W[5];
 W[18]+=(rotr(W[23],6)^rotr(W[23],11)^rotr(W[23],25));
 W[18]+=ch(W[23],W[16],W[17]);
 W[18]+=K[53];
-W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
-W[18]+=W[5];
 W[22]+=W[18];
 W[18]+=(rotr(W[19],2)^rotr(W[19],13)^rotr(W[19],22));
+
 W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
 W[6]+=W[15];
+W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
+W[17]+=W[6];
 W[17]+=(rotr(W[22],6)^rotr(W[22],11)^rotr(W[22],25));
 W[17]+=ch(W[22],W[23],W[16]);
-W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
 W[17]+=K[54];
-W[17]+=W[6];
 W[18]+=Ma(W[21],W[19],W[20]);
 W[21]+=W[17];
 W[17]+=(rotr(W[18],2)^rotr(W[18],13)^rotr(W[18],22));
 W[17]+=Ma(W[20],W[18],W[19]);
+
 W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
 W[7]+=W[0];
+W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
+W[16]+=W[7];
 W[16]+=(rotr(W[21],6)^rotr(W[21],11)^rotr(W[21],25));
 W[16]+=ch(W[21],W[22],W[23]);
 W[16]+=K[55];
-W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
-W[16]+=W[7];
 W[20]+=W[16];
 W[16]+=(rotr(W[17],2)^rotr(W[17],13)^rotr(W[17],22));
+
 W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
 W[8]+=W[1];
+W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
+W[23]+=W[8];
 W[23]+=(rotr(W[20],6)^rotr(W[20],11)^rotr(W[20],25));
 W[23]+=ch(W[20],W[21],W[22]);
-W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
 W[23]+=K[56];
-W[23]+=W[8];
 W[16]+=Ma(W[19],W[17],W[18]);
 W[19]+=W[23];
 W[23]+=(rotr(W[16],2)^rotr(W[16],13)^rotr(W[16],22));
 W[23]+=Ma(W[18],W[16],W[17]);
+
 W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
 W[9]+=W[2];
+W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
+W[22]+=W[9];
 W[22]+=(rotr(W[19],6)^rotr(W[19],11)^rotr(W[19],25));
 W[22]+=ch(W[19],W[20],W[21]);
 W[22]+=K[57];
-W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
-W[22]+=W[9];
+
 W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
 W[10]+=W[3];
+W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
+W[21]+=W[10];
 W[18]+=W[22];
 W[21]+=(rotr(W[18],6)^rotr(W[18],11)^rotr(W[18],25));
 W[21]+=ch(W[18],W[19],W[20]);
 W[21]+=K[58];
-W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
-W[21]+=W[10];
+
 W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
 W[11]+=W[4];
+W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
+W[20]+=W[11];
 W[17]+=W[21];
 W[20]+=(rotr(W[17],6)^rotr(W[17],11)^rotr(W[17],25));
 W[20]+=ch(W[17],W[18],W[19]);
 W[20]+=K[59];
-W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
-W[20]+=W[11];
+
 W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
 W[12]+=W[5];
+W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
+W[23]+=W[12];
 W[16]+=W[20];
 W[23]+=W[19];
 W[23]+=(rotr(W[16],6)^rotr(W[16],11)^rotr(W[16],25));
 W[23]+=ch(W[16],W[17],W[18]);
 W[23]+=K[60];
-W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
-W[23]+=W[12];
 
 #define FOUND (0x80)
 #define NFLAG (0x7F)