keccak.cl 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. /*
  2. * Scrypt-jane public domain, OpenCL implementation of scrypt(keccak,chacha,SCRYPTN,1,1) 2013 mtrlt
  3. */
  4. // kernel-interface: fullheader Keccak
  /*
   * Expands to the comma-separated list x0, x1, ..., x24 by pasting each index
   * onto the last token of x. In this file it is used to declare 25 variables
   * (ARGS_25(state)), to declare 25 pointer parameters (ARGS_25(uint2* s)) and
   * to pass 25 addresses (ARGS_25(&state)).
   */
  #define ARGS_25(x) \
      x ## 0,  x ## 1,  x ## 2,  x ## 3,  x ## 4, \
      x ## 5,  x ## 6,  x ## 7,  x ## 8,  x ## 9, \
      x ## 10, x ## 11, x ## 12, x ## 13, x ## 14, \
      x ## 15, x ## 16, x ## 17, x ## 18, x ## 19, \
      x ## 20, x ## 21, x ## 22, x ## 23, x ## 24
  /*
   * Per-round constants XORed into lane 0 at the end of each round (see RND).
   * Each 64-bit constant is split into two 32-bit words: .x holds the low
   * word and .y the high word, the same layout ROTL64_1/ROTL64_2 operate on.
   */
  __constant uint2 keccak_round_constants[24] = {
      (uint2)(0x00000001,0x00000000), /* round  0 */
      (uint2)(0x00008082,0x00000000), /* round  1 */
      (uint2)(0x0000808a,0x80000000), /* round  2 */
      (uint2)(0x80008000,0x80000000), /* round  3 */
      (uint2)(0x0000808b,0x00000000), /* round  4 */
      (uint2)(0x80000001,0x00000000), /* round  5 */
      (uint2)(0x80008081,0x80000000), /* round  6 */
      (uint2)(0x00008009,0x80000000), /* round  7 */
      (uint2)(0x0000008a,0x00000000), /* round  8 */
      (uint2)(0x00000088,0x00000000), /* round  9 */
      (uint2)(0x80008009,0x00000000), /* round 10 */
      (uint2)(0x8000000a,0x00000000), /* round 11 */
      (uint2)(0x8000808b,0x00000000), /* round 12 */
      (uint2)(0x0000008b,0x80000000), /* round 13 */
      (uint2)(0x00008089,0x80000000), /* round 14 */
      (uint2)(0x00008003,0x80000000), /* round 15 */
      (uint2)(0x00008002,0x80000000), /* round 16 */
      (uint2)(0x00000080,0x80000000), /* round 17 */
      (uint2)(0x0000800a,0x00000000), /* round 18 */
      (uint2)(0x8000000a,0x80000000), /* round 19 */
      (uint2)(0x80008081,0x80000000), /* round 20 */
      (uint2)(0x00008080,0x80000000), /* round 21 */
      (uint2)(0x80000001,0x00000000), /* round 22 */
      (uint2)(0x80008008,0x80000000)  /* round 23 */
  };
  /*
   * Rotate a 64-bit lane left by y bits. The lane is stored as two 32-bit
   * words: x.x is the low word and x.y the high word.
   *
   * Every call visible in this file passes 1 <= y <= 31; the expression is
   * not a valid rotation for y == 0 or y >= 32 (use ROTL64_2 for 32 + y).
   */
  uint2 ROTL64_1(const uint2 x, const uint y)
  {
      const uint spill = 32 - y;                     /* bits crossing the word boundary */
      const uint low   = (x.x << y) ^ (x.y >> spill);
      const uint high  = (x.y << y) ^ (x.x >> spill);

      return (uint2)(low, high);
  }
  /*
   * Rotate a 64-bit lane left by 32 + y bits. The lane is stored as two
   * 32-bit words (x.x low, x.y high); the extra 32-bit rotation is realised
   * by swapping which input word feeds which output word.
   *
   * Every call visible in this file passes 1 <= y <= 31; the expression is
   * not a valid rotation for y == 0 or y >= 32.
   */
  uint2 ROTL64_2(const uint2 x, const uint y)
  {
      const uint spill = 32 - y;                     /* bits crossing the word boundary */
      const uint low   = (x.y << y) ^ (x.x >> spill);
      const uint high  = (x.x << y) ^ (x.y >> spill);

      return (uint2)(low, high);
  }
  /*
   * One Keccak-f[1600] round, applied in place to the 25 lanes *s0 .. *s24.
   *
   * Requirements on the expansion site:
   *   - s0 .. s24 are pointers to uint2 lanes (lane index = x + 5*y);
   *   - uint2 scratch variables m0 .. m6 are declared and may be clobbered;
   *   - i indexes keccak_round_constants (0 .. 23).
   *
   * The statement order matters: the rho/pi chain overwrites each lane only
   * after its old value has been consumed, and chi saves the first two lanes
   * of a row in m5/m6 before they are overwritten.
   *
   * Only block comments may be used inside the macro body, because a line
   * comment would swallow the continued lines that follow it.
   */
  #define RND(i) \
      /* theta: m0..m4 are the column corrections for columns 1,2,3,4,0 */ \
      m0 = *s0 ^ *s5 ^ *s10 ^ *s15 ^ *s20 ^ ROTL64_1(*s2 ^ *s7 ^ *s12 ^ *s17 ^ *s22, 1); \
      m1 = *s1 ^ *s6 ^ *s11 ^ *s16 ^ *s21 ^ ROTL64_1(*s3 ^ *s8 ^ *s13 ^ *s18 ^ *s23, 1); \
      m2 = *s2 ^ *s7 ^ *s12 ^ *s17 ^ *s22 ^ ROTL64_1(*s4 ^ *s9 ^ *s14 ^ *s19 ^ *s24, 1); \
      m3 = *s3 ^ *s8 ^ *s13 ^ *s18 ^ *s23 ^ ROTL64_1(*s0 ^ *s5 ^ *s10 ^ *s15 ^ *s20, 1); \
      m4 = *s4 ^ *s9 ^ *s14 ^ *s19 ^ *s24 ^ ROTL64_1(*s1 ^ *s6 ^ *s11 ^ *s16 ^ *s21, 1); \
      \
      /* keep the corrected lane 1: it is overwritten first, consumed last */ \
      m5 = *s1 ^ m0; \
      \
      /* theta correction fused with rho (rotation) and pi (lane move); */ \
      /* ROTL64_2(v, n) rotates by 32 + n */ \
      *s0 ^= m4; \
      *s1  = ROTL64_2(*s6  ^ m0, 12); \
      *s6  = ROTL64_1(*s9  ^ m3, 20); \
      *s9  = ROTL64_2(*s22 ^ m1, 29); \
      *s22 = ROTL64_2(*s14 ^ m3, 7); \
      *s14 = ROTL64_1(*s20 ^ m4, 18); \
      *s20 = ROTL64_2(*s2  ^ m1, 30); \
      *s2  = ROTL64_2(*s12 ^ m1, 11); \
      *s12 = ROTL64_1(*s13 ^ m2, 25); \
      *s13 = ROTL64_1(*s19 ^ m3, 8); \
      *s19 = ROTL64_2(*s23 ^ m2, 24); \
      *s23 = ROTL64_2(*s15 ^ m4, 9); \
      *s15 = ROTL64_1(*s4  ^ m3, 27); \
      *s4  = ROTL64_1(*s24 ^ m3, 14); \
      *s24 = ROTL64_1(*s21 ^ m0, 2); \
      *s21 = ROTL64_2(*s8  ^ m2, 23); \
      *s8  = ROTL64_2(*s16 ^ m0, 13); \
      *s16 = ROTL64_2(*s5  ^ m4, 4); \
      *s5  = ROTL64_1(*s3  ^ m2, 28); \
      *s3  = ROTL64_1(*s18 ^ m2, 21); \
      *s18 = ROTL64_1(*s17 ^ m1, 15); \
      *s17 = ROTL64_1(*s11 ^ m0, 10); \
      *s11 = ROTL64_1(*s7  ^ m1, 6); \
      *s7  = ROTL64_1(*s10 ^ m4, 3); \
      *s10 = ROTL64_1(m5, 1); \
      \
      /* chi, row by row: a ^= ~b & c, written as bitselect(a ^ c, a, b) */ \
      m5 = *s0; \
      m6 = *s1; \
      *s0 = bitselect(*s0 ^ *s2, *s0, *s1); \
      *s1 = bitselect(*s1 ^ *s3, *s1, *s2); \
      *s2 = bitselect(*s2 ^ *s4, *s2, *s3); \
      *s3 = bitselect(*s3 ^ m5, *s3, *s4); \
      *s4 = bitselect(*s4 ^ m6, *s4, m5); \
      \
      m5 = *s5; \
      m6 = *s6; \
      *s5 = bitselect(*s5 ^ *s7, *s5, *s6); \
      *s6 = bitselect(*s6 ^ *s8, *s6, *s7); \
      *s7 = bitselect(*s7 ^ *s9, *s7, *s8); \
      *s8 = bitselect(*s8 ^ m5, *s8, *s9); \
      *s9 = bitselect(*s9 ^ m6, *s9, m5); \
      \
      m5 = *s10; \
      m6 = *s11; \
      *s10 = bitselect(*s10 ^ *s12, *s10, *s11); \
      *s11 = bitselect(*s11 ^ *s13, *s11, *s12); \
      *s12 = bitselect(*s12 ^ *s14, *s12, *s13); \
      *s13 = bitselect(*s13 ^ m5, *s13, *s14); \
      *s14 = bitselect(*s14 ^ m6, *s14, m5); \
      \
      m5 = *s15; \
      m6 = *s16; \
      *s15 = bitselect(*s15 ^ *s17, *s15, *s16); \
      *s16 = bitselect(*s16 ^ *s18, *s16, *s17); \
      *s17 = bitselect(*s17 ^ *s19, *s17, *s18); \
      *s18 = bitselect(*s18 ^ m5, *s18, *s19); \
      *s19 = bitselect(*s19 ^ m6, *s19, m5); \
      \
      m5 = *s20; \
      m6 = *s21; \
      *s20 = bitselect(*s20 ^ *s22, *s20, *s21); \
      *s21 = bitselect(*s21 ^ *s23, *s21, *s22); \
      *s22 = bitselect(*s22 ^ *s24, *s22, *s23); \
      *s23 = bitselect(*s23 ^ m5, *s23, *s24); \
      *s24 = bitselect(*s24 ^ m6, *s24, m5); \
      \
      /* iota: fold the round constant into lane 0 */ \
      *s0 ^= keccak_round_constants[i];
  /*
   * Runs rounds 0 through 23 of the permutation, in order, on the 25 lanes
   * reached through s0 .. s24. The lanes are updated in place; nothing is
   * XORed into the state beforehand (the caller supplies the full state).
   */
  void keccak_block_noabsorb(ARGS_25(uint2* s))
  {
      /* Scratch lanes used by name inside RND; they must be called m0..m6. */
      uint2 m0, m1, m2, m3, m4, m5, m6;

      RND(0);

      /* Rounds 1..21, three rounds per loop iteration. */
      for (int round = 1; round < 22; round += 3)
      {
          RND(round);
          RND(round + 1);
          RND(round + 2);
      }

      RND(22);
      RND(23);
  }
  /* Index in `output` of the hit counter. */
  #define FOUND (0x0F)

  /*
   * Record a candidate: bump the hit counter and store the value in the slot
   * numbered by the counter's previous value.
   *
   * The increment is atomic because every work-item shares the same counter;
   * a plain `output[FOUND]++` lets two work-items that hit at the same time
   * read the same count, overwrite each other's slot and lose an increment.
   * atomic_inc returns the old value, so the slot/counter layout seen by the
   * host is unchanged. (32-bit global atomics are core from OpenCL 1.1.)
   *
   * NOTE(review): there is no bound on the slot index; presumably the host
   * zeroes the counter before each launch and hits are rare enough that it
   * never reaches FOUND - confirm against the host code.
   */
  #define SETFOUND(Xnonce) output[atomic_inc(&output[FOUND])] = Xnonce

  /*
   * Per-work-item search: builds a 25-lane state from the input block and the
   * work-item's global id, runs keccak_block_noabsorb on it, and reports the
   * global id through `output` when the result passes the threshold test.
   *
   * in     - at least 10 uint2 lanes; in[0..8] are used whole, in[9] only
   *          contributes its low word (.x).
   * output - uint buffer; output[FOUND] is the hit counter, hits are written
   *          to the slots indexed by that counter.
   */
  __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  __kernel void search(__global const uint2*restrict in, __global uint*restrict output)
  {
      /* Candidate value for this work-item, narrowed to 32 bits once. */
      const uint nonce = (uint)get_global_id(0);

      uint2 ARGS_25(state);

      /* Lanes 0..8 are copied from the input unchanged. */
      state0 = in[0];
      state1 = in[1];
      state2 = in[2];
      state3 = in[3];
      state4 = in[4];
      state5 = in[5];
      state6 = in[6];
      state7 = in[7];
      state8 = in[8];

      /* Lane 9: low word from the input, high word is the candidate. */
      state9 = (uint2)(in[9].x, nonce);

      /*
       * Remaining lanes are constants: a single 1 bit at the bottom of lane 10
       * and the top bit of lane 16, everything else zero.
       * NOTE(review): this looks like Keccak 0x01 ... 0x80 padding of an
       * 80-byte message for a 136-byte rate - confirm against the host hash.
       */
      state10 = (uint2)(1,0);
      state11 = 0;
      state12 = 0;
      state13 = 0;
      state14 = 0;
      state15 = 0;
      state16 = (uint2)(0,0x80000000U);
      state17 = 0;
      state18 = 0;
      state19 = 0;
      state20 = 0;
      state21 = 0;
      state22 = 0;
      state23 = 0;
      state24 = 0;

      keccak_block_noabsorb(ARGS_25(&state));

      /* Hit when the top 28 bits of the high word of lane 3 are all zero. */
      if ((state3.y & 0xFFFFFFF0U) == 0)
      {
          SETFOUND(nonce);
      }
  }