Browse Source

Keccak: Import algorithm code as-is

Luke Mitchell 12 years ago
parent
commit
d505eb688d
3 changed files with 459 additions and 0 deletions
  1. 1 0
      AUTHORS
  2. 328 0
      malgo/keccak.c
  3. 130 0
      opencl/keccak.cl

+ 1 - 0
AUTHORS

@@ -54,6 +54,7 @@ Jean-Luc Cooke <jlcooke@certainkey.com>
 Jonathan Lynch <jonathan.lynch@intel.com>
 Josh Lehan <krellan@krellan.net>
 Lingchao Xu <lingchao.xu@bitmaintech.com>
+Luke Mitchell <Luke.Mitchell.2011@my.bristol.ac.uk>
 Mark Crichton <crichton@gmail.com>
 Martin Danielsen <kalroth@gmail.com>
 Michael Kedzierski <michael@kedzierski.id.au>

+ 328 - 0
malgo/keccak.c

@@ -0,0 +1,328 @@
+#include "config.h"
+#include "miner.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <stdio.h>
+
+#define CL_SET_BLKARG(blkvar) status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->blkvar)
+#define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
+
+struct uint256 {
+	unsigned char v[32];
+};
+typedef struct uint256 uint256;
+
+typedef unsigned long long UINT64;
+
+#define ROL(a, offset) ((a << offset) | (a >> (64-offset)))
+
+static const UINT64 KeccakF_RoundConstants[24] = {
+	0x0000000000000001ULL,
+	0x0000000000008082ULL,
+	0x800000000000808aULL,
+	0x8000000080008000ULL,
+	0x000000000000808bULL,
+	0x0000000080000001ULL,
+	0x8000000080008081ULL,
+	0x8000000000008009ULL,
+	0x000000000000008aULL,
+	0x0000000000000088ULL,
+	0x0000000080008009ULL,
+	0x000000008000000aULL,
+	0x000000008000808bULL,
+	0x800000000000008bULL,
+	0x8000000000008089ULL,
+	0x8000000000008003ULL,
+	0x8000000000008002ULL,
+	0x8000000000000080ULL,
+	0x000000000000800aULL,
+	0x800000008000000aULL,
+	0x8000000080008081ULL,
+	0x8000000000008080ULL,
+	0x0000000080000001ULL,
+	0x8000000080008008ULL
+};
+
+struct bin32 {
+	UINT64 v0;
+	UINT64 v1;
+	UINT64 v2;
+	UINT64 v3;
+};
+
+void keccak1(unsigned char *out, const unsigned char *inraw, unsigned inrawlen)
+{
+	unsigned char temp[136];
+	unsigned round;
+	
+	UINT64 Aba, Abe, Abi, Abo, Abu;
+	UINT64 Aga, Age, Agi, Ago, Agu;
+	UINT64 Aka, Ake, Aki, Ako, Aku;
+	UINT64 Ama, Ame, Ami, Amo, Amu;
+	UINT64 Asa, Ase, Asi, Aso, Asu;
+	UINT64 BCa, BCe, BCi, BCo, BCu;
+	UINT64 Da, De, Di, Do, Du;
+	UINT64 Eba, Ebe, Ebi, Ebo, Ebu;
+	UINT64 Ega, Ege, Egi, Ego, Egu;
+	UINT64 Eka, Eke, Eki, Eko, Eku;
+	UINT64 Ema, Eme, Emi, Emo, Emu;
+	UINT64 Esa, Ese, Esi, Eso, Esu;
+	
+	memcpy(temp, inraw, inrawlen);
+	temp[inrawlen++] = 1;
+	memset( temp+inrawlen, 0, 136 - inrawlen);
+	temp[136-1] |= 0x80;
+	const UINT64 *in = (const UINT64 *)temp;
+	
+	// copyFromState(A, state)
+	Aba = in[ 0];
+	Abe = in[ 1];
+	Abi = in[ 2];
+	Abo = in[ 3];
+	Abu = in[ 4];
+	Aga = in[ 5];
+	Age = in[ 6];
+	Agi = in[ 7];
+	Ago = in[ 8];
+	Agu = in[ 9];
+	Aka = in[10];
+	Ake = in[11];
+	Aki = in[12];
+	Ako = in[13];
+	Aku = in[14];
+	Ama = in[15];
+	Ame = in[16];
+	Ami = 0;
+	Amo = 0;
+	Amu = 0;
+	Asa = 0;
+	Ase = 0;
+	Asi = 0;
+	Aso = 0;
+	Asu = 0;
+	
+	for (round = 0; round < 24; round += 2)
+	{
+		// prepareTheta
+		BCa = Aba^Aga^Aka^Ama^Asa;
+		BCe = Abe^Age^Ake^Ame^Ase;
+		BCi = Abi^Agi^Aki^Ami^Asi;
+		BCo = Abo^Ago^Ako^Amo^Aso;
+		BCu = Abu^Agu^Aku^Amu^Asu;
+		
+		// thetaRhoPiChiIotaPrepareTheta(round, A, E)
+		Da = BCu^ROL(BCe, 1);
+		De = BCa^ROL(BCi, 1);
+		Di = BCe^ROL(BCo, 1);
+		Do = BCi^ROL(BCu, 1);
+		Du = BCo^ROL(BCa, 1);
+		
+		Aba ^= Da;
+		BCa = Aba;
+		Age ^= De;
+		BCe = ROL(Age, 44);
+		Aki ^= Di;
+		BCi = ROL(Aki, 43);
+		Amo ^= Do;
+		BCo = ROL(Amo, 21);
+		Asu ^= Du;
+		BCu = ROL(Asu, 14);
+		Eba = BCa ^((~BCe) & BCi);
+		Eba ^= KeccakF_RoundConstants[round];
+		Ebe = BCe ^((~BCi) & BCo);
+		Ebi = BCi ^((~BCo) & BCu);
+		Ebo = BCo ^((~BCu) & BCa);
+		Ebu = BCu ^((~BCa) & BCe);
+		
+		Abo ^= Do;
+		BCa = ROL(Abo, 28);
+		Agu ^= Du;
+		BCe = ROL(Agu, 20);
+		Aka ^= Da;
+		BCi = ROL(Aka,  3);
+		Ame ^= De;
+		BCo = ROL(Ame, 45);
+		Asi ^= Di;
+		BCu = ROL(Asi, 61);
+		Ega = BCa ^((~BCe) & BCi);
+		Ege = BCe ^((~BCi) & BCo);
+		Egi = BCi ^((~BCo) & BCu);
+		Ego = BCo ^((~BCu) & BCa);
+		Egu = BCu ^((~BCa) & BCe);
+		
+		Abe ^= De;
+		BCa = ROL(Abe,  1);
+		Agi ^= Di;
+		BCe = ROL(Agi,  6);
+		Ako ^= Do;
+		BCi = ROL(Ako, 25);
+		Amu ^= Du;
+		BCo = ROL(Amu,  8);
+		Asa ^= Da;
+		BCu = ROL(Asa, 18);
+		Eka = BCa ^((~BCe) & BCi);
+		Eke = BCe ^((~BCi) & BCo);
+		Eki = BCi ^((~BCo) & BCu);
+		Eko = BCo ^((~BCu) & BCa);
+		Eku = BCu ^((~BCa) & BCe);
+		
+		Abu ^= Du;
+		BCa = ROL(Abu, 27);
+		Aga ^= Da;
+		BCe = ROL(Aga, 36);
+		Ake ^= De;
+		BCi = ROL(Ake, 10);
+		Ami ^= Di;
+		BCo = ROL(Ami, 15);
+		Aso ^= Do;
+		BCu = ROL(Aso, 56);
+		Ema = BCa ^((~BCe) & BCi);
+		Eme = BCe ^((~BCi) & BCo);
+		Emi = BCi ^((~BCo) & BCu);
+		Emo = BCo ^((~BCu) & BCa);
+		Emu = BCu ^((~BCa) & BCe);
+		
+		Abi ^= Di;
+		BCa = ROL(Abi, 62);
+		Ago ^= Do;
+		BCe = ROL(Ago, 55);
+		Aku ^= Du;
+		BCi = ROL(Aku, 39);
+		Ama ^= Da;
+		BCo = ROL(Ama, 41);
+		Ase ^= De;
+		BCu = ROL(Ase,  2);
+		Esa = BCa ^((~BCe) & BCi);
+		Ese = BCe ^((~BCi) & BCo);
+		Esi = BCi ^((~BCo) & BCu);
+		Eso = BCo ^((~BCu) & BCa);
+		Esu = BCu ^((~BCa) & BCe);
+		
+		// prepareTheta
+		BCa = Eba^Ega^Eka^Ema^Esa;
+		BCe = Ebe^Ege^Eke^Eme^Ese;
+		BCi = Ebi^Egi^Eki^Emi^Esi;
+		BCo = Ebo^Ego^Eko^Emo^Eso;
+		BCu = Ebu^Egu^Eku^Emu^Esu;
+		
+		// thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
+		Da = BCu^ROL(BCe, 1);
+		De = BCa^ROL(BCi, 1);
+		Di = BCe^ROL(BCo, 1);
+		Do = BCi^ROL(BCu, 1);
+		Du = BCo^ROL(BCa, 1);
+		
+		Eba ^= Da;
+		BCa = Eba;
+		Ege ^= De;
+		BCe = ROL(Ege, 44);
+		Eki ^= Di;
+		BCi = ROL(Eki, 43);
+		Emo ^= Do;
+		BCo = ROL(Emo, 21);
+		Esu ^= Du;
+		BCu = ROL(Esu, 14);
+		Aba = BCa ^((~BCe) & BCi);
+		Aba ^= KeccakF_RoundConstants[round+1];
+		Abe = BCe ^((~BCi) & BCo);
+		Abi = BCi ^((~BCo) & BCu);
+		Abo = BCo ^((~BCu) & BCa);
+		Abu = BCu ^((~BCa) & BCe);
+		
+		Ebo ^= Do;
+		BCa = ROL(Ebo, 28);
+		Egu ^= Du;
+		BCe = ROL(Egu, 20);
+		Eka ^= Da;
+		BCi = ROL(Eka, 3);
+		Eme ^= De;
+		BCo = ROL(Eme, 45);
+		Esi ^= Di;
+		BCu = ROL(Esi, 61);
+		Aga = BCa ^((~BCe) & BCi);
+		Age = BCe ^((~BCi) & BCo);
+		Agi = BCi ^((~BCo) & BCu);
+		Ago = BCo ^((~BCu) & BCa);
+		Agu = BCu ^((~BCa) & BCe);
+		
+		Ebe ^= De;
+		BCa = ROL(Ebe, 1);
+		Egi ^= Di;
+		BCe = ROL(Egi, 6);
+		Eko ^= Do;
+		BCi = ROL(Eko, 25);
+		Emu ^= Du;
+		BCo = ROL(Emu, 8);
+		Esa ^= Da;
+		BCu = ROL(Esa, 18);
+		Aka = BCa ^((~BCe) & BCi);
+		Ake = BCe ^((~BCi) & BCo);
+		Aki = BCi ^((~BCo) & BCu);
+		Ako = BCo ^((~BCu) & BCa);
+		Aku = BCu ^((~BCa) & BCe);
+		
+		Ebu ^= Du;
+		BCa = ROL(Ebu, 27);
+		Ega ^= Da;
+		BCe = ROL(Ega, 36);
+		Eke ^= De;
+		BCi = ROL(Eke, 10);
+		Emi ^= Di;
+		BCo = ROL(Emi, 15);
+		Eso ^= Do;
+		BCu = ROL(Eso, 56);
+		Ama = BCa ^((~BCe) & BCi);
+		Ame = BCe ^((~BCi) & BCo);
+		Ami = BCi ^((~BCo) & BCu);
+		Amo = BCo ^((~BCu) & BCa);
+		Amu = BCu ^((~BCa) & BCe);
+		
+		Ebi ^= Di;
+		BCa = ROL(Ebi, 62);
+		Ego ^= Do;
+		BCe = ROL(Ego, 55);
+		Eku ^= Du;
+		BCi = ROL(Eku, 39);
+		Ema ^= Da;
+		BCo = ROL(Ema, 41);
+		Ese ^= De;
+		BCu = ROL(Ese, 2);
+		Asa = BCa ^((~BCe) & BCi);
+		Ase = BCe ^((~BCi) & BCo);
+		Asi = BCi ^((~BCo) & BCu);
+		Aso = BCo ^((~BCu) & BCa);
+		Asu = BCu ^((~BCa) & BCe);
+	}
+	{
+		UINT64 *out64 = (UINT64 *)out;
+		out64[ 0] = Aba;
+		out64[ 1] = Abe;
+		out64[ 2] = Abi;
+		out64[ 3] = Abo;
+	}
+}
+
+void keccak_regenhash(struct work *work)
+{
+	uint256 result;
+	
+	unsigned int data[20], datacopy[20]; // aligned for flip80
+	memcpy(datacopy, work->data, 80);
+	flip80(data, datacopy); 
+	keccak1((unsigned char*)&result, (unsigned char*)data, 80);
+	
+	memcpy(work->hash, &result, 32);
+}
+
+bool keccak_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
+{
+	unsigned int src[20], dst[20]; // aligned for flip80
+	int i;
+	memcpy(src, work->data, 80);
+	flip80(dst, src);
+	memcpy(work->blk.keccak_data, dst, 80);
+	return true;
+}

+ 130 - 0
opencl/keccak.cl

@@ -0,0 +1,130 @@
+#define ARGS_25(x) x ## 0, x ## 1, x ## 2, x ## 3, x ## 4, x ## 5, x ## 6, x ## 7, x ## 8, x ## 9, x ## 10, x ## 11, x ## 12, x ## 13, x ## 14, x ## 15, x ## 16, x ## 17, x ## 18, x ## 19, x ## 20, x ## 21, x ## 22, x ## 23, x ## 24
+
+__constant uint2 keccak_round_constants[24] = {
+	(uint2)(0x00000001,0x00000000), (uint2)(0x00008082,0x00000000),
+	(uint2)(0x0000808a,0x80000000), (uint2)(0x80008000,0x80000000),
+	(uint2)(0x0000808b,0x00000000), (uint2)(0x80000001,0x00000000),
+	(uint2)(0x80008081,0x80000000), (uint2)(0x00008009,0x80000000),
+	(uint2)(0x0000008a,0x00000000), (uint2)(0x00000088,0x00000000),
+	(uint2)(0x80008009,0x00000000), (uint2)(0x8000000a,0x00000000),
+	(uint2)(0x8000808b,0x00000000), (uint2)(0x0000008b,0x80000000),
+	(uint2)(0x00008089,0x80000000), (uint2)(0x00008003,0x80000000),
+	(uint2)(0x00008002,0x80000000), (uint2)(0x00000080,0x80000000),
+	(uint2)(0x0000800a,0x00000000), (uint2)(0x8000000a,0x80000000),
+	(uint2)(0x80008081,0x80000000), (uint2)(0x00008080,0x80000000),
+	(uint2)(0x80000001,0x00000000), (uint2)(0x80008008,0x80000000)
+};
+
+uint2 ROTL64_1(const uint2 x, const uint y)
+{
+	return (uint2)((x.x<<y)^(x.y>>(32-y)),(x.y<<y)^(x.x>>(32-y)));
+}
+uint2 ROTL64_2(const uint2 x, const uint y)
+{
+	return (uint2)((x.y<<y)^(x.x>>(32-y)),(x.x<<y)^(x.y>>(32-y)));
+}
+
+#define RND(i) \
+		m0 = *s0 ^ *s5 ^ *s10 ^ *s15 ^ *s20 ^ ROTL64_1(*s2 ^ *s7 ^ *s12 ^ *s17 ^ *s22, 1);\
+		m1 = *s1 ^ *s6 ^ *s11 ^ *s16 ^ *s21 ^ ROTL64_1(*s3 ^ *s8 ^ *s13 ^ *s18 ^ *s23, 1);\
+		m2 = *s2 ^ *s7 ^ *s12 ^ *s17 ^ *s22 ^ ROTL64_1(*s4 ^ *s9 ^ *s14 ^ *s19 ^ *s24, 1);\
+		m3 = *s3 ^ *s8 ^ *s13 ^ *s18 ^ *s23 ^ ROTL64_1(*s0 ^ *s5 ^ *s10 ^ *s15 ^ *s20, 1);\
+		m4 = *s4 ^ *s9 ^ *s14 ^ *s19 ^ *s24 ^ ROTL64_1(*s1 ^ *s6 ^ *s11 ^ *s16 ^ *s21, 1);\
+\
+		m5 = *s1^m0;\
+\
+		*s0 ^= m4;\
+		*s1 = ROTL64_2(*s6^m0, 12);\
+		*s6 = ROTL64_1(*s9^m3, 20);\
+		*s9 = ROTL64_2(*s22^m1, 29);\
+		*s22 = ROTL64_2(*s14^m3, 7);\
+		*s14 = ROTL64_1(*s20^m4, 18);\
+		*s20 = ROTL64_2(*s2^m1, 30);\
+		*s2 = ROTL64_2(*s12^m1, 11);\
+		*s12 = ROTL64_1(*s13^m2, 25);\
+		*s13 = ROTL64_1(*s19^m3,  8);\
+		*s19 = ROTL64_2(*s23^m2, 24);\
+		*s23 = ROTL64_2(*s15^m4, 9);\
+		*s15 = ROTL64_1(*s4^m3, 27);\
+		*s4 = ROTL64_1(*s24^m3, 14);\
+		*s24 = ROTL64_1(*s21^m0,  2);\
+		*s21 = ROTL64_2(*s8^m2, 23);\
+		*s8 = ROTL64_2(*s16^m0, 13);\
+		*s16 = ROTL64_2(*s5^m4, 4);\
+		*s5 = ROTL64_1(*s3^m2, 28);\
+		*s3 = ROTL64_1(*s18^m2, 21);\
+		*s18 = ROTL64_1(*s17^m1, 15);\
+		*s17 = ROTL64_1(*s11^m0, 10);\
+		*s11 = ROTL64_1(*s7^m1,  6);\
+		*s7 = ROTL64_1(*s10^m4,  3);\
+		*s10 = ROTL64_1(      m5,  1);\
+		\
+		m5 = *s0; m6 = *s1; *s0 = bitselect(*s0^*s2,*s0,*s1); *s1 = bitselect(*s1^*s3,*s1,*s2); *s2 = bitselect(*s2^*s4,*s2,*s3); *s3 = bitselect(*s3^m5,*s3,*s4); *s4 = bitselect(*s4^m6,*s4,m5);\
+		m5 = *s5; m6 = *s6; *s5 = bitselect(*s5^*s7,*s5,*s6); *s6 = bitselect(*s6^*s8,*s6,*s7); *s7 = bitselect(*s7^*s9,*s7,*s8); *s8 = bitselect(*s8^m5,*s8,*s9); *s9 = bitselect(*s9^m6,*s9,m5);\
+		m5 = *s10; m6 = *s11; *s10 = bitselect(*s10^*s12,*s10,*s11); *s11 = bitselect(*s11^*s13,*s11,*s12); *s12 = bitselect(*s12^*s14,*s12,*s13); *s13 = bitselect(*s13^m5,*s13,*s14); *s14 = bitselect(*s14^m6,*s14,m5);\
+		m5 = *s15; m6 = *s16; *s15 = bitselect(*s15^*s17,*s15,*s16); *s16 = bitselect(*s16^*s18,*s16,*s17); *s17 = bitselect(*s17^*s19,*s17,*s18); *s18 = bitselect(*s18^m5,*s18,*s19); *s19 = bitselect(*s19^m6,*s19,m5);\
+		m5 = *s20; m6 = *s21; *s20 = bitselect(*s20^*s22,*s20,*s21); *s21 = bitselect(*s21^*s23,*s21,*s22); *s22 = bitselect(*s22^*s24,*s22,*s23); *s23 = bitselect(*s23^m5,*s23,*s24); *s24 = bitselect(*s24^m6,*s24,m5);\
+\
+		*s0 ^= keccak_round_constants[i];
+
+void keccak_block_noabsorb(ARGS_25(uint2* s))
+{
+	uint2 m0,m1,m2,m3,m4,m5,m6;
+	RND(0);
+	for (int i = 1; i < 22; ++i) 
+	{
+		RND(i);
+		++i;
+		RND(i);
+		++i;
+		RND(i);
+	}
+	RND(22);
+	RND(23);
+}
+
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(__global const uint2*restrict in, __global uint*restrict output)
+{
+	uint2 ARGS_25(state);
+	
+	state0 = in[0];
+	state1 = in[1];
+	state2 = in[2];
+	state3 = in[3];
+	state4 = in[4];
+	state5 = in[5];
+	state6 = in[6];
+	state7 = in[7];
+	state8 = in[8];
+	state9 = (uint2)(in[9].x,get_global_id(0));
+	state10 = (uint2)(1,0);
+	state11 = 0;
+	state12 = 0;
+	state13 = 0;
+	state14 = 0;
+	state15 = 0;
+	state16 = (uint2)(0,0x80000000U);
+	state17 = 0;
+	state18 = 0;
+	state19 = 0;
+	state20 = 0;
+	state21 = 0;
+	state22 = 0;
+	state23 = 0;
+	state24 = 0;
+	
+	keccak_block_noabsorb(ARGS_25(&state));
+	
+#define FOUND (0x0F)
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
+	
+	if ((state3.y & 0xFFFFFFF0U) == 0)
+	{
+		SETFOUND(get_global_id(0));
+	}
+}
+
+/*-
+ * Scrypt-jane public domain, OpenCL implementation of scrypt(keccak,chacha,SCRYPTN,1,1) 2013 mtrlt
+ */