Browse Source

Use any() in kernel output code and revert breakage of diakgcn kernel.

Con Kolivas 14 years ago
parent
commit
8f08a775ad
6 changed files with 101 additions and 80 deletions
  1. DiabloMiner120221.cl (+20 -18)
  2. device-gpu.c (+3 -2)
  3. diakgcn120216.cl (+41 -26)
  4. findnonce.c (+4 -5)
  5. phatk120213.cl (+13 -11)
  6. poclbm120214.cl (+20 -18)

+ 20 - 18
DiabloMiner120221.cl

@@ -1237,26 +1237,28 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
 #define NFLAG (0x7F)
 #define NFLAG (0x7F)
 
 
 #if defined(VECTORS4)
 #if defined(VECTORS4)
-	ZA[924] ^= 0x136032EDU;
-	bool result = ZA[924].x & ZA[924].y & ZA[924].z & ZA[924].w;
-	if (!result) {
-		if (!ZA[924].x)
-			output[FOUND] = output[NFLAG & Znonce.x] =  Znonce.x;
-		if (!ZA[924].y)
-			output[FOUND] = output[NFLAG & Znonce.y] =  Znonce.y;
-		if (!ZA[924].z)
-			output[FOUND] = output[NFLAG & Znonce.z] =  Znonce.z;
-		if (!ZA[924].w)
-			output[FOUND] = output[NFLAG & Znonce.w] =  Znonce.w;
+	bool result = any(ZA[924] == 0x136032EDU);
+
+	if (result) {
+		output[FOUND] = FOUND;
+		if (ZA[924].x == 0x136032EDU)
+			output[NFLAG & Znonce.x] =  Znonce.x;
+		if (ZA[924].y == 0x136032EDU)
+			output[NFLAG & Znonce.y] =  Znonce.y;
+		if (ZA[924].z == 0x136032EDU)
+			output[NFLAG & Znonce.z] =  Znonce.z;
+		if (ZA[924].w == 0x136032EDU)
+			output[NFLAG & Znonce.w] =  Znonce.w;
 	}
 	}
 #elif defined(VECTORS2)
 #elif defined(VECTORS2)
-	ZA[924] ^= 0x136032EDU;
-	bool result = ZA[924].x & ZA[924].y;
-	if (!result) {
-		if (!ZA[924].x)
-			output[FOUND] = output[NFLAG & Znonce.x] =  Znonce.x;
-		if (!ZA[924].y)
-			output[FOUND] = output[NFLAG & Znonce.y] =  Znonce.y;
+	bool result = any(ZA[924] == 0x136032EDU);
+
+	if (result) {
+		output[FOUND] = FOUND;
+		if (ZA[924].x == 0x136032EDU)
+			output[NFLAG & Znonce.x] =  Znonce.x;
+		if (ZA[924].y == 0x136032EDU)
+			output[NFLAG & Znonce.y] =  Znonce.y;
 	}
 	}
 #else
 #else
 	if (ZA[924] == 0x136032EDU)
 	if (ZA[924] == 0x136032EDU)

+ 3 - 2
device-gpu.c

@@ -748,7 +748,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
 	return status;
 	return status;
 }
 }
 
 
-static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
+static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
+				   __maybe_unused cl_uint threads)
 {
 {
 	cl_uint vwidth = clState->preferred_vwidth;
 	cl_uint vwidth = clState->preferred_vwidth;
 	cl_kernel *kernel = &clState->kernel;
 	cl_kernel *kernel = &clState->kernel;
@@ -758,7 +759,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint
 
 
 	nonces = alloca(sizeof(uint) * vwidth);
 	nonces = alloca(sizeof(uint) * vwidth);
 	for (i = 0; i < vwidth; i++)
 	for (i = 0; i < vwidth; i++)
-		nonces[i] = blk->nonce + (i * threads);
+		nonces[i] = blk->nonce + i;
 	CL_SET_VARG(vwidth, nonces);
 	CL_SET_VARG(vwidth, nonces);
 
 
 	CL_SET_BLKARG(PreVal0);
 	CL_SET_BLKARG(PreVal0);

+ 41 - 26
diakgcn120216.cl

@@ -55,18 +55,30 @@ __kernel
 	u V[8];
 	u V[8];
 	u W[16];
 	u W[16];
 
 
-#ifdef GOFFSET
-	#ifdef VECTORS8
+#ifdef VECTORS8
+	#ifdef GOFFSET
 		const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
 		const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
-	#elif defined VECTORS4
+	#else
+		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
+	#endif
+#elif defined VECTORS4
+	#ifdef GOFFSET
 		const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
 		const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
-	#elif defined VECTORS2
+	#else
+		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
+	#endif
+#elif defined VECTORS2
+	#ifdef GOFFSET
 		const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
 		const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
 	#else
 	#else
-		const u nonce = (uint)get_global_id(0);
+		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
 	#endif
 	#endif
 #else
 #else
-	const u nonce = base + (uint)(get_global_id(0));
+	#ifdef GOFFSET
+		const u nonce = (uint)get_global_id(0);
+	#else
+		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
+	#endif
 #endif
 #endif
 
 
 	V[0] = PreVal0 + nonce;
 	V[0] = PreVal0 + nonce;
@@ -585,51 +597,54 @@ __kernel
 #ifdef VECTORS8
 #ifdef VECTORS8
 	V[7] ^= 0x136032ed;
 	V[7] ^= 0x136032ed;
 
 
-	bool result = V[7].s0 & V[7].s1 & V[7].s2 & V[7].s3 & V[7].s4 & V[7].s5 & V[7].s6 & V[7].s7;
+	bool result = any(V[7] == 0);
 
 
-	if (!result) {
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!V[7].s0)
 		if (!V[7].s0)
-			output[FOUND] = output[NFLAG & nonce.s0] = nonce.s0;
+			output[NFLAG & nonce.s0] = nonce.s0;
 		if (!V[7].s1)
 		if (!V[7].s1)
-			output[FOUND] = output[NFLAG & nonce.s1] = nonce.s1;
+			output[NFLAG & nonce.s1] = nonce.s1;
 		if (!V[7].s2)
 		if (!V[7].s2)
-			output[FOUND] = output[NFLAG & nonce.s2] = nonce.s2;
+			output[NFLAG & nonce.s2] = nonce.s2;
 		if (!V[7].s3)
 		if (!V[7].s3)
-			output[FOUND] = output[NFLAG & nonce.s3] = nonce.s3;
+			output[NFLAG & nonce.s3] = nonce.s3;
 		if (!V[7].s4)
 		if (!V[7].s4)
-			output[FOUND] = output[NFLAG & nonce.s4] = nonce.s4;
+			output[NFLAG & nonce.s4] = nonce.s4;
 		if (!V[7].s5)
 		if (!V[7].s5)
-			output[FOUND] = output[NFLAG & nonce.s5] = nonce.s5;
+			output[NFLAG & nonce.s5] = nonce.s5;
 		if (!V[7].s6)
 		if (!V[7].s6)
-			output[FOUND] = output[NFLAG & nonce.s6] = nonce.s6;
+			output[NFLAG & nonce.s6] = nonce.s6;
 		if (!V[7].s7)
 		if (!V[7].s7)
-			output[FOUND] = output[NFLAG & nonce.s7] = nonce.s7;
+			output[NFLAG & nonce.s7] = nonce.s7;
 	}
 	}
 #elif defined VECTORS4
 #elif defined VECTORS4
 	V[7] ^= 0x136032ed;
 	V[7] ^= 0x136032ed;
 
 
-	bool result = V[7].x & V[7].y & V[7].z & V[7].w;
+	bool result = any(V[7] == 0);
 
 
-	if (!result) {
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!V[7].x)
 		if (!V[7].x)
-			output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
+			output[NFLAG & nonce.x] = nonce.x;
 		if (!V[7].y)
 		if (!V[7].y)
-			output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
+			output[NFLAG & nonce.y] = nonce.y;
 		if (!V[7].z)
 		if (!V[7].z)
-			output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
+			output[NFLAG & nonce.z] = nonce.z;
 		if (!V[7].w)
 		if (!V[7].w)
-			output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
+			output[NFLAG & nonce.w] = nonce.w;
 	}
 	}
 #elif defined VECTORS2
 #elif defined VECTORS2
 	V[7] ^= 0x136032ed;
 	V[7] ^= 0x136032ed;
 
 
-	bool result = V[7].x & V[7].y;
+	bool result = any(V[7] == 0);
 
 
-	if (!result) {
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!V[7].x)
 		if (!V[7].x)
-			output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
+			output[NFLAG & nonce.x] = nonce.x;
 		if (!V[7].y)
 		if (!V[7].y)
-			output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
+			output[NFLAG & nonce.y] = nonce.y;
 	}
 	}
 #else
 #else
 	if (V[7] == 0x136032ed)
 	if (V[7] == 0x136032ed)

+ 4 - 5
findnonce.c

@@ -227,12 +227,11 @@ static void *postcalc_hash(void *userdata)
 
 
 	pthread_detach(pthread_self());
 	pthread_detach(pthread_self());
 
 
-	do {
-		if (pcd->res[entry]) {
+	for (entry = 0; entry < FOUND; entry++) {
+		if (pcd->res[entry])
 			send_nonce(pcd, pcd->res[entry]);
 			send_nonce(pcd, pcd->res[entry]);
-			nonces++;
-		}
-	} while (++entry < FOUND);
+		nonces++;
+	}
 
 
 	free(pcd);
 	free(pcd);
 
 

+ 13 - 11
phatk120213.cl

@@ -391,27 +391,29 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 #define NFLAG (0x7F)
 #define NFLAG (0x7F)
 
 
 #ifdef VECTORS4
 #ifdef VECTORS4
-	bool result = W[117].x & W[117].y & W[117].z & W[117].w;
-	if (!result) {
+	bool result = any(W[117] == 0);
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!W[117].x)
 		if (!W[117].x)
-			output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
+			output[NFLAG & W[3].x] = W[3].x;
 		if (!W[117].y)
 		if (!W[117].y)
-			output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
+			output[NFLAG & W[3].y] = W[3].y;
 		if (!W[117].z)
 		if (!W[117].z)
-			output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
+			output[NFLAG & W[3].z] = W[3].z;
 		if (!W[117].w)
 		if (!W[117].w)
-			output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
+			output[NFLAG & W[3].w] = W[3].w;
 	}
 	}
 #elif defined VECTORS2
 #elif defined VECTORS2
-	bool result = W[117].x & W[117].y;
-	if (!result) {
+	bool result = any(W[117] == 0);
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!W[117].x)
 		if (!W[117].x)
-			output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
+			output[NFLAG & W[3].x] = W[3].x;
 		if (!W[117].y)
 		if (!W[117].y)
-			output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
+			output[NFLAG & W[3].y] = W[3].y;
 	}
 	}
 #else
 #else
-	if (!W[117])
+	if (W[117] == 0)
 		output[FOUND] = output[NFLAG & W[3]] = W[3];
 		output[FOUND] = output[NFLAG & W[3]] = W[3];
 #endif
 #endif
 }
 }

+ 20 - 18
poclbm120214.cl

@@ -1256,26 +1256,28 @@ Vals[7]+=ch(Vals[0],Vals[1],Vals[2]);
 #define NFLAG (0x7F)
 #define NFLAG (0x7F)
 
 
 #if defined(VECTORS4)
 #if defined(VECTORS4)
-	Vals[7] ^= 0x136032ED;
-	bool result = Vals[7].x & Vals[7].y & Vals[7].z & Vals[7].w;
-	if (!result) {
-		if (!Vals[7].x)
-			output[FOUND] = output[NFLAG & nonce.x] =  nonce.x;
-		if (!Vals[7].y)
-			output[FOUND] = output[NFLAG & nonce.y] =  nonce.y;
-		if (!Vals[7].z)
-			output[FOUND] = output[NFLAG & nonce.z] =  nonce.z;
-		if (!Vals[7].w)
-			output[FOUND] = output[NFLAG & nonce.w] =  nonce.w;
+	bool result = any(Vals[7] == 0x136032ed);
+
+	if (result) {
+		output[FOUND] = FOUND;
+		if (Vals[7].x == 0x136032ed)
+			output[NFLAG & nonce.x] =  nonce.x;
+		if (Vals[7].y == 0x136032ed)
+			output[NFLAG & nonce.y] =  nonce.y;
+		if (Vals[7].z == 0x136032ed)
+			output[NFLAG & nonce.z] =  nonce.z;
+		if (Vals[7].w == 0x136032ed)
+			output[NFLAG & nonce.w] =  nonce.w;
 	}
 	}
 #elif defined(VECTORS2)
 #elif defined(VECTORS2)
-	Vals[7] ^= 0x136032ED;
-	bool result = Vals[7].x & Vals[7].y;
-	if (!result) {
-		if (!Vals[7].x)
-			output[FOUND] = output[NFLAG & nonce.x] =  nonce.x;
-		if (!Vals[7].y)
-			output[FOUND] = output[NFLAG & nonce.y] =  nonce.y;
+	bool result = any(Vals[7] == 0x136032ed);
+
+	if (result) {
+		output[FOUND] = FOUND;
+		if (Vals[7].x == 0x136032ed)
+			output[NFLAG & nonce.x] =  nonce.x;
+		if (Vals[7].y == 0x136032ed)
+			output[NFLAG & nonce.y] =  nonce.y;
 	}
 	}
 #else
 #else
 	if (Vals[7] == 0x136032ED)
 	if (Vals[7] == 0x136032ED)