Browse Source

Use 256 output slots for kernels to allow 1 for each worksize.

Con Kolivas 13 years ago
parent
commit
1b5c676de7
6 changed files with 13 additions and 13 deletions
  1. diablo120328.cl (+2 -2)
  2. diakgcn120427.cl (+2 -2)
  3. findnonce.h (+3 -3)
  4. phatk120223.cl (+2 -2)
  5. poclbm120327.cl (+2 -2)
  6. scrypt120713.cl (+2 -2)

+ 2 - 2
diablo120328.cl

@@ -1242,8 +1242,8 @@ void search(
     
     ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
     
-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)
 
 #if defined(VECTORS4)
 	bool result = any(ZA[924] == 0x136032EDU);

+ 2 - 2
diakgcn120427.cl

@@ -571,8 +571,8 @@ __kernel
 
 	V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
 
-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)
 
 #ifdef VECTORS4
 	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU))

+ 3 - 3
findnonce.h

@@ -4,10 +4,10 @@
 #include "config.h"
 
 #define MAXTHREADS (0xFFFFFFFEULL)
-#define MAXBUFFERS (0xFF)
+#define MAXBUFFERS (0xFFF)
 #define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
-#define FOUND (0x80)
-/* #define NFLAG (0x7F) Just for reference */
+#define FOUND (0x800)
+/* #define NFLAG (0x7FF) Just for reference */
 
 #ifdef HAVE_OPENCL
 extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);

+ 2 - 2
phatk120223.cl

@@ -387,8 +387,8 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 	W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) -
 		(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64))  + s1(64+59)+ ch(59+64)));
 
-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)
 
 #ifdef VECTORS4
 	bool result = W[117].x & W[117].y & W[117].z & W[117].w;

+ 2 - 2
poclbm120327.cl

@@ -1311,8 +1311,8 @@ Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
 Vals[1]+=K[59];
 Vals[1]+=Vals[5];
 
-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)
 
 #if defined(VECTORS2) || defined(VECTORS4)
 	Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);

+ 2 - 2
scrypt120713.cl

@@ -685,8 +685,8 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 	unshittify(X);
 }
 
-#define FOUND (0x80)
-#define NFLAG (0x7F)
+#define FOUND (0x800)
+#define NFLAG (0x7FF)
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(__global const uint4 * restrict input,