Browse Source

Remove atomic ops from opencl kernels given rarity of more than once nonce on the same wavefront and the potential increased ramspeed requirements to use the atomics.

ckolivas 13 years ago
parent
commit
775a27281a
5 changed files with 5 additions and 30 deletions
  1. 1 6
      diablo120823.cl
  2. 1 6
      diakgcn120823.cl
  3. 1 6
      phatk120823.cl
  4. 1 6
      poclbm120823.cl
  5. 1 6
      scrypt120823.cl

+ 1 - 6
diablo120823.cl

@@ -1243,12 +1243,7 @@ void search(
     ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
     
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #if defined(VECTORS4)
 	bool result = any(ZA[924] == 0x136032EDU);

+ 1 - 6
diakgcn120823.cl

@@ -572,12 +572,7 @@ __kernel
 	V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #ifdef VECTORS4
 	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {

+ 1 - 6
phatk120823.cl

@@ -388,12 +388,7 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 		(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64))  + s1(64+59)+ ch(59+64)));
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #ifdef VECTORS4
 	bool result = W[117].x & W[117].y & W[117].z & W[117].w;

+ 1 - 6
poclbm120823.cl

@@ -1322,12 +1322,7 @@ Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
 Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #if defined(VECTORS2) || defined(VECTORS4)
 	if (any(Vals[2] == 0x136032edU)) {

+ 1 - 6
scrypt120823.cl

@@ -683,12 +683,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 }
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(__global const uint4 * restrict input,