Browse Source

Update the kernel with a shorter output path, and use a 4k output buffer to match OS page sizes.

Con Kolivas 14 years ago
parent
commit
eea05c05b8
3 changed files with 13 additions and 45 deletions
  1. 2 2
      findnonce.h
  2. 2 2
      ocl.c
  3. 9 41
      phatk110714.cl

+ 2 - 2
findnonce.h

@@ -4,8 +4,8 @@
 #include "config.h"
 #include "config.h"
 
 
 #define MAXTHREADS (0xFFFFFFFEULL)
 #define MAXTHREADS (0xFFFFFFFEULL)
-/* Maximum worksize 512 * maximum vectors 4 plus one flag entry */
-#define MAXBUFFERS (4 * 512)
+/* Maximum worksize 4k to match page size */
+#define MAXBUFFERS (4095)
 #define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1))
 #define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1))
 
 
 #ifdef HAVE_OPENCL
 #ifdef HAVE_OPENCL

+ 2 - 2
ocl.c

@@ -341,7 +341,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	char numbuf[10];
 	char numbuf[10];
 	char filename[15];
 	char filename[15];
 	if (clState->hasBitAlign)
 	if (clState->hasBitAlign)
-		strcpy(filename, "phatk110711.cl");
+		strcpy(filename, "phatk110714.cl");
 	else
 	else
 		strcpy(filename, "poclbm.cl");
 		strcpy(filename, "poclbm.cl");
 	FILE *binaryfile;
 	FILE *binaryfile;
@@ -370,7 +370,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 
 
 	strcpy(binaryfilename, name);
 	strcpy(binaryfilename, name);
 	if (clState->hasBitAlign) {
 	if (clState->hasBitAlign) {
-		strcat(binaryfilename, "phatk110711");
+		strcat(binaryfilename, "phatk110714");
 		strcat(binaryfilename, "bitalign");
 		strcat(binaryfilename, "bitalign");
 	} else
 	} else
 		strcat(binaryfilename, "poclbm");
 		strcat(binaryfilename, "poclbm");

+ 9 - 41
phatk110711.cl → phatk110714.cl

@@ -3,6 +3,7 @@
 
 
 // 2011-07-11: further modified by Diapolo and still public-domain
 // 2011-07-11: further modified by Diapolo and still public-domain
 // -ck version to be compatible with cgminer
 // -ck version to be compatible with cgminer
+// 2011-07-14: shorter code
 
 
 #define VECTORSX
 #define VECTORSX
 
 
@@ -84,7 +85,6 @@ __kernel void search(	const uint state0, const uint state1, const uint state2, c
 {
 {
 	u W[124];
 	u W[124];
 	u Vals[8];
 	u Vals[8];
-	uint it = get_local_id(0);
 
 
 	Vals[1] = B1;
 	Vals[1] = B1;
 	Vals[2] = C1;
 	Vals[2] = C1;
@@ -380,70 +380,38 @@ __kernel void search(	const uint state0, const uint state1, const uint state2, c
 	// Round 124
 	// Round 124
 	Vals[7] += Vals[3] + P4(124) + P3(124) + P2(124) + P1(124) + s1(124) + ch(124);
 	Vals[7] += Vals[3] + P4(124) + P3(124) + P2(124) + P1(124) + s1(124) + ch(124);
 	
 	
-#define MAXBUFFERS (4 * 512)
+#define MAXBUFFERS (4095)
+#define NFLAG (0xFFFUL)
 
 
 #if defined(VECTORS4) || defined(VECTORS2)
 #if defined(VECTORS4) || defined(VECTORS2)
 	if (Vals[7].x == -H[7])
 	if (Vals[7].x == -H[7])
 	{
 	{
-		// Unlikely event there is something here already !
-		if (output[it]) {
-			for (it = 0; it < MAXBUFFERS; it++) {
-				if (!output[it])
-					break;
-			}
-		}
-		output[it] = W[3].x;
+		output[W[3].x & NFLAG] =  W[3].x;
 		output[MAXBUFFERS] = 1;
 		output[MAXBUFFERS] = 1;
 	}
 	}
 	if (Vals[7].y == -H[7])
 	if (Vals[7].y == -H[7])
 	{
 	{
-		it += 512;
-		if (output[it]) {
-			for (it = 0; it < MAXBUFFERS; it++) {
-				if (!output[it])
-					break;
-			}
-		}
-		output[it] = W[3].y;
+		output[W[3].y & NFLAG] =  W[3].y;
 		output[MAXBUFFERS] = 1;
 		output[MAXBUFFERS] = 1;
 	}
 	}
 #ifdef VECTORS4
 #ifdef VECTORS4
 	if (Vals[7].z == -H[7])
 	if (Vals[7].z == -H[7])
 	{
 	{
-		it += 1024;
-		if (output[it]) {
-			for (it = 0; it < MAXBUFFERS; it++) {
-				if (!output[it])
-					break;
-			}
-		}
-		output[it] = W[3].z;
+		output[W[3].z & NFLAG] =  W[3].z;
 		output[MAXBUFFERS] = 1;
 		output[MAXBUFFERS] = 1;
 	}
 	}
 	if (Vals[7].w == -H[7])
 	if (Vals[7].w == -H[7])
 	{
 	{
-		it += 1536;
-		if (output[it]) {
-			for (it = 0; it < MAXBUFFERS; it++) {
-				if (!output[it])
-					break;
-			}
-		}
-		output[it] = W[3].w;
+		output[W[3].w & NFLAG] =  W[3].w;
 		output[MAXBUFFERS] = 1;
 		output[MAXBUFFERS] = 1;
 	}
 	}
 #endif
 #endif
 #else
 #else
 	if (Vals[7] == -H[7])
 	if (Vals[7] == -H[7])
 	{
 	{
-		if (output[it]) {
-			for (it = 0; it < MAXBUFFERS; it++) {
-				if (!output[it])
-					break;
-			}
-		}
-		output[it] = W[3];
+		output[W[3] & NFLAG] =  W[3];
 		output[MAXBUFFERS] = 1;
 		output[MAXBUFFERS] = 1;
 	}
 	}
 #endif
 #endif
+
 }
 }