Browse Source

Merge commit '0feb679' into bfgminer

Luke Dashjr 13 years ago
parent
commit
d5b0698f6c
10 changed files with 187 additions and 111 deletions
  1. 5 5
      configure.ac
  2. 34 17
      diablo120823.cl
  3. 38 9
      diakgcn120823.cl
  4. 1 1
      driver-opencl.c
  5. 8 17
      findnonce.c
  6. 2 3
      findnonce.h
  7. 24 5
      miner.c
  8. 34 17
      phatk120823.cl
  9. 34 31
      poclbm120823.cl
  10. 7 6
      scrypt120823.cl

+ 5 - 5
configure.ac

@@ -362,11 +362,11 @@ fi
 
 AC_DEFINE_UNQUOTED([CGMINER_PREFIX], ["$prefix/bin"], [Path to bfgminer install])
 
-AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120724"], [Filename for phatk kernel])
-AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120724"], [Filename for poclbm kernel])
-AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120724"], [Filename for diakgcn kernel])
-AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120724"], [Filename for diablo kernel])
-AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt120724"], [Filename for scrypt kernel])
+AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120823"], [Filename for phatk kernel])
+AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120823"], [Filename for poclbm kernel])
+AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120823"], [Filename for diakgcn kernel])
+AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120823"], [Filename for diablo kernel])
+AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt120823"], [Filename for scrypt kernel])
 
 
 AC_SUBST(JANSSON_LIBS)

+ 34 - 17
diablo120724.cl → diablo120823.cl

@@ -62,7 +62,7 @@ void search(
     const uint c1_plus_k5, const uint b1_plus_k6,
     const uint state0, const uint state1, const uint state2, const uint state3,
     const uint state4, const uint state5, const uint state6, const uint state7,
-    __global uint * output)
+    volatile __global uint * output)
 {
 
   z ZA[930];
@@ -1242,33 +1242,50 @@ void search(
     
     ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
     
-#define FOUND (0x800)
-#define NFLAG (0x7FF)
+#define FOUND (0x0F)
 
 #if defined(VECTORS4)
 	bool result = any(ZA[924] == 0x136032EDU);
 
 	if (result) {
-		if (ZA[924].x == 0x136032EDU)
-			output[FOUND] = output[NFLAG & Znonce.x] =  Znonce.x;
-		if (ZA[924].y == 0x136032EDU)
-			output[FOUND] = output[NFLAG & Znonce.y] =  Znonce.y;
-		if (ZA[924].z == 0x136032EDU)
-			output[FOUND] = output[NFLAG & Znonce.z] =  Znonce.z;
-		if (ZA[924].w == 0x136032EDU)
-			output[FOUND] = output[NFLAG & Znonce.w] =  Znonce.w;
+		uint found;
+
+		if (ZA[924].x == 0x136032EDU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = Znonce.x;
+		}
+		if (ZA[924].y == 0x136032EDU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = Znonce.y;
+		}
+		if (ZA[924].z == 0x136032EDU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = Znonce.z;
+		}
+		if (ZA[924].w == 0x136032EDU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = Znonce.w;
+		}
 	}
 #elif defined(VECTORS2)
 	bool result = any(ZA[924] == 0x136032EDU);
 
 	if (result) {
-		if (ZA[924].x == 0x136032EDU)
-			output[FOUND] = output[NFLAG & Znonce.x] =  Znonce.x;
-		if (ZA[924].y == 0x136032EDU)
-			output[FOUND] = output[NFLAG & Znonce.y] =  Znonce.y;
+		uint found;
+
+		if (ZA[924].x == 0x136032EDU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = Znonce.x;
+		}
+		if (ZA[924].y == 0x136032EDU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = Znonce.y;
+		}
 	}
 #else
-	if (ZA[924] == 0x136032EDU)
-		output[FOUND] = output[NFLAG & Znonce] =  Znonce;
+	if (ZA[924] == 0x136032EDU) {
+		uint found = atomic_add(&output[FOUND], 1);
+		output[found] = Znonce;
+	}
 #endif
 }

+ 38 - 9
diakgcn120724.cl → diakgcn120823.cl

@@ -48,7 +48,7 @@ __kernel
 			const uint state0A, const uint state0B,
 			const uint state1A, const uint state2A, const uint state3A, const uint state4A,
 			const uint state5A, const uint state6A, const uint state7A,
-			__global uint * output)
+			volatile __global uint * output)
 {
 	u V[8];
 	u W[16];
@@ -571,17 +571,46 @@ __kernel
 
 	V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
 
-#define FOUND (0x800)
-#define NFLAG (0x7FF)
+#define FOUND (0x0F)
 
 #ifdef VECTORS4
-	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU))
-		output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w));
+	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {
+		uint found;
+
+		if (V[7].x == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.x;
+		}
+		if (V[7].y == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.y;
+		}
+		if (V[7].z == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.z;
+		}
+		if (V[7].w == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.w;
+		}
+	}
 #elif defined VECTORS2
-	if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU))
-		output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y;
+	if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) {
+		uint found;
+
+		if (V[7].x == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.x;
+		}
+		if (V[7].y == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.y;
+		}
+	}
 #else
-	if (V[7] == 0x136032edU)
-		output[FOUND] = output[NFLAG & nonce] = nonce;
+	if (V[7] == 0x136032edU) {
+		uint found = atomic_add(&output[FOUND], 1);
+		output[found] = nonce;
+	}
 #endif
 }

+ 1 - 1
driver-opencl.c

@@ -1798,7 +1798,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
 
-	/* MAXBUFFERS entry is used as a flag to say nonces exist */
+	/* FOUND entry is used as a counter to say how many nonces exist */
 	if (thrdata->res[FOUND]) {
 		/* Clear the buffer again */
 		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,

+ 8 - 17
findnonce.c

@@ -173,6 +173,7 @@ struct pc_data {
 	struct work *work;
 	uint32_t res[MAXBUFFERS];
 	pthread_t pth;
+	int found;
 };
 
 static void send_sha_nonce(struct pc_data *pcd, cl_uint nonce)
@@ -238,33 +239,23 @@ static void send_scrypt_nonce(struct pc_data *pcd, uint32_t nonce)
 static void *postcalc_hash(void *userdata)
 {
 	struct pc_data *pcd = (struct pc_data *)userdata;
-	struct thr_info *thr = pcd->thr;
-	int entry = 0, nonces = 0;
+	unsigned int entry = 0;
 
 	pthread_detach(pthread_self());
 	rename_thr("bfg-postcalchsh");
 
-	for (entry = 0; entry < FOUND; entry++) {
+	for (entry = 0; entry < pcd->res[FOUND]; entry++) {
 		uint32_t nonce = pcd->res[entry];
 
-		if (nonce) {
-			applog(LOG_DEBUG, "OCL NONCE %u", nonce);
-			if (opt_scrypt)
-				send_scrypt_nonce(pcd, nonce);
-			else
-				send_sha_nonce(pcd, nonce);
-			nonces++;
-		}
+		applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry);
+		if (opt_scrypt)
+			send_scrypt_nonce(pcd, nonce);
+		else
+			send_sha_nonce(pcd, nonce);
 	}
 
 	free(pcd);
 
-	if (unlikely(!nonces)) {
-		applog(LOG_DEBUG, "No nonces found! Error in OpenCL code?");
-		hw_errors++;
-		thr->cgpu->hw_errors++;
-	}
-
 	return NULL;
 }
 

+ 2 - 3
findnonce.h

@@ -4,10 +4,9 @@
 #include "config.h"
 
 #define MAXTHREADS (0xFFFFFFFEULL)
-#define MAXBUFFERS (0xFFF)
+#define MAXBUFFERS (0x10)
 #define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
-#define FOUND (0x800)
-/* #define NFLAG (0x7FF) Just for reference */
+#define FOUND (0x0F)
 
 #ifdef HAVE_OPENCL
 extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);

+ 24 - 5
miner.c

@@ -2412,7 +2412,7 @@ retry:
 	if (!pool->curls)
 		recruit_curl(pool);
 	else if (list_empty(&pool->curlring)) {
-		if (pool->submit_fail || pool->curls >= curl_limit) {
+		if (pool->curls >= curl_limit) {
 			pthread_cond_wait(&pool->cr_cond, &pool->pool_lock);
 			goto retry;
 		} else
@@ -2609,13 +2609,18 @@ retry:
 			lagging = true;
 		pool = ret_work->pool = select_pool(lagging);
 
+		inc_queued();
+
 		if (!ce)
 			ce = pop_curl_entry(pool);
 
-		/* Inc queued count after ce is popped in case there're none
-		 * left and we think we've queued work when we're just waiting
-		 * for curls */
-		inc_queued();
+		/* Check that we haven't staged work via other threads while
+		 * waiting for a curl entry */
+		if (total_staged() >= maxq) {
+			dec_queued();
+			free_work(ret_work);
+			goto out;
+		}
 
 		/* obtain new work from bitcoin via JSON-RPC */
 		if (!get_upstream_work(ret_work, ce->curl)) {
@@ -3182,6 +3187,20 @@ static void test_work_current(struct work *work)
 			quit (1, "test_work_current OOM");
 		strcpy(s->hash, hexstr);
 		wr_lock(&blk_lock);
+		/* Only keep the last 6 blocks in memory since work from blocks
+		 * before this is virtually impossible and we want to prevent
+		 * memory usage from continually rising */
+		if (HASH_COUNT(blocks) > 5) {
+			struct block *blocka, *blockb;
+			int count = 0;
+
+			HASH_ITER(hh, blocks, blocka, blockb) {
+				if (count++ < 6)
+					continue;
+				HASH_DEL(blocks, blocka);
+				free(blocka);
+			}
+		}
 		HASH_ADD_STR(blocks, hash, s);
 		wr_unlock(&blk_lock);
 		work->pool->block_id = block_id;

+ 34 - 17
phatk120724.cl → phatk120823.cl

@@ -164,7 +164,7 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 						const uint PreW18, const uint PreW19,
 						const uint PreW31, const uint PreW32,
 						
-						__global uint * output)
+						volatile __global uint * output)
 {
 
 
@@ -387,31 +387,48 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 	W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) -
 		(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64))  + s1(64+59)+ ch(59+64)));
 
-#define FOUND (0x800)
-#define NFLAG (0x7FF)
+#define FOUND (0x0F)
 
 #ifdef VECTORS4
 	bool result = W[117].x & W[117].y & W[117].z & W[117].w;
 	if (!result) {
-		if (!W[117].x)
-			output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
-		if (!W[117].y)
-			output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
-		if (!W[117].z)
-			output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
-		if (!W[117].w)
-			output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
+		uint found;
+
+		if (!W[117].x) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = W[3].x;
+		}
+		if (!W[117].y) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = W[3].y;
+		}
+		if (!W[117].z) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = W[3].z;
+		}
+		if (!W[117].w) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = W[3].w;
+		}
 	}
 #elif defined VECTORS2
 	bool result = W[117].x & W[117].y;
 	if (!result) {
-		if (!W[117].x)
-			output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
-		if (!W[117].y)
-			output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
+		uint found;
+
+		if (!W[117].x) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = W[3].x;
+		}
+		if (!W[117].y) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = W[3].y;
+		}
 	}
 #else
-	if (!W[117])
-		output[FOUND] = output[NFLAG & W[3]] = W[3];
+	if (!W[117]) {
+		uint found = atomic_add(&output[FOUND], 1);
+		output[found] = W[3];
+	}
 #endif
 }

+ 34 - 31
poclbm120724.cl → poclbm120823.cl

@@ -80,7 +80,7 @@ void search(const uint state0, const uint state1, const uint state2, const uint
 	const uint D1A, const uint C1addK5, const uint B1addK6,
 	const uint W16addK16, const uint W17addK17,
 	const uint PreVal4addT1, const uint Preval0,
-	__global uint * output)
+	volatile __global uint * output)
 {
 	u Vals[24];
 	u *W = &Vals[8];
@@ -1311,43 +1311,46 @@ Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
 Vals[1]+=K[59];
 Vals[1]+=Vals[5];
 
-#define FOUND (0x800)
-#define NFLAG (0x7FF)
+Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
+Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
+Vals[2]+=W[12];
+Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
+Vals[2]+=W[5];
+Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
+Vals[2]+=Vals[0];
+Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
+Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
+
+#define FOUND (0x0F)
 
 #if defined(VECTORS2) || defined(VECTORS4)
-	Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-	Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-	Vals[2]+=W[12];
-	Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
-	Vals[2]+=W[5];
-	Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
-	Vals[2]+=Vals[0];
-	Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-	Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
 
 	if (any(Vals[2] == 0x136032edU)) {
-		if (Vals[2].x == 0x136032edU)
-			output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
-		if (Vals[2].y == 0x136032edU)
-			output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
+		uint found;
+
+		if (Vals[2].x == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.x;
+		}
+		if (Vals[2].y == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.y;
+		}
 #if defined(VECTORS4)
-		if (Vals[2].z == 0x136032edU)
-			output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
-		if (Vals[2].w == 0x136032edU)
-			output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
+		if (Vals[2].z == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.z;
+		}
+		if (Vals[2].w == 0x136032edU) {
+			found = atomic_add(&output[FOUND], 1);
+			output[found] = nonce.w;
+		}
 #endif
 	}
 #else
-	if ((Vals[2]+
-		Ma(Vals[6],Vals[5],Vals[7])+
-		(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22))+
-		W[12]+
-		(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U))+
-		W[5]+
-		(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U))+
-		Vals[0]+
-		(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25))+
-		ch(Vals[1],Vals[4],Vals[3])) == 0x136032edU)
-			output[FOUND] = output[NFLAG & nonce] =  nonce;
+	if (Vals[2] == 0x136032edU) {
+		uint found = atomic_add(&output[FOUND], 1);
+		output[found] = nonce;
+	}
 #endif
 }

+ 7 - 6
scrypt120724.cl → scrypt120823.cl

@@ -682,12 +682,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 	unshittify(X);
 }
 
-#define FOUND (0x800)
-#define NFLAG (0x7FF)
+#define FOUND (0x0F)
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(__global const uint4 * restrict input,
-__global uint*restrict output, __global uint4*restrict padcache,
+volatile __global uint*restrict output, __global uint4*restrict padcache,
 const uint4 midstate0, const uint4 midstate16, const uint target)
 {
 	uint gid = get_global_id(0);
@@ -721,9 +720,11 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
 	SHA256_fixed(&tmp0,&tmp1);
 	SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
 
-	bool found = (EndianSwap(ostate1.w) <= target);
-	if (found)
-		output[FOUND] = output[NFLAG & gid] = gid;
+	bool result = (EndianSwap(ostate1.w) <= target);
+	if (result) {
+		uint found = atomic_add(&output[FOUND], 1);
+		output[found] = gid;
+	}
 }
 
 /*-