14 years ago · 145f3c0b1d
--- a/DiabloMiner120221.cl
+++ b/DiabloMiner120221.cl
@@ -62,13 +62,7 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
 
				 
			
 
				   z ZA[930];
			
 
				 
			
 
				-#ifdef VECTORS4
			
 
				-	const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
			
 
				-#elif defined VECTORS2
			
 
				-	const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
			
 
				-#else
			
 
				-	const z Znonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
			
 
				-#endif
			
 
				+	const z Znonce = base + (uint)(get_global_id(0));
			
 
				 
			
 
				     ZA[15] = Znonce + PreVal4_state0;
			
 
				     
			
--- a/device-gpu.c
+++ b/device-gpu.c
@@ -653,7 +653,7 @@ static _clState *clStates[MAX_GPUDEVICES];
 
				 #define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
			
 
				 #define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
			
 
				 
			
 
				-static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
			
 
				+static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
			
 
				 {
			
 
				 	cl_uint vwidth = clState->preferred_vwidth;
			
 
				 	cl_kernel *kernel = &clState->kernel;
			
@@ -680,7 +680,7 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
 
				 
			
 
				 	nonces = alloca(sizeof(uint) * vwidth);
			
 
				 	for (i = 0; i < vwidth; i++)
			
 
				-		nonces[i] = blk->nonce + i;
			
 
				+		nonces[i] = blk->nonce + (i * threads);
			
 
				 	CL_SET_VARG(vwidth, nonces);
			
 
				 
			
 
				 	CL_SET_BLKARG(fW0);
			
@@ -704,7 +704,8 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
 
				 	return status;
			
 
				 }
			
 
				 
			
 
				-static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
			
 
				+static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
			
 
				+				 __maybe_unused cl_uint threads)
			
 
				 {
			
 
				 	cl_uint vwidth = clState->preferred_vwidth;
			
 
				 	cl_kernel *kernel = &clState->kernel;
			
@@ -747,7 +748,7 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
 
				 	return status;
			
 
				 }
			
 
				 
			
 
				-static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
			
 
				+static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
			
 
				 {
			
 
				 	cl_uint vwidth = clState->preferred_vwidth;
			
 
				 	cl_kernel *kernel = &clState->kernel;
			
@@ -757,7 +758,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
 
				 
			
 
				 	nonces = alloca(sizeof(uint) * vwidth);
			
 
				 	for (i = 0; i < vwidth; i++)
			
 
				-		nonces[i] = blk->nonce + i;
			
 
				+		nonces[i] = blk->nonce + (i * threads);
			
 
				 	CL_SET_VARG(vwidth, nonces);
			
 
				 
			
 
				 	CL_SET_BLKARG(PreVal0);
			
@@ -805,7 +806,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
 
				 	return status;
			
 
				 }
			
 
				 
			
 
				-static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk)
			
 
				+static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
			
 
				 {
			
 
				 	cl_uint vwidth = clState->preferred_vwidth;
			
 
				 	cl_kernel *kernel = &clState->kernel;
			
@@ -815,7 +816,7 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk)
 
				 
			
 
				 	nonces = alloca(sizeof(uint) * vwidth);
			
 
				 	for (i = 0; i < vwidth; i++)
			
 
				-		nonces[i] = blk->nonce + i;
			
 
				+		nonces[i] = blk->nonce + (i * threads);
			
 
				 	CL_SET_VARG(vwidth, nonces);
			
 
				 
			
 
				 	CL_SET_BLKARG(PreVal0);
			
@@ -1071,7 +1072,7 @@ static void get_opencl_statline(char *buf, struct cgpu_info *gpu)
 
				 }
			
 
				 
			
 
				 struct opencl_thread_data {
			
 
				-	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *);
			
 
				+	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint);
			
 
				 	uint32_t *res;
			
 
				 	struct work *last_work;
			
 
				 	struct work _last_work;
			
@@ -1244,7 +1245,7 @@ static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 			   localThreads[0], gpu->intensity);
			
 
				 	if (hashes > gpu->max_hashes)
			
 
				 		gpu->max_hashes = hashes;
			
 
				-	status = thrdata->queue_kernel_parameters(clState, &work->blk);
			
 
				+	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
			
 
				 	if (unlikely(status != CL_SUCCESS)) {
			
 
				 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
			
 
				 		return 0;
			
--- a/diakgcn120216.cl
+++ b/diakgcn120216.cl
@@ -55,30 +55,18 @@ __kernel
 
				 	u V[8];
			
 
				 	u W[16];
			
 
				 
			
 
				-#ifdef VECTORS8
			
 
				-	#ifdef GOFFSET
			
 
				+#ifdef GOFFSET
			
 
				+	#ifdef VECTORS8
			
 
				 		const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
			
 
				-	#else
			
 
				-		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
			
 
				-	#endif
			
 
				-#elif defined VECTORS4
			
 
				-	#ifdef GOFFSET
			
 
				+	#elif defined VECTORS4
			
 
				 		const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
			
 
				-	#else
			
 
				-		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
			
 
				-	#endif
			
 
				-#elif defined VECTORS2
			
 
				-	#ifdef GOFFSET
			
 
				+	#elif defined VECTORS2
			
 
				 		const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
			
 
				 	#else
			
 
				-		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
			
 
				-	#endif
			
 
				-#else
			
 
				-	#ifdef GOFFSET
			
 
				 		const u nonce = (uint)get_global_id(0);
			
 
				-	#else
			
 
				-		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
			
 
				 	#endif
			
 
				+#else
			
 
				+	const u nonce = base + (uint)(get_global_id(0));
			
 
				 #endif
			
 
				 
			
 
				 	V[0] = PreVal0 + nonce;
			
--- a/poclbm120214.cl
+++ b/poclbm120214.cl
@@ -82,13 +82,7 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co
 
				 	u W[24];
			
 
				 	u *Vals = &W[16]; // Now put at W[16] to be in same array
			
 
				 
			
 
				-#ifdef VECTORS4
			
 
				-	const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
			
 
				-#elif defined VECTORS2
			
 
				-	const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
			
 
				-#else
			
 
				-	const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
			
 
				-#endif
			
 
				+	const u nonce = base + (uint)(get_global_id(0));
			
 
				 
			
 
				 
			
 
				 Vals[0]=Preval0+nonce;