Browse Source

Merge commit 'aabc723' into scrypt

Luke Dashjr 13 years ago
parent
commit
a0198c1502
7 changed files with 117 additions and 30 deletions
  1. 51 19
      driver-opencl.c
  2. 12 2
      findnonce.c
  3. 12 1
      miner.c
  4. 8 0
      miner.h
  5. 24 6
      ocl.c
  6. 4 0
      ocl.h
  7. 6 2
      scrypt120713.cl

+ 51 - 19
driver-opencl.c

@@ -354,8 +354,10 @@ static enum cl_kernels select_kernel(char *arg)
 		return KL_POCLBM;
 	if (!strcmp(arg, "phatk"))
 		return KL_PHATK;
+#ifdef USE_SCRYPT
 	if (!strcmp(arg, "scrypt"))
 		return KL_SCRYPT;
+#endif
 	return KL_NONE;
 }
 
@@ -365,6 +367,8 @@ char *set_kernel(char *arg)
 	int i, device = 0;
 	char *nextptr;
 
+	if (opt_scrypt)
+		return "Cannot use sha256 kernel with scrypt";
 	nextptr = strtok(arg, ",");
 	if (nextptr == NULL)
 		return "Invalid parameters for set kernel";
@@ -1205,12 +1209,32 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
 	return status;
 }
 
+#ifdef USE_SCRYPT
 static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
 {
+	cl_uint4 *midstate = (cl_uint4 *)blk->midstate;
+	cl_kernel *kernel = &clState->kernel;
+	unsigned int num = 0;
 	cl_int status = 0;
+	int i;
 
+	CL_SET_ARG(clState->CLbuffer0);
+	CL_SET_ARG(clState->outputBuffer);
+	CL_SET_ARG(clState->padbuffer8);
+	CL_SET_VARG(4, &midstate[0]);
+	CL_SET_VARG(4, &midstate[16]);
+
+#if 0
+	clSetKernelArg(clState->kernel,0,sizeof(cl_mem), &clState->CLbuffer[0]);
+	clSetKernelArg(clState->kernel,1,sizeof(cl_mem), &clState->CLbuffer[1]);
+	clSetKernelArg(clState->kernel,2,sizeof(cl_mem), &clState->padbuffer8);
+	clSetKernelArg(clState->kernel,3,sizeof(cl_uint4), &midstate[0]);
+	clSetKernelArg(clState->kernel,4,sizeof(cl_uint4), &midstate[16]);
+#endif
 	return status;
 }
+#endif
+
 static void set_threads_hashes(unsigned int vectors, unsigned int *threads,
 			       int64_t *hashes, size_t *globalThreads,
 			       unsigned int minthreads, int intensity)
@@ -1498,23 +1522,25 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 	if (!cgpu->kname)
 	{
 		switch (clStates[i]->chosen_kernel) {
-		case KL_DIABLO:
-			cgpu->kname = "diablo";
-			break;
-		case KL_DIAKGCN:
-			cgpu->kname = "diakgcn";
-			break;
-		case KL_PHATK:
-			cgpu->kname = "phatk";
-			break;
-		case KL_SCRYPT:
-			cgpu->kname = "scrypt";
-			break;
-		case KL_POCLBM:
-			cgpu->kname = "poclbm";
-			break;
-		default:
-			break;
+			case KL_DIABLO:
+				cgpu->kname = "diablo";
+				break;
+			case KL_DIAKGCN:
+				cgpu->kname = "diakgcn";
+				break;
+			case KL_PHATK:
+				cgpu->kname = "phatk";
+				break;
+#ifdef USE_SCRYPT
+			case KL_SCRYPT:
+				cgpu->kname = "scrypt";
+				break;
+#endif
+			case KL_POCLBM:
+				cgpu->kname = "poclbm";
+				break;
+			default:
+				break;
 		}
 	}
 	applog(LOG_INFO, "initCl() finished. Found %s", name);
@@ -1551,9 +1577,11 @@ static bool opencl_thread_init(struct thr_info *thr)
 		case KL_DIAKGCN:
 			thrdata->queue_kernel_parameters = &queue_diakgcn_kernel;
 			break;
+#ifdef USE_SCRYPT
 		case KL_SCRYPT:
 			thrdata->queue_kernel_parameters = &queue_scrypt_kernel;
 			break;
+#endif
 		default:
 		case KL_DIABLO:
 			thrdata->queue_kernel_parameters = &queue_diablo_kernel;
@@ -1568,6 +1596,10 @@ static bool opencl_thread_init(struct thr_info *thr)
 		return false;
 	}
 
+#ifdef USE_SCRYPT
+	if (opt_scrypt)
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, BUFFERSIZE, blank_res, 0, NULL,NULL);
+#endif
 	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
 			BUFFERSIZE, blank_res, 0, NULL, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
@@ -1687,14 +1719,14 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
 						globalThreads, localThreads, 0,  NULL, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)");
+		applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
 		return -1;
 	}
 
 	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
 			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)");
+		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
 		return -1;
 	}
 

+ 12 - 2
findnonce.c

@@ -127,6 +127,10 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
 	blk->fiveA = blk->ctx_f + SHA256_K[5];
 	blk->sixA = blk->ctx_g + SHA256_K[6];
 	blk->sevenA = blk->ctx_h + SHA256_K[7];
+
+#ifdef USE_SCRYPT
+	blk->midstate = (unsigned char *)state;
+#endif
 }
 
 #define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10)))
@@ -228,9 +232,15 @@ static void *postcalc_hash(void *userdata)
 	pthread_detach(pthread_self());
 
 	for (entry = 0; entry < FOUND; entry++) {
-		if (pcd->res[entry])
-			send_nonce(pcd, pcd->res[entry]);
+		if (pcd->res[entry]) {
+#ifdef USE_SCRYPT
+			if (opt_scrypt)
+				submit_nonce(thr, pcd->work, entry);
+			else
+#endif
+				send_nonce(pcd, pcd->res[entry]);
 		nonces++;
+		}
 	}
 
 	free(pcd);

+ 12 - 1
miner.c

@@ -108,6 +108,9 @@ int opt_dynamic_interval = 7;
 int nDevs;
 int opt_g_threads = 2;
 int gpu_threads;
+#ifdef USE_SCRYPT
+bool opt_scrypt;
+#endif
 #endif
 bool opt_restart = true;
 static bool opt_nogpu;
@@ -865,7 +868,7 @@ static struct opt_table opt_config_table[] = {
 #ifdef HAVE_OPENCL
 	OPT_WITH_ARG("--kernel|-k",
 		     set_kernel, NULL, NULL,
-		     "Override kernel to use (diablo, poclbm, phatk or diakgcn) - one value or comma separated"),
+		     "Override sha256 kernel to use (diablo, poclbm, phatk or diakgcn) - one value or comma separated"),
 #endif
 #ifdef USE_ICARUS
 	OPT_WITH_ARG("--icarus-timing",
@@ -958,6 +961,11 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITH_ARG("--sched-stop",
 		     set_schedtime, NULL, &schedstop,
 		     "Set a time of day in HH:MM to stop mining (will quit without a start time)"),
+#ifdef USE_SCRYPT
+	OPT_WITHOUT_ARG("--scrypt",
+			opt_set_bool, &opt_scrypt,
+			"Use the scrypt algorithm for mining (litecoin only)"),
+#endif
 	OPT_WITH_ARG("--sharelog",
 		     set_sharelog, NULL, NULL,
 		     "Append share log to file"),
@@ -4326,6 +4334,9 @@ bool hashtest(const struct work *work, bool checktarget)
 
 bool test_nonce(struct work *work, uint32_t nonce, bool checktarget)
 {
+	if (opt_scrypt)
+		return true;
+
 	work->data[64 + 12 + 0] = (nonce >> 0) & 0xff;
 	work->data[64 + 12 + 1] = (nonce >> 8) & 0xff;
 	work->data[64 + 12 + 2] = (nonce >> 16) & 0xff;

+ 8 - 0
miner.h

@@ -626,6 +626,11 @@ extern bool opt_quiet;
 extern struct thr_info *thr_info;
 extern struct cgpu_info gpus[MAX_GPUDEVICES];
 extern int gpu_threads;
+#ifdef USE_SCRYPT
+extern bool opt_scrypt;
+#else
+#define opt_scrypt (0)
+#endif
 extern double total_secs;
 extern int mining_threads;
 extern struct cgpu_info *cpus;
@@ -673,6 +678,9 @@ typedef struct {
 	cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
 	cl_uint zeroA, zeroB;
 	cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
+#ifdef USE_SCRYPT
+	unsigned char *midstate;
+#endif
 } dev_blk_ctx;
 #else
 typedef struct {

+ 24 - 6
ocl.c

@@ -538,8 +538,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	char numbuf[10];
 
 	if (gpus[gpu].kernel == KL_NONE) {
-		/* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
-		if (!strstr(name, "Tahiti") &&
+		if (opt_scrypt) {
+			applog(LOG_INFO, "Selecting scrypt kernel");
+			clState->chosen_kernel = KL_SCRYPT;
+		} else if (!strstr(name, "Tahiti") &&
+			/* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
 			(strstr(vbuff, "844.4") ||  // Linux 64 bit ATI 2.6 SDK
 			 strstr(vbuff, "851.4") ||  // Windows 64 bit ""
 			 strstr(vbuff, "831.4") ||
@@ -591,6 +594,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			strcpy(filename, DIAKGCN_KERNNAME".cl");
 			strcpy(binaryfilename, DIAKGCN_KERNNAME);
 			break;
+		case KL_SCRYPT:
+			strcpy(filename, SCRYPT_KERNNAME".cl");
+			strcpy(binaryfilename, SCRYPT_KERNNAME);
+			break;
 		case KL_NONE: /* Shouldn't happen */
 		case KL_DIABLO:
 			strcpy(filename, DIABLO_KERNNAME".cl");
@@ -605,8 +612,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		gpus[gpu].vwidth = preferred_vwidth;
 	}
 
-	if ((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
-		clState->vwidth == 1 && clState->hasOpenCL11plus)
+	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
+		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
 			clState->goffset = true;
 
 	if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
@@ -712,8 +719,13 @@ build:
 	/* create a cl program executable for all the devices specified */
 	char *CompilerOptions = calloc(1, 256);
 
-	sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
-		(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
+	if (opt_scrypt) {
+		sprintf(CompilerOptions, "-D LOOKUP_GAP=1 -D CONCURRENT_THREADS=1 -D WORKSIZE=%d",
+			(int)clState->wsize);
+	} else {
+		sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
+			(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
+	}
 	applog(LOG_DEBUG, "Setting worksize to %d", clState->wsize);
 	if (clState->vwidth > 1)
 		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
@@ -904,6 +916,12 @@ built:
 		return NULL;
 	}
 
+#ifdef USE_SCRYPT
+	if (opt_scrypt) {
+		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
+		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, 131072, NULL, &status);
+	}
+#endif
 	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);

+ 4 - 0
ocl.h

@@ -15,6 +15,10 @@ typedef struct {
 	cl_command_queue commandQueue;
 	cl_program program;
 	cl_mem outputBuffer;
+#ifdef USE_SCRYPT
+	cl_mem CLbuffer0;
+	cl_mem padbuffer8;
+#endif
 	bool hasBitAlign;
 	bool hasOpenCL11plus;
 	bool goffset;

+ 6 - 2
scrypt120713.cl

@@ -685,12 +685,16 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 	unshittify(X);
 }
 
+#define FOUND (0x80)
+#define NFLAG (0x7F)
+
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(__global uint4*restrict input, __global uint*restrict output, __global uint4*restrict padcache, uint4 pad0, uint4 pad1)
 {
+	uint gid = get_global_id(0);
 	uint4 X[8];
 	uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1;
-	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,get_global_id(0));
+	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
 
 	SHA256(&pad0,&pad1, data, (uint4)(0x80000000U,0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x280));
 	SHA256_fresh(&ostate0,&ostate1, pad0^0x5C5C5C5CU, pad1^0x5C5C5C5CU, 0x5C5C5C5CU, 0x5C5C5C5CU);
@@ -718,7 +722,7 @@ __kernel void search(__global uint4*restrict input, __global uint*restrict outpu
 	SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
 	
 	if ((ostate1.w&0xFFFF) == 0)
-		output[get_global_id(0)&255] = get_global_id(0);
+		output[FOUND] = output[NFLAG & gid] = gid;
 }
 
 /*-