Browse Source

opencl: Defer loading kernel until it is needed

Luke Dashjr 11 years ago
parent
commit
28e4673181
3 changed files with 40 additions and 44 deletions
  1. driver-opencl.c: 29 additions, 24 deletions
  2. ocl.c: 8 additions, 19 deletions
  3. ocl.h: 3 additions, 1 deletion

+ 29 - 24
driver-opencl.c

@@ -1325,7 +1325,7 @@ select_cgpu:
 		//free(clState);
 		//free(clState);
 
 
 		applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
 		applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
-		clStates[thr_id] = initCl(virtual_gpu, name, sizeof(name));
+		clStates[thr_id] = opencl_create_clState(virtual_gpu, name, sizeof(name));
 		if (!clStates[thr_id]) {
 		if (!clStates[thr_id]) {
 			applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
 			applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
 			goto select_cgpu;
 			goto select_cgpu;
@@ -1581,7 +1581,7 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 
 
 	strcpy(name, "");
 	strcpy(name, "");
 	applog(LOG_INFO, "Init GPU thread %i GPU %i virtual GPU %i", i, gpu, virtual_gpu);
 	applog(LOG_INFO, "Init GPU thread %i GPU %i virtual GPU %i", i, gpu, virtual_gpu);
-	clStates[i] = initCl(virtual_gpu, name, sizeof(name));
+	clStates[i] = opencl_create_clState(virtual_gpu, name, sizeof(name));
 	if (!clStates[i]) {
 	if (!clStates[i]) {
 #ifdef HAVE_CURSES
 #ifdef HAVE_CURSES
 		if (use_curses)
 		if (use_curses)
@@ -1627,34 +1627,13 @@ static bool opencl_thread_init(struct thr_info *thr)
 	cl_int status = 0;
 	cl_int status = 0;
 	thrdata = calloc(1, sizeof(*thrdata));
 	thrdata = calloc(1, sizeof(*thrdata));
 	thr->cgpu_data = thrdata;
 	thr->cgpu_data = thrdata;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+	int buffersize = SCRYPT_BUFFERSIZE;
 
 
 	if (!thrdata) {
 	if (!thrdata) {
 		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
 		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
 		return false;
 		return false;
 	}
 	}
 
 
-	switch (clState->chosen_kernel) {
-		case KL_POCLBM:
-			thrdata->queue_kernel_parameters = &queue_poclbm_kernel;
-			break;
-		case KL_PHATK:
-			thrdata->queue_kernel_parameters = &queue_phatk_kernel;
-			break;
-		case KL_DIAKGCN:
-			thrdata->queue_kernel_parameters = &queue_diakgcn_kernel;
-			break;
-#ifdef USE_SCRYPT
-		case KL_SCRYPT:
-			thrdata->queue_kernel_parameters = &queue_scrypt_kernel;
-			break;
-#endif
-		default:
-		case KL_DIABLO:
-			thrdata->queue_kernel_parameters = &queue_diablo_kernel;
-			break;
-	}
-
 	thrdata->res = calloc(buffersize, 1);
 	thrdata->res = calloc(buffersize, 1);
 
 
 	if (!thrdata->res) {
 	if (!thrdata->res) {
@@ -1700,6 +1679,32 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	struct cgpu_info *gpu = thr->cgpu;
 	struct cgpu_info *gpu = thr->cgpu;
 	struct opencl_device_data * const data = gpu->device_data;
 	struct opencl_device_data * const data = gpu->device_data;
 	_clState *clState = clStates[thr_id];
 	_clState *clState = clStates[thr_id];
+	if (!clState->kernel_loaded)
+	{
+		if (!opencl_load_kernel(gpu, clState, gpu->name))
+			applogr(-1, LOG_ERR, "%s: Failed to load kernel", gpu->dev_repr);
+		
+		switch (clState->chosen_kernel) {
+			case KL_POCLBM:
+				thrdata->queue_kernel_parameters = &queue_poclbm_kernel;
+				break;
+			case KL_PHATK:
+				thrdata->queue_kernel_parameters = &queue_phatk_kernel;
+				break;
+			case KL_DIAKGCN:
+				thrdata->queue_kernel_parameters = &queue_diakgcn_kernel;
+				break;
+#ifdef USE_SCRYPT
+			case KL_SCRYPT:
+				thrdata->queue_kernel_parameters = &queue_scrypt_kernel;
+				break;
+#endif
+			default:
+			case KL_DIABLO:
+				thrdata->queue_kernel_parameters = &queue_diablo_kernel;
+				break;
+		}
+	}
 	const cl_kernel *kernel = &clState->kernel;
 	const cl_kernel *kernel = &clState->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
 	const int dynamic_us = opt_dynamic_interval * 1000;
 
 

+ 8 - 19
ocl.c

@@ -640,6 +640,12 @@ _clState *opencl_create_clState(unsigned int gpu, char *name, size_t nameSize)
 	clState->devid = devices[gpu];
 	clState->devid = devices[gpu];
 	free(devices);
 	free(devices);
 	
 	
+	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, SCRYPT_BUFFERSIZE, NULL, &status);
+	if (status != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
+		return false;
+	}
+	
 	return clState;
 	return clState;
 }
 }
 
 
@@ -1129,29 +1135,12 @@ built:
 			applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
 			applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
 			return false;
 			return false;
 		}
 		}
-		clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, SCRYPT_BUFFERSIZE, NULL, &status);
-	} else
-#endif
-	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
-		return false;
 	}
 	}
+#endif
 
 
+	clState->kernel_loaded = true;
 	return true;
 	return true;
 }
 }
 
 
-_clState *initCl(const unsigned int gpu, char * const name, const size_t nameSize)
-{
-	struct cgpu_info * const cgpu = &gpus[gpu];
-	_clState * const clState = opencl_create_clState(gpu, name, nameSize);
-	if (!opencl_load_kernel(cgpu, clState, name))
-	{
-		free(clState);
-		return NULL;
-	}
-	return clState;
-}
-
 #endif /* HAVE_OPENCL */
 #endif /* HAVE_OPENCL */
 
 

+ 3 - 1
ocl.h

@@ -17,6 +17,7 @@ typedef struct {
 	bool is_mesa;
 	bool is_mesa;
 	
 	
 	cl_context context;
 	cl_context context;
+	bool kernel_loaded;
 	cl_kernel kernel;
 	cl_kernel kernel;
 	cl_command_queue commandQueue;
 	cl_command_queue commandQueue;
 	cl_program program;
 	cl_program program;
@@ -41,6 +42,7 @@ typedef struct {
 extern FILE *opencl_open_kernel(const char *filename);
 extern FILE *opencl_open_kernel(const char *filename);
 extern char *file_contents(const char *filename, int *length);
 extern char *file_contents(const char *filename, int *length);
 extern int clDevicesNum(void);
 extern int clDevicesNum(void);
-extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize);
+extern _clState *opencl_create_clState(unsigned int gpu, char *name, size_t nameSize);
+extern bool opencl_load_kernel(struct cgpu_info *, _clState *clState, const char *name);
 #endif /* HAVE_OPENCL */
 #endif /* HAVE_OPENCL */
 #endif /* __OCL_H__ */
 #endif /* __OCL_H__ */