Browse Source

opencl: Autodetect whether global offset actually works

Luke Dashjr 11 years ago
parent
commit
09ae9e3921
1 changed files with 48 additions and 1 deletions
  1. 48 1
      ocl.c

+ 48 - 1
ocl.c

@@ -710,6 +710,52 @@ _clState *opencl_create_clState(unsigned int gpu, char *name, size_t nameSize)
 	return clState;
 }
 
+static
+bool opencl_test_goffset(_clState * const clState)
+{
+	if (sizeof(size_t) < sizeof(uint32_t))
+		return false;
+	
+	const char *source = "__kernel __attribute__((reqd_work_group_size(64, 1, 1))) void runtest(volatile __global uint *out) { *out = get_global_id(0); }";
+	const size_t source_len = strlen(source);
+	cl_int status;
+	cl_program program = clCreateProgramWithSource(clState->context, 1, &source, &source_len, &status);
+	if (status != CL_SUCCESS)
+		applogr(false, LOG_ERR, "Error %d: Loading %s code into cl_program (clCreateProgramWithSource)", status, "goffset test");
+	status = bfg_clBuildProgram(&program, clState->devid, "");
+	if (status != CL_SUCCESS)
+	{
+fail:
+		clReleaseProgram(program);
+		return false;
+	}
+	cl_kernel kernel = clCreateKernel(program, "runtest", &status);
+	if (status != CL_SUCCESS)
+		return_via_applog(fail, , LOG_ERR, "Error %d: Creating kernel from %s program (clCreateKernel)", status, "goffset test");
+	static const uint32_t cleardata = 0;
+	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, sizeof(cleardata), &cleardata, 0, NULL, NULL);
+	if (status != CL_SUCCESS)
+	{
+		applog(LOG_ERR, "Error %d: Clearing output buffer for %s kernel (clEnqueueWriteBuffer)", status, "goffset test");
+fail2:
+		clReleaseKernel(kernel);
+		goto fail;
+	}
+	status = clSetKernelArg(kernel, 0, sizeof(clState->outputBuffer), &clState->outputBuffer);
+	if (status != CL_SUCCESS)
+		return_via_applog(fail2, , LOG_ERR, "Error %d: Setting kernel argument for %s kernel (clSetKernelArg)", status, "goffset test");
+	const size_t size_t_one = 1, test_goffset = 0xfabd0bf9;
+	status = clEnqueueNDRangeKernel(clState->commandQueue, kernel, 1, &test_goffset, &size_t_one, &size_t_one, 0,  NULL, NULL);
+	if (status != CL_SUCCESS)
+		return_via_applog(fail2, , LOG_DEBUG, "Error %d: Running %s kernel (clEnqueueNDRangeKernel)", status, "goffset test");
+	uint32_t resultdata;
+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, sizeof(resultdata), &resultdata, 0, NULL, NULL);
+	if (status != CL_SUCCESS)
+		return_via_applog(fail2, , LOG_DEBUG, "Error %d: Reading result from %s kernel (clEnqueueReadBuffer)", status, "goffset test");
+	applog(LOG_DEBUG, "%s kernel returned 0x%08lx for goffset 0x%08lx", "goffset test", (unsigned long)resultdata, (unsigned long)test_goffset);
+	return (resultdata == test_goffset);
+}
+
 bool opencl_load_kernel(struct cgpu_info * const cgpu, _clState * const clState, const char * const name, struct opencl_kernel_info * const kernelinfo, const char * const kernel_file, __maybe_unused const struct mining_algorithm * const malgo)
 {
 	const int gpu = cgpu->device_id;
@@ -793,10 +839,11 @@ bool opencl_load_kernel(struct cgpu_info * const cgpu, _clState * const clState,
 					break;
 				// fallthru
 			case BTS_UNKNOWN:
-				if (clState->hasOpenCL11plus && !clState->is_mesa)
+				if (opencl_test_goffset(clState))
 					device_goffset_support = true;
 				break;
 		}
+		applog(LOG_DEBUG, "%s: goffset support: device=%s kernel=%s", cgpu->dev_repr, device_goffset_support ? "yes" : "no", (kernel_goffset_support == 2) ? "required" : ((kernel_goffset_support == 1) ? "optional" : "none"));
 		if (device_goffset_support)
 		{
 			if (kernel_goffset_support)