Browse Source

opencl: Support for setting oclthreads as intensity=xNNN (for NNN xintensity); also allow setting d<whatever> for an initial dynamic intensity

The xintensity unit is credited to ArGee of RGMiner.

This new unit is designed for much finer-grained intensity settings, and it also makes dual GPU threads possible on devices that previously could not run them. Note: make sure to use lower thread-concurrency values when you increase threads.

Intensity is currently used to spawn GPU threads as a simple 2^value setting.
  I:13 =    8192 threads
  I:15 =   32768 threads
  I:17 =  131072 threads
  I:18 =  262144 threads
  I:19 =  524288 threads
  I:20 = 1048576 threads
Notice how the higher settings increase thread count tremendously.

Now enter the xintensity setting. It is simply a shader multiplier: because it is based on the number of shaders on a card, the same value should scale across different card models.
   6970 with 1536 shaders: xI:64 = 98304 threads
R9 280X with 2048 shaders: xI:64 = 131072 threads
 R9 290 with 2560 shaders: xI:64 = 163840 threads
R9 290X with 2816 shaders: xI:64 = 180224 threads

   6970 with 1536 shaders: xI:300 = 460800 threads
R9 280X with 2048 shaders: xI:300 = 614400 threads
 R9 290 with 2560 shaders: xI:300 = 768000 threads
R9 290X with 2816 shaders: xI:300 = 844800 threads

It's now much easier to control thread intensity and it potentially allows for a uniform way of setting the intensity on your system.
Luke Dashjr 12 years ago
parent
commit
176e13ea3c
4 changed files with 46 additions and 4 deletions
  1. 34 4
      driver-opencl.c
  2. 2 0
      driver-opencl.h
  3. 9 0
      ocl.c
  4. 1 0
      ocl.h

+ 34 - 4
driver-opencl.c

@@ -22,6 +22,7 @@
 #include <windows.h>
 #endif
 
+#include <ctype.h>
 #include <math.h>
 #include <string.h>
 #include <stdbool.h>
@@ -271,6 +272,7 @@ extern char *opt_kernel_path;
 extern int gpur_thr_id;
 extern bool opt_noadl;
 extern bool have_opencl;
+static _clState *clStates[MAX_GPUDEVICES];
 
 
 
@@ -550,18 +552,47 @@ unsigned long intensity_to_oclthreads(double intensity, const bool is_sha256d)
 	return pow(2, intensity);
 }
 
+unsigned long xintensity_to_oclthreads(const double xintensity, const cl_uint max_compute_units)
+{
+	return xintensity * max_compute_units * 0x40;
+}
+
 static
-bool _set_intensity(struct cgpu_info * const cgpu, const char * const _val)
+bool _set_intensity(struct cgpu_info * const cgpu, const char *_val)
 {
 	struct opencl_device_data * const data = cgpu->device_data;
+	
+	data->dynamic = false;
+	data->_init_xintensity = 0;
+	
 	if (!strncasecmp(_val, "d", 1))
+	{
 		data->dynamic = true;
+		++_val;
+	}
+	
+	if (!strncasecmp(_val, "x", 1))
+	{
+		const double v = atof(&_val[1]);
+		if (v < 1 || v > 9999)
+			return false;
+		
+		if (cgpu->thr)
+		{
+			struct thr_info * const thr = cgpu->thr[0];
+			const int thr_id = thr->id;
+			_clState * const clState = clStates[thr_id];
+			
+			data->oclthreads = xintensity_to_oclthreads(v, clState->max_compute_units);
+		}
+		data->_init_xintensity = v;
+	}
 	else
+	if (isdigit(_val[0]))
 	{
 		const double v = atof(_val);
 		if (v < MIN_INTENSITY || v > MAX_GPU_INTENSITY)
 			return false;
-		data->dynamic = false;
 		data->oclthreads = intensity_to_oclthreads(v, !opt_scrypt);
 	}
 	pause_dynamic_threads(cgpu->device_id);
@@ -929,7 +960,6 @@ const char *opencl_tui_handle_choice(struct cgpu_info *cgpu, int input)
 
 
 #ifdef HAVE_OPENCL
-static _clState *clStates[MAX_GPUDEVICES];
 
 #define CL_SET_BLKARG(blkvar) status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->blkvar)
 #define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
@@ -1802,7 +1832,7 @@ static const struct bfg_set_device_definition opencl_set_device_funcs_probe[] =
 };
 
 static const struct bfg_set_device_definition opencl_set_device_funcs[] = {
-	{"intensity", opencl_init_intensity, "Intensity of GPU scanning (d or -10 -> 31)"},
+	{"intensity", opencl_init_intensity, "Intensity of GPU scanning (d, -10 -> 31, or x1 to x9999)"},
 	{"kernel", opencl_cannot_set, "Mining kernel code to use"},
 	{"threads", opencl_cannot_set, "Number of threads"},
 	{"vector", opencl_cannot_set, ""},

+ 2 - 0
driver-opencl.h

@@ -15,6 +15,7 @@ struct opencl_device_data {
 	int virtual_gpu;
 	int virtual_adl;
 	unsigned long oclthreads;
+	double _init_xintensity;
 	bool dynamic;
 	
 	cl_uint vwidth;
@@ -51,6 +52,7 @@ struct opencl_device_data {
 
 extern double oclthreads_to_intensity(unsigned long oclthreads, bool is_sha256d);
 extern unsigned long intensity_to_oclthreads(double intensity, bool is_sha256d);
+extern unsigned long xintensity_to_oclthreads(double xintensity, cl_uint max_compute_units);
 
 struct opencl_work_data {
 	cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;

+ 9 - 0
ocl.c

@@ -552,6 +552,15 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	}
 	applog(LOG_DEBUG, "Max work group size reported %"PRId64, (int64_t)clState->max_work_size);
 
+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(clState->max_compute_units), (void *)&clState->max_compute_units, NULL);
+	if (status != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_COMPUTE_UNITS", status);
+		return NULL;
+	}
+	if (data->_init_xintensity)
+		data->oclthreads = xintensity_to_oclthreads(data->_init_xintensity, clState->max_compute_units);
+	applog(LOG_DEBUG, "Max compute units reported %u", (unsigned)clState->max_compute_units);
+	
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&data->max_alloc, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);

+ 1 - 0
ocl.h

@@ -27,6 +27,7 @@ typedef struct {
 	cl_uint vwidth;
 	size_t max_work_size;
 	size_t wsize;
+	cl_uint max_compute_units;
 	enum cl_kernels chosen_kernel;
 } _clState;