14 years ago · deff55c640
--- a/cgminer.c
+++ b/cgminer.c
@@ -831,8 +831,8 @@ static struct opt_table opt_config_table[] = {
 
															 		     "Username for bitcoin JSON-RPC server"),
														
 
															 #ifdef HAVE_OPENCL
														
 
															 	OPT_WITH_ARG("--vectors|-v",
														
 
															-		     set_vector, NULL, &opt_vectors,
														
 
															-		     "Override detected optimal vector width (1, 2 or 4)"),
														
 
															+		     set_vector, NULL, NULL,
														
 
															+		     "Override detected optimal vector (1, 2 or 4) - one value or comma separated list"),
														
 
															 #endif
														
 
															 	OPT_WITHOUT_ARG("--verbose",
														
 
															 			opt_set_bool, &opt_log_output,
														
--- a/device-gpu.c
+++ b/device-gpu.c
@@ -65,14 +65,42 @@ extern int gpu_fanpercent(int gpu);
 #ifdef HAVE_OPENCL
-char *set_vector(const char *arg, int *i)
+/* Parse the --vectors option: one vector width or a comma separated list,
+ * each of which must be 1, 2 or 4. The Nth value is stored in
+ * gpus[N].vwidth; a lone value is copied to all MAX_GPUDEVICES entries.
+ * arg is modified in place by strtok(). Returns NULL on success or a
+ * static error string. Entries stored before a bad token stay modified. */
+char *set_vector(char *arg)
 {
-	char *err = opt_set_intval(arg, i);
-	if (err)
-		return err;
+	int i, device = 0;
+	char *nextptr;
+
+	nextptr = strtok(arg, ",");
+	if (nextptr == NULL)
+		return "Invalid parameters for set vector";
+
+	do {
+		char *end;
+		long val = strtol(nextptr, &end, 10);
+
+		/* Unlike atoi(), reject empty tokens and trailing garbage */
+		if (end == nextptr || *end != '\0' ||
+		    (val != 1 && val != 2 && val != 4))
+			return "Invalid value passed to set_vector";
+		/* The fill loop below treats MAX_GPUDEVICES as the size of gpus[],
+		 * so refuse a list that would index beyond it */
+		if (device >= MAX_GPUDEVICES)
+			return "Too many values passed to set_vector";
+
+		gpus[device++].vwidth = val;
+	} while ((nextptr = strtok(NULL, ",")) != NULL);
+
+	/* A single value applies to every device */
+	if (device == 1) {
+		for (i = device; i < MAX_GPUDEVICES; i++)
+			gpus[i].vwidth = gpus[0].vwidth;
+	}
-	if (*i != 1 && *i != 2 && *i != 4)
-		return "Valid vectors are 1, 2 or 4";
 	return NULL;
 }
 #endif
														
@@ -655,8 +673,8 @@ static _clState *clStates[MAX_GPUDEVICES];
 
															 static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
														
 
															 {
														
 
															-	cl_uint vwidth = clState->preferred_vwidth;
														
 
															 	cl_kernel *kernel = &clState->kernel;
														
 
															+	cl_uint vwidth = clState->vwidth;
														
 
															 	unsigned int i, num = 0;
														
 
															 	cl_int status = 0;
														
 
															 	uint *nonces;
														
@@ -707,8 +725,8 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
 
															 static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
														
 
															 				 __maybe_unused cl_uint threads)
														
 
															 {
														
 
															-	cl_uint vwidth = clState->preferred_vwidth;
														
 
															 	cl_kernel *kernel = &clState->kernel;
														
 
															+	cl_uint vwidth = clState->vwidth;
														
 
															 	unsigned int i, num = 0;
														
 
															 	cl_int status = 0;
														
 
															 	uint *nonces;
														
@@ -751,8 +769,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
 
															 static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
														
 
															 				   __maybe_unused cl_uint threads)
														
 
															 {
														
 
															-	cl_uint vwidth = clState->preferred_vwidth;
														
 
															 	cl_kernel *kernel = &clState->kernel;
														
 
															+	cl_uint vwidth = clState->vwidth;
														
 
															 	unsigned int i, num = 0;
														
 
															 	cl_int status = 0;
														
 
															 	uint *nonces;
														
@@ -809,8 +827,8 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
 
															 static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
														
 
															 {
														
 
															-	cl_uint vwidth = clState->preferred_vwidth;
														
 
															 	cl_kernel *kernel = &clState->kernel;
														
 
															+	cl_uint vwidth = clState->vwidth;
														
 
															 	unsigned int i, num = 0;
														
 
															 	cl_int status = 0;
														
 
															 	uint *nonces;
														
@@ -1242,7 +1260,7 @@ static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
															 				++gpu->intensity;
														
 
															 		}
														
 
															 	}
														
 
															-	set_threads_hashes(clState->preferred_vwidth, &threads, &hashes, globalThreads,
														
 
															+	set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
														
 
															 			   localThreads[0], gpu->intensity);
														
 
															 	if (hashes > gpu->max_hashes)
														
 
															 		gpu->max_hashes = hashes;
														
--- a/device-gpu.h
+++ b/device-gpu.h
@@ -15,7 +15,7 @@ extern char *set_gpu_vddc(char *arg);
 
															 extern char *set_temp_overheat(char *arg);
														
 
															 extern char *set_temp_target(char *arg);
														
 
															 extern char *set_intensity(char *arg);
														
 
															-extern char *set_vector(const char *arg, int *i);
														
 
															+extern char *set_vector(char *arg);
														
 
															 void manage_gpu(void);
														
 
															 extern void pause_dynamic_threads(int gpu);
														
--- a/miner.h
+++ b/miner.h
@@ -236,9 +236,13 @@ struct cgpu_info {
 
															 	struct thr_info *thread;
														
 
															 	unsigned int max_hashes;
														
 
															+
														
 
															+#ifdef HAVE_OPENCL
														
 
															 	int virtual_gpu;
														
 
															 	int intensity;
														
 
															 	bool dynamic;
														
 
															+	cl_uint vwidth;
														
 
															+	size_t work_size;
														
 
															 	float temp;
														
 
															 	int cutofftemp;
														
@@ -255,6 +259,7 @@ struct cgpu_info {
 
															 	int gpu_memdiff;
														
 
															 	int gpu_powertune;
														
 
															 	float gpu_vddc;
														
 
															+#endif
														
 
															 #endif
														
 
															 	int last_share_pool;
														
 
															 	time_t last_share_pool_time;
														
--- a/ocl.c
+++ b/ocl.c
@@ -33,7 +33,6 @@
 
															 #include "findnonce.h"
														
 
															 #include "ocl.h"
														
 
															-extern int opt_vectors;
														
 
															 extern int opt_worksize;
														
 
															 int opt_platform_id;
														
@@ -194,6 +193,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 
															 	cl_platform_id platform = NULL;
														
 
															 	char pbuff[256], vbuff[255];
														
 
															 	cl_platform_id* platforms;
														
 
															+	cl_uint preferred_vwidth;
														
 
															 	cl_device_id *devices;
														
 
															 	cl_uint numPlatforms;
														
 
															 	cl_uint numDevices;
														
@@ -319,12 +319,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 
															 	if (!find)
														
 
															 		clState->hasOpenCL11plus = true;
														
 
															-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
														
 
															+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
														
 
															 	if (status != CL_SUCCESS) {
														
 
															 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
														
 
															 		return NULL;
														
 
															 	}
														
 
															-	applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth);
														
 
															+	applog(LOG_DEBUG, "Preferred vector width reported %d", preferred_vwidth);
														
 
															 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
														
 
															 	if (status != CL_SUCCESS) {
														
@@ -337,22 +337,24 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 
															 	 * otherwise, and many cards lie about their max so use 256 as max
														
 
															 	 * unless explicitly set on the command line. 79x0 cards perform
														
 
															 	 * better without vectors */
														
 
															-	if (clState->preferred_vwidth > 1) {
														
 
															+	if (preferred_vwidth > 1) {
														
 
															 		if (strstr(name, "Tahiti"))
														
 
															-			clState->preferred_vwidth = 1;
														
 
															+			preferred_vwidth = 1;
														
 
															 		else
														
 
															-			clState->preferred_vwidth = 2;
														
 
															+			preferred_vwidth = 2;
														
 
															 	}
														
 
															-	if (opt_vectors)
														
 
															-		clState->preferred_vwidth = opt_vectors;
														
 
															+	if (gpus[gpu].vwidth)
														
 
															+		clState->vwidth = gpus[gpu].vwidth;
														
 
															+	else
														
 
															+		clState->vwidth = preferred_vwidth;
														
 
															+
														
 
															 	if (opt_worksize && opt_worksize <= (int)clState->max_work_size)
														
 
															 		clState->work_size = opt_worksize;
														
 
															 	else if (strstr(name, "Tahiti"))
														
 
															 		clState->work_size = 64;
														
 
															 	else
														
 
															-		clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) /
														
 
															-				clState->preferred_vwidth;
														
 
															+		clState->work_size = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
														
 
															 	/* Create binary filename based on parameters passed to opencl
														
 
															 	 * compiler to ensure we only load a binary that matches what would
														
@@ -428,7 +430,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 
															 	strcat(binaryfilename, name);
														
 
															 	strcat(binaryfilename, "v");
														
 
															-	sprintf(numbuf, "%d", clState->preferred_vwidth);
														
 
															+	sprintf(numbuf, "%d", clState->vwidth);
														
 
															 	strcat(binaryfilename, numbuf);
														
 
															 	strcat(binaryfilename, "w");
														
 
															 	sprintf(numbuf, "%d", (int)clState->work_size);
														
@@ -496,10 +498,10 @@ build:
 
															 	char *CompilerOptions = calloc(1, 256);
														
 
															 	sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d",
														
 
															-		(int)clState->work_size, clState->preferred_vwidth);
														
 
															+		(int)clState->work_size, clState->vwidth);
														
 
															 	applog(LOG_DEBUG, "Setting worksize to %d", clState->work_size);
														
 
															-	if (clState->preferred_vwidth > 1)
														
 
															-		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->preferred_vwidth);
														
 
															+	if (clState->vwidth > 1)
														
 
															+		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
														
 
															 	if (clState->hasBitAlign) {
														
 
															 		strcat(CompilerOptions, " -D BITALIGN");
														
@@ -648,7 +650,7 @@ built:
 
															 	free(binary_sizes);
														
 
															 	applog(LOG_INFO, "Initialising kernel %s with%s bitalign, %d vectors and worksize %d",
														
 
															-	       filename, clState->hasBitAlign ? "" : "out", clState->preferred_vwidth, clState->work_size);
														
 
															+	       filename, clState->hasBitAlign ? "" : "out", clState->vwidth, clState->work_size);
														
 
															 	if (!prog_built) {
														
 
															 		/* create a cl program executable for all the devices specified */
														
--- a/ocl.h
+++ b/ocl.h
@@ -21,9 +21,10 @@ typedef struct {
 
															 	cl_mem outputBuffer;
														
 
															 	bool hasBitAlign;
														
 
															 	bool hasOpenCL11plus;
														
 
															-	cl_uint preferred_vwidth;
														
 
															+	cl_uint vwidth;
														
 
															 	size_t max_work_size;
														
 
															 	size_t work_size;
														
 
															+	size_t wsize;
														
 
															 	enum cl_kernels chosen_kernel;
														
 
															 } _clState;