Browse Source

Move all OpenCL-specific data behind device_data pointers, and relocate OpenCL-specific code out of miner.c

Luke Dashjr 12 years ago
parent
commit
223df8d6ad
9 changed files with 560 additions and 386 deletions
  1. adl.c (+145 -91)
  2. api.c (+8 -4)
  3. driver-opencl.c (+288 -71)
  4. driver-opencl.h (+73 -0)
  5. findnonce.c (+1 -1)
  6. findnonce.h (+2 -1)
  7. miner.c (+6 -116)
  8. miner.h (+5 -72)
  9. ocl.c (+32 -30)

+ 145 - 91
adl.c

@@ -23,6 +23,7 @@
 
 #include <stdbool.h>
 
+#include "driver-opencl.h"
 #include "miner.h"
 #include "ADL/adl_sdk.h"
 #include "compat.h"
@@ -312,11 +313,12 @@ void init_adl(int nDevs)
 
 	/* Apply manually provided OpenCL to ADL mapping, if any */
 	for (i = 0; i < nDevs; i++) {
-		if (gpus[i].mapped) {
-			vadapters[gpus[i].virtual_adl].virtual_gpu = i;
-			applog(LOG_INFO, "Mapping OpenCL device %d to ADL device %d", i, gpus[i].virtual_adl);
+		struct opencl_device_data * const data = gpus[i].device_data;
+		if (data->mapped) {
+			vadapters[data->virtual_adl].virtual_gpu = i;
+			applog(LOG_INFO, "Mapping OpenCL device %d to ADL device %d", i, data->virtual_adl);
 		} else
-			gpus[i].virtual_adl = i;
+			data->virtual_adl = i;
 	}
 
 	if (!devs_match) {
@@ -353,16 +355,18 @@ void init_adl(int nDevs)
 		devices = nDevs;
 
 	for (gpu = 0; gpu < devices; gpu++) {
+		struct cgpu_info * const cgpu = &gpus[gpu];
+		struct opencl_device_data * const data = cgpu->device_data;
 		struct gpu_adl *ga;
 		int iAdapterIndex;
 		int lpAdapterID;
 		ADLODPerformanceLevels *lpOdPerformanceLevels;
 		int lev, adlGpu;
 
-		adlGpu = gpus[gpu].virtual_adl;
+		adlGpu = data->virtual_adl;
 		i = vadapters[adlGpu].id;
 		iAdapterIndex = lpInfo[i].iAdapterIndex;
-		gpus[gpu].virtual_gpu = vadapters[adlGpu].virtual_gpu;
+		data->virtual_gpu = vadapters[adlGpu].virtual_gpu;
 
 		/* Get unique identifier of the adapter, 0 means not AMD */
 		result = ADL_Adapter_ID_Get(iAdapterIndex, &lpAdapterID);
@@ -372,11 +376,11 @@ void init_adl(int nDevs)
 		}
 
 		if (gpus[gpu].deven == DEV_DISABLED) {
-			gpus[gpu].gpu_engine =
-			gpus[gpu].gpu_memclock =
-			gpus[gpu].gpu_vddc =
-			gpus[gpu].gpu_fan =
-			gpus[gpu].gpu_powertune = 0;
+			data->gpu_engine =
+			data->gpu_memclock =
+			data->gpu_vddc =
+			data->gpu_fan =
+			data->gpu_powertune = 0;
 			continue;
 		}
 
@@ -384,13 +388,13 @@ void init_adl(int nDevs)
 		if (gpus[gpu].name)
 			free((void*)gpus[gpu].name);
 		gpus[gpu].name = lpInfo[i].strAdapterName;
-		gpus[gpu].has_adl = true;
+		data->has_adl = true;
 		/* Flag adl as active if any card is successfully activated */
 		adl_active = true;
 
 		/* From here on we know this device is a discrete device and
 		 * should support ADL */
-		ga = &gpus[gpu].adl;
+		ga = &data->adl;
 		ga->gpu = gpu;
 		ga->iAdapterIndex = iAdapterIndex;
 		ga->lpAdapterID = lpAdapterID;
@@ -421,8 +425,8 @@ void init_adl(int nDevs)
 		/* Save these values as the defaults in case we wish to reset to defaults */
 		ga->DefPerfLev = lpOdPerformanceLevels;
 
-		if (gpus[gpu].gpu_engine) {
-			int setengine = gpus[gpu].gpu_engine * 100;
+		if (data->gpu_engine) {
+			int setengine = data->gpu_engine * 100;
 
 			/* Lower profiles can't have a higher setting */
 			for (j = 0; j < lev; j++) {
@@ -430,38 +434,38 @@ void init_adl(int nDevs)
 					lpOdPerformanceLevels->aLevels[j].iEngineClock = setengine;
 			}
 			lpOdPerformanceLevels->aLevels[lev].iEngineClock = setengine;
-			applog(LOG_INFO, "Setting GPU %d engine clock to %d", gpu, gpus[gpu].gpu_engine);
+			applog(LOG_INFO, "Setting GPU %d engine clock to %d", gpu, data->gpu_engine);
 			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
 			ga->maxspeed = setengine;
-			if (gpus[gpu].min_engine)
-				ga->minspeed = gpus[gpu].min_engine * 100;
+			if (data->min_engine)
+				ga->minspeed = data->min_engine * 100;
 			ga->managed = true;
-			if (gpus[gpu].gpu_memdiff)
-				set_memoryclock(gpu, gpus[gpu].gpu_engine + gpus[gpu].gpu_memdiff);
+			if (data->gpu_memdiff)
+				set_memoryclock(gpu, data->gpu_engine + data->gpu_memdiff);
 		}
 
-		if (gpus[gpu].gpu_memclock) {
-			int setmem = gpus[gpu].gpu_memclock * 100;
+		if (data->gpu_memclock) {
+			int setmem = data->gpu_memclock * 100;
 
 			for (j = 0; j < lev; j++) {
 				if (lpOdPerformanceLevels->aLevels[j].iMemoryClock > setmem)
 					lpOdPerformanceLevels->aLevels[j].iMemoryClock = setmem;
 			}
 			lpOdPerformanceLevels->aLevels[lev].iMemoryClock = setmem;
-			applog(LOG_INFO, "Setting GPU %d memory clock to %d", gpu, gpus[gpu].gpu_memclock);
+			applog(LOG_INFO, "Setting GPU %d memory clock to %d", gpu, data->gpu_memclock);
 			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
 			ga->managed = true;
 		}
 
-		if (gpus[gpu].gpu_vddc) {
-			int setv = gpus[gpu].gpu_vddc * 1000;
+		if (data->gpu_vddc) {
+			int setv = data->gpu_vddc * 1000;
 
 			for (j = 0; j < lev; j++) {
 				if (lpOdPerformanceLevels->aLevels[j].iVddc > setv)
 					lpOdPerformanceLevels->aLevels[j].iVddc = setv;
 			}
 			lpOdPerformanceLevels->aLevels[lev].iVddc = setv;
-			applog(LOG_INFO, "Setting GPU %d voltage to %.3f", gpu, gpus[gpu].gpu_vddc);
+			applog(LOG_INFO, "Setting GPU %d voltage to %.3f", gpu, data->gpu_vddc);
 			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
 			ga->managed = true;
 		}
@@ -480,17 +484,17 @@ void init_adl(int nDevs)
 		/* Save the fanspeed values as defaults in case we reset later */
 		ga->DefFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_RPM;
 		ADL_Overdrive5_FanSpeed_Get(ga->iAdapterIndex, 0, &ga->DefFanSpeedValue);
-		if (gpus[gpu].gpu_fan)
-			set_fanspeed(gpu, gpus[gpu].gpu_fan);
+		if (data->gpu_fan)
+			set_fanspeed(gpu, data->gpu_fan);
 		else
-			gpus[gpu].gpu_fan = 85; /* Set a nominal upper limit of 85% */
+			data->gpu_fan = 85; /* Set a nominal upper limit of 85% */
 
 		/* Not fatal if powercontrol get fails */
 		if (ADL_Overdrive5_PowerControl_Get(ga->iAdapterIndex, &ga->iPercentage, &dummy) != ADL_OK)
 			applog(LOG_INFO, "Failed to ADL_Overdrive5_PowerControl_get");
 
-		if (gpus[gpu].gpu_powertune) {
-			ADL_Overdrive5_PowerControl_Set(ga->iAdapterIndex, gpus[gpu].gpu_powertune);
+		if (data->gpu_powertune) {
+			ADL_Overdrive5_PowerControl_Set(ga->iAdapterIndex, data->gpu_powertune);
 			ADL_Overdrive5_PowerControl_Get(ga->iAdapterIndex, &ga->iPercentage, &dummy);
 			ga->managed = true;
 		}
@@ -508,10 +512,10 @@ void init_adl(int nDevs)
 
 			ga->autofan = true;
 			/* Clamp fanspeed values to range provided */
-			if (nominal > gpus[gpu].gpu_fan)
-				nominal = gpus[gpu].gpu_fan;
-			if (nominal < gpus[gpu].min_fan)
-				nominal = gpus[gpu].min_fan;
+			if (nominal > data->gpu_fan)
+				nominal = data->gpu_fan;
+			if (nominal < data->min_fan)
+				nominal = data->min_fan;
 			set_fanspeed(gpu, nominal);
 		}
 		if (opt_autoengine) {
@@ -523,16 +527,20 @@ void init_adl(int nDevs)
 	}
 
 	for (gpu = 0; gpu < devices; gpu++) {
-		struct gpu_adl *ga = &gpus[gpu].adl;
+		struct cgpu_info * const cgpu = &gpus[gpu];
+		struct opencl_device_data * const data = cgpu->device_data;
+		struct gpu_adl *ga = &data->adl;
 		int j;
 
 		for (j = 0; j < devices; j++) {
+			struct cgpu_info * const other_cgpu = &gpus[j];
+			struct opencl_device_data * const other_data = other_cgpu->device_data;
 			struct gpu_adl *other_ga;
 
 			if (j == gpu)
 				continue;
 
-			other_ga = &gpus[j].adl;
+			other_ga = &other_data->adl;
 
 			/* Search for twin GPUs on a single card. They will be
 			 * separated by one bus id and one will have fanspeed
@@ -558,13 +566,15 @@ static float __gpu_temp(struct gpu_adl *ga)
 
 float gpu_temp(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	float ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = __gpu_temp(ga);
 	unlock_adl();
@@ -579,13 +589,15 @@ static inline int __gpu_engineclock(struct gpu_adl *ga)
 
 int gpu_engineclock(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
 		goto out;
@@ -602,13 +614,15 @@ static inline int __gpu_memclock(struct gpu_adl *ga)
 
 int gpu_memclock(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
 		goto out;
@@ -625,13 +639,15 @@ static inline float __gpu_vddc(struct gpu_adl *ga)
 
 float gpu_vddc(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	float ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
 		goto out;
@@ -650,13 +666,15 @@ static inline int __gpu_activity(struct gpu_adl *ga)
 
 int gpu_activity(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity);
 	unlock_adl();
@@ -682,13 +700,15 @@ static inline int __gpu_fanspeed(struct gpu_adl *ga)
 
 int gpu_fanspeed(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = __gpu_fanspeed(ga);
 	unlock_adl();
@@ -710,13 +730,15 @@ static int __gpu_fanpercent(struct gpu_adl *ga)
 
 int gpu_fanpercent(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = __gpu_fanpercent(ga);
 	unlock_adl();
@@ -734,13 +756,15 @@ static inline int __gpu_powertune(struct gpu_adl *ga)
 
 int gpu_powertune(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = __gpu_powertune(ga);
 	unlock_adl();
@@ -750,12 +774,14 @@ int gpu_powertune(int gpu)
 bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
 	       int *activity, int *fanspeed, int *fanpercent, int *powertune)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return false;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	lock_adl();
 	gpus[gpu].temp =
@@ -782,13 +808,15 @@ bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vdd
 #ifdef HAVE_CURSES
 static void get_enginerange(int gpu, int *imin, int *imax)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Get enginerange not supported\n");
 		return;
 	}
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	*imin = ga->lpOdParameters.sEngineClock.iMin / 100;
 	*imax = ga->lpOdParameters.sEngineClock.iMax / 100;
 }
@@ -796,18 +824,19 @@ static void get_enginerange(int gpu, int *imin, int *imax)
 
 int set_engineclock(int gpu, int iEngineClock)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	ADLODPerformanceLevels *lpOdPerformanceLevels;
-	struct cgpu_info *cgpu;
 	int i, lev, ret = 1;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set engineclock not supported\n");
 		return ret;
 	}
 
 	iEngineClock *= 100;
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	/* Keep track of intended engine clock in case the device changes
 	 * profile and drops while idle, not taking the new engine clock */
@@ -840,9 +869,8 @@ int set_engineclock(int gpu, int iEngineClock)
 out:
 	unlock_adl();
 
-	cgpu = &gpus[gpu];
-	if (cgpu->gpu_memdiff)
-		set_memoryclock(gpu, iEngineClock / 100 + cgpu->gpu_memdiff);
+	if (data->gpu_memdiff)
+		set_memoryclock(gpu, iEngineClock / 100 + data->gpu_memdiff);
 
 	return ret;
 }
@@ -850,13 +878,15 @@ out:
 #ifdef HAVE_CURSES
 static void get_memoryrange(int gpu, int *imin, int *imax)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Get memoryrange not supported\n");
 		return;
 	}
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	*imin = ga->lpOdParameters.sMemoryClock.iMin / 100;
 	*imax = ga->lpOdParameters.sMemoryClock.iMax / 100;
 }
@@ -864,19 +894,21 @@ static void get_memoryrange(int gpu, int *imin, int *imax)
 
 int set_memoryclock(int gpu, int iMemoryClock)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	ADLODPerformanceLevels *lpOdPerformanceLevels;
 	int i, lev, ret = 1;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set memoryclock not supported\n");
 		return ret;
 	}
 
-	gpus[gpu].gpu_memclock = iMemoryClock;
+	data->gpu_memclock = iMemoryClock;
 
 	iMemoryClock *= 100;
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	lev = ga->lpOdParameters.iNumberOfPerformanceLevels - 1;
 	lpOdPerformanceLevels = alloca(sizeof(ADLODPerformanceLevels) + (lev * sizeof(ADLODPerformanceLevel)));
@@ -906,13 +938,15 @@ out:
 #ifdef HAVE_CURSES
 static void get_vddcrange(int gpu, float *imin, float *imax)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Get vddcrange not supported\n");
 		return;
 	}
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	*imin = (float)ga->lpOdParameters.sVddc.iMin / 1000;
 	*imax = (float)ga->lpOdParameters.sVddc.iMax / 1000;
 }
@@ -933,17 +967,19 @@ static float curses_float(const char *query)
 
 int set_vddc(int gpu, float fVddc)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	ADLODPerformanceLevels *lpOdPerformanceLevels;
 	int i, iVddc, lev, ret = 1;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set vddc not supported\n");
 		return ret;
 	}
 
 	iVddc = 1000 * fVddc;
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	lev = ga->lpOdParameters.iNumberOfPerformanceLevels - 1;
 	lpOdPerformanceLevels = alloca(sizeof(ADLODPerformanceLevels) + (lev * sizeof(ADLODPerformanceLevel)));
@@ -972,28 +1008,32 @@ out:
 
 static void get_fanrange(int gpu, int *imin, int *imax)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Get fanrange not supported\n");
 		return;
 	}
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	*imin = ga->lpFanSpeedInfo.iMinPercent;
 	*imax = ga->lpFanSpeedInfo.iMaxPercent;
 }
 
 int set_fanspeed(int gpu, int iFanSpeed)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = 1;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set fanspeed not supported\n");
 		return ret;
 	}
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	if (!(ga->lpFanSpeedInfo.iFlags & (ADL_DL_FANCTRL_SUPPORTS_RPM_WRITE | ADL_DL_FANCTRL_SUPPORTS_PERCENT_WRITE ))) {
 		applog(LOG_DEBUG, "GPU %d doesn't support rpm or percent write", gpu);
 		return ret;
@@ -1030,15 +1070,17 @@ int set_fanspeed(int gpu, int iFanSpeed)
 #ifdef HAVE_CURSES
 static int set_powertune(int gpu, int iPercentage)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int dummy, ret = 1;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set powertune not supported\n");
 		return ret;
 	}
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	lock_adl();
 	ADL_Overdrive5_PowerControl_Set(ga->iAdapterIndex, iPercentage);
@@ -1057,10 +1099,11 @@ static int set_powertune(int gpu, int iPercentage)
 static bool fan_autotune(int gpu, int temp, int fanpercent, int lasttemp, bool *fan_window)
 {
 	struct cgpu_info *cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	int tdiff = round(temp - lasttemp);
-	struct gpu_adl *ga = &cgpu->adl;
-	int top = gpus[gpu].gpu_fan;
-	int bot = gpus[gpu].min_fan;
+	struct gpu_adl *ga = &data->adl;
+	const int top = data->gpu_fan;
+	const int bot = data->min_fan;
 	int newpercent = fanpercent;
 	int iMin = 0, iMax = 100;
 
@@ -1132,7 +1175,8 @@ void gpu_autotune(int gpu, enum dev_enable *denable)
 	struct gpu_adl *ga;
 
 	cgpu = &gpus[gpu];
-	ga = &cgpu->adl;
+	struct opencl_device_data * const data = cgpu->device_data;
+	ga = &data->adl;
 
 	lock_adl();
 	ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity);
@@ -1211,11 +1255,13 @@ void gpu_autotune(int gpu, enum dev_enable *denable)
 
 void set_defaultfan(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ADL_Overdrive5_FanSpeed_Set(ga->iAdapterIndex, 0, &ga->DefFanSpeedValue);
 	unlock_adl();
@@ -1223,11 +1269,13 @@ void set_defaultfan(int gpu)
 
 void set_defaultengine(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, ga->DefPerfLev);
 	unlock_adl();
@@ -1236,7 +1284,9 @@ void set_defaultengine(int gpu)
 #ifdef HAVE_CURSES
 void change_autosettings(int gpu)
 {
-	struct gpu_adl *ga = &gpus[gpu].adl;
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
+	struct gpu_adl *ga = &data->adl;
 	char input;
 	int val;
 
@@ -1285,7 +1335,9 @@ void change_autosettings(int gpu)
 
 void change_gpusettings(int gpu)
 {
-	struct gpu_adl *ga = &gpus[gpu].adl;
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
+	struct gpu_adl *ga = &data->adl;
 	float fval, fmin = 0, fmax = 0;
 	int val, imin = 0, imax = 0;
 	char input;
@@ -1306,7 +1358,7 @@ updated:
 	wlogprint("Engine Clock: %d MHz\nMemory Clock: %d MHz\nVddc: %.3f V\nActivity: %d%%\nPowertune: %d%%\n",
 		engineclock, memclock, vddc, activity, powertune);
 	wlogprint("Fan autotune is %s (%d-%d)\n", ga->autofan ? "enabled" : "disabled",
-		  gpus[gpu].min_fan, gpus[gpu].gpu_fan);
+		  data->min_fan, data->gpu_fan);
 	wlogprint("GPU engine clock autotune is %s (%d-%d)\n", ga->autoengine ? "enabled" : "disabled",
 		ga->minspeed / 100, ga->maxspeed / 100);
 	wlogprint("Change [A]utomatic [E]ngine [F]an [M]emory [V]oltage [P]owertune\n");
@@ -1414,9 +1466,11 @@ void clear_adl(int nDevs)
 	lock_adl();
 	/* Try to reset values to their defaults */
 	for (i = 0; i < nDevs; i++) {
-		ga = &gpus[i].adl;
+		struct cgpu_info * const cgpu = &gpus[i];
+		struct opencl_device_data * const data = cgpu->device_data;
+		ga = &data->adl;
 		/*  Only reset the values if we've changed them at any time */
-		if (!gpus[i].has_adl || !ga->managed)
+		if (!data->has_adl || !ga->managed)
 			continue;
 		ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, ga->DefPerfLev);
 		free(ga->DefPerfLev);

+ 8 - 4
api.c

@@ -33,6 +33,7 @@
 #include "miner.h"
 #include "util.h"
 #include "driver-cpu.h" /* for algo_names[], TODO: re-factor dependency */
+#include "driver-opencl.h"
 
 #define HAVE_AN_FPGA 1
 
@@ -1338,7 +1339,8 @@ static void minerconfig(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __
 	int i;
 
 	for (i = 0; i < nDevs; i++) {
-		if (gpus[i].has_adl) {
+		struct opencl_device_data * const data = gpus[i].device_data;
+		if (data->has_adl) {
 			adlinuse = (char *)YES;
 			break;
 		}
@@ -2731,8 +2733,10 @@ static void gpuintensity(struct io_data *io_data, __maybe_unused SOCKETTYPE c, c
 	if (!splitgpuvalue(io_data, param, &id, &value, isjson))
 		return;
 
+	struct opencl_device_data * const data = gpus[id].device_data;
+	
 	if (!strncasecmp(value, DYNAMIC, 1)) {
-		gpus[id].dynamic = true;
+		data->dynamic = true;
 		strcpy(intensitystr, DYNAMIC);
 	}
 	else {
@@ -2742,8 +2746,8 @@ static void gpuintensity(struct io_data *io_data, __maybe_unused SOCKETTYPE c, c
 			return;
 		}
 
-		gpus[id].dynamic = false;
-		gpus[id].intensity = intensity;
+		data->dynamic = false;
+		data->intensity = intensity;
 		sprintf(intensitystr, "%d", intensity);
 	}
 

+ 288 - 71
driver-opencl.c

@@ -25,6 +25,7 @@
 #include <string.h>
 #include <stdbool.h>
 #include <stdint.h>
+#include <stdio.h>
 
 #include <sys/types.h>
 
@@ -286,14 +287,20 @@ extern int gpu_fanpercent(int gpu);
 #endif
 
 
-#ifdef HAVE_SENSORS
-#include <sensors/sensors.h>
-
-struct opencl_device_data {
-	const sensors_chip_name *sensor;
-};
-#endif
-
+/* Give every gpus[] slot its own statically allocated, zeroed
+ * opencl_device_data with dynamic intensity enabled by default. */
+void opencl_early_init(void)
+{
+	static struct opencl_device_data dataarray[MAX_GPUDEVICES];
+
+	for (int i = 0; i < MAX_GPUDEVICES; ++i) {
+		struct opencl_device_data * const data = &dataarray[i];
+
+		/* Compound literals zero every member not named here. */
+		*data = (struct opencl_device_data){ .dynamic = true };
+		gpus[i] = (struct cgpu_info){ .device_data = data };
+	}
+}
 
 static
 const char *_set_list(char * const arg, const char * const emsg, bool (*set_func)(struct cgpu_info *, const char *))
@@ -326,7 +333,8 @@ bool _set_ ## PNAME (struct cgpu_info * const cgpu, const char * const _val)  \
 	const int v = atoi(_val);  \
 	if (!(VCHECK))  \
 		return false;  \
-	cgpu->FIELD = v;  \
+	struct opencl_device_data * const data = cgpu->device_data;  \
+	data->FIELD = v;  \
 	return true;  \
 }  \
 const char *set_ ## PNAME(char *arg)  \
@@ -370,7 +378,8 @@ bool _set_kernel(struct cgpu_info * const cgpu, const char * const _val)
 	const enum cl_kernels kern = select_kernel(_val);
 	if (kern == KL_NONE)
 		return false;
-	cgpu->kernel = kern;
+	struct opencl_device_data * const data = cgpu->device_data;
+	data->kernel = kern;
 	return true;
 }
 const char *set_kernel(char *arg)
@@ -386,6 +395,7 @@ const char *set_kernel(char *arg)
  * simple enumeration has failed to match them. */
 char *set_gpu_map(char *arg)
 {
+	struct opencl_device_data *data;
 	int val1 = 0, val2 = 0;
 	char *nextptr;
 
@@ -397,16 +407,18 @@ char *set_gpu_map(char *arg)
 	if (val1 < 0 || val1 > MAX_GPUDEVICES || val2 < 0 || val2 > MAX_GPUDEVICES)
 		return "Invalid value passed to set_gpu_map";
 
-	gpus[val1].virtual_adl = val2;
-	gpus[val1].mapped = true;
+	data = gpus[val1].device_data;
+	data->virtual_adl = val2;
+	data->mapped = true;
 
 	while ((nextptr = strtok(NULL, ",")) != NULL) {
 		if (sscanf(nextptr, "%d:%d", &val1, &val2) != 2)
 			return "Invalid description for map pair";
 		if (val1 < 0 || val1 > MAX_GPUDEVICES || val2 < 0 || val2 > MAX_GPUDEVICES)
 			return "Invalid value passed to set_gpu_map";
-		gpus[val1].virtual_adl = val2;
-		gpus[val1].mapped = true;
+		data = gpus[val1].device_data;
+		data->virtual_adl = val2;
+		data->mapped = true;
 	}
 
 	return NULL;
@@ -419,8 +431,9 @@ bool _set_gpu_engine(struct cgpu_info * const cgpu, const char * const _val)
 	get_intrange(_val, &val1, &val2);
 	if (val1 < 0 || val1 > 9999 || val2 < 0 || val2 > 9999)
 		return false;
-	cgpu->min_engine = val1;
-	cgpu->gpu_engine = val2;
+	struct opencl_device_data * const data = cgpu->device_data;
+	data->min_engine = val1;
+	data->gpu_engine = val2;
 	return true;
 }
 const char *set_gpu_engine(char *arg)
@@ -435,8 +448,9 @@ bool _set_gpu_fan(struct cgpu_info * const cgpu, const char * const _val)
 	get_intrange(_val, &val1, &val2);
 	if (val1 < 0 || val1 > 100 || val2 < 0 || val2 > 100)
 		return false;
-	cgpu->min_fan = val1;
-	cgpu->gpu_fan = val2;
+	struct opencl_device_data * const data = cgpu->device_data;
+	data->min_fan = val1;
+	data->gpu_fan = val2;
 	return true;
 }
 const char *set_gpu_fan(char *arg)
@@ -455,15 +469,16 @@ _SET_INT_LIST(temp_overheat, (v >=     0 && v <   200), adl.overtemp )
 static
 bool _set_intensity(struct cgpu_info * const cgpu, const char * const _val)
 {
+	struct opencl_device_data * const data = cgpu->device_data;
 	if (!strncasecmp(_val, "d", 1))
-		cgpu->dynamic = true;
+		data->dynamic = true;
 	else
 	{
 		const int v = atoi(_val);
 		if (v < MIN_INTENSITY || v > MAX_GPU_INTENSITY)
 			return false;
-		cgpu->dynamic = false;
-		cgpu->intensity = v;
+		data->dynamic = false;
+		data->intensity = v;
 	}
 	return true;
 }
@@ -473,6 +488,153 @@ const char *set_intensity(char *arg)
 }
 #endif
 
+/* Write the OpenCL device options to fcfg as JSON-style members: one
+ * quoted, comma-separated list per option with an entry for each of the
+ * nDevs GPUs, plus "gpu-reorder" when opt_reorder is set.  Every key is
+ * emitted with a leading ",\n", presumably continuing an object that the
+ * caller has already started. */
+void write_config_opencl(FILE * const fcfg)
+{
+#ifdef HAVE_OPENCL
+	if (nDevs) {
+		/* Write GPU device values */
+		fputs(",\n\"intensity\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			if (i > 0)
+				fputc(',', fcfg);
+			if (data->dynamic)
+				fputc('d', fcfg);
+			else
+				fprintf(fcfg, "%d", data->intensity);
+		}
+		fputs("\",\n\"vectors\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", data->vwidth);
+		}
+		fputs("\",\n\"worksize\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", (int)data->work_size);
+		}
+		fputs("\",\n\"kernel\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++) {
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fputs(i > 0 ? "," : "", fcfg);
+			switch (data->kernel) {
+				case KL_NONE: // Shouldn't happen
+					break;
+				case KL_POCLBM:
+					fputs("poclbm", fcfg);
+					break;
+				case KL_PHATK:
+					fputs("phatk", fcfg);
+					break;
+				case KL_DIAKGCN:
+					fputs("diakgcn", fcfg);
+					break;
+				case KL_DIABLO:
+					fputs("diablo", fcfg);
+					break;
+				case KL_SCRYPT:
+					fputs("scrypt", fcfg);
+					break;
+			}
+		}
+#ifdef USE_SCRYPT
+		/* Options only built with USE_SCRYPT */
+		fputs("\",\n\"lookup-gap\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", (int)data->opt_lg);
+		}
+		fputs("\",\n\"thread-concurrency\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", (int)data->opt_tc);
+		}
+		fputs("\",\n\"shaders\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", (int)data->shaders);
+		}
+#endif
+#ifdef HAVE_ADL
+		/* Options only built with HAVE_ADL */
+		fputs("\",\n\"gpu-engine\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d-%d", i > 0 ? "," : "", data->min_engine, data->gpu_engine);
+		}
+		fputs("\",\n\"gpu-fan\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d-%d", i > 0 ? "," : "", data->min_fan, data->gpu_fan);
+		}
+		fputs("\",\n\"gpu-memclock\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", data->gpu_memclock);
+		}
+		fputs("\",\n\"gpu-memdiff\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", data->gpu_memdiff);
+		}
+		fputs("\",\n\"gpu-powertune\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", data->gpu_powertune);
+		}
+		fputs("\",\n\"gpu-vddc\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%1.3f", i > 0 ? "," : "", data->gpu_vddc);
+		}
+		fputs("\",\n\"temp-overheat\" : \"", fcfg);
+		for (int i = 0; i < nDevs; i++)
+		{
+			struct cgpu_info * const cgpu = &gpus[i];
+			struct opencl_device_data * const data = cgpu->device_data;
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", data->adl.overtemp);
+		}
+#endif
+		/* Close the last quoted list */
+		fputs("\"", fcfg);
+	}
+#endif
+#ifdef HAVE_ADL
+	if (opt_reorder)
+		fputs(",\n\"gpu-reorder\" : true", fcfg);
+#endif
+}
+
 
 #ifdef HAVE_OPENCL
 BFG_REGISTER_DRIVER(opencl_api)
@@ -501,19 +663,20 @@ struct cgpu_info *cpus;
 void pause_dynamic_threads(int gpu)
 {
 	struct cgpu_info *cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	int i;
 
 	for (i = 1; i < cgpu->threads; i++) {
 		struct thr_info *thr;
 
 		thr = cgpu->thr[i];
-		if (!thr->pause && cgpu->dynamic) {
+		if (!thr->pause && data->dynamic) {
 			applog(LOG_WARNING, "Disabling extra threads due to dynamic mode.");
 			applog(LOG_WARNING, "Tune dynamic intensity with --gpu-dyninterval");
 		}
 
-		thr->pause = cgpu->dynamic;
-		if (!cgpu->dynamic && cgpu->deven != DEV_DISABLED)
+		thr->pause = data->dynamic;
+		if (!data->dynamic && cgpu->deven != DEV_DISABLED)
 			mt_enable(thr);
 	}
 }
@@ -527,6 +690,7 @@ struct device_drv opencl_api;
 static
 void opencl_wlogprint_status(struct cgpu_info *cgpu)
 {
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct thr_info *thr;
 	int i;
 	char checkin[40];
@@ -535,9 +699,9 @@ void opencl_wlogprint_status(struct cgpu_info *cgpu)
 	char logline[255];
 	strcpy(logline, ""); // In case it has no data
 	
-	tailsprintf(logline, sizeof(logline), "I:%s%d  ", (cgpu->dynamic ? "d" : ""), cgpu->intensity);
+	tailsprintf(logline, sizeof(logline), "I:%s%d  ", (data->dynamic ? "d" : ""), data->intensity);
 #ifdef HAVE_ADL
-	if (cgpu->has_adl) {
+	if (data->has_adl) {
 		int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
 		float temp = 0, vddc = 0;
 
@@ -611,7 +775,8 @@ void opencl_tui_wlogprint_choices(struct cgpu_info *cgpu)
 {
 	wlogprint("[I]ntensity [R]estart GPU ");
 #ifdef HAVE_ADL
-	if (cgpu->has_adl)
+	struct opencl_device_data * const data = cgpu->device_data;
+	if (data->has_adl)
 		wlogprint("[C]hange settings ");
 #endif
 }
@@ -619,6 +784,8 @@ void opencl_tui_wlogprint_choices(struct cgpu_info *cgpu)
 static
 const char *opencl_tui_handle_choice(struct cgpu_info *cgpu, int input)
 {
+	struct opencl_device_data * const data = cgpu->device_data;
+	
 	switch (input)
 	{
 		case 'i': case 'I':
@@ -638,7 +805,7 @@ const char *opencl_tui_handle_choice(struct cgpu_info *cgpu, int input)
 			if (!intvar)
 				return "Invalid intensity\n";
 			if (!strncasecmp(intvar, "d", 1)) {
-				cgpu->dynamic = true;
+				data->dynamic = true;
 				pause_dynamic_threads(cgpu->device_id);
 				free(intvar);
 				return "Dynamic mode enabled\n";
@@ -647,8 +814,8 @@ const char *opencl_tui_handle_choice(struct cgpu_info *cgpu, int input)
 			free(intvar);
 			if (intensity < MIN_INTENSITY || intensity > MAX_INTENSITY)
 				return "Invalid intensity (out of range)\n";
-			cgpu->dynamic = false;
-			cgpu->intensity = intensity;
+			data->dynamic = false;
+			data->intensity = intensity;
 			pause_dynamic_threads(cgpu->device_id);
 			return "Intensity changed\n";
 		}
@@ -683,8 +850,18 @@ static _clState *clStates[MAX_GPUDEVICES];
 #define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
 #define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
 
-static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
+/* Returns the OpenCL-specific data attached to a work item, allocating a
+ * zero-filled struct opencl_work_data and storing it in work->device_data the
+ * first time it is requested.  The result of calloc() is stored and returned
+ * unchecked. */
+static
+struct opencl_work_data *_opencl_work_data(struct work * const work)
+{
+	if (!work->device_data)
+		work->device_data = calloc(1, sizeof(struct opencl_work_data));
+	return work->device_data;
+}
+
+static
+cl_int queue_poclbm_kernel(_clState * const clState, struct work * const work, const cl_uint threads)
+{
+	struct opencl_work_data * const blk = _opencl_work_data(work);
 	cl_kernel *kernel = &clState->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
@@ -712,7 +889,7 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
 		unsigned int i;
 
 		for (i = 0; i < vwidth; i++)
-			nonces[i] = blk->nonce + (i * threads);
+			nonces[i] = work->blk.nonce + (i * threads);
 		CL_SET_VARG(vwidth, nonces);
 	}
 
@@ -736,9 +913,10 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
 	return status;
 }
 
-static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
-				 __maybe_unused cl_uint threads)
+static
+cl_int queue_phatk_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
+	struct opencl_work_data * const blk = _opencl_work_data(work);
 	cl_kernel *kernel = &clState->kernel;
 	cl_uint vwidth = clState->vwidth;
 	unsigned int i, num = 0;
@@ -763,7 +941,7 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
 
 	nonces = alloca(sizeof(uint) * vwidth);
 	for (i = 0; i < vwidth; i++)
-		nonces[i] = blk->nonce + i;
+		nonces[i] = work->blk.nonce + i;
 	CL_SET_VARG(vwidth, nonces);
 
 	CL_SET_BLKARG(W16);
@@ -780,9 +958,10 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
 	return status;
 }
 
-static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
-				   __maybe_unused cl_uint threads)
+static
+cl_int queue_diakgcn_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
+	struct opencl_work_data * const blk = _opencl_work_data(work);
 	cl_kernel *kernel = &clState->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
@@ -792,7 +971,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
 		uint *nonces = alloca(sizeof(uint) * vwidth);
 		unsigned int i;
 		for (i = 0; i < vwidth; i++)
-			nonces[i] = blk->nonce + i;
+			nonces[i] = work->blk.nonce + i;
 		CL_SET_VARG(vwidth, nonces);
 	}
 
@@ -841,8 +1020,10 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
 	return status;
 }
 
-static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
+static
+cl_int queue_diablo_kernel(_clState * const clState, struct work * const work, const cl_uint threads)
 {
+	struct opencl_work_data * const blk = _opencl_work_data(work);
 	cl_kernel *kernel = &clState->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
@@ -853,7 +1034,7 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
 		unsigned int i;
 
 		for (i = 0; i < vwidth; i++)
-			nonces[i] = blk->nonce + (i * threads);
+			nonces[i] = work->blk.nonce + (i * threads);
 		CL_SET_VARG(vwidth, nonces);
 	}
 
@@ -895,16 +1076,17 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
 }
 
 #ifdef USE_SCRYPT
-static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
+static
+cl_int queue_scrypt_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
-	unsigned char *midstate = blk->work->midstate;
+	unsigned char *midstate = work->midstate;
 	cl_kernel *kernel = &clState->kernel;
 	unsigned int num = 0;
 	cl_uint le_target;
 	cl_int status = 0;
 
-	le_target = *(cl_uint *)(blk->work->target + 28);
-	clState->cldata = blk->work->data;
+	le_target = *(cl_uint *)(work->target + 28);
+	clState->cldata = work->data;
 	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
 
 	CL_SET_ARG(clState->CLbuffer0);
@@ -962,6 +1144,8 @@ select_cgpu:
 	cgpu = tq_pop(mythr->q, NULL);
 	if (!cgpu)
 		goto out;
+	
+	struct opencl_device_data * const data = cgpu->device_data;
 
 	if (clDevicesNum() != nDevs) {
 		applog(LOG_WARNING, "Hardware not reporting same number of active devices, will not attempt to restart GPU");
@@ -989,7 +1173,7 @@ select_cgpu:
 		thr = cgpu->thr[i];
 		thr_id = thr->id;
 
-		virtual_gpu = cgpu->virtual_gpu;
+		virtual_gpu = data->virtual_gpu;
 		/* Lose this ram cause we may get stuck here! */
 		//tq_freeze(thr->q);
 
@@ -1072,7 +1256,6 @@ static int opencl_autodetect()
 	}
 
 #ifdef HAVE_SENSORS
-	struct opencl_device_data *data;
 	const sensors_chip_name *cn;
 	int c = 0;
 	
@@ -1086,19 +1269,18 @@ static int opencl_autodetect()
 		struct cgpu_info *cgpu;
 
 		cgpu = &gpus[i];
+		struct opencl_device_data * const data = cgpu->device_data;
+		
 		cgpu->devtype = "GPU";
 		cgpu->deven = DEV_ENABLED;
 		cgpu->drv = &opencl_api;
 		cgpu->device_id = i;
 		cgpu->threads = opt_g_threads;
-		cgpu->virtual_gpu = i;
+		data->virtual_gpu = i;
 		
 #ifdef HAVE_SENSORS
 		cn = (c == -1) ? NULL : sensors_get_detected_chips(&cnm, &c);
-		cgpu->device_data = data = malloc(sizeof(*data));
-		*data = (struct opencl_device_data){
-			.sensor = cn,
-		};
+		data->sensor = cn;
 #endif
 		
 		add_cgpu(cgpu);
@@ -1120,6 +1302,15 @@ static void opencl_detect()
 
 static void reinit_opencl_device(struct cgpu_info *gpu)
 {
+#ifdef HAVE_ADL
+	struct opencl_device_data * const data = gpu->device_data;
+	if (adl_active && data->has_adl && gpu_activity(gpu->device_id) > 50)  /* ADL still reports activity above 50 for this GPU */
+	{
+		applogr(, LOG_ERR, "%s: Still showing activity (suggests a hard hang); cancelling reinitialise.",  /* NOTE(review): presumably applogr() logs and then returns from this function (empty first argument = no return value), which is what skips the push below; confirm against logging.h */
+		        gpu->dev_repr);
+	}
+#endif
+	
 	tq_push(control_thr[gpur_thr_id].q, gpu);
 }
 
@@ -1127,8 +1318,8 @@ static void reinit_opencl_device(struct cgpu_info *gpu)
 static
 bool override_opencl_statline_temp(char *buf, size_t bufsz, struct cgpu_info *gpu, __maybe_unused bool per_processor)
 {
+	__maybe_unused struct opencl_device_data * const data = gpu->device_data;
 #ifdef HAVE_SENSORS
-	struct opencl_device_data *data = gpu->device_data;
 	if (data->sensor)
 	{
 		const sensors_chip_name *cn = data->sensor;
@@ -1151,7 +1342,7 @@ bool override_opencl_statline_temp(char *buf, size_t bufsz, struct cgpu_info *gp
 	}
 #endif
 #ifdef HAVE_ADL
-	if (gpu->has_adl) {
+	if (data->has_adl) {
 		int gpuid = gpu->device_id;
 		gpu_temp(gpuid);
 		gpu_fanspeed(gpuid);
@@ -1160,9 +1351,31 @@ bool override_opencl_statline_temp(char *buf, size_t bufsz, struct cgpu_info *gp
 	return false;
 }
 
+/* Driver watchdog hook.  With HAVE_ADL compiled in and ADL available for the
+ * device, runs ADL autotuning (when ADL is active) and, in debug mode, logs the
+ * readings returned by gpu_stats().  Without HAVE_ADL the body is empty. */
+static
+void opencl_watchdog(struct cgpu_info * const cgpu, __maybe_unused const struct timeval * const tv_now)
+{
+#ifdef HAVE_ADL
+	struct opencl_device_data * const data = cgpu->device_data;
+	const int gpu = cgpu->device_id;
+	
+	/* Neither step below applies to a device without ADL */
+	if (!data->has_adl)
+		return;
+	
+	/* Autotuning may change the device's enable state, so pass it by address */
+	if (adl_active)
+		gpu_autotune(gpu, &cgpu->deven);
+	
+	if (!opt_debug)
+		return;
+	
+	float temp = 0, vddc = 0;
+	int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
+	
+	if (!gpu_stats(gpu, &temp, &engineclock, &memclock, &vddc, &activity, &fanspeed, &fanpercent, &powertune))
+		return;
+	
+	applog(LOG_DEBUG, "%.1f C  F: %d%%(%dRPM)  E: %dMHz  M: %dMHz  V: %.3fV  A: %d%%  P: %d%%",
+	       temp, fanpercent, fanspeed, engineclock, memclock, vddc, activity, powertune);
+#endif
+}
+
 static struct api_data*
 get_opencl_api_extra_device_status(struct cgpu_info *gpu)
 {
+	struct opencl_device_data * const data = gpu->device_data;
 	struct api_data*root = NULL;
 
 	float gt, gv;
@@ -1180,17 +1393,17 @@ get_opencl_api_extra_device_status(struct cgpu_info *gpu)
 	root = api_add_int(root, "Powertune", &pt, true);
 
 	char intensity[20];
-	if (gpu->dynamic)
+	if (data->dynamic)
 		strcpy(intensity, "D");
 	else
-		sprintf(intensity, "%d", gpu->intensity);
+		sprintf(intensity, "%d", data->intensity);
 	root = api_add_string(root, "Intensity", intensity, true);
 
 	return root;
 }
 
 struct opencl_thread_data {
-	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint);
+	cl_int (*queue_kernel_parameters)(_clState *, struct work *, cl_uint);  /* kernel-specific argument setup for one work item; opencl_scanhash() calls it with the work and globalThreads[0] */
 	uint32_t *res;
 };
 
@@ -1200,8 +1413,9 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 {
 	char name[256];
 	struct cgpu_info *cgpu = thr->cgpu;
+	struct opencl_device_data * const data = cgpu->device_data;
 	int gpu = cgpu->device_id;
-	int virtual_gpu = cgpu->virtual_gpu;
+	int virtual_gpu = data->virtual_gpu;
 	int i = thr->id;
 	static bool failmessage = false;
 	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
@@ -1340,11 +1554,12 @@ static bool opencl_thread_init(struct thr_info *thr)
 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
 #ifdef USE_SCRYPT
-	if (opt_scrypt)
-		work->blk.work = work;
-	else
+	if (!opt_scrypt)  /* scrypt work gets no precalculation here */
 #endif
-		precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
+	{
+		struct opencl_work_data * const blk = _opencl_work_data(work);  /* allocated on first use and stored in work->device_data */
+		precalc_hash(blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));  /* fills blk from work->midstate and from work->data starting at byte offset 64 */
+	}
 	return true;
 }
 
@@ -1356,6 +1571,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	const int thr_id = thr->id;
 	struct opencl_thread_data *thrdata = thr->cgpu_data;
 	struct cgpu_info *gpu = thr->cgpu;
+	struct opencl_device_data * const data = gpu->device_data;
 	_clState *clState = clStates[thr_id];
 	const cl_kernel *kernel = &clState->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
@@ -1368,28 +1584,28 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
 
 	/* Windows' timer resolution is only 15ms so oversample 5x */
-	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
+	if (data->dynamic && (++data->intervals * dynamic_us) > 70000) {
 		struct timeval tv_gpuend;
 		double gpu_us;
 
 		cgtime(&tv_gpuend);
-		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
+		gpu_us = us_tdiff(&tv_gpuend, &data->tv_gpustart) / data->intervals;
 		if (gpu_us > dynamic_us) {
-			if (gpu->intensity > MIN_INTENSITY)
-				--gpu->intensity;
+			if (data->intensity > MIN_INTENSITY)
+				--data->intensity;
 		} else if (gpu_us < dynamic_us / 2) {
-			if (gpu->intensity < MAX_INTENSITY)
-				++gpu->intensity;
+			if (data->intensity < MAX_INTENSITY)
+				++data->intensity;
 		}
-		memcpy(&(gpu->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
-		gpu->intervals = 0;
+		memcpy(&(data->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
+		data->intervals = 0;
 	}
 
-	set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
+	set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &data->intensity);
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
 
-	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
+	status = thrdata->queue_kernel_parameters(clState, work, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
 		return -1;
@@ -1461,6 +1677,7 @@ struct device_drv opencl_api = {
 	.supported_algos = POW_SHA256D | POW_SCRYPT,
 	.drv_detect = opencl_detect,
 	.reinit_device = reinit_opencl_device,
+	.watchdog = opencl_watchdog,
 	.override_statline_temp2 = override_opencl_statline_temp,
 #ifdef HAVE_CURSES
 	.proc_wlogprint_status = opencl_wlogprint_status,

+ 73 - 0
driver-opencl.h

@@ -3,9 +3,81 @@
 
 #include <stdbool.h>
 
+#include "CL/cl.h"
+#ifdef HAVE_SENSORS
+#include <sensors/sensors.h>
+#endif
+
 #include "miner.h"
 
+struct opencl_device_data {  /* OpenCL-specific per-GPU state, reached through cgpu->device_data */
+	bool mapped;  /* set when an OpenCL-to-ADL mapping was supplied manually for this device */
+	int virtual_gpu;  /* starts as the device index in opencl_autodetect(); init_adl() overwrites it from the ADL adapter mapping */
+	int virtual_adl;  /* ADL device this OpenCL device maps to; init_adl() uses the device's own index unless mapped is set */
+	int intensity;  /* current intensity; opencl_scanhash() adjusts it automatically while dynamic is set */
+	bool dynamic;  /* dynamic-intensity mode; shown as "d" in the status line and config */
+	
+	cl_uint vwidth;  /* vector width; when 0, initCl() stores its preferred width here */
+	size_t work_size;  /* requested work size; initCl() replaces it with the size it actually uses */
+	enum cl_kernels kernel;  /* kernel selection; KL_NONE makes initCl() choose one and store the choice */
+	cl_ulong max_alloc;  /* CL_DEVICE_MAX_MEM_ALLOC_SIZE as queried in initCl() */
+	
+#ifdef USE_SCRYPT
+	int opt_lg, lookup_gap;  /* opt_lg: configured lookup gap (0 selects 2); lookup_gap: value actually used */
+	size_t opt_tc, thread_concurrency;  /* opt_tc: configured thread concurrency (0 derives it from max_alloc); thread_concurrency: value actually used */
+	size_t shaders;  /* when non-zero, bounds the automatically derived thread_concurrency */
+#endif
+	struct timeval tv_gpustart;  /* start of the current dynamic-intensity measurement window */
+	int intervals;  /* opencl_scanhash() calls counted in the current window while dynamic is set */
+	
+#ifdef HAVE_ADL
+	bool has_adl;  /* gates the ADL calls made for this device */
+	struct gpu_adl adl;  /* ADL state; adl.overtemp is written to the config as "temp-overheat" */
+	
+	int gpu_engine;  /* written to the config as the second value of "gpu-engine" (min_engine-gpu_engine) */
+	int min_engine;  /* first value of "gpu-engine" */
+	int gpu_fan;  /* second value of "gpu-fan" (min_fan-gpu_fan) */
+	int min_fan;  /* first value of "gpu-fan" */
+	int gpu_memclock;  /* config key "gpu-memclock" */
+	int gpu_memdiff;  /* config key "gpu-memdiff" */
+	int gpu_powertune;  /* config key "gpu-powertune" */
+	float gpu_vddc;  /* config key "gpu-vddc" */
+#endif
+	
+#ifdef HAVE_SENSORS
+	const sensors_chip_name *sensor;  /* chip returned by sensors_get_detected_chips() for this device, or NULL */
+#endif
+};
+
+struct opencl_work_data {  /* per-work OpenCL values; allocated zero-filled on first use (stored in work->device_data) and filled by precalc_hash() */
+	cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;
+	cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
+	cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d;
+	cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h;
+	cl_uint merkle; cl_uint ntime; cl_uint nbits;  /* the nonce is not stored here; the kernel queue functions read work->blk.nonce */
+	cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15;
+	cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2;
+	cl_uint W16; cl_uint W17; cl_uint W2;
+	cl_uint PreVal4; cl_uint T1;
+	cl_uint C1addK5; cl_uint D1A; cl_uint W2A; cl_uint W17_2;
+	cl_uint PreVal4addT1; cl_uint T1substate0;
+	cl_uint PreVal4_2;
+	cl_uint PreVal0;
+	cl_uint PreW18;
+	cl_uint PreW19;
+	cl_uint PreW31;
+	cl_uint PreW32;
+
+	/* For diakgcn */
+	cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
+	cl_uint zeroA, zeroB;
+	cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
+#ifdef USE_SCRYPT
+	struct work *work;  /* NOTE(review): no reader or writer of this member is visible in this change (queue_scrypt_kernel now receives the work item directly); confirm whether it can be dropped */
+#endif
+};
 
+extern void opencl_early_init();
 extern char *print_ndevs_and_exit(int *ndevs);
 extern void *reinit_gpu(void *userdata);
 extern char *set_gpu_map(char *arg);
@@ -25,6 +97,7 @@ extern const char *set_lookup_gap(char *arg);
 extern const char *set_thread_concurrency(char *arg);
 #endif
 extern const char *set_kernel(char *arg);
+extern void write_config_opencl(FILE *);
 void manage_gpu(void);
 extern void opencl_dynamic_cleanup();
 extern void pause_dynamic_threads(int gpu);

+ 1 - 1
findnonce.c

@@ -49,7 +49,7 @@ const uint32_t SHA256_K[64] = {
 	d = d + h; \
 	h = h + (rotate(a, 30) ^ rotate(a, 19) ^ rotate(a, 10)) + ((a & b) | (c & (a | b)))
 
-void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
+void precalc_hash(struct opencl_work_data *blk, uint32_t *state, uint32_t *data)
 {
 	cl_uint A, B, C, D, E, F, G, H;
 

+ 2 - 1
findnonce.h

@@ -3,6 +3,7 @@
 
 #include <stdint.h>
 
+#include "driver-opencl.h"
 #include "miner.h"
 #include "config.h"
 
@@ -16,7 +17,7 @@
 #define SCRYPT_FOUND (0xFF)
 
 #ifdef HAVE_OPENCL
-extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
+extern void precalc_hash(struct opencl_work_data *blk, uint32_t *state, uint32_t *data);
 extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
 #endif /* HAVE_OPENCL */
 #endif /*__FINDNONCE_H__*/

+ 6 - 116
miner.c

@@ -74,7 +74,6 @@
 #include "deviceapi.h"
 #include "logging.h"
 #include "miner.h"
-#include "findnonce.h"
 #include "adl.h"
 #include "driver-cpu.h"
 #include "driver-opencl.h"
@@ -2702,6 +2701,7 @@ void clean_work(struct work *work)
 	free(work->job_id);
 	bytes_free(&work->nonce2);
 	free(work->nonce1);
+	free(work->device_data);
 
 	if (work->tmpl) {
 		struct pool *pool = work->pool;
@@ -6513,92 +6513,7 @@ void write_config(FILE *fcfg)
 	fputs("\n]\n", fcfg);
 
 #ifdef HAVE_OPENCL
-	if (nDevs) {
-		/* Write GPU device values */
-		fputs(",\n\"intensity\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-		{
-			if (i > 0)
-				fputc(',', fcfg);
-			if (gpus[i].dynamic)
-				fputc('d', fcfg);
-			else
-				fprintf(fcfg, "%d", gpus[i].intensity);
-		}
-		fputs("\",\n\"vectors\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				gpus[i].vwidth);
-		fputs("\",\n\"worksize\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].work_size);
-		fputs("\",\n\"kernel\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++) {
-			fprintf(fcfg, "%s", i > 0 ? "," : "");
-			switch (gpus[i].kernel) {
-				case KL_NONE: // Shouldn't happen
-					break;
-				case KL_POCLBM:
-					fprintf(fcfg, "poclbm");
-					break;
-				case KL_PHATK:
-					fprintf(fcfg, "phatk");
-					break;
-				case KL_DIAKGCN:
-					fprintf(fcfg, "diakgcn");
-					break;
-				case KL_DIABLO:
-					fprintf(fcfg, "diablo");
-					break;
-				case KL_SCRYPT:
-					fprintf(fcfg, "scrypt");
-					break;
-			}
-		}
-#ifdef USE_SCRYPT
-		fputs("\",\n\"lookup-gap\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].opt_lg);
-		fputs("\",\n\"thread-concurrency\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].opt_tc);
-		fputs("\",\n\"shaders\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].shaders);
-#endif
-#ifdef HAVE_ADL
-		fputs("\",\n\"gpu-engine\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d-%d", i > 0 ? "," : "", gpus[i].min_engine, gpus[i].gpu_engine);
-		fputs("\",\n\"gpu-fan\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d-%d", i > 0 ? "," : "", gpus[i].min_fan, gpus[i].gpu_fan);
-		fputs("\",\n\"gpu-memclock\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_memclock);
-		fputs("\",\n\"gpu-memdiff\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_memdiff);
-		fputs("\",\n\"gpu-powertune\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_powertune);
-		fputs("\",\n\"gpu-vddc\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%1.3f", i > 0 ? "," : "", gpus[i].gpu_vddc);
-		fputs("\",\n\"temp-overheat\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].adl.overtemp);
-#endif
-		fputs("\"", fcfg);
-	}
-#endif
-#ifdef HAVE_ADL
-	if (opt_reorder)
-		fprintf(fcfg, ",\n\"gpu-reorder\" : true");
+	write_config_opencl(fcfg);
 #endif
 #ifdef WANT_CPUMINE
 	fprintf(fcfg, ",\n\"algo\" : \"%s\"", algo_names[opt_algo]);
@@ -9099,7 +9014,7 @@ out:
 bool abandon_work(struct work *work, struct timeval *wdiff, uint64_t hashes)
 {
 	if (wdiff->tv_sec > opt_scantime ||
-	    work->blk.nonce >= MAXTHREADS - hashes ||
+	    work->blk.nonce >= 0xfffffffe - hashes ||
 	    hashes >= 0xfffffffe ||
 	    stale_work(work, false))
 		return true;
@@ -9977,7 +9892,6 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 			struct thr_info *thr = cgpu->thr[0];
 			enum dev_enable *denable;
 			char *dev_str = cgpu->proc_repr;
-			int gpu;
 
 			if (likely(drv_ready(cgpu)))
 			{
@@ -9987,21 +9901,10 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 					cgpu->drv->get_stats(cgpu);
 			}
 
-			gpu = cgpu->device_id;
 			denable = &cgpu->deven;
 
-#ifdef HAVE_ADL
-			if (adl_active && cgpu->has_adl)
-				gpu_autotune(gpu, denable);
-			if (opt_debug && cgpu->has_adl) {
-				int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
-				float temp = 0, vddc = 0;
-
-				if (gpu_stats(gpu, &temp, &engineclock, &memclock, &vddc, &activity, &fanspeed, &fanpercent, &powertune))
-					applog(LOG_DEBUG, "%.1f C  F: %d%%(%dRPM)  E: %dMHz  M: %dMHz  V: %.3fV  A: %d%%  P: %d%%",
-					temp, fanpercent, fanspeed, engineclock, memclock, vddc, activity, powertune);
-			}
-#endif
+			if (cgpu->drv->watchdog)
+				cgpu->drv->watchdog(cgpu, tvp_now);
 			
 			/* Thread is disabled */
 			if (*denable == DEV_DISABLED)
@@ -10074,12 +9977,6 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 				dev_error(cgpu, REASON_DEV_SICK_IDLE_60);
 				run_cmd(cmd_sick);
 				
-#ifdef HAVE_ADL
-				if (adl_active && cgpu->has_adl && gpu_activity(gpu) > 50) {
-					applog(LOG_ERR, "GPU still showing activity suggesting a hard hang.");
-					applog(LOG_ERR, "Will not attempt to auto-restart it.");
-				} else
-#endif
 				if (opt_restart && cgpu->drv->reinit_device) {
 					applog(LOG_ERR, "%s: Attempting to restart", dev_str);
 					reinit_device(cgpu);
@@ -10095,11 +9992,6 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 				   (cgpu->status == LIFE_SICK || cgpu->status == LIFE_DEAD)) {
 				/* Attempt to restart a GPU that's sick or dead once every minute */
 				cgtime(&thr->sick);
-#ifdef HAVE_ADL
-				if (adl_active && cgpu->has_adl && gpu_activity(gpu) > 50) {
-					/* Again do not attempt to restart a device that may have hard hung */
-				} else
-#endif
 				if (opt_restart)
 					reinit_device(cgpu);
 			}
@@ -11455,9 +11347,7 @@ int main(int argc, char *argv[])
 	mutex_init(&submitting_lock);
 
 #ifdef HAVE_OPENCL
-	memset(gpus, 0, sizeof(gpus));
-	for (i = 0; i < MAX_GPUDEVICES; i++)
-		gpus[i].dynamic = true;
+	opencl_early_init();
 #endif
 
 	schedstart.tm.tm_sec = 1;

+ 5 - 72
miner.h

@@ -42,10 +42,6 @@
 #include "logging.h"
 #include "util.h"
 
-#ifdef HAVE_OPENCL
-#include "CL/cl.h"
-#endif /* HAVE_OPENCL */
-
 #ifdef STDC_HEADERS
 # include <stdlib.h>
 # include <stddef.h>
@@ -304,6 +300,7 @@ struct device_drv {
 	bool (*lowl_probe)(const struct lowlevel_device_info *);
 
 	// Processor-specific functions
+	void (*watchdog)(struct cgpu_info *, const struct timeval *tv_now);
 	void (*reinit_device)(struct cgpu_info *);
 	bool (*override_statline_temp2)(char *buf, size_t bufsz, struct cgpu_info *, bool per_processor);
 	struct api_data* (*get_api_extra_device_detail)(struct cgpu_info *);
@@ -516,45 +513,12 @@ struct cgpu_info {
 	int64_t max_hashes;
 
 	const char *kname;
-#ifdef HAVE_OPENCL
-	bool mapped;
-	int virtual_gpu;
-	int virtual_adl;
-	int intensity;
-	bool dynamic;
-
-	cl_uint vwidth;
-	size_t work_size;
-	enum cl_kernels kernel;
-	cl_ulong max_alloc;
-
-#ifdef USE_SCRYPT
-	int opt_lg, lookup_gap;
-	size_t opt_tc, thread_concurrency;
-	size_t shaders;
-#endif
-	struct timeval tv_gpustart;
-	int intervals;
-#endif
 
 	float temp;
 	int cutofftemp;
 	int targettemp;
 	bool targettemp_user;
 
-#ifdef HAVE_ADL
-	bool has_adl;
-	struct gpu_adl adl;
-
-	int gpu_engine;
-	int min_engine;
-	int gpu_fan;
-	int min_fan;
-	int gpu_memclock;
-	int gpu_memdiff;
-	int gpu_powertune;
-	float gpu_vddc;
-#endif
 	double diff1;
 	double diff_accepted;
 	double diff_rejected;
@@ -1143,40 +1107,6 @@ extern double current_diff;
 extern uint64_t best_diff;
 extern time_t block_time;
 
-#ifdef HAVE_OPENCL
-typedef struct {
-	cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;
-	cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
-	cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d;
-	cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h;
-	cl_uint merkle; cl_uint ntime; cl_uint nbits; cl_uint nonce;
-	cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15;
-	cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2;
-	cl_uint W16; cl_uint W17; cl_uint W2;
-	cl_uint PreVal4; cl_uint T1;
-	cl_uint C1addK5; cl_uint D1A; cl_uint W2A; cl_uint W17_2;
-	cl_uint PreVal4addT1; cl_uint T1substate0;
-	cl_uint PreVal4_2;
-	cl_uint PreVal0;
-	cl_uint PreW18;
-	cl_uint PreW19;
-	cl_uint PreW31;
-	cl_uint PreW32;
-
-	/* For diakgcn */
-	cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
-	cl_uint zeroA, zeroB;
-	cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
-#ifdef USE_SCRYPT
-	struct work *work;
-#endif
-} dev_blk_ctx;
-#else
-typedef struct {
-	uint32_t nonce;
-} dev_blk_ctx;
-#endif
-
 struct curl_ent {
 	CURL *curl;
 	struct curl_ent *next;
@@ -1349,7 +1279,9 @@ struct work {
 	int		rolls;
 	int		drv_rolllimit; /* How much the driver can roll ntime */
 
-	dev_blk_ctx	blk;
+	struct {
+		uint32_t nonce;
+	} blk;
 
 	struct thr_info	*thr;
 	int		thr_id;
@@ -1374,6 +1306,7 @@ struct work {
 	unsigned char	work_restart_id;
 	int		id;
 	int		device_id;
+	void *device_data;
 	UT_hash_handle hh;
 	
 	double		work_difficulty;

+ 32 - 30
ocl.c

@@ -28,6 +28,7 @@
 #define OMIT_OPENCL_API
 
 #include "deviceapi.h"
+#include "driver-opencl.h"
 #include "findnonce.h"
 #include "logging.h"
 #include "ocl.h"
@@ -394,6 +395,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	bool patchbfi = false, prog_built = false;
 	bool usebinary = opt_opencl_binaries, ismesa = false;
 	struct cgpu_info *cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	cl_platform_id platform = NULL;
 	char pbuff[256], vbuff[255];
 	char *s;
@@ -550,12 +552,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	}
 	applog(LOG_DEBUG, "Max work group size reported %"PRId64, (int64_t)clState->max_work_size);
 
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&data->max_alloc, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
 		return NULL;
 	}
-	applog(LOG_DEBUG, "Max mem alloc size is %lu", (unsigned long)cgpu->max_alloc);
+	applog(LOG_DEBUG, "Max mem alloc size is %lu", (unsigned long)data->max_alloc);
 	
 	if (strstr(vbuff, "MESA"))
 	{
@@ -576,7 +578,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	char filename[255];
 	char numbuf[32];
 
-	if (cgpu->kernel == KL_NONE) {
+	if (data->kernel == KL_NONE) {
 		if (opt_scrypt) {
 			applog(LOG_INFO, "Selecting scrypt kernel");
 			clState->chosen_kernel = KL_SCRYPT;
@@ -605,9 +607,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			applog(LOG_INFO, "Selecting phatk kernel");
 			clState->chosen_kernel = KL_PHATK;
 		}
-		cgpu->kernel = clState->chosen_kernel;
+		data->kernel = clState->chosen_kernel;
 	} else {
-		clState->chosen_kernel = cgpu->kernel;
+		clState->chosen_kernel = data->kernel;
 		if (clState->chosen_kernel == KL_PHATK &&
 		    (strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
 		     strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
@@ -645,7 +647,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			strcpy(filename, SCRYPT_KERNNAME".cl");
 			strcpy(binaryfilename, SCRYPT_KERNNAME);
 			/* Scrypt only supports vector 1 */
-			cgpu->vwidth = 1;
+			data->vwidth = 1;
 			break;
 		case KL_NONE: /* Shouldn't happen */
 		case KL_DIABLO:
@@ -654,48 +656,48 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			break;
 	}
 
-	if (cgpu->vwidth)
-		clState->vwidth = cgpu->vwidth;
+	if (data->vwidth)
+		clState->vwidth = data->vwidth;
 	else {
 		clState->vwidth = preferred_vwidth;
-		cgpu->vwidth = preferred_vwidth;
+		data->vwidth = preferred_vwidth;
 	}
 
 	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
 		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
 			clState->goffset = true;
 
-	if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
-		clState->wsize = cgpu->work_size;
+	if (data->work_size && data->work_size <= clState->max_work_size)
+		clState->wsize = data->work_size;
 	else if (opt_scrypt)
 		clState->wsize = 256;
 	else if (strstr(name, "Tahiti"))
 		clState->wsize = 64;
 	else
 		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-	cgpu->work_size = clState->wsize;
+	data->work_size = clState->wsize;
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		if (!cgpu->opt_lg) {
+		if (!data->opt_lg) {
 			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
-			cgpu->lookup_gap = 2;
+			data->lookup_gap = 2;
 		} else
-			cgpu->lookup_gap = cgpu->opt_lg;
+			data->lookup_gap = data->opt_lg;
 
-		if (!cgpu->opt_tc) {
+		if (!data->opt_tc) {
 			unsigned int sixtyfours;
 
-			sixtyfours =  cgpu->max_alloc / 131072 / 64 - 1;
-			cgpu->thread_concurrency = sixtyfours * 64;
-			if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
-				cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
-				if (cgpu->thread_concurrency > cgpu->shaders * 5)
-					cgpu->thread_concurrency = cgpu->shaders * 5;
+			sixtyfours =  data->max_alloc / 131072 / 64 - 1;
+			data->thread_concurrency = sixtyfours * 64;
+			if (data->shaders && data->thread_concurrency > data->shaders) {
+				data->thread_concurrency -= data->thread_concurrency % data->shaders;
+				if (data->thread_concurrency > data->shaders * 5)
+					data->thread_concurrency = data->shaders * 5;
 			}
-			applog(LOG_DEBUG, "GPU %u: selecting thread concurrency of %lu", gpu,  (unsigned long)cgpu->thread_concurrency);
+			applog(LOG_DEBUG, "GPU %u: selecting thread concurrency of %lu", gpu,  (unsigned long)data->thread_concurrency);
 		} else
-			cgpu->thread_concurrency = cgpu->opt_tc;
+			data->thread_concurrency = data->opt_tc;
 	}
 #endif
 
@@ -728,7 +730,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		strcat(binaryfilename, "g");
 	if (opt_scrypt) {
 #ifdef USE_SCRYPT
-		sprintf(numbuf, "lg%utc%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency);
+		sprintf(numbuf, "lg%utc%u", data->lookup_gap, (unsigned int)data->thread_concurrency);
 		strcat(binaryfilename, numbuf);
 #endif
 	} else {
@@ -808,7 +810,7 @@ build:
 #ifdef USE_SCRYPT
 	if (opt_scrypt)
 		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize);
+			data->lookup_gap, (unsigned int)data->thread_concurrency, (int)clState->wsize);
 	else
 #endif
 	{
@@ -1008,13 +1010,13 @@ built:
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
-		size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
+		size_t ipt = (1024 / data->lookup_gap + (1024 % data->lookup_gap > 0));
+		size_t bufsize = 128 * ipt * data->thread_concurrency;
 
 		/* Use the max alloc value which has been rounded to a power of
 		 * 2 greater >= required amount earlier */
-		if (bufsize > cgpu->max_alloc) {
-			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu", gpu, (unsigned long)cgpu->max_alloc);
+		if (bufsize > data->max_alloc) {
+			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu", gpu, (unsigned long)data->max_alloc);
 			applog(LOG_WARNING, "Your scrypt settings come to %lu", (unsigned long)bufsize);
 		}
 		applog(LOG_DEBUG, "Creating scrypt buffer sized %lu", (unsigned long)bufsize);