Browse Source

Merge branch '201402_opencl_cleanup' into bfgminer

Luke Dashjr 12 years ago
parent
commit
569b362821
9 changed files with 600 additions and 760 deletions
  1. 145 91
      adl.c
  2. 8 4
      api.c
  3. 314 430
      driver-opencl.c
  4. 87 15
      driver-opencl.h
  5. 1 1
      findnonce.c
  6. 2 1
      findnonce.h
  7. 6 116
      miner.c
  8. 5 72
      miner.h
  9. 32 30
      ocl.c

+ 145 - 91
adl.c

@@ -23,6 +23,7 @@
 
 #include <stdbool.h>
 
+#include "driver-opencl.h"
 #include "miner.h"
 #include "ADL/adl_sdk.h"
 #include "compat.h"
@@ -312,11 +313,12 @@ void init_adl(int nDevs)
 
 	/* Apply manually provided OpenCL to ADL mapping, if any */
 	for (i = 0; i < nDevs; i++) {
-		if (gpus[i].mapped) {
-			vadapters[gpus[i].virtual_adl].virtual_gpu = i;
-			applog(LOG_INFO, "Mapping OpenCL device %d to ADL device %d", i, gpus[i].virtual_adl);
+		struct opencl_device_data * const data = gpus[i].device_data;
+		if (data->mapped) {
+			vadapters[data->virtual_adl].virtual_gpu = i;
+			applog(LOG_INFO, "Mapping OpenCL device %d to ADL device %d", i, data->virtual_adl);
 		} else
-			gpus[i].virtual_adl = i;
+			data->virtual_adl = i;
 	}
 
 	if (!devs_match) {
@@ -353,16 +355,18 @@ void init_adl(int nDevs)
 		devices = nDevs;
 
 	for (gpu = 0; gpu < devices; gpu++) {
+		struct cgpu_info * const cgpu = &gpus[gpu];
+		struct opencl_device_data * const data = cgpu->device_data;
 		struct gpu_adl *ga;
 		int iAdapterIndex;
 		int lpAdapterID;
 		ADLODPerformanceLevels *lpOdPerformanceLevels;
 		int lev, adlGpu;
 
-		adlGpu = gpus[gpu].virtual_adl;
+		adlGpu = data->virtual_adl;
 		i = vadapters[adlGpu].id;
 		iAdapterIndex = lpInfo[i].iAdapterIndex;
-		gpus[gpu].virtual_gpu = vadapters[adlGpu].virtual_gpu;
+		data->virtual_gpu = vadapters[adlGpu].virtual_gpu;
 
 		/* Get unique identifier of the adapter, 0 means not AMD */
 		result = ADL_Adapter_ID_Get(iAdapterIndex, &lpAdapterID);
@@ -372,11 +376,11 @@ void init_adl(int nDevs)
 		}
 
 		if (gpus[gpu].deven == DEV_DISABLED) {
-			gpus[gpu].gpu_engine =
-			gpus[gpu].gpu_memclock =
-			gpus[gpu].gpu_vddc =
-			gpus[gpu].gpu_fan =
-			gpus[gpu].gpu_powertune = 0;
+			data->gpu_engine =
+			data->gpu_memclock =
+			data->gpu_vddc =
+			data->gpu_fan =
+			data->gpu_powertune = 0;
 			continue;
 		}
 
@@ -384,13 +388,13 @@ void init_adl(int nDevs)
 		if (gpus[gpu].name)
 			free((void*)gpus[gpu].name);
 		gpus[gpu].name = lpInfo[i].strAdapterName;
-		gpus[gpu].has_adl = true;
+		data->has_adl = true;
 		/* Flag adl as active if any card is successfully activated */
 		adl_active = true;
 
 		/* From here on we know this device is a discrete device and
 		 * should support ADL */
-		ga = &gpus[gpu].adl;
+		ga = &data->adl;
 		ga->gpu = gpu;
 		ga->iAdapterIndex = iAdapterIndex;
 		ga->lpAdapterID = lpAdapterID;
@@ -421,8 +425,8 @@ void init_adl(int nDevs)
 		/* Save these values as the defaults in case we wish to reset to defaults */
 		ga->DefPerfLev = lpOdPerformanceLevels;
 
-		if (gpus[gpu].gpu_engine) {
-			int setengine = gpus[gpu].gpu_engine * 100;
+		if (data->gpu_engine) {
+			int setengine = data->gpu_engine * 100;
 
 			/* Lower profiles can't have a higher setting */
 			for (j = 0; j < lev; j++) {
@@ -430,38 +434,38 @@ void init_adl(int nDevs)
 					lpOdPerformanceLevels->aLevels[j].iEngineClock = setengine;
 			}
 			lpOdPerformanceLevels->aLevels[lev].iEngineClock = setengine;
-			applog(LOG_INFO, "Setting GPU %d engine clock to %d", gpu, gpus[gpu].gpu_engine);
+			applog(LOG_INFO, "Setting GPU %d engine clock to %d", gpu, data->gpu_engine);
 			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
 			ga->maxspeed = setengine;
-			if (gpus[gpu].min_engine)
-				ga->minspeed = gpus[gpu].min_engine * 100;
+			if (data->min_engine)
+				ga->minspeed = data->min_engine * 100;
 			ga->managed = true;
-			if (gpus[gpu].gpu_memdiff)
-				set_memoryclock(gpu, gpus[gpu].gpu_engine + gpus[gpu].gpu_memdiff);
+			if (data->gpu_memdiff)
+				set_memoryclock(gpu, data->gpu_engine + data->gpu_memdiff);
 		}
 
-		if (gpus[gpu].gpu_memclock) {
-			int setmem = gpus[gpu].gpu_memclock * 100;
+		if (data->gpu_memclock) {
+			int setmem = data->gpu_memclock * 100;
 
 			for (j = 0; j < lev; j++) {
 				if (lpOdPerformanceLevels->aLevels[j].iMemoryClock > setmem)
 					lpOdPerformanceLevels->aLevels[j].iMemoryClock = setmem;
 			}
 			lpOdPerformanceLevels->aLevels[lev].iMemoryClock = setmem;
-			applog(LOG_INFO, "Setting GPU %d memory clock to %d", gpu, gpus[gpu].gpu_memclock);
+			applog(LOG_INFO, "Setting GPU %d memory clock to %d", gpu, data->gpu_memclock);
 			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
 			ga->managed = true;
 		}
 
-		if (gpus[gpu].gpu_vddc) {
-			int setv = gpus[gpu].gpu_vddc * 1000;
+		if (data->gpu_vddc) {
+			int setv = data->gpu_vddc * 1000;
 
 			for (j = 0; j < lev; j++) {
 				if (lpOdPerformanceLevels->aLevels[j].iVddc > setv)
 					lpOdPerformanceLevels->aLevels[j].iVddc = setv;
 			}
 			lpOdPerformanceLevels->aLevels[lev].iVddc = setv;
-			applog(LOG_INFO, "Setting GPU %d voltage to %.3f", gpu, gpus[gpu].gpu_vddc);
+			applog(LOG_INFO, "Setting GPU %d voltage to %.3f", gpu, data->gpu_vddc);
 			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
 			ga->managed = true;
 		}
@@ -480,17 +484,17 @@ void init_adl(int nDevs)
 		/* Save the fanspeed values as defaults in case we reset later */
 		ga->DefFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_RPM;
 		ADL_Overdrive5_FanSpeed_Get(ga->iAdapterIndex, 0, &ga->DefFanSpeedValue);
-		if (gpus[gpu].gpu_fan)
-			set_fanspeed(gpu, gpus[gpu].gpu_fan);
+		if (data->gpu_fan)
+			set_fanspeed(gpu, data->gpu_fan);
 		else
-			gpus[gpu].gpu_fan = 85; /* Set a nominal upper limit of 85% */
+			data->gpu_fan = 85; /* Set a nominal upper limit of 85% */
 
 		/* Not fatal if powercontrol get fails */
 		if (ADL_Overdrive5_PowerControl_Get(ga->iAdapterIndex, &ga->iPercentage, &dummy) != ADL_OK)
 			applog(LOG_INFO, "Failed to ADL_Overdrive5_PowerControl_get");
 
-		if (gpus[gpu].gpu_powertune) {
-			ADL_Overdrive5_PowerControl_Set(ga->iAdapterIndex, gpus[gpu].gpu_powertune);
+		if (data->gpu_powertune) {
+			ADL_Overdrive5_PowerControl_Set(ga->iAdapterIndex, data->gpu_powertune);
 			ADL_Overdrive5_PowerControl_Get(ga->iAdapterIndex, &ga->iPercentage, &dummy);
 			ga->managed = true;
 		}
@@ -508,10 +512,10 @@ void init_adl(int nDevs)
 
 			ga->autofan = true;
 			/* Clamp fanspeed values to range provided */
-			if (nominal > gpus[gpu].gpu_fan)
-				nominal = gpus[gpu].gpu_fan;
-			if (nominal < gpus[gpu].min_fan)
-				nominal = gpus[gpu].min_fan;
+			if (nominal > data->gpu_fan)
+				nominal = data->gpu_fan;
+			if (nominal < data->min_fan)
+				nominal = data->min_fan;
 			set_fanspeed(gpu, nominal);
 		}
 		if (opt_autoengine) {
@@ -523,16 +527,20 @@ void init_adl(int nDevs)
 	}
 
 	for (gpu = 0; gpu < devices; gpu++) {
-		struct gpu_adl *ga = &gpus[gpu].adl;
+		struct cgpu_info * const cgpu = &gpus[gpu];
+		struct opencl_device_data * const data = cgpu->device_data;
+		struct gpu_adl *ga = &data->adl;
 		int j;
 
 		for (j = 0; j < devices; j++) {
+			struct cgpu_info * const other_cgpu = &gpus[j];
+			struct opencl_device_data * const other_data = other_cgpu->device_data;
 			struct gpu_adl *other_ga;
 
 			if (j == gpu)
 				continue;
 
-			other_ga = &gpus[j].adl;
+			other_ga = &other_data->adl;
 
 			/* Search for twin GPUs on a single card. They will be
 			 * separated by one bus id and one will have fanspeed
@@ -558,13 +566,15 @@ static float __gpu_temp(struct gpu_adl *ga)
 
 float gpu_temp(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	float ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = __gpu_temp(ga);
 	unlock_adl();
@@ -579,13 +589,15 @@ static inline int __gpu_engineclock(struct gpu_adl *ga)
 
 int gpu_engineclock(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
 		goto out;
@@ -602,13 +614,15 @@ static inline int __gpu_memclock(struct gpu_adl *ga)
 
 int gpu_memclock(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
 		goto out;
@@ -625,13 +639,15 @@ static inline float __gpu_vddc(struct gpu_adl *ga)
 
 float gpu_vddc(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	float ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
 		goto out;
@@ -650,13 +666,15 @@ static inline int __gpu_activity(struct gpu_adl *ga)
 
 int gpu_activity(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity);
 	unlock_adl();
@@ -682,13 +700,15 @@ static inline int __gpu_fanspeed(struct gpu_adl *ga)
 
 int gpu_fanspeed(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = __gpu_fanspeed(ga);
 	unlock_adl();
@@ -710,13 +730,15 @@ static int __gpu_fanpercent(struct gpu_adl *ga)
 
 int gpu_fanpercent(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = __gpu_fanpercent(ga);
 	unlock_adl();
@@ -734,13 +756,15 @@ static inline int __gpu_powertune(struct gpu_adl *ga)
 
 int gpu_powertune(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = -1;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return ret;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ret = __gpu_powertune(ga);
 	unlock_adl();
@@ -750,12 +774,14 @@ int gpu_powertune(int gpu)
 bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
 	       int *activity, int *fanspeed, int *fanpercent, int *powertune)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return false;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	lock_adl();
 	gpus[gpu].temp =
@@ -782,13 +808,15 @@ bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vdd
 #ifdef HAVE_CURSES
 static void get_enginerange(int gpu, int *imin, int *imax)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Get enginerange not supported\n");
 		return;
 	}
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	*imin = ga->lpOdParameters.sEngineClock.iMin / 100;
 	*imax = ga->lpOdParameters.sEngineClock.iMax / 100;
 }
@@ -796,18 +824,19 @@ static void get_enginerange(int gpu, int *imin, int *imax)
 
 int set_engineclock(int gpu, int iEngineClock)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	ADLODPerformanceLevels *lpOdPerformanceLevels;
-	struct cgpu_info *cgpu;
 	int i, lev, ret = 1;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set engineclock not supported\n");
 		return ret;
 	}
 
 	iEngineClock *= 100;
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	/* Keep track of intended engine clock in case the device changes
 	 * profile and drops while idle, not taking the new engine clock */
@@ -840,9 +869,8 @@ int set_engineclock(int gpu, int iEngineClock)
 out:
 	unlock_adl();
 
-	cgpu = &gpus[gpu];
-	if (cgpu->gpu_memdiff)
-		set_memoryclock(gpu, iEngineClock / 100 + cgpu->gpu_memdiff);
+	if (data->gpu_memdiff)
+		set_memoryclock(gpu, iEngineClock / 100 + data->gpu_memdiff);
 
 	return ret;
 }
@@ -850,13 +878,15 @@ out:
 #ifdef HAVE_CURSES
 static void get_memoryrange(int gpu, int *imin, int *imax)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Get memoryrange not supported\n");
 		return;
 	}
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	*imin = ga->lpOdParameters.sMemoryClock.iMin / 100;
 	*imax = ga->lpOdParameters.sMemoryClock.iMax / 100;
 }
@@ -864,19 +894,21 @@ static void get_memoryrange(int gpu, int *imin, int *imax)
 
 int set_memoryclock(int gpu, int iMemoryClock)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	ADLODPerformanceLevels *lpOdPerformanceLevels;
 	int i, lev, ret = 1;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set memoryclock not supported\n");
 		return ret;
 	}
 
-	gpus[gpu].gpu_memclock = iMemoryClock;
+	data->gpu_memclock = iMemoryClock;
 
 	iMemoryClock *= 100;
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	lev = ga->lpOdParameters.iNumberOfPerformanceLevels - 1;
 	lpOdPerformanceLevels = alloca(sizeof(ADLODPerformanceLevels) + (lev * sizeof(ADLODPerformanceLevel)));
@@ -906,13 +938,15 @@ out:
 #ifdef HAVE_CURSES
 static void get_vddcrange(int gpu, float *imin, float *imax)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Get vddcrange not supported\n");
 		return;
 	}
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	*imin = (float)ga->lpOdParameters.sVddc.iMin / 1000;
 	*imax = (float)ga->lpOdParameters.sVddc.iMax / 1000;
 }
@@ -933,17 +967,19 @@ static float curses_float(const char *query)
 
 int set_vddc(int gpu, float fVddc)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	ADLODPerformanceLevels *lpOdPerformanceLevels;
 	int i, iVddc, lev, ret = 1;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set vddc not supported\n");
 		return ret;
 	}
 
 	iVddc = 1000 * fVddc;
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	lev = ga->lpOdParameters.iNumberOfPerformanceLevels - 1;
 	lpOdPerformanceLevels = alloca(sizeof(ADLODPerformanceLevels) + (lev * sizeof(ADLODPerformanceLevel)));
@@ -972,28 +1008,32 @@ out:
 
 static void get_fanrange(int gpu, int *imin, int *imax)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Get fanrange not supported\n");
 		return;
 	}
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	*imin = ga->lpFanSpeedInfo.iMinPercent;
 	*imax = ga->lpFanSpeedInfo.iMaxPercent;
 }
 
 int set_fanspeed(int gpu, int iFanSpeed)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int ret = 1;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set fanspeed not supported\n");
 		return ret;
 	}
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	if (!(ga->lpFanSpeedInfo.iFlags & (ADL_DL_FANCTRL_SUPPORTS_RPM_WRITE | ADL_DL_FANCTRL_SUPPORTS_PERCENT_WRITE ))) {
 		applog(LOG_DEBUG, "GPU %d doesn't support rpm or percent write", gpu);
 		return ret;
@@ -1030,15 +1070,17 @@ int set_fanspeed(int gpu, int iFanSpeed)
 #ifdef HAVE_CURSES
 static int set_powertune(int gpu, int iPercentage)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
 	int dummy, ret = 1;
 
-	if (!gpus[gpu].has_adl || !adl_active) {
+	if (!data->has_adl || !adl_active) {
 		wlogprint("Set powertune not supported\n");
 		return ret;
 	}
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 
 	lock_adl();
 	ADL_Overdrive5_PowerControl_Set(ga->iAdapterIndex, iPercentage);
@@ -1057,10 +1099,11 @@ static int set_powertune(int gpu, int iPercentage)
 static bool fan_autotune(int gpu, int temp, int fanpercent, int lasttemp, bool *fan_window)
 {
 	struct cgpu_info *cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	int tdiff = round(temp - lasttemp);
-	struct gpu_adl *ga = &cgpu->adl;
-	int top = gpus[gpu].gpu_fan;
-	int bot = gpus[gpu].min_fan;
+	struct gpu_adl *ga = &data->adl;
+	const int top = data->gpu_fan;
+	const int bot = data->min_fan;
 	int newpercent = fanpercent;
 	int iMin = 0, iMax = 100;
 
@@ -1132,7 +1175,8 @@ void gpu_autotune(int gpu, enum dev_enable *denable)
 	struct gpu_adl *ga;
 
 	cgpu = &gpus[gpu];
-	ga = &cgpu->adl;
+	struct opencl_device_data * const data = cgpu->device_data;
+	ga = &data->adl;
 
 	lock_adl();
 	ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity);
@@ -1211,11 +1255,13 @@ void gpu_autotune(int gpu, enum dev_enable *denable)
 
 void set_defaultfan(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ADL_Overdrive5_FanSpeed_Set(ga->iAdapterIndex, 0, &ga->DefFanSpeedValue);
 	unlock_adl();
@@ -1223,11 +1269,13 @@ void set_defaultfan(int gpu)
 
 void set_defaultengine(int gpu)
 {
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	struct gpu_adl *ga;
-	if (!gpus[gpu].has_adl || !adl_active)
+	if (!data->has_adl || !adl_active)
 		return;
 
-	ga = &gpus[gpu].adl;
+	ga = &data->adl;
 	lock_adl();
 	ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, ga->DefPerfLev);
 	unlock_adl();
@@ -1236,7 +1284,9 @@ void set_defaultengine(int gpu)
 #ifdef HAVE_CURSES
 void change_autosettings(int gpu)
 {
-	struct gpu_adl *ga = &gpus[gpu].adl;
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
+	struct gpu_adl *ga = &data->adl;
 	char input;
 	int val;
 
@@ -1285,7 +1335,9 @@ void change_autosettings(int gpu)
 
 void change_gpusettings(int gpu)
 {
-	struct gpu_adl *ga = &gpus[gpu].adl;
+	struct cgpu_info * const cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
+	struct gpu_adl *ga = &data->adl;
 	float fval, fmin = 0, fmax = 0;
 	int val, imin = 0, imax = 0;
 	char input;
@@ -1306,7 +1358,7 @@ updated:
 	wlogprint("Engine Clock: %d MHz\nMemory Clock: %d MHz\nVddc: %.3f V\nActivity: %d%%\nPowertune: %d%%\n",
 		engineclock, memclock, vddc, activity, powertune);
 	wlogprint("Fan autotune is %s (%d-%d)\n", ga->autofan ? "enabled" : "disabled",
-		  gpus[gpu].min_fan, gpus[gpu].gpu_fan);
+		  data->min_fan, data->gpu_fan);
 	wlogprint("GPU engine clock autotune is %s (%d-%d)\n", ga->autoengine ? "enabled" : "disabled",
 		ga->minspeed / 100, ga->maxspeed / 100);
 	wlogprint("Change [A]utomatic [E]ngine [F]an [M]emory [V]oltage [P]owertune\n");
@@ -1414,9 +1466,11 @@ void clear_adl(int nDevs)
 	lock_adl();
 	/* Try to reset values to their defaults */
 	for (i = 0; i < nDevs; i++) {
-		ga = &gpus[i].adl;
+		struct cgpu_info * const cgpu = &gpus[i];
+		struct opencl_device_data * const data = cgpu->device_data;
+		ga = &data->adl;
 		/*  Only reset the values if we've changed them at any time */
-		if (!gpus[i].has_adl || !ga->managed)
+		if (!data->has_adl || !ga->managed)
 			continue;
 		ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, ga->DefPerfLev);
 		free(ga->DefPerfLev);

+ 8 - 4
api.c

@@ -33,6 +33,7 @@
 #include "miner.h"
 #include "util.h"
 #include "driver-cpu.h" /* for algo_names[], TODO: re-factor dependency */
+#include "driver-opencl.h"
 
 #define HAVE_AN_FPGA 1
 
@@ -1338,7 +1339,8 @@ static void minerconfig(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __
 	int i;
 
 	for (i = 0; i < nDevs; i++) {
-		if (gpus[i].has_adl) {
+		struct opencl_device_data * const data = gpus[i].device_data;
+		if (data->has_adl) {
 			adlinuse = (char *)YES;
 			break;
 		}
@@ -2731,8 +2733,10 @@ static void gpuintensity(struct io_data *io_data, __maybe_unused SOCKETTYPE c, c
 	if (!splitgpuvalue(io_data, param, &id, &value, isjson))
 		return;
 
+	struct opencl_device_data * const data = gpus[id].device_data;
+	
 	if (!strncasecmp(value, DYNAMIC, 1)) {
-		gpus[id].dynamic = true;
+		data->dynamic = true;
 		strcpy(intensitystr, DYNAMIC);
 	}
 	else {
@@ -2742,8 +2746,8 @@ static void gpuintensity(struct io_data *io_data, __maybe_unused SOCKETTYPE c, c
 			return;
 		}
 
-		gpus[id].dynamic = false;
-		gpus[id].intensity = intensity;
+		data->dynamic = false;
+		data->intensity = intensity;
 		sprintf(intensitystr, "%d", intensity);
 	}
 

File diff suppressed because it is too large
+ 314 - 430
driver-opencl.c


+ 87 - 15
driver-opencl.h

@@ -3,29 +3,101 @@
 
 #include <stdbool.h>
 
+#include "CL/cl.h"
+#ifdef HAVE_SENSORS
+#include <sensors/sensors.h>
+#endif
+
 #include "miner.h"
 
+struct opencl_device_data {  /* Per-GPU OpenCL driver state; reached through cgpu_info.device_data (e.g. gpus[i].device_data) */
+	bool mapped;  /* true when a manual OpenCL-to-ADL mapping was provided for this device (checked in init_adl) */
+	int virtual_gpu;  /* copied from vadapters[virtual_adl].virtual_gpu by init_adl */
+	int virtual_adl;  /* ADL device index paired with this OpenCL device; init_adl defaults it to the device's own index when !mapped */
+	int intensity;  /* fixed intensity; the API gpuintensity command stores it together with dynamic = false */
+	bool dynamic;  /* true selects dynamic intensity instead of the fixed value above */
+	
+	cl_uint vwidth;  /* previously saved to the config file under "vectors" */
+	size_t work_size;  /* previously saved to the config file under "worksize" */
+	enum cl_kernels kernel;  /* previously saved to the config file under "kernel" (poclbm, phatk, diakgcn, diablo, scrypt) */
+	cl_ulong max_alloc;  /* NOTE(review): presumably the device's maximum allocation size -- not shown in this diff, confirm */
+	
+#ifdef USE_SCRYPT
+	int opt_lg, lookup_gap;  /* opt_lg was saved to the config file under "lookup-gap" */
+	size_t opt_tc, thread_concurrency;  /* opt_tc was saved to the config file under "thread-concurrency" */
+	size_t shaders;  /* previously saved to the config file under "shaders" */
+#endif
+	struct timeval tv_gpustart;  /* NOTE(review): usage not visible in this diff */
+	int intervals;  /* NOTE(review): usage not visible in this diff */
+	
+#ifdef HAVE_ADL
+	bool has_adl;  /* set by init_adl once ADL is usable for this device; every ADL getter/setter bails out when false */
+	struct gpu_adl adl;  /* ADL bookkeeping (adapter index, defaults, autotune flags) filled in by init_adl */
+	
+	int gpu_engine;  /* requested engine clock; 0 = leave alone; init_adl multiplies by 100 before handing it to ADL */
+	int min_engine;  /* if non-zero, init_adl sets adl.minspeed to this value * 100 */
+	int gpu_fan;  /* upper fan bound in percent; init_adl falls back to 85 when unset */
+	int min_fan;  /* lower fan bound used when clamping the nominal/autotuned fan speed */
+	int gpu_memclock;  /* requested memory clock; 0 = leave alone; multiplied by 100 before handing it to ADL */
+	int gpu_memdiff;  /* if non-zero, memory clock is set to engine clock + this offset whenever the engine clock is set */
+	int gpu_powertune;  /* if non-zero, passed to ADL_Overdrive5_PowerControl_Set by init_adl */
+	float gpu_vddc;  /* requested core voltage; 0 = leave alone; multiplied by 1000 before handing it to ADL */
+#endif
+	
+#ifdef HAVE_SENSORS
+	const sensors_chip_name *sensor;  /* NOTE(review): presumably the libsensors chip matched to this GPU -- confirm in driver-opencl.c */
+#endif
+};
+
+struct opencl_work_data {  /* Per-work kernel input values; precalc_hash() receives a pointer to this as its blk argument */
+	cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;
+	cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
+	cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d;
+	cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h;
+	cl_uint merkle; cl_uint ntime; cl_uint nbits;
+	cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15;
+	cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2;
+	cl_uint W16; cl_uint W17; cl_uint W2;
+	cl_uint PreVal4; cl_uint T1;
+	cl_uint C1addK5; cl_uint D1A; cl_uint W2A; cl_uint W17_2;
+	cl_uint PreVal4addT1; cl_uint T1substate0;
+	cl_uint PreVal4_2;
+	cl_uint PreVal0;
+	cl_uint PreW18;
+	cl_uint PreW19;
+	cl_uint PreW31;
+	cl_uint PreW32;
+
+	/* For diakgcn */
+	cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
+	cl_uint zeroA, zeroB;
+	cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
+#ifdef USE_SCRYPT
+	struct work *work;  /* NOTE(review): presumably a back-pointer to the owning work item for the scrypt path -- confirm in driver-opencl.c */
+#endif
+};
 
+extern void opencl_early_init();
 extern char *print_ndevs_and_exit(int *ndevs);
 extern void *reinit_gpu(void *userdata);
 extern char *set_gpu_map(char *arg);
-extern char *set_gpu_engine(char *arg);
-extern char *set_gpu_fan(char *arg);
-extern char *set_gpu_memclock(char *arg);
-extern char *set_gpu_memdiff(char *arg);
-extern char *set_gpu_powertune(char *arg);
-extern char *set_gpu_vddc(char *arg);
-extern char *set_temp_overheat(char *arg);
-extern char *set_temp_target(char *arg);
-extern char *set_intensity(char *arg);
-extern char *set_vector(char *arg);
-extern char *set_worksize(char *arg);
+extern const char *set_gpu_engine(char *arg);
+extern const char *set_gpu_fan(char *arg);
+extern const char *set_gpu_memclock(char *arg);
+extern const char *set_gpu_memdiff(char *arg);
+extern const char *set_gpu_powertune(char *arg);
+extern const char *set_gpu_vddc(char *arg);
+extern const char *set_temp_overheat(char *arg);
+extern const char *set_intensity(char *arg);
+extern const char *set_vector(char *arg);
+extern const char *set_worksize(char *arg);
 #ifdef USE_SCRYPT
-extern char *set_shaders(char *arg);
-extern char *set_lookup_gap(char *arg);
-extern char *set_thread_concurrency(char *arg);
+extern const char *set_shaders(char *arg);
+extern const char *set_lookup_gap(char *arg);
+extern const char *set_thread_concurrency(char *arg);
 #endif
-extern char *set_kernel(char *arg);
+extern const char *set_kernel(char *arg);
+extern void write_config_opencl(FILE *);
 void manage_gpu(void);
 extern void opencl_dynamic_cleanup();
 extern void pause_dynamic_threads(int gpu);

+ 1 - 1
findnonce.c

@@ -49,7 +49,7 @@ const uint32_t SHA256_K[64] = {
 	d = d + h; \
 	h = h + (rotate(a, 30) ^ rotate(a, 19) ^ rotate(a, 10)) + ((a & b) | (c & (a | b)))
 
-void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
+void precalc_hash(struct opencl_work_data *blk, uint32_t *state, uint32_t *data)
 {
 	cl_uint A, B, C, D, E, F, G, H;
 

+ 2 - 1
findnonce.h

@@ -3,6 +3,7 @@
 
 #include <stdint.h>
 
+#include "driver-opencl.h"
 #include "miner.h"
 #include "config.h"
 
@@ -16,7 +17,7 @@
 #define SCRYPT_FOUND (0xFF)
 
 #ifdef HAVE_OPENCL
-extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
+extern void precalc_hash(struct opencl_work_data *blk, uint32_t *state, uint32_t *data);
 extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
 #endif /* HAVE_OPENCL */
 #endif /*__FINDNONCE_H__*/

+ 6 - 116
miner.c

@@ -74,7 +74,6 @@
 #include "deviceapi.h"
 #include "logging.h"
 #include "miner.h"
-#include "findnonce.h"
 #include "adl.h"
 #include "driver-cpu.h"
 #include "driver-opencl.h"
@@ -2702,6 +2701,7 @@ void clean_work(struct work *work)
 	free(work->job_id);
 	bytes_free(&work->nonce2);
 	free(work->nonce1);
+	free(work->device_data);
 
 	if (work->tmpl) {
 		struct pool *pool = work->pool;
@@ -6513,92 +6513,7 @@ void write_config(FILE *fcfg)
 	fputs("\n]\n", fcfg);
 
 #ifdef HAVE_OPENCL
-	if (nDevs) {
-		/* Write GPU device values */
-		fputs(",\n\"intensity\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-		{
-			if (i > 0)
-				fputc(',', fcfg);
-			if (gpus[i].dynamic)
-				fputc('d', fcfg);
-			else
-				fprintf(fcfg, "%d", gpus[i].intensity);
-		}
-		fputs("\",\n\"vectors\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				gpus[i].vwidth);
-		fputs("\",\n\"worksize\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].work_size);
-		fputs("\",\n\"kernel\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++) {
-			fprintf(fcfg, "%s", i > 0 ? "," : "");
-			switch (gpus[i].kernel) {
-				case KL_NONE: // Shouldn't happen
-					break;
-				case KL_POCLBM:
-					fprintf(fcfg, "poclbm");
-					break;
-				case KL_PHATK:
-					fprintf(fcfg, "phatk");
-					break;
-				case KL_DIAKGCN:
-					fprintf(fcfg, "diakgcn");
-					break;
-				case KL_DIABLO:
-					fprintf(fcfg, "diablo");
-					break;
-				case KL_SCRYPT:
-					fprintf(fcfg, "scrypt");
-					break;
-			}
-		}
-#ifdef USE_SCRYPT
-		fputs("\",\n\"lookup-gap\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].opt_lg);
-		fputs("\",\n\"thread-concurrency\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].opt_tc);
-		fputs("\",\n\"shaders\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].shaders);
-#endif
-#ifdef HAVE_ADL
-		fputs("\",\n\"gpu-engine\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d-%d", i > 0 ? "," : "", gpus[i].min_engine, gpus[i].gpu_engine);
-		fputs("\",\n\"gpu-fan\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d-%d", i > 0 ? "," : "", gpus[i].min_fan, gpus[i].gpu_fan);
-		fputs("\",\n\"gpu-memclock\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_memclock);
-		fputs("\",\n\"gpu-memdiff\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_memdiff);
-		fputs("\",\n\"gpu-powertune\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_powertune);
-		fputs("\",\n\"gpu-vddc\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%1.3f", i > 0 ? "," : "", gpus[i].gpu_vddc);
-		fputs("\",\n\"temp-overheat\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].adl.overtemp);
-#endif
-		fputs("\"", fcfg);
-	}
-#endif
-#ifdef HAVE_ADL
-	if (opt_reorder)
-		fprintf(fcfg, ",\n\"gpu-reorder\" : true");
+	write_config_opencl(fcfg);
 #endif
 #ifdef WANT_CPUMINE
 	fprintf(fcfg, ",\n\"algo\" : \"%s\"", algo_names[opt_algo]);
@@ -9099,7 +9014,7 @@ out:
 bool abandon_work(struct work *work, struct timeval *wdiff, uint64_t hashes)
 {
 	if (wdiff->tv_sec > opt_scantime ||
-	    work->blk.nonce >= MAXTHREADS - hashes ||
+	    work->blk.nonce >= 0xfffffffe - hashes ||
 	    hashes >= 0xfffffffe ||
 	    stale_work(work, false))
 		return true;
@@ -9977,7 +9892,6 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 			struct thr_info *thr = cgpu->thr[0];
 			enum dev_enable *denable;
 			char *dev_str = cgpu->proc_repr;
-			int gpu;
 
 			if (likely(drv_ready(cgpu)))
 			{
@@ -9987,21 +9901,10 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 					cgpu->drv->get_stats(cgpu);
 			}
 
-			gpu = cgpu->device_id;
 			denable = &cgpu->deven;
 
-#ifdef HAVE_ADL
-			if (adl_active && cgpu->has_adl)
-				gpu_autotune(gpu, denable);
-			if (opt_debug && cgpu->has_adl) {
-				int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
-				float temp = 0, vddc = 0;
-
-				if (gpu_stats(gpu, &temp, &engineclock, &memclock, &vddc, &activity, &fanspeed, &fanpercent, &powertune))
-					applog(LOG_DEBUG, "%.1f C  F: %d%%(%dRPM)  E: %dMHz  M: %dMHz  V: %.3fV  A: %d%%  P: %d%%",
-					temp, fanpercent, fanspeed, engineclock, memclock, vddc, activity, powertune);
-			}
-#endif
+			if (cgpu->drv->watchdog)
+				cgpu->drv->watchdog(cgpu, tvp_now);
 			
 			/* Thread is disabled */
 			if (*denable == DEV_DISABLED)
@@ -10074,12 +9977,6 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 				dev_error(cgpu, REASON_DEV_SICK_IDLE_60);
 				run_cmd(cmd_sick);
 				
-#ifdef HAVE_ADL
-				if (adl_active && cgpu->has_adl && gpu_activity(gpu) > 50) {
-					applog(LOG_ERR, "GPU still showing activity suggesting a hard hang.");
-					applog(LOG_ERR, "Will not attempt to auto-restart it.");
-				} else
-#endif
 				if (opt_restart && cgpu->drv->reinit_device) {
 					applog(LOG_ERR, "%s: Attempting to restart", dev_str);
 					reinit_device(cgpu);
@@ -10095,11 +9992,6 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 				   (cgpu->status == LIFE_SICK || cgpu->status == LIFE_DEAD)) {
 				/* Attempt to restart a GPU that's sick or dead once every minute */
 				cgtime(&thr->sick);
-#ifdef HAVE_ADL
-				if (adl_active && cgpu->has_adl && gpu_activity(gpu) > 50) {
-					/* Again do not attempt to restart a device that may have hard hung */
-				} else
-#endif
 				if (opt_restart)
 					reinit_device(cgpu);
 			}
@@ -11455,9 +11347,7 @@ int main(int argc, char *argv[])
 	mutex_init(&submitting_lock);
 
 #ifdef HAVE_OPENCL
-	memset(gpus, 0, sizeof(gpus));
-	for (i = 0; i < MAX_GPUDEVICES; i++)
-		gpus[i].dynamic = true;
+	opencl_early_init();
 #endif
 
 	schedstart.tm.tm_sec = 1;

+ 5 - 72
miner.h

@@ -42,10 +42,6 @@
 #include "logging.h"
 #include "util.h"
 
-#ifdef HAVE_OPENCL
-#include "CL/cl.h"
-#endif /* HAVE_OPENCL */
-
 #ifdef STDC_HEADERS
 # include <stdlib.h>
 # include <stddef.h>
@@ -304,6 +300,7 @@ struct device_drv {
 	bool (*lowl_probe)(const struct lowlevel_device_info *);
 
 	// Processor-specific functions
+	void (*watchdog)(struct cgpu_info *, const struct timeval *tv_now);
 	void (*reinit_device)(struct cgpu_info *);
 	bool (*override_statline_temp2)(char *buf, size_t bufsz, struct cgpu_info *, bool per_processor);
 	struct api_data* (*get_api_extra_device_detail)(struct cgpu_info *);
@@ -516,45 +513,12 @@ struct cgpu_info {
 	int64_t max_hashes;
 
 	const char *kname;
-#ifdef HAVE_OPENCL
-	bool mapped;
-	int virtual_gpu;
-	int virtual_adl;
-	int intensity;
-	bool dynamic;
-
-	cl_uint vwidth;
-	size_t work_size;
-	enum cl_kernels kernel;
-	cl_ulong max_alloc;
-
-#ifdef USE_SCRYPT
-	int opt_lg, lookup_gap;
-	size_t opt_tc, thread_concurrency;
-	size_t shaders;
-#endif
-	struct timeval tv_gpustart;
-	int intervals;
-#endif
 
 	float temp;
 	int cutofftemp;
 	int targettemp;
 	bool targettemp_user;
 
-#ifdef HAVE_ADL
-	bool has_adl;
-	struct gpu_adl adl;
-
-	int gpu_engine;
-	int min_engine;
-	int gpu_fan;
-	int min_fan;
-	int gpu_memclock;
-	int gpu_memdiff;
-	int gpu_powertune;
-	float gpu_vddc;
-#endif
 	double diff1;
 	double diff_accepted;
 	double diff_rejected;
@@ -1143,40 +1107,6 @@ extern double current_diff;
 extern uint64_t best_diff;
 extern time_t block_time;
 
-#ifdef HAVE_OPENCL
-typedef struct {
-	cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;
-	cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
-	cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d;
-	cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h;
-	cl_uint merkle; cl_uint ntime; cl_uint nbits; cl_uint nonce;
-	cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15;
-	cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2;
-	cl_uint W16; cl_uint W17; cl_uint W2;
-	cl_uint PreVal4; cl_uint T1;
-	cl_uint C1addK5; cl_uint D1A; cl_uint W2A; cl_uint W17_2;
-	cl_uint PreVal4addT1; cl_uint T1substate0;
-	cl_uint PreVal4_2;
-	cl_uint PreVal0;
-	cl_uint PreW18;
-	cl_uint PreW19;
-	cl_uint PreW31;
-	cl_uint PreW32;
-
-	/* For diakgcn */
-	cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
-	cl_uint zeroA, zeroB;
-	cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
-#ifdef USE_SCRYPT
-	struct work *work;
-#endif
-} dev_blk_ctx;
-#else
-typedef struct {
-	uint32_t nonce;
-} dev_blk_ctx;
-#endif
-
 struct curl_ent {
 	CURL *curl;
 	struct curl_ent *next;
@@ -1349,7 +1279,9 @@ struct work {
 	int		rolls;
 	int		drv_rolllimit; /* How much the driver can roll ntime */
 
-	dev_blk_ctx	blk;
+	struct {
+		uint32_t nonce;
+	} blk;
 
 	struct thr_info	*thr;
 	int		thr_id;
@@ -1374,6 +1306,7 @@ struct work {
 	unsigned char	work_restart_id;
 	int		id;
 	int		device_id;
+	void *device_data;
 	UT_hash_handle hh;
 	
 	double		work_difficulty;

+ 32 - 30
ocl.c

@@ -28,6 +28,7 @@
 #define OMIT_OPENCL_API
 
 #include "deviceapi.h"
+#include "driver-opencl.h"
 #include "findnonce.h"
 #include "logging.h"
 #include "ocl.h"
@@ -394,6 +395,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	bool patchbfi = false, prog_built = false;
 	bool usebinary = opt_opencl_binaries, ismesa = false;
 	struct cgpu_info *cgpu = &gpus[gpu];
+	struct opencl_device_data * const data = cgpu->device_data;
 	cl_platform_id platform = NULL;
 	char pbuff[256], vbuff[255];
 	char *s;
@@ -550,12 +552,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	}
 	applog(LOG_DEBUG, "Max work group size reported %"PRId64, (int64_t)clState->max_work_size);
 
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&data->max_alloc, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
 		return NULL;
 	}
-	applog(LOG_DEBUG, "Max mem alloc size is %lu", (unsigned long)cgpu->max_alloc);
+	applog(LOG_DEBUG, "Max mem alloc size is %lu", (unsigned long)data->max_alloc);
 	
 	if (strstr(vbuff, "MESA"))
 	{
@@ -576,7 +578,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	char filename[255];
 	char numbuf[32];
 
-	if (cgpu->kernel == KL_NONE) {
+	if (data->kernel == KL_NONE) {
 		if (opt_scrypt) {
 			applog(LOG_INFO, "Selecting scrypt kernel");
 			clState->chosen_kernel = KL_SCRYPT;
@@ -605,9 +607,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			applog(LOG_INFO, "Selecting phatk kernel");
 			clState->chosen_kernel = KL_PHATK;
 		}
-		cgpu->kernel = clState->chosen_kernel;
+		data->kernel = clState->chosen_kernel;
 	} else {
-		clState->chosen_kernel = cgpu->kernel;
+		clState->chosen_kernel = data->kernel;
 		if (clState->chosen_kernel == KL_PHATK &&
 		    (strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
 		     strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
@@ -645,7 +647,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			strcpy(filename, SCRYPT_KERNNAME".cl");
 			strcpy(binaryfilename, SCRYPT_KERNNAME);
 			/* Scrypt only supports vector 1 */
-			cgpu->vwidth = 1;
+			data->vwidth = 1;
 			break;
 		case KL_NONE: /* Shouldn't happen */
 		case KL_DIABLO:
@@ -654,48 +656,48 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			break;
 	}
 
-	if (cgpu->vwidth)
-		clState->vwidth = cgpu->vwidth;
+	if (data->vwidth)
+		clState->vwidth = data->vwidth;
 	else {
 		clState->vwidth = preferred_vwidth;
-		cgpu->vwidth = preferred_vwidth;
+		data->vwidth = preferred_vwidth;
 	}
 
 	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
 		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
 			clState->goffset = true;
 
-	if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
-		clState->wsize = cgpu->work_size;
+	if (data->work_size && data->work_size <= clState->max_work_size)
+		clState->wsize = data->work_size;
 	else if (opt_scrypt)
 		clState->wsize = 256;
 	else if (strstr(name, "Tahiti"))
 		clState->wsize = 64;
 	else
 		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-	cgpu->work_size = clState->wsize;
+	data->work_size = clState->wsize;
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		if (!cgpu->opt_lg) {
+		if (!data->opt_lg) {
 			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
-			cgpu->lookup_gap = 2;
+			data->lookup_gap = 2;
 		} else
-			cgpu->lookup_gap = cgpu->opt_lg;
+			data->lookup_gap = data->opt_lg;
 
-		if (!cgpu->opt_tc) {
+		if (!data->opt_tc) {
 			unsigned int sixtyfours;
 
-			sixtyfours =  cgpu->max_alloc / 131072 / 64 - 1;
-			cgpu->thread_concurrency = sixtyfours * 64;
-			if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
-				cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
-				if (cgpu->thread_concurrency > cgpu->shaders * 5)
-					cgpu->thread_concurrency = cgpu->shaders * 5;
+			sixtyfours =  data->max_alloc / 131072 / 64 - 1;
+			data->thread_concurrency = sixtyfours * 64;
+			if (data->shaders && data->thread_concurrency > data->shaders) {
+				data->thread_concurrency -= data->thread_concurrency % data->shaders;
+				if (data->thread_concurrency > data->shaders * 5)
+					data->thread_concurrency = data->shaders * 5;
 			}
-			applog(LOG_DEBUG, "GPU %u: selecting thread concurrency of %lu", gpu,  (unsigned long)cgpu->thread_concurrency);
+			applog(LOG_DEBUG, "GPU %u: selecting thread concurrency of %lu", gpu,  (unsigned long)data->thread_concurrency);
 		} else
-			cgpu->thread_concurrency = cgpu->opt_tc;
+			data->thread_concurrency = data->opt_tc;
 	}
 #endif
 
@@ -728,7 +730,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		strcat(binaryfilename, "g");
 	if (opt_scrypt) {
 #ifdef USE_SCRYPT
-		sprintf(numbuf, "lg%utc%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency);
+		sprintf(numbuf, "lg%utc%u", data->lookup_gap, (unsigned int)data->thread_concurrency);
 		strcat(binaryfilename, numbuf);
 #endif
 	} else {
@@ -808,7 +810,7 @@ build:
 #ifdef USE_SCRYPT
 	if (opt_scrypt)
 		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize);
+			data->lookup_gap, (unsigned int)data->thread_concurrency, (int)clState->wsize);
 	else
 #endif
 	{
@@ -1008,13 +1010,13 @@ built:
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
-		size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
+		size_t ipt = (1024 / data->lookup_gap + (1024 % data->lookup_gap > 0));
+		size_t bufsize = 128 * ipt * data->thread_concurrency;
 
 		/* Use the max alloc value which has been rounded to a power of
 		 * 2 greater >= required amount earlier */
-		if (bufsize > cgpu->max_alloc) {
-			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu", gpu, (unsigned long)cgpu->max_alloc);
+		if (bufsize > data->max_alloc) {
+			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu", gpu, (unsigned long)data->max_alloc);
 			applog(LOG_WARNING, "Your scrypt settings come to %lu", (unsigned long)bufsize);
 		}
 		applog(LOG_DEBUG, "Creating scrypt buffer sized %lu", (unsigned long)bufsize);

Some files were not shown because too many files changed in this diff