
Merge branch 'multialgo' into bfgminer

Luke Dashjr, 11 years ago
Parent commit: ede1b33c81
27 changed files with 1358 additions and 559 deletions
  1. api.c (+17 -6)
  2. deviceapi.c (+22 -0)
  3. deviceapi.h (+3 -0)
  4. driver-cointerra.c (+7 -1)
  5. driver-cpu.c (+15 -19)
  6. driver-dualminer.c (+62 -34)
  7. driver-getwork.c (+5 -2)
  8. driver-gridseed.c (+1 -16)
  9. driver-icarus.h (+7 -0)
  10. driver-opencl.c (+229 -114)
  11. driver-opencl.h (+9 -5)
  12. driver-proxy.c (+13 -4)
  13. driver-stratum.c (+136 -24)
  14. driver-titan.c (+8 -5)
  15. driver-zeusminer.c (+1 -2)
  16. findnonce.c (+12 -3)
  17. findnonce.h (+6 -0)
  18. gc3355.c (+3 -3)
  19. gc3355.h (+1 -1)
  20. miner.c (+471 -110)
  21. miner.h (+53 -33)
  22. ocl.c (+148 -152)
  23. ocl.h (+29 -8)
  24. scrypt.c (+3 -3)
  25. scrypt.h (+1 -1)
  26. util.c (+93 -12)
  27. util.h (+3 -1)

+ 17 - 6
api.c

@@ -411,7 +411,7 @@ struct CODES {
  { SEVERITY_ERR,   MSG_MISVAL,	PARAM_NONE,	"Missing comma after GPU number" },
  { SEVERITY_ERR,   MSG_NOADL,	PARAM_NONE,	"ADL is not available" },
  { SEVERITY_ERR,   MSG_NOGPUADL,PARAM_GPU,	"GPU %d does not have ADL" },
- { SEVERITY_ERR,   MSG_INVINT,	PARAM_STR,	"Invalid intensity (%s) - must be '" _DYNAMIC  "' or range " MIN_SHA_INTENSITY_STR " - " MAX_SCRYPT_INTENSITY_STR },
+ { SEVERITY_ERR,   MSG_INVINT,	PARAM_STR,	"Invalid intensity (%s) - must be '" _DYNAMIC  "' or range -10 - 31" },
  { SEVERITY_INFO,  MSG_GPUINT,	PARAM_BOTH,	"GPU %d set new intensity to %s" },
  { SEVERITY_SUCC,  MSG_MINECONFIG,PARAM_NONE,	"BFGMiner config" },
 #ifdef HAVE_OPENCL
@@ -2682,7 +2682,11 @@ static void gpuintensity(struct io_data *io_data, __maybe_unused SOCKETTYPE c, c
 		if (data->dynamic)
 			strcpy(intensitystr, DYNAMIC);
 		else
-			snprintf(intensitystr, sizeof(intensitystr), "%g", oclthreads_to_intensity(data->oclthreads, !opt_scrypt));
+		{
+			const char *iunit;
+			float intensity = opencl_proc_get_intensity(cgpu, &iunit);
+			snprintf(intensitystr, sizeof(intensitystr), "%s%g", iunit, intensity);
+		}
 	}
 	else
 	{
@@ -3086,12 +3090,19 @@ static void minecoin(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __may
 			io_add(io_data, buf);
 		}
 		
+		switch (goal->malgo->algo)
+		{
 #ifdef USE_SCRYPT
-		if (opt_scrypt)
-			root = api_add_const(root, "Hash Method", SCRYPTSTR, false);
-		else
+			case POW_SCRYPT:
+				root = api_add_const(root, "Hash Method", SCRYPTSTR, false);
+				break;
 #endif
-			root = api_add_const(root, "Hash Method", SHA256STR, false);
+			case POW_SHA256D:
+				root = api_add_const(root, "Hash Method", SHA256STR, false);
+				break;
+			default:
+				break;
+		}
 
 		cg_rlock(&ch_lock);
 		struct blockchain_info * const blkchain = goal->blkchain;

+ 22 - 0
deviceapi.c

@@ -86,6 +86,28 @@ void bfg_devapi_init()
 }
 
 
+float common_sha256d_and_scrypt_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	switch (malgo->algo)
+	{
+#ifdef USE_SCRYPT
+		case POW_SCRYPT:
+			return 1./0x10000;
+#endif
+		case POW_SHA256D:
+			return 1.;
+		default:
+			return -1.;
+	}
+}
+
+#ifdef USE_SCRYPT
+float common_scrypt_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	return (malgo->algo == POW_SCRYPT) ? (1./0x10000) : -1.;
+}
+#endif
+
 bool hashes_done(struct thr_info *thr, int64_t hashes, struct timeval *tvp_hashes, uint32_t *max_nonce)
 {
 	struct cgpu_info *cgpu = thr->cgpu;

+ 3 - 0
deviceapi.h

@@ -42,6 +42,9 @@ extern void _bfg_register_driver(const struct device_drv *);
 
 extern bool bfg_need_detect_rescan;
 
+extern float common_sha256d_and_scrypt_min_nonce_diff(struct cgpu_info *, const struct mining_algorithm *);
+extern float common_scrypt_min_nonce_diff(struct cgpu_info *, const struct mining_algorithm *);
+
 extern void request_work(struct thr_info *);
 extern struct work *get_work(struct thr_info *);
 extern bool hashes_done(struct thr_info *, int64_t hashes, struct timeval *tvp_hashes, uint32_t *max_nonce);
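
These two helpers replace both the per-cgpu min_nonce_diff field and the supported_algos driver mask that the rest of this diff removes: a driver now answers "what is the minimum share difficulty for this algorithm?" through the new drv_min_nonce_diff callback, with a negative return meaning the algorithm cannot be mined. A minimal sketch of how a scrypt-only driver wires it up (the driver name is made up; the pattern matches the gridseed and zeusminer hunks later in this commit):

	struct device_drv example_scrypt_drv = {
		.dname = "example",
		.name = "XSD",
		// Called by the core once per mining algorithm; -1. marks the
		// algorithm as unsupported, so no supported_algos mask is needed.
		.drv_min_nonce_diff = common_scrypt_min_nonce_diff,
		/* ... probe and mining callbacks ... */
	};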

+ 7 - 1
driver-cointerra.c

@@ -272,6 +272,12 @@ bool cointerra_wait_for_info(struct cointerra_info * const ctainfo, struct lowl_
 	return true;
 }
 
+static
+float cointerra_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	return (malgo->algo == POW_SHA256D) ? CTA_INIT_DIFF : -1.;
+}
+
 static
 bool cointerra_lowl_probe(const struct lowlevel_device_info * const info)
 {
@@ -316,7 +322,6 @@ bool cointerra_lowl_probe(const struct lowlevel_device_info * const info)
 		.dev_product = maybe_strdup(info->product),
 		.dev_serial = maybe_strdup(info->serial),
 		.deven = DEV_ENABLED,
-		.min_nonce_diff = CTA_INIT_DIFF,
 	};
 	const bool rv = add_cgpu(dev);
 	applog(LOG_INFO, "%s: Successfully set up %s",
@@ -1357,6 +1362,7 @@ static const struct bfg_set_device_definition cointerra_set_device_funcs[] = {
 struct device_drv cointerra_drv = {
 	.dname = "cointerra",
 	.name = "CTA",
+	.drv_min_nonce_diff = cointerra_min_nonce_diff,
 	.lowl_match = cointerra_lowl_match,
 	.lowl_probe = cointerra_lowl_probe,
 	.thread_init = cta_prepare,

+ 15 - 19
driver-cpu.c

@@ -665,14 +665,10 @@ static enum sha256_algos pick_fastest_algo()
 	return best_algo;
 }
 
-/* FIXME: Use asprintf for better errors. */
 char *set_algo(const char *arg, enum sha256_algos *algo)
 {
 	enum sha256_algos i;
 
-	if (opt_scrypt)
-		return "Can only use scrypt algorithm";
-
 	for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
 		if (algo_names[i] && !strcmp(arg, algo_names[i])) {
 			*algo = i;
@@ -682,13 +678,6 @@ char *set_algo(const char *arg, enum sha256_algos *algo)
 	return "Unknown algorithm";
 }
 
-#ifdef WANT_SCRYPT
-void set_scrypt_algo(enum sha256_algos *algo)
-{
-	*algo = ALGO_SCRYPT;
-}
-#endif
-
 void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo)
 {
 	strncpy(buf, algo_names[*algo], OPT_SHOW_LEN);
@@ -805,9 +794,6 @@ static bool cpu_thread_init(struct thr_info *thr)
 
 	cgpu->kname = algo_names[opt_algo];
 	
-	if (opt_algo == ALGO_SCRYPT)
-		cgpu->min_nonce_diff = 1./0x10000;
-	
 	/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
 	 * and if that fails, then SCHED_BATCH. No need for this to be an
 	 * error if it fails */
@@ -834,7 +820,20 @@ CPUSearch:
 
 	/* scan nonces for a proof-of-work hash */
 	{
-		sha256_func func = sha256_funcs[opt_algo];
+		sha256_func func = NULL;
+		switch (work_mining_algorithm(work)->algo)
+		{
+#ifdef USE_SCRYPT
+			case POW_SCRYPT:
+				func = scanhash_scrypt;
+				break;
+#endif
+			case POW_SHA256D:
+				func = sha256_funcs[opt_algo];
+				break;
+		}
+		if (unlikely(!func))
+			applogr(0, LOG_ERR, "%"PRIpreprv": Unknown mining algorithm", thr->cgpu->proc_repr);
 		rc = (*func)(
 			thr,
 			work->midstate,
@@ -867,7 +866,7 @@ struct device_drv cpu_drv = {
 	.dname = "cpu",
 	.name = "CPU",
 	.probe_priority = 120,
-	.supported_algos = POW_SHA256D | POW_SCRYPT,
+	.drv_min_nonce_diff = common_sha256d_and_scrypt_min_nonce_diff,
 	.drv_detect = cpu_detect,
 	.thread_prepare = cpu_thread_prepare,
 	.can_limit_work = cpu_can_limit_work,
@@ -875,6 +874,3 @@ struct device_drv cpu_drv = {
 	.scanhash = cpu_scanhash,
 };
 #endif
-
-
-

+ 62 - 34
driver-dualminer.c

@@ -33,11 +33,6 @@
   #include <io.h>
 #endif
 
-// mining both Scrypt & SHA2 at the same time with two processes
-// SHA2 process must be run first, no arg requirements, first serial port will be used
-// Scrypt process must be launched after, --scrypt and --dual-mode args required
-bool opt_dual_mode = false;
-
 #define DUALMINER_IO_SPEED 115200
 
 #define DUALMINER_SCRYPT_SM_HASH_TIME   0.00001428571429
@@ -85,6 +80,16 @@ const struct bfg_set_device_definition dualminer_set_device_funcs[];
 
 // device helper functions
 
+static inline
+bool dualminer_is_scrypt(struct ICARUS_INFO * const info)
+{
+#ifdef USE_SCRYPT
+	return info->scrypt;
+#else
+	return false;
+#endif
+}
+
 static
 void dualminer_teardown_device(int fd)
 {
@@ -102,37 +107,27 @@ void dualminer_init_hashrate(struct cgpu_info * const cgpu)
 
 	// get clear to send (CTS) status
 	if ((gc3355_get_cts_status(fd) != 1) &&  // 0.9v - dip-switch set to B
-		(opt_scrypt))
+		(dualminer_is_scrypt(info)))
 		// adjust hash-rate for voltage
 		info->Hs = DUALMINER_SCRYPT_DM_HASH_TIME;
 }
 
-static
-bool dualminer_init(struct thr_info * const thr)
-{
-	struct cgpu_info * const cgpu = thr->cgpu;
-	
-	if (opt_scrypt)
-		cgpu->min_nonce_diff = 1./0x10000;
-	
-	return icarus_init(thr);
-}
-
 // runs when job starts and the device has been reset (or first run)
 static
 void dualminer_init_firstrun(struct cgpu_info *icarus)
 {
+	struct ICARUS_INFO * const info = icarus->device_data;
 	int fd = icarus->device_fd;
 
-	gc3355_init_dualminer(fd, opt_pll_freq, !opt_dual_mode, false);
+	gc3355_init_dualminer(fd, opt_pll_freq, !info->dual_mode, false, dualminer_is_scrypt(info));
 	
 	dualminer_init_hashrate(icarus);
 
 	applog(LOG_DEBUG, "%"PRIpreprv": dualminer: Init: pll=%d, scrypt: %d, scrypt only: %d",
 		   icarus->proc_repr,
 		   opt_pll_freq,
-		   opt_scrypt,
-		   opt_scrypt && !opt_dual_mode);
+		   dualminer_is_scrypt(info),
+		   dualminer_is_scrypt(info) && !info->dual_mode);
 }
 
 // set defaults for options that the user didn't specify
@@ -160,15 +155,31 @@ void dualminer_set_defaults(int fd)
 	}
 }
 
+float dualminer_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	struct ICARUS_INFO * const info = proc ? proc->device_data : NULL;
+	switch (malgo->algo)
+	{
+#ifdef USE_SCRYPT
+		case POW_SCRYPT:
+			return ((!info) || dualminer_is_scrypt(info)) ? (1./0x10000) : -1.;
+#endif
+		case POW_SHA256D:
+			return (info && dualminer_is_scrypt(info)) ? -1. : 1.;
+		default:
+			return -1.;
+	}
+}
+
 // ICARUS_INFO functions - icarus-common.h
 
 // runs after fd is opened but before the device detection code
 static
-bool dualminer_detect_init(const char *devpath, int fd, struct ICARUS_INFO * __maybe_unused info)
+bool dualminer_detect_init(const char *devpath, int fd, struct ICARUS_INFO * const info)
 {
 	dualminer_set_defaults(fd);
 	
-	gc3355_init_dualminer(fd, opt_pll_freq, !opt_dual_mode, true);
+	gc3355_init_dualminer(fd, opt_pll_freq, !info->dual_mode, true, dualminer_is_scrypt(info));
 
 	return true;
 }
@@ -178,6 +189,7 @@ static
 bool dualminer_job_start(struct thr_info * const thr)
 {
 	struct cgpu_info *icarus = thr->cgpu;
+	struct ICARUS_INFO * const info = icarus->device_data;
 	struct icarus_state * const state = thr->cgpu_data;
 	int fd = icarus->device_fd;
 
@@ -185,9 +197,9 @@ bool dualminer_job_start(struct thr_info * const thr)
 		// runs when job starts and the device has been reset (or first run)
 		dualminer_init_firstrun(icarus);
 
-	if (opt_scrypt)
+	if (dualminer_is_scrypt(info))
 	{
-		if (opt_dual_mode)
+		if (info->dual_mode)
 			gc3355_scrypt_reset(fd);
 		else
 			gc3355_scrypt_only_reset(fd);
@@ -221,10 +233,15 @@ bool dualminer_detect_one(const char *devpath)
 		.nonce_littleendian = true,
 		.work_division = 1,
 		.detect_init_func = dualminer_detect_init,
-		.job_start_func = dualminer_job_start
+		.job_start_func = dualminer_job_start,
+#ifdef USE_SCRYPT
+		.scrypt = (get_mining_goal("default")->malgo->algo == POW_SCRYPT),
+#endif
 	};
 
-	if (opt_scrypt)
+	drv_set_defaults(drv, dualminer_set_device_funcs, info, devpath, detectone_meta_info.serial, 1);
+
+	if (dualminer_is_scrypt(info))
 	{
 		info->golden_ob = (char*)scrypt_golden_ob;
 		info->golden_nonce = (char*)scrypt_golden_nonce;
@@ -237,15 +254,13 @@ bool dualminer_detect_one(const char *devpath)
 		info->Hs = DUALMINER_SHA2_DM_HASH_TIME;
 	}
 
-	drv_set_defaults(drv, dualminer_set_device_funcs, info, devpath, detectone_meta_info.serial, 1);
-
 	if (!icarus_detect_custom(devpath, drv, info))
 	{
 		free(info);
 		return false;
 	}
 
-	if (opt_scrypt)
+	if (dualminer_is_scrypt(info))
 		info->read_count = DUALMINER_SCRYPT_READ_COUNT; // 4.8s to read
 	else
 		info->read_count = DUALMINER_SHA2_READ_COUNT; // 1.6s to read
@@ -259,14 +274,28 @@ bool dualminer_detect_one(const char *devpath)
 static
 const char *dualminer_set_dual_mode(struct cgpu_info * const proc, const char * const option, const char * const setting, char * const replybuf, enum bfg_set_device_replytype * const success)
 {
+	struct ICARUS_INFO * const info = proc->device_data;
 	int val = atoi(setting);
-	opt_dual_mode = val == 1;
+	info->dual_mode = val == 1;
+	return NULL;
+}
+
+#ifdef USE_SCRYPT
+static
+const char *dualminer_set_scrypt(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const out_success)
+{
+	struct ICARUS_INFO * const info = proc->device_data;
+	info->scrypt = atoi(newvalue);
 	return NULL;
 }
+#endif
 
 static
 const struct bfg_set_device_definition dualminer_set_device_funcs[] = {
-	{"dual_mode", dualminer_set_dual_mode, "set to 1 to enable dual algorithm mining with two BFGMiner processes"},
+	{"dual_mode", dualminer_set_dual_mode, "set to 1 to enable dual algorithm mining"},
+#ifdef USE_SCRYPT
+	{"scrypt", dualminer_set_scrypt, "set to 1 to put in scrypt mode"},
+#endif
 	{NULL},
 };
 
@@ -298,7 +327,7 @@ bool dualminer_job_prepare(struct thr_info *thr, struct work *work, __maybe_unus
 
 	memset(state->ob_bin, 0, info->ob_size);
 
-	if (opt_scrypt)
+	if (dualminer_is_scrypt(info))
 		gc3355_scrypt_prepare_work(state->ob_bin, work);
 	else
 		gc3355_sha2_prepare_work(state->ob_bin, work);
@@ -329,9 +358,8 @@ void dualminer_drv_init()
 	dualminer_drv = icarus_drv;
 	dualminer_drv.dname = "dualminer";
 	dualminer_drv.name = "DMU";
-	dualminer_drv.supported_algos = POW_SCRYPT | POW_SHA256D;
+	dualminer_drv.drv_min_nonce_diff = dualminer_min_nonce_diff;
 	dualminer_drv.lowl_probe = dualminer_lowl_probe;
-	dualminer_drv.thread_init = dualminer_init;
 	dualminer_drv.thread_shutdown = dualminer_thread_shutdown;
 	dualminer_drv.job_prepare = dualminer_job_prepare;
 	dualminer_drv.set_device = dualminer_set_device;
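
With the global opt_scrypt/opt_dual_mode flags gone, the driver's minimum nonce difficulty now depends on both the requested algorithm and the per-device "scrypt" setting. A summary of dualminer_min_nonce_diff above, where -1 means the combination cannot be mined and proc == NULL covers the pre-detection case with no device data yet:

	/* algorithm      device "scrypt" setting        result
	 * POW_SCRYPT     set, or proc == NULL           1/0x10000
	 * POW_SCRYPT     clear                          -1
	 * POW_SHA256D    set                            -1
	 * POW_SHA256D    clear, or proc == NULL         1
	 */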

+ 5 - 2
driver-getwork.c

@@ -185,7 +185,8 @@ int handle_getwork(struct MHD_Connection *conn, bytes_t *upbuf)
 		size_t replysz = 590 + idstr_sz;
 		
 		work = get_work(thr);
-		work->nonce_diff = client->desired_share_pdiff;
+		const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+		work->nonce_diff = client->desired_share_pdiff ?: malgo->reasonable_low_nonce_diff;
 		if (work->nonce_diff > work->work_difficulty)
 			work->nonce_diff = work->work_difficulty;
 		
@@ -201,13 +202,15 @@ int handle_getwork(struct MHD_Connection *conn, bytes_t *upbuf)
 		memcpy(&reply[442], "\",\"hash1\":\"00000000000000000000000000000000000000000000000000000000000000000000008000000000000000000000000000000000000000000000000000010000\"},\"id\":", 147);
 		memcpy(&reply[589], idstr ?: "0", idstr_sz);
 		memcpy(&reply[589 + idstr_sz], "}", 1);
-		if (opt_scrypt)
+#ifdef USE_SCRYPT
+		if (malgo->algo == POW_SCRYPT)
 		{
 			replysz += 21;
 			reply = realloc(reply, replysz);
 			memmove(&reply[443 + 21], &reply[443], replysz - (443 + 21));
 			memcpy(&reply[443], ",\"algorithm\":\"scrypt\"", 21);
 		}
+#endif
 		
 		timer_set_now(&work->tv_work_start);
 		HASH_ADD_KEYPTR(hh, client->work, work->data, 76, work);

+ 1 - 16
driver-gridseed.c

@@ -215,24 +215,12 @@ bool gridseed_lowl_probe(const struct lowlevel_device_info * const info)
  * setup & shutdown
  */
 
-static
-bool gridseed_thread_prepare(struct thr_info *thr)
-{
-	thr->cgpu_data = calloc(1, sizeof(*thr->cgpu_data));
-	
-	struct cgpu_info *device = thr->cgpu;
-	device->min_nonce_diff = 1./0x10000;
-
-	return true;
-}
-
 static
 void gridseed_thread_shutdown(struct thr_info *thr)
 {
 	struct cgpu_info *device = thr->cgpu;
 
 	gc3355_close(device->device_fd);
-	free(thr->cgpu_data);
 }
 
 /*
@@ -446,14 +434,11 @@ struct device_drv gridseed_drv =
 	// metadata
 	.dname = "gridseed",
 	.name = "GSD",
-	.supported_algos = POW_SCRYPT,
+	.drv_min_nonce_diff = common_scrypt_min_nonce_diff,
 	
 	// detect device
 	.lowl_probe = gridseed_lowl_probe,
 	
-	// initialize device
-	.thread_prepare = gridseed_thread_prepare,
-	
 	// specify mining type - scanhash
 	.minerloop = minerloop_scanhash,
 	

+ 7 - 0
driver-icarus.h

@@ -134,6 +134,13 @@ struct ICARUS_INFO {
 	bool (*detect_init_func)(const char *devpath, int fd, struct ICARUS_INFO *);
 	bool (*job_start_func)(struct thr_info *);
 	
+#ifdef USE_DUALMINER
+#ifdef USE_SCRYPT
+	bool scrypt;
+#endif
+	bool dual_mode;
+#endif
+	
 #ifdef USE_ZEUSMINER
 	// Hardware information, doesn't affect anything directly
 	uint16_t freq;

+ 229 - 114
driver-opencl.c

@@ -260,8 +260,6 @@ load_opencl_symbols() {
 #endif
 
 
-typedef cl_int (*queue_kernel_parameters_func_t)(_clState *, struct work *, cl_uint);
-
 struct opencl_kernel_interface {
 	const char *kiname;
 	queue_kernel_parameters_func_t queue_kernel_parameters_func;
@@ -308,6 +306,10 @@ void opencl_early_init()
 		struct opencl_device_data * const data = &dataarray[i];
 		*data = (struct opencl_device_data){
 			.dynamic = true,
+			.intensity = intensity_not_set,
+#ifdef USE_SCRYPT
+			.lookup_gap = 2,
+#endif
 		};
 		gpus[i] = (struct cgpu_info){
 			.device_data = data,
@@ -375,8 +377,8 @@ _SET_INT_LIST(worksize, (v >= 1 && v <= 9999)       , work_size)
 
 #ifdef USE_SCRYPT
 _SET_INT_LIST(shaders           , true, shaders)
-_SET_INT_LIST(lookup_gap        , true, opt_lg )
-_SET_INT_LIST(thread_concurrency, true, opt_tc )
+_SET_INT_LIST(lookup_gap        , true, lookup_gap)
+_SET_INT_LIST(thread_concurrency, true, thread_concurrency)
 #endif
 
 enum cl_kernels select_kernel(const char * const arg)
@@ -405,20 +407,26 @@ const char *opencl_get_kernel_interface_name(const enum cl_kernels kern)
 static
 bool _set_kernel(struct cgpu_info * const cgpu, const char *_val)
 {
-	FILE *F;
 	struct opencl_device_data * const data = cgpu->device_data;
 	
 	size_t knamelen = strlen(_val);
 	char filename[knamelen + 3 + 1];
 	sprintf(filename, "%s.cl", _val);
 	
-	F = opencl_open_kernel(filename);
-	if (!F)
+	int dummy_srclen;
+	enum cl_kernels interface;
+	char *src = opencl_kernel_source(filename, &dummy_srclen, &interface);
+	if (!src)
 		return false;
-	fclose(F);
+	free(src);
 	
-	free(data->kernel_file);
-	data->kernel_file = strdup(_val);
+	char **kfp = &data->kernel_file_sha256d;
+#ifdef USE_SCRYPT
+	if (interface == KL_SCRYPT)
+		kfp = &data->kernel_file_scrypt;
+#endif
+	free(*kfp);
+	*kfp = strdup(_val);
 	
 	return true;
 }
@@ -655,22 +663,6 @@ _SET_INT_LIST(temp_overheat, (v >=     0 && v <   200), adl.overtemp )
 #endif
 
 #ifdef HAVE_OPENCL
-// SHA256d "intensity" has an artificial offset of -15
-double oclthreads_to_intensity(const unsigned long oclthreads, const bool is_sha256d)
-{
-	double intensity = log2(oclthreads);
-	if (is_sha256d)
-		intensity -= 15.;
-	return intensity;
-}
-
-unsigned long intensity_to_oclthreads(double intensity, const bool is_sha256d)
-{
-	if (is_sha256d)
-		intensity += 15;
-	return pow(2, intensity);
-}
-
 double oclthreads_to_xintensity(const unsigned long oclthreads, const cl_uint max_compute_units)
 {
 	return (double)oclthreads / (double)max_compute_units / 64.;
@@ -681,10 +673,33 @@ unsigned long xintensity_to_oclthreads(const double xintensity, const cl_uint ma
 	return xintensity * max_compute_units * 0x40;
 }
 
+static int min_intensity, max_intensity;
+
+// NOTE: This can't be attribute-constructor because then it would race with the mining_algorithms list being populated
+static
+void opencl_calc_intensity_range()
+{
+	RUNONCE();
+	
+	min_intensity = INT_MAX;
+	max_intensity = INT_MIN;
+	struct mining_algorithm *malgo;
+	LL_FOREACH(mining_algorithms, malgo)
+	{
+		const int malgo_min_intensity = malgo->opencl_oclthreads_to_intensity(malgo->opencl_min_oclthreads);
+		const int malgo_max_intensity = malgo->opencl_oclthreads_to_intensity(malgo->opencl_max_oclthreads);
+		if (malgo_min_intensity < min_intensity)
+			min_intensity = malgo_min_intensity;
+		if (malgo_max_intensity > max_intensity)
+			max_intensity = malgo_max_intensity;
+	}
+}
+
 bool opencl_set_intensity_from_str(struct cgpu_info * const cgpu, const char *_val)
 {
 	struct opencl_device_data * const data = cgpu->device_data;
 	unsigned long oclthreads = 0;
+	float intensity = intensity_not_set;
 	bool dynamic = false;
 	
 	if (!strncasecmp(_val, "d", 1))
@@ -713,9 +728,11 @@ bool opencl_set_intensity_from_str(struct cgpu_info * const cgpu, const char *_v
 	if (isdigit(_val[0]))
 	{
 		const double v = atof(_val);
-		if (v < MIN_INTENSITY || v > MAX_GPU_INTENSITY)
+		opencl_calc_intensity_range();
+		if (v < min_intensity || v > max_intensity)
 			return false;
-		oclthreads = intensity_to_oclthreads(v, !opt_scrypt);
+		oclthreads = 1;
+		intensity = v;
 	}
 	
 	// Make actual assignments after we know the values are valid
@@ -723,6 +740,7 @@ bool opencl_set_intensity_from_str(struct cgpu_info * const cgpu, const char *_v
 	if (data->oclthreads)
 	{
 		data->oclthreads = oclthreads;
+		data->intensity = intensity;
 		pause_dynamic_threads(cgpu->device_id);
 	}
 	else
@@ -804,14 +822,29 @@ struct device_drv opencl_api;
 
 #endif /* HAVE_OPENCL */
 
+float opencl_proc_get_intensity(struct cgpu_info * const proc, const char ** const iunit)
+{
+	struct opencl_device_data * const data = proc->device_data;
+	struct thr_info *thr = proc->thr[0];
+	const int thr_id = thr->id;
+	_clState * const clState = clStates[thr_id];
+	float intensity = data->intensity;
+	if (intensity == intensity_not_set)
+	{
+		intensity = oclthreads_to_xintensity(data->oclthreads, clState->max_compute_units);
+		*iunit = data->dynamic ? "dx" : "x";
+	}
+	else
+		*iunit = data->dynamic ? "d" : "";
+	return intensity;
+}
+
 #if defined(HAVE_OPENCL) && defined(HAVE_CURSES)
 static
 void opencl_wlogprint_status(struct cgpu_info *cgpu)
 {
 	struct opencl_device_data * const data = cgpu->device_data;
 	struct thr_info *thr = cgpu->thr[0];
-	const int thr_id = thr->id;
-	_clState * const clState = clStates[thr_id];
 	int i;
 	char checkin[40];
 	double displayed_rolling;
@@ -820,16 +853,9 @@ void opencl_wlogprint_status(struct cgpu_info *cgpu)
 	strcpy(logline, ""); // In case it has no data
 	
 	{
-		double intensity = oclthreads_to_intensity(data->oclthreads, !opt_scrypt);
-		double xintensity = oclthreads_to_xintensity(data->oclthreads, clState->max_compute_units);
-		const char *iunit = "";
-		if (xintensity - (int)xintensity < intensity - (int)intensity)
-		{
-			intensity = xintensity;
-			iunit = "x";
-		}
-		tailsprintf(logline, sizeof(logline), "I:%s%s%g ",
-		            (data->dynamic ? "d" : ""),
+		const char *iunit;
+		float intensity = opencl_proc_get_intensity(cgpu, &iunit);
+		tailsprintf(logline, sizeof(logline), "I:%s%g ",
 		            iunit,
 		            intensity);
 	}
@@ -923,17 +949,12 @@ const char *opencl_tui_handle_choice(struct cgpu_info *cgpu, int input)
 	{
 		case 'i': case 'I':
 		{
+			char promptbuf[0x40];
 			char *intvar;
 
-			if (opt_scrypt) {
-				intvar = curses_input("Set GPU scan intensity (d or "
-						      MIN_SCRYPT_INTENSITY_STR " -> "
-						      MAX_SCRYPT_INTENSITY_STR ")");
-			} else {
-				intvar = curses_input("Set GPU scan intensity (d or "
-						      MIN_SHA_INTENSITY_STR " -> "
-						      MAX_SHA_INTENSITY_STR ")");
-			}
+			opencl_calc_intensity_range();
+			snprintf(promptbuf, sizeof(promptbuf), "Set GPU scan intensity (d or %d -> %d)", min_intensity, max_intensity);
+			intvar = curses_input(promptbuf);
 			if (!intvar)
 				return "Invalid intensity\n";
 			if (!strncasecmp(intvar, "d", 1)) {
@@ -1007,10 +1028,10 @@ struct opencl_work_data *_opencl_work_data(struct work * const work)
 }
 
 static
-cl_int queue_poclbm_kernel(_clState * const clState, struct work * const work, const cl_uint threads)
+cl_int queue_poclbm_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, const cl_uint threads)
 {
 	struct opencl_work_data * const blk = _opencl_work_data(work);
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
 
@@ -1031,7 +1052,8 @@ cl_int queue_poclbm_kernel(_clState * const clState, struct work * const work, c
 	CL_SET_BLKARG(cty_g);
 	CL_SET_BLKARG(cty_h);
 
-	if (!clState->goffset) {
+	if (!kinfo->goffset)
+	{
 		cl_uint vwidth = clState->vwidth;
 		uint *nonces = alloca(sizeof(uint) * vwidth);
 		unsigned int i;
@@ -1062,10 +1084,10 @@ cl_int queue_poclbm_kernel(_clState * const clState, struct work * const work, c
 }
 
 static
-cl_int queue_phatk_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
+cl_int queue_phatk_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
 	struct opencl_work_data * const blk = _opencl_work_data(work);
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	cl_uint vwidth = clState->vwidth;
 	unsigned int i, num = 0;
 	cl_int status = 0;
@@ -1107,14 +1129,14 @@ cl_int queue_phatk_kernel(_clState * const clState, struct work * const work, __
 }
 
 static
-cl_int queue_diakgcn_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
+cl_int queue_diakgcn_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
 	struct opencl_work_data * const blk = _opencl_work_data(work);
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
 
-	if (!clState->goffset) {
+	if (!kinfo->goffset) {
 		cl_uint vwidth = clState->vwidth;
 		uint *nonces = alloca(sizeof(uint) * vwidth);
 		unsigned int i;
@@ -1169,14 +1191,14 @@ cl_int queue_diakgcn_kernel(_clState * const clState, struct work * const work,
 }
 
 static
-cl_int queue_diablo_kernel(_clState * const clState, struct work * const work, const cl_uint threads)
+cl_int queue_diablo_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, const cl_uint threads)
 {
 	struct opencl_work_data * const blk = _opencl_work_data(work);
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
 
-	if (!clState->goffset) {
+	if (!kinfo->goffset) {
 		cl_uint vwidth = clState->vwidth;
 		uint *nonces = alloca(sizeof(uint) * vwidth);
 		unsigned int i;
@@ -1225,10 +1247,10 @@ cl_int queue_diablo_kernel(_clState * const clState, struct work * const work, c
 
 #ifdef USE_SCRYPT
 static
-cl_int queue_scrypt_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
+cl_int queue_scrypt_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
 	unsigned char *midstate = work->midstate;
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	unsigned int num = 0;
 	cl_uint le_target;
 	cl_int status = 0;
@@ -1327,7 +1349,7 @@ select_cgpu:
 		//free(clState);
 
 		applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
-		clStates[thr_id] = initCl(virtual_gpu, name, sizeof(name));
+		clStates[thr_id] = opencl_create_clState(virtual_gpu, name, sizeof(name));
 		if (!clStates[thr_id]) {
 			applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
 			goto select_cgpu;
@@ -1388,13 +1410,9 @@ static int opencl_autodetect()
 	if (!nDevs)
 		return 0;
 
-	/* If opt_g_threads is not set, use default 1 thread on scrypt and
-	 * 2 for regular mining */
 	if (opt_g_threads == -1) {
-		if (opt_scrypt)
-			opt_g_threads = 1;
-		else
-			opt_g_threads = 2;
+		// NOTE: This should ideally default to 2 for non-scrypt
+		opt_g_threads = 1;
 	}
 
 #ifdef HAVE_SENSORS
@@ -1440,9 +1458,13 @@ static int opencl_autodetect()
 
 static void opencl_detect()
 {
-	int flags = 0;
-	if (!opt_scrypt)
-		flags |= GDF_DEFAULT_NOAUTO;
+	int flags = GDF_DEFAULT_NOAUTO;
+	struct mining_goal_info *goal, *tmpgoal;
+	HASH_ITER(hh, mining_goals, goal, tmpgoal)
+	{
+		if (!goal->malgo->opencl_nodefault)
+			flags &= ~GDF_DEFAULT_NOAUTO;
+	}
 	generic_detect(&opencl_api, NULL, opencl_autodetect, flags);
 }
 
@@ -1541,8 +1563,18 @@ get_opencl_api_extra_device_status(struct cgpu_info *gpu)
 	root = api_add_int(root, "Powertune", &pt, true);
 
 	char intensity[20];
-	uint32_t oclthreads = data->oclthreads;
-	double intensityf = oclthreads_to_intensity(oclthreads, !opt_scrypt);
+	uint32_t oclthreads;
+	double intensityf = data->intensity;
+	// FIXME: Some way to express intensities malgo-neutral?
+	struct mining_goal_info * const goal = get_mining_goal("default");
+	struct mining_algorithm * const malgo = goal->malgo;
+	if (data->intensity == intensity_not_set)
+	{
+		oclthreads = data->oclthreads;
+		intensityf = malgo->opencl_oclthreads_to_intensity(oclthreads);
+	}
+	else
+		oclthreads = malgo->opencl_intensity_to_oclthreads(intensityf);
 	double xintensity = oclthreads_to_xintensity(oclthreads, clState->max_compute_units);
 	if (data->dynamic)
 		strcpy(intensity, "D");
@@ -1557,7 +1589,6 @@ get_opencl_api_extra_device_status(struct cgpu_info *gpu)
 }
 
 struct opencl_thread_data {
-	cl_int (*queue_kernel_parameters)(_clState *, struct work *, cl_uint);
 	uint32_t *res;
 };
 
@@ -1572,7 +1603,7 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 	int virtual_gpu = data->virtual_gpu;
 	int i = thr->id;
 	static bool failmessage = false;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+	int buffersize = OPENCL_MAX_BUFFERSIZE;
 
 	if (!blank_res)
 		blank_res = calloc(buffersize, 1);
@@ -1583,7 +1614,7 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 
 	strcpy(name, "");
 	applog(LOG_INFO, "Init GPU thread %i GPU %i virtual GPU %i", i, gpu, virtual_gpu);
-	clStates[i] = initCl(virtual_gpu, name, sizeof(name));
+	clStates[i] = opencl_create_clState(virtual_gpu, name, sizeof(name));
 	if (!clStates[i]) {
 #ifdef HAVE_CURSES
 		if (use_curses)
@@ -1629,35 +1660,13 @@ static bool opencl_thread_init(struct thr_info *thr)
 	cl_int status = 0;
 	thrdata = calloc(1, sizeof(*thrdata));
 	thr->cgpu_data = thrdata;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+	int buffersize = OPENCL_MAX_BUFFERSIZE;
 
 	if (!thrdata) {
 		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
 		return false;
 	}
 
-	switch (clState->chosen_kernel) {
-		case KL_POCLBM:
-			thrdata->queue_kernel_parameters = &queue_poclbm_kernel;
-			break;
-		case KL_PHATK:
-			thrdata->queue_kernel_parameters = &queue_phatk_kernel;
-			break;
-		case KL_DIAKGCN:
-			thrdata->queue_kernel_parameters = &queue_diakgcn_kernel;
-			break;
-#ifdef USE_SCRYPT
-		case KL_SCRYPT:
-			thrdata->queue_kernel_parameters = &queue_scrypt_kernel;
-			gpu->min_nonce_diff = 1./0x10000;
-			break;
-#endif
-		default:
-		case KL_DIABLO:
-			thrdata->queue_kernel_parameters = &queue_diablo_kernel;
-			break;
-	}
-
 	thrdata->res = calloc(buffersize, 1);
 
 	if (!thrdata->res) {
@@ -1683,9 +1692,8 @@ static bool opencl_thread_init(struct thr_info *thr)
 
 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
-#ifdef USE_SCRYPT
-	if (!opt_scrypt)
-#endif
+	const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+	if (malgo->algo == POW_SHA256D)
 	{
 		struct opencl_work_data * const blk = _opencl_work_data(work);
 		precalc_hash(blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
@@ -1695,6 +1703,90 @@ static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work
 
 extern int opt_dynamic_interval;
 
+const struct opencl_kernel_info *opencl_scanhash_get_kernel(struct cgpu_info * const cgpu, _clState * const clState, const struct mining_algorithm * const malgo)
+{
+	struct opencl_device_data * const data = cgpu->device_data;
+	struct opencl_kernel_info *kernelinfo = NULL;
+	char *kernel_file;
+	switch (malgo->algo)
+	{
+		case POW_SHA256D:
+			kernelinfo = &clState->kernel_sha256d;
+			if (!data->kernel_file_sha256d)
+			{
+				const char * const vbuff = clState->platform_ver_str;
+				if (clState->is_mesa)
+				{
+					applog(LOG_INFO, "Selecting phatk kernel for Mesa");
+					data->kernel_file_sha256d = strdup("phatk");
+				}
+				else  /* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
+				if (!strstr(cgpu->name, "Tahiti") &&
+				   (strstr(vbuff, "844.4") ||  // Linux 64 bit ATI 2.6 SDK
+				    strstr(vbuff, "851.4") ||  // Windows 64 bit ""
+				    strstr(vbuff, "831.4") ||
+				    strstr(vbuff, "898.1") ||  // 12.2 driver SDK 
+				    strstr(vbuff, "923.1") ||  // 12.4
+				    strstr(vbuff, "938.2") ||  // SDK 2.7
+				    strstr(vbuff, "1113.2")))  // SDK 2.8
+				{
+					applog(LOG_INFO, "Selecting diablo kernel");
+					data->kernel_file_sha256d = strdup("diablo");
+				}
+				else  /* Detect all 7970s, older ATI and NVIDIA and use poclbm */
+				if (strstr(cgpu->name, "Tahiti") || !clState->hasBitAlign)
+				{
+					applog(LOG_INFO, "Selecting poclbm kernel");
+					data->kernel_file_sha256d = strdup("poclbm");
+				}
+				else  /* Use phatk for the rest R5xxx R6xxx */
+				{
+					applog(LOG_INFO, "Selecting phatk kernel");
+					data->kernel_file_sha256d = strdup("phatk");
+				}
+			}
+			kernel_file = data->kernel_file_sha256d;
+			break;
+#ifdef USE_SCRYPT
+		case POW_SCRYPT:
+			kernelinfo = &clState->kernel_scrypt;
+			BFGINIT(data->kernel_file_scrypt, strdup("scrypt"));
+			kernel_file = data->kernel_file_scrypt;
+			break;
+#endif
+	}
+	if (!kernelinfo)
+		applogr(NULL, LOG_ERR, "%s: Unsupported mining algorithm", cgpu->dev_repr);
+	if (!kernelinfo->loaded)
+	{
+		if (!opencl_load_kernel(cgpu, clState, cgpu->name, kernelinfo, kernel_file, malgo))
+			applogr(NULL, LOG_ERR, "%s: Failed to load kernel", cgpu->dev_repr);
+		
+		switch (kernelinfo->interface)
+		{
+			case KL_POCLBM:
+				kernelinfo->queue_kernel_parameters = &queue_poclbm_kernel;
+				break;
+			case KL_PHATK:
+				kernelinfo->queue_kernel_parameters = &queue_phatk_kernel;
+				break;
+			case KL_DIAKGCN:
+				kernelinfo->queue_kernel_parameters = &queue_diakgcn_kernel;
+				break;
+#ifdef USE_SCRYPT
+			case KL_SCRYPT:
+				kernelinfo->queue_kernel_parameters = &queue_scrypt_kernel;
+				break;
+#endif
+			default:
+			case KL_DIABLO:
+				kernelinfo->queue_kernel_parameters = &queue_diablo_kernel;
+				break;
+		}
+	}
+	return kernelinfo;
+}
+
 static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 				int64_t __maybe_unused max_nonce)
 {
@@ -1703,15 +1795,28 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	struct cgpu_info *gpu = thr->cgpu;
 	struct opencl_device_data * const data = gpu->device_data;
 	_clState *clState = clStates[thr_id];
-	const cl_kernel *kernel = &clState->kernel;
+	const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+	const struct opencl_kernel_info *kinfo = opencl_scanhash_get_kernel(gpu, clState, malgo);
+	if (!kinfo)
+		return -1;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
 
 	cl_int status;
 	size_t globalThreads[1];
-	size_t localThreads[1] = { clState->wsize };
+	size_t localThreads[1] = { kinfo->wsize };
 	int64_t hashes;
-	int found = opt_scrypt ? SCRYPT_FOUND : FOUND;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+	int found = FOUND;
+	int buffersize = BUFFERSIZE;
+#ifdef USE_SCRYPT
+	if (malgo->algo == POW_SCRYPT)
+	{
+		found = SCRYPT_FOUND;
+		buffersize = SCRYPT_BUFFERSIZE;
+	}
+#endif
+	if (data->intensity != intensity_not_set)
+		data->oclthreads = malgo->opencl_intensity_to_oclthreads(data->intensity);
 
 	/* Windows' timer resolution is only 15ms so oversample 5x */
 	if (data->dynamic && (++data->intervals * dynamic_us) > 70000) {
@@ -1721,16 +1826,18 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		cgtime(&tv_gpuend);
 		gpu_us = us_tdiff(&tv_gpuend, &data->tv_gpustart) / data->intervals;
 		if (gpu_us > dynamic_us) {
-			const unsigned long min_oclthreads = intensity_to_oclthreads(MIN_INTENSITY, !opt_scrypt);
+			const unsigned long min_oclthreads = malgo->opencl_min_oclthreads;
 			data->oclthreads /= 2;
 			if (data->oclthreads < min_oclthreads)
 				data->oclthreads = min_oclthreads;
 		} else if (gpu_us < dynamic_us / 2) {
-			const unsigned long max_oclthreads = intensity_to_oclthreads(MAX_INTENSITY, !opt_scrypt);
+			const unsigned long max_oclthreads = malgo->opencl_max_oclthreads;
 			data->oclthreads *= 2;
 			if (data->oclthreads > max_oclthreads)
 				data->oclthreads = max_oclthreads;
 		}
+		if (data->intensity != intensity_not_set)
+			data->intensity = malgo->opencl_oclthreads_to_intensity(data->oclthreads);
 		memcpy(&(data->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
 		data->intervals = 0;
 	}
@@ -1744,13 +1851,14 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
 
-	status = thrdata->queue_kernel_parameters(clState, work, globalThreads[0]);
+	status = kinfo->queue_kernel_parameters(kinfo, clState, work, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
 		return -1;
 	}
 
-	if (clState->goffset) {
+	if (kinfo->goffset)
+	{
 		size_t global_work_offset[1];
 
 		global_work_offset[0] = work->blk.nonce;
@@ -1798,13 +1906,20 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	return hashes;
 }
 
+static
+void opencl_clean_kernel_info(struct opencl_kernel_info * const kinfo)
+{
+	clReleaseKernel(kinfo->kernel);
+	clReleaseProgram(kinfo->program);
+}
+
 static void opencl_thread_shutdown(struct thr_info *thr)
 {
 	const int thr_id = thr->id;
 	_clState *clState = clStates[thr_id];
 
-	clReleaseKernel(clState->kernel);
-	clReleaseProgram(clState->program);
+	opencl_clean_kernel_info(&clState->kernel_sha256d);
+	opencl_clean_kernel_info(&clState->kernel_scrypt);
 	clReleaseCommandQueue(clState->commandQueue);
 	clReleaseContext(clState->context);
 }
@@ -1869,7 +1984,7 @@ struct device_drv opencl_api = {
 	.dname = "opencl",
 	.name = "OCL",
 	.probe_priority = 110,
-	.supported_algos = POW_SHA256D | POW_SCRYPT,
+	.drv_min_nonce_diff = common_sha256d_and_scrypt_min_nonce_diff,
 	.drv_detect = opencl_detect,
 	.reinit_device = reinit_opencl_device,
 	.watchdog = opencl_watchdog,
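
The per-kernel state that used to live directly in _clState (kernel, program, goffset, wsize and the queue function pointer) is now carried once per algorithm in struct opencl_kernel_info, whose actual definition lands in ocl.h elsewhere in this commit. Reconstructed only from the fields this file touches, it looks roughly like the sketch below; the queue_kernel_parameters_func_t typedef likewise moves to a header and gains the kernel-info argument seen in the new queue_*_kernel signatures. Treat both as approximations, not the exact declarations:

	struct opencl_kernel_info;
	typedef cl_int (*queue_kernel_parameters_func_t)(const struct opencl_kernel_info *, _clState *, struct work *, cl_uint);
	
	struct opencl_kernel_info {
		bool loaded;                 // set once opencl_load_kernel has built the program
		cl_kernel kernel;
		cl_program program;
		enum cl_kernels interface;   // KL_POCLBM, KL_PHATK, KL_DIAKGCN, KL_DIABLO or KL_SCRYPT
		queue_kernel_parameters_func_t queue_kernel_parameters;
		size_t wsize;                // local work size
		bool goffset;                // kernel accepts a global work offset
	};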

+ 9 - 5
driver-opencl.h

@@ -1,6 +1,7 @@
 #ifndef BFG_DRIVER_OPENCL
 #define BFG_DRIVER_OPENCL
 
+#include <float.h>
 #include <stdbool.h>
 
 #include "CL/cl.h"
@@ -18,23 +19,27 @@ enum opencl_binary_usage {
 	OBU_NONE     = 4,
 };
 
+static const float intensity_not_set = FLT_MAX;
+
 struct opencl_device_data {
 	bool mapped;
 	int virtual_gpu;
 	int virtual_adl;
 	unsigned long oclthreads;
+	float intensity;
 	char *_init_intensity;
 	bool dynamic;
 	
 	cl_uint vwidth;
 	size_t work_size;
-	char *kernel_file;
+	char *kernel_file_sha256d;
 	cl_ulong max_alloc;
 	
 	enum opencl_binary_usage opt_opencl_binaries;
 #ifdef USE_SCRYPT
-	int opt_lg, lookup_gap;
-	size_t opt_tc, thread_concurrency;
+	char *kernel_file_scrypt;
+	int lookup_gap;
+	size_t thread_concurrency;
 	size_t shaders;
 #endif
 	struct timeval tv_gpustart;
@@ -59,8 +64,7 @@ struct opencl_device_data {
 #endif
 };
 
-extern double oclthreads_to_intensity(unsigned long oclthreads, bool is_sha256d);
-extern unsigned long intensity_to_oclthreads(double intensity, bool is_sha256d);
+extern float opencl_proc_get_intensity(struct cgpu_info *, const char **iunit);
 extern unsigned long xintensity_to_oclthreads(double xintensity, cl_uint max_compute_units);
 extern bool opencl_set_intensity_from_str(struct cgpu_info *, const char *newvalue);
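
The removed oclthreads_to_intensity/intensity_to_oclthreads pair is replaced by per-algorithm hooks on struct mining_algorithm (opencl_oclthreads_to_intensity / opencl_intensity_to_oclthreads; see the SHA256d versions added in miner.c below), and intensity_not_set = FLT_MAX marks a processor still configured in raw oclthreads or xintensity terms rather than by intensity. The SHA256d mapping keeps its historical -15 offset, so for example:

	// SHA256d: intensity I  <->  2^(I + 15) OpenCL threads per enqueue
	//   I = 9                          -> 2^24 = 16777216 threads
	//   I = -10 (the lower bound quoted in the api.c message above) -> 2^5 = 32 threads
	float intensity = 9;
	unsigned long oclthreads = powf(2, intensity + 15);   // as in opencl_intensity_to_oclthreads_sha256d
	float back = log2f(oclthreads) - 15.;                  // as in opencl_oclthreads_to_intensity_sha256d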
 

+ 13 - 4
driver-proxy.c

@@ -70,6 +70,12 @@ void *prune_worklog_thread(void *userdata)
 	return NULL;
 }
 
+static
+float proxy_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	return minimum_pdiff;
+}
+
 static
 void proxy_first_client(struct cgpu_info *cgpu)
 {
@@ -99,7 +105,6 @@ struct proxy_client *proxy_find_or_create_client(const char *username)
 			.threads = 0,
 			.device_data = client,
 			.device_path = user,
-			.min_nonce_diff = (opt_scrypt ? (1./0x10000) : 1.),
 		};
 		timer_set_now(&cgpu->cgminer_stats.start_tv);
 		if (unlikely(!create_new_cgpus(add_cgpu_live, cgpu)))
@@ -112,7 +117,7 @@ struct proxy_client *proxy_find_or_create_client(const char *username)
 		*client = (struct proxy_client){
 			.username = user,
 			.cgpu = cgpu,
-			.desired_share_pdiff = opt_scrypt ? (1./0x10000) : 1.,
+			.desired_share_pdiff = 0.,
 		};
 		
 		b = HASH_COUNT(proxy_clients);
@@ -133,14 +138,17 @@ struct proxy_client *proxy_find_or_create_client(const char *username)
 	return client;
 }
 
+// See also, stratumsrv_init_diff in driver-stratum.c
 static
 const char *proxy_set_diff(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const success)
 {
 	struct proxy_client * const client = proc->device_data;
-	const double nv = atof(newvalue);
-	if (nv <= 0)
+	double nv = atof(newvalue);
+	if (nv < 0)
 		return "Invalid difficulty";
 	
+	if (nv <= minimum_pdiff)
+		nv = minimum_pdiff;
 	client->desired_share_pdiff = nv;
 	
 #ifdef USE_LIBEVENT
@@ -167,6 +175,7 @@ static const struct bfg_set_device_definition proxy_set_device_funcs[] = {
 struct device_drv proxy_drv = {
 	.dname = "proxy",
 	.name = "PXY",
+	.drv_min_nonce_diff = proxy_min_nonce_diff,
 #ifdef HAVE_CURSES
 	.proc_wlogprint_status = proxy_wlogprint_status,
 #endif

+ 136 - 24
driver-stratum.c

@@ -15,6 +15,7 @@
 #include <winsock2.h>
 #endif
 
+#include <float.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <string.h>
@@ -34,8 +35,8 @@
 
 #define _ssm_client_octets     work2d_xnonce1sz
 #define _ssm_client_xnonce2sz  work2d_xnonce2sz
-static char *_ssm_notify;
-static int _ssm_notify_sz;
+static char *_ssm_notify, *_ssm_setgoal;
+static int _ssm_notify_sz, _ssm_setgoal_sz;
 static struct stratumsrv_job *_ssm_last_ssj;
 static struct event *ev_notify;
 static notifier_t _ssm_update_notifier;
@@ -65,12 +66,21 @@ struct stratumsrv_conn_userlist {
 	struct stratumsrv_conn_userlist *next;
 };
 
+enum stratumsrv_conn_capability {
+	SCC_NOTIFY    = 1 << 0,
+	SCC_SET_DIFF  = 1 << 1,
+	SCC_SET_GOAL  = 1 << 2,
+};
+typedef uint8_t stratumsrv_conn_capabilities_t;
+
 struct stratumsrv_conn {
 	struct bufferevent *bev;
+	stratumsrv_conn_capabilities_t capabilities;
 	uint32_t xnonce1_le;
 	struct timeval tv_hashes_done;
 	bool hashes_done_ext;
 	float current_share_pdiff;
+	bool desired_default_share_pdiff;  // Set if any authenticated user is configured for the default
 	float desired_share_pdiff;
 	struct stratumsrv_conn_userlist *authorised_users;
 	
@@ -93,6 +103,15 @@ void stratumsrv_send_set_difficulty(struct stratumsrv_conn * const conn, const f
 
 #define _ssm_gen_dummy_work work2d_gen_dummy_work
 
+static
+float stratumsrv_choose_share_pdiff(const struct stratumsrv_conn * const conn, const struct mining_algorithm * const malgo)
+{
+	float conn_pdiff = conn->desired_share_pdiff;
+	if (conn->desired_default_share_pdiff && malgo->reasonable_low_nonce_diff < conn_pdiff)
+		conn_pdiff = malgo->reasonable_low_nonce_diff;
+	return conn_pdiff;
+}
+
 static
 bool stratumsrv_update_notify_str(struct pool * const pool, bool clean)
 {
@@ -145,6 +164,10 @@ bool stratumsrv_update_notify_str(struct pool * const pool, bool clean)
 	bin2hex(ntime, &ntime_n, 4);
 	p += sprintf(p, "],\"%s\",\"%s\",\"%s\",%s],\"method\":\"mining.notify\",\"id\":null}\n", version, nbits, ntime, clean ? "true" : "false");
 	
+	const size_t setgoalbufsz = 49 + strlen(pool->goal->name) + (pool->goalname ? (1 + strlen(pool->goalname)) : 0) + 12 + strlen(pool->goal->malgo->name) + 5 + 1;
+	char * const setgoalbuf = malloc(setgoalbufsz);
+	snprintf(setgoalbuf, setgoalbufsz, "{\"method\":\"mining.set_goal\",\"id\":null,\"params\":[\"%s%s%s\",{\"malgo\":\"%s\"}]}\n", pool->goal->name, pool->goalname ? "/" : "", pool->goalname ?: "", pool->goal->malgo->name);
+	
 	ssj = malloc(sizeof(*ssj));
 	*ssj = (struct stratumsrv_job){
 		.my_job_id = strdup(my_job_id),
@@ -165,20 +188,37 @@ bool stratumsrv_update_notify_str(struct pool * const pool, bool clean)
 	assert(_ssm_notify_sz <= bufsz);
 	free(_ssm_notify);
 	_ssm_notify = buf;
+	const bool setgoal_changed = _ssm_setgoal ? strcmp(setgoalbuf, _ssm_setgoal) : true;
+	if (setgoal_changed)
+	{
+		free(_ssm_setgoal);
+		_ssm_setgoal = setgoalbuf;
+		_ssm_setgoal_sz = setgoalbufsz - 1;
+	}
+	else
+		free(setgoalbuf);
 	_ssm_last_ssj = ssj;
 	
 	float pdiff = target_diff(ssj->swork.target);
+	const struct mining_goal_info * const goal = pool->goal;
+	const struct mining_algorithm * const malgo = goal->malgo;
 	LL_FOREACH(_ssm_connections, conn)
 	{
 		if (unlikely(!conn->xnonce1_le))
 			continue;
-		float conn_pdiff = conn->desired_share_pdiff;
-		if (pdiff < conn_pdiff)
-			conn_pdiff = pdiff;
-		ssj->job_pdiff[conn->xnonce1_le] = conn_pdiff;
-		if (conn_pdiff != conn->current_share_pdiff)
-			stratumsrv_send_set_difficulty(conn, conn_pdiff);
-		bufferevent_write(conn->bev, _ssm_notify, _ssm_notify_sz);
+		if (setgoal_changed && (conn->capabilities & SCC_SET_GOAL))
+			bufferevent_write(conn->bev, setgoalbuf, setgoalbufsz);
+		if (likely(conn->capabilities & SCC_SET_DIFF))
+		{
+			float conn_pdiff = stratumsrv_choose_share_pdiff(conn, malgo);
+			if (pdiff < conn_pdiff)
+				conn_pdiff = pdiff;
+			ssj->job_pdiff[conn->xnonce1_le] = conn_pdiff;
+			if (conn_pdiff != conn->current_share_pdiff)
+				stratumsrv_send_set_difficulty(conn, conn_pdiff);
+		}
+		if (likely(conn->capabilities & SCC_NOTIFY))
+			bufferevent_write(conn->bev, _ssm_notify, _ssm_notify_sz);
 	}
 	
 	return true;
@@ -195,15 +235,21 @@ void stratumsrv_client_changed_diff(struct proxy_client * const client)
 		++connections_affected;
 		
 		float desired_share_pdiff = client->desired_share_pdiff;
+		bool any_default_share_pdiff = !desired_share_pdiff;
 		LL_FOREACH(conn->authorised_users, ule2)
 		{
 			struct proxy_client * const other_client = ule2->client;
+			if (!other_client->desired_share_pdiff)
+				any_default_share_pdiff = true;
+			else
 			if (other_client->desired_share_pdiff < desired_share_pdiff)
 				desired_share_pdiff = other_client->desired_share_pdiff;
 		}
-		if (conn->desired_share_pdiff != desired_share_pdiff)
+		BFGINIT(desired_share_pdiff, FLT_MAX);
+		if (conn->desired_share_pdiff != desired_share_pdiff || conn->desired_default_share_pdiff != any_default_share_pdiff)
 		{
 			conn->desired_share_pdiff = desired_share_pdiff;
+			conn->desired_default_share_pdiff = any_default_share_pdiff;
 			++connections_changed;
 		}
 	}
@@ -369,18 +415,55 @@ void _stratumsrv_failure(struct bufferevent * const bev, const char * const idst
 }while(0)
 
 static
-void _stratumsrv_success(struct bufferevent * const bev, const char * const idstr)
+void stratumsrv_success2(struct bufferevent * const bev, const char * const idstr, const char * const resultstr)
 {
 	if (!idstr)
 		return;
 	
-	size_t bufsz = 36 + strlen(idstr);
+	size_t bufsz = 32 + strlen(resultstr) + strlen(idstr);
 	char buf[bufsz];
 	
-	bufsz = sprintf(buf, "{\"result\":true,\"id\":%s,\"error\":null}\n", idstr);
+	bufsz = sprintf(buf, "{\"result\":%s,\"id\":%s,\"error\":null}\n", resultstr, idstr);
 	bufferevent_write(bev, buf, bufsz);
 }
 
+static inline
+void _stratumsrv_success(struct bufferevent * const bev, const char * const idstr)
+{
+	stratumsrv_success2(bev, idstr, "true");
+}
+
+static
+void stratumsrv_mining_capabilities(struct bufferevent * const bev, json_t * const params, const char * const idstr, struct stratumsrv_conn * const conn)
+{
+	if (json_is_null(params) || (!json_is_array(params)))
+		return_stratumsrv_failure(20, "Bad params");
+	
+	conn->capabilities = 0;
+	
+	json_t * const caps = (json_array_size(params) < 1) ? NULL : json_array_get(params, 0);
+	if (caps && (!json_is_null(caps)) && json_is_array(caps))
+	{
+		for (size_t i = json_array_size(caps); i-- > 0; )
+		{
+			json_t * const j = json_array_get(caps, i);
+			if (!json_is_string(j))
+				continue;
+			const char * const s = json_string_value(j);
+			if (!strcasecmp(s, "notify"))
+				conn->capabilities |= SCC_NOTIFY;
+			else
+			if (!strcasecmp(s, "set_difficulty"))
+				conn->capabilities |= SCC_SET_DIFF;
+			else
+			if (!strcasecmp(s, "set_goal"))
+				conn->capabilities |= SCC_SET_GOAL;
+		}
+	}
+	
+	stratumsrv_success2(bev, idstr, "null");
+}
+
 static
 void stratumsrv_mining_subscribe(struct bufferevent * const bev, json_t * const params, const char * const idstr, struct stratumsrv_conn * const conn)
 {
@@ -407,12 +490,22 @@ void stratumsrv_mining_subscribe(struct bufferevent * const bev, json_t * const
 	bufsz = sprintf(buf, "{\"id\":%s,\"result\":[[[\"mining.set_difficulty\",\"x\"],[\"mining.notify\",\"%s\"]],\"%s\",%d],\"error\":null}\n", idstr, xnonce1x, xnonce1x, _ssm_client_xnonce2sz);
 	bufferevent_write(bev, buf, bufsz);
 	
-	float pdiff = target_diff(_ssm_last_ssj->swork.target);
-	if (pdiff > conn->desired_share_pdiff)
-		pdiff = conn->desired_share_pdiff;
-	_ssm_last_ssj->job_pdiff[*xnonce1_p] = pdiff;
-	stratumsrv_send_set_difficulty(conn, pdiff);
-	bufferevent_write(bev, _ssm_notify, _ssm_notify_sz);
+	if (conn->capabilities & SCC_SET_GOAL)
+		bufferevent_write(conn->bev, _ssm_setgoal, _ssm_setgoal_sz);
+	if (likely(conn->capabilities & SCC_SET_DIFF))
+	{
+		const struct pool * const pool = _ssm_last_ssj->swork.pool;
+		const struct mining_goal_info * const goal = pool->goal;
+		const struct mining_algorithm * const malgo = goal->malgo;
+		float pdiff = target_diff(_ssm_last_ssj->swork.target);
+		const float conn_pdiff = stratumsrv_choose_share_pdiff(conn, malgo);
+		if (pdiff > conn_pdiff)
+			pdiff = conn_pdiff;
+		_ssm_last_ssj->job_pdiff[*xnonce1_p] = pdiff;
+		stratumsrv_send_set_difficulty(conn, pdiff);
+	}
+	if (likely(conn->capabilities & SCC_NOTIFY))
+		bufferevent_write(bev, _ssm_notify, _ssm_notify_sz);
 }
 
 static
@@ -427,8 +520,19 @@ void stratumsrv_mining_authorize(struct bufferevent * const bev, json_t * const
 	if (unlikely(!client))
 		return_stratumsrv_failure(20, "Failed creating new cgpu");
 	
-	if ((!conn->authorised_users) || client->desired_share_pdiff < conn->desired_share_pdiff)
-		conn->desired_share_pdiff = client->desired_share_pdiff;
+	if (client->desired_share_pdiff)
+	{
+		if (!conn->authorised_users)
+			conn->desired_default_share_pdiff = false;
+		if ((!conn->authorised_users) || client->desired_share_pdiff < conn->desired_share_pdiff)
+			conn->desired_share_pdiff = client->desired_share_pdiff;
+	}
+	else
+	{
+		conn->desired_default_share_pdiff = true;
+		if (!conn->authorised_users)
+			conn->desired_share_pdiff = FLT_MAX;
+	}
 	
 	struct stratumsrv_conn_userlist *ule = malloc(sizeof(*ule));
 	*ule = (struct stratumsrv_conn_userlist){
@@ -585,6 +689,9 @@ errout:
 	else
 	if (!strcasecmp(method, "mining.subscribe"))
 		stratumsrv_mining_subscribe(bev, params, idstr, conn);
+	else
+	if (!strcasecmp(method, "mining.capabilities"))
+		stratumsrv_mining_capabilities(bev, params, idstr, conn);
 	else
 		_stratumsrv_failure(bev, idstr, -3, "Method not supported");
 	
@@ -644,15 +751,18 @@ void stratumsrv_event(struct bufferevent *bev, short events, void *p)
 	}
 }
 
+// See also, proxy_set_diff in driver-proxy.c
 static
 const char *stratumsrv_init_diff(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const success)
 {
 	struct stratumsrv_conn * const conn = proc->device_data;
 	
-	const double nv = atof(newvalue);
-	if (nv <= 0)
+	double nv = atof(newvalue);
+	if (nv < 0)
 		return "Invalid difficulty";
 	
+	if (nv <= minimum_pdiff)
+		nv = minimum_pdiff;
 	conn->desired_share_pdiff = nv;
 	
 	return NULL;
@@ -672,7 +782,9 @@ void stratumlistener(struct evconnlistener *listener, evutil_socket_t sock, stru
 	conn = malloc(sizeof(*conn));
 	*conn = (struct stratumsrv_conn){
 		.bev = bev,
-		.desired_share_pdiff = opt_scrypt ? (1./0x10000) : 1.,
+		.capabilities = SCC_NOTIFY | SCC_SET_DIFF,
+		.desired_share_pdiff = FLT_MAX,
+		.desired_default_share_pdiff = true,
 	};
 	drv_set_defaults(&proxy_drv, stratumsrv_set_device_funcs_newconnect, conn, NULL, NULL, 1);
 	LL_PREPEND(_ssm_connections, conn);
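
The new mining.capabilities handler and mining.set_goal notification let multi-algorithm clients opt in, while a fresh connection defaults to notify + set_difficulty only, so legacy miners see no change. A sample exchange, assuming a client that advertises all three capabilities and a server whose active goal is named "default" and mines SHA256d (the JSON framing comes from the format strings above; the id values and <...> placeholders are arbitrary):

	client:  {"method":"mining.capabilities","params":[["notify","set_difficulty","set_goal"]],"id":1}
	server:  {"result":null,"id":1,"error":null}
	client:  {"method":"mining.subscribe","params":[],"id":2}
	server:  {"id":2,"result":[[["mining.set_difficulty","x"],["mining.notify","<xnonce1>"]],"<xnonce1>",<xnonce2sz>],"error":null}
	server:  {"method":"mining.set_goal","id":null,"params":["default",{"malgo":"SHA256d"}]}
	server:  mining.set_difficulty and mining.notify follow as before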

+ 8 - 5
driver-titan.c

@@ -296,6 +296,12 @@ static bool configure_one_die(struct knc_titan_info *knc, int asic, int die)
 	return true;
 }
 
+static
+float titan_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	return (malgo->algo == POW_SCRYPT) ? DEFAULT_DIFF_FILTERING_FLOAT : -1.;
+}
+
 static bool knc_titan_init(struct thr_info * const thr)
 {
 	const int max_cores = KNC_TITAN_CORES_PER_ASIC;
@@ -308,7 +314,6 @@ static bool knc_titan_init(struct thr_info * const thr)
 	int asic_cores[KNC_TITAN_MAX_ASICS] = {0};
 
 	for (proc = cgpu; proc; ) {
-		proc->min_nonce_diff = DEFAULT_DIFF_FILTERING_FLOAT;
 		if (proc->device != proc) {
 			applog(LOG_WARNING, "%"PRIpreprv": Extra processor?", proc->proc_repr);
 			proc = proc->next_proc;
@@ -430,9 +435,7 @@ static bool die_reconfigure(struct knc_titan_info * const knc, int asic, int die
 
 static bool knc_titan_prepare_work(struct thr_info *thr, struct work *work)
 {
-	struct cgpu_info * const cgpu = thr->cgpu;
-
-	work->nonce_diff = cgpu->min_nonce_diff;
+	work->nonce_diff = DEFAULT_DIFF_FILTERING_FLOAT;
 	return true;
 }
 
@@ -805,7 +808,7 @@ struct device_drv knc_titan_drv =
 	/* metadata */
 	.dname = "titan",
 	.name = "KNC",
-	.supported_algos = POW_SCRYPT,
+	.drv_min_nonce_diff = titan_min_nonce_diff,
 	.drv_detect = knc_titan_detect,
 
 	.thread_init = knc_titan_init,

+ 1 - 2
driver-zeusminer.c

@@ -219,7 +219,6 @@ bool zeusminer_thread_init(struct thr_info * const thr)
 {
 	struct cgpu_info * const device = thr->cgpu;
 	
-	device->min_nonce_diff = 1./0x10000;
 	device->set_device_funcs = zeusminer_set_device_funcs_live;
 	
 	return icarus_init(thr);
@@ -320,7 +319,7 @@ void zeusminer_drv_init()
 	// metadata
 	zeusminer_drv.dname = "zeusminer";
 	zeusminer_drv.name = "ZUS";
-	zeusminer_drv.supported_algos = POW_SCRYPT;
+	zeusminer_drv.drv_min_nonce_diff = common_scrypt_min_nonce_diff;
 	
 	// detect device
 	zeusminer_drv.lowl_probe = zeusminer_lowl_probe;

+ 12 - 3
findnonce.c

@@ -137,7 +137,7 @@ void precalc_hash(struct opencl_work_data *blk, uint32_t *state, uint32_t *data)
 struct pc_data {
 	struct thr_info *thr;
 	struct work work;
-	uint32_t res[SCRYPT_MAXBUFFERS];
+	uint32_t res[OPENCL_MAX_BUFFERSIZE];
 	pthread_t pth;
 	int found;
 };
@@ -147,7 +147,11 @@ static void *postcalc_hash(void *userdata)
 	struct pc_data *pcd = (struct pc_data *)userdata;
 	struct thr_info *thr = pcd->thr;
 	unsigned int entry = 0;
-	int found = opt_scrypt ? SCRYPT_FOUND : FOUND;
+	int found = FOUND;
+#ifdef USE_SCRYPT
+	if (work_mining_algorithm(&pcd->work)->algo == POW_SCRYPT)
+		found = SCRYPT_FOUND;
+#endif
 
 	pthread_detach(pthread_self());
 	RenameThread("postcalchsh");
@@ -188,7 +192,12 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
 		.thr = thr,
 	};
 	__copy_work(&pcd->work, work);
-	buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+#ifdef USE_SCRYPT
+	if (work_mining_algorithm(work)->algo == POW_SCRYPT)
+		buffersize = SCRYPT_BUFFERSIZE;
+	else
+#endif
+		buffersize = BUFFERSIZE;
 	memcpy(&pcd->res, res, buffersize);
 
 	if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) {

+ 6 - 0
findnonce.h

@@ -12,10 +12,16 @@
 #define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
 #define FOUND (0x0F)
 
+#ifdef USE_SCRYPT
 #define SCRYPT_MAXBUFFERS (0x100)
 #define SCRYPT_BUFFERSIZE (sizeof(uint32_t) * SCRYPT_MAXBUFFERS)
 #define SCRYPT_FOUND (0xFF)
 
+#define OPENCL_MAX_BUFFERSIZE  SCRYPT_BUFFERSIZE
+#else
+#define OPENCL_MAX_BUFFERSIZE  BUFFERSIZE
+#endif
+
 #ifdef HAVE_OPENCL
 extern void precalc_hash(struct opencl_work_data *blk, uint32_t *state, uint32_t *data);
 extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
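
OPENCL_MAX_BUFFERSIZE exists so buffers shared between the SHA256d and scrypt result paths (blank_res, the per-thread res buffers, pc_data.res) can be sized once at the larger of the two. As a quick check, assuming MAXBUFFERS is 0x10 (which the FOUND mask of 0x0F suggests, but which is defined above this hunk):

	BUFFERSIZE            = sizeof(uint32_t) * 0x10   =   64 bytes
	SCRYPT_BUFFERSIZE     = sizeof(uint32_t) * 0x100  = 1024 bytes
	OPENCL_MAX_BUFFERSIZE = SCRYPT_BUFFERSIZE with USE_SCRYPT, else BUFFERSIZE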

+ 3 - 3
gc3355.c

@@ -513,7 +513,7 @@ void gc3355_init_miner(int fd, int pll_freq)
 	gc3355_set_pll_freq(fd, pll_freq);
 }
 
-void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_only)
+void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_only, bool scrypt)
 {
 	gc3355_send_cmds(fd, gcp_chip_reset_cmd);
 
@@ -525,7 +525,7 @@ void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_o
 	// initialize units
 	gc3355_reset_dtr(fd);
 
-	if (opt_scrypt && scrypt_only)
+	if (scrypt && scrypt_only)
 		gc3355_scrypt_only_init(fd);
 	else
 	{
@@ -541,7 +541,7 @@ void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_o
 
 	if (!detect_only)
 	{
-		if (!opt_scrypt)
+		if (!scrypt)
 			// open sha2 units
 			gc3355_open_sha2_units(fd, opt_sha2_units);
 

+ 1 - 1
gc3355.h

@@ -44,7 +44,7 @@ extern ssize_t gc3355_read(int fd, char *buf, size_t size);
 extern ssize_t gc3355_write(int fd, const void * const buf, const size_t size);
 
 extern void gc3355_init_miner(int fd, int pll_freq);
-extern void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_only);
+extern void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_only, bool scrypt);
 
 extern void gc3355_scrypt_reset(int fd);
 extern void gc3355_scrypt_only_reset(int fd);

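With the new trailing parameter, callers of gc3355_init_dualminer() state the algorithm per call instead of reading the removed opt_scrypt global. A hypothetical call-site sketch (fd, pll_freq and malgo are placeholders supplied by the driver):

	const bool want_scrypt = (malgo->algo == POW_SCRYPT);
	gc3355_init_dualminer(fd, pll_freq, /* scrypt_only */ false, /* detect_only */ false, want_scrypt);
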
+ 471 - 110
miner.c

@@ -176,7 +176,6 @@ int opt_g_threads = -1;
 #endif
 #ifdef USE_SCRYPT
 static char detect_algo = 1;
-bool opt_scrypt;
 #else
 static char detect_algo;
 #endif
@@ -304,7 +303,7 @@ int total_getworks, total_stale, total_discarded;
 uint64_t total_bytes_rcvd, total_bytes_sent;
 double total_diff1, total_bad_diff1;
 double total_diff_accepted, total_diff_rejected, total_diff_stale;
-static int staged_rollable;
+static int staged_rollable, staged_spare;
 unsigned int new_blocks;
 unsigned int found_blocks;
 
@@ -371,6 +370,7 @@ static char datestamp[40];
 static char best_share[ALLOC_H2B_SHORTV] = "0";
 double best_diff = 0;
 
+struct mining_algorithm *mining_algorithms;
 struct mining_goal_info *mining_goals;
 int active_goals = 1;
 
@@ -511,6 +511,14 @@ static void applog_and_exit(const char *fmt, ...)
 	exit(1);
 }
 
+static
+float drv_min_nonce_diff(const struct device_drv * const drv, struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	if (drv->drv_min_nonce_diff)
+		return drv->drv_min_nonce_diff(proc, malgo);
+	return (malgo->algo == POW_SHA256D) ? 1. : -1.;
+}
+
 char *devpath_to_devid(const char *devpath)
 {
 #ifndef WIN32
@@ -993,6 +1001,122 @@ static void sharelog(const char*disposition, const struct work*work)
 static void switch_logsize(void);
 #endif
 
+static void hotplug_trigger();
+
+void goal_set_malgo(struct mining_goal_info * const goal, struct mining_algorithm * const malgo)
+{
+	if (goal->malgo == malgo)
+		return;
+	
+	if (goal->malgo)
+		--goal->malgo->goal_refs;
+	if (!malgo->goal_refs++)
+		// First time using a new mining algorithm may mean we need to add mining hardware to support it
+		// api_thr_id is used as an ugly hack to determine if mining has started - if not, we do NOT want to try to hotplug anything (let the initial detect handle it)
+		if (opt_hotplug && api_thr_id)
+			hotplug_trigger();
+	goal->malgo = malgo;
+}
+
+struct mining_algorithm *mining_algorithm_by_alias(const char * const alias)
+{
+	struct mining_algorithm *malgo;
+	LL_FOREACH(mining_algorithms, malgo)
+	{
+		if (match_strtok(malgo->aliases, "|", alias))
+			return malgo;
+	}
+	return NULL;
+}
+
+
+#ifdef HAVE_OPENCL
+static
+float opencl_oclthreads_to_intensity_sha256d(const unsigned long oclthreads)
+{
+	return log2f(oclthreads) - 15.;
+}
+
+static
+unsigned long opencl_intensity_to_oclthreads_sha256d(float intensity)
+{
+	return powf(2, intensity + 15);
+}
+#endif
+
+static struct mining_algorithm malgo_sha256d = {
+	.name = "SHA256d",
+	.aliases = "SHA256d|SHA256|SHA2",
+	
+	.algo = POW_SHA256D,
+	.ui_skip_hash_bytes = 4,
+	.worktime_skip_prevblk_u32 = 1,
+	.reasonable_low_nonce_diff = 1.,
+	
+	.hash_data_f = hash_data,
+	
+#ifdef HAVE_OPENCL
+	.opencl_nodefault = true,
+	.opencl_oclthreads_to_intensity = opencl_oclthreads_to_intensity_sha256d,
+	.opencl_intensity_to_oclthreads = opencl_intensity_to_oclthreads_sha256d,
+	.opencl_min_oclthreads =       0x20,  // intensity -10
+	.opencl_max_oclthreads = 0x20000000,  // intensity  14
+#endif
+};
+
+
+#ifdef USE_SCRYPT
+#ifdef HAVE_OPENCL
+static
+float opencl_oclthreads_to_intensity_scrypt(const unsigned long oclthreads)
+{
+	return log2(oclthreads);
+}
+
+static
+unsigned long opencl_intensity_to_oclthreads_scrypt(float intensity)
+{
+	return pow(2, intensity);
+}
+#endif
+
+static struct mining_algorithm malgo_scrypt = {
+	.name = "scrypt",
+	.aliases = "scrypt",
+	
+	.algo = POW_SCRYPT,
+	.ui_skip_hash_bytes = 2,
+	.reasonable_low_nonce_diff = 1./0x10000,
+	
+	.hash_data_f = scrypt_hash_data,
+	
+#ifdef HAVE_OPENCL
+	.opencl_oclthreads_to_intensity = opencl_oclthreads_to_intensity_scrypt,
+	.opencl_intensity_to_oclthreads = opencl_intensity_to_oclthreads_scrypt,
+	.opencl_min_oclthreads =      0x100,  // intensity   8
+	.opencl_max_oclthreads = 0x80000000,  // intensity  31
+#endif
+};
+
+static
+const char *set_malgo_scrypt()
+{
+	goal_set_malgo(get_mining_goal("default"), &malgo_scrypt);
+	return NULL;
+}
+
+#endif
+
+static
+__attribute__((constructor))
+void init_mining_goals(void)
+{
+	LL_APPEND(mining_algorithms, (&malgo_sha256d));
+#ifdef USE_SCRYPT
+	LL_APPEND(mining_algorithms, (&malgo_scrypt));
+#endif
+}
+
 static
 int mining_goals_name_cmp(const struct mining_goal_info * const a, const struct mining_goal_info * const b)
 {
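
As a quick check of the intensity bounds encoded in the two structures above: SHA256d uses oclthreads = 2^(intensity + 15), so the minimum 0x20 = 2^5 corresponds to intensity -10 and the maximum 0x20000000 = 2^29 to intensity 14; scrypt uses oclthreads = 2^intensity, so 0x100 = 2^8 gives intensity 8 and 0x80000000 = 2^31 gives intensity 31. These are the same limits as the fixed MIN/MAX intensity macros this commit deletes from miner.h further down.
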
@@ -1035,6 +1159,7 @@ struct mining_goal_info *get_mining_goal(const char * const name)
 			.blkchain = blkchain,
 			.current_diff = 0xFFFFFFFFFFFFFFFFULL,
 		};
+		goal_set_malgo(goal, &malgo_sha256d);
 		HASH_ADD_STR(mining_goals, name, goal);
 		HASH_SORT(mining_goals, mining_goals_name_cmp);
 		
@@ -1258,6 +1383,8 @@ char *set_b58addr(const char * const arg, bytes_t * const b)
 	return NULL;
 }
 
+static char *set_generate_addr2(struct mining_goal_info *, const char *);
+
 static
 char *set_generate_addr(char *arg)
 {
@@ -1272,6 +1399,12 @@ char *set_generate_addr(char *arg)
 	else
 		goal = get_mining_goal("default");
 	
+	return set_generate_addr2(goal, arg);
+}
+
+static
+char *set_generate_addr2(struct mining_goal_info * const goal, const char * const arg)
+{
 	bytes_t newscript = BYTES_INIT;
 	char *estr = set_b58addr(arg, &newscript);
 	if (estr)
@@ -1286,6 +1419,7 @@ char *set_generate_addr(char *arg)
 	}
 	bytes_assimilate(goal->generation_script, &newscript);
 	bytes_free(&newscript);
+	
 	return NULL;
 }
 #endif
@@ -1754,7 +1888,59 @@ static char *set_cbcperc(const char *arg)
 }
 
 static
-char *set_pool_goal(const char * const arg)
+const char *goal_set(struct mining_goal_info * const goal, const char * const optname, const char * const newvalue, bytes_t * const replybuf, enum bfg_set_device_replytype * const out_success)
+{
+	*out_success = SDR_ERR;
+	if (!(strcasecmp(optname, "malgo") && strcasecmp(optname, "algo")))
+	{
+		if (!newvalue)
+			return "Goal option 'malgo' requires a value (eg, SHA256d)";
+		struct mining_algorithm * const new_malgo = mining_algorithm_by_alias(newvalue);
+		if (!new_malgo)
+			return "Unrecognised mining algorithm";
+		goal_set_malgo(goal, new_malgo);
+		goto success;
+	}
+#if BLKMAKER_VERSION > 1
+	if (match_strtok("generate-to|generate-to-addr|generate-to-address|genaddress|genaddr|gen-address|gen-addr|generate-address|generate-addr|coinbase-addr|coinbase-address|coinbase-payout|cbaddress|cbaddr|cb-address|cb-addr|payout", "|", optname))
+	{
+		if (!newvalue)
+			return "Missing value for 'generate-to' goal option";
+		const char * const emsg = set_generate_addr2(goal, newvalue);
+		if (emsg)
+			return emsg;
+		goto success;
+	}
+#endif
+	*out_success = SDR_UNKNOWN;
+	return "Unknown goal option";
+
+success:
+	*out_success = SDR_OK;
+	return NULL;
+}
+
+// May leak replybuf if returning an error
+static
+const char *set_goal_params(struct mining_goal_info * const goal, char *arg)
+{
+	bytes_t replybuf = BYTES_INIT;
+	for (char *param, *nextptr; (param = strtok_r(arg, ",", &nextptr)); arg = NULL)
+	{
+		char *val = strchr(param, '=');
+		if (val)
+			val++[0] = '\0';
+		enum bfg_set_device_replytype success;
+		const char * const emsg = goal_set(goal, param, val, &replybuf, &success);
+		if (success != SDR_OK)
+			return emsg ?: "Error setting goal param";
+	}
+	bytes_free(&replybuf);
+	return NULL;
+}
+
+static
+const char *set_pool_goal(const char * const arg)
 {
 	struct pool *pool;
 	
@@ -1762,8 +1948,14 @@ char *set_pool_goal(const char * const arg)
 		return "Usage of --pool-goal before pools are defined does not make sense";
 	
 	pool = pools[total_pools - 1];
+	char *param = strchr(arg, ':');
+	if (param)
+		param++[0] = '\0';
 	pool->goal = get_mining_goal(arg);
 	
+	if (param)
+		return set_goal_params(pool->goal, param);
+	
 	return NULL;
 }
 
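set_pool_goal() now splits its argument at the first ':' and hands the remainder to set_goal_params(), so goal options can be attached on the command line as comma-separated key=value pairs. A hypothetical invocation (URL and credentials are placeholders; the malgo key and the scrypt alias come from goal_set() and the algorithm table above):

	bfgminer -o stratum+tcp://ltc.example.com:3333 -u user -p pass \
	         --pool-goal litecoin:malgo=scrypt
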
@@ -2528,7 +2720,7 @@ static struct opt_table opt_config_table[] = {
 		     "Set a time of day in HH:MM to stop mining (will quit without a start time)"),
 #ifdef USE_SCRYPT
 	OPT_WITHOUT_ARG("--scrypt",
-			opt_set_bool, &opt_scrypt,
+	                set_malgo_scrypt, NULL,
 			"Use the scrypt algorithm for mining (non-bitcoin)"),
 #endif
 	OPT_WITH_ARG("--set-device|--set",
@@ -3493,17 +3685,21 @@ void decay_time(double *f, double fadd, double fsecs)
 	*f /= ftotal;
 }
 
-static int __total_staged(void)
+static
+int __total_staged(const bool include_spares)
 {
-	return HASH_COUNT(staged_work);
+	int tot = HASH_COUNT(staged_work);
+	if (!include_spares)
+		tot -= staged_spare;
+	return tot;
 }
 
-static int total_staged(void)
+static int total_staged(const bool include_spares)
 {
 	int ret;
 
 	mutex_lock(stgd_lock);
-	ret = __total_staged();
+	ret = __total_staged(include_spares);
 	mutex_unlock(stgd_lock);
 
 	return ret;
@@ -4884,7 +5080,8 @@ void disable_pool(struct pool * const pool, const enum pool_enable enable_status
 static
 void share_result_msg(const struct work *work, const char *disp, const char *reason, bool resubmit, const char *worktime) {
 	struct cgpu_info *cgpu;
-	const unsigned char *hashpart = &work->hash[opt_scrypt ? 26 : 24];
+	const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+	const unsigned char *hashpart = &work->hash[0x1c - malgo->ui_skip_hash_bytes];
 	char shrdiffdisp[ALLOC_H2B_SHORTV];
 	const double tgtdiff = work->work_difficulty;
 	char tgtdiffdisp[ALLOC_H2B_SHORTV];
@@ -5204,10 +5401,12 @@ static bool submit_upstream_work_completed(struct work *work, bool resubmit, str
 			if (work->work_difficulty < 1)
 				diffplaces = 6;
 
+			const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+			const uint8_t * const prevblkhash = &work->data[4];
 			snprintf(worktime, sizeof(worktime),
 				" <-%08lx.%08lx M:%c D:%1.*f G:%02d:%02d:%02d:%1.3f %s (%1.3f) W:%1.3f (%1.3f) S:%1.3f R:%02d:%02d:%02d",
-				(unsigned long)be32toh(*(uint32_t *)&(work->data[opt_scrypt ? 32 : 28])),
-				(unsigned long)be32toh(*(uint32_t *)&(work->data[opt_scrypt ? 28 : 24])),
+				(unsigned long)be32toh(((uint32_t *)prevblkhash)[7 - malgo->worktime_skip_prevblk_u32]),
+				(unsigned long)be32toh(((uint32_t *)prevblkhash)[6 - malgo->worktime_skip_prevblk_u32]),
 				work->getwork_mode, diffplaces, work->work_difficulty,
 				tm_getwork.tm_hour, tm_getwork.tm_min,
 				tm_getwork.tm_sec, getwork_time, workclone,
@@ -5250,6 +5449,9 @@ static bool pool_unworkable(const struct pool * const pool)
 	return false;
 }
 
+static struct pool *priority_pool(int);
+static bool pool_unusable(struct pool *);
+
 static
 bool pool_actively_desired(const struct pool * const pool, const struct pool *cp)
 {
@@ -5261,7 +5463,23 @@ bool pool_actively_desired(const struct pool * const pool, const struct pool *cp
 		return true;
 	if (!cp)
 		cp = current_pool();
-	return (pool == cp);
+	if (pool == cp)
+		return true;
+	
+	// If we are the highest priority, workable pool for a given algorithm, we are needed
+	struct mining_algorithm * const malgo = pool->goal->malgo;
+	for (int i = 0; i < total_pools; ++i)
+	{
+		struct pool * const other_pool = priority_pool(i);
+		if (other_pool == pool)
+			return true;
+		if (pool_unusable(other_pool))
+			continue;
+		if (other_pool->goal->malgo == malgo)
+			break;
+	}
+	
+	return false;
 }
 
 static
@@ -5291,7 +5509,8 @@ static struct pool *_select_longpoll_pool(struct pool *, bool(*)(struct pool *))
  * away from them to distribute work evenly. The share count is reset to the
  * rolling average every 10 minutes to not send all work to one pool after it
  * has been disabled/out for an extended period. */
-static struct pool *select_balanced(struct pool *cp)
+static
+struct pool *select_balanced(struct pool *cp, struct mining_algorithm * const malgo)
 {
 	int i, lowest = cp->shares;
 	struct pool *ret = cp, *failover_pool = NULL;
@@ -5299,6 +5518,8 @@ static struct pool *select_balanced(struct pool *cp)
 	for (i = 0; i < total_pools; i++) {
 		struct pool *pool = pools[i];
 
+		if (malgo && pool->goal->malgo != malgo)
+			continue;
 		if (pool_unworkable(pool))
 			continue;
 		if (pool->failover_only)
@@ -5311,42 +5532,25 @@ static struct pool *select_balanced(struct pool *cp)
 			ret = pool;
 		}
 	}
+	if (malgo && ret->goal->malgo != malgo)
+		// Yes, we want failover_pool even if it's NULL
+		ret = failover_pool;
+	else
 	if (pool_unworkable(ret) && failover_pool)
 		ret = failover_pool;
 
-	ret->shares++;
+	if (ret)
+		++ret->shares;
 	return ret;
 }
 
-static bool pool_active(struct pool *, bool pinging);
-static void pool_died(struct pool *);
-static struct pool *priority_pool(int choice);
-static bool pool_unusable(struct pool *pool);
-
-/* Select any active pool in a rotating fashion when loadbalance is chosen if
- * it has any quota left. */
-static inline struct pool *select_pool(bool lagging)
+static
+struct pool *select_loadbalance(struct mining_algorithm * const malgo)
 {
 	static int rotating_pool = 0;
-	struct pool *pool, *cp;
+	struct pool *pool;
 	bool avail = false;
-	int tested, i;
-
-retry:
-	cp = current_pool();
-
-	if (pool_strategy == POOL_BALANCE) {
-		pool = select_balanced(cp);
-		if (pool_unworkable(pool))
-			goto simple_failover;
-		goto out;
-	}
-
-	if (pool_strategy != POOL_LOADBALANCE && (!lagging || opt_fail_only)) {
-		pool = cp;
-		goto out;
-	} else
-		pool = NULL;
+	int tested, i, rpsave;
 
 	for (i = 0; i < total_pools; i++) {
 		struct pool *tp = pools[i];
@@ -5360,45 +5564,125 @@ retry:
 	/* There are no pools with quota, so reset them. */
 	if (!avail) {
 		for (i = 0; i < total_pools; i++)
-			pools[i]->quota_used = 0;
+		{
+			struct pool * const tp = pools[i];
+			tp->quota_used -= tp->quota_gcd;
+		}
 		if (++rotating_pool >= total_pools)
 			rotating_pool = 0;
 	}
 
 	/* Try to find the first pool in the rotation that is usable */
-	tested = 0;
-	while (!pool && tested++ < total_pools) {
+	// Look for the lowest integer quota_used / quota_gcd in case we are imbalanced by algorithm demands
+	struct pool *pool_lowest = NULL;
+	int lowest = INT_MAX;
+	rpsave = rotating_pool;
+	for (tested = 0; tested < total_pools; ++tested)
+	{
 		pool = pools[rotating_pool];
-		if (pool->quota_used++ < pool->quota_gcd) {
+		if (malgo && pool->goal->malgo != malgo)
+			goto continue_tested;
+		
+		if (pool->quota_used < pool->quota_gcd)
+		{
+			++pool->quota_used;
 			if (!pool_unworkable(pool))
-				break;
+				goto out;
 			/* Failover-only flag for load-balance means distribute
 			 * unused quota to priority pool 0. */
 			if (opt_fail_only)
 				priority_pool(0)->quota_used--;
 		}
-		pool = NULL;
+		if (malgo)
+		{
+			const int count = pool->quota_used / pool->quota_gcd;
+			if (count < lowest)
+			{
+				pool_lowest = pool;
+				lowest = count;
+			}
+		}
+		
+continue_tested: ;
 		if (++rotating_pool >= total_pools)
 			rotating_pool = 0;
 	}
+	
+	// Even if pool_lowest is NULL, we want to return that to indicate failure
+	// Note it isn't possible to get here if !malgo
+	pool = pool_lowest;
+	
+out: ;
+	// Restore rotating_pool static, so malgo searches don't affect the usual load balancing
+	if (malgo)
+		rotating_pool = rpsave;
+	
+	return pool;
+}
+
+static
+struct pool *select_failover(struct mining_algorithm * const malgo)
+{
+	int i;
+	
+	for (i = 0; i < total_pools; i++) {
+		struct pool *tp = priority_pool(i);
+		
+		if (malgo && tp->goal->malgo != malgo)
+			continue;
+		
+		if (!pool_unusable(tp)) {
+			return tp;
+		}
+	}
+	
+	return NULL;
+}
+
+static bool pool_active(struct pool *, bool pinging);
+static void pool_died(struct pool *);
+
+/* Select any active pool in a rotating fashion when loadbalance is chosen if
+ * it has any quota left. */
+static inline struct pool *select_pool(bool lagging, struct mining_algorithm * const malgo)
+{
+	struct pool *pool = NULL, *cp;
+
+retry:
+	cp = current_pool();
+
+	if (pool_strategy == POOL_BALANCE) {
+		pool = select_balanced(cp, malgo);
+		if ((!pool) || pool_unworkable(pool))
+			goto simple_failover;
+		goto out;
+	}
+
+	if (pool_strategy != POOL_LOADBALANCE && (!lagging || opt_fail_only)) {
+		if (malgo && cp->goal->malgo != malgo)
+			goto simple_failover;
+		pool = cp;
+		goto out;
+	} else
+		pool = select_loadbalance(malgo);
 
 simple_failover:
 	/* If there are no alive pools with quota, choose according to
 	 * priority. */
 	if (!pool) {
-		for (i = 0; i < total_pools; i++) {
-			struct pool *tp = priority_pool(i);
-
-			if (!pool_unusable(tp)) {
-				pool = tp;
-				break;
-			}
-		}
+		pool = select_failover(malgo);
 	}
 
 	/* If still nothing is usable, use the current pool */
 	if (!pool)
+	{
+		if (malgo && cp->goal->malgo != malgo)
+		{
+			applog(LOG_DEBUG, "Failed to select pool for specific mining algorithm '%s'", malgo->name);
+			return NULL;
+		}
 		pool = cp;
+	}
 
 out:
 	if (!pool_actively_in_use(pool, cp))
@@ -5410,7 +5694,7 @@ out:
 		}
 		pool_tclear(pool, &pool->idle);
 	}
-	applog(LOG_DEBUG, "Selecting pool %d for work", pool->pool_no);
+	applog(LOG_DEBUG, "Selecting pool %d for %s%swork", pool->pool_no, malgo ? malgo->name : "", malgo ? " " : "");
 	return pool;
 }
 
@@ -5547,7 +5831,9 @@ void setup_benchmark_pool()
 		swork->ntime = 0x7fffffff;
 		timer_unset(&swork->tv_received);
 		memcpy(swork->diffbits, "\x17\0\xff\xff", 4);
-		set_target_to_pdiff(swork->target, opt_scrypt ? (1./0x10000) : 1.);
+		const struct mining_goal_info * const goal = get_mining_goal("default");
+		const struct mining_algorithm * const malgo = goal->malgo;
+		set_target_to_pdiff(swork->target, malgo->reasonable_low_nonce_diff);
 		pool->nonce2sz = swork->n2size = GBT_XNONCESZ;
 		pool->nonce2 = 0;
 	}
@@ -7003,6 +7289,20 @@ static void discard_work(struct work *work)
 	free_work(work);
 }
 
+static bool work_rollable(struct work *);
+
+static
+void unstage_work(struct work * const work)
+{
+	HASH_DEL(staged_work, work);
+	--work_mining_algorithm(work)->staged;
+	if (work_rollable(work))
+		--staged_rollable;
+	if (work->spare)
+		--staged_spare;
+	staged_full = false;
+}
+
 static void wake_gws(void)
 {
 	mutex_lock(stgd_lock);
@@ -7018,10 +7318,9 @@ static void discard_stale(void)
 	mutex_lock(stgd_lock);
 	HASH_ITER(hh, staged_work, work, tmp) {
 		if (stale_work(work, false)) {
-			HASH_DEL(staged_work, work);
+			unstage_work(work);
 			discard_work(work);
 			stale++;
-			staged_full = false;
 		}
 	}
 	pthread_cond_signal(&gws_cond);
@@ -7313,6 +7612,9 @@ static bool hash_push(struct work *work)
 	mutex_lock(stgd_lock);
 	if (work_rollable(work))
 		staged_rollable++;
+	++work_mining_algorithm(work)->staged;
+	if (work->spare)
+		++staged_spare;
 	if (likely(!getq->frozen)) {
 		HASH_ADD_INT(staged_work, id, work);
 		HASH_SORT(staged_work, tv_sort);
@@ -9075,10 +9377,9 @@ static void clear_pool_work(struct pool *pool)
 	mutex_lock(stgd_lock);
 	HASH_ITER(hh, staged_work, work, tmp) {
 		if (work->pool == pool) {
-			HASH_DEL(staged_work, work);
+			unstage_work(work);
 			free_work(work);
 			cleared++;
-			staged_full = false;
 		}
 	}
 	mutex_unlock(stgd_lock);
@@ -9609,17 +9910,63 @@ void *cmd_idle_thread(void * const __maybe_unused userp)
 	return NULL;
 }
 
-static struct work *hash_pop(void)
+static struct work *hash_pop(struct cgpu_info * const proc)
 {
-	struct work *work = NULL, *tmp;
 	int hc;
+	struct work *work, *work_found, *tmp;
+	enum {
+		HPWS_NONE,
+		HPWS_LOWDIFF,
+		HPWS_SPARE,
+		HPWS_ROLLABLE,
+		HPWS_PERFECT,
+	} work_score = HPWS_NONE;
 	bool did_cmd_idle = false;
 	pthread_t cmd_idle_thr;
 
 retry:
 	mutex_lock(stgd_lock);
-	while (!HASH_COUNT(staged_work))
+	while (true)
 	{
+		work_found = NULL;
+		work_score = 0;
+		hc = HASH_COUNT(staged_work);
+		HASH_ITER(hh, staged_work, work, tmp)
+		{
+			const struct mining_algorithm * const work_malgo = work_mining_algorithm(work);
+			const float min_nonce_diff = drv_min_nonce_diff(proc->drv, proc, work_malgo);
+#define FOUND_WORK(score)  do{  \
+				if (work_score < score)  \
+				{  \
+					work_found = work;  \
+					work_score = score;  \
+				}  \
+				continue;  \
+}while(0)
+			if (min_nonce_diff < work->work_difficulty)
+			{
+				if (min_nonce_diff < 0)
+					continue;
+				FOUND_WORK(HPWS_LOWDIFF);
+			}
+			if (work->spare)
+				FOUND_WORK(HPWS_SPARE);
+			if (work->rolltime && hc > staged_rollable)
+				FOUND_WORK(HPWS_ROLLABLE);
+#undef FOUND_WORK
+			
+			// Good match
+			work_found = work;
+			work_score = HPWS_PERFECT;
+			break;
+		}
+		if (work_found)
+		{
+			work = work_found;
+			break;
+		}
+		
+		// Failed to get a usable work
 		if (unlikely(staged_full))
 		{
 			if (likely(opt_queue < 10 + mining_threads))
@@ -9646,16 +9993,6 @@ retry:
 	
 	no_work = false;
 
-	hc = HASH_COUNT(staged_work);
-	/* Find clone work if possible, to allow masters to be reused */
-	if (hc > staged_rollable) {
-		HASH_ITER(hh, staged_work, work, tmp) {
-			if (!work_rollable(work))
-				break;
-		}
-	} else
-		work = staged_work;
-	
 	if (can_roll(work) && should_roll(work))
 	{
 		// Instead of consuming it, force it to be cloned and grab the clone
@@ -9664,9 +10001,7 @@ retry:
 		goto retry;
 	}
 	
-	HASH_DEL(staged_work, work);
-	if (work_rollable(work))
-		staged_rollable--;
+	unstage_work(work);
 
 	/* Signal the getwork scheduler to look for more work */
 	pthread_cond_signal(&gws_cond);
@@ -9685,7 +10020,7 @@ retry:
  * the future */
 static struct work *clone_work(struct work *work)
 {
-	int mrs = mining_threads + opt_queue - total_staged();
+	int mrs = mining_threads + opt_queue - total_staged(false);
 	struct work *work_clone;
 	bool cloned;
 
@@ -9994,7 +10329,7 @@ struct work *get_work(struct thr_info *thr)
 
 	applog(LOG_DEBUG, "%"PRIpreprv": Popping work from get queue to get work", cgpu->proc_repr);
 	while (!work) {
-		work = hash_pop();
+		work = hash_pop(cgpu);
 		if (stale_work(work, false)) {
 			staged_full = false;  // It wasn't really full, since it was stale :(
 			discard_work(work);
@@ -10040,12 +10375,13 @@ struct work *get_work(struct thr_info *thr)
 	
 	if (work->work_difficulty < 1)
 	{
-		if (unlikely(work->work_difficulty < cgpu->min_nonce_diff))
+		const float min_nonce_diff = drv_min_nonce_diff(cgpu->drv, cgpu, work_mining_algorithm(work));
+		if (unlikely(work->work_difficulty < min_nonce_diff))
 		{
-			if (cgpu->min_nonce_diff - work->work_difficulty > 1./0x10000000)
+			if (min_nonce_diff - work->work_difficulty > 1./0x10000000)
 				applog(LOG_WARNING, "%"PRIpreprv": Using work with lower difficulty than device supports",
 				       cgpu->proc_repr);
-			work->nonce_diff = cgpu->min_nonce_diff;
+			work->nonce_diff = min_nonce_diff;
 		}
 		else
 			work->nonce_diff = work->work_difficulty;
@@ -10162,12 +10498,8 @@ void inc_hw_errors3(struct thr_info *thr, const struct work *work, const uint32_
 
 void work_hash(struct work * const work)
 {
-#ifdef USE_SCRYPT
-	if (opt_scrypt)
-		scrypt_hash_data(work->hash, work->data);
-	else
-#endif
-		hash_data(work->hash, work->data);
+	const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+	malgo->hash_data_f(work->hash, work->data);
 }
 
 static
@@ -11148,7 +11480,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
 		hashmeter(-1, &zero_tv, 0);
 
 #ifdef HAVE_CURSES
-		const int ts = total_staged();
+		const int ts = total_staged(true);
 		if (curses_active_locked()) {
 			change_logwinsize();
 			curses_print_status(ts);
@@ -11905,6 +12237,16 @@ void register_device(struct cgpu_info *cgpu)
 	int thr_objs = cgpu->threads ?: 1;
 	mining_threads += thr_objs;
 	base_queue += thr_objs + cgpu->extra_work_queue;
+	{
+		const struct device_drv * const drv = cgpu->drv;
+		struct mining_algorithm *malgo;
+		LL_FOREACH(mining_algorithms, malgo)
+		{
+			if (drv_min_nonce_diff(drv, cgpu, malgo) < 0)
+				continue;
+			malgo->base_queue += thr_objs + cgpu->extra_work_queue;
+		}
+	}
 #ifdef HAVE_CURSES
 	adj_width(mining_threads, &dev_width);
 #endif
@@ -11979,11 +12321,15 @@ static bool my_blkmaker_sha256_callback(void *digest, const void *buffer, size_t
 }
 
 static
-int drv_algo_check(const struct device_drv * const drv)
+bool drv_algo_check(const struct device_drv * const drv)
 {
-	const int algomatch = opt_scrypt ? POW_SCRYPT : POW_SHA256D;
-	const supported_algos_t algos = drv->supported_algos ?: POW_SHA256D;
-	return (algos & algomatch);
+	struct mining_goal_info *goal, *tmpgoal;
+	HASH_ITER(hh, mining_goals, goal, tmpgoal)
+	{
+		if (drv_min_nonce_diff(drv, NULL, goal->malgo) >= 0)
+			return true;
+	}
+	return false;
 }
 
 #ifndef HAVE_PTHREAD_CANCEL
@@ -12076,7 +12422,6 @@ void allocate_cgpu(struct cgpu_info *cgpu, unsigned int *kp)
 	}
 
 	cgpu->max_hashes = 0;
-	BFGINIT(cgpu->min_nonce_diff, 1);
 	
 	BFGINIT(cgpu->cutofftemp, opt_cutofftemp);
 	BFGINIT(cgpu->targettemp, cgpu->cutofftemp - 6);
@@ -12565,7 +12910,6 @@ void schedule_rescan(const struct timeval * const tvp_when)
 	mutex_unlock(&rescan_mutex);
 }
 
-#ifdef HAVE_BFG_HOTPLUG
 static
 void hotplug_trigger()
 {
@@ -12574,7 +12918,6 @@ void hotplug_trigger()
 	timer_set_now(&tv_now);
 	schedule_rescan(&tv_now);
 }
-#endif
 
 #if defined(HAVE_LIBUDEV) && defined(HAVE_SYS_EPOLL_H)
 
@@ -12904,6 +13247,8 @@ int main(int argc, char *argv[])
 
 	mutex_init(&submitting_lock);
 
+	// Ensure at least the default goal is created
+	get_mining_goal("default");
 #ifdef HAVE_OPENCL
 	opencl_early_init();
 #endif
@@ -13056,13 +13401,6 @@ int main(int argc, char *argv[])
 	if (want_per_device_stats)
 		opt_log_output = true;
 
-#ifdef WANT_CPUMINE
-#ifdef USE_SCRYPT
-	if (opt_scrypt)
-		set_scrypt_algo(&opt_algo);
-#endif
-#endif
-
 	bfg_devapi_init();
 	drv_detect_all();
 	total_devices = total_devices_new;
@@ -13117,7 +13455,7 @@ int main(int argc, char *argv[])
 #endif
 
 #if BLKMAKER_VERSION > 1
-	if (opt_load_bitcoin_conf && !(opt_scrypt || opt_benchmark))
+	if (opt_load_bitcoin_conf && !(get_mining_goal("default")->malgo->algo != POW_SHA256D || opt_benchmark))
 		add_local_gbt(total_pools);
 #endif
 	
@@ -13224,9 +13562,9 @@ int main(int argc, char *argv[])
 	} while (!pools_active);
 
 #ifdef USE_SCRYPT
-	if (detect_algo == 1 && !opt_scrypt) {
+	if (detect_algo == 1 && get_mining_goal("default")->malgo->algo != POW_SCRYPT) {
 		applog(LOG_NOTICE, "Detected scrypt algorithm");
-		opt_scrypt = true;
+		set_malgo_scrypt();
 	}
 #endif
 	detect_algo = 0;
@@ -13351,6 +13689,7 @@ begin_bench:
 		bool lagging = false;
 		struct curl_ent *ce;
 		struct work *work;
+		struct mining_algorithm *malgo = NULL;
 
 		cp = current_pool();
 
@@ -13358,16 +13697,37 @@ begin_bench:
 		max_staged += base_queue;
 
 		mutex_lock(stgd_lock);
-		ts = __total_staged();
+		ts = __total_staged(false);
 
 		if (!pool_localgen(cp) && !ts && !opt_fail_only)
 			lagging = true;
 
 		/* Wait until hash_pop tells us we need to create more work */
 		if (ts > max_staged) {
+			{
+				LL_FOREACH(mining_algorithms, malgo)
+				{
+					if (!malgo->goal_refs)
+						continue;
+					if (!malgo->base_queue)
+						continue;
+					if (malgo->staged < malgo->base_queue + opt_queue)
+					{
+						mutex_unlock(stgd_lock);
+						pool = select_pool(lagging, malgo);
+						if (pool)
+						{
+							work = make_work();
+							work->spare = true;
+							goto retry;
+						}
+					}
+				}
+				malgo = NULL;
+			}
 			staged_full = true;
 			pthread_cond_wait(&gws_cond, stgd_lock);
-			ts = __total_staged();
+			ts = __total_staged(false);
 		}
 		mutex_unlock(stgd_lock);
 
@@ -13381,11 +13741,12 @@ begin_bench:
 			cp->getfail_occasions++;
 			total_go++;
 		}
-		pool = select_pool(lagging);
+		pool = select_pool(lagging, malgo);
+
 retry:
 		if (pool->has_stratum) {
 			while (!pool->stratum_active || !pool->stratum_notify) {
-				struct pool *altpool = select_pool(true);
+				struct pool *altpool = select_pool(true, malgo);
 
 				if (altpool == pool && pool->has_stratum)
 					cgsleep_ms(5000);
@@ -13447,7 +13808,7 @@ retry:
 			push_curl_entry(ce, pool);
 			++pool->seq_getfails;
 			pool_died(pool);
-			next_pool = select_pool(!opt_fail_only);
+			next_pool = select_pool(!opt_fail_only, malgo);
 			if (pool == next_pool) {
 				applog(LOG_DEBUG, "Pool %d json_rpc_call failed on get work, retrying in 5s", pool->pool_no);
 				cgsleep_ms(5000);

+ 53 - 33
miner.h

@@ -21,6 +21,7 @@
 #include <winsock2.h>
 #endif
 
+#include <float.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <sys/time.h>
@@ -277,9 +278,10 @@ struct gpu_adl {
 
 enum pow_algorithm {
 	POW_SHA256D = 1,
+#ifdef USE_SCRYPT
 	POW_SCRYPT  = 2,
+#endif
 };
-typedef uint8_t supported_algos_t;
 
 struct api_data;
 struct thr_info;
@@ -294,15 +296,19 @@ enum bfg_probe_result_flags_values {
 extern unsigned *_bfg_probe_result_flags();
 #define bfg_probe_result_flags (*_bfg_probe_result_flags())
 
+struct mining_algorithm;
+
 struct device_drv {
 	const char *dname;
 	const char *name;
 	int8_t probe_priority;
 	bool lowl_probe_by_name_only;
-	supported_algos_t supported_algos;
 
 	// DRV-global functions
 	void (*drv_init)();
+	// drv_min_nonce_diff's proc may be NULL
+	// drv_min_nonce_diff should return negative if algorithm is not supported
+	float (*drv_min_nonce_diff)(struct cgpu_info *proc, const struct mining_algorithm *);
 	void (*drv_detect)();
 	bool (*lowl_match)(const struct lowlevel_device_info *);
 	bool (*lowl_probe)(const struct lowlevel_device_info *);
@@ -569,9 +575,6 @@ struct cgpu_info {
 
 	bool disable_watchdog;
 	bool shutdown;
-	
-	// Lowest difficulty supported for finding nonces
-	float min_nonce_diff;
 };
 
 extern void renumber_cgpu(struct cgpu_info *);
@@ -1061,6 +1064,7 @@ extern void hashmeter2(struct thr_info *);
 extern bool stale_work(struct work *, bool share);
 extern bool stale_work_future(struct work *, bool share, unsigned long ustime);
 extern void blkhashstr(char *out, const unsigned char *hash);
+static const float minimum_pdiff = max(FLT_MIN, 1./0x100000000);
 extern void set_target_to_pdiff(void *dest_target, double pdiff);
 #define bdiff_to_pdiff(n) (n * 1.0000152587)
 extern void set_target_to_bdiff(void *dest_target, double bdiff);
@@ -1093,7 +1097,9 @@ extern int enabled_pools;
 extern bool get_intrange(const char *arg, int *val1, int *val2);
 extern bool detect_stratum(struct pool *pool, char *url);
 extern void print_summary(void);
+extern struct mining_algorithm *mining_algorithm_by_alias(const char *alias);
 extern struct mining_goal_info *get_mining_goal(const char *name);
+extern void goal_set_malgo(struct mining_goal_info *, struct mining_algorithm *);
 extern void mining_goal_reset(struct mining_goal_info * const goal);
 extern void adjust_quota_gcd(void);
 extern struct pool *add_pool2(struct mining_goal_info *);
@@ -1103,28 +1109,6 @@ extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user
 #define MAX_GPUDEVICES 16
 #define MAX_DEVICES 4096
 
-#define MIN_SHA_INTENSITY -10
-#define MIN_SHA_INTENSITY_STR "-10"
-#define MAX_SHA_INTENSITY 14
-#define MAX_SHA_INTENSITY_STR "14"
-#define MIN_SCRYPT_INTENSITY 8
-#define MIN_SCRYPT_INTENSITY_STR "8"
-#define MAX_SCRYPT_INTENSITY 31
-#define MAX_SCRYPT_INTENSITY_STR "31"
-#ifdef USE_SCRYPT
-#define MIN_INTENSITY (opt_scrypt ? MIN_SCRYPT_INTENSITY : MIN_SHA_INTENSITY)
-#define MIN_INTENSITY_STR (opt_scrypt ? MIN_SCRYPT_INTENSITY_STR : MIN_SHA_INTENSITY_STR)
-#define MAX_INTENSITY (opt_scrypt ? MAX_SCRYPT_INTENSITY : MAX_SHA_INTENSITY)
-#define MAX_INTENSITY_STR (opt_scrypt ? MAX_SCRYPT_INTENSITY_STR : MAX_SHA_INTENSITY_STR)
-#define MAX_GPU_INTENSITY MAX_SCRYPT_INTENSITY
-#else
-#define MIN_INTENSITY MIN_SHA_INTENSITY
-#define MIN_INTENSITY_STR MIN_SHA_INTENSITY_STR
-#define MAX_INTENSITY MAX_SHA_INTENSITY
-#define MAX_INTENSITY_STR MAX_SHA_INTENSITY_STR
-#define MAX_GPU_INTENSITY MAX_SHA_INTENSITY
-#endif
-
 struct block_info {
 	uint32_t block_id;
 	uint8_t prevblkhash[0x20];
@@ -1142,6 +1126,32 @@ struct blockchain_info {
 	char currentblk_first_seen_time_str[0x20];  // was global blocktime
 };
 
+struct mining_algorithm {
+	const char *name;
+	const char *aliases;
+	
+	enum pow_algorithm algo;
+	uint8_t ui_skip_hash_bytes;
+	uint8_t worktime_skip_prevblk_u32;
+	float reasonable_low_nonce_diff;
+	
+	void (*hash_data_f)(void *digest, const void *data);
+	
+	int goal_refs;
+	int staged;
+	int base_queue;
+	
+	struct mining_algorithm *next;
+	
+#ifdef HAVE_OPENCL
+	bool opencl_nodefault;
+	float (*opencl_oclthreads_to_intensity)(unsigned long oclthreads);
+	unsigned long (*opencl_intensity_to_oclthreads)(float intensity);
+	unsigned long opencl_min_oclthreads;
+	unsigned long opencl_max_oclthreads;
+#endif
+};
+
 struct mining_goal_info {
 	unsigned id;
 	char *name;
@@ -1151,6 +1161,7 @@ struct mining_goal_info {
 	
 	bytes_t *generation_script;  // was opt_coinbase_script
 	
+	struct mining_algorithm *malgo;
 	double current_diff;
 	char current_diff_str[ALLOC_H2B_SHORTV];  // was global block_diff
 	char net_hashrate[ALLOC_H2B_SHORT];
@@ -1175,11 +1186,6 @@ extern bool opt_quiet;
 extern struct thr_info *control_thr;
 extern struct thr_info **mining_thr;
 extern struct cgpu_info gpus[MAX_GPUDEVICES];
-#ifdef USE_SCRYPT
-extern bool opt_scrypt;
-#else
-#define opt_scrypt (0)
-#endif
 extern double total_secs;
 extern int mining_threads;
 extern struct cgpu_info *cpus;
@@ -1213,6 +1219,7 @@ extern int opt_log_interval;
 extern unsigned long long global_hashrate;
 extern unsigned unittest_failures;
 extern double best_diff;
+extern struct mining_algorithm *mining_algorithms;
 extern struct mining_goal_info *mining_goals;
 
 struct curl_ent {
@@ -1409,7 +1416,9 @@ struct pool {
 	bool stratum_init;
 	bool stratum_notify;
 	struct stratum_work swork;
-	bool next_goalreset;
+	char *goalname;
+	char *next_goalname;
+	struct mining_algorithm *next_goal_malgo;
 	uint8_t next_target[0x20];
 	char *next_nonce1;
 	int next_n2size;
@@ -1461,6 +1470,7 @@ struct work {
 	bool		longpoll;
 	bool		stale;
 	bool		mandatory;
+	bool spare;
 	bool		block;
 
 	bool		stratum;
@@ -1597,6 +1607,16 @@ extern const char *bfg_workpadding_bin;
 extern void set_simple_ntime_roll_limit(struct ntime_roll_limits *, uint32_t ntime_base, int ntime_roll, const struct timeval *tvp_ref);
 extern void work_set_simple_ntime_roll_limit(struct work *, int ntime_roll, const struct timeval *tvp_ref);
 extern int work_ntime_range(struct work *, const struct timeval *tvp_earliest, const struct timeval *tvp_latest, int desired_roll);
+
+static inline
+struct mining_algorithm *work_mining_algorithm(const struct work * const work)
+{
+	const struct pool * const pool = work->pool;
+	const struct mining_goal_info * const goal = pool->goal;
+	struct mining_algorithm * const malgo = goal->malgo;
+	return malgo;
+}
+
 extern void work_hash(struct work *);
 
 #define NTIME_DATA_OFFSET  0x44

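The work_mining_algorithm() helper added above is the per-work replacement for the deleted opt_scrypt global. A hedged sketch of how a driver can dispatch on it (the example_* names are placeholders, not part of this commit):

	static int64_t example_scanhash(struct thr_info * const thr, struct work * const work, const int64_t max_nonce)
	{
	#ifdef USE_SCRYPT
		if (work_mining_algorithm(work)->algo == POW_SCRYPT)
			return example_scanhash_scrypt(thr, work, max_nonce);
	#endif
		return example_scanhash_sha256d(thr, work, max_nonce);
	}
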
+ 148 - 152
ocl.c

@@ -12,6 +12,7 @@
 #ifdef HAVE_OPENCL
 
 #include <ctype.h>
+#include <limits.h>
 #include <signal.h>
 #include <stdbool.h>
 #include <stdint.h>
@@ -258,6 +259,30 @@ char *file_contents(const char *filename, int *length)
 	return (char*)buffer;
 }
 
+char *opencl_kernel_source(const char * const filename, int * const out_sourcelen, enum cl_kernels * const out_kinterface)
+{
+	char *source = file_contents(filename, out_sourcelen);
+	if (!source)
+		return NULL;
+	char *s = strstr(source, "kernel-interface:"), *q;
+	if (s)
+	{
+		for (s = &s[17]; s[0] && isspace(s[0]); ++s)
+			if (s[0] == '\n' || s[0] == '\r')
+				break;
+		for (q = s; q[0] && !isspace(q[0]); ++q)
+		{}  // Find end of string
+		const size_t kinamelen = q - s;
+		char kiname[kinamelen + 1];
+		memcpy(kiname, s, kinamelen);
+		kiname[kinamelen] = '\0';
+		*out_kinterface = select_kernel(kiname);
+	}
+	else
+		*out_kinterface = KL_NONE;
+	return source;
+}
+
 extern int opt_g_threads;
 
 int clDevicesNum(void) {
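
opencl_kernel_source() above picks the kernel interface by scanning the .cl source for a "kernel-interface:" tag and passing the first token after it to select_kernel(). A hypothetical annotation near the top of a kernel file would therefore look like:

	// kernel-interface: scrypt

Sources without the tag get KL_NONE and are rejected later in opencl_load_kernel().
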
@@ -339,20 +364,20 @@ int clDevicesNum(void) {
 	return most_devices;
 }
 
-cl_int bfg_clBuildProgram(_clState * const clState, const cl_device_id devid, const char * const CompilerOptions)
+cl_int bfg_clBuildProgram(cl_program * const program, const cl_device_id devid, const char * const CompilerOptions)
 {
 	cl_int status;
 	
-	status = clBuildProgram(clState->program, 1, &devid, CompilerOptions, NULL, NULL);
+	status = clBuildProgram(*program, 1, &devid, CompilerOptions, NULL, NULL);
 	
 	if (status != CL_SUCCESS)
 	{
 		applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);
 		size_t logSize;
-		status = clGetProgramBuildInfo(clState->program, devid, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
+		status = clGetProgramBuildInfo(*program, devid, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
 		
 		char *log = malloc(logSize ?: 1);
-		status = clGetProgramBuildInfo(clState->program, devid, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
+		status = clGetProgramBuildInfo(*program, devid, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
 		if (logSize > 0 && log[0])
 			applog(LOG_ERR, "%s", log);
 		free(log);
@@ -418,18 +443,15 @@ void patch_opcodes(char *w, unsigned remaining)
 	applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched);
 }
 
-_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
+_clState *opencl_create_clState(unsigned int gpu, char *name, size_t nameSize)
 {
 	_clState *clState = calloc(1, sizeof(_clState));
-	bool patchbfi = false, prog_built = false;
-	bool ismesa = false;
 	struct cgpu_info *cgpu = &gpus[gpu];
 	struct opencl_device_data * const data = cgpu->device_data;
 	cl_platform_id platform = NULL;
 	char pbuff[256], vbuff[255];
-	char *s, *q;
+	char *s;
 	cl_platform_id* platforms;
-	cl_uint preferred_vwidth;
 	cl_device_id *devices;
 	cl_uint numPlatforms;
 	cl_uint numDevices;
@@ -472,6 +494,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(vbuff), vbuff, NULL);
 	if (status == CL_SUCCESS)
 		applog(LOG_INFO, "CL Platform version: %s", vbuff);
+	clState->platform_ver_str = strdup(vbuff);
 
 	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
 	if (status != CL_SUCCESS) {
@@ -569,12 +592,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		clState->hasOpenCL11plus = true;
 	free(devoclver);
 
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
 		return NULL;
 	}
-	applog(LOG_DEBUG, "Preferred vector width reported %d", preferred_vwidth);
+	applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth);
 
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
 	if (status != CL_SUCCESS) {
@@ -594,7 +617,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		opencl_set_intensity_from_str(cgpu, data->_init_intensity);
 	}
 	else
-		data->oclthreads = intensity_to_oclthreads(MIN_INTENSITY, !opt_scrypt);
+	{
+		data->oclthreads = 1;
+		data->intensity = INT_MIN;
+	}
 	applog(LOG_DEBUG, "Max compute units reported %u", (unsigned)clState->max_compute_units);
 	
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&data->max_alloc, NULL);
@@ -626,7 +652,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		}
 		else
 			applog(LOG_DEBUG, "Mesa OpenCL platform detected (v%ld.%ld)", major, minor);
-		ismesa = true;
+		clState->is_mesa = true;
 	}
 	
 	if (data->opt_opencl_binaries == OBU_DEFAULT)
@@ -638,7 +664,44 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		data->opt_opencl_binaries = OBU_LOADSAVE;
 #endif
 	}
+	
+	clState->devid = devices[gpu];
+	free(devices);
+	
+	/* For some reason 2 vectors is still better even if the card says
+	 * otherwise, and many cards lie about their max so use 256 as max
+	 * unless explicitly set on the command line. Tahiti prefers 1 */
+	if (strstr(name, "Tahiti"))
+		clState->preferred_vwidth = 1;
+	else
+	if (clState->preferred_vwidth > 2)
+		clState->preferred_vwidth = 2;
 
+	if (data->vwidth)
+		clState->vwidth = data->vwidth;
+	else {
+		clState->vwidth = clState->preferred_vwidth;
+		data->vwidth = clState->preferred_vwidth;
+	}
+
+	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, OPENCL_MAX_BUFFERSIZE, NULL, &status);
+	if (status != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
+		return false;
+	}
+	
+	return clState;
+}
+
+bool opencl_load_kernel(struct cgpu_info * const cgpu, _clState * const clState, const char * const name, struct opencl_kernel_info * const kernelinfo, const char * const kernel_file, __maybe_unused const struct mining_algorithm * const malgo)
+{
+	const int gpu = cgpu->device_id;
+	bool patchbfi = false, prog_built = false;
+	struct opencl_device_data * const data = cgpu->device_data;
+	const char * const vbuff = clState->platform_ver_str;
+	char *s;
+	cl_int status;
+	
 	/* Create binary filename based on parameters passed to opencl
 	 * compiler to ensure we only load a binary that matches what would
 	 * have otherwise created. The filename is:
@@ -650,45 +713,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	char filename[255];
 	char numbuf[32];
 
-	if (!data->kernel_file)
-	{
-		if (opt_scrypt) {
-			applog(LOG_INFO, "Selecting scrypt kernel");
-			clState->chosen_kernel = KL_SCRYPT;
-		}
-		else if (ismesa)
-		{
-			applog(LOG_INFO, "Selecting phatk kernel for Mesa");
-			clState->chosen_kernel = KL_PHATK;
-		} else if (!strstr(name, "Tahiti") &&
-			/* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
-			(strstr(vbuff, "844.4") ||  // Linux 64 bit ATI 2.6 SDK
-			 strstr(vbuff, "851.4") ||  // Windows 64 bit ""
-			 strstr(vbuff, "831.4") ||
-			 strstr(vbuff, "898.1") ||  // 12.2 driver SDK 
-			 strstr(vbuff, "923.1") ||  // 12.4
-			 strstr(vbuff, "938.2") ||  // SDK 2.7
-			 strstr(vbuff, "1113.2"))) {// SDK 2.8
-				applog(LOG_INFO, "Selecting diablo kernel");
-				clState->chosen_kernel = KL_DIABLO;
-		/* Detect all 7970s, older ATI and NVIDIA and use poclbm */
-		} else if (strstr(name, "Tahiti") || !clState->hasBitAlign) {
-			applog(LOG_INFO, "Selecting poclbm kernel");
-			clState->chosen_kernel = KL_POCLBM;
-		/* Use phatk for the rest R5xxx R6xxx */
-		} else {
-			applog(LOG_INFO, "Selecting phatk kernel");
-			clState->chosen_kernel = KL_PHATK;
-		}
-		data->kernel_file = strdup(opencl_get_kernel_interface_name(clState->chosen_kernel));
-	}
-	
-	snprintf(filename, sizeof(filename), "%s.cl", data->kernel_file);
-	snprintf(binaryfilename, sizeof(filename), "%s", data->kernel_file);
+	snprintf(filename, sizeof(filename), "%s.cl", kernel_file);
+	snprintf(binaryfilename, sizeof(filename), "%s", kernel_file);
 	int pl;
-	char *source = file_contents(filename, &pl);
+	char *source = opencl_kernel_source(filename, &pl, &kernelinfo->interface);
 	if (!source)
-		return NULL;
+		return false;
 	{
 		uint8_t hash[0x20];
 		char hashhex[7];
@@ -696,29 +726,13 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		bin2hex(hashhex, hash, 3);
 		tailsprintf(binaryfilename, sizeof(binaryfilename), "-%s", hashhex);
 	}
-	s = strstr(source, "kernel-interface:");
-	if (s)
+	switch (kernelinfo->interface)
 	{
-		for (s = &s[17]; s[0] && isspace(s[0]); ++s)
-			if (s[0] == '\n' || s[0] == '\r')
-				break;
-		for (q = s; q[0] && !isspace(q[0]); ++q)
-		{}  // Find end of string
-		const size_t kinamelen = q - s;
-		char kiname[kinamelen + 1];
-		memcpy(kiname, s, kinamelen);
-		kiname[kinamelen] = '\0';
-		clState->chosen_kernel = select_kernel(kiname);
-	}
-	else
-	if (opt_scrypt)
-		clState->chosen_kernel = KL_SCRYPT;
-	switch (clState->chosen_kernel) {
 		case KL_NONE:
 			applog(LOG_ERR, "%s: Failed to identify kernel interface for %s",
-			       cgpu->dev_repr, data->kernel_file);
+			       cgpu->dev_repr, kernel_file);
 			free(source);
-			return NULL;
+			return false;
 		case KL_PHATK:
 			if ((strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
 			     strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
@@ -733,47 +747,30 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			;
 	}
 	applog(LOG_DEBUG, "%s: Using kernel %s with interface %s",
-	       cgpu->dev_repr, data->kernel_file,
-	       opencl_get_kernel_interface_name(clState->chosen_kernel));
+	       cgpu->dev_repr, kernel_file,
+	       opencl_get_kernel_interface_name(kernelinfo->interface));
 
-	/* For some reason 2 vectors is still better even if the card says
-	 * otherwise, and many cards lie about their max so use 256 as max
-	 * unless explicitly set on the command line. Tahiti prefers 1 */
-	if (strstr(name, "Tahiti"))
-		preferred_vwidth = 1;
-	else if (preferred_vwidth > 2)
-		preferred_vwidth = 2;
-
-	if (data->vwidth)
-		clState->vwidth = data->vwidth;
-	else {
-		clState->vwidth = preferred_vwidth;
-		data->vwidth = preferred_vwidth;
-	}
-
-	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
-		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
-			clState->goffset = true;
+	if (((kernelinfo->interface == KL_POCLBM || kernelinfo->interface == KL_DIABLO || kernelinfo->interface == KL_DIAKGCN) && clState->vwidth == 1 && clState->hasOpenCL11plus) || kernelinfo->interface == KL_SCRYPT)
+		kernelinfo->goffset = true;
 
 	if (data->work_size && data->work_size <= clState->max_work_size)
-		clState->wsize = data->work_size;
-	else if (opt_scrypt)
-		clState->wsize = 256;
-	else if (strstr(name, "Tahiti"))
-		clState->wsize = 64;
+		kernelinfo->wsize = data->work_size;
+	else
+#ifdef USE_SCRYPT
+	if (malgo->algo == POW_SCRYPT)
+		kernelinfo->wsize = 256;
+	else
+#endif
+	if (strstr(name, "Tahiti"))
+		kernelinfo->wsize = 64;
 	else
-		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-	data->work_size = clState->wsize;
+		kernelinfo->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
 
 #ifdef USE_SCRYPT
-	if (opt_scrypt) {
-		if (!data->opt_lg) {
-			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
-			data->lookup_gap = 2;
-		} else
-			data->lookup_gap = data->opt_lg;
-
-		if (!data->opt_tc) {
+	if (kernelinfo->interface == KL_SCRYPT)
+	{
+		if (!data->thread_concurrency)
+		{
 			unsigned int sixtyfours;
 
 			sixtyfours =  data->max_alloc / 131072 / 64 - 1;
@@ -784,8 +781,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 					data->thread_concurrency = data->shaders * 5;
 			}
 			applog(LOG_DEBUG, "GPU %u: selecting thread concurrency of %lu", gpu,  (unsigned long)data->thread_concurrency);
-		} else
-			data->thread_concurrency = data->opt_tc;
+		}
 	}
 #endif
 
@@ -800,27 +796,30 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	binary_sizes = calloc(sizeof(size_t) * MAX_GPUDEVICES * 4, 1);
 	if (unlikely(!binary_sizes)) {
 		applog(LOG_ERR, "Unable to calloc binary_sizes");
-		return NULL;
+		return false;
 	}
 	binaries = calloc(sizeof(char *) * MAX_GPUDEVICES * 4, 1);
 	if (unlikely(!binaries)) {
 		applog(LOG_ERR, "Unable to calloc binaries");
-		return NULL;
+		return false;
 	}
 
 	strcat(binaryfilename, name);
-	if (clState->goffset)
+	if (kernelinfo->goffset)
 		strcat(binaryfilename, "g");
-	if (opt_scrypt) {
 #ifdef USE_SCRYPT
+	if (kernelinfo->interface == KL_SCRYPT)
+	{
 		sprintf(numbuf, "lg%utc%u", data->lookup_gap, (unsigned int)data->thread_concurrency);
 		strcat(binaryfilename, numbuf);
+	}
+	else
 #endif
-	} else {
+	{
 		sprintf(numbuf, "v%d", clState->vwidth);
 		strcat(binaryfilename, numbuf);
 	}
-	sprintf(numbuf, "w%d", (int)clState->wsize);
+	sprintf(numbuf, "w%d", (int)kernelinfo->wsize);
 	strcat(binaryfilename, numbuf);
 	sprintf(numbuf, "l%d", (int)sizeof(long));
 	strcat(binaryfilename, numbuf);
@@ -852,7 +851,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		if (unlikely(!binaries[slot])) {
 			applog(LOG_ERR, "Unable to calloc binaries");
 			fclose(binaryfile);
-			return NULL;
+			return false;
 		}
 
 		if (fread(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot]) {
@@ -862,7 +861,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			goto build;
 		}
 
-		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)binaries, &status, NULL);
+		kernelinfo->program = clCreateProgramWithBinary(clState->context, 1, &clState->devid, &binary_sizes[slot], (const unsigned char **)binaries, &status, NULL);
 		if (status != CL_SUCCESS) {
 			applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
 			fclose(binaryfile);
@@ -881,26 +880,26 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	/////////////////////////////////////////////////////////////////
 
 build:
-	clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
+	kernelinfo->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithSource)", status);
-		return NULL;
+		return false;
 	}
 
 	/* create a cl program executable for all the devices specified */
 	char *CompilerOptions = calloc(1, 256);
 
 #ifdef USE_SCRYPT
-	if (opt_scrypt)
+	if (kernelinfo->interface == KL_SCRYPT)
 		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			data->lookup_gap, (unsigned int)data->thread_concurrency, (int)clState->wsize);
+			data->lookup_gap, (unsigned int)data->thread_concurrency, (int)kernelinfo->wsize);
 	else
 #endif
 	{
 		sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
-			(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
+			(int)kernelinfo->wsize, clState->vwidth, (int)kernelinfo->wsize * clState->vwidth);
 	}
-	applog(LOG_DEBUG, "Setting worksize to %"PRId64, (int64_t)clState->wsize);
+	applog(LOG_DEBUG, "Setting worksize to %"PRId64, (int64_t)kernelinfo->wsize);
 	if (clState->vwidth > 1)
 		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
 
@@ -945,34 +944,34 @@ build:
 	} else
 		applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");
 
-	if (clState->goffset)
+	if (kernelinfo->goffset)
 		strcat(CompilerOptions, " -D GOFFSET");
 
 	if (!clState->hasOpenCL11plus)
 		strcat(CompilerOptions, " -D OCL1");
 
 	applog(LOG_DEBUG, "CompilerOptions: %s", CompilerOptions);
-	status = bfg_clBuildProgram(clState, devices[gpu], CompilerOptions);
+	status = bfg_clBuildProgram(&kernelinfo->program, clState->devid, CompilerOptions);
 	free(CompilerOptions);
 
 	if (status != CL_SUCCESS)
-		return NULL;
+		return false;
 
 	prog_built = true;
 	
 	if (!(data->opt_opencl_binaries & OBU_SAVE))
 		goto built;
 
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
+	status = clGetProgramInfo(kernelinfo->program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_NUM_DEVICES. (clGetProgramInfo)", status);
-		return NULL;
+		return false;
 	}
 
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*cpnd, binary_sizes, NULL);
+	status = clGetProgramInfo(kernelinfo->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*cpnd, binary_sizes, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetProgramInfo)", status);
-		return NULL;
+		return false;
 	}
 
 	/* The actual compiled binary ends up in a RANDOM slot! Grr, so we have
@@ -987,13 +986,13 @@ build:
 	       gpu, (unsigned)slot, (int64_t)binary_sizes[slot]);
 	if (!binary_sizes[slot]) {
 		applog(LOG_ERR, "OpenCL compiler generated a zero sized binary, FAIL!");
-		return NULL;
+		return false;
 	}
 	binaries[slot] = calloc(sizeof(char) * binary_sizes[slot], 1);
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARIES, sizeof(char *) * cpnd, binaries, NULL );
+	status = clGetProgramInfo(kernelinfo->program, CL_PROGRAM_BINARIES, sizeof(char *) * cpnd, binaries, NULL );
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error %d: Getting program info. CL_PROGRAM_BINARIES (clGetProgramInfo)", status);
-		return NULL;
+		return false;
 	}
 
 	/* Patch the kernel if the hardware supports BFI_INT but it needs to
@@ -1030,16 +1029,16 @@ build:
 			w, remaining);
 		patch_opcodes(w, length);
 
-		status = clReleaseProgram(clState->program);
+		status = clReleaseProgram(kernelinfo->program);
 		if (status != CL_SUCCESS) {
 			applog(LOG_ERR, "Error %d: Releasing program. (clReleaseProgram)", status);
-			return NULL;
+			return false;
 		}
 
-		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)&binaries[slot], &status, NULL);
+		kernelinfo->program = clCreateProgramWithBinary(clState->context, 1, &clState->devid, &binary_sizes[slot], (const unsigned char **)&binaries[slot], &status, NULL);
 		if (status != CL_SUCCESS) {
 			applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
-			return NULL;
+			return false;
 		}
 
 		/* Program needs to be rebuilt */
@@ -1056,7 +1055,7 @@ build:
 	} else {
 		if (unlikely(fwrite(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot])) {
 			applog(LOG_ERR, "Unable to fwrite to binaryfile");
-			return NULL;
+			return false;
 		}
 		fclose(binaryfile);
 	}
@@ -1067,27 +1066,28 @@ built:
 	free(binary_sizes);
 
 	applog(LOG_INFO, "Initialising kernel %s with%s bitalign, %"PRId64" vectors and worksize %"PRIu64,
-	       filename, clState->hasBitAlign ? "" : "out", (int64_t)clState->vwidth, (uint64_t)clState->wsize);
+	       filename, clState->hasBitAlign ? "" : "out", (int64_t)clState->vwidth, (uint64_t)kernelinfo->wsize);
 
 	if (!prog_built) {
 		/* create a cl program executable for all the devices specified */
-		status = bfg_clBuildProgram(clState, devices[gpu], NULL);
+		status = bfg_clBuildProgram(&kernelinfo->program, clState->devid, NULL);
 		if (status != CL_SUCCESS)
-			return NULL;
+			return false;
 	}
 
 	/* get a kernel object handle for a kernel with the given name */
-	clState->kernel = clCreateKernel(clState->program, "search", &status);
+	kernelinfo->kernel = clCreateKernel(kernelinfo->program, "search", &status);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Creating Kernel from program. (clCreateKernel)", status);
-		return NULL;
+		return false;
 	}
 	
 	free((void*)cgpu->kname);
-	cgpu->kname = strdup(data->kernel_file);
+	cgpu->kname = strdup(kernel_file);
 
 #ifdef USE_SCRYPT
-	if (opt_scrypt) {
+	if (kernelinfo->interface == KL_SCRYPT && !clState->padbufsize)
+	{
 		size_t ipt = (1024 / data->lookup_gap + (1024 % data->lookup_gap > 0));
 		size_t bufsize = 128 * ipt * data->thread_concurrency;
 
@@ -1107,24 +1107,20 @@ built:
 		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
 		if (status != CL_SUCCESS && !clState->padbuffer8) {
 			applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
-			return NULL;
+			return false;
 		}
 
 		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
 		if (status != CL_SUCCESS) {
 			applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
-			return NULL;
+			return false;
 		}
-		clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, SCRYPT_BUFFERSIZE, NULL, &status);
-	} else
-#endif
-	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
-		return NULL;
 	}
+#endif
 
-	return clState;
+	kernelinfo->loaded = true;
+	return true;
 }
+
 #endif /* HAVE_OPENCL */
 

+ 29 - 8
ocl.h

@@ -11,11 +11,32 @@
 
 #include "miner.h"
 
-typedef struct {
-	cl_context context;
+struct opencl_kernel_info;
+typedef struct _clState _clState;
+
+typedef cl_int (*queue_kernel_parameters_func_t)(const struct opencl_kernel_info *, _clState *, struct work *, cl_uint);
+
+struct opencl_kernel_info {
+	bool loaded;
+	cl_program program;
 	cl_kernel kernel;
+	bool goffset;
+	enum cl_kernels interface;
+	size_t wsize;
+	queue_kernel_parameters_func_t queue_kernel_parameters;
+};
+
+struct _clState {
+	cl_device_id devid;
+	char *platform_ver_str;
+	bool is_mesa;
+	
+	cl_context context;
 	cl_command_queue commandQueue;
-	cl_program program;
+	
+	struct opencl_kernel_info kernel_sha256d;
+	struct opencl_kernel_info kernel_scrypt;
+	
 	cl_mem outputBuffer;
 #ifdef USE_SCRYPT
 	cl_mem CLbuffer0;
@@ -25,17 +46,17 @@ typedef struct {
 #endif
 	bool hasBitAlign;
 	bool hasOpenCL11plus;
-	bool goffset;
+	cl_uint preferred_vwidth;
 	cl_uint vwidth;
 	size_t max_work_size;
-	size_t wsize;
 	cl_uint max_compute_units;
-	enum cl_kernels chosen_kernel;
-} _clState;
+};
 
 extern FILE *opencl_open_kernel(const char *filename);
 extern char *file_contents(const char *filename, int *length);
+extern char *opencl_kernel_source(const char *filename, int *out_sourcelen, enum cl_kernels *out_kinterface);
 extern int clDevicesNum(void);
-extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize);
+extern _clState *opencl_create_clState(unsigned int gpu, char *name, size_t nameSize);
+extern bool opencl_load_kernel(struct cgpu_info *, _clState *clState, const char *name, struct opencl_kernel_info *, const char *kernel_file, const struct mining_algorithm *);
 #endif /* HAVE_OPENCL */
 #endif /* __OCL_H__ */

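The ocl.h changes above replace the old single program/kernel pair in _clState with one struct opencl_kernel_info per algorithm (kernel_sha256d, kernel_scrypt), and split initCl() into opencl_create_clState() plus opencl_load_kernel(), which now reports failure as false rather than a NULL state. A minimal sketch of how a caller might pick and lazily build the right kernel slot; the helper name, the kernel filenames, and passing cgpu->name as the name argument are illustrative assumptions, not part of the patch:

	static struct opencl_kernel_info *example_select_kernel(struct cgpu_info * const cgpu,
	                                                        _clState * const clState,
	                                                        const struct mining_algorithm * const malgo)
	{
		struct opencl_kernel_info *kinfo;
		const char *kernel_file;
		
		/* Each algorithm gets its own kernel slot in the shared clState */
		if (malgo->algo == POW_SCRYPT)
		{
			kinfo = &clState->kernel_scrypt;
			kernel_file = "scrypt.cl";   /* placeholder filename */
		}
		else
		{
			kinfo = &clState->kernel_sha256d;
			kernel_file = "poclbm.cl";   /* placeholder filename */
		}
		
		/* Build the program/kernel on first use; failure is now a bool, not a NULL _clState */
		if (!kinfo->loaded && !opencl_load_kernel(cgpu, clState, cgpu->name, kinfo, kernel_file, malgo))
			return NULL;
		
		return kinfo;
	}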
+ 3 - 3
scrypt.c

@@ -489,15 +489,15 @@ void scrypt_regenhash(struct work *work)
 }
 
 /* Used by test_nonce functions */
-void scrypt_hash_data(unsigned char * const out_hash, const unsigned char * const pdata)
+void scrypt_hash_data(void * const out_hash, const void * const pdata)
 {
 	uint32_t data[20], ohash[8];
 	char *scratchbuf;
 
-	be32enc_vect(data, (const uint32_t *)pdata, 20);
+	be32enc_vect(data, pdata, 20);
 	scratchbuf = alloca(SCRATCHBUF_SIZE);
 	scrypt_1024_1_1_256_sp(data, scratchbuf, ohash);
-	swap32tobe((void*)out_hash, ohash, 32/4);
+	swap32tobe(out_hash, ohash, 32/4);
 }
 
 bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,

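With scrypt_hash_data() now taking void * (mirroring the hash_data() change in util.c below), callers can hash a serialized header into any 32-byte buffer without casts. A minimal usage sketch, assuming a populated 80-byte header and a pdiff-style target, using hash_target_check() from util.h:

	uint8_t header[80];  /* assumed: fully serialized block header */
	uint8_t hash[32], target[32];
	
	scrypt_hash_data(hash, header);  /* no casts needed now that both arguments are void * */
	if (hash_target_check(hash, target))
		applog(LOG_DEBUG, "scrypt hash meets target");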
+ 1 - 1
scrypt.h

@@ -7,7 +7,7 @@
 
 #ifdef USE_SCRYPT
 extern void test_scrypt(void);
-extern void scrypt_hash_data(unsigned char *out_hash, const unsigned char *data);
+extern void scrypt_hash_data(void *digest, const void *data);
 extern void scrypt_regenhash(struct work *work);
 
 #else /* USE_SCRYPT */

+ 93 - 12
util.c

@@ -790,7 +790,7 @@ char *ucs2_to_utf8_dup(uint16_t * const in, size_t sz)
 	return out;
 }
 
-void hash_data(unsigned char *out_hash, const unsigned char *data)
+void hash_data(void *out_hash, const void *data)
 {
 	unsigned char blkheader[80];
 	
@@ -1957,6 +1957,17 @@ bool isCalpha(const int c)
 	return false;
 }
 
+bool match_strtok(const char * const optlist, const char * const delim, const char * const needle)
+{
+	const size_t optlist_sz = strlen(optlist) + 1;
+	char opts[optlist_sz];
+	memcpy(opts, optlist, optlist_sz);
+	for (char *el, *nextptr, *s = opts; (el = strtok_r(s, delim, &nextptr)); s = NULL)
+		if (!strcasecmp(el, needle))
+			return true;
+	return false;
+}
+
 static
 bool _appdata_file_call(const char * const appname, const char * const filename, const appdata_file_callback_t cb, void * const userp, const char * const path)
 {
@@ -2553,11 +2564,18 @@ static bool parse_notify(struct pool *pool, json_t *val)
 	pool->submit_old = !clean;
 	pool->swork.clean = true;
 	
-	if (pool->next_goalreset)
+	// stratum_set_goal ensures these are the same pointer if they match
+	if (pool->goalname != pool->next_goalname)
 	{
-		pool->next_goalreset = false;
+		free(pool->goalname);
+		pool->goalname = pool->next_goalname;
 		mining_goal_reset(pool->goal);
 	}
+	if (pool->next_goal_malgo)
+	{
+		goal_set_malgo(pool->goal, pool->next_goal_malgo);
+		pool->next_goal_malgo = NULL;
+	}
 	
 	if (pool->next_nonce1)
 	{
@@ -2635,6 +2653,8 @@ out:
 
 static bool parse_diff(struct pool *pool, json_t *val)
 {
+	const struct mining_goal_info * const goal = pool->goal;
+	const struct mining_algorithm * const malgo = goal->malgo;
 	double diff;
 
 	diff = json_number_value(json_array_get(val, 0));
@@ -2648,7 +2668,7 @@ static bool parse_diff(struct pool *pool, json_t *val)
 		diff = bdiff_to_pdiff(diff);
 	}
 	
-	if ((!opt_scrypt) && diff < 1 && diff > 0.999)
+	if (malgo->algo == POW_SHA256D && diff < 1 && diff > 0.999)
 		diff = 1;
 	
 #ifdef USE_SCRYPT
@@ -2666,7 +2686,7 @@ static bool parse_diff(struct pool *pool, json_t *val)
 	// Diff 16 at 1.15 Gh/s = 1 share / 60s
 	// Diff 16 at 7.00 Gh/s = 1 share / 10s
 
-	if (opt_scrypt && (diff >= minimum_broken_scrypt_diff))
+	if (malgo->algo == POW_SCRYPT && (diff >= minimum_broken_scrypt_diff))
 		diff /= broken_scrypt_diff_multiplier;
 #endif
 
@@ -2751,7 +2771,63 @@ bool stratum_set_goal(struct pool * const pool, json_t * const val, json_t * con
 	if (!uri_get_param_bool(pool->rpc_url, "goalreset", false))
 		return false;
 	
-	pool->next_goalreset = true;
+	const char * const new_goalname = __json_array_string(params, 0);
+	struct mining_algorithm *new_malgo = NULL;
+	const char *emsg = NULL;
+	
+	if (json_is_array(params) && json_array_size(params) > 1)
+	{
+		json_t * const j_goaldesc = json_array_get(params, 1);
+		if (json_is_object(j_goaldesc))
+		{
+			json_t * const j_malgo = json_object_get(j_goaldesc, "malgo");
+			if (j_malgo && json_is_string(j_malgo))
+			{
+				const char * const newvalue = json_string_value(j_malgo);
+				new_malgo = mining_algorithm_by_alias(newvalue);
+				// Even if it's the current malgo, we should reset next_goal_malgo in case of a prior set_goal
+				if (new_malgo == pool->goal->malgo)
+				{}  // Do nothing, assignment takes place below
+				else if (new_malgo && uri_get_param_bool(pool->rpc_url, "change_goal_malgo", false))
+				{}  // Do nothing, assignment takes place below
+				else
+				{
+					emsg = "Mining algorithm not supported";
+					// Ignore even the goal name, if we are failing
+					goto out;
+				}
+				if (new_malgo == pool->goal->malgo)
+					new_malgo = NULL;
+			}
+		}
+	}
+	
+	// Even if the goal name is not changing, we need to adopt any configuration change
+	pool->next_goal_malgo = new_malgo;
+	
+	if (pool->next_goalname && pool->next_goalname != pool->goalname)
+		free(pool->next_goalname);
+	
+	// This compares goalname to new_goalname, but matches NULL correctly :)
+	if (pool->goalname ? (new_goalname && !strcmp(pool->goalname, new_goalname)) : !new_goalname)
+		pool->next_goalname = pool->goalname;
+	else
+		pool->next_goalname = maybe_strdup(new_goalname);
+	
+out: ;
+	json_t * const j_id = json_object_get(val, "id");
+	if (j_id && !json_is_null(j_id))
+	{
+		char * const idstr = json_dumps_ANY(j_id, 0);
+		char buf[0x80];
+		if (unlikely(emsg))
+			snprintf(buf, sizeof(buf), "{\"id\":%s,\"result\":null,\"error\":[-1,\"%s\",null]}", idstr, emsg);
+		else
+			snprintf(buf, sizeof(buf), "{\"id\":%s,\"result\":true,\"error\":null}", idstr);
+		free(idstr);
+		stratum_send(pool, buf, strlen(buf));
+	}
+	
 	return true;
 }
 
@@ -2921,7 +2997,7 @@ bool parse_method(struct pool *pool, char *s)
 		goto out;
 	}
 	
-	// Usage: mining.set_goal("goal name", [reserved])
+	// Usage: mining.set_goal("goal name", {"malgo":"SHA256d", ...})
 	if (!strncasecmp(buf, "mining.set_goal", 15) && stratum_set_goal(pool, val, params))
 		return_via(out, ret = true);
 	
@@ -3199,6 +3275,16 @@ resend:
 		recvd = true;
 	}
 	
+	if (uri_get_param_bool(pool->rpc_url, "goalreset", false))
+	{
+		// Default: ["notify", "set_difficulty"] (but these must be explicit if mining.capabilities is used)
+		sprintf(s, "{\"id\":null,\"method\":\"mining.capabilities\",\"params\":[[\"notify\",\"set_difficulty\",\"set_goal\"]");
+		if (request_target_str)
+			tailsprintf(s, sizeof(s), ", {\"suggested_target\":\"%s\"}", request_target_str);
+		tailsprintf(s, sizeof(s), "]}");
+		_stratum_send(pool, s, strlen(s), true);
+	}
+	
 	if (noresume) {
 		sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": []}", swork_id++);
 	} else {
@@ -3302,11 +3388,6 @@ out:
 			sprintf(s, "{\"id\": \"xnsub\", \"method\": \"mining.extranonce.subscribe\", \"params\": []}");
 			_stratum_send(pool, s, strlen(s), true);
 		}
-		if (uri_get_param_bool(pool->rpc_url, "goalreset", false))
-		{
-			sprintf(s, "{\"id\": \"goalsub\", \"method\": \"mining.goal.subscribe\", \"params\": []}");
-			_stratum_send(pool, s, strlen(s), true);
-		}
 	} else {
 		if (recvd)
 		{

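For reference, the stratum handling added above announces "set_goal" support via mining.capabilities (replacing the old mining.goal.subscribe request) and then accepts an exchange like the one below. The id, goal name, and algorithm alias are illustrative; the feature is only honoured when the pool URI enables goalreset, and switching algorithms additionally requires change_goal_malgo:

	server -> miner: {"id": 4, "method": "mining.set_goal", "params": ["examplegoal", {"malgo": "scrypt"}]}
	miner -> server: {"id":4,"result":true,"error":null}

and, when the algorithm is unknown or switching is not allowed:

	miner -> server: {"id":4,"result":null,"error":[-1,"Mining algorithm not supported",null]}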
+ 3 - 1
util.h

@@ -125,6 +125,8 @@ bool isCspace(int c)
 	}
 }
 
+extern bool match_strtok(const char *optlist, const char *delim, const char *needle);
+
 typedef bool (*appdata_file_callback_t)(const char *, void *);
 extern bool appdata_file_call(const char *appname, const char *filename, appdata_file_callback_t, void *userp);
 extern char *appdata_file_find_first(const char *appname, const char *filename);
@@ -192,7 +194,7 @@ extern char *ucs2_to_utf8_dup(uint16_t *in, size_t sz);
 }while(0)
 
 extern void gen_hash(unsigned char *data, unsigned char *hash, int len);
-extern void hash_data(unsigned char *out_hash, const unsigned char *data);
+extern void hash_data(void *digest, const void *data);
 extern void real_block_target(unsigned char *target, const unsigned char *data);
 extern bool hash_target_check(const unsigned char *hash, const unsigned char *target);
 extern bool hash_target_check_v(const unsigned char *hash, const unsigned char *target);
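
The new match_strtok() declared above performs a case-insensitive membership test against a delimiter-separated option list, tokenizing a local copy so the input string is never modified. A small usage sketch:

	/* true: "SCRYPT" matches the second element, case-insensitively */
	if (match_strtok("sha256d,scrypt", ",", "SCRYPT"))
		applog(LOG_DEBUG, "scrypt is listed");
	
	/* false: tokens must match whole elements, so "sha256" does not match "sha256d" */
	if (!match_strtok("sha256d,scrypt", ",", "sha256"))
		applog(LOG_DEBUG, "partial tokens do not match");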