Browse Source

Merge branch 'master' of git://github.com/ckolivas/cgminer.git

Paul Sheppard 13 years ago
parent
commit
a86b592c74
22 changed files with 856 additions and 353 deletions
  1. 7 0
      API-README
  2. 74 2
      FPGA-README
  3. 57 0
      NEWS
  4. 1 1
      README
  5. 10 1
      api.c
  6. 131 74
      cgminer.c
  7. 22 24
      configure.ac
  8. 1 6
      diablo121016.cl
  9. 1 6
      diakgcn121016.cl
  10. 13 18
      driver-icarus.c
  11. 256 100
      driver-modminer.c
  12. 20 52
      driver-opencl.c
  13. 93 0
      fpgautils.c
  14. 43 0
      fpgautils.h
  15. 13 11
      miner.h
  16. 6 1
      ocl.c
  17. 1 6
      phatk121016.cl
  18. 1 6
      poclbm121016.cl
  19. 17 7
      scrypt.c
  20. 8 0
      scrypt.h
  21. 1 6
      scrypt121016.cl
  22. 80 32
      util.c

+ 7 - 0
API-README

@@ -383,6 +383,13 @@ miner.php - an example web page to access the API
 Feature Changelog for external applications using the API:
 
 
+API V1.20
+
+Modified API commands:
+ 'pools' - add 'Has Stratum', 'Stratum Active', 'Stratum URL'
+
+----------
+
 API V1.19 (cgminer v2.7.6)
 
 Added API commands:

+ 74 - 2
FPGA-README

@@ -2,7 +2,78 @@
 This README contains extended details about FPGA mining with cgminer
 
 
-Bitforce
+ModMinerQuad (MMQ)
+------------------
+
+The mining bitstream does not survive a power cycle, so cgminer will upload
+it, if it needs to, before it starts mining
+
+-
+
+You must make sure you have an approriate firmware in your MMQ
+Read here for official details of changing the firmware:
+ http://wiki.btcfpga.com/index.php?title=Firmware
+
+The basics of changing the firmware are:
+ You need two short pieces of conductive wire if your MMQ doesn't have
+ buttons on the "RESET" and "ISP" pads on the backplane board
+ Cutting a small (metal) paper-clip in half works well for this
+
+ Join the 2 left pads of the "RESET" pad with wire and the led will dim
+ Without disconnecting the "RESET", join the 2 left pads of the "ISP" pad
+ with a wire and it will stay dim
+ Release "RESET" then release "ISP" and is should still be dim
+ Unplug the USB and when you plug it back in it will show up as a mass
+ storage device
+  Linux: (as one single line):
+   mcopy -i /dev/disk/by-id/usb-NXP_LPC134X_IFLASH_ISP000000000-0:0
+      modminer091012.bin ::/firmware.bin
+  Windows: delete the MSD device file firmware.bin and copy in the new one
+   rename the new file and put it under the same name 'firmware.bin'
+ Disconnect the USB correctly (so writes are flushed first)
+ Join and then disconnect "RESET" and then plug the USB back in and it's done
+
+Best to update to one of the latest 2 listed below if you don't already
+have one of them in your MMQ
+
+The current latest different firmware are:
+
+ Latest for support of normal or TLM bitstream:
+  http://btcfpga.com/files/firmware/modminer092612-TLM.bin
+
+ Latest with only normal bitstream support (Temps/HW Fix):
+  http://btcfpga.com/files/firmware/modminer091012.bin
+
+The code is currently tested on the modminer091012.bin firmware.
+This comment will be updated when others have been tested
+
+-
+
+On many linux distributions there is an app called modem-manager that
+may cause problems when it is enabled, due to opening the MMQ device
+and writing to it
+
+The problem will typically present itself by the flashing led on the
+backplane going out (no longer flashing) and it takes a power cycle to
+re-enable the MMQ firmware - which then can lead to the problem happening
+again
+
+You can either disable/uninstall modem-manager if you don't need it or:
+a (hack) solution to this is to blacklist the MMQ USB device in
+/lib/udev/rules.d/77-mm-usb-device-blacklist.rules
+
+Adding 2 lines like this (just above APC) should help
+# MMQ
+ATTRS{idVendor}=="ifc9", ATTRS{idProduct}=="0003", ENV{ID_MM_DEVICE_IGNORE}="1"
+
+The change will be lost and need to be re-done, next time you update the
+modem-manager software
+
+TODO: check that all MMQ's have the same product ID
+
+
+Bitforce (BFL)
+--------------
 
 --bfl-range         Use nonce range on bitforce devices if supported
 
@@ -37,7 +108,8 @@ the MH/s value reported with the changed firmware - and the MH/s reported
 will be less than the firmware speed since you lose work on every block change.
 
 
-Icarus
+Icarus (ICA)
+------------
 
 There are two hidden options in cgminer when Icarus support is compiled in:
 

+ 57 - 0
NEWS

@@ -1,3 +1,60 @@
+Version 2.8.5 - October 23, 2012
+
+- Handle crash exceptions by trying to restart cgminer unless the --no-restart
+option is used.
+- Switch queued count when choosing a different pool from a failed stratum pool
+in getwork thread.
+- Put a mandatory 5s wait between reattempting a getwork on failure to avoid
+hammering requests.
+- The ATI stream / AMD APP SDK environment variables appear to only interfere
+with win32 builds so bypass them.
+- Make sure to check pool stratum curl exists under lock before attempting any
+recv to not risk dereferencing upon attempting to reinitiate stratum.
+- Avoid redefining macros and align to 4 byte boundaries.
+- API - add Stratum information to pools
+- update FPGA-README for MMQ
+
+
+Version 2.8.4 - October 18, 2012
+
+- Time for dynamic is in microseconds, not ms.
+- x86_64 builds of mingw32 are not supported directly and should just configure
+as generic mingw32 builds since they're NOT 64 bit.
+- Cope with both ATI stream and AMD APP SDK roots being set when building.
+- Use 3 significant digits when suffix string is used and values are >1000.
+- MMQ new initialisation (that works) and clocking control
+- Get rid of unused warning for !scrypt.
+- Use select on stratum send to make sure the socket is writeable.
+- Cope with dval being zero in suffix_string and display a single decimal place
+when significant digits is not specified but the value is greater than 1000.
+- Pad out the suffix string function with zeroes on the right.
+- Failure to calloc in bin2hex is a fatal failure always so just check for that
+failure within the function and abort, simplifying the rest of the code.
+- Provide locking around the change of the stratum curl structures to avoid
+possible races.
+- Bump opencl kernel version numbers.
+- Remove atomic ops from opencl kernels given rarity of more than once nonce on
+the same wavefront and the potential increased ramspeed requirements to use the
+atomics.
+- Clear the pool idle flag in stratum when it comes back to life.
+- Display correct share hash and share difficulty with scrypt mining.
+- Use explicit host to BE functions in scrypt code instead of hard coding
+byteswap everywhere.
+- Show work target diff for scrypt mining.
+- Ease the checking on allocation of padbuffer8 in the hope it works partially
+anyway on an apparently failed call.
+- Watch for buffer overflows on receiving data into the socket buffer.
+- Round target difficulties down to be in keeping with the rounding of detected
+share difficulties.
+- Dramatically simplify the dynamic intensity calculation by oversampling many
+runs through the opencl kernel till we're likely well within the timer
+resolution on windows.
+- String alignment to 4 byte boundaries and optimisations for bin<->hex
+conversions.
+- In opencl_free_work, make sure to still flush results in dynamic mode.
+- Align static arrays to 4 byte boundaries to appease ARM builds for stratum.
+
+
 Version 2.8.3 - October 12, 2012
 
 - Left align values that are suffix_string generated.

+ 1 - 1
README

@@ -153,6 +153,7 @@ Options for both config file and command line:
 --monitor|-m <arg>  Use custom pipe cmd for output messages
 --net-delay         Impose small delays in networking to not overload slow routers
 --no-pool-disable   Do not automatically disable pools that continually reject shares
+--no-restart        Do not attempt to restart GPUs that hang or cgminer if it crashes
 --no-submit-stale   Don't submit shares if they are detected as stale
 --pass|-p <arg>     Password for bitcoin JSON-RPC server
 --per-device-stats  Force verbose mode and output per-device statistics
@@ -203,7 +204,6 @@ GPU only options:
 --intensity|-I <arg> Intensity of GPU scanning (d or -10 -> 10, default: d to maintain desktop interactivity)
 --kernel|-k <arg>   Override kernel to use (diablo, poclbm, phatk or diakgcn) - one value or comma separated
 --ndevs|-n          Enumerate number of detected GPUs and exit
---no-restart        Do not attempt to restart GPUs that hang
 --temp-hysteresis <arg> Set how much the temperature can fluctuate outside limits when automanaging speeds (default: 3)
 --temp-overheat <arg> Overheat temperature when automatically managing fan and GPU speeds (default: 85)
 --temp-target <arg> Target temperature when automatically managing fan and GPU speeds (default: 75)

+ 10 - 1
api.c

@@ -131,7 +131,7 @@ static const char SEPARATOR = '|';
 #define SEPSTR "|"
 static const char GPUSEP = ',';
 
-static const char *APIVERSION = "1.19";
+static const char *APIVERSION = "1.20";
 static const char *DEAD = "Dead";
 #if defined(HAVE_OPENCL) || defined(HAVE_AN_FPGA)
 static const char *SICK = "Sick";
@@ -1810,6 +1810,9 @@ static void poolstatus(__maybe_unused SOCKETTYPE c, __maybe_unused char *param,
 	for (i = 0; i < total_pools; i++) {
 		struct pool *pool = pools[i];
 
+		if (pool->removed)
+			continue;
+
 		switch (pool->enabled) {
 			case POOL_DISABLED:
 				status = (char *)DISABLED;
@@ -1859,6 +1862,12 @@ static void poolstatus(__maybe_unused SOCKETTYPE c, __maybe_unused char *param,
 		root = api_add_diff(root, "Difficulty Rejected", &(pool->diff_rejected), false);
 		root = api_add_diff(root, "Difficulty Stale", &(pool->diff_stale), false);
 		root = api_add_diff(root, "Last Share Difficulty", &(pool->last_share_diff), false);
+		root = api_add_bool(root, "Has Stratum", &(pool->has_stratum), false);
+		root = api_add_bool(root, "Stratum Active", &(pool->stratum_active), false);
+		if (pool->stratum_active)
+			root = api_add_escape(root, "Stratum URL", pool->stratum_url, false);
+		else
+			root = api_add_const(root, "Stratum URL", BLANK, false);
 
 		if (isjson && (i > 0))
 			strcat(io_buffer, COMMA);

+ 131 - 74
cgminer.c

@@ -47,6 +47,7 @@
 #include "driver-cpu.h"
 #include "driver-opencl.h"
 #include "bench_block.h"
+#include "scrypt.h"
 
 #if defined(unix)
 	#include <errno.h>
@@ -260,7 +261,7 @@ static int include_count;
 
 bool ping = true;
 
-struct sigaction termhandler, inthandler;
+struct sigaction termhandler, inthandler, segvhandler, bushandler, illhandler;
 
 struct thread_q *getq;
 
@@ -378,25 +379,8 @@ static void sharelog(const char*disposition, const struct work*work)
 	pool = work->pool;
 	t = (unsigned long int)(work->tv_work_found.tv_sec);
 	target = bin2hex(work->target, sizeof(work->target));
-	if (unlikely(!target)) {
-		applog(LOG_ERR, "sharelog target OOM");
-		return;
-	}
-
 	hash = bin2hex(work->hash, sizeof(work->hash));
-	if (unlikely(!hash)) {
-		free(target);
-		applog(LOG_ERR, "sharelog hash OOM");
-		return;
-	}
-
 	data = bin2hex(work->data, sizeof(work->data));
-	if (unlikely(!data)) {
-		free(target);
-		free(hash);
-		applog(LOG_ERR, "sharelog data OOM");
-		return;
-	}
 
 	// timestamp,disposition,target,pool,dev,thr,sharehash,sharedata
 	rv = snprintf(s, sizeof(s), "%lu,%s,%s,%s,%s%u,%u,%s,%s\n", t, disposition, target, pool->rpc_url, cgpu->api->name, cgpu->device_id, thr_id, hash, data);
@@ -991,7 +975,7 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--no-restart",
 			opt_set_invbool, &opt_restart,
 #ifdef HAVE_OPENCL
-			"Do not attempt to restart GPUs that hang"
+			"Do not attempt to restart GPUs that hang or cgminer if it crashes"
 #else
 			opt_hidden
 #endif
@@ -1487,6 +1471,7 @@ static void suffix_string(uint64_t val, char *buf, int sigdigits)
 	const uint64_t peta = 1000000000000000ull;
 	const uint64_t exa  = 1000000000000000000ull;
 	char suffix[2] = "";
+	bool decimal = true;
 	double dval;
 
 	if (val >= exa) {
@@ -1512,13 +1497,23 @@ static void suffix_string(uint64_t val, char *buf, int sigdigits)
 	} else if (val >= kilo) {
 		dval = (double)val / dkilo;
 		sprintf(suffix, "K");
-	} else
+	} else {
 		dval = val;
+		decimal = false;
+	}
 
-	if (!sigdigits)
-		sprintf(buf, "%d%s", (unsigned int)dval, suffix);
-	else
-		sprintf(buf, "%-*.*g%s", sigdigits + 1, sigdigits, dval, suffix);
+	if (!sigdigits) {
+		if (decimal)
+			sprintf(buf, "%.3g%s", dval, suffix);
+		else
+			sprintf(buf, "%d%s", (unsigned int)dval, suffix);
+	} else {
+		/* Always show sigdigits + 1, padded on right with zeroes
+		 * followed by suffix */
+		int ndigits = sigdigits - 1 - (dval > 0.0 ? floor(log10(dval)) : 0);
+
+		sprintf(buf, "%*.*f%s", sigdigits + 1, ndigits, dval, suffix);
+	}
 }
 
 static void get_statline(char *buf, struct cgpu_info *cgpu)
@@ -1971,11 +1966,12 @@ share_result(json_t *val, json_t *res, json_t *err, const struct work *work,
 	}
 }
 
+static const uint64_t diffone = 0xFFFF000000000000ull;
+
 static uint64_t share_diff(const struct work *work)
 {
-	const uint64_t h64 = 0xFFFF000000000000ull;
 	uint64_t *data64, d64;
-	char rhash[33];
+	char rhash[36];
 	uint64_t ret;
 
 	swab256(rhash, work->hash);
@@ -1983,11 +1979,21 @@ static uint64_t share_diff(const struct work *work)
 	d64 = be64toh(*data64);
 	if (unlikely(!d64))
 		d64 = 1;
-	ret = h64 / d64;
+	ret = diffone / d64;
 	return ret;
 }
 
-static bool submit_upstream_work(const struct work *work, CURL *curl, bool resubmit)
+static uint32_t scrypt_diff(const struct work *work)
+{
+	const uint32_t scrypt_diffone = 0x0000fffful;
+	uint32_t d32 = work->outputhash;
+
+	if (unlikely(!d32))
+		d32 = 1;
+	return scrypt_diffone / d32;
+}
+
+static bool submit_upstream_work(struct work *work, CURL *curl, bool resubmit)
 {
 	char *hexstr = NULL;
 	json_t *val, *res, *err;
@@ -2010,10 +2016,6 @@ static bool submit_upstream_work(const struct work *work, CURL *curl, bool resub
 
 	/* build hex string */
 	hexstr = bin2hex(work->data, sizeof(work->data));
-	if (unlikely(!hexstr)) {
-		applog(LOG_ERR, "submit_upstream_work OOM");
-		goto out_nofree;
-	}
 
 	/* build JSON-RPC request */
 	sprintf(s,
@@ -2044,13 +2046,20 @@ static bool submit_upstream_work(const struct work *work, CURL *curl, bool resub
 	err = json_object_get(val, "error");
 
 	if (!QUIET) {
+		int intdiff = floor(work->work_difficulty);
+		char diffdisp[16];
+
 		hash32 = (uint32_t *)(work->hash);
-		if (opt_scrypt)
-			sprintf(hashshow, "%08lx.%08lx", (unsigned long)(hash32[7]), (unsigned long)(hash32[6]));
-		else {
-			int intdiff = round(work->work_difficulty);
+		if (opt_scrypt) {
+			uint32_t sharediff;
+
+			scrypt_outputhash(work);
+			sharediff = scrypt_diff(work);
+			suffix_string(sharediff, diffdisp, 0);
+
+			sprintf(hashshow, "%08lx Diff %s/%d", (unsigned long)work->outputhash, diffdisp, intdiff);
+		} else {
 			uint64_t sharediff = share_diff(work);
-			char diffdisp[16];
 
 			suffix_string(sharediff, diffdisp, 0);
 
@@ -2118,7 +2127,6 @@ static bool submit_upstream_work(const struct work *work, CURL *curl, bool resub
 	rc = true;
 out:
 	free(hexstr);
-out_nofree:
 	return rc;
 }
 
@@ -2186,11 +2194,21 @@ static double DIFFEXACTONE = 269599466671506397946670150870196306736371444225405
 static void calc_diff(struct work *work, int known)
 {
 	struct cgminer_pool_stats *pool_stats = &(work->pool->cgminer_pool_stats);
-	double targ;
-	int i;
 
-	if (!known) {
-		targ = 0;
+	if (opt_scrypt) {
+		uint64_t *data64, d64;
+		char rtarget[36];
+
+		swab256(rtarget, work->target);
+		data64 = (uint64_t *)(rtarget + 2);
+		d64 = be64toh(*data64);
+		if (unlikely(!d64))
+			d64 = 1;
+		work->work_difficulty = diffone / d64;
+	} else if (!known) {
+		double targ = 0;
+		int i;
+
 		for (i = 31; i >= 0; i--) {
 			targ *= 256;
 			targ += work->target[i];
@@ -2420,13 +2438,8 @@ char **initial_args;
 
 static void clean_up(void);
 
-void app_restart(void)
+static inline void __app_restart(void)
 {
-	applog(LOG_WARNING, "Attempting to restart %s", packagename);
-
-	__kill_work();
-	clean_up();
-
 #if defined(unix)
 	if (forkpid > 0) {
 		kill(forkpid, SIGTERM);
@@ -2435,17 +2448,55 @@ void app_restart(void)
 #endif
 
 	execv(initial_args[0], initial_args);
+}
+
+void app_restart(void)
+{
+	applog(LOG_WARNING, "Attempting to restart %s", packagename);
+
+	__kill_work();
+	clean_up();
+
+	__app_restart();
+
+	/* We shouldn't reach here */
 	applog(LOG_WARNING, "Failed to restart application");
 }
 
-static void sighandler(int __maybe_unused sig)
+/* Returns all signal handlers to their defaults */
+static inline void __sighandler(void)
 {
 	/* Restore signal handlers so we can still quit if kill_work fails */
 	sigaction(SIGTERM, &termhandler, NULL);
 	sigaction(SIGINT, &inthandler, NULL);
+	if (opt_restart) {
+		sigaction(SIGSEGV, &segvhandler, NULL);
+		sigaction(SIGILL, &illhandler, NULL);
+#ifndef WIN32
+		sigaction(SIGBUS, &bushandler, NULL);
+#endif
+	}
+}
+
+static void sighandler(int __maybe_unused sig)
+{
+	__sighandler();
 	kill_work();
 }
 
+/* Handles segfaults and other crashes by attempting to restart cgminer. Try to
+ * do as little as possible since we are probably corrupted. */
+static void seghandler(int sig)
+{
+	__sighandler();
+	fprintf(stderr, "\nCrashed with signal %d! Will attempt to restart\n", sig);
+	__app_restart();
+	/* We shouldn't reach here */
+	fprintf(stderr, "Failed to restart, exiting now\n");
+
+	exit(1);
+}
+
 /* Called with pool_lock held. Recruit an extra curl if none are available for
  * this pool. */
 static void recruit_curl(struct pool *pool)
@@ -2663,11 +2714,13 @@ retry:
 		while (!pool->stratum_active) {
 			struct pool *altpool = select_pool(true);
 
+			sleep(5);
 			if (altpool != pool) {
 				wc->pool = altpool;
+				inc_queued(altpool);
+				dec_queued(pool);
 				goto retry;
 			}
-			sleep(5);
 		}
 		ret_work = make_work();
 		gen_stratum_work(pool, ret_work);
@@ -2699,7 +2752,8 @@ retry:
 
 		/* obtain new work from bitcoin via JSON-RPC */
 		if (!get_upstream_work(ret_work, ce->curl)) {
-			applog(LOG_DEBUG, "json_rpc_call failed on get work, retrying");
+			applog(LOG_DEBUG, "Pool %d json_rpc_call failed on get work, retrying in 5s", pool->pool_no);
+			sleep(5);
 			dec_queued(pool);
 			/* Make sure the pool just hasn't stopped serving
 			 * requests but is up as we'll keep hammering it */
@@ -3129,10 +3183,6 @@ static inline bool from_existing_block(struct work *work)
 	char *hexstr = bin2hex(work->data + 8, 18);
 	bool ret;
 
-	if (unlikely(!hexstr)) {
-		applog(LOG_ERR, "from_existing_block OOM");
-		return true;
-	}
 	ret = block_exists(hexstr);
 	free(hexstr);
 	return ret;
@@ -3152,10 +3202,6 @@ static bool test_work_current(struct work *work)
 		return ret;
 
 	hexstr = bin2hex(work->data + 8, 18);
-	if (unlikely(!hexstr)) {
-		applog(LOG_ERR, "stage_thread OOM");
-		return ret;
-	}
 
 	/* Search to see if this block exists yet and if not, consider it a
 	 * new block and set the current block details to this one */
@@ -4146,7 +4192,7 @@ static void stratum_share_result(json_t *val, json_t *res_val, json_t *err_val,
 	int intdiff;
 
 	hash32 = (uint32_t *)(work->hash);
-	intdiff = round(work->work_difficulty);
+	intdiff = floor(work->work_difficulty);
 	suffix_string(sharediff, diffdisp, 0);
 	sprintf(hashshow, "%08lx Diff %s/%d%s", (unsigned long)(hash32[6]), diffdisp, intdiff,
 		work->block? " BLOCK!" : "");
@@ -4258,6 +4304,7 @@ static void *stratum_thread(void *userdata)
 				sleep(30);
 			}
 			applog(LOG_INFO, "Stratum connection to pool %d resumed", pool->pool_no);
+			pool_tclear(pool, &pool->idle);
 			pool_resus(pool);
 			continue;
 		}
@@ -4591,7 +4638,7 @@ static struct work *clone_work(struct work *work)
 
 static void gen_hash(unsigned char *data, unsigned char *hash, int len)
 {
-	unsigned char hash1[33];
+	unsigned char hash1[36];
 
 	sha2(data, len, hash1, false);
 	sha2(hash1, 32, hash, false);
@@ -4603,10 +4650,10 @@ static void gen_hash(unsigned char *data, unsigned char *hash, int len)
  * cover a huge range of difficulty targets, though not all 256 bits' worth */
 static void set_work_target(struct work *work, int diff)
 {
-	unsigned char rtarget[33], target[33];
+	unsigned char rtarget[36], target[36];
 	uint64_t *data64, h64;
 
-	h64 = 0xFFFF000000000000ull;
+	h64 = diffone;
 	h64 /= (uint64_t)diff;
 	memset(rtarget, 0, 32);
 	data64 = (uint64_t *)(rtarget + 4);
@@ -4615,10 +4662,8 @@ static void set_work_target(struct work *work, int diff)
 	if (opt_debug) {
 		char *htarget = bin2hex(target, 32);
 
-		if (likely(htarget)) {
-			applog(LOG_DEBUG, "Generated target %s", htarget);
-			free(htarget);
-		}
+		applog(LOG_DEBUG, "Generated target %s", htarget);
+		free(htarget);
 	}
 	memcpy(work->target, target, 32);
 }
@@ -4628,8 +4673,8 @@ static void set_work_target(struct work *work, int diff)
  * other means to detect when the pool has died in stratum_thread */
 static void gen_stratum_work(struct pool *pool, struct work *work)
 {
-	unsigned char *coinbase, merkle_root[33], merkle_sha[65], *merkle_hash;
-	char header[257], hash1[129], *nonce2;
+	unsigned char *coinbase, merkle_root[36], merkle_sha[68], *merkle_hash;
+	char header[260], hash1[132], *nonce2;
 	int len, cb1_len, n1_len, cb2_len, i;
 	uint32_t *data32, *swap32;
 
@@ -4641,8 +4686,6 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
 
 	/* Generate coinbase */
 	nonce2 = bin2hex((const unsigned char *)&pool->nonce2, pool->n2size);
-	if (unlikely(!nonce2))
-		quit(1, "Failed to convert nonce2 in gen_stratum_work");
 	pool->nonce2++;
 	cb1_len = strlen(pool->swork.coinbase1) / 2;
 	n1_len = strlen(pool->nonce1) / 2;
@@ -4658,7 +4701,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
 	gen_hash(coinbase, merkle_root, len);
 	memcpy(merkle_sha, merkle_root, 32);
 	for (i = 0; i < pool->swork.merkles; i++) {
-		unsigned char merkle_bin[33];
+		unsigned char merkle_bin[36];
 
 		hex2bin(merkle_bin, pool->swork.merkle[i], 32);
 		memcpy(merkle_sha + 32, merkle_bin, 32);
@@ -4670,8 +4713,6 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
 	for (i = 0; i < 32 / 4; i++)
 		swap32[i] = swab32(data32[i]);
 	merkle_hash = (unsigned char *)bin2hex((const unsigned char *)merkle_root, 32);
-	if (unlikely(!merkle_hash))
-		quit(1, "Failed to conver merkle_hash in gen_stratum_work");
 
 	sprintf(header, "%s", pool->swork.bbversion);
 	strcat(header, pool->swork.prev_hash);
@@ -4870,6 +4911,10 @@ static bool hashtest(struct thr_info *thr, struct work *work)
 				thr->cgpu->api->name, thr->cgpu->device_id);
 		hw_errors++;
 		thr->cgpu->hw_errors++;
+
+		if (thr->cgpu->api->hw_error)
+			thr->cgpu->api->hw_error(thr);
+
 		return false;
 	}
 
@@ -6072,6 +6117,18 @@ int main(int argc, char *argv[])
 	if (!config_loaded)
 		load_default_config();
 
+	if (opt_restart) {
+		struct sigaction shandler;
+
+		shandler.sa_handler = &seghandler;
+		shandler.sa_flags = 0;
+		sigemptyset(&shandler.sa_mask);
+		sigaction(SIGSEGV, &shandler, &segvhandler);
+		sigaction(SIGILL, &shandler, &illhandler);
+#ifndef WIN32
+		sigaction(SIGBUS, &shandler, &bushandler);
+#endif
+	}
 	if (opt_benchmark) {
 		struct pool *pool;
 

+ 22 - 24
configure.ac

@@ -2,7 +2,7 @@
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_maj], [2])
 m4_define([v_min], [8])
-m4_define([v_mic], [3])
+m4_define([v_mic], [5])
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_ver], [v_maj.v_min.v_mic])
 m4_define([lt_rev], m4_eval(v_maj + v_min))
@@ -79,13 +79,6 @@ case $target in
 esac
 
 case $target in
-	x86_64-w64-mingw32)
-	have_x86_64=true
-	have_win32=true
-	PTHREAD_FLAGS=""
-	DLOPEN_FLAGS=""
-	WS2_LIBS="-lws2_32"
-	;;
   *-*-mingw*)
     have_x86_64=false
     have_win32=true
@@ -105,18 +98,23 @@ case $target in
 	;;
 esac
 
-if test "x$have_x86_64" = xtrue; then
-	ARCH_DIR=x86_64
-else
-	ARCH_DIR=x86
-fi
 
-if test "x$ATISTREAMSDKROOT" != x; then
-	OPENCL_FLAGS="-I$ATISTREAMSDKROOT/include $OPENCL_FLAGS"
-	OPENCL_LIBS="-L$ATISTREAMSDKROOT/lib/$ARCH_DIR $OPENCL_LIBS"
-elif test "x$AMDAPPSDKROOT" != x; then
-	OPENCL_FLAGS="-I$AMDAPPSDKROOT/include $OPENCL_FLAGS"
-	OPENCL_LIBS="-L$AMDAPPSDKROOT/lib/$ARCH_DIR $OPENCL_LIBS"
+if test "x$have_win32" != xtrue; then
+	if test "x$have_x86_64" = xtrue; then
+		ARCH_DIR=x86_64
+	else
+		ARCH_DIR=x86
+	fi
+
+	if test "x$ATISTREAMSDKROOT" != x; then
+		OPENCL_FLAGS="-I$ATISTREAMSDKROOT/include $OPENCL_FLAGS"
+		OPENCL_LIBS="-L$ATISTREAMSDKROOT/lib/$ARCH_DIR $OPENCL_LIBS"
+	fi
+
+	if test "x$AMDAPPSDKROOT" != x; then
+		OPENCL_FLAGS="-I$AMDAPPSDKROOT/include $OPENCL_FLAGS"
+		OPENCL_LIBS="-L$AMDAPPSDKROOT/lib/$ARCH_DIR $OPENCL_LIBS"
+	fi
 fi
 
 cpumining="no"
@@ -393,11 +391,11 @@ fi
 
 AC_DEFINE_UNQUOTED([CGMINER_PREFIX], ["$prefix/bin"], [Path to cgminer install])
 
-AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120823"], [Filename for phatk kernel])
-AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120823"], [Filename for poclbm kernel])
-AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120823"], [Filename for diakgcn kernel])
-AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120823"], [Filename for diablo kernel])
-AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt120823"], [Filename for scrypt kernel])
+AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk121016"], [Filename for phatk kernel])
+AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm121016"], [Filename for poclbm kernel])
+AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn121016"], [Filename for diakgcn kernel])
+AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo121016"], [Filename for diablo kernel])
+AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt121016"], [Filename for scrypt kernel])
 
 
 AC_SUBST(OPENCL_LIBS)

+ 1 - 6
diablo120823.cl → diablo121016.cl

@@ -1243,12 +1243,7 @@ void search(
     ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
     
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #if defined(VECTORS4)
 	bool result = any(ZA[924] == 0x136032EDU);

+ 1 - 6
diakgcn120823.cl → diakgcn121016.cl

@@ -572,12 +572,7 @@ __kernel
 	V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #ifdef VECTORS4
 	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {

+ 13 - 18
driver-icarus.c

@@ -554,22 +554,19 @@ static bool icarus_detect_one(const char *devpath)
 	icarus_close(fd);
 
 	nonce_hex = bin2hex(nonce_bin, sizeof(nonce_bin));
-	if (nonce_hex) {
-		if (strncmp(nonce_hex, golden_nonce, 8)) {
-			applog(LOG_ERR, 
-				"Icarus Detect: "
-				"Test failed at %s: get %s, should: %s",
-				devpath, nonce_hex, golden_nonce);
-			free(nonce_hex);
-			return false;
-		}
-		applog(LOG_DEBUG, 
+	if (strncmp(nonce_hex, golden_nonce, 8)) {
+		applog(LOG_ERR,
 			"Icarus Detect: "
-			"Test succeeded at %s: got %s",
-				devpath, nonce_hex);
+			"Test failed at %s: get %s, should: %s",
+			devpath, nonce_hex, golden_nonce);
 		free(nonce_hex);
-	} else
 		return false;
+	}
+	applog(LOG_DEBUG,
+		"Icarus Detect: "
+		"Test succeeded at %s: got %s",
+			devpath, nonce_hex);
+	free(nonce_hex);
 
 	/* We have a real Icarus! */
 	struct cgpu_info *icarus;
@@ -704,11 +701,9 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 
 	if (opt_debug) {
 		ob_hex = bin2hex(ob_bin, sizeof(ob_bin));
-		if (ob_hex) {
-			applog(LOG_DEBUG, "Icarus %d sent: %s",
-				icarus->device_id, ob_hex);
-			free(ob_hex);
-		}
+		applog(LOG_DEBUG, "Icarus %d sent: %s",
+			icarus->device_id, ob_hex);
+		free(ob_hex);
 	}
 
 	/* Icarus will return 4 bytes (ICARUS_READ_SIZE) nonces or nothing */

+ 256 - 100
driver-modminer.c

@@ -1,4 +1,5 @@
 /*
+ * Copyright 2012 Andrew Smith
  * Copyright 2012 Luke Dashjr
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -12,6 +13,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <math.h>
 
 #include "logging.h"
 #include "miner.h"
@@ -21,10 +23,31 @@
 #define BITSTREAM_FILENAME "fpgaminer_top_fixed7_197MHz.ncd"
 #define BISTREAM_USER_ID "\2\4$B"
 
+#define MODMINER_CUTOFF_TEMP 60.0
+#define MODMINER_OVERHEAT_TEMP 50.0
+#define MODMINER_OVERHEAT_CLOCK -10
+
+#define MODMINER_HW_ERROR_PERCENT 0.75
+
+#define MODMINER_MAX_CLOCK 220
+#define MODMINER_DEF_CLOCK 200
+#define MODMINER_MIN_CLOCK 160
+
+#define MODMINER_CLOCK_DOWN -2
+#define MODMINER_CLOCK_SET 0
+#define MODMINER_CLOCK_UP 2
+
+// Maximum how many good shares in a row means clock up
+// 96 is ~34m22s at 200MH/s
+#define MODMINER_TRY_UP 96
+// Initially how many good shares in a row means clock up
+// This is doubled each down clock until it reaches MODMINER_TRY_UP
+// 6 is ~2m9s at 200MH/s
+#define MODMINER_EARLY_UP 6
+
 struct device_api modminer_api;
 
-static inline bool
-_bailout(int fd, struct cgpu_info*modminer, int prio, const char *fmt, ...)
+static inline bool _bailout(int fd, struct cgpu_info *modminer, int prio, const char *fmt, ...)
 {
 	if (fd != -1)
 		serial_close(fd);
@@ -39,42 +62,112 @@ _bailout(int fd, struct cgpu_info*modminer, int prio, const char *fmt, ...)
 	va_end(ap);
 	return false;
 }
-#define bailout(...)  return _bailout(fd, NULL, __VA_ARGS__);
 
-static bool
-modminer_detect_one(const char *devpath)
-{
-	int fd = serial_open(devpath, 0, 10, true);
-	if (unlikely(fd == -1))
-		bailout(LOG_DEBUG, "ModMiner detect: failed to open %s", devpath);
+// 45 noops sent when detecting, in case the device was left in "start job" reading
+static const char NOOP[] = "\0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";
 
+static bool modminer_detect_one(const char *devpath)
+{
 	char buf[0x100];
+	char *devname;
 	ssize_t len;
+	int fd;
+
+#ifdef WIN32
+	fd = serial_open(devpath, 0, 10, true);
+	if (fd < 0) {
+		applog(LOG_ERR, "ModMiner detect: failed to open %s", devpath);
+		return false;
+	}
 
-	// Sending 45 noops, just in case the device was left in "start job" reading
-	(void)(write(fd, "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", 45) ?:0);
+	(void)(write(fd, NOOP, sizeof(NOOP)-1) ?:0);
 	while (serial_read(fd, buf, sizeof(buf)) > 0)
 		;
 
-	if (1 != write(fd, "\x01", 1))  // Get version
-		bailout(LOG_DEBUG, "ModMiner detect: write failed on %s (get version)", devpath);
+	// Version
+	if (1 != write(fd, "\x01", 1)) {
+		applog(LOG_ERR, "ModMiner detect: version request failed on %s (%d)", devpath, errno);
+		goto shin;
+	}
+
 	len = serial_read(fd, buf, sizeof(buf)-1);
-	if (len < 1)
-		bailout(LOG_DEBUG, "ModMiner detect: no response to version request from %s", devpath);
+	if (len < 1) {
+		applog(LOG_ERR, "ModMiner detect: no version reply on %s (%d)", devpath, errno);
+		goto shin;
+	}
 	buf[len] = '\0';
-	char*devname = strdup(buf);
+	devname = strdup(buf);
 	applog(LOG_DEBUG, "ModMiner identified as: %s", devname);
 
-	if (1 != write(fd, "\x02", 1))  // Get FPGA count
-		bailout(LOG_DEBUG, "ModMiner detect: write failed on %s (get FPGA count)", devpath);
+	// FPGA count
+	if (1 != write(fd, "\x02", 1)) {
+		applog(LOG_ERR, "ModMiner detect: FPGA count request failed on %s (%d)", devpath, errno);
+		goto shin;
+	}
 	len = read(fd, buf, 1);
-	if (len < 1)
-		bailout(LOG_ERR, "ModMiner detect: timeout waiting for FPGA count from %s", devpath);
-	if (!buf[0])
-		bailout(LOG_ERR, "ModMiner detect: zero FPGAs reported on %s", devpath);
-	applog(LOG_DEBUG, "ModMiner %s has %u FPGAs", devname, buf[0]);
+
+	if (len < 1) {
+		applog(LOG_ERR, "ModMiner detect: timeout waiting for FPGA count from %s (%d)", devpath, errno);
+		goto shin;
+	}
 
 	serial_close(fd);
+#else
+	fd = select_open(devpath);
+
+	if (fd < 0) {
+		applog(LOG_ERR, "ModMiner detect: failed to open %s", devpath);
+		return false;
+	}
+
+	// Don't care if they fail
+	select_write(fd, (char *)NOOP, sizeof(NOOP)-1);
+
+	// Will clear up to a max of sizeof(buf)-1 chars
+	select_read(fd, buf, sizeof(buf)-1);
+
+	// Version
+	if (select_write(fd, "\x01", 1) < 1) {
+		applog(LOG_ERR, "ModMiner detect: version request failed on %s (%d)", devpath, errno);
+		goto shin;
+	}
+
+	if ((len = select_read(fd, buf, sizeof(buf)-1)) < 1) {
+		applog(LOG_ERR, "ModMiner detect: no version reply on %s (%d)", devpath, errno);
+		goto shin;
+	}
+	buf[len] = '\0';
+	devname = strdup(buf);
+	applog(LOG_DEBUG, "ModMiner identified as: %s", devname);
+
+	// FPGA count
+	if (select_write(fd, "\x02", 1) < 1) {
+		applog(LOG_ERR, "ModMiner detect: FPGA count request failed on %s (%d)", devpath, errno);
+		goto shin;
+	}
+
+	if ((len = select_read(fd, buf, 1)) < 1) {
+		applog(LOG_ERR, "ModMiner detect: no FPGA count reply on %s (%d)", devpath, errno);
+		goto shin;
+	}
+
+	select_close(fd);
+#endif
+
+	// TODO: check if it supports 2 byte temperatures and if not
+	// add a flag and set it use 1 byte and code to use the flag
+
+	if (buf[0] == 0) {
+		applog(LOG_ERR, "ModMiner detect: zero FPGA count from %s", devpath);
+		goto shin;
+	}
+
+	if (buf[0] < 1 || buf[0] > 4) {
+		applog(LOG_ERR, "ModMiner detect: invalid FPGA count (%u) from %s", buf[0], devpath);
+		goto shin;
+	}
+
+	applog(LOG_DEBUG, "ModMiner %s has %u FPGAs", devname, buf[0]);
 
 	struct cgpu_info *modminer;
 	modminer = calloc(1, sizeof(*modminer));
@@ -85,24 +178,28 @@ modminer_detect_one(const char *devpath)
 	modminer->deven = DEV_ENABLED;
 	modminer->threads = buf[0];
 	modminer->name = devname;
-	modminer->cutofftemp = 85;
 
 	return add_cgpu(modminer);
-}
 
-#undef bailout
+shin:
+
+#ifdef WIN32
+	serial_close(fd);
+#else
+	select_close(fd);
+#endif
+	return false;
+}
 
-static int
-modminer_detect_auto()
+static int modminer_detect_auto()
 {
 	return
-	serial_autodetect_udev     (modminer_detect_one, "BTCFPGA*ModMiner") ?:
+	serial_autodetect_udev     (modminer_detect_one, "*ModMiner*") ?:
 	serial_autodetect_devserial(modminer_detect_one, "BTCFPGA_ModMiner") ?:
 	0;
 }
 
-static void
-modminer_detect()
+static void modminer_detect()
 {
 	serial_detect_auto(&modminer_api, modminer_detect_one, modminer_detect_auto);
 }
@@ -138,12 +235,11 @@ select(fd+1, &fds, NULL, NULL, NULL);  \
 		bailout2(LOG_ERR, "%s %u: Wrong " eng " programming %s", modminer->api->name, modminer->device_id, modminer->device_path);  \
 } while(0)
 
-static bool
-modminer_fpga_upload_bitstream(struct cgpu_info*modminer)
+static bool modminer_fpga_upload_bitstream(struct cgpu_info *modminer)
 {
 	fd_set fds;
 	char buf[0x100];
-	unsigned char *ubuf = (unsigned char*)buf;
+	unsigned char *ubuf = (unsigned char *)buf;
 	unsigned long len;
 	char *p;
 	const char *fwfile = BITSTREAM_FILENAME;
@@ -215,10 +311,9 @@ modminer_fpga_upload_bitstream(struct cgpu_info*modminer)
 	return true;
 }
 
-static bool
-modminer_device_prepare(struct cgpu_info *modminer)
+static bool modminer_device_prepare(struct cgpu_info *modminer)
 {
-	int fd = serial_open(modminer->device_path, 0, /*FIXME=-1*/3000, true);
+	int fd = serial_open(modminer->device_path, 0, 10, true);
 	if (unlikely(-1 == fd))
 		bailout(LOG_ERR, "%s %u: Failed to open %s", modminer->api->name, modminer->device_id, modminer->device_path);
 
@@ -234,12 +329,12 @@ modminer_device_prepare(struct cgpu_info *modminer)
 
 #undef bailout
 
-static bool
-modminer_fpga_prepare(struct thr_info *thr)
+static bool modminer_fpga_prepare(struct thr_info *thr)
 {
 	struct cgpu_info *modminer = thr->cgpu;
 
-	// Don't need to lock the mutex here, since prepare runs from the main thread before the miner threads start
+	// Don't need to lock the mutex here,
+	// since prepare runs from the main thread before the miner threads start
 	if (modminer->device_fd == -1 && !modminer_device_prepare(modminer))
 		return false;
 
@@ -247,43 +342,86 @@ modminer_fpga_prepare(struct thr_info *thr)
 	state = thr->cgpu_data = calloc(1, sizeof(struct modminer_fpga_state));
 	state->next_work_cmd[0] = '\x08';  // Send Job
 	state->next_work_cmd[1] = thr->device_thread;  // FPGA id
+	state->shares_to_good = MODMINER_EARLY_UP;
 
 	return true;
 }
 
-static bool
-modminer_reduce_clock(struct thr_info*thr, bool needlock)
+/*
+ * Clocking rules:
+ *	If device exceeds cutoff temp - shut down - and decrease the clock by
+ *		MODMINER_OVERHEAT_CLOCK for when it restarts
+ *
+ * When to clock down:
+ *	If device overheats
+ *	 or
+ *	If device gets MODMINER_HW_ERROR_PERCENT errors since last clock up or down
+ *		if clock is <= default it requires 2 HW to do this test
+ *		if clock is > default it only requires 1 HW to do this test
+ *
+ * When to clock up:
+ *	If device gets shares_to_good good shares in a row
+ *
+ * N.B. clock must always be a multiple of 2
+ */
+static bool modminer_delta_clock(struct thr_info *thr, bool needlock, int delta, bool temp)
 {
-	struct cgpu_info*modminer = thr->cgpu;
+	struct cgpu_info *modminer = thr->cgpu;
 	struct modminer_fpga_state *state = thr->cgpu_data;
 	char fpgaid = thr->device_thread;
 	int fd = modminer->device_fd;
 	unsigned char cmd[6], buf[1];
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+
+	// Only do once if multiple shares per work or multiple reasons
+	// Since the temperature down clock test is first in the code this is OK
+	if (tdiff(&now, &(state->last_changed)) < 0.5)
+		return false;
 
-	if (state->clock <= 100)
+	// Update before possibly aborting to avoid repeating unnecessarily
+	memcpy(&(state->last_changed), &now, sizeof(struct timeval));
+	state->shares = 0;
+	state->shares_last_hw = 0;
+	state->hw_errors = 0;
+
+	// If drop requested due to temperature, clock drop is always allowed
+	if (!temp && delta < 0 && state->clock <= MODMINER_MIN_CLOCK)
+		return false;
+
+	if (delta > 0 && state->clock >= MODMINER_MAX_CLOCK)
 		return false;
 
+	if (delta < 0) {
+		if ((state->shares_to_good * 2) < MODMINER_TRY_UP)
+			state->shares_to_good *= 2;
+		else
+			state->shares_to_good = MODMINER_TRY_UP;
+	}
+
+	state->clock += delta;
+
 	cmd[0] = '\x06';  // set clock speed
 	cmd[1] = fpgaid;
-	cmd[2] = state->clock -= 2;
+	cmd[2] = state->clock;
 	cmd[3] = cmd[4] = cmd[5] = '\0';
 
 	if (needlock)
 		mutex_lock(&modminer->device_mutex);
 	if (6 != write(fd, cmd, 6))
-		bailout2(LOG_ERR, "%s %u.%u: Error writing (set clock speed)", modminer->api->name, modminer->device_id, fpgaid);
+		bailout2(LOG_ERR, "%s%u.%u: Error writing (set clock speed)", modminer->api->name, modminer->device_id, fpgaid);
 	if (serial_read(fd, &buf, 1) != 1)
-		bailout2(LOG_ERR, "%s %u.%u: Error reading (set clock speed)", modminer->api->name, modminer->device_id, fpgaid);
+		bailout2(LOG_ERR, "%s%u.%u: Error reading (set clock speed)", modminer->api->name, modminer->device_id, fpgaid);
 	if (needlock)
 		mutex_unlock(&modminer->device_mutex);
 
-	applog(LOG_WARNING, "%s %u.%u: Setting clock speed to %u", modminer->api->name, modminer->device_id, fpgaid, state->clock);
+	applog(LOG_WARNING, "%s%u.%u: Set clock speed %sto %u", modminer->api->name, modminer->device_id, fpgaid, (delta < 0) ? "down " : (delta > 0 ? "up " : ""), state->clock);
 
 	return true;
 }
 
-static bool
-modminer_fpga_init(struct thr_info *thr)
+static bool modminer_fpga_init(struct thr_info *thr)
 {
 	struct cgpu_info *modminer = thr->cgpu;
 	struct modminer_fpga_state *state = thr->cgpu_data;
@@ -303,20 +441,20 @@ modminer_fpga_init(struct thr_info *thr)
 	cmd[0] = '\x04';  // Read USER code (bitstream id)
 	cmd[1] = fpgaid;
 	if (write(fd, cmd, 2) != 2)
-		bailout2(LOG_ERR, "%s %u.%u: Error writing (read USER code)", modminer->api->name, modminer->device_id, fpgaid);
+		bailout2(LOG_ERR, "%s%u.%u: Error writing (read USER code)", modminer->api->name, modminer->device_id, fpgaid);
 	if (serial_read(fd, buf, 4) != 4)
-		bailout2(LOG_ERR, "%s %u.%u: Error reading (read USER code)", modminer->api->name, modminer->device_id, fpgaid);
+		bailout2(LOG_ERR, "%s%u.%u: Error reading (read USER code)", modminer->api->name, modminer->device_id, fpgaid);
 
 	if (memcmp(buf, BISTREAM_USER_ID, 4)) {
-		applog(LOG_ERR, "%s %u.%u: FPGA not programmed", modminer->api->name, modminer->device_id, fpgaid);
+		applog(LOG_ERR, "%s%u.%u: FPGA not programmed", modminer->api->name, modminer->device_id, fpgaid);
 		if (!modminer_fpga_upload_bitstream(modminer))
 			return false;
 	}
 	else
-		applog(LOG_DEBUG, "%s %u.%u: FPGA is already programmed :)", modminer->api->name, modminer->device_id, fpgaid);
+		applog(LOG_DEBUG, "%s%u.%u: FPGA is already programmed :)", modminer->api->name, modminer->device_id, fpgaid);
 
-	state->clock = 212;  // Will be reduced to 210 by modminer_reduce_clock
-	modminer_reduce_clock(thr, false);
+	state->clock = MODMINER_DEF_CLOCK;
+	modminer_delta_clock(thr, false, MODMINER_CLOCK_SET, false);
 
 	mutex_unlock(&modminer->device_mutex);
 
@@ -325,8 +463,7 @@ modminer_fpga_init(struct thr_info *thr)
 	return true;
 }
 
-static void
-get_modminer_statline_before(char *buf, struct cgpu_info *modminer)
+static void get_modminer_statline_before(char *buf, struct cgpu_info *modminer)
 {
 	char info[18] = "               | ";
 	int tc = modminer->threads;
@@ -337,16 +474,16 @@ get_modminer_statline_before(char *buf, struct cgpu_info *modminer)
 		tc = 4;
 
 	for (i = tc - 1; i >= 0; --i) {
-		struct thr_info*thr = modminer->thr[i];
+		struct thr_info *thr = modminer->thr[i];
 		struct modminer_fpga_state *state = thr->cgpu_data;
-		unsigned char temp = state->temp;
+		float temp = state->temp;
 
 		info[i*3+2] = '/';
 		if (temp) {
 			havetemp = true;
 			if (temp > 9)
 				info[i*3+0] = 0x30 + (temp / 10);
-			info[i*3+1] = 0x30 + (temp % 10);
+			info[i*3+1] = 0x30 + ((int)temp % 10);
 		}
 	}
 	if (havetemp) {
@@ -358,8 +495,7 @@ get_modminer_statline_before(char *buf, struct cgpu_info *modminer)
 		strcat(buf, "               | ");
 }
 
-static bool
-modminer_prepare_next_work(struct modminer_fpga_state*state, struct work*work)
+static bool modminer_prepare_next_work(struct modminer_fpga_state *state, struct work *work)
 {
 	char *midstate = state->next_work_cmd + 2;
 	char *taildata = midstate + 32;
@@ -370,11 +506,10 @@ modminer_prepare_next_work(struct modminer_fpga_state*state, struct work*work)
 	return true;
 }
 
-static bool
-modminer_start_work(struct thr_info*thr)
+static bool modminer_start_work(struct thr_info *thr)
 {
 fd_set fds;
-	struct cgpu_info*modminer = thr->cgpu;
+	struct cgpu_info *modminer = thr->cgpu;
 	struct modminer_fpga_state *state = thr->cgpu_data;
 	char fpgaid = thr->device_thread;
 	SOCKETTYPE fd = modminer->device_fd;
@@ -383,7 +518,7 @@ fd_set fds;
 
 	mutex_lock(&modminer->device_mutex);
 	if (46 != write(fd, state->next_work_cmd, 46))
-		bailout2(LOG_ERR, "%s %u.%u: Error writing (start work)", modminer->api->name, modminer->device_id, fpgaid);
+		bailout2(LOG_ERR, "%s%u.%u: Error writing (start work)", modminer->api->name, modminer->device_id, fpgaid);
 	gettimeofday(&state->tv_workstart, NULL);
 	state->hashes = 0;
 	status_read("start work");
@@ -394,42 +529,48 @@ fd_set fds;
 
 #define work_restart(thr)  thr->work_restart
 
-static uint64_t
-modminer_process_results(struct thr_info*thr)
+static uint64_t modminer_process_results(struct thr_info *thr)
 {
-	struct cgpu_info*modminer = thr->cgpu;
+	struct cgpu_info *modminer = thr->cgpu;
 	struct modminer_fpga_state *state = thr->cgpu_data;
 	char fpgaid = thr->device_thread;
 	int fd = modminer->device_fd;
 	struct work *work = &state->running_work;
 
-	char cmd[2], temperature;
+	char cmd[2], temperature[2];
 	uint32_t nonce;
 	long iter;
-	int curr_hw_errors;
-	cmd[0] = '\x0a';
+	uint32_t curr_hw_errors;
+
+	// \x0a is 1 byte temperature
+	// \x0d is 2 byte temperature
+	cmd[0] = '\x0d';
 	cmd[1] = fpgaid;
 
 	mutex_lock(&modminer->device_mutex);
-	if (2 == write(fd, cmd, 2) && read(fd, &temperature, 1) == 1)
+	if (2 == write(fd, cmd, 2) && read(fd, &temperature, 2) == 2)
 	{
-		state->temp = temperature;
+		// Only accurate to 2 and a bit places
+		state->temp = roundf((temperature[1] * 256.0 + temperature[0]) / 0.128) / 1000.0;
 		if (!fpgaid)
-			modminer->temp = (float)temperature;
-		if (temperature > modminer->cutofftemp - 2) {
-			if (temperature > modminer->cutofftemp) {
-				applog(LOG_WARNING, "%s %u.%u: Hit thermal cutoff limit, disabling device!", modminer->api->name, modminer->device_id, fpgaid);
-				modminer->deven = DEV_RECOVER;
+			modminer->temp = state->temp;
 
+		if (state->temp >= MODMINER_OVERHEAT_TEMP) {
+			if (state->temp >= MODMINER_CUTOFF_TEMP) {
+				applog(LOG_WARNING, "%s%u.%u: Hit thermal cutoff limit (%f) at %f, disabling device!", modminer->api->name, modminer->device_id, fpgaid, MODMINER_CUTOFF_TEMP, state->temp);
+				modminer_delta_clock(thr, true, MODMINER_OVERHEAT_CLOCK, true);
+
+				modminer->deven = DEV_RECOVER;
 				modminer->device_last_not_well = time(NULL);
 				modminer->device_not_well_reason = REASON_DEV_THERMAL_CUTOFF;
-				++modminer->dev_thermal_cutoff_count;
+				modminer->dev_thermal_cutoff_count++;
 			} else {
-				time_t now = time(NULL);
-				if (state->last_cutoff_reduced != now) {
-					state->last_cutoff_reduced = now;
-					modminer_reduce_clock(thr, false);
-				}
+				 applog(LOG_WARNING, "%s%u.%u Overheat limit (%f) reached %f", modminer->api->name, modminer->device_id, fpgaid, MODMINER_OVERHEAT_TEMP, state->temp);
+				modminer_delta_clock(thr, true, MODMINER_CLOCK_DOWN, true);
+
+				modminer->device_last_not_well = time(NULL);
+				modminer->device_not_well_reason = REASON_DEV_OVER_HEAT;
+				modminer->dev_over_heat_count++;
 			}
 		}
 	}
@@ -438,24 +579,33 @@ modminer_process_results(struct thr_info*thr)
 	iter = 200;
 	while (1) {
 		if (write(fd, cmd, 2) != 2)
-			bailout2(LOG_ERR, "%s %u.%u: Error reading (get nonce)", modminer->api->name, modminer->device_id, fpgaid);
+			bailout2(LOG_ERR, "%s%u.%u: Error reading (get nonce)", modminer->api->name, modminer->device_id, fpgaid);
 		serial_read(fd, &nonce, 4);
 		mutex_unlock(&modminer->device_mutex);
 		if (memcmp(&nonce, "\xff\xff\xff\xff", 4)) {
+			state->shares++;
 			state->no_nonce_counter = 0;
-			curr_hw_errors = modminer->hw_errors;
+			curr_hw_errors = state->hw_errors;
 			submit_nonce(thr, work, nonce);
-			if (modminer->hw_errors > curr_hw_errors) {
-				if (modminer->hw_errors * 100 > 1000 + state->good_share_counter)
-					// Only reduce clocks if hardware errors are more than ~1% of results
-					modminer_reduce_clock(thr, true);
+			if (state->hw_errors > curr_hw_errors) {
+				state->shares_last_hw = state->shares;
+				if (state->clock > MODMINER_DEF_CLOCK || state->hw_errors > 1) {
+					float pct = (state->hw_errors * 100.0 / (state->shares ? : 1.0));
+					if (pct >= MODMINER_HW_ERROR_PERCENT)
+						modminer_delta_clock(thr, true, MODMINER_CLOCK_DOWN, false);
+				}
+			} else {
+				// If we've reached the required good shares in a row then clock up
+				if ((state->shares - state->shares_last_hw) >= state->shares_to_good)
+					modminer_delta_clock(thr, true, MODMINER_CLOCK_UP, false);
 			}
-		}
-		else
-		if (++state->no_nonce_counter > 18000) {
+		} else if (++state->no_nonce_counter > 18000) {
+			// TODO: NFI what this is - but will be gone
+			// when the threading rewrite is done
 			state->no_nonce_counter = 0;
-			modminer_reduce_clock(thr, true);
+			modminer_delta_clock(thr, true, MODMINER_CLOCK_DOWN, false);
 		}
+
 		if (work_restart(thr))
 			break;
 		usleep(10000);
@@ -480,8 +630,7 @@ modminer_process_results(struct thr_info*thr)
 	return hashes;
 }
 
-static int64_t
-modminer_scanhash(struct thr_info*thr, struct work*work, int64_t __maybe_unused max_nonce)
+static int64_t modminer_scanhash(struct thr_info *thr, struct work *work, int64_t __maybe_unused max_nonce)
 {
 	struct modminer_fpga_state *state = thr->cgpu_data;
 	int64_t hashes = 0;
@@ -508,8 +657,14 @@ modminer_scanhash(struct thr_info*thr, struct work*work, int64_t __maybe_unused
 	return hashes;
 }
 
-static void
-modminer_fpga_shutdown(struct thr_info *thr)
+static void modminer_hw_error(struct thr_info *thr)
+{
+	struct modminer_fpga_state *state = thr->cgpu_data;
+
+	state->hw_errors++;
+}
+
+static void modminer_fpga_shutdown(struct thr_info *thr)
 {
 	free(thr->cgpu_data);
 }
@@ -522,5 +677,6 @@ struct device_api modminer_api = {
 	.thread_prepare = modminer_fpga_prepare,
 	.thread_init = modminer_fpga_init,
 	.scanhash = modminer_scanhash,
+	.hw_error = modminer_hw_error,
 	.thread_shutdown = modminer_fpga_shutdown,
 };

+ 20 - 52
driver-opencl.c

@@ -1463,12 +1463,9 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
 	const int thr_id = thr->id;
 	struct opencl_thread_data *thrdata = thr->cgpu_data;
 	_clState *clState = clStates[thr_id];
-	struct cgpu_info *gpu = thr->cgpu;
-
-	if (gpu->dynamic)
-		return;
 
 	clFinish(clState->commandQueue);
+
 	if (thrdata->res[FOUND]) {
 		thrdata->last_work = &thrdata->_last_work;
 		memcpy(thrdata->last_work, work, sizeof(*thrdata->last_work));
@@ -1497,7 +1494,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	_clState *clState = clStates[thr_id];
 	const cl_kernel *kernel = &clState->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
-	struct timeval tv_gpuend;
 
 	cl_int status;
 	size_t globalThreads[1];
@@ -1505,8 +1501,25 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	int64_t hashes;
 
 	/* This finish flushes the readbuffer set with CL_FALSE later */
-	if (!gpu->dynamic)
-		clFinish(clState->commandQueue);
+	clFinish(clState->commandQueue);
+
+	/* Windows' timer resolution is only 15ms so oversample 5x */
+	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
+		struct timeval tv_gpuend;
+		double gpu_us;
+
+		gettimeofday(&tv_gpuend, NULL);
+		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
+		if (gpu_us > dynamic_us) {
+			if (gpu->intensity > MIN_INTENSITY)
+				--gpu->intensity;
+		} else if (gpu_us < dynamic_us / 2) {
+			if (gpu->intensity < MAX_INTENSITY)
+				++gpu->intensity;
+		}
+		memcpy(&(gpu->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
+		gpu->intervals = 0;
+	}
 
 	set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
 	if (hashes > gpu->max_hashes)
@@ -1533,18 +1546,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		clFinish(clState->commandQueue);
 	}
 
-	if (gpu->dynamic) {
-		gettimeofday(&gpu->tv_gpumid, NULL);
-		if (gpu->new_work) {
-			gpu->new_work = false;
-			gpu->intervals = gpu->hit = 0;
-		}
-		if (!gpu->intervals) {
-			gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
-			gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
-		}
-	}
-
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
@@ -1572,39 +1573,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		return -1;
 	}
 
-	if (gpu->dynamic) {
-		double gpu_us;
-
-		clFinish(clState->commandQueue);
-		/* Windows returns the same time for gettimeofday due to its
-		 * 15ms timer resolution, so we must average the result over
-		 * at least 5 values that are actually different to get an
-		 * accurate result */
-		gpu->intervals++;
-		gettimeofday(&tv_gpuend, NULL);
-		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
-		if (gpu_us > 0 && ++gpu->hit > 4) {
-			gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
-			/* Very rarely we may get an overflow so put an upper
-			 * limit on the detected time */
-			if (unlikely(gpu->gpu_us_average > 0 && gpu_us > gpu->gpu_us_average * 4))
-				gpu_us = gpu->gpu_us_average * 4;
-			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
-
-			/* Try to not let the GPU be out for longer than
-			 * opt_dynamic_interval in ms, but increase
-			 * intensity when the system is idle in dynamic mode */
-			if (gpu->gpu_us_average > dynamic_us) {
-				if (gpu->intensity > MIN_INTENSITY)
-					--gpu->intensity;
-			} else if (gpu->gpu_us_average < dynamic_us / 2) {	
-				if (gpu->intensity < MAX_INTENSITY)
-					++gpu->intensity;
-			}
-			gpu->intervals = gpu->hit = 0;
-		}
-	}
-
 	/* The amount of work scanned can fluctuate when intensity changes
 	 * and since we do this one cycle behind, we increment the work more
 	 * than enough to prevent repeating work */

+ 93 - 0
fpgautils.c

@@ -477,3 +477,96 @@ FILE *open_bitstream(const char *dname, const char *filename)
 
 	return NULL;
 }
+
+#ifndef WIN32
+
+static bool _select_wait_read(int fd, struct timeval *timeout)
+{
+	fd_set rfds;
+
+	FD_ZERO(&rfds);
+	FD_SET(fd, &rfds);
+
+	if (select(fd+1, &rfds, NULL, NULL, timeout) > 0)
+		return true;
+	else
+		return false;
+}
+
+// Default timeout 100ms - only for device initialisation
+const struct timeval tv_timeout_default = { 0, 100000 };
+// Default inter character timeout = 1ms - only for device initialisation
+const struct timeval tv_inter_char_default = { 0, 1000 };
+
+// Device initialisation function - NOT for work processing
+size_t _select_read(int fd, char *buf, size_t bufsiz, struct timeval *timeout, struct timeval *char_timeout, int finished)
+{
+	struct timeval tv_time, tv_char;
+	ssize_t siz, red = 0;
+	char got;
+
+	// timeout is the maximum time to wait for the first character
+	tv_time.tv_sec = timeout->tv_sec;
+	tv_time.tv_usec = timeout->tv_usec;
+
+	if (!_select_wait_read(fd, &tv_time))
+		return 0;
+
+	while (4242) {
+		if ((siz = read(fd, buf, 1)) < 0)
+			return red;
+
+		got = *buf;
+		buf += siz;
+		red += siz;
+		bufsiz -= siz;
+
+		if (bufsiz < 1 || (finished >= 0 && got == finished))
+			return red;
+
+		// char_timeout is the maximum time to wait for each subsequent character
+		// this is OK for initialisation, but bad for work processing
+		// work processing MUST have a fixed size so this doesn't come into play
+		tv_char.tv_sec = char_timeout->tv_sec;
+		tv_char.tv_usec = char_timeout->tv_usec;
+
+		if (!_select_wait_read(fd, &tv_char))
+			return red;
+	}
+
+	return red;
+}
+
+// Device initialisation function - NOT for work processing
+size_t _select_write(int fd, char *buf, size_t siz, struct timeval *timeout)
+{
+	struct timeval tv_time, tv_now, tv_finish;
+	fd_set rfds;
+	ssize_t wrote = 0, ret;
+
+	gettimeofday(&tv_now, NULL);
+	timeradd(&tv_now, timeout, &tv_finish);
+
+	// timeout is the maximum time to spend trying to write
+	tv_time.tv_sec = timeout->tv_sec;
+	tv_time.tv_usec = timeout->tv_usec;
+
+	FD_ZERO(&rfds);
+	FD_SET(fd, &rfds);
+
+	while (siz > 0 && (tv_now.tv_sec < tv_finish.tv_sec || (tv_now.tv_sec == tv_finish.tv_sec && tv_now.tv_usec < tv_finish.tv_usec)) && select(fd+1, NULL, &rfds, NULL, &tv_time) > 0) {
+		if ((ret = write(fd, buf, 1)) > 0) {
+			buf++;
+			wrote++;
+			siz--;
+		}
+		else if (ret < 0)
+			return wrote;
+
+		gettimeofday(&tv_now, NULL);
+	}
+
+	return wrote;
+}
+
+#endif // ! WIN32

+ 43 - 0
fpgautils.h

@@ -36,4 +36,47 @@ extern ssize_t _serial_read(int fd, char *buf, size_t buflen, char *eol);
 
 extern FILE *open_bitstream(const char *dname, const char *filename);
 
+#ifndef WIN32
+extern const struct timeval tv_timeout_default;
+extern const struct timeval tv_inter_char_default;
+
+extern size_t _select_read(int fd, char *buf, size_t bufsiz, struct timeval *timeout, struct timeval *char_timeout, int finished);
+extern size_t _select_write(int fd, char *buf, size_t siz, struct timeval *timeout);
+
+#define select_open(devpath) \
+	serial_open(devpath, 0, 0, false)
+
+#define select_open_purge(devpath, purge)\
+	serial_open(devpath, 0, 0, purge)
+
+#define select_write(fd, buf, siz) \
+	_select_write(fd, buf, siz, (struct timeval *)(&tv_timeout_default))
+
+#define select_write_full _select_write
+
+#define select_read(fd, buf, bufsiz) \
+	_select_read(fd, buf, bufsiz, (struct timeval *)(&tv_timeout_default), \
+			(struct timeval *)(&tv_inter_char_default), -1)
+
+#define select_read_til(fd, buf, bufsiz, eol) \
+	_select_read(fd, buf, bufsiz, (struct timeval *)(&tv_timeout_default), \
+			(struct timeval *)(&tv_inter_char_default), eol)
+
+#define select_read_wait(fd, buf, bufsiz, timeout) \
+	_select_read(fd, buf, bufsiz, timeout, \
+			(struct timeval *)(&tv_inter_char_default), -1)
+
+#define select_read_wait_til(fd, buf, bufsiz, timeout, eol) \
+	_select_read(fd, buf, bufsiz, timeout, \
+			(struct timeval *)(&tv_inter_char_default), eol)
+
+#define select_read_wait_both(fd, buf, bufsiz, timeout, char_timeout) \
+	_select_read(fd, buf, bufsiz, timeout, char_timeout, -1)
+
+#define select_read_full _select_read
+
+#define select_close(fd)  close(fd)
+
+#endif // ! WIN32
+
 #endif

+ 13 - 11
miner.h

@@ -267,6 +267,7 @@ struct device_api {
 	void (*free_work)(struct thr_info*, struct work*);
 	bool (*prepare_work)(struct thr_info*, struct work*);
 	int64_t (*scanhash)(struct thr_info*, struct work*, int64_t);
+	void (*hw_error)(struct thr_info*);
 	void (*thread_shutdown)(struct thr_info*);
 	void (*thread_enable)(struct thr_info*);
 };
@@ -401,9 +402,7 @@ struct cgpu_info {
 	size_t shaders;
 #endif
 	struct timeval tv_gpustart;
-	struct timeval tv_gpumid;
-	double gpu_us_average;
-	int intervals, hit;
+	int intervals;
 #endif
 
 	bool new_work;
@@ -801,8 +800,8 @@ struct stratum_work {
 	int diff;
 };
 
-#define RECVSIZE 8191
-#define RBUFSIZE (RECVSIZE + 1)
+#define RECVSIZE 8192
+#define RBUFSIZE (RECVSIZE + 4)
 
 struct pool {
 	int pool_no;
@@ -899,10 +898,10 @@ struct work {
 	unsigned char	target[32];
 	unsigned char	hash[32];
 
+	uint32_t	outputhash;
+
 	int		rolls;
 
-	uint32_t	output[1];
-	uint32_t	valid;
 	dev_blk_ctx	blk;
 
 	struct thr_info	*thr;
@@ -952,11 +951,14 @@ struct modminer_fpga_state {
 	char next_work_cmd[46];
 
 	unsigned char clock;
-	int no_nonce_counter;
-	int good_share_counter;
-	time_t last_cutoff_reduced;
+	float temp;
 
-	unsigned char temp;
+	uint32_t shares;
+	uint32_t shares_last_hw;
+	uint32_t hw_errors;
+	uint32_t shares_to_good;
+	struct timeval last_changed;
+	uint32_t no_nonce_counter;
 };
 #endif
 

+ 6 - 1
ocl.c

@@ -816,8 +816,13 @@ built:
 			bufsize = cgpu->max_alloc;
 		applog(LOG_DEBUG, "Creating scrypt buffer sized %d", bufsize);
 		clState->padbufsize = bufsize;
+
+		/* This buffer is weird and might work to some degree even if
+		 * the create buffer call has apparently failed, so check if we
+		 * get anything back before we call it a failure. */
+		clState->padbuffer8 = NULL;
 		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
-		if (status != CL_SUCCESS) {
+		if (status != CL_SUCCESS && !clState->padbuffer8) {
 			applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease CT or increase LG", status);
 			return NULL;
 		}

+ 1 - 6
phatk120823.cl → phatk121016.cl

@@ -388,12 +388,7 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 		(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64))  + s1(64+59)+ ch(59+64)));
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #ifdef VECTORS4
 	bool result = W[117].x & W[117].y & W[117].z & W[117].w;

+ 1 - 6
poclbm120823.cl → poclbm121016.cl

@@ -1322,12 +1322,7 @@ Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
 Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #if defined(VECTORS2) || defined(VECTORS4)
 	if (any(Vals[2] == 0x136032edU)) {

+ 17 - 7
scrypt.c

@@ -34,8 +34,6 @@
 #include <stdint.h>
 #include <string.h>
 
-#define byteswap(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
-
 typedef struct SHA256Context {
 	uint32_t state[8];
 	uint32_t buf[16];
@@ -51,7 +49,7 @@ be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len)
 	uint32_t i;
 
 	for (i = 0; i < len; i++)
-		dst[i] = byteswap(src[i]);
+		dst[i] = htobe32(src[i]);
 }
 
 /* Elementary functions used by SHA256 */
@@ -94,7 +92,7 @@ SHA256_Transform(uint32_t * state, const uint32_t block[16], int swap)
 	/* 1. Prepare message schedule W. */
 	if(swap)
 		for (i = 0; i < 16; i++)
-			W[i] = byteswap(block[i]);
+			W[i] = htobe32(block[i]);
 	else
 		memcpy(W, block, 64);
 	for (i = 16; i < 64; i += 2) {
@@ -295,7 +293,7 @@ PBKDF2_SHA256_80_128_32(const uint32_t * passwd, const uint32_t * salt)
 	/* Feed the inner hash to the outer SHA256 operation. */
 	SHA256_Transform(ostate, pad, 0);
 	/* Finish the outer SHA256 operation. */
-	return byteswap(ostate[7]);
+	return be32toh(ostate[7]);
 }
 
 
@@ -407,6 +405,18 @@ static uint32_t scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad)
 	return PBKDF2_SHA256_80_128_32(input, X);
 }
 
+void scrypt_outputhash(struct work *work)
+{
+	uint32_t data[20];
+	char *scratchbuf;
+	uint32_t *nonce = (uint32_t *)(work->data + 76);
+
+	be32enc_vect(data, (const uint32_t *)work->data, 19);
+	data[19] = htobe32(*nonce);
+	scratchbuf = alloca(131584);
+	work->outputhash = scrypt_1024_1_1_256_sp(data, scratchbuf);
+}
+
 /* Used externally as confirmation of correct OCL code */
 bool scrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
 {
@@ -415,7 +425,7 @@ bool scrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t no
 	uint32_t data[20];
 
 	be32enc_vect(data, (const uint32_t *)pdata, 19);
-	data[19] = byteswap(nonce);
+	data[19] = htobe32(nonce);
 	scratchbuf = alloca(131584);
 	tmp_hash7 = scrypt_1024_1_1_256_sp(data, scratchbuf);
 
@@ -448,7 +458,7 @@ bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *p
 		tmp_hash7 = scrypt_1024_1_1_256_sp(data, scratchbuf);
 
 		if (unlikely(tmp_hash7 <= Htarg)) {
-			((uint32_t *)pdata)[19] = byteswap(n);
+			((uint32_t *)pdata)[19] = htobe32(n);
 			*last_nonce = n;
 			ret = true;
 			break;

+ 8 - 0
scrypt.h

@@ -1,9 +1,13 @@
 #ifndef SCRYPT_H
 #define SCRYPT_H
 
+#include "miner.h"
+
 #ifdef USE_SCRYPT
 extern bool scrypt_test(unsigned char *pdata, const unsigned char *ptarget,
 			uint32_t nonce);
+extern void scrypt_outputhash(struct work *work);
+
 #else /* USE_SCRYPT */
 static inline bool scrypt_test(__maybe_unused unsigned char *pdata,
 			       __maybe_unused const unsigned char *ptarget,
@@ -11,6 +15,10 @@ static inline bool scrypt_test(__maybe_unused unsigned char *pdata,
 {
 	return false;
 }
+
+static inline void scrypt_outputhash(__maybe_unused struct work *work)
+{
+}
 #endif /* USE_SCRYPT */
 
 #endif /* SCRYPT_H */

+ 1 - 6
scrypt120823.cl → scrypt121016.cl

@@ -683,12 +683,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 }
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(__global const uint4 * restrict input,

+ 80 - 32
util.c

@@ -534,14 +534,21 @@ char *get_proxy(char *url, struct pool *pool)
 	return url;
 }
 
-
+/* Returns a malloced array string of a binary value of arbitrary length. The
+ * array is rounded up to a 4 byte size to appease architectures that need
+ * aligned array  sizes */
 char *bin2hex(const unsigned char *p, size_t len)
 {
-	char *s = malloc((len * 2) + 1);
 	unsigned int i;
+	ssize_t slen;
+	char *s;
 
-	if (!s)
-		return NULL;
+	slen = len * 2 + 1;
+	if (slen % 4)
+		slen += 4 - (slen % 4);
+	s = calloc(slen, 1);
+	if (unlikely(!s))
+		quit(1, "Failed to calloc in bin2hex");
 
 	for (i = 0; i < len; i++)
 		sprintf(s + (i * 2), "%02x", (unsigned int) p[i]);
@@ -549,24 +556,27 @@ char *bin2hex(const unsigned char *p, size_t len)
 	return s;
 }
 
+/* Does the reverse of bin2hex but does not allocate any ram */
 bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
 {
+	bool ret = false;
+
 	while (*hexstr && len) {
-		char hex_byte[3];
+		char hex_byte[4];
 		unsigned int v;
 
-		if (!hexstr[1]) {
+		if (unlikely(!hexstr[1])) {
 			applog(LOG_ERR, "hex2bin str truncated");
-			return false;
+			return ret;
 		}
 
+		memset(hex_byte, 0, 4);
 		hex_byte[0] = hexstr[0];
 		hex_byte[1] = hexstr[1];
-		hex_byte[2] = 0;
 
-		if (sscanf(hex_byte, "%x", &v) != 1) {
+		if (unlikely(sscanf(hex_byte, "%x", &v) != 1)) {
 			applog(LOG_ERR, "hex2bin sscanf '%s' failed", hex_byte);
-			return false;
+			return ret;
 		}
 
 		*p = (unsigned char) v;
@@ -576,7 +586,9 @@ bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
 		len--;
 	}
 
-	return (len == 0 && *hexstr == 0) ? true : false;
+	if (likely(len == 0 && *hexstr == 0))
+		ret = true;
+	return ret;
 }
 
 bool fulltest(const unsigned char *hash, const unsigned char *target)
@@ -854,11 +866,12 @@ bool extract_sockaddr(struct pool *pool, char *url)
 	return true;
 }
 
-/* Send a single command across a socket, appending \n to it */
-bool stratum_send(struct pool *pool, char *s, ssize_t len)
+/* Send a single command across a socket, appending \n to it. This should all
+ * be done under stratum lock except when first establishing the socket */
+static bool __stratum_send(struct pool *pool, char *s, ssize_t len)
 {
+	SOCKETTYPE sock = pool->sock;
 	ssize_t ssent = 0;
-	bool ret = false;
 
 	if (opt_protocol)
 		applog(LOG_DEBUG, "SEND: %s", s);
@@ -866,32 +879,55 @@ bool stratum_send(struct pool *pool, char *s, ssize_t len)
 	strcat(s, "\n");
 	len++;
 
-	mutex_lock(&pool->stratum_lock);
 	while (len > 0 ) {
+		struct timeval timeout = {0, 0};
+		CURLcode rc = CURLE_SEND_ERROR;
 		size_t sent = 0;
+		fd_set wd;
 
-		if (curl_easy_send(pool->stratum_curl, s + ssent, len, &sent) != CURLE_OK) {
+		FD_ZERO(&wd);
+		FD_SET(sock, &wd);
+		if (select(sock + 1, NULL, &wd, NULL, &timeout) < 1) {
+			applog(LOG_DEBUG, "Write select failed on pool %d sock", pool->pool_no);
+			return false;
+		}
+		if (likely(pool->stratum_curl))
+			rc = curl_easy_send(pool->stratum_curl, s + ssent, len, &sent);
+		if (rc != CURLE_OK) {
 			applog(LOG_DEBUG, "Failed to curl_easy_send in stratum_send");
-			ret = false;
-			goto out_unlock;
+			return false;
 		}
 		ssent += sent;
 		len -= ssent;
 	}
-	ret = true;
-out_unlock:
-	mutex_unlock(&pool->stratum_lock);
-	return ret;;
+
+	return true;
 }
 
-#define RECVSIZE 8191
-#define RBUFSIZE (RECVSIZE + 1)
+bool stratum_send(struct pool *pool, char *s, ssize_t len)
+{
+	bool ret = false;
+
+	mutex_lock(&pool->stratum_lock);
+	if (pool->stratum_active)
+		ret = __stratum_send(pool, s, len);
+	else
+		applog(LOG_DEBUG, "Stratum send failed due to no pool stratum_active");
+	mutex_unlock(&pool->stratum_lock);
+
+	return ret;
+}
 
 static void clear_sock(struct pool *pool)
 {
-	SOCKETTYPE sock = pool->sock;
+	size_t n = 0;
 
-	recv(sock, pool->sockbuf, RECVSIZE, MSG_DONTWAIT);
+	mutex_lock(&pool->stratum_lock);
+	/* Ignore return code of curl_easy_recv since we're just clearing
+	 * anything in the socket if it's still alive */
+	if (likely(pool->stratum_curl))
+		curl_easy_recv(pool->stratum_curl, pool->sockbuf, RECVSIZE, &n);
+	mutex_unlock(&pool->stratum_lock);
 	strcpy(pool->sockbuf, "");
 }
 
@@ -923,11 +959,12 @@ char *recv_line(struct pool *pool)
 {
 	ssize_t len, buflen;
 	char *tok, *sret = NULL;
-	size_t n;
+	size_t n = 0;
 
 	if (!strstr(pool->sockbuf, "\n")) {
+		CURLcode rc = CURLE_RECV_ERROR;
 		char s[RBUFSIZE];
-		CURLcode rc;
+		size_t sspace;
 
 		if (!sock_full(pool, true)) {
 			applog(LOG_DEBUG, "Timed out waiting for data on sock_full");
@@ -936,14 +973,19 @@ char *recv_line(struct pool *pool)
 		memset(s, 0, RBUFSIZE);
 
 		mutex_lock(&pool->stratum_lock);
-		rc = curl_easy_recv(pool->stratum_curl, s, RECVSIZE, &n);
+		if (likely(pool->stratum_curl))
+			rc = curl_easy_recv(pool->stratum_curl, s, RECVSIZE, &n);
 		mutex_unlock(&pool->stratum_lock);
 
 		if (rc != CURLE_OK) {
 			applog(LOG_DEBUG, "Failed to recv sock in recv_line");
 			goto out;
 		}
-		strcat(pool->sockbuf, s);
+		/* Prevent buffer overflows, but if 8k is still not enough,
+		 * likely we have had some comms issues and the data is all
+		 * useless anyway */
+		sspace = RECVSIZE - strlen(pool->sockbuf);
+		strncat(pool->sockbuf, s, sspace);
 	}
 
 	buflen = strlen(pool->sockbuf);
@@ -1270,11 +1312,15 @@ bool initiate_stratum(struct pool *pool)
 	json_error_t err;
 	bool ret = false;
 
+	mutex_lock(&pool->stratum_lock);
+	pool->stratum_active = false;
+
 	if (!pool->stratum_curl) {
 		pool->stratum_curl = curl_easy_init();
 		if (unlikely(!pool->stratum_curl))
 			quit(1, "Failed to curl_easy_init in initiate_stratum");
 	}
+	mutex_unlock(&pool->stratum_lock);
 	curl = pool->stratum_curl;
 
 	/* Create a http url for use with curl */
@@ -1303,7 +1349,7 @@ bool initiate_stratum(struct pool *pool)
 
 	sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": []}", swork_id++);
 
-	if (!stratum_send(pool, s, strlen(s))) {
+	if (!__stratum_send(pool, s, strlen(s))) {
 		applog(LOG_DEBUG, "Failed to send s in initiate_stratum");
 		goto out;
 	}
@@ -1369,11 +1415,13 @@ out:
 			       pool->pool_no, pool->nonce1, pool->n2size);
 		}
 	} else {
-		pool->stratum_active = false;
+		applog(LOG_DEBUG, "Initiate stratum failed, disabling stratum_active");
+		mutex_lock(&pool->stratum_lock);
 		if (curl) {
 			curl_easy_cleanup(curl);
 			pool->stratum_curl = NULL;
 		}
+		mutex_unlock(&pool->stratum_lock);
 	}
 
 	return ret;