Browse Source

Merge remote-tracking branch 'upstream/bfgminer' into bfgminer

bdev 13 years ago
parent
commit
302cf37fff
27 changed files with 644 additions and 272 deletions
  1. 1 1
      ADL/adl_sdk.h
  2. 11 0
      Makefile.am
  3. 80 0
      NEWS
  4. 2 2
      README
  5. 32 34
      adl.c
  6. 0 1
      adl.h
  7. 1 1
      adl_functions.h
  8. 5 3
      api.c
  9. 14 13
      autogen.sh
  10. 45 4
      configure.ac
  11. 11 0
      debian/changelog
  12. 1 1
      debian/control
  13. 7 8
      driver-bitforce.c
  14. 36 17
      driver-cairnsmore.c
  15. 1 1
      driver-cpu.c
  16. 128 56
      driver-icarus.c
  17. 76 40
      driver-modminer.c
  18. 1 33
      driver-opencl.c
  19. 7 2
      dynclock.c
  20. 5 0
      dynclock.h
  21. 3 0
      findnonce.c
  22. 1 1
      fpgautils.c
  23. 1 0
      icarus-common.h
  24. 1 1
      libblkmaker
  25. 158 45
      miner.c
  26. 11 3
      miner.h
  27. 5 5
      ocl.c

+ 1 - 1
ADL/adl_sdk.h

@@ -24,7 +24,7 @@
 #include "adl_structures.h"
 
 typedef void*(
-#ifdef WIN32
+#ifdef __stdcall
 	__stdcall
 #endif
 *ADL_MAIN_MALLOC_CALLBACK)(int);

+ 11 - 0
Makefile.am

@@ -22,18 +22,29 @@ bfgminer_LDADD	= $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
 		  @MATH_LIBS@ lib/libgnu.a ccan/libccan.a
 bfgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib
 
+bfgminer_CPPFLAGS += $(NCURSES_CPPFLAGS)
+
 # common sources
 bfgminer_SOURCES := miner.c
 
 bfgminer_SOURCES	+= elist.h miner.h compat.h bench_block.h	\
 		   util.c uthash.h logging.h			\
 		   sha2.c sha2.h api.c
+bfgminer_DEPENDENCIES =
 
 if NEED_LIBBLKMAKER
 SUBDIRS           += libblkmaker
 bfgminer_CPPFLAGS += -Ilibblkmaker
 bfgminer_LDFLAGS  += -Llibblkmaker/.libs -Wl,-rpath,libblkmaker/.libs
 bfgminer_LDADD    += -lblkmaker-0.1 -lblkmaker_jansson-0.1
+
+if HAVE_CYGWIN
+bfgminer_DEPENDENCIES += cygblkmaker-0.1-0.dll cygblkmaker_jansson-0.1-0.dll
+
+cyg%.dll: libblkmaker/.libs/cyg%.dll
+	cp -p $< $@
+endif
+
 endif
 
 bfgminer_SOURCES	+= logging.c

+ 80 - 0
NEWS

@@ -1,5 +1,85 @@
+BFGMiner Version 2.8.2 - October 8, 2012
+
+- Update to libblkmaker 0.1.2
+- Bugfix: --temp-target no longer has a simple default (fixes build without
+OpenCL support)
+- Bugfix: icarus: Silence false epoll error
+- Bugfix: icarus: Set firstrun for errors starting next job, so the current
+one finishes properly
+- Bugfix: icarus: Restore generic failure management for write errors
+- Use strtod not strtol for bitforce temp backup.
+- Cope with broken drivers returning nonsense values for bitforce temperatures.
+- Minor warning fixes.
+- Fix unused warnings on mingw build.
+- Fix sign warning in ocl.c
+- fds need to be zeroed before set in modminer.
+- Put scrypt warning on separate line to avoid 0 being shown on windows as
+bufsize.
+- Prevent corrupt values returned from the opencl code from trying to read
+beyond the end of the buffer by masking the value to a max of 15.
+- Icarus USB write failure is also a comms error
+- api.c DEBUG message has no parameter
+- Icarus catch more USB errors and close/reopen the port
+- API-README update cgminer version number
+- hashmeter fix stats kh/s on 32bit windows
+- cairnsmore: Increase maximum clock frequency to 210 Mhz
+- icarus: Hashrate estimates really don't need the attention of a warning,
+demote them to debug
+- cairnsmore: Automatically "downgrade" default FPGA-per-device to 1 for
+dynclock devices
+- Bugfix: cairnsmore: Get autodetection of dynclock to work consistently
+- cairnsmore: Adjust dynclock usage to react in proper time
+- dynclock: Document function usage
+- cairnsmore: Fix race on dynclock detection
+- icarus: Detect attempts to send commands via work and neuter them
+- cairnsmore: Glasswalker has a minimum multiplier of 20 :(
+- cairnsmore: Detect frequency changing support despite hashing of commands
+- modminer: Allow clocks down to 2 Mhz just in case
+- Allow device drivers and users to properly change target temperatures for
+non-GPUs
+- Check that ncurses*-config installs actually work before deciding to use
+them
+- Bugfix: Fix multiple bugs in autogen.sh
+- - Don't use readlink -f unnecessarily (it's not portable)
+- - Always run autoreconf within the real source directory
+- - Run configure from PWD, *not* the real source directory
+- Bugfix: Include nonce in data buffer for debugging
+- Bugfix: swap32* wants count of 32-bit blocks, not bytes
+- Initial Cygwin port
+- Revert "Remove needless roundl define.", since it is needed for Cygwin and
+OpenWRT
+- Bugfix: Deal with various compiler warnings
+- modminer: Implement --temp-hysteresis logic
+- Support for maximum frequency being below the default, eg when the maximum
+is temporarily reduced to deal with temperature
+- Bugfix: modminer: Reduce dynclock max frequency as needed to keep
+temperature below cutoff
+- Bugfix: Restore disabled label, needed to skip over hashrate calculations
+(which mess up otherwise)
+- Bugfix: bitforce: Count actual throttling as hardware errors
+- icarus: Allow failure in case of reopen failure, now that the miner core
+will retry on its own
+- If a device dies, attempt to reinitialize it occasionally
+- Bugfix: The REST flag is now preferred over WAIT, since the former might
+trigger the latter
+- Bugfix: modminer: Update temperature readings when disabled (fixes thermal
+cutoff recovery)
+- Bugfix: Move thermal cutoff to general watchdog code (fixes bitforce
+recovery)
+- Rename enable_device to register_device, since it only works for setting it
+up at startup
+- Move targettemp from ADL to cgpu_info, so all devices can readily use it
+- Bugfix: "REST" flag had too much padding
+- Bugfix: adl: Only warn and disable GPU due to thermal cutoff, if it's
+actually enabled
+- Bugfix: bitforce: Only warn and disable bitforce due to thermal cutoff, if
+it's actually enabled
+
+
 BFGMiner Version 2.8.1 - September 27, 2012
 
+- Avoid strndup for Windows compatibility
+- Bugfix: cairnsmore: Add missing compat.h include (for sleep)
 - cairnsmore: Implement "identify" for supported firmware
 - Adjust identify_device API to return a bool whether supported or not, for
 runtime capability detection

+ 2 - 2
README

@@ -168,6 +168,8 @@ Options for both config file and command line:
 --submit-threads    Maximum number of share submission threads (default: 64)
 --syslog            Use system log for output messages (default: standard error)
 --temp-cutoff <arg> Temperature where a device will be automatically disabled, one value or comma separated list (default: 95)
+--temp-hysteresis <arg> Set how much the temperature can fluctuate outside limits when automanaging speeds (default: 3)
+--temp-target <arg> Target temperature when automatically managing fan and clock speeds
 --text-only|-T      Disable ncurses formatted screen output
 --url|-o <arg>      URL for bitcoin JSON-RPC server
 --user|-u <arg>     Username for bitcoin JSON-RPC server
@@ -202,9 +204,7 @@ GPU only options:
 --ndevs|-n          Enumerate number of detected GPUs and exit
 --no-adl            Disable the ATI display library used for monitoring and setting GPU parameters
 --no-restart        Do not attempt to restart GPUs that hang
---temp-hysteresis <arg> Set how much the temperature can fluctuate outside limits when automanaging speeds (default: 3)
 --temp-overheat <arg> Overheat temperature when automatically managing fan and GPU speeds (default: 85)
---temp-target <arg> Target temperature when automatically managing fan and GPU speeds (default: 75)
 --vectors|-v <arg>  Override detected optimal vector (1, 2 or 4) - one value or comma separated list
 --worksize|-w <arg> Override detected optimal worksize - one value or comma separated list
 

+ 32 - 34
adl.c

@@ -9,7 +9,7 @@
 
 #include "config.h"
 
-#if defined(HAVE_ADL) && (defined(__linux) || defined (WIN32))
+#ifdef HAVE_ADL
 
 #include <stdio.h>
 #include <string.h>
@@ -23,7 +23,7 @@
 #include "ADL/adl_sdk.h"
 #include "compat.h"
 
-#if defined (__linux)
+#ifndef WIN32
 #include <dlfcn.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -33,7 +33,7 @@
 #endif
 #include "adl_functions.h"
 
-#ifndef WIN32
+#ifndef __stdcall
 #define __stdcall
 #endif
 
@@ -44,7 +44,7 @@
 bool adl_active;
 bool opt_reorder = false;
 
-int opt_hysteresis = 3;
+static
 const int opt_targettemp = 75;
 const int opt_overheattemp = 85;
 static pthread_mutex_t adl_lock;
@@ -73,7 +73,7 @@ static void __stdcall ADL_Main_Memory_Free (void **lpBuffer)
 	}
 }
 
-#if defined (LINUX)
+#ifndef WIN32
 // equivalent functions in linux
 static void *GetProcAddress(void *pLibrary, const char *name)
 {
@@ -99,9 +99,9 @@ static	ADL_OVERDRIVE5_POWERCONTROL_GET	ADL_Overdrive5_PowerControl_Get;
 static	ADL_OVERDRIVE5_POWERCONTROL_SET	ADL_Overdrive5_PowerControl_Set;
 static	ADL_OVERDRIVE5_FANSPEEDTODEFAULT_SET	ADL_Overdrive5_FanSpeedToDefault_Set;
 
-#if defined (LINUX)
+#ifndef WIN32
 	static void *hDLL;	// Handle to .so library
-#else
+#else /* WIN32 */
 	HINSTANCE hDLL;		// Handle to DLL
 #endif
 static int iNumberAdapters;
@@ -137,14 +137,17 @@ static bool prepare_adl(void)
 {
 	int result;
 
-#if defined (LINUX)
-	hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL);
-#else
+#if defined(WIN32) || defined(__CYGWIN__)
+#	ifdef __CYGWIN__
+#		define LoadLibrary(x) dlopen(x, RTLD_LAZY|RTLD_GLOBAL);
+#	endif
 	hDLL = LoadLibrary("atiadlxx.dll");
 	if (hDLL == NULL)
 		// A 32 bit calling application on 64 bit OS will fail to LoadLIbrary.
 		// Try to load the 32 bit library (atiadlxy.dll) instead
 		hDLL = LoadLibrary("atiadlxy.dll");
+#else
+	hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL);
 #endif
 	if (hDLL == NULL) {
 		applog(LOG_INFO, "Unable to load ati adl library");
@@ -490,8 +493,8 @@ void init_adl(int nDevs)
 		}
 
 		/* Set some default temperatures for autotune when enabled */
-		if (!ga->targettemp)
-			ga->targettemp = opt_targettemp;
+		if (!gpus[gpu].targettemp)
+			gpus[gpu].targettemp = opt_targettemp;
 		if (!ga->overtemp)
 			ga->overtemp = opt_overheattemp;
 		if (!gpus[gpu].cutofftemp)
@@ -505,6 +508,7 @@ void init_adl(int nDevs)
 			ga->autoengine = true;
 			ga->managed = true;
 		}
+		gpus[gpu].temp =
 		ga->lasttemp = __gpu_temp(ga);
 	}
 
@@ -744,6 +748,7 @@ bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vdd
 	ga = &gpus[gpu].adl;
 
 	lock_adl();
+	gpus[gpu].temp =
 	*temp = __gpu_temp(ga);
 	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK) {
 		*engineclock = 0;
@@ -1054,15 +1059,15 @@ static bool fan_autotune(int gpu, int temp, int fanpercent, int lasttemp, bool *
 		cgpu->device_last_not_well = time(NULL);
 		cgpu->device_not_well_reason = REASON_DEV_OVER_HEAT;
 		cgpu->dev_over_heat_count++;
-	} else if (temp > ga->targettemp && fanpercent < top && tdiff >= 0) {
+	} else if (temp > gpus[gpu].targettemp && fanpercent < top && tdiff >= 0) {
 		applog(LOG_DEBUG, "Temperature over target, increasing fanspeed");
-		if (temp > ga->targettemp + opt_hysteresis)
+		if (temp > gpus[gpu].targettemp + opt_hysteresis)
 			newpercent = ga->targetfan + 10;
 		else
 			newpercent = ga->targetfan + 5;
 		if (newpercent > top)
 			newpercent = top;
-	} else if (fanpercent > bot && temp < ga->targettemp - opt_hysteresis) {
+	} else if (fanpercent > bot && temp < gpus[gpu].targettemp - opt_hysteresis) {
 		/* Detect large swings of 5 degrees or more and change fan by
 		 * a proportion more */
 		if (tdiff <= 0) {
@@ -1076,10 +1081,10 @@ static bool fan_autotune(int gpu, int temp, int fanpercent, int lasttemp, bool *
 
 		/* We're in the optimal range, make minor adjustments if the
 		 * temp is still drifting */
-		if (fanpercent > bot && tdiff < 0 && lasttemp < ga->targettemp) {
+		if (fanpercent > bot && tdiff < 0 && lasttemp < gpus[gpu].targettemp) {
 			applog(LOG_DEBUG, "Temperature dropping while in target range, decreasing fanspeed");
 			newpercent = ga->targetfan + tdiff;
-		} else if (fanpercent < top && tdiff > 0 && temp > ga->targettemp - opt_hysteresis) {
+		} else if (fanpercent < top && tdiff > 0 && temp > gpus[gpu].targettemp - opt_hysteresis) {
 			applog(LOG_DEBUG, "Temperature rising while in target range, increasing fanspeed");
 			newpercent = ga->targetfan + tdiff;
 		}
@@ -1120,6 +1125,7 @@ void gpu_autotune(int gpu, enum dev_enable *denable)
 
 	lock_adl();
 	ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity);
+	gpus[gpu].temp =
 	temp = __gpu_temp(ga);
 	if (ga->twin)
 		twintemp = __gpu_temp(ga->twin);
@@ -1154,13 +1160,8 @@ void gpu_autotune(int gpu, enum dev_enable *denable)
 
 	if (engine && ga->autoengine) {
 		if (temp > cgpu->cutofftemp) {
-			applog(LOG_WARNING, "Hit thermal cutoff limit on GPU %d, disabling!", gpu);
-			*denable = DEV_RECOVER;
+			// Shutoff and recovery happens back in watchdog_thread
 			newengine = ga->minspeed;
-
-			cgpu->device_last_not_well = time(NULL);
-			cgpu->device_not_well_reason = REASON_DEV_THERMAL_CUTOFF;
-			cgpu->dev_thermal_cutoff_count++;
 		} else if (temp > ga->overtemp && engine > ga->minspeed) {
 			applog(LOG_WARNING, "Overheat detected, decreasing GPU %d clock speed", gpu);
 			newengine = ga->minspeed;
@@ -1168,19 +1169,16 @@ void gpu_autotune(int gpu, enum dev_enable *denable)
 			cgpu->device_last_not_well = time(NULL);
 			cgpu->device_not_well_reason = REASON_DEV_OVER_HEAT;
 			cgpu->dev_over_heat_count++;
-		} else if (temp > ga->targettemp + opt_hysteresis && engine > ga->minspeed && fan_optimal) {
+		} else if (temp > gpus[gpu].targettemp + opt_hysteresis && engine > ga->minspeed && fan_optimal) {
 			applog(LOG_DEBUG, "Temperature %d degrees over target, decreasing clock speed", opt_hysteresis);
 			newengine = engine - ga->lpOdParameters.sEngineClock.iStep;
 			/* Only try to tune engine speed up if this GPU is not disabled */
-		} else if (temp < ga->targettemp && engine < ga->maxspeed && fan_window && *denable == DEV_ENABLED) {
+		} else if (temp < gpus[gpu].targettemp && engine < ga->maxspeed && fan_window && *denable == DEV_ENABLED) {
 			applog(LOG_DEBUG, "Temperature below target, increasing clock speed");
-			if (temp < ga->targettemp - opt_hysteresis)
+			if (temp < gpus[gpu].targettemp - opt_hysteresis)
 				newengine = ga->maxspeed;
 			else
 				newengine = engine + ga->lpOdParameters.sEngineClock.iStep;
-		} else if (temp < ga->targettemp && *denable == DEV_RECOVER && opt_restart) {
-			applog(LOG_NOTICE, "Device recovered to temperature below target, re-enabling");
-			*denable = DEV_ENABLED;
 		}
 
 		if (newengine > ga->maxspeed)
@@ -1232,7 +1230,7 @@ void change_autosettings(int gpu)
 	char input;
 	int val;
 
-	wlogprint("Target temperature: %d\n", ga->targettemp);
+	wlogprint("Target temperature: %d\n", gpus[gpu].targettemp);
 	wlogprint("Overheat temperature: %d\n", ga->overtemp);
 	wlogprint("Cutoff temperature: %d\n", gpus[gpu].cutofftemp);
 	wlogprint("Toggle [F]an auto [G]PU auto\nChange [T]arget [O]verheat [C]utoff\n");
@@ -1257,11 +1255,11 @@ void change_autosettings(int gpu)
 		if (val < 0 || val > 200)
 			wlogprint("Invalid temperature");
 		else
-			ga->targettemp = val;
+			gpus[gpu].targettemp = val;
 	} else if (!strncasecmp(&input, "o", 1)) {
-		wlogprint("Enter overheat temperature for this GPU in C (%d+)", ga->targettemp);
+		wlogprint("Enter overheat temperature for this GPU in C (%d+)", gpus[gpu].targettemp);
 		val = curses_int("");
-		if (val <= ga->targettemp || val > 200)
+		if (val <= gpus[gpu].targettemp || val > 200)
 			wlogprint("Invalid temperature");
 		else
 			ga->overtemp = val;
@@ -1388,7 +1386,7 @@ static void free_adl(void)
 {
 	ADL_Main_Memory_Free ((void **)&lpInfo);
 	ADL_Main_Control_Destroy ();
-#if defined (LINUX)
+#ifndef WIN32
 	dlclose(hDLL);
 #else
 	FreeLibrary(hDLL);

+ 0 - 1
adl.h

@@ -3,7 +3,6 @@
 #ifdef HAVE_ADL
 bool adl_active;
 bool opt_reorder;
-int opt_hysteresis;
 const int opt_targettemp;
 const int opt_overheattemp;
 void init_adl(int nDevs);

+ 1 - 1
adl_functions.h

@@ -29,7 +29,7 @@
 // AMD ADL function types from Version 3.0
 // ------------------------------------------------------------------------------------------------------------
 
-#if defined (linux)
+#ifndef WIN32
  #include <dlfcn.h>	//dyopen, dlsym, dlclose
  #include <stdlib.h>
  #include <string.h>	//memeset

+ 5 - 3
api.c

@@ -221,7 +221,9 @@ static const char *OSINFO =
 #if defined(__APPLE__)
 			"Apple";
 #else
-#if defined (WIN32)
+#if defined (__CYGWIN__)
+			"Cygwin";
+#elif defined (WIN32)
 			"Windows";
 #else
 #if defined(unix)
@@ -558,7 +560,7 @@ struct CODES {
  { SEVERITY_ERR,   MSG_INVBOOL,	PARAM_NONE,	"Invalid parameter should be true or false" },
  { SEVERITY_SUCC,  MSG_FOO,	PARAM_BOOL,	"Failover-Only set to %s" },
  { SEVERITY_SUCC,  MSG_MINECOIN,PARAM_NONE,	"BFGMiner coin" },
- { SEVERITY_SUCC,  MSG_DEBUGSET,PARAM_STR,	"Debug settings" },
+ { SEVERITY_SUCC,  MSG_DEBUGSET,PARAM_NONE,	"Debug settings" },
 #ifdef HAVE_AN_FPGA
  { SEVERITY_SUCC,  MSG_PGAIDENT,PARAM_PGA,	"Identify command sent to PGA%d" },
  { SEVERITY_WARN,  MSG_PGANOID,	PARAM_PGA,	"PGA%d does not support identify" },
@@ -1697,7 +1699,7 @@ static void pgaidentify(__maybe_unused SOCKETTYPE c, char *param, bool isjson, _
 	}
 
 	struct cgpu_info *cgpu = devices[dev];
-	struct device_api *api = cgpu->api;
+	const struct device_api *api = cgpu->api;
 
 	if (api->identify_device && api->identify_device(cgpu))
 		strcpy(io_buffer, message(MSG_PGAIDENT, id, NULL, isjson));

+ 14 - 13
autogen.sh

@@ -1,22 +1,23 @@
-#!/bin/sh
-cwd="$PWD"
-bs_dir="$(dirname $(readlink -f $0))"
-rm -rf "${bs_dir}"/autom4te.cache
-rm -f "${bs_dir}"/aclocal.m4 "${bs_dir}"/ltmain.sh
+#!/bin/sh -e
+bs_dir="$(dirname "$0")"
 
 if test -z "$NOSUBMODULES" ; then
 	echo 'Getting submodules...'
-	git submodule update --init
+	(
+		cd "${bs_dir}"
+		git submodule update --init
+	)
 fi
 
 echo 'Running autoreconf -if...'
-autoreconf -if || exit 1
+(
+	cd "${bs_dir}"
+	rm -rf autom4te.cache
+	rm -f aclocal.m4 ltmain.sh
+	autoreconf -if
+)
+
 if test -z "$NOCONFIGURE" ; then
 	echo 'Configuring...'
-	cd "${bs_dir}" &> /dev/null
-	test "$?" = "0" || e=1
-	test "$cwd" != "$bs_dir" && cd "$bs_dir" &> /dev/null
-	./configure "$@"
-	test "$e" = "1" && exit 1
-	cd "$cwd"
+	"${bs_dir}"/configure "$@"
 fi

+ 45 - 4
configure.ac

@@ -2,7 +2,7 @@
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_maj], [2])
 m4_define([v_min], [8])
-m4_define([v_mic], [1])
+m4_define([v_mic], [2])
 ##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##--##
 m4_define([v_ver], [v_maj.v_min.v_mic])
 m4_define([lt_rev], m4_eval(v_maj + v_min))
@@ -62,6 +62,7 @@ AC_CHECK_HEADERS([sys/prctl.h])
 
 AC_FUNC_ALLOCA
 
+have_cygwin=false
 have_win32=false
 PTHREAD_FLAGS="-lpthread"
 USB_LIBS=""
@@ -97,6 +98,9 @@ case $target in
     WS2_LIBS="-lws2_32"
 	TIMER_LIBS="-lwinmm"
     ;;
+  *-*-cygwin*)
+	have_cygwin=true
+	;;
   powerpc-*-darwin*)
     CFLAGS="$CFLAGS -faltivec"
     PTHREAD_FLAGS=""
@@ -233,10 +237,40 @@ AC_ARG_WITH([curses],
 if test "x$curses" = "xno"; then
 	cursesmsg='User specified --without-curses. TUI support DISABLED'
 else
-	AC_SEARCH_LIBS(addstr, ncurses pdcurses, [
+	if test "x$cross_compiling" != "xyes"; then
+		AC_MSG_CHECKING([for best native curses library])
+		orig_libs="$LIBS"
+		orig_cflags="$CFLAGS"
+		for curses_lib in ncurses{w,}{,6,5}; do
+			if ! ${curses_lib}-config --cflags >/dev/null 2>/dev/null; then
+				continue
+			fi
+			CFLAGS="$orig_cflags $(${curses_lib}-config --cflags)"
+			LIBS="$orig_libs $(${curses_lib}-config --libs)"
+			AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+				#include <curses.h>
+			]], [[
+				WINDOW *w = NULL;
+				mvwprintw(w, 2, 2, "Testing %s", "o hai");
+			]])], [
+				curses=yes
+				cursesmsg="FOUND: ${curses_lib}"
+				AC_MSG_RESULT([$curses_lib])
+				NCURSES_LIBS=`${curses_lib}-config --libs`
+				NCURSES_CPPFLAGS=`${curses_lib}-config --cflags`
+				break
+			], [
+				AC_MSG_WARN([${curses_lib} doesn't seem to be installed properly])
+			])
+		done
+		LIBS="$orig_libs"
+		CFLAGS="$orig_cflags"
+	fi
+	if test "x$curses" != "xyes"; then
+
+	AC_SEARCH_LIBS(addstr, ncursesw ncurses pdcurses, [
 		curses=yes
 		cursesmsg="FOUND: ${ac_cv_search_addstr:2}"
-		AC_DEFINE([HAVE_CURSES], [1], [Defined to 1 if curses TUI support is wanted])
 	], [
 		if test "x$curses" = "xyes"; then
 			AC_MSG_ERROR([Could not find curses library - please install libncurses-dev or pdcurses-dev (or configure --without-curses)])
@@ -246,6 +280,11 @@ else
 			cursesmsg='NOT FOUND. TUI support DISABLED'
 		fi
 	])
+
+	fi
+	if test "x$curses" = "xyes"; then
+		AC_DEFINE([HAVE_CURSES], [1], [Defined to 1 if curses TUI support is wanted])
+	fi
 fi
 
 
@@ -256,6 +295,7 @@ AM_CONDITIONAL([NEED_DYNCLOCK], [test x$icarus$modminer$ztex != xnonono])
 AM_CONDITIONAL([NEED_FPGAUTILS], [test x$icarus$bitforce$modminer$ztex != xnononono])
 AM_CONDITIONAL([HAS_SCRYPT], [test x$scrypt = xyes])
 AM_CONDITIONAL([HAVE_CURSES], [test x$curses = xyes])
+AM_CONDITIONAL([HAVE_CYGWIN], [test x$have_cygwin = xtrue])
 AM_CONDITIONAL([HAVE_WINDOWS], [test x$have_win32 = xtrue])
 AM_CONDITIONAL([HAVE_x86_64], [test x$have_x86_64 = xtrue])
 
@@ -372,6 +412,7 @@ AC_SUBST(JANSSON_LIBS)
 AC_SUBST(PTHREAD_FLAGS)
 AC_SUBST(DLOPEN_FLAGS)
 AC_SUBST(PTHREAD_LIBS)
+AC_SUBST(NCURSES_CPPFLAGS)
 AC_SUBST(NCURSES_LIBS)
 AC_SUBST(PDCURSES_LIBS)
 AC_SUBST(WS2_LIBS)
@@ -465,7 +506,7 @@ fi
 
 echo
 echo "Compilation............: make (or gmake)"
-echo "  CPPFLAGS.............: $CPPFLAGS"
+echo "  CPPFLAGS.............: $CPPFLAGS $NCURSES_CPPFLAGS"
 echo "  CFLAGS...............: $CFLAGS"
 echo "  LDFLAGS..............: $LDFLAGS $PTHREAD_FLAGS $USB_FLAGS"
 echo "  LDADD................: $DLOPEN_FLAGS $LIBCURL_LIBS $JANSSON_LIBS $PTHREAD_LIBS $NCURSES_LIBS $PDCURSES_LIBS $WS2_LIBS $TIMER_LIBS $MATH_LIBS $UDEV_LIBS $USB_LIBS"

+ 11 - 0
debian/changelog

@@ -1,3 +1,14 @@
+bfgminer (2.8.2-0precise1) precise; urgency=low
+
+  * Numerous fixups for Enterpoint's Cairnsmore dynamic clocking; it has actually been tested this time. :)
+  * Support for --temp-target and --temp-hysteresis controls on ModMiner FPGA devices.
+  * Generic failure management for all devices, including automatically attempting to restart dead devices.
+  * Improved portability to new platforms, partially including Cygwin.
+  * Various minor error handling improvements and bugfixes.
+
+ -- Luke Dashjr <luke+bfgminer@dashjr.org>  Mon, 08 Oct 2012 23:13:01 -0000
+
+
 bfgminer (2.8.1-0precise1) precise; urgency=low
 
   * Many improvements for Enterpoint's Cairsmore, including (experimental) support for Glasswalker's dynamic frequency bitstream.

+ 1 - 1
debian/control

@@ -2,7 +2,7 @@ Source: bfgminer
 Priority: optional
 Section: misc
 Maintainer: Luke Dashjr <luke_bfgminer@dashjr.org>
-Standards-Version: 2.8.1
+Standards-Version: 2.8.2
 Build-Depends: build-essential, debhelper, autoconf, automake, libtool, libssl-dev, yasm, pkg-config, libudev-dev, libcurl4-openssl-dev, wget, unzip, libjansson-dev, libncurses5-dev, libudev-dev, libusb-1.0-0-dev
 
 Package: bfgminer

+ 7 - 8
driver-bitforce.c

@@ -306,16 +306,13 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)
 	if ((!strncasecmp(pdevbuf, "TEMP", 4)) && (s = strchr(pdevbuf + 4, ':'))) {
 		float temp = strtof(s + 1, NULL);
 
+		/* Cope with older software  that breaks and reads nonsense
+		 * values */
+		if (temp > 100)
+			temp = strtod(s + 1, NULL);
+
 		if (temp > 0) {
 			bitforce->temp = temp;
-			if (unlikely(bitforce->cutofftemp > 0 && temp > bitforce->cutofftemp)) {
-				applog(LOG_WARNING, "BFL%i: Hit thermal cutoff limit, disabling!", bitforce->device_id);
-				bitforce->deven = DEV_RECOVER;
-
-				bitforce->device_last_not_well = time(NULL);
-				bitforce->device_not_well_reason = REASON_DEV_THERMAL_CUTOFF;
-				bitforce->dev_thermal_cutoff_count++;
-			}
 		}
 	} else {
 		/* Use the temperature monitor as a kind of watchdog for when
@@ -471,6 +468,8 @@ static int64_t bitforce_get_result(struct thr_info *thr, struct work *work)
 		bitforce->device_last_not_well = time(NULL);
 		bitforce->device_not_well_reason = REASON_DEV_OVER_HEAT;
 		bitforce->dev_over_heat_count++;
+		++bitforce->hw_errors;
+		++hw_errors;
 
 		/* If the device truly throttled, it didn't process the job and there
 		 * are no results. But check first, just in case we're wrong about it

+ 36 - 17
driver-cairnsmore.c

@@ -18,12 +18,14 @@
 // This is a general ballpark
 #define CAIRNSMORE1_HASH_TIME 0.0000000024484
 
-#define CAIRNSMORE1_MINIMUM_CLOCK  5
+#define CAIRNSMORE1_MINIMUM_CLOCK  50
 #define CAIRNSMORE1_DEFAULT_CLOCK  200
-#define CAIRNSMORE1_MAXIMUM_CLOCK  200
+#define CAIRNSMORE1_MAXIMUM_CLOCK  210
 
 struct device_api cairnsmore_api;
 
+static void cairnsmore_api_init();
+
 static bool cairnsmore_detect_one(const char *devpath)
 {
 	struct ICARUS_INFO *info = calloc(1, sizeof(struct ICARUS_INFO));
@@ -56,17 +58,20 @@ static int cairnsmore_detect_auto(void)
 
 static void cairnsmore_detect()
 {
+	cairnsmore_api_init();
 	// Actual serial detection is handled by Icarus driver
 	serial_detect_auto_byname(&cairnsmore_api, cairnsmore_detect_one, cairnsmore_detect_auto);
 }
 
-static bool cairnsmore_send_cmd(int fd, uint8_t cmd, uint8_t data)
+static bool cairnsmore_send_cmd(int fd, uint8_t cmd, uint8_t data, bool probe)
 {
 	unsigned char pkt[64] =
 		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
 		"vdi\xb7"
 		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
-		"BFG0" "\xff\xff\xff\xff" "\0\0\0\0";
+		"bfg0" "\xff\xff\xff\xff" "\xb5\0\0\0";
+	if (unlikely(probe))
+		pkt[61] = '\x01';
 	pkt[32] = 0xda ^ cmd ^ data;
 	pkt[33] = data;
 	pkt[34] = cmd;
@@ -75,12 +80,11 @@ static bool cairnsmore_send_cmd(int fd, uint8_t cmd, uint8_t data)
 
 bool cairnsmore_supports_dynclock(int fd)
 {
-	unsigned char pkts[64] =
-		"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
-		"\xe6\x3c\0\xb7"  // Set frequency multiplier to 60 (150 Mhz)
-		"\0\0\0\0\0\0\0\0\0\0\0\0" "BFG0"
-		"\x8b\xdb\x05\x1a" "\xff\xff\xff\xff" "\x00\x00\x1e\xfd";
-	if (write(fd, pkts, sizeof(pkts)) != sizeof(pkts))
+	if (!cairnsmore_send_cmd(fd, 0, 1, true))
+		return false;
+	struct timeval tv_start, elapsed;
+	gettimeofday(&tv_start, NULL);
+	if (!cairnsmore_send_cmd(fd, 0, 1, true))
 		return false;
 
 	uint32_t nonce = 0;
@@ -91,19 +95,24 @@ bool cairnsmore_supports_dynclock(int fd)
 			.work_restart_fd = -1,
 		};
 		icarus_gets((unsigned char*)&nonce, fd, &tv_finish, &dummy, 1);
+		timersub(&tv_finish, &tv_start, &elapsed);
 	}
+	applog(LOG_DEBUG, "Cairnsmore dynclock detection... Got %08x in %d.%06ds", nonce, elapsed.tv_sec, elapsed.tv_usec);
 	switch (nonce) {
-		case 0x000b1b5e:  // on big    endian
-		case 0x5e1b0b00:  // on little endian
+		case 0x00949a6f:  // big    endian
+		case 0x6f9a9400:  // little endian
 			// Hashed the command, so it's not supported
 			return false;
 		default:
-			// TODO: nonce from a real job... handle it
+			applog(LOG_WARNING, "Unexpected nonce from dynclock probe: %08x", be32toh(nonce));
+			return false;
 		case 0:
 			return true;
 	}
 }
 
+#define cairnsmore_send_cmd(fd, cmd, data) cairnsmore_send_cmd(fd, cmd, data, false)
+
 static bool cairnsmore_change_clock_func(struct thr_info *thr, int bestM)
 {
 	struct cgpu_info *cm1 = thr->cgpu;
@@ -127,6 +136,7 @@ static bool cairnsmore_init(struct thr_info *thr)
 {
 	struct cgpu_info *cm1 = thr->cgpu;
 	struct ICARUS_INFO *info = cm1->cgpu_data;
+	struct icarus_state *state = thr->cgpu_data;
 
 	if (cairnsmore_supports_dynclock(cm1->device_fd)) {
 		info->dclk_change_clock_func = cairnsmore_change_clock_func;
@@ -140,14 +150,19 @@ static bool cairnsmore_init(struct thr_info *thr)
 		       cm1->api->name, cm1->device_id,
 		       CAIRNSMORE1_DEFAULT_CLOCK, CAIRNSMORE1_MINIMUM_CLOCK, CAIRNSMORE1_MAXIMUM_CLOCK
 		);
+		// The dynamic-clocking firmware connects each FPGA as its own device
+		if (!(info->user_set & 1)) {
+			info->work_division = 1;
+			if (!(info->user_set & 2))
+				info->fpga_count = 1;
+		}
 	} else {
 		applog(LOG_WARNING, "%s %u: Frequency scaling not supported",
 			cm1->api->name, cm1->device_id
 		);
-		// Test failures corrupt the hash state, so next scanhash is a firstrun
-		struct icarus_state *state = thr->cgpu_data;
-		state->firstrun = true;
 	}
+	// Commands corrupt the hash state, so next scanhash is a firstrun
+	state->firstrun = true;
 
 	return true;
 }
@@ -192,7 +207,6 @@ static bool cairnsmore_identify(struct cgpu_info *cm1)
 
 extern struct device_api icarus_api;
 
-__attribute__((constructor(1000)))
 static void cairnsmore_api_init()
 {
 	cairnsmore_api = icarus_api;
@@ -203,3 +217,8 @@ static void cairnsmore_api_init()
 	cairnsmore_api.identify_device = cairnsmore_identify;
 	cairnsmore_api.get_api_extra_device_status = cairnsmore_api_extra_device_status;
 }
+
+struct device_api cairnsmore_api = {
+	// Needed to get to cairnsmore_api_init at all
+	.api_detect = cairnsmore_detect,
+};

+ 1 - 1
driver-cpu.c

@@ -233,7 +233,7 @@ double bench_algo_stage3(
 	memset(&work, 0, sizeof(work));
 	memcpy(&work, &bench_block, min_size);
 
-	struct thr_info dummy = {0};
+	static struct thr_info dummy;
 
 	struct timeval end;
 	struct timeval start;

+ 128 - 56
driver-icarus.c

@@ -173,6 +173,11 @@ static void rev(unsigned char *s, size_t l)
 #define icarus_open2(devpath, baud, purge)  serial_open(devpath, baud, ICARUS_READ_FAULT_DECISECONDS, purge)
 #define icarus_open(devpath, baud)  icarus_open2(devpath, baud, false)
 
+#define ICA_GETS_ERROR -1
+#define ICA_GETS_OK 0
+#define ICA_GETS_RESTART 1
+#define ICA_GETS_TIMEOUT 2
+
 int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct thr_info *thr, int read_count)
 {
 	ssize_t ret = 0;
@@ -188,13 +193,13 @@ int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct th
 	};
 	struct epoll_event evr[2];
 	int epoll_timeout = ICARUS_READ_FAULT_DECISECONDS * 100;
+	if (thr && thr->work_restart_fd != -1) {
 	epollfd = epoll_create(2);
 	if (epollfd != -1) {
 		if (-1 == epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev)) {
 			close(epollfd);
 			epollfd = -1;
 		}
-		if (thr->work_restart_fd != -1)
 		{
 			ev.data.fd = thr->work_restart_fd;
 			if (-1 == epoll_ctl(epollfd, EPOLL_CTL_ADD, thr->work_restart_fd, &ev))
@@ -208,6 +213,7 @@ int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct th
 	}
 	else
 		applog(LOG_ERR, "Icarus: Error creating epoll");
+	}
 #endif
 
 	// Read reply 1 byte at a time to get earliest tv_finish
@@ -228,6 +234,8 @@ int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct th
 		else
 #endif
 		ret = read(fd, buf, 1);
+		if (ret < 0)
+			return ICA_GETS_ERROR;
 
 		if (first)
 			gettimeofday(tv_finish, NULL);
@@ -236,7 +244,7 @@ int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct th
 		{
 			if (epollfd != -1)
 				close(epollfd);
-			return 0;
+			return ICA_GETS_OK;
 		}
 
 		if (ret > 0) {
@@ -247,17 +255,26 @@ int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct th
 		}
 			
 		rc++;
-		if (rc >= read_count || thr->work_restart) {
+		if (rc >= read_count) {
+			if (epollfd != -1)
+				close(epollfd);
+			if (opt_debug) {
+				applog(LOG_DEBUG,
+					"Icarus Read: No data in %.2f seconds",
+					(float)rc/(float)TIME_FACTOR);
+			}
+			return ICA_GETS_TIMEOUT;
+		}
+
+		if (thr && thr->work_restart) {
 			if (epollfd != -1)
 				close(epollfd);
 			if (opt_debug) {
-				rc *= ICARUS_READ_FAULT_DECISECONDS;
 				applog(LOG_DEBUG,
-			        "Icarus Read: %s %d.%d seconds",
-			        thr->work_restart ? "Work restart at" : "No data in",
-			        rc / 10, rc % 10);
+					"Icarus Read: Work restart at %.2f seconds",
+					(float)(rc)/(float)TIME_FACTOR);
 			}
-			return 1;
+			return ICA_GETS_RESTART;
 		}
 	}
 }
@@ -275,6 +292,13 @@ static int icarus_write(int fd, const void *buf, size_t bufLen)
 
 #define icarus_close(fd) close(fd)
 
+/* Close the device file descriptor held in thr->cgpu->device_fd and then
+ * set that field to -1 so later code can tell the device is closed. */
+static void do_icarus_close(struct thr_info *thr)
+{
+	struct cgpu_info *icarus = thr->cgpu;
+	icarus_close(icarus->device_fd);
+	icarus->device_fd = -1;
+}
+
 static const char *timing_mode_str(enum timing_mode timing_mode)
 {
 	switch(timing_mode) {
@@ -468,6 +492,7 @@ static void get_options(int this_option_offset, struct ICARUS_INFO *info)
 				*(colon2++) = '\0';
 
 			if (*colon) {
+				info->user_set |= 1;
 				tmp = atoi(colon);
 				if (tmp == 1 || tmp == 2 || tmp == 4 || tmp == 8) {
 					*work_division = tmp;
@@ -484,6 +509,7 @@ static void get_options(int this_option_offset, struct ICARUS_INFO *info)
 					*(colon++) = '\0';
 
 			  if (*colon2) {
+				info->user_set |= 2;
 				tmp = atoi(colon2);
 				if (tmp > 0 && tmp <= *work_division)
 					*fpga_count = tmp;
@@ -555,11 +581,7 @@ bool icarus_detect_custom(const char *devpath, struct device_api *api, struct IC
 	gettimeofday(&tv_start, NULL);
 
 	memset(nonce_bin, 0, sizeof(nonce_bin));
-	struct thr_info dummy = {
-		.work_restart = false,
-		.work_restart_fd = -1,
-	};
-	icarus_gets(nonce_bin, fd, &tv_finish, &dummy, 1);
+	icarus_gets(nonce_bin, fd, &tv_finish, NULL, 1);
 
 	icarus_close(fd);
 
@@ -592,6 +614,7 @@ bool icarus_detect_custom(const char *devpath, struct device_api *api, struct IC
 	icarus = calloc(1, sizeof(struct cgpu_info));
 	icarus->api = api;
 	icarus->device_path = strdup(devpath);
+	icarus->device_fd = -1;
 	icarus->threads = 1;
 	add_cgpu(icarus);
 
@@ -647,6 +670,8 @@ static bool icarus_prepare(struct thr_info *thr)
 
 	struct timeval now;
 
+	icarus->device_fd = -1;
+
 	int fd = icarus_open2(icarus->device_path, info->baud, true);
 	if (unlikely(-1 == fd)) {
 		applog(LOG_ERR, "Failed to open Icarus on %s",
@@ -685,23 +710,58 @@ static bool icarus_reopen(struct cgpu_info *icarus, struct icarus_state *state,
 	*fdp = icarus->device_fd = icarus_open(icarus->device_path, info->baud);
 	if (unlikely(-1 == *fdp)) {
 		applog(LOG_ERR, "%s %u: Failed to reopen on %s", icarus->api->name, icarus->device_id, icarus->device_path);
+		icarus->device_last_not_well = time(NULL);
+		icarus->device_not_well_reason = REASON_DEV_COMMS_ERROR;
+		icarus->dev_comms_error_count++;
 		state->firstrun = true;
 		return false;
 	}
 	return true;
 }
 
+/* Send one 64-byte job (ob_bin) to the device on thr->cgpu->device_fd.
+ *
+ * state->tv_workstart is stamped immediately before the write.
+ * Returns true when icarus_write() reports success (0).  On a non-zero
+ * result the device is closed via do_icarus_close(), the comms-error
+ * status fields on the cgpu are updated, and false is returned. */
+static bool icarus_start_work(struct thr_info *thr, const unsigned char *ob_bin)
+{
+	struct cgpu_info *icarus = thr->cgpu;
+	struct icarus_state *state = thr->cgpu_data;
+	int fd = icarus->device_fd;
+	int ret;
+	char *ob_hex;
+
+	gettimeofday(&state->tv_workstart, NULL);
+
+	ret = icarus_write(fd, ob_bin, 64);
+	if (ret) {
+		// Write failed: drop the fd and record the failure for status reporting
+		do_icarus_close(thr);
+		applog(LOG_ERR, "ICA%i: Comms error", icarus->device_id);
+		icarus->device_last_not_well = time(NULL);
+		icarus->device_not_well_reason = REASON_DEV_COMMS_ERROR;
+		icarus->dev_comms_error_count++;
+		return false;	/* This should never happen */
+	}
+
+	// Only pay for the hex conversion when debug logging is enabled
+	if (opt_debug) {
+		ob_hex = bin2hex(ob_bin, 64);
+		if (ob_hex) {
+			applog(LOG_DEBUG, "%s %u sent: %s",
+				icarus->api->name,
+				icarus->device_id, ob_hex);
+			free(ob_hex);
+		}
+	}
+
+	return true;
+}
+
 static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 				__maybe_unused int64_t max_nonce)
 {
 	struct cgpu_info *icarus;
 	int fd;
-	int ret, lret;
+	int ret;
 
 	struct ICARUS_INFO *info;
 
 	unsigned char ob_bin[64] = {0}, nonce_bin[ICARUS_READ_SIZE] = {0};
-	char *ob_hex;
 	uint32_t nonce;
 	int64_t hash_count;
 	struct timeval tv_start, elapsed;
@@ -726,6 +786,13 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 	// Prepare the next work immediately
 	memcpy(ob_bin, work->midstate, 32);
 	memcpy(ob_bin + 52, work->data + 64, 12);
+	if (!(memcmp(&ob_bin[56], "\xff\xff\xff\xff", 4)
+	   || memcmp(&ob_bin, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 32))) {
+		// This sequence is used on cairnsmore bitstreams for commands, NEVER send it otherwise
+		applog(LOG_WARNING, "%s %u: Received job attempting to send a command, corrupting it!",
+		       icarus->api->name, icarus->device_id);
+		ob_bin[56] = 0;
+	}
 	rev(ob_bin, 32);
 	rev(ob_bin + 52, 12);
 
@@ -737,26 +804,34 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 		if (state->changework)
 		{
 			state->changework = false;
-			lret = 1;
+			ret = ICA_GETS_RESTART;
 		}
 		else
 		{
 			/* Icarus will return 4 bytes (ICARUS_READ_SIZE) nonces or nothing */
-			lret = icarus_gets(nonce_bin, fd, &state->tv_workfinish, thr, info->read_count);
-			if (lret) {
-				if (thr->work_restart) {
-
-				// The prepared work is invalid, and the current work is abandoned
-				// Go back to the main loop to get the next work, and stuff
-				// Returning to the main loop will clear work_restart, so use a flag...
-				state->changework = true;
-				return 0;
-
-				}
-				if (info->quirk_reopen == 1 && !icarus_reopen(icarus, state, &fd))
+			ret = icarus_gets(nonce_bin, fd, &state->tv_workfinish, thr, info->read_count);
+			switch (ret) {
+				case ICA_GETS_RESTART:
+					// The prepared work is invalid, and the current work is abandoned
+					// Go back to the main loop to get the next work, and stuff
+					// Returning to the main loop will clear work_restart, so use a flag...
+					state->changework = true;
 					return 0;
+				case ICA_GETS_ERROR:
+					do_icarus_close(thr);
+					applog(LOG_ERR, "ICA%i: Comms error", icarus->device_id);
+					icarus->device_last_not_well = time(NULL);
+					icarus->device_not_well_reason = REASON_DEV_COMMS_ERROR;
+					icarus->dev_comms_error_count++;
+					if (!icarus_reopen(icarus, state, &fd))
+						return -1;
+					break;
+				case ICA_GETS_TIMEOUT:
+					if (info->quirk_reopen == 1 && !icarus_reopen(icarus, state, &fd))
+						return -1;
+				case ICA_GETS_OK:
+					break;
 			}
-			
 		}
 
 		tv_start = state->tv_workstart;
@@ -764,7 +839,7 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 	}
 	else
 	if (fd == -1 && !icarus_reopen(icarus, state, &fd))
-		return 0;
+		return -1;
 
 #ifndef WIN32
 	tcflush(fd, TCOFLUSH);
@@ -774,35 +849,23 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 	nonce = be32toh(nonce);
 
 	// Handle dynamic clocking for "subclass" devices
-	// This runs before sending next job, in case it isn't supported
+	// This needs to run before sending next job, since it hashes the command too
 	if (info->dclk.freqM && likely(!state->firstrun)) {
-		dclk_gotNonces(&info->dclk);
+		int qsec = ((4 * elapsed.tv_sec) + (elapsed.tv_usec / 250000)) ?: 1;
+		for (int n = qsec; n; --n)
+			dclk_gotNonces(&info->dclk);
 		if (nonce && !test_nonce(&state->last_work, nonce, false))
-			dclk_errorCount(&info->dclk, 1.0);
+			dclk_errorCount(&info->dclk, qsec);
 		dclk_preUpdate(&info->dclk);
 		dclk_updateFreq(&info->dclk, info->dclk_change_clock_func, thr);
 	}
 
-	gettimeofday(&state->tv_workstart, NULL);
-
-	ret = icarus_write(fd, ob_bin, sizeof(ob_bin));
-	if (ret) {
-		icarus_close(fd);
-		return -1;	/* This should never happen */
-	}
-
-	if (opt_debug) {
-		ob_hex = bin2hex(ob_bin, sizeof(ob_bin));
-		if (ob_hex) {
-			applog(LOG_DEBUG, "%s %u sent: %s",
-				icarus->api->name,
-				icarus->device_id, ob_hex);
-			free(ob_hex);
-		}
-	}
+	if (!icarus_start_work(thr, ob_bin))
+		/* This should never happen */
+		state->firstrun = true;
 
 	if (info->quirk_reopen == 2 && !icarus_reopen(icarus, state, &fd))
-		return 0;
+		state->firstrun = true;
 
 	work->blk.nonce = 0xffffffff;
 
@@ -815,7 +878,7 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 	// OK, done starting Icarus's next job... now process the last run's result!
 
 	// aborted before becoming idle, get new work
-	if (nonce == 0 && lret) {
+	if (ret == ICA_GETS_TIMEOUT || ret == ICA_GETS_RESTART) {
 		memcpy(&state->last_work, work, sizeof(state->last_work));
 		// ONLY up to just when it aborted
 		// We didn't read a reply so we don't subtract ICARUS_READ_TIME
@@ -842,6 +905,16 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 	was_hw_error = (curr_hw_errors > icarus->hw_errors);
 	memcpy(&state->last_work, work, sizeof(state->last_work));
 
+	// Force a USB close/reopen on any hw error
+	if (was_hw_error)
+		if (info->quirk_reopen != 2) {
+			if (!icarus_reopen(icarus, state, &fd))
+				state->firstrun = true;
+			// Some devices (Cairnsmore1, for example) abort hashing when reopened, so send the job again
+			if (!icarus_start_work(thr, ob_bin))
+				state->firstrun = true;
+		}
+
 	hash_count = (nonce & info->nonce_mask);
 	hash_count++;
 	hash_count *= info->fpga_count;
@@ -970,8 +1043,8 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
 			else if (info->timing_mode == MODE_SHORT)
 				info->do_icarus_timing = false;
 
-//			applog(LOG_WARNING, "%s %u Re-estimate: read_count=%d fullnonce=%fs history count=%d Hs=%e W=%e values=%d hash range=0x%08lx min data count=%u", icarus->api->name, icarus->device_id, read_count, fullnonce, count, Hs, W, values, hash_count_range, info->min_data_count);
-			applog(LOG_WARNING, "%s %u Re-estimate: Hs=%e W=%e read_count=%d fullnonce=%.3fs",
+//			applog(LOG_DEBUG, "%s %u Re-estimate: read_count=%d fullnonce=%fs history count=%d Hs=%e W=%e values=%d hash range=0x%08lx min data count=%u", icarus->api->name, icarus->device_id, read_count, fullnonce, count, Hs, W, values, hash_count_range, info->min_data_count);
+			applog(LOG_DEBUG, "%s %u Re-estimate: Hs=%e W=%e read_count=%d fullnonce=%.3fs",
 					icarus->api->name,
 					icarus->device_id, Hs, W, read_count, fullnonce);
 		}
@@ -1016,8 +1089,7 @@ static struct api_data *icarus_api_stats(struct cgpu_info *cgpu)
 
 static void icarus_shutdown(struct thr_info *thr)
 {
-	struct cgpu_info *icarus = thr->cgpu;
-	icarus_close(icarus->device_fd);
+	do_icarus_close(thr);
 	free(thr->cgpu_data);
 }
 

+ 76 - 40
driver-modminer.c

@@ -20,7 +20,7 @@
 
 #define BITSTREAM_FILENAME "fpgaminer_top_fixed7_197MHz.bit"
 #define BISTREAM_USER_ID "\2\4$B"
-#define MODMINER_MINIMUM_CLOCK  178
+#define MODMINER_MINIMUM_CLOCK    2
 #define MODMINER_DEFAULT_CLOCK  200
 #define MODMINER_MAXIMUM_CLOCK  210
 
@@ -36,6 +36,7 @@ struct modminer_fpga_state {
 	char next_work_cmd[46];
 
 	struct dclk_data dclk;
+	uint8_t freqMaxMaxM;
 	// Number of nonces didn't meet pdiff 1, ever
 	int bad_share_counter;
 	// Number of nonces did meet pdiff 1, ever
@@ -176,6 +177,7 @@ modminer_reopen(struct cgpu_info*modminer)
 } while(0)
 
 #define status_read(eng)  do {  \
+FD_ZERO(&fds); \
 FD_SET(fd, &fds);  \
 select(fd+1, &fds, NULL, NULL, NULL);  \
 	if (1 != read(fd, buf, 1))  \
@@ -188,7 +190,7 @@ static bool
 modminer_fpga_upload_bitstream(struct cgpu_info*modminer)
 {
 	struct modminer_fpga_state *state = modminer->thr[0]->cgpu_data;
-fd_set fds;
+	fd_set fds;
 	char buf[0x100];
 	unsigned char *ubuf = (unsigned char*)buf;
 	unsigned long len, flen;
@@ -434,6 +436,7 @@ modminer_fpga_init(struct thr_info *thr)
 			continue;
 		break;
 	}
+	state->freqMaxMaxM =
 	state->dclk.freqMaxM = state->dclk.freqM;
 	if (MODMINER_DEFAULT_CLOCK / 2 < state->dclk.freqM) {
 		if (!modminer_change_clock(thr, false, -(state->dclk.freqM * 2 - MODMINER_DEFAULT_CLOCK)))
@@ -490,6 +493,73 @@ get_modminer_statline_before(char *buf, struct cgpu_info *modminer)
 		strcat(buf, "               | ");
 }
 
+/* Query one FPGA for its temperature and adjust its clock ceiling.
+ *
+ * Writes the two-byte command {0x0a, fpga id} to modminer->device_fd and
+ * reads a one-byte reply, which is stored in state->temp.  If either the
+ * write or the read fails, nothing is updated.
+ *
+ *  - Above targettemp + opt_hysteresis: at most once per second (tracked by
+ *    state->last_cutoff_reduced) the clock is reduced with
+ *    modminer_reduce_clock() and dclk.freqMaxM is lowered to the resulting
+ *    frequency.
+ *  - Below targettemp while dclk.freqMaxM is under freqMaxMaxM: the ceiling
+ *    is restored in one go when below targettemp - opt_hysteresis, otherwise
+ *    raised by a single step.
+ *
+ * Callers visible in this file: modminer_process_results() calls this with
+ * device_mutex held; modminer_get_stats() calls it only while the device is
+ * not DEV_ENABLED. */
+static void modminer_get_temperature(struct cgpu_info *modminer, struct thr_info *thr)
+{
+	struct modminer_fpga_state *state = thr->cgpu_data;
+
+#ifdef WIN32
+	/* Workaround for bug in Windows driver */
+	if (!modminer_reopen(modminer))
+		// This function returns void; just give up on this reading
+		return;
+#endif
+
+	int fd = modminer->device_fd;
+	int fpgaid = thr->device_thread;
+	char cmd[2] = {'\x0a', fpgaid};
+	char temperature;
+
+	if (2 == write(fd, cmd, 2) && read(fd, &temperature, 1) == 1)
+	{
+		state->temp = temperature;
+		if (temperature > modminer->targettemp + opt_hysteresis) {
+			time_t now = time(NULL);
+			// Rate-limit reductions to one per wall-clock second
+			if (state->last_cutoff_reduced != now) {
+				state->last_cutoff_reduced = now;
+				int oldFreq = state->dclk.freqM;
+				if (modminer_reduce_clock(thr, false))
+					applog(LOG_NOTICE, "%s %u.%u: Frequency %s from %u to %u Mhz (temp: %d)",
+					       modminer->api->name, modminer->device_id, fpgaid,
+					       (oldFreq > state->dclk.freqM ? "dropped" : "raised "),
+					       oldFreq * 2, state->dclk.freqM * 2,
+					       temperature
+					);
+				// Cap the dynamic clock at whatever frequency we ended up on
+				state->dclk.freqMaxM = state->dclk.freqM;
+			}
+		}
+		else
+		if (state->dclk.freqMaxM < state->freqMaxMaxM && temperature < modminer->targettemp) {
+			if (temperature < modminer->targettemp - opt_hysteresis) {
+				// Comfortably cool: lift the cap back to the original maximum
+				state->dclk.freqMaxM = state->freqMaxMaxM;
+			} else {
+				// Just under target: raise the cap one step at a time
+				++state->dclk.freqMaxM;
+			}
+		}
+	}
+}
+
+/* Update modminer->temp with the highest state->temp among the device's
+ * threads (one thread per FPGA).  Always returns true. */
+static bool modminer_get_stats(struct cgpu_info *modminer)
+{
+	int hottest = 0;
+	bool get_temp = (modminer->deven != DEV_ENABLED);
+	// Only query the hardware here when the device is not enabled; while it is
+	// enabled, modminer_process_results() already reads the temperature
+	// NOTE: Don't need to mess with mutex here, since the device is disabled
+	for (int i = modminer->threads; i--; ) {
+		struct thr_info*thr = modminer->thr[i];
+		struct modminer_fpga_state *state = thr->cgpu_data;
+		if (get_temp)
+			modminer_get_temperature(modminer, thr);
+		int temp = state->temp;
+		if (temp > hottest)
+			hottest = temp;
+	}
+
+	modminer->temp = (float)hottest;
+
+	return true;
+}
+
 static struct api_data*
 get_modminer_api_extra_device_status(struct cgpu_info*modminer)
 {
@@ -505,7 +575,8 @@ get_modminer_api_extra_device_status(struct cgpu_info*modminer)
 		if (state->temp)
 			json_object_set(o, "Temperature", json_integer(state->temp));
 		json_object_set(o, "Frequency", json_real((double)state->dclk.freqM * 2 * 1000000.));
-		json_object_set(o, "Max Frequency", json_real((double)state->dclk.freqMaxM * 2 * 1000000.));
+		json_object_set(o, "Cool Max Frequency", json_real((double)state->dclk.freqMaxM * 2 * 1000000.));
+		json_object_set(o, "Max Frequency", json_real((double)state->freqMaxMaxM * 2 * 1000000.));
 		json_object_set(o, "Hardware Errors", json_integer(state->bad_share_counter));
 		json_object_set(o, "Valid Nonces", json_integer(state->good_share_counter));
 
@@ -574,50 +645,14 @@ modminer_process_results(struct thr_info*thr)
 	int fd;
 	struct work *work = &state->running_work;
 
-	char cmd[2], temperature;
 	uint32_t nonce;
 	long iter;
 	int immediate_bad_nonces = 0, immediate_nonces = 0;
 	bool bad;
-	cmd[0] = '\x0a';
-	cmd[1] = fpgaid;
 
 	mutex_lock(&modminer->device_mutex);
-#ifdef WIN32
-	/* Workaround for bug in Windows driver */
-	if (!modminer_reopen(modminer))
-		return -1;
-#endif
+	modminer_get_temperature(modminer, thr);
 	fd = modminer->device_fd;
-	if (2 == write(fd, cmd, 2) && read(fd, &temperature, 1) == 1)
-	{
-		state->temp = temperature;
-		if (!fpgaid)
-			modminer->temp = (float)temperature;
-		if (temperature > modminer->cutofftemp - 2) {
-			if (temperature > modminer->cutofftemp) {
-				applog(LOG_WARNING, "%s %u.%u: Hit thermal cutoff limit, disabling device!", modminer->api->name, modminer->device_id, fpgaid);
-				modminer->deven = DEV_RECOVER;
-
-				modminer->device_last_not_well = time(NULL);
-				modminer->device_not_well_reason = REASON_DEV_THERMAL_CUTOFF;
-				++modminer->dev_thermal_cutoff_count;
-			} else {
-				time_t now = time(NULL);
-				if (state->last_cutoff_reduced != now) {
-					state->last_cutoff_reduced = now;
-					int oldFreq = state->dclk.freqM;
-					if (modminer_reduce_clock(thr, false))
-						applog(LOG_NOTICE, "%s %u.%u: Frequency %s from %u to %u Mhz (temp: %d)",
-						       modminer->api->name, modminer->device_id, fpgaid,
-						       (oldFreq > state->dclk.freqM ? "dropped" : "raised "),
-						       oldFreq * 2, state->dclk.freqM * 2,
-						       temperature
-						);
-				}
-			}
-		}
-	}
 
 	iter = 200;
 	while (1) {
@@ -738,6 +773,7 @@ struct device_api modminer_api = {
 	.name = "MMQ",
 	.api_detect = modminer_detect,
 	.get_statline_before = get_modminer_statline_before,
+	.get_stats = modminer_get_stats,
 	.get_api_extra_device_status = get_modminer_api_extra_device_status,
 	.thread_prepare = modminer_fpga_prepare,
 	.thread_init = modminer_fpga_init,

+ 1 - 33
driver-opencl.c

@@ -729,40 +729,8 @@ char *set_temp_overheat(char *arg)
 
 	return NULL;
 }
-
-char *set_temp_target(char *arg)
-{
-	int i, val = 0, device = 0, *tt;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set temp target";
-	val = atoi(nextptr);
-	if (val < 0 || val > 200)
-		return "Invalid value passed to set temp target";
-
-	tt = &gpus[device++].adl.targettemp;
-	*tt = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-		if (val < 0 || val > 200)
-			return "Invalid value passed to set temp target";
-
-		tt = &gpus[device++].adl.targettemp;
-		*tt = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++) {
-			tt = &gpus[i].adl.targettemp;
-			*tt = val;
-		}
-	}
-
-	return NULL;
-}
 #endif
+
 #ifdef HAVE_OPENCL
 char *set_intensity(char *arg)
 {

+ 7 - 2
dynclock.c

@@ -29,16 +29,21 @@ void dclk_msg_freqchange(const char *repr, int oldFreq, int newFreq, const char
 bool dclk_updateFreq(struct dclk_data *data, dclk_change_clock_func_t changeclock, struct thr_info *thr)
 {
 	struct cgpu_info *cgpu = thr->cgpu;
+	uint8_t freqMDefault = data->freqMDefault;
 	int i, maxM, bestM;
 	double bestR, r;
 	bool rv = true;
 
+	if (freqMDefault > data->freqMaxM)
+		// This occurs when the device in question adjusts its MaxM down due to temperature or similar reasons
+		freqMDefault = data->freqMaxM;
+
 	for (i = 0; i < data->freqMaxM; i++)
 		if (data->maxErrorRate[i + 1] * i < data->maxErrorRate[i] * (i + 20))
 			data->maxErrorRate[i + 1] = data->maxErrorRate[i] * (1.0 + 20.0 / i);
 
 	maxM = 0;
-	while (maxM < data->freqMDefault && data->maxErrorRate[maxM + 1] < DCLK_MAXMAXERRORRATE)
+	while (maxM < freqMDefault && data->maxErrorRate[maxM + 1] < DCLK_MAXMAXERRORRATE)
 		maxM++;
 	while (maxM < data->freqMaxM && data->errorWeight[maxM] > 150 && data->maxErrorRate[maxM + 1] < DCLK_MAXMAXERRORRATE)
 		maxM++;
@@ -57,7 +62,7 @@ bool dclk_updateFreq(struct dclk_data *data, dclk_change_clock_func_t changecloc
 		rv = changeclock(thr, bestM);
 	}
 
-	maxM = data->freqMDefault;
+	maxM = freqMDefault;
 	while (maxM < data->freqMaxM && data->errorWeight[maxM + 1] > 100)
 		maxM++;
 	if ((bestM < (1.0 - DCLK_OVERHEATTHRESHOLD) * maxM) && bestM < maxM - 1) {

+ 5 - 0
dynclock.h

@@ -25,10 +25,15 @@ typedef bool (*dclk_change_clock_func_t)(struct thr_info *, int multiplier);
 
 extern void dclk_msg_freqchange(const char *, int oldFreq, int newFreq, const char *tail);
 
+// Called to initialize dclk_data at startup
 extern void dclk_prepare(struct dclk_data *data);
+// Called for every quarter of a second to age error rate info
 extern void dclk_gotNonces(struct dclk_data *);
+// Called for errors (1.0 "portion" is a quarter second)
 extern void dclk_errorCount(struct dclk_data *, double portion);
+// Called after a nonce range is completed to update actual error rate
 extern void dclk_preUpdate(struct dclk_data *data);
+// Called after a nonce range is completed, and error rate updated, to make actual clock adjustments
 extern bool dclk_updateFreq(struct dclk_data *, dclk_change_clock_func_t changeclock, struct thr_info *);
 
 #endif

+ 3 - 0
findnonce.c

@@ -150,6 +150,9 @@ static void *postcalc_hash(void *userdata)
 	pthread_detach(pthread_self());
 	rename_thr("bfg-postcalchsh");
 
+	/* To prevent corrupt values in FOUND from trying to read beyond the
+	 * end of the res[] array */
+	pcd->res[FOUND] &= FOUND;
 	for (entry = 0; entry < pcd->res[FOUND]; entry++) {
 		uint32_t nonce = pcd->res[entry];
 

+ 1 - 1
fpgautils.c

@@ -92,7 +92,7 @@ int serial_autodetect_udev(__maybe_unused detectone_func_t detectone, __maybe_un
 }
 #endif
 
-int serial_autodetect_devserial(detectone_func_t detectone, const char*prodname)
+int serial_autodetect_devserial(__maybe_unused detectone_func_t detectone, __maybe_unused const char*prodname)
 {
 #ifndef WIN32
 	DIR *D;

+ 1 - 0
icarus-common.h

@@ -70,6 +70,7 @@ struct ICARUS_INFO {
 	int fpga_count;
 	uint32_t nonce_mask;
 	bool quirk_reopen;
+	uint8_t user_set;
 
 	dclk_change_clock_func_t dclk_change_clock_func;
 	struct dclk_data dclk;

+ 1 - 1
libblkmaker

@@ -1 +1 @@
-Subproject commit 7f153402d6699d6748fc3ac6585c9076375c4b53
+Subproject commit a9a29294b2028dfd68fba35d42d440d3283f668d

+ 158 - 45
miner.c

@@ -111,6 +111,7 @@ bool opt_quiet;
 bool opt_realquiet;
 bool opt_loginput;
 const int opt_cutofftemp = 95;
+int opt_hysteresis = 3;
 static int opt_retries = -1;
 int opt_fail_pause = 5;
 int opt_log_interval = 5;
@@ -783,7 +784,8 @@ static char* set_sharelog(char *arg)
 	return NULL;
 }
 
-static char *temp_cutoff_str = NULL;
+static char *temp_cutoff_str = "";
+static char *temp_target_str = "";
 
 char *set_temp_cutoff(char *arg)
 {
@@ -799,32 +801,80 @@ char *set_temp_cutoff(char *arg)
 	return NULL;
 }
 
-static void load_temp_cutoffs()
+char *set_temp_target(char *arg)
 {
-	int i, val = 0, device = 0;
-	char *nextptr;
+	int val;
 
-	if (temp_cutoff_str) {
-		for (device = 0, nextptr = strtok(temp_cutoff_str, ","); nextptr; ++device, nextptr = strtok(NULL, ",")) {
-			if (device >= total_devices)
-				quit(1, "Too many values passed to set temp cutoff");
-			val = atoi(nextptr);
-			if (val < 0 || val > 200)
-				quit(1, "Invalid value passed to set temp cutoff");
+	if (!(arg && arg[0]))
+		return "Invalid parameters for set temp target";
+	val = atoi(arg);
+	if (val < 0 || val > 200)
+		return "Invalid value passed to set temp target";
+	temp_target_str = arg;
 
-			devices[device]->cutofftemp = val;
-		}
-	} else {
-		for (i = device; i < total_devices; ++i) {
-			if (!devices[i]->cutofftemp)
-				devices[i]->cutofftemp = opt_cutofftemp;
-		}
-		return;
+	return NULL;
+}
+
+// For a single element string, this always returns the number (for all calls)
+// For multi-element strings, it returns each element as a number in order, and 0 when there are no more
+static int temp_strtok(char *base, char **n)
+{
+	char *i = *n;
+	char *p = strchr(i, ',');
+	if (p) {
+		p[0] = '\0';
+		*n = &p[1];
 	}
-	if (device <= 1) {
-		for (i = device; i < total_devices; ++i)
-			devices[i]->cutofftemp = val;
+	else
+	if (base != i)
+		*n = strchr(i, '\0');
+	return atoi(i);
+}
+
+/* Apply the --temp-cutoff / --temp-target option strings to every device.
+ *
+ * Each string is consumed with temp_strtok(): a single value is applied to
+ * all devices, a comma separated list is applied one element per device,
+ * and a 0 result leaves the default in place.  Defaults:
+ *   cutoff: the driver-provided cgpu->cutofftemp, else opt_cutofftemp
+ *   target: the (possibly overridden) cutoff plus the driver's original
+ *           target-to-cutoff offset, or cutoff - 6 if the driver set none
+ * Calls quit() on a value outside 0..200 or when a list has more elements
+ * than there are devices. */
+static void load_temp_config()
+{
+	int i, val = 0, target_off;
+	char *cutoff_n, *target_n;
+	struct cgpu_info *cgpu;
+
+	cutoff_n = temp_cutoff_str;
+	target_n = temp_target_str;
+
+	for (i = 0; i < total_devices; ++i) {
+		cgpu = devices[i];
+		
+		// cutoff default may be specified by driver during probe; otherwise, opt_cutofftemp (const)
+		if (!cgpu->cutofftemp)
+			cgpu->cutofftemp = opt_cutofftemp;
+		
+		// target default may be specified by driver, and is moved with offset; otherwise, offset minus 6
+		// (the offset is captured before any user cutoff override is applied below)
+		if (cgpu->targettemp)
+			target_off = cgpu->targettemp - cgpu->cutofftemp;
+		else
+			target_off = -6;
+		
+		val = temp_strtok(temp_cutoff_str, &cutoff_n);
+		if (val < 0 || val > 200)
+			quit(1, "Invalid value passed to set temp cutoff");
+		if (val)
+			cgpu->cutofftemp = val;
+		
+		val = temp_strtok(temp_target_str, &target_n);
+		if (val < 0 || val > 200)
+			quit(1, "Invalid value passed to set temp target");
+		if (val)
+			cgpu->targettemp = val;
+		else
+			cgpu->targettemp = cgpu->cutofftemp + target_off;
+		
+		applog(LOG_DEBUG, "%s %u: Set temperature config: target=%d cutoff=%d",
+		       cgpu->api->name, cgpu->device_id,
+		       cgpu->targettemp, cgpu->cutofftemp);
 	}
+	// A cursor that moved off the start means a list was given; anything
+	// still unread at this point is an element with no device to apply to
+	if (cutoff_n != temp_cutoff_str && cutoff_n[0])
+		quit(1, "Too many values passed to set temp cutoff");
+	if (target_n != temp_target_str && target_n[0])
+		quit(1, "Too many values passed to set temp target");
 }
 
 static char *set_api_allow(const char *arg)
@@ -1175,16 +1225,18 @@ static struct opt_table opt_config_table[] = {
 		     set_temp_cutoff, opt_show_intval, &opt_cutofftemp,
 		     "Temperature where a device will be automatically disabled, one value or comma separated list"),
 #endif
-#ifdef HAVE_ADL
+#if defined(HAVE_ADL) || defined(USE_MODMINER)
 	OPT_WITH_ARG("--temp-hysteresis",
 		     set_int_1_to_10, opt_show_intval, &opt_hysteresis,
 		     "Set how much the temperature can fluctuate outside limits when automanaging speeds"),
+#ifdef HAVE_ADL
 	OPT_WITH_ARG("--temp-overheat",
 		     set_temp_overheat, opt_show_intval, &opt_overheattemp,
 		     "Overheat temperature when automatically managing fan and GPU speeds, one value or comma separated list"),
+#endif
 	OPT_WITH_ARG("--temp-target",
-		     set_temp_target, opt_show_intval, &opt_targettemp,
-		     "Target temperature when automatically managing fan and GPU speeds, one value or comma separated list"),
+		     set_temp_target, NULL, NULL,
+		     "Target temperature when automatically managing fan and clock speeds, one value or comma separated list"),
 #endif
 	OPT_WITHOUT_ARG("--text-only|-T",
 			opt_set_invbool, &use_curses,
@@ -1927,12 +1979,14 @@ static void curses_print_devstatus(int thr_id)
 		wprintw(statuswin, "DEAD ");
 	else if (cgpu->status == LIFE_SICK)
 		wprintw(statuswin, "SICK ");
-	else if (cgpu->status == LIFE_WAIT)
-		wprintw(statuswin, "WAIT ");
 	else if (cgpu->deven == DEV_DISABLED)
 		wprintw(statuswin, "OFF  ");
 	else if (cgpu->deven == DEV_RECOVER)
-		wprintw(statuswin, "REST  ");
+		wprintw(statuswin, "REST ");
+	else if (cgpu->deven == DEV_RECOVER_ERR)
+		wprintw(statuswin, " ERR ");
+	else if (cgpu->status == LIFE_WAIT)
+		wprintw(statuswin, "WAIT ");
 	else
 		wprintw(statuswin, "%s", cHr);
 	adj_width(cgpu->accepted, &awidth);
@@ -2158,8 +2212,8 @@ static bool submit_upstream_work(const struct work *work, CURL *curl, bool resub
 	char worktime[200] = "";
 
 	if (work->tmpl) {
-		unsigned char data[76];
-		swap32yes(data, work->data, 76);
+		unsigned char data[80];
+		swap32yes(data, work->data, 80 / 4);
 		json_t *req = blkmk_submit_jansson(work->tmpl, data, work->dataid, *((uint32_t*)&work->data[76]));
 		s = json_dumps(req, 0);
 		sd = bin2hex(data, 80);
@@ -4008,7 +4062,7 @@ void write_config(FILE *fcfg)
 			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].adl.overtemp);
 		fputs("\",\n\"temp-target\" : \"", fcfg);
 		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].adl.targettemp);
+			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].targettemp);
 #endif
 		fputs("\"", fcfg);
 	}
@@ -4625,7 +4679,7 @@ static inline void thread_reportout(struct thr_info *thr)
 }
 
 static void hashmeter(int thr_id, struct timeval *diff,
-		      unsigned long long hashes_done)
+		      uint64_t hashes_done)
 {
 	struct timeval temp_tv_end, total_diff;
 	double secs;
@@ -4652,7 +4706,7 @@ static void hashmeter(int thr_id, struct timeval *diff,
 		double thread_rolling = 0.0;
 		int i;
 
-		applog(LOG_DEBUG, "[thread %d: %llu hashes, %.1f khash/sec]",
+		applog(LOG_DEBUG, "[thread %d: %"PRIu64" hashes, %.1f khash/sec]",
 			thr_id, hashes_done, hashes_done / 1000 / secs);
 
 		/* Rolling average for each thread and each device */
@@ -5262,6 +5316,7 @@ void *miner_thread(void *userdata)
 	uint32_t max_nonce = api->can_limit_work ? api->can_limit_work(mythr) : 0xffffffff;
 	int64_t hashes_done = 0;
 	int64_t hashes;
+	bool scanhash_working = true;
 	struct work *work = make_work();
 	const bool primary = (!mythr->device_thread) || mythr->primary_thread;
 
@@ -5342,15 +5397,26 @@ void *miner_thread(void *userdata)
 			gettimeofday(&getwork_start, NULL);
 
 			if (unlikely(hashes == -1)) {
-				applog(LOG_ERR, "%s %d failure, disabling!", api->name, cgpu->device_id);
-				cgpu->deven = DEV_DISABLED;
-
-				cgpu->device_last_not_well = time(NULL);
-				cgpu->device_not_well_reason = REASON_THREAD_ZERO_HASH;
-				cgpu->thread_zero_hash_count++;
+				time_t now = time(NULL);
+				if (difftime(now, cgpu->device_last_not_well) > 1.) {
+					cgpu->device_last_not_well = time(NULL);
+					cgpu->device_not_well_reason = REASON_THREAD_ZERO_HASH;
+					cgpu->thread_zero_hash_count++;
+				}
 
-				mt_disable(mythr, thr_id, api);
+				if (scanhash_working && opt_restart) {
+					applog(LOG_ERR, "%s %u failure, attempting to reinitialize", api->name, cgpu->device_id);
+					scanhash_working = false;
+					cgpu->reinit_backoff = 5.2734375;
+					hashes = 0;
+				} else {
+					applog(LOG_ERR, "%s %u failure, disabling!", api->name, cgpu->device_id);
+					cgpu->deven = DEV_RECOVER_ERR;
+					goto disabled;
+				}
 			}
+			else
+				scanhash_working = true;
 
 			hashes_done += hashes;
 			if (hashes > cgpu->max_hashes)
@@ -5407,6 +5473,7 @@ void *miner_thread(void *userdata)
 			}
 
 			if (unlikely(mythr->pause || cgpu->deven != DEV_ENABLED))
+disabled:
 				mt_disable(mythr, thr_id, api);
 
 			sdiff.tv_sec = sdiff.tv_usec = 0;
@@ -5742,6 +5809,19 @@ static void *watchpool_thread(void __maybe_unused *userdata)
 	return NULL;
 }
 
+/* Mark a device as enabled again and push the `ping` item onto the queue of
+ * each of its threads.
+ * NOTE(review): presumably the ping wakes threads parked in mt_disable();
+ * confirm against mt_disable. */
+void device_recovered(struct cgpu_info *cgpu)
+{
+	struct thr_info *thr;
+	int j;
+
+	cgpu->deven = DEV_ENABLED;
+	for (j = 0; j < cgpu->threads; ++j) {
+		thr = cgpu->thr[j];
+		applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
+		tq_push(thr->q, &ping);
+	}
+}
+
 /* Makes sure the hashmeter keeps going even if mining threads stall, updates
  * the screen at regular intervals, and restarts threads if they appear to have
  * died. */
@@ -5854,6 +5934,39 @@ static void *watchdog_thread(void __maybe_unused *userdata)
 			/* Thread is disabled */
 			if (*denable == DEV_DISABLED)
 				continue;
+			else
+			if (*denable == DEV_RECOVER_ERR) {
+				if (opt_restart && difftime(time(NULL), cgpu->device_last_not_well) > cgpu->reinit_backoff) {
+					applog(LOG_NOTICE, "Attempting to reinitialize %s %u",
+					       cgpu->api->name, cgpu->device_id);
+					if (cgpu->reinit_backoff < 300)
+						cgpu->reinit_backoff *= 2;
+					device_recovered(cgpu);
+				}
+				continue;
+			}
+			else
+			if (*denable == DEV_RECOVER) {
+				if (opt_restart && cgpu->temp < cgpu->targettemp) {
+					applog(LOG_NOTICE, "%s %u recovered to temperature below target, re-enabling",
+					       cgpu->api->name, cgpu->device_id);
+					device_recovered(cgpu);
+				}
+				cgpu->device_last_not_well = time(NULL);
+				cgpu->device_not_well_reason = REASON_DEV_THERMAL_CUTOFF;
+				continue;
+			}
+			else
+			if (cgpu->temp > cgpu->cutofftemp)
+			{
+				applog(LOG_WARNING, "%s %u hit thermal cutoff limit, disabling!",
+				       cgpu->api->name, cgpu->device_id);
+				*denable = DEV_RECOVER;
+
+				cgpu->device_last_not_well = time(NULL);
+				cgpu->device_not_well_reason = REASON_DEV_THERMAL_CUTOFF;
+				++cgpu->dev_thermal_cutoff_count;
+			}
 
 			if (thr->getwork) {
 				if (cgpu->status == LIFE_WELL && thr->getwork < now.tv_sec - opt_log_interval) {
@@ -6297,7 +6410,7 @@ extern struct device_api ztex_api;
 
 static int cgminer_id_count = 0;
 
-void enable_device(struct cgpu_info *cgpu)
+void register_device(struct cgpu_info *cgpu)
 {
 	cgpu->deven = DEV_ENABLED;
 	devices[cgpu->cgminer_id = cgminer_id_count++] = cgpu;
@@ -6608,13 +6721,13 @@ int main(int argc, char *argv[])
 			if (devices_enabled & (1 << i)) {
 				if (i >= total_devices)
 					quit (1, "Command line options set a device that doesn't exist");
-				enable_device(devices[i]);
+				register_device(devices[i]);
 			} else if (i < total_devices) {
 				if (opt_removedisabled) {
 					if (devices[i]->api == &cpu_api)
 						--opt_n_threads;
 				} else {
-					enable_device(devices[i]);
+					register_device(devices[i]);
 				}
 				devices[i]->deven = DEV_DISABLED;
 			}
@@ -6622,13 +6735,13 @@ int main(int argc, char *argv[])
 		total_devices = cgminer_id_count;
 	} else {
 		for (i = 0; i < total_devices; ++i)
-			enable_device(devices[i]);
+			register_device(devices[i]);
 	}
 
 	if (!total_devices)
 		quit(1, "All devices disabled, cannot mine!");
 
-	load_temp_cutoffs();
+	load_temp_config();
 
 	for (i = 0; i < total_devices; ++i)
 		devices[i]->cgminer_stats.getwork_wait_min.tv_sec = MIN_SEC_UNSET;

+ 11 - 3
miner.h

@@ -195,6 +195,10 @@ void *alloca (size_t);
 #	endif
 #endif
 
+#ifndef roundl  /* fallback used only when no roundl macro exists: rounds half away from zero by truncating through long long; evaluates x twice */
+#define roundl(x)   ((long double)(long long)((x) + (((x) < 0) ? -0.5 : 0.5)))
+#endif
+
 enum alive {
 	LIFE_WELL,
 	LIFE_SICK,
@@ -248,7 +252,6 @@ struct gpu_adl {
 	int lastengine;
 	int lasttemp;
 	int targetfan;
-	int targettemp;
 	int overtemp;
 	int minspeed;
 	int maxspeed;
@@ -295,6 +298,7 @@ enum dev_enable {
 	DEV_ENABLED,
 	DEV_DISABLED,
 	DEV_RECOVER,
+	DEV_RECOVER_ERR,
 };
 
 enum cl_kernels {
@@ -421,8 +425,8 @@ struct cgpu_info {
 
 #ifdef USE_SCRYPT
 	int opt_lg, lookup_gap;
-	int opt_tc, thread_concurrency;
-	int shaders;
+	size_t opt_tc, thread_concurrency;
+	size_t shaders;
 #endif
 	struct timeval tv_gpustart;;
 	struct timeval tv_gpuend;
@@ -433,6 +437,7 @@ struct cgpu_info {
 
 	float temp;
 	int cutofftemp;
+	int targettemp;
 
 #ifdef HAVE_ADL
 	bool has_adl;
@@ -457,6 +462,7 @@ struct cgpu_info {
 	time_t device_last_well;
 	time_t device_last_not_well;
 	enum dev_reason device_not_well_reason;
+	float reinit_backoff;
 	int thread_fail_init_count;
 	int thread_zero_hash_count;
 	int thread_fail_queue_count;
@@ -692,6 +698,7 @@ extern int restart_wait(unsigned int mstime);
 extern int stale_wait(unsigned int mstime, struct work*, bool checkend);
 
 extern void kill_work(void);
+extern void app_restart(void);
 
 extern void reinit_device(struct cgpu_info *cgpu);
 
@@ -758,6 +765,7 @@ extern double total_diff_accepted, total_diff_rejected, total_diff_stale;
 extern unsigned int local_work;
 extern unsigned int total_go, total_ro;
 extern const int opt_cutofftemp;
+extern int opt_hysteresis;
 extern int opt_fail_pause;
 extern int opt_log_interval;
 extern unsigned long long global_hashrate;

+ 5 - 5
ocl.c

@@ -295,7 +295,7 @@ int clDevicesNum(void) {
 		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
 		if (status != CL_SUCCESS) {
 			applog(LOG_ERR, "Error %d: Getting Device IDs (num)", status);
-			if (i != opt_platform_id)
+			if ((int)i != opt_platform_id)
 				continue;
 			return -1;
 		}
@@ -718,7 +718,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		strcat(binaryfilename, "g");
 	if (opt_scrypt) {
 #ifdef USE_SCRYPT
-		sprintf(numbuf, "lg%dtc%d", cgpu->lookup_gap, cgpu->thread_concurrency);
+		sprintf(numbuf, "lg%utc%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency);
 		strcat(binaryfilename, numbuf);
 #endif
 	} else {
@@ -791,7 +791,7 @@ build:
 #ifdef USE_SCRYPT
 	if (opt_scrypt)
 		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			cgpu->lookup_gap, cgpu->thread_concurrency, (int)clState->wsize);
+			cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize);
 	else
 #endif
 	{
@@ -987,8 +987,8 @@ built:
 		/* Use the max alloc value which has been rounded to a power of
 		 * 2 greater >= required amount earlier */
 		if (bufsize > cgpu->max_alloc) {
-			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
-			       gpu, cgpu->max_alloc, bufsize);
+			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u", gpu, cgpu->max_alloc);
+			applog(LOG_WARNING, "Your scrypt settings come to %u", bufsize);
 		} else
 			bufsize = cgpu->max_alloc;
 		applog(LOG_DEBUG, "Creating scrypt buffer sized %d", bufsize);