Browse Source

Merge tag 'bfgminer-5.0.0' into knc

BFGMiner version 5.0.0
Vitalii Demianets 11 years ago
parent
commit
b430347ba6
79 changed files with 5555 additions and 1912 deletions
  1. 2 2
      .travis.deps
  2. 1 0
      .travis.script
  3. 35 25
      .travis.yml
  4. 18 18
      70-bfgminer.rules.in
  5. 8 1
      AUTHORS
  6. 67 28
      Makefile.am
  7. 268 0
      NEWS
  8. 50 27
      README
  9. 37 0
      README.ASIC
  10. 12 2
      README.RPC
  11. 6 2
      README.scrypt
  12. 97 69
      api.c
  13. 1 1
      compat.h
  14. 211 565
      configure.ac
  15. 15 0
      debian/changelog
  16. 1 1
      debian/control
  17. 1 1
      debian/rules
  18. 34 14
      deviceapi.c
  19. 3 0
      deviceapi.h
  20. 2 2
      driver-bitfury.c
  21. 0 1
      driver-cointerra.c
  22. 103 111
      driver-cpu.c
  23. 15 1
      driver-cpu.h
  24. 64 34
      driver-dualminer.c
  25. 5 2
      driver-getwork.c
  26. 1 16
      driver-gridseed.c
  27. 7 0
      driver-icarus.h
  28. 893 0
      driver-kncasic.c
  29. 9 3
      driver-minergate.c
  30. 757 0
      driver-minion.c
  31. 2 2
      driver-modminer.c
  32. 2 0
      driver-nanofury.c
  33. 257 168
      driver-opencl.c
  34. 28 8
      driver-opencl.h
  35. 13 4
      driver-proxy.c
  36. 133 24
      driver-stratum.c
  37. 9 6
      driver-titan.c
  38. 1 2
      driver-zeusminer.c
  39. 21 7
      findnonce.c
  40. 10 3
      findnonce.h
  41. 3 3
      gc3355.c
  42. 1 1
      gc3355.h
  43. 2 2
      httpsrv.c
  44. 1 1
      libbase58
  45. 3 3
      libbitfury.c
  46. 29 0
      lowl-spi.c
  47. 1 0
      lowl-spi.h
  48. 1 0
      make-release
  49. 374 0
      malgo/keccak.c
  50. 58 6
      malgo/scrypt.c
  51. 6 0
      malgo/scrypt.h
  52. 114 0
      malgo/sha256d.c
  53. 2 1
      mcp2210.c
  54. 527 120
      miner.c
  55. 124 47
      miner.h
  56. 618 434
      ocl.c
  57. 27 14
      ocl.h
  58. 1 1
      opencl/diablo.cl
  59. 1 1
      opencl/diakgcn.cl
  60. 133 0
      opencl/keccak.cl
  61. 1 1
      opencl/phatk.cl
  62. 1 1
      opencl/poclbm.cl
  63. 14 6
      opencl/psw.cl
  64. 14 6
      opencl/scrypt.cl
  65. 14 6
      opencl/zuikkis.cl
  66. 9 1
      openwrt/bfgminer/Makefile
  67. 0 19
      scrypt.h
  68. 12 9
      sha256_4way.c
  69. 10 6
      sha256_altivec_4way.c
  70. 20 10
      sha256_cryptopp.c
  71. 11 4
      sha256_generic.c
  72. 8 5
      sha256_sse2_amd64.c
  73. 8 5
      sha256_sse2_i386.c
  74. 8 5
      sha256_sse4_amd64.c
  75. 5 5
      sha256_via.c
  76. 171 36
      util.c
  77. 3 2
      util.h
  78. 16 0
      version.c
  79. 5 1
      winhacks.h

+ 2 - 2
.travis.deps

@@ -29,7 +29,7 @@ fi
 if [ -n "$CROSS_BINPKGS" ]; then
 	wget "https://github.com/luke-jr/cross-binpkgs/archive/${CROSS_BINPKGS}.zip"
 	unzip "${CROSS_BINPKGS}.zip"
-	for f in "cross-binpkgs-${CROSS_BINPKGS}"/*/*; do
-		sudo tar -C "/usr/${CROSS_BINPKGS}/" -xjvpf "$f"
+	for f in "cross-binpkgs-${CROSS_BINPKGS/+/-}"/*/*; do
+		sudo tar -C "/usr/${CROSS_BINPKGS/+*/}/" -xjvpf "$f"
 	done
 fi

+ 1 - 0
.travis.script

@@ -2,6 +2,7 @@ test "x$1" = "xI-am-okay-with-destroying-my-system" || exit 1
 set -ex
 
 if [ -n "$CROSS_BINPKGS" ]; then
+	CROSS_BINPKGS="${CROSS_BINPKGS/+*/}"
 	myCC="${CROSS_BINPKGS}-gcc"
 	CC_ARGS="-I/usr/${CROSS_BINPKGS}/usr/include"
 	export LDFLAGS="-L/usr/${CROSS_BINPKGS}/usr/lib -L/usr/${CROSS_BINPKGS}/usr/bin"

+ 35 - 25
.travis.yml

@@ -10,57 +10,63 @@ matrix:
   include:
     - compiler: ": Full GCC"
       # Upgrade GCC to avoid false warnings; build the full project with -Werror
-      env: myCC='gcc' UBUNTU_DEPS='gcc libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-system-libbase58 --enable-tool'
+      env: myCC='gcc' UBUNTU_DEPS='gcc libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-system-libbase58 --enable-tool'
     - compiler: ": Full LLVM"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-system-libbase58 --enable-tool'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-system-libbase58 --enable-tool'
     - compiler: ": pkgconf"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev pkgconf' EXTRA_DEPS='libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-system-libbase58 --enable-tool'
-    - compiler: ": MinGW64"
-      env: UBUNTU_DEPS='gcc-mingw-w64-x86-64' EXTRA_DEPS='pkg-config yasm' CROSS_BINPKGS='x86_64-w64-mingw32' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-system-libbase58 --host=x86_64-w64-mingw32 --disable-knc --disable-bfsb --disable-jingtian --disable-metabank --disable-minergate --disable-titan'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev pkgconf' EXTRA_DEPS='libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-system-libbase58 --enable-tool'
+    - compiler: ": MinGW64 ncurses"
+      env: UBUNTU_DEPS='gcc-mingw-w64-x86-64' EXTRA_DEPS='pkg-config yasm' CROSS_BINPKGS='x86_64-w64-mingw32+ncurses' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-system-libbase58 --host=x86_64-w64-mingw32 --disable-knc --disable-bfsb --disable-jingtian --disable-metabank --disable-minergate --disable-titan --disable-kncasic --disable-minion'
+    - compiler: ": MinGW64 pdcurses"
+      env: UBUNTU_DEPS='gcc-mingw-w64-x86-64' EXTRA_DEPS='pkg-config yasm' CROSS_BINPKGS='x86_64-w64-mingw32' BUILD_CFLAGS='-Werror' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-system-libbase58 --host=x86_64-w64-mingw32 --disable-knc --disable-bfsb --disable-jingtian --disable-metabank --disable-minergate --disable-titan --disable-kncasic --disable-minion'
+    - compiler: ": Standard"
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-keccak --enable-scrypt'
     - compiler: ": Std SHA2"
       env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS=''
+    - compiler: ": Std Keccak"
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-keccak'
     - compiler: ": Std scrypt"
       env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-scrypt'
     - compiler: ": No hidapi"
-      env: myCC='clang' UBUNTU_DEPS='linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt'
+      env: myCC='clang' UBUNTU_DEPS='linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt'
     - compiler: ": No VFIO"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-vfio'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-vfio'
     - compiler: ": No UIO"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-uio'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-uio'
     - compiler: ": No VFIO/UIO"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-uio --without-vfio'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-uio --without-vfio'
     - compiler: ": Non-wide ncurses"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncurses5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --with-curses=ncurses5'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncurses5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --with-curses=ncurses5'
     - compiler: ": No curses"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-curses'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-curses'
     - compiler: ": No libudev"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-libudev'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-libudev'
     - compiler: ": No libusb"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-libusb'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-libusb'
     - compiler: ": No libevent"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-libevent'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libmicrohttpd-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-libevent'
     - compiler: ": No libmicrohttpd"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-libmicrohttpd'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libi2c-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-libmicrohttpd'
     - compiler: ": No libi2c-dev"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --disable-knc --disable-titan'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev yasm libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --disable-knc --disable-titan --disable-kncasic'
     - compiler: ": No yasm"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev libsensors4-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt'
     - compiler: ": No libsensors"
-      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-sensors'
+      env: myCC='clang' UBUNTU_DEPS='libhidapi-dev linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev libudev-dev libusb-1.0-0-dev libevent-dev libmicrohttpd-dev libi2c-dev yasm' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-sensors'
     - compiler: ": No opt deps"
-      env: myCC='clang' EXTRA_DEPS='pkg-config' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-uio --without-vfio --without-sensors --without-libmicrohttpd --without-libevent --without-libusb --without-curses --without-libudev --disable-knc --disable-titan'
+      env: myCC='clang' EXTRA_DEPS='pkg-config' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-uio --without-vfio --without-sensors --without-libmicrohttpd --without-libevent --without-libusb --without-curses --without-libudev --disable-knc --disable-titan --disable-kncasic'
     - compiler: ": Only ncurses"
-      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-scrypt --without-uio --without-vfio --without-sensors --without-libmicrohttpd --without-libevent --without-libusb --without-libudev --disable-knc --disable-titan'
+      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--enable-other-drivers --enable-keccak --enable-scrypt --without-uio --without-vfio --without-sensors --without-libmicrohttpd --without-libevent --without-libusb --without-libudev --disable-knc --disable-titan --disable-kncasic'
     - compiler: ": Only CPU"
-      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev yasm' CONFIGURE_ARGS='--disable-other-drivers --enable-cpumining --enable-scrypt'
+      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev yasm' CONFIGURE_ARGS='--disable-other-drivers --enable-cpumining --enable-keccak --enable-scrypt'
     - compiler: ": Only OpenCL"
-      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev libsensors4-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-opencl --enable-scrypt'
+      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev libsensors4-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-opencl --enable-keccak --enable-scrypt'
     - compiler: ": OpenCL w/o ADL"
-      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev libsensors4-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-opencl --enable-scrypt --disable-adl'
+      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev libsensors4-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-opencl --enable-keccak --enable-scrypt --disable-adl'
     - compiler: ": OpenCL w/o sensors"
-      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-opencl --enable-scrypt'
+      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-opencl --enable-keccak --enable-scrypt'
     - compiler: ": OpenCL w/o ADL or sensors"
-      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-opencl --enable-scrypt --disable-adl'
+      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-opencl --enable-keccak --enable-scrypt --disable-adl'
     - compiler: ": Only bitforce"
       env: myCC='clang' UBUNTU_DEPS='linux-libc-dev' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-bitforce'
     - compiler: ": Only icarus"
@@ -77,6 +83,8 @@ matrix:
       env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-avalonmm'
     - compiler: ": Only knc"
       env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev libi2c-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-knc'
+    - compiler: ": Only kncasic"
+      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev libi2c-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-scrypt --enable-kncasic'
     - compiler: ": Only modminer"
       env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-modminer'
     - compiler: ": Only cointerra"
@@ -119,6 +127,8 @@ matrix:
       env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-bitfury --enable-metabank'
     - compiler: ": Only minergate"
       env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-minergate'
+    - compiler: ": Only minion"
+      env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-minion'
     - compiler: ": Only rockminer"
       env: myCC='clang' EXTRA_DEPS='pkg-config libncursesw5-dev' CONFIGURE_ARGS='--disable-other-drivers --enable-rockminer'
 install:

+ 18 - 18
70-bfgminer.rules.in

@@ -5,32 +5,32 @@ GOTO="bfgminer_end"
 LABEL="bfgminer_start"
 
 @USE_BIFURY_TRUE@ENV{ID_MODEL}=="*bi•fury*", GOTO="bfgminer_add"
-@HAS_BIGPIC_TRUE@ENV{ID_MODEL}=="*Bitfury*BF1*", GOTO="bfgminer_add"
-@HAS_BITFORCE_TRUE@ENV{ID_MODEL}=="*BitFORCE*SHA256*", GOTO="bfgminer_add"
+@USE_BIGPIC_TRUE@ENV{ID_MODEL}=="*Bitfury*BF1*", GOTO="bfgminer_add"
+@USE_BITFORCE_TRUE@ENV{ID_MODEL}=="*BitFORCE*SHA256*", GOTO="bfgminer_add"
 @USE_COINTERRA_TRUE@ENV{ID_MODEL}=="*GoldStrike*", GOTO="bfgminer_add"
 @USE_DRILLBIT_TRUE@ENV{manufacturer}=="*Drillbit*", GOTO="bfgminer_add"
-@HAS_ICARUS_TRUE@ENV{ID_MODEL}=="*Cairnsmore1*", GOTO="bfgminer_add"
-@HAS_ICARUS_TRUE@ENV{ID_MODEL}=="*Block*Erupter*", GOTO="bfgminer_add"
+@USE_ICARUS_TRUE@ENV{ID_MODEL}=="*Cairnsmore1*", GOTO="bfgminer_add"
+@USE_ICARUS_TRUE@ENV{ID_MODEL}=="*Block*Erupter*", GOTO="bfgminer_add"
 @USE_HASHBUSTER_TRUE@ENV{ID_MODEL}=="*HashBuster*", GOTO="bfgminer_add"
 @USE_HASHBUSTERUSB_TRUE@ENV{ID_MODEL}=="*HashBuster*", GOTO="bfgminer_add"
 @USE_HASHFAST_TRUE@ENV{idVendor}=="297c", ENV{manufacturer}=="*HashFast*", GOTO="bfgminer_add"
 @USE_HASHFAST_TRUE@ENV{ID_MODEL}=="*GoldenNonce*", GOTO="bfgminer_add"
-@HAS_KLONDIKE_TRUE@ENV{idVendor}=="04d8", ENV{idProduct}=="f60a", ENV{manufacturer}=="*Klondike*", GOTO="bfgminer_add"
-@HAS_KLONDIKE_TRUE@ENV{idVendor}=="fa05", ENV{idProduct}=="0001", ENV{manufacturer}=="*HashBuster*", GOTO="bfgminer_add"
-@HAS_LITTLEFURY_TRUE@ENV{ID_MODEL}=="*LittleFury*", GOTO="bfgminer_add"
-@HAS_MODMINER_TRUE@ENV{ID_MODEL}=="*ModMiner*", GOTO="bfgminer_add"
-@HAS_NANOFURY_TRUE@ENV{idVendor}=="04d8", ENV{idProduct}=="00de", ENV{ID_MODEL}=="*NanoFury*", GOTO="bfgminer_add"
+@USE_KLONDIKE_TRUE@ENV{idVendor}=="04d8", ENV{idProduct}=="f60a", ENV{manufacturer}=="*Klondike*", GOTO="bfgminer_add"
+@USE_KLONDIKE_TRUE@ENV{idVendor}=="fa05", ENV{idProduct}=="0001", ENV{manufacturer}=="*HashBuster*", GOTO="bfgminer_add"
+@USE_LITTLEFURY_TRUE@ENV{ID_MODEL}=="*LittleFury*", GOTO="bfgminer_add"
+@USE_MODMINER_TRUE@ENV{ID_MODEL}=="*ModMiner*", GOTO="bfgminer_add"
+@USE_NANOFURY_TRUE@ENV{idVendor}=="04d8", ENV{idProduct}=="00de", ENV{ID_MODEL}=="*NanoFury*", GOTO="bfgminer_add"
 @USE_ROCKMINER_TRUE@ENV{ID_MODEL}=="*R-BOX miner*", GOTO="bfgminer_add"
 @USE_ROCKMINER_TRUE@ENV{ID_MODEL}=="*RX-BOX miner*", GOTO="bfgminer_add"
-@HAS_TWINFURY_TRUE@ENV{ID_MODEL}=="*Twinfury*", GOTO="bfgminer_add"
-@HAS_X6500_TRUE@ENV{idVendor}=="0403", ENV{idProduct}=="6001", ENV{ID_MODEL}=="*X6500 FPGA Miner*", GOTO="bfgminer_add"
-@HAS_ZTEX_TRUE@ENV{ID_MODEL}=="*btcminer for ZTEX*", GOTO="bfgminer_add"
+@USE_TWINFURY_TRUE@ENV{ID_MODEL}=="*Twinfury*", GOTO="bfgminer_add"
+@USE_X6500_TRUE@ENV{idVendor}=="0403", ENV{idProduct}=="6001", ENV{ID_MODEL}=="*X6500 FPGA Miner*", GOTO="bfgminer_add"
+@USE_ZTEX_TRUE@ENV{ID_MODEL}=="*btcminer for ZTEX*", GOTO="bfgminer_add"
 
 # The below are broad udev rules that may match devices other than the miners expected.
 # You can uncomment them with the --enable-broad-udevrules configure option.
 
 # Avalon1
-@BROAD_UDEVRULES_TRUE@@HAS_AVALON_TRUE@ATTRS{idVendor}=="0403", ATTRS{idProduct}=="6001", GOTO="bfgminer_add"
+@BROAD_UDEVRULES_TRUE@@USE_AVALON_TRUE@ATTRS{idVendor}=="0403", ATTRS{idProduct}=="6001", GOTO="bfgminer_add"
 # AvalonMM
 @BROAD_UDEVRULES_TRUE@@USE_AVALONMM_TRUE@ATTRS{idVendor}=="067b", ATTRS{idProduct}=="2303", GOTO="bfgminer_add"
 # DualMiner
@@ -38,16 +38,16 @@ LABEL="bfgminer_start"
 # GridSeed
 @BROAD_UDEVRULES_TRUE@@USE_GRIDSEED_TRUE@ATTRS{idVendor}=="0483", ATTRS{idProduct}=="5740", GOTO="bfgminer_add"
 # Icarus
-@BROAD_UDEVRULES_TRUE@@HAS_ICARUS_TRUE@ATTRS{idVendor}=="067b", ATTRS{idProduct}=="2303", GOTO="bfgminer_add"
+@BROAD_UDEVRULES_TRUE@@USE_ICARUS_TRUE@ATTRS{idVendor}=="067b", ATTRS{idProduct}=="2303", GOTO="bfgminer_add"
 # Cairnsmore1
-@BROAD_UDEVRULES_TRUE@@HAS_ICARUS_TRUE@ATTRS{idVendor}=="067b", ATTRS{idProduct}=="0230", GOTO="bfgminer_add"
-@BROAD_UDEVRULES_TRUE@@HAS_ICARUS_TRUE@ATTRS{idVendor}=="0403", ATTRS{idProduct}=="8350", GOTO="bfgminer_add"
+@BROAD_UDEVRULES_TRUE@@USE_ICARUS_TRUE@ATTRS{idVendor}=="067b", ATTRS{idProduct}=="0230", GOTO="bfgminer_add"
+@BROAD_UDEVRULES_TRUE@@USE_ICARUS_TRUE@ATTRS{idVendor}=="0403", ATTRS{idProduct}=="8350", GOTO="bfgminer_add"
 # Block Erupter and Antminer U*
-@BROAD_UDEVRULES_TRUE@@HAS_ICARUS_TRUE@ATTRS{idVendor}=="10c4", ATTRS{idProduct}=="ea60", GOTO="bfgminer_add"
+@BROAD_UDEVRULES_TRUE@@USE_ICARUS_TRUE@ATTRS{idVendor}=="10c4", ATTRS{idProduct}=="ea60", GOTO="bfgminer_add"
 # ZeusMiner
 @BROAD_UDEVRULES_TRUE@@USE_ZEUSMINER_TRUE@ATTRS{idVendor}=="10c4", ATTRS{idProduct}=="ea60", GOTO="bfgminer_add"
 # Possibly unprogrammed ZTEX
-@BROAD_UDEVRULES_TRUE@@HAS_ZTEX_TRUE@ATTRS{idVendor}=="221a", ATTRS{idProduct}=="0100", GOTO="bfgminer_add"
+@BROAD_UDEVRULES_TRUE@@USE_ZTEX_TRUE@ATTRS{idVendor}=="221a", ATTRS{idProduct}=="0100", GOTO="bfgminer_add"
 # BFx2
 @BROAD_UDEVRULES_TRUE@@USE_BFX_TRUE@ATTRS{idVendor}=="0403", ATTRS{idProduct}=="6014", GOTO="bfgminer_add"
 # RockMiner

+ 8 - 1
AUTHORS

@@ -2,6 +2,8 @@ CURRENT MAINTAINERS:
 
 Luke Dashjr <luke-jr+bfgminer@utopios.org> 1QATWksNFGeUJCWBrN4g6hGM178Lovm7Wh
 Nate Woolls <nwoolls@gmail.com> 1JnZoFeCVYJgaKbKwDUxSkTZWWpBmwWTgV
+Pelle Nilsson <per.nilsson@xelmo.com>
+Vitalii Demianets <vitalii@orsoc.se>
 
 Debian packaging: Dmitry Smirnov <onlyjob@member.fsf.org>
 
@@ -53,7 +55,10 @@ Jason Snell <abysss@gmail.com>
 Jean-Luc Cooke <jlcooke@certainkey.com>
 Jonathan Lynch <jonathan.lynch@intel.com>
 Josh Lehan <krellan@krellan.net>
+Kiyoaki Matsugae <matsugae.kiyoaki@gmail.com>
+Lauri Kasanen <curaga@operamail.com>
 Lingchao Xu <lingchao.xu@bitmaintech.com>
+Luke Mitchell <Luke.Mitchell.2011@my.bristol.ac.uk>
 Mark Crichton <crichton@gmail.com>
 Martin Danielsen <kalroth@gmail.com>
 Michael Kedzierski <michael@kedzierski.id.au>
@@ -73,13 +78,15 @@ pooler <pooler@litecoinpool.org>
 Raulo <p987341@gmail.com>
 Red_Wolf_2 <redwolf@wolfnexus.net>
 Ricardo Iván Vieitez Parra <cop3504@memoryplate.com>
+Ronny Van Keer
 Rusty Russell <rusty@rustcorp.com.au>
+Серёга <netswalker@yandex.ru>
 slax0r <frcole@gmail.com>
 Teemu Suikki <zuikkis@gmail.com>
 Thorsten Gilling <tgilling@web.de>
+Tim Bartletts <github@tim.bartletts.id.au>
 Tydus <Tydus@Tydus.org>
 Ufasoft <support@ufasoft.com>
-Vitalii Demianets <vitalii@orsoc.se>
 Vladimir Strinski <vstrinski@nanofury.com>
 Zefir Kurtisi <zefir.kurtisi@gmail.com>
 ZeusMiner Team <cs@zeusminer.com>

+ 67 - 28
Makefile.am

@@ -95,7 +95,7 @@ update-version:
 	./gen-version.sh >version.h.new
 	cmp version.h version.h.new && rm version.h.new || mv version.h.new version.h
 version.h: update-version
-bfgminer_SOURCES += version.h
+bfgminer_SOURCES += version.c version.h
 BUILT_SOURCES = version.h
 
 SUBDIRS += $(libbase58_SUBDIRS) $(libblkmaker_SUBDIRS)
@@ -131,14 +131,13 @@ bfgminer_CPPFLAGS += $(libevent_CFLAGS)
 endif
 
 
-if HAVE_OPENCL
+if USE_OPENCL
 bfgminer_SOURCES += driver-opencl.h driver-opencl.c
 bfgminer_SOURCES += ocl.c ocl.h findnonce.c findnonce.h
 bfgminer_SOURCES += adl.c adl.h adl_functions.h
-bfgminer_SOURCES += opencl/*.cl
 
 kernelsdir = $(pkgdatadir)/opencl
-dist_kernels_DATA = $(top_srcdir)/opencl/*.cl
+dist_kernels_DATA =
 
 dist_doc_DATA += README.GPU
 
@@ -149,22 +148,32 @@ endif
 endif
 
 
-if HAS_SCRYPT
-bfgminer_SOURCES += scrypt.c scrypt.h
-dist_doc_DATA += README.scrypt
+if USE_KECCAK
+bfgminer_SOURCES += malgo/keccak.c
+
+if USE_OPENCL
+dist_kernels_DATA += $(top_srcdir)/opencl/keccak.cl
+endif
 endif
 
-if HAS_CPUMINE
-dist_doc_DATA += README.CPU
+if USE_SHA256D
+bfgminer_SOURCES += malgo/sha256d.c
+
+if USE_OPENCL
+dist_kernels_DATA += \
+	$(top_srcdir)/opencl/diablo.cl  \
+	$(top_srcdir)/opencl/diakgcn.cl  \
+	$(top_srcdir)/opencl/phatk.cl  \
+	$(top_srcdir)/opencl/poclbm.cl
+endif
+
+if USE_CPUMINING
 bfgminer_SOURCES	+= \
 		  sha256_generic.c sha256_via.c	\
 		  sha256_cryptopp.c sha256_sse2_amd64.c		\
 		  sha256_sse4_amd64.c 	\
 		  sha256_altivec_4way.c
 
-# the CPU portion extracted from original main.c
-bfgminer_SOURCES += driver-cpu.h driver-cpu.c
-
 if HAVE_SSE2
 bfgminer_LDADD  += libsse2cpuminer.a
 noinst_LIBRARIES = libsse2cpuminer.a
@@ -192,7 +201,25 @@ endif
 
 endif # HAVE_x86_64
 endif # HAS_YASM
-endif # HAS_CPUMINE
+endif # USE_CPUMINING
+endif # USE_SHA256D
+
+if USE_SCRYPT
+bfgminer_SOURCES += malgo/scrypt.c malgo/scrypt.h
+dist_doc_DATA += README.scrypt
+
+if USE_OPENCL
+dist_kernels_DATA += \
+	$(top_srcdir)/opencl/psw.cl  \
+	$(top_srcdir)/opencl/scrypt.cl  \
+	$(top_srcdir)/opencl/zuikkis.cl
+endif
+endif
+
+if USE_CPUMINING
+dist_doc_DATA += README.CPU
+bfgminer_SOURCES += driver-cpu.h driver-cpu.c
+endif # USE_CPUMINING
 
 if NEED_BFG_LOWL_VCOM
 bfgminer_SOURCES += lowl-vcom.c lowl-vcom.h
@@ -230,11 +257,11 @@ if HAS_ASIC
 dist_doc_DATA += README.ASIC
 endif
 
-if HAS_BITFORCE
+if USE_BITFORCE
 bfgminer_SOURCES += driver-bitforce.c
 endif
 
-if HAS_BIGPIC
+if USE_BIGPIC
 bfgminer_SOURCES += driver-bigpic.c driver-bigpic.h
 endif
 
@@ -242,11 +269,11 @@ if USE_DRILLBIT
 bfgminer_SOURCES += driver-drillbit.c
 endif
 
-if HAS_TWINFURY
+if USE_TWINFURY
 bfgminer_SOURCES += driver-twinfury.c driver-twinfury.h
 endif
 
-if HAS_ICARUS
+if USE_ICARUS
 bfgminer_SOURCES += driver-icarus.c driver-icarus.h
 bfgminer_SOURCES += driver-cairnsmore.c
 bfgminer_SOURCES += driver-erupter.c
@@ -269,7 +296,7 @@ if USE_ZEUSMINER
 bfgminer_SOURCES += driver-zeusminer.c
 endif
 
-if HAS_AVALON
+if USE_AVALON
 bfgminer_SOURCES += driver-avalon.c driver-avalon.h
 endif
 
@@ -281,23 +308,31 @@ if USE_KNC
 bfgminer_SOURCES += driver-knc.c
 endif
 
-if HAS_KLONDIKE
+if USE_KNCASIC
+bfgminer_SOURCES += driver-kncasic.c
+endif
+
+if USE_KLONDIKE
 bfgminer_SOURCES += driver-klondike.c driver-klondike.h driver-hashbusteravalon.c
 endif
 
-if HAS_MODMINER
+if USE_MODMINER
 bfgminer_SOURCES += driver-modminer.c
 endif
 
 if USE_TITAN
-bfgminer_SOURCES += driver-titan.c titan-asic.c knc-asic/knc-asic.c knc-asic/knc-spimux.c knc-asic/knc-transport-spimux.c titan-asic.h knc-asic/include/knc-asic.h knc-asic/include/knc-transport.h
+bfgminer_SOURCES += driver-titan.c titan-asic.c titan-asic.h
+endif
+
+if NEED_KNC_ASIC
+bfgminer_SOURCES += knc-asic/knc-asic.c knc-asic/knc-spimux.c knc-asic/knc-transport-spimux.c knc-asic/include/knc-asic.h knc-asic/include/knc-transport.h
 endif
 
-if HAS_X6500
+if USE_X6500
 bfgminer_SOURCES += driver-x6500.c jtag.c jtag.h
 endif
 
-if HAS_ZTEX
+if USE_ZTEX
 bfgminer_SOURCES += driver-ztex.c libztex.c libztex.h
 endif
 
@@ -305,10 +340,10 @@ if USE_BIFURY
 bfgminer_SOURCES += driver-bifury.c
 endif
 
-if HAS_BITFURY
+if USE_BITFURY
 bfgminer_SOURCES += driver-bitfury.c driver-bitfury.h libbitfury.c libbitfury.h
 
-if HAS_BFSB
+if USE_BFSB
 bfgminer_SOURCES += driver-bfsb.c
 endif
 
@@ -316,15 +351,15 @@ if USE_BFX
 bfgminer_SOURCES += driver-bfx.c
 endif
 
-if HAS_METABANK
+if USE_METABANK
 bfgminer_SOURCES += driver-metabank.c tm_i2c.h tm_i2c.c
 endif
 
-if HAS_LITTLEFURY
+if USE_LITTLEFURY
 bfgminer_SOURCES += driver-littlefury.c
 endif
 
-if HAS_NANOFURY
+if USE_NANOFURY
 bfgminer_SOURCES += driver-nanofury.c mcp2210.c mcp2210.h
 endif
 
@@ -355,6 +390,10 @@ if USE_MINERGATE
 bfgminer_SOURCES += driver-minergate.c
 endif
 
+if USE_MINION
+bfgminer_SOURCES += driver-minion.c
+endif
+
 if USE_ROCKMINER
 bfgminer_SOURCES += driver-rockminer.c
 endif

+ 268 - 0
NEWS

@@ -1,3 +1,271 @@
+BFGMiner Version 5.0.0 - November 29, 2014
+
+- opencl: Fail gracefully if clGetProgramInfo/CL_PROGRAM_NUM_DEVICES returns
+zero
+- opencl: Remember results of global offset testing
+- opencl: Only save kernel binary when we built from source
+- opencl: Only try to patch BFI_INT when compiling a kernel from source
+- opencl: Major refactor, splitting up opencl_load_kernel into many new
+functions
+- titan: Initialise variables to satisfy compilers
+- Bugfix: opencl: Correctly handle non-goffset kernels
+- Fixing column spacing of non-curses device output
+- Bugfix: opencl: free memory if clState creation fails
+- Titan: Do not do actual configuration job in API thread context. Instead, send
+commands to the worker thread to do the actual job.
+- Titan: bugfix: dies are not configured if die#0 is OFF
+- Titan: unused function argument
+- Titan: unused variables
+- Update knc-asic: Titan FPGA image with spi->i2c commands removed and fixed
+some lock-up scenarios
+- Titan code rework: Use one bfgminer processor per die (instead of core)
+- minion: Reinitialise chips if they don't seem to be progressing
+- minion: Read temperature sensor
+- minion: TUI clock speed control
+- minion: Include "Frequency" in RPC status
+- minion: Add "clock" setting
+- minion: Set configuration parameters we care about
+- minion: Reset chip at initialization
+- minion: Configure PLL to 900 MHz at startup
+- minion: Report hashes done as accurately as we can
+- minion: Core enable/disable control
+- minion: Implement queue flushing
+- minion: Implement basic mining
+- lowl-spi: linux_spi_txrx2 to include device protocol dumping
+- minion: Detection code for the Prospero X1
+- Actually add version.c file
+- Only rebuild a minimal version.c file when git commit changes
+- Bugfix: Use HASH_ADD_KEYPTR for const char *, not HASH_ADD_STR
+- Bugfix: Re-lock stgd_lock when we don't have a malgo-specific pool to use
+- scrypt: Disable OpenCL by default
+- opencl: Remove support for "OCL1" kernel define (indicated OpenCL 1.1+)
+- opencl: Autodetect whether global offset actually works
+- opencl: Add no-goffset support to scrypt kernels
+- opencl: Determine kernel support for goffset by code analysis
+- opencl: Add "goffset" setting to override detection of support for global work
+offsets
+- util: seek_data_cb: Check validity of offset
+- Allow libcurl to rewind the upload buffer
+- cpu: Set the priority of miner threads to idle on Windows
+- cpu: Use _SC_NPROCESSORS_CONF instead of _SC_NPROCESSORS_ONLN
+- cpu: Do not set thread affinity on single-processor systems
+- Adding additional boolean keywords enable and disable
+- AUTHORS: Move Pelle and Vitalii to "Current Maintainers" since they actively
+maintain the Titan driver
+- AUTHORS: Add Ronny Van Keer (Keccak C implementation)
+- Titan: supply its own rolling hashrate implementation
+- A driver can supply its own rolling hashrate function (in case the generic
+rolling hashrate implementation does not work)
+- Bugfix: configure: kncasic does not actually need i2c-tools
+- Keccak: Simplify keccak_hash_data
+- keccak: Adapt opencl intensity interpretation to have approximately the same
+desktop interactivity effect as SHA256d
+- Keccak: Include support in various builds
+- Travis: Update with Keccak
+- opencl/keccak: Add non-goffset support
+- opencl/keccak: Unroll all the hash rounds properly
+- opencl: Add Keccak support via "fullheader" kernel interface
+- keccak: Adapt for BFGMiner
+- Keccak: Import algorithm code as-is
+- opencl: Add a simple "fullheader" kernel interface
+- Update stratum mining.capabilities method to use a single Object for all
+parameters
+- Bugfix: Correct USE_SHA256D macro name to ensure SHA256d is always preferred
+as default algorithm
+- cpu: Generic scanhash that can support any PoW algorithm
+- RPC: Include Hash Method in minecoin for new algorithms
+- opencl: Teach findnonce to behave based on kernel interface rather than mining
+algorithm
+- opencl: Get min_nonce_diff from struct mining_algorithm
+- Move malgo-specific code to dedicated source files
+- opencl: Simplify kernel-specific data handling
+- Update knc-asic: Fix issue with detect_die crashing due to errors in response
+- titan: Init last_nonce on die reconfiguration to avoid lots of "unknown work"
+messages
+- Bugfix: titan: Extra garbage was sent on SPI with new commands
+- titan: Remove temporary log messages for manual report checks
+- titan: Wrong variable used in new per-die iteration
+- titan: Do manual core checks in parallel with normal ones
+- titan: Fix: Only one die was checked after flush in last commit
+- titan: Manually check all cores for reports after flush
+- opencl: Remove redundant checks for USE_OPENCL
+- cpu: Remove redundant checks for USE_CPUMINING
+- opencl: Only attempt to BFI_INT patch SHA256d kernels
+- Significantly rewrite configure script to handle driver dependencies cleaner
+- Make SHA256d mining optional
+- Bugfix: cointerra: Can handle nonces down to pdiff 1
+- kncasic: Initialise first_cgpu to silence false warning
+- Bugfix: Clarify goffset decision code, and refuse to compile kernels in
+situations where they won't work
+- Bugfix: opencl: Output buffer must be readable by kernel since it is used with
+a count/position iterator
+- Bugfix: opencl: Skip NULL kernel interface entry
+- DevAPI: hashes_done: Simplify and improve precision of max_nonce calculation
+- opencl: Avoid duplicating kernel_interfaces mapping in select_kernel
+- opencl: Avoid duplicating kernel_interfaces mapping in
+opencl_scanhash_get_kernel
+- cpu: Drivers should not be doing fulltest on their own, so remove it
+- cpu: Pass full work struct to scanhash functions
+- kncasic: Split up each ASIC channel to its own device
+- kncasic: Use consistent naming for KNC_MAX_DIES_PER_CORE
+- kncasic: Refactoring: coreid is not used, remove it
+- kncasic: Refactoring: put all device architecture defines in one place (knc-asic.h)
+- kncasic: Add lock to protect concurrent accesses to knc_state
+- kncasic: Fix auto
+- Build system fixes for kncasic driver
+- configure: Deduplicate Linux i2c-dev.h checks for KnCMiner drivers
+- Travis: Update for kncasic driver
+- kncasic: Only show the relevant die for RPC stats
+- kncasic: Use proc_repr for logging
+- kncasic: Split up logical processors
+- kncasic: Silence warning about unhandled enumeration values
+- Bugfix: kncasic: Proper format specifications
+- kncasic: Remove unused code
+- kncasic: Minimal changes to get it compiling
+- kncasic: Import gen 2 driver from cgminer commit
+bc153552be8591250cb3214bf5202501d4a39922
+- titan: Move thread_reportin() call to poll function
+- titan: reportin every process_report to avoid falsely tripping the watchdog
+- titan: Remove unused variables
+- Bump knc-asic submodule
+- titan: Increase FPGA to ASIC SPI frequency to 6 MHz
+- titan: Use new get_work_status prototype and print FPGA CRC error counters to
+log
+- Titan flush optimization fix: Don't send get_info commands when FPGA is
+speaking to ASIC
+- titan: All dies use same nonce working range
+- Titan flush optimization bugfix: FPGA status was checked once per poll instead
+of once per ASIC
+- Titan flush optimization: Bugfix: Flush time measurement only worked with one
+ASIC
+- Titan flush optimization: Point knc-asic submodule to include new FPGA image
+- Titan flush optimization: Remove temporary debug messages
+- Titan flush optimization: Keep works in local queue until slot number is
+reused
+- Titan flush optimization: Point knc-asic submodule to needed revision
+- Titan flush optimization: Measure new flush time
+- Titan flush optimization: Send works to all dies after flush
+- Titan flush optimization: First rough implementation
+- Titan: Less clobber on the screen: group some "failure" messages
+- Bugfix: Prefer not using work created just to ensure a specific algorithm is
+queued, so strategies work as much as possible
+- Stratum: Enable mining.set_goal to change parameters on the current goal
+- cpu: Fail gracefully if unsupported mining algo gets into scanhash
+- opencl: Fail gracefully if unsupported mining algo gets into
+opencl_scanhash_get_kernel
+- SSM: Send goal malgo to goal-enabled clients
+- Allow pools with #change_goal_malgo attribute to change the mining algorithm
+used by their assigned goal
+- Gracefully fail when no pool can be found to generate specific-algo work
+- Remove opt_scrypt
+- dualminer: Replace opt_scrypt with a per-device "scrypt" option
+- dualminer: Make dual_mode a per-device option as it should be
+- opencl: Nearly complete migration to per-work mining algorithms
+- pool_actively_desired: If we are the highest priority, workable pool for a
+given algorithm, we are needed
+- Ignore opt_queue for unused mining algorithms
+- Set name and aliases on mining algorithms
+- When hotplug is enabled and a mining algorithm is configured for the first
+time, schedule a rescan of hardware to pick up anything now applicable that may
+not have been before
+- Try to keep enough work queued for each mining algorithm in use
+- Move select_loadbalance and select_failover logic into their own functions
+- Keep track of how much work is staged per-algorithm
+- Keep track of how many goals reference each mining algorithm
+- Abstract goal_set_malgo function
+- Build a mining_algorithms list
+- get_work: Restore previous getwork rollntime behaviour
+- get_work: Only return work items compatible with processor (degrades getwork
+rollntime support)
+- opencl: Support for per-work mining algorithms
+- Build without POW_SCRYPT at all
+- opencl: Defer loading kernel until it is needed
+- opencl: Split initCl into opencl_create_clState and opencl_load_kernel
+- SSM: Implement mining.capabilities including proxying mining.set_goal
+- Replace mining.goal.subscribe with mining.capabilities
+- Stratum: Avoid resetting the goal if the old and new name match
+- Allow specifying goal options as eg, --pool-goal name:malgo=scrypt
+- cpu: Support for per-work mining algorithms
+- Replace cgpu.min_nonce_diff with drv.drv_min_nonce_diff(cgpu, malgo)
+- proxy: Avoid assumptions about mining algorithm at initialisation
+- Core only: Partially move most of opt_scrypt to mining_goal
+- opencl: Reintroduce independent intensity setting internally
+- Move MAX_*_INTENSITY stuff to driver-opencl.h
+- proxy: Minimise minimum difficulty for proxy clients
+- Enable --generate-to option (was --coinbase-addr) to work with non-default
+goals
+- Save pool goals in written config files
+- README: Update documentation for multiple blockchain support
+- RPC: Add "Difficulty Accepted" to "coin" command
+- RPC: Add "Mining Goal" to "pools" command
+- RPC: Accept an additional argument for "addpool" to indicate mining goal by
+name
+- Stratum: Support for mining.set_goal("goal name") - currently just resetting
+the user-configured goal
+- Teach longpoll logic about multiple mining goals
+- RPC: Include non-default goals in reply to "coin" command
+- Ignore generation address/script on pools using non-default mining goals
+- Only include block display lines for active goals
+- Move income to block display line, and show extra block display lines for each
+additional mining goal
+- New --pool-goal option to set a distinct named goal per-pool
+- Parameterise most references to global mining_goal_info
+- Move block_time to be per block_info
+- Show "?" for income if block height is unknown
+- Move block height tracking onto block_info
+- Calculate current_fullhash only when needed (for RPC 'coins')
+- Replace current_block_id with blkchain->currentblk[->block_id]
+- Move current_hash to goal->current_goal_detail
+- Clean up struct block_info names
+- Use full prevblock hash as block key (also gets rid of dead code)
+- Move global variables related to the blockchain or mining goals on to global
+structs
+
+
+BFGMiner Version 4.10.1 - November 29, 2014
+
+- Upgraded Windows libcurl from 7.38.0 to 7.39.0
+- Bugfix: bytes_free: Set buf to NULL so the bytes_t is reusable
+- Bugfix: minergate: Avoid malloc(0) if stats file has too few values
+- Bugfix: modminer: Avoid leaking devname when detecting claimed devices
+- Bugfix: free memory allocated for config file loading/writing
+- Bugfix: nanofury: Avoid NULL pointer dereference on init failure
+- Bugfix: varint_decode: Correctly decode multibyte numbers
+- cpu: Fix processor count detection with HW_NCPU
+- cpu: SCHED_IDLE and SCHED_BATCH can only be used with sched_priority 0
+- cpu: fix sched_setaffinity parameter : sizeof(cpu_set_t)
+- Bugfix: opencl: Correctly accept negative intensities and set dynamic
+intensity without changing current value
+- Since longpoll connections are never reused, explicitly forbid reuse so
+libcurl cleans them up immediately
+- Bugfix: Correct already-in-use getcbaddr message
+- Demote some Bitfury debug log messages to only with device protocol dumping
+- Update bundled libbase58 to 0.1.3
+- README.ASIC: Minor cleanups to Zeusminer section
+- README.ASIC: Explain Zeusminer options
+- Makefile: Specify .cl files explicitly, and only include scrypt ones when
+configured with scrypt support
+- Bugfix: DevAPI: hashes_done: Explicitly cast to uint64_t for big calculations
+(LLVM was allowing overflow)
+- Bugfix: titan: First core in each die got too big nonce area to work on
+- Travis: Build MinGW64 with ncurses
+- Bugfix: Declare manual_enable_pool in miner.h, for api.c use
+- Bugfix: bitfury: Use long long and llabs to avoid under/over flows misbehaving
+- Bugfix: Highlight all active pools, in pool listing
+- Set quota to 1 when enabling a failover-only pool, and disable failover-only
+when setting quota
+- Bugfix: pool_actively_desired: Detect failover-only conditions for loadbalance
+and balance strategies
+- Bugfix: Only display pools as Failover state when it is effectively so
+- Make sure KEY_EVENT from wincon is ignored (it conflicts with ncurses)
+- README.ASIC: Add details for BFx2 USB
+- Titan: Submit stale shares while doing flushes
+- README.ASIC: Remove CFLAGS hack not needed for KnCMiner Titan build
+- Bugfix: Need unicode_micro in non-Unicode curses builds
+- Remove unused variables from curses-less builds
+- Bugfix: Cannot duplicate userpass option or we end up with it processed double
+
+
 BFGMiner Version 4.10.0 - October 21, 2014
 
 - Upgraded Windows libjansson from 2.6 to 2.7

+ 50 - 27
README

@@ -2,9 +2,8 @@ BFGMiner:
 St. Barbara's Faithfully Glorified Mining Initiative Naturally Exceeding Rivals
 or Basically a Freaking Good Miner
 
-This is a multi-threaded multi-pool ASIC, FPGA, GPU and CPU miner with dynamic
-clocking, monitoring, and fanspeed support for bitcoin. Do not use on multiple
-block chains at the same time!
+This is a multi-threaded, multi-blockchain, multi-pool ASIC, FPGA, GPU and CPU
+miner with dynamic clocking, monitoring, and fanspeed support for bitcoin.
 
 This code is provided entirely free of charge by the programmer in his spare
 time so donations would be greatly appreciated. Please consider donating to the
@@ -46,6 +45,10 @@ Multiple pools:
 
 bfgminer -o http://pool1:port -u pool1username -p pool1password -o http://pool2:port -u pool2username -p pool2password
 
+Multiple blockchains:
+
+bfgminer -o http://pool1:port -u pool1username -p pool1password --pool-goal default -o http://pool2:port -u pool2username -p pool2password --pool-goal freicoin
+
 Single pool with a standard http proxy:
 
 bfgminer -o http://pool:port -x http://proxy:port -u username -p password
@@ -140,44 +143,49 @@ BFGMiner driver configuration options:
 	                        Include udev rules for ambiguous devices which may
 	                        not be miners
 	--disable-avalon        Compile support for Avalon (default enabled)
+	--disable-avalonmm      Compile support for Avalon2/3 (default enabled)
 	--enable-bfsb           Compile support for BFSB (default disabled)
-	--disable-bfx           Compile support for BFx2 (default if libusb)
+	--disable-bfx           Compile support for BFx2 (default enabled)
 	--disable-bifury        Compile support for Bi*Fury (default enabled)
 	--disable-bigpic        Compile support for Big Picture Mining USB (default
 	                        enabled)
 	--disable-bitforce      Compile support for BitForce (default enabled)
 	--disable-bitfury       Compile support for Bitfury (default enabled)
-	--enable-cpumining      Build with CPU mining support (default disabled)
+	--disable-cointerra     Compile support for CoinTerra (default enabled)
+	--enable-cpumining      Compile support for CPU mining (default disabled)
 	--disable-drillbit      Compile support for DrillBit (default enabled)
 	--disable-dualminer     Compile support for DualMiner (default enabled)
-	--disable-gridseed      Compile support for GridSeed (default enabled with
-	                        scrypt)
+	--disable-gridseed      Compile support for GridSeed (default enabled)
 	--disable-hashbuster    Compile support for HashBuster Nano (default
 	                        enabled)
-	--disable-hashbusterusb Compile support for HashBuster Micro (default if
-	                        libusb)
+	--disable-hashbusterusb Compile support for HashBuster Micro (default
+	                        enabled)
 	--disable-hashfast      Compile support for HashFast (default enabled)
 	--disable-icarus        Compile support for Icarus (default enabled)
 	--enable-jingtian       Compile support for JingTian (default disabled)
 	--disable-klondike      Compile support for Klondike (default enabled)
 	--enable-knc            Compile support for KnC (default disabled)
+	--enable-kncasic        Compile support for KnC gen 2 (default disabled)
 	--disable-littlefury    Compile support for LittleFury (default enabled)
 	--enable-metabank       Compile support for Metabank (default disabled)
+	--enable-minergate      Compile support for Spondoolies minergate interface
+	                        (default disabled)
+	--enable-minion         Compile support for Minion (default disabled)
 	--disable-modminer      Compile support for ModMiner (default enabled)
 	--disable-nanofury      Compile support for NanoFury (default enabled)
 	--enable-opencl         Compile support for OpenCL (default disabled)
 	--disable-adl           Build without ADL monitoring (default enabled)
 	--disable-rockminer     Compile support for RockMiner (default enabled)
 	--enable-titan          Compile support for KnC Titan (default disabled)
-	--disable-twinfury      Compile support for Twinfury USB miner (default
-	                        enabled)
-	--disable-x6500         Compile support for X6500 (default if libusb)
-	--disable-zeusminer     Compile support for ZeusMiner (default enabled with
-	                        scrypt)
-	--disable-ztex          Compile support for ZTEX (default if libusb)
+	--disable-twinfury      Compile support for Twinfury (default enabled)
+	--disable-x6500         Compile support for X6500 (default enabled)
+	--disable-zeusminer     Compile support for ZeusMiner (default enabled)
+	--disable-ztex          Compile support for ZTEX (default enabled)
 
 BFGMiner algorithm configuration option:
-	--enable-scrypt         Compile support for scrypt mining (default disabled)
+	--enable-keccak         Compile support for Keccak (default disabled)
+	--disable-sha256d       Compile support for SHA256d (default enabled)
+	--enable-scrypt         Compile support for scrypt (default disabled)
 
 BFGMiner dependency configuration options:
 	--without-curses        Compile support for curses TUI (default enabled)
@@ -252,7 +260,6 @@ Options for both config file and command line:
 --cmd-idle <arg>    Execute a command when a device is allowed to be idle (rest or wait)
 --cmd-sick <arg>    Execute a command when a device is declared sick
 --cmd-dead <arg>    Execute a command when a device is declared dead
---coinbase-addr <arg> Set coinbase payout address for solo mining
 --coinbase-check-addr <arg> A list of addresses to check against in the coinbase payout list received from the previous-defined pool, separated by ','
 --coinbase-check-total <arg> The least total payout amount expected in coinbase received from the previous-defined pool
 --coinbase-check-percent <arg> The least benefit percentage expected for the sum of addr(s) listed in --cbaddr argument for previous-defined pool
@@ -268,6 +275,7 @@ Options for both config file and command line:
 --expiry-lp <arg>   Upper bound on how many seconds after getting work we consider a share from it stale (with longpoll active) (default: 3600)
 --failover-only     Don't leak work to backup pools when primary pool is lagging
 --failover-switch-delay <arg> Delay in seconds before switching back to a failed pool (default: 300)
+--generate-to <arg> Set an address to generate to for solo mining
 --force-dev-init    Always initialize devices when possible (such as bitstream uploads to some FPGAs)
 --kernel-path <arg> Specify a path to where bitstream and kernel files are
 --load-balance      Change multipool strategy from failover to quota based balance
@@ -289,6 +297,7 @@ Options for both config file and command line:
 --noncelog <arg>    Create log of all nonces found
 --pass|-p <arg>     Password for bitcoin JSON-RPC server
 --per-device-stats  Force verbose mode and output per-device statistics
+--pool-goal <arg>   Named goal for the previous-defined pool
 --pool-priority <arg> Priority for just the previous-defined pool
 --pool-proxy|-x     Proxy URI to use for connecting to just the previous-defined pool
 --protocol-dump|-P  Verbose dump of protocol-level activities
@@ -467,14 +476,16 @@ the range of current share difficulties, whether block notification is working
 work.
 
 The block display shows:
-Block: ...1b89f8d3 #217364  Diff:7.67M (54.93Th/s)  Started: [17:17:22]
+Block #217364: ...1b89f8d3  Diff:7.67M (54.93T)  Started: [17:17:22]  I:12.99mBTC/hr
 
-This shows a short stretch of the current block, the next block's height and
-difficulty (including the network hashrate that difficulty represents), and when
-the search for the new block started.
+This shows the next block's height, a short stretch of the current block's
+hash, the difficulty (including the network hashrate that difficulty
+represents), when the search for the new block started, and finally expected
+Income, calculated by actual shares submitted in 100% PPS value (assumes
+Bitcoin, does not account for altcoin conversions!).
 
 The BFGMiner status line shows:
- ST:1  F:0  NB:1  AS:0  BW:[ 75/241 B/s]  E:2.42  I:12.99mBTC/hr  BS:2.71k
+ ST:1  F:0  NB:1  AS:0  BW:[ 75/241 B/s]  E:2.42  BS:2.71k
 
 ST is STaged work items (ready to use).
 F  is network Failure occasions (server down or slow to provide work)
@@ -483,8 +494,6 @@ AS is Active Submissions (shares in the process of submitting)
 BW is BandWidth usage on the network (received/sent)
 E  is Efficiency defined as number of shares accepted (multiplied by their
           difficulty) per 2 KB of bandwidth
-I  is expected Income, calculated by actual shares submitted in 100% PPS value
-          (assumes Bitcoin, does not account for altcoin conversions!)
 BS is the all time Best Share difficulty you've found
 
 The totals line shows the following:
@@ -557,10 +566,11 @@ SOLO MINING
 
 BFGMiner supports solo mining with any GBT-compatible bitcoin node (such as
 bitcoind). To use this mode, you need to specify the URL of your bitcoind node
-using the usual pool options (--url, --userpass, etc), and the --coinbase-addr
+using the usual pool options (--url, --userpass, etc), and the --generate-to
 option to specify the Bitcoin address you wish to receive the block rewards
 mined. When you run Bitcoin Core on the same computer as your miner, the pool
-itself will be automatically configured for you.
+itself will be automatically configured for you (on the default goal). Please be
+aware that solo mining via GBT is at this time only supported for Bitcoin.
 
 IMPORTANT: If you are solo mining with more than one instance of BFGMiner (or
 any other software) per payout address, you must also specify data using the
@@ -577,7 +587,20 @@ rejected; this does not indicate that it has stopped mining.
 Example solo mining usage:
 
 bfgminer -o http://localhost:8332 -u username -p password \
-    --coinbase-addr 1QATWksNFGeUJCWBrN4g6hGM178Lovm7Wh \
+    --generate-to 1QATWksNFGeUJCWBrN4g6hGM178Lovm7Wh \
+    --coinbase-sig "rig1: This is Joe's block!"
+
+If you want to solo mine on multiple GBT-compatible Bitcoin blockchains, you can
+specify --generate-to multiple times with a goal name prefix followed by a
+colon. Note that at this time, the coinbase sig is always shared across all
+goals/pools.
+
+Example multi-blockchain solo mining usage:
+
+bfgminer -o http://localhost:8332 -u username -p password \
+    --generate-to 1QATWksNFGeUJCWBrN4g6hGM178Lovm7Wh \
+    -o http://localhost:7221 -u user2 -p password --pool-goal mychain \
+    --generate-to mychain:1QATWksNFGeUJCWBrN4g6hGM178Lovm7Wh \
     --coinbase-sig "rig1: This is Joe's block!"
 
 

+ 37 - 0
README.ASIC

@@ -89,6 +89,21 @@ For example:
     sudo bfgminer -S bfsb:auto
 
 
+BFx2
+----
+
+You will need to install the WinUSB driver instead of the default FTDI serial
+driver. The easiest way to do this is using Zadig: http://zadig.akeo.ie/
+
+Note that since it's impossible to tell the BFx2 apart from various other
+devices (including BFL/Cairnsmore1 miners and even many non-mining devices!),
+you must run with the -S bfx:all option (or 'bfx:all' at the M+ menu).
+
+I do not know what this will do with other devices; it may start fires,
+launch nuclear missiles (please don't run BFGMiner on computers with
+missile controls), etc.
+
+
 BI*FURY
 -------
 
@@ -316,6 +331,28 @@ So for example, an entry would look like this:
         gridseed@6D85278F5650:clock=875
 
 
+ZEUSMINER
+---------
+
+Zeusminers do not support autodetection, so you will need to use --scan to probe
+for them:
+
+-S zeusminer:\\.\COM3
+
+You should also configure the driver for your specific device:
+
+    --set zeusminer:clock=N        Clock frequency (default: 328)
+    --set zeusminer:chips=N        Number of chips per device
+        Blizzard    :  6          Cyclone     :  96
+        Hurricane X2: 48 (2*24)   Hurricane X3:  64 (2*32)
+        Thunder   X2: 96 (4*24)   Thunder   X3: 128 (4*32)
+Note: if you set this option incorrectly, the device may underperform and/or
+      misreport hashrate.
+
+For example:
+
+bfgminer --scrypt -o stratum+tcp://pool:port -u user -p pass -S zeusminer:\\.\COM3 --set zeusminer:clock=328 --set zeusminer:chips=128
+
 ---
 
 This code is provided entirely free of charge by the programmer in his spare

+ 12 - 2
README.RPC

@@ -213,12 +213,12 @@ The list of requests - a (*) means it requires privileged access - and replies:
                               stating the results of enabling pool N
                               The Msg includes the pool URL
 
- addpool|URL,USR,PASS (*)
+ addpool|URL,USR,PASS[,GOAL] (*)
                none           There is no reply section just the STATUS section
                               stating the results of attempting to add pool N
                               The Msg includes the pool URL
                               Use '\\' to get a '\' and '\,' to include a comma
-                              inside URL, USR or PASS
+                              inside URL, USR, PASS, or GOAL
 
  poolpriority|N,... (*)
                none           There is no reply section just the STATUS section
@@ -449,6 +449,16 @@ https://www.npmjs.org/package/miner-rpc
 Feature Changelog for external applications using the API:
 
 
+API V3.3 (BFGMiner v5.0.0)
+
+Modified API command:
+ 'addpool' - accept an additional argument to indicate mining goal by name
+ 'coin' - return multiple elements, when there are multiple mining goals
+          defined; add 'Difficulty Accepted'
+ 'pools' - add 'Mining Goal'
+
+---------
+
 API V3.2 (BFGMiner v4.1.0)
 
 Modified API command:

+ 6 - 2
README.scrypt

@@ -8,8 +8,12 @@ Scrypt mining for GPU is completely different to sha256 used for bitcoin
 mining. It has very different requirements to bitcoin mining and is a
 lot more complicated to get working well. Note that it is a ram dependent
 workload, and requires you to have enough system ram as well as fast enough
-GPU ram. If you have less system ram than your GPU has, it may not be possible
-to mine at any reasonable rate.
+GPU ram.
+
+Since scrypt ASICs are widely available, GPUs are not used by default. If you
+want to use them, use the -S opencl:auto option. Note that scrypt ASICs are
+documented in README.ASIC rather than this file, which only deals with GPU
+mining.
 
 There are 5 main parameters to tuning scrypt, all of which are optional for
 further fine tuning. When you start scrypt mining with the --scrypt option,

+ 97 - 69
api.c

@@ -26,6 +26,8 @@
 #include <unistd.h>
 #include <sys/types.h>
 
+#include <uthash.h>
+
 #include "compat.h"
 #include "deviceapi.h"
 #ifdef USE_LIBMICROHTTPD
@@ -35,7 +37,6 @@
 #include "util.h"
 #include "driver-cpu.h" /* for algo_names[], TODO: re-factor dependency */
 #include "driver-opencl.h"
-#include "version.h"
 
 #define HAVE_AN_FPGA 1
 
@@ -73,7 +74,7 @@ static const char *ALIVE = "Alive";
 static const char *REJECTING = "Rejecting";
 static const char *UNKNOWN = "Unknown";
 #define _DYNAMIC "D"
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 static const char *DYNAMIC = _DYNAMIC;
 #endif
 
@@ -87,7 +88,9 @@ static const char *FALSESTR = "false";
 #ifdef USE_SCRYPT
 static const char *SCRYPTSTR = "scrypt";
 #endif
+#ifdef USE_SHA256D
 static const char *SHA256STR = "sha256";
+#endif
 
 static const char *OSINFO =
 #if defined(__linux)
@@ -122,7 +125,7 @@ static const char *OSINFO =
 #define _PGA		"PGA"
 #endif
 
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 #define _CPU		"CPU"
 #endif
 
@@ -162,7 +165,7 @@ static const char ISJSON = '{';
 #define JSON_PGA	JSON1 _PGA JSON2
 #endif
 
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 #define JSON_CPU	JSON1 _CPU JSON2
 #endif
 
@@ -170,11 +173,9 @@ static const char ISJSON = '{';
 #define JSON_PGAS	JSON1 _PGAS JSON2
 #define JSON_CPUS	JSON1 _CPUS JSON2
 #define JSON_NOTIFY	JSON1 _NOTIFY JSON2
-#define JSON_DEVDETAILS	JSON1 _DEVDETAILS JSON2
 #define JSON_CLOSE	JSON3
 #define JSON_MINESTATS	JSON1 _MINESTATS JSON2
 #define JSON_CHECK	JSON1 _CHECK JSON2
-#define JSON_MINECOIN	JSON1 _MINECOIN JSON2
 #define JSON_DEBUGSET	JSON1 _DEBUGSET JSON2
 #define JSON_SETCONFIG	JSON1 _SETCONFIG JSON2
 #define JSON_END	JSON4 JSON5
@@ -201,7 +202,7 @@ static const char *JSON_PARAMETER = "parameter";
 #define MSG_MISID 15
 #define MSG_GPUDEV 17
 
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 #define MSG_CPUNON 16
 #define MSG_CPUDEV 18
 #define MSG_INVCPU 19
@@ -350,7 +351,7 @@ struct CODES {
 	const enum code_parameters params;
 	const char *description;
 } codes[] = {
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
  { SEVERITY_ERR,   MSG_INVGPU,	PARAM_GPUMAX,	"Invalid GPU id %d - range is 0 - %d" },
  { SEVERITY_INFO,  MSG_ALRENA,	PARAM_GPU,	"GPU %d already enabled" },
  { SEVERITY_INFO,  MSG_ALRDIS,	PARAM_GPU,	"GPU %d already disabled" },
@@ -369,13 +370,13 @@ struct CODES {
  },
 
  { SEVERITY_SUCC,  MSG_SUMM,	PARAM_NONE,	"Summary" },
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
  { SEVERITY_INFO,  MSG_GPUDIS,	PARAM_GPU,	"GPU %d set disable flag" },
  { SEVERITY_INFO,  MSG_GPUREI,	PARAM_GPU,	"GPU %d restart attempted" },
 #endif
  { SEVERITY_ERR,   MSG_INVCMD,	PARAM_NONE,	"Invalid command" },
  { SEVERITY_ERR,   MSG_MISID,	PARAM_NONE,	"Missing device id parameter" },
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
  { SEVERITY_SUCC,  MSG_GPUDEV,	PARAM_GPU,	"GPU%d" },
 #endif
 #ifdef HAVE_AN_FPGA
@@ -388,7 +389,7 @@ struct CODES {
  { SEVERITY_INFO,  MSG_PGADIS,	PARAM_PGA,	"PGA %d set disable flag" },
  { SEVERITY_ERR,   MSG_PGAUNW,	PARAM_PGA,	"PGA %d is not flagged WELL, cannot enable" },
 #endif
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
  { SEVERITY_ERR,   MSG_CPUNON,	PARAM_NONE,	"No CPUs" },
  { SEVERITY_SUCC,  MSG_CPUDEV,	PARAM_CPU,	"CPU%d" },
  { SEVERITY_ERR,   MSG_INVCPU,	PARAM_CPUMAX,	"Invalid CPU id %d - range is 0 - %d" },
@@ -411,10 +412,10 @@ struct CODES {
  { SEVERITY_ERR,   MSG_MISVAL,	PARAM_NONE,	"Missing comma after GPU number" },
  { SEVERITY_ERR,   MSG_NOADL,	PARAM_NONE,	"ADL is not available" },
  { SEVERITY_ERR,   MSG_NOGPUADL,PARAM_GPU,	"GPU %d does not have ADL" },
- { SEVERITY_ERR,   MSG_INVINT,	PARAM_STR,	"Invalid intensity (%s) - must be '" _DYNAMIC  "' or range " MIN_SHA_INTENSITY_STR " - " MAX_SCRYPT_INTENSITY_STR },
+ { SEVERITY_ERR,   MSG_INVINT,	PARAM_STR,	"Invalid intensity (%s) - must be '" _DYNAMIC  "' or range -10 - 31" },
  { SEVERITY_INFO,  MSG_GPUINT,	PARAM_BOTH,	"GPU %d set new intensity to %s" },
  { SEVERITY_SUCC,  MSG_MINECONFIG,PARAM_NONE,	"BFGMiner config" },
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
  { SEVERITY_ERR,   MSG_GPUMERR,	PARAM_BOTH,	"Setting GPU %d memoryclock to (%s) reported failure" },
  { SEVERITY_SUCC,  MSG_GPUMEM,	PARAM_BOTH,	"Setting GPU %d memoryclock to (%s) reported success" },
  { SEVERITY_ERR,   MSG_GPUEERR,	PARAM_BOTH,	"Setting GPU %d clock to (%s) reported failure" },
@@ -1142,7 +1143,7 @@ static void message(struct io_data * const io_data, const int messageid2, const
 #ifdef HAVE_AN_FPGA
 	int pga;
 #endif
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 	int cpu;
 #endif
 	int i;
@@ -1181,7 +1182,7 @@ static void message(struct io_data * const io_data, const int messageid2, const
 				case PARAM_POOL:
 					sprintf(buf, codes[i].description, paramid, pools[paramid]->rpc_url);
 					break;
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 				case PARAM_GPUMAX:
 					sprintf(buf, codes[i].description, paramid, nDevs - 1);
 					break;
@@ -1192,7 +1193,7 @@ static void message(struct io_data * const io_data, const int messageid2, const
 					sprintf(buf, codes[i].description, paramid, pga - 1);
 					break;
 #endif
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 				case PARAM_CPUMAX:
 					if (opt_n_threads > 0)
 						cpu = num_processors;
@@ -1271,8 +1272,8 @@ static void apiversion(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m
 	message(io_data, MSG_VERSION, 0, NULL, isjson);
 	io_open = io_add(io_data, isjson ? COMSTR JSON_VERSION : _VERSION COMSTR);
 
-	root = api_add_string(root, "Miner", PACKAGE " " VERSION, false);
-	root = api_add_string(root, "CGMiner", VERSION, false);
+	root = api_add_string(root, "Miner", bfgminer_name_space_ver, false);
+	root = api_add_string(root, "CGMiner", bfgminer_ver, false);
 	root = api_add_const(root, "API", APIVERSION, false);
 
 	root = print_data(root, buf, isjson, false);
@@ -1564,7 +1565,7 @@ void devstatus_an(struct io_data *io_data, struct cgpu_info *cgpu, bool isjson,
 	io_add(io_data, buf);
 }
 
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 static void gpustatus(struct io_data *io_data, int gpu, bool isjson, bool precom)
 {
         if (gpu < 0 || gpu >= nDevs)
@@ -1583,7 +1584,7 @@ static void pgastatus(struct io_data *io_data, int pga, bool isjson, bool precom
 }
 #endif
 
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 static void cpustatus(struct io_data *io_data, int cpu, bool isjson, bool precom)
 {
         if (opt_n_threads <= 0 || cpu < 0 || cpu >= num_processors)
@@ -1629,7 +1630,7 @@ static void devstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __ma
 	return devinfo_internal(devstatus_an, MSG_DEVS, io_data, c, param, isjson, group);
 }
 
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 static void gpudev(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
 {
 	bool io_open = false;
@@ -1847,7 +1848,7 @@ static void pgaidentify(struct io_data *io_data, __maybe_unused SOCKETTYPE c, ch
 }
 #endif
 
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 static void cpudev(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
 {
 	bool io_open = false;
@@ -1937,6 +1938,7 @@ static void poolstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __m
 		root = api_add_string(root, "Status", status, false);
 		root = api_add_int(root, "Priority", &(pool->prio), false);
 		root = api_add_int(root, "Quota", &pool->quota, false);
+		root = api_add_string(root, "Mining Goal", pool->goal->name, false);
 		root = api_add_string(root, "Long Poll", lp, false);
 		root = api_add_uint(root, "Getworks", &(pool->getwork_requested), false);
 		root = api_add_int(root, "Accepted", &(pool->accepted), false);
@@ -1989,12 +1991,6 @@ static void summary(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __mayb
 	bool io_open;
 	double utility, mhs, work_utility;
 
-#ifdef WANT_CPUMINE
-	char *algo = (char *)(algo_names[opt_algo]);
-	if (algo == NULL)
-		algo = (char *)NULLSTR;
-#endif
-
 	message(io_data, MSG_SUMM, 0, NULL, isjson);
 	io_open = io_add(io_data, isjson ? COMSTR JSON_SUMMARY : _SUMMARY COMSTR);
 
@@ -2006,9 +2002,9 @@ static void summary(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __mayb
 	work_utility = total_diff1 / ( total_secs ? total_secs : 1 ) * 60;
 
 	root = api_add_elapsed(root, "Elapsed", &(total_secs), true);
-#ifdef WANT_CPUMINE
+#if defined(USE_CPUMINING) && defined(USE_SHA256D)
 	if (opt_n_threads > 0)
-	root = api_add_string(root, "Algorithm", algo, false);
+		root = api_add_string(root, "Algorithm", (algo_names[opt_algo] ?: NULLSTR), false);
 #endif
 	root = api_add_mhs(root, "MHS av", &(mhs), false);
 	char mhsname[27];
@@ -2055,7 +2051,7 @@ static void summary(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __mayb
 		io_close(io_data);
 }
 
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 static void gpuenable(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
 {
 	int id;
@@ -2160,7 +2156,7 @@ static void gpucount(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __may
 	bool io_open;
 	int numgpu = 0;
 
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 	numgpu = nDevs;
 #endif
 
@@ -2197,7 +2193,7 @@ static void pgacount(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __may
 		io_close(io_data);
 }
 
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 static void cpuenable(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
 {
 	int id;
@@ -2305,7 +2301,7 @@ static void cpucount(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __may
 	bool io_open;
 	int count = 0;
 
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 	count = opt_n_threads > 0 ? num_processors : 0;
 #endif
 
@@ -2344,7 +2340,7 @@ static void switchpool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, cha
 	}
 
 	pool = pools[id];
-	pool->failover_only = false;
+	manual_enable_pool(pool);
 	cg_runlock(&control_lock);
 	switch_pools(pool);
 
@@ -2368,7 +2364,7 @@ static void copyadvanceafter(char ch, char **param, char **buf)
 	*(dst_b++) = '\0';
 }
 
-static bool pooldetails(char *param, char **url, char **user, char **pass)
+static bool pooldetails(char *param, char **url, char **user, char **pass, char **goalname)
 {
 	char *ptr, *buf;
 
@@ -2396,6 +2392,12 @@ static bool pooldetails(char *param, char **url, char **user, char **pass)
 
 	// copy pass
 	copyadvanceafter(',', &param, &buf);
+	
+	if (*param)
+		*goalname = buf;
+	
+	// copy goalname
+	copyadvanceafter(',', &param, &buf);
 
 	return true;
 
@@ -2406,7 +2408,7 @@ exitsama:
 
 static void addpool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
 {
-	char *url, *user, *pass;
+	char *url, *user, *pass, *goalname = "default";
 	struct pool *pool;
 	char *ptr;
 
@@ -2415,7 +2417,8 @@ static void addpool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *
 		return;
 	}
 
-	if (!pooldetails(param, &url, &user, &pass)) {
+	if (!pooldetails(param, &url, &user, &pass, &goalname))
+	{
 		ptr = escape_string(param, isjson);
 		message(io_data, MSG_INVPDP, 0, ptr, isjson);
 		if (ptr != param)
@@ -2424,7 +2427,8 @@ static void addpool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *
 		return;
 	}
 
-	pool = add_pool();
+	struct mining_goal_info * const goal = get_mining_goal(goalname);
+	pool = add_pool2(goal);
 	detect_stratum(pool, url);
 	add_pool_details(pool, true, url, user, pass);
 
@@ -2462,8 +2466,7 @@ static void enablepool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, cha
 		return;
 	}
 
-	pool->failover_only = false;
-	enable_pool(pool);
+	manual_enable_pool(pool);
 
 	message(io_data, MSG_ENAPOOL, id, NULL, isjson);
 }
@@ -2621,7 +2624,7 @@ static void removepool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, cha
 	rpc_url = NULL;
 }
 
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 static bool splitgpuvalue(struct io_data *io_data, char *param, int *gpu, char **value, bool isjson)
 {
 	int id;
@@ -2677,7 +2680,11 @@ static void gpuintensity(struct io_data *io_data, __maybe_unused SOCKETTYPE c, c
 		if (data->dynamic)
 			strcpy(intensitystr, DYNAMIC);
 		else
-			snprintf(intensitystr, sizeof(intensitystr), "%g", oclthreads_to_intensity(data->oclthreads, !opt_scrypt));
+		{
+			const char *iunit;
+			float intensity = opencl_proc_get_intensity(cgpu, &iunit);
+			snprintf(intensitystr, sizeof(intensitystr), "%s%g", iunit, intensity);
+		}
 	}
 	else
 	{
@@ -3066,36 +3073,57 @@ static void minecoin(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __may
 {
 	struct api_data *root = NULL;
 	char buf[TMPBUFSIZ];
-	bool io_open;
 
 	message(io_data, MSG_MINECOIN, 0, NULL, isjson);
-	io_open = io_add(io_data, isjson ? COMSTR JSON_MINECOIN : _MINECOIN COMSTR);
 
+	struct mining_goal_info *goal, *tmpgoal;
+	bool precom = false;
+	HASH_ITER(hh, mining_goals, goal, tmpgoal)
+	{
+		if (goal->is_default)
+			io_add(io_data, isjson ? COMSTR JSON1 _MINECOIN JSON2 : _MINECOIN COMSTR);
+		else
+		{
+			sprintf(buf, isjson ? COMSTR JSON1 _MINECOIN "%u" JSON2 : _MINECOIN "%u" COMSTR, goal->id);
+			io_add(io_data, buf);
+		}
+		
+		switch (goal->malgo->algo)
+		{
 #ifdef USE_SCRYPT
-	if (opt_scrypt)
-		root = api_add_const(root, "Hash Method", SCRYPTSTR, false);
-	else
+			case POW_SCRYPT:
+				root = api_add_const(root, "Hash Method", SCRYPTSTR, false);
+				break;
+#endif
+#ifdef USE_SHA256D
+			case POW_SHA256D:
+				root = api_add_const(root, "Hash Method", SHA256STR, false);
+				break;
 #endif
-		root = api_add_const(root, "Hash Method", SHA256STR, false);
+			default:
+				root = api_add_const(root, "Hash Method", goal->malgo->name, false);
+				break;
+		}
 
-	cg_rlock(&ch_lock);
-	if (current_fullhash && *current_fullhash) {
-		root = api_add_time(root, "Current Block Time", &block_time, true);
-		root = api_add_string(root, "Current Block Hash", current_fullhash, true);
-	} else {
-		time_t t = 0;
-		root = api_add_time(root, "Current Block Time", &t, true);
-		root = api_add_const(root, "Current Block Hash", BLANK, false);
+		cg_rlock(&ch_lock);
+		struct blockchain_info * const blkchain = goal->blkchain;
+		struct block_info * const blkinfo = blkchain->currentblk;
+		root = api_add_time(root, "Current Block Time", &blkinfo->first_seen_time, true);
+		char fullhash[(sizeof(blkinfo->prevblkhash) * 2) + 1];
+		blkhashstr(fullhash, blkinfo->prevblkhash);
+		root = api_add_string(root, "Current Block Hash", fullhash, true);
+		cg_runlock(&ch_lock);
+
+		root = api_add_bool(root, "LP", &goal->have_longpoll, false);
+		root = api_add_diff(root, "Network Difficulty", &goal->current_diff, true);
+		
+		root = api_add_diff(root, "Difficulty Accepted", &goal->diff_accepted, false);
+		
+		root = print_data(root, buf, isjson, precom);
+		io_add(io_data, buf);
+		if (isjson)
+			io_add(io_data, JSON_CLOSE);
 	}
-	cg_runlock(&ch_lock);
-
-	root = api_add_bool(root, "LP", &have_longpoll, false);
-	root = api_add_diff(root, "Network Difficulty", &current_diff, true);
-
-	root = print_data(root, buf, isjson, false);
-	io_add(io_data, buf);
-	if (isjson && io_open)
-		io_close(io_data);
 }
 
 static void debugstate(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
@@ -3372,7 +3400,7 @@ struct CMDS {
 	{ "procs",		devstatus,	false,	true },
 	{ "pools",		poolstatus,	false,	true },
 	{ "summary",		summary,	false,	true },
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 	{ "gpuenable",		gpuenable,	true,	false },
 	{ "gpudisable",		gpudisable,	true,	false },
 	{ "gpurestart",		gpurestart,	true,	false },
@@ -3389,7 +3417,7 @@ struct CMDS {
 	{ "procdisable",		pgadisable,	true,	false },
 	{ "procidentify",	pgaidentify,	true,	false },
 #endif
-#ifdef WANT_CPUMINE
+#ifdef USE_CPUMINING
 	{ "cpuenable",		cpuenable,	true,	false },
 	{ "cpudisable",		cpudisable,	true,	false },
 	{ "cpurestart",		cpurestart,	true,	false },
@@ -3406,7 +3434,7 @@ struct CMDS {
 	{ "enablepool",		enablepool,	true,	false },
 	{ "disablepool",	disablepool,	true,	false },
 	{ "removepool",		removepool,	true,	false },
-#ifdef HAVE_OPENCL
+#ifdef USE_OPENCL
 	{ "gpuintensity",	gpuintensity,	true,	false },
 	{ "gpumem",		gpumem,		true,	false },
 	{ "gpuengine",		gpuengine,	true,	false },

+ 1 - 1
compat.h

@@ -172,7 +172,7 @@ enum {
 
 static inline int setpriority(__maybe_unused int which, __maybe_unused int who, __maybe_unused int prio)
 {  // which, who and prio are accepted but never read
-	return -!SetPriorityClass(GetCurrentProcess(), IDLE_PRIORITY_CLASS);
+	return -!SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_IDLE);  // 0 when the call returns nonzero, -1 when it returns zero
 }
 
 typedef unsigned long int ulong;

File diff suppressed because it is too large
+ 211 - 565
configure.ac


+ 15 - 0
debian/changelog

@@ -1,3 +1,18 @@
+bfgminer (4.99.1-0precise1) precise; urgency=low
+
+  * Multi-blockchain support: BFGMiner can now be told which pools use the same "mining goals", and will track the blockchain independently for ones that don't. This allows you to mine multiple cryptocurrencies concurrently using any pool strategy (including balance and load-balance).
+  * Multi-algorithm support: BFGMiner is now capable of hashing on both scrypt and SHA256d work at the same time, and you can assign the mining algorithm to use on a per-goal basis. As with multi-blockchain support, this works even in balancing strategies. Note that at this time, only CPU, OpenCL, and Proxy drivers actually support multiple algorithms at the same time (DualMiner must be preconfigured for only one, and GridSeed remains scrypt-only).
+  * Stratum extensions for mining goals: New experimental methods mining.capabilities and mining.set_goal for Stratum allow you to expose control of the mining algorithm to the pool. These extensions are considered draft and may be changed based on the needs of multiblockchain pool operators.
+  * RPC: Also extended for multiple mining goals/algorithms. Interface is subject to change.
+  * kncasic: New driver for KnCMiner Neptune (and 2nd-gen Jupiter modules).
+  * minion: New driver for BlackArrow Prospero X1.5.
+  * titan: Work flushing optimisations from KnCMiner, and reduce processor view from per-core to per-die.
+  * Keccak: Support for the SHA-3 winner hash as a proof-of-work algorithm.
+  * opencl: Work around broken global work offset support in buggy drivers.
+  * opencl: GPU mining is now disabled by default for scrypt. Use -S opencl:auto to enable it.
+
+ -- Luke Dashjr <luke+bfgminer@dashjr.org>  Sat, 29 Nov 2014 00:56:45 -0000
+
 bfgminer (4.10.0-0precise1) precise; urgency=low
 
   * minergate: Support for Spondoolies SP30.

+ 1 - 1
debian/control

@@ -2,7 +2,7 @@ Source: bfgminer
 Priority: optional
 Section: misc
 Maintainer: Luke Dashjr <luke_bfgminer@dashjr.org>
-Standards-Version: 4.10.0
+Standards-Version: 5.0.0
 Build-Depends: build-essential, debhelper, autoconf, automake, libtool, libssl-dev, yasm, pkg-config, libudev-dev, libcurl4-openssl-dev, wget, unzip, libjansson-dev, libncurses5-dev, libudev-dev, libusb-1.0-0-dev, git, quilt, uthash-dev, libsensors4-dev
 
 Package: bfgminer

+ 1 - 1
debian/rules

@@ -5,7 +5,7 @@
 
 override_dh_auto_configure:
 	NOSUBMODULES=1 ./autogen.sh
-	dh_auto_configure -- --enable-ztex --enable-bitforce --enable-icarus --enable-cpumining --enable-scrypt --enable-opencl
+	dh_auto_configure -- --enable-ztex --enable-bitforce --enable-icarus --enable-cpumining --enable-keccak --enable-scrypt --enable-opencl
 
 override_dh_auto_install:
 	$(MAKE) DESTDIR=$(CURDIR)/debian/bfgminer install

+ 34 - 14
deviceapi.c

@@ -86,6 +86,30 @@ void bfg_devapi_init()
 }
 
 
+float common_sha256d_and_scrypt_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{  // Maps malgo->algo to a fixed minimum nonce difficulty; proc is not read
+	switch (malgo->algo)
+	{
+#ifdef USE_SCRYPT
+		case POW_SCRYPT:
+			return 1./0x10000;  // 1/65536
+#endif
+#ifdef USE_SHA256D
+		case POW_SHA256D:
+			return 1.;
+#endif
+		default:
+			return -1.;  // any other algorithm, including one compiled out above; presumably means "unsupported" - confirm against callers
+	}
+}
+
+#ifdef USE_SCRYPT
+float common_scrypt_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{  // Scrypt-only variant: 1/65536 for POW_SCRYPT, -1 for every other algorithm; proc is not read
+	return (malgo->algo == POW_SCRYPT) ? (1./0x10000) : -1.;
+}
+#endif
+
 bool hashes_done(struct thr_info *thr, int64_t hashes, struct timeval *tvp_hashes, uint32_t *max_nonce)
 {
 	struct cgpu_info *cgpu = thr->cgpu;
@@ -117,22 +141,18 @@ bool hashes_done(struct thr_info *thr, int64_t hashes, struct timeval *tvp_hashe
 	timeradd(&thr->tv_hashes_done, tvp_hashes, &thr->tv_hashes_done);
 	
 	// max_nonce management (optional)
-	if (unlikely((long)thr->tv_hashes_done.tv_sec < cycle)) {
-		int mult;
+	if (max_nonce)
+	{
+		uint64_t new_max_nonce = *max_nonce;
+		new_max_nonce *= cycle;
+		new_max_nonce *= 1000000;
+		new_max_nonce /= ((uint64_t)thr->tv_hashes_done.tv_sec * 1000000) + thr->tv_hashes_done.tv_usec;
 		
-		if (likely(!max_nonce || *max_nonce == 0xffffffff))
-			return true;
+		if (new_max_nonce > 0xffffffff)
+			new_max_nonce = 0xffffffff;
 		
-		mult = 1000000 / ((thr->tv_hashes_done.tv_usec + 0x400) / 0x400) + 0x10;
-		mult *= cycle;
-		if (*max_nonce > (0xffffffff * 0x400) / mult)
-			*max_nonce = 0xffffffff;
-		else
-			*max_nonce = (*max_nonce * mult) / 0x400;
-	} else if (unlikely(thr->tv_hashes_done.tv_sec > cycle) && max_nonce)
-		*max_nonce = *max_nonce * cycle / thr->tv_hashes_done.tv_sec;
-	else if (unlikely(thr->tv_hashes_done.tv_usec > 100000) && max_nonce)
-		*max_nonce = *max_nonce * 0x400 / (((cycle * 1000000) + thr->tv_hashes_done.tv_usec) / (cycle * 1000000 / 0x400));
+		*max_nonce = new_max_nonce;
+	}
 	
 	hashmeter2(thr);
 	

+ 3 - 0
deviceapi.h

@@ -42,6 +42,9 @@ extern void _bfg_register_driver(const struct device_drv *);
 
 extern bool bfg_need_detect_rescan;
 
+extern float common_sha256d_and_scrypt_min_nonce_diff(struct cgpu_info *, const struct mining_algorithm *);
+extern float common_scrypt_min_nonce_diff(struct cgpu_info *, const struct mining_algorithm *);
+
 extern void request_work(struct thr_info *);
 extern struct work *get_work(struct thr_info *);
 extern bool hashes_done(struct thr_info *, int64_t hashes, struct timeval *tvp_hashes, uint32_t *max_nonce);

+ 2 - 2
driver-bitfury.c

@@ -173,7 +173,7 @@ tryagain:
 	bitfury->oldjob = inp[0x10];
 	bitfury->desync_counter = 0;
 	
-	if (opt_debug)
+	if (opt_dev_protocol)
 		bitfury_debug_nonce_array(proc, "Init", inp);
 	
 	return true;
@@ -459,7 +459,7 @@ void bitfury_do_io(struct thr_info * const master_thr)
 			goto out;
 		}
 		
-		if (opt_debug)
+		if (opt_dev_protocol)
 			bitfury_debug_nonce_array(proc, "Read", inp);
 		
 		// To avoid dealing with wrap-around entirely, we rotate array so previous active uint32_t is at index 0

+ 0 - 1
driver-cointerra.c

@@ -316,7 +316,6 @@ bool cointerra_lowl_probe(const struct lowlevel_device_info * const info)
 		.dev_product = maybe_strdup(info->product),
 		.dev_serial = maybe_strdup(info->serial),
 		.deven = DEV_ENABLED,
-		.min_nonce_diff = CTA_INIT_DIFF,
 	};
 	const bool rv = add_cgpu(dev);
 	applog(LOG_INFO, "%s: Successfully set up %s",

+ 103 - 111
driver-cpu.c

@@ -48,6 +48,7 @@ BFG_REGISTER_DRIVER(cpu_drv)
 static inline void drop_policy(void)
 {
 	struct sched_param param;
+	param.sched_priority = 0;
 
 #ifdef SCHED_BATCH
 #ifdef SCHED_IDLE
@@ -63,7 +64,7 @@ static inline void affine_to_cpu(int id, int cpu)
 
 	CPU_ZERO(&set);
 	CPU_SET(cpu, &set);
-	sched_setaffinity(0, sizeof(&set), &set);
+	sched_setaffinity(0, sizeof(set), &set);
 	applog(LOG_INFO, "Binding cpu mining thread %d to cpu %d", id, cpu);
 }
 #else
@@ -84,64 +85,27 @@ extern int dev_from_id(int thr_id);
 
 
 /* chipset-optimized hash functions */
-extern bool ScanHash_4WaySSE2(struct thr_info*, const unsigned char *pmidstate,
-	unsigned char *pdata, unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
-
-extern bool ScanHash_altivec_4way(struct thr_info*, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
-
-extern bool scanhash_via(struct thr_info*, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *target,
-	uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
-
-extern bool scanhash_c(struct thr_info*, const unsigned char *midstate, unsigned char *data,
-	      unsigned char *hash1, unsigned char *hash,
-	      const unsigned char *target,
-	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
-
-extern bool scanhash_cryptopp(struct thr_info*, const unsigned char *midstate,unsigned char *data,
-	      unsigned char *hash1, unsigned char *hash,
-	      const unsigned char *target,
-	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
-
-extern bool scanhash_asm32(struct thr_info*, const unsigned char *midstate,unsigned char *data,
-	      unsigned char *hash1, unsigned char *hash,
-	      const unsigned char *target,
-	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
-
-extern bool scanhash_sse2_64(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce);
-
-extern bool scanhash_sse4_64(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce);
-
-extern bool scanhash_sse2_32(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce);
-
-extern bool scanhash_scrypt(struct thr_info *, const unsigned char *pmidstate, unsigned char *pdata, unsigned char *phash1, unsigned char __maybe_unused *phash, const unsigned char *ptarget, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
-
-
-
-#ifdef WANT_CPUMINE
+typedef bool (*sha256_func)(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+
+extern bool ScanHash_4WaySSE2(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool ScanHash_altivec_4way(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool scanhash_via(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool scanhash_c(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool scanhash_cryptopp(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool scanhash_asm32(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool scanhash_sse2_64(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool scanhash_sse4_64(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool scanhash_sse2_32(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+extern bool scanhash_scrypt(struct thr_info *, struct work *, uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+
+
+#ifdef USE_SHA256D
 static size_t max_name_len = 0;
 static char *name_spaces_pad = NULL;
+#endif
+
 const char *algo_names[] = {
+#ifdef USE_SHA256D
 	[ALGO_C]		= "c",
 #ifdef WANT_SSE2_4WAY
 	[ALGO_4WAY]		= "4way",
@@ -165,13 +129,17 @@ const char *algo_names[] = {
 #ifdef WANT_ALTIVEC_4WAY
     [ALGO_ALTIVEC_4WAY] = "altivec_4way",
 #endif
+#endif
 #ifdef WANT_SCRYPT
     [ALGO_SCRYPT] = "scrypt",
 #endif
+#ifdef USE_SHA256D
 	[ALGO_FASTAUTO] = "fastauto",
 	[ALGO_AUTO] = "auto",
+#endif
 };
 
+#ifdef USE_SHA256D
 static const sha256_func sha256_funcs[] = {
 	[ALGO_C]		= (sha256_func)scanhash_c,
 #ifdef WANT_SSE2_4WAY
@@ -196,20 +164,17 @@ static const sha256_func sha256_funcs[] = {
 #ifdef WANT_X8664_SSE4
 	[ALGO_SSE4_64]		= (sha256_func)scanhash_sse4_64,
 #endif
-#ifdef WANT_SCRYPT
-	[ALGO_SCRYPT]		= (sha256_func)scanhash_scrypt
-#endif
 };
 #endif
 
-
-
-#ifdef WANT_CPUMINE
+#ifdef USE_SHA256D
 enum sha256_algos opt_algo = ALGO_FASTAUTO;
-static bool forced_n_threads;
 #endif
 
-static const uint32_t hash1_init[] = {
+static bool forced_n_threads;
+
+#ifdef USE_SHA256D
+const uint32_t hash1_init[] = {
 	0,0,0,0,0,0,0,0,
 	0x80000000,
 	  0,0,0,0,0,0,
@@ -217,16 +182,12 @@ static const uint32_t hash1_init[] = {
 };
 
 
-
-
-#ifdef WANT_CPUMINE
 // Algo benchmark, crash-prone, system independent stage
 double bench_algo_stage3(
 	enum sha256_algos algo
 )
 {
 	struct work work __attribute__((aligned(128)));
-	unsigned char hash1[64];
 
 	get_benchmark_work(&work, false);
 
@@ -237,18 +198,12 @@ double bench_algo_stage3(
 	uint32_t max_nonce = opt_algo == ALGO_FASTAUTO ? (1<<8) : (1<<22);
 	uint32_t last_nonce = 0;
 
-	memcpy(&hash1[0], &hash1_init[0], sizeof(hash1));
-
 	timer_set_now(&start);
 			{
 				sha256_func func = sha256_funcs[algo];
 				(*func)(
 					&dummy,
-					work.midstate,
-					work.data,
-					hash1,
-					work.hash,
-					work.target,
+					&work,
 					max_nonce,
 					&last_nonce,
 					0
@@ -665,14 +620,10 @@ static enum sha256_algos pick_fastest_algo()
 	return best_algo;
 }
 
-/* FIXME: Use asprintf for better errors. */
 char *set_algo(const char *arg, enum sha256_algos *algo)
 {
 	enum sha256_algos i;
 
-	if (opt_scrypt)
-		return "Can only use scrypt algorithm";
-
 	for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
 		if (algo_names[i] && !strcmp(arg, algo_names[i])) {
 			*algo = i;
@@ -682,28 +633,18 @@ char *set_algo(const char *arg, enum sha256_algos *algo)
 	return "Unknown algorithm";
 }
 
-#ifdef WANT_SCRYPT
-void set_scrypt_algo(enum sha256_algos *algo)
-{
-	*algo = ALGO_SCRYPT;
-}
-#endif
-
 void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo)
 {  // Copies the algo_names entry selected by *algo into buf, writing at most OPT_SHOW_LEN bytes
 	strncpy(buf, algo_names[*algo], OPT_SHOW_LEN);  // NOTE(review): strncpy leaves buf unterminated if the name is OPT_SHOW_LEN bytes or longer
 }
-#endif
+#endif  /* USE_SHA256D */
 
-#ifdef WANT_CPUMINE
 char *force_nthreads_int(const char *arg, int *i)
 {  // Sets the forced_n_threads flag, then returns whatever set_int_range yields for arg and i
 	forced_n_threads = true;
 	return set_int_range(arg, i, 0, 9999);  // bounds passed are 0 and 9999
 }
-#endif
 
-#ifdef WANT_CPUMINE
 static int cpu_autodetect()
 {
 	RUNONCE(0);
@@ -731,12 +672,12 @@ static int cpu_autodetect()
 					++num_processors;
 		}
 	}
-	#elif defined(_SC_NPROCESSORS_ONLN)
-		num_processors = sysconf(_SC_NPROCESSORS_ONLN);
-	#elif defined(HW_NCPU)
+	#elif defined(_SC_NPROCESSORS_CONF)
+		num_processors = sysconf(_SC_NPROCESSORS_CONF);
+	#elif defined(CTL_HW) && defined(HW_NCPU)
 		int req[] = { CTL_HW, HW_NCPU };
 		size_t len = sizeof(num_processors);
-		v = sysctl(req, 2, &num_processors, &len, NULL, 0);
+		sysctl(req, 2, &num_processors, &len, NULL, 0);
 	#else
 		num_processors = 1;
 	#endif /* !WIN32 */
@@ -757,7 +698,9 @@ static int cpu_autodetect()
 		cgpu->drv = &cpu_drv;
 		cgpu->deven = DEV_ENABLED;
 		cgpu->threads = 1;
+#ifdef USE_SHA256D
 		cgpu->kname = algo_names[opt_algo];
+#endif
 		add_cgpu(cgpu);
 	}
 	return opt_n_threads;
@@ -790,6 +733,7 @@ static uint64_t cpu_can_limit_work(struct thr_info __maybe_unused *thr)
 static bool cpu_thread_init(struct thr_info *thr)
 {
 	const int thr_id = thr->id;
+#ifdef USE_SHA256D
 	struct cgpu_info *cgpu = thr->cgpu;
 
 	mutex_lock(&cpualgo_lock);
@@ -804,9 +748,7 @@ static bool cpu_thread_init(struct thr_info *thr)
 	mutex_unlock(&cpualgo_lock);
 
 	cgpu->kname = algo_names[opt_algo];
-	
-	if (opt_algo == ALGO_SCRYPT)
-		cgpu->min_nonce_diff = 1./0x10000;
+#endif
 	
 	/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
 	 * and if that fails, then SCHED_BATCH. No need for this to be an
@@ -815,33 +757,87 @@ static bool cpu_thread_init(struct thr_info *thr)
 	drop_policy();
 	/* Cpu affinity only makes sense if the number of threads is a multiple
 	 * of the number of CPUs */
-	if (!(opt_n_threads % num_processors))
+	if (num_processors > 1 && opt_n_threads % num_processors == 0)
 		affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors);
 	return true;
 }
 
+static
+float cpu_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{  // Returns minimum_pdiff unconditionally; neither proc nor malgo is read
+	return minimum_pdiff;
+}
+
+static
+bool scanhash_generic(struct thr_info * const thr, struct work * const work, const uint32_t max_nonce, uint32_t * const last_nonce, uint32_t n)
+{  // Algorithm-agnostic nonce scan: calls the work's mining algorithm hash_data_f for successive nonces starting at n
+	struct mining_algorithm * const malgo = work_mining_algorithm(work);
+	void (* const hash_data_f)(void *, const void *) = malgo->hash_data_f;
+	uint8_t * const hash = work->hash;
+	uint8_t *data = work->data;
+	const uint8_t * const target = work->target;
+	uint32_t * const out_nonce = (uint32_t *)&data[0x4c];  // each nonce tried is stored at byte offset 0x4c of data
+	bool ret = false;
+	
+	const uint32_t hash7_targ = le32toh(((const uint32_t *)target)[7]);  // 32-bit word 7 of target, converted with le32toh
+	uint32_t * const hash7_tmp = &((uint32_t *)hash)[7];
+	
+	while (true)
+	{
+		*out_nonce = n;
+		
+		hash_data_f(hash, data);
+		
+		if (unlikely(le32toh(*hash7_tmp) <= hash7_targ))  // only word 7 of the hash is compared against the target here
+		{
+			ret = true;
+			break;
+		}
+
+		if ((n >= max_nonce) || thr->work_restart)  // stop at the nonce limit or once work_restart is set
+			break;
+
+		n++;
+	}
+	
+	*last_nonce = n;  // last nonce tried; it is the matching nonce when ret is true
+	return ret;
+}
+
 static int64_t cpu_scanhash(struct thr_info *thr, struct work *work, int64_t max_nonce)
 {
-	unsigned char hash1[64];
 	uint32_t first_nonce = work->blk.nonce;
 	uint32_t last_nonce;
 	bool rc;
 
-	memcpy(&hash1[0], &hash1_init[0], sizeof(hash1));
 CPUSearch:
 	last_nonce = first_nonce;
 	rc = false;
 
 	/* scan nonces for a proof-of-work hash */
 	{
-		sha256_func func = sha256_funcs[opt_algo];
+		sha256_func func = scanhash_generic;
+		switch (work_mining_algorithm(work)->algo)
+		{
+#ifdef USE_SCRYPT
+			case POW_SCRYPT:
+				func = scanhash_scrypt;
+				break;
+#endif
+#ifdef USE_SHA256D
+			case POW_SHA256D:
+				if (work->nonce_diff >= 1.)
+					func = sha256_funcs[opt_algo];
+				break;
+#endif
+			default:
+				break;
+		}
+		if (unlikely(!func))
+			applogr(0, LOG_ERR, "%"PRIpreprv": Unknown mining algorithm", thr->cgpu->proc_repr);
 		rc = (*func)(
 			thr,
-			work->midstate,
-			work->data,
-			hash1,
-			work->hash,
-			work->target,
+			work,
 			max_nonce,
 			&last_nonce,
 			work->blk.nonce
@@ -867,14 +863,10 @@ struct device_drv cpu_drv = {
 	.dname = "cpu",
 	.name = "CPU",
 	.probe_priority = 120,
-	.supported_algos = POW_SHA256D | POW_SCRYPT,
+	.drv_min_nonce_diff = cpu_min_nonce_diff,
 	.drv_detect = cpu_detect,
 	.thread_prepare = cpu_thread_prepare,
 	.can_limit_work = cpu_can_limit_work,
 	.thread_init = cpu_thread_init,
 	.scanhash = cpu_scanhash,
 };
-#endif
-
-
-

+ 15 - 1
driver-cpu.h

@@ -22,6 +22,8 @@
 #define OPT_SHOW_LEN 80
 #endif
 
+#ifdef USE_SHA256D
+
 #if defined(__i386__) && defined(HAVE_SSE2)
 #define WANT_SSE2_4WAY 1
 #endif
@@ -46,11 +48,14 @@
 #define WANT_X8664_SSE4 1
 #endif
 
+#endif  /* USE_SHA256D */
+
 #ifdef USE_SCRYPT
 #define WANT_SCRYPT
 #endif
 
 enum sha256_algos {
+#ifdef USE_SHA256D
 	ALGO_C,			/* plain C */
 	ALGO_4WAY,		/* parallel SSE2 */
 	ALGO_VIA,		/* VIA padlock */
@@ -60,15 +65,24 @@ enum sha256_algos {
 	ALGO_SSE2_64,		/* SSE2 for x86_64 */
 	ALGO_SSE4_64,		/* SSE4 for x86_64 */
 	ALGO_ALTIVEC_4WAY,	/* parallel Altivec */
+#endif
+#ifdef USE_SCRYPT
 	ALGO_SCRYPT,		/* scrypt */
+#endif
 	
+#ifdef USE_SHA256D
 	ALGO_FASTAUTO,		/* fast autodetect */
-	ALGO_AUTO		/* autodetect */
+	ALGO_AUTO,		/* autodetect */
+#endif
+	
+	CUSTOM_CPU_MINING_ALGOS_COUNT,
 };
 
 extern const char *algo_names[];
 extern struct device_drv cpu_drv;
 
+extern const uint32_t hash1_init[];
+
 extern char *set_algo(const char *arg, enum sha256_algos *algo);
 extern void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo);
 extern char *force_nthreads_int(const char *arg, int *i);

+ 64 - 34
driver-dualminer.c

@@ -33,11 +33,6 @@
   #include <io.h>
 #endif
 
-// mining both Scrypt & SHA2 at the same time with two processes
-// SHA2 process must be run first, no arg requirements, first serial port will be used
-// Scrypt process must be launched after, --scrypt and --dual-mode args required
-bool opt_dual_mode = false;
-
 #define DUALMINER_IO_SPEED 115200
 
 #define DUALMINER_SCRYPT_SM_HASH_TIME   0.00001428571429
@@ -85,6 +80,16 @@ const struct bfg_set_device_definition dualminer_set_device_funcs[];
 
 // device helper functions
 
+/* Tell whether this device is configured for scrypt.  Builds without
+ * USE_SCRYPT have no per-device scrypt flag and always answer false. */
+static inline
+bool dualminer_is_scrypt(struct ICARUS_INFO * const info)
+{
+#ifndef USE_SCRYPT
+	return false;
+#else
+	return info->scrypt;
+#endif
+}
+
 static
 void dualminer_teardown_device(int fd)
 {
@@ -102,37 +107,27 @@ void dualminer_init_hashrate(struct cgpu_info * const cgpu)
 
 	// get clear to send (CTS) status
 	if ((gc3355_get_cts_status(fd) != 1) &&  // 0.9v - dip-switch set to B
-		(opt_scrypt))
+		(dualminer_is_scrypt(info)))
 		// adjust hash-rate for voltage
 		info->Hs = DUALMINER_SCRYPT_DM_HASH_TIME;
 }
 
-static
-bool dualminer_init(struct thr_info * const thr)
-{
-	struct cgpu_info * const cgpu = thr->cgpu;
-	
-	if (opt_scrypt)
-		cgpu->min_nonce_diff = 1./0x10000;
-	
-	return icarus_init(thr);
-}
-
 // runs when job starts and the device has been reset (or first run)
 static
 void dualminer_init_firstrun(struct cgpu_info *icarus)
 {
+	struct ICARUS_INFO * const info = icarus->device_data;
 	int fd = icarus->device_fd;
 
-	gc3355_init_dualminer(fd, opt_pll_freq, !opt_dual_mode, false);
+	gc3355_init_dualminer(fd, opt_pll_freq, !info->dual_mode, false, dualminer_is_scrypt(info));
 	
 	dualminer_init_hashrate(icarus);
 
 	applog(LOG_DEBUG, "%"PRIpreprv": dualminer: Init: pll=%d, scrypt: %d, scrypt only: %d",
 		   icarus->proc_repr,
 		   opt_pll_freq,
-		   opt_scrypt,
-		   opt_scrypt && !opt_dual_mode);
+		   dualminer_is_scrypt(info),
+		   dualminer_is_scrypt(info) && !info->dual_mode);
 }
 
 // set defaults for options that the user didn't specify
@@ -160,15 +155,33 @@ void dualminer_set_defaults(int fd)
 	}
 }
 
+/* Lowest nonce difficulty usable for `malgo`, or a negative value when the
+ * algorithm is rejected.  When `proc` is NULL no device state is consulted;
+ * otherwise the answer depends on whether that device is in scrypt mode. */
+float dualminer_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	struct ICARUS_INFO *info = NULL;
+	if (proc)
+		info = proc->device_data;
+	
+	switch (malgo->algo)
+	{
+#ifdef USE_SCRYPT
+		case POW_SCRYPT:
+			/* a known device that is not in scrypt mode cannot take scrypt work */
+			if (info && !dualminer_is_scrypt(info))
+				return -1.;
+			return 1./0x10000;
+#endif
+#ifdef USE_SHA256D
+		case POW_SHA256D:
+			/* a known device that is in scrypt mode cannot take SHA256d work */
+			if (info && dualminer_is_scrypt(info))
+				return -1.;
+			return 1.;
+#endif
+		default:
+			return -1.;
+	}
+}
+
 // ICARUS_INFO functions - icarus-common.h
 
 // runs after fd is opened but before the device detection code
 static
-bool dualminer_detect_init(const char *devpath, int fd, struct ICARUS_INFO * __maybe_unused info)
+bool dualminer_detect_init(const char *devpath, int fd, struct ICARUS_INFO * const info)
 {
 	dualminer_set_defaults(fd);
 	
-	gc3355_init_dualminer(fd, opt_pll_freq, !opt_dual_mode, true);
+	gc3355_init_dualminer(fd, opt_pll_freq, !info->dual_mode, true, dualminer_is_scrypt(info));
 
 	return true;
 }
@@ -178,6 +191,7 @@ static
 bool dualminer_job_start(struct thr_info * const thr)
 {
 	struct cgpu_info *icarus = thr->cgpu;
+	struct ICARUS_INFO * const info = icarus->device_data;
 	struct icarus_state * const state = thr->cgpu_data;
 	int fd = icarus->device_fd;
 
@@ -185,9 +199,9 @@ bool dualminer_job_start(struct thr_info * const thr)
 		// runs when job starts and the device has been reset (or first run)
 		dualminer_init_firstrun(icarus);
 
-	if (opt_scrypt)
+	if (dualminer_is_scrypt(info))
 	{
-		if (opt_dual_mode)
+		if (info->dual_mode)
 			gc3355_scrypt_reset(fd);
 		else
 			gc3355_scrypt_only_reset(fd);
@@ -221,10 +235,15 @@ bool dualminer_detect_one(const char *devpath)
 		.nonce_littleendian = true,
 		.work_division = 1,
 		.detect_init_func = dualminer_detect_init,
-		.job_start_func = dualminer_job_start
+		.job_start_func = dualminer_job_start,
+#ifdef USE_SCRYPT
+		.scrypt = (get_mining_goal("default")->malgo->algo == POW_SCRYPT),
+#endif
 	};
 
-	if (opt_scrypt)
+	drv_set_defaults(drv, dualminer_set_device_funcs, info, devpath, detectone_meta_info.serial, 1);
+
+	if (dualminer_is_scrypt(info))
 	{
 		info->golden_ob = (char*)scrypt_golden_ob;
 		info->golden_nonce = (char*)scrypt_golden_nonce;
@@ -237,15 +256,13 @@ bool dualminer_detect_one(const char *devpath)
 		info->Hs = DUALMINER_SHA2_DM_HASH_TIME;
 	}
 
-	drv_set_defaults(drv, dualminer_set_device_funcs, info, devpath, detectone_meta_info.serial, 1);
-
 	if (!icarus_detect_custom(devpath, drv, info))
 	{
 		free(info);
 		return false;
 	}
 
-	if (opt_scrypt)
+	if (dualminer_is_scrypt(info))
 		info->read_count = DUALMINER_SCRYPT_READ_COUNT; // 4.8s to read
 	else
 		info->read_count = DUALMINER_SHA2_READ_COUNT; // 1.6s to read
@@ -259,14 +276,28 @@ bool dualminer_detect_one(const char *devpath)
 static
 const char *dualminer_set_dual_mode(struct cgpu_info * const proc, const char * const option, const char * const setting, char * const replybuf, enum bfg_set_device_replytype * const success)
 {
+	struct ICARUS_INFO * const info = proc->device_data;
 	int val = atoi(setting);
-	opt_dual_mode = val == 1;
+	info->dual_mode = val == 1;
+	return NULL;
+}
+
+#ifdef USE_SCRYPT
+static
+const char *dualminer_set_scrypt(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const out_success)
+{
+	struct ICARUS_INFO * const info = proc->device_data;
+	info->scrypt = atoi(newvalue);
 	return NULL;
 }
+#endif
 
 static
 const struct bfg_set_device_definition dualminer_set_device_funcs[] = {
-	{"dual_mode", dualminer_set_dual_mode, "set to 1 to enable dual algorithm mining with two BFGMiner processes"},
+	{"dual_mode", dualminer_set_dual_mode, "set to 1 to enable dual algorithm mining"},
+#ifdef USE_SCRYPT
+	{"scrypt", dualminer_set_scrypt, "set to 1 to put in scrypt mode"},
+#endif
 	{NULL},
 };
 
@@ -298,7 +329,7 @@ bool dualminer_job_prepare(struct thr_info *thr, struct work *work, __maybe_unus
 
 	memset(state->ob_bin, 0, info->ob_size);
 
-	if (opt_scrypt)
+	if (dualminer_is_scrypt(info))
 		gc3355_scrypt_prepare_work(state->ob_bin, work);
 	else
 		gc3355_sha2_prepare_work(state->ob_bin, work);
@@ -329,9 +360,8 @@ void dualminer_drv_init()
 	dualminer_drv = icarus_drv;
 	dualminer_drv.dname = "dualminer";
 	dualminer_drv.name = "DMU";
-	dualminer_drv.supported_algos = POW_SCRYPT | POW_SHA256D;
+	dualminer_drv.drv_min_nonce_diff = dualminer_min_nonce_diff;
 	dualminer_drv.lowl_probe = dualminer_lowl_probe;
-	dualminer_drv.thread_init = dualminer_init;
 	dualminer_drv.thread_shutdown = dualminer_thread_shutdown;
 	dualminer_drv.job_prepare = dualminer_job_prepare;
 	dualminer_drv.set_device = dualminer_set_device;

+ 5 - 2
driver-getwork.c

@@ -185,7 +185,8 @@ int handle_getwork(struct MHD_Connection *conn, bytes_t *upbuf)
 		size_t replysz = 590 + idstr_sz;
 		
 		work = get_work(thr);
-		work->nonce_diff = client->desired_share_pdiff;
+		const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+		work->nonce_diff = client->desired_share_pdiff ?: malgo->reasonable_low_nonce_diff;
 		if (work->nonce_diff > work->work_difficulty)
 			work->nonce_diff = work->work_difficulty;
 		
@@ -201,13 +202,15 @@ int handle_getwork(struct MHD_Connection *conn, bytes_t *upbuf)
 		memcpy(&reply[442], "\",\"hash1\":\"00000000000000000000000000000000000000000000000000000000000000000000008000000000000000000000000000000000000000000000000000010000\"},\"id\":", 147);
 		memcpy(&reply[589], idstr ?: "0", idstr_sz);
 		memcpy(&reply[589 + idstr_sz], "}", 1);
-		if (opt_scrypt)
+#ifdef USE_SCRYPT
+		if (malgo->algo == POW_SCRYPT)
 		{
 			replysz += 21;
 			reply = realloc(reply, replysz);
 			memmove(&reply[443 + 21], &reply[443], replysz - (443 + 21));
 			memcpy(&reply[443], ",\"algorithm\":\"scrypt\"", 21);
 		}
+#endif
 		
 		timer_set_now(&work->tv_work_start);
 		HASH_ADD_KEYPTR(hh, client->work, work->data, 76, work);

+ 1 - 16
driver-gridseed.c

@@ -215,24 +215,12 @@ bool gridseed_lowl_probe(const struct lowlevel_device_info * const info)
  * setup & shutdown
  */
 
-static
-bool gridseed_thread_prepare(struct thr_info *thr)
-{
-	thr->cgpu_data = calloc(1, sizeof(*thr->cgpu_data));
-	
-	struct cgpu_info *device = thr->cgpu;
-	device->min_nonce_diff = 1./0x10000;
-
-	return true;
-}
-
 static
 void gridseed_thread_shutdown(struct thr_info *thr)
 {
 	struct cgpu_info *device = thr->cgpu;
 
 	gc3355_close(device->device_fd);
-	free(thr->cgpu_data);
 }
 
 /*
@@ -446,14 +434,11 @@ struct device_drv gridseed_drv =
 	// metadata
 	.dname = "gridseed",
 	.name = "GSD",
-	.supported_algos = POW_SCRYPT,
+	.drv_min_nonce_diff = common_scrypt_min_nonce_diff,
 	
 	// detect device
 	.lowl_probe = gridseed_lowl_probe,
 	
-	// initialize device
-	.thread_prepare = gridseed_thread_prepare,
-	
 	// specify mining type - scanhash
 	.minerloop = minerloop_scanhash,
 	

+ 7 - 0
driver-icarus.h

@@ -134,6 +134,13 @@ struct ICARUS_INFO {
 	bool (*detect_init_func)(const char *devpath, int fd, struct ICARUS_INFO *);
 	bool (*job_start_func)(struct thr_info *);
 	
+#ifdef USE_DUALMINER
+#ifdef USE_SCRYPT
+	bool scrypt;
+#endif
+	bool dual_mode;
+#endif
+	
 #ifdef USE_ZEUSMINER
 	// Hardware information, doesn't affect anything directly
 	uint16_t freq;

+ 893 - 0
driver-kncasic.c

@@ -0,0 +1,893 @@
+/*
+ * Copyright 2014 KnCminer
+ * Copyright 2014 Luke Dashjr
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.  See COPYING for more details.
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <linux/types.h>
+#include <linux/spi/spidev.h>
+
+#include <zlib.h>
+
+#include "deviceapi.h"
+#include "logging.h"
+#include "miner.h"
+#include "knc-asic/knc-transport.h"
+#include "knc-asic/knc-asic.h"
+
+/* Number of work slots tracked per core (size of knc_core_state.workslot) */
+#define WORKS_PER_CORE          3
+
+/* A core is disabled once its errors_now counter exceeds this value */
+#define CORE_ERROR_LIMIT	30
+/* Seconds in core_check_interval, the window after which errors_now is reset */
+#define CORE_ERROR_INTERVAL	30
+/* Seconds a core stays disabled; parenthesized so it expands safely inside expressions */
+#define CORE_ERROR_DISABLE_TIME	(5*60)
+/* Seconds to hold off sending further work after a setwork */
+#define CORE_SUBMIT_MIN_TIME	2
+/* Seconds after a setwork before a core with assigned slots is treated as timed out */
+#define CORE_TIMEOUT		20
+/* Modulus used with knc_state.scan_adjust to pick cores for a clean setwork */
+#define SCAN_ADJUST_RANGE	32
+
+BFG_REGISTER_DRIVER(kncasic_drv)
+
+/* Shared "current time"; refreshed by gettimeofday() at the start of each
+ * knc_scanwork() pass and read by the core helper functions below */
+static struct timeval now;
+/* Period after which the per-core errors_now counters are reset */
+static const struct timeval core_check_interval = {
+	CORE_ERROR_INTERVAL, 0
+};
+/* How long a core stays disabled after exceeding CORE_ERROR_LIMIT */
+static const struct timeval core_disable_interval = {
+	CORE_ERROR_DISABLE_TIME, 0
+};
+/* Added to `now` to form hold_work_until after a setwork */
+static const struct timeval core_submit_interval = {
+	CORE_SUBMIT_MIN_TIME, 0
+};
+/* Added to `now` to form a core's timeout after a setwork */
+static const struct timeval core_timeout_interval = {
+	CORE_TIMEOUT, 0
+};
+
+struct knc_die;
+
+/* Host-side bookkeeping for a single hashing core */
+struct knc_core_state {
+	int generation;	/* compared against knc_state.generation to detect a pending flush */
+	int core;	/* index of this core within its die */
+	struct knc_die *die;	/* owning die */
+	struct {
+		int slot;
+		struct work *work;
+	} workslot[WORKS_PER_CORE]; 	/* active, next */
+	/* ([2] holds work that has been sent but not yet seen accepted in a report) */
+	int transfer_stamp;	/* knc_transfer_stamp() value taken at the last setwork */
+	struct knc_report report;	/* most recently decoded report */
+	struct {
+		int slot;
+		uint32_t nonce;
+	} last_nonce;	/* last nonce handled; used to stop at already-seen entries in a report */
+	uint32_t works;	/* setwork requests issued */
+	uint32_t shares;	/* nonces accepted by submit_nonce */
+	uint32_t errors;	/* total failures counted by knc_core_failure */
+	uint32_t completed;	/* work items the core moved past */
+	int last_slot;	/* last slot number handed out by _knc_core_next_slot */
+	uint32_t errors_now;	/* failures in the current limiter window */
+	struct timeval disabled_until;	/* core is skipped while this is in the future */
+	struct timeval hold_work_until;	/* no additional work is needed before this time */
+	struct timeval timeout;	/* deadline after which a core with assigned slots gets a clean setwork */
+	bool inuse;	/* set on setwork; cleared when the core refuses work */
+	
+	struct cgpu_info *proc;	/* processor this core is reported as */
+};
+
+struct knc_state;
+
+/* One detected die; filled in by knc_detect_one() */
+struct knc_die {
+	int channel;	/* ASIC channel the die was found on */
+	int die;	/* die index within that ASIC */
+	int version;	/* copied from knc_die_info.version (compared against KNC_VERSION_*) */
+	int cores;	/* number of cores reported for this die */
+	struct knc_state *knc;	/* back-pointer to the device state */
+	struct knc_core_state *core;	/* first of this die's `cores` entries in knc_state.core[] */
+};
+
+#define MAX_SPI_SIZE		(4096)
+#define MAX_SPI_RESPONSES	(MAX_SPI_SIZE / (2 + 4 + 1 + 1 + 1 + 4))
+#define MAX_SPI_MESSAGE		(128)
+#define KNC_SPI_BUFFERS		(3)
+
+/* Device-wide state shared by every cgpu/processor of one KnC miner.
+ * Allocated in knc_detect_one() with room for `cores` trailing core entries. */
+struct knc_state {
+	void *ctx;	/* transport handle passed to the knc_trnsp_* functions */
+	int generation;    /* work/block generation, incremented on each flush invalidating older works */
+	int dies;	/* number of valid entries in die[] */
+	struct knc_die die[KNC_MAX_ASICS * KNC_MAX_DIES_PER_ASIC];
+	int cores;	/* number of valid entries in core[] */
+	int scan_adjust;	/* compared with (core index % SCAN_ADJUST_RANGE) to force clean setworks after a flush */
+	int startup;	/* scan passes remaining before any work is submitted */
+	/* Statistics */
+	uint64_t shares;		/* diff1 shares reported by hardware */
+	uint64_t works;			/* Work units submitted */
+	uint64_t completed;		/* Work units completed */
+	uint64_t errors;		/* Hardware & communication errors */
+	struct timeval next_error_interval;
+	/* End of statistics */
+	/* SPI communications thread */
+	pthread_mutex_t spi_qlock;	/* SPI queue status lock */
+	struct thr_info spi_thr;	/* SPI I/O thread */
+	pthread_cond_t spi_qcond;	/* SPI queue change wakeup */
+	struct knc_spi_buffer {
+		enum {
+			KNC_SPI_IDLE=0,
+			KNC_SPI_PENDING,
+			KNC_SPI_DONE
+		} state;
+		int size;	/* bytes of txbuf filled so far */
+		uint8_t txbuf[MAX_SPI_SIZE];
+		uint8_t rxbuf[MAX_SPI_SIZE];
+		int responses;	/* number of valid entries in response_info[] */
+		struct knc_spi_response {
+			int request_length;
+			int response_length;
+			enum {
+				KNC_UNKNOWN = 0,
+				KNC_NO_RESPONSE,
+				KNC_SETWORK,
+				KNC_REPORT,
+				KNC_INFO
+			} type;
+			struct knc_core_state *core;
+			uint32_t data;
+			int offset;	/* start of this message within txbuf/rxbuf */
+		} response_info[MAX_SPI_RESPONSES];
+	} spi_buffer[KNC_SPI_BUFFERS];
+	int send_buffer;	/* index of the buffer currently being filled */
+	int read_buffer;	/* index of the next buffer whose responses are processed */
+	int send_buffer_count;	/* running count of buffers queued for transfer */
+	int read_buffer_count;	/* running count of buffers whose responses were processed */
+	/* end SPI thread */
+
+	/* lock to protect resources between different threads */
+	pthread_mutex_t state_lock;
+
+	/* Do not add anything below here!! core[] must be last */
+	struct knc_core_state core[];
+};
+
+int opt_knc_device_bus = -1;
+char *knc_log_file = NULL;
+
+/* SPI I/O thread body.  thr_data is the cgpu_info handed to thr_info_create().
+ * Walks spi_buffer[] in ring order: waits until the current entry becomes
+ * KNC_SPI_PENDING, performs the transfer with spi_qlock released, then marks
+ * the entry KNC_SPI_DONE and signals spi_qcond.  Exits once cgpu->shutdown
+ * is observed. */
+static void *knc_spi(void *thr_data)
+{
+	struct cgpu_info *cgpu = thr_data;
+	struct knc_state *knc = cgpu->device_data;
+	int buffer = 0;
+	
+	pthread_mutex_lock(&knc->spi_qlock);
+	while (!cgpu->shutdown) {
+		/* remember which entry is being transferred; `buffer` advances before it is marked done */
+		int this_buffer = buffer;
+		while (knc->spi_buffer[buffer].state != KNC_SPI_PENDING && !cgpu->shutdown)
+			pthread_cond_wait(&knc->spi_qcond, &knc->spi_qlock);
+		/* the transfer itself runs without the queue lock held */
+		pthread_mutex_unlock(&knc->spi_qlock);
+		if (cgpu->shutdown)
+			return NULL;
+
+		knc_trnsp_transfer(knc->ctx, knc->spi_buffer[buffer].txbuf, knc->spi_buffer[buffer].rxbuf, knc->spi_buffer[buffer].size);
+
+		buffer += 1;
+		if (buffer >= KNC_SPI_BUFFERS)
+			buffer = 0;
+
+		pthread_mutex_lock(&knc->spi_qlock);
+		knc->spi_buffer[this_buffer].state = KNC_SPI_DONE;
+		pthread_cond_signal(&knc->spi_qcond);
+	}
+	pthread_mutex_unlock(&knc->spi_qlock);
+	return NULL;
+}
+
+static void knc_process_responses(struct thr_info *thr);
+
+/* Hand the buffer currently being filled to the SPI thread (if it is idle
+ * and non-empty), advance to the next ring entry, wait while that entry is
+ * still pending, and finally process any completed responses. */
+static void knc_flush(struct thr_info *thr)
+{
+	struct cgpu_info *cgpu = thr->cgpu;
+	struct knc_state *knc = cgpu->device_data;
+	struct knc_spi_buffer *buffer = &knc->spi_buffer[knc->send_buffer];
+	/* NOTE(review): buffer->state is read here before spi_qlock is taken */
+	if (buffer->state == KNC_SPI_IDLE && buffer->size > 0) {
+		pthread_mutex_lock(&knc->spi_qlock);
+		buffer->state = KNC_SPI_PENDING;
+		pthread_cond_signal(&knc->spi_qcond);
+		knc->send_buffer += 1;
+		knc->send_buffer_count += 1;
+		if (knc->send_buffer >= KNC_SPI_BUFFERS)
+			knc->send_buffer = 0;
+		buffer = &knc->spi_buffer[knc->send_buffer];
+		/* Block for SPI to finish a transfer if all buffers are busy */
+		while (buffer->state == KNC_SPI_PENDING) {
+			applog(LOG_DEBUG, "KnC: SPI buffer full (%d), waiting for SPI thread", buffer->responses);
+			pthread_cond_wait(&knc->spi_qcond, &knc->spi_qlock);
+		}
+		pthread_mutex_unlock(&knc->spi_qlock);
+	}
+        knc_process_responses(thr);
+}
+
+/* Append one request for `core` to the buffer being filled and record how to
+ * interpret its response later (type, lengths, offset, caller-supplied data).
+ * If the message would not fit, or the response table is full, the buffer is
+ * flushed first and the next one is used. */
+static void knc_transfer(struct thr_info *thr, struct knc_core_state *core, int request_length, uint8_t *request, int response_length, int response_type, uint32_t data)
+{
+	struct cgpu_info *cgpu = thr->cgpu;
+	struct knc_state *knc = cgpu->device_data;
+	struct knc_spi_buffer *buffer = &knc->spi_buffer[knc->send_buffer];
+	/* FPGA control, request header, request body/response, CRC(4), ACK(1), EXTRA(3) */
+	int msglen = 2 + max(request_length, 4 + response_length) + 4 + 1 + 3;
+	if (buffer->size + msglen > MAX_SPI_SIZE || buffer->responses >= MAX_SPI_RESPONSES) {
+		applog(LOG_INFO, "KnC: SPI buffer sent, %d messages %d bytes", buffer->responses, buffer->size);
+		knc_flush(thr);
+		/* knc_flush advanced send_buffer; continue in the new buffer */
+		buffer = &knc->spi_buffer[knc->send_buffer];
+	}
+	struct knc_spi_response *response_info = &buffer->response_info[buffer->responses];
+	buffer->responses++;
+	response_info->offset = buffer->size;
+	response_info->type = response_type;
+	response_info->request_length = request_length;
+	response_info->response_length = response_length;
+	response_info->core = core;
+	response_info->data = data;
+	/* knc_prepare_transfer's return value becomes the new fill level of txbuf */
+	buffer->size = knc_prepare_transfer(buffer->txbuf, buffer->size, MAX_SPI_SIZE, core->die->channel, request_length, request, response_length);
+}
+
+/* Snapshot of the send-side buffer counter, to be checked later with
+ * knc_transfer_completed() */
+static int knc_transfer_stamp(struct knc_state *knc)
+{
+	const int stamp = knc->send_buffer_count;
+	return stamp;
+}
+
+/* Non-zero once the read-side buffer counter has advanced past `stamp` */
+static int knc_transfer_completed(struct knc_state *knc, int stamp)
+{
+	/* signed delta math, counter wrap OK */
+	const int delta = (int)(knc->read_buffer_count - stamp);
+	return delta > 0;
+}
+
+/* Probe the transport `ctx` for dies, allocate the shared knc_state, register
+ * one cgpu per ASIC channel that has cores (chained via add_cgpu_slave), and
+ * start the SPI thread.  Returns false when no cores are found or setup
+ * fails; the caller keeps ownership of `ctx` in that case. */
+static bool knc_detect_one(void *ctx)
+{
+	/* Scan device for ASICs */
+	int channel, die, cores = 0, core;
+	struct knc_state *knc;
+	struct knc_die_info die_info[KNC_MAX_ASICS][KNC_MAX_DIES_PER_ASIC];
+
+	memset(die_info, 0, sizeof(die_info));
+
+	/* Send GETINFO to each die to detect if it is usable */
+	for (channel = 0; channel < KNC_MAX_ASICS; channel++) {
+		if (!knc_trnsp_asic_detect(ctx, channel))
+			continue;
+		for (die = 0; die < KNC_MAX_DIES_PER_ASIC; die++) {
+		    if (knc_detect_die(ctx, channel, die, &die_info[channel][die]) == 0)
+			cores += die_info[channel][die].cores;
+		}
+	}
+
+	if (!cores) {
+		applog(LOG_NOTICE, "no KnCminer cores found");
+		return false;
+	}
+
+	/* NOTE(review): informational message logged at LOG_ERR level */
+	applog(LOG_ERR, "Found a KnC miner with %d cores", cores);
+
+	/* one allocation: the state header plus the trailing core[] array */
+	knc = calloc(1, sizeof(*knc) + cores * sizeof(struct knc_core_state));
+	if (!knc)
+	{
+		applog(LOG_ERR, "KnC miner detected, but failed to allocate memory");
+		return false;
+	}
+
+	knc->ctx = ctx;
+	knc->generation = 1;
+
+	/* Index all cores */
+	struct cgpu_info *prev_cgpu = NULL, *first_cgpu = NULL;
+	int dies = 0;
+	cores = 0;
+	struct knc_core_state *pcore = knc->core;
+	int channel_cores_base = 0;
+	for (channel = 0; channel < KNC_MAX_ASICS; channel++) {
+		int channel_cores = 0;
+		
+		for (die = 0; die < KNC_MAX_DIES_PER_ASIC; die++) {
+			if (die_info[channel][die].cores) {
+				knc->die[dies].channel = channel;
+				knc->die[dies].die = die;
+				knc->die[dies].version = die_info[channel][die].version;
+				knc->die[dies].cores = die_info[channel][die].cores;
+				knc->die[dies].core = pcore;
+				knc->die[dies].knc = knc;
+				for (core = 0; core < knc->die[dies].cores; core++) {
+					knc->die[dies].core[core].die = &knc->die[dies];
+					knc->die[dies].core[core].core = core;
+				}
+				cores += knc->die[dies].cores;
+				channel_cores += knc->die[dies].cores;
+				pcore += knc->die[dies].cores;
+				dies++;
+			}
+		}
+		
+		if (channel_cores)
+		{
+			/* NOTE(review): malloc result is dereferenced without a NULL check */
+			struct cgpu_info * const cgpu = malloc(sizeof(*cgpu));
+			*cgpu = (struct cgpu_info){
+				.drv = &kncasic_drv,
+				.name = "KnCminer",
+				.procs = channel_cores,
+				.threads = prev_cgpu ? 0 : 1,	/* only the first cgpu gets a thread */
+				.device_data = knc,
+			};
+			add_cgpu_slave(cgpu, prev_cgpu);
+			if (!prev_cgpu)
+				first_cgpu = cgpu;
+			prev_cgpu = cgpu;
+			
+			/* bind each processor of this cgpu to the next core entry, in order */
+			for_each_managed_proc(proc, cgpu)
+			{
+				knc->core[channel_cores_base++].proc = proc;
+			}
+		}
+	}
+	
+	knc->dies = dies;
+	knc->cores = cores;
+	knc->startup = 2;
+
+	pthread_mutex_init(&knc->spi_qlock, NULL);
+	pthread_cond_init(&knc->spi_qcond, NULL);
+	pthread_mutex_init(&knc->state_lock, NULL);
+
+	if (thr_info_create(&knc->spi_thr, NULL, knc_spi, first_cgpu))
+	{
+		applog(LOG_ERR, "%s: SPI thread create failed", first_cgpu->dev_repr);
+		/* NOTE(review): cgpus passed to add_cgpu_slave above still hold knc as device_data */
+		free(knc);
+		return false;
+	}
+	
+	return true;
+}
+
+/* Open a transport for `devpath` and probe it; the transport is released
+ * again when probing finds nothing usable. */
+static
+bool kncasic_detect_one(const char * const devpath)
+{
+	void * const transport = knc_trnsp_new(devpath);
+
+	if (transport == NULL)
+		return false;
+
+	if (knc_detect_one(transport))
+		return true;
+
+	knc_trnsp_free(transport);
+	return false;
+}
+
+/* Auto-detection entry point: probes with a NULL device path and reports
+ * 1 on success, 0 otherwise. */
+static
+int kncasic_detect_auto(void)
+{
+	if (kncasic_detect_one(NULL))
+		return 1;
+	return 0;
+}
+
+/* Driver detect hook: delegates to generic_detect with this driver's
+ * per-path and auto probe callbacks. */
+static
+void kncasic_detect(void)
+{
+	generic_detect(
+		&kncasic_drv,
+		kncasic_detect_one,
+		kncasic_detect_auto,
+		GDF_REQUIRE_DNAME | GDF_DEFAULT_NOAUTO
+	);
+}
+
+/* Core helper functions */
+/* Non-zero while the core's hold_work_until time is still in the future */
+static int knc_core_hold_work(struct knc_core_state *core)
+{
+	const struct timeval * const until = &core->hold_work_until;
+	return timercmp(until, &now, >);
+}
+
+/* True when any of the core's work slots carries a positive slot number */
+static int knc_core_has_work(struct knc_core_state *core)
+{
+	int idx = WORKS_PER_CORE;
+	while (idx-- > 0) {
+		if (core->workslot[idx].slot > 0)
+			return true;
+	}
+	return false;
+}
+
+/* Non-zero when the core is not in its hold-off period and neither
+ * workslot[1] nor workslot[2] has work attached */
+static int knc_core_need_work(struct knc_core_state *core)
+{
+	if (knc_core_hold_work(core))
+		return 0;
+	if (core->workslot[1].work)
+		return 0;
+	return !core->workslot[2].work;
+}
+
+/* Non-zero while the core's disabled_until time is still in the future */
+static int knc_core_disabled(struct knc_core_state *core)
+{
+	const struct timeval * const until = &core->disabled_until;
+	return timercmp(until, &now, >);
+}
+
+/* Advance and return the core's slot counter, cycling through 1..14 */
+static int _knc_core_next_slot(struct knc_core_state *core)
+{
+	/* Avoid slot #0 and #15. #0 is "no work assigned" and #15 is seen on bad cores */
+	const int bumped = core->last_slot + 1;
+	core->last_slot = (bumped < 15) ? bumped : 1;
+	return core->last_slot;
+}
+
+/* True when `slot` is named by the last report (active or next) or is
+ * recorded in any of the core's work slots */
+static bool knc_core_slot_busy(struct knc_core_state *core, int slot)
+{
+	int idx;
+
+	if (slot == core->report.active_slot || slot == core->report.next_slot)
+		return true;
+
+	for (idx = 0; idx < WORKS_PER_CORE; ++idx) {
+		if (core->workslot[idx].slot == slot)
+			return true;
+	}
+	return false;
+}
+
+/* Pick the next slot number that is not currently busy on this core */
+static int knc_core_next_slot(struct knc_core_state *core)
+{
+	int slot;
+	for (;;) {
+		slot = _knc_core_next_slot(core);
+		if (!knc_core_slot_busy(core, slot))
+			break;
+	}
+	return slot;
+}
+
+/* Count one failure against the core and the device; once the windowed
+ * count exceeds CORE_ERROR_LIMIT (and the core is not already disabled),
+ * disable the core for core_disable_interval. */
+static void knc_core_failure(struct knc_core_state *core)
+{
+	core->errors++;
+	core->errors_now++;
+	core->die->knc->errors++;
+
+	/* already disabled, or still within the limit: nothing more to do */
+	if (knc_core_disabled(core) || core->errors_now <= CORE_ERROR_LIMIT)
+		return;
+
+	struct cgpu_info * const proc = core->proc;
+	applog(LOG_ERR, "%"PRIpreprv" disabled for %ld seconds due to repeated hardware errors",
+		proc->proc_repr, (long)core_disable_interval.tv_sec);
+	timeradd(&now, &core_disable_interval, &core->disabled_until);
+}
+
+/* Handle one nonce reported for `slot`: remember it as the core's last
+ * nonce, then (outside the startup phase) submit it against every work slot
+ * that matches `slot` and has work attached, updating share/error counters. */
+static
+void knc_core_handle_nonce(struct thr_info *thr, struct knc_core_state *core, int slot, uint32_t nonce)
+{
+	if (!slot)
+		return;
+
+	core->last_nonce.slot = slot;
+	core->last_nonce.nonce = nonce;
+
+	if (core->die->knc->startup)
+		return;
+
+	int idx;
+	for (idx = 0; idx < WORKS_PER_CORE; idx++) {
+		if (core->workslot[idx].slot != slot || !core->workslot[idx].work)
+			continue;
+
+		struct cgpu_info * const proc = core->proc;
+		struct thr_info * const corethr = proc->thr[0];
+
+		applog(LOG_INFO, "%"PRIpreprv" found nonce %08x", proc->proc_repr, nonce);
+		if (!submit_nonce(corethr, core->workslot[idx].work, nonce)) {
+			applog(LOG_INFO, "%"PRIpreprv" hwerror nonce %08x", proc->proc_repr, nonce);
+			/* Bad share */
+			knc_core_failure(core);
+			continue;
+		}
+
+		/* Good share */
+		core->shares++;
+		core->die->knc->shares++;
+		hashes_done2(corethr, 0x100000000, NULL);
+		/* This core is useful. Ignore any errors */
+		core->errors_now = 0;
+	}
+}
+
+/* Decode a report for `core` from `response` and reconcile the host-side
+ * work slots with it:
+ *  - hands every not-yet-seen nonce to knc_core_handle_nonce, oldest first;
+ *  - when the active slot changed, retires workslot[0] and promotes
+ *    workslot[1] (or workslot[2] if the core jumped straight to it);
+ *  - when the core reports next work loaded, promotes workslot[2] to [1];
+ *  - when work is still in workslot[2] after its transfer has been read
+ *    back, drops it as a failed setwork.
+ * Always returns 0. */
+static int knc_core_process_report(struct thr_info *thr, struct knc_core_state *core, uint8_t *response)
+{
+	struct cgpu_info * const proc = core->proc;
+	struct knc_report *report = &core->report;
+	knc_decode_report(response, report, core->die->version);
+	bool had_event = false;
+
+	applog(LOG_DEBUG, "%"PRIpreprv": Process report %d %d(%d) / %d %d %d", proc->proc_repr, report->active_slot, report->next_slot, report->next_state, core->workslot[0].slot, core->workslot[1].slot, core->workslot[2].slot);
+	/* Find where the new nonces end: at the first unused entry or at the one already handled last time */
+	int n;
+	for (n = 0; n < KNC_NONCES_PER_REPORT; n++) {
+		if (report->nonce[n].slot < 0)
+			break;
+		if (core->last_nonce.slot == report->nonce[n].slot && core->last_nonce.nonce == report->nonce[n].nonce)
+			break;
+	}
+	/* Walk back towards index 0 so nonces are handled in reverse report order */
+	while(n-- > 0) {
+		knc_core_handle_nonce(thr, core, report->nonce[n].slot, report->nonce[n].nonce);
+	}
+
+	if (report->active_slot && core->workslot[0].slot != report->active_slot) {
+		had_event = true;
+		applog(LOG_INFO, "%"PRIpreprv": New work %d %d / %d %d %d", proc->proc_repr, report->active_slot, report->next_slot, core->workslot[0].slot, core->workslot[1].slot, core->workslot[2].slot);
+		/* Core switched to next work */
+		if (core->workslot[0].work) {
+			core->die->knc->completed++;
+			core->completed++;
+			applog(LOG_INFO, "%"PRIpreprv": Work completed!", proc->proc_repr);
+			free_work(core->workslot[0].work);
+		}
+		core->workslot[0] = core->workslot[1];
+		core->workslot[1].work = NULL;
+		core->workslot[1].slot = -1;
+
+		/* or did it switch directly to pending work? */
+		if (report->active_slot == core->workslot[2].slot) {
+			applog(LOG_INFO, "%"PRIpreprv": New work %d %d %d %d (pending)", proc->proc_repr, report->active_slot, core->workslot[0].slot, core->workslot[1].slot, core->workslot[2].slot);
+			if (core->workslot[0].work)
+				free_work(core->workslot[0].work);
+			core->workslot[0] = core->workslot[2];
+			core->workslot[2].work = NULL;
+			core->workslot[2].slot = -1;
+		}
+	}
+
+	if (report->next_state && core->workslot[2].slot > 0 && (core->workslot[2].slot == report->next_slot  || report->next_slot == -1)) {
+		had_event = true;
+		applog(LOG_INFO, "%"PRIpreprv": Accepted work %d %d %d %d (pending)", proc->proc_repr, report->active_slot, core->workslot[0].slot, core->workslot[1].slot, core->workslot[2].slot);
+		/* core accepted next work */
+		if (core->workslot[1].work)
+			free_work(core->workslot[1].work);
+		core->workslot[1] = core->workslot[2];
+		core->workslot[2].work = NULL;
+		core->workslot[2].slot = -1;
+	}
+
+	if (core->workslot[2].work && knc_transfer_completed(core->die->knc, core->transfer_stamp)) {
+		had_event = true;
+		applog(LOG_INFO, "%"PRIpreprv": Setwork failed?", proc->proc_repr);
+		free_work(core->workslot[2].work);
+		/* clear the pointer too, otherwise the next report would free it again */
+		core->workslot[2].work = NULL;
+		core->workslot[2].slot = -1;
+	}
+
+	if (had_event)
+		applog(LOG_INFO, "%"PRIpreprv": Exit report %d %d / %d %d %d", proc->proc_repr, report->active_slot, report->next_slot, core->workslot[0].slot, core->workslot[1].slot, core->workslot[2].slot);
+
+	return 0;
+}
+
+/* Consume every buffer the SPI thread has marked KNC_SPI_DONE, in ring
+ * order starting at read_buffer: decode each recorded response, count
+ * communication errors, feed REPORT/SETWORK responses to
+ * knc_core_process_report, then return the buffer to KNC_SPI_IDLE. */
+static void knc_process_responses(struct thr_info *thr)
+{
+	struct cgpu_info *cgpu = thr->cgpu;
+	struct knc_state *knc = cgpu->device_data;
+	struct knc_spi_buffer *buffer = &knc->spi_buffer[knc->read_buffer];
+	while (buffer->state == KNC_SPI_DONE) {
+		int i;
+		for (i = 0; i < buffer->responses; i++) {
+			struct knc_spi_response *response_info = &buffer->response_info[i];
+			uint8_t *rxbuf = &buffer->rxbuf[response_info->offset];
+			struct knc_core_state *core = response_info->core;
+			struct cgpu_info * const proc = core->proc;
+			/* rxbuf is passed by address as well, so the decoder may update it */
+			int status = knc_decode_response(rxbuf, response_info->request_length, &rxbuf, response_info->response_length);
+			/* Invert KNC_ACCEPTED to simplify logics below */
+			if (response_info->type == KNC_SETWORK && !KNC_IS_ERROR(status))
+				status ^= KNC_ACCEPTED;
+			/* status is not checked for Jupiter dies */
+			if (core->die->version != KNC_VERSION_JUPITER && status != 0) {
+				applog(LOG_ERR, "%s: Communication error (%x / %d)", proc->proc_repr, status, i);
+				if (status == KNC_ACCEPTED) {
+					/* Core refused our work vector. Likely out of sync. Reset it */
+					core->inuse = false;
+				}
+				knc_core_failure(core);
+			}
+			switch(response_info->type) {
+			case KNC_REPORT:
+			case KNC_SETWORK:
+				/* Should we handle a failed SETWORK explicitly, or rely on the "next state not loaded" indication in reports? */
+				knc_core_process_report(thr, core, rxbuf);
+				break;
+			default:
+				break;
+			}
+		}
+
+		/* buffer fully consumed: make it available for filling again */
+		buffer->state = KNC_SPI_IDLE;
+		buffer->responses = 0;
+		buffer->size = 0;
+		knc->read_buffer += 1;
+		knc->read_buffer_count += 1;
+		if (knc->read_buffer >= KNC_SPI_BUFFERS)
+			knc->read_buffer = 0;
+		buffer = &knc->spi_buffer[knc->read_buffer];
+	}
+}
+
+/* Queue `work` for `core` under a fresh slot number and record it in
+ * workslot[2].  With `clean` set the request asks the core to drop earlier
+ * work (Jupiter: two halts before the setwork; Neptune: clean flag in the
+ * setwork).  Takes ownership of `work`: on failure it is freed, a core
+ * failure is counted and -1 is returned; on success returns 0. */
+static int knc_core_send_work(struct thr_info *thr, struct knc_core_state *core, struct work *work, bool clean)
+{
+	struct knc_state *knc = core->die->knc;
+	struct cgpu_info * const proc = core->proc;
+	int request_length = 4 + 1 + 6*4 + 3*4 + 8*4;
+	uint8_t request[request_length];
+	int response_length = 1 + 1 + (1 + 4) * 5;
+
+	int slot = knc_core_next_slot(core);
+	if (slot < 0)
+		goto error;
+
+	applog(LOG_INFO, "%"PRIpreprv" setwork%s  = %d, %d %d / %d %d %d", proc->proc_repr, clean ? " CLEAN" : "", slot, core->report.active_slot, core->report.next_slot, core->workslot[0].slot, core->workslot[1].slot, core->workslot[2].slot);
+	/* a non-clean setwork is only allowed when the core actually needs work */
+	if (!clean && !knc_core_need_work(core))
+		goto error;
+
+	switch(core->die->version) {
+	case KNC_VERSION_JUPITER:
+		if (clean) {
+			/* Double halt to get rid of any previous queued work */
+			request_length = knc_prepare_jupiter_halt(request, core->die->die, core->core);
+			knc_transfer(thr, core, request_length, request, 0, KNC_NO_RESPONSE, 0);
+			knc_transfer(thr, core, request_length, request, 0, KNC_NO_RESPONSE, 0);
+		}
+		request_length = knc_prepare_jupiter_setwork(request, core->die->die, core->core, slot, work);
+		knc_transfer(thr, core, request_length, request, 0, KNC_NO_RESPONSE, 0);
+		break;
+	case KNC_VERSION_NEPTUNE:
+		request_length = knc_prepare_neptune_setwork(request, core->die->die, core->core, slot, work, clean);
+		knc_transfer(thr, core, request_length, request, response_length, KNC_SETWORK, slot);
+		break;
+	default:
+		goto error;
+	}
+
+	/* NOTE(review): with clean set, any work still held in workslot[2] is overwritten here without being freed — confirm */
+	core->workslot[2].work = work;
+	core->workslot[2].slot = slot;
+	core->works++;
+	core->die->knc->works++;
+	core->transfer_stamp = knc_transfer_stamp(knc);
+	core->inuse = true;
+
+	timeradd(&now, &core_submit_interval, &core->hold_work_until);
+	timeradd(&now, &core_timeout_interval, &core->timeout);
+
+	return 0;
+
+error:
+	applog(LOG_INFO, "%"PRIpreprv": Failed to setwork (%d)", proc->proc_repr, core->errors_now);
+	knc_core_failure(core);
+	free_work(work);
+	return -1;
+}
+
+/* Queue a report request for `core`.  The expected response length depends
+ * on the die version.  Returns 0 when queued; for an unrecognised version a
+ * core failure is counted and -1 is returned. */
+static int knc_core_request_report(struct thr_info *thr, struct knc_core_state *core)
+{
+	struct cgpu_info * const proc = core->proc;
+	uint8_t request[4];
+	int response_length;
+
+	applog(LOG_DEBUG, "%"PRIpreprv": Request report", proc->proc_repr);
+
+	const int request_length = knc_prepare_report(request, core->die->die, core->core);
+
+	switch(core->die->version) {
+	case KNC_VERSION_JUPITER:
+		response_length = 1 + 1 + (1 + 4);
+		break;
+	case KNC_VERSION_NEPTUNE:
+		response_length = 1 + 1 + (1 + 4) * 5;
+		break;
+	default:
+		applog(LOG_INFO, "%"PRIpreprv": Failed to scan work report", proc->proc_repr);
+		knc_core_failure(core);
+		return -1;
+	}
+
+	knc_transfer(thr, core, request_length, request, response_length, KNC_REPORT, 0);
+	return 0;
+}
+
+/* One scan pass over all cores, run under state_lock:
+ *  - refreshes the shared `now` and processes completed SPI responses;
+ *  - resets the per-core error limiter once per core_check_interval;
+ *  - for every enabled core, handles a pending flush (generation change) or
+ *    timeout, then either sends new work or requests a report;
+ *  - flushes the SPI buffer that was filled.
+ * Always returns 0: hashes are credited through hashes_done2() when a nonce
+ * is accepted, not through this return value. */
+static int64_t knc_scanwork(struct thr_info *thr)
+{
+	struct cgpu_info *cgpu = thr->cgpu;
+	struct knc_state *knc = cgpu->device_data;
+
+	applog(LOG_DEBUG, "KnC running scanwork");
+	mutex_lock(&knc->state_lock);
+
+	gettimeofday(&now, NULL);
+
+	knc_trnsp_periodic_check(knc->ctx);
+
+	int i;
+
+	knc_process_responses(thr);
+
+	/* Fire once the scheduled time has passed.  The previous test was
+	 * inverted (deadline > now), which never triggers from a zeroed state. */
+	if (timercmp(&knc->next_error_interval, &now, <)) {
+		/* Reset hw error limiter every check interval */
+		timeradd(&now, &core_check_interval, &knc->next_error_interval);
+		for (i = 0; i < knc->cores; i++) {
+			struct knc_core_state *core = &knc->core[i];
+			core->errors_now = 0;
+		}
+	}
+
+	for (i = 0; i < knc->cores; i++) {
+		struct knc_core_state *core = &knc->core[i];
+		struct cgpu_info * const proc = core->proc;
+		bool clean = !core->inuse;
+		if (knc_core_disabled(core))
+			continue;
+		if (core->generation != knc->generation) {
+			applog(LOG_INFO, "%"PRIpreprv" flush gen=%d/%d", proc->proc_repr, core->generation, knc->generation);
+			/* clean set state, forget everything */
+			int slot;
+			for (slot = 0; slot < WORKS_PER_CORE; slot ++) {
+				if (core->workslot[slot].work)
+					free_work(core->workslot[slot].work);
+				/* drop the pointer as well so the freed work is never touched or freed again */
+				core->workslot[slot].work = NULL;
+				core->workslot[slot].slot = -1;
+			}
+			core->hold_work_until = now;
+			core->generation = knc->generation;
+		} else if (timercmp(&core->timeout, &now, <=) && (core->workslot[0].slot > 0 || core->workslot[1].slot > 0 || core->workslot[2].slot > 0)) {
+			applog(LOG_ERR, "%"PRIpreprv" timeout gen=%d/%d", proc->proc_repr, core->generation, knc->generation);
+			clean = true;
+		}
+		if (!knc_core_has_work(core))
+			clean = true;
+		if (core->workslot[0].slot < 0 && core->workslot[1].slot < 0 && core->workslot[2].slot < 0)
+			clean = true;
+		/* after a flush, scan_adjust steps through the cores so clean setworks are spread over passes */
+		if (i % SCAN_ADJUST_RANGE == knc->scan_adjust)
+			clean = true;
+		if ((knc_core_need_work(core) || clean) && !knc->startup) {
+			struct work *work = get_work(thr);
+			knc_core_send_work(thr, core, work, clean);
+		} else {
+			knc_core_request_report(thr, core);
+		}
+	}
+	/* knc->startup delays initial work submission until we have had chance to query all cores on their current status, to avoid slot number collisions with earlier run */
+	if (knc->startup)
+		knc->startup--;
+	else if (knc->scan_adjust < SCAN_ADJUST_RANGE)
+		knc->scan_adjust++;
+
+	knc_flush(thr);
+
+	mutex_unlock(&knc->state_lock);
+	return 0;
+}
+
+/* Driver flush_work hook.
+ * Bumps the device generation counter (knc_scanwork compares each core's
+ * generation against it to decide when to drop that core's work slots) and
+ * restarts the scan_adjust sweep.
+ */
+static void knc_flush_work(struct cgpu_info *cgpu)
+{
+	struct knc_state * const state = cgpu->device_data;
+
+	applog(LOG_INFO, "KnC running flushwork");
+
+	mutex_lock(&state->state_lock);
+
+	state->scan_adjust = 0;
+	/* Generation 0 is never used: step past it if the counter wraps */
+	if (++state->generation == 0)
+		state->generation = 1;
+
+	mutex_unlock(&state->state_lock);
+}
+
+/* Driver zero_stats hook: clear the device-wide and per-core counters
+ * (shares, completed, works, errors) while holding the state lock.
+ */
+static void knc_zero_stats(struct cgpu_info *cgpu)
+{
+	int core;
+	struct knc_state *knc = cgpu->device_data;
+
+	mutex_lock(&knc->state_lock);
+
+	/* Device-wide totals are cleared exactly once, and also when the device
+	 * currently has no cores at all.
+	 */
+	knc->shares = 0;
+	knc->completed = 0;
+	knc->works = 0;
+	knc->errors = 0;
+
+	for (core = 0; core < knc->cores; core++) {
+		knc->core[core].works = 0;
+		knc->core[core].errors = 0;
+		knc->core[core].shares = 0;
+		knc->core[core].completed = 0;
+	}
+	mutex_unlock(&knc->state_lock);
+}
+
+/* Driver get_api_stats hook.
+ * Builds an api_data list holding the device-wide counters, the number of
+ * cores that are not disabled, and - for the die that owns this processor's
+ * core - the model name, core count, summed per-core counters and a '0'/'1'
+ * map of disabled/enabled cores. Per-die keys are labelled
+ * "<channel>.<die>.<name>".
+ * NOTE(review): proccore/die are looked up before state_lock is taken.
+ */
+static struct api_data *knc_api_stats(struct cgpu_info *cgpu)
+{
+	struct knc_state *knc = cgpu->device_data;
+	struct knc_core_state * const proccore = &knc->core[cgpu->proc_id];
+	struct knc_die * const die = proccore->die;
+	struct api_data *root = NULL;
+	int core;
+	char label[256];
+
+	mutex_lock(&knc->state_lock);
+
+	root = api_add_int(root, "dies", &knc->dies, 1);
+	root = api_add_int(root, "cores", &knc->cores, 1);
+	root = api_add_uint64(root, "shares", &knc->shares, 1);
+	root = api_add_uint64(root, "works", &knc->works, 1);
+	root = api_add_uint64(root, "completed", &knc->completed, 1);
+	root = api_add_uint64(root, "errors", &knc->errors, 1);
+
+	/* Active cores */
+	int active = knc->cores;
+	for (core = 0; core < knc->cores; core++) {
+		if (knc_core_disabled(&knc->core[core]))
+			active -= 1;
+	}
+	root = api_add_int(root, "active", &active, 1);
+
+	/* Per ASIC/die data */
+	{
+/* Helper macros: format the per-die label into `label` and append one entry
+ * to `root`. The int variant widens the value into a local uint64_t first.
+ */
+#define knc_api_die_string(name, value) do { \
+	snprintf(label, sizeof(label), "%d.%d.%s", die->channel, die->die, name); \
+	root = api_add_string(root, label, value, 1); \
+	} while(0)
+#define knc_api_die_int(name, value) do { \
+	snprintf(label, sizeof(label), "%d.%d.%s", die->channel, die->die, name); \
+	uint64_t v = value; \
+	root = api_add_uint64(root, label, &v, 1); \
+	} while(0)
+
+		/* Model */
+		{
+			/* "?" is reported for any die version not listed below */
+			char *model = "?";
+			switch(die->version) {
+			case KNC_VERSION_JUPITER:
+				model = "Jupiter";
+				break;
+			case KNC_VERSION_NEPTUNE:
+				model = "Neptune";
+				break;
+			}
+			knc_api_die_string("model", model);
+			knc_api_die_int("cores", die->cores);
+		}
+
+		/* Core based stats */
+		{
+			uint64_t errors = 0;
+			uint64_t shares = 0;
+			uint64_t works = 0;
+			uint64_t completed = 0;
+			/* One character per core plus the terminating NUL */
+			char coremap[die->cores+1];
+
+			for (core = 0; core < die->cores; core++) {
+				coremap[core] = knc_core_disabled(&die->core[core]) ? '0' : '1';
+				works += die->core[core].works;
+				shares += die->core[core].shares;
+				errors += die->core[core].errors;
+				completed += die->core[core].completed;
+			}
+			coremap[die->cores] = '\0';
+			knc_api_die_int("errors", errors);
+			knc_api_die_int("shares", shares);
+			knc_api_die_int("works", works);
+			knc_api_die_int("completed", completed);
+			knc_api_die_string("coremap", coremap);
+		}
+	}
+
+	mutex_unlock(&knc->state_lock);
+	return root;
+}
+
+/* Miner loop for this driver: keep calling the driver's scanwork hook until
+ * the device is flagged for shutdown, parking the thread via mt_disable()
+ * whenever it is paused or the device is not enabled.
+ */
+static
+void hash_driver_work(struct thr_info * const thr)
+{
+	struct cgpu_info * const dev = thr->cgpu;
+	struct device_drv * const driver = dev->drv;
+	
+	for (;;)
+	{
+		if (unlikely(dev->shutdown))
+			break;
+		
+		driver->scanwork(thr);
+		
+		const bool must_park = (thr->pause || dev->deven != DEV_ENABLED);
+		if (unlikely(must_park))
+			mt_disable(thr);
+	}
+}
+
+/* Driver descriptor for the KnC ASIC driver: wires the detect, miner-loop,
+ * flush, scanwork, stats-reset and API-stats hooks defined in this file.
+ */
+struct device_drv kncasic_drv = {
+	.dname = "kncasic",
+	.name = "KNC",
+	.drv_detect = kncasic_detect,
+	.minerloop = hash_driver_work,
+	.flush_work = knc_flush_work,
+	.scanwork = knc_scanwork,
+	.zero_stats = knc_zero_stats,
+	.get_api_stats = knc_api_stats,
+};

+ 9 - 3
driver-minergate.c

@@ -578,7 +578,7 @@ bool minergate_get_stats(struct cgpu_info * const dev)
 		{
 			long nums[0x80];
 			char *endptr;
-			unsigned i;
+			int i;
 			float max_temp = 0;
 			for (i = 0; 1; ++i)
 			{
@@ -596,8 +596,14 @@ bool minergate_get_stats(struct cgpu_info * const dev)
 				p = endptr;
 			}
 			i -= skip_stats;
-			long *new_stats = malloc(sizeof(*state->stats) * i);
-			memcpy(new_stats, &nums[skip_stats], sizeof(*nums) * i);
+			long *new_stats;
+			if (likely(i > 0))
+			{
+				new_stats = malloc(sizeof(*state->stats) * i);
+				memcpy(new_stats, &nums[skip_stats], sizeof(*nums) * i);
+			}
+			else
+				new_stats = NULL;
 			mutex_lock(&dev->device_mutex);
 			free(state->stats);
 			state->stats = new_stats;

+ 757 - 0
driver-minion.c

@@ -0,0 +1,757 @@
+/*
+ * Copyright 2014 Luke Dashjr
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.  See COPYING for more details.
+ */
+
+#include "config.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/spi/spidev.h>
+#include <utlist.h>
+
+#include "deviceapi.h"
+#include "logging.h"
+#include "lowl-spi.h"
+#include "miner.h"
+#include "util.h"
+
+// Highest chip id probed on the SPI bus (ids 0..minion_max_chipid inclusive)
+static const uint8_t minion_max_chipid = 0x1f;
+// Expected contents of the MRA_SIGNATURE register
+static const uint8_t minion_chip_signature[] = {0x44, 0x8a, 0xac, 0xb1};
+// A chip's queue is treated as full once this many tasks are counted as queued
+static const unsigned minion_max_queued = 0x10;
+// Delay between polls of a chip, in microseconds
+static const unsigned minion_poll_us = 10000;
+// Accepted clock range for the "clock" setting (MHz, per the error message)
+static const unsigned minion_min_clock =  800;
+static const unsigned minion_max_clock = 1999;
+// Interval used when rescheduling after a temperature read, in microseconds
+static const unsigned long minion_temp_interval_us = 5273437;
+
+// Register addresses used with minion_get/minion_set
+enum minion_register {
+	MRA_SIGNATURE        = 0x00,  // compared against minion_chip_signature
+	MRA_STATUS           = 0x01,  // byte 2 is read as core count, byte 3 as temperature
+	MRA_TEMP_CFG         = 0x03,
+	MRA_PLL_CFG          = 0x04,
+	MRA_MISC_CTL         = 0x06,
+	MRA_RESET            = 0x07,  // also receives the queue-flush command
+	MRA_FIFO_STATUS      = 0x0b,  // byte 0 is read as the result FIFO length
+	
+	MRA_CORE_EN_         = 0x10,  // base; one register per group of 32 cores
+	
+	MRA_RESULT           = 0x20,
+	
+	MRA_TASK             = 0x30,
+	
+	MRA_NONCE_START      = 0x70,
+	MRA_NONCE_INC        = 0x71,
+};
+
+// Per-chip driver state; shared by every core (processor) of the chip
+struct minion_chip {
+	uint8_t chipid;  // id used to address the chip on the bus
+	uint8_t core_count;  // taken from byte 2 of MRA_STATUS at init
+	uint8_t core_enabled_count;  // cores whose enable bit is set
+	uint16_t next_taskid;  // pre-incremented to produce each new task id
+	struct cgpu_info *first_proc;  // processor of the chip's first core
+	unsigned queue_count;  // tasks currently counted as queued on the chip
+	uint32_t core_nonce_inc;  // 0xffffffff / core_count, written to MRA_NONCE_INC
+	uint32_t pllcfg_asserted;  // PLL config last written to / read from the chip
+	uint32_t pllcfg_desired;  // PLL config to apply on the next minion_config_pll
+	struct timeval tv_read_temp;  // when the next temperature read is due
+	unsigned long timeout_us;  // result timeout derived from the clock frequency
+	struct timeval tv_timeout;  // deadline after which the chip is reinitialised
+};
+
+// Per-device state: the SPI port all chips of the device are reached through
+struct minion_bus {
+	struct spi_port *spi;
+};
+
+// Crystal frequency the PLL multiplies from (MHz, judging by the name)
+static const uint8_t minion_crystal_mhz = 12;
+
+// Encode a clock frequency as a 4-byte PLL configuration word.
+// The frequency is approximated as crystal * pll_dn / pll_dm, searching
+// dividers 1..8 for the smallest remainder; the result may therefore encode
+// a slightly lower frequency than requested.
+// NOTE(review): (freq - 800) is unsigned, so a frequency below 800 wraps -
+// callers in this file range-check against minion_min_clock first.
+static
+uint32_t minion_freq_to_pllcfg(unsigned freq)
+{
+	uint32_t rv;
+	// The word is assembled byte by byte in memory order
+	uint8_t * const pllcfg = (void*)&rv;
+	uint8_t best_rem = 12, pll_dm = 1;
+	for (uint8_t try_dm = 1; try_dm <= 8; ++try_dm)
+	{
+		const unsigned x = freq * try_dm;
+		if (x > 0x100 * minion_crystal_mhz)
+			// We'd overflow pll_dn to continue
+			break;
+		const uint8_t rem = x % minion_crystal_mhz;
+		if (rem > best_rem)
+			continue;
+		best_rem = rem;
+		pll_dm = try_dm;
+		if (!rem)
+			// Exact match, no point searching further
+			break;
+	}
+	const unsigned pll_dn = freq * pll_dm / minion_crystal_mhz;
+	// Recompute the frequency actually produced by the chosen dn/dm pair
+	freq = pll_dn * minion_crystal_mhz / pll_dm;
+	const uint8_t pll_cont = ((freq - 800) / 300);  // 2 bits
+	static const uint8_t pll_dp   = 0;  // 3 bits
+	static const uint8_t pll_byp  = 0;  // 1 bit
+	static const uint8_t pll_div2 = 0;  // 1 bit
+	static const uint8_t sys_div  = 1;  // 3 bits
+	pllcfg[0] = pll_dn - 1;
+	pllcfg[1] = (pll_dm - 1) | (pll_dp << 4);
+	pllcfg[2] = pll_cont | (pll_byp << 2) | (pll_div2 << 4) | (sys_div << 5);
+	pllcfg[3] = 0;
+	return rv;
+}
+
+// Decode a PLL configuration word back into a frequency.
+// Only words that minion_freq_to_pllcfg would itself produce for the decoded
+// frequency are accepted; anything else yields 0.
+static
+unsigned minion_pllcfg_to_freq(const uint32_t in_pllcfg)
+{
+	const uint8_t * const bytes = (void*)&in_pllcfg;
+	const unsigned multiplier = (unsigned)bytes[0] + 1;
+	const uint8_t divider = (bytes[1] & 0xf) + 1;
+	const unsigned mhz = multiplier * minion_crystal_mhz / divider;
+	// FIXME: How to interpret the rest of the pll cfg?
+	return (minion_freq_to_pllcfg(mhz) == in_pllcfg) ? mhz : 0;
+}
+
+// Read bufsz bytes from register addr of chip chipid into buf.
+// Sends a 4-byte header (chip id, address with the top bit set, little-endian
+// length) followed by bufsz filler bytes, then copies the bytes received
+// after the header position.
+static
+void minion_get(struct spi_port * const spi, const uint8_t chipid, const uint8_t addr, void * const buf, const size_t bufsz)
+{
+	const uint8_t header[] = {chipid, addr | 0x80, bufsz & 0xff, bufsz >> 8};
+	uint8_t filler[bufsz];
+	memset(filler, 0xff, bufsz);
+	
+	spi_clear_buf(spi);
+	spi_emit_buf(spi, header, sizeof(header));
+	spi_emit_buf(spi, filler, bufsz);
+	spi_txrx(spi);
+	
+	const uint8_t * const reply = spi_getrxbuf(spi);
+	memcpy(buf, reply + sizeof(header), bufsz);
+}
+
+// Write bufsz bytes from buf to register addr of chip chipid.
+// The transfer is a 4-byte header (chip id, address, little-endian length)
+// followed by the payload.
+static
+void minion_set(struct spi_port * const spi, const uint8_t chipid, const uint8_t addr, const void * const buf, const size_t bufsz)
+{
+	uint8_t hdr[4];
+	hdr[0] = chipid;
+	hdr[1] = addr;
+	hdr[2] = bufsz & 0xff;
+	hdr[3] = bufsz >> 8;
+	
+	spi_clear_buf(spi);
+	spi_emit_buf(spi, hdr, sizeof(hdr));
+	spi_emit_buf(spi, buf, bufsz);
+	spi_txrx(spi);
+}
+
+// Probe every chip id on the bus and return the total number of cores
+// reported by chips whose signature matches. A mismatching signature is
+// logged unless every byte read back was 0xff.
+static
+unsigned minion_count_cores(struct spi_port * const spi)
+{
+	uint8_t buf[max(4, sizeof(minion_chip_signature))];
+	unsigned total_core_count = 0;
+	
+	for (unsigned chipid = 0; chipid <= minion_max_chipid; ++chipid)
+	{
+		minion_get(spi, chipid, MRA_SIGNATURE, buf, sizeof(minion_chip_signature));
+		if (!memcmp(buf, minion_chip_signature, sizeof(minion_chip_signature)))
+		{
+			minion_get(spi, chipid, MRA_STATUS, buf, 4);
+			const uint8_t core_count = buf[2];
+			
+			applog(LOG_DEBUG, "%s: chipid %u: Found %u cores", spi->repr, chipid, core_count);
+			total_core_count += core_count;
+			continue;
+		}
+		
+		// Signature mismatch: only worth a log line if something other than 0xff came back
+		bool got_data = false;
+		for (unsigned i = 0; i < sizeof(minion_chip_signature) && !got_data; ++i)
+			got_data = (buf[i] != 0xff);
+		if (got_data)
+		{
+			char hex[(sizeof(minion_chip_signature) * 2) + 1];
+			bin2hex(hex, buf, sizeof(minion_chip_signature));
+			applog(LOG_DEBUG, "%s: chipid %u: Bad signature (%s)", spi->repr, chipid, hex);
+		}
+	}
+	
+	return total_core_count;
+}
+
+// Push the desired PLL configuration to the chip if it differs from the one
+// currently asserted, then recompute the result timeout from the new
+// frequency and restart the timeout timer.
+static inline
+void minion_config_pll(struct spi_port * const spi, struct minion_chip * const chip)
+{
+	if (chip->pllcfg_asserted != chip->pllcfg_desired)
+	{
+		minion_set(spi, chip->chipid, MRA_PLL_CFG, &chip->pllcfg_desired, 4);
+		chip->pllcfg_asserted = chip->pllcfg_desired;
+		// NOTE: This assumes we only ever assert pllcfgs we can decode!
+		const unsigned freq = minion_pllcfg_to_freq(chip->pllcfg_asserted);
+		chip->timeout_us = 0xffffffff / freq;
+		timer_set_delay_from_now(&chip->tv_timeout, chip->timeout_us);
+	}
+}
+
+// Locate the enable bit of a core: which MRA_CORE_EN_ register (32 cores
+// each), which of its 4 bytes, and the bit mask within that byte.
+static inline
+void minion_core_enable_register_position(const uint8_t coreid, uint8_t * const corereg, uint8_t * const corebyte, uint8_t * const corebit)
+{
+	const uint8_t reg_index = coreid / 32;
+	const uint8_t pos_in_reg = coreid % 32;
+	
+	*corereg = MRA_CORE_EN_ + reg_index;
+	*corebyte = pos_in_reg / 8;
+	*corebit = 1 << (pos_in_reg % 8);
+}
+
+// (Re)initialise one chip: issue the reset command, program the per-core
+// nonce range split, adjust the temperature and misc control registers, read
+// back the current PLL configuration, and make both the poll timer and the
+// temperature timer due at *tvp_now.
+// chip->core_count must be non-zero (it is used as a divisor).
+static
+void minion_reinit(struct cgpu_info * const first_proc, struct minion_chip * const chip, const struct timeval * const tvp_now)
+{
+	struct thr_info * const thr = first_proc->thr[0];
+	struct minion_bus * const mbus = first_proc->device_data;
+	struct spi_port * const spi = mbus->spi;
+	const uint8_t chipid = chip->chipid;
+	uint8_t buf[4];
+	
+	static const uint8_t resetcmd[4] = {0xff, 0xff, 0xa5, 0xf5};
+	minion_set(spi, chipid, MRA_RESET, resetcmd, sizeof(resetcmd));
+	
+	// Nonce start is 0; each core gets an equal share of the 32-bit nonce space
+	minion_set(spi, chipid, MRA_NONCE_START, "\0\0\0\0", 4);
+	chip->core_nonce_inc = 0xffffffff / chip->core_count;
+	pk_u32le(buf, 0, chip->core_nonce_inc);
+	minion_set(spi, chipid, MRA_NONCE_INC, buf, 4);
+	
+	// Read-modify-write so the remaining temperature config bits are kept
+	minion_get(spi, chipid, MRA_TEMP_CFG, buf, 4);
+	buf[0] &= ~(1 << 5);  // Enable temperature sensor
+	buf[0] &= ~(1 << 4);  // 20 C precision (alternative is 40 C)
+	minion_set(spi, chipid, MRA_TEMP_CFG, buf, 4);
+	
+	// Remember what the chip currently runs at; minion_config_pll applies changes later
+	minion_get(spi, chipid, MRA_PLL_CFG, &chip->pllcfg_asserted, 4);
+	
+	minion_get(spi, chipid, MRA_MISC_CTL, buf, 4);
+	buf[0] &= ~(1 << 4);  // Unpause cores
+	buf[0] &= ~(1 << 3);  // Unpause queue
+	buf[0] |= 1 << 2;  // Enable "no nonce" result reports
+	buf[0] &= ~(1 << 1);  // Disable test mode
+	minion_set(spi, chipid, MRA_MISC_CTL, buf, 4);
+	
+	thr->tv_poll = *tvp_now;
+	chip->tv_read_temp = *tvp_now;
+}
+
+// Rewrite the chip's core-enable registers from the driver's view of which
+// processors are enabled. Cores are handled in groups of 32 (one register
+// each); the mask is written when a group is complete or the last core has
+// been reached.
+static
+void minion_reenable_cores(struct cgpu_info * const first_proc, struct minion_chip * const chip)
+{
+	struct minion_bus * const mbus = first_proc->device_data;
+	struct spi_port * const spi = mbus->spi;
+	const uint8_t chipid = chip->chipid;
+	uint8_t buf[4] = {0,0,0,0};
+	struct cgpu_info *proc = first_proc;
+	for (unsigned coreid = 0; coreid < chip->core_count; (proc = proc->next_proc), ++coreid)
+	{
+		uint8_t corereg, corebyte, corebit;
+		minion_core_enable_register_position(coreid, &corereg, &corebyte, &corebit);
+		if (proc->deven == DEV_ENABLED)
+			buf[corebyte] |= corebit;
+		if (coreid % 0x20 == 0x1f || coreid == chip->core_count - 1)
+		{
+			minion_set(spi, chipid, corereg, buf, 4);
+			// Start the next register from an empty mask; otherwise enable
+			// bits of this group would leak into the following register
+			memset(buf, 0, sizeof(buf));
+		}
+	}
+}
+
+// Driver thread_init hook: walk the chip ids on the bus, and for every chip
+// with a valid signature and a non-zero core count set up its minion_chip
+// state, reinitialise it, and attach the state to one processor per core.
+// Cores whose enable bit is clear are marked DEV_DISABLED.
+// Returns false if the chip table cannot be allocated, or if all chip ids
+// have been probed while processors are still left without a chip.
+static
+bool minion_init(struct thr_info * const thr)
+{
+	struct cgpu_info * const dev = thr->cgpu, *proc = dev;
+	struct minion_bus * const mbus = dev->device_data;
+	struct spi_port * const spi = mbus->spi;
+	uint8_t buf[max(4, sizeof(minion_chip_signature))];
+	struct timeval tv_now;
+	
+	timer_set_now(&tv_now);
+	
+	// Intentionally never freed here: each core's thr->cgpu_data points into it
+	struct minion_chip * const chips = malloc(sizeof(*chips) * ((size_t)minion_max_chipid + 1));
+	if (unlikely(!chips))
+	{
+		applog(LOG_ERR, "%"PRIpreprv": Failed to allocate chip state", dev->proc_repr);
+		return false;
+	}
+	
+	// Bounded by minion_max_chipid: without the bound, a chip that stops
+	// answering would make this loop run forever and index past chips[]
+	for (unsigned chipid = 0; proc && chipid <= minion_max_chipid; ++chipid)
+	{
+		struct minion_chip * const chip = &chips[chipid];
+		spi->repr = proc->proc_repr;
+		
+		minion_get(spi, chipid, MRA_SIGNATURE, buf, sizeof(minion_chip_signature));
+		if (memcmp(buf, minion_chip_signature, sizeof(minion_chip_signature)))
+			continue;
+		
+		minion_get(spi, chipid, MRA_STATUS, buf, 4);
+		if (!buf[2])
+			continue;
+		
+		*chip = (struct minion_chip){
+			.chipid = chipid,
+			.core_count = buf[2],
+			.first_proc = proc,
+			.pllcfg_desired = minion_freq_to_pllcfg(900),
+		};
+		minion_reinit(proc, chip, &tv_now);
+		
+		for (unsigned coreid = 0; coreid < chip->core_count; ++coreid)
+		{
+			struct thr_info * const core_thr = proc->thr[0];
+			
+			uint8_t corereg, corebyte, corebit;
+			minion_core_enable_register_position(coreid, &corereg, &corebyte, &corebit);
+			// Each enable register covers 32 cores; fetch it at the start of a group
+			if (coreid % 0x20 == 0)
+			{
+				spi->repr = proc->proc_repr;
+				minion_get(spi, chipid, corereg, buf, 4);
+			}
+			if (buf[corebyte] & corebit)
+				++chip->core_enabled_count;
+			else
+				proc->deven = DEV_DISABLED;
+			
+			core_thr->cgpu_data = chip;
+			
+			proc = proc->next_proc;
+		}
+	}
+	
+	if (unlikely(proc))
+	{
+		// Fewer cores answered than were counted at detection time
+		applog(LOG_ERR, "%"PRIpreprv": No chip found for remaining cores", proc->proc_repr);
+		return false;
+	}
+	
+	return true;
+}
+
+// Report whether the chip's task queue has reached minion_max_queued, and
+// mirror that state into the queue_full flag of every core's thread whenever
+// it differs from the first core's current flag.
+static
+bool minion_queue_full(struct minion_chip * const chip)
+{
+	struct cgpu_info *proc = chip->first_proc;
+	const bool full = (chip->queue_count >= minion_max_queued);
+	
+	if (full == proc->thr[0]->queue_full)
+		return full;
+	
+	for (unsigned remaining = chip->core_count; remaining; --remaining)
+	{
+		proc->thr[0]->queue_full = full;
+		proc = proc->next_proc;
+	}
+	
+	return full;
+}
+
+// Set or clear the hardware enable bit of the core behind thr.
+// The core id is the processor's position in the chip's processor list.
+// The register is only written, and core_enabled_count only adjusted, when
+// the bit actually changes.
+static
+void minion_core_enabledisable(struct thr_info * const thr, const bool enable)
+{
+	struct cgpu_info * const proc = thr->cgpu;
+	struct minion_bus * const mbus = proc->device_data;
+	struct minion_chip * const chip = thr->cgpu_data;
+	struct spi_port * const spi = mbus->spi;
+	const uint8_t chipid = chip->chipid;
+	
+	uint8_t coreid = 0;
+	struct cgpu_info *walk = chip->first_proc;
+	while (walk != proc)
+	{
+		walk = walk->next_proc;
+		++coreid;
+	}
+	
+	uint8_t corereg, corebyte, corebit;
+	minion_core_enable_register_position(coreid, &corereg, &corebyte, &corebit);
+	
+	uint8_t regval[4];
+	minion_get(spi, chipid, corereg, regval, 4);
+	const uint8_t before = regval[corebyte];
+	const uint8_t after = enable ? (before | corebit) : (before & ~corebit);
+	if (after == before)
+		return;
+	
+	regval[corebyte] = after;
+	minion_set(spi, chipid, corereg, regval, 4);
+	chip->core_enabled_count += enable ? 1 : -1;
+}
+
+// Driver thread_disable hook: clear the core's hardware enable bit
+static
+void minion_core_disable(struct thr_info * const thr)
+{
+	minion_core_enabledisable(thr, false);
+}
+
+// Driver thread_enable hook: set the core's hardware enable bit
+static
+void minion_core_enable(struct thr_info * const thr)
+{
+	minion_core_enabledisable(thr, true);
+}
+
+// Driver queue_append hook: submit one work item to the chip's task queue.
+// Work is always tracked on the chip's first core thread, whichever core's
+// thread this was called for. Returns false without touching the work when
+// the chip queue is full, true once the task has been sent.
+static
+bool minion_queue_append(struct thr_info *thr, struct work * const work)
+{
+	struct cgpu_info *proc = thr->cgpu;
+	struct minion_bus * const mbus = proc->device_data;
+	struct minion_chip * const chip = thr->cgpu_data;
+	// From here on, operate on the chip's first processor and its thread
+	proc = chip->first_proc;
+	thr = proc->thr[0];
+	
+	if (minion_queue_full(chip))
+		return false;
+	
+	struct spi_port * const spi = mbus->spi;
+	const uint8_t chipid = chip->chipid;
+	uint8_t taskdata[0x30];
+	spi->repr = proc->proc_repr;
+	
+	work->device_id = ++chip->next_taskid;
+	// tv_stamp.tv_sec doubles as a flag: non-zero while the work is counted in queue_count
+	work->tv_stamp.tv_sec = 1;
+	// blk.nonce tracks how much of the per-core nonce range has been credited so far
+	work->blk.nonce = 0;
+	
+	// Task layout: 2-byte big-endian task id, 2 zero bytes, 32-byte midstate,
+	// then 12 bytes of block data starting at offset 0x40
+	pk_u16be(taskdata, 0, work->device_id);
+	memset(&taskdata[2], 0, 2);
+	memcpy(&taskdata[4], work->midstate, 0x20);
+	memcpy(&taskdata[0x24], &work->data[0x40], 0xc);
+	
+	// Apply any pending clock change before queueing the task
+	minion_config_pll(spi, chip);
+	minion_set(spi, chipid, MRA_TASK, taskdata, sizeof(taskdata));
+	
+	DL_APPEND(thr->work_list, work);
+	++chip->queue_count;
+	
+	// Refresh the per-core queue_full flags now that the count changed
+	minion_queue_full(chip);
+	return true;
+}
+
+static void minion_refill_queue(struct thr_info *);
+
+// Driver queue_flush hook: send the flush command to the chip and mark all
+// tracked work as no longer queued. Only the chip's first processor acts;
+// calls for the other cores of the same chip are no-ops.
+static
+void minion_queue_flush(struct thr_info * const thr)
+{
+	struct cgpu_info * const proc = thr->cgpu;
+	struct minion_bus * const mbus = proc->device_data;
+	struct minion_chip * const chip = thr->cgpu_data;
+	
+	// Redundant for other cores, all queues flush at the same time
+	if (proc == chip->first_proc)
+	{
+		static const uint8_t flushcmd[4] = {0xfb, 0xff, 0xff, 0xff};
+		minion_set(mbus->spi, chip->chipid, MRA_RESET, flushcmd, sizeof(flushcmd));
+		
+		minion_refill_queue(thr);
+	}
+}
+
+// Mark every work item on thr's list as no longer occupying a chip queue
+// slot (tv_stamp.tv_sec = 0), reset the chip's queue count to zero, and
+// refresh the per-core queue_full flags so new work can be appended.
+static
+void minion_refill_queue(struct thr_info * const thr)
+{
+	struct minion_chip * const chip = thr->cgpu_data;
+	struct work *work;
+	DL_FOREACH(thr->work_list, work)
+	{
+		work->tv_stamp.tv_sec = 0;
+	}
+	chip->queue_count = 0;
+	minion_queue_full(chip);
+}
+
+// Credit `hashes` to every enabled core among the core_count processors
+// starting at proc.
+static
+void minion_hashes_done(struct cgpu_info *proc, const uint8_t core_count, const uint64_t hashes)
+{
+	int remaining = core_count;
+	while (remaining-- > 0)
+	{
+		if (proc->deven == DEV_ENABLED)
+			hashes_done2(proc->thr[0], hashes, NULL);
+		proc = proc->next_proc;
+	}
+}
+
+// Driver poll hook, run for the chip's first core thread.
+// Drains the chip's result FIFO (submitting nonces, crediting hashes and
+// retiring older work), reads the temperature when due, reinitialises the
+// chip if no result arrived before the timeout, applies any pending clock
+// change, and schedules the next poll.
+static
+void minion_poll(struct thr_info * const chip_thr)
+{
+	struct cgpu_info * const first_proc = chip_thr->cgpu;
+	struct minion_bus * const mbus = first_proc->device_data;
+	struct minion_chip * const chip = chip_thr->cgpu_data;
+	struct spi_port * const spi = mbus->spi;
+	const uint8_t chipid = chip->chipid;
+	spi->repr = first_proc->proc_repr;
+	
+	uint8_t buf[4];
+	minion_get(spi, chipid, MRA_FIFO_STATUS, buf, 4);
+	
+	const uint8_t res_fifo_len = buf[0];
+	if (res_fifo_len)
+	{
+		// Each FIFO entry is 8 bytes: task id (2, big-endian), core id (1),
+		// flags (1, top bit set = "no nonce"), nonce (4, little-endian)
+		static const size_t resbuf_i_len = 8;
+		const size_t resbuf_len = (size_t)res_fifo_len * resbuf_i_len;
+		uint8_t resbuf[resbuf_len], *resbuf_i = resbuf;
+		minion_get(spi, chipid, MRA_RESULT, resbuf, resbuf_len);
+		
+		for (unsigned i = 0; i < res_fifo_len; (resbuf_i += resbuf_i_len), ++i)
+		{
+			const uint8_t coreid = resbuf_i[2];
+			work_device_id_t taskid = upk_u16be(resbuf_i, 0);
+			const bool have_nonce = !(resbuf_i[3] & 0x80);
+			struct cgpu_info *proc;
+			struct thr_info *core_thr;
+			bool clean = false;
+			
+			if (likely(coreid < chip->core_count))
+			{
+				proc = first_proc;
+				for (int j = 0; j < coreid; ++j)
+					proc = proc->next_proc;
+				core_thr = proc->thr[0];
+			}
+			else
+			{
+				// Attribute the bogus result to the first core, but keep processing it
+				proc = first_proc;
+				core_thr = proc->thr[0];
+				inc_hw_errors_only(core_thr);
+				applog(LOG_ERR, "%"PRIpreprv": Core id out of range (%u >= %u)", proc->proc_repr, coreid, chip->core_count);
+			}
+			
+			struct work *work;
+			DL_SEARCH_SCALAR(chip_thr->work_list, work, device_id, taskid);
+			if (unlikely(!work))
+			{
+				inc_hw_errors_only(core_thr);
+				applog(LOG_ERR, "%"PRIpreprv": Unknown task %"PRIwdi, proc->proc_repr, taskid);
+				continue;
+			}
+			
+			if (have_nonce)
+			{
+				const uint32_t nonce = upk_u32le(resbuf_i, 4);
+				
+				if (submit_nonce(core_thr, work, nonce))
+				{
+					// Only a valid nonce from a valid core proves older work may be freed
+					clean = (coreid < chip->core_count);
+					
+					// It's only 0xffffffff if we prematurely considered it complete
+					if (likely(work->blk.nonce != 0xffffffff))
+					{
+						// Position of the nonce within a single core's range
+						uint32_t hashes = (nonce % chip->core_nonce_inc);
+						if (hashes > work->blk.nonce)
+						{
+							hashes -= work->blk.nonce - 1;
+							minion_hashes_done(first_proc, chip->core_count, hashes);
+							work->blk.nonce = hashes + 1;
+						}
+					}
+				}
+			}
+			else
+			{
+				// "No nonce" report: credit whatever is left of the range
+				const uint32_t hashes = chip->core_nonce_inc - work->blk.nonce;
+				minion_hashes_done(first_proc, chip->core_count, hashes);
+				work->blk.nonce = 0xffffffff;
+			}
+			
+			// Flag previous work(s) as done, and delete them when we are sure
+			struct work *work_tmp;
+			DL_FOREACH_SAFE(chip_thr->work_list, work, work_tmp)
+			{
+				if (work->device_id == taskid)
+					break;
+				
+				if (work->blk.nonce && work->blk.nonce != 0xffffffff)
+				{
+					// At least one nonce was found, assume the job completed
+					const uint32_t hashes = chip->core_nonce_inc - work->blk.nonce;
+					minion_hashes_done(first_proc, chip->core_count, hashes);
+					work->blk.nonce = 0xffffffff;
+				}
+				if (work->tv_stamp.tv_sec)
+				{
+					// Still counted as queued: release its queue slot
+					--chip->queue_count;
+					work->tv_stamp.tv_sec = 0;
+				}
+				if (clean)
+				{
+					DL_DELETE(chip_thr->work_list, work);
+					free_work(work);
+				}
+			}
+		}
+		minion_queue_full(chip);
+	}
+	
+	struct timeval tv_now;
+	timer_set_now(&tv_now);
+	
+	if (timer_passed(&chip->tv_read_temp, &tv_now))
+	{
+		minion_get(spi, chipid, MRA_STATUS, buf, 4);
+		const float temp = buf[3] * 20.;
+		struct cgpu_info *proc = first_proc;
+		for (int j = 0; j < chip->core_count; (proc = proc->next_proc), ++j)
+			proc->temp = temp;
+		// Re-arm the temperature timer itself. Setting tv_poll here had no
+		// effect (it is overwritten below) and left the temperature being
+		// read on every single poll.
+		timer_set_delay(&chip->tv_read_temp, &tv_now, minion_temp_interval_us);
+	}
+	
+	if (res_fifo_len)
+		timer_set_delay(&chip->tv_timeout, &tv_now, chip->timeout_us);
+	else
+	if (timer_passed(&chip->tv_timeout, &tv_now))
+	{
+		applog(LOG_WARNING, "%"PRIpreprv": Chip timeout, reinitialising", first_proc->proc_repr);
+		minion_reinit(first_proc, chip, &tv_now);
+		minion_reenable_cores(first_proc, chip);
+		minion_refill_queue(chip_thr);
+	}
+	
+	minion_config_pll(spi, chip);
+	
+	timer_set_delay(&chip_thr->tv_poll, &tv_now, minion_poll_us);
+}
+
+// Driver API hook: report the chip's current clock as "Frequency".
+// Returns NULL when the asserted PLL configuration cannot be decoded.
+static
+struct api_data *minion_get_api_extra_device_status(struct cgpu_info * const proc)
+{
+	struct minion_chip * const chip = proc->thr[0]->cgpu_data;
+	double mhz = minion_pllcfg_to_freq(chip->pllcfg_asserted);
+	
+	if (!(mhz > 0))
+		return NULL;
+	
+	return api_add_freq(NULL, "Frequency", &mhz, true);
+}
+
+// "clock" device setting: validate the requested frequency and store the
+// matching PLL configuration as the chip's desired one (it is applied later
+// by minion_config_pll). Returns NULL on success, or an error message written
+// into replybuf when the value is out of range.
+static
+const char *minion_set_clock(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const out_success)
+{
+	struct minion_chip * const chip = proc->thr[0]->cgpu_data;
+	const int mhz = atoi(newvalue);
+	
+	if (!(mhz >= minion_min_clock && mhz <= minion_max_clock))
+	{
+		sprintf(replybuf, "Clock frequency must be within range of %u-%u MHz", minion_min_clock, minion_max_clock);
+		return replybuf;
+	}
+	
+	chip->pllcfg_desired = minion_freq_to_pllcfg(mhz);
+	
+	return NULL;
+}
+
+// Settings accepted for minion devices; the list ends with a NULL-named entry
+static const struct bfg_set_device_definition minion_set_device_funcs[] = {
+	{"clock", minion_set_clock, "clock frequency"},
+	{NULL},
+};
+
+#ifdef HAVE_CURSES
+// TUI hook: print the menu choices this driver adds (handled by
+// minion_tui_handle_choice)
+static
+void minion_tui_wlogprint_choices(struct cgpu_info * const proc)
+{
+	wlogprint("[C]lock speed ");
+}
+
+// TUI hook: handle a key press in the device menu.
+// 'c'/'C' prompts for a clock speed and, if it is within range, stores the
+// matching PLL configuration as the chip's desired one. Returns a status
+// message for handled keys, or NULL when the key is not ours.
+static
+const char *minion_tui_handle_choice(struct cgpu_info * const proc, const int input)
+{
+	struct thr_info * const thr = proc->thr[0];
+	struct minion_chip * const chip = thr->cgpu_data;
+	char buf[0x100];
+	
+	switch (input)
+	{
+		case 'c': case 'C':
+		{
+			// The limits are unsigned, so format them with %u; bound the write to buf
+			snprintf(buf, sizeof(buf), "Set clock speed (range %u-%u)", minion_min_clock, minion_max_clock);
+			const int nv = curses_int(buf);
+			if (nv < minion_min_clock || nv > minion_max_clock)
+				return "Invalid clock speed\n";
+			
+			const uint32_t pllcfg = minion_freq_to_pllcfg(nv);
+			chip->pllcfg_desired = pllcfg;
+			
+			return "Clock speed changed\n";
+		}
+	}
+	
+	return NULL;
+}
+
+// TUI hook: print the chip's current clock speed, if the asserted PLL
+// configuration can be decoded.
+static
+void minion_wlogprint_status(struct cgpu_info * const proc)
+{
+	struct minion_chip * const chip = proc->thr[0]->cgpu_data;
+	const unsigned mhz = minion_pllcfg_to_freq(chip->pllcfg_asserted);
+	
+	if (mhz == 0)
+		return;
+	
+	wlogprint("Clock speed: %u\n", mhz);
+}
+#endif
+
+BFG_REGISTER_DRIVER(minion_drv)
+
+// Probe one SPI device path for Minion chips and register a device for it.
+// Opens the SPI port, counts the cores of all responding chips, and adds a
+// cgpu with one processor per core. Returns false if the port cannot be
+// opened, otherwise the result of add_cgpu().
+// NOTE(review): the malloc results are used unchecked, and a device is still
+// added when no cores were found - confirm whether that is intended.
+static
+bool minion_detect_one(const char * const devpath)
+{
+	spi_init();
+	
+	struct spi_port *spi = malloc(sizeof(*spi));
+	// Be careful, read lowl-spi.h comments for warnings
+	memset(spi, 0, sizeof(*spi));
+	spi->speed = 50000000;
+	spi->mode = SPI_MODE_0;
+	spi->bits = 8;
+	spi->txrx = linux_spi_txrx2;
+	if (spi_open(spi, devpath) < 0)
+	{
+		free(spi);
+		applogr(false, LOG_ERR, "%s: Failed to open %s", minion_drv.dname, devpath);
+	}
+	
+	// Until processors exist, log under the driver name
+	spi->repr = minion_drv.dname;
+	spi->logprio = LOG_WARNING;
+	const unsigned total_core_count = minion_count_cores(spi);
+	
+	struct minion_bus * const mbus = malloc(sizeof(*mbus));
+	*mbus = (struct minion_bus){
+		.spi = spi,
+	};
+	
+	struct cgpu_info * const cgpu = malloc(sizeof(*cgpu));
+	*cgpu = (struct cgpu_info){
+		.drv = &minion_drv,
+		.device_path = strdup(devpath),
+		.device_data = mbus,
+		.set_device_funcs = minion_set_device_funcs,
+		.deven = DEV_ENABLED,
+		.procs = total_core_count,
+		.threads = 1,
+	};
+	return add_cgpu(cgpu);
+}
+
+// Auto-detection: try the fixed path /dev/spidev0.0 and return the number of
+// devices found (1 or 0)
+static
+int minion_detect_auto(void)
+{
+	return minion_detect_one("/dev/spidev0.0") ? 1 : 0;
+}
+
+// Driver drv_detect hook: delegate to generic_detect with this driver's
+// per-path and auto-detect callbacks and the GDF_REQUIRE_DNAME |
+// GDF_DEFAULT_NOAUTO flags
+static
+void minion_detect(void)
+{
+	generic_detect(&minion_drv, minion_detect_one, minion_detect_auto, GDF_REQUIRE_DNAME | GDF_DEFAULT_NOAUTO);
+}
+
+// Driver descriptor for Minion chips: queue-based miner loop with per-core
+// enable/disable, polling, API status and (when built with curses) TUI hooks
+struct device_drv minion_drv = {
+	.dname = "minion",
+	.name = "MNN",
+	.drv_detect = minion_detect,
+	
+	.thread_init = minion_init,
+	.minerloop = minerloop_queue,
+	
+	.thread_disable = minion_core_disable,
+	.thread_enable = minion_core_enable,
+	
+	.queue_append = minion_queue_append,
+	.queue_flush = minion_queue_flush,
+	.poll = minion_poll,
+	
+	.get_api_extra_device_status = minion_get_api_extra_device_status,
+	
+#ifdef HAVE_CURSES
+	.proc_wlogprint_status = minion_wlogprint_status,
+	.proc_tui_wlogprint_choices = minion_tui_wlogprint_choices,
+	.proc_tui_handle_choice = minion_tui_handle_choice,
+#endif
+};

+ 2 - 2
driver-modminer.c

@@ -132,8 +132,7 @@ modminer_detect_one(const char *devpath)
 	buf[len] = '\0';
 	if (strncasecmp(buf, "ModMiner", 8))
 		bailout(LOG_DEBUG, "%s: %s: response did not begin with 'ModMiner'", __func__, devpath);
-	char*devname = strdup(buf);
-	applog(LOG_DEBUG, "ModMiner identified as: %s", devname);
+	applog(LOG_DEBUG, "ModMiner identified as: %s", buf);
 
 	if (serial_claim_v(devpath, &modminer_drv))
 	{
@@ -141,6 +140,7 @@ modminer_detect_one(const char *devpath)
 		return false;
 	}
 	
+	char*devname = strdup(buf);
 	if (1 != write(fd, MODMINER_FPGA_COUNT, 1))
 		bailout(LOG_DEBUG, "ModMiner detect: write failed on %s (get FPGA count)", devpath);
 	len = read(fd, buf, 1);

+ 2 - 0
driver-nanofury.c

@@ -517,6 +517,8 @@ static
 void nanofury_shutdown(struct thr_info * const thr)
 {
 	struct nanofury_state * const state = thr->cgpu_data;
+	if (!state)
+		return;
 	struct mcp2210_device * const mcp = state->mcp;
 	
 	if (mcp)

+ 257 - 168
driver-opencl.c

@@ -50,7 +50,6 @@
 /* TODO: cleanup externals ********************/
 
 
-#ifdef HAVE_OPENCL
 /* Platform API */
 CL_API_ENTRY cl_int CL_API_CALL
 (*clGetPlatformIDs)(cl_uint          /* num_entries */,
@@ -257,10 +256,7 @@ load_opencl_symbols() {
 	
 	return true;
 }
-#endif
-
 
-typedef cl_int (*queue_kernel_parameters_func_t)(_clState *, struct work *, cl_uint);
 
 struct opencl_kernel_interface {
 	const char *kiname;
@@ -308,6 +304,11 @@ void opencl_early_init()
 		struct opencl_device_data * const data = &dataarray[i];
 		*data = (struct opencl_device_data){
 			.dynamic = true,
+			.use_goffset = BTS_UNKNOWN,
+			.intensity = intensity_not_set,
+#ifdef USE_SCRYPT
+			.lookup_gap = 2,
+#endif
 		};
 		gpus[i] = (struct cgpu_info){
 			.device_data = data,
@@ -369,30 +370,20 @@ const char *set_ ## PNAME(char *arg)  \
 #define _SET_INT_LIST(PNAME, VCHECK, FIELD)  \
 	_SET_INT_LIST2(PNAME, VCHECK, ((struct opencl_device_data *)cgpu->device_data)->FIELD)
 
-#ifdef HAVE_OPENCL
 _SET_INT_LIST(vector  , (v == 1 || v == 2 || v == 4), vwidth   )
 _SET_INT_LIST(worksize, (v >= 1 && v <= 9999)       , work_size)
 
 #ifdef USE_SCRYPT
 _SET_INT_LIST(shaders           , true, shaders)
-_SET_INT_LIST(lookup_gap        , true, opt_lg )
-_SET_INT_LIST(thread_concurrency, true, opt_tc )
+_SET_INT_LIST(lookup_gap        , true, lookup_gap)
+_SET_INT_LIST(thread_concurrency, true, thread_concurrency)
 #endif
 
 enum cl_kernels select_kernel(const char * const arg)
 {
-	if (!strcmp(arg, "diablo"))
-		return KL_DIABLO;
-	if (!strcmp(arg, "diakgcn"))
-		return KL_DIAKGCN;
-	if (!strcmp(arg, "poclbm"))
-		return KL_POCLBM;
-	if (!strcmp(arg, "phatk"))
-		return KL_PHATK;
-#ifdef USE_SCRYPT
-	if (!strcmp(arg, "scrypt"))
-		return KL_SCRYPT;
-#endif
+	for (unsigned i = 1; i < (unsigned)OPENCL_KERNEL_INTERFACE_COUNT; ++i)
+		if (!strcasecmp(arg, kernel_interfaces[i].kiname))
+			return i;
 	return KL_NONE;
 }
 
@@ -405,20 +396,25 @@ const char *opencl_get_kernel_interface_name(const enum cl_kernels kern)
 static
 bool _set_kernel(struct cgpu_info * const cgpu, const char *_val)
 {
-	FILE *F;
 	struct opencl_device_data * const data = cgpu->device_data;
 	
 	size_t knamelen = strlen(_val);
 	char filename[knamelen + 3 + 1];
 	sprintf(filename, "%s.cl", _val);
 	
-	F = opencl_open_kernel(filename);
-	if (!F)
+	int dummy_srclen;
+	enum cl_kernels interface;
+	struct mining_algorithm *malgo;
+	char *src = opencl_kernel_source(filename, &dummy_srclen, &interface, &malgo);
+	if (!src)
+		return false;
+	free(src);
+	if (!malgo)
 		return false;
-	fclose(F);
 	
-	free(data->kernel_file);
-	data->kernel_file = strdup(_val);
+	struct opencl_kernel_info * const kernelinfo = &data->kernelinfo[malgo->algo];
+	free(kernelinfo->file);
+	kernelinfo->file = strdup(_val);
 	
 	return true;
 }
@@ -427,7 +423,6 @@ const char *set_kernel(char *arg)
 {
 	return _set_list(arg, "Invalid value passed to set_kernel", _set_kernel);
 }
-#endif
 
 static
 const char *opencl_init_binary(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const out_success)
@@ -457,6 +452,19 @@ const char *opencl_init_binary(struct cgpu_info * const proc, const char * const
 	return NULL;
 }
 
+static  // Handler for the "goffset" device option: parses newvalue as a boolean and stores it in use_goffset
+const char *opencl_init_goffset(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const out_success)
+{
+	struct opencl_device_data * const data = proc->device_data;
+	char *end;
+	bool nv = bfg_strtobool(newvalue, &end, 0);
+	if (newvalue[0] && !end[0])  // accept only a non-empty string; presumably end points past the parsed text -- confirm against bfg_strtobool
+		data->use_goffset = nv;
+	else
+		return "Invalid boolean value";
+	return NULL;  // NULL is returned when the value was accepted
+}
+
 #ifdef HAVE_ADL
 /* This function allows us to map an adl device to an opencl device for when
  * simple enumeration has failed to match them. */
@@ -654,23 +662,6 @@ const char *opencl_set_gpu_vddc(struct cgpu_info * const proc, const char * cons
 _SET_INT_LIST(temp_overheat, (v >=     0 && v <   200), adl.overtemp )
 #endif
 
-#ifdef HAVE_OPENCL
-// SHA256d "intensity" has an artificial offset of -15
-double oclthreads_to_intensity(const unsigned long oclthreads, const bool is_sha256d)
-{
-	double intensity = log2(oclthreads);
-	if (is_sha256d)
-		intensity -= 15.;
-	return intensity;
-}
-
-unsigned long intensity_to_oclthreads(double intensity, const bool is_sha256d)
-{
-	if (is_sha256d)
-		intensity += 15;
-	return pow(2, intensity);
-}
-
 double oclthreads_to_xintensity(const unsigned long oclthreads, const cl_uint max_compute_units)
 {
 	return (double)oclthreads / (double)max_compute_units / 64.;
@@ -681,10 +672,33 @@ unsigned long xintensity_to_oclthreads(const double xintensity, const cl_uint ma
 	return xintensity * max_compute_units * 0x40;
 }
 
+static int min_intensity, max_intensity;
+
+// NOTE: This can't be attribute-constructor because then it would race with the mining_algorithms list being populated
+static  // Computes min_intensity/max_intensity as the widest range over every entry in mining_algorithms
+void opencl_calc_intensity_range()
+{
+	RUNONCE();  // NOTE(review): presumably makes repeat calls return immediately -- macro is defined elsewhere
+	
+	min_intensity = INT_MAX;  // inverted starting range, so the first algorithm's values always replace it
+	max_intensity = INT_MIN;
+	struct mining_algorithm *malgo;
+	LL_FOREACH(mining_algorithms, malgo)
+	{
+		const int malgo_min_intensity = malgo->opencl_oclthreads_to_intensity(malgo->opencl_min_oclthreads);  // each algorithm converts its own thread limits
+		const int malgo_max_intensity = malgo->opencl_oclthreads_to_intensity(malgo->opencl_max_oclthreads);
+		if (malgo_min_intensity < min_intensity)
+			min_intensity = malgo_min_intensity;
+		if (malgo_max_intensity > max_intensity)
+			max_intensity = malgo_max_intensity;
+	}
+}
+
 bool opencl_set_intensity_from_str(struct cgpu_info * const cgpu, const char *_val)
 {
 	struct opencl_device_data * const data = cgpu->device_data;
 	unsigned long oclthreads = 0;
+	float intensity = intensity_not_set;
 	bool dynamic = false;
 	
 	if (!strncasecmp(_val, "d", 1))
@@ -710,19 +724,33 @@ bool opencl_set_intensity_from_str(struct cgpu_info * const cgpu, const char *_v
 		}
 	}
 	else
-	if (isdigit(_val[0]))
 	{
-		const double v = atof(_val);
-		if (v < MIN_INTENSITY || v > MAX_GPU_INTENSITY)
-			return false;
-		oclthreads = intensity_to_oclthreads(v, !opt_scrypt);
+		char *endptr;
+		const double v = strtod(_val, &endptr);
+		if (endptr == _val)
+		{
+			if (!dynamic)
+				return false;
+		}
+		else
+		{
+			opencl_calc_intensity_range();
+			if (v < min_intensity || v > max_intensity)
+				return false;
+			oclthreads = 1;
+			intensity = v;
+		}
 	}
 	
 	// Make actual assignments after we know the values are valid
 	data->dynamic = dynamic;
 	if (data->oclthreads)
 	{
-		data->oclthreads = oclthreads;
+		if (oclthreads)
+		{
+			data->oclthreads = oclthreads;
+			data->intensity = intensity;
+		}
 		pause_dynamic_threads(cgpu->device_id);
 	}
 	else
@@ -742,7 +770,6 @@ const char *set_intensity(char *arg)
 }
 
 _SET_INT_LIST2(gpu_threads, (v >= 1 && v <= 10), cgpu->threads)
-#endif
 
 void write_config_opencl(FILE * const fcfg)
 {
@@ -753,7 +780,6 @@ void write_config_opencl(FILE * const fcfg)
 }
 
 
-#ifdef HAVE_OPENCL
 BFG_REGISTER_DRIVER(opencl_api)
 static const struct bfg_set_device_definition opencl_set_device_funcs_probe[];
 static const struct bfg_set_device_definition opencl_set_device_funcs[];
@@ -766,16 +792,12 @@ char *print_ndevs_and_exit(int *ndevs)
 	applog(LOG_INFO, "%i GPU devices max detected", *ndevs);
 	exit(*ndevs);
 }
-#endif
 
 
 struct cgpu_info gpus[MAX_GPUDEVICES]; /* Maximum number apparently possible */
 struct cgpu_info *cpus;
 
 
-
-#ifdef HAVE_OPENCL
-
 /* In dynamic mode, only the first thread of each device will be in use.
  * This potentially could start a thread that was stopped with the start-stop
  * options if one were to disable dynamic from the menu on a paused GPU */
@@ -802,16 +824,29 @@ void pause_dynamic_threads(int gpu)
 
 struct device_drv opencl_api;
 
-#endif /* HAVE_OPENCL */
+float opencl_proc_get_intensity(struct cgpu_info * const proc, const char ** const iunit)  // returns the intensity to display and sets *iunit to its unit prefix
+{
+	struct opencl_device_data * const data = proc->device_data;
+	struct thr_info *thr = proc->thr[0];  // the device's first thread supplies the clState
+	const int thr_id = thr->id;
+	_clState * const clState = clStates[thr_id];
+	float intensity = data->intensity;
+	if (intensity == intensity_not_set)  // no plain intensity stored: derive an xintensity from oclthreads instead
+	{
+		intensity = oclthreads_to_xintensity(data->oclthreads, clState->max_compute_units);
+		*iunit = data->dynamic ? "dx" : "x";
+	}
+	else
+		*iunit = data->dynamic ? "d" : "";  // plain intensity; "d" marks dynamic mode
+	return intensity;
+}
 
-#if defined(HAVE_OPENCL) && defined(HAVE_CURSES)
+#ifdef HAVE_CURSES
 static
 void opencl_wlogprint_status(struct cgpu_info *cgpu)
 {
 	struct opencl_device_data * const data = cgpu->device_data;
 	struct thr_info *thr = cgpu->thr[0];
-	const int thr_id = thr->id;
-	_clState * const clState = clStates[thr_id];
 	int i;
 	char checkin[40];
 	double displayed_rolling;
@@ -820,16 +855,9 @@ void opencl_wlogprint_status(struct cgpu_info *cgpu)
 	strcpy(logline, ""); // In case it has no data
 	
 	{
-		double intensity = oclthreads_to_intensity(data->oclthreads, !opt_scrypt);
-		double xintensity = oclthreads_to_xintensity(data->oclthreads, clState->max_compute_units);
-		const char *iunit = "";
-		if (xintensity - (int)xintensity < intensity - (int)intensity)
-		{
-			intensity = xintensity;
-			iunit = "x";
-		}
-		tailsprintf(logline, sizeof(logline), "I:%s%s%g ",
-		            (data->dynamic ? "d" : ""),
+		const char *iunit;
+		float intensity = opencl_proc_get_intensity(cgpu, &iunit);
+		tailsprintf(logline, sizeof(logline), "I:%s%g ",
 		            iunit,
 		            intensity);
 	}
@@ -917,33 +945,23 @@ void opencl_tui_wlogprint_choices(struct cgpu_info *cgpu)
 static
 const char *opencl_tui_handle_choice(struct cgpu_info *cgpu, int input)
 {
-	struct opencl_device_data * const data = cgpu->device_data;
-	
 	switch (input)
 	{
 		case 'i': case 'I':
 		{
+			char promptbuf[0x40];
 			char *intvar;
 
-			if (opt_scrypt) {
-				intvar = curses_input("Set GPU scan intensity (d or "
-						      MIN_SCRYPT_INTENSITY_STR " -> "
-						      MAX_SCRYPT_INTENSITY_STR ")");
-			} else {
-				intvar = curses_input("Set GPU scan intensity (d or "
-						      MIN_SHA_INTENSITY_STR " -> "
-						      MAX_SHA_INTENSITY_STR ")");
-			}
+			opencl_calc_intensity_range();
+			snprintf(promptbuf, sizeof(promptbuf), "Set GPU scan intensity (d or %d -> %d)", min_intensity, max_intensity);
+			intvar = curses_input(promptbuf);
 			if (!intvar)
 				return "Invalid intensity\n";
-			if (!strncasecmp(intvar, "d", 1)) {
-				data->dynamic = true;
-				pause_dynamic_threads(cgpu->device_id);
-				free(intvar);
-				return "Dynamic mode enabled\n";
-			}
 			if (!_set_intensity(cgpu, intvar))
+			{
+				free(intvar);
 				return "Invalid intensity (out of range)\n";
+			}
 			free(intvar);
 			return "Intensity changed\n";
 		}
@@ -971,20 +989,16 @@ const char *opencl_tui_handle_choice(struct cgpu_info *cgpu, int input)
 #endif
 
 
-#ifdef HAVE_OPENCL
-
 #define CL_SET_BLKARG(blkvar) status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->blkvar)
 #define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
 #define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
 
+#ifdef USE_SHA256D
 static  // Returns a newly malloc'd byte copy of work->device_data, sized as struct opencl_work_data
 void *_opencl_work_data_dup(struct work * const work)
 {
 	struct opencl_work_data *p = malloc(sizeof(*p));  // NOTE(review): the allocation result is not checked before the memcpy
 	memcpy(p, work->device_data, sizeof(*p));
-#ifdef USE_SCRYPT
-	p->work = work;
-#endif
 	return p;
 }
 
@@ -1007,10 +1021,10 @@ struct opencl_work_data *_opencl_work_data(struct work * const work)
 }
 
 static
-cl_int queue_poclbm_kernel(_clState * const clState, struct work * const work, const cl_uint threads)
+cl_int queue_poclbm_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, const cl_uint threads)
 {
 	struct opencl_work_data * const blk = _opencl_work_data(work);
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
 
@@ -1031,7 +1045,8 @@ cl_int queue_poclbm_kernel(_clState * const clState, struct work * const work, c
 	CL_SET_BLKARG(cty_g);
 	CL_SET_BLKARG(cty_h);
 
-	if (!clState->goffset) {
+	if (!kinfo->goffset)
+	{
 		cl_uint vwidth = clState->vwidth;
 		uint *nonces = alloca(sizeof(uint) * vwidth);
 		unsigned int i;
@@ -1062,10 +1077,10 @@ cl_int queue_poclbm_kernel(_clState * const clState, struct work * const work, c
 }
 
 static
-cl_int queue_phatk_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
+cl_int queue_phatk_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
 	struct opencl_work_data * const blk = _opencl_work_data(work);
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	cl_uint vwidth = clState->vwidth;
 	unsigned int i, num = 0;
 	cl_int status = 0;
@@ -1107,14 +1122,14 @@ cl_int queue_phatk_kernel(_clState * const clState, struct work * const work, __
 }
 
 static
-cl_int queue_diakgcn_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
+cl_int queue_diakgcn_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
 	struct opencl_work_data * const blk = _opencl_work_data(work);
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
 
-	if (!clState->goffset) {
+	if (!kinfo->goffset) {
 		cl_uint vwidth = clState->vwidth;
 		uint *nonces = alloca(sizeof(uint) * vwidth);
 		unsigned int i;
@@ -1169,14 +1184,14 @@ cl_int queue_diakgcn_kernel(_clState * const clState, struct work * const work,
 }
 
 static
-cl_int queue_diablo_kernel(_clState * const clState, struct work * const work, const cl_uint threads)
+cl_int queue_diablo_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, const cl_uint threads)
 {
 	struct opencl_work_data * const blk = _opencl_work_data(work);
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	unsigned int num = 0;
 	cl_int status = 0;
 
-	if (!clState->goffset) {
+	if (!kinfo->goffset) {
 		cl_uint vwidth = clState->vwidth;
 		uint *nonces = alloca(sizeof(uint) * vwidth);
 		unsigned int i;
@@ -1222,16 +1237,23 @@ cl_int queue_diablo_kernel(_clState * const clState, struct work * const work, c
 
 	return status;
 }
+#endif
 
 #ifdef USE_SCRYPT
 static
-cl_int queue_scrypt_kernel(_clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
+cl_int queue_scrypt_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
 {
 	unsigned char *midstate = work->midstate;
-	cl_kernel *kernel = &clState->kernel;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	unsigned int num = 0;
 	cl_uint le_target;
 	cl_int status = 0;
+	
+	if (!kinfo->goffset)
+	{
+		cl_uint nonce_base = work->blk.nonce;
+		CL_SET_ARG(nonce_base);
+	}
 
 	le_target = *(cl_uint *)(work->target + 28);
 	clState->cldata = work->data;
@@ -1247,23 +1269,54 @@ cl_int queue_scrypt_kernel(_clState * const clState, struct work * const work, _
 	return status;
 }
 #endif
-#endif /* HAVE_OPENCL */
+
+#ifdef USE_OPENCL_FULLHEADER
+// Queue-parameter callback for the "fullheader" kernel interface.
+// Sets work->nonce_diff from the work's mining algorithm, converts the 80-byte
+// header in work->data with swap32yes (20 32-bit words) and writes it to
+// clState->CLbuffer0 with a blocking write, then sets the kernel arguments:
+// nonce_base first (only when kinfo->goffset is off), followed by CLbuffer0 and
+// outputBuffer.
+// Returns the bitwise OR of every OpenCL status code collected along the way,
+// so the result is CL_SUCCESS (0) only if every call succeeded.
+static
+cl_int queue_fullheader_kernel(const struct opencl_kernel_info * const kinfo, _clState * const clState, struct work * const work, __maybe_unused const cl_uint threads)
+{
+	const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+	const cl_kernel * const kernel = &kinfo->kernel;
+	unsigned int num = 0;
+	cl_int status = 0;
+	uint8_t blkheader[80];
+	
+	work->nonce_diff = malgo->opencl_min_nonce_diff;
+	
+	if (!kinfo->goffset)
+	{
+		// Without a global work offset, the starting nonce is passed as the first kernel argument
+		cl_uint nonce_base = work->blk.nonce;
+		CL_SET_ARG(nonce_base);
+	}
+	
+	swap32yes(blkheader, work->data, 80/4);
+	// Accumulate instead of assigning, so a failure while setting nonce_base above is not discarded
+	status |= clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, CL_TRUE, 0, sizeof(blkheader), blkheader, 0, NULL, NULL);
+	
+	CL_SET_ARG(clState->CLbuffer0);
+	CL_SET_ARG(clState->outputBuffer);
+	
+	return status;
+}
+#endif
 
 
 static  // Table of kernel interfaces: a name paired with the function that queues that kernel's parameters
 struct opencl_kernel_interface kernel_interfaces[] = {
 	{NULL},  // index 0 is a placeholder with no name or function
+#ifdef USE_SHA256D
 	{"poclbm",  queue_poclbm_kernel },
 	{"phatk",   queue_phatk_kernel  },
 	{"diakgcn", queue_diakgcn_kernel},
 	{"diablo",  queue_diablo_kernel },
+#endif
+#ifdef USE_OPENCL_FULLHEADER
+	{"fullheader", queue_fullheader_kernel },
+#endif
 #ifdef USE_SCRYPT
 	{"scrypt",  queue_scrypt_kernel },
 #endif
 };  // NOTE(review): entries are indexed by enum cl_kernels values, so the order presumably has to match that enum -- confirm
 
 
-#ifdef HAVE_OPENCL
 /* We have only one thread that ever re-initialises GPUs, thus if any GPU
  * init command fails due to a completely wedged GPU, the thread will never
  * return, unable to harm other GPUs. If it does return, it means we only had
@@ -1327,7 +1380,7 @@ select_cgpu:
 		//free(clState);
 
 		applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
-		clStates[thr_id] = initCl(virtual_gpu, name, sizeof(name));
+		clStates[thr_id] = opencl_create_clState(virtual_gpu, name, sizeof(name));
 		if (!clStates[thr_id]) {
 			applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
 			goto select_cgpu;
@@ -1349,15 +1402,7 @@ select_cgpu:
 out:
 	return NULL;
 }
-#else
-void *reinit_gpu(__maybe_unused void *userdata)
-{
-	return NULL;
-}
-#endif
-
 
-#ifdef HAVE_OPENCL
 struct device_drv opencl_api;
 
 static int opencl_autodetect()
@@ -1388,13 +1433,9 @@ static int opencl_autodetect()
 	if (!nDevs)
 		return 0;
 
-	/* If opt_g_threads is not set, use default 1 thread on scrypt and
-	 * 2 for regular mining */
 	if (opt_g_threads == -1) {
-		if (opt_scrypt)
-			opt_g_threads = 1;
-		else
-			opt_g_threads = 2;
+		// NOTE: This should ideally default to 2 for non-scrypt
+		opt_g_threads = 1;
 	}
 
 #ifdef HAVE_SENSORS
@@ -1440,9 +1481,13 @@ static int opencl_autodetect()
 
 static void opencl_detect()
 {
-	int flags = 0;
-	if (!opt_scrypt)
-		flags |= GDF_DEFAULT_NOAUTO;
+	int flags = GDF_DEFAULT_NOAUTO;  // start with the flag set; it is cleared below if any goal qualifies
+	struct mining_goal_info *goal, *tmpgoal;
+	HASH_ITER(hh, mining_goals, goal, tmpgoal)
+	{
+		if (!goal->malgo->opencl_nodefault)  // a single goal whose algorithm does not set opencl_nodefault is enough
+			flags &= ~GDF_DEFAULT_NOAUTO;
+	}
 	generic_detect(&opencl_api, NULL, opencl_autodetect, flags);
 }
 
@@ -1541,8 +1586,18 @@ get_opencl_api_extra_device_status(struct cgpu_info *gpu)
 	root = api_add_int(root, "Powertune", &pt, true);
 
 	char intensity[20];
-	uint32_t oclthreads = data->oclthreads;
-	double intensityf = oclthreads_to_intensity(oclthreads, !opt_scrypt);
+	uint32_t oclthreads;
+	double intensityf = data->intensity;
+	// FIXME: Some way to express intensities malgo-neutral?
+	struct mining_goal_info * const goal = get_mining_goal("default");
+	struct mining_algorithm * const malgo = goal->malgo;
+	if (data->intensity == intensity_not_set)
+	{
+		oclthreads = data->oclthreads;
+		intensityf = malgo->opencl_oclthreads_to_intensity(oclthreads);
+	}
+	else
+		oclthreads = malgo->opencl_intensity_to_oclthreads(intensityf);
 	double xintensity = oclthreads_to_xintensity(oclthreads, clState->max_compute_units);
 	if (data->dynamic)
 		strcpy(intensity, "D");
@@ -1557,7 +1612,6 @@ get_opencl_api_extra_device_status(struct cgpu_info *gpu)
 }
 
 struct opencl_thread_data {
-	cl_int (*queue_kernel_parameters)(_clState *, struct work *, cl_uint);
 	uint32_t *res;  // per-thread result buffer, calloc'd in opencl_thread_init
 };
 
@@ -1572,7 +1626,7 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 	int virtual_gpu = data->virtual_gpu;
 	int i = thr->id;
 	static bool failmessage = false;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+	int buffersize = OPENCL_MAX_BUFFERSIZE;
 
 	if (!blank_res)
 		blank_res = calloc(buffersize, 1);
@@ -1583,7 +1637,7 @@ static bool opencl_thread_prepare(struct thr_info *thr)
 
 	strcpy(name, "");
 	applog(LOG_INFO, "Init GPU thread %i GPU %i virtual GPU %i", i, gpu, virtual_gpu);
-	clStates[i] = initCl(virtual_gpu, name, sizeof(name));
+	clStates[i] = opencl_create_clState(virtual_gpu, name, sizeof(name));
 	if (!clStates[i]) {
 #ifdef HAVE_CURSES
 		if (use_curses)
@@ -1629,35 +1683,13 @@ static bool opencl_thread_init(struct thr_info *thr)
 	cl_int status = 0;
 	thrdata = calloc(1, sizeof(*thrdata));
 	thr->cgpu_data = thrdata;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+	int buffersize = OPENCL_MAX_BUFFERSIZE;
 
 	if (!thrdata) {
 		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
 		return false;
 	}
 
-	switch (clState->chosen_kernel) {
-		case KL_POCLBM:
-			thrdata->queue_kernel_parameters = &queue_poclbm_kernel;
-			break;
-		case KL_PHATK:
-			thrdata->queue_kernel_parameters = &queue_phatk_kernel;
-			break;
-		case KL_DIAKGCN:
-			thrdata->queue_kernel_parameters = &queue_diakgcn_kernel;
-			break;
-#ifdef USE_SCRYPT
-		case KL_SCRYPT:
-			thrdata->queue_kernel_parameters = &queue_scrypt_kernel;
-			gpu->min_nonce_diff = 1./0x10000;
-			break;
-#endif
-		default:
-		case KL_DIABLO:
-			thrdata->queue_kernel_parameters = &queue_diablo_kernel;
-			break;
-	}
-
 	thrdata->res = calloc(buffersize, 1);
 
 	if (!thrdata->res) {
@@ -1680,21 +1712,48 @@ static bool opencl_thread_init(struct thr_info *thr)
 	return true;
 }
 
+static  // drv_min_nonce_diff callback for the OpenCL driver; proc is not consulted
+float opencl_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	return malgo->opencl_min_nonce_diff ?: -1.;  // GNU "?:" shorthand: the algorithm's value when nonzero, otherwise -1
+}
 
+#ifdef USE_SHA256D
 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
-#ifdef USE_SCRYPT
-	if (!opt_scrypt)
-#endif
+	const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+	if (malgo->algo == POW_SHA256D)  // the precalculation is done for SHA256d work only
 	{
 		struct opencl_work_data * const blk = _opencl_work_data(work);
 		precalc_hash(blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
 	}
 	return true;  // always reports success
 }
+#endif
 
 extern int opt_dynamic_interval;
 
+const struct opencl_kernel_info *opencl_scanhash_get_kernel(struct cgpu_info * const cgpu, _clState * const clState, const struct mining_algorithm * const malgo)  // returns the device's kernel info for malgo, choosing a file and loading the kernel on first use
+{
+	struct opencl_device_data * const data = cgpu->device_data;
+	struct opencl_kernel_info *kernelinfo = NULL;
+	kernelinfo = &data->kernelinfo[malgo->algo];  // one slot per mining algorithm
+	if (!kernelinfo->file)  // no kernel file chosen yet: ask the algorithm for its default
+	{
+		kernelinfo->file = malgo->opencl_get_default_kernel_file(malgo, cgpu, clState);
+		if (!kernelinfo->file)
+			applogr(NULL, LOG_ERR, "%s: Unsupported mining algorithm", cgpu->dev_repr);  // NOTE(review): applogr presumably logs and returns its first argument (NULL here) -- confirm
+	}
+	if (!kernelinfo->loaded)
+	{
+		if (!opencl_load_kernel(cgpu, clState, cgpu->name, kernelinfo, kernelinfo->file, malgo))
+			applogr(NULL, LOG_ERR, "%s: Failed to load kernel", cgpu->dev_repr);
+		
+		kernelinfo->queue_kernel_parameters = kernel_interfaces[kernelinfo->interface].queue_kernel_parameters_func;  // bind the queue function matching the loaded kernel's interface
+	}
+	return kernelinfo;
+}
+
 static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 				int64_t __maybe_unused max_nonce)
 {
@@ -1703,15 +1762,28 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	struct cgpu_info *gpu = thr->cgpu;
 	struct opencl_device_data * const data = gpu->device_data;
 	_clState *clState = clStates[thr_id];
-	const cl_kernel *kernel = &clState->kernel;
+	const struct mining_algorithm * const malgo = work_mining_algorithm(work);
+	const struct opencl_kernel_info *kinfo = opencl_scanhash_get_kernel(gpu, clState, malgo);
+	if (!kinfo)
+		return -1;
+	const cl_kernel * const kernel = &kinfo->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
 
 	cl_int status;
 	size_t globalThreads[1];
-	size_t localThreads[1] = { clState->wsize };
+	size_t localThreads[1] = { kinfo->wsize };
 	int64_t hashes;
-	int found = opt_scrypt ? SCRYPT_FOUND : FOUND;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+	int found = FOUND;
+	int buffersize = BUFFERSIZE;
+#ifdef USE_SCRYPT
+	if (malgo->algo == POW_SCRYPT)
+	{
+		found = SCRYPT_FOUND;
+		buffersize = SCRYPT_BUFFERSIZE;
+	}
+#endif
+	if (data->intensity != intensity_not_set)
+		data->oclthreads = malgo->opencl_intensity_to_oclthreads(data->intensity);
 
 	/* Windows' timer resolution is only 15ms so oversample 5x */
 	if (data->dynamic && (++data->intervals * dynamic_us) > 70000) {
@@ -1721,16 +1793,18 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		cgtime(&tv_gpuend);
 		gpu_us = us_tdiff(&tv_gpuend, &data->tv_gpustart) / data->intervals;
 		if (gpu_us > dynamic_us) {
-			const unsigned long min_oclthreads = intensity_to_oclthreads(MIN_INTENSITY, !opt_scrypt);
+			const unsigned long min_oclthreads = malgo->opencl_min_oclthreads;
 			data->oclthreads /= 2;
 			if (data->oclthreads < min_oclthreads)
 				data->oclthreads = min_oclthreads;
 		} else if (gpu_us < dynamic_us / 2) {
-			const unsigned long max_oclthreads = intensity_to_oclthreads(MAX_INTENSITY, !opt_scrypt);
+			const unsigned long max_oclthreads = malgo->opencl_max_oclthreads;
 			data->oclthreads *= 2;
 			if (data->oclthreads > max_oclthreads)
 				data->oclthreads = max_oclthreads;
 		}
+		if (data->intensity != intensity_not_set)
+			data->intensity = malgo->opencl_oclthreads_to_intensity(data->oclthreads);
 		memcpy(&(data->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
 		data->intervals = 0;
 	}
@@ -1744,13 +1818,14 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
 
-	status = thrdata->queue_kernel_parameters(clState, work, globalThreads[0]);
+	status = kinfo->queue_kernel_parameters(kinfo, clState, work, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
 		return -1;
 	}
 
-	if (clState->goffset) {
+	if (kinfo->goffset)
+	{
 		size_t global_work_offset[1];
 
 		global_work_offset[0] = work->blk.nonce;
@@ -1789,7 +1864,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 			return -1;
 		}
 		applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
-		postcalc_hash_async(thr, work, thrdata->res);
+		postcalc_hash_async(thr, work, thrdata->res, kinfo->interface);
 		memset(thrdata->res, 0, buffersize);
 		/* This finish flushes the writebuffer set with CL_FALSE in clEnqueueWriteBuffer */
 		clFinish(clState->commandQueue);
@@ -1798,13 +1873,24 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	return hashes;
 }
 
+// Releases the OpenCL kernel and program held by kinfo, if any.
+// Slots whose kernel was never loaded are left untouched, so no null or stale
+// handles are handed to the OpenCL runtime, and the slot is marked unloaded
+// afterwards so that a repeated call cannot release the same handles twice.
+static
+void opencl_clean_kernel_info(struct opencl_kernel_info * const kinfo)
+{
+	if (!kinfo->loaded)
+		return;
+	clReleaseKernel(kinfo->kernel);
+	clReleaseProgram(kinfo->program);
+	kinfo->loaded = false;
+}
+
 static void opencl_thread_shutdown(struct thr_info *thr)
 {
+	struct cgpu_info * const cgpu = thr->cgpu;
+	struct opencl_device_data * const data = cgpu->device_data;
 	const int thr_id = thr->id;
 	_clState *clState = clStates[thr_id];
 
-	clReleaseKernel(clState->kernel);
-	clReleaseProgram(clState->program);
+	for (unsigned i = 0; i < (unsigned)POW_ALGORITHM_COUNT; ++i)  // visit every per-algorithm kernel slot of the device
+	{
+		opencl_clean_kernel_info(&data->kernelinfo[i]);
+	}
 	clReleaseCommandQueue(clState->commandQueue);  // then release this thread's own queue and context
 	clReleaseContext(clState->context);
 }
@@ -1822,6 +1908,7 @@ static const struct bfg_set_device_definition opencl_set_device_funcs_probe[] =
 	{"vector", opencl_init_vector},
 	{"work_size", opencl_init_worksize},
 	{"binary", opencl_init_binary},
+	{"goffset", opencl_init_goffset},
 #ifdef HAVE_ADL
 	{"adl_mapping", opencl_init_gpu_map},
 	{"clock", opencl_init_gpu_engine},
@@ -1847,6 +1934,7 @@ static const struct bfg_set_device_definition opencl_set_device_funcs[] = {
 	{"vector", opencl_cannot_set, ""},
 	{"work_size", opencl_cannot_set, ""},
 	{"binary", opencl_cannot_set, ""},
+	{"goffset", opencl_cannot_set, ""},
 #ifdef HAVE_ADL
 	{"adl_mapping", opencl_cannot_set, "Map to ADL device"},
 	{"clock", opencl_set_gpu_engine, "GPU engine clock"},
@@ -1869,7 +1957,7 @@ struct device_drv opencl_api = {
 	.dname = "opencl",
 	.name = "OCL",
 	.probe_priority = 110,
-	.supported_algos = POW_SHA256D | POW_SCRYPT,
+	.drv_min_nonce_diff = opencl_min_nonce_diff,
 	.drv_detect = opencl_detect,
 	.reinit_device = reinit_opencl_device,
 	.watchdog = opencl_watchdog,
@@ -1882,8 +1970,9 @@ struct device_drv opencl_api = {
 	.get_api_extra_device_status = get_opencl_api_extra_device_status,
 	.thread_prepare = opencl_thread_prepare,
 	.thread_init = opencl_thread_init,
+#ifdef USE_SHA256D
 	.prepare_work = opencl_prepare_work,
+#endif
 	.scanhash = opencl_scanhash,
 	.thread_shutdown = opencl_thread_shutdown,
 };
-#endif

+ 28 - 8
driver-opencl.h

@@ -1,6 +1,7 @@
 #ifndef BFG_DRIVER_OPENCL
 #define BFG_DRIVER_OPENCL
 
+#include <float.h>
 #include <stdbool.h>
 
 #include "CL/cl.h"
@@ -18,23 +19,44 @@ enum opencl_binary_usage {
 	OBU_NONE     = 4,
 };
 
+static const float intensity_not_set = FLT_MAX;  // sentinel meaning "no plain intensity value is stored"
+
+struct opencl_kernel_info;
+struct _clState;
+
+typedef cl_int (*queue_kernel_parameters_func_t)(const struct opencl_kernel_info *, struct _clState *, struct work *, cl_uint);
+
+struct opencl_kernel_info {  // state for one kernel; opencl_device_data keeps one of these per mining algorithm
+	char *file;  // kernel name without the ".cl" suffix
+	bool loaded;  // checked before attempting to load the kernel
+	cl_program program;
+	cl_kernel kernel;
+	bool goffset;  // when set, the nonce is passed as a global work offset instead of a kernel argument
+	enum cl_kernels interface;  // index into kernel_interfaces
+	size_t wsize;  // used as the local work size when scanning
+	queue_kernel_parameters_func_t queue_kernel_parameters;  // sets this kernel's arguments for a piece of work
+};
+
 struct opencl_device_data {
 	bool mapped;
 	int virtual_gpu;
 	int virtual_adl;
 	unsigned long oclthreads;
+	float intensity;
 	char *_init_intensity;
 	bool dynamic;
 	
+	enum bfg_tristate use_goffset;
 	cl_uint vwidth;
 	size_t work_size;
-	char *kernel_file;
 	cl_ulong max_alloc;
 	
+	struct opencl_kernel_info kernelinfo[POW_ALGORITHM_COUNT];
+	
 	enum opencl_binary_usage opt_opencl_binaries;
 #ifdef USE_SCRYPT
-	int opt_lg, lookup_gap;
-	size_t opt_tc, thread_concurrency;
+	int lookup_gap;
+	size_t thread_concurrency;
 	size_t shaders;
 #endif
 	struct timeval tv_gpustart;
@@ -59,11 +81,11 @@ struct opencl_device_data {
 #endif
 };
 
-extern double oclthreads_to_intensity(unsigned long oclthreads, bool is_sha256d);
-extern unsigned long intensity_to_oclthreads(double intensity, bool is_sha256d);
+extern float opencl_proc_get_intensity(struct cgpu_info *, const char **iunit);
 extern unsigned long xintensity_to_oclthreads(double xintensity, cl_uint max_compute_units);
 extern bool opencl_set_intensity_from_str(struct cgpu_info *, const char *newvalue);
 
+#ifdef USE_SHA256D
 struct opencl_work_data {
 	cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;
 	cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
@@ -87,10 +109,8 @@ struct opencl_work_data {
 	cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
 	cl_uint zeroA, zeroB;
 	cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
-#ifdef USE_SCRYPT
-	struct work *work;
-#endif
 };
+#endif
 
 extern void opencl_early_init();
 extern char *print_ndevs_and_exit(int *ndevs);

+ 13 - 4
driver-proxy.c

@@ -70,6 +70,12 @@ void *prune_worklog_thread(void *userdata)
 	return NULL;
 }
 
+static  // drv_min_nonce_diff callback for the proxy driver
+float proxy_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	return minimum_pdiff;  // the same value for every device and algorithm; neither argument is used
+}
+
 static
 void proxy_first_client(struct cgpu_info *cgpu)
 {
@@ -99,7 +105,6 @@ struct proxy_client *proxy_find_or_create_client(const char *username)
 			.threads = 0,
 			.device_data = client,
 			.device_path = user,
-			.min_nonce_diff = (opt_scrypt ? (1./0x10000) : 1.),
 		};
 		timer_set_now(&cgpu->cgminer_stats.start_tv);
 		if (unlikely(!create_new_cgpus(add_cgpu_live, cgpu)))
@@ -112,7 +117,7 @@ struct proxy_client *proxy_find_or_create_client(const char *username)
 		*client = (struct proxy_client){
 			.username = user,
 			.cgpu = cgpu,
-			.desired_share_pdiff = opt_scrypt ? (1./0x10000) : 1.,
+			.desired_share_pdiff = 0.,
 		};
 		
 		b = HASH_COUNT(proxy_clients);
@@ -133,14 +138,17 @@ struct proxy_client *proxy_find_or_create_client(const char *username)
 	return client;
 }
 
+// See also, stratumsrv_init_diff in driver-stratum.c
 static
 const char *proxy_set_diff(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const success)
 {
 	struct proxy_client * const client = proc->device_data;
-	const double nv = atof(newvalue);
-	if (nv <= 0)
+	double nv = atof(newvalue);
+	if (nv < 0)
 		return "Invalid difficulty";
 	
+	if (nv <= minimum_pdiff)
+		nv = minimum_pdiff;
 	client->desired_share_pdiff = nv;
 	
 #ifdef USE_LIBEVENT
@@ -167,6 +175,7 @@ static const struct bfg_set_device_definition proxy_set_device_funcs[] = {
 struct device_drv proxy_drv = {
 	.dname = "proxy",
 	.name = "PXY",
+	.drv_min_nonce_diff = proxy_min_nonce_diff,
 #ifdef HAVE_CURSES
 	.proc_wlogprint_status = proxy_wlogprint_status,
 #endif

+ 133 - 24
driver-stratum.c

@@ -15,6 +15,7 @@
 #include <winsock2.h>
 #endif
 
+#include <float.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <string.h>
@@ -34,8 +35,8 @@
 
 #define _ssm_client_octets     work2d_xnonce1sz
 #define _ssm_client_xnonce2sz  work2d_xnonce2sz
-static char *_ssm_notify;
-static int _ssm_notify_sz;
+static char *_ssm_notify, *_ssm_setgoal;
+static int _ssm_notify_sz, _ssm_setgoal_sz;
 static struct stratumsrv_job *_ssm_last_ssj;
 static struct event *ev_notify;
 static notifier_t _ssm_update_notifier;
@@ -65,12 +66,21 @@ struct stratumsrv_conn_userlist {
 	struct stratumsrv_conn_userlist *next;
 };
 
+enum stratumsrv_conn_capability {  // one bit per capability, so values can be OR-ed together and tested with &
+	SCC_NOTIFY    = 1 << 0,  // connection is sent mining.notify messages
+	SCC_SET_DIFF  = 1 << 1,  // connection is sent difficulty updates
+	SCC_SET_GOAL  = 1 << 2,  // connection is sent mining.set_goal messages
+};
+typedef uint8_t stratumsrv_conn_capabilities_t;  // holds a set of the capability bits above
+
 struct stratumsrv_conn {
 	struct bufferevent *bev;
+	stratumsrv_conn_capabilities_t capabilities;
 	uint32_t xnonce1_le;
 	struct timeval tv_hashes_done;
 	bool hashes_done_ext;
 	float current_share_pdiff;
+	bool desired_default_share_pdiff;  // Set if any authenticated user is configured for the default
 	float desired_share_pdiff;
 	struct stratumsrv_conn_userlist *authorised_users;
 	
@@ -93,6 +103,15 @@ void stratumsrv_send_set_difficulty(struct stratumsrv_conn * const conn, const f
 
 #define _ssm_gen_dummy_work work2d_gen_dummy_work
 
+static  // Picks the share difficulty to use for conn when mining with malgo
+float stratumsrv_choose_share_pdiff(const struct stratumsrv_conn * const conn, const struct mining_algorithm * const malgo)
+{
+	float conn_pdiff = conn->desired_share_pdiff;  // start from the connection's own desired value
+	if (conn->desired_default_share_pdiff && malgo->reasonable_low_nonce_diff < conn_pdiff)  // a user on the default setting lowers it to the algorithm's value when that is smaller
+		conn_pdiff = malgo->reasonable_low_nonce_diff;
+	return conn_pdiff;
+}
+
 static
 bool stratumsrv_update_notify_str(struct pool * const pool, bool clean)
 {
@@ -145,6 +164,10 @@ bool stratumsrv_update_notify_str(struct pool * const pool, bool clean)
 	bin2hex(ntime, &ntime_n, 4);
 	p += sprintf(p, "],\"%s\",\"%s\",\"%s\",%s],\"method\":\"mining.notify\",\"id\":null}\n", version, nbits, ntime, clean ? "true" : "false");
 	
+	const size_t setgoalbufsz = 49 + strlen(pool->goal->name) + (pool->goalname ? (1 + strlen(pool->goalname)) : 0) + 12 + strlen(pool->goal->malgo->name) + 5 + 1;
+	char * const setgoalbuf = malloc(setgoalbufsz);
+	snprintf(setgoalbuf, setgoalbufsz, "{\"method\":\"mining.set_goal\",\"id\":null,\"params\":[\"%s%s%s\",{\"malgo\":\"%s\"}]}\n", pool->goal->name, pool->goalname ? "/" : "", pool->goalname ?: "", pool->goal->malgo->name);
+	
 	ssj = malloc(sizeof(*ssj));
 	*ssj = (struct stratumsrv_job){
 		.my_job_id = strdup(my_job_id),
@@ -165,20 +188,37 @@ bool stratumsrv_update_notify_str(struct pool * const pool, bool clean)
 	assert(_ssm_notify_sz <= bufsz);
 	free(_ssm_notify);
 	_ssm_notify = buf;
+	const bool setgoal_changed = _ssm_setgoal ? strcmp(setgoalbuf, _ssm_setgoal) : true;
+	if (setgoal_changed)
+	{
+		free(_ssm_setgoal);
+		_ssm_setgoal = setgoalbuf;
+		_ssm_setgoal_sz = setgoalbufsz - 1;
+	}
+	else
+		free(setgoalbuf);
 	_ssm_last_ssj = ssj;
 	
 	float pdiff = target_diff(ssj->swork.target);
+	const struct mining_goal_info * const goal = pool->goal;
+	const struct mining_algorithm * const malgo = goal->malgo;
 	LL_FOREACH(_ssm_connections, conn)
 	{
 		if (unlikely(!conn->xnonce1_le))
 			continue;
-		float conn_pdiff = conn->desired_share_pdiff;
-		if (pdiff < conn_pdiff)
-			conn_pdiff = pdiff;
-		ssj->job_pdiff[conn->xnonce1_le] = conn_pdiff;
-		if (conn_pdiff != conn->current_share_pdiff)
-			stratumsrv_send_set_difficulty(conn, conn_pdiff);
-		bufferevent_write(conn->bev, _ssm_notify, _ssm_notify_sz);
+		if (setgoal_changed && (conn->capabilities & SCC_SET_GOAL))
+			bufferevent_write(conn->bev, setgoalbuf, setgoalbufsz);
+		if (likely(conn->capabilities & SCC_SET_DIFF))
+		{
+			float conn_pdiff = stratumsrv_choose_share_pdiff(conn, malgo);
+			if (pdiff < conn_pdiff)
+				conn_pdiff = pdiff;
+			ssj->job_pdiff[conn->xnonce1_le] = conn_pdiff;
+			if (conn_pdiff != conn->current_share_pdiff)
+				stratumsrv_send_set_difficulty(conn, conn_pdiff);
+		}
+		if (likely(conn->capabilities & SCC_NOTIFY))
+			bufferevent_write(conn->bev, _ssm_notify, _ssm_notify_sz);
 	}
 	
 	return true;
@@ -195,15 +235,21 @@ void stratumsrv_client_changed_diff(struct proxy_client * const client)
 		++connections_affected;
 		
 		float desired_share_pdiff = client->desired_share_pdiff;
+		bool any_default_share_pdiff = !desired_share_pdiff;
 		LL_FOREACH(conn->authorised_users, ule2)
 		{
 			struct proxy_client * const other_client = ule2->client;
+			if (!other_client->desired_share_pdiff)
+				any_default_share_pdiff = true;
+			else
 			if (other_client->desired_share_pdiff < desired_share_pdiff)
 				desired_share_pdiff = other_client->desired_share_pdiff;
 		}
-		if (conn->desired_share_pdiff != desired_share_pdiff)
+		BFGINIT(desired_share_pdiff, FLT_MAX);
+		if (conn->desired_share_pdiff != desired_share_pdiff || conn->desired_default_share_pdiff != any_default_share_pdiff)
 		{
 			conn->desired_share_pdiff = desired_share_pdiff;
+			conn->desired_default_share_pdiff = any_default_share_pdiff;
 			++connections_changed;
 		}
 	}
@@ -369,18 +415,52 @@ void _stratumsrv_failure(struct bufferevent * const bev, const char * const idst
 }while(0)
 
 static
-void _stratumsrv_success(struct bufferevent * const bev, const char * const idstr)
+void stratumsrv_success2(struct bufferevent * const bev, const char * const idstr, const char * const resultstr)
 {
 	if (!idstr)
 		return;
 	
-	size_t bufsz = 36 + strlen(idstr);
+	size_t bufsz = 32 + strlen(resultstr) + strlen(idstr);
 	char buf[bufsz];
 	
-	bufsz = sprintf(buf, "{\"result\":true,\"id\":%s,\"error\":null}\n", idstr);
+	bufsz = sprintf(buf, "{\"result\":%s,\"id\":%s,\"error\":null}\n", resultstr, idstr);
 	bufferevent_write(bev, buf, bufsz);
 }
 
+// Convenience wrapper: reports success for request idstr with the JSON literal true as the result
+static inline
+void _stratumsrv_success(struct bufferevent * const bev, const char * const idstr)
+{
+	stratumsrv_success2(bev, idstr, "true");
+}
+
+// Handles the "mining.capabilities" request.
+// params must be a JSON array, otherwise the request fails with "Bad params".
+// The connection's capability mask is cleared, then one bit is set for each recognised key
+// (compared case-insensitively) of the object in params[0], if there is one.
+// Unrecognised keys are ignored. The reply carries a null result.
+static
+void stratumsrv_mining_capabilities(struct bufferevent * const bev, json_t * const params, const char * const idstr, struct stratumsrv_conn * const conn)
+{
+	static const struct {
+		const char *key;
+		stratumsrv_conn_capabilities_t flag;
+	} known_caps[] = {
+		{ "notify",         SCC_NOTIFY   },
+		{ "set_difficulty", SCC_SET_DIFF },
+		{ "set_goal",       SCC_SET_GOAL },
+	};
+	
+	// json_is_array() is already false for a NULL pointer and for JSON null
+	if (!json_is_array(params))
+		return_stratumsrv_failure(20, "Bad params");
+	
+	conn->capabilities = 0;
+	
+	json_t * const caps = json_array_size(params) ? json_array_get(params, 0) : NULL;
+	// json_is_object() likewise rejects NULL and JSON null
+	if (json_is_object(caps))
+	{
+		void *iter = json_object_iter(caps);
+		while (iter)
+		{
+			const char * const capname = json_object_iter_key(iter);
+			for (size_t i = 0; i < sizeof(known_caps) / sizeof(known_caps[0]); ++i)
+			{
+				if (strcasecmp(capname, known_caps[i].key))
+					continue;
+				conn->capabilities |= known_caps[i].flag;
+				break;
+			}
+			iter = json_object_iter_next(caps, iter);
+		}
+	}
+	
+	stratumsrv_success2(bev, idstr, "null");
+}
+
 static
 void stratumsrv_mining_subscribe(struct bufferevent * const bev, json_t * const params, const char * const idstr, struct stratumsrv_conn * const conn)
 {
@@ -407,12 +487,22 @@ void stratumsrv_mining_subscribe(struct bufferevent * const bev, json_t * const
 	bufsz = sprintf(buf, "{\"id\":%s,\"result\":[[[\"mining.set_difficulty\",\"x\"],[\"mining.notify\",\"%s\"]],\"%s\",%d],\"error\":null}\n", idstr, xnonce1x, xnonce1x, _ssm_client_xnonce2sz);
 	bufferevent_write(bev, buf, bufsz);
 	
-	float pdiff = target_diff(_ssm_last_ssj->swork.target);
-	if (pdiff > conn->desired_share_pdiff)
-		pdiff = conn->desired_share_pdiff;
-	_ssm_last_ssj->job_pdiff[*xnonce1_p] = pdiff;
-	stratumsrv_send_set_difficulty(conn, pdiff);
-	bufferevent_write(bev, _ssm_notify, _ssm_notify_sz);
+	if (conn->capabilities & SCC_SET_GOAL)
+		bufferevent_write(conn->bev, _ssm_setgoal, _ssm_setgoal_sz);
+	if (likely(conn->capabilities & SCC_SET_DIFF))
+	{
+		const struct pool * const pool = _ssm_last_ssj->swork.pool;
+		const struct mining_goal_info * const goal = pool->goal;
+		const struct mining_algorithm * const malgo = goal->malgo;
+		float pdiff = target_diff(_ssm_last_ssj->swork.target);
+		const float conn_pdiff = stratumsrv_choose_share_pdiff(conn, malgo);
+		if (pdiff > conn_pdiff)
+			pdiff = conn_pdiff;
+		_ssm_last_ssj->job_pdiff[*xnonce1_p] = pdiff;
+		stratumsrv_send_set_difficulty(conn, pdiff);
+	}
+	if (likely(conn->capabilities & SCC_NOTIFY))
+		bufferevent_write(bev, _ssm_notify, _ssm_notify_sz);
 }
 
 static
@@ -427,8 +517,19 @@ void stratumsrv_mining_authorize(struct bufferevent * const bev, json_t * const
 	if (unlikely(!client))
 		return_stratumsrv_failure(20, "Failed creating new cgpu");
 	
-	if ((!conn->authorised_users) || client->desired_share_pdiff < conn->desired_share_pdiff)
-		conn->desired_share_pdiff = client->desired_share_pdiff;
+	if (client->desired_share_pdiff)
+	{
+		if (!conn->authorised_users)
+			conn->desired_default_share_pdiff = false;
+		if ((!conn->authorised_users) || client->desired_share_pdiff < conn->desired_share_pdiff)
+			conn->desired_share_pdiff = client->desired_share_pdiff;
+	}
+	else
+	{
+		conn->desired_default_share_pdiff = true;
+		if (!conn->authorised_users)
+			conn->desired_share_pdiff = FLT_MAX;
+	}
 	
 	struct stratumsrv_conn_userlist *ule = malloc(sizeof(*ule));
 	*ule = (struct stratumsrv_conn_userlist){
@@ -585,6 +686,9 @@ errout:
 	else
 	if (!strcasecmp(method, "mining.subscribe"))
 		stratumsrv_mining_subscribe(bev, params, idstr, conn);
+	else
+	if (!strcasecmp(method, "mining.capabilities"))
+		stratumsrv_mining_capabilities(bev, params, idstr, conn);
 	else
 		_stratumsrv_failure(bev, idstr, -3, "Method not supported");
 	
@@ -644,15 +748,18 @@ void stratumsrv_event(struct bufferevent *bev, short events, void *p)
 	}
 }
 
+// See also, proxy_set_diff in driver-proxy.c
 static
 const char *stratumsrv_init_diff(struct cgpu_info * const proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const success)
 {
 	struct stratumsrv_conn * const conn = proc->device_data;
 	
-	const double nv = atof(newvalue);
-	if (nv <= 0)
+	double nv = atof(newvalue);
+	if (nv < 0)
 		return "Invalid difficulty";
 	
+	if (nv <= minimum_pdiff)
+		nv = minimum_pdiff;
 	conn->desired_share_pdiff = nv;
 	
 	return NULL;
@@ -672,7 +779,9 @@ void stratumlistener(struct evconnlistener *listener, evutil_socket_t sock, stru
 	conn = malloc(sizeof(*conn));
 	*conn = (struct stratumsrv_conn){
 		.bev = bev,
-		.desired_share_pdiff = opt_scrypt ? (1./0x10000) : 1.,
+		.capabilities = SCC_NOTIFY | SCC_SET_DIFF,
+		.desired_share_pdiff = FLT_MAX,
+		.desired_default_share_pdiff = true,
 	};
 	drv_set_defaults(&proxy_drv, stratumsrv_set_device_funcs_newconnect, conn, NULL, NULL, 1);
 	LL_PREPEND(_ssm_connections, conn);

+ 9 - 6
driver-titan.c

@@ -365,13 +365,19 @@ static bool configure_one_die(struct knc_titan_info *knc, int asic, int die)
 	return true;
 }
 
+// drv_min_nonce_diff callback for this driver: yields DEFAULT_DIFF_FILTERING_FLOAT for scrypt
+// and a negative value for every other algorithm. proc is not used.
+static
+float titan_min_nonce_diff(struct cgpu_info * const proc, const struct mining_algorithm * const malgo)
+{
+	if (malgo->algo != POW_SCRYPT)
+		return -1.;
+	return DEFAULT_DIFF_FILTERING_FLOAT;
+}
+
 static bool knc_titan_init(struct thr_info * const thr)
 {
 	struct cgpu_info * const cgpu = thr->cgpu, *proc;
 	struct knc_titan_core *knccore;
 	struct knc_titan_die *kncdie;
 	struct knc_titan_info *knc;
-	int i, asic, logical_dieno, ena_die, die;
+	int i, asic = 0, logical_dieno = 0, ena_die, die;
 	int total_cores = 0;
 	int asic_cores[KNC_TITAN_MAX_ASICS] = {0};
 
@@ -379,7 +385,6 @@ static bool knc_titan_init(struct thr_info * const thr)
 
 	for (proc = cgpu ; proc ; proc = proc->next_proc) {
 		proc->device_data = knc;
-		proc->min_nonce_diff = DEFAULT_DIFF_FILTERING_FLOAT;
 		if (proc->device == proc) {
 			asic = atoi(proc->device_path);
 			logical_dieno = 0;
@@ -505,9 +510,7 @@ static bool die_reconfigure(struct knc_titan_info * const knc, int asic, int die
 
 static bool knc_titan_prepare_work(struct thr_info *thr, struct work *work)
 {
-	struct cgpu_info * const cgpu = thr->cgpu;
-
-	work->nonce_diff = cgpu->min_nonce_diff;
+	work->nonce_diff = DEFAULT_DIFF_FILTERING_FLOAT;
 	return true;
 }
 
@@ -929,7 +932,7 @@ struct device_drv knc_titan_drv =
 	/* metadata */
 	.dname = "titan",
 	.name = "KNC",
-	.supported_algos = POW_SCRYPT,
+	.drv_min_nonce_diff = titan_min_nonce_diff,
 	.drv_detect = knc_titan_detect,
 
 	.thread_init = knc_titan_init,

+ 1 - 2
driver-zeusminer.c

@@ -219,7 +219,6 @@ bool zeusminer_thread_init(struct thr_info * const thr)
 {
 	struct cgpu_info * const device = thr->cgpu;
 	
-	device->min_nonce_diff = 1./0x10000;
 	device->set_device_funcs = zeusminer_set_device_funcs_live;
 	
 	return icarus_init(thr);
@@ -320,7 +319,7 @@ void zeusminer_drv_init()
 	// metadata
 	zeusminer_drv.dname = "zeusminer";
 	zeusminer_drv.name = "ZUS";
-	zeusminer_drv.supported_algos = POW_SCRYPT;
+	zeusminer_drv.drv_min_nonce_diff = common_scrypt_min_nonce_diff;
 	
 	// detect device
 	zeusminer_drv.lowl_probe = zeusminer_lowl_probe;

+ 21 - 7
findnonce.c

@@ -10,7 +10,6 @@
  */
 
 #include "config.h"
-#ifdef HAVE_OPENCL
 
 #include <stdint.h>
 #include <stdio.h>
@@ -20,8 +19,8 @@
 
 #include "findnonce.h"
 #include "miner.h"
-#include "scrypt.h"
 
+#ifdef USE_SHA256D
 const uint32_t SHA256_K[64] = {
 	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
 	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
@@ -133,13 +132,15 @@ void precalc_hash(struct opencl_work_data *blk, uint32_t *state, uint32_t *data)
 	blk->sixA = blk->ctx_g + SHA256_K[6];
 	blk->sevenA = blk->ctx_h + SHA256_K[7];
 }
+#endif
 
 struct pc_data {
 	struct thr_info *thr;
 	struct work work;
-	uint32_t res[SCRYPT_MAXBUFFERS];
+	uint32_t res[OPENCL_MAX_BUFFERSIZE];
 	pthread_t pth;
 	int found;
+	enum cl_kernels kinterface;
 };
 
 static void *postcalc_hash(void *userdata)
@@ -147,7 +148,11 @@ static void *postcalc_hash(void *userdata)
 	struct pc_data *pcd = (struct pc_data *)userdata;
 	struct thr_info *thr = pcd->thr;
 	unsigned int entry = 0;
-	int found = opt_scrypt ? SCRYPT_FOUND : FOUND;
+	int found = FOUND;
+#ifdef USE_SCRYPT
+	if (pcd->kinterface == KL_SCRYPT)
+		found = SCRYPT_FOUND;
+#endif
 
 	pthread_detach(pthread_self());
 	RenameThread("postcalchsh");
@@ -163,6 +168,10 @@ static void *postcalc_hash(void *userdata)
 
 	for (entry = 0; entry < pcd->res[found]; entry++) {
 		uint32_t nonce = pcd->res[entry];
+#ifdef USE_OPENCL_FULLHEADER
+		if (pcd->kinterface == KL_FULLHEADER)
+			nonce = swab32(nonce);
+#endif
 
 		applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry);
 		submit_nonce(thr, &pcd->work, nonce);
@@ -174,7 +183,7 @@ static void *postcalc_hash(void *userdata)
 	return NULL;
 }
 
-void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
+void postcalc_hash_async(struct thr_info * const thr, struct work * const work, uint32_t * const res, const enum cl_kernels kinterface)
 {
 	struct pc_data *pcd = malloc(sizeof(struct pc_data));
 	int buffersize;
@@ -186,9 +195,15 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
 
 	*pcd = (struct pc_data){
 		.thr = thr,
+		.kinterface = kinterface,
 	};
 	__copy_work(&pcd->work, work);
-	buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
+#ifdef USE_SCRYPT
+	if (kinterface == KL_SCRYPT)
+		buffersize = SCRYPT_BUFFERSIZE;
+	else
+#endif
+		buffersize = BUFFERSIZE;
 	memcpy(&pcd->res, res, buffersize);
 
 	if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) {
@@ -196,4 +211,3 @@ void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
 		return;
 	}
 }
-#endif /* HAVE_OPENCL */

+ 10 - 3
findnonce.h

@@ -12,12 +12,19 @@
 #define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
 #define FOUND (0x0F)
 
+#ifdef USE_SCRYPT
 #define SCRYPT_MAXBUFFERS (0x100)
 #define SCRYPT_BUFFERSIZE (sizeof(uint32_t) * SCRYPT_MAXBUFFERS)
 #define SCRYPT_FOUND (0xFF)
 
-#ifdef HAVE_OPENCL
+#define OPENCL_MAX_BUFFERSIZE  SCRYPT_BUFFERSIZE
+#else
+#define OPENCL_MAX_BUFFERSIZE  BUFFERSIZE
+#endif
+
+#ifdef USE_SHA256D
 extern void precalc_hash(struct opencl_work_data *blk, uint32_t *state, uint32_t *data);
-extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
-#endif /* HAVE_OPENCL */
+#endif
+extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res, enum cl_kernels);
+
 #endif /*__FINDNONCE_H__*/

+ 3 - 3
gc3355.c

@@ -513,7 +513,7 @@ void gc3355_init_miner(int fd, int pll_freq)
 	gc3355_set_pll_freq(fd, pll_freq);
 }
 
-void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_only)
+void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_only, bool scrypt)
 {
 	gc3355_send_cmds(fd, gcp_chip_reset_cmd);
 
@@ -525,7 +525,7 @@ void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_o
 	// initialize units
 	gc3355_reset_dtr(fd);
 
-	if (opt_scrypt && scrypt_only)
+	if (scrypt && scrypt_only)
 		gc3355_scrypt_only_init(fd);
 	else
 	{
@@ -541,7 +541,7 @@ void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_o
 
 	if (!detect_only)
 	{
-		if (!opt_scrypt)
+		if (!scrypt)
 			// open sha2 units
 			gc3355_open_sha2_units(fd, opt_sha2_units);
 

+ 1 - 1
gc3355.h

@@ -44,7 +44,7 @@ extern ssize_t gc3355_read(int fd, char *buf, size_t size);
 extern ssize_t gc3355_write(int fd, const void * const buf, const size_t size);
 
 extern void gc3355_init_miner(int fd, int pll_freq);
-extern void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_only);
+extern void gc3355_init_dualminer(int fd, int pll_freq, bool scrypt_only, bool detect_only, bool scrypt);
 
 extern void gc3355_scrypt_reset(int fd);
 extern void gc3355_scrypt_only_reset(int fd);

+ 2 - 2
httpsrv.c

@@ -28,8 +28,8 @@
 #include <microhttpd.h>
 
 #include "logging.h"
+#include "miner.h"
 #include "util.h"
-#include "version.h"
 
 static struct MHD_Daemon *httpsrv;
 
@@ -37,7 +37,7 @@ extern int handle_getwork(struct MHD_Connection *, bytes_t *);
 
 void httpsrv_prepare_resp(struct MHD_Response *resp)
 {
-	MHD_add_response_header(resp, MHD_HTTP_HEADER_SERVER, PACKAGE"/"VERSION" getwork server");
+	MHD_add_response_header(resp, MHD_HTTP_HEADER_SERVER, bfgminer_name_slash_ver);
 }
 
 static

+ 1 - 1
libbase58

@@ -1 +1 @@
-Subproject commit 5d537990219726df09bccfb602450dd95343c847
+Subproject commit b6adca8ebb12962374a9cd1f6b543186f0cf6a40

+ 3 - 3
libbitfury.c

@@ -241,7 +241,7 @@ int libbitfury_detect_chip(struct spi_port *port, int chip_n) {
 	int i;
 	uint32_t newbuf[17] = {0}, oldbuf[17] = {0};
 	uint32_t ocounter;
-	long odiff = 0;
+	long long odiff = 0;
 
 	memset(newbuf, 0, 17 * 4);
 	memset(oldbuf, 0, 17 * 4);
@@ -274,9 +274,9 @@ int libbitfury_detect_chip(struct spi_port *port, int chip_n) {
 
 		counter = libbitfury_get_counter(newbuf, oldbuf);
 		if (ocounter) {
-			uint32_t cdiff = libbitfury_c_diff(ocounter, counter);
+			long long cdiff = libbitfury_c_diff(ocounter, counter);
 
-			if (abs(odiff - cdiff) < 5000)
+			if (llabs(odiff - cdiff) < 5000)
 				return 1;
 			odiff = cdiff;
 		}

+ 29 - 0
lowl-spi.c

@@ -50,6 +50,8 @@
 
 #include "logging.h"
 #include "lowl-spi.h"
+#include "miner.h"
+#include "util.h"
 
 #ifdef HAVE_LINUX_SPI
 bool sys_spi_txrx(struct spi_port *port);
@@ -253,6 +255,33 @@ bool linux_spi_txrx(struct spi_port * const spi)
 	return (ioctl(fd, SPI_IOC_MESSAGE(1), &xf) > 0);
 }
 
+// Wrapper around linux_spi_txrx() that, while opt_dev_protocol is set, logs a hex dump of the
+// transmit buffer before the transfer and of the receive buffer (or an error note) after it.
+// Returns whatever linux_spi_txrx() returned.
+bool linux_spi_txrx2(struct spi_port * const spi)
+{
+	const size_t len = spi_getbufsz(spi);
+	
+	if (opt_dev_protocol)
+	{
+		// Two hex digits per byte plus the terminating NUL
+		char txhex[(len * 2) + 1];
+		bin2hex(txhex, spi_gettxbuf(spi), len);
+		applog(LOG_DEBUG, "%s: %cX %s", spi->repr, 'T', txhex);
+	}
+	
+	const bool ok = linux_spi_txrx(spi);
+	
+	// The option is consulted again after the transfer
+	if (!opt_dev_protocol)
+		return ok;
+	
+	if (likely(ok))
+	{
+		char rxhex[(len * 2) + 1];
+		bin2hex(rxhex, spi_getrxbuf(spi), len);
+		applog(LOG_DEBUG, "%s: %cX %s", spi->repr, 'R', rxhex);
+	}
+	else
+		applog(LOG_DEBUG, "%s: SPI ERROR", spi->repr);
+	
+	return ok;
+}
+
 #endif
 
 static

+ 1 - 0
lowl-spi.h

@@ -91,6 +91,7 @@ bool spi_txrx(struct spi_port *port)
 extern int spi_open(struct spi_port *, const char *);
 extern bool sys_spi_txrx(struct spi_port *);
 extern bool linux_spi_txrx(struct spi_port *);
+extern bool linux_spi_txrx2(struct spi_port *);
 
 void spi_bfsb_select_bank(int bank);
 

+ 1 - 0
make-release

@@ -97,6 +97,7 @@ for build in "${builds[@]}"; do
 		--enable-icarus \
 		--enable-modminer \
 		--enable-ztex \
+		--enable-keccak \
 		--enable-scrypt \
 		--without-system-libbase58 \
 		--host="$machine"

+ 374 - 0
malgo/keccak.c

@@ -0,0 +1,374 @@
+/*
+ * Copyright 2013-2014 Ronny Van Keer (released as CC0)
+ * Copyright 2014 Luke Mitchell
+ * Copyright 2014 Luke Dashjr
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.  See COPYING for more details.
+ */
+
+#include "config.h"
+#include "miner.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <stdio.h>
+
+#include <uthash.h>
+
+// Plain 32-byte container
+// NOTE(review): no use of this type appears in the part of the file reviewed here — confirm it is still needed
+struct uint256 {
+	unsigned char v[32];
+};
+typedef struct uint256 uint256;
+
+typedef unsigned long long UINT64;
+
+// Rotate the 64-bit value a left by offset bits; offset must be in 1..63 (0 or 64 would shift by the full width).
+// Both arguments are parenthesised so that expression arguments such as ROL(x | y, n + 1) expand correctly.
+// Each argument is evaluated twice, so it must be free of side effects.
+#define ROL(a, offset) (((a) << (offset)) | ((a) >> (64 - (offset))))
+
+// Per-round constants, indexed by round number (0..23); keccak1 XORs entry [round] into the first state lane
+static const UINT64 KeccakF_RoundConstants[24] = {
+	0x0000000000000001ULL,
+	0x0000000000008082ULL,
+	0x800000000000808aULL,
+	0x8000000080008000ULL,
+	0x000000000000808bULL,
+	0x0000000080000001ULL,
+	0x8000000080008081ULL,
+	0x8000000000008009ULL,
+	0x000000000000008aULL,
+	0x0000000000000088ULL,
+	0x0000000080008009ULL,
+	0x000000008000000aULL,
+	0x000000008000808bULL,
+	0x800000000000008bULL,
+	0x8000000000008089ULL,
+	0x8000000000008003ULL,
+	0x8000000000008002ULL,
+	0x8000000000000080ULL,
+	0x000000000000800aULL,
+	0x800000008000000aULL,
+	0x8000000080008081ULL,
+	0x8000000000008080ULL,
+	0x0000000080000001ULL,
+	0x8000000080008008ULL
+};
+
+// Four UINT64 fields
+// NOTE(review): no use of this type appears in the part of the file reviewed here — confirm it is still needed
+struct bin32 {
+	UINT64 v0;
+	UINT64 v1;
+	UINT64 v2;
+	UINT64 v3;
+};
+
+// Hashes a single padded block: the message is copied into a 136-byte buffer, followed by a 0x01
+// byte, zero fill, and 0x80 OR'd into the last byte; 24 rounds are then applied in unrolled pairs.
+//   out:      receives the first four state lanes (4 * sizeof(UINT64) bytes); no alignment required
+//   inraw:    message bytes
+//   inrawlen: message length; must be at most 135 so message plus padding fit the block (not checked)
+// Lanes are loaded from and stored to memory in host byte order.
+// NOTE(review): Keccak defines lanes as little-endian, so a big-endian host would presumably need byte swaps — confirm if such targets matter
+static
+void keccak1(unsigned char *out, const unsigned char *inraw, unsigned inrawlen)
+{
+	unsigned char temp[136];
+	UINT64 in[17];
+	unsigned round;
+	
+	UINT64 Aba, Abe, Abi, Abo, Abu;
+	UINT64 Aga, Age, Agi, Ago, Agu;
+	UINT64 Aka, Ake, Aki, Ako, Aku;
+	UINT64 Ama, Ame, Ami, Amo, Amu;
+	UINT64 Asa, Ase, Asi, Aso, Asu;
+	UINT64 BCa, BCe, BCi, BCo, BCu;
+	UINT64 Da, De, Di, Do, Du;
+	UINT64 Eba, Ebe, Ebi, Ebo, Ebu;
+	UINT64 Ega, Ege, Egi, Ego, Egu;
+	UINT64 Eka, Eke, Eki, Eko, Eku;
+	UINT64 Ema, Eme, Emi, Emo, Emu;
+	UINT64 Esa, Ese, Esi, Eso, Esu;
+	
+	memcpy(temp, inraw, inrawlen);
+	temp[inrawlen++] = 1;
+	memset( temp+inrawlen, 0, 136 - inrawlen);
+	temp[136-1] |= 0x80;
+	// Copy into properly typed lanes instead of casting temp: a char array has no 8-byte alignment
+	// guarantee, and reading it through a UINT64 pointer violates the effective-type rules
+	memcpy(in, temp, sizeof(temp));
+	
+	// copyFromState(A, state)
+	Aba = in[ 0];
+	Abe = in[ 1];
+	Abi = in[ 2];
+	Abo = in[ 3];
+	Abu = in[ 4];
+	Aga = in[ 5];
+	Age = in[ 6];
+	Agi = in[ 7];
+	Ago = in[ 8];
+	Agu = in[ 9];
+	Aka = in[10];
+	Ake = in[11];
+	Aki = in[12];
+	Ako = in[13];
+	Aku = in[14];
+	Ama = in[15];
+	Ame = in[16];
+	// The remaining eight lanes start at zero
+	Ami = 0;
+	Amo = 0;
+	Amu = 0;
+	Asa = 0;
+	Ase = 0;
+	Asi = 0;
+	Aso = 0;
+	Asu = 0;
+	
+	// Two rounds per iteration: A -> E, then E -> A
+	for (round = 0; round < 24; round += 2)
+	{
+		// prepareTheta
+		BCa = Aba^Aga^Aka^Ama^Asa;
+		BCe = Abe^Age^Ake^Ame^Ase;
+		BCi = Abi^Agi^Aki^Ami^Asi;
+		BCo = Abo^Ago^Ako^Amo^Aso;
+		BCu = Abu^Agu^Aku^Amu^Asu;
+		
+		// thetaRhoPiChiIotaPrepareTheta(round, A, E)
+		Da = BCu^ROL(BCe, 1);
+		De = BCa^ROL(BCi, 1);
+		Di = BCe^ROL(BCo, 1);
+		Do = BCi^ROL(BCu, 1);
+		Du = BCo^ROL(BCa, 1);
+		
+		Aba ^= Da;
+		BCa = Aba;
+		Age ^= De;
+		BCe = ROL(Age, 44);
+		Aki ^= Di;
+		BCi = ROL(Aki, 43);
+		Amo ^= Do;
+		BCo = ROL(Amo, 21);
+		Asu ^= Du;
+		BCu = ROL(Asu, 14);
+		Eba = BCa ^((~BCe) & BCi);
+		Eba ^= KeccakF_RoundConstants[round];
+		Ebe = BCe ^((~BCi) & BCo);
+		Ebi = BCi ^((~BCo) & BCu);
+		Ebo = BCo ^((~BCu) & BCa);
+		Ebu = BCu ^((~BCa) & BCe);
+		
+		Abo ^= Do;
+		BCa = ROL(Abo, 28);
+		Agu ^= Du;
+		BCe = ROL(Agu, 20);
+		Aka ^= Da;
+		BCi = ROL(Aka,  3);
+		Ame ^= De;
+		BCo = ROL(Ame, 45);
+		Asi ^= Di;
+		BCu = ROL(Asi, 61);
+		Ega = BCa ^((~BCe) & BCi);
+		Ege = BCe ^((~BCi) & BCo);
+		Egi = BCi ^((~BCo) & BCu);
+		Ego = BCo ^((~BCu) & BCa);
+		Egu = BCu ^((~BCa) & BCe);
+		
+		Abe ^= De;
+		BCa = ROL(Abe,  1);
+		Agi ^= Di;
+		BCe = ROL(Agi,  6);
+		Ako ^= Do;
+		BCi = ROL(Ako, 25);
+		Amu ^= Du;
+		BCo = ROL(Amu,  8);
+		Asa ^= Da;
+		BCu = ROL(Asa, 18);
+		Eka = BCa ^((~BCe) & BCi);
+		Eke = BCe ^((~BCi) & BCo);
+		Eki = BCi ^((~BCo) & BCu);
+		Eko = BCo ^((~BCu) & BCa);
+		Eku = BCu ^((~BCa) & BCe);
+		
+		Abu ^= Du;
+		BCa = ROL(Abu, 27);
+		Aga ^= Da;
+		BCe = ROL(Aga, 36);
+		Ake ^= De;
+		BCi = ROL(Ake, 10);
+		Ami ^= Di;
+		BCo = ROL(Ami, 15);
+		Aso ^= Do;
+		BCu = ROL(Aso, 56);
+		Ema = BCa ^((~BCe) & BCi);
+		Eme = BCe ^((~BCi) & BCo);
+		Emi = BCi ^((~BCo) & BCu);
+		Emo = BCo ^((~BCu) & BCa);
+		Emu = BCu ^((~BCa) & BCe);
+		
+		Abi ^= Di;
+		BCa = ROL(Abi, 62);
+		Ago ^= Do;
+		BCe = ROL(Ago, 55);
+		Aku ^= Du;
+		BCi = ROL(Aku, 39);
+		Ama ^= Da;
+		BCo = ROL(Ama, 41);
+		Ase ^= De;
+		BCu = ROL(Ase,  2);
+		Esa = BCa ^((~BCe) & BCi);
+		Ese = BCe ^((~BCi) & BCo);
+		Esi = BCi ^((~BCo) & BCu);
+		Eso = BCo ^((~BCu) & BCa);
+		Esu = BCu ^((~BCa) & BCe);
+		
+		// prepareTheta
+		BCa = Eba^Ega^Eka^Ema^Esa;
+		BCe = Ebe^Ege^Eke^Eme^Ese;
+		BCi = Ebi^Egi^Eki^Emi^Esi;
+		BCo = Ebo^Ego^Eko^Emo^Eso;
+		BCu = Ebu^Egu^Eku^Emu^Esu;
+		
+		// thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
+		Da = BCu^ROL(BCe, 1);
+		De = BCa^ROL(BCi, 1);
+		Di = BCe^ROL(BCo, 1);
+		Do = BCi^ROL(BCu, 1);
+		Du = BCo^ROL(BCa, 1);
+		
+		Eba ^= Da;
+		BCa = Eba;
+		Ege ^= De;
+		BCe = ROL(Ege, 44);
+		Eki ^= Di;
+		BCi = ROL(Eki, 43);
+		Emo ^= Do;
+		BCo = ROL(Emo, 21);
+		Esu ^= Du;
+		BCu = ROL(Esu, 14);
+		Aba = BCa ^((~BCe) & BCi);
+		Aba ^= KeccakF_RoundConstants[round+1];
+		Abe = BCe ^((~BCi) & BCo);
+		Abi = BCi ^((~BCo) & BCu);
+		Abo = BCo ^((~BCu) & BCa);
+		Abu = BCu ^((~BCa) & BCe);
+		
+		Ebo ^= Do;
+		BCa = ROL(Ebo, 28);
+		Egu ^= Du;
+		BCe = ROL(Egu, 20);
+		Eka ^= Da;
+		BCi = ROL(Eka, 3);
+		Eme ^= De;
+		BCo = ROL(Eme, 45);
+		Esi ^= Di;
+		BCu = ROL(Esi, 61);
+		Aga = BCa ^((~BCe) & BCi);
+		Age = BCe ^((~BCi) & BCo);
+		Agi = BCi ^((~BCo) & BCu);
+		Ago = BCo ^((~BCu) & BCa);
+		Agu = BCu ^((~BCa) & BCe);
+		
+		Ebe ^= De;
+		BCa = ROL(Ebe, 1);
+		Egi ^= Di;
+		BCe = ROL(Egi, 6);
+		Eko ^= Do;
+		BCi = ROL(Eko, 25);
+		Emu ^= Du;
+		BCo = ROL(Emu, 8);
+		Esa ^= Da;
+		BCu = ROL(Esa, 18);
+		Aka = BCa ^((~BCe) & BCi);
+		Ake = BCe ^((~BCi) & BCo);
+		Aki = BCi ^((~BCo) & BCu);
+		Ako = BCo ^((~BCu) & BCa);
+		Aku = BCu ^((~BCa) & BCe);
+		
+		Ebu ^= Du;
+		BCa = ROL(Ebu, 27);
+		Ega ^= Da;
+		BCe = ROL(Ega, 36);
+		Eke ^= De;
+		BCi = ROL(Eke, 10);
+		Emi ^= Di;
+		BCo = ROL(Emi, 15);
+		Eso ^= Do;
+		BCu = ROL(Eso, 56);
+		Ama = BCa ^((~BCe) & BCi);
+		Ame = BCe ^((~BCi) & BCo);
+		Ami = BCi ^((~BCo) & BCu);
+		Amo = BCo ^((~BCu) & BCa);
+		Amu = BCu ^((~BCa) & BCe);
+		
+		Ebi ^= Di;
+		BCa = ROL(Ebi, 62);
+		Ego ^= Do;
+		BCe = ROL(Ego, 55);
+		Eku ^= Du;
+		BCi = ROL(Eku, 39);
+		Ema ^= Da;
+		BCo = ROL(Ema, 41);
+		Ese ^= De;
+		BCu = ROL(Ese, 2);
+		Asa = BCa ^((~BCe) & BCi);
+		Ase = BCe ^((~BCi) & BCo);
+		Asi = BCi ^((~BCo) & BCu);
+		Aso = BCo ^((~BCu) & BCa);
+		Asu = BCu ^((~BCa) & BCe);
+	}
+	{
+		// Copy the digest lanes out bytewise; the caller's buffer need not be aligned for UINT64
+		const UINT64 lanes[4] = { Aba, Abe, Abi, Abo };
+		memcpy(out, lanes, sizeof(lanes));
+	}
+}
+
+// hash_data_f implementation for Keccak: converts the twenty 32-bit words at pdata with
+// swap32yes, then hashes the resulting 80 bytes into digest using keccak1.
+static
+void keccak_hash_data(void * const digest, const void * const pdata)
+{
+	uint32_t header[20];
+	
+	swap32yes(header, pdata, sizeof(header) / sizeof(header[0]));
+	keccak1(digest, (unsigned char*)header, sizeof(header));
+}
+
+#ifdef USE_OPENCL
+// Converts an OpenCL thread count to the Keccak intensity scale: log2(threads) minus 13
+static
+float opencl_oclthreads_to_intensity_keccak(const unsigned long oclthreads)
+{
+	const float log2_threads = log2f(oclthreads);
+	return log2_threads - 13.;
+}
+
+// Converts a Keccak intensity to an OpenCL thread count: 2 raised to (intensity + 13)
+static
+unsigned long opencl_intensity_to_oclthreads_keccak(float intensity)
+{
+	const float exponent = intensity + 13;
+	return powf(2, exponent);
+}
+
+// Default kernel name for Keccak: always a freshly allocated copy of "keccak".
+// None of the arguments influence the result.
+static
+char *opencl_get_default_kernel_file_keccak(const struct mining_algorithm * const malgo, struct cgpu_info * const cgpu, struct _clState * const clState)
+{
+	char * const kernel_name = strdup("keccak");
+	return kernel_name;
+}
+#endif
+
+// Algorithm descriptor for Keccak; init_keccak() appends it to the mining_algorithms list
+static struct mining_algorithm malgo_keccak = {
+	.name = "Keccak",
+	.aliases = "Keccak",
+	
+	.algo = POW_KECCAK,
+	.ui_skip_hash_bytes = 4,
+	.worktime_skip_prevblk_u32 = 1,
+	.reasonable_low_nonce_diff = 1.,
+	
+	.hash_data_f = keccak_hash_data,
+	
+#ifdef USE_OPENCL
+	.opencl_oclthreads_to_intensity = opencl_oclthreads_to_intensity_keccak,
+	.opencl_intensity_to_oclthreads = opencl_intensity_to_oclthreads_keccak,
+	// Intensity notes below follow opencl_oclthreads_to_intensity_keccak (log2 of the thread count, minus 13)
+	.opencl_min_oclthreads =       0x20,  // intensity -8
+	.opencl_max_oclthreads = 0x20000000,  // intensity 16
+	.opencl_min_nonce_diff = 1./0x10,
+	.opencl_get_default_kernel_file = opencl_get_default_kernel_file_keccak,
+#endif
+};
+
+// Constructor-attribute function: appends the Keccak descriptor to the mining_algorithms list
+static
+__attribute__((constructor))
+void init_keccak(void)
+{
+	LL_APPEND(mining_algorithms, (&malgo_keccak));
+}

+ 58 - 6
scrypt.c → malgo/scrypt.c

@@ -32,11 +32,14 @@
 #include "config.h"
 #include "miner.h"
 
+#include <math.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <string.h>
 
+#include <uthash.h>
+
 typedef struct SHA256Context {
 	uint32_t state[8];
 	uint32_t buf[16];
@@ -489,22 +492,23 @@ void scrypt_regenhash(struct work *work)
 }
 
 /* Used by test_nonce functions */
-void scrypt_hash_data(unsigned char * const out_hash, const unsigned char * const pdata)
+void scrypt_hash_data(void * const out_hash, const void * const pdata)
 {
 	uint32_t data[20], ohash[8];
 	char *scratchbuf;
 
-	be32enc_vect(data, (const uint32_t *)pdata, 20);
+	be32enc_vect(data, pdata, 20);
 	scratchbuf = alloca(SCRATCHBUF_SIZE);
 	scrypt_1024_1_1_256_sp(data, scratchbuf, ohash);
-	swap32tobe((void*)out_hash, ohash, 32/4);
+	swap32tobe(out_hash, ohash, 32/4);
 }
 
-bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate,
-		     unsigned char *pdata, unsigned char __maybe_unused *phash1,
-		     unsigned char __maybe_unused *phash, const unsigned char *ptarget,
+bool scanhash_scrypt(struct thr_info * const thr, struct work * const work,
 		     uint32_t max_nonce, uint32_t *last_nonce, uint32_t n)
 {
+	uint8_t * const pdata = work->data;
+	const uint8_t * const ptarget = work->target;
+	
 	uint32_t *nonce = (uint32_t *)(pdata + 76);
 	char *scratchbuf;
 	uint32_t data[20];
@@ -545,3 +549,51 @@ bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *p
 	free(scratchbuf);
 	return ret;
 }
+
+#ifdef USE_OPENCL
+// Converts an OpenCL thread count to the scrypt intensity scale: plain log2(threads), no offset
+static
+float opencl_oclthreads_to_intensity_scrypt(const unsigned long oclthreads)
+{
+	const float intensity = log2(oclthreads);
+	return intensity;
+}
+
+// Converts a scrypt intensity to an OpenCL thread count: 2 raised to intensity
+static
+unsigned long opencl_intensity_to_oclthreads_scrypt(float intensity)
+{
+	const unsigned long oclthreads = pow(2, intensity);
+	return oclthreads;
+}
+
+// Default kernel name for scrypt: always a freshly allocated copy of "scrypt".
+// None of the arguments influence the result.
+static
+char *opencl_get_default_kernel_file_scrypt(const struct mining_algorithm * const malgo, struct cgpu_info * const cgpu, struct _clState * const clState)
+{
+	char * const kernel_name = strdup("scrypt");
+	return kernel_name;
+}
+#endif
+
+// Algorithm descriptor for scrypt; init_scrypt() appends it to the mining_algorithms list
+struct mining_algorithm malgo_scrypt = {
+	.name = "scrypt",
+	.aliases = "scrypt",
+	
+	.algo = POW_SCRYPT,
+	.ui_skip_hash_bytes = 2,
+	.reasonable_low_nonce_diff = 1./0x10000,
+	
+	.hash_data_f = scrypt_hash_data,
+	
+#ifdef USE_OPENCL
+	.opencl_nodefault = true,
+	.opencl_oclthreads_to_intensity = opencl_oclthreads_to_intensity_scrypt,
+	.opencl_intensity_to_oclthreads = opencl_intensity_to_oclthreads_scrypt,
+	// Intensity notes below follow opencl_oclthreads_to_intensity_scrypt (plain log2 of the thread count)
+	.opencl_min_oclthreads =      0x100,  // intensity   8
+	// NOTE(review): this note previously read "intensity 31", but log2(0x20000000) is 29 — confirm whether the value or the note was intended
+	.opencl_max_oclthreads = 0x20000000,  // intensity  29
+	.opencl_min_nonce_diff = 1./0x10000,
+	.opencl_get_default_kernel_file = opencl_get_default_kernel_file_scrypt,
+#endif
+};
+
+// Constructor-attribute function: appends the scrypt descriptor to the mining_algorithms list
+static
+__attribute__((constructor))
+void init_scrypt(void)
+{
+	LL_APPEND(mining_algorithms, (&malgo_scrypt));
+}

+ 6 - 0
malgo/scrypt.h

@@ -0,0 +1,6 @@
+#ifndef SCRYPT_H
+#define SCRYPT_H
+
+extern void test_scrypt(void);
+
+#endif /* SCRYPT_H */

+ 114 - 0
malgo/sha256d.c

@@ -0,0 +1,114 @@
+/*
+ * Copyright 2012-2014 Luke Dashjr
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.  See COPYING for more details.
+ */
+
+#include "config.h"
+
+#include <math.h>
+#include <string.h>
+
+#include <uthash.h>
+
+#include "logging.h"
+#include "miner.h"
+#include "ocl.h"
+#include "util.h"
+
+// hash_data_f implementation for SHA256d: rebuilds the 80-byte block header from data and
+// writes its double-SHA256 block hash to out_hash.
+static
+void hash_data(void *out_hash, const void *data)
+{
+	unsigned char header[80];
+	
+	// data has already been through the first SHA256 step (padded and read as big endian on a
+	// little endian platform), so every 32-bit chunk is flipped back to recover the original header
+	swap32yes(header, data, sizeof(header) / 4);
+	
+	// double-SHA256 of the recovered header gives the block hash
+	gen_hash(header, out_hash, sizeof(header));
+}
+
+#ifdef USE_OPENCL
+// Converts an OpenCL thread count to the SHA256d intensity scale: log2(threads) minus 15
+static
+float opencl_oclthreads_to_intensity_sha256d(const unsigned long oclthreads)
+{
+	const float log2_threads = log2f(oclthreads);
+	return log2_threads - 15.;
+}
+
+// Converts a SHA256d intensity to an OpenCL thread count: 2 raised to (intensity + 15)
+static
+unsigned long opencl_intensity_to_oclthreads_sha256d(float intensity)
+{
+	const float exponent = intensity + 15;
+	return powf(2, exponent);
+}
+
+// Chooses the default SHA256d kernel for a device and returns its name as a freshly allocated string.
+// Checked in order:
+//   1. Mesa platform                                          -> "phatk"
+//   2. non-Tahiti device with a listed platform version       -> "diablo"
+//   3. Tahiti device, or device lacking bit-align support     -> "poclbm"
+//   4. everything else                                        -> "phatk"
+// malgo is not used.
+static
+char *opencl_get_default_kernel_file_sha256d(const struct mining_algorithm * const malgo, struct cgpu_info * const cgpu, struct _clState * const clState)
+{
+	/* Platform version substrings for the 2.6 SDKs (and the later ones listed) that use the diablo kernel */
+	static const char * const diablo_versions[] = {
+		"844.4",   // Linux 64 bit ATI 2.6 SDK
+		"851.4",   // Windows 64 bit ""
+		"831.4",
+		"898.1",   // 12.2 driver SDK
+		"923.1",   // 12.4
+		"938.2",   // SDK 2.7
+		"1113.2",  // SDK 2.8
+	};
+	const char * const platform_ver = clState->platform_ver_str;
+	
+	if (clState->is_mesa)
+	{
+		applog(LOG_INFO, "Selecting phatk kernel for Mesa");
+		return strdup("phatk");
+	}
+	
+	const bool is_tahiti = (strstr(cgpu->name, "Tahiti") != NULL);
+	
+	if (!is_tahiti)
+	{
+		for (size_t i = 0; i < sizeof(diablo_versions) / sizeof(diablo_versions[0]); ++i)
+		{
+			if (!strstr(platform_ver, diablo_versions[i]))
+				continue;
+			applog(LOG_INFO, "Selecting diablo kernel");
+			return strdup("diablo");
+		}
+	}
+	
+	/* Detect all 7970s, older ATI and NVIDIA and use poclbm */
+	if (is_tahiti || !clState->hasBitAlign)
+	{
+		applog(LOG_INFO, "Selecting poclbm kernel");
+		return strdup("poclbm");
+	}
+	
+	/* Use phatk for the rest R5xxx R6xxx */
+	applog(LOG_INFO, "Selecting phatk kernel");
+	return strdup("phatk");
+}
+#endif  /* USE_OPENCL */
+
+// Algorithm descriptor for SHA256d; init_sha256d() appends it to the mining_algorithms list
+struct mining_algorithm malgo_sha256d = {
+	.name = "SHA256d",
+	.aliases = "SHA256d|SHA256|SHA2",
+	
+	.algo = POW_SHA256D,
+	.ui_skip_hash_bytes = 4,
+	.worktime_skip_prevblk_u32 = 1,
+	.reasonable_low_nonce_diff = 1.,
+	
+	.hash_data_f = hash_data,
+	
+#ifdef USE_OPENCL
+	.opencl_nodefault = true,
+	.opencl_oclthreads_to_intensity = opencl_oclthreads_to_intensity_sha256d,
+	.opencl_intensity_to_oclthreads = opencl_intensity_to_oclthreads_sha256d,
+	// Intensity notes below follow opencl_oclthreads_to_intensity_sha256d (log2 of the thread count, minus 15)
+	.opencl_min_oclthreads =       0x20,  // intensity -10
+	.opencl_max_oclthreads = 0x20000000,  // intensity  14
+	.opencl_min_nonce_diff = 1.,
+	.opencl_get_default_kernel_file = opencl_get_default_kernel_file_sha256d,
+#endif
+};
+
+// Constructor-attribute function: appends the SHA256d descriptor to the mining_algorithms list
+static
+__attribute__((constructor))
+void init_sha256d(void)
+{
+	LL_APPEND(mining_algorithms, (&malgo_sha256d));
+}

+ 2 - 1
mcp2210.c

@@ -221,7 +221,8 @@ bool mcp2210_spi_transfer(struct mcp2210_device * const h, const void * const tx
 				cmd[2] = 0;
 				break;
 			case 0xf8:  // transfer in progress
-				applog(LOG_DEBUG, "%s: SPI transfer rejected temporarily (%d bytes remaining)", __func__, sz);
+				if (opt_dev_protocol)
+					applog(LOG_DEBUG, "%s: SPI transfer rejected temporarily (%d bytes remaining)", __func__, sz);
 				cgsleep_ms(20);
 				goto retry;
 			default:

File diff suppressed because it is too large
+ 527 - 120
miner.c


+ 124 - 47
miner.h

@@ -21,6 +21,7 @@
 #include <winsock2.h>
 #endif
 
+#include <float.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <sys/time.h>
@@ -46,6 +47,10 @@
 #include "logging.h"
 #include "util.h"
 
+extern const char * const bfgminer_name_space_ver;
+extern const char * const bfgminer_name_slash_ver;
+extern const char * const bfgminer_ver;
+
 #ifdef STDC_HEADERS
 # include <stdlib.h>
 # include <stddef.h>
@@ -276,10 +281,17 @@ struct gpu_adl {
 #endif
 
 enum pow_algorithm {
-	POW_SHA256D = 1,
-	POW_SCRYPT  = 2,
+#ifdef USE_KECCAK
+	POW_KECCAK,
+#endif
+#ifdef USE_SHA256D
+	POW_SHA256D,
+#endif
+#ifdef USE_SCRYPT
+	POW_SCRYPT,
+#endif
+	POW_ALGORITHM_COUNT,
 };
-typedef uint8_t supported_algos_t;
 
 struct api_data;
 struct thr_info;
@@ -294,15 +306,19 @@ enum bfg_probe_result_flags_values {
 extern unsigned *_bfg_probe_result_flags();
 #define bfg_probe_result_flags (*_bfg_probe_result_flags())
 
+struct mining_algorithm;
+
 struct device_drv {
 	const char *dname;
 	const char *name;
 	int8_t probe_priority;
 	bool lowl_probe_by_name_only;
-	supported_algos_t supported_algos;
 
 	// DRV-global functions
 	void (*drv_init)();
+	// drv_min_nonce_diff's proc may be NULL
+	// drv_min_nonce_diff should return negative if algorithm is not supported
+	float (*drv_min_nonce_diff)(struct cgpu_info *proc, const struct mining_algorithm *);
 	void (*drv_detect)();
 	bool (*lowl_match)(const struct lowlevel_device_info *);
 	bool (*lowl_probe)(const struct lowlevel_device_info *);
@@ -367,11 +383,19 @@ enum dev_enable {
 
 enum cl_kernels {
 	KL_NONE,
+#ifdef USE_SHA256D
 	KL_POCLBM,
 	KL_PHATK,
 	KL_DIAKGCN,
 	KL_DIABLO,
+#endif
+#ifdef USE_OPENCL_FULLHEADER
+	KL_FULLHEADER,
+#endif
+#ifdef USE_SCRYPT
 	KL_SCRYPT,
+#endif
+	OPENCL_KERNEL_INTERFACE_COUNT,
 };
 
 enum dev_reason {
@@ -570,9 +594,6 @@ struct cgpu_info {
 
 	bool disable_watchdog;
 	bool shutdown;
-	
-	// Lowest difficulty supported for finding nonces
-	float min_nonce_diff;
 };
 
 extern void renumber_cgpu(struct cgpu_info *);
@@ -948,7 +969,6 @@ extern bool opt_protocol;
 extern bool opt_dev_protocol;
 extern char *opt_coinbase_sig;
 extern char *request_target_str;
-extern bool have_longpoll;
 extern int opt_skip_checks;
 extern char *opt_kernel_path;
 extern char *opt_socks_proxy;
@@ -1003,16 +1023,6 @@ extern bool our_curl_supports_proxy_uris();
 extern void bin2hex(char *out, const void *in, size_t len);
 extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len);
 
-typedef bool (*sha256_func)(struct thr_info*, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce,
-	uint32_t *last_nonce,
-	uint32_t nonce);
-
-extern bool fulltest(const unsigned char *hash, const unsigned char *target);
-
 extern int opt_queue;
 extern int opt_scantime;
 extern int opt_expiry;
@@ -1062,6 +1072,8 @@ extern void clear_stratum_shares(struct pool *pool);
 extern void hashmeter2(struct thr_info *);
 extern bool stale_work(struct work *, bool share);
 extern bool stale_work_future(struct work *, bool share, unsigned long ustime);
+extern void blkhashstr(char *out, const unsigned char *hash);
+static const float minimum_pdiff = max(FLT_MIN, 1./0x100000000);
 extern void set_target_to_pdiff(void *dest_target, double pdiff);
 #define bdiff_to_pdiff(n) (n * 1.0000152587)
 extern void set_target_to_bdiff(void *dest_target, double bdiff);
@@ -1094,34 +1106,89 @@ extern int enabled_pools;
 extern bool get_intrange(const char *arg, int *val1, int *val2);
 extern bool detect_stratum(struct pool *pool, char *url);
 extern void print_summary(void);
+extern struct mining_algorithm *mining_algorithm_by_alias(const char *alias);
+extern struct mining_goal_info *get_mining_goal(const char *name);
+extern void goal_set_malgo(struct mining_goal_info *, struct mining_algorithm *);
+extern void mining_goal_reset(struct mining_goal_info * const goal);
 extern void adjust_quota_gcd(void);
-extern struct pool *add_pool(void);
+extern struct pool *add_pool2(struct mining_goal_info *);
+#define add_pool()  add_pool2(get_mining_goal("default"))
 extern bool add_pool_details(struct pool *pool, bool live, char *url, char *user, char *pass);
 
 #define MAX_GPUDEVICES 16
 #define MAX_DEVICES 4096
 
-#define MIN_SHA_INTENSITY -10
-#define MIN_SHA_INTENSITY_STR "-10"
-#define MAX_SHA_INTENSITY 14
-#define MAX_SHA_INTENSITY_STR "14"
-#define MIN_SCRYPT_INTENSITY 8
-#define MIN_SCRYPT_INTENSITY_STR "8"
-#define MAX_SCRYPT_INTENSITY 31
-#define MAX_SCRYPT_INTENSITY_STR "31"
-#ifdef USE_SCRYPT
-#define MIN_INTENSITY (opt_scrypt ? MIN_SCRYPT_INTENSITY : MIN_SHA_INTENSITY)
-#define MIN_INTENSITY_STR (opt_scrypt ? MIN_SCRYPT_INTENSITY_STR : MIN_SHA_INTENSITY_STR)
-#define MAX_INTENSITY (opt_scrypt ? MAX_SCRYPT_INTENSITY : MAX_SHA_INTENSITY)
-#define MAX_INTENSITY_STR (opt_scrypt ? MAX_SCRYPT_INTENSITY_STR : MAX_SHA_INTENSITY_STR)
-#define MAX_GPU_INTENSITY MAX_SCRYPT_INTENSITY
-#else
-#define MIN_INTENSITY MIN_SHA_INTENSITY
-#define MIN_INTENSITY_STR MIN_SHA_INTENSITY_STR
-#define MAX_INTENSITY MAX_SHA_INTENSITY
-#define MAX_INTENSITY_STR MAX_SHA_INTENSITY_STR
-#define MAX_GPU_INTENSITY MAX_SHA_INTENSITY
+struct block_info {  // Record for one block; hashable through the uthash handle hh
+	uint32_t block_id;  // NOTE(review): derivation of this id is not visible here; struct pool carries a block_id of the same type
+	uint8_t prevblkhash[0x20];  // 0x20 = 32 bytes of hash
+	unsigned block_seen_order;  // new_blocks when this block was first seen; was 'block_no'
+	uint32_t height;
+	time_t first_seen_time;
+	
+	UT_hash_handle hh;
+};
+
+struct blockchain_info {  // Block bookkeeping referenced from mining_goal_info.blkchain
+	struct block_info *blocks;  // presumably the uthash head of block_info records (they carry a UT_hash_handle) — confirm
+	struct block_info *currentblk;
+	uint64_t currentblk_subsidy;  // only valid when height is known! (and assumes Bitcoin)
+	char currentblk_first_seen_time_str[0x20];  // was global blocktime
+};
+
+struct _clState;
+struct cgpu_info;
+struct mining_algorithm;
+
+struct mining_algorithm {  // Descriptor of one proof-of-work algorithm (see malgo_sha256d for an instance)
+	const char *name;
+	const char *aliases;  // alternative names; the SHA256d descriptor separates them with '|'
+	
+	enum pow_algorithm algo;
+	uint8_t ui_skip_hash_bytes;
+	uint8_t worktime_skip_prevblk_u32;
+	float reasonable_low_nonce_diff;
+	
+	void (*hash_data_f)(void *digest, const void *data);
+	
+	int goal_refs;  // NOTE(review): presumably counts mining goals using this algorithm — confirm in miner.c
+	int staged;
+	int base_queue;
+	
+	struct mining_algorithm *next;  // list link; descriptors are chained onto mining_algorithms with LL_APPEND
+	
+#ifdef USE_OPENCL
+	bool opencl_nodefault;
+	float (*opencl_oclthreads_to_intensity)(unsigned long oclthreads);
+	unsigned long (*opencl_intensity_to_oclthreads)(float intensity);
+	unsigned long opencl_min_oclthreads;
+	unsigned long opencl_max_oclthreads;
+	float opencl_min_nonce_diff;
+	char *(*opencl_get_default_kernel_file)(const struct mining_algorithm *, struct cgpu_info *, struct _clState *);  // the SHA256d implementation returns a strdup'd kernel name
 #endif
+};
+
+struct mining_goal_info {  // State of one mining goal; pools point at their goal through pool->goal
+	unsigned id;
+	char *name;  // get_mining_goal() takes a goal name; add_pool() uses the goal named "default"
+	bool is_default;
+	
+	struct blockchain_info *blkchain;
+	
+	bytes_t *generation_script;  // was opt_coinbase_script
+	
+	struct mining_algorithm *malgo;  // returned by work_mining_algorithm() for work belonging to this goal's pools
+	double current_diff;
+	char current_diff_str[ALLOC_H2B_SHORTV];  // was global block_diff
+	char net_hashrate[ALLOC_H2B_SHORT];
+	
+	char *current_goal_detail;
+	
+	double diff_accepted;
+	
+	bool have_longpoll;  // NOTE(review): appears to replace the former global have_longpoll — confirm
+	
+	UT_hash_handle hh;
+};
 
 extern struct string_elist *scan_devices;
 extern bool opt_force_dev_init;
@@ -1134,11 +1201,6 @@ extern bool opt_quiet;
 extern struct thr_info *control_thr;
 extern struct thr_info **mining_thr;
 extern struct cgpu_info gpus[MAX_GPUDEVICES];
-#ifdef USE_SCRYPT
-extern bool opt_scrypt;
-#else
-#define opt_scrypt (0)
-#endif
 extern double total_secs;
 extern int mining_threads;
 extern struct cgpu_info *cpus;
@@ -1171,10 +1233,9 @@ extern int opt_fail_pause;
 extern int opt_log_interval;
 extern unsigned long long global_hashrate;
 extern unsigned unittest_failures;
-extern char *current_fullhash;
-extern double current_diff;
 extern double best_diff;
-extern time_t block_time;
+extern struct mining_algorithm *mining_algorithms;
+extern struct mining_goal_info *mining_goals;
 
 struct curl_ent {
 	CURL *curl;
@@ -1298,6 +1359,7 @@ struct pool {
 	time_t work_restart_time;
 	char work_restart_timestamp[11];
 	uint32_t	block_id;
+	struct mining_goal_info *goal;
 
 	enum pool_protocol proto;
 
@@ -1369,6 +1431,9 @@ struct pool {
 	bool stratum_init;
 	bool stratum_notify;
 	struct stratum_work swork;
+	char *goalname;
+	char *next_goalname;
+	struct mining_algorithm *next_goal_malgo;
 	uint8_t next_target[0x20];
 	char *next_nonce1;
 	int next_n2size;
@@ -1420,6 +1485,7 @@ struct work {
 	bool		longpoll;
 	bool		stale;
 	bool		mandatory;
+	bool spare;
 	bool		block;
 
 	bool		stratum;
@@ -1523,6 +1589,7 @@ extern void kill_work(void);
 extern int prioritize_pools(char *param, int *pid);
 extern void validate_pool_priorities(void);
 extern void enable_pool(struct pool *);
+extern void manual_enable_pool(struct pool *);
 extern void disable_pool(struct pool *, enum pool_enable);
 extern void switch_pools(struct pool *selected);
 extern void remove_pool(struct pool *pool);
@@ -1555,6 +1622,16 @@ extern const char *bfg_workpadding_bin;
 extern void set_simple_ntime_roll_limit(struct ntime_roll_limits *, uint32_t ntime_base, int ntime_roll, const struct timeval *tvp_ref);
 extern void work_set_simple_ntime_roll_limit(struct work *, int ntime_roll, const struct timeval *tvp_ref);
 extern int work_ntime_range(struct work *, const struct timeval *tvp_earliest, const struct timeval *tvp_latest, int desired_roll);
+
+/* Return the mining algorithm that applies to a work item by following
+ * work -> pool -> goal -> malgo. None of the links is checked for NULL. */
+static inline
+struct mining_algorithm *work_mining_algorithm(const struct work * const work)
+{
+	return work->pool->goal->malgo;
+}
+
 extern void work_hash(struct work *);
 
 #define NTIME_DATA_OFFSET  0x44

+ 618 - 434
ocl.c

@@ -9,9 +9,9 @@
  */
 
 #include "config.h"
-#ifdef HAVE_OPENCL
 
 #include <ctype.h>
+#include <limits.h>
 #include <signal.h>
 #include <stdbool.h>
 #include <stdint.h>
@@ -258,6 +258,48 @@ char *file_contents(const char *filename, int *length)
 	return (char*)buffer;
 }
 
+/* Copy the next whitespace-delimited word of s into buf.
+ *
+ * Leading whitespace is skipped, but the scan never moves past a '\n' or
+ * '\r', so a word is only taken from the current line (an empty word results
+ * if the line ends first). At most bufsz-1 bytes are copied and buf is always
+ * NUL-terminated; when bufsz is 0 nothing is written to buf at all.
+ * If endptr is non-NULL it receives the position just after the word in s
+ * (the whitespace or NUL that ended it), even when the copy was truncated. */
+static
+void extract_word(char * const buf, const size_t bufsz, const char ** const endptr, const char *s)
+{
+	const char *q;
+	// <ctype.h> functions require an unsigned char value; plain char may be negative
+	for ( ; s[0] && isspace((unsigned char)s[0]); ++s)
+		if (s[0] == '\n' || s[0] == '\r')
+			break;
+	for (q = s; q[0] && !isspace((unsigned char)q[0]); ++q)
+	{}  // Find end of string
+	if (bufsz)
+	{
+		size_t len = q - s;
+		if (len >= bufsz)
+			len = bufsz - 1;
+		memcpy(buf, s, len);
+		buf[len] = '\0';
+	}
+	if (endptr)
+		*endptr = q;
+}
+
+/* Read an OpenCL kernel source file and interpret its "kernel-interface:" tag.
+ *
+ * Returns the buffer obtained from file_contents() (NULL if that fails), with
+ * the length stored through out_sourcelen by file_contents(). The word after
+ * the tag is mapped with select_kernel() into *out_kinterface; without a tag
+ * *out_kinterface becomes KL_NONE. If out_malgo is non-NULL and the interface
+ * word is followed by a space or tab, the next word is looked up with
+ * mining_algorithm_by_alias() and stored in *out_malgo; otherwise *out_malgo
+ * is left untouched. */
+char *opencl_kernel_source(const char * const filename, int * const out_sourcelen, enum cl_kernels * const out_kinterface, struct mining_algorithm ** const out_malgo)
+{
+	static const char tag[] = "kernel-interface:";
+	char * const source = file_contents(filename, out_sourcelen);
+	if (!source)
+		return NULL;
+	
+	const char * const tagpos = strstr(source, tag);
+	if (!tagpos)
+	{
+		*out_kinterface = KL_NONE;
+		return source;
+	}
+	
+	char word[0x20];
+	const char *rest;
+	// sizeof(tag) - 1 skips exactly the tag text
+	extract_word(word, sizeof(word), &rest, &tagpos[sizeof(tag) - 1]);
+	*out_kinterface = select_kernel(word);
+	
+	const bool more_on_line = (rest[0] == '\t' || rest[0] == ' ');
+	if (out_malgo && more_on_line)
+	{
+		extract_word(word, sizeof(word), &rest, rest);
+		*out_malgo = mining_algorithm_by_alias(word);
+	}
+	return source;
+}
+
 extern int opt_g_threads;
 
 int clDevicesNum(void) {
@@ -339,20 +381,20 @@ int clDevicesNum(void) {
 	return most_devices;
 }
 
-cl_int bfg_clBuildProgram(_clState * const clState, const cl_device_id devid, const char * const CompilerOptions)
+cl_int bfg_clBuildProgram(cl_program * const program, const cl_device_id devid, const char * const CompilerOptions)
 {
 	cl_int status;
 	
-	status = clBuildProgram(clState->program, 1, &devid, CompilerOptions, NULL, NULL);
+	status = clBuildProgram(*program, 1, &devid, CompilerOptions, NULL, NULL);
 	
 	if (status != CL_SUCCESS)
 	{
 		applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);
 		size_t logSize;
-		status = clGetProgramBuildInfo(clState->program, devid, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
+		status = clGetProgramBuildInfo(*program, devid, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
 		
 		char *log = malloc(logSize ?: 1);
-		status = clGetProgramBuildInfo(clState->program, devid, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
+		status = clGetProgramBuildInfo(*program, devid, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
 		if (logSize > 0 && log[0])
 			applog(LOG_ERR, "%s", log);
 		free(log);
@@ -418,18 +460,15 @@ void patch_opcodes(char *w, unsigned remaining)
 	applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched);
 }
 
-_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
+_clState *opencl_create_clState(unsigned int gpu, char *name, size_t nameSize)
 {
 	_clState *clState = calloc(1, sizeof(_clState));
-	bool patchbfi = false, prog_built = false;
-	bool ismesa = false;
 	struct cgpu_info *cgpu = &gpus[gpu];
 	struct opencl_device_data * const data = cgpu->device_data;
 	cl_platform_id platform = NULL;
 	char pbuff[256], vbuff[255];
-	char *s, *q;
+	char *s;
 	cl_platform_id* platforms;
-	cl_uint preferred_vwidth;
 	cl_device_id *devices;
 	cl_uint numPlatforms;
 	cl_uint numDevices;
@@ -438,6 +477,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	status = clGetPlatformIDs(0, NULL, &numPlatforms);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Getting Platforms. (clGetPlatformsIDs)", status);
+err:
+		free(clState);
 		return NULL;
 	}
 
@@ -445,24 +486,24 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	status = clGetPlatformIDs(numPlatforms, platforms, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Getting Platform Ids. (clGetPlatformsIDs)", status);
-		return NULL;
+		goto err;
 	}
 
 	if (opt_platform_id >= (int)numPlatforms) {
 		applog(LOG_ERR, "Specified platform that does not exist");
-		return NULL;
+		goto err;
 	}
 
 	status = clGetPlatformInfo(platforms[opt_platform_id], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Getting Platform Info. (clGetPlatformInfo)", status);
-		return NULL;
+		goto err;
 	}
 	platform = platforms[opt_platform_id];
 
 	if (platform == NULL) {
 		perror("NULL platform found!\n");
-		return NULL;
+		goto err;
 	}
 
 	applog(LOG_INFO, "CL Platform vendor: %s", pbuff);
@@ -472,14 +513,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(vbuff), vbuff, NULL);
 	if (status == CL_SUCCESS)
 		applog(LOG_INFO, "CL Platform version: %s", vbuff);
+	clState->platform_ver_str = strdup(vbuff);
 
 	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Getting Device IDs (num)", status);
-		return NULL;
+		goto err;
 	}
 
-	if (numDevices > 0 ) {
+	if (numDevices <= 0)
+		goto err;
+	
+	{
 		devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
 
 		/* Now, get the device list data */
@@ -487,7 +532,9 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
 		if (status != CL_SUCCESS) {
 			applog(LOG_ERR, "Error %d: Getting Device IDs (list)", status);
-			return NULL;
+err2:
+			free(devices);
+			goto err;
 		}
 
 		applog(LOG_INFO, "List of devices:");
@@ -497,7 +544,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
 			if (status != CL_SUCCESS) {
 				applog(LOG_ERR, "Error %d: Getting Device Info", status);
-				return NULL;
+				goto err2;
 			}
 
 			applog(LOG_INFO, "\t%i\t%s", i, pbuff);
@@ -507,24 +554,23 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
 			if (status != CL_SUCCESS) {
 				applog(LOG_ERR, "Error %d: Getting Device Info", status);
-				return NULL;
+				goto err2;
 			}
 
 			applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
 			strncpy(name, pbuff, nameSize);
 		} else {
 			applog(LOG_ERR, "Invalid GPU %i", gpu);
-			return NULL;
+			goto err2;
 		}
-
-	} else return NULL;
+	}
 
 	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
 
 	clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Creating Context. (clCreateContextFromType)", status);
-		return NULL;
+		goto err2;
 	}
 
 	/////////////////////////////////////////////////////////////////
@@ -536,7 +582,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Creating Command Queue. (clCreateCommandQueue)", status);
-		return NULL;
+		goto err2;
 	}
 
 	/* Check for BFI INT support. Hopefully people don't mix devices with
@@ -548,45 +594,32 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_EXTENSIONS", status);
-		return NULL;
+		free(extensions);
+		goto err2;
 	}
 	find = strstr(extensions, camo);
 	if (find)
 		clState->hasBitAlign = true;
 	free(extensions);
 
-	/* Check for OpenCL >= 1.0 support, needed for global offset parameter usage. */
-	char * devoclver = malloc(1024);
-	const char * ocl10 = "OpenCL 1.0";
-
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_VERSION, 1024, (void *)devoclver, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_VERSION", status);
-		return NULL;
-	}
-	find = strstr(devoclver, ocl10);
-	if (!find)
-		clState->hasOpenCL11plus = true;
-	free(devoclver);
-
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
-		return NULL;
+		goto err2;
 	}
-	applog(LOG_DEBUG, "Preferred vector width reported %d", preferred_vwidth);
+	applog(LOG_DEBUG, "Preferred vector width reported %d", clState->preferred_vwidth);
 
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE", status);
-		return NULL;
+		goto err2;
 	}
 	applog(LOG_DEBUG, "Max work group size reported %"PRId64, (int64_t)clState->max_work_size);
 
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(clState->max_compute_units), (void *)&clState->max_compute_units, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_COMPUTE_UNITS", status);
-		return NULL;
+		goto err2;
 	}
 	if (data->_init_intensity)
 	{
@@ -594,13 +627,16 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		opencl_set_intensity_from_str(cgpu, data->_init_intensity);
 	}
 	else
-		data->oclthreads = intensity_to_oclthreads(MIN_INTENSITY, !opt_scrypt);
+	{
+		data->oclthreads = 1;
+		data->intensity = INT_MIN;
+	}
 	applog(LOG_DEBUG, "Max compute units reported %u", (unsigned)clState->max_compute_units);
 	
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&data->max_alloc, NULL);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
-		return NULL;
+		goto err2;
 	}
 	applog(LOG_DEBUG, "Max mem alloc size is %lu", (unsigned long)data->max_alloc);
 	
@@ -626,7 +662,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		}
 		else
 			applog(LOG_DEBUG, "Mesa OpenCL platform detected (v%ld.%ld)", major, minor);
-		ismesa = true;
+		clState->is_mesa = true;
 	}
 	
 	if (data->opt_opencl_binaries == OBU_DEFAULT)
@@ -638,7 +674,372 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		data->opt_opencl_binaries = OBU_LOADSAVE;
 #endif
 	}
+	
+	clState->devid = devices[gpu];
+	free(devices);
+	
+	/* For some reason 2 vectors is still better even if the card says
+	 * otherwise, and many cards lie about their max so use 256 as max
+	 * unless explicitly set on the command line. Tahiti prefers 1 */
+	if (strstr(name, "Tahiti"))
+		clState->preferred_vwidth = 1;
+	else
+	if (clState->preferred_vwidth > 2)
+		clState->preferred_vwidth = 2;
+
+	if (data->vwidth)
+		clState->vwidth = data->vwidth;
+	else {
+		clState->vwidth = clState->preferred_vwidth;
+		data->vwidth = clState->preferred_vwidth;
+	}
+
+	clState->outputBuffer = clCreateBuffer(clState->context, 0, OPENCL_MAX_BUFFERSIZE, NULL, &status);
+	if (status != CL_SUCCESS) {
+		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
+		// NOTE: devices is freed here, but still assigned
+		goto err;
+	}
+	
+	return clState;
+}
+
+/* Try to load a previously saved kernel binary from binaryfilename.
+ *
+ * The file contents are read into b, turned into kernelinfo->program with
+ * clCreateProgramWithBinary() and built for clState->devid. Returns true only
+ * if every step succeeds; a missing, empty or unreadable file yields false. */
+static
+bool opencl_load_kernel_binary(struct cgpu_info * const cgpu, _clState * const clState, struct opencl_kernel_info * const kernelinfo, const char * const binaryfilename, bytes_t * const b)
+{
+	cl_int status;
+	struct stat st;
+	size_t binsz = 0;
+	bool have_image = false;
+	
+	FILE * const fp = fopen(binaryfilename, "rb");
+	if (!fp)
+		return false;
+	
+	if (unlikely(stat(binaryfilename, &st)))
+		applog(LOG_DEBUG, "Unable to stat binary, generating from source");
+	else
+	if (st.st_size)
+	{
+		binsz = st.st_size;
+		bytes_resize(b, binsz);
+		if (fread(bytes_buf(b), 1, binsz, fp) == binsz)
+			have_image = true;
+		else
+			applog(LOG_ERR, "Unable to fread binaries");
+	}
+	// The file is no longer needed whichever way the read went
+	fclose(fp);
+	if (!have_image)
+		return false;
+	
+	kernelinfo->program = clCreateProgramWithBinary(clState->context, 1, &clState->devid, &binsz, (void*)&bytes_buf(b), &status, NULL);
+	if (status != CL_SUCCESS)
+		applogr(false, LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
+	
+	if (bfg_clBuildProgram(&kernelinfo->program, clState->devid, NULL) != CL_SUCCESS)
+		return false;
+	
+	applog(LOG_DEBUG, "Loaded binary image %s", binaryfilename);
+	return true;
+}
+
+/* Decide whether the kernel binary for this device should be BFI_INT patched.
+ *
+ * Always false when built without USE_SHA256D. Otherwise true only if the
+ * device has bit-align support, its name contains one of the listed GPU
+ * names, the platform version is ATI-Stream or an AMD-APP build below 1085,
+ * the kernel interface is one of diablo/diakgcn/phatk/poclbm, and OpenCL
+ * binary usage is set to OBU_LOADSAVE (a warning is logged if only that last
+ * condition fails). */
+static
+bool opencl_should_patch_bfi_int(struct cgpu_info * const cgpu, _clState * const clState, struct opencl_kernel_info * const kernelinfo)
+{
+#ifdef USE_SHA256D
+	static const char * const bfi_int_gpus[] = {
+		"Cedar", "Redwood", "Juniper", "Cypress", "Hemlock",
+		"Caicos", "Turks", "Barts", "Cayman", "Antilles",
+		"Wrestler", "Zacate", "WinterPark",
+	};
+	struct opencl_device_data * const data = cgpu->device_data;
+	const char * const platform_ver = clState->platform_ver_str;
+	const char *p;
+	bool known_gpu = false;
+	size_t i;
+	
+	if (!clState->hasBitAlign)
+		return false;
+	
+	for (i = 0; i < sizeof(bfi_int_gpus) / sizeof(bfi_int_gpus[0]); ++i)
+	{
+		if (strstr(cgpu->name, bfi_int_gpus[i]))
+		{
+			known_gpu = true;
+			break;
+		}
+	}
+	if (!known_gpu)
+		return false;
+	
+	// BFI_INT patching only works with AMD-APP up to 1084
+	if (!strstr(platform_ver, "ATI-Stream"))
+	{
+		p = strstr(platform_ver, "AMD-APP");
+		if (p)
+			p = strchr(p, '(');
+		if (!p || atoi(&p[1]) >= 1085)
+			return false;
+	}
+	
+	// Only these interfaces actually use BFI_INT hacking; anything else has never needed it
+	if (kernelinfo->interface != KL_DIABLO &&
+	    kernelinfo->interface != KL_DIAKGCN &&
+	    kernelinfo->interface != KL_PHATK &&
+	    kernelinfo->interface != KL_POCLBM)
+		return false;
+	
+	if (data->opt_opencl_binaries != OBU_LOADSAVE)
+		applogr(false, LOG_WARNING, "BFI_INT patch requiring device found, but OpenCL binary usage disabled; cannot BFI_INT patch");
+	
+	applog(LOG_DEBUG, "BFI_INT patch requiring device found, will patch source with BFI_INT");
+	return true;
+#else
+	return false;
+#endif
+}
 
+/* Create kernelinfo->program from OpenCL source text and build it for
+ * clState->devid.
+ *
+ * The compiler options are derived from kernelinfo (worksize, goffset,
+ * interface), clState (vector width, bit-align support), the device's scrypt
+ * settings and patchbfi. Returns true if both program creation and the build
+ * succeed, false otherwise (errors are logged). */
+static
+bool opencl_build_kernel(struct cgpu_info * const cgpu, _clState * const clState, struct opencl_kernel_info * const kernelinfo, const char *source, const size_t source_len, const bool patchbfi)
+{
+	struct opencl_device_data * const data = cgpu->device_data;
+	cl_int status;
+	
+	kernelinfo->program = clCreateProgramWithSource(clState->context, 1, &source, &source_len, &status);
+	if (status != CL_SUCCESS)
+		applogr(false, LOG_ERR, "Error %d: Loading source into cl_program (clCreateProgramWithSource)", status);
+
+	/* create a cl program executable for all the devices specified */
+	// Fixed-size stack buffer: no allocation that could fail. The formatted
+	// part is bounded by snprintf; the optional suffixes below add at most
+	// 34 more bytes, far less than the space left in 256.
+	char CompilerOptions[256];
+
+#ifdef USE_SCRYPT
+	if (kernelinfo->interface == KL_SCRYPT)
+		snprintf(CompilerOptions, sizeof(CompilerOptions), "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
+			data->lookup_gap, (unsigned int)data->thread_concurrency, (int)kernelinfo->wsize);
+	else
+#endif
+	{
+		// "VECTORS%d" is intentional: it defines VECTORS1, VECTORS2, ... as a macro name
+		snprintf(CompilerOptions, sizeof(CompilerOptions), "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
+			(int)kernelinfo->wsize, clState->vwidth, (int)kernelinfo->wsize * clState->vwidth);
+	}
+	applog(LOG_DEBUG, "Setting worksize to %"PRId64, (int64_t)kernelinfo->wsize);
+	if (clState->vwidth > 1)
+		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
+
+	if (clState->hasBitAlign)
+	{
+		strcat(CompilerOptions, " -D BITALIGN");
+		applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
+	}
+	else
+		applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");
+
+#ifdef USE_SHA256D
+	if (patchbfi)
+		strcat(CompilerOptions, " -D BFI_INT");
+#endif
+
+	if (kernelinfo->goffset)
+		strcat(CompilerOptions, " -D GOFFSET");
+
+	applog(LOG_DEBUG, "CompilerOptions: %s", CompilerOptions);
+	status = bfg_clBuildProgram(&kernelinfo->program, clState->devid, CompilerOptions);
+
+	return (status == CL_SUCCESS);
+}
+
+/* Fetch the compiled binary of kernelinfo->program into b.
+ *
+ * The program may be associated with several devices; the binary is taken
+ * from the first slot that reports a non-zero size. Returns true and fills b
+ * on success; returns false (after logging) if any OpenCL query fails, no
+ * slot holds a binary, or memory cannot be allocated. */
+static
+bool opencl_get_kernel_binary(struct cgpu_info * const cgpu, _clState * const clState, struct opencl_kernel_info * const kernelinfo, bytes_t * const b)
+{
+	cl_int status;
+	cl_uint slot, i, cpnd;
+	bool rv = false;
+	
+	status = clGetProgramInfo(kernelinfo->program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
+	if (unlikely(status != CL_SUCCESS))
+		applogr(false, LOG_ERR, "Error %d: Getting program info CL_PROGRAM_NUM_DEVICES. (clGetProgramInfo)", status);
+	
+	if (!cpnd)
+		return false;
+
+	size_t binary_sizes[cpnd];
+	status = clGetProgramInfo(kernelinfo->program, CL_PROGRAM_BINARY_SIZES, sizeof(binary_sizes), binary_sizes, NULL);
+	if (unlikely(status != CL_SUCCESS))
+		applogr(false, LOG_ERR, "Error %d: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetProgramInfo)", status);
+
+	/* The actual compiled binary ends up in a RANDOM slot! Grr, so we have
+	 * to iterate over all the binary slots and find where the real program
+	 * is. What the heck is this!? */
+	for (slot = 0; slot < cpnd; slot++)
+		if (binary_sizes[slot])
+			break;
+	
+	// slot == cpnd means every slot is empty; binary_sizes[slot] must not be read then
+	if (slot == cpnd)
+		applogr(false, LOG_ERR, "OpenCL compiler generated a zero sized binary, FAIL!");
+	applog(LOG_DEBUG, "%s: Binary size found in binary slot %u: %"PRId64, cgpu->dev_repr, (unsigned)slot, (int64_t)binary_sizes[slot]);
+	
+	// Zero-filled so the cleanup loop can free() every entry unconditionally
+	uint8_t **binaries = calloc(cpnd, sizeof(*binaries));
+	if (unlikely(!binaries))
+		applogr(false, LOG_ERR, "%s: Failed to allocate memory for kernel binaries", cgpu->dev_repr);
+	for (i = 0; i < cpnd; ++i)
+	{
+		binaries[i] = malloc(binary_sizes[i] + 1);
+		if (unlikely(!binaries[i]))
+		{
+			applog(LOG_ERR, "%s: Failed to allocate memory for kernel binaries", cgpu->dev_repr);
+			goto out;
+		}
+	}
+
+	/* copy over all of the generated binaries. */
+	// param_value_size must cover the whole array of cpnd pointers, not one pointer
+	status = clGetProgramInfo(kernelinfo->program, CL_PROGRAM_BINARIES, sizeof(*binaries) * cpnd, binaries, NULL);
+	if (unlikely(status != CL_SUCCESS))
+	{
+		applog(LOG_ERR, "Error %d: Getting program info. CL_PROGRAM_BINARIES (clGetProgramInfo)", status);
+		goto out;
+	}
+	
+	bytes_resize(b, binary_sizes[slot]);
+	memcpy(bytes_buf(b), binaries[slot], bytes_len(b));
+	rv = true;
+	
+out:
+	for (i = 0; i < cpnd; ++i)
+		free(binaries[i]);
+	free(binaries);
+	
+	return rv;
+}
+
+#ifdef USE_SHA256D
+	/* Patch the kernel if the hardware supports BFI_INT but it needs to
+	 * be hacked in.
+	 *
+	 * Operates in place on the compiled binary held in b. Returns false if
+	 * the ".text" or "ELF" markers cannot be located with advance(), true
+	 * once patch_opcodes() has been run over the located region.
+	 *
+	 * NOTE(review): the reads at w + 285 / w + 289 and the "w += start"
+	 * step are not checked against the remaining buffer size; a binary
+	 * with an unexpected layout could be read or patched out of bounds. */
+static
+bool opencl_patch_kernel_binary(bytes_t * const b)
+{
+	unsigned remaining = bytes_len(b);
+	char *w = (void*)bytes_buf(b);
+	unsigned int start, length;
+
+	/* Find 2nd incidence of .text, and copy the program's
+	* position and length at a fixed offset from that. Then go
+	* back and find the 2nd incidence of \x7fELF (rewind by one
+	* from ELF) and then patch the opcodes */
+	if (!advance(&w, &remaining, ".text"))
+		return false;
+	w++; remaining--;  // step one byte on, presumably so the next advance() finds a later match — confirm against advance()
+	if (!advance(&w, &remaining, ".text")) {
+		/* 32 bit builds only one ELF */
+		w--; remaining++;
+	}
+	memcpy(&start, w + 285, 4);  // 4-byte value at fixed offset 285 from the marker: start of the code region
+	memcpy(&length, w + 289, 4);  // 4-byte value at fixed offset 289: length handed to patch_opcodes()
+	w = (void*)bytes_buf(b);  // restart the search from the beginning of the binary
+	remaining = bytes_len(b);
+	if (!advance(&w, &remaining, "ELF"))
+		return false;
+	w++; remaining--;
+	if (!advance(&w, &remaining, "ELF")) {
+		/* 32 bit builds only one ELF */
+		w--; remaining++;
+	}
+	w--; remaining++;  // rewind by one so w is at the byte preceding "ELF"
+	w += start; remaining -= start;
+	applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching", w, remaining);
+	patch_opcodes(w, length);
+	return true;
+}
+
+/* Swap kernelinfo->program for one created from the binary image in b.
+ *
+ * Releases the current program, creates a new one from b for clState->devid
+ * and builds it. Returns true only if all three OpenCL steps succeed. */
+static
+bool opencl_replace_binary_kernel(struct cgpu_info * const cgpu, _clState * const clState, struct opencl_kernel_info * const kernelinfo, bytes_t * const b)
+{
+	cl_int err = clReleaseProgram(kernelinfo->program);
+	if (err != CL_SUCCESS)
+		applogr(false, LOG_ERR, "Error %d: Releasing program. (clReleaseProgram)", err);
+	
+	const size_t image_len = bytes_len(b);
+	kernelinfo->program = clCreateProgramWithBinary(clState->context, 1, &clState->devid, &image_len, (void*)&bytes_buf(b), &err, NULL);
+	if (err != CL_SUCCESS)
+		applogr(false, LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", err);
+	
+	err = bfg_clBuildProgram(&kernelinfo->program, clState->devid, NULL);
+	return (err == CL_SUCCESS);
+}
+#endif
+
+/* Write the kernel binary held in b to binaryfilename so it can be loaded on
+ * a later run.
+ *
+ * Returns true only if the file could be opened, every byte was written and
+ * the stream closed without error; buffered data is flushed by fclose(), so a
+ * failure there also means the file on disk is incomplete. */
+static
+bool opencl_save_kernel_binary(const char * const binaryfilename, bytes_t * const b)
+{
+	FILE *binaryfile;
+	
+	/* Save the binary to be loaded next time */
+	binaryfile = fopen(binaryfilename, "wb");
+	if (!binaryfile)
+		return false;
+	
+	// FIXME: Failure here results in a bad file; better to write and move-replace (but unlink before replacing for Windows)
+	const size_t len = bytes_len(b);
+	const bool wrote_all = (fwrite(bytes_buf(b), 1, len, binaryfile) == len);
+	// Always close, and treat a failed close (flush) as a failed save
+	const bool closed_ok = !fclose(binaryfile);
+	
+	return wrote_all && closed_ok;
+}
+
+/* Probe whether the device honours a global work offset.
+ *
+ * Builds a one-line test kernel that stores get_global_id(0) into
+ * clState->outputBuffer, runs a single work item with a known offset and
+ * checks that the value read back equals that offset. Returns true only if
+ * the whole sequence succeeds and the value matches. The temporary kernel
+ * and program objects are released on every path. */
+static
+bool opencl_test_goffset(_clState * const clState)
+{
+	// The test offset does not fit in a size_t narrower than 32 bits
+	if (sizeof(size_t) < sizeof(uint32_t))
+		return false;
+	
+	const char *source = "__kernel __attribute__((reqd_work_group_size(64, 1, 1))) void runtest(volatile __global uint *out) { *out = get_global_id(0); }";
+	const size_t source_len = strlen(source);
+	cl_int status;
+	cl_program program = clCreateProgramWithSource(clState->context, 1, &source, &source_len, &status);
+	if (status != CL_SUCCESS)
+		applogr(false, LOG_ERR, "Error %d: Loading %s code into cl_program (clCreateProgramWithSource)", status, "goffset test");
+	status = bfg_clBuildProgram(&program, clState->devid, "");
+	if (status != CL_SUCCESS)
+	{
+fail:
+		clReleaseProgram(program);
+		return false;
+	}
+	cl_kernel kernel = clCreateKernel(program, "runtest", &status);
+	if (status != CL_SUCCESS)
+		return_via_applog(fail, , LOG_ERR, "Error %d: Creating kernel from %s program (clCreateKernel)", status, "goffset test");
+	// static: the write below is non-blocking, so the source must outlive this call
+	static const uint32_t cleardata = 0;
+	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, sizeof(cleardata), &cleardata, 0, NULL, NULL);
+	if (status != CL_SUCCESS)
+	{
+		applog(LOG_ERR, "Error %d: Clearing output buffer for %s kernel (clEnqueueWriteBuffer)", status, "goffset test");
+fail2:
+		clReleaseKernel(kernel);
+		goto fail;
+	}
+	status = clSetKernelArg(kernel, 0, sizeof(clState->outputBuffer), &clState->outputBuffer);
+	if (status != CL_SUCCESS)
+		return_via_applog(fail2, , LOG_ERR, "Error %d: Setting kernel argument for %s kernel (clSetKernelArg)", status, "goffset test");
+	const size_t size_t_one = 1, test_goffset = 0xfabd0bf9;
+	status = clEnqueueNDRangeKernel(clState->commandQueue, kernel, 1, &test_goffset, &size_t_one, &size_t_one, 0,  NULL, NULL);
+	if (status != CL_SUCCESS)
+		return_via_applog(fail2, , LOG_DEBUG, "Error %d: Running %s kernel (clEnqueueNDRangeKernel)", status, "goffset test");
+	uint32_t resultdata;
+	// Blocking read: returns once the result has been copied out
+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, sizeof(resultdata), &resultdata, 0, NULL, NULL);
+	if (status != CL_SUCCESS)
+		return_via_applog(fail2, , LOG_DEBUG, "Error %d: Reading result from %s kernel (clEnqueueReadBuffer)", status, "goffset test");
+	applog(LOG_DEBUG, "%s kernel returned 0x%08lx for goffset 0x%08lx", "goffset test", (unsigned long)resultdata, (unsigned long)test_goffset);
+	
+	// The probe objects are not needed any more; release them on success too
+	const bool goffset_works = (resultdata == test_goffset);
+	clReleaseKernel(kernel);
+	clReleaseProgram(program);
+	return goffset_works;
+}
+
+bool opencl_load_kernel(struct cgpu_info * const cgpu, _clState * const clState, const char * const name, struct opencl_kernel_info * const kernelinfo, const char * const kernel_file, __maybe_unused const struct mining_algorithm * const malgo)
+{
+	const int gpu = cgpu->device_id;
+	struct opencl_device_data * const data = cgpu->device_data;
+	const char * const vbuff = clState->platform_ver_str;
+	cl_int status;
+	
 	/* Create binary filename based on parameters passed to opencl
 	 * compiler to ensure we only load a binary that matches what would
 	 * have otherwise created. The filename is:
@@ -650,45 +1051,12 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	char filename[255];
 	char numbuf[32];
 
-	if (!data->kernel_file)
-	{
-		if (opt_scrypt) {
-			applog(LOG_INFO, "Selecting scrypt kernel");
-			clState->chosen_kernel = KL_SCRYPT;
-		}
-		else if (ismesa)
-		{
-			applog(LOG_INFO, "Selecting phatk kernel for Mesa");
-			clState->chosen_kernel = KL_PHATK;
-		} else if (!strstr(name, "Tahiti") &&
-			/* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
-			(strstr(vbuff, "844.4") ||  // Linux 64 bit ATI 2.6 SDK
-			 strstr(vbuff, "851.4") ||  // Windows 64 bit ""
-			 strstr(vbuff, "831.4") ||
-			 strstr(vbuff, "898.1") ||  // 12.2 driver SDK 
-			 strstr(vbuff, "923.1") ||  // 12.4
-			 strstr(vbuff, "938.2") ||  // SDK 2.7
-			 strstr(vbuff, "1113.2"))) {// SDK 2.8
-				applog(LOG_INFO, "Selecting diablo kernel");
-				clState->chosen_kernel = KL_DIABLO;
-		/* Detect all 7970s, older ATI and NVIDIA and use poclbm */
-		} else if (strstr(name, "Tahiti") || !clState->hasBitAlign) {
-			applog(LOG_INFO, "Selecting poclbm kernel");
-			clState->chosen_kernel = KL_POCLBM;
-		/* Use phatk for the rest R5xxx R6xxx */
-		} else {
-			applog(LOG_INFO, "Selecting phatk kernel");
-			clState->chosen_kernel = KL_PHATK;
-		}
-		data->kernel_file = strdup(opencl_get_kernel_interface_name(clState->chosen_kernel));
-	}
-	
-	snprintf(filename, sizeof(filename), "%s.cl", data->kernel_file);
-	snprintf(binaryfilename, sizeof(filename), "%s", data->kernel_file);
+	// Build the source filename and the stem of the cached-binary filename.
+	// Each write is bounded by the size of its own destination buffer.
+	snprintf(filename, sizeof(filename), "%s.cl", kernel_file);
+	snprintf(binaryfilename, sizeof(binaryfilename), "%s", kernel_file);
 	int pl;
-	char *source = file_contents(filename, &pl);
+	char *source = opencl_kernel_source(filename, &pl, &kernelinfo->interface, NULL);
 	if (!source)
-		return NULL;
+		return false;
 	{
 		uint8_t hash[0x20];
 		char hashhex[7];
@@ -696,29 +1064,14 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		bin2hex(hashhex, hash, 3);
 		tailsprintf(binaryfilename, sizeof(binaryfilename), "-%s", hashhex);
 	}
-	s = strstr(source, "kernel-interface:");
-	if (s)
+	switch (kernelinfo->interface)
 	{
-		for (s = &s[17]; s[0] && isspace(s[0]); ++s)
-			if (s[0] == '\n' || s[0] == '\r')
-				break;
-		for (q = s; q[0] && !isspace(q[0]); ++q)
-		{}  // Find end of string
-		const size_t kinamelen = q - s;
-		char kiname[kinamelen + 1];
-		memcpy(kiname, s, kinamelen);
-		kiname[kinamelen] = '\0';
-		clState->chosen_kernel = select_kernel(kiname);
-	}
-	else
-	if (opt_scrypt)
-		clState->chosen_kernel = KL_SCRYPT;
-	switch (clState->chosen_kernel) {
 		case KL_NONE:
 			applog(LOG_ERR, "%s: Failed to identify kernel interface for %s",
-			       cgpu->dev_repr, data->kernel_file);
+			       cgpu->dev_repr, kernel_file);
 			free(source);
-			return NULL;
+			return false;
+#ifdef USE_SHA256D
 		case KL_PHATK:
 			if ((strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
 			     strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
@@ -729,51 +1082,76 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 				applog(LOG_WARNING, "Downgrade your SDK and delete any .bin files before starting again.");
 				applog(LOG_WARNING, "Or allow BFGMiner to automatically choose a more suitable kernel.");
 			}
+#endif
 		default:
 			;
 	}
 	applog(LOG_DEBUG, "%s: Using kernel %s with interface %s",
-	       cgpu->dev_repr, data->kernel_file,
-	       opencl_get_kernel_interface_name(clState->chosen_kernel));
+	       cgpu->dev_repr, kernel_file,
+	       opencl_get_kernel_interface_name(kernelinfo->interface));
 
-	/* For some reason 2 vectors is still better even if the card says
-	 * otherwise, and many cards lie about their max so use 256 as max
-	 * unless explicitly set on the command line. Tahiti prefers 1 */
-	if (strstr(name, "Tahiti"))
-		preferred_vwidth = 1;
-	else if (preferred_vwidth > 2)
-		preferred_vwidth = 2;
-
-	if (data->vwidth)
-		clState->vwidth = data->vwidth;
-	else {
-		clState->vwidth = preferred_vwidth;
-		data->vwidth = preferred_vwidth;
+	{
+		int kernel_goffset_support = 0;  // 0 = none; 1 = optional; 2 = required
+		if (strstr(source, "def GOFFSET"))
+			kernel_goffset_support = 1;
+		else
+		if (strstr(source, " base,"))
+			kernel_goffset_support = 0;
+		else
+			kernel_goffset_support = 2;
+		bool device_goffset_support = false;
+		switch (data->use_goffset)
+		{
+			case BTS_TRUE:
+				device_goffset_support = true;
+				break;
+			case BTS_FALSE:
+				// if the kernel requires goffset, don't allow the user to disable it
+				if (kernel_goffset_support == 2)
+				{
+					if (opencl_test_goffset(clState))
+						device_goffset_support = true;
+				}
+				break;
+			case BTS_UNKNOWN:
+				data->use_goffset = opencl_test_goffset(clState);
+				if (data->use_goffset)
+					device_goffset_support = true;
+				break;
+		}
+		applog(LOG_DEBUG, "%s: goffset support: device=%s kernel=%s", cgpu->dev_repr, device_goffset_support ? "yes" : "no", (kernel_goffset_support == 2) ? "required" : ((kernel_goffset_support == 1) ? "optional" : "none"));
+		// Enable goffset for this kernel only when both the device and the kernel support it.
+		if (device_goffset_support)
+		{
+			if (kernel_goffset_support)
+				kernelinfo->goffset = true;
+		}
+		else
+		if (kernel_goffset_support == 2)
+		{
+			// FIXME: Determine this before min_nonce_diff returns positive
+			applog(LOG_ERR, "%s: Need goffset support!", cgpu->dev_repr);
+			// The kernel source buffer is still owned by this function; release it like the other failure paths do
+			free(source);
+			return false;
+		}
 	}
 
-	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
-		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
-			clState->goffset = true;
-
 	if (data->work_size && data->work_size <= clState->max_work_size)
-		clState->wsize = data->work_size;
-	else if (opt_scrypt)
-		clState->wsize = 256;
-	else if (strstr(name, "Tahiti"))
-		clState->wsize = 64;
+		kernelinfo->wsize = data->work_size;
+	else
+#ifdef USE_SCRYPT
+	if (malgo->algo == POW_SCRYPT)
+		kernelinfo->wsize = 256;
 	else
-		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-	data->work_size = clState->wsize;
+#endif
+	if (strstr(name, "Tahiti"))
+		kernelinfo->wsize = 64;
+	else
+		kernelinfo->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
 
 #ifdef USE_SCRYPT
-	if (opt_scrypt) {
-		if (!data->opt_lg) {
-			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
-			data->lookup_gap = 2;
-		} else
-			data->lookup_gap = data->opt_lg;
-
-		if (!data->opt_tc) {
+	if (kernelinfo->interface == KL_SCRYPT)
+	{
+		if (!data->thread_concurrency)
+		{
 			unsigned int sixtyfours;
 
 			sixtyfours =  data->max_alloc / 131072 / 64 - 1;
@@ -784,43 +1162,26 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 					data->thread_concurrency = data->shaders * 5;
 			}
 			applog(LOG_DEBUG, "GPU %u: selecting thread concurrency of %lu", gpu,  (unsigned long)data->thread_concurrency);
-		} else
-			data->thread_concurrency = data->opt_tc;
+		}
 	}
 #endif
 
-	FILE *binaryfile;
-	size_t *binary_sizes;
-	char **binaries;
-	size_t sourceSize[] = {(size_t)pl};
-	cl_uint slot, cpnd;
-
-	slot = cpnd = 0;
-
-	binary_sizes = calloc(sizeof(size_t) * MAX_GPUDEVICES * 4, 1);
-	if (unlikely(!binary_sizes)) {
-		applog(LOG_ERR, "Unable to calloc binary_sizes");
-		return NULL;
-	}
-	binaries = calloc(sizeof(char *) * MAX_GPUDEVICES * 4, 1);
-	if (unlikely(!binaries)) {
-		applog(LOG_ERR, "Unable to calloc binaries");
-		return NULL;
-	}
-
 	strcat(binaryfilename, name);
-	if (clState->goffset)
+	if (kernelinfo->goffset)
 		strcat(binaryfilename, "g");
-	if (opt_scrypt) {
 #ifdef USE_SCRYPT
+	if (kernelinfo->interface == KL_SCRYPT)
+	{
 		sprintf(numbuf, "lg%utc%u", data->lookup_gap, (unsigned int)data->thread_concurrency);
 		strcat(binaryfilename, numbuf);
+	}
+	else
 #endif
-	} else {
+	{
 		sprintf(numbuf, "v%d", clState->vwidth);
 		strcat(binaryfilename, numbuf);
 	}
-	sprintf(numbuf, "w%d", (int)clState->wsize);
+	sprintf(numbuf, "w%d", (int)kernelinfo->wsize);
 	strcat(binaryfilename, numbuf);
 	sprintf(numbuf, "l%d", (int)sizeof(long));
 	strcat(binaryfilename, numbuf);
@@ -830,301 +1191,124 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	applog(LOG_DEBUG, "OCL%2u: Configured OpenCL kernel name: %s", gpu, binaryfilename);
 	strcat(binaryfilename, ".bin");
 	
-	if (!(data->opt_opencl_binaries & OBU_LOAD))
-		goto build;
-
-	binaryfile = fopen(binaryfilename, "rb");
-	if (!binaryfile) {
-		applog(LOG_DEBUG, "No binary found, generating from source");
-	} else {
-		struct stat binary_stat;
-
-		if (unlikely(stat(binaryfilename, &binary_stat))) {
-			applog(LOG_DEBUG, "Unable to stat binary, generating from source");
-			fclose(binaryfile);
-			goto build;
-		}
-		if (!binary_stat.st_size)
-			goto build;
-
-		binary_sizes[slot] = binary_stat.st_size;
-		binaries[slot] = (char *)calloc(binary_sizes[slot], 1);
-		if (unlikely(!binaries[slot])) {
-			applog(LOG_ERR, "Unable to calloc binaries");
-			fclose(binaryfile);
-			return NULL;
-		}
-
-		if (fread(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot]) {
-			applog(LOG_ERR, "Unable to fread binaries");
-			fclose(binaryfile);
-			free(binaries[slot]);
-			goto build;
-		}
-
-		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)binaries, &status, NULL);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
-			fclose(binaryfile);
-			free(binaries[slot]);
-			goto build;
+	bool patchbfi = opencl_should_patch_bfi_int(cgpu, clState, kernelinfo);
+	
+	bytes_t binary_bytes = BYTES_INIT;
+	bool loaded_kernel = false;
+	if (data->opt_opencl_binaries & OBU_LOAD)
+	{
+		if (opencl_load_kernel_binary(cgpu, clState, kernelinfo, binaryfilename, &binary_bytes))
+			loaded_kernel = true;
+		else
+		{
+			bytes_free(&binary_bytes);
+			applog(LOG_DEBUG, "No usable binary found, generating from source");
 		}
-
-		fclose(binaryfile);
-		applog(LOG_DEBUG, "Loaded binary image %s", binaryfilename);
-
-		goto built;
-	}
-
-	/////////////////////////////////////////////////////////////////
-	// Load CL file, build CL program object, create CL kernel object
-	/////////////////////////////////////////////////////////////////
-
-build:
-	clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithSource)", status);
-		return NULL;
 	}
-
-	/* create a cl program executable for all the devices specified */
-	char *CompilerOptions = calloc(1, 256);
-
-#ifdef USE_SCRYPT
-	if (opt_scrypt)
-		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			data->lookup_gap, (unsigned int)data->thread_concurrency, (int)clState->wsize);
-	else
-#endif
+	
+	if (!loaded_kernel)
 	{
-		sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
-			(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
-	}
-	applog(LOG_DEBUG, "Setting worksize to %"PRId64, (int64_t)clState->wsize);
-	if (clState->vwidth > 1)
-		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
-
-	if (clState->hasBitAlign) {
-		strcat(CompilerOptions, " -D BITALIGN");
-		applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
-		if (strstr(name, "Cedar") ||
-		    strstr(name, "Redwood") ||
-		    strstr(name, "Juniper") ||
-		    strstr(name, "Cypress" ) ||
-		    strstr(name, "Hemlock" ) ||
-		    strstr(name, "Caicos" ) ||
-		    strstr(name, "Turks" ) ||
-		    strstr(name, "Barts" ) ||
-		    strstr(name, "Cayman" ) ||
-		    strstr(name, "Antilles" ) ||
-		    strstr(name, "Wrestler" ) ||
-		    strstr(name, "Zacate" ) ||
-		    strstr(name, "WinterPark" ))
+build:
+		if (!opencl_build_kernel(cgpu, clState, kernelinfo, source, pl, patchbfi))
 		{
-			// BFI_INT patching only works with AMD-APP up to 1084
-			if (strstr(vbuff, "ATI-Stream"))
-				patchbfi = true;
-			else
-			if ((s = strstr(vbuff, "AMD-APP")) && (s = strchr(s, '(')) && atoi(&s[1]) < 1085)
-				patchbfi = true;
+			free(source);
+			return false;
 		}
-	} else
-		applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");
-
-	if (patchbfi) {
-		if (data->opt_opencl_binaries == OBU_LOADSAVE)
+		
+		if ((patchbfi || (data->opt_opencl_binaries & OBU_SAVE)) && !bytes_len(&binary_bytes))
 		{
-			strcat(CompilerOptions, " -D BFI_INT");
-			applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT");
+			if (!opencl_get_kernel_binary(cgpu, clState, kernelinfo, &binary_bytes))
+			{
+				bytes_free(&binary_bytes);
+				applog(LOG_DEBUG, "%s: Failed to get compiled kernel binary from OpenCL (cannot save it)", cgpu->dev_repr);
+				// NOTE: empty binary_bytes will fail BFI_INT patch on its own
+			}
 		}
-		else
+		
+#ifdef USE_SHA256D
+		if (patchbfi)
 		{
-			patchbfi = false;
-			applog(LOG_WARNING, "BFI_INT patch requiring device found, but OpenCL binary usage disabled; cannot BFI_INT patch");
-		}
-	} else
-		applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");
-
-	if (clState->goffset)
-		strcat(CompilerOptions, " -D GOFFSET");
-
-	if (!clState->hasOpenCL11plus)
-		strcat(CompilerOptions, " -D OCL1");
-
-	applog(LOG_DEBUG, "CompilerOptions: %s", CompilerOptions);
-	status = bfg_clBuildProgram(clState, devices[gpu], CompilerOptions);
-	free(CompilerOptions);
-
-	if (status != CL_SUCCESS)
-		return NULL;
-
-	prog_built = true;
-	
-	if (!(data->opt_opencl_binaries & OBU_SAVE))
-		goto built;
-
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_NUM_DEVICES. (clGetProgramInfo)", status);
-		return NULL;
-	}
-
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*cpnd, binary_sizes, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetProgramInfo)", status);
-		return NULL;
-	}
-
-	/* The actual compiled binary ends up in a RANDOM slot! Grr, so we have
-	 * to iterate over all the binary slots and find where the real program
-	 * is. What the heck is this!? */
-	for (slot = 0; slot < cpnd; slot++)
-		if (binary_sizes[slot])
-			break;
-
-	/* copy over all of the generated binaries. */
-	applog(LOG_DEBUG, "Binary size for gpu %u found in binary slot %u: %"PRId64,
-	       gpu, (unsigned)slot, (int64_t)binary_sizes[slot]);
-	if (!binary_sizes[slot]) {
-		applog(LOG_ERR, "OpenCL compiler generated a zero sized binary, FAIL!");
-		return NULL;
-	}
-	binaries[slot] = calloc(sizeof(char) * binary_sizes[slot], 1);
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARIES, sizeof(char *) * cpnd, binaries, NULL );
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error %d: Getting program info. CL_PROGRAM_BINARIES (clGetProgramInfo)", status);
-		return NULL;
-	}
-
-	/* Patch the kernel if the hardware supports BFI_INT but it needs to
-	 * be hacked in */
-	if (patchbfi) {
-		unsigned remaining = binary_sizes[slot];
-		char *w = binaries[slot];
-		unsigned int start, length;
-
-		/* Find 2nd incidence of .text, and copy the program's
-		* position and length at a fixed offset from that. Then go
-		* back and find the 2nd incidence of \x7ELF (rewind by one
-		* from ELF) and then patch the opcocdes */
-		if (!advance(&w, &remaining, ".text"))
-			goto build;
-		w++; remaining--;
-		if (!advance(&w, &remaining, ".text")) {
-			/* 32 bit builds only one ELF */
-			w--; remaining++;
-		}
-		memcpy(&start, w + 285, 4);
-		memcpy(&length, w + 289, 4);
-		w = binaries[slot]; remaining = binary_sizes[slot];
-		if (!advance(&w, &remaining, "ELF"))
-			goto build;
-		w++; remaining--;
-		if (!advance(&w, &remaining, "ELF")) {
-			/* 32 bit builds only one ELF */
-			w--; remaining++;
-		}
-		w--; remaining++;
-		w += start; remaining -= start;
-		applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching",
-			w, remaining);
-		patch_opcodes(w, length);
-
-		status = clReleaseProgram(clState->program);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Releasing program. (clReleaseProgram)", status);
-			return NULL;
+			// BFI_INT needs two steps to succeed: patch the binary image in memory, then
+			// swap the patched image into the program. If either step fails, fall back to
+			// a plain rebuild without BFI_INT.
+			// NOTE(review): assumes both helpers return true on success - confirm against their definitions.
+			if (!(opencl_patch_kernel_binary(&binary_bytes) && opencl_replace_binary_kernel(cgpu, clState, kernelinfo, &binary_bytes)))
+			{
+				applog(LOG_DEBUG, "%s: BFI_INT patching failed, rebuilding without it", cgpu->dev_repr);
+				patchbfi = false;
+				// Drop the unusable binary so the rebuild fetches a fresh one if it is needed for saving
+				bytes_free(&binary_bytes);
+				goto build;
+			}
 		}
-
-		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)&binaries[slot], &status, NULL);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
-			return NULL;
+#endif
+		
+		if (data->opt_opencl_binaries & OBU_SAVE)
+		{
+			if (!opencl_save_kernel_binary(binaryfilename, &binary_bytes))
+				applog(LOG_DEBUG, "Unable to save file %s", binaryfilename);
 		}
-
-		/* Program needs to be rebuilt */
-		prog_built = false;
 	}
-
+	
 	free(source);
-
-	/* Save the binary to be loaded next time */
-	binaryfile = fopen(binaryfilename, "wb");
-	if (!binaryfile) {
-		/* Not a fatal problem, just means we build it again next time */
-		applog(LOG_DEBUG, "Unable to create file %s", binaryfilename);
-	} else {
-		if (unlikely(fwrite(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot])) {
-			applog(LOG_ERR, "Unable to fwrite to binaryfile");
-			return NULL;
-		}
-		fclose(binaryfile);
-	}
-built:
-	if (binaries[slot])
-		free(binaries[slot]);
-	free(binaries);
-	free(binary_sizes);
-
+	bytes_free(&binary_bytes);
+	
 	applog(LOG_INFO, "Initialising kernel %s with%s bitalign, %"PRId64" vectors and worksize %"PRIu64,
-	       filename, clState->hasBitAlign ? "" : "out", (int64_t)clState->vwidth, (uint64_t)clState->wsize);
-
-	if (!prog_built) {
-		/* create a cl program executable for all the devices specified */
-		status = bfg_clBuildProgram(clState, devices[gpu], NULL);
-		if (status != CL_SUCCESS)
-			return NULL;
-	}
+	       filename, clState->hasBitAlign ? "" : "out", (int64_t)clState->vwidth, (uint64_t)kernelinfo->wsize);
 
 	/* get a kernel object handle for a kernel with the given name */
-	clState->kernel = clCreateKernel(clState->program, "search", &status);
+	kernelinfo->kernel = clCreateKernel(kernelinfo->program, "search", &status);
 	if (status != CL_SUCCESS) {
 		applog(LOG_ERR, "Error %d: Creating Kernel from program. (clCreateKernel)", status);
-		return NULL;
+		return false;
 	}
 	
 	free((void*)cgpu->kname);
-	cgpu->kname = strdup(data->kernel_file);
+	cgpu->kname = strdup(kernel_file);
 
+#ifdef MAX_CLBUFFER0_SZ
+	switch (kernelinfo->interface)
+	{
 #ifdef USE_SCRYPT
-	if (opt_scrypt) {
-		size_t ipt = (1024 / data->lookup_gap + (1024 % data->lookup_gap > 0));
-		size_t bufsize = 128 * ipt * data->thread_concurrency;
-
-		/* Use the max alloc value which has been rounded to a power of
-		 * 2 greater >= required amount earlier */
-		if (bufsize > data->max_alloc) {
-			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu", gpu, (unsigned long)data->max_alloc);
-			applog(LOG_WARNING, "Your scrypt settings come to %lu", (unsigned long)bufsize);
-		}
-		applog(LOG_DEBUG, "Creating scrypt buffer sized %lu", (unsigned long)bufsize);
-		clState->padbufsize = bufsize;
-
-		/* This buffer is weird and might work to some degree even if
-		 * the create buffer call has apparently failed, so check if we
-		 * get anything back before we call it a failure. */
-		clState->padbuffer8 = NULL;
-		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
-		if (status != CL_SUCCESS && !clState->padbuffer8) {
-			applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
-			return NULL;
-		}
-
-		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
-			return NULL;
-		}
-		clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, SCRYPT_BUFFERSIZE, NULL, &status);
-	} else
+		case KL_SCRYPT:
+			if (!clState->padbufsize)
+			{
+				size_t ipt = (1024 / data->lookup_gap + (1024 % data->lookup_gap > 0));
+				size_t bufsize = 128 * ipt * data->thread_concurrency;
+
+				/* Use the max alloc value which has been rounded to a power of
+				 * 2 greater >= required amount earlier */
+				if (bufsize > data->max_alloc) {
+					applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu", gpu, (unsigned long)data->max_alloc);
+					applog(LOG_WARNING, "Your scrypt settings come to %lu", (unsigned long)bufsize);
+				}
+				applog(LOG_DEBUG, "Creating scrypt buffer sized %lu", (unsigned long)bufsize);
+				clState->padbufsize = bufsize;
+
+				/* This buffer is weird and might work to some degree even if
+				 * the create buffer call has apparently failed, so check if we
+				 * get anything back before we call it a failure. */
+				clState->padbuffer8 = NULL;
+				clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
+				if (status != CL_SUCCESS && !clState->padbuffer8) {
+					applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
+					return false;
+				}
+			}
+			// NOTE: fallthru
 #endif
-	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
-		return NULL;
+#ifdef USE_OPENCL_FULLHEADER
+		case KL_FULLHEADER:
+#endif
+			if (!clState->CLbuffer0)
+			{
+				clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, MAX_CLBUFFER0_SZ, NULL, &status);
+				if (status != CL_SUCCESS) {
+					applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
+					return false;
+				}
+			}
+			break;
+		default:
+			break;
 	}
+#endif
 
-	return clState;
+	kernelinfo->loaded = true;
+	return true;
 }
-#endif /* HAVE_OPENCL */
-

+ 27 - 14
ocl.h

@@ -1,41 +1,54 @@
 #ifndef BFG_OCL_H
 #define BFG_OCL_H
 
-#include "config.h"
-
 #include <stdbool.h>
 #include <stdio.h>
 
-#ifdef HAVE_OPENCL
 #include "CL/cl.h"
 
 #include "miner.h"
 
-typedef struct {
+#define SCRYPT_CLBUFFER0_SZ      (128)
+#define FULLHEADER_CLBUFFER0_SZ  ( 80)
+#ifdef USE_SCRYPT
+#	define MAX_CLBUFFER0_SZ  SCRYPT_CLBUFFER0_SZ
+#elif USE_OPENCL_FULLHEADER
+#	define MAX_CLBUFFER0_SZ  FULLHEADER_CLBUFFER0_SZ
+#endif
+
+struct mining_algorithm;
+struct opencl_kernel_info;
+typedef struct _clState _clState;
+
+struct _clState {
+	cl_device_id devid;
+	char *platform_ver_str;
+	bool is_mesa;
+	
 	cl_context context;
-	cl_kernel kernel;
 	cl_command_queue commandQueue;
-	cl_program program;
+	
 	cl_mem outputBuffer;
-#ifdef USE_SCRYPT
+#ifdef MAX_CLBUFFER0_SZ
 	cl_mem CLbuffer0;
+#endif
+#ifdef USE_SCRYPT
 	cl_mem padbuffer8;
 	size_t padbufsize;
 	void * cldata;
 #endif
 	bool hasBitAlign;
-	bool hasOpenCL11plus;
-	bool goffset;
+	cl_uint preferred_vwidth;
 	cl_uint vwidth;
 	size_t max_work_size;
-	size_t wsize;
 	cl_uint max_compute_units;
-	enum cl_kernels chosen_kernel;
-} _clState;
+};
 
 extern FILE *opencl_open_kernel(const char *filename);
 extern char *file_contents(const char *filename, int *length);
+extern char *opencl_kernel_source(const char *filename, int *out_sourcelen, enum cl_kernels *out_kinterface, struct mining_algorithm **);
 extern int clDevicesNum(void);
-extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize);
-#endif /* HAVE_OPENCL */
+extern _clState *opencl_create_clState(unsigned int gpu, char *name, size_t nameSize);
+extern bool opencl_load_kernel(struct cgpu_info *, _clState *clState, const char *name, struct opencl_kernel_info *, const char *kernel_file, const struct mining_algorithm *);
+
 #endif /* __OCL_H__ */

+ 1 - 1
opencl/diablo.cl

@@ -17,7 +17,7 @@
  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-// kernel-interface: diablo
+// kernel-interface: diablo SHA256d
 
 #ifdef VECTORS4
 	typedef uint4 z;

+ 1 - 1
opencl/diakgcn.cl

@@ -3,7 +3,7 @@
 // Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3.
 // The kernel was rewritten by me (Diapolo) and is still public-domain!
 
-// kernel-interface: diakgcn
+// kernel-interface: diakgcn SHA256d
 
 #ifdef VECTORS4
 	typedef uint4 u;

+ 133 - 0
opencl/keccak.cl

@@ -0,0 +1,133 @@
+/*
+ * Scrypt-jane public domain, OpenCL implementation of scrypt(keccak,chacha,SCRYPTN,1,1) 2013 mtrlt
+ */
+
+// kernel-interface: fullheader Keccak
+
+#define ARGS_25(x) x ## 0, x ## 1, x ## 2, x ## 3, x ## 4, x ## 5, x ## 6, x ## 7, x ## 8, x ## 9, x ## 10, x ## 11, x ## 12, x ## 13, x ## 14, x ## 15, x ## 16, x ## 17, x ## 18, x ## 19, x ## 20, x ## 21, x ## 22, x ## 23, x ## 24
+
+// Per-round constants, one per iteration of the 24-round loop in keccak_block_noabsorb.
+// Each 64-bit constant is stored as a uint2 with the low 32 bits in .x and the high 32 bits in .y;
+// RND(i) XORs entry i into lane s0 as its final step.
+__constant uint2 keccak_round_constants[24] = {
+	(uint2)(0x00000001,0x00000000), (uint2)(0x00008082,0x00000000),
+	(uint2)(0x0000808a,0x80000000), (uint2)(0x80008000,0x80000000),
+	(uint2)(0x0000808b,0x00000000), (uint2)(0x80000001,0x00000000),
+	(uint2)(0x80008081,0x80000000), (uint2)(0x00008009,0x80000000),
+	(uint2)(0x0000008a,0x00000000), (uint2)(0x00000088,0x00000000),
+	(uint2)(0x80008009,0x00000000), (uint2)(0x8000000a,0x00000000),
+	(uint2)(0x8000808b,0x00000000), (uint2)(0x0000008b,0x80000000),
+	(uint2)(0x00008089,0x80000000), (uint2)(0x00008003,0x80000000),
+	(uint2)(0x00008002,0x80000000), (uint2)(0x00000080,0x80000000),
+	(uint2)(0x0000800a,0x00000000), (uint2)(0x8000000a,0x80000000),
+	(uint2)(0x80008081,0x80000000), (uint2)(0x00008080,0x80000000),
+	(uint2)(0x80000001,0x00000000), (uint2)(0x80008008,0x80000000)
+};
+
+// Rotate a 64-bit lane left by y bits. The lane is held as a uint2 with the low word in .x
+// and the high word in .y; bits leaving the top of one word enter the bottom of the other.
+// Every call in this file passes a count between 1 and 30.
+uint2 ROTL64_1(const uint2 x, const uint y)
+{
+	const uint carry = 32 - y;
+	const uint lo = (x.x << y) ^ (x.y >> carry);
+	const uint hi = (x.y << y) ^ (x.x >> carry);
+	return (uint2)(lo, hi);
+}
+// Rotate a 64-bit lane (uint2, low word in .x) left by 32 + y bits: the two words trade
+// places and are then rotated by the remaining y bits.
+// Every call in this file passes a count between 1 and 30.
+uint2 ROTL64_2(const uint2 x, const uint y)
+{
+	const uint carry = 32 - y;
+	const uint lo = (x.y << y) ^ (x.x >> carry);
+	const uint hi = (x.x << y) ^ (x.y >> carry);
+	return (uint2)(lo, hi);
+}
+
+// RND(i): apply one permutation round, in place, to the 25 lanes reached through the
+// pointers s0..s24, using round constant number i.
+// Requirements on the expansion site: uint2 pointers s0..s24 and uint2 temporaries m0..m6
+// must be in scope (keccak_block_noabsorb provides both).
+// Steps, in order:
+//   1. m0..m4 combine the XOR of one column of five lanes with the 1-bit rotation of another column.
+//   2. Each lane is XORed with one of m0..m4, rotated, and stored into a different lane.
+//      The assignments form a chain, so their order must not be changed; m5 saves the value
+//      derived from s1 before s1 is overwritten.
+//   3. Each row of five lanes is mixed with bitselect, with m5/m6 holding the row's first two
+//      lanes so they are still available after being overwritten.
+//   4. The round constant is XORed into s0.
+// NOTE(review): this looks like the theta / rho+pi / chi / iota steps of Keccak-f[1600] - confirm against the reference.
+#define RND(i) \
+do{  \
+		m0 = *s0 ^ *s5 ^ *s10 ^ *s15 ^ *s20 ^ ROTL64_1(*s2 ^ *s7 ^ *s12 ^ *s17 ^ *s22, 1);\
+		m1 = *s1 ^ *s6 ^ *s11 ^ *s16 ^ *s21 ^ ROTL64_1(*s3 ^ *s8 ^ *s13 ^ *s18 ^ *s23, 1);\
+		m2 = *s2 ^ *s7 ^ *s12 ^ *s17 ^ *s22 ^ ROTL64_1(*s4 ^ *s9 ^ *s14 ^ *s19 ^ *s24, 1);\
+		m3 = *s3 ^ *s8 ^ *s13 ^ *s18 ^ *s23 ^ ROTL64_1(*s0 ^ *s5 ^ *s10 ^ *s15 ^ *s20, 1);\
+		m4 = *s4 ^ *s9 ^ *s14 ^ *s19 ^ *s24 ^ ROTL64_1(*s1 ^ *s6 ^ *s11 ^ *s16 ^ *s21, 1);\
+\
+		m5 = *s1^m0;\
+\
+		*s0 ^= m4;\
+		*s1 = ROTL64_2(*s6^m0, 12);\
+		*s6 = ROTL64_1(*s9^m3, 20);\
+		*s9 = ROTL64_2(*s22^m1, 29);\
+		*s22 = ROTL64_2(*s14^m3, 7);\
+		*s14 = ROTL64_1(*s20^m4, 18);\
+		*s20 = ROTL64_2(*s2^m1, 30);\
+		*s2 = ROTL64_2(*s12^m1, 11);\
+		*s12 = ROTL64_1(*s13^m2, 25);\
+		*s13 = ROTL64_1(*s19^m3,  8);\
+		*s19 = ROTL64_2(*s23^m2, 24);\
+		*s23 = ROTL64_2(*s15^m4, 9);\
+		*s15 = ROTL64_1(*s4^m3, 27);\
+		*s4 = ROTL64_1(*s24^m3, 14);\
+		*s24 = ROTL64_1(*s21^m0,  2);\
+		*s21 = ROTL64_2(*s8^m2, 23);\
+		*s8 = ROTL64_2(*s16^m0, 13);\
+		*s16 = ROTL64_2(*s5^m4, 4);\
+		*s5 = ROTL64_1(*s3^m2, 28);\
+		*s3 = ROTL64_1(*s18^m2, 21);\
+		*s18 = ROTL64_1(*s17^m1, 15);\
+		*s17 = ROTL64_1(*s11^m0, 10);\
+		*s11 = ROTL64_1(*s7^m1,  6);\
+		*s7 = ROTL64_1(*s10^m4,  3);\
+		*s10 = ROTL64_1(      m5,  1);\
+		\
+		m5 = *s0; m6 = *s1; *s0 = bitselect(*s0^*s2,*s0,*s1); *s1 = bitselect(*s1^*s3,*s1,*s2); *s2 = bitselect(*s2^*s4,*s2,*s3); *s3 = bitselect(*s3^m5,*s3,*s4); *s4 = bitselect(*s4^m6,*s4,m5);\
+		m5 = *s5; m6 = *s6; *s5 = bitselect(*s5^*s7,*s5,*s6); *s6 = bitselect(*s6^*s8,*s6,*s7); *s7 = bitselect(*s7^*s9,*s7,*s8); *s8 = bitselect(*s8^m5,*s8,*s9); *s9 = bitselect(*s9^m6,*s9,m5);\
+		m5 = *s10; m6 = *s11; *s10 = bitselect(*s10^*s12,*s10,*s11); *s11 = bitselect(*s11^*s13,*s11,*s12); *s12 = bitselect(*s12^*s14,*s12,*s13); *s13 = bitselect(*s13^m5,*s13,*s14); *s14 = bitselect(*s14^m6,*s14,m5);\
+		m5 = *s15; m6 = *s16; *s15 = bitselect(*s15^*s17,*s15,*s16); *s16 = bitselect(*s16^*s18,*s16,*s17); *s17 = bitselect(*s17^*s19,*s17,*s18); *s18 = bitselect(*s18^m5,*s18,*s19); *s19 = bitselect(*s19^m6,*s19,m5);\
+		m5 = *s20; m6 = *s21; *s20 = bitselect(*s20^*s22,*s20,*s21); *s21 = bitselect(*s21^*s23,*s21,*s22); *s22 = bitselect(*s22^*s24,*s22,*s23); *s23 = bitselect(*s23^m5,*s23,*s24); *s24 = bitselect(*s24^m6,*s24,m5);\
+\
+		*s0 ^= keccak_round_constants[i];  \
+}while(0)
+
+// Run all 24 rounds of RND over a 25-lane state, updating it in place.
+// ARGS_25(uint2* s) expands to the 25 pointer parameters s0..s24 that RND dereferences.
+// No input is mixed into the state here; the caller must have filled the lanes beforehand.
+void keccak_block_noabsorb(ARGS_25(uint2* s))
+{
+	// Scratch lanes used by RND
+	uint2 m0,m1,m2,m3,m4,m5,m6;
+#pragma unroll
+	for (uint i = 0; i < 24; ++i)
+		RND(i);
+}
+
+// Mining kernel: each work item hashes the input block with its own nonce and reports
+// the nonce when the result passes the check at the bottom.
+//   base    (only when GOFFSET is not defined) first nonce of this launch, added to the global id
+//   in      ten uint2 values (80 bytes) are read: in[0]..in[8] in full, plus in[9].x
+//   output  result buffer; output[FOUND] is used as the running count / next free slot
+// When GOFFSET is defined, base is fixed at 0 and the nonce is get_global_id(0) alone, so the
+// host is expected to supply the starting nonce as the launch's global work offset.
+__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
+__kernel void search(
+#ifndef GOFFSET
+	const uint base,
+#endif
+	__global const uint2*restrict in, __global uint*restrict output)
+{
+#ifdef GOFFSET
+	const uint base = 0;
+#endif
+	uint2 ARGS_25(state);
+	
+	state0 = in[0];
+	state1 = in[1];
+	state2 = in[2];
+	state3 = in[3];
+	state4 = in[4];
+	state5 = in[5];
+	state6 = in[6];
+	state7 = in[7];
+	state8 = in[8];
+	// The high word of the tenth lane is replaced by this work item's nonce
+	state9 = (uint2)(in[9].x, base + get_global_id(0));
+	// NOTE(review): lanes 10 and 16 look like the Keccak padding bits for a 17-lane (136-byte) rate - confirm
+	state10 = (uint2)(1,0);
+	state11 = 0;
+	state12 = 0;
+	state13 = 0;
+	state14 = 0;
+	state15 = 0;
+	state16 = (uint2)(0,0x80000000U);
+	state17 = 0;
+	state18 = 0;
+	state19 = 0;
+	state20 = 0;
+	state21 = 0;
+	state22 = 0;
+	state23 = 0;
+	state24 = 0;
+	
+	keccak_block_noabsorb(ARGS_25(&state));
+	
+#define FOUND (0x0F)
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
+	
+	// Candidate filter: the top 28 bits of the high word of lane 3 must be zero.
+	// SETFOUND uses a plain (non-atomic) increment of output[FOUND].
+	if ((state3.y & 0xFFFFFFF0U) == 0)
+	{
+		SETFOUND(base + get_global_id(0));
+	}
+}

+ 1 - 1
opencl/phatk.cl

@@ -2,7 +2,7 @@
 // I have therefore decided to keep it public-domain.
 // Modified version copyright 2011-2012 Con Kolivas
 
-// kernel-interface: phatk
+// kernel-interface: phatk SHA256d
 
 #ifdef VECTORS4
 	typedef uint4 u;

+ 1 - 1
opencl/poclbm.cl

@@ -5,7 +5,7 @@
 // This file is taken and modified from the public-domain poclbm project, and
 // we have therefore decided to keep it public-domain in Phoenix.
 
-// kernel-interface: poclbm
+// kernel-interface: poclbm SHA256d
 
 #ifdef VECTORS4
 	typedef uint4 u;

+ 14 - 6
opencl/psw.cl

@@ -29,7 +29,7 @@
  * online backup system.
  */
 
-// kernel-interface: scrypt
+// kernel-interface: scrypt scrypt
 
 __constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
 __constant uint K[] = {
@@ -697,13 +697,13 @@ void salsa(uint4 B[8])
 #define Coord(x,y,z) x+y*(x ## SIZE)+z*(y ## SIZE)*(x ## SIZE)
 #define CO Coord(z,x,y)
 
-void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
+void scrypt_core(const uint gid, uint4 X[8], __global uint4 * restrict lookup)
 {
 	shittify(X);
 	const uint zSIZE = 8;
 	const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
 	const uint xSIZE = CONCURRENT_THREADS;
-	uint x = get_global_id(0)%xSIZE;
+	const uint x = gid % xSIZE;
 
 	for(uint y=0; y<1024/LOOKUP_GAP; ++y)
 	{
@@ -754,11 +754,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 #define SETFOUND(Xnonce) output[output[SCRYPT_FOUND]++] = Xnonce
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search(__global const uint4 * restrict input,
+__kernel void search(
+#ifndef GOFFSET
+	const uint base,
+#endif
+	__global const uint4 * restrict input,
 volatile __global uint*restrict output, __global uint4*restrict padcache,
 const uint4 midstate0, const uint4 midstate16, const uint target)
 {
-	uint gid = get_global_id(0);
+	const uint gid = get_global_id(0)
+#ifndef GOFFSET
+		+ base
+#endif
+	;
 	uint4 X[8];
 	uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1;
 	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
@@ -783,7 +791,7 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
 		SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
 		SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
 	}
-	scrypt_core(X,padcache);
+	scrypt_core(gid, X, padcache);
 	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
 	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]);
 	SHA256_fixed(&tmp0,&tmp1);

+ 14 - 6
opencl/scrypt.cl

@@ -31,7 +31,7 @@
  * online backup system.
  */
 
-// kernel-interface: scrypt
+// kernel-interface: scrypt scrypt
 
 __constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
 __constant uint K[] = {
@@ -760,13 +760,13 @@ void salsa(uint4 B[8])
 #define Coord(x,y,z) x+y*(x ## SIZE)+z*(y ## SIZE)*(x ## SIZE)
 #define CO Coord(z,x,y)
 
-void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
+void scrypt_core(const uint gid, uint4 X[8], __global uint4 * restrict lookup)
 {
 	shittify(X);
 	const uint zSIZE = 8;
 	const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
 	const uint xSIZE = CONCURRENT_THREADS;
-	uint x = get_global_id(0)%xSIZE;
+	const uint x = gid % xSIZE;
 
 	for(uint y=0; y<1024/LOOKUP_GAP; ++y)
 	{
@@ -817,11 +817,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 #define SETFOUND(Xnonce) output[output[SCRYPT_FOUND]++] = Xnonce
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search(__global const uint4 * restrict input,
+__kernel void search(
+#ifndef GOFFSET
+	const uint base,
+#endif
+	__global const uint4 * restrict input,
 volatile __global uint*restrict output, __global uint4*restrict padcache,
 const uint4 midstate0, const uint4 midstate16, const uint target)
 {
-	uint gid = get_global_id(0);
+	const uint gid = get_global_id(0)
+#ifndef GOFFSET
+		+ base
+#endif
+	;
 	uint4 X[8];
 	uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1;
 	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
@@ -846,7 +854,7 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
 		SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
 		SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
 	}
-	scrypt_core(X,padcache);
+	scrypt_core(gid, X, padcache);
 	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
 	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]);
 	SHA256_fixed(&tmp0,&tmp1);

+ 14 - 6
opencl/zuikkis.cl

@@ -28,7 +28,7 @@
  * online backup system.
  */
 
-// kernel-interface: scrypt
+// kernel-interface: scrypt scrypt
 
 __constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
 __constant uint K[] = {
@@ -748,13 +748,13 @@ void salsa(uint4 B[8])
 #define Coord(x,y,z) x+y*(x ## SIZE)+z*(y ## SIZE)*(x ## SIZE)
 #define CO Coord(z,x,y)
 
-void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
+void scrypt_core(const uint gid, uint4 X[8], __global uint4 * restrict lookup)
 {
 	shittify(X);
 	const uint zSIZE = 8;
 	const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
 	const uint xSIZE = CONCURRENT_THREADS;
-	uint x = get_global_id(0)%xSIZE;
+	const uint x = gid % xSIZE;
 
 	for(uint y=0; y<1024/LOOKUP_GAP; ++y)
 	{
@@ -792,11 +792,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 #define SETFOUND(Xnonce) output[output[SCRYPT_FOUND]++] = Xnonce
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search(__global const uint4 * restrict input,
+__kernel void search(
+#ifndef GOFFSET
+	const uint base,
+#endif
+	__global const uint4 * restrict input,
 volatile __global uint*restrict output, __global uint4*restrict padcache,
 const uint4 midstate0, const uint4 midstate16, const uint target)
 {
-	uint gid = get_global_id(0);
+	const uint gid = get_global_id(0)
+#ifndef GOFFSET
+		+ base
+#endif
+	;
 	uint4 X[8];
 	uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1;
 	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
@@ -820,7 +828,7 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
 		SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
 		SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
 	}
-	scrypt_core(X,padcache);
+	scrypt_core(gid, X, padcache);
 	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
 	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]);
 	SHA256_fixed(&tmp0,&tmp1);

+ 9 - 1
openwrt/bfgminer/Makefile

@@ -11,7 +11,7 @@ include $(TOPDIR)/rules.mk
 
 PKG_NAME:=bfgminer
 PKG_TITLE:=BFGMiner
-PKG_VERSION:=4.10.0
+PKG_VERSION:=5.0.0
 PKG_RELEASE:=1
 
 PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).txz
@@ -62,6 +62,10 @@ config PACKAGE_$(PKG_NAME)_libusb
 	bool "Build with libusb support (X6500 & ZTEX)"
 	depends on PACKAGE_$(PKG_NAME)
 	default y
+config PACKAGE_$(PKG_NAME)_keccak
+	bool "Build with Keccak algorithm support"
+	depends on PACKAGE_$(PKG_NAME)
+	default n
 config PACKAGE_$(PKG_NAME)_scrypt
 	bool "Build with scrypt algorithm support"
 	depends on PACKAGE_$(PKG_NAME)
@@ -90,6 +94,10 @@ ifndef CONFIG_PACKAGE_$(PKG_NAME)_libusb
 CONFIGURE_ARGS += --without-libusb
 endif
 
+ifdef CONFIG_PACKAGE_$(PKG_NAME)_keccak
+CONFIGURE_ARGS += --enable-keccak
+endif
+
 ifdef CONFIG_PACKAGE_$(PKG_NAME)_scrypt
 CONFIGURE_ARGS += --enable-scrypt
 endif

+ 0 - 19
scrypt.h

@@ -1,19 +0,0 @@
-#ifndef SCRYPT_H
-#define SCRYPT_H
-
-#include <stdint.h>
-
-#include "miner.h"
-
-#ifdef USE_SCRYPT
-extern void test_scrypt(void);
-extern void scrypt_hash_data(unsigned char *out_hash, const unsigned char *data);
-extern void scrypt_regenhash(struct work *work);
-
-#else /* USE_SCRYPT */
-static inline void scrypt_regenhash(__maybe_unused struct work *work)
-{
-}
-#endif /* USE_SCRYPT */
-
-#endif /* SCRYPT_H */

+ 12 - 9
sha256_4way.c

@@ -21,7 +21,7 @@
 
 #define NPAR 32
 
-static void DoubleBlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2);
+static void DoubleBlockSHA256(const void* pin, const void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2);
 
 static const unsigned int sha256_consts[] = {
     0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /*  0 */
@@ -79,13 +79,15 @@ static const unsigned int pSHA256InitState[8] =
 {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
 
 
-bool ScanHash_4WaySSE2(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
+bool ScanHash_4WaySSE2(struct thr_info * const thr, struct work * const work,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	const uint8_t * const pmidstate = work->midstate;
+	uint8_t *pdata = work->data;
+	const uint32_t * const phash1 = hash1_init;
+	uint8_t * const phash = work->hash;
+	
 	uint32_t *hash32 = (uint32_t *)phash;
     unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
 
@@ -109,12 +111,13 @@ bool ScanHash_4WaySSE2(struct thr_info*thr, const unsigned char *pmidstate,
                 for (i = 0; i < 32/4; i++)
                     ((unsigned int*)phash)[i] = thash[i][j];
 
-		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
+				if (unlikely(hash32[7] == 0))
+				{
 					nonce += j;
 					*last_nonce = nonce;
 					*nNonce_p = nonce;
 					return true;
-		}
+				}
             }
         }
 
@@ -129,10 +132,10 @@ bool ScanHash_4WaySSE2(struct thr_info*thr, const unsigned char *pmidstate,
 }
 
 
-static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
+static void DoubleBlockSHA256(const void* pin, const void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
 {
     unsigned int* In = (unsigned int*)pin;
-    unsigned int* Pad = (unsigned int*)pad;
+    const unsigned int* Pad = pad;
     unsigned int* hPre = (unsigned int*)pre;
     unsigned int* hInit = (unsigned int*)init;
     unsigned int /* i, j, */ k;

+ 10 - 6
sha256_altivec_4way.c

@@ -77,13 +77,16 @@ static const unsigned int pSHA256InitState[8] =
 {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
 
 
-bool ScanHash_altivec_4way(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
+bool ScanHash_altivec_4way(struct thr_info * const thr, struct work * const work,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	const uint8_t * const pmidstate = work->midstate;
+	uint8_t *pdata = work->data;
+	uint8_t hash1[0x40];
+	memcpy(hash1, hash1_init, sizeof(hash1));
+	uint8_t * const phash = work->hash;
+	
 	uint32_t *hash32 = (uint32_t *)phash;
     unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
 
@@ -107,12 +110,13 @@ bool ScanHash_altivec_4way(struct thr_info*thr, const unsigned char *pmidstate,
                 for (i = 0; i < 32/4; i++)
                     ((unsigned int*)phash)[i] = thash[i][j];
 
-		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
+				if (unlikely(hash32[7] == 0))
+				{
 					nonce += j;
 					*last_nonce = nonce;
 					*nNonce_p = nonce;
 					return true;
-		}
+				}
             }
         }
 

+ 20 - 10
sha256_cryptopp.c

@@ -17,6 +17,8 @@
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
+
+#include "driver-cpu.h"
 #include "miner.h"
 
 typedef uint32_t word32;
@@ -104,13 +106,16 @@ static void runhash(void *state, const void *input, const void *init)
 }
 
 /* suspiciously similar to ScanHash* from bitcoin */
-bool scanhash_cryptopp(struct thr_info*thr, const unsigned char *midstate,
-		unsigned char *data,
-	        unsigned char *hash1, unsigned char *hash,
-		const unsigned char *target,
+bool scanhash_cryptopp(struct thr_info * const thr, struct work * const work,
 	        uint32_t max_nonce, uint32_t *last_nonce,
 		uint32_t n)
 {
+	const uint8_t *midstate = work->midstate;
+	uint8_t *data = work->data;
+	uint8_t hash1[0x40];
+	memcpy(hash1, hash1_init, sizeof(hash1));
+	uint8_t * const hash = work->hash;
+	
 	uint32_t *hash32 = (uint32_t *) hash;
 	uint32_t *nonce = (uint32_t *)(data + 76);
 
@@ -127,7 +132,8 @@ bool scanhash_cryptopp(struct thr_info*thr, const unsigned char *midstate,
 		runhash(hash1, data, midstate);
 		runhash(hash, hash1, sha256_init_state);
 
-		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
+		if (unlikely((hash32[7] == 0)))
+		{
 			*nonce = htole32(n);
 			*last_nonce = n;
 			return true;
@@ -590,13 +596,16 @@ static void runhash32(void *state, const void *input, const void *init)
 }
 
 /* suspiciously similar to ScanHash* from bitcoin */
-bool scanhash_asm32(struct thr_info*thr, const unsigned char *midstate,
-		unsigned char *data,
-	        unsigned char *hash1, unsigned char *hash,
-		const unsigned char *target,
+bool scanhash_asm32(struct thr_info * const thr, struct work * const work,
 	        uint32_t max_nonce, uint32_t *last_nonce,
 		uint32_t n)
 {
+	const uint8_t * const midstate = work->midstate;
+	uint8_t *data = work->data;
+	uint8_t hash1[0x40];
+	memcpy(hash1, hash1_init, sizeof(hash1));
+	uint8_t * const hash = work->hash;
+	
 	uint32_t *hash32 = (uint32_t *) hash;
 	uint32_t *nonce = (uint32_t *)(data + 76);
 
@@ -608,7 +617,8 @@ bool scanhash_asm32(struct thr_info*thr, const unsigned char *midstate,
 		runhash32(hash1, data, midstate);
 		runhash32(hash, hash1, sha256_init_state);
 
-		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
+		if (unlikely(hash32[7] == 0))
+		{
 			*last_nonce = n;
 			return true;
 		}

+ 11 - 4
sha256_generic.c

@@ -18,6 +18,8 @@
 #include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
+
+#include "driver-cpu.h"
 #include "miner.h"
 
 typedef uint32_t u32;
@@ -232,12 +234,16 @@ const uint32_t sha256_init_state[8] = {
 };
 
 /* suspiciously similar to ScanHash* from bitcoin */
-bool scanhash_c(struct thr_info*thr, const unsigned char *midstate, unsigned char *data,
-	        unsigned char *hash1, unsigned char *hash,
-		const unsigned char *target,
+bool scanhash_c(struct thr_info * const thr, struct work * const work,
 	        uint32_t max_nonce, uint32_t *last_nonce,
 		uint32_t n)
 {
+	const uint8_t *midstate = work->midstate;
+	uint8_t *data = work->data;
+	uint8_t hash1[0x40];
+	memcpy(hash1, hash1_init, sizeof(hash1));
+	uint8_t * const hash = work->hash;
+	
 	uint32_t *hash32 = (uint32_t *) hash;
 	uint32_t *nonce = (uint32_t *)(data + 76);
 	unsigned long stat_ctr = 0;
@@ -258,7 +264,8 @@ bool scanhash_c(struct thr_info*thr, const unsigned char *midstate, unsigned cha
 
 		stat_ctr++;
 
-		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
+		if (unlikely(hash32[7] == 0))
+		{
 			*nonce = htole32(n);
 			*last_nonce = n;
 			return true;

+ 8 - 5
sha256_sse2_amd64.c

@@ -51,13 +51,15 @@ const uint32_t sha256_init_sse2[8]__asm__("sha256_init_sse2")__attribute__((alig
 __m128i g_4sha256_k[64];
 __m128i sha256_consts_m128i[64]__asm__("sha256_consts_m128i")__attribute__((aligned(0x1000)));
 
-bool scanhash_sse2_64(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
+bool scanhash_sse2_64(struct thr_info * const thr, struct work * const work,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	const uint8_t * const pmidstate = work->midstate;
+	uint8_t *pdata = work->data;
+	const uint32_t * const phash1 = hash1_init;
+	uint8_t * const phash = work->hash;
+	
 	uint32_t *hash32 = (uint32_t *)phash;
     uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
@@ -115,7 +117,8 @@ bool scanhash_sse2_64(struct thr_info*thr, const unsigned char *pmidstate,
 		    *(uint32_t *)&(phash)[i*4] = mi.i[j];
 		}
 
-		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
+		if (unlikely(hash32[7] == 0))
+		{
 		     nonce += j;
 		     *last_nonce = nonce + 1;
 		     *nNonce_p = nonce;

+ 8 - 5
sha256_sse2_i386.c

@@ -51,13 +51,15 @@ const uint32_t sha256_32init[8]__attribute__((aligned(0x100))) =
 __m128i g_4sha256_k[64];
 __m128i sha256_consts_m128i[64]__asm__("sha256_consts_m128i")__attribute__((aligned(0x1000)));
 
-bool scanhash_sse2_32(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
+bool scanhash_sse2_32(struct thr_info * const thr, struct work * const work,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	const uint8_t * const pmidstate = work->midstate;
+	uint8_t *pdata = work->data;
+	const uint32_t * const phash1 = hash1_init;
+	uint8_t * const phash = work->hash;
+	
 	uint32_t *hash32 = (uint32_t *)phash;
     uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
@@ -106,7 +108,8 @@ bool scanhash_sse2_32(struct thr_info*thr, const unsigned char *pmidstate,
 		    *(uint32_t *)&(phash)[i<<2] = ((uint32_t *)&(m_4hash[i]))[j];
 		}
 
-		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
+		if (unlikely(hash32[7] == 0))
+		{
 		     nonce += j;
 		     *last_nonce = nonce;
 		     *nNonce_p = nonce;

+ 8 - 5
sha256_sse4_amd64.c

@@ -50,13 +50,15 @@ static uint32_t g_sha256_hinit[8] =
 
 __m128i g_4sha256_k[64]__asm__("g_4sha256_k");
 
-bool scanhash_sse4_64(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
+bool scanhash_sse4_64(struct thr_info * const thr, struct work * const work,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	const uint8_t * const pmidstate = work->midstate;
+	uint8_t *pdata = work->data;
+	const uint32_t * const phash1 = hash1_init;
+	uint8_t * const phash = work->hash;
+	
 	uint32_t *hash32 = (uint32_t *)phash;
     uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
@@ -114,7 +116,8 @@ bool scanhash_sse4_64(struct thr_info*thr, const unsigned char *pmidstate,
 		    *(uint32_t *)&(phash)[i*4] = mi.i[j];
 		}
 
-		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
+		if (unlikely(hash32[7] == 0))
+		{
 			nonce += j;
 			*last_nonce = nonce;
 			*nNonce_p = nonce;

+ 5 - 5
sha256_via.c

@@ -32,13 +32,12 @@ static void via_sha256(void *hash, void *buf, unsigned len)
 		     :"memory");
 }
 
-bool scanhash_via(struct thr_info*thr, const unsigned char __maybe_unused *pmidstate,
-	unsigned char *data_inout,
-	unsigned char __maybe_unused *phash1, unsigned char __maybe_unused *phash,
-	const unsigned char *target,
+bool scanhash_via(struct thr_info * const thr, struct work * const work,
 		  uint32_t max_nonce, uint32_t *last_nonce,
 		  uint32_t n)
 {
+	uint8_t * const data_inout = work->data;
+	
 	unsigned char data[128] __attribute__((aligned(128)));
 	unsigned char tmp_hash[32] __attribute__((aligned(128)));
 	unsigned char tmp_hash1[32] __attribute__((aligned(128)));
@@ -70,7 +69,8 @@ bool scanhash_via(struct thr_info*thr, const unsigned char __maybe_unused *pmids
 
 		stat_ctr++;
 
-		if (unlikely((hash32[7] == 0) && fulltest(tmp_hash, target))) {
+		if (unlikely((hash32[7] == 0)))
+		{
 			/* swap nonce'd data back into original storage area;
 			 */
 			*nonce_inout = bswap_32(n);

+ 171 - 36
util.c

@@ -62,7 +62,6 @@
 #include "miner.h"
 #include "compat.h"
 #include "util.h"
-#include "version.h"
 
 #define DEFAULT_SOCKWAIT 60
 
@@ -78,6 +77,7 @@ struct data_buffer {
 struct upload_buffer {
 	const void	*buf;
 	size_t		len;
+	size_t		pos;
 };
 
 struct header_info {
@@ -177,18 +177,52 @@ static size_t upload_data_cb(void *ptr, size_t size, size_t nmemb,
 		pool->lp_active = true;
 	}
 
-	if (len > ub->len)
-		len = ub->len;
+	if (len > ub->len - ub->pos)
+		len = ub->len - ub->pos;
 
 	if (len) {
-		memcpy(ptr, ub->buf, len);
-		ub->buf += len;
-		ub->len -= len;
+		memcpy(ptr, ub->buf + ub->pos, len);
+		ub->pos += len;
 	}
 
 	return len;
 }
 
+#if LIBCURL_VERSION_NUM >= 0x071200
+/*
+ * libcurl seek callback (CURLOPT_SEEKFUNCTION) for the in-memory upload buffer.
+ *
+ * Repositions ub->pos so that libcurl can rewind and resend the request body.
+ * user_data is the json_rpc_call_state registered via CURLOPT_SEEKDATA.
+ * offset/origin follow fseek() conventions (SEEK_SET, SEEK_CUR, SEEK_END).
+ *
+ * Returns 0 (CURL_SEEKFUNC_OK) on success, or 1 (CURL_SEEKFUNC_FAIL) if the
+ * resulting position would fall outside [0, ub->len] or origin is unknown.
+ */
+static int seek_data_cb(void *user_data, curl_off_t offset, int origin)
+{
+	struct json_rpc_call_state * const state = user_data;
+	struct upload_buffer * const ub = &state->upload_data;
+	
+	switch (origin) {
+		case SEEK_SET:
+			if (offset < 0 || offset > ub->len)
+				return 1;
+			ub->pos = offset;
+			break;
+		case SEEK_CUR:
+			// Validate each sign separately: comparing a negative curl_off_t directly
+			// against a size_t converts it to a huge unsigned value where both are
+			// 64-bit, which used to reject every backward seek
+			if (offset < 0)
+			{
+				// Seeking backward: must not move before the start of the buffer
+				if (ub->pos < (size_t)-offset)
+					return 1;
+			}
+			else
+			// Seeking forward: must not move past the end (pos never exceeds len)
+			if (ub->len - ub->pos < offset)
+				return 1;
+			// Unsigned wraparound makes adding a (validated) negative offset correct
+			ub->pos += offset;
+			break;
+		case SEEK_END:
+			if (offset > 0 || (size_t)-offset > ub->len)
+				return 1;
+			ub->pos = ub->len + offset;
+			break;
+		default:
+			return 1;  /* CURL_SEEKFUNC_FAIL */
+	}
+	
+	return 0;  /* CURL_SEEKFUNC_OK */
+}
+#endif
+
 static size_t resp_hdr_cb(void *ptr, size_t size, size_t nmemb, void *user_data)
 {
 	struct header_info *hi = user_data;
@@ -461,6 +495,10 @@ void json_rpc_call_async(CURL *curl, const char *url,
 	curl_easy_setopt(curl, CURLOPT_WRITEDATA, &state->all_data);
 	curl_easy_setopt(curl, CURLOPT_READFUNCTION, upload_data_cb);
 	curl_easy_setopt(curl, CURLOPT_READDATA, state);
+#if LIBCURL_VERSION_NUM >= 0x071200
+	curl_easy_setopt(curl, CURLOPT_SEEKFUNCTION, &seek_data_cb);
+	curl_easy_setopt(curl, CURLOPT_SEEKDATA, state);
+#endif
 	curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, &state->curl_err_str[0]);
 	curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
 	curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, resp_hdr_cb);
@@ -486,9 +524,10 @@ void json_rpc_call_async(CURL *curl, const char *url,
 
 	state->upload_data.buf = rpc_req;
 	state->upload_data.len = strlen(rpc_req);
+	state->upload_data.pos = 0;
 	sprintf(len_hdr, "Content-Length: %lu",
 		(unsigned long) state->upload_data.len);
-	sprintf(user_agent_hdr, "User-Agent: %s", PACKAGE"/"VERSION);
+	sprintf(user_agent_hdr, "User-Agent: %s", bfgminer_name_slash_ver);
 
 	headers = curl_slist_append(headers,
 		"Content-type: application/json");
@@ -790,17 +829,6 @@ char *ucs2_to_utf8_dup(uint16_t * const in, size_t sz)
 	return out;
 }
 
-void hash_data(unsigned char *out_hash, const unsigned char *data)
-{
-	unsigned char blkheader[80];
-	
-	// data is past the first SHA256 step (padding and interpreting as big endian on a little endian platform), so we need to flip each 32-bit chunk around to get the original input block header
-	swap32yes(blkheader, data, 80 / 4);
-	
-	// double-SHA256 to get the block hash
-	gen_hash(blkheader, out_hash, 80);
-}
-
 // Example output: 0000000000000000000000000000000000000000000000000000ffff00000000 (bdiff 1)
 void real_block_target(unsigned char *target, const unsigned char *data)
 {
@@ -865,15 +893,6 @@ bool hash_target_check_v(const unsigned char *hash, const unsigned char *target)
 	return rc;
 }
 
-// This operates on a native-endian SHA256 state
-// In other words, on little endian platforms, every 4 bytes are in reverse order
-bool fulltest(const unsigned char *hash, const unsigned char *target)
-{
-	unsigned char hash2[32];
-	swap32tobe(hash2, hash, 32 / 4);
-	return hash_target_check_v(hash2, target);
-}
-
 struct thread_q *tq_new(void)
 {
 	struct thread_q *tq;
@@ -1957,6 +1976,17 @@ bool isCalpha(const int c)
 	return false;
 }
 
+/*
+ * Report whether needle appears as one of the tokens of optlist.
+ *
+ * optlist is split on any of the characters in delim; each resulting token is
+ * compared to needle case-insensitively. The caller's string is not modified.
+ * Returns true on the first matching token, false if none match.
+ */
+bool match_strtok(const char * const optlist, const char * const delim, const char * const needle)
+{
+	// strtok_r writes into its input, so tokenise a private copy
+	const size_t copy_sz = strlen(optlist) + 1;
+	char scratch[copy_sz];
+	char *saveptr;
+	memcpy(scratch, optlist, copy_sz);
+	
+	char *token = strtok_r(scratch, delim, &saveptr);
+	while (token)
+	{
+		if (strcasecmp(token, needle) == 0)
+			return true;
+		token = strtok_r(NULL, delim, &saveptr);
+	}
+	return false;
+}
+
 static
 bool _appdata_file_call(const char * const appname, const char * const filename, const appdata_file_callback_t cb, void * const userp, const char * const path)
 {
@@ -2203,6 +2233,7 @@ struct bfg_strtobool_keyword {
 bool bfg_strtobool(const char * const s, char ** const endptr, __maybe_unused const int opts)
 {
 	struct bfg_strtobool_keyword keywords[] = {
+		{false, "disable"},
 		{false, "false"},
 		{false, "never"},
 		{false, "none"},
@@ -2210,7 +2241,9 @@ bool bfg_strtobool(const char * const s, char ** const endptr, __maybe_unused co
 		{false, "no"},
 		{false, "0"},
 		
+		{true , "enable"},
 		{true , "always"},
+		{true , "force"},
 		{true , "true"},
 		{true , "yes"},
 		{true , "on"},
@@ -2396,20 +2429,20 @@ size_t varint_decode(const uint8_t *p, size_t size, uint64_t *n)
 {
 	if (size > 8 && p[0] == 0xff)
 	{
-		*n = upk_u64le(p, 0);
+		*n = upk_u64le(p, 1);
 		return 9;
 	}
 	if (size > 4 && p[0] == 0xfe)
 	{
-		*n = upk_u32le(p, 0);
+		*n = upk_u32le(p, 1);
 		return 5;
 	}
 	if (size > 2 && p[0] == 0xfd)
 	{
-		*n = upk_u16le(p, 0);
+		*n = upk_u16le(p, 1);
 		return 3;
 	}
-	if (size > 0)
+	if (size > 0 && p[0] <= 0xfc)
 	{
 		*n = p[0];
 		return 1;
@@ -2553,6 +2586,19 @@ static bool parse_notify(struct pool *pool, json_t *val)
 	pool->submit_old = !clean;
 	pool->swork.clean = true;
 	
+	// stratum_set_goal ensures these are the same pointer if they match
+	if (pool->goalname != pool->next_goalname)
+	{
+		free(pool->goalname);
+		pool->goalname = pool->next_goalname;
+		mining_goal_reset(pool->goal);
+	}
+	if (pool->next_goal_malgo)
+	{
+		goal_set_malgo(pool->goal, pool->next_goal_malgo);
+		pool->next_goal_malgo = NULL;
+	}
+	
 	if (pool->next_nonce1)
 	{
 		free(pool->swork.nonce1);
@@ -2629,6 +2675,8 @@ out:
 
 static bool parse_diff(struct pool *pool, json_t *val)
 {
+	const struct mining_goal_info * const goal = pool->goal;
+	const struct mining_algorithm * const malgo = goal->malgo;
 	double diff;
 
 	diff = json_number_value(json_array_get(val, 0));
@@ -2642,8 +2690,10 @@ static bool parse_diff(struct pool *pool, json_t *val)
 		diff = bdiff_to_pdiff(diff);
 	}
 	
-	if ((!opt_scrypt) && diff < 1 && diff > 0.999)
+#ifdef USE_SHA256D
+	if (malgo->algo == POW_SHA256D && diff < 1 && diff > 0.999)
 		diff = 1;
+#endif
 	
 #ifdef USE_SCRYPT
 	// Broken Scrypt pools multiply difficulty by 0x10000
@@ -2660,7 +2710,7 @@ static bool parse_diff(struct pool *pool, json_t *val)
 	// Diff 16 at 1.15 Gh/s = 1 share / 60s
 	// Diff 16 at 7.00 Gh/s = 1 share / 10s
 
-	if (opt_scrypt && (diff >= minimum_broken_scrypt_diff))
+	if (malgo->algo == POW_SCRYPT && (diff >= minimum_broken_scrypt_diff))
 		diff /= broken_scrypt_diff_multiplier;
 #endif
 
@@ -2739,6 +2789,72 @@ err:
 	return true;
 }
 
+/*
+ * Handle a stratum "mining.set_goal" request.
+ *
+ * pool:   pool the request arrived on
+ * val:    the whole JSON request object (only its "id" is read here)
+ * params: the request's parameter array: [goal name, {"malgo": ..., ...}]
+ *
+ * Returns false without doing anything unless the pool URI enables the
+ * "goalreset" parameter; otherwise returns true.
+ *
+ * On success the requested goal name and mining algorithm are only staged in
+ * pool->next_goalname and pool->next_goal_malgo; they are not applied here.
+ * If the request carries a non-null id, a reply is sent: result true on
+ * success, or an error array when the requested algorithm is refused.
+ */
+static
+bool stratum_set_goal(struct pool * const pool, json_t * const val, json_t * const params)
+{
+	if (!uri_get_param_bool(pool->rpc_url, "goalreset", false))
+		return false;
+	
+	// NOTE(review): presumably NULL when params[0] is missing or not a string - confirm
+	const char * const new_goalname = __json_array_string(params, 0);
+	struct mining_algorithm *new_malgo = NULL;
+	const char *emsg = NULL;
+	
+	if (json_is_array(params) && json_array_size(params) > 1)
+	{
+		json_t * const j_goaldesc = json_array_get(params, 1);
+		if (json_is_object(j_goaldesc))
+		{
+			json_t * const j_malgo = json_object_get(j_goaldesc, "malgo");
+			if (j_malgo && json_is_string(j_malgo))
+			{
+				const char * const newvalue = json_string_value(j_malgo);
+				new_malgo = mining_algorithm_by_alias(newvalue);
+				// Even if it's the current malgo, we should reset next_goal_malgo in case of a prior set_goal
+				if (new_malgo == pool->goal->malgo)
+				{}  // Do nothing, assignment takes place below
+				else
+				// A different algorithm is only accepted if it is known and the user opted in
+				if (new_malgo && uri_get_param_bool(pool->rpc_url, "change_goal_malgo", false))
+				{}  // Do nothing, assignment takes place below
+				else
+				{
+					emsg = "Mining algorithm not supported";
+					// Ignore even the goal name, if we are failing
+					goto out;
+				}
+				// Staging the algorithm already in use would be a no-op change
+				if (new_malgo == pool->goal->malgo)
+					new_malgo = NULL;
+			}
+		}
+	}
+	
+	// Even if the goal name is not changing, we need to adopt any configuration change
+	pool->next_goal_malgo = new_malgo;
+	
+	// Drop a previously staged name, unless it is shared with the current one
+	if (pool->next_goalname && pool->next_goalname != pool->goalname)
+		free(pool->next_goalname);
+	
+	// This compares goalname to new_goalname, treating NULL as equal only to NULL
+	// (strcmp must never be handed a NULL new_goalname)
+	if (pool->goalname ? (new_goalname && !strcmp(pool->goalname, new_goalname)) : !new_goalname)
+		pool->next_goalname = pool->goalname;
+	else
+		// NOTE(review): relies on maybe_strdup(NULL) yielding NULL - confirm
+		pool->next_goalname = maybe_strdup(new_goalname);
+	
+out: ;
+	json_t * const j_id = json_object_get(val, "id");
+	// Requests without an id (or with a null id) get no reply
+	if (j_id && !json_is_null(j_id))
+	{
+		char * const idstr = json_dumps_ANY(j_id, 0);
+		char buf[0x80];
+		if (unlikely(emsg))
+			snprintf(buf, sizeof(buf), "{\"id\":%s,\"result\":null,\"error\":[-1,\"%s\",null]}", idstr, emsg);
+		else
+			snprintf(buf, sizeof(buf), "{\"id\":%s,\"result\":true,\"error\":null}", idstr);
+		free(idstr);
+		stratum_send(pool, buf, strlen(buf));
+	}
+	
+	return true;
+}
+
 static bool parse_reconnect(struct pool *pool, json_t *val)
 {
 	if (opt_disable_client_reconnect)
@@ -2794,7 +2910,7 @@ static bool send_version(struct pool *pool, json_t *val)
 		return false;
 
 	idstr = json_dumps_ANY(id, 0);
-	sprintf(s, "{\"id\": %s, \"result\": \""PACKAGE"/"VERSION"\", \"error\": null}", idstr);
+	sprintf(s, "{\"id\": %s, \"result\": \"%s\", \"error\": null}", idstr, bfgminer_name_slash_ver);
 	free(idstr);
 	if (!stratum_send(pool, s, strlen(s)))
 		return false;
@@ -2905,6 +3021,10 @@ bool parse_method(struct pool *pool, char *s)
 		goto out;
 	}
 	
+	// Usage: mining.set_goal("goal name", {"malgo":"SHA256d", ...})
+	if (!strncasecmp(buf, "mining.set_goal", 15) && stratum_set_goal(pool, val, params))
+		return_via(out, ret = true);
+	
 out:
 	if (val)
 		json_decref(val);
@@ -3179,13 +3299,28 @@ resend:
 		recvd = true;
 	}
 	
+	if (uri_get_param_bool(pool->rpc_url, "goalreset", false))
+	{
+		// Default: ["notify", "set_difficulty"] (but these must be explicit if mining.capabilities is used)
+		snprintf(s, sizeof(s), "{\"id\":null,\"method\":\"mining.capabilities\",\"params\":[{\"notify\":[],\"set_difficulty\":{},\"set_goal\":[],\"malgo\":{");
+		struct mining_algorithm *malgo;
+		LL_FOREACH(mining_algorithms, malgo)
+		{
+			tailsprintf(s, sizeof(s), "\"%s\":{}%c", malgo->name, malgo->next ? ',' : '}');
+		}
+		if (request_target_str)
+			tailsprintf(s, sizeof(s), ",\"suggested_target\":\"%s\"", request_target_str);
+		tailsprintf(s, sizeof(s), "}]}");
+		_stratum_send(pool, s, strlen(s), true);
+	}
+	
 	if (noresume) {
 		sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": []}", swork_id++);
 	} else {
 		if (pool->sessionid)
-			sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\", \"%s\"]}", swork_id++, pool->sessionid);
+			sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\"%s\", \"%s\"]}", swork_id++, bfgminer_name_slash_ver, pool->sessionid);
 		else
-			sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\""PACKAGE"/"VERSION"\"]}", swork_id++);
+			sprintf(s, "{\"id\": %d, \"method\": \"mining.subscribe\", \"params\": [\"%s\"]}", swork_id++, bfgminer_name_slash_ver);
 	}
 
 	if (!_stratum_send(pool, s, strlen(s), true)) {

+ 3 - 2
util.h

@@ -125,6 +125,8 @@ bool isCspace(int c)
 	}
 }
 
+extern bool match_strtok(const char *optlist, const char *delim, const char *needle);
+
 typedef bool (*appdata_file_callback_t)(const char *, void *);
 extern bool appdata_file_call(const char *appname, const char *filename, appdata_file_callback_t, void *userp);
 extern char *appdata_file_find_first(const char *appname, const char *filename);
@@ -192,7 +194,6 @@ extern char *ucs2_to_utf8_dup(uint16_t *in, size_t sz);
 }while(0)
 
 extern void gen_hash(unsigned char *data, unsigned char *hash, int len);
-extern void hash_data(unsigned char *out_hash, const unsigned char *data);
 extern void real_block_target(unsigned char *target, const unsigned char *data);
 extern bool hash_target_check(const unsigned char *hash, const unsigned char *target);
 extern bool hash_target_check_v(const unsigned char *hash, const unsigned char *target);
@@ -642,7 +643,7 @@ static inline
 void bytes_free(bytes_t *b)
 {
 	free(b->buf);
-	b->sz = b->allocsz = 0;
+	bytes_init(b);
 }
 
 

+ 16 - 0
version.c

@@ -0,0 +1,16 @@
+/*
+ * Copyright 2014 Luke Dashjr
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.  See COPYING for more details.
+ */
+
+#include "config.h"
+
+#include "version.h"
+
+const char * const bfgminer_name_space_ver = PACKAGE " " VERSION;
+const char * const bfgminer_name_slash_ver = PACKAGE "/" VERSION;
+const char * const bfgminer_ver = VERSION;

+ 5 - 1
winhacks.h

@@ -3,8 +3,12 @@
 
 #include <winsock2.h>
 
-// wincon.h contains a MOUSE_MOVED that conflicts with curses
+// wincon.h contains a KEY_EVENT that conflicts with ncurses
 #include <wincon.h>
+#ifdef KEY_EVENT
+#	undef KEY_EVENT
+#endif
+// wincon.h contains a MOUSE_MOVED that conflicts with curses
 #ifdef MOUSE_MOVED
 #	undef MOUSE_MOVED
 #endif

Some files were not shown because too many files changed in this diff