Browse Source

Remove all CPU mining code.

Con Kolivas 12 years ago
parent
commit
f4b2790761
22 changed files with 7 additions and 4596 deletions
  1. 0 23
      Makefile.am
  2. 4 182
      api.c
  3. 1 153
      cgminer.c
  4. 2 67
      configure.ac
  5. 0 863
      driver-cpu.c
  6. 0 65
      driver-cpu.h
  7. 0 5
      miner.h
  8. 0 488
      sha256_4way.c
  9. 0 469
      sha256_altivec_4way.c
  10. 0 609
      sha256_cryptopp.c
  11. 0 274
      sha256_generic.c
  12. 0 133
      sha256_sse2_amd64.c
  13. 0 125
      sha256_sse2_i386.c
  14. 0 132
      sha256_sse4_amd64.c
  15. 0 85
      sha256_via.c
  16. 0 1
      x86_32/.gitignore
  17. 0 8
      x86_32/Makefile.am
  18. 0 259
      x86_32/sha256_xmm.asm
  19. 0 1
      x86_64/.gitignore
  20. 0 8
      x86_64/Makefile.am
  21. 0 292
      x86_64/sha256_sse4_amd64.asm
  22. 0 354
      x86_64/sha256_xmm_amd64.asm

+ 0 - 23
Makefile.am

@@ -57,29 +57,6 @@ if HAS_SCRYPT
 cgminer_SOURCES += scrypt.c scrypt.h
 endif
 
-if HAS_CPUMINE
-# original CPU related sources, unchanged
-cgminer_SOURCES	+= \
-		  sha256_generic.c sha256_4way.c sha256_via.c	\
-		  sha256_cryptopp.c sha256_sse2_amd64.c		\
-		  sha256_sse4_amd64.c sha256_sse2_i386.c	\
-		  sha256_altivec_4way.c
-
-# the CPU portion extracted from original main.c
-cgminer_SOURCES += driver-cpu.h driver-cpu.c
-
-if HAS_YASM
-AM_CFLAGS	= -DHAS_YASM
-if HAVE_x86_64
-SUBDIRS		+= x86_64
-cgminer_LDADD	+= x86_64/libx8664.a
-else # HAVE_x86_64
-SUBDIRS		+= x86_32
-cgminer_LDADD	+= x86_32/libx8632.a
-endif # HAVE_x86_64
-endif # HAS_YASM
-endif # HAS_CPUMINE
-
 if NEED_FPGAUTILS
 cgminer_SOURCES += fpgautils.c fpgautils.h
 endif

+ 4 - 182
api.c

@@ -27,7 +27,6 @@
 #include "compat.h"
 #include "miner.h"
 #include "util.h"
-#include "driver-cpu.h" /* for algo_names[], TODO: re-factor dependency */
 
 #if defined(USE_BFLSC) || defined(USE_AVALON)
 #define HAVE_AN_ASIC 1
@@ -38,7 +37,7 @@
 #endif
 
 // Big enough for largest API request
-//  though a PC with 100s of PGAs/CPUs may exceed the size ...
+//  though a PC with 100s of PGAs may exceed the size ...
 //  data is truncated at the end of the last record that fits
 //	but still closed correctly for JSON
 // Current code assumes it can socket send this size + JSON_CLOSE + JSON_END
@@ -187,9 +186,6 @@ static const char *DEVICECODE = ""
 #endif
 #ifdef USE_MODMINER
 			"MMQ "
-#endif
-#ifdef WANT_CPUMINE
-			"CPU "
 #endif
 			"";
 
@@ -224,13 +220,8 @@ static const char *OSINFO =
 #define _PGA		"PGA"
 #endif
 
-#ifdef WANT_CPUMINE
-#define _CPU		"CPU"
-#endif
-
 #define _GPUS		"GPUS"
 #define _PGAS		"PGAS"
-#define _CPUS		"CPUS"
 #define _NOTIFY		"NOTIFY"
 #define _DEVDETAILS	"DEVDETAILS"
 #define _BYE		"BYE"
@@ -265,13 +256,8 @@ static const char ISJSON = '{';
 #define JSON_PGA	JSON1 _PGA JSON2
 #endif
 
-#ifdef WANT_CPUMINE
-#define JSON_CPU	JSON1 _CPU JSON2
-#endif
-
 #define JSON_GPUS	JSON1 _GPUS JSON2
 #define JSON_PGAS	JSON1 _PGAS JSON2
-#define JSON_CPUS	JSON1 _CPUS JSON2
 #define JSON_NOTIFY	JSON1 _NOTIFY JSON2
 #define JSON_DEVDETAILS	JSON1 _DEVDETAILS JSON2
 #define JSON_BYE	JSON1 _BYE JSON1
@@ -306,14 +292,8 @@ static const char *JSON_PARAMETER = "parameter";
 #define MSG_MISID 15
 #define MSG_GPUDEV 17
 
-#ifdef WANT_CPUMINE
-#define MSG_CPUNON 16
-#define MSG_CPUDEV 18
-#define MSG_INVCPU 19
-#endif
-
 #define MSG_NUMGPU 20
-#define MSG_NUMCPU 21
+
 #define MSG_VERSION 22
 #define MSG_INVJSON 23
 #define MSG_MISCMD 24
@@ -420,11 +400,9 @@ enum code_severity {
 enum code_parameters {
 	PARAM_GPU,
 	PARAM_PGA,
-	PARAM_CPU,
 	PARAM_PID,
 	PARAM_GPUMAX,
 	PARAM_PGAMAX,
-	PARAM_CPUMAX,
 	PARAM_PMAX,
 	PARAM_POOLMAX,
 
@@ -474,11 +452,8 @@ struct CODES {
 #ifdef HAVE_AN_FPGA
 						"%d PGA(s)"
 #endif
-#if defined(WANT_CPUMINE) && (defined(HAVE_OPENCL) || defined(HAVE_AN_ASIC) || defined(HAVE_AN_FPGA))
+#if (defined(HAVE_OPENCL) || defined(HAVE_AN_ASIC) || defined(HAVE_AN_FPGA))
 						" - "
-#endif
-#ifdef WANT_CPUMINE
-						"%d CPU(s)"
 #endif
  },
 
@@ -488,9 +463,6 @@ struct CODES {
 #endif
 #ifdef HAVE_AN_FPGA
 						"/PGAs"
-#endif
-#ifdef WANT_CPUMINE
-						"/CPUs"
 #endif
  },
 
@@ -513,15 +485,9 @@ struct CODES {
  { SEVERITY_INFO,  MSG_PGAENA,	PARAM_PGA,	"PGA %d sent enable message" },
  { SEVERITY_INFO,  MSG_PGADIS,	PARAM_PGA,	"PGA %d set disable flag" },
  { SEVERITY_ERR,   MSG_PGAUNW,	PARAM_PGA,	"PGA %d is not flagged WELL, cannot enable" },
-#endif
-#ifdef WANT_CPUMINE
- { SEVERITY_ERR,   MSG_CPUNON,	PARAM_NONE,	"No CPUs" },
- { SEVERITY_SUCC,  MSG_CPUDEV,	PARAM_CPU,	"CPU%d" },
- { SEVERITY_ERR,   MSG_INVCPU,	PARAM_CPUMAX,	"Invalid CPU id %d - range is 0 - %d" },
 #endif
  { SEVERITY_SUCC,  MSG_NUMGPU,	PARAM_NONE,	"GPU count" },
  { SEVERITY_SUCC,  MSG_NUMPGA,	PARAM_NONE,	"PGA count" },
- { SEVERITY_SUCC,  MSG_NUMCPU,	PARAM_NONE,	"CPU count" },
  { SEVERITY_SUCC,  MSG_VERSION,	PARAM_NONE,	"CGMiner versions" },
  { SEVERITY_ERR,   MSG_INVJSON,	PARAM_NONE,	"Invalid JSON" },
  { SEVERITY_ERR,   MSG_MISCMD,	PARAM_CMD,	"Missing JSON '%s'" },
@@ -1300,9 +1266,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p
 #endif
 #ifdef HAVE_AN_FPGA
 	int pga;
-#endif
-#ifdef WANT_CPUMINE
-	int cpu;
 #endif
 	int i;
 
@@ -1333,7 +1296,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p
 			switch(codes[i].params) {
 				case PARAM_GPU:
 				case PARAM_PGA:
-				case PARAM_CPU:
 				case PARAM_PID:
 				case PARAM_INT:
 					sprintf(buf, codes[i].description, paramid);
@@ -1351,15 +1313,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p
 					pga = numpgas();
 					sprintf(buf, codes[i].description, paramid, pga - 1);
 					break;
-#endif
-#ifdef WANT_CPUMINE
-				case PARAM_CPUMAX:
-					if (opt_n_threads > 0)
-						cpu = num_processors;
-					else
-						cpu = 0;
-					sprintf(buf, codes[i].description, paramid, cpu - 1);
-					break;
 #endif
 				case PARAM_PMAX:
 					sprintf(buf, codes[i].description, total_pools);
@@ -1374,12 +1327,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p
 #ifdef HAVE_AN_FPGA
 					pga = numpgas();
 #endif
-#ifdef WANT_CPUMINE
-					if (opt_n_threads > 0)
-						cpu = num_processors;
-					else
-						cpu = 0;
-#endif
 
 					sprintf(buf, codes[i].description
 #ifdef HAVE_OPENCL
@@ -1390,9 +1337,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p
 #endif
 #ifdef HAVE_AN_FPGA
 						, pga
-#endif
-#ifdef WANT_CPUMINE
-						, cpu
 #endif
 						);
 					break;
@@ -1470,7 +1414,6 @@ static void minerconfig(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __
 	int gpucount = 0;
 	int asccount = 0;
 	int pgacount = 0;
-	int cpucount = 0;
 	char *adlinuse = (char *)NO;
 #ifdef HAVE_ADL
 	const char *adl = YES;
@@ -1498,17 +1441,12 @@ static void minerconfig(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __
 	pgacount = numpgas();
 #endif
 
-#ifdef WANT_CPUMINE
-	cpucount = opt_n_threads > 0 ? num_processors : 0;
-#endif
-
 	message(io_data, MSG_MINECONFIG, 0, NULL, isjson);
 	io_open = io_add(io_data, isjson ? COMSTR JSON_MINECONFIG : _MINECONFIG COMSTR);
 
 	root = api_add_int(root, "GPU Count", &gpucount, false);
 	root = api_add_int(root, "ASC Count", &asccount, false);
 	root = api_add_int(root, "PGA Count", &pgacount, false);
-	root = api_add_int(root, "CPU Count", &cpucount, false);
 	root = api_add_int(root, "Pool Count", &total_pools, false);
 	root = api_add_const(root, "ADL", (char *)adl, false);
 	root = api_add_string(root, "ADL in use", adlinuse, false);
@@ -1759,43 +1697,6 @@ static void pgastatus(struct io_data *io_data, int pga, bool isjson, bool precom
 }
 #endif
 
-#ifdef WANT_CPUMINE
-static void cpustatus(struct io_data *io_data, int cpu, bool isjson, bool precom)
-{
-	struct api_data *root = NULL;
-	char buf[TMPBUFSIZ];
-
-	if (opt_n_threads > 0 && cpu >= 0 && cpu < num_processors) {
-		struct cgpu_info *cgpu = &cpus[cpu];
-
-		cgpu->utility = cgpu->accepted / ( total_secs ? total_secs : 1 ) * 60;
-
-		root = api_add_int(root, "CPU", &cpu, false);
-		double mhs = cgpu->total_mhashes / total_secs;
-		root = api_add_mhs(root, "MHS av", &mhs, false);
-		char mhsname[27];
-		sprintf(mhsname, "MHS %ds", opt_log_interval);
-		root = api_add_mhs(root, mhsname, &(cgpu->rolling), false);
-		root = api_add_int(root, "Accepted", &(cgpu->accepted), false);
-		root = api_add_int(root, "Rejected", &(cgpu->rejected), false);
-		root = api_add_utility(root, "Utility", &(cgpu->utility), false);
-		int last_share_pool = cgpu->last_share_pool_time > 0 ?
-					cgpu->last_share_pool : -1;
-		root = api_add_int(root, "Last Share Pool", &last_share_pool, false);
-		root = api_add_time(root, "Last Share Time", &(cgpu->last_share_pool_time), false);
-		root = api_add_mhtotal(root, "Total MH", &(cgpu->total_mhashes), false);
-		root = api_add_int(root, "Diff1 Work", &(cgpu->diff1), false);
-		root = api_add_diff(root, "Difficulty Accepted", &(cgpu->diff_accepted), false);
-		root = api_add_diff(root, "Difficulty Rejected", &(cgpu->diff_rejected), false);
-		root = api_add_diff(root, "Last Share Difficulty", &(cgpu->last_share_diff), false);
-		root = api_add_time(root, "Last Valid Work", &(cgpu->last_device_valid_work), false);
-
-		root = print_data(root, buf, isjson, precom);
-		io_add(io_data, buf);
-	}
-}
-#endif
-
 static void devstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson, __maybe_unused char group)
 {
 	bool io_open = false;
@@ -1817,7 +1718,7 @@ static void devstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __ma
 	numpga = numpgas();
 #endif
 
-	if (numgpu == 0 && opt_n_threads == 0 && numpga == 0 && numasc == 0) {
+	if (numgpu == 0 && numpga == 0 && numasc == 0) {
 		message(io_data, MSG_NODEVS, 0, NULL, isjson);
 		return;
 	}
@@ -1854,16 +1755,6 @@ static void devstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __ma
 	}
 #endif
 
-#ifdef WANT_CPUMINE
-	if (opt_n_threads > 0) {
-		for (i = 0; i < num_processors; i++) {
-			cpustatus(io_data, i, isjson, isjson && devcount > 0);
-
-			devcount++;
-		}
-	}
-#endif
-
 	if (isjson && io_open)
 		io_close(io_data);
 }
@@ -2088,40 +1979,6 @@ static void pgaidentify(struct io_data *io_data, __maybe_unused SOCKETTYPE c, ch
 }
 #endif
 
-#ifdef WANT_CPUMINE
-static void cpudev(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
-{
-	bool io_open = false;
-	int id;
-
-	if (opt_n_threads == 0) {
-		message(io_data, MSG_CPUNON, 0, NULL, isjson);
-		return;
-	}
-
-	if (param == NULL || *param == '\0') {
-		message(io_data, MSG_MISID, 0, NULL, isjson);
-		return;
-	}
-
-	id = atoi(param);
-	if (id < 0 || id >= num_processors) {
-		message(io_data, MSG_INVCPU, id, NULL, isjson);
-		return;
-	}
-
-	message(io_data, MSG_CPUDEV, id, NULL, isjson);
-
-	if (isjson)
-		io_open = io_add(io_data, COMSTR JSON_CPU);
-
-	cpustatus(io_data, id, isjson, false);
-
-	if (isjson && io_open)
-		io_close(io_data);
-}
-#endif
-
 static void poolstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson, __maybe_unused char group)
 {
 	struct api_data *root = NULL;
@@ -2219,12 +2076,6 @@ static void summary(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __mayb
 	bool io_open;
 	double utility, mhs, work_utility;
 
-#ifdef WANT_CPUMINE
-	char *algo = (char *)(algo_names[opt_algo]);
-	if (algo == NULL)
-		algo = (char *)NULLSTR;
-#endif
-
 	message(io_data, MSG_SUMM, 0, NULL, isjson);
 	io_open = io_add(io_data, isjson ? COMSTR JSON_SUMMARY : _SUMMARY COMSTR);
 
@@ -2236,9 +2087,6 @@ static void summary(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __mayb
 	work_utility = total_diff1 / ( total_secs ? total_secs : 1 ) * 60;
 
 	root = api_add_elapsed(root, "Elapsed", &(total_secs), true);
-#ifdef WANT_CPUMINE
-	root = api_add_string(root, "Algorithm", algo, false);
-#endif
 	root = api_add_mhs(root, "MHS av", &(mhs), false);
 	root = api_add_uint(root, "Found Blocks", &(found_blocks), true);
 	root = api_add_int(root, "Getworks", &(total_getworks), true);
@@ -2419,28 +2267,6 @@ static void pgacount(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __may
 		io_close(io_data);
 }
 
-static void cpucount(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson, __maybe_unused char group)
-{
-	struct api_data *root = NULL;
-	char buf[TMPBUFSIZ];
-	bool io_open;
-	int count = 0;
-
-#ifdef WANT_CPUMINE
-	count = opt_n_threads > 0 ? num_processors : 0;
-#endif
-
-	message(io_data, MSG_NUMCPU, 0, NULL, isjson);
-	io_open = io_add(io_data, isjson ? COMSTR JSON_CPUS : _CPUS COMSTR);
-
-	root = api_add_int(root, "Count", &count, false);
-
-	root = print_data(root, buf, isjson, false);
-	io_add(io_data, buf);
-	if (isjson && io_open)
-		io_close(io_data);
-}
-
 static void switchpool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group)
 {
 	struct pool *pool;
@@ -3540,13 +3366,9 @@ struct CMDS {
 	{ "pgaenable",		pgaenable,	true },
 	{ "pgadisable",		pgadisable,	true },
 	{ "pgaidentify",	pgaidentify,	true },
-#endif
-#ifdef WANT_CPUMINE
-	{ "cpu",		cpudev,		false },
 #endif
 	{ "gpucount",		gpucount,	false },
 	{ "pgacount",		pgacount,	false },
-	{ "cpucount",		cpucount,	false },
 	{ "switchpool",		switchpool,	true },
 	{ "addpool",		addpool,	true },
 	{ "poolpriority",	poolpriority,	true },

+ 1 - 153
cgminer.c

@@ -44,7 +44,6 @@
 #include "miner.h"
 #include "findnonce.h"
 #include "adl.h"
-#include "driver-cpu.h"
 #include "driver-opencl.h"
 #include "bench_block.h"
 #include "scrypt.h"
@@ -89,7 +88,6 @@ int opt_log_interval = 5;
 int opt_queue = 1;
 int opt_scantime = 60;
 int opt_expiry = 120;
-int opt_bench_algo = -1;
 static const bool opt_time = true;
 unsigned long long global_hashrate;
 
@@ -113,7 +111,6 @@ static bool opt_removedisabled;
 int total_devices;
 struct cgpu_info **devices;
 bool have_opencl;
-int opt_n_threads = -1;
 int mining_threads;
 int num_processors;
 #ifdef HAVE_CURSES
@@ -869,36 +866,6 @@ static char *set_null(const char __maybe_unused *arg)
 
 /* These options are available from config file or commandline */
 static struct opt_table opt_config_table[] = {
-#ifdef WANT_CPUMINE
-	OPT_WITH_ARG("--algo|-a",
-		     set_algo, show_algo, &opt_algo,
-		     "Specify sha256 implementation for CPU mining:\n"
-		     "\tauto\t\tBenchmark at startup and pick fastest algorithm"
-		     "\n\tc\t\tLinux kernel sha256, implemented in C"
-#ifdef WANT_SSE2_4WAY
-		     "\n\t4way\t\ttcatm's 4-way SSE2 implementation"
-#endif
-#ifdef WANT_VIA_PADLOCK
-		     "\n\tvia\t\tVIA padlock implementation"
-#endif
-		     "\n\tcryptopp\tCrypto++ C/C++ implementation"
-#ifdef WANT_CRYPTOPP_ASM32
-		     "\n\tcryptopp_asm32\tCrypto++ 32-bit assembler implementation"
-#endif
-#ifdef WANT_X8632_SSE2
-		     "\n\tsse2_32\t\tSSE2 32 bit implementation for i386 machines"
-#endif
-#ifdef WANT_X8664_SSE2
-		     "\n\tsse2_64\t\tSSE2 64 bit implementation for x86_64 machines"
-#endif
-#ifdef WANT_X8664_SSE4
-		     "\n\tsse4_64\t\tSSE4.1 64 bit implementation for x86_64 machines"
-#endif
-#ifdef WANT_ALTIVEC_4WAY
-    "\n\taltivec_4way\tAltivec implementation for PowerPC G4 and G5 machines"
-#endif
-		),
-#endif
 	OPT_WITH_ARG("--api-allow",
 		     set_api_allow, NULL, NULL,
 		     "Allow API access only to the given list of [G:]IP[/Prefix] addresses[/subnets]"),
@@ -936,20 +903,10 @@ static struct opt_table opt_config_table[] = {
 			opt_set_bool, &opt_bfl_noncerange,
 			"Use nonce range on bitforce devices if supported"),
 #endif
-#ifdef WANT_CPUMINE
-	OPT_WITH_ARG("--bench-algo|-b",
-		     set_int_0_to_9999, opt_show_intval, &opt_bench_algo,
-		     opt_hidden),
-#endif
 #ifdef HAVE_CURSES
 	OPT_WITHOUT_ARG("--compact",
 			opt_set_bool, &opt_compact,
 			"Use compact display without per device statistics"),
-#endif
-#ifdef WANT_CPUMINE
-	OPT_WITH_ARG("--cpu-threads|-t",
-		     force_nthreads_int, opt_show_intval, &opt_n_threads,
-		     "Number of miner CPU threads"),
 #endif
 	OPT_WITHOUT_ARG("--debug|-D",
 		     enable_debug, &opt_debug,
@@ -968,11 +925,6 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--disable-rejecting",
 			opt_set_bool, &opt_disable_pool,
 			"Automatically disable pools that continually reject shares"),
-#if defined(WANT_CPUMINE) && (defined(HAVE_OPENCL) || defined(USE_FPGA))
-	OPT_WITHOUT_ARG("--enable-cpu|-C",
-			opt_set_bool, &opt_usecpu,
-			"Enable CPU mining with other mining (default: no CPU mining if other devices exist)"),
-#endif
 	OPT_WITH_ARG("--expiry|-E",
 		     set_int_0_to_9999, opt_show_intval, &opt_expiry,
 		     "Upper bound on how many seconds after getting work we consider a share from it stale"),
@@ -1375,9 +1327,6 @@ static char *opt_verusage_and_exit(const char *extra)
 #ifdef HAVE_OPENCL
 		"GPU "
 #endif
-#ifdef WANT_CPUMINE
-		"CPU "
-#endif
 #ifdef USE_BITFORCE
 		"bitforce "
 #endif
@@ -1905,7 +1854,6 @@ static int statusy;
 #ifdef HAVE_OPENCL
 struct cgpu_info gpus[MAX_GPUDEVICES]; /* Maximum number apparently possible */
 #endif
-struct cgpu_info *cpus;
 
 #ifdef HAVE_CURSES
 static inline void unlock_curses(void)
@@ -2039,10 +1987,6 @@ static void curses_print_status(void)
 
 	wattron(statuswin, A_BOLD);
 	mvwprintw(statuswin, 0, 0, " " PACKAGE " version " VERSION " - Started: %s", datestamp);
-#ifdef WANT_CPUMINE
-	if (opt_n_threads)
-		wprintw(statuswin, " CPU Algo: %s", algo_names[opt_algo]);
-#endif
 	wattroff(statuswin, A_BOLD);
 	mvwhline(statuswin, 1, 0, '-', 80);
 	mvwprintw(statuswin, 2, 0, " %s", statusline);
@@ -4059,9 +4003,6 @@ void write_config(FILE *fcfg)
 	if (opt_reorder)
 		fprintf(fcfg, ",\n\"gpu-reorder\" : true");
 #endif
-#ifdef WANT_CPUMINE
-	fprintf(fcfg, ",\n\"algo\" : \"%s\"", algo_names[opt_algo]);
-#endif
 
 	/* Simple bool and int options */
 	struct opt_table *opt;
@@ -6382,10 +6323,6 @@ static void *watchdog_thread(void __maybe_unused *userdata)
 			if (thr->getwork || *denable == DEV_DISABLED)
 				continue;
 
-#ifdef WANT_CPUMINE
-			if (cgpu->drv->drv_id == DRIVER_CPU)
-				continue;
-#endif
 			if (cgpu->status != LIFE_WELL && (now.tv_sec - thr->last.tv_sec < WATCHDOG_SICK_TIME)) {
 				if (cgpu->status != LIFE_INIT)
 				applog(LOG_ERR, "%s: Recovered, declaring WELL!", dev_str);
@@ -6459,10 +6396,6 @@ void print_summary(void)
 	applog(LOG_WARNING, "Started at %s", datestamp);
 	if (total_pools == 1)
 		applog(LOG_WARNING, "Pool: %s", pools[0]->rpc_url);
-#ifdef WANT_CPUMINE
-	if (opt_n_threads)
-		applog(LOG_WARNING, "CPU hasher algorithm used: %s", algo_names[opt_algo]);
-#endif
 	applog(LOG_WARNING, "Runtime: %d hrs : %d mins : %d secs", hours, mins, secs);
 	displayed_hashes = total_mhashes_done / total_secs;
 	if (displayed_hashes < 1) {
@@ -6542,9 +6475,6 @@ static void clean_up(void)
 	if (!opt_realquiet && successful_connect)
 		print_summary();
 
-	if (opt_n_threads)
-		free(cpus);
-
 	curl_global_cleanup();
 }
 
@@ -6798,15 +6728,6 @@ void enable_curses(void) {
 }
 #endif
 
-/* TODO: fix need a dummy CPU device_drv even if no support for CPU mining */
-#ifndef WANT_CPUMINE
-struct device_drv cpu_drv;
-struct device_drv cpu_drv = {
-	.drv_id = DRIVER_CPU,
-	.name = "CPU",
-};
-#endif
-
 #ifdef USE_BFLSC
 extern struct device_drv bflsc_drv;
 #endif
@@ -7183,10 +7104,6 @@ int main(int argc, char *argv[])
 
 	sprintf(packagename, "%s %s", PACKAGE, VERSION);
 
-#ifdef WANT_CPUMINE
-	init_max_name_len();
-#endif
-
 	handler.sa_handler = &sighandler;
 	handler.sa_flags = 0;
 	sigemptyset(&handler.sa_mask);
@@ -7202,15 +7119,6 @@ int main(int argc, char *argv[])
 	strcpy(cgminer_path, dirname(s));
 	free(s);
 	strcat(cgminer_path, "/");
-#ifdef WANT_CPUMINE
-	// Hack to make cgminer silent when called recursively on WIN32
-	int skip_to_bench = 0;
-	#if defined(WIN32)
-		char buf[32];
-		if (GetEnvironmentVariable("CGMINER_BENCH_ALGO", buf, 16))
-			skip_to_bench = 1;
-	#endif // defined(WIN32)
-#endif
 
 	devcursor = 8;
 	logstart = devcursor + 1;
@@ -7295,51 +7203,6 @@ int main(int argc, char *argv[])
 	usb_initialise();
 #endif
 
-#ifdef WANT_CPUMINE
-#ifdef USE_SCRYPT
-	if (opt_scrypt)
-		set_scrypt_algo(&opt_algo);
-	else
-#endif
-	if (0 <= opt_bench_algo) {
-		double rate = bench_algo_stage3(opt_bench_algo);
-
-		if (!skip_to_bench)
-			printf("%.5f (%s)\n", rate, algo_names[opt_bench_algo]);
-		else {
-			// Write result to shared memory for parent
-#if defined(WIN32)
-				char unique_name[64];
-
-				if (GetEnvironmentVariable("CGMINER_SHARED_MEM", unique_name, 32)) {
-					HANDLE map_handle = CreateFileMapping(
-						INVALID_HANDLE_VALUE,   // use paging file
-						NULL,                   // default security attributes
-						PAGE_READWRITE,         // read/write access
-						0,                      // size: high 32-bits
-						4096,			// size: low 32-bits
-						unique_name		// name of map object
-					);
-					if (NULL != map_handle) {
-						void *shared_mem = MapViewOfFile(
-							map_handle,	// object to map view of
-							FILE_MAP_WRITE, // read/write access
-							0,              // high offset:  map from
-							0,              // low offset:   beginning
-							0		// default: map entire file
-						);
-						if (NULL != shared_mem)
-							CopyMemory(shared_mem, &rate, sizeof(rate));
-						(void)UnmapViewOfFile(shared_mem);
-					}
-					(void)CloseHandle(map_handle);
-				}
-#endif
-		}
-		exit(0);
-	}
-#endif
-
 #ifdef HAVE_OPENCL
 	if (!opt_nogpu)
 		opencl_drv.drv_detect();
@@ -7376,10 +7239,6 @@ int main(int argc, char *argv[])
 		ztex_drv.drv_detect();
 #endif
 
-#ifdef WANT_CPUMINE
-	cpu_drv.drv_detect();
-#endif
-
 	if (devices_enabled == -1) {
 		applog(LOG_ERR, "Devices detected:");
 		for (i = 0; i < total_devices; ++i) {
@@ -7400,12 +7259,8 @@ int main(int argc, char *argv[])
 					quit (1, "Command line options set a device that doesn't exist");
 				enable_device(devices[i]);
 			} else if (i < total_devices) {
-				if (opt_removedisabled) {
-					if (devices[i]->drv->drv_id == DRIVER_CPU)
-						--opt_n_threads;
-				} else {
+				if (!opt_removedisabled)
 					enable_device(devices[i]);
-				}
 				devices[i]->deven = DEV_DISABLED;
 			}
 		}
@@ -7609,13 +7464,6 @@ begin_bench:
 		pause_dynamic_threads(i);
 #endif
 
-#ifdef WANT_CPUMINE
-	applog(LOG_INFO, "%d cpu miner threads started, "
-		"using SHA256 '%s' algorithm.",
-		opt_n_threads,
-		algo_names[opt_algo]);
-#endif
-
 	cgtime(&total_tv_start);
 	cgtime(&total_tv_end);
 

+ 2 - 67
configure.ac

@@ -126,14 +126,6 @@ if test -n "$CGMINER_SDK"; then
 	LDFLAGS="-L$CGMINER_SDK/lib/$target $LDFLAGS"
 fi
 
-cpumining="no"
-
-AC_ARG_ENABLE([cpumining],,[cpumining=$enableval]	)
-if test "x$cpumining" = xyes; then
-	AC_DEFINE_UNQUOTED([WANT_CPUMINE], [1], [Enable CPUMINING])
-fi
-AM_CONDITIONAL([HAS_CPUMINE], [test x$cpumining = xyes])
-
 opencl="yes"
 
 AC_ARG_ENABLE([opencl],
@@ -329,54 +321,6 @@ else
 	JANSSON_LIBS=-ljansson
 fi
 
-dnl Find YASM
-has_yasm=false
-AC_PATH_PROG([YASM],[yasm],[false])
-if test "x$YASM" != "xfalse" ; then
-  AC_MSG_CHECKING([if yasm version is greater than 1.0.1])
-  yasmver=`"$YASM" --version | head -1 | cut -d\  -f2`
-  yamajor=`echo $yasmver | cut -d. -f1`
-  yaminor=`echo $yasmver | cut -d. -f2`
-  yamini=`echo $yasmver | cut -d. -f3`
-  if test "$yamajor" -ge "1" ; then
-    if test "$yamajor" -eq "1" ; then
-      if test "$yaminor" -ge "0" ; then
-        if test "$yaminor" -eq "0"; then
-          if test "$yamini" -ge "1"; then
-            has_yasm=true
-          fi
-        else
-          has_yasm=true
-        fi
-      fi
-    fi
-  else
-    has_yasm=false
-  fi
-  if test "x$has_yasm" = "xtrue" ; then
-    AC_MSG_RESULT([yes])
-  else
-    AC_MSG_RESULT([no])
-  fi
-fi
-if test "x$has_yasm" = "xfalse" ; then
-  AC_MSG_NOTICE([yasm is required for the assembly algorithms. They will be skipped.])
-else
-  if test "x$have_x86_64" = xtrue; then
-    if test "x$have_win32" = xtrue; then
-      YASM_FMT="win64"
-    else
-      YASM_FMT="elf64"
-    fi
-  elif test "x$have_win32" = xtrue; then
-    YASM_FMT="coff"
-  else
-    YASM_FMT="elf32"
-  fi
-fi
-
-AM_CONDITIONAL([HAS_YASM], [test x$has_yasm = xtrue])
-
 if test "x$icarus" != xno; then
 	AC_ARG_WITH([libudev], [AC_HELP_STRING([--without-libudev], [Autodetect FPGAs using libudev (default enabled)])],
 		[libudev=$withval],
@@ -498,15 +442,12 @@ AC_SUBST(WS2_LIBS)
 AC_SUBST(MM_LIBS)
 AC_SUBST(MATH_LIBS)
 AC_SUBST(UDEV_LIBS)
-AC_SUBST(YASM_FMT)
 AC_SUBST(ADL_CPPFLAGS)
 
 AC_CONFIG_FILES([
 	Makefile
 	compat/Makefile
 	compat/jansson/Makefile
-	x86_64/Makefile
-	x86_32/Makefile
 	ccan/Makefile
 	lib/Makefile
 	])
@@ -537,14 +478,14 @@ if test "x$opencl" != xno; then
 
 	else
 		echo "  OpenCL...............: NOT FOUND. GPU mining support DISABLED"
-		if test "x$cpumining$bitforce$avalon$icarus$ztex$modminer$bflsc" = xnonononononono; then
+		if test "x$bitforce$avalon$icarus$ztex$modminer$bflsc" = xnononononono; then
 			AC_MSG_ERROR([No mining configured in])
 		fi
 		echo "  scrypt...............: Disabled (needs OpenCL)"
 	fi
 else
 	echo "  OpenCL...............: Detection overrided. GPU mining support DISABLED"
-	if test "x$cpumining$bitforce$icarus$avalon$ztex$modminer$bflsc" = xnonononononono; then
+	if test "x$bitforce$icarus$avalon$ztex$modminer$bflsc" = xnononononono; then
 		AC_MSG_ERROR([No mining configured in])
 	fi
 	echo "  scrypt...............: Disabled (needs OpenCL)"
@@ -601,12 +542,6 @@ if test "x$icarus" != xno; then
 	echo "  libudev.detection....: $libudev"
 fi
 
-if test "x$cpumining" = xyes; then
-	echo
-	echo "  CPU Mining...........: Enabled"
-	echo "  ASM.(for CPU mining).: $has_yasm"
-fi
-
 echo
 echo "Compilation............: make (or gmake)"
 echo "  CPPFLAGS.............: $CPPFLAGS"

+ 0 - 863
driver-cpu.c

@@ -1,863 +0,0 @@
-/*
- * Copyright 2011-2012 Con Kolivas
- * Copyright 2011-2012 Luke Dashjr
- * Copyright 2010 Jeff Garzik
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.  See COPYING for more details.
- */
-
-#include "config.h"
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <unistd.h>
-#include <signal.h>
-
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#ifndef WIN32
-#include <sys/wait.h>
-#include <sys/resource.h>
-#endif
-#include <libgen.h>
-
-#include "compat.h"
-#include "miner.h"
-#include "bench_block.h"
-#include "driver-cpu.h"
-
-#if defined(unix)
-	#include <errno.h>
-	#include <fcntl.h>
-#endif
-
-#if defined(__linux) && defined(cpu_set_t) /* Linux specific policy and affinity management */
-#include <sched.h>
-static inline void drop_policy(void)
-{
-	struct sched_param param;
-
-#ifdef SCHED_BATCH
-#ifdef SCHED_IDLE
-	if (unlikely(sched_setscheduler(0, SCHED_IDLE, &param) == -1))
-#endif
-		sched_setscheduler(0, SCHED_BATCH, &param);
-#endif
-}
-
-static inline void affine_to_cpu(int id, int cpu)
-{
-	cpu_set_t set;
-
-	CPU_ZERO(&set);
-	CPU_SET(cpu, &set);
-	sched_setaffinity(0, sizeof(&set), &set);
-	applog(LOG_INFO, "Binding cpu mining thread %d to cpu %d", id, cpu);
-}
-#else
-static inline void drop_policy(void)
-{
-}
-
-static inline void affine_to_cpu(int __maybe_unused id, int __maybe_unused cpu)
-{
-}
-#endif
-
-
-
-/* TODO: resolve externals */
-extern char *set_int_range(const char *arg, int *i, int min, int max);
-extern int dev_from_id(int thr_id);
-
-
-/* chipset-optimized hash functions */
-extern bool ScanHash_4WaySSE2(struct thr_info*, const unsigned char *pmidstate,
-	unsigned char *pdata, unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
-
-extern bool ScanHash_altivec_4way(struct thr_info*, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
-
-extern bool scanhash_via(struct thr_info*, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *target,
-	uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
-
-extern bool scanhash_c(struct thr_info*, const unsigned char *midstate, unsigned char *data,
-	      unsigned char *hash1, unsigned char *hash,
-	      const unsigned char *target,
-	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
-
-extern bool scanhash_cryptopp(struct thr_info*, const unsigned char *midstate,unsigned char *data,
-	      unsigned char *hash1, unsigned char *hash,
-	      const unsigned char *target,
-	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
-
-extern bool scanhash_asm32(struct thr_info*, const unsigned char *midstate,unsigned char *data,
-	      unsigned char *hash1, unsigned char *hash,
-	      const unsigned char *target,
-	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
-
-extern bool scanhash_sse2_64(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce);
-
-extern bool scanhash_sse4_64(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce);
-
-extern bool scanhash_sse2_32(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce);
-
-extern bool scanhash_scrypt(struct thr_info *thr, int thr_id, unsigned char *pdata, unsigned char *scratchbuf,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done);
-
-
-
-#ifdef WANT_CPUMINE
-static size_t max_name_len = 0;
-static char *name_spaces_pad = NULL;
-const char *algo_names[] = {
-	[ALGO_C]		= "c",
-#ifdef WANT_SSE2_4WAY
-	[ALGO_4WAY]		= "4way",
-#endif
-#ifdef WANT_VIA_PADLOCK
-	[ALGO_VIA]		= "via",
-#endif
-	[ALGO_CRYPTOPP]		= "cryptopp",
-#ifdef WANT_CRYPTOPP_ASM32
-	[ALGO_CRYPTOPP_ASM32]	= "cryptopp_asm32",
-#endif
-#ifdef WANT_X8632_SSE2
-	[ALGO_SSE2_32]		= "sse2_32",
-#endif
-#ifdef WANT_X8664_SSE2
-	[ALGO_SSE2_64]		= "sse2_64",
-#endif
-#ifdef WANT_X8664_SSE4
-	[ALGO_SSE4_64]		= "sse4_64",
-#endif
-#ifdef WANT_ALTIVEC_4WAY
-    [ALGO_ALTIVEC_4WAY] = "altivec_4way",
-#endif
-#ifdef WANT_SCRYPT
-    [ALGO_SCRYPT] = "scrypt",
-#endif
-};
-
-static const sha256_func sha256_funcs[] = {
-	[ALGO_C]		= (sha256_func)scanhash_c,
-#ifdef WANT_SSE2_4WAY
-	[ALGO_4WAY]		= (sha256_func)ScanHash_4WaySSE2,
-#endif
-#ifdef WANT_ALTIVEC_4WAY
-    [ALGO_ALTIVEC_4WAY] = (sha256_func) ScanHash_altivec_4way,
-#endif
-#ifdef WANT_VIA_PADLOCK
-	[ALGO_VIA]		= (sha256_func)scanhash_via,
-#endif
-	[ALGO_CRYPTOPP]		=  (sha256_func)scanhash_cryptopp,
-#ifdef WANT_CRYPTOPP_ASM32
-	[ALGO_CRYPTOPP_ASM32]	= (sha256_func)scanhash_asm32,
-#endif
-#ifdef WANT_X8632_SSE2
-	[ALGO_SSE2_32]		= (sha256_func)scanhash_sse2_32,
-#endif
-#ifdef WANT_X8664_SSE2
-	[ALGO_SSE2_64]		= (sha256_func)scanhash_sse2_64,
-#endif
-#ifdef WANT_X8664_SSE4
-	[ALGO_SSE4_64]		= (sha256_func)scanhash_sse4_64,
-#endif
-#ifdef WANT_SCRYPT
-	[ALGO_SCRYPT]		= (sha256_func)scanhash_scrypt
-#endif
-};
-#endif
-
-
-
-#ifdef WANT_CPUMINE
-/* Default algorithm, chosen at compile time: the most specialised scanner
- * whose WANT_* macro and compiler feature macro are both defined, falling
- * back to the plain C implementation. */
-#if defined(WANT_X8664_SSE4) && defined(__SSE4_1__)
-enum sha256_algos opt_algo = ALGO_SSE4_64;
-#elif defined(WANT_X8664_SSE2) && defined(__SSE2__)
-enum sha256_algos opt_algo = ALGO_SSE2_64;
-#elif defined(WANT_X8632_SSE2) && defined(__SSE2__)
-enum sha256_algos opt_algo = ALGO_SSE2_32;
-#else
-enum sha256_algos opt_algo = ALGO_C;
-#endif
-/* When true, cpu_detect() still creates CPU threads even if other devices
- * were found. */
-bool opt_usecpu = false;
-/* Index into control_thr[] used by reinit_cpu_device(). It is never assigned
- * in the code visible here, so it keeps its zero initial value. */
-static int cpur_thr_id;
-/* Set by force_nthreads_int(); tells cpu_detect() to keep opt_n_threads. */
-static bool forced_n_threads;
-#endif
-
-
-
-
-#ifdef WANT_CPUMINE
-// Algo benchmark, crash-prone, system independent stage
-//
-// Runs the scanner registered for `algo` in sha256_funcs[] once over the
-// built-in benchmark block, timing the call with gettimeofday().
-//
-// Returns (last_nonce + 1) divided by the elapsed microseconds (i.e. MH/s),
-// or -1.0 when no time elapsed or when `algo` has no scanner available.
-double bench_algo_stage3(
-	enum sha256_algos algo
-)
-{
-	// This function is exported, and sha256_funcs[] has NULL holes for
-	// algorithms that were not compiled in: refuse such values rather
-	// than calling through a NULL or out-of-range table entry.
-	if ((size_t)algo >= ARRAY_SIZE(sha256_funcs) || !sha256_funcs[algo])
-		return -1.0;
-
-	// Use a random work block pulled from a pool
-	static uint8_t bench_block[] = { CGMINER_BENCHMARK_BLOCK };
-	struct work work __attribute__((aligned(128)));
-	unsigned char hash1[64];
-
-	// Copy as much of the benchmark block as fits; the rest stays zeroed
-	size_t bench_size = sizeof(work);
-	size_t work_size = sizeof(bench_block);
-	size_t min_size = (work_size < bench_size ? work_size : bench_size);
-	memset(&work, 0, sizeof(work));
-	memcpy(&work, &bench_block, min_size);
-
-	struct thr_info dummy = {0};
-
-	struct timeval end;
-	struct timeval start;
-	uint32_t max_nonce = (1<<22);
-	uint32_t last_nonce = 0;
-
-	hex2bin(hash1, "00000000000000000000000000000000000000000000000000000000000000000000008000000000000000000000000000000000000000000000000000010000", 64);
-
-	gettimeofday(&start, 0);
-			{
-				sha256_func func = sha256_funcs[algo];
-				(*func)(
-					&dummy,
-					work.midstate,
-					work.data,
-					hash1,
-					work.hash,
-					work.target,
-					max_nonce,
-					&last_nonce,
-					work.blk.nonce
-				);
-			}
-	gettimeofday(&end, 0);
-
-	uint64_t usec_end = ((uint64_t)end.tv_sec)*1000*1000 + end.tv_usec;
-	uint64_t usec_start = ((uint64_t)start.tv_sec)*1000*1000 + start.tv_usec;
-	uint64_t usec_elapsed = usec_end - usec_start;
-
-	double rate = -1.0;
-	if (0<usec_elapsed) {
-		// Do the +1 in floating point so it cannot wrap in 32 bits
-		rate = ((double)last_nonce + 1.0)/usec_elapsed;
-	}
-	return rate;
-}
-
-#if defined(unix)
-
-	// Turn O_NONBLOCK on (yes != 0) or off (yes == 0) for descriptor fd.
-	// A failing fcntl() is reported through perror() and ends the process.
-	static void set_non_blocking(
-		int fd,
-		int yes
-	)
-	{
-		int mode = fcntl(fd, F_GETFL, 0);
-
-		if (mode < 0) {
-			perror("fcntl(GET) failed");
-			exit(1);
-		}
-
-		if (yes)
-			mode |= O_NONBLOCK;
-		else
-			mode &= ~O_NONBLOCK;
-
-		if (fcntl(fd, F_SETFL, mode) < 0) {
-			perror("fcntl(SET) failed");
-			exit(1);
-		}
-	}
-
-#endif // defined(unix)
-
-// Algo benchmark, crash-safe, system-dependent stage
-//
-// Runs bench_algo_stage3(algo) in a way that lets this process survive a
-// crash of the hashing routine:
-//   - unix:  fork a child that sends its result back through a pipe; the
-//            parent polls waitpid() once a second and kills the child after
-//            roughly a minute.
-//   - WIN32: relaunch this executable with " --bench-algo <n>" as a debuggee
-//            and collect the result from a named shared-memory mapping.
-//   - other: call bench_algo_stage3() directly.
-//
-// Returns the measured rate, or a negative value when no result was obtained.
-// Failures while setting up the machinery terminate the process.
-static double bench_algo_stage2(
-	enum sha256_algos algo
-)
-{
-	// Here, the gig is to safely run a piece of code that potentially
-	// crashes. Unfortunately, the Right Way (tm) to do this is rather
-	// heavily platform dependent :(
-
-	// Negative sentinel: stays in place if the child never reports back
-	double rate = -1.23457;
-
-	#if defined(unix)
-
-		// Make a pipe: [readFD, writeFD]
-		int pfd[2];
-		int r = pipe(pfd);
-		if (r<0) {
-			perror("pipe - failed to create pipe for --algo auto");
-			exit(1);
-		}
-
-		// Make pipe non blocking
-		set_non_blocking(pfd[0], 1);
-		set_non_blocking(pfd[1], 1);
-
-		// Don't allow a crashing child to kill the main process
-		sighandler_t sr0 = signal(SIGPIPE, SIG_IGN);
-		if (SIG_ERR==sr0) {
-			perror("signal - failed to edit signal mask for --algo auto");
-			exit(1);
-		}
-
-		// Fork a child to do the actual benchmarking
-		pid_t child_pid = fork();
-		if (child_pid<0) {
-			perror("fork - failed to create a child process for --algo auto");
-			exit(1);
-		}
-
-		// Do the dangerous work in the child, knowing we might crash
-		if (0==child_pid) {
-
-			// TODO: some umask trickery to prevent coredumps
-
-			// Benchmark this algorithm
-			double result = bench_algo_stage3(algo);
-
-			// We survived, send result to parent and bail
-			int loop_count = 0;
-			while (1) {
-				ssize_t bytes_written = write(pfd[1], &result, sizeof(result));
-				int try_again = (0==bytes_written || (bytes_written<0 && EAGAIN==errno));
-				int success = (sizeof(result)==(size_t)bytes_written);
-
-				if (success)
-					break;
-
-				if (!try_again) {
-					perror("write - child failed to write benchmark result to pipe");
-					exit(1);
-				}
-
-				if (5<loop_count) {
-					applog(LOG_ERR, "child tried %d times to communicate with parent, giving up", loop_count);
-					exit(1);
-				}
-				++loop_count;
-				sleep(1);
-			}
-			exit(0);
-		}
-
-		// Parent waits for a result from child
-		int loop_count = 0;
-		while (1) {
-
-			// Wait for child to die
-			int status;
-			int r = waitpid(child_pid, &status, WNOHANG);
-			if ((child_pid==r) || (r<0 && ECHILD==errno)) {
-
-				// Child died somehow. Grab result and bail
-				// (a short or failed read leaves the sentinel in rate)
-				double tmp;
-				ssize_t bytes_read = read(pfd[0], &tmp, sizeof(tmp));
-				if (sizeof(tmp)==(size_t)bytes_read)
-					rate = tmp;
-				break;
-
-			} else if (r<0) {
-				perror("bench_algo: waitpid failed. giving up.");
-				exit(1);
-			}
-
-			// Give up on child after a ~60s
-			if (60<loop_count) {
-				kill(child_pid, SIGKILL);
-				waitpid(child_pid, &status, 0);
-				break;
-			}
-
-			// Wait a bit longer
-			++loop_count;
-			sleep(1);
-		}
-
-		// Close pipe
-		r = close(pfd[0]);
-		if (r<0) {
-			perror("close - failed to close read end of pipe for --algo auto");
-			exit(1);
-		}
-		r = close(pfd[1]);
-		if (r<0) {
-			perror("close - failed to close write end of pipe for --algo auto");
-			exit(1);
-		}
-
-	#elif defined(WIN32)
-
-		// Get handle to current exe
-		HINSTANCE module = GetModuleHandle(0);
-		if (!module) {
-			applog(LOG_ERR, "failed to retrieve module handle");
-			exit(1);
-		}
-
-		// Create a unique name
-		char unique_name[32];
-		snprintf(
-			unique_name,
-			sizeof(unique_name),
-			"cgminer-%p",
-			(void*)module
-		);
-
-		// Create and init a chunked of shared memory
-		HANDLE map_handle = CreateFileMapping(
-			INVALID_HANDLE_VALUE,   // use paging file
-			NULL,                   // default security attributes
-			PAGE_READWRITE,         // read/write access
-			0,                      // size: high 32-bits
-			4096,			// size: low 32-bits
-			unique_name		// name of map object
-		);
-		if (NULL==map_handle) {
-			applog(LOG_ERR, "could not create shared memory");
-			exit(1);
-		}
-
-		void *shared_mem = MapViewOfFile(
-			map_handle,	// object to map view of
-			FILE_MAP_WRITE, // read/write access
-			0,              // high offset:  map from
-			0,              // low offset:   beginning
-			0		// default: map entire file
-		);
-		if (NULL==shared_mem) {
-			applog(LOG_ERR, "could not map shared memory");
-			exit(1);
-		}
-		SetEnvironmentVariable("CGMINER_SHARED_MEM", unique_name);
-		// Seed the mapping with the sentinel so a silent child reads as failure
-		CopyMemory(shared_mem, &rate, sizeof(rate));
-
-		// Get path to current exe (zero-filled so it is always terminated)
-		char cmd_line[256 + MAX_PATH] = {0};
-		const size_t n = sizeof(cmd_line)-200;
-		DWORD size = GetModuleFileName(module, cmd_line, n);
-		if (0==size) {
-			applog(LOG_ERR, "failed to retrieve module path");
-			exit(1);
-		}
-
-		// Construct new command line based on that
-		char *p = strlen(cmd_line) + cmd_line;
-		snprintf(p, sizeof(cmd_line) - (size_t)(p - cmd_line), " --bench-algo %d", (int)algo);
-		SetEnvironmentVariable("CGMINER_BENCH_ALGO", "1");
-
-		// Launch a debug copy of cgminer
-		STARTUPINFO startup_info;
-		PROCESS_INFORMATION process_info;
-		ZeroMemory(&startup_info, sizeof(startup_info));
-		ZeroMemory(&process_info, sizeof(process_info));
-		startup_info.cb = sizeof(startup_info);
-
-		BOOL ok = CreateProcess(
-			NULL,			// No module name (use command line)
-			cmd_line,		// Command line
-			NULL,			// Process handle not inheritable
-			NULL,			// Thread handle not inheritable
-			FALSE,			// Set handle inheritance to FALSE
-			DEBUG_ONLY_THIS_PROCESS,// We're going to debug the child
-			NULL,			// Use parent's environment block
-			NULL,			// Use parent's starting directory
-			&startup_info,		// Pointer to STARTUPINFO structure
-			&process_info		// Pointer to PROCESS_INFORMATION structure
-		);
-		if (!ok) {
-			// GetLastError() returns a DWORD, so print it as unsigned long
-			applog(LOG_ERR, "CreateProcess failed with error %lu\n", (unsigned long)GetLastError() );
-			exit(1);
-		}
-
-		// Debug the child (only clean way to catch exceptions)
-		while (1) {
-
-			// Wait for child to do something
-			DEBUG_EVENT debug_event;
-			ZeroMemory(&debug_event, sizeof(debug_event));
-
-			BOOL ok = WaitForDebugEvent(&debug_event, 60 * 1000);
-			if (!ok)
-				break;
-
-			// Decide if event is "normal"
-			int go_on =
-				CREATE_PROCESS_DEBUG_EVENT== debug_event.dwDebugEventCode	||
-				CREATE_THREAD_DEBUG_EVENT == debug_event.dwDebugEventCode	||
-				EXIT_THREAD_DEBUG_EVENT   == debug_event.dwDebugEventCode	||
-				EXCEPTION_DEBUG_EVENT     == debug_event.dwDebugEventCode	||
-				LOAD_DLL_DEBUG_EVENT      == debug_event.dwDebugEventCode	||
-				OUTPUT_DEBUG_STRING_EVENT == debug_event.dwDebugEventCode	||
-				UNLOAD_DLL_DEBUG_EVENT    == debug_event.dwDebugEventCode;
-			if (!go_on)
-				break;
-
-			// Some exceptions are also "normal", apparently.
-			if (EXCEPTION_DEBUG_EVENT== debug_event.dwDebugEventCode) {
-
-				int go_on =
-					EXCEPTION_BREAKPOINT== debug_event.u.Exception.ExceptionRecord.ExceptionCode;
-				if (!go_on)
-					break;
-			}
-
-			// If nothing unexpected happened, let child proceed
-			ContinueDebugEvent(
-				debug_event.dwProcessId,
-				debug_event.dwThreadId,
-				DBG_CONTINUE
-			);
-		}
-
-		// Clean up child process
-		TerminateProcess(process_info.hProcess, 1);
-		CloseHandle(process_info.hProcess);
-		CloseHandle(process_info.hThread);
-
-		// Reap return value and cleanup
-		CopyMemory(&rate, shared_mem, sizeof(rate));
-		(void)UnmapViewOfFile(shared_mem);
-		(void)CloseHandle(map_handle);
-
-	#else
-
-		// Not linux, not unix, not WIN32 ... do our best
-		rate = bench_algo_stage3(algo);
-
-	#endif // defined(unix)
-
-	// Done
-	return rate;
-}
-
-/* Benchmark one algorithm through bench_algo_stage2(), log the outcome, and
- * record it in *best_rate / *best_algo when it beats the current best.
- * Uses max_name_len and name_spaces_pad, which init_max_name_len() sets up. */
-static void bench_algo(
-	double            *best_rate,
-	enum sha256_algos *best_algo,
-	enum sha256_algos algo
-)
-{
-	const char *name = algo_names[algo];
-	size_t pad_len = max_name_len - strlen(name);
-	double measured;
-
-	/* Build a run of spaces so the log columns line up */
-	memset(name_spaces_pad, ' ', pad_len);
-	name_spaces_pad[pad_len] = 0;
-
-	applog(LOG_ERR, "\"%s\"%s : benchmarking algorithm ...", name, name_spaces_pad);
-
-	measured = bench_algo_stage2(algo);
-	if (measured < 0.0) {
-		/* A negative rate is the "did not run" marker */
-		applog(LOG_ERR, "\"%s\"%s : algorithm fails on this platform", name, name_spaces_pad);
-		return;
-	}
-
-	applog(LOG_ERR, "\"%s\"%s : algorithm runs at %.5f MH/s", name, name_spaces_pad, measured);
-	if (measured > *best_rate) {
-		*best_rate = measured;
-		*best_algo = algo;
-	}
-}
-
-// Store the length of the longest entry of algo_names[] in max_name_len and
-// allocate name_spaces_pad (max_name_len + 16 bytes) for padding log output.
-// Exits the process if the allocation fails.
-void init_max_name_len()
-{
-	const size_t count = sizeof(algo_names)/sizeof(algo_names[0]);
-	size_t idx = 0;
-
-	while (idx < count) {
-		const char *name = algo_names[idx++];
-		size_t len = 0;
-
-		// Slots without a name are NULL and count as length 0
-		if (name)
-			len = strlen(name);
-		if (len > max_name_len)
-			max_name_len = len;
-	}
-
-	name_spaces_pad = (char*) malloc(max_name_len+16);
-	if (!name_spaces_pad) {
-		perror("malloc failed");
-		exit(1);
-	}
-}
-
-// Pick the fastest CPU hasher
-//
-// Benchmarks every compiled-in sha256 algorithm in a fixed order, logs the
-// winner and returns it. If no benchmark succeeds the result stays at its
-// initial value 0 with a rate of -1.0.
-static enum sha256_algos pick_fastest_algo()
-{
-	// Candidates, listed in the order in which they are benchmarked
-	static const enum sha256_algos candidates[] = {
-		ALGO_C,
-	#if defined(WANT_SSE2_4WAY)
-		ALGO_4WAY,
-	#endif
-	#if defined(WANT_VIA_PADLOCK)
-		ALGO_VIA,
-	#endif
-		ALGO_CRYPTOPP,
-	#if defined(WANT_CRYPTOPP_ASM32)
-		ALGO_CRYPTOPP_ASM32,
-	#endif
-	#if defined(WANT_X8632_SSE2)
-		ALGO_SSE2_32,
-	#endif
-	#if defined(WANT_X8664_SSE2)
-		ALGO_SSE2_64,
-	#endif
-	#if defined(WANT_X8664_SSE4)
-		ALGO_SSE4_64,
-	#endif
-	#if defined(WANT_ALTIVEC_4WAY)
-		ALGO_ALTIVEC_4WAY,
-	#endif
-	};
-	double top_rate = -1.0;
-	enum sha256_algos winner = 0;
-	size_t c;
-	size_t pad_len;
-
-	applog(LOG_ERR, "benchmarking all sha256 algorithms ...");
-
-	for (c = 0; c < sizeof(candidates)/sizeof(candidates[0]); ++c)
-		bench_algo(&top_rate, &winner, candidates[c]);
-
-	// Pad the winner's name so the summary lines up with the other lines
-	pad_len = max_name_len - strlen(algo_names[winner]);
-	memset(name_spaces_pad, ' ', pad_len);
-	name_spaces_pad[pad_len] = 0;
-	applog(
-		LOG_ERR,
-		"\"%s\"%s : is fastest algorithm at %.5f MH/s",
-		algo_names[winner],
-		name_spaces_pad,
-		top_rate
-	);
-	return winner;
-}
-
-/* FIXME: Use asprintf for better errors. */
-/* Translate an algorithm name into *algo.
- *
- * "auto" benchmarks every available algorithm and selects the fastest;
- * any other value is looked up in algo_names[].
- * Returns NULL on success, otherwise a static error string. Nothing but
- * an error is returned while opt_scrypt is set. */
-char *set_algo(const char *arg, enum sha256_algos *algo)
-{
-	size_t slot;
-
-	if (opt_scrypt)
-		return "Can only use scrypt algorithm";
-
-	if (strcmp(arg, "auto") == 0) {
-		*algo = pick_fastest_algo();
-		return NULL;
-	}
-
-	for (slot = 0; slot < ARRAY_SIZE(algo_names); slot++) {
-		/* NULL slots have no name to match against */
-		if (!algo_names[slot])
-			continue;
-		if (strcmp(arg, algo_names[slot]) != 0)
-			continue;
-
-		*algo = (enum sha256_algos)slot;
-		return NULL;
-	}
-	return "Unknown algorithm";
-}
-
-#ifdef WANT_SCRYPT
-/* Force *algo to ALGO_SCRYPT. Only compiled when WANT_SCRYPT is defined. */
-void set_scrypt_algo(enum sha256_algos *algo)
-{
-	*algo = ALGO_SCRYPT;
-}
-#endif
-
-/* Write the name of *algo into buf (at most OPT_SHOW_LEN bytes, always
- * NUL-terminated). Values that are out of range, or whose algo_names[] slot
- * is NULL, are shown as "unknown" instead of being handed to a string
- * function as a NULL pointer. */
-void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo)
-{
-	const char *name = NULL;
-
-	if ((size_t)*algo < ARRAY_SIZE(algo_names))
-		name = algo_names[*algo];
-
-	snprintf(buf, OPT_SHOW_LEN, "%s", name ? name : "unknown");
-}
-#endif
-
-#ifdef WANT_CPUMINE
-/* Option handler for an explicit thread count: records that the count was
- * forced (so cpu_detect() will not override it) and passes `arg` on to
- * set_int_range() with the accepted range 0..9999. Returns whatever
- * set_int_range() returns. */
-char *force_nthreads_int(const char *arg, int *i)
-{
-	forced_n_threads = true;
-	return set_int_range(arg, i, 0, 9999);
-}
-#endif
-
-#ifdef WANT_CPUMINE
-/* Driver detect hook (installed as cpu_drv.drv_detect).
- *
- * Determines num_processors, settles opt_n_threads, then allocates the
- * global cpus[] array and registers one single-threaded cgpu per mining
- * thread through add_cgpu(). Does nothing further when there is no usable
- * processor count or no thread to create.
- */
-static void cpu_detect()
-{
-	int i;
-
-	// Reckon number of cores in the box
-	#if defined(WIN32)
-	{
-		DWORD_PTR system_am;
-		DWORD_PTR process_am;
-		// The process mask is the second argument and the system mask
-		// the third: we want the CPUs this process may actually use.
-		BOOL ok = GetProcessAffinityMask(
-			GetCurrentProcess(),
-			&process_am,
-			&system_am
-		);
-		if (!ok) {
-			applog(LOG_ERR, "couldn't figure out number of processors :(");
-			num_processors = 1;
-		} else {
-			// Count every set bit of the mask. Shifting the mask itself
-			// covers all of DWORD_PTR (64 bits on Win64) and avoids
-			// the undefined 1<<31 on a plain int.
-			DWORD_PTR bits;
-
-			num_processors = 0;
-			for (bits = process_am; bits; bits >>= 1)
-				if (bits & 1)
-					++num_processors;
-		}
-	}
-	#else
-		num_processors = sysconf(_SC_NPROCESSORS_ONLN);
-	#endif /* !WIN32 */
-
-	// Unless the user forced a thread count, use one thread per processor,
-	// or none at all when other devices exist and CPU mining was not asked for
-	if (opt_n_threads < 0 || !forced_n_threads) {
-		if (total_devices && !opt_usecpu)
-			opt_n_threads = 0;
-		else
-			opt_n_threads = num_processors;
-	}
-	if (num_processors < 1)
-		return;
-
-	// Nothing to register. Returning here also keeps a calloc(0, ...) that
-	// legitimately yields NULL from being reported as an allocation failure.
-	if (opt_n_threads < 1)
-		return;
-
-	cpus = calloc(opt_n_threads, sizeof(struct cgpu_info));
-	if (unlikely(!cpus))
-		quit(1, "Failed to calloc cpus");
-	for (i = 0; i < opt_n_threads; ++i) {
-		struct cgpu_info *cgpu;
-
-		cgpu = &cpus[i];
-		cgpu->drv = &cpu_drv;
-		cgpu->deven = DEV_ENABLED;
-		cgpu->threads = 1;
-		cgpu->kname = algo_names[opt_algo];
-		if (opt_scrypt)
-			cgpu->drv->max_diff = 0xffffffff;
-		add_cgpu(cgpu);
-	}
-}
-
-/* Driver reinit hook: queues `cpu` on the queue of control thread
- * control_thr[cpur_thr_id] via tq_push(). What the consumer of that queue
- * does with it is not visible here. */
-static void reinit_cpu_device(struct cgpu_info *cpu)
-{
-	tq_push(control_thr[cpur_thr_id].q, cpu);
-}
-
-/* Driver thread_prepare hook: calls thread_reportin() for the thread and
- * always reports success. */
-static bool cpu_thread_prepare(struct thr_info *thr)
-{
-	thread_reportin(thr);
-
-	return true;
-}
-
-/* Driver can_limit_work hook: always returns the constant 0xffff and
- * ignores `thr`. */
-static uint64_t cpu_can_limit_work(struct thr_info __maybe_unused *thr)
-{
-	return 0xffff;
-}
-
-/* Driver thread_init hook: lowers the scheduling priority of the calling
- * thread and, when the thread count is a multiple of the processor count,
- * pins the thread to a processor. Always returns true. */
-static bool cpu_thread_init(struct thr_info *thr)
-{
-	const int thr_id = thr->id;
-
-	/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
-	 * and if that fails, then SCHED_BATCH. No need for this to be an
-	 * error if it fails */
-	setpriority(PRIO_PROCESS, 0, 19);
-	drop_policy();
-	/* Cpu affinity only makes sense if the number of threads is a multiple
-	 * of the number of CPUs */
-	/* The target CPU is the device id taken modulo num_processors */
-	if (!(opt_n_threads % num_processors))
-		affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors);
-	return true;
-}
-
-/* Driver scanhash hook: runs the scanner selected by opt_algo over `work`.
- *
- * Each time the scanner reports a hit, the nonce is submitted, the work's
- * nonce is moved past it and the scan is run again. Once a scan ends without
- * a hit, returns 0 if last_nonce did not move from the nonce the work had on
- * entry; otherwise stores last_nonce + 1 in the work and returns
- * last_nonce - start + 1. */
-static int64_t cpu_scanhash(struct thr_info *thr, struct work *work, int64_t max_nonce)
-{
-	const uint32_t start_nonce = work->blk.nonce;
-	const int thr_id = thr->id;
-	unsigned char hash1[64];
-	uint32_t last_nonce;
-
-	hex2bin(hash1, "00000000000000000000000000000000000000000000000000000000000000000000008000000000000000000000000000000000000000000000000000010000", 64);
-
-	for (;;) {
-		sha256_func scan = sha256_funcs[opt_algo];
-		bool found;
-
-		/* Every pass starts reporting from the original nonce again */
-		last_nonce = start_nonce;
-
-		/* scan nonces for a proof-of-work hash */
-		found = (*scan)(
-			thr,
-			work->midstate,
-			work->data,
-			hash1,
-			work->hash,
-			work->target,
-			max_nonce,
-			&last_nonce,
-			work->blk.nonce
-		);
-
-		/* if nonce found, submit work and keep scanning */
-		if (unlikely(found)) {
-			applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id));
-			submit_nonce(thr, work, last_nonce);
-			work->blk.nonce = last_nonce + 1;
-			continue;
-		}
-		break;
-	}
-
-	if (unlikely(last_nonce == start_nonce))
-		return 0;
-
-	work->blk.nonce = last_nonce + 1;
-	return last_nonce - start_nonce + 1;
-}
-
-/* Driver descriptor for CPU mining: names the driver and wires the static
- * functions of this file into the device_drv hooks. cpu_detect() stores a
- * pointer to this object in every cgpu it creates. */
-struct device_drv cpu_drv = {
-	.drv_id = DRIVER_CPU,
-	.dname = "cpu",
-	.name = "CPU",
-	.drv_detect = cpu_detect,
-	.reinit_device = reinit_cpu_device,
-	.thread_prepare = cpu_thread_prepare,
-	.can_limit_work = cpu_can_limit_work,
-	.thread_init = cpu_thread_init,
-	.scanhash = cpu_scanhash,
-};
-#endif
-
-
-

+ 0 - 65
driver-cpu.h

@@ -1,65 +0,0 @@
-/* Public interface of the CPU mining driver: compile-time selection of the
- * available sha256 scanners, the algorithm enum, and the driver's exported
- * symbols.
- * NOTE(review): the guard name begins with a double underscore, which is a
- * reserved identifier in C. */
-#ifndef __DEVICE_CPU_H__
-#define __DEVICE_CPU_H__
-
-#include "miner.h"
-
-#include "config.h"
-#include <stdbool.h>
-
-/* Size of the buffer handed to show_algo(); only defined here when nothing
- * else has defined it already. */
-#ifndef OPT_SHOW_LEN
-#define OPT_SHOW_LEN 80
-#endif
-
-/* The WANT_* macros below are derived from compiler-provided target macros
- * (and HAS_YASM / USE_SCRYPT). They gate which scanner implementations and
- * table entries get compiled. */
-#ifdef __SSE2__
-#define WANT_SSE2_4WAY 1
-#endif
-
-#ifdef __ALTIVEC__
-#define WANT_ALTIVEC_4WAY 1
-#endif
-
-#if defined(__i386__) && defined(HAS_YASM) && defined(__SSE2__)
-#define WANT_X8632_SSE2 1
-#endif
-
-#if (defined(__i386__) || defined(__x86_64__)) &&  !defined(__APPLE__)
-#define WANT_VIA_PADLOCK 1
-#endif
-
-#if defined(__x86_64__) && defined(HAS_YASM)
-#define WANT_X8664_SSE2 1
-#endif
-
-#if defined(__x86_64__) && defined(HAS_YASM) && defined(__SSE4_1__)
-#define WANT_X8664_SSE4 1
-#endif
-
-#ifdef USE_SCRYPT
-#define WANT_SCRYPT
-#endif
-
-/* Identifiers of the hashing implementations. Every enumerator exists
- * regardless of the WANT_* macros; the values are used as array indices
- * (see algo_names[]). */
-enum sha256_algos {
-	ALGO_C,			/* plain C */
-	ALGO_4WAY,		/* parallel SSE2 */
-	ALGO_VIA,		/* VIA padlock */
-	ALGO_CRYPTOPP,		/* Crypto++ (C) */
-	ALGO_CRYPTOPP_ASM32,	/* Crypto++ 32-bit assembly */
-	ALGO_SSE2_32,		/* SSE2 for x86_32 */
-	ALGO_SSE2_64,		/* SSE2 for x86_64 */
-	ALGO_SSE4_64,		/* SSE4 for x86_64 */
-	ALGO_ALTIVEC_4WAY,	/* parallel Altivec */
-	ALGO_SCRYPT,		/* scrypt */
-};
-
-/* Names indexed by enum sha256_algos */
-extern const char *algo_names[];
-extern bool opt_usecpu;
-extern struct device_drv cpu_drv;
-
-/* Option helpers: parse / display the algorithm and the forced thread count */
-extern char *set_algo(const char *arg, enum sha256_algos *algo);
-extern void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo);
-extern char *force_nthreads_int(const char *arg, int *i);
-/* NOTE(review): declared with an empty parameter list rather than (void) */
-extern void init_max_name_len();
-/* Returns the benchmark rate of `algo` as a double */
-extern double bench_algo_stage3(enum sha256_algos algo);
-extern void set_scrypt_algo(enum sha256_algos *algo);
-
-#endif /* __DEVICE_CPU_H__ */

+ 0 - 5
miner.h

@@ -210,7 +210,6 @@ enum drv_driver {
 	DRIVER_BITFORCE,
 	DRIVER_MODMINER,
 	DRIVER_ZTEX,
-	DRIVER_CPU,
 	DRIVER_BFLSC,
 	DRIVER_AVALON,
 	DRIVER_MAX
@@ -927,7 +926,6 @@ extern bool hotplug_mode;
 extern int hotplug_time;
 extern struct list_head scan_devices;
 extern int nDevs;
-extern int opt_n_threads;
 extern int num_processors;
 extern int hw_errors;
 extern bool use_syslog;
@@ -943,13 +941,10 @@ extern bool opt_scrypt;
 #endif
 extern double total_secs;
 extern int mining_threads;
-extern struct cgpu_info *cpus;
 extern int total_devices;
 extern struct cgpu_info **devices;
 extern int total_pools;
 extern struct pool **pools;
-extern const char *algo_names[];
-extern enum sha256_algos opt_algo;
 extern struct strategies strategies[];
 extern enum pool_strategy pool_strategy;
 extern int opt_rotate_period;

+ 0 - 488
sha256_4way.c

@@ -1,488 +0,0 @@
-// Copyright (c) 2010 Satoshi Nakamoto
-// Distributed under the MIT/X11 software license, see the accompanying
-// file license.txt or http://www.opensource.org/licenses/mit-license.php.
-
-// tcatm's 4-way 128-bit SSE2 SHA-256
-
-#include "driver-cpu.h"
-
-#ifdef WANT_SSE2_4WAY
-
-#include <string.h>
-#include <assert.h>
-
-#include <xmmintrin.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#define NPAR 32
-
-/* Forward declaration; parameter names and the [9][NPAR] bound are kept in
- * step with the definition further down and with the thash[9][NPAR] buffer
- * the caller passes in. */
-static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init);
-
-/* The 64 SHA-256 round constants. SHA256ROUND broadcasts sha256_consts[i]
- * to all lanes for round i; the trailing comments give the index of the
- * first value on that line. */
-static const unsigned int sha256_consts[] = {
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /*  0 */
-    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /*  8 */
-    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
-    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
-    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
-    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
-    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
-    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
-    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-
-/* SHA-256 "choose", bitwise over the whole vector:
- * (b AND c) XOR ((NOT b) AND d). */
-static inline __m128i Ch(const __m128i b, const __m128i c, const __m128i d) {
-    const __m128i from_c = _mm_and_si128(b, c);
-    const __m128i from_d = _mm_andnot_si128(b, d);
-
-    return _mm_xor_si128(from_c, from_d);
-}
-
-/* SHA-256 "majority", bitwise over the whole vector:
- * (b AND c) XOR (b AND d) XOR (c AND d). */
-static inline __m128i Maj(const __m128i b, const __m128i c, const __m128i d) {
-    const __m128i bc = _mm_and_si128(b, c);
-    const __m128i bd = _mm_and_si128(b, d);
-    const __m128i cd = _mm_and_si128(c, d);
-
-    return _mm_xor_si128(_mm_xor_si128(bc, bd), cd);
-}
-
-/* Rotate each 32-bit lane of x right by n bits, built from a logical right
- * shift by n OR-ed with a left shift by 32 - n. */
-static inline __m128i  ROTR(__m128i x, const int n) {
-    const __m128i shifted_down = _mm_srli_epi32(x, n);
-    const __m128i wrapped_up = _mm_slli_epi32(x, 32 - n);
-
-    return _mm_or_si128(shifted_down, wrapped_up);
-}
-
-/* Logical right shift of each 32-bit lane of x by n bits. */
-static inline __m128i SHR(__m128i x, const int n) {
-    return _mm_srli_epi32(x, n);
-}
-
-/* SHA256 Functions */
-/* Each macro XORs three terms built from ROTR/SHR of x. The argument is
- * expanded three times, so it must be free of side effects.
- * BIGSIGMA0/1 are used by SHA256ROUND; SIGMA0/1 by the message-schedule
- * updates (w = add4(SIGMA1(..), .., SIGMA0(..), ..)). */
-#define BIGSIGMA0_256(x)    (_mm_xor_si128(_mm_xor_si128(ROTR((x), 2),ROTR((x), 13)),ROTR((x), 22)))
-#define BIGSIGMA1_256(x)    (_mm_xor_si128(_mm_xor_si128(ROTR((x), 6),ROTR((x), 11)),ROTR((x), 25)))
-
-
-#define SIGMA0_256(x)       (_mm_xor_si128(_mm_xor_si128(ROTR((x), 7),ROTR((x), 18)), SHR((x), 3 )))
-#define SIGMA1_256(x)       (_mm_xor_si128(_mm_xor_si128(ROTR((x),17),ROTR((x), 19)), SHR((x), 10)))
-
-/* Return 32-bit element i (0..3, in memory order) of vector x, read back
- * through a union. */
-static inline unsigned int store32(const __m128i x, int i) {
-    union { unsigned int lane[4]; __m128i vec; } view;
-
-    view.vec = x;
-    return view.lane[i];
-}
-
-/* Unpack the four 32-bit elements of x in reverse memory order:
- * *x0 receives element 3, *x1 element 2, *x2 element 1, *x3 element 0. */
-static inline void store_epi32(const __m128i x, unsigned int *x0, unsigned int *x1, unsigned int *x2, unsigned int *x3) {
-    union { unsigned int lane[4]; __m128i vec; } view;
-
-    view.vec = x;
-    *x0 = view.lane[3];
-    *x1 = view.lane[2];
-    *x2 = view.lane[1];
-    *x3 = view.lane[0];
-}
-
-/* Lane-wise 32-bit sums of four / five vectors. */
-#define add4(x0, x1, x2, x3) _mm_add_epi32(_mm_add_epi32(x0, x1),_mm_add_epi32( x2,x3))
-#define add5(x0, x1, x2, x3, x4) _mm_add_epi32(add4(x0, x1, x2, x3), x4)
-
-/* One SHA-256 round on the working variables, for round constant index i and
- * schedule word w. Only d and h are assigned; callers rotate the argument
- * order from round to round instead of moving values between variables.
- * Requires a variable T1 in scope. The expansion is three separate
- * statements (not wrapped in do { } while (0)), so it must only be used as a
- * full statement inside a braced block. */
-#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w)                       \
-    T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), _mm_set1_epi32(sha256_consts[i]), w);   \
-d = _mm_add_epi32(d, T1);                                           \
-h = _mm_add_epi32(T1, _mm_add_epi32(BIGSIGMA0_256(a), Maj(a, b, c)));
-
-/* Debug helper: print msg followed by the four 32-bit elements of x as
- * 8-digit hex values, element 0 first. */
-static inline void dumpreg(__m128i x, char *msg) {
-    union { unsigned int lane[4]; __m128i vec; } view;
-
-    view.vec = x;
-    printf("%s %08x %08x %08x %08x\n", msg, view.lane[0], view.lane[1], view.lane[2], view.lane[3]);
-}
-
-/* Debug helper: print element i of w0 and of the working variables a..h.
- * It expects those variables to be in scope at the point of use. The
- * disabled variant takes the same single parameter, so switching the
- * condition to 0 turns every dumpstate(i) call into nothing instead of
- * breaking the build. */
-#if 1
-#define dumpstate(i) printf("%s: %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", \
-        __func__, store32(w0, i), store32(a, i), store32(b, i), store32(c, i), store32(d, i), store32(e, i), store32(f, i), store32(g, i), store32(h, i));
-#else
-#define dumpstate(i)
-#endif
-
-/* SHA-256 initial hash state. ScanHash_4WaySSE2() passes it to
- * DoubleBlockSHA256() as the starting state of the second hash. */
-static const unsigned int pSHA256InitState[8] =
-{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-
-
-/* Scan nonces NPAR at a time with DoubleBlockSHA256().
- *
- * pmidstate  state handed to DoubleBlockSHA256() as its starting state
- * pdata      block data; the 32-bit nonce lives at byte offset 76
- * phash1     padding buffer passed through to DoubleBlockSHA256()
- * phash      receives the 32-byte hash of a candidate before fulltest()
- * ptarget    target passed to fulltest()
- * max_nonce  scanning stops once nonce reaches this value
- * last_nonce out: the nonce found, or the last batch base on giving up
- * nonce      starting nonce
- *
- * Returns true when fulltest() accepts a hash (the winning nonce is written
- * to *last_nonce and into pdata), false when max_nonce is reached or
- * thr->work_restart is set.
- */
-bool ScanHash_4WaySSE2(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce)
-{
-    /* Taken before pdata is advanced, so it points at byte 76 of the block */
-    unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
-
-	/* From here on pdata refers to the data starting at byte 64 */
-	pdata += 64;
-
-    for (;;)
-    {
-        unsigned int thash[9][NPAR] __attribute__((aligned(128)));
-	int j;
-
-	/* NOTE(review): nonce is advanced before the batch is hashed, so the
-	 * first NPAR values after the incoming nonce look like they are never
-	 * tested -- confirm against DoubleBlockSHA256(). */
-	nonce += NPAR;
-	*nNonce_p = nonce;
-
-        DoubleBlockSHA256(pdata, phash1, pmidstate, thash, pSHA256InitState);
-
-        for (j = 0; j < NPAR; j++)
-        {
-            /* Cheap pre-filter: only lanes whose word 7 is zero get the full test */
-            if (unlikely(thash[7][j] == 0))
-            {
-		int i;
-
-                /* Gather the 8 hash words of lane j into phash */
-                for (i = 0; i < 32/4; i++)
-                    ((unsigned int*)phash)[i] = thash[i][j];
-
-		if (fulltest(phash, ptarget)) {
-					nonce += j;
-					*last_nonce = nonce;
-					*nNonce_p = nonce;
-					return true;
-		}
-            }
-        }
-
-        if ((nonce >= max_nonce) || thr->work_restart)
-        {
-            *last_nonce = nonce;
-            return false;
-        }
-    }
-}
-
-
-static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
-{
-    unsigned int* In = (unsigned int*)pin;
-    unsigned int* Pad = (unsigned int*)pad;
-    unsigned int* hPre = (unsigned int*)pre;
-    unsigned int* hInit = (unsigned int*)init;
-    unsigned int /* i, j, */ k;
-
-    /* vectors used in calculation */
-    __m128i w0, w1, w2, w3, w4, w5, w6, w7;
-    __m128i w8, w9, w10, w11, w12, w13, w14, w15;
-    __m128i T1;
-    __m128i a, b, c, d, e, f, g, h;
-    __m128i nonce, preNonce;
-
-    /* nonce offset for vector */
-    __m128i offset = _mm_set_epi32(0x00000003, 0x00000002, 0x00000001, 0x00000000);
-
-
-    preNonce = _mm_add_epi32(_mm_set1_epi32(In[3]), offset);
-
-    for(k = 0; k<NPAR; k+=4) {
-        w0 = _mm_set1_epi32(In[0]);
-        w1 = _mm_set1_epi32(In[1]);
-        w2 = _mm_set1_epi32(In[2]);
-        //w3 = _mm_set1_epi32(In[3]); nonce will be later hacked into the hash
-        w4 = _mm_set1_epi32(In[4]);
-        w5 = _mm_set1_epi32(In[5]);
-        w6 = _mm_set1_epi32(In[6]);
-        w7 = _mm_set1_epi32(In[7]);
-        w8 = _mm_set1_epi32(In[8]);
-        w9 = _mm_set1_epi32(In[9]);
-        w10 = _mm_set1_epi32(In[10]);
-        w11 = _mm_set1_epi32(In[11]);
-        w12 = _mm_set1_epi32(In[12]);
-        w13 = _mm_set1_epi32(In[13]);
-        w14 = _mm_set1_epi32(In[14]);
-        w15 = _mm_set1_epi32(In[15]);
-
-        /* hack nonce into lowest byte of w3 */
-	nonce = _mm_add_epi32(preNonce, _mm_set1_epi32(k));
-        w3 = nonce;
-
-        a = _mm_set1_epi32(hPre[0]);
-        b = _mm_set1_epi32(hPre[1]);
-        c = _mm_set1_epi32(hPre[2]);
-        d = _mm_set1_epi32(hPre[3]);
-        e = _mm_set1_epi32(hPre[4]);
-        f = _mm_set1_epi32(hPre[5]);
-        g = _mm_set1_epi32(hPre[6]);
-        h = _mm_set1_epi32(hPre[7]);
-
-        SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-
-#define store_load(x, i, dest) \
-        T1 = _mm_set1_epi32((hPre)[i]); \
-        dest = _mm_add_epi32(T1, x);
-
-        store_load(a, 0, w0);
-        store_load(b, 1, w1);
-        store_load(c, 2, w2);
-        store_load(d, 3, w3);
-        store_load(e, 4, w4);
-        store_load(f, 5, w5);
-        store_load(g, 6, w6);
-        store_load(h, 7, w7);
-
-        w8 = _mm_set1_epi32(Pad[8]);
-        w9 = _mm_set1_epi32(Pad[9]);
-        w10 = _mm_set1_epi32(Pad[10]);
-        w11 = _mm_set1_epi32(Pad[11]);
-        w12 = _mm_set1_epi32(Pad[12]);
-        w13 = _mm_set1_epi32(Pad[13]);
-        w14 = _mm_set1_epi32(Pad[14]);
-        w15 = _mm_set1_epi32(Pad[15]);
-
-        a = _mm_set1_epi32(hInit[0]);
-        b = _mm_set1_epi32(hInit[1]);
-        c = _mm_set1_epi32(hInit[2]);
-        d = _mm_set1_epi32(hInit[3]);
-        e = _mm_set1_epi32(hInit[4]);
-        f = _mm_set1_epi32(hInit[5]);
-        g = _mm_set1_epi32(hInit[6]);
-        h = _mm_set1_epi32(hInit[7]);
-
-        SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
-
-	/* Skip last 3-rounds; not necessary for H==0 */
-#if 0
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-#endif
-
-        /* store resulsts directly in thash */
-#define store_2(x,i)  \
-        w0 = _mm_set1_epi32(hInit[i]); \
-        *(__m128i *)&(thash)[i][0+k] = _mm_add_epi32(w0, x);
-
-        store_2(a, 0);
-        store_2(b, 1);
-        store_2(c, 2);
-        store_2(d, 3);
-        store_2(e, 4);
-        store_2(f, 5);
-        store_2(g, 6);
-        store_2(h, 7);
-        *(__m128i *)&(thash)[8][0+k] = nonce;
-    }
-
-}
-
-#endif /* WANT_SSE2_4WAY */

+ 0 - 469
sha256_altivec_4way.c

@@ -1,469 +0,0 @@
-// Copyright (c) 2010 Satoshi Nakamoto
-// Copyright (c) 2011 Gilles Risch
-// Distributed under the MIT/X11 software license, see the accompanying
-// file license.txt or http://www.opensource.org/licenses/mit-license.php.
-
-
-// 4-way 128-bit Altivec SHA-256,
-// based on tcatm's 4-way 128-bit SSE2 SHA-256
-//
-
-
-#include "driver-cpu.h"
-
-#ifdef WANT_ALTIVEC_4WAY
-
-#include <string.h>
-#include <assert.h>
-
-//#include <altivec.h>
-#include <stdint.h>
-#include <stdio.h>
-
-#define NPAR 32
-
-static void DoubleBlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2);
-
-static const unsigned int sha256_consts[] = {
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /*  0 */
-    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /*  8 */
-    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
-    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
-    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
-    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
-    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
-    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
-    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-
-static inline vector unsigned int Ch(const vector unsigned int b, const vector unsigned int c, const vector unsigned int d) {
-    return vec_sel(d,c,b);
-}
-
-static inline vector unsigned int Maj(const vector unsigned int b, const vector unsigned int c, const vector unsigned int d) {
-    return vec_sel(b,c, vec_xor(b,d));
-}
-
-/* RotateRight(x, n) := RotateLeft(x, 32-n) */
-/* SHA256 Functions */
-#define BIGSIGMA0_256(x)    (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-2)),vec_rl((x), (vector unsigned int)(32-13))),vec_rl((x), (vector unsigned int)(32-22))))
-#define BIGSIGMA1_256(x)    (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-6)),vec_rl((x), (vector unsigned int)(32-11))),vec_rl((x), (vector unsigned int)(32-25))))
-
-#define SIGMA0_256(x)       (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32- 7)),vec_rl((x), (vector unsigned int)(32-18))), vec_sr((x), (vector unsigned int)(3 ))))
-#define SIGMA1_256(x)       (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-17)),vec_rl((x), (vector unsigned int)(32-19))), vec_sr((x), (vector unsigned int)(10))))
-
-#define add4(x0, x1, x2, x3) vec_add(vec_add(x0, x1),vec_add( x2,x3))
-#define add5(x0, x1, x2, x3, x4) vec_add(add4(x0, x1, x2, x3), x4)
-
-#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w)                       \
-    T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), (vector unsigned int)(sha256_consts[i],sha256_consts[i],sha256_consts[i],sha256_consts[i]), w);   \
-    d = vec_add(d, T1);                                           \
-    h = vec_add(T1, vec_add(BIGSIGMA0_256(a), Maj(a, b, c)));
-
-
-static const unsigned int pSHA256InitState[8] =
-{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-
-
-bool ScanHash_altivec_4way(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce)
-{
-    unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
-
-	pdata += 64;
-
-    for (;;)
-    {
-        unsigned int thash[9][NPAR] __attribute__((aligned(128)));
-	int j;
-
-	*nNonce_p = nonce;
-
-        DoubleBlockSHA256(pdata, phash1, pmidstate, thash, pSHA256InitState);
-
-        for (j = 0; j < NPAR; j++)
-        {
-            if (unlikely(thash[7][j] == 0))
-            {
-		int i;
-
-                for (i = 0; i < 32/4; i++)
-                    ((unsigned int*)phash)[i] = thash[i][j];
-
-		if (fulltest(phash, ptarget)) {
-					nonce += j;
-					*last_nonce = nonce;
-					*nNonce_p = nonce;
-					return true;
-		}
-            }
-        }
-
-        if ((nonce >= max_nonce) || thr->work_restart)
-        {
-            *last_nonce = nonce;
-            return false;
-        }
-
-        nonce += NPAR;
-    }
-}
-
-
-static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
-{
-    unsigned int* In = (unsigned int*)pin;
-    unsigned int* Pad = (unsigned int*)pad;
-    unsigned int* hPre = (unsigned int*)pre;
-    unsigned int* hInit = (unsigned int*)init;
-    unsigned int /* i, j, */ k;
-
-    /* vectors used in calculation */
-    vector unsigned int w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
-    vector unsigned int T1;
-    vector unsigned int a, b, c, d, e, f, g, h;
-    vector unsigned int nonce, preNonce;
-
-    /* nonce offset for vector */
-    vector unsigned int offset = (vector unsigned int)(0, 1, 2, 3);
-
-    preNonce = vec_add((vector unsigned int)(In[3],In[3],In[3],In[3]), offset);
-
-   for(k = 0; k<NPAR; k+=4)
-   {
-        w0 = (vector unsigned int)(In[0],In[0],In[0],In[0]);
-        w1 = (vector unsigned int)(In[1],In[1],In[1],In[1]);
-        w2 = (vector unsigned int)(In[2],In[2],In[2],In[2]);
-        //w3 = (vector unsigned int)(In[3],In[3],In[3],In[3]); nonce will be later hacked into the hash
-        w4 = (vector unsigned int)(In[4],In[4],In[4],In[4]);
-        w5 = (vector unsigned int)(In[5],In[5],In[5],In[5]);
-        w6 = (vector unsigned int)(In[6],In[6],In[6],In[6]);
-        w7 = (vector unsigned int)(In[7],In[7],In[7],In[7]);
-        w8 = (vector unsigned int)(In[8],In[8],In[8],In[8]);
-        w9 = (vector unsigned int)(In[9],In[9],In[9],In[9]);
-        w10 = (vector unsigned int)(In[10],In[10],In[10],In[10]);
-        w11 = (vector unsigned int)(In[11],In[11],In[11],In[11]);
-        w12 = (vector unsigned int)(In[12],In[12],In[12],In[12]);
-        w13 = (vector unsigned int)(In[13],In[13],In[13],In[13]);
-        w14 = (vector unsigned int)(In[14],In[14],In[14],In[14]);
-        w15 = (vector unsigned int)(In[15],In[15],In[15],In[15]);
-
-        /* hack nonce into lowest byte of w3 */
-	nonce = vec_add(preNonce, (vector unsigned int)(k,k,k,k));
-
-        w3 = nonce;
-        //printf ("W3: %08vlx\n", w3);
-
-        a = (vector unsigned int)(hPre[0],hPre[0],hPre[0],hPre[0]);
-        b = (vector unsigned int)(hPre[1],hPre[1],hPre[1],hPre[1]);
-        c = (vector unsigned int)(hPre[2],hPre[2],hPre[2],hPre[2]);
-        d = (vector unsigned int)(hPre[3],hPre[3],hPre[3],hPre[3]);
-        e = (vector unsigned int)(hPre[4],hPre[4],hPre[4],hPre[4]);
-        f = (vector unsigned int)(hPre[5],hPre[5],hPre[5],hPre[5]);
-        g = (vector unsigned int)(hPre[6],hPre[6],hPre[6],hPre[6]);
-        h = (vector unsigned int)(hPre[7],hPre[7],hPre[7],hPre[7]);
-
-        SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-
-#define store_load(x, i, dest) \
-        T1 = (vector unsigned int)((hPre)[i],(hPre)[i],(hPre)[i],(hPre)[i]); \
-        dest = vec_add(T1, x);
-
-        store_load(a, 0, w0);
-        store_load(b, 1, w1);
-        store_load(c, 2, w2);
-        store_load(d, 3, w3);
-        store_load(e, 4, w4);
-        store_load(f, 5, w5);
-        store_load(g, 6, w6);
-        store_load(h, 7, w7);
-
-        /* end of first SHA256 round */
-
-        w8 = (vector unsigned int)(Pad[8],Pad[8],Pad[8],Pad[8]);
-        w9 = (vector unsigned int)(Pad[9],Pad[9],Pad[9],Pad[9]);
-        w10 = (vector unsigned int)(Pad[10],Pad[10],Pad[10],Pad[10]);
-        w11 = (vector unsigned int)(Pad[11],Pad[11],Pad[11],Pad[11]);
-        w12 = (vector unsigned int)(Pad[12],Pad[12],Pad[12],Pad[12]);
-        w13 = (vector unsigned int)(Pad[13],Pad[13],Pad[13],Pad[13]);
-        w14 = (vector unsigned int)(Pad[14],Pad[14],Pad[14],Pad[14]);
-        w15 = (vector unsigned int)(Pad[15],Pad[15],Pad[15],Pad[15]);
-
-        a = (vector unsigned int)(hInit[0],hInit[0],hInit[0],hInit[0]);
-        b = (vector unsigned int)(hInit[1],hInit[1],hInit[1],hInit[1]);
-        c = (vector unsigned int)(hInit[2],hInit[2],hInit[2],hInit[2]);
-        d = (vector unsigned int)(hInit[3],hInit[3],hInit[3],hInit[3]);
-        e = (vector unsigned int)(hInit[4],hInit[4],hInit[4],hInit[4]);
-        f = (vector unsigned int)(hInit[5],hInit[5],hInit[5],hInit[5]);
-        g = (vector unsigned int)(hInit[6],hInit[6],hInit[6],hInit[6]);
-        h = (vector unsigned int)(hInit[7],hInit[7],hInit[7],hInit[7]);
-
-        SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
-        w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
-        w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
-        w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
-        w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
-        w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
-        w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
-        w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
-        w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
-        w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
-        SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
-        w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
-        SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
-        w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
-        SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
-        w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
-        SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
-        w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
-        SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
-
-	/* Skip last 3-rounds; not necessary for H==0 */
-/*#if 0
-        w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
-        SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
-        w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
-        SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
-        w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
-        SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-#endif*/
-
-        /* store resulsts directly in thash */
-#define store_2(x,i)  \
-        w0 = (vector unsigned int)(hInit[i],hInit[i],hInit[i],hInit[i]); \
-        vec_st(vec_add(w0, x), 0 ,&thash[i][k]);
-
-        store_2(a, 0);
-        store_2(b, 1);
-        store_2(c, 2);
-        store_2(d, 3);
-        store_2(e, 4);
-        store_2(f, 5);
-        store_2(g, 6);
-        store_2(h, 7);
-
-        vec_st(nonce, 0 ,&thash[8][k]);
-        /* writing the results into the array is time intensive */
-        /* -> try if it´s faster to compare the results with the target inside this function */
-    }
-
-}
-
-#endif /* WANT_ALTIVEC_4WAY */
-

+ 0 - 609
sha256_cryptopp.c

@@ -1,609 +0,0 @@
-
-#include "config.h"
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "miner.h"
-
-typedef uint32_t word32;
-
-/*
- * Rotate the 32-bit value `word` right by `shift` bit positions.
- *
- * Both shift counts are reduced modulo 32, so a count of 0 (or 32) returns
- * `word` unchanged instead of evaluating `word << 32`, which is undefined
- * behaviour for a 32-bit operand.  Results for counts 1..31 are unchanged.
- */
-static word32 rotrFixed(word32 word, unsigned int shift)
-{
-	shift &= 31;
-	return (word >> shift) | (word << ((32 - shift) & 31));
-}
-/* Rounds 0-15: copy message word i from data[] into the schedule W[] and yield it */
-#define blk0(i) (W[i] = data[i])
-/* SHA-256 round constants; R() indexes them as SHA256_K[i+j] */
-static const word32 SHA256_K[64] = {
-	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
-	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
-	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
-	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
-	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
-	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
-	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
-	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
-	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-/* Rounds 16-63: update the schedule in place, using W[] as a 16-word circular buffer (indices masked with &15) */
-#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
-/* Bitwise "choose" and "majority" functions of the SHA-256 round */
-#define Ch(x,y,z) (z^(x&(y^z)))
-#define Maj(x,y,z) (y^((x^y)&(y^z)))
-/*
- * The eight working variables live in T[8].  Each accessor offsets its slot
- * by the round number i (mod 8), so moving to the next round renames the
- * variables instead of copying words between them.
- */
-#define a(i) T[(0-i)&7]
-#define b(i) T[(1-i)&7]
-#define c(i) T[(2-i)&7]
-#define d(i) T[(3-i)&7]
-#define e(i) T[(4-i)&7]
-#define f(i) T[(5-i)&7]
-#define g(i) T[(6-i)&7]
-#define h(i) T[(7-i)&7]
-/*
- * One SHA-256 round.  Expands to several statements (not wrapped in
- * do/while) and reads j, W, T and data from the enclosing scope: j is the
- * base round of the current group of 16, and j == 0 selects blk0(),
- * any other value blk2().
- */
-#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\
-	d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
-
-// for SHA256: S0/S1 are applied to the working variables in R(), s0/s1 to the message schedule in blk2()
-#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
-#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
-#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
-#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
-
-/*
- * Apply the SHA-256 compression function to one 16-word block.
- *
- * state: eight 32-bit chaining words, updated in place.
- * data:  sixteen 32-bit message words, used exactly as stored (blk0()
- *        performs no byte swapping).
- */
-static void SHA256_Transform(word32 *state, const word32 *data)
-{
-	/* `= {0}` instead of the empty initialiser `= { }`, which is a GNU
-	 * extension and not valid C99/C11 */
-	word32 W[16] = {0};
-	word32 T[8];
-	unsigned int j;
-
-	/* Copy state[] to the working variables */
-	memcpy(T, state, sizeof(T));
-
-	/* 64 rounds as four passes of 16; R() reads j to choose between
-	 * blk0()/blk2() and to offset into SHA256_K[] */
-	for (j = 0; j < 64; j += 16) {
-		R( 0); R( 1); R( 2); R( 3);
-		R( 4); R( 5); R( 6); R( 7);
-		R( 8); R( 9); R(10); R(11);
-		R(12); R(13); R(14); R(15);
-	}
-
-	/* Add the working variables back into state[] */
-	state[0] += a(0);
-	state[1] += b(0);
-	state[2] += c(0);
-	state[3] += d(0);
-	state[4] += e(0);
-	state[5] += f(0);
-	state[6] += g(0);
-	state[7] += h(0);
-}
-
-/*
- * Seed `state` with the eight-word chaining value at `init`, then compress
- * the 16-word block at `input` into it.
- */
-static void runhash(void *state, const void *input, const void *init)
-{
-	word32 *chain = state;
-
-	memcpy(chain, init, 8 * sizeof(word32));
-	SHA256_Transform(chain, input);
-}
-
-/*
- * Suspiciously similar to ScanHash* from bitcoin.
- *
- * Tries nonces n+1, n+2, ... in turn.  Each candidate is written at byte
- * offset 76 of `data`; the block at data+64 is hashed from `midstate` into
- * `hash1`, and `hash1` is then hashed from sha256_init_state into `hash`.
- * `hash1` is read as a full 16-word block, so the words after the digest
- * are presumably prepared by the caller (not visible here).
- *
- * Returns true when the last 32-bit word of `hash` is zero and fulltest()
- * accepts it against `target`; returns false once n reaches `max_nonce` or
- * thr->work_restart is set.  *last_nonce receives the last nonce tried in
- * both cases.
- */
-bool scanhash_cryptopp(struct thr_info*thr, const unsigned char *midstate,
-		unsigned char *data,
-		unsigned char *hash1, unsigned char *hash,
-		const unsigned char *target,
-		uint32_t max_nonce, uint32_t *last_nonce,
-		uint32_t n)
-{
-	uint32_t *nonce_slot = (uint32_t *)(data + 76);
-	const uint32_t *hash_words = (const uint32_t *)hash;
-	const unsigned char *tail = data + 64;
-
-	for (;;) {
-		bool found;
-
-		*nonce_slot = ++n;
-
-		runhash(hash1, tail, midstate);
-		runhash(hash, hash1, sha256_init_state);
-
-		/* fulltest() only runs when the cheap last-word check passes */
-		found = (hash_words[7] == 0) && fulltest(hash, target);
-
-		if (unlikely(found) || n >= max_nonce || thr->work_restart) {
-			*last_nonce = n;
-			return found;
-		}
-	}
-}
-
-#if defined(WANT_CRYPTOPP_ASM32)
-/*
- * Build configuration for the assembly block function below: 32-bit x86
- * only, no x64, SSE2 path disabled.  The CRYPTOPP_ prefixes suggest the
- * code was adapted from Crypto++ (origin not shown in this file).
- */
-#define CRYPTOPP_FASTCALL
-#define CRYPTOPP_BOOL_X86 1
-#define CRYPTOPP_BOOL_X64 0
-#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
-/*
- * ASn()/ASL()/ASJ() emit one instruction, label or jump in whichever
- * assembler dialect is in use: raw MASM text, MSVC/Borland __asm blocks,
- * or (default) string fragments for a GNU-style inline asm statement.
- */
-#ifdef CRYPTOPP_GENERATE_X64_MASM
-	#define AS1(x) x*newline*
-	#define AS2(x, y) x, y*newline*
-	#define AS3(x, y, z) x, y, z*newline*
-	#define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
-	#define ASL(x) label##x:*newline*
-	#define ASJ(x, y, z) x label##y*newline*
-	#define ASC(x, y) x label##y*newline*
-	#define AS_HEX(y) 0##y##h
-#elif defined(_MSC_VER) || defined(__BORLANDC__)
-	#define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
-	#define AS1(x) __asm {x}
-	#define AS2(x, y) __asm {x, y}
-	#define AS3(x, y, z) __asm {x, y, z}
-	#define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
-	#define ASL(x) __asm {label##x:}
-	#define ASJ(x, y, z) __asm {x label##y}
-	#define ASC(x, y) __asm {x label##y}
-	#define CRYPTOPP_NAKED __declspec(naked)
-	#define AS_HEX(y) 0x##y
-#else
-	#define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
-	// define these in two steps to allow arguments to be expanded
-	#define GNU_AS1(x) #x ";"
-	#define GNU_AS2(x, y) #x ", " #y ";"
-	#define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
-	#define GNU_ASL(x) "\n" #x ":"
-	#define GNU_ASJ(x, y, z) #x " " #y #z ";"
-	#define AS1(x) GNU_AS1(x)
-	#define AS2(x, y) GNU_AS2(x, y)
-	#define AS3(x, y, z) GNU_AS3(x, y, z)
-	#define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
-	#define ASL(x) GNU_ASL(x)
-	#define ASJ(x, y, z) GNU_ASJ(x, y, z)
-	#define ASC(x, y) #x " " #y ";"
-	#define CRYPTOPP_NAKED
-	#define AS_HEX(y) 0x##y
-#endif
-/* IF0 discards its argument, IF1 passes it through */
-#define IF0(y)
-#define IF1(y) y
-/* ASM_MOD: x modulo y, spelled in a way the selected assembler accepts */
-#ifdef CRYPTOPP_GENERATE_X64_MASM
-#define ASM_MOD(x, y) ((x) MOD (y))
-#define XMMWORD_PTR XMMWORD PTR
-#else
-// GNU assembler doesn't seem to have mod operator
-#define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
-// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM
-#define XMMWORD_PTR
-#endif
-/*
- * Register aliases and word-size helpers per target.  With the settings
- * above only the CRYPTOPP_BOOL_X86 branch is active.
- */
-#if CRYPTOPP_BOOL_X86
-	#define AS_REG_1 ecx
-	#define AS_REG_2 edx
-	#define AS_REG_3 esi
-	#define AS_REG_4 edi
-	#define AS_REG_5 eax
-	#define AS_REG_6 ebx
-	#define AS_REG_7 ebp
-	#define AS_REG_1d ecx
-	#define AS_REG_2d edx
-	#define AS_REG_3d esi
-	#define AS_REG_4d edi
-	#define AS_REG_5d eax
-	#define AS_REG_6d ebx
-	#define AS_REG_7d ebp
-	#define WORD_SZ 4
-	#define WORD_REG(x)	e##x
-	#define WORD_PTR DWORD PTR
-	#define AS_PUSH_IF86(x) AS1(push e##x)
-	#define AS_POP_IF86(x) AS1(pop e##x)
-	#define AS_JCXZ jecxz
-#elif CRYPTOPP_BOOL_X64
-	#ifdef CRYPTOPP_GENERATE_X64_MASM
-		#define AS_REG_1 rcx
-		#define AS_REG_2 rdx
-		#define AS_REG_3 r8
-		#define AS_REG_4 r9
-		#define AS_REG_5 rax
-		#define AS_REG_6 r10
-		#define AS_REG_7 r11
-		#define AS_REG_1d ecx
-		#define AS_REG_2d edx
-		#define AS_REG_3d r8d
-		#define AS_REG_4d r9d
-		#define AS_REG_5d eax
-		#define AS_REG_6d r10d
-		#define AS_REG_7d r11d
-	#else
-		#define AS_REG_1 rdi
-		#define AS_REG_2 rsi
-		#define AS_REG_3 rdx
-		#define AS_REG_4 rcx
-		#define AS_REG_5 r8
-		#define AS_REG_6 r9
-		#define AS_REG_7 r10
-		#define AS_REG_1d edi
-		#define AS_REG_2d esi
-		#define AS_REG_3d edx
-		#define AS_REG_4d ecx
-		#define AS_REG_5d r8d
-		#define AS_REG_6d r9d
-		#define AS_REG_7d r10d
-	#endif
-	#define WORD_SZ 8
-	#define WORD_REG(x)	r##x
-	#define WORD_PTR QWORD PTR
-	#define AS_PUSH_IF86(x)
-	#define AS_POP_IF86(x)
-	#define AS_JCXZ jrcxz
-#endif
-/*
- * Inline-assembly SHA-256 block function.
- *
- * state: eight 32-bit chaining words, updated in place.
- * data:  input words; each is byte-swapped as it is loaded (SWAP_COPY).
- * len:   input length in bytes; 64-byte blocks are consumed until the data
- *        pointer reaches data + len.
- *
- * With CRYPTOPP_BOOL_X86 = 1, CRYPTOPP_BOOL_X64 = 0 and the SSE2 switch off,
- * only the 32-bit non-SSE2 path is assembled.
- */
-static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len
-#if defined(_MSC_VER) && (_MSC_VER == 1200)
-	, ...	// VC60 workaround: prevent VC 6 from inlining this function
-#endif
-	)
-{
-#if defined(_MSC_VER) && (_MSC_VER == 1200)
-	AS2(mov ecx, [state])
-	AS2(mov edx, [data])
-#endif
-/*
- * Scratch frame relative to BASE: eight 32-bit working variables (H(i) up
- * to A(i), slot chosen modulo 8 by round number), then the 16-word message
- * schedule (Wt), then four machine words holding K_END, STATE_SAVE,
- * DATA_SAVE and DATA_END.
- */
-	#define LOCALS_SIZE	8*4 + 16*4 + 4*WORD_SZ
-	#define H(i)		[BASE+ASM_MOD(1024+7-(i),8)*4]
-	#define G(i)		H(i+1)
-	#define F(i)		H(i+2)
-	#define E(i)		H(i+3)
-	#define D(i)		H(i+4)
-	#define C(i)		H(i+5)
-	#define B(i)		H(i+6)
-	#define A(i)		H(i+7)
-	#define Wt(i)		BASE+8*4+ASM_MOD(1024+15-(i),16)*4
-	#define Wt_2(i)		Wt((i)-2)
-	#define Wt_15(i)	Wt((i)-15)
-	#define Wt_7(i)		Wt((i)-7)
-	#define K_END		[BASE+8*4+16*4+0*WORD_SZ]
-	#define STATE_SAVE	[BASE+8*4+16*4+1*WORD_SZ]
-	#define DATA_SAVE	[BASE+8*4+16*4+2*WORD_SZ]
-	#define DATA_END	[BASE+8*4+16*4+3*WORD_SZ]
-	#define Kt(i)		WORD_REG(si)+(i)*4
-#if CRYPTOPP_BOOL_X86
-	#define BASE		esp+4
-#elif defined(__GNUC__)
-	#define BASE		r8
-#else
-	#define BASE		rsp
-#endif
-/*
- * ROUND(i, r, ...) pastes r onto RA/RB.  r = 0 (first 16 rounds) adds the
- * schedule word already stored by SWAP_COPY; r = 1 (later rounds) computes
- * the next schedule word in RB1 and stores it back.
- */
-#define RA0(i, edx, edi)		\
-	AS2(	add edx, [Kt(i)]	)\
-	AS2(	add edx, [Wt(i)]	)\
-	AS2(	add edx, H(i)		)\
-
-#define RA1(i, edx, edi)
-
-#define RB0(i, edx, edi)
-
-#define RB1(i, edx, edi)	\
-	AS2(	mov AS_REG_7d, [Wt_2(i)]	)\
-	AS2(	mov edi, [Wt_15(i)])\
-	AS2(	mov ebx, AS_REG_7d	)\
-	AS2(	shr AS_REG_7d, 10		)\
-	AS2(	ror ebx, 17		)\
-	AS2(	xor AS_REG_7d, ebx	)\
-	AS2(	ror ebx, 2		)\
-	AS2(	xor ebx, AS_REG_7d	)/* s1(W_t-2) */\
-	AS2(	add ebx, [Wt_7(i)])\
-	AS2(	mov AS_REG_7d, edi	)\
-	AS2(	shr AS_REG_7d, 3		)\
-	AS2(	ror edi, 7		)\
-	AS2(	add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\
-	AS2(	xor AS_REG_7d, edi	)\
-	AS2(	add edx, [Kt(i)])\
-	AS2(	ror edi, 11		)\
-	AS2(	add edx, H(i)	)\
-	AS2(	xor AS_REG_7d, edi	)/* s0(W_t-15) */\
-	AS2(	add AS_REG_7d, ebx	)/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) + W_t-16 */\
-	AS2(	mov [Wt(i)], AS_REG_7d)\
-	AS2(	add edx, AS_REG_7d	)\
-
-#define ROUND(i, r, eax, ecx, edi, edx)\
-	/* in: edi = E	*/\
-	/* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\
-	AS2(	mov edx, F(i)	)\
-	AS2(	xor edx, G(i)	)\
-	AS2(	and edx, edi	)\
-	AS2(	xor edx, G(i)	)/* Ch(E,F,G) = (G^(E&(F^G))) */\
-	AS2(	mov AS_REG_7d, edi	)\
-	AS2(	ror edi, 6		)\
-	AS2(	ror AS_REG_7d, 25		)\
-	RA##r(i, edx, edi		)/* H + Wt + Kt + Ch(E,F,G) */\
-	AS2(	xor AS_REG_7d, edi	)\
-	AS2(	ror edi, 5		)\
-	AS2(	xor AS_REG_7d, edi	)/* S1(E) */\
-	AS2(	add edx, AS_REG_7d	)/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\
-	RB##r(i, edx, edi		)/* H + Wt + Kt + Ch(E,F,G) */\
-	/* in: ecx = A, eax = B^C, edx = T1 */\
-	/* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\
-	AS2(	mov ebx, ecx	)\
-	AS2(	xor ecx, B(i)	)/* A^B */\
-	AS2(	and eax, ecx	)\
-	AS2(	xor eax, B(i)	)/* Maj(A,B,C) = B^((A^B)&(B^C)) */\
-	AS2(	mov AS_REG_7d, ebx	)\
-	AS2(	ror ebx, 2		)\
-	AS2(	add eax, edx	)/* T1 + Maj(A,B,C) */\
-	AS2(	add edx, D(i)	)\
-	AS2(	mov D(i), edx	)\
-	AS2(	ror AS_REG_7d, 22		)\
-	AS2(	xor AS_REG_7d, ebx	)\
-	AS2(	ror ebx, 11		)\
-	AS2(	xor AS_REG_7d, ebx	)\
-	AS2(	add eax, AS_REG_7d	)/* T1 + S0(A) + Maj(A,B,C) */\
-	AS2(	mov H(i), eax	)\
-/* SWAP_COPY: load one machine word from the input (pointer in dx), byte-swap it, store it into the schedule */
-#define SWAP_COPY(i)		\
-	AS2(	mov		WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\
-	AS1(	bswap	WORD_REG(bx))\
-	AS2(	mov		[Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx))
-
-#if defined(__GNUC__)
-	#if CRYPTOPP_BOOL_X64
-		FixedSizeAlignedSecBlock<byte, LOCALS_SIZE> workspace;	/* C++ template type; never compiled here because CRYPTOPP_BOOL_X64 is 0 */
-	#endif
-	__asm__ __volatile__
-	(
-	#if CRYPTOPP_BOOL_X64
-		"lea %4, %%r8;"
-	#endif
-	".intel_syntax noprefix;"
-#elif defined(CRYPTOPP_GENERATE_X64_MASM)
-		ALIGN   8
-	X86_SHA256_HashBlocks	PROC FRAME
-		rex_push_reg rsi
-		push_reg rdi
-		push_reg rbx
-		push_reg rbp
-		alloc_stack(LOCALS_SIZE+8)
-		.endprolog
-		mov rdi, r8
-		lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4]
-#endif
-
-#if CRYPTOPP_BOOL_X86
-	#ifndef __GNUC__
-		AS2(	mov		edi, [len])
-		AS2(	lea		WORD_REG(si), [SHA256_K+48*4])
-	#endif
-	#if !defined(_MSC_VER) || (_MSC_VER < 1400)
-		AS_PUSH_IF86(bx)
-	#endif
-
-	AS_PUSH_IF86(bp)
-	AS2(	mov		ebx, esp)
-	AS2(	and		esp, -16)
-	AS2(	sub		WORD_REG(sp), LOCALS_SIZE)
-	AS_PUSH_IF86(bx)
-#endif
-	AS2(	mov		STATE_SAVE, WORD_REG(cx))
-	AS2(	mov		DATA_SAVE, WORD_REG(dx))
-	AS2(	lea		WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)])
-	AS2(	mov		DATA_END, WORD_REG(ax))
-	AS2(	mov		K_END, WORD_REG(si))
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-#if CRYPTOPP_BOOL_X86
-	AS2(	test	edi, 1)
-	ASJ(	jnz,	2, f)
-	AS1(	dec		DWORD PTR K_END)
-#endif
-	AS2(	movdqa	xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16])
-	AS2(	movdqa	xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16])
-#endif
-
-#if CRYPTOPP_BOOL_X86
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-	ASJ(	jmp,	0, f)
-#endif
-	ASL(2)	// non-SSE2
-	AS2(	mov		esi, ecx)
-	AS2(	lea		edi, A(0))
-	AS2(	mov		ecx, 8)
-	AS1(	rep movsd)
-	AS2(	mov		esi, K_END)
-	ASJ(	jmp,	3, f)
-#endif
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-	ASL(0)
-	AS2(	movdqa	E(0), xmm1)
-	AS2(	movdqa	A(0), xmm0)
-#endif
-#if CRYPTOPP_BOOL_X86
-	ASL(3)
-#endif
-	AS2(	sub		WORD_REG(si), 48*4)
-	SWAP_COPY(0)	SWAP_COPY(1)	SWAP_COPY(2)	SWAP_COPY(3)
-	SWAP_COPY(4)	SWAP_COPY(5)	SWAP_COPY(6)	SWAP_COPY(7)
-#if CRYPTOPP_BOOL_X86
-	SWAP_COPY(8)	SWAP_COPY(9)	SWAP_COPY(10)	SWAP_COPY(11)
-	SWAP_COPY(12)	SWAP_COPY(13)	SWAP_COPY(14)	SWAP_COPY(15)
-#endif
-	AS2(	mov		edi, E(0))	// E
-	AS2(	mov		eax, B(0))	// B
-	AS2(	xor		eax, C(0))	// B^C
-	AS2(	mov		ecx, A(0))	// A
-
-	ROUND(0, 0, eax, ecx, edi, edx)
-	ROUND(1, 0, ecx, eax, edx, edi)
-	ROUND(2, 0, eax, ecx, edi, edx)
-	ROUND(3, 0, ecx, eax, edx, edi)
-	ROUND(4, 0, eax, ecx, edi, edx)
-	ROUND(5, 0, ecx, eax, edx, edi)
-	ROUND(6, 0, eax, ecx, edi, edx)
-	ROUND(7, 0, ecx, eax, edx, edi)
-	ROUND(8, 0, eax, ecx, edi, edx)
-	ROUND(9, 0, ecx, eax, edx, edi)
-	ROUND(10, 0, eax, ecx, edi, edx)
-	ROUND(11, 0, ecx, eax, edx, edi)
-	ROUND(12, 0, eax, ecx, edi, edx)
-	ROUND(13, 0, ecx, eax, edx, edi)
-	ROUND(14, 0, eax, ecx, edi, edx)
-	ROUND(15, 0, ecx, eax, edx, edi)
-
-	ASL(1)
-	AS2(add WORD_REG(si), 4*16)
-	ROUND(0, 1, eax, ecx, edi, edx)
-	ROUND(1, 1, ecx, eax, edx, edi)
-	ROUND(2, 1, eax, ecx, edi, edx)
-	ROUND(3, 1, ecx, eax, edx, edi)
-	ROUND(4, 1, eax, ecx, edi, edx)
-	ROUND(5, 1, ecx, eax, edx, edi)
-	ROUND(6, 1, eax, ecx, edi, edx)
-	ROUND(7, 1, ecx, eax, edx, edi)
-	ROUND(8, 1, eax, ecx, edi, edx)
-	ROUND(9, 1, ecx, eax, edx, edi)
-	ROUND(10, 1, eax, ecx, edi, edx)
-	ROUND(11, 1, ecx, eax, edx, edi)
-	ROUND(12, 1, eax, ecx, edi, edx)
-	ROUND(13, 1, ecx, eax, edx, edi)
-	ROUND(14, 1, eax, ecx, edi, edx)
-	ROUND(15, 1, ecx, eax, edx, edi)
-	AS2(	cmp		WORD_REG(si), K_END)
-	ASJ(	jb,		1, b)
-
-	AS2(	mov		WORD_REG(dx), DATA_SAVE)
-	AS2(	add		WORD_REG(dx), 64)
-	AS2(	mov		AS_REG_7, STATE_SAVE)
-	AS2(	mov		DATA_SAVE, WORD_REG(dx))
-
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-#if CRYPTOPP_BOOL_X86
-	AS2(	test	DWORD PTR K_END, 1)
-	ASJ(	jz,		4, f)
-#endif
-	AS2(	movdqa	xmm1, XMMWORD_PTR [AS_REG_7+1*16])
-	AS2(	movdqa	xmm0, XMMWORD_PTR [AS_REG_7+0*16])
-	AS2(	paddd	xmm1, E(0))
-	AS2(	paddd	xmm0, A(0))
-	AS2(	movdqa	[AS_REG_7+1*16], xmm1)
-	AS2(	movdqa	[AS_REG_7+0*16], xmm0)
-	AS2(	cmp		WORD_REG(dx), DATA_END)
-	ASJ(	jb,		0, b)
-#endif
-
-#if CRYPTOPP_BOOL_X86
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-	ASJ(	jmp,	5, f)
-	ASL(4)	// non-SSE2
-#endif
-	AS2(	add		[AS_REG_7+0*4], ecx)	// A
-	AS2(	add		[AS_REG_7+4*4], edi)	// E
-	AS2(	mov		eax, B(0))
-	AS2(	mov		ebx, C(0))
-	AS2(	mov		ecx, D(0))
-	AS2(	add		[AS_REG_7+1*4], eax)
-	AS2(	add		[AS_REG_7+2*4], ebx)
-	AS2(	add		[AS_REG_7+3*4], ecx)
-	AS2(	mov		eax, F(0))
-	AS2(	mov		ebx, G(0))
-	AS2(	mov		ecx, H(0))
-	AS2(	add		[AS_REG_7+5*4], eax)
-	AS2(	add		[AS_REG_7+6*4], ebx)
-	AS2(	add		[AS_REG_7+7*4], ecx)
-	AS2(	mov		ecx, AS_REG_7d)
-	AS2(	cmp		WORD_REG(dx), DATA_END)
-	ASJ(	jb,		2, b)
-#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
-	ASL(5)
-#endif
-#endif
-
-	AS_POP_IF86(sp)
-	AS_POP_IF86(bp)
-	#if !defined(_MSC_VER) || (_MSC_VER < 1400)
-		AS_POP_IF86(bx)
-	#endif
-
-#ifdef CRYPTOPP_GENERATE_X64_MASM
-	add		rsp, LOCALS_SIZE+8
-	pop		rbp
-	pop		rbx
-	pop		rdi
-	pop		rsi
-	ret
-	X86_SHA256_HashBlocks ENDP
-#endif
-
-#ifdef __GNUC__
-	".att_syntax prefix;"
-	:
-	: "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len)
-	#if CRYPTOPP_BOOL_X64
-		, "m" (workspace[0])
-	#endif
-	: "memory", "cc", "%eax"
-	#if CRYPTOPP_BOOL_X64
-		, "%rbx", "%r8", "%r10"
-	#endif
-	);
-#endif
-}
-
-/* SSE2 is never reported as available: this build only carries the plain x86 path */
-static inline bool HasSSE2(void)
-{
-	return false;
-}
-
-/*
- * Run the assembly block function over one 16-word block.
- *
- * Every input word is byte-swapped with swab32() into a local copy first;
- * X86_SHA256_HashBlocks() swaps each word again while loading it.
- */
-static void SHA256_Transform32(word32 *state, const word32 *data)
-{
-	word32 swapped[16];
-	size_t idx;
-
-	for (idx = 0; idx < sizeof(swapped) / sizeof(swapped[0]); idx++)
-		swapped[idx] = swab32(data[idx]);
-
-	/* length is given in bytes: 16 words of 4 bytes */
-	X86_SHA256_HashBlocks(state, swapped, sizeof(swapped));
-}
-
-/*
- * Seed `state` with the eight-word chaining value at `init`, then hash the
- * 16-word block at `input` into it using the assembly path.
- */
-static void runhash32(void *state, const void *input, const void *init)
-{
-	word32 *chain = state;
-
-	memcpy(chain, init, 8 * sizeof(word32));
-	SHA256_Transform32(chain, input);
-}
-
-/*
- * Suspiciously similar to ScanHash* from bitcoin.
- *
- * Same scan loop as scanhash_cryptopp(), but hashing through runhash32().
- * Tries nonces n+1, n+2, ...; each is written at byte offset 76 of `data`,
- * the block at data+64 is hashed from `midstate` into `hash1`, and `hash1`
- * is hashed from sha256_init_state into `hash`.
- *
- * Returns true when the last 32-bit word of `hash` is zero and fulltest()
- * accepts it against `target`; returns false once n reaches `max_nonce` or
- * thr->work_restart is set.  *last_nonce receives the last nonce tried in
- * both cases.
- */
-bool scanhash_asm32(struct thr_info*thr, const unsigned char *midstate,
-		unsigned char *data,
-		unsigned char *hash1, unsigned char *hash,
-		const unsigned char *target,
-		uint32_t max_nonce, uint32_t *last_nonce,
-		uint32_t n)
-{
-	uint32_t *nonce_slot = (uint32_t *)(data + 76);
-	const uint32_t *hash_words = (const uint32_t *)hash;
-	const unsigned char *tail = data + 64;
-
-	for (;;) {
-		bool found;
-
-		*nonce_slot = ++n;
-
-		runhash32(hash1, tail, midstate);
-		runhash32(hash, hash1, sha256_init_state);
-
-		/* fulltest() only runs when the cheap last-word check passes */
-		found = (hash_words[7] == 0) && fulltest(hash, target);
-
-		if (unlikely(found) || n >= max_nonce || thr->work_restart) {
-			*last_nonce = n;
-			return found;
-		}
-	}
-}
-
-#endif	// #if defined(WANT_CRYPTOPP_ASM32)

+ 0 - 274
sha256_generic.c

@@ -1,274 +0,0 @@
-/*
- * Cryptographic API.
- *
- * SHA-256, as specified in
- * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
- *
- * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
- *
- * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include "config.h"
-
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-#include "miner.h"
-
-typedef uint32_t u32;
-typedef uint8_t u8;
-
-/*
- * Rotate the 32-bit value `word` right by `shift` bit positions.
- *
- * Both shift counts are reduced modulo 32, so a count of 0 (or 32) returns
- * `word` unchanged instead of evaluating `word << 32`, which is undefined
- * behaviour for a 32-bit operand.  Results for counts 1..31 are unchanged.
- */
-static inline u32 ror32(u32 word, unsigned int shift)
-{
-	shift &= 31;
-	return (word >> shift) | (word << ((32 - shift) & 31));
-}
-
-/* SHA-256 "choose": each result bit is taken from y where x is 1 and from z where x is 0 */
-static inline u32 Ch(u32 x, u32 y, u32 z)
-{
-	const u32 differs = y ^ z;
-
-	return (differs & x) ^ z;
-}
-
-/* SHA-256 "majority": each result bit is 1 when at least two of x, y, z have it set */
-static inline u32 Maj(u32 x, u32 y, u32 z)
-{
-	const u32 both = x & y;
-	const u32 either = x | y;
-
-	return both | (either & z);
-}
-/* e0/e1 are applied to the working variables in the rounds, s0/s1 to the message schedule in BLEND_OP(); each macro evaluates x three times */
-#define e0(x)       (ror32(x, 2) ^ ror32(x,13) ^ ror32(x,22))
-#define e1(x)       (ror32(x, 6) ^ ror32(x,11) ^ ror32(x,25))
-#define s0(x)       (ror32(x, 7) ^ ror32(x,18) ^ (x >> 3))
-#define s1(x)       (ror32(x,17) ^ ror32(x,19) ^ (x >> 10))
-
-/*
- * Load message word I of the block at `input` into W[I].
- *
- * No byte swap is applied (no ntohl): the bitcoin input is already
- * big-endian.  The word is fetched with memcpy() rather than through a
- * u32 * cast, so `input` need not be 4-byte aligned and the access does not
- * break strict aliasing; for aligned input the loaded value is identical.
- */
-static inline void LOAD_OP(int I, u32 *W, const u8 *input)
-{
-	memcpy(&W[I], input + (size_t)I * sizeof(u32), sizeof(u32));
-}
-
-/* Extend the message schedule: derive W[I] (I >= 16) from four earlier words */
-static inline void BLEND_OP(int I, u32 *W)
-{
-	const u32 sig1 = s1(W[I - 2]);
-	const u32 sig0 = s0(W[I - 15]);
-
-	W[I] = sig1 + W[I - 7] + sig0 + W[I - 16];
-}
-
-static void sha256_transform(u32 *state, const u8 *input)
-{
-	u32 a, b, c, d, e, f, g, h, t1, t2;
-	u32 W[64];
-	int i;
-
-	/* load the input */
-	for (i = 0; i < 16; i++)
-		LOAD_OP(i, W, input);
-
-	/* now blend */
-	for (i = 16; i < 64; i++)
-		BLEND_OP(i, W);
-
-	/* load the state into our registers */
-	a=state[0];  b=state[1];  c=state[2];  d=state[3];
-	e=state[4];  f=state[5];  g=state[6];  h=state[7];
-
-	/* now iterate */
-	t1 = h + e1(e) + Ch(e,f,g) + 0x428a2f98 + W[ 0];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x71374491 + W[ 1];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0xb5c0fbcf + W[ 2];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0xe9b5dba5 + W[ 3];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x3956c25b + W[ 4];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x59f111f1 + W[ 5];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x923f82a4 + W[ 6];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0xab1c5ed5 + W[ 7];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0xd807aa98 + W[ 8];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x12835b01 + W[ 9];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x243185be + W[10];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x550c7dc3 + W[11];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x72be5d74 + W[12];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x80deb1fe + W[13];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x9bdc06a7 + W[14];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0xc19bf174 + W[15];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0xe49b69c1 + W[16];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0xefbe4786 + W[17];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x0fc19dc6 + W[18];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x240ca1cc + W[19];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x2de92c6f + W[20];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x4a7484aa + W[21];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x5cb0a9dc + W[22];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x76f988da + W[23];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0x983e5152 + W[24];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0xa831c66d + W[25];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0xb00327c8 + W[26];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0xbf597fc7 + W[27];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0xc6e00bf3 + W[28];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0xd5a79147 + W[29];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x06ca6351 + W[30];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x14292967 + W[31];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0x27b70a85 + W[32];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x2e1b2138 + W[33];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x4d2c6dfc + W[34];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x53380d13 + W[35];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x650a7354 + W[36];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x766a0abb + W[37];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x81c2c92e + W[38];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x92722c85 + W[39];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0xa2bfe8a1 + W[40];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0xa81a664b + W[41];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0xc24b8b70 + W[42];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0xc76c51a3 + W[43];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0xd192e819 + W[44];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0xd6990624 + W[45];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0xf40e3585 + W[46];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x106aa070 + W[47];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0x19a4c116 + W[48];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x1e376c08 + W[49];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x2748774c + W[50];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x34b0bcb5 + W[51];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x391c0cb3 + W[52];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x4ed8aa4a + W[53];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x5b9cca4f + W[54];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x682e6ff3 + W[55];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0x748f82ee + W[56];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x78a5636f + W[57];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x84c87814 + W[58];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x8cc70208 + W[59];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x90befffa + W[60];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0xa4506ceb + W[61];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0xbef9a3f7 + W[62];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0xc67178f2 + W[63];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
-	state[4] += e; state[5] += f; state[6] += g; state[7] += h;
-
-#if 0
-	/* clear any sensitive info... */
-	a = b = c = d = e = f = g = h = t1 = t2 = 0;
-	memset(W, 0, 64 * sizeof(u32));
-#endif
-}
-
-/*
- * Seed `state` with the eight-word chaining value at `init`, then compress
- * the 16-word block at `input` into it.
- */
-static void runhash(void *state, const void *input, const void *init)
-{
-	u32 *chain = state;
-
-	memcpy(chain, init, 8 * sizeof(u32));
-	sha256_transform(chain, input);
-}
-/* SHA-256 initial hash value; scanhash_c() passes it to runhash() as the starting state of the second hash */
-const uint32_t sha256_init_state[8] = {
-	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
-	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-};
-
-/*
- * Suspiciously similar to ScanHash* from bitcoin.
- *
- * Tries nonces n+1, n+2, ... in turn.  Each candidate is written at byte
- * offset 76 of `data`; the block at data+64 is hashed from `midstate` into
- * `hash1`, and `hash1` is then hashed from sha256_init_state into `hash`.
- * `hash1` is read as a full 16-word block, so the words after the digest
- * are presumably prepared by the caller (not visible here).
- *
- * Returns true when the last 32-bit word of `hash` is zero and fulltest()
- * accepts it against `target`; returns false once n reaches `max_nonce` or
- * thr->work_restart is set.  *last_nonce receives the last nonce tried in
- * both cases.
- */
-bool scanhash_c(struct thr_info*thr, const unsigned char *midstate, unsigned char *data,
-	        unsigned char *hash1, unsigned char *hash,
-		const unsigned char *target,
-	        uint32_t max_nonce, uint32_t *last_nonce,
-		uint32_t n)
-{
-	uint32_t *hash32 = (uint32_t *) hash;
-	uint32_t *nonce = (uint32_t *)(data + 76);
-
-	/* only the second 64-byte block is hashed; the first is covered by midstate */
-	data += 64;
-
-	while (1) {
-		n++;
-		*nonce = n;
-
-		runhash(hash1, data, midstate);
-		runhash(hash, hash1, sha256_init_state);
-
-		/* cheap last-word check first; fulltest() only on candidates */
-		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
-			*last_nonce = n;
-			return true;
-		}
-
-		if ((n >= max_nonce) || thr->work_restart) {
-			*last_nonce = n;
-			return false;
-		}
-	}
-}
-

+ 0 - 133
sha256_sse2_amd64.c

@@ -1,133 +0,0 @@
-/*
- * SHA-256 driver for ASM routine for x86_64 on Linux
- * Copyright (c) Mark Crichton <crichton@gimp.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include "driver-cpu.h"
-
-#ifdef WANT_X8664_SSE2
-
-#include <string.h>
-#include <assert.h>
-
-#include <xmmintrin.h>
-#include <stdint.h>
-#include <stdio.h>
-/* SSE2 hashing routine defined outside this file (presumably the x86_64 assembly sources; confirm) */
-extern void sha256_sse2_64_new (__m128i *res, __m128i *res1, __m128i *data, const uint32_t init[8]);
-/* SHA-256 round constants; scanhash_sse2_64() broadcasts each one into sha256_consts_m128i[] */
-static uint32_t g_sha256_k[]__attribute__((aligned(0x100))) = {
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /*  0 */
-    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /*  8 */
-    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
-    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
-    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
-    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
-    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
-    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
-    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-/* SHA-256 initial hash value; exported but not referenced within this file */
-const uint32_t sha256_init[8]__attribute__((aligned(0x100))) =
-{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-/* g_4sha256_k is not referenced within this file; sha256_consts_m128i is filled by scanhash_sse2_64() (presumably read by the assembly routine; confirm) */
-__m128i g_4sha256_k[64];
-__m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000)));
-/*
- * Scan nonces four at a time through the external SSE2 routine.
- *
- * Each pass puts nonce+0..nonce+3 into the four lanes of schedule word 3,
- * calls sha256_sse2_64_new(), and looks for a lane whose hash word 7 is
- * zero.  That lane's eight hash words are copied to `phash` and checked
- * with fulltest().  On success the winning nonce is stored at byte offset
- * 76 of the original `pdata`, *last_nonce is set to that nonce plus one,
- * and true is returned.  Returns false, with *last_nonce = nonce, once
- * nonce >= max_nonce or thr->work_restart is set.
- *
- * NOTE(review): only the first zero lane of a group is examined, and
- * `nonce += 4` can wrap when max_nonce is within 4 of UINT32_MAX; confirm
- * that callers keep max_nonce lower.
- */
-bool scanhash_sse2_64(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce)
-{
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
-    uint32_t m_midstate[8], m_w[16], m_w1[16];
-    __m128i m_4w[64] __attribute__ ((aligned (0x100)));
-    __m128i m_4hash[64] __attribute__ ((aligned (0x100)));
-    __m128i m_4hash1[64] __attribute__ ((aligned (0x100)));
-    __m128i offset;
-    int i;
-
-	pdata += 64;
-
-    /* Lets the four 32-bit lanes of an __m128i be read individually (originally added for debugging) */
-    union {
-        __m128i m;
-        uint32_t i[4];
-    } mi;
-
-    /* Message expansion */
-    memcpy(m_midstate, pmidstate, sizeof(m_midstate));
-    memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */
-    memcpy(m_w1, phash1, sizeof(m_w1));
-    memset(m_4hash, 0, sizeof(m_4hash));
-
-    /* Transmongrify: replicate every 32-bit word across all four lanes */
-    for (i = 0; i < 16; i++)
-        m_4w[i] = _mm_set1_epi32(m_w[i]);
-
-    for (i = 0; i < 16; i++)
-        m_4hash1[i] = _mm_set1_epi32(m_w1[i]);
-
-    for (i = 0; i < 64; i++)
-	sha256_consts_m128i[i] = _mm_set1_epi32(g_sha256_k[i]);
-
-    /* per-lane nonce offsets: lane 0 gets +0, lane 3 gets +3 */
-    offset = _mm_set_epi32(0x3, 0x2, 0x1, 0x0);
-
-    for (;;)
-    {
-	int j;
-
-	m_4w[3] = _mm_add_epi32(offset, _mm_set1_epi32(nonce));
-
-	sha256_sse2_64_new (m_4hash, m_4hash1, m_4w, m_midstate);
-
-	for (j = 0; j < 4; j++) {
-	    mi.m = m_4hash[7];
-	    if (unlikely(mi.i[j] == 0))
-		break;
-        }
-
-	/* j != 4 means lane j produced a hash whose word 7 is zero...so check it */
-	/* Use the C version for a check... */
-	if (unlikely(j != 4)) {
-		for (i = 0; i < 8; i++) {
-		    mi.m = m_4hash[i];
-		    *(uint32_t *)&(phash)[i*4] = mi.i[j];
-		}
-
-		if (fulltest(phash, ptarget)) {
-		     nonce += j;
-		     *last_nonce = nonce + 1;
-		     *nNonce_p = nonce;
-		     return true;
-		}
-	}
-
-        if (unlikely((nonce >= max_nonce) || thr->work_restart))
-        {
-			*last_nonce = nonce;
-			return false;
-	}
-
-	nonce += 4;
-   }
-}
-
-#endif /* WANT_X8664_SSE2 */
-

+ 0 - 125
sha256_sse2_i386.c

@@ -1,125 +0,0 @@
-/*
- * SHA-256 driver for ASM routine for x86_64 on Linux
- * Copyright (c) Mark Crichton <crichton@gimp.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include "driver-cpu.h"
-
-#ifdef WANT_X8632_SSE2
-
-#include <string.h>
-#include <assert.h>
-
-#include <xmmintrin.h>
-#include <stdint.h>
-#include <stdio.h>
-
-extern void CalcSha256_x86 (__m128i *res, __m128i *data, const uint32_t init[8])__attribute__((fastcall));
-
-static uint32_t g_sha256_k[]__attribute__((aligned(0x100))) = {
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /*  0 */
-    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /*  8 */
-    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
-    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
-    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
-    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
-    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
-    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
-    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-
-const uint32_t sha256_32init[8]__attribute__((aligned(0x100))) =
-{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-
-__m128i g_4sha256_k[64];
-__m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000)));
-
-bool scanhash_sse2_32(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce)
-{
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
-    uint32_t m_midstate[8], m_w[16], m_w1[16];
-    __m128i m_4w[64] __attribute__ ((aligned (0x100)));
-    __m128i m_4hash[64] __attribute__ ((aligned (0x100)));
-    __m128i m_4hash1[64] __attribute__ ((aligned (0x100)));
-    __m128i offset;
-    int i;
-
-	pdata += 64;
-
-    /* Message expansion */
-    memcpy(m_midstate, pmidstate, sizeof(m_midstate));
-    memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */
-    memcpy(m_w1, phash1, sizeof(m_w1));
-    memset(m_4hash, 0, sizeof(m_4hash));
-
-    /* Transmongrify */
-    for (i = 0; i < 16; i++)
-        m_4w[i] = _mm_set1_epi32(m_w[i]);
-
-    for (i = 0; i < 16; i++)
-        m_4hash1[i] = _mm_set1_epi32(m_w1[i]);
-
-    for (i = 0; i < 64; i++)
-	sha256_consts_m128i[i] = _mm_set1_epi32(g_sha256_k[i]);
-
-    offset = _mm_set_epi32(0x3, 0x2, 0x1, 0x0);
-
-    for (;;)
-    {
-	int j;
-
-	m_4w[3] = _mm_add_epi32(offset, _mm_set1_epi32(nonce));
-
-	/* Some optimization can be done here W.R.T. precalculating some hash */
-	CalcSha256_x86 (m_4hash1, m_4w, m_midstate);
-	CalcSha256_x86 (m_4hash, m_4hash1, sha256_32init);
-
-	for (j = 0; j < 4; j++) {
-	    if (unlikely(((uint32_t *)&(m_4hash[7]))[j] == 0)) {
-		/* We found a hit...so check it */
-		/* Use the C version for a check... */
-
-		for (i = 0; i < 8; i++) {
-		    *(uint32_t *)&(phash)[i<<2] = ((uint32_t *)&(m_4hash[i]))[j];
-		}
-
-		if (fulltest(phash, ptarget)) {
-		     nonce += j;
-		     *last_nonce = nonce;
-		     *nNonce_p = nonce;
-		     return true;
-		}
-	    }
-	}
-
-	if (unlikely((nonce >= max_nonce) || thr->work_restart)) {
-		*last_nonce = nonce;
-		return false;
-	}
-
-	nonce += 4;
-
-   }
-}
-
-#endif /* WANT_X8632_SSE2 */
-

+ 0 - 132
sha256_sse4_amd64.c

@@ -1,132 +0,0 @@
-/*
- * SHA-256 driver for ASM routine for x86_64 on Linux
- * Copyright (c) Mark Crichton <crichton@gimp.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include "driver-cpu.h"
-
-#ifdef WANT_X8664_SSE4
-
-#include <string.h>
-#include <assert.h>
-
-#include <xmmintrin.h>
-#include <stdint.h>
-#include <stdio.h>
-
-extern void CalcSha256_x64_sse4(__m128i *res, __m128i *data, uint32_t init[8]);
-
-static uint32_t g_sha256_k[] = {
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /*  0 */
-    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /*  8 */
-    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
-    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
-    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
-    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
-    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
-    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
-    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-
-static uint32_t g_sha256_hinit[8] =
-{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
-
-__m128i g_4sha256_k[64];
-
-bool scanhash_sse4_64(struct thr_info*thr, const unsigned char *pmidstate,
-	unsigned char *pdata,
-	unsigned char *phash1, unsigned char *phash,
-	const unsigned char *ptarget,
-	uint32_t max_nonce, uint32_t *last_nonce,
-	uint32_t nonce)
-{
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
-    uint32_t m_midstate[8], m_w[16], m_w1[16];
-    __m128i m_4w[64], m_4hash[64], m_4hash1[64];
-    __m128i offset;
-    int i;
-
-	pdata += 64;
-
-    /* For debugging */
-    union {
-        __m128i m;
-        uint32_t i[4];
-    } mi;
-
-    /* Message expansion */
-    memcpy(m_midstate, pmidstate, sizeof(m_midstate));
-    memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */
-    memcpy(m_w1, phash1, sizeof(m_w1));
-    memset(m_4hash, 0, sizeof(m_4hash));
-
-    /* Transmongrify */
-    for (i = 0; i < 16; i++)
-        m_4w[i] = _mm_set1_epi32(m_w[i]);
-
-    for (i = 0; i < 16; i++)
-        m_4hash1[i] = _mm_set1_epi32(m_w1[i]);
-
-    for (i = 0; i < 64; i++)
-	g_4sha256_k[i] = _mm_set1_epi32(g_sha256_k[i]);
-
-    offset = _mm_set_epi32(0x3, 0x2, 0x1, 0x0);
-
-    for (;;)
-    {
-	int j;
-
-	m_4w[3] = _mm_add_epi32(offset, _mm_set1_epi32(nonce));
-
-	/* Some optimization can be done here W.R.T. precalculating some hash */
-        CalcSha256_x64_sse4(m_4hash1, m_4w, m_midstate);
-	CalcSha256_x64_sse4(m_4hash, m_4hash1, g_sha256_hinit);
-
-	for (j = 0; j < 4; j++) {
-	    mi.m = m_4hash[7];
-	    if (unlikely(mi.i[j] == 0))
-		break;
-        }
-
-	/* If j = true, we found a hit...so check it */
-	/* Use the C version for a check... */
-	if (unlikely(j != 4)) {
-		for (i = 0; i < 8; i++) {
-		    mi.m = m_4hash[i];
-		    *(uint32_t *)&(phash)[i*4] = mi.i[j];
-		}
-
-		if (fulltest(phash, ptarget)) {
-			nonce += j;
-			*last_nonce = nonce;
-			*nNonce_p = nonce;
-			return true;
-		}
-	}
-
-        if (unlikely((nonce >= max_nonce) || thr->work_restart))
-        {
-			*last_nonce = nonce;
-			return false;
-	}
-
-	nonce += 4;
-   }
-}
-
-#endif /* WANT_X8664_SSE4 */
-

+ 0 - 85
sha256_via.c

@@ -1,85 +0,0 @@
-
-#include "driver-cpu.h"
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <sys/time.h>
-#include "miner.h"
-
-#ifdef WANT_VIA_PADLOCK
-
-static void via_sha256(void *hash, void *buf, unsigned len)
-{
-	unsigned stat = 0;
-	asm volatile(".byte 0xf3, 0x0f, 0xa6, 0xd0"
-		     :"+S"(buf), "+a"(stat)
-		     :"c"(len), "D" (hash)
-		     :"memory");
-}
-
-bool scanhash_via(struct thr_info*thr, const unsigned char __maybe_unused *pmidstate,
-	unsigned char *data_inout,
-	unsigned char __maybe_unused *phash1, unsigned char __maybe_unused *phash,
-	const unsigned char *target,
-		  uint32_t max_nonce, uint32_t *last_nonce,
-		  uint32_t n)
-{
-	unsigned char data[128] __attribute__((aligned(128)));
-	unsigned char tmp_hash[32] __attribute__((aligned(128)));
-	unsigned char tmp_hash1[32] __attribute__((aligned(128)));
-	uint32_t *data32 = (uint32_t *) data;
-	uint32_t *hash32 = (uint32_t *) tmp_hash;
-	uint32_t *nonce = (uint32_t *)(data + 64 + 12);
-	unsigned long stat_ctr = 0;
-	int i;
-
-	/* bitcoin gives us big endian input, but via wants LE,
-	 * so we reverse the swapping bitcoin has already done (extra work)
-	 * in order to permit the hardware to swap everything
-	 * back to BE again (extra work).
-	 */
-	for (i = 0; i < 128/4; i++)
-		data32[i] = swab32(((uint32_t *)data_inout)[i]);
-
-	while (1) {
-		n++;
-		*nonce = n;
-
-		/* first SHA256 transform */
-		memcpy(tmp_hash1, sha256_init_state, 32);
-		via_sha256(tmp_hash1, data, 80);	/* or maybe 128? */
-
-		for (i = 0; i < 32/4; i++)
-			((uint32_t *)tmp_hash1)[i] =
-				swab32(((uint32_t *)tmp_hash1)[i]);
-
-		/* second SHA256 transform */
-		memcpy(tmp_hash, sha256_init_state, 32);
-		via_sha256(tmp_hash, tmp_hash1, 32);
-
-		stat_ctr++;
-
-		if (unlikely((hash32[7] == 0) && fulltest(tmp_hash, target))) {
-			/* swap nonce'd data back into original storage area;
-			 * TODO: only swap back the nonce, rather than all data
-			 */
-			for (i = 0; i < 128/4; i++) {
-				uint32_t *dout32 = (uint32_t *) data_inout;
-				dout32[i] = swab32(data32[i]);
-			}
-
-			*last_nonce = n;
-			return true;
-		}
-
-		if ((n >= max_nonce) || thr->work_restart) {
-			*last_nonce = n;
-			return false;
-		}
-	}
-}
-
-#endif /* WANT_VIA_PADLOCK */
-

+ 0 - 1
x86_32/.gitignore

@@ -1 +0,0 @@
-libx8632.a

+ 0 - 8
x86_32/Makefile.am

@@ -1,8 +0,0 @@
-noinst_LIBRARIES	= libx8632.a
-
-SUFFIXES = .asm
-
-libx8632_a_SOURCES	= sha256_xmm.asm
-
-.asm.o:
-	$(YASM) -f $(YASM_FMT) $<

+ 0 - 259
x86_32/sha256_xmm.asm

@@ -1,259 +0,0 @@
-;; SHA-256 for X86 for Linux, based off of:A
-
-; (c) Ufasoft 2011 http://ufasoft.com mailto:support@ufasoft.com
-; Version 2011
-; This software is Public Domain
-
-; SHA-256 CPU SSE cruncher for Bitcoin Miner
-
-ALIGN 32
-BITS 32
-
-%define hash ecx
-%define data edx
-%define init esi
-
-; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
-%define LAB_CALC_PARA	2
-%define LAB_CALC_UNROLL	24
-
-%define LAB_LOOP_UNROLL 64
-
-extern _sha256_consts_m128i
-
-global $@CalcSha256_x86@12
-;	CalcSha256	hash(ecx), data(edx), init([esp+4])
-@CalcSha256_x86@12:
-	push	esi
-	push	edi
-	mov	init, [esp+12]
-
-LAB_SHA:
-	lea	edi, qword [data+256]				; + 256
-
-LAB_CALC:
-%macro	lab_calc_blk 1
-	movdqa	xmm0, [edi-(15-%1)*16]				; xmm0 = W[I-15]
-	movdqa	xmm4, [edi-(15-(%1+1))*16]			; xmm4 = W[I-15+1]
-	movdqa	xmm2, xmm0					; xmm2 = W[I-15]
-	movdqa	xmm6, xmm4					; xmm6 = W[I-15+1]
-	psrld	xmm0, 3						; xmm0 = W[I-15] >> 3
-	psrld	xmm4, 3						; xmm4 = W[I-15+1] >> 3
-	movdqa	xmm1, xmm0					; xmm1 = W[I-15] >> 3
-	movdqa	xmm5, xmm4					; xmm5 = W[I-15+1] >> 3
-	pslld	xmm2, 14					; xmm2 = W[I-15] << 14
-	pslld	xmm6, 14					; xmm6 = W[I-15+1] << 14
-	psrld	xmm1, 4						; xmm1 = W[I-15] >> 7
-	psrld	xmm5, 4						; xmm5 = W[I-15+1] >> 7
-	pxor	xmm0, xmm1					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7)
-	pxor	xmm4, xmm5					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7)
-	psrld	xmm1, 11					; xmm1 = W[I-15] >> 18
-	psrld	xmm5, 11					; xmm5 = W[I-15+1] >> 18
-	pxor	xmm0, xmm2					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14)
-	pxor	xmm4, xmm6					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14)
-	pslld	xmm2, 11					; xmm2 = W[I-15] << 25
-	pslld	xmm6, 11					; xmm6 = W[I-15+1] << 25
-	pxor	xmm0, xmm1					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18)
-	pxor	xmm4, xmm5					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18)
-	pxor	xmm0, xmm2					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18) ^ (W[I-15] << 25)
-	pxor	xmm4, xmm6					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) ^ (W[I-15+1] << 25)
-
-	movdqa	xmm3, [edi-(2-%1)*16]				; xmm3 = W[I-2]
-	movdqa	xmm7, [edi-(2-(%1+1))*16]			; xmm7 = W[I-2+1]
-
-	paddd	xmm0, [edi-(16-%1)*16]				; xmm0 = s0(W[I-15]) + W[I-16]
-	paddd	xmm4, [edi-(16-(%1+1))*16]			; xmm4 = s0(W[I-15+1]) + W[I-16+1]
-
-;;;;;;;;;;;;;;;;;;
-
-	movdqa	xmm2, xmm3					; xmm2 = W[I-2]
-	movdqa	xmm6, xmm7					; xmm6 = W[I-2+1]
-	psrld	xmm3, 10					; xmm3 = W[I-2] >> 10
-	psrld	xmm7, 10					; xmm7 = W[I-2+1] >> 10
-	movdqa	xmm1, xmm3					; xmm1 = W[I-2] >> 10
-	movdqa	xmm5, xmm7					; xmm5 = W[I-2+1] >> 10
-
-	paddd	xmm0, [edi-(7-%1)*16]				; xmm0 = s0(W[I-15]) + W[I-16] + W[I-7]
-
-	pslld	xmm2, 13					; xmm2 = W[I-2] << 13
-	pslld	xmm6, 13					; xmm6 = W[I-2+1] << 13
-	psrld	xmm1, 7						; xmm1 = W[I-2] >> 17
-	psrld	xmm5, 7						; xmm5 = W[I-2+1] >> 17
-
-	paddd	xmm4, [edi-(7-(%1+1))*16]			; xmm4 = s0(W[I-15+1]) + W[I-16+1] + W[I-7+1]
-
-	pxor	xmm3, xmm1					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17)
-	pxor	xmm7, xmm5					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17)
-	psrld	xmm1, 2						; xmm1 = W[I-2] >> 19
-	psrld	xmm5, 2						; xmm5 = W[I-2+1] >> 19
-	pxor	xmm3, xmm2					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13)
-	pxor	xmm7, xmm6					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13)
-	pslld	xmm2, 2						; xmm2 = W[I-2] << 15
-	pslld	xmm6, 2						; xmm6 = W[I-2+1] << 15
-	pxor	xmm3, xmm1					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19)
-	pxor	xmm7, xmm5					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19)
-	pxor	xmm3, xmm2					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) ^ (W[I-2] << 15)
-	pxor	xmm7, xmm6					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) ^ (W[I-2+1] << 15)
-
-	paddd	xmm0, xmm3					; xmm0 = s0(W[I-15]) + W[I-16] + s1(W[I-2]) + W[I-7]
-	paddd	xmm4, xmm7					; xmm4 = s0(W[I-15+1]) + W[I-16+1] + s1(W[I-2+1]) + W[I-7+1]
-	movdqa	[edi+(%1*16)], xmm0
-	movdqa	[edi+((%1+1)*16)], xmm4
-%endmacro
-
-%assign i 0
-%rep    LAB_CALC_UNROLL
-        lab_calc_blk i
-%assign i i+LAB_CALC_PARA
-%endrep
-
-; Load the init values of the message into the hash.
-
-	movdqa	xmm7, [init]
-	pshufd	xmm5, xmm7, 0x55		; xmm5 == b
-	pshufd	xmm4, xmm7, 0xAA		; xmm4 == c
-	pshufd	xmm3, xmm7, 0xFF		; xmm3 == d
-	pshufd	xmm7, xmm7, 0			; xmm7 == a
-
-	movdqa	xmm0, [init+4*4]
-	pshufd	xmm1, xmm0, 0x55		; [hash+0*16] == f
-	movdqa	[hash+0*16], xmm1
-
-	pshufd	xmm1, xmm0, 0xAA		; [hash+1*16] == g
-	movdqa	[hash+1*16], xmm1
-
-	pshufd	xmm1, xmm0, 0xFF		; [hash+2*16] == h
-	movdqa	[hash+2*16], xmm1
-
-	pshufd	xmm0, xmm0, 0			; xmm0 == e
-
-
-LAB_LOOP:
-
-;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32<T>(g_sha256_k[j]) + w[j]
-
-%macro	lab_loop_blk 1
-	movdqa	xmm6, [data+%1]
-	paddd	xmm6, _sha256_consts_m128i[%1]
-
-	paddd	xmm6, [hash+2*16]		; +h
-
-	movdqa	xmm1, xmm0
-	movdqa	xmm2, [hash+1*16]
-	pandn	xmm1, xmm2	; ~e & g
-
-	movdqa	[hash+2*16], xmm2		; h = g
-	movdqa	xmm2, [hash+0*16]		; f
-	movdqa	[hash+1*16], xmm2		; g = f
-
-
-	pand	xmm2, xmm0	; e & f
-	pxor	xmm1, xmm2	; (e & f) ^ (~e & g)
-	movdqa	[hash+0*16], xmm0		; f = e
-
-	paddd	xmm6, xmm1	; Ch + h + w[i] + k[i]
-
-	movdqa	xmm1, xmm0
-	psrld	xmm0, 6
-	movdqa	xmm2, xmm0
-	pslld	xmm1, 7
-	psrld	xmm2, 5
-	pxor	xmm0, xmm1
-	pxor	xmm0, xmm2
-	pslld	xmm1, 14
-	psrld	xmm2, 14
-	pxor	xmm0, xmm1
-	pxor	xmm0, xmm2
-	pslld	xmm1, 5
-	pxor	xmm0, xmm1	; Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)
-	paddd	xmm6, xmm0	; xmm6 = t1
-
-	movdqa	xmm0, xmm3	; d
-	paddd	xmm0, xmm6	; e = d+t1
-
-	movdqa	xmm1, xmm5	; =b
-	movdqa	xmm3, xmm4	; d = c
-	movdqa	xmm2, xmm4	; c
-	pand	xmm2, xmm5	; b & c
-	pand	xmm4, xmm7	; a & c
-	pand	xmm1, xmm7	; a & b
-	pxor	xmm1, xmm4
-	movdqa	xmm4, xmm5	; c = b
-	movdqa	xmm5, xmm7	; b = a
-	pxor	xmm1, xmm2	; (a & c) ^ (a & d) ^ (c & d)
-	paddd	xmm6, xmm1	; t1 + ((a & c) ^ (a & d) ^ (c & d))
-
-	movdqa	xmm2, xmm7
-	psrld	xmm7, 2
-	movdqa	xmm1, xmm7
-	pslld	xmm2, 10
-	psrld	xmm1, 11
-	pxor	xmm7, xmm2
-	pxor	xmm7, xmm1
-	pslld	xmm2, 9
-	psrld	xmm1, 9
-	pxor	xmm7, xmm2
-	pxor	xmm7, xmm1
-	pslld	xmm2, 11
-	pxor	xmm7, xmm2
-	paddd	xmm7, xmm6	; a = t1 + (Rotr32(a, 2) ^ Rotr32(a, 13) ^ Rotr32(a, 22)) + ((a & c) ^ (a & d) ^ (c & d));
-%endmacro
-
-%assign i 0
-%rep    LAB_LOOP_UNROLL
-        lab_loop_blk i
-%assign i i+16
-%endrep
-
-; Finished the 64 rounds, calculate hash and save
-
-	movdqa	xmm1, [init+16]
-
-	pshufd	xmm2, xmm1, 0xFF
-	movdqa  xmm6, [hash+2*16]
-	paddd   xmm2, xmm6
-	movdqa  [hash+7*16], xmm2
-
-	pshufd	xmm2, xmm1, 0xAA
-	movdqa  xmm6, [hash+1*16]
-	paddd   xmm2, xmm6
-	movdqa  [hash+6*16], xmm2
-
-	pshufd  xmm2, xmm1, 0x55
-	movdqa  xmm6, [hash+0*16]
-	paddd   xmm2, xmm6
-	movdqa  [hash+5*16], xmm2
-
-	pshufd	xmm1, xmm1, 0
-	paddd	xmm0, xmm1
-	movdqa  [hash+4*16], xmm0
-
-	movdqa  xmm1, [init]
-
-	pshufd  xmm2, xmm1, 0xFF
-	paddd   xmm3, xmm2
-	movdqa  [hash+3*16], xmm3
-
-	pshufd  xmm2, xmm1, 0xAA
-	paddd   xmm4, xmm2
-	movdqa  [hash+2*16], xmm4
-
-        pshufd  xmm2, xmm1, 0x55
-        paddd   xmm5, xmm2
-        movdqa  [hash+1*16], xmm5
-
-	pshufd  xmm1, xmm1, 0
-	paddd   xmm7, xmm1
-	movdqa	[hash+0*16], xmm7
-
-LAB_RET:
-	pop	edi
-	pop	esi
-	retn	4
-
-%ifidn __OUTPUT_FORMAT__,elf
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf32
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif

+ 0 - 1
x86_64/.gitignore

@@ -1 +0,0 @@
-libx8664.a

+ 0 - 8
x86_64/Makefile.am

@@ -1,8 +0,0 @@
-noinst_LIBRARIES	= libx8664.a
-
-SUFFIXES = .asm
-
-libx8664_a_SOURCES	= sha256_xmm_amd64.asm sha256_sse4_amd64.asm
-
-.asm.o:
-	$(YASM) -f $(YASM_FMT) -o $@ $<

+ 0 - 292
x86_64/sha256_sse4_amd64.asm

@@ -1,292 +0,0 @@
-;; SHA-256 for X86-64 for Linux, based off of:
-
-; (c) Ufasoft 2011 http://ufasoft.com mailto:support@ufasoft.com
-; Version 2011
-; This software is Public Domain
-
-; Significant re-write/optimisation and reordering by,
-; Neil Kettle <mu-b@digit-labs.org>
-; ~18% performance improvement
-
-; SHA-256 CPU SSE cruncher for Bitcoin Miner
-
-ALIGN 32
-BITS 64
-
-%ifidn __OUTPUT_FORMAT__,win64
-%define hash rcx
-%define data rdx
-%define init r8
-%define temp r9
-%else
-%define hash rdi
-%define data rsi
-%define init rdx
-%define temp rcx
-%endif
-
-; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
-%define LAB_CALC_PARA	2
-%define LAB_CALC_UNROLL	8
-
-%define LAB_LOOP_UNROLL 8
-
-extern g_4sha256_k
-
-global CalcSha256_x64_sse4
-;	CalcSha256	hash(rdi), data(rsi), init(rdx)
-;	CalcSha256	hash(rcx), data(rdx), init(r8)
-CalcSha256_x64_sse4:
-
-	push	rbx
-%ifidn __OUTPUT_FORMAT__,win64
-	sub	rsp, 16 * 6
-	movdqa	[rsp + 16*0], xmm6
-	movdqa	[rsp + 16*1], xmm7
-	movdqa	[rsp + 16*2], xmm8
-	movdqa	[rsp + 16*3], xmm9
-	movdqa	[rsp + 16*4], xmm10
-	movdqa	[rsp + 16*5], xmm11
-%endif
-
-LAB_NEXT_NONCE:
-
-	mov	temp, 64*4					; 256 - temp is # of SHA-2 rounds
-	mov	rax, 16*4					; 64 - rax is where we expand to
-
-LAB_SHA:
-	push	temp
-	lea	temp, qword [data+temp*4]			; + 1024
-	lea	r11, qword [data+rax*4]				; + 256
-
-LAB_CALC:
-%macro	lab_calc_blk 1
-
-	movntdqa	xmm0, [r11-(15-%1)*16]				; xmm0 = W[I-15]
-	movdqa	xmm2, xmm0					; xmm2 = W[I-15]	
-	movntdqa	xmm4, [r11-(15-(%1+1))*16]			; xmm4 = W[I-15+1]
-	movdqa	xmm6, xmm4					; xmm6 = W[I-15+1]	
-
-	psrld	xmm0, 3						; xmm0 = W[I-15] >> 3
-	movdqa	xmm1, xmm0					; xmm1 = W[I-15] >> 3	
-	pslld	xmm2, 14					; xmm2 = W[I-15] << 14			
-	psrld	xmm4, 3						; xmm4 = W[I-15+1] >> 3
-	movdqa	xmm5, xmm4					; xmm5 = W[I-15+1] >> 3
-	psrld	xmm5, 4						; xmm5 = W[I-15+1] >> 7	
-	pxor	xmm4, xmm5					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7)	
-	pslld	xmm6, 14					; xmm6 = W[I-15+1] << 14
-	psrld	xmm1, 4						; xmm1 = W[I-15] >> 7
-	pxor	xmm0, xmm1					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7)
-	pxor	xmm0, xmm2					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14)
-	psrld	xmm1, 11					; xmm1 = W[I-15] >> 18
-	psrld	xmm5, 11					; xmm5 = W[I-15+1] >> 18
-	pxor	xmm4, xmm6					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14)
-	pxor	xmm4, xmm5					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18)	
-	pslld	xmm2, 11					; xmm2 = W[I-15] << 25
-	pslld	xmm6, 11					; xmm6 = W[I-15+1] << 25
-	pxor	xmm4, xmm6					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) ^ (W[I-15+1] << 25)
-	pxor	xmm0, xmm1					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18)
-	pxor	xmm0, xmm2					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18) ^ (W[I-15] << 25)
-	paddd	xmm0, [r11-(16-%1)*16]				; xmm0 = s0(W[I-15]) + W[I-16]
-	paddd	xmm4, [r11-(16-(%1+1))*16]			; xmm4 = s0(W[I-15+1]) + W[I-16+1]
-	movntdqa	xmm3, [r11-(2-%1)*16]				; xmm3 = W[I-2]
-	movntdqa	xmm7, [r11-(2-(%1+1))*16]			; xmm7 = W[I-2+1]
-
-;;;;;;;;;;;;;;;;;;
-
-	movdqa	xmm2, xmm3					; xmm2 = W[I-2]
-	psrld	xmm3, 10					; xmm3 = W[I-2] >> 10
-	movdqa	xmm1, xmm3					; xmm1 = W[I-2] >> 10
-	movdqa	xmm6, xmm7					; xmm6 = W[I-2+1]
-	psrld	xmm7, 10					; xmm7 = W[I-2+1] >> 10
-	movdqa	xmm5, xmm7					; xmm5 = W[I-2+1] >> 10
-
-	paddd	xmm0, [r11-(7-%1)*16]				; xmm0 = s0(W[I-15]) + W[I-16] + W[I-7]
-	paddd	xmm4, [r11-(7-(%1+1))*16]			; xmm4 = s0(W[I-15+1]) + W[I-16+1] + W[I-7+1]
-	
-	pslld	xmm2, 13					; xmm2 = W[I-2] << 13
-	pslld	xmm6, 13					; xmm6 = W[I-2+1] << 13
-	psrld	xmm1, 7						; xmm1 = W[I-2] >> 17
-	psrld	xmm5, 7						; xmm5 = W[I-2+1] >> 17
-
-
-
-	pxor	xmm3, xmm1					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17)
-	psrld	xmm1, 2						; xmm1 = W[I-2] >> 19
-	pxor	xmm3, xmm2					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13)
-	pslld	xmm2, 2						; xmm2 = W[I-2] << 15
-	pxor	xmm7, xmm5					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17)
-	psrld	xmm5, 2						; xmm5 = W[I-2+1] >> 19	
-	pxor	xmm7, xmm6					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13)
-	pslld	xmm6, 2						; xmm6 = W[I-2+1] << 15
-
-
-
-	pxor	xmm3, xmm1					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19)
-	pxor	xmm3, xmm2					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) ^ (W[I-2] << 15)
-	paddd	xmm0, xmm3					; xmm0 = s0(W[I-15]) + W[I-16] + s1(W[I-2]) + W[I-7]
-	pxor	xmm7, xmm5					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19)	
-	pxor	xmm7, xmm6					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) ^ (W[I-2+1] << 15)
-	paddd	xmm4, xmm7					; xmm4 = s0(W[I-15+1]) + W[I-16+1] + s1(W[I-2+1]) + W[I-7+1]
-
-	movdqa	[r11+(%1*16)], xmm0
-	movdqa	[r11+((%1+1)*16)], xmm4
-%endmacro
-
-%assign i 0
-%rep    LAB_CALC_UNROLL
-        lab_calc_blk i
-%assign i i+LAB_CALC_PARA
-%endrep
-
-	add	r11, LAB_CALC_UNROLL*LAB_CALC_PARA*16
-	cmp	r11, temp
-	jb	LAB_CALC
-
-	pop	temp
-	mov	rax, 0
-
-; Load the init values of the message into the hash.
-
-	movntdqa	xmm7, [init]
-	pshufd	xmm5, xmm7, 0x55		; xmm5 == b
-	pshufd	xmm4, xmm7, 0xAA		; xmm4 == c
-	pshufd	xmm3, xmm7, 0xFF		; xmm3 == d
-	pshufd	xmm7, xmm7, 0			; xmm7 == a
-
-	movntdqa	xmm0, [init+4*4]
-	pshufd	xmm8, xmm0, 0x55		; xmm8 == f
-	pshufd	xmm9, xmm0, 0xAA		; xmm9 == g
-	pshufd	xmm10, xmm0, 0xFF		; xmm10 == h
-	pshufd	xmm0, xmm0, 0			; xmm0 == e
-
-LAB_LOOP:
-
-;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32<T>(g_sha256_k[j]) + w[j]
-
-%macro	lab_loop_blk 0
-	movntdqa	xmm6, [data+rax*4]
-	paddd	xmm6, g_4sha256_k[rax*4]
-	add	rax, 4
-
-	paddd	xmm6, xmm10	; +h
-
-	movdqa	xmm1, xmm0
-	movdqa	xmm2, xmm9
-	pandn	xmm1, xmm2	; ~e & g
-
-	movdqa	xmm10, xmm2	; h = g
-	movdqa	xmm2, xmm8	; f
-	movdqa	xmm9, xmm2	; g = f
-
-	pand	xmm2, xmm0	; e & f
-	pxor	xmm1, xmm2	; (e & f) ^ (~e & g)
-	movdqa	xmm8, xmm0	; f = e
-
-	paddd	xmm6, xmm1	; Ch + h + w[i] + k[i]
-
-	movdqa	xmm1, xmm0
-	psrld	xmm0, 6
-	movdqa	xmm2, xmm0
-	pslld	xmm1, 7
-	psrld	xmm2, 5
-	pxor	xmm0, xmm1
-	pxor	xmm0, xmm2
-	pslld	xmm1, 14
-	psrld	xmm2, 14
-	pxor	xmm0, xmm1
-	pxor	xmm0, xmm2
-	pslld	xmm1, 5
-	pxor	xmm0, xmm1	; Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)
-	paddd	xmm6, xmm0	; xmm6 = t1
-
-	movdqa	xmm0, xmm3	; d
-	paddd	xmm0, xmm6	; e = d+t1
-
-	movdqa	xmm1, xmm5	; =b
-	movdqa	xmm3, xmm4	; d = c
-	movdqa	xmm2, xmm4	; c
-	pand	xmm2, xmm5	; b & c
-	pand	xmm4, xmm7	; a & c
-	pand	xmm1, xmm7	; a & b
-	pxor	xmm1, xmm4
-	movdqa	xmm4, xmm5	; c = b
-	movdqa	xmm5, xmm7	; b = a
-	pxor	xmm1, xmm2	; (a & c) ^ (a & d) ^ (c & d)
-	paddd	xmm6, xmm1	; t1 + ((a & c) ^ (a & d) ^ (c & d))
-
-	movdqa	xmm2, xmm7
-	psrld	xmm7, 2
-	movdqa	xmm1, xmm7
-	pslld	xmm2, 10
-	psrld	xmm1, 11
-	pxor	xmm7, xmm2
-	pxor	xmm7, xmm1
-	pslld	xmm2, 9
-	psrld	xmm1, 9
-	pxor	xmm7, xmm2
-	pxor	xmm7, xmm1
-	pslld	xmm2, 11
-	pxor	xmm7, xmm2
-	paddd	xmm7, xmm6	; a = t1 + (Rotr32(a, 2) ^ Rotr32(a, 13) ^ Rotr32(a, 22)) + ((a & c) ^ (a & d) ^ (c & d));
-%endmacro
-
-%assign i 0
-%rep    LAB_LOOP_UNROLL
-        lab_loop_blk
-%assign i i+1
-%endrep
-
-	cmp	rax, temp
-	jb	LAB_LOOP
-
-; Finished the 64 rounds, calculate hash and save
-
-	movntdqa	xmm1, [init]
-	pshufd	xmm2, xmm1, 0x55
-	paddd	xmm5, xmm2
-	pshufd	xmm6, xmm1, 0xAA
-	paddd	xmm4, xmm6
-	pshufd	xmm11, xmm1, 0xFF
-	paddd	xmm3, xmm11
-	pshufd	xmm1, xmm1, 0
-	paddd	xmm7, xmm1
-
-	movntdqa	xmm1, [init+4*4]
-	pshufd	xmm2, xmm1, 0x55
-	paddd	xmm8, xmm2
-	pshufd	xmm6, xmm1, 0xAA
-	paddd	xmm9, xmm6
-	pshufd	xmm11, xmm1, 0xFF
-	paddd	xmm10, xmm11
-	pshufd	xmm1, xmm1, 0
-	paddd	xmm0, xmm1
-
-	movdqa	[hash+0*16], xmm7
-	movdqa	[hash+1*16], xmm5
-	movdqa	[hash+2*16], xmm4
-	movdqa	[hash+3*16], xmm3
-	movdqa	[hash+4*16], xmm0
-	movdqa	[hash+5*16], xmm8
-	movdqa	[hash+6*16], xmm9
-	movdqa	[hash+7*16], xmm10
-
-LAB_RET:
-%ifidn __OUTPUT_FORMAT__,win64
-	movdqa	xmm6, [rsp + 16*0]
-	movdqa	xmm7, [rsp + 16*1]
-	movdqa	xmm8, [rsp + 16*2]
-	movdqa	xmm9, [rsp + 16*3]
-	movdqa	xmm10, [rsp + 16*4]
-	movdqa	xmm11, [rsp + 16*5]
-	add	rsp, 16 * 6
-%endif
-	pop	rbx
-	ret
-
-%ifidn __OUTPUT_FORMAT__,elf
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf64
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif

+ 0 - 354
x86_64/sha256_xmm_amd64.asm

@@ -1,354 +0,0 @@
-;/*
-; * Copyright (C) 2011 - Neil Kettle <neil@digit-labs.org>
-; *
-; * This file is part of cpuminer-ng.
-; *
-; * cpuminer-ng is free software: you can redistribute it and/or modify
-; * it under the terms of the GNU General Public License as published by
-; * the Free Software Foundation, either version 3 of the License, or
-; * (at your option) any later version.
-; *
-; * cpuminer-ng is distributed in the hope that it will be useful,
-; * but WITHOUT ANY WARRANTY; without even the implied warranty of
-; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-; * GNU General Public License for more details.
-; *
-; * You should have received a copy of the GNU General Public License
-; * along with cpuminer-ng.  If not, see <http://www.gnu.org/licenses/>.
-; */
-
-; SysV AMD64: %rbp, %rbx and %r12-%r15 are callee-saved (Win64 additionally preserves rdi, rsi and xmm6-xmm15)
-
-ALIGN 32
-BITS 64
-; Argument registers for (hash, hash1, data, init): rcx, rdx, r8, r9 when assembling for win64, otherwise rdi, rsi, rdx, rcx
-%ifidn __OUTPUT_FORMAT__,win64
-%define hash  rcx
-%define hash1 rdx
-%define data  r8
-%define init  r9
-%else
-%define hash  rdi
-%define hash1 rsi
-%define data  rdx
-%define init  rcx
-%endif
-
-; The W expansion covers bytes 256..1024 of data, so (1024 - 256) must be 0 mod (SHA_CALC_W_UNROLL*SHA_CALC_W_PARA*16)
-%define SHA_CALC_W_PARA         2
-%define SHA_CALC_W_UNROLL       8
-; Rounds emitted per pass of the round loop (64 rounds => 4 passes)
-%define SHA_ROUND_LOOP_UNROLL   16
-
-%ifidn __YASM_OBJFMT__, macho64
-extern _sha256_consts_m128i
-extern _sha256_init
-%else
-extern sha256_consts_m128i
-extern sha256_init
-%endif
-
-%ifidn __YASM_OBJFMT__, macho64
-global _sha256_sse2_64_new
-%else
-global sha256_sse2_64_new
-%endif
-; Scratch registers: sr1-sr3 are used inside every round, sr4 only in the final add of the initial state
-%define sr1   xmm6
-%define sr2   xmm1
-%define sr3   xmm2
-%define sr4   xmm13
-; Working variables a..h of the SHA-256 state, one XMM register (four packed dwords) each
-%define rA    xmm7
-%define rB    xmm5
-%define rC    xmm4
-%define rD    xmm3
-%define rE    xmm0
-%define rF    xmm8
-%define rG    xmm9
-%define rH    xmm10
-; sha_round_blk: one SHA-256 round on the packed-dword state rA..rH; reads W at [data+rax] and the round constant at the same offset, then advances rax by 16. Clobbers sr1-sr3.
-%macro  sha_round_blk 0
-    movdqa    sr1, [data+rax]                   ; T1  = w  (16-byte W entry for this round)
-    ;movdqa    sr1, xmm11
-    movdqa    sr2, rE                           ; sr2 = rE
-
-    pandn     sr2, rG                           ; sr2 = ~rE & rG
-    movdqa    sr3, rF                           ; sr3 = rF
-
-    paddd     sr1, rH                           ; T1  = h + w
-    movdqa    rH, rG                            ; rH  = rG
-
-    pand      sr3, rE                           ; sr3 = rE & rF
-    movdqa    rG, rF                            ; rG  = rF
-
-%ifidn __YASM_OBJFMT__, macho64
-    paddd     sr1, [rcx+rax]                    ; T1  = h + sha256_consts_m128i[i] + w  (rcx holds the table address, set up in SHA_256)
-%else
-    paddd     sr1, sha256_consts_m128i[rax]     ; T1  = h + sha256_consts_m128i[i] + w
-%endif
-    pxor      sr2, sr3                          ; sr2 = (rE & rF) ^ (~rE & rG) = Ch (e, f, g)
-
-    movdqa    rF, rE                            ; rF  = rE
-    paddd     sr1, sr2                          ; T1  = h + Ch (e, f, g) + sha256_consts_m128i[i] + w;
-
-    movdqa    sr2, rE                           ; sr2 = rE
-    psrld     rE, 6                 ; e >> 6
-
-    movdqa    sr3, rE               ; e >> 6
-    pslld     sr2, 7                ; e << 7
-
-    psrld     sr3, 5                ; e >> 11
-    pxor      rE, sr2               ; e >> 6 ^ e << 7
-
-    pslld     sr2, 14               ; e << 21
-    pxor      rE, sr3               ; e >> 6 ^ e << 7 ^ e >> 11
-
-    psrld     sr3, 14               ; e >> 25
-    pxor      rE, sr2               ; e >> 6 ^ e << 7 ^ e >> 11 ^ e << 21
-
-    pslld     sr2, 5                ; e << 26
-    pxor      rE, sr3               ; e >> 6 ^ e << 7 ^ e >> 11 ^ e << 21 ^ e >> 25
-
-    pxor      rE, sr2               ; e >> 6 ^ e << 7 ^ e >> 11 ^ e << 21 ^ e >> 25 ^ e << 26 = Rotr(e,6) ^ Rotr(e,11) ^ Rotr(e,25)
-    movdqa    sr2, rB                           ; sr2 = rB
-
-    paddd     sr1, rE                           ; sr1 = h + BIGSIGMA1_256(e) + Ch (e, f, g) + sha256_consts_m128i[i] + w;
-    movdqa    rE, rD                            ; rE  = rD
-
-    movdqa    rD, rC                            ; rD  = rC
-    paddd     rE, sr1                           ; rE  = rD + T1
-
-    movdqa    sr3, rC                           ; sr3 = rC
-    pand      rC, rA                            ; rC  = rC & rA
-
-    pand      sr3, rB                           ; sr3 = rB & rC
-    pand      sr2, rA                           ; sr2 = rB & rA
-
-    pxor      sr2, rC                           ; sr2 = (rB & rA) ^ (rC & rA)
-    movdqa    rC, rB                            ; rC  = rB
-
-    pxor      sr2, sr3                          ; sr2 = (rB & rA) ^ (rC & rA) ^ (rB & rC) = Maj(a, b, c)
-    movdqa    rB, rA                            ; rB  = rA
-
-    paddd     sr1, sr2                          ; sr1 = T1 + Maj(a, b, c)
-    lea       rax, [rax+16]                     ; rax += 16: next W / constant entry
-
-    movdqa    sr3, rA                           ; sr3 = rA
-    psrld     rA, 2                 ; a >> 2
-
-    pslld     sr3, 10               ; a << 10
-    movdqa    sr2, rA               ; a >> 2
-
-    pxor      rA, sr3               ; a >> 2 ^ a << 10
-    psrld     sr2, 11               ; a >> 13
-
-    pxor      rA, sr2               ; a >> 2 ^ a << 10 ^ a >> 13
-    pslld     sr3, 9                ; a << 19
-
-    pxor      rA, sr3               ; a >> 2 ^ a << 10 ^ a >> 13 ^ a << 19
-    psrld     sr2, 9                ; a >> 22
-
-    pxor      rA, sr2               ; a >> 2 ^ a << 10 ^ a >> 13 ^ a << 19 ^ a >> 22
-    pslld     sr3, 11               ; a << 30
-
-    pxor      rA, sr3               ; a >> 2 ^ a << 10 ^ a >> 13 ^ a << 19 ^ a >> 22 ^ a << 30 = Rotr(a,2) ^ Rotr(a,13) ^ Rotr(a,22)
-    paddd     rA, sr1                           ; T1 + BIGSIGMA0_256(a) + Maj(a, b, c);
-%endmacro
-; sha_calc_w_blk N: message-schedule step; computes two 16-byte entries W[N], W[N+1] (relative to r11) as s0(W[I-15]) + W[I-16] + W[I-7] + s1(W[I-2]). Clobbers xmm0-xmm7.
-%macro  sha_calc_w_blk 1
-    movdqa	xmm0, [r11-(15-%1)*16]				; xmm0 = W[I-15]
-    movdqa	xmm4, [r11-(15-(%1+1))*16]			; xmm4 = W[I-15+1]
-    movdqa	xmm2, xmm0					; xmm2 = W[I-15]
-    movdqa	xmm6, xmm4					; xmm6 = W[I-15+1]
-    psrld	xmm0, 3						; xmm0 = W[I-15] >> 3
-    psrld	xmm4, 3						; xmm4 = W[I-15+1] >> 3
-    movdqa	xmm1, xmm0					; xmm1 = W[I-15] >> 3
-    movdqa	xmm5, xmm4					; xmm5 = W[I-15+1] >> 3
-    pslld	xmm2, 14					; xmm2 = W[I-15] << 14
-    pslld	xmm6, 14					; xmm6 = W[I-15+1] << 14
-    psrld	xmm1, 4						; xmm1 = W[I-15] >> 7
-    psrld	xmm5, 4						; xmm5 = W[I-15+1] >> 7
-    pxor	xmm0, xmm1					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7)
-    pxor	xmm4, xmm5					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7)
-    psrld	xmm1, 11					; xmm1 = W[I-15] >> 18
-    psrld	xmm5, 11					; xmm5 = W[I-15+1] >> 18
-    pxor	xmm0, xmm2					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14)
-    pxor	xmm4, xmm6					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14)
-    pslld	xmm2, 11					; xmm2 = W[I-15] << 25
-    pslld	xmm6, 11					; xmm6 = W[I-15+1] << 25
-    pxor	xmm0, xmm1					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18)
-    pxor	xmm4, xmm5					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18)
-    pxor	xmm0, xmm2					; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18) ^ (W[I-15] << 25)
-    pxor	xmm4, xmm6					; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) ^ (W[I-15+1] << 25)
-
-    movdqa	xmm3, [r11-(2-%1)*16]				; xmm3 = W[I-2]
-    movdqa	xmm7, [r11-(2-(%1+1))*16]			; xmm7 = W[I-2+1]
-
-    paddd	xmm0, [r11-(16-%1)*16]				; xmm0 = s0(W[I-15]) + W[I-16]
-    paddd	xmm4, [r11-(16-(%1+1))*16]			; xmm4 = s0(W[I-15+1]) + W[I-16+1]
-
-; s1(W[I-2]) for both entries, interleaved with the W[I-7] additions
-
-    movdqa	xmm2, xmm3					; xmm2 = W[I-2]
-    movdqa	xmm6, xmm7					; xmm6 = W[I-2+1]
-    psrld	xmm3, 10					; xmm3 = W[I-2] >> 10
-    psrld	xmm7, 10					; xmm7 = W[I-2+1] >> 10
-    movdqa	xmm1, xmm3					; xmm1 = W[I-2] >> 10
-    movdqa	xmm5, xmm7					; xmm5 = W[I-2+1] >> 10
-
-    paddd	xmm0, [r11-(7-%1)*16]				; xmm0 = s0(W[I-15]) + W[I-16] + W[I-7]
-
-    pslld	xmm2, 13					; xmm2 = W[I-2] << 13
-    pslld	xmm6, 13					; xmm6 = W[I-2+1] << 13
-    psrld	xmm1, 7						; xmm1 = W[I-2] >> 17
-    psrld	xmm5, 7						; xmm5 = W[I-2+1] >> 17
-
-    paddd	xmm4, [r11-(7-(%1+1))*16]			; xmm4 = s0(W[I-15+1]) + W[I-16+1] + W[I-7+1]
-
-    pxor	xmm3, xmm1					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17)
-    pxor	xmm7, xmm5					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17)
-    psrld	xmm1, 2						; xmm1 = W[I-2] >> 19
-    psrld	xmm5, 2						; xmm5 = W[I-2+1] >> 19
-    pxor	xmm3, xmm2					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13)
-    pxor	xmm7, xmm6					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13)
-    pslld	xmm2, 2						; xmm2 = W[I-2] << 15
-    pslld	xmm6, 2						; xmm6 = W[I-2+1] << 15
-    pxor	xmm3, xmm1					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19)
-    pxor	xmm7, xmm5					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19)
-    pxor	xmm3, xmm2					; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) ^ (W[I-2] << 15)
-    pxor	xmm7, xmm6					; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) ^ (W[I-2+1] << 15)
-
-    paddd	xmm0, xmm3					; xmm0 = s0(W[I-15]) + W[I-16] + s1(W[I-2]) + W[I-7]
-    paddd	xmm4, xmm7					; xmm4 = s0(W[I-15+1]) + W[I-16+1] + s1(W[I-2+1]) + W[I-7+1]
-    movdqa	[r11+(%1*16)], xmm0				; W[I]   = xmm0
-    movdqa	[r11+((%1+1)*16)], xmm4				; W[I+1] = xmm4
-%endmacro
-
-; sha256_sse2_64_new(hash, hash1, data, init): runs SHA_256 over data starting from init, stores the eight state words to hash1, runs SHA_256 again over hash1 starting from sha256_init, and stores only word H to hash+7*16.
-
-%ifidn __YASM_OBJFMT__, macho64
-_sha256_sse2_64_new:
-%else
-sha256_sse2_64_new:
-%endif
-
-    push        rbx                 ; rbx is callee-saved; SHA_256 uses it as a loop bound
-%ifidn __OUTPUT_FORMAT__,win64
-    sub         rsp, 16 * 6         ; six 16-byte spill slots
-    movdqa      [rsp + 16*0], xmm6  ; Win64 treats xmm6-xmm15 as callee-saved: spill the ones this routine writes
-    movdqa      [rsp + 16*1], xmm7
-    movdqa      [rsp + 16*2], xmm8
-    movdqa      [rsp + 16*3], xmm9
-    movdqa      [rsp + 16*4], xmm10
-    movdqa      [rsp + 16*5], xmm13
-%endif
-; SHA_256: expand W[16..63] in place behind the 16 entries at data, run the 64 rounds from the state at init, then add init back into rA..rH
-%macro  SHA_256  0
-    mov         rbx, 64*4   ; rbx = 64 rounds * 4 (scaled by 4 again below => 1024 bytes of W)
-    mov         rax, 16*4   ; rax = 16 * 4: expansion starts at W[16]
-
-    push        rbx                         ; keep 64*4 for the round-loop bound
-    lea         rbx, qword [data+rbx*4]     ; rbx = data + 1024: end of W[]
-    lea         r11, qword [data+rax*4]     ; r11 = data + 256: &W[16]
-
-%%SHA_CALC_W:
-%assign i 0
-%rep    SHA_CALC_W_UNROLL
-        sha_calc_w_blk i
-%assign i i+SHA_CALC_W_PARA
-%endrep
-    add       r11, SHA_CALC_W_UNROLL*SHA_CALC_W_PARA*16     ; skip the 16 entries (256 bytes) just produced
-    cmp       r11, rbx
-    jb        %%SHA_CALC_W
-
-    pop       rbx                   ; rbx = 64*4
-    mov       rax, 0                ; rax = byte offset of the current round into W[] and the constant table
-    lea       rbx, [rbx*4]          ; rbx = 1024: offset just past round 63
-
-    movdqa    rA, [init]            ; load state words A..D
-    pshufd    rB, rA, 0x55          ; rB == B
-    pshufd    rC, rA, 0xAA          ; rC == C
-    pshufd    rD, rA, 0xFF          ; rD == D
-    pshufd    rA, rA, 0             ; rA == A
-
-    movdqa    rE, [init+4*4]        ; load state words E..H
-    pshufd    rF, rE, 0x55          ; rF == F
-    pshufd    rG, rE, 0xAA          ; rG == G
-    pshufd    rH, rE, 0xFF          ; rH == H
-    pshufd    rE, rE, 0             ; rE == E
-
-%ifidn __YASM_OBJFMT__, macho64
-    lea       rcx, [_sha256_consts_m128i wrt rip]   ; NOTE(review): init is defined as rcx on non-win64 targets, so this overwrites init before the [init] reloads below - confirm the macho64 path
-%endif
-
-%%SHAROUND_LOOP:
-%assign i 0
-%rep    SHA_ROUND_LOOP_UNROLL
-        sha_round_blk
-%assign i i+1
-%endrep
-    cmp   rax, rbx
-    jb    %%SHAROUND_LOOP
-
-; Finished the 64 rounds: add the initial state back into the working variables
-
-    movdqa    sr1, [init]
-    pshufd    sr2, sr1, 0x55
-    pshufd    sr3, sr1, 0xAA
-    pshufd    sr4, sr1, 0xFF
-    pshufd    sr1, sr1, 0
-
-    paddd     rB, sr2
-    paddd     rC, sr3
-    paddd     rD, sr4
-    paddd     rA, sr1
-
-    movdqa    sr1, [init+4*4]
-    pshufd    sr2, sr1, 0x55
-    pshufd    sr3, sr1, 0xAA
-    pshufd    sr4, sr1, 0xFF
-    pshufd    sr1, sr1, 0
-
-    paddd     rF, sr2
-    paddd     rG, sr3
-    paddd     rH, sr4
-    paddd     rE, sr1
-%endmacro
-
-    SHA_256
-    movdqa    [hash1+0*16], rA      ; first-pass result: eight 16-byte words A..H
-    movdqa    [hash1+1*16], rB
-    movdqa    [hash1+2*16], rC
-    movdqa    [hash1+3*16], rD
-    movdqa    [hash1+4*16], rE
-    movdqa    [hash1+5*16], rF
-    movdqa    [hash1+6*16], rG
-    movdqa    [hash1+7*16], rH
-
-    mov       data, hash1           ; second pass reads hash1 and expands its W entries behind it in the same buffer
-    mov       init, sha256_init     ; second pass starts from sha256_init; NOTE(review): macho64 declares this symbol as _sha256_init - confirm
-
-    SHA_256
-
-    movdqa    [hash+7*16], rH       ; only word H of the second-pass result is written
-
-LAB_RET:
-%ifidn __OUTPUT_FORMAT__,win64
-    movdqa    xmm6, [rsp + 16*0]    ; restore the XMM registers spilled in the prologue
-    movdqa    xmm7, [rsp + 16*1]
-    movdqa    xmm8, [rsp + 16*2]
-    movdqa    xmm9, [rsp + 16*3]
-    movdqa    xmm10, [rsp + 16*4]
-    movdqa    xmm13, [rsp + 16*5]
-    add       rsp, 16 * 6
-%endif
-    pop       rbx
-    ret
-; On ELF targets, emit an empty .note.GNU-stack section (GNU toolchain convention for marking that this object does not need an executable stack)
-%ifidn __OUTPUT_FORMAT__,elf
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-%ifidn __OUTPUT_FORMAT__,elf64
-section .note.GNU-stack noalloc noexec nowrite progbits
-%endif