Browse Source

Merge gpumining from oclmine. Unstable.

Con Kolivas 14 years ago
parent
commit
dde7039726
9 changed files with 1032 additions and 20 deletions
  1. 2 1
      Makefile.am
  2. 2 0
      configure.ac
  3. 229 19
      cpu-miner.c
  4. 197 0
      findnonce.c
  5. 23 0
      findnonce.h
  6. 2 0
      miner.h
  7. 271 0
      ocl.c
  8. 22 0
      ocl.h
  9. 284 0
      oclminer.cl

+ 2 - 1
Makefile.am

@@ -15,10 +15,11 @@ bin_PROGRAMS	= minerd
 
 
 minerd_SOURCES	= elist.h miner.h compat.h			\
 minerd_SOURCES	= elist.h miner.h compat.h			\
 		  cpu-miner.c util.c				\
 		  cpu-miner.c util.c				\
+		  ocl.c findnonce.c				\
 		  sha256_generic.c sha256_4way.c sha256_via.c	\
 		  sha256_generic.c sha256_4way.c sha256_via.c	\
 		  sha256_cryptopp.c sha256_sse2_amd64.c
 		  sha256_cryptopp.c sha256_sse2_amd64.c
 minerd_LDFLAGS	= $(PTHREAD_FLAGS)
 minerd_LDFLAGS	= $(PTHREAD_FLAGS)
-minerd_LDADD	= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@
+minerd_LDADD	= @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @OPENCL_LIBS@
 minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@
 minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@
 
 
 if HAVE_x86_64
 if HAVE_x86_64

+ 2 - 0
configure.ac

@@ -40,6 +40,7 @@ case $target in
 esac
 esac
 
 
 
 
+AC_CHECK_LIB(OpenCL, clSetKernelArg, OPENCL_LIBS=-lOpenCL)
 AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
 AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
 AC_CHECK_LIB(pthread, pthread_create, PTHREAD_LIBS=-lpthread)
 AC_CHECK_LIB(pthread, pthread_create, PTHREAD_LIBS=-lpthread)
 
 
@@ -95,6 +96,7 @@ PKG_PROG_PKG_CONFIG()
 LIBCURL_CHECK_CONFIG(, 7.10.1, ,
 LIBCURL_CHECK_CONFIG(, 7.10.1, ,
   [AC_MSG_ERROR([Missing required libcurl >= 7.10.1])])
   [AC_MSG_ERROR([Missing required libcurl >= 7.10.1])])
 
 
+AC_SUBST(OPENCL_LIBS)
 AC_SUBST(JANSSON_LIBS)
 AC_SUBST(JANSSON_LIBS)
 AC_SUBST(PTHREAD_FLAGS)
 AC_SUBST(PTHREAD_FLAGS)
 AC_SUBST(PTHREAD_LIBS)
 AC_SUBST(PTHREAD_LIBS)

+ 229 - 19
cpu-miner.c

@@ -19,6 +19,7 @@
 #include <unistd.h>
 #include <unistd.h>
 #include <sys/time.h>
 #include <sys/time.h>
 #include <time.h>
 #include <time.h>
+#include <math.h>
 #ifndef WIN32
 #ifndef WIN32
 #include <sys/resource.h>
 #include <sys/resource.h>
 #endif
 #endif
@@ -27,6 +28,8 @@
 #include <curl/curl.h>
 #include <curl/curl.h>
 #include "compat.h"
 #include "compat.h"
 #include "miner.h"
 #include "miner.h"
+#include "findnonce.h"
+#include "ocl.h"
 
 
 #define PROGRAM_NAME		"minerd"
 #define PROGRAM_NAME		"minerd"
 #define DEF_RPC_URL		"http://127.0.0.1:8332/"
 #define DEF_RPC_URL		"http://127.0.0.1:8332/"
@@ -108,6 +111,7 @@ static const char *algo_names[] = {
 
 
 bool opt_debug = false;
 bool opt_debug = false;
 bool opt_protocol = false;
 bool opt_protocol = false;
+bool opt_ndevs = false;
 bool want_longpoll = true;
 bool want_longpoll = true;
 bool have_longpoll = false;
 bool have_longpoll = false;
 bool use_syslog = false;
 bool use_syslog = false;
@@ -122,6 +126,7 @@ static enum sha256_algos opt_algo = ALGO_SSE2_64;
 #else
 #else
 static enum sha256_algos opt_algo = ALGO_C;
 static enum sha256_algos opt_algo = ALGO_C;
 #endif
 #endif
+static int nDevs;
 static int opt_n_threads;
 static int opt_n_threads;
 static int num_processors;
 static int num_processors;
 static char *rpc_url;
 static char *rpc_url;
@@ -135,7 +140,7 @@ pthread_mutex_t time_lock;
 static pthread_mutex_t hash_lock;
 static pthread_mutex_t hash_lock;
 static unsigned long total_hashes_done;
 static unsigned long total_hashes_done;
 static struct timeval total_tv_start;
 static struct timeval total_tv_start;
-static int solutions;
+static int accepted, rejected;
 
 
 
 
 struct option_help {
 struct option_help {
@@ -175,6 +180,9 @@ static struct option_help options_help[] = {
 	{ "debug",
 	{ "debug",
 	  "(-D) Enable debug output (default: off)" },
 	  "(-D) Enable debug output (default: off)" },
 
 
+	{ "ndevs",
+	  "(-n) Display number of detected GPUs" },
+
 	{ "no-longpoll",
 	{ "no-longpoll",
 	  "Disable X-Long-Polling support (default: enabled)" },
 	  "Disable X-Long-Polling support (default: enabled)" },
 
 
@@ -223,6 +231,7 @@ static struct option options[] = {
 	{ "config", 1, NULL, 'c' },
 	{ "config", 1, NULL, 'c' },
 	{ "debug", 0, NULL, 'D' },
 	{ "debug", 0, NULL, 'D' },
 	{ "help", 0, NULL, 'h' },
 	{ "help", 0, NULL, 'h' },
+	{ "ndevs", 0, NULL, 'n' },
 	{ "no-longpoll", 0, NULL, 1003 },
 	{ "no-longpoll", 0, NULL, 1003 },
 	{ "pass", 1, NULL, 'p' },
 	{ "pass", 1, NULL, 'p' },
 	{ "protocol-dump", 0, NULL, 'P' },
 	{ "protocol-dump", 0, NULL, 'P' },
@@ -237,8 +246,6 @@ static struct option options[] = {
 	{ "url", 1, NULL, 1001 },
 	{ "url", 1, NULL, 1001 },
 	{ "user", 1, NULL, 'u' },
 	{ "user", 1, NULL, 'u' },
 	{ "userpass", 1, NULL, 1002 },
 	{ "userpass", 1, NULL, 1002 },
-
-	{ }
 };
 };
 
 
 struct work {
 struct work {
@@ -248,6 +255,12 @@ struct work {
 	unsigned char	target[32];
 	unsigned char	target[32];
 
 
 	unsigned char	hash[32];
 	unsigned char	hash[32];
+
+	uint32_t		output[MAXTHREADS];
+	uint32_t		res_nonce;
+	uint32_t		valid;
+	uint32_t		ready;
+	dev_blk_ctx		blk;
 };
 };
 
 
 static bool jobj_binary(const json_t *obj, const char *key,
 static bool jobj_binary(const json_t *obj, const char *key,
@@ -335,10 +348,12 @@ static bool submit_upstream_work(CURL *curl, const struct work *work)
 	res = json_object_get(val, "result");
 	res = json_object_get(val, "result");
 
 
 	if (json_is_true(res)) {
 	if (json_is_true(res)) {
-		solutions++;
+		accepted++;
 		applog(LOG_INFO, "PROOF OF WORK RESULT: true (yay!!!)");
 		applog(LOG_INFO, "PROOF OF WORK RESULT: true (yay!!!)");
-	} else
+	} else {
+		rejected++;
 		applog(LOG_INFO, "PROOF OF WORK RESULT: false (booooo)");
 		applog(LOG_INFO, "PROOF OF WORK RESULT: false (booooo)");
+	}
 
 
 	json_decref(val);
 	json_decref(val);
 
 
@@ -493,7 +508,7 @@ static void hashmeter(int thr_id, struct timeval *diff,
 	khashes = hashes_done / 1000.0;
 	khashes = hashes_done / 1000.0;
 	secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0);
 	secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0);
 
 
-	if (opt_n_threads > 1) {
+	if (opt_n_threads + nDevs > 1) {
 		double total_mhashes, total_secs;
 		double total_mhashes, total_secs;
 
 
 		/* Totals are updated by all threads so can race without locking */
 		/* Totals are updated by all threads so can race without locking */
@@ -505,13 +520,17 @@ static void hashmeter(int thr_id, struct timeval *diff,
 		pthread_mutex_unlock(&hash_lock);
 		pthread_mutex_unlock(&hash_lock);
 		total_secs = (double)total_diff.tv_sec +
 		total_secs = (double)total_diff.tv_sec +
 			((double)total_diff.tv_usec / 1000000.0);
 			((double)total_diff.tv_usec / 1000000.0);
-		applog(LOG_INFO, "[Total: %.2f Mhash/sec] "
-		       "[thread %d: %lu hashes, %.0f khash/sec] [Solved: %d]",
-		       total_mhashes / total_secs, thr_id, hashes_done,
-		       khashes / secs, solutions);
+		if (opt_debug)
+			applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]",
+			       thr_id, hashes_done);
+		if (!thr_id)
+			applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
+			       total_mhashes / total_secs, accepted, rejected);
 	} else {
 	} else {
-		applog(LOG_INFO, "[%lu hashes, %.0f khash/sec] [Solved: %d]",
-		       hashes_done, khashes / secs, solutions);
+		if (opt_debug)
+			applog(LOG_DEBUG, "[%lu hashes]", hashes_done);
+		applog(LOG_INFO, "%.0f khash/sec] [%d Accepted] [%d Rejected]",
+				khashes / secs, accepted, rejected);
 	}
 	}
 }
 }
 
 
@@ -574,6 +593,15 @@ err_out:
 	return false;
 	return false;
 }
 }
 
 
+bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce)
+{
+	work->data[64+12+0] = (nonce>>0) & 0xff;
+	work->data[64+12+1] = (nonce>>8) & 0xff;
+	work->data[64+12+2] = (nonce>>16) & 0xff;
+	work->data[64+12+3] = (nonce>>24) & 0xff;
+	return submit_work(thr, work);
+}
+
 static void *miner_thread(void *userdata)
 static void *miner_thread(void *userdata)
 {
 {
 	struct thr_info *mythr = userdata;
 	struct thr_info *mythr = userdata;
@@ -693,11 +721,162 @@ out:
 	return NULL;
 	return NULL;
 }
 }
 
 
+enum {
+	STAT_SLEEP_INTERVAL		= 1,
+	STAT_CTR_INTERVAL		= 10000000,
+	FAILURE_INTERVAL		= 30,
+};
+
+static int block = 0;
+static _clState *clStates[16];
+
+static void *gpuminer_thread(void *userdata)
+{
+	struct thr_info *mythr = userdata;
+	int thr_id = mythr->id;
+	int failures = 0;
+
+	uint32_t res[MAXTHREADS];
+
+	setpriority(PRIO_PROCESS, 0, 19);
+	drop_policy();
+
+	size_t globalThreads[1];
+	size_t localThreads[1];
+
+	cl_int status;
+
+	_clState *clState = clStates[thr_id];
+
+	status = clSetKernelArg(clState->kernel, 0,  sizeof(cl_mem), (void *)&clState->inputBuffer);
+	if(status != CL_SUCCESS) { printf("Error: Setting kernel argument 1.\n"); return false; }
+
+	status = clSetKernelArg(clState->kernel, 1,  sizeof(cl_mem), (void *)&clState->outputBuffer);
+	if(status != CL_SUCCESS) { printf("Error: Setting kernel argument 2.\n"); return false; }
+
+	struct work *work;
+	work = malloc(sizeof(struct work)*2);
+
+	work[0].ready = 0;
+	work[1].ready = 0;
+
+	int frame = 0;
+	int res_frame = 0;
+	int my_block = block;
+	bool need_work = true;
+	unsigned long hashes_done;
+	hashes_done = 0;
+
+	unsigned int h0count = 0;
+
+	while (1) {
+		struct timeval tv_start, tv_end, diff;
+		int threads;
+		bool rc;
+
+		gettimeofday(&tv_start, NULL);
+
+		if (need_work || my_block != block) {
+			frame++;
+			frame %= 2;
+
+			if (opt_debug)
+				fprintf(stderr, "getwork\n");
+
+			/* obtain new work from internal workio thread */
+			if (unlikely(!get_work(mythr, work + frame))) {
+				applog(LOG_ERR, "work retrieval failed, exiting "
+					"gpu mining thread %d", mythr->id);
+				goto out;
+			}
+
+			precalc_hash(&work[frame].blk, (uint32_t *)(work[frame].midstate), (uint32_t *)(work[frame].data + 64));
+
+			work[frame].blk.nonce = 0;
+			work[frame].valid = true;
+			work[frame].ready = 0;
+			
+			my_block = block;
+			need_work = false;
+		}
+	
+		threads = 102400 * 4;
+		globalThreads[0] = threads;
+		localThreads[0] = 128;
+
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0,
+				sizeof(dev_blk_ctx), (void *)&work[frame].blk, 0, NULL, NULL);
+		if(status != CL_SUCCESS) { printf("Error: clEnqueueWriteBuffer failed.\n"); goto out; }
+
+		clFinish(clState->commandQueue);
+
+		status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL, 
+				globalThreads, localThreads, 0,  NULL, NULL);
+		if (status != CL_SUCCESS) { printf("Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)\n"); goto out; }
+
+		clFlush(clState->commandQueue);
+
+		hashes_done = 1024 * threads;
+
+		if (work[res_frame].ready) {
+			rc = false;
+
+			uint32_t bestG = ~0;
+			uint32_t nonce;
+			int j;
+			for(j = 0; j < work[res_frame].ready; j++) {
+				if(res[j]) { 
+					uint32_t start = (work[res_frame].res_nonce + j)<<10;
+					uint32_t my_g, my_nonce;
+					my_g = postcalc_hash(mythr, &work[res_frame].blk, &work[res_frame], start, start + 1026, &my_nonce, &h0count);
+
+					rc = true;
+				}       
+			}       
+			
+			work[res_frame].ready = false;
+
+			uint32_t *target = (uint32_t *)(work[res_frame].target + 24);
+		}
+
+		gettimeofday(&tv_end, NULL);
+		timeval_subtract(&diff, &tv_end, &tv_start);
+
+		hashmeter(thr_id, &diff, hashes_done);
+
+		/* adjust max_nonce to meet target scan time */
+		if (diff.tv_usec > 500000)
+			diff.tv_sec++;
+		if (diff.tv_sec > 0)
+			applog(LOG_INFO, "Not reaching opt_scantime by %d", diff.tv_sec);
+
+		status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, 
+				sizeof(uint32_t) * threads, res, 0, NULL, NULL);   
+		if (status != CL_SUCCESS) { printf("Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)\n"); goto out;}
+
+		res_frame = frame;
+		work[res_frame].ready = threads;
+		work[res_frame].res_nonce = work[res_frame].blk.nonce;
+
+		work[frame].blk.nonce += threads;
+
+		if (unlikely(work[frame].blk.nonce > 4000000 - threads))
+			need_work = true;
+
+		failures = 0;
+	}
+
+out:
+	tq_freeze(mythr->q);
+
+	return NULL;
+}
+
 static void restart_threads(void)
 static void restart_threads(void)
 {
 {
 	int i;
 	int i;
 
 
-	for (i = 0; i < opt_n_threads; i++)
+	for (i = 0; i < opt_n_threads + nDevs; i++)
 		work_restart[i].restart = 1;
 		work_restart[i].restart = 1;
 }
 }
 
 
@@ -948,6 +1127,13 @@ int main (int argc, char *argv[])
 {
 {
 	struct thr_info *thr;
 	struct thr_info *thr;
 	int i;
 	int i;
+	char name[32];
+
+	nDevs = clDevicesNum();
+	if (opt_ndevs) {
+		printf("%i\n", nDevs);
+		return nDevs;
+	}
 
 
 	rpc_url = strdup(DEF_RPC_URL);
 	rpc_url = strdup(DEF_RPC_URL);
 
 
@@ -975,16 +1161,16 @@ int main (int argc, char *argv[])
 		openlog("cpuminer", LOG_PID, LOG_USER);
 		openlog("cpuminer", LOG_PID, LOG_USER);
 #endif
 #endif
 
 
-	work_restart = calloc(opt_n_threads, sizeof(*work_restart));
+	work_restart = calloc(opt_n_threads + nDevs, sizeof(*work_restart));
 	if (!work_restart)
 	if (!work_restart)
 		return 1;
 		return 1;
 
 
-	thr_info = calloc(opt_n_threads + 2, sizeof(*thr));
+	thr_info = calloc(opt_n_threads + 2 + nDevs, sizeof(*thr));
 	if (!thr_info)
 	if (!thr_info)
 		return 1;
 		return 1;
 
 
 	/* init workio thread info */
 	/* init workio thread info */
-	work_thr_id = opt_n_threads;
+	work_thr_id = opt_n_threads + nDevs;
 	thr = &thr_info[work_thr_id];
 	thr = &thr_info[work_thr_id];
 	thr->id = work_thr_id;
 	thr->id = work_thr_id;
 	thr->q = tq_new();
 	thr->q = tq_new();
@@ -999,7 +1185,7 @@ int main (int argc, char *argv[])
 
 
 	/* init longpoll thread info */
 	/* init longpoll thread info */
 	if (want_longpoll) {
 	if (want_longpoll) {
-		longpoll_thr_id = opt_n_threads + 1;
+		longpoll_thr_id = opt_n_threads + nDevs + 1;
 		thr = &thr_info[longpoll_thr_id];
 		thr = &thr_info[longpoll_thr_id];
 		thr->id = longpoll_thr_id;
 		thr->id = longpoll_thr_id;
 		thr->q = tq_new();
 		thr->q = tq_new();
@@ -1015,8 +1201,32 @@ int main (int argc, char *argv[])
 		longpoll_thr_id = -1;
 		longpoll_thr_id = -1;
 
 
 	gettimeofday(&total_tv_start, NULL);
 	gettimeofday(&total_tv_start, NULL);
+
+	/* start gpu mining threads */
+	for (i = 0; i < nDevs; i++) {
+		thr = &thr_info[i];
+
+		thr->id = i;
+		thr->q = tq_new();
+		if (!thr->q)
+			return 1;
+
+		printf("Init GPU %i\n", i);
+		clStates[i] = initCl(i, name, sizeof(name));
+		printf("initCl() finished. Found %s\n", name);
+
+		if (unlikely(pthread_create(&thr->pth, NULL, gpuminer_thread, thr))) {
+			applog(LOG_ERR, "thread %d create failed", i);
+			return 1;
+		}
+
+		sleep(1);	/* don't pound RPC server all at once */
+	}
+
+	fprintf(stderr, "%d gpu miner threads started\n", i);
+
 	/* start mining threads */
 	/* start mining threads */
-	for (i = 0; i < opt_n_threads; i++) {
+	for (i = nDevs; i < nDevs + opt_n_threads; i++) {
 		thr = &thr_info[i];
 		thr = &thr_info[i];
 
 
 		thr->id = i;
 		thr->id = i;
@@ -1032,7 +1242,7 @@ int main (int argc, char *argv[])
 		sleep(1);	/* don't pound RPC server all at once */
 		sleep(1);	/* don't pound RPC server all at once */
 	}
 	}
 
 
-	applog(LOG_INFO, "%d miner threads started, "
+	applog(LOG_INFO, "%d cpu miner threads started, "
 		"using SHA256 '%s' algorithm.",
 		"using SHA256 '%s' algorithm.",
 		opt_n_threads,
 		opt_n_threads,
 		algo_names[opt_algo]);
 		algo_names[opt_algo]);

+ 197 - 0
findnonce.c

@@ -0,0 +1,197 @@
+/*
+ * Copyright 2011 Nils Schneider
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) 
+ * any later version.  See COPYING for more details.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "ocl.h"
+#include "findnonce.h"
+
+const uint32_t SHA256_K[64] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+inline uint32_t ByteReverse(uint32_t value)
+{
+	__asm__ ("bswap %0" : "=r" (value) : "0" (value));
+	return value;
+}
+
+#define rotate(x,y) ((x<<y) | (x>>(sizeof(x)*8-y)))
+#define rotr(x,y) ((x>>y) | (x<<(sizeof(x)*8-y)))
+
+#define R(a, b, c, d, e, f, g, h, w, k) \
+	h = h + (rotate(e, 26) ^ rotate(e, 21) ^ rotate(e, 7)) + (g ^ (e & (f ^ g))) + k + w; \
+	d = d + h; \
+	h = h + (rotate(a, 30) ^ rotate(a, 19) ^ rotate(a, 10)) + ((a & b) | (c & (a | b)))
+
+void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
+	cl_uint A, B, C, D, E, F, G, H;
+
+	A = state[0];
+	B = state[1];
+	C = state[2];
+	D = state[3];
+	E = state[4];
+	F = state[5];
+	G = state[6];
+	H = state[7];
+
+	R(A, B, C, D, E, F, G, H, data[0], SHA256_K[0]); 
+	R(H, A, B, C, D, E, F, G, data[1], SHA256_K[1]);
+	R(G, H, A, B, C, D, E, F, data[2], SHA256_K[2]);
+
+	blk->cty_a = A;
+	blk->cty_b = B;
+	blk->cty_c = C;
+	blk->cty_d = D;
+	blk->cty_e = E;
+	blk->cty_f = F;
+	blk->cty_g = G;
+	blk->cty_h = H;
+
+	blk->ctx_a = state[0];
+	blk->ctx_b = state[1];
+	blk->ctx_c = state[2];
+	blk->ctx_d = state[3];
+	blk->ctx_e = state[4];
+	blk->ctx_f = state[5];
+	blk->ctx_g = state[6];
+	blk->ctx_h = state[7];
+
+	blk->merkle = data[0];
+	blk->ntime = data[1];
+	blk->nbits = data[2];
+
+	blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3));
+	blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000;
+	blk->fW2 = data[2] + (rotr(blk->fW0, 17) ^ rotr(blk->fW0, 19) ^ (blk->fW0 >> 10));
+	blk->fW3 = 0x11002000 + (rotr(blk->fW1, 17) ^ rotr(blk->fW1, 19) ^ (blk->fW1 >> 10));
+	blk->fW15 = 0x00000280 + (rotr(blk->fW0, 7) ^ rotr(blk->fW0, 18) ^ (blk->fW0 >> 3));
+	blk->fW01r = blk->fW0 + (rotr(blk->fW1, 7) ^ rotr(blk->fW1, 18) ^ (blk->fW1 >> 3));
+
+	blk->fcty_e = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
+	blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+}
+
+#define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10)))
+
+#define IR(u) \
+  R(A, B, C, D, E, F, G, H, W[u+0], SHA256_K[u+0]); \
+  R(H, A, B, C, D, E, F, G, W[u+1], SHA256_K[u+1]); \
+  R(G, H, A, B, C, D, E, F, W[u+2], SHA256_K[u+2]); \
+  R(F, G, H, A, B, C, D, E, W[u+3], SHA256_K[u+3]); \
+  R(E, F, G, H, A, B, C, D, W[u+4], SHA256_K[u+4]); \
+  R(D, E, F, G, H, A, B, C, W[u+5], SHA256_K[u+5]); \
+  R(C, D, E, F, G, H, A, B, W[u+6], SHA256_K[u+6]); \
+  R(B, C, D, E, F, G, H, A, W[u+7], SHA256_K[u+7])
+#define FR(u) \
+  R(A, B, C, D, E, F, G, H, P(u+0), SHA256_K[u+0]); \
+  R(H, A, B, C, D, E, F, G, P(u+1), SHA256_K[u+1]); \
+  R(G, H, A, B, C, D, E, F, P(u+2), SHA256_K[u+2]); \
+  R(F, G, H, A, B, C, D, E, P(u+3), SHA256_K[u+3]); \
+  R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
+  R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5]); \
+  R(C, D, E, F, G, H, A, B, P(u+6), SHA256_K[u+6]); \
+  R(B, C, D, E, F, G, H, A, P(u+7), SHA256_K[u+7])
+
+#define PIR(u) \
+  R(F, G, H, A, B, C, D, E, W[u+3], SHA256_K[u+3]); \
+  R(E, F, G, H, A, B, C, D, W[u+4], SHA256_K[u+4]); \
+  R(D, E, F, G, H, A, B, C, W[u+5], SHA256_K[u+5]); \
+  R(C, D, E, F, G, H, A, B, W[u+6], SHA256_K[u+6]); \
+  R(B, C, D, E, F, G, H, A, W[u+7], SHA256_K[u+7])
+
+#define PFR(u) \
+  R(A, B, C, D, E, F, G, H, P(u+0), SHA256_K[u+0]); \
+  R(H, A, B, C, D, E, F, G, P(u+1), SHA256_K[u+1]); \
+  R(G, H, A, B, C, D, E, F, P(u+2), SHA256_K[u+2]); \
+  R(F, G, H, A, B, C, D, E, P(u+3), SHA256_K[u+3]); \
+  R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
+  R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5])
+
+uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
+		       struct work *work, uint32_t start, uint32_t end, 
+		       uint32_t *best_nonce, unsigned int *h0count)
+{
+	cl_uint A, B, C, D, E, F, G, H;
+	cl_uint W[16];
+	cl_uint nonce;
+	cl_uint best_g = ~0;
+
+	work_restart[thr->id].restart = 0;
+	for (nonce = start; nonce != end; nonce+=1) {
+		A = blk->cty_a; B = blk->cty_b;
+		C = blk->cty_c; D = blk->cty_d;
+		E = blk->cty_e; F = blk->cty_f;
+		G = blk->cty_g; H = blk->cty_h;
+		W[0] = blk->merkle; W[1] = blk->ntime;
+		W[2] = blk->nbits; W[3] = nonce;;
+		W[4] = 0x80000000; W[5] = 0x00000000; W[6] = 0x00000000; W[7] = 0x00000000;
+		W[8] = 0x00000000; W[9] = 0x00000000; W[10] = 0x00000000; W[11] = 0x00000000;
+		W[12] = 0x00000000; W[13] = 0x00000000; W[14] = 0x00000000; W[15] = 0x00000280;
+		PIR(0); IR(8);
+		FR(16); FR(24);
+		FR(32); FR(40);
+		FR(48); FR(56);
+
+		W[0] = A + blk->ctx_a; W[1] = B + blk->ctx_b;
+		W[2] = C + blk->ctx_c; W[3] = D + blk->ctx_d;
+		W[4] = E + blk->ctx_e; W[5] = F + blk->ctx_f;
+		W[6] = G + blk->ctx_g; W[7] = H + blk->ctx_h;
+		W[8] = 0x80000000; W[9] = 0x00000000; W[10] = 0x00000000; W[11] = 0x00000000;
+		W[12] = 0x00000000; W[13] = 0x00000000; W[14] = 0x00000000; W[15] = 0x00000100;
+		A = 0x6a09e667; B = 0xbb67ae85;
+		C = 0x3c6ef372; D = 0xa54ff53a;
+		E = 0x510e527f; F = 0x9b05688c;
+		G = 0x1f83d9ab; H = 0x5be0cd19;
+		IR(0); IR(8);
+		FR(16); FR(24);
+		FR(32); FR(40);
+		FR(48); PFR(56);
+
+		if (unlikely(H == 0xA41F32E7)) {
+			(*h0count)++;
+
+			if (unlikely(submit_nonce(thr, work, nonce) == false)) {
+				applog(LOG_ERR, "Failed to submit work, exiting");
+				goto out;
+			}
+
+			G += 0x1f83d9ab;
+			G = ByteReverse(G);
+
+			if (G < best_g) {
+				*best_nonce = nonce;
+				best_g = G;
+			}
+		}
+		if (work_restart[thr->id].restart)
+			break;
+	}
+out:
+	if (best_g == ~0) printf("No best_g found! Error in OpenCL code?\n");
+
+	return best_g;
+}

+ 23 - 0
findnonce.h

@@ -0,0 +1,23 @@
+#define MAXTHREADS 2000000
+
+#ifdef __APPLE_CC__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+#include "miner.h"
+
+typedef struct {
+    cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;
+    cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
+    cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d;
+    cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h;
+    cl_uint merkle; cl_uint ntime; cl_uint nbits; cl_uint nonce;
+	cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15;
+	cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2;
+} dev_blk_ctx;
+
+extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
+extern uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
+			      struct work *work, uint32_t start, uint32_t end,
+			      uint32_t *best_nonce, unsigned int *h0count);

+ 2 - 0
miner.h

@@ -194,6 +194,8 @@ extern bool use_syslog;
 extern struct thr_info *thr_info;
 extern struct thr_info *thr_info;
 extern int longpoll_thr_id;
 extern int longpoll_thr_id;
 extern struct work_restart *work_restart;
 extern struct work_restart *work_restart;
+struct work;
+bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce);
 
 
 extern void applog(int prio, const char *fmt, ...);
 extern void applog(int prio, const char *fmt, ...);
 extern struct thread_q *tq_new(void);
 extern struct thread_q *tq_new(void);

+ 271 - 0
ocl.c

@@ -0,0 +1,271 @@
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h> 
+#include <time.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+#include "findnonce.h"
+#include "ocl.h"
+
+char *file_contents(const char *filename, int *length)
+{
+	FILE *f = fopen(filename, "r");
+	void *buffer;
+
+	if (!f) {
+		fprintf(stderr, "Unable to open %s for reading\n", filename);
+		return NULL;
+	}
+
+	fseek(f, 0, SEEK_END);
+	*length = ftell(f);
+	fseek(f, 0, SEEK_SET);
+
+	buffer = malloc(*length+1);
+	*length = fread(buffer, 1, *length, f);
+	fclose(f);
+	((char*)buffer)[*length] = '\0';
+
+	return (char*)buffer;
+}
+
+int clDevicesNum() {
+	cl_int status = 0;
+
+	cl_uint numPlatforms;
+	cl_platform_id platform = NULL;
+	status = clGetPlatformIDs(0, NULL, &numPlatforms);
+	if(status != CL_SUCCESS)
+	{   
+		printf("Error: Getting Platforms. (clGetPlatformsIDs)\n");
+		return -1;
+	}   
+
+	if(numPlatforms > 0)
+	{   
+		cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
+		status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+		if(status != CL_SUCCESS)
+		{   
+			printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");
+			return -1;
+		}   
+
+		unsigned int i;
+		for(i=0; i < numPlatforms; ++i)
+		{   
+			char pbuff[100];
+			status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
+			if(status != CL_SUCCESS)
+			{   
+				printf("Error: Getting Platform Info. (clGetPlatformInfo)\n");
+				free(platforms);
+				return -1;
+			}   
+			platform = platforms[i];
+			if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))
+			{   
+				break;
+			}  
+		}   
+		free(platforms);
+	}   
+
+	if(platform == NULL) {
+		perror("NULL platform found!\n");
+		return -1;
+	}
+
+	cl_uint numDevices;
+	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
+	if(status != CL_SUCCESS)
+	{
+		printf("Error: Getting Device IDs (num)\n");
+		return -1;
+	}
+
+	return numDevices;
+}
+
+_clState *initCl(int gpu, char *name, size_t nameSize) {
+	cl_int status = 0;
+
+	_clState *clState = malloc(sizeof(_clState));;
+
+	cl_uint numPlatforms;
+	cl_platform_id platform = NULL;
+	status = clGetPlatformIDs(0, NULL, &numPlatforms);
+	if(status != CL_SUCCESS)
+	{   
+		printf("Error: Getting Platforms. (clGetPlatformsIDs)\n");
+		return NULL;
+	}   
+
+	if(numPlatforms > 0)
+	{   
+		cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
+		status = clGetPlatformIDs(numPlatforms, platforms, NULL);
+		if(status != CL_SUCCESS)
+		{   
+			printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");
+			return NULL;
+		}   
+
+		unsigned int i;
+		for(i=0; i < numPlatforms; ++i)
+		{   
+			char pbuff[100];
+			status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
+			if(status != CL_SUCCESS)
+			{   
+				printf("Error: Getting Platform Info. (clGetPlatformInfo)\n");
+				free(platforms);
+				return NULL;
+			}   
+			platform = platforms[i];
+			if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))
+			{   
+				break;
+			}  
+		}   
+		free(platforms);
+	}   
+
+	if(platform == NULL) {
+		perror("NULL platform found!\n");
+		return NULL;
+	}
+
+	cl_uint numDevices;
+	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
+	if(status != CL_SUCCESS)
+	{
+		printf("Error: Getting Device IDs (num)\n");
+		return NULL;
+	}
+
+	cl_device_id *devices;
+	if(numDevices > 0 ) {
+		devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
+
+		/* Now, get the device list data */
+
+		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
+		if(status != CL_SUCCESS)
+		{
+			printf("Error: Getting Device IDs (list)\n");
+			return NULL;
+		}
+
+		printf("List of devices:\n");
+
+		int i;
+		for(i=0; i<numDevices; i++) {
+			char pbuff[100];
+			status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
+			if(status != CL_SUCCESS)
+			{
+				printf("Error: Getting Device Info\n");
+				return NULL;
+			}
+
+			printf("\t%i\t%s\n", i, pbuff);
+		}
+
+		if (gpu >= 0 && gpu < numDevices) {
+			char pbuff[100];
+			status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
+			if(status != CL_SUCCESS)
+			{
+				printf("Error: Getting Device Info\n");
+				return NULL;
+			}
+
+			printf("Selected %i: %s\n", gpu, pbuff);
+			strncpy(name, pbuff, nameSize);
+		} else {
+			printf("Invalid GPU %i\n", gpu);
+			return NULL;
+		}
+
+	} else return NULL;
+
+	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
+
+	clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
+	if(status != CL_SUCCESS) 
+	{   
+		printf("Error: Creating Context. (clCreateContextFromType)\n");
+		return NULL; 
+	}
+
+
+	/////////////////////////////////////////////////////////////////
+	// Load CL file, build CL program object, create CL kernel object
+	/////////////////////////////////////////////////////////////////
+	//
+	const char * filename  = "oclminer.cl";
+	int pl;
+	char *source = file_contents(filename, &pl);
+	size_t sourceSize[] = {(size_t)pl};
+
+	clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
+	if(status != CL_SUCCESS) 
+	{   
+		printf("Error: Loading Binary into cl_program (clCreateProgramWithBinary)\n");
+		return NULL;
+	}
+
+	/* create a cl program executable for all the devices specified */
+	status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
+	if(status != CL_SUCCESS) 
+	{   
+		printf("Error: Building Program (clBuildProgram)\n");
+		size_t logSize;
+		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
+
+		char *log = malloc(logSize);
+		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
+		printf("%s\n", log);
+		return NULL; 
+	}
+
+	/* get a kernel object handle for a kernel with the given name */
+	clState->kernel = clCreateKernel(clState->program, "oclminer", &status);
+	if(status != CL_SUCCESS)
+	{
+		printf("Error: Creating Kernel from program. (clCreateKernel)\n");
+		return NULL;
+	}
+
+	/////////////////////////////////////////////////////////////////
+	// Create an OpenCL command queue
+	/////////////////////////////////////////////////////////////////
+	clState->commandQueue = clCreateCommandQueue( clState->context, devices[gpu], 0, &status);
+	if(status != CL_SUCCESS)
+	{
+		printf("Creating Command Queue. (clCreateCommandQueue)\n");
+		return NULL;
+	}
+
+    clState->inputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(dev_blk_ctx), NULL, &status);
+    if(status != CL_SUCCESS) {
+        printf("Error: clCreateBuffer (inputBuffer)\n");
+        return NULL;
+    }   
+
+	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(uint32_t) * MAXTHREADS, NULL, &status);
+	if(status != CL_SUCCESS) {
+		printf("Error: clCreateBuffer (outputBuffer)\n");
+		return NULL;
+	}
+
+	return clState;
+}
+

+ 22 - 0
ocl.h

@@ -0,0 +1,22 @@
+#ifndef __OCL_H__
+#define __OCL_H__
+#ifdef __APPLE_CC__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+typedef struct {
+	cl_context context;
+	cl_kernel kernel;
+	cl_command_queue commandQueue;
+	cl_program program;
+	cl_mem inputBuffer;
+	cl_mem outputBuffer;
+} _clState;
+
+extern char *file_contents(const char *filename, int *length);
+extern int clDevicesNum();
+extern _clState *initCl(int gpu, char *name, size_t nameSize);
+
+#endif /* __OCL_H__ */

+ 284 - 0
oclminer.cl

@@ -0,0 +1,284 @@
+#define rotr(x, n) rotate(x, (uint)(32 - n))
+
+#define WGS __attribute__((reqd_work_group_size(128, 1, 1)))
+
+__constant uint K[64] = {
+  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+typedef struct {
+    uint ctx_a; uint ctx_b; uint ctx_c; uint ctx_d;
+    uint ctx_e; uint ctx_f; uint ctx_g; uint ctx_h;
+    uint cty_a; uint cty_b; uint cty_c; uint cty_d;
+    uint cty_e; uint cty_f; uint cty_g; uint cty_h;
+    uint merkle; uint ntime; uint nbits; uint nonce;
+    uint fW0; uint fW1; uint fW2; uint fW3; uint fW15;
+    uint fW01r; uint fcty_e; uint fcty_e2;
+} dev_blk_ctx;
+
+__kernel __attribute__((vec_type_hint(uint))) WGS void oclminer(
+	__constant dev_blk_ctx *ctx, __global uint *output)
+{
+  const uint fW0 = ctx->fW0;
+  const uint fW1 = ctx->fW1;
+  const uint fW2 = ctx->fW2;
+  const uint fW3 = ctx->fW3;
+  const uint fW15 = ctx->fW15;
+  const uint fW01r = ctx->fW01r;
+  const uint fcty_e = ctx->fcty_e;
+  const uint fcty_e2 = ctx->fcty_e2;
+  const uint state0 = ctx->ctx_a;
+  const uint state1 = ctx->ctx_b;
+  const uint state2 = ctx->ctx_c;
+  const uint state3 = ctx->ctx_d;
+  const uint state4 = ctx->ctx_e;
+  const uint state5 = ctx->ctx_f;
+  const uint state6 = ctx->ctx_g;
+  const uint state7 = ctx->ctx_h;
+  const uint B1 = ctx->cty_b;
+  const uint C1 = ctx->cty_c;
+  const uint D1 = ctx->cty_d;
+  const uint F1 = ctx->cty_f;
+  const uint G1 = ctx->cty_g;
+  const uint H1 = ctx->cty_h;
+
+  uint A, B, C, D, E, F, G, H;
+  uint W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15;
+  uint it, res = 0;
+  const uint myid = get_global_id(0);
+
+  const uint tnonce = (ctx->nonce + myid)<<10;
+  
+  for(it = 0; it != 1024; it++) {
+    W3 = it ^ tnonce;
+    E = fcty_e +  W3; A = state0 + E; E = E + fcty_e2;
+    D = D1 + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C1 ^ (A & (B1 ^ C1))) + K[ 4] +  0x80000000; H = H1 + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F1) | (G1 & (E | F1)));
+    C = C1 + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B1 ^ (H & (A ^ B1))) + K[ 5]; G = G1 + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F1 & (D | E)));
+    B = B1 + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[ 6]; F = F1 + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[ 7]; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[ 8]; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[15] + 0x00000280; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[16] + fW0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[17] + fW1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W2 = (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + fW2;
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[18] +  W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W3 = W3 + fW3;
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[19] +  W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W4 = (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)) + 0x80000000; 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[20] +  W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W5 = (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[21] +  W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W6 = (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)) + 0x00000280; 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[22] +  W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W7 = (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)) + fW0; 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[23] +  W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W8 = (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)) + fW1; 
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[24] +  W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); 
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[25] +  W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); 
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W14 = 0x00a00055 + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W15 = fW15 + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W0 = fW01r + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10));
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[32] +  W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W1 = fW1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10));
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[33] +  W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[34] +  W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[35] +  W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[36] +  W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[37] +  W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[38] +  W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[39] +  W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); 
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[40] +  W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); 
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[41] +  W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); 
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10));
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[48] +  W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10));
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[49] +  W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[50] +  W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[51] +  W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[52] +  W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[53] +  W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[54] +  W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[55] +  W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); 
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[56] +  W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); 
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[57] +  W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); 
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[58] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[59] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[60] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[61] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[62] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[63] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    
+    W0 = A + state0; W1 = B + state1;
+    W2 = C + state2; W3 = D + state3;
+    W4 = E + state4; W5 = F + state5;
+    W6 = G + state6; W7 = H + state7;
+    H = 0xb0edbdd0 + K[ 0] +  W0; D = 0xa54ff53a + H; H = H + 0x08909ae5;
+    G = 0x1f83d9ab + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (0x9b05688c ^ (D & 0xca0b3af3)) + K[ 1] +  W1; C = 0x3c6ef372 + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & 0x6a09e667) | (0xbb67ae85 & (H | 0x6a09e667)));
+    F = 0x9b05688c + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (0x510e527f ^ (C & (D ^ 0x510e527f))) + K[ 2] +  W2; B = 0xbb67ae85 + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (0x6a09e667 & (G | H)));
+    E = 0x510e527f + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[ 3] +  W3; A = 0x6a09e667 + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[ 4] +  W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[ 5] +  W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[ 6] +  W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[ 7] +  W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[ 8] +  0x80000000; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[15] + 0x00000100; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3));
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[16] +  W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + 0x00a00000;
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[17] +  W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[18] +  W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[19] +  W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[20] +  W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[21] +  W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + 0x00000100 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[22] +  W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W7 = W7 + 0x11002000 + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[23] +  W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W8 = 0x80000000 + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); 
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[24] +  W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); 
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[25] +  W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); 
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W14 = 0x00400022 + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W15 = 0x00000100 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10));
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[32] +  W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10));
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[33] +  W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[34] +  W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[35] +  W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[36] +  W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[37] +  W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[38] +  W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[39] +  W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); 
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[40] +  W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); 
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[41] +  W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); 
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10));
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[48] +  W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10));
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[49] +  W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
+    W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[50] +  W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
+    W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[51] +  W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
+    W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[52] +  W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
+    W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); 
+    C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[53] +  W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
+    W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); 
+    B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[54] +  W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
+    W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); 
+    A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[55] +  W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
+    W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); 
+    H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[56] +  W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
+    W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); 
+    G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[57] +  W9; C = C + G;
+    W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); 
+    F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[58] + W10; B = B + F;
+    W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); 
+    E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[59] + W11; A = A + E;
+    W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); 
+    D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[60] + W12; H = H + D;
+
+	res |= (H==0xa41f32e7);
+  }
+
+  output[myid] = res;
+}