Browse Source

Merge branch 'diakgcn' of https://github.com/Diapolo/cgminer into diakgcn

Con Kolivas 14 years ago
parent
commit
c462ba5bc9
5 changed files with 20 additions and 8 deletions
  1. 1 1
      device-gpu.c
  2. 1 1
      diakgcn120208.cl
  3. 4 6
      findnonce.c
  4. 13 0
      ocl.c
  5. 1 0
      ocl.h

+ 1 - 1
device-gpu.c

@@ -740,7 +740,7 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
 	nonces = alloca(sizeof(uint) * vwidth);
 	for (i = 0; i < vwidth; i++)
 		nonces[i] = blk->nonce + i;
-	status |= clSetKernelArg(*kernel, num++, vwidth * sizeof(uint), (void *)nonces);
+	CL_SET_VARG(vwidth, nonces);
 
 	CL_SET_BLKARG(W16);
 	CL_SET_BLKARG(W17);

+ 1 - 1
diakgcn120208.cl

@@ -57,7 +57,7 @@ __kernel
 			const uint state0, const uint state1, const uint state2, const uint state3,
 			const uint state4, const uint state5, const uint state6, const uint state7,
 			const uint state0A, const uint state0B,
-			__global int * output)
+			__global uint * output)
 {
 	u W[17];
 	u V[8];

+ 4 - 6
findnonce.c

@@ -66,9 +66,6 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
 	blk->cty_a = A;
 	blk->cty_b = B;
 	blk->cty_c = C;
-
-	blk->C1addK5 = C + SHA256_K[5];
-
 	blk->cty_d = D;
 
 	blk->D1A = D + 0xb956c25b;
@@ -93,12 +90,12 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
 
 	blk->W16 = blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3));
 	blk->W17 = blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000;
-	blk->PreVal4 = blk->fcty_e = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
+	blk->PreVal4 = blk->fcty_e = blk->ctx_e + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
 	blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
 	blk->PreVal4_2 = blk->PreVal4 + blk->T1;
-	blk->PreVal0 = blk->PreVal4 + state[0];
+	blk->PreVal0 = blk->PreVal4 + blk->ctx_a;
 	blk->PreW31 = 0x00000280 + (rotr(blk->W16,  7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
-	blk->PreW32 = blk->W16 + ((rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3)));
+	blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3));
 	blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10));
 	blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10));
 
@@ -117,6 +114,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
 	blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
 	blk->T1substate0 = blk->ctx_a - blk->T1;
 
+	blk->C1addK5 = blk->cty_c + SHA256_K[5];
 	blk->B1addK6 = blk->cty_b + SHA256_K[6];
 	blk->PreVal0addK7 = blk->PreVal0 + SHA256_K[7];
 	blk->W16addK16 = blk->W16 + SHA256_K[16];

+ 13 - 0
ocl.c

@@ -302,6 +302,19 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	find = strstr(extensions, camo);
 	if (find)
 		clState->hasBitAlign = true;
+		
+	/* Check for OpenCL >= 1.1 support, needed for global offset parameter usage. */
+	char * devoclver = malloc(1024);
+	const char * ocl10 = "OpenCL 1.0";
+
+	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_VERSION, 1024, (void *)devoclver, NULL);
+	if (status != CL_SUCCESS) {
+		applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_VERSION");
+		return NULL;
+	}
+	find = strstr(devoclver, ocl10);
+	if !(find)
+		clState->hasOpenCL11plus = true;
 
 	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
 	if (status != CL_SUCCESS) {

+ 1 - 0
ocl.h

@@ -18,6 +18,7 @@ typedef struct {
 	cl_program program;
 	cl_mem outputBuffer;
 	bool hasBitAlign;
+	bool hasOpenCL11plus;
 	cl_uint preferred_vwidth;
 	size_t max_work_size;
 	size_t work_size;