13 years ago · 211063908b
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1748,6 +1748,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
 
				 	const int thr_id = thr->id;
			
 
				 	struct opencl_thread_data *thrdata = thr->cgpu_data;
			
 
				 	_clState *clState = clStates[thr_id];
			
 
				+	struct cgpu_info *gpu = thr->cgpu;
			
 
				+
			
 
				+	if (gpu->dynamic)
			
 
				+		return;
			
 
				 
			
 
				 	clFinish(clState->commandQueue);
			
 
				 	if (thrdata->res[FOUND]) {
			
@@ -1778,7 +1782,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 	_clState *clState = clStates[thr_id];
			
 
				 	const cl_kernel *kernel = &clState->kernel;
			
 
				 	const int dynamic_us = opt_dynamic_interval * 1000;
			
 
				-	cl_bool blocking;
			
 
				 
			
 
				 	cl_int status;
			
 
				 	size_t globalThreads[1];
			
@@ -1786,52 +1789,19 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 	unsigned int threads;
			
 
				 	int64_t hashes;
			
 
				 
			
 
				-	if (gpu->dynamic)
			
 
				-		blocking = CL_TRUE;
			
 
				-	else
			
 
				-		blocking = CL_FALSE;
			
 
				-
			
 
				 	/* This finish flushes the readbuffer set with CL_FALSE later */
			
 
				-	if (!blocking)
			
 
				+	if (!gpu->dynamic)
			
 
				 		clFinish(clState->commandQueue);
			
 
				 
			
 
				-	if (gpu->dynamic) {
			
 
				-		struct timeval diff;
			
 
				-		suseconds_t gpu_us;
			
 
				-
			
 
				-		gettimeofday(&gpu->tv_gpuend, NULL);
			
 
				-		timersub(&gpu->tv_gpuend, &gpu->tv_gpustart, &diff);
			
 
				-		gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
			
 
				-		if (likely(gpu_us >= 0)) {
			
 
				-			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
			
 
				-
			
 
				-			/* Try to not let the GPU be out for longer than 
			
 
				-			 * opt_dynamic_interval in ms, but increase
			
 
				-			 * intensity when the system is idle in dynamic mode */
			
 
				-			if (gpu->gpu_us_average > dynamic_us) {
			
 
				-				if (gpu->intensity > MIN_INTENSITY)
			
 
				-					--gpu->intensity;
			
 
				-			} else if (gpu->gpu_us_average < dynamic_us / 2) {
			
 
				-				if (gpu->intensity < MAX_INTENSITY)
			
 
				-					++gpu->intensity;
			
 
				-			}
			
 
				-		}
			
 
				-	}
			
 
				 	set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
			
 
				 			   localThreads[0], gpu->intensity);
			
 
				 	if (hashes > gpu->max_hashes)
			
 
				 		gpu->max_hashes = hashes;
			
 
				 
			
 
				-	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
			
 
				-	if (unlikely(status != CL_SUCCESS)) {
			
 
				-		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
			
 
				-		return -1;
			
 
				-	}
			
 
				-
			
 
				 	/* MAXBUFFERS entry is used as a flag to say nonces exist */
			
 
				 	if (thrdata->res[FOUND]) {
			
 
				 		/* Clear the buffer again */
			
 
				-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
			
 
				+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
			
 
				 				BUFFERSIZE, blank_res, 0, NULL, NULL);
			
 
				 		if (unlikely(status != CL_SUCCESS)) {
			
 
				 			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
			
@@ -1846,12 +1816,17 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 			postcalc_hash_async(thr, work, thrdata->res);
			
 
				 		}
			
 
				 		memset(thrdata->res, 0, BUFFERSIZE);
			
 
				-		if (!blocking)
			
 
				-			clFinish(clState->commandQueue);
			
 
				+		clFinish(clState->commandQueue);
			
 
				 	}
			
 
				 
			
 
				 	gettimeofday(&gpu->tv_gpustart, NULL);
			
 
				 
			
 
				+	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
			
 
				+	if (unlikely(status != CL_SUCCESS)) {
			
 
				+		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
			
 
				+		return -1;
			
 
				+	}
			
 
				+
			
 
				 	if (clState->goffset) {
			
 
				 		size_t global_work_offset[1];
			
 
				 
			
@@ -1866,13 +1841,37 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				-	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
			
 
				+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
			
 
				 			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
			
 
				 	if (unlikely(status != CL_SUCCESS)) {
			
 
				 		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
			
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				+	if (gpu->dynamic) {
			
 
				+		struct timeval diff;
			
 
				+		suseconds_t gpu_us;
			
 
				+
			
 
				+		clFinish(clState->commandQueue);
			
 
				+		gettimeofday(&gpu->tv_gpuend, NULL);
			
 
				+		timersub(&gpu->tv_gpuend, &gpu->tv_gpustart, &diff);
			
 
				+		gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
			
 
				+		if (likely(gpu_us >= 0)) {
			
 
				+			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
			
 
				+
			
 
				+			/* Try to not let the GPU be out for longer than
			
 
				+			 * opt_dynamic_interval in ms, but increase
			
 
				+			 * intensity when the system is idle in dynamic mode */
			
 
				+			if (gpu->gpu_us_average > dynamic_us) {
			
 
				+				if (gpu->intensity > MIN_INTENSITY)
			
 
				+					--gpu->intensity;
			
 
				+			} else if (gpu->gpu_us_average < dynamic_us / 2) {
			
 
				+				if (gpu->intensity < MAX_INTENSITY)
			
 
				+					++gpu->intensity;
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				 	/* The amount of work scanned can fluctuate when intensity changes
			
 
				 	 * and since we do this one cycle behind, we increment the work more
			
 
				 	 * than enough to prevent repeating work */