13 years ago · be360ffae2
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1682,10 +1682,8 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
 
				 	const int thr_id = thr->id;
			
 
				 	struct opencl_thread_data *thrdata = thr->cgpu_data;
			
 
				 	_clState *clState = clStates[thr_id];
			
 
				-	struct cgpu_info *gpu = thr->cgpu;
			
 
				 
			
 
				-	if (!gpu->dynamic)
			
 
				-		clFinish(clState->commandQueue);
			
 
				+	clFinish(clState->commandQueue);
			
 
				 
			
 
				 	if (thrdata->res[FOUND]) {
			
 
				 		thrdata->last_work = &thrdata->_last_work;
			
@@ -1715,7 +1713,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 	_clState *clState = clStates[thr_id];
			
 
				 	const cl_kernel *kernel = &clState->kernel;
			
 
				 	const int dynamic_us = opt_dynamic_interval * 1000;
			
 
				-	struct timeval tv_gpuend;
			
 
				 
			
 
				 	cl_int status;
			
 
				 	size_t globalThreads[1];
			
@@ -1723,8 +1720,25 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 	int64_t hashes;
			
 
				 
			
 
				 	/* This finish flushes the readbuffer set with CL_FALSE later */
			
 
				-	if (!gpu->dynamic)
			
 
				-		clFinish(clState->commandQueue);
			
 
				+	clFinish(clState->commandQueue);
			
 
				+
			
 
				+	/* Windows' timer resolution is only 15ms so oversample 5x */
			
 
				+	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 75) {
			
 
				+		struct timeval tv_gpuend;
			
 
				+		double gpu_us;
			
 
				+
			
 
				+		gettimeofday(&tv_gpuend, NULL);
			
 
				+		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
			
 
				+		if (gpu_us > dynamic_us) {
			
 
				+			if (gpu->intensity > MIN_INTENSITY)
			
 
				+				--gpu->intensity;
			
 
				+		} else if (gpu_us < dynamic_us / 2) {
			
 
				+			if (gpu->intensity < MAX_INTENSITY)
			
 
				+				++gpu->intensity;
			
 
				+		}
			
 
				+		memcpy(&(gpu->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
			
 
				+		gpu->intervals = 0;
			
 
				+	}
			
 
				 
			
 
				 	set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
			
 
				 	if (hashes > gpu->max_hashes)
			
@@ -1751,18 +1765,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 		clFinish(clState->commandQueue);
			
 
				 	}
			
 
				 
			
 
				-	if (gpu->dynamic) {
			
 
				-		gettimeofday(&gpu->tv_gpumid, NULL);
			
 
				-		if (gpu->new_work) {
			
 
				-			gpu->new_work = false;
			
 
				-			gpu->intervals = gpu->hit = 0;
			
 
				-		}
			
 
				-		if (!gpu->intervals) {
			
 
				-			gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
			
 
				-			gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
			
 
				 	if (unlikely(status != CL_SUCCESS)) {
			
 
				 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
			
@@ -1790,39 +1792,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 
				 		return -1;
			
 
				 	}
			
 
				 
			
 
				-	if (gpu->dynamic) {
			
 
				-		double gpu_us;
			
 
				-
			
 
				-		clFinish(clState->commandQueue);
			
 
				-		/* Windows returns the same time for gettimeofday due to its
			
 
				-		 * 15ms timer resolution, so we must average the result over
			
 
				-		 * at least 5 values that are actually different to get an
			
 
				-		 * accurate result */
			
 
				-		gpu->intervals++;
			
 
				-		gettimeofday(&tv_gpuend, NULL);
			
 
				-		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
			
 
				-		if (gpu_us > 0 && ++gpu->hit > 4) {
			
 
				-			gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
			
 
				-			/* Very rarely we may get an overflow so put an upper
			
 
				-			 * limit on the detected time */
			
 
				-			if (unlikely(gpu->gpu_us_average > 0 && gpu_us > gpu->gpu_us_average * 4))
			
 
				-				gpu_us = gpu->gpu_us_average * 4;
			
 
				-			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
			
 
				-
			
 
				-			/* Try to not let the GPU be out for longer than
			
 
				-			 * opt_dynamic_interval in ms, but increase
			
 
				-			 * intensity when the system is idle in dynamic mode */
			
 
				-			if (gpu->gpu_us_average > dynamic_us) {
			
 
				-				if (gpu->intensity > MIN_INTENSITY)
			
 
				-					--gpu->intensity;
			
 
				-			} else if (gpu->gpu_us_average < dynamic_us / 2) {	
			
 
				-				if (gpu->intensity < MAX_INTENSITY)
			
 
				-					++gpu->intensity;
			
 
				-			}
			
 
				-			gpu->intervals = gpu->hit = 0;
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				 	/* The amount of work scanned can fluctuate when intensity changes
			
 
				 	 * and since we do this one cycle behind, we increment the work more
			
 
				 	 * than enough to prevent repeating work */
			
--- a/miner.h
+++ b/miner.h
@@ -454,9 +454,7 @@ struct cgpu_info {
 
				 	size_t shaders;
			
 
				 #endif
			
 
				 	struct timeval tv_gpustart;
			
 
				-	struct timeval tv_gpumid;
			
 
				-	double gpu_us_average;
			
 
				-	int intervals, hit;
			
 
				+	int intervals;
			
 
				 #endif
			
 
				 
			
 
				 	bool new_work;