Browse Source

Dramatically simplify the dynamic intensity calculation by oversampling many runs through the opencl kernel till we're likely well within the timer resolution on windows.

Con Kolivas 13 years ago
parent
commit
7450b25e75
2 changed files with 21 additions and 54 deletions
  1. 20 51
      driver-opencl.c
  2. 1 3
      miner.h

+ 20 - 51
driver-opencl.c

@@ -1463,10 +1463,8 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
 	const int thr_id = thr->id;
 	struct opencl_thread_data *thrdata = thr->cgpu_data;
 	_clState *clState = clStates[thr_id];
-	struct cgpu_info *gpu = thr->cgpu;
 
-	if (!gpu->dynamic)
-		clFinish(clState->commandQueue);
+	clFinish(clState->commandQueue);
 
 	if (thrdata->res[FOUND]) {
 		thrdata->last_work = &thrdata->_last_work;
@@ -1496,7 +1494,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	_clState *clState = clStates[thr_id];
 	const cl_kernel *kernel = &clState->kernel;
 	const int dynamic_us = opt_dynamic_interval * 1000;
-	struct timeval tv_gpuend;
 
 	cl_int status;
 	size_t globalThreads[1];
@@ -1504,8 +1501,25 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	int64_t hashes;
 
 	/* This finish flushes the readbuffer set with CL_FALSE later */
-	if (!gpu->dynamic)
-		clFinish(clState->commandQueue);
+	clFinish(clState->commandQueue);
+
+	/* Windows' timer resolution is only 15ms so oversample 5x */
+	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 75) {
+		struct timeval tv_gpuend;
+		double gpu_us;
+
+		gettimeofday(&tv_gpuend, NULL);
+		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
+		if (gpu_us > dynamic_us) {
+			if (gpu->intensity > MIN_INTENSITY)
+				--gpu->intensity;
+		} else if (gpu_us < dynamic_us / 2) {
+			if (gpu->intensity < MAX_INTENSITY)
+				++gpu->intensity;
+		}
+		memcpy(&(gpu->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
+		gpu->intervals = 0;
+	}
 
 	set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
 	if (hashes > gpu->max_hashes)
@@ -1532,18 +1546,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		clFinish(clState->commandQueue);
 	}
 
-	if (gpu->dynamic) {
-		gettimeofday(&gpu->tv_gpumid, NULL);
-		if (gpu->new_work) {
-			gpu->new_work = false;
-			gpu->intervals = gpu->hit = 0;
-		}
-		if (!gpu->intervals) {
-			gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
-			gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
-		}
-	}
-
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
@@ -1571,39 +1573,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 		return -1;
 	}
 
-	if (gpu->dynamic) {
-		double gpu_us;
-
-		clFinish(clState->commandQueue);
-		/* Windows returns the same time for gettimeofday due to its
-		 * 15ms timer resolution, so we must average the result over
-		 * at least 5 values that are actually different to get an
-		 * accurate result */
-		gpu->intervals++;
-		gettimeofday(&tv_gpuend, NULL);
-		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
-		if (gpu_us > 0 && ++gpu->hit > 4) {
-			gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
-			/* Very rarely we may get an overflow so put an upper
-			 * limit on the detected time */
-			if (unlikely(gpu->gpu_us_average > 0 && gpu_us > gpu->gpu_us_average * 4))
-				gpu_us = gpu->gpu_us_average * 4;
-			gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
-
-			/* Try to not let the GPU be out for longer than
-			 * opt_dynamic_interval in ms, but increase
-			 * intensity when the system is idle in dynamic mode */
-			if (gpu->gpu_us_average > dynamic_us) {
-				if (gpu->intensity > MIN_INTENSITY)
-					--gpu->intensity;
-			} else if (gpu->gpu_us_average < dynamic_us / 2) {	
-				if (gpu->intensity < MAX_INTENSITY)
-					++gpu->intensity;
-			}
-			gpu->intervals = gpu->hit = 0;
-		}
-	}
-
 	/* The amount of work scanned can fluctuate when intensity changes
 	 * and since we do this one cycle behind, we increment the work more
 	 * than enough to prevent repeating work */

+ 1 - 3
miner.h

@@ -401,9 +401,7 @@ struct cgpu_info {
 	size_t shaders;
 #endif
 	struct timeval tv_gpustart;
-	struct timeval tv_gpumid;
-	double gpu_us_average;
-	int intervals, hit;
+	int intervals;
 #endif
 
 	bool new_work;