|
|
@@ -1748,6 +1748,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
|
|
|
const int thr_id = thr->id;
|
|
|
struct opencl_thread_data *thrdata = thr->cgpu_data;
|
|
|
_clState *clState = clStates[thr_id];
|
|
|
+ struct cgpu_info *gpu = thr->cgpu;
|
|
|
+
|
|
|
+ if (gpu->dynamic)
|
|
|
+ return;
|
|
|
|
|
|
clFinish(clState->commandQueue);
|
|
|
if (thrdata->res[FOUND]) {
|
|
|
@@ -1778,7 +1782,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|
|
_clState *clState = clStates[thr_id];
|
|
|
const cl_kernel *kernel = &clState->kernel;
|
|
|
const int dynamic_us = opt_dynamic_interval * 1000;
|
|
|
- cl_bool blocking;
|
|
|
|
|
|
cl_int status;
|
|
|
size_t globalThreads[1];
|
|
|
@@ -1786,52 +1789,19 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|
|
unsigned int threads;
|
|
|
int64_t hashes;
|
|
|
|
|
|
- if (gpu->dynamic)
|
|
|
- blocking = CL_TRUE;
|
|
|
- else
|
|
|
- blocking = CL_FALSE;
|
|
|
-
|
|
|
/* This finish flushes the readbuffer set with CL_FALSE later */
|
|
|
- if (!blocking)
|
|
|
+ if (!gpu->dynamic)
|
|
|
clFinish(clState->commandQueue);
|
|
|
|
|
|
- if (gpu->dynamic) {
|
|
|
- struct timeval diff;
|
|
|
- suseconds_t gpu_us;
|
|
|
-
|
|
|
- gettimeofday(&gpu->tv_gpuend, NULL);
|
|
|
- timersub(&gpu->tv_gpuend, &gpu->tv_gpustart, &diff);
|
|
|
- gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
|
|
|
- if (likely(gpu_us >= 0)) {
|
|
|
- gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
|
|
|
-
|
|
|
- /* Try to not let the GPU be out for longer than
|
|
|
- * opt_dynamic_interval in ms, but increase
|
|
|
- * intensity when the system is idle in dynamic mode */
|
|
|
- if (gpu->gpu_us_average > dynamic_us) {
|
|
|
- if (gpu->intensity > MIN_INTENSITY)
|
|
|
- --gpu->intensity;
|
|
|
- } else if (gpu->gpu_us_average < dynamic_us / 2) {
|
|
|
- if (gpu->intensity < MAX_INTENSITY)
|
|
|
- ++gpu->intensity;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
|
|
|
localThreads[0], gpu->intensity);
|
|
|
if (hashes > gpu->max_hashes)
|
|
|
gpu->max_hashes = hashes;
|
|
|
|
|
|
- status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
|
|
|
- if (unlikely(status != CL_SUCCESS)) {
|
|
|
- applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
|
|
|
- return -1;
|
|
|
- }
|
|
|
-
|
|
|
/* MAXBUFFERS entry is used as a flag to say nonces exist */
|
|
|
if (thrdata->res[FOUND]) {
|
|
|
/* Clear the buffer again */
|
|
|
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
|
|
|
+ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
|
|
BUFFERSIZE, blank_res, 0, NULL, NULL);
|
|
|
if (unlikely(status != CL_SUCCESS)) {
|
|
|
applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
|
|
|
@@ -1846,12 +1816,17 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|
|
postcalc_hash_async(thr, work, thrdata->res);
|
|
|
}
|
|
|
memset(thrdata->res, 0, BUFFERSIZE);
|
|
|
- if (!blocking)
|
|
|
- clFinish(clState->commandQueue);
|
|
|
+ clFinish(clState->commandQueue);
|
|
|
}
|
|
|
|
|
|
gettimeofday(&gpu->tv_gpustart, NULL);
|
|
|
|
|
|
+ status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
|
|
|
+ if (unlikely(status != CL_SUCCESS)) {
|
|
|
+ applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+
|
|
|
if (clState->goffset) {
|
|
|
size_t global_work_offset[1];
|
|
|
|
|
|
@@ -1866,13 +1841,37 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|
|
return -1;
|
|
|
}
|
|
|
|
|
|
- status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
|
|
|
+ status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
|
|
BUFFERSIZE, thrdata->res, 0, NULL, NULL);
|
|
|
if (unlikely(status != CL_SUCCESS)) {
|
|
|
applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
|
|
|
return -1;
|
|
|
}
|
|
|
|
|
|
+ if (gpu->dynamic) {
|
|
|
+ struct timeval diff;
|
|
|
+ suseconds_t gpu_us;
|
|
|
+
|
|
|
+ clFinish(clState->commandQueue);
|
|
|
+ gettimeofday(&gpu->tv_gpuend, NULL);
|
|
|
+ timersub(&gpu->tv_gpuend, &gpu->tv_gpustart, &diff);
|
|
|
+ gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
|
|
|
+ if (likely(gpu_us >= 0)) {
|
|
|
+ gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
|
|
|
+
|
|
|
+ /* Try to not let the GPU be out for longer than
|
|
|
+ * opt_dynamic_interval in ms, but increase
|
|
|
+ * intensity when the system is idle in dynamic mode */
|
|
|
+ if (gpu->gpu_us_average > dynamic_us) {
|
|
|
+ if (gpu->intensity > MIN_INTENSITY)
|
|
|
+ --gpu->intensity;
|
|
|
+ } else if (gpu->gpu_us_average < dynamic_us / 2) {
|
|
|
+ if (gpu->intensity < MAX_INTENSITY)
|
|
|
+ ++gpu->intensity;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/* The amount of work scanned can fluctuate when intensity changes
|
|
|
* and since we do this one cycle behind, we increment the work more
|
|
|
* than enough to prevent repeating work */
|