Browse Source

Do away with the flaky free_work api in the driver code which would often lose the work data in opencl and simply flush it before exiting the opencl scanhash.

Con Kolivas 13 years ago
parent
commit
2822fe8ccc
3 changed files with 26 additions and 41 deletions
  1. 26 38
      driver-opencl.c
  2. 0 2
      miner.c
  3. 0 1
      miner.h

+ 26 - 38
driver-opencl.c

@@ -1675,18 +1675,6 @@ static bool opencl_thread_init(struct thr_info *thr)
 	return true;
 	return true;
 }
 }
 
 
-static void opencl_free_work(struct thr_info *thr, struct work *work)
-{
-	const int thr_id = thr->id;
-	struct opencl_thread_data *thrdata = thr->cgpu_data;
-	_clState *clState = clStates[thr_id];
-
-	clFinish(clState->commandQueue);
-
-	if (thrdata->res[FOUND])
-		// FIXME: This should copy work, not thrdata->_last_work
-		thrdata->last_work = copy_work(&thrdata->_last_work);
-}
 
 
 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
 {
@@ -1716,9 +1704,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	size_t localThreads[1] = { clState->wsize };
 	size_t localThreads[1] = { clState->wsize };
 	int64_t hashes;
 	int64_t hashes;
 
 
-	/* This finish flushes the readbuffer set with CL_FALSE later */
-	clFinish(clState->commandQueue);
-
 	/* Windows' timer resolution is only 15ms so oversample 5x */
 	/* Windows' timer resolution is only 15ms so oversample 5x */
 	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
 	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
 		struct timeval tv_gpuend;
 		struct timeval tv_gpuend;
@@ -1741,28 +1726,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	if (hashes > gpu->max_hashes)
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
 		gpu->max_hashes = hashes;
 
 
-	/* FOUND entry is used as a counter to say how many nonces exist */
-	if (thrdata->res[FOUND]) {
-		/* Clear the buffer again */
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-				BUFFERSIZE, blank_res, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS)) {
-			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
-			return -1;
-		}
-		if (unlikely(thrdata->last_work)) {
-			applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
-			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
-			free_work(thrdata->last_work);
-			thrdata->last_work = NULL;
-		} else {
-			applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
-			postcalc_hash_async(thr, work, thrdata->res);
-		}
-		memset(thrdata->res, 0, BUFFERSIZE);
-		clFinish(clState->commandQueue);
-	}
-
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
@@ -1795,6 +1758,32 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	 * than enough to prevent repeating work */
 	 * than enough to prevent repeating work */
 	work->blk.nonce += gpu->max_hashes;
 	work->blk.nonce += gpu->max_hashes;
 
 
+	/* This finish flushes the readbuffer set with CL_FALSE in clEnqueueReadBuffer */
+	clFinish(clState->commandQueue);
+
+	/* FOUND entry is used as a counter to say how many nonces exist */
+	if (thrdata->res[FOUND]) {
+		/* Clear the buffer again */
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+				BUFFERSIZE, blank_res, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS)) {
+			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
+			return -1;
+		}
+		if (unlikely(thrdata->last_work)) {
+			applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
+			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
+			free_work(thrdata->last_work);
+			thrdata->last_work = NULL;
+		} else {
+			applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
+			postcalc_hash_async(thr, work, thrdata->res);
+		}
+		memset(thrdata->res, 0, BUFFERSIZE);
+		/* This finish flushes the writebuffer set with CL_FALSE in clEnqueueWriteBuffer */
+		clFinish(clState->commandQueue);
+	}
+
 	return hashes;
 	return hashes;
 }
 }
 
 
@@ -1820,7 +1809,6 @@ struct device_api opencl_api = {
 	.get_api_extra_device_status = get_opencl_api_extra_device_status,
 	.get_api_extra_device_status = get_opencl_api_extra_device_status,
 	.thread_prepare = opencl_thread_prepare,
 	.thread_prepare = opencl_thread_prepare,
 	.thread_init = opencl_thread_init,
 	.thread_init = opencl_thread_init,
-	.free_work = opencl_free_work,
 	.prepare_work = opencl_prepare_work,
 	.prepare_work = opencl_prepare_work,
 	.scanhash = opencl_scanhash,
 	.scanhash = opencl_scanhash,
 	.thread_shutdown = opencl_thread_shutdown,
 	.thread_shutdown = opencl_thread_shutdown,

+ 0 - 2
miner.c

@@ -6361,8 +6361,6 @@ void *miner_thread(void *userdata)
 
 
 	while (1) {
 	while (1) {
 		mythr->work_restart = false;
 		mythr->work_restart = false;
-		if (api->free_work && likely(work->pool))
-			api->free_work(mythr, work);
 		get_work(work, mythr, thr_id);
 		get_work(work, mythr, thr_id);
 		cgpu->new_work = true;
 		cgpu->new_work = true;
 
 

+ 0 - 1
miner.h

@@ -312,7 +312,6 @@ struct device_api {
 	bool (*thread_prepare)(struct thr_info*);
 	bool (*thread_prepare)(struct thr_info*);
 	uint64_t (*can_limit_work)(struct thr_info*);
 	uint64_t (*can_limit_work)(struct thr_info*);
 	bool (*thread_init)(struct thr_info*);
 	bool (*thread_init)(struct thr_info*);
-	void (*free_work)(struct thr_info*, struct work*);
 	bool (*prepare_work)(struct thr_info*, struct work*);
 	bool (*prepare_work)(struct thr_info*, struct work*);
 	int64_t (*scanhash)(struct thr_info*, struct work*, int64_t);
 	int64_t (*scanhash)(struct thr_info*, struct work*, int64_t);
 	void (*hw_error)(struct thr_info*);
 	void (*hw_error)(struct thr_info*);