|
|
@@ -4128,8 +4128,14 @@ static void *watchdog_thread(void *userdata)
|
|
|
|
|
|
//for (i = 0; i < mining_threads; i++) {
|
|
|
for (i = 0; i < gpu_threads; i++) {
|
|
|
- struct thr_info *thr = &thr_info[i];
|
|
|
- int gpu = thr->cgpu->cpu_gpu;
|
|
|
+ struct thr_info *thr;
|
|
|
+ int gpu;
|
|
|
+
|
|
|
+ /* Use only one thread per device to determine if the GPU is healthy */
|
|
|
+ if (i >= nDevs)
|
|
|
+ break;
|
|
|
+ thr = &thr_info[i];
|
|
|
+ gpu = thr->cgpu->cpu_gpu;
|
|
|
|
|
|
/* Thread is waiting on getwork or disabled */
|
|
|
if (thr->getwork || !gpu_devices[gpu])
|
|
|
@@ -4150,6 +4156,7 @@ static void *watchdog_thread(void *userdata)
|
|
|
applog(LOG_ERR, "Thread %d not responding for more than 10 minutes, GPU %d declared DEAD!", i, gpu);
|
|
|
} else if (now.tv_sec - thr->sick.tv_sec > 60 && gpus[i].status == LIFE_SICK) {
|
|
|
/* Attempt to restart a GPU once every minute */
|
|
|
+ gettimeofday(&thr->sick, NULL);
|
|
|
reinit_device(thr->cgpu);
|
|
|
}
|
|
|
}
|