Browse Source

Keep track of bad nonces independently from generic hw errors

Luke Dashjr 12 years ago
parent
commit
af7a9aad22
7 changed files with 29 additions and 26 deletions
  1. 10 10
      driver-bitforce.c
  2. 1 4
      driver-modminer.c
  3. 1 4
      driver-x6500.c
  4. 1 1
      driver-ztex.c
  5. 1 1
      findnonce.c
  6. 12 5
      miner.c
  7. 3 1
      miner.h

+ 10 - 10
driver-bitforce.c

@@ -337,7 +337,7 @@ void bitforce_comm_error(struct thr_info *thr)
 	data->noncebuf[0] = '\0';
 	applog(LOG_ERR, "%"PRIpreprv": Comms error", bitforce->proc_repr);
 	dev_error(bitforce, REASON_DEV_COMMS_ERROR);
-	inc_hw_errors(thr);
+	inc_hw_errors_only(thr);
 	BFclose(*p_fdDev);
 	int fd = *p_fdDev = BFopen(bitforce->device_path);
 	if (fd == -1)
@@ -590,7 +590,7 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)
 	if (unlikely(!pdevbuf[0])) {
 		struct thr_info *thr = bitforce->thr[0];
 		applog(LOG_ERR, "%"PRIpreprv": Error: Get temp returned empty string/timed out", bitforce->proc_repr);
-		inc_hw_errors(thr);
+		inc_hw_errors_only(thr);
 		return false;
 	}
 
@@ -623,7 +623,7 @@ static bool bitforce_get_temp(struct cgpu_info *bitforce)
 		applog(LOG_WARNING, "%"PRIpreprv": Garbled response probably throttling, clearing buffer", bitforce->proc_repr);
 		dev_error(bitforce, REASON_DEV_THROTTLE);
 		/* Count throttling episodes as hardware errors */
-		inc_hw_errors(thr);
+		inc_hw_errors_only(thr);
 		bitforce_clear_buffer(bitforce);
 		return false;
 	}
@@ -1034,7 +1034,7 @@ noqr:
 		applog(LOG_ERR, "%"PRIpreprv": took %lums - longer than %lums", bitforce->proc_repr,
 			tv_to_ms(elapsed), (unsigned long)BITFORCE_TIMEOUT_MS);
 		dev_error(bitforce, REASON_DEV_OVER_HEAT);
-		inc_hw_errors(thr);
+		inc_hw_errors_only(thr);
 
 		/* If the device truly throttled, it didn't process the job and there
 		 * are no results. But check first, just in case we're wrong about it
@@ -1090,7 +1090,7 @@ noqr:
 
 	applog(LOG_DEBUG, "%"PRIpreprv": waited %dms until %s", bitforce->proc_repr, bitforce->wait_ms, pdevbuf);
 	if (count < 0 && strncasecmp(pdevbuf, "I", 1)) {
-		inc_hw_errors(thr);
+		inc_hw_errors_only(thr);
 		applog(LOG_WARNING, "%"PRIpreprv": Error: Get result reports: %s", bitforce->proc_repr, pdevbuf);
 		bitforce_clear_buffer(bitforce);
 	}
@@ -1160,7 +1160,7 @@ void bitforce_process_qresult_line(struct thr_info *thr, char *buf, struct work
 	    || bitforce_process_qresult_line_i(thr, midstate, datatail, buf, thr->next_work) ))
 	{
 		applog(LOG_ERR, "%"PRIpreprv": Failed to find work for queued results", bitforce->proc_repr);
-		inc_hw_errors(thr);
+		inc_hw_errors_only(thr);
 	}
 }
 
@@ -1562,7 +1562,7 @@ bool bitforce_queue_do_results(struct thr_info *thr)
 	if (unlikely(count < 0))
 	{
 		applog(LOG_ERR, "%"PRIpreprv": Received unexpected queue result response: %s", bitforce->proc_repr, noncebuf);
-		inc_hw_errors(thr);
+		inc_hw_errors_only(thr);
 		return false;
 	}
 	
@@ -1615,7 +1615,7 @@ bool bitforce_queue_do_results(struct thr_info *thr)
 		if (unlikely(!thiswork))
 		{
 			applog(LOG_ERR, "%"PRIpreprv": Failed to find work for queue results: %s", chip_cgpu->proc_repr, buf);
-			inc_hw_errors(chip_thr);
+			inc_hw_errors_only(chip_thr);
 			goto next_qline;
 		}
 		
@@ -1726,7 +1726,7 @@ bool bitforce_queue_append(struct thr_info *thr, struct work *work)
 		{
 			// Problem sending queue, retry again in a few seconds
 			applog(LOG_ERR, "%"PRIpreprv": Failed to send queue", bitforce->proc_repr);
-			inc_hw_errors(thr);
+			inc_hw_errors_only(thr);
 			data->want_to_send_queue = true;
 		}
 	}
@@ -1872,7 +1872,7 @@ void bitforce_queue_poll(struct thr_info *thr)
 			if (!data->queued)
 			{
 				applog(LOG_ERR, "%"PRIpreprv": Failed to send queue, and queue empty; retrying after 1 second", bitforce->proc_repr);
-				inc_hw_errors(thr);
+				inc_hw_errors_only(thr);
 				sleep_us = 1000000;
 			}
 	

+ 1 - 4
driver-modminer.c

@@ -712,10 +712,7 @@ modminer_process_results(struct thr_info*thr)
 				submit_nonce(thr, work, nonce);
 			}
 			else {
-				applog(LOG_DEBUG, "%s: Nonce with H not zero  : %02x%02x%02x%02x",
-				       modminer->proc_repr,
-				       NONCE_CHARS(nonce));
-				inc_hw_errors(thr);
+				inc_hw_errors(thr, work, nonce);
 				++state->bad_share_counter;
 				++immediate_bad_nonces;
 			}

+ 1 - 4
driver-x6500.c

@@ -745,10 +745,7 @@ int64_t x6500_process_results(struct thr_info *thr, struct work *work)
 				       x6500->proc_repr,
 				       (unsigned long)nonce);
 			} else {
-				applog(LOG_DEBUG, "%"PRIprepr": Nonce with H not zero  : %08lx",
-				       x6500->proc_repr,
-				       (unsigned long)nonce);
-				inc_hw_errors(thr);
+				inc_hw_errors(thr, work, nonce);
 
 				dclk_gotNonces(&fpga->dclk);
 				dclk_errorCount(&fpga->dclk, 1.);

+ 1 - 1
driver-ztex.c

@@ -266,7 +266,7 @@ static int64_t ztex_scanhash(struct thr_info *thr, struct work *work,
 				if (count > 2)
 					dclk_errorCount(&ztex->dclk, 1.0 / ztex->numNonces);
 
-				inc_hw_errors(thr);
+				inc_hw_errors_only(thr);
 			}
 
 			for (j=0; j<=ztex->extraSolutions; j++) {

+ 1 - 1
findnonce.c

@@ -156,7 +156,7 @@ static void *postcalc_hash(void *userdata)
 	if (unlikely(pcd->res[found] & ~found)) {
 		applog(LOG_WARNING, "%"PRIpreprv": invalid nonce count - HW error",
 				thr->cgpu->proc_repr);
-		inc_hw_errors(thr);
+		inc_hw_errors_only(thr);
 		pcd->res[found] &= found;
 	}
 

+ 12 - 5
miner.c

@@ -251,6 +251,7 @@ notifier_t submit_waiting_notifier;
 
 int hw_errors;
 int total_accepted, total_rejected, total_diff1;
+int total_bad_nonces;
 int total_getworks, total_stale, total_discarded;
 uint64_t total_bytes_xfer;
 double total_diff_accepted, total_diff_rejected, total_diff_stale;
@@ -7133,13 +7134,22 @@ static void submit_work_async(struct work *work_in, struct timeval *tv_work_foun
 	_submit_work_async(work);
 }
 
-void inc_hw_errors(struct thr_info *thr)
+void inc_hw_errors(struct thr_info *thr, const struct work *work, const uint32_t bad_nonce)
 {
 	struct cgpu_info * const cgpu = thr->cgpu;
 	
+	if (work)
+		applog(LOG_DEBUG, "%"PRIpreprv": invalid nonce (%08lx) - HW error",
+		       cgpu->proc_repr, (unsigned long)be32toh(bad_nonce));
+	
 	mutex_lock(&stats_lock);
 	hw_errors++;
 	++cgpu->hw_errors;
+	if (work)
+	{
+		++total_bad_nonces;
+		++cgpu->bad_nonces;
+	}
 	mutex_unlock(&stats_lock);
 
 	if (thr->cgpu->drv->hw_error)
@@ -7205,10 +7215,7 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce)
 	
 	if (unlikely(res == TNR_BAD))
 		{
-			struct cgpu_info *cgpu = thr->cgpu;
-			applog(LOG_DEBUG, "%"PRIpreprv": invalid nonce - HW error",
-			       cgpu->proc_repr);
-			inc_hw_errors(thr);
+			inc_hw_errors(thr, work, nonce);
 			ret = false;
 			goto out;
 		}

+ 3 - 1
miner.h

@@ -475,6 +475,7 @@ struct cgpu_info {
 	int accepted;
 	int rejected;
 	int stale;
+	int bad_nonces;
 	int hw_errors;
 	double rolling;
 	double total_mhashes;
@@ -1243,7 +1244,8 @@ struct work {
 };
 
 extern void get_datestamp(char *, struct timeval *);
-extern void inc_hw_errors(struct thr_info *thr);
+extern void inc_hw_errors(struct thr_info *, const struct work *, const uint32_t bad_nonce);
+#define inc_hw_errors_only(thr)  inc_hw_errors(thr, NULL, 0)
 enum test_nonce2_result {
 	TNR_GOOD = 1,
 	TNR_HIGH = 0,