Browse Source

Merge remote-tracking branch 'knc/titan-flush-optimize' into bfgminer

Luke Dashjr 11 years ago
parent
commit
06bb63b94a
5 changed files with 139 additions and 66 deletions
  1. 57 59
      driver-titan.c
  2. 1 1
      knc-asic
  3. 2 2
      miner.c
  4. 65 4
      titan-asic.c
  5. 14 0
      titan-asic.h

+ 57 - 59
driver-titan.c

@@ -85,6 +85,8 @@ struct knc_titan_info {
 	struct cgpu_info *cgpu;
 	int cores;
 	struct knc_titan_die dies[KNC_TITAN_MAX_ASICS][KNC_TITAN_DIES_PER_ASIC];
+	bool asic_served_by_fpga[KNC_TITAN_MAX_ASICS];
+	struct timeval tv_prev;
 
 	struct work *workqueue;
 	int workqueue_size;
@@ -209,7 +211,7 @@ static void knc_titan_clean_flush(const char *repr, void * const ctx, int asic,
 	knc_titan_set_work(repr, ctx, asic, die, core, 0, NULL, true, &unused, &report);
 }
 
-static uint32_t nonce_tops[KNC_TITAN_DIES_PER_ASIC][KNC_TITAN_CORES_PER_DIE];
+static uint32_t nonce_tops[KNC_TITAN_CORES_PER_DIE];
 static bool nonce_tops_inited = false;
 
 static void get_nonce_range(int dieno, int coreno, uint32_t *nonce_bottom, uint32_t *nonce_top)
@@ -220,30 +222,25 @@ static void get_nonce_range(int dieno, int coreno, uint32_t *nonce_bottom, uint3
 		int die, core;
 
 		nonce_f = 0.0;
-		nonce_step = 4294967296.0 / KNC_TITAN_CORES_PER_ASIC;
-
-		for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
-			for (core = 0; core < KNC_TITAN_CORES_PER_DIE; ++core) {
-				nonce_f += nonce_step;
-				if ((core < (KNC_TITAN_CORES_PER_DIE - 1)) || (die < (KNC_TITAN_DIES_PER_ASIC - 1)))
-					top = nonce_f;
-				else
-					top = 0xFFFFFFFF;
-				nonce_tops[die][core] = top;
-			}
+		nonce_step = 4294967296.0 / KNC_TITAN_CORES_PER_DIE;
+
+		for (core = 0; core < KNC_TITAN_CORES_PER_DIE; ++core) {
+			nonce_f += nonce_step;
+			if (core < (KNC_TITAN_CORES_PER_DIE - 1))
+				top = nonce_f;
+			else
+				top = 0xFFFFFFFF;
+			nonce_tops[core] = top;
 		}
 
 		nonce_tops_inited = true;
 	}
 
-	*nonce_top = nonce_tops[dieno][coreno];
+	*nonce_top = nonce_tops[coreno];
 	if (coreno > 0) {
-		*nonce_bottom = nonce_tops[dieno][coreno - 1] + 1;
+		*nonce_bottom = nonce_tops[coreno - 1] + 1;
 		return;
 	}
-	if (dieno > 0) {
-		*nonce_bottom = nonce_tops[dieno - 1][KNC_TITAN_CORES_PER_DIE - 1] + 1;
-	}
 	*nonce_bottom = 0;
 }
 
@@ -278,6 +275,7 @@ static bool configure_one_die(struct knc_titan_info *knc, int asic, int die)
 
 	first_proc = die_p->first_proc;
 	repr = first_proc->device->dev_repr;
+	bool success = true;
 	for (proc = first_proc; proc; proc = proc->next_proc) {
 		mythr = proc->thr[0];
 		knccore = mythr->cgpu_data;
@@ -286,9 +284,10 @@ static bool configure_one_die(struct knc_titan_info *knc, int asic, int die)
 		knc_titan_clean_flush(repr, knc->ctx, knccore->asicno, knccore->dieno, knccore->coreno);
 		get_nonce_range(knccore->dieno, knccore->coreno, &setup_params.nonce_bottom, &setup_params.nonce_top);
 		applog(LOG_DEBUG, "%s[%d:%d:%d]: Setup core, nonces 0x%08X - 0x%08X", repr, knccore->asicno, knccore->dieno, knccore->coreno, setup_params.nonce_bottom, setup_params.nonce_top);
-		knc_titan_setup_core_local(repr, knc->ctx, knccore->asicno, knccore->dieno, knccore->coreno, &setup_params);
+		if (!knc_titan_setup_core_local(repr, knc->ctx, knccore->asicno, knccore->dieno, knccore->coreno, &setup_params))
+			success = false;
 	}
-	applog(LOG_NOTICE, "%s[%d-%d] Die configured", repr, asic, die);
+	applog(LOG_NOTICE, "%s[%d-%d] Die configur%s", repr, asic, die, success ? "ed successfully" : "ation failed");
 	die_p->need_flush = true;
 	timer_set_now(&(die_p->last_share));
 	die_p->broadcast_flushes = false;
@@ -363,6 +362,7 @@ static bool knc_titan_init(struct thr_info * const thr)
 		}
 
 		knc->cores = total_cores;
+		knc->asic_served_by_fpga[asic] = true;
 	}
 
 	cgpu_set_defaults(cgpu);
@@ -372,6 +372,9 @@ static bool knc_titan_init(struct thr_info * const thr)
 
 	knc = cgpu->device_data;
 	for (asic = 0; asic < KNC_TITAN_MAX_ASICS; ++asic) {
+		knc_titan_setup_spi("ASIC", knc->ctx, asic, KNC_TITAN_FPGA_SPI_DIVIDER,
+				    KNC_TITAN_FPGA_SPI_PRECLK, KNC_TITAN_FPGA_SPI_DECLK,
+				    KNC_TITAN_FPGA_SPI_SSLOWMIN);
 		for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
 			configure_one_die(knc, asic, die);
 			knc->dies[asic][die].next_slot = KNC_TITAN_MIN_WORK_SLOT_NUM;
@@ -515,6 +518,7 @@ static void knc_titan_queue_flush(struct thr_info * const thr)
 			for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
 				knc->dies[asic][die].need_flush = true;
 			}
+			knc->asic_served_by_fpga[asic] = true;
 		}
 		timer_set_now(&thr->tv_poll);
 	}
@@ -567,13 +571,16 @@ static void knc_titan_poll(struct thr_info * const thr)
 	int asic;
 	int die;
 	struct knc_titan_die *die_p;
-	struct timeval tv_now, tv_prev;
-	bool any_was_flushed = false;
+	struct timeval tv_now;
+	int num_request_busy;
+	int num_status_byte_error[4];
+	bool fpga_status_checked;
 
 	knc_titan_prune_local_queue(thr);
-	timer_set_now(&tv_prev);
 
 	for (asic = 0; asic < KNC_TITAN_MAX_ASICS; ++asic) {
+                fpga_status_checked = false;
+                num_request_busy = KNC_TITAN_DIES_PER_ASIC;
 		for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
 			die_p = &(knc->dies[asic][die]);
 			if (0 >= die_p->cores)
@@ -594,34 +601,30 @@ static void knc_titan_poll(struct thr_info * const thr)
 							work_accepted = true;
 						}
 					} else {
-						/* Use unicasts */
-						bool work_acc_arr[die_p->cores];
-						struct knc_report reports[die_p->cores];
-						for (proc = first_proc; proc; proc = proc->next_proc) {
-							mythr = proc->thr[0];
-							core1 = mythr->cgpu_data;
-							if ((core1->dieno != die) || (core1->asicno != asic))
-								break;
-							work_acc_arr[core1->coreno] = false;
+						/* Use FPGA accelerated unicasts */
+						if (!fpga_status_checked) {
+							timer_set_now(&knc->tv_prev);
+							knc_titan_get_work_status(first_proc->device->dev_repr, knc->ctx, asic, &num_request_busy, num_status_byte_error);
+							fpga_status_checked = true;
 						}
-						if (knc_titan_set_work_multi(first_proc->device->dev_repr, knc->ctx, asic, die, 0, die_p->next_slot, work, true, work_acc_arr, reports, die_p->cores)) {
-							for (proc = first_proc; proc; proc = proc->next_proc) {
-								mythr = proc->thr[0];
-								core1 = mythr->cgpu_data;
-								if ((core1->dieno != die) || (core1->asicno != asic))
-									break;
-								if (work_acc_arr[core1->coreno]) {
-									/* Submit stale shares just in case we are working with multi-coin pool
-									 * and those shares still might be useful (merged mining case etc) */
-									if (knc_titan_process_report(knc, core1, &(reports[core1->coreno])))
-										timer_set_now(&(die_p->last_share));
-									work_accepted = true;
-								}
+						if (num_request_busy == 0) {
+							if (knc_titan_set_work_parallel(first_proc->device->dev_repr, knc->ctx, asic, 1 << die, 0, die_p->next_slot, work, true, die_p->cores, KNC_TITAN_FPGA_RETRIES)) {
+								work_accepted = true;
 							}
 						}
 					}
 				} else {
-					if (!knc_titan_set_work(first_proc->dev_repr, knc->ctx, asic, die, ALL_CORES, die_p->next_slot, work, false, &work_accepted, &report))
+					if (knc->asic_served_by_fpga[asic]) {
+						knc_titan_get_work_status(first_proc->device->dev_repr, knc->ctx, asic, &num_request_busy, num_status_byte_error);
+						if (num_request_busy == 0) {
+							timer_set_now(&tv_now);
+							double diff = ((tv_now.tv_sec - knc->tv_prev.tv_sec) * 1000000.0 + (tv_now.tv_usec - knc->tv_prev.tv_usec)) / 1000000.0;
+							applog(LOG_INFO, "%s: Flush took %f secs for ASIC %d", knc_titan_drv.dname, diff, asic);
+							applog(LOG_DEBUG, "FPGA CRC error counters: %d %d %d %d", num_status_byte_error[0], num_status_byte_error[1], num_status_byte_error[2], num_status_byte_error[3]);
+							knc->asic_served_by_fpga[asic] = false;
+						}
+					}
+					if (knc->asic_served_by_fpga[asic] || !knc_titan_set_work(first_proc->dev_repr, knc->ctx, asic, die, ALL_CORES, die_p->next_slot, work, false, &work_accepted, &report))
 						work_accepted = false;
 				}
 				knccore = first_proc->thr[0]->cgpu_data;
@@ -629,25 +632,24 @@ static void knc_titan_poll(struct thr_info * const thr)
 					break;
 				bool was_flushed = false;
 				if (die_p->need_flush || need_replace) {
-					struct work *work1, *tmp1;
 					applog(LOG_NOTICE, "%s[%d-%d] Flushing stale works (%s)", first_proc->dev_repr, asic, die,
 					       die_p->need_flush ? "New work" : "Slot collision");
 					die_p->need_flush = false;
 					die_p->first_slot = die_p->next_slot;
-					HASH_ITER(hh, knc->devicework, work1, tmp1) {
-						if ( (asic == ASIC_FROM_WORKID(work1->device_id)) &&
-							 (die == DIE_FROM_WORKID(work1->device_id)) ) {
-							HASH_DEL(knc->devicework, work1);
-							free_work(work1);
-						}
-					}
 					delay_usecs = 0;
 					was_flushed = true;
-					any_was_flushed = true;
 				}
 				--knc->workqueue_size;
 				DL_DELETE(knc->workqueue, work);
 				work->device_id = MAKE_WORKID(asic, die, die_p->next_slot);
+				struct work *replaced_work;
+				struct work *work1, *tmp1;
+				HASH_ITER(hh, knc->devicework, work1, tmp1) {
+					if (work->device_id == work1->device_id) {
+						HASH_DEL(knc->devicework, work1);
+						free_work(work1);
+					}
+				}
 				HASH_ADD(hh, knc->devicework, device_id, sizeof(work->device_id), work);
 				if (++(die_p->next_slot) > KNC_TITAN_MAX_WORK_SLOT_NUM)
 					die_p->next_slot = KNC_TITAN_MIN_WORK_SLOT_NUM;
@@ -663,10 +665,6 @@ static void knc_titan_poll(struct thr_info * const thr)
 
 	applog(LOG_DEBUG, "%s: %d jobs accepted to queue (max=%d)", knc_titan_drv.dname, workaccept, knc->workqueue_max);
 	timer_set_now(&tv_now);
-	if (any_was_flushed) {
-		double diff = ((tv_now.tv_sec - tv_prev.tv_sec) * 1000000.0 + (tv_now.tv_usec - tv_prev.tv_usec)) / 1000000.0;
-		applog(LOG_INFO, "%s: Flush took %f secs", knc_titan_drv.dname, diff);
-	}
 
 	for (asic = 0; asic < KNC_TITAN_MAX_ASICS; ++asic) {
 		for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
@@ -675,7 +673,7 @@ static void knc_titan_poll(struct thr_info * const thr)
 				continue;
 			die_info.cores = die_p->cores; /* core hint */
 			die_info.version = KNC_VERSION_TITAN;
-			if (!knc_titan_get_info(cgpu->dev_repr, knc->ctx, asic, die, &die_info))
+			if (knc->asic_served_by_fpga[asic] || !knc_titan_get_info(cgpu->dev_repr, knc->ctx, asic, die, &die_info))
 				continue;
 			for (proc = die_p->first_proc; proc; proc = proc->next_proc) {
 				mythr = proc->thr[0];

+ 1 - 1
knc-asic

@@ -1 +1 @@
-Subproject commit 6a5c83c4a576a17d5663464e01bb8609b20b0161
+Subproject commit f15761c85e1cf35e28aeab519ef57d857d263fd4

+ 2 - 2
miner.c

@@ -11450,7 +11450,7 @@ void drv_set_defaults(const struct device_drv * const drv, const void *datap, vo
 /* Makes sure the hashmeter keeps going even if mining threads stall, updates
  * the screen at regular intervals, and restarts threads if they appear to have
  * died. */
-#define WATCHDOG_SICK_TIME		60
+#define WATCHDOG_SICK_TIME		300
 #define WATCHDOG_DEAD_TIME		600
 #define WATCHDOG_SICK_COUNT		(WATCHDOG_SICK_TIME/WATCHDOG_INTERVAL)
 #define WATCHDOG_DEAD_COUNT		(WATCHDOG_DEAD_TIME/WATCHDOG_INTERVAL)
@@ -11629,7 +11629,7 @@ void bfg_watchdog(struct cgpu_info * const cgpu, struct timeval * const tvp_now)
 			} else if (cgpu->status == LIFE_WELL && (tvp_now->tv_sec - thr->last.tv_sec > WATCHDOG_SICK_TIME)) {
 				thr->rolling = cgpu->rolling = 0;
 				cgpu->status = LIFE_SICK;
-				applog(LOG_ERR, "%s: Idle for more than 60 seconds, declaring SICK!", dev_str);
+				applog(LOG_ERR, "%s: Idle for more than %d seconds, declaring SICK!", dev_str, WATCHDOG_SICK_TIME);
 				cgtime(&thr->sick);
 
 				dev_error(cgpu, REASON_DEV_SICK_IDLE_60);

+ 65 - 4
titan-asic.c

@@ -147,13 +147,74 @@ bool knc_titan_get_report(const char *repr, void * const ctx, int channel, int d
 	return true;
 }
 
-/* Use bare function without extra checks */
-extern bool knc_titan_setup_core_(void * const ctx, int channel, int die, int core, struct titan_setup_core_params *params);
-
 /* This fails if core is hashing!
  * Stop it before setting up.
  */
 bool knc_titan_setup_core_local(const char *repr, void * const ctx, int channel, int die, int core, struct titan_setup_core_params *params)
 {
-	return knc_titan_setup_core_(ctx, channel, die, core, params);
+	return knc_titan_setup_core_(LOG_INFO, ctx, channel, die, core, params); /* thin wrapper: forwards everything except repr (unused here) and passes LOG_INFO as the new first argument; result is returned unchanged */
+}
+
+bool knc_titan_setup_spi(const char *repr, void * const ctx, int asic, int divider, int preclk, int declk, int sslowmin) /* Builds a setup request for `asic` from the four SPI parameters and sends it through knc_syncronous_transfer_fpga(); returns true on success, false (after logging) on a non-zero transfer status. `repr` is only used as the log prefix. */
+{
+	uint8_t request[7]; /* filled by knc_prepare_titan_setup(); NOTE(review): assumes the prepared request never exceeds 7 bytes - confirm against knc-asic */
+	int request_length;
+	int status;
+
+	request_length = knc_prepare_titan_setup(request, asic, divider, preclk, declk, sslowmin);
+
+	status = knc_syncronous_transfer_fpga(ctx, request_length, request, 0, NULL); /* response length 0 and NULL response buffer: nothing is read back */
+	if (status) { /* any non-zero status is treated as failure */
+		applog(LOG_INFO, "%s[%d]: setup_spi failed (%x)", repr, asic, status);
+		return false;
+	}
+
+	return true;
+}
+
+bool knc_titan_set_work_parallel(const char *repr, void * const ctx, int asic, int die, int core_start, int slot, struct work *work, bool urgent, int num, int resend) /* Builds one work request covering cores core_start .. core_start + num - 1 and sends it through knc_syncronous_transfer_fpga(); returns true on success, false (after logging) on a non-zero transfer status. `urgent` is accepted but never read in this body. NOTE(review): the caller in knc_titan_poll passes `1 << die`, so `die` looks like a bitmask rather than an index - confirm. */
+{
+	uint8_t request[9 + BLOCK_HEADER_BYTES_WITHOUT_NONCE]; /* NOTE(review): presumably 9 bytes of framing plus the block header - confirm against knc_prepare_titan_work_request() */
+	int request_length;
+	int status;
+
+	request_length = knc_prepare_titan_work_request(request, asic, die, slot, core_start, core_start + num - 1, resend, work); /* last core index is inclusive */
+
+	status = knc_syncronous_transfer_fpga(ctx, request_length, request, 0, NULL); /* response length 0 and NULL response buffer: nothing is read back */
+	if (status) { /* any non-zero status is treated as failure */
+		applog(LOG_INFO, "%s[%d]: set_work_parallel failed (%x)", repr, asic, status);
+		return false;
+	}
+
+	return true;
+}
+
+bool knc_titan_get_work_status(const char *repr, void * const ctx, int asic, int *num_request_busy, int *num_status_byte_error) /* Queries the work status for `asic` and, on success only, stores the busy count in *num_request_busy and the error counters in num_status_byte_error[]; returns false (after logging) on transfer or decode failure, leaving both outputs untouched. */
+{
+	uint8_t request[2];
+	int request_length;
+	int response_length = 12;
+	uint8_t response[response_length]; /* variable-length array sized by the non-const local above */
+	int status;
+	uint8_t num_request_busy_byte;
+	uint16_t num_status_byte_error_counters[4]; /* NOTE(review): hard-coded 4, while the copy loop below is bounded by KNC_STATUS_BYTE_ERROR_COUNTERS - confirm the two agree */
+
+	request_length = knc_prepare_titan_work_status(request, asic);
+
+	status = knc_syncronous_transfer_fpga(ctx, request_length, request, response_length, response);
+	if (status) { /* any non-zero status is treated as failure */
+		applog(LOG_INFO, "%s[%d]: get_work_status failed (%x)", repr, asic, status);
+		return false;
+	}
+
+	status = knc_decode_work_status(response + 2, &num_request_busy_byte, num_status_byte_error_counters); /* decoding starts at offset 2 of the response; NOTE(review): the meaning of the first two bytes is not visible here */
+	if (status) {
+		applog(LOG_INFO, "%s[%d]: get_work_status got undefined response", repr, asic);
+		return false;
+	}
+
+	*num_request_busy = num_request_busy_byte; /* widen the decoded byte to the caller's int */
+	for (int i = 0 ; i < KNC_STATUS_BYTE_ERROR_COUNTERS ; i++) /* caller's array must hold at least KNC_STATUS_BYTE_ERROR_COUNTERS ints; knc_titan_poll passes int[4] */
+		num_status_byte_error[i] = num_status_byte_error_counters[i];
+	return true;
 }

+ 14 - 0
titan-asic.h

@@ -16,6 +16,17 @@
 #define	KNC_TITAN_MIN_WORK_SLOT_NUM	1
 #define	KNC_TITAN_MAX_WORK_SLOT_NUM	15
 
+#define KNC_TITAN_FPGA_SYSCLK_FREQ      24576000 /* NOTE(review): presumably Hz - confirm */
+#define KNC_TITAN_FPGA_SPI_FREQ         6144000 /* requested SPI frequency, same unit as SYSCLK_FREQ */
+#define KNC_TITAN_FPGA_SPI_DIVIDER      (KNC_TITAN_FPGA_SYSCLK_FREQ / (2*KNC_TITAN_FPGA_SPI_FREQ) - 1) /* integer division; evaluates to 1 with the two values above */
+#if KNC_TITAN_FPGA_SYSCLK_FREQ % (2*KNC_TITAN_FPGA_SPI_FREQ) != 0 /* true when the division in SPI_DIVIDER leaves a remainder, i.e. the divider was truncated */
+#warning Requested SPI frequency could not be accomplished exactly, adjusting as needed
+#endif
+#define KNC_TITAN_FPGA_SPI_PRECLK       7 /* passed as `preclk` to knc_titan_setup_spi() */
+#define KNC_TITAN_FPGA_SPI_DECLK        7 /* passed as `declk` to knc_titan_setup_spi() */
+#define KNC_TITAN_FPGA_SPI_SSLOWMIN     15 /* passed as `sslowmin` to knc_titan_setup_spi() */
+#define KNC_TITAN_FPGA_RETRIES          1 /* passed as `resend` to knc_titan_set_work_parallel() */
+
 struct nonce_report {
 	uint32_t nonce;
 	uint8_t slot;
@@ -26,5 +37,8 @@ bool knc_titan_set_work(const char *repr, void * const ctx, int channel, int die
 bool knc_titan_set_work_multi(const char *repr, void * const ctx, int channel, int die, int core_start, int slot, struct work *work, bool urgent, bool *work_accepted, struct knc_report *reports, int num);
 bool knc_titan_get_report(const char *repr, void * const ctx, int channel, int die, int core, struct knc_report *report);
 bool knc_titan_setup_core_local(const char *repr, void * const ctx, int channel, int die, int core, struct titan_setup_core_params *params);
+bool knc_titan_setup_spi(const char *repr, void * const ctx, int asic, int divider, int preclk, int declk, int sslowmin);
+bool knc_titan_set_work_parallel(const char *repr, void * const ctx, int asic, int die, int core_start, int slot, struct work *work, bool urgent, int num, int resend);
+bool knc_titan_get_work_status(const char *repr, void * const ctx, int asic, int *num_request_busy, int *num_status_byte_error);
 
 #endif /* __TITAN_ASIC_H */