Browse Source

Titan flush optimization: First rough implementation

Pelle Nilsson 11 years ago
parent
commit
8f6574793a
3 changed files with 104 additions and 14 deletions
  1. 29 14
      driver-titan.c
  2. 61 0
      titan-asic.c
  3. 14 0
      titan-asic.h

+ 29 - 14
driver-titan.c

@@ -85,6 +85,7 @@ struct knc_titan_info {
 	struct cgpu_info *cgpu;
 	struct cgpu_info *cgpu;
 	int cores;
 	int cores;
 	struct knc_titan_die dies[KNC_TITAN_MAX_ASICS][KNC_TITAN_DIES_PER_ASIC];
 	struct knc_titan_die dies[KNC_TITAN_MAX_ASICS][KNC_TITAN_DIES_PER_ASIC];
+	bool asic_served_by_fpga[KNC_TITAN_MAX_ASICS];
 
 
 	struct work *workqueue;
 	struct work *workqueue;
 	int workqueue_size;
 	int workqueue_size;
@@ -360,6 +361,7 @@ static bool knc_titan_init(struct thr_info * const thr)
 		}
 		}
 
 
 		knc->cores = total_cores;
 		knc->cores = total_cores;
+		knc->asic_served_by_fpga[asic] = true;
 	}
 	}
 
 
 	cgpu_set_defaults(cgpu);
 	cgpu_set_defaults(cgpu);
@@ -369,6 +371,9 @@ static bool knc_titan_init(struct thr_info * const thr)
 
 
 	knc = cgpu->device_data;
 	knc = cgpu->device_data;
 	for (asic = 0; asic < KNC_TITAN_MAX_ASICS; ++asic) {
 	for (asic = 0; asic < KNC_TITAN_MAX_ASICS; ++asic) {
+		knc_titan_setup_spi("ASIC", knc->ctx, asic, KNC_TITAN_FPGA_SPI_DIVIDER,
+				    KNC_TITAN_FPGA_SPI_PRECLK, KNC_TITAN_FPGA_SPI_DECLK,
+				    KNC_TITAN_FPGA_SPI_SSLOWMIN);
 		for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
 		for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
 			configure_one_die(knc, asic, die);
 			configure_one_die(knc, asic, die);
 			knc->dies[asic][die].next_slot = KNC_TITAN_MIN_WORK_SLOT_NUM;
 			knc->dies[asic][die].next_slot = KNC_TITAN_MIN_WORK_SLOT_NUM;
@@ -514,6 +519,7 @@ static void knc_titan_queue_flush(struct thr_info * const thr)
 			for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
 			for (die = 0; die < KNC_TITAN_DIES_PER_ASIC; ++die) {
 				knc->dies[asic][die].need_flush = true;
 				knc->dies[asic][die].need_flush = true;
 			}
 			}
+			knc->asic_served_by_fpga[asic] = true;
 		}
 		}
 		timer_set_now(&thr->tv_poll);
 		timer_set_now(&thr->tv_poll);
 	}
 	}
@@ -568,6 +574,7 @@ static void knc_titan_poll(struct thr_info * const thr)
 	struct knc_titan_die *die_p;
 	struct knc_titan_die *die_p;
 	struct timeval tv_now, tv_prev;
 	struct timeval tv_now, tv_prev;
 	bool any_was_flushed = false;
 	bool any_was_flushed = false;
+	int num_request_busy;
 
 
 	knc_titan_prune_local_queue(thr);
 	knc_titan_prune_local_queue(thr);
 	timer_set_now(&tv_prev);
 	timer_set_now(&tv_prev);
@@ -595,7 +602,6 @@ static void knc_titan_poll(struct thr_info * const thr)
 					} else {
 					} else {
 						/* Use unicasts */
 						/* Use unicasts */
 						bool work_acc_arr[die_p->cores];
 						bool work_acc_arr[die_p->cores];
-						struct knc_report reports[die_p->cores];
 						for (proc = first_proc; proc; proc = proc->next_proc) {
 						for (proc = first_proc; proc; proc = proc->next_proc) {
 							mythr = proc->thr[0];
 							mythr = proc->thr[0];
 							core1 = mythr->cgpu_data;
 							core1 = mythr->cgpu_data;
@@ -603,24 +609,33 @@ static void knc_titan_poll(struct thr_info * const thr)
 								break;
 								break;
 							work_acc_arr[core1->coreno] = false;
 							work_acc_arr[core1->coreno] = false;
 						}
 						}
-						if (knc_titan_set_work_multi(first_proc->device->dev_repr, knc->ctx, asic, die, 0, die_p->next_slot, work, true, work_acc_arr, reports, die_p->cores)) {
-							for (proc = first_proc; proc; proc = proc->next_proc) {
-								mythr = proc->thr[0];
-								core1 = mythr->cgpu_data;
-								if ((core1->dieno != die) || (core1->asicno != asic))
-									break;
-								if (work_acc_arr[core1->coreno]) {
-									/* Submit stale shares just in case we are working with multi-coin pool
-									 * and those shares still might be useful (merged mining case etc) */
-									if (knc_titan_process_report(knc, core1, &(reports[core1->coreno])))
-										timer_set_now(&(die_p->last_share));
-									work_accepted = true;
+						knc_titan_get_work_status(first_proc->device->dev_repr, knc->ctx, asic, &num_request_busy);
+						if (num_request_busy == 0) {
+							if (knc_titan_set_work_parallel(first_proc->device->dev_repr, knc->ctx, asic, 1 << die, 0, die_p->next_slot, work, true, work_acc_arr, die_p->cores, KNC_TITAN_FPGA_RETRIES)) {
+								work_accepted = true;
+								for (proc = first_proc; proc; proc = proc->next_proc) {
+									mythr = proc->thr[0];
+									core1 = mythr->cgpu_data;
+									if ((core1->dieno != die) || (core1->asicno != asic))
+										break;
+									if (work_acc_arr[core1->coreno]) {
+										/* Submit stale shares just in case we are working with multi-coin pool
+										 * and those shares still might be useful (merged mining case etc) */
+										/* if (knc_titan_process_report(knc, core1, &(reports[core1->coreno]))) */
+										/* 	timer_set_now(&(die_p->last_share)); */
+										work_accepted = true;
+									}
 								}
 								}
 							}
 							}
 						}
 						}
 					}
 					}
 				} else {
 				} else {
-					if (!knc_titan_set_work(first_proc->dev_repr, knc->ctx, asic, die, ALL_CORES, die_p->next_slot, work, false, &work_accepted, &report))
+					if (knc->asic_served_by_fpga[asic]) {
+						knc_titan_get_work_status(first_proc->device->dev_repr, knc->ctx, asic, &num_request_busy);
+						if (num_request_busy == 0)
+							knc->asic_served_by_fpga[asic] = false;
+					}
+					if (knc->asic_served_by_fpga[asic] || !knc_titan_set_work(first_proc->dev_repr, knc->ctx, asic, die, ALL_CORES, die_p->next_slot, work, false, &work_accepted, &report))
 						work_accepted = false;
 						work_accepted = false;
 				}
 				}
 				knccore = first_proc->thr[0]->cgpu_data;
 				knccore = first_proc->thr[0]->cgpu_data;

+ 61 - 0
titan-asic.c

@@ -154,3 +154,64 @@ bool knc_titan_setup_core_local(const char *repr, void * const ctx, int channel,
 {
 {
 	return knc_titan_setup_core_(LOG_INFO, ctx, channel, die, core, params);
 	return knc_titan_setup_core_(LOG_INFO, ctx, channel, die, core, params);
 }
 }
+
+/*
+ * Send an SPI setup request for one ASIC to the FPGA.
+ *
+ * The request bytes are produced by knc_prepare_titan_setup() from asic,
+ * divider, preclk, declk and sslowmin, then transferred with no response
+ * buffer. repr is used only as the prefix of the failure log line.
+ *
+ * Returns true when the transfer reports status 0; otherwise logs the
+ * status at LOG_INFO and returns false.
+ */
+bool knc_titan_setup_spi(const char *repr, void * const ctx, int asic, int divider, int preclk, int declk, int sslowmin)
+{
+	uint8_t req[7];
+	const int req_len = knc_prepare_titan_setup(req, asic, divider, preclk, declk, sslowmin);
+	const int err = knc_syncronous_transfer_fpga(ctx, req_len, req, 0, NULL);
+
+	if (!err)
+		return true;
+
+	applog(LOG_INFO, "%s[%d]: setup_spi failed (%x)", repr, asic, err);
+	return false;
+}
+
+/*
+ * Hand one work item to the FPGA for distribution to a range of cores.
+ *
+ * The request is built by knc_prepare_titan_work_request() for cores
+ * core_start .. core_start + num - 1 in the given slot, with resend passed
+ * through unchanged, and is transferred with no response buffer.
+ *
+ * NOTE(review): the caller visible in driver-titan.c passes `1 << die` for
+ * the die argument, so it looks like a bitmask rather than an index - confirm.
+ * NOTE(review): urgent and work_accepted are not referenced here; in
+ * particular work_accepted[] is never written by this function.
+ *
+ * Returns true when the transfer reports status 0; otherwise logs the
+ * status at LOG_INFO and returns false.
+ */
+bool knc_titan_set_work_parallel(const char *repr, void * const ctx, int asic, int die, int core_start, int slot, struct work *work, bool urgent, bool *work_accepted, int num, int resend)
+{
+	uint8_t req[9 + BLOCK_HEADER_BYTES_WITHOUT_NONCE];
+	const int last_core = core_start + num - 1;
+	int req_len;
+	int err;
+
+	req_len = knc_prepare_titan_work_request(req, asic, die, slot, core_start, last_core, resend, work);
+	err = knc_syncronous_transfer_fpga(ctx, req_len, req, 0, NULL);
+
+	if (!err)
+		return true;
+
+	applog(LOG_INFO, "%s[%d]: set_work_parallel failed (%x)", repr, asic, err);
+	return false;
+}
+
+/*
+ * Query the FPGA for the work status of one ASIC.
+ *
+ * A request built by knc_prepare_titan_work_status() is sent and a 4-byte
+ * response is read back; the response is decoded by
+ * knc_decode_work_status() starting at byte offset 2.
+ *
+ * On success, *num_request_busy receives the decoded busy count and true
+ * is returned. On failure (transfer error or undecodable response) the
+ * reason is logged at LOG_INFO, false is returned and *num_request_busy
+ * is left at -1.
+ */
+bool knc_titan_get_work_status(const char *repr, void * const ctx, int asic, int *num_request_busy)
+{
+	uint8_t request[2];
+	uint8_t response[4];
+	const int response_length = (int)sizeof(response);
+	uint8_t num_request_busy_byte;
+	int request_length;
+	int status;
+
+	/*
+	 * The callers visible in driver-titan.c ignore the return value and
+	 * compare the out-parameter against 0. Give it a defined, non-zero
+	 * value up front so a failed query is never read as uninitialized
+	 * memory and is never mistaken for "no requests busy".
+	 */
+	*num_request_busy = -1;
+
+	request_length = knc_prepare_titan_work_status(request, asic);
+
+	status = knc_syncronous_transfer_fpga(ctx, request_length, request, response_length, response);
+	if (status) {
+		applog(LOG_INFO, "%s[%d]: get_work_status failed (%x)", repr, asic, status);
+		return false;
+	}
+
+	status = knc_decode_work_status(response + 2, &num_request_busy_byte);
+	if (status) {
+		applog(LOG_INFO, "%s[%d]: get_work_status got undefined response", repr, asic);
+		return false;
+	}
+
+	*num_request_busy = num_request_busy_byte;
+	return true;
+}

+ 14 - 0
titan-asic.h

@@ -16,6 +16,17 @@
 #define	KNC_TITAN_MIN_WORK_SLOT_NUM	1
 #define	KNC_TITAN_MIN_WORK_SLOT_NUM	1
 #define	KNC_TITAN_MAX_WORK_SLOT_NUM	15
 #define	KNC_TITAN_MAX_WORK_SLOT_NUM	15
 
 
+#define KNC_TITAN_FPGA_SYSCLK_FREQ      24576000 /* FPGA system clock; presumably Hz - TODO confirm */
+#define KNC_TITAN_FPGA_SPI_FREQ         3072000 /* requested SPI clock; presumably same unit as SYSCLK_FREQ */
+#define KNC_TITAN_FPGA_SPI_DIVIDER      (KNC_TITAN_FPGA_SYSCLK_FREQ / (2*KNC_TITAN_FPGA_SPI_FREQ) - 1) /* evaluates to 3 with the values above */
+#if KNC_TITAN_FPGA_SYSCLK_FREQ % (2*KNC_TITAN_FPGA_SPI_FREQ) != 0 /* non-zero remainder: the integer division above truncates */
+#warning Requested SPI frequency could not be accomplished exactly, adjusting as needed
+#endif
+#define KNC_TITAN_FPGA_SPI_PRECLK       7 /* passed as preclk to knc_titan_setup_spi() */
+#define KNC_TITAN_FPGA_SPI_DECLK        7 /* passed as declk to knc_titan_setup_spi() */
+#define KNC_TITAN_FPGA_SPI_SSLOWMIN     15 /* passed as sslowmin to knc_titan_setup_spi() */
+#define KNC_TITAN_FPGA_RETRIES          1 /* passed as resend to knc_titan_set_work_parallel() */
+
 struct nonce_report {
 struct nonce_report {
 	uint32_t nonce;
 	uint32_t nonce;
 	uint8_t slot;
 	uint8_t slot;
@@ -26,5 +37,8 @@ bool knc_titan_set_work(const char *repr, void * const ctx, int channel, int die
 bool knc_titan_set_work_multi(const char *repr, void * const ctx, int channel, int die, int core_start, int slot, struct work *work, bool urgent, bool *work_accepted, struct knc_report *reports, int num);
 bool knc_titan_set_work_multi(const char *repr, void * const ctx, int channel, int die, int core_start, int slot, struct work *work, bool urgent, bool *work_accepted, struct knc_report *reports, int num);
 bool knc_titan_get_report(const char *repr, void * const ctx, int channel, int die, int core, struct knc_report *report);
 bool knc_titan_get_report(const char *repr, void * const ctx, int channel, int die, int core, struct knc_report *report);
 bool knc_titan_setup_core_local(const char *repr, void * const ctx, int channel, int die, int core, struct titan_setup_core_params *params);
 bool knc_titan_setup_core_local(const char *repr, void * const ctx, int channel, int die, int core, struct titan_setup_core_params *params);
+bool knc_titan_setup_spi(const char *repr, void * const ctx, int asic, int divider, int preclk, int declk, int sslowmin);
+bool knc_titan_set_work_parallel(const char *repr, void * const ctx, int asic, int die, int core_start, int slot, struct work *work, bool urgent, bool *work_accepted, int num, int resend);
+bool knc_titan_get_work_status(const char *repr, void * const ctx, int asic, int *num_request_busy);
 
 
 #endif /* __TITAN_ASIC_H */
 #endif /* __TITAN_ASIC_H */