driver-knc.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871
  1. /*
  2. * Copyright 2013 Luke Dashjr
  3. *
  4. * This program is free software; you can redistribute it and/or modify it
  5. * under the terms of the GNU General Public License as published by the Free
  6. * Software Foundation; either version 3 of the License, or (at your option)
  7. * any later version. See COPYING for more details.
  8. */
  9. #include "config.h"
  10. #include <stdbool.h>
  11. #include <stddef.h>
  12. #include <stdint.h>
  13. #include <sys/ioctl.h>
  14. #include <sys/types.h>
  15. #include <sys/stat.h>
  16. #include <fcntl.h>
  17. #include <math.h>
  18. #ifdef HAVE_LINUX_I2C_DEV_USER_H
  19. #include <linux/i2c-dev-user.h>
  20. #else
  21. #include <linux/i2c-dev.h>
  22. #endif
  23. #include <linux/spi/spidev.h>
  24. #include <uthash.h>
  25. #include "deviceapi.h"
  26. #include "logging.h"
  27. #include "lowl-spi.h"
  28. #include "miner.h"
  29. #include "util.h"
/* Default delay, in microseconds, before knc_poll() schedules itself again */
#define KNC_POLL_INTERVAL_US 10000
/* SPI parameters; knc_init() copies these into the spi_port and
 * knc_spi_open() applies mode, bits and speed to the spidev node via ioctl */
#define KNC_SPI_SPEED 3000000
#define KNC_SPI_DELAY 0
#define KNC_SPI_MODE (SPI_CPHA | SPI_CPOL | SPI_CS_HIGH)
#define KNC_SPI_BITS 8
/*
The core disable/enable strategy is as follows:
If a core gets 10 HW errors in a row without doing any proper work,
it is disabled for 10 seconds.
The next time a core reaches 10 HW errors in a row, the time since it was
last enabled is compared with the time since its errors started.
If those times are close (within 50%), the disable time is doubled;
otherwise the core is just disabled for 10 seconds again.
*/
/* Consecutive hardware errors after which knc_hw_error() puts a core into recovery */
#define KNC_MAX_HWERR_IN_ROW 10
/* Disable period, in seconds, that knc_init() starts with and knc_hw_error() resets to */
#define KNC_HWERR_DISABLE_SECS (10)
/* Upper bound, in seconds, on the doubled disable period */
#define KNC_MAX_DISABLE_SECS (15 * 60)
/* Chip geometry: cores per die, dies per chip, and the resulting cores per chip */
#define KNC_CORES_PER_DIE 0x30
#define KNC_DIE_PER_CHIP 4
#define KNC_CORES_PER_CHIP (KNC_CORES_PER_DIE * KNC_DIE_PER_CHIP)
/* i2c bus device opened for detection, core status reads and core enable/disable writes */
static const char * const i2cpath = "/dev/i2c-2";
/* printf template for the i2c bus device that knc_get_stats() opens per ASIC */
#define KNC_I2C_TEMPLATE "/dev/i2c-%d"
/* Request command codes; senders shift the code left by 4 into the first
 * byte of an SPI request (see knc_clean_flush() and knc_poll()) */
enum knc_request_cmd {
KNC_REQ_SUBMIT_WORK = 2,
KNC_REQ_FLUSH_QUEUE = 3,
};
/* Reply record types; knc_poll() reads the type from the top two bits of
 * the first byte of each 12-byte record and ignores all other values */
enum knc_reply_type {
KNC_REPLY_NONCE_FOUND = 1,
KNC_REPLY_WORK_DONE = 2,
};
/* Per-core status bytes read over i2c; knc_init() treats any value other
 * than these two as a permanently disabled core */
enum knc_i2c_core_status {
KNC_I2CSTATUS_DISABLED = 2,
KNC_I2CSTATUS_ENABLED = 3,
};
/* Registers knc_drv (defined at the end of this file) with the driver framework */
BFG_REGISTER_DRIVER(knc_drv)
/* Forward declaration: knc_detect_one() references the table before its definition */
static const struct bfg_set_device_definition knc_set_device_funcs[];
/* State shared by all processors; knc_init() stores one instance in every
 * core's device_data */
struct knc_device {
/* fd of the i2c bus opened in knc_init(); used by _knc_core_setstatus() */
int i2c;
/* SPI port used for all work submission and reply polling */
struct spi_port *spi;
/* First processor of the chain; knc_set_queue_full() walks the chain from here */
struct cgpu_info *cgpu;
/* Set by knc_queue_flush(); makes the next knc_poll() send a flush request */
bool need_flush;
/* utlist doubly-linked list of work queued locally, not yet accepted by the device */
struct work *workqueue;
/* Number of entries currently in workqueue */
int workqueue_size;
/* Local queue limit; raised by knc_poll() to the number of jobs the device accepts */
int workqueue_max;
/* Counter from which knc_poll() derives work ids (masked to 15 bits) */
int next_id;
/* uthash table, keyed by work->device_id, of work accepted by the device */
struct work *devicework;
};
/* Per-core state; knc_init() allocates one per processor and stores it in
 * the processor thread's cgpu_data */
struct knc_core {
/* ASIC index: i2c slave address minus 0x20 */
int asicno;
/* Core index within the ASIC; also the i2c register written to enable/disable it */
int coreno;
/* Whether knc_get_stats() should query the DCDC module for this core's die */
bool use_dcdc;
/* Last voltage and current stored by knc_get_stats() */
float volt;
float current;
/* Hardware errors counted since the last reset of this counter */
int hwerr_in_row;
/* Current disable period in seconds (see knc_hw_error()) */
int hwerr_disable_time;
/* Time the core was last enabled, or the time at which it may be re-enabled */
struct timeval enable_at;
/* Time of the first error in the current run of errors */
struct timeval first_hwerr;
};
  88. static
  89. bool knc_detect_one(const char *devpath)
  90. {
  91. static struct cgpu_info *prev_cgpu = NULL;
  92. struct cgpu_info *cgpu;
  93. int i;
  94. const int fd = open(i2cpath, O_RDWR);
  95. char *leftover = NULL;
  96. const int i2cslave = strtol(devpath, &leftover, 0);
  97. uint8_t buf[0x20];
  98. if (leftover && leftover[0])
  99. return false;
  100. if (unlikely(fd == -1))
  101. {
  102. applog(LOG_DEBUG, "%s: Failed to open %s", __func__, i2cpath);
  103. return false;
  104. }
  105. if (ioctl(fd, I2C_SLAVE, i2cslave))
  106. {
  107. close(fd);
  108. applog(LOG_DEBUG, "%s: Failed to select i2c slave 0x%x",
  109. __func__, i2cslave);
  110. return false;
  111. }
  112. i = i2c_smbus_read_i2c_block_data(fd, 0, 0x20, buf);
  113. close(fd);
  114. if (-1 == i)
  115. {
  116. applog(LOG_DEBUG, "%s: 0x%x: Failed to read i2c block data",
  117. __func__, i2cslave);
  118. return false;
  119. }
  120. for (i = 0; ; ++i)
  121. {
  122. if (buf[i] == 3)
  123. break;
  124. if (i == 0x1f)
  125. return false;
  126. }
  127. cgpu = malloc(sizeof(*cgpu));
  128. *cgpu = (struct cgpu_info){
  129. .drv = &knc_drv,
  130. .device_path = strdup(devpath),
  131. .set_device_funcs = knc_set_device_funcs,
  132. .deven = DEV_ENABLED,
  133. .procs = KNC_CORES_PER_CHIP,
  134. .threads = prev_cgpu ? 0 : 1,
  135. };
  136. const bool rv = add_cgpu_slave(cgpu, prev_cgpu);
  137. prev_cgpu = cgpu;
  138. return rv;
  139. }
  140. static int knc_detect_auto(void)
  141. {
  142. const int first = 0x20, last = 0x26;
  143. char devpath[4];
  144. int found = 0, i;
  145. for (i = first; i <= last; ++i)
  146. {
  147. sprintf(devpath, "%d", i);
  148. if (knc_detect_one(devpath))
  149. ++found;
  150. }
  151. return found;
  152. }
  153. static void knc_detect(void)
  154. {
  155. generic_detect(&knc_drv, knc_detect_one, knc_detect_auto, GDF_REQUIRE_DNAME | GDF_DEFAULT_NOAUTO);
  156. }
  157. static
  158. bool knc_spi_open(const char *repr, struct spi_port * const spi)
  159. {
  160. const char * const spipath = "/dev/spidev1.0";
  161. const int fd = open(spipath, O_RDWR);
  162. const uint8_t lsbfirst = 0;
  163. if (fd == -1)
  164. return false;
  165. if (ioctl(fd, SPI_IOC_WR_MODE , &spi->mode )) goto fail;
  166. if (ioctl(fd, SPI_IOC_WR_LSB_FIRST , &lsbfirst )) goto fail;
  167. if (ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &spi->bits )) goto fail;
  168. if (ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ , &spi->speed)) goto fail;
  169. spi->fd = fd;
  170. return true;
  171. fail:
  172. close(fd);
  173. spi->fd = -1;
  174. applog(LOG_WARNING, "%s: Failed to open %s", repr, spipath);
  175. return false;
  176. }
/* Transfer routine that knc_init() installs as the SPI port's txrx callback */
#define knc_spi_txrx linux_spi_txrx
  178. static
  179. void knc_clean_flush(struct spi_port * const spi)
  180. {
  181. const uint8_t flushcmd = KNC_REQ_FLUSH_QUEUE << 4;
  182. const size_t spi_req_sz = 0x1000;
  183. spi_clear_buf(spi);
  184. spi_emit_buf(spi, &flushcmd, 1);
  185. spi_emit_nop(spi, spi_req_sz - spi_getbufsz(spi));
  186. applog(LOG_DEBUG, "%s: Issuing flush command to clear out device queues", knc_drv.dname);
  187. spi_txrx(spi);
  188. }
  189. static
  190. bool knc_init(struct thr_info * const thr)
  191. {
  192. const int max_cores = KNC_CORES_PER_CHIP;
  193. struct thr_info *mythr;
  194. struct cgpu_info * const cgpu = thr->cgpu, *proc;
  195. struct knc_device *knc;
  196. struct knc_core *knccore;
  197. struct spi_port *spi;
  198. const int i2c = open(i2cpath, O_RDWR);
  199. int i2cslave, i, j;
  200. uint8_t buf[0x20];
  201. if (unlikely(i2c == -1))
  202. {
  203. applog(LOG_DEBUG, "%s: Failed to open %s", __func__, i2cpath);
  204. return false;
  205. }
  206. knc = malloc(sizeof(*knc));
  207. for (proc = cgpu; proc; )
  208. {
  209. if (proc->device != proc)
  210. {
  211. applog(LOG_WARNING, "%"PRIpreprv": Extra processor?", proc->proc_repr);
  212. proc = proc->next_proc;
  213. continue;
  214. }
  215. i2cslave = atoi(proc->device_path);
  216. if (ioctl(i2c, I2C_SLAVE, i2cslave))
  217. {
  218. applog(LOG_DEBUG, "%s: Failed to select i2c slave 0x%x",
  219. __func__, i2cslave);
  220. return false;
  221. }
  222. for (i = 0; i < max_cores; i += 0x20)
  223. {
  224. i2c_smbus_read_i2c_block_data(i2c, i, 0x20, buf);
  225. for (j = 0; j < 0x20; ++j)
  226. {
  227. mythr = proc->thr[0];
  228. mythr->cgpu_data = knccore = malloc(sizeof(*knccore));
  229. *knccore = (struct knc_core){
  230. .asicno = i2cslave - 0x20,
  231. .coreno = i + j,
  232. .hwerr_in_row = 0,
  233. .hwerr_disable_time = KNC_HWERR_DISABLE_SECS,
  234. .use_dcdc = true,
  235. };
  236. timer_set_now(&knccore->enable_at);
  237. proc->device_data = knc;
  238. switch (buf[j])
  239. {
  240. case KNC_I2CSTATUS_ENABLED:
  241. break;
  242. default: // permanently disabled
  243. proc->deven = DEV_DISABLED;
  244. break;
  245. case KNC_I2CSTATUS_DISABLED:
  246. proc->deven = DEV_RECOVER_DRV;
  247. break;
  248. }
  249. proc = proc->next_proc;
  250. if ((!proc) || proc->device == proc)
  251. goto nomorecores;
  252. }
  253. }
  254. nomorecores: ;
  255. }
  256. spi = malloc(sizeof(*spi));
  257. *knc = (struct knc_device){
  258. .i2c = i2c,
  259. .spi = spi,
  260. .cgpu = cgpu,
  261. .workqueue_max = 1,
  262. };
  263. /* Be careful, read lowl-spi.h comments for warnings */
  264. memset(spi, 0, sizeof(*spi));
  265. spi->txrx = knc_spi_txrx;
  266. spi->cgpu = cgpu;
  267. spi->repr = knc_drv.dname;
  268. spi->logprio = LOG_ERR;
  269. spi->speed = KNC_SPI_SPEED;
  270. spi->delay = KNC_SPI_DELAY;
  271. spi->mode = KNC_SPI_MODE;
  272. spi->bits = KNC_SPI_BITS;
  273. cgpu_set_defaults(cgpu);
  274. if (!knc_spi_open(cgpu->dev_repr, spi))
  275. return false;
  276. knc_clean_flush(spi);
  277. timer_set_now(&thr->tv_poll);
  278. return true;
  279. }
  280. static
  281. void knc_set_queue_full(struct knc_device * const knc)
  282. {
  283. const bool full = (knc->workqueue_size >= knc->workqueue_max);
  284. struct cgpu_info *proc;
  285. for (proc = knc->cgpu; proc; proc = proc->next_proc)
  286. {
  287. struct thr_info * const thr = proc->thr[0];
  288. thr->queue_full = full;
  289. }
  290. }
  291. static
  292. void knc_remove_local_queue(struct knc_device * const knc, struct work * const work)
  293. {
  294. DL_DELETE(knc->workqueue, work);
  295. free_work(work);
  296. --knc->workqueue_size;
  297. }
  298. static
  299. void knc_prune_local_queue(struct thr_info *thr)
  300. {
  301. struct cgpu_info * const cgpu = thr->cgpu;
  302. struct knc_device * const knc = cgpu->device_data;
  303. struct work *work, *tmp;
  304. DL_FOREACH_SAFE(knc->workqueue, work, tmp)
  305. {
  306. if (stale_work(work, false))
  307. knc_remove_local_queue(knc, work);
  308. }
  309. knc_set_queue_full(knc);
  310. }
  311. static
  312. bool knc_queue_append(struct thr_info * const thr, struct work * const work)
  313. {
  314. struct cgpu_info * const cgpu = thr->cgpu;
  315. struct knc_device * const knc = cgpu->device_data;
  316. if (knc->workqueue_size >= knc->workqueue_max)
  317. {
  318. knc_prune_local_queue(thr);
  319. if (thr->queue_full)
  320. return false;
  321. }
  322. DL_APPEND(knc->workqueue, work);
  323. ++knc->workqueue_size;
  324. knc_set_queue_full(knc);
  325. if (thr->queue_full)
  326. knc_prune_local_queue(thr);
  327. return true;
  328. }
/* Sets `out` to the element at the tail of the uthash table that `head`
 * belongs to, or to NULL when `head` is NULL. Relies on uthash internals
 * (hh.tbl->tail, ELMT_FROM_HH); presumably the tail is the most recently
 * added element, as the name suggests — confirm against the uthash version in use. */
#define HASH_LAST_ADDED(head, out) \
(out = (head) ? (ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail)) : NULL)
  331. static
  332. void knc_queue_flush(struct thr_info * const thr)
  333. {
  334. struct cgpu_info * const cgpu = thr->cgpu;
  335. struct knc_device * const knc = cgpu->device_data;
  336. struct work *work, *tmp;
  337. if (knc->cgpu != cgpu)
  338. return;
  339. DL_FOREACH_SAFE(knc->workqueue, work, tmp)
  340. {
  341. knc_remove_local_queue(knc, work);
  342. }
  343. knc_set_queue_full(knc);
  344. HASH_LAST_ADDED(knc->devicework, work);
  345. if (work && stale_work(work, true))
  346. {
  347. knc->need_flush = true;
  348. timer_set_now(&thr->tv_poll);
  349. }
  350. }
  351. static inline
  352. uint16_t get_u16be(const void * const p)
  353. {
  354. const uint8_t * const b = p;
  355. return (((uint16_t)b[0]) << 8) | b[1];
  356. }
  357. static inline
  358. uint32_t get_u32be(const void * const p)
  359. {
  360. const uint8_t * const b = p;
  361. return (((uint32_t)b[0]) << 0x18)
  362. | (((uint32_t)b[1]) << 0x10)
  363. | (((uint32_t)b[2]) << 8)
  364. | b[3];
  365. }
  366. static
  367. void knc_poll(struct thr_info * const thr)
  368. {
  369. struct thr_info *mythr;
  370. struct cgpu_info * const cgpu = thr->cgpu, *proc;
  371. struct knc_device * const knc = cgpu->device_data;
  372. struct spi_port * const spi = knc->spi;
  373. struct knc_core *knccore;
  374. struct work *work, *tmp;
  375. uint8_t buf[0x30], *rxbuf;
  376. int works_sent = 0, asicno, i;
  377. uint16_t workaccept;
  378. int workid = knc->next_id;
  379. uint32_t nonce, coreno;
  380. size_t spi_req_sz = 0x1000;
  381. unsigned long delay_usecs = KNC_POLL_INTERVAL_US;
  382. knc_prune_local_queue(thr);
  383. spi_clear_buf(spi);
  384. if (knc->need_flush)
  385. {
  386. applog(LOG_NOTICE, "%s: Abandoning stale searches to restart", knc_drv.dname);
  387. buf[0] = KNC_REQ_FLUSH_QUEUE << 4;
  388. spi_emit_buf(spi, buf, sizeof(buf));
  389. }
  390. DL_FOREACH(knc->workqueue, work)
  391. {
  392. buf[0] = KNC_REQ_SUBMIT_WORK << 4;
  393. buf[1] = 0;
  394. buf[2] = (workid >> 8) & 0x7f;
  395. buf[3] = workid & 0xff;
  396. for (i = 0; i < 0x20; ++i)
  397. buf[4 + i] = work->midstate[0x1f - i];
  398. for (i = 0; i < 0xc; ++i)
  399. buf[0x24 + i] = work->data[0x4b - i];
  400. spi_emit_buf(spi, buf, sizeof(buf));
  401. ++works_sent;
  402. ++workid;
  403. }
  404. spi_emit_nop(spi, spi_req_sz - spi_getbufsz(spi));
  405. spi_txrx(spi);
  406. rxbuf = spi_getrxbuf(spi);
  407. if (rxbuf[3] & 1)
  408. applog(LOG_DEBUG, "%s: Receive buffer overflow reported", knc_drv.dname);
  409. workaccept = get_u16be(&rxbuf[6]);
  410. applog(LOG_DEBUG, "%s: %lu/%d jobs accepted to queue (max=%d)",
  411. knc_drv.dname, (unsigned long)workaccept, works_sent, knc->workqueue_max);
  412. while (true)
  413. {
  414. rxbuf += 0xc;
  415. spi_req_sz -= 0xc;
  416. if (spi_req_sz < 0xc)
  417. break;
  418. const int rtype = rxbuf[0] >> 6;
  419. if (rtype && opt_debug)
  420. {
  421. char x[(0xc * 2) + 1];
  422. bin2hex(x, rxbuf, 0xc);
  423. applog(LOG_DEBUG, "%s: RECV: %s", knc_drv.dname, x);
  424. }
  425. if (rtype != KNC_REPLY_NONCE_FOUND && rtype != KNC_REPLY_WORK_DONE)
  426. continue;
  427. asicno = (rxbuf[0] & 0x38) >> 3;
  428. coreno = get_u32be(&rxbuf[8]);
  429. proc = cgpu;
  430. while (true)
  431. {
  432. knccore = proc->thr[0]->cgpu_data;
  433. if (knccore->asicno == asicno)
  434. break;
  435. do {
  436. proc = proc->next_proc;
  437. } while(proc != proc->device);
  438. }
  439. for (i = 0; i < coreno; ++i)
  440. proc = proc->next_proc;
  441. mythr = proc->thr[0];
  442. knccore = mythr->cgpu_data;
  443. i = get_u16be(&rxbuf[2]);
  444. HASH_FIND_INT(knc->devicework, &i, work);
  445. if (!work)
  446. {
  447. const char * const msgtype = (rtype == KNC_REPLY_NONCE_FOUND) ? "nonce found" : "work done";
  448. applog(LOG_WARNING, "%"PRIpreprv": Got %s message about unknown work 0x%04x",
  449. proc->proc_repr, msgtype, i);
  450. if (KNC_REPLY_NONCE_FOUND == rtype)
  451. {
  452. nonce = get_u32be(&rxbuf[4]);
  453. nonce = le32toh(nonce);
  454. inc_hw_errors2(mythr, NULL, &nonce);
  455. }
  456. else
  457. inc_hw_errors2(mythr, NULL, NULL);
  458. continue;
  459. }
  460. switch (rtype)
  461. {
  462. case KNC_REPLY_NONCE_FOUND:
  463. nonce = get_u32be(&rxbuf[4]);
  464. nonce = le32toh(nonce);
  465. if (submit_nonce(mythr, work, nonce))
  466. knccore->hwerr_in_row = 0;
  467. break;
  468. case KNC_REPLY_WORK_DONE:
  469. HASH_DEL(knc->devicework, work);
  470. free_work(work);
  471. hashes_done2(mythr, 0x100000000, NULL);
  472. break;
  473. }
  474. }
  475. if (knc->need_flush)
  476. {
  477. knc->need_flush = false;
  478. HASH_ITER(hh, knc->devicework, work, tmp)
  479. {
  480. HASH_DEL(knc->devicework, work);
  481. free_work(work);
  482. }
  483. delay_usecs = 0;
  484. }
  485. if (workaccept)
  486. {
  487. if (workaccept >= knc->workqueue_max)
  488. {
  489. knc->workqueue_max = workaccept;
  490. delay_usecs = 0;
  491. }
  492. DL_FOREACH_SAFE(knc->workqueue, work, tmp)
  493. {
  494. --knc->workqueue_size;
  495. DL_DELETE(knc->workqueue, work);
  496. work->device_id = knc->next_id++ & 0x7fff;
  497. HASH_ADD(hh, knc->devicework, device_id, sizeof(work->device_id), work);
  498. if (!--workaccept)
  499. break;
  500. }
  501. knc_set_queue_full(knc);
  502. }
  503. timer_set_delay_from_now(&thr->tv_poll, delay_usecs);
  504. }
  505. static
  506. bool _knc_core_setstatus(struct thr_info * const thr, uint8_t val)
  507. {
  508. struct cgpu_info * const proc = thr->cgpu;
  509. struct knc_device * const knc = proc->device_data;
  510. struct knc_core * const knccore = thr->cgpu_data;
  511. const int i2c = knc->i2c;
  512. const int i2cslave = 0x20 + knccore->asicno;
  513. if (ioctl(i2c, I2C_SLAVE, i2cslave))
  514. {
  515. applog(LOG_DEBUG, "%"PRIpreprv": %s: Failed to select i2c slave 0x%x",
  516. proc->proc_repr, __func__, i2cslave);
  517. return false;
  518. }
  519. return (-1 != i2c_smbus_write_byte_data(i2c, knccore->coreno, val));
  520. }
  521. static
  522. void knc_core_disable(struct thr_info * const thr)
  523. {
  524. _knc_core_setstatus(thr, 0);
  525. }
  526. static
  527. void knc_core_enable(struct thr_info * const thr)
  528. {
  529. struct knc_core * const knccore = thr->cgpu_data;
  530. timer_set_now(&knccore->enable_at);
  531. _knc_core_setstatus(thr, 1);
  532. }
  533. static
  534. float knc_dcdc_decode_5_11(uint16_t raw)
  535. {
  536. if (raw == 0)
  537. return 0.0;
  538. int dcdc_vin_exp = (raw & 0xf800) >> 11;
  539. float dcdc_vin_man = raw & 0x07ff;
  540. if (dcdc_vin_exp >= 16)
  541. dcdc_vin_exp = -32 + dcdc_vin_exp;
  542. float dcdc_vin = dcdc_vin_man * exp2(dcdc_vin_exp);
  543. return dcdc_vin;
  544. }
  545. static
  546. void knc_hw_error(struct thr_info * const thr)
  547. {
  548. struct cgpu_info * const proc = thr->cgpu;
  549. struct knc_core * const knccore = thr->cgpu_data;
  550. if(knccore->hwerr_in_row == 0)
  551. timer_set_now(&knccore->first_hwerr);
  552. ++knccore->hwerr_in_row;
  553. if (knccore->hwerr_in_row >= KNC_MAX_HWERR_IN_ROW && proc->deven == DEV_ENABLED)
  554. {
  555. struct timeval now;
  556. timer_set_now(&now);
  557. float first_err_dt = tdiff(&now, &knccore->first_hwerr);
  558. float enable_dt = tdiff(&now, &knccore->enable_at);
  559. if(first_err_dt * 1.5 > enable_dt)
  560. {
  561. // didn't really do much good
  562. knccore->hwerr_disable_time *= 2;
  563. if (knccore->hwerr_disable_time > KNC_MAX_DISABLE_SECS)
  564. knccore->hwerr_disable_time = KNC_MAX_DISABLE_SECS;
  565. }
  566. else
  567. knccore->hwerr_disable_time = KNC_HWERR_DISABLE_SECS;
  568. proc->deven = DEV_RECOVER_DRV;
  569. applog(LOG_WARNING, "%"PRIpreprv": Disabled. %d hwerr in %.3f / %.3f . disabled %d s",
  570. proc->proc_repr, knccore->hwerr_in_row,
  571. enable_dt, first_err_dt, knccore->hwerr_disable_time);
  572. timer_set_delay_from_now(&knccore->enable_at, knccore->hwerr_disable_time * 1000000);
  573. }
  574. }
  575. static
  576. bool knc_get_stats(struct cgpu_info * const cgpu)
  577. {
  578. if (cgpu->device != cgpu)
  579. return true;
  580. struct thr_info *thr = cgpu->thr[0];
  581. struct knc_core *knccore = thr->cgpu_data;
  582. struct cgpu_info *proc;
  583. const int i2cdev = knccore->asicno + 3;
  584. const int i2cslave_temp = 0x48;
  585. const int i2cslave_dcdc[] = {0x10, 0x12, 0x14, 0x17};
  586. int die, i;
  587. int i2c;
  588. int32_t rawtemp, rawvolt, rawcurrent;
  589. float temp, volt, current;
  590. struct timeval tv_now;
  591. bool rv = false;
  592. char i2cpath[sizeof(KNC_I2C_TEMPLATE)];
  593. sprintf(i2cpath, KNC_I2C_TEMPLATE, i2cdev);
  594. i2c = open(i2cpath, O_RDWR);
  595. if (i2c == -1)
  596. {
  597. applog(LOG_DEBUG, "%s: %s: Failed to open %s",
  598. cgpu->dev_repr, __func__, i2cpath);
  599. return false;
  600. }
  601. if (ioctl(i2c, I2C_SLAVE, i2cslave_temp))
  602. {
  603. applog(LOG_DEBUG, "%s: %s: Failed to select i2c slave 0x%x",
  604. cgpu->dev_repr, __func__, i2cslave_temp);
  605. goto out;
  606. }
  607. rawtemp = i2c_smbus_read_word_data(i2c, 0);
  608. if (rawtemp == -1)
  609. goto out;
  610. temp = ((float)(rawtemp & 0xff));
  611. if (rawtemp & 0x8000)
  612. temp += 0.5;
  613. /* DCDC i2c slaves are on 0x10 + [0-7]
  614. 8 DCDC boards have all populated
  615. 4 DCDC boards only have 0,2,4,7 populated
  616. Only 0,2,4,7 are used
  617. Each DCDC powers one die in the chip, each die has 48 cores
  618. Datasheet at http://www.lineagepower.com/oem/pdf/MDT040A0X.pdf
  619. */
  620. timer_set_now(&tv_now);
  621. volt = current = 0;
  622. for (proc = cgpu, i = 0; proc && proc->device == cgpu; proc = proc->next_proc, ++i)
  623. {
  624. thr = proc->thr[0];
  625. knccore = thr->cgpu_data;
  626. die = i / KNC_CORES_PER_DIE;
  627. if (0 == i % KNC_CORES_PER_DIE && knccore->use_dcdc)
  628. {
  629. if (ioctl(i2c, I2C_SLAVE, i2cslave_dcdc[die]))
  630. {
  631. applog(LOG_DEBUG, "%s: %s: Failed to select i2c slave 0x%x",
  632. cgpu->dev_repr, __func__, i2cslave_dcdc[die]);
  633. goto out;
  634. }
  635. rawvolt = i2c_smbus_read_word_data(i2c, 0x8b); // VOUT
  636. if (rawvolt == -1)
  637. goto out;
  638. rawcurrent = i2c_smbus_read_word_data(i2c, 0x8c); // IOUT
  639. if (rawcurrent == -1)
  640. goto out;
  641. volt = (float)rawvolt * exp2(-10);
  642. current = (float)knc_dcdc_decode_5_11(rawcurrent);
  643. applog(LOG_DEBUG, "%s: die %d %6.3fV %5.2fA",
  644. cgpu->dev_repr, die, volt, current);
  645. }
  646. proc->temp = temp;
  647. knccore->volt = volt;
  648. knccore->current = current;
  649. // NOTE: We need to check _mt_disable_called because otherwise enabling won't assert it to i2c (it's false when getting stats for eg proc 0 before proc 1+ haven't initialised completely yet)
  650. if (proc->deven == DEV_RECOVER_DRV && timer_passed(&knccore->enable_at, &tv_now) && thr->_mt_disable_called)
  651. {
  652. knccore->hwerr_in_row = 0;
  653. proc_enable(proc);
  654. }
  655. }
  656. rv = true;
  657. out:
  658. close(i2c);
  659. return rv;
  660. }
  661. static
  662. struct api_data *knc_api_extra_device_status(struct cgpu_info * const cgpu)
  663. {
  664. struct api_data *root = NULL;
  665. struct thr_info * const thr = cgpu->thr[0];
  666. struct knc_core * const knccore = thr->cgpu_data;
  667. if (knccore->use_dcdc)
  668. {
  669. root = api_add_volts(root, "Voltage", &knccore->volt, false);
  670. root = api_add_volts(root, "DCDC Current", &knccore->current, false);
  671. }
  672. return root;
  673. }
  674. #ifdef HAVE_CURSES
  675. static
  676. void knc_wlogprint_status(struct cgpu_info * const cgpu)
  677. {
  678. struct thr_info * const thr = cgpu->thr[0];
  679. struct knc_core * const knccore = thr->cgpu_data;
  680. if (knccore->use_dcdc)
  681. wlogprint("Voltage: %.3f DCDC Current: %.3f\n",
  682. knccore->volt, knccore->current);
  683. }
  684. #endif
  685. static
  686. const char *knc_set_use_dcdc(struct cgpu_info *proc, const char * const optname, const char * const newvalue, char * const replybuf, enum bfg_set_device_replytype * const out_success)
  687. {
  688. int core_index_on_die = proc->proc_id % KNC_CORES_PER_DIE;
  689. bool nv;
  690. char *end;
  691. nv = bfg_strtobool(newvalue, &end, 0);
  692. if (!(newvalue[0] && !end[0]))
  693. return "Usage: use_dcdc=yes/no";
  694. if (core_index_on_die)
  695. {
  696. const int seek = (proc->proc_id / KNC_CORES_PER_DIE) * KNC_CORES_PER_DIE;
  697. proc = proc->device;
  698. for (int i = 0; i < seek; ++i)
  699. proc = proc->next_proc;
  700. }
  701. {
  702. struct thr_info * const mythr = proc->thr[0];
  703. struct knc_core * const knccore = mythr->cgpu_data;
  704. if (knccore->use_dcdc == nv)
  705. return NULL;
  706. }
  707. for (int i = 0; i < KNC_CORES_PER_DIE; (proc = proc->next_proc), ++i)
  708. {
  709. struct thr_info * const mythr = proc->thr[0];
  710. struct knc_core * const knccore = mythr->cgpu_data;
  711. knccore->use_dcdc = nv;
  712. }
  713. return NULL;
  714. }
/* Settable per-device options: option name, handler, help text.
 * The {NULL} entry ends the table. */
static const struct bfg_set_device_definition knc_set_device_funcs[] = {
{"use_dcdc", knc_set_use_dcdc, "whether to access DCDC module for voltage/current information"},
{NULL}
};
/* Driver descriptor: binds the functions in this file to the framework's
 * hooks. Work is handled through the queue-based miner loop. */
struct device_drv knc_drv = {
.dname = "knc",
.name = "KNC",
.drv_detect = knc_detect,
.thread_init = knc_init,
/* Per-core enable/disable is asserted to the ASIC over i2c */
.thread_disable = knc_core_disable,
.thread_enable = knc_core_enable,
.minerloop = minerloop_queue,
.queue_append = knc_queue_append,
.queue_flush = knc_queue_flush,
.poll = knc_poll,
.hw_error = knc_hw_error,
.get_stats = knc_get_stats,
.get_api_extra_device_status = knc_api_extra_device_status,
/* The status line hook is only built when curses support is compiled in */
#ifdef HAVE_CURSES
.proc_wlogprint_status = knc_wlogprint_status,
#endif
};