driver-knc.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829
  1. /*
  2. * Copyright 2013 Luke Dashjr
  3. *
  4. * This program is free software; you can redistribute it and/or modify it
  5. * under the terms of the GNU General Public License as published by the Free
  6. * Software Foundation; either version 3 of the License, or (at your option)
  7. * any later version. See COPYING for more details.
  8. */
  9. #include "config.h"
  10. #include <stdbool.h>
  11. #include <stddef.h>
  12. #include <stdint.h>
  13. #include <sys/ioctl.h>
  14. #include <sys/types.h>
  15. #include <sys/stat.h>
  16. #include <fcntl.h>
  17. #include <math.h>
  18. #ifdef HAVE_LINUX_I2C_DEV_USER_H
  19. #include <linux/i2c-dev-user.h>
  20. #else
  21. #include <linux/i2c-dev.h>
  22. #endif
  23. #include <linux/spi/spidev.h>
  24. #include <uthash.h>
  25. #include "deviceapi.h"
  26. #include "logging.h"
  27. #include "miner.h"
  28. #include "spidevc.h"
/* Default delay between two knc_poll() runs, in microseconds */
#define KNC_POLL_INTERVAL_US 10000
/* SPI link settings copied into the spi_port by knc_init():
 * clock speed in Hz, per-transfer delay in microseconds, spidev mode flags
 * and bits per word */
#define KNC_SPI_SPEED 3000000
#define KNC_SPI_DELAY 0
#define KNC_SPI_MODE (SPI_CPHA | SPI_CPOL | SPI_CS_HIGH)
#define KNC_SPI_BITS 8
/*
 The core disable/enable strategy is as follows:

 If a core gets 10 HW errors in a row without doing any proper work,
 it is disabled for 10 seconds.

 The next time the core reaches 10 HW errors in a row, the time since it was
 last enabled is compared with the time since it started getting errors.
 If those times are close (within 50%), the disable time is doubled;
 if not, the core is just disabled for 10 seconds again.
*/
/* Number of consecutive hardware errors at which knc_hw_error() disables a core */
#define KNC_MAX_HWERR_IN_ROW 10
/* Initial (and reset) length of a core's disable period, in seconds */
#define KNC_HWERR_DISABLE_SECS (10)
/* Upper bound for the doubling disable period, in seconds */
#define KNC_MAX_DISABLE_SECS (15 * 60)
/* I2C bus device used to read and write per-core status bytes */
static const char * const i2cpath = "/dev/i2c-2";
/* Format for the per-ASIC I2C bus path opened by knc_get_stats() */
#define KNC_I2C_TEMPLATE "/dev/i2c-%d"
/* Request command codes; callers shift them left by 4 into the first byte
 * of an SPI request */
enum knc_request_cmd {
	KNC_REQ_SUBMIT_WORK = 2,
	KNC_REQ_FLUSH_QUEUE = 3,
};
/* Reply record types; knc_poll() compares them with the top two bits of the
 * first byte of each reply record */
enum knc_reply_type {
	KNC_REPLY_NONCE_FOUND = 1,
	KNC_REPLY_WORK_DONE = 2,
};
/* Per-core status byte values as read over I2C by knc_init(); any other
 * value is treated there as permanently disabled */
enum knc_i2c_core_status {
	KNC_I2CSTATUS_DISABLED = 2,
	KNC_I2CSTATUS_ENABLED = 3,
};
/* Driver registration macro from the project headers.
 * NOTE(review): presumably this also declares knc_drv, which is referenced
 * below before its definition - confirm against the macro's definition. */
BFG_REGISTER_DRIVER(knc_drv)
/* State shared by every processor; knc_init() stores one instance in each
 * processor's device_data */
struct knc_device {
	/* File descriptor of the I2C bus used for core status reads/writes */
	int i2c;
	/* SPI port used to exchange requests and replies */
	struct spi_port *spi;
	/* First processor; used as the head when walking next_proc */
	struct cgpu_info *cgpu;

	/* Set by knc_queue_flush(); makes the next knc_poll() send a flush command */
	bool need_flush;
	/* Doubly-linked list of work queued locally and not yet accepted by the device */
	struct work *workqueue;
	/* Number of entries currently in workqueue */
	int workqueue_size;
	/* Queue size at which the queue is reported full; raised by knc_poll() */
	int workqueue_max;
	/* Counter from which the next device work id is derived */
	int next_id;

	/* uthash table of work accepted by the device, keyed by device_id */
	struct work *devicework;
};
/* Per-core state, stored in the core's thread cgpu_data by knc_init() */
struct knc_core {
	/* ASIC index: the ASIC's I2C slave address minus 0x20 */
	int asicno;
	/* Core index within the ASIC */
	int coreno;

	/* Latest DCDC readings stored by knc_get_stats() */
	float volt;
	float current;

	/* Consecutive hardware errors counted by knc_hw_error() */
	int hwerr_in_row;
	/* Current length of the disable period, in seconds */
	int hwerr_disable_time;
	/* Time the core was last enabled; while disabled by knc_hw_error(),
	 * the time after which knc_get_stats() may re-enable it */
	struct timeval enable_at;
	/* Time of the first error of the current error streak */
	struct timeval first_hwerr;
};
  82. static
  83. bool knc_detect_one(const char *devpath)
  84. {
  85. static struct cgpu_info *prev_cgpu = NULL;
  86. struct cgpu_info *cgpu;
  87. int i;
  88. const int fd = open(i2cpath, O_RDWR);
  89. char *leftover = NULL;
  90. const int i2cslave = strtol(devpath, &leftover, 0);
  91. uint8_t buf[0x20];
  92. if (leftover && leftover[0])
  93. return false;
  94. if (unlikely(fd == -1))
  95. {
  96. applog(LOG_DEBUG, "%s: Failed to open %s", __func__, i2cpath);
  97. return false;
  98. }
  99. if (ioctl(fd, I2C_SLAVE, i2cslave))
  100. {
  101. close(fd);
  102. applog(LOG_DEBUG, "%s: Failed to select i2c slave 0x%x",
  103. __func__, i2cslave);
  104. return false;
  105. }
  106. i = i2c_smbus_read_i2c_block_data(fd, 0, 0x20, buf);
  107. close(fd);
  108. if (-1 == i)
  109. {
  110. applog(LOG_DEBUG, "%s: 0x%x: Failed to read i2c block data",
  111. __func__, i2cslave);
  112. return false;
  113. }
  114. for (i = 0; ; ++i)
  115. {
  116. if (buf[i] == 3)
  117. break;
  118. if (i == 0x1f)
  119. return false;
  120. }
  121. cgpu = malloc(sizeof(*cgpu));
  122. *cgpu = (struct cgpu_info){
  123. .drv = &knc_drv,
  124. .device_path = strdup(devpath),
  125. .deven = DEV_ENABLED,
  126. .procs = 192,
  127. .threads = prev_cgpu ? 0 : 1,
  128. };
  129. const bool rv = add_cgpu_slave(cgpu, prev_cgpu);
  130. prev_cgpu = cgpu;
  131. return rv;
  132. }
  133. static int knc_detect_auto(void)
  134. {
  135. const int first = 0x20, last = 0x26;
  136. char devpath[4];
  137. int found = 0, i;
  138. for (i = first; i <= last; ++i)
  139. {
  140. sprintf(devpath, "%d", i);
  141. if (knc_detect_one(devpath))
  142. ++found;
  143. }
  144. return found;
  145. }
  146. static void knc_detect(void)
  147. {
  148. generic_detect(&knc_drv, knc_detect_one, knc_detect_auto, GDF_REQUIRE_DNAME | GDF_DEFAULT_NOAUTO);
  149. }
  150. static
  151. bool knc_spi_open(const char *repr, struct spi_port * const spi)
  152. {
  153. const char * const spipath = "/dev/spidev1.0";
  154. const int fd = open(spipath, O_RDWR);
  155. const uint8_t lsbfirst = 0;
  156. if (fd == -1)
  157. return false;
  158. if (ioctl(fd, SPI_IOC_WR_MODE , &spi->mode )) goto fail;
  159. if (ioctl(fd, SPI_IOC_WR_LSB_FIRST , &lsbfirst )) goto fail;
  160. if (ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &spi->bits )) goto fail;
  161. if (ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ , &spi->speed)) goto fail;
  162. spi->fd = fd;
  163. return true;
  164. fail:
  165. close(fd);
  166. spi->fd = -1;
  167. applog(LOG_WARNING, "%s: Failed to open %s", repr, spipath);
  168. return false;
  169. }
  170. static
  171. bool knc_spi_txrx(struct spi_port * const spi)
  172. {
  173. const void * const wrbuf = spi_gettxbuf(spi);
  174. void * const rdbuf = spi_getrxbuf(spi);
  175. const size_t bufsz = spi_getbufsz(spi);
  176. const int fd = spi->fd;
  177. struct spi_ioc_transfer xf = {
  178. .tx_buf = (uintptr_t) wrbuf,
  179. .rx_buf = (uintptr_t) rdbuf,
  180. .len = bufsz,
  181. .delay_usecs = spi->delay,
  182. .speed_hz = spi->speed,
  183. .bits_per_word = spi->bits,
  184. };
  185. return (ioctl(fd, SPI_IOC_MESSAGE(1), &xf) > 0);
  186. }
  187. static
  188. void knc_clean_flush(struct spi_port * const spi)
  189. {
  190. const uint8_t flushcmd = KNC_REQ_FLUSH_QUEUE << 4;
  191. const size_t spi_req_sz = 0x1000;
  192. spi_clear_buf(spi);
  193. spi_emit_buf(spi, &flushcmd, 1);
  194. spi_emit_nop(spi, spi_req_sz - spi_getbufsz(spi));
  195. applog(LOG_DEBUG, "%s: Issuing flush command to clear out device queues", knc_drv.dname);
  196. spi_txrx(spi);
  197. }
  198. static
  199. bool knc_init(struct thr_info * const thr)
  200. {
  201. const int max_cores = 192;
  202. struct thr_info *mythr;
  203. struct cgpu_info * const cgpu = thr->cgpu, *proc;
  204. struct knc_device *knc;
  205. struct knc_core *knccore;
  206. struct spi_port *spi;
  207. const int i2c = open(i2cpath, O_RDWR);
  208. int i2cslave, i, j;
  209. uint8_t buf[0x20];
  210. if (unlikely(i2c == -1))
  211. {
  212. applog(LOG_DEBUG, "%s: Failed to open %s", __func__, i2cpath);
  213. return false;
  214. }
  215. knc = malloc(sizeof(*knc));
  216. for (proc = cgpu; proc; )
  217. {
  218. if (proc->device != proc)
  219. {
  220. applog(LOG_WARNING, "%"PRIpreprv": Extra processor?", proc->proc_repr);
  221. proc = proc->next_proc;
  222. continue;
  223. }
  224. i2cslave = atoi(proc->device_path);
  225. if (ioctl(i2c, I2C_SLAVE, i2cslave))
  226. {
  227. applog(LOG_DEBUG, "%s: Failed to select i2c slave 0x%x",
  228. __func__, i2cslave);
  229. return false;
  230. }
  231. for (i = 0; i < max_cores; i += 0x20)
  232. {
  233. i2c_smbus_read_i2c_block_data(i2c, i, 0x20, buf);
  234. for (j = 0; j < 0x20; ++j)
  235. {
  236. mythr = proc->thr[0];
  237. mythr->cgpu_data = knccore = malloc(sizeof(*knccore));
  238. *knccore = (struct knc_core){
  239. .asicno = i2cslave - 0x20,
  240. .coreno = i + j,
  241. .hwerr_in_row = 0,
  242. .hwerr_disable_time = KNC_HWERR_DISABLE_SECS,
  243. };
  244. timer_set_now(&knccore->enable_at);
  245. proc->device_data = knc;
  246. switch (buf[j])
  247. {
  248. case KNC_I2CSTATUS_ENABLED:
  249. break;
  250. default: // permanently disabled
  251. proc->deven = DEV_DISABLED;
  252. break;
  253. case KNC_I2CSTATUS_DISABLED:
  254. proc->deven = DEV_RECOVER_DRV;
  255. break;
  256. }
  257. proc = proc->next_proc;
  258. if ((!proc) || proc->device == proc)
  259. goto nomorecores;
  260. }
  261. }
  262. nomorecores: ;
  263. }
  264. spi = malloc(sizeof(*spi));
  265. *knc = (struct knc_device){
  266. .i2c = i2c,
  267. .spi = spi,
  268. .cgpu = cgpu,
  269. .workqueue_max = 1,
  270. };
  271. /* Be careful, read spidevc.h comments for warnings */
  272. memset(spi, 0, sizeof(*spi));
  273. spi->txrx = knc_spi_txrx;
  274. spi->cgpu = cgpu;
  275. spi->repr = knc_drv.dname;
  276. spi->logprio = LOG_ERR;
  277. spi->speed = KNC_SPI_SPEED;
  278. spi->delay = KNC_SPI_DELAY;
  279. spi->mode = KNC_SPI_MODE;
  280. spi->bits = KNC_SPI_BITS;
  281. if (!knc_spi_open(cgpu->dev_repr, spi))
  282. return false;
  283. knc_clean_flush(spi);
  284. timer_set_now(&thr->tv_poll);
  285. return true;
  286. }
  287. static
  288. void knc_set_queue_full(struct knc_device * const knc)
  289. {
  290. const bool full = (knc->workqueue_size >= knc->workqueue_max);
  291. struct cgpu_info *proc;
  292. for (proc = knc->cgpu; proc; proc = proc->next_proc)
  293. {
  294. struct thr_info * const thr = proc->thr[0];
  295. thr->queue_full = full;
  296. }
  297. }
  298. static
  299. void knc_remove_local_queue(struct knc_device * const knc, struct work * const work)
  300. {
  301. DL_DELETE(knc->workqueue, work);
  302. free_work(work);
  303. --knc->workqueue_size;
  304. }
  305. static
  306. void knc_prune_local_queue(struct thr_info *thr)
  307. {
  308. struct cgpu_info * const cgpu = thr->cgpu;
  309. struct knc_device * const knc = cgpu->device_data;
  310. struct work *work, *tmp;
  311. DL_FOREACH_SAFE(knc->workqueue, work, tmp)
  312. {
  313. if (stale_work(work, false))
  314. knc_remove_local_queue(knc, work);
  315. }
  316. knc_set_queue_full(knc);
  317. }
  318. static
  319. bool knc_queue_append(struct thr_info * const thr, struct work * const work)
  320. {
  321. struct cgpu_info * const cgpu = thr->cgpu;
  322. struct knc_device * const knc = cgpu->device_data;
  323. if (knc->workqueue_size >= knc->workqueue_max)
  324. {
  325. knc_prune_local_queue(thr);
  326. if (thr->queue_full)
  327. return false;
  328. }
  329. DL_APPEND(knc->workqueue, work);
  330. ++knc->workqueue_size;
  331. knc_set_queue_full(knc);
  332. if (thr->queue_full)
  333. knc_prune_local_queue(thr);
  334. return true;
  335. }
/* Set `out` to the element at the tail of the uthash table rooted at `head`,
 * or to NULL when `head` is NULL.
 * NOTE(review): the name implies the tail is the most recently added
 * element; that presumably holds only while the table is never re-sorted. */
#define HASH_LAST_ADDED(head, out)  \
	(out = (head) ? (ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail)) : NULL)
  338. static
  339. void knc_queue_flush(struct thr_info * const thr)
  340. {
  341. struct cgpu_info * const cgpu = thr->cgpu;
  342. struct knc_device * const knc = cgpu->device_data;
  343. struct work *work, *tmp;
  344. if (knc->cgpu != cgpu)
  345. return;
  346. DL_FOREACH_SAFE(knc->workqueue, work, tmp)
  347. {
  348. knc_remove_local_queue(knc, work);
  349. }
  350. knc_set_queue_full(knc);
  351. HASH_LAST_ADDED(knc->devicework, work);
  352. if (work && stale_work(work, true))
  353. {
  354. knc->need_flush = true;
  355. timer_set_now(&thr->tv_poll);
  356. }
  357. }
  358. static inline
  359. uint16_t get_u16be(const void * const p)
  360. {
  361. const uint8_t * const b = p;
  362. return (((uint16_t)b[0]) << 8) | b[1];
  363. }
  364. static inline
  365. uint32_t get_u32be(const void * const p)
  366. {
  367. const uint8_t * const b = p;
  368. return (((uint32_t)b[0]) << 0x18)
  369. | (((uint32_t)b[1]) << 0x10)
  370. | (((uint32_t)b[2]) << 8)
  371. | b[3];
  372. }
  373. static
  374. void knc_poll(struct thr_info * const thr)
  375. {
  376. struct thr_info *mythr;
  377. struct cgpu_info * const cgpu = thr->cgpu, *proc;
  378. struct knc_device * const knc = cgpu->device_data;
  379. struct spi_port * const spi = knc->spi;
  380. struct knc_core *knccore;
  381. struct work *work, *tmp;
  382. uint8_t buf[0x30], *rxbuf;
  383. int works_sent = 0, asicno, i;
  384. uint16_t workaccept;
  385. int workid = knc->next_id;
  386. uint32_t nonce, coreno;
  387. size_t spi_req_sz = 0x1000;
  388. unsigned long delay_usecs = KNC_POLL_INTERVAL_US;
  389. knc_prune_local_queue(thr);
  390. spi_clear_buf(spi);
  391. if (knc->need_flush)
  392. {
  393. applog(LOG_NOTICE, "%s: Abandoning stale searches to restart", knc_drv.dname);
  394. buf[0] = KNC_REQ_FLUSH_QUEUE << 4;
  395. spi_emit_buf(spi, buf, sizeof(buf));
  396. }
  397. DL_FOREACH(knc->workqueue, work)
  398. {
  399. buf[0] = KNC_REQ_SUBMIT_WORK << 4;
  400. buf[1] = 0;
  401. buf[2] = (workid >> 8) & 0x7f;
  402. buf[3] = workid & 0xff;
  403. for (i = 0; i < 0x20; ++i)
  404. buf[4 + i] = work->midstate[0x1f - i];
  405. for (i = 0; i < 0xc; ++i)
  406. buf[0x24 + i] = work->data[0x4b - i];
  407. spi_emit_buf(spi, buf, sizeof(buf));
  408. ++works_sent;
  409. ++workid;
  410. }
  411. spi_emit_nop(spi, spi_req_sz - spi_getbufsz(spi));
  412. spi_txrx(spi);
  413. rxbuf = spi_getrxbuf(spi);
  414. if (rxbuf[3] & 1)
  415. applog(LOG_DEBUG, "%s: Receive buffer overflow reported", knc_drv.dname);
  416. workaccept = get_u16be(&rxbuf[6]);
  417. applog(LOG_DEBUG, "%s: %lu/%d jobs accepted to queue (max=%d)",
  418. knc_drv.dname, (unsigned long)workaccept, works_sent, knc->workqueue_max);
  419. while (true)
  420. {
  421. rxbuf += 0xc;
  422. spi_req_sz -= 0xc;
  423. if (spi_req_sz < 0xc)
  424. break;
  425. const int rtype = rxbuf[0] >> 6;
  426. if (rtype && opt_debug)
  427. {
  428. char x[(0xc * 2) + 1];
  429. bin2hex(x, rxbuf, 0xc);
  430. applog(LOG_DEBUG, "%s: RECV: %s", knc_drv.dname, x);
  431. }
  432. if (rtype != KNC_REPLY_NONCE_FOUND && rtype != KNC_REPLY_WORK_DONE)
  433. continue;
  434. asicno = (rxbuf[0] & 0x38) >> 3;
  435. coreno = get_u32be(&rxbuf[8]);
  436. proc = cgpu;
  437. while (true)
  438. {
  439. knccore = proc->thr[0]->cgpu_data;
  440. if (knccore->asicno == asicno)
  441. break;
  442. do {
  443. proc = proc->next_proc;
  444. } while(proc != proc->device);
  445. }
  446. for (i = 0; i < coreno; ++i)
  447. proc = proc->next_proc;
  448. mythr = proc->thr[0];
  449. knccore = mythr->cgpu_data;
  450. i = get_u16be(&rxbuf[2]);
  451. HASH_FIND_INT(knc->devicework, &i, work);
  452. if (!work)
  453. {
  454. const char * const msgtype = (rtype == KNC_REPLY_NONCE_FOUND) ? "nonce found" : "work done";
  455. applog(LOG_WARNING, "%"PRIpreprv": Got %s message about unknown work 0x%04x",
  456. proc->proc_repr, msgtype, i);
  457. if (KNC_REPLY_NONCE_FOUND == rtype)
  458. {
  459. nonce = get_u32be(&rxbuf[4]);
  460. nonce = le32toh(nonce);
  461. inc_hw_errors2(mythr, NULL, &nonce);
  462. }
  463. else
  464. inc_hw_errors2(mythr, NULL, NULL);
  465. continue;
  466. }
  467. switch (rtype)
  468. {
  469. case KNC_REPLY_NONCE_FOUND:
  470. nonce = get_u32be(&rxbuf[4]);
  471. nonce = le32toh(nonce);
  472. if (submit_nonce(mythr, work, nonce))
  473. knccore->hwerr_in_row = 0;
  474. break;
  475. case KNC_REPLY_WORK_DONE:
  476. HASH_DEL(knc->devicework, work);
  477. free_work(work);
  478. hashes_done2(mythr, 0x100000000, NULL);
  479. break;
  480. }
  481. }
  482. if (knc->need_flush)
  483. {
  484. knc->need_flush = false;
  485. HASH_ITER(hh, knc->devicework, work, tmp)
  486. {
  487. HASH_DEL(knc->devicework, work);
  488. free_work(work);
  489. }
  490. delay_usecs = 0;
  491. }
  492. if (workaccept)
  493. {
  494. if (workaccept >= knc->workqueue_max)
  495. {
  496. knc->workqueue_max = workaccept;
  497. delay_usecs = 0;
  498. }
  499. DL_FOREACH_SAFE(knc->workqueue, work, tmp)
  500. {
  501. --knc->workqueue_size;
  502. DL_DELETE(knc->workqueue, work);
  503. work->device_id = knc->next_id++ & 0x7fff;
  504. HASH_ADD_INT(knc->devicework, device_id, work);
  505. if (!--workaccept)
  506. break;
  507. }
  508. knc_set_queue_full(knc);
  509. }
  510. timer_set_delay_from_now(&thr->tv_poll, delay_usecs);
  511. }
  512. static
  513. bool _knc_core_setstatus(struct thr_info * const thr, uint8_t val)
  514. {
  515. struct cgpu_info * const proc = thr->cgpu;
  516. struct knc_device * const knc = proc->device_data;
  517. struct knc_core * const knccore = thr->cgpu_data;
  518. const int i2c = knc->i2c;
  519. const int i2cslave = 0x20 + knccore->asicno;
  520. if (ioctl(i2c, I2C_SLAVE, i2cslave))
  521. {
  522. applog(LOG_DEBUG, "%"PRIpreprv": %s: Failed to select i2c slave 0x%x",
  523. proc->proc_repr, __func__, i2cslave);
  524. return false;
  525. }
  526. return (-1 != i2c_smbus_write_byte_data(i2c, knccore->coreno, val));
  527. }
  528. static
  529. void knc_core_disable(struct thr_info * const thr)
  530. {
  531. _knc_core_setstatus(thr, 0);
  532. }
  533. static
  534. void knc_core_enable(struct thr_info * const thr)
  535. {
  536. struct knc_core * const knccore = thr->cgpu_data;
  537. timer_set_now(&knccore->enable_at);
  538. _knc_core_setstatus(thr, 1);
  539. }
  540. static
  541. float knc_dcdc_decode_5_11(uint16_t raw)
  542. {
  543. if (raw == 0)
  544. return 0.0;
  545. int dcdc_vin_exp = (raw & 0xf800) >> 11;
  546. float dcdc_vin_man = raw & 0x07ff;
  547. if (dcdc_vin_exp >= 16)
  548. dcdc_vin_exp = -32 + dcdc_vin_exp;
  549. float dcdc_vin = dcdc_vin_man * exp2(dcdc_vin_exp);
  550. return dcdc_vin;
  551. }
  552. static
  553. void knc_hw_error(struct thr_info * const thr)
  554. {
  555. struct cgpu_info * const proc = thr->cgpu;
  556. struct knc_core * const knccore = thr->cgpu_data;
  557. if(knccore->hwerr_in_row == 0)
  558. timer_set_now(&knccore->first_hwerr);
  559. ++knccore->hwerr_in_row;
  560. if (knccore->hwerr_in_row >= KNC_MAX_HWERR_IN_ROW && proc->deven == DEV_ENABLED)
  561. {
  562. struct timeval now;
  563. timer_set_now(&now);
  564. float first_err_dt = tdiff(&now, &knccore->first_hwerr);
  565. float enable_dt = tdiff(&now, &knccore->enable_at);
  566. if(first_err_dt * 1.5 > enable_dt)
  567. {
  568. // didn't really do much good
  569. knccore->hwerr_disable_time *= 2;
  570. if (knccore->hwerr_disable_time > KNC_MAX_DISABLE_SECS)
  571. knccore->hwerr_disable_time = KNC_MAX_DISABLE_SECS;
  572. }
  573. else
  574. knccore->hwerr_disable_time = KNC_HWERR_DISABLE_SECS;
  575. proc->deven = DEV_RECOVER_DRV;
  576. applog(LOG_WARNING, "%"PRIpreprv": Disabled. %d hwerr in %.3f / %.3f . disabled %d s",
  577. proc->proc_repr, knccore->hwerr_in_row,
  578. enable_dt, first_err_dt, knccore->hwerr_disable_time);
  579. timer_set_delay_from_now(&knccore->enable_at, knccore->hwerr_disable_time * 1000000);
  580. }
  581. }
  582. static
  583. bool knc_get_stats(struct cgpu_info * const cgpu)
  584. {
  585. if (cgpu->device != cgpu)
  586. return true;
  587. struct thr_info *thr = cgpu->thr[0];
  588. struct knc_core *knccore = thr->cgpu_data;
  589. struct cgpu_info *proc;
  590. const int i2cdev = knccore->asicno + 3;
  591. const int i2cslave_temp = 0x48;
  592. const int i2cslave_dcdc[] = {0x10, 0x12, 0x14, 0x17};
  593. int die, i;
  594. int i2c;
  595. int32_t rawtemp, rawvolt, rawcurrent;
  596. float temp, volt, current;
  597. struct timeval tv_now;
  598. bool rv = false;
  599. char i2cpath[sizeof(KNC_I2C_TEMPLATE)];
  600. sprintf(i2cpath, KNC_I2C_TEMPLATE, i2cdev);
  601. i2c = open(i2cpath, O_RDWR);
  602. if (i2c == -1)
  603. {
  604. applog(LOG_DEBUG, "%s: %s: Failed to open %s",
  605. cgpu->dev_repr, __func__, i2cpath);
  606. return false;
  607. }
  608. if (ioctl(i2c, I2C_SLAVE, i2cslave_temp))
  609. {
  610. applog(LOG_DEBUG, "%s: %s: Failed to select i2c slave 0x%x",
  611. cgpu->dev_repr, __func__, i2cslave_temp);
  612. goto out;
  613. }
  614. rawtemp = i2c_smbus_read_word_data(i2c, 0);
  615. if (rawtemp == -1)
  616. goto out;
  617. temp = ((float)(rawtemp & 0xff));
  618. if (rawtemp & 0x8000)
  619. temp += 0.5;
  620. /* DCDC i2c slaves are on 0x10 + [0-7]
  621. 8 DCDC boards have all populated
  622. 4 DCDC boards only have 0,2,4,7 populated
  623. Only 0,2,4,7 are used
  624. Each DCDC powers one die in the chip, each die has 48 cores
  625. Datasheet at http://www.lineagepower.com/oem/pdf/MDT040A0X.pdf
  626. */
  627. timer_set_now(&tv_now);
  628. volt = current = 0;
  629. for (proc = cgpu, i = 0; proc && proc->device == cgpu; proc = proc->next_proc, ++i)
  630. {
  631. thr = proc->thr[0];
  632. knccore = thr->cgpu_data;
  633. die = i / 0x30;
  634. if (0 == i % 0x30)
  635. {
  636. if (ioctl(i2c, I2C_SLAVE, i2cslave_dcdc[die]))
  637. {
  638. applog(LOG_DEBUG, "%s: %s: Failed to select i2c slave 0x%x",
  639. cgpu->dev_repr, __func__, i2cslave_dcdc[die]);
  640. goto out;
  641. }
  642. rawvolt = i2c_smbus_read_word_data(i2c, 0x8b); // VOUT
  643. if (rawvolt == -1)
  644. goto out;
  645. rawcurrent = i2c_smbus_read_word_data(i2c, 0x8c); // IOUT
  646. if (rawcurrent == -1)
  647. goto out;
  648. volt = (float)rawvolt * exp2(-10);
  649. current = (float)knc_dcdc_decode_5_11(rawcurrent);
  650. applog(LOG_DEBUG, "%s: die %d %6.3fV %5.2fA",
  651. cgpu->dev_repr, die, volt, current);
  652. }
  653. proc->temp = temp;
  654. knccore->volt = volt;
  655. knccore->current = current;
  656. // NOTE: We need to check _mt_disable_called because otherwise enabling won't assert it to i2c (it's false when getting stats for eg proc 0 before proc 1+ haven't initialised completely yet)
  657. if (proc->deven == DEV_RECOVER_DRV && timer_passed(&knccore->enable_at, &tv_now) && thr->_mt_disable_called)
  658. {
  659. knccore->hwerr_in_row = 0;
  660. proc_enable(proc);
  661. }
  662. }
  663. rv = true;
  664. out:
  665. close(i2c);
  666. return rv;
  667. }
  668. static
  669. struct api_data *knc_api_extra_device_status(struct cgpu_info * const cgpu)
  670. {
  671. struct api_data *root = NULL;
  672. struct thr_info * const thr = cgpu->thr[0];
  673. struct knc_core * const knccore = thr->cgpu_data;
  674. root = api_add_volts(root, "Voltage", &knccore->volt, false);
  675. root = api_add_volts(root, "DCDC Current", &knccore->current, false);
  676. return root;
  677. }
  678. #ifdef HAVE_CURSES
  679. static
  680. void knc_wlogprint_status(struct cgpu_info * const cgpu)
  681. {
  682. struct thr_info * const thr = cgpu->thr[0];
  683. struct knc_core * const knccore = thr->cgpu_data;
  684. wlogprint("Voltage: %.3f DCDC Current: %.3f\n",
  685. knccore->volt, knccore->current);
  686. }
  687. #endif
  688. struct device_drv knc_drv = {
  689. .dname = "knc",
  690. .name = "KNC",
  691. .drv_detect = knc_detect,
  692. .thread_init = knc_init,
  693. .thread_disable = knc_core_disable,
  694. .thread_enable = knc_core_enable,
  695. .minerloop = minerloop_queue,
  696. .queue_append = knc_queue_append,
  697. .queue_flush = knc_queue_flush,
  698. .poll = knc_poll,
  699. .hw_error = knc_hw_error,
  700. .get_stats = knc_get_stats,
  701. .get_api_extra_device_status = knc_api_extra_device_status,
  702. #ifdef HAVE_CURSES
  703. .proc_wlogprint_status = knc_wlogprint_status,
  704. #endif
  705. };