cdump.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. /* MIT (BSD) license - see LICENSE file for details */
  2. #include "cdump.h"
  3. #include <ccan/tal/str/str.h>
  4. #include <assert.h>
  /* One lexical token: a slice of the code being parsed (not a
   * NUL-terminated string of its own). */
  struct token {
      /* First character of the token inside the source text. */
      const char *p;
      /* Number of characters belonging to the token. */
      size_t len;
  };
  9. static void add_token(struct token **toks, const char *p, size_t len)
  10. {
  11. size_t n = tal_count(*toks);
  12. tal_resize(toks, n+1);
  13. (*toks)[n].p = p;
  14. (*toks)[n].len = len;
  15. }
  16. /* Simplified tokenizer: comments and preproc directives removed,
  17. identifiers are a token, others are single char tokens. */
  18. static struct token *tokenize(const void *ctx, const char *code)
  19. {
  20. unsigned int i, len, tok_start = -1;
  21. bool start_of_line = true;
  22. struct token *toks = tal_arr(ctx, struct token, 0);
  23. for (i = 0; code[i]; i += len) {
  24. if (code[i] == '#' && start_of_line) {
  25. /* Preprocessor line. */
  26. len = strcspn(code+i, "\n");
  27. } else if (code[i] == '/' && code[i+1] == '/') {
  28. /* One line comment. */
  29. len = strcspn(code+i, "\n");
  30. if (tok_start != -1U) {
  31. add_token(&toks, code+tok_start, i - tok_start);
  32. tok_start = -1U;
  33. }
  34. } else if (code[i] == '/' && code[i+1] == '*') {
  35. /* Multi-line comment. */
  36. const char *end = strstr(code+i+2, "*/");
  37. len = (end + 2) - (code + i);
  38. if (!end)
  39. len = strlen(code + i);
  40. if (tok_start != -1U) {
  41. add_token(&toks, code+tok_start, i - tok_start);
  42. tok_start = -1U;
  43. }
  44. } else if (cisalnum(code[i]) || code[i] == '_') {
  45. /* Identifier or part thereof */
  46. if (tok_start == -1U)
  47. tok_start = i;
  48. len = 1;
  49. } else if (!cisspace(code[i])) {
  50. /* Punctuation: treat as single char token. */
  51. if (tok_start != -1U) {
  52. add_token(&toks, code+tok_start, i - tok_start);
  53. tok_start = -1U;
  54. }
  55. add_token(&toks, code+i, 1);
  56. len = 1;
  57. } else {
  58. /* Whitespace. */
  59. if (tok_start != -1U) {
  60. add_token(&toks, code+tok_start, i - tok_start);
  61. tok_start = -1U;
  62. }
  63. len = 1;
  64. }
  65. if (code[i] == '\n')
  66. start_of_line = true;
  67. else if (!cisspace(code[i]))
  68. start_of_line = false;
  69. }
  70. /* Add terminating NULL. */
  71. tal_resizez(&toks, tal_count(toks) + 1);
  72. return toks;
  73. }
  /* Everything the parser carries around while walking the token stream. */
  struct parse_state {
      /* The complete source text (used to work out line numbers). */
      const char *code;
      /* Current position in the token array. */
      const struct token *toks;
      /* Definitions gathered so far. */
      struct cdump_definitions *defs;
      /* Accumulated diagnostic text, one message per line. */
      char *complaints;
  };
  80. static bool tok_is(const struct token **toks, const char *target)
  81. {
  82. return (*toks)->p && (*toks)->len == strlen(target)
  83. && memcmp((*toks)->p, target, (*toks)->len) == 0;
  84. }
  85. static const struct token *tok_peek(const struct token **toks)
  86. {
  87. if (toks[0]->p)
  88. return toks[0];
  89. return NULL;
  90. }
  91. static const struct token *tok_take(const struct token **toks)
  92. {
  93. if (!toks[0]->p)
  94. return NULL;
  95. return (*toks)++;
  96. }
  /* Consume the current token only if it spells @target; returns the
   * token taken, or NULL if it did not match. */
  static const struct token *tok_take_if(const struct token **toks,
                                         const char *target)
  {
      return tok_is(toks, target) ? tok_take(toks) : NULL;
  }
  104. static const char *tok_take_ident(const tal_t *ctx, const struct token **toks)
  105. {
  106. const struct token *t = tok_peek(toks);
  107. if (!t)
  108. return NULL;
  109. if (strspn(t->p, "_0123456789"
  110. "abcdefghijklmnopqrstuvwxyz"
  111. "ABCDEFGHIJKLMNOPQRSTUVWXYZ") < t->len)
  112. return NULL;
  113. t = tok_take(toks);
  114. return tal_strndup(ctx, t->p, t->len);
  115. }
  116. static char *string_of_toks(const tal_t *ctx,
  117. const struct token *first,
  118. const struct token *until)
  119. {
  120. const struct token *end = until - 1;
  121. return tal_strndup(ctx, first->p, end->p - first->p + end->len);
  122. }
  123. static char *tok_take_until(const tal_t *ctx,
  124. const struct token **toks,
  125. const char *delims)
  126. {
  127. const struct token *t, *start;
  128. start = tok_peek(toks);
  129. while ((t = tok_peek(toks)) != NULL) {
  130. /* If this contains a delimiter, copy up to prev token. */
  131. if (strcspn(t->p, delims) < t->len)
  132. return string_of_toks(ctx, start, t);
  133. tok_take(toks);
  134. };
  135. /* EOF without finding delimiter */
  136. return NULL;
  137. }
  138. static bool type_defined(const struct cdump_type *t)
  139. {
  140. switch (t->kind) {
  141. case CDUMP_STRUCT:
  142. case CDUMP_UNION:
  143. return (t->u.members != NULL);
  144. case CDUMP_ENUM:
  145. return (t->u.enum_vals != NULL);
  146. /* These shouldn't happen; we don't try to define them. */
  147. case CDUMP_UNKNOWN:
  148. case CDUMP_ARRAY:
  149. case CDUMP_POINTER:
  150. break;
  151. }
  152. abort();
  153. }
  154. /* May allocate a new type if not already found (steals @name) */
  155. static struct cdump_type *get_type(struct cdump_definitions *defs,
  156. enum cdump_type_kind kind,
  157. const char *name)
  158. {
  159. struct cdump_map *m;
  160. struct cdump_type *t;
  161. switch (kind) {
  162. case CDUMP_STRUCT:
  163. m = &defs->structs;
  164. break;
  165. case CDUMP_UNION:
  166. m = &defs->unions;
  167. break;
  168. case CDUMP_ENUM:
  169. m = &defs->enums;
  170. break;
  171. case CDUMP_UNKNOWN:
  172. case CDUMP_ARRAY:
  173. case CDUMP_POINTER:
  174. m = NULL;
  175. }
  176. /* Do we already have it? */
  177. if (m) {
  178. t = strmap_get(m, name);
  179. if (t)
  180. return t;
  181. }
  182. t = tal(defs, struct cdump_type);
  183. t->kind = kind;
  184. t->name = name ? tal_steal(t, name) : NULL;
  185. /* These are actually the same, but be thorough */
  186. t->u.members = NULL;
  187. t->u.enum_vals = NULL;
  188. if (m)
  189. strmap_add(m, t->name, t);
  190. return t;
  191. }
  192. static void complain(struct parse_state *ps, const char *complaint)
  193. {
  194. unsigned int linenum;
  195. const char *p = ps->code;
  196. for (linenum = 1; p < ps->toks[0].p; linenum++) {
  197. p = strchr(p+1, '\n');
  198. if (!p)
  199. break;
  200. }
  201. tal_append_fmt(&ps->complaints,
  202. "Line %u: '%.*s': %s\n",
  203. linenum, (int)ps->toks[0].len,
  204. ps->toks[0].p, complaint);
  205. }
  206. static void tok_take_unknown_statement(struct parse_state *ps)
  207. {
  208. complain(ps, "Ignoring unknown statement until next semicolon");
  209. tal_free(tok_take_until(NULL, &ps->toks, ";"));
  210. tok_take_if(&ps->toks, ";");
  211. }
  212. /* [ ... */
  213. static bool tok_take_array(struct parse_state *ps, struct cdump_type **type)
  214. {
  215. /* This will be some arbitrary expression! */
  216. struct cdump_type *arr = get_type(ps->defs, CDUMP_ARRAY, NULL);
  217. arr->u.arr.size = tok_take_until(arr, &ps->toks, "]");
  218. if (!arr->u.arr.size) {
  219. complain(ps, "Could not find closing array size ]");
  220. return false;
  221. }
  222. arr->u.arr.type = *type;
  223. *type = arr;
  224. /* Swallow ] */
  225. tok_take(&ps->toks);
  226. return true;
  227. }
  228. static struct cdump_type *ptr_of(struct parse_state *ps,
  229. const struct cdump_type *ptr_to)
  230. {
  231. struct cdump_type *ptr = get_type(ps->defs, CDUMP_POINTER, NULL);
  232. ptr->u.ptr = ptr_to;
  233. return ptr;
  234. }
  235. static bool tok_take_type(struct parse_state *ps, struct cdump_type **type)
  236. {
  237. const char *name;
  238. const struct token *types;
  239. enum cdump_type_kind kind;
  240. /* Ignoring weird typedefs, only these can be combined. */
  241. types = ps->toks;
  242. while (tok_take_if(&ps->toks, "int")
  243. || tok_take_if(&ps->toks, "long")
  244. || tok_take_if(&ps->toks, "short")
  245. || tok_take_if(&ps->toks, "double")
  246. || tok_take_if(&ps->toks, "float")
  247. || tok_take_if(&ps->toks, "char")
  248. || tok_take_if(&ps->toks, "signed")
  249. || tok_take_if(&ps->toks, "unsigned"));
  250. /* Did we get some? */
  251. if (ps->toks != types) {
  252. name = string_of_toks(NULL, types, tok_peek(&ps->toks));
  253. kind = CDUMP_UNKNOWN;
  254. } else {
  255. /* Try normal types (or simple typedefs, etc). */
  256. if (tok_take_if(&ps->toks, "struct")) {
  257. kind = CDUMP_STRUCT;
  258. } else if (tok_take_if(&ps->toks, "union")) {
  259. kind = CDUMP_UNION;
  260. } else if (tok_take_if(&ps->toks, "enum")) {
  261. kind = CDUMP_ENUM;
  262. } else
  263. kind = CDUMP_UNKNOWN;
  264. name = tok_take_ident(ps->defs, &ps->toks);
  265. if (!name) {
  266. complain(ps, "Invalid typename");
  267. return false;
  268. }
  269. }
  270. *type = get_type(ps->defs, kind, name);
  271. return true;
  272. }
  273. /* struct|union ... */
  274. static bool tok_take_conglom(struct parse_state *ps,
  275. enum cdump_type_kind conglom_kind)
  276. {
  277. struct cdump_type *e;
  278. const char *name;
  279. size_t n;
  280. assert(conglom_kind == CDUMP_STRUCT || conglom_kind == CDUMP_UNION);
  281. name = tok_take_ident(ps->defs, &ps->toks);
  282. if (!name) {
  283. complain(ps, "Invalid struct/union name");
  284. return false;
  285. }
  286. e = get_type(ps->defs, conglom_kind, name);
  287. if (type_defined(e)) {
  288. complain(ps, "Type already defined");
  289. return false;
  290. }
  291. if (!tok_take_if(&ps->toks, "{")) {
  292. complain(ps, "Expected { for struct/union");
  293. return false;
  294. }
  295. e->u.members = tal_arr(e, struct cdump_member, n = 0);
  296. while (!tok_is(&ps->toks, "}")) {
  297. struct cdump_type *basetype;
  298. const struct token *quals;
  299. unsigned int num_quals = 0;
  300. /* Anything can have these prepended. */
  301. quals = ps->toks;
  302. while (tok_take_if(&ps->toks, "const")
  303. || tok_take_if(&ps->toks, "volatile"))
  304. num_quals++;
  305. /* eg. "struct foo" or "varint_t" */
  306. if (!tok_take_type(ps, &basetype)) {
  307. complain(ps, "Expected typename inside struct/union");
  308. return false;
  309. }
  310. do {
  311. struct cdump_member *m;
  312. tal_resize(&e->u.members, n+1);
  313. m = &e->u.members[n++];
  314. m->type = basetype;
  315. if (num_quals) {
  316. m->qualifiers
  317. = string_of_toks(e, quals,
  318. quals + num_quals);
  319. } else
  320. m->qualifiers = NULL;
  321. /* May have multiple asterisks. */
  322. while (tok_take_if(&ps->toks, "*"))
  323. m->type = ptr_of(ps, m->type);
  324. m->name = tok_take_ident(e, &ps->toks);
  325. if (!m->name) {
  326. complain(ps, "Expected name for member");
  327. return false;
  328. }
  329. /* May be an array. */
  330. while (tok_take_if(&ps->toks, "[")) {
  331. if (!tok_take_array(ps, &m->type))
  332. return false;
  333. }
  334. } while (tok_take_if(&ps->toks, ","));
  335. if (!tok_take_if(&ps->toks, ";")) {
  336. complain(ps, "Expected ; at end of member");
  337. return false;
  338. }
  339. }
  340. if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
  341. return true;
  342. complain(ps, "Expected }; at end of struct/union");
  343. return false;
  344. }
  345. /* enum ... */
  346. static bool tok_take_enum(struct parse_state *ps)
  347. {
  348. size_t n = 0;
  349. struct cdump_type *e;
  350. const char *name;
  351. name = tok_take_ident(ps->defs, &ps->toks);
  352. if (!name) {
  353. complain(ps, "Expected enum name");
  354. return false;
  355. }
  356. e = get_type(ps->defs, CDUMP_ENUM, name);
  357. /* Duplicate name? */
  358. if (type_defined(e)) {
  359. complain(ps, "enum already defined");
  360. return false;
  361. }
  362. if (!tok_take_if(&ps->toks, "{")) {
  363. complain(ps, "Expected { after enum name");
  364. return false;
  365. }
  366. e->u.enum_vals = tal_arr(e, struct cdump_enum_val, n);
  367. do {
  368. struct cdump_enum_val *v;
  369. tal_resize(&e->u.enum_vals, n+1);
  370. v = &e->u.enum_vals[n++];
  371. v->name = tok_take_ident(e, &ps->toks);
  372. if (!v->name) {
  373. complain(ps, "Expected enum value name");
  374. return false;
  375. }
  376. if (tok_take_if(&ps->toks, "=")) {
  377. v->value = tok_take_until(e, &ps->toks, ",}");
  378. if (!v->value) {
  379. complain(ps, "Expected , or } to end value");
  380. return false;
  381. }
  382. } else
  383. v->value = NULL;
  384. } while (tok_take_if(&ps->toks, ","));
  385. if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
  386. return true;
  387. complain(ps, "Expected }; at end of enum");
  388. return false;
  389. }
  /* strmap_iterate callback: add every type that has no definition to
   * @undefs.  Always returns true. */
  static bool gather_undefines(const char *name,
                               struct cdump_type *t,
                               struct cdump_map *undefs)
  {
      if (type_defined(t))
          return true;

      strmap_add(undefs, name, t);
      return true;
  }
  /* strmap_iterate callback: delete the entry called @name from @map.
   * @t is unused.  Always returns true. */
  static bool remove_from_map(const char *name,
                              struct cdump_type *t,
                              struct cdump_map *map)
  {
      /* The stored value is not wanted back, hence the NULL. */
      strmap_del(map, name, NULL);

      return true;
  }
  405. static void remove_undefined(struct cdump_map *map)
  406. {
  407. struct cdump_map undefs;
  408. /* We can't delete inside iterator, so gather all the undefs
  409. * then remove them. */
  410. strmap_init(&undefs);
  411. strmap_iterate(map, gather_undefines, &undefs);
  412. strmap_iterate(&undefs, remove_from_map, map);
  413. strmap_clear(&undefs);
  414. }
  415. static void destroy_definitions(struct cdump_definitions *defs)
  416. {
  417. strmap_clear(&defs->enums);
  418. strmap_clear(&defs->structs);
  419. strmap_clear(&defs->unions);
  420. }
  421. /* Simple LL(1) parser, inspired by Tridge's genstruct.pl. */
  422. struct cdump_definitions *cdump_extract(const tal_t *ctx, const char *code,
  423. char **complaints)
  424. {
  425. struct parse_state ps;
  426. const struct token *toks;
  427. ps.defs = tal(ctx, struct cdump_definitions);
  428. ps.complaints = tal_strdup(ctx, "");
  429. ps.code = code;
  430. strmap_init(&ps.defs->enums);
  431. strmap_init(&ps.defs->structs);
  432. strmap_init(&ps.defs->unions);
  433. tal_add_destructor(ps.defs, destroy_definitions);
  434. toks = ps.toks = tokenize(ps.defs, code);
  435. while (tok_peek(&ps.toks)) {
  436. if (tok_take_if(&ps.toks, "struct")) {
  437. if (!tok_take_conglom(&ps, CDUMP_STRUCT))
  438. goto fail;
  439. } else if (tok_take_if(&ps.toks, "union")) {
  440. if (!tok_take_conglom(&ps, CDUMP_UNION))
  441. goto fail;
  442. } else if (tok_take_if(&ps.toks, "enum")) {
  443. if (!tok_take_enum(&ps))
  444. goto fail;
  445. } else
  446. tok_take_unknown_statement(&ps);
  447. }
  448. /* Now, remove any undefined types! */
  449. remove_undefined(&ps.defs->enums);
  450. remove_undefined(&ps.defs->structs);
  451. remove_undefined(&ps.defs->unions);
  452. tal_free(toks);
  453. out:
  454. if (streq(ps.complaints, ""))
  455. ps.complaints = tal_free(ps.complaints);
  456. if (complaints)
  457. *complaints = ps.complaints;
  458. else
  459. tal_free(ps.complaints);
  460. return ps.defs;
  461. fail:
  462. ps.defs = tal_free(ps.defs);
  463. goto out;
  464. }