| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543 |
- /* MIT (BSD) license - see LICENSE file for details */
- #include "cdump.h"
- #include <ccan/tal/str/str.h>
- #include <assert.h>
/* One lexical token: a slice of the code being parsed (not NUL-terminated).
 * An entry whose p is NULL marks the end of a token array. */
struct token {
	/* First character of the token within the source text. */
	const char *p;
	/* Number of characters the token spans. */
	size_t len;
};
- static void add_token(struct token **toks, const char *p, size_t len)
- {
- size_t n = tal_count(*toks);
- tal_resize(toks, n+1);
- (*toks)[n].p = p;
- (*toks)[n].len = len;
- }
- /* Simplified tokenizer: comments and preproc directives removed,
- identifiers are a token, others are single char tokens. */
- static struct token *tokenize(const void *ctx, const char *code)
- {
- unsigned int i, len, tok_start = -1;
- bool start_of_line = true;
- struct token *toks = tal_arr(ctx, struct token, 0);
- for (i = 0; code[i]; i += len) {
- if (code[i] == '#' && start_of_line) {
- /* Preprocessor line. */
- len = strcspn(code+i, "\n");
- } else if (code[i] == '/' && code[i+1] == '/') {
- /* One line comment. */
- len = strcspn(code+i, "\n");
- if (tok_start != -1U) {
- add_token(&toks, code+tok_start, i - tok_start);
- tok_start = -1U;
- }
- } else if (code[i] == '/' && code[i+1] == '*') {
- /* Multi-line comment. */
- const char *end = strstr(code+i+2, "*/");
- len = (end + 2) - (code + i);
- if (!end)
- len = strlen(code + i);
- if (tok_start != -1U) {
- add_token(&toks, code+tok_start, i - tok_start);
- tok_start = -1U;
- }
- } else if (cisalnum(code[i]) || code[i] == '_') {
- /* Identifier or part thereof */
- if (tok_start == -1U)
- tok_start = i;
- len = 1;
- } else if (!cisspace(code[i])) {
- /* Punctuation: treat as single char token. */
- if (tok_start != -1U) {
- add_token(&toks, code+tok_start, i - tok_start);
- tok_start = -1U;
- }
- add_token(&toks, code+i, 1);
- len = 1;
- } else {
- /* Whitespace. */
- if (tok_start != -1U) {
- add_token(&toks, code+tok_start, i - tok_start);
- tok_start = -1U;
- }
- len = 1;
- }
- if (code[i] == '\n')
- start_of_line = true;
- else if (!cisspace(code[i]))
- start_of_line = false;
- }
- /* Add terminating NULL. */
- tal_resizez(&toks, tal_count(toks) + 1);
- return toks;
- }
/* Everything the parsing helpers share while walking the token array. */
struct parse_state {
	/* Start of the source text; complain() uses it to work out lines. */
	const char *code;
	/* Current position in the token array (advanced as tokens are taken). */
	const struct token *toks;
	/* Definitions being built up. */
	struct cdump_definitions *defs;
	/* Accumulated complaint messages, one per line. */
	char *complaints;
};
- static bool tok_is(const struct token **toks, const char *target)
- {
- return (*toks)->p && (*toks)->len == strlen(target)
- && memcmp((*toks)->p, target, (*toks)->len) == 0;
- }
- static const struct token *tok_peek(const struct token **toks)
- {
- if (toks[0]->p)
- return toks[0];
- return NULL;
- }
- static const struct token *tok_take(const struct token **toks)
- {
- if (!toks[0]->p)
- return NULL;
- return (*toks)++;
- }
/* Consume the current token only if it spells @target; returns the token
 * taken, or NULL (nothing consumed) otherwise. */
static const struct token *tok_take_if(const struct token **toks,
				       const char *target)
{
	return tok_is(toks, target) ? tok_take(toks) : NULL;
}
- static const char *tok_take_ident(const tal_t *ctx, const struct token **toks)
- {
- const struct token *t = tok_peek(toks);
- if (!t)
- return NULL;
- if (strspn(t->p, "_0123456789"
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ") < t->len)
- return NULL;
- t = tok_take(toks);
- return tal_strndup(ctx, t->p, t->len);
- }
- static char *string_of_toks(const tal_t *ctx,
- const struct token *first,
- const struct token *until)
- {
- const struct token *end = until - 1;
- return tal_strndup(ctx, first->p, end->p - first->p + end->len);
- }
- static char *tok_take_until(const tal_t *ctx,
- const struct token **toks,
- const char *delims)
- {
- const struct token *t, *start;
- start = tok_peek(toks);
- while ((t = tok_peek(toks)) != NULL) {
- /* If this contains a delimiter, copy up to prev token. */
- if (strcspn(t->p, delims) < t->len)
- return string_of_toks(ctx, start, t);
- tok_take(toks);
- };
- /* EOF without finding delimiter */
- return NULL;
- }
- static bool type_defined(const struct cdump_type *t)
- {
- switch (t->kind) {
- case CDUMP_STRUCT:
- case CDUMP_UNION:
- return (t->u.members != NULL);
- case CDUMP_ENUM:
- return (t->u.enum_vals != NULL);
- /* These shouldn't happen; we don't try to define them. */
- case CDUMP_UNKNOWN:
- case CDUMP_ARRAY:
- case CDUMP_POINTER:
- break;
- }
- abort();
- }
- /* May allocate a new type if not already found (steals @name) */
- static struct cdump_type *get_type(struct cdump_definitions *defs,
- enum cdump_type_kind kind,
- const char *name)
- {
- struct cdump_map *m;
- struct cdump_type *t;
- switch (kind) {
- case CDUMP_STRUCT:
- m = &defs->structs;
- break;
- case CDUMP_UNION:
- m = &defs->unions;
- break;
- case CDUMP_ENUM:
- m = &defs->enums;
- break;
- case CDUMP_UNKNOWN:
- case CDUMP_ARRAY:
- case CDUMP_POINTER:
- m = NULL;
- }
- /* Do we already have it? */
- if (m) {
- t = strmap_get(m, name);
- if (t)
- return t;
- }
- t = tal(defs, struct cdump_type);
- t->kind = kind;
- t->name = name ? tal_steal(t, name) : NULL;
- /* These are actually the same, but be thorough */
- t->u.members = NULL;
- t->u.enum_vals = NULL;
- if (m)
- strmap_add(m, t->name, t);
- return t;
- }
- static void complain(struct parse_state *ps, const char *complaint)
- {
- unsigned int linenum;
- const char *p = ps->code;
- for (linenum = 1; p < ps->toks[0].p; linenum++) {
- p = strchr(p+1, '\n');
- if (!p)
- break;
- }
- tal_append_fmt(&ps->complaints,
- "Line %u: '%.*s': %s\n",
- linenum, (int)ps->toks[0].len,
- ps->toks[0].p, complaint);
- }
- static void tok_take_unknown_statement(struct parse_state *ps)
- {
- complain(ps, "Ignoring unknown statement until next semicolon");
- tal_free(tok_take_until(NULL, &ps->toks, ";"));
- tok_take_if(&ps->toks, ";");
- }
- /* [ ... */
- static bool tok_take_array(struct parse_state *ps, struct cdump_type **type)
- {
- /* This will be some arbitrary expression! */
- struct cdump_type *arr = get_type(ps->defs, CDUMP_ARRAY, NULL);
- arr->u.arr.size = tok_take_until(arr, &ps->toks, "]");
- if (!arr->u.arr.size) {
- complain(ps, "Could not find closing array size ]");
- return false;
- }
- arr->u.arr.type = *type;
- *type = arr;
- /* Swallow ] */
- tok_take(&ps->toks);
- return true;
- }
- static struct cdump_type *ptr_of(struct parse_state *ps,
- const struct cdump_type *ptr_to)
- {
- struct cdump_type *ptr = get_type(ps->defs, CDUMP_POINTER, NULL);
- ptr->u.ptr = ptr_to;
- return ptr;
- }
- static bool tok_take_type(struct parse_state *ps, struct cdump_type **type)
- {
- const char *name;
- const struct token *types;
- enum cdump_type_kind kind;
- /* Ignoring weird typedefs, only these can be combined. */
- types = ps->toks;
- while (tok_take_if(&ps->toks, "int")
- || tok_take_if(&ps->toks, "long")
- || tok_take_if(&ps->toks, "short")
- || tok_take_if(&ps->toks, "double")
- || tok_take_if(&ps->toks, "float")
- || tok_take_if(&ps->toks, "char")
- || tok_take_if(&ps->toks, "signed")
- || tok_take_if(&ps->toks, "unsigned"));
- /* Did we get some? */
- if (ps->toks != types) {
- name = string_of_toks(NULL, types, tok_peek(&ps->toks));
- kind = CDUMP_UNKNOWN;
- } else {
- /* Try normal types (or simple typedefs, etc). */
- if (tok_take_if(&ps->toks, "struct")) {
- kind = CDUMP_STRUCT;
- } else if (tok_take_if(&ps->toks, "union")) {
- kind = CDUMP_UNION;
- } else if (tok_take_if(&ps->toks, "enum")) {
- kind = CDUMP_ENUM;
- } else
- kind = CDUMP_UNKNOWN;
- name = tok_take_ident(ps->defs, &ps->toks);
- if (!name) {
- complain(ps, "Invalid typename");
- return false;
- }
- }
- *type = get_type(ps->defs, kind, name);
- return true;
- }
- /* struct|union ... */
- static bool tok_take_conglom(struct parse_state *ps,
- enum cdump_type_kind conglom_kind)
- {
- struct cdump_type *e;
- const char *name;
- size_t n;
- assert(conglom_kind == CDUMP_STRUCT || conglom_kind == CDUMP_UNION);
- name = tok_take_ident(ps->defs, &ps->toks);
- if (!name) {
- complain(ps, "Invalid struct/union name");
- return false;
- }
- e = get_type(ps->defs, conglom_kind, name);
- if (type_defined(e)) {
- complain(ps, "Type already defined");
- return false;
- }
- if (!tok_take_if(&ps->toks, "{")) {
- complain(ps, "Expected { for struct/union");
- return false;
- }
- e->u.members = tal_arr(e, struct cdump_member, n = 0);
- while (!tok_is(&ps->toks, "}")) {
- struct cdump_type *basetype;
- const struct token *quals;
- unsigned int num_quals = 0;
- /* Anything can have these prepended. */
- quals = ps->toks;
- while (tok_take_if(&ps->toks, "const")
- || tok_take_if(&ps->toks, "volatile"))
- num_quals++;
- /* eg. "struct foo" or "varint_t" */
- if (!tok_take_type(ps, &basetype)) {
- complain(ps, "Expected typename inside struct/union");
- return false;
- }
- do {
- struct cdump_member *m;
- tal_resize(&e->u.members, n+1);
- m = &e->u.members[n++];
- m->type = basetype;
- if (num_quals) {
- m->qualifiers
- = string_of_toks(e, quals,
- quals + num_quals);
- } else
- m->qualifiers = NULL;
- /* May have multiple asterisks. */
- while (tok_take_if(&ps->toks, "*"))
- m->type = ptr_of(ps, m->type);
- m->name = tok_take_ident(e, &ps->toks);
- if (!m->name) {
- complain(ps, "Expected name for member");
- return false;
- }
- /* May be an array. */
- while (tok_take_if(&ps->toks, "[")) {
- if (!tok_take_array(ps, &m->type))
- return false;
- }
- } while (tok_take_if(&ps->toks, ","));
- if (!tok_take_if(&ps->toks, ";")) {
- complain(ps, "Expected ; at end of member");
- return false;
- }
- }
- if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
- return true;
- complain(ps, "Expected }; at end of struct/union");
- return false;
- }
- /* enum ... */
- static bool tok_take_enum(struct parse_state *ps)
- {
- size_t n = 0;
- struct cdump_type *e;
- const char *name;
- name = tok_take_ident(ps->defs, &ps->toks);
- if (!name) {
- complain(ps, "Expected enum name");
- return false;
- }
- e = get_type(ps->defs, CDUMP_ENUM, name);
- /* Duplicate name? */
- if (type_defined(e)) {
- complain(ps, "enum already defined");
- return false;
- }
- if (!tok_take_if(&ps->toks, "{")) {
- complain(ps, "Expected { after enum name");
- return false;
- }
- e->u.enum_vals = tal_arr(e, struct cdump_enum_val, n);
- do {
- struct cdump_enum_val *v;
- tal_resize(&e->u.enum_vals, n+1);
- v = &e->u.enum_vals[n++];
- v->name = tok_take_ident(e, &ps->toks);
- if (!v->name) {
- complain(ps, "Expected enum value name");
- return false;
- }
- if (tok_take_if(&ps->toks, "=")) {
- v->value = tok_take_until(e, &ps->toks, ",}");
- if (!v->value) {
- complain(ps, "Expected , or } to end value");
- return false;
- }
- } else
- v->value = NULL;
- } while (tok_take_if(&ps->toks, ","));
- if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
- return true;
- complain(ps, "Expected }; at end of enum");
- return false;
- }
/* strmap_iterate callback: record in @undefs every type which was named
 * but never defined.  Always returns true. */
static bool gather_undefines(const char *name,
			     struct cdump_type *t,
			     struct cdump_map *undefs)
{
	if (type_defined(t))
		return true;

	strmap_add(undefs, name, t);
	return true;
}
/* strmap_iterate callback: delete the entry called @name from @map.
 * The type itself is not needed.  Always returns true. */
static bool remove_from_map(const char *name,
			    struct cdump_type *t,
			    struct cdump_map *map)
{
	(void)t;

	strmap_del(map, name, NULL);
	return true;
}
- static void remove_undefined(struct cdump_map *map)
- {
- struct cdump_map undefs;
- /* We can't delete inside iterator, so gather all the undefs
- * then remove them. */
- strmap_init(&undefs);
- strmap_iterate(map, gather_undefines, &undefs);
- strmap_iterate(&undefs, remove_from_map, map);
- strmap_clear(&undefs);
- }
- static void destroy_definitions(struct cdump_definitions *defs)
- {
- strmap_clear(&defs->enums);
- strmap_clear(&defs->structs);
- strmap_clear(&defs->unions);
- }
- /* Simple LL(1) parser, inspired by Tridge's genstruct.pl. */
- struct cdump_definitions *cdump_extract(const tal_t *ctx, const char *code,
- char **complaints)
- {
- struct parse_state ps;
- const struct token *toks;
- ps.defs = tal(ctx, struct cdump_definitions);
- ps.complaints = tal_strdup(ctx, "");
- ps.code = code;
- strmap_init(&ps.defs->enums);
- strmap_init(&ps.defs->structs);
- strmap_init(&ps.defs->unions);
- tal_add_destructor(ps.defs, destroy_definitions);
- toks = ps.toks = tokenize(ps.defs, code);
- while (tok_peek(&ps.toks)) {
- if (tok_take_if(&ps.toks, "struct")) {
- if (!tok_take_conglom(&ps, CDUMP_STRUCT))
- goto fail;
- } else if (tok_take_if(&ps.toks, "union")) {
- if (!tok_take_conglom(&ps, CDUMP_UNION))
- goto fail;
- } else if (tok_take_if(&ps.toks, "enum")) {
- if (!tok_take_enum(&ps))
- goto fail;
- } else
- tok_take_unknown_statement(&ps);
- }
- /* Now, remove any undefined types! */
- remove_undefined(&ps.defs->enums);
- remove_undefined(&ps.defs->structs);
- remove_undefined(&ps.defs->unions);
- tal_free(toks);
- out:
- if (streq(ps.complaints, ""))
- ps.complaints = tal_free(ps.complaints);
- if (complaints)
- *complaints = ps.complaints;
- else
- tal_free(ps.complaints);
- return ps.defs;
- fail:
- ps.defs = tal_free(ps.defs);
- goto out;
- }
|