| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408 |
- //for strtold
- #define _ISOC99_SOURCE
- #include <stdlib.h>
- #undef _ISOC99_SOURCE
- #include "ccan_tokenizer.h"
- #ifndef ULLONG_MAX
- #define ULLONG_MAX 18446744073709551615ULL
- #endif
- static const char *skipnum(const char *s, const char *e, readui_base base) {
- for (;s<e;s++) {
- unsigned int c = (unsigned char)*s;
-
- if (cdigit(c)) {
- if ( c-'0' >= (base & 0xFF) &&
- !(base & READUI_ALLOWHIGHERDIGITS) )
- break;
- } else if (c>='A' && c<='Z') {
- if (!(base & READUI_ALLOWCAPLETTERS))
- break;
- if ( c-'A'+10 >= (base & 0xFF) &&
- !(base & READUI_ALLOWHIGHERDIGITS))
- break;
- } else if (c>='a' && c<='z') {
- if (!(base & READUI_ALLOWLCASELETTERS))
- break;
- if ( c-'a'+10 >= (base & 0xFF) &&
- !(base & READUI_ALLOWHIGHERDIGITS))
- break;
- } else
- break;
- }
-
- return s;
- }
- static uint64_t readui_valid(const char *s, const char *e, readui_base base) {
- uint64_t ret = 0;
- uint64_t multiplier = 1;
- uint64_t digit_value;
-
- //64-bit multiplication with overflow checking
- #define multiply(dest, src) do { \
- uint32_t a0 = (uint64_t)(dest) & 0xFFFFFFFF; \
- uint32_t a1 = (uint64_t)(dest) >> 32; \
- uint32_t b0 = (uint64_t)(src) & 0xFFFFFFFF; \
- uint32_t b1 = (uint64_t)(src) >> 32; \
- uint64_t a, b; \
- \
- if (a1 && b1) \
- goto overflowed; \
- a = (uint64_t)a1*b0 + (uint64_t)a0*b1; \
- if (a > 0xFFFFFFFF) \
- goto overflowed; \
- a <<= 32; \
- b = (uint64_t)a0*b0; \
- \
- if (a+b < a) \
- goto overflowed; \
- (dest) = a+b; \
- } while(0)
-
- if (s >= e || ((base&0xFF) < 1)) {
- errno = EINVAL;
- return 0;
- }
-
- while (s<e && *s=='0') s++;
-
- if (e > s) {
- for (;;) {
- char c = *--e;
-
- //this series of if statements takes advantage of the fact that 'a'>'A'>'0'
- if (c >= 'a')
- c -= 'a'-10;
- else if (c >= 'A')
- c -= 'A'-10;
- else
- c -= '0';
- digit_value = c;
-
- //TODO: Write/find a testcase where temp *= multiplier does overflow
- multiply(digit_value, multiplier);
-
- if (ret+digit_value < ret)
- goto overflowed;
- ret += digit_value;
-
- if (e <= s)
- break;
-
- multiply(multiplier, base & 0xFF);
- }
- }
- errno = 0;
- return ret;
-
- overflowed:
- errno = ERANGE;
- return ULLONG_MAX;
-
- #undef multiply
- }
- uint64_t readui(const char **sp, const char *e, readui_base base) {
- const char *s = *sp;
-
- while (s<e && cwhite(*s)) s++;
- e = skipnum(s, e, base);
-
- *sp = e;
- return readui_valid(s, e, base);
- }
- #define MESSAGE_PATH "tokenize/read_cnumber/"
/*
 * Boundaries of the components of a number token.
 *
 * Every pointer marks the first character of its component; a component is
 * empty when its pointer equals the next one.  For the token 0x50.1p+1f
 * the components are shown next to each field.
 */
struct scan_number {
	/* start of the token and of the radix prefix: "0x" */
	const char *prefix;
	/* start of the digits, including any dots: "50.1" */
	const char *digits;
	/* start of the exponent: "p+1" */
	const char *exponent;
	/* start of the suffix: "f" */
	const char *suffix;
	/* one past the last character of the token */
	const char *end;
	/* number of '.' characters seen among the digits: 1 */
	size_t dots_found;
};
- /*
- * Scans past all the characters in a number token, fills the struct, and
- * returns one of TOK_INTEGER or TOK_FLOATING to indicate the type.
- *
- * First character must be [0-9 '.']
- */
- static enum token_type scan_number(struct scan_number *sn,
- const char *s, const char *e) {
- enum token_type type;
-
- sn->dots_found = 0;
-
- sn->prefix = s;
- sn->digits = s;
- if (s+3<=e && s[0]=='0') {
- if (s[1]=='X' || s[1]=='x') {
- //hexadecimal
- s += 2;
- sn->digits = s;
- for (;s<e;s++) {
- if (*s == '.')
- sn->dots_found++;
- else if (!chex(*s))
- break;
- }
- goto done_scanning_digits;
- } else if (s[1]=='B' || s[1]=='b') {
- //binary
- s += 2;
- if (*s!='0' && *s!='1')
- s -= 2;
- sn->digits = s;
- }
- }
-
- //binary, decimal, or octal
- for (;s<e;s++) {
- if (*s == '.')
- sn->dots_found++;
- else if (!cdigit(*s))
- break;
- }
- done_scanning_digits:
-
- sn->exponent = s;
- if (s<e && (
- (sn->prefix==sn->digits && (*s=='E' || *s=='e')) ||
- (sn->prefix < sn->digits && (*s=='P' || *s=='p'))
- )) {
- s++;
- if (s<e && (*s=='+' || *s=='-'))
- s++;
- while (s<e && cdigit(*s)) s++;
- }
-
- sn->suffix = s;
- while (s<e && (cdigit(*s) || cletter(*s) ||
- *s=='.' || *s=='_' || *s=='$')) s++;
-
- sn->end = s;
-
- //Now we're done scanning, but now we want to know what type this is
- type = TOK_INTEGER;
- if (sn->dots_found)
- type = TOK_FLOATING;
- if (sn->exponent < sn->suffix)
- type = TOK_FLOATING;
-
- //if this is an octal, make the leading 0 a prefix
- if (type==TOK_INTEGER && sn->prefix==sn->digits &&
- sn->digits < s && sn->digits[0]=='0')
- sn->digits++;
-
- return type;
- }
- static enum tok_suffix read_number_suffix(const char *s, const char *e,
- enum token_type type, tok_message_queue *mq) {
- const char *orig_s = s;
- enum tok_suffix sfx = 0;
-
- //read the suffix in pieces
- while (s<e) {
- enum tok_suffix sfx_prev = sfx;
- char c = *s++;
- if (c>='a' && c<='z')
- c -= 'a'-'A';
-
- if (c=='L') {
- if (s<e && (*s=='L' || *s=='l')) {
- s++;
- sfx |= TOK_LL;
-
- //TOK_L and TOK_LL are mutually exclusive
- if (sfx & TOK_L)
- goto invalid;
- } else {
- sfx |= TOK_L;
- }
- }
- else if (c=='U')
- sfx |= TOK_U;
- else if (c=='F')
- sfx |= TOK_F;
- else if (c=='I')
- sfx |= TOK_I;
- else
- goto invalid;
-
- if (sfx == sfx_prev)
- goto invalid; //suffix piece was repeated
- }
-
- //make sure the suffix is appropriate for this number type
- if (type==TOK_INTEGER && (sfx & TOK_F)) {
- tok_msg_error(suffix_float_only, orig_s,
- "Suffix only valid for floating point numbers");
- sfx = TOK_NOSUFFIX;
- }
- if (type==TOK_FLOATING && (sfx & (TOK_U | TOK_LL))) {
- tok_msg_error(suffix_integer_only, orig_s,
- "Suffix only valid for integers");
- sfx = TOK_NOSUFFIX;
- }
-
- return sfx;
-
- invalid:
- if (type==TOK_INTEGER)
- tok_msg_error(integer_suffix_invalid, orig_s,
- "Integer suffix invalid");
- else
- tok_msg_error(floating_suffix_invalid, orig_s,
- "Floating point suffix invalid");
- return TOK_NOSUFFIX;
- }
- static void read_integer(struct tok_integer *out, const struct scan_number *sn,
- tok_message_queue *mq) {
- /*
- Assertions about an integer's struct scan_number:
- prefix is empty or [0 0B 0b 0X 0x]
- sn->digits is not empty (i.e. sn->digits < sn->exponent)
- *unless* the prefix is "0"
- has no exponent
- suffix is [0-9 A-Z a-z '.']*
- dots_found == 0
- */
- readui_base base = READUI_DEC;
- const char *tokstart = sn->prefix;
- const char *s = sn->digits, *e = sn->exponent;
-
- if (sn->prefix+1 < sn->digits) {
- if (sn->prefix[1]=='X' || sn->prefix[1]=='x')
- base = READUI_HEX;
- else
- base = READUI_OCT;
- } else if (sn->prefix < sn->digits) {
- base = READUI_OCT;
- }
-
- if (s>=e && base==READUI_OCT) {
- //octal contains no digits
- out->v = 0;
- out->base = 8;
- goto suffix;
- }
-
- out->v = readui(&s, sn->exponent, base);
- out->base = base & 0xFF;
-
- if (s != e || errno == EINVAL) {
- tok_msg_error(integer_invalid_digits, tokstart,
- "Integer constant contains invalid digits");
- } else if (errno) {
- if (errno == ERANGE) {
- tok_msg_error(integer_out_of_range, tokstart,
- "Integer constant out of range");
- } else {
- tok_msg_bug(readui_unknown, tokstart,
- "Unknown error returned by readui");
- }
- }
-
- suffix:
- out->suffix =
- read_number_suffix(sn->suffix, sn->end, TOK_INTEGER, mq);
-
- return;
- }
- static void read_floating(struct tok_floating *out, const struct scan_number *sn,
- tok_message_queue *mq) {
- /*
- Assertions about a float's struct scan_number:
- prefix is empty or [0B 0b 0X 0x] (note: no octal prefix 0)
- sn->digits not empty, ever
- exponent may or may not exist
- If exponent exists, it is valid and formatted as:
- ( [E P e p] ['+' '-']*0..1 [0-9]* )
- An exponent starts with E if this is decimal, P if it is hex/binary
- suffix is [0-9 A-Z a-z '.']*
- dots_found can be anything
- */
- const char *tokstart = sn->prefix;
- const char *s = sn->prefix, *e = sn->suffix;
- char borrow = *sn->end;
- //long double strtold(const char *nptr, char **endptr);
-
- out->v = 0.0;
- out->suffix = TOK_NOSUFFIX;
-
- if (sn->prefix < sn->digits) {
- if (sn->prefix[1]=='B' || sn->prefix[1]=='b') {
- tok_msg_error(binary_float, tokstart,
- "Binary floating point constants not allowed");
- return;
- }
- if (sn->exponent >= sn->suffix) {
- tok_msg_error(hex_float_no_exponent, tokstart,
- "Hex floating point constant missing exponent");
- return;
- }
- }
-
-
- /* Stick a null terminator at the end of the input so strtold
- * won't read beyond the given input.
- *
- * This is thread-safe because the input is from
- * token_list.txt, which was generated in the
- * tokenize function which is still running.
- */
- *(char*)sn->end = 0;
- errno = 0;
- out->v = strtold(s, (char**)&s);
- //don't forget to set it back
- *(char*)sn->end = borrow;
-
- if (errno) {
- //for some reason, strtold may errno to EDOM to indicate underrun
- //open test/run.c and search "floating_out_of_range" for more details
- if (errno == ERANGE || errno == EDOM) {
- tok_msg_error(floating_out_of_range, tokstart,
- "Floating point constant out of range");
- } else {
- tok_msg_bug(strtold_unknown, tokstart,
- "Unknown error returned by strtold");
- }
- }
-
- if (s != e) {
- tok_msg_error(floating_invalid_digits, tokstart,
- "Floating point constant contains invalid digits");
- }
-
- out->suffix =
- read_number_suffix(sn->suffix, sn->end, TOK_FLOATING, mq);
- }
- char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq) {
- struct scan_number sn;
-
- tok->type = scan_number(&sn, s, e);
- if (tok->type == TOK_INTEGER)
- read_integer(&tok->integer, &sn, mq);
- else
- read_floating(&tok->floating, &sn, mq);
-
- return (char*)sn.end;
- }
- #undef MESSAGE_PATH
|