read_cstring.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. #include "ccan_tokenizer.h"
  /*
   * Duplicate the byte range [s, e) into a newly malloc'd, NUL-terminated
   * string.
   *
   * s: first byte to copy.
   * e: one past the last byte to copy; if e does not lie after s the range is
   *    treated as empty and an empty string is returned.
   *
   * Returns the new string, which the caller must free(), or NULL if the
   * allocation fails.
   */
  static char *strdup_rng(const char *s, const char *e) {
      /* Clamp so an inverted range can never become a huge size_t. */
      size_t len = (e > s) ? (size_t)(e - s) : 0;
      char *ret = malloc(len + 1);

      if (ret == NULL)
          return NULL;

      /* Skip the copy for empty ranges so s is never touched when len == 0. */
      if (len != 0)
          memcpy(ret, s, len);
      ret[len] = 0;
      return ret;
  }
  8. #define MESSAGE_PATH "tokenize/read_cstring/"
  9. //Reads a C string starting at s until quoteChar is found or e is reached
  10. // Returns the pointer to the terminating quote character or e if none was found
  11. char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
  12. const char * const tokstart = s;
  13. const char *p;
  14. int has_endquote=0, has_newlines=0;
  15. //tok_msg_debug(called, s, "Called read_cstring on `%s`", s);
  16. #define append(startptr,endptr) darray_append_items(*out, startptr, (endptr)-(startptr))
  17. #define append_char(theChar) darray_append(*out, theChar)
  18. #define append_zero() do {darray_append(*out, 0); out->size--;} while(0)
  19. p = s;
  20. while (p<e) {
  21. char c = *p++;
  22. if (c == '\\') {
  23. append(s, p-1);
  24. s = p;
  25. if (p >= e) {
  26. append_char('\\');
  27. tok_msg_error(ended_in_backslash, p-1,
  28. "read_cstring input ended in backslash");
  29. break;
  30. }
  31. c = *p++;
  32. if (c>='0' && c<='9') {
  33. unsigned int octal = c-'0';
  34. size_t digit_count = 0;
  35. while (p<e && *p>='0' && *p<='9') {
  36. octal <<= 3;
  37. octal += (*p++) - '0';
  38. if (++digit_count >= 2)
  39. break;
  40. }
  41. if (p<e && *p>='0' && *p<='9') {
  42. tok_msg_info(ambiguous_octal, s-2,
  43. "Octal followed by digit");
  44. }
  45. if (octal > 0xFF) {
  46. tok_msg_warn(octal_overflow, s-2,
  47. "Octal out of range");
  48. }
  49. c = octal;
  50. } else {
  51. switch (c) {
  52. case 'x': {
  53. size_t digit_count = 0;
  54. size_t zero_count = 0;
  55. unsigned int hex = 0;
  56. while (p<e && *p=='0') p++, zero_count++;
  57. for (;p<e;digit_count++) {
  58. c = *p++;
  59. if (c>='0' && c<='9')
  60. c -= '0';
  61. else if (c>='A' && c<='F')
  62. c -= 'A'-10;
  63. else if (c>='a' && c<='f')
  64. c -= 'a'-10;
  65. else {
  66. p--;
  67. break;
  68. }
  69. hex <<= 4;
  70. hex += c;
  71. }
  72. if (zero_count+digit_count > 2) {
  73. char *hex_string = strdup_rng(s-2, p);
  74. tok_msg_warn(ambiguous_hex, s-2,
  75. "Hex escape '%s' is ambiguous", hex_string);
  76. if (digit_count > 2)
  77. tok_msg_warn(hex_overflow, s-2,
  78. "Hex escape '%s' out of range", hex_string);
  79. free(hex_string);
  80. }
  81. c = hex & 0xFF;
  82. } break;
  83. case 'a':
  84. c=0x7;
  85. break;
  86. case 'b':
  87. c=0x8;
  88. break;
  89. case 'e':
  90. c=0x1B;
  91. break;
  92. case 'f':
  93. c=0xC;
  94. break;
  95. case 'n':
  96. c=0xA;
  97. break;
  98. case 'r':
  99. c=0xD;
  100. break;
  101. case 't':
  102. c=0x9;
  103. break;
  104. case 'v':
  105. c=0xB;
  106. break;
  107. case '\\':
  108. break;
  109. default:
  110. if (c == quoteChar)
  111. break;
  112. if (c=='\'' && quoteChar=='"') {
  113. /* tok_msg_info(escaped_single_quote, s-2,
  114. "Single quote characters need not be escaped within double quotes"); */
  115. break;
  116. }
  117. if (c=='"' && quoteChar=='\'') {
  118. /* tok_msg_info(escaped_double_quote, s-2,
  119. "Double quote characters need not be escaped within single quotes"); */
  120. break;
  121. }
  122. if (c=='?') // \? is needed in some situations to avoid building a trigraph
  123. break;
  124. tok_msg_warn(unknown_escape, s-2,
  125. "Unknown escape sequence '\\%c'", c);
  126. break;
  127. }
  128. }
  129. s = p;
  130. append_char(c);
  131. } else if (c == quoteChar) {
  132. p--;
  133. has_endquote = 1;
  134. break;
  135. } else if (creturn(c)) {
  136. has_newlines = 1;
  137. }
  138. }
  139. append(s, p);
  140. append_zero();
  141. if (!has_endquote) {
  142. tok_msg_error(missing_endquote, tokstart,
  143. "Missing endquote on %s literal",
  144. quoteChar=='\'' ? "character" : "string");
  145. } else if (has_newlines) {
  146. tok_msg_warn(quote_newlines, tokstart,
  147. "%s literal contains newline character(s)",
  148. quoteChar=='\'' ? "Character" : "String");
  149. }
  150. return (char*)p;
  151. #undef append
  152. #undef append_char
  153. #undef append_zero
  154. }
  155. #undef MESSAGE_PATH