run.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466
  1. /*
  2. Copyright (c) 2009 Joseph A. Adams
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions
  6. are met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in the
  11. documentation and/or other materials provided with the distribution.
  12. 3. The name of the author may not be used to endorse or promote products
  13. derived from this software without specific prior written permission.
  14. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  15. IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  18. INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19. NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23. THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include <ccan/ccan_tokenizer/read_cnumber.c>
  26. #include <ccan/ccan_tokenizer/read_cstring.c>
  27. #include <ccan/ccan_tokenizer/dict.c>
  28. #include <ccan/ccan_tokenizer/ccan_tokenizer.c>
  29. #include <ccan/ccan_tokenizer/queue.c>
  30. #include <ccan/ccan_tokenizer/charflag.c>
  31. #include <ccan/ccan_tokenizer/ccan_tokenizer.h>
  32. #include <ccan/tap/tap.h>
  33. #include <math.h>
  34. #define array_count_pair(type, ...) (const type []){__VA_ARGS__}, sizeof((const type []){__VA_ARGS__})/sizeof(type)
  35. static void test_read_cstring(void) {
  36. #define next() do {darray_free(str); darray_init(str); csp++;} while(0)
  37. #define cs (*csp)
  38. #define verify_quotechar(correct, correct_continuation_offset, quotechar) do { \
  39. const size_t s = sizeof(correct)-1; \
  40. p = read_cstring(&str, cs, cs ? strchr(cs, 0) : NULL, quotechar, &mq); \
  41. ok(str.size==s && str.alloc>s && str.item[s]==0 && \
  42. !memcmp(str.item, correct, s), \
  43. "\"%s: Is output correct?", cs); \
  44. ok(p == cs+correct_continuation_offset, "\"%s: Is continuation pointer correct?", cs); \
  45. next(); \
  46. } while(0)
  47. #define verify(correct, correct_continuation_offset) verify_quotechar(correct, correct_continuation_offset, '"')
  48. const char * const cstrings[] = {
  49. NULL,
  50. "",
  51. "\"",
  52. "Hello world!\"",
  53. "Hello world!",
  54. "\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\"",
  55. "\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\'",
  56. "الأدب العربي\"",
  57. "Ends with \\",
  58. "Tab: '\\011' Space: '\\040' Overflow: '\\777' Ambiguous: '\\1013'\"",
  59. "\\x50\\x35\\x12\\xEF\\xFE\\x00012\\x345\""
  60. };
  61. const char * const *csp = cstrings;
  62. const char *p;
  63. darray_char str = darray_new();
  64. tok_message_queue mq;
  65. queue_init(mq, NULL);
  66. //check null input
  67. verify("", 0);
  68. //Check an empty input
  69. verify("", 0);
  70. //Check an empty quote-terminated string
  71. verify("", 0);
  72. //Check a simple string
  73. verify("Hello world!", 12);
  74. //Check a simple string without an end quote
  75. verify("Hello world!", 12);
  76. //Check a collection of single-character sequences
  77. verify("\\\f\e\b\0\a\r\nw\t\v\'\"", 26);
  78. //Check same collection of single-character sequences, this time using a single quote terminator
  79. verify_quotechar("\\\f\e\b\0\a\r\nw\t\v\'\"", 26, '\'');
  80. //Check a real UTF-8 string
  81. verify("\xd8\xa7\xd9\x84\xd8\xa3\xd8\xaf\xd8\xa8\x20\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", 23);
  82. //Check string ending in backslash
  83. verify("Ends with \\", 11);
  84. //Check a series of octal escapes
  85. verify("Tab: '\t' Space: ' ' Overflow: '\377' Ambiguous: 'A3'", 61);
  86. //Check a series of hex escapes
  87. verify("\x50\x35\x12\xEF\xFE\x12\x45", 32);
  88. darray_free(str);
  89. //tok_message_queue_dump(&mq);
  90. //Verify the message queue
  91. if (1)
  92. {
  93. struct tok_message m;
  94. struct tok_message correct_messages[] = {
  95. {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
  96. {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
  97. {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
  98. {.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
  99. //{.level=TM_INFO, .path="tokenize/read_cstring/escaped_single_quote"},
  100. {.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
  101. //{.level=TM_INFO, .path="tokenize/read_cstring/escaped_double_quote"},
  102. {.level=TM_ERROR, .path="tokenize/read_cstring/ended_in_backslash"},
  103. {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
  104. {.level=TM_WARN, .path="tokenize/read_cstring/octal_overflow"},
  105. {.level=TM_INFO, .path="tokenize/read_cstring/ambiguous_octal"},
  106. {.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
  107. {.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
  108. {.level=TM_WARN, .path="tokenize/read_cstring/hex_overflow"},
  109. };
  110. size_t i, e=sizeof(correct_messages)/sizeof(*correct_messages);
  111. while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
  112. queue_skip(mq);
  113. for (i=0; i<e; i++) {
  114. if (!queue_count(mq))
  115. break;
  116. m = dequeue(mq);
  117. if (m.level != correct_messages[i].level)
  118. break;
  119. if (strcmp(m.path, correct_messages[i].path))
  120. break;
  121. while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
  122. queue_skip(mq);
  123. }
  124. if (i<e)
  125. printf("Item %zu is incorrect\n", i);
  126. ok(i==e, "Is message queue correct?");
  127. ok(!queue_count(mq), "Message queue should be empty now.");
  128. }
  129. queue_free(mq);
  130. #undef next
  131. #undef cs
  132. #undef verify_quotechar
  133. #undef verify
  134. }
  135. #if 0
  136. static void p(const char *str) {
  137. if (str)
  138. puts(str);
  139. else
  140. puts("(null)");
  141. }
  142. #endif
  143. static void test_queue(void) {
  144. #define next() do {queue_free(q); queue_init(q, NULL);} while(0)
  145. const char * const s[] = {
  146. "zero",
  147. "one",
  148. "two",
  149. "three",
  150. "four",
  151. "five",
  152. "six",
  153. "seven",
  154. "eight",
  155. "nine",
  156. "ten",
  157. "eleven",
  158. "twelve",
  159. "thirteen",
  160. "fourteen",
  161. "fifteen"
  162. };
  163. queue(const char*) q;
  164. queue_init(q, NULL);
  165. enqueue(q, s[0]);
  166. enqueue(q, s[1]);
  167. enqueue(q, s[2]);
  168. enqueue(q, s[3]);
  169. enqueue(q, s[4]);
  170. enqueue(q, s[5]);
  171. ok(queue_count(q) == 6, "Checking queue count");
  172. ok(dequeue_check(q)==s[0] &&
  173. dequeue_check(q)==s[1] &&
  174. dequeue_check(q)==s[2], "Dequeuing/checking 3 items");
  175. ok(queue_count(q) == 3, "Checking queue count");
  176. enqueue(q, s[6]);
  177. enqueue(q, s[7]);
  178. enqueue(q, s[8]);
  179. enqueue(q, s[9]);
  180. enqueue(q, s[10]);
  181. enqueue(q, s[11]);
  182. enqueue(q, s[12]);
  183. enqueue(q, s[13]);
  184. enqueue(q, s[14]);
  185. enqueue(q, s[15]);
  186. ok(queue_count(q) == 13, "Checking queue count");
  187. ok(dequeue_check(q)==s[3] &&
  188. dequeue_check(q)==s[4] &&
  189. dequeue_check(q)==s[5] &&
  190. dequeue_check(q)==s[6] &&
  191. dequeue_check(q)==s[7] &&
  192. dequeue_check(q)==s[8] &&
  193. dequeue_check(q)==s[9] &&
  194. dequeue_check(q)==s[10] &&
  195. dequeue_check(q)==s[11] &&
  196. dequeue_check(q)==s[12] &&
  197. dequeue_check(q)==s[13] &&
  198. dequeue_check(q)==s[14] &&
  199. dequeue_check(q)==s[15], "Are queue items correct?");
  200. ok(dequeue_check(q)==NULL && dequeue_check(q)==NULL && queue_count(q)==0, "Does queue run out correctly?");
  201. queue_free(q);
  202. #undef next
  203. }
  204. #define test_dict_single() _test_dict_single(dict, str, sizeof(str)-1, correct, sizeof(correct)/sizeof(*correct))
  205. static void _test_dict_single(struct dict *dict, const char *str, size_t len, int *correct, size_t correct_count) {
  206. const char *s=str, *e=str+len;
  207. size_t i;
  208. struct dict_entry *entry;
  209. for (i=0; s<e && i<correct_count; i++) {
  210. const char *s_last = s;
  211. entry = dict_lookup(dict, &s, e);
  212. if (!entry) {
  213. if (s_last != s)
  214. break; //dict_lookup should not modify *sp when it returns NULL
  215. s++;
  216. if (correct[i] != -100)
  217. break;
  218. continue;
  219. }
  220. if (correct[i] != entry->id)
  221. break;
  222. if (!*entry->str) {
  223. if (s_last+1 != s)
  224. break;
  225. if (s[-1] != 0)
  226. break;
  227. } else {
  228. size_t len = strlen(entry->str);
  229. if (s_last+len != s)
  230. break;
  231. if (strncmp(entry->str, s-len, len))
  232. break;
  233. }
  234. //printf("Correctly read %s\n", entry->str);
  235. }
  236. if (s!=e || i!=correct_count) {
  237. printf("Tokenization failed at ");
  238. fwrite(s, 1, e-s, stdout);
  239. printf("\n");
  240. }
  241. ok(s==e && i==correct_count, "All of the tokens are correct");
  242. }
  243. static void test_dict(void) {
  244. struct dict_entry dict_orig[] = {
  245. {-1, ""},
  246. {0, " "},
  247. {1, "it"},
  248. {2, "it's"},
  249. {3, "a"},
  250. {4, "beautiful"},
  251. {5, "be"},
  252. {6, "day"},
  253. {7, "d"},
  254. {8, "in"},
  255. {9, "the"},
  256. {10, "t"},
  257. {11, "neighborhood"},
  258. {12, "neighbor"},
  259. {13, "won't"},
  260. {14, " you"},
  261. {15, "my"},
  262. {16, "??"},
  263. {17, "item"},
  264. {18, "ip"},
  265. {19, "\xFF\xFA"},
  266. {20, "\xFF\xEE"},
  267. {21, "\x80\x12\x34"},
  268. {22, "\x80\x32"},
  269. {23, "\x80\x32\x34"}
  270. };
  271. struct dict *dict = dict_build(NULL, dict_orig, sizeof(dict_orig)/sizeof(*dict_orig));
  272. {
  273. const char *s=NULL, *e=NULL;
  274. ok(dict_lookup(dict, &s, e)==NULL && s==NULL && e==NULL, "dict_lookup does nothing and returns null on empty input");
  275. }
  276. {
  277. const char str[] = "it's a beautiful day in the neighborhood\0won't you be my neighbor?";
  278. int correct[] = {2,0, 3,0, 4,0, 6,0, 8,0, 9,0, 11,-1, 13, 14,0, 5,0, 15,0, 12, -100};
  279. test_dict_single();
  280. }
  281. //check equal-length tokens
  282. {
  283. const char str[] = "it'sitem initip";
  284. int correct[] = {2,17,0, 8,1,18};
  285. test_dict_single();
  286. }
  287. //check mostly invalid tokens
  288. {
  289. const char str[] = "&^&beaumx yo youthx";
  290. int correct[] = {-100,-100,-100, 5,3,-100,-100,-100, 0,-100,-100, 14,10,-100,-100};
  291. test_dict_single();
  292. }
  293. //check tokens that start with a character greater than 0x7F
  294. {
  295. const char str[] = "\x80\x12\x34\x80\x32\x80\x32\x34\xFF\xFA\xFF\xEE";
  296. int correct[] = {21, 22, 23, 19, 20};
  297. test_dict_single();
  298. }
  299. talloc_free(dict);
  300. //make sure dict_build doesn't blow up on an empty dictionary
  301. dict = dict_build(NULL, NULL, 0);
  302. talloc_free(dict);
  303. }
  304. static void test_charflag(void) {
  305. char i;
  306. int correct = 0;
  307. #define CONTROL do { \
  308. if (ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
  309. !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
  310. !cextended(i) ) \
  311. correct++; \
  312. } while(0)
  313. #define SPACE do { \
  314. if (!ccontrol(i) && cspace(i) && !creturn(i) && cwhite(i) && \
  315. !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
  316. !cextended(i) ) \
  317. correct++; \
  318. } while(0)
  319. #define RETURN do { \
  320. if (!ccontrol(i) && !cspace(i) && creturn(i) && cwhite(i) && \
  321. !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
  322. !cextended(i) ) \
  323. correct++; \
  324. } while(0)
  325. #define SYMBOL do { \
  326. if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
  327. !cdigit(i) && !cletter(i) && !chex(i) && csymbol(i) && \
  328. !cextended(i) ) \
  329. correct++; \
  330. } while(0)
  331. #define DIGIT do { \
  332. if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
  333. cdigit(i) && !cletter(i) && chex(i) && !csymbol(i) && \
  334. !cextended(i) ) \
  335. correct++; \
  336. } while(0)
  337. #define LETTER_HEX do { \
  338. if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
  339. !cdigit(i) && cletter(i) && chex(i) && !csymbol(i) && \
  340. !cextended(i) ) \
  341. correct++; \
  342. } while(0)
  343. #define LETTER do { \
  344. if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
  345. !cdigit(i) && cletter(i) && !chex(i) && !csymbol(i) && \
  346. !cextended(i) ) \
  347. correct++; \
  348. } while(0)
  349. #define EXTENDED do { \
  350. if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
  351. !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
  352. cextended(i) ) \
  353. correct++; \
  354. } while(0)
  355. for (i=0; i<'\t'; i++) CONTROL;
  356. i = '\t'; SPACE;
  357. i = '\n'; RETURN;
  358. i = '\v'; SPACE;
  359. i = '\f'; SPACE;
  360. i = '\r'; RETURN;
  361. for (i='\r'+1; i<' '; i++) CONTROL;
  362. i = ' '; SPACE;
  363. for (i='!'; i<='/'; i++) SYMBOL;
  364. for (i='0'; i<='9'; i++) DIGIT;
  365. for (i=':'; i<='@'; i++) SYMBOL;
  366. for (i='A'; i<='F'; i++) LETTER_HEX;
  367. for (i='G'; i<='Z'; i++) LETTER;
  368. for (i='['; i<='`'; i++) SYMBOL;
  369. for (i='a'; i<='f'; i++) LETTER_HEX;
  370. for (i='g'; i<='z'; i++) LETTER;
  371. for (i='{'; i<='~'; i++) SYMBOL;
  372. i = '\x7F'; CONTROL;
  373. ok(correct==128, "ASCII characters have correct charflags");
  374. correct = 0;
  375. //We do some goofy stuff here to make sure sign extension doesn't cause problems with charflags
  376. {
  377. unsigned int ui;
  378. int si;
  379. for (ui=128; ui<=255; ui++) {
  380. i = ui;
  381. EXTENDED;
  382. }
  383. for (si=-128; si<0; si++) {
  384. i = si;
  385. EXTENDED;
  386. }
  387. }
  388. {
  389. int i;
  390. for (i=-128; i<0; i++) EXTENDED;
  391. }
  392. {
  393. unsigned int i;
  394. for (i=128; i<=255; i++) EXTENDED;
  395. }
  396. ok(correct==512, "Extended characters have correct charflags");
  397. #undef CONTROL
  398. #undef SPACE
  399. #undef RETURN
  400. #undef SYMBOL
  401. #undef DIGIT
  402. #undef LETTER_HEX
  403. #undef LETTER
  404. #undef EXTENDED
  405. }
  406. struct readui_test {
  407. const char *txt;
  408. size_t txt_size;
  409. readui_base base;
  410. uint64_t correct_integer;
  411. int correct_errno;
  412. size_t correct_advance;
  413. };
  414. #define T(txt, ...) {txt, sizeof(txt)-1, __VA_ARGS__}
  415. #define M (18446744073709551615ULL)
  416. struct readui_test readui_tests[] = {
  417. //Basic reads
  418. T("0",READUI_DEC, 0,0,1),
  419. T(" \t42 ",READUI_DEC, 42,0,4),
  420. //Different bases
  421. T("BADBEEFDEADBAT",READUI_HEX, 0xBADBEEFDEADBAULL,0,13),
  422. T("7559",READUI_OCT, 0755,0,3),
  423. T("01010010110012",READUI_BIN, 2649,0,13),
  424. T("1000000000",0x7F, 8594754748609397887ULL,0,10),
  425. //Errors
  426. T("",READUI_DEC, 0,EINVAL,0),
  427. T("18446744073709551616",
  428. READUI_DEC,M,ERANGE,20),
  429. T("1000000000000000000000000",
  430. READUI_DEC,M,ERANGE,25),
  431. T("10000000000000000",
  432. READUI_HEX,M,ERANGE,17),
  433. T("10000000000000000000000000000000000000000000000000000000000000000",
  434. READUI_BIN,M,ERANGE,65),
  435. T("10000000000",
  436. 0x7D,M,ERANGE,11),
  437. T("9000000000",0x7F, M,ERANGE,10),
  438. //Misc
  439. T("18446744073709551615",READUI_DEC, M,0,20),
  440. };
  441. static void test_readui_single(struct readui_test *test) {
  442. uint64_t result_integer;
  443. int result_errno;
  444. size_t result_advance;
  445. const char *s = test->txt, *e = s+test->txt_size;
  446. errno = 0;
  447. result_integer = readui(&s, e, test->base);
  448. result_errno = errno;
  449. result_advance = s-test->txt;
  450. ok(result_integer == test->correct_integer &&
  451. result_errno == test->correct_errno &&
  452. result_advance == test->correct_advance,
  453. "Testing \"%s\"", test->txt);
  454. }
  455. static void test_readui(void) {
  456. size_t i, count = sizeof(readui_tests)/sizeof(*readui_tests);
  457. for (i=0; i<count; i++)
  458. test_readui_single(readui_tests+i);
  459. }
  460. #undef T
  461. #undef M
  462. static void scan_number_sanity_check(const struct scan_number *sn,
  463. enum token_type type, const char *str_pipes, const char *msg) {
  464. //If there is a prefix, it should follow
  465. //the pattern (0 [B X b x]*0..1)
  466. if (sn->prefix < sn->digits) {
  467. int len = sn->digits - sn->prefix;
  468. if (len!=1 && len!=2) {
  469. fail("%s : Prefix length is %d; should be 1 or 2",
  470. str_pipes, len);
  471. return;
  472. }
  473. if (sn->prefix[0] != '0') {
  474. fail("%s : Prefix does not start with 0",
  475. str_pipes);
  476. return;
  477. }
  478. if (len==2 && !strchr("BXbx", sn->prefix[1])) {
  479. fail("%s : Prefix is 0%c; should be 0, 0b, or 0x",
  480. str_pipes, sn->prefix[1]);
  481. return;
  482. }
  483. if (len==1 && type==TOK_FLOATING) {
  484. fail("%s : Octal prefix appears on floating point number",
  485. str_pipes);
  486. return;
  487. }
  488. } else {
  489. //if there is no prefix, the first digit should not be 0
  490. // unless this is a floating point number
  491. if (sn->digits < sn->exponent && sn->digits[0]=='0' &&
  492. type==TOK_INTEGER) {
  493. fail("%s : First digit of non-prefix integer is 0",
  494. str_pipes);
  495. return;
  496. }
  497. }
  498. //Make sure sn->digits contains valid digits and is not empty
  499. // (unless prefix is "0")
  500. {
  501. const char *s = sn->digits, *e = sn->exponent;
  502. if (sn->prefix+1 < sn->digits) {
  503. if (s >= e) {
  504. fail("%s : 0%c not followed by any digits",
  505. str_pipes, sn->prefix[1]);
  506. return;
  507. }
  508. if (sn->prefix[1] == 'X' || sn->prefix[1] == 'x') {
  509. while (s<e && strchr(
  510. "0123456789ABCDEFabcdef.", *s)) s++;
  511. } else {
  512. if (s[0]!='0' && s[0]!='1') {
  513. fail("%s: Binary prefix not followed by a 0 or 1",
  514. str_pipes);
  515. return;
  516. }
  517. while (s<e && strchr(
  518. "0123456789.", *s)) s++;
  519. }
  520. } else {
  521. if (type==TOK_FLOATING && s >= e) {
  522. fail("%s : sn->digits is empty in a floating point number",
  523. str_pipes);
  524. return;
  525. }
  526. if (sn->prefix >= sn->digits && s >= e) {
  527. fail("%s : both sn->prefix and sn->digits are empty",
  528. str_pipes);
  529. return;
  530. }
  531. while (s<e && strchr("0123456789.", *s)) s++;
  532. }
  533. if (s != e) {
  534. fail("%s : sn->digits is not entirely valid", str_pipes);
  535. return;
  536. }
  537. }
  538. //Make sure exponent follows the rules
  539. if (sn->exponent < sn->suffix) {
  540. char c = sn->exponent[0];
  541. if (type==TOK_INTEGER) {
  542. fail("%s : sn->exponent is not empty in an integer", str_pipes);
  543. return;
  544. }
  545. if (sn->prefix < sn->digits && (c=='E' || c=='e')) {
  546. fail("%s : Exponent for hex/binary starts with %c", str_pipes, c);
  547. return;
  548. }
  549. if (sn->prefix >= sn->digits && (c=='P' || c=='p')) {
  550. fail("%s : Exponent for decimal starts with %c", str_pipes, c);
  551. return;
  552. }
  553. }
  554. pass("%s%s", str_pipes, msg);
  555. return;
  556. }
  557. static void test_scan_number_single(const char *str_pipes,
  558. enum token_type type, size_t dots_found) {
  559. char *str = malloc(strlen(str_pipes)+1);
  560. const char *expected[5];
  561. struct scan_number sn;
  562. enum token_type given_type;
  563. {
  564. const char *s = str_pipes;
  565. char *d = str;
  566. size_t pipes = 0;
  567. expected[0] = d;
  568. for (;*s;s++) {
  569. if (*s == ' ')
  570. continue;
  571. if (*s == '|') {
  572. if (++pipes > 4)
  573. goto fail_too_many_pipes;
  574. expected[pipes] = d;
  575. } else
  576. *d++ = *s;
  577. }
  578. *d = 0;
  579. if (pipes < 3)
  580. goto fail_not_enough_pipes;
  581. if (pipes == 3)
  582. expected[4] = d;
  583. }
  584. given_type = scan_number(&sn, str, strchr(str,0));
  585. if (sn.prefix != expected[0]) {
  586. fail("%s : sn.prefix is wrong", str_pipes);
  587. return;
  588. }
  589. if (sn.digits != expected[1]) {
  590. fail("%s : sn.digits is wrong", str_pipes);
  591. return;
  592. }
  593. if (sn.exponent != expected[2]) {
  594. fail("%s : sn.exponent is wrong", str_pipes);
  595. return;
  596. }
  597. if (sn.suffix != expected[3]) {
  598. fail("%s : sn.suffix is wrong", str_pipes);
  599. return;
  600. }
  601. if (sn.end != expected[4]) {
  602. fail("%s : sn.end is wrong", str_pipes);
  603. return;
  604. }
  605. if (given_type != type) {
  606. fail("%s : Type incorrect", str_pipes);
  607. return;
  608. }
  609. if (sn.dots_found != dots_found) {
  610. fail("%s : sn.dots_found is %zu; should be %zu", str_pipes,
  611. sn.dots_found, dots_found);
  612. return;
  613. }
  614. scan_number_sanity_check(&sn, type, str_pipes, "");
  615. free(str);
  616. return;
  617. fail_too_many_pipes:
  618. fail("Too many pipes in the test string \"%s\"; should be 3", str_pipes);
  619. return;
  620. fail_not_enough_pipes:
  621. fail("Not enough pipes in the test string \"%s\"; should be 3", str_pipes);
  622. return;
  623. }
  624. #define T(str, type, dots_found) test_scan_number_single(str,type,dots_found)
  625. static void test_scan_number(void) {
  626. T("0x | 50.1 | p+1 | f", TOK_FLOATING, 1);
  627. T("| 100 || L", TOK_INTEGER, 0);
  628. T("0 ||| b21", TOK_INTEGER, 0);
  629. T("0b | 101 || L", TOK_INTEGER, 0);
  630. T("0X | 7Af ||| \t2", TOK_INTEGER, 0);
  631. T("0|||b", TOK_INTEGER, 0);
  632. T("0|||x", TOK_INTEGER, 0);
  633. }
  634. #undef T
  635. #define T(string, value, theBase, theSuffix) do { \
  636. queue_init(mq, NULL); \
  637. str = (string); \
  638. type = scan_number(&sn, str, str+sizeof(string)-1); \
  639. ok(type==TOK_INTEGER, "%s : type==TOK_INTEGER", str); \
  640. scan_number_sanity_check(&sn, type, str, \
  641. " : scan_number_sanity_check passed"); \
  642. read_integer(&integer, &sn, &mq); \
  643. ok(integer.v==(value) && integer.base==(theBase) && \
  644. integer.suffix==(theSuffix), \
  645. "%s : Correct value and suffix", str); \
  646. } while(0)
  647. #define Q(name) do { \
  648. if (queue_count(mq)) { \
  649. const char *path = dequeue(mq).path; \
  650. ok(!strcmp(path, "tokenize/read_cnumber/" #name), \
  651. "%s : Dequeued %s", str, path); \
  652. } \
  653. } while(0)
  654. #define E() do { \
  655. ok(queue_count(mq)==0, "%s : Message queue empty", str); \
  656. if (queue_count(mq)) \
  657. tok_message_queue_dump(&mq); \
  658. queue_free(mq); \
  659. } while(0)
  660. static void test_read_integer(void) {
  661. struct scan_number sn;
  662. tok_message_queue mq;
  663. const char *str;
  664. enum token_type type;
  665. struct tok_integer integer;
  666. T("0b0lu", 0, 8, TOK_UL);
  667. E();
  668. T("1", 1, 10, TOK_NOSUFFIX);
  669. E();
  670. T("32Q", 32, 10, TOK_NOSUFFIX);
  671. Q(integer_suffix_invalid);
  672. E();
  673. T("32i", 32, 10, TOK_I);
  674. E();
  675. T("0755f", 493, 8, TOK_NOSUFFIX);
  676. Q(suffix_float_only);
  677. E();
  678. T("0xDeadBeef", 0xDEADBEEF, 16, TOK_NOSUFFIX);
  679. E();
  680. T("12345678901234567890$1_LONG.SUFFIX", 12345678901234567890ULL, 10, TOK_NOSUFFIX);
  681. ok1(sn.end == strchr(str, 0));
  682. Q(integer_suffix_invalid);
  683. E();
  684. T("0xDEADBEEFlull", 0xDEADBEEF, 16, TOK_NOSUFFIX);
  685. Q(integer_suffix_invalid);
  686. E();
  687. T("0xBALLuu", 0xBA, 16, TOK_NOSUFFIX);
  688. Q(integer_suffix_invalid);
  689. E();
  690. T("123456789012345678901", 18446744073709551615ULL, 10, TOK_NOSUFFIX);
  691. Q(integer_out_of_range);
  692. E();
  693. T("09", 0, 8, TOK_NOSUFFIX);
  694. Q(integer_invalid_digits);
  695. E();
  696. }
  697. #undef T
  698. #undef E
  699. #define Teq(string, equals, theSuffix) do { \
  700. queue_init(mq, NULL); \
  701. str = malloc(sizeof(string)); \
  702. memcpy(str, string, sizeof(string)); \
  703. type = scan_number(&sn, str, str+sizeof(string)-1); \
  704. ok(type==TOK_FLOATING, "%s : type==TOK_FLOATING", str); \
  705. scan_number_sanity_check(&sn, type, str, \
  706. " : scan_number_sanity_check passed"); \
  707. read_floating(&floating, &sn, &mq); \
  708. ok((equals) && \
  709. floating.suffix==(theSuffix), \
  710. "%s : Correct value and suffix", str); \
  711. } while(0)
  712. #define T(string, value, theSuffix) \
  713. Teq(string, fabsl(floating.v - (value)) <= 0.00000000000000001, theSuffix)
  714. #define E() do { \
  715. ok(queue_count(mq)==0, "%s : Message queue empty", str); \
  716. if (queue_count(mq)) \
  717. tok_message_queue_dump(&mq); \
  718. queue_free(mq); \
  719. free(str); \
  720. } while(0)
  721. static void test_read_floating(void) {
  722. struct scan_number sn;
  723. tok_message_queue mq;
  724. char *str; //str is a malloced copy so read_floating can do its null terminator trick
  725. enum token_type type;
  726. struct tok_floating floating;
  727. T("1.0", 1.0, TOK_NOSUFFIX);
  728. E();
  729. T("0.0", 0.0, TOK_NOSUFFIX);
  730. E();
  731. T("0755e1", 7550.0, TOK_NOSUFFIX);
  732. E();
  733. T("0xD.Bp0", 0xD.Bp0, TOK_NOSUFFIX);
  734. E();
  735. //GCC doesn't throw any errors or warnings for this odd case,
  736. //but we call it an error to be consistent with strtold
  737. T("0x.p0", 0.0, TOK_NOSUFFIX);
  738. Q(floating_invalid_digits);
  739. E();
  740. T("32.0Q", 32.0, TOK_NOSUFFIX);
  741. Q(floating_suffix_invalid);
  742. E();
  743. T("32.0Li", 32.0, TOK_IMAG_L);
  744. E();
  745. T("32.0LL", 32.0, TOK_NOSUFFIX);
  746. Q(suffix_integer_only);
  747. E();
  748. Teq("0xDEAD.BEEF", floating.v==0.0, TOK_NOSUFFIX);
  749. Q(hex_float_no_exponent);
  750. E();
  751. T("0b101.0p0", 0, TOK_NOSUFFIX);
  752. Q(binary_float);
  753. E();
  754. /* If any of the following three tests fails, consider increasing
  755. the e+ and e- values. */
  756. Teq("1.e+4933", isinf(floating.v), TOK_NOSUFFIX);
  757. Q(floating_out_of_range);
  758. E();
  759. /* for some reason, strtold sets errno=EDOM on x86, and
  760. on my PowerPC G4 on Fedora 10, the same phenomenon occurs
  761. but the exponents are e+309, e-324, and e-325 */
  762. Teq("1.e-4951", floating.v==0.0, TOK_NOSUFFIX);
  763. Q(floating_out_of_range);
  764. E();
  765. Teq("1.e-4952", floating.v==0.0, TOK_NOSUFFIX);
  766. Q(floating_out_of_range);
  767. E();
  768. }
  769. #undef Teq
  770. #undef T
  771. #undef Q
  772. #undef E
  /*
   * One tokenizer test case: an input text and the tokens it is expected to
   * produce.  The T() macro fills the members positionally, so their order
   * must not change.
   */
  struct tokenizer_test {
      /* input text; test_tokenizer_single() copies it and passes the copy to tokenize() */
      const char *txt;

      /* number of bytes of txt to tokenize (T() uses sizeof(txt)-1) */
      size_t txt_size;

      /* expected tokens, not counting the TOK_STARTLINE the generated list begins with */
      const struct token *tokens;

      /* number of entries in tokens */
      size_t token_count;
  };
  /*
   * Shorthand used to write the expected-token tables below.
   */

  /* One struct tokenizer_test: the input literal, its length without the NUL,
     and the expected tokens handed to array_count_pair(). */
  #define T(src, ...) {src, sizeof(src)-1, array_count_pair(struct token, __VA_ARGS__)}

  /* Initializer for a token's string member: a one-element darray_char holding
     the literal and its length without the NUL. */
  #define string(lit) {.string=(darray_char[1]){{.item = (lit), .size = sizeof(lit)-1}}}

  /* Initializer for a token's opkw member. */
  #define opkw(code) {.opkw = (code)}

  /* Designated initializers for a token's txt and txt_size members. */
  #define txt(lit) .txt = (lit), .txt_size = sizeof(lit)-1

  /* Initializers for a token's integer and floating members; the arguments
     are passed through positionally. */
  #define integer(...) {.integer={__VA_ARGS__}}
  #define floating(...) {.floating={__VA_ARGS__}}

  /* A complete whitespace token consisting of a single space. */
  #define space {.type = TOK_WHITE, .txt = " ", .txt_size = 1}

  /* A complete TOK_STARTLINE token (no text). */
  #define startline {.type = TOK_STARTLINE}

  /* Initializer for a token's include member. */
  #define include(path) {.include = (path)}
  788. struct tokenizer_msg_test {
  789. struct tokenizer_test test;
  790. const char * const *messages;
  791. size_t message_count;
  792. };
  793. #define M(...) array_count_pair(const char *, __VA_ARGS__)
  794. struct tokenizer_test tokenizer_tests[] = {
  795. { "", 0, 0 },
  796. T("\n",
  797. {.type = TOK_WHITE, txt("\n")}
  798. ),
  799. T("\na",
  800. {.type = TOK_WHITE, txt("\n")},
  801. startline,
  802. {.type = TOK_IDENTIFIER, txt("a")}
  803. ),
  804. T("int n = c++;",
  805. {.type = TOK_KEYWORD,
  806. opkw(INT),
  807. txt("int")
  808. }, space,
  809. {.type = TOK_IDENTIFIER,
  810. txt("n")
  811. }, space,
  812. {.type = TOK_OPERATOR,
  813. opkw('='),
  814. txt("=")
  815. }, space,
  816. {.type = TOK_IDENTIFIER,
  817. txt("c")
  818. },
  819. {.type = TOK_OPERATOR,
  820. opkw(INC_OP),
  821. txt("++")
  822. },
  823. {.type = TOK_OPERATOR,
  824. opkw(';'),
  825. txt(";")
  826. }
  827. ),
  828. T(".5 42 ",
  829. {.type = TOK_FLOATING,
  830. floating(.5, TOK_NOSUFFIX),
  831. txt(".5")
  832. }, space,
  833. {.type = TOK_INTEGER,
  834. integer(42, 10, TOK_NOSUFFIX),
  835. txt("42")
  836. }, space,
  837. ),
  838. //Make sure TOK_STRAY doesn't take over the universe
  839. T("``AS IS'' AND",
  840. {.type = TOK_STRAY,
  841. txt("``")
  842. },
  843. {.type = TOK_IDENTIFIER,
  844. txt("AS")
  845. }, space,
  846. {.type = TOK_IDENTIFIER,
  847. txt("IS")
  848. },
  849. {.type = TOK_CHAR,
  850. string(""),
  851. txt("\'\'")
  852. }, space,
  853. {.type = TOK_IDENTIFIER,
  854. txt("AND")
  855. }
  856. ),
  857. //Make sure starting with 0 doesn't result in skipping whitespace
  858. T("0 .05 0 500",
  859. {.type = TOK_INTEGER,
  860. integer(0, 8, TOK_NOSUFFIX),
  861. txt("0")
  862. }, space,
  863. {.type = TOK_FLOATING,
  864. floating(.05, TOK_NOSUFFIX),
  865. txt(".05")
  866. }, space,
  867. {.type = TOK_INTEGER,
  868. integer(0, 8, TOK_NOSUFFIX),
  869. txt("0")
  870. }, space,
  871. {.type = TOK_INTEGER,
  872. integer(500, 10, TOK_NOSUFFIX),
  873. txt("500")
  874. }
  875. ),
  876. //Make sure a simple preprocessor directive works
  877. T("\t/*comment*/ #include \"include.h\"\n",
  878. {.flags={1,0}, .type=TOK_WHITE, txt("\t")},
  879. {.flags={1,0}, .type=TOK_CCOMMENT, txt("/*comment*/")},
  880. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  881. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  882. {.flags={1,1}, .type=TOK_KEYWORD, opkw(INCLUDE), txt("include")},
  883. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  884. {.flags={1,0}, .type=TOK_STRING_IQUOTE, include("include.h"), txt("\"include.h\"")},
  885. {.flags={1,0}, .type=TOK_WHITE, txt("\n")}
  886. ),
  887. //Make sure __VA_ARGS__ is lexed correctly
  888. T("if #define __VA_ARGS__=0X5FULL;\n"
  889. " #define __VA_ARGS__(__VA_ARGS__, ...\t)__VA_ARGS__ bar int define",
  890. {.type=TOK_KEYWORD, opkw(IF), txt("if")},
  891. space,
  892. {.type=TOK_OPERATOR, opkw('#'), txt("#")},
  893. {.type=TOK_IDENTIFIER, txt("define")},
  894. space,
  895. {.type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
  896. {.type=TOK_OPERATOR, opkw('='), txt("=")},
  897. {.type=TOK_INTEGER, integer(0x5F,16,TOK_ULL), txt("0X5FULL")},
  898. {.type=TOK_OPERATOR, opkw(';'), txt(";")},
  899. {.type=TOK_WHITE, txt("\n")},
  900. {.flags={1,0}, .type=TOK_STARTLINE},
  901. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  902. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  903. {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
  904. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  905. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
  906. {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
  907. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
  908. {.flags={1,0}, .type=TOK_OPERATOR, opkw(','), txt(",")},
  909. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  910. {.flags={1,0}, .type=TOK_OPERATOR, opkw(ELLIPSIS), txt("...")},
  911. {.flags={1,0}, .type=TOK_WHITE, txt("\t")},
  912. {.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
  913. {.flags={1,0}, .type=TOK_KEYWORD, opkw(VA_ARGS), txt("__VA_ARGS__")},
  914. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  915. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
  916. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  917. {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
  918. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  919. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
  920. ),
  921. //__VA_ARGS__ is an identifier if no ... operator is in the parameter list or if there is no parameter list
  922. T("#define foo __VA_ARGS__ bar int define\n#define foo() __VA_ARGS__ bar int define",
  923. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  924. {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
  925. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  926. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
  927. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  928. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
  929. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  930. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
  931. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  932. {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
  933. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  934. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
  935. {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
  936. {.flags={1,0}, .type=TOK_STARTLINE},
  937. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  938. {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
  939. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  940. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
  941. {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
  942. {.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
  943. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  944. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
  945. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  946. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
  947. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  948. {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
  949. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  950. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")}
  951. ),
  952. //Test various integer suffixen
  953. T("1 1u 1l 1ul 1lu 1ll 1ull 1llu 1U 1L 1UL 1LU 1LL 1ULL 1LLU "
  954. "1uq 1lq 1llq 1ulq 1luq 1f 1i",
  955. {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1")}, space,
  956. {.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1u")}, space,
  957. {.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1l")}, space,
  958. {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1ul")}, space,
  959. {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1lu")}, space,
  960. {.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1ll")}, space,
  961. {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ull")}, space,
  962. {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1llu")}, space,
  963. {.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1U")}, space,
  964. {.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1L")}, space,
  965. {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1UL")}, space,
  966. {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1LU")}, space,
  967. {.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1LL")}, space,
  968. {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ULL")}, space,
  969. {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1LLU")}, space,
  970. {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1uq")}, space,
  971. {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1lq")}, space,
  972. {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1llq")}, space,
  973. {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1ulq")}, space,
  974. {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1luq")}, space,
  975. {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1f")}, space,
  976. {.type=TOK_INTEGER, integer(1, 10, TOK_I), txt("1i")}
  977. ),
  978. //Test non-standard newlines
  979. T("\n\r\n \r\n\rint",
  980. {.type=TOK_WHITE, txt("\n\r")}, startline,
  981. {.type=TOK_WHITE, txt("\n")}, startline,
  982. space,
  983. {.type=TOK_WHITE, txt("\r\n")}, startline,
  984. {.type=TOK_WHITE, txt("\r")}, startline,
  985. {.type=TOK_KEYWORD, opkw(INT), txt("int")}
  986. ),
  987. //Test backslash-broken lines
  988. T("oner\\ \nous",
  989. {.type=TOK_IDENTIFIER, txt("onerous")}
  990. ),
  991. T("\\\n\\\n\\\n\\",
  992. {.type=TOK_STRAY, txt("\\")}
  993. ),
  994. T("in\\\nt i\\;\nf\\ \r\nor (i=0; i<10; i++) {\\",
  995. {.type=TOK_KEYWORD, opkw(INT), txt("int")}, space,
  996. {.type=TOK_IDENTIFIER, txt("i")},
  997. {.type=TOK_STRAY, txt("\\")},
  998. {.type=TOK_OPERATOR, opkw(';'), txt(";")},
  999. {.type=TOK_WHITE, txt("\n")},
  1000. startline,
  1001. {.type=TOK_KEYWORD, opkw(FOR), txt("for")}, space,
  1002. {.type=TOK_OPERATOR, opkw('('), txt("(")},
  1003. {.type=TOK_IDENTIFIER, txt("i")},
  1004. {.type=TOK_OPERATOR, opkw('='), txt("=")},
  1005. {.type=TOK_INTEGER, integer(0,8,0), txt("0")},
  1006. {.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
  1007. {.type=TOK_IDENTIFIER, txt("i")},
  1008. {.type=TOK_OPERATOR, opkw('<'), txt("<")},
  1009. {.type=TOK_INTEGER, integer(10,10,0), txt("10")},
  1010. {.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
  1011. {.type=TOK_IDENTIFIER, txt("i")},
  1012. {.type=TOK_OPERATOR, opkw(INC_OP), txt("++")},
  1013. {.type=TOK_OPERATOR, opkw(')'), txt(")")}, space,
  1014. {.type=TOK_OPERATOR, opkw('{'), txt("{")},
  1015. {.type=TOK_STRAY, txt("\\")}
  1016. ),
  1017. //More preprocessor directive tests
  1018. T("#apple\n#pragma\n#const\n#define \t\n#define foo(x",
  1019. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  1020. {.flags={1,1}, .type=TOK_IDENTIFIER, txt("apple")},
  1021. {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
  1022. {.flags={1,0}, .type=TOK_STARTLINE},
  1023. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  1024. {.flags={1,1}, .type=TOK_KEYWORD, opkw(PRAGMA), txt("pragma")},
  1025. {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
  1026. {.flags={1,0}, .type=TOK_STARTLINE},
  1027. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  1028. {.flags={1,1}, .type=TOK_IDENTIFIER, txt("const")},
  1029. {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
  1030. {.flags={1,0}, .type=TOK_STARTLINE},
  1031. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  1032. {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
  1033. {.flags={1,0}, .type=TOK_WHITE, txt(" \t")},
  1034. {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
  1035. {.flags={1,0}, .type=TOK_STARTLINE},
  1036. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  1037. {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
  1038. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  1039. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
  1040. {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
  1041. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("x")}
  1042. ),
  1043. T("#define",
  1044. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  1045. {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")}
  1046. ),
  1047. T("#define foo",
  1048. {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
  1049. {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
  1050. {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
  1051. {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")}
  1052. ),
  1053. T("`#define foo",
  1054. {.type=TOK_STRAY, txt("`")},
  1055. {.type=TOK_OPERATOR, opkw('#'), txt("#")},
  1056. {.type=TOK_IDENTIFIER, txt("define")},
  1057. space,
  1058. {.type=TOK_IDENTIFIER, txt("foo")}
  1059. )
  1060. };
  1061. struct tokenizer_msg_test tokenizer_msg_tests[] = {
  1062. {T("/* Unterminated C comment",
  1063. {.type=TOK_CCOMMENT, txt("/* Unterminated C comment")}
  1064. ), M(
  1065. "unterminated_comment"
  1066. )},
  1067. {T("\"\n\"\"\n",
  1068. {.type=TOK_STRING, string("\n"), txt("\"\n\"")},
  1069. {.type=TOK_STRING, string("\n"), txt("\"\n")}
  1070. ), M(
  1071. "read_cstring/quote_newlines",
  1072. "read_cstring/missing_endquote"
  1073. )},
  1074. };
  /*
   * The table-building shorthands are only meant for the test tables above.
   * Remove all of them -- including the object-like `space` and `startline`
   * macros -- so that none of these common words stays a macro for the rest
   * of the file.
   */
  #undef T
  #undef string
  #undef opkw
  #undef txt
  #undef integer
  #undef floating
  #undef space
  #undef startline
  #undef M
  #undef include
  1083. static void test_tokenizer_single(struct tokenizer_test *t, tok_message_queue *mq) {
  1084. struct token_list *tl;
  1085. size_t i, count = t->token_count, gen_count;
  1086. const struct token *tok_gen, *tok_correct;
  1087. int success = 1;
  1088. char *txt = talloc_memdup(NULL, t->txt, t->txt_size);
  1089. size_t txt_size = t->txt_size;
  1090. #define failed(fmt, ...) do { \
  1091. printf("Error: " fmt "\n", ##__VA_ARGS__); \
  1092. success = 0; \
  1093. goto done; \
  1094. } while(0)
  1095. tl = tokenize(txt, txt, txt_size, mq);
  1096. if (tl->orig != txt || tl->orig_size != txt_size)
  1097. failed("tokenize() did not replicate orig/orig_size from arguments");
  1098. if (!token_list_sanity_check(tl, stdout))
  1099. failed("Sanity check failed");
  1100. gen_count = token_list_count(tl);
  1101. if (gen_count != count+1)
  1102. failed("Incorrect number of tokens (%zu, should be %zu)\n",
  1103. gen_count, count+1);
  1104. tok_gen = tl->first->next; //skip the beginning TOK_STARTLINE
  1105. tok_correct = t->tokens;
  1106. for (i=0; i<count; i++, tok_gen=tok_gen->next, tok_correct++) {
  1107. if (tok_gen->type != tok_correct->type)
  1108. failed("Token \"%s\": Incorrect type", tok_correct->txt);
  1109. {
  1110. struct token_flags g=tok_gen->flags, c=tok_correct->flags;
  1111. if (g.pp!=c.pp || g.pp_directive!=c.pp_directive)
  1112. failed("Token \"%s\": Incorrect flags", tok_correct->txt);
  1113. }
  1114. switch (tok_gen->type) {
  1115. case TOK_INTEGER:
  1116. if (tok_gen->integer.v != tok_correct->integer.v ||
  1117. tok_gen->integer.base != tok_correct->integer.base ||
  1118. tok_gen->integer.suffix != tok_correct->integer.suffix)
  1119. failed("Token \"%s\": Integer value/base/suffix incorrect", tok_correct->txt);;
  1120. break;
  1121. case TOK_FLOATING:
  1122. if (fabsl(tok_gen->floating.v - tok_correct->floating.v) > 0.00000000000000001 ||
  1123. tok_gen->floating.suffix != tok_correct->floating.suffix)
  1124. failed("Token \"%s\": Floating point value/suffix incorrect", tok_correct->txt);
  1125. break;
  1126. case TOK_OPERATOR:
  1127. if (tok_gen->opkw != tok_correct->opkw)
  1128. failed("Token \"%s\": Operator opkw incorrect", tok_correct->txt);
  1129. break;
  1130. case TOK_KEYWORD:
  1131. if (tok_gen->opkw != tok_correct->opkw)
  1132. failed("Token \"%s\": Keyword opkw incorrect", tok_correct->txt);
  1133. break;
  1134. case TOK_CHAR:
  1135. case TOK_STRING:
  1136. //anything using string
  1137. if (tok_gen->string->size != tok_correct->string->size ||
  1138. memcmp(tok_gen->string->item, tok_correct->string->item,
  1139. tok_gen->string->size) ||
  1140. tok_gen->string->item[tok_gen->string->size] != 0 )
  1141. failed("Token \"%s\": String value incorrect", tok_correct->txt);
  1142. break;
  1143. case TOK_STRING_IQUOTE:
  1144. case TOK_STRING_IANGLE:
  1145. if (strcmp(tok_gen->include, tok_correct->include))
  1146. failed("Token \"%s\": #include string incorrect", tok_correct->txt);
  1147. break;
  1148. case TOK_IDENTIFIER:
  1149. case TOK_CCOMMENT:
  1150. case TOK_CPPCOMMENT:
  1151. case TOK_WHITE:
  1152. case TOK_STARTLINE:
  1153. case TOK_STRAY:
  1154. break;
  1155. }
  1156. if (tok_gen->type!=TOK_STARTLINE && (
  1157. tok_gen->txt_size != tok_correct->txt_size ||
  1158. memcmp(tok_gen->txt, tok_correct->txt, tok_gen->txt_size))
  1159. )
  1160. failed("Token \"%s\": txt incorrect", tok_correct->txt);
  1161. }
  1162. #undef failed
  1163. done:
  1164. ok(success==1, "Tokenize %s", t->txt);
  1165. if (!success)
  1166. token_list_dump(tl, stdout);
  1167. talloc_free(txt);
  1168. }
  1169. static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
  1170. FILE *f = fopen(file_name, "rb");
  1171. darray_char *text = talloc_darray(NULL);
  1172. const size_t inc = 1024;
  1173. struct token_list *tl;
  1174. if (!f) {
  1175. fail("Could not read file '%s': %s", file_name, strerror(errno));
  1176. goto end;
  1177. }
  1178. for (;;) {
  1179. size_t read_len;
  1180. darray_realloc(*text, text->size+inc+1);
  1181. read_len = fread(text->item+text->size, 1, inc, f);
  1182. text->size += read_len;
  1183. text->item[text->size] = 0;
  1184. if (read_len < inc)
  1185. break;
  1186. }
  1187. if (ferror(f)) {
  1188. fail("Error reading file '%s': %s", file_name, strerror(errno));
  1189. goto end;
  1190. }
  1191. tl = tokenize(text, text->item, text->size, mq);
  1192. tl->filename = file_name;
  1193. //printf("File '%s' has %zu tokens\n", file_name, token_list_count(tl));
  1194. //token_list_dump(tl, stdout);
  1195. if (!token_list_sanity_check(tl, stdout)) {
  1196. fail("Sanity check failed for file '%s'", file_name);
  1197. goto end;
  1198. }
  1199. pass("File '%s' has %zu tokens", file_name, token_list_count(tl));
  1200. /*while (queue_count(*mq)) {
  1201. struct tok_message msg = dequeue(*mq);
  1202. tok_message_print(&msg, tl);
  1203. }*/
  1204. end:
  1205. talloc_free(text);
  1206. if (f)
  1207. fclose(f);
  1208. }
  1209. static void test_tokenizer(void) {
  1210. tok_message_queue mq;
  1211. size_t i, count;
  1212. int has_warn_or_worse = 0;
  1213. queue_init(mq, NULL);
  1214. count = sizeof(tokenizer_tests)/sizeof(*tokenizer_tests);
  1215. for (i=0; i<count; i++) {
  1216. test_tokenizer_single(tokenizer_tests+i, &mq);
  1217. while (queue_count(mq)) {
  1218. struct tok_message msg = dequeue(mq);
  1219. (void) msg;
  1220. //tok_message_dump(&msg);
  1221. }
  1222. }
  1223. count = sizeof(tokenizer_msg_tests)/sizeof(*tokenizer_msg_tests);
  1224. for (i=0; i<count; i++) {
  1225. size_t j;
  1226. test_tokenizer_single(&tokenizer_msg_tests[i].test, &mq);
  1227. if (queue_count(mq) != tokenizer_msg_tests[i].message_count) {
  1228. fail("Incorrect number of messages from tokenize()");
  1229. while (queue_count(mq))
  1230. (void) dequeue(mq);
  1231. goto msg_fail;
  1232. }
  1233. for (j=0; queue_count(mq); j++) {
  1234. struct tok_message msg = dequeue(mq);
  1235. const char *base = "tokenize/";
  1236. size_t baselen = strlen(base);
  1237. //tok_message_dump(&msg);
  1238. if (strncmp(msg.path, base, baselen)) {
  1239. fail("Message from tokenize() doesn't start with \"%s\"",
  1240. base);
  1241. goto msg_fail;
  1242. }
  1243. if (strcmp(msg.path+baselen,
  1244. tokenizer_msg_tests[i].messages[j])) {
  1245. fail("Incorrect message %s, should be %s",
  1246. msg.path+baselen, tokenizer_msg_tests[i].messages[j]);
  1247. goto msg_fail;
  1248. }
  1249. }
  1250. pass("Messages from tokenize() are correct");
  1251. msg_fail:;
  1252. }
  1253. test_tokenizer_file("test/run.c", &mq);
  1254. while (queue_count(mq)) {
  1255. struct tok_message msg = dequeue(mq);
  1256. if (msg.level >= TM_WARN) {
  1257. has_warn_or_worse = 1;
  1258. tok_message_dump(&msg);
  1259. }
  1260. //else tok_message_dump(&msg);
  1261. }
  1262. ok(has_warn_or_worse==0, "Tokenizing run.c generated%s warnings, errors, or bugs",
  1263. has_warn_or_worse ? "" : " no");
  1264. queue_free(mq);
  1265. }
  1266. #include <unistd.h>
  /*
   * Test entry point: declares the planned number of tests, runs each test
   * group in order (announcing it through diag() first), and returns the
   * status computed by exit_status().
   */
  int main(void)
  {
      /* Announce a group as "* Checking <group>..." and call test_<group>(). */
      #define CHECK_GROUP(group) do { \
          diag("* Checking " #group "..."); \
          test_##group(); \
      } while (0)

      plan_tests(195);

      CHECK_GROUP(queue);
      CHECK_GROUP(read_cstring);
      CHECK_GROUP(dict);
      CHECK_GROUP(charflag);
      CHECK_GROUP(readui);
      CHECK_GROUP(scan_number);
      CHECK_GROUP(read_integer);
      CHECK_GROUP(read_floating);
      CHECK_GROUP(tokenizer);

      #undef CHECK_GROUP

      /* This exits depending on whether all tests passed */
      return exit_status();
  }