/* Pchen0/ccan - mirror of https://git.ozlabs.org/~ccan/ccan */
							/*
        Copyright (c) 2009  Joseph A. Adams
        All rights reserved.
        
        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.
        3. The name of the author may not be used to endorse or promote products
           derived from this software without specific prior written permission.
        
        THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
        IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
        OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
        IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
        INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
        NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
        THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <ccan/ccan_tokenizer/read_cnumber.c>
#include <ccan/ccan_tokenizer/read_cstring.c>
#include <ccan/ccan_tokenizer/dict.c>
#include <ccan/ccan_tokenizer/ccan_tokenizer.c>
#include <ccan/ccan_tokenizer/queue.c>
#include <ccan/ccan_tokenizer/charflag.c>

#include <ccan/ccan_tokenizer/ccan_tokenizer.h>

#include <ccan/tap/tap.h>

#include <math.h>

/* Expands to TWO comma-separated expressions: a compound-literal array of
 * `const type` built from the variadic arguments, followed by the number of
 * elements in that array.  Meant to be dropped into an initializer list that
 * has an adjacent pointer member and count member.  Note that the variadic
 * arguments are expanded twice. */
#define array_count_pair(type, ...) (const type []){__VA_ARGS__}, sizeof((const type []){__VA_ARGS__})/sizeof(type)

/*
 * Runs read_cstring() over every entry of cstrings[] in order and checks,
 * for each one, the decoded bytes left in `str` and the continuation pointer
 * that read_cstring() returned.  Afterwards the messages queued in `mq` are
 * compared (level and path, ignoring TM_DEBUG entries) against the expected
 * sequence in correct_messages[].
 *
 * Each verify()/verify_quotechar() call emits two TAP results and advances
 * to the next entry of cstrings[], so the calls below must stay in the same
 * order as the table.
 */
static void test_read_cstring(void) {
	#define next() do {darray_free(str); darray_init(str); csp++;} while(0)
	#define cs (*csp)
	//Printable form of the current input.  The first test case is a NULL
	//pointer, and passing NULL to a %s conversion is undefined behaviour.
	#define cs_text (cs ? cs : "(null)")
	#define verify_quotechar(correct, correct_continuation_offset, quotechar) do { \
		const size_t s = sizeof(correct)-1; \
		p = read_cstring(&str, cs, cs ? strchr(cs, 0) : NULL, quotechar, &mq); \
		ok(str.size==s && str.alloc>s && str.item[s]==0 && \
		!memcmp(str.item, correct, s), \
		"\"%s: Is output correct?", cs_text); \
		ok(p == (cs ? cs+correct_continuation_offset : NULL), "\"%s: Is continuation pointer correct?", cs_text); \
		next(); \
	} while(0)
	#define verify(correct, correct_continuation_offset) verify_quotechar(correct, correct_continuation_offset, '"')
	
	//Inputs, consumed one per verify() call below
	const char * const cstrings[] = {
		NULL,
		"",
		"\"",
		"Hello world!\"",
		"Hello world!",
		"\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\"",
		"\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\'",
		"الأدب العربي\"",
		"Ends with \\",
		"Tab: '\\011' Space: '\\040' Overflow: '\\777' Ambiguous: '\\1013'\"",
		"\\x50\\x35\\x12\\xEF\\xFE\\x00012\\x345\""
	};
	const char * const *csp = cstrings;
	const char *p;
	darray_char str = darray_new();
	tok_message_queue mq;
	
	queue_init(mq, NULL);
	
	//check null input
	verify("", 0);
	
	//Check an empty input
	verify("", 0);
	
	//Check an empty quote-terminated string
	verify("", 0);
	
	//Check a simple string
	verify("Hello world!", 12);
	
	//Check a simple string without an end quote
	verify("Hello world!", 12);
	
	//Check a collection of single-character sequences
	verify("\\\f\e\b\0\a\r\nw\t\v\'\"", 26);
	
	//Check same collection of single-character sequences, this time using a single quote terminator
	verify_quotechar("\\\f\e\b\0\a\r\nw\t\v\'\"", 26, '\'');
	
	//Check a real UTF-8 string
	verify("\xd8\xa7\xd9\x84\xd8\xa3\xd8\xaf\xd8\xa8\x20\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", 23);
	
	//Check string ending in backslash
	verify("Ends with \\", 11);
	
	//Check a series of octal escapes
	verify("Tab: '\t' Space: ' ' Overflow: '\377' Ambiguous: 'A3'", 61);
	
	//Check a series of hex escapes
	verify("\x50\x35\x12\xEF\xFE\x12\x45", 32);
	
	darray_free(str);
	
	//tok_message_queue_dump(&mq);
	
	//Verify the message queue
	if (1)
	{
		struct tok_message m;
		struct tok_message correct_messages[] = {
			{.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
			{.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
			{.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
			{.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
			//{.level=TM_INFO, .path="tokenize/read_cstring/escaped_single_quote"},
			{.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
			//{.level=TM_INFO, .path="tokenize/read_cstring/escaped_double_quote"},
			{.level=TM_ERROR, .path="tokenize/read_cstring/ended_in_backslash"},
			{.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
			{.level=TM_WARN, .path="tokenize/read_cstring/octal_overflow"},
			{.level=TM_INFO, .path="tokenize/read_cstring/ambiguous_octal"},
			{.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
			{.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
			{.level=TM_WARN, .path="tokenize/read_cstring/hex_overflow"},
		};
		size_t i, e=sizeof(correct_messages)/sizeof(*correct_messages);
		
		//Skip leading debug messages, then match the remaining messages one
		//by one; i stops at the first expected entry that does not match.
		while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
			queue_skip(mq);
		for (i=0; i<e; i++) {
			if (!queue_count(mq))
				break;
			m = dequeue(mq);
			if (m.level != correct_messages[i].level)
				break;
			if (strcmp(m.path, correct_messages[i].path))
				break;
			while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
				queue_skip(mq);
		}
		if (i<e)
			printf("Item %zu is incorrect\n", i);
		ok(i==e, "Is message queue correct?");
		ok(!queue_count(mq), "Message queue should be empty now.");
	}
	
	queue_free(mq);
	#undef next
	#undef cs
	#undef cs_text
	#undef verify_quotechar
	#undef verify
}

#if 0
/* Debug helper (currently compiled out): writes str followed by a newline to
 * stdout, substituting the text "(null)" when str is a NULL pointer. */
static void p(const char *str) {
	puts(str ? str : "(null)");
}
#endif

/*
 * Exercises the queue macros with a queue of string pointers: enqueue six
 * items, dequeue three, enqueue ten more, then drain the queue and confirm
 * that items come out in insertion order and that dequeue_check() yields
 * NULL once the queue is empty.  Emits six TAP results.
 */
static void test_queue(void) {
	const char * const s[] = {
		"zero",
		"one",
		"two",
		"three",
		"four",
		"five",
		"six",
		"seven",
		"eight",
		"nine",
		"ten",
		"eleven",
		"twelve",
		"thirteen",
		"fourteen",
		"fifteen"
	};
	queue(const char*) q;
	size_t idx;
	int in_order;
	
	queue_init(q, NULL);
	
	//First batch: s[0]..s[5]
	for (idx = 0; idx <= 5; idx++)
		enqueue(q, s[idx]);
	ok(queue_count(q) == 6, "Checking queue count");
	
	//Take three back out, stopping at the first mismatch
	in_order = 1;
	for (idx = 0; in_order && idx <= 2; idx++)
		in_order = (dequeue_check(q) == s[idx]);
	ok(in_order, "Dequeuing/checking 3 items");
	ok(queue_count(q) == 3, "Checking queue count");
	
	//Second batch: s[6]..s[15]
	for (idx = 6; idx <= 15; idx++)
		enqueue(q, s[idx]);
	ok(queue_count(q) == 13, "Checking queue count");
	
	//Drain everything that is left, again stopping at the first mismatch
	in_order = 1;
	for (idx = 3; in_order && idx <= 15; idx++)
		in_order = (dequeue_check(q) == s[idx]);
	ok(in_order, "Are queue items correct?");
	
	ok(dequeue_check(q)==NULL && dequeue_check(q)==NULL && queue_count(q)==0, "Does queue run out correctly?");
	
	queue_free(q);
}

/* Convenience wrapper: picks up the caller's local `dict`, `str` (a char
 * array, so sizeof gives its length) and `correct` (an int array). */
#define test_dict_single() _test_dict_single(dict, str, sizeof(str)-1, correct, sizeof(correct)/sizeof(*correct))

/*
 * Repeatedly calls dict_lookup() on the len bytes at str and compares the id
 * of each entry found against correct[].  A correct[] value of -100 means
 * "no entry expected here": dict_lookup() must return NULL without moving
 * the cursor, and the walk then skips one byte.  Emits one TAP result that
 * passes only if the whole input and the whole correct[] array were
 * consumed together.
 */
static void _test_dict_single(struct dict *dict, const char *str, size_t len, int *correct, size_t correct_count) {
	const char *cursor = str;
	const char * const end = str + len;
	size_t idx = 0;
	
	while (cursor < end && idx < correct_count) {
		const char * const before = cursor;
		struct dict_entry *found = dict_lookup(dict, &cursor, end);
		
		if (found == NULL) {
			//dict_lookup should not modify *sp when it returns NULL
			if (cursor != before)
				break;
			cursor++;
			if (correct[idx] != -100)
				break;
		} else {
			if (correct[idx] != found->id)
				break;
			if (*found->str == '\0') {
				//The empty entry must have consumed exactly one zero byte
				if (cursor != before + 1 || cursor[-1] != 0)
					break;
			} else {
				size_t entry_len = strlen(found->str);
				if (cursor != before + entry_len)
					break;
				if (strncmp(found->str, cursor - entry_len, entry_len) != 0)
					break;
			}
		}
		idx++;
	}
	
	if (!(cursor == end && idx == correct_count)) {
		printf("Tokenization failed at ");
		fwrite(cursor, 1, end - cursor, stdout);
		printf("\n");
	}
	
	ok(cursor == end && idx == correct_count, "All of the tokens are correct");
}

/*
 * Builds a dictionary from dict_orig[] with dict_build() and runs several
 * inputs through it via test_dict_single(), which reads the locals `dict`,
 * `str` and `correct` of the enclosing scope by name.  Also checks
 * dict_lookup() on a NULL range and dict_build() on an empty entry list.
 *
 * In each correct[] array the values are the expected entry ids in order;
 * -100 marks a byte for which no entry is expected (see _test_dict_single).
 */
static void test_dict(void) {
	//Entries are given positionally as {id, string}.  The set includes an
	//empty string, entries that are prefixes of other entries, and entries
	//beginning with bytes above 0x7F.
	struct dict_entry dict_orig[] = {
		{-1, ""},
		{0, " "},
		{1, "it"},
		{2, "it's"},
		{3, "a"},
		{4, "beautiful"},
		{5, "be"},
		{6, "day"},
		{7, "d"},
		{8, "in"},
		{9, "the"},
		{10, "t"},
		{11, "neighborhood"},
		{12, "neighbor"},
		{13, "won't"},
		{14, " you"},
		{15, "my"},
		{16, "??"},
		{17, "item"},
		{18, "ip"},
		{19, "\xFF\xFA"},
		{20, "\xFF\xEE"},
		{21, "\x80\x12\x34"},
		{22, "\x80\x32"},
		{23, "\x80\x32\x34"}
	};
	struct dict *dict = dict_build(NULL, dict_orig, sizeof(dict_orig)/sizeof(*dict_orig));
	
	{
		const char *s=NULL, *e=NULL;
		ok(dict_lookup(dict, &s, e)==NULL && s==NULL && e==NULL, "dict_lookup does nothing and returns null on empty input");
	}
	
	//sentence with an embedded zero byte (expected to match the "" entry,
	//id -1) and a trailing '?' that matches nothing (-100)
	{
		const char str[] = "it's a beautiful day in the neighborhood\0won't you be my neighbor?";
		int correct[] = {2,0, 3,0, 4,0, 6,0, 8,0, 9,0, 11,-1, 13, 14,0, 5,0, 15,0, 12, -100};
		test_dict_single();
	}
	
	//check equal-length tokens
	{
		const char str[] = "it'sitem initip";
		int correct[] = {2,17,0, 8,1,18};
		test_dict_single();
	}
	
	//check mostly invalid tokens
	{
		const char str[] = "&^&beaumx yo youthx";
		int correct[] = {-100,-100,-100, 5,3,-100,-100,-100, 0,-100,-100, 14,10,-100,-100};
		test_dict_single();
	}
	
	//check tokens that start with a character greater than 0x7F
	{
		const char str[] = "\x80\x12\x34\x80\x32\x80\x32\x34\xFF\xFA\xFF\xEE";
		int correct[] = {21, 22, 23, 19, 20};
		test_dict_single();
	}
	
	talloc_free(dict);
	
	//make sure dict_build doesn't blow up on an empty dictionary
	dict = dict_build(NULL, NULL, 0);
	talloc_free(dict);
}

/*
 * Checks the character-classification macros (ccontrol, cspace, creturn,
 * cwhite, cdigit, cletter, chex, csymbol, cextended) for every byte value.
 *
 * Each helper macro below tests the variable named `i` that is in scope at
 * the point of use and increments `correct` only when `i` has exactly the
 * flag combination expected for that class.  Two TAP results are emitted:
 * one for the 128 ASCII values and one for the extended values.
 */
static void test_charflag(void) {
	char i;
	int correct = 0;
	
	//control character: ccontrol only
	#define CONTROL do { \
		if (ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
			!cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
			!cextended(i) ) \
			correct++; \
		} while(0)
	//horizontal/vertical spacing: cspace and cwhite
	#define SPACE do { \
		if (!ccontrol(i) && cspace(i) && !creturn(i) && cwhite(i) && \
			!cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
			!cextended(i) ) \
			correct++; \
		} while(0)
	//line terminator: creturn and cwhite
	#define RETURN do { \
		if (!ccontrol(i) && !cspace(i) && creturn(i) && cwhite(i) && \
			!cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
			!cextended(i) ) \
			correct++; \
		} while(0)
	//punctuation: csymbol only
	#define SYMBOL do { \
		if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
			!cdigit(i) && !cletter(i) && !chex(i) && csymbol(i) && \
			!cextended(i) ) \
			correct++; \
		} while(0)
	//decimal digit: cdigit and chex
	#define DIGIT do { \
		if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
			cdigit(i) && !cletter(i) && chex(i) && !csymbol(i) && \
			!cextended(i) ) \
			correct++; \
		} while(0)
	//letter that is also a hex digit: cletter and chex
	#define LETTER_HEX do { \
		if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
			!cdigit(i) && cletter(i) && chex(i) && !csymbol(i) && \
			!cextended(i) ) \
			correct++; \
		} while(0)
	//any other letter: cletter only
	#define LETTER do { \
		if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
			!cdigit(i) && cletter(i) && !chex(i) && !csymbol(i) && \
			!cextended(i) ) \
			correct++; \
		} while(0)
	//value outside the ASCII range: cextended only
	#define EXTENDED do { \
		if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
			!cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
			cextended(i) ) \
			correct++; \
		} while(0)
	
	//Walk the ASCII range 0..0x7F once, applying the class expected for
	//each value; every value that matches adds one to `correct`.
	for (i=0; i<'\t'; i++) CONTROL;
	i = '\t'; SPACE;
	i = '\n'; RETURN;
	i = '\v'; SPACE;
	i = '\f'; SPACE;
	i = '\r'; RETURN;
	for (i='\r'+1; i<' '; i++) CONTROL;
	i = ' '; SPACE;
	for (i='!'; i<='/'; i++) SYMBOL;
	for (i='0'; i<='9'; i++) DIGIT;
	for (i=':'; i<='@'; i++) SYMBOL;
	for (i='A'; i<='F'; i++) LETTER_HEX;
	for (i='G'; i<='Z'; i++) LETTER;
	for (i='['; i<='`'; i++) SYMBOL;
	for (i='a'; i<='f'; i++) LETTER_HEX;
	for (i='g'; i<='z'; i++) LETTER;
	for (i='{'; i<='~'; i++) SYMBOL;
	i = '\x7F'; CONTROL;
	
	ok(correct==128, "ASCII characters have correct charflags");
	correct = 0;
	
	//We do some goofy stuff here to make sure sign extension doesn't cause problems with charflags
	//Four passes of 128 values each (hence the expected total of 512):
	//the char `i` assigned from unsigned and from signed counters, then
	//`i` shadowed by an int and by an unsigned int so the macros see the
	//wider types directly.
	{
		unsigned int ui;
		int si;
		
		for (ui=128; ui<=255; ui++) {
			i = ui;
			EXTENDED;
		}
		for (si=-128; si<0; si++) {
			i = si;
			EXTENDED;
		}
	}
	{
		int i;
		for (i=-128; i<0; i++) EXTENDED;
	}
	{
		unsigned int i;
		for (i=128; i<=255; i++) EXTENDED;
	}
	
	ok(correct==512, "Extended characters have correct charflags");
	
	#undef CONTROL
	#undef SPACE
	#undef RETURN
	#undef SYMBOL
	#undef DIGIT
	#undef LETTER_HEX
	#undef LETTER
	#undef EXTENDED
}

/* One test case for readui(); consumed by test_readui_single(). */
struct readui_test {
	const char *txt;          //input text handed to readui()
	size_t txt_size;          //number of bytes of txt to offer (end = txt + txt_size)
	readui_base base;         //base argument passed to readui()
	
	uint64_t correct_integer; //value readui() is expected to return
	int correct_errno;        //value errno is expected to hold after the call (0 = unchanged)
	size_t correct_advance;   //expected number of bytes the read pointer moved past txt
};

/* T builds one struct readui_test initializer from a string literal: the
 * text, its length without the terminating zero, then the remaining fields
 * (base, expected value, expected errno, expected advance) as given. */
#define T(txt, ...) {txt, sizeof(txt)-1, __VA_ARGS__}
/* Largest uint64_t value; used as the expected result of the ERANGE cases. */
#define M (18446744073709551615ULL)

/* Table of readui() cases run by test_readui(). */
struct readui_test readui_tests[] = {
	//Basic reads
	T("0",READUI_DEC, 0,0,1),
	T(" \t42  ",READUI_DEC, 42,0,4),
	
	//Different bases
	T("BADBEEFDEADBAT",READUI_HEX, 0xBADBEEFDEADBAULL,0,13),
	T("7559",READUI_OCT, 0755,0,3),
	T("01010010110012",READUI_BIN, 2649,0,13),
	T("1000000000",0x7F, 8594754748609397887ULL,0,10),
	
	//Errors
	T("",READUI_DEC, 0,EINVAL,0),
	T("18446744073709551616",
		READUI_DEC,M,ERANGE,20),
	T("1000000000000000000000000",
		READUI_DEC,M,ERANGE,25),
	T("10000000000000000",
		READUI_HEX,M,ERANGE,17),
	T("10000000000000000000000000000000000000000000000000000000000000000",
		READUI_BIN,M,ERANGE,65),
	T("10000000000",
		0x7D,M,ERANGE,11),
	T("9000000000",0x7F, M,ERANGE,10),
	
	//Misc
	T("18446744073709551615",READUI_DEC, M,0,20),
};

/*
 * Runs readui() on a single table entry and emits one TAP result that passes
 * only if the returned value, the resulting errno and the distance the read
 * pointer advanced all equal the values recorded in *test.
 */
static void test_readui_single(struct readui_test *test) {
	const char *cursor = test->txt;
	const char * const limit = test->txt + test->txt_size;
	uint64_t value;
	int err;
	size_t consumed;
	int value_ok, errno_ok, advance_ok;
	
	//errno is cleared first so that a stale value cannot leak into the check
	errno = 0;
	value = readui(&cursor, limit, test->base);
	err = errno;
	consumed = cursor - test->txt;
	
	value_ok   = (value    == test->correct_integer);
	errno_ok   = (err      == test->correct_errno);
	advance_ok = (consumed == test->correct_advance);
	
	ok(value_ok && errno_ok && advance_ok,
	   "Testing \"%s\"", test->txt);
}

static void test_readui(void) {
	size_t i, count = sizeof(readui_tests)/sizeof(*readui_tests);
	
	for (i=0; i<count; i++)
		test_readui_single(readui_tests+i);
}

#undef T
#undef M

/*
 * Checks that the section boundaries recorded in *sn (prefix, digits,
 * exponent, suffix) are consistent with each other and with the token type.
 *
 * Emits exactly one TAP result: fail() naming the first rule that is
 * violated, or pass() with str_pipes followed by msg when every rule holds.
 *
 * sn:        result of scan_number() to inspect
 * type:      token type the number was classified as
 * str_pipes: label used at the start of every message
 * msg:       text appended to str_pipes in the pass message
 */
static void scan_number_sanity_check(const struct scan_number *sn,
		enum token_type type, const char *str_pipes, const char *msg) {
	//If there is a prefix, it should follow
	//the pattern (0 [B X b x]*0..1)
	if (sn->prefix < sn->digits) {
		int len = sn->digits - sn->prefix;
		if (len!=1 && len!=2) {
			fail("%s : Prefix length is %d; should be 1 or 2",
				str_pipes, len);
			return;
		}
		if (sn->prefix[0] != '0') {
			fail("%s : Prefix does not start with 0",
				str_pipes);
			return;
		}
		if (len==2 && !strchr("BXbx", sn->prefix[1])) {
			fail("%s : Prefix is 0%c; should be 0, 0b, or 0x",
				str_pipes, sn->prefix[1]);
			return;
		}
		if (len==1 && type==TOK_FLOATING) {
			fail("%s : Octal prefix appears on floating point number",
				str_pipes);
			return;
		}
	} else {
	//if there is no prefix, the first digit should not be 0
	//  unless this is a floating point number
		if (sn->digits < sn->exponent && sn->digits[0]=='0' &&
				type==TOK_INTEGER) {
			fail("%s : First digit of non-prefix integer is 0",
				str_pipes);
			return;
		}
	}
	
	//Make sure sn->digits contains valid digits and is not empty
	//  (unless prefix is "0")
	{
		const char *s = sn->digits, *e = sn->exponent;
		//two-character prefix (0x / 0b): digits are mandatory
		if (sn->prefix+1 < sn->digits) {
			if (s >= e) {
				fail("%s : 0%c not followed by any digits",
					str_pipes, sn->prefix[1]);
				return;
			}
			if (sn->prefix[1] == 'X' || sn->prefix[1] == 'x') {
				while (s<e && strchr(
					"0123456789ABCDEFabcdef.", *s)) s++;
			} else {
				//only the first digit after 0b is required to be 0 or 1;
				//the rest of the section may hold any decimal digit or '.'
				if (s[0]!='0' && s[0]!='1') {
					fail("%s: Binary prefix not followed by a 0 or 1",
						str_pipes);
					return;
				}
				while (s<e && strchr(
					"0123456789.", *s)) s++;
			}
		} else {
			if (type==TOK_FLOATING && s >= e) {
				fail("%s : sn->digits is empty in a floating point number",
					str_pipes);
				return;
			}
			if (sn->prefix >= sn->digits && s >= e) {
				fail("%s : both sn->prefix and sn->digits are empty",
					str_pipes);
				return;
			}
			while (s<e && strchr("0123456789.", *s)) s++;
		}
		//the scans above stop at the first unacceptable character, so
		//anything short of e means the digits section holds a bad character
		if (s != e) {
			fail("%s : sn->digits is not entirely valid", str_pipes);
			return;
		}
	}
	
	//Make sure exponent follows the rules
	if (sn->exponent < sn->suffix) {
		char c = sn->exponent[0];
		if (type==TOK_INTEGER) {
			fail("%s : sn->exponent is not empty in an integer", str_pipes);
			return;
		}
		if (sn->prefix < sn->digits && (c=='E' || c=='e')) {
			fail("%s : Exponent for hex/binary starts with %c", str_pipes, c);
			return;
		}
		if (sn->prefix >= sn->digits && (c=='P' || c=='p')) {
			fail("%s : Exponent for decimal starts with %c", str_pipes, c);
			return;
		}
	}
	
	pass("%s%s", str_pipes, msg);
	return;
}

/*
 * Runs scan_number() on one annotated test string and emits exactly one TAP
 * result.
 *
 * str_pipes is the number text with '|' characters marking where each
 * section is expected to begin, in the order prefix | digits | exponent |
 * suffix, with an optional fourth '|' marking the expected end.  Spaces in
 * str_pipes are ignored.  With only three pipes the expected end is the end
 * of the string.
 *
 * type:       token type scan_number() is expected to return
 * dots_found: expected value of sn.dots_found
 *
 * The working copy of the string is released on every exit path.
 */
static void test_scan_number_single(const char *str_pipes,
				enum token_type type, size_t dots_found) {
	char *str = malloc(strlen(str_pipes)+1);
	const char *expected[5];
	struct scan_number sn;
	enum token_type given_type;
	
	if (!str) {
		fail("%s : Out of memory", str_pipes);
		return;
	}
	
	//Copy str_pipes into str without spaces and pipes, remembering the
	//position in the copy that each pipe corresponds to.
	{
		const char *s = str_pipes;
		char *d = str;
		size_t pipes = 0;
		
		expected[0] = d;
		for (;*s;s++) {
			if (*s == ' ')
				continue;
			if (*s == '|') {
				if (++pipes > 4) {
					fail("Too many pipes in the test string \"%s\"; should be 3", str_pipes);
					goto done;
				}
				expected[pipes] = d;
			} else
				*d++ = *s;
		}
		*d = 0;
		
		if (pipes < 3) {
			fail("Not enough pipes in the test string \"%s\"; should be 3", str_pipes);
			goto done;
		}
		if (pipes == 3)
			expected[4] = d;
	}
	
	given_type = scan_number(&sn, str, strchr(str,0));
	
	if (sn.prefix != expected[0]) {
		fail("%s : sn.prefix is wrong", str_pipes);
		goto done;
	}
	if (sn.digits != expected[1]) {
		fail("%s : sn.digits is wrong", str_pipes);
		goto done;
	}
	if (sn.exponent != expected[2]) {
		fail("%s : sn.exponent is wrong", str_pipes);
		goto done;
	}
	if (sn.suffix != expected[3]) {
		fail("%s : sn.suffix is wrong", str_pipes);
		goto done;
	}
	if (sn.end != expected[4]) {
		fail("%s : sn.end is wrong", str_pipes);
		goto done;
	}
	if (given_type != type) {
		fail("%s : Type incorrect", str_pipes);
		goto done;
	}
	if (sn.dots_found != dots_found) {
		fail("%s : sn.dots_found is %zu; should be %zu", str_pipes,
			sn.dots_found, dots_found);
		goto done;
	}
	
	//All boundaries match; this call emits the single pass/fail result
	scan_number_sanity_check(&sn, type, str_pipes, "");
	
done:
	free(str);
}

#define T(str, type, dots_found) test_scan_number_single(str,type,dots_found)

static void test_scan_number(void) {
	T("0x | 50.1 | p+1 | f", TOK_FLOATING, 1);
	T("| 100 || L", TOK_INTEGER, 0);
	T("0 ||| b21", TOK_INTEGER, 0);
	T("0b | 101 || L", TOK_INTEGER, 0);
	T("0X | 7Af ||| \t2", TOK_INTEGER, 0);
	T("0|||b", TOK_INTEGER, 0);
	T("0|||x", TOK_INTEGER, 0);
}

#undef T

/*
 * Helper macros for the number-reading tests.  They rely on locals of the
 * calling function named mq, str, sn, type and integer.
 *
 * T(string, value, theBase, theSuffix): initializes mq, scans `string` with
 * scan_number(), then emits three TAP results: the type is TOK_INTEGER, the
 * sanity check, and read_integer() produced the given value, base and
 * suffix.
 */
#define T(string, value, theBase, theSuffix) do { \
	queue_init(mq, NULL); \
	str = (string); \
	type = scan_number(&sn, str, str+sizeof(string)-1); \
	ok(type==TOK_INTEGER, "%s : type==TOK_INTEGER", str); \
	scan_number_sanity_check(&sn, type, str, \
		" : scan_number_sanity_check passed"); \
	read_integer(&integer, &sn, &mq); \
	ok(integer.v==(value) && integer.base==(theBase) && \
		integer.suffix==(theSuffix), \
		"%s : Correct value and suffix", str); \
	} while(0)
/*
 * Q(name): expects the next message in mq to have the path
 * "tokenize/read_cnumber/<name>".  Always emits exactly one TAP result; an
 * empty queue is reported as a failure rather than silently skipped, so a
 * missing diagnostic cannot go unnoticed and the test count stays constant.
 */
#define Q(name) do { \
	if (queue_count(mq)) { \
		const char *path = dequeue(mq).path; \
		ok(!strcmp(path, "tokenize/read_cnumber/" #name), \
			"%s : Dequeued %s", str, path); \
	} else { \
		fail("%s : Expected tokenize/read_cnumber/" #name \
			" but message queue is empty", str); \
	} \
	} while(0)
/*
 * E(): emits one TAP result checking that mq is now empty, dumps any
 * leftover messages, and frees the queue.
 */
#define E() do { \
	ok(queue_count(mq)==0, "%s : Message queue empty", str); \
	if (queue_count(mq)) \
		tok_message_queue_dump(&mq); \
	queue_free(mq); \
	} while(0)

/*
 * Table-style test of scan_number() + read_integer().  Each case is a T()
 * call giving the input text and the expected value, base and suffix,
 * optionally followed by Q() calls naming the diagnostics expected in the
 * message queue (path suffix under tokenize/read_cnumber/), and closed by
 * E(), which requires the queue to be empty and frees it.
 *
 * The locals below are referenced by name from inside the T/Q/E macros.
 */
static void test_read_integer(void) {
	struct scan_number sn;
	tok_message_queue mq;
	const char *str;
	enum token_type type;
	struct tok_integer integer;
	
	T("0b0lu", 0, 8, TOK_UL);
	E();
	
	T("1", 1, 10, TOK_NOSUFFIX);
	E();
	
	//unrecognized suffix: value is still read, suffix reported as invalid
	T("32Q", 32, 10, TOK_NOSUFFIX);
	Q(integer_suffix_invalid);
	E();
	
	T("32i", 32, 10, TOK_I);
	E();
	
	//floating-point suffix on an integer
	T("0755f", 493, 8, TOK_NOSUFFIX);
	Q(suffix_float_only);
	E();
	
	T("0xDeadBeef", 0xDEADBEEF, 16, TOK_NOSUFFIX);
	E();
	
	//the whole trailing text must be consumed as the (invalid) suffix
	T("12345678901234567890$1_LONG.SUFFIX", 12345678901234567890ULL, 10, TOK_NOSUFFIX);
	ok1(sn.end == strchr(str, 0));
	Q(integer_suffix_invalid);
	E();
	
	T("0xDEADBEEFlull", 0xDEADBEEF, 16, TOK_NOSUFFIX);
	Q(integer_suffix_invalid);
	E();
	
	T("0xBALLuu", 0xBA, 16, TOK_NOSUFFIX);
	Q(integer_suffix_invalid);
	E();
	
	//too large for 64 bits: expected value is the maximum uint64_t
	T("123456789012345678901", 18446744073709551615ULL, 10, TOK_NOSUFFIX);
	Q(integer_out_of_range);
	E();
	
	//9 is not an octal digit
	T("09", 0, 8, TOK_NOSUFFIX);
	Q(integer_invalid_digits);
	E();
}

#undef T
#undef E

/*
 * Floating-point counterparts of the integer test macros.  They rely on
 * locals of the calling function named mq, str, sn, type and floating.
 *
 * Teq(string, equals, theSuffix): initializes mq, copies `string` into a
 * freshly malloc'd buffer assigned to str, scans it with scan_number(), and
 * emits three TAP results: the type is TOK_FLOATING, the sanity check, and
 * `equals` holds together with the expected suffix after read_floating().
 * The buffer is released by the matching E().
 */
#define Teq(string, equals, theSuffix) do { \
	queue_init(mq, NULL); \
	str = malloc(sizeof(string)); \
	memcpy(str, string, sizeof(string)); \
	type = scan_number(&sn, str, str+sizeof(string)-1); \
	ok(type==TOK_FLOATING, "%s : type==TOK_FLOATING", str); \
	scan_number_sanity_check(&sn, type, str, \
		" : scan_number_sanity_check passed"); \
	read_floating(&floating, &sn, &mq); \
	ok((equals) && \
		floating.suffix==(theSuffix), \
		"%s : Correct value and suffix", str); \
	} while(0)
/* T(string, value, theSuffix): Teq() with the condition that floating.v is
 * within 0.00000000000000001 of `value`. */
#define T(string, value, theSuffix) \
	Teq(string, fabsl(floating.v - (value)) <= 0.00000000000000001, theSuffix)
/* E(): emits one TAP result checking that mq is empty, dumps any leftover
 * messages, frees the queue and frees the str buffer allocated by Teq(). */
#define E() do { \
	ok(queue_count(mq)==0, "%s : Message queue empty", str); \
	if (queue_count(mq)) \
		tok_message_queue_dump(&mq); \
	queue_free(mq); \
	free(str); \
	} while(0)

/*
 * Table-style test of scan_number() + read_floating().  Each case is a T()
 * or Teq() call giving the input text and the expected value (or a condition
 * on floating.v) and suffix, optionally followed by Q() calls naming the
 * diagnostics expected in the message queue, and closed by E(), which
 * requires the queue to be empty and releases the queue and the string copy.
 *
 * The locals below are referenced by name from inside the macros.
 */
static void test_read_floating(void) {
	struct scan_number sn;
	tok_message_queue mq;
	char *str; //str is a malloced copy so read_floating can do its null terminator trick
	enum token_type type;
	struct tok_floating floating;
	
	T("1.0", 1.0, TOK_NOSUFFIX);
	E();
	
	T("0.0", 0.0, TOK_NOSUFFIX);
	E();
	
	T("0755e1", 7550.0, TOK_NOSUFFIX);
	E();
	
	T("0xD.Bp0", 0xD.Bp0, TOK_NOSUFFIX);
	E();
	
	//GCC doesn't throw any errors or warnings for this odd case,
	//but we call it an error to be consistent with strtold
	T("0x.p0", 0.0, TOK_NOSUFFIX);
	Q(floating_invalid_digits);
	E();
	
	T("32.0Q", 32.0, TOK_NOSUFFIX);
	Q(floating_suffix_invalid);
	E();
	
	T("32.0Li", 32.0, TOK_IMAG_L);
	E();
	
	T("32.0LL", 32.0, TOK_NOSUFFIX);
	Q(suffix_integer_only);
	E();
	
	//hex float without a p exponent: expected to read as 0.0 with a diagnostic
	Teq("0xDEAD.BEEF", floating.v==0.0, TOK_NOSUFFIX);
	Q(hex_float_no_exponent);
	E();
	
	//binary float: expected to read as 0 with a diagnostic
	T("0b101.0p0", 0, TOK_NOSUFFIX);
	Q(binary_float);
	E();
	
	/* If any of the following three tests fails, consider increasing
	   the e+ and e- values. */
	
	Teq("1.e+4933", isinf(floating.v), TOK_NOSUFFIX);
	Q(floating_out_of_range);
	E();
	
	/* for some reason, strtold sets errno=EDOM on x86, and
	   on my PowerPC G4 on Fedora 10, the same phenomenon occurs
	   but the exponents are e+309, e-324, and e-325 */
	Teq("1.e-4951", floating.v==0.0, TOK_NOSUFFIX);
	Q(floating_out_of_range);
	E();
	
	Teq("1.e-4952", floating.v==0.0, TOK_NOSUFFIX);
	Q(floating_out_of_range);
	E();
	
}

#undef Teq
#undef T
#undef Q
#undef E

/* One tokenizer test case: an input text and the tokens expected from it. */
struct tokenizer_test {
	const char *txt;            //input text
	size_t txt_size;            //length of txt in bytes
	
	const struct token *tokens; //array of expected tokens
	size_t token_count;         //number of elements in tokens
};

/*
 * Shorthand for writing expected-token tables.
 *
 * T(txt, ...) builds a struct tokenizer_test initializer: the text, its
 * length without the terminating zero, and an array/count pair of struct
 * token built from the remaining arguments.
 *
 * The other macros expand to pieces of a struct token initializer.  The
 * braced ones (string, opkw, integer, floating, include) each initialize one
 * named member inside a nested brace -- presumably an unnamed union inside
 * struct token; TODO confirm against the struct token definition.
 */
#define T(txt, ...) {txt, sizeof(txt)-1, array_count_pair(struct token, __VA_ARGS__)}
//.string: pointer to a one-element darray_char compound literal holding txt and its length
#define string(txt) {.string=(darray_char[1]){{.item = (txt), .size = sizeof(txt)-1}}}
#define opkw(v) {.opkw = (v)}
//sets both the token's source text and its length (not braced: used as two plain designators)
#define txt(t) .txt = (t), .txt_size = sizeof(t)-1
#define integer(...) {.integer={__VA_ARGS__}}
#define floating(...) {.floating={__VA_ARGS__}}
//a complete token: a single-space TOK_WHITE
#define space {.type = TOK_WHITE, .txt = " ", .txt_size = 1}
//a complete token: TOK_STARTLINE with no text
#define startline {.type = TOK_STARTLINE}
#define include(str) {.include = (str)}

/* A tokenizer test case together with a list of message strings; presumably
 * the messages the tokenizer is expected to emit -- the consumer is not
 * visible here, TODO confirm. */
struct tokenizer_msg_test {
	struct tokenizer_test test;    //input text and expected tokens
	
	const char * const *messages;  //array of message strings
	size_t message_count;          //number of elements in messages
};

/* M(...) expands to an array/count pair of strings, suitable for
 * initializing the messages and message_count members above. */
#define M(...) array_count_pair(const char *, __VA_ARGS__)

/*
 * Token-level test table.  Each entry pairs an input string with the exact
 * token sequence expected from tokenize().  The TOK_STARTLINE that begins
 * every generated list is not written here; test_tokenizer_single() skips
 * it before comparing.  Messages queued while running these entries are
 * discarded by test_tokenizer().
 */
struct tokenizer_test tokenizer_tests[] = {
	//Empty input: no tokens expected
	{ "", 0, 0 },
	//A lone newline is a single whitespace token
	T("\n",
		{.type = TOK_WHITE, txt("\n")}
	),
	//A newline followed by more text introduces a startline marker
	T("\na",
		{.type = TOK_WHITE, txt("\n")},
		startline,
		{.type = TOK_IDENTIFIER, txt("a")}
	),
	//Keyword, identifiers, and single- and multi-character operators
	T("int n = c++;",
		{.type = TOK_KEYWORD,
			opkw(INT),
			txt("int")
		}, space,
		{.type = TOK_IDENTIFIER,
			txt("n")
		}, space,
		{.type = TOK_OPERATOR,
			opkw('='),
			txt("=")
		}, space,
		{.type = TOK_IDENTIFIER,
			txt("c")
		},
		{.type = TOK_OPERATOR,
			opkw(INC_OP),
			txt("++")
		},
		{.type = TOK_OPERATOR,
			opkw(';'),
			txt(";")
		}
	),
	//Floating literal with no integer part, then a decimal integer
	T(".5 42 ",
		{.type = TOK_FLOATING,
			floating(.5, TOK_NOSUFFIX),
			txt(".5")
		}, space,
		{.type = TOK_INTEGER,
			integer(42, 10, TOK_NOSUFFIX),
			txt("42")
		}, space,
	),
	//Make sure TOK_STRAY doesn't take over the universe
	T("``AS IS'' AND",
		{.type = TOK_STRAY,
			txt("``")
		},
		{.type = TOK_IDENTIFIER,
			txt("AS")
		}, space,
		{.type = TOK_IDENTIFIER,
			txt("IS")
		},
		{.type = TOK_CHAR,
			string(""),
			txt("\'\'")
		}, space,
		{.type = TOK_IDENTIFIER,
			txt("AND")
		}
	),
	//Make sure starting with 0 doesn't result in skipping whitespace
	T("0 .05 0 500",
		{.type = TOK_INTEGER,
			integer(0, 8, TOK_NOSUFFIX),
			txt("0")
		}, space,
		{.type = TOK_FLOATING,
			floating(.05, TOK_NOSUFFIX),
			txt(".05")
		}, space,
		{.type = TOK_INTEGER,
			integer(0, 8, TOK_NOSUFFIX),
			txt("0")
		}, space,
		{.type = TOK_INTEGER,
			integer(500, 10, TOK_NOSUFFIX),
			txt("500")
		}
	),
	//Make sure a simple preprocessor directive works
	//(.flags is compared by the checker as {pp, pp_directive})
	T("\t/*comment*/ #include \"include.h\"\n",
		{.flags={1,0}, .type=TOK_WHITE, txt("\t")},
		{.flags={1,0}, .type=TOK_CCOMMENT, txt("/*comment*/")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(INCLUDE), txt("include")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_STRING_IQUOTE, include("include.h"), txt("\"include.h\"")},
		{.flags={1,0}, .type=TOK_WHITE, txt("\n")}
	),
	//Make sure __VA_ARGS__ is lexed correctly
	//(first line: '#' is not leading, so nothing there is preprocessor text)
	T("if #define __VA_ARGS__=0X5FULL;\n"
	  " #define __VA_ARGS__(__VA_ARGS__, ...\t)__VA_ARGS__ bar int define",
		{.type=TOK_KEYWORD, opkw(IF), txt("if")},
		space,
		{.type=TOK_OPERATOR, opkw('#'), txt("#")},
		{.type=TOK_IDENTIFIER, txt("define")},
		space,
		{.type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
		{.type=TOK_OPERATOR, opkw('='), txt("=")},
		{.type=TOK_INTEGER, integer(0x5F,16,TOK_ULL), txt("0X5FULL")},
		{.type=TOK_OPERATOR, opkw(';'), txt(";")},
		{.type=TOK_WHITE, txt("\n")},
		{.flags={1,0}, .type=TOK_STARTLINE},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
		{.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
		{.flags={1,0}, .type=TOK_OPERATOR, opkw(','), txt(",")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_OPERATOR, opkw(ELLIPSIS), txt("...")},
		{.flags={1,0}, .type=TOK_WHITE, txt("\t")},
		{.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
		{.flags={1,0}, .type=TOK_KEYWORD, opkw(VA_ARGS), txt("__VA_ARGS__")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
	),
	//__VA_ARGS__ is an identifier if no ... operator is in the parameter list or if there is no parameter list
	T("#define foo __VA_ARGS__ bar int define\n#define foo() __VA_ARGS__ bar int define",
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
		{.flags={1,0}, .type=TOK_WHITE, txt("\n")},
		
		{.flags={1,0}, .type=TOK_STARTLINE},
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
		{.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
		{.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")}
	),
	
	//Test various integer suffixes; the second input line holds
	//combinations this table expects to come back as TOK_NOSUFFIX
	//(except the trailing 1i, expected as TOK_I)
	T("1 1u 1l 1ul 1lu 1ll 1ull 1llu 1U 1L 1UL 1LU 1LL 1ULL 1LLU "
	  "1uq 1lq 1llq 1ulq 1luq 1f 1i",
		{.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1u")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1l")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1ul")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1lu")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1ll")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ull")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1llu")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1U")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1L")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1UL")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1LU")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1LL")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ULL")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1LLU")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1uq")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1lq")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1llq")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1ulq")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1luq")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1f")}, space,
		{.type=TOK_INTEGER, integer(1, 10, TOK_I), txt("1i")}
	),
	//Test non-standard newlines
	T("\n\r\n \r\n\rint",
		{.type=TOK_WHITE, txt("\n\r")}, startline,
		{.type=TOK_WHITE, txt("\n")}, startline,
		space,
		{.type=TOK_WHITE, txt("\r\n")}, startline,
		{.type=TOK_WHITE, txt("\r")}, startline,
		{.type=TOK_KEYWORD, opkw(INT), txt("int")}
	),
	//Test backslash-broken lines
	//(expected txt is the token with the line break spliced out)
	T("oner\\ \nous",
		{.type=TOK_IDENTIFIER, txt("onerous")}
	),
	//Only the final backslash, which has no newline after it, remains
	T("\\\n\\\n\\\n\\",
		{.type=TOK_STRAY, txt("\\")}
	),
	T("in\\\nt i\\;\nf\\ \r\nor (i=0; i<10; i++) {\\",
		{.type=TOK_KEYWORD, opkw(INT), txt("int")}, space,
		{.type=TOK_IDENTIFIER, txt("i")},
		{.type=TOK_STRAY, txt("\\")},
		{.type=TOK_OPERATOR, opkw(';'), txt(";")},
		{.type=TOK_WHITE, txt("\n")},
		
		startline,
		{.type=TOK_KEYWORD, opkw(FOR), txt("for")}, space,
		{.type=TOK_OPERATOR, opkw('('), txt("(")},
		{.type=TOK_IDENTIFIER, txt("i")},
		{.type=TOK_OPERATOR, opkw('='), txt("=")},
		{.type=TOK_INTEGER, integer(0,8,0), txt("0")},
		{.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
		{.type=TOK_IDENTIFIER, txt("i")},
		{.type=TOK_OPERATOR, opkw('<'), txt("<")},
		{.type=TOK_INTEGER, integer(10,10,0), txt("10")},
		{.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
		{.type=TOK_IDENTIFIER, txt("i")},
		{.type=TOK_OPERATOR, opkw(INC_OP), txt("++")},
		{.type=TOK_OPERATOR, opkw(')'), txt(")")}, space,
		{.type=TOK_OPERATOR, opkw('{'), txt("{")},
		{.type=TOK_STRAY, txt("\\")}
	),
	//More preprocessor directive tests
	//(unknown directive names and non-directive keywords are expected as identifiers)
	T("#apple\n#pragma\n#const\n#define \t\n#define foo(x",
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_IDENTIFIER, txt("apple")},
		{.flags={1,0}, .type=TOK_WHITE, txt("\n")},
		
		{.flags={1,0}, .type=TOK_STARTLINE},
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(PRAGMA), txt("pragma")},
		{.flags={1,0}, .type=TOK_WHITE, txt("\n")},
		
		{.flags={1,0}, .type=TOK_STARTLINE},
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_IDENTIFIER, txt("const")},
		{.flags={1,0}, .type=TOK_WHITE, txt("\n")},
		
		{.flags={1,0}, .type=TOK_STARTLINE},
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" \t")},
		{.flags={1,0}, .type=TOK_WHITE, txt("\n")},
		
		{.flags={1,0}, .type=TOK_STARTLINE},
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
		{.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("x")}
	),
	//Directive that ends at end of input
	T("#define",
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")}
	),
	T("#define foo",
		{.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
		{.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
		{.flags={1,0}, .type=TOK_WHITE, txt(" ")},
		{.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")}
	),
	//A '#' preceded by a stray character is an ordinary operator, not a directive
	T("`#define foo",
		{.type=TOK_STRAY, txt("`")},
		{.type=TOK_OPERATOR, opkw('#'), txt("#")},
		{.type=TOK_IDENTIFIER, txt("define")},
		space,
		{.type=TOK_IDENTIFIER, txt("foo")}
	)
};

/*
 * Test table for inputs that should make tokenize() queue messages.
 * Each entry lists the expected tokens and then, via M(...), the expected
 * message paths in order.  test_tokenizer() requires every path to begin
 * with "tokenize/" and compares the remainder against these strings.
 */
struct tokenizer_msg_test tokenizer_msg_tests[] = {
	//Comment that runs to end of input
	{T("/* Unterminated C comment",
		{.type=TOK_CCOMMENT, txt("/* Unterminated C comment")}
	), M(
		"unterminated_comment"
	)},
	//String literals containing a raw newline; the second one is never closed
	{T("\"\n\"\"\n",
		{.type=TOK_STRING, string("\n"), txt("\"\n\"")},
		{.type=TOK_STRING, string("\n"), txt("\"\n")}
	), M(
		"read_cstring/quote_newlines",
		"read_cstring/missing_endquote"
	)},
};

/* Retire every shorthand macro introduced for the token tables above.
   space and startline are included: both are ordinary-looking identifiers,
   and leaving them defined would silently rewrite any later use of those
   names, including in headers included further down. */
#undef T
#undef string
#undef opkw
#undef txt
#undef integer
#undef floating
#undef space
#undef startline
#undef M
#undef include

/*
 * Tokenize one table entry and compare the result, token by token, with
 * the entry's expected tokens.
 *
 * t:  test case providing the input text and the expected token array.
 * mq: message queue handed to tokenize(); it is not examined here, so the
 *     caller decides what to do with any queued messages.
 *
 * Emits exactly one TAP result through ok().  On the first mismatch an
 * "Error: ..." line is printed and the generated token list is dumped to
 * stdout.
 */
static void test_tokenizer_single(struct tokenizer_test *t, tok_message_queue *mq) {
	struct token_list *tl;
	size_t i, count = t->token_count, gen_count;
	const struct token *tok_gen, *tok_correct;
	int success = 1;
	/* Work on a private copy of the input.  The copy is also passed as
	   tokenize()'s first argument and is the only thing freed below --
	   presumably it is the talloc parent of the token list (TODO confirm). */
	char *txt = talloc_memdup(NULL, t->txt, t->txt_size);
	size_t txt_size = t->txt_size;
	/* Print a diagnostic (newline appended), mark failure, and bail out. */
	#define failed(fmt, ...) do { \
		printf("Error: " fmt "\n", ##__VA_ARGS__); \
		success = 0; \
		goto done; \
	} while(0)
	
	tl = tokenize(txt, txt, txt_size, mq);
	
	if (tl->orig != txt || tl->orig_size != txt_size)
		failed("tokenize() did not replicate orig/orig_size from arguments");
	if (!token_list_sanity_check(tl, stdout))
		failed("Sanity check failed");
	
	/* The generated list has one extra token: the initial TOK_STARTLINE. */
	gen_count = token_list_count(tl);
	if (gen_count != count+1)
		failed("Incorrect number of tokens (%zu, should be %zu)",
			gen_count, count+1);
	
	tok_gen = tl->first->next; //skip the beginning TOK_STARTLINE
	tok_correct = t->tokens;
	for (i=0; i<count; i++, tok_gen=tok_gen->next, tok_correct++) {
		/* Expected TOK_STARTLINE entries carry no text; never hand a
		   NULL pointer to printf's %s. */
		const char *label = tok_correct->txt ? tok_correct->txt : "(no text)";
		
		if (tok_gen->type != tok_correct->type)
			failed("Token \"%s\": Incorrect type", label);
		{
			struct token_flags g=tok_gen->flags, c=tok_correct->flags;
			if (g.pp!=c.pp || g.pp_directive!=c.pp_directive)
				failed("Token \"%s\": Incorrect flags", label);
		}
		/* Compare the type-specific payload. */
		switch (tok_gen->type) {
			case TOK_INTEGER:
				if (tok_gen->integer.v != tok_correct->integer.v ||
				    tok_gen->integer.base != tok_correct->integer.base ||
				    tok_gen->integer.suffix != tok_correct->integer.suffix)
					failed("Token \"%s\": Integer value/base/suffix incorrect", label);
				break;
			case TOK_FLOATING:
				/* Values are compared with a small absolute tolerance. */
				if (fabsl(tok_gen->floating.v - tok_correct->floating.v) > 0.00000000000000001 ||
				    tok_gen->floating.suffix != tok_correct->floating.suffix)
					failed("Token \"%s\": Floating point value/suffix incorrect", label);
				break;
			case TOK_OPERATOR:
				if (tok_gen->opkw != tok_correct->opkw)
					failed("Token \"%s\": Operator opkw incorrect", label);
				break;
			case TOK_KEYWORD:
				if (tok_gen->opkw != tok_correct->opkw)
					failed("Token \"%s\": Keyword opkw incorrect", label);
				break;
			case TOK_CHAR:
			case TOK_STRING:
				//anything using string
				/* Same length, same bytes, and a NUL right after the data. */
				if (tok_gen->string->size != tok_correct->string->size ||
					memcmp(tok_gen->string->item, tok_correct->string->item,
					tok_gen->string->size) ||
					tok_gen->string->item[tok_gen->string->size] != 0 )
					failed("Token \"%s\": String value incorrect", label);
				break;
			case TOK_STRING_IQUOTE:
			case TOK_STRING_IANGLE:
				if (strcmp(tok_gen->include, tok_correct->include))
					failed("Token \"%s\": #include string incorrect", label);
				break;
			case TOK_IDENTIFIER:
			case TOK_CCOMMENT:
			case TOK_CPPCOMMENT:
			case TOK_WHITE:
			case TOK_STARTLINE:
			case TOK_STRAY:
				/* No payload beyond the text checked below. */
				break;
		}
		/* Every token except a startline marker must reproduce the
		   expected text exactly. */
		if (tok_gen->type!=TOK_STARTLINE && (
			tok_gen->txt_size != tok_correct->txt_size ||
			memcmp(tok_gen->txt, tok_correct->txt, tok_gen->txt_size))
			)
			failed("Token \"%s\": txt incorrect", label);
	}
	
	#undef failed
done:
	ok(success==1, "Tokenize %s", t->txt);
	
	if (!success)
		token_list_dump(tl, stdout);
	
	talloc_free(txt);
}

/*
 * Read an entire file into memory, tokenize it, and run the token-list
 * sanity check on the result.
 *
 * file_name: path of the file to load (opened in binary mode).
 * mq:        message queue handed to tokenize(); left for the caller to
 *            inspect afterwards.
 *
 * Reports one TAP result: fail() if the file cannot be opened or read or
 * if the sanity check fails, pass() otherwise.
 */
static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
	FILE *in = fopen(file_name, "rb");
	darray_char *buf = talloc_darray(NULL);
	const size_t chunk = 1024;
	struct token_list *tokens;
	size_t got;
	
	if (in == NULL) {
		fail("Could not read file '%s': %s", file_name, strerror(errno));
		goto end;
	}
	
	/* Append the file one chunk at a time, keeping the buffer
	   NUL-terminated; a short read means end of file or an error. */
	do {
		darray_realloc(*buf, buf->size+chunk+1);
		got = fread(buf->item+buf->size, 1, chunk, in);
		buf->size += got;
		buf->item[buf->size] = 0;
	} while (got >= chunk);
	
	if (ferror(in)) {
		fail("Error reading file '%s': %s", file_name, strerror(errno));
		goto end;
	}
	
	tokens = tokenize(buf, buf->item, buf->size, mq);
	tokens->filename = file_name;
	
	if (!token_list_sanity_check(tokens, stdout)) {
		fail("Sanity check failed for file '%s'", file_name);
	} else {
		pass("File '%s' has %zu tokens", file_name, token_list_count(tokens));
	}
	
end:
	/* Only the buffer is released here; the token list was created with it
	   as tokenize()'s first argument. */
	talloc_free(buf);
	if (in != NULL)
		fclose(in);
}

/*
 * Run all tokenizer tests:
 *   1. every entry of tokenizer_tests, discarding any queued messages;
 *   2. every entry of tokenizer_msg_tests, additionally checking the number
 *      and the paths of the messages tokenize() queued;
 *   3. tokenizing test/run.c, which must not queue any message of level
 *      TM_WARN or above.
 */
static void test_tokenizer(void) {
	tok_message_queue mq;
	size_t i, count;
	int has_warn_or_worse = 0;
	
	queue_init(mq, NULL);
	
	count = sizeof(tokenizer_tests)/sizeof(*tokenizer_tests);
	for (i=0; i<count; i++) {
		test_tokenizer_single(tokenizer_tests+i, &mq);
		/* Messages are not part of these tests; throw them away so they
		   cannot leak into the next entry. */
		while (queue_count(mq)) {
			struct tok_message msg = dequeue(mq);
			(void) msg;
			//tok_message_dump(&msg);
		}
	}
	
	count = sizeof(tokenizer_msg_tests)/sizeof(*tokenizer_msg_tests);
	for (i=0; i<count; i++) {
		size_t j;
		test_tokenizer_single(&tokenizer_msg_tests[i].test, &mq);
		
		if (queue_count(mq) != tokenizer_msg_tests[i].message_count) {
			fail("Incorrect number of messages from tokenize()");
			goto msg_fail;
		}
		
		/* The counts match, so j stays within the expected-message array. */
		for (j=0; queue_count(mq); j++) {
			struct tok_message msg = dequeue(mq);
			const char *base = "tokenize/";
			size_t baselen = strlen(base);
			//tok_message_dump(&msg);
			
			if (strncmp(msg.path, base, baselen)) {
				fail("Message from tokenize() doesn't start with \"%s\"",
					base);
				goto msg_fail;
			}
			if (strcmp(msg.path+baselen,
					tokenizer_msg_tests[i].messages[j])) {
				fail("Incorrect message %s, should be %s",
					msg.path+baselen, tokenizer_msg_tests[i].messages[j]);
				goto msg_fail;
			}
		}
		
		pass("Messages from tokenize() are correct");
	msg_fail:
		/* Whatever the outcome, leave the queue empty: messages left over
		   from a failed comparison would otherwise be counted against the
		   next entry and against the run.c check below. */
		while (queue_count(mq))
			(void) dequeue(mq);
	}
	
	test_tokenizer_file("test/run.c", &mq);
	
	/* Dump anything at warning level or worse and remember that it happened. */
	while (queue_count(mq)) {
		struct tok_message msg = dequeue(mq);
		if (msg.level >= TM_WARN) {
			has_warn_or_worse = 1;
			tok_message_dump(&msg);
		}
		//else tok_message_dump(&msg);
	}
	
	ok(has_warn_or_worse==0, "Tokenizing run.c generated%s warnings, errors, or bugs",
		has_warn_or_worse ? "" : " no");
	
	queue_free(mq);
}

#include <unistd.h>

/*
 * Test program entry point: declares the planned number of TAP tests and
 * runs each group of tests in a fixed order, announcing every group first.
 */
int main(void)
{
	/* Print the "* Checking <what>..." banner, then run that test group. */
	#define CHECK_COMPONENT(what, fn) do { \
		diag("* Checking " what "..."); \
		fn(); \
	} while (0)
	
	plan_tests(195);
	
	CHECK_COMPONENT("queue", test_queue);
	CHECK_COMPONENT("read_cstring", test_read_cstring);
	CHECK_COMPONENT("dict", test_dict);
	CHECK_COMPONENT("charflag", test_charflag);
	CHECK_COMPONENT("readui", test_readui);
	CHECK_COMPONENT("scan_number", test_scan_number);
	CHECK_COMPONENT("read_integer", test_read_integer);
	CHECK_COMPONENT("read_floating", test_read_floating);
	CHECK_COMPONENT("tokenizer", test_tokenizer);
	
	#undef CHECK_COMPONENT
	
	/* The process exit code reflects whether all tests passed. */
	return exit_status();
}