Pchen0
/
ccan
mirror of https://git.ozlabs.org/~ccan/ccan


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
							#include "ccan_tokenizer.h"

//Returns a freshly malloc'd, NUL-terminated copy of the bytes in [s, e).
//  The caller owns the result and must release it with free().
//  Returns NULL if the allocation fails.
static char *strdup_rng(const char *s, const char *e) {
	size_t len = (size_t)(e - s);
	char *ret = malloc(len + 1);
	
	//Without this check a failed allocation would be written through by memcpy
	if (ret == NULL)
		return NULL;
	memcpy(ret, s, len);
	ret[len] = 0;
	return ret;
}

//Path prefix for the diagnostics raised in this file.
//  NOTE(review): presumably picked up by the tok_msg_* macros, which are defined elsewhere -- confirm.
#define MESSAGE_PATH "tokenize/read_cstring/"

//Reads a C string or character literal body starting at s until an unescaped
//quoteChar is found or e is reached, appending the decoded bytes to *out.
//
//  out        Receives the decoded bytes. A NUL is written after them but is
//             not counted in out->size.
//  s          Where reading starts.
//  e          End of input; nothing at or beyond e is read.
//  quoteChar  The character that terminates the literal.
//  mq         Not referenced directly in this function.
//             NOTE(review): presumably used implicitly by the tok_msg_* macros -- confirm.
//
//  Escape sequences are decoded: octal (one to three digits 0-7), \x hex,
//  \a \b \e \f \n \r \t \v, and \\ \' \" \? which stand for themselves.
//  Any other escaped character is kept as-is with an unknown_escape warning.
//  Diagnostics about an escape are located at its backslash.
//
//  Returns the pointer to the terminating quote character or e if none was found
char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
	const char * const tokstart = s;
	const char *p;
	int has_endquote=0, has_newlines=0;
	
	#define append(startptr,endptr) darray_append_items(*out, startptr, (endptr)-(startptr))
	#define append_char(theChar) darray_append(*out, theChar)
	//Writes a terminating zero without counting it in out->size
	#define append_zero() do {darray_append(*out, 0); out->size--;} while(0)
	
	//[s, p) is always the run of plain characters scanned but not yet appended
	p = s;
	while (p<e) {
		char c = *p++;
		if (c == '\\') {
			//The backslash itself: where escape diagnostics point, and
			//where the text quoted in hex diagnostics begins
			const char *esc = p-1;
			
			//Flush the plain characters that preceded the backslash
			append(s, esc);
			s = p;
			if (p >= e) {
				//Nothing follows the backslash; keep it literally
				append_char('\\');
				tok_msg_error(ended_in_backslash, esc,
					"read_cstring input ended in backslash");
				break;
			}
			c = *p++;
			if (c>='0' && c<='7') {
				//Octal escape: this digit plus at most two more octal digits.
				//8 and 9 are not octal digits, so they end the escape.
				unsigned int octal = c-'0';
				size_t digit_count = 0;
				while (p<e && *p>='0' && *p<='7') {
					octal <<= 3;
					octal += (*p++) - '0';
					if (++digit_count >= 2)
						break;
				}
				//Any decimal digit directly after the escape is easy to misread
				if (p<e && *p>='0' && *p<='9') {
					tok_msg_info(ambiguous_octal, esc,
						"Octal followed by digit");
				}
				if (octal > 0xFF) {
					tok_msg_warn(octal_overflow, esc,
						"Octal out of range");
				}
				c = octal;
			} else {
				switch (c) {
					case 'x': {
						size_t digit_count = 0;
						size_t zero_count = 0;
						unsigned int hex = 0;
						//Leading zeros are counted separately so they do not
						//count towards the out-of-range check below
						while (p<e && *p=='0') p++, zero_count++;
						for (;p<e;digit_count++) {
							c = *p++;
							if (c>='0' && c<='9')
								c -= '0';
							else if (c>='A' && c<='F')
								c -= 'A'-10;
							else if (c>='a' && c<='f')
								c -= 'a'-10;
							else {
								//Not a hex digit: put it back and stop
								p--;
								break;
							}
							hex <<= 4;
							hex += c;
						}
						if (zero_count+digit_count > 2) {
							char *hex_string = strdup_rng(esc, p);
							//Fall back to a placeholder if the copy could not be made
							const char *shown = hex_string ? hex_string : "\\x...";
							tok_msg_warn(ambiguous_hex, esc,
								"Hex escape '%s' is ambiguous", shown);
							if (digit_count > 2)
								tok_msg_warn(hex_overflow, esc,
									"Hex escape '%s' out of range", shown);
							free(hex_string);
						}
						//Only the low byte is kept
						c = hex & 0xFF;
					}	break;
					case 'a':
						c=0x7;
						break;
					case 'b':
						c=0x8;
						break;
					case 'e':
						c=0x1B;
						break;
					case 'f':
						c=0xC;
						break;
					case 'n':
						c=0xA;
						break;
					case 'r':
						c=0xD;
						break;
					case 't':
						c=0x9;
						break;
					case 'v':
						c=0xB;
						break;
					case '\\':
						break;
					default:
						//An escaped terminating quote stands for itself
						if (c == quoteChar)
							break;
						//Escaping the other kind of quote is unnecessary but accepted silently
						if (c=='\'' && quoteChar=='"')
							break;
						if (c=='"' && quoteChar=='\'')
							break;
						if (c=='?') // \? is needed in some situations to avoid building a trigraph
							break;
						tok_msg_warn(unknown_escape, esc,
							"Unknown escape sequence '\\%c'", c);
						break;
				}
			}
			//The escape is fully consumed; the next plain run starts here
			s = p;
			append_char(c);
		} else if (c == quoteChar) {
			//Step back so p (and the return value) points at the quote
			p--;
			has_endquote = 1;
			break;
		} else if (creturn(c)) {
			has_newlines = 1;
		}
	}
	//Flush the trailing plain run, then terminate the buffer
	append(s, p);
	append_zero();
	if (!has_endquote) {
		tok_msg_error(missing_endquote, tokstart,
			"Missing endquote on %s literal",
			quoteChar=='\'' ? "character" : "string");
	} else if (has_newlines) {
		tok_msg_warn(quote_newlines, tokstart,
			"%s literal contains newline character(s)",
			quoteChar=='\'' ? "Character" : "String");
	}
	return (char*)p;
	
	#undef append
	#undef append_char
	#undef append_zero
}

#undef MESSAGE_PATH