Browse Source

ttxml: Rplaced ratchet buffer with regular buffer, 20x speed increase

Daniel Burke 14 years ago
parent
commit
385e0e538b
2 changed files with 166 additions and 114 deletions
  1. 164 112
      ccan/ttxml/ttxml.c
  2. 2 2
      ccan/ttxml/ttxml.h

+ 164 - 112
ccan/ttxml/ttxml.c

@@ -1,4 +1,5 @@
 /* Licensed under GPL - see LICENSE file for details */
+
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@@ -6,7 +7,34 @@
 #include "ttxml.h"
 
 
-XmlNode* xml_new(char * name, char * attrib)
+#define BUFFER 3264
+
+
+#define XML_LETTER	1
+#define XML_NUMBER	2
+#define XML_SPACE	4
+#define XML_SLASH	8
+#define XML_OPEN	16
+#define XML_EQUALS	32
+#define XML_CLOSE	64
+#define XML_QUOTE	128
+#define XML_OTHER	256
+
+#define XML_ALL 0xFFFFFFFF
+
+
+typedef struct XMLBUF
+{
+	FILE * fptr;
+	char * buf;
+	int len;
+	int read_index;
+	int eof;
+} XMLBUF;
+
+
+/* Allocate a new XmlNode */
+XmlNode* xml_new(char * name)
 {
 	XmlNode * ret = malloc(sizeof(XmlNode));
 	if(!ret)return NULL;
@@ -19,7 +47,7 @@ XmlNode* xml_new(char * name, char * attrib)
 	return ret;
 }
 
-
+/* free a previously allocated XmlNode */
 void xml_free(XmlNode *target)
 {
 	int i;
@@ -34,19 +62,12 @@ void xml_free(XmlNode *target)
 	free(target);
 }
 
-#define XML_LETTER	1
-#define XML_NUMBER	2
-#define XML_SPACE	4
-#define XML_SLASH	8
-#define XML_OPEN	16
-#define XML_EQUALS	32
-#define XML_CLOSE	64
-#define XML_QUOTE	128
-#define XML_OTHER	256
-
-#define XML_ALL 0xFFFFFFFF
-
-static int is_special(char item)
+/* raise flags if we have a character of special meaning
+ *
+ * This is where I've hidden the switch statements :-p
+ *
+ */
+int is_special(char item)
 {
 	if((item >= 'a' && item <= 'z') || (item >= 'A' && item <='Z'))
 		return XML_LETTER;
@@ -67,67 +88,53 @@ static int is_special(char item)
 	return 128;
 }
 
-struct XMLBUF
+/* Refresh the buffer, expects not to be called when EOF */
+static void xml_read_file(XMLBUF *xml)
 {
-	FILE * fptr;
-	char * buf;
-	int len;
-	int eof;
-};
-
-
-static void xml_consume(struct XMLBUF *xml, int offset)
-{
-	int size, request, received;
+	int size;
 	
-	size = xml->len - offset;
-
-	if(!xml->len)
-		return;
-
-	if(size)
-	{
-//		printf("Size=%d, off=%d, len=%d\n", size, offset, xml->len);
-		memmove(xml->buf, xml->buf + offset, size);
-	}
-
-	if(xml->eof)
+	size = fread( xml->buf,	1, xml->len, xml->fptr);
+	if( size != xml->len )
 	{
 		xml->len = size;
 		xml->buf[size]=0;
-		return;
+		xml->eof = 1;
 	}
+}
 
-	request = xml->len - size;
-	received = fread(xml->buf + size, 1, request, xml->fptr);
-	if( received == request )
-		return;
 
-	xml->len = size + received;
-	xml->eof = 1;
-	xml->buf[xml->len] = 0;
-	return;
+/* All reading of the XML buffer done through these two functions */
+/*** read a byte without advancing the offset */
+static char xml_peek(XMLBUF *xml)
+{
+	return xml->buf[xml->read_index];
 }
 
-
-static void xml_skip( struct XMLBUF *xml, int mask )
+/*** read a byte and advance the offset */
+static char xml_read_byte(XMLBUF *xml)
 {
-	int offset = 0;
-	if(!xml->len)return;
-	while( is_special(xml->buf[offset]) & mask )
+	char ret = xml_peek(xml);
+	xml->read_index++;
+	if(xml->read_index >= xml->len)
 	{
-		offset ++;
-		if(offset == xml->len)
-		{
-			xml_consume(xml, offset);
-			offset = 0;
-			if(!xml->len)
-				return;
-		}
+		if(xml->eof)return ret;
+		xml->read_index = 0 ;
+		xml_read_file(xml);
 	}
-	xml_consume(xml, offset);
+	return ret;
 }
 
+
+/* skip over bytes matching the is_special mask */
+static void xml_skip( XMLBUF *xml, int mask)
+{
+	printf("just called\n");
+	while( is_special(xml_peek(xml)) & mask && xml->len )
+		xml_read_byte(xml);
+}
+
+
+/* character matching tests for the feed functions */
 static char quotechar = 0;
 static int test_quote(const char x)
 {
@@ -148,38 +155,59 @@ static int test_mask(const char x)
 	return !(is_special(x) & feed_mask);
 }
 
-static char* xml_feed( struct XMLBUF *xml, int (*test)(char) )
+/*
+ * char* xml_feed(x, test)
+ *
+ * Reads as many contiguous chars that pass test() into a newly allocated
+ * string.
+ *
+ * Instead of calling xml_read_byte and flogging realloc() for each byte,
+ * it checks the buffer itself.
+*/
+static char* xml_feed( XMLBUF *xml, int (*test)(char) )
 {
-	int offset = 0;
+	int offset = xml->read_index;
+	int delta;
 	char *ret = NULL;
 	int size = 0;
 
+	/* perform first and N middle realloc()'s */
 	while( test(xml->buf[offset]) )
 	{
-		offset++;
-		if(offset == xml->len)
+		offset ++;
+
+		if(offset >= xml->len)
 		{
-			ret = realloc(ret, size+offset+1);
-			memcpy(ret+size, xml->buf, offset);
-			size += offset;
+			delta = offset - xml->read_index;
+			ret = realloc(ret, size + delta + 1);
+			memcpy(ret+size, xml->buf + xml->read_index, delta);
+			size += delta;
 			ret[size]=0;
-			xml_consume(xml, offset);
+			if(xml->eof)return ret;
+			xml_read_file(xml);
+			xml->read_index = 0;
 			offset = 0;
-			if(!xml->len)return ret;
 		}
 	}
-
-	if(offset)
+	/* perform final realloc() if needed */
+	if(offset > xml->read_index)
 	{
-		ret = realloc(ret, size+offset+1);
-		memcpy(ret+size, xml->buf, offset);
-		size += offset;
+		delta = offset - xml->read_index;
+		ret = realloc(ret, size + delta + 1);
+		memcpy(ret+size, xml->buf + xml->read_index, delta);
+		xml->read_index = offset;
+		size += delta;
 		ret[size]=0;
-		xml_consume(xml, offset);
 	}
 	return ret;
 }
 
+/* this reads attributes from tags, of the form...
+ *
+ * <tag attr1="some arguments" attr2=argument>
+ *
+ * It is aware of quotes, and will allow anything inside quoted arguments
+ */
 static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
 {
 	int n=0;
@@ -187,7 +215,7 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
 	// how does this tag finish?
 	while(xml->len)
 	{
-		if( is_special(xml->buf[0]) & (XML_CLOSE | XML_SLASH) )
+		if( is_special(xml_peek(xml)) & (XML_CLOSE | XML_SLASH) )
 			return;
 
 		n = ++node->nattrib;
@@ -196,19 +224,18 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
 		
 		feed_mask = XML_EQUALS | XML_SPACE | XML_CLOSE | XML_SLASH;
 		node->attrib[n*2] = xml_feed(xml, test_mask );
-		if( xml->buf[0] == '=' )
+		if( xml_peek(xml) == '=' )
 		{
-			if( is_special(xml->buf[1]) & XML_QUOTE )
+			xml_read_byte(xml);
+			if( is_special(xml_peek(xml)) & XML_QUOTE )
 			{
-				quotechar = xml->buf[1];
-				xml_consume(xml, 2);
+				quotechar = xml_read_byte(xml);
 				node->attrib[n*2+1] = xml_feed(xml, test_quote);
-				xml_consume(xml, 1);
+				xml_read_byte(xml);
 			}
 			else
 			{
 				feed_mask = XML_SPACE | XML_CLOSE | XML_SLASH;
-				xml_consume(xml, 1);
 				node->attrib[n*2+1] = xml_feed(xml, test_mask);
 			}
 		}
@@ -216,6 +243,11 @@ static void xml_read_attr(struct XMLBUF *xml, XmlNode *node)
 	}
 }
 
+/* The big decision maker, is it a regular node, or a text node.
+ * If it's a node, it will check if it should have children, and if so
+ * will recurse over them.
+ * Text nodes don't have children, so no recursing.
+ */
 static XmlNode* xml_parse(struct XMLBUF *xml)
 {
 	int offset;
@@ -227,43 +259,48 @@ static XmlNode* xml_parse(struct XMLBUF *xml)
 
 	xml_skip(xml, XML_SPACE);	// skip whitespace
 	offset=0;
-	while(xml->len)
+	while( (xml->read_index < xml->len) || !xml->eof )
 	{
-		switch(is_special(xml->buf[offset]))
+		switch(is_special(xml_peek(xml)))
 		{
 			case XML_OPEN:
-				xml_consume(xml, 1);
-				if(xml->buf[offset] == '/')
+				xml_read_byte(xml);
+				if(xml_peek(xml) == '/')
 					return ret;		// parents close tag
 				// read the tag name
 				feed_mask = XML_SPACE | XML_SLASH | XML_CLOSE;
-				*this = xml_new( xml_feed(xml, test_mask), NULL );
+				*this = xml_new( xml_feed(xml, test_mask));
 				xml_skip(xml, XML_SPACE);	// skip any whitespace
 
 				xml_read_attr(xml, *this);	// read attributes
 
 				// how does this tag finish?
-				switch(is_special(xml->buf[0]))
+				switch(is_special(xml_peek(xml)))
 				{
 					case XML_CLOSE:		// child-nodes ahead
-						xml_consume(xml, 1);
+						xml_read_byte(xml);
 						(*this)->child = xml_parse(xml);
 						xml_skip(xml, XML_ALL ^ XML_CLOSE);
-						xml_consume(xml, 1);
+						xml_read_byte(xml);
 						break;
 					case XML_SLASH:		// self closing tag
-						xml_consume(xml, 2);
+						xml_read_byte(xml);
+						xml_read_byte(xml);
 						break;
 				}
 				break;
 
 			default:	// text node
-				*this = xml_new(0, 0);
+				*this = xml_new(0);
 				xml_skip(xml, XML_SPACE);	// skip any whitespace
 				feed_mask = XML_OPEN;
 				(*this)->nattrib=1;
 				(*this)->attrib = malloc(sizeof(char*)*2);
+				(*this)->attrib[1] = NULL;
 				tmp = (*this)->attrib[0] = xml_feed(xml, test_mask);
+
+				/* trim the whitespace off the end of text nodes,
+				 * by overwriting the spaces will null termination. */
 				toff = strlen(tmp)-1;
 				while( ( is_special(tmp[toff]) & XML_SPACE ) )
 				{
@@ -271,7 +308,6 @@ static XmlNode* xml_parse(struct XMLBUF *xml)
 					toff --;
 				}
 
-				(*this)->attrib[1] = NULL;
 				break;
 		}
 		this = &(*this)->next; 
@@ -282,14 +318,16 @@ static XmlNode* xml_parse(struct XMLBUF *xml)
 }
 
 
-
-#define BUF 3264
+/* bootstrap the structures for xml_parse() to be able to get started */
 XmlNode* xml_load(const char * filename)
 {
 	struct XMLBUF xml;
 	XmlNode *ret = NULL;
 
+//	printf("xml_load(\"%s\");\n", filename);
+
 	xml.eof = 0;
+	xml.read_index = 0;
 	xml.fptr = fopen(filename, "rb");
 	if(!xml.fptr)
 	{
@@ -297,12 +335,13 @@ XmlNode* xml_load(const char * filename)
 		return NULL;
 	}
 
-	xml.buf = malloc(BUF);
+	xml.buf = malloc(BUFFER+1);
+	xml.buf[BUFFER]=0;
 	if(!xml.buf)
 		goto xml_load_fail_malloc_buf;
 	
-	xml.len = fread(xml.buf, 1, BUF, xml.fptr);
-	if(xml.len < BUF)
+	xml.len = fread(xml.buf, 1, BUFFER, xml.fptr);
+	if(xml.len < BUFFER)
 		xml.eof = 1;
 
 	ret = xml_parse(&xml);
@@ -312,8 +351,8 @@ xml_load_fail_malloc_buf:
 	fclose(xml.fptr);
 	return ret;
 }
-#undef BUF
 
+/* very basic function that will get you the first node with a given name */
 XmlNode * xml_find(XmlNode *xml, const char *name)
 {
 	XmlNode * ret;
@@ -331,7 +370,7 @@ XmlNode * xml_find(XmlNode *xml, const char *name)
 	return NULL;
 }
 
-
+/* very basic attribute lookup function */
 char* xml_attr(XmlNode *x, const char *name)
 {
 	int i;
@@ -344,6 +383,7 @@ char* xml_attr(XmlNode *x, const char *name)
 
 
 #ifdef TEST
+/* print out the heirarchy of an XML file, useful for debugging */
 void xp(XmlNode *x, int level, int max)
 {
 	int i;
@@ -357,6 +397,7 @@ void xp(XmlNode *x, int level, int max)
 	if(x->name)
 	for(i=0; i<x->nattrib; i++)
 		printf("%s=\"%s\",", x->attrib[i*2], x->attrib[i*2+1]);
+	else printf("%s", x->attrib[0]);
 	printf("\n");
 	if(x->child)xp(x->child, level+1, max);
 	if(x->next)xp(x->next, level, max);
@@ -365,7 +406,8 @@ void xp(XmlNode *x, int level, int max)
 
 int main(int argc, char *argv[])
 {
-	XmlNode *x;
+	XmlNode *x, *tmp;
+	int i;
 
 	if(!argv[1])
 	{
@@ -374,19 +416,29 @@ int main(int argc, char *argv[])
 		return 1;
 	}
 
-	printf("Loading file \"%s\"\n", argv[1]);
-
-	x = xml_load(argv[1]);
-
-	if(!x)
+#ifdef PROFILE
+	for(i=0; i<1000; i++)
 	{
-		printf("Failed to load.\n");
-		return 2;
+#endif
+		x = xml_load(argv[1]);
+
+		if(!x)
+		{
+			printf("Failed to load.\n");
+			return 2;
+		}
+#ifndef PROFILE
+		xp(x, 1, 20);
+#endif
+		xml_free(x);
+#ifdef PROFILE
 	}
+#endif
 
-	xp(x, 1, 6);
-	xml_free(x);
-	printf("Happily free.\n");
+	
+//	tmp = xml_find(x, "geometry");
+//	xp(x, 1, 6);
+//	printf("Happily free.\n");
 	return 0;
 }
 #endif

+ 2 - 2
ccan/ttxml/ttxml.h

@@ -1,4 +1,4 @@
-/* Licensed under GPL - see LICENSE file for details */
+
 typedef struct XmlNode {
 	char * name;
 	char ** attrib;
@@ -8,7 +8,7 @@ typedef struct XmlNode {
 } XmlNode;
 
 
-XmlNode* xml_new(char * name, char * attrib);
+XmlNode* xml_new(char * name);
 XmlNode* xml_load(const char * filename);
 void xml_free(XmlNode *target);
 char* xml_attr(XmlNode *x, const char *name);