Browse Source

Added module stringmap

Joey Adams 16 years ago
parent
commit
083a691fe3
4 changed files with 497 additions and 0 deletions
  1. 66 0
      ccan/stringmap/_info
  2. 157 0
      ccan/stringmap/stringmap.c
  3. 71 0
      ccan/stringmap/stringmap.h
  4. 203 0
      ccan/stringmap/test/run.c

+ 66 - 0
ccan/stringmap/_info

@@ -0,0 +1,66 @@
+#include <stdio.h>
+#include <string.h>
+#include "config.h"
+
+/**
+ * stringmap - Macros for mapping strings to things
+ *
+ * stringmap provides a generic string map via macros.
+ *
+ * Features which are sorely lacking in this version of stringmap are deletion and traversal.
+ *
+ * Example:
+ *
+ * #include <ccan/stringmap/stringmap.h>                                                                                                                                        
+ *
+ * static const char *get_string(void) {                                                                                                                                        
+ *      static char buffer[4096];                                                                                                                                               
+ *      char *tail;                                                                                                                                                             
+ *      if (!fgets(buffer, sizeof(buffer), stdin))                                                                                                                              
+ *              return NULL;                                                                                                                                                    
+ *      tail = strchr(buffer, 0);                                                                                                                                               
+ *      if (tail>buffer && tail[-1]=='\n')                                                                                                                                      
+ *              *--tail = 0;                                                                                                                                                    
+ *      if (!*buffer)                                                                                                                                                           
+ *              return NULL;                                                                                                                                                    
+ *      return buffer;                                                                                                                                                          
+ * }                                                                                                                                                                            
+ *
+ * int main(void) {                                                                                                                                                             
+ *      stringmap(int) map = stringmap_new(NULL);
+ *      const char *string;
+ *
+ *      while ((string = get_string()) != NULL) {
+ *              int *count = stringmap_lookup(map, string);
+ *
+ *              if (!count) {
+ *                      printf("\"%s\" is new\n", string);
+ *                      count = stringmap_enter(map, string);
+ *              }
+ *
+ *              (*count) ++;
+ *
+ *              printf("\"%s\" has been entered %d times\n", string, *count);
+ *      }
+ *
+ *      stringmap_free(map);
+ *
+ *    return 0;
+ * }
+ *
+ *	Authors: Joey Adams, Anders Magnusson
+ *	License: BSD
+ */
+int main(int argc, char *argv[])
+{
+	/* Expect exactly one argument */
+	if (argc != 2)
+		return 1;
+
+	if (strcmp(argv[1], "depends") == 0) {
+		printf("ccan/block_pool\n");
+		return 0;
+	}
+
+	return 1;
+}

+ 157 - 0
ccan/stringmap/stringmap.c

@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2004 Anders Magnusson (ragge@ludd.luth.se).
+ * Copyright (c) 2009 Joseph Adams (joeyadams3.14159@gmail.com).
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This is a heavily modified version of the Patricia tree implementation
+   in PCC at http://pcc.zentus.com/cgi-bin/cvsweb.cgi/cc/cpp/cpp.c?rev=1.96 */
+
+#include "stringmap.h"
+
+#if 0
+#include <assert.h>
+#else
+#define assert(...) do {} while(0)
+#endif
+
+#define BITNO(x)		((x) & ~(LEFT_IS_LEAF|RIGHT_IS_LEAF))
+#define LEFT_IS_LEAF		0x80000000
+#define RIGHT_IS_LEAF		0x40000000
+#define IS_LEFT_LEAF(x)		(((x) & LEFT_IS_LEAF) != 0)
+#define IS_RIGHT_LEAF(x)	(((x) & RIGHT_IS_LEAF) != 0)
+#define P_BIT(key, bit)		(key[bit >> 3] >> (bit & 7)) & 1
+#define CHECKBITS		8
+
+struct T {
+	char *str;
+};
+
+static void *T_new(struct block_pool *bp, const char *key, size_t T_size) {
+	struct T *leaf = block_pool_alloc(bp, T_size);
+	memset(leaf, 0, T_size);
+	leaf->str = block_pool_strdup(bp, key);
+	return leaf;
+}
+
+void *stringmap_lookup_real(struct stringmap *t, const char *key, int enterf, const size_t T_size) {
+	struct T *sp;
+	struct stringmap_node *w, *new, *last;
+	int len, cix, bit, fbit, svbit, ix, bitno;
+	const char *k, *m, *sm;
+	
+	if (!t->root) {
+		if (!enterf)
+			return NULL;
+		
+		t->bp = block_pool_new(t->bp);
+		
+		t->root = T_new(t->bp, key, T_size);
+		t->count = 1;
+		
+		return t->root;
+	}
+
+	/* Count full string length */
+	for (k = key, len = 0; *k; k++, len++)
+		;
+	
+	if (t->count == 1) {
+		w = t->root;
+		svbit = 0;
+	} else {
+		w = t->root;
+		bitno = len * CHECKBITS;
+		for (;;) {
+			bit = BITNO(w->bitno);
+			fbit = bit > bitno ? 0 : P_BIT(key, bit);
+			svbit = fbit ? IS_RIGHT_LEAF(w->bitno) :
+			    IS_LEFT_LEAF(w->bitno);
+			w = w->lr[fbit];
+			if (svbit)
+				break;
+		}
+	}
+
+	sp = (struct T *)w;
+
+	sm = m = sp->str;
+	k = key;
+
+	/* Check for correct string and return */
+	for (cix = 0; *m && *k && *m == *k; m++, k++, cix += CHECKBITS)
+		;
+	if (*m == 0 && *k == 0) {
+		//if (!enterf && sp->value == NULL)
+		//	return NULL;
+		return sp;
+	}
+
+	if (!enterf)
+		return NULL; /* no string found and do not enter */
+
+	ix = *m ^ *k;
+	while ((ix & 1) == 0)
+		ix >>= 1, cix++;
+
+	/* Create new node */
+	new = block_pool_alloc(t->bp, sizeof *new);
+	bit = P_BIT(key, cix);
+	new->bitno = cix | (bit ? RIGHT_IS_LEAF : LEFT_IS_LEAF);
+	new->lr[bit] = T_new(t->bp, key, T_size);
+
+	if (t->count++ == 1) {
+		new->lr[!bit] = t->root;
+		new->bitno |= (bit ? LEFT_IS_LEAF : RIGHT_IS_LEAF);
+		t->root = new;
+		return (struct T *)new->lr[bit];
+	}
+
+	w = t->root;
+	last = NULL;
+	for (;;) {
+		fbit = w->bitno;
+		bitno = BITNO(w->bitno);
+		assert(bitno != cix);
+		if (bitno > cix)
+			break;
+		svbit = P_BIT(key, bitno);
+		last = w;
+		w = w->lr[svbit];
+		if (fbit & (svbit ? RIGHT_IS_LEAF : LEFT_IS_LEAF))
+			break;
+	}
+
+	new->lr[!bit] = w;
+	if (last == NULL) {
+		t->root = new;
+	} else {
+		last->lr[svbit] = new;
+		last->bitno &= ~(svbit ? RIGHT_IS_LEAF : LEFT_IS_LEAF);
+	}
+	if (bitno < cix)
+		new->bitno |= (bit ? LEFT_IS_LEAF : RIGHT_IS_LEAF);
+	return (struct T *)new->lr[bit];
+}

+ 71 - 0
ccan/stringmap/stringmap.h

@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2004 Anders Magnusson (ragge@ludd.luth.se).
+ * Copyright (c) 2009 Joseph Adams (joeyadams3.14159@gmail.com).
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CCAN_STRINGMAP_H
+#define CCAN_STRINGMAP_H
+
+#include <ccan/block_pool/block_pool.h>
+#include <stdint.h>
+
+#define stringmap(theType) struct {struct stringmap t; struct {char *str; theType value;} *last;}
+	//the 'last' pointer here is used as a hacky typeof() alternative
+
+#define stringmap_new(ctx) {{0,0,(struct block_pool*)(ctx)},0}
+#define stringmap_init(sm, ctx) do { \
+		(sm).t.root = 0; \
+		(sm).t.count = 0; \
+		(sm).t.bp = (struct block_pool*)(ctx); \
+		(sm).last = 0; \
+	} while(0)
+#define stringmap_free(sm) do { \
+		if ((sm).t.root) \
+			block_pool_free((sm).t.bp); \
+	} while(0)
+
+#define stringmap_lookup(sm, key) stringmap_le(sm, key, 0)
+#define stringmap_enter(sm, key) stringmap_le(sm, key, 1)
+
+//this macro sets sm.last so it can exploit its type
+#define stringmap_le(sm, key, enterf) ((((sm).last) = stringmap_lookup_real(&(sm).t, key, enterf, sizeof(*(sm).last))) ? &(sm).last->value : NULL)
+
+
+struct stringmap_node {
+	uint32_t bitno;
+	struct stringmap_node *lr[2];
+};
+
+struct stringmap {
+	struct stringmap_node *root;
+	size_t count;
+	struct block_pool *bp;
+	//hack: 'bp' holds talloc ctx when 'root' is NULL
+};
+
+void *stringmap_lookup_real(struct stringmap *t, const char *key, int enterf, size_t T_size);
+
+#endif

+ 203 - 0
ccan/stringmap/test/run.c

@@ -0,0 +1,203 @@
+#include "stringmap/stringmap.h"
+#include "stringmap/stringmap.c"
+
+#include "tap/tap.h"
+
+static void test_trivial(void) {
+	stringmap(int) map = stringmap_new(NULL);
+	
+	ok1(stringmap_lookup(map, "one") == NULL);
+	*stringmap_enter(map, "one") = 1;
+	
+	ok1(stringmap_lookup(map, "two") == NULL);
+	*stringmap_enter(map, "two") = 2;
+	
+	ok1(stringmap_lookup(map, "three") == NULL);
+	*stringmap_enter(map, "three") = 3;
+	
+	ok1(stringmap_lookup(map, "four") == NULL);
+	*stringmap_enter(map, "four") = 4;
+	
+	ok1(*stringmap_lookup(map, "three") == 3);
+	ok1(*stringmap_lookup(map, "one") == 1);
+	ok1(*stringmap_lookup(map, "four") == 4);
+	ok1(*stringmap_lookup(map, "two") == 2);
+	
+	ok1(map.t.count == 4);
+	
+	stringmap_free(map);
+}
+
+
+static void scramble(void *base, size_t nmemb, size_t size) {
+   char *i = base;
+   char *o;
+   size_t sd;
+   for (;nmemb>1;nmemb--) {
+      o = i + size*(random()%nmemb);
+      for (sd=size;sd--;) {
+         char tmp = *o;
+         *o++ = *i;
+         *i++ = tmp;
+      }
+   }
+}
+
+//#define RANDOM_STRING_READABLE
+
+static char *random_string(struct block_pool *bp) {
+	size_t len = random() % 100;
+	char *str = block_pool_alloc(bp, len+1);
+	char *i;
+	
+	for (i=str; len--; i++) {
+		#ifndef RANDOM_STRING_READABLE
+		char c = random();
+		*i = c ? c : ' ';
+		#else
+		//only generate characters [32,126]
+		char c = random()%95 + 32;
+		*i = c;
+		#endif
+	}
+	*i = 0;
+	
+	return str;
+}
+
+struct test_entry {
+	const char *str;
+	char *value;
+};
+
+static int by_str(const void *ap, const void *bp) {
+	return strcmp(((struct test_entry*)ap)->str, ((struct test_entry*)bp)->str);
+}
+
+static void cull_duplicates(struct test_entry *entries, size_t *count) {
+	struct test_entry *i, *o, *e = entries + *count;
+	
+	qsort(entries, *count, sizeof(*entries), by_str);
+	
+	for (i=entries, o=entries; i<e;) {
+		//skip repeated strings
+		if (i>entries) {
+			const char *last = i[-1].str;
+			if (!strcmp(last, i->str)) {
+				do i++; while(i<e && !strcmp(last, i->str));
+				continue;
+			}
+		}
+		
+		//write all entries with the same value (should also have same string)
+		{
+			char *value = i->value;
+			do *o++ = *i++; while(i<e && i->value == value);
+		}
+	}
+	
+	*count = o-entries;
+}
+
+static int test_stringmap(size_t count, FILE *out) {
+	stringmap(char*) map = stringmap_new(NULL);
+	
+	#define print(tag, fmt, ...) do { \
+			if (out) \
+				fprintf(out, tag fmt "\n", ##__VA_ARGS__); \
+		} while(0)
+	#define err(...) do { \
+			print("error: ", __VA_ARGS__); \
+			goto fail; \
+		} while(0)
+	#define debug(...) print("debug: ", __VA_ARGS__)
+	#define msg(...) print("info: ", __VA_ARGS__)
+	
+	struct block_pool *bp = block_pool_new(NULL);
+	struct test_entry *entries = block_pool_alloc(bp, sizeof(*entries) * count);
+	struct test_entry *i, *e = entries+count;
+	char *value_base = block_pool_alloc(bp, count), *value = value_base;
+	size_t unique_count = 0;
+	
+	//we use value to track whether an entry has been added or not
+	memset(value, 0, count);
+	
+	msg("Generating %zu test entries...", count);
+	
+	for (i=entries; i<e; value++) {
+		char *str = random_string(bp);
+		size_t same_count = (random()%5 ? random()%3 : random()%10) + 1;
+		
+		for (;same_count-- && i<e; i++) {
+			i->str = str;
+			i->value = value;
+		}
+	}
+	
+	cull_duplicates(entries, &count);
+	e = entries+count;
+	scramble(entries, count, sizeof(*entries));
+	
+	msg("Inserting/looking up %zu entries...", count);
+	
+	for (i=entries; i<e; i++) {
+		char **node;
+		
+		debug("Looking up %s", i->str);
+		
+		node = stringmap_lookup(map, i->str);
+		
+		if (!node) {
+			if (*i->value)
+				err("Previously inserted entry not found");
+			
+			debug("Not found; entering");
+			
+			node = stringmap_enter(map, i->str);
+			if (!node || strcmp(i->str, map.last->str))
+				err("Node not properly entered");
+			*node = i->value;
+			*i->value = 1; //mark that the entry is entered
+			
+			unique_count++;
+		} else {
+			if (strcmp(i->value, map.last->value))
+				err("lookup returned incorrect string");
+			if (i->value != *node)
+				err("lookup returned incorrect value");
+			if (!**node)
+				err("lookup returned bogus value");
+		}
+	}
+	
+	if (map.t.count != unique_count)
+		err("Map has incorrect count");
+	
+	printf("stringmap test passed after %zu inserts, %zu lookups (%zu total operations)\n", unique_count, (i-entries)-unique_count, i-entries);
+	
+	block_pool_free(bp);
+	stringmap_free(map);
+	return 1;
+
+fail:
+	printf("stringmap test failed after %zu inserts, %zu lookups (%zu total operations)\n", unique_count, (i-entries)-unique_count, i-entries);
+	
+	block_pool_free(bp);
+	stringmap_free(map);
+	return 0;
+	
+	#undef print
+	#undef err
+	#undef debug
+	#undef msg
+}
+
+int main(void)
+{
+	plan_tests(10);
+	
+	test_trivial();
+	ok1(test_stringmap(10000, NULL));
+	
+	return exit_status();
+}