Browse Source

strmap: new module for ordered map of strings using a critbit tree.

Rusty Russell 14 years ago
parent
commit
20f3b26031
6 changed files with 786 additions and 0 deletions
  1. 62 0
      ccan/strmap/_info
  2. 238 0
      ccan/strmap/strmap.c
  3. 224 0
      ccan/strmap/strmap.h
  4. 96 0
      ccan/strmap/test/run-order.c
  5. 97 0
      ccan/strmap/test/run-prefix.c
  6. 69 0
      ccan/strmap/test/run.c

+ 62 - 0
ccan/strmap/_info

@@ -0,0 +1,62 @@
+#include <string.h>
+#include "config.h"
+
+/**
+ * strmap - an ordered map of strings to values
+ *
+ * This code implements an ordered map of strings as a critbit tree. See:
+ *
+ *  http://cr.yp.to/critbit.html
+ *  http://github.com/agl/critbit (which this code is based on)
+ *
+ * License: Public domain (but some dependencies are LGPL!)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ * Ccanlint:
+ *	license_depends_compat FAIL
+ *
+ * Example:
+ * #include <ccan/strmap/strmap.h>
+ * #include <stdio.h>
+ *
+ * static bool dump(const char *member, size_t value, void *unused)
+ * {
+ * 	printf("%s at %zu. ", member, value);
+ * 	// false means keep going with iteration.
+ * 	return false;
+ * }
+ *
+ * int main(int argc, char *argv[])
+ * {
+ * 	size_t i;
+ * 	struct { STRMAP_MEMBERS(size_t); } map;
+ *
+ * 	strmap_init(&map);
+ * 	for (i = 1; i < argc; i++)
+ *		// This only adds the first time for this arg.
+ * 		strmap_add(&map, argv[i], i);
+ *
+ * 	strmap_iterate(&map, dump, NULL);
+ *	printf("\n");
+ * 	return 0;
+ * }
+ * // Given 'foo' outputs 'foo at 1. '
+ * // Given 'foo bar' outputs 'bar at 2. foo at 1. '
+ * // Given 'foo foo bar zebra' outputs 'bar at 3. foo at 1. zebra at 4. '
+ */
+int main(int argc, char *argv[])
+{
+	/* Expect exactly one argument */
+	if (argc != 2)
+		return 1;
+
+	if (strcmp(argv[1], "depends") == 0) {
+		printf("ccan/ilog\n"
+		       "ccan/short_types\n"
+		       "ccan/str\n"
+		       "ccan/tcon\n"
+		       "ccan/typesafe_cb\n");
+		return 0;
+	}
+
+	return 1;
+}

+ 238 - 0
ccan/strmap/strmap.c

@@ -0,0 +1,238 @@
+/* This code is based on ccan/strset.c. */
+#include <ccan/strmap/strmap.h>
+#include <ccan/short_types/short_types.h>
+#include <ccan/str/str.h>
+#include <ccan/ilog/ilog.h>
+#include <assert.h>
+#include <stdlib.h>
+
+struct node {
+	/* These point to strings or nodes. */
+	struct strmap child[2];
+	/* The byte number where first bit differs. */
+	size_t byte_num;
+	/* The bit where these children differ. */
+	u8 bit_num;
+};
+
+/* Closest member to this in a non-empty map. */
+static struct strmap *closest(struct strmap *n, const char *member)
+{
+	size_t len = strlen(member);
+	const u8 *bytes = (const u8 *)member;
+
+	/* Anything with NULL value is a node. */
+	while (!n->v) {
+		u8 direction = 0;
+
+		if (n->u.n->byte_num < len) {
+			u8 c = bytes[n->u.n->byte_num];
+			direction = (c >> n->u.n->bit_num) & 1;
+		}
+		n = &n->u.n->child[direction];
+	}
+	return n;
+}
+
+void *strmap_get_(const struct strmap *map, const char *member)
+{
+	struct strmap *n;
+
+	/* Empty map? */
+	if (!map->u.n)
+		return NULL;
+	n = closest((struct strmap *)map, member);
+	if (streq(member, n->u.s))
+		return n->v;
+	return NULL;
+}
+
+bool strmap_add_(struct strmap *map, const char *member, const void *value)
+{
+	size_t len = strlen(member);
+	const u8 *bytes = (const u8 *)member;
+	struct strmap *n;
+	struct node *newn;
+	size_t byte_num;
+	u8 bit_num, new_dir;
+
+	assert(value);
+
+	/* Empty map? */
+	if (!map->u.n) {
+		map->u.s = member;
+		map->v = (void *)value;
+		return true;
+	}
+
+	/* Find closest existing member. */
+	n = closest(map, member);
+
+	/* Find where they differ. */
+	for (byte_num = 0; n->u.s[byte_num] == member[byte_num]; byte_num++) {
+		if (member[byte_num] == '\0') {
+			/* All identical! */
+			return false;
+		}
+	}
+
+	/* Find which bit differs (if we had ilog8, we'd use it) */
+	bit_num = ilog32_nz((u8)n->u.s[byte_num] ^ bytes[byte_num]) - 1;
+	assert(bit_num < CHAR_BIT);
+
+	/* Which direction do we go at this bit? */
+	new_dir = ((bytes[byte_num]) >> bit_num) & 1;
+
+	/* Allocate new node. */
+	newn = malloc(sizeof(*newn));
+	if (!newn) {
+		/* FIXME */
+		return false;
+	}
+	newn->byte_num = byte_num;
+	newn->bit_num = bit_num;
+	newn->child[new_dir].v = (void *)value;
+	newn->child[new_dir].u.s = member;
+
+	/* Find where to insert: not closest, but first which differs! */
+	n = map;
+	while (!n->v) {
+		u8 direction = 0;
+
+		if (n->u.n->byte_num > byte_num)
+			break;
+		/* Subtle: bit numbers are "backwards" for comparison */
+		if (n->u.n->byte_num == byte_num && n->u.n->bit_num < bit_num)
+			break;
+
+		if (n->u.n->byte_num < len) {
+			u8 c = bytes[n->u.n->byte_num];
+			direction = (c >> n->u.n->bit_num) & 1;
+		}
+		n = &n->u.n->child[direction];
+	}
+
+	newn->child[!new_dir] = *n;
+	n->u.n = newn;
+	n->v = NULL;
+	return true;
+}
+
+char *strmap_del_(struct strmap *map, const char *member, void **valuep)
+{
+	size_t len = strlen(member);
+	const u8 *bytes = (const u8 *)member;
+	struct strmap *parent = NULL, *n;
+	const char *ret = NULL;
+	u8 direction = 0; /* prevent bogus gcc warning. */
+
+	/* Empty map? */
+	if (!map->u.n)
+		return NULL;
+
+	/* Find closest, but keep track of parent. */
+	n = map;
+	/* Anything with NULL value is a node. */
+	while (!n->v) {
+		u8 c = 0;
+
+		parent = n;
+		if (n->u.n->byte_num < len) {
+			c = bytes[n->u.n->byte_num];
+			direction = (c >> n->u.n->bit_num) & 1;
+		} else
+			direction = 0;
+		n = &n->u.n->child[direction];
+	}
+
+	/* Did we find it? */
+	if (!streq(member, n->u.s))
+		return NULL;
+
+	ret = n->u.s;
+	if (valuep)
+		*valuep = n->v;
+
+	if (!parent) {
+		/* We deleted last node. */
+		map->u.n = NULL;
+	} else {
+		struct node *old = parent->u.n;
+		/* Raise other node to parent. */
+		*parent = old->child[!direction];
+		free(old);
+	}
+
+	return (char *)ret;
+}
+
+static bool iterate(struct strmap n,
+		    bool (*handle)(const char *, void *, void *), void *data)
+{
+	if (n.v)
+		return handle(n.u.s, n.v, data);
+
+	return iterate(n.u.n->child[0], handle, data)
+		|| iterate(n.u.n->child[1], handle, data);
+}
+
+void strmap_iterate_(const struct strmap *map,
+		     bool (*handle)(const char *, void *, void *), void *data)
+{
+	/* Empty map? */
+	if (!map->u.n)
+		return;
+
+	iterate(*map, handle, data);
+}
+
+const struct strmap *strmap_prefix_(const struct strmap *map,
+				    const char *prefix)
+{
+	const struct strmap *n, *top;
+	size_t len = strlen(prefix);
+	const u8 *bytes = (const u8 *)prefix;
+
+	/* Empty map -> return empty map. */
+	if (!map->u.n)
+		return map;
+
+	top = n = map;
+
+	/* We walk to find the top, but keep going to check prefix matches. */
+	while (!n->v) {
+		u8 c = 0, direction;
+
+		if (n->u.n->byte_num < len)
+			c = bytes[n->u.n->byte_num];
+
+		direction = (c >> n->u.n->bit_num) & 1;
+		n = &n->u.n->child[direction];
+		if (c)
+			top = n;
+	}
+
+	if (!strstarts(n->u.s, prefix)) {
+		/* Convenient return for prefixes which do not appear in map. */
+		static const struct strmap empty_map;
+		return &empty_map;
+	}
+
+	return top;
+}
+
+static void clear(struct strmap n)
+{
+	if (!n.v) {
+		clear(n.u.n->child[0]);
+		clear(n.u.n->child[1]);
+		free(n.u.n);
+	}
+}
+
+void strmap_clear_(struct strmap *map)
+{
+	if (map->u.n)
+		clear(*map);
+	map->u.n = NULL;
+}

+ 224 - 0
ccan/strmap/strmap.h

@@ -0,0 +1,224 @@
+#ifndef CCAN_STRMAP_H
+#define CCAN_STRMAP_H
+#include "config.h"
+#include <ccan/tcon/tcon.h>
+#include <ccan/typesafe_cb/typesafe_cb.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/**
+ * struct strmap - representation of a string map
+ *
+ * It's exposed here to allow you to embed it and so we can inline the
+ * trivial functions.
+ */
+struct strmap {
+	union {
+		struct node *n;
+		const char *s;
+	} u;
+	void *v;
+};
+
+/**
+ * STRMAP_MEMBERS - declare members for a type-specific strmap.
+ * @type: type for this map's values, or void * for any pointer.
+ *
+ * You use this to create your own typed strmap for a particular type.
+ * You can use an integer type, *but* remember you can't use "0" as a
+ * value!
+ *
+ * Example:
+ *	struct strmap_intp {
+ *		STRMAP_MEMBERS(int *);
+ *	};
+ */
+#define STRMAP_MEMBERS(type)			\
+	struct strmap raw;			\
+	TCON(type canary)
+
+
+/**
+ * strmap_init - initialize a string map (empty)
+ * @map: the typed strmap to initialize.
+ *
+ * For completeness; if you've arranged for it to be NULL already you don't
+ * need this.
+ *
+ * Example:
+ *	struct strmap_intp map;
+ *
+ *	strmap_init(&map);
+ */
+#define strmap_init(map) strmap_init_(&(map)->raw)
+
+static inline void strmap_init_(struct strmap *map)
+{
+	map->u.n = NULL;
+}
+
+/**
+ * strmap_empty - is this string map empty?
+ * @map: the typed strmap to check.
+ *
+ * Example:
+ *	if (!strmap_empty(&map))
+ *		abort();
+ */
+#define strmap_empty(map) strmap_empty_(&(map)->raw)
+
+static inline bool strmap_empty_(const struct strmap *map)
+{
+	return map->u.n == NULL;
+}
+
+/**
+ * strmap_get - get a value from a string map
+ * @map: the typed strmap to search.
+ * @member: the string to search for.
+ *
+ * Returns the value, or NULL if it isn't in the map.
+ *
+ * Example:
+ *	int *val = strmap_get(&map, "hello");
+ *	if (val)
+ *		printf("hello => %i\n", *val);
+ */
+#define strmap_get(map, member) \
+	tcon_cast((map), canary, strmap_get_(&(map)->raw, (member)))
+void *strmap_get_(const struct strmap *map, const char *member);
+
+/**
+ * strmap_add - place a member in the string map.
+ * @map: the typed strmap to add to.
+ * @member: the string to place in the map.
+ * @v: the (non-NULL) value.
+ *
+ * This returns false if we run out of memory, or (more normally) if that
+ * string already appears in the map.
+ *
+ * Note that the pointer is placed in the map, the string is not copied.  If
+ * you want a copy in the map, use strdup().  Similarly for the value.
+ *
+ * Example:
+ *	val = malloc(sizeof *val);
+ *	*val = 17;
+ *	if (!strmap_add(&map, "goodbye", val))
+ *		printf("goodbye was already in the map\n");
+ */
+#define strmap_add(map, member, value)				\
+	strmap_add_(&tcon_check((map), canary, (value))->raw,	\
+		    (member), (void *)(value))
+
+bool strmap_add_(struct strmap *map, const char *member, const void *value);
+
+/**
+ * strmap_del - remove a member from the string map.
+ * @map: the typed strmap to delete from.
+ * @member: the string to remove from the map.
+ * @valuep: the value (if non-NULL)
+ *
+ * This returns the string which was passed to strmap_map(), or NULL.
+ * This means that if you allocated a string (eg. using strdup()), you
+ * can free it here.  Similarly, the value is returned in @valuep if
+ * @valuep is not NULL.
+ *
+ * Example:
+ *	if (!strmap_del(&map, "goodbye", NULL))
+ *		printf("goodbye was not in the map?\n");
+ */
+#define strmap_del(map, member, valuep)					\
+	strmap_del_(&tcon_check_ptr((map), canary, valuep)->raw,	\
+		    (member), (void **)valuep)
+char *strmap_del_(struct strmap *map, const char *member, void **valuep);
+
+/**
+ * strmap_clear - remove every member from the map.
+ * @map: the typed strmap to clear.
+ *
+ * The map will be empty after this.
+ *
+ * Example:
+ *	strmap_clear(&map);
+ */
+#define strmap_clear(map) strmap_clear_(&(map)->raw)
+
+void strmap_clear_(struct strmap *map);
+
+/**
+ * strmap_iterate - ordered iteration over a map
+ * @map: the typed strmap to iterate through.
+ * @handle: the function to call.
+ * @arg: the argument for the function (types should match).
+ *
+ * @handle's prototype should be:
+ *	bool @handle(const char *member, type value, typeof(arg) arg)
+ *
+ * If @handle returns true, the iteration will stop.
+ * You should not alter the map within the @handle function!
+ *
+ * Example:
+ *	struct strmap_intp {
+ *		STRMAP_MEMBERS(int *);
+ *	};
+ *	static bool dump_some(const char *member, int *value, int *num)
+ *	{
+ *		// Only dump out num nodes.
+ *		if (*(num--) == 0)
+ *			return true;
+ *		printf("%s=>%i\n", member, *value);
+ *		return false;
+ *	}
+ *
+ *	static void dump_map(const struct strmap_intp *map)
+ *	{
+ *		int max = 100;
+ *		strmap_iterate(map, dump_some, &max);
+ *		if (max < 0)
+ *			printf("... (truncated to 100 entries)\n");
+ *	}
+ */
+#define strmap_iterate(map, handle, arg)				\
+	strmap_iterate_(&(map)->raw,					\
+			typesafe_cb_cast(bool (*)(const char *,		\
+						  void *, void *),	\
+					 bool (*)(const char *,		\
+						  tcon_type((map), canary), \
+						  __typeof__(arg)), (handle)), \
+			(arg))
+void strmap_iterate_(const struct strmap *map,
+		     bool (*handle)(const char *, void *, void *), void *data);
+
+
+/**
+ * strmap_prefix - return a submap matching a prefix
+ * @map: the map.
+ * @prefix: the prefix.
+ *
+ * This returns a pointer into @map, so don't alter @map while using
+ * the return value.  You can use strmap_iterate(), strmap_get() or
+ * strmap_empty() on the returned pointer.
+ *
+ * Example:
+ *	static void dump_prefix(const struct strmap_intp *map,
+ *				const char *prefix)
+ *	{
+ *		int max = 100;
+ *		printf("Nodes with prefix %s:\n", prefix);
+ *		strmap_iterate(strmap_prefix(map, prefix), dump_some, &max);
+ *		if (max < 0)
+ *			printf("... (truncated to 100 entries)\n");
+ *	}
+ */
+#if HAVE_TYPEOF
+#define strmap_prefix(map, prefix) \
+	((const __typeof__(map))strmap_prefix_(&(map)->raw, (prefix)))
+#else
+#define strmap_prefix(map, prefix) \
+	((const void *)strmap_prefix_(&(map)->raw, (prefix)))
+#endif
+
+const struct strmap *strmap_prefix_(const struct strmap *map,
+				    const char *prefix);
+
+#endif /* CCAN_STRMAP_H */

+ 96 - 0
ccan/strmap/test/run-order.c

@@ -0,0 +1,96 @@
+#include <ccan/strmap/strmap.h>
+#include <ccan/strmap/strmap.c>
+#include <ccan/tap/tap.h>
+#include <stdio.h>
+
+#define NUM 1000
+
+static bool in_order(const char *member, char *value, unsigned int *count)
+{
+	int i = atoi(member);
+	ok1(i == atoi(value));
+	ok1(*count == i);
+	(*count)++;
+	return false;
+}
+
+static bool in_order_by_2(const char *member, char *value, unsigned int *count)
+{
+	int i = atoi(member);
+	ok1(i == atoi(value));
+	ok1(*count == i);
+	(*count) += 2;
+	return false;
+}
+
+static bool dump(const char *member, char *value, bool *ok)
+{
+	diag("%s=>%s", member, value);
+	if (value != member + 1)
+		*ok = false;
+	return false;
+}
+
+int main(void)
+{
+	struct strmap_charp {
+		STRMAP_MEMBERS(char *);
+	} map;
+	unsigned int i;
+	char *str[NUM];
+	bool dump_ok;
+
+	plan_tests(1 + NUM * 4 + 3 * NUM);
+	strmap_init(&map);
+
+	for (i = 0; i < NUM; i++) {
+		char template[10];
+		sprintf(template, "%08u", i);
+		str[i] = strdup(template);
+	}
+
+	for (i = 0; i < NUM; i++)
+		strmap_add(&map, str[i], str[i]+1);
+
+	dump_ok = true;
+	strmap_iterate(&map, dump, &dump_ok);
+	ok1(dump_ok);
+
+	/* Iterate. */
+	i = 0;
+	strmap_iterate(&map, in_order, &i);
+
+	/* Preserve order after deletion. */
+	for (i = 0; i < NUM; i += 2) {
+		char *v;
+		ok1(strmap_del(&map, str[i], &v) == str[i]);
+		ok1(v == str[i] + 1);
+	}
+
+	i = 1;
+	strmap_iterate(&map, in_order_by_2, &i);
+
+	for (i = 1; i < NUM; i += 2) {
+		char *v;
+		ok1(strmap_del(&map, str[i], &v) == str[i]);
+		ok1(v == str[i] + 1);
+	}
+
+	/* empty traverse. */
+	strmap_iterate(&map, in_order_by_2, (unsigned int *)NULL);
+
+	/* insert backwards, should be fine. */
+	for (i = 0; i < NUM; i++)
+		strmap_add(&map, str[NUM-1-i], str[NUM-1-i]+1);
+
+	i = 0;
+	strmap_iterate(&map, in_order, &i);
+
+	strmap_clear(&map);
+
+	for (i = 0; i < NUM; i++)
+		free(str[i]);
+
+	/* This exits depending on whether all tests passed */
+	return exit_status();
+}

+ 97 - 0
ccan/strmap/test/run-prefix.c

@@ -0,0 +1,97 @@
+#include <ccan/strmap/strmap.h>
+#include <ccan/strmap/strmap.c>
+#include <ccan/tap/tap.h>
+#include <stdio.h>
+
+/* Must be > 100, see below. */
+#define NUM 200
+
+static bool in_order(const char *index, char *value, unsigned int *count)
+{
+	int i = atoi(index);
+	ok1(i == atoi(value));
+	ok1(*count == i);
+	(*count)++;
+	return false;
+}
+
+static bool find_empty(const char *index, char *value, char *empty)
+{
+	if (index == empty)
+		pass("Found empty entry!");
+	return false;
+}
+
+int main(void)
+{
+	struct map {
+		STRMAP_MEMBERS(char *);
+	};
+	struct map map;
+	const struct map *sub;
+	unsigned int i;
+	char *str[NUM], *empty;
+
+	plan_tests(8 + 2 * (1 + 10 + 100) + 1);
+	strmap_init(&map);
+
+	for (i = 0; i < NUM; i++) {
+		char template[10];
+		sprintf(template, "%08u", i);
+		str[i] = strdup(template);
+	}
+
+	/* All prefixes of an empty map are empty. */
+	sub = strmap_prefix(&map, "a");
+	ok1(strmap_empty(sub));
+	sub = strmap_prefix(&map, "");
+	ok1(strmap_empty(sub));
+
+	for (i = 0; i < NUM; i++)
+		strmap_add(&map, str[i], str[i]+1);
+
+	/* Nothing */
+	sub = strmap_prefix(&map, "a");
+	ok1(strmap_empty(sub));
+
+	/* Everything */
+	sub = strmap_prefix(&map, "0");
+	ok1(sub->raw.u.n == map.raw.u.n);
+	sub = strmap_prefix(&map, "");
+	ok1(sub->raw.u.n == map.raw.u.n);
+
+	/* Single. */
+	sub = strmap_prefix(&map, "00000000");
+	i = 0;
+	strmap_iterate(sub, in_order, &i);
+	ok1(i == 1);
+
+	/* First 10. */
+	sub = strmap_prefix(&map, "0000000");
+	i = 0;
+	strmap_iterate(sub, in_order, &i);
+	ok1(i == 10);
+
+	/* First 100. */
+	sub = strmap_prefix(&map, "000000");
+	i = 0;
+	strmap_iterate(sub, in_order, &i);
+	ok1(i == 100);
+
+	/* Everything, *plus* empty string. */
+	empty = strdup("");
+	strmap_add(&map, empty, empty);
+
+	sub = strmap_prefix(&map, "");
+	/* Check we get *our* empty string back! */
+	strmap_iterate(sub, find_empty, empty);
+
+	strmap_clear(&map);
+
+	for (i = 0; i < NUM; i++)
+		free(str[i]);
+	free(empty);
+
+	/* This exits depending on whether all tests passed */
+	return exit_status();
+}

+ 69 - 0
ccan/strmap/test/run.c

@@ -0,0 +1,69 @@
+#include <ccan/strmap/strmap.h>
+#include <ccan/strmap/strmap.c>
+#include <ccan/tap/tap.h>
+
+int main(void)
+{
+	struct strmap_charp {
+		STRMAP_MEMBERS(char *);
+	} map;
+	const char str[] = "hello";
+	const char val[] = "there";
+	const char none[] = "";
+	char *dup = strdup(str);
+	char *v;
+
+	/* This is how many tests you plan to run */
+	plan_tests(31);
+
+	strmap_init(&map);
+
+	ok1(!strmap_get(&map, str));
+	ok1(!strmap_get(&map, none));
+	ok1(!strmap_del(&map, str, NULL));
+	ok1(!strmap_del(&map, none, NULL));
+
+	ok1(strmap_add(&map, str, val));
+	ok1(strmap_get(&map, str) == val);
+	/* We compare the string, not the pointer. */
+	ok1(strmap_get(&map, dup) == val);
+	ok1(!strmap_get(&map, none));
+
+	/* Add a duplicate should fail. */
+	ok1(!strmap_add(&map, dup, val));
+	ok1(strmap_get(&map, dup) == val);
+
+	/* Delete should return original string. */
+	ok1(strmap_del(&map, dup, &v) == str);
+	ok1(v == val);
+	ok1(!strmap_get(&map, str));
+	ok1(!strmap_get(&map, none));
+
+	/* Try insert and delete of empty string. */
+	ok1(strmap_add(&map, none, none));
+	ok1(strmap_get(&map, none) == none);
+	ok1(!strmap_get(&map, str));
+
+	/* Delete should return original string. */
+	ok1(strmap_del(&map, "", &v) == none);
+	ok1(v == none);
+	ok1(!strmap_get(&map, str));
+	ok1(!strmap_get(&map, none));
+
+	/* Both at once... */
+	ok1(strmap_add(&map, none, none));
+	ok1(strmap_add(&map, str, val));
+	ok1(strmap_get(&map, str) == val);
+	ok1(strmap_get(&map, none) == none);
+	ok1(strmap_del(&map, "does not exist", NULL) == NULL);
+	ok1(strmap_del(&map, "", NULL) == none);
+	ok1(strmap_get(&map, str) == val);
+	ok1(strmap_del(&map, dup, &v) == str);
+	ok1(v == val);
+
+	ok1(strmap_empty(&map));
+	free(dup);
+
+	/* This exits depending on whether all tests passed */
+	return exit_status();
+}