Browse Source

Hashtable routines.

Rusty Russell 15 years ago
parent
commit
581af90298
4 changed files with 639 additions and 0 deletions
  1. 116 0
      ccan/hashtable/_info
  2. 201 0
      ccan/hashtable/hashtable.c
  3. 74 0
      ccan/hashtable/hashtable.h
  4. 248 0
      ccan/hashtable/test/run.c

+ 116 - 0
ccan/hashtable/_info

@@ -0,0 +1,116 @@
+#include <string.h>
+
+/**
+ * hashtable - hash table routines
+ *
+ * A hash table is an efficient structure for looking up keys.  This version
+ * grows with usage and allows efficient deletion.
+ *
+ * Caveat: pointers placed into the hash must be aligned to 64 bits.
+ * ie. from malloc or because the structure they point to is aligned.
+ * An assert will be raised if this isn't true.
+ *
+ * Example:
+ *	#include <ccan/hashtable/hashtable.h>
+ *	#include <ccan/hash/hash.h>
+ *	#include <stdio.h>
+ *	#include <err.h>
+ *	#include <string.h>
+ *	
+ *	struct name_to_digit {
+ *		const char *name;
+ *		unsigned int val;
+ *	};
+ *	
+ *	static struct name_to_digit map[] = { 
+ *		{ "zero", 0},
+ *		{ "one", 1 },
+ *		{ "two", 2 },
+ *		{ "three", 3 },
+ *		{ "four", 4 },
+ *		{ "five", 5 },
+ *		{ "six", 6 },
+ *		{ "seven", 7 },
+ *		{ "eight", 8 },
+ *		{ "nine", 9 }
+ *	};
+ *	
+ *	// Wrapper for rehash function pointer.
+ *	static unsigned long rehash(const void *e, void *unused)
+ *	{
+ *		return hash_string(((struct name_to_digit *)e)->name);
+ *	}
+ *	
+ *	// Comparison function.
+ *	static bool streq(const void *e, void *string)
+ *	{
+ *		return strcmp(((struct name_to_digit *)e)->name, string) == 0;
+ *	}
+ *	
+ *	// We let them add their own aliases, eg. --alias=v=5
+ *	static void add_alias(struct hashtable *ht, const char *alias)
+ *	{
+ *		char *eq;
+ *		struct name_to_digit *n;
+ *	
+ *		n = malloc(sizeof(*n));
+ *		n->name = strdup(alias);
+ *	
+ *		eq = strchr(n->name, '=');
+ *		if (!eq || ((n->val = atoi(eq+1)) == 0 && !strcmp(eq+1, "0")))
+ *			errx(1, "Usage: --alias=<name>=<value>");
+ *		*eq = '\0';
+ *		hashtable_add(ht, hash_string(n->name), n);
+ *	}
+ *	
+ *	int main(int argc, char *argv[])
+ *	{
+ *		struct hashtable *ht;
+ *		unsigned int i;
+ *		unsigned long val;
+ *	
+ *		if (argc < 2)
+ *			errx(1, "Usage: %s [--alias=<name>=<val>]... <str>...",
+ *			     argv[0]);
+ *	
+ *		// Create and populate hash table.
+ *		ht = hashtable_new(rehash, NULL);
+ *		for (i = 0; i < sizeof(map)/sizeof(map[0]); i++)
+ *			hashtable_add(ht, hash_string(map[i].name), &map[i]);
+ *	
+ *		// Add any aliases to the hash table.
+ *		for (i = 1; i < argc; i++) {
+ *			if (!strncmp(argv[i], "--alias=", strlen("--alias=")))
+ *				add_alias(ht, argv[i] + strlen("--alias="));
+ *			else
+ *				break;
+ *		}
+ *	
+ *		// Find the other args in the hash table.
+ *		for (val = 0; i < argc; i++) {
+ *			struct name_to_digit *n;
+ *			n = hashtable_find(ht, hash_string(argv[i]),
+ *					   streq, argv[i]);
+ *			if (!n)
+ *				errx(1, "Invalid digit name %s", argv[i]);
+ *			// Append it to the value we are building up.
+ *			val *= 10;
+ *			val += n->val;
+ *		}
+ *		printf("%lu\n", val);
+ *		return 0;
+ *	}
+ *
+ * Licence: GPLv2 (or later)
+ */
+int main(int argc, char *argv[])
+{
+	if (argc != 2)
+		return 1;
+
+	if (strcmp(argv[1], "depends") == 0) {
+		return 0;
+	}
+
+	return 1;
+}

+ 201 - 0
ccan/hashtable/hashtable.c

@@ -0,0 +1,201 @@
+#include <ccan/hashtable/hashtable.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* This means a struct hashtable takes at least 512 bytes / 1k (32/64 bits). */
+#define HASHTABLE_BASE_BITS 7
+
+/* We use 0x1 as deleted marker. */
+#define HASHTABLE_DELETED ((void *)0x1)
+
+/* Number of redundant bits in a pointer. */
+#define PTR_STEAL_BITS 3
+
+static inline uintptr_t get_extra_ptr_bits(const void *p)
+{
+	return (uintptr_t)p & (((uintptr_t)1 << PTR_STEAL_BITS)-1);
+}
+
+static inline void *get_raw_ptr(const void *p)
+{
+	return (void *)((uintptr_t)p & ~(((uintptr_t)1 << PTR_STEAL_BITS)-1));
+}
+
+static inline void *make_ptr(const void *p, uintptr_t bits)
+{
+	return (void *)((uintptr_t)p | bits);
+}
+
+struct hashtable {
+	unsigned long (*rehash)(const void *elem, void *priv);
+	void *priv;
+	unsigned int bits;
+	unsigned long elems, max;
+	void **table;
+};
+
+static inline bool ptr_is_valid(const void *e)
+{
+	return e > HASHTABLE_DELETED;
+}
+
+struct hashtable *hashtable_new(unsigned long (*rehash)(const void *elem,
+							void *priv),
+				void *priv)
+{
+	struct hashtable *ht = malloc(sizeof(struct hashtable));
+	if (ht) {
+		ht->bits = HASHTABLE_BASE_BITS;
+		ht->rehash = rehash;
+		ht->priv = priv;
+		ht->elems = 0;
+		ht->max = (1 << ht->bits) * 3 / 4;
+		ht->table = calloc(1 << ht->bits, sizeof(void *));
+		if (!ht->table) {
+			free(ht);
+			ht = NULL;
+		}
+	}
+	return ht;
+}
+
+void hashtable_free(struct hashtable *ht)
+{
+	free(ht->table);
+	free(ht);
+}
+
+static unsigned long hash_bucket(const struct hashtable *ht,
+				 unsigned long h, unsigned long *ptrbits)
+{
+	*ptrbits = (h >> ht->bits) & (((uintptr_t)1 << PTR_STEAL_BITS)-1);
+	return h & ((1 << ht->bits)-1);
+}
+
+/* This does not expand the hash table, that's up to caller. */
+static void ht_add(struct hashtable *ht, const void *new, unsigned long h)
+{
+	unsigned long i, h2;
+
+	i = hash_bucket(ht, h, &h2);
+
+	while (ptr_is_valid(ht->table[i])) {
+		i = (i + 1) & ((1 << ht->bits)-1);
+	}
+	ht->table[i] = make_ptr(new, h2);
+}
+
+static bool double_table(struct hashtable *ht)
+{
+	unsigned int i;
+	size_t oldnum = (size_t)1 << ht->bits;
+	void **oldtable, *e;
+
+	oldtable = ht->table;
+	ht->table = calloc(1 << (ht->bits+1), sizeof(void *));
+	if (!ht->table) {
+		ht->table = oldtable;
+		return false;
+	}
+	ht->bits++;
+	ht->max *= 2;
+
+	for (i = 0; i < oldnum; i++) {
+		if (ptr_is_valid(e = oldtable[i])) {
+			e = get_raw_ptr(e);
+			ht_add(ht, e, ht->rehash(e, ht->priv));
+		}
+	}
+	free(oldtable);
+	return true;
+}
+
+bool hashtable_add(struct hashtable *ht, unsigned long hash, const void *p)
+{
+	if (ht->elems+1 > ht->max && !double_table(ht))
+		return false;
+	ht->elems++;
+	assert(p);
+	assert(((uintptr_t)p & (((uintptr_t)1 << PTR_STEAL_BITS)-1)) == 0);
+	ht_add(ht, p, hash);
+	return true;
+}
+
+/* Find bucket for an element. */
+static size_t ht_find(struct hashtable *ht,
+		      unsigned long hash,
+		      bool (*cmp)(const void *htelem, void *cmpdata),
+		      void *cmpdata)
+{
+	void *e;
+	unsigned long i, h2;
+
+	i = hash_bucket(ht, hash, &h2);
+
+	while ((e = ht->table[i]) != NULL) {
+		if (e != HASHTABLE_DELETED
+		    && h2 == get_extra_ptr_bits(e)
+		    && cmp(get_raw_ptr(ht->table[i]), cmpdata))
+			return i;
+		i = (i + 1) & ((1 << ht->bits)-1);
+	}
+	return (size_t)-1;
+}
+
+
+/* If every one of the following buckets are DELETED (up to the next unused
+   one), we can actually mark them all unused. */
+static void delete_run(struct hashtable *ht, unsigned int num)
+{
+	unsigned int i, last = num + 1;
+
+	while (ht->table[last & ((1 << ht->bits)-1)]) {
+		if (ptr_is_valid(ht->table[last & ((1 << ht->bits)-1)]))
+			return;
+		last++;
+	}
+	for (i = num; i < last; i++)
+		ht->table[i & ((1 << ht->bits)-1)] = NULL;
+}
+
+void *hashtable_find(struct hashtable *ht,
+		     unsigned long hash,
+		     bool (*cmp)(const void *htelem, void *cmpdata),
+		     void *cmpdata)
+{
+	size_t i = ht_find(ht, hash, cmp, cmpdata);
+	if (i != (size_t)-1)
+		return get_raw_ptr(ht->table[i]);
+	return NULL;
+}
+
+static bool ptr_cmp(const void *htelem, void *cmpdata)
+{
+	return htelem == cmpdata;
+}
+
+bool hashtable_del(struct hashtable *ht, unsigned long hash, const void *p)
+{
+	size_t i = ht_find(ht, hash, ptr_cmp, (void *)p);
+	if (i == (size_t)-1)
+		return false;
+
+	ht->elems--;
+	ht->table[i] = HASHTABLE_DELETED;
+	delete_run(ht, i);
+	return true;
+}
+	
+	
+void hashtable_traverse(struct hashtable *ht, bool (*cb)(void *p, void *cbarg),
+			void *cbarg)
+{
+	size_t i;
+
+	for (i = 0; i < (1 << ht->bits); i++) {
+		if (ptr_is_valid(ht->table[i]))
+			if (cb(get_raw_ptr(ht->table[i]), cbarg))
+				return;
+	}
+}

+ 74 - 0
ccan/hashtable/hashtable.h

@@ -0,0 +1,74 @@
+#ifndef CCAN_HASHTABLE_H
+#define CCAN_HASHTABLE_H
+#include "config.h"
+#include <stdbool.h>
+
+struct hashtable;
+
+/**
+ * hashtable_new - allocate a hash tree.
+ * @rehash: hash function to use for rehashing.
+ * @priv: private argument to @rehash function.
+ */
+struct hashtable *hashtable_new(unsigned long (*rehash)(const void *elem,
+							void *priv),
+				void *priv);
+
+/**
+ * hashtable_free - dellocate a hash tree.
+ *
+ * This doesn't do anything to any pointers left in it.
+ */
+void hashtable_free(struct hashtable *);
+
+/**
+ * hashtable_find - look for an object in a hash tree.
+ * @ht: the hashtable
+ * @hash: the hash value of the object you are seeking
+ * @cmp: a comparison function: returns true if the object is found
+ * @cmpdata: the data to hand as second arg to @cmp
+ *
+ * Note that you can do all the work inside the cmp function if you want to:
+ * hashtable_find will return @htelem if @cmp returns true.
+ */
+void *hashtable_find(struct hashtable *ht,
+		     unsigned long hash,
+		     bool (*cmp)(const void *htelem, void *cmpdata),
+		     void *cmpdata);
+
+/**
+ * hashtable_add - add an (aligned) pointer into a hash tree.
+ * @ht: the hashtable
+ * @hash: the hash value of the object
+ * @p: the non-NULL pointer, usually from malloc or equiv.
+ *
+ * Note that this implementation (ab)uses the lower bits of the pointer, so
+ * it will abort if the pointer is not aligned to 8 bytes.
+ *
+ * Also note that this can only fail due to allocation failure.  Otherwise, it
+ * returns true.
+ */
+bool hashtable_add(struct hashtable *ht, unsigned long hash, const void *p);
+
+/**
+ * hashtable_del - remove a pointer from a hash tree
+ * @ht: the hashtable
+ * @hash: the hash value of the object
+ * @p: the pointer
+ *
+ * Returns true if the pointer was found (and deleted).
+ */
+bool hashtable_del(struct hashtable *ht, unsigned long hash, const void *p);
+
+/**
+ * hashtable_traverse - call a function on every pointer in hash tree
+ * @ht: the hashtable
+ * @cb: the callback: returns true to abort traversal.
+ * @cbarg: the argument to the callback
+ *
+ * Note that your traversal callback may delete any entry (it won't crash),
+ * but it may make the traverse unreliable.
+ */
+void hashtable_traverse(struct hashtable *ht, bool (*cb)(void *p, void *cbarg),
+			void *cbarg);
+#endif /* CCAN_HASHTABLE_H */

+ 248 - 0
ccan/hashtable/test/run.c

@@ -0,0 +1,248 @@
+#include <ccan/hashtable/hashtable.h>
+#include <ccan/hashtable/hashtable.c>
+#include <ccan/tap/tap.h>
+#include <stdbool.h>
+#include <string.h>
+
+#define NUM_VALS (1 << HASHTABLE_BASE_BITS)
+
+/* We use the number divided by two as the hash (for lots of
+   collisions), plus set all the higher bits so we can detect if they
+   don't get masked out. */
+static unsigned long hash(const void *elem, void *unused)
+{
+	unsigned long h = *(uint64_t *)elem / 2;
+	h |= -1UL << HASHTABLE_BASE_BITS;
+	return h;
+}
+
+static bool objcmp(const void *htelem, void *cmpdata)
+{
+	return *(uint64_t *)htelem == *(uint64_t *)cmpdata;
+}
+
+static void add_vals(struct hashtable *ht,
+		     const uint64_t val[], unsigned int num)
+{
+	uint64_t i;
+
+	for (i = 0; i < num; i++) {
+		if (hashtable_find(ht, hash(&i, NULL), objcmp, &i)) {
+			fail("%llu already in hash", (long long)i);
+			return;
+		}
+		hashtable_add(ht, hash(&val[i], NULL), &val[i]);
+		if (hashtable_find(ht, hash(&i, NULL), objcmp, &i) != &val[i]) {
+			fail("%llu not added to hash", (long long)i);
+			return;
+		}
+	}
+	pass("Added %llu numbers to hash", (long long)i);
+}
+
+static void refill_vals(struct hashtable *ht,
+			const uint64_t val[], unsigned int num)
+{
+	uint64_t i;
+
+	for (i = 0; i < num; i++) {
+		if (hashtable_find(ht, hash(&i, NULL), objcmp, &i))
+			continue;
+		hashtable_add(ht, hash(&val[i], NULL), &val[i]);
+	}
+}
+
+static void find_vals(struct hashtable *ht,
+		      const uint64_t val[], unsigned int num)
+{
+	uint64_t i;
+
+	for (i = 0; i < num; i++) {
+		if (hashtable_find(ht, hash(&i, NULL), objcmp, &i) != &val[i]) {
+			fail("%llu not found in hash", (long long)i);
+			return;
+		}
+	}
+	pass("Found %llu numbers in hash", (long long)i);
+}
+
+static void del_vals(struct hashtable *ht,
+		     const uint64_t val[], unsigned int num)
+{
+	uint64_t i;
+
+	for (i = 0; i < num; i++) {
+		if (!hashtable_del(ht, hash(&val[i], NULL), &val[i])) {
+			fail("%llu not deleted from hash", (long long)i);
+			return;
+		}
+	}
+	pass("Deleted %llu numbers in hash", (long long)i);
+}
+
+struct travarg {
+	unsigned int count;
+	struct hashtable *ht;
+	char touched[NUM_VALS];
+	uint64_t *val;
+};
+
+static bool count(void *p, void *cbarg)
+{
+	struct travarg *travarg = cbarg;
+	travarg->count++;
+	travarg->touched[*(uint64_t *)p]++;
+	return false;
+}
+
+static bool delete_self(void *p, void *cbarg)
+{
+	struct travarg *travarg = cbarg;
+	uint64_t val = *(uint64_t *)p;
+
+	travarg->count++;
+	travarg->touched[val]++;
+	return !hashtable_del(travarg->ht, hash(p, NULL), p);
+}
+
+static bool delete_next(void *p, void *cbarg)
+{
+	struct travarg *travarg = cbarg;
+	uint64_t val = *(uint64_t *)p;
+	uint64_t *next = &travarg->val[(val + 1) % NUM_VALS];
+
+	travarg->count++;
+	travarg->touched[val]++;
+	return !hashtable_del(travarg->ht, hash(next, NULL), next);
+}
+
+static bool delete_prev(void *p, void *cbarg)
+{
+	struct travarg *travarg = cbarg;
+	uint64_t val = *(uint64_t *)p;
+	uint64_t *prev = &travarg->val[(val - 1) % NUM_VALS];
+
+	travarg->count++;
+	travarg->touched[val]++;
+	return !hashtable_del(travarg->ht, hash(prev, NULL), prev);
+}
+
+static bool stop_halfway(void *p, void *cbarg)
+{
+	struct travarg *travarg = cbarg;
+	travarg->count++;
+	travarg->touched[*(uint64_t *)p]++;
+
+	return (travarg->count == NUM_VALS / 2);
+}
+
+static void check_all_touched_once(struct travarg *travarg)
+{
+	unsigned int i;
+
+	for (i = 0; i < NUM_VALS; i++) {
+		if (travarg->touched[i] != 1) {
+			fail("Value %u touched %u times",
+			     i, travarg->touched[i]);
+			return;
+		}
+	}
+	pass("All values touched once");
+}
+
+static void check_only_touched_once(struct travarg *travarg)
+{
+	unsigned int i;
+
+	for (i = 0; i < NUM_VALS; i++) {
+		if (travarg->touched[i] > 1) {
+			fail("Value %u touched multiple (%u) times",
+			     i, travarg->touched[i]);
+			return;
+		}
+	}
+	pass("No value touched twice");
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct hashtable *ht;
+	uint64_t val[NUM_VALS];
+	struct travarg travarg;
+	uint64_t dne;
+
+	plan_tests(20);
+	for (i = 0; i < NUM_VALS; i++)
+		val[i] = i;
+	dne = i;
+
+	ht = hashtable_new(hash, NULL);
+	ok1(ht->max < (1 << ht->bits));
+	ok1(ht->bits == HASHTABLE_BASE_BITS);
+
+	/* We cannot find an entry which doesn't exist. */
+	ok1(!hashtable_find(ht, hash(&dne, NULL), objcmp, &dne));
+
+	/* Fill it, it should increase in size (once). */
+	add_vals(ht, val, NUM_VALS);
+	ok1(ht->bits == HASHTABLE_BASE_BITS + 1);
+	ok1(ht->max < (1 << ht->bits));
+
+	/* Find all. */
+	find_vals(ht, val, NUM_VALS);
+	ok1(!hashtable_find(ht, hash(&dne, NULL), objcmp, &dne));
+
+	/* Delete all. */
+	del_vals(ht, val, NUM_VALS);
+	ok1(!hashtable_find(ht, hash(&val[0], NULL), objcmp, &val[0]));
+
+	/* Refill. */
+	refill_vals(ht, val, NUM_VALS);
+
+	/* Traverse tests. */
+	travarg.ht = ht;
+	travarg.val = val;
+	memset(travarg.touched, 0, sizeof(travarg.touched));
+	travarg.count = 0;
+
+	/* Traverse. */
+	hashtable_traverse(ht, count, &travarg);
+	ok1(travarg.count == NUM_VALS);
+	check_all_touched_once(&travarg);
+
+	memset(travarg.touched, 0, sizeof(travarg.touched));
+	travarg.count = 0;
+	hashtable_traverse(ht, stop_halfway, &travarg);
+	ok1(travarg.count == NUM_VALS / 2);
+	check_only_touched_once(&travarg);
+
+	memset(travarg.touched, 0, sizeof(travarg.touched));
+	travarg.count = 0;
+	i = 0;
+	/* Delete until we make no more progress. */
+	for (;;) {
+		hashtable_traverse(ht, delete_self, &travarg);
+		if (travarg.count == i || travarg.count > NUM_VALS)
+			break;
+		i = travarg.count;
+	}
+	ok1(travarg.count == NUM_VALS);
+	check_all_touched_once(&travarg);
+
+	memset(travarg.touched, 0, sizeof(travarg.touched));
+	travarg.count = 0;
+	refill_vals(ht, val, NUM_VALS);
+	hashtable_traverse(ht, delete_next, &travarg);
+	ok1(travarg.count <= NUM_VALS);
+	check_only_touched_once(&travarg);
+
+	memset(travarg.touched, 0, sizeof(travarg.touched));
+	travarg.count = 0;
+	refill_vals(ht, val, NUM_VALS);
+	hashtable_traverse(ht, delete_prev, &travarg);
+	ok1(travarg.count <= NUM_VALS);
+	check_only_touched_once(&travarg);
+
+	return exit_status();
+}