| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298 |
- /* This code is based on the public domain code at
- * http://github.com/agl/critbit writtem by Adam Langley
- * <agl@imperialviolet.org>.
- *
- * Here are the main implementation differences:
- * (1) We don't strdup the string on insert; we use the pointer we're given.
- * (2) We use a straight bit number rather than a mask; it's simpler.
- * (3) We don't use the bottom bit of the pointer, but instead use a leading
- * zero to distinguish nodes from strings.
- * (4) The empty string (which would look like a node) is handled
- * using a special "empty node".
- * (5) Delete returns the string, so you can free it if you want to.
- * (6) Unions instead of void *, bool instead of int.
- */
- #include <ccan/strset/strset.h>
- #include <ccan/short_types/short_types.h>
- #include <ccan/likely/likely.h>
- #include <ccan/str/str.h>
- #include <ccan/ilog/ilog.h>
- #include <assert.h>
- #include <stdlib.h>
- struct node {
- /* To differentiate us from strings. */
- char nul_byte;
- /* The bit where these children differ. */
- u8 bit_num;
- /* The byte number where first bit differs (-1 == empty string node). */
- size_t byte_num;
- /* These point to strings or nodes. */
- struct strset child[2];
- };
- /* Closest member to this in a non-empty set. */
- static const char *closest(struct strset n, const char *member)
- {
- size_t len = strlen(member);
- const u8 *bytes = (const u8 *)member;
- /* Anything with first byte 0 is a node. */
- while (!n.u.s[0]) {
- u8 direction = 0;
- /* Special node which represents the empty string. */
- if (unlikely(n.u.n->byte_num == (size_t)-1)) {
- n = n.u.n->child[0];
- break;
- }
- if (n.u.n->byte_num < len) {
- u8 c = bytes[n.u.n->byte_num];
- direction = (c >> n.u.n->bit_num) & 1;
- }
- n = n.u.n->child[direction];
- }
- return n.u.s;
- }
- char *strset_test(const struct strset *set, const char *member)
- {
- const char *str;
- /* Empty set? */
- if (!set->u.n)
- return NULL;
- str = closest(*set, member);
- if (streq(member, str))
- return (char *)str;
- return NULL;
- }
- static bool set_string(struct strset *set,
- struct strset *n, const char *member)
- {
- /* Substitute magic empty node if this is the empty string */
- if (unlikely(!member[0])) {
- n->u.n = malloc(sizeof(*n->u.n));
- if (unlikely(!n->u.n))
- return false;
- n->u.n->nul_byte = '\0';
- n->u.n->byte_num = (size_t)-1;
- /* Attach the string to child[0] */
- n = &n->u.n->child[0];
- }
- n->u.s = member;
- return true;
- }
- bool strset_set(struct strset *set, const char *member)
- {
- size_t len = strlen(member);
- const u8 *bytes = (const u8 *)member;
- struct strset *np;
- const char *str;
- struct node *newn;
- size_t byte_num;
- u8 bit_num, new_dir;
- /* Empty set? */
- if (!set->u.n) {
- return set_string(set, set, member);
- }
- /* Find closest existing member. */
- str = closest(*set, member);
- /* Find where they differ. */
- for (byte_num = 0; str[byte_num] == member[byte_num]; byte_num++) {
- if (member[byte_num] == '\0') {
- /* All identical! */
- return false;
- }
- }
- /* Find which bit differs (if we had ilog8, we'd use it) */
- bit_num = ilog32_nz((u8)str[byte_num] ^ bytes[byte_num]) - 1;
- assert(bit_num < CHAR_BIT);
- /* Which direction do we go at this bit? */
- new_dir = ((bytes[byte_num]) >> bit_num) & 1;
- /* Allocate new node. */
- newn = malloc(sizeof(*newn));
- if (!newn) {
- /* FIXME */
- return false;
- }
- newn->nul_byte = '\0';
- newn->byte_num = byte_num;
- newn->bit_num = bit_num;
- if (unlikely(!set_string(set, &newn->child[new_dir], member))) {
- free(newn);
- return false;
- }
- /* Find where to insert: not closest, but first which differs! */
- np = set;
- while (!np->u.s[0]) {
- u8 direction = 0;
- /* Special node which represents the empty string will
- * break here too! */
- if (np->u.n->byte_num > byte_num)
- break;
- /* Subtle: bit numbers are "backwards" for comparison */
- if (np->u.n->byte_num == byte_num && np->u.n->bit_num < bit_num)
- break;
- if (np->u.n->byte_num < len) {
- u8 c = bytes[np->u.n->byte_num];
- direction = (c >> np->u.n->bit_num) & 1;
- }
- np = &np->u.n->child[direction];
- }
- newn->child[!new_dir]= *np;
- np->u.n = newn;
- return true;
- }
- char *strset_clear(struct strset *set, const char *member)
- {
- size_t len = strlen(member);
- const u8 *bytes = (const u8 *)member;
- struct strset *parent = NULL, *n;
- const char *ret = NULL;
- u8 direction = 0; /* prevent bogus gcc warning. */
- /* Empty set? */
- if (!set->u.n)
- return NULL;
- /* Find closest, but keep track of parent. */
- n = set;
- /* Anything with first byte 0 is a node. */
- while (!n->u.s[0]) {
- u8 c = 0;
- /* Special node which represents the empty string. */
- if (unlikely(n->u.n->byte_num == (size_t)-1)) {
- const char *empty_str = n->u.n->child[0].u.s;
- if (member[0])
- return NULL;
- /* Sew empty string back so remaining logic works */
- free(n->u.n);
- n->u.s = empty_str;
- break;
- }
- parent = n;
- if (n->u.n->byte_num < len) {
- c = bytes[n->u.n->byte_num];
- direction = (c >> n->u.n->bit_num) & 1;
- } else
- direction = 0;
- n = &n->u.n->child[direction];
- }
- /* Did we find it? */
- if (!streq(member, n->u.s))
- return NULL;
- ret = n->u.s;
- if (!parent) {
- /* We deleted last node. */
- set->u.n = NULL;
- } else {
- struct node *old = parent->u.n;
- /* Raise other node to parent. */
- *parent = old->child[!direction];
- free(old);
- }
- return (char *)ret;
- }
- static bool iterate(struct strset n,
- bool (*handle)(const char *, void *), void *data)
- {
- if (n.u.s[0])
- return handle(n.u.s, data);
- if (unlikely(n.u.n->byte_num == (size_t)-1))
- return handle(n.u.n->child[0].u.s, data);
- return iterate(n.u.n->child[0], handle, data)
- || iterate(n.u.n->child[1], handle, data);
- }
- void strset_iterate_(const struct strset *set,
- bool (*handle)(const char *, void *), void *data)
- {
- /* Empty set? */
- if (!set->u.n)
- return;
- iterate(*set, handle, data);
- }
- const struct strset *strset_prefix(const struct strset *set, const char *prefix)
- {
- const struct strset *n, *top;
- size_t len = strlen(prefix);
- const u8 *bytes = (const u8 *)prefix;
- /* Empty set -> return empty set. */
- if (!set->u.n)
- return set;
- top = n = set;
- /* We walk to find the top, but keep going to check prefix matches. */
- while (!n->u.s[0]) {
- u8 c = 0, direction;
- /* Special node which represents the empty string. */
- if (unlikely(n->u.n->byte_num == (size_t)-1)) {
- n = &n->u.n->child[0];
- break;
- }
- if (n->u.n->byte_num < len)
- c = bytes[n->u.n->byte_num];
- direction = (c >> n->u.n->bit_num) & 1;
- n = &n->u.n->child[direction];
- if (c)
- top = n;
- }
- if (!strstarts(n->u.s, prefix)) {
- /* Convenient return for prefixes which do not appear in set. */
- static const struct strset empty_set;
- return &empty_set;
- }
- return top;
- }
- static void destroy(struct strset n)
- {
- if (!n.u.s[0]) {
- if (likely(n.u.n->byte_num != (size_t)-1)) {
- destroy(n.u.n->child[0]);
- destroy(n.u.n->child[1]);
- }
- free(n.u.n);
- }
- }
- void strset_destroy(struct strset *set)
- {
- if (set->u.n)
- destroy(*set);
- set->u.n = NULL;
- }
|