strset.c 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. /* This code is based on the public domain code at
  2. * http://github.com/agl/critbit writtem by Adam Langley
  3. * <agl@imperialviolet.org>.
  4. *
  5. * Here are the main implementation differences:
  6. * (1) We don't strdup the string on insert; we use the pointer we're given.
  7. * (2) We use a straight bit number rather than a mask; it's simpler.
  8. * (3) We don't use the bottom bit of the pointer, but instead use a leading
  9. * zero to distinguish nodes from strings.
  10. * (4) The empty string (which would look like a node) is handled
  11. * using a special "empty node".
  12. * (5) Delete returns the string, so you can free it if you want to.
  13. * (6) Unions instead of void *, bool instead of int.
  14. */
  15. #include <ccan/strset/strset.h>
  16. #include <ccan/short_types/short_types.h>
  17. #include <ccan/likely/likely.h>
  18. #include <ccan/str/str.h>
  19. #include <ccan/ilog/ilog.h>
  20. #include <assert.h>
  21. #include <stdlib.h>
  22. #include <errno.h>
  23. struct node {
  24. /* To differentiate us from strings. */
  25. char nul_byte;
  26. /* The bit where these children differ. */
  27. u8 bit_num;
  28. /* The byte number where first bit differs (-1 == empty string node). */
  29. size_t byte_num;
  30. /* These point to strings or nodes. */
  31. struct strset child[2];
  32. };
  33. /* Closest member to this in a non-empty set. */
  34. static const char *closest(struct strset n, const char *member)
  35. {
  36. size_t len = strlen(member);
  37. const u8 *bytes = (const u8 *)member;
  38. /* Anything with first byte 0 is a node. */
  39. while (!n.u.s[0]) {
  40. u8 direction = 0;
  41. /* Special node which represents the empty string. */
  42. if (unlikely(n.u.n->byte_num == (size_t)-1)) {
  43. n = n.u.n->child[0];
  44. break;
  45. }
  46. if (n.u.n->byte_num < len) {
  47. u8 c = bytes[n.u.n->byte_num];
  48. direction = (c >> n.u.n->bit_num) & 1;
  49. }
  50. n = n.u.n->child[direction];
  51. }
  52. return n.u.s;
  53. }
  54. char *strset_get(const struct strset *set, const char *member)
  55. {
  56. const char *str;
  57. /* Non-empty set? */
  58. if (set->u.n) {
  59. str = closest(*set, member);
  60. if (streq(member, str))
  61. return (char *)str;
  62. }
  63. errno = ENOENT;
  64. return NULL;
  65. }
  66. static bool set_string(struct strset *set,
  67. struct strset *n, const char *member)
  68. {
  69. /* Substitute magic empty node if this is the empty string */
  70. if (unlikely(!member[0])) {
  71. n->u.n = malloc(sizeof(*n->u.n));
  72. if (unlikely(!n->u.n)) {
  73. errno = ENOMEM;
  74. return false;
  75. }
  76. n->u.n->nul_byte = '\0';
  77. n->u.n->byte_num = (size_t)-1;
  78. /* Attach the string to child[0] */
  79. n = &n->u.n->child[0];
  80. }
  81. n->u.s = member;
  82. return true;
  83. }
  84. bool strset_add(struct strset *set, const char *member)
  85. {
  86. size_t len = strlen(member);
  87. const u8 *bytes = (const u8 *)member;
  88. struct strset *np;
  89. const char *str;
  90. struct node *newn;
  91. size_t byte_num;
  92. u8 bit_num, new_dir;
  93. /* Empty set? */
  94. if (!set->u.n) {
  95. return set_string(set, set, member);
  96. }
  97. /* Find closest existing member. */
  98. str = closest(*set, member);
  99. /* Find where they differ. */
  100. for (byte_num = 0; str[byte_num] == member[byte_num]; byte_num++) {
  101. if (member[byte_num] == '\0') {
  102. /* All identical! */
  103. errno = EEXIST;
  104. return false;
  105. }
  106. }
  107. /* Find which bit differs (if we had ilog8, we'd use it) */
  108. bit_num = ilog32_nz((u8)str[byte_num] ^ bytes[byte_num]) - 1;
  109. assert(bit_num < CHAR_BIT);
  110. /* Which direction do we go at this bit? */
  111. new_dir = ((bytes[byte_num]) >> bit_num) & 1;
  112. /* Allocate new node. */
  113. newn = malloc(sizeof(*newn));
  114. if (!newn) {
  115. errno = ENOMEM;
  116. return false;
  117. }
  118. newn->nul_byte = '\0';
  119. newn->byte_num = byte_num;
  120. newn->bit_num = bit_num;
  121. if (unlikely(!set_string(set, &newn->child[new_dir], member))) {
  122. free(newn);
  123. return false;
  124. }
  125. /* Find where to insert: not closest, but first which differs! */
  126. np = set;
  127. while (!np->u.s[0]) {
  128. u8 direction = 0;
  129. /* Special node which represents the empty string will
  130. * break here too! */
  131. if (np->u.n->byte_num > byte_num)
  132. break;
  133. /* Subtle: bit numbers are "backwards" for comparison */
  134. if (np->u.n->byte_num == byte_num && np->u.n->bit_num < bit_num)
  135. break;
  136. if (np->u.n->byte_num < len) {
  137. u8 c = bytes[np->u.n->byte_num];
  138. direction = (c >> np->u.n->bit_num) & 1;
  139. }
  140. np = &np->u.n->child[direction];
  141. }
  142. newn->child[!new_dir]= *np;
  143. np->u.n = newn;
  144. return true;
  145. }
  146. char *strset_del(struct strset *set, const char *member)
  147. {
  148. size_t len = strlen(member);
  149. const u8 *bytes = (const u8 *)member;
  150. struct strset *parent = NULL, *n;
  151. const char *ret = NULL;
  152. u8 direction = 0; /* prevent bogus gcc warning. */
  153. /* Empty set? */
  154. if (!set->u.n) {
  155. errno = ENOENT;
  156. return NULL;
  157. }
  158. /* Find closest, but keep track of parent. */
  159. n = set;
  160. /* Anything with first byte 0 is a node. */
  161. while (!n->u.s[0]) {
  162. u8 c = 0;
  163. /* Special node which represents the empty string. */
  164. if (unlikely(n->u.n->byte_num == (size_t)-1)) {
  165. const char *empty_str = n->u.n->child[0].u.s;
  166. if (member[0]) {
  167. errno = ENOENT;
  168. return NULL;
  169. }
  170. /* Sew empty string back so remaining logic works */
  171. free(n->u.n);
  172. n->u.s = empty_str;
  173. break;
  174. }
  175. parent = n;
  176. if (n->u.n->byte_num < len) {
  177. c = bytes[n->u.n->byte_num];
  178. direction = (c >> n->u.n->bit_num) & 1;
  179. } else
  180. direction = 0;
  181. n = &n->u.n->child[direction];
  182. }
  183. /* Did we find it? */
  184. if (!streq(member, n->u.s)) {
  185. errno = ENOENT;
  186. return NULL;
  187. }
  188. ret = n->u.s;
  189. if (!parent) {
  190. /* We deleted last node. */
  191. set->u.n = NULL;
  192. } else {
  193. struct node *old = parent->u.n;
  194. /* Raise other node to parent. */
  195. *parent = old->child[!direction];
  196. free(old);
  197. }
  198. return (char *)ret;
  199. }
  200. static bool iterate(struct strset n,
  201. bool (*handle)(const char *, void *), const void *data)
  202. {
  203. if (n.u.s[0])
  204. return handle(n.u.s, (void *)data);
  205. if (unlikely(n.u.n->byte_num == (size_t)-1))
  206. return handle(n.u.n->child[0].u.s, (void *)data);
  207. return iterate(n.u.n->child[0], handle, data)
  208. && iterate(n.u.n->child[1], handle, data);
  209. }
  210. void strset_iterate_(const struct strset *set,
  211. bool (*handle)(const char *, void *), const void *data)
  212. {
  213. /* Empty set? */
  214. if (!set->u.n)
  215. return;
  216. iterate(*set, handle, data);
  217. }
  218. const struct strset *strset_prefix(const struct strset *set, const char *prefix)
  219. {
  220. const struct strset *n, *top;
  221. size_t len = strlen(prefix);
  222. const u8 *bytes = (const u8 *)prefix;
  223. /* Empty set -> return empty set. */
  224. if (!set->u.n)
  225. return set;
  226. top = n = set;
  227. /* We walk to find the top, but keep going to check prefix matches. */
  228. while (!n->u.s[0]) {
  229. u8 c = 0, direction;
  230. /* Special node which represents the empty string. */
  231. if (unlikely(n->u.n->byte_num == (size_t)-1)) {
  232. n = &n->u.n->child[0];
  233. break;
  234. }
  235. if (n->u.n->byte_num < len)
  236. c = bytes[n->u.n->byte_num];
  237. direction = (c >> n->u.n->bit_num) & 1;
  238. n = &n->u.n->child[direction];
  239. if (c)
  240. top = n;
  241. }
  242. if (!strstarts(n->u.s, prefix)) {
  243. /* Convenient return for prefixes which do not appear in set. */
  244. static const struct strset empty_set;
  245. return &empty_set;
  246. }
  247. return top;
  248. }
  249. static void clear(struct strset n)
  250. {
  251. if (!n.u.s[0]) {
  252. if (likely(n.u.n->byte_num != (size_t)-1)) {
  253. clear(n.u.n->child[0]);
  254. clear(n.u.n->child[1]);
  255. }
  256. free(n.u.n);
  257. }
  258. }
  259. void strset_clear(struct strset *set)
  260. {
  261. if (set->u.n)
  262. clear(*set);
  263. set->u.n = NULL;
  264. }