hash.h 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. /* CC0 (Public domain) - see LICENSE file for details */
  2. #ifndef CCAN_HASH_H
  3. #define CCAN_HASH_H
  4. #include "config.h"
  5. #include <stdint.h>
  6. #include <stdlib.h>
  7. #include <ccan/build_assert/build_assert.h>
  8. /* Stolen mostly from: lookup3.c, by Bob Jenkins, May 2006, Public Domain.
  9. *
  10. * http://burtleburtle.net/bob/c/lookup3.c
  11. */
  /**
   * hash - quick 32-bit hash over an array, for in-process use only
   * @p: the array, or a pointer to its first element
   * @num: how many elements to hash
   * @base: seed value folded into the result (usually 0)
   *
   * The num * sizeof(*p) bytes starting at p are combined with base to
   * produce a 32-bit hash; the work is done by hash_any().
   *
   * Results differ from machine to machine, so only use this for
   * internal hashes (ie. never for values that are sent across the
   * network or written to disk).
   *
   * The algorithm may also change in future versions: it could even pick
   * the fastest hash at runtime.
   *
   * See also: hash64, hash_stable.
   *
   * Example:
   *    #include <ccan/hash/hash.h>
   *    #include <err.h>
   *    #include <stdio.h>
   *    #include <string.h>
   *
   *    // Simple demonstration: identical strings will have the same hash,
   *    // but two different strings will probably not.
   *    int main(int argc, char *argv[])
   *    {
   *        uint32_t hash1, hash2;
   *
   *        if (argc != 3)
   *            err(1, "Usage: %s <string1> <string2>", argv[0]);
   *
   *        hash1 = hash(argv[1], strlen(argv[1]), 0);
   *        hash2 = hash(argv[2], strlen(argv[2]), 0);
   *        printf("Hash is %s\n", hash1 == hash2 ? "same" : "different");
   *        return 0;
   *    }
   */
  #define hash(p, num, base) \
      hash_any((p), sizeof(*(p)) * (num), (base))
  /**
   * hash_stable - hash of an array for external use
   * @p: the array or pointer to first element
   * @num: the number of elements to hash
   * @base: the base number to roll into the hash (usually 0)
   *
   * The array of simple integer types pointed to by p is combined with
   * the base to form a 32-bit hash.
   *
   * This hash will have the same results on different machines, so can
   * be used for external hashes (ie. hashes sent across the network or
   * saved to disk).  The results will not change in future versions of
   * this module.
   *
   * Note that it is only legal to hand an array of simple integer types
   * to this hash (ie. char, uint16_t, int64_t, etc).  In these cases,
   * the same values will have the same hash result, even though the
   * memory representations of integers depend on the machine
   * endianness.
   *
   * Passing elements whose size is not 1, 2, 4 or 8 bytes is rejected at
   * compile time by BUILD_ASSERT_OR_ZERO(); otherwise the element size
   * selects one of hash_stable_8/16/32/64().
   *
   * See also:
   *    hash64_stable
   *
   * Example:
   *    #include <ccan/hash/hash.h>
   *    #include <err.h>
   *    #include <stdio.h>
   *    #include <string.h>
   *
   *    int main(int argc, char *argv[])
   *    {
   *        if (argc != 2)
   *            err(1, "Usage: %s <string-to-hash>", argv[0]);
   *
   *        printf("Hash stable result is %u\n",
   *               hash_stable(argv[1], strlen(argv[1]), 0));
   *        return 0;
   *    }
   */
  /*
   * The size check is kept apart from the selection with a comma
   * expression.  Writing "ASSERT + sizeof(*(p)) == 8 ? ..." would parse as
   * "(ASSERT + sizeof(*(p))) == 8 ? ...", which only works because the
   * assertion term is zero; the comma form also keeps the result type
   * exactly that of the selected hash_stable_N() call.
   */
  #define hash_stable(p, num, base)                                          \
      ((void)BUILD_ASSERT_OR_ZERO(sizeof(*(p)) == 8 || sizeof(*(p)) == 4     \
                                  || sizeof(*(p)) == 2 || sizeof(*(p)) == 1), \
       (sizeof(*(p)) == 8 ? hash_stable_64((p), (num), (base))               \
        : sizeof(*(p)) == 4 ? hash_stable_32((p), (num), (base))             \
        : sizeof(*(p)) == 2 ? hash_stable_16((p), (num), (base))             \
        : hash_stable_8((p), (num), (base))))
  /**
   * hash_u32 - quick hash over an array of 32-bit values, for in-process use
   * @key: the array of uint32_t
   * @num: how many elements to hash
   * @base: seed value folded into the result (usually 0)
   *
   * Combines the uint32_t array at @key with the base into a 32-bit hash.
   * On small arrays this is 2-3 times faster than hash(), but the
   * advantage disappears as the input grows.
   *
   * Results differ from machine to machine, so only use this for
   * internal hashes (ie. never for values that are sent across the
   * network or written to disk).
   */
  uint32_t hash_u32(const uint32_t *key,
                    size_t num,
                    uint32_t base);
  /**
   * hash_string - very fast hash of an ascii string
   * @string: the nul-terminated string
   *
   * Hashes every character up to (not including) the terminating nul,
   * using a function tuned for ASCII and similar strings.  It is weaker
   * than the other hash functions in this module.
   *
   * Results may differ from machine to machine, so only use this for
   * internal hashes (ie. never for values that are sent across the
   * network or written to disk).  The value is also unrelated to what
   * the other hash functions in this module return.
   */
  static inline uint32_t hash_string(const char *string)
  {
      /* Karl Nelson <kenelson@ece.ucdavis.edu>'s X31 hash: multiply the
       * running value by 31 and add the next character.  The original
       * author measured it as a little faster than the (much better)
       * lookup3 hash(): 56ns vs 84ns for a 10 char string on a 2GHz
       * Intel Core Duo 2 laptop. */
      uint32_t acc = 0;

      while (*string != '\0') {
          acc = acc * 31 + *string;
          string++;
      }
      return acc;
  }
  /**
   * hash64 - quick 64-bit hash over an array, for in-process use only
   * @p: the array, or a pointer to its first element
   * @num: how many elements to hash
   * @base: 64-bit seed value folded into the result (usually 0)
   *
   * The num * sizeof(*p) bytes starting at p are combined with base to
   * produce a 64-bit hash; the work is done by hash64_any().
   *
   * Results differ from machine to machine, so only use this for
   * internal hashes (ie. never for values that are sent across the
   * network or written to disk).
   *
   * The algorithm may also change in future versions: it could even pick
   * the fastest hash at runtime.
   *
   * See also: hash.
   *
   * Example:
   *    #include <ccan/hash/hash.h>
   *    #include <err.h>
   *    #include <stdio.h>
   *    #include <string.h>
   *
   *    // Simple demonstration: identical strings will have the same hash,
   *    // but two different strings will probably not.
   *    int main(int argc, char *argv[])
   *    {
   *        uint64_t hash1, hash2;
   *
   *        if (argc != 3)
   *            err(1, "Usage: %s <string1> <string2>", argv[0]);
   *
   *        hash1 = hash64(argv[1], strlen(argv[1]), 0);
   *        hash2 = hash64(argv[2], strlen(argv[2]), 0);
   *        printf("Hash is %s\n", hash1 == hash2 ? "same" : "different");
   *        return 0;
   *    }
   */
  #define hash64(p, num, base) \
      hash64_any((p), sizeof(*(p)) * (num), (base))
  /**
   * hash64_stable - 64 bit hash of an array for external use
   * @p: the array or pointer to first element
   * @num: the number of elements to hash
   * @base: the base number to roll into the hash (usually 0)
   *
   * The array of simple integer types pointed to by p is combined with
   * the base to form a 64-bit hash.
   *
   * This hash will have the same results on different machines, so can
   * be used for external hashes (ie. hashes sent across the network or
   * saved to disk).  The results will not change in future versions of
   * this module.
   *
   * Note that it is only legal to hand an array of simple integer types
   * to this hash (ie. char, uint16_t, int64_t, etc).  In these cases,
   * the same values will have the same hash result, even though the
   * memory representations of integers depend on the machine
   * endianness.
   *
   * Passing elements whose size is not 1, 2, 4 or 8 bytes is rejected at
   * compile time by BUILD_ASSERT_OR_ZERO(); otherwise the element size
   * selects one of hash64_stable_8/16/32/64().
   *
   * See also:
   *    hash_stable
   *
   * Example:
   *    #include <ccan/hash/hash.h>
   *    #include <err.h>
   *    #include <stdio.h>
   *    #include <string.h>
   *
   *    int main(int argc, char *argv[])
   *    {
   *        if (argc != 2)
   *            err(1, "Usage: %s <string-to-hash>", argv[0]);
   *
   *        printf("Hash stable result is %llu\n",
   *               (unsigned long long)hash64_stable(argv[1], strlen(argv[1]), 0));
   *        return 0;
   *    }
   */
  /*
   * The size check is kept apart from the selection with a comma
   * expression.  Writing "ASSERT + sizeof(*(p)) == 8 ? ..." would parse as
   * "(ASSERT + sizeof(*(p))) == 8 ? ...", which only works because the
   * assertion term is zero; the comma form also keeps the result type
   * exactly that of the selected hash64_stable_N() call.
   */
  #define hash64_stable(p, num, base)                                        \
      ((void)BUILD_ASSERT_OR_ZERO(sizeof(*(p)) == 8 || sizeof(*(p)) == 4     \
                                  || sizeof(*(p)) == 2 || sizeof(*(p)) == 1), \
       (sizeof(*(p)) == 8 ? hash64_stable_64((p), (num), (base))             \
        : sizeof(*(p)) == 4 ? hash64_stable_32((p), (num), (base))           \
        : sizeof(*(p)) == 2 ? hash64_stable_16((p), (num), (base))           \
        : hash64_stable_8((p), (num), (base))))
  /**
   * hashl - quick hash of an array sized to match "long", for in-process use
   * @p: the array, or a pointer to its first element
   * @num: how many elements to hash
   * @base: seed value folded into the result (usually 0)
   *
   * Expands to hash() where long is 32 bits wide and to hash64() where
   * it is 64 bits wide; the choice is made at compile time from
   * sizeof(long), and any other width of long fails the build.
   */
  #define hashl(p, num, base)                                          \
      (BUILD_ASSERT_OR_ZERO(sizeof(long) == sizeof(uint64_t)           \
                            || sizeof(long) == sizeof(uint32_t)) +     \
       (sizeof(long) == sizeof(uint32_t)                               \
        ? hash((p), (num), (base)) : hash64((p), (num), (base))))
  /* The functions the macros above expand to.  hash() and hash64() pass a
   * byte length to the *_any() functions; hash_stable() and hash64_stable()
   * pass the element count to the *_stable_N() function matching the
   * element size in bits. */

  /* 32-bit results. */
  uint32_t hash_any(const void *key, size_t length, uint32_t base);
  uint32_t hash_stable_8(const void *key, size_t n, uint32_t base);
  uint32_t hash_stable_16(const void *key, size_t n, uint32_t base);
  uint32_t hash_stable_32(const void *key, size_t n, uint32_t base);
  uint32_t hash_stable_64(const void *key, size_t n, uint32_t base);

  /* 64-bit results. */
  uint64_t hash64_any(const void *key, size_t length, uint64_t base);
  uint64_t hash64_stable_8(const void *key, size_t n, uint64_t base);
  uint64_t hash64_stable_16(const void *key, size_t n, uint64_t base);
  uint64_t hash64_stable_32(const void *key, size_t n, uint64_t base);
  uint64_t hash64_stable_64(const void *key, size_t n, uint64_t base);
  /**
   * hash_pointer - hash a pointer value, for in-process use
   * @p: the pointer value to hash
   * @base: seed value folded into the result (usually 0)
   *
   * The pointer p itself (not the memory it points to!) is combined with
   * the base to produce a 32-bit hash.
   *
   * Results differ from machine to machine, so only use this for
   * internal hashes (ie. never for values that are sent across the
   * network or written to disk).
   *
   * Example:
   *    #include <ccan/hash/hash.h>
   *    #include <stddef.h>
   *
   *    // Code to keep track of memory regions.
   *    struct region {
   *        struct region *chain;
   *        void *start;
   *        unsigned int size;
   *    };
   *    // We keep a simple hash table.
   *    static struct region *region_hash[128];
   *
   *    static void add_region(struct region *r)
   *    {
   *        unsigned int h = hash_pointer(r->start, 0) % 128;
   *
   *        r->chain = region_hash[h];
   *        region_hash[h] = r;
   *    }
   *
   *    static struct region *find_region(const void *start)
   *    {
   *        struct region *r;
   *
   *        for (r = region_hash[hash_pointer(start, 0) % 128]; r; r = r->chain)
   *            if (r->start == start)
   *                return r;
   *        return NULL;
   *    }
   */
  static inline uint32_t hash_pointer(const void *p, uint32_t base)
  {
      /* Pointer size is not a whole number of 32-bit words: hash the
       * pointer object's bytes with the generic hash(). */
      if (sizeof(p) % sizeof(uint32_t) != 0) {
          return hash(&p, 1, base);
      } else {
          /* Reinterpret the pointer as 32-bit words through a union
           * (rather than a pointer cast) so the aliasing is legitimate,
           * then use the faster hash_u32(). */
          union {
              const void *ptr;
              uint32_t words[sizeof(p) / sizeof(uint32_t)];
          } pun;

          pun.ptr = p;
          return hash_u32(pun.words, sizeof(p) / sizeof(uint32_t), base);
      }
  }
  300. #endif /* HASH_H */