hash.h 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. #ifndef CCAN_HASH_H
  2. #define CCAN_HASH_H
  3. #include <stdint.h>
  4. #include <stdlib.h>
  5. #include "config.h"
  6. /* Stolen mostly from: lookup3.c, by Bob Jenkins, May 2006, Public Domain.
  7. *
  8. * http://burtleburtle.net/bob/c/lookup3.c
  9. */
  10. /**
  11. * hash - fast hash of an array for internal use
  12. * @p: the array or pointer to first element
  13. * @num: the number of elements to hash
  14. * @base: the base number to roll into the hash (usually 0)
  15. *
  16. * The memory region pointed to by p is combined with the base to form
  17. * a 32-bit hash.
  18. *
  19. * This hash will have different results on different machines, so is
  20. * only useful for internal hashes (ie. not hashes sent across the
  21. * network or saved to disk).
  22. *
  23. * It may also change with future versions: it could even detect at runtime
  24. * what the fastest hash to use is.
  25. *
  26. * See also: hash_stable.
  27. *
  28. * Example:
  29. * #include "hash/hash.h"
  30. * #include <err.h>
  31. * #include <stdio.h>
  32. *
  33. * // Simple demonstration: idential strings will have the same hash, but
  34. * // two different strings will probably not.
  35. * int main(int argc, char *argv[])
  36. * {
  37. * uint32_t hash1, hash2;
  38. *
  39. * if (argc != 3)
  40. * err(1, "Usage: %s <string1> <string2>", argv[0]);
  41. *
  42. * hash1 = hash(argv[1], strlen(argv[1]), 0);
  43. * hash2 = hash(argv[2], strlen(argv[2]), 0);
  44. * printf("Hash is %s\n", hash1 == hash2 ? "same" : "different");
  45. * return 0;
  46. * }
  47. */
  48. #define hash(p, num, base) hash_any((p), (num)*sizeof(*(p)), (base))
  49. /**
  50. * hash_stable - hash of an array for external use
  51. * @p: the array or pointer to first element
  52. * @num: the number of elements to hash
  53. * @base: the base number to roll into the hash (usually 0)
  54. *
  55. * The memory region pointed to by p is combined with the base to form
  56. * a 32-bit hash.
  57. *
  58. * This hash will have the same results on different machines, so can
  59. * be used for external hashes (ie. hashes sent across the network or
  60. * saved to disk). The results will not change in future versions of
  61. * this module.
  62. *
  63. * Example:
  64. * #include "hash/hash.h"
  65. * #include <err.h>
  66. * #include <stdio.h>
  67. *
  68. * int main(int argc, char *argv[])
  69. * {
  70. * if (argc != 2)
  71. * err(1, "Usage: %s <string-to-hash>", argv[0]);
  72. *
  73. * printf("Hash stable result is %u\n",
  74. * hash_stable(argv[1], strlen(argv[1]), 0));
  75. * return 0;
  76. * }
  77. */
  78. #define hash_stable(p, num, base) \
  79. hash_any_stable((p), (num)*sizeof(*(p)), (base))
  80. /**
  81. * hash_u32 - fast hash an array of 32-bit values for internal use
  82. * @key: the array of uint32_t
  83. * @num: the number of elements to hash
  84. * @base: the base number to roll into the hash (usually 0)
  85. *
  86. * The array of uint32_t pointed to by @key is combined with the base
  87. * to form a 32-bit hash. This is 2-3 times faster than hash() on small
  88. * arrays, but the advantage vanishes over large hashes.
  89. *
  90. * This hash will have different results on different machines, so is
  91. * only useful for internal hashes (ie. not hashes sent across the
  92. * network or saved to disk).
  93. */
  94. uint32_t hash_u32(const uint32_t *key, size_t num, uint32_t base);
  95. /**
  96. * hash_string - very fast hash of an ascii string
  97. * @str: the nul-terminated string
  98. *
  99. * The string is hashed, using a hash function optimized for ASCII and
  100. * similar strings. It's weaker than the other hash functions.
  101. *
  102. * This hash may have different results on different machines, so is
  103. * only useful for internal hashes (ie. not hashes sent across the
  104. * network or saved to disk). The results will be different from the
  105. * other hash functions in this module, too.
  106. */
  107. static inline uint32_t hash_string(const char *string)
  108. {
  109. /* This is Karl Nelson <kenelson@ece.ucdavis.edu>'s X31 hash.
  110. * It's a little faster than the (much better) lookup3 hash(): 56ns vs
  111. * 84ns on my 2GHz Intel Core Duo 2 laptop for a 10 char string. */
  112. uint32_t ret;
  113. for (ret = 0; *string; string++)
  114. ret = (ret << 5) - ret + *string;
  115. return ret;
  116. }
  117. /* Our underlying operations. */
  118. uint32_t hash_any(const void *key, size_t length, uint32_t base);
  119. uint32_t hash_any_stable(const void *key, size_t length, uint32_t base);
  120. /**
  121. * hash_pointer - hash a pointer for internal use
  122. * @p: the pointer value to hash
  123. * @base: the base number to roll into the hash (usually 0)
  124. *
  125. * The pointer p (not what p points to!) is combined with the base to form
  126. * a 32-bit hash.
  127. *
  128. * This hash will have different results on different machines, so is
  129. * only useful for internal hashes (ie. not hashes sent across the
  130. * network or saved to disk).
  131. *
  132. * Example:
  133. * #include "hash/hash.h"
  134. *
  135. * // Code to keep track of memory regions.
  136. * struct region {
  137. * struct region *chain;
  138. * void *start;
  139. * unsigned int size;
  140. * };
  141. * // We keep a simple hash table.
  142. * static struct region *region_hash[128];
  143. *
  144. * static void add_region(struct region *r)
  145. * {
  146. * unsigned int h = hash_pointer(r->start);
  147. *
  148. * r->chain = region_hash[h];
  149. * region_hash[h] = r->chain;
  150. * }
  151. *
  152. * static void find_region(const void *start)
  153. * {
  154. * struct region *r;
  155. *
  156. * for (r = region_hash[hash_pointer(start)]; r; r = r->chain)
  157. * if (r->start == start)
  158. * return r;
  159. * return NULL;
  160. * }
  161. */
  162. static inline uint32_t hash_pointer(const void *p, uint32_t base)
  163. {
  164. if (sizeof(p) % sizeof(uint32_t) == 0) {
  165. /* This convoluted union is the right way of aliasing. */
  166. union {
  167. uint32_t u32[sizeof(p) / sizeof(uint32_t)];
  168. const void *p;
  169. } u;
  170. u.p = p;
  171. return hash_u32(u.u32, sizeof(p) / sizeof(uint32_t), base);
  172. } else
  173. return hash(&p, 1, base);
  174. }
  175. #endif /* HASH_H */