Browse Source

hash: 64 bit variants.

Rusty Russell 15 years ago
parent
commit
7449ae0ff0
5 changed files with 350 additions and 265 deletions
  1. 2 0
      ccan/hash/_info
  2. 55 251
      ccan/hash/hash.c
  3. 123 12
      ccan/hash/hash.h
  4. 137 1
      ccan/hash/test/api-hash_stable.c
  5. 33 1
      ccan/hash/test/run.c

+ 2 - 0
ccan/hash/_info

@@ -1,4 +1,5 @@
 #include <string.h>
 #include <string.h>
+#include <stdio.h>
 
 
 /**
 /**
  * hash - routines for hashing bytes
  * hash - routines for hashing bytes
@@ -22,6 +23,7 @@ int main(int argc, char *argv[])
 		return 1;
 		return 1;
 
 
 	if (strcmp(argv[1], "depends") == 0) {
 	if (strcmp(argv[1], "depends") == 0) {
+		printf("ccan/build_assert\n");
 		return 0;
 		return 0;
 	}
 	}
 
 

+ 55 - 251
ccan/hash/hash.c

@@ -208,63 +208,16 @@ uint32_t        initval)         /* the previous hash, or an arbitrary value */
   return c;
   return c;
 }
 }
 
 
-
-#if 0
-/*
---------------------------------------------------------------------
-hash_word2() -- same as hash_word(), but take two seeds and return two
-32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
-both be initialized with seeds.  If you pass in (*pb)==0, the output 
-(*pc) will be the same as the return value from hash_word().
---------------------------------------------------------------------
-*/
-void hash_word2 (
-const uint32_t *k,                   /* the key, an array of uint32_t values */
-size_t          length,               /* the length of the key, in uint32_ts */
-uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
-uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
-{
-  uint32_t a,b,c;
-
-  /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
-  c += *pb;
-
-  /*------------------------------------------------- handle most of the key */
-  while (length > 3)
-  {
-    a += k[0];
-    b += k[1];
-    c += k[2];
-    mix(a,b,c);
-    length -= 3;
-    k += 3;
-  }
-
-  /*------------------------------------------- handle the last 3 uint32_t's */
-  switch(length)                     /* all the case statements fall through */
-  { 
-  case 3 : c+=k[2];
-  case 2 : b+=k[1];
-  case 1 : a+=k[0];
-    final(a,b,c);
-  case 0:     /* case 0: nothing left to add */
-    break;
-  }
-  /*------------------------------------------------------ report the result */
-  *pc=c; *pb=b;
-}
-#endif
-
 /*
 /*
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 hashlittle() -- hash a variable-length key into a 32-bit value
 hashlittle() -- hash a variable-length key into a 32-bit value
   k       : the key (the unaligned variable-length array of bytes)
   k       : the key (the unaligned variable-length array of bytes)
   length  : the length of the key, counting by bytes
   length  : the length of the key, counting by bytes
-  initval : can be any 4-byte value
+  val2    : IN: can be any 4-byte value OUT: second 32 bit hash.
 Returns a 32-bit value.  Every bit of the key affects every bit of
 Returns a 32-bit value.  Every bit of the key affects every bit of
 the return value.  Two keys differing by one or two bits will have
 the return value.  Two keys differing by one or two bits will have
-totally different hash values.
+totally different hash values.  Note that the return value is better
+mixed than val2, so use that first.
 
 
 The best hash table sizes are powers of 2.  There is no need to do
 The best hash table sizes are powers of 2.  There is no need to do
 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
@@ -283,13 +236,13 @@ acceptable.  Do NOT use for cryptographic purposes.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 */
 */
 
 
-static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
+static uint32_t hashlittle( const void *key, size_t length, uint32_t *val2 )
 {
 {
   uint32_t a,b,c;                                          /* internal state */
   uint32_t a,b,c;                                          /* internal state */
   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
 
 
   /* Set up the internal state */
   /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2;
 
 
   u.ptr = key;
   u.ptr = key;
   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
@@ -451,209 +404,23 @@ static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
   }
   }
 
 
   final(a,b,c);
   final(a,b,c);
+  *val2 = b;
   return c;
   return c;
 }
 }
 
 
-#if 0
-/*
- * hashlittle2: return 2 32-bit hash values
- *
- * This is identical to hashlittle(), except it returns two 32-bit hash
- * values instead of just one.  This is good enough for hash table
- * lookup with 2^^64 buckets, or if you want a second hash if you're not
- * happy with the first, or if you want a probably-unique 64-bit ID for
- * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
- * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
- */
-void hashlittle2( 
-  const void *key,       /* the key to hash */
-  size_t      length,    /* length of the key */
-  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
-  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
-{
-  uint32_t a,b,c;                                          /* internal state */
-  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
-
-  /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
-  c += *pb;
-
-  u.ptr = key;
-  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
-    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
-    const uint8_t  *k8;
-
-    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      b += k[1];
-      c += k[2];
-      mix(a,b,c);
-      length -= 12;
-      k += 3;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    /* 
-     * "k[2]&0xffffff" actually reads beyond the end of the string, but
-     * then masks off the part it's not allowed to read.  Because the
-     * string is aligned, the masked-off tail is in the same word as the
-     * rest of the string.  Every machine with memory protection I've seen
-     * does it on word boundaries, so is OK with this.  But VALGRIND will
-     * still catch it and complain.  The masking trick does make the hash
-     * noticably faster for short strings (like English words).
-     */
-#ifndef VALGRIND
-
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
-    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
-    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
-    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
-    case 5 : b+=k[1]&0xff; a+=k[0]; break;
-    case 4 : a+=k[0]; break;
-    case 3 : a+=k[0]&0xffffff; break;
-    case 2 : a+=k[0]&0xffff; break;
-    case 1 : a+=k[0]&0xff; break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-#else /* make valgrind happy */
-
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
-    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
-    case 9 : c+=k8[8];                   /* fall through */
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
-    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
-    case 5 : b+=k8[4];                   /* fall through */
-    case 4 : a+=k[0]; break;
-    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
-    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
-    case 1 : a+=k8[0]; break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-#endif /* !valgrind */
-
-  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
-    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
-    const uint8_t  *k8;
-
-    /*--------------- all but last block: aligned reads and different mixing */
-    while (length > 12)
-    {
-      a += k[0] + (((uint32_t)k[1])<<16);
-      b += k[2] + (((uint32_t)k[3])<<16);
-      c += k[4] + (((uint32_t)k[5])<<16);
-      mix(a,b,c);
-      length -= 12;
-      k += 6;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
-    case 10: c+=k[4];
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 9 : c+=k8[8];                      /* fall through */
-    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
-    case 6 : b+=k[2];
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 5 : b+=k8[4];                      /* fall through */
-    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
-    case 2 : a+=k[0];
-             break;
-    case 1 : a+=k8[0];
-             break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-  } else {                        /* need to read the key one byte at a time */
-    const uint8_t *k = (const uint8_t *)key;
-
-    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      a += ((uint32_t)k[1])<<8;
-      a += ((uint32_t)k[2])<<16;
-      a += ((uint32_t)k[3])<<24;
-      b += k[4];
-      b += ((uint32_t)k[5])<<8;
-      b += ((uint32_t)k[6])<<16;
-      b += ((uint32_t)k[7])<<24;
-      c += k[8];
-      c += ((uint32_t)k[9])<<8;
-      c += ((uint32_t)k[10])<<16;
-      c += ((uint32_t)k[11])<<24;
-      mix(a,b,c);
-      length -= 12;
-      k += 12;
-    }
-
-    /*-------------------------------- last block: affect all 32 bits of (c) */
-    switch(length)                   /* all the case statements fall through */
-    {
-    case 12: c+=((uint32_t)k[11])<<24;
-    case 11: c+=((uint32_t)k[10])<<16;
-    case 10: c+=((uint32_t)k[9])<<8;
-    case 9 : c+=k[8];
-    case 8 : b+=((uint32_t)k[7])<<24;
-    case 7 : b+=((uint32_t)k[6])<<16;
-    case 6 : b+=((uint32_t)k[5])<<8;
-    case 5 : b+=k[4];
-    case 4 : a+=((uint32_t)k[3])<<24;
-    case 3 : a+=((uint32_t)k[2])<<16;
-    case 2 : a+=((uint32_t)k[1])<<8;
-    case 1 : a+=k[0];
-             break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-  }
-
-  final(a,b,c);
-  *pc=c; *pb=b;
-}
-#endif
-
-
 /*
 /*
  * hashbig():
  * hashbig():
  * This is the same as hash_word() on big-endian machines.  It is different
  * This is the same as hash_word() on big-endian machines.  It is different
  * from hashlittle() on all machines.  hashbig() takes advantage of
  * from hashlittle() on all machines.  hashbig() takes advantage of
  * big-endian byte ordering. 
  * big-endian byte ordering. 
  */
  */
-static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
+static uint32_t hashbig( const void *key, size_t length, uint32_t *val2)
 {
 {
   uint32_t a,b,c;
   uint32_t a,b,c;
   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
 
 
   /* Set up the internal state */
   /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2;
 
 
   u.ptr = key;
   u.ptr = key;
   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
@@ -768,6 +535,7 @@ static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
   }
   }
 
 
   final(a,b,c);
   final(a,b,c);
+  *val2 = b;
   return c;
   return c;
 }
 }
 
 
@@ -775,7 +543,7 @@ static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
  * element.  This delivers least-surprise: hash such as "int arr[] = {
  * element.  This delivers least-surprise: hash such as "int arr[] = {
  * 1, 2 }; hash_stable(arr, 2, 0);" will be the same on big and little
  * 1, 2 }; hash_stable(arr, 2, 0);" will be the same on big and little
  * endian machines, even though a bytewise hash wouldn't be. */
  * endian machines, even though a bytewise hash wouldn't be. */
-uint32_t hash_stable_64(const void *key, size_t n, uint32_t base)
+uint64_t hash64_stable_64(const void *key, size_t n, uint32_t base)
 {
 {
 	const uint64_t *k = key;
 	const uint64_t *k = key;
 	uint32_t a,b,c;
 	uint32_t a,b,c;
@@ -811,10 +579,10 @@ uint32_t hash_stable_64(const void *key, size_t n, uint32_t base)
 		return c;
 		return c;
 	}
 	}
 	final(a,b,c);
 	final(a,b,c);
-	return c;
+	return ((uint64_t)b << 32) | c;
 }
 }
 
 
-uint32_t hash_stable_32(const void *key, size_t n, uint32_t base)
+uint64_t hash64_stable_32(const void *key, size_t n, uint32_t base)
 {
 {
 	const uint32_t *k = key;
 	const uint32_t *k = key;
 	uint32_t a,b,c;
 	uint32_t a,b,c;
@@ -841,10 +609,10 @@ uint32_t hash_stable_32(const void *key, size_t n, uint32_t base)
 		return c;
 		return c;
 	}
 	}
 	final(a,b,c);
 	final(a,b,c);
-	return c;
+	return ((uint64_t)b << 32) | c;
 }
 }
 
 
-uint32_t hash_stable_16(const void *key, size_t n, uint32_t base)
+uint64_t hash64_stable_16(const void *key, size_t n, uint32_t base)
 {
 {
 	const uint16_t *k = key;
 	const uint16_t *k = key;
 	uint32_t a,b,c;
 	uint32_t a,b,c;
@@ -878,20 +646,56 @@ uint32_t hash_stable_16(const void *key, size_t n, uint32_t base)
 		return c;
 		return c;
 	}
 	}
 	final(a,b,c);
 	final(a,b,c);
-	return c;
+	return ((uint64_t)b << 32) | c;
 }
 }
 	
 	
-uint32_t hash_stable_8(const void *key, size_t n, uint32_t base)
+uint64_t hash64_stable_8(const void *key, size_t n, uint32_t base)
 {
 {
-	return hashlittle(key, n, base);
+	uint32_t lower = hashlittle(key, n, &base);
+
+	return ((uint64_t)base << 32) | lower;	
 }
 }
 
 
 uint32_t hash_any(const void *key, size_t length, uint32_t base)
 uint32_t hash_any(const void *key, size_t length, uint32_t base)
 {
 {
 	if (HASH_BIG_ENDIAN)
 	if (HASH_BIG_ENDIAN)
-		return hashbig(key, length, base);
+		return hashbig(key, length, &base);
 	else
 	else
-		return hashlittle(key, length, base);
+		return hashlittle(key, length, &base);
+}
+
+uint32_t hash_stable_64(const void *key, size_t n, uint32_t base)
+{
+	return hash64_stable_64(key, n, base);
+}
+
+uint32_t hash_stable_32(const void *key, size_t n, uint32_t base)
+{
+	return hash64_stable_32(key, n, base);
+}
+
+uint32_t hash_stable_16(const void *key, size_t n, uint32_t base)
+{
+	return hash64_stable_16(key, n, base);
+}
+
+uint32_t hash_stable_8(const void *key, size_t n, uint32_t base)
+{
+	return hashlittle(key, n, &base);
+}
+
+/* Jenkins' lookup8 is a 64 bit hash, but he says it's obsolete.  Use
+ * the plain one and recombine into 64 bits. */
+uint64_t hash64_any(const void *key, size_t length, uint32_t base)
+{
+	uint32_t lower;
+
+	if (HASH_BIG_ENDIAN)
+		lower = hashbig(key, length, &base);
+	else
+		lower = hashlittle(key, length, &base);
+
+	return ((uint64_t)base << 32) | lower;
 }
 }
 
 
 #ifdef SELF_TEST
 #ifdef SELF_TEST

+ 123 - 12
ccan/hash/hash.h

@@ -3,6 +3,7 @@
 #include <stdint.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdlib.h>
 #include "config.h"
 #include "config.h"
+#include <ccan/build_assert/build_assert.h>
 
 
 /* Stolen mostly from: lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 /* Stolen mostly from: lookup3.c, by Bob Jenkins, May 2006, Public Domain.
  * 
  * 
@@ -25,7 +26,7 @@
  * It may also change with future versions: it could even detect at runtime
  * It may also change with future versions: it could even detect at runtime
  * what the fastest hash to use is.
  * what the fastest hash to use is.
  *
  *
- * See also: hash_stable.
+ * See also: hash64, hash_stable.
  *
  *
  * Example:
  * Example:
  *	#include "hash/hash.h"
  *	#include "hash/hash.h"
@@ -69,6 +70,9 @@
  * memory representations of integers depend on the machine
  * memory representations of integers depend on the machine
  * endianness.
  * endianness.
  *
  *
+ * See also:
+ *	hash64_stable
+ *
  * Example:
  * Example:
  *	#include "hash/hash.h"
  *	#include "hash/hash.h"
  *	#include <err.h>
  *	#include <err.h>
@@ -85,11 +89,12 @@
  *	}
  *	}
  */
  */
 #define hash_stable(p, num, base)					\
 #define hash_stable(p, num, base)					\
-	(sizeof(*(p)) == 8 ? hash_stable_64((p), (num), (base))		\
+	(EXPR_BUILD_ASSERT(sizeof(*(p)) == 8 || sizeof(*(p)) == 4	\
+			   || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) +	\
+	 sizeof(*(p)) == 8 ? hash_stable_64((p), (num), (base))		\
 	 : sizeof(*(p)) == 4 ? hash_stable_32((p), (num), (base))	\
 	 : sizeof(*(p)) == 4 ? hash_stable_32((p), (num), (base))	\
 	 : sizeof(*(p)) == 2 ? hash_stable_16((p), (num), (base))	\
 	 : sizeof(*(p)) == 2 ? hash_stable_16((p), (num), (base))	\
-	 : sizeof(*(p)) == 1 ? hash_stable_8((p), (num), (base))	\
-	 : hash_stable_fail((p), (num), sizeof(*(p)), (base)))
+	 : hash_stable_8((p), (num), (base)))
 
 
 /**
 /**
  * hash_u32 - fast hash an array of 32-bit values for internal use
  * hash_u32 - fast hash an array of 32-bit values for internal use
@@ -107,6 +112,18 @@
  */
  */
 uint32_t hash_u32(const uint32_t *key, size_t num, uint32_t base);
 uint32_t hash_u32(const uint32_t *key, size_t num, uint32_t base);
 
 
+/* Our underlying operations. */
+uint32_t hash_any(const void *key, size_t length, uint32_t base);
+uint32_t hash_stable_64(const void *key, size_t n, uint32_t base);
+uint32_t hash_stable_32(const void *key, size_t n, uint32_t base);
+uint32_t hash_stable_16(const void *key, size_t n, uint32_t base);
+uint32_t hash_stable_8(const void *key, size_t n, uint32_t base);
+uint64_t hash64_any(const void *key, size_t length, uint32_t base);
+uint64_t hash64_stable_64(const void *key, size_t n, uint32_t base);
+uint64_t hash64_stable_32(const void *key, size_t n, uint32_t base);
+uint64_t hash64_stable_16(const void *key, size_t n, uint32_t base);
+uint64_t hash64_stable_8(const void *key, size_t n, uint32_t base);
+
 /**
 /**
  * hash_string - very fast hash of an ascii string
  * hash_string - very fast hash of an ascii string
  * @str: the nul-terminated string
  * @str: the nul-terminated string
@@ -132,14 +149,6 @@ static inline uint32_t hash_string(const char *string)
 	return ret;
 	return ret;
 }
 }
 
 
-/* Our underlying operations. */
-uint32_t hash_any(const void *key, size_t length, uint32_t base);
-uint32_t hash_stable_64(const void *key, size_t n, uint32_t base);
-uint32_t hash_stable_32(const void *key, size_t n, uint32_t base);
-uint32_t hash_stable_16(const void *key, size_t n, uint32_t base);
-uint32_t hash_stable_8(const void *key, size_t n, uint32_t base);
-uint32_t hash_stable_fail(const void *key, size_t n, size_t len, uint32_t base);
-
 /**
 /**
  * hash_pointer - hash a pointer for internal use
  * hash_pointer - hash a pointer for internal use
  * @p: the pointer value to hash
  * @p: the pointer value to hash
@@ -195,4 +204,106 @@ static inline uint32_t hash_pointer(const void *p, uint32_t base)
 	} else
 	} else
 		return hash(&p, 1, base);
 		return hash(&p, 1, base);
 }
 }
+
+/**
+ * hash64 - fast 64-bit hash of an array for internal use
+ * @p: the array or pointer to first element
+ * @num: the number of elements to hash
+ * @base: the base number to roll into the hash (usually 0)
+ *
+ * The memory region pointed to by p is combined with the base to form
+ * a 64-bit hash.
+ *
+ * This hash will have different results on different machines, so is
+ * only useful for internal hashes (ie. not hashes sent across the
+ * network or saved to disk).
+ *
+ * It may also change with future versions: it could even detect at runtime
+ * what the fastest hash to use is.
+ *
+ * See also: hash.
+ *
+ * Example:
+ *	#include <ccan/hash/hash.h>
+ *	#include <err.h>
+ *	#include <stdio.h>
+ *
+ *	// Simple demonstration: identical strings will have the same hash, but
+ *	// two different strings will probably not.
+ *	int main(int argc, char *argv[])
+ *	{
+ *		uint64_t hash1, hash2;
+ *
+ *		if (argc != 3)
+ *			err(1, "Usage: %s <string1> <string2>", argv[0]);
+ *
+ *		hash1 = hash64(argv[1], strlen(argv[1]), 0);
+ *		hash2 = hash64(argv[2], strlen(argv[2]), 0);
+ *		printf("Hash is %s\n", hash1 == hash2 ? "same" : "different");
+ *		return 0;
+ *	}
+ */
+#define hash64(p, num, base) hash64_any((p), (num)*sizeof(*(p)), (base))
+
+/**
+ * hash64_stable - 64 bit hash of an array for external use
+ * @p: the array or pointer to first element
+ * @num: the number of elements to hash
+ * @base: the base number to roll into the hash (usually 0)
+ *
+ * The array of simple integer types pointed to by p is combined with
+ * the base to form a 64-bit hash.
+ *
+ * This hash will have the same results on different machines, so can
+ * be used for external hashes (ie. hashes sent across the network or
+ * saved to disk).  The results will not change in future versions of
+ * this module.
+ *
+ * Note that it is only legal to hand an array of simple integer types
+ * to this hash (ie. char, uint16_t, int64_t, etc).  In these cases,
+ * the same values will have the same hash result, even though the
+ * memory representations of integers depend on the machine
+ * endianness.
+ *
+ * See also:
+ *	hash_stable
+ *
+ * Example:
+ *	#include <ccan/hash/hash.h>
+ *	#include <err.h>
+ *	#include <stdio.h>
+ *
+ *	int main(int argc, char *argv[])
+ *	{
+ *		if (argc != 2)
+ *			err(1, "Usage: %s <string-to-hash>", argv[0]);
+ *
+ *		printf("Hash stable result is %llu\n",
+ *		       (unsigned long long)hash64_stable(argv[1], strlen(argv[1]), 0));
+ *		return 0;
+ *	}
+ */
+#define hash64_stable(p, num, base)					\
+	(EXPR_BUILD_ASSERT(sizeof(*(p)) == 8 || sizeof(*(p)) == 4	\
+			   || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) +	\
+	 sizeof(*(p)) == 8 ? hash64_stable_64((p), (num), (base))	\
+	 : sizeof(*(p)) == 4 ? hash64_stable_32((p), (num), (base))	\
+	 : sizeof(*(p)) == 2 ? hash64_stable_16((p), (num), (base))	\
+	 : hash64_stable_8((p), (num), (base)))
+
+
+/**
+ * hashl - fast 32/64-bit hash of an array for internal use
+ * @p: the array or pointer to first element
+ * @num: the number of elements to hash
+ * @base: the base number to roll into the hash (usually 0)
+ *
+ * This is hash() where long is 32 bits, hash64() where long is 64 bits.
+ */
+#define hashl(p, num, base)						\
+	(EXPR_BUILD_ASSERT(sizeof(long) == sizeof(uint32_t)		\
+			   || sizeof(long) == sizeof(uint64_t)) +	\
+	(sizeof(long) == sizeof(uint64_t)				\
+	 ? hash64((p), (num), (base)) : hash((p), (num), (base))))
+
 #endif /* HASH_H */
 #endif /* HASH_H */

+ 137 - 1
ccan/hash/test/api-hash_stable.c

@@ -21,7 +21,7 @@ int main(int argc, char *argv[])
 		u64array[i] = i;
 		u64array[i] = i;
 	}
 	}
 
 
-	plan_tests(132);
+	plan_tests(264);
 
 
 	/* hash_stable is API-guaranteed. */
 	/* hash_stable is API-guaranteed. */
 	ok1(hash_stable(u8array, ARRAY_WORDS, 0) == 0x1d4833cc);
 	ok1(hash_stable(u8array, ARRAY_WORDS, 0) == 0x1d4833cc);
@@ -160,5 +160,141 @@ int main(int argc, char *argv[])
 	ok1(hash_stable(u64array, ARRAY_WORDS, 1073741824) == 0x1b346394);
 	ok1(hash_stable(u64array, ARRAY_WORDS, 1073741824) == 0x1b346394);
 	ok1(hash_stable(u64array, ARRAY_WORDS, 2147483648U) == 0x6c3a1592);
 	ok1(hash_stable(u64array, ARRAY_WORDS, 2147483648U) == 0x6c3a1592);
 
 
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 0) == 16887282882572727244ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 1) == 12032777473133454818ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 2) == 18183407363221487738ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 4) == 17860764172704150171ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 8) == 18076051600675559233ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 16) == 9909361918431556721ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 32) == 12937969888744675813ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 64) == 5245669057381736951ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 128) == 4376874646406519665ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 256) == 14219974419871569521ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 512) == 2263415354134458951ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 1024) == 4953859694526221685ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 2048) == 3432228642067641593ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 4096) == 1219647244417697483ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 8192) == 7629939424585859553ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 16384) == 10041660531376789749ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 32768) == 13859885793922603927ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 65536) == 15069060338344675120ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 131072) == 818163430835601100ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 262144) == 14914314323019517069ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 524288) == 17518437749769352214ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 1048576) == 14920048004901212706ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 2097152) == 8758567366332536138ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 4194304) == 6226655736088907885ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 8388608) == 13716650013685832100ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 16777216) == 305325651636315638ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 33554432) == 16784147606583781671ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 67108864) == 16509467555140798205ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 134217728) == 8717281234694060584ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 268435456) == 8098476701725660537ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 536870912) == 16345871539461094006ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 1073741824) == 3755557000429964408ULL);
+	ok1(hash64_stable(u8array, ARRAY_WORDS, 2147483648U) == 15017348801959710081ULL);
+
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 0) == 1038028831307724039ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 1) == 10155473272642627302ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 2) == 5714751190106841420ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 4) == 3923885607767527866ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 8) == 3931017318293995558ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 16) == 1469696588339313177ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 32) == 11522218526952715051ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 64) == 6953517591561958496ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 128) == 7406689491740052867ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 256) == 10101844489704093104ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 512) == 12511348870707245959ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 1024) == 1614019938016861468ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 2048) == 5294796182374592721ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 4096) == 16089570706643716675ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 8192) == 1689302638424579464ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 16384) == 1446340172370386893ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 32768) == 16535503506744393039ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 65536) == 3496794142527150328ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 131072) == 6568245367474548504ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 262144) == 9487676460765485949ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 524288) == 4519762130966530000ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 1048576) == 15623412069215340610ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 2097152) == 544013388676438108ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 4194304) == 5594904760290840266ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 8388608) == 18098755780041592043ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 16777216) == 6389168672387330316ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 33554432) == 896986127732419381ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 67108864) == 13232626471143901354ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 134217728) == 53378562890493093ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 268435456) == 10072361400297824771ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 536870912) == 14511948118285144529ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 1073741824) == 6981033484844447277ULL);
+	ok1(hash64_stable(u16array, ARRAY_WORDS, 2147483648U) == 5619339091684126808ULL);
+
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 0) == 3037571077312110476ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 1) == 14732398743825071988ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 2) == 14949132158206672071ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 4) == 1291370080511561429ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 8) == 10792665964172133092ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 16) == 14250138032054339435ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 32) == 17136741522078732741ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 64) == 3260193403318236635ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 128) == 10526616652205653536ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 256) == 9019690373358576579ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 512) == 6997491436599677436ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 1024) == 18302783371416533798ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 2048) == 10149320644446516025ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 4096) == 7073759949410623868ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 8192) == 17442399482223760073ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 16384) == 2983906194216281861ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 32768) == 4975845419129060524ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 65536) == 594019910205413268ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 131072) == 11903010186073691112ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 262144) == 7339636527154847008ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 524288) == 15243305400579108736ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 1048576) == 16737926245392043198ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 2097152) == 15725083267699862972ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 4194304) == 12527834265678833794ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 8388608) == 13908436455987824848ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 16777216) == 9672773345173872588ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 33554432) == 2305314279896710501ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 67108864) == 1866733780381408751ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 134217728) == 11906263969465724709ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 268435456) == 5501594918093830069ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 536870912) == 15823785789276225477ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 1073741824) == 17353000723889475410ULL);
+	ok1(hash64_stable(u32array, ARRAY_WORDS, 2147483648U) == 7494736910655503182ULL);
+
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 0) == 9765419389786481410ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 1) == 11182806172127114246ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 2) == 2559155171395472619ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 4) == 3311692033324815378ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 8) == 1297175419505333844ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 16) == 617896928653569210ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 32) == 1517398559958603553ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 64) == 4504821917445110758ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 128) == 1971743331114904452ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 256) == 6177667912354374306ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 512) == 15570521289777792458ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 1024) == 9204559632415917331ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 2048) == 9008982669760028237ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 4096) == 14803537660281700281ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 8192) == 2873966517448487327ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 16384) == 5859277625928363661ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 32768) == 15520461285618185970ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 65536) == 16746489793331175369ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 131072) == 514952025484227461ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 262144) == 10867212269810675249ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 524288) == 9822204377278314587ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 1048576) == 3295088921987850465ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 2097152) == 7559197431498053712ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 4194304) == 1667267269116771849ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 8388608) == 2916804068951374862ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 16777216) == 14422558383125688561ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 33554432) == 10083112683694342602ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 67108864) == 7222777647078298513ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 134217728) == 18424513674048212529ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 268435456) == 14913668581101810784ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 536870912) == 14377721174297902048ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 1073741824) == 6031715005667500948ULL);
+	ok1(hash64_stable(u64array, ARRAY_WORDS, 2147483648U) == 4827100319722378642ULL);
+
 	return exit_status();
 	return exit_status();
 }
 }

+ 33 - 1
ccan/hash/test/run.c

@@ -17,7 +17,7 @@ int main(int argc, char *argv[])
 	for (i = 0; i < ARRAY_WORDS; i++)
 	for (i = 0; i < ARRAY_WORDS; i++)
 		array[i] = i;
 		array[i] = i;
 
 
-	plan_tests(22);
+	plan_tests(39);
 	/* Hash should be the same, indep of memory alignment. */
 	/* Hash should be the same, indep of memory alignment. */
 	val = hash(array, sizeof(array), 0);
 	val = hash(array, sizeof(array), 0);
 	for (i = 0; i < sizeof(uint32_t); i++) {
 	for (i = 0; i < sizeof(uint32_t); i++) {
@@ -51,6 +51,31 @@ int main(int argc, char *argv[])
 		diag("Byte %i, range %u-%u", i, lowest, highest);
 		diag("Byte %i, range %u-%u", i, lowest, highest);
 	}
 	}
 
 
+	/* 64-bit hash of random values should also have random distribution:
+	 * check each of its eight bytes, one at a time. */
+	for (i = 0; i < sizeof(uint64_t); i++) {
+		unsigned int lowest = -1U, highest = 0;
+
+		memset(results, 0, sizeof(results));
+
+		for (j = 0; j < 256000; j++) {
+			for (k = 0; k < ARRAY_WORDS; k++)
+				array[k] = random();
+			results[(hash64(array, sizeof(array), 0) >> i*8)&0xFF]++;
+		}
+
+		for (j = 0; j < 256; j++) {
+			if (results[j] < lowest)
+				lowest = results[j];
+			if (results[j] > highest)
+				highest = results[j];
+		}
+		/* 256000 samples over 256 buckets average 1000 each: expect within 20% */
+		ok(lowest > 800, "Byte %i lowest %i", i, lowest);
+		ok(highest < 1200, "Byte %i highest %i", i, highest);
+		diag("Byte %i, range %u-%u", i, lowest, highest);
+	}
+
 	/* Hash of pointer values should also have random distribution. */
 	/* Hash of pointer values should also have random distribution. */
 	for (i = 0; i < sizeof(uint32_t); i++) {
 	for (i = 0; i < sizeof(uint32_t); i++) {
 		unsigned int lowest = -1U, highest = 0;
 		unsigned int lowest = -1U, highest = 0;
@@ -75,6 +100,13 @@ int main(int argc, char *argv[])
 		diag("hash_pointer byte %i, range %u-%u", i, lowest, highest);
 		diag("hash_pointer byte %i, range %u-%u", i, lowest, highest);
 	}
 	}
 
 
+	if (sizeof(long) == sizeof(uint32_t))
+		ok1(hashl(array, sizeof(array), 0)
+		    == hash(array, sizeof(array), 0));
+	else
+		ok1(hashl(array, sizeof(array), 0)
+		    == hash64(array, sizeof(array), 0));
+
 	/* String hash: weak, so only test bottom byte */
 	/* String hash: weak, so only test bottom byte */
 	for (i = 0; i < 1; i++) {
 	for (i = 0; i < 1; i++) {
 		unsigned int num = 0, cursor, lowest = -1U, highest = 0;
 		unsigned int num = 0, cursor, lowest = -1U, highest = 0;