Browse Source

tdb2: now checking a new empty database works.

Rusty Russell 15 years ago
parent
commit
ebdd6451e2
8 changed files with 94 additions and 59 deletions
  1. 1 0
      ccan/tdb2/_info
  2. 5 5
      ccan/tdb2/check.c
  3. 48 24
      ccan/tdb2/io.c
  4. 10 9
      ccan/tdb2/lock.c
  5. 4 2
      ccan/tdb2/private.h
  6. 21 15
      ccan/tdb2/tdb.c
  7. 1 2
      ccan/tdb2/tdb2.h
  8. 4 2
      ccan/tdb2/test/run-encode.c

+ 1 - 0
ccan/tdb2/_info

@@ -74,6 +74,7 @@ int main(int argc, char *argv[])
 		printf("ccan/hash\n");
 		printf("ccan/likely\n");
 		printf("ccan/asearch\n");
+		printf("ccan/build_assert\n");
 		return 0;
 	}
 

+ 5 - 5
ccan/tdb2/check.c

@@ -187,7 +187,8 @@ static bool check_hash_list(struct tdb_context *tdb,
 		num_nonzero++;
 	}
 
-	if (num_found != num_used) {
+	/* free table and hash table are two of the used blocks. */
+	if (num_found != num_used - 2) {
 		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
 			 "tdb_check: Not all entries are in hash\n");
 		return false;
@@ -322,11 +323,10 @@ int tdb_check(struct tdb_context *tdb,
 	size_t num_free = 0, num_used = 0;
 	bool hash_found = false, free_found = false;
 
+	/* This always ensures the header is up to date. */
 	if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0)
 		return -1;
 
-	update_header(tdb);
-
 	if (!check_header(tdb))
 		goto fail;
 
@@ -403,9 +403,9 @@ int tdb_check(struct tdb_context *tdb,
 		goto fail;
 
 	tdb_allrecord_unlock(tdb, F_RDLCK);
-	return true;
+	return 0;
 
 fail:
 	tdb_allrecord_unlock(tdb, F_RDLCK);
-	return false;
+	return -1;
 }

+ 48 - 24
ccan/tdb2/io.c

@@ -125,11 +125,8 @@ static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
 /* Either make a copy into pad and return that, or return ptr into mmap. */
 /* Note: pad has to be a real object, so we can't get here if len
  * overflows size_t */
-/* FIXME: Transaction */
 void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
 {
-	ssize_t r;
-
 	if (likely(!(tdb->flags & TDB_CONVERT))) {
 		void *ret = tdb_direct(tdb, off, len);
 		if (ret)
@@ -139,18 +136,8 @@ void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len)
 	if (unlikely(tdb_oob(tdb, off + len, false) == -1))
 		return NULL;
 
-	r = pread(tdb->fd, pad, len, off);
-	if (r != (ssize_t)len) {
-		/* Ensure ecode is set for log fn. */
-		tdb->ecode = TDB_ERR_IO;
-		tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
-			 "tdb_read failed at %llu "
-			 "len=%lld ret=%lld (%s) map_size=%lld\n",
-			 (long long)off, (long long)len,
-			 (long long)r, strerror(errno),
-			 (long long)tdb->map_size);
+	if (tdb->methods->read(tdb, off, pad, len) == -1)
 		return NULL;
-	}
 	return tdb_convert(tdb, pad, len);
 }
 
@@ -249,7 +236,7 @@ tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
 {
 	tdb_off_t pad, *ret;
 
-	ret = tdb_get(tdb, off, &pad, sizeof(ret));
+	ret = tdb_get(tdb, off, &pad, sizeof(pad));
 	if (!ret) {
 		return TDB_OFF_ERR;
 	}
@@ -260,7 +247,7 @@ tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
 {
 	while (len) {
-		size_t ret;
+		ssize_t ret;
 		ret = pwrite(fd, buf, len, off);
 		if (ret < 0)
 			return false;
@@ -268,13 +255,51 @@ bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
 			errno = ENOSPC;
 			return false;
 		}
-		buf += ret;
+		buf = (char *)buf + ret;
 		off += ret;
 		len -= ret;
 	}
 	return true;
 }
 
+/* Even on files, we can get partial reads due to signals.
+ *
+ * Keeps calling pread() until all len bytes starting at file offset off
+ * have been stored in buf, advancing buf and off past each partial read.
+ * A len of 0 returns true without calling pread() at all.
+ *
+ * Returns true once everything has been read.  Returns false if pread()
+ * returns a negative value (errno is not touched on that path), or if it
+ * returns 0 while bytes are still outstanding, in which case errno is
+ * set to EWOULDBLOCK.
+ *
+ * NOTE(review): a negative return is never retried, so a call that fails
+ * outright with EINTR is reported as failure -- confirm this is intended. */
+bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off)
+{
+	while (len) {
+		ssize_t ret;
+		ret = pread(fd, buf, len, off);
+		if (ret < 0)
+			return false;
+		if (ret == 0) {
+			/* ETOOSHORT?  Nothing more to read but len is not
+			 * yet satisfied; EWOULDBLOCK is used as a stand-in. */
+			errno = EWOULDBLOCK;
+			return false;
+		}
+		buf = (char *)buf + ret;
+		off += ret;
+		len -= ret;
+	}
+	return true;
+}
+/*
+ * Keeps calling read() on fd until all len bytes have been stored in buf,
+ * advancing buf past each partial read.  No offset is passed: each call
+ * is a plain read(fd, buf, len).  A len of 0 returns true without
+ * calling read() at all.
+ *
+ * Returns true once everything has been read.  Returns false if read()
+ * returns a negative value (errno is not touched on that path), or if it
+ * returns 0 while bytes are still outstanding, in which case errno is
+ * set to EWOULDBLOCK.
+ *
+ * NOTE(review): a negative return is never retried, so a call that fails
+ * outright with EINTR is reported as failure -- confirm this is intended.
+ */
+bool tdb_read_all(int fd, void *buf, size_t len)
+{
+	while (len) {
+		ssize_t ret;
+		ret = read(fd, buf, len);
+		if (ret < 0)
+			return false;
+		if (ret == 0) {
+			/* ETOOSHORT?  Nothing more to read but len is not
+			 * yet satisfied; EWOULDBLOCK is used as a stand-in. */
+			errno = EWOULDBLOCK;
+			return false;
+		}
+		buf = (char *)buf + ret;
+		len -= ret;
+	}
+	return true;
+}
+
 /* write a lump of data at a specified offset */
 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
 		     const void *buf, tdb_len_t len)
@@ -316,15 +341,14 @@ static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
 	if (tdb->map_ptr) {
 		memcpy(buf, off + (char *)tdb->map_ptr, len);
 	} else {
-		ssize_t ret = pread(tdb->fd, buf, len, off);
-		if (ret != (ssize_t)len) {
+		if (!tdb_pread_all(tdb->fd, buf, len, off)) {
 			/* Ensure ecode is set for log fn. */
 			tdb->ecode = TDB_ERR_IO;
 			tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
 				 "tdb_read failed at %lld "
-				 "len=%lld ret=%lld (%s) map_size=%lld\n",
+				 "len=%lld (%s) map_size=%lld\n",
 				 (long long)off, (long long)len,
-				 (long long)ret, strerror(errno),
+				 strerror(errno),
 				 (long long)tdb->map_size);
 			return -1;
 		}
@@ -376,17 +400,17 @@ uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
 	void *key;
 	uint64_t klen, hash;
 
-	r = tdb_get(tdb, off, &pad, sizeof(*r));
+	r = tdb_get(tdb, off, &pad, sizeof(pad));
 	if (!r)
 		/* FIXME */
 		return 0;
 
 	klen = rec_key_length(r);
-	key = tdb_direct(tdb, off + sizeof(*r), klen);
+	key = tdb_direct(tdb, off + sizeof(pad), klen);
 	if (likely(key))
 		return tdb_hash(tdb, key, klen);
 
-	key = tdb_alloc_read(tdb, off + sizeof(*r), klen);
+	key = tdb_alloc_read(tdb, off + sizeof(pad), klen);
 	if (unlikely(!key))
 		return 0;
 	hash = tdb_hash(tdb, key, klen);

+ 10 - 9
ccan/tdb2/lock.c

@@ -436,7 +436,8 @@ static int tdb_lock_gradual(struct tdb_context *tdb,
 /* lock/unlock entire database.  It can only be upgradable if you have some
  * other way of guaranteeing exclusivity (ie. transaction write lock).
  * Note that we don't lock the free chains: noone can get those locks
- * without a hash chain lock first. */
+ * without a hash chain lock first.
+ * The header *will be* up to date once this returns success. */
 int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
 		       enum tdb_lock_flags flags, bool upgradable)
 {
@@ -494,27 +495,27 @@ again:
 		return -1;
 	}
 
+	tdb->allrecord_lock.count = 1;
+	/* If it's upgradable, it's actually exclusive so we can treat
+	 * it as a write lock. */
+	tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
+	tdb->allrecord_lock.off = upgradable;
+
 	/* Now we re-check header, holding lock. */
 	if (unlikely(update_header(tdb))) {
-		tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
+		tdb_allrecord_unlock(tdb, ltype);
 		goto again;
 	}
 
 	/* Now check for needing recovery. */
 	if (unlikely(tdb_needs_recovery(tdb))) {
-		tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
+		tdb_allrecord_unlock(tdb, ltype);
 		if (tdb_lock_and_recover(tdb) == -1) {
 			return -1;
 		}		
 		goto again;
 	}
 
-
-	tdb->allrecord_lock.count = 1;
-	/* If it's upgradable, it's actually exclusive so we can treat
-	 * it as a write lock. */
-	tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
-	tdb->allrecord_lock.off = upgradable;
 	return 0;
 }
 

+ 4 - 2
ccan/tdb2/private.h

@@ -83,10 +83,10 @@ typedef uint64_t tdb_off_t;
 /* Hash chain locks. */
 #define TDB_HASH_LOCK_START 2
 
-/* We start wih 256 hash buckets, 10 free buckets.  A 1k-sized zone. */
+/* We start with 256 hash buckets, 10 free buckets.  A 4k-sized zone. */
 #define INITIAL_HASH_BITS 8
 #define INITIAL_FREE_BUCKETS 10
-#define INITIAL_ZONE_BITS 10
+#define INITIAL_ZONE_BITS 12
 
 #if !HAVE_BSWAP_64
 static inline uint64_t bswap_64(uint64_t x)
@@ -328,6 +328,8 @@ tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
 
 /* Even on files, we can get partial writes due to signals. */
 bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off);
+bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off);
+bool tdb_read_all(int fd, void *buf, size_t len);
 
 /* Allocate and make a copy of some offset. */
 void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);

+ 21 - 15
ccan/tdb2/tdb.c

@@ -1,6 +1,7 @@
 #include "private.h"
 #include <ccan/tdb2/tdb2.h>
 #include <ccan/hash/hash.h>
+#include <ccan/build_assert/build_assert.h>
 #include <ccan/likely/likely.h>
 #include <assert.h>
 
@@ -48,7 +49,7 @@ bool update_header(struct tdb_context *tdb)
 static uint64_t jenkins_hash(const void *key, size_t length, uint64_t seed,
 			     void *arg)
 {
-	return hash64_any(key, length, seed);
+	return hash64_stable((const unsigned char *)key, length, seed);
 }
 
 uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len)
@@ -77,7 +78,7 @@ static uint64_t random_number(struct tdb_context *tdb)
 
 	fd = open("/dev/urandom", O_RDONLY);
 	if (fd >= 0) {
-		if (read(fd, &ret, sizeof(ret)) == sizeof(ret)) {
+		if (tdb_read_all(fd, &ret, sizeof(ret))) {
 			tdb->log(tdb, TDB_DEBUG_TRACE, tdb->log_priv,
 				 "tdb_open: random from /dev/urandom\n");
 			close(fd);
@@ -130,6 +131,7 @@ static int tdb_new_database(struct tdb_context *tdb)
 {
 	/* We make it up in memory, then write it out if not internal */
 	struct new_database newdb;
+	unsigned int magic_off = offsetof(struct tdb_header, magic_food);
 
 	/* Fill in the header */
 	newdb.hdr.version = TDB_VERSION;
@@ -142,6 +144,9 @@ static int tdb_new_database(struct tdb_context *tdb)
 
 	newdb.hdr.v.generation = 0;
 
+	/* The initial zone must cover the initial database size! */
+	BUILD_ASSERT((1ULL << INITIAL_ZONE_BITS) >= sizeof(newdb));
+
 	/* Free array has 1 zone, 10 buckets.  All buckets empty. */
 	newdb.hdr.v.num_zones = 1;
 	newdb.hdr.v.zone_bits = INITIAL_ZONE_BITS;
@@ -158,6 +163,17 @@ static int tdb_new_database(struct tdb_context *tdb)
 		   sizeof(newdb.hash), sizeof(newdb.hash), 0);
 	memset(newdb.hash, 0, sizeof(newdb.hash));
 
+	/* Magic food */
+	memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
+	strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
+
+	/* This creates an endian-converted database, as if read from disk */
+	tdb_convert(tdb,
+		    (char *)&newdb.hdr + magic_off,
+		    sizeof(newdb) - magic_off);
+
+	tdb->header = newdb.hdr;
+
 	if (tdb->flags & TDB_INTERNAL) {
 		tdb->map_size = sizeof(newdb);
 		tdb->map_ptr = malloc(tdb->map_size);
@@ -166,9 +182,6 @@ static int tdb_new_database(struct tdb_context *tdb)
 			return -1;
 		}
 		memcpy(tdb->map_ptr, &newdb, tdb->map_size);
-		tdb->header = newdb.hdr;
-		/* Convert the `ondisk' version if asked. */
-		tdb_convert(tdb, tdb->map_ptr, sizeof(newdb));
 		return 0;
 	}
 	if (lseek(tdb->fd, 0, SEEK_SET) == -1)
@@ -177,14 +190,6 @@ static int tdb_new_database(struct tdb_context *tdb)
 	if (ftruncate(tdb->fd, 0) == -1)
 		return -1;
 
-	/* This creates an endian-converted header, as if read from disk */
-	tdb->header = newdb.hdr;
-	tdb_convert(tdb, &tdb->header, sizeof(tdb->header));
-
-	/* Don't endian-convert the magic food! */
-	memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food));
-	strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD);
-
 	if (!tdb_pwrite_all(tdb->fd, &newdb, sizeof(newdb), 0)) {
 		tdb->ecode = TDB_ERR_IO;
 		return -1;
@@ -215,6 +220,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
 	tdb->log_priv = NULL;
 	tdb->khash = jenkins_hash;
 	tdb->hash_priv = NULL;
+	tdb_io_init(tdb);
 
 	/* FIXME */
 	if (attr) {
@@ -246,6 +252,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
 			goto fail;
 		}
 		TEST_IT(tdb->flags & TDB_CONVERT);
+		tdb_convert(tdb, &tdb->header, sizeof(tdb->header));
 		goto internal;
 	}
 
@@ -268,8 +275,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
 		goto fail;	/* errno set by tdb_brlock */
 	}
 
-	errno = 0;
-	if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header)
+	if (!tdb_pread_all(tdb->fd, &tdb->header, sizeof(tdb->header), 0)
 	    || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) {
 		if (!(open_flags & O_CREAT) || tdb_new_database(tdb) == -1) {
 			if (errno == 0) {

+ 1 - 2
ccan/tdb2/tdb2.h

@@ -52,8 +52,7 @@ extern "C" {
 #define TDB_INTERNAL 2 /* don't store on disk */
 #define TDB_NOLOCK   4 /* don't do any locking */
 #define TDB_NOMMAP   8 /* don't use mmap */
-#define TDB_CONVERT 16 /* convert endian (internal use) */
-#define TDB_BIGENDIAN 32 /* header is big-endian (internal use) */
+#define TDB_CONVERT 16 /* convert endian */
 #define TDB_NOSYNC   64 /* don't use synchronous transactions */
 #define TDB_SEQNUM   128 /* maintain a sequence number */
 #define TDB_VOLATILE   256 /* Activate the per-hashchain freelist, default 5 */

+ 4 - 2
ccan/tdb2/test/run-encode.c

@@ -3,14 +3,15 @@
 #include <ccan/tdb2/lock.c>
 #include <ccan/tdb2/io.c>
 #include <ccan/tap/tap.h>
+#include "logging.h"
 
 int main(int argc, char *argv[])
 {
 	unsigned int i;
 	struct tdb_used_record rec;
-	struct tdb_context tdb = { .log = null_log_fn, .log_priv = NULL };
+	struct tdb_context tdb = { .log = tap_log_fn, .log_priv = NULL };
 
-	plan_tests(64 + 32 + 48*6);
+	plan_tests(64 + 32 + 48*6 + 1);
 
 	/* We should be able to encode any data value. */
 	for (i = 0; i < 64; i++)
@@ -36,5 +37,6 @@ int main(int argc, char *argv[])
 		ok1(rec_hash(&rec) == h);
 		ok1(rec_magic(&rec) == TDB_MAGIC);
 	}
+	ok1(tap_log_messages == 0);
 	return exit_status();
 }