Browse Source

tdb2: don't hold access to tdb mmap during traverse.

Rusty Russell 15 years ago
parent
commit
48038e705f
4 changed files with 33 additions and 56 deletions
  1. +3 -6
      ccan/tdb2/free.c
  2. +16 -36
      ccan/tdb2/hash.c
  3. +9 -10
      ccan/tdb2/io.c
  4. +5 -4
      ccan/tdb2/private.h

+ 3 - 6
ccan/tdb2/free.c

@@ -164,13 +164,10 @@ tdb_off_t bucket_off(tdb_off_t zone_off, tdb_off_t bucket)
 /* Returns free_buckets + 1, or list number to search. */
 /* Returns free_buckets + 1, or list number to search. */
 static tdb_off_t find_free_head(struct tdb_context *tdb, tdb_off_t bucket)
 static tdb_off_t find_free_head(struct tdb_context *tdb, tdb_off_t bucket)
 {
 {
-	tdb_off_t b;
-
 	/* Speculatively search for a non-zero bucket. */
 	/* Speculatively search for a non-zero bucket. */
-	b = tdb_find_nonzero_off(tdb, bucket_off(tdb->zone_off, bucket),
-				 BUCKETS_FOR_ZONE(tdb->zhdr.zone_bits) + 1
-				 - bucket);
-	return bucket + b;
+	return tdb_find_nonzero_off(tdb, bucket_off(tdb->zone_off, 0),
+				    bucket,
+				    BUCKETS_FOR_ZONE(tdb->zhdr.zone_bits) + 1);
 }
 }
 
 
 /* Remove from free bucket. */
 /* Remove from free bucket. */

+ 16 - 36
ccan/tdb2/hash.c

@@ -450,74 +450,55 @@ int add_to_hash(struct tdb_context *tdb, struct hash_info *h, tdb_off_t new_off)
 	return add_to_hash(tdb, h, new_off);
 	return add_to_hash(tdb, h, new_off);
 }
 }
 
 
-/* No point holding references/copies of db once we drop lock. */
-static void release_entries(struct tdb_context *tdb,
-			    struct traverse_info *tinfo)
-{
-	unsigned int i;
-
-	for (i = 0; i < tinfo->num_levels; i++) {
-		if (tinfo->levels[i].entries) {
-			tdb_access_release(tdb, tinfo->levels[i].entries);
-			tinfo->levels[i].entries = NULL;
-		}
-	}
-}
-
 /* Traverse support: returns offset of record, or 0 or TDB_OFF_ERR. */
 /* Traverse support: returns offset of record, or 0 or TDB_OFF_ERR. */
 static tdb_off_t iterate_hash(struct tdb_context *tdb,
 static tdb_off_t iterate_hash(struct tdb_context *tdb,
 			      struct traverse_info *tinfo)
 			      struct traverse_info *tinfo)
 {
 {
-	tdb_off_t off;
+	tdb_off_t off, val;
 	unsigned int i;
 	unsigned int i;
 	struct traverse_level *tlevel;
 	struct traverse_level *tlevel;
 
 
 	tlevel = &tinfo->levels[tinfo->num_levels-1];
 	tlevel = &tinfo->levels[tinfo->num_levels-1];
 
 
 again:
 again:
-	if (!tlevel->entries) {
-		tlevel->entries = tdb_access_read(tdb, tlevel->hashtable,
-						  sizeof(tdb_off_t)
-						  * tlevel->total_buckets,
-						  true);
-		if (!tlevel->entries)
+	for (i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
+				      tlevel->entry, tlevel->total_buckets);
+	     i != tlevel->total_buckets;
+	     i = tdb_find_nonzero_off(tdb, tlevel->hashtable,
+				      i+1, tlevel->total_buckets)) {
+		val = tdb_read_off(tdb, tlevel->hashtable+sizeof(tdb_off_t)*i);
+		if (unlikely(val == TDB_OFF_ERR))
 			return TDB_OFF_ERR;
 			return TDB_OFF_ERR;
-	}
 
 
-	/* FIXME: Use tdb_find_nonzero_off? */ 
-	for (i = tlevel->entry; i < tlevel->total_buckets; i++) {
-		if (!tlevel->entries[i] || tlevel->entries[i] == tinfo->prev)
+		/* This makes the delete-all-in-traverse case work
+		 * (and simplifies our logic a little). */
+		if (val == tinfo->prev)
 			continue;
 			continue;
 
 
 		tlevel->entry = i;
 		tlevel->entry = i;
-		off = tlevel->entries[i] & TDB_OFF_MASK;
+		off = val & TDB_OFF_MASK;
 
 
-		if (!is_subhash(tlevel->entries[i])) {
+		if (!is_subhash(val)) {
 			/* Found one. */
 			/* Found one. */
-			tinfo->prev = tlevel->entries[i];
-			release_entries(tdb, tinfo);
+			tinfo->prev = val;
 			return off;
 			return off;
 		}
 		}
 
 
-		/* When we come back, we want tne next one */
+		/* When we come back, we want the next one */
 		tlevel->entry++;
 		tlevel->entry++;
 		tinfo->num_levels++;
 		tinfo->num_levels++;
 		tlevel++;
 		tlevel++;
 		tlevel->hashtable = off + sizeof(struct tdb_used_record);
 		tlevel->hashtable = off + sizeof(struct tdb_used_record);
 		tlevel->entry = 0;
 		tlevel->entry = 0;
-		tlevel->entries = NULL;
 		tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
 		tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS);
 		goto again;
 		goto again;
 	}
 	}
 
 
 	/* Nothing there? */
 	/* Nothing there? */
-	if (tinfo->num_levels == 1) {
-		release_entries(tdb, tinfo);
+	if (tinfo->num_levels == 1)
 		return 0;
 		return 0;
-	}
 
 
 	/* Go back up and keep searching. */
 	/* Go back up and keep searching. */
-	tdb_access_release(tdb, tlevel->entries);
 	tinfo->num_levels--;
 	tinfo->num_levels--;
 	tlevel--;
 	tlevel--;
 	goto again;
 	goto again;
@@ -586,7 +567,6 @@ int first_in_hash(struct tdb_context *tdb, int ltype,
 	tinfo->toplevel_group = 0;
 	tinfo->toplevel_group = 0;
 	tinfo->num_levels = 1;
 	tinfo->num_levels = 1;
 	tinfo->levels[0].hashtable = offsetof(struct tdb_header, hashtable);
 	tinfo->levels[0].hashtable = offsetof(struct tdb_header, hashtable);
-	tinfo->levels[0].entries = NULL;
 	tinfo->levels[0].entry = 0;
 	tinfo->levels[0].entry = 0;
 	tinfo->levels[0].total_buckets = (1 << TDB_HASH_GROUP_BITS);
 	tinfo->levels[0].total_buckets = (1 << TDB_HASH_GROUP_BITS);
 
 

+ 9 - 10
ccan/tdb2/io.c

@@ -73,9 +73,8 @@ static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe)
 	struct stat st;
 	struct stat st;
 	int ret;
 	int ret;
 
 
-	/* FIXME: We can't hold pointers during this: we could unmap! */
-	/* (We currently do this in traverse!) */
-//	assert(!tdb->direct_access || tdb_has_expansion_lock(tdb));
+	/* We can't hold pointers during this: we could unmap! */
+	assert(!tdb->direct_access || tdb_has_expansion_lock(tdb));
 
 
 	if (len <= tdb->map_size)
 	if (len <= tdb->map_size)
 		return 0;
 		return 0;
@@ -161,25 +160,25 @@ void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
 	return buf;
 	return buf;
 }
 }
 
 
-/* Return first non-zero offset in num offset array, or num. */
 /* FIXME: Return the off? */
 /* FIXME: Return the off? */
-uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off,
-			      uint64_t num)
+uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
+			      tdb_off_t base, uint64_t start, uint64_t end)
 {
 {
 	uint64_t i;
 	uint64_t i;
 	const uint64_t *val;
 	const uint64_t *val;
 
 
 	/* Zero vs non-zero is the same unconverted: minor optimization. */
 	/* Zero vs non-zero is the same unconverted: minor optimization. */
-	val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
+	val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
+			      (end - start) * sizeof(tdb_off_t), false);
 	if (!val)
 	if (!val)
-		return num;
+		return end;
 
 
-	for (i = 0; i < num; i++) {
+	for (i = 0; i < (end - start); i++) {
 		if (val[i])
 		if (val[i])
 			break;
 			break;
 	}
 	}
 	tdb_access_release(tdb, val);
 	tdb_access_release(tdb, val);
-	return i;
+	return start + i;
 }
 }
 
 
 /* Return first zero offset in num offset array, or num. */
 /* Return first zero offset in num offset array, or num. */

+ 5 - 4
ccan/tdb2/private.h

@@ -242,7 +242,6 @@ struct hash_info {
 struct traverse_info {
 struct traverse_info {
 	struct traverse_level {
 	struct traverse_level {
 		tdb_off_t hashtable;
 		tdb_off_t hashtable;
-		const tdb_off_t *entries;
 		/* We ignore groups here, and treat it as a big array. */
 		/* We ignore groups here, and treat it as a big array. */
 		unsigned entry;
 		unsigned entry;
 		unsigned int total_buckets;
 		unsigned int total_buckets;
@@ -418,9 +417,11 @@ int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val);
 /* Clear an ondisk area. */
 /* Clear an ondisk area. */
 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len);
 int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len);
 
 
-/* Return a non-zero offset in this array, or num. */
-tdb_off_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off,
-			       uint64_t num);
+/* Return the index (start <= index < end) of the first non-zero offset in this array, or end if there is none. */
+tdb_off_t tdb_find_nonzero_off(struct tdb_context *tdb,
+			       tdb_off_t base,
+			       uint64_t start,
+			       uint64_t end);
 
 
 /* Return a zero offset in this array, or num. */
 /* Return a zero offset in this array, or num. */
 tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
 tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,