Browse Source

tdb2: Add stats attribute.

This is good for deep debugging.
Rusty Russell 15 years ago
parent
commit
fe55330a60
9 changed files with 139 additions and 5 deletions
  1. 16 1
      ccan/tdb2/free.c
  2. 1 0
      ccan/tdb2/hash.c
  3. 7 1
      ccan/tdb2/io.c
  4. 7 1
      ccan/tdb2/lock.c
  5. 9 0
      ccan/tdb2/private.h
  6. 9 0
      ccan/tdb2/tdb.c
  7. 23 1
      ccan/tdb2/tdb2.h
  8. 66 1
      ccan/tdb2/tools/speed.c
  9. 1 0
      ccan/tdb2/transaction.c

+ 16 - 1
ccan/tdb2/free.c

@@ -262,6 +262,7 @@ static int coalesce(struct tdb_context *tdb,
 	struct tdb_free_record pad, *r;
 	tdb_off_t end;
 
+	add_stat(tdb, alloc_coalesce_tried, 1);
 	end = off + sizeof(struct tdb_used_record) + data_len;
 
 	while (end < tdb->map_size) {
@@ -281,8 +282,10 @@ static int coalesce(struct tdb_context *tdb,
 		nb_off = bucket_off(flist_offset(tdb, flist), bucket);
 
 		/* We may be violating lock order here, so best effort. */
-		if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT) == -1)
+		if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT) == -1) {
+			add_stat(tdb, alloc_coalesce_lockfail, 1);
 			break;
+		}
 
 		/* Now we have lock, re-check. */
 		r = tdb_get(tdb, end, &pad, sizeof(pad));
@@ -292,12 +295,14 @@ static int coalesce(struct tdb_context *tdb,
 		}
 
 		if (unlikely(frec_magic(r) != TDB_FREE_MAGIC)) {
+			add_stat(tdb, alloc_coalesce_race, 1);
 			tdb_unlock_free_bucket(tdb, nb_off);
 			break;
 		}
 
 		if (unlikely(frec_flist(r) != flist)
 		    || unlikely(size_to_bucket(frec_len(r)) != bucket)) {
+			add_stat(tdb, alloc_coalesce_race, 1);
 			tdb_unlock_free_bucket(tdb, nb_off);
 			break;
 		}
@@ -309,6 +314,7 @@ static int coalesce(struct tdb_context *tdb,
 
 		end += sizeof(struct tdb_used_record) + frec_len(r);
 		tdb_unlock_free_bucket(tdb, nb_off);
+		add_stat(tdb, alloc_coalesce_num_merged, 1);
 	}
 
 	/* Didn't find any adjacent free? */
@@ -343,6 +349,7 @@ static int coalesce(struct tdb_context *tdb,
 	if (tdb_access_commit(tdb, r) != 0)
 		goto err;
 
+	add_stat(tdb, alloc_coalesce_succeeded, 1);
 	tdb_unlock_free_bucket(tdb, b_off);
 
 	if (add_free_record(tdb, off, end - off) == -1)
@@ -368,6 +375,7 @@ static tdb_off_t lock_and_alloc(struct tdb_context *tdb,
 	double multiplier;
 	size_t size = adjust_size(keylen, datalen);
 
+	add_stat(tdb, allocs, 1);
 again:
 	b_off = bucket_off(flist_off, bucket);
 
@@ -453,6 +461,7 @@ again:
 		/* Bucket of leftover will be <= current bucket, so nested
 		 * locking is allowed. */
 		if (leftover) {
+			add_stat(tdb, alloc_leftover, 1);
 			if (add_free_record(tdb,
 					    best_off + sizeof(rec)
 					    + frec_len(&best) - leftover,
@@ -502,6 +511,10 @@ static tdb_off_t get_free(struct tdb_context *tdb,
 			if (off == TDB_OFF_ERR)
 				return TDB_OFF_ERR;
 			if (off != 0) {
+				if (b == start_b)
+					add_stat(tdb, alloc_bucket_exact, 1);
+				if (b == TDB_FREE_BUCKETS - 1)
+					add_stat(tdb, alloc_bucket_max, 1);
 				/* Worked?  Stay using this list. */
 				tdb->flist_off = flist_off;
 				tdb->flist = flist;
@@ -513,6 +526,7 @@ static tdb_off_t get_free(struct tdb_context *tdb,
 		/* Hmm, try next list. */
 		flist_off = next_flist(tdb, flist_off);
 		flist++;
+
 		if (flist_off == 0) {
 			wrapped = true;
 			flist_off = first_flist(tdb);
@@ -596,6 +610,7 @@ static int tdb_expand(struct tdb_context *tdb, tdb_len_t size)
 	/* We need to drop this lock before adding free record. */
 	tdb_unlock_expand(tdb, F_WRLCK);
 
+	add_stat(tdb, expands, 1);
 	return add_free_record(tdb, old_size, wanted);
 }
 

+ 1 - 0
ccan/tdb2/hash.c

@@ -358,6 +358,7 @@ static int expand_group(struct tdb_context *tdb, struct hash_info *h)
 	if (subhash == TDB_OFF_ERR)
 		return -1;
 
+	add_stat(tdb, alloc_subhash, 1);
 	if (zero_out(tdb, subhash + sizeof(struct tdb_used_record),
 		     sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) == -1)
 		return -1;

+ 7 - 1
ccan/tdb2/io.c

@@ -443,7 +443,7 @@ static int tdb_expand_file(struct tdb_context *tdb, tdb_len_t addition)
 		   file isn't sparse, which would be very bad if we ran out of
 		   disk. This must be done with write, not via mmap */
 		memset(buf, 0x43, sizeof(buf));
-		if (fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
+		if (0 || fill(tdb, buf, sizeof(buf), tdb->map_size, addition) == -1)
 			return -1;
 		tdb->map_size += addition;
 		tdb_mmap(tdb);
@@ -546,6 +546,12 @@ static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len)
 	return (char *)tdb->map_ptr + off;
 }
 
+void add_stat_(struct tdb_context *tdb, uint64_t *stat, size_t val)
+{
+	if ((uintptr_t)stat < (uintptr_t)tdb->stats + tdb->stats->size)
+		*stat += val;
+}
+
 static const struct tdb_methods io_methods = {
 	tdb_read,
 	tdb_write,

+ 7 - 1
ccan/tdb2/lock.c

@@ -40,10 +40,13 @@ static int fcntl_lock(struct tdb_context *tdb,
 	fl.l_len = len;
 	fl.l_pid = 0;
 
+	add_stat(tdb, lock_lowlevel, 1);
 	if (waitflag)
 		return fcntl(tdb->fd, F_SETLKW, &fl);
-	else
+	else {
+		add_stat(tdb, lock_nonblock, 1);
 		return fcntl(tdb->fd, F_SETLK, &fl);
+	}
 }
 
 static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
@@ -286,6 +289,8 @@ static int tdb_nest_lock(struct tdb_context *tdb, tdb_off_t offset, int ltype,
 	if (tdb->flags & TDB_NOLOCK)
 		return 0;
 
+	add_stat(tdb, locks, 1);
+
 	new_lck = find_nestlock(tdb, offset);
 	if (new_lck) {
 		if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
@@ -486,6 +491,7 @@ int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
 		return -1;
 	}
 
+	add_stat(tdb, locks, 1);
 again:
 	/* Lock hashes, gradually. */
 	if (tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,

+ 9 - 0
ccan/tdb2/private.h

@@ -331,6 +331,8 @@ struct tdb_context {
 	uint64_t num_lockrecs;
 	struct tdb_lock_type *lockrecs;
 
+	struct tdb_attribute_stats *stats;
+
 	/* Single list of all TDBs, to avoid multiple opens. */
 	struct tdb_context *next;
 	dev_t device;	
@@ -466,6 +468,13 @@ int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
 int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
 		     void *rec, size_t len);
 
+/* Adds a stat, if it's in range. */
+void add_stat_(struct tdb_context *tdb, uint64_t *stat, size_t val);
+#define add_stat(tdb, statname, val)					\
+	do {								\
+		if (unlikely((tdb)->stats))				\
+			add_stat_((tdb), &(tdb)->stats->statname, (val)); \
+	} while (0)
 
 /* lock.c: */
 void tdb_lock_init(struct tdb_context *tdb);

+ 9 - 0
ccan/tdb2/tdb.c

@@ -177,6 +177,7 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
 	tdb->log = null_log_fn;
 	tdb->log_priv = NULL;
 	tdb->transaction = NULL;
+	tdb->stats = NULL;
 	tdb_hash_init(tdb);
 	tdb_io_init(tdb);
 	tdb_lock_init(tdb);
@@ -194,6 +195,12 @@ struct tdb_context *tdb_open(const char *name, int tdb_flags,
 		case TDB_ATTRIBUTE_SEED:
 			seed = &attr->seed;
 			break;
+		case TDB_ATTRIBUTE_STATS:
+			tdb->stats = &attr->stats;
+			/* They have stats we don't know about?  Tell them. */
+			if (tdb->stats->size > sizeof(attr->stats))
+				tdb->stats->size = sizeof(attr->stats);
+			break;
 		default:
 			tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
 				 "tdb_open: unknown attribute type %u\n",
@@ -386,6 +393,7 @@ static int replace_data(struct tdb_context *tdb,
 
 	/* We didn't like the existing one: remove it. */
 	if (old_off) {
+		add_stat(tdb, frees, 1);
 		add_free_record(tdb, old_off,
 				sizeof(struct tdb_used_record)
 				+ key.dsize + old_room);
@@ -582,6 +590,7 @@ int tdb_delete(struct tdb_context *tdb, struct tdb_data key)
 		goto unlock_err;
 
 	/* Free the deleted entry. */
+	add_stat(tdb, frees, 1);
 	if (add_free_record(tdb, off,
 			    sizeof(struct tdb_used_record)
 			    + rec_key_length(&rec)

+ 23 - 1
ccan/tdb2/tdb2.h

@@ -87,7 +87,8 @@ typedef uint64_t (*tdb_hashfn_t)(const void *key, size_t len, uint64_t seed,
 enum tdb_attribute_type {
 	TDB_ATTRIBUTE_LOG = 0,
 	TDB_ATTRIBUTE_HASH = 1,
-	TDB_ATTRIBUTE_SEED = 2
+	TDB_ATTRIBUTE_SEED = 2,
+	TDB_ATTRIBUTE_STATS = 3
 };
 
 struct tdb_attribute_base {
@@ -112,11 +113,32 @@ struct tdb_attribute_seed {
 	uint64_t seed;
 };
 
+struct tdb_attribute_stats {
+	struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_STATS */
+	size_t size; /* = sizeof(struct tdb_attribute_stats) */
+	uint64_t allocs;
+	uint64_t   alloc_subhash;
+	uint64_t   alloc_bucket_exact;
+	uint64_t   alloc_bucket_max;
+	uint64_t   alloc_leftover;
+	uint64_t   alloc_coalesce_tried;
+	uint64_t     alloc_coalesce_lockfail;
+	uint64_t     alloc_coalesce_race;
+	uint64_t     alloc_coalesce_succeeded;
+	uint64_t        alloc_coalesce_num_merged;
+	uint64_t expands;
+	uint64_t frees;
+	uint64_t locks;
+	uint64_t    lock_lowlevel;
+	uint64_t    lock_nonblock;
+};
+
 union tdb_attribute {
 	struct tdb_attribute_base base;
 	struct tdb_attribute_log log;
 	struct tdb_attribute_hash hash;
 	struct tdb_attribute_seed seed;
+	struct tdb_attribute_stats stats;
 };
 		
 struct tdb_context *tdb_open(const char *name, int tdb_flags,

+ 66 - 1
ccan/tdb2/tools/speed.c

@@ -43,6 +43,43 @@ static int count_record(struct tdb_context *tdb,
 	return 0;
 }
 
+static void dump_and_clear_stats(struct tdb_attribute_stats *stats)
+{
+	printf("allocs = %llu\n",
+	       (unsigned long long)stats->allocs);
+	printf("  alloc_subhash = %llu\n",
+	       (unsigned long long)stats->alloc_subhash);
+	printf("  alloc_bucket_exact = %llu\n",
+	       (unsigned long long)stats->alloc_bucket_exact);
+	printf("  alloc_bucket_max = %llu\n",
+	       (unsigned long long)stats->alloc_bucket_max);
+	printf("  alloc_leftover = %llu\n",
+	       (unsigned long long)stats->alloc_leftover);
+	printf("  alloc_coalesce_tried = %llu\n",
+	       (unsigned long long)stats->alloc_coalesce_tried);
+	printf("    alloc_coalesce_lockfail = %llu\n",
+	       (unsigned long long)stats->alloc_coalesce_lockfail);
+	printf("    alloc_coalesce_race = %llu\n",
+	       (unsigned long long)stats->alloc_coalesce_race);
+	printf("    alloc_coalesce_succeeded = %llu\n",
+	       (unsigned long long)stats->alloc_coalesce_succeeded);
+	printf("       alloc_coalesce_num_merged = %llu\n",
+	       (unsigned long long)stats->alloc_coalesce_num_merged);
+	printf("expands = %llu\n",
+	       (unsigned long long)stats->expands);
+	printf("frees = %llu\n",
+	       (unsigned long long)stats->frees);
+	printf("locks = %llu\n",
+	       (unsigned long long)stats->locks);
+	printf("   lock_lowlevel = %llu\n",
+	       (unsigned long long)stats->lock_lowlevel);
+	printf("   lock_nonblock = %llu\n",
+	       (unsigned long long)stats->lock_nonblock);
+
+	/* Now clear. */
+	memset(&stats->allocs, 0, (char *)(stats+1) - (char *)&stats->allocs);
+}
+
 int main(int argc, char *argv[])
 {
 	unsigned int i, j, num = 1000, stage = 0, stopat = -1;
@@ -51,13 +88,18 @@ int main(int argc, char *argv[])
 	TDB_DATA key, data;
 	struct tdb_context *tdb;
 	struct timeval start, stop;
-	union tdb_attribute seed;
+	union tdb_attribute seed, stats;
 
 	/* Try to keep benchmarks even. */
 	seed.base.attr = TDB_ATTRIBUTE_SEED;
 	seed.base.next = NULL;
 	seed.seed.seed = 0;
 
+	memset(&stats, 0, sizeof(stats));
+	stats.base.attr = TDB_ATTRIBUTE_STATS;
+	stats.base.next = NULL;
+	stats.stats.size = sizeof(stats);
+
 	if (argv[1] && strcmp(argv[1], "--internal") == 0) {
 		flags = TDB_INTERNAL;
 		argc--;
@@ -68,6 +110,11 @@ int main(int argc, char *argv[])
 		argc--;
 		argv++;
 	}
+	if (argv[1] && strcmp(argv[1], "--stats") == 0) {
+		seed.base.next = &stats;
+		argc--;
+		argv++;
+	}
 
 	tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR|O_CREAT|O_TRUNC,
 		       0600, &seed);
@@ -105,6 +152,9 @@ int main(int argc, char *argv[])
 		errx(1, "committing transaction: %s", tdb_errorstr(tdb));
 	printf(" %zu ns (%zu bytes)\n",
 	       normalize(&start, &stop, num), file_size());
+
+	if (seed.base.next)
+		dump_and_clear_stats(&stats.stats);
 	if (++stage == stopat)
 		exit(0);
 
@@ -126,6 +176,8 @@ int main(int argc, char *argv[])
 		errx(1, "committing transaction: %s", tdb_errorstr(tdb));
 	printf(" %zu ns (%zu bytes)\n",
 	       normalize(&start, &stop, num), file_size());
+	if (seed.base.next)
+		dump_and_clear_stats(&stats.stats);
 	if (++stage == stopat)
 		exit(0);
 
@@ -146,6 +198,8 @@ int main(int argc, char *argv[])
 		errx(1, "committing transaction: %s", tdb_errorstr(tdb));
 	printf(" %zu ns (%zu bytes)\n",
 	       normalize(&start, &stop, num), file_size());
+	if (seed.base.next)
+		dump_and_clear_stats(&stats.stats);
 	if (++stage == stopat)
 		exit(0);
 
@@ -165,6 +219,8 @@ int main(int argc, char *argv[])
 		errx(1, "committing transaction: %s", tdb_errorstr(tdb));
 	printf(" %zu ns (%zu bytes)\n",
 	       normalize(&start, &stop, num), file_size());
+	if (seed.base.next)
+		dump_and_clear_stats(&stats.stats);
 	if (++stage == stopat)
 		exit(0);
 
@@ -185,6 +241,8 @@ int main(int argc, char *argv[])
 		errx(1, "committing transaction: %s", tdb_errorstr(tdb));
 	printf(" %zu ns (%zu bytes)\n",
 	       normalize(&start, &stop, num), file_size());
+	if (seed.base.next)
+		dump_and_clear_stats(&stats.stats);
 	if (++stage == stopat)
 		exit(0);
 
@@ -205,6 +263,8 @@ int main(int argc, char *argv[])
 		errx(1, "committing transaction: %s", tdb_errorstr(tdb));
 	printf(" %zu ns (%zu bytes)\n",
 	       normalize(&start, &stop, num), file_size());
+	if (seed.base.next)
+		dump_and_clear_stats(&stats.stats);
 	if (++stage == stopat)
 		exit(0);
 
@@ -248,5 +308,10 @@ int main(int argc, char *argv[])
 	printf(" %zu ns (%zu bytes)\n",
 	       normalize(&start, &stop, num), file_size());
 
+	if (seed.base.next)
+		dump_and_clear_stats(&stats.stats);
+	if (++stage == stopat)
+		exit(0);
+
 	return 0;
 }

+ 1 - 0
ccan/tdb2/transaction.c

@@ -621,6 +621,7 @@ static int tdb_recovery_allocate(struct tdb_context *tdb,
 	   us an area that is being currently used (as of the start of
 	   the transaction) */
 	if (recovery_head != 0) {
+		add_stat(tdb, frees, 1);
 		if (add_free_record(tdb, recovery_head,
 				    sizeof(rec) + rec.max_len) != 0) {
 			tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,