Browse Source

First cut of replay_trace for tdb.

Rusty Russell 16 years ago
parent
commit
6d35d746f1

+ 2 - 2
ccan/tdb/open.c

@@ -379,9 +379,9 @@ int tdb_close(struct tdb_context *tdb)
 	struct tdb_context **i;
 	int ret = 0;
 
-	tdb_trace(tdb, "tdb_close");
+	tdb_trace(tdb, "tdb_close\n");
 	if (tdb->transaction) {
-		tdb_transaction_cancel(tdb);
+		tdb_transaction_cancel_internal(tdb);
 	}
 
 	if (tdb->map_ptr) {

+ 9 - 9
ccan/tdb/tdb.c

@@ -65,8 +65,6 @@ static void tdb_increment_seqnum(struct tdb_context *tdb)
 		return;
 	}
 
-	tdb_trace(tdb, "tdb_increment_seqnum");
-
 	tdb_increment_seqnum_nonblock(tdb);
 
 	tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
@@ -417,7 +415,7 @@ int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
 	ret = tdb_delete_hash(tdb, key, hash);
 	tdb_trace(tdb, "tdb_delete ");
 	tdb_trace_record(tdb, key);
-	tdb_trace(tdb, "= %i\n", ret); 
+	tdb_trace(tdb, "= %s\n", ret ? "ENOENT" : "0"); 
 	return ret;
 }
 
@@ -693,8 +691,8 @@ int tdb_get_seqnum(struct tdb_context *tdb)
 {
 	tdb_off_t seqnum=0;
 
-	tdb_trace(tdb, "tdb_get_seqnum\n");
 	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
+	tdb_trace(tdb, "tdb_get_seqnum = %u\n", seqnum);
 	return seqnum;
 }
 
@@ -857,23 +855,25 @@ void tdb_trace(const struct tdb_context *tdb, const char *fmt, ...)
 {
 	char msg[256];
 	va_list args;
-	int len;
+	int len, err;
 
 	va_start(args, fmt);
 	len = vsprintf(msg, fmt, args);
 	va_end(args);
 
-	write(tdb->tracefd, msg, len);
+	err = write(tdb->tracefd, msg, len);
 }
 
 void tdb_trace_record(const struct tdb_context *tdb, TDB_DATA rec)
 {
 	char msg[20];
 	unsigned int i;
+	int err;
 
-	write(tdb->tracefd, msg, sprintf(msg, "%zu:", rec.dsize));
+	err = write(tdb->tracefd, msg, sprintf(msg, "%zu:", rec.dsize));
 	for (i = 0; i < rec.dsize; i++)
-		write(tdb->tracefd, msg, sprintf(msg, "%02x", rec.dptr[i]));
-	write(tdb->tracefd, " ", 1);
+		err += write(tdb->tracefd, msg, sprintf(msg, "%02x",
+							rec.dptr[i]));
+	err += write(tdb->tracefd, " ", 1);
 }
 #endif

+ 3 - 0
ccan/tdb/tdb_private.h

@@ -49,6 +49,8 @@
 #endif
 #include "tdb.h"
 
+/* #define TDB_TRACE 1 */
+
 #if HAVE_GETPAGESIZE
 #define getpagesize() 0x2000
 #endif
@@ -225,6 +227,7 @@ int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
 int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off);
 int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off);
+int tdb_transaction_cancel_internal(struct tdb_context *tdb);
 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec);
 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec);
 int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct list_struct *rec);

+ 8 - 0
ccan/tdb/tools/Makefile

@@ -0,0 +1,8 @@
+# Build the tdb helper tools against the ccan library three levels up.
+LDLIBS:=-lccan
+# -pg enables gprof profiling output for the built tools.
+CFLAGS:=-I../../.. -Wall -g -pg -O3
+LDFLAGS:=-L../../..
+
+# Neither target names a real file; keep make from being fooled by a
+# stray file called "default" or "clean".
+.PHONY: default clean
+
+default: replay_trace tdbtorture
+
+clean:
+	rm -f replay_trace tdbtorture *.o

+ 509 - 0
ccan/tdb/tools/replay_trace.c

@@ -0,0 +1,509 @@
+#include <ccan/tdb/tdb.h>
+#include <ccan/grab_file/grab_file.h>
+#include <ccan/talloc/talloc.h>
+#include <ccan/str_talloc/str_talloc.h>
+#include <ccan/str/str.h>
+#include <err.h>
+#include <ctype.h>
+#include <sys/time.h>
+
+#define STRINGIFY2(x) #x
+#define STRINGIFY(x) STRINGIFY2(x)
+
+/*
+ * Try or die.
+ *
+ * Evaluates expr once and checks the result against the traced result
+ * op.ret:
+ *  - a negative result is accepted only when tdb_error(tdb) equals
+ *    -op.ret;
+ *  - a non-negative result must equal op.ret exactly.
+ * Any mismatch terminates the program through errx().
+ *
+ * The expansion uses variables named `tdb` and `i` from the calling
+ * scope; `i` is printed as line number i+1.
+ */
+#define try(expr, op)							\
+	do {								\
+		int ret = (expr);					\
+		if (ret < 0) {						\
+			if (tdb_error(tdb) != -op.ret)			\
+				errx(1, "Line %u: " STRINGIFY(expr)	\
+				     "= %i: %s",			\
+				     i+1, ret, tdb_errorstr(tdb));	\
+		} else if (ret != op.ret)				\
+			errx(1, "Line %u: " STRINGIFY(expr) "= %i: %s",	\
+			     i+1, ret, tdb_errorstr(tdb));		\
+	} while (0)
+
+/*
+ * Try or imitate results.
+ *
+ * Evaluates expr and compares it with expect.  On a mismatch a warning
+ * is printed and one of the two fix-up expressions is evaluated:
+ * `force` when expect is 0, otherwise `undo`.
+ *
+ * The expansion uses a variable named `i` from the calling scope; it is
+ * printed as line number i+1.
+ */
+#define unreliable(expr, expect, force, undo)				\
+	do {								\
+		int ret = expr;						\
+		if (ret != expect) {					\
+			warnx("Line %u: %s gave %i not %i",		\
+			      i+1, STRINGIFY(expr), ret, expect);	\
+			if (expect == 0)				\
+				force;					\
+			else						\
+				undo;					\
+		}							\
+	} while (0)
+
+/*
+ * One value per kind of trace line that can be replayed.  main() maps
+ * the first word of each trace line to one of these, and run_ops()
+ * dispatches on it.
+ */
+enum op_type {
+	OP_TDB_LOCKALL,
+	OP_TDB_LOCKALL_MARK,
+	OP_TDB_LOCKALL_UNMARK,
+	OP_TDB_LOCKALL_NONBLOCK,
+	OP_TDB_UNLOCKALL,
+	OP_TDB_LOCKALL_READ,
+	OP_TDB_LOCKALL_READ_NONBLOCK,
+	OP_TDB_UNLOCKALL_READ,
+	OP_TDB_CHAINLOCK,
+	OP_TDB_CHAINLOCK_NONBLOCK,
+	OP_TDB_CHAINLOCK_MARK,
+	OP_TDB_CHAINLOCK_UNMARK,
+	OP_TDB_CHAINUNLOCK,
+	OP_TDB_CHAINLOCK_READ,
+	OP_TDB_CHAINUNLOCK_READ,
+	OP_TDB_INCREMENT_SEQNUM_NONBLOCK,
+	OP_TDB_PARSE_RECORD,
+	OP_TDB_EXISTS,
+	OP_TDB_STORE,
+	OP_TDB_APPEND,
+	OP_TDB_GET_SEQNUM,
+	OP_TDB_WIPE_ALL,
+	OP_TDB_TRANSACTION_START,
+	OP_TDB_TRANSACTION_CANCEL,
+	OP_TDB_TRANSACTION_COMMIT,
+	/* Traversal brackets: a start op, then per-record ops, then the end. */
+	OP_TDB_TRAVERSE_READ_START,
+	OP_TDB_TRAVERSE_START,
+	OP_TDB_TRAVERSE_END,
+	/* One record visited during a traversal ("traverse" trace line). */
+	OP_TDB_TRAVERSE,
+	OP_TDB_FIRSTKEY,
+	OP_TDB_NEXTKEY,
+	OP_TDB_FETCH,
+	OP_TDB_DELETE,
+	OP_TDB_CLOSE,
+};
+
+/* One parsed trace line, ready to be replayed by run_ops(). */
+struct op {
+	/* Which operation to replay. */
+	enum op_type op;
+	/* Key argument, or tdb_null when the trace line carried none. */
+	TDB_DATA key;
+	/* Data argument or expected result data, or tdb_null when absent. */
+	TDB_DATA data;
+	/* Store flag passed to tdb_store(); 0 for every other operation. */
+	int flag;
+	/* Result recorded in the trace (checked by the try/unreliable macros). */
+	int ret;
+};
+
+/*
+ * Convert one hexadecimal digit (upper or lower case) to its value 0-15.
+ * Any other character terminates the program, naming the trace line.
+ */
+static unsigned char hex_char(unsigned int line, char c)
+{
+	/* <ctype.h> functions need an unsigned char value; a negative
+	 * plain char would be undefined behaviour. */
+	int upper = toupper((unsigned char)c);
+
+	if (upper >= 'A' && upper <= 'F')
+		return upper - 'A' + 10;
+	if (upper >= '0' && upper <= '9')
+		return upper - '0';
+	/* Report the character exactly as it appeared in the trace. */
+	errx(1, "Line %u: invalid hex character '%c'", line, c);
+}
+
+/*
+ * TDB data is <size>:<%02x>*
+ *
+ * Parse one such word into a TDB_DATA whose buffer is allocated as a
+ * talloc child of ctx.  `line` is only used in diagnostics.  A missing
+ * ':' separator, a hex payload shorter than the declared size, or an
+ * allocation failure terminates the program.
+ */
+static TDB_DATA make_tdb_data(const void *ctx,
+			      unsigned int line, const char *word)
+{
+	TDB_DATA data;
+	unsigned int i;
+	const char *p;
+
+	data.dsize = atoi(word);
+
+	/* Validate the format before allocating anything. */
+	p = strchr(word, ':');
+	if (!p)
+		errx(1, "Line %u: Invalid tdb data '%s'", line, word);
+	p++;
+
+	/* Each byte needs two hex digits; never read past the terminator.
+	 * Dividing (not multiplying) avoids overflow for huge sizes. */
+	if (strlen(p) / 2 < data.dsize)
+		errx(1, "Line %u: Short tdb data '%s'", line, word);
+
+	data.dptr = talloc_array(ctx, unsigned char, data.dsize);
+	if (!data.dptr)
+		errx(1, "Line %u: out of memory for tdb data", line);
+
+	for (i = 0; i < data.dsize; i++)
+		data.dptr[i] = hex_char(line, p[i*2])*16
+			+ hex_char(line, p[i*2+1]);
+	return data;
+}
+
+/*
+ * Resize the op array to i+1 entries and fill in entry i.
+ *
+ * key and data are trace words in <size>:<hex> form, or NULL when the
+ * operation has none; parsed buffers become talloc children of the
+ * array.  i+1 is passed on as the line number for diagnostics.
+ */
+static void add_op(struct op **op, unsigned int i,
+		   enum op_type type, const char *key, const char *data,
+		   int flag, int ret)
+{
+	struct op *slot;
+
+	*op = talloc_realloc(NULL, *op, struct op, i+1);
+	slot = &(*op)[i];
+
+	slot->op = type;
+	slot->flag = flag;
+	slot->ret = ret;
+	slot->key = key ? make_tdb_data(*op, i+1, key) : tdb_null;
+	slot->data = data ? make_tdb_data(*op, i+1, data) : tdb_null;
+}
+
+/* Record parser used with tdb_parse_record(): yields the data length. */
+static int get_len(TDB_DATA key, TDB_DATA data, void *private_data)
+{
+	int len = data.dsize;
+
+	return len;
+}
+
+/* State shared between op_traverse()/op_read_traverse() and traverse(). */
+struct traverse_info {
+	/* The full array of parsed operations. */
+	const struct op *op;
+	/* Value reported in diagnostics as the line the traversal started on. */
+	unsigned int start_line;
+	/* Index into op[] of the next operation to replay. */
+	unsigned int i;
+	/* Index at which replay must stop (exclusive bound for run_ops). */
+	unsigned int num;
+};
+
+static unsigned run_ops(struct tdb_context *tdb, const struct op op[],
+			unsigned int start, unsigned int stop);
+
+/*
+ * Traversal callback: called once per record visited.
+ *
+ * Checks that the trace recorded a "traverse" line with the same key
+ * and data at this point, then replays any ordinary operations that the
+ * trace recorded before the next traversal step.
+ *
+ * Returns 1 to stop the traversal when the next recorded operation is
+ * the traversal end marker, 0 to continue.  Any divergence between the
+ * trace and the database terminates the program.
+ */
+static int traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
+		    void *_tinfo)
+{
+	struct traverse_info *tinfo = _tinfo;
+	const struct op *expected;
+
+	/* The trace ran out while the database still has records. */
+	if (tinfo->i == tinfo->num)
+		errx(1, "Traversal starting line %u did not terminate",
+		     tinfo->start_line);
+
+	expected = &tinfo->op[tinfo->i];
+
+	/* The trace moved on, but the database is still handing us records. */
+	if (expected->op != OP_TDB_TRAVERSE)
+		errx(1, "Traversal starting line %u terminated early",
+		     tinfo->start_line);
+
+	/* Check we have right key. */
+	if (key.dsize != expected->key.dsize
+	    || memcmp(key.dptr, expected->key.dptr, key.dsize))
+		errx(1, "Line %u: bad traverse key", tinfo->i+1);
+	if (data.dsize != expected->data.dsize
+	    || memcmp(data.dptr, expected->data.dptr, data.dsize))
+		errx(1, "Line %u: bad traverse data", tinfo->i+1);
+
+	tinfo->i++;
+	/* Run any normal ops. */
+	tinfo->i = run_ops(tdb, tinfo->op, tinfo->i, tinfo->num);
+
+	if (tinfo->op[tinfo->i].op == OP_TDB_TRAVERSE_END)
+		return 1;
+	return 0;
+}
+
+/*
+ * Replay a write traversal whose recorded steps begin at op[line].
+ * Returns the index in op[] where the traversal stopped; dies if it
+ * stopped before `num` on anything other than the end marker.
+ */
+static unsigned op_traverse(struct tdb_context *tdb,
+			    const struct op op[],
+			    unsigned int line,
+			    unsigned int num)
+{
+	struct traverse_info tinfo;
+
+	tinfo.op = op;
+	tinfo.start_line = line;
+	tinfo.i = line;
+	tinfo.num = num;
+
+	tdb_traverse(tdb, traverse, &tinfo);
+
+	if (tinfo.i >= num || op[tinfo.i].op == OP_TDB_TRAVERSE_END)
+		return tinfo.i;
+
+	errx(1, "%u: Short traversal", line);
+}
+
+/*
+ * Replay a read-only traversal whose recorded steps begin at op[line].
+ * Returns the index in op[] where the traversal stopped; dies if it
+ * stopped before `num` on anything other than the end marker.
+ */
+static unsigned op_read_traverse(struct tdb_context *tdb,
+				 const struct op op[],
+				 unsigned int line,
+				 unsigned int num)
+{
+	struct traverse_info tinfo;
+
+	tinfo.op = op;
+	tinfo.start_line = line;
+	tinfo.i = line;
+	tinfo.num = num;
+
+	tdb_traverse_read(tdb, traverse, &tinfo);
+
+	if (tinfo.i >= num || op[tinfo.i].op == OP_TDB_TRAVERSE_END)
+		return tinfo.i;
+
+	errx(1, "%u: Short traversal", line);
+}
+
+/*
+ * Replay the operations op[start] .. op[stop-1] against tdb.
+ *
+ * Returns the index at which replay stopped: `stop` when every
+ * operation was replayed, or the index of a traverse / traverse-end
+ * entry, which hands control back to the traversal callback.
+ *
+ * The try() and unreliable() macros used below pick up the local
+ * variables `tdb` and `i`, so those names must not change.
+ */
+static __attribute__((noinline))
+unsigned run_ops(struct tdb_context *tdb, const struct op op[],
+			unsigned int start, unsigned int stop)
+{
+	unsigned int i;
+	TDB_DATA data;
+
+	for (i = start; i < stop; i++) {
+		switch (op[i].op) {
+		case OP_TDB_LOCKALL:
+			try(tdb_lockall(tdb), op[i]);
+			break;
+		case OP_TDB_LOCKALL_MARK:
+			try(tdb_lockall_mark(tdb), op[i]);
+			break;
+		case OP_TDB_LOCKALL_UNMARK:
+			try(tdb_lockall_unmark(tdb), op[i]);
+			break;
+		case OP_TDB_LOCKALL_NONBLOCK:
+			/* On a mismatch, take the lock blocking or drop it so
+			 * the lock state matches what the trace recorded. */
+			unreliable(tdb_lockall_nonblock(tdb), op[i].ret,
+				   tdb_lockall(tdb), tdb_unlockall(tdb));
+			break;
+		case OP_TDB_UNLOCKALL:
+			try(tdb_unlockall(tdb), op[i]);
+			break;
+		case OP_TDB_LOCKALL_READ:
+			try(tdb_lockall_read(tdb), op[i]);
+			break;
+		case OP_TDB_LOCKALL_READ_NONBLOCK:
+			unreliable(tdb_lockall_read_nonblock(tdb), op[i].ret,
+				   tdb_lockall_read(tdb),
+				   tdb_unlockall_read(tdb));
+			break;
+		case OP_TDB_UNLOCKALL_READ:
+			try(tdb_unlockall_read(tdb), op[i]);
+			break;
+		case OP_TDB_CHAINLOCK:
+			try(tdb_chainlock(tdb, op[i].key), op[i]);
+			break;
+		case OP_TDB_CHAINLOCK_NONBLOCK:
+			unreliable(tdb_chainlock_nonblock(tdb, op[i].key),
+				   op[i].ret,
+				   tdb_chainlock(tdb, op[i].key),
+				   tdb_chainunlock(tdb, op[i].key));
+			break;
+		case OP_TDB_CHAINLOCK_MARK:
+			try(tdb_chainlock_mark(tdb, op[i].key), op[i]);
+			break;
+		case OP_TDB_CHAINLOCK_UNMARK:
+			try(tdb_chainlock_unmark(tdb, op[i].key), op[i]);
+			break;
+		case OP_TDB_CHAINUNLOCK:
+			try(tdb_chainunlock(tdb, op[i].key), op[i]);
+			break;
+		case OP_TDB_CHAINLOCK_READ:
+			try(tdb_chainlock_read(tdb, op[i].key), op[i]);
+			break;
+		case OP_TDB_CHAINUNLOCK_READ:
+			try(tdb_chainunlock_read(tdb, op[i].key), op[i]);
+			break;
+		case OP_TDB_INCREMENT_SEQNUM_NONBLOCK:
+			tdb_increment_seqnum_nonblock(tdb);
+			break;
+		case OP_TDB_PARSE_RECORD:
+			try(tdb_parse_record(tdb, op[i].key, get_len, NULL), op[i]);
+			break;
+		case OP_TDB_EXISTS:
+			try(tdb_exists(tdb, op[i].key), op[i]);
+			break;
+		case OP_TDB_STORE:
+			try(tdb_store(tdb, op[i].key, op[i].data, op[i].flag), op[i]);
+			break;
+		case OP_TDB_APPEND:
+			try(tdb_append(tdb, op[i].key, op[i].data), op[i]);
+			break;
+		case OP_TDB_GET_SEQNUM:
+			try(tdb_get_seqnum(tdb), op[i]);
+			break;
+		case OP_TDB_WIPE_ALL:
+			try(tdb_wipe_all(tdb), op[i]);
+			break;
+		case OP_TDB_TRANSACTION_START:
+			try(tdb_transaction_start(tdb), op[i]);
+			break;
+		case OP_TDB_TRANSACTION_CANCEL:
+			try(tdb_transaction_cancel(tdb), op[i]);
+			break;
+		case OP_TDB_TRANSACTION_COMMIT:
+			try(tdb_transaction_commit(tdb), op[i]);
+			break;
+		case OP_TDB_TRAVERSE_READ_START:
+			/* The helper returns where the traversal stopped; the
+			 * loop increment then steps past that entry. */
+			i = op_read_traverse(tdb, op, i+1, stop);
+			break;
+		case OP_TDB_TRAVERSE_START:
+			i = op_traverse(tdb, op, i+1, stop);
+			break;
+		case OP_TDB_TRAVERSE:
+		case OP_TDB_TRAVERSE_END:
+			/* Either of these mean we're in a traversal,
+			 * finished this iteration. */
+			return i;
+		case OP_TDB_FIRSTKEY:
+			/* The expected key is held in op[i].data. */
+			data = tdb_firstkey(tdb);
+			if (data.dsize != op[i].data.dsize
+			    || memcmp(data.dptr, op[i].data.dptr, data.dsize))
+				errx(1, "Line %u: bad firstkey", i+1);
+			break;
+		case OP_TDB_NEXTKEY:
+			data = tdb_nextkey(tdb, op[i].key);
+			if (data.dsize != op[i].data.dsize
+			    || memcmp(data.dptr, op[i].data.dptr, data.dsize))
+				errx(1, "Line %u: bad nextkey", i+1);
+			break;
+		case OP_TDB_FETCH:
+			data = tdb_fetch(tdb, op[i].key);
+			if (data.dsize != op[i].data.dsize
+			    || memcmp(data.dptr, op[i].data.dptr, data.dsize))
+				errx(1, "Line %u: bad fetch", i+1);
+			break;
+		case OP_TDB_DELETE:
+			try(tdb_delete(tdb, op[i].key), op[i]);
+			break;
+		case OP_TDB_CLOSE:
+			/* A close inside the replayed range is never expected. */
+			errx(1, "Line %u: unexpected close", i+1);
+			break;
+		}
+	}
+	return i;
+}
+
+/*
+ * replay_trace <tracefile> <tdbfile>
+ *
+ * Parses every line of the trace file into a struct op (line N of the
+ * trace becomes op[N-1]; the first line must be tdb_open, which opens
+ * <tdbfile> instead of recording an op), replays all operations except
+ * the first and last entries, closes the database and prints how long
+ * the replay took.  Malformed traces and replay mismatches terminate
+ * the program with a diagnostic.
+ */
+int main(int argc, char *argv[])
+{
+	const char *file;
+	char **lines;
+	unsigned int i;
+	struct tdb_context *tdb = NULL;
+	struct op *op = talloc_array(NULL, struct op, 1);
+	struct timeval start, end;
+
+	if (argc != 3)
+		errx(1, "Usage: %s <tracefile> <tdbfile>", argv[0]);
+
+	file = grab_file(NULL, argv[1], NULL);
+	if (!file)
+		err(1, "Reading %s", argv[1]);
+
+	lines = strsplit(file, file, "\n", NULL);
+
+	for (i = 0; lines[i]; i++) {
+		char **words = strsplit(lines, lines[i], " ", NULL);
+		if (!tdb && !streq(words[0], "tdb_open"))
+			errx(1, "Line %u is not tdb_open", i+1);
+
+		if (streq(words[0], "tdb_open")) {
+			if (tdb)
+				errx(1, "Line %u: tdb_open again?", i+1);
+			tdb = tdb_open(argv[2], atoi(words[2]),
+				       strtoul(words[3], NULL, 0),
+				       strtoul(words[4], NULL, 0), 0600);
+			if (!tdb)
+				err(1, "Opening tdb %s", argv[2]);
+		} else if (streq(words[0], "tdb_lockall")) {
+			add_op(&op, i, OP_TDB_LOCKALL, NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_lockall_mark")) {
+			add_op(&op, i, OP_TDB_LOCKALL_MARK, NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_lockall_unmark")) {
+			add_op(&op, i, OP_TDB_LOCKALL_UNMARK, NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_lockall_nonblock")) {
+			add_op(&op, i, OP_TDB_LOCKALL_NONBLOCK, NULL, NULL, 0,
+			       atoi(words[1]));
+		} else if (streq(words[0], "tdb_unlockall")) {
+			add_op(&op, i, OP_TDB_UNLOCKALL, NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_lockall_read")) {
+			add_op(&op, i, OP_TDB_LOCKALL_READ, NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_lockall_read_nonblock")) {
+			add_op(&op, i, OP_TDB_LOCKALL_READ_NONBLOCK, NULL, NULL,
+			       0, atoi(words[1]));
+		} else if (streq(words[0], "tdb_unlockall_read")) {
+			/* Lines were split on '\n', so the keyword never
+			 * carries a trailing newline. */
+			add_op(&op, i, OP_TDB_UNLOCKALL_READ, NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_chainlock")) {
+			add_op(&op, i, OP_TDB_CHAINLOCK, words[1], NULL, 0, 0);
+		} else if (streq(words[0], "tdb_chainlock_nonblock")) {
+			add_op(&op, i, OP_TDB_CHAINLOCK_NONBLOCK,
+			       words[1], NULL, 0, atoi(words[3]));
+		} else if (streq(words[0], "tdb_chainlock_mark")) {
+			add_op(&op, i, OP_TDB_CHAINLOCK_MARK, words[1], NULL,
+			       0, 0);
+		} else if (streq(words[0], "tdb_chainlock_unmark")) {
+			add_op(&op, i, OP_TDB_CHAINLOCK_UNMARK, words[1], NULL,
+			       0, 0);
+		} else if (streq(words[0], "tdb_chainunlock")) {
+			add_op(&op, i, OP_TDB_CHAINUNLOCK, words[1], NULL,
+			       0, 0);
+		} else if (streq(words[0], "tdb_chainlock_read")) {
+			add_op(&op, i, OP_TDB_CHAINLOCK_READ, words[1],
+			       NULL, 0, 0);
+		} else if (streq(words[0], "tdb_chainunlock_read")) {
+			add_op(&op, i, OP_TDB_CHAINUNLOCK_READ, words[1],
+			       NULL, 0, 0);
+		} else if (streq(words[0], "tdb_close")) {
+			add_op(&op, i, OP_TDB_CLOSE, NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_increment_seqnum_nonblock")) {
+			add_op(&op, i, OP_TDB_INCREMENT_SEQNUM_NONBLOCK,
+			       NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_fetch")) {
+			if (streq(words[3], "ENOENT"))
+				add_op(&op, i, OP_TDB_FETCH, words[1], NULL, 0,
+				       -TDB_ERR_NOEXIST);
+			else
+				add_op(&op, i, OP_TDB_FETCH, words[1], words[3],
+				       0, 0);
+		} else if (streq(words[0], "tdb_parse_record")) {
+			if (streq(words[3], "ENOENT"))
+				add_op(&op, i, OP_TDB_PARSE_RECORD,
+				       words[1], NULL, 0, -TDB_ERR_NOEXIST);
+			else
+				add_op(&op, i, OP_TDB_PARSE_RECORD,
+				       words[1], NULL, 0, atoi(words[3]));
+		} else if (streq(words[0], "tdb_exists")) {
+			add_op(&op, i, OP_TDB_EXISTS, words[1], NULL, 0,
+			       atoi(words[3]));
+		} else if (streq(words[0], "tdb_delete")) {
+			add_op(&op, i, OP_TDB_DELETE, words[1], NULL, 0,
+			       streq(words[3], "ENOENT")
+			       ? -TDB_ERR_NOEXIST : 0);
+		} else if (streq(words[0], "tdb_store")) {
+			int flag;
+
+			if (streq(words[1], "insert"))
+				flag = TDB_INSERT;
+			else if (streq(words[1], "modify"))
+				flag = TDB_MODIFY;
+			else if (streq(words[1], "normal"))
+				flag = 0;
+			else
+				errx(1, "Line %u: invalid tdb_store", i+1);
+
+			if (streq(words[5], "EEXIST"))
+				add_op(&op, i, OP_TDB_STORE, words[2], words[3],
+				       flag, -TDB_ERR_EXISTS);
+			else if (streq(words[5], "ENOENT"))
+				add_op(&op, i, OP_TDB_STORE, words[2], words[3],
+				       flag, -TDB_ERR_NOEXIST);
+			else
+				add_op(&op, i, OP_TDB_STORE, words[2], words[3],
+				       flag, 0);
+		} else if (streq(words[0], "tdb_append")) {
+			/* Appends must replay through tdb_append(), not
+			 * tdb_store(). */
+			add_op(&op, i, OP_TDB_APPEND, words[1], words[2], 0, 0);
+		} else if (streq(words[0], "tdb_get_seqnum")) {
+			add_op(&op, i, OP_TDB_GET_SEQNUM, NULL, NULL, 0,
+			       atoi(words[2]));
+		} else if (streq(words[0], "tdb_wipe_all")) {
+			add_op(&op, i, OP_TDB_WIPE_ALL, NULL, NULL, 0, 0);
+		} else if (streq(words[0], "tdb_transaction_start")) {
+			add_op(&op, i, OP_TDB_TRANSACTION_START, NULL, NULL,
+			       0, 0);
+		} else if (streq(words[0], "tdb_transaction_cancel")) {
+			add_op(&op, i, OP_TDB_TRANSACTION_CANCEL, NULL, NULL,
+			       0, 0);
+		} else if (streq(words[0], "tdb_transaction_commit")) {
+			add_op(&op, i, OP_TDB_TRANSACTION_COMMIT, NULL, NULL,
+			       0, 0);
+		} else if (streq(words[0], "tdb_traverse_read_start")) {
+			add_op(&op, i, OP_TDB_TRAVERSE_READ_START, NULL, NULL,
+			       0, 0);
+		} else if (streq(words[0], "tdb_traverse_start")) {
+			add_op(&op, i, OP_TDB_TRAVERSE_START, NULL, NULL,
+			       0, 0);
+		} else if (streq(words[0], "tdb_traverse_end")) {
+			add_op(&op, i, OP_TDB_TRAVERSE_END, NULL, NULL,
+			       0, atoi(words[2]));
+		} else if (streq(words[0], "traverse")) {
+			add_op(&op, i, OP_TDB_TRAVERSE, words[1], words[2],
+			       0, 0);
+		} else if (streq(words[0], "tdb_firstkey")) {
+			if (streq(words[2], "ENOENT"))
+				add_op(&op, i, OP_TDB_FIRSTKEY, NULL, NULL,
+				       0, -TDB_ERR_NOEXIST);
+			else
+				add_op(&op, i, OP_TDB_FIRSTKEY, NULL, words[2],
+				       0, 0);
+		} else if (streq(words[0], "tdb_nextkey")) {
+			if (streq(words[3], "ENOENT"))
+				add_op(&op, i, OP_TDB_NEXTKEY, words[1], NULL,
+				       0, -TDB_ERR_NOEXIST);
+			else
+				add_op(&op, i, OP_TDB_NEXTKEY,
+				       words[1], words[3], 0, 0);
+		} else
+			errx(1, "Line %u: unknown op '%s'", i+1, words[0]);
+	}
+
+	/* An empty trace never opened a database; bail out before i-1
+	 * wraps around below. */
+	if (!tdb)
+		errx(1, "%s: trace contains no operations", argv[1]);
+
+	gettimeofday(&start, NULL);
+	/* op[0] is the tdb_open line and the last entry should be
+	 * tdb_close; replay everything in between. */
+	run_ops(tdb, op, 1, i-1);
+	gettimeofday(&end, NULL);
+	if (op[i-1].op != OP_TDB_CLOSE)
+		warnx("Last operation is not tdb_close: incomplete?");
+	tdb_close(tdb);
+	end.tv_sec -= start.tv_sec;
+	printf("Time replaying: %lu usec\n",
+	       end.tv_sec * 1000000UL + (end.tv_usec - start.tv_usec));
+	exit(0);
+}

+ 331 - 0
ccan/tdb/tools/tdbtorture.c

@@ -0,0 +1,331 @@
+/* this tests tdb by doing lots of ops from several simultaneous
+   writers - that stresses the locking code. 
+*/
+
+#include <ccan/tdb/tdb.h>
+#include <stdlib.h>
+#include <err.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <time.h>
+#include <sys/wait.h>
+
+#define REOPEN_PROB 30
+#define DELETE_PROB 8
+#define STORE_PROB 4
+#define APPEND_PROB 6
+#define TRANSACTION_PROB 10
+#define TRANSACTION_PREPARE_PROB 2
+#define LOCKSTORE_PROB 5
+#define TRAVERSE_PROB 20
+#define TRAVERSE_READ_PROB 20
+#define CULL_PROB 100
+#define KEYLEN 3
+#define DATALEN 100
+
+static struct tdb_context *db;
+static int in_transaction;
+static int error_count;
+
+/* Let the compiler check format arguments where the attribute macro exists. */
+#ifdef PRINTF_ATTRIBUTE
+static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4);
+#endif
+/*
+ * Logging callback (installed as log_ctx.log_fn in main).
+ *
+ * Every message is counted in error_count regardless of `level`, then
+ * printed to stdout and flushed.  `tdb` and `level` are not used.
+ */
+static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...)
+{
+	va_list ap;
+    
+	error_count++;
+
+	va_start(ap, format);
+	vfprintf(stdout, format, ap);
+	va_end(ap);
+	fflush(stdout);
+	/* Disabled debugging aid: attach gdb in an xterm to this process. */
+#if 0
+	{
+		char *ptr;
+		asprintf(&ptr,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid());
+		system(ptr);
+		free(ptr);
+	}
+#endif	
+}
+
+/*
+ * Report a failed operation through perror() and count it in
+ * error_count.  Despite the name this returns to the caller.
+ */
+static void fatal(const char *why)
+{
+	perror(why);
+	error_count += 1;
+}
+
+/*
+ * Allocate a NUL-terminated string of `len` random lower-case letters
+ * drawn from rand().  The caller owns the buffer and must free() it.
+ * Terminates the program if the allocation fails.
+ */
+static char *randbuf(int len)
+{
+	char *buf;
+	int i;
+
+	buf = malloc(len+1);
+	if (!buf)
+		err(1, "allocating %d byte random buffer", len+1);
+
+	for (i=0;i<len;i++) {
+		buf[i] = 'a' + (rand() % 26);
+	}
+	buf[i] = 0;
+	return buf;
+}
+
+/*
+ * Traversal callback: delete the visited record with probability
+ * 1/CULL_PROB (never when CULL_PROB is 0).  Always continues.
+ */
+static int cull_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
+			 void *state)
+{
+#if CULL_PROB
+	int cull = (random() % CULL_PROB == 0);
+
+	if (cull)
+		tdb_delete(tdb, key);
+#endif
+	return 0;
+}
+
+/*
+ * Perform one randomly chosen operation on the global database `db`.
+ *
+ * A random key and data buffer are generated first (both sizes include
+ * the terminating NUL).  The *_PROB sections below are then tried in
+ * order; the first whose random draw hits performs its operation and
+ * jumps to `next`.  If none hits, a plain fetch is done.  Failures are
+ * reported through fatal(), which counts them in error_count.
+ */
+static void addrec_db(void)
+{
+	int klen, dlen;
+	char *k, *d;
+	TDB_DATA key, data;
+
+	klen = 1 + (rand() % KEYLEN);
+	dlen = 1 + (rand() % DATALEN);
+
+	k = randbuf(klen);
+	d = randbuf(dlen);
+
+	key.dptr = (unsigned char *)k;
+	key.dsize = klen+1;
+
+	data.dptr = (unsigned char *)d;
+	data.dsize = dlen+1;
+
+#if TRANSACTION_PROB
+	/* Possibly start a transaction when none is open. */
+	if (in_transaction == 0 && random() % TRANSACTION_PROB == 0) {
+		if (tdb_transaction_start(db) != 0) {
+			fatal("tdb_transaction_start failed");
+		}
+		in_transaction++;
+		goto next;
+	}
+	/* Possibly commit the open transaction. */
+	if (in_transaction && random() % TRANSACTION_PROB == 0) {
+#if 0
+		if (random() % TRANSACTION_PREPARE_PROB == 0) {
+			if (tdb_transaction_prepare_commit(db) != 0) {
+				fatal("tdb_transaction_prepare_commit failed");
+			}
+		}
+#endif
+		if (tdb_transaction_commit(db) != 0) {
+			fatal("tdb_transaction_commit failed");
+		}
+		in_transaction--;
+		goto next;
+	}
+	/* Possibly cancel the open transaction. */
+	if (in_transaction && random() % TRANSACTION_PROB == 0) {
+		if (tdb_transaction_cancel(db) != 0) {
+			fatal("tdb_transaction_cancel failed");
+		}
+		in_transaction--;
+		goto next;
+	}
+#endif
+
+#if REOPEN_PROB
+	/* Reopening is only attempted outside a transaction. */
+	if (in_transaction == 0 && random() % REOPEN_PROB == 0) {
+		tdb_reopen_all(0);
+		goto next;
+	} 
+#endif
+
+#if DELETE_PROB
+	if (random() % DELETE_PROB == 0) {
+		tdb_delete(db, key);
+		goto next;
+	}
+#endif
+
+#if STORE_PROB
+	if (random() % STORE_PROB == 0) {
+		if (tdb_store(db, key, data, TDB_REPLACE) != 0) {
+			fatal("tdb_store failed");
+		}
+		goto next;
+	}
+#endif
+
+#if APPEND_PROB
+	if (random() % APPEND_PROB == 0) {
+		if (tdb_append(db, key, data) != 0) {
+			fatal("tdb_append failed");
+		}
+		goto next;
+	}
+#endif
+
+#if LOCKSTORE_PROB
+	/* Fetch and re-store the record while holding its chain lock. */
+	if (random() % LOCKSTORE_PROB == 0) {
+		tdb_chainlock(db, key);
+		/* `data` now refers to the fetched buffer; the random
+		 * buffer is still reachable through `d` and freed below. */
+		data = tdb_fetch(db, key);
+		if (tdb_store(db, key, data, TDB_REPLACE) != 0) {
+			fatal("tdb_store failed");
+		}
+		if (data.dptr) free(data.dptr);
+		tdb_chainunlock(db, key);
+		goto next;
+	} 
+#endif
+
+#if TRAVERSE_PROB
+	if (random() % TRAVERSE_PROB == 0) {
+		tdb_traverse(db, cull_traverse, NULL);
+		goto next;
+	}
+#endif
+
+#if TRAVERSE_READ_PROB
+	if (random() % TRAVERSE_READ_PROB == 0) {
+		tdb_traverse_read(db, NULL, NULL);
+		goto next;
+	}
+#endif
+
+	/* Nothing else was picked: do a plain fetch and discard the result. */
+	data = tdb_fetch(db, key);
+	if (data.dptr) free(data.dptr);
+
+next:
+	free(k);
+	free(d);
+}
+
+/* Traversal callback: delete every visited record and keep going. */
+static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
+                       void *state)
+{
+	(void)tdb_delete(tdb, key);
+
+	return 0;
+}
+
+/* Print the command-line synopsis to stdout and exit with status 0. */
+static void usage(void)
+{
+	fputs("Usage: tdbtorture [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n",
+	      stdout);
+	exit(0);
+}
+
+/*
+ * tdbtorture [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]
+ *
+ * Forks NUM_PROCS-1 children; every process opens torture.tdb, runs
+ * NUM_LOOPS random operations (stopping early on the first error) and
+ * finally empties the database by traversal.  The original process
+ * then reaps the children and reports "OK" when nothing failed.
+ *
+ * Returns the number of errors seen by this process (the parent also
+ * counts children that exited non-zero or terminated abnormally).
+ */
+int main(int argc, char * const *argv)
+{
+	int i, seed = -1;
+	int num_procs = 3;
+	int num_loops = 5000;
+	int hash_size = 2;
+	int c;
+	extern char *optarg;
+	pid_t *pids;
+
+	struct tdb_logging_context log_ctx;
+	log_ctx.log_fn = tdb_log;
+
+	while ((c = getopt(argc, argv, "n:l:s:H:h")) != -1) {
+		switch (c) {
+		case 'n':
+			num_procs = strtol(optarg, NULL, 0);
+			break;
+		case 'l':
+			num_loops = strtol(optarg, NULL, 0);
+			break;
+		case 'H':
+			hash_size = strtol(optarg, NULL, 0);
+			break;
+		case 's':
+			seed = strtol(optarg, NULL, 0);
+			break;
+		default:
+			usage();
+		}
+	}
+
+	/* pids[0] is written unconditionally below, so at least one slot
+	 * is required. */
+	if (num_procs < 1)
+		errx(1, "NUM_PROCS must be at least 1");
+
+	unlink("torture.tdb");
+
+	pids = calloc(num_procs, sizeof(pid_t));
+	if (!pids)
+		err(1, "allocating pid table");
+	pids[0] = getpid();
+
+	for (i=0;i<num_procs-1;i++) {
+		pids[i+1] = fork();
+		if (pids[i+1] == 0)
+			break;
+		if (pids[i+1] == -1) {
+			/* Never keep -1 in the table: kill(-1, ...) would
+			 * signal every process we are allowed to signal. */
+			fatal("fork failed");
+			pids[i+1] = 0;
+			/* Only wait for the children that really exist. */
+			num_procs = i + 1;
+			break;
+		}
+	}
+
+	db = tdb_open_ex("torture.tdb", hash_size, TDB_CLEAR_IF_FIRST, 
+			 O_RDWR | O_CREAT, 0600, &log_ctx, NULL);
+	if (!db) {
+		fatal("db open failed");
+	}
+
+	if (seed == -1) {
+		seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
+	}
+
+	if (i == 0) {
+		printf("testing with %d processes, %d loops, %d hash_size, seed=%d\n", 
+		       num_procs, num_loops, hash_size, seed);
+	}
+
+	/* Offset the seed by the process index so processes diverge. */
+	srand(seed + i);
+	srandom(seed + i);
+
+	for (i=0;i<num_loops && error_count == 0;i++) {
+		addrec_db();
+	}
+
+	if (error_count == 0) {
+		tdb_traverse_read(db, NULL, NULL);
+		tdb_traverse(db, traverse_fn, NULL);
+		tdb_traverse(db, traverse_fn, NULL);
+	}
+
+	/* The open may have failed above; there is nothing to close then. */
+	if (db) {
+		tdb_close(db);
+	}
+
+	/* Children are done; only the original process reaps. */
+	if (getpid() != pids[0]) {
+		return error_count;
+	}
+
+	for (i=1;i<num_procs;i++) {
+		int status, j;
+		pid_t pid;
+		if (error_count != 0) {
+			/* try and stop the test on any failure */
+			for (j=1;j<num_procs;j++) {
+				if (pids[j] != 0) {
+					kill(pids[j], SIGTERM);
+				}
+			}
+		}
+		pid = waitpid(-1, &status, 0);
+		if (pid == -1) {
+			perror("failed to wait for child");
+			exit(1);
+		}
+		for (j=1;j<num_procs;j++) {
+			if (pids[j] == pid) break;
+		}
+		if (j == num_procs) {
+			printf("unknown child %d exited!?\n", (int)pid);
+			exit(1);
+		}
+		/* WEXITSTATUS is only meaningful for a normal exit. */
+		if (!WIFEXITED(status)) {
+			printf("child %d terminated abnormally (status 0x%x)\n",
+			       (int)pid, (unsigned int)status);
+			error_count++;
+		} else if (WEXITSTATUS(status) != 0) {
+			printf("child %d exited with status %d\n",
+			       (int)pid, WEXITSTATUS(status));
+			error_count++;
+		}
+		pids[j] = 0;
+	}
+
+	free(pids);
+
+	if (error_count == 0) {
+		printf("OK\n");
+	}
+
+	return error_count;
+}

+ 63 - 60
ccan/tdb/transaction.c

@@ -398,6 +398,58 @@ static const struct tdb_methods transaction_methods = {
 	transaction_brlock
 };
 
+/*
+ * Cancel the current transaction without emitting a trace record.
+ *
+ * Returns -1 (after logging) when no transaction is active, 0
+ * otherwise.  For a nested transaction only the nesting level is
+ * dropped and the transaction is flagged as failed; for the outermost
+ * one all transaction state and locks are released.
+ */
+int tdb_transaction_cancel_internal(struct tdb_context *tdb)
+{
+	int i;
+
+	if (tdb->transaction == NULL) {
+		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n"));
+		return -1;
+	}
+
+	/* Nested cancel: record the error and leave this nesting level. */
+	if (tdb->transaction->nesting != 0) {
+		tdb->transaction->transaction_error = 1;
+		tdb->transaction->nesting--;
+		return 0;
+	}		
+
+	/* Go back to the map size saved in the transaction. */
+	tdb->map_size = tdb->transaction->old_map_size;
+
+	/* free all the transaction blocks */
+	for (i=0;i<tdb->transaction->num_blocks;i++) {
+		if (tdb->transaction->blocks[i] != NULL) {
+			free(tdb->transaction->blocks[i]);
+		}
+	}
+	SAFE_FREE(tdb->transaction->blocks);
+
+	/* remove any global lock created during the transaction */
+	if (tdb->global_lock.count != 0) {
+		tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size);
+		tdb->global_lock.count = 0;
+	}
+
+	/* remove any locks created during the transaction */
+	if (tdb->num_locks != 0) {
+		for (i=0;i<tdb->num_lockrecs;i++) {
+			tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list,
+				   F_UNLCK,F_SETLKW, 0, 1);
+		}
+		tdb->num_locks = 0;
+		tdb->num_lockrecs = 0;
+		SAFE_FREE(tdb->lockrecs);
+	}
+
+	/* restore the normal io methods */
+	tdb->methods = tdb->transaction->io_methods;
+
+	/* Release the lock starting at FREELIST_TOP and the transaction
+	 * lock, then free the remaining transaction state. */
+	tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
+	tdb_transaction_unlock(tdb);
+	SAFE_FREE(tdb->transaction->hash_heads);
+	SAFE_FREE(tdb->transaction);
+	
+	return 0;
+}
 
 /*
   start a tdb transaction. No token is returned, as only a single
@@ -422,7 +474,7 @@ int tdb_transaction_start(struct tdb_context *tdb)
 				 tdb->transaction->nesting));
 			return 0;
 		} else {
-			tdb_transaction_cancel(tdb);
+			tdb_transaction_cancel_internal(tdb);
 			TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: cancelling previous transaction\n"));
 		}
 	}
@@ -514,58 +566,9 @@ fail:
 */
 int tdb_transaction_cancel(struct tdb_context *tdb)
 {	
-	int i;
-
 	tdb_trace(tdb, "tdb_transaction_cancel\n");
-	if (tdb->transaction == NULL) {
-		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_cancel: no transaction\n"));
-		return -1;
-	}
-
-	if (tdb->transaction->nesting != 0) {
-		tdb->transaction->transaction_error = 1;
-		tdb->transaction->nesting--;
-		return 0;
-	}		
-
-	tdb->map_size = tdb->transaction->old_map_size;
-
-	/* free all the transaction blocks */
-	for (i=0;i<tdb->transaction->num_blocks;i++) {
-		if (tdb->transaction->blocks[i] != NULL) {
-			free(tdb->transaction->blocks[i]);
-		}
-	}
-	SAFE_FREE(tdb->transaction->blocks);
-
-	/* remove any global lock created during the transaction */
-	if (tdb->global_lock.count != 0) {
-		tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 4*tdb->header.hash_size);
-		tdb->global_lock.count = 0;
-	}
-
-	/* remove any locks created during the transaction */
-	if (tdb->num_locks != 0) {
-		for (i=0;i<tdb->num_lockrecs;i++) {
-			tdb_brlock(tdb,FREELIST_TOP+4*tdb->lockrecs[i].list,
-				   F_UNLCK,F_SETLKW, 0, 1);
-		}
-		tdb->num_locks = 0;
-		tdb->num_lockrecs = 0;
-		SAFE_FREE(tdb->lockrecs);
-	}
-
-	/* restore the normal io methods */
-	tdb->methods = tdb->transaction->io_methods;
-
-	tdb_brlock(tdb, FREELIST_TOP, F_UNLCK, F_SETLKW, 0, 0);
-	tdb_transaction_unlock(tdb);
-	SAFE_FREE(tdb->transaction->hash_heads);
-	SAFE_FREE(tdb->transaction);
-	
-	return 0;
+	return tdb_transaction_cancel_internal(tdb);
 }
-
 /*
   sync to disk
 */
@@ -856,7 +859,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 
 	if (tdb->transaction->transaction_error) {
 		tdb->ecode = TDB_ERR_IO;
-		tdb_transaction_cancel(tdb);
+		tdb_transaction_cancel_internal(tdb);
 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: transaction error pending\n"));
 		return -1;
 	}
@@ -869,7 +872,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 
 	/* check for a null transaction */
 	if (tdb->transaction->blocks == NULL) {
-		tdb_transaction_cancel(tdb);
+		tdb_transaction_cancel_internal(tdb);
 		return 0;
 	}
 
@@ -880,7 +883,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 	if (tdb->num_locks || tdb->global_lock.count) {
 		tdb->ecode = TDB_ERR_LOCK;
 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: locks pending on commit\n"));
-		tdb_transaction_cancel(tdb);
+		tdb_transaction_cancel_internal(tdb);
 		return -1;
 	}
 
@@ -888,7 +891,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 	if (tdb_brlock_upgrade(tdb, FREELIST_TOP, 0) == -1) {
 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: failed to upgrade hash locks\n"));
 		tdb->ecode = TDB_ERR_LOCK;
-		tdb_transaction_cancel(tdb);
+		tdb_transaction_cancel_internal(tdb);
 		return -1;
 	}
 
@@ -897,7 +900,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 	if (tdb_brlock(tdb, GLOBAL_LOCK, F_WRLCK, F_SETLKW, 0, 1) == -1) {
 		TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_commit: failed to get global lock\n"));
 		tdb->ecode = TDB_ERR_LOCK;
-		tdb_transaction_cancel(tdb);
+		tdb_transaction_cancel_internal(tdb);
 		return -1;
 	}
 
@@ -906,7 +909,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 		if (transaction_setup_recovery(tdb, &magic_offset) == -1) {
 			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: failed to setup recovery data\n"));
 			tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
-			tdb_transaction_cancel(tdb);
+			tdb_transaction_cancel_internal(tdb);
 			return -1;
 		}
 	}
@@ -919,7 +922,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 			tdb->ecode = TDB_ERR_IO;
 			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: expansion failed\n"));
 			tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
-			tdb_transaction_cancel(tdb);
+			tdb_transaction_cancel_internal(tdb);
 			return -1;
 		}
 		tdb->map_size = tdb->transaction->old_map_size;
@@ -950,7 +953,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 			tdb->methods = methods;
 			tdb_transaction_recover(tdb); 
 
-			tdb_transaction_cancel(tdb);
+			tdb_transaction_cancel_internal(tdb);
 			tdb_brlock(tdb, GLOBAL_LOCK, F_UNLCK, F_SETLKW, 0, 1);
 
 			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_commit: write failed\n"));
@@ -999,7 +1002,7 @@ int tdb_transaction_commit(struct tdb_context *tdb)
 
 	/* use a transaction cancel to free memory and remove the
 	   transaction locks */
-	tdb_transaction_cancel(tdb);
+	tdb_transaction_cancel_internal(tdb);
 
 	return 0;
 }

+ 9 - 2
ccan/tdb/traverse.c

@@ -169,6 +169,11 @@ static int tdb_traverse_internal(struct tdb_context *tdb,
 		dbuf.dptr = key.dptr + rec.key_len;
 		dbuf.dsize = rec.data_len;
 
+		tdb_trace(tdb, "traverse ");
+		tdb_trace_record(tdb, key);
+		tdb_trace_record(tdb, dbuf);
+		tdb_trace(tdb, "\n");
+
 		/* Drop chain lock, call out */
 		if (tdb_unlock(tdb, tl->hash, tl->lock_rw) != 0) {
 			ret = -1;
@@ -212,9 +217,10 @@ int tdb_traverse_read(struct tdb_context *tdb,
 	}
 
 	tdb->traverse_read++;
+	tdb_trace(tdb, "tdb_traverse_read_start\n");
 	ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
+	tdb_trace(tdb, "tdb_traverse_end = %i\n", ret);
 	tdb->traverse_read--;
-	tdb_trace(tdb, "tdb_traverse_read = %i\n", ret);
 
 	tdb_transaction_unlock(tdb);
 
@@ -243,9 +249,10 @@ int tdb_traverse(struct tdb_context *tdb,
 	}
 
 	tdb->traverse_write++;
+	tdb_trace(tdb, "tdb_traverse_start\n");
 	ret = tdb_traverse_internal(tdb, fn, private_data, &tl);
+	tdb_trace(tdb, "tdb_traverse_end = %i\n", ret);
 	tdb->traverse_write--;
-	tdb_trace(tdb, "tdb_traverse = %i\n", ret);
 
 	tdb_transaction_unlock(tdb);