Browse Source

tdb: new test for dying during a transaction.

This demonstrates some serious failings, which get cleaned up in
following patches.
Rusty Russell 16 years ago
parent
commit
b86f8688fe

+ 66 - 12
ccan/tdb/test/external-transaction.c

@@ -9,8 +9,12 @@
 #include <string.h>
 #include <errno.h>
 #include <ccan/tdb/tdb.h>
+#include <ccan/tdb/tdb_private.h>
 #include <ccan/tap/tap.h>
 #include <stdio.h>
+#include <stdarg.h>
+
+static struct tdb_context *tdb;
 
 static volatile sig_atomic_t alarmed;
 static void do_alarm(int signum)
@@ -18,20 +22,61 @@ static void do_alarm(int signum)
 	alarmed++;
 }
 
+static void taplog(struct tdb_context *tdb,
+		   enum tdb_debug_level level,
+		   const char *fmt, ...)
+{
+	va_list ap;
+	char line[200];
+
+	va_start(ap, fmt);
+	vsprintf(line, fmt, ap);
+	va_end(ap);
+
+	diag("external: %s", line);
+}
+
 static int do_operation(enum operation op, const char *name)
 {
+	struct tdb_logging_context logctx = { taplog, NULL };
+
 	TDB_DATA k = { .dptr = (void *)"a", .dsize = 1 };
 	TDB_DATA d = { .dptr = (void *)"b", .dsize = 1 };
-	struct tdb_context *tdb;
 
-	tdb = tdb_open(name, 0, op == OPEN_WITH_CLEAR_IF_FIRST ?
-		       TDB_CLEAR_IF_FIRST : TDB_DEFAULT, O_RDWR, 0);
-	if (!tdb)
-		return -1;
+	if (op <= KEEP_OPENED) {
+		tdb = tdb_open_ex(name, 0, op == OPEN_WITH_CLEAR_IF_FIRST ?
+				  TDB_CLEAR_IF_FIRST : TDB_DEFAULT, O_RDWR, 0,
+				  &logctx, NULL);
+		if (!tdb)
+			return -1;
+	}
 
-	if (op == OPEN || op == OPEN_WITH_CLEAR_IF_FIRST) {
-		tdb_close(tdb);
+	if (op == KEEP_OPENED) {
 		return 0;
+	} else if (op == OPEN || op == OPEN_WITH_CLEAR_IF_FIRST || op == CLOSE) {
+		tdb_close(tdb);
+		tdb = NULL;
+		return 1;
+	} else if (op == STORE_KEEP_OPENED) {
+		if (tdb_store(tdb, k, d, 0) != 0)
+			return -2;
+		return 1;
+	} else if (op == FETCH_KEEP_OPENED) {
+		TDB_DATA ret;
+		ret = tdb_fetch(tdb, k);
+		if (ret.dptr == NULL) {
+			if (tdb_error(tdb) == TDB_ERR_NOEXIST)
+				return 1;
+			return -3;
+		}
+		if (ret.dsize != 1 || *(char *)ret.dptr != 'b')
+			return -4;
+		free(ret.dptr);
+		return 1;
+	} else if (op == CHECK_KEEP_OPENED) {
+		return tdb_check(tdb, NULL, 0) == 0;
+	} else if (op == NEEDS_RECOVERY_KEEP_OPENED) {
+		return tdb_maybe_needs_recovery(tdb);
 	}
 
 	alarmed = 0;
@@ -41,21 +86,29 @@ static int do_operation(enum operation op, const char *name)
 	if (tdb_transaction_start(tdb) != 0)
 		goto maybe_alarmed;
 
+	alarm(0);
 	if (tdb_store(tdb, k, d, 0) != 0) {
 		tdb_transaction_cancel(tdb);
 		tdb_close(tdb);
+		tdb = NULL;
 		return -2;
 	}
 
 	if (tdb_transaction_commit(tdb) == 0) {
 		tdb_delete(tdb, k);
-		tdb_close(tdb);
+		if (op != TRANSACTION_KEEP_OPENED) {
+			tdb_close(tdb);
+			tdb = NULL;
+		}
 		return 1;
 	}
 
 	tdb_delete(tdb, k);
 maybe_alarmed:
-	tdb_close(tdb);
+	if (op != TRANSACTION_KEEP_OPENED) {
+		tdb_close(tdb);
+		tdb = NULL;
+	}
 	if (alarmed)
 		return 0;
 	return -3;
@@ -68,7 +121,7 @@ struct agent {
 /* Do this before doing any tdb stuff.  Return handle, or NULL. */
 struct agent *prepare_external_agent(void)
 {
-	int pid;
+	int pid, ret;
 	int command[2], response[2];
 	struct sigaction act = { .sa_handler = do_alarm };
 	char name[1+PATH_MAX];
@@ -94,7 +147,7 @@ struct agent *prepare_external_agent(void)
 	close(response[0]);
 	sigaction(SIGALRM, &act, NULL);
 
-	while (read(command[0], name, sizeof(name)) != 0) {
+	while ((ret = read(command[0], name, sizeof(name))) > 0) {
 		int result;
 
 		result = do_operation(name[0], name+1);
@@ -102,11 +155,12 @@ struct agent *prepare_external_agent(void)
 		    != sizeof(result))
 			err(1, "Writing response");
 	}
+	diag("external: read %i: %s", ret, strerror(errno));
 	exit(0);
 }
 
 /* Ask the external agent to try to do an operation. */
-bool external_agent_operation(struct agent *agent,
+int external_agent_operation(struct agent *agent,
 			      enum operation op, const char *tdbname)
 {
 	int res;

+ 10 - 3
ccan/tdb/test/external-transaction.h

@@ -6,15 +6,22 @@ enum operation {
 	OPEN,
 	OPEN_WITH_CLEAR_IF_FIRST,
 	TRANSACTION,
+	KEEP_OPENED,
+	TRANSACTION_KEEP_OPENED,
+	FETCH_KEEP_OPENED,
+	STORE_KEEP_OPENED,
+	CHECK_KEEP_OPENED,
+	NEEDS_RECOVERY_KEEP_OPENED,
+	CLOSE,
 };
 
 /* Do this before doing any tdb stuff.  Return handle, or -1. */
 struct agent *prepare_external_agent(void);
 
 /* Ask the external agent to try to do an operation. */
-bool external_agent_operation(struct agent *handle,
-			      enum operation op,
-			      const char *tdbname);
+int external_agent_operation(struct agent *handle,
+			     enum operation op,
+			     const char *tdbname);
 
 /* Ask... */
 void external_agent_operation_start(struct agent *agent,

+ 1 - 0
ccan/tdb/test/run-3G-file.c

@@ -10,6 +10,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>

+ 1 - 0
ccan/tdb/test/run-bad-tdb-header.c

@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>

+ 280 - 0
ccan/tdb/test/run-die-during-transaction.c

@@ -0,0 +1,280 @@
+#define _XOPEN_SOURCE 500
+#include <unistd.h>
+static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset);
+static ssize_t write_check(int fd, const void *buf, size_t count);
+static int fcntl_check(int fd, int cmd, ... /* arg */ );
+static int ftruncate_check(int fd, off_t length);
+
+#define pwrite pwrite_check
+#define write write_check
+#define fcntl fcntl_check
+#define ftruncate ftruncate_check
+
+#include <ccan/tdb/tdb.h>
+#include <ccan/tdb/io.c>
+#include <ccan/tdb/tdb.c>
+#include <ccan/tdb/lock.c>
+#include <ccan/tdb/freelist.c>
+#include <ccan/tdb/traverse.c>
+#include <ccan/tdb/transaction.c>
+#include <ccan/tdb/error.c>
+#include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
+#include <ccan/tap/tap.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <err.h>
+#include <setjmp.h>
+#include "external-transaction.h"
+
+#undef write
+#undef pwrite
+#undef fcntl
+#undef ftruncate
+
+static bool in_transaction;
+static bool suppress_logging;
+static int target, current;
+static jmp_buf jmpbuf;
+#define TEST_DBNAME "/tmp/test7.tdb"
+
+/* We save the locks so we can reaquire them. */
+struct lock {
+	struct lock *next;
+	off_t off;
+	unsigned int len;
+	int type;
+};
+static struct lock *locks;
+
+static void taplog(struct tdb_context *tdb,
+		   enum tdb_debug_level level,
+		   const char *fmt, ...)
+{
+	va_list ap;
+	char line[200];
+
+	if (suppress_logging)
+		return;
+
+	va_start(ap, fmt);
+	vsprintf(line, fmt, ap);
+	va_end(ap);
+
+	diag("%s", line);
+}
+
+static void check_file_contents(int fd)
+{
+	if (current++ == target) {
+		longjmp(jmpbuf, 1);
+	}
+}
+
+static ssize_t pwrite_check(int fd,
+			    const void *buf, size_t count, off_t offset)
+{
+	ssize_t ret;
+
+	if (in_transaction)
+		check_file_contents(fd);
+
+	ret = pwrite(fd, buf, count, offset);
+	if (ret != count)
+		return ret;
+
+	if (in_transaction)
+		check_file_contents(fd);
+	return ret;
+}
+
+static ssize_t write_check(int fd, const void *buf, size_t count)
+{
+	ssize_t ret;
+
+	if (in_transaction)
+		check_file_contents(fd);
+
+	ret = write(fd, buf, count);
+	if (ret != count)
+		return ret;
+
+	if (in_transaction)
+		check_file_contents(fd);
+	return ret;
+}
+
+extern int fcntl(int fd, int cmd, ... /* arg */ );
+
+static int fcntl_check(int fd, int cmd, ... /* arg */ )
+{
+	va_list ap;
+	int ret, arg3;
+	struct flock *fl;
+
+	if (cmd != F_SETLK && cmd != F_SETLKW) {
+		/* This may be totally bogus, but we don't know in general. */
+		va_start(ap, cmd);
+		arg3 = va_arg(ap, int);
+		va_end(ap);
+
+		return fcntl(fd, cmd, arg3);
+	}
+
+	va_start(ap, cmd);
+	fl = va_arg(ap, struct flock *);
+	va_end(ap);
+
+	ret = fcntl(fd, cmd, fl);
+	if (ret == 0) {
+		if (fl->l_type == F_UNLCK) {
+			struct lock **l;
+			struct lock *old = NULL;
+			
+			for (l = &locks; *l; l = &(*l)->next) {
+				if ((*l)->off == fl->l_start
+				    && (*l)->len == fl->l_len) {
+					old = *l;
+					*l = (*l)->next;
+					free(old);
+					break;
+				}
+			}
+			if (!old)
+				errx(1, "Unknown lock");
+		} else {
+			struct lock *new = malloc(sizeof *new);
+			new->off = fl->l_start;
+			new->len = fl->l_len;
+			new->type = fl->l_type;
+			new->next = locks;
+			locks = new;
+		}
+	}
+
+	if (in_transaction && fl->l_type == F_UNLCK)
+		check_file_contents(fd);
+	return ret;
+}
+
+static int ftruncate_check(int fd, off_t length)
+{
+	int ret;
+
+	if (in_transaction)
+		check_file_contents(fd);
+
+	ret = ftruncate(fd, length);
+
+	if (in_transaction)
+		check_file_contents(fd);
+	return ret;
+}
+
+static bool test_death(enum operation op, struct agent *agent)
+{
+	struct tdb_context *tdb;
+	TDB_DATA key, data;
+	struct tdb_logging_context logctx = { taplog, NULL };
+	int needed_recovery = 0;
+
+	current = target = 0;
+reset:
+	if (setjmp(jmpbuf) != 0) {
+		/* We're partway through.  Simulate our death. */
+		close(tdb->fd);
+		in_transaction = false;
+
+		if (external_agent_operation(agent, NEEDS_RECOVERY_KEEP_OPENED,
+					     ""))
+			needed_recovery++;
+
+		if (external_agent_operation(agent, op, "") != 1) {
+			diag("Step %u op failed", current);
+			return false;
+		}
+
+		if (external_agent_operation(agent, NEEDS_RECOVERY_KEEP_OPENED,
+					     "")) {
+			diag("Still needs recovery after step %u", current);
+			return false;
+		}
+
+		if (external_agent_operation(agent, CHECK_KEEP_OPENED, "")
+		    != 1) {
+			diag("Step %u check failed", current);
+			return false;
+		}
+
+		external_agent_operation(agent, CLOSE, "");
+		/* Suppress logging as this tries to use closed fd. */
+		suppress_logging = true;
+		tdb_close(tdb);
+		suppress_logging = false;
+		target++;
+		current = 0;
+		goto reset;
+	}
+
+	unlink(TEST_DBNAME);
+	tdb = tdb_open_ex(TEST_DBNAME, 1024, TDB_NOMMAP,
+			  O_CREAT|O_TRUNC|O_RDWR, 0600, &logctx, NULL);
+
+	if (external_agent_operation(agent, KEEP_OPENED, TEST_DBNAME) != 0)
+		errx(1, "Agent failed to open?");
+
+	if (tdb_transaction_start(tdb) != 0)
+		return false;
+
+	in_transaction = true;
+	key.dsize = strlen("hi");
+	key.dptr = (void *)"hi";
+	data.dptr = (void *)"world";
+	data.dsize = strlen("world");
+
+	if (tdb_store(tdb, key, data, TDB_INSERT) != 0)
+		return false;
+	if (tdb_transaction_commit(tdb) != 0)
+		return false;
+
+	in_transaction = false;
+
+	/* We made it! */
+	diag("Completed %u runs", current);
+	tdb_close(tdb);
+	external_agent_operation(agent, CLOSE, "");
+
+	ok1(needed_recovery);
+	return true;
+}
+
+int main(int argc, char *argv[])
+{
+	enum operation ops[] = { FETCH_KEEP_OPENED,
+				 STORE_KEEP_OPENED,
+				 TRANSACTION_KEEP_OPENED };
+	struct agent *agent;
+	int i;
+
+	plan_tests(6);
+	agent = prepare_external_agent();
+	if (!agent)
+		err(1, "preparing agent");
+
+	/* Nice ourselves down: we can't tell the difference between agent
+	 * blocking on lock, and agent not scheduled. */
+	nice(15);
+
+	for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) {
+		diag("Testing %s after death",
+		     ops[i] == TRANSACTION_KEEP_OPENED ? "transaction"
+		     : ops[i] == FETCH_KEEP_OPENED ? "fetch"
+		     : ops[i] == STORE_KEEP_OPENED ? "store"
+		     : NULL);
+
+		ok1(test_death(ops[i], agent));
+	}
+
+	return exit_status();
+}

+ 1 - 0
ccan/tdb/test/run-endian.c

@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>

+ 1 - 0
ccan/tdb/test/run-nested-transactions.c

@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <stdbool.h>

+ 1 - 0
ccan/tdb/test/run-nested-traverse.c

@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <stdbool.h>

+ 2 - 1
ccan/tdb/test/run-open-during-transaction.c

@@ -19,6 +19,7 @@ static int ftruncate_check(int fd, off_t length);
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <stdbool.h>
@@ -80,7 +81,7 @@ static void check_for_agent(int fd, bool block)
 	if (!external_agent_operation_check(agent, block, &res))
 		return;
 
-	if (res != 0)
+	if (res != 1)
 		err(1, "Agent failed open");
 	agent_pending = false;
 

+ 1 - 0
ccan/tdb/test/run-traverse-in-transaction.c

@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <stdbool.h>

+ 1 - 0
ccan/tdb/test/run-zero-append.c

@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>

+ 1 - 0
ccan/tdb/test/run.c

@@ -8,6 +8,7 @@
 #include <ccan/tdb/transaction.c>
 #include <ccan/tdb/error.c>
 #include <ccan/tdb/open.c>
+#include <ccan/tdb/check.c>
 #include <ccan/tap/tap.h>
 #include <stdlib.h>
 #include <err.h>