Browse Source

Add -k option to tdbtorture, run tdb_check at end.

This patch is made larger by moving the work out to a separate function: with -k, the parent can't run the ops itself but the children must do it.
Rusty Russell 16 years ago
parent
commit
5ebc170189
1 changed files with 148 additions and 62 deletions
  1. 148 62
      ccan/tdb/tools/tdbtorture.c

+ 148 - 62
ccan/tdb/tools/tdbtorture.c

@@ -36,6 +36,10 @@ static int in_transaction;
 static int in_traverse;
 static int in_traverse;
 static int error_count;
 static int error_count;
 static int always_transaction = 0;
 static int always_transaction = 0;
+static int hash_size = 2;
+static int loopnum;
+static int count_pipe;
+static struct tdb_logging_context log_ctx;
 
 
 #ifdef PRINTF_ATTRIBUTE
 #ifdef PRINTF_ATTRIBUTE
 static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4);
 static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...) PRINTF_ATTRIBUTE(3,4);
@@ -43,16 +47,18 @@ static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const c
 static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...)
 static void tdb_log(struct tdb_context *tdb, enum tdb_debug_level level, const char *format, ...)
 {
 {
 	va_list ap;
 	va_list ap;
-    
-	error_count++;
+
+	if (level != TDB_DEBUG_TRACE)
+		error_count++;
 
 
 	va_start(ap, format);
 	va_start(ap, format);
 	vfprintf(stdout, format, ap);
 	vfprintf(stdout, format, ap);
 	va_end(ap);
 	va_end(ap);
 	fflush(stdout);
 	fflush(stdout);
 #if 0
 #if 0
-	{
+	if (level != TDB_DEBUG_TRACE) {
 		char *ptr;
 		char *ptr;
+		signal(SIGUSR1, SIG_IGN);
 		asprintf(&ptr,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid());
 		asprintf(&ptr,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid());
 		system(ptr);
 		system(ptr);
 		free(ptr);
 		free(ptr);
@@ -232,54 +238,20 @@ static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
 
 
 static void usage(void)
 static void usage(void)
 {
 {
-	printf("Usage: tdbtorture [-t] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n");
+	printf("Usage: tdbtorture [-t] [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-H HASH_SIZE]\n");
 	exit(0);
 	exit(0);
 }
 }
 
 
-int main(int argc, char * const *argv)
+static void send_count_and_suicide(int sig)
 {
 {
-	int i, seed = -1;
-	int num_procs = 3;
-	int num_loops = 5000;
-	int hash_size = 2;
-	int c;
-	extern char *optarg;
-	pid_t *pids;
-
-	struct tdb_logging_context log_ctx;
-	log_ctx.log_fn = tdb_log;
-
-	while ((c = getopt(argc, argv, "n:l:s:H:th")) != -1) {
-		switch (c) {
-		case 'n':
-			num_procs = strtol(optarg, NULL, 0);
-			break;
-		case 'l':
-			num_loops = strtol(optarg, NULL, 0);
-			break;
-		case 'H':
-			hash_size = strtol(optarg, NULL, 0);
-			break;
-		case 's':
-			seed = strtol(optarg, NULL, 0);
-			break;
-		case 't':
-			always_transaction = 1;
-			break;
-		default:
-			usage();
-		}
-	}
-
-	unlink("torture.tdb");
-
-	pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
-	pids[0] = getpid();
-
-	for (i=0;i<num_procs-1;i++) {
-		if ((pids[i+1]=fork()) == 0) break;
-	}
+	/* This ensures our successor can continue where we left off. */
+	write(count_pipe, &loopnum, sizeof(loopnum));
+	/* This gives a unique signature. */
+	kill(getpid(), SIGUSR2);
+}
 
 
+static int run_child(int i, int seed, unsigned num_loops, unsigned start)
+{
 	db = tdb_open_ex("torture.tdb", hash_size, TDB_CLEAR_IF_FIRST, 
 	db = tdb_open_ex("torture.tdb", hash_size, TDB_CLEAR_IF_FIRST, 
 			 O_RDWR | O_CREAT, 0600, &log_ctx, NULL);
 			 O_RDWR | O_CREAT, 0600, &log_ctx, NULL);
 	if (!db) {
 	if (!db) {
@@ -290,15 +262,14 @@ int main(int argc, char * const *argv)
 		seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
 		seed = (getpid() + time(NULL)) & 0x7FFFFFFF;
 	}
 	}
 
 
-	if (i == 0) {
-		printf("testing with %d processes, %d loops, %d hash_size, seed=%d%s\n", 
-		       num_procs, num_loops, hash_size, seed, always_transaction ? " (all within transactions)" : "");
-	}
-
 	srand(seed + i);
 	srand(seed + i);
 	srandom(seed + i);
 	srandom(seed + i);
 
 
-	for (i=0;i<num_loops && error_count == 0;i++) {
+	/* Set global, then we're ready to handle being killed. */
+	loopnum = start;
+	signal(SIGUSR1, send_count_and_suicide);
+
+	for (;loopnum<num_loops && error_count == 0;loopnum++) {
 		addrec_db();
 		addrec_db();
 	}
 	}
 
 
@@ -322,44 +293,159 @@ int main(int argc, char * const *argv)
 
 
 	tdb_close(db);
 	tdb_close(db);
 
 
-	if (getpid() != pids[0]) {
-		return error_count;
+	return (error_count < 100 ? error_count : 100);
+}
+
+int main(int argc, char * const *argv)
+{
+	int i, seed = -1;
+	int num_loops = 5000;
+	int num_procs = 3;
+	int c, pfds[2];
+	extern char *optarg;
+	pid_t *pids;
+	int kill_random = 0;
+	int *done;
+
+	log_ctx.log_fn = tdb_log;
+
+	while ((c = getopt(argc, argv, "n:l:s:H:thk")) != -1) {
+		switch (c) {
+		case 'n':
+			num_procs = strtol(optarg, NULL, 0);
+			break;
+		case 'l':
+			num_loops = strtol(optarg, NULL, 0);
+			break;
+		case 'H':
+			hash_size = strtol(optarg, NULL, 0);
+			break;
+		case 's':
+			seed = strtol(optarg, NULL, 0);
+			break;
+		case 't':
+			always_transaction = 1;
+			break;
+		case 'k':
+			kill_random = 1;
+			break;
+		default:
+			usage();
+		}
 	}
 	}
 
 
-	for (i=1;i<num_procs;i++) {
+	unlink("torture.tdb");
+
+	if (num_procs == 1 && !kill_random) {
+		/* Don't fork for this case, makes debugging easier. */
+		error_count = run_child(0, seed, num_loops, 0);
+		goto done;
+	}
+
+	pids = (pid_t *)calloc(sizeof(pid_t), num_procs);
+	done = (int *)calloc(sizeof(int), num_procs);
+
+	if (pipe(pfds) != 0) {
+		perror("Creating pipe");
+		exit(1);
+	}
+	count_pipe = pfds[1];
+
+	for (i=0;i<num_procs;i++) {
+		if ((pids[i]=fork()) == 0) {
+			close(pfds[0]);
+			if (i == 0) {
+				printf("testing with %d processes, %d loops, %d hash_size, seed=%d%s\n", 
+				       num_procs, num_loops, hash_size, seed, always_transaction ? " (all within transactions)" : "");
+			}
+			exit(run_child(i, seed, num_loops, 0));
+		}
+	}
+
+	while (num_procs) {
 		int status, j;
 		int status, j;
 		pid_t pid;
 		pid_t pid;
+
 		if (error_count != 0) {
 		if (error_count != 0) {
 			/* try and stop the test on any failure */
 			/* try and stop the test on any failure */
-			for (j=1;j<num_procs;j++) {
+			for (j=0;j<num_procs;j++) {
 				if (pids[j] != 0) {
 				if (pids[j] != 0) {
 					kill(pids[j], SIGTERM);
 					kill(pids[j], SIGTERM);
 				}
 				}
 			}
 			}
 		}
 		}
-		pid = waitpid(-1, &status, 0);
+
+		pid = waitpid(-1, &status, kill_random ? WNOHANG : 0);
+		if (pid == 0) {
+			struct timespec ts;
+
+			/* Sleep for 1/10 second. */
+			ts.tv_sec = 0;
+			ts.tv_nsec = 100000000;
+			nanosleep(&ts, NULL);
+
+			/* Kill someone. */
+			kill(pids[random() % num_procs], SIGUSR1);
+			continue;
+		}
+
 		if (pid == -1) {
 		if (pid == -1) {
 			perror("failed to wait for child\n");
 			perror("failed to wait for child\n");
 			exit(1);
 			exit(1);
 		}
 		}
-		for (j=1;j<num_procs;j++) {
+
+		for (j=0;j<num_procs;j++) {
 			if (pids[j] == pid) break;
 			if (pids[j] == pid) break;
 		}
 		}
 		if (j == num_procs) {
 		if (j == num_procs) {
 			printf("unknown child %d exited!?\n", (int)pid);
 			printf("unknown child %d exited!?\n", (int)pid);
 			exit(1);
 			exit(1);
 		}
 		}
-		if (WEXITSTATUS(status) != 0) {
-			printf("child %d exited with status %d\n",
-			       (int)pid, WEXITSTATUS(status));
+		if (WIFSIGNALED(status)) {
+			if (WTERMSIG(status) == SIGUSR2
+			    || WTERMSIG(status) == SIGUSR1) {
+				/* SIGUSR2 means they wrote to pipe. */
+				if (WTERMSIG(status) == SIGUSR2) {
+					read(pfds[0], &done[j],
+					     sizeof(done[j]));
+				}
+				pids[j] = fork();
+				if (pids[j] == 0)
+					exit(run_child(j, seed, num_loops,
+						       done[j]));
+				printf("Restarting child %i for %u-%u\n",
+				       j, done[j], num_loops);
+				continue;
+			}
+			printf("child %d exited with signal %d\n",
+			       (int)pid, WTERMSIG(status));
 			error_count++;
 			error_count++;
+		} else {
+			if (WEXITSTATUS(status) != 0) {
+				printf("child %d exited with status %d\n",
+				       (int)pid, WEXITSTATUS(status));
+				error_count++;
+			}
 		}
 		}
-		pids[j] = 0;
+		memmove(&pids[j], &pids[j+1],
+			(num_procs - j - 1)*sizeof(pids[0]));
+		num_procs--;
 	}
 	}
 
 
 	free(pids);
 	free(pids);
 
 
+done:
 	if (error_count == 0) {
 	if (error_count == 0) {
+		db = tdb_open_ex("torture.tdb", hash_size, TDB_DEFAULT, 
+				 O_RDWR, 0, &log_ctx, NULL);
+		if (!db) {
+			fatal("db open failed");
+		}
+		if (tdb_check(db, NULL, NULL) == -1) {
+			printf("db check failed");
+			exit(1);
+		}
+		tdb_close(db);
 		printf("OK\n");
 		printf("OK\n");
 	}
 	}