Browse Source

timers: implementation of lazily-ordered timers.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Rusty Russell 13 years ago
parent
commit
606cca7b0e

+ 1 - 0
Makefile-ccan

@@ -88,6 +88,7 @@ MODS_WITH_SRC := antithread \
 	tally \
 	tap \
 	time \
+	timer \
 	ttxml \
 	wwviaudio
 

+ 1 - 0
ccan/timer/LICENSE

@@ -0,0 +1 @@
+../../licenses/LGPL-2.1

+ 80 - 0
ccan/timer/_info

@@ -0,0 +1,80 @@
+#include "config.h"
+#include <stdio.h>
+#include <string.h>
+
+/**
+ * timer - efficient implementation of rarely-expiring timers.
+ *
+ * This is a lazy implementation of timers: you can add and delete timers
+ * very quickly, and they are only sorted as their expiry approaches.
+ *
+ * This is a common case for timeouts, which must often be set, but
+ * rarely expire.
+ *
+ * Example:
+ *	// Silly example which outputs strings until timers expire.
+ *	#include <ccan/timer/timer.h>
+ *	#include <ccan/time/time.h>
+ *	#include <stdlib.h>
+ *	#include <stdio.h>
+ *
+ *	struct timed_string {
+ *		struct list_node node;
+ *		struct timer timer;
+ *		const char *string;
+ *	};
+ *
+ *	int main(int argc, char *argv[])
+ *	{
+ *		struct timers timers;
+ *		struct list_head strings;
+ *		struct list_head expired;
+ *		struct timed_string *s;
+ *
+ *		timers_init(&timers, time_now());
+ *		list_head_init(&strings);
+ *
+ *		while (argv[1]) {
+ *			s = malloc(sizeof(*s));
+ *			s->string = argv[1];
+ *			timer_add(&timers, &s->timer,
+ *				  time_add(time_now(),
+ *					   time_from_msec(atol(argv[2]))));
+ *			list_add_tail(&strings, &s->node);
+ *			argv += 2;
+ *		}
+ *
+ *		while (!list_empty(&strings)) {
+ *			struct timespec now = time_now();
+ *			list_for_each(&strings, s, node)
+ *				printf("%s", s->string);
+ *			timers_expire(&timers, now, &expired);
+ *			while ((s = list_pop(&expired, struct timed_string,
+ *					     timer.list)) != NULL) {
+ *				list_del_from(&strings, &s->node);
+ *				free(s);
+ *			}
+ *		}
+ *
+ *		exit(0);
+ *	}
+ *
+ * License: LGPL (v2.1 or any later version)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+/*
+ * Module metadata query.  Exactly one argument is expected; the only
+ * query understood is "depends", which lists the modules this one needs,
+ * one per line.  Returns 0 for an answered query and 1 for anything else.
+ */
+int main(int argc, char *argv[])
+{
+	static const char *const deps[] = {
+		"ccan/array_size\n",
+		"ccan/ilog\n",
+		"ccan/likely\n",
+		"ccan/list\n",
+		"ccan/time\n",
+	};
+	size_t n;
+
+	if (argc == 2 && strcmp(argv[1], "depends") == 0) {
+		for (n = 0; n < sizeof(deps) / sizeof(deps[0]); n++)
+			fputs(deps[n], stdout);
+		return 0;
+	}
+
+	return 1;
+}

+ 35 - 0
ccan/timer/benchmarks/Makefile

@@ -0,0 +1,35 @@
+# Builds the expected-usage benchmark directly from the ccan sources.
+# Only the targets named in ALL are built by default.
+ALL:=expected-usage
+# Top of the ccan tree, used both for -I and to locate module sources.
+CCANDIR:=../../..
+CFLAGS:=-Wall -I$(CCANDIR) -O3 -flto
+LDFLAGS:=-O3 -flto
+LDLIBS:=-lrt
+
+# Everything linked into expected-usage.  The ccan module objects each get
+# an explicit rule below because their sources live outside this directory.
+OBJS:=time.o timer.o list.o opt_opt.o opt_parse.o opt_usage.o opt_helpers.o expected-usage.o
+
+default: $(ALL)
+
+# No recipe here: linking is left to make's built-in rules.
+expected-usage: $(OBJS)
+
+# ccan/opt sources, prefixed opt_ to keep object names distinct.
+opt_parse.o: $(CCANDIR)/ccan/opt/parse.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+opt_usage.o: $(CCANDIR)/ccan/opt/usage.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+opt_helpers.o: $(CCANDIR)/ccan/opt/helpers.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+opt_opt.o: $(CCANDIR)/ccan/opt/opt.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+# Remaining ccan modules used by the benchmark.
+time.o: $(CCANDIR)/ccan/time/time.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+timer.o: $(CCANDIR)/ccan/timer/timer.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+list.o: $(CCANDIR)/ccan/list/list.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+# Remove all objects and the built benchmark binaries.
+clean:
+	$(RM) *.o $(ALL)

+ 53 - 0
ccan/timer/benchmarks/benchmark.c

@@ -0,0 +1,53 @@
+#include <ccan/time/time.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef FIRST_APPROX
+#include "first-approx.c"
+#endif
+#ifdef SECOND_APPROX
+#include "second-approx.c"
+#endif
+#ifdef NO_APPROX
+#include "no-approx.c"
+#endif
+
+/*
+ * Compare struct timespec arithmetic (time_add) with plain 64-bit
+ * arithmetic, using whichever to_u64()/from_u64() pair was selected at
+ * compile time via FIRST_APPROX, SECOND_APPROX or NO_APPROX.
+ *
+ * argv[1], if present, is the loop limit (default 100000); limit * limit
+ * one-nanosecond additions are performed on both representations.  Prints
+ * both results, the elapsed wall time and the relative error.
+ */
+int main(int argc, char *argv[])
+{
+	struct timespec start, val, val2, end, diff;
+	/* Standard conditional rather than the GNU "?:" extension. */
+	unsigned int i, j, limit = atoi(argv[1] ? argv[1] : "100000");
+	uint64_t val64;
+	int error;
+
+	val = start = time_now();
+	val64 = to_u64(start);
+	val2.tv_sec = 0;
+	val2.tv_nsec = 1;
+
+	for (j = 0; j < limit; j++) {
+		for (i = 0; i < limit; i++) {
+			val = time_add(val, val2);
+			val64 += to_u64(val2);
+		}
+	}
+
+	end = time_now();
+
+	/* Cast so the arguments really are what %lu expects. */
+	printf("val64 says %lu.%09lu\n",
+	       (unsigned long)from_u64(val64).tv_sec,
+	       (unsigned long)from_u64(val64).tv_nsec);
+
+	printf("val says %lu.%09lu\n",
+	       (unsigned long)val.tv_sec,
+	       (unsigned long)val.tv_nsec);
+
+	if (time_greater(val, from_u64(val64)))
+		diff = time_sub(val, from_u64(val64));
+	else
+		diff = time_sub(from_u64(val64), val);
+
+	/* A zero limit adds nothing, so val == start: don't divide by zero. */
+	if (time_to_nsec(time_sub(val, start)) == 0)
+		error = 0;
+	else
+		error = (int)(100 * time_to_nsec(diff)
+			      / time_to_nsec(time_sub(val, start)));
+
+	printf("Time %lluns, error = %i%%\n",
+	       (unsigned long long)time_to_nsec(time_sub(end, start)),
+	       error);
+	return 0;
+}

+ 71 - 0
ccan/timer/benchmarks/expected-usage.c

@@ -0,0 +1,71 @@
+/* We expect a timer to rarely go off, so benchmark that case:
+ * Every 1ms a connection comes in, we set up a 30 second timer for it.
+ * After 8192ms we finish the connection (and thus delete the timer).
+ */
+#include <ccan/timer/timer.h>
+#include <ccan/opt/opt.h>
+#include <ccan/array_size/array_size.h>
+#include <stdio.h>
+
+#define PER_CONN_TIME 8192
+#define CONN_TIMEOUT_MS 30000
+
+/*
+ * Benchmark the "timers rarely fire" case.
+ *
+ * Simulated time advances 1ms per iteration.  Each iteration expires
+ * timers (none are expected to be due), retires the timer added
+ * PER_CONN_TIME iterations earlier, and arms a new one CONN_TIMEOUT_MS in
+ * the future.  The iteration count is argv[1] if given, otherwise 10000
+ * with -c/--check and 1000000 without.  With --check, timers_check() is
+ * run around every operation.
+ *
+ * Prints the iteration count, elapsed wall time and how many timer levels
+ * were allocated.
+ */
+int main(int argc, char *argv[])
+{
+	struct timespec start, curr;
+	struct timers timers;
+	struct list_head expired;
+	struct timer t[PER_CONN_TIME];
+	unsigned int i, num, levels;
+	bool check = false;
+
+	opt_register_noarg("-c|--check", opt_set_bool, &check,
+			   "Check timer structure during progress");
+
+	opt_parse(&argc, argv, opt_log_stderr_exit);
+
+	num = argv[1] ? atoi(argv[1]) : (check ? 10000 : 1000000);
+
+	list_head_init(&expired);
+	curr = start = time_now();
+	timers_init(&timers, start);
+
+	for (i = 0; i < num; i++) {
+		curr = time_add(curr, time_from_msec(1));
+		if (check)
+			timers_check(&timers, NULL);
+		timers_expire(&timers, curr, &expired);
+		if (check)
+			timers_check(&timers, NULL);
+		assert(list_empty(&expired));
+
+		/* Slot i%PER_CONN_TIME is reused: retire its old timer first. */
+		if (i >= PER_CONN_TIME) {
+			timer_del(&timers, &t[i%PER_CONN_TIME]);
+			if (check)
+				timers_check(&timers, NULL);
+		}
+		timer_add(&timers, &t[i%PER_CONN_TIME],
+			  time_add(curr, time_from_msec(CONN_TIMEOUT_MS)));
+		if (check)
+			timers_check(&timers, NULL);
+	}
+	if (num > PER_CONN_TIME) {
+		for (i = 0; i < PER_CONN_TIME; i++)
+			timer_del(&timers, &t[i]);
+	}
+
+	curr = time_sub(time_now(), start);
+	if (check)
+		timers_check(&timers, NULL);
+
+	/* Count allocated levels *before* timers_cleanup() frees them;
+	 * the level pointers must not be inspected once freed. */
+	for (levels = 0; levels < ARRAY_SIZE(timers.level); levels++)
+		if (!timers.level[levels])
+			break;
+
+	timers_cleanup(&timers);
+	opt_free_table();
+
+	/* Cast so the arguments really are what %lu expects. */
+	printf("%u in %lu.%09lu (%u levels / %zu)\n",
+	       num, (unsigned long)curr.tv_sec, (unsigned long)curr.tv_nsec,
+	       levels, ARRAY_SIZE(timers.level));
+	return 0;
+}

+ 76 - 0
ccan/timer/design.txt

@@ -0,0 +1,76 @@
+Cascading timer design.
+
+Inspired by the Linux kernel approach, documented roughly at:
+	https://lwn.net/Articles/152436/
+
+For easy description, we use whole seconds and powers of 10: in the
+implementation, we use powers of 2 (eg. 256 entries) and smaller
+granularities.
+
+We start with a simple data structure:
+
+struct timer_level {
+	struct timer_level *next;
+
+	/* Ten buckets: 	[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] */
+	struct list_head bucket[10];
+};
+
+struct timers {
+	/* We can never have a timer before this, aka "now". */
+	time_t offset;
+
+	struct timer_level *level;
+
+	/* Anything too far in the future. */
+	struct list_head far;
+};
+
+The first level of timers holds anything which will happen in the next
+10 seconds.  The next level holds things which will happen in the next
+100 seconds.  And so on.
+
+When we want to add a new timer into the structure, we need to figure
+out first what level it goes into, and second, which bucket.  Say our
+offset is 500,000,001 (about Tue Nov 5, 1985 in Unix time).  And our
+timer is set to go off in 5 seconds, ie. 500,000,006.
+
+The level is easy: the difference between the timer and the offset is
+5, and that's less than 10, so it's in the first level.  The position,
+however, depends on the absolute time, in this case the last digit 6,
+so it's in bucket 6.
+
+Adding a timer at 500,000,123?  The difference is > 100 and < 1000, so
+it's in the third level.  The bucket is 1.  If there's no third level,
+we just add it to the 'far' list for stuff which is in the far future.
+
+Deleting a timer is as simple as removing it; there is no external
+bookkeeping in this scheme.  This matters, since timers used for
+timeouts are almost always deleted before they expire.
+
+Now, when a second passes, we need to know if there are any timers
+which are due.  We increment the offset to 500,000,002, and look in
+the first level, bucket 2 for any timers, so lookup is simple.
+
+We do this eight more times, and we increment the offset to
+500,000,010.  We've swept around back to bucket 0, though it may not
+be empty if we added more timers as we were going.
+
+But we need to look into the next level since a timer at 500,000,010
+added when the offset was 500,000,000 would have gone up there.  We
+empty bucket 1 of the second level (due to the '1' in 500,000,010)
+into the first-level buckets: it will contain timers between
+500,000,010 and 500,000,019, which all now are less than 10 seconds
+away, so belong in the bottom level.
+
+Similarly, at 500,000,020 we will empty bucket 2 of the second level
+into the first level.  And at 500,000,100 we will empty bucket 1 of
+the third level into the second level then bucket 0 of the second
+level into the first level.  We do it in this order, since emptying
+bucket 1 on the third level (500,000,100 - 500,000,199) may put more
+entries (500,000,100 - 500,000,109) into bucket 0 on the second level.
+
+When we get to 500,001,000 we should empty the fourth level.  If there
+is no fourth level, that's when we sort through the 'far' list and
+empty any which are less than 500,002,000.  If there are many entries
+in the far list, we should add more levels to reduce the number, or at
+least the frequency we have to check it.

+ 52 - 0
ccan/timer/test/run-add.c

@@ -0,0 +1,52 @@
+#include <ccan/timer/timer.h>
+/* Include the C files directly. */
+#include <ccan/timer/timer.c>
+#include <ccan/tap/tap.h>
+
+/* More than 32 bits */
+#define MAX_ORD 34
+
+/*
+ * Step through interesting values: every integer up to 17, then only the
+ * three values around each power of two:
+ * 0...17, 31, 32, 33, 63, 64, 65, 127, 128, 129, ...
+ */
+static uint64_t next(uint64_t base)
+{
+	uint64_t below = base - 1;
+	/* True when 'below' has no two adjacent bits set. */
+	int no_adjacent_bits = (below & (below >> 1)) == 0;
+
+	return (base > 16 && no_adjacent_bits) ? base * 2 - 3 : base + 1;
+}
+
+/*
+ * Exercise timer_add_raw() over a grid of (base, distance) pairs and
+ * verify with timers_check() that each timer lands in a consistent place.
+ */
+int main(void)
+{
+	struct timers timers;
+	struct timer t;
+	uint64_t diff;
+	unsigned int i;
+
+	/* This is how many tests you plan to run */
+	/* next() yields 18 + (MAX_ORD - 4) * 3 values below (1 << MAX_ORD) + 2;
+	 * both loops below walk that sequence, plus two standalone checks. */
+	plan_tests(2 + (18 + (MAX_ORD - 4) * 3) * (18 + (MAX_ORD - 4) * 3));
+
+	timers_init(&timers, time_from_nsec(0));
+	ok1(timers_check(&timers, NULL));
+
+	/* Pre-allocate the first four levels; anything further away goes
+	 * onto the far list. */
+	for (i = 0; i < 4; i++)
+		add_level(&timers, i);
+
+	/* NOTE(review): i is only counted below, never read afterwards. */
+	i = 0;
+	for (diff = 0; diff < (1ULL << MAX_ORD)+2; diff = next(diff)) {
+		i++;
+		/* The base is poked directly rather than via timers_expire(). */
+		for (timers.base = 0;
+		     timers.base < (1ULL << MAX_ORD)+2;
+		     timers.base = next(timers.base)) {
+			t.time = timers.base + diff;
+			timer_add_raw(&timers, &t);
+			ok1(timers_check(&timers, NULL));
+			timer_del(&timers, &t);
+		}
+	}
+
+	ok1(timers_check(&timers, NULL));
+
+	timers_cleanup(&timers);
+
+	/* This exits depending on whether all tests passed */
+	return exit_status();
+}

+ 31 - 0
ccan/timer/test/run-expiry.c

@@ -0,0 +1,31 @@
+#include <ccan/timer/timer.h>
+/* Include the C files directly. */
+#include <ccan/timer/timer.c>
+#include <ccan/tap/tap.h>
+
+/*
+ * Regression test: advancing time to just before a pending timer must not
+ * expire it and must leave the structure consistent.
+ */
+int main(void)
+{
+	struct timers timers;
+	struct timer t;
+	struct list_head list;
+	struct timespec earliest;
+
+	/* This is how many tests you plan to run */
+	plan_tests(7);
+
+	timers_init(&timers, grains_to_time(1364984760903400ULL));
+	ok1(timers.base == 1364984760903400ULL);
+	timer_add(&timers, &t, grains_to_time(1364984761003398ULL));
+	ok1(t.time == 1364984761003398ULL);
+	/* struct timers has no 'first' member, so ask for the earliest
+	 * timer through the public interface instead. */
+	ok1(timer_earliest(&timers, &earliest)
+	    && time_eq(earliest, grains_to_time(1364984761003398ULL)));
+	/* 44 grains after the base: nothing is due yet. */
+	timers_expire(&timers, grains_to_time(1364984760903444ULL), &list);
+	ok1(timers_check(&timers, NULL));
+	ok1(list_pop(&list, struct timer, list) == NULL);
+	/* 731 grains before the timer: still nothing is due. */
+	timers_expire(&timers, grains_to_time(1364984761002667ULL), &list);
+	ok1(timers_check(&timers, NULL));
+	ok1(list_pop(&list, struct timer, list) == NULL);
+
+	timers_cleanup(&timers);
+
+	/* This exits depending on whether all tests passed */
+	return exit_status();
+}

+ 27 - 0
ccan/timer/test/run-ff.c

@@ -0,0 +1,27 @@
+#include <ccan/timer/timer.h>
+/* Include the C files directly. */
+#include <ccan/timer/timer.c>
+#include <ccan/tap/tap.h>
+
+/*
+ * Regression test for fast-forwarding: a timer must be reported as
+ * expired when time jumps well past it in a single timers_expire() call.
+ */
+int main(void)
+{
+	struct timers timers;
+	struct timer t;
+	struct list_head expired;
+
+	/* This is how many tests you plan to run */
+	plan_tests(3);
+
+	timers_init(&timers, time_from_usec(1364726722653919ULL));
+	/* Timer is due 50000 usec after the start time. */
+	timer_add(&timers, &t, time_from_usec(1364726722703919ULL));
+	/* One usec after the start: not due yet. */
+	timers_expire(&timers, time_from_usec(1364726722653920ULL), &expired);
+	ok1(list_empty(&expired));
+	/* Roughly 2.8 seconds later: the timer must come back, and be first. */
+	timers_expire(&timers, time_from_usec(1364726725454187ULL), &expired);
+	ok1(!list_empty(&expired));
+	ok1(list_top(&expired, struct timer, list) == &t);
+
+	timers_cleanup(&timers);
+
+	/* This exits depending on whether all tests passed */
+	return exit_status();
+}

+ 84 - 0
ccan/timer/test/run.c

@@ -0,0 +1,84 @@
+#include <ccan/timer/timer.h>
+/* Include the C files directly. */
+#include <ccan/timer/timer.c>
+#include <ccan/tap/tap.h>
+
+/*
+ * General test: add/delete a single timer, check that get_first() returns
+ * timers in time order, then check that timers_expire() hands timers back
+ * in time order.
+ */
+int main(void)
+{
+	struct timers timers;
+	struct timer t[64];
+	struct list_head expired;
+	struct timespec earliest;
+	uint64_t i;
+
+	/* This is how many tests you plan to run */
+	plan_tests(488);
+
+	timers_init(&timers, time_from_nsec(0));
+	ok1(timers_check(&timers, NULL));
+	ok1(!timer_earliest(&timers, &earliest));
+
+	/* A single timer is the earliest; once deleted there is none. */
+	timer_add(&timers, &t[0], time_from_nsec(1));
+	ok1(timers_check(&timers, NULL));
+	ok1(timer_earliest(&timers, &earliest));
+	ok1(time_eq(earliest, grains_to_time(t[0].time)));
+	timer_del(&timers, &t[0]);
+	ok1(timers_check(&timers, NULL));
+	ok1(!timer_earliest(&timers, &earliest));
+
+	/* Check timer ordering. */
+	/* Pairs of timers at 2^i and 2^i + 1 nanoseconds. */
+	for (i = 0; i < 32; i++) {
+		timer_add(&timers, &t[i*2], time_from_nsec(1ULL << i));
+		ok1(timers_check(&timers, NULL));
+		timer_add(&timers, &t[i*2+1], time_from_nsec((1ULL << i) + 1));
+		ok1(timers_check(&timers, NULL));
+	}
+
+	/* Each pair must come out together, in either order within the pair. */
+	for (i = 0; i < 32; i++) {
+		const struct timer *t1, *t2;
+
+		t1 = get_first(&timers);
+		ok1(t1 == &t[i*2] || t1 == &t[i*2+1]);
+		timer_del(&timers, (struct timer *)t1);
+		ok1(timers_check(&timers, NULL));
+
+		t2 = get_first(&timers);
+		ok1(t2 != t1 && (t2 == &t[i*2] || t2 == &t[i*2+1]));
+		timer_del(&timers, (struct timer *)t2);
+		ok1(timers_check(&timers, NULL));
+	}
+
+	/* Check expiry. */
+	/* exp and exp + 1 nanoseconds fall in the same grain, so both timers
+	 * of a pair are expected to expire together. */
+	for (i = 0; i < 32; i++) {
+		uint64_t exp = (uint64_t)TIMER_GRANULARITY << i;
+
+		timer_add(&timers, &t[i*2], time_from_nsec(exp));
+		ok1(timers_check(&timers, NULL));
+		timer_add(&timers, &t[i*2+1], time_from_nsec(exp + 1));
+		ok1(timers_check(&timers, NULL));
+	}
+
+	/* Expiring at the earliest time must return exactly one pair. */
+	for (i = 0; i < 32; i++) {
+		struct timer *t1, *t2;
+
+		ok1(timer_earliest(&timers, &earliest));
+		timers_expire(&timers, earliest, &expired);
+
+		t1 = list_pop(&expired, struct timer, list);
+		ok1(t1);
+		t2 = list_pop(&expired, struct timer, list);
+		ok1(t2);
+		ok1(list_empty(&expired));
+
+		ok1(t1 == &t[i*2] || t1 == &t[i*2+1]);
+		ok1(t2 != t1 && (t2 == &t[i*2] || t2 == &t[i*2+1]));
+		ok1(timers_check(&timers, NULL));
+	}
+
+	ok1(!timer_earliest(&timers, &earliest));
+
+	timers_cleanup(&timers);
+
+	/* This exits depending on whether all tests passed */
+	return exit_status();
+}

+ 431 - 0
ccan/timer/timer.c

@@ -0,0 +1,431 @@
+/* LGPL (v2.1 or any later version) - see LICENSE file for details */
+#include <ccan/timer/timer.h>
+#include <ccan/array_size/array_size.h>
+#include <ccan/ilog/ilog.h>
+#include <ccan/likely/likely.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define PER_LEVEL (1ULL << TIMER_LEVEL_BITS)
+
+/* One level of the timer structure: PER_LEVEL buckets, each a list of
+ * timers.  The bucket index is taken from the timer's time, shifted by
+ * level * TIMER_LEVEL_BITS. */
+struct timer_level {
+	struct list_head list[PER_LEVEL];
+};
+
+/* Convert a timespec into a count of TIMER_GRANULARITY-nanosecond grains;
+ * any sub-grain remainder of tv_nsec is discarded. */
+static uint64_t time_to_grains(struct timespec ts)
+{
+	const uint64_t grains_per_sec = (uint64_t)1000000000 / TIMER_GRANULARITY;
+	uint64_t from_sec = ts.tv_sec * grains_per_sec;
+	uint64_t from_nsec = ts.tv_nsec / TIMER_GRANULARITY;
+
+	return from_sec + from_nsec;
+}
+
+/* Inverse of time_to_grains(): turn a grain count back into a timespec
+ * whose tv_nsec is a multiple of TIMER_GRANULARITY. */
+static struct timespec grains_to_time(uint64_t grains)
+{
+	const uint64_t grains_per_sec = 1000000000 / TIMER_GRANULARITY;
+	struct timespec result;
+
+	result.tv_sec = grains / grains_per_sec;
+	result.tv_nsec = (grains % grains_per_sec) * TIMER_GRANULARITY;
+	return result;
+}
+
+/* Set up an empty timers structure whose current time is @start.  No
+ * levels are allocated yet; every slot starts out NULL. */
+void timers_init(struct timers *timers, struct timespec start)
+{
+	struct timer_level **slot = timers->level;
+	struct timer_level **end = slot + ARRAY_SIZE(timers->level);
+
+	list_head_init(&timers->far);
+	timers->base = time_to_grains(start);
+	while (slot != end)
+		*slot++ = NULL;
+}
+
+/* File a timer whose ->time is already set (and not before the base).
+ * The level is chosen from the distance to the base, the bucket from the
+ * absolute time.  If that level is not allocated, the timer is parked on
+ * the far list instead. */
+static void timer_add_raw(struct timers *timers, struct timer *t)
+{
+	uint64_t distance = t->time - timers->base;
+	unsigned int level = ilog64(distance / 2) / TIMER_LEVEL_BITS;
+	struct timer_level *lvl = timers->level[level];
+	struct list_head *dest = &timers->far;
+
+	if (lvl) {
+		unsigned int shift = level * TIMER_LEVEL_BITS;
+
+		dest = &lvl->list[(t->time >> shift) & (PER_LEVEL-1)];
+	}
+
+	list_add_tail(dest, &t->list);
+}
+
+/* Add @t to expire at @when.  A time earlier than the current base is
+ * clamped to the base, so the timer is treated as imminent. */
+void timer_add(struct timers *timers, struct timer *t, struct timespec when)
+{
+	uint64_t grains = time_to_grains(when);
+
+	t->time = (grains < timers->base) ? timers->base : grains;
+	timer_add_raw(timers, t);
+}
+
+/* FIXME: inline */
+/* Remove @t from whichever list currently holds it.  @timers is not
+ * consulted: unlinking the list node is all that is needed. */
+void timer_del(struct timers *timers, struct timer *t)
+{
+	list_del(&t->list);
+}
+
+/* Move every timer on the far list that is due at or before @when onto
+ * the tail of @list, preserving their relative order. */
+static void timers_far_get(struct timers *timers,
+			   struct list_head *list,
+			   uint64_t when)
+{
+	struct timer *cur, *tmp;
+
+	list_for_each_safe(&timers->far, cur, tmp, list) {
+		if (cur->time > when)
+			continue;
+
+		list_del_from(&timers->far, &cur->list);
+		list_add_tail(list, &cur->list);
+	}
+}
+
+/* Allocate level @level and pull in any far-list timers that are now
+ * close enough to be filed normally.  If the allocation fails nothing
+ * changes: the level stays absent and the timers stay on the far list. */
+static void add_level(struct timers *timers, unsigned int level)
+{
+	struct timer_level *fresh = malloc(sizeof(*fresh));
+	struct list_head pulled;
+	struct timer *t;
+	uint64_t horizon;
+	unsigned int b;
+
+	if (!fresh)
+		return;
+
+	for (b = 0; b < ARRAY_SIZE(fresh->list); b++)
+		list_head_init(&fresh->list[b]);
+	timers->level[level] = fresh;
+
+	/* Last time this level (and those below it) can reach from base. */
+	horizon = timers->base + (1ULL << ((level+1)*TIMER_LEVEL_BITS)) - 1;
+
+	list_head_init(&pulled);
+	timers_far_get(timers, &pulled, horizon);
+
+	/* Re-file them now that the new level exists. */
+	while ((t = list_pop(&pulled, struct timer, list)) != NULL)
+		timer_add_raw(timers, t);
+}
+
+/* Take timers from level and distribute them down one. */
+/*
+ * The source list is either the far list (when @level is past the end of
+ * the array or not allocated) or this level's bucket for the current
+ * base.  Every timer taken is re-filed into level - 1.
+ *
+ * NOTE(review): nothing else in this file appears to call cascade(); it
+ * only recurses into itself.  It also indexes level[level-1], so it
+ * presumably must never be called with level == 0 - confirm before use.
+ */
+static void cascade(struct timers *timers, unsigned int level)
+{
+	struct timer *i;
+	struct list_head from_far, *list;
+
+	if (level == ARRAY_SIZE(timers->level) || !timers->level[level]) {
+		/* No such level: take what is due from the far list. */
+		list_head_init(&from_far);
+		timers_far_get(timers, &from_far,
+			       timers->base
+			       + (1ULL << (level*TIMER_LEVEL_BITS))-1);
+		list = &from_far;
+		if (level != ARRAY_SIZE(timers->level))
+			add_level(timers, level);
+	} else {
+		unsigned src;
+
+		src = (timers->base >> (level * TIMER_LEVEL_BITS)) % PER_LEVEL;
+		/* Bucket 0 means this level wrapped too: refill from above
+		 * first, since that may add entries to the bucket we empty. */
+		if (src == 0)
+			cascade(timers, level + 1);
+		list = &timers->level[level]->list[src];
+	}
+
+	while ((i = list_pop(list, struct timer, list)) != NULL) {
+		unsigned dst;
+
+		assert(i->time >= timers->base);
+		assert(i->time < (timers->base
+				  + (1ULL << ((level+1)*TIMER_LEVEL_BITS))));
+
+		/* Bucket in the level below is chosen by absolute time. */
+		dst = (i->time >> ((level-1)*TIMER_LEVEL_BITS)) % PER_LEVEL;
+		list_add_tail(&timers->level[level-1]->list[dst], &i->list);
+	}
+}
+
+/* Return whichever of @prev and the timers on @list has the smallest
+ * time.  @prev may be NULL; on ties the earlier candidate is kept. */
+static const struct timer *find_first(const struct list_head *list,
+				      const struct timer *prev)
+{
+	const struct timer *best = prev;
+	struct timer *cur;
+
+	list_for_each(list, cur, list) {
+		if (best && cur->time >= best->time)
+			continue;
+		best = cur;
+	}
+	return best;
+}
+
+/*
+ * Find the timer with the earliest time, or NULL if there are none.
+ *
+ * Levels are scanned from the bottom up, starting at the bucket for the
+ * current base.  The first non-empty bucket holds the candidate.  If the
+ * scan wrapped past the end of the level, the level above has not been
+ * cascaded yet, so its current bucket (or the far list) is checked as well.
+ *
+ * The level index is bounds-checked before every use, so a structure with
+ * every level allocated cannot index past the end of timers->level[].
+ */
+static struct timer *get_first(const struct timers *timers)
+{
+	unsigned int level = 0, i, off;
+	bool need_next;
+	uint64_t base = timers->base;
+	const struct timer *found = NULL;
+	struct list_head *h;
+
+next:
+	/* Out of levels (none left, or not allocated): only the far list
+	 * remains to be searched. */
+	if (level == ARRAY_SIZE(timers->level) || !timers->level[level])
+		return (struct timer *)find_first(&timers->far, NULL);
+
+	need_next = false;
+	off = base % PER_LEVEL;
+	for (i = 0; i < PER_LEVEL; i++) {
+		h = &timers->level[level]->list[(i+off) % PER_LEVEL];
+
+		if (!list_empty(h))
+			break;
+
+		/* We haven't cascaded yet, so if we wrap, we'll need to
+		 * check next level, too. */
+		if (i + off == PER_LEVEL)
+			need_next = true;
+	}
+	if (i == PER_LEVEL) {
+		/* Whole level empty: move up one. */
+		level++;
+		base >>= TIMER_LEVEL_BITS;
+		goto next;
+	}
+
+	/* Level 0 is exact, so they're all the same. */
+	if (level == 0)
+		found = list_top(h, struct timer, list);
+	else
+		found = find_first(h, NULL);
+
+	if (need_next) {
+		/* There may be no level above at all (top of the array). */
+		if (level + 1 == ARRAY_SIZE(timers->level)
+		    || !timers->level[level+1]) {
+			found = find_first(&timers->far, found);
+		} else {
+			base >>= TIMER_LEVEL_BITS;
+			off = base % PER_LEVEL;
+			h = &timers->level[level+1]->list[off];
+			found = find_first(h, found);
+		}
+	}
+
+	return (struct timer *)found;
+}
+
+/* Report the expiry time of the earliest timer through @first.  Returns
+ * false, leaving @first untouched, when there are no timers. */
+bool timer_earliest(const struct timers *timers, struct timespec *first)
+{
+	const struct timer *head = get_first(timers);
+
+	if (head)
+		*first = grains_to_time(head->time);
+	return head != NULL;
+}
+
+/* Assume no timers before 'time', cascade down and update base time. */
+/*
+ * Rather than cascading level by level, every bucket that the new time
+ * selects (from the highest affected level down to level 0) is emptied
+ * onto a temporary list, the base is moved, and the collected timers are
+ * re-added relative to the new base.
+ */
+static void timer_fast_forward(struct timers *timers, uint64_t time)
+{
+	unsigned int level, changed;
+	int need_level = -1;
+	struct list_head list;
+	struct timer *i;
+
+	/* How many bits changed between base and time?
+	 * Each time we wrap, we need to empty buckets from above. */
+	if (time == timers->base)
+		return;
+
+	/* presumably the 1-based position of the highest differing bit;
+	 * confirm against ccan/ilog */
+	changed = ilog64_nz(time ^ timers->base);
+	level = (changed - 1) / TIMER_LEVEL_BITS;
+
+	/* Buckets always empty downwards, so we could cascade manually,
+	 * but it's rarely very many so we just remove and re-add */
+	list_head_init(&list);
+
+	do {
+		if (!timers->level[level]) {
+			/* We need any which belong on this level. */
+			timers_far_get(timers, &list,
+				       timers->base
+				       + (1ULL << ((level+1)*TIMER_LEVEL_BITS))-1);
+			/* The loop counts downwards, so this ends up as the
+			 * lowest missing level that was visited. */
+			need_level = level;
+		} else {
+			unsigned src;
+
+			/* Get all timers from this bucket. */
+			src = (time >> (level * TIMER_LEVEL_BITS)) % PER_LEVEL;
+			list_append_list(&list,
+					 &timers->level[level]->list[src]);
+		}
+	} while (level--);
+
+	/* Did we hit the last level?  If so, add. */
+	if (need_level != -1)
+		add_level(timers, need_level);
+
+	/* Fast-forward the time, and re-add everyone. */
+	timers->base = time;
+	while ((i = list_pop(&list, struct timer, list)) != NULL)
+		timer_add_raw(timers, i);
+}
+
+/* Collect every timer due at or before @expire onto @list (which is
+ * initialized here) and advance the base time as far as the timers
+ * allow. */
+void timers_expire(struct timers *timers,
+		   struct timespec expire,
+		   struct list_head *list)
+{
+	const uint64_t now = time_to_grains(expire);
+
+	assert(now >= timers->base);
+
+	list_head_init(list);
+
+	/* Level 0 is needed below; allocate it on first use, unless there
+	 * is nothing to expire at all. */
+	if (!timers->level[0]) {
+		if (list_empty(&timers->far))
+			return;
+		add_level(timers, 0);
+	}
+
+	for (;;) {
+		const struct timer *earliest = get_first(timers);
+		unsigned int bucket;
+
+		if (earliest == NULL)
+			break;
+
+		assert(earliest->time >= timers->base);
+
+		/* Nothing else is due: just move the clock to now. */
+		if (earliest->time > now) {
+			timer_fast_forward(timers, now);
+			break;
+		}
+
+		/* Jump to the earliest timer and take its whole bucket. */
+		timer_fast_forward(timers, earliest->time);
+		bucket = timers->base % PER_LEVEL;
+		list_append_list(list, &timers->level[0]->list[bucket]);
+
+		if (timers->base == now)
+			break;
+	}
+}
+
+/*
+ * Check one list: it must be a well-formed list, and every timer on it
+ * must have a time within [min, max].
+ *
+ * Returns true if the list is consistent.  On inconsistency, returns
+ * false when @abortstr is NULL; otherwise prints a diagnostic prefixed
+ * with @abortstr to stderr and aborts.
+ */
+static bool timer_list_check(const struct list_head *l,
+			     uint64_t min, uint64_t max,
+			     const char *abortstr)
+{
+	const struct timer *t;
+
+	if (!list_check(l, abortstr))
+		return false;
+
+	list_for_each(l, t, list) {
+		if (t->time < min || t->time > max) {
+			if (abortstr) {
+				/* Cast so each argument has exactly the type
+				 * its conversion specifier requires. */
+				fprintf(stderr,
+					"%s: timer %p %llu not %llu-%llu\n",
+					abortstr, (const void *)t,
+					(unsigned long long)t->time,
+					(unsigned long long)min,
+					(unsigned long long)max);
+				abort();
+			}
+			return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * Verify that every timer sits in a bucket consistent with its time.
+ *
+ * Returns @timers if the structure is consistent.  Otherwise returns NULL
+ * when @abortstr is NULL, or prints a diagnostic and aborts when it is
+ * set (via timer_list_check()).
+ */
+struct timers *timers_check(const struct timers *timers, const char *abortstr)
+{
+	unsigned int l, i, off, shift;
+	uint64_t base;
+
+	l = 0;
+	if (!timers->level[0])
+		goto past_levels;
+
+	/* First level is simple. */
+	off = timers->base % PER_LEVEL;
+	for (i = 0; i < PER_LEVEL; i++) {
+		struct list_head *h;
+
+		h = &timers->level[l]->list[(i+off) % PER_LEVEL];
+		if (!timer_list_check(h, timers->base + i, timers->base + i,
+				      abortstr))
+			return NULL;
+	}
+
+	/* For other levels, "current" bucket has been emptied, and may contain
+	 * entries for the current + level_size bucket. */
+	/* Test the index before reading level[l], and bound it by the number
+	 * of levels rather than the number of buckets per level. */
+	for (l = 1; l < ARRAY_SIZE(timers->level) && timers->level[l]; l++) {
+		uint64_t per_bucket = 1ULL << (TIMER_LEVEL_BITS * l);
+
+		off = ((timers->base >> (l*TIMER_LEVEL_BITS)) % PER_LEVEL);
+		/* We start at *next* bucket. */
+		base = (timers->base & ~(per_bucket - 1)) + per_bucket;
+
+		for (i = 1; i <= PER_LEVEL; i++) {
+			struct list_head *h;
+
+			h = &timers->level[l]->list[(i+off) % PER_LEVEL];
+			if (!timer_list_check(h, base, base + per_bucket - 1,
+					      abortstr))
+				return NULL;
+			base += per_bucket;
+		}
+	}
+
+past_levels:
+	/* Lower bound for anything on the far list.  With every level
+	 * allocated the shift would reach the width of the type, which is
+	 * undefined; the bound then saturates at the maximum time. */
+	shift = TIMER_LEVEL_BITS * l;
+	if (shift >= 64)
+		base = -1ULL;
+	else
+		base = (timers->base & ~((1ULL << shift) - 1))
+			+ (1ULL << shift) - 1;
+	if (!timer_list_check(&timers->far, base, -1ULL, abortstr))
+		return NULL;
+
+	return (struct timers *)timers;
+}
+
+//#ifdef CCAN_TIMER_DEBUG
+/*
+ * Print a summary of the timers structure to @fp (stderr if @fp is NULL):
+ * the base time, then for each allocated level every non-empty bucket
+ * with its timer count and the minimum and maximum time relative to the
+ * base, and finally the far list with absolute minimum and maximum times.
+ *
+ * All output goes to @fp.
+ */
+void timers_dump(const struct timers *timers, FILE *fp)
+{
+	unsigned int l, i;
+	uint64_t min, max, num;
+	struct timer *t;
+
+	if (!fp)
+		fp = stderr;
+
+	fprintf(fp, "Base: %llu\n", (unsigned long long)timers->base);
+
+	/* Test the index before reading level[l]. */
+	for (l = 0; l < ARRAY_SIZE(timers->level) && timers->level[l]; l++) {
+		fprintf(fp, "Level %u (+%llu):\n",
+			l, (unsigned long long)1 << (TIMER_LEVEL_BITS * l));
+		for (i = 0; i < PER_LEVEL; i++) {
+
+			if (list_empty(&timers->level[l]->list[i]))
+				continue;
+			min = -1ULL;
+			max = 0;
+			num = 0;
+			list_for_each(&timers->level[l]->list[i], t, list) {
+				if (t->time < min)
+					min = t->time;
+				if (t->time > max)
+					max = t->time;
+				num++;
+			}
+			fprintf(fp, "  %llu (+%llu-+%llu)\n",
+				(unsigned long long)num,
+				(unsigned long long)(min - timers->base),
+				(unsigned long long)(max - timers->base));
+		}
+	}
+
+	/* An empty far list is reported with min = 2^64-1 and max = 0. */
+	min = -1ULL;
+	max = 0;
+	num = 0;
+	list_for_each(&timers->far, t, list) {
+		if (t->time < min)
+			min = t->time;
+		if (t->time > max)
+			max = t->time;
+		num++;
+	}
+	fprintf(fp, "Far: %llu (%llu-%llu)\n",
+		(unsigned long long)num,
+		(unsigned long long)min,
+		(unsigned long long)max);
+}
+//#endif
+
+/* Release every level allocated during use.  Slots that were never
+ * allocated are NULL, which free() accepts. */
+void timers_cleanup(struct timers *timers)
+{
+	struct timer_level **slot = timers->level;
+	struct timer_level **end = slot + ARRAY_SIZE(timers->level);
+
+	for (; slot != end; slot++)
+		free(*slot);
+}

+ 143 - 0
ccan/timer/timer.h

@@ -0,0 +1,143 @@
+/* LGPL (v2.1 or any later version) - see LICENSE file for details */
+#ifndef CCAN_TIMER_H
+#define CCAN_TIMER_H
+#include <ccan/time/time.h>
+#include <ccan/list/list.h>
+#include <stdint.h>
+
+/* We divide all nsec values by 1000, reducing it to usec granularity. */
+#define TIMER_GRANULARITY 1000
+/* This gives 16 pointers per level, up to 13 levels deep. */
+#define TIMER_LEVEL_BITS 4
+
+struct timers;
+struct timer;
+
+/**
+ * timers_init - initialize a timers struct.
+ * @timers: the struct timers
+ * @start: the minimum time which will ever be added.
+ *
+ * This sets up a timers struct: any timers added before @start will be
+ * set to expire immediately.
+ */
+void timers_init(struct timers *timers, struct timespec start);
+
+/**
+ * timers_cleanup - free allocations within timers struct.
+ * @timers: the struct timers
+ *
+ * This frees any timer layers allocated during use.
+ */
+void timers_cleanup(struct timers *timers);
+
+/**
+ * timer_add - insert a timer.
+ * @timers: the struct timers
+ * @timer: the (uninitialized) timer to add
+ * @when: when @timer expires.
+ *
+ * This efficiently adds @timer to @timers, to expire @when (rounded to
+ * TIMER_GRANULARITY nanoseconds).
+ */
+void timer_add(struct timers *timers, struct timer *timer,
+	       struct timespec when);
+
+/**
+ * timer_del - remove an unexpired timer.
+ * @timers: the struct timers
+ * @timer: the timer previously added with timer_add()
+ *
+ * This efficiently removes @timer from @timers.
+ */
+void timer_del(struct timers *timers, struct timer *timer);
+
+/**
+ * timer_earliest - find out the first time when a timer will expire
+ * @timers: the struct timers
+ * @first: the time, only set if there is a timer.
+ *
+ * This returns false, and doesn't alter @first if there are no
+ * timers.  Otherwise, it sets @first to the expiry time of the first
+ * timer (rounded to TIMER_GRANULARITY nanoseconds), and returns true.
+ */
+bool timer_earliest(const struct timers *timers, struct timespec *first);
+
+/**
+ * timers_expire - update timers structure and remove expired timers.
+ * @timers: the struct timers
+ * @expire: the current time
+ * @list: the list for expired timers.
+ *
+ * @list will be initialized to the empty list, then all timers added
+ * with a @when arg less than or equal to @expire will be added to it in
+ * expiry order (within TIMER_GRANULARITY nanosecond precision).
+ *
+ * After this, @expire is considered the current time, and adding any
+ * timers with @when before this value will be silently changed to
+ * adding them with immediate expiration.
+ *
+ * You should not move @expire backwards, though it need not move
+ * forwards.
+ */
+void timers_expire(struct timers *timers,
+		   struct timespec expire,
+		   struct list_head *list);
+
+/**
+ * timers_check - check timer structure for consistency
+ * @t: the struct timers
+ * @abortstr: the location to print on aborting, or NULL.
+ *
+ * Because timers have redundant information, consistency checking can
+ * be done on the tree.  This is useful as a debugging check.  If
+ * @abortstr is non-NULL, that will be printed in a diagnostic if the
+ * timers structure is inconsistent, and the function will abort.
+ *
+ * Returns the timers struct if it is consistent, NULL if not (it can
+ * never return NULL if @abortstr is set).
+ */
+struct timers *timers_check(const struct timers *t, const char *abortstr);
+
+#ifdef CCAN_TIMER_DEBUG
+#include <stdio.h>
+
+/**
+ * timers_dump - dump the timers datastructure (for debugging it)
+ * @timers: the struct timers
+ * @fp: the FILE to dump to (stderr if @fp is NULL)
+ */
+void timers_dump(const struct timers *timers, FILE *fp);
+#endif
+
+/**
+ * struct timers - structure to hold a set of timers.
+ *
+ * Initialized using timers_init, the levels of the timer are
+ * allocated as necessary, using malloc.
+ *
+ * See Also:
+ *	timers_init(), timers_cleanup()
+ */
+struct timers {
+	/* Far in the future. */
+	/* Also holds any timer whose level has not been allocated. */
+	struct list_head far;
+	/* Current time, in units of TIMER_GRANULARITY nanoseconds. */
+	uint64_t base;
+
+	/* One slot per TIMER_LEVEL_BITS bits of a 64-bit time (rounded up);
+	 * a slot is NULL until that level has been allocated. */
+	struct timer_level *level[(64 + TIMER_LEVEL_BITS-1) / TIMER_LEVEL_BITS];
+};
+
+/**
+ * struct timer - a single timer.
+ *
+ * Set up by timer_add(), this is usually contained within an
+ * application-specific structure.
+ *
+ * See Also:
+ *	ccan/container_of, timer_add(), timer_del()
+ */
+struct timer {
+	/* Links the timer into a level bucket, the far list, or the list
+	 * of expired timers filled in by timers_expire(). */
+	struct list_node list;
+	/* Expiry time in units of TIMER_GRANULARITY nanoseconds. */
+	uint64_t time;
+};
+#endif /* CCAN_TIMER_H */