Browse Source

bdelta: new module for binary diff/patch

Joey Adams 14 years ago
parent
commit
d1a8796172

+ 1 - 0
ccan/bdelta/LICENSE

@@ -0,0 +1 @@
+../../licenses/BSD-MIT

+ 143 - 0
ccan/bdelta/_info

@@ -0,0 +1,143 @@
+#include <string.h>
+#include "config.h"
+
+/**
+ * bdelta - Generate and apply binary deltas
+ *
+ * This library takes two strings containing binary data, and produces a
+ * "patch" that can be applied to the first one to produce the second one.
+ * It can be used to save bandwidth and disk space when many texts differing
+ * by a small number of bytes need to be transmitted or stored.
+ *
+ * Patches produced by this version of the library can be applied using future
+ * versions, but not past versions.
+ *
+ * bdelta implements the algorithm described in
+ * An O(ND) Difference Algorithm and Its Variations by Eugene W. Myers.
+ * Because its memory usage and expected running time are O(N + D^2),
+ * it works well only when the strings differ by a small number of bytes.
+ * This implementation stops trying when the strings differ by more than
+ * 1000 bytes, and falls back to producing a patch that simply emits the new
+ * string.
+ *
+ * Thus, bdelta does not save any space when given two strings that differ by
+ * more than 1000 bytes.  This may be improved in a future version of the
+ * library.
+ *
+ * Example:
+ *	#include <ccan/bdelta/bdelta.h>
+ *	#include <stdio.h>
+ *	#include <stdlib.h>
+ *	#include <string.h>
+ *
+ *	static void gulp(const char *filename, void **data_out, size_t *size_out);
+ *
+ *	static int usage(const char *prog)
+ *	{
+ *		fprintf(
+ *			stderr,
+ *			"Usage: %s diff  <old> <new>    >  <patch>\n"
+ *			"       %s patch <old> <patch>  >  <new>\n",
+ *			prog, prog
+ *		);
+ *		return 1;
+ *	}
+ *
+ *	int main(int argc, char *argv[])
+ *	{
+ *		void *old, *new_, *patch;
+ *		size_t old_size, new_size, patch_size;
+ *		BDELTAcode rc;
+ *
+ *		if (argc != 4)
+ *			return usage(argv[0]);
+ *
+ *		if (strcmp(argv[1], "diff") == 0) {
+ *			gulp(argv[2], &old, &old_size);
+ *			gulp(argv[3], &new_, &new_size);
+ *
+ *			rc = bdelta_diff(old, old_size, new_, new_size, &patch, &patch_size);
+ *			if (rc != BDELTA_OK) {
+ *				bdelta_perror("bdelta_diff", rc);
+ *				return 1;
+ *			}
+ *
+ *			if (fwrite(patch, 1, patch_size, stdout) != patch_size) {
+ *				perror("stdout");
+ *				return 1;
+ *			}
+ *		} else if (strcmp(argv[1], "patch") == 0) {
+ *			gulp(argv[2], &old, &old_size);
+ *			gulp(argv[3], &patch, &patch_size);
+ *
+ *			rc = bdelta_patch(old, old_size, patch, patch_size, &new_, &new_size);
+ *			if (rc != BDELTA_OK) {
+ *				bdelta_perror("bdelta_patch", rc);
+ *				return 1;
+ *			}
+ *
+ *			if (fwrite(new_, 1, new_size, stdout) != new_size) {
+ *				perror("stdout");
+ *				return 1;
+ *			}
+ *		} else {
+ *			return usage(argv[0]);
+ *		}
+ *
+ *		free(old);
+ *		free(new_);
+ *		free(patch);
+ *		return 0;
+ *	}
+ *
+ *	static void gulp(const char *filename, void **data_out, size_t *size_out)
+ *	{
+ *		FILE *f = fopen(filename, "rb");
+ *		size_t size = 0;
+ *		size_t alloc = 16;
+ *		char *data = malloc(alloc);
+ *
+ *		if (f == NULL || data == NULL)
+ *			goto error;
+ *
+ *		for (;;) {
+ *			size += fread(data + size, 1, alloc - size, f);
+ *			if (size < alloc) {
+ *				if (!feof(f))
+ *					goto error;
+ *				break;
+ *			}
+ *			data = realloc(data, alloc *= 2);
+ *			if (data == NULL)
+ *				goto error;
+ *		}
+ *
+ *		if (fclose(f) != 0)
+ *			goto error;
+ *
+ *		*data_out = data;
+ *		*size_out = size;
+ *		return;
+ *
+ *	error:
+ *		perror(filename);
+ *		exit(EXIT_FAILURE);
+ *	}
+ *
+ * Author: Joey Adams <joeyadams3.14159@gmail.com>
+ * License: MIT
+ * Version: 0.1.1
+ */
+int main(int argc, char *argv[])
+{
+	/* Expect exactly one argument */
+	if (argc != 2)
+		return 1;
+
+	if (strcmp(argv[1], "depends") == 0) {
+		/* Nothing */
+		return 0;
+	}
+
+	return 1;
+}

+ 838 - 0
ccan/bdelta/bdelta.c

@@ -0,0 +1,838 @@
+/*
+ * Copyright (C) 2011 Joseph Adams <joeyadams3.14159@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "bdelta.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct
+{
+	unsigned char *cur;    /* End of string; insertion point for new bytes */
+	unsigned char *end;    /* End of buffer */
+	unsigned char *start;  /* Beginning of string */
+} SB;
+
+/* sb is evaluated multiple times in these macros. */
+#define sb_size(sb)  ((size_t)((sb)->cur - (sb)->start))
+#define sb_avail(sb) ((size_t)((sb)->end - (sb)->cur))
+
+/* sb and need may be evaluated multiple times. */
+#define sb_need(sb, need) do {           \
+		if (sb_avail(sb) < (need))       \
+			if (sb_grow(sb, need) != 0)  \
+				goto out_of_memory;      \
+	} while (0)
+
+static int sb_init(SB *sb)
+{
+	sb->start = malloc(17);
+	if (sb->start == NULL)
+		return -1;
+	sb->cur = sb->start;
+	sb->end = sb->start + 16;
+	return 0;
+}
+
+static int sb_grow(SB *sb, size_t need)
+{
+	size_t length = sb->cur - sb->start;
+	size_t alloc = sb->end - sb->start;
+	unsigned char *tmp;
+	
+	do {
+		alloc *= 2;
+	} while (alloc < length + need);
+	
+	tmp = realloc(sb->start, alloc + 1);
+	if (tmp == NULL)
+		return -1;
+	sb->start = tmp;
+	sb->cur = tmp + length;
+	sb->end = tmp + alloc;
+	return 0;
+}
+
+static int sb_putc(SB *sb, unsigned char c)
+{
+	sb_need(sb, 1);
+	*sb->cur++ = c;
+	return 0;
+
+out_of_memory:
+	return -1;
+}
+
+static int sb_write(SB *sb, const void *data, size_t size)
+{
+	sb_need(sb, size);
+	memcpy(sb->cur, data, size);
+	sb->cur += size;
+	return 0;
+	
+out_of_memory:
+	return -1;
+}
+
+static void sb_return(SB *sb, void **data_out, size_t *length_out)
+{
+	*sb->cur = 0;
+	if (data_out)
+		*data_out = sb->start;
+	else
+		free(sb->start);
+	if (length_out)
+		*length_out = sb->cur - sb->start;
+}
+
+static void sb_discard(SB *sb, void **data_out, size_t *length_out)
+{
+	free(sb->start);
+	if (data_out)
+		*data_out = NULL;
+	if (length_out)
+		*length_out = 0;
+}
+
+/*
+ * The first byte in a patch is the "patch type", which indicates how the
+ * patch is formatted.  This keeps the patch format flexible while retaining
+ * backward compatibility.  Patches produced with an older version of
+ * the library can be applied with a newer version.
+ *
+ * PT_LITERAL
+ *     Contains nothing more than the content of the new text.
+ *
+ * PT_CSI32
+ *     A string of copy, skip, and insert instructions for generating the new
+ *     string from the old.
+ *
+ *         copy(size):   Copy @size bytes from old to new.
+ *         skip(size):   Skip @size bytes of old.
+ *         insert(text): Insert @text into new.
+ *
+ *     The syntax is as follows:
+ *
+ *         copy:   instruction_byte(1) size
+ *         skip:   instruction_byte(2) size
+ *         insert: instruction_byte(3) size $size*OCTET
+ *
+ *         0 <= size_param_length <= 4
+ *         instruction_byte(op) = op | size_param_length << 2
+ *         size: $size_param_length*OCTET
+ *               -- size is an unsigned integer encoded in big endian.
+ *               -- However, if size_param_length is 0, the operation size is 1.
+ *
+ *     Simply put, an instruction starts with an opcode and operation size.
+ *     An insert instruction is followed by the bytes to be inserted.
+ */
+#define PT_LITERAL   10
+#define PT_CSI32     11
+
+#define OP_COPY      1
+#define OP_SKIP      2
+#define OP_INSERT    3
+
+static unsigned int bytes_needed_for_size(uint32_t size)
+{
+	if (size == 1)
+		return 0;
+	else if (size <= 0xFF)
+		return 1;
+	else if (size <= 0xFFFF)
+		return 2;
+	else if (size <= 0xFFFFFF)
+		return 3;
+	else
+		return 4;
+}
+
+/*
+ * Return values:
+ *
+ *  BDELTA_OK:      Success
+ *  BDELTA_MEMORY:  Memory allocation failed
+ */
+static BDELTAcode csi32_emit_op(SB *patch_out, int op, uint32_t size, const char **new_)
+{
+	unsigned int i;
+	unsigned int size_param_length;
+	size_t need;
+	uint32_t tmp;
+	
+	assert(op >= 1 && op <= 3);
+	
+	if (size == 0)
+		return BDELTA_OK;
+	size_param_length = bytes_needed_for_size(size);
+	
+	need = 1 + size_param_length;
+	if (op == OP_INSERT)
+		need += size;
+	sb_need(patch_out, need);
+	
+	*patch_out->cur++ = (unsigned int)op | size_param_length << 2;
+	for (i = size_param_length, tmp = size; i-- > 0; tmp >>= 8)
+		patch_out->cur[i] = tmp & 0xFF;
+	patch_out->cur += size_param_length;
+	
+	switch (op) {
+		case OP_COPY:
+			*new_ += size;
+			break;
+		case OP_SKIP:
+			break;
+		case OP_INSERT:
+			memcpy(patch_out->cur, *new_, size);
+			patch_out->cur += size;
+			*new_ += size;
+			break;
+		default:
+			assert(0);
+	}
+	
+	return BDELTA_OK;
+
+out_of_memory:
+	return BDELTA_MEMORY;
+}
+
+/*
+ * On success, returns 1, advances *sp past the parsed text, and sets *op_out and *size_out.
+ * On error or EOF, returns 0.
+ */
+static int csi32_parse_op(
+	const unsigned char **sp, const unsigned char *e,
+	int *op_out, uint32_t *size_out)
+{
+	const unsigned char *s = *sp;
+	int op;
+	unsigned int i;
+	unsigned int size_param_length;
+	uint32_t size;
+	
+	if (s >= e)
+		return 0;
+	op = *s & 3;
+	size_param_length = *s >> 2;
+	s++;
+	if (op == 0 || size_param_length > 4)
+		return 0;
+	
+	if (size_param_length == 0) {
+		size = 1;
+	} else {
+		if ((size_t)(e - s) < size_param_length)
+			return 0;
+		size = 0;
+		for (i = 0; i < size_param_length; i++) {
+			size <<= 8;
+			size |= *s++ & 0xFF;
+		}
+	}
+	
+	/* Make sure insert data fits in the patch, but don't consume it. */
+	if (op == OP_INSERT && (size_t)(e - s) < size)
+		return 0;
+	
+	*op_out = op;
+	*size_out = size;
+	*sp = s;
+	return 1;
+}
+
+/*
+ * bdelta uses the algorithm described in:
+ *
+ *     Myers, E. (1986). An O(ND) Difference Algorithm and Its Variations.
+ *     Retrieved from http://www.xmailserver.org/diff2.pdf
+ *
+ * The pseudocode in Myers' paper (Figure 2) uses an array called V,
+ * where (V[k], V[k] - k) is the endpoint of the furthest-reaching
+ * D-path ending on diagonal k.
+ *
+ * The structure below holds the V array for every iteration of the outer loop.
+ * Because each iteration produces D+1 values, a triangle is formed:
+ *
+ *                      k
+ *        -5 -4 -3 -2 -1  0  1  2  3  4  5
+ *      ----------------------------------
+ *    0 |                 0                (copy 0)
+ *      |                   \              skip 1
+ *    1 |              0     1
+ *      |                      \           skip 1, then copy 1
+ *    2 |           2     2     3
+ *  D   |                      /           insert 1, then copy 2
+ *    3 |        3     4     5     5
+ *      |                     \            skip 1, then copy 1
+ *    4 |     3     4     5     7     7
+ *      |                      /           insert 1
+ *    5 |  3     4     5     7     -     -
+ *
+ * @data will literally contain: 0 0 1 2 2 3 3 4 5 5 3 4 5 7 7 3 4 5 7
+ *
+ * To convert this to an edit script, we first climb back to the top,
+ * using the same procedure as was used when the triangle was generated:
+ *
+ *     If k = -D, climb right (the only way we can go).
+ *     If k = +D, climb left  (the only way we can go).
+ *     Otherwise, favor the greater number.
+ *     If the numbers are the same, climb left.
+ *
+ * Finally, we convert the descent to the solution to a patch script:
+ *
+ *     The top number n corresponds to:
+ *         copy   n
+ *
+ *     A descent left from a to b corresponds to:
+ *         insert 1
+ *         copy   b-a
+ *
+ *     A descent right from a to b corresponds to:
+ *         skip   1
+ *         copy   b-a-1
+ */
+typedef struct
+{
+	uint32_t *data;
+	int solution_d;
+	int solution_k;
+	uint32_t *solution_ptr;
+} Triangle;
+
+/*
+ * Return values:
+ *
+ *  BDELTA_OK:                      Success
+ *  BDELTA_MEMORY:                  Memory allocation failed
+ *  BDELTA_INTERNAL_DMAX_EXCEEDED:  d_max exceeded (strings are too different)
+ */
+static BDELTAcode build_triangle(
+	const char *old,  uint32_t old_size,
+	const char *new_, uint32_t new_size,
+	int d_max,
+	Triangle *triangle_out)
+{
+	int d, k;
+	uint32_t x, y;
+	uint32_t *data;
+	uint32_t *vprev; /* position within previous row */
+	uint32_t *v;     /* position within current row */
+	uint32_t *vcur;  /* beginning of current row */
+	size_t data_alloc = 16;
+	
+	memset(triangle_out, 0, sizeof(*triangle_out));
+	
+	data = malloc(data_alloc * sizeof(*data));
+	if (data == NULL)
+		return BDELTA_MEMORY;
+	
+	/* Allow dmax < 0 to mean "no limit". */
+	if (d_max < 0)
+		d_max = old_size + new_size;
+	
+	/*
+	 * Compute the farthest-reaching 0-path so the loop after this
+	 * will have a "previous" row to start with.
+	 */
+	for (x = 0; x < old_size && x < new_size && old[x] == new_[x]; )
+		x++;
+	*data = x;
+	if (x >= old_size && x >= new_size) {
+		/* Strings are equal, so return a triangle with one row (a dot). */
+		assert(x == old_size && x == new_size);
+		triangle_out->data = data;
+		triangle_out->solution_d = 0;
+		triangle_out->solution_k = 0;
+		triangle_out->solution_ptr = data;
+		return BDELTA_OK;
+	}
+	vprev = data;
+	vcur = v = data + 1;
+	
+	/*
+	 * Here is the core of the Myers diff algorithm.
+	 *
+	 * This is a direct translation of the pseudocode in Myers' paper,
+	 * with implementation-specific adaptations:
+	 *
+	 *  * Every V array is preserved per iteration of the outer loop.
+	 *    This is necessary so we can determine the actual patch, not just
+	 *    the length of the shortest edit string.  See the coment above
+	 *    the definition of Triangle for an in-depth explanation.
+	 *
+	 *  * Array items are stored consecutively so as to not waste space.
+	 *
+	 *  * The buffer holding the V arrays is expanded dynamically.
+	 */
+	for (d = 1; d <= d_max; d++, vprev = vcur, vcur = v) {
+		/* Ensure that the buffer has enough room for this row. */
+		if ((size_t)(v - data + d + 1) > data_alloc) {
+			size_t vprev_idx = vprev - data;
+			size_t v_idx     = v     - data;
+			size_t vcur_idx  = vcur  - data;
+			uint32_t *tmp;
+			
+			do {
+				data_alloc *= 2;
+			} while ((size_t)(v - data + d + 1) > data_alloc);
+			
+			tmp = realloc(data, data_alloc * sizeof(*data));
+			if (tmp == NULL) {
+				free(data);
+				return BDELTA_MEMORY;
+			}
+			data = tmp;
+			
+			/* Relocate pointers to the buffer we just expanded. */
+			vprev = data + vprev_idx;
+			v     = data + v_idx;
+			vcur  = data + vcur_idx;
+		}
+		
+		for (k = -d; k <= d; k += 2, vprev++) {
+			if (k == -d || (k != d && vprev[-1] < vprev[0]))
+				x = vprev[0];
+			else
+				x = vprev[-1] + 1;
+			y = x - k;
+			while (x < old_size && y < new_size && old[x] == new_[y])
+				x++, y++;
+			*v++ = x;
+			if (x >= old_size && y >= new_size) {
+				/* Shortest edit string found. */
+				assert(x == old_size && y == new_size);
+				triangle_out->data = data;
+				triangle_out->solution_d = d;
+				triangle_out->solution_k = k;
+				triangle_out->solution_ptr = v - 1;
+				return BDELTA_OK;
+			}
+		}
+	}
+	
+	free(data);
+	return BDELTA_INTERNAL_DMAX_EXCEEDED;
+}
+
+/*
+ * Trace a solution back to the top, returning a string of instructions
+ * for descending from the top to the solution.
+ *
+ * An instruction is one of the following:
+ *
+ *  -1: Descend left.
+ *  +1: Descend right.
+ *   0: Finished.  You should be at the solution now.
+ *
+ * If memory allocation fails, this function will return NULL.
+ */
+static signed char *climb_triangle(const Triangle *triangle)
+{
+	signed char *descent;
+	int d, k;
+	uint32_t *p;
+	
+	assert(triangle->solution_d >= 0);
+	
+	descent = malloc(triangle->solution_d + 1);
+	if (descent == NULL)
+		return NULL;
+	d = triangle->solution_d;
+	k = triangle->solution_k;
+	p = triangle->solution_ptr;
+	descent[d] = 0;
+	
+	while (d > 0) {
+		if (k == -d || (k != d && *(p-d-1) < *(p-d))) {
+			/* Climb right */
+			k++;
+			p = p - d;
+			descent[--d] = -1;
+		} else {
+			/* Climb left */
+			k--;
+			p = p - d - 1;
+			descent[--d] = 1;
+		}
+	}
+	
+	return descent;
+}
+
+/*
+ * Generate the actual patch, given data produced by build_triangle and
+ * climb_triangle.  new_ is needed for the content of the insertions.
+ *
+ * See the comment above the definition of Triangle.  It concisely documents
+ * how a descent down the triangle corresponds to a patch script.
+ *
+ * The resulting patch, including the patch type byte, is appended to patch_out.
+ *
+ * Return values:
+ *
+ *  BDELTA_OK:      Success
+ *  BDELTA_MEMORY:  Memory allocation failed
+ */
+static BDELTAcode descent_to_patch(
+	const signed char *descent,
+	const Triangle *triangle,
+	const char *new_, uint32_t new_size,
+	SB *patch_out)
+{
+	const char *new_end = new_ + new_size;
+	uint32_t *p = triangle->data;
+	uint32_t *p2;
+	int d = 0;
+	int k = 0;
+	int pending_op = 0;
+	int current_op;
+	uint32_t pending_length = 0;
+	uint32_t copy_length;
+	
+	if (sb_putc(patch_out, PT_CSI32) != 0)
+		return BDELTA_MEMORY;
+	
+	if (*p > 0) {
+		if (csi32_emit_op(patch_out, OP_COPY, *p, &new_) != BDELTA_OK)
+			return BDELTA_MEMORY;
+	}
+	
+	for (; *descent != 0; descent++, p = p2) {
+		if (*descent < 0) {
+			/* Descend left. */
+			d++;
+			k--;
+			p2 = p + d;
+			current_op = OP_INSERT;
+			assert(*p2 >= *p);
+			copy_length = *p2 - *p;
+		} else {
+			/* Descend right. */
+			d++;
+			k++;
+			p2 = p + d + 1;
+			current_op = OP_SKIP;
+			assert(*p2 > *p);
+			copy_length = *p2 - *p - 1;
+		}
+		
+		if (pending_op == current_op) {
+			pending_length++;
+		} else {
+			if (pending_op != 0) {
+				if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK)
+					return BDELTA_MEMORY;
+			}
+			pending_op = current_op;
+			pending_length = 1;
+		}
+		
+		if (copy_length > 0) {
+			if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK)
+				return BDELTA_MEMORY;
+			pending_op = 0;
+			if (csi32_emit_op(patch_out, OP_COPY, copy_length, &new_) != BDELTA_OK)
+				return BDELTA_MEMORY;
+		}
+	}
+	assert(d == triangle->solution_d);
+	assert(k == triangle->solution_k);
+	assert(p == triangle->solution_ptr);
+	
+	/* Emit the last pending op, unless it's a skip. */
+	if (pending_op != 0 && pending_op != OP_SKIP) {
+		if (csi32_emit_op(patch_out, pending_op, pending_length, &new_) != BDELTA_OK)
+			return BDELTA_MEMORY;
+	}
+	
+	assert(new_ == new_end);
+	return BDELTA_OK;
+}
+
+/*
+ * Generate a patch using Myers' O(ND) algorithm.
+ *
+ * The patch is appended to @patch_out, which must be initialized before calling.
+ *
+ * Return values:
+ *
+ *  BDELTA_OK:                         Success
+ *  BDELTA_MEMORY:                     Memory allocation failed
+ *  BDELTA_INTERNAL_INPUTS_TOO_LARGE:  Input sizes are too large
+ *  BDELTA_INTERNAL_DMAX_EXCEEDED:     d_max exceeded (strings are too different)
+ */
+static BDELTAcode diff_myers(
+	const char *old,  size_t old_size,
+	const char *new_, size_t new_size,
+	SB *patch_out)
+{
+	Triangle triangle;
+	signed char *descent;
+	BDELTAcode rc;
+	
+	/* Make sure old_size + new_size does not overflow int or uint32_t. */
+	if (old_size >= UINT32_MAX ||
+	    new_size >= UINT32_MAX - old_size ||
+	    old_size >= (unsigned int)INT_MAX ||
+	    new_size >= (unsigned int)INT_MAX - old_size)
+		return BDELTA_INTERNAL_INPUTS_TOO_LARGE;
+	
+	rc = build_triangle(old, old_size, new_, new_size, 1000, &triangle);
+	if (rc != BDELTA_OK)
+		return rc;
+	
+	descent = climb_triangle(&triangle);
+	if (descent == NULL)
+		goto oom1;
+	
+	if (descent_to_patch(descent, &triangle, new_, new_size, patch_out) != BDELTA_OK)
+		goto oom2;
+	
+	free(descent);
+	free(triangle.data);
+	return BDELTA_OK;
+	
+oom2:
+	free(descent);
+oom1:
+	free(triangle.data);
+	return BDELTA_MEMORY;
+}
+
+BDELTAcode bdelta_diff(
+	const void  *old,       size_t  old_size,
+	const void  *new_,      size_t  new_size,
+	void       **patch_out, size_t *patch_size_out)
+{
+	SB patch;
+	
+	if (sb_init(&patch) != 0)
+		goto out_of_memory;
+	
+	if (new_size == 0)
+		goto emit_new_literally;
+	
+	if (diff_myers(old, old_size, new_, new_size, &patch) != BDELTA_OK)
+		goto emit_new_literally;
+	
+	if (sb_size(&patch) > new_size) {
+		/*
+		 * A literal copy of new is no longer than this patch.
+		 * All that for nothing.
+		 */
+		goto emit_new_literally;
+	}
+	
+	/*
+	 * Verify that patch, when applied to old, produces the correct text.
+	 * If it doesn't, it's a bug, but fall back to a simple emit
+	 * to avert data corruption.
+	 */
+	{
+		void *result;
+		size_t result_size;
+		BDELTAcode rc;
+		int correct;
+		
+		rc = bdelta_patch(
+			old, old_size,
+			patch.start, patch.cur - patch.start,
+			&result, &result_size
+		);
+		
+		switch (rc) {
+			case BDELTA_OK:
+				correct = (result_size == new_size &&
+				           memcmp(result, new_, new_size) == 0);
+				free(result);
+				break;
+			
+			case BDELTA_MEMORY:
+				goto out_of_memory;
+			
+			default:
+				correct = 0;
+				break;
+		}
+		
+		if (!correct) {
+			assert(0);
+			goto emit_new_literally;
+		}
+	}
+	
+	sb_return(&patch, patch_out, patch_size_out);
+	return BDELTA_OK;
+
+emit_new_literally:
+	if (patch.cur != patch.start) {
+		free(patch.start);
+		if (sb_init(&patch) != 0)
+			goto out_of_memory;
+	}
+	if (sb_putc(&patch, PT_LITERAL) != 0 || sb_write(&patch, new_, new_size) != 0)
+		goto out_of_memory;
+	sb_return(&patch, patch_out, patch_size_out);
+	return BDELTA_OK;
+
+out_of_memory:
+	sb_discard(&patch, patch_out, patch_size_out);
+	return BDELTA_MEMORY;
+}
+
+/*
+ * Return values:
+ *
+ *  BDELTA_OK:              Success
+ *  BDELTA_PATCH_INVALID:   Patch is malformed
+ *  BDELTA_PATCH_MISMATCH:  Old string is too small
+ *  BDELTA_MEMORY:          Memory allocation failed
+ */
+static BDELTAcode patch_csi32(
+	const unsigned char *o, const unsigned char *oe,
+	const unsigned char *p, const unsigned char *pe,
+	SB *new_out)
+{
+	int op;
+	uint32_t size;
+	
+	while (csi32_parse_op(&p, pe, &op, &size)) {
+		if ((op == OP_COPY || op == OP_SKIP) && (size_t)(oe - o) < size) {
+			/* Copy or skip instruction exceeds length of old string. */
+			return BDELTA_PATCH_MISMATCH;
+		}
+		if (op == OP_COPY || op == OP_INSERT)
+			sb_need(new_out, size);
+		
+		switch (op) {
+			case OP_COPY:  /* Copy @size bytes from old string. */
+				memcpy(new_out->cur, o, size);
+				new_out->cur += size;
+				o += size;
+				break;
+			
+			case OP_SKIP:  /* Skip @size bytes of old string. */
+				o += size;
+				break;
+			
+			case OP_INSERT:  /* Insert @size new bytes (from the patch script). */
+				memcpy(new_out->cur, p, size);
+				new_out->cur += size;
+				p += size;
+				break;
+			
+			default:
+				assert(0);
+		}
+	}
+	if (p != pe)
+		return BDELTA_PATCH_INVALID;
+	
+	return BDELTA_OK;
+
+out_of_memory:
+	return BDELTA_MEMORY;
+}
+
+BDELTAcode bdelta_patch(
+	const void  *old,     size_t  old_size,
+	const void  *patch,   size_t  patch_size,
+	void       **new_out, size_t *new_size_out)
+{
+	const unsigned char *o = old;
+	const unsigned char *oe = o + old_size;
+	const unsigned char *p = patch;
+	const unsigned char *pe = p + patch_size;
+	SB result;
+	BDELTAcode rc;
+	
+	if (sb_init(&result) != 0) {
+		rc = BDELTA_MEMORY;
+		goto discard;
+	}
+	
+	if (p >= pe) {
+		rc = BDELTA_PATCH_INVALID;
+		goto discard;
+	}
+	
+	switch (*p++) {
+		case PT_LITERAL:
+			if (sb_write(&result, p, pe - p) != 0) {
+				rc = BDELTA_MEMORY;
+				goto discard;
+			}
+			break;
+		
+		case PT_CSI32:
+			rc = patch_csi32(o, oe, p, pe, &result);
+			if (rc != BDELTA_OK)
+				goto discard;
+			break;
+		
+		default:
+			rc = BDELTA_PATCH_INVALID;
+			goto discard;
+	}
+	
+	sb_return(&result, new_out, new_size_out);
+	return BDELTA_OK;
+
+discard:
+	sb_discard(&result, new_out, new_size_out);
+	return rc;
+}
+
+const char *bdelta_strerror(BDELTAcode code)
+{
+	switch (code) {
+		case BDELTA_OK:
+			return "Success";
+		case BDELTA_MEMORY:
+			return "Could not allocate memory";
+		case BDELTA_PATCH_INVALID:
+			return "Patch is invalid";
+		case BDELTA_PATCH_MISMATCH:
+			return "Patch applied to wrong data";
+		
+		case BDELTA_INTERNAL_DMAX_EXCEEDED:
+			return "Difference threshold exceeded (internal error)";
+		case BDELTA_INTERNAL_INPUTS_TOO_LARGE:
+			return "Inputs are too large (internal error)";
+		
+		default:
+			return "Invalid error code";
+	}
+}
+
+void bdelta_perror(const char *s, BDELTAcode code)
+{
+	if (s != NULL && *s != '\0')
+		fprintf(stderr, "%s: %s\n", s, bdelta_strerror(code));
+	else
+		fprintf(stderr, "%s\n", bdelta_strerror(code));
+}

+ 112 - 0
ccan/bdelta/bdelta.h

@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2011 Joseph Adams <joeyadams3.14159@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef CCAN_BDELTA_H
+#define CCAN_BDELTA_H
+
+#include <stddef.h>
+
+typedef enum {
+	BDELTA_OK               = 0,  /* Operation succeeded. */
+
+	BDELTA_MEMORY           = 1,  /* Memory allocation failed. */
+	BDELTA_PATCH_INVALID    = 2,  /* Patch is malformed. */
+	BDELTA_PATCH_MISMATCH   = 3,  /* Patch applied to wrong original string. */
+	
+	/* Internal error codes.  These will never be returned by API functions. */
+	BDELTA_INTERNAL_DMAX_EXCEEDED    = -10,
+	BDELTA_INTERNAL_INPUTS_TOO_LARGE = -11,
+} BDELTAcode;
+
+/*
+ * bdelta_diff - Given two byte strings, generate a "patch" (also a byte string)
+ * that describes how to transform the old string into the new string.
+ *
+ * On success, returns BDELTA_OK, and passes a malloc'd block
+ * and its size through *patch_out and *patch_size_out.
+ *
+ * On failure, returns an error code, and clears *patch_out and *patch_size_out.
+ *
+ * Example:
+ *	const char *old = "abcabba";
+ *	const char *new_ = "cbabac";
+ *	void *patch;
+ *	size_t patch_size;
+ *	BDELTAcode rc;
+ *
+ *	rc = bdelta_diff(old, strlen(old), new_, strlen(new_), &patch, &patch_size);
+ *	if (rc != BDELTA_OK) {
+ *		bdelta_perror("bdelta_diff", rc);
+ *		return;
+ *	}
+ *	...
+ *	free(patch);
+ */
+BDELTAcode bdelta_diff(
+	const void  *old,       size_t  old_size,
+	const void  *new_,      size_t  new_size,
+	void       **patch_out, size_t *patch_size_out
+);
+
+/*
+ * bdelta_patch - Apply a patch produced by bdelta_diff to the
+ * old string to recover the new string.
+ *
+ * On success, returns BDELTA_OK, and passes a malloc'd block
+ * and its size through *new_out and *new_size_out.
+ *
+ * On failure, returns an error code, and clears *new_out and *new_size_out.
+ *
+ * Example:
+ *	const char *old = "abcabba";
+ *	void *new_;
+ *	size_t new_size;
+ *	BDELTAcode rc;
+ *
+ *	rc = bdelta_patch(old, strlen(old), patch, patch_size, &new_, &new_size);
+ *	if (rc != BDELTA_OK) {
+ *		bdelta_perror("bdelta_patch", rc);
+ *		return;
+ *	}
+ *	fwrite(new_, 1, new_size, stdout);
+ *	putchar('\n');
+ *	free(new_);
+ */
+BDELTAcode bdelta_patch(
+	const void  *old,     size_t  old_size,
+	const void  *patch,   size_t  patch_size,
+	void       **new_out, size_t *new_size_out
+);
+
+/*
+ * bdelta_strerror - Return a string describing a bdelta error code.
+ */
+const char *bdelta_strerror(BDELTAcode code);
+
+/*
+ * bdelta_perror - Print a bdelta error message to stderr.
+ *
+ * This function handles @s the same way perror does.
+ */
+void bdelta_perror(const char *s, BDELTAcode code);
+
+#endif

+ 294 - 0
ccan/bdelta/test/common.h

@@ -0,0 +1,294 @@
+#include <ccan/bdelta/bdelta.h>
+#include <ccan/bdelta/bdelta.c>
+#include <ccan/tap/tap.h>
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Finds a pseudorandom 32-bit number from 0 to 2^32-1 .
+ * Uses the BCPL linear congruential generator method.
+ *
+ * Used instead of system RNG to ensure tests are consistent.
+ */
+static uint32_t rand32(void)
+{
+#if 0
+	/*
+	 * Tests should be run with a different random function
+	 * from time to time.  I've found that the method below
+	 * sometimes behaves poorly for testing purposes.
+	 * For example, rand32() % N might only return even numbers.
+	 */
+	assert(RAND_MAX == 2147483647);
+	return ((random() & 0xFFFF) << 16) | (random() & 0xFFFF);
+#else
+	static uint32_t rand32_state = 0;
+	rand32_state *= (uint32_t)0x7FF8A3ED;
+	rand32_state += (uint32_t)0x2AA01D31;
+	return rand32_state;
+#endif
+}
+
+#define RSTRING_OK                0  /* Operation succeeded */
+#define RSTRING_MEMORY            1  /* Memory allocation failed */
+#define RSTRING_ZERO_CARDINALITY  2  /* Cardinality of byte range is zero */
+#define RSTRING_RANGE_OVERLAP     3  /* Byte range contains overlapping characters */
+
+struct rstring_byte_range {
+	unsigned int cardinality;
+	unsigned int multiplier;
+	unsigned int offset;
+};
+
+static int check_range(const struct rstring_byte_range *range)
+{
+	unsigned int i;
+	unsigned int c;
+	char set[256];
+	
+	if (range == NULL)
+		return RSTRING_OK;
+	
+	if (range->cardinality == 0)
+		return RSTRING_ZERO_CARDINALITY;
+	
+	memset(set, 0, sizeof(set));
+	
+	c = range->offset & 0xFF;
+	for (i = 0; i < range->cardinality; i++) {
+		if (set[c])
+			return RSTRING_RANGE_OVERLAP;
+		set[c] = 1;
+		c += range->multiplier;
+		c &= 0xFF;
+	}
+	
+	return RSTRING_OK;
+}
+
+/*
+ * Generate a pseudorandom string of the given length,
+ * writing it into a user-supplied buffer.
+ */
+static uint8_t *random_string_into(uint8_t *str, uint32_t size, const struct rstring_byte_range *range)
+{
+	uint32_t i;
+	uint32_t r;
+	
+	if (range == NULL) {
+		for (i = 0; size - i >= 4; ) {
+			r = rand32();
+			str[i++] = r & 0xFF;
+			r >>= 8;
+			str[i++] = r & 0xFF;
+			r >>= 8;
+			str[i++] = r & 0xFF;
+			r >>= 8;
+			str[i++] = r & 0xFF;
+		}
+		
+		if (i < size) {
+			r = rand32();
+			do {
+				str[i++] = r & 0xFF;
+				r >>= 8;
+			} while (i < size);
+		}
+	} else {
+		for (i = 0; i < size; )
+			str[i++] = ((rand32() % range->cardinality) * range->multiplier + range->offset) & 0xFF;
+	}
+	
+	return str;
+}
+
+/*
+ * Generate a pseudorandom string of the given length.
+ */
+static uint8_t *random_string(uint32_t size, const struct rstring_byte_range *range)
+{
+	uint8_t *str;
+	
+	if (check_range(range) != RSTRING_OK) {
+		fprintf(stderr, "Invalid byte range for random string generation\n");
+		exit(EXIT_FAILURE);
+	}
+	
+	str = malloc(size);
+	if (str == NULL)
+		return NULL;
+	
+	return random_string_into(str, size, range);
+}
+
+/*
+ * Generate two byte strings:
+ *
+ *  An "old" string, of length @old_size
+ *  A "new" string, differing from "old" by at most @diff_size bytes
+ *  (where modifying a character is considered one change).
+ *
+ * Returns RSTRING_OK on success, RSTRING_MEMORY if memory allocation fails.
+ */
+static int random_string_pair(
+	uint32_t old_size, uint32_t diff_size, const struct rstring_byte_range *range,
+	uint8_t **old_out, uint8_t **new_out, uint32_t *new_size_out)
+{
+	uint8_t *old;
+	uint8_t *new_;
+	uint8_t *nptr;
+	uint32_t new_size;
+	uint32_t i;
+	uint32_t j;
+	
+	/* insertions[i] is the number of characters to insert before position i. */
+	uint32_t *insertions;
+	
+	/*
+	 * changes[i] indicates what to do to the character at position i:
+	 *
+	 *  0: Leave it as-is.
+	 *  1: Delete it.
+	 *  2: Change it to another byte (possibly the same byte).
+	 */
+	char *changes;
+	
+	/* String of new bytes to use for insertions and changes. */
+	uint8_t *new_bytes;
+	uint8_t *new_bytes_ptr;
+	uint32_t new_bytes_size;
+	
+	old = random_string(old_size, range);
+	if (old == NULL)
+		goto oom0;
+	
+	insertions = calloc(old_size + 1, sizeof(*insertions));
+	if (insertions == NULL)
+		goto oom1;
+	
+	changes = calloc(old_size, sizeof(*changes));
+	if (changes == NULL)
+		goto oom2;
+	
+	/*
+	 * Generate a collection of edits, which will be
+	 * applied to the old string "simultaneously".
+	 */
+	new_size = old_size;
+	new_bytes_size = 0;
+	for (i = 0; i < diff_size; i++) {
+		uint32_t pos;
+		
+		switch (rand32() % 3) {
+			case 0: /* Insert a byte. */
+				pos = rand32() % (old_size + 1);
+				insertions[pos]++;
+				new_size++;
+				new_bytes_size++;
+				break;
+			
+			case 1: /* Delete a byte. */
+				pos = rand32() % old_size;
+				if (changes[pos]) {
+					/*
+					 * This character is already deleted or changed.
+					 * Do something else instead.
+					 *
+					 * For the observative: i will overflow if it's 0.
+					 * However, overflow of unsigned integers is well-defined
+					 * by the C standard.  i will wrap around to UINT32_MAX,
+					 * then the for-loop increment above will wrap it back to 0.
+					 */
+					i--;
+				} else {
+					changes[pos] = 1;
+					new_size--;
+				}
+				break;
+			
+			default: /* Change a byte. */
+				pos = rand32() % old_size;
+				if (changes[pos]) {
+					/*
+					 * This character is already deleted or changed.
+					 * Do something else instead.
+					 */
+					i--;
+				} else {
+					changes[pos] = 2;
+					new_bytes_size++;
+				}
+				break;
+		}
+	}
+	
+	new_bytes = malloc(new_bytes_size);
+	if (new_bytes == NULL)
+		goto oom3;
+	random_string_into(new_bytes, new_bytes_size, range);
+	
+	new_ = malloc(new_size);
+	if (new_ == NULL)
+		goto oom4;
+	
+	/* Apply the insertions and changes generated above. */
+	nptr = new_;
+	new_bytes_ptr = new_bytes;
+	for (i = 0; i < old_size; i++) {
+		for (j = 0; j < insertions[i]; j++)
+			*nptr++ = *new_bytes_ptr++;
+		
+		switch (changes[i]) {
+			case 0: /* No change */
+				*nptr++ = old[i];
+				break;
+			
+			case 1: /* Delete */
+				break;
+			
+			default: /* Change value */
+				*nptr++ = *new_bytes_ptr++;
+				break;
+		}
+	}
+	for (j = 0; j < insertions[i]; j++)
+		*nptr++ = *new_bytes_ptr++;
+	assert((size_t)(nptr - new_) == new_size);
+	assert((size_t)(new_bytes_ptr - new_bytes) == new_bytes_size);
+	
+	free(new_bytes);
+	free(changes);
+	free(insertions);
+	
+	if (old_out)
+		*old_out = old;
+	else
+		free(old);
+	if (new_out)
+		*new_out = new_;
+	else
+		free(new_);
+	if (new_size_out)
+		*new_size_out = new_size;
+	
+	return RSTRING_OK;
+	
+oom4:
+	free(new_bytes);
+oom3:
+	free(changes);
+oom2:
+	free(insertions);
+oom1:
+	free(old);
+oom0:
+	if (old_out)
+		*old_out = NULL;
+	if (new_out)
+		*new_out = NULL;
+	if (new_size_out)
+		*new_size_out = 0;
+	return RSTRING_MEMORY;
+}

+ 79 - 0
ccan/bdelta/test/run-medium.c

@@ -0,0 +1,79 @@
+#include "common.h"
+
+/*
+ * Note that bdelta_diff verifies the patch before returning it (except for
+ * when it returns a PT_LITERAL patch, as its correctness is easy to prove).
+ * Only the trivial tests check the result explicitly using bdiff_patch.
+ */
+static int test_random(
+	uint32_t old_size, uint32_t diff_size,
+	unsigned int cardinality, unsigned int multiplier, unsigned int offset)
+{
+	struct rstring_byte_range range;
+	uint8_t *old;
+	uint8_t *new_;
+	uint32_t new_size;
+	BDELTAcode rc;
+	
+	range.cardinality = cardinality;
+	range.multiplier = multiplier;
+	range.offset = offset;
+	
+	if (random_string_pair(old_size, diff_size, cardinality == 0 ? NULL : &range,
+	                       &old, &new_, &new_size) != RSTRING_OK)
+	{
+		fprintf(stderr, "Error generating random string pair\n");
+		exit(EXIT_FAILURE);
+	}
+	
+	rc = bdelta_diff(old, old_size, new_, new_size, NULL, NULL);
+	if (rc != BDELTA_OK) {
+		bdelta_perror("bdelta_diff", rc);
+		return 0;
+	}
+	
+	free(new_);
+	free(old);
+	return 1;
+}
+
+int main(void)
+{
+	int i;
+	int count = 25;
+	
+	plan_tests(count * 14);
+	
+	for (i = 0; i < count; i++)
+		ok1(test_random(100, 10, 0, 0, 0));
+	for (i = 0; i < count; i++)
+		ok1(test_random(100, rand32() % 200, 0, 0, 0));
+	for (i = 0; i < count; i++)
+		ok1(test_random(1000, rand32() % 200, 0, 0, 0));
+	for (i = 0; i < count; i++)
+		ok1(test_random(1000, rand32() % 2000, 0, 0, 0));
+	for (i = 0; i < count; i++)
+		ok1(test_random(10000, rand32() % 200, 0, 0, 0));
+	for (i = 0; i < count; i++)
+		ok1(test_random(10000, rand32() % 2000, 0, 0, 0));
+	for (i = 0; i < count; i++)
+		ok1(test_random(rand32() % 20000, rand32() % 20000, 0, 0, 0));
+	
+	/* Low-cardinality tests */
+	for (i = 0; i < count; i++)
+		ok1(test_random(100, 10, rand32() % 20 + 1, 1, i));
+	for (i = 0; i < count; i++)
+		ok1(test_random(100, rand32() % 200, rand32() % 20 + 1, 1, i));
+	for (i = 0; i < count; i++)
+		ok1(test_random(1000, rand32() % 200, rand32() % 20 + 1, 1, i));
+	for (i = 0; i < count; i++)
+		ok1(test_random(1000, rand32() % 2000, rand32() % 20 + 1, 1, i));
+	for (i = 0; i < count; i++)
+		ok1(test_random(10000, rand32() % 200, rand32() % 20 + 1, 1, i));
+	for (i = 0; i < count; i++)
+		ok1(test_random(10000, rand32() % 2000, rand32() % 20 + 1, 1, i));
+	for (i = 0; i < count; i++)
+		ok1(test_random(rand32() % 20000, rand32() % 20000, rand32() % 20 + 1, 1, i));
+	
+	return exit_status();
+}

+ 202 - 0
ccan/bdelta/test/run-myers.c

@@ -0,0 +1,202 @@
+#include "common.h"
+
+#define is_digit(c) ((c) >= '0' && (c) <= '9')
+
+static uint32_t parse_int(const char **sp)
+{
+	const char *s = *sp;
+	uint32_t n = 0;
+	
+	while (is_digit(*s)) {
+		n *= 10;
+		n += *s++ - '0';
+	}
+	
+	*sp = s;
+	return n;
+}
+
+static const char *op_name(int op)
+{
+	switch (op) {
+		case OP_COPY:
+			return "copy";
+		case OP_SKIP:
+			return "skip";
+		case OP_INSERT:
+			return "insert";
+		
+		default:
+			return "<invalid opcode>";
+	}
+}
+
+static const char *verify_csi32(
+	const unsigned char *patch_start, const unsigned char *patch_end,
+	const char *expected)
+{
+	const unsigned char *p = patch_start;
+	
+	if (p >= patch_end)
+		return "Patch type byte missing";
+	if (*p++ != PT_CSI32)
+		return "Patch type byte is not PT_CSI32";
+	
+	for (;;) {
+		int patch_op;
+		uint32_t patch_size;
+		int expected_op;
+		uint32_t expected_size;
+		
+		while (*expected == ' ')
+			expected++;
+		if (*expected == '\0')
+			break;
+		
+		if (!csi32_parse_op(&p, patch_end, &patch_op, &patch_size)) {
+			if (p == patch_end)
+				return "Patch shorter than expected.";
+			else
+				return "Patch contains invalid CSI-32";
+		}
+		
+		switch (*expected) {
+			case 'c':
+				expected_op = OP_COPY;
+				break;
+			case 's':
+				expected_op = OP_SKIP;
+				break;
+			case 'i':
+				expected_op = OP_INSERT;
+				break;
+			
+			default:
+				diag("verify_csi32: Unexpected character %c", *expected);
+				return "Syntax error in expected difference";
+		}
+		expected++;
+		
+		while (*expected == ' ')
+			expected++;
+		
+		if (patch_op != expected_op) {
+			diag("verify_csi32: Expected %s, but next op is %s %lu",
+			     op_name(expected_op), op_name(patch_op), (unsigned long)patch_size);
+			return "Operation mismatch";
+		}
+		
+		if (expected_op == OP_COPY || expected_op == OP_SKIP) {
+			if (!is_digit(*expected)) {
+				diag("verify_csi32: Expected size after %s instruction",
+				     op_name(expected_op));
+				return "Syntax error in expected difference";
+			}
+			expected_size = parse_int(&expected);
+			
+			if (patch_size != expected_size) {
+				diag("verify_csi32: Expected %s %lu, but patch says %s %lu",
+				     op_name(expected_op), (unsigned long)expected_size,
+				     op_name(expected_op), (unsigned long)patch_size);
+				return "Operation size mismatch";
+			}
+		} else {
+			if (*expected++ != '\'') {
+				diag("verify_csi32: Expected single-quoted string after insert instruction");
+				return "Syntax error in expected difference";
+			}
+			
+			for (expected_size = 0; ; expected_size++) {
+				unsigned char c;
+				
+				if (*expected == '\'' && *(expected + 1) == '\'') {
+					c = '\'';
+					expected += 2;
+				} else if (*expected == '\'') {
+					expected++;
+					break;
+				} else if (*expected == '\0') {
+					diag("verify_csi32: Missing end quote");
+					return "Syntax error in expected difference";
+				} else {
+					c = *expected++;
+				}
+				
+				if (!(p < patch_end && *p++ == c))
+					return "Insertion string mismatch";
+			}
+			
+			if (patch_size != expected_size)
+				return "Insertion string mismatch";
+		}
+	}
+	
+	if (p != patch_end)
+		return "Patch longer than expected.";
+	
+	return NULL;
+}
+
+static void test_myers(const char *old, const char *new_, const char *expected_difference)
+{
+	SB patch;
+	BDELTAcode rc;
+	const char *verify_msg;
+	
+	if (sb_init(&patch) != 0) {
+		fail("Out of memory");
+		return;
+	}
+	
+	rc = diff_myers(old, strlen(old), new_, strlen(new_), &patch);
+	if (rc != BDELTA_OK) {
+		fail("test_myers(%s, %s, %s): diff_myers failed: %s", old, new_, expected_difference, bdelta_strerror(rc));
+		sb_discard(&patch, NULL, NULL);
+		return;
+	}
+	
+	verify_msg = verify_csi32(patch.start, patch.cur, expected_difference);
+	sb_discard(&patch, NULL, NULL);
+	
+	if (verify_msg != NULL) {
+		fail("test_myers(%s, %s, %s): %s", old, new_, expected_difference, verify_msg);
+		return;
+	}
+	
+	pass("test_myers(%s, %s, %s)", old, new_, expected_difference);
+}
+
+int main(void)
+{
+	(void) random_string_pair;
+	
+	plan_tests(17);
+	
+	test_myers("abcabba", "cbabac",   "s2 c1 i'b' c2 s1 c1 i'c'");
+	test_myers("abcdefg", "abcdefg",  "c7");
+	test_myers("abcdefg", "abcde",    "c5");
+	test_myers("abcdefg", "abcdefga", "c7 i'a'");
+	test_myers("abbbbbb", "bbbbbb",   "s1 c6");
+	test_myers("abbbbbb", "bbbbbbb",  "s1 c6 i'b'");
+	test_myers("bbbbbb",  "abbbbbb",  "i'a' c6");
+	test_myers("bbbbbbb", "abbbbbb",  "i'a' c6");
+	test_myers("bbbbbb",  "abbbbbbb", "i'a' c6 i'b'");
+	
+	test_myers("ac",   "ba",   "i'b' c1");
+	test_myers("ac",   "bc",   "s1 i'b' c1");
+	test_myers("abcd", "cabd", "i'c' c2 s1 c1");
+	test_myers("",     "abc",  "i'abc'");
+	test_myers("abc",  "",     "");
+	test_myers("abcd", "d",    "s3 c1");
+	test_myers("", "", "");
+	
+	/*
+	 * In the event the strings have no characters in common, diff_myers will
+	 * emit a skip of all the characters in the old string.  Although it is
+	 * unnecessary, it is tricky to get rid of.  bdelta_diff will discard the
+	 * patch anyway, because it's bigger than a literal.
+	 */
+	test_myers("aaaaaaa", "bbbbbbb",  "s7 i'bbbbbbb'");
+	
+	return exit_status();
+}

+ 67 - 0
ccan/bdelta/test/run-trivial.c

@@ -0,0 +1,67 @@
+#include <ccan/bdelta/bdelta.h>
+#include <ccan/bdelta/bdelta.c>
+#include <ccan/tap/tap.h>
+
+static int test_trivial(const char *old, const char *new_)
+{
+	void *patch;
+	size_t patch_size;
+	BDELTAcode rc;
+	
+	void *new2;
+	size_t new2_size;
+	
+	rc = bdelta_diff(old, strlen(old), new_, strlen(new_), &patch, &patch_size);
+	if (rc != BDELTA_OK) {
+		bdelta_perror("bdelta_diff", rc);
+		return 0;
+	}
+	
+	if (patch_size > strlen(new_) + 1) {
+		fprintf(stderr, "bdelta_diff produced a patch larger than a simple literal emitting the new string.\n");
+		return 0;
+	}
+	
+	rc = bdelta_patch(old, strlen(old), patch, patch_size, &new2, &new2_size);
+	if (rc != BDELTA_OK) {
+		bdelta_perror("bdelta_patch", rc);
+		return 0;
+	}
+	
+	if (new2_size != strlen(new_) || strcmp(new2, new_) != 0) {
+		fprintf(stderr, "patch(old, diff(old, new)) != new\n");
+		return 0;
+	}
+	
+	/* Make sure bdelta_diff properly discards unwanted return values. */
+	rc = bdelta_diff(old, strlen(old), new_, strlen(new_), NULL, NULL);
+	if (rc != BDELTA_OK) {
+		bdelta_perror("bdelta_diff (second time)", rc);
+		return 0;
+	}
+	
+	free(new2);
+	free(patch);
+	return 1;
+}
+
+int main(void)
+{
+	plan_tests(13);
+	
+	ok1(test_trivial("abcabba", "cbabac"));
+	ok1(test_trivial("aaabbbcdaabcc", "aaabbcdaabeca"));
+	ok1(test_trivial("aaaaaaaa", "bbbbbbbb"));
+	ok1(test_trivial("aaaaaaaa", ""));
+	ok1(test_trivial("", "bbbbbbbb"));
+	ok1(test_trivial("", ""));
+	ok1(test_trivial("aaaaaaaa", "aaaaaaaabbbbbbbb"));
+	ok1(test_trivial("aaaaaaaa", "bbbbbbbbaaaaaaaa"));
+	ok1(test_trivial("aaaaaaaabbbbbbbb", "aaaaaaaa"));
+	ok1(test_trivial("aaaaaaaabbbbbbbb", "bbbbbbbb"));
+	ok1(test_trivial("aaaaaaaabbbbbbbb", "bbbbbbbb"));
+	ok1(test_trivial("abababababababab", "babababababababa"));
+	ok1(test_trivial("aababcabcdabcde", "aababcabcdabcde"));
+	
+	return exit_status();
+}