Browse Source

edit_distance: Rename ED_STACK_ELEMS ED_STACK_DIST_VALS

The previous name was misleading, since it does not define the number of
elements (ed_elem) on the stack, but rather the number of distance
values (ed_dist).  Rename to make this more clear and add more
documentation about what it does and how best to define it.

Note:  This is an API change for custom-compiled versions, but since the
module has only been included for a couple days I don't think it's worth
a back-compat #ifdef at this point.

Signed-off-by: Kevin Locke <kevin@kevinlocke.name>
Kevin Locke 9 years ago
parent
commit
346058c002

+ 27 - 6
ccan/edit_distance/edit_distance-params.h

@@ -109,18 +109,39 @@
 # define ED_HASH_ELEM(e) ((unsigned char)e)
 /** Maximum value that can be returned from #ED_HASH_ELEM */
 # define ED_HASH_MAX UCHAR_MAX
-/** Can an array of #ED_HASH_MAX ::ed_dist values be stored on the stack? */
+/** Can an array of #ED_HASH_MAX ::ed_dist values be stored on the stack?
+ * @see #ED_STACK_DIST_VALS
+ */
 # define ED_HASH_ON_STACK
 #endif
 
-#ifndef ED_STACK_ELEMS
-/** Maximum number of elements that will be allocated on the stack.
+#ifndef ED_STACK_DIST_VALS
+/** Maximum number of ::ed_dist values that will be allocated on the stack.
+ *
+ * The edit distance algorithms which use a dynamic programming (all currently
+ * supported algorithms) can store intermediate values on the stack to avoid
+ * the overhead of calling @c malloc.  This macro defines the maximum number
+ * of intermediate distance values which can be stored on the stack for a
+ * single function call.  It should be large enough to cover common cases,
+ * where possible, and small enough to avoid overflowing the stack or frame
+ * size limits.  The algorithms have the following requirements:
  *
- * Note: The threshold does not need to be tight since with
- * <code>ED_STACK_ELEMS > ~100</code> algorithm cost will dominate @c malloc
+ * - ed_measure::EDIT_DISTANCE_LCS and ed_measure::EDIT_DISTANCE_LEV require
+ *   @c min(slen,tlen) values when #ED_COST_IS_SYMMETRIC is defined, @c slen
+ *   otherwise.
+ * - ed_measure::EDIT_DISTANCE_RDL requires @c 2*min(slen,tlen) values when
+ *   #ED_COST_IS_SYMMETRIC is defined, @c 2*slen otherwise.
+ * - ed_measure::EDIT_DISTANCE_DL requires @c slen*tlen values (in addition to
+ *   the #ED_HASH_MAX values stored on the stack if #ED_HASH_ON_STACK is
+ *   defined).
+ *
+ * This value does not need to be a tight bound, since when @c slen*tlen is
+ * greater than around 10,000 the algorithm cost will dominate the @c malloc
  * cost anyway.
+ *
+ * @see #ED_HASH_ON_STACK
  */
-# define ED_STACK_ELEMS 512
+# define ED_STACK_DIST_VALS 512
 #endif
 
 #endif

+ 2 - 2
ccan/edit_distance/edit_distance_dl.c

@@ -15,11 +15,11 @@ ed_dist edit_distance_dl(const ed_elem *src, ed_size slen,
 {
 	/* Optimization: Avoid malloc when distance matrix can fit on the stack.
 	 */
-	ed_dist stackdist[ED_STACK_ELEMS];
+	ed_dist stackdist[ED_STACK_DIST_VALS];
 
 	/* Lowrance-Wagner distance matrix, in row-major order. */
 	size_t matsize = ((size_t)slen + 2) * (tlen + 2);
-	ed_dist *distmem = matsize <= ED_STACK_ELEMS ? stackdist :
+	ed_dist *distmem = matsize <= ED_STACK_DIST_VALS ? stackdist :
 	    malloc(matsize * sizeof(ed_dist));
 	ed_dist *dist = distmem;
 

+ 2 - 2
ccan/edit_distance/edit_distance_lcs.c

@@ -16,10 +16,10 @@ ed_dist edit_distance_lcs(const ed_elem *src, ed_size slen,
 	/* Optimization: Avoid malloc when row of distance matrix can fit on
 	 * the stack.
 	 */
-	ed_dist stackdist[ED_STACK_ELEMS];
+	ed_dist stackdist[ED_STACK_DIST_VALS];
 
 	/* One row of the Wagner-Fischer distance matrix. */
-	ed_dist *dist = slen < ED_STACK_ELEMS ? stackdist :
+	ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist :
 	    malloc((slen + 1) * sizeof(ed_dist));
 
 	/* Initialize row with cost to delete src[0..i-1] */

+ 2 - 2
ccan/edit_distance/edit_distance_lev.c

@@ -16,10 +16,10 @@ ed_dist edit_distance_lev(const ed_elem *src, ed_size slen,
 	/* Optimization: Avoid malloc when row of distance matrix can fit on
 	 * the stack.
 	 */
-	ed_dist stackdist[ED_STACK_ELEMS];
+	ed_dist stackdist[ED_STACK_DIST_VALS];
 
 	/* One row of the Wagner-Fischer distance matrix. */
-	ed_dist *dist = slen < ED_STACK_ELEMS ? stackdist :
+	ed_dist *dist = slen < ED_STACK_DIST_VALS ? stackdist :
 	    malloc((slen + 1) * sizeof(ed_dist));
 
 	/* Initialize row with cost to delete src[0..i-1] */

+ 2 - 2
ccan/edit_distance/edit_distance_rdl.c

@@ -16,11 +16,11 @@ ed_dist edit_distance_rdl(const ed_elem *src, ed_size slen,
 	/* Optimization: Avoid malloc when required rows of distance matrix can
 	 * fit on the stack.
 	 */
-	ed_dist stackdist[ED_STACK_ELEMS];
+	ed_dist stackdist[ED_STACK_DIST_VALS];
 
 	/* Two rows of the Wagner-Fischer distance matrix. */
 	ed_dist *distmem, *dist, *prevdist;
-	if (slen < ED_STACK_ELEMS / 2) {
+	if (slen < ED_STACK_DIST_VALS / 2) {
 		distmem = stackdist;
 		dist = distmem;
 		prevdist = distmem + slen + 1;

+ 4 - 4
ccan/edit_distance/test/run-weights.c

@@ -224,14 +224,14 @@ static void test_dl(void)
 static void test_mem_use(void)
 {
 	char tgt[] = "BC";
-	char src[ED_STACK_ELEMS + 1];
-	for (size_t i = 0; i < ED_STACK_ELEMS; ++i) {
+	char src[ED_STACK_DIST_VALS + 1];
+	for (size_t i = 0; i < ED_STACK_DIST_VALS; ++i) {
 		src[i] = (char)('A' + (i % 26));
 	}
-	src[ED_STACK_ELEMS] = '\0';
+	src[ED_STACK_DIST_VALS] = '\0';
 
 	for (ed_size tlen = 1; tlen < 3; ++tlen) {
-		ed_size slen = ED_STACK_ELEMS;
+		ed_size slen = ED_STACK_DIST_VALS;
 		/* Above threshold, causes allocation */
 		ok(edit_distance_lcs(src, slen, tgt, tlen) == slen - tlen,
 		   "edit_distance_lcs(\"%.3s..., %u, \"%.*s\", %u) == %u",