Browse Source

tal/str: new module.

Tal-enhanced string routines, copied from str_talloc (which I also wrote).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Rusty Russell 13 years ago
parent
commit
d873aaec13
8 changed files with 442 additions and 0 deletions
  1. 3 0
      Makefile
  2. 1 0
      Makefile-ccan
  3. 1 0
      ccan/tal/str/LICENSE
  4. 52 0
      ccan/tal/str/_info
  5. 106 0
      ccan/tal/str/str.c
  6. 119 0
      ccan/tal/str/str.h
  7. 95 0
      ccan/tal/str/test/run-strreg.c
  8. 65 0
      ccan/tal/str/test/run.c

+ 3 - 0
Makefile

@@ -59,6 +59,9 @@ summary-fastcheck-%: tools/ccanlint/ccanlint $(OBJFILES)
 summary-fastcheck-antithread/%: tools/ccanlint/ccanlint $(OBJFILES)
 	tools/ccanlint/ccanlint -x tests_pass_valgrind -x tests_compile_coverage -s ccan/antithread/$*
 
+summary-fastcheck-tal/%: tools/ccanlint/ccanlint $(OBJFILES)
+	tools/ccanlint/ccanlint -x tests_pass_valgrind -x tests_compile_coverage -s ccan/tal/$*
+
 ccan/%/info: ccan/%/_info
 	$(CC) $(CCAN_CFLAGS) -o $@ -x c $<
 

+ 1 - 0
Makefile-ccan

@@ -72,6 +72,7 @@ MODS_NORMAL_WITH_SRC := antithread \
 	str_talloc \
 	take \
 	tal \
+	tal/str \
 	talloc \
 	talloc_link \
 	tally \

+ 1 - 0
ccan/tal/str/LICENSE

@@ -0,0 +1 @@
+../../../licenses/BSD-MIT

+ 52 - 0
ccan/tal/str/_info

@@ -0,0 +1,52 @@
+#include <stdio.h>
+#include <string.h>
+#include "config.h"
+
+/**
+ * tal/str - string helper routines which use tal
+ *
+ * This is a grab bag of functions for string operations, designed to enhance
+ * the standard string.h; these are separated from the non-tal-needing
+ * string utilities in "str.h".
+ *
+ * Example:
+ *	#include <ccan/tal/str/str.h>
+ *	#include <ccan/grab_file/grab_file.h>
+ *	#include <err.h>
+ *
+ *	// Dumb demo program to double-linespace a file.
+ *	int main(int argc, char *argv[])
+ *	{
+ *		char *textfile;
+ *		char **lines;
+ *
+ *		// Grab lines in file.
+ *		textfile = grab_file(NULL, argv[1], NULL);
+ *		if (!textfile)
+ *			err(1, "Failed reading %s", argv[1]);
+ *		lines = strsplit(textfile, textfile, "\n", STR_EMPTY_OK);
+ *
+ *		// Join them back together with two linefeeds.
+ *		printf("%s", strjoin(textfile, lines, "\n\n", STR_TRAIL));
+ *
+ *		// Free everything, just because we can.
+ *		tal_free(textfile);
+ *		return 0;
+ *	}
+ *
+ * License: BSD-MIT
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ */
+int main(int argc, char *argv[])
+{
+	/*
+	 * Only one query is understood: a single "depends" argument, which
+	 * prints the modules this one needs, one per line.
+	 */
+	if (argc == 2 && strcmp(argv[1], "depends") == 0) {
+		printf("ccan/str\n");
+		printf("ccan/tal\n");
+		return 0;
+	}
+
+	/* Wrong number of arguments, or a query we do not answer. */
+	return 1;
+}

+ 106 - 0
ccan/tal/str/str.c

@@ -0,0 +1,106 @@
+/* Licensed under BSD-MIT - see LICENSE file for details */
+#include <unistd.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "str.h"
+#include <sys/types.h>
+#include <regex.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <ccan/tal/tal.h>
+#include <ccan/str/str.h>
+
+/*
+ * Split @string at any character found in @delims.
+ *
+ * Returns a NULL-terminated array allocated off @ctx; every substring is
+ * allocated with that array as its tal parent.
+ *
+ * STR_NO_EMPTY skips leading delimiters and treats each run of delimiters
+ * as one separator, so no empty substrings are produced.  STR_EMPTY_OK
+ * consumes a single delimiter per substring, so adjacent delimiters give
+ * empty substrings.
+ *
+ * Returns NULL if any allocation fails.
+ */
+char **strsplit(const void *ctx, const char *string, const char *delims,
+		enum strsplit flags)
+{
+	char **lines;
+	size_t max = 64, num = 0;
+
+	/* One slot more than max is always kept for the terminating NULL. */
+	lines = tal_arr(ctx, char *, max + 1);
+	if (!lines)
+		return NULL;
+
+	if (flags == STR_NO_EMPTY)
+		string += strspn(string, delims);
+
+	while (*string != '\0') {
+		size_t len = strcspn(string, delims), dlen;
+
+		lines[num] = tal_arr(lines, char, len + 1);
+		if (!lines[num])
+			goto fail;
+		memcpy(lines[num], string, len);
+		lines[num][len] = '\0';
+		string += len;
+
+		dlen = strspn(string, delims);
+		/* Step over just one delimiter so the next one yields "". */
+		if (flags == STR_EMPTY_OK && dlen)
+			dlen = 1;
+		string += dlen;
+
+		if (++num == max) {
+			/* Double the capacity, plus the terminator slot. */
+			max *= 2;
+			if (!tal_resize(&lines, max + 1))
+				goto fail;
+		}
+	}
+	lines[num] = NULL;
+	return lines;
+
+fail:
+	/* The substrings are tal children of lines, so this releases them too. */
+	tal_free(lines);
+	return NULL;
+}
+
+/*
+ * Concatenate the NULL-terminated array @strings into one string allocated
+ * off @ctx, writing @delim after each element.  With STR_NO_TRAIL the
+ * delimiter is omitted after the final element.
+ *
+ * An empty array yields an empty string.  Returns NULL if any allocation
+ * fails.
+ */
+char *strjoin(const void *ctx, char *strings[], const char *delim,
+	      enum strjoin flags)
+{
+	size_t i;
+	char *ret = tal_strdup(ctx, "");
+	size_t totlen = 0, dlen = strlen(delim);
+
+	if (!ret)
+		return NULL;
+
+	for (i = 0; strings[i]; i++) {
+		size_t len = strlen(strings[i]);
+
+		/* Last element: drop the delimiter if no trailer is wanted. */
+		if (flags == STR_NO_TRAIL && !strings[i+1])
+			dlen = 0;
+
+		/* Room for this element, its delimiter and the final NUL. */
+		if (!tal_resize(&ret, totlen + len + dlen + 1)) {
+			tal_free(ret);
+			return NULL;
+		}
+		memcpy(ret + totlen, strings[i], len);
+		totlen += len;
+		memcpy(ret + totlen, delim, dlen);
+		totlen += dlen;
+	}
+	ret[totlen] = '\0';
+	return ret;
+}
+
+/*
+ * Match @string against the extended regular expression @regex and copy
+ * the parenthesized sub-matches into the char ** varargs.
+ *
+ * One vararg is read for every "(" that strcount() finds in @regex.  A NULL
+ * vararg skips that sub-match.  A sub-expression that did not take part in
+ * the match sets its pointer to NULL.  Extracted strings are allocated off
+ * @ctx.
+ *
+ * Returns true on a match.  Returns false if there is no match, @regex does
+ * not compile, or an allocation fails; in the last case, strings stored by
+ * earlier varargs are left allocated.
+ */
+bool strreg(const void *ctx, const char *string, const char *regex, ...)
+{
+	size_t nmatch = 1 + strcount(regex, "(");
+	regmatch_t *matches = tal_arr(ctx, regmatch_t, nmatch);
+	regex_t r;
+	bool ret = false;
+
+	if (!matches)
+		return false;
+
+	if (regcomp(&r, regex, REG_EXTENDED) != 0) {
+		/* Nothing was compiled, so only the match array needs freeing. */
+		tal_free(matches);
+		return false;
+	}
+
+	if (regexec(&r, string, nmatch, matches, 0) == 0) {
+		unsigned int i;
+		va_list ap;
+
+		ret = true;
+		va_start(ap, regex);
+		/* matches[0] is the whole match; sub-matches start at 1. */
+		for (i = 1; i < nmatch; i++) {
+			char **arg;
+			arg = va_arg(ap, char **);
+			if (arg) {
+				/* eg. ([a-z])? can give "no match". */
+				if (matches[i].rm_so == -1)
+					*arg = NULL;
+				else {
+					*arg = tal_strndup(ctx,
+						      string + matches[i].rm_so,
+						      matches[i].rm_eo
+						      - matches[i].rm_so);
+					if (!*arg) {
+						ret = false;
+						break;
+					}
+				}
+			}
+		}
+		va_end(ap);
+	}
+	tal_free(matches);
+	regfree(&r);
+	return ret;
+}

+ 119 - 0
ccan/tal/str/str.h

@@ -0,0 +1,119 @@
+/* Licensed under BSD-MIT - see LICENSE file for details */
+#ifndef CCAN_STR_TAL_H
+#define CCAN_STR_TAL_H
+#include <ccan/tal/tal.h>
+#include <ccan/tal/tal.h>
+#include <string.h>
+#include <stdbool.h>
+
+/* Flags for strsplit(): how adjacent delimiters are treated. */
+enum strsplit {
+	/* Each delimiter splits on its own; adjacent ones give "" entries. */
+	STR_EMPTY_OK,
+	/* Leading delimiters are skipped; a run of them is one separator. */
+	STR_NO_EMPTY
+};
+
+/**
+ * strsplit - Split string into an array of substrings
+ * @ctx: the parent to tal from (often NULL)
+ * @string: the string to split
+ * @delims: delimiters where lines should be split.
+ * @flags: whether to include empty substrings.
+ *
+ * This function splits a single string into multiple strings.  The
+ * original string is untouched: an array is allocated (using tal)
+ * pointing to copies of each substring; each copy is allocated with
+ * the array as its tal parent.  By definition, no delimiters will
+ * appear in the substrings.
+ *
+ * With STR_EMPTY_OK, adjacent delimiters result in empty substrings,
+ * and a leading delimiter gives an empty first substring; a single
+ * trailing delimiter does not add an empty last substring.  With
+ * STR_NO_EMPTY, leading delimiters are skipped and any run of
+ * delimiters is treated as a single separator.
+ *
+ * The final char * in the array will be NULL.
+ *
+ * Example:
+ *	#include <ccan/tal/str/str.h>
+ *	...
+ *	static unsigned int count_long_lines(const char *string)
+ *	{
+ *		char **lines;
+ *		unsigned int i, long_lines = 0;
+ *
+ *		// Can only fail on out-of-memory.
+ *		lines = strsplit(NULL, string, "\n", STR_NO_EMPTY);
+ *		for (i = 0; lines[i] != NULL; i++)
+ *			if (strlen(lines[i]) > 80)
+ *				long_lines++;
+ *		tal_free(lines);
+ *		return long_lines;
+ *	}
+ */
+char **strsplit(const void *ctx, const char *string, const char *delims,
+		enum strsplit flags);
+
+/* Flags for strjoin(): whether the last string is followed by the delimiter. */
+enum strjoin {
+	/* The delimiter follows every string, including the last. */
+	STR_TRAIL,
+	/* No delimiter is written after the last string. */
+	STR_NO_TRAIL
+};
+
+/**
+ * strjoin - Join an array of substrings into one long string
+ * @ctx: the context to tal from (often NULL)
+ * @strings: the NULL-terminated array of strings to join
+ * @delim: the delimiter to insert between the strings
+ * @flags: whether to add a delimiter to the end
+ *
+ * This function joins an array of strings into a single string.  The
+ * return value is allocated using tal.  Each string in @strings is
+ * followed by a copy of @delim, except that with STR_NO_TRAIL the
+ * last string is not.
+ *
+ * Example:
+ *	// Append the string "-- EOL" to each line.
+ *	static char *append_to_all_lines(const char *string)
+ *	{
+ *		char **lines, *ret;
+ *
+ *		lines = strsplit(NULL, string, "\n", STR_EMPTY_OK);
+ *		ret = strjoin(NULL, lines, "-- EOL\n", STR_TRAIL);
+ *		tal_free(lines);
+ *		return ret;
+ *	}
+ */
+char *strjoin(const void *ctx, char *strings[], const char *delim,
+	      enum strjoin flags);
+
+/**
+ * strreg - match and extract from a string via (extended) regular expressions.
+ * @ctx: the context to tal from (often NULL)
+ * @string: the string to try to match.
+ * @regex: the regular expression to match.
+ * ...: pointers to strings to allocate for subexpressions.
+ *
+ * Returns true if we matched, in which case any parenthesized
+ * expressions in @regex are allocated (off @ctx) and placed in the
+ * char ** arguments following @regex.  NULL arguments mean the match
+ * is not saved.  The order of the strings is the order of opening
+ * parentheses in the expression: in the case of repeated expressions
+ * (eg "([a-z])*") the last one is saved, in the case of non-existent
+ * matches (eg "([a-z]*)?") the pointer is set to NULL.
+ *
+ * One argument is consumed for each "(" character counted in @regex,
+ * so supply a char ** (or NULL) for every one of them.
+ *
+ * Allocation failures or malformed regular expressions return false.
+ *
+ * See Also:
+ *	regcomp(3), regex(3).
+ *
+ * Example:
+ *	// Given 'My name is Rusty' outputs 'Hello Rusty!'
+ *	// Given 'my first name is Rusty Russell' outputs 'Hello Rusty Russell!'
+ *	// Given 'My name isnt Rusty Russell' outputs 'Hello there!'
+ *	int main(int argc, char *argv[])
+ *	{
+ *		char *person, *input;
+ *
+ *		// Join args and trim trailing space.
+ *		input = strjoin(NULL, argv+1, " ", STR_NO_TRAIL);
+ *		if (strreg(NULL, input, "[Mm]y (first )?name is ([A-Za-z ]+)",
+ *			   NULL, &person))
+ *			printf("Hello %s!\n", person);
+ *		else
+ *			printf("Hello there!\n");
+ *		return 0;
+ *	}
+ */
+bool strreg(const void *ctx, const char *string, const char *regex, ...);
+#endif /* CCAN_STR_TAL_H */

+ 95 - 0
ccan/tal/str/test/run-strreg.c

@@ -0,0 +1,95 @@
+#include <ccan/tal/str/str.h>
+#include <ccan/tal/str/str.c>
+#include <ccan/tap/tap.h>
+
+/*
+ * Count @ctx itself plus every object visited by walking
+ * tal_first()/tal_next() from @ctx.
+ */
+static unsigned int tal_total_blocks(tal_t *ctx)
+{
+	unsigned int count = 1;
+	tal_t *blk = tal_first(ctx);
+
+	while (blk) {
+		count++;
+		blk = tal_next(ctx, blk);
+	}
+	return count;
+}
+
+/*
+ * Return true if @parent is @child itself or is reached by following
+ * tal_parent() upwards from @child.
+ */
+static bool find_parent(tal_t *child, tal_t *parent)
+{
+	tal_t *node = child;
+
+	while (node) {
+		if (node == parent)
+			return true;
+		node = tal_parent(node);
+	}
+
+	return false;
+}
+
+/*
+ * TAP test for strreg(): matching, sub-match extraction order, NULL
+ * (ignored) arguments, and that nothing is left allocated off ctx.
+ */
+int main(int argc, char *argv[])
+{
+	void *ctx = tal_strdup(NULL, "toplevel");
+	/* Baseline block count, compared again at the end to detect leaks. */
+	unsigned int top_blocks = tal_total_blocks(ctx);
+	char *a, *b;
+	/* If it accesses this, it will crash. */
+	char **invalid = (char **)1L;
+
+	/* Must equal the number of ok1() calls below. */
+	plan_tests(25);
+	/* Simple matching. */
+	ok1(strreg(ctx, "hello world!", "hello") == true);
+	ok1(strreg(ctx, "hello world!", "hi") == false);
+
+	/* No parentheses means we don't use any extra args. */
+	ok1(strreg(ctx, "hello world!", "hello", invalid) == true);
+	ok1(strreg(ctx, "hello world!", "hi", invalid) == false);
+
+	ok1(strreg(ctx, "hello world!", "[a-z]+", invalid) == true);
+	/* One group: only the first extra argument may be written. */
+	ok1(strreg(ctx, "hello world!", "([a-z]+)", &a, invalid) == true);
+	/* Found string */
+	ok1(streq(a, "hello"));
+	/* Allocated off ctx */
+	ok1(find_parent(a, ctx));
+	tal_free(a);
+
+	/* Two groups fill two arguments, in order. */
+	ok1(strreg(ctx, "hello world!", "([a-z]*) ([a-z]+)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "hello"));
+	ok1(streq(b, "world"));
+	ok1(find_parent(a, ctx));
+	ok1(find_parent(b, ctx));
+	tal_free(a);
+	tal_free(b);
+
+	/* * after parentheses returns last match. */
+	ok1(strreg(ctx, "hello world!", "([a-z])* ([a-z]+)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "o"));
+	ok1(streq(b, "world"));
+	tal_free(a);
+	tal_free(b);
+
+	/* Nested parentheses are ordered by open brace. */
+	ok1(strreg(ctx, "hello world!", "(([a-z]*) world)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "hello world"));
+	ok1(streq(b, "hello"));
+	tal_free(a);
+	tal_free(b);
+
+	/*
+	 * Nested parentheses are ordered by open brace.
+	 * (Same case as the one just above, run a second time; its three
+	 * checks are included in the plan_tests() count.)
+	 */
+	ok1(strreg(ctx, "hello world!", "(([a-z]*) world)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "hello world"));
+	ok1(streq(b, "hello"));
+	tal_free(a);
+	tal_free(b);
+
+	/* NULL means we're not interested. */
+	ok1(strreg(ctx, "hello world!", "((hello|goodbye) world)",
+		   &a, NULL, invalid) == true);
+	ok1(streq(a, "hello world"));
+	tal_free(a);
+
+	/* No leaks! */
+	ok1(tal_total_blocks(ctx) == top_blocks);
+	tal_free(ctx);
+
+	return exit_status();
+}

+ 65 - 0
ccan/tal/str/test/run.c

@@ -0,0 +1,65 @@
+#include <ccan/tal/str/str.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ccan/tal/str/str.c>
+#include <ccan/tap/tap.h>
+
+/* Number of elements in a true array (not a pointer). */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+/* NULL-terminated input for the strjoin() checks below. */
+static const char *substrings[]
+= { "far", "bar", "baz", "b", "ba", "z", "ar", NULL };
+
+/*
+ * TAP test for strsplit() and strjoin(): both flag values of each, plus
+ * a check that results are allocated off the supplied context.
+ */
+int main(int argc, char *argv[])
+{
+	char **split, *str;
+	void *ctx;
+
+	/* Must equal the number of ok1() calls below. */
+	plan_tests(24);
+	/* Double space with STR_EMPTY_OK: one empty string in between. */
+	split = strsplit(NULL, "hello  world", " ", STR_EMPTY_OK);
+	ok1(!strcmp(split[0], "hello"));
+	ok1(!strcmp(split[1], ""));
+	ok1(!strcmp(split[2], "world"));
+	ok1(split[3] == NULL);
+	tal_free(split);
+
+	/* Same input with STR_NO_EMPTY: the empty string disappears. */
+	split = strsplit(NULL, "hello  world", " ", STR_NO_EMPTY);
+	ok1(!strcmp(split[0], "hello"));
+	ok1(!strcmp(split[1], "world"));
+	ok1(split[2] == NULL);
+	tal_free(split);
+
+	/* Leading delimiters are skipped too with STR_NO_EMPTY. */
+	split = strsplit(NULL, "  hello  world", " ", STR_NO_EMPTY);
+	ok1(!strcmp(split[0], "hello"));
+	ok1(!strcmp(split[1], "world"));
+	ok1(split[2] == NULL);
+	tal_free(split);
+
+	/* Several delimiter characters: 'o' and ' ' both split. */
+	split = strsplit(NULL, "hello  world", "o ", STR_EMPTY_OK);
+	ok1(!strcmp(split[0], "hell"));
+	ok1(!strcmp(split[1], ""));
+	ok1(!strcmp(split[2], ""));
+	ok1(!strcmp(split[3], "w"));
+	ok1(!strcmp(split[4], "rld"));
+	ok1(split[5] == NULL);
+
+	/* Reuse the previous result as the tal context for the next one. */
+	ctx = split;
+	split = strsplit(ctx, "hello  world", "o ", STR_EMPTY_OK);
+	ok1(tal_parent(split) == ctx);
+	tal_free(ctx);
+
+	/* STR_TRAIL: the delimiter also follows the last string. */
+	str = strjoin(NULL, (char **)substrings, ", ", STR_TRAIL);
+	ok1(!strcmp(str, "far, bar, baz, b, ba, z, ar, "));
+	/* The first joined string becomes the context for the rest. */
+	ctx = str;
+	str = strjoin(ctx, (char **)substrings, "", STR_TRAIL);
+	ok1(!strcmp(str, "farbarbazbbazar"));
+	ok1(tal_parent(str) == ctx);
+	/* STR_NO_TRAIL: no delimiter after the last string. */
+	str = strjoin(ctx, (char **)substrings, ", ", STR_NO_TRAIL);
+	ok1(tal_parent(str) == ctx);
+	ok1(!strcmp(str, "far, bar, baz, b, ba, z, ar"));
+	str = strjoin(ctx, (char **)substrings, "", STR_NO_TRAIL);
+	ok1(!strcmp(str, "farbarbazbbazar"));
+	ok1(tal_parent(str) == ctx);
+	tal_free(ctx);
+
+	return exit_status();
+}