Browse Source

str_talloc: strreg

Useful wrapper for extended POSIX regular expressions.
Rusty Russell 15 years ago
parent
commit
fa64b45993

+ 1 - 0
ccan/str_talloc/_info

@@ -44,6 +44,7 @@ int main(int argc, char *argv[])
 		return 1;
 
 	if (strcmp(argv[1], "depends") == 0) {
+		printf("ccan/str\n");
 		printf("ccan/talloc\n");
 		printf("ccan/noerr\n");
 		return 0;

+ 48 - 0
ccan/str_talloc/str_talloc.c

@@ -4,7 +4,12 @@
 #include <limits.h>
 #include <stdlib.h>
 #include "str_talloc.h"
+#include <sys/types.h>
+#include <regex.h>
+#include <stdarg.h>
+#include <unistd.h>
 #include <ccan/talloc/talloc.h>
+#include <ccan/str/str.h>
 
 char **strsplit(const void *ctx, const char *string, const char *delims,
 		 unsigned int *nump)
@@ -41,3 +46,46 @@ char *strjoin(const void *ctx, char *strings[], const char *delim)
 	}
 	return ret;
 }
+
+/*
+ * Match @string against the extended regular expression @regex and copy
+ * each parenthesized submatch into the corresponding char ** vararg.
+ *
+ * Returns true on a match.  Returns false if @regex does not compile, if
+ * @string does not match, or if an allocation fails; in the allocation
+ * failure case every string already handed out is freed again and its
+ * pointer reset to NULL, so nothing is left behind on @ctx.
+ */
+bool strreg(const void *ctx, const char *string, const char *regex, ...)
+{
+	size_t nmatch, i;
+	regmatch_t *matches;
+	regex_t r;
+	va_list ap;
+	bool ret = false;
+
+	if (regcomp(&r, regex, REG_EXTENDED) != 0)
+		return false;
+
+	/* Take the group count from the compiled regex.  Counting '(' in
+	 * the pattern text also counts "\\(" and "[(]", which would make
+	 * the loop below read more varargs than the caller passed. */
+	nmatch = 1 + r.re_nsub;
+	matches = talloc_array(ctx, regmatch_t, nmatch);
+	if (!matches)
+		goto out_regex;
+
+	if (regexec(&r, string, nmatch, matches, 0) != 0)
+		goto out_matches;
+
+	ret = true;
+	va_start(ap, regex);
+	/* matches[0] is the whole match; groups start at index 1. */
+	for (i = 1; i < nmatch; i++) {
+		char **arg = va_arg(ap, char **);
+
+		/* NULL means the caller does not want this group. */
+		if (!arg)
+			continue;
+
+		/* eg. ([a-z])? can give "no match". */
+		if (matches[i].rm_so == -1) {
+			*arg = NULL;
+			continue;
+		}
+
+		*arg = talloc_strndup(ctx, string + matches[i].rm_so,
+				      matches[i].rm_eo - matches[i].rm_so);
+		if (!*arg) {
+			ret = false;
+			break;
+		}
+	}
+	va_end(ap);
+
+	if (!ret) {
+		size_t j;
+
+		/* Allocation failed at group i: undo groups 1..i-1 so a
+		 * false return does not leak strings onto ctx. */
+		va_start(ap, regex);
+		for (j = 1; j < i; j++) {
+			char **arg = va_arg(ap, char **);
+
+			if (arg && *arg) {
+				talloc_free(*arg);
+				*arg = NULL;
+			}
+		}
+		va_end(ap);
+	}
+
+out_matches:
+	talloc_free(matches);
+out_regex:
+	regfree(&r);
+	return ret;
+}

+ 43 - 0
ccan/str_talloc/str_talloc.h

@@ -63,4 +63,47 @@ char **strsplit(const void *ctx, const char *string, const char *delims,
  *	}
  */
 char *strjoin(const void *ctx, char *strings[], const char *delim);
+
+/**
+ * strreg - match and extract from a string via (extended) regular expressions.
+ * @ctx: the context to tallocate from (often NULL)
+ * @string: the string to try to match.
+ * @regex: the regular expression to match.
+ * ...: pointers to strings to allocate for subexpressions.
+ *
+ * Returns true if we matched, in which case any parenthesized
+ * expressions in @regex are allocated off @ctx and placed in the char **
+ * arguments following @regex.  NULL arguments mean the match is not
+ * saved.  The order of the strings is the order of the opening
+ * parentheses in the expression: in the case of repeated
+ * expressions (eg "([a-z])*") the last one is saved, in the case of
+ * non-existent matches (eg "([a-z]*)?") the pointer is set to NULL.
+ *
+ * One char ** argument (or NULL) is consumed for every parenthesized
+ * expression, so the caller must supply one for each of them.
+ *
+ * Allocation failures or malformed regular expressions return false.
+ * Do not rely on the values of the char ** arguments when false is
+ * returned.
+ *
+ * See Also:
+ *	regcomp(3), regex(3).
+ *
+ * Example:
+ *	// Given 'My name is Rusty' outputs 'Hello Rusty!'
+ *	// Given 'my first name is Rusty Russell' outputs 'Hello Rusty Russell!'
+ *	// Given 'My name isnt Rusty Russell' outputs 'Hello there!'
+ *	int main(int argc, char *argv[])
+ *	{
+ *		char *person, *input;
+ *
+ *		// Join args and trim trailing space.
+ *		input = strjoin(NULL, argv+1, " ");
+ *		if (strlen(input) != 0)
+ *			input[strlen(input)-1] = '\0';
+ *
+ *		if (strreg(NULL, input, "[Mm]y (first )?name is ([A-Za-z ]+)",
+ *			   NULL, &person))
+ *			printf("Hello %s!\n", person);
+ *		else
+ *			printf("Hello there!\n");
+ *		return 0;
+ *	}
+ */
+bool strreg(const void *ctx, const char *string, const char *regex, ...);
 #endif /* CCAN_STR_TALLOC_H */

+ 75 - 0
ccan/str_talloc/test/run-strreg.c

@@ -0,0 +1,75 @@
+#include <ccan/str_talloc/str_talloc.h>
+#include <ccan/str_talloc/str_talloc.c>
+#include <ccan/tap/tap.h>
+
+/*
+ * TAP test for strreg(): plain matching, group extraction, ordering of
+ * nested groups, skipped (NULL) outputs, optional groups that take no
+ * part in the match, and a final check that nothing is left allocated
+ * on the test context.
+ */
+int main(int argc, char *argv[])
+{
+	void *ctx = talloc_init("toplevel");
+	unsigned int top_blocks = talloc_total_blocks(ctx);
+	char *a, *b;
+	/* If it accesses this, it will crash. */
+	char **invalid = (char **)1L;
+
+	plan_tests(25);
+	/* Simple matching. */
+	ok1(strreg(ctx, "hello world!", "hello") == true);
+	ok1(strreg(ctx, "hello world!", "hi") == false);
+
+	/* No parentheses means we don't use any extra args. */
+	ok1(strreg(ctx, "hello world!", "hello", invalid) == true);
+	ok1(strreg(ctx, "hello world!", "hi", invalid) == false);
+
+	ok1(strreg(ctx, "hello world!", "[a-z]+", invalid) == true);
+	ok1(strreg(ctx, "hello world!", "([a-z]+)", &a, invalid) == true);
+	/* Found string */
+	ok1(streq(a, "hello"));
+	/* Allocated off ctx */
+	ok1(talloc_find_parent_byname(a, "toplevel") == ctx);
+	talloc_free(a);
+
+	ok1(strreg(ctx, "hello world!", "([a-z]*) ([a-z]+)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "hello"));
+	ok1(streq(b, "world"));
+	ok1(talloc_find_parent_byname(a, "toplevel") == ctx);
+	ok1(talloc_find_parent_byname(b, "toplevel") == ctx);
+	talloc_free(a);
+	talloc_free(b);
+
+	/* * after parentheses returns last match. */
+	ok1(strreg(ctx, "hello world!", "([a-z])* ([a-z]+)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "o"));
+	ok1(streq(b, "world"));
+	talloc_free(a);
+	talloc_free(b);
+
+	/* Nested parentheses are ordered by opening parenthesis. */
+	ok1(strreg(ctx, "hello world!", "(([a-z]*) world)",
+		   &a, &b, invalid) == true);
+	ok1(streq(a, "hello world"));
+	ok1(streq(b, "hello"));
+	talloc_free(a);
+	talloc_free(b);
+
+	/* An optional group which takes no part in the match gives NULL.
+	 * (a still holds a stale non-NULL value here, so this also shows
+	 * that strreg really wrote to it.) */
+	ok1(strreg(ctx, "hello world!", "(hi )?([a-z]+)",
+		   &a, &b, invalid) == true);
+	ok1(a == NULL);
+	ok1(streq(b, "hello"));
+	talloc_free(b);
+
+	/* NULL means we're not interested. */
+	ok1(strreg(ctx, "hello world!", "((hello|goodbye) world)",
+		   &a, NULL, invalid) == true);
+	ok1(streq(a, "hello world"));
+	talloc_free(a);
+
+	/* No leaks! */
+	ok1(talloc_total_blocks(ctx) == top_blocks);
+	talloc_free(ctx);
+	talloc_disable_null_tracking();
+
+	return exit_status();
+}

+ 1 - 1
tools/Makefile

@@ -1,6 +1,6 @@
 ALL_TOOLS = tools/configurator/configurator tools/ccan_depends tools/doc_extract tools/namespacize tools/ccanlint/ccanlint
 
-DEP_OBJS = tools/depends.o tools/compile.o tools/tools.o ccan/str_talloc/str_talloc.o ccan/grab_file/grab_file.o ccan/talloc/talloc.o ccan/noerr/noerr.o ccan/read_write_all/read_write_all.o
+DEP_OBJS = tools/depends.o tools/compile.o tools/tools.o ccan/str_talloc/str_talloc.o ccan/str/str.o ccan/grab_file/grab_file.o ccan/talloc/talloc.o ccan/noerr/noerr.o ccan/read_write_all/read_write_all.o
 
 .PHONY: tools
 tools: $(ALL_TOOLS)