Browse Source

Somewhat decent cpp analysis for ccanlint.

Rusty Russell 17 years ago
parent
commit
08f8cff8f2

+ 1 - 0
.bzrignore

@@ -12,3 +12,4 @@ inter-depends
 test-depends
 lib-depends
 tools/_infotojson/infotojson
+tools/ccanlint/test/run-file_analysis

+ 52 - 2
tools/ccanlint/ccanlint.h

@@ -49,13 +49,49 @@ struct ccanlint {
 /* Ask the user a yes/no question: the answer is NO if there's an error. */
 bool ask(const char *question);
 
+enum line_info_type {
+	PREPROC_LINE, /* Directive: first non-blank character is '#' (outside a comment). */
+	CODE_LINE, /* Code: something other than blanks is left once comments are stripped. */
+	DOC_LINE, /* Pure comment line opening a kernel-doc-style comment, or continuing one. */
+	COMMENT_LINE, /* Any other (pure) comment line; blank lines are classified here too. */
+};
+
+/* One preprocessor condition guarding a line; nesting is a chain of these.
+ * So far, only do simple #ifdef/#ifndef/#if defined/#if !defined tests,
+ * and #if <SYMBOL>/#if !<SYMBOL> */
+struct pp_conditions {
+	/* Enclosing condition, or NULL if this is the outermost one. */
+	struct pp_conditions *parent;
+
+	enum {
+		PP_COND_IF, /* Tests the symbol's value: #if SYMBOL */
+		PP_COND_IFDEF, /* Tests whether the symbol is defined */
+		PP_COND_UNKNOWN, /* Directive the parser could not reduce to one symbol */
+	} type;
+
+	bool inverse; /* Test is negated: #ifndef, a leading '!', or an #else branch */
+	const char *symbol; /* Symbol tested; can be NULL when type is PP_COND_UNKNOWN */
+};
+
+/* Preprocessor information about each line. */
+struct line_info {
+	enum line_info_type type;
+
+	/* Is this actually a continuation of line above? (which ends in \)
+	 * If so, type is simply copied from that line. */
+	bool continued;
+
+	/* Conditions for this line to be compiled (NULL: unconditional).
+	 * A directive line records the conditions in force before it. */
+	struct pp_conditions *cond;
+};
+
 struct ccan_file {
 	struct list_node list;
 
 	char *name;
 
+	/* Filled on demand: call get_ccan_file_lines / get_ccan_line_info
+	 * instead of reading these directly. */
 	unsigned int num_lines;
 	char **lines;
+	struct line_info *line_info; /* One entry per line, NULL until computed */
 
 	struct list_head *doc_sections;
 };
@@ -63,9 +99,25 @@ struct ccan_file {
 /* Use this rather than accessing f->lines directly: loads on demand. */
 char **get_ccan_file_lines(struct ccan_file *f);
 
+/* Use this rather than accessing f->line_info directly: computed on demand. */
+struct line_info *get_ccan_line_info(struct ccan_file *f);
+
+enum line_compiled {
+	NOT_COMPILED, /* A condition in the chain excludes the line. */
+	COMPILED, /* No conditions at all, or every one of them holds. */
+	MAYBE_COMPILED, /* A condition in the chain could not be analysed. */
+};
+
+/* Simple evaluator: if this pre-processor symbol is defined to this
+ * value, is this line compiled? (Other symbols assumed undefined) */
+enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
+				    const char *symbol,
+				    unsigned int value);
+
 /* Similarly for ->doc_sections */
 struct list_head *get_ccan_file_docs(struct ccan_file *f);
 
+
 /* Call the reporting on every line in the file.  sofar contains
  * previous results. */
 char *report_on_lines(struct list_head *files,
@@ -78,6 +130,4 @@ extern struct ccanlint has_main_header;
 
 /* Normal tests. */
 extern struct ccanlint trailing_whitespace;
-
-
 #endif /* CCAN_LINT_H */

+ 300 - 0
tools/ccanlint/file_analysis.c

@@ -4,6 +4,7 @@
 #include <ccan/str_talloc/str_talloc.h>
 #include <ccan/grab_file/grab_file.h>
 #include <ccan/noerr/noerr.h>
+#include "../tools.h"
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -11,6 +12,7 @@
 #include <err.h>
 #include <errno.h>
 #include <dirent.h>
+#include <ctype.h>
 
 char **get_ccan_file_lines(struct ccan_file *f)
 {
@@ -165,3 +167,301 @@ struct manifest *get_manifest(void)
 	add_files(m, "");
 	return m;
 }
+
+
+/**
+ * remove_comments - strip comments from a line, return copy.
+ * @line: line to copy
+ * @in_comment: are we already within a comment (from prev line).
+ * @unterminated: are we still in a comment for next line.
+ *
+ * Returns a fresh string, allocated with @line as its talloc context,
+ * holding @line minus every comment.  Comments are cut out without
+ * leaving a space behind, and no attempt is made to recognise string
+ * literals, so comment markers inside quotes are treated as comments.
+ */
+static char *remove_comments(const char *line, bool in_comment,
+			     bool *unterminated)
+{
+	char *p, *ret = talloc_array(line, char, strlen(line) + 1);
+
+	p = ret;
+	for (;;) {
+		if (!in_comment) {
+			/* Find first comment. */
+			const char *old_comment = strstr(line, "/*");
+			const char *new_comment = strstr(line, "//");
+			const char *comment;
+
+			if (new_comment && old_comment)
+				comment = new_comment < old_comment
+					? new_comment : old_comment;
+			else if (old_comment)
+				comment = old_comment;
+			else if (new_comment)
+				comment = new_comment;
+			else {
+				/* Nothing more: keep the rest of the line. */
+				strcpy(p, line);
+				*unterminated = false;
+				break;
+			}
+
+			/* Copy up to comment, then step over its two-character opener. */
+			memcpy(p, line, comment - line);
+			p += comment - line;
+			line += comment - line + 2;
+
+			if (comment == new_comment) {
+				/* We're done: goes to EOL. */
+				p[0] = '\0';
+				*unterminated = false;
+				break;
+			}
+			in_comment = true;
+		}
+
+		if (in_comment) {
+			const char *end = strstr(line, "*/");
+			if (!end) {
+				/* Comment runs past the end of this line. */
+				*unterminated = true;
+				p[0] = '\0';
+				break;
+			}
+			/* Resume scanning just after the comment terminator. */
+			line = end+2;
+			in_comment = false;
+		}
+	}
+	return ret;
+}
+
+/* True if @line holds nothing but spaces and tabs (an empty string counts). */
+static bool is_empty(const char *line)
+{
+	/* Step over the leading blanks; a blank line ends right there. */
+	const char *rest = line + strspn(line, " \t");
+
+	return *rest == '\0';
+}
+
+/* Does @line end in a backslash, i.e. carry on onto the next line? */
+static bool continues(const char *line)
+{
+	size_t len = strlen(line);
+
+	/* Technically, any odd number of these.  But who cares? */
+	return len > 0 && line[len - 1] == '\\';
+}
+
+/* Skip blanks in *line, then consume @token if it comes next.
+ * Returns true (and steps past the token) on a match; on a mismatch
+ * only the leading blanks have been skipped. */
+static bool get_token(const char **line, const char *token)
+{
+	size_t want = strlen(token);
+	unsigned int have;
+	const char *start;
+
+	start = *line + strspn(*line, " \t");
+	*line = start;
+
+	if (isalnum(token[0]) || token[0] == '_') {
+		/* Identifier-like token: must match the whole word. */
+		have = strspn(start, IDENT_CHARS);
+	} else {
+		/* FIXME: real tokenizer handles ++ and other multi-chars.  */
+		have = want;
+	}
+
+	if (have != want || strncmp(start, token, have) != 0)
+		return false;
+
+	*line = start + have;
+	return true;
+}
+
+/* Skip blanks in *line, then return a copy (allocated off @ctx) of the
+ * identifier found there, stepping *line past it.  NULL if none. */
+static char *get_symbol_token(void *ctx, const char **line)
+{
+	const char *start = *line + strspn(*line, " \t");
+	unsigned int len = strspn(start, IDENT_CHARS);
+
+	*line = start;
+	if (len == 0)
+		return NULL;
+
+	*line = start + len;
+	return talloc_strndup(ctx, start, len);
+}
+
+/* Parse what follows "#if"/"#elif": [!] [defined] [(] SYMBOL [)].
+ * Fills in cond->inverse and cond->symbol; cond->type becomes PP_COND_IF
+ * unless "defined" was seen, in which case it is left as the caller set it.
+ * Returns false if no symbol is found or a bracket is left unclosed. */
+static bool parse_hash_if(struct pp_conditions *cond, const char **line)
+{
+	bool has_defined, has_bracket;
+
+	cond->inverse = get_token(line, "!");
+	has_defined = get_token(line, "defined");
+	has_bracket = get_token(line, "(");
+
+	cond->symbol = get_symbol_token(cond, line);
+	if (cond->symbol == NULL)
+		return false;
+
+	if (has_bracket) {
+		if (!get_token(line, ")"))
+			return false;
+	}
+
+	if (!has_defined)
+		cond->type = PP_COND_IF;
+	return true;
+}
+
+/* Parse the (optionally bracketed) symbol of an #ifdef/#ifndef into @cond.
+ * Returns false if there is no symbol or a bracket is left unclosed. */
+static bool parse_hash_ifdef(struct pp_conditions *cond, const char **line,
+			     bool inverse)
+{
+	bool brackets;
+
+	cond->inverse = inverse;
+	brackets = get_token(line, "(");
+	cond->symbol = get_symbol_token(cond, line);
+	if (!cond->symbol)
+		return false;
+	return !brackets || get_token(line, ")");
+}
+
+/* Work out the conditions in force after the comment-free directive @line. */
+static struct pp_conditions *parse_directive(struct ccan_file *f,
+					     const char *line,
+					     struct pp_conditions *parent)
+{
+	struct pp_conditions *cond = talloc(f, struct pp_conditions);
+
+	cond->parent = parent;
+	cond->type = PP_COND_IFDEF;
+
+	/* Callers only hand us lines whose first non-blank is '#'. */
+	if (!get_token(&line, "#"))
+		abort();
+
+	if (get_token(&line, "if")) {
+		if (!parse_hash_if(cond, &line))
+			goto unknown;
+	} else if (get_token(&line, "elif")) {
+		/* Malformed? */
+		if (!parent)
+			goto malformed;
+		/* An #elif replaces its #if: same nesting level. */
+		cond->parent = parent->parent;
+		/* FIXME: Not quite true.  This implies !parent, but we don't
+		 * do multiple conditionals yet. */
+		if (!parse_hash_if(cond, &line))
+			goto unknown;
+	} else if (get_token(&line, "ifdef")) {
+		if (!parse_hash_ifdef(cond, &line, false))
+			goto unknown;
+	} else if (get_token(&line, "ifndef")) {
+		if (!parse_hash_ifdef(cond, &line, true))
+			goto unknown;
+	} else if (get_token(&line, "else")) {
+		/* Malformed? */
+		if (!parent)
+			goto malformed;
+
+		/* Same test as the branch being closed, negated. */
+		*cond = *parent;
+		cond->inverse = !cond->inverse;
+		return cond;
+	} else if (get_token(&line, "endif")) {
+		talloc_free(cond);
+		/* Malformed? */
+		if (!parent)
+			return NULL;
+		/* Back up one! */
+		return parent->parent;
+	} else {
+		/* Not a conditional. */
+		talloc_free(cond);
+		return parent;
+	}
+
+	/* Anything after the symbol means an expression we don't handle. */
+	if (!is_empty(line))
+		goto unknown;
+	return cond;
+
+unknown:
+	cond->type = PP_COND_UNKNOWN;
+	return cond;
+
+malformed:
+	/* #elif/#else with nothing open: don't leave the new node behind. */
+	talloc_free(cond);
+	return NULL;
+}
+
+/**
+ * analyze_directive - work out the conditions following a directive line.
+ * @f: file the line belongs to (talloc parent of any new condition).
+ * @line: the directive line; its first non-blank character must be '#'.
+ * @parent: conditions in force before this line (NULL if none).
+ *
+ * Returns the conditions in force after the line: a new node for
+ * #if/#ifdef/#ifndef/#elif/#else, @parent's parent for #endif, @parent
+ * itself for any other directive, and NULL for an #elif, #else or #endif
+ * with no conditional open.  Directives too complex to parse yield a
+ * node of type PP_COND_UNKNOWN.
+ */
+/* FIXME: Get serious! */
+static struct pp_conditions *analyze_directive(struct ccan_file *f,
+					       const char *line,
+					       struct pp_conditions *parent)
+{
+	bool unused;
+	char *stripped = remove_comments(line, false, &unused);
+	struct pp_conditions *cond = parse_directive(f, stripped, parent);
+
+	/* Symbols are copied onto the condition, so the stripped line can go
+	 * now rather than staying allocated under the source line. */
+	talloc_free(stripped);
+	return cond;
+}
+
+/**
+ * get_ccan_line_info - classify every line of a file (cached).
+ * @f: the file; its lines are loaded via get_ccan_file_lines().
+ *
+ * Fills f->line_info with one entry per line: the line's type, whether it
+ * continues the line above, and the preprocessor conditions in force when
+ * the line is reached.  Later calls return the same array.
+ *
+ * This parser is rough, but OK if code is reasonably neat.
+ */
+struct line_info *get_ccan_line_info(struct ccan_file *f)
+{
+	bool continued = false, in_comment = false;
+	struct pp_conditions *cond = NULL;
+	unsigned int i;
+
+	if (f->line_info)
+		return f->line_info;
+
+	get_ccan_file_lines(f);
+	f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
+
+	for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
+		char *p;
+		bool still_doc_line;
+
+		/* Current conditions apply to this line. */
+		f->line_info[i].cond = cond;
+		f->line_info[i].continued = continued;
+
+		if (continued) {
+			/* Same as last line. */
+			f->line_info[i].type = f->line_info[i-1].type;
+			/* Update in_comment; the stripped copy itself is
+			 * not wanted, so release it straight away. */
+			talloc_free(remove_comments(f->lines[i], in_comment,
+						    &in_comment));
+			continue;
+		}
+
+		/* Preprocessor directive? */
+		if (!in_comment
+		    && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
+			f->line_info[i].type = PREPROC_LINE;
+			cond = analyze_directive(f, f->lines[i], cond);
+			/* A comment opened on the directive line may run on
+			 * to the following lines: remember that, or they
+			 * would be mistaken for code. */
+			talloc_free(remove_comments(f->lines[i], false,
+						    &in_comment));
+			continue;
+		}
+
+		/* Only meaningful inside a comment, which implies i > 0. */
+		still_doc_line = (in_comment
+				  && f->line_info[i-1].type == DOC_LINE);
+
+		p = remove_comments(f->lines[i], in_comment, &in_comment);
+		if (is_empty(p)) {
+			if (strstarts(f->lines[i], "/**") || still_doc_line)
+				f->line_info[i].type = DOC_LINE;
+			else
+				f->line_info[i].type = COMMENT_LINE;
+		} else
+			f->line_info[i].type = CODE_LINE;
+		talloc_free(p);
+	}
+	return f->line_info;
+}
+
+/**
+ * get_ccan_line_pp - is a line compiled when one symbol is set?
+ * @cond: conditions guarding the line (NULL if there are none).
+ * @symbol: the one symbol taken to be defined.
+ * @value: the value @symbol is defined to.
+ *
+ * Every other symbol is treated as undefined (and so as 0 in an #if).
+ * Enclosing conditions are checked first; the first one that is not
+ * definitely satisfied decides the answer.
+ */
+enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
+				    const char *symbol,
+				    unsigned int value)
+{
+	enum line_compiled outer;
+	bool holds;
+
+	/* Nothing guards the line. */
+	if (cond == NULL)
+		return COMPILED;
+
+	/* Do we even get as far as this condition? */
+	outer = get_ccan_line_pp(cond->parent, symbol, value);
+	if (outer != COMPILED)
+		return outer;
+
+	if (cond->type == PP_COND_IF) {
+		/* Our symbol has @value; any other symbol will be 0. */
+		holds = streq(cond->symbol, symbol) && value != 0;
+	} else if (cond->type == PP_COND_IFDEF) {
+		/* Only our symbol is defined. */
+		holds = streq(cond->symbol, symbol);
+	} else {
+		/* Unknown. */
+		return MAYBE_COMPILED;
+	}
+
+	/* An inverted condition compiles exactly when the test fails. */
+	return (holds != cond->inverse) ? COMPILED : NOT_COMPILED;
+}

+ 3 - 0
tools/ccanlint/test/Makefile

@@ -0,0 +1,3 @@
+CFLAGS=-g -Wall -I../../../ 
+
+run-file_analysis: run-file_analysis.o ../../doc_extract-core.o ../../../libccan.a

+ 203 - 0
tools/ccanlint/test/run-file_analysis.c

@@ -0,0 +1,203 @@
+#include "tools/ccanlint/ccanlint.h"
+#include "ccan/tap/tap.h"
+#include "tools/ccanlint/file_analysis.c"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <assert.h>
+
+/* This is our test file: each entry is one source line together with the
+ * classification the parser is expected to give it. */
+struct test {
+	enum line_info_type type; /* Expected line_info.type */
+	bool continued; /* Expected line_info.continued */
+	const char *line; /* Text of the line, without newline */
+};
+
+static struct test testfile[] = {
+	{ PREPROC_LINE,	false, "#ifndef TEST_H" },
+	{ PREPROC_LINE,	false, "#define TEST_H" },
+	{ DOC_LINE,	false, "/**" },
+	{ DOC_LINE,	false, " * Comment here." },
+	{ DOC_LINE,	false, " * Comment here too." },
+	{ DOC_LINE,	false, " */" },
+	{ COMMENT_LINE,	false, "// Normal one-line comment" },
+	{ COMMENT_LINE,	false, "  // Spaced one-line comment" },
+	{ COMMENT_LINE,	false, "/* Normal one-line comment */" },
+	{ COMMENT_LINE,	false, "  /* Spaced one-line comment */" },
+	{ COMMENT_LINE,	false, "  /* Spaced two-line comment" },
+	{ COMMENT_LINE,	false, "  continued comment */" },
+	{ CODE_LINE,	false, "extern int x;"},
+	{ CODE_LINE,	false, "extern int y; // With new-style comment"},
+	{ CODE_LINE,	false, "extern int z; /* With old-style comment */"},
+	{ CODE_LINE,	false, "extern int v; /* With two-line comment"},
+	{ COMMENT_LINE,	false, "		 Second line of comment"},
+	{ COMMENT_LINE, false, "/* comment1  */ // comment 2"},
+	{ COMMENT_LINE, false, "/* comment1  */ /* comment 2 */ "},
+	{ CODE_LINE,	false, "/* comment1  */ code; /* comment 2 */ "},
+	{ CODE_LINE,	false, "/* comment1  */ code; // comment 2"},
+	{ COMMENT_LINE,	false, "/* comment start  \\"},
+	{ COMMENT_LINE,	true,  "   comment finish */"},
+	{ PREPROC_LINE,	false, "#define foo \\"},
+	{ PREPROC_LINE,	true,  "	(bar + \\"},
+	{ PREPROC_LINE,	true,  "	 baz)"},
+	{ CODE_LINE,	false, "extern int \\"},
+	{ CODE_LINE,	true,  "#x;"},
+
+	/* Variants of the same thing: four spellings of a test on BAR being
+	 * defined (the last one inverted).  main() expects identical
+	 * conditions for every "BAR" line and for every "!BAR" line. */
+	{ PREPROC_LINE,	false, "#ifdef BAR"},
+	{ CODE_LINE,	false, "BAR"},
+	{ PREPROC_LINE,	false, "#else"},
+	{ CODE_LINE,	false, "!BAR"},
+	{ PREPROC_LINE,	false, "#endif"},
+
+	{ PREPROC_LINE,	false, "#if defined BAR"},
+	{ CODE_LINE,	false, "BAR"},
+	{ PREPROC_LINE,	false, "#else"},
+	{ CODE_LINE,	false, "!BAR"},
+	{ PREPROC_LINE,	false, "#endif"},
+
+	{ PREPROC_LINE,	false, "#if defined(BAR)"},
+	{ CODE_LINE,	false, "BAR"},
+	{ PREPROC_LINE,	false, "#else"},
+	{ CODE_LINE,	false, "!BAR"},
+	{ PREPROC_LINE,	false, "#endif"},
+
+	{ PREPROC_LINE,	false, "#if !defined(BAR)"},
+	{ CODE_LINE,	false, "!BAR"},
+	{ PREPROC_LINE,	false, "#else"},
+	{ CODE_LINE,	false, "BAR"},
+	{ PREPROC_LINE,	false, "#endif"},
+
+	{ PREPROC_LINE,	false, "#if HAVE_FOO"},
+	{ CODE_LINE,	false, "HAVE_FOO"},
+	{ PREPROC_LINE,	false, "#elif HAVE_BAR"},
+	{ CODE_LINE,	false, "HAVE_BAR"},
+	{ PREPROC_LINE,	false, "#else"},
+	{ CODE_LINE,	false, "neither"},
+	{ PREPROC_LINE,	false, "#endif /* With a comment. */"},
+
+	{ PREPROC_LINE,	false, "#endif /* TEST_H */" },
+};
+
+#define NUM_LINES (sizeof(testfile)/sizeof(testfile[0]))
+
+/* Printable name of a line type, for test diagnostics. */
+static const char *line_type_name(enum line_info_type type)
+{
+	static const char *const names[] = {
+		[PREPROC_LINE] = "PREPROC_LINE",
+		[CODE_LINE] = "CODE_LINE",
+		[DOC_LINE] = "DOC_LINE",
+		[COMMENT_LINE] = "COMMENT_LINE",
+	};
+	unsigned int idx = (unsigned int)type;
+
+	/* Anything outside the table (or a hole in it) is not a real type. */
+	if (idx < sizeof(names) / sizeof(names[0]) && names[idx])
+		return names[idx];
+	return "**INVALID**";
+}
+
+/* This just tests parser for the moment: it builds a ccan_file in memory
+ * from testfile[], then checks the per-line classification, the recorded
+ * conditions, and finally the get_ccan_line_pp() evaluator. */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct line_info *line_info;
+	struct ccan_file *f = talloc(NULL, struct ccan_file);
+
+	plan_tests(NUM_LINES * 2 + 2 + 66); /* 66: the condition checks after the per-line loop */
+
+	f->num_lines = NUM_LINES;
+	f->line_info = NULL;
+	f->lines = talloc_array(f, char *, f->num_lines);
+	for (i = 0; i < f->num_lines; i++)
+		f->lines[i] = talloc_strdup(f->lines, testfile[i].line);
+	
+	line_info = get_ccan_line_info(f);
+	ok1(line_info == f->line_info);
+	for (i = 0; i < f->num_lines; i++) {
+		ok(f->line_info[i].type == testfile[i].type,
+		   "Line %u:'%s' type %s should be %s",
+		   i, testfile[i].line,
+		   line_type_name(f->line_info[i].type),
+		   line_type_name(testfile[i].type));
+		ok(f->line_info[i].continued == testfile[i].continued,
+		   "Line %u:'%s' continued should be %s",
+		   i, testfile[i].line,
+		   testfile[i].continued ? "TRUE" : "FALSE");
+	}
+
+	/* Should cache: a second call returns the same array. */
+	ok1(get_ccan_line_info(f) == line_info);
+
+	/* Expect line 1 (index 0) condition to be NULL. */
+	ok1(line_info[0].cond == NULL);
+	/* Line 2 (index 1), should depend on TEST_H being undefined. */
+	ok1(line_info[1].cond != NULL);
+	ok1(line_info[1].cond->type == PP_COND_IFDEF);
+	ok1(line_info[1].cond->inverse);
+	ok1(line_info[1].cond->parent == NULL);
+	ok1(streq(line_info[1].cond->symbol, "TEST_H"));
+
+	/* Every line BAR should depend on BAR being defined, nested
+	 * directly inside the TEST_H condition. */
+	for (i = 0; i < f->num_lines; i++) {
+		if (!streq(testfile[i].line, "BAR"))
+			continue;
+		ok1(line_info[i].cond->type == PP_COND_IFDEF);
+		ok1(!line_info[i].cond->inverse);
+		ok1(streq(line_info[i].cond->symbol, "BAR"));
+		ok1(line_info[i].cond->parent == line_info[1].cond);
+	}
+
+	/* Every line !BAR should depend on BAR being undefined. */
+	for (i = 0; i < f->num_lines; i++) {
+		if (!streq(testfile[i].line, "!BAR"))
+			continue;
+		ok1(line_info[i].cond->type == PP_COND_IFDEF);
+		ok1(line_info[i].cond->inverse);
+		ok1(streq(line_info[i].cond->symbol, "BAR"));
+		ok1(line_info[i].cond->parent == line_info[1].cond);
+	}
+	
+	/* Every line HAVE_BAR should depend on HAVE_BAR being set. */
+	for (i = 0; i < f->num_lines; i++) {
+		if (!streq(testfile[i].line, "HAVE_BAR"))
+			continue;
+		ok1(line_info[i].cond->type == PP_COND_IF);
+		ok1(!line_info[i].cond->inverse);
+		ok1(streq(line_info[i].cond->symbol, "HAVE_BAR"));
+		ok1(line_info[i].cond->parent == line_info[1].cond);
+	}
+	
+	/* Every line HAVE_FOO should depend on HAVE_FOO being set. */
+	for (i = 0; i < f->num_lines; i++) {
+		if (!streq(testfile[i].line, "HAVE_FOO"))
+			continue;
+		ok1(line_info[i].cond->type == PP_COND_IF);
+		ok1(!line_info[i].cond->inverse);
+		ok1(streq(line_info[i].cond->symbol, "HAVE_FOO"));
+		ok1(line_info[i].cond->parent == line_info[1].cond);
+	}
+
+	/* Now check using interface: each marker line is evaluated with a
+	 * single symbol set, all other symbols being taken as undefined. */
+	for (i = 0; i < f->num_lines; i++) {
+		if (streq(testfile[i].line, "BAR")) {
+			ok1(get_ccan_line_pp(line_info[i].cond, "BAR", 1)
+			    == COMPILED);
+			ok1(get_ccan_line_pp(line_info[i].cond, "FOO", 1)
+			    == NOT_COMPILED);
+		} else if (streq(testfile[i].line, "!BAR")) {
+			ok1(get_ccan_line_pp(line_info[i].cond, "BAR", 1)
+			    == NOT_COMPILED);
+			ok1(get_ccan_line_pp(line_info[i].cond, "FOO", 1)
+			    == COMPILED);
+		} else if (streq(testfile[i].line, "HAVE_BAR")) {
+			ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_BAR", 1)
+			    == COMPILED);
+			ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_BAR", 0)
+			    == NOT_COMPILED);
+		} else if (streq(testfile[i].line, "HAVE_FOO")) {
+			ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_FOO", 1)
+			    == COMPILED);
+			ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_FOO", 0)
+			    == NOT_COMPILED);
+		}
+	}
+
+	return exit_status();
+}