doc_extract-core.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. /* This merely extracts, doesn't do XML or anything. */
  2. #include <ccan/talloc/talloc.h>
  3. #include <ccan/str/str.h>
  4. #include <ccan/str_talloc/str_talloc.h>
  5. #include <err.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <unistd.h>
  9. #include <string.h>
  10. #include <sys/types.h>
  11. #include <sys/stat.h>
  12. #include <fcntl.h>
  13. #include <stdbool.h>
  14. #include <ctype.h>
  15. #include "doc_extract.h"
  16. #include "tools.h"
  17. static char **grab_doc(char **lines, unsigned int **linemap,
  18. const char *file)
  19. {
  20. char **ret;
  21. unsigned int i, num;
  22. bool printing = false;
  23. ret = talloc_array(NULL, char *, talloc_array_length(lines));
  24. *linemap = talloc_array(ret, unsigned int, talloc_array_length(lines));
  25. num = 0;
  26. for (i = 0; lines[i]; i++) {
  27. if (streq(lines[i], "/**")) {
  28. printing = true;
  29. if (num != 0) {
  30. ret[num-1] = talloc_append_string(ret[num-1],
  31. "\n");
  32. }
  33. } else if (streq(lines[i], " */"))
  34. printing = false;
  35. else if (printing) {
  36. if (strstarts(lines[i], " * "))
  37. ret[num++] = talloc_strdup(ret, lines[i]+3);
  38. else if (strstarts(lines[i], " *"))
  39. ret[num++] = talloc_strdup(ret, lines[i]+2);
  40. else {
  41. /* Weird, malformed? */
  42. static bool warned;
  43. if (!warned) {
  44. warnx("%s:%u:"
  45. " Expected ' *' in comment.",
  46. file, i+1);
  47. warned++;
  48. }
  49. ret[num++] = talloc_strdup(ret, lines[i]);
  50. if (strstr(lines[i], "*/"))
  51. printing = false;
  52. }
  53. (*linemap)[num-1] = i;
  54. }
  55. }
  56. ret[num] = NULL;
  57. return ret;
  58. }
  /*
   * Return true if line is non-NULL and consists solely of spaces, tabs and
   * newlines (the empty string counts as blank).  NULL is not blank.
   */
  static bool is_blank(const char *line)
  {
      if (line == NULL)
          return false;

      while (*line == ' ' || *line == '\t' || *line == '\n')
          line++;

      return *line == '\0';
  }
  /*
   * Recognise a section heading at the start of line.
   *
   * A heading is one or more words separated by single spaces, each word
   * beginning with an upper case letter and continuing with letters, digits
   * or underscores, followed by a ':'.
   *
   * ctx:   passed through to strreg (presumably the talloc parent of the
   *        captured strings; confirm against strreg's documentation).
   * value: on a match, receives the text after the colon, with leading
   *        spaces, tabs and newlines skipped.
   *
   * Returns the heading name on a match, NULL otherwise.
   */
  static char *is_section(const void *ctx, const char *line, char **value)
  {
      char *name = NULL;
      bool matched;

      /* The middle capture (the repeated " Word" group) is not wanted. */
      matched = strreg(ctx, line,
                       "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
                       &name, NULL, value);

      return matched ? name : NULL;
  }
  73. /* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
  74. static unsigned int is_summary_line(const char *line)
  75. {
  76. unsigned int id_len;
  77. /* We allow /, because it can be in (nested) module names. */
  78. id_len = strspn(line, IDENT_CHARS" /");
  79. if (id_len == 0)
  80. return 0;
  81. if (strspn(line, " ") == id_len)
  82. return 0;
  83. if (!strstarts(line + id_len-1, " - "))
  84. return 0;
  85. return id_len - 1;
  86. }
  87. static bool empty_section(struct doc_section *d)
  88. {
  89. unsigned int i;
  90. for (i = 0; i < d->num_lines; i++)
  91. if (!is_blank(d->lines[i]))
  92. return false;
  93. return true;
  94. }
  95. static struct doc_section *new_section(struct list_head *list,
  96. const char *function,
  97. const char *type,
  98. unsigned int srcline)
  99. {
  100. struct doc_section *d;
  101. char *lowertype;
  102. unsigned int i;
  103. /* If previous section was empty, delete it. */
  104. d = list_tail(list, struct doc_section, list);
  105. if (d && empty_section(d)) {
  106. list_del(&d->list);
  107. talloc_free(d);
  108. }
  109. d = talloc(list, struct doc_section);
  110. d->function = function;
  111. lowertype = talloc_size(d, strlen(type) + 1);
  112. /* Canonicalize type to lower case. */
  113. for (i = 0; i < strlen(type)+1; i++)
  114. lowertype[i] = tolower(type[i]);
  115. d->type = lowertype;
  116. d->lines = NULL;
  117. d->num_lines = 0;
  118. d->srcline = srcline;
  119. list_add_tail(list, &d->list);
  120. return d;
  121. }
  122. static void add_line(struct doc_section *curr, const char *line)
  123. {
  124. curr->lines = talloc_realloc(curr, curr->lines, char *,
  125. curr->num_lines+1);
  126. curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line);
  127. }
  128. /* We convert tabs to spaces here. */
  129. static void add_detabbed_line(struct doc_section *curr, const char *rawline)
  130. {
  131. unsigned int i, eff_i, len, off = 0;
  132. char *line;
  133. /* Worst-case alloc: 8 spaces per tab. */
  134. line = talloc_array(curr, char, strlen(rawline) +
  135. strcount(rawline, "\t") * 7 + 1);
  136. len = 0;
  137. /* We keep track of the *effective* offset of i. */
  138. for (i = eff_i = 0; i < strlen(rawline); i++) {
  139. if (rawline[i] == '\t') {
  140. do {
  141. line[len++] = ' ';
  142. eff_i++;
  143. } while (eff_i % 8 != 0);
  144. } else {
  145. line[len++] = rawline[i];
  146. if (off == 0 && rawline[i] == '*')
  147. off = i + 1;
  148. eff_i++;
  149. }
  150. }
  151. line[len] = '\0';
  152. add_line(curr, line + off);
  153. talloc_free(line);
  154. }
  155. /* Not very efficient: we could track prefix length while doing
  156. * add_detabbed_line */
  157. static void trim_lines(struct doc_section *curr)
  158. {
  159. unsigned int i, trim = -1;
  160. int last_non_empty = -1;
  161. /* Get minimum whitespace prefix. */
  162. for (i = 0; i < curr->num_lines; i++) {
  163. unsigned int prefix = strspn(curr->lines[i], " ");
  164. /* Ignore blank lines */
  165. if (curr->lines[i][prefix] == '\0')
  166. continue;
  167. if (prefix < trim)
  168. trim = prefix;
  169. }
  170. /* Now trim it. */
  171. for (i = 0; i < curr->num_lines; i++) {
  172. unsigned int prefix = strspn(curr->lines[i], " ");
  173. if (prefix < trim)
  174. curr->lines[i] += prefix;
  175. else
  176. curr->lines[i] += trim;
  177. /* All blank? Potential to trim. */
  178. if (curr->lines[i][strspn(curr->lines[i], " \t")] != '\0')
  179. last_non_empty = i;
  180. }
  181. /* Remove trailing blank lines. */
  182. curr->num_lines = last_non_empty + 1;
  183. }
  184. struct list_head *extract_doc_sections(char **rawlines, const char *file)
  185. {
  186. unsigned int *linemap;
  187. char **lines = grab_doc(rawlines, &linemap, file);
  188. const char *function = NULL;
  189. struct doc_section *curr = NULL;
  190. unsigned int i;
  191. struct list_head *list;
  192. list = talloc(NULL, struct list_head);
  193. list_head_init(list);
  194. for (i = 0; lines[i]; i++) {
  195. unsigned funclen;
  196. char *type, *extra;
  197. funclen = is_summary_line(lines[i]);
  198. if (funclen) {
  199. function = talloc_strndup(list, lines[i], funclen);
  200. curr = new_section(list, function, "summary",
  201. linemap[i]);
  202. add_line(curr, lines[i] + funclen + 3);
  203. curr = new_section(list, function, "description",
  204. linemap[i]);
  205. } else if ((type = is_section(list, lines[i], &extra)) != NULL){
  206. curr = new_section(list, function, type, linemap[i]);
  207. if (!streq(extra, "")) {
  208. add_line(curr, extra);
  209. curr = NULL;
  210. }
  211. } else {
  212. if (curr)
  213. add_detabbed_line(curr, rawlines[linemap[i]]);
  214. }
  215. }
  216. list_for_each(list, curr, list)
  217. trim_lines(curr);
  218. talloc_free(lines);
  219. return list;
  220. }