doc_extract-core.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. /* This merely extracts, doesn't do XML or anything. */
  2. #include <ccan/take/take.h>
  3. #include <ccan/str/str.h>
  4. #include <err.h>
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <unistd.h>
  8. #include <string.h>
  9. #include <sys/types.h>
  10. #include <sys/stat.h>
  11. #include <fcntl.h>
  12. #include <stdbool.h>
  13. #include <ctype.h>
  14. #include "doc_extract.h"
  15. #include "tools.h"
  16. static char **grab_doc(char **lines, unsigned int **linemap,
  17. const char *file)
  18. {
  19. char **ret;
  20. unsigned int i, num;
  21. bool printing = false;
  22. ret = tal_arr(NULL, char *, tal_count(lines));
  23. *linemap = tal_arr(ret, unsigned int, tal_count(lines));
  24. num = 0;
  25. for (i = 0; lines[i]; i++) {
  26. if (streq(lines[i], "/**")) {
  27. printing = true;
  28. if (num != 0) {
  29. ret[num-1] = tal_strcat(NULL,
  30. take(ret[num-1]), "\n");
  31. }
  32. } else if (streq(lines[i], " */"))
  33. printing = false;
  34. else if (printing) {
  35. if (strstarts(lines[i], " * "))
  36. ret[num++] = tal_strdup(ret, lines[i]+3);
  37. else if (strstarts(lines[i], " *"))
  38. ret[num++] = tal_strdup(ret, lines[i]+2);
  39. else {
  40. /* Weird, malformed? */
  41. static bool warned;
  42. if (!warned) {
  43. warnx("%s:%u:"
  44. " Expected ' *' in comment.",
  45. file, i+1);
  46. warned = true;
  47. }
  48. ret[num++] = tal_strdup(ret, lines[i]);
  49. if (strstr(lines[i], "*/"))
  50. printing = false;
  51. }
  52. (*linemap)[num-1] = i;
  53. }
  54. }
  55. ret[num] = NULL;
  56. return ret;
  57. }
  58. static bool is_blank(const char *line)
  59. {
  60. return line && line[strspn(line, " \t\n")] == '\0';
  61. }
  62. static char *is_section(const void *ctx, const char *line, char **value)
  63. {
  64. char *secname;
  65. /* Any number of upper case words separated by spaces, ending in : */
  66. if (!tal_strreg(ctx, line,
  67. "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
  68. &secname, NULL, value))
  69. return NULL;
  70. return secname;
  71. }
  72. /* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
  73. static unsigned int is_summary_line(const char *line)
  74. {
  75. unsigned int id_len;
  76. /* We allow /, because it can be in (nested) module names. */
  77. id_len = strspn(line, IDENT_CHARS" /");
  78. if (id_len == 0)
  79. return 0;
  80. if (strspn(line, " ") == id_len)
  81. return 0;
  82. if (!strstarts(line + id_len-1, " - "))
  83. return 0;
  84. return id_len - 1;
  85. }
  86. static bool empty_section(struct doc_section *d)
  87. {
  88. unsigned int i;
  89. for (i = 0; i < d->num_lines; i++)
  90. if (!is_blank(d->lines[i]))
  91. return false;
  92. return true;
  93. }
  94. static struct doc_section *new_section(struct list_head *list,
  95. const char *function,
  96. const char *type,
  97. unsigned int srcline)
  98. {
  99. struct doc_section *d;
  100. char *lowertype;
  101. unsigned int i;
  102. /* If previous section was empty, delete it. */
  103. d = list_tail(list, struct doc_section, list);
  104. if (d && empty_section(d)) {
  105. list_del(&d->list);
  106. tal_free(d);
  107. }
  108. d = tal(list, struct doc_section);
  109. d->function = function;
  110. lowertype = tal_arr(d, char, strlen(type) + 1);
  111. /* Canonicalize type to lower case. */
  112. for (i = 0; i < strlen(type)+1; i++)
  113. lowertype[i] = tolower(type[i]);
  114. d->type = lowertype;
  115. d->lines = tal_arr(d, char *, 0);
  116. d->num_lines = 0;
  117. d->srcline = srcline;
  118. list_add_tail(list, &d->list);
  119. return d;
  120. }
  121. static void add_line(struct doc_section *curr, const char *line)
  122. {
  123. char *myline = tal_strdup(curr->lines, line);
  124. tal_expand(&curr->lines, &myline, 1);
  125. curr->num_lines++;
  126. }
  127. /* We convert tabs to spaces here. */
  128. static void add_detabbed_line(struct doc_section *curr, const char *rawline)
  129. {
  130. unsigned int i, eff_i, len, off = 0;
  131. char *line;
  132. /* Worst-case alloc: 8 spaces per tab. */
  133. line = tal_arr(curr, char, strlen(rawline) +
  134. strcount(rawline, "\t") * 7 + 1);
  135. len = 0;
  136. /* We keep track of the *effective* offset of i. */
  137. for (i = eff_i = 0; i < strlen(rawline); i++) {
  138. if (rawline[i] == '\t') {
  139. do {
  140. line[len++] = ' ';
  141. eff_i++;
  142. } while (eff_i % 8 != 0);
  143. } else {
  144. line[len++] = rawline[i];
  145. if (off == 0 && rawline[i] == '*')
  146. off = i + 1;
  147. eff_i++;
  148. }
  149. }
  150. line[len] = '\0';
  151. add_line(curr, line + off);
  152. tal_free(line);
  153. }
  154. /* Not very efficient: we could track prefix length while doing
  155. * add_detabbed_line */
  156. static void trim_lines(struct doc_section *curr)
  157. {
  158. unsigned int i, trim = -1;
  159. int last_non_empty = -1;
  160. /* Get minimum whitespace prefix. */
  161. for (i = 0; i < curr->num_lines; i++) {
  162. unsigned int prefix = strspn(curr->lines[i], " ");
  163. /* Ignore blank lines */
  164. if (curr->lines[i][prefix] == '\0')
  165. continue;
  166. if (prefix < trim)
  167. trim = prefix;
  168. }
  169. /* Now trim it. */
  170. for (i = 0; i < curr->num_lines; i++) {
  171. unsigned int prefix = strspn(curr->lines[i], " ");
  172. if (prefix < trim)
  173. curr->lines[i] += prefix;
  174. else
  175. curr->lines[i] += trim;
  176. /* All blank? Potential to trim. */
  177. if (curr->lines[i][strspn(curr->lines[i], " \t")] != '\0')
  178. last_non_empty = i;
  179. }
  180. /* Remove trailing blank lines. */
  181. curr->num_lines = last_non_empty + 1;
  182. }
  183. struct list_head *extract_doc_sections(char **rawlines, const char *file)
  184. {
  185. unsigned int *linemap;
  186. char **lines = grab_doc(rawlines, &linemap, file);
  187. const char *function = NULL;
  188. struct doc_section *curr = NULL;
  189. unsigned int i;
  190. struct list_head *list;
  191. list = tal(NULL, struct list_head);
  192. list_head_init(list);
  193. for (i = 0; lines[i]; i++) {
  194. unsigned funclen;
  195. char *type, *extra;
  196. funclen = is_summary_line(lines[i]);
  197. if (funclen) {
  198. function = tal_strndup(list, lines[i], funclen);
  199. curr = new_section(list, function, "summary",
  200. linemap[i]);
  201. add_line(curr, lines[i] + funclen + 3);
  202. curr = new_section(list, function, "description",
  203. linemap[i]);
  204. } else if ((type = is_section(list, lines[i], &extra)) != NULL){
  205. curr = new_section(list, function, type, linemap[i]);
  206. if (!streq(extra, "")) {
  207. add_line(curr, extra);
  208. curr = NULL;
  209. }
  210. } else {
  211. if (curr)
  212. add_detabbed_line(curr, rawlines[linemap[i]]);
  213. }
  214. }
  215. list_for_each(list, curr, list)
  216. trim_lines(curr);
  217. tal_free(lines);
  218. return list;
  219. }