doc_extract-core.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. /* This merely extracts, doesn't do XML or anything. */
  2. #include <ccan/talloc/talloc.h>
  3. #include <ccan/str/str.h>
  4. #include <ccan/str_talloc/str_talloc.h>
  5. #include <err.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <unistd.h>
  9. #include <string.h>
  10. #include <sys/types.h>
  11. #include <sys/stat.h>
  12. #include <fcntl.h>
  13. #include <stdbool.h>
  14. #include <ctype.h>
  15. #include "doc_extract.h"
  16. #include "tools.h"
  17. static char **grab_doc(char **lines, unsigned int **linemap)
  18. {
  19. char **ret;
  20. unsigned int i, num;
  21. bool printing = false;
  22. ret = talloc_array(NULL, char *, talloc_array_length(lines));
  23. *linemap = talloc_array(ret, unsigned int, talloc_array_length(lines));
  24. num = 0;
  25. for (i = 0; lines[i]; i++) {
  26. if (streq(lines[i], "/**")) {
  27. printing = true;
  28. if (num != 0) {
  29. ret[num-1] = talloc_append_string(ret[num-1],
  30. "\n");
  31. }
  32. } else if (streq(lines[i], " */"))
  33. printing = false;
  34. else if (printing) {
  35. if (strstarts(lines[i], " * "))
  36. ret[num++] = talloc_strdup(ret, lines[i]+3);
  37. else if (strstarts(lines[i], " *"))
  38. ret[num++] = talloc_strdup(ret, lines[i]+2);
  39. else
  40. errx(1, "Malformed line %u", i);
  41. (*linemap)[num-1] = i;
  42. }
  43. }
  44. ret[num] = NULL;
  45. return ret;
  46. }
  47. static bool is_blank(const char *line)
  48. {
  49. return line && line[strspn(line, " \t\n")] == '\0';
  50. }
  51. static char *is_section(const void *ctx, const char *line, char **value)
  52. {
  53. char *secname;
  54. /* Any number of upper case words separated by spaces, ending in : */
  55. if (!strreg(ctx, line,
  56. "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
  57. &secname, NULL, value))
  58. return NULL;
  59. return secname;
  60. }
  61. /* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
  62. static unsigned int is_summary_line(const char *line)
  63. {
  64. unsigned int id_len;
  65. /* We allow /, because it can be in (nested) module names. */
  66. id_len = strspn(line, IDENT_CHARS" /");
  67. if (id_len == 0)
  68. return 0;
  69. if (strspn(line, " ") == id_len)
  70. return 0;
  71. if (!strstarts(line + id_len-1, " - "))
  72. return 0;
  73. return id_len - 1;
  74. }
  75. static bool empty_section(struct doc_section *d)
  76. {
  77. unsigned int i;
  78. for (i = 0; i < d->num_lines; i++)
  79. if (!is_blank(d->lines[i]))
  80. return false;
  81. return true;
  82. }
  83. static struct doc_section *new_section(struct list_head *list,
  84. const char *function,
  85. const char *type,
  86. unsigned int srcline)
  87. {
  88. struct doc_section *d;
  89. char *lowertype;
  90. unsigned int i;
  91. /* If previous section was empty, delete it. */
  92. d = list_tail(list, struct doc_section, list);
  93. if (d && empty_section(d)) {
  94. list_del(&d->list);
  95. talloc_free(d);
  96. }
  97. d = talloc(list, struct doc_section);
  98. d->function = function;
  99. lowertype = talloc_size(d, strlen(type) + 1);
  100. /* Canonicalize type to lower case. */
  101. for (i = 0; i < strlen(type)+1; i++)
  102. lowertype[i] = tolower(type[i]);
  103. d->type = lowertype;
  104. d->lines = NULL;
  105. d->num_lines = 0;
  106. d->srcline = srcline;
  107. list_add_tail(list, &d->list);
  108. return d;
  109. }
  110. static void add_line(struct doc_section *curr, const char *line)
  111. {
  112. curr->lines = talloc_realloc(curr, curr->lines, char *,
  113. curr->num_lines+1);
  114. curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line);
  115. }
  116. /* We convert tabs to spaces here. */
  117. static void add_detabbed_line(struct doc_section *curr, const char *rawline)
  118. {
  119. unsigned int i, eff_i, len, off = 0;
  120. char *line;
  121. /* Worst-case alloc: 8 spaces per tab. */
  122. line = talloc_array(curr, char, strlen(rawline) +
  123. strcount(rawline, "\t") * 7 + 1);
  124. len = 0;
  125. /* We keep track of the *effective* offset of i. */
  126. for (i = eff_i = 0; i < strlen(rawline); i++) {
  127. if (rawline[i] == '\t') {
  128. do {
  129. line[len++] = ' ';
  130. eff_i++;
  131. } while (eff_i % 8 != 0);
  132. } else {
  133. line[len++] = rawline[i];
  134. if (off == 0 && rawline[i] == '*')
  135. off = i + 1;
  136. eff_i++;
  137. }
  138. }
  139. line[len] = '\0';
  140. add_line(curr, line + off);
  141. talloc_free(line);
  142. }
  143. /* Not very efficient: we could track prefix length while doing
  144. * add_detabbed_line */
  145. static void trim_lines(struct doc_section *curr)
  146. {
  147. unsigned int i, trim = -1;
  148. int last_non_empty = -1;
  149. /* Get minimum whitespace prefix. */
  150. for (i = 0; i < curr->num_lines; i++) {
  151. unsigned int prefix = strspn(curr->lines[i], " ");
  152. /* Ignore blank lines */
  153. if (curr->lines[i][prefix] == '\0')
  154. continue;
  155. if (prefix < trim)
  156. trim = prefix;
  157. }
  158. /* Now trim it. */
  159. for (i = 0; i < curr->num_lines; i++) {
  160. unsigned int prefix = strspn(curr->lines[i], " ");
  161. if (prefix < trim)
  162. curr->lines[i] += prefix;
  163. else
  164. curr->lines[i] += trim;
  165. /* All blank? Potential to trim. */
  166. if (curr->lines[i][strspn(curr->lines[i], " \t")] != '\0')
  167. last_non_empty = i;
  168. }
  169. /* Remove trailing blank lines. */
  170. curr->num_lines = last_non_empty + 1;
  171. }
  172. struct list_head *extract_doc_sections(char **rawlines)
  173. {
  174. unsigned int *linemap;
  175. char **lines = grab_doc(rawlines, &linemap);
  176. const char *function = NULL;
  177. struct doc_section *curr = NULL;
  178. unsigned int i;
  179. struct list_head *list;
  180. list = talloc(NULL, struct list_head);
  181. list_head_init(list);
  182. for (i = 0; lines[i]; i++) {
  183. unsigned funclen;
  184. char *type, *extra;
  185. funclen = is_summary_line(lines[i]);
  186. if (funclen) {
  187. function = talloc_strndup(list, lines[i], funclen);
  188. curr = new_section(list, function, "summary",
  189. linemap[i]);
  190. add_line(curr, lines[i] + funclen + 3);
  191. curr = new_section(list, function, "description",
  192. linemap[i]);
  193. } else if ((type = is_section(list, lines[i], &extra)) != NULL){
  194. curr = new_section(list, function, type, linemap[i]);
  195. if (!streq(extra, "")) {
  196. add_line(curr, extra);
  197. curr = NULL;
  198. }
  199. } else {
  200. if (curr)
  201. add_detabbed_line(curr, rawlines[linemap[i]]);
  202. }
  203. }
  204. list_for_each(list, curr, list)
  205. trim_lines(curr);
  206. talloc_free(lines);
  207. return list;
  208. }