doc_extract-core.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. /* This merely extracts, doesn't do XML or anything. */
  2. #include <ccan/talloc/talloc.h>
  3. #include <ccan/str/str.h>
  4. #include <ccan/str_talloc/str_talloc.h>
  5. #include <err.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <unistd.h>
  9. #include <string.h>
  10. #include <sys/types.h>
  11. #include <sys/stat.h>
  12. #include <fcntl.h>
  13. #include <stdbool.h>
  14. #include <ctype.h>
  15. #include "doc_extract.h"
  16. #include "tools.h"
  17. static char **grab_doc(char **lines, unsigned int **linemap)
  18. {
  19. char **ret;
  20. unsigned int i, num;
  21. bool printing = false;
  22. ret = talloc_array(NULL, char *, talloc_array_length(lines));
  23. *linemap = talloc_array(ret, unsigned int, talloc_array_length(lines));
  24. num = 0;
  25. for (i = 0; lines[i]; i++) {
  26. if (streq(lines[i], "/**")) {
  27. printing = true;
  28. if (num != 0) {
  29. ret[num-1] = talloc_append_string(ret[num-1],
  30. "\n");
  31. }
  32. } else if (streq(lines[i], " */"))
  33. printing = false;
  34. else if (printing) {
  35. if (strstarts(lines[i], " * "))
  36. ret[num++] = talloc_strdup(ret, lines[i]+3);
  37. else if (strstarts(lines[i], " *"))
  38. ret[num++] = talloc_strdup(ret, lines[i]+2);
  39. else
  40. errx(1, "Malformed line %u", i);
  41. (*linemap)[num-1] = i;
  42. }
  43. }
  44. ret[num] = NULL;
  45. return ret;
  46. }
  47. static bool is_blank(const char *line)
  48. {
  49. return line && line[strspn(line, " \t\n")] == '\0';
  50. }
  51. static char *is_section(const void *ctx, const char *line, char **value)
  52. {
  53. char *secname;
  54. /* Any number of upper case words separated by spaces, ending in : */
  55. if (!strreg(ctx, line,
  56. "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
  57. &secname, NULL, value))
  58. return NULL;
  59. return secname;
  60. }
  61. /* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
  62. static unsigned int is_summary_line(const char *line)
  63. {
  64. unsigned int id_len;
  65. id_len = strspn(line, IDENT_CHARS" ");
  66. if (id_len == 0)
  67. return 0;
  68. if (strspn(line, " ") == id_len)
  69. return 0;
  70. if (!strstarts(line + id_len-1, " - "))
  71. return 0;
  72. return id_len - 1;
  73. }
  74. static bool empty_section(struct doc_section *d)
  75. {
  76. unsigned int i;
  77. for (i = 0; i < d->num_lines; i++)
  78. if (!is_blank(d->lines[i]))
  79. return false;
  80. return true;
  81. }
  82. static struct doc_section *new_section(struct list_head *list,
  83. const char *function,
  84. const char *type,
  85. unsigned int srcline)
  86. {
  87. struct doc_section *d;
  88. char *lowertype;
  89. unsigned int i;
  90. /* If previous section was empty, delete it. */
  91. d = list_tail(list, struct doc_section, list);
  92. if (d && empty_section(d)) {
  93. list_del(&d->list);
  94. talloc_free(d);
  95. }
  96. d = talloc(list, struct doc_section);
  97. d->function = function;
  98. lowertype = talloc_size(d, strlen(type) + 1);
  99. /* Canonicalize type to lower case. */
  100. for (i = 0; i < strlen(type)+1; i++)
  101. lowertype[i] = tolower(type[i]);
  102. d->type = lowertype;
  103. d->lines = NULL;
  104. d->num_lines = 0;
  105. d->srcline = srcline;
  106. list_add_tail(list, &d->list);
  107. return d;
  108. }
  109. static void add_line(struct doc_section *curr, const char *line)
  110. {
  111. curr->lines = talloc_realloc(curr, curr->lines, char *,
  112. curr->num_lines+1);
  113. curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line);
  114. }
  115. /* We convert tabs to spaces here. */
  116. static void add_detabbed_line(struct doc_section *curr, const char *rawline)
  117. {
  118. unsigned int i, eff_i, len, off = 0;
  119. char *line;
  120. /* Worst-case alloc: 8 spaces per tab. */
  121. line = talloc_array(curr, char, strlen(rawline) +
  122. strcount(rawline, "\t") * 7 + 1);
  123. len = 0;
  124. /* We keep track of the *effective* offset of i. */
  125. for (i = eff_i = 0; i < strlen(rawline); i++) {
  126. if (rawline[i] == '\t') {
  127. do {
  128. line[len++] = ' ';
  129. eff_i++;
  130. } while (eff_i % 8 != 0);
  131. } else {
  132. line[len++] = rawline[i];
  133. if (off == 0 && rawline[i] == '*')
  134. off = i + 1;
  135. eff_i++;
  136. }
  137. }
  138. line[len] = '\0';
  139. add_line(curr, line + off);
  140. talloc_free(line);
  141. }
  142. /* Not very efficient: we could track prefix length while doing
  143. * add_detabbed_line */
  144. static void trim_lines(struct doc_section *curr)
  145. {
  146. unsigned int i, trim = -1;
  147. int last_non_empty = -1;
  148. /* Get minimum whitespace prefix. */
  149. for (i = 0; i < curr->num_lines; i++) {
  150. unsigned int prefix = strspn(curr->lines[i], " ");
  151. /* Ignore blank lines */
  152. if (curr->lines[i][prefix] == '\0')
  153. continue;
  154. if (prefix < trim)
  155. trim = prefix;
  156. }
  157. /* Now trim it. */
  158. for (i = 0; i < curr->num_lines; i++) {
  159. unsigned int prefix = strspn(curr->lines[i], " ");
  160. if (prefix < trim)
  161. curr->lines[i] += prefix;
  162. else
  163. curr->lines[i] += trim;
  164. /* All blank? Potential to trim. */
  165. if (curr->lines[i][strspn(curr->lines[i], " \t")] != '\0')
  166. last_non_empty = i;
  167. }
  168. /* Remove trailing blank lines. */
  169. curr->num_lines = last_non_empty + 1;
  170. }
  171. struct list_head *extract_doc_sections(char **rawlines)
  172. {
  173. unsigned int *linemap;
  174. char **lines = grab_doc(rawlines, &linemap);
  175. const char *function = NULL;
  176. struct doc_section *curr = NULL;
  177. unsigned int i;
  178. struct list_head *list;
  179. list = talloc(NULL, struct list_head);
  180. list_head_init(list);
  181. for (i = 0; lines[i]; i++) {
  182. unsigned funclen;
  183. char *type, *extra;
  184. funclen = is_summary_line(lines[i]);
  185. if (funclen) {
  186. function = talloc_strndup(list, lines[i], funclen);
  187. curr = new_section(list, function, "summary",
  188. linemap[i]);
  189. add_line(curr, lines[i] + funclen + 3);
  190. curr = new_section(list, function, "description",
  191. linemap[i]);
  192. } else if ((type = is_section(list, lines[i], &extra)) != NULL){
  193. curr = new_section(list, function, type, linemap[i]);
  194. if (!streq(extra, "")) {
  195. add_line(curr, extra);
  196. curr = NULL;
  197. }
  198. } else {
  199. if (curr)
  200. add_detabbed_line(curr, rawlines[linemap[i]]);
  201. }
  202. }
  203. list_for_each(list, curr, list)
  204. trim_lines(curr);
  205. talloc_free(lines);
  206. return list;
  207. }