replay_trace.c 45 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725
  1. #include <ccan/tdb/tdb.h>
  2. #include <ccan/grab_file/grab_file.h>
  3. #include <ccan/hash/hash.h>
  4. #include <ccan/talloc/talloc.h>
  5. #include <ccan/str_talloc/str_talloc.h>
  6. #include <ccan/str/str.h>
  7. #include <ccan/list/list.h>
  8. #include <err.h>
  9. #include <ctype.h>
  10. #include <string.h>
  11. #include <unistd.h>
  12. #include <sys/types.h>
  13. #include <sys/wait.h>
  14. #include <sys/time.h>
  15. #include <errno.h>
  16. #include <signal.h>
  17. #include <assert.h>
  18. #define STRINGIFY2(x) #x
  19. #define STRINGIFY(x) STRINGIFY2(x)
  20. /* Avoid mod by zero */
  21. static unsigned int total_keys = 1;
  22. /* #define DEBUG_DEPS 1 */
  23. /* Traversals block transactions in the current implementation. */
  24. #define TRAVERSALS_TAKE_TRANSACTION_LOCK 1
  25. struct pipe {
  26. int fd[2];
  27. };
  28. static struct pipe *pipes;
  29. static void __attribute__((noreturn)) fail(const char *filename,
  30. unsigned int line,
  31. const char *fmt, ...)
  32. {
  33. va_list ap;
  34. va_start(ap, fmt);
  35. fprintf(stderr, "%s:%u: FAIL: ", filename, line);
  36. vfprintf(stderr, fmt, ap);
  37. fprintf(stderr, "\n");
  38. va_end(ap);
  39. exit(1);
  40. }
  41. /* Try or die. */
  42. #define try(expr, expect) \
  43. do { \
  44. int ret = (expr); \
  45. if (ret != (expect)) \
  46. fail(filename[file], i+1, \
  47. STRINGIFY(expr) "= %i", ret); \
  48. } while (0)
  49. /* Try or imitate results. */
  50. #define unreliable(expr, expect, force, undo) \
  51. do { \
  52. int ret = expr; \
  53. if (ret != expect) { \
  54. fprintf(stderr, "%s:%u: %s gave %i not %i", \
  55. filename[file], i+1, STRINGIFY(expr), \
  56. ret, expect); \
  57. if (expect == 0) \
  58. force; \
  59. else \
  60. undo; \
  61. } \
  62. } while (0)
  63. static bool key_eq(TDB_DATA a, TDB_DATA b)
  64. {
  65. if (a.dsize != b.dsize)
  66. return false;
  67. return memcmp(a.dptr, b.dptr, a.dsize) == 0;
  68. }
  69. /* This is based on the hash algorithm from gdbm */
  70. static unsigned int hash_key(TDB_DATA *key)
  71. {
  72. uint32_t value; /* Used to compute the hash value. */
  73. uint32_t i; /* Used to cycle through random values. */
  74. /* Set the initial value from the key size. */
  75. for (value = 0x238F13AF ^ key->dsize, i=0; i < key->dsize; i++)
  76. value = (value + (key->dptr[i] << (i*5 % 24)));
  77. return (1103515243 * value + 12345);
  78. }
  79. enum op_type {
  80. OP_TDB_LOCKALL,
  81. OP_TDB_LOCKALL_MARK,
  82. OP_TDB_LOCKALL_UNMARK,
  83. OP_TDB_LOCKALL_NONBLOCK,
  84. OP_TDB_UNLOCKALL,
  85. OP_TDB_LOCKALL_READ,
  86. OP_TDB_LOCKALL_READ_NONBLOCK,
  87. OP_TDB_UNLOCKALL_READ,
  88. OP_TDB_CHAINLOCK,
  89. OP_TDB_CHAINLOCK_NONBLOCK,
  90. OP_TDB_CHAINLOCK_MARK,
  91. OP_TDB_CHAINLOCK_UNMARK,
  92. OP_TDB_CHAINUNLOCK,
  93. OP_TDB_CHAINLOCK_READ,
  94. OP_TDB_CHAINUNLOCK_READ,
  95. OP_TDB_PARSE_RECORD,
  96. OP_TDB_EXISTS,
  97. OP_TDB_STORE,
  98. OP_TDB_APPEND,
  99. OP_TDB_GET_SEQNUM,
  100. OP_TDB_WIPE_ALL,
  101. OP_TDB_TRANSACTION_START,
  102. OP_TDB_TRANSACTION_CANCEL,
  103. OP_TDB_TRANSACTION_COMMIT,
  104. OP_TDB_TRAVERSE_READ_START,
  105. OP_TDB_TRAVERSE_START,
  106. OP_TDB_TRAVERSE_END,
  107. OP_TDB_TRAVERSE,
  108. OP_TDB_FIRSTKEY,
  109. OP_TDB_NEXTKEY,
  110. OP_TDB_FETCH,
  111. OP_TDB_DELETE,
  112. };
  113. struct op {
  114. unsigned int serial;
  115. enum op_type op;
  116. TDB_DATA key;
  117. TDB_DATA data;
  118. int ret;
  119. /* Who is waiting for us? */
  120. struct list_head post;
  121. /* What are we waiting for? */
  122. struct list_head pre;
  123. /* If I'm part of a group (traverse/transaction) where is
  124. * start? (Otherwise, 0) */
  125. unsigned int group_start;
  126. union {
  127. int flag; /* open and store */
  128. struct { /* append */
  129. TDB_DATA pre;
  130. TDB_DATA post;
  131. } append;
  132. /* transaction/traverse start/chainlock */
  133. unsigned int group_len;
  134. };
  135. };
  136. static unsigned char hex_char(const char *filename, unsigned int line, char c)
  137. {
  138. c = toupper(c);
  139. if (c >= 'A' && c <= 'F')
  140. return c - 'A' + 10;
  141. if (c >= '0' && c <= '9')
  142. return c - '0';
  143. fail(filename, line, "invalid hex character '%c'", c);
  144. }
  145. /* TDB data is <size>:<%02x>* */
  146. static TDB_DATA make_tdb_data(const void *ctx,
  147. const char *filename, unsigned int line,
  148. const char *word)
  149. {
  150. TDB_DATA data;
  151. unsigned int i;
  152. const char *p;
  153. if (streq(word, "NULL"))
  154. return tdb_null;
  155. data.dsize = atoi(word);
  156. data.dptr = talloc_array(ctx, unsigned char, data.dsize);
  157. p = strchr(word, ':');
  158. if (!p)
  159. fail(filename, line, "invalid tdb data '%s'", word);
  160. p++;
  161. for (i = 0; i < data.dsize; i++)
  162. data.dptr[i] = hex_char(filename, line, p[i*2])*16
  163. + hex_char(filename, line, p[i*2+1]);
  164. return data;
  165. }
  166. static void add_op(const char *filename, struct op **op, unsigned int i,
  167. unsigned int serial, enum op_type type)
  168. {
  169. struct op *new;
  170. *op = talloc_realloc(NULL, *op, struct op, i+1);
  171. new = (*op) + i;
  172. new->op = type;
  173. new->serial = serial;
  174. new->ret = 0;
  175. new->group_start = 0;
  176. }
  177. static void op_add_nothing(const char *filename,
  178. struct op op[], unsigned int op_num, char *words[])
  179. {
  180. if (words[2])
  181. fail(filename, op_num+1, "Expected no arguments");
  182. op[op_num].key = tdb_null;
  183. }
  184. static void op_add_key(const char *filename,
  185. struct op op[], unsigned int op_num, char *words[])
  186. {
  187. if (words[2] == NULL || words[3])
  188. fail(filename, op_num+1, "Expected just a key");
  189. op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
  190. total_keys++;
  191. }
  192. static void op_add_key_ret(const char *filename,
  193. struct op op[], unsigned int op_num, char *words[])
  194. {
  195. if (!words[2] || !words[3] || !words[4] || words[5]
  196. || !streq(words[3], "="))
  197. fail(filename, op_num+1, "Expected <key> = <ret>");
  198. op[op_num].ret = atoi(words[4]);
  199. op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
  200. /* May only be a unique key if it fails */
  201. if (op[op_num].ret != 0)
  202. total_keys++;
  203. }
  204. static void op_add_key_data(const char *filename,
  205. struct op op[], unsigned int op_num, char *words[])
  206. {
  207. if (!words[2] || !words[3] || !words[4] || words[5]
  208. || !streq(words[3], "="))
  209. fail(filename, op_num+1, "Expected <key> = <data>");
  210. op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
  211. op[op_num].data = make_tdb_data(op, filename, op_num+1, words[4]);
  212. /* May only be a unique key if it fails */
  213. if (!op[op_num].data.dptr)
  214. total_keys++;
  215. }
  216. /* We don't record the keys or data for a traverse, as we don't use them. */
  217. static void op_add_traverse(const char *filename,
  218. struct op op[], unsigned int op_num, char *words[])
  219. {
  220. if (!words[2] || !words[3] || !words[4] || words[5]
  221. || !streq(words[3], "="))
  222. fail(filename, op_num+1, "Expected <key> = <data>");
  223. op[op_num].key = tdb_null;
  224. }
  225. /* Full traverse info is useful for debugging, but changing it to
  226. * "traversefn" without the data makes the traces *much* smaller! */
  227. static void op_add_traversefn(const char *filename,
  228. struct op op[], unsigned int op_num, char *words[])
  229. {
  230. if (words[2])
  231. fail(filename, op_num+1, "Expected no values");
  232. op[op_num].key = tdb_null;
  233. }
  234. /* <serial> tdb_store <rec> <rec> <flag> = <ret> */
  235. static void op_add_store(const char *filename,
  236. struct op op[], unsigned int op_num, char *words[])
  237. {
  238. if (!words[2] || !words[3] || !words[4] || !words[5] || !words[6]
  239. || words[7] || !streq(words[5], "="))
  240. fail(filename, op_num+1, "Expect <key> <data> <flag> = <ret>");
  241. op[op_num].flag = strtoul(words[4], NULL, 0);
  242. op[op_num].ret = atoi(words[6]);
  243. op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
  244. op[op_num].data = make_tdb_data(op, filename, op_num+1, words[3]);
  245. total_keys++;
  246. }
  247. /* <serial> tdb_append <rec> <rec> = <rec> */
  248. static void op_add_append(const char *filename,
  249. struct op op[], unsigned int op_num, char *words[])
  250. {
  251. if (!words[2] || !words[3] || !words[4] || !words[5] || words[6]
  252. || !streq(words[4], "="))
  253. fail(filename, op_num+1, "Expect <key> <data> = <rec>");
  254. op[op_num].key = make_tdb_data(op, filename, op_num+1, words[2]);
  255. op[op_num].data = make_tdb_data(op, filename, op_num+1, words[3]);
  256. op[op_num].append.post
  257. = make_tdb_data(op, filename, op_num+1, words[5]);
  258. /* By subtraction, figure out what previous data was. */
  259. op[op_num].append.pre.dptr = op[op_num].append.post.dptr;
  260. op[op_num].append.pre.dsize
  261. = op[op_num].append.post.dsize - op[op_num].data.dsize;
  262. total_keys++;
  263. }
  264. /* <serial> tdb_get_seqnum = <ret> */
  265. static void op_add_seqnum(const char *filename,
  266. struct op op[], unsigned int op_num, char *words[])
  267. {
  268. if (!words[2] || !words[3] || words[4] || !streq(words[2], "="))
  269. fail(filename, op_num+1, "Expect = <ret>");
  270. op[op_num].key = tdb_null;
  271. op[op_num].ret = atoi(words[3]);
  272. }
  273. static void op_add_traverse_start(const char *filename,
  274. struct op op[],
  275. unsigned int op_num, char *words[])
  276. {
  277. if (words[2])
  278. fail(filename, op_num+1, "Expect no arguments");
  279. op[op_num].key = tdb_null;
  280. op[op_num].group_len = 0;
  281. }
  282. static void op_add_transaction(const char *filename, struct op op[],
  283. unsigned int op_num, char *words[])
  284. {
  285. if (words[2])
  286. fail(filename, op_num+1, "Expect no arguments");
  287. op[op_num].key = tdb_null;
  288. op[op_num].group_len = 0;
  289. }
  290. static void op_add_chainlock(const char *filename,
  291. struct op op[], unsigned int op_num, char *words[])
  292. {
  293. if (words[2] == NULL || words[3])
  294. fail(filename, op_num+1, "Expected just a key");
  295. /* A chainlock key isn't a key in the normal sense; it doesn't
  296. * have to be in the db at all. Also, we don't want to hash this op. */
  297. op[op_num].data = make_tdb_data(op, filename, op_num+1, words[2]);
  298. op[op_num].key = tdb_null;
  299. op[op_num].group_len = 0;
  300. }
  301. static void op_add_chainlock_ret(const char *filename,
  302. struct op op[], unsigned int op_num,
  303. char *words[])
  304. {
  305. if (!words[2] || !words[3] || !words[4] || words[5]
  306. || !streq(words[3], "="))
  307. fail(filename, op_num+1, "Expected <key> = <ret>");
  308. op[op_num].ret = atoi(words[4]);
  309. op[op_num].data = make_tdb_data(op, filename, op_num+1, words[2]);
  310. op[op_num].key = tdb_null;
  311. op[op_num].group_len = 0;
  312. total_keys++;
  313. }
  314. static int op_find_start(struct op op[], unsigned int op_num, enum op_type type)
  315. {
  316. unsigned int i;
  317. for (i = op_num-1; i > 0; i--) {
  318. if (op[i].op == type && !op[i].group_len)
  319. return i;
  320. }
  321. return 0;
  322. }
  323. static void op_analyze_transaction(const char *filename,
  324. struct op op[], unsigned int op_num,
  325. char *words[])
  326. {
  327. unsigned int start, i;
  328. op[op_num].key = tdb_null;
  329. if (words[2])
  330. fail(filename, op_num+1, "Expect no arguments");
  331. start = op_find_start(op, op_num, OP_TDB_TRANSACTION_START);
  332. if (!start)
  333. fail(filename, op_num+1, "no transaction start found");
  334. op[start].group_len = op_num - start;
  335. /* This rolls in nested transactions. I think that's right. */
  336. for (i = start; i <= op_num; i++)
  337. op[i].group_start = start;
  338. }
  339. /* We treat chainlocks a lot like transactions, even though that's overkill */
  340. static void op_analyze_chainlock(const char *filename,
  341. struct op op[], unsigned int op_num,
  342. char *words[])
  343. {
  344. unsigned int i, start;
  345. if (words[2] == NULL || words[3])
  346. fail(filename, op_num+1, "Expected just a key");
  347. op[op_num].data = make_tdb_data(op, filename, op_num+1, words[2]);
  348. op[op_num].key = tdb_null;
  349. total_keys++;
  350. start = op_find_start(op, op_num, OP_TDB_CHAINLOCK);
  351. if (!start)
  352. start = op_find_start(op, op_num, OP_TDB_CHAINLOCK_READ);
  353. if (!start)
  354. fail(filename, op_num+1, "no initial chainlock found");
  355. /* FIXME: We'd have to do something clever to make this work
  356. * vs. deadlock. */
  357. if (!key_eq(op[start].data, op[op_num].data))
  358. fail(filename, op_num+1, "nested chainlock calls?");
  359. op[start].group_len = op_num - start;
  360. for (i = start; i <= op_num; i++)
  361. op[i].group_start = start;
  362. }
  363. static void op_analyze_traverse(const char *filename,
  364. struct op op[], unsigned int op_num,
  365. char *words[])
  366. {
  367. int i, start;
  368. op[op_num].key = tdb_null;
  369. /* = %u means traverse function terminated. */
  370. if (words[2]) {
  371. if (!streq(words[2], "=") || !words[3] || words[4])
  372. fail(filename, op_num+1, "expect = <num>");
  373. op[op_num].ret = atoi(words[3]);
  374. } else
  375. op[op_num].ret = 0;
  376. start = op_find_start(op, op_num, OP_TDB_TRAVERSE_START);
  377. if (!start)
  378. start = op_find_start(op, op_num, OP_TDB_TRAVERSE_READ_START);
  379. if (!start)
  380. fail(filename, op_num+1, "no traversal start found");
  381. op[start].group_len = op_num - start;
  382. /* Don't roll in nested traverse/chainlock */
  383. for (i = start; i <= op_num; i++)
  384. if (!op[i].group_start)
  385. op[i].group_start = start;
  386. }
  387. /* Keep -Wmissing-declarations happy: */
  388. const struct op_table *
  389. find_keyword (register const char *str, register unsigned int len);
  390. #include "keywords.c"
  391. struct depend {
  392. /* We can have more than one */
  393. struct list_node pre_list;
  394. struct list_node post_list;
  395. unsigned int needs_file;
  396. unsigned int needs_opnum;
  397. unsigned int satisfies_file;
  398. unsigned int satisfies_opnum;
  399. };
  400. static void check_deps(const char *filename, struct op op[], unsigned int num)
  401. {
  402. #ifdef DEBUG_DEPS
  403. unsigned int i;
  404. for (i = 1; i < num; i++)
  405. if (!list_empty(&op[i].pre))
  406. fail(filename, i+1, "Still has dependencies");
  407. #endif
  408. }
  409. static void dump_pre(char *filename[], struct op *op[],
  410. unsigned int file, unsigned int i)
  411. {
  412. struct depend *dep;
  413. printf("%s:%u (%u) still waiting for:\n", filename[file], i+1,
  414. op[file][i].serial);
  415. list_for_each(&op[file][i].pre, dep, pre_list)
  416. printf(" %s:%u (%u)\n",
  417. filename[dep->satisfies_file], dep->satisfies_opnum+1,
  418. op[dep->satisfies_file][dep->satisfies_opnum].serial);
  419. check_deps(filename[file], op[file], i);
  420. }
  421. /* We simply read/write pointers, since we all are children. */
  422. static bool do_pre(struct tdb_context *tdb,
  423. char *filename[], struct op *op[],
  424. unsigned int file, int pre_fd, unsigned int i,
  425. bool backoff)
  426. {
  427. while (!list_empty(&op[file][i].pre)) {
  428. struct depend *dep;
  429. #if DEBUG_DEPS
  430. printf("%s:%u:waiting for pre\n", filename[file], i+1);
  431. fflush(stdout);
  432. #endif
  433. if (backoff)
  434. alarm(2);
  435. else
  436. alarm(10);
  437. while (read(pre_fd, &dep, sizeof(dep)) != sizeof(dep)) {
  438. if (errno == EINTR) {
  439. if (backoff) {
  440. warnx("%s:%u:avoiding deadlock",
  441. filename[file], i+1);
  442. return false;
  443. }
  444. dump_pre(filename, op, file, i);
  445. exit(1);
  446. } else
  447. errx(1, "Reading from pipe");
  448. }
  449. alarm(0);
  450. #if DEBUG_DEPS
  451. printf("%s:%u:got pre %u from %s:%u\n", filename[file], i+1,
  452. dep->needs_opnum+1, filename[dep->satisfies_file],
  453. dep->satisfies_opnum+1);
  454. fflush(stdout);
  455. #endif
  456. /* This could be any op, not just this one. */
  457. talloc_free(dep);
  458. }
  459. return true;
  460. }
  461. static void do_post(char *filename[], struct op *op[],
  462. unsigned int file, unsigned int i)
  463. {
  464. struct depend *dep;
  465. list_for_each(&op[file][i].post, dep, post_list) {
  466. #if DEBUG_DEPS
  467. printf("%s:%u:sending to file %s:%u\n", filename[file], i+1,
  468. filename[dep->needs_file], dep->needs_opnum+1);
  469. #endif
  470. if (write(pipes[dep->needs_file].fd[1], &dep, sizeof(dep))
  471. != sizeof(dep))
  472. err(1, "%s:%u failed to tell file %s",
  473. filename[file], i+1, filename[dep->needs_file]);
  474. }
  475. }
  476. static int get_len(TDB_DATA key, TDB_DATA data, void *private_data)
  477. {
  478. return data.dsize;
  479. }
  480. static unsigned run_ops(struct tdb_context *tdb,
  481. int pre_fd,
  482. char *filename[],
  483. struct op *op[],
  484. unsigned int file,
  485. unsigned int start, unsigned int stop,
  486. bool backoff);
  487. struct traverse_info {
  488. struct op **op;
  489. char **filename;
  490. unsigned file;
  491. int pre_fd;
  492. unsigned int start;
  493. unsigned int i;
  494. };
  495. /* More complex. Just do whatever's they did at the n'th entry. */
  496. static int nontrivial_traverse(struct tdb_context *tdb,
  497. TDB_DATA key, TDB_DATA data,
  498. void *_tinfo)
  499. {
  500. struct traverse_info *tinfo = _tinfo;
  501. unsigned int trav_len = tinfo->op[tinfo->file][tinfo->start].group_len;
  502. bool avoid_deadlock = false;
  503. if (tinfo->i == tinfo->start + trav_len) {
  504. /* This can happen if traverse expects to be empty. */
  505. if (trav_len == 1)
  506. return 1;
  507. fail(tinfo->filename[tinfo->file], tinfo->start + 1,
  508. "traverse did not terminate");
  509. }
  510. if (tinfo->op[tinfo->file][tinfo->i].op != OP_TDB_TRAVERSE)
  511. fail(tinfo->filename[tinfo->file], tinfo->start + 1,
  512. "%s:%u:traverse terminated early");
  513. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  514. avoid_deadlock = true;
  515. #endif
  516. /* Run any normal ops. */
  517. tinfo->i = run_ops(tdb, tinfo->pre_fd, tinfo->filename, tinfo->op,
  518. tinfo->file, tinfo->i+1, tinfo->start + trav_len,
  519. avoid_deadlock);
  520. /* We backed off, or we hit OP_TDB_TRAVERSE_END. */
  521. if (tinfo->op[tinfo->file][tinfo->i].op != OP_TDB_TRAVERSE)
  522. return 1;
  523. return 0;
  524. }
  525. static unsigned op_traverse(struct tdb_context *tdb,
  526. int pre_fd,
  527. char *filename[],
  528. unsigned int file,
  529. int (*traversefn)(struct tdb_context *,
  530. tdb_traverse_func, void *),
  531. struct op *op[],
  532. unsigned int start)
  533. {
  534. struct traverse_info tinfo = { op, filename, file, pre_fd,
  535. start, start+1 };
  536. traversefn(tdb, nontrivial_traverse, &tinfo);
  537. /* Traversing in wrong order can have strange effects: eg. if
  538. * original traverse went A (delete A), B, we might do B
  539. * (delete A). So if we have ops left over, we do it now. */
  540. while (tinfo.i != start + op[file][start].group_len) {
  541. if (op[file][tinfo.i].op == OP_TDB_TRAVERSE)
  542. tinfo.i++;
  543. else
  544. tinfo.i = run_ops(tdb, pre_fd, filename, op, file,
  545. tinfo.i,
  546. start + op[file][start].group_len,
  547. false);
  548. }
  549. return tinfo.i;
  550. }
  551. static void break_out(int sig)
  552. {
  553. }
  554. static __attribute__((noinline))
  555. unsigned run_ops(struct tdb_context *tdb,
  556. int pre_fd,
  557. char *filename[],
  558. struct op *op[],
  559. unsigned int file,
  560. unsigned int start, unsigned int stop,
  561. bool backoff)
  562. {
  563. unsigned int i;
  564. struct sigaction sa;
  565. sa.sa_handler = break_out;
  566. sa.sa_flags = 0;
  567. sigaction(SIGALRM, &sa, NULL);
  568. for (i = start; i < stop; i++) {
  569. if (!do_pre(tdb, filename, op, file, pre_fd, i, backoff))
  570. return i;
  571. switch (op[file][i].op) {
  572. case OP_TDB_LOCKALL:
  573. try(tdb_lockall(tdb), op[file][i].ret);
  574. break;
  575. case OP_TDB_LOCKALL_MARK:
  576. try(tdb_lockall_mark(tdb), op[file][i].ret);
  577. break;
  578. case OP_TDB_LOCKALL_UNMARK:
  579. try(tdb_lockall_unmark(tdb), op[file][i].ret);
  580. break;
  581. case OP_TDB_LOCKALL_NONBLOCK:
  582. unreliable(tdb_lockall_nonblock(tdb), op[file][i].ret,
  583. tdb_lockall(tdb), tdb_unlockall(tdb));
  584. break;
  585. case OP_TDB_UNLOCKALL:
  586. try(tdb_unlockall(tdb), op[file][i].ret);
  587. break;
  588. case OP_TDB_LOCKALL_READ:
  589. try(tdb_lockall_read(tdb), op[file][i].ret);
  590. break;
  591. case OP_TDB_LOCKALL_READ_NONBLOCK:
  592. unreliable(tdb_lockall_read_nonblock(tdb),
  593. op[file][i].ret,
  594. tdb_lockall_read(tdb),
  595. tdb_unlockall_read(tdb));
  596. break;
  597. case OP_TDB_UNLOCKALL_READ:
  598. try(tdb_unlockall_read(tdb), op[file][i].ret);
  599. break;
  600. case OP_TDB_CHAINLOCK:
  601. try(tdb_chainlock(tdb, op[file][i].key),
  602. op[file][i].ret);
  603. break;
  604. case OP_TDB_CHAINLOCK_NONBLOCK:
  605. unreliable(tdb_chainlock_nonblock(tdb, op[file][i].key),
  606. op[file][i].ret,
  607. tdb_chainlock(tdb, op[file][i].key),
  608. tdb_chainunlock(tdb, op[file][i].key));
  609. break;
  610. case OP_TDB_CHAINLOCK_MARK:
  611. try(tdb_chainlock_mark(tdb, op[file][i].key),
  612. op[file][i].ret);
  613. break;
  614. case OP_TDB_CHAINLOCK_UNMARK:
  615. try(tdb_chainlock_unmark(tdb, op[file][i].key),
  616. op[file][i].ret);
  617. break;
  618. case OP_TDB_CHAINUNLOCK:
  619. try(tdb_chainunlock(tdb, op[file][i].key),
  620. op[file][i].ret);
  621. break;
  622. case OP_TDB_CHAINLOCK_READ:
  623. try(tdb_chainlock_read(tdb, op[file][i].key),
  624. op[file][i].ret);
  625. break;
  626. case OP_TDB_CHAINUNLOCK_READ:
  627. try(tdb_chainunlock_read(tdb, op[file][i].key),
  628. op[file][i].ret);
  629. break;
  630. case OP_TDB_PARSE_RECORD:
  631. try(tdb_parse_record(tdb, op[file][i].key, get_len,
  632. NULL),
  633. op[file][i].ret);
  634. break;
  635. case OP_TDB_EXISTS:
  636. try(tdb_exists(tdb, op[file][i].key), op[file][i].ret);
  637. break;
  638. case OP_TDB_STORE:
  639. try(tdb_store(tdb, op[file][i].key, op[file][i].data,
  640. op[file][i].flag),
  641. op[file][i].ret);
  642. break;
  643. case OP_TDB_APPEND:
  644. try(tdb_append(tdb, op[file][i].key, op[file][i].data),
  645. op[file][i].ret);
  646. break;
  647. case OP_TDB_GET_SEQNUM:
  648. try(tdb_get_seqnum(tdb), op[file][i].ret);
  649. break;
  650. case OP_TDB_WIPE_ALL:
  651. try(tdb_wipe_all(tdb), op[file][i].ret);
  652. break;
  653. case OP_TDB_TRANSACTION_START:
  654. try(tdb_transaction_start(tdb), op[file][i].ret);
  655. break;
  656. case OP_TDB_TRANSACTION_CANCEL:
  657. try(tdb_transaction_cancel(tdb), op[file][i].ret);
  658. break;
  659. case OP_TDB_TRANSACTION_COMMIT:
  660. try(tdb_transaction_commit(tdb), op[file][i].ret);
  661. break;
  662. case OP_TDB_TRAVERSE_READ_START:
  663. i = op_traverse(tdb, pre_fd, filename, file,
  664. tdb_traverse_read, op, i);
  665. break;
  666. case OP_TDB_TRAVERSE_START:
  667. i = op_traverse(tdb, pre_fd, filename, file,
  668. tdb_traverse, op, i);
  669. break;
  670. case OP_TDB_TRAVERSE:
  671. /* Terminate: we're in a traverse, and we've
  672. * done our ops. */
  673. return i;
  674. case OP_TDB_TRAVERSE_END:
  675. fail(filename[file], i+1, "unexpected end traverse");
  676. /* FIXME: These must be treated like traverse. */
  677. case OP_TDB_FIRSTKEY:
  678. if (!key_eq(tdb_firstkey(tdb), op[file][i].data))
  679. fail(filename[file], i+1, "bad firstkey");
  680. break;
  681. case OP_TDB_NEXTKEY:
  682. if (!key_eq(tdb_nextkey(tdb, op[file][i].key),
  683. op[file][i].data))
  684. fail(filename[file], i+1, "bad nextkey");
  685. break;
  686. case OP_TDB_FETCH: {
  687. TDB_DATA f = tdb_fetch(tdb, op[file][i].key);
  688. if (!key_eq(f, op[file][i].data))
  689. fail(filename[file], i+1, "bad fetch %u",
  690. f.dsize);
  691. break;
  692. }
  693. case OP_TDB_DELETE:
  694. try(tdb_delete(tdb, op[file][i].key), op[file][i].ret);
  695. break;
  696. }
  697. do_post(filename, op, file, i);
  698. }
  699. return i;
  700. }
  701. /* tdbtorture, in particular, can do a tdb_close with a transaction in
  702. * progress. */
  703. static struct op *maybe_cancel_transaction(const char *filename,
  704. struct op *op, unsigned int *num)
  705. {
  706. unsigned int start = op_find_start(op, *num, OP_TDB_TRANSACTION_START);
  707. if (start) {
  708. char *words[] = { "<unknown>", "tdb_close", NULL };
  709. add_op(filename, &op, *num, op[start].serial,
  710. OP_TDB_TRANSACTION_CANCEL);
  711. op_analyze_transaction(filename, op, *num, words);
  712. (*num)++;
  713. }
  714. return op;
  715. }
  716. static struct op *load_tracefile(const char *filename, unsigned int *num,
  717. unsigned int *hashsize,
  718. unsigned int *tdb_flags,
  719. unsigned int *open_flags)
  720. {
  721. unsigned int i;
  722. struct op *op = talloc_array(NULL, struct op, 1);
  723. char **words;
  724. char **lines;
  725. char *file;
  726. file = grab_file(NULL, filename, NULL);
  727. if (!file)
  728. err(1, "Reading %s", filename);
  729. lines = strsplit(file, file, "\n", NULL);
  730. if (!lines[0])
  731. errx(1, "%s is empty", filename);
  732. words = strsplit(lines, lines[0], " ", NULL);
  733. if (!streq(words[1], "tdb_open"))
  734. fail(filename, 1, "does not start with tdb_open");
  735. *hashsize = atoi(words[2]);
  736. *tdb_flags = strtoul(words[3], NULL, 0);
  737. *open_flags = strtoul(words[4], NULL, 0);
  738. for (i = 1; lines[i]; i++) {
  739. const struct op_table *opt;
  740. words = strsplit(lines, lines[i], " ", NULL);
  741. if (!words[0] || !words[1])
  742. fail(filename, i+1, "Expected serial number and op");
  743. opt = find_keyword(words[1], strlen(words[1]));
  744. if (!opt) {
  745. if (streq(words[1], "tdb_close")) {
  746. if (lines[i+1])
  747. fail(filename, i+2,
  748. "lines after tdb_close");
  749. *num = i;
  750. talloc_free(lines);
  751. return maybe_cancel_transaction(filename,
  752. op, num);
  753. }
  754. fail(filename, i+1, "Unknown operation '%s'", words[1]);
  755. }
  756. add_op(filename, &op, i, atoi(words[0]), opt->type);
  757. opt->enhance_op(filename, op, i, words);
  758. }
  759. fprintf(stderr, "%s:%u:last operation is not tdb_close: incomplete?",
  760. filename, i);
  761. talloc_free(lines);
  762. *num = i - 1;
  763. return maybe_cancel_transaction(filename, op, num);
  764. }
  765. /* We remember all the keys we've ever seen, and who has them. */
  766. struct key_user {
  767. unsigned int file;
  768. unsigned int op_num;
  769. };
  770. struct keyinfo {
  771. TDB_DATA key;
  772. unsigned int num_users;
  773. struct key_user *user;
  774. };
  775. static const TDB_DATA must_not_exist;
  776. static const TDB_DATA must_exist;
  777. static const TDB_DATA not_exists_or_empty;
  778. /* NULL means doesn't care if it exists or not, &must_exist means
  779. * it must exist but we don't care what, &must_not_exist means it must
  780. * not exist, otherwise the data it needs. */
  781. static const TDB_DATA *needs(const struct op *op)
  782. {
  783. switch (op->op) {
  784. /* FIXME: Pull forward deps, since we can deadlock */
  785. case OP_TDB_CHAINLOCK:
  786. case OP_TDB_CHAINLOCK_NONBLOCK:
  787. case OP_TDB_CHAINLOCK_MARK:
  788. case OP_TDB_CHAINLOCK_UNMARK:
  789. case OP_TDB_CHAINUNLOCK:
  790. case OP_TDB_CHAINLOCK_READ:
  791. case OP_TDB_CHAINUNLOCK_READ:
  792. return NULL;
  793. case OP_TDB_APPEND:
  794. if (op->append.pre.dsize == 0)
  795. return &not_exists_or_empty;
  796. return &op->append.pre;
  797. case OP_TDB_STORE:
  798. if (op->flag == TDB_INSERT) {
  799. if (op->ret < 0)
  800. return &must_exist;
  801. else
  802. return &must_not_exist;
  803. } else if (op->flag == TDB_MODIFY) {
  804. if (op->ret < 0)
  805. return &must_not_exist;
  806. else
  807. return &must_exist;
  808. }
  809. /* No flags? Don't care */
  810. return NULL;
  811. case OP_TDB_EXISTS:
  812. if (op->ret == 1)
  813. return &must_exist;
  814. else
  815. return &must_not_exist;
  816. case OP_TDB_PARSE_RECORD:
  817. if (op->ret < 0)
  818. return &must_not_exist;
  819. return &must_exist;
  820. /* FIXME: handle these. */
  821. case OP_TDB_WIPE_ALL:
  822. case OP_TDB_FIRSTKEY:
  823. case OP_TDB_NEXTKEY:
  824. case OP_TDB_GET_SEQNUM:
  825. case OP_TDB_TRAVERSE:
  826. case OP_TDB_TRANSACTION_COMMIT:
  827. case OP_TDB_TRANSACTION_CANCEL:
  828. case OP_TDB_TRANSACTION_START:
  829. return NULL;
  830. case OP_TDB_FETCH:
  831. if (!op->data.dptr)
  832. return &must_not_exist;
  833. return &op->data;
  834. case OP_TDB_DELETE:
  835. if (op->ret < 0)
  836. return &must_not_exist;
  837. return &must_exist;
  838. default:
  839. errx(1, "Unexpected op %i", op->op);
  840. }
  841. }
  842. static bool starts_transaction(const struct op *op)
  843. {
  844. return op->op == OP_TDB_TRANSACTION_START;
  845. }
  846. static bool in_transaction(const struct op op[], unsigned int i)
  847. {
  848. return op[i].group_start && starts_transaction(&op[op[i].group_start]);
  849. }
  850. static bool successful_transaction(const struct op *op)
  851. {
  852. return starts_transaction(op)
  853. && op[op->group_len].op == OP_TDB_TRANSACTION_COMMIT;
  854. }
  855. static bool starts_traverse(const struct op *op)
  856. {
  857. return op->op == OP_TDB_TRAVERSE_START
  858. || op->op == OP_TDB_TRAVERSE_READ_START;
  859. }
  860. static bool in_traverse(const struct op op[], unsigned int i)
  861. {
  862. return op[i].group_start && starts_traverse(&op[op[i].group_start]);
  863. }
  864. static bool starts_chainlock(const struct op *op)
  865. {
  866. return op->op == OP_TDB_CHAINLOCK_READ || op->op == OP_TDB_CHAINLOCK;
  867. }
  868. static bool in_chainlock(const struct op op[], unsigned int i)
  869. {
  870. return op[i].group_start && starts_chainlock(&op[op[i].group_start]);
  871. }
  872. /* What's the data after this op? pre if nothing changed. */
  873. static const TDB_DATA *gives(const TDB_DATA *key, const TDB_DATA *pre,
  874. const struct op *op)
  875. {
  876. if (starts_transaction(op) || starts_chainlock(op)) {
  877. unsigned int i;
  878. /* Cancelled transactions don't change anything. */
  879. if (op[op->group_len].op == OP_TDB_TRANSACTION_CANCEL)
  880. return pre;
  881. assert(op[op->group_len].op == OP_TDB_TRANSACTION_COMMIT
  882. || op[op->group_len].op == OP_TDB_CHAINUNLOCK_READ
  883. || op[op->group_len].op == OP_TDB_CHAINUNLOCK);
  884. for (i = 1; i < op->group_len; i++) {
  885. /* This skips nested transactions, too */
  886. if (key_eq(op[i].key, *key))
  887. pre = gives(key, pre, &op[i]);
  888. }
  889. return pre;
  890. }
  891. /* Failed ops don't change state of db. */
  892. if (op->ret < 0)
  893. return pre;
  894. if (op->op == OP_TDB_DELETE || op->op == OP_TDB_WIPE_ALL)
  895. return &tdb_null;
  896. if (op->op == OP_TDB_APPEND)
  897. return &op->append.post;
  898. if (op->op == OP_TDB_STORE)
  899. return &op->data;
  900. return pre;
  901. }
  902. static struct keyinfo *hash_ops(struct op *op[], unsigned int num_ops[],
  903. unsigned int num)
  904. {
  905. unsigned int i, j, h;
  906. struct keyinfo *hash;
  907. hash = talloc_zero_array(op[0], struct keyinfo, total_keys*2);
  908. for (i = 0; i < num; i++) {
  909. for (j = 1; j < num_ops[i]; j++) {
  910. /* We can't do this on allocation, due to realloc. */
  911. list_head_init(&op[i][j].post);
  912. list_head_init(&op[i][j].pre);
  913. if (!op[i][j].key.dptr)
  914. continue;
  915. h = hash_key(&op[i][j].key) % (total_keys * 2);
  916. while (!key_eq(hash[h].key, op[i][j].key)) {
  917. if (!hash[h].key.dptr) {
  918. hash[h].key = op[i][j].key;
  919. break;
  920. }
  921. h = (h + 1) % (total_keys * 2);
  922. }
  923. /* Might as well save some memory if we can. */
  924. if (op[i][j].key.dptr != hash[h].key.dptr) {
  925. talloc_free(op[i][j].key.dptr);
  926. op[i][j].key.dptr = hash[h].key.dptr;
  927. }
  928. hash[h].user = talloc_realloc(hash, hash[h].user,
  929. struct key_user,
  930. hash[h].num_users+1);
  931. /* If it's in a transaction, it's the transaction which
  932. * matters from an analysis POV. */
  933. if (in_transaction(op[i], j)
  934. || in_chainlock(op[i], j)) {
  935. unsigned start = op[i][j].group_start;
  936. /* Don't include twice. */
  937. if (hash[h].num_users
  938. && hash[h].user[hash[h].num_users-1].file
  939. == i
  940. && hash[h].user[hash[h].num_users-1].op_num
  941. == start)
  942. continue;
  943. hash[h].user[hash[h].num_users].op_num = start;
  944. } else
  945. hash[h].user[hash[h].num_users].op_num = j;
  946. hash[h].user[hash[h].num_users].file = i;
  947. hash[h].num_users++;
  948. }
  949. }
  950. return hash;
  951. }
  952. static bool satisfies(const TDB_DATA *key, const TDB_DATA *data,
  953. const struct op *op)
  954. {
  955. const TDB_DATA *need = NULL;
  956. if (starts_transaction(op) || starts_chainlock(op)) {
  957. unsigned int i;
  958. /* Look through for an op in this transaction which
  959. * needs this key. */
  960. for (i = 1; i < op->group_len; i++) {
  961. if (key_eq(op[i].key, *key)) {
  962. need = needs(&op[i]);
  963. /* tdb_exists() is special: there might be
  964. * something in the transaction with more
  965. * specific requirements. Other ops don't have
  966. * specific requirements (eg. store or delete),
  967. * but they change the value so we can't get
  968. * more information from future ops. */
  969. if (op[i].op != OP_TDB_EXISTS)
  970. break;
  971. }
  972. }
  973. } else
  974. need = needs(op);
  975. /* Don't need anything? Cool. */
  976. if (!need)
  977. return true;
  978. /* This should be tdb_null or a real value. */
  979. assert(data != &must_exist);
  980. assert(data != &must_not_exist);
  981. assert(data != &not_exists_or_empty);
  982. /* Must not exist? data must not exist. */
  983. if (need == &must_not_exist)
  984. return data == &tdb_null;
  985. /* Must exist? */
  986. if (need == &must_exist)
  987. return data != &tdb_null;
  988. /* Either noexist or empty. */
  989. if (need == &not_exists_or_empty)
  990. return data->dsize == 0;
  991. /* Needs something specific. */
  992. return key_eq(*data, *need);
  993. }
  994. static void move_to_front(struct key_user res[], unsigned off, unsigned elem)
  995. {
  996. if (elem != off) {
  997. struct key_user tmp = res[elem];
  998. memmove(res + off + 1, res + off, (elem - off)*sizeof(res[0]));
  999. res[off] = tmp;
  1000. }
  1001. }
  1002. static void restore_to_pos(struct key_user res[], unsigned off, unsigned elem)
  1003. {
  1004. if (elem != off) {
  1005. struct key_user tmp = res[off];
  1006. memmove(res + off, res + off + 1, (elem - off)*sizeof(res[0]));
  1007. res[elem] = tmp;
  1008. }
  1009. }
  1010. static bool sort_deps(char *filename[], struct op *op[],
  1011. struct key_user res[],
  1012. unsigned off, unsigned num,
  1013. const TDB_DATA *key, const TDB_DATA *data,
  1014. unsigned num_files, unsigned fuzz)
  1015. {
  1016. unsigned int i, files_done;
  1017. struct op *this_op;
  1018. bool done[num_files];
  1019. /* None left? We're sorted. */
  1020. if (off == num)
  1021. return true;
  1022. /* Does this make serial numbers go backwards? Allow a little fuzz. */
  1023. if (off > 0) {
  1024. int serial1 = op[res[off-1].file][res[off-1].op_num].serial;
  1025. int serial2 = op[res[off].file][res[off].op_num].serial;
  1026. if (serial1 - serial2 > (int)fuzz) {
  1027. #if DEBUG_DEPS
  1028. printf("Serial jump too far (%u -> %u)\n",
  1029. serial1, serial2);
  1030. #endif
  1031. return false;
  1032. }
  1033. }
  1034. memset(done, 0, sizeof(done));
  1035. /* Since ops within a trace file are ordered, we just need to figure
  1036. * out which file to try next. Since we don't take into account
  1037. * inter-key relationships (which exist by virtue of trace file order),
  1038. * we minimize the chance of harm by trying to keep in serial order. */
  1039. for (files_done = 0, i = off; i < num && files_done < num_files; i++) {
  1040. if (done[res[i].file])
  1041. continue;
  1042. this_op = &op[res[i].file][res[i].op_num];
  1043. /* Is what we have good enough for this op? */
  1044. if (satisfies(key, data, this_op)) {
  1045. move_to_front(res, off, i);
  1046. if (sort_deps(filename, op, res, off+1, num,
  1047. key, gives(key, data, this_op),
  1048. num_files, fuzz))
  1049. return true;
  1050. restore_to_pos(res, off, i);
  1051. }
  1052. done[res[i].file] = true;
  1053. files_done++;
  1054. }
  1055. /* No combination worked. */
  1056. return false;
  1057. }
  1058. static void check_dep_sorting(struct key_user user[], unsigned num_users,
  1059. unsigned num_files)
  1060. {
  1061. #if DEBUG_DEPS
  1062. unsigned int i;
  1063. unsigned minima[num_files];
  1064. memset(minima, 0, sizeof(minima));
  1065. for (i = 0; i < num_users; i++) {
  1066. assert(minima[user[i].file] < user[i].op_num);
  1067. minima[user[i].file] = user[i].op_num;
  1068. }
  1069. #endif
  1070. }
  1071. /* All these ops happen on the same key. Which comes first?
  1072. *
  1073. * This can happen both because read ops or failed write ops don't
  1074. * change serial number, and also due to race since we access the
  1075. * number unlocked (the race can cause less detectable ordering problems,
  1076. * in which case we'll deadlock and report: fix manually in that case).
  1077. */
  1078. static void figure_deps(char *filename[], struct op *op[],
  1079. const TDB_DATA *key, struct key_user user[],
  1080. unsigned num_users, unsigned num_files)
  1081. {
  1082. /* We assume database starts empty. */
  1083. const struct TDB_DATA *data = &tdb_null;
  1084. unsigned int fuzz;
  1085. /* We prefer to keep strict serial order if possible: it's the
  1086. * most likely. We get more lax if that fails. */
  1087. for (fuzz = 0; fuzz < 100; fuzz = (fuzz + 1)*2) {
  1088. if (sort_deps(filename, op, user, 0, num_users, key, data,
  1089. num_files, fuzz))
  1090. break;
  1091. }
  1092. if (fuzz >= 100)
  1093. fail(filename[user[0].file], user[0].op_num+1,
  1094. "Could not resolve inter-dependencies");
  1095. check_dep_sorting(user, num_users, num_files);
  1096. }
  1097. static void sort_ops(struct keyinfo hash[], char *filename[], struct op *op[],
  1098. unsigned int num)
  1099. {
  1100. unsigned int h;
  1101. /* Gcc nexted function extension. How cool is this? */
  1102. int compare_serial(const void *_a, const void *_b)
  1103. {
  1104. const struct key_user *a = _a, *b = _b;
  1105. /* First, maintain order within any trace file. */
  1106. if (a->file == b->file)
  1107. return a->op_num - b->op_num;
  1108. /* Otherwise, arrange by serial order. */
  1109. if (op[a->file][a->op_num].serial !=
  1110. op[b->file][b->op_num].serial)
  1111. return op[a->file][a->op_num].serial
  1112. - op[b->file][b->op_num].serial;
  1113. /* Cancelled transactions are assumed to happen first. */
  1114. if (starts_transaction(&op[a->file][a->op_num])
  1115. && !successful_transaction(&op[a->file][a->op_num]))
  1116. return -1;
  1117. if (starts_transaction(&op[b->file][b->op_num])
  1118. && !successful_transaction(&op[b->file][b->op_num]))
  1119. return 1;
  1120. /* No idea. */
  1121. return 0;
  1122. }
  1123. /* Now sort into serial order. */
  1124. for (h = 0; h < total_keys * 2; h++) {
  1125. struct key_user *user = hash[h].user;
  1126. qsort(user, hash[h].num_users, sizeof(user[0]), compare_serial);
  1127. figure_deps(filename, op, &hash[h].key, user, hash[h].num_users,
  1128. num);
  1129. }
  1130. }
  1131. static int destroy_depend(struct depend *dep)
  1132. {
  1133. list_del(&dep->pre_list);
  1134. list_del(&dep->post_list);
  1135. return 0;
  1136. }
  1137. static void add_dependency(void *ctx,
  1138. struct op *op[],
  1139. char *filename[],
  1140. unsigned int needs_file,
  1141. unsigned int needs_opnum,
  1142. unsigned int satisfies_file,
  1143. unsigned int satisfies_opnum)
  1144. {
  1145. struct depend *dep;
  1146. /* We don't depend on ourselves. */
  1147. if (needs_file == satisfies_file) {
  1148. assert(satisfies_opnum < needs_opnum);
  1149. return;
  1150. }
  1151. #if DEBUG_DEPS
  1152. printf("%s:%u: depends on %s:%u\n",
  1153. filename[needs_file], needs_opnum+1,
  1154. filename[satisfies_file], satisfies_opnum+1);
  1155. #endif
  1156. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  1157. /* If something in a traverse depends on something in another
  1158. * traverse/transaction, it creates a dependency between the
  1159. * two groups. */
  1160. if ((in_traverse(op[satisfies_file], satisfies_opnum)
  1161. && (starts_transaction(&op[needs_file][needs_opnum])
  1162. || starts_traverse(&op[needs_file][needs_opnum])))
  1163. || (in_traverse(op[needs_file], needs_opnum)
  1164. && (starts_transaction(&op[satisfies_file][satisfies_opnum])
  1165. || starts_traverse(&op[satisfies_file][satisfies_opnum])))){
  1166. unsigned int sat;
  1167. /* We are satisfied by end of group. */
  1168. sat = op[satisfies_file][satisfies_opnum].group_start;
  1169. satisfies_opnum = sat + op[satisfies_file][sat].group_len;
  1170. /* And we need that done by start of our group. */
  1171. needs_opnum = op[needs_file][needs_opnum].group_start;
  1172. }
  1173. /* There is also this case:
  1174. * <traverse> <read foo> ...
  1175. * <transaction> ... </transaction> <create foo>
  1176. * Where if we start the traverse then wait, we could block
  1177. * the transaction and deadlock.
  1178. *
  1179. * We try to address this by ensuring that where seqnum indicates it's
  1180. * possible, we wait for <create foo> before *starting* traverse.
  1181. */
  1182. else if (in_traverse(op[needs_file], needs_opnum)) {
  1183. struct op *need = &op[needs_file][needs_opnum];
  1184. if (op[needs_file][need->group_start].serial >
  1185. op[satisfies_file][satisfies_opnum].serial) {
  1186. needs_opnum = need->group_start;
  1187. }
  1188. }
  1189. #endif
  1190. /* If you depend on a transaction or chainlock, you actually
  1191. * depend on it ending. */
  1192. if (starts_transaction(&op[satisfies_file][satisfies_opnum])
  1193. || starts_chainlock(&op[satisfies_file][satisfies_opnum])) {
  1194. satisfies_opnum
  1195. += op[satisfies_file][satisfies_opnum].group_len;
  1196. #if DEBUG_DEPS
  1197. printf("-> Actually end of transaction %s:%u\n",
  1198. filename[satisfies_file], satisfies_opnum+1);
  1199. #endif
  1200. } else
  1201. /* We should never create a dependency from middle of
  1202. * a transaction. */
  1203. assert(!in_transaction(op[satisfies_file], satisfies_opnum)
  1204. || op[satisfies_file][satisfies_opnum].op
  1205. == OP_TDB_TRANSACTION_COMMIT
  1206. || op[satisfies_file][satisfies_opnum].op
  1207. == OP_TDB_TRANSACTION_CANCEL);
  1208. assert(op[needs_file][needs_opnum].op != OP_TDB_TRAVERSE);
  1209. assert(op[satisfies_file][satisfies_opnum].op != OP_TDB_TRAVERSE);
  1210. dep = talloc(ctx, struct depend);
  1211. dep->needs_file = needs_file;
  1212. dep->needs_opnum = needs_opnum;
  1213. dep->satisfies_file = satisfies_file;
  1214. dep->satisfies_opnum = satisfies_opnum;
  1215. list_add(&op[satisfies_file][satisfies_opnum].post, &dep->post_list);
  1216. list_add(&op[needs_file][needs_opnum].pre, &dep->pre_list);
  1217. talloc_set_destructor(dep, destroy_depend);
  1218. }
  1219. static bool changes_db(const TDB_DATA *key, const struct op *op)
  1220. {
  1221. return gives(key, NULL, op) != NULL;
  1222. }
  1223. static void depend_on_previous(struct op *op[],
  1224. char *filename[],
  1225. unsigned int num,
  1226. struct key_user user[],
  1227. unsigned int i,
  1228. int prev)
  1229. {
  1230. bool deps[num];
  1231. int j;
  1232. if (i == 0)
  1233. return;
  1234. if (prev == i - 1) {
  1235. /* Just depend on previous. */
  1236. add_dependency(NULL, op, filename,
  1237. user[i].file, user[i].op_num,
  1238. user[prev].file, user[prev].op_num);
  1239. return;
  1240. }
  1241. /* We have to wait for the readers. Find last one in *each* file. */
  1242. memset(deps, 0, sizeof(deps));
  1243. deps[user[i].file] = true;
  1244. for (j = i - 1; j > prev; j--) {
  1245. if (!deps[user[j].file]) {
  1246. add_dependency(NULL, op, filename,
  1247. user[i].file, user[i].op_num,
  1248. user[j].file, user[j].op_num);
  1249. deps[user[j].file] = true;
  1250. }
  1251. }
  1252. }
  1253. /* This is simple, but not complete. We don't take into account
  1254. * indirect dependencies. */
  1255. static void optimize_dependencies(struct op *op[], unsigned int num_ops[],
  1256. unsigned int num)
  1257. {
  1258. unsigned int i, j;
  1259. /* There can only be one real dependency on each file */
  1260. for (i = 0; i < num; i++) {
  1261. for (j = 1; j < num_ops[i]; j++) {
  1262. struct depend *dep, *next;
  1263. struct depend *prev[num];
  1264. memset(prev, 0, sizeof(prev));
  1265. list_for_each_safe(&op[i][j].pre, dep, next, pre_list) {
  1266. if (!prev[dep->satisfies_file]) {
  1267. prev[dep->satisfies_file] = dep;
  1268. continue;
  1269. }
  1270. if (prev[dep->satisfies_file]->satisfies_opnum
  1271. < dep->satisfies_opnum) {
  1272. talloc_free(prev[dep->satisfies_file]);
  1273. prev[dep->satisfies_file] = dep;
  1274. } else
  1275. talloc_free(dep);
  1276. }
  1277. }
  1278. }
  1279. for (i = 0; i < num; i++) {
  1280. int deps[num];
  1281. for (j = 0; j < num; j++)
  1282. deps[j] = -1;
  1283. for (j = 1; j < num_ops[i]; j++) {
  1284. struct depend *dep, *next;
  1285. list_for_each_safe(&op[i][j].pre, dep, next, pre_list) {
  1286. if (deps[dep->satisfies_file]
  1287. >= (int)dep->satisfies_opnum)
  1288. talloc_free(dep);
  1289. else
  1290. deps[dep->satisfies_file]
  1291. = dep->satisfies_opnum;
  1292. }
  1293. }
  1294. }
  1295. }
  1296. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  1297. struct traverse_dep {
  1298. unsigned int file;
  1299. unsigned int op_num;
  1300. };
  1301. /* Force an order among the traversals, so they don't deadlock (as much) */
  1302. static void make_traverse_depends(char *filename[],
  1303. struct op *op[], unsigned int num_ops[],
  1304. unsigned int num)
  1305. {
  1306. unsigned int i, num_traversals = 0;
  1307. int j;
  1308. struct traverse_dep *dep;
  1309. /* Sort by which one runs first. */
  1310. int compare_traverse_dep(const void *_a, const void *_b)
  1311. {
  1312. const struct traverse_dep *ta = _a, *tb = _b;
  1313. const struct op *a = &op[ta->file][ta->op_num],
  1314. *b = &op[tb->file][tb->op_num];
  1315. if (a->serial != b->serial)
  1316. return a->serial - b->serial;
  1317. /* If they have same serial, it means one didn't make any
  1318. * changes. Thus sort by end in that case. */
  1319. return a[a->group_len].serial - b[b->group_len].serial;
  1320. }
  1321. dep = talloc_array(NULL, struct traverse_dep, 1);
  1322. /* Count them. */
  1323. for (i = 0; i < num; i++) {
  1324. for (j = 1; j < num_ops[i]; j++) {
  1325. /* Traverse start (ignore those in
  1326. * transactions; they're already covered by
  1327. * transaction dependencies). */
  1328. if (starts_traverse(&op[i][j])
  1329. && !in_transaction(op[i], j)) {
  1330. dep = talloc_realloc(NULL, dep,
  1331. struct traverse_dep,
  1332. num_traversals+1);
  1333. dep[num_traversals].file = i;
  1334. dep[num_traversals].op_num = j;
  1335. num_traversals++;
  1336. }
  1337. }
  1338. }
  1339. qsort(dep, num_traversals, sizeof(dep[0]), compare_traverse_dep);
  1340. for (i = 1; i < num_traversals; i++) {
  1341. const struct op *prev = &op[dep[i-1].file][dep[i-1].op_num];
  1342. const struct op *curr = &op[dep[i].file][dep[i].op_num];
  1343. /* Read traverses don't depend on each other (read lock). */
  1344. if (prev->op == OP_TDB_TRAVERSE_READ_START
  1345. && curr->op == OP_TDB_TRAVERSE_READ_START)
  1346. continue;
  1347. /* Only make dependency if it's clear. */
  1348. if (compare_traverse_dep(&dep[i], &dep[i-1])) {
  1349. /* i depends on end of traverse i-1. */
  1350. add_dependency(NULL, op, filename,
  1351. dep[i].file, dep[i].op_num,
  1352. dep[i-1].file, dep[i-1].op_num
  1353. + prev->group_len);
  1354. }
  1355. }
  1356. talloc_free(dep);
  1357. }
  1358. #endif
  1359. static void derive_dependencies(char *filename[],
  1360. struct op *op[], unsigned int num_ops[],
  1361. unsigned int num)
  1362. {
  1363. struct keyinfo *hash;
  1364. unsigned int h, i;
  1365. /* Create hash table for faster key lookup. */
  1366. hash = hash_ops(op, num_ops, num);
  1367. /* Sort them by serial number. */
  1368. sort_ops(hash, filename, op, num);
  1369. /* Create dependencies back to the last change, rather than
  1370. * creating false dependencies by naively making each one
  1371. * depend on the previous. This has two purposes: it makes
  1372. * later optimization simpler, and it also avoids deadlock with
  1373. * same sequence number ops inside traversals (if one
  1374. * traversal doesn't write anything, two ops can have the same
  1375. * sequence number yet we can create a traversal dependency
  1376. * the other way). */
  1377. for (h = 0; h < total_keys * 2; h++) {
  1378. int prev = -1;
  1379. if (hash[h].num_users < 2)
  1380. continue;
  1381. for (i = 0; i < hash[h].num_users; i++) {
  1382. if (changes_db(&hash[h].key, &op[hash[h].user[i].file]
  1383. [hash[h].user[i].op_num])) {
  1384. depend_on_previous(op, filename, num,
  1385. hash[h].user, i, prev);
  1386. prev = i;
  1387. } else if (prev >= 0)
  1388. add_dependency(hash, op, filename,
  1389. hash[h].user[i].file,
  1390. hash[h].user[i].op_num,
  1391. hash[h].user[prev].file,
  1392. hash[h].user[prev].op_num);
  1393. }
  1394. }
  1395. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  1396. make_traverse_depends(filename, op, num_ops, num);
  1397. #endif
  1398. optimize_dependencies(op, num_ops, num);
  1399. }
  1400. int main(int argc, char *argv[])
  1401. {
  1402. struct timeval start, end;
  1403. unsigned int i, num_ops[argc], hashsize[argc], tdb_flags[argc], open_flags[argc];
  1404. struct op *op[argc];
  1405. int fds[2];
  1406. char c;
  1407. bool ok = true;
  1408. if (argc < 3)
  1409. errx(1, "Usage: %s <tdbfile> <tracefile>...", argv[0]);
  1410. pipes = talloc_array(NULL, struct pipe, argc - 2);
  1411. for (i = 0; i < argc - 2; i++) {
  1412. printf("Loading tracefile %s...", argv[2+i]);
  1413. fflush(stdout);
  1414. op[i] = load_tracefile(argv[2+i], &num_ops[i], &hashsize[i],
  1415. &tdb_flags[i], &open_flags[i]);
  1416. if (pipe(pipes[i].fd) != 0)
  1417. err(1, "creating pipe");
  1418. printf("done\n");
  1419. }
  1420. printf("Calculating inter-dependencies...");
  1421. fflush(stdout);
  1422. derive_dependencies(argv+2, op, num_ops, i);
  1423. printf("done\n");
  1424. /* Don't fork for single arg case: simple debugging. */
  1425. if (argc == 3) {
  1426. struct tdb_context *tdb;
  1427. tdb = tdb_open_ex(argv[1], hashsize[0], tdb_flags[0]|TDB_NOSYNC,
  1428. open_flags[0], 0600, NULL, hash_key);
  1429. printf("Single threaded run...");
  1430. fflush(stdout);
  1431. run_ops(tdb, pipes[0].fd[0], argv+2, op, 0, 1, num_ops[0],
  1432. false);
  1433. check_deps(argv[2], op[0], num_ops[0]);
  1434. printf("done\n");
  1435. exit(0);
  1436. }
  1437. if (pipe(fds) != 0)
  1438. err(1, "creating pipe");
  1439. for (i = 0; i < argc - 2; i++) {
  1440. struct tdb_context *tdb;
  1441. switch (fork()) {
  1442. case -1:
  1443. err(1, "fork failed");
  1444. case 0:
  1445. close(fds[1]);
  1446. tdb = tdb_open_ex(argv[1], hashsize[i],
  1447. tdb_flags[i]|TDB_NOSYNC,
  1448. open_flags[i], 0600, NULL, hash_key);
  1449. if (!tdb)
  1450. err(1, "Opening tdb %s", argv[1]);
  1451. /* This catches parent exiting. */
  1452. if (read(fds[0], &c, 1) != 1)
  1453. exit(1);
  1454. run_ops(tdb, pipes[i].fd[0], argv+2, op, i, 1,
  1455. num_ops[i], false);
  1456. check_deps(argv[2+i], op[i], num_ops[i]);
  1457. exit(0);
  1458. default:
  1459. break;
  1460. }
  1461. }
  1462. /* Let everything settle. */
  1463. sleep(1);
  1464. printf("Starting run...");
  1465. fflush(stdout);
  1466. gettimeofday(&start, NULL);
  1467. /* Tell them all to go! Any write of sufficient length will do. */
  1468. if (write(fds[1], hashsize, i) != i)
  1469. err(1, "Writing to wakeup pipe");
  1470. for (i = 0; i < argc - 2; i++) {
  1471. int status;
  1472. wait(&status);
  1473. if (!WIFEXITED(status)) {
  1474. warnx("Child died with signal %i", WTERMSIG(status));
  1475. ok = false;
  1476. } else if (WEXITSTATUS(status) != 0)
  1477. /* Assume child spat out error. */
  1478. ok = false;
  1479. }
  1480. if (!ok)
  1481. exit(1);
  1482. gettimeofday(&end, NULL);
  1483. printf("done\n");
  1484. end.tv_sec -= start.tv_sec;
  1485. printf("Time replaying: %lu usec\n",
  1486. end.tv_sec * 1000000UL + (end.tv_usec - start.tv_usec));
  1487. exit(0);
  1488. }