replay_trace.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955
  1. #include <ccan/tdb/tdb.h>
  2. #include <ccan/grab_file/grab_file.h>
  3. #include <ccan/hash/hash.h>
  4. #include <ccan/talloc/talloc.h>
  5. #include <ccan/str_talloc/str_talloc.h>
  6. #include <ccan/str/str.h>
  7. #include <ccan/list/list.h>
  8. #include <err.h>
  9. #include <ctype.h>
  10. #include <string.h>
  11. #include <unistd.h>
  12. #include <sys/types.h>
  13. #include <sys/wait.h>
  14. #include <sys/time.h>
  15. #include <errno.h>
  16. #include <signal.h>
  17. #include <assert.h>
  18. #include <fcntl.h>
/* Two-level stringification, so a macro argument is expanded before being
 * turned into a string literal. */
#define STRINGIFY2(x) #x
#define STRINGIFY(x) STRINGIFY2(x)

/* When true, dump_pre() does not print the outstanding dependencies. */
static bool quiet = false;

/* Avoid mod by zero */
/* Incremented by the op_add_*() parsers below. */
static unsigned int total_keys = 1;

/* All the wipe_all ops. */
/* Grown one entry at a time by op_add_wipe_all(). */
static struct op_desc *wipe_alls = NULL;
static unsigned int num_wipe_alls = 0;

/* #define DEBUG_DEPS 1 */

/* Traversals block transactions in the current implementation. */
/* When non-zero, nontrivial_traverse() runs its ops with backoff enabled. */
#define TRAVERSALS_TAKE_TRANSACTION_LOCK 1
/* A pair of descriptors; do_post() writes dependency pointers to fd[1]. */
struct pipe {
	int fd[2];
};

/* Indexed by file number (see do_post()). */
static struct pipe *pipes;

/* do_pre() writes a struct op_desc here when it backs off to avoid deadlock. */
static int backoff_fd = -1;
  35. static void __attribute__((noreturn)) fail(const char *filename,
  36. unsigned int line,
  37. const char *fmt, ...)
  38. {
  39. va_list ap;
  40. va_start(ap, fmt);
  41. fprintf(stderr, "%s:%u: FAIL: ", filename, line);
  42. vfprintf(stderr, fmt, ap);
  43. fprintf(stderr, "\n");
  44. va_end(ap);
  45. exit(1);
  46. }
  47. /* Try or die. */
  48. #define try(expr, expect) \
  49. do { \
  50. int ret = (expr); \
  51. if (ret != (expect)) \
  52. fail(filename[file], i+1, \
  53. STRINGIFY(expr) "= %i", ret); \
  54. } while (0)
  55. /* Try or imitate results. */
  56. #define unreliable(expr, expect, force, undo) \
  57. do { \
  58. int ret = expr; \
  59. if (ret != expect) { \
  60. fprintf(stderr, "%s:%u: %s gave %i not %i", \
  61. filename[file], i+1, STRINGIFY(expr), \
  62. ret, expect); \
  63. if (expect == 0) \
  64. force; \
  65. else \
  66. undo; \
  67. } \
  68. } while (0)
  69. static bool key_eq(TDB_DATA a, TDB_DATA b)
  70. {
  71. if (a.dsize != b.dsize)
  72. return false;
  73. return memcmp(a.dptr, b.dptr, a.dsize) == 0;
  74. }
  75. /* This is based on the hash algorithm from gdbm */
  76. static unsigned int hash_key(TDB_DATA *key)
  77. {
  78. uint32_t value; /* Used to compute the hash value. */
  79. uint32_t i; /* Used to cycle through random values. */
  80. /* Set the initial value from the key size. */
  81. for (value = 0x238F13AF ^ key->dsize, i=0; i < key->dsize; i++)
  82. value = (value + (key->dptr[i] << (i*5 % 24)));
  83. return (1103515243 * value + 12345);
  84. }
/*
 * Kind of traced operation, stored in struct op.type.  run_ops() switches
 * on this value to decide what to replay.
 */
enum op_type {
	OP_TDB_LOCKALL,
	OP_TDB_LOCKALL_MARK,
	OP_TDB_LOCKALL_UNMARK,
	OP_TDB_LOCKALL_NONBLOCK,
	OP_TDB_UNLOCKALL,
	OP_TDB_LOCKALL_READ,
	OP_TDB_LOCKALL_READ_NONBLOCK,
	OP_TDB_UNLOCKALL_READ,
	OP_TDB_CHAINLOCK,
	OP_TDB_CHAINLOCK_NONBLOCK,
	OP_TDB_CHAINLOCK_MARK,
	OP_TDB_CHAINLOCK_UNMARK,
	OP_TDB_CHAINUNLOCK,
	OP_TDB_CHAINLOCK_READ,
	OP_TDB_CHAINUNLOCK_READ,
	OP_TDB_PARSE_RECORD,
	OP_TDB_EXISTS,
	OP_TDB_STORE,
	OP_TDB_APPEND,
	OP_TDB_GET_SEQNUM,
	OP_TDB_WIPE_ALL,
	OP_TDB_TRANSACTION_START,
	OP_TDB_TRANSACTION_CANCEL,
	OP_TDB_TRANSACTION_PREPARE_COMMIT,
	OP_TDB_TRANSACTION_COMMIT,
	/* The two traverse starts are replayed through op_traverse(). */
	OP_TDB_TRAVERSE_READ_START,
	OP_TDB_TRAVERSE_START,
	/* run_ops() treats a bare traverse end as an error. */
	OP_TDB_TRAVERSE_END,
	/* These two make run_ops() return to its caller. */
	OP_TDB_TRAVERSE,
	OP_TDB_TRAVERSE_END_EARLY,
	OP_TDB_FIRSTKEY,
	OP_TDB_NEXTKEY,
	OP_TDB_FETCH,
	OP_TDB_DELETE,
	/* Replayed as a no-op by run_ops(). */
	OP_TDB_REPACK,
};
/* One operation parsed from a trace file. */
struct op {
	/* Sequence number recorded in the trace (set by add_op()). */
	unsigned int seqnum;
	enum op_type type;
	/* tdb_null for ops that the parsers treat as keyless. */
	TDB_DATA key;
	TDB_DATA data;
	/* Return value recorded in the trace; run_ops() compares against it. */
	int ret;
	/* Who is waiting for us? */
	struct list_head post;
	/* What are we waiting for? */
	struct list_head pre;
	/* If I'm part of a group (traverse/transaction) where is
	 * start? (Otherwise, 0) */
	unsigned int group_start;
	union {
		int flag; /* open and store */
		struct { /* append */
			TDB_DATA pre;
			TDB_DATA post;
		} append;
		/* transaction/traverse start/chainlock */
		unsigned int group_len;
	};
};
/* Names one op as (trace file index, op index within that file). */
struct op_desc {
	unsigned int file;
	unsigned int op_num;
};
  149. static unsigned char hex_char(const char *filename, unsigned int line, char c)
  150. {
  151. c = toupper(c);
  152. if (c >= 'A' && c <= 'F')
  153. return c - 'A' + 10;
  154. if (c >= '0' && c <= '9')
  155. return c - '0';
  156. fail(filename, line, "invalid hex character '%c'", c);
  157. }
  158. /* TDB data is <size>:<%02x>* */
  159. static TDB_DATA make_tdb_data(const void *ctx,
  160. const char *filename, unsigned int line,
  161. const char *word)
  162. {
  163. TDB_DATA data;
  164. unsigned int i;
  165. const char *p;
  166. if (streq(word, "NULL"))
  167. return tdb_null;
  168. data.dsize = atoi(word);
  169. data.dptr = talloc_array(ctx, unsigned char, data.dsize);
  170. p = strchr(word, ':');
  171. if (!p)
  172. fail(filename, line, "invalid tdb data '%s'", word);
  173. p++;
  174. for (i = 0; i < data.dsize; i++)
  175. data.dptr[i] = hex_char(filename, line, p[i*2])*16
  176. + hex_char(filename, line, p[i*2+1]);
  177. return data;
  178. }
  179. static void add_op(const char *filename, struct op **op, unsigned int i,
  180. unsigned int seqnum, enum op_type type)
  181. {
  182. struct op *new;
  183. *op = talloc_realloc(NULL, *op, struct op, i+1);
  184. new = (*op) + i;
  185. new->type = type;
  186. new->seqnum = seqnum;
  187. new->ret = 0;
  188. new->group_start = 0;
  189. }
  190. static void op_add_nothing(char *filename[], struct op op[],
  191. unsigned file, unsigned op_num, char *words[])
  192. {
  193. if (words[2])
  194. fail(filename[file], op_num+1, "Expected no arguments");
  195. op[op_num].key = tdb_null;
  196. }
  197. static void op_add_key(char *filename[], struct op op[],
  198. unsigned file, unsigned op_num, char *words[])
  199. {
  200. if (words[2] == NULL || words[3])
  201. fail(filename[file], op_num+1, "Expected just a key");
  202. op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
  203. total_keys++;
  204. }
  205. static void op_add_key_ret(char *filename[], struct op op[],
  206. unsigned file, unsigned op_num, char *words[])
  207. {
  208. if (!words[2] || !words[3] || !words[4] || words[5]
  209. || !streq(words[3], "="))
  210. fail(filename[file], op_num+1, "Expected <key> = <ret>");
  211. op[op_num].ret = atoi(words[4]);
  212. op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
  213. /* May only be a unique key if it fails */
  214. if (op[op_num].ret != 0)
  215. total_keys++;
  216. }
  217. static void op_add_key_data(char *filename[], struct op op[],
  218. unsigned file, unsigned op_num, char *words[])
  219. {
  220. if (!words[2] || !words[3] || !words[4] || words[5]
  221. || !streq(words[3], "="))
  222. fail(filename[file], op_num+1, "Expected <key> = <data>");
  223. op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
  224. op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[4]);
  225. /* Likely only be a unique key if it fails */
  226. if (!op[op_num].data.dptr)
  227. total_keys++;
  228. else if (random() % 2)
  229. total_keys++;
  230. }
  231. /* We don't record the keys or data for a traverse, as we don't use them. */
  232. static void op_add_traverse(char *filename[], struct op op[],
  233. unsigned file, unsigned op_num, char *words[])
  234. {
  235. if (!words[2] || !words[3] || !words[4] || words[5]
  236. || !streq(words[3], "="))
  237. fail(filename[file], op_num+1, "Expected <key> = <data>");
  238. op[op_num].key = tdb_null;
  239. }
  240. /* Full traverse info is useful for debugging, but changing it to
  241. * "traversefn" without the data makes the traces *much* smaller! */
  242. static void op_add_traversefn(char *filename[], struct op op[],
  243. unsigned file, unsigned op_num, char *words[])
  244. {
  245. if (words[2])
  246. fail(filename[file], op_num+1, "Expected no values");
  247. op[op_num].key = tdb_null;
  248. }
  249. /* <seqnum> tdb_store <rec> <rec> <flag> = <ret> */
  250. static void op_add_store(char *filename[], struct op op[],
  251. unsigned file, unsigned op_num, char *words[])
  252. {
  253. if (!words[2] || !words[3] || !words[4] || !words[5] || !words[6]
  254. || words[7] || !streq(words[5], "="))
  255. fail(filename[file], op_num+1, "Expect <key> <data> <flag> = <ret>");
  256. op[op_num].flag = strtoul(words[4], NULL, 0);
  257. op[op_num].ret = atoi(words[6]);
  258. op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
  259. op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[3]);
  260. total_keys++;
  261. }
  262. /* <seqnum> tdb_append <rec> <rec> = <rec> */
  263. static void op_add_append(char *filename[], struct op op[],
  264. unsigned file, unsigned op_num, char *words[])
  265. {
  266. if (!words[2] || !words[3] || !words[4] || !words[5] || words[6]
  267. || !streq(words[4], "="))
  268. fail(filename[file], op_num+1, "Expect <key> <data> = <rec>");
  269. op[op_num].key = make_tdb_data(op, filename[file], op_num+1, words[2]);
  270. op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[3]);
  271. op[op_num].append.post
  272. = make_tdb_data(op, filename[file], op_num+1, words[5]);
  273. /* By subtraction, figure out what previous data was. */
  274. op[op_num].append.pre.dptr = op[op_num].append.post.dptr;
  275. op[op_num].append.pre.dsize
  276. = op[op_num].append.post.dsize - op[op_num].data.dsize;
  277. total_keys++;
  278. }
  279. /* <seqnum> tdb_get_seqnum = <ret> */
  280. static void op_add_seqnum(char *filename[], struct op op[],
  281. unsigned file, unsigned op_num, char *words[])
  282. {
  283. if (!words[2] || !words[3] || words[4] || !streq(words[2], "="))
  284. fail(filename[file], op_num+1, "Expect = <ret>");
  285. op[op_num].key = tdb_null;
  286. op[op_num].ret = atoi(words[3]);
  287. }
  288. static void op_add_traverse_start(char *filename[], struct op op[],
  289. unsigned file, unsigned op_num, char *words[])
  290. {
  291. if (words[2])
  292. fail(filename[file], op_num+1, "Expect no arguments");
  293. op[op_num].key = tdb_null;
  294. op[op_num].group_len = 0;
  295. }
  296. static void op_add_transaction(char *filename[], struct op op[],
  297. unsigned file, unsigned op_num, char *words[])
  298. {
  299. if (words[2])
  300. fail(filename[file], op_num+1, "Expect no arguments");
  301. op[op_num].key = tdb_null;
  302. op[op_num].group_len = 0;
  303. }
  304. static void op_add_chainlock(char *filename[], struct op op[],
  305. unsigned file, unsigned op_num, char *words[])
  306. {
  307. if (words[2] == NULL || words[3])
  308. fail(filename[file], op_num+1, "Expected just a key");
  309. /* A chainlock key isn't a key in the normal sense; it doesn't
  310. * have to be in the db at all. Also, we don't want to hash this op. */
  311. op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[2]);
  312. op[op_num].key = tdb_null;
  313. op[op_num].group_len = 0;
  314. }
  315. static void op_add_chainlock_ret(char *filename[], struct op op[],
  316. unsigned file, unsigned op_num, char *words[])
  317. {
  318. if (!words[2] || !words[3] || !words[4] || words[5]
  319. || !streq(words[3], "="))
  320. fail(filename[file], op_num+1, "Expected <key> = <ret>");
  321. op[op_num].ret = atoi(words[4]);
  322. op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[2]);
  323. op[op_num].key = tdb_null;
  324. op[op_num].group_len = 0;
  325. total_keys++;
  326. }
  327. static void op_add_wipe_all(char *filename[], struct op op[],
  328. unsigned file, unsigned op_num, char *words[])
  329. {
  330. if (words[2])
  331. fail(filename[file], op_num+1, "Expected no arguments");
  332. op[op_num].key = tdb_null;
  333. wipe_alls = talloc_realloc(NULL, wipe_alls, struct op_desc,
  334. num_wipe_alls+1);
  335. wipe_alls[num_wipe_alls].file = file;
  336. wipe_alls[num_wipe_alls].op_num = op_num;
  337. num_wipe_alls++;
  338. }
  339. static int op_find_start(struct op op[], unsigned int op_num, enum op_type type)
  340. {
  341. unsigned int i;
  342. for (i = op_num-1; i > 0; i--) {
  343. if (op[i].type == type && !op[i].group_len)
  344. return i;
  345. }
  346. return 0;
  347. }
  348. static void op_analyze_transaction(char *filename[], struct op op[],
  349. unsigned file, unsigned op_num,
  350. char *words[])
  351. {
  352. unsigned int start, i;
  353. op[op_num].key = tdb_null;
  354. if (words[2])
  355. fail(filename[file], op_num+1, "Expect no arguments");
  356. start = op_find_start(op, op_num, OP_TDB_TRANSACTION_START);
  357. if (!start)
  358. fail(filename[file], op_num+1, "no transaction start found");
  359. op[start].group_len = op_num - start;
  360. /* This rolls in nested transactions. I think that's right. */
  361. for (i = start; i <= op_num; i++)
  362. op[i].group_start = start;
  363. }
  364. /* We treat chainlocks a lot like transactions, even though that's overkill */
  365. static void op_analyze_chainlock(char *filename[], struct op op[],
  366. unsigned file, unsigned op_num, char *words[])
  367. {
  368. unsigned int i, start;
  369. if (words[2] == NULL || words[3])
  370. fail(filename[file], op_num+1, "Expected just a key");
  371. op[op_num].data = make_tdb_data(op, filename[file], op_num+1, words[2]);
  372. op[op_num].key = tdb_null;
  373. total_keys++;
  374. start = op_find_start(op, op_num, OP_TDB_CHAINLOCK);
  375. if (!start)
  376. start = op_find_start(op, op_num, OP_TDB_CHAINLOCK_READ);
  377. if (!start)
  378. fail(filename[file], op_num+1, "no initial chainlock found");
  379. /* FIXME: We'd have to do something clever to make this work
  380. * vs. deadlock. */
  381. if (!key_eq(op[start].data, op[op_num].data))
  382. fail(filename[file], op_num+1, "nested chainlock calls?");
  383. op[start].group_len = op_num - start;
  384. for (i = start; i <= op_num; i++)
  385. op[i].group_start = start;
  386. }
  387. static void op_analyze_traverse(char *filename[], struct op op[],
  388. unsigned file, unsigned op_num, char *words[])
  389. {
  390. int i, start;
  391. op[op_num].key = tdb_null;
  392. /* = %u means traverse function terminated. */
  393. if (words[2]) {
  394. if (!streq(words[2], "=") || !words[3] || words[4])
  395. fail(filename[file], op_num+1, "expect = <num>");
  396. op[op_num].ret = atoi(words[3]);
  397. } else
  398. op[op_num].ret = 0;
  399. start = op_find_start(op, op_num, OP_TDB_TRAVERSE_START);
  400. if (!start)
  401. start = op_find_start(op, op_num, OP_TDB_TRAVERSE_READ_START);
  402. if (!start)
  403. fail(filename[file], op_num+1, "no traversal start found");
  404. op[start].group_len = op_num - start;
  405. /* Don't roll in nested traverse/chainlock */
  406. for (i = start; i <= op_num; i++)
  407. if (!op[i].group_start)
  408. op[i].group_start = start;
  409. }
/* Keep -Wmissing-declarations happy: */
/* Prototype for the lookup function that "keywords.c" is expected to define
 * (presumably a generated keyword table - TODO confirm). */
const struct op_table *
find_keyword (register const char *str, register unsigned int len);
#include "keywords.c"
/* One dependency edge: op `needs` waits for op `prereq`. */
struct depend {
	/* We can have more than one */
	/* Link in the waiting op's `pre` list (walked by dump_pre()). */
	struct list_node pre_list;
	/* Link in the prerequisite op's `post` list (walked by do_post()). */
	struct list_node post_list;
	struct op_desc needs;
	struct op_desc prereq;
};
  421. static void check_deps(const char *filename, struct op op[], unsigned int num)
  422. {
  423. #ifdef DEBUG_DEPS
  424. unsigned int i;
  425. for (i = 1; i < num; i++)
  426. if (!list_empty(&op[i].pre))
  427. fail(filename, i+1, "Still has dependencies");
  428. #endif
  429. }
  430. static void dump_pre(char *filename[], struct op *op[],
  431. unsigned int file, unsigned int i)
  432. {
  433. struct depend *dep;
  434. if (!quiet) {
  435. printf("%s:%u (%u) still waiting for:\n", filename[file], i+1,
  436. op[file][i].seqnum);
  437. list_for_each(&op[file][i].pre, dep, pre_list)
  438. printf(" %s:%u (%u)\n",
  439. filename[dep->prereq.file], dep->prereq.op_num+1,
  440. op[dep->prereq.file][dep->prereq.op_num].seqnum);
  441. }
  442. check_deps(filename[file], op[file], i);
  443. }
  444. /* We simply read/write pointers, since we all are children. */
  445. static bool do_pre(struct tdb_context *tdb,
  446. char *filename[], struct op *op[],
  447. unsigned int file, int pre_fd, unsigned int i,
  448. bool backoff)
  449. {
  450. while (!list_empty(&op[file][i].pre)) {
  451. struct depend *dep;
  452. #if DEBUG_DEPS
  453. printf("%s:%u:waiting for pre\n", filename[file], i+1);
  454. fflush(stdout);
  455. #endif
  456. if (backoff)
  457. alarm(2);
  458. else
  459. alarm(10);
  460. while (read(pre_fd, &dep, sizeof(dep)) != sizeof(dep)) {
  461. if (errno == EINTR) {
  462. if (backoff) {
  463. struct op_desc desc = { file,i };
  464. warnx("%s:%u:avoiding deadlock",
  465. filename[file], i+1);
  466. if (write(backoff_fd, &desc,
  467. sizeof(desc)) != sizeof(desc))
  468. err(1, "writing backoff_fd");
  469. return false;
  470. }
  471. dump_pre(filename, op, file, i);
  472. exit(1);
  473. } else
  474. errx(1, "Reading from pipe");
  475. }
  476. alarm(0);
  477. #if DEBUG_DEPS
  478. printf("%s:%u:got pre %u from %s:%u\n", filename[file], i+1,
  479. dep->needs.op_num+1, filename[dep->prereq.file],
  480. dep->prereq.op_num+1);
  481. fflush(stdout);
  482. #endif
  483. /* This could be any op, not just this one. */
  484. talloc_free(dep);
  485. }
  486. return true;
  487. }
  488. static void do_post(char *filename[], struct op *op[],
  489. unsigned int file, unsigned int i)
  490. {
  491. struct depend *dep;
  492. list_for_each(&op[file][i].post, dep, post_list) {
  493. #if DEBUG_DEPS
  494. printf("%s:%u:sending to file %s:%u\n", filename[file], i+1,
  495. filename[dep->needs.file], dep->needs.op_num+1);
  496. #endif
  497. if (write(pipes[dep->needs.file].fd[1], &dep, sizeof(dep))
  498. != sizeof(dep))
  499. err(1, "%s:%u failed to tell file %s",
  500. filename[file], i+1, filename[dep->needs.file]);
  501. }
  502. }
  503. static int get_len(TDB_DATA key, TDB_DATA data, void *private_data)
  504. {
  505. return data.dsize;
  506. }
/* Forward declaration: nontrivial_traverse() and op_traverse() call
 * run_ops() before its definition.  Replays op[file][start..stop) and
 * returns the index at which it stopped. */
static unsigned run_ops(struct tdb_context *tdb,
			int pre_fd,
			char *filename[],
			struct op *op[],
			unsigned int file,
			unsigned int start, unsigned int stop,
			bool backoff);
/* State passed from op_traverse() to nontrivial_traverse() through the
 * traverse callback's private pointer. */
struct traverse_info {
	struct op **op;
	char **filename;
	unsigned file;
	int pre_fd;
	/* Index of the traverse-start op. */
	unsigned int start;
	/* Index of the next op to replay; advanced by the callback. */
	unsigned int i;
};
  522. /* More complex. Just do whatever's they did at the n'th entry. */
  523. static int nontrivial_traverse(struct tdb_context *tdb,
  524. TDB_DATA key, TDB_DATA data,
  525. void *_tinfo)
  526. {
  527. struct traverse_info *tinfo = _tinfo;
  528. unsigned int trav_len = tinfo->op[tinfo->file][tinfo->start].group_len;
  529. bool avoid_deadlock = false;
  530. if (tinfo->i == tinfo->start + trav_len) {
  531. /* This can happen if traverse expects to be empty. */
  532. if (trav_len == 1)
  533. return 1;
  534. fail(tinfo->filename[tinfo->file], tinfo->start + 1,
  535. "traverse did not terminate");
  536. }
  537. if (tinfo->op[tinfo->file][tinfo->i].type != OP_TDB_TRAVERSE)
  538. fail(tinfo->filename[tinfo->file], tinfo->start + 1,
  539. "%s:%u:traverse terminated early");
  540. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  541. avoid_deadlock = true;
  542. #endif
  543. /* Run any normal ops. */
  544. tinfo->i = run_ops(tdb, tinfo->pre_fd, tinfo->filename, tinfo->op,
  545. tinfo->file, tinfo->i+1, tinfo->start + trav_len,
  546. avoid_deadlock);
  547. /* We backed off, or we hit OP_TDB_TRAVERSE_END/EARLY. */
  548. if (tinfo->op[tinfo->file][tinfo->i].type != OP_TDB_TRAVERSE)
  549. return 1;
  550. return 0;
  551. }
  552. static unsigned op_traverse(struct tdb_context *tdb,
  553. int pre_fd,
  554. char *filename[],
  555. unsigned int file,
  556. int (*traversefn)(struct tdb_context *,
  557. tdb_traverse_func, void *),
  558. struct op *op[],
  559. unsigned int start)
  560. {
  561. struct traverse_info tinfo = { op, filename, file, pre_fd,
  562. start, start+1 };
  563. traversefn(tdb, nontrivial_traverse, &tinfo);
  564. /* Traversing in wrong order can have strange effects: eg. if
  565. * original traverse went A (delete A), B, we might do B
  566. * (delete A). So if we have ops left over, we do it now. */
  567. while (tinfo.i != start + op[file][start].group_len) {
  568. if (op[file][tinfo.i].type == OP_TDB_TRAVERSE
  569. || op[file][tinfo.i].type == OP_TDB_TRAVERSE_END_EARLY)
  570. tinfo.i++;
  571. else
  572. tinfo.i = run_ops(tdb, pre_fd, filename, op, file,
  573. tinfo.i,
  574. start + op[file][start].group_len,
  575. false);
  576. }
  577. return tinfo.i;
  578. }
  579. static void break_out(int sig)
  580. {
  581. }
  582. static __attribute__((noinline))
  583. unsigned run_ops(struct tdb_context *tdb,
  584. int pre_fd,
  585. char *filename[],
  586. struct op *op[],
  587. unsigned int file,
  588. unsigned int start, unsigned int stop,
  589. bool backoff)
  590. {
  591. unsigned int i;
  592. struct sigaction sa;
  593. sa.sa_handler = break_out;
  594. sa.sa_flags = 0;
  595. sigaction(SIGALRM, &sa, NULL);
  596. for (i = start; i < stop; i++) {
  597. if (!do_pre(tdb, filename, op, file, pre_fd, i, backoff))
  598. return i;
  599. switch (op[file][i].type) {
  600. case OP_TDB_LOCKALL:
  601. try(tdb_lockall(tdb), op[file][i].ret);
  602. break;
  603. case OP_TDB_LOCKALL_MARK:
  604. try(tdb_lockall_mark(tdb), op[file][i].ret);
  605. break;
  606. case OP_TDB_LOCKALL_UNMARK:
  607. try(tdb_lockall_unmark(tdb), op[file][i].ret);
  608. break;
  609. case OP_TDB_LOCKALL_NONBLOCK:
  610. unreliable(tdb_lockall_nonblock(tdb), op[file][i].ret,
  611. tdb_lockall(tdb), tdb_unlockall(tdb));
  612. break;
  613. case OP_TDB_UNLOCKALL:
  614. try(tdb_unlockall(tdb), op[file][i].ret);
  615. break;
  616. case OP_TDB_LOCKALL_READ:
  617. try(tdb_lockall_read(tdb), op[file][i].ret);
  618. break;
  619. case OP_TDB_LOCKALL_READ_NONBLOCK:
  620. unreliable(tdb_lockall_read_nonblock(tdb),
  621. op[file][i].ret,
  622. tdb_lockall_read(tdb),
  623. tdb_unlockall_read(tdb));
  624. break;
  625. case OP_TDB_UNLOCKALL_READ:
  626. try(tdb_unlockall_read(tdb), op[file][i].ret);
  627. break;
  628. case OP_TDB_CHAINLOCK:
  629. try(tdb_chainlock(tdb, op[file][i].key),
  630. op[file][i].ret);
  631. break;
  632. case OP_TDB_CHAINLOCK_NONBLOCK:
  633. unreliable(tdb_chainlock_nonblock(tdb, op[file][i].key),
  634. op[file][i].ret,
  635. tdb_chainlock(tdb, op[file][i].key),
  636. tdb_chainunlock(tdb, op[file][i].key));
  637. break;
  638. case OP_TDB_CHAINLOCK_MARK:
  639. try(tdb_chainlock_mark(tdb, op[file][i].key),
  640. op[file][i].ret);
  641. break;
  642. case OP_TDB_CHAINLOCK_UNMARK:
  643. try(tdb_chainlock_unmark(tdb, op[file][i].key),
  644. op[file][i].ret);
  645. break;
  646. case OP_TDB_CHAINUNLOCK:
  647. try(tdb_chainunlock(tdb, op[file][i].key),
  648. op[file][i].ret);
  649. break;
  650. case OP_TDB_CHAINLOCK_READ:
  651. try(tdb_chainlock_read(tdb, op[file][i].key),
  652. op[file][i].ret);
  653. break;
  654. case OP_TDB_CHAINUNLOCK_READ:
  655. try(tdb_chainunlock_read(tdb, op[file][i].key),
  656. op[file][i].ret);
  657. break;
  658. case OP_TDB_PARSE_RECORD:
  659. try(tdb_parse_record(tdb, op[file][i].key, get_len,
  660. NULL),
  661. op[file][i].ret);
  662. break;
  663. case OP_TDB_EXISTS:
  664. try(tdb_exists(tdb, op[file][i].key), op[file][i].ret);
  665. break;
  666. case OP_TDB_STORE:
  667. try(tdb_store(tdb, op[file][i].key, op[file][i].data,
  668. op[file][i].flag),
  669. op[file][i].ret);
  670. break;
  671. case OP_TDB_APPEND:
  672. try(tdb_append(tdb, op[file][i].key, op[file][i].data),
  673. op[file][i].ret);
  674. break;
  675. case OP_TDB_GET_SEQNUM:
  676. try(tdb_get_seqnum(tdb), op[file][i].ret);
  677. break;
  678. case OP_TDB_WIPE_ALL:
  679. try(tdb_wipe_all(tdb), op[file][i].ret);
  680. break;
  681. case OP_TDB_TRANSACTION_START:
  682. try(tdb_transaction_start(tdb), op[file][i].ret);
  683. break;
  684. case OP_TDB_TRANSACTION_CANCEL:
  685. try(tdb_transaction_cancel(tdb), op[file][i].ret);
  686. break;
  687. case OP_TDB_TRANSACTION_PREPARE_COMMIT:
  688. try(tdb_transaction_prepare_commit(tdb),
  689. op[file][i].ret);
  690. break;
  691. case OP_TDB_TRANSACTION_COMMIT:
  692. try(tdb_transaction_commit(tdb), op[file][i].ret);
  693. break;
  694. case OP_TDB_TRAVERSE_READ_START:
  695. i = op_traverse(tdb, pre_fd, filename, file,
  696. tdb_traverse_read, op, i);
  697. break;
  698. case OP_TDB_TRAVERSE_START:
  699. i = op_traverse(tdb, pre_fd, filename, file,
  700. tdb_traverse, op, i);
  701. break;
  702. case OP_TDB_TRAVERSE:
  703. case OP_TDB_TRAVERSE_END_EARLY:
  704. /* Terminate: we're in a traverse, and we've
  705. * done our ops. */
  706. return i;
  707. case OP_TDB_TRAVERSE_END:
  708. fail(filename[file], i+1, "unexpected end traverse");
  709. /* FIXME: These must be treated like traverse. */
  710. case OP_TDB_FIRSTKEY:
  711. if (!key_eq(tdb_firstkey(tdb), op[file][i].data))
  712. fail(filename[file], i+1, "bad firstkey");
  713. break;
  714. case OP_TDB_NEXTKEY:
  715. if (!key_eq(tdb_nextkey(tdb, op[file][i].key),
  716. op[file][i].data))
  717. fail(filename[file], i+1, "bad nextkey");
  718. break;
  719. case OP_TDB_FETCH: {
  720. TDB_DATA f = tdb_fetch(tdb, op[file][i].key);
  721. if (!key_eq(f, op[file][i].data))
  722. fail(filename[file], i+1, "bad fetch %u",
  723. f.dsize);
  724. break;
  725. }
  726. case OP_TDB_DELETE:
  727. try(tdb_delete(tdb, op[file][i].key), op[file][i].ret);
  728. break;
  729. case OP_TDB_REPACK:
  730. /* We do nothing here: the transaction and traverse are
  731. * traced. It's in the trace to mark it, since it
  732. * may become unnecessary in future. */
  733. break;
  734. }
  735. do_post(filename, op, file, i);
  736. }
  737. return i;
  738. }
  739. /* tdbtorture, in particular, can do a tdb_close with a transaction in
  740. * progress. */
  741. static struct op *maybe_cancel_transaction(char *filename[], unsigned int file,
  742. struct op *op, unsigned int *num)
  743. {
  744. unsigned int start = op_find_start(op, *num, OP_TDB_TRANSACTION_START);
  745. if (start) {
  746. char *words[] = { "<unknown>", "tdb_close", NULL };
  747. add_op(filename[file], &op, *num, op[start].seqnum,
  748. OP_TDB_TRANSACTION_CANCEL);
  749. op_analyze_transaction(filename, op, file, *num, words);
  750. (*num)++;
  751. }
  752. return op;
  753. }
  754. static struct op *load_tracefile(char *filename[],
  755. unsigned int file,
  756. unsigned int *num,
  757. unsigned int *hashsize,
  758. unsigned int *tdb_flags,
  759. unsigned int *open_flags)
  760. {
  761. unsigned int i;
  762. struct op *op = talloc_array(NULL, struct op, 1);
  763. char **words;
  764. char **lines;
  765. char *contents;
  766. contents = grab_file(NULL, filename[file], NULL);
  767. if (!contents)
  768. err(1, "Reading %s", filename[file]);
  769. lines = strsplit(contents, contents, "\n");
  770. if (!lines[0])
  771. errx(1, "%s is empty", filename[file]);
  772. words = strsplit(lines, lines[0], " ");
  773. if (!streq(words[1], "tdb_open"))
  774. fail(filename[file], 1, "does not start with tdb_open");
  775. *hashsize = atoi(words[2]);
  776. *tdb_flags = strtoul(words[3], NULL, 0);
  777. *open_flags = strtoul(words[4], NULL, 0);
  778. for (i = 1; lines[i]; i++) {
  779. const struct op_table *opt;
  780. words = strsplit(lines, lines[i], " ");
  781. if (!words[0] || !words[1])
  782. fail(filename[file], i+1,
  783. "Expected seqnum number and op");
  784. opt = find_keyword(words[1], strlen(words[1]));
  785. if (!opt) {
  786. if (streq(words[1], "tdb_close")) {
  787. if (lines[i+1])
  788. fail(filename[file], i+2,
  789. "lines after tdb_close");
  790. *num = i;
  791. talloc_free(lines);
  792. return maybe_cancel_transaction(filename, file,
  793. op, num);
  794. }
  795. fail(filename[file], i+1,
  796. "Unknown operation '%s'", words[1]);
  797. }
  798. add_op(filename[file], &op, i, atoi(words[0]), opt->type);
  799. opt->enhance_op(filename, op, file, i, words);
  800. }
  801. if (!quiet)
  802. fprintf(stderr,
  803. "%s:%u:last operation is not tdb_close: incomplete?",
  804. filename[file], i);
  805. talloc_free(contents);
  806. *num = i - 1;
  807. return maybe_cancel_transaction(filename, file, op, num);
  808. }
/* We remember all the keys we've ever seen, and who has them.
 * One of these is a slot in the hash table built by hash_ops(). */
struct keyinfo {
	/* The key this slot tracks (dptr is NULL while the slot is unused). */
	TDB_DATA key;
	/* Number of valid entries in user[]. */
	unsigned int num_users;
	/* The ops touching this key, each named by (file, op_num). */
	struct op_desc *user;
};
  815. static bool starts_transaction(const struct op *op)
  816. {
  817. return op->type == OP_TDB_TRANSACTION_START;
  818. }
  819. static bool in_transaction(const struct op op[], unsigned int i)
  820. {
  821. return op[i].group_start && starts_transaction(&op[op[i].group_start]);
  822. }
  823. static bool successful_transaction(const struct op *op)
  824. {
  825. return starts_transaction(op)
  826. && op[op->group_len].type == OP_TDB_TRANSACTION_COMMIT;
  827. }
  828. static bool starts_traverse(const struct op *op)
  829. {
  830. return op->type == OP_TDB_TRAVERSE_START
  831. || op->type == OP_TDB_TRAVERSE_READ_START;
  832. }
  833. static bool in_traverse(const struct op op[], unsigned int i)
  834. {
  835. return op[i].group_start && starts_traverse(&op[op[i].group_start]);
  836. }
  837. static bool starts_chainlock(const struct op *op)
  838. {
  839. return op->type == OP_TDB_CHAINLOCK_READ
  840. || op->type == OP_TDB_CHAINLOCK;
  841. }
  842. static bool in_chainlock(const struct op op[], unsigned int i)
  843. {
  844. return op[i].group_start && starts_chainlock(&op[op[i].group_start]);
  845. }
/* Sentinel requirements handed out by needs().  They are told apart
 * by address (e.g. need == &must_exist), not by their contents. */
static const TDB_DATA must_not_exist;
static const TDB_DATA must_exist;
static const TDB_DATA not_exists_or_empty;
  849. /* NULL means doesn't care if it exists or not, &must_exist means
  850. * it must exist but we don't care what, &must_not_exist means it must
  851. * not exist, otherwise the data it needs. */
  852. static const TDB_DATA *needs(const TDB_DATA *key, const struct op *op)
  853. {
  854. /* Look through for an op in this transaction which needs this key. */
  855. if (starts_transaction(op) || starts_chainlock(op)) {
  856. unsigned int i;
  857. const TDB_DATA *need = NULL;
  858. for (i = 1; i < op->group_len; i++) {
  859. if (key_eq(op[i].key, *key)
  860. || op[i].type == OP_TDB_WIPE_ALL) {
  861. need = needs(key, &op[i]);
  862. /* tdb_exists() is special: there might be
  863. * something in the transaction with more
  864. * specific requirements. Other ops don't have
  865. * specific requirements (eg. store or delete),
  866. * but they change the value so we can't get
  867. * more information from future ops. */
  868. if (op[i].type != OP_TDB_EXISTS)
  869. break;
  870. }
  871. }
  872. return need;
  873. }
  874. switch (op->type) {
  875. /* FIXME: Pull forward deps, since we can deadlock */
  876. case OP_TDB_CHAINLOCK:
  877. case OP_TDB_CHAINLOCK_NONBLOCK:
  878. case OP_TDB_CHAINLOCK_MARK:
  879. case OP_TDB_CHAINLOCK_UNMARK:
  880. case OP_TDB_CHAINUNLOCK:
  881. case OP_TDB_CHAINLOCK_READ:
  882. case OP_TDB_CHAINUNLOCK_READ:
  883. return NULL;
  884. case OP_TDB_APPEND:
  885. if (op->append.pre.dsize == 0)
  886. return &not_exists_or_empty;
  887. return &op->append.pre;
  888. case OP_TDB_STORE:
  889. if (op->flag == TDB_INSERT) {
  890. if (op->ret < 0)
  891. return &must_exist;
  892. else
  893. return &must_not_exist;
  894. } else if (op->flag == TDB_MODIFY) {
  895. if (op->ret < 0)
  896. return &must_not_exist;
  897. else
  898. return &must_exist;
  899. }
  900. /* No flags? Don't care */
  901. return NULL;
  902. case OP_TDB_EXISTS:
  903. if (op->ret == 1)
  904. return &must_exist;
  905. else
  906. return &must_not_exist;
  907. case OP_TDB_PARSE_RECORD:
  908. if (op->ret < 0)
  909. return &must_not_exist;
  910. return &must_exist;
  911. /* FIXME: handle these. */
  912. case OP_TDB_WIPE_ALL:
  913. case OP_TDB_FIRSTKEY:
  914. case OP_TDB_NEXTKEY:
  915. case OP_TDB_GET_SEQNUM:
  916. case OP_TDB_TRAVERSE:
  917. case OP_TDB_TRANSACTION_COMMIT:
  918. case OP_TDB_TRANSACTION_CANCEL:
  919. case OP_TDB_TRANSACTION_START:
  920. return NULL;
  921. case OP_TDB_FETCH:
  922. if (!op->data.dptr)
  923. return &must_not_exist;
  924. return &op->data;
  925. case OP_TDB_DELETE:
  926. if (op->ret < 0)
  927. return &must_not_exist;
  928. return &must_exist;
  929. default:
  930. errx(1, "Unexpected op type %i", op->type);
  931. }
  932. }
  933. /* What's the data after this op? pre if nothing changed. */
  934. static const TDB_DATA *gives(const TDB_DATA *key, const TDB_DATA *pre,
  935. const struct op *op)
  936. {
  937. if (starts_transaction(op) || starts_chainlock(op)) {
  938. unsigned int i;
  939. /* Cancelled transactions don't change anything. */
  940. if (op[op->group_len].type == OP_TDB_TRANSACTION_CANCEL)
  941. return pre;
  942. assert(op[op->group_len].type == OP_TDB_TRANSACTION_COMMIT
  943. || op[op->group_len].type == OP_TDB_CHAINUNLOCK_READ
  944. || op[op->group_len].type == OP_TDB_CHAINUNLOCK);
  945. for (i = 1; i < op->group_len; i++) {
  946. /* This skips nested transactions, too */
  947. if (key_eq(op[i].key, *key)
  948. || op[i].type == OP_TDB_WIPE_ALL)
  949. pre = gives(key, pre, &op[i]);
  950. }
  951. return pre;
  952. }
  953. /* Failed ops don't change state of db. */
  954. if (op->ret < 0)
  955. return pre;
  956. if (op->type == OP_TDB_DELETE || op->type == OP_TDB_WIPE_ALL)
  957. return &tdb_null;
  958. if (op->type == OP_TDB_APPEND)
  959. return &op->append.post;
  960. if (op->type == OP_TDB_STORE)
  961. return &op->data;
  962. return pre;
  963. }
  964. static void add_hash_user(struct keyinfo *hash,
  965. unsigned int h,
  966. struct op *op[],
  967. unsigned int file,
  968. unsigned int op_num)
  969. {
  970. hash[h].user = talloc_realloc(hash, hash[h].user,
  971. struct op_desc, hash[h].num_users+1);
  972. /* If it's in a transaction, it's the transaction which
  973. * matters from an analysis POV. */
  974. if (in_transaction(op[file], op_num)
  975. || in_chainlock(op[file], op_num)) {
  976. unsigned i;
  977. op_num = op[file][op_num].group_start;
  978. /* Don't include twice. */
  979. for (i = 0; i < hash[h].num_users; i++) {
  980. if (hash[h].user[i].file == file
  981. && hash[h].user[i].op_num == op_num)
  982. return;
  983. }
  984. }
  985. hash[h].user[hash[h].num_users].op_num = op_num;
  986. hash[h].user[hash[h].num_users].file = file;
  987. hash[h].num_users++;
  988. }
  989. static struct keyinfo *hash_ops(struct op *op[], unsigned int num_ops[],
  990. unsigned int num)
  991. {
  992. unsigned int i, j, h;
  993. struct keyinfo *hash;
  994. hash = talloc_zero_array(op[0], struct keyinfo, total_keys*2);
  995. for (i = 0; i < num; i++) {
  996. for (j = 1; j < num_ops[i]; j++) {
  997. /* We can't do this on allocation, due to realloc. */
  998. list_head_init(&op[i][j].post);
  999. list_head_init(&op[i][j].pre);
  1000. if (!op[i][j].key.dptr)
  1001. continue;
  1002. h = hash_key(&op[i][j].key) % (total_keys * 2);
  1003. while (!key_eq(hash[h].key, op[i][j].key)) {
  1004. if (!hash[h].key.dptr) {
  1005. hash[h].key = op[i][j].key;
  1006. break;
  1007. }
  1008. h = (h + 1) % (total_keys * 2);
  1009. }
  1010. /* Might as well save some memory if we can. */
  1011. if (op[i][j].key.dptr != hash[h].key.dptr) {
  1012. talloc_free(op[i][j].key.dptr);
  1013. op[i][j].key.dptr = hash[h].key.dptr;
  1014. }
  1015. add_hash_user(hash, h, op, i, j);
  1016. }
  1017. }
  1018. /* Any wipe all entries need adding to all hash entries. */
  1019. for (h = 0; h < total_keys*2; h++) {
  1020. if (!hash[h].num_users)
  1021. continue;
  1022. for (i = 0; i < num_wipe_alls; i++)
  1023. add_hash_user(hash, h, op,
  1024. wipe_alls[i].file, wipe_alls[i].op_num);
  1025. }
  1026. return hash;
  1027. }
  1028. static bool satisfies(const TDB_DATA *key, const TDB_DATA *data,
  1029. const struct op *op)
  1030. {
  1031. const TDB_DATA *need = needs(key, op);
  1032. /* Don't need anything? Cool. */
  1033. if (!need)
  1034. return true;
  1035. /* This should be tdb_null or a real value. */
  1036. assert(data != &must_exist);
  1037. assert(data != &must_not_exist);
  1038. assert(data != &not_exists_or_empty);
  1039. /* Must not exist? data must not exist. */
  1040. if (need == &must_not_exist)
  1041. return data == &tdb_null;
  1042. /* Must exist? */
  1043. if (need == &must_exist)
  1044. return data != &tdb_null;
  1045. /* Either noexist or empty. */
  1046. if (need == &not_exists_or_empty)
  1047. return data->dsize == 0;
  1048. /* Needs something specific. */
  1049. return key_eq(*data, *need);
  1050. }
  1051. static void move_to_front(struct op_desc res[], unsigned off, unsigned elem)
  1052. {
  1053. if (elem != off) {
  1054. struct op_desc tmp = res[elem];
  1055. memmove(res + off + 1, res + off, (elem - off)*sizeof(res[0]));
  1056. res[off] = tmp;
  1057. }
  1058. }
  1059. static void restore_to_pos(struct op_desc res[], unsigned off, unsigned elem)
  1060. {
  1061. if (elem != off) {
  1062. struct op_desc tmp = res[off];
  1063. memmove(res + off, res + off + 1, (elem - off)*sizeof(res[0]));
  1064. res[elem] = tmp;
  1065. }
  1066. }
  1067. static bool sort_deps(char *filename[], struct op *op[],
  1068. struct op_desc res[],
  1069. unsigned off, unsigned num,
  1070. const TDB_DATA *key, const TDB_DATA *data,
  1071. unsigned num_files, unsigned fuzz)
  1072. {
  1073. unsigned int i, files_done;
  1074. struct op *this_op;
  1075. bool done[num_files];
  1076. /* None left? We're sorted. */
  1077. if (off == num)
  1078. return true;
  1079. /* Does this make sequence number go backwards? Allow a little fuzz. */
  1080. if (off > 0) {
  1081. int seqnum1 = op[res[off-1].file][res[off-1].op_num].seqnum;
  1082. int seqnum2 = op[res[off].file][res[off].op_num].seqnum;
  1083. if (seqnum1 - seqnum2 > (int)fuzz) {
  1084. #if DEBUG_DEPS
  1085. printf("Seqnum jump too far (%u -> %u)\n",
  1086. seqnum1, seqnum2);
  1087. #endif
  1088. return false;
  1089. }
  1090. }
  1091. memset(done, 0, sizeof(done));
  1092. /* Since ops within a trace file are ordered, we just need to figure
  1093. * out which file to try next. Since we don't take into account
  1094. * inter-key relationships (which exist by virtue of trace file order),
  1095. * we minimize the chance of harm by trying to keep in seqnum order. */
  1096. for (files_done = 0, i = off; i < num && files_done < num_files; i++) {
  1097. if (done[res[i].file])
  1098. continue;
  1099. this_op = &op[res[i].file][res[i].op_num];
  1100. /* Is what we have good enough for this op? */
  1101. if (satisfies(key, data, this_op)) {
  1102. move_to_front(res, off, i);
  1103. if (sort_deps(filename, op, res, off+1, num,
  1104. key, gives(key, data, this_op),
  1105. num_files, fuzz))
  1106. return true;
  1107. restore_to_pos(res, off, i);
  1108. }
  1109. done[res[i].file] = true;
  1110. files_done++;
  1111. }
  1112. /* No combination worked. */
  1113. return false;
  1114. }
  1115. static void check_dep_sorting(struct op_desc user[], unsigned num_users,
  1116. unsigned num_files)
  1117. {
  1118. #if DEBUG_DEPS
  1119. unsigned int i;
  1120. unsigned minima[num_files];
  1121. memset(minima, 0, sizeof(minima));
  1122. for (i = 0; i < num_users; i++) {
  1123. assert(minima[user[i].file] < user[i].op_num);
  1124. minima[user[i].file] = user[i].op_num;
  1125. }
  1126. #endif
  1127. }
  1128. /* All these ops happen on the same key. Which comes first?
  1129. *
  1130. * This can happen both because read ops or failed write ops don't
  1131. * change sequence number, and also due to race since we access the
  1132. * number unlocked (the race can cause less detectable ordering problems,
  1133. * in which case we'll deadlock and report: fix manually in that case).
  1134. */
  1135. static bool figure_deps(char *filename[], struct op *op[],
  1136. const TDB_DATA *key, const TDB_DATA *data,
  1137. struct op_desc user[],
  1138. unsigned num_users, unsigned num_files)
  1139. {
  1140. unsigned int fuzz;
  1141. /* We prefer to keep strict seqnum order if possible: it's the
  1142. * most likely. We get more lax if that fails. */
  1143. for (fuzz = 0; fuzz < 100; fuzz = (fuzz + 1)*2) {
  1144. if (sort_deps(filename, op, user, 0, num_users, key, data,
  1145. num_files, fuzz))
  1146. break;
  1147. }
  1148. if (fuzz >= 100)
  1149. return false;
  1150. check_dep_sorting(user, num_users, num_files);
  1151. return true;
  1152. }
  1153. /* We're having trouble sorting out dependencies for this key. Assume that it's
  1154. * a pre-existing record in the db, so determine a likely value. */
  1155. static const TDB_DATA *preexisting_data(char *filename[], struct op *op[],
  1156. const TDB_DATA *key,
  1157. struct op_desc *user,
  1158. unsigned int num_users)
  1159. {
  1160. unsigned int i;
  1161. const TDB_DATA *data;
  1162. for (i = 0; i < num_users; i++) {
  1163. data = needs(key, &op[user->file][user->op_num]);
  1164. if (data && data != &must_not_exist) {
  1165. if (!quiet)
  1166. printf("%s:%u: needs pre-existing record\n",
  1167. filename[user->file], user->op_num+1);
  1168. return data;
  1169. }
  1170. }
  1171. return &tdb_null;
  1172. }
  1173. static void sort_ops(struct tdb_context *tdb,
  1174. struct keyinfo hash[], char *filename[], struct op *op[],
  1175. unsigned int num)
  1176. {
  1177. unsigned int h;
  1178. /* Gcc nexted function extension. How cool is this? */
  1179. int compare_seqnum(const void *_a, const void *_b)
  1180. {
  1181. const struct op_desc *a = _a, *b = _b;
  1182. /* First, maintain order within any trace file. */
  1183. if (a->file == b->file)
  1184. return a->op_num - b->op_num;
  1185. /* Otherwise, arrange by seqnum order. */
  1186. if (op[a->file][a->op_num].seqnum !=
  1187. op[b->file][b->op_num].seqnum)
  1188. return op[a->file][a->op_num].seqnum
  1189. - op[b->file][b->op_num].seqnum;
  1190. /* Cancelled transactions are assumed to happen first. */
  1191. if (starts_transaction(&op[a->file][a->op_num])
  1192. && !successful_transaction(&op[a->file][a->op_num]))
  1193. return -1;
  1194. if (starts_transaction(&op[b->file][b->op_num])
  1195. && !successful_transaction(&op[b->file][b->op_num]))
  1196. return 1;
  1197. /* No idea. */
  1198. return 0;
  1199. }
  1200. /* Now sort into seqnum order. */
  1201. for (h = 0; h < total_keys * 2; h++) {
  1202. struct op_desc *user = hash[h].user;
  1203. qsort(user, hash[h].num_users, sizeof(user[0]), compare_seqnum);
  1204. if (!figure_deps(filename, op, &hash[h].key, &tdb_null, user,
  1205. hash[h].num_users, num)) {
  1206. const TDB_DATA *data;
  1207. data = preexisting_data(filename, op, &hash[h].key,
  1208. user, hash[h].num_users);
  1209. /* Give the first op what it wants: does that help? */
  1210. if (!figure_deps(filename, op, &hash[h].key, data, user,
  1211. hash[h].num_users, num))
  1212. fail(filename[user[0].file], user[0].op_num+1,
  1213. "Could not resolve inter-dependencies");
  1214. if (tdb_store(tdb, hash[h].key, *data, TDB_INSERT) != 0)
  1215. errx(1, "Could not store initial value");
  1216. }
  1217. }
  1218. }
  1219. static int destroy_depend(struct depend *dep)
  1220. {
  1221. list_del(&dep->pre_list);
  1222. list_del(&dep->post_list);
  1223. return 0;
  1224. }
  1225. static void add_dependency(void *ctx,
  1226. struct op *op[],
  1227. char *filename[],
  1228. const struct op_desc *needs,
  1229. const struct op_desc *prereq)
  1230. {
  1231. struct depend *dep;
  1232. /* We don't depend on ourselves. */
  1233. if (needs->file == prereq->file) {
  1234. assert(prereq->op_num < needs->op_num);
  1235. return;
  1236. }
  1237. #if DEBUG_DEPS
  1238. printf("%s:%u: depends on %s:%u\n",
  1239. filename[needs->file], needs->op_num+1,
  1240. filename[prereq->file], prereq->op_num+1);
  1241. #endif
  1242. dep = talloc(ctx, struct depend);
  1243. dep->needs = *needs;
  1244. dep->prereq = *prereq;
  1245. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  1246. /* If something in a traverse depends on something in another
  1247. * traverse/transaction, it creates a dependency between the
  1248. * two groups. */
  1249. if ((in_traverse(op[prereq->file], prereq->op_num)
  1250. && (starts_transaction(&op[needs->file][needs->op_num])
  1251. || starts_traverse(&op[needs->file][needs->op_num])))
  1252. || (in_traverse(op[needs->file], needs->op_num)
  1253. && (starts_transaction(&op[prereq->file][prereq->op_num])
  1254. || starts_traverse(&op[prereq->file][prereq->op_num])))) {
  1255. unsigned int start;
  1256. /* We are satisfied by end of group. */
  1257. start = op[prereq->file][prereq->op_num].group_start;
  1258. dep->prereq.op_num = start + op[prereq->file][start].group_len;
  1259. /* And we need that done by start of our group. */
  1260. dep->needs.op_num = op[needs->file][needs->op_num].group_start;
  1261. }
  1262. /* There is also this case:
  1263. * <traverse> <read foo> ...
  1264. * <transaction> ... </transaction> <create foo>
  1265. * Where if we start the traverse then wait, we could block
  1266. * the transaction and deadlock.
  1267. *
  1268. * We try to address this by ensuring that where seqnum indicates it's
  1269. * possible, we wait for <create foo> before *starting* traverse.
  1270. */
  1271. else if (in_traverse(op[needs->file], needs->op_num)) {
  1272. struct op *need = &op[needs->file][needs->op_num];
  1273. if (op[needs->file][need->group_start].seqnum >
  1274. op[prereq->file][prereq->op_num].seqnum) {
  1275. dep->needs.op_num = need->group_start;
  1276. }
  1277. }
  1278. #endif
  1279. /* If you depend on a transaction or chainlock, you actually
  1280. * depend on it ending. */
  1281. if (starts_transaction(&op[prereq->file][dep->prereq.op_num])
  1282. || starts_chainlock(&op[prereq->file][dep->prereq.op_num])) {
  1283. dep->prereq.op_num
  1284. += op[dep->prereq.file][dep->prereq.op_num].group_len;
  1285. #if DEBUG_DEPS
  1286. printf("-> Actually end of transaction %s:%u\n",
  1287. filename[dep->prereq->file], dep->prereq->op_num+1);
  1288. #endif
  1289. } else
  1290. /* We should never create a dependency from middle of
  1291. * a transaction. */
  1292. assert(!in_transaction(op[prereq->file], dep->prereq.op_num)
  1293. || op[prereq->file][dep->prereq.op_num].type
  1294. == OP_TDB_TRANSACTION_COMMIT
  1295. || op[prereq->file][dep->prereq.op_num].type
  1296. == OP_TDB_TRANSACTION_CANCEL);
  1297. list_add(&op[dep->prereq.file][dep->prereq.op_num].post,
  1298. &dep->post_list);
  1299. list_add(&op[dep->needs.file][dep->needs.op_num].pre,
  1300. &dep->pre_list);
  1301. talloc_set_destructor(dep, destroy_depend);
  1302. }
  1303. static bool changes_db(const TDB_DATA *key, const struct op *op)
  1304. {
  1305. return gives(key, NULL, op) != NULL;
  1306. }
  1307. static void depend_on_previous(struct op *op[],
  1308. char *filename[],
  1309. unsigned int num,
  1310. struct op_desc user[],
  1311. unsigned int i,
  1312. int prev)
  1313. {
  1314. bool deps[num];
  1315. int j;
  1316. if (i == 0)
  1317. return;
  1318. if (prev == i - 1) {
  1319. /* Just depend on previous. */
  1320. add_dependency(NULL, op, filename, &user[i], &user[prev]);
  1321. return;
  1322. }
  1323. /* We have to wait for the readers. Find last one in *each* file. */
  1324. memset(deps, 0, sizeof(deps));
  1325. deps[user[i].file] = true;
  1326. for (j = i - 1; j > prev; j--) {
  1327. if (!deps[user[j].file]) {
  1328. add_dependency(NULL, op, filename, &user[i], &user[j]);
  1329. deps[user[j].file] = true;
  1330. }
  1331. }
  1332. }
  1333. /* This is simple, but not complete. We don't take into account
  1334. * indirect dependencies. */
  1335. static void optimize_dependencies(struct op *op[], unsigned int num_ops[],
  1336. unsigned int num)
  1337. {
  1338. unsigned int i, j;
  1339. /* There can only be one real dependency on each file */
  1340. for (i = 0; i < num; i++) {
  1341. for (j = 1; j < num_ops[i]; j++) {
  1342. struct depend *dep, *next;
  1343. struct depend *prev[num];
  1344. memset(prev, 0, sizeof(prev));
  1345. list_for_each_safe(&op[i][j].pre, dep, next, pre_list) {
  1346. if (!prev[dep->prereq.file]) {
  1347. prev[dep->prereq.file] = dep;
  1348. continue;
  1349. }
  1350. if (prev[dep->prereq.file]->prereq.op_num
  1351. < dep->prereq.op_num) {
  1352. talloc_free(prev[dep->prereq.file]);
  1353. prev[dep->prereq.file] = dep;
  1354. } else
  1355. talloc_free(dep);
  1356. }
  1357. }
  1358. }
  1359. for (i = 0; i < num; i++) {
  1360. int deps[num];
  1361. for (j = 0; j < num; j++)
  1362. deps[j] = -1;
  1363. for (j = 1; j < num_ops[i]; j++) {
  1364. struct depend *dep, *next;
  1365. list_for_each_safe(&op[i][j].pre, dep, next, pre_list) {
  1366. if (deps[dep->prereq.file]
  1367. >= (int)dep->prereq.op_num)
  1368. talloc_free(dep);
  1369. else
  1370. deps[dep->prereq.file]
  1371. = dep->prereq.op_num;
  1372. }
  1373. }
  1374. }
  1375. }
  1376. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  1377. /* Force an order among the traversals, so they don't deadlock (as much) */
  1378. static void make_traverse_depends(char *filename[],
  1379. struct op *op[], unsigned int num_ops[],
  1380. unsigned int num)
  1381. {
  1382. unsigned int i, num_traversals = 0;
  1383. int j;
  1384. struct op_desc *desc;
  1385. /* Sort by which one runs first. */
  1386. int compare_traverse_desc(const void *_a, const void *_b)
  1387. {
  1388. const struct op_desc *da = _a, *db = _b;
  1389. const struct op *a = &op[da->file][da->op_num],
  1390. *b = &op[db->file][db->op_num];
  1391. if (a->seqnum != b->seqnum)
  1392. return a->seqnum - b->seqnum;
  1393. /* If they have same seqnum, it means one didn't make any
  1394. * changes. Thus sort by end in that case. */
  1395. return a[a->group_len].seqnum - b[b->group_len].seqnum;
  1396. }
  1397. desc = talloc_array(NULL, struct op_desc, 1);
  1398. /* Count them. */
  1399. for (i = 0; i < num; i++) {
  1400. for (j = 1; j < num_ops[i]; j++) {
  1401. /* Traverse start (ignore those in
  1402. * transactions; they're already covered by
  1403. * transaction dependencies). */
  1404. if (starts_traverse(&op[i][j])
  1405. && !in_transaction(op[i], j)) {
  1406. desc = talloc_realloc(NULL, desc,
  1407. struct op_desc,
  1408. num_traversals+1);
  1409. desc[num_traversals].file = i;
  1410. desc[num_traversals].op_num = j;
  1411. num_traversals++;
  1412. }
  1413. }
  1414. }
  1415. qsort(desc, num_traversals, sizeof(desc[0]), compare_traverse_desc);
  1416. for (i = 1; i < num_traversals; i++) {
  1417. const struct op *prev = &op[desc[i-1].file][desc[i-1].op_num];
  1418. const struct op *curr = &op[desc[i].file][desc[i].op_num];
  1419. /* Read traverses don't depend on each other (read lock). */
  1420. if (prev->type == OP_TDB_TRAVERSE_READ_START
  1421. && curr->type == OP_TDB_TRAVERSE_READ_START)
  1422. continue;
  1423. /* Only make dependency if it's clear. */
  1424. if (compare_traverse_desc(&desc[i], &desc[i-1])) {
  1425. /* i depends on end of traverse i-1. */
  1426. struct op_desc end = desc[i-1];
  1427. end.op_num += prev->group_len;
  1428. add_dependency(NULL, op, filename, &desc[i], &end);
  1429. }
  1430. }
  1431. talloc_free(desc);
  1432. }
  1433. static void set_nonblock(int fd)
  1434. {
  1435. if (fcntl(fd, F_SETFL, fcntl(fd, F_GETFL)|O_NONBLOCK) != 0)
  1436. err(1, "Setting pipe nonblocking");
  1437. }
  1438. static bool handle_backoff(struct op *op[], int fd)
  1439. {
  1440. struct op_desc desc;
  1441. bool handled = false;
  1442. /* Sloppy coding: we assume PIPEBUF never fills. */
  1443. while (read(fd, &desc, sizeof(desc)) != -1) {
  1444. unsigned int i;
  1445. handled = true;
  1446. for (i = desc.op_num; i > 0; i--) {
  1447. if (op[desc.file][i].type == OP_TDB_TRAVERSE) {
  1448. /* We insert a fake end here. */
  1449. op[desc.file][i].type
  1450. = OP_TDB_TRAVERSE_END_EARLY;
  1451. break;
  1452. } else if (starts_traverse(&op[desc.file][i])) {
  1453. unsigned int start = i;
  1454. struct op tmp = op[desc.file][i];
  1455. /* Move the ops outside traverse. */
  1456. memmove(&op[desc.file][i],
  1457. &op[desc.file][i+1],
  1458. (desc.op_num-i-1) * sizeof(op[0][0]));
  1459. op[desc.file][desc.op_num] = tmp;
  1460. while (op[desc.file][i].group_start == start) {
  1461. op[desc.file][i++].group_start
  1462. = desc.op_num;
  1463. }
  1464. break;
  1465. }
  1466. }
  1467. }
  1468. return handled;
  1469. }
  1470. #else /* !TRAVERSALS_TAKE_TRANSACTION_LOCK */
/* Variant built when TRAVERSALS_TAKE_TRANSACTION_LOCK is off: nothing
 * is read from @fd and no backoff is ever reported as handled. */
static bool handle_backoff(struct op *op[], int fd)
{
	return false;
}
  1475. #endif
  1476. static void derive_dependencies(struct tdb_context *tdb,
  1477. char *filename[],
  1478. struct op *op[], unsigned int num_ops[],
  1479. unsigned int num)
  1480. {
  1481. struct keyinfo *hash;
  1482. unsigned int h, i;
  1483. /* Create hash table for faster key lookup. */
  1484. hash = hash_ops(op, num_ops, num);
  1485. /* Sort them by sequence number. */
  1486. sort_ops(tdb, hash, filename, op, num);
  1487. /* Create dependencies back to the last change, rather than
  1488. * creating false dependencies by naively making each one
  1489. * depend on the previous. This has two purposes: it makes
  1490. * later optimization simpler, and it also avoids deadlock with
  1491. * same sequence number ops inside traversals (if one
  1492. * traversal doesn't write anything, two ops can have the same
  1493. * sequence number yet we can create a traversal dependency
  1494. * the other way). */
  1495. for (h = 0; h < total_keys * 2; h++) {
  1496. int prev = -1;
  1497. if (hash[h].num_users < 2)
  1498. continue;
  1499. for (i = 0; i < hash[h].num_users; i++) {
  1500. if (changes_db(&hash[h].key, &op[hash[h].user[i].file]
  1501. [hash[h].user[i].op_num])) {
  1502. depend_on_previous(op, filename, num,
  1503. hash[h].user, i, prev);
  1504. prev = i;
  1505. } else if (prev >= 0)
  1506. add_dependency(hash, op, filename,
  1507. &hash[h].user[i],
  1508. &hash[h].user[prev]);
  1509. }
  1510. }
  1511. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  1512. make_traverse_depends(filename, op, num_ops, num);
  1513. #endif
  1514. optimize_dependencies(op, num_ops, num);
  1515. }
  1516. static struct timeval run_test(char *argv[],
  1517. unsigned int num_ops[],
  1518. unsigned int hashsize[],
  1519. unsigned int tdb_flags[],
  1520. unsigned int open_flags[],
  1521. struct op *op[],
  1522. int fds[2])
  1523. {
  1524. unsigned int i;
  1525. struct timeval start, end, diff;
  1526. bool ok = true;
  1527. for (i = 0; argv[i+2]; i++) {
  1528. struct tdb_context *tdb;
  1529. char c;
  1530. switch (fork()) {
  1531. case -1:
  1532. err(1, "fork failed");
  1533. case 0:
  1534. close(fds[1]);
  1535. tdb = tdb_open(argv[1], hashsize[i],
  1536. tdb_flags[i], open_flags[i], 0600);
  1537. if (!tdb)
  1538. err(1, "Opening tdb %s", argv[1]);
  1539. /* This catches parent exiting. */
  1540. if (read(fds[0], &c, 1) != 1)
  1541. exit(1);
  1542. run_ops(tdb, pipes[i].fd[0], argv+2, op, i, 1,
  1543. num_ops[i], false);
  1544. check_deps(argv[2+i], op[i], num_ops[i]);
  1545. exit(0);
  1546. default:
  1547. break;
  1548. }
  1549. }
  1550. /* Let everything settle. */
  1551. sleep(1);
  1552. if (!quiet)
  1553. printf("Starting run...");
  1554. fflush(stdout);
  1555. gettimeofday(&start, NULL);
  1556. /* Tell them all to go! Any write of sufficient length will do. */
  1557. if (write(fds[1], hashsize, i) != i)
  1558. err(1, "Writing to wakeup pipe");
  1559. for (i = 0; argv[i + 2]; i++) {
  1560. int status;
  1561. wait(&status);
  1562. if (!WIFEXITED(status)) {
  1563. warnx("Child died with signal %i", WTERMSIG(status));
  1564. ok = false;
  1565. } else if (WEXITSTATUS(status) != 0)
  1566. /* Assume child spat out error. */
  1567. ok = false;
  1568. }
  1569. if (!ok)
  1570. exit(1);
  1571. gettimeofday(&end, NULL);
  1572. if (!quiet)
  1573. printf("done\n");
  1574. if (end.tv_usec < start.tv_usec) {
  1575. end.tv_usec += 1000000;
  1576. end.tv_sec--;
  1577. }
  1578. diff.tv_sec = end.tv_sec - start.tv_sec;
  1579. diff.tv_usec = end.tv_usec - start.tv_usec;
  1580. return diff;
  1581. }
  1582. static void init_tdb(struct tdb_context *master_tdb,
  1583. const char *name, unsigned int hashsize)
  1584. {
  1585. TDB_DATA key, data;
  1586. struct tdb_context *tdb;
  1587. tdb = tdb_open(name, hashsize, TDB_CLEAR_IF_FIRST|TDB_NOSYNC,
  1588. O_CREAT|O_TRUNC|O_RDWR, 0600);
  1589. if (!tdb)
  1590. errx(1, "opening tdb %s", name);
  1591. for (key = tdb_firstkey(master_tdb);
  1592. key.dptr;
  1593. key = tdb_nextkey(master_tdb, key)) {
  1594. data = tdb_fetch(master_tdb, key);
  1595. if (tdb_store(tdb, key, data, TDB_INSERT) != 0)
  1596. errx(1, "Failed to store initial key");
  1597. }
  1598. tdb_close(tdb);
  1599. }
  1600. int main(int argc, char *argv[])
  1601. {
  1602. struct timeval diff;
  1603. unsigned int i, num_ops[argc], hashsize[argc], tdb_flags[argc], open_flags[argc];
  1604. struct op *op[argc];
  1605. int fds[2];
  1606. struct tdb_context *master;
  1607. unsigned int runs = 1;
  1608. if (argc < 3)
  1609. errx(1, "Usage: %s [--quiet] [-n <number>] <tdbfile> <tracefile>...", argv[0]);
  1610. if (streq(argv[1], "--quiet")) {
  1611. quiet = true;
  1612. argv++;
  1613. argc--;
  1614. }
  1615. if (streq(argv[1], "-n")) {
  1616. runs = atoi(argv[2]);
  1617. argv += 2;
  1618. argc -= 2;
  1619. }
  1620. pipes = talloc_array(NULL, struct pipe, argc - 1);
  1621. for (i = 0; i < argc - 2; i++) {
  1622. if (!quiet)
  1623. printf("Loading tracefile %s...", argv[2+i]);
  1624. fflush(stdout);
  1625. op[i] = load_tracefile(argv+2, i, &num_ops[i], &hashsize[i],
  1626. &tdb_flags[i], &open_flags[i]);
  1627. if (pipe(pipes[i].fd) != 0)
  1628. err(1, "creating pipe");
  1629. /* Don't truncate, or clear if first: we do that. */
  1630. open_flags[i] &= ~(O_TRUNC);
  1631. tdb_flags[i] &= ~(TDB_CLEAR_IF_FIRST);
  1632. /* Open NOSYNC, to save time. */
  1633. tdb_flags[i] |= TDB_NOSYNC;
  1634. if (!quiet)
  1635. printf("done\n");
  1636. }
  1637. /* Dependency may figure we need to create seed records. */
  1638. master = tdb_open(NULL, 0, TDB_INTERNAL, O_RDWR, 0);
  1639. if (!quiet) {
  1640. printf("Calculating inter-dependencies...");
  1641. fflush(stdout);
  1642. }
  1643. derive_dependencies(master, argv+2, op, num_ops, i);
  1644. if (!quiet)
  1645. printf("done\n");
  1646. for (i = 0; i < runs; i++) {
  1647. init_tdb(master, argv[1], hashsize[0]);
  1648. /* Don't fork for single arg case: simple debugging. */
  1649. if (argc == 3) {
  1650. struct timeval start, end;
  1651. struct tdb_context *tdb;
  1652. tdb = tdb_open(argv[1], hashsize[0], tdb_flags[0],
  1653. open_flags[0], 0600);
  1654. if (!quiet) {
  1655. printf("Single threaded run...");
  1656. fflush(stdout);
  1657. }
  1658. gettimeofday(&start, NULL);
  1659. run_ops(tdb, pipes[0].fd[0], argv+2, op, 0, 1,
  1660. num_ops[0], false);
  1661. gettimeofday(&end, NULL);
  1662. if (!quiet)
  1663. printf("done\n");
  1664. tdb_close(tdb);
  1665. check_deps(argv[2], op[0], num_ops[0]);
  1666. if (end.tv_usec < start.tv_usec) {
  1667. end.tv_usec += 1000000;
  1668. end.tv_sec--;
  1669. }
  1670. diff.tv_sec = end.tv_sec - start.tv_sec;
  1671. diff.tv_usec = end.tv_usec - start.tv_usec;
  1672. goto print_time;
  1673. }
  1674. if (pipe(fds) != 0)
  1675. err(1, "creating pipe");
  1676. #if TRAVERSALS_TAKE_TRANSACTION_LOCK
  1677. if (pipe(pipes[argc-2].fd) != 0)
  1678. err(1, "creating pipe");
  1679. backoff_fd = pipes[argc-2].fd[1];
  1680. set_nonblock(pipes[argc-2].fd[1]);
  1681. set_nonblock(pipes[argc-2].fd[0]);
  1682. #endif
  1683. do {
  1684. diff = run_test(argv, num_ops, hashsize, tdb_flags,
  1685. open_flags, op, fds);
  1686. } while (handle_backoff(op, pipes[argc-2].fd[0]));
  1687. print_time:
  1688. if (!quiet)
  1689. printf("Time replaying: ");
  1690. printf("%lu usec\n", diff.tv_sec * 1000000UL + diff.tv_usec);
  1691. }
  1692. exit(0);
  1693. }