check.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816
  1. /*
  2. Trivial Database 2: free list/block handling
  3. Copyright (C) Rusty Russell 2010
  4. This library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 3 of the License, or (at your option) any later version.
  8. This library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with this library; if not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include "private.h"
  16. #include <ccan/likely/likely.h>
  17. #include <ccan/asearch/asearch.h>
  18. /* We keep an ordered array of offsets. */
  19. static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
  20. {
  21. tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t));
  22. if (!new)
  23. return false;
  24. new[(*num)++] = off;
  25. *arr = new;
  26. return true;
  27. }
  28. static enum TDB_ERROR check_header(struct tdb_context *tdb, tdb_off_t *recovery,
  29. uint64_t *features)
  30. {
  31. uint64_t hash_test;
  32. struct tdb_header hdr;
  33. enum TDB_ERROR ecode;
  34. ecode = tdb_read_convert(tdb, 0, &hdr, sizeof(hdr));
  35. if (ecode != TDB_SUCCESS) {
  36. return ecode;
  37. }
  38. /* magic food should not be converted, so convert back. */
  39. tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
  40. hash_test = TDB_HASH_MAGIC;
  41. hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
  42. if (hdr.hash_test != hash_test) {
  43. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  44. "check: hash test %llu should be %llu",
  45. (long long)hdr.hash_test,
  46. (long long)hash_test);
  47. }
  48. if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
  49. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  50. "check: bad magic '%.*s'",
  51. (unsigned)sizeof(hdr.magic_food),
  52. hdr.magic_food);
  53. }
  54. /* Features which are used must be a subset of features offered. */
  55. if (hdr.features_used & ~hdr.features_offered) {
  56. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  57. "check: features used (0x%llx) which"
  58. " are not offered (0x%llx)",
  59. (long long)hdr.features_used,
  60. (long long)hdr.features_offered);
  61. }
  62. *features = hdr.features_offered;
  63. *recovery = hdr.recovery;
  64. if (*recovery) {
  65. if (*recovery < sizeof(hdr)
  66. || *recovery > tdb->file->map_size) {
  67. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  68. "tdb_check:"
  69. " invalid recovery offset %zu",
  70. (size_t)*recovery);
  71. }
  72. }
  73. /* Don't check reserved: they *can* be used later. */
  74. return TDB_SUCCESS;
  75. }
  76. static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
  77. tdb_off_t off, unsigned int group_bits,
  78. uint64_t hprefix,
  79. unsigned hprefix_bits,
  80. tdb_off_t used[],
  81. size_t num_used,
  82. size_t *num_found,
  83. enum TDB_ERROR (*check)(TDB_DATA,
  84. TDB_DATA, void *),
  85. void *private_data);
  86. static enum TDB_ERROR check_hash_chain(struct tdb_context *tdb,
  87. tdb_off_t off,
  88. uint64_t hash,
  89. tdb_off_t used[],
  90. size_t num_used,
  91. size_t *num_found,
  92. enum TDB_ERROR (*check)(TDB_DATA,
  93. TDB_DATA,
  94. void *),
  95. void *private_data)
  96. {
  97. struct tdb_used_record rec;
  98. enum TDB_ERROR ecode;
  99. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
  100. if (ecode != TDB_SUCCESS) {
  101. return ecode;
  102. }
  103. if (rec_magic(&rec) != TDB_CHAIN_MAGIC) {
  104. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  105. "tdb_check: Bad hash chain magic %llu",
  106. (long long)rec_magic(&rec));
  107. }
  108. if (rec_data_length(&rec) != sizeof(struct tdb_chain)) {
  109. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  110. "tdb_check:"
  111. " Bad hash chain length %llu vs %zu",
  112. (long long)rec_data_length(&rec),
  113. sizeof(struct tdb_chain));
  114. }
  115. if (rec_key_length(&rec) != 0) {
  116. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  117. "tdb_check: Bad hash chain key length %llu",
  118. (long long)rec_key_length(&rec));
  119. }
  120. if (rec_hash(&rec) != 0) {
  121. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  122. "tdb_check: Bad hash chain hash value %llu",
  123. (long long)rec_hash(&rec));
  124. }
  125. off += sizeof(rec);
  126. ecode = check_hash_tree(tdb, off, 0, hash, 64,
  127. used, num_used, num_found, check, private_data);
  128. if (ecode != TDB_SUCCESS) {
  129. return ecode;
  130. }
  131. off = tdb_read_off(tdb, off + offsetof(struct tdb_chain, next));
  132. if (TDB_OFF_IS_ERR(off)) {
  133. return off;
  134. }
  135. if (off == 0)
  136. return TDB_SUCCESS;
  137. (*num_found)++;
  138. return check_hash_chain(tdb, off, hash, used, num_used, num_found,
  139. check, private_data);
  140. }
  141. static enum TDB_ERROR check_hash_record(struct tdb_context *tdb,
  142. tdb_off_t off,
  143. uint64_t hprefix,
  144. unsigned hprefix_bits,
  145. tdb_off_t used[],
  146. size_t num_used,
  147. size_t *num_found,
  148. enum TDB_ERROR (*check)(TDB_DATA,
  149. TDB_DATA,
  150. void *),
  151. void *private_data)
  152. {
  153. struct tdb_used_record rec;
  154. enum TDB_ERROR ecode;
  155. if (hprefix_bits >= 64)
  156. return check_hash_chain(tdb, off, hprefix, used, num_used,
  157. num_found, check, private_data);
  158. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
  159. if (ecode != TDB_SUCCESS) {
  160. return ecode;
  161. }
  162. if (rec_magic(&rec) != TDB_HTABLE_MAGIC) {
  163. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  164. "tdb_check: Bad hash table magic %llu",
  165. (long long)rec_magic(&rec));
  166. }
  167. if (rec_data_length(&rec)
  168. != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) {
  169. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  170. "tdb_check:"
  171. " Bad hash table length %llu vs %llu",
  172. (long long)rec_data_length(&rec),
  173. (long long)sizeof(tdb_off_t)
  174. << TDB_SUBLEVEL_HASH_BITS);
  175. }
  176. if (rec_key_length(&rec) != 0) {
  177. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  178. "tdb_check: Bad hash table key length %llu",
  179. (long long)rec_key_length(&rec));
  180. }
  181. if (rec_hash(&rec) != 0) {
  182. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  183. "tdb_check: Bad hash table hash value %llu",
  184. (long long)rec_hash(&rec));
  185. }
  186. off += sizeof(rec);
  187. return check_hash_tree(tdb, off,
  188. TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
  189. hprefix, hprefix_bits,
  190. used, num_used, num_found, check, private_data);
  191. }
  192. static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
  193. {
  194. /* Can overflow an int. */
  195. return *a > *b ? 1
  196. : *a < *b ? -1
  197. : 0;
  198. }
  199. static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
  200. {
  201. *used += num;
  202. return (h >> (64 - *used)) & ((1U << num) - 1);
  203. }
  204. static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
  205. tdb_off_t off, unsigned int group_bits,
  206. uint64_t hprefix,
  207. unsigned hprefix_bits,
  208. tdb_off_t used[],
  209. size_t num_used,
  210. size_t *num_found,
  211. enum TDB_ERROR (*check)(TDB_DATA,
  212. TDB_DATA, void *),
  213. void *private_data)
  214. {
  215. unsigned int g, b;
  216. const tdb_off_t *hash;
  217. struct tdb_used_record rec;
  218. enum TDB_ERROR ecode;
  219. hash = tdb_access_read(tdb, off,
  220. sizeof(tdb_off_t)
  221. << (group_bits + TDB_HASH_GROUP_BITS),
  222. true);
  223. if (TDB_PTR_IS_ERR(hash)) {
  224. return TDB_PTR_ERR(hash);
  225. }
  226. for (g = 0; g < (1 << group_bits); g++) {
  227. const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
  228. for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
  229. unsigned int bucket, i, used_bits;
  230. uint64_t h;
  231. tdb_off_t *p;
  232. if (group[b] == 0)
  233. continue;
  234. off = group[b] & TDB_OFF_MASK;
  235. p = asearch(&off, used, num_used, off_cmp);
  236. if (!p) {
  237. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  238. TDB_LOG_ERROR,
  239. "tdb_check: Invalid offset"
  240. " %llu in hash",
  241. (long long)off);
  242. goto fail;
  243. }
  244. /* Mark it invalid. */
  245. *p ^= 1;
  246. (*num_found)++;
  247. if (hprefix_bits == 64) {
  248. /* Chained entries are unordered. */
  249. if (is_subhash(group[b])) {
  250. ecode = TDB_ERR_CORRUPT;
  251. tdb_logerr(tdb, ecode,
  252. TDB_LOG_ERROR,
  253. "tdb_check: Invalid chain"
  254. " entry subhash");
  255. goto fail;
  256. }
  257. h = hash_record(tdb, off);
  258. if (h != hprefix) {
  259. ecode = TDB_ERR_CORRUPT;
  260. tdb_logerr(tdb, ecode,
  261. TDB_LOG_ERROR,
  262. "check: bad hash chain"
  263. " placement"
  264. " 0x%llx vs 0x%llx",
  265. (long long)h,
  266. (long long)hprefix);
  267. goto fail;
  268. }
  269. ecode = tdb_read_convert(tdb, off, &rec,
  270. sizeof(rec));
  271. if (ecode != TDB_SUCCESS) {
  272. goto fail;
  273. }
  274. goto check;
  275. }
  276. if (is_subhash(group[b])) {
  277. uint64_t subprefix;
  278. subprefix = (hprefix
  279. << (group_bits + TDB_HASH_GROUP_BITS))
  280. + g * (1 << TDB_HASH_GROUP_BITS) + b;
  281. ecode = check_hash_record(tdb,
  282. group[b] & TDB_OFF_MASK,
  283. subprefix,
  284. hprefix_bits
  285. + group_bits
  286. + TDB_HASH_GROUP_BITS,
  287. used, num_used, num_found,
  288. check, private_data);
  289. if (ecode != TDB_SUCCESS) {
  290. goto fail;
  291. }
  292. continue;
  293. }
  294. /* A normal entry */
  295. /* Does it belong here at all? */
  296. h = hash_record(tdb, off);
  297. used_bits = 0;
  298. if (get_bits(h, hprefix_bits, &used_bits) != hprefix
  299. && hprefix_bits) {
  300. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  301. TDB_LOG_ERROR,
  302. "check: bad hash placement"
  303. " 0x%llx vs 0x%llx",
  304. (long long)h,
  305. (long long)hprefix);
  306. goto fail;
  307. }
  308. /* Does it belong in this group? */
  309. if (get_bits(h, group_bits, &used_bits) != g) {
  310. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  311. TDB_LOG_ERROR,
  312. "check: bad group %llu"
  313. " vs %u",
  314. (long long)h, g);
  315. goto fail;
  316. }
  317. /* Are bucket bits correct? */
  318. bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
  319. if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
  320. != bucket) {
  321. used_bits -= TDB_HASH_GROUP_BITS;
  322. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  323. TDB_LOG_ERROR,
  324. "check: bad bucket %u vs %u",
  325. (unsigned)get_bits(h,
  326. TDB_HASH_GROUP_BITS,
  327. &used_bits),
  328. bucket);
  329. goto fail;
  330. }
  331. /* There must not be any zero entries between
  332. * the bucket it belongs in and this one! */
  333. for (i = bucket;
  334. i != b;
  335. i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
  336. if (group[i] == 0) {
  337. ecode = TDB_ERR_CORRUPT;
  338. tdb_logerr(tdb, ecode,
  339. TDB_LOG_ERROR,
  340. "check: bad group placement"
  341. " %u vs %u",
  342. b, bucket);
  343. goto fail;
  344. }
  345. }
  346. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
  347. if (ecode != TDB_SUCCESS) {
  348. goto fail;
  349. }
  350. /* Bottom bits must match header. */
  351. if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
  352. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  353. TDB_LOG_ERROR,
  354. "tdb_check: Bad hash magic"
  355. " at offset %llu"
  356. " (0x%llx vs 0x%llx)",
  357. (long long)off,
  358. (long long)h,
  359. (long long)rec_hash(&rec));
  360. goto fail;
  361. }
  362. check:
  363. if (check) {
  364. TDB_DATA key, data;
  365. key.dsize = rec_key_length(&rec);
  366. data.dsize = rec_data_length(&rec);
  367. key.dptr = (void *)tdb_access_read(tdb,
  368. off + sizeof(rec),
  369. key.dsize + data.dsize,
  370. false);
  371. if (TDB_PTR_IS_ERR(key.dptr)) {
  372. ecode = TDB_PTR_ERR(key.dptr);
  373. goto fail;
  374. }
  375. data.dptr = key.dptr + key.dsize;
  376. ecode = check(key, data, private_data);
  377. if (ecode != TDB_SUCCESS) {
  378. goto fail;
  379. }
  380. tdb_access_release(tdb, key.dptr);
  381. }
  382. }
  383. }
  384. tdb_access_release(tdb, hash);
  385. return TDB_SUCCESS;
  386. fail:
  387. tdb_access_release(tdb, hash);
  388. return ecode;
  389. }
  390. static enum TDB_ERROR check_hash(struct tdb_context *tdb,
  391. tdb_off_t used[],
  392. size_t num_used, size_t num_ftables,
  393. int (*check)(TDB_DATA, TDB_DATA, void *),
  394. void *private_data)
  395. {
  396. /* Free tables also show up as used. */
  397. size_t num_found = num_ftables;
  398. enum TDB_ERROR ecode;
  399. ecode = check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
  400. TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
  401. 0, 0, used, num_used, &num_found,
  402. check, private_data);
  403. if (ecode == TDB_SUCCESS) {
  404. if (num_found != num_used) {
  405. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  406. "tdb_check: Not all entries"
  407. " are in hash");
  408. }
  409. }
  410. return ecode;
  411. }
  412. static enum TDB_ERROR check_free(struct tdb_context *tdb,
  413. tdb_off_t off,
  414. const struct tdb_free_record *frec,
  415. tdb_off_t prev, unsigned int ftable,
  416. unsigned int bucket)
  417. {
  418. enum TDB_ERROR ecode;
  419. if (frec_magic(frec) != TDB_FREE_MAGIC) {
  420. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  421. "tdb_check: offset %llu bad magic 0x%llx",
  422. (long long)off,
  423. (long long)frec->magic_and_prev);
  424. }
  425. if (frec_ftable(frec) != ftable) {
  426. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  427. "tdb_check: offset %llu bad freetable %u",
  428. (long long)off, frec_ftable(frec));
  429. }
  430. ecode = tdb->methods->oob(tdb, off
  431. + frec_len(frec)
  432. + sizeof(struct tdb_used_record),
  433. false);
  434. if (ecode != TDB_SUCCESS) {
  435. return ecode;
  436. }
  437. if (size_to_bucket(frec_len(frec)) != bucket) {
  438. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  439. "tdb_check: offset %llu in wrong bucket"
  440. " (%u vs %u)",
  441. (long long)off,
  442. bucket, size_to_bucket(frec_len(frec)));
  443. }
  444. if (prev != frec_prev(frec)) {
  445. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  446. "tdb_check: offset %llu bad prev"
  447. " (%llu vs %llu)",
  448. (long long)off,
  449. (long long)prev, (long long)frec_len(frec));
  450. }
  451. return TDB_SUCCESS;
  452. }
  453. static enum TDB_ERROR check_free_table(struct tdb_context *tdb,
  454. tdb_off_t ftable_off,
  455. unsigned ftable_num,
  456. tdb_off_t fr[],
  457. size_t num_free,
  458. size_t *num_found)
  459. {
  460. struct tdb_freetable ft;
  461. tdb_off_t h;
  462. unsigned int i;
  463. enum TDB_ERROR ecode;
  464. ecode = tdb_read_convert(tdb, ftable_off, &ft, sizeof(ft));
  465. if (ecode != TDB_SUCCESS) {
  466. return ecode;
  467. }
  468. if (rec_magic(&ft.hdr) != TDB_FTABLE_MAGIC
  469. || rec_key_length(&ft.hdr) != 0
  470. || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)
  471. || rec_hash(&ft.hdr) != 0) {
  472. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  473. "tdb_check: Invalid header on free table");
  474. }
  475. for (i = 0; i < TDB_FREE_BUCKETS; i++) {
  476. tdb_off_t off, prev = 0, *p;
  477. struct tdb_free_record f;
  478. h = bucket_off(ftable_off, i);
  479. for (off = tdb_read_off(tdb, h); off; off = f.next) {
  480. if (TDB_OFF_IS_ERR(off)) {
  481. return off;
  482. }
  483. ecode = tdb_read_convert(tdb, off, &f, sizeof(f));
  484. if (ecode != TDB_SUCCESS) {
  485. return ecode;
  486. }
  487. ecode = check_free(tdb, off, &f, prev, ftable_num, i);
  488. if (ecode != TDB_SUCCESS) {
  489. return ecode;
  490. }
  491. /* FIXME: Check hash bits */
  492. p = asearch(&off, fr, num_free, off_cmp);
  493. if (!p) {
  494. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  495. TDB_LOG_ERROR,
  496. "tdb_check: Invalid offset"
  497. " %llu in free table",
  498. (long long)off);
  499. }
  500. /* Mark it invalid. */
  501. *p ^= 1;
  502. (*num_found)++;
  503. prev = off;
  504. }
  505. }
  506. return TDB_SUCCESS;
  507. }
  508. /* Slow, but should be very rare. */
  509. tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off)
  510. {
  511. size_t len;
  512. enum TDB_ERROR ecode;
  513. for (len = 0; off + len < tdb->file->map_size; len++) {
  514. char c;
  515. ecode = tdb->methods->tread(tdb, off, &c, 1);
  516. if (ecode != TDB_SUCCESS) {
  517. return ecode;
  518. }
  519. if (c != 0 && c != 0x43)
  520. break;
  521. }
  522. return len;
  523. }
  524. static enum TDB_ERROR check_linear(struct tdb_context *tdb,
  525. tdb_off_t **used, size_t *num_used,
  526. tdb_off_t **fr, size_t *num_free,
  527. uint64_t features, tdb_off_t recovery)
  528. {
  529. tdb_off_t off;
  530. tdb_len_t len;
  531. enum TDB_ERROR ecode;
  532. bool found_recovery = false;
  533. for (off = sizeof(struct tdb_header);
  534. off < tdb->file->map_size;
  535. off += len) {
  536. union {
  537. struct tdb_used_record u;
  538. struct tdb_free_record f;
  539. struct tdb_recovery_record r;
  540. } rec;
  541. /* r is larger: only get that if we need to. */
  542. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.f));
  543. if (ecode != TDB_SUCCESS) {
  544. return ecode;
  545. }
  546. /* If we crash after ftruncate, we can get zeroes or fill. */
  547. if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC
  548. || rec.r.magic == 0x4343434343434343ULL) {
  549. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
  550. if (ecode != TDB_SUCCESS) {
  551. return ecode;
  552. }
  553. if (recovery == off) {
  554. found_recovery = true;
  555. len = sizeof(rec.r) + rec.r.max_len;
  556. } else {
  557. len = dead_space(tdb, off);
  558. if (TDB_OFF_IS_ERR(len)) {
  559. return len;
  560. }
  561. if (len < sizeof(rec.r)) {
  562. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  563. TDB_LOG_ERROR,
  564. "tdb_check: invalid"
  565. " dead space at %zu",
  566. (size_t)off);
  567. }
  568. tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
  569. "Dead space at %zu-%zu (of %zu)",
  570. (size_t)off, (size_t)(off + len),
  571. (size_t)tdb->file->map_size);
  572. }
  573. } else if (rec.r.magic == TDB_RECOVERY_MAGIC) {
  574. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
  575. if (ecode != TDB_SUCCESS) {
  576. return ecode;
  577. }
  578. if (recovery != off) {
  579. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  580. TDB_LOG_ERROR,
  581. "tdb_check: unexpected"
  582. " recovery record at offset"
  583. " %zu",
  584. (size_t)off);
  585. }
  586. if (rec.r.len > rec.r.max_len) {
  587. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  588. TDB_LOG_ERROR,
  589. "tdb_check: invalid recovery"
  590. " length %zu",
  591. (size_t)rec.r.len);
  592. }
  593. if (rec.r.eof > tdb->file->map_size) {
  594. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  595. TDB_LOG_ERROR,
  596. "tdb_check: invalid old EOF"
  597. " %zu", (size_t)rec.r.eof);
  598. }
  599. found_recovery = true;
  600. len = sizeof(rec.r) + rec.r.max_len;
  601. } else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) {
  602. len = sizeof(rec.u) + frec_len(&rec.f);
  603. if (off + len > tdb->file->map_size) {
  604. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  605. TDB_LOG_ERROR,
  606. "tdb_check: free overlength"
  607. " %llu at offset %llu",
  608. (long long)len,
  609. (long long)off);
  610. }
  611. /* This record should be in free lists. */
  612. if (frec_ftable(&rec.f) != TDB_FTABLE_NONE
  613. && !append(fr, num_free, off)) {
  614. return tdb_logerr(tdb, TDB_ERR_OOM,
  615. TDB_LOG_ERROR,
  616. "tdb_check: tracking %zu'th"
  617. " free record.", *num_free);
  618. }
  619. } else if (rec_magic(&rec.u) == TDB_USED_MAGIC
  620. || rec_magic(&rec.u) == TDB_CHAIN_MAGIC
  621. || rec_magic(&rec.u) == TDB_HTABLE_MAGIC
  622. || rec_magic(&rec.u) == TDB_FTABLE_MAGIC) {
  623. uint64_t klen, dlen, extra;
  624. /* This record is used! */
  625. if (!append(used, num_used, off)) {
  626. return tdb_logerr(tdb, TDB_ERR_OOM,
  627. TDB_LOG_ERROR,
  628. "tdb_check: tracking %zu'th"
  629. " used record.", *num_used);
  630. }
  631. klen = rec_key_length(&rec.u);
  632. dlen = rec_data_length(&rec.u);
  633. extra = rec_extra_padding(&rec.u);
  634. len = sizeof(rec.u) + klen + dlen + extra;
  635. if (off + len > tdb->file->map_size) {
  636. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  637. TDB_LOG_ERROR,
  638. "tdb_check: used overlength"
  639. " %llu at offset %llu",
  640. (long long)len,
  641. (long long)off);
  642. }
  643. if (len < sizeof(rec.f)) {
  644. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  645. TDB_LOG_ERROR,
  646. "tdb_check: too short record"
  647. " %llu at %llu",
  648. (long long)len,
  649. (long long)off);
  650. }
  651. /* Check that records have correct 0 at end (but may
  652. * not in future). */
  653. if (extra && !features) {
  654. const char *p;
  655. char c;
  656. p = tdb_access_read(tdb, off + sizeof(rec.u)
  657. + klen + dlen, 1, false);
  658. if (TDB_PTR_IS_ERR(p))
  659. return TDB_PTR_ERR(p);
  660. c = *p;
  661. tdb_access_release(tdb, p);
  662. if (c != '\0') {
  663. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  664. TDB_LOG_ERROR,
  665. "tdb_check:"
  666. " non-zero extra"
  667. " at %llu",
  668. (long long)off);
  669. }
  670. }
  671. } else {
  672. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  673. TDB_LOG_ERROR,
  674. "tdb_check: Bad magic 0x%llx"
  675. " at offset %zu",
  676. (long long)rec_magic(&rec.u),
  677. (size_t)off);
  678. }
  679. }
  680. /* We must have found recovery area if there was one. */
  681. if (recovery != 0 && !found_recovery) {
  682. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  683. "tdb_check: expected a recovery area at %zu",
  684. (size_t)recovery);
  685. }
  686. return TDB_SUCCESS;
  687. }
  688. enum TDB_ERROR tdb_check_(struct tdb_context *tdb,
  689. enum TDB_ERROR (*check)(TDB_DATA key, TDB_DATA data,
  690. void *private),
  691. void *private)
  692. {
  693. tdb_off_t *fr = NULL, *used = NULL, ft, recovery;
  694. size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0;
  695. uint64_t features;
  696. enum TDB_ERROR ecode;
  697. ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
  698. if (ecode != TDB_SUCCESS) {
  699. return ecode;
  700. }
  701. ecode = tdb_lock_expand(tdb, F_RDLCK);
  702. if (ecode != TDB_SUCCESS) {
  703. tdb_allrecord_unlock(tdb, F_RDLCK);
  704. return ecode;
  705. }
  706. ecode = check_header(tdb, &recovery, &features);
  707. if (ecode != TDB_SUCCESS)
  708. goto out;
  709. /* First we do a linear scan, checking all records. */
  710. ecode = check_linear(tdb, &used, &num_used, &fr, &num_free, features,
  711. recovery);
  712. if (ecode != TDB_SUCCESS)
  713. goto out;
  714. for (ft = first_ftable(tdb); ft; ft = next_ftable(tdb, ft)) {
  715. if (TDB_OFF_IS_ERR(ft)) {
  716. ecode = ft;
  717. goto out;
  718. }
  719. ecode = check_free_table(tdb, ft, num_ftables, fr, num_free,
  720. &num_found);
  721. if (ecode != TDB_SUCCESS)
  722. goto out;
  723. num_ftables++;
  724. }
  725. /* FIXME: Check key uniqueness? */
  726. ecode = check_hash(tdb, used, num_used, num_ftables, check, private);
  727. if (ecode != TDB_SUCCESS)
  728. goto out;
  729. if (num_found != num_free) {
  730. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  731. "tdb_check: Not all entries are in"
  732. " free table");
  733. }
  734. out:
  735. tdb_allrecord_unlock(tdb, F_RDLCK);
  736. tdb_unlock_expand(tdb, F_RDLCK);
  737. free(fr);
  738. free(used);
  739. return ecode;
  740. }