check.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835
  1. /*
  2. Trivial Database 2: free list/block handling
  3. Copyright (C) Rusty Russell 2010
  4. This library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 3 of the License, or (at your option) any later version.
  8. This library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with this library; if not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include "private.h"
  16. #include <ccan/likely/likely.h>
  17. #include <ccan/asearch/asearch.h>
  18. /* We keep an ordered array of offsets. */
  19. static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off)
  20. {
  21. tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t));
  22. if (!new)
  23. return false;
  24. new[(*num)++] = off;
  25. *arr = new;
  26. return true;
  27. }
  28. static enum TDB_ERROR check_header(struct tdb_context *tdb, tdb_off_t *recovery,
  29. uint64_t *features)
  30. {
  31. uint64_t hash_test;
  32. struct tdb_header hdr;
  33. enum TDB_ERROR ecode;
  34. ecode = tdb_read_convert(tdb, 0, &hdr, sizeof(hdr));
  35. if (ecode != TDB_SUCCESS) {
  36. return ecode;
  37. }
  38. /* magic food should not be converted, so convert back. */
  39. tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food));
  40. hash_test = TDB_HASH_MAGIC;
  41. hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test));
  42. if (hdr.hash_test != hash_test) {
  43. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  44. "check: hash test %llu should be %llu",
  45. (long long)hdr.hash_test,
  46. (long long)hash_test);
  47. }
  48. if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) {
  49. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  50. "check: bad magic '%.*s'",
  51. (unsigned)sizeof(hdr.magic_food),
  52. hdr.magic_food);
  53. }
  54. /* Features which are used must be a subset of features offered. */
  55. if (hdr.features_used & ~hdr.features_offered) {
  56. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  57. "check: features used (0x%llx) which"
  58. " are not offered (0x%llx)",
  59. (long long)hdr.features_used,
  60. (long long)hdr.features_offered);
  61. }
  62. *features = hdr.features_offered;
  63. *recovery = hdr.recovery;
  64. if (*recovery) {
  65. if (*recovery < sizeof(hdr)
  66. || *recovery > tdb->file->map_size) {
  67. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  68. "tdb_check:"
  69. " invalid recovery offset %zu",
  70. (size_t)*recovery);
  71. }
  72. }
  73. /* Don't check reserved: they *can* be used later. */
  74. return TDB_SUCCESS;
  75. }
  76. static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
  77. tdb_off_t off, unsigned int group_bits,
  78. uint64_t hprefix,
  79. unsigned hprefix_bits,
  80. tdb_off_t used[],
  81. size_t num_used,
  82. size_t *num_found,
  83. enum TDB_ERROR (*check)(TDB_DATA,
  84. TDB_DATA, void *),
  85. void *data);
  86. static enum TDB_ERROR check_hash_chain(struct tdb_context *tdb,
  87. tdb_off_t off,
  88. uint64_t hash,
  89. tdb_off_t used[],
  90. size_t num_used,
  91. size_t *num_found,
  92. enum TDB_ERROR (*check)(TDB_DATA,
  93. TDB_DATA,
  94. void *),
  95. void *data)
  96. {
  97. struct tdb_used_record rec;
  98. enum TDB_ERROR ecode;
  99. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
  100. if (ecode != TDB_SUCCESS) {
  101. return ecode;
  102. }
  103. if (rec_magic(&rec) != TDB_CHAIN_MAGIC) {
  104. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  105. "tdb_check: Bad hash chain magic %llu",
  106. (long long)rec_magic(&rec));
  107. }
  108. if (rec_data_length(&rec) != sizeof(struct tdb_chain)) {
  109. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  110. "tdb_check:"
  111. " Bad hash chain length %llu vs %zu",
  112. (long long)rec_data_length(&rec),
  113. sizeof(struct tdb_chain));
  114. }
  115. if (rec_key_length(&rec) != 0) {
  116. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  117. "tdb_check: Bad hash chain key length %llu",
  118. (long long)rec_key_length(&rec));
  119. }
  120. if (rec_hash(&rec) != 0) {
  121. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  122. "tdb_check: Bad hash chain hash value %llu",
  123. (long long)rec_hash(&rec));
  124. }
  125. off += sizeof(rec);
  126. ecode = check_hash_tree(tdb, off, 0, hash, 64,
  127. used, num_used, num_found, check, data);
  128. if (ecode != TDB_SUCCESS) {
  129. return ecode;
  130. }
  131. off = tdb_read_off(tdb, off + offsetof(struct tdb_chain, next));
  132. if (TDB_OFF_IS_ERR(off)) {
  133. return off;
  134. }
  135. if (off == 0)
  136. return TDB_SUCCESS;
  137. (*num_found)++;
  138. return check_hash_chain(tdb, off, hash, used, num_used, num_found,
  139. check, data);
  140. }
  141. static enum TDB_ERROR check_hash_record(struct tdb_context *tdb,
  142. tdb_off_t off,
  143. uint64_t hprefix,
  144. unsigned hprefix_bits,
  145. tdb_off_t used[],
  146. size_t num_used,
  147. size_t *num_found,
  148. enum TDB_ERROR (*check)(TDB_DATA,
  149. TDB_DATA,
  150. void *),
  151. void *data)
  152. {
  153. struct tdb_used_record rec;
  154. enum TDB_ERROR ecode;
  155. if (hprefix_bits >= 64)
  156. return check_hash_chain(tdb, off, hprefix, used, num_used,
  157. num_found, check, data);
  158. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
  159. if (ecode != TDB_SUCCESS) {
  160. return ecode;
  161. }
  162. if (rec_magic(&rec) != TDB_HTABLE_MAGIC) {
  163. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  164. "tdb_check: Bad hash table magic %llu",
  165. (long long)rec_magic(&rec));
  166. }
  167. if (rec_data_length(&rec)
  168. != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) {
  169. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  170. "tdb_check:"
  171. " Bad hash table length %llu vs %llu",
  172. (long long)rec_data_length(&rec),
  173. (long long)sizeof(tdb_off_t)
  174. << TDB_SUBLEVEL_HASH_BITS);
  175. }
  176. if (rec_key_length(&rec) != 0) {
  177. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  178. "tdb_check: Bad hash table key length %llu",
  179. (long long)rec_key_length(&rec));
  180. }
  181. if (rec_hash(&rec) != 0) {
  182. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  183. "tdb_check: Bad hash table hash value %llu",
  184. (long long)rec_hash(&rec));
  185. }
  186. off += sizeof(rec);
  187. return check_hash_tree(tdb, off,
  188. TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
  189. hprefix, hprefix_bits,
  190. used, num_used, num_found, check, data);
  191. }
  192. static int off_cmp(const tdb_off_t *a, const tdb_off_t *b)
  193. {
  194. /* Can overflow an int. */
  195. return *a > *b ? 1
  196. : *a < *b ? -1
  197. : 0;
  198. }
  199. static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used)
  200. {
  201. *used += num;
  202. return (h >> (64 - *used)) & ((1U << num) - 1);
  203. }
  204. static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb,
  205. tdb_off_t off, unsigned int group_bits,
  206. uint64_t hprefix,
  207. unsigned hprefix_bits,
  208. tdb_off_t used[],
  209. size_t num_used,
  210. size_t *num_found,
  211. enum TDB_ERROR (*check)(TDB_DATA,
  212. TDB_DATA, void *),
  213. void *data)
  214. {
  215. unsigned int g, b;
  216. const tdb_off_t *hash;
  217. struct tdb_used_record rec;
  218. enum TDB_ERROR ecode;
  219. hash = tdb_access_read(tdb, off,
  220. sizeof(tdb_off_t)
  221. << (group_bits + TDB_HASH_GROUP_BITS),
  222. true);
  223. if (TDB_PTR_IS_ERR(hash)) {
  224. return TDB_PTR_ERR(hash);
  225. }
  226. for (g = 0; g < (1 << group_bits); g++) {
  227. const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS);
  228. for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) {
  229. unsigned int bucket, i, used_bits;
  230. uint64_t h;
  231. tdb_off_t *p;
  232. if (group[b] == 0)
  233. continue;
  234. off = group[b] & TDB_OFF_MASK;
  235. p = asearch(&off, used, num_used, off_cmp);
  236. if (!p) {
  237. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  238. TDB_LOG_ERROR,
  239. "tdb_check: Invalid offset"
  240. " %llu in hash",
  241. (long long)off);
  242. goto fail;
  243. }
  244. /* Mark it invalid. */
  245. *p ^= 1;
  246. (*num_found)++;
  247. if (hprefix_bits == 64) {
  248. /* Chained entries are unordered. */
  249. if (is_subhash(group[b])) {
  250. ecode = TDB_ERR_CORRUPT;
  251. tdb_logerr(tdb, ecode,
  252. TDB_LOG_ERROR,
  253. "tdb_check: Invalid chain"
  254. " entry subhash");
  255. goto fail;
  256. }
  257. h = hash_record(tdb, off);
  258. if (h != hprefix) {
  259. ecode = TDB_ERR_CORRUPT;
  260. tdb_logerr(tdb, ecode,
  261. TDB_LOG_ERROR,
  262. "check: bad hash chain"
  263. " placement"
  264. " 0x%llx vs 0x%llx",
  265. (long long)h,
  266. (long long)hprefix);
  267. goto fail;
  268. }
  269. ecode = tdb_read_convert(tdb, off, &rec,
  270. sizeof(rec));
  271. if (ecode != TDB_SUCCESS) {
  272. goto fail;
  273. }
  274. goto check;
  275. }
  276. if (is_subhash(group[b])) {
  277. uint64_t subprefix;
  278. subprefix = (hprefix
  279. << (group_bits + TDB_HASH_GROUP_BITS))
  280. + g * (1 << TDB_HASH_GROUP_BITS) + b;
  281. ecode = check_hash_record(tdb,
  282. group[b] & TDB_OFF_MASK,
  283. subprefix,
  284. hprefix_bits
  285. + group_bits
  286. + TDB_HASH_GROUP_BITS,
  287. used, num_used, num_found,
  288. check, data);
  289. if (ecode != TDB_SUCCESS) {
  290. goto fail;
  291. }
  292. continue;
  293. }
  294. /* A normal entry */
  295. /* Does it belong here at all? */
  296. h = hash_record(tdb, off);
  297. used_bits = 0;
  298. if (get_bits(h, hprefix_bits, &used_bits) != hprefix
  299. && hprefix_bits) {
  300. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  301. TDB_LOG_ERROR,
  302. "check: bad hash placement"
  303. " 0x%llx vs 0x%llx",
  304. (long long)h,
  305. (long long)hprefix);
  306. goto fail;
  307. }
  308. /* Does it belong in this group? */
  309. if (get_bits(h, group_bits, &used_bits) != g) {
  310. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  311. TDB_LOG_ERROR,
  312. "check: bad group %llu"
  313. " vs %u",
  314. (long long)h, g);
  315. goto fail;
  316. }
  317. /* Are bucket bits correct? */
  318. bucket = group[b] & TDB_OFF_HASH_GROUP_MASK;
  319. if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits)
  320. != bucket) {
  321. used_bits -= TDB_HASH_GROUP_BITS;
  322. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  323. TDB_LOG_ERROR,
  324. "check: bad bucket %u vs %u",
  325. (unsigned)get_bits(h,
  326. TDB_HASH_GROUP_BITS,
  327. &used_bits),
  328. bucket);
  329. goto fail;
  330. }
  331. /* There must not be any zero entries between
  332. * the bucket it belongs in and this one! */
  333. for (i = bucket;
  334. i != b;
  335. i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) {
  336. if (group[i] == 0) {
  337. ecode = TDB_ERR_CORRUPT;
  338. tdb_logerr(tdb, ecode,
  339. TDB_LOG_ERROR,
  340. "check: bad group placement"
  341. " %u vs %u",
  342. b, bucket);
  343. goto fail;
  344. }
  345. }
  346. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec));
  347. if (ecode != TDB_SUCCESS) {
  348. goto fail;
  349. }
  350. /* Bottom bits must match header. */
  351. if ((h & ((1 << 11)-1)) != rec_hash(&rec)) {
  352. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT,
  353. TDB_LOG_ERROR,
  354. "tdb_check: Bad hash magic"
  355. " at offset %llu"
  356. " (0x%llx vs 0x%llx)",
  357. (long long)off,
  358. (long long)h,
  359. (long long)rec_hash(&rec));
  360. goto fail;
  361. }
  362. check:
  363. if (check) {
  364. TDB_DATA k, d;
  365. const unsigned char *kptr;
  366. kptr = tdb_access_read(tdb,
  367. off + sizeof(rec),
  368. rec_key_length(&rec)
  369. + rec_data_length(&rec),
  370. false);
  371. if (TDB_PTR_IS_ERR(kptr)) {
  372. ecode = TDB_PTR_ERR(kptr);
  373. goto fail;
  374. }
  375. k = tdb_mkdata(kptr, rec_key_length(&rec));
  376. d = tdb_mkdata(kptr + k.dsize,
  377. rec_data_length(&rec));
  378. ecode = check(k, d, data);
  379. tdb_access_release(tdb, kptr);
  380. if (ecode != TDB_SUCCESS) {
  381. goto fail;
  382. }
  383. }
  384. }
  385. }
  386. tdb_access_release(tdb, hash);
  387. return TDB_SUCCESS;
  388. fail:
  389. tdb_access_release(tdb, hash);
  390. return ecode;
  391. }
  392. static enum TDB_ERROR check_hash(struct tdb_context *tdb,
  393. tdb_off_t used[],
  394. size_t num_used, size_t num_ftables,
  395. int (*check)(TDB_DATA, TDB_DATA, void *),
  396. void *data)
  397. {
  398. /* Free tables also show up as used. */
  399. size_t num_found = num_ftables;
  400. enum TDB_ERROR ecode;
  401. ecode = check_hash_tree(tdb, offsetof(struct tdb_header, hashtable),
  402. TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS,
  403. 0, 0, used, num_used, &num_found,
  404. check, data);
  405. if (ecode == TDB_SUCCESS) {
  406. if (num_found != num_used) {
  407. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  408. "tdb_check: Not all entries"
  409. " are in hash");
  410. }
  411. }
  412. return ecode;
  413. }
  414. static enum TDB_ERROR check_free(struct tdb_context *tdb,
  415. tdb_off_t off,
  416. const struct tdb_free_record *frec,
  417. tdb_off_t prev, unsigned int ftable,
  418. unsigned int bucket)
  419. {
  420. enum TDB_ERROR ecode;
  421. if (frec_magic(frec) != TDB_FREE_MAGIC) {
  422. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  423. "tdb_check: offset %llu bad magic 0x%llx",
  424. (long long)off,
  425. (long long)frec->magic_and_prev);
  426. }
  427. if (frec_ftable(frec) != ftable) {
  428. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  429. "tdb_check: offset %llu bad freetable %u",
  430. (long long)off, frec_ftable(frec));
  431. }
  432. ecode = tdb->methods->oob(tdb, off
  433. + frec_len(frec)
  434. + sizeof(struct tdb_used_record),
  435. false);
  436. if (ecode != TDB_SUCCESS) {
  437. return ecode;
  438. }
  439. if (size_to_bucket(frec_len(frec)) != bucket) {
  440. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  441. "tdb_check: offset %llu in wrong bucket"
  442. " (%u vs %u)",
  443. (long long)off,
  444. bucket, size_to_bucket(frec_len(frec)));
  445. }
  446. if (prev && prev != frec_prev(frec)) {
  447. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  448. "tdb_check: offset %llu bad prev"
  449. " (%llu vs %llu)",
  450. (long long)off,
  451. (long long)prev, (long long)frec_len(frec));
  452. }
  453. return TDB_SUCCESS;
  454. }
  455. static enum TDB_ERROR check_free_table(struct tdb_context *tdb,
  456. tdb_off_t ftable_off,
  457. unsigned ftable_num,
  458. tdb_off_t fr[],
  459. size_t num_free,
  460. size_t *num_found)
  461. {
  462. struct tdb_freetable ft;
  463. tdb_off_t h;
  464. unsigned int i;
  465. enum TDB_ERROR ecode;
  466. ecode = tdb_read_convert(tdb, ftable_off, &ft, sizeof(ft));
  467. if (ecode != TDB_SUCCESS) {
  468. return ecode;
  469. }
  470. if (rec_magic(&ft.hdr) != TDB_FTABLE_MAGIC
  471. || rec_key_length(&ft.hdr) != 0
  472. || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr)
  473. || rec_hash(&ft.hdr) != 0) {
  474. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  475. "tdb_check: Invalid header on free table");
  476. }
  477. for (i = 0; i < TDB_FREE_BUCKETS; i++) {
  478. tdb_off_t off, prev = 0, *p, first = 0;
  479. struct tdb_free_record f;
  480. h = bucket_off(ftable_off, i);
  481. for (off = tdb_read_off(tdb, h); off; off = f.next) {
  482. if (TDB_OFF_IS_ERR(off)) {
  483. return off;
  484. }
  485. if (!first) {
  486. off &= TDB_OFF_MASK;
  487. first = off;
  488. }
  489. ecode = tdb_read_convert(tdb, off, &f, sizeof(f));
  490. if (ecode != TDB_SUCCESS) {
  491. return ecode;
  492. }
  493. ecode = check_free(tdb, off, &f, prev, ftable_num, i);
  494. if (ecode != TDB_SUCCESS) {
  495. return ecode;
  496. }
  497. /* FIXME: Check hash bits */
  498. p = asearch(&off, fr, num_free, off_cmp);
  499. if (!p) {
  500. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  501. TDB_LOG_ERROR,
  502. "tdb_check: Invalid offset"
  503. " %llu in free table",
  504. (long long)off);
  505. }
  506. /* Mark it invalid. */
  507. *p ^= 1;
  508. (*num_found)++;
  509. prev = off;
  510. }
  511. if (first) {
  512. /* Now we can check first back pointer. */
  513. ecode = tdb_read_convert(tdb, first, &f, sizeof(f));
  514. if (ecode != TDB_SUCCESS) {
  515. return ecode;
  516. }
  517. ecode = check_free(tdb, first, &f, prev, ftable_num, i);
  518. if (ecode != TDB_SUCCESS) {
  519. return ecode;
  520. }
  521. }
  522. }
  523. return TDB_SUCCESS;
  524. }
  525. /* Slow, but should be very rare. */
  526. tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off)
  527. {
  528. size_t len;
  529. enum TDB_ERROR ecode;
  530. for (len = 0; off + len < tdb->file->map_size; len++) {
  531. char c;
  532. ecode = tdb->methods->tread(tdb, off, &c, 1);
  533. if (ecode != TDB_SUCCESS) {
  534. return ecode;
  535. }
  536. if (c != 0 && c != 0x43)
  537. break;
  538. }
  539. return len;
  540. }
  541. static enum TDB_ERROR check_linear(struct tdb_context *tdb,
  542. tdb_off_t **used, size_t *num_used,
  543. tdb_off_t **fr, size_t *num_free,
  544. uint64_t features, tdb_off_t recovery)
  545. {
  546. tdb_off_t off;
  547. tdb_len_t len;
  548. enum TDB_ERROR ecode;
  549. bool found_recovery = false;
  550. for (off = sizeof(struct tdb_header);
  551. off < tdb->file->map_size;
  552. off += len) {
  553. union {
  554. struct tdb_used_record u;
  555. struct tdb_free_record f;
  556. struct tdb_recovery_record r;
  557. } rec;
  558. /* r is larger: only get that if we need to. */
  559. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.f));
  560. if (ecode != TDB_SUCCESS) {
  561. return ecode;
  562. }
  563. /* If we crash after ftruncate, we can get zeroes or fill. */
  564. if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC
  565. || rec.r.magic == 0x4343434343434343ULL) {
  566. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
  567. if (ecode != TDB_SUCCESS) {
  568. return ecode;
  569. }
  570. if (recovery == off) {
  571. found_recovery = true;
  572. len = sizeof(rec.r) + rec.r.max_len;
  573. } else {
  574. len = dead_space(tdb, off);
  575. if (TDB_OFF_IS_ERR(len)) {
  576. return len;
  577. }
  578. if (len < sizeof(rec.r)) {
  579. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  580. TDB_LOG_ERROR,
  581. "tdb_check: invalid"
  582. " dead space at %zu",
  583. (size_t)off);
  584. }
  585. tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
  586. "Dead space at %zu-%zu (of %zu)",
  587. (size_t)off, (size_t)(off + len),
  588. (size_t)tdb->file->map_size);
  589. }
  590. } else if (rec.r.magic == TDB_RECOVERY_MAGIC) {
  591. ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r));
  592. if (ecode != TDB_SUCCESS) {
  593. return ecode;
  594. }
  595. if (recovery != off) {
  596. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  597. TDB_LOG_ERROR,
  598. "tdb_check: unexpected"
  599. " recovery record at offset"
  600. " %zu",
  601. (size_t)off);
  602. }
  603. if (rec.r.len > rec.r.max_len) {
  604. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  605. TDB_LOG_ERROR,
  606. "tdb_check: invalid recovery"
  607. " length %zu",
  608. (size_t)rec.r.len);
  609. }
  610. if (rec.r.eof > tdb->file->map_size) {
  611. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  612. TDB_LOG_ERROR,
  613. "tdb_check: invalid old EOF"
  614. " %zu", (size_t)rec.r.eof);
  615. }
  616. found_recovery = true;
  617. len = sizeof(rec.r) + rec.r.max_len;
  618. } else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) {
  619. len = sizeof(rec.u) + frec_len(&rec.f);
  620. if (off + len > tdb->file->map_size) {
  621. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  622. TDB_LOG_ERROR,
  623. "tdb_check: free overlength"
  624. " %llu at offset %llu",
  625. (long long)len,
  626. (long long)off);
  627. }
  628. /* This record should be in free lists. */
  629. if (frec_ftable(&rec.f) != TDB_FTABLE_NONE
  630. && !append(fr, num_free, off)) {
  631. return tdb_logerr(tdb, TDB_ERR_OOM,
  632. TDB_LOG_ERROR,
  633. "tdb_check: tracking %zu'th"
  634. " free record.", *num_free);
  635. }
  636. } else if (rec_magic(&rec.u) == TDB_USED_MAGIC
  637. || rec_magic(&rec.u) == TDB_CHAIN_MAGIC
  638. || rec_magic(&rec.u) == TDB_HTABLE_MAGIC
  639. || rec_magic(&rec.u) == TDB_FTABLE_MAGIC) {
  640. uint64_t klen, dlen, extra;
  641. /* This record is used! */
  642. if (!append(used, num_used, off)) {
  643. return tdb_logerr(tdb, TDB_ERR_OOM,
  644. TDB_LOG_ERROR,
  645. "tdb_check: tracking %zu'th"
  646. " used record.", *num_used);
  647. }
  648. klen = rec_key_length(&rec.u);
  649. dlen = rec_data_length(&rec.u);
  650. extra = rec_extra_padding(&rec.u);
  651. len = sizeof(rec.u) + klen + dlen + extra;
  652. if (off + len > tdb->file->map_size) {
  653. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  654. TDB_LOG_ERROR,
  655. "tdb_check: used overlength"
  656. " %llu at offset %llu",
  657. (long long)len,
  658. (long long)off);
  659. }
  660. if (len < sizeof(rec.f)) {
  661. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  662. TDB_LOG_ERROR,
  663. "tdb_check: too short record"
  664. " %llu at %llu",
  665. (long long)len,
  666. (long long)off);
  667. }
  668. /* Check that records have correct 0 at end (but may
  669. * not in future). */
  670. if (extra && !features) {
  671. const char *p;
  672. char c;
  673. p = tdb_access_read(tdb, off + sizeof(rec.u)
  674. + klen + dlen, 1, false);
  675. if (TDB_PTR_IS_ERR(p))
  676. return TDB_PTR_ERR(p);
  677. c = *p;
  678. tdb_access_release(tdb, p);
  679. if (c != '\0') {
  680. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  681. TDB_LOG_ERROR,
  682. "tdb_check:"
  683. " non-zero extra"
  684. " at %llu",
  685. (long long)off);
  686. }
  687. }
  688. } else {
  689. return tdb_logerr(tdb, TDB_ERR_CORRUPT,
  690. TDB_LOG_ERROR,
  691. "tdb_check: Bad magic 0x%llx"
  692. " at offset %zu",
  693. (long long)rec_magic(&rec.u),
  694. (size_t)off);
  695. }
  696. }
  697. /* We must have found recovery area if there was one. */
  698. if (recovery != 0 && !found_recovery) {
  699. return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  700. "tdb_check: expected a recovery area at %zu",
  701. (size_t)recovery);
  702. }
  703. return TDB_SUCCESS;
  704. }
  705. enum TDB_ERROR tdb_check_(struct tdb_context *tdb,
  706. enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA, void *),
  707. void *data)
  708. {
  709. tdb_off_t *fr = NULL, *used = NULL, ft, recovery;
  710. size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0;
  711. uint64_t features;
  712. enum TDB_ERROR ecode;
  713. ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
  714. if (ecode != TDB_SUCCESS) {
  715. return tdb->last_error = ecode;
  716. }
  717. ecode = tdb_lock_expand(tdb, F_RDLCK);
  718. if (ecode != TDB_SUCCESS) {
  719. tdb_allrecord_unlock(tdb, F_RDLCK);
  720. return tdb->last_error = ecode;
  721. }
  722. ecode = check_header(tdb, &recovery, &features);
  723. if (ecode != TDB_SUCCESS)
  724. goto out;
  725. /* First we do a linear scan, checking all records. */
  726. ecode = check_linear(tdb, &used, &num_used, &fr, &num_free, features,
  727. recovery);
  728. if (ecode != TDB_SUCCESS)
  729. goto out;
  730. for (ft = first_ftable(tdb); ft; ft = next_ftable(tdb, ft)) {
  731. if (TDB_OFF_IS_ERR(ft)) {
  732. ecode = ft;
  733. goto out;
  734. }
  735. ecode = check_free_table(tdb, ft, num_ftables, fr, num_free,
  736. &num_found);
  737. if (ecode != TDB_SUCCESS)
  738. goto out;
  739. num_ftables++;
  740. }
  741. /* FIXME: Check key uniqueness? */
  742. ecode = check_hash(tdb, used, num_used, num_ftables, check, data);
  743. if (ecode != TDB_SUCCESS)
  744. goto out;
  745. if (num_found != num_free) {
  746. ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR,
  747. "tdb_check: Not all entries are in"
  748. " free table");
  749. }
  750. out:
  751. tdb_allrecord_unlock(tdb, F_RDLCK);
  752. tdb_unlock_expand(tdb, F_RDLCK);
  753. free(fr);
  754. free(used);
  755. return tdb->last_error = ecode;
  756. }