tdb.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578
  1. /*
  2. Trivial Database 2: fetch, store and misc routines.
  3. Copyright (C) Rusty Russell 2010
  4. This library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 3 of the License, or (at your option) any later version.
  8. This library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with this library; if not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include "private.h"
  16. #include <ccan/asprintf/asprintf.h>
  17. #include <stdarg.h>
  18. static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb,
  19. tdb_off_t off,
  20. tdb_len_t keylen,
  21. tdb_len_t datalen,
  22. struct tdb_used_record *rec,
  23. uint64_t h)
  24. {
  25. uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec);
  26. enum TDB_ERROR ecode;
  27. ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen,
  28. keylen + dataroom, h);
  29. if (ecode == TDB_SUCCESS) {
  30. ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec));
  31. }
  32. return ecode;
  33. }
  34. static enum TDB_ERROR replace_data(struct tdb_context *tdb,
  35. struct hash_info *h,
  36. struct tdb_data key, struct tdb_data dbuf,
  37. tdb_off_t old_off, tdb_len_t old_room,
  38. bool growing)
  39. {
  40. tdb_off_t new_off;
  41. enum TDB_ERROR ecode;
  42. /* Allocate a new record. */
  43. new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC,
  44. growing);
  45. if (TDB_OFF_IS_ERR(new_off)) {
  46. return new_off;
  47. }
  48. /* We didn't like the existing one: remove it. */
  49. if (old_off) {
  50. tdb->stats.frees++;
  51. ecode = add_free_record(tdb, old_off,
  52. sizeof(struct tdb_used_record)
  53. + key.dsize + old_room,
  54. TDB_LOCK_WAIT, true);
  55. if (ecode == TDB_SUCCESS)
  56. ecode = replace_in_hash(tdb, h, new_off);
  57. } else {
  58. ecode = add_to_hash(tdb, h, new_off);
  59. }
  60. if (ecode != TDB_SUCCESS) {
  61. return ecode;
  62. }
  63. new_off += sizeof(struct tdb_used_record);
  64. ecode = tdb->tdb2.io->twrite(tdb, new_off, key.dptr, key.dsize);
  65. if (ecode != TDB_SUCCESS) {
  66. return ecode;
  67. }
  68. new_off += key.dsize;
  69. ecode = tdb->tdb2.io->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize);
  70. if (ecode != TDB_SUCCESS) {
  71. return ecode;
  72. }
  73. if (tdb->flags & TDB_SEQNUM)
  74. tdb_inc_seqnum(tdb);
  75. return TDB_SUCCESS;
  76. }
  77. static enum TDB_ERROR update_data(struct tdb_context *tdb,
  78. tdb_off_t off,
  79. struct tdb_data dbuf,
  80. tdb_len_t extra)
  81. {
  82. enum TDB_ERROR ecode;
  83. ecode = tdb->tdb2.io->twrite(tdb, off, dbuf.dptr, dbuf.dsize);
  84. if (ecode == TDB_SUCCESS && extra) {
  85. /* Put a zero in; future versions may append other data. */
  86. ecode = tdb->tdb2.io->twrite(tdb, off + dbuf.dsize, "", 1);
  87. }
  88. if (tdb->flags & TDB_SEQNUM)
  89. tdb_inc_seqnum(tdb);
  90. return ecode;
  91. }
  92. enum TDB_ERROR tdb_store(struct tdb_context *tdb,
  93. struct tdb_data key, struct tdb_data dbuf, int flag)
  94. {
  95. struct hash_info h;
  96. tdb_off_t off;
  97. tdb_len_t old_room = 0;
  98. struct tdb_used_record rec;
  99. enum TDB_ERROR ecode;
  100. if (tdb->flags & TDB_VERSION1) {
  101. if (tdb1_store(tdb, key, dbuf, flag) == -1)
  102. return tdb->last_error;
  103. return TDB_SUCCESS;
  104. }
  105. off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
  106. if (TDB_OFF_IS_ERR(off)) {
  107. return tdb->last_error = off;
  108. }
  109. /* Now we have lock on this hash bucket. */
  110. if (flag == TDB_INSERT) {
  111. if (off) {
  112. ecode = TDB_ERR_EXISTS;
  113. goto out;
  114. }
  115. } else {
  116. if (off) {
  117. old_room = rec_data_length(&rec)
  118. + rec_extra_padding(&rec);
  119. if (old_room >= dbuf.dsize) {
  120. /* Can modify in-place. Easy! */
  121. ecode = update_rec_hdr(tdb, off,
  122. key.dsize, dbuf.dsize,
  123. &rec, h.h);
  124. if (ecode != TDB_SUCCESS) {
  125. goto out;
  126. }
  127. ecode = update_data(tdb,
  128. off + sizeof(rec)
  129. + key.dsize, dbuf,
  130. old_room - dbuf.dsize);
  131. if (ecode != TDB_SUCCESS) {
  132. goto out;
  133. }
  134. tdb_unlock_hashes(tdb, h.hlock_start,
  135. h.hlock_range, F_WRLCK);
  136. return tdb->last_error = TDB_SUCCESS;
  137. }
  138. } else {
  139. if (flag == TDB_MODIFY) {
  140. /* if the record doesn't exist and we
  141. are in TDB_MODIFY mode then we should fail
  142. the store */
  143. ecode = TDB_ERR_NOEXIST;
  144. goto out;
  145. }
  146. }
  147. }
  148. /* If we didn't use the old record, this implies we're growing. */
  149. ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off);
  150. out:
  151. tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
  152. return tdb->last_error = ecode;
  153. }
  154. enum TDB_ERROR tdb_append(struct tdb_context *tdb,
  155. struct tdb_data key, struct tdb_data dbuf)
  156. {
  157. struct hash_info h;
  158. tdb_off_t off;
  159. struct tdb_used_record rec;
  160. tdb_len_t old_room = 0, old_dlen;
  161. unsigned char *newdata;
  162. struct tdb_data new_dbuf;
  163. enum TDB_ERROR ecode;
  164. if (tdb->flags & TDB_VERSION1) {
  165. if (tdb1_append(tdb, key, dbuf) == -1)
  166. return tdb->last_error;
  167. return TDB_SUCCESS;
  168. }
  169. off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
  170. if (TDB_OFF_IS_ERR(off)) {
  171. return tdb->last_error = off;
  172. }
  173. if (off) {
  174. old_dlen = rec_data_length(&rec);
  175. old_room = old_dlen + rec_extra_padding(&rec);
  176. /* Fast path: can append in place. */
  177. if (rec_extra_padding(&rec) >= dbuf.dsize) {
  178. ecode = update_rec_hdr(tdb, off, key.dsize,
  179. old_dlen + dbuf.dsize, &rec,
  180. h.h);
  181. if (ecode != TDB_SUCCESS) {
  182. goto out;
  183. }
  184. off += sizeof(rec) + key.dsize + old_dlen;
  185. ecode = update_data(tdb, off, dbuf,
  186. rec_extra_padding(&rec));
  187. goto out;
  188. }
  189. /* Slow path. */
  190. newdata = malloc(key.dsize + old_dlen + dbuf.dsize);
  191. if (!newdata) {
  192. ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
  193. "tdb_append:"
  194. " failed to allocate %zu bytes",
  195. (size_t)(key.dsize + old_dlen
  196. + dbuf.dsize));
  197. goto out;
  198. }
  199. ecode = tdb->tdb2.io->tread(tdb, off + sizeof(rec) + key.dsize,
  200. newdata, old_dlen);
  201. if (ecode != TDB_SUCCESS) {
  202. goto out_free_newdata;
  203. }
  204. memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize);
  205. new_dbuf.dptr = newdata;
  206. new_dbuf.dsize = old_dlen + dbuf.dsize;
  207. } else {
  208. newdata = NULL;
  209. new_dbuf = dbuf;
  210. }
  211. /* If they're using tdb_append(), it implies they're growing record. */
  212. ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true);
  213. out_free_newdata:
  214. free(newdata);
  215. out:
  216. tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
  217. return tdb->last_error = ecode;
  218. }
  219. enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key,
  220. struct tdb_data *data)
  221. {
  222. tdb_off_t off;
  223. struct tdb_used_record rec;
  224. struct hash_info h;
  225. enum TDB_ERROR ecode;
  226. if (tdb->flags & TDB_VERSION1)
  227. return tdb1_fetch(tdb, key, data);
  228. off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
  229. if (TDB_OFF_IS_ERR(off)) {
  230. return tdb->last_error = off;
  231. }
  232. if (!off) {
  233. ecode = TDB_ERR_NOEXIST;
  234. } else {
  235. data->dsize = rec_data_length(&rec);
  236. data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize,
  237. data->dsize);
  238. if (TDB_PTR_IS_ERR(data->dptr)) {
  239. ecode = TDB_PTR_ERR(data->dptr);
  240. } else
  241. ecode = TDB_SUCCESS;
  242. }
  243. tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
  244. return tdb->last_error = ecode;
  245. }
  246. bool tdb_exists(struct tdb_context *tdb, TDB_DATA key)
  247. {
  248. tdb_off_t off;
  249. struct tdb_used_record rec;
  250. struct hash_info h;
  251. if (tdb->flags & TDB_VERSION1) {
  252. return tdb1_exists(tdb, key);
  253. }
  254. off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
  255. if (TDB_OFF_IS_ERR(off)) {
  256. tdb->last_error = off;
  257. return false;
  258. }
  259. tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
  260. tdb->last_error = TDB_SUCCESS;
  261. return off ? true : false;
  262. }
  263. enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key)
  264. {
  265. tdb_off_t off;
  266. struct tdb_used_record rec;
  267. struct hash_info h;
  268. enum TDB_ERROR ecode;
  269. if (tdb->flags & TDB_VERSION1) {
  270. if (tdb1_delete(tdb, key) == -1)
  271. return tdb->last_error;
  272. return TDB_SUCCESS;
  273. }
  274. off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL);
  275. if (TDB_OFF_IS_ERR(off)) {
  276. return tdb->last_error = off;
  277. }
  278. if (!off) {
  279. ecode = TDB_ERR_NOEXIST;
  280. goto unlock;
  281. }
  282. ecode = delete_from_hash(tdb, &h);
  283. if (ecode != TDB_SUCCESS) {
  284. goto unlock;
  285. }
  286. /* Free the deleted entry. */
  287. tdb->stats.frees++;
  288. ecode = add_free_record(tdb, off,
  289. sizeof(struct tdb_used_record)
  290. + rec_key_length(&rec)
  291. + rec_data_length(&rec)
  292. + rec_extra_padding(&rec),
  293. TDB_LOCK_WAIT, true);
  294. if (tdb->flags & TDB_SEQNUM)
  295. tdb_inc_seqnum(tdb);
  296. unlock:
  297. tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK);
  298. return tdb->last_error = ecode;
  299. }
  300. unsigned int tdb_get_flags(struct tdb_context *tdb)
  301. {
  302. return tdb->flags;
  303. }
  304. static bool inside_transaction(const struct tdb_context *tdb)
  305. {
  306. if (tdb->flags & TDB_VERSION1)
  307. return tdb->tdb1.transaction != NULL;
  308. else
  309. return tdb->tdb2.transaction != NULL;
  310. }
  311. static bool readonly_changable(struct tdb_context *tdb, const char *caller)
  312. {
  313. if (inside_transaction(tdb)) {
  314. tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
  315. TDB_LOG_USE_ERROR,
  316. "%s: can't change"
  317. " TDB_RDONLY inside transaction",
  318. caller);
  319. return false;
  320. }
  321. if (tdb->file->allrecord_lock.count != 0
  322. || tdb->file->num_lockrecs != 0) {
  323. tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
  324. TDB_LOG_USE_ERROR,
  325. "%s: can't change"
  326. " TDB_RDONLY holding locks",
  327. caller);
  328. return false;
  329. }
  330. return true;
  331. }
  332. void tdb_add_flag(struct tdb_context *tdb, unsigned flag)
  333. {
  334. if (tdb->flags & TDB_INTERNAL) {
  335. tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
  336. TDB_LOG_USE_ERROR,
  337. "tdb_add_flag: internal db");
  338. return;
  339. }
  340. switch (flag) {
  341. case TDB_NOLOCK:
  342. tdb->flags |= TDB_NOLOCK;
  343. break;
  344. case TDB_NOMMAP:
  345. tdb->flags |= TDB_NOMMAP;
  346. tdb_munmap(tdb->file);
  347. break;
  348. case TDB_NOSYNC:
  349. tdb->flags |= TDB_NOSYNC;
  350. break;
  351. case TDB_SEQNUM:
  352. tdb->flags |= TDB_SEQNUM;
  353. break;
  354. case TDB_ALLOW_NESTING:
  355. tdb->flags |= TDB_ALLOW_NESTING;
  356. break;
  357. case TDB_RDONLY:
  358. if (readonly_changable(tdb, "tdb_add_flag"))
  359. tdb->flags |= TDB_RDONLY;
  360. break;
  361. default:
  362. tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
  363. TDB_LOG_USE_ERROR,
  364. "tdb_add_flag: Unknown flag %u",
  365. flag);
  366. }
  367. }
  368. void tdb_remove_flag(struct tdb_context *tdb, unsigned flag)
  369. {
  370. if (tdb->flags & TDB_INTERNAL) {
  371. tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
  372. TDB_LOG_USE_ERROR,
  373. "tdb_remove_flag: internal db");
  374. return;
  375. }
  376. switch (flag) {
  377. case TDB_NOLOCK:
  378. tdb->flags &= ~TDB_NOLOCK;
  379. break;
  380. case TDB_NOMMAP:
  381. tdb->flags &= ~TDB_NOMMAP;
  382. tdb_mmap(tdb);
  383. break;
  384. case TDB_NOSYNC:
  385. tdb->flags &= ~TDB_NOSYNC;
  386. break;
  387. case TDB_SEQNUM:
  388. tdb->flags &= ~TDB_SEQNUM;
  389. break;
  390. case TDB_ALLOW_NESTING:
  391. tdb->flags &= ~TDB_ALLOW_NESTING;
  392. break;
  393. case TDB_RDONLY:
  394. if ((tdb->open_flags & O_ACCMODE) == O_RDONLY) {
  395. tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
  396. TDB_LOG_USE_ERROR,
  397. "tdb_remove_flag: can't"
  398. " remove TDB_RDONLY on tdb"
  399. " opened with O_RDONLY");
  400. break;
  401. }
  402. if (readonly_changable(tdb, "tdb_remove_flag"))
  403. tdb->flags &= ~TDB_RDONLY;
  404. break;
  405. default:
  406. tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL,
  407. TDB_LOG_USE_ERROR,
  408. "tdb_remove_flag: Unknown flag %u",
  409. flag);
  410. }
  411. }
  412. const char *tdb_errorstr(enum TDB_ERROR ecode)
  413. {
  414. /* Gcc warns if you miss a case in the switch, so use that. */
  415. switch (ecode) {
  416. case TDB_SUCCESS: return "Success";
  417. case TDB_ERR_CORRUPT: return "Corrupt database";
  418. case TDB_ERR_IO: return "IO Error";
  419. case TDB_ERR_LOCK: return "Locking error";
  420. case TDB_ERR_OOM: return "Out of memory";
  421. case TDB_ERR_EXISTS: return "Record exists";
  422. case TDB_ERR_EINVAL: return "Invalid parameter";
  423. case TDB_ERR_NOEXIST: return "Record does not exist";
  424. case TDB_ERR_RDONLY: return "write not permitted";
  425. }
  426. return "Invalid error code";
  427. }
  428. enum TDB_ERROR tdb_error(struct tdb_context *tdb)
  429. {
  430. return tdb->last_error;
  431. }
  432. enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb,
  433. enum TDB_ERROR ecode,
  434. enum tdb_log_level level,
  435. const char *fmt, ...)
  436. {
  437. char *message;
  438. va_list ap;
  439. size_t len;
  440. /* tdb_open paths care about errno, so save it. */
  441. int saved_errno = errno;
  442. if (!tdb->log_fn)
  443. return ecode;
  444. va_start(ap, fmt);
  445. len = vasprintf(&message, fmt, ap);
  446. va_end(ap);
  447. if (len < 0) {
  448. tdb->log_fn(tdb, TDB_LOG_ERROR, TDB_ERR_OOM,
  449. "out of memory formatting message:", tdb->log_data);
  450. tdb->log_fn(tdb, level, ecode, fmt, tdb->log_data);
  451. } else {
  452. tdb->log_fn(tdb, level, ecode, message, tdb->log_data);
  453. free(message);
  454. }
  455. errno = saved_errno;
  456. return ecode;
  457. }
  458. enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb,
  459. TDB_DATA key,
  460. enum TDB_ERROR (*parse)(TDB_DATA k,
  461. TDB_DATA d,
  462. void *data),
  463. void *data)
  464. {
  465. tdb_off_t off;
  466. struct tdb_used_record rec;
  467. struct hash_info h;
  468. enum TDB_ERROR ecode;
  469. if (tdb->flags & TDB_VERSION1) {
  470. return tdb->last_error = tdb1_parse_record(tdb, key, parse,
  471. data);
  472. }
  473. off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL);
  474. if (TDB_OFF_IS_ERR(off)) {
  475. return tdb->last_error = off;
  476. }
  477. if (!off) {
  478. ecode = TDB_ERR_NOEXIST;
  479. } else {
  480. const void *dptr;
  481. dptr = tdb_access_read(tdb, off + sizeof(rec) + key.dsize,
  482. rec_data_length(&rec), false);
  483. if (TDB_PTR_IS_ERR(dptr)) {
  484. ecode = TDB_PTR_ERR(dptr);
  485. } else {
  486. TDB_DATA d = tdb_mkdata(dptr, rec_data_length(&rec));
  487. ecode = parse(key, d, data);
  488. tdb_access_release(tdb, dptr);
  489. }
  490. }
  491. tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK);
  492. return tdb->last_error = ecode;
  493. }
  494. const char *tdb_name(const struct tdb_context *tdb)
  495. {
  496. return tdb->name;
  497. }
  498. int64_t tdb_get_seqnum(struct tdb_context *tdb)
  499. {
  500. tdb_off_t off = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum));
  501. if (TDB_OFF_IS_ERR(off))
  502. tdb->last_error = off;
  503. else
  504. tdb->last_error = TDB_SUCCESS;
  505. return off;
  506. }
  507. int tdb_fd(const struct tdb_context *tdb)
  508. {
  509. return tdb->file->fd;
  510. }