private.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. #ifndef TDB_PRIVATE_H
  2. #define TDB_PRIVATE_H
  3. /*
  4. Trivial Database 2: private types and prototypes
  5. Copyright (C) Rusty Russell 2010
  6. This library is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU Lesser General Public
  8. License as published by the Free Software Foundation; either
  9. version 3 of the License, or (at your option) any later version.
  10. This library is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. Lesser General Public License for more details.
  14. You should have received a copy of the GNU Lesser General Public
  15. License along with this library; if not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #define _XOPEN_SOURCE 500
  18. #define _FILE_OFFSET_BITS 64
  19. #include <stdint.h>
  20. #include <stdbool.h>
  21. #include <stdlib.h>
  22. #include <sys/time.h>
  23. #include <sys/mman.h>
  24. #include <unistd.h>
  25. #include <fcntl.h>
  26. #include <string.h>
  27. #include <errno.h>
  28. #include <stdio.h>
  29. #include <utime.h>
  30. #include <unistd.h>
  31. #include "config.h"
  32. #include <ccan/tdb2/tdb2.h>
  33. #include <ccan/likely/likely.h>
  34. #ifdef HAVE_BYTESWAP_H
  35. #include <byteswap.h>
  36. #endif
  /* Test hook: expands to nothing unless TEST_IT was defined before this point. */
  #ifndef TEST_IT
  #define TEST_IT(cond)
  #endif
  /* Uncomment to get the tdb_trace*() prototypes declared near the end of this
   * header instead of the no-op macros. */
  /* #define TDB_TRACE 1 */
  /* Turn the argument's literal spelling into a string. */
  #ifndef __STRING
  #define __STRING(x) #x
  #endif
  /* Two-level stringify: the argument is macro-expanded before being quoted. */
  #ifndef __STRINGSTRING
  #define __STRINGSTRING(x) __STRING(x)
  #endif
  /* "file:line" string literal for the place where it is used. */
  #ifndef __location__
  #define __location__ __FILE__ ":" __STRINGSTRING(__LINE__)
  #endif
  /* Length and offset types used by the declarations in this header; both are
   * 64-bit unsigned. */
  typedef uint64_t tdb_len_t;
  typedef uint64_t tdb_off_t;
  /*
   * Fallback for when no header has supplied offsetof.
   * The result is size_t, matching the standard macro.  The earlier cast to
   * unsigned int was a truncating pointer-to-int conversion on LP64 targets
   * and gave the expression the wrong type.
   */
  #ifndef offsetof
  #define offsetof(t,f) ((size_t)&((t *)0)->f)
  #endif
  /* Magic values and version number. */
  #define TDB_MAGIC_FOOD "TDB file\n"
  #define TDB_VERSION ((uint64_t)(0x26011967 + 7))
  #define TDB_MAGIC ((uint64_t)0x1999)
  /* Bitwise complement of TDB_MAGIC. */
  #define TDB_FREE_MAGIC (~(uint64_t)TDB_MAGIC)
  #define TDB_HASH_MAGIC (0xA1ABE11A01092008ULL)
  #define TDB_RECOVERY_MAGIC (0xf53bc0e7U)
  #define TDB_RECOVERY_INVALID_MAGIC (0x0)
  #define TDB_EXTRA_HASHBITS (11) /* We steal 11 bits to stash hash info. */
  #define TDB_EXTRA_HASHBITS_NUM (3)
  /* All-ones offset; by its name an error sentinel (confirm at the call sites). */
  #define TDB_OFF_ERR ((tdb_off_t)-1)
  /* Prevent others from opening the file. */
  #define TDB_OPEN_LOCK 0
  /* Doing a transaction. */
  #define TDB_TRANSACTION_LOCK 1
  /* Hash chain locks. */
  #define TDB_HASH_LOCK_START 2
  /* We start with 256 hash buckets, 10 free buckets. A 4k-sized zone. */
  #define INITIAL_HASH_BITS 8
  #define INITIAL_FREE_BUCKETS 10
  #define INITIAL_ZONE_BITS 12
  75. #if !HAVE_BSWAP_64
  /*
   * Reverse the byte order of a 64-bit value: byte 0 <-> byte 7,
   * byte 1 <-> byte 6, and so on.
   *
   * Each input byte is masked where it sits and then shifted to its mirrored
   * position, so every term lands in a distinct output byte.  The previous
   * version shifted several bytes onto the same position and read byte 4
   * four times, so it did not produce a byte swap.
   */
  static inline uint64_t bswap_64(uint64_t x)
  {
      return (((x & 0x00000000000000FFULL) << 56)
              | ((x & 0x000000000000FF00ULL) << 40)
              | ((x & 0x0000000000FF0000ULL) << 24)
              | ((x & 0x00000000FF000000ULL) << 8)
              | ((x & 0x000000FF00000000ULL) >> 8)
              | ((x & 0x0000FF0000000000ULL) >> 24)
              | ((x & 0x00FF000000000000ULL) >> 40)
              | ((x & 0xFF00000000000000ULL) >> 56));
  }
  87. #endif
  /* Header of a record that is in use. */
  struct tdb_used_record {
      /*
       * Packed by hand rather than with bitfields, for on-disk compatibility:
       *   bits 63..48  magic          (16 bits, highest)
       *   bits 47..43  key_len_bits   (5 bits)
       *   bits 42..32  hash           (11 bits)
       *   bits 31..0   extra_padding  (32 bits, lowest)
       */
      uint64_t magic_and_meta;
      /* The bottom key_len_bits*2 bits are the key length, the rest is the
       * data length. */
      uint64_t key_and_data_len;
  };

  /* Number of low bits of key_and_data_len that hold the key length:
   * twice the 5-bit key_len_bits field. */
  static inline unsigned rec_key_bits(const struct tdb_used_record *r)
  {
      const uint64_t key_len_bits = (r->magic_and_meta >> 43) & 0x1F;

      return (unsigned)(key_len_bits * 2);
  }

  /* Key length: the low rec_key_bits() bits of key_and_data_len. */
  static inline uint64_t rec_key_length(const struct tdb_used_record *r)
  {
      const unsigned width = rec_key_bits(r);
      const uint64_t key_mask = (1ULL << width) - 1;

      return r->key_and_data_len & key_mask;
  }

  /* Data length: whatever sits above the key-length bits. */
  static inline uint64_t rec_data_length(const struct tdb_used_record *r)
  {
      const unsigned width = rec_key_bits(r);

      return r->key_and_data_len >> width;
  }

  /* The extra_padding field: low 32 bits of magic_and_meta. */
  static inline uint64_t rec_extra_padding(const struct tdb_used_record *r)
  {
      return (uint32_t)r->magic_and_meta;
  }

  /* The 11 stored hash bits, moved up to the top 11 bits of the result. */
  static inline uint64_t rec_hash(const struct tdb_used_record *r)
  {
      const uint64_t stored = (r->magic_and_meta >> 32) & 0x7FF;

      return stored << 53;
  }

  /* The magic field: top 16 bits of magic_and_meta. */
  static inline uint16_t rec_magic(const struct tdb_used_record *r)
  {
      return (uint16_t)(r->magic_and_meta >> 48);
  }
  /* Header of a free record: four 64-bit words. */
  struct tdb_free_record {
      /* NOTE(review): presumably TDB_FREE_MAGIC - confirm in free.c. */
      uint64_t magic;
      uint64_t data_len; /* Not counting these two fields. */
      /* This is why the minimum record size is 16 bytes. */
      /* NOTE(review): presumably links to neighbouring free records - confirm. */
      uint64_t next, prev;
  };
  /* These parts can change while we have db open. */
  /* Embedded in struct tdb_header as member 'v'. */
  struct tdb_header_volatile {
      uint64_t generation; /* Makes sure it changes on every update. */
      /* NOTE(review): INITIAL_HASH_BITS 8 is described as 256 buckets, so this
       * looks like a log2 count - confirm. */
      uint64_t hash_bits; /* Entries in hash table. */
      uint64_t hash_off; /* Offset of hash table. */
      uint64_t num_zones; /* How many zones in the file. */
      uint64_t zone_bits; /* Size of zones. */
      uint64_t free_buckets; /* How many buckets in each zone. */
      uint64_t free_off; /* Arrays of free entries. */
  };
  /* this is stored at the front of every database */
  struct tdb_header {
      char magic_food[32]; /* for /etc/magic */
      uint64_t version; /* version of the code */
      uint64_t hash_test; /* result of hashing HASH_MAGIC. */
      uint64_t hash_seed; /* "random" seed written at creation time. */
      /* The fields that may change while the database is open. */
      struct tdb_header_volatile v;
      /* NOTE(review): presumably spare space for future fields - confirm. */
      tdb_off_t reserved[19];
  };
  /* How a lock request should behave; passed to the tdb_*lock* functions. */
  enum tdb_lock_flags {
      /* WAIT == F_SETLKW, NOWAIT == F_SETLK */
      TDB_LOCK_NOWAIT = 0,
      TDB_LOCK_WAIT = 1,
      /* If set, don't log an error on failure. */
      /* NOTE(review): value 2 is a separate bit, so presumably OR-ed with
       * WAIT/NOWAIT - confirm in lock.c. */
      TDB_LOCK_PROBE = 2,
  };
  /* Bookkeeping for one lock; used by tdb_context for allrecord_lock and the
   * lockrecs array.
   * NOTE(review): field meanings below are inferred from names - confirm in
   * lock.c. */
  struct tdb_lock_type {
      uint32_t off;   /* presumably the locked offset */
      uint32_t count; /* presumably a nesting count */
      uint32_t ltype; /* presumably the lock type (read/write) */
  };
  /* In-memory state for one open database; passed to every internal function
   * declared in this header. */
  struct tdb_context {
      /* Filename of the database. */
      const char *name;
      /* Mmap (if any), or malloc (for TDB_INTERNAL). */
      void *map_ptr;
      /* Open file descriptor (undefined for TDB_INTERNAL). */
      int fd;
      /* How much space has been mapped (<= current file size) */
      tdb_len_t map_size;
      /* Opened read-only? */
      bool read_only;
      /* Error code for last tdb error. */
      enum TDB_ERROR ecode;
      /* A cached copy of the header */
      struct tdb_header header;
      /* (for debugging). */
      bool header_uptodate;
      /* the flags passed to tdb_open, for tdb_reopen. */
      uint32_t flags;
      /* Logging function */
      tdb_logfn_t log;
      void *log_priv;
      /* Hash function. */
      tdb_hashfn_t khash;
      void *hash_priv;
      /* What zone of the tdb to use, for spreading load. */
      uint64_t last_zone;
      /* IO methods: changes for transactions. */
      const struct tdb_methods *methods;
      /* Lock information */
      struct tdb_lock_type allrecord_lock;
      uint64_t num_lockrecs;
      struct tdb_lock_type *lockrecs;
      /* Set if we are in a transaction. */
      struct tdb_transaction *transaction;
      /* Single list of all TDBs, to avoid multiple opens. */
      struct tdb_context *next;
      /* NOTE(review): presumably the file's device/inode, used to spot a
       * second open of the same file - confirm in tdb.c. */
      dev_t device;
      ino_t inode;
  };
  /* Table of low-level I/O operations; tdb_context.methods points at the one
   * currently in effect.
   * NOTE(review): argument meanings are not visible in this header - see io.c. */
  struct tdb_methods {
      int (*read)(struct tdb_context *, tdb_off_t, void *, tdb_len_t);
      int (*write)(struct tdb_context *, tdb_off_t, const void *, tdb_len_t);
      int (*oob)(struct tdb_context *, tdb_off_t, bool);
      int (*expand_file)(struct tdb_context *, tdb_len_t, tdb_len_t);
  };
  /*
  internal prototypes
  */
  /* tdb.c: */
  /* Returns true if header changed. */
  bool update_header(struct tdb_context *tdb);
  /* Hash random memory. */
  /* Hashes the len bytes starting at ptr. */
  uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len);
  /* free.c: */
  /* NOTE(review): no contract is given here; see free.c for what is returned. */
  uint64_t random_free_zone(struct tdb_context *tdb);
  /* If this fails, try tdb_expand. */
  /* NOTE(review): failure is presumably signalled by TDB_OFF_ERR - confirm. */
  tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen,
                  uint64_t hash, bool growing);
  /* Put this record in a free list. */
  int add_free_record(struct tdb_context *tdb,
                      tdb_off_t off, tdb_len_t len_with_header);
  /* Set up header for a used record. */
  int set_header(struct tdb_context *tdb,
                 struct tdb_used_record *rec,
                 uint64_t keylen, uint64_t datalen,
                 uint64_t actuallen, uint64_t hash);
  /* Used by tdb_check to verify. */
  unsigned int size_to_bucket(struct tdb_context *tdb, tdb_len_t data_len);
  tdb_off_t zone_of(struct tdb_context *tdb, tdb_off_t off);
  /* io.c: */
  /* Initialize tdb->methods. */
  void tdb_io_init(struct tdb_context *tdb);
  /* Convert endian of the buffer if required. */
  void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size);
  /* Unmap and try to map the tdb. */
  /* Both return void, so no status is reported to the caller from here. */
  void tdb_munmap(struct tdb_context *tdb);
  void tdb_mmap(struct tdb_context *tdb);
  /* Hand data to a function, direct if possible */
  int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
                     tdb_off_t offset, tdb_len_t len,
                     int (*parser)(TDB_DATA key, TDB_DATA data,
                                   void *private_data),
                     void *private_data);
  /* Either make a copy into pad and return that, or return ptr into mmap.
   * Converts endian (ie. will use pad in that case). */
  void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len);
  /* Either alloc a copy, or give direct access. Release frees or noop. */
  /* Pass the pointer from tdb_access_read to tdb_access_release when done. */
  const void *tdb_access_read(struct tdb_context *tdb,
                              tdb_off_t off, tdb_len_t len);
  void tdb_access_release(struct tdb_context *tdb, const void *p);
  /* Convenience routine to get an offset. */
  tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off);
  /* Write an offset at an offset. */
  int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val);
  /* Clear an ondisk area. */
  int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len);
  /* Return a non-zero offset in this array, or num. */
  tdb_off_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off,
                                 uint64_t num);
  /* Return a zero offset in this array, or num. */
  tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
                              uint64_t num);
  /* Even on files, we can get partial writes due to signals. */
  /* These take a raw fd rather than a tdb_context. */
  bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off);
  bool tdb_pread_all(int fd, void *buf, size_t len, tdb_off_t off);
  bool tdb_read_all(int fd, void *buf, size_t len);
  /* Allocate and make a copy of some offset. */
  /* NOTE(review): the caller presumably frees the result - confirm in io.c. */
  void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
  /* Munges record and writes it */
  /* rec is non-const: the buffer may be modified by the conversion. */
  int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
                        void *rec, size_t len);
  /* Reads record and converts it */
  int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
                       void *rec, size_t len);
  /* Hash on disk. */
  uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off);
  /* lock.c: */
  /* Lock/unlock a particular hash list. */
  int tdb_lock_list(struct tdb_context *tdb, tdb_off_t list,
                    int ltype, enum tdb_lock_flags waitflag);
  int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype);
  /* Lock/unlock a particular free list. */
  /* Unlike tdb_unlock_list, the free-list unlock returns nothing. */
  int tdb_lock_free_list(struct tdb_context *tdb, tdb_off_t flist,
                         enum tdb_lock_flags waitflag);
  void tdb_unlock_free_list(struct tdb_context *tdb, tdb_off_t flist);
  /* Do we have any locks? */
  bool tdb_has_locks(struct tdb_context *tdb);
  /* Lock entire database. */
  int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
                         enum tdb_lock_flags flags, bool upgradable);
  int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype);
  /* Serialize db open. */
  int tdb_lock_open(struct tdb_context *tdb);
  void tdb_unlock_open(struct tdb_context *tdb);
  /* Expand the file. */
  /* NOTE(review): listed under lock.c here; confirm which file defines it. */
  int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen,
                 bool growing);
  #if 0
  /*
   * Disabled prototypes: nothing in this block is compiled.
   * Several entries disagree with the live declarations earlier in this header
   * (tdb_munmap returns int here but void above; tdb_expand takes a single size
   * here; tdb_alloc_read returns unsigned char * here but void * above).
   * Many also use struct tdb_record, which this header does not define.
   * tdb_ofs_read and tdb_ofs_write are each listed twice.
   * Reconcile an entry with the live declarations before re-enabling it.
   */
  /* Low-level locking primitives. */
  int tdb_nest_lock(struct tdb_context *tdb, tdb_off_t offset, int ltype,
                    enum tdb_lock_flags flags);
  int tdb_nest_unlock(struct tdb_context *tdb, tdb_off_t offset, int ltype);
  int tdb_munmap(struct tdb_context *tdb);
  void tdb_mmap(struct tdb_context *tdb);
  int tdb_lock(struct tdb_context *tdb, int list, int ltype);
  int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
  bool tdb_have_locks(struct tdb_context *tdb);
  int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
  int tdb_brlock(struct tdb_context *tdb,
                 int rw_type, tdb_off_t offset, size_t len,
                 enum tdb_lock_flags flags);
  int tdb_brunlock(struct tdb_context *tdb,
                   int rw_type, tdb_off_t offset, size_t len);
  bool tdb_have_extra_locks(struct tdb_context *tdb);
  void tdb_release_extra_locks(struct tdb_context *tdb);
  int tdb_transaction_lock(struct tdb_context *tdb, int ltype);
  int tdb_transaction_unlock(struct tdb_context *tdb, int ltype);
  int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
                         enum tdb_lock_flags flags, bool upgradable);
  int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype);
  int tdb_allrecord_upgrade(struct tdb_context *tdb);
  int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off);
  int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off);
  int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
  int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
  int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
  tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec);
  int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
  int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d);
  int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off);
  int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off);
  bool tdb_needs_recovery(struct tdb_context *tdb);
  int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
  int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec);
  int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec);
  unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len);
  int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
                     tdb_off_t offset, tdb_len_t len,
                     int (*parser)(TDB_DATA key, TDB_DATA data,
                                   void *private_data),
                     void *private_data);
  tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype,
                               struct tdb_record *rec);
  void tdb_io_init(struct tdb_context *tdb);
  int tdb_expand(struct tdb_context *tdb, tdb_off_t size);
  int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off,
                        struct tdb_record *rec);
  #endif
  /*
   * Tracing hooks.  With TDB_TRACE defined these are real functions; otherwise
   * each name is a macro that expands to nothing, so call sites compile away
   * and their arguments are not evaluated.
   */
  #ifdef TDB_TRACE
  void tdb_trace(struct tdb_context *tdb, const char *op);
  void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op);
  void tdb_trace_open(struct tdb_context *tdb, const char *op,
                      unsigned hash_size, unsigned tdb_flags, unsigned open_flags);
  void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret);
  void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret);
  void tdb_trace_1rec(struct tdb_context *tdb, const char *op,
                      TDB_DATA rec);
  void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op,
                          TDB_DATA rec, int ret);
  void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op,
                             TDB_DATA rec, TDB_DATA ret);
  void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op,
                               TDB_DATA rec1, TDB_DATA rec2, unsigned flag,
                               int ret);
  void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op,
                             TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret);
  #else
  /* No-op stand-ins with the same argument counts as the functions above. */
  #define tdb_trace(tdb, op)
  #define tdb_trace_seqnum(tdb, seqnum, op)
  #define tdb_trace_open(tdb, op, hash_size, tdb_flags, open_flags)
  #define tdb_trace_ret(tdb, op, ret)
  #define tdb_trace_retrec(tdb, op, ret)
  #define tdb_trace_1rec(tdb, op, rec)
  #define tdb_trace_1rec_ret(tdb, op, rec, ret)
  #define tdb_trace_1rec_retrec(tdb, op, rec, ret)
  #define tdb_trace_2rec_flag_ret(tdb, op, rec1, rec2, flag, ret)
  #define tdb_trace_2rec_retrec(tdb, op, rec1, rec2, ret)
  #endif /* !TDB_TRACE */
  379. #endif