layout.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. /* TDB tools to create various canned database layouts. */
  2. #include "layout.h"
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <assert.h>
  6. #include <err.h>
  7. #include "logging.h"
  8. struct tdb_layout *new_tdb_layout(const char *filename)
  9. {
  10. struct tdb_layout *layout = malloc(sizeof(*layout));
  11. layout->filename = filename;
  12. layout->num_elems = 0;
  13. layout->elem = NULL;
  14. return layout;
  15. }
  16. static void add(struct tdb_layout *layout, union tdb_layout_elem elem)
  17. {
  18. layout->elem = realloc(layout->elem,
  19. sizeof(layout->elem[0])
  20. * (layout->num_elems+1));
  21. layout->elem[layout->num_elems++] = elem;
  22. }
  23. void tdb_layout_add_freetable(struct tdb_layout *layout)
  24. {
  25. union tdb_layout_elem elem;
  26. elem.base.type = FREETABLE;
  27. add(layout, elem);
  28. }
  29. void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len,
  30. unsigned ftable)
  31. {
  32. union tdb_layout_elem elem;
  33. elem.base.type = FREE;
  34. elem.free.len = len;
  35. elem.free.ftable_num = ftable;
  36. add(layout, elem);
  37. }
  38. static struct tdb_data dup_key(struct tdb_data key)
  39. {
  40. struct tdb_data ret;
  41. ret.dsize = key.dsize;
  42. ret.dptr = malloc(ret.dsize);
  43. memcpy(ret.dptr, key.dptr, ret.dsize);
  44. return ret;
  45. }
  46. void tdb_layout_add_used(struct tdb_layout *layout,
  47. TDB_DATA key, TDB_DATA data,
  48. tdb_len_t extra)
  49. {
  50. union tdb_layout_elem elem;
  51. elem.base.type = DATA;
  52. elem.used.key = dup_key(key);
  53. elem.used.data = dup_key(data);
  54. elem.used.extra = extra;
  55. add(layout, elem);
  56. }
  57. static tdb_len_t free_record_len(tdb_len_t len)
  58. {
  59. return sizeof(struct tdb_used_record) + len;
  60. }
  61. static tdb_len_t data_record_len(struct tle_used *used)
  62. {
  63. tdb_len_t len;
  64. len = sizeof(struct tdb_used_record)
  65. + used->key.dsize + used->data.dsize + used->extra;
  66. assert(len >= sizeof(struct tdb_free_record));
  67. return len;
  68. }
  69. static tdb_len_t hashtable_len(struct tle_hashtable *htable)
  70. {
  71. return sizeof(struct tdb_used_record)
  72. + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS)
  73. + htable->extra;
  74. }
  75. static tdb_len_t freetable_len(struct tle_freetable *ftable)
  76. {
  77. return sizeof(struct tdb_freetable);
  78. }
  79. static void set_free_record(void *mem, tdb_len_t len)
  80. {
  81. /* We do all the work in add_to_freetable */
  82. }
  83. static void set_data_record(void *mem, struct tdb_context *tdb,
  84. struct tle_used *used)
  85. {
  86. struct tdb_used_record *u = mem;
  87. set_header(tdb, u, TDB_USED_MAGIC, used->key.dsize, used->data.dsize,
  88. used->key.dsize + used->data.dsize + used->extra,
  89. tdb_hash(tdb, used->key.dptr, used->key.dsize));
  90. memcpy(u + 1, used->key.dptr, used->key.dsize);
  91. memcpy((char *)(u + 1) + used->key.dsize,
  92. used->data.dptr, used->data.dsize);
  93. }
  94. static void set_hashtable(void *mem, struct tdb_context *tdb,
  95. struct tle_hashtable *htable)
  96. {
  97. struct tdb_used_record *u = mem;
  98. tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS;
  99. set_header(tdb, u, TDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0);
  100. memset(u + 1, 0, len);
  101. }
  102. static void set_freetable(void *mem, struct tdb_context *tdb,
  103. struct tle_freetable *freetable, struct tdb_header *hdr,
  104. tdb_off_t last_ftable)
  105. {
  106. struct tdb_freetable *ftable = mem;
  107. memset(ftable, 0, sizeof(*ftable));
  108. set_header(tdb, &ftable->hdr, TDB_FTABLE_MAGIC, 0,
  109. sizeof(*ftable) - sizeof(ftable->hdr),
  110. sizeof(*ftable) - sizeof(ftable->hdr), 0);
  111. if (last_ftable) {
  112. ftable = (struct tdb_freetable *)((char *)hdr + last_ftable);
  113. ftable->next = freetable->base.off;
  114. } else {
  115. hdr->free_table = freetable->base.off;
  116. }
  117. }
  118. static void add_to_freetable(struct tdb_context *tdb,
  119. tdb_off_t eoff,
  120. tdb_off_t elen,
  121. unsigned ftable,
  122. struct tle_freetable *freetable)
  123. {
  124. tdb->ftable_off = freetable->base.off;
  125. tdb->ftable = ftable;
  126. add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen);
  127. }
  128. static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup)
  129. {
  130. return group_start
  131. + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t);
  132. }
  /*
   * Get bits from a value.
   *
   * Returns the num bits of val starting at bit position start (bit 0 is the
   * least significant), right-aligned in the result.  num must be <= 32.
   * Positions at or beyond bit 64 hold no bits, so the result is then 0.
   */
  static uint32_t bits(uint64_t val, unsigned start, unsigned num)
  {
      assert(num <= 32);

      /* Shifting a 64-bit value by 64 or more is undefined behaviour. */
      if (start >= 64)
          return 0;

      /*
       * Build the mask in 64 bits: with a 32-bit "1U" the permitted case
       * num == 32 would shift by the full width of the type, which is
       * undefined behaviour.
       */
      return (uint32_t)((val >> start) & ((UINT64_C(1) << num) - 1));
  }
  /*
   * Consume the next num bits of the hash h and return them.
   *
   * We take bits from the top: that way we can lock whole sections of the
   * hash by using lock ranges.  *used counts how many bits have been consumed
   * so far and is advanced by num.
   */
  static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used)
  {
      const unsigned consumed = *used + num;

      *used = consumed;
      return bits(h, 64 - consumed, num);
  }
  146. static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket,
  147. uint64_t h)
  148. {
  149. return bucket
  150. | new_off
  151. | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA,
  152. TDB_OFF_UPPER_STEAL_EXTRA)
  153. << TDB_OFF_HASH_EXTRA_BIT);
  154. }
  155. /* FIXME: Our hash table handling here is primitive: we don't expand! */
  156. static void add_to_hashtable(struct tdb_context *tdb,
  157. tdb_off_t eoff,
  158. struct tdb_data key)
  159. {
  160. uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
  161. tdb_off_t b_off, group_start;
  162. unsigned i, group, in_group;
  163. unsigned used = 0;
  164. group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used);
  165. in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used);
  166. group_start = offsetof(struct tdb_header, hashtable)
  167. + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
  168. for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) {
  169. unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS);
  170. b_off = hbucket_off(group_start, bucket);
  171. if (tdb_read_off(tdb, b_off) == 0) {
  172. tdb_write_off(tdb, b_off,
  173. encode_offset(eoff, bucket, h));
  174. return;
  175. }
  176. }
  177. abort();
  178. }
  179. static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num)
  180. {
  181. unsigned i;
  182. for (i = 0; i < layout->num_elems; i++) {
  183. if (layout->elem[i].base.type != FREETABLE)
  184. continue;
  185. if (num == 0)
  186. return &layout->elem[i].ftable;
  187. num--;
  188. }
  189. abort();
  190. }
  191. /* FIXME: Support TDB_CONVERT */
  192. struct tdb_context *tdb_layout_get(struct tdb_layout *layout)
  193. {
  194. unsigned int i;
  195. tdb_off_t off, len, last_ftable;
  196. char *mem;
  197. struct tdb_context *tdb;
  198. off = sizeof(struct tdb_header);
  199. /* First pass of layout: calc lengths */
  200. for (i = 0; i < layout->num_elems; i++) {
  201. union tdb_layout_elem *e = &layout->elem[i];
  202. e->base.off = off;
  203. switch (e->base.type) {
  204. case FREETABLE:
  205. len = freetable_len(&e->ftable);
  206. break;
  207. case FREE:
  208. len = free_record_len(e->free.len);
  209. break;
  210. case DATA:
  211. len = data_record_len(&e->used);
  212. break;
  213. case HASHTABLE:
  214. len = hashtable_len(&e->hashtable);
  215. break;
  216. default:
  217. abort();
  218. }
  219. off += len;
  220. }
  221. mem = malloc(off);
  222. /* Now populate our header, cribbing from a real TDB header. */
  223. tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr);
  224. memcpy(mem, tdb->map_ptr, sizeof(struct tdb_header));
  225. /* Mug the tdb we have to make it use this. */
  226. free(tdb->map_ptr);
  227. tdb->map_ptr = mem;
  228. tdb->map_size = off;
  229. last_ftable = 0;
  230. for (i = 0; i < layout->num_elems; i++) {
  231. union tdb_layout_elem *e = &layout->elem[i];
  232. switch (e->base.type) {
  233. case FREETABLE:
  234. set_freetable(mem + e->base.off, tdb, &e->ftable,
  235. (struct tdb_header *)mem, last_ftable);
  236. last_ftable = e->base.off;
  237. break;
  238. case FREE:
  239. set_free_record(mem + e->base.off, e->free.len);
  240. break;
  241. case DATA:
  242. set_data_record(mem + e->base.off, tdb, &e->used);
  243. break;
  244. case HASHTABLE:
  245. set_hashtable(mem + e->base.off, tdb, &e->hashtable);
  246. break;
  247. }
  248. }
  249. /* Must have a free table! */
  250. assert(last_ftable);
  251. /* Now fill the free and hash tables. */
  252. for (i = 0; i < layout->num_elems; i++) {
  253. union tdb_layout_elem *e = &layout->elem[i];
  254. switch (e->base.type) {
  255. case FREE:
  256. add_to_freetable(tdb, e->base.off, e->free.len,
  257. e->free.ftable_num,
  258. find_ftable(layout, e->free.ftable_num));
  259. break;
  260. case DATA:
  261. add_to_hashtable(tdb, e->base.off, e->used.key);
  262. break;
  263. default:
  264. break;
  265. }
  266. }
  267. tdb->ftable_off = find_ftable(layout, 0)->base.off;
  268. /* Get physical if they asked for it. */
  269. if (layout->filename) {
  270. int fd = open(layout->filename, O_WRONLY|O_TRUNC|O_CREAT,
  271. 0600);
  272. if (fd < 0)
  273. err(1, "opening %s for writing", layout->filename);
  274. if (write(fd, tdb->map_ptr, tdb->map_size) != tdb->map_size)
  275. err(1, "writing %s", layout->filename);
  276. close(fd);
  277. tdb_close(tdb);
  278. /* NOMMAP is for lockcheck. */
  279. tdb = tdb_open(layout->filename, TDB_NOMMAP, O_RDWR, 0,
  280. &tap_log_attr);
  281. }
  282. return tdb;
  283. }
  284. void tdb_layout_free(struct tdb_layout *layout)
  285. {
  286. unsigned int i;
  287. for (i = 0; i < layout->num_elems; i++) {
  288. if (layout->elem[i].base.type == DATA) {
  289. free(layout->elem[i].used.key.dptr);
  290. free(layout->elem[i].used.data.dptr);
  291. }
  292. }
  293. free(layout->elem);
  294. free(layout);
  295. }