open.c 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911
  1. /*
  2. Trivial Database 2: opening and closing TDBs
  3. Copyright (C) Rusty Russell 2010
  4. This library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 3 of the License, or (at your option) any later version.
  8. This library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with this library; if not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include "private.h"
  16. #include <ccan/build_assert/build_assert.h>
  17. /* all tdbs, to detect double-opens (fcntl file don't nest!) */
  18. static struct ntdb_context *tdbs = NULL;
  19. static struct ntdb_file *find_file(dev_t device, ino_t ino)
  20. {
  21. struct ntdb_context *i;
  22. for (i = tdbs; i; i = i->next) {
  23. if (i->file->device == device && i->file->inode == ino) {
  24. i->file->refcnt++;
  25. return i->file;
  26. }
  27. }
  28. return NULL;
  29. }
  30. static bool read_all(int fd, void *buf, size_t len)
  31. {
  32. while (len) {
  33. ssize_t ret;
  34. ret = read(fd, buf, len);
  35. if (ret < 0)
  36. return false;
  37. if (ret == 0) {
  38. /* ETOOSHORT? */
  39. errno = EWOULDBLOCK;
  40. return false;
  41. }
  42. buf = (char *)buf + ret;
  43. len -= ret;
  44. }
  45. return true;
  46. }
  47. static uint32_t random_number(struct ntdb_context *ntdb)
  48. {
  49. int fd;
  50. uint32_t ret = 0;
  51. struct timeval now;
  52. fd = open("/dev/urandom", O_RDONLY);
  53. if (fd >= 0) {
  54. if (read_all(fd, &ret, sizeof(ret))) {
  55. close(fd);
  56. return ret;
  57. }
  58. close(fd);
  59. }
  60. /* FIXME: Untested! Based on Wikipedia protocol description! */
  61. fd = open("/dev/egd-pool", O_RDWR);
  62. if (fd >= 0) {
  63. /* Command is 1, next byte is size we want to read. */
  64. char cmd[2] = { 1, sizeof(uint32_t) };
  65. if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) {
  66. char reply[1 + sizeof(uint32_t)];
  67. int r = read(fd, reply, sizeof(reply));
  68. if (r > 1) {
  69. /* Copy at least some bytes. */
  70. memcpy(&ret, reply+1, r - 1);
  71. if (reply[0] == sizeof(uint32_t)
  72. && r == sizeof(reply)) {
  73. close(fd);
  74. return ret;
  75. }
  76. }
  77. }
  78. close(fd);
  79. }
  80. /* Fallback: pid and time. */
  81. gettimeofday(&now, NULL);
  82. ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec;
  83. ntdb_logerr(ntdb, NTDB_SUCCESS, NTDB_LOG_WARNING,
  84. "ntdb_open: random from getpid and time");
  85. return ret;
  86. }
  87. static void ntdb_context_init(struct ntdb_context *ntdb)
  88. {
  89. /* Initialize the NTDB fields here */
  90. ntdb_io_init(ntdb);
  91. ntdb->transaction = NULL;
  92. ntdb->access = NULL;
  93. }
  94. /* initialise a new database:
  95. *
  96. * struct ntdb_header;
  97. * struct {
  98. * struct ntdb_used_record hash_header;
  99. * ntdb_off_t hash_buckets[1 << ntdb->hash_bits];
  100. * } hash;
  101. * struct ntdb_freetable ftable;
  102. * struct {
  103. * struct ntdb_free_record free_header;
  104. * char forty_three[...];
  105. * } remainder;
  106. */
  107. #define NEW_DATABASE_HDR_SIZE(hbits) \
  108. (sizeof(struct ntdb_header) \
  109. + sizeof(struct ntdb_used_record) + (sizeof(ntdb_off_t) << hbits) \
  110. + sizeof(struct ntdb_freetable) \
  111. + sizeof(struct ntdb_free_record))
  112. static enum NTDB_ERROR ntdb_new_database(struct ntdb_context *ntdb,
  113. struct ntdb_attribute_seed *seed,
  114. struct ntdb_header *rhdr)
  115. {
  116. /* We make it up in memory, then write it out if not internal */
  117. struct ntdb_freetable *ftable;
  118. struct ntdb_used_record *htable;
  119. struct ntdb_header *hdr;
  120. struct ntdb_free_record *remainder;
  121. char *mem;
  122. unsigned int magic_len;
  123. ssize_t rlen;
  124. size_t dbsize, hashsize, hdrsize, remaindersize;
  125. enum NTDB_ERROR ecode;
  126. hashsize = sizeof(ntdb_off_t) << ntdb->hash_bits;
  127. /* Always make db a multiple of NTDB_PGSIZE */
  128. hdrsize = NEW_DATABASE_HDR_SIZE(ntdb->hash_bits);
  129. dbsize = (hdrsize + NTDB_PGSIZE-1) & ~(NTDB_PGSIZE-1);
  130. mem = ntdb->alloc_fn(ntdb, dbsize, ntdb->alloc_data);
  131. if (!mem) {
  132. return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
  133. "ntdb_new_database: failed to allocate");
  134. }
  135. hdr = (void *)mem;
  136. htable = (void *)(mem + sizeof(*hdr));
  137. ftable = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize);
  138. remainder = (void *)(mem + sizeof(*hdr) + sizeof(*htable) + hashsize
  139. + sizeof(*ftable));
  140. /* Fill in the header */
  141. hdr->version = NTDB_VERSION;
  142. if (seed)
  143. hdr->hash_seed = seed->seed;
  144. else
  145. hdr->hash_seed = random_number(ntdb);
  146. hdr->hash_test = NTDB_HASH_MAGIC;
  147. hdr->hash_test = ntdb->hash_fn(&hdr->hash_test,
  148. sizeof(hdr->hash_test),
  149. hdr->hash_seed,
  150. ntdb->hash_data);
  151. hdr->hash_bits = ntdb->hash_bits;
  152. hdr->recovery = 0;
  153. hdr->features_used = hdr->features_offered = NTDB_FEATURE_MASK;
  154. hdr->seqnum = 0;
  155. hdr->capabilities = 0;
  156. memset(hdr->reserved, 0, sizeof(hdr->reserved));
  157. /* Hash is all zero after header. */
  158. set_header(NULL, htable, NTDB_HTABLE_MAGIC, 0, hashsize, hashsize);
  159. memset(htable + 1, 0, hashsize);
  160. /* Free is empty. */
  161. hdr->free_table = (char *)ftable - (char *)hdr;
  162. memset(ftable, 0, sizeof(*ftable));
  163. ecode = set_header(NULL, &ftable->hdr, NTDB_FTABLE_MAGIC, 0,
  164. sizeof(*ftable) - sizeof(ftable->hdr),
  165. sizeof(*ftable) - sizeof(ftable->hdr));
  166. if (ecode != NTDB_SUCCESS) {
  167. goto out;
  168. }
  169. /* Rest of database is a free record, containing junk. */
  170. remaindersize = dbsize - hdrsize;
  171. remainder->ftable_and_len
  172. = (remaindersize + sizeof(*remainder)
  173. - sizeof(struct ntdb_used_record));
  174. remainder->next = 0;
  175. remainder->magic_and_prev
  176. = (NTDB_FREE_MAGIC << (64-NTDB_OFF_UPPER_STEAL))
  177. | ((char *)remainder - (char *)hdr);
  178. memset(remainder + 1, 0x43, remaindersize);
  179. /* Put in our single free entry. */
  180. ftable->buckets[size_to_bucket(remaindersize)] =
  181. (char *)remainder - (char *)hdr;
  182. /* Magic food */
  183. memset(hdr->magic_food, 0, sizeof(hdr->magic_food));
  184. strcpy(hdr->magic_food, NTDB_MAGIC_FOOD);
  185. /* This creates an endian-converted database, as if read from disk */
  186. magic_len = sizeof(hdr->magic_food);
  187. ntdb_convert(ntdb, (char *)hdr + magic_len, hdrsize - magic_len);
  188. /* Return copy of header. */
  189. *rhdr = *hdr;
  190. if (ntdb->flags & NTDB_INTERNAL) {
  191. ntdb->file->map_size = dbsize;
  192. ntdb->file->map_ptr = hdr;
  193. return NTDB_SUCCESS;
  194. }
  195. if (lseek(ntdb->file->fd, 0, SEEK_SET) == -1) {
  196. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  197. "ntdb_new_database:"
  198. " failed to seek: %s", strerror(errno));
  199. goto out;
  200. }
  201. if (ftruncate(ntdb->file->fd, 0) == -1) {
  202. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  203. "ntdb_new_database:"
  204. " failed to truncate: %s", strerror(errno));
  205. goto out;
  206. }
  207. rlen = write(ntdb->file->fd, hdr, dbsize);
  208. if (rlen != dbsize) {
  209. if (rlen >= 0)
  210. errno = ENOSPC;
  211. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  212. "ntdb_new_database: %zi writing header: %s",
  213. rlen, strerror(errno));
  214. goto out;
  215. }
  216. out:
  217. ntdb->free_fn(hdr, ntdb->alloc_data);
  218. return ecode;
  219. }
  220. static enum NTDB_ERROR ntdb_new_file(struct ntdb_context *ntdb)
  221. {
  222. ntdb->file = ntdb->alloc_fn(NULL, sizeof(*ntdb->file), ntdb->alloc_data);
  223. if (!ntdb->file)
  224. return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
  225. "ntdb_open: cannot alloc ntdb_file structure");
  226. ntdb->file->num_lockrecs = 0;
  227. ntdb->file->lockrecs = NULL;
  228. ntdb->file->allrecord_lock.count = 0;
  229. ntdb->file->refcnt = 1;
  230. ntdb->file->map_ptr = NULL;
  231. ntdb->file->direct_count = 0;
  232. ntdb->file->old_mmaps = NULL;
  233. return NTDB_SUCCESS;
  234. }
  235. _PUBLIC_ enum NTDB_ERROR ntdb_set_attribute(struct ntdb_context *ntdb,
  236. const union ntdb_attribute *attr)
  237. {
  238. switch (attr->base.attr) {
  239. case NTDB_ATTRIBUTE_LOG:
  240. ntdb->log_fn = attr->log.fn;
  241. ntdb->log_data = attr->log.data;
  242. break;
  243. case NTDB_ATTRIBUTE_HASH:
  244. case NTDB_ATTRIBUTE_SEED:
  245. case NTDB_ATTRIBUTE_OPENHOOK:
  246. case NTDB_ATTRIBUTE_HASHSIZE:
  247. return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  248. NTDB_LOG_USE_ERROR,
  249. "ntdb_set_attribute:"
  250. " cannot set %s after opening",
  251. attr->base.attr == NTDB_ATTRIBUTE_HASH
  252. ? "NTDB_ATTRIBUTE_HASH"
  253. : attr->base.attr == NTDB_ATTRIBUTE_SEED
  254. ? "NTDB_ATTRIBUTE_SEED"
  255. : attr->base.attr == NTDB_ATTRIBUTE_OPENHOOK
  256. ? "NTDB_ATTRIBUTE_OPENHOOK"
  257. : "NTDB_ATTRIBUTE_HASHSIZE");
  258. case NTDB_ATTRIBUTE_STATS:
  259. return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  260. NTDB_LOG_USE_ERROR,
  261. "ntdb_set_attribute:"
  262. " cannot set NTDB_ATTRIBUTE_STATS");
  263. case NTDB_ATTRIBUTE_FLOCK:
  264. ntdb->lock_fn = attr->flock.lock;
  265. ntdb->unlock_fn = attr->flock.unlock;
  266. ntdb->lock_data = attr->flock.data;
  267. break;
  268. case NTDB_ATTRIBUTE_ALLOCATOR:
  269. ntdb->alloc_fn = attr->alloc.alloc;
  270. ntdb->expand_fn = attr->alloc.expand;
  271. ntdb->free_fn = attr->alloc.free;
  272. ntdb->alloc_data = attr->alloc.priv_data;
  273. break;
  274. default:
  275. return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  276. NTDB_LOG_USE_ERROR,
  277. "ntdb_set_attribute:"
  278. " unknown attribute type %u",
  279. attr->base.attr);
  280. }
  281. return NTDB_SUCCESS;
  282. }
  283. _PUBLIC_ enum NTDB_ERROR ntdb_get_attribute(struct ntdb_context *ntdb,
  284. union ntdb_attribute *attr)
  285. {
  286. switch (attr->base.attr) {
  287. case NTDB_ATTRIBUTE_LOG:
  288. if (!ntdb->log_fn)
  289. return NTDB_ERR_NOEXIST;
  290. attr->log.fn = ntdb->log_fn;
  291. attr->log.data = ntdb->log_data;
  292. break;
  293. case NTDB_ATTRIBUTE_HASH:
  294. attr->hash.fn = ntdb->hash_fn;
  295. attr->hash.data = ntdb->hash_data;
  296. break;
  297. case NTDB_ATTRIBUTE_SEED:
  298. attr->seed.seed = ntdb->hash_seed;
  299. break;
  300. case NTDB_ATTRIBUTE_OPENHOOK:
  301. if (!ntdb->openhook)
  302. return NTDB_ERR_NOEXIST;
  303. attr->openhook.fn = ntdb->openhook;
  304. attr->openhook.data = ntdb->openhook_data;
  305. break;
  306. case NTDB_ATTRIBUTE_STATS: {
  307. size_t size = attr->stats.size;
  308. if (size > ntdb->stats.size)
  309. size = ntdb->stats.size;
  310. memcpy(&attr->stats, &ntdb->stats, size);
  311. break;
  312. }
  313. case NTDB_ATTRIBUTE_FLOCK:
  314. attr->flock.lock = ntdb->lock_fn;
  315. attr->flock.unlock = ntdb->unlock_fn;
  316. attr->flock.data = ntdb->lock_data;
  317. break;
  318. case NTDB_ATTRIBUTE_ALLOCATOR:
  319. attr->alloc.alloc = ntdb->alloc_fn;
  320. attr->alloc.expand = ntdb->expand_fn;
  321. attr->alloc.free = ntdb->free_fn;
  322. attr->alloc.priv_data = ntdb->alloc_data;
  323. break;
  324. case NTDB_ATTRIBUTE_HASHSIZE:
  325. attr->hashsize.size = 1 << ntdb->hash_bits;
  326. break;
  327. default:
  328. return ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  329. NTDB_LOG_USE_ERROR,
  330. "ntdb_get_attribute:"
  331. " unknown attribute type %u",
  332. attr->base.attr);
  333. }
  334. attr->base.next = NULL;
  335. return NTDB_SUCCESS;
  336. }
  337. _PUBLIC_ void ntdb_unset_attribute(struct ntdb_context *ntdb,
  338. enum ntdb_attribute_type type)
  339. {
  340. switch (type) {
  341. case NTDB_ATTRIBUTE_LOG:
  342. ntdb->log_fn = NULL;
  343. break;
  344. case NTDB_ATTRIBUTE_OPENHOOK:
  345. ntdb->openhook = NULL;
  346. break;
  347. case NTDB_ATTRIBUTE_HASH:
  348. case NTDB_ATTRIBUTE_SEED:
  349. ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
  350. "ntdb_unset_attribute: cannot unset %s after opening",
  351. type == NTDB_ATTRIBUTE_HASH
  352. ? "NTDB_ATTRIBUTE_HASH"
  353. : "NTDB_ATTRIBUTE_SEED");
  354. break;
  355. case NTDB_ATTRIBUTE_STATS:
  356. ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  357. NTDB_LOG_USE_ERROR,
  358. "ntdb_unset_attribute:"
  359. "cannot unset NTDB_ATTRIBUTE_STATS");
  360. break;
  361. case NTDB_ATTRIBUTE_FLOCK:
  362. ntdb->lock_fn = ntdb_fcntl_lock;
  363. ntdb->unlock_fn = ntdb_fcntl_unlock;
  364. break;
  365. default:
  366. ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  367. NTDB_LOG_USE_ERROR,
  368. "ntdb_unset_attribute: unknown attribute type %u",
  369. type);
  370. }
  371. }
  372. /* The top three bits of the capability tell us whether it matters. */
  373. enum NTDB_ERROR unknown_capability(struct ntdb_context *ntdb, const char *caller,
  374. ntdb_off_t type)
  375. {
  376. if (type & NTDB_CAP_NOOPEN) {
  377. return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  378. "%s: file has unknown capability %llu",
  379. caller, type & NTDB_CAP_NOOPEN);
  380. }
  381. if ((type & NTDB_CAP_NOWRITE) && !(ntdb->flags & NTDB_RDONLY)) {
  382. return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_ERROR,
  383. "%s: file has unknown capability %llu"
  384. " (cannot write to it)",
  385. caller, type & NTDB_CAP_NOOPEN);
  386. }
  387. if (type & NTDB_CAP_NOCHECK) {
  388. ntdb->flags |= NTDB_CANT_CHECK;
  389. }
  390. return NTDB_SUCCESS;
  391. }
  392. static enum NTDB_ERROR capabilities_ok(struct ntdb_context *ntdb,
  393. ntdb_off_t capabilities)
  394. {
  395. ntdb_off_t off, next;
  396. enum NTDB_ERROR ecode = NTDB_SUCCESS;
  397. const struct ntdb_capability *cap;
  398. /* Check capability list. */
  399. for (off = capabilities; off && ecode == NTDB_SUCCESS; off = next) {
  400. cap = ntdb_access_read(ntdb, off, sizeof(*cap), true);
  401. if (NTDB_PTR_IS_ERR(cap)) {
  402. return NTDB_PTR_ERR(cap);
  403. }
  404. switch (cap->type & NTDB_CAP_TYPE_MASK) {
  405. /* We don't understand any capabilities (yet). */
  406. default:
  407. ecode = unknown_capability(ntdb, "ntdb_open", cap->type);
  408. }
  409. next = cap->next;
  410. ntdb_access_release(ntdb, cap);
  411. }
  412. return ecode;
  413. }
  414. static void *default_alloc(const void *owner, size_t len, void *priv_data)
  415. {
  416. return malloc(len);
  417. }
  418. static void *default_expand(void *ptr, size_t len, void *priv_data)
  419. {
  420. return realloc(ptr, len);
  421. }
  422. static void default_free(void *ptr, void *priv_data)
  423. {
  424. free(ptr);
  425. }
  426. /* First allocation needs manual search of attributes. */
  427. static struct ntdb_context *alloc_ntdb(const union ntdb_attribute *attr,
  428. const char *name)
  429. {
  430. size_t len = sizeof(struct ntdb_context) + strlen(name) + 1;
  431. while (attr) {
  432. if (attr->base.attr == NTDB_ATTRIBUTE_ALLOCATOR) {
  433. return attr->alloc.alloc(NULL, len,
  434. attr->alloc.priv_data);
  435. }
  436. attr = attr->base.next;
  437. }
  438. return default_alloc(NULL, len, NULL);
  439. }
  440. static unsigned int next_pow2(uint64_t size)
  441. {
  442. unsigned int bits = 1;
  443. while ((1ULL << bits) < size)
  444. bits++;
  445. return bits;
  446. }
  447. _PUBLIC_ struct ntdb_context *ntdb_open(const char *name, int ntdb_flags,
  448. int open_flags, mode_t mode,
  449. union ntdb_attribute *attr)
  450. {
  451. struct ntdb_context *ntdb;
  452. struct stat st;
  453. int saved_errno = 0;
  454. uint64_t hash_test;
  455. unsigned v;
  456. ssize_t rlen;
  457. struct ntdb_header hdr;
  458. struct ntdb_attribute_seed *seed = NULL;
  459. ntdb_bool_err berr;
  460. enum NTDB_ERROR ecode;
  461. int openlock;
  462. ntdb = alloc_ntdb(attr, name);
  463. if (!ntdb) {
  464. /* Can't log this */
  465. errno = ENOMEM;
  466. return NULL;
  467. }
  468. /* Set name immediately for logging functions. */
  469. ntdb->name = strcpy((char *)(ntdb + 1), name);
  470. ntdb->flags = ntdb_flags;
  471. ntdb->log_fn = NULL;
  472. ntdb->open_flags = open_flags;
  473. ntdb->file = NULL;
  474. ntdb->openhook = NULL;
  475. ntdb->lock_fn = ntdb_fcntl_lock;
  476. ntdb->unlock_fn = ntdb_fcntl_unlock;
  477. ntdb->hash_fn = ntdb_jenkins_hash;
  478. memset(&ntdb->stats, 0, sizeof(ntdb->stats));
  479. ntdb->stats.base.attr = NTDB_ATTRIBUTE_STATS;
  480. ntdb->stats.size = sizeof(ntdb->stats);
  481. ntdb->alloc_fn = default_alloc;
  482. ntdb->expand_fn = default_expand;
  483. ntdb->free_fn = default_free;
  484. ntdb->hash_bits = NTDB_DEFAULT_HBITS; /* 64k of hash by default. */
  485. while (attr) {
  486. switch (attr->base.attr) {
  487. case NTDB_ATTRIBUTE_HASH:
  488. ntdb->hash_fn = attr->hash.fn;
  489. ntdb->hash_data = attr->hash.data;
  490. break;
  491. case NTDB_ATTRIBUTE_SEED:
  492. seed = &attr->seed;
  493. break;
  494. case NTDB_ATTRIBUTE_OPENHOOK:
  495. ntdb->openhook = attr->openhook.fn;
  496. ntdb->openhook_data = attr->openhook.data;
  497. break;
  498. case NTDB_ATTRIBUTE_HASHSIZE:
  499. ntdb->hash_bits = next_pow2(attr->hashsize.size);
  500. if (ntdb->hash_bits > 31) {
  501. ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  502. NTDB_LOG_USE_ERROR,
  503. "ntdb_open: hash_size %u"
  504. " too large",
  505. attr->hashsize.size);
  506. goto fail;
  507. }
  508. break;
  509. default:
  510. /* These are set as normal. */
  511. ecode = ntdb_set_attribute(ntdb, attr);
  512. if (ecode != NTDB_SUCCESS)
  513. goto fail;
  514. }
  515. attr = attr->base.next;
  516. }
  517. if (ntdb_flags & ~(NTDB_INTERNAL | NTDB_NOLOCK | NTDB_NOMMAP | NTDB_CONVERT
  518. | NTDB_NOSYNC | NTDB_SEQNUM | NTDB_ALLOW_NESTING
  519. | NTDB_RDONLY)) {
  520. ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
  521. "ntdb_open: unknown flags %u", ntdb_flags);
  522. goto fail;
  523. }
  524. if (seed) {
  525. if (!(ntdb_flags & NTDB_INTERNAL) && !(open_flags & O_CREAT)) {
  526. ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  527. NTDB_LOG_USE_ERROR,
  528. "ntdb_open:"
  529. " cannot set NTDB_ATTRIBUTE_SEED"
  530. " without O_CREAT.");
  531. goto fail;
  532. }
  533. }
  534. if ((open_flags & O_ACCMODE) == O_WRONLY) {
  535. ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL, NTDB_LOG_USE_ERROR,
  536. "ntdb_open: can't open ntdb %s write-only",
  537. name);
  538. goto fail;
  539. }
  540. if ((open_flags & O_ACCMODE) == O_RDONLY) {
  541. openlock = F_RDLCK;
  542. ntdb->flags |= NTDB_RDONLY;
  543. } else {
  544. if (ntdb_flags & NTDB_RDONLY) {
  545. ecode = ntdb_logerr(ntdb, NTDB_ERR_EINVAL,
  546. NTDB_LOG_USE_ERROR,
  547. "ntdb_open: can't use NTDB_RDONLY"
  548. " without O_RDONLY");
  549. goto fail;
  550. }
  551. openlock = F_WRLCK;
  552. }
  553. /* internal databases don't need any of the rest. */
  554. if (ntdb->flags & NTDB_INTERNAL) {
  555. ntdb->flags |= (NTDB_NOLOCK | NTDB_NOMMAP);
  556. ecode = ntdb_new_file(ntdb);
  557. if (ecode != NTDB_SUCCESS) {
  558. goto fail;
  559. }
  560. ntdb->file->fd = -1;
  561. ecode = ntdb_new_database(ntdb, seed, &hdr);
  562. if (ecode == NTDB_SUCCESS) {
  563. ntdb_convert(ntdb, &hdr.hash_seed,
  564. sizeof(hdr.hash_seed));
  565. ntdb->hash_seed = hdr.hash_seed;
  566. ntdb_context_init(ntdb);
  567. ntdb_ftable_init(ntdb);
  568. }
  569. if (ecode != NTDB_SUCCESS) {
  570. goto fail;
  571. }
  572. return ntdb;
  573. }
  574. if (stat(name, &st) != -1)
  575. ntdb->file = find_file(st.st_dev, st.st_ino);
  576. if (!ntdb->file) {
  577. ecode = ntdb_new_file(ntdb);
  578. if (ecode != NTDB_SUCCESS) {
  579. goto fail;
  580. }
  581. /* Set this now, as ntdb_nest_lock examines it. */
  582. ntdb->file->map_size = 0;
  583. if ((ntdb->file->fd = open(name, open_flags, mode)) == -1) {
  584. enum ntdb_log_level lvl;
  585. /* errno set by open(2) */
  586. saved_errno = errno;
  587. /* Probing for files like this is a common pattern. */
  588. if (!(open_flags & O_CREAT) && errno == ENOENT) {
  589. lvl = NTDB_LOG_WARNING;
  590. } else {
  591. lvl = NTDB_LOG_ERROR;
  592. }
  593. ntdb_logerr(ntdb, NTDB_ERR_IO, lvl,
  594. "ntdb_open: could not open file %s: %s",
  595. name, strerror(errno));
  596. goto fail_errno;
  597. }
  598. /* ensure there is only one process initialising at once:
  599. * do it immediately to reduce the create/openlock race. */
  600. ecode = ntdb_lock_open(ntdb, openlock,
  601. NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
  602. if (ecode != NTDB_SUCCESS) {
  603. saved_errno = errno;
  604. goto fail_errno;
  605. }
  606. /* on exec, don't inherit the fd */
  607. v = fcntl(ntdb->file->fd, F_GETFD, 0);
  608. fcntl(ntdb->file->fd, F_SETFD, v | FD_CLOEXEC);
  609. if (fstat(ntdb->file->fd, &st) == -1) {
  610. saved_errno = errno;
  611. ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  612. "ntdb_open: could not stat open %s: %s",
  613. name, strerror(errno));
  614. goto fail_errno;
  615. }
  616. ntdb->file->device = st.st_dev;
  617. ntdb->file->inode = st.st_ino;
  618. /* call their open hook if they gave us one. */
  619. if (ntdb->openhook) {
  620. ecode = ntdb->openhook(ntdb->file->fd, ntdb->openhook_data);
  621. if (ecode != NTDB_SUCCESS) {
  622. ntdb_logerr(ntdb, ecode, NTDB_LOG_ERROR,
  623. "ntdb_open: open hook failed");
  624. goto fail;
  625. }
  626. open_flags |= O_CREAT;
  627. }
  628. } else {
  629. /* ensure there is only one process initialising at once */
  630. ecode = ntdb_lock_open(ntdb, openlock,
  631. NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
  632. if (ecode != NTDB_SUCCESS) {
  633. saved_errno = errno;
  634. goto fail_errno;
  635. }
  636. }
  637. /* If they used O_TRUNC, read will return 0. */
  638. rlen = pread(ntdb->file->fd, &hdr, sizeof(hdr), 0);
  639. if (rlen == 0 && (open_flags & O_CREAT)) {
  640. ecode = ntdb_new_database(ntdb, seed, &hdr);
  641. if (ecode != NTDB_SUCCESS) {
  642. goto fail;
  643. }
  644. } else if (rlen < 0) {
  645. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  646. "ntdb_open: error %s reading %s",
  647. strerror(errno), name);
  648. goto fail;
  649. } else if (rlen < sizeof(hdr)
  650. || strcmp(hdr.magic_food, NTDB_MAGIC_FOOD) != 0) {
  651. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  652. "ntdb_open: %s is not a ntdb file", name);
  653. goto fail;
  654. }
  655. if (hdr.version != NTDB_VERSION) {
  656. if (hdr.version == bswap_64(NTDB_VERSION))
  657. ntdb->flags |= NTDB_CONVERT;
  658. else {
  659. /* wrong version */
  660. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  661. "ntdb_open:"
  662. " %s is unknown version 0x%llx",
  663. name, (long long)hdr.version);
  664. goto fail;
  665. }
  666. } else if (ntdb->flags & NTDB_CONVERT) {
  667. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  668. "ntdb_open:"
  669. " %s does not need NTDB_CONVERT",
  670. name);
  671. goto fail;
  672. }
  673. ntdb_context_init(ntdb);
  674. ntdb_convert(ntdb, &hdr, sizeof(hdr));
  675. ntdb->hash_bits = hdr.hash_bits;
  676. ntdb->hash_seed = hdr.hash_seed;
  677. hash_test = NTDB_HASH_MAGIC;
  678. hash_test = ntdb_hash(ntdb, &hash_test, sizeof(hash_test));
  679. if (hdr.hash_test != hash_test) {
  680. /* wrong hash variant */
  681. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  682. "ntdb_open:"
  683. " %s uses a different hash function",
  684. name);
  685. goto fail;
  686. }
  687. ecode = capabilities_ok(ntdb, hdr.capabilities);
  688. if (ecode != NTDB_SUCCESS) {
  689. goto fail;
  690. }
  691. /* Clear any features we don't understand. */
  692. if ((open_flags & O_ACCMODE) != O_RDONLY) {
  693. hdr.features_used &= NTDB_FEATURE_MASK;
  694. ecode = ntdb_write_convert(ntdb, offsetof(struct ntdb_header,
  695. features_used),
  696. &hdr.features_used,
  697. sizeof(hdr.features_used));
  698. if (ecode != NTDB_SUCCESS)
  699. goto fail;
  700. }
  701. ntdb_unlock_open(ntdb, openlock);
  702. /* This makes sure we have current map_size and mmap. */
  703. ecode = ntdb_oob(ntdb, ntdb->file->map_size, 1, true);
  704. if (unlikely(ecode != NTDB_SUCCESS))
  705. goto fail;
  706. if (ntdb->file->map_size % NTDB_PGSIZE != 0) {
  707. ecode = ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  708. "ntdb_open:"
  709. " %s size %llu isn't a multiple of %u",
  710. name, (long long)ntdb->file->map_size,
  711. NTDB_PGSIZE);
  712. goto fail;
  713. }
  714. /* Now it's fully formed, recover if necessary. */
  715. berr = ntdb_needs_recovery(ntdb);
  716. if (unlikely(berr != false)) {
  717. if (berr < 0) {
  718. ecode = NTDB_OFF_TO_ERR(berr);
  719. goto fail;
  720. }
  721. ecode = ntdb_lock_and_recover(ntdb);
  722. if (ecode != NTDB_SUCCESS) {
  723. goto fail;
  724. }
  725. }
  726. ecode = ntdb_ftable_init(ntdb);
  727. if (ecode != NTDB_SUCCESS) {
  728. goto fail;
  729. }
  730. ntdb->next = tdbs;
  731. tdbs = ntdb;
  732. return ntdb;
  733. fail:
  734. /* Map ecode to some logical errno. */
  735. switch (NTDB_ERR_TO_OFF(ecode)) {
  736. case NTDB_ERR_TO_OFF(NTDB_ERR_CORRUPT):
  737. case NTDB_ERR_TO_OFF(NTDB_ERR_IO):
  738. saved_errno = EIO;
  739. break;
  740. case NTDB_ERR_TO_OFF(NTDB_ERR_LOCK):
  741. saved_errno = EWOULDBLOCK;
  742. break;
  743. case NTDB_ERR_TO_OFF(NTDB_ERR_OOM):
  744. saved_errno = ENOMEM;
  745. break;
  746. case NTDB_ERR_TO_OFF(NTDB_ERR_EINVAL):
  747. saved_errno = EINVAL;
  748. break;
  749. default:
  750. saved_errno = EINVAL;
  751. break;
  752. }
  753. fail_errno:
  754. #ifdef NTDB_TRACE
  755. close(ntdb->tracefd);
  756. #endif
  757. if (ntdb->file) {
  758. ntdb_lock_cleanup(ntdb);
  759. if (--ntdb->file->refcnt == 0) {
  760. assert(ntdb->file->num_lockrecs == 0);
  761. if (ntdb->file->map_ptr) {
  762. if (ntdb->flags & NTDB_INTERNAL) {
  763. ntdb->free_fn(ntdb->file->map_ptr,
  764. ntdb->alloc_data);
  765. } else
  766. ntdb_munmap(ntdb);
  767. }
  768. if (ntdb->file->fd != -1 && close(ntdb->file->fd) != 0)
  769. ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
  770. "ntdb_open: failed to close ntdb fd"
  771. " on error: %s", strerror(errno));
  772. ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
  773. ntdb->free_fn(ntdb->file, ntdb->alloc_data);
  774. }
  775. }
  776. ntdb->free_fn(ntdb, ntdb->alloc_data);
  777. errno = saved_errno;
  778. return NULL;
  779. }
  780. _PUBLIC_ int ntdb_close(struct ntdb_context *ntdb)
  781. {
  782. int ret = 0;
  783. struct ntdb_context **i;
  784. ntdb_trace(ntdb, "ntdb_close");
  785. if (ntdb->transaction) {
  786. ntdb_transaction_cancel(ntdb);
  787. }
  788. ntdb_lock_cleanup(ntdb);
  789. if (--ntdb->file->refcnt == 0) {
  790. if (ntdb->file->map_ptr) {
  791. if (ntdb->flags & NTDB_INTERNAL) {
  792. ntdb->free_fn(ntdb->file->map_ptr,
  793. ntdb->alloc_data);
  794. } else {
  795. ntdb_munmap(ntdb);
  796. }
  797. }
  798. ret = close(ntdb->file->fd);
  799. ntdb->free_fn(ntdb->file->lockrecs, ntdb->alloc_data);
  800. ntdb->free_fn(ntdb->file, ntdb->alloc_data);
  801. }
  802. /* Remove from tdbs list */
  803. for (i = &tdbs; *i; i = &(*i)->next) {
  804. if (*i == ntdb) {
  805. *i = ntdb->next;
  806. break;
  807. }
  808. }
  809. #ifdef NTDB_TRACE
  810. close(ntdb->tracefd);
  811. #endif
  812. ntdb->free_fn(ntdb, ntdb->alloc_data);
  813. return ret;
  814. }
  815. _PUBLIC_ void ntdb_foreach_(int (*fn)(struct ntdb_context *, void *), void *p)
  816. {
  817. struct ntdb_context *i;
  818. for (i = tdbs; i; i = i->next) {
  819. if (fn(i, p) != 0)
  820. break;
  821. }
  822. }