/* lock.c — ntdb byte-range locking (header below). */
  1. /*
  2. Unix SMB/CIFS implementation.
  3. trivial database library
  4. Copyright (C) Andrew Tridgell 1999-2005
  5. Copyright (C) Paul `Rusty' Russell 2000
  6. Copyright (C) Jeremy Allison 2000-2003
  7. ** NOTE! The following LGPL license applies to the ntdb
  8. ** library. This does NOT imply that all of Samba is released
  9. ** under the LGPL
  10. This library is free software; you can redistribute it and/or
  11. modify it under the terms of the GNU Lesser General Public
  12. License as published by the Free Software Foundation; either
  13. version 3 of the License, or (at your option) any later version.
  14. This library is distributed in the hope that it will be useful,
  15. but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. Lesser General Public License for more details.
  18. You should have received a copy of the GNU Lesser General Public
  19. License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20. */
  21. #include "private.h"
  22. #include <ccan/build_assert/build_assert.h>
  23. /* If we were threaded, we could wait for unlock, but we're not, so fail. */
  24. enum NTDB_ERROR owner_conflict(struct ntdb_context *ntdb, const char *call)
  25. {
  26. return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
  27. "%s: lock owned by another ntdb in this process.",
  28. call);
  29. }
  30. /* If we fork, we no longer really own locks. */
  31. bool check_lock_pid(struct ntdb_context *ntdb, const char *call, bool log)
  32. {
  33. /* No locks? No problem! */
  34. if (ntdb->file->allrecord_lock.count == 0
  35. && ntdb->file->num_lockrecs == 0) {
  36. return true;
  37. }
  38. /* No fork? No problem! */
  39. if (ntdb->file->locker == getpid()) {
  40. return true;
  41. }
  42. if (log) {
  43. ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
  44. "%s: fork() detected after lock acquisition!"
  45. " (%u vs %u)", call,
  46. (unsigned int)ntdb->file->locker,
  47. (unsigned int)getpid());
  48. }
  49. return false;
  50. }
  51. int ntdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
  52. void *unused)
  53. {
  54. struct flock fl;
  55. int ret;
  56. do {
  57. fl.l_type = rw;
  58. fl.l_whence = SEEK_SET;
  59. fl.l_start = off;
  60. fl.l_len = len;
  61. if (waitflag)
  62. ret = fcntl(fd, F_SETLKW, &fl);
  63. else
  64. ret = fcntl(fd, F_SETLK, &fl);
  65. } while (ret != 0 && errno == EINTR);
  66. return ret;
  67. }
  68. int ntdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused)
  69. {
  70. struct flock fl;
  71. int ret;
  72. do {
  73. fl.l_type = F_UNLCK;
  74. fl.l_whence = SEEK_SET;
  75. fl.l_start = off;
  76. fl.l_len = len;
  77. ret = fcntl(fd, F_SETLKW, &fl);
  78. } while (ret != 0 && errno == EINTR);
  79. return ret;
  80. }
  81. static int lock(struct ntdb_context *ntdb,
  82. int rw, off_t off, off_t len, bool waitflag)
  83. {
  84. int ret;
  85. if (ntdb->file->allrecord_lock.count == 0
  86. && ntdb->file->num_lockrecs == 0) {
  87. ntdb->file->locker = getpid();
  88. }
  89. ntdb->stats.lock_lowlevel++;
  90. ret = ntdb->lock_fn(ntdb->file->fd, rw, off, len, waitflag,
  91. ntdb->lock_data);
  92. if (!waitflag) {
  93. ntdb->stats.lock_nonblock++;
  94. if (ret != 0)
  95. ntdb->stats.lock_nonblock_fail++;
  96. }
  97. return ret;
  98. }
/* Release a byte-range lock via the pluggable unlock function.
 * Returns the raw unlock_fn result (0 on success, -1 on failure for
 * the default fcntl implementation). */
static int unlock(struct ntdb_context *ntdb, int rw, off_t off, off_t len)
{
#if 0 /* Check they matched up locks and unlocks correctly. */
	/* Debug-only (disabled): scan Linux /proc/locks to verify a
	 * matching lock record exists before unlocking.
	 * NOTE(review): this dead code filters on " POSIX ADVISORY " but
	 * advances by strlen(" FLOCK ADVISORY ") (same length, so it
	 * happens to work) and never checks the fopen() result — review
	 * before ever re-enabling. */
	char line[80];
	FILE *locks;
	bool found = false;

	locks = fopen("/proc/locks", "r");

	while (fgets(line, 80, locks)) {
		char *p;
		int type, start, l;

		/* eg. 1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */
		p = strchr(line, ':') + 1;
		if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY ")))
			continue;
		p += strlen(" FLOCK ADVISORY ");
		if (strncmp(p, "READ ", strlen("READ ")) == 0)
			type = F_RDLCK;
		else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
			type = F_WRLCK;
		else
			abort();
		p += 6;
		if (atoi(p) != getpid())
			continue;
		p = strchr(strchr(p, ' ') + 1, ' ') + 1;
		start = atoi(p);
		p = strchr(p, ' ') + 1;
		if (strncmp(p, "EOF", 3) == 0)
			l = 0;
		else
			l = atoi(p) - start + 1;

		if (off == start) {
			if (len != l) {
				fprintf(stderr, "Len %u should be %u: %s",
					(int)len, l, line);
				abort();
			}
			if (type != rw) {
				fprintf(stderr, "Type %s wrong: %s",
					rw == F_RDLCK ? "READ" : "WRITE", line);
				abort();
			}
			found = true;
			break;
		}
	}

	if (!found) {
		fprintf(stderr, "Unlock on %u@%u not found!",
			(int)off, (int)len);
		abort();
	}

	fclose(locks);
#endif

	return ntdb->unlock_fn(ntdb->file->fd, rw, off, len, ntdb->lock_data);
}
/* a byte range locking function - return NTDB_SUCCESS on success.
   this functions locks len bytes at the specified offset.

   note that a len of zero means lock to end of file.
   NTDB_LOCK_WAIT in flags selects a blocking lock; NTDB_LOCK_PROBE
   suppresses logging of expected contention failures. */
static enum NTDB_ERROR ntdb_brlock(struct ntdb_context *ntdb,
				   int rw_type, ntdb_off_t offset, ntdb_off_t len,
				   enum ntdb_lock_flags flags)
{
	int ret;

	/* Write locks are refused outright on a read-only database. */
	if (rw_type == F_WRLCK && (ntdb->flags & NTDB_RDONLY)) {
		return ntdb_logerr(ntdb, NTDB_ERR_RDONLY, NTDB_LOG_USE_ERROR,
				   "Write lock attempted on read-only database");
	}

	/* NTDB_NOLOCK: the caller opted out of all locking. */
	if (ntdb->flags & NTDB_NOLOCK) {
		return NTDB_SUCCESS;
	}

	/* A 32 bit system cannot open a 64-bit file, but it could have
	 * expanded since then: check here. */
	if ((size_t)(offset + len) != offset + len) {
		return ntdb_logerr(ntdb, NTDB_ERR_IO, NTDB_LOG_ERROR,
				   "ntdb_brlock: lock on giant offset %llu",
				   (long long)(offset + len));
	}

	ret = lock(ntdb, rw_type, offset, len, flags & NTDB_LOCK_WAIT);
	if (ret != 0) {
		/* Generic lock error. errno set by fcntl.
		 * EAGAIN is an expected return from non-blocking
		 * locks. */
		if (!(flags & NTDB_LOCK_PROBE)
		    && (errno != EAGAIN && errno != EINTR)) {
			ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				    "ntdb_brlock failed (fd=%d) at"
				    " offset %zu rw_type=%d flags=%d len=%zu:"
				    " %s",
				    ntdb->file->fd, (size_t)offset, rw_type,
				    flags, (size_t)len, strerror(errno));
		}
		return NTDB_ERR_LOCK;
	}
	return NTDB_SUCCESS;
}
  195. static enum NTDB_ERROR ntdb_brunlock(struct ntdb_context *ntdb,
  196. int rw_type, ntdb_off_t offset, size_t len)
  197. {
  198. if (ntdb->flags & NTDB_NOLOCK) {
  199. return NTDB_SUCCESS;
  200. }
  201. if (!check_lock_pid(ntdb, "ntdb_brunlock", false))
  202. return NTDB_ERR_LOCK;
  203. if (unlock(ntdb, rw_type, offset, len) == -1) {
  204. return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
  205. "ntdb_brunlock failed (fd=%d) at offset %zu"
  206. " rw_type=%d len=%zu: %s",
  207. ntdb->file->fd, (size_t)offset, rw_type,
  208. (size_t)len, strerror(errno));
  209. }
  210. return NTDB_SUCCESS;
  211. }
/*
  upgrade a read lock to a write lock. This needs to be handled in a
  special way as some OSes (such as solaris) have too conservative
  deadlock detection and claim a deadlock when progress can be
  made. For those OSes we may loop for a while.
*/
enum NTDB_ERROR ntdb_allrecord_upgrade(struct ntdb_context *ntdb, off_t start)
{
	int count = 1000;	/* bounded retries for spurious EDEADLK */

	/* NOTE(review): the diagnostic name passed here is
	 * "ntdb_transaction_prepare_commit" — presumably named for the
	 * calling operation; confirm before changing it. */
	if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true))
		return NTDB_ERR_LOCK;

	/* Only an un-nested (count == 1) allrecord lock can be upgraded. */
	if (ntdb->file->allrecord_lock.count != 1) {
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				   "ntdb_allrecord_upgrade failed:"
				   " count %u too high",
				   ntdb->file->allrecord_lock.count);
	}

	/* .off == 1 marks an upgradable lock (see ntdb_allrecord_lock);
	 * anything else means it was never upgradable or already upgraded. */
	if (ntdb->file->allrecord_lock.off != 1) {
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				   "ntdb_allrecord_upgrade failed:"
				   " already upgraded?");
	}

	if (ntdb->file->allrecord_lock.owner != ntdb) {
		return owner_conflict(ntdb, "ntdb_allrecord_upgrade");
	}

	while (count--) {
		struct timeval tv;
		/* Probe for the write lock over the whole range (len 0 = EOF). */
		if (ntdb_brlock(ntdb, F_WRLCK, start, 0,
				NTDB_LOCK_WAIT|NTDB_LOCK_PROBE) == NTDB_SUCCESS) {
			ntdb->file->allrecord_lock.ltype = F_WRLCK;
			ntdb->file->allrecord_lock.off = 0;
			return NTDB_SUCCESS;
		}
		if (errno != EDEADLK) {
			break;
		}
		/* sleep for as short a time as we can - more portable than usleep() */
		tv.tv_sec = 0;
		tv.tv_usec = 1;
		select(0, NULL, NULL, NULL, &tv);
	}

	if (errno != EAGAIN && errno != EINTR)
		ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
			    "ntdb_allrecord_upgrade failed");
	return NTDB_ERR_LOCK;
}
  258. static struct ntdb_lock *find_nestlock(struct ntdb_context *ntdb, ntdb_off_t offset,
  259. const struct ntdb_context *owner)
  260. {
  261. unsigned int i;
  262. for (i=0; i<ntdb->file->num_lockrecs; i++) {
  263. if (ntdb->file->lockrecs[i].off == offset) {
  264. if (owner && ntdb->file->lockrecs[i].owner != owner)
  265. return NULL;
  266. return &ntdb->file->lockrecs[i];
  267. }
  268. }
  269. return NULL;
  270. }
/* Take the allrecord write lock plus the open lock (both NOCHECK, so we
 * don't recurse into recovery detection), run transaction recovery,
 * then drop both locks.  Returns the recovery result. */
enum NTDB_ERROR ntdb_lock_and_recover(struct ntdb_context *ntdb)
{
	enum NTDB_ERROR ecode;

	/* NOTE(review): the diagnostic name string is
	 * "ntdb_transaction_prepare_commit" — looks inherited from a
	 * caller; confirm before changing it. */
	if (!check_lock_pid(ntdb, "ntdb_transaction_prepare_commit", true))
		return NTDB_ERR_LOCK;

	ecode = ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK,
				    false);
	if (ecode != NTDB_SUCCESS) {
		return ecode;
	}

	ecode = ntdb_lock_open(ntdb, F_WRLCK, NTDB_LOCK_WAIT|NTDB_LOCK_NOCHECK);
	if (ecode != NTDB_SUCCESS) {
		/* Couldn't serialise against other openers: back out. */
		ntdb_allrecord_unlock(ntdb, F_WRLCK);
		return ecode;
	}
	ecode = ntdb_transaction_recover(ntdb);

	ntdb_unlock_open(ntdb, F_WRLCK);
	ntdb_allrecord_unlock(ntdb, F_WRLCK);

	return ecode;
}
  291. /* lock an offset in the database. */
  292. static enum NTDB_ERROR ntdb_nest_lock(struct ntdb_context *ntdb,
  293. ntdb_off_t offset, int ltype,
  294. enum ntdb_lock_flags flags)
  295. {
  296. struct ntdb_lock *new_lck;
  297. enum NTDB_ERROR ecode;
  298. assert(offset <= (NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)
  299. + ntdb->file->map_size / 8));
  300. if (ntdb->flags & NTDB_NOLOCK)
  301. return NTDB_SUCCESS;
  302. if (!check_lock_pid(ntdb, "ntdb_nest_lock", true)) {
  303. return NTDB_ERR_LOCK;
  304. }
  305. ntdb->stats.locks++;
  306. new_lck = find_nestlock(ntdb, offset, NULL);
  307. if (new_lck) {
  308. if (new_lck->owner != ntdb) {
  309. return owner_conflict(ntdb, "ntdb_nest_lock");
  310. }
  311. if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
  312. return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
  313. "ntdb_nest_lock:"
  314. " offset %zu has read lock",
  315. (size_t)offset);
  316. }
  317. /* Just increment the struct, posix locks don't stack. */
  318. new_lck->count++;
  319. return NTDB_SUCCESS;
  320. }
  321. #if 0
  322. if (ntdb->file->num_lockrecs
  323. && offset >= NTDB_HASH_LOCK_START
  324. && offset < NTDB_HASH_LOCK_START + NTDB_HASH_LOCK_RANGE) {
  325. return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
  326. "ntdb_nest_lock: already have a hash lock?");
  327. }
  328. #endif
  329. if (ntdb->file->lockrecs == NULL) {
  330. new_lck = ntdb->alloc_fn(ntdb->file, sizeof(*ntdb->file->lockrecs),
  331. ntdb->alloc_data);
  332. } else {
  333. new_lck = (struct ntdb_lock *)ntdb->expand_fn(
  334. ntdb->file->lockrecs,
  335. sizeof(*ntdb->file->lockrecs)
  336. * (ntdb->file->num_lockrecs+1),
  337. ntdb->alloc_data);
  338. }
  339. if (new_lck == NULL) {
  340. return ntdb_logerr(ntdb, NTDB_ERR_OOM, NTDB_LOG_ERROR,
  341. "ntdb_nest_lock:"
  342. " unable to allocate %zu lock struct",
  343. ntdb->file->num_lockrecs + 1);
  344. }
  345. ntdb->file->lockrecs = new_lck;
  346. /* Since fcntl locks don't nest, we do a lock for the first one,
  347. and simply bump the count for future ones */
  348. ecode = ntdb_brlock(ntdb, ltype, offset, 1, flags);
  349. if (ecode != NTDB_SUCCESS) {
  350. return ecode;
  351. }
  352. /* First time we grab a lock, perhaps someone died in commit? */
  353. if (!(flags & NTDB_LOCK_NOCHECK)
  354. && ntdb->file->num_lockrecs == 0) {
  355. ntdb_bool_err berr = ntdb_needs_recovery(ntdb);
  356. if (berr != false) {
  357. ntdb_brunlock(ntdb, ltype, offset, 1);
  358. if (berr < 0)
  359. return NTDB_OFF_TO_ERR(berr);
  360. ecode = ntdb_lock_and_recover(ntdb);
  361. if (ecode == NTDB_SUCCESS) {
  362. ecode = ntdb_brlock(ntdb, ltype, offset, 1,
  363. flags);
  364. }
  365. if (ecode != NTDB_SUCCESS) {
  366. return ecode;
  367. }
  368. }
  369. }
  370. ntdb->file->lockrecs[ntdb->file->num_lockrecs].owner = ntdb;
  371. ntdb->file->lockrecs[ntdb->file->num_lockrecs].off = offset;
  372. ntdb->file->lockrecs[ntdb->file->num_lockrecs].count = 1;
  373. ntdb->file->lockrecs[ntdb->file->num_lockrecs].ltype = ltype;
  374. ntdb->file->num_lockrecs++;
  375. return NTDB_SUCCESS;
  376. }
/* Drop one reference on a nested lock; the kernel-level unlock only
 * happens when the last reference goes away. */
static enum NTDB_ERROR ntdb_nest_unlock(struct ntdb_context *ntdb,
					ntdb_off_t off, int ltype)
{
	struct ntdb_lock *lck;
	enum NTDB_ERROR ecode;

	if (ntdb->flags & NTDB_NOLOCK)
		return NTDB_SUCCESS;

	/* Only consider lock records owned by this context. */
	lck = find_nestlock(ntdb, off, ntdb);
	if ((lck == NULL) || (lck->count == 0)) {
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				   "ntdb_nest_unlock: no lock for %zu",
				   (size_t)off);
	}

	if (lck->count > 1) {
		lck->count--;
		return NTDB_SUCCESS;
	}

	/*
	 * This lock has count==1 left, so we need to unlock it in the
	 * kernel. We don't bother with decrementing the in-memory array
	 * element, we're about to overwrite it with the last array element
	 * anyway.
	 */
	ecode = ntdb_brunlock(ntdb, ltype, off, 1);

	/*
	 * Shrink the array by overwriting the element just unlocked with the
	 * last array element.  (Callers iterating the array must account
	 * for this swap — see ntdb_lock_cleanup.)
	 */
	*lck = ntdb->file->lockrecs[--ntdb->file->num_lockrecs];

	return ecode;
}
  408. /*
  409. get the transaction lock
  410. */
  411. enum NTDB_ERROR ntdb_transaction_lock(struct ntdb_context *ntdb, int ltype)
  412. {
  413. return ntdb_nest_lock(ntdb, NTDB_TRANSACTION_LOCK, ltype, NTDB_LOCK_WAIT);
  414. }
  415. /*
  416. release the transaction lock
  417. */
  418. void ntdb_transaction_unlock(struct ntdb_context *ntdb, int ltype)
  419. {
  420. ntdb_nest_unlock(ntdb, NTDB_TRANSACTION_LOCK, ltype);
  421. }
/* We only need to lock individual bytes, but Linux merges consecutive locks
 * so we lock in contiguous ranges.  Recursively halves the range so that
 * a blocking wait is only ever done on progressively smaller pieces. */
static enum NTDB_ERROR ntdb_lock_gradual(struct ntdb_context *ntdb,
					 int ltype, enum ntdb_lock_flags flags,
					 ntdb_off_t off, ntdb_off_t len)
{
	enum NTDB_ERROR ecode;
	enum ntdb_lock_flags nb_flags = (flags & ~NTDB_LOCK_WAIT);

	if (len <= 1) {
		/* 0 would mean to end-of-file... */
		assert(len != 0);
		/* Single hash.  Just do blocking lock. */
		return ntdb_brlock(ntdb, ltype, off, len, flags);
	}

	/* First we try non-blocking. */
	ecode = ntdb_brlock(ntdb, ltype, off, len, nb_flags);
	if (ecode != NTDB_ERR_LOCK) {
		/* Success, or a hard error other than contention. */
		return ecode;
	}

	/* Try locking first half, then second. */
	ecode = ntdb_lock_gradual(ntdb, ltype, flags, off, len / 2);
	if (ecode != NTDB_SUCCESS)
		return ecode;
	ecode = ntdb_lock_gradual(ntdb, ltype, flags,
				  off + len / 2, len - len / 2);
	if (ecode != NTDB_SUCCESS) {
		/* Second half failed: undo the first half. */
		ntdb_brunlock(ntdb, ltype, off, len / 2);
	}
	return ecode;
}
/* lock/unlock entire database.  It can only be upgradable if you have some
 * other way of guaranteeing exclusivity (ie. transaction write lock). */
enum NTDB_ERROR ntdb_allrecord_lock(struct ntdb_context *ntdb, int ltype,
				    enum ntdb_lock_flags flags, bool upgradable)
{
	enum NTDB_ERROR ecode;
	ntdb_bool_err berr;

	if (ntdb->flags & NTDB_NOLOCK) {
		return NTDB_SUCCESS;
	}

	if (!check_lock_pid(ntdb, "ntdb_allrecord_lock", true)) {
		return NTDB_ERR_LOCK;
	}

	/* Already hold an allrecord lock?  Nest it if compatible. */
	if (ntdb->file->allrecord_lock.count) {
		if (ntdb->file->allrecord_lock.owner != ntdb) {
			return owner_conflict(ntdb, "ntdb_allrecord_lock");
		}

		/* Nesting is fine for a read request, or when we already
		 * hold the write lock. */
		if (ltype == F_RDLCK
		    || ntdb->file->allrecord_lock.ltype == F_WRLCK) {
			ntdb->file->allrecord_lock.count++;
			return NTDB_SUCCESS;
		}

		/* a global lock of a different type exists */
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
				   "ntdb_allrecord_lock: already have %s lock",
				   ntdb->file->allrecord_lock.ltype == F_RDLCK
				   ? "read" : "write");
	}

	if (ntdb_has_hash_locks(ntdb)) {
		/* can't combine global and chain locks */
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
				   "ntdb_allrecord_lock:"
				   " already have chain lock");
	}

	if (upgradable && ltype != F_RDLCK) {
		/* ntdb error: you can't upgrade a write lock! */
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				   "ntdb_allrecord_lock:"
				   " can't upgrade a write lock");
	}

	ntdb->stats.locks++;
again:
	/* Lock hashes, gradually. */
	ecode = ntdb_lock_gradual(ntdb, ltype, flags, NTDB_HASH_LOCK_START,
				  1 << ntdb->hash_bits);
	if (ecode != NTDB_SUCCESS)
		return ecode;

	/* Lock free tables: there to end of file. */
	ecode = ntdb_brlock(ntdb, ltype,
			    NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits),
			    0, flags);
	if (ecode != NTDB_SUCCESS) {
		/* Back out the hash locks we just took. */
		ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START,
			      1 << ntdb->hash_bits);
		return ecode;
	}

	ntdb->file->allrecord_lock.owner = ntdb;
	ntdb->file->allrecord_lock.count = 1;
	/* If it's upgradable, it's actually exclusive so we can treat
	 * it as a write lock. */
	ntdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
	/* .off doubles as the "upgradable" flag (see ntdb_allrecord_upgrade). */
	ntdb->file->allrecord_lock.off = upgradable;

	/* Now check for needing recovery. */
	if (flags & NTDB_LOCK_NOCHECK)
		return NTDB_SUCCESS;

	berr = ntdb_needs_recovery(ntdb);
	if (likely(berr == false))
		return NTDB_SUCCESS;

	/* Recovery needed (or an error probing for it): drop our locks,
	 * recover, then retry from the top. */
	ntdb_allrecord_unlock(ntdb, ltype);
	if (berr < 0)
		return NTDB_OFF_TO_ERR(berr);
	ecode = ntdb_lock_and_recover(ntdb);
	if (ecode != NTDB_SUCCESS) {
		return ecode;
	}
	goto again;
}
  529. enum NTDB_ERROR ntdb_lock_open(struct ntdb_context *ntdb,
  530. int ltype, enum ntdb_lock_flags flags)
  531. {
  532. return ntdb_nest_lock(ntdb, NTDB_OPEN_LOCK, ltype, flags);
  533. }
  534. void ntdb_unlock_open(struct ntdb_context *ntdb, int ltype)
  535. {
  536. ntdb_nest_unlock(ntdb, NTDB_OPEN_LOCK, ltype);
  537. }
  538. bool ntdb_has_open_lock(struct ntdb_context *ntdb)
  539. {
  540. return !(ntdb->flags & NTDB_NOLOCK)
  541. && find_nestlock(ntdb, NTDB_OPEN_LOCK, ntdb) != NULL;
  542. }
  543. enum NTDB_ERROR ntdb_lock_expand(struct ntdb_context *ntdb, int ltype)
  544. {
  545. /* Lock doesn't protect data, so don't check (we recurse if we do!) */
  546. return ntdb_nest_lock(ntdb, NTDB_EXPANSION_LOCK, ltype,
  547. NTDB_LOCK_WAIT | NTDB_LOCK_NOCHECK);
  548. }
  549. void ntdb_unlock_expand(struct ntdb_context *ntdb, int ltype)
  550. {
  551. ntdb_nest_unlock(ntdb, NTDB_EXPANSION_LOCK, ltype);
  552. }
/* unlock entire db */
void ntdb_allrecord_unlock(struct ntdb_context *ntdb, int ltype)
{
	if (ntdb->flags & NTDB_NOLOCK)
		return;

	if (ntdb->file->allrecord_lock.count == 0) {
		ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
			    "ntdb_allrecord_unlock: not locked!");
		return;
	}

	if (ntdb->file->allrecord_lock.owner != ntdb) {
		ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
			    "ntdb_allrecord_unlock: not locked by us!");
		return;
	}

	/* Upgradable locks are marked as write locks, so also accept a
	 * read unlock when .off (the upgradable flag) is still set. */
	if (ntdb->file->allrecord_lock.ltype != ltype
	    && (!ntdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
		ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
			    "ntdb_allrecord_unlock: have %s lock",
			    ntdb->file->allrecord_lock.ltype == F_RDLCK
			    ? "read" : "write");
		return;
	}

	/* Nested: just drop one reference. */
	if (ntdb->file->allrecord_lock.count > 1) {
		ntdb->file->allrecord_lock.count--;
		return;
	}

	ntdb->file->allrecord_lock.count = 0;
	ntdb->file->allrecord_lock.ltype = 0;

	/* One unlock covers hashes and free tables: len 0 = to EOF. */
	ntdb_brunlock(ntdb, ltype, NTDB_HASH_LOCK_START, 0);
}
  585. bool ntdb_has_expansion_lock(struct ntdb_context *ntdb)
  586. {
  587. return find_nestlock(ntdb, NTDB_EXPANSION_LOCK, ntdb) != NULL;
  588. }
  589. bool ntdb_has_hash_locks(struct ntdb_context *ntdb)
  590. {
  591. unsigned int i;
  592. for (i=0; i<ntdb->file->num_lockrecs; i++) {
  593. if (ntdb->file->lockrecs[i].off >= NTDB_HASH_LOCK_START
  594. && ntdb->file->lockrecs[i].off < (NTDB_HASH_LOCK_START
  595. + (1 << ntdb->hash_bits)))
  596. return true;
  597. }
  598. return false;
  599. }
  600. static bool ntdb_has_free_lock(struct ntdb_context *ntdb)
  601. {
  602. unsigned int i;
  603. if (ntdb->flags & NTDB_NOLOCK)
  604. return false;
  605. for (i=0; i<ntdb->file->num_lockrecs; i++) {
  606. if (ntdb->file->lockrecs[i].off
  607. > NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits))
  608. return true;
  609. }
  610. return false;
  611. }
/* Lock a single hash chain.  The lock offset for bucket h is
 * NTDB_HASH_LOCK_START + h. */
enum NTDB_ERROR ntdb_lock_hash(struct ntdb_context *ntdb,
			       unsigned int h,
			       int ltype)
{
	unsigned l = NTDB_HASH_LOCK_START + h;

	assert(h < (1 << ntdb->hash_bits));

	/* a allrecord lock allows us to avoid per chain locks */
	if (ntdb->file->allrecord_lock.count) {
		if (!check_lock_pid(ntdb, "ntdb_lock_hashes", true))
			return NTDB_ERR_LOCK;

		if (ntdb->file->allrecord_lock.owner != ntdb)
			return owner_conflict(ntdb, "ntdb_lock_hashes");

		/* Compatible if we want a read lock, or already hold
		 * the same type. */
		if (ltype == ntdb->file->allrecord_lock.ltype
		    || ltype == F_RDLCK) {
			return NTDB_SUCCESS;
		}

		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
				   "ntdb_lock_hashes:"
				   " already have %s allrecordlock",
				   ntdb->file->allrecord_lock.ltype == F_RDLCK
				   ? "read" : "write");
	}

	/* Lock ordering: refuse a chain lock while holding free-table
	 * or expansion locks. */
	if (ntdb_has_free_lock(ntdb)) {
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				   "ntdb_lock_hashes: already have free lock");
	}

	if (ntdb_has_expansion_lock(ntdb)) {
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				   "ntdb_lock_hashes:"
				   " already have expansion lock");
	}

	return ntdb_nest_lock(ntdb, l, ltype, NTDB_LOCK_WAIT);
}
/* Unlock a single hash chain (a no-op when the chain is covered by an
 * allrecord lock). */
enum NTDB_ERROR ntdb_unlock_hash(struct ntdb_context *ntdb,
				 unsigned int h, int ltype)
{
	/* NOTE(review): unlike ntdb_lock_hash, h is masked into range
	 * here rather than asserted — presumably defensive; confirm. */
	unsigned l = NTDB_HASH_LOCK_START + (h & ((1 << ntdb->hash_bits)-1));

	if (ntdb->flags & NTDB_NOLOCK)
		return 0;

	/* a allrecord lock allows us to avoid per chain locks */
	if (ntdb->file->allrecord_lock.count) {
		/* We can't have write-locked a chain under a read-only
		 * allrecord lock. */
		if (ntdb->file->allrecord_lock.ltype == F_RDLCK
		    && ltype == F_WRLCK) {
			return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
					   "ntdb_unlock_hashes RO allrecord!");
		}
		if (ntdb->file->allrecord_lock.owner != ntdb) {
			return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_USE_ERROR,
					   "ntdb_unlock_hashes:"
					   " not locked by us!");
		}
		return NTDB_SUCCESS;
	}

	return ntdb_nest_unlock(ntdb, l, ltype);
}
  667. /* Hash locks use NTDB_HASH_LOCK_START + <number of hash entries>..
  668. * Then we begin; bucket offsets are sizeof(ntdb_len_t) apart, so we divide.
  669. * The result is that on 32 bit systems we don't use lock values > 2^31 on
  670. * files that are less than 4GB.
  671. */
  672. static ntdb_off_t free_lock_off(const struct ntdb_context *ntdb,
  673. ntdb_off_t b_off)
  674. {
  675. return NTDB_HASH_LOCK_START + (1 << ntdb->hash_bits)
  676. + b_off / sizeof(ntdb_off_t);
  677. }
/* Write-lock the free-list bucket at file offset b_off.  A no-op when
 * covered by a write allrecord lock; an error under a read one. */
enum NTDB_ERROR ntdb_lock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off,
				      enum ntdb_lock_flags waitflag)
{
	/* Buckets can never sit inside the file header. */
	assert(b_off >= sizeof(struct ntdb_header));

	if (ntdb->flags & NTDB_NOLOCK)
		return 0;

	/* a allrecord lock allows us to avoid per chain locks */
	if (ntdb->file->allrecord_lock.count) {
		if (!check_lock_pid(ntdb, "ntdb_lock_free_bucket", true))
			return NTDB_ERR_LOCK;

		if (ntdb->file->allrecord_lock.owner != ntdb) {
			return owner_conflict(ntdb, "ntdb_lock_free_bucket");
		}

		/* Covered only if the allrecord lock is a write lock. */
		if (ntdb->file->allrecord_lock.ltype == F_WRLCK)
			return 0;
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				   "ntdb_lock_free_bucket with"
				   " read-only allrecordlock!");
	}

#if 0 /* FIXME */
	if (ntdb_has_expansion_lock(ntdb)) {
		return ntdb_logerr(ntdb, NTDB_ERR_LOCK, NTDB_LOG_ERROR,
				   "ntdb_lock_free_bucket:"
				   " already have expansion lock");
	}
#endif

	return ntdb_nest_lock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK,
			      waitflag);
}
  707. void ntdb_unlock_free_bucket(struct ntdb_context *ntdb, ntdb_off_t b_off)
  708. {
  709. if (ntdb->file->allrecord_lock.count)
  710. return;
  711. ntdb_nest_unlock(ntdb, free_lock_off(ntdb, b_off), F_WRLCK);
  712. }
  713. _PUBLIC_ enum NTDB_ERROR ntdb_lockall(struct ntdb_context *ntdb)
  714. {
  715. return ntdb_allrecord_lock(ntdb, F_WRLCK, NTDB_LOCK_WAIT, false);
  716. }
  717. _PUBLIC_ void ntdb_unlockall(struct ntdb_context *ntdb)
  718. {
  719. ntdb_allrecord_unlock(ntdb, F_WRLCK);
  720. }
  721. _PUBLIC_ enum NTDB_ERROR ntdb_lockall_read(struct ntdb_context *ntdb)
  722. {
  723. return ntdb_allrecord_lock(ntdb, F_RDLCK, NTDB_LOCK_WAIT, false);
  724. }
  725. _PUBLIC_ void ntdb_unlockall_read(struct ntdb_context *ntdb)
  726. {
  727. ntdb_allrecord_unlock(ntdb, F_RDLCK);
  728. }
/* Drop every lock this context still holds (used at close time). */
void ntdb_lock_cleanup(struct ntdb_context *ntdb)
{
	unsigned int i;

	/* We don't want to warn: they're allowed to close ntdb after fork. */
	if (!check_lock_pid(ntdb, "ntdb_close", false))
		return;

	while (ntdb->file->allrecord_lock.count
	       && ntdb->file->allrecord_lock.owner == ntdb) {
		ntdb_allrecord_unlock(ntdb, ntdb->file->allrecord_lock.ltype);
	}

	for (i=0; i<ntdb->file->num_lockrecs; i++) {
		if (ntdb->file->lockrecs[i].owner == ntdb) {
			ntdb_nest_unlock(ntdb,
					 ntdb->file->lockrecs[i].off,
					 ntdb->file->lockrecs[i].ltype);
			/* ntdb_nest_unlock moved the last array entry
			 * into slot i, so re-examine this index. */
			i--;
		}
	}
}