/* lock.c */
/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell              2000
   Copyright (C) Jeremy Allison                    2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <assert.h>
#include <ccan/build_assert/build_assert.h>
/* If we were threaded, we could wait for unlock, but we're not, so fail. */
static enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call)
{
	/* fcntl locks are per-process: a second tdb_context in this process
	 * already holds the range, so waiting on the kernel would hang. */
	return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
			  "%s: lock owned by another tdb in this process.",
			  call);
}
  31. /* If we fork, we no longer really own locks. */
  32. static bool check_lock_pid(struct tdb_context *tdb,
  33. const char *call, bool log)
  34. {
  35. /* No locks? No problem! */
  36. if (tdb->file->allrecord_lock.count == 0
  37. && tdb->file->num_lockrecs == 0) {
  38. return true;
  39. }
  40. /* No fork? No problem! */
  41. if (tdb->file->locker == getpid()) {
  42. return true;
  43. }
  44. if (log) {
  45. tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
  46. "%s: fork() detected after lock acquisition!"
  47. " (%u vs %u)", call, tdb->file->locker, getpid());
  48. }
  49. return false;
  50. }
/* Take a byte-range fcntl lock; len 0 means "to end of file".
 * Returns the raw fcntl() result: 0 on success, -1 with errno set. */
static int fcntl_lock(struct tdb_context *tdb,
		      int rw, off_t off, off_t len, bool waitflag)
{
	struct flock fl;

	fl.l_type = rw;
	fl.l_whence = SEEK_SET;
	fl.l_start = off;
	fl.l_len = len;
	fl.l_pid = 0;

	/* This is the very first lock on this file: remember who took it,
	 * so check_lock_pid() can detect a later fork(). */
	if (tdb->file->allrecord_lock.count == 0
	    && tdb->file->num_lockrecs == 0) {
		tdb->file->locker = getpid();
	}

	add_stat(tdb, lock_lowlevel, 1);
	if (waitflag)
		return fcntl(tdb->file->fd, F_SETLKW, &fl);
	else {
		add_stat(tdb, lock_nonblock, 1);
		return fcntl(tdb->file->fd, F_SETLK, &fl);
	}
}
/* Drop a byte-range fcntl lock; len 0 means "to end of file".
 * Always uses F_SETLKW (unlocking never blocks).  Returns fcntl() result. */
static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
{
	struct flock fl;
#if 0 /* Check they matched up locks and unlocks correctly. */
	/* NOTE(review): dead debug code.  It matches lines containing
	 * " POSIX ADVISORY " but then advances by strlen(" FLOCK ADVISORY ")
	 * (same length, different text) -- tidy up before ever enabling.
	 * It also never checks that fopen("/proc/locks") succeeded. */
	char line[80];
	FILE *locks;
	bool found = false;

	locks = fopen("/proc/locks", "r");

	while (fgets(line, 80, locks)) {
		char *p;
		int type, start, l;

		/* eg. 1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */
		p = strchr(line, ':') + 1;
		if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY ")))
			continue;
		p += strlen(" FLOCK ADVISORY ");
		if (strncmp(p, "READ ", strlen("READ ")) == 0)
			type = F_RDLCK;
		else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
			type = F_WRLCK;
		else
			abort();
		p += 6;
		if (atoi(p) != getpid())
			continue;
		p = strchr(strchr(p, ' ') + 1, ' ') + 1;
		start = atoi(p);
		p = strchr(p, ' ') + 1;
		if (strncmp(p, "EOF", 3) == 0)
			l = 0;
		else
			l = atoi(p) - start + 1;

		if (off == start) {
			if (len != l) {
				fprintf(stderr, "Len %u should be %u: %s",
					(int)len, l, line);
				abort();
			}
			if (type != rw) {
				fprintf(stderr, "Type %s wrong: %s",
					rw == F_RDLCK ? "READ" : "WRITE", line);
				abort();
			}
			found = true;
			break;
		}
	}

	if (!found) {
		fprintf(stderr, "Unlock on %u@%u not found!",
			(int)off, (int)len);
		abort();
	}

	fclose(locks);
#endif

	fl.l_type = F_UNLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = off;
	fl.l_len = len;
	fl.l_pid = 0;

	return fcntl(tdb->file->fd, F_SETLKW, &fl);
}
  133. /* a byte range locking function - return 0 on success
  134. this functions locks len bytes at the specified offset.
  135. note that a len of zero means lock to end of file
  136. */
  137. static enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
  138. int rw_type, tdb_off_t offset, tdb_off_t len,
  139. enum tdb_lock_flags flags)
  140. {
  141. int ret;
  142. if (tdb->flags & TDB_NOLOCK) {
  143. return TDB_SUCCESS;
  144. }
  145. if (rw_type == F_WRLCK && tdb->read_only) {
  146. return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
  147. "Write lock attempted on read-only database");
  148. }
  149. /* A 32 bit system cannot open a 64-bit file, but it could have
  150. * expanded since then: check here. */
  151. if ((size_t)(offset + len) != offset + len) {
  152. return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
  153. "tdb_brlock: lock on giant offset %llu",
  154. (long long)(offset + len));
  155. }
  156. do {
  157. ret = fcntl_lock(tdb, rw_type, offset, len,
  158. flags & TDB_LOCK_WAIT);
  159. } while (ret == -1 && errno == EINTR);
  160. if (ret == -1) {
  161. /* Generic lock error. errno set by fcntl.
  162. * EAGAIN is an expected return from non-blocking
  163. * locks. */
  164. if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
  165. tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  166. "tdb_brlock failed (fd=%d) at"
  167. " offset %zu rw_type=%d flags=%d len=%zu:"
  168. " %s",
  169. tdb->file->fd, (size_t)offset, rw_type,
  170. flags, (size_t)len, strerror(errno));
  171. }
  172. return TDB_ERR_LOCK;
  173. }
  174. return TDB_SUCCESS;
  175. }
/* Release a byte-range lock previously taken with tdb_brlock().
 * A failure is only reported if we actually own locks (i.e. no fork). */
static enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
				   int rw_type, tdb_off_t offset, size_t len)
{
	int ret;

	if (tdb->flags & TDB_NOLOCK) {
		return TDB_SUCCESS;
	}

	/* Retry if a signal interrupts the fcntl() call. */
	do {
		ret = fcntl_unlock(tdb, rw_type, offset, len);
	} while (ret == -1 && errno == EINTR);

	/* If we fail, *then* we verify that we owned the lock. If not, ok. */
	if (ret == -1 && check_lock_pid(tdb, "tdb_brunlock", false)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_brunlock failed (fd=%d) at offset %zu"
				  " rw_type=%d len=%zu",
				  tdb->file->fd, (size_t)offset, rw_type,
				  (size_t)len);
	}
	return TDB_SUCCESS;
}
  196. /*
  197. upgrade a read lock to a write lock. This needs to be handled in a
  198. special way as some OSes (such as solaris) have too conservative
  199. deadlock detection and claim a deadlock when progress can be
  200. made. For those OSes we may loop for a while.
  201. */
  202. enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb)
  203. {
  204. int count = 1000;
  205. if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
  206. return TDB_ERR_LOCK;
  207. if (tdb->file->allrecord_lock.count != 1) {
  208. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  209. "tdb_allrecord_upgrade failed:"
  210. " count %u too high",
  211. tdb->file->allrecord_lock.count);
  212. }
  213. if (tdb->file->allrecord_lock.off != 1) {
  214. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  215. "tdb_allrecord_upgrade failed:"
  216. " already upgraded?");
  217. }
  218. if (tdb->file->allrecord_lock.owner != tdb) {
  219. return owner_conflict(tdb, "tdb_allrecord_upgrade");
  220. }
  221. while (count--) {
  222. struct timeval tv;
  223. if (tdb_brlock(tdb, F_WRLCK,
  224. TDB_HASH_LOCK_START, 0,
  225. TDB_LOCK_WAIT|TDB_LOCK_PROBE) == TDB_SUCCESS) {
  226. tdb->file->allrecord_lock.ltype = F_WRLCK;
  227. tdb->file->allrecord_lock.off = 0;
  228. return TDB_SUCCESS;
  229. }
  230. if (errno != EDEADLK) {
  231. break;
  232. }
  233. /* sleep for as short a time as we can - more portable than usleep() */
  234. tv.tv_sec = 0;
  235. tv.tv_usec = 1;
  236. select(0, NULL, NULL, NULL, &tv);
  237. }
  238. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  239. "tdb_allrecord_upgrade failed");
  240. }
  241. static struct tdb_lock *find_nestlock(struct tdb_context *tdb, tdb_off_t offset,
  242. const struct tdb_context *owner)
  243. {
  244. unsigned int i;
  245. for (i=0; i<tdb->file->num_lockrecs; i++) {
  246. if (tdb->file->lockrecs[i].off == offset) {
  247. if (owner && tdb->file->lockrecs[i].owner != owner)
  248. return NULL;
  249. return &tdb->file->lockrecs[i];
  250. }
  251. }
  252. return NULL;
  253. }
  254. enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb)
  255. {
  256. enum TDB_ERROR ecode;
  257. if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
  258. return TDB_ERR_LOCK;
  259. ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK,
  260. false);
  261. if (ecode != TDB_SUCCESS) {
  262. return ecode;
  263. }
  264. ecode = tdb_lock_open(tdb, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
  265. if (ecode != TDB_SUCCESS) {
  266. tdb_allrecord_unlock(tdb, F_WRLCK);
  267. return ecode;
  268. }
  269. ecode = tdb_transaction_recover(tdb);
  270. tdb_unlock_open(tdb);
  271. tdb_allrecord_unlock(tdb, F_WRLCK);
  272. return ecode;
  273. }
  274. /* lock an offset in the database. */
  275. static enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
  276. tdb_off_t offset, int ltype,
  277. enum tdb_lock_flags flags)
  278. {
  279. struct tdb_lock *new_lck;
  280. enum TDB_ERROR ecode;
  281. if (offset > (TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
  282. + tdb->file->map_size / 8)) {
  283. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  284. "tdb_nest_lock: invalid offset %zu ltype=%d",
  285. (size_t)offset, ltype);
  286. }
  287. if (tdb->flags & TDB_NOLOCK)
  288. return TDB_SUCCESS;
  289. if (!check_lock_pid(tdb, "tdb_nest_lock", true)) {
  290. return TDB_ERR_LOCK;
  291. }
  292. add_stat(tdb, locks, 1);
  293. new_lck = find_nestlock(tdb, offset, NULL);
  294. if (new_lck) {
  295. if (new_lck->owner != tdb) {
  296. return owner_conflict(tdb, "tdb_nest_lock");
  297. }
  298. if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
  299. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  300. "tdb_nest_lock:"
  301. " offset %zu has read lock",
  302. (size_t)offset);
  303. }
  304. /* Just increment the struct, posix locks don't stack. */
  305. new_lck->count++;
  306. return TDB_SUCCESS;
  307. }
  308. if (tdb->file->num_lockrecs
  309. && offset >= TDB_HASH_LOCK_START
  310. && offset < TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) {
  311. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  312. "tdb_nest_lock: already have a hash lock?");
  313. }
  314. new_lck = (struct tdb_lock *)realloc(
  315. tdb->file->lockrecs,
  316. sizeof(*tdb->file->lockrecs) * (tdb->file->num_lockrecs+1));
  317. if (new_lck == NULL) {
  318. return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
  319. "tdb_nest_lock:"
  320. " unable to allocate %zu lock struct",
  321. tdb->file->num_lockrecs + 1);
  322. }
  323. tdb->file->lockrecs = new_lck;
  324. /* Since fcntl locks don't nest, we do a lock for the first one,
  325. and simply bump the count for future ones */
  326. ecode = tdb_brlock(tdb, ltype, offset, 1, flags);
  327. if (ecode != TDB_SUCCESS) {
  328. return ecode;
  329. }
  330. /* First time we grab a lock, perhaps someone died in commit? */
  331. if (!(flags & TDB_LOCK_NOCHECK)
  332. && tdb->file->num_lockrecs == 0) {
  333. tdb_bool_err berr = tdb_needs_recovery(tdb);
  334. if (berr != false) {
  335. tdb_brunlock(tdb, ltype, offset, 1);
  336. if (berr < 0)
  337. return berr;
  338. ecode = tdb_lock_and_recover(tdb);
  339. if (ecode == TDB_SUCCESS) {
  340. ecode = tdb_brlock(tdb, ltype, offset, 1,
  341. flags);
  342. }
  343. if (ecode != TDB_SUCCESS) {
  344. return ecode;
  345. }
  346. }
  347. }
  348. tdb->file->lockrecs[tdb->file->num_lockrecs].owner = tdb;
  349. tdb->file->lockrecs[tdb->file->num_lockrecs].off = offset;
  350. tdb->file->lockrecs[tdb->file->num_lockrecs].count = 1;
  351. tdb->file->lockrecs[tdb->file->num_lockrecs].ltype = ltype;
  352. tdb->file->num_lockrecs++;
  353. return TDB_SUCCESS;
  354. }
/* Release one level of a nested lock taken by tdb_nest_lock().
 * Only drops the kernel lock when the nest count reaches zero. */
static enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
				      tdb_off_t off, int ltype)
{
	struct tdb_lock *lck;
	enum TDB_ERROR ecode;

	if (tdb->flags & TDB_NOLOCK)
		return TDB_SUCCESS;

	lck = find_nestlock(tdb, off, tdb);
	if ((lck == NULL) || (lck->count == 0)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_nest_unlock: no lock for %zu",
				  (size_t)off);
	}

	if (lck->count > 1) {
		lck->count--;
		return TDB_SUCCESS;
	}

	/*
	 * This lock has count==1 left, so we need to unlock it in the
	 * kernel. We don't bother with decrementing the in-memory array
	 * element, we're about to overwrite it with the last array element
	 * anyway.
	 */
	ecode = tdb_brunlock(tdb, ltype, off, 1);

	/*
	 * Shrink the array by overwriting the element just unlocked with the
	 * last array element.
	 */
	*lck = tdb->file->lockrecs[--tdb->file->num_lockrecs];

	return ecode;
}
/*
  get the transaction lock

  A single nested lock on TDB_TRANSACTION_LOCK serializes transactions;
  always blocking (TDB_LOCK_WAIT).
 */
enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype)
{
	return tdb_nest_lock(tdb, TDB_TRANSACTION_LOCK, ltype, TDB_LOCK_WAIT);
}
/*
  release the transaction lock
 */
void tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
{
	tdb_nest_unlock(tdb, TDB_TRANSACTION_LOCK, ltype);
}
/* We only need to lock individual bytes, but Linux merges consecutive locks
 * so we lock in contiguous ranges.
 *
 * Strategy: try the whole range non-blocking first; if that fails,
 * recursively lock each half (blocking per flags).  On a failure in the
 * second half, the first half is unlocked again so we never return with
 * a partial range held. */
static enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
				       int ltype, enum tdb_lock_flags flags,
				       tdb_off_t off, tdb_off_t len)
{
	enum TDB_ERROR ecode;
	enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);

	if (len <= 1) {
		/* 0 would mean to end-of-file... */
		assert(len != 0);
		/* Single hash.  Just do blocking lock. */
		return tdb_brlock(tdb, ltype, off, len, flags);
	}

	/* First we try non-blocking. */
	if (tdb_brlock(tdb, ltype, off, len, nb_flags) == TDB_SUCCESS) {
		return TDB_SUCCESS;
	}

	/* Try locking first half, then second. */
	ecode = tdb_lock_gradual(tdb, ltype, flags, off, len / 2);
	if (ecode != TDB_SUCCESS)
		return ecode;

	ecode = tdb_lock_gradual(tdb, ltype, flags,
				 off + len / 2, len - len / 2);
	if (ecode != TDB_SUCCESS) {
		tdb_brunlock(tdb, ltype, off, len / 2);
	}
	return ecode;
}
/* lock/unlock entire database.  It can only be upgradable if you have some
 * other way of guaranteeing exclusivity (ie. transaction write lock).
 *
 * Nests on an existing allrecord lock we own (read under read/write, or
 * write under write).  Incompatible with held chain locks.  After taking
 * the lock we may detect a dead commit, recover, and retry from scratch. */
enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
				  enum tdb_lock_flags flags, bool upgradable)
{
	enum TDB_ERROR ecode;
	tdb_bool_err berr;

	if (tdb->flags & TDB_NOLOCK)
		return TDB_SUCCESS;

	if (!check_lock_pid(tdb, "tdb_allrecord_lock", true)) {
		return TDB_ERR_LOCK;
	}

	if (tdb->file->allrecord_lock.count) {
		if (tdb->file->allrecord_lock.owner != tdb) {
			return owner_conflict(tdb, "tdb_allrecord_lock");
		}

		/* Nesting: a read request, or anything under a write lock,
		 * just bumps the count. */
		if (ltype == F_RDLCK
		    || tdb->file->allrecord_lock.ltype == F_WRLCK) {
			tdb->file->allrecord_lock.count++;
			return TDB_SUCCESS;
		}

		/* a global lock of a different type exists */
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
				  "tdb_allrecord_lock: already have %s lock",
				  tdb->file->allrecord_lock.ltype == F_RDLCK
				  ? "read" : "write");
	}

	if (tdb_has_hash_locks(tdb)) {
		/* can't combine global and chain locks */
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
				  "tdb_allrecord_lock:"
				  " already have chain lock");
	}

	if (upgradable && ltype != F_RDLCK) {
		/* tdb error: you can't upgrade a write lock! */
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_allrecord_lock:"
				  " can't upgrade a write lock");
	}

	add_stat(tdb, locks, 1);
again:
	/* Lock hashes, gradually. */
	ecode = tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
				 TDB_HASH_LOCK_RANGE);
	if (ecode != TDB_SUCCESS)
		return ecode;

	/* Lock free tables: there to end of file. */
	ecode = tdb_brlock(tdb, ltype,
			   TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE,
			   0, flags);
	if (ecode != TDB_SUCCESS) {
		tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
			     TDB_HASH_LOCK_RANGE);
		return ecode;
	}

	tdb->file->allrecord_lock.owner = tdb;
	tdb->file->allrecord_lock.count = 1;
	/* If it's upgradable, it's actually exclusive so we can treat
	 * it as a write lock. */
	tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
	/* off doubles as the "upgradable" marker (see tdb_allrecord_upgrade). */
	tdb->file->allrecord_lock.off = upgradable;

	/* Now check for needing recovery. */
	if (flags & TDB_LOCK_NOCHECK)
		return TDB_SUCCESS;

	berr = tdb_needs_recovery(tdb);
	if (likely(berr == false))
		return TDB_SUCCESS;

	/* Recovery needed: drop everything, recover, and start over. */
	tdb_allrecord_unlock(tdb, ltype);
	if (berr < 0)
		return berr;
	ecode = tdb_lock_and_recover(tdb);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}
	goto again;
}
/* Take the exclusive open lock (serializes open/recovery critical section). */
enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb, enum tdb_lock_flags flags)
{
	return tdb_nest_lock(tdb, TDB_OPEN_LOCK, F_WRLCK, flags);
}
/* Release the open lock taken with tdb_lock_open(). */
void tdb_unlock_open(struct tdb_context *tdb)
{
	tdb_nest_unlock(tdb, TDB_OPEN_LOCK, F_WRLCK);
}
  513. bool tdb_has_open_lock(struct tdb_context *tdb)
  514. {
  515. return !(tdb->flags & TDB_NOLOCK)
  516. && find_nestlock(tdb, TDB_OPEN_LOCK, tdb) != NULL;
  517. }
/* Take the file-expansion lock (blocking). */
enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype)
{
	/* Lock doesn't protect data, so don't check (we recurse if we do!) */
	return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype,
			     TDB_LOCK_WAIT | TDB_LOCK_NOCHECK);
}
/* Release the file-expansion lock taken with tdb_lock_expand(). */
void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
{
	tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
}
/* unlock entire db */
void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
{
	if (tdb->flags & TDB_NOLOCK)
		return;

	if (tdb->file->allrecord_lock.count == 0) {
		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
			   "tdb_allrecord_unlock: not locked!");
		return;
	}

	if (tdb->file->allrecord_lock.owner != tdb) {
		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
			   "tdb_allrecord_unlock: not locked by us!");
		return;
	}

	/* Upgradable locks are marked as write locks. */
	/* (So a read unlock is also acceptable when off marks upgradable.) */
	if (tdb->file->allrecord_lock.ltype != ltype
	    && (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
			   "tdb_allrecord_unlock: have %s lock",
			   tdb->file->allrecord_lock.ltype == F_RDLCK
			   ? "read" : "write");
		return;
	}

	/* Nested: just drop the count. */
	if (tdb->file->allrecord_lock.count > 1) {
		tdb->file->allrecord_lock.count--;
		return;
	}

	tdb->file->allrecord_lock.count = 0;
	tdb->file->allrecord_lock.ltype = 0;

	/* One range covers hashes and free lists through end of file. */
	tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, 0);
}
/* Do we (this context) currently hold the expansion lock? */
bool tdb_has_expansion_lock(struct tdb_context *tdb)
{
	return find_nestlock(tdb, TDB_EXPANSION_LOCK, tdb) != NULL;
}
  564. bool tdb_has_hash_locks(struct tdb_context *tdb)
  565. {
  566. unsigned int i;
  567. for (i=0; i<tdb->file->num_lockrecs; i++) {
  568. if (tdb->file->lockrecs[i].off >= TDB_HASH_LOCK_START
  569. && tdb->file->lockrecs[i].off < (TDB_HASH_LOCK_START
  570. + TDB_HASH_LOCK_RANGE))
  571. return true;
  572. }
  573. return false;
  574. }
  575. static bool tdb_has_free_lock(struct tdb_context *tdb)
  576. {
  577. unsigned int i;
  578. if (tdb->flags & TDB_NOLOCK)
  579. return false;
  580. for (i=0; i<tdb->file->num_lockrecs; i++) {
  581. if (tdb->file->lockrecs[i].off
  582. > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE)
  583. return true;
  584. }
  585. return false;
  586. }
/* Lock the chain covering @hash_lock.  An allrecord lock we hold (of a
 * sufficient type) satisfies the request without a per-chain lock.
 * NOTE(review): hash_range is currently unused -- see FIXME below. */
enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
			       tdb_off_t hash_lock,
			       tdb_len_t hash_range,
			       int ltype, enum tdb_lock_flags waitflag)
{
	/* FIXME: Do this properly, using hlock_range */
	unsigned lock = TDB_HASH_LOCK_START
		+ (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));

	/* a allrecord lock allows us to avoid per chain locks */
	if (tdb->file->allrecord_lock.count) {
		if (!check_lock_pid(tdb, "tdb_lock_hashes", true))
			return TDB_ERR_LOCK;

		if (tdb->file->allrecord_lock.owner != tdb)
			return owner_conflict(tdb, "tdb_lock_hashes");

		/* Same type, or we only need read access: covered. */
		if (ltype == tdb->file->allrecord_lock.ltype
		    || ltype == F_RDLCK) {
			return TDB_SUCCESS;
		}

		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
				  "tdb_lock_hashes:"
				  " already have %s allrecordlock",
				  tdb->file->allrecord_lock.ltype == F_RDLCK
				  ? "read" : "write");
	}

	if (tdb_has_free_lock(tdb)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_lock_hashes: already have free lock");
	}

	if (tdb_has_expansion_lock(tdb)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_lock_hashes:"
				  " already have expansion lock");
	}

	return tdb_nest_lock(tdb, lock, ltype, waitflag);
}
  622. enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
  623. tdb_off_t hash_lock,
  624. tdb_len_t hash_range, int ltype)
  625. {
  626. unsigned lock = TDB_HASH_LOCK_START
  627. + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
  628. if (tdb->flags & TDB_NOLOCK)
  629. return 0;
  630. /* a allrecord lock allows us to avoid per chain locks */
  631. if (tdb->file->allrecord_lock.count) {
  632. if (tdb->file->allrecord_lock.ltype == F_RDLCK
  633. && ltype == F_WRLCK) {
  634. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  635. "tdb_unlock_hashes RO allrecord!");
  636. }
  637. return TDB_SUCCESS;
  638. }
  639. return tdb_nest_unlock(tdb, lock, ltype);
  640. }
/* Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
 * Then we begin; bucket offsets are sizeof(tdb_len_t) apart, so we divide.
 * The result is that on 32 bit systems we don't use lock values > 2^31 on
 * files that are less than 4GB.
 */
static tdb_off_t free_lock_off(tdb_off_t b_off)
{
	/* Map a free-bucket file offset to its lock offset, past the
	 * hash lock range. */
	return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
		+ b_off / sizeof(tdb_off_t);
}
  651. enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
  652. enum tdb_lock_flags waitflag)
  653. {
  654. assert(b_off >= sizeof(struct tdb_header));
  655. if (tdb->flags & TDB_NOLOCK)
  656. return 0;
  657. /* a allrecord lock allows us to avoid per chain locks */
  658. if (tdb->file->allrecord_lock.count) {
  659. if (!check_lock_pid(tdb, "tdb_lock_free_bucket", true))
  660. return TDB_ERR_LOCK;
  661. if (tdb->file->allrecord_lock.ltype == F_WRLCK)
  662. return 0;
  663. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  664. "tdb_lock_free_bucket with"
  665. " read-only allrecordlock!");
  666. }
  667. #if 0 /* FIXME */
  668. if (tdb_has_expansion_lock(tdb)) {
  669. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  670. "tdb_lock_free_bucket:"
  671. " already have expansion lock");
  672. }
  673. #endif
  674. return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
  675. }
/* Release the free-list bucket lock (no-op under an allrecord lock,
 * mirroring tdb_lock_free_bucket). */
void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
{
	if (tdb->file->allrecord_lock.count)
		return;

	tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK);
}
/* Public API: blocking write lock on the whole database. */
enum TDB_ERROR tdb_lockall(struct tdb_context *tdb)
{
	return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
}
/* Public API: release a tdb_lockall() write lock. */
void tdb_unlockall(struct tdb_context *tdb)
{
	tdb_allrecord_unlock(tdb, F_WRLCK);
}
/* Public API: blocking read lock on the whole database. */
enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb)
{
	return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
}
/* Public API: release a tdb_lockall_read() read lock. */
void tdb_unlockall_read(struct tdb_context *tdb)
{
	tdb_allrecord_unlock(tdb, F_RDLCK);
}
/* Release every lock this context still holds (e.g. on tdb_close).
 * Locks owned by other contexts sharing the file are left alone. */
void tdb_lock_cleanup(struct tdb_context *tdb)
{
	unsigned int i;

	/* Drop all nesting levels of our allrecord lock. */
	while (tdb->file->allrecord_lock.count
	       && tdb->file->allrecord_lock.owner == tdb) {
		tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
	}

	for (i=0; i<tdb->file->num_lockrecs; i++) {
		if (tdb->file->lockrecs[i].owner == tdb) {
			tdb_nest_unlock(tdb,
					tdb->file->lockrecs[i].off,
					tdb->file->lockrecs[i].ltype);
			/* tdb_nest_unlock() moves the last record into
			 * slot i, so re-examine this index. */
			i--;
		}
	}
}