/* lock.c */
/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Andrew Tridgell              1999-2005
   Copyright (C) Paul `Rusty' Russell         2000
   Copyright (C) Jeremy Allison               2000-2003

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "private.h"
#include <assert.h>
#include <ccan/build_assert/build_assert.h>
  24. /* If we were threaded, we could wait for unlock, but we're not, so fail. */
  25. static enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call)
  26. {
  27. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
  28. "%s: lock owned by another tdb in this process.",
  29. call);
  30. }
  31. /* If we fork, we no longer really own locks. */
  32. static bool check_lock_pid(struct tdb_context *tdb,
  33. const char *call, bool log)
  34. {
  35. /* No locks? No problem! */
  36. if (tdb->file->allrecord_lock.count == 0
  37. && tdb->file->num_lockrecs == 0) {
  38. return true;
  39. }
  40. /* No fork? No problem! */
  41. if (tdb->file->locker == getpid()) {
  42. return true;
  43. }
  44. if (log) {
  45. tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
  46. "%s: fork() detected after lock acquisition!"
  47. " (%u vs %u)", call, tdb->file->locker, getpid());
  48. }
  49. return false;
  50. }
  51. static int fcntl_lock(struct tdb_context *tdb,
  52. int rw, off_t off, off_t len, bool waitflag)
  53. {
  54. struct flock fl;
  55. fl.l_type = rw;
  56. fl.l_whence = SEEK_SET;
  57. fl.l_start = off;
  58. fl.l_len = len;
  59. fl.l_pid = 0;
  60. if (tdb->file->allrecord_lock.count == 0
  61. && tdb->file->num_lockrecs == 0) {
  62. tdb->file->locker = getpid();
  63. }
  64. add_stat(tdb, lock_lowlevel, 1);
  65. if (waitflag)
  66. return fcntl(tdb->file->fd, F_SETLKW, &fl);
  67. else {
  68. add_stat(tdb, lock_nonblock, 1);
  69. return fcntl(tdb->file->fd, F_SETLK, &fl);
  70. }
  71. }
/* Drop an fcntl byte-range lock at off/len.  Always uses F_SETLKW (an
 * unlock never blocks on other holders).  Returns fcntl()'s result.
 * Note: rw is only consulted by the disabled self-check code below. */
static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
{
	struct flock fl;
#if 0 /* Check they matched up locks and unlocks correctly. */
	char line[80];
	FILE *locks;
	bool found = false;

	locks = fopen("/proc/locks", "r");

	while (fgets(line, 80, locks)) {
		char *p;
		int type, start, l;

		/* eg. 1: FLOCK  ADVISORY  WRITE 2440 08:01:2180826 0 EOF */
		p = strchr(line, ':') + 1;
		if (strncmp(p, " POSIX  ADVISORY  ", strlen(" POSIX  ADVISORY  ")))
			continue;
		p += strlen(" FLOCK  ADVISORY  ");
		if (strncmp(p, "READ  ", strlen("READ  ")) == 0)
			type = F_RDLCK;
		else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
			type = F_WRLCK;
		else
			abort();
		p += 6;
		if (atoi(p) != getpid())
			continue;
		p = strchr(strchr(p, ' ') + 1, ' ') + 1;
		start = atoi(p);
		p = strchr(p, ' ') + 1;
		if (strncmp(p, "EOF", 3) == 0)
			l = 0;
		else
			l = atoi(p) - start + 1;

		if (off == start) {
			if (len != l) {
				fprintf(stderr, "Len %u should be %u: %s",
					(int)len, l, line);
				abort();
			}
			if (type != rw) {
				fprintf(stderr, "Type %s wrong: %s",
					rw == F_RDLCK ? "READ" : "WRITE", line);
				abort();
			}
			found = true;
			break;
		}
	}

	if (!found) {
		fprintf(stderr, "Unlock on %u@%u not found!",
			(int)off, (int)len);
		abort();
	}

	fclose(locks);
#endif

	fl.l_type = F_UNLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = off;
	fl.l_len = len;
	fl.l_pid = 0;

	return fcntl(tdb->file->fd, F_SETLKW, &fl);
}
/* a byte range locking function - return 0 on success
   this functions locks len bytes at the specified offset.

   note that a len of zero means lock to end of file
*/
static enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
				 int rw_type, tdb_off_t offset, tdb_off_t len,
				 enum tdb_lock_flags flags)
{
	int ret;

	/* TDB_NOLOCK databases never take kernel locks. */
	if (tdb->flags & TDB_NOLOCK) {
		return TDB_SUCCESS;
	}

	if (rw_type == F_WRLCK && tdb->read_only) {
		return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
				  "Write lock attempted on read-only database");
	}

	/* A 32 bit system cannot open a 64-bit file, but it could have
	 * expanded since then: check here. */
	/* (On 32-bit size_t the cast truncates; inequality detects it.) */
	if ((size_t)(offset + len) != offset + len) {
		return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
				  "tdb_brlock: lock on giant offset %llu",
				  (long long)(offset + len));
	}

	/* Restart if a signal interrupted the fcntl call. */
	do {
		ret = fcntl_lock(tdb, rw_type, offset, len,
				 flags & TDB_LOCK_WAIT);
	} while (ret == -1 && errno == EINTR);

	if (ret == -1) {
		/* Generic lock error. errno set by fcntl.
		 * EAGAIN is an expected return from non-blocking
		 * locks. */
		if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
			tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				   "tdb_brlock failed (fd=%d) at"
				   " offset %zu rw_type=%d flags=%d len=%zu:"
				   " %s",
				   tdb->file->fd, (size_t)offset, rw_type,
				   flags, (size_t)len, strerror(errno));
		}
		return TDB_ERR_LOCK;
	}
	return TDB_SUCCESS;
}
  176. static enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
  177. int rw_type, tdb_off_t offset, size_t len)
  178. {
  179. int ret;
  180. if (tdb->flags & TDB_NOLOCK) {
  181. return TDB_SUCCESS;
  182. }
  183. do {
  184. ret = fcntl_unlock(tdb, rw_type, offset, len);
  185. } while (ret == -1 && errno == EINTR);
  186. /* If we fail, *then* we verify that we owned the lock. If not, ok. */
  187. if (ret == -1 && check_lock_pid(tdb, "tdb_brunlock", false)) {
  188. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  189. "tdb_brunlock failed (fd=%d) at offset %zu"
  190. " rw_type=%d len=%zu",
  191. tdb->file->fd, (size_t)offset, rw_type,
  192. (size_t)len);
  193. }
  194. return TDB_SUCCESS;
  195. }
/*
  upgrade a read lock to a write lock. This needs to be handled in a
  special way as some OSes (such as solaris) have too conservative
  deadlock detection and claim a deadlock when progress can be
  made. For those OSes we may loop for a while.
*/
enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb)
{
	int count = 1000;

	/* NOTE(review): the call name logged here is the public entry point
	 * that triggers the upgrade, not this function's own name — confirm
	 * that is intentional and not a copy-paste from
	 * tdb_lock_and_recover(). */
	if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
		return TDB_ERR_LOCK;

	/* Only a single (non-nested) allrecord lock can be upgraded. */
	if (tdb->file->allrecord_lock.count != 1) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_allrecord_upgrade failed:"
				  " count %u too high",
				  tdb->file->allrecord_lock.count);
	}

	/* allrecord_lock.off doubles as the "still upgradable" flag. */
	if (tdb->file->allrecord_lock.off != 1) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_allrecord_upgrade failed:"
				  " already upgraded?");
	}

	if (tdb->file->allrecord_lock.owner != tdb) {
		return owner_conflict(tdb, "tdb_allrecord_upgrade");
	}

	while (count--) {
		struct timeval tv;
		/* Probe for the write lock over hashes + free tables
		 * (len 0 = to end of file). */
		if (tdb_brlock(tdb, F_WRLCK,
			       TDB_HASH_LOCK_START, 0,
			       TDB_LOCK_WAIT|TDB_LOCK_PROBE) == TDB_SUCCESS) {
			tdb->file->allrecord_lock.ltype = F_WRLCK;
			tdb->file->allrecord_lock.off = 0;
			return TDB_SUCCESS;
		}
		/* Only retry on (possibly spurious) deadlock reports. */
		if (errno != EDEADLK) {
			break;
		}
		/* sleep for as short a time as we can - more portable than usleep() */
		tv.tv_sec = 0;
		tv.tv_usec = 1;
		select(0, NULL, NULL, NULL, &tv);
	}
	return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
			  "tdb_allrecord_upgrade failed");
}
  241. static struct tdb_lock *find_nestlock(struct tdb_context *tdb, tdb_off_t offset,
  242. const struct tdb_context *owner)
  243. {
  244. unsigned int i;
  245. for (i=0; i<tdb->file->num_lockrecs; i++) {
  246. if (tdb->file->lockrecs[i].off == offset) {
  247. if (owner && tdb->file->lockrecs[i].owner != owner)
  248. return NULL;
  249. return &tdb->file->lockrecs[i];
  250. }
  251. }
  252. return NULL;
  253. }
  254. enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb)
  255. {
  256. enum TDB_ERROR ecode;
  257. if (!check_lock_pid(tdb, "tdb_transaction_prepare_commit", true))
  258. return TDB_ERR_LOCK;
  259. ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK,
  260. false);
  261. if (ecode != TDB_SUCCESS) {
  262. return ecode;
  263. }
  264. ecode = tdb_lock_open(tdb, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
  265. if (ecode != TDB_SUCCESS) {
  266. tdb_allrecord_unlock(tdb, F_WRLCK);
  267. return ecode;
  268. }
  269. ecode = tdb_transaction_recover(tdb);
  270. tdb_unlock_open(tdb);
  271. tdb_allrecord_unlock(tdb, F_WRLCK);
  272. return ecode;
  273. }
  274. /* lock an offset in the database. */
  275. static enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
  276. tdb_off_t offset, int ltype,
  277. enum tdb_lock_flags flags)
  278. {
  279. struct tdb_lock *new_lck;
  280. enum TDB_ERROR ecode;
  281. if (offset > (TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
  282. + tdb->file->map_size / 8)) {
  283. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  284. "tdb_nest_lock: invalid offset %zu ltype=%d",
  285. (size_t)offset, ltype);
  286. }
  287. if (tdb->flags & TDB_NOLOCK)
  288. return TDB_SUCCESS;
  289. if (!check_lock_pid(tdb, "tdb_nest_lock", true)) {
  290. return TDB_ERR_LOCK;
  291. }
  292. add_stat(tdb, locks, 1);
  293. new_lck = find_nestlock(tdb, offset, NULL);
  294. if (new_lck) {
  295. if (new_lck->owner != tdb) {
  296. return owner_conflict(tdb, "tdb_nest_lock");
  297. }
  298. if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
  299. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  300. "tdb_nest_lock:"
  301. " offset %zu has read lock",
  302. (size_t)offset);
  303. }
  304. /* Just increment the struct, posix locks don't stack. */
  305. new_lck->count++;
  306. return TDB_SUCCESS;
  307. }
  308. if (tdb->file->num_lockrecs
  309. && offset >= TDB_HASH_LOCK_START
  310. && offset < TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) {
  311. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  312. "tdb_nest_lock: already have a hash lock?");
  313. }
  314. new_lck = (struct tdb_lock *)realloc(
  315. tdb->file->lockrecs,
  316. sizeof(*tdb->file->lockrecs) * (tdb->file->num_lockrecs+1));
  317. if (new_lck == NULL) {
  318. return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
  319. "tdb_nest_lock:"
  320. " unable to allocate %zu lock struct",
  321. tdb->file->num_lockrecs + 1);
  322. }
  323. tdb->file->lockrecs = new_lck;
  324. /* Since fcntl locks don't nest, we do a lock for the first one,
  325. and simply bump the count for future ones */
  326. ecode = tdb_brlock(tdb, ltype, offset, 1, flags);
  327. if (ecode != TDB_SUCCESS) {
  328. return ecode;
  329. }
  330. /* First time we grab a lock, perhaps someone died in commit? */
  331. if (!(flags & TDB_LOCK_NOCHECK)
  332. && tdb->file->num_lockrecs == 0) {
  333. tdb_bool_err berr = tdb_needs_recovery(tdb);
  334. if (berr != false) {
  335. tdb_brunlock(tdb, ltype, offset, 1);
  336. if (berr < 0)
  337. return berr;
  338. ecode = tdb_lock_and_recover(tdb);
  339. if (ecode == TDB_SUCCESS) {
  340. ecode = tdb_brlock(tdb, ltype, offset, 1,
  341. flags);
  342. }
  343. if (ecode != TDB_SUCCESS) {
  344. return ecode;
  345. }
  346. }
  347. }
  348. tdb->file->lockrecs[tdb->file->num_lockrecs].owner = tdb;
  349. tdb->file->lockrecs[tdb->file->num_lockrecs].off = offset;
  350. tdb->file->lockrecs[tdb->file->num_lockrecs].count = 1;
  351. tdb->file->lockrecs[tdb->file->num_lockrecs].ltype = ltype;
  352. tdb->file->num_lockrecs++;
  353. return TDB_SUCCESS;
  354. }
/* Release one level of a nested lock taken with tdb_nest_lock(). */
static enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
				      tdb_off_t off, int ltype)
{
	struct tdb_lock *lck;
	enum TDB_ERROR ecode;

	if (tdb->flags & TDB_NOLOCK)
		return TDB_SUCCESS;

	/* Only a lock record owned by this tdb context matches. */
	lck = find_nestlock(tdb, off, tdb);
	if ((lck == NULL) || (lck->count == 0)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_nest_unlock: no lock for %zu",
				  (size_t)off);
	}

	/* Still nested: just drop one reference. */
	if (lck->count > 1) {
		lck->count--;
		return TDB_SUCCESS;
	}

	/*
	 * This lock has count==1 left, so we need to unlock it in the
	 * kernel. We don't bother with decrementing the in-memory array
	 * element, we're about to overwrite it with the last array element
	 * anyway.
	 */
	ecode = tdb_brunlock(tdb, ltype, off, 1);

	/*
	 * Shrink the array by overwriting the element just unlocked with the
	 * last array element.
	 */
	*lck = tdb->file->lockrecs[--tdb->file->num_lockrecs];

	return ecode;
}
  386. /*
  387. get the transaction lock
  388. */
  389. enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype)
  390. {
  391. return tdb_nest_lock(tdb, TDB_TRANSACTION_LOCK, ltype, TDB_LOCK_WAIT);
  392. }
  393. /*
  394. release the transaction lock
  395. */
  396. void tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
  397. {
  398. tdb_nest_unlock(tdb, TDB_TRANSACTION_LOCK, ltype);
  399. }
/* We only need to lock individual bytes, but Linux merges consecutive locks
 * so we lock in contiguous ranges. */
static enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
				       int ltype, enum tdb_lock_flags flags,
				       tdb_off_t off, tdb_off_t len)
{
	enum TDB_ERROR ecode;
	enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);

	if (len <= 1) {
		/* 0 would mean to end-of-file... */
		assert(len != 0);
		/* Single hash.  Just do blocking lock. */
		return tdb_brlock(tdb, ltype, off, len, flags);
	}

	/* First we try non-blocking. */
	if (tdb_brlock(tdb, ltype, off, len, nb_flags) == TDB_SUCCESS) {
		return TDB_SUCCESS;
	}

	/* Try locking first half, then second.  Recursion halves the
	 * contended range instead of blocking on the whole thing. */
	ecode = tdb_lock_gradual(tdb, ltype, flags, off, len / 2);
	if (ecode != TDB_SUCCESS)
		return ecode;

	ecode = tdb_lock_gradual(tdb, ltype, flags,
				 off + len / 2, len - len / 2);
	if (ecode != TDB_SUCCESS) {
		/* Second half failed: release the first half we hold. */
		tdb_brunlock(tdb, ltype, off, len / 2);
	}
	return ecode;
}
/* lock/unlock entire database.  It can only be upgradable if you have some
 * other way of guaranteeing exclusivity (ie. transaction write lock). */
enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
				  enum tdb_lock_flags flags, bool upgradable)
{
	enum TDB_ERROR ecode;
	tdb_bool_err berr;

	if (tdb->flags & TDB_NOLOCK)
		return TDB_SUCCESS;

	if (!check_lock_pid(tdb, "tdb_allrecord_lock", true)) {
		return TDB_ERR_LOCK;
	}

	/* An existing allrecord lock may absorb this request by nesting. */
	if (tdb->file->allrecord_lock.count) {
		if (tdb->file->allrecord_lock.owner != tdb) {
			return owner_conflict(tdb, "tdb_allrecord_lock");
		}

		/* A read request, or any request under a write lock, nests. */
		if (ltype == F_RDLCK
		    || tdb->file->allrecord_lock.ltype == F_WRLCK) {
			tdb->file->allrecord_lock.count++;
			return TDB_SUCCESS;
		}

		/* a global lock of a different type exists */
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
				  "tdb_allrecord_lock: already have %s lock",
				  tdb->file->allrecord_lock.ltype == F_RDLCK
				  ? "read" : "write");
	}

	if (tdb_has_hash_locks(tdb)) {
		/* can't combine global and chain locks */
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
				  "tdb_allrecord_lock:"
				  " already have chain lock");
	}

	if (upgradable && ltype != F_RDLCK) {
		/* tdb error: you can't upgrade a write lock! */
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_allrecord_lock:"
				  " can't upgrade a write lock");
	}

	add_stat(tdb, locks, 1);
again:
	/* Lock hashes, gradually. */
	ecode = tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
				 TDB_HASH_LOCK_RANGE);
	if (ecode != TDB_SUCCESS) {
		if (!(flags & TDB_LOCK_PROBE)) {
			tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
				   "tdb_allrecord_lock hashes failed");
		}
		return ecode;
	}

	/* Lock free tables: there to end of file. */
	ecode = tdb_brlock(tdb, ltype,
			   TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE,
			   0, flags);
	if (ecode != TDB_SUCCESS) {
		if (!(flags & TDB_LOCK_PROBE)) {
			tdb_logerr(tdb, ecode, TDB_LOG_ERROR,
				   "tdb_allrecord_lock freetables failed");
		}
		/* Roll back the hash locks taken above. */
		tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
			     TDB_HASH_LOCK_RANGE);
		return ecode;
	}

	tdb->file->allrecord_lock.owner = tdb;
	tdb->file->allrecord_lock.count = 1;
	/* If it's upgradable, it's actually exclusive so we can treat
	 * it as a write lock. */
	tdb->file->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
	/* .off doubles as the "upgradable" flag (see tdb_allrecord_upgrade). */
	tdb->file->allrecord_lock.off = upgradable;

	/* Now check for needing recovery. */
	if (flags & TDB_LOCK_NOCHECK)
		return TDB_SUCCESS;

	berr = tdb_needs_recovery(tdb);
	if (likely(berr == false))
		return TDB_SUCCESS;

	/* Someone died mid-commit: drop our lock, recover, retry from scratch. */
	tdb_allrecord_unlock(tdb, ltype);
	if (berr < 0)
		return berr;
	ecode = tdb_lock_and_recover(tdb);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}
	goto again;
}
  514. enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb, enum tdb_lock_flags flags)
  515. {
  516. return tdb_nest_lock(tdb, TDB_OPEN_LOCK, F_WRLCK, flags);
  517. }
  518. void tdb_unlock_open(struct tdb_context *tdb)
  519. {
  520. tdb_nest_unlock(tdb, TDB_OPEN_LOCK, F_WRLCK);
  521. }
  522. bool tdb_has_open_lock(struct tdb_context *tdb)
  523. {
  524. return !(tdb->flags & TDB_NOLOCK)
  525. && find_nestlock(tdb, TDB_OPEN_LOCK, tdb) != NULL;
  526. }
  527. enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype)
  528. {
  529. /* Lock doesn't protect data, so don't check (we recurse if we do!) */
  530. return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype,
  531. TDB_LOCK_WAIT | TDB_LOCK_NOCHECK);
  532. }
  533. void tdb_unlock_expand(struct tdb_context *tdb, int ltype)
  534. {
  535. tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype);
  536. }
/* unlock entire db */
void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
{
	if (tdb->flags & TDB_NOLOCK)
		return;

	if (tdb->file->allrecord_lock.count == 0) {
		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
			   "tdb_allrecord_unlock: not locked!");
		return;
	}

	if (tdb->file->allrecord_lock.owner != tdb) {
		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
			   "tdb_allrecord_unlock: not locked by us!");
		return;
	}

	/* Upgradable locks are marked as write locks. */
	/* (So an F_RDLCK unlock is accepted while .off says "upgradable".) */
	if (tdb->file->allrecord_lock.ltype != ltype
	    && (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) {
		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
			   "tdb_allrecord_unlock: have %s lock",
			   tdb->file->allrecord_lock.ltype == F_RDLCK
			   ? "read" : "write");
		return;
	}

	/* Nested: just drop one reference. */
	if (tdb->file->allrecord_lock.count > 1) {
		tdb->file->allrecord_lock.count--;
		return;
	}

	tdb->file->allrecord_lock.count = 0;
	tdb->file->allrecord_lock.ltype = 0;

	/* One unlock covers hashes and free tables: len 0 = to end of file. */
	tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, 0);
}
  569. bool tdb_has_expansion_lock(struct tdb_context *tdb)
  570. {
  571. return find_nestlock(tdb, TDB_EXPANSION_LOCK, tdb) != NULL;
  572. }
  573. bool tdb_has_hash_locks(struct tdb_context *tdb)
  574. {
  575. unsigned int i;
  576. for (i=0; i<tdb->file->num_lockrecs; i++) {
  577. if (tdb->file->lockrecs[i].off >= TDB_HASH_LOCK_START
  578. && tdb->file->lockrecs[i].off < (TDB_HASH_LOCK_START
  579. + TDB_HASH_LOCK_RANGE))
  580. return true;
  581. }
  582. return false;
  583. }
  584. static bool tdb_has_free_lock(struct tdb_context *tdb)
  585. {
  586. unsigned int i;
  587. if (tdb->flags & TDB_NOLOCK)
  588. return false;
  589. for (i=0; i<tdb->file->num_lockrecs; i++) {
  590. if (tdb->file->lockrecs[i].off
  591. > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE)
  592. return true;
  593. }
  594. return false;
  595. }
/* Lock the hash chain containing @hash_lock (top bits pick the lock byte). */
enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb,
			       tdb_off_t hash_lock,
			       tdb_len_t hash_range,
			       int ltype, enum tdb_lock_flags waitflag)
{
	/* FIXME: Do this properly, using hlock_range */
	unsigned lock = TDB_HASH_LOCK_START
		+ (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));

	/* a allrecord lock allows us to avoid per chain locks */
	if (tdb->file->allrecord_lock.count) {
		if (!check_lock_pid(tdb, "tdb_lock_hashes", true))
			return TDB_ERR_LOCK;

		if (tdb->file->allrecord_lock.owner != tdb)
			return owner_conflict(tdb, "tdb_lock_hashes");

		/* Covered if the allrecord lock matches the requested type,
		 * or we only want to read. */
		if (ltype == tdb->file->allrecord_lock.ltype
		    || ltype == F_RDLCK) {
			return TDB_SUCCESS;
		}

		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
				  "tdb_lock_hashes:"
				  " already have %s allrecordlock",
				  tdb->file->allrecord_lock.ltype == F_RDLCK
				  ? "read" : "write");
	}

	/* Taking a chain lock while holding a free-table or expansion lock
	 * is rejected as a locking-order violation. */
	if (tdb_has_free_lock(tdb)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_lock_hashes: already have free lock");
	}

	if (tdb_has_expansion_lock(tdb)) {
		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
				  "tdb_lock_hashes:"
				  " already have expansion lock");
	}

	return tdb_nest_lock(tdb, lock, ltype, waitflag);
}
  631. enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb,
  632. tdb_off_t hash_lock,
  633. tdb_len_t hash_range, int ltype)
  634. {
  635. unsigned lock = TDB_HASH_LOCK_START
  636. + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS));
  637. if (tdb->flags & TDB_NOLOCK)
  638. return 0;
  639. /* a allrecord lock allows us to avoid per chain locks */
  640. if (tdb->file->allrecord_lock.count) {
  641. if (tdb->file->allrecord_lock.ltype == F_RDLCK
  642. && ltype == F_WRLCK) {
  643. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  644. "tdb_unlock_hashes RO allrecord!");
  645. }
  646. return TDB_SUCCESS;
  647. }
  648. return tdb_nest_unlock(tdb, lock, ltype);
  649. }
  650. /* Hash locks use TDB_HASH_LOCK_START + the next 30 bits.
  651. * Then we begin; bucket offsets are sizeof(tdb_len_t) apart, so we divide.
  652. * The result is that on 32 bit systems we don't use lock values > 2^31 on
  653. * files that are less than 4GB.
  654. */
  655. static tdb_off_t free_lock_off(tdb_off_t b_off)
  656. {
  657. return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
  658. + b_off / sizeof(tdb_off_t);
  659. }
  660. enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off,
  661. enum tdb_lock_flags waitflag)
  662. {
  663. assert(b_off >= sizeof(struct tdb_header));
  664. if (tdb->flags & TDB_NOLOCK)
  665. return 0;
  666. /* a allrecord lock allows us to avoid per chain locks */
  667. if (tdb->file->allrecord_lock.count) {
  668. if (!check_lock_pid(tdb, "tdb_lock_free_bucket", true))
  669. return TDB_ERR_LOCK;
  670. if (tdb->file->allrecord_lock.ltype == F_WRLCK)
  671. return 0;
  672. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  673. "tdb_lock_free_bucket with"
  674. " read-only allrecordlock!");
  675. }
  676. #if 0 /* FIXME */
  677. if (tdb_has_expansion_lock(tdb)) {
  678. return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
  679. "tdb_lock_free_bucket:"
  680. " already have expansion lock");
  681. }
  682. #endif
  683. return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag);
  684. }
  685. void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off)
  686. {
  687. if (tdb->file->allrecord_lock.count)
  688. return;
  689. tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK);
  690. }
  691. enum TDB_ERROR tdb_lockall(struct tdb_context *tdb)
  692. {
  693. return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
  694. }
  695. void tdb_unlockall(struct tdb_context *tdb)
  696. {
  697. tdb_allrecord_unlock(tdb, F_WRLCK);
  698. }
  699. enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb)
  700. {
  701. return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
  702. }
  703. void tdb_unlockall_read(struct tdb_context *tdb)
  704. {
  705. tdb_allrecord_unlock(tdb, F_RDLCK);
  706. }
/* Release every lock (allrecord and per-offset) owned by this tdb context. */
void tdb_lock_cleanup(struct tdb_context *tdb)
{
	unsigned int i;

	/* Loop: the allrecord lock may be nested (count > 1). */
	while (tdb->file->allrecord_lock.count
	       && tdb->file->allrecord_lock.owner == tdb) {
		tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype);
	}

	for (i=0; i<tdb->file->num_lockrecs; i++) {
		if (tdb->file->lockrecs[i].owner == tdb) {
			tdb_nest_unlock(tdb,
					tdb->file->lockrecs[i].off,
					tdb->file->lockrecs[i].ltype);
			/* tdb_nest_unlock() moved the last array element
			 * into slot i, so revisit this index. */
			i--;
		}
	}
}