| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624 |
- /*
- Trivial Database 2: hash handling
- Copyright (C) Rusty Russell 2010
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 3 of the License, or (at your option) any later version.
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
- #include "private.h"
- #include <ccan/hash/hash.h>
- /* Default hash function. */
- uint32_t ntdb_jenkins_hash(const void *key, size_t length, uint32_t seed,
- void *unused)
- {
- return hash_stable((const unsigned char *)key, length, seed);
- }
- uint32_t ntdb_hash(struct ntdb_context *ntdb, const void *ptr, size_t len)
- {
- return ntdb->hash_fn(ptr, len, ntdb->hash_seed, ntdb->hash_data);
- }
- static ntdb_bool_err key_matches(struct ntdb_context *ntdb,
- const struct ntdb_used_record *rec,
- ntdb_off_t off,
- const NTDB_DATA *key,
- const char **rptr)
- {
- ntdb_bool_err ret = false;
- const char *rkey;
- if (rec_key_length(rec) != key->dsize) {
- ntdb->stats.compare_wrong_keylen++;
- return ret;
- }
- rkey = ntdb_access_read(ntdb, off + sizeof(*rec),
- key->dsize + rec_data_length(rec), false);
- if (NTDB_PTR_IS_ERR(rkey)) {
- return (ntdb_bool_err)NTDB_PTR_ERR(rkey);
- }
- if (memcmp(rkey, key->dptr, key->dsize) == 0) {
- if (rptr) {
- *rptr = rkey;
- } else {
- ntdb_access_release(ntdb, rkey);
- }
- return true;
- }
- ntdb->stats.compare_wrong_keycmp++;
- ntdb_access_release(ntdb, rkey);
- return ret;
- }
- /* Does entry match? */
- static ntdb_bool_err match(struct ntdb_context *ntdb,
- uint32_t hash,
- const NTDB_DATA *key,
- ntdb_off_t val,
- struct ntdb_used_record *rec,
- const char **rptr)
- {
- ntdb_off_t off;
- enum NTDB_ERROR ecode;
- ntdb->stats.compares++;
- /* Top bits of offset == next bits of hash. */
- if (bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL)
- != bits_from(val, 64-NTDB_OFF_UPPER_STEAL, NTDB_OFF_UPPER_STEAL)) {
- ntdb->stats.compare_wrong_offsetbits++;
- return false;
- }
- off = val & NTDB_OFF_MASK;
- ecode = ntdb_read_convert(ntdb, off, rec, sizeof(*rec));
- if (ecode != NTDB_SUCCESS) {
- return (ntdb_bool_err)ecode;
- }
- return key_matches(ntdb, rec, off, key, rptr);
- }
- static bool is_chain(ntdb_off_t val)
- {
- return val & (1ULL << NTDB_OFF_CHAIN_BIT);
- }
- static ntdb_off_t hbucket_off(ntdb_off_t base, ntdb_len_t idx)
- {
- return base + sizeof(struct ntdb_used_record)
- + idx * sizeof(ntdb_off_t);
- }
- /* This is the core routine which searches the hashtable for an entry.
- * On error, no locks are held and -ve is returned.
- * Otherwise, hinfo is filled in.
- * If not found, the return value is 0.
- * If found, the return value is the offset, and *rec is the record. */
- ntdb_off_t find_and_lock(struct ntdb_context *ntdb,
- NTDB_DATA key,
- int ltype,
- struct hash_info *h,
- struct ntdb_used_record *rec,
- const char **rptr)
- {
- ntdb_off_t off, val;
- const ntdb_off_t *arr = NULL;
- ntdb_len_t i;
- bool found_empty;
- enum NTDB_ERROR ecode;
- struct ntdb_used_record chdr;
- ntdb_bool_err berr;
- h->h = ntdb_hash(ntdb, key.dptr, key.dsize);
- h->table = NTDB_HASH_OFFSET;
- h->table_size = 1 << ntdb->hash_bits;
- h->bucket = bits_from(h->h, 0, ntdb->hash_bits);
- h->old_val = 0;
- ecode = ntdb_lock_hash(ntdb, h->bucket, ltype);
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
- off = hbucket_off(h->table, h->bucket);
- val = ntdb_read_off(ntdb, off);
- if (NTDB_OFF_IS_ERR(val)) {
- ecode = NTDB_OFF_TO_ERR(val);
- goto fail;
- }
- /* Directly in hash table? */
- if (!likely(is_chain(val))) {
- if (val) {
- berr = match(ntdb, h->h, &key, val, rec, rptr);
- if (berr < 0) {
- ecode = NTDB_OFF_TO_ERR(berr);
- goto fail;
- }
- if (berr) {
- return val & NTDB_OFF_MASK;
- }
- /* If you want to insert here, make a chain. */
- h->old_val = val;
- }
- return 0;
- }
- /* Nope? Iterate through chain. */
- h->table = val & NTDB_OFF_MASK;
- ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
- if (ecode != NTDB_SUCCESS) {
- goto fail;
- }
- if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "find_and_lock:"
- " corrupt record %#x at %llu",
- rec_magic(&chdr), (long long)off);
- goto fail;
- }
- h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t);
- arr = ntdb_access_read(ntdb, hbucket_off(h->table, 0),
- rec_data_length(&chdr), true);
- if (NTDB_PTR_IS_ERR(arr)) {
- ecode = NTDB_PTR_ERR(arr);
- goto fail;
- }
- found_empty = false;
- for (i = 0; i < h->table_size; i++) {
- if (arr[i] == 0) {
- if (!found_empty) {
- h->bucket = i;
- found_empty = true;
- }
- } else {
- berr = match(ntdb, h->h, &key, arr[i], rec, rptr);
- if (berr < 0) {
- ecode = NTDB_OFF_TO_ERR(berr);
- ntdb_access_release(ntdb, arr);
- goto fail;
- }
- if (berr) {
- /* We found it! */
- h->bucket = i;
- off = arr[i] & NTDB_OFF_MASK;
- ntdb_access_release(ntdb, arr);
- return off;
- }
- }
- }
- if (!found_empty) {
- /* Set to any non-zero value */
- h->old_val = 1;
- h->bucket = i;
- }
- ntdb_access_release(ntdb, arr);
- return 0;
- fail:
- ntdb_unlock_hash(ntdb, h->bucket, ltype);
- return NTDB_ERR_TO_OFF(ecode);
- }
- static ntdb_off_t encode_offset(const struct ntdb_context *ntdb,
- ntdb_off_t new_off, uint32_t hash)
- {
- ntdb_off_t extra;
- assert((new_off & (1ULL << NTDB_OFF_CHAIN_BIT)) == 0);
- assert((new_off >> (64 - NTDB_OFF_UPPER_STEAL)) == 0);
- /* We pack extra hash bits into the upper bits of the offset. */
- extra = bits_from(hash, ntdb->hash_bits, NTDB_OFF_UPPER_STEAL);
- extra <<= (64 - NTDB_OFF_UPPER_STEAL);
- return new_off | extra;
- }
- /* Simply overwrite the hash entry we found before. */
- enum NTDB_ERROR replace_in_hash(struct ntdb_context *ntdb,
- const struct hash_info *h,
- ntdb_off_t new_off)
- {
- return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket),
- encode_offset(ntdb, new_off, h->h));
- }
- enum NTDB_ERROR delete_from_hash(struct ntdb_context *ntdb,
- const struct hash_info *h)
- {
- return ntdb_write_off(ntdb, hbucket_off(h->table, h->bucket), 0);
- }
- enum NTDB_ERROR add_to_hash(struct ntdb_context *ntdb,
- const struct hash_info *h,
- ntdb_off_t new_off)
- {
- enum NTDB_ERROR ecode;
- ntdb_off_t chain;
- struct ntdb_used_record chdr;
- const ntdb_off_t *old;
- ntdb_off_t *new;
- /* We hit an empty bucket during search? That's where it goes. */
- if (!h->old_val) {
- return replace_in_hash(ntdb, h, new_off);
- }
- /* Full at top-level? Create a 2-element chain. */
- if (h->table == NTDB_HASH_OFFSET) {
- ntdb_off_t pair[2];
- /* One element is old value, the other is the new value. */
- pair[0] = h->old_val;
- pair[1] = encode_offset(ntdb, new_off, h->h);
- chain = alloc(ntdb, 0, sizeof(pair), NTDB_CHAIN_MAGIC, true);
- if (NTDB_OFF_IS_ERR(chain)) {
- return NTDB_OFF_TO_ERR(chain);
- }
- ecode = ntdb_write_convert(ntdb,
- chain
- + sizeof(struct ntdb_used_record),
- pair, sizeof(pair));
- if (ecode == NTDB_SUCCESS) {
- ecode = ntdb_write_off(ntdb,
- hbucket_off(h->table, h->bucket),
- chain
- | (1ULL << NTDB_OFF_CHAIN_BIT));
- }
- return ecode;
- }
- /* Full bucket. Expand. */
- ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- if (rec_extra_padding(&chdr) >= sizeof(new_off)) {
- /* Expand in place. */
- uint64_t dlen = rec_data_length(&chdr);
- ecode = set_header(ntdb, &chdr, NTDB_CHAIN_MAGIC, 0,
- dlen + sizeof(new_off),
- dlen + rec_extra_padding(&chdr));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- /* find_and_lock set up h to point to last bucket. */
- ecode = replace_in_hash(ntdb, h, new_off);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- ecode = ntdb_write_convert(ntdb, h->table, &chdr, sizeof(chdr));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- /* For futureproofing, we always make the first byte of padding
- * a zero. */
- if (rec_extra_padding(&chdr)) {
- ecode = ntdb->io->twrite(ntdb, h->table + sizeof(chdr)
- + dlen + sizeof(new_off),
- "", 1);
- }
- return ecode;
- }
- /* We need to reallocate the chain. */
- chain = alloc(ntdb, 0, (h->table_size + 1) * sizeof(ntdb_off_t),
- NTDB_CHAIN_MAGIC, true);
- if (NTDB_OFF_IS_ERR(chain)) {
- return NTDB_OFF_TO_ERR(chain);
- }
- /* Map both and copy across old buckets. */
- old = ntdb_access_read(ntdb, hbucket_off(h->table, 0),
- h->table_size*sizeof(ntdb_off_t), true);
- if (NTDB_PTR_IS_ERR(old)) {
- return NTDB_PTR_ERR(old);
- }
- new = ntdb_access_write(ntdb, hbucket_off(chain, 0),
- (h->table_size + 1)*sizeof(ntdb_off_t), true);
- if (NTDB_PTR_IS_ERR(new)) {
- ntdb_access_release(ntdb, old);
- return NTDB_PTR_ERR(new);
- }
- memcpy(new, old, h->bucket * sizeof(ntdb_off_t));
- new[h->bucket] = encode_offset(ntdb, new_off, h->h);
- ntdb_access_release(ntdb, old);
- ecode = ntdb_access_commit(ntdb, new);
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- /* Free the old chain. */
- ecode = add_free_record(ntdb, h->table,
- sizeof(struct ntdb_used_record)
- + rec_data_length(&chdr)
- + rec_extra_padding(&chdr),
- NTDB_LOCK_WAIT, true);
- /* Replace top-level to point to new chain */
- return ntdb_write_off(ntdb,
- hbucket_off(NTDB_HASH_OFFSET,
- bits_from(h->h, 0, ntdb->hash_bits)),
- chain | (1ULL << NTDB_OFF_CHAIN_BIT));
- }
- /* Traverse support: returns offset of record, or 0 or -ve error. */
- static ntdb_off_t iterate_chain(struct ntdb_context *ntdb,
- ntdb_off_t val,
- struct hash_info *h)
- {
- ntdb_off_t i;
- enum NTDB_ERROR ecode;
- struct ntdb_used_record chdr;
- /* First load up chain header. */
- h->table = val & NTDB_OFF_MASK;
- ecode = ntdb_read_convert(ntdb, h->table, &chdr, sizeof(chdr));
- if (ecode != NTDB_SUCCESS) {
- return ecode;
- }
- if (rec_magic(&chdr) != NTDB_CHAIN_MAGIC) {
- return ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "get_table:"
- " corrupt record %#x at %llu",
- rec_magic(&chdr),
- (long long)h->table);
- }
- /* Chain length is implied by data length. */
- h->table_size = rec_data_length(&chdr) / sizeof(ntdb_off_t);
- i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0), h->bucket,
- h->table_size);
- if (NTDB_OFF_IS_ERR(i)) {
- return i;
- }
- if (i != h->table_size) {
- /* Return to next bucket. */
- h->bucket = i + 1;
- val = ntdb_read_off(ntdb, hbucket_off(h->table, i));
- if (NTDB_OFF_IS_ERR(val)) {
- return val;
- }
- return val & NTDB_OFF_MASK;
- }
- /* Go back up to hash table. */
- h->table = NTDB_HASH_OFFSET;
- h->table_size = 1 << ntdb->hash_bits;
- h->bucket = bits_from(h->h, 0, ntdb->hash_bits) + 1;
- return 0;
- }
- /* Keeps hash locked unless returns 0 or error. */
- static ntdb_off_t lock_and_iterate_hash(struct ntdb_context *ntdb,
- struct hash_info *h)
- {
- ntdb_off_t val, i;
- enum NTDB_ERROR ecode;
- if (h->table != NTDB_HASH_OFFSET) {
- /* We're in a chain. */
- i = bits_from(h->h, 0, ntdb->hash_bits);
- ecode = ntdb_lock_hash(ntdb, i, F_RDLCK);
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
- /* We dropped lock, bucket might have moved! */
- val = ntdb_read_off(ntdb, hbucket_off(NTDB_HASH_OFFSET, i));
- if (NTDB_OFF_IS_ERR(val)) {
- goto unlock;
- }
- /* We don't remove chains: there should still be one there! */
- if (!val || !is_chain(val)) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "iterate_hash:"
- " vanished hchain %llu at %llu",
- (long long)val,
- (long long)i);
- val = NTDB_ERR_TO_OFF(ecode);
- goto unlock;
- }
- /* Find next bucket in the chain. */
- val = iterate_chain(ntdb, val, h);
- if (NTDB_OFF_IS_ERR(val)) {
- goto unlock;
- }
- if (val != 0) {
- return val;
- }
- ntdb_unlock_hash(ntdb, i, F_RDLCK);
- /* OK, we've reset h back to top level. */
- }
- /* We do this unlocked, then re-check. */
- for (i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0),
- h->bucket, h->table_size);
- i != h->table_size;
- i = ntdb_find_nonzero_off(ntdb, hbucket_off(h->table, 0),
- i+1, h->table_size)) {
- ecode = ntdb_lock_hash(ntdb, i, F_RDLCK);
- if (ecode != NTDB_SUCCESS) {
- return NTDB_ERR_TO_OFF(ecode);
- }
- val = ntdb_read_off(ntdb, hbucket_off(h->table, i));
- if (NTDB_OFF_IS_ERR(val)) {
- goto unlock;
- }
- /* Lost race, and it's empty? */
- if (!val) {
- ntdb->stats.traverse_val_vanished++;
- ntdb_unlock_hash(ntdb, i, F_RDLCK);
- continue;
- }
- if (!is_chain(val)) {
- /* So caller knows what lock to free. */
- h->h = i;
- /* Return to next bucket. */
- h->bucket = i + 1;
- val &= NTDB_OFF_MASK;
- return val;
- }
- /* Start at beginning of chain */
- h->bucket = 0;
- h->h = i;
- val = iterate_chain(ntdb, val, h);
- if (NTDB_OFF_IS_ERR(val)) {
- goto unlock;
- }
- if (val != 0) {
- return val;
- }
- /* Otherwise, bucket has been set to i+1 */
- ntdb_unlock_hash(ntdb, i, F_RDLCK);
- }
- return 0;
- unlock:
- ntdb_unlock_hash(ntdb, i, F_RDLCK);
- return val;
- }
- /* Return success if we find something, NTDB_ERR_NOEXIST if none. */
- enum NTDB_ERROR next_in_hash(struct ntdb_context *ntdb,
- struct hash_info *h,
- NTDB_DATA *kbuf, size_t *dlen)
- {
- ntdb_off_t off;
- struct ntdb_used_record rec;
- enum NTDB_ERROR ecode;
- off = lock_and_iterate_hash(ntdb, h);
- if (NTDB_OFF_IS_ERR(off)) {
- return NTDB_OFF_TO_ERR(off);
- } else if (off == 0) {
- return NTDB_ERR_NOEXIST;
- }
- /* The hash for this key is still locked. */
- ecode = ntdb_read_convert(ntdb, off, &rec, sizeof(rec));
- if (ecode != NTDB_SUCCESS) {
- goto unlock;
- }
- if (rec_magic(&rec) != NTDB_USED_MAGIC) {
- ecode = ntdb_logerr(ntdb, NTDB_ERR_CORRUPT,
- NTDB_LOG_ERROR,
- "next_in_hash:"
- " corrupt record at %llu",
- (long long)off);
- goto unlock;
- }
- kbuf->dsize = rec_key_length(&rec);
- /* They want data as well? */
- if (dlen) {
- *dlen = rec_data_length(&rec);
- kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec),
- kbuf->dsize + *dlen);
- } else {
- kbuf->dptr = ntdb_alloc_read(ntdb, off + sizeof(rec),
- kbuf->dsize);
- }
- if (NTDB_PTR_IS_ERR(kbuf->dptr)) {
- ecode = NTDB_PTR_ERR(kbuf->dptr);
- goto unlock;
- }
- ecode = NTDB_SUCCESS;
- unlock:
- ntdb_unlock_hash(ntdb, bits_from(h->h, 0, ntdb->hash_bits), F_RDLCK);
- return ecode;
- }
- enum NTDB_ERROR first_in_hash(struct ntdb_context *ntdb,
- struct hash_info *h,
- NTDB_DATA *kbuf, size_t *dlen)
- {
- h->table = NTDB_HASH_OFFSET;
- h->table_size = 1 << ntdb->hash_bits;
- h->bucket = 0;
- return next_in_hash(ntdb, h, kbuf, dlen);
- }
- /* Even if the entry isn't in this hash bucket, you'd have to lock this
- * bucket to find it. */
- static enum NTDB_ERROR chainlock(struct ntdb_context *ntdb,
- const NTDB_DATA *key, int ltype)
- {
- uint32_t h = ntdb_hash(ntdb, key->dptr, key->dsize);
- return ntdb_lock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), ltype);
- }
- /* lock/unlock one hash chain. This is meant to be used to reduce
- contention - it cannot guarantee how many records will be locked */
- _PUBLIC_ enum NTDB_ERROR ntdb_chainlock(struct ntdb_context *ntdb, NTDB_DATA key)
- {
- return chainlock(ntdb, &key, F_WRLCK);
- }
- _PUBLIC_ void ntdb_chainunlock(struct ntdb_context *ntdb, NTDB_DATA key)
- {
- uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
- ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_WRLCK);
- }
- _PUBLIC_ enum NTDB_ERROR ntdb_chainlock_read(struct ntdb_context *ntdb,
- NTDB_DATA key)
- {
- return chainlock(ntdb, &key, F_RDLCK);
- }
- _PUBLIC_ void ntdb_chainunlock_read(struct ntdb_context *ntdb, NTDB_DATA key)
- {
- uint32_t h = ntdb_hash(ntdb, key.dptr, key.dsize);
- ntdb_unlock_hash(ntdb, bits_from(h, 0, ntdb->hash_bits), F_RDLCK);
- }
|