X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fclass_hash.c;h=c15c761c5eb4711965a24c68c71ac4ea121e4312;hb=f5ed33ba0b64c4763f11286b139c2ddf4932e1e2;hp=46b4c5edda8ca73291a19d079e1604d80a48ff86;hpb=1a24137e8f26eaae9a2dac39a1e8a8a0bed46b6b;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/class_hash.c b/lustre/obdclass/class_hash.c index 46b4c5ed..c15c761 100644 --- a/lustre/obdclass/class_hash.c +++ b/lustre/obdclass/class_hash.c @@ -26,7 +26,7 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. */ /* @@ -55,77 +55,106 @@ #include +static void +lh_read_lock(lustre_hash_t *lh) +{ + if ((lh->lh_flags & LH_REHASH) != 0) + read_lock(&lh->lh_rwlock); +} + +static void +lh_read_unlock(lustre_hash_t *lh) +{ + if ((lh->lh_flags & LH_REHASH) != 0) + read_unlock(&lh->lh_rwlock); +} + +static void +lh_write_lock(lustre_hash_t *lh) +{ + if ((lh->lh_flags & LH_REHASH) != 0) + write_lock(&lh->lh_rwlock); +} + +static void +lh_write_unlock(lustre_hash_t *lh) +{ + if ((lh->lh_flags & LH_REHASH) != 0) + write_unlock(&lh->lh_rwlock); +} + /** * Initialize new lustre hash, where: * @name - Descriptive hash name - * @cur_size - Initial hash table size - * @max_size - Maximum allowed hash table resize + * @cur_bits - Initial hash table size, in bits + * @max_bits - Maximum allowed hash table resize, in bits * @ops - Registered hash table operations * @flags - LH_REHASH enable synamic hash resizing * - LH_SORT enable chained hash sort */ lustre_hash_t * -lustre_hash_init(char *name, unsigned int cur_size, unsigned int max_size, +lustre_hash_init(char *name, unsigned int cur_bits, unsigned int max_bits, lustre_hash_ops_t *ops, int flags) { lustre_hash_t *lh; int i; ENTRY; - + LASSERT(name != NULL); LASSERT(ops != NULL); - /* - * Ensure hash is a power of two to allow the use of a bitmask - * in the hash function instead of a more expensive modulus. 
- */ - LASSERTF(cur_size && (cur_size & (cur_size - 1)) == 0, - "Size (%u) is not power of 2\n", cur_size); - LASSERTF(max_size && (max_size & (max_size - 1)) == 0, - "Size (%u) is not power of 2\n", max_size); - - OBD_ALLOC_PTR(lh); + LASSERT(cur_bits > 0); + LASSERT(max_bits >= cur_bits); + LASSERT(max_bits < 31); + + LIBCFS_ALLOC_PTR(lh); if (!lh) RETURN(NULL); - - lh->lh_name_size = strlen(name) + 1; - rwlock_init(&lh->lh_rwlock); - - OBD_ALLOC(lh->lh_name, lh->lh_name_size); - if (!lh->lh_name) { - OBD_FREE_PTR(lh); - RETURN(NULL); - } - - strncpy(lh->lh_name, name, lh->lh_name_size); - - atomic_set(&lh->lh_count, 0); + + strncpy(lh->lh_name, name, sizeof(lh->lh_name)); + lh->lh_name[sizeof(lh->lh_name) - 1] = '\0'; atomic_set(&lh->lh_rehash_count, 0); - lh->lh_cur_size = cur_size; - lh->lh_min_size = cur_size; - lh->lh_max_size = max_size; - lh->lh_min_theta = 500; /* theta * 1000 */ - lh->lh_max_theta = 2000; /* theta * 1000 */ + atomic_set(&lh->lh_count, 0); + rwlock_init(&lh->lh_rwlock); + lh->lh_cur_bits = cur_bits; + lh->lh_cur_mask = (1 << cur_bits) - 1; + lh->lh_min_bits = cur_bits; + lh->lh_max_bits = max_bits; + /* XXX: need to fixup lustre_hash_rehash_bits() before this can be + * anything other than 0.5 and 2.0 */ + lh->lh_min_theta = 1 << (LH_THETA_BITS - 1); + lh->lh_max_theta = 1 << (LH_THETA_BITS + 1); lh->lh_ops = ops; lh->lh_flags = flags; + if (cur_bits != max_bits && (lh->lh_flags & LH_REHASH) == 0) + CERROR("Rehash is disabled for %s, ignore max_bits %d\n", + name, max_bits); - OBD_VMALLOC(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size); + /* theta * 1000 */ + __lustre_hash_set_theta(lh, 500, 2000); + + LIBCFS_ALLOC(lh->lh_buckets, sizeof(*lh->lh_buckets) << lh->lh_cur_bits); if (!lh->lh_buckets) { - OBD_FREE(lh->lh_name, lh->lh_name_size); - OBD_FREE_PTR(lh); + LIBCFS_FREE_PTR(lh); RETURN(NULL); } - - for (i = 0; i < lh->lh_cur_size; i++) { - INIT_HLIST_HEAD(&lh->lh_buckets[i].lhb_head); - rwlock_init(&lh->lh_buckets[i].lhb_rwlock); - atomic_set(&lh->lh_buckets[i].lhb_count, 0); + + for (i = 0; i <= lh->lh_cur_mask; i++) { + LIBCFS_ALLOC(lh->lh_buckets[i], sizeof(lustre_hash_bucket_t)); + if (lh->lh_buckets[i] == NULL) { + lustre_hash_exit(lh); + return NULL; + } + + INIT_HLIST_HEAD(&lh->lh_buckets[i]->lhb_head); + rwlock_init(&lh->lh_buckets[i]->lhb_rwlock); + atomic_set(&lh->lh_buckets[i]->lhb_count, 0); } - + return lh; } EXPORT_SYMBOL(lustre_hash_init); - + /** * Cleanup lustre hash @lh. 
*/ @@ -137,52 +166,60 @@ lustre_hash_exit(lustre_hash_t *lh) struct hlist_node *pos; int i; ENTRY; - - if (!lh) - return; - - write_lock(&lh->lh_rwlock); - + + LASSERT(lh != NULL); + + lh_write_lock(lh); + lh_for_each_bucket(lh, lhb, i) { + if (lhb == NULL) + continue; + write_lock(&lhb->lhb_rwlock); hlist_for_each_safe(hnode, pos, &(lhb->lhb_head)) { __lustre_hash_bucket_validate(lh, lhb, hnode); __lustre_hash_bucket_del(lh, lhb, hnode); lh_exit(lh, hnode); } - - LASSERT(hlist_empty(&(lhb->lhb_head))); - LASSERT(atomic_read(&lhb->lhb_count) == 0); + + LASSERTF(hlist_empty(&(lhb->lhb_head)), + "hash bucket %d from %s is not empty\n", i, lh->lh_name); + LASSERTF(atomic_read(&lhb->lhb_count) == 0, + "hash bucket %d from %s has #entries > 0 (%d)\n", i, + lh->lh_name, atomic_read(&lhb->lhb_count)); write_unlock(&lhb->lhb_rwlock); + LIBCFS_FREE_PTR(lhb); } - - OBD_VFREE(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size); - OBD_FREE(lh->lh_name, lh->lh_name_size); - - LASSERT(atomic_read(&lh->lh_count) == 0); - write_unlock(&lh->lh_rwlock); - - OBD_FREE_PTR(lh); + + LASSERTF(atomic_read(&lh->lh_count) == 0, + "hash %s still has #entries > 0 (%d)\n", lh->lh_name, + atomic_read(&lh->lh_count)); + lh_write_unlock(lh); + + LIBCFS_FREE(lh->lh_buckets, sizeof(*lh->lh_buckets) << lh->lh_cur_bits); + LIBCFS_FREE_PTR(lh); EXIT; } EXPORT_SYMBOL(lustre_hash_exit); -static inline unsigned int lustre_hash_rehash_size(lustre_hash_t *lh) +static inline unsigned int lustre_hash_rehash_bits(lustre_hash_t *lh) { if (!(lh->lh_flags & LH_REHASH)) return 0; - if ((lh->lh_cur_size < lh->lh_max_size) && + /* XXX: need to handle case with max_theta != 2.0 + * and the case with min_theta != 0.5 */ + if ((lh->lh_cur_bits < lh->lh_max_bits) && (__lustre_hash_theta(lh) > lh->lh_max_theta)) - return MIN(lh->lh_cur_size * 2, lh->lh_max_size); + return lh->lh_cur_bits + 1; - if ((lh->lh_cur_size > lh->lh_min_size) && + if ((lh->lh_cur_bits > lh->lh_min_bits) && (__lustre_hash_theta(lh) < lh->lh_min_theta)) - return MAX(lh->lh_cur_size / 2, lh->lh_min_size); + return lh->lh_cur_bits - 1; return 0; } - + /** * Add item @hnode to lustre hash @lh using @key. The registered * ops->lh_get function will be called when the item is added. @@ -191,69 +228,86 @@ void lustre_hash_add(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { lustre_hash_bucket_t *lhb; - int size; + int bits; unsigned i; ENTRY; - + __lustre_hash_key_validate(lh, key, hnode); - read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); - lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + lh_read_lock(lh); + i = lh_hash(lh, key, lh->lh_cur_mask); + lhb = lh->lh_buckets[i]; + LASSERT(i <= lh->lh_cur_mask); LASSERT(hlist_unhashed(hnode)); write_lock(&lhb->lhb_rwlock); __lustre_hash_bucket_add(lh, lhb, hnode); write_unlock(&lhb->lhb_rwlock); - size = lustre_hash_rehash_size(lh); - read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); - + bits = lustre_hash_rehash_bits(lh); + lh_read_unlock(lh); + if (bits) + lustre_hash_rehash(lh, bits); + EXIT; } EXPORT_SYMBOL(lustre_hash_add); - -/** - * Add item @hnode to lustre hash @lh using @key. The registered - * ops->lh_get function will be called if the item was added. - * Returns 0 on success or -EALREADY on key collisions. 
- */ -int -lustre_hash_add_unique(lustre_hash_t *lh, void *key, struct hlist_node *hnode) + +static struct hlist_node * +lustre_hash_findadd_unique_hnode(lustre_hash_t *lh, void *key, + struct hlist_node *hnode) { + int bits = 0; + struct hlist_node *ehnode; lustre_hash_bucket_t *lhb; - int size; - int rc = -EALREADY; unsigned i; ENTRY; - + __lustre_hash_key_validate(lh, key, hnode); - - read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); - lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + + lh_read_lock(lh); + i = lh_hash(lh, key, lh->lh_cur_mask); + lhb = lh->lh_buckets[i]; + LASSERT(i <= lh->lh_cur_mask); LASSERT(hlist_unhashed(hnode)); - + write_lock(&lhb->lhb_rwlock); - if (!__lustre_hash_bucket_lookup(lh, lhb, key)) { + ehnode = __lustre_hash_bucket_lookup(lh, lhb, key); + if (ehnode) { + lh_get(lh, ehnode); + } else { __lustre_hash_bucket_add(lh, lhb, hnode); - rc = 0; + ehnode = hnode; + bits = lustre_hash_rehash_bits(lh); } write_unlock(&lhb->lhb_rwlock); - - size = lustre_hash_rehash_size(lh); - read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); - - RETURN(rc); + lh_read_unlock(lh); + if (bits) + lustre_hash_rehash(lh, bits); + + RETURN(ehnode); +} + +/** + * Add item @hnode to lustre hash @lh using @key. The registered + * ops->lh_get function will be called if the item was added. + * Returns 0 on success or -EALREADY on key collisions. + */ +int +lustre_hash_add_unique(lustre_hash_t *lh, void *key, struct hlist_node *hnode) +{ + struct hlist_node *ehnode; + ENTRY; + + ehnode = lustre_hash_findadd_unique_hnode(lh, key, hnode); + if (ehnode != hnode) { + lh_put(lh, ehnode); + RETURN(-EALREADY); + } + RETURN(0); } EXPORT_SYMBOL(lustre_hash_add_unique); - + /** * Add item @hnode to lustre hash @lh using @key. If this @key * already exists in the hash then ops->lh_get will be called on the @@ -264,38 +318,17 @@ void * lustre_hash_findadd_unique(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { - struct hlist_node *existing_hnode; - lustre_hash_bucket_t *lhb; - int size; - unsigned i; + struct hlist_node *ehnode; void *obj; ENTRY; - - __lustre_hash_key_validate(lh, key, hnode); - - read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); - lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); - LASSERT(hlist_unhashed(hnode)); - - write_lock(&lhb->lhb_rwlock); - existing_hnode = __lustre_hash_bucket_lookup(lh, lhb, key); - if (existing_hnode) - obj = lh_get(lh, existing_hnode); - else - obj = __lustre_hash_bucket_add(lh, lhb, hnode); - write_unlock(&lhb->lhb_rwlock); - size = lustre_hash_rehash_size(lh); - read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); - + ehnode = lustre_hash_findadd_unique_hnode(lh, key, hnode); + obj = lh_get(lh, ehnode); + lh_put(lh, ehnode); RETURN(obj); } EXPORT_SYMBOL(lustre_hash_findadd_unique); - + /** * Delete item @hnode from the lustre hash @lh using @key. 
The @key * is required to ensure the correct hash bucket is locked since there @@ -307,32 +340,63 @@ void * lustre_hash_del(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { lustre_hash_bucket_t *lhb; - int size; unsigned i; void *obj; ENTRY; - + __lustre_hash_key_validate(lh, key, hnode); - - read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); - lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); - LASSERT(!hlist_unhashed(hnode)); + + lh_read_lock(lh); + i = lh_hash(lh, key, lh->lh_cur_mask); + lhb = lh->lh_buckets[i]; + LASSERT(i <= lh->lh_cur_mask); write_lock(&lhb->lhb_rwlock); + LASSERT(!hlist_unhashed(hnode)); obj = __lustre_hash_bucket_del(lh, lhb, hnode); write_unlock(&lhb->lhb_rwlock); + lh_read_unlock(lh); - size = lustre_hash_rehash_size(lh); - read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); - RETURN(obj); } EXPORT_SYMBOL(lustre_hash_del); - + +/** + * Delete item from the lustre hash @lh when @func return true. + * The write lock being hold during loop for each bucket to avoid + * any object be reference. + */ +void +lustre_hash_cond_del(lustre_hash_t *lh, lh_cond_opt_cb func, void *data) +{ + lustre_hash_bucket_t *lhb; + struct hlist_node *hnode; + struct hlist_node *pos; + int i; + ENTRY; + + LASSERT(lh != NULL); + + lh_write_lock(lh); + lh_for_each_bucket(lh, lhb, i) { + if (lhb == NULL) + continue; + + write_lock(&lhb->lhb_rwlock); + hlist_for_each_safe(hnode, pos, &(lhb->lhb_head)) { + __lustre_hash_bucket_validate(lh, lhb, hnode); + if (func(lh_get(lh, hnode), data)) + __lustre_hash_bucket_del(lh, lhb, hnode); + (void)lh_put(lh, hnode); + } + write_unlock(&lhb->lhb_rwlock); + } + lh_write_unlock(lh); + + EXIT; +} +EXPORT_SYMBOL(lustre_hash_cond_del); + /** * Delete item given @key in lustre hash @lh. The first @key found in * the hash will be removed, if the key exists multiple times in the hash @@ -344,15 +408,14 @@ lustre_hash_del_key(lustre_hash_t *lh, void *key) { struct hlist_node *hnode; lustre_hash_bucket_t *lhb; - int size; unsigned i; void *obj = NULL; ENTRY; - - read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); - lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + + lh_read_lock(lh); + i = lh_hash(lh, key, lh->lh_cur_mask); + lhb = lh->lh_buckets[i]; + LASSERT(i <= lh->lh_cur_mask); write_lock(&lhb->lhb_rwlock); hnode = __lustre_hash_bucket_lookup(lh, lhb, key); @@ -360,16 +423,12 @@ lustre_hash_del_key(lustre_hash_t *lh, void *key) obj = __lustre_hash_bucket_del(lh, lhb, hnode); write_unlock(&lhb->lhb_rwlock); + lh_read_unlock(lh); - size = lustre_hash_rehash_size(lh); - read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); - RETURN(obj); } EXPORT_SYMBOL(lustre_hash_del_key); - + /** * Lookup an item using @key in the lustre hash @lh and return it. 
* If the @key is found in the hash lh->lh_get() is called and the @@ -386,24 +445,24 @@ lustre_hash_lookup(lustre_hash_t *lh, void *key) unsigned i; void *obj = NULL; ENTRY; - - read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); - lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + + lh_read_lock(lh); + i = lh_hash(lh, key, lh->lh_cur_mask); + lhb = lh->lh_buckets[i]; + LASSERT(i <= lh->lh_cur_mask); read_lock(&lhb->lhb_rwlock); hnode = __lustre_hash_bucket_lookup(lh, lhb, key); if (hnode) obj = lh_get(lh, hnode); - + read_unlock(&lhb->lhb_rwlock); - read_unlock(&lh->lh_rwlock); - + lh_read_unlock(lh); + RETURN(obj); } EXPORT_SYMBOL(lustre_hash_lookup); - + /** * For each item in the lustre hash @lh call the passed callback @func * and pass to it as an argument each hash item and the private @data. @@ -419,8 +478,8 @@ lustre_hash_for_each(lustre_hash_t *lh, lh_for_each_cb func, void *data) void *obj; int i; ENTRY; - - read_lock(&lh->lh_rwlock); + + lh_read_lock(lh); lh_for_each_bucket(lh, lhb, i) { read_lock(&lhb->lhb_rwlock); hlist_for_each(hnode, &(lhb->lhb_head)) { @@ -431,12 +490,12 @@ lustre_hash_for_each(lustre_hash_t *lh, lh_for_each_cb func, void *data) } read_unlock(&lhb->lhb_rwlock); } - read_unlock(&lh->lh_rwlock); + lh_read_unlock(lh); EXIT; } EXPORT_SYMBOL(lustre_hash_for_each); - + /** * For each item in the lustre hash @lh call the passed callback @func * and pass to it as an argument each hash item and the private @data. @@ -456,8 +515,8 @@ lustre_hash_for_each_safe(lustre_hash_t *lh, lh_for_each_cb func, void *data) void *obj; int i; ENTRY; - - read_lock(&lh->lh_rwlock); + + lh_read_lock(lh); lh_for_each_bucket(lh, lhb, i) { read_lock(&lhb->lhb_rwlock); hlist_for_each_safe(hnode, pos, &(lhb->lhb_head)) { @@ -470,11 +529,11 @@ lustre_hash_for_each_safe(lustre_hash_t *lh, lh_for_each_cb func, void *data) } read_unlock(&lhb->lhb_rwlock); } - read_unlock(&lh->lh_rwlock); + lh_read_unlock(lh); EXIT; } EXPORT_SYMBOL(lustre_hash_for_each_safe); - + /** * For each hash bucket in the lustre hash @lh call the passed callback * @func until all the hash buckets are empty. The passed callback @func @@ -491,32 +550,40 @@ lustre_hash_for_each_empty(lustre_hash_t *lh, lh_for_each_cb func, void *data) { struct hlist_node *hnode; lustre_hash_bucket_t *lhb; + lustre_hash_bucket_t **lhb_last = NULL; void *obj; - int i; + int i = 0; ENTRY; - + restart: - read_lock(&lh->lh_rwlock); - lh_for_each_bucket(lh, lhb, i) { + lh_read_lock(lh); + /* If the hash table has changed since we last held lh_rwlock, + * we need to start traversing the list from the start. */ + if (lh->lh_buckets != lhb_last) { + i = 0; + lhb_last = lh->lh_buckets; + } + lh_for_each_bucket_restart(lh, lhb, i) { write_lock(&lhb->lhb_rwlock); while (!hlist_empty(&lhb->lhb_head)) { hnode = lhb->lhb_head.first; __lustre_hash_bucket_validate(lh, lhb, hnode); obj = lh_get(lh, hnode); write_unlock(&lhb->lhb_rwlock); - read_unlock(&lh->lh_rwlock); + lh_read_unlock(lh); func(obj, data); (void)lh_put(lh, hnode); + cfs_cond_resched(); goto restart; } write_unlock(&lhb->lhb_rwlock); } - read_unlock(&lh->lh_rwlock); + lh_read_unlock(lh); EXIT; } EXPORT_SYMBOL(lustre_hash_for_each_empty); - - /* + +/* * For each item in the lustre hash @lh which matches the @key call * the passed callback @func and pass to it as an argument each hash * item and the private @data. 
Before each callback ops->lh_get will @@ -532,32 +599,32 @@ lustre_hash_for_each_key(lustre_hash_t *lh, void *key, lustre_hash_bucket_t *lhb; unsigned i; ENTRY; - - read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); - lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); - + + lh_read_lock(lh); + i = lh_hash(lh, key, lh->lh_cur_mask); + lhb = lh->lh_buckets[i]; + LASSERT(i <= lh->lh_cur_mask); + read_lock(&lhb->lhb_rwlock); hlist_for_each(hnode, &(lhb->lhb_head)) { __lustre_hash_bucket_validate(lh, lhb, hnode); - + if (!lh_compare(lh, key, hnode)) continue; - + func(lh_get(lh, hnode), data); (void)lh_put(lh, hnode); } - + read_unlock(&lhb->lhb_rwlock); - read_unlock(&lh->lh_rwlock); - + lh_read_unlock(lh); + EXIT; } EXPORT_SYMBOL(lustre_hash_for_each_key); - + /** - * Rehash the lustre hash @lh to the given @size. This can be used + * Rehash the lustre hash @lh to the given @bits. This can be used * to grow the hash size when excessive chaining is detected, or to * shrink the hash when it is larger than needed. When the LH_REHASH * flag is set in @lh the lustre hash may be dynamically rehashed @@ -568,93 +635,109 @@ EXPORT_SYMBOL(lustre_hash_for_each_key); * theta thresholds for @lh are tunable via lustre_hash_set_theta(). */ int -lustre_hash_rehash(lustre_hash_t *lh, int size) +lustre_hash_rehash(lustre_hash_t *lh, int bits) { struct hlist_node *hnode; struct hlist_node *pos; - lustre_hash_bucket_t *lh_buckets; - lustre_hash_bucket_t *rehash_buckets; + lustre_hash_bucket_t **lh_buckets; + lustre_hash_bucket_t **rehash_buckets; lustre_hash_bucket_t *lh_lhb; lustre_hash_bucket_t *rehash_lhb; int i; - int lh_size; int theta; + int lh_mask; + int lh_bits; + int mask = (1 << bits) - 1; + int rc = 0; void *key; ENTRY; - - LASSERT(size > 0); - - OBD_VMALLOC(rehash_buckets, sizeof(*rehash_buckets) * size); + + LASSERT(!in_interrupt()); + LASSERT(mask > 0); + LASSERT((lh->lh_flags & LH_REHASH) != 0); + + LIBCFS_ALLOC(rehash_buckets, sizeof(*rehash_buckets) << bits); if (!rehash_buckets) RETURN(-ENOMEM); - - for (i = 0; i < size; i++) { - INIT_HLIST_HEAD(&rehash_buckets[i].lhb_head); - rwlock_init(&rehash_buckets[i].lhb_rwlock); - atomic_set(&rehash_buckets[i].lhb_count, 0); + + for (i = 0; i <= mask; i++) { + LIBCFS_ALLOC(rehash_buckets[i], sizeof(*rehash_buckets[i])); + if (rehash_buckets[i] == NULL) + GOTO(free, rc = -ENOMEM); + + INIT_HLIST_HEAD(&rehash_buckets[i]->lhb_head); + rwlock_init(&rehash_buckets[i]->lhb_rwlock); + atomic_set(&rehash_buckets[i]->lhb_count, 0); } - - write_lock(&lh->lh_rwlock); - /* + lh_write_lock(lh); + + /* * Early return for multiple concurrent racing callers, - * ensure we only trigger the rehash if it is still needed. + * ensure we only trigger the rehash if it is still needed. 
*/ theta = __lustre_hash_theta(lh); if ((theta >= lh->lh_min_theta) && (theta <= lh->lh_max_theta)) { - OBD_VFREE(rehash_buckets, sizeof(*rehash_buckets) * size); - write_unlock(&lh->lh_rwlock); - RETURN(-EALREADY); + lh_write_unlock(lh); + GOTO(free, rc = -EALREADY); } - - lh_size = lh->lh_cur_size; + + lh_bits = lh->lh_cur_bits; lh_buckets = lh->lh_buckets; - - lh->lh_cur_size = size; + lh_mask = (1 << lh_bits) - 1; + + lh->lh_cur_bits = bits; + lh->lh_cur_mask = (1 << bits) - 1; lh->lh_buckets = rehash_buckets; atomic_inc(&lh->lh_rehash_count); - for (i = 0; i < lh_size; i++) { - lh_lhb = &lh_buckets[i]; + for (i = 0; i <= lh_mask; i++) { + lh_lhb = lh_buckets[i]; write_lock(&lh_lhb->lhb_rwlock); hlist_for_each_safe(hnode, pos, &(lh_lhb->lhb_head)) { key = lh_key(lh, hnode); LASSERT(key); - /* + /* * Validate hnode is in the correct bucket. */ if (unlikely(lh->lh_flags & LH_DEBUG)) - LASSERT(lh_hash(lh, key, lh_size - 1) == i); + LASSERT(lh_hash(lh, key, lh_mask) == i); - /* + /* * Delete from old hash bucket. */ hlist_del(hnode); LASSERT(atomic_read(&lh_lhb->lhb_count) > 0); atomic_dec(&lh_lhb->lhb_count); - /* - * Add to rehash bucket, ops->lh_key must be defined. + /* + * Add to rehash bucket, ops->lh_key must be defined. */ - rehash_lhb = &rehash_buckets[lh_hash(lh, key, size-1)]; + rehash_lhb = rehash_buckets[lh_hash(lh, key, mask)]; hlist_add_head(hnode, &(rehash_lhb->lhb_head)); atomic_inc(&rehash_lhb->lhb_count); } - + LASSERT(hlist_empty(&(lh_lhb->lhb_head))); LASSERT(atomic_read(&lh_lhb->lhb_count) == 0); write_unlock(&lh_lhb->lhb_rwlock); } - - OBD_VFREE(lh_buckets, sizeof(*lh_buckets) * lh_size); - write_unlock(&lh->lh_rwlock); - - RETURN(0); + + lh_write_unlock(lh); + rehash_buckets = lh_buckets; + i = (1 << lh_bits); + bits = lh_bits; +free: + while (i-- > 0) + LIBCFS_FREE(rehash_buckets[i], sizeof(*rehash_buckets[i])); + LIBCFS_FREE(rehash_buckets, sizeof(*rehash_buckets) << bits); + + RETURN(rc); } EXPORT_SYMBOL(lustre_hash_rehash); - + /** * Rehash the object referenced by @hnode in the lustre hash @lh. The * @old_key must be provided to locate the objects previous location @@ -670,28 +753,37 @@ void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, void *new_key, lustre_hash_bucket_t *old_lhb; lustre_hash_bucket_t *new_lhb; unsigned i; - int j; + unsigned j; ENTRY; - + __lustre_hash_key_validate(lh, new_key, hnode); LASSERT(!hlist_unhashed(hnode)); - - read_lock(&lh->lh_rwlock); - - i = lh_hash(lh, old_key, lh->lh_cur_size - 1); - old_lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); - - j = lh_hash(lh, new_key, lh->lh_cur_size - 1); - new_lhb = &lh->lh_buckets[j]; - LASSERT(j < lh->lh_cur_size); - write_lock(&old_lhb->lhb_rwlock); - write_lock(&new_lhb->lhb_rwlock); + lh_read_lock(lh); + + i = lh_hash(lh, old_key, lh->lh_cur_mask); + old_lhb = lh->lh_buckets[i]; + LASSERT(i <= lh->lh_cur_mask); + + j = lh_hash(lh, new_key, lh->lh_cur_mask); + new_lhb = lh->lh_buckets[j]; + LASSERT(j <= lh->lh_cur_mask); + + if (i < j) { /* write_lock ordering */ + write_lock(&old_lhb->lhb_rwlock); + write_lock(&new_lhb->lhb_rwlock); + } else if (i > j) { + write_lock(&new_lhb->lhb_rwlock); + write_lock(&old_lhb->lhb_rwlock); + } else { /* do nothing */ + lh_read_unlock(lh); + EXIT; + return; + } - /* + /* * Migrate item between hash buckets without calling - * the lh_get() and lh_put() callback functions. + * the lh_get() and lh_put() callback functions. 
*/ hlist_del(hnode); LASSERT(atomic_read(&old_lhb->lhb_count) > 0); @@ -701,16 +793,16 @@ void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, void *new_key, write_unlock(&new_lhb->lhb_rwlock); write_unlock(&old_lhb->lhb_rwlock); - read_unlock(&lh->lh_rwlock); - + lh_read_unlock(lh); + EXIT; } EXPORT_SYMBOL(lustre_hash_rehash_key); - + int lustre_hash_debug_header(char *str, int size) { return snprintf(str, size, - "%-36s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", + "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", LUSTRE_MAX_HASH_NAME, "name", "cur", "min", "max", "theta", "t-min", "t-max", "flags", "rehash", "count", " distribution"); } @@ -727,26 +819,30 @@ int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size) if (str == NULL || size == 0) return 0; - read_lock(&lh->lh_rwlock); + lh_read_lock(lh); theta = __lustre_hash_theta(lh); - c += snprintf(str + c, size - c, "%-36s ",lh->lh_name); - c += snprintf(str + c, size - c, "%5d ", lh->lh_cur_size); - c += snprintf(str + c, size - c, "%5d ", lh->lh_min_size); - c += snprintf(str + c, size - c, "%5d ", lh->lh_max_size); + c += snprintf(str + c, size - c, "%-*s ", + LUSTRE_MAX_HASH_NAME, lh->lh_name); + c += snprintf(str + c, size - c, "%5d ", 1 << lh->lh_cur_bits); + c += snprintf(str + c, size - c, "%5d ", 1 << lh->lh_min_bits); + c += snprintf(str + c, size - c, "%5d ", 1 << lh->lh_max_bits); c += snprintf(str + c, size - c, "%d.%03d ", - theta / 1000, theta % 1000); + __lustre_hash_theta_int(theta), + __lustre_hash_theta_frac(theta)); c += snprintf(str + c, size - c, "%d.%03d ", - lh->lh_min_theta / 1000, lh->lh_min_theta % 1000); + __lustre_hash_theta_int(lh->lh_min_theta), + __lustre_hash_theta_frac(lh->lh_min_theta)); c += snprintf(str + c, size - c, "%d.%03d ", - lh->lh_max_theta / 1000, lh->lh_max_theta % 1000); + __lustre_hash_theta_int(lh->lh_max_theta), + __lustre_hash_theta_frac(lh->lh_max_theta)); c += snprintf(str + c, size - c, " 0x%02x ", lh->lh_flags); c += snprintf(str + c, size - c, "%6d ", atomic_read(&lh->lh_rehash_count)); c += snprintf(str + c, size - c, "%5d ", atomic_read(&lh->lh_count)); - /* + /* * The distribution is a summary of the chained hash depth in * each of the lustre hash buckets. Each buckets lhb_count is * divided by the hash theta value and used to generate a @@ -760,14 +856,14 @@ int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size) * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1 */ lh_for_each_bucket(lh, lhb, i) - dist[MIN(fls(atomic_read(&lhb->lhb_count)/MAX(theta,1)),7)]++; + dist[min(__fls(atomic_read(&lhb->lhb_count)/max(theta,1)),7)]++; for (i = 0; i < 8; i++) c += snprintf(str + c, size - c, "%d%c", dist[i], (i == 7) ? '\n' : '/'); - - read_unlock(&lh->lh_rwlock); - + + lh_read_unlock(lh); + return c; } EXPORT_SYMBOL(lustre_hash_debug_str);
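
For reviewers who want to see the reworked API from the caller's side, the sketch below shows how a consumer might drive the hash after this patch. It is illustrative only and not part of the change set: "my_obj", "my_obj_hash_ops" and "my_demo" are hypothetical names, the include path is assumed, and the lustre_hash_ops_t callback members (lh_hash/lh_key/lh_compare/lh_get/lh_put) are defined in the class_hash header rather than in this diff, so they are only referenced, not defined. Only functions whose signatures appear in the hunks above are called; note that sizes are now given in bits, so cur_bits = 3 means 8 initial buckets and max_bits = 7 caps growth at 128 buckets when LH_REHASH is set.

/*
 * Illustrative caller-side sketch -- NOT part of this patch.
 */
#include <obd_class.h>          /* assumed header for lustre_hash_t et al. */

struct my_obj {
        int               mo_key;   /* key embedded in the object */
        struct hlist_node mo_hnode; /* linkage owned by the hash table */
};

/* callback table (lh_hash/lh_key/lh_compare/lh_get/lh_put), filled in elsewhere */
extern lustre_hash_ops_t my_obj_hash_ops;

static void my_demo(void)
{
        lustre_hash_t *lh;
        struct my_obj  obj;
        struct my_obj *found;

        obj.mo_key = 42;
        INIT_HLIST_NODE(&obj.mo_hnode);

        /* sizes are in bits: 2^3 = 8 initial buckets, rehash up to 2^7 = 128 */
        lh = lustre_hash_init("my-obj-hash", 3, 7, &my_obj_hash_ops, LH_REHASH);
        if (lh == NULL)
                return;

        /* ops->lh_get is called on success; -EALREADY if the key already exists */
        if (lustre_hash_add_unique(lh, &obj.mo_key, &obj.mo_hnode) == 0) {
                /* lookup calls ops->lh_get; the caller owns that reference and
                 * must release it through the consumer's own put path */
                found = lustre_hash_lookup(lh, &obj.mo_key);
                if (found != NULL) {
                        /* ... use "found" ... then remove it; del returns the
                         * object and the hash drops its reference internally */
                        found = lustre_hash_del_key(lh, &obj.mo_key);
                }
        }

        lustre_hash_exit(lh);
}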