X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fclass_hash.c;h=5370a4e678ac1a5fd40f084eaeb505d77a5eb0c4;hb=a928591d58b5d0dbbcc9a7f534dca2b6df22da9e;hp=55b41dfe46f2ca6a62704d1402c828a42e98096e;hpb=f7b91f5aa2f6f07c6ee915869b30e7d0e5a35632;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/class_hash.c b/lustre/obdclass/class_hash.c index 55b41df..5370a4e 100644 --- a/lustre/obdclass/class_hash.c +++ b/lustre/obdclass/class_hash.c @@ -58,14 +58,14 @@ /** * Initialize new lustre hash, where: * @name - Descriptive hash name - * @cur_size - Initial hash table size - * @max_size - Maximum allowed hash table resize + * @cur_bits - Initial hash table size, in bits + * @max_bits - Maximum allowed hash table resize, in bits * @ops - Registered hash table operations * @flags - LH_REHASH enable synamic hash resizing * - LH_SORT enable chained hash sort */ lustre_hash_t * -lustre_hash_init(char *name, unsigned int cur_size, unsigned int max_size, +lustre_hash_init(char *name, unsigned int cur_bits, unsigned int max_bits, lustre_hash_ops_t *ops, int flags) { lustre_hash_t *lh; @@ -75,38 +75,40 @@ lustre_hash_init(char *name, unsigned int cur_size, unsigned int max_size, LASSERT(name != NULL); LASSERT(ops != NULL); - /* - * Ensure hash is a power of two to allow the use of a bitmask - * in the hash function instead of a more expensive modulus. - */ - LASSERTF(cur_size && (cur_size & (cur_size - 1)) == 0, - "Size (%u) is not power of 2\n", cur_size); - LASSERTF(max_size && (max_size & (max_size - 1)) == 0, - "Size (%u) is not power of 2\n", max_size); + LASSERT(cur_bits > 0); + LASSERT(max_bits >= cur_bits); + LASSERT(max_bits < 31); OBD_ALLOC_PTR(lh); if (!lh) RETURN(NULL); strncpy(lh->lh_name, name, sizeof(lh->lh_name)); + lh->lh_name[sizeof(lh->lh_name) - 1] = '\0'; atomic_set(&lh->lh_rehash_count, 0); atomic_set(&lh->lh_count, 0); rwlock_init(&lh->lh_rwlock); - lh->lh_cur_size = cur_size; - lh->lh_min_size = cur_size; - lh->lh_max_size = max_size; - lh->lh_min_theta = 500; /* theta * 1000 */ - lh->lh_max_theta = 2000; /* theta * 1000 */ + lh->lh_cur_bits = cur_bits; + lh->lh_cur_mask = (1 << cur_bits) - 1; + lh->lh_min_bits = cur_bits; + lh->lh_max_bits = max_bits; + /* XXX: need to fixup lustre_hash_rehash_bits() before this can be + * anything other than 0.5 and 2.0 */ + lh->lh_min_theta = 1 << (LH_THETA_BITS - 1); + lh->lh_max_theta = 1 << (LH_THETA_BITS + 1); lh->lh_ops = ops; lh->lh_flags = flags; - OBD_VMALLOC(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size); + /* theta * 1000 */ + __lustre_hash_set_theta(lh, 500, 2000); + + OBD_VMALLOC(lh->lh_buckets, sizeof(*lh->lh_buckets) << lh->lh_cur_bits); if (!lh->lh_buckets) { OBD_FREE_PTR(lh); RETURN(NULL); } - for (i = 0; i < lh->lh_cur_size; i++) { + for (i = 0; i <= lh->lh_cur_mask; i++) { INIT_HLIST_HEAD(&lh->lh_buckets[i].lhb_head); rwlock_init(&lh->lh_buckets[i].lhb_rwlock); atomic_set(&lh->lh_buckets[i].lhb_count, 0); @@ -128,8 +130,7 @@ lustre_hash_exit(lustre_hash_t *lh) int i; ENTRY; - if (!lh) - return; + LASSERT(lh != NULL); write_lock(&lh->lh_rwlock); @@ -146,27 +147,29 @@ lustre_hash_exit(lustre_hash_t *lh) write_unlock(&lhb->lhb_rwlock); } - OBD_VFREE(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size); LASSERT(atomic_read(&lh->lh_count) == 0); write_unlock(&lh->lh_rwlock); + OBD_VFREE(lh->lh_buckets, sizeof(*lh->lh_buckets) << lh->lh_cur_bits); OBD_FREE_PTR(lh); EXIT; } EXPORT_SYMBOL(lustre_hash_exit); -static inline unsigned int lustre_hash_rehash_size(lustre_hash_t *lh) +static inline unsigned int lustre_hash_rehash_bits(lustre_hash_t *lh) { if (!(lh->lh_flags & LH_REHASH)) return 0; - if ((lh->lh_cur_size < lh->lh_max_size) && + /* XXX: need to handle case with max_theta != 2.0 + * and the case with min_theta != 0.5 */ + if ((lh->lh_cur_bits < lh->lh_max_bits) && (__lustre_hash_theta(lh) > lh->lh_max_theta)) - return MIN(lh->lh_cur_size * 2, lh->lh_max_size); + return lh->lh_cur_bits + 1; - if ((lh->lh_cur_size > lh->lh_min_size) && + if ((lh->lh_cur_bits > lh->lh_min_bits) && (__lustre_hash_theta(lh) < lh->lh_min_theta)) - return MAX(lh->lh_cur_size / 2, lh->lh_min_size); + return lh->lh_cur_bits - 1; return 0; } @@ -179,26 +182,26 @@ void lustre_hash_add(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { lustre_hash_bucket_t *lhb; - int size; + int bits; unsigned i; ENTRY; __lustre_hash_key_validate(lh, key, hnode); read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); + i = lh_hash(lh, key, lh->lh_cur_mask); lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + LASSERT(i <= lh->lh_cur_mask); LASSERT(hlist_unhashed(hnode)); write_lock(&lhb->lhb_rwlock); __lustre_hash_bucket_add(lh, lhb, hnode); write_unlock(&lhb->lhb_rwlock); - size = lustre_hash_rehash_size(lh); + bits = lustre_hash_rehash_bits(lh); read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); + if (bits) + lustre_hash_rehash(lh, bits); EXIT; } @@ -208,18 +211,18 @@ static struct hlist_node * lustre_hash_findadd_unique_hnode(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { + int bits = 0; struct hlist_node *ehnode; lustre_hash_bucket_t *lhb; - int size; unsigned i; ENTRY; __lustre_hash_key_validate(lh, key, hnode); read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); + i = lh_hash(lh, key, lh->lh_cur_mask); lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + LASSERT(i <= lh->lh_cur_mask); LASSERT(hlist_unhashed(hnode)); write_lock(&lhb->lhb_rwlock); @@ -229,13 +232,12 @@ lustre_hash_findadd_unique_hnode(lustre_hash_t *lh, void *key, } else { __lustre_hash_bucket_add(lh, lhb, hnode); ehnode = hnode; + bits = lustre_hash_rehash_bits(lh); } write_unlock(&lhb->lhb_rwlock); - - size = lustre_hash_rehash_size(lh); read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); + if (bits) + lustre_hash_rehash(lh, bits); RETURN(ehnode); } @@ -252,9 +254,10 @@ lustre_hash_add_unique(lustre_hash_t *lh, void *key, struct hlist_node *hnode) ENTRY; ehnode = lustre_hash_findadd_unique_hnode(lh, key, hnode); - if (ehnode != hnode) + if (ehnode != hnode) { + lh_put(lh, ehnode); RETURN(-EALREADY); - + } RETURN(0); } EXPORT_SYMBOL(lustre_hash_add_unique); @@ -291,7 +294,6 @@ void * lustre_hash_del(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { lustre_hash_bucket_t *lhb; - int size; unsigned i; void *obj; ENTRY; @@ -299,19 +301,15 @@ lustre_hash_del(lustre_hash_t *lh, void *key, struct hlist_node *hnode) __lustre_hash_key_validate(lh, key, hnode); read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); + i = lh_hash(lh, key, lh->lh_cur_mask); lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + LASSERT(i <= lh->lh_cur_mask); LASSERT(!hlist_unhashed(hnode)); write_lock(&lhb->lhb_rwlock); obj = __lustre_hash_bucket_del(lh, lhb, hnode); write_unlock(&lhb->lhb_rwlock); - - size = lustre_hash_rehash_size(lh); read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); RETURN(obj); } @@ -328,15 +326,14 @@ lustre_hash_del_key(lustre_hash_t *lh, void *key) { struct hlist_node *hnode; lustre_hash_bucket_t *lhb; - int size; unsigned i; void *obj = NULL; ENTRY; read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); + i = lh_hash(lh, key, lh->lh_cur_mask); lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + LASSERT(i <= lh->lh_cur_mask); write_lock(&lhb->lhb_rwlock); hnode = __lustre_hash_bucket_lookup(lh, lhb, key); @@ -344,11 +341,7 @@ lustre_hash_del_key(lustre_hash_t *lh, void *key) obj = __lustre_hash_bucket_del(lh, lhb, hnode); write_unlock(&lhb->lhb_rwlock); - - size = lustre_hash_rehash_size(lh); read_unlock(&lh->lh_rwlock); - if (size) - lustre_hash_rehash(lh, size); RETURN(obj); } @@ -372,9 +365,9 @@ lustre_hash_lookup(lustre_hash_t *lh, void *key) ENTRY; read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); + i = lh_hash(lh, key, lh->lh_cur_mask); lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + LASSERT(i <= lh->lh_cur_mask); read_lock(&lhb->lhb_rwlock); hnode = __lustre_hash_bucket_lookup(lh, lhb, key); @@ -500,7 +493,7 @@ restart: } EXPORT_SYMBOL(lustre_hash_for_each_empty); - /* +/* * For each item in the lustre hash @lh which matches the @key call * the passed callback @func and pass to it as an argument each hash * item and the private @data. Before each callback ops->lh_get will @@ -518,9 +511,9 @@ lustre_hash_for_each_key(lustre_hash_t *lh, void *key, ENTRY; read_lock(&lh->lh_rwlock); - i = lh_hash(lh, key, lh->lh_cur_size - 1); + i = lh_hash(lh, key, lh->lh_cur_mask); lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + LASSERT(i <= lh->lh_cur_mask); read_lock(&lhb->lhb_rwlock); hlist_for_each(hnode, &(lhb->lhb_head)) { @@ -541,7 +534,7 @@ lustre_hash_for_each_key(lustre_hash_t *lh, void *key, EXPORT_SYMBOL(lustre_hash_for_each_key); /** - * Rehash the lustre hash @lh to the given @size. This can be used + * Rehash the lustre hash @lh to the given @bits. This can be used * to grow the hash size when excessive chaining is detected, or to * shrink the hash when it is larger than needed. When the LH_REHASH * flag is set in @lh the lustre hash may be dynamically rehashed @@ -552,7 +545,7 @@ EXPORT_SYMBOL(lustre_hash_for_each_key); * theta thresholds for @lh are tunable via lustre_hash_set_theta(). */ int -lustre_hash_rehash(lustre_hash_t *lh, int size) +lustre_hash_rehash(lustre_hash_t *lh, int bits) { struct hlist_node *hnode; struct hlist_node *pos; @@ -561,18 +554,21 @@ lustre_hash_rehash(lustre_hash_t *lh, int size) lustre_hash_bucket_t *lh_lhb; lustre_hash_bucket_t *rehash_lhb; int i; - int lh_size; int theta; + int lh_mask; + int lh_bits; + int mask = (1 << bits) - 1; void *key; ENTRY; - LASSERT(size > 0); + LASSERT(!in_interrupt()); + LASSERT(mask > 0); - OBD_VMALLOC(rehash_buckets, sizeof(*rehash_buckets) * size); + OBD_VMALLOC(rehash_buckets, sizeof(*rehash_buckets) << bits); if (!rehash_buckets) RETURN(-ENOMEM); - for (i = 0; i < size; i++) { + for (i = 0; i <= mask; i++) { INIT_HLIST_HEAD(&rehash_buckets[i].lhb_head); rwlock_init(&rehash_buckets[i].lhb_rwlock); atomic_set(&rehash_buckets[i].lhb_count, 0); @@ -586,19 +582,21 @@ lustre_hash_rehash(lustre_hash_t *lh, int size) */ theta = __lustre_hash_theta(lh); if ((theta >= lh->lh_min_theta) && (theta <= lh->lh_max_theta)) { - OBD_VFREE(rehash_buckets, sizeof(*rehash_buckets) * size); + OBD_VFREE(rehash_buckets, sizeof(*rehash_buckets) << bits); write_unlock(&lh->lh_rwlock); RETURN(-EALREADY); } - lh_size = lh->lh_cur_size; + lh_bits = lh->lh_cur_bits; lh_buckets = lh->lh_buckets; + lh_mask = (1 << lh_bits) - 1; - lh->lh_cur_size = size; + lh->lh_cur_bits = bits; + lh->lh_cur_mask = (1 << bits) - 1; lh->lh_buckets = rehash_buckets; atomic_inc(&lh->lh_rehash_count); - for (i = 0; i < lh_size; i++) { + for (i = 0; i <= lh_mask; i++) { lh_lhb = &lh_buckets[i]; write_lock(&lh_lhb->lhb_rwlock); @@ -610,7 +608,7 @@ lustre_hash_rehash(lustre_hash_t *lh, int size) * Validate hnode is in the correct bucket. */ if (unlikely(lh->lh_flags & LH_DEBUG)) - LASSERT(lh_hash(lh, key, lh_size - 1) == i); + LASSERT(lh_hash(lh, key, lh_mask) == i); /* * Delete from old hash bucket. @@ -622,7 +620,7 @@ lustre_hash_rehash(lustre_hash_t *lh, int size) /* * Add to rehash bucket, ops->lh_key must be defined. */ - rehash_lhb = &rehash_buckets[lh_hash(lh, key, size-1)]; + rehash_lhb = &rehash_buckets[lh_hash(lh, key, mask)]; hlist_add_head(hnode, &(rehash_lhb->lhb_head)); atomic_inc(&rehash_lhb->lhb_count); } @@ -632,7 +630,7 @@ lustre_hash_rehash(lustre_hash_t *lh, int size) write_unlock(&lh_lhb->lhb_rwlock); } - OBD_VFREE(lh_buckets, sizeof(*lh_buckets) * lh_size); + OBD_VFREE(lh_buckets, sizeof(*lh_buckets) << lh_bits); write_unlock(&lh->lh_rwlock); RETURN(0); @@ -654,7 +652,7 @@ void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, void *new_key, lustre_hash_bucket_t *old_lhb; lustre_hash_bucket_t *new_lhb; unsigned i; - int j; + unsigned j; ENTRY; __lustre_hash_key_validate(lh, new_key, hnode); @@ -662,16 +660,25 @@ void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, void *new_key, read_lock(&lh->lh_rwlock); - i = lh_hash(lh, old_key, lh->lh_cur_size - 1); + i = lh_hash(lh, old_key, lh->lh_cur_mask); old_lhb = &lh->lh_buckets[i]; - LASSERT(i < lh->lh_cur_size); + LASSERT(i <= lh->lh_cur_mask); - j = lh_hash(lh, new_key, lh->lh_cur_size - 1); + j = lh_hash(lh, new_key, lh->lh_cur_mask); new_lhb = &lh->lh_buckets[j]; - LASSERT(j < lh->lh_cur_size); - - write_lock(&old_lhb->lhb_rwlock); - write_lock(&new_lhb->lhb_rwlock); + LASSERT(j <= lh->lh_cur_mask); + + if (i < j) { /* write_lock ordering */ + write_lock(&old_lhb->lhb_rwlock); + write_lock(&new_lhb->lhb_rwlock); + } else if (i > j) { + write_lock(&new_lhb->lhb_rwlock); + write_lock(&old_lhb->lhb_rwlock); + } else { /* do nothing */ + read_unlock(&lh->lh_rwlock); + EXIT; + return; + } /* * Migrate item between hash buckets without calling @@ -694,7 +701,7 @@ EXPORT_SYMBOL(lustre_hash_rehash_key); int lustre_hash_debug_header(char *str, int size) { return snprintf(str, size, - "%-36s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", + "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", LUSTRE_MAX_HASH_NAME, "name", "cur", "min", "max", "theta", "t-min", "t-max", "flags", "rehash", "count", " distribution"); } @@ -714,16 +721,20 @@ int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size) read_lock(&lh->lh_rwlock); theta = __lustre_hash_theta(lh); - c += snprintf(str + c, size - c, "%-36s ",lh->lh_name); - c += snprintf(str + c, size - c, "%5d ", lh->lh_cur_size); - c += snprintf(str + c, size - c, "%5d ", lh->lh_min_size); - c += snprintf(str + c, size - c, "%5d ", lh->lh_max_size); + c += snprintf(str + c, size - c, "%-*s ", + LUSTRE_MAX_HASH_NAME, lh->lh_name); + c += snprintf(str + c, size - c, "%5d ", 1 << lh->lh_cur_bits); + c += snprintf(str + c, size - c, "%5d ", 1 << lh->lh_min_bits); + c += snprintf(str + c, size - c, "%5d ", 1 << lh->lh_max_bits); c += snprintf(str + c, size - c, "%d.%03d ", - theta / 1000, theta % 1000); + __lustre_hash_theta_int(theta), + __lustre_hash_theta_frac(theta)); c += snprintf(str + c, size - c, "%d.%03d ", - lh->lh_min_theta / 1000, lh->lh_min_theta % 1000); + __lustre_hash_theta_int(lh->lh_min_theta), + __lustre_hash_theta_frac(lh->lh_min_theta)); c += snprintf(str + c, size - c, "%d.%03d ", - lh->lh_max_theta / 1000, lh->lh_max_theta % 1000); + __lustre_hash_theta_int(lh->lh_max_theta), + __lustre_hash_theta_frac(lh->lh_max_theta)); c += snprintf(str + c, size - c, " 0x%02x ", lh->lh_flags); c += snprintf(str + c, size - c, "%6d ", atomic_read(&lh->lh_rehash_count)); @@ -744,7 +755,7 @@ int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size) * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1 */ lh_for_each_bucket(lh, lhb, i) - dist[MIN(__fls(atomic_read(&lhb->lhb_count)/MAX(theta,1)),7)]++; + dist[min(__fls(atomic_read(&lhb->lhb_count)/max(theta,1)),7)]++; for (i = 0; i < 8; i++) c += snprintf(str + c, size - c, "%d%c", dist[i],