Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / obdclass / class_hash.c
index ee90aaf..5370a4e 100644 (file)
 /**
  * Initialize new lustre hash, where:
  * @name     - Descriptive hash name
- * @cur_size - Initial hash table size
- * @max_size - Maximum allowed hash table resize
+ * @cur_bits - Initial hash table size, in bits
+ * @max_bits - Maximum allowed hash table resize, in bits
  * @ops      - Registered hash table operations
  * @flags    - LH_REHASH enable synamic hash resizing
  *           - LH_SORT enable chained hash sort
  */
 lustre_hash_t *
-lustre_hash_init(char *name, unsigned int cur_size, unsigned int max_size,
+lustre_hash_init(char *name, unsigned int cur_bits, unsigned int max_bits,
                  lustre_hash_ops_t *ops, int flags)
 {
         lustre_hash_t *lh;
@@ -75,39 +75,40 @@ lustre_hash_init(char *name, unsigned int cur_size, unsigned int max_size,
         LASSERT(name != NULL);
         LASSERT(ops != NULL);
 
-        /*
-         * Ensure hash is a power of two to allow the use of a bitmask
-         * in the hash function instead of a more expensive modulus.
-         */
-        LASSERTF(cur_size && (cur_size & (cur_size - 1)) == 0,
-                 "Size (%u) is not power of 2\n", cur_size);
-        LASSERTF(max_size && (max_size & (max_size - 1)) == 0,
-                 "Size (%u) is not power of 2\n", max_size);
+        LASSERT(cur_bits > 0);
+        LASSERT(max_bits >= cur_bits);
+        LASSERT(max_bits < 31);
 
         OBD_ALLOC_PTR(lh);
         if (!lh)
                 RETURN(NULL);
 
         strncpy(lh->lh_name, name, sizeof(lh->lh_name));
+        lh->lh_name[sizeof(lh->lh_name) - 1] = '\0';
         atomic_set(&lh->lh_rehash_count, 0);
         atomic_set(&lh->lh_count, 0);
         rwlock_init(&lh->lh_rwlock);
-        lh->lh_cur_size = cur_size;
-        lh->lh_min_size = cur_size;
-        lh->lh_max_size = max_size;
+        lh->lh_cur_bits = cur_bits;
+        lh->lh_cur_mask = (1 << cur_bits) - 1;
+        lh->lh_min_bits = cur_bits;
+        lh->lh_max_bits = max_bits;
+        /* XXX: need to fixup lustre_hash_rehash_bits() before this can be
+         *      anything other than 0.5 and 2.0 */
+        lh->lh_min_theta = 1 << (LH_THETA_BITS - 1);
+        lh->lh_max_theta = 1 << (LH_THETA_BITS + 1);
         lh->lh_ops = ops;
         lh->lh_flags = flags;
 
         /* theta * 1000 */
         __lustre_hash_set_theta(lh, 500, 2000);
 
-        OBD_VMALLOC(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size);
+        OBD_VMALLOC(lh->lh_buckets, sizeof(*lh->lh_buckets) << lh->lh_cur_bits);
         if (!lh->lh_buckets) {
                 OBD_FREE_PTR(lh);
                 RETURN(NULL);
         }
 
-        for (i = 0; i < lh->lh_cur_size; i++) {
+        for (i = 0; i <= lh->lh_cur_mask; i++) {
                 INIT_HLIST_HEAD(&lh->lh_buckets[i].lhb_head);
                 rwlock_init(&lh->lh_buckets[i].lhb_rwlock);
                 atomic_set(&lh->lh_buckets[i].lhb_count, 0);
@@ -146,35 +147,31 @@ lustre_hash_exit(lustre_hash_t *lh)
                 write_unlock(&lhb->lhb_rwlock);
         }
 
-        OBD_VFREE(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size);
         LASSERT(atomic_read(&lh->lh_count) == 0);
         write_unlock(&lh->lh_rwlock);
 
+        OBD_VFREE(lh->lh_buckets, sizeof(*lh->lh_buckets) << lh->lh_cur_bits);
         OBD_FREE_PTR(lh);
         EXIT;
 }
 EXPORT_SYMBOL(lustre_hash_exit);
 
-static inline unsigned int lustre_hash_rehash_size(lustre_hash_t *lh)
+static inline unsigned int lustre_hash_rehash_bits(lustre_hash_t *lh)
 {
-        int size;
-
         if (!(lh->lh_flags & LH_REHASH))
                 return 0;
 
-        if ((lh->lh_cur_size < lh->lh_max_size) &&
+        /* XXX: need to handle case with max_theta != 2.0
+         *      and the case with min_theta != 0.5 */
+        if ((lh->lh_cur_bits < lh->lh_max_bits) &&
             (__lustre_hash_theta(lh) > lh->lh_max_theta))
-                size = min(lh->lh_cur_size * 2, lh->lh_max_size);
-        else if ((lh->lh_cur_size > lh->lh_min_size) &&
-            (__lustre_hash_theta(lh) < lh->lh_min_theta))
-                size = max(lh->lh_cur_size / 2, lh->lh_min_size);
-        else
-                size = 0;
+                return lh->lh_cur_bits + 1;
 
-        if (lh->lh_cur_size == size)
-                size = 0;
+        if ((lh->lh_cur_bits > lh->lh_min_bits) &&
+            (__lustre_hash_theta(lh) < lh->lh_min_theta))
+                return lh->lh_cur_bits - 1;
 
-        return size;
+        return 0;
 }
 
 /**
@@ -185,26 +182,26 @@ void
 lustre_hash_add(lustre_hash_t *lh, void *key, struct hlist_node *hnode)
 {
         lustre_hash_bucket_t *lhb;
-        int                   size;
+        int                   bits;
         unsigned              i;
         ENTRY;
 
         __lustre_hash_key_validate(lh, key, hnode);
 
         read_lock(&lh->lh_rwlock);
-        i = lh_hash(lh, key, lh->lh_cur_size - 1);
+        i = lh_hash(lh, key, lh->lh_cur_mask);
         lhb = &lh->lh_buckets[i];
-        LASSERT(i < lh->lh_cur_size);
+        LASSERT(i <= lh->lh_cur_mask);
         LASSERT(hlist_unhashed(hnode));
 
         write_lock(&lhb->lhb_rwlock);
         __lustre_hash_bucket_add(lh, lhb, hnode);
         write_unlock(&lhb->lhb_rwlock);
 
-        size = lustre_hash_rehash_size(lh);
+        bits = lustre_hash_rehash_bits(lh);
         read_unlock(&lh->lh_rwlock);
-        if (size)
-                lustre_hash_rehash(lh, size);
+        if (bits)
+                lustre_hash_rehash(lh, bits);
 
         EXIT;
 }
@@ -214,7 +211,7 @@ static struct hlist_node *
 lustre_hash_findadd_unique_hnode(lustre_hash_t *lh, void *key,
                                  struct hlist_node *hnode)
 {
-        int                   size = 0;
+        int                   bits = 0;
         struct hlist_node    *ehnode;
         lustre_hash_bucket_t *lhb;
         unsigned              i;
@@ -223,9 +220,9 @@ lustre_hash_findadd_unique_hnode(lustre_hash_t *lh, void *key,
         __lustre_hash_key_validate(lh, key, hnode);
 
         read_lock(&lh->lh_rwlock);
-        i = lh_hash(lh, key, lh->lh_cur_size - 1);
+        i = lh_hash(lh, key, lh->lh_cur_mask);
         lhb = &lh->lh_buckets[i];
-        LASSERT(i < lh->lh_cur_size);
+        LASSERT(i <= lh->lh_cur_mask);
         LASSERT(hlist_unhashed(hnode));
 
         write_lock(&lhb->lhb_rwlock);
@@ -235,12 +232,12 @@ lustre_hash_findadd_unique_hnode(lustre_hash_t *lh, void *key,
         } else {
                 __lustre_hash_bucket_add(lh, lhb, hnode);
                 ehnode = hnode;
-                size = lustre_hash_rehash_size(lh);
+                bits = lustre_hash_rehash_bits(lh);
         }
         write_unlock(&lhb->lhb_rwlock);
         read_unlock(&lh->lh_rwlock);
-        if (size)
-                lustre_hash_rehash(lh, size);
+        if (bits)
+                lustre_hash_rehash(lh, bits);
 
         RETURN(ehnode);
 }
@@ -261,7 +258,6 @@ lustre_hash_add_unique(lustre_hash_t *lh, void *key, struct hlist_node *hnode)
                 lh_put(lh, ehnode);
                 RETURN(-EALREADY);
         }
-
         RETURN(0);
 }
 EXPORT_SYMBOL(lustre_hash_add_unique);
@@ -305,9 +301,9 @@ lustre_hash_del(lustre_hash_t *lh, void *key, struct hlist_node *hnode)
         __lustre_hash_key_validate(lh, key, hnode);
 
         read_lock(&lh->lh_rwlock);
-        i = lh_hash(lh, key, lh->lh_cur_size - 1);
+        i = lh_hash(lh, key, lh->lh_cur_mask);
         lhb = &lh->lh_buckets[i];
-        LASSERT(i < lh->lh_cur_size);
+        LASSERT(i <= lh->lh_cur_mask);
         LASSERT(!hlist_unhashed(hnode));
 
         write_lock(&lhb->lhb_rwlock);
@@ -335,9 +331,9 @@ lustre_hash_del_key(lustre_hash_t *lh, void *key)
         ENTRY;
 
         read_lock(&lh->lh_rwlock);
-        i = lh_hash(lh, key, lh->lh_cur_size - 1);
+        i = lh_hash(lh, key, lh->lh_cur_mask);
         lhb = &lh->lh_buckets[i];
-        LASSERT(i < lh->lh_cur_size);
+        LASSERT(i <= lh->lh_cur_mask);
 
         write_lock(&lhb->lhb_rwlock);
         hnode = __lustre_hash_bucket_lookup(lh, lhb, key);
@@ -369,9 +365,9 @@ lustre_hash_lookup(lustre_hash_t *lh, void *key)
         ENTRY;
 
         read_lock(&lh->lh_rwlock);
-        i = lh_hash(lh, key, lh->lh_cur_size - 1);
+        i = lh_hash(lh, key, lh->lh_cur_mask);
         lhb = &lh->lh_buckets[i];
-        LASSERT(i < lh->lh_cur_size);
+        LASSERT(i <= lh->lh_cur_mask);
 
         read_lock(&lhb->lhb_rwlock);
         hnode = __lustre_hash_bucket_lookup(lh, lhb, key);
@@ -497,7 +493,7 @@ restart:
 }
 EXPORT_SYMBOL(lustre_hash_for_each_empty);
 
-/**
+/*
  * For each item in the lustre hash @lh which matches the @key call
  * the passed callback @func and pass to it as an argument each hash
  * item and the private @data.  Before each callback ops->lh_get will
@@ -515,9 +511,9 @@ lustre_hash_for_each_key(lustre_hash_t *lh, void *key,
         ENTRY;
 
         read_lock(&lh->lh_rwlock);
-        i = lh_hash(lh, key, lh->lh_cur_size - 1);
+        i = lh_hash(lh, key, lh->lh_cur_mask);
         lhb = &lh->lh_buckets[i];
-        LASSERT(i < lh->lh_cur_size);
+        LASSERT(i <= lh->lh_cur_mask);
 
         read_lock(&lhb->lhb_rwlock);
         hlist_for_each(hnode, &(lhb->lhb_head)) {
@@ -538,7 +534,7 @@ lustre_hash_for_each_key(lustre_hash_t *lh, void *key,
 EXPORT_SYMBOL(lustre_hash_for_each_key);
 
 /**
- * Rehash the lustre hash @lh to the given @size.  This can be used
+ * Rehash the lustre hash @lh to the given @bits.  This can be used
  * to grow the hash size when excessive chaining is detected, or to
  * shrink the hash when it is larger than needed.  When the LH_REHASH
  * flag is set in @lh the lustre hash may be dynamically rehashed
@@ -549,7 +545,7 @@ EXPORT_SYMBOL(lustre_hash_for_each_key);
  * theta thresholds for @lh are tunable via lustre_hash_set_theta().
  */
 int
-lustre_hash_rehash(lustre_hash_t *lh, int size)
+lustre_hash_rehash(lustre_hash_t *lh, int bits)
 {
         struct hlist_node     *hnode;
         struct hlist_node     *pos;
@@ -558,19 +554,21 @@ lustre_hash_rehash(lustre_hash_t *lh, int size)
         lustre_hash_bucket_t  *lh_lhb;
         lustre_hash_bucket_t  *rehash_lhb;
         int                    i;
-        int                    lh_size;
         int                    theta;
+        int                    lh_mask;
+        int                    lh_bits;
+        int                    mask = (1 << bits) - 1;
         void                  *key;
         ENTRY;
 
-        LASSERT(size > 0);
         LASSERT(!in_interrupt());
+        LASSERT(mask > 0);
 
-        OBD_VMALLOC(rehash_buckets, sizeof(*rehash_buckets) * size);
+        OBD_VMALLOC(rehash_buckets, sizeof(*rehash_buckets) << bits);
         if (!rehash_buckets)
                 RETURN(-ENOMEM);
 
-        for (i = 0; i < size; i++) {
+        for (i = 0; i <= mask; i++) {
                 INIT_HLIST_HEAD(&rehash_buckets[i].lhb_head);
                 rwlock_init(&rehash_buckets[i].lhb_rwlock);
                 atomic_set(&rehash_buckets[i].lhb_count, 0);
@@ -584,19 +582,21 @@ lustre_hash_rehash(lustre_hash_t *lh, int size)
          */
         theta = __lustre_hash_theta(lh);
         if ((theta >= lh->lh_min_theta) && (theta <= lh->lh_max_theta)) {
-                OBD_VFREE(rehash_buckets, sizeof(*rehash_buckets) * size);
+                OBD_VFREE(rehash_buckets, sizeof(*rehash_buckets) << bits);
                 write_unlock(&lh->lh_rwlock);
                 RETURN(-EALREADY);
         }
 
-        lh_size = lh->lh_cur_size;
+        lh_bits = lh->lh_cur_bits;
         lh_buckets = lh->lh_buckets;
+        lh_mask = (1 << lh_bits) - 1;
 
-        lh->lh_cur_size = size;
+        lh->lh_cur_bits = bits;
+        lh->lh_cur_mask = (1 << bits) - 1;
         lh->lh_buckets = rehash_buckets;
         atomic_inc(&lh->lh_rehash_count);
 
-        for (i = 0; i < lh_size; i++) {
+        for (i = 0; i <= lh_mask; i++) {
                 lh_lhb = &lh_buckets[i];
 
                 write_lock(&lh_lhb->lhb_rwlock);
@@ -608,7 +608,7 @@ lustre_hash_rehash(lustre_hash_t *lh, int size)
                          * Validate hnode is in the correct bucket.
                          */
                         if (unlikely(lh->lh_flags & LH_DEBUG))
-                                LASSERT(lh_hash(lh, key, lh_size - 1) == i);
+                                LASSERT(lh_hash(lh, key, lh_mask) == i);
 
                         /*
                          * Delete from old hash bucket.
@@ -620,7 +620,7 @@ lustre_hash_rehash(lustre_hash_t *lh, int size)
                         /*
                          * Add to rehash bucket, ops->lh_key must be defined.
                          */
-                        rehash_lhb = &rehash_buckets[lh_hash(lh, key, size-1)];
+                        rehash_lhb = &rehash_buckets[lh_hash(lh, key, mask)];
                         hlist_add_head(hnode, &(rehash_lhb->lhb_head));
                         atomic_inc(&rehash_lhb->lhb_count);
                 }
@@ -630,7 +630,7 @@ lustre_hash_rehash(lustre_hash_t *lh, int size)
                 write_unlock(&lh_lhb->lhb_rwlock);
         }
 
-        OBD_VFREE(lh_buckets, sizeof(*lh_buckets) * lh_size);
+        OBD_VFREE(lh_buckets, sizeof(*lh_buckets) << lh_bits);
         write_unlock(&lh->lh_rwlock);
 
         RETURN(0);
@@ -652,7 +652,7 @@ void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, void *new_key,
         lustre_hash_bucket_t  *old_lhb;
         lustre_hash_bucket_t  *new_lhb;
         unsigned               i;
-        int                    j;
+        unsigned               j;
         ENTRY;
 
         __lustre_hash_key_validate(lh, new_key, hnode);
@@ -660,16 +660,25 @@ void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, void *new_key,
 
         read_lock(&lh->lh_rwlock);
 
-        i = lh_hash(lh, old_key, lh->lh_cur_size - 1);
+        i = lh_hash(lh, old_key, lh->lh_cur_mask);
         old_lhb = &lh->lh_buckets[i];
-        LASSERT(i < lh->lh_cur_size);
+        LASSERT(i <= lh->lh_cur_mask);
 
-        j = lh_hash(lh, new_key, lh->lh_cur_size - 1);
+        j = lh_hash(lh, new_key, lh->lh_cur_mask);
         new_lhb = &lh->lh_buckets[j];
-        LASSERT(j < lh->lh_cur_size);
-
-        write_lock(&old_lhb->lhb_rwlock);
-        write_lock(&new_lhb->lhb_rwlock);
+        LASSERT(j <= lh->lh_cur_mask);
+
+        if (i < j) { /* write_lock ordering */
+                write_lock(&old_lhb->lhb_rwlock);
+                write_lock(&new_lhb->lhb_rwlock);
+        } else if (i > j) {
+                write_lock(&new_lhb->lhb_rwlock);
+                write_lock(&old_lhb->lhb_rwlock);
+        } else { /* do nothing */
+                read_unlock(&lh->lh_rwlock);
+                EXIT;
+                return;
+        }
 
         /*
          * Migrate item between hash buckets without calling
@@ -692,7 +701,7 @@ EXPORT_SYMBOL(lustre_hash_rehash_key);
 int lustre_hash_debug_header(char *str, int size)
 {
         return snprintf(str, size,
-                 "%-36s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n",
+                 "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", LUSTRE_MAX_HASH_NAME,
                  "name", "cur", "min", "max", "theta", "t-min", "t-max",
                  "flags", "rehash", "count", " distribution");
 }
@@ -712,16 +721,20 @@ int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size)
         read_lock(&lh->lh_rwlock);
         theta = __lustre_hash_theta(lh);
 
-        c += snprintf(str + c, size - c, "%-36s ",lh->lh_name);
-        c += snprintf(str + c, size - c, "%5d ",  lh->lh_cur_size);
-        c += snprintf(str + c, size - c, "%5d ",  lh->lh_min_size);
-        c += snprintf(str + c, size - c, "%5d ",  lh->lh_max_size);
+        c += snprintf(str + c, size - c, "%-*s ",
+                      LUSTRE_MAX_HASH_NAME, lh->lh_name);
+        c += snprintf(str + c, size - c, "%5d ",  1 << lh->lh_cur_bits);
+        c += snprintf(str + c, size - c, "%5d ",  1 << lh->lh_min_bits);
+        c += snprintf(str + c, size - c, "%5d ",  1 << lh->lh_max_bits);
         c += snprintf(str + c, size - c, "%d.%03d ",
-                      theta / 1000, theta % 1000);
+                      __lustre_hash_theta_int(theta),
+                      __lustre_hash_theta_frac(theta));
         c += snprintf(str + c, size - c, "%d.%03d ",
-                      lh->lh_min_theta / 1000, lh->lh_min_theta % 1000);
+                      __lustre_hash_theta_int(lh->lh_min_theta),
+                      __lustre_hash_theta_frac(lh->lh_min_theta));
         c += snprintf(str + c, size - c, "%d.%03d ",
-                      lh->lh_max_theta / 1000, lh->lh_max_theta % 1000);
+                      __lustre_hash_theta_int(lh->lh_max_theta),
+                      __lustre_hash_theta_frac(lh->lh_max_theta));
         c += snprintf(str + c, size - c, " 0x%02x ", lh->lh_flags);
         c += snprintf(str + c, size - c, "%6d ",
                       atomic_read(&lh->lh_rehash_count));
@@ -742,7 +755,7 @@ int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size)
          * Non-Uniform hash distribution:  128/125/0/0/0/0/2/1
          */
         lh_for_each_bucket(lh, lhb, i)
-                dist[MIN(__fls(atomic_read(&lhb->lhb_count)/MAX(theta,1)),7)]++;
+                dist[min(__fls(atomic_read(&lhb->lhb_count)/max(theta,1)),7)]++;
 
         for (i = 0; i < 8; i++)
                 c += snprintf(str + c, size - c, "%d%c",  dist[i],