#include <lustre_lib.h>
-/* #define LUSTRE_HASH_DEBUG 1 */
-
-/* define the hash bucket*/
-struct lustre_hash_bucket {
- struct hlist_head lhb_head;
- spinlock_t lhb_lock;
-#ifdef LUSTRE_HASH_DEBUG
- /* the number of hash item per bucket,
- * it will help us to analyse the hash distribute
- */
- int lhb_item_count;
-#endif
-};
-
-struct lustre_hash_operations;
-
-struct lustre_class_hash_body {
- char hashname[128];
- spinlock_t lchb_lock; /* body lock */
- struct lustre_hash_bucket *lchb_hash_tables;
- __u32 lchb_hash_max_size; /* define the hash tables size */
- /* define the hash operations */
- struct lustre_hash_operations *lchb_hash_operations;
-};
-
-/* hash operations method define */
-struct lustre_hash_operations {
- __u32 (*lustre_hashfn) (struct lustre_class_hash_body *hash_body,
- void *key);
- int (*lustre_hash_key_compare) (void *key,
- struct hlist_node *compared_hnode);
- /* add refcount */
- void* (*lustre_hash_object_refcount_get) (struct hlist_node *hash_item);
- /* dec refcount */
- void (*lustre_hash_object_refcount_put) (struct hlist_node *hash_item);
-};
-
-static inline struct hlist_node *
-lustre_hash_getitem_in_bucket_nolock(struct lustre_class_hash_body *hash_body,
- int hashent, void *key)
-{
- struct lustre_hash_bucket *bucket;
- struct hlist_node *hash_item_node;
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
- int find = 0;
- ENTRY;
-
- bucket = &hash_body->lchb_hash_tables[hashent];
- hlist_for_each(hash_item_node, &(bucket->lhb_head)) {
- find = hop->lustre_hash_key_compare(key, hash_item_node);
- if (find == 1)
- break;
- }
- RETURN(find == 1 ? hash_item_node : NULL);
+struct lustre_hash_ops;
+
+typedef struct lustre_hash_bucket {
+ /**
+ * Entries list.
+ */
+ struct hlist_head lhb_head;
+ /**
+ * Current entries.
+ */
+ atomic_t lhb_count;
+ /**
+ * Protects the lustre_hash_bucket.
+ */
+ rwlock_t lhb_rwlock;
+} lustre_hash_bucket_t;
+
+typedef struct lustre_hash {
+ /**
+ * Hash name.
+ */
+ char *lh_name;
+ /**
+ * Hash name size.
+ */
+ unsigned int lh_name_size;
+ /**
+ * Current hash size.
+ */
+ unsigned int lh_cur_size;
+ /**
+ * Min hash size.
+ */
+ unsigned int lh_min_size;
+ /**
+ * Max hash size.
+ */
+ unsigned int lh_max_size;
+ /**
+ * Resize min threshold.
+ */
+ unsigned int lh_min_theta;
+ /**
+ * Resize max threshold.
+ */
+ unsigned int lh_max_theta;
+ /**
+ * Hash flags.
+ */
+ int lh_flags;
+ /**
+ * Current entries.
+ */
+ atomic_t lh_count;
+ /**
+ * Resize count.
+ */
+ atomic_t lh_rehash_count;
+ /**
+ * Hash buckets.
+ */
+ struct lustre_hash_bucket *lh_buckets;
+ /**
+ * Hash operations.
+ */
+ struct lustre_hash_ops *lh_ops;
+ /**
+ * Protects lustre_hash.
+ */
+ rwlock_t lh_rwlock;
+} lustre_hash_t;
+
+typedef struct lustre_hash_ops {
+ unsigned (*lh_hash)(lustre_hash_t *lh, void *key, unsigned mask);
+ void * (*lh_key)(struct hlist_node *hnode);
+ int (*lh_compare)(void *key, struct hlist_node *hnode);
+ void * (*lh_get)(struct hlist_node *hnode);
+ void * (*lh_put)(struct hlist_node *hnode);
+ void (*lh_exit)(struct hlist_node *hnode);
+} lustre_hash_ops_t;
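+
+/**
+ * Illustrative sketch only (not part of this patch): a minimal ops
+ * table for a hypothetical object type. "struct my_obj", its fields,
+ * and the my_obj_* helpers are assumptions; my_obj_get()/my_obj_put()
+ * would take and drop an object reference, and lh_u32_hash() is
+ * defined later in this header.
+ *
+ *   static unsigned my_obj_hash(lustre_hash_t *lh, void *key,
+ *                               unsigned mask)
+ *   {
+ *           return lh_u32_hash(*(__u32 *)key, mask);
+ *   }
+ *
+ *   static void *my_obj_key(struct hlist_node *hnode)
+ *   {
+ *           return &hlist_entry(hnode, struct my_obj, mo_hash)->mo_id;
+ *   }
+ *
+ *   static int my_obj_compare(void *key, struct hlist_node *hnode)
+ *   {
+ *           return *(__u32 *)my_obj_key(hnode) == *(__u32 *)key;
+ *   }
+ *
+ *   static lustre_hash_ops_t my_obj_hash_ops = {
+ *           .lh_hash    = my_obj_hash,
+ *           .lh_key     = my_obj_key,
+ *           .lh_compare = my_obj_compare,
+ *           .lh_get     = my_obj_get,
+ *           .lh_put     = my_obj_put,
+ *           .lh_exit    = NULL,
+ *   };
+ */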
+
+/**
+ * Enable expensive debug checks.
+ */
+#define LH_DEBUG 0x0001
+/**
+ * Enable dynamic hash resizing.
+ */
+#define LH_REHASH 0x0002
+
+#define LHO(lh) (lh)->lh_ops
+#define LHP(lh, op) (lh)->lh_ops->lh_ ## op
+
+static inline unsigned
+lh_hash(lustre_hash_t *lh, void *key, unsigned mask)
+{
+ LASSERT(lh);
+
+ if (LHO(lh) && LHP(lh, hash))
+ return LHP(lh, hash)(lh, key, mask);
+
+ return -EOPNOTSUPP;
}
-static inline int
-lustre_hash_delitem_nolock(struct lustre_class_hash_body *hash_body,
- int hashent, struct hlist_node * hash_item)
+static inline void *
+lh_key(lustre_hash_t *lh, struct hlist_node *hnode)
{
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
+ LASSERT(lh);
+ LASSERT(hnode);
- hlist_del_init(hash_item);
+ if (LHO(lh) && LHP(lh, key))
+ return LHP(lh, key)(hnode);
- hop->lustre_hash_object_refcount_put(hash_item);
+ return NULL;
+}
+
+/**
+ * Returns 1 on a match.
+ *
+ * XXX: This would be better if it returned -1, 0, or 1 for
+ * <, =, > respectively. It could then be used to implement
+ * an LH_SORT feature flag which could keep each lustre hash
+ * bucket in order. This would increase insertion times
+ * but could reduce lookup times for deep chains. Ideally,
+ * the rehash should keep chain depth short, but if that
+ * ends up not being the case this would be a nice feature.
+ */
+static inline int
+lh_compare(lustre_hash_t *lh, void *key, struct hlist_node *hnode)
+{
+ LASSERT(lh);
+ LASSERT(hnode);
+
+ if (LHO(lh) && LHP(lh, compare))
+ return LHP(lh, compare)(key, hnode);
+
+ return -EOPNOTSUPP;
+}
+
+static inline void *
+lh_get(lustre_hash_t *lh, struct hlist_node *hnode)
+{
+ LASSERT(lh);
+ LASSERT(hnode);
+
+ if (LHO(lh) && LHP(lh, get))
+ return LHP(lh, get)(hnode);
+
+ return NULL;
+}
+
+static inline void *
+lh_put(lustre_hash_t *lh, struct hlist_node *hnode)
+{
+ LASSERT(lh);
+ LASSERT(hnode);
+
+ if (LHO(lh) && LHP(lh, put))
+ return LHP(lh, put)(hnode);
+
+ return NULL;
+}
-#ifdef LUSTRE_HASH_DEBUG
- hash_body->lchb_hash_tables[hashent].lhb_item_count--;
- CDEBUG(D_INFO, "hashname[%s] bucket[%d] has [%d] hashitem\n",
- hash_body->hashname, hashent,
- hash_body->lchb_hash_tables[hashent].lhb_item_count);
-#endif
+static inline void
+lh_exit(lustre_hash_t *lh, struct hlist_node *hnode)
+{
+ LASSERT(lh);
+ LASSERT(hnode);
+
+ if (LHO(lh) && LHP(lh, exit))
+ return LHP(lh, exit)(hnode);
+}
- RETURN(0);
+/**
+ * Validate hnode references the correct key.
+ */
+static inline void
+__lustre_hash_key_validate(lustre_hash_t *lh, void *key,
+ struct hlist_node *hnode)
+{
+ if (unlikely(lh->lh_flags & LH_DEBUG))
+ LASSERT(lh_compare(lh, key, hnode));
+}
+
+/*
+ * Validate hnode is in the correct bucket.
+ */
+static inline void
+__lustre_hash_bucket_validate(lustre_hash_t *lh, lustre_hash_bucket_t *lhb,
+ struct hlist_node *hnode)
+{
+ unsigned i;
+
+ if (unlikely(lh->lh_flags & LH_DEBUG)) {
+ i = lh_hash(lh, lh_key(lh, hnode), lh->lh_cur_size - 1);
+ LASSERT(&lh->lh_buckets[i] == lhb);
+ }
+}
+
+static inline struct hlist_node *
+__lustre_hash_bucket_lookup(lustre_hash_t *lh,
+ lustre_hash_bucket_t *lhb, void *key)
+{
+ struct hlist_node *hnode;
+
+ hlist_for_each(hnode, &lhb->lhb_head)
+ if (lh_compare(lh, key, hnode))
+ return hnode;
+
+ return NULL;
+}
+
+static inline void *
+__lustre_hash_bucket_add(lustre_hash_t *lh,
+ lustre_hash_bucket_t *lhb,
+ struct hlist_node *hnode)
+{
+ hlist_add_head(hnode, &(lhb->lhb_head));
+ atomic_inc(&lhb->lhb_count);
+ atomic_inc(&lh->lh_count);
+
+ return lh_get(lh, hnode);
+}
+
+static inline void *
+__lustre_hash_bucket_del(lustre_hash_t *lh,
+ lustre_hash_bucket_t *lhb,
+ struct hlist_node *hnode)
+{
+ hlist_del_init(hnode);
+ atomic_dec(&lhb->lhb_count);
+ atomic_dec(&lh->lh_count);
+
+ return lh_put(lh, hnode);
+}
+
+/*
+ * Hash init/cleanup functions.
+ */
+lustre_hash_t *lustre_hash_init(char *name, unsigned int cur_size,
+ unsigned int max_size,
+ lustre_hash_ops_t *ops, int flags);
+void lustre_hash_exit(lustre_hash_t *lh);
+
+/*
+ * Hash addition functions.
+ */
+void lustre_hash_add(lustre_hash_t *lh, void *key,
+ struct hlist_node *hnode);
+int lustre_hash_add_unique(lustre_hash_t *lh, void *key,
+ struct hlist_node *hnode);
+void *lustre_hash_findadd_unique(lustre_hash_t *lh, void *key,
+ struct hlist_node *hnode);
+
+/*
+ * Hash deletion functions.
+ */
+void *lustre_hash_del(lustre_hash_t *lh, void *key, struct hlist_node *hnode);
+void *lustre_hash_del_key(lustre_hash_t *lh, void *key);
+
+/*
+ * Hash lookup/for_each functions.
+ */
+void *lustre_hash_lookup(lustre_hash_t *lh, void *key);
+typedef void (*lh_for_each_cb)(void *obj, void *data);
+void lustre_hash_for_each(lustre_hash_t *lh, lh_for_each_cb, void *data);
+void lustre_hash_for_each_safe(lustre_hash_t *lh, lh_for_each_cb, void *data);
+void lustre_hash_for_each_empty(lustre_hash_t *lh, lh_for_each_cb, void *data);
+void lustre_hash_for_each_key(lustre_hash_t *lh, void *key,
+ lh_for_each_cb, void *data);
+
+/*
+ * Rehash - theta is calculated to be the average chained
+ * hash depth assuming a perfectly uniform hash function.
+ */
+int lustre_hash_rehash(lustre_hash_t *lh, int size);
+void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key,
+ void *new_key, struct hlist_node *hnode);
+
+
+static inline int
+__lustre_hash_theta(lustre_hash_t *lh)
+{
+ return ((atomic_read(&lh->lh_count) * 1000) / lh->lh_cur_size);
+}
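+
+/*
+ * Worked example (illustrative numbers): 1500 items hashed into 512
+ * buckets give theta = (1500 * 1000) / 512 = 2929, i.e. an average
+ * chain depth of ~2.9. With the defaults set by lustre_hash_init()
+ * (lh_min_theta = 500, lh_max_theta = 2000) this exceeds the maximum
+ * threshold, so a hash created with LH_REHASH would grow on the next
+ * add or delete.
+ */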
+
+static inline void
+__lustre_hash_set_theta(lustre_hash_t *lh, int min, int max)
+{
+ LASSERT(min < max);
+ lh->lh_min_theta = min;
+ lh->lh_max_theta = max;
+}
+
+/*
+ * Generic debug formatting routines mainly for proc handler.
+ */
+int lustre_hash_debug_header(char *str, int size);
+int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size);
+
+
+/**
+ * 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1
+ */
+#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
+/**
+ * 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1
+ */
+#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
+
+
+/**
+ * Generic djb2 hash algorithm for character arrays.
+ */
+static inline unsigned
+lh_djb2_hash(void *key, size_t size, unsigned mask)
+{
+ unsigned i, hash = 5381;
+
+ LASSERT(key != NULL);
+
+ for (i = 0; i < size; i++)
+ hash = hash * 33 + ((char *)key)[i];
+
+ RETURN(hash & mask);
+}
+
+/**
+ * Generic u32 hash algorithm.
+ */
+static inline unsigned
+lh_u32_hash(__u32 key, unsigned mask)
+{
+ RETURN((key * GOLDEN_RATIO_PRIME_32) & mask);
+}
+
+/**
+ * Generic u64 hash algorithm.
+ */
+static inline unsigned
+lh_u64_hash(__u64 key, unsigned mask)
+{
+ RETURN((unsigned)(key * GOLDEN_RATIO_PRIME_64) & mask);
}
-typedef void (*hash_item_iterate_cb) (void *obj, void *data);
-
-int lustre_hash_init(struct lustre_class_hash_body **hash_body,
- char *hashname, __u32 hashsize,
- struct lustre_hash_operations *hash_operations);
-void lustre_hash_exit(struct lustre_class_hash_body **hash_body);
-int lustre_hash_additem_unique(struct lustre_class_hash_body *hash_body,
- void *key, struct hlist_node *actual_hnode);
-void *lustre_hash_findadd_unique(struct lustre_class_hash_body *hash_body,
- void *key, struct hlist_node *actual_hnode);
-int lustre_hash_additem(struct lustre_class_hash_body *hash_body, void *key,
- struct hlist_node *actual_hnode);
-int lustre_hash_delitem_by_key(struct lustre_class_hash_body *hash_body,
- void *key);
-int lustre_hash_delitem(struct lustre_class_hash_body *hash_body, void *key,
- struct hlist_node *hash_item);
-void lustre_hash_bucket_iterate(struct lustre_class_hash_body *hash_body,
- void *key, hash_item_iterate_cb,
- void *data);
-void lustre_hash_iterate_all(struct lustre_class_hash_body *hash_body,
- hash_item_iterate_cb, void *data);
-
-void * lustre_hash_get_object_by_key(struct lustre_class_hash_body *hash_body,
- void *key);
-
-__u32 djb2_hashfn(struct lustre_class_hash_body *hash_body, void* key,
- size_t size);
-
-/* ( uuid <-> export ) hash operations define */
-__u32 uuid_hashfn(struct lustre_class_hash_body *hash_body, void * key);
-int uuid_hash_key_compare(void *key, struct hlist_node * compared_hnode);
-void * uuid_export_refcount_get(struct hlist_node * actual_hnode);
-void uuid_export_refcount_put(struct hlist_node * actual_hnode);
-
-/* ( nid <-> export ) hash operations define */
-__u32 nid_hashfn(struct lustre_class_hash_body *hash_body, void * key);
-int nid_hash_key_compare(void *key, struct hlist_node * compared_hnode);
-void * nid_export_refcount_get(struct hlist_node * actual_hnode);
-void nid_export_refcount_put(struct hlist_node * actual_hnode);
-
-/* ( net_peer <-> connection ) hash operations define */
-__u32 conn_hashfn(struct lustre_class_hash_body *hash_body, void * key);
-int conn_hash_key_compare(void *key, struct hlist_node * compared_hnode);
-void * conn_refcount_get(struct hlist_node * actual_hnode);
-void conn_refcount_put(struct hlist_node * actual_hnode);
-
-/* ( nid <-> nidstats ) hash operations define. uses nid_hashfn */
-int nidstats_hash_key_compare(void *key, struct hlist_node * compared_hnode);
-void* nidstats_refcount_get(struct hlist_node * actual_hnode);
-void nidstats_refcount_put(struct hlist_node * actual_hnode);
-extern struct lustre_hash_operations nid_stat_hash_operations;
+#define lh_for_each_bucket(lh, lhb, pos) \
+ for (pos = 0; \
+ pos < lh->lh_cur_size && \
+ ({ lhb = &lh->lh_buckets[pos]; 1; }); \
+ pos++)
#endif /* __CLASS_HASH_H */
#define might_sleep_if(c)
#define smp_mb()
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static inline
+int fls(int x)
+{
+ int r = 32;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xffff0000u)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xff000000u)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xf0000000u)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xc0000000u)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x80000000u)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
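+
+/*
+ * Example trace (illustrative): for fls(0xf0) the top 16 bits and
+ * then the next 8 bits are clear, so x is shifted left by 16 and 8
+ * and r = 32 - 16 - 8 = 8; the remaining tests find the top bit set
+ * and fls(0xf0) = 8, i.e. bit 8 counting from 1 is the highest set.
+ */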
+
static inline
int test_and_set_bit(int nr, unsigned long *addr)
{
int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
int count, int *eof, void *data);
+/* lprocfs_status.c: hash statistics */
+int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+
extern int lprocfs_seq_release(struct inode *, struct file *);
/* in lprocfs_stat.c, to protect the private data for proc entries */
#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
struct ptlrpc_connection {
- struct list_head c_link;
struct hlist_node c_hash;
lnet_nid_t c_self;
lnet_process_id_t c_peer;
};
struct ptlrpc_client {
- __u32 cli_request_portal;
- __u32 cli_reply_portal;
- char *cli_name;
+ __u32 cli_request_portal;
+ __u32 cli_reply_portal;
+ char *cli_name;
};
/* state flags of requests */
extern void server_bulk_callback (lnet_event_t *ev);
/* ptlrpc/connection.c */
-void ptlrpc_dump_connections(void);
-void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *);
-struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer,
- lnet_nid_t self, struct obd_uuid *uuid);
-int ptlrpc_put_connection(struct ptlrpc_connection *c);
+struct ptlrpc_connection *ptlrpc_connection_get(lnet_process_id_t peer,
+ lnet_nid_t self,
+ struct obd_uuid *uuid);
+int ptlrpc_connection_put(struct ptlrpc_connection *c);
struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
-int ptlrpc_init_connection(void);
-void ptlrpc_cleanup_connection(void);
+int ptlrpc_connection_init(void);
+void ptlrpc_connection_fini(void);
extern lnet_pid_t ptl_get_pid(void);
/* ptlrpc/niobuf.c */
obd_inactive:1; /* device active/inactive
* (for /proc/status only!!) */
/* uuid-export hash body */
- struct lustre_class_hash_body *obd_uuid_hash_body;
+ struct lustre_hash *obd_uuid_hash;
/* nid-export hash body */
- struct lustre_class_hash_body *obd_nid_hash_body;
+ struct lustre_hash *obd_nid_hash;
/* nid stats body */
- struct lustre_class_hash_body *obd_nid_stats_hash_body;
+ struct lustre_hash *obd_nid_stats_hash;
struct list_head obd_nid_stats;
atomic_t obd_refcount;
cfs_waitq_t obd_refcount_waitq;
if (imp_conn)
OBD_FREE(imp_conn, sizeof(*imp_conn));
out_put:
- ptlrpc_put_connection(ptlrpc_conn);
+ ptlrpc_connection_put(ptlrpc_conn);
RETURN(rc);
}
GOTO(out, rc = -EBUSY);
}
- ptlrpc_put_connection(imp->imp_connection);
+ ptlrpc_connection_put(imp->imp_connection);
imp->imp_connection = NULL;
dlmexp = class_conn2export(&imp->imp_dlm_handle);
if (dlmexp && dlmexp->exp_connection) {
LASSERT(dlmexp->exp_connection ==
imp_conn->oic_conn);
- ptlrpc_put_connection(dlmexp->exp_connection);
+ ptlrpc_connection_put(dlmexp->exp_connection);
dlmexp->exp_connection = NULL;
}
}
list_del(&imp_conn->oic_item);
- ptlrpc_put_connection(imp_conn->oic_conn);
+ ptlrpc_connection_put(imp_conn->oic_conn);
OBD_FREE(imp_conn, sizeof(*imp_conn));
CDEBUG(D_HA, "imp %p@%s: remove connection %s\n",
imp, imp->imp_obd->obd_name, uuid->uuid);
goto dont_check_exports;
spin_lock(&target->obd_dev_lock);
- export = lustre_hash_get_object_by_key(target->obd_uuid_hash_body, &cluuid);
+ export = lustre_hash_lookup(target->obd_uuid_hash, &cluuid);
if (export != NULL && export->exp_connecting) { /* bug 9635, et. al. */
CWARN("%s: exp %p already connecting\n",
}
if (export->exp_connection != NULL)
- ptlrpc_put_connection(export->exp_connection);
- export->exp_connection = ptlrpc_get_connection(req->rq_peer,
+ ptlrpc_connection_put(export->exp_connection);
+ export->exp_connection = ptlrpc_connection_get(req->rq_peer,
req->rq_self,
&remote_uuid);
spin_lock(&target->obd_dev_lock);
/* Export might be hashed already, e.g. if this is reconnect */
if (hlist_unhashed(&export->exp_nid_hash))
- lustre_hash_additem(export->exp_obd->obd_nid_hash_body,
- &export->exp_connection->c_peer.nid,
- &export->exp_nid_hash);
+ lustre_hash_add(export->exp_obd->obd_nid_hash,
+ &export->exp_connection->c_peer.nid,
+ &export->exp_nid_hash);
+
spin_unlock(&target->obd_dev_lock);
spin_lock_bh(&target->obd_processing_task_lock);
{ "fstype", lprocfs_rd_fstype, 0, 0 },
{ "mntdev", lprocfs_mds_rd_mntdev, 0, 0 },
{ "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
+ { "hash_stats", lprocfs_obd_rd_hash, 0, 0 },
{ "evict_client", 0, lprocfs_mds_wr_evict_client, 0 },
{ "evict_ost_nids", lprocfs_mds_rd_evictostnids,
lprocfs_mds_wr_evictostnids, 0 },
{ "capa_count", lprocfs_rd_capa_count, 0, 0 },
{ "site_stats", lprocfs_rd_site_stats, 0, 0 },
{ "evict_client", 0, lprocfs_mdt_wr_evict_client, 0 },
+ { "hash_stats", lprocfs_obd_rd_hash, 0, 0 },
{ 0 }
};
{ "fstype", lprocfs_rd_fstype, 0, 0 },
{ "mntdev", lprocfs_mgs_rd_mntdev, 0, 0 },
{ "num_exports", lprocfs_rd_num_exports, 0, 0 },
+ { "hash_stats", lprocfs_obd_rd_hash, 0, 0 },
{ "evict_client", 0, lprocfs_wr_evict_client, 0 },
{ 0 }
};
* Implement a hash class for hash process in lustre system.
*
* Author: YuZhangyong <yzy@clusterfs.com>
+ *
+ * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
+ * - Simplified API and improved documentation
+ * - Added per-hash feature flags:
+ * * LH_DEBUG additional validation
+ * * LH_REHASH dynamic rehashing
+ * - Added per-hash statistics
+ * - General performance enhancements
*/
#ifndef __KERNEL__
#include <obd.h>
#endif
-#include <obd_class.h>
#include <class_hash.h>
-#include <lustre_export.h>
-#include <obd_support.h>
-#include <lustre_net.h>
-int lustre_hash_init(struct lustre_class_hash_body **hash_body_new,
- char *hashname, __u32 hashsize,
- struct lustre_hash_operations *hash_operations)
+/**
+ * Initialize new lustre hash, where:
+ * @name - Descriptive hash name
+ * @cur_size - Initial hash table size
+ * @max_size - Maximum allowed hash table resize
+ * @ops - Registered hash table operations
+ * @flags - LH_REHASH enable dynamic hash resizing
+ * - LH_SORT enable chained hash sort
+ */
+lustre_hash_t *
+lustre_hash_init(char *name, unsigned int cur_size, unsigned int max_size,
+ lustre_hash_ops_t *ops, int flags)
{
- int i, n = 0;
- struct lustre_class_hash_body *hash_body = NULL;
-
- LASSERT(hashsize > 0);
- LASSERT(hash_operations != NULL);
+ lustre_hash_t *lh;
+ int i;
ENTRY;
-
- i = hashsize;
- while (i != 0) {
- if (i & 0x1)
- n++;
- i >>= 1;
- }
-
- LASSERTF(n == 1, "hashsize %u isn't 2^n\n", hashsize);
-
- /* alloc space for hash_body */
- OBD_ALLOC(hash_body, sizeof(*hash_body));
-
- if (hash_body == NULL) {
- CERROR("Cannot alloc space for hash body, hashname = %s \n",
- hashname);
- RETURN(-ENOMEM);
+
+ LASSERT(name != NULL);
+ LASSERT(ops != NULL);
+
+ /*
+ * Ensure hash is a power of two to allow the use of a bitmask
+ * in the hash function instead of a more expensive modulus.
+ */
+ LASSERTF(cur_size && (cur_size & (cur_size - 1)) == 0,
+ "Size (%u) is not power of 2\n", cur_size);
+ LASSERTF(max_size && (max_size & (max_size - 1)) == 0,
+ "Size (%u) is not power of 2\n", max_size);
+
+ OBD_ALLOC_PTR(lh);
+ if (!lh)
+ RETURN(NULL);
+
+ lh->lh_name_size = strlen(name) + 1;
+ rwlock_init(&lh->lh_rwlock);
+
+ OBD_ALLOC(lh->lh_name, lh->lh_name_size);
+ if (!lh->lh_name) {
+ OBD_FREE_PTR(lh);
+ RETURN(NULL);
}
-
- LASSERT(hashname != NULL &&
- strlen(hashname) <= sizeof(hash_body->hashname));
- strcpy(hash_body->hashname, hashname);
- hash_body->lchb_hash_max_size = hashsize;
- hash_body->lchb_hash_operations = hash_operations;
-
- /* alloc space for the hash tables */
- OBD_ALLOC(hash_body->lchb_hash_tables,
- sizeof(*hash_body->lchb_hash_tables) * hash_body->lchb_hash_max_size);
-
- if (hash_body->lchb_hash_tables == NULL) {
- OBD_FREE(hash_body, sizeof(*hash_body));
- CERROR("Cannot alloc space for hashtables, hashname = %s \n",
- hash_body->hashname);
- RETURN(-ENOMEM);
+
+ strncpy(lh->lh_name, name, lh->lh_name_size);
+
+ atomic_set(&lh->lh_count, 0);
+ atomic_set(&lh->lh_rehash_count, 0);
+ lh->lh_cur_size = cur_size;
+ lh->lh_min_size = cur_size;
+ lh->lh_max_size = max_size;
+ lh->lh_min_theta = 500; /* theta * 1000 */
+ lh->lh_max_theta = 2000; /* theta * 1000 */
+ lh->lh_ops = ops;
+ lh->lh_flags = flags;
+
+ OBD_VMALLOC(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size);
+ if (!lh->lh_buckets) {
+ OBD_FREE(lh->lh_name, lh->lh_name_size);
+ OBD_FREE_PTR(lh);
+ RETURN(NULL);
}
-
- spin_lock_init(&hash_body->lchb_lock); /* initialize the body lock */
-
- for(i = 0 ; i < hash_body->lchb_hash_max_size; i++) {
- /* initial the bucket lock and list_head */
- INIT_HLIST_HEAD(&hash_body->lchb_hash_tables[i].lhb_head);
- spin_lock_init(&hash_body->lchb_hash_tables[i].lhb_lock);
+
+ for (i = 0; i < lh->lh_cur_size; i++) {
+ INIT_HLIST_HEAD(&lh->lh_buckets[i].lhb_head);
+ rwlock_init(&lh->lh_buckets[i].lhb_rwlock);
+ atomic_set(&lh->lh_buckets[i].lhb_count, 0);
}
- *hash_body_new = hash_body;
-
- RETURN(0);
+
+ return lh;
}
EXPORT_SYMBOL(lustre_hash_init);
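+
+/*
+ * Illustrative usage sketch (not part of this patch); "MY_HASH",
+ * "my_obj_hash_ops", and the object layout are assumptions:
+ *
+ *   lustre_hash_t *lh;
+ *
+ *   lh = lustre_hash_init("MY_HASH", 128, 32768,
+ *                         &my_obj_hash_ops, LH_REHASH);
+ *   if (lh == NULL)
+ *           RETURN(-ENOMEM);
+ *
+ *   lustre_hash_add(lh, &obj->mo_id, &obj->mo_hash);
+ *   obj = lustre_hash_lookup(lh, &id);
+ *   ...
+ *   lustre_hash_del(lh, &obj->mo_id, &obj->mo_hash);
+ *   lustre_hash_exit(lh);
+ *
+ * Note lustre_hash_lookup() returns the object with a reference
+ * held via ops->lh_get; the caller must drop it when finished.
+ */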
-
-void lustre_hash_exit(struct lustre_class_hash_body **new_hash_body)
+
+/**
+ * Cleanup lustre hash @lh.
+ */
+void
+lustre_hash_exit(lustre_hash_t *lh)
{
- int i;
- struct lustre_class_hash_body *hash_body = NULL;
+ lustre_hash_bucket_t *lhb;
+ struct hlist_node *hnode;
+ struct hlist_node *pos;
+ int i;
ENTRY;
-
- hash_body = *new_hash_body;
-
- if (hash_body == NULL) {
- CWARN("hash body has been deleted\n");
- goto out_hash;
- }
-
- spin_lock(&hash_body->lchb_lock); /* lock the hash tables */
-
- if (hash_body->lchb_hash_tables == NULL ) {
- spin_unlock(&hash_body->lchb_lock);
- CWARN("hash tables has been deleted\n");
- goto out_hash;
- }
-
- for( i = 0; i < hash_body->lchb_hash_max_size; i++ ) {
- struct lustre_hash_bucket * bucket;
- struct hlist_node * actual_hnode, *pos;
-
- bucket = &hash_body->lchb_hash_tables[i];
- spin_lock(&bucket->lhb_lock); /* lock the bucket */
- hlist_for_each_safe(actual_hnode, pos, &(bucket->lhb_head)) {
- lustre_hash_delitem_nolock(hash_body, i, actual_hnode);
+
+ if (!lh)
+ return;
+
+ write_lock(&lh->lh_rwlock);
+
+ lh_for_each_bucket(lh, lhb, i) {
+ write_lock(&lhb->lhb_rwlock);
+ hlist_for_each_safe(hnode, pos, &(lhb->lhb_head)) {
+ __lustre_hash_bucket_validate(lh, lhb, hnode);
+ __lustre_hash_bucket_del(lh, lhb, hnode);
+ lh_exit(lh, hnode);
}
- spin_unlock(&bucket->lhb_lock);
+
+ LASSERT(hlist_empty(&(lhb->lhb_head)));
+ LASSERT(atomic_read(&lhb->lhb_count) == 0);
+ write_unlock(&lhb->lhb_rwlock);
}
+
+ OBD_VFREE(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size);
+ OBD_FREE(lh->lh_name, lh->lh_name_size);
+
+ LASSERT(atomic_read(&lh->lh_count) == 0);
+ write_unlock(&lh->lh_rwlock);
+
+ OBD_FREE_PTR(lh);
+ EXIT;
+}
+EXPORT_SYMBOL(lustre_hash_exit);
- /* free the hash_tables's memory space */
- OBD_FREE(hash_body->lchb_hash_tables,
- sizeof(*hash_body->lchb_hash_tables) * hash_body->lchb_hash_max_size);
+static inline unsigned int lustre_hash_rehash_size(lustre_hash_t *lh)
+{
+ if (!(lh->lh_flags & LH_REHASH))
+ return 0;
- hash_body->lchb_hash_tables = NULL;
+ if ((lh->lh_cur_size < lh->lh_max_size) &&
+ (__lustre_hash_theta(lh) > lh->lh_max_theta))
+ return MIN(lh->lh_cur_size * 2, lh->lh_max_size);
- spin_unlock(&hash_body->lchb_lock);
+ if ((lh->lh_cur_size > lh->lh_min_size) &&
+ (__lustre_hash_theta(lh) < lh->lh_min_theta))
+ return MAX(lh->lh_cur_size / 2, lh->lh_min_size);
-out_hash :
- /* free the hash_body's memory space */
- if (hash_body != NULL) {
- OBD_FREE(hash_body, sizeof(*hash_body));
- *new_hash_body = NULL;
- }
+ return 0;
+}
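+
+/*
+ * Sizing sketch (illustrative numbers): a hash created with
+ * cur_size = 128 and max_size = 32768 holding 300 items has
+ * theta = (300 * 1000) / 128 = 2343 > lh_max_theta (2000), so the
+ * table grows to MIN(128 * 2, 32768) = 256 on the next add/del.
+ * If the count later drops to 100, theta = (100 * 1000) / 256 = 390
+ * < lh_min_theta (500) and the table shrinks to MAX(256 / 2, 128)
+ * = 128, since lh_min_size is fixed at the initial size.
+ */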
+
+/**
+ * Add item @hnode to lustre hash @lh using @key. The registered
+ * ops->lh_get function will be called when the item is added.
+ */
+void
+lustre_hash_add(lustre_hash_t *lh, void *key, struct hlist_node *hnode)
+{
+ lustre_hash_bucket_t *lhb;
+ int size;
+ unsigned i;
+ ENTRY;
+
+ __lustre_hash_key_validate(lh, key, hnode);
+
+ read_lock(&lh->lh_rwlock);
+ i = lh_hash(lh, key, lh->lh_cur_size - 1);
+ lhb = &lh->lh_buckets[i];
+ LASSERT(i < lh->lh_cur_size);
+ LASSERT(hlist_unhashed(hnode));
+
+ write_lock(&lhb->lhb_rwlock);
+ __lustre_hash_bucket_add(lh, lhb, hnode);
+ write_unlock(&lhb->lhb_rwlock);
+
+ size = lustre_hash_rehash_size(lh);
+ read_unlock(&lh->lh_rwlock);
+ if (size)
+ lustre_hash_rehash(lh, size);
+
EXIT;
}
-EXPORT_SYMBOL(lustre_hash_exit);
-
-/*
- * only allow unique @key in hashtables, if the same @key has existed
- * in hashtables, it will return with fails.
+EXPORT_SYMBOL(lustre_hash_add);
+
+/**
+ * Add item @hnode to lustre hash @lh using @key. The registered
+ * ops->lh_get function will be called if the item was added.
+ * Returns 0 on success or -EALREADY on key collisions.
*/
-int lustre_hash_additem_unique(struct lustre_class_hash_body *hash_body,
- void *key, struct hlist_node *actual_hnode)
+int
+lustre_hash_add_unique(lustre_hash_t *lh, void *key, struct hlist_node *hnode)
{
- int hashent;
- struct lustre_hash_bucket *bucket = NULL;
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
+ lustre_hash_bucket_t *lhb;
+ int size;
+ int rc = -EALREADY;
+ unsigned i;
ENTRY;
-
- LASSERT(hlist_unhashed(actual_hnode));
- hashent = hop->lustre_hashfn(hash_body, key);
-
- /* get the hash-bucket and lock it */
- bucket = &hash_body->lchb_hash_tables[hashent];
- spin_lock(&bucket->lhb_lock);
-
- if ( (lustre_hash_getitem_in_bucket_nolock(hash_body, hashent, key)) != NULL) {
- /* the added-item exist in hashtables, so cannot add it again */
- spin_unlock(&bucket->lhb_lock);
-
- CWARN("Already found the key in hash [%s]\n",
- hash_body->hashname);
- RETURN(-EALREADY);
+
+ __lustre_hash_key_validate(lh, key, hnode);
+
+ read_lock(&lh->lh_rwlock);
+ i = lh_hash(lh, key, lh->lh_cur_size - 1);
+ lhb = &lh->lh_buckets[i];
+ LASSERT(i < lh->lh_cur_size);
+ LASSERT(hlist_unhashed(hnode));
+
+ write_lock(&lhb->lhb_rwlock);
+ if (!__lustre_hash_bucket_lookup(lh, lhb, key)) {
+ __lustre_hash_bucket_add(lh, lhb, hnode);
+ rc = 0;
}
-
- hlist_add_head(actual_hnode, &(bucket->lhb_head));
-
-#ifdef LUSTRE_HASH_DEBUG
- /* hash distribute debug */
- hash_body->lchb_hash_tables[hashent].lhb_item_count++;
- CDEBUG(D_INFO, "hashname[%s] bucket[%d] has [%d] hashitem\n",
- hash_body->hashname, hashent,
- hash_body->lchb_hash_tables[hashent].lhb_item_count);
-#endif
- hop->lustre_hash_object_refcount_get(actual_hnode);
-
- spin_unlock(&bucket->lhb_lock);
-
- RETURN(0);
+ write_unlock(&lhb->lhb_rwlock);
+
+ size = lustre_hash_rehash_size(lh);
+ read_unlock(&lh->lh_rwlock);
+ if (size)
+ lustre_hash_rehash(lh, size);
+
+ RETURN(rc);
}
-EXPORT_SYMBOL(lustre_hash_additem_unique);
-
-/*
- * only allow unique @key in hashtables, if the same @key has existed
- * in hashtables, it will return with fails.
+EXPORT_SYMBOL(lustre_hash_add_unique);
+
+/**
+ * Add item @hnode to lustre hash @lh using @key. If this @key
+ * already exists in the hash then ops->lh_get will be called on the
+ * conflicting entry and that entry will be returned to the caller.
+ * Otherwise ops->lh_get is called on the item which was added.
*/
-void* lustre_hash_findadd_unique(struct lustre_class_hash_body *hash_body,
- void *key, struct hlist_node *actual_hnode)
+void *
+lustre_hash_findadd_unique(lustre_hash_t *lh, void *key,
+ struct hlist_node *hnode)
{
- int hashent;
- struct lustre_hash_bucket *bucket = NULL;
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
- struct hlist_node * hash_item_hnode = NULL;
- void *obj;
+ struct hlist_node *existing_hnode;
+ lustre_hash_bucket_t *lhb;
+ int size;
+ unsigned i;
+ void *obj;
ENTRY;
-
- LASSERT(hlist_unhashed(actual_hnode));
- hashent = hop->lustre_hashfn(hash_body, key);
-
- /* get the hash-bucket and lock it */
- bucket = &hash_body->lchb_hash_tables[hashent];
- spin_lock(&bucket->lhb_lock);
-
- hash_item_hnode = lustre_hash_getitem_in_bucket_nolock(hash_body,
- hashent, key);
- if ( hash_item_hnode != NULL) {
- /* the added-item exist in hashtables, so cannot add it again */
- obj = hop->lustre_hash_object_refcount_get(hash_item_hnode);
- spin_unlock(&bucket->lhb_lock);
- RETURN(obj);
- }
-
- hlist_add_head(actual_hnode, &(bucket->lhb_head));
-
-#ifdef LUSTRE_HASH_DEBUG
- /* hash distribute debug */
- hash_body->lchb_hash_tables[hashent].lhb_item_count++;
- CDEBUG(D_INFO, "hashname[%s] bucket[%d] has [%d] hashitem\n",
- hash_body->hashname, hashent,
- hash_body->lchb_hash_tables[hashent].lhb_item_count);
-#endif
- obj = hop->lustre_hash_object_refcount_get(actual_hnode);
-
- spin_unlock(&bucket->lhb_lock);
-
+
+ __lustre_hash_key_validate(lh, key, hnode);
+
+ read_lock(&lh->lh_rwlock);
+ i = lh_hash(lh, key, lh->lh_cur_size - 1);
+ lhb = &lh->lh_buckets[i];
+ LASSERT(i < lh->lh_cur_size);
+ LASSERT(hlist_unhashed(hnode));
+
+ write_lock(&lhb->lhb_rwlock);
+ existing_hnode = __lustre_hash_bucket_lookup(lh, lhb, key);
+ if (existing_hnode)
+ obj = lh_get(lh, existing_hnode);
+ else
+ obj = __lustre_hash_bucket_add(lh, lhb, hnode);
+ write_unlock(&lhb->lhb_rwlock);
+
+ size = lustre_hash_rehash_size(lh);
+ read_unlock(&lh->lh_rwlock);
+ if (size)
+ lustre_hash_rehash(lh, size);
+
RETURN(obj);
}
EXPORT_SYMBOL(lustre_hash_findadd_unique);
-
-/*
- * this version of additem, it allow multi same @key <key, value> in hashtables.
- * in this additem version, we don't need to check if exist same @key in hash
- * tables, we only add it to related hashbucket.
- * example: maybe same nid will be related to multi difference export
+
+/**
+ * Delete item @hnode from the lustre hash @lh using @key. The @key
+ * is required to ensure the correct hash bucket is locked since there
+ * is no direct linkage from the item to the bucket. The object
+ * removed from the hash will be returned and ops->lh_put is called
+ * on the removed object.
*/
-int lustre_hash_additem(struct lustre_class_hash_body *hash_body, void *key,
- struct hlist_node *actual_hnode)
+void *
+lustre_hash_del(lustre_hash_t *lh, void *key, struct hlist_node *hnode)
{
- int hashent;
- struct lustre_hash_bucket *bucket = NULL;
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
+ lustre_hash_bucket_t *lhb;
+ int size;
+ unsigned i;
+ void *obj;
ENTRY;
-
- LASSERT(hlist_unhashed(actual_hnode));
-
- hashent = hop->lustre_hashfn(hash_body, key);
-
- /* get the hashbucket and lock it */
- bucket = &hash_body->lchb_hash_tables[hashent];
- spin_lock(&bucket->lhb_lock);
-
- hlist_add_head(actual_hnode, &(bucket->lhb_head));
-
-#ifdef LUSTRE_HASH_DEBUG
- /* hash distribute debug */
- hash_body->lchb_hash_tables[hashent].lhb_item_count++;
- CDEBUG(D_INFO, "hashname[%s] bucket[%d] has [%d] hashitem\n",
- hash_body->hashname, hashent,
- hash_body->lchb_hash_tables[hashent].lhb_item_count);
-#endif
- hop->lustre_hash_object_refcount_get(actual_hnode);
-
- spin_unlock(&bucket->lhb_lock);
-
- RETURN(0);
+
+ __lustre_hash_key_validate(lh, key, hnode);
+
+ read_lock(&lh->lh_rwlock);
+ i = lh_hash(lh, key, lh->lh_cur_size - 1);
+ lhb = &lh->lh_buckets[i];
+ LASSERT(i < lh->lh_cur_size);
+ LASSERT(!hlist_unhashed(hnode));
+
+ write_lock(&lhb->lhb_rwlock);
+ obj = __lustre_hash_bucket_del(lh, lhb, hnode);
+ write_unlock(&lhb->lhb_rwlock);
+
+ size = lustre_hash_rehash_size(lh);
+ read_unlock(&lh->lh_rwlock);
+ if (size)
+ lustre_hash_rehash(lh, size);
+
+ RETURN(obj);
}
-EXPORT_SYMBOL(lustre_hash_additem);
-
-
-/*
- * this version of delitem will delete a hashitem with given @key,
- * we need to search the <@key, @value> in hashbucket with @key,
- * if match, the hashitem will be delete.
- * we have a no-search version of delitem, it will directly delete a hashitem,
- * doesn't need to search it in hashtables, so it is a O(1) delete.
+EXPORT_SYMBOL(lustre_hash_del);
+
+/**
+ * Delete item given @key in lustre hash @lh. The first @key found in
+ * the hash will be removed; if the key exists multiple times in the hash
+ * @lh this function must be called once per key. The removed object
+ * will be returned and ops->lh_put is called on the removed object.
*/
-int lustre_hash_delitem_by_key(struct lustre_class_hash_body *hash_body,
- void *key)
+void *
+lustre_hash_del_key(lustre_hash_t *lh, void *key)
{
- int hashent ;
- struct hlist_node * hash_item;
- struct lustre_hash_bucket *bucket = NULL;
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
- int retval = 0;
+ struct hlist_node *hnode;
+ lustre_hash_bucket_t *lhb;
+ int size;
+ unsigned i;
+ void *obj = NULL;
ENTRY;
-
- hashent = hop->lustre_hashfn(hash_body, key);
-
- /* first, lock the hashbucket */
- bucket = &hash_body->lchb_hash_tables[hashent];
- spin_lock(&bucket->lhb_lock);
-
- /* get the hash_item from hash_bucket */
- hash_item = lustre_hash_getitem_in_bucket_nolock(hash_body, hashent,
- key);
-
- if (hash_item == NULL) {
- spin_unlock(&bucket->lhb_lock);
- RETURN(-ENOENT);
- }
-
- /* call delitem_nolock() to delete the hash_item */
- retval = lustre_hash_delitem_nolock(hash_body, hashent, hash_item);
-
- spin_unlock(&bucket->lhb_lock);
-
- RETURN(retval);
+
+ read_lock(&lh->lh_rwlock);
+ i = lh_hash(lh, key, lh->lh_cur_size - 1);
+ lhb = &lh->lh_buckets[i];
+ LASSERT(i < lh->lh_cur_size);
+
+ write_lock(&lhb->lhb_rwlock);
+ hnode = __lustre_hash_bucket_lookup(lh, lhb, key);
+ if (hnode)
+ obj = __lustre_hash_bucket_del(lh, lhb, hnode);
+
+ write_unlock(&lhb->lhb_rwlock);
+
+ size = lustre_hash_rehash_size(lh);
+ read_unlock(&lh->lh_rwlock);
+ if (size)
+ lustre_hash_rehash(lh, size);
+
+ RETURN(obj);
}
-EXPORT_SYMBOL(lustre_hash_delitem_by_key);
-
-/*
- * the O(1) version of delete hash item,
- * it will directly delete the hashitem with given @hash_item,
- * the parameter @key used to get the relation hash bucket and lock it.
+EXPORT_SYMBOL(lustre_hash_del_key);
+
+/**
+ * Lookup an item using @key in the lustre hash @lh and return it.
+ * If the @key is found in the hash, ops->lh_get is called and the
+ * matching object is returned. It is the caller's responsibility
+ * to call the counterpart ops->lh_put using the lh_put() macro
+ * when finished with the object. If the @key was not found
+ * in the hash @lh, NULL is returned.
*/
-int lustre_hash_delitem(struct lustre_class_hash_body *hash_body,
- void *key, struct hlist_node * hash_item)
-{
- int hashent = 0;
- int retval = 0;
- struct lustre_hash_bucket *bucket = NULL;
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
+void *
+lustre_hash_lookup(lustre_hash_t *lh, void *key)
+{
+ struct hlist_node *hnode;
+ lustre_hash_bucket_t *lhb;
+ unsigned i;
+ void *obj = NULL;
ENTRY;
-
- hashent = hop->lustre_hashfn(hash_body, key);
-
- bucket = &hash_body->lchb_hash_tables[hashent];
- spin_lock(&bucket->lhb_lock);
-
- /* call delitem_nolock() to delete the hash_item */
- retval = lustre_hash_delitem_nolock(hash_body, hashent, hash_item);
-
- spin_unlock(&bucket->lhb_lock);
-
- RETURN(retval);
+
+ read_lock(&lh->lh_rwlock);
+ i = lh_hash(lh, key, lh->lh_cur_size - 1);
+ lhb = &lh->lh_buckets[i];
+ LASSERT(i < lh->lh_cur_size);
+
+ read_lock(&lhb->lhb_rwlock);
+ hnode = __lustre_hash_bucket_lookup(lh, lhb, key);
+ if (hnode)
+ obj = lh_get(lh, hnode);
+
+ read_unlock(&lhb->lhb_rwlock);
+ read_unlock(&lh->lh_rwlock);
+
+ RETURN(obj);
}
-EXPORT_SYMBOL(lustre_hash_delitem);
-
-void lustre_hash_bucket_iterate(struct lustre_class_hash_body *hash_body,
- void *key, hash_item_iterate_cb func, void *data)
+EXPORT_SYMBOL(lustre_hash_lookup);
+
+/**
+ * For each item in the lustre hash @lh call the passed callback @func
+ * and pass to it as an argument each hash item and the private @data.
+ * Before each callback ops->lh_get will be called, and after each
+ * callback ops->lh_put will be called. Finally, during the callback
+ * the bucket lock is held so the callback must never sleep.
+ */
+void
+lustre_hash_for_each(lustre_hash_t *lh, lh_for_each_cb func, void *data)
{
- int hashent, find = 0;
- struct lustre_hash_bucket *bucket = NULL;
- struct hlist_node *hash_item_node = NULL;
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
- struct obd_export *tmp = NULL;
-
+ struct hlist_node *hnode;
+ lustre_hash_bucket_t *lhb;
+ void *obj;
+ int i;
ENTRY;
-
- hashent = hop->lustre_hashfn(hash_body, key);
- bucket = &hash_body->lchb_hash_tables[hashent];
-
- spin_lock(&bucket->lhb_lock);
- hlist_for_each(hash_item_node, &(bucket->lhb_head)) {
- find = hop->lustre_hash_key_compare(key, hash_item_node);
- if (find) {
- tmp = hop->lustre_hash_object_refcount_get(hash_item_node);
- func(tmp, data);
- hop->lustre_hash_object_refcount_put(hash_item_node);
+
+ read_lock(&lh->lh_rwlock);
+ lh_for_each_bucket(lh, lhb, i) {
+ read_lock(&lhb->lhb_rwlock);
+ hlist_for_each(hnode, &(lhb->lhb_head)) {
+ __lustre_hash_bucket_validate(lh, lhb, hnode);
+ obj = lh_get(lh, hnode);
+ func(obj, data);
+ (void)lh_put(lh, hnode);
}
+ read_unlock(&lhb->lhb_rwlock);
}
- spin_unlock(&bucket->lhb_lock);
-}
-EXPORT_SYMBOL(lustre_hash_bucket_iterate);
+ read_unlock(&lh->lh_rwlock);
-void lustre_hash_iterate_all(struct lustre_class_hash_body *hash_body,
- hash_item_iterate_cb func, void *data)
+ EXIT;
+}
+EXPORT_SYMBOL(lustre_hash_for_each);
+
+/**
+ * For each item in the lustre hash @lh call the passed callback @func
+ * and pass to it as an argument each hash item and the private @data.
+ * Before each callback ops->lh_get will be called, and after each
+ * callback ops->lh_put will be called. During the callback the
+ * bucket lock will not be held, which allows the current item
+ * to be removed from the hash. However, care
+ * should be taken to prevent other callers from operating on the
+ * hash concurrently or list corruption may occur.
+ */
+void
+lustre_hash_for_each_safe(lustre_hash_t *lh, lh_for_each_cb func, void *data)
{
- int i;
- struct lustre_hash_operations *hop = hash_body->lchb_hash_operations;
+ struct hlist_node *hnode;
+ struct hlist_node *pos;
+ lustre_hash_bucket_t *lhb;
+ void *obj;
+ int i;
ENTRY;
-
- for( i = 0; i < hash_body->lchb_hash_max_size; i++ ) {
- struct lustre_hash_bucket * bucket;
- struct hlist_node * actual_hnode, *pos;
- void *obj;
-
- bucket = &hash_body->lchb_hash_tables[i];
-#ifdef LUSTRE_HASH_DEBUG
- CDEBUG(D_INFO, "idx %d - bucket %p\n", i, bucket);
-#endif
- spin_lock(&bucket->lhb_lock); /* lock the bucket */
- hlist_for_each_safe(actual_hnode, pos, &(bucket->lhb_head)) {
- obj = hop->lustre_hash_object_refcount_get(actual_hnode);
+
+ read_lock(&lh->lh_rwlock);
+ lh_for_each_bucket(lh, lhb, i) {
+ read_lock(&lhb->lhb_rwlock);
+ hlist_for_each_safe(hnode, pos, &(lhb->lhb_head)) {
+ __lustre_hash_bucket_validate(lh, lhb, hnode);
+ obj = lh_get(lh, hnode);
+ read_unlock(&lhb->lhb_rwlock);
func(obj, data);
- hop->lustre_hash_object_refcount_put(actual_hnode);
+ read_lock(&lhb->lhb_rwlock);
+ (void)lh_put(lh, hnode);
}
- spin_unlock(&bucket->lhb_lock);
+ read_unlock(&lhb->lhb_rwlock);
}
+ read_unlock(&lh->lh_rwlock);
EXIT;
}
-EXPORT_SYMBOL(lustre_hash_iterate_all);
-
-
-void * lustre_hash_get_object_by_key(struct lustre_class_hash_body *hash_body,
- void *key)
+EXPORT_SYMBOL(lustre_hash_for_each_safe);
+
+/**
+ * For each hash bucket in the lustre hash @lh call the passed callback
+ * @func until all the hash buckets are empty. The passed callback @func
+ * or the previously registered callback ops->lh_put must remove the item
+ * from the hash. You may use either the lustre_hash_del() or hlist_del()
+ * functions. No rwlocks will be held during the callback @func, so it is
+ * safe to sleep if needed. This function will not terminate until the
+ * hash is empty. Note it is still possible to concurrently add new
+ * items into the hash. It is the caller's responsibility to ensure
+ * the required locking is in place to prevent concurrent insertions.
+ */
+void
+lustre_hash_for_each_empty(lustre_hash_t *lh, lh_for_each_cb func, void *data)
{
- int hashent ;
- struct hlist_node * hash_item_hnode = NULL;
- void * obj_value = NULL;
- struct lustre_hash_bucket *bucket = NULL;
- struct lustre_hash_operations * hop = hash_body->lchb_hash_operations;
+ struct hlist_node *hnode;
+ lustre_hash_bucket_t *lhb;
+ void *obj;
+ int i;
ENTRY;
-
- /* get the hash value from the given item */
- hashent = hop->lustre_hashfn(hash_body, key);
-
- bucket = &hash_body->lchb_hash_tables[hashent];
- spin_lock(&bucket->lhb_lock); /* lock the bucket */
-
- hash_item_hnode = lustre_hash_getitem_in_bucket_nolock(hash_body,
- hashent, key);
-
- if (hash_item_hnode == NULL) {
- spin_unlock(&bucket->lhb_lock); /* lock the bucket */
- RETURN(NULL);
+
+restart:
+ read_lock(&lh->lh_rwlock);
+ lh_for_each_bucket(lh, lhb, i) {
+ write_lock(&lhb->lhb_rwlock);
+ while (!hlist_empty(&lhb->lhb_head)) {
+ hnode = lhb->lhb_head.first;
+ __lustre_hash_bucket_validate(lh, lhb, hnode);
+ obj = lh_get(lh, hnode);
+ write_unlock(&lhb->lhb_rwlock);
+ read_unlock(&lh->lh_rwlock);
+ func(obj, data);
+ (void)lh_put(lh, hnode);
+ goto restart;
+ }
+ write_unlock(&lhb->lhb_rwlock);
}
-
- obj_value = hop->lustre_hash_object_refcount_get(hash_item_hnode);
- spin_unlock(&bucket->lhb_lock); /* lock the bucket */
-
- RETURN(obj_value);
-}
-EXPORT_SYMBOL(lustre_hash_get_object_by_key);
-
-/* string hashing using djb2 hash algorithm */
-__u32 djb2_hashfn(struct lustre_class_hash_body *hash_body, void* key,
- size_t size)
-{
- __u32 hash = 5381;
- int i;
- char *ptr = key;
-
- LASSERT(key != NULL);
-
- for( i = 0; i < size; i++ )
- hash = hash * 33 + ptr[i];
-
- hash &= (hash_body->lchb_hash_max_size - 1);
-
- RETURN(hash);
-}
-
-/*
- * define (uuid <-> export) hash operations and function define
- */
-
-/* define the uuid hash operations */
-struct lustre_hash_operations uuid_hash_operations = {
- .lustre_hashfn = uuid_hashfn,
- .lustre_hash_key_compare = uuid_hash_key_compare,
- .lustre_hash_object_refcount_get = uuid_export_refcount_get,
- .lustre_hash_object_refcount_put = uuid_export_refcount_put,
-};
-
-__u32 uuid_hashfn(struct lustre_class_hash_body *hash_body, void * key)
-{
- struct obd_uuid * uuid_key = key;
-
- return djb2_hashfn(hash_body, uuid_key->uuid, sizeof(uuid_key->uuid));
-}
-
-/* Note, it is impossible to find an export that is in failed state with
- * this function */
-int uuid_hash_key_compare(void *key, struct hlist_node *compared_hnode)
-{
- struct obd_export *export = NULL;
- struct obd_uuid *uuid_key = NULL, *compared_uuid = NULL;
-
- LASSERT( key != NULL);
-
- uuid_key = (struct obd_uuid*)key;
-
- export = hlist_entry(compared_hnode, struct obd_export, exp_uuid_hash);
-
- compared_uuid = &export->exp_client_uuid;
-
- RETURN(obd_uuid_equals(uuid_key, compared_uuid) &&
- !export->exp_failed);
-}
-
-void * uuid_export_refcount_get(struct hlist_node * actual_hnode)
-{
- struct obd_export *export = NULL;
-
- LASSERT(actual_hnode != NULL);
-
- export = hlist_entry(actual_hnode, struct obd_export, exp_uuid_hash);
-
- LASSERT(export != NULL);
-
- class_export_get(export);
-
- RETURN(export);
+ read_unlock(&lh->lh_rwlock);
+ EXIT;
}
-
-void uuid_export_refcount_put(struct hlist_node * actual_hnode)
+EXPORT_SYMBOL(lustre_hash_for_each_empty);
+
+/**
+ * For each item in the lustre hash @lh which matches the @key call
+ * the passed callback @func and pass to it as an argument each hash
+ * item and the private @data. Before each callback ops->lh_get will
+ * be called, and after each callback ops->lh_put will be called.
+ * Finally, during the callback the bucket lock is held so the
+ * callback must never sleep.
+ */
+void
+lustre_hash_for_each_key(lustre_hash_t *lh, void *key,
+ lh_for_each_cb func, void *data)
{
- struct obd_export *export = NULL;
-
- LASSERT(actual_hnode != NULL);
-
- export = hlist_entry(actual_hnode, struct obd_export, exp_uuid_hash);
-
- LASSERT(export != NULL);
-
- class_export_put(export);
+ struct hlist_node *hnode;
+ lustre_hash_bucket_t *lhb;
+ unsigned i;
+ ENTRY;
+
+ read_lock(&lh->lh_rwlock);
+ i = lh_hash(lh, key, lh->lh_cur_size - 1);
+ lhb = &lh->lh_buckets[i];
+ LASSERT(i < lh->lh_cur_size);
+
+ read_lock(&lhb->lhb_rwlock);
+ hlist_for_each(hnode, &(lhb->lhb_head)) {
+ __lustre_hash_bucket_validate(lh, lhb, hnode);
+
+ if (!lh_compare(lh, key, hnode))
+ continue;
+
+ func(lh_get(lh, hnode), data);
+ (void)lh_put(lh, hnode);
+ }
+
+ read_unlock(&lhb->lhb_rwlock);
+ read_unlock(&lh->lh_rwlock);
+
+ EXIT;
}
-
-/*
- * define (nid <-> export) hash operations and function define
+EXPORT_SYMBOL(lustre_hash_for_each_key);
+
+/**
+ * Rehash the lustre hash @lh to the given @size. This can be used
+ * to grow the hash size when excessive chaining is detected, or to
+ * shrink the hash when it is larger than needed. When the LH_REHASH
+ * flag is set in @lh the lustre hash may be dynamically rehashed
+ * during addition or removal if the hash's theta value exceeds
+ * either the lh->lh_min_theta or lh->lh_max_theta values. By default
+ * these values are tuned to keep the chained hash depth small, and
+ * this approach assumes a reasonably uniform hashing function. The
+ * theta thresholds for @lh are tunable via __lustre_hash_set_theta().
*/
-
-/* define the nid hash operations */
-struct lustre_hash_operations nid_hash_operations = {
- .lustre_hashfn = nid_hashfn,
- .lustre_hash_key_compare = nid_hash_key_compare,
- .lustre_hash_object_refcount_get = nid_export_refcount_get,
- .lustre_hash_object_refcount_put = nid_export_refcount_put,
-};
-
-__u32 nid_hashfn(struct lustre_class_hash_body *hash_body, void * key)
-{
- return djb2_hashfn(hash_body, key, sizeof(lnet_nid_t));
-}
-
-/* Note, it is impossible to find an export that is in failed state with
- * this function */
-int nid_hash_key_compare(void *key, struct hlist_node *compared_hnode)
-{
- struct obd_export *export = NULL;
- lnet_nid_t *nid_key = NULL;
-
- LASSERT( key != NULL);
-
- nid_key = (lnet_nid_t*)key;
-
- export = hlist_entry(compared_hnode, struct obd_export, exp_nid_hash);
-
- return (export->exp_connection->c_peer.nid == *nid_key &&
- !export->exp_failed);
-}
-
-void *nid_export_refcount_get(struct hlist_node *actual_hnode)
-{
- struct obd_export *export = NULL;
-
- LASSERT(actual_hnode != NULL);
-
- export = hlist_entry(actual_hnode, struct obd_export, exp_nid_hash);
-
- LASSERT(export != NULL);
-
- class_export_get(export);
-
- RETURN(export);
-}
-
-void nid_export_refcount_put(struct hlist_node *actual_hnode)
+int
+lustre_hash_rehash(lustre_hash_t *lh, int size)
{
- struct obd_export *export = NULL;
-
- LASSERT(actual_hnode != NULL);
-
- export = hlist_entry(actual_hnode, struct obd_export, exp_nid_hash);
-
- LASSERT(export != NULL);
-
- class_export_put(export);
+ struct hlist_node *hnode;
+ struct hlist_node *pos;
+ lustre_hash_bucket_t *lh_buckets;
+ lustre_hash_bucket_t *rehash_buckets;
+ lustre_hash_bucket_t *lh_lhb;
+ lustre_hash_bucket_t *rehash_lhb;
+ int i;
+ int lh_size;
+ int theta;
+ void *key;
+ ENTRY;
+
+ LASSERT(size > 0);
+
+ OBD_VMALLOC(rehash_buckets, sizeof(*rehash_buckets) * size);
+ if (!rehash_buckets)
+ RETURN(-ENOMEM);
+
+ for (i = 0; i < size; i++) {
+ INIT_HLIST_HEAD(&rehash_buckets[i].lhb_head);
+ rwlock_init(&rehash_buckets[i].lhb_rwlock);
+ atomic_set(&rehash_buckets[i].lhb_count, 0);
+ }
+
+ write_lock(&lh->lh_rwlock);
+
+ /*
+ * Early return for multiple concurrent racing callers;
+ * ensure we only trigger the rehash if it is still needed.
+ */
+ theta = __lustre_hash_theta(lh);
+ if ((theta >= lh->lh_min_theta) && (theta <= lh->lh_max_theta)) {
+ OBD_VFREE(rehash_buckets, sizeof(*rehash_buckets) * size);
+ write_unlock(&lh->lh_rwlock);
+ RETURN(-EALREADY);
+ }
+
+ lh_size = lh->lh_cur_size;
+ lh_buckets = lh->lh_buckets;
+
+ lh->lh_cur_size = size;
+ lh->lh_buckets = rehash_buckets;
+ atomic_inc(&lh->lh_rehash_count);
+
+ for (i = 0; i < lh_size; i++) {
+ lh_lhb = &lh_buckets[i];
+
+ write_lock(&lh_lhb->lhb_rwlock);
+ hlist_for_each_safe(hnode, pos, &(lh_lhb->lhb_head)) {
+ key = lh_key(lh, hnode);
+ LASSERT(key);
+
+ /*
+ * Validate hnode is in the correct bucket.
+ */
+ if (unlikely(lh->lh_flags & LH_DEBUG))
+ LASSERT(lh_hash(lh, key, lh_size - 1) == i);
+
+ /*
+ * Delete from old hash bucket.
+ */
+ hlist_del(hnode);
+ LASSERT(atomic_read(&lh_lhb->lhb_count) > 0);
+ atomic_dec(&lh_lhb->lhb_count);
+
+ /*
+ * Add to rehash bucket, ops->lh_key must be defined.
+ */
+ rehash_lhb = &rehash_buckets[lh_hash(lh, key, size-1)];
+ hlist_add_head(hnode, &(rehash_lhb->lhb_head));
+ atomic_inc(&rehash_lhb->lhb_count);
+ }
+
+ LASSERT(hlist_empty(&(lh_lhb->lhb_head)));
+ LASSERT(atomic_read(&lh_lhb->lhb_count) == 0);
+ write_unlock(&lh_lhb->lhb_rwlock);
+ }
+
+ OBD_VFREE(lh_buckets, sizeof(*lh_buckets) * lh_size);
+ write_unlock(&lh->lh_rwlock);
+
+ RETURN(0);
}
-
-/*
- * define (net_peer <-> connection) hash operations and function define
+EXPORT_SYMBOL(lustre_hash_rehash);
+
+/**
+ * Rehash the object referenced by @hnode in the lustre hash @lh. The
+ * @old_key must be provided to locate the objects previous location
+ * in the hash, and the @new_key will be used to reinsert the object.
+ * Use this function instead of a lustre_hash_add() + lustre_hash_del()
+ * combo when it is critical that there is no window in time where the
+ * object is missing from the hash. When an object is being rehashed
+ * the registered lh_get() and lh_put() functions will not be called.
*/
-
-/* define the conn hash operations */
-struct lustre_hash_operations conn_hash_operations = {
- .lustre_hashfn = conn_hashfn,
- .lustre_hash_key_compare = conn_hash_key_compare,
- .lustre_hash_object_refcount_get = conn_refcount_get,
- .lustre_hash_object_refcount_put = conn_refcount_put,
-};
-EXPORT_SYMBOL(conn_hash_operations);
-
-__u32 conn_hashfn(struct lustre_class_hash_body *hash_body, void * key)
+void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, void *new_key,
+ struct hlist_node *hnode)
{
- return djb2_hashfn(hash_body, key, sizeof(lnet_process_id_t));
-}
-
-int conn_hash_key_compare(void *key, struct hlist_node *compared_hnode)
-{
- struct ptlrpc_connection *c = NULL;
- lnet_process_id_t *conn_key = NULL;
-
- LASSERT( key != NULL);
-
- conn_key = (lnet_process_id_t*)key;
-
- c = hlist_entry(compared_hnode, struct ptlrpc_connection, c_hash);
-
- return (conn_key->nid == c->c_peer.nid &&
- conn_key->pid == c->c_peer.pid);
-}
-
-void *conn_refcount_get(struct hlist_node *actual_hnode)
-{
- struct ptlrpc_connection *c = NULL;
-
- LASSERT(actual_hnode != NULL);
-
- c = hlist_entry(actual_hnode, struct ptlrpc_connection, c_hash);
-
- LASSERT(c != NULL);
-
- atomic_inc(&c->c_refcount);
-
- RETURN(c);
-}
-
-void conn_refcount_put(struct hlist_node *actual_hnode)
-{
- struct ptlrpc_connection *c = NULL;
-
- LASSERT(actual_hnode != NULL);
-
- c = hlist_entry(actual_hnode, struct ptlrpc_connection, c_hash);
-
- LASSERT(c != NULL);
-
- atomic_dec(&c->c_refcount);
-}
-
-/*******************************************************************************/
-/* ( nid<>nidstats ) hash operations define */
-
-struct lustre_hash_operations nid_stat_hash_operations = {
- .lustre_hashfn = nid_hashfn,
- .lustre_hash_key_compare = nidstats_hash_key_compare,
- .lustre_hash_object_refcount_get = nidstats_refcount_get,
- .lustre_hash_object_refcount_put = nidstats_refcount_put,
-};
-EXPORT_SYMBOL(nid_stat_hash_operations);
-
-int nidstats_hash_key_compare(void *key, struct hlist_node * compared_hnode)
-{
- struct nid_stat *data;
- lnet_nid_t *nid_key;
-
- LASSERT( key != NULL);
-
- nid_key = (lnet_nid_t*)key;
- data = hlist_entry(compared_hnode, struct nid_stat, nid_hash);
-
- return (data->nid == *nid_key);
+ lustre_hash_bucket_t *old_lhb;
+ lustre_hash_bucket_t *new_lhb;
+ unsigned i;
+ int j;
+ ENTRY;
+
+ __lustre_hash_key_validate(lh, new_key, hnode);
+ LASSERT(!hlist_unhashed(hnode));
+
+ read_lock(&lh->lh_rwlock);
+
+ i = lh_hash(lh, old_key, lh->lh_cur_size - 1);
+ old_lhb = &lh->lh_buckets[i];
+ LASSERT(i < lh->lh_cur_size);
+
+ j = lh_hash(lh, new_key, lh->lh_cur_size - 1);
+ new_lhb = &lh->lh_buckets[j];
+ LASSERT(j < lh->lh_cur_size);
+
+ write_lock(&old_lhb->lhb_rwlock);
+ write_lock(&new_lhb->lhb_rwlock);
+
+ /*
+ * Migrate item between hash buckets without calling
+ * the lh_get() and lh_put() callback functions.
+ */
+ hlist_del(hnode);
+ LASSERT(atomic_read(&old_lhb->lhb_count) > 0);
+ atomic_dec(&old_lhb->lhb_count);
+ hlist_add_head(hnode, &(new_lhb->lhb_head));
+ atomic_inc(&new_lhb->lhb_count);
+
+ write_unlock(&new_lhb->lhb_rwlock);
+ write_unlock(&old_lhb->lhb_rwlock);
+ read_unlock(&lh->lh_rwlock);
+
+ EXIT;
}
-
-void* nidstats_refcount_get(struct hlist_node * actual_hnode)
+EXPORT_SYMBOL(lustre_hash_rehash_key);
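+
+/*
+ * Illustrative only ("obj" and its mo_* fields are hypothetical):
+ * atomically move an object whose identifying field has changed:
+ *
+ *   old_id = obj->mo_id;
+ *   obj->mo_id = new_id;
+ *   lustre_hash_rehash_key(lh, &old_id, &obj->mo_id, &obj->mo_hash);
+ */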
+
+int lustre_hash_debug_header(char *str, int size)
{
- struct nid_stat *data;
-
- data = hlist_entry(actual_hnode, struct nid_stat, nid_hash);
- data->nid_exp_ref_count++;
-
- RETURN(data);
+ return snprintf(str, size,
+ "%-36s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n",
+ "name", "cur", "min", "max", "theta", "t-min", "t-max",
+ "flags", "rehash", "count", " distribution");
}
+EXPORT_SYMBOL(lustre_hash_debug_header);
-void nidstats_refcount_put(struct hlist_node * actual_hnode)
+int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size)
{
- struct nid_stat *data;
-
- data = hlist_entry(actual_hnode, struct nid_stat, nid_hash);
- data->nid_exp_ref_count--;
- EXIT;
+ lustre_hash_bucket_t *lhb;
+ int theta;
+ int i;
+ int c = 0;
+ int dist[8] = { 0, };
+
+ if (str == NULL || size == 0)
+ return 0;
+
+ read_lock(&lh->lh_rwlock);
+ theta = __lustre_hash_theta(lh);
+
+ c += snprintf(str + c, size - c, "%-36s ", lh->lh_name);
+ c += snprintf(str + c, size - c, "%5d ", lh->lh_cur_size);
+ c += snprintf(str + c, size - c, "%5d ", lh->lh_min_size);
+ c += snprintf(str + c, size - c, "%5d ", lh->lh_max_size);
+ c += snprintf(str + c, size - c, "%d.%03d ",
+ theta / 1000, theta % 1000);
+ c += snprintf(str + c, size - c, "%d.%03d ",
+ lh->lh_min_theta / 1000, lh->lh_min_theta % 1000);
+ c += snprintf(str + c, size - c, "%d.%03d ",
+ lh->lh_max_theta / 1000, lh->lh_max_theta % 1000);
+ c += snprintf(str + c, size - c, " 0x%02x ", lh->lh_flags);
+ c += snprintf(str + c, size - c, "%6d ",
+ atomic_read(&lh->lh_rehash_count));
+ c += snprintf(str + c, size - c, "%5d ",
+ atomic_read(&lh->lh_count));
+
+ /*
+ * The distribution is a summary of the chained hash depth in
+ * each of the lustre hash buckets. Each bucket's lhb_count is
+ * divided by the hash theta value and used to generate a
+ * histogram of the hash distribution. A uniform hash will
+ * result in all hash buckets being close to the average thus
+ * only the first few entries in the histogram will be non-zero.
+ * If your hash function results in a non-uniform hash, it will
+ * be observable as outlier buckets in the distribution histogram.
+ *
+ * Uniform hash distribution: 128/128/0/0/0/0/0/0
+ * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1
+ */
+ lh_for_each_bucket(lh, lhb, i)
+ dist[MIN(fls(atomic_read(&lhb->lhb_count)/MAX(theta,1)),7)]++;
+
+ for (i = 0; i < 8; i++)
+ c += snprintf(str + c, size - c, "%d%c", dist[i],
+ (i == 7) ? '\n' : '/');
+
+ read_unlock(&lh->lh_rwlock);
+
+ return c;
}
-
-/*******************************************************************************/
+EXPORT_SYMBOL(lustre_hash_debug_str);
spin_lock(&obd->obd_dev_lock);
if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) {
- rc = lustre_hash_additem_unique(obd->obd_uuid_hash_body, cluuid,
- &export->exp_uuid_hash);
- if (rc != 0) {
- CWARN("%s: denying duplicate export for %s\n",
- obd->obd_name, cluuid->uuid);
- spin_unlock(&obd->obd_dev_lock);
- class_handle_unhash(&export->exp_handle);
- OBD_FREE_PTR(export);
- return ERR_PTR(-EALREADY);
- }
+ rc = lustre_hash_add_unique(obd->obd_uuid_hash, cluuid,
+ &export->exp_uuid_hash);
+ if (rc != 0) {
+ LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n",
+ obd->obd_name, cluuid->uuid, rc);
+ spin_unlock(&obd->obd_dev_lock);
+ class_handle_unhash(&export->exp_handle);
+ OBD_FREE_PTR(export);
+ return ERR_PTR(-EALREADY);
+ }
}
LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */
spin_lock(&exp->exp_obd->obd_dev_lock);
/* delete an uuid-export hashitem from hashtables */
- if (!hlist_unhashed(&exp->exp_uuid_hash)) {
- lustre_hash_delitem(exp->exp_obd->obd_uuid_hash_body,
- &exp->exp_client_uuid, &exp->exp_uuid_hash);
- }
+ if (!hlist_unhashed(&exp->exp_uuid_hash))
+ lustre_hash_del(exp->exp_obd->obd_uuid_hash,
+ &exp->exp_client_uuid,
+ &exp->exp_uuid_hash);
+
list_del_init(&exp->exp_obd_chain);
list_del_init(&exp->exp_obd_chain_timed);
exp->exp_obd->obd_num_exports--;
already_disconnected = export->exp_disconnected;
export->exp_disconnected = 1;
- if (!hlist_unhashed(&export->exp_nid_hash)) {
- lustre_hash_delitem(export->exp_obd->obd_nid_hash_body,
- &export->exp_connection->c_peer.nid, &export->exp_nid_hash);
- }
+ if (!hlist_unhashed(&export->exp_nid_hash))
+ lustre_hash_del(export->exp_obd->obd_nid_hash,
+ &export->exp_connection->c_peer.nid,
+ &export->exp_nid_hash);
+
spin_unlock(&export->exp_lock);
/* class_cleanup(), abort_recovery(), and class_fail_export()
lnet_nid_t nid_key = libcfs_str2nid((char *)nid);
do {
- doomed_exp = lustre_hash_get_object_by_key(obd->obd_nid_hash_body,
- &nid_key);
+ doomed_exp = lustre_hash_lookup(obd->obd_nid_hash, &nid_key);
if (doomed_exp == NULL)
break;
int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
{
struct obd_export *doomed_exp = NULL;
- struct obd_uuid doomed;
+ struct obd_uuid doomed_uuid;
int exports_evicted = 0;
- obd_str2uuid(&doomed, uuid);
- if (obd_uuid_equals(&doomed, &obd->obd_uuid)) {
+ obd_str2uuid(&doomed_uuid, uuid);
+ if (obd_uuid_equals(&doomed_uuid, &obd->obd_uuid)) {
CERROR("%s: can't evict myself\n", obd->obd_name);
return exports_evicted;
}
- doomed_exp = lustre_hash_get_object_by_key(obd->obd_uuid_hash_body,
- &doomed);
+ doomed_exp = lustre_hash_lookup(obd->obd_uuid_hash, &doomed_uuid);
if (doomed_exp == NULL) {
CERROR("%s: can't disconnect %s: no exports found\n",
cb_data.count = count;
cb_data.eof = eof;
cb_data.len = &len;
- lustre_hash_bucket_iterate(obd->obd_nid_hash_body,
- &stats->nid, lprocfs_exp_print_uuid,
- &cb_data);
+ lustre_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
+ lprocfs_exp_print_uuid, &cb_data);
return (*cb_data.len);
}
struct nid_stat *client_stat;
CFS_LIST_HEAD(free_list);
- lustre_hash_iterate_all(obd->obd_nid_stats_hash_body,
- lprocfs_nid_stats_clear_write_cb, &free_list);
+ lustre_hash_for_each(obd->obd_nid_stats_hash,
+ lprocfs_nid_stats_clear_write_cb, &free_list);
while (!list_empty(&free_list)) {
client_stat = list_entry(free_list.next, struct nid_stat, nid_list);
*newnid = 0;
if (!exp || !exp->exp_obd || !exp->exp_obd->obd_proc_exports_entry ||
- !exp->exp_obd->obd_nid_stats_hash_body)
+ !exp->exp_obd->obd_nid_stats_hash)
RETURN(-EINVAL);
/* not test against zero because eric say:
obd = exp->exp_obd;
- CDEBUG(D_CONFIG, "using hash %p\n", obd->obd_nid_stats_hash_body);
+ CDEBUG(D_CONFIG, "using hash %p\n", obd->obd_nid_stats_hash);
OBD_ALLOC(tmp, sizeof(struct nid_stat));
if (tmp == NULL)
list_add(&tmp->nid_list, &obd->obd_nid_stats);
spin_unlock(&obd->obd_nid_lock);
- tmp1= lustre_hash_findadd_unique(obd->obd_nid_stats_hash_body, nid,
- &tmp->nid_hash);
+ tmp1 = lustre_hash_findadd_unique(obd->obd_nid_stats_hash,
+ nid, &tmp->nid_hash);
CDEBUG(D_INFO, "Found stats %p for nid %s - ref %d\n",
tmp1, libcfs_nid2str(*nid), tmp->nid_exp_ref_count);
if (!tmp->nid_proc) {
CERROR("Error making export directory for"
" nid %s\n", libcfs_nid2str(*nid));
- lustre_hash_delitem(obd->obd_nid_stats_hash_body, nid,
- &tmp->nid_hash);
+ lustre_hash_del(obd->obd_nid_stats_hash, nid, &tmp->nid_hash);
GOTO(destroy_new, rc = -ENOMEM);
}
}
EXPORT_SYMBOL(lprocfs_oh_clear);
+int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct obd_device *obd = data;
+ int c = 0;
+
+ if (obd == NULL)
+ return 0;
+
+ c += lustre_hash_debug_header(page, count);
+ c += lustre_hash_debug_str(obd->obd_uuid_hash, page + c, count - c);
+ c += lustre_hash_debug_str(obd->obd_nid_hash, page + c, count - c);
+ c += lustre_hash_debug_str(obd->obd_nid_stats_hash, page + c, count - c);
+
+ return c;
+}
+EXPORT_SYMBOL(lprocfs_obd_rd_hash);
+
int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
#include <lustre_param.h>
#include <class_hash.h>
-extern struct lustre_hash_operations uuid_hash_operations;
-extern struct lustre_hash_operations nid_hash_operations;
+static lustre_hash_ops_t uuid_hash_ops;
+static lustre_hash_ops_t nid_hash_ops;
+static lustre_hash_ops_t nid_stat_hash_ops;
/*********** string parsing utils *********/
/* just leave this on forever. I can't use obd_set_up here because
other fns check that status, and we're not actually set up yet. */
obd->obd_starting = 1;
+ obd->obd_uuid_hash = NULL;
+ obd->obd_nid_hash = NULL;
+ obd->obd_nid_stats_hash = NULL;
spin_unlock(&obd->obd_dev_lock);
- /* create an uuid-export hash body */
- err = lustre_hash_init(&obd->obd_uuid_hash_body, "UUID_HASH",
- 128, &uuid_hash_operations);
- if (err)
- GOTO(err_hash, err);
-
- /* create a nid-export hash body */
- err = lustre_hash_init(&obd->obd_nid_hash_body, "NID_HASH",
- 128, &nid_hash_operations);
- if (err)
- GOTO(err_hash, err);
-
- /* create a nid-stats hash body */
- err = lustre_hash_init(&obd->obd_nid_stats_hash_body, "NID_STATS",
- 128, &nid_stat_hash_operations);
- if (err)
- GOTO(err_hash, err);
+ /* create an uuid-export lustre hash */
+ obd->obd_uuid_hash = lustre_hash_init("UUID_HASH", 128, 128,
+ &uuid_hash_ops, 0);
+ if (!obd->obd_uuid_hash)
+ GOTO(err_hash, err = -ENOMEM);
+
+ /* create a nid-export lustre hash */
+ obd->obd_nid_hash = lustre_hash_init("NID_HASH", 128, 128,
+ &nid_hash_ops, 0);
+ if (!obd->obd_nid_hash)
+ GOTO(err_hash, err = -ENOMEM);
+
+ /* create a nid-stats lustre hash */
+ obd->obd_nid_stats_hash = lustre_hash_init("NID_STATS", 128, 128,
+ &nid_stat_hash_ops, 0);
+ if (!obd->obd_nid_stats_hash)
+ GOTO(err_hash, err = -ENOMEM);
exp = class_new_export(obd, &obd->obd_uuid);
if (IS_ERR(exp))
class_unlink_export(obd->obd_self_export);
obd->obd_self_export = NULL;
err_hash:
- lustre_hash_exit(&obd->obd_uuid_hash_body);
- lustre_hash_exit(&obd->obd_nid_hash_body);
- lustre_hash_exit(&obd->obd_nid_stats_hash_body);
+ lustre_hash_exit(obd->obd_uuid_hash);
+ lustre_hash_exit(obd->obd_nid_hash);
+ lustre_hash_exit(obd->obd_nid_stats_hash);
obd->obd_starting = 0;
CERROR("setup %s failed (%d)\n", obd->obd_name, err);
RETURN(err);
LASSERT(obd->obd_self_export);
/* destroy an uuid-export hash body */
- lustre_hash_exit(&obd->obd_uuid_hash_body);
+ lustre_hash_exit(obd->obd_uuid_hash);
/* destroy a nid-export hash body */
- lustre_hash_exit(&obd->obd_nid_hash_body);
+ lustre_hash_exit(obd->obd_nid_hash);
/* destroy a nid-stats hash body */
- lustre_hash_exit(&obd->obd_nid_stats_hash_body);
+ lustre_hash_exit(obd->obd_nid_stats_hash);
/* Precleanup, we must make sure all exports get destroyed. */
err = obd_precleanup(obd, OBD_CLEANUP_EXPORTS);
lustre_cfg_free(lcfg);
RETURN(rc);
}
+
+/*
+ * uuid<->export lustre hash operations
+ */
+
+static unsigned
+uuid_hash(lustre_hash_t *lh, void *key, unsigned mask)
+{
+ return lh_djb2_hash(((struct obd_uuid *)key)->uuid,
+ sizeof(((struct obd_uuid *)key)->uuid), mask);
+}
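Both uuid_hash() above and nid_hash() below delegate to lh_djb2_hash(), which lives in the hash library rather than in this patch. For reference, a sketch of the classic djb2 hash it is named after, folded into a bucket index with the mask (table sizes are powers of two); the actual helper's signature and behaviour are an assumption here:

static unsigned
lh_djb2_hash_sketch(void *key, size_t size, unsigned mask)
{
        unsigned char *p = key;
        unsigned hash = 5381;

        /* classic djb2 step: hash = hash * 33 + byte */
        while (size-- > 0)
                hash = ((hash << 5) + hash) + *p++;

        return hash & mask;
}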
+
+static void *
+uuid_key(struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+
+ RETURN(&exp->exp_client_uuid);
+}
+
+/*
+ * NOTE: it is impossible to find an export that is in the failed
+ * state with this function.
+ */
+static int
+uuid_compare(void *key, struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ LASSERT(key);
+ exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+
+ RETURN(obd_uuid_equals((struct obd_uuid *)key, &exp->exp_client_uuid) &&
+ !exp->exp_failed);
+}
+
+static void *
+uuid_export_get(struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+ class_export_get(exp);
+
+ RETURN(exp);
+}
+
+static void *
+uuid_export_put(struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash);
+ class_export_put(exp);
+
+ RETURN(exp);
+}
+
+static lustre_hash_ops_t uuid_hash_ops = {
+ .lh_hash = uuid_hash,
+ .lh_key = uuid_key,
+ .lh_compare = uuid_compare,
+ .lh_get = uuid_export_get,
+ .lh_put = uuid_export_put,
+};
+
+
+/*
+ * nid<->export hash operations
+ */
+
+static unsigned
+nid_hash(lustre_hash_t *lh, void *key, unsigned mask)
+{
+ return lh_djb2_hash(key, sizeof(lnet_nid_t), mask);
+}
+
+static void *
+nid_key(struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_nid_hash);
+
+ RETURN(&exp->exp_connection->c_peer.nid);
+}
+
+/*
+ * NOTE: it is impossible to find an export that is in the failed
+ * state with this function.
+ */
+static int
+nid_compare(void *key, struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ LASSERT(key);
+ exp = hlist_entry(hnode, struct obd_export, exp_nid_hash);
+
+ RETURN(exp->exp_connection->c_peer.nid == *(lnet_nid_t *)key &&
+ !exp->exp_failed);
+}
+
+static void *
+nid_export_get(struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_nid_hash);
+ class_export_get(exp);
+
+ RETURN(exp);
+}
+
+static void *
+nid_export_put(struct hlist_node *hnode)
+{
+ struct obd_export *exp;
+
+ exp = hlist_entry(hnode, struct obd_export, exp_nid_hash);
+ class_export_put(exp);
+
+ RETURN(exp);
+}
+
+static lustre_hash_ops_t nid_hash_ops = {
+ .lh_hash = nid_hash,
+ .lh_key = nid_key,
+ .lh_compare = nid_compare,
+ .lh_get = nid_export_get,
+ .lh_put = nid_export_put,
+};
+
+
+/*
+ * nid<->nidstats hash operations
+ */
+
+static void *
+nidstats_key(struct hlist_node *hnode)
+{
+ struct nid_stat *ns;
+
+ ns = hlist_entry(hnode, struct nid_stat, nid_hash);
+
+ RETURN(&ns->nid);
+}
+
+static int
+nidstats_compare(void *key, struct hlist_node *hnode)
+{
+ RETURN(*(lnet_nid_t *)nidstats_key(hnode) == *(lnet_nid_t *)key);
+}
+
+static void *
+nidstats_get(struct hlist_node *hnode)
+{
+ struct nid_stat *ns;
+
+ ns = hlist_entry(hnode, struct nid_stat, nid_hash);
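+ /* nid_exp_ref_count is a plain int, not an atomic_t; presumably
+ * the bucket lock taken by the hash library serializes these
+ * get/put updates. */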
+ ns->nid_exp_ref_count++;
+
+ RETURN(ns);
+}
+
+static void *
+nidstats_put(struct hlist_node *hnode)
+{
+ struct nid_stat *ns;
+
+ ns = hlist_entry(hnode, struct nid_stat, nid_hash);
+ ns->nid_exp_ref_count--;
+
+ RETURN(ns);
+}
+
+static lustre_hash_ops_t nid_stat_hash_ops = {
+ .lh_hash = nid_hash,
+ .lh_key = nidstats_key,
+ .lh_compare = nidstats_compare,
+ .lh_get = nidstats_get,
+ .lh_put = nidstats_put,
+};
{ "tot_dirty", lprocfs_filter_rd_tot_dirty, 0, 0 },
{ "tot_pending", lprocfs_filter_rd_tot_pending, 0, 0 },
{ "tot_granted", lprocfs_filter_rd_tot_granted, 0, 0 },
+ { "hash_stats", lprocfs_obd_rd_hash, 0, 0 },
{ "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
{ "recovery_maxtime", lprocfs_obd_rd_recovery_maxtime,
lprocfs_obd_wr_recovery_maxtime, 0},
return NULL;
}
- c = ptlrpc_get_connection(peer, self, uuid);
+ c = ptlrpc_connection_get(peer, self, uuid);
if (c) {
memcpy(c->c_remote_uuid.uuid,
uuid->uuid, sizeof(c->c_remote_uuid.uuid));
return c;
}
-void ptlrpc_readdress_connection(struct ptlrpc_connection *conn,
- struct obd_uuid *uuid)
-{
- lnet_nid_t self;
- lnet_process_id_t peer;
- int err;
-
- err = ptlrpc_uuid_to_peer(uuid, &peer, &self);
- if (err != 0) {
- CERROR("cannot find peer %s!\n", uuid->uuid);
- return;
- }
-
- conn->c_peer = peer;
- conn->c_self = self;
- return;
-}
-
static inline struct ptlrpc_bulk_desc *new_bulk(int npages, int type, int portal)
{
struct ptlrpc_bulk_desc *desc;
#include "ptlrpc_internal.h"
#include <class_hash.h>
-static spinlock_t conn_lock;
-static struct list_head conn_list;
-static struct lustre_class_hash_body *conn_hash_body;
-static struct lustre_class_hash_body *conn_unused_hash_body;
+static lustre_hash_t *conn_hash = NULL;
+static lustre_hash_ops_t conn_hash_ops;
-extern struct lustre_hash_operations conn_hash_operations;
-
-void ptlrpc_dump_connection(void *obj, void *data)
+struct ptlrpc_connection *
+ptlrpc_connection_get(lnet_process_id_t peer, lnet_nid_t self,
+ struct obd_uuid *uuid)
{
- struct ptlrpc_connection *c = obj;
+ struct ptlrpc_connection *conn, *conn2;
+ ENTRY;
- CERROR("Connection %p/%s has refcount %d (nid=%s->%s)\n",
- c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount),
- libcfs_nid2str(c->c_self),
- libcfs_nid2str(c->c_peer.nid));
+ conn = lustre_hash_lookup(conn_hash, &peer);
+ if (conn)
+ GOTO(out, conn);
+
+ OBD_ALLOC_PTR(conn);
+ if (!conn)
+ RETURN(NULL);
+
+ conn->c_peer = peer;
+ conn->c_self = self;
+ INIT_HLIST_NODE(&conn->c_hash);
+ atomic_set(&conn->c_refcount, 1);
+ if (uuid)
+ obd_str2uuid(&conn->c_remote_uuid, uuid->uuid);
+
+ /*
+ * Add the newly created conn to the hash; on key collision we
+ * lost a racing addition and must destroy our newly allocated
+ * connection. The object which already exists in the hash will
+ * be returned and may be compared against our object.
+ */
+ conn2 = lustre_hash_findadd_unique(conn_hash, &peer, &conn->c_hash);
+ if (conn != conn2) {
+ OBD_FREE_PTR(conn);
+ conn = conn2;
+ }
+ EXIT;
+out:
+ CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
+ return conn;
}
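The lookup-or-insert above leans on lustre_hash_findadd_unique() to resolve races entirely inside the hash: on a key collision it hands back the object that won the race, otherwise it links the caller's node. The helper is not part of this hunk; a sketch of the contract it is assumed to provide, per bucket and under the bucket write lock:

static void *
findadd_unique_sketch(lustre_hash_t *lh, lustre_hash_bucket_t *lhb,
                      void *key, struct hlist_node *hnode)
{
        struct hlist_node *pos;

        /* an existing entry with this key means we lost the race */
        hlist_for_each(pos, &lhb->lhb_head)
                if (LHP(lh, compare)(key, pos))
                        return LHP(lh, get)(pos);

        /* no collision: link our node and return it, with a ref */
        hlist_add_head(hnode, &lhb->lhb_head);
        atomic_inc(&lhb->lhb_count);
        return LHP(lh, get)(hnode);
}

Either way the caller receives a referenced object, which is why the losing path in ptlrpc_connection_get() can free its own allocation and keep conn2.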
-
-void ptlrpc_dump_connections(void)
+
+int ptlrpc_connection_put(struct ptlrpc_connection *conn)
{
+ int rc = 0;
ENTRY;
+
+ if (!conn)
+ RETURN(rc);
+
+ LASSERT(!hlist_unhashed(&conn->c_hash));
+
+ /*
+ * We do not remove the connection from the hash table and do
+ * not free it even if the last caller released its reference,
+ * as we want to keep it cached in case it is needed again.
+ *
+ * Deallocating it and later creating a new connection again
+ * would be wasteful. This way we also avoid the expensive
+ * locking that would be needed to protect against a get/put
+ * race in which a freshly found cached connection is freed by
+ * ptlrpc_connection_put().
+ *
+ * The connection is freed at module unload time, when the
+ * ptlrpc_connection_fini()->lh_exit->conn_exit() path is
+ * called.
+ */
+ if (atomic_dec_return(&conn->c_refcount) == 1)
+ rc = 1;
- lustre_hash_iterate_all(conn_hash_body, ptlrpc_dump_connection, NULL);
+ CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
- EXIT;
+ RETURN(rc);
}
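Taken together, get/put implement a cache in which the hash table always holds the base reference. An illustrative caller (hypothetical, for shape only):

static void
conn_cache_usage_sketch(lnet_process_id_t peer, lnet_nid_t self)
{
        struct ptlrpc_connection *conn;

        conn = ptlrpc_connection_get(peer, self, NULL);
        if (conn == NULL)
                return;

        /* ... use conn; additional users take refs via
         * ptlrpc_connection_addref() ... */

        ptlrpc_connection_put(conn); /* back to 1 ref: cached, not freed */
}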
-
-struct ptlrpc_connection*
-ptlrpc_lookup_conn_locked (lnet_process_id_t peer)
+
+struct ptlrpc_connection *
+ptlrpc_connection_addref(struct ptlrpc_connection *conn)
{
- struct ptlrpc_connection *c = NULL;
- int rc;
-
- c = lustre_hash_get_object_by_key(conn_hash_body, &peer);
- if (c != NULL)
- return c;
-
- c = lustre_hash_get_object_by_key(conn_unused_hash_body, &peer);
- if (c != NULL) {
- lustre_hash_delitem(conn_unused_hash_body, &peer, &c->c_hash);
- rc = lustre_hash_additem_unique(conn_hash_body, &peer,
- &c->c_hash);
- if (rc) {
- /* can't add - try with new item */
- OBD_FREE_PTR(c);
- list_del(&c->c_link);
- c = NULL;
- }
- }
-
- return c;
-}
+ ENTRY;
+ atomic_inc(&conn->c_refcount);
+ CDEBUG(D_INFO, "conn=%p refcount %d to %s\n",
+ conn, atomic_read(&conn->c_refcount),
+ libcfs_nid2str(conn->c_peer.nid));
-struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer,
- lnet_nid_t self, struct obd_uuid *uuid)
+ RETURN(conn);
+}
+
+int ptlrpc_connection_init(void)
{
- struct ptlrpc_connection *c;
- struct ptlrpc_connection *c2;
- int rc = 0;
ENTRY;
- CDEBUG(D_INFO, "self %s peer %s\n",
- libcfs_nid2str(self), libcfs_id2str(peer));
-
- spin_lock(&conn_lock);
- c = ptlrpc_lookup_conn_locked(peer);
- spin_unlock(&conn_lock);
-
- if (c != NULL)
- RETURN (c);
-
- OBD_ALLOC_PTR(c);
- if (c == NULL)
- RETURN (NULL);
-
- atomic_set(&c->c_refcount, 1);
- c->c_peer = peer;
- c->c_self = self;
- INIT_HLIST_NODE(&c->c_hash);
- CFS_INIT_LIST_HEAD(&c->c_link);
- if (uuid != NULL)
- obd_str2uuid(&c->c_remote_uuid, uuid->uuid);
-
- spin_lock(&conn_lock);
-
- c2 = ptlrpc_lookup_conn_locked(peer);
- if (c2 == NULL) {
- rc = lustre_hash_additem_unique(conn_hash_body, &peer,
- &c->c_hash);
- if (rc != 0) {
- CERROR("Cannot add connection to conn_hash_body\n");
- goto out_conn;
- }
- list_add(&c->c_link, &conn_list);
- }
-
-out_conn:
- spin_unlock(&conn_lock);
-
- if (c2 == NULL && rc == 0)
- RETURN (c);
-
- if (c != NULL)
- OBD_FREE(c, sizeof(*c));
- RETURN (c2);
+ conn_hash = lustre_hash_init("CONN_HASH", 32, 32768,
+ &conn_hash_ops, LH_REHASH);
+ if (!conn_hash)
+ RETURN(-ENOMEM);
+
+ RETURN(0);
}
-
-int ptlrpc_put_connection(struct ptlrpc_connection *c)
-{
- int rc = 0;
+
+void ptlrpc_connection_fini(void)
+{
ENTRY;
+ lustre_hash_exit(conn_hash);
+ EXIT;
+}
- if (c == NULL) {
- CERROR("NULL connection\n");
- RETURN(0);
- }
+/*
+ * Hash operations for net_peer<->connection
+ */
+static unsigned
+conn_hashfn(lustre_hash_t *lh, void *key, unsigned mask)
+{
+ return lh_djb2_hash(key, sizeof(lnet_process_id_t), mask);
+}
- CDEBUG (D_INFO, "connection=%p refcount %d to %s\n",
- c, atomic_read(&c->c_refcount) - 1,
- libcfs_nid2str(c->c_peer.nid));
+static int
+conn_compare(void *key, struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+ lnet_process_id_t *conn_key;
- spin_lock(&conn_lock);
- LASSERT(!hlist_unhashed(&c->c_hash));
- spin_unlock(&conn_lock);
+ LASSERT(key != NULL);
+ conn_key = (lnet_process_id_t *)key;
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
- if (atomic_dec_return(&c->c_refcount) == 1) {
+ return conn_key->nid == conn->c_peer.nid &&
+ conn_key->pid == conn->c_peer.pid;
+}
- spin_lock(&conn_lock);
- lustre_hash_delitem(conn_hash_body, &c->c_peer, &c->c_hash);
- rc = lustre_hash_additem_unique(conn_unused_hash_body, &c->c_peer,
- &c->c_hash);
- spin_unlock(&conn_lock);
- if (rc != 0) {
- CERROR("Cannot hash connection to conn_hash_body\n");
- GOTO(ret, rc);
- }
+static void *
+conn_key(struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ return &conn->c_peer;
+}
- rc = 1;
- }
+static void *
+conn_get(struct hlist_node *hnode)
+{
+ struct ptlrpc_connection *conn;
- if (atomic_read(&c->c_refcount) < 0)
- CERROR("connection %p refcount %d!\n",
- c, atomic_read(&c->c_refcount));
-ret :
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ atomic_inc(&conn->c_refcount);
- RETURN(rc);
+ return conn;
}
-struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c)
+static void *
+conn_put(struct hlist_node *hnode)
{
- ENTRY;
- atomic_inc(&c->c_refcount);
- CDEBUG (D_INFO, "connection=%p refcount %d to %s\n",
- c, atomic_read(&c->c_refcount),
- libcfs_nid2str(c->c_peer.nid));
- RETURN(c);
-}
+ struct ptlrpc_connection *conn;
-int ptlrpc_init_connection(void)
-{
- int rc = 0;
- CFS_INIT_LIST_HEAD(&conn_list);
- rc = lustre_hash_init(&conn_hash_body, "CONN_HASH",
- 128, &conn_hash_operations);
- if (rc)
- GOTO(ret, rc);
-
- rc = lustre_hash_init(&conn_unused_hash_body, "CONN_UNUSED_HASH",
- 128, &conn_hash_operations);
- if (rc)
- GOTO(ret, rc);
-
- spin_lock_init(&conn_lock);
-ret:
- if (rc) {
- lustre_hash_exit(&conn_hash_body);
- lustre_hash_exit(&conn_unused_hash_body);
- }
- RETURN(rc);
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ atomic_dec(&conn->c_refcount);
+
+ return conn;
}
-void ptlrpc_cleanup_connection(void)
+static void
+conn_exit(struct hlist_node *hnode)
{
- struct list_head *tmp, *pos;
- struct ptlrpc_connection *c;
-
- spin_lock(&conn_lock);
-
- lustre_hash_exit(&conn_unused_hash_body);
- lustre_hash_exit(&conn_hash_body);
-
- list_for_each_safe(tmp, pos, &conn_list) {
- c = list_entry(tmp, struct ptlrpc_connection, c_link);
- if (atomic_read(&c->c_refcount))
- CERROR("Connection %p/%s has refcount %d (nid=%s)\n",
- c, c->c_remote_uuid.uuid,
- atomic_read(&c->c_refcount),
- libcfs_nid2str(c->c_peer.nid));
- list_del(&c->c_link);
- OBD_FREE(c, sizeof(*c));
- }
- spin_unlock(&conn_lock);
+ struct ptlrpc_connection *conn;
+
+ conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash);
+ /*
+ * Nothing should be left: by this time every user has put its
+ * reference and the connection has been removed from the hash
+ * table, so the refcount must be zero.
+ */
+ LASSERTF(atomic_read(&conn->c_refcount) == 0,
+ "Busy connection with %d refs\n",
+ atomic_read(&conn->c_refcount));
+ OBD_FREE_PTR(conn);
}
+
+static lustre_hash_ops_t conn_hash_ops = {
+ .lh_hash = conn_hashfn,
+ .lh_compare = conn_compare,
+ .lh_key = conn_key,
+ .lh_get = conn_get,
+ .lh_put = conn_put,
+ .lh_exit = conn_exit,
+};
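conn_exit() above is only ever reached from lustre_hash_exit() at module unload. That library routine is not in this hunk; a sketch of the teardown it is assumed to perform, unlinking every remaining node and handing it to lh_exit for final freeing (bookkeeping and exact locking omitted):

static void
lustre_hash_exit_sketch(lustre_hash_t *lh)
{
        lustre_hash_bucket_t *lhb;
        struct hlist_node *hnode, *next;
        int i;

        write_lock(&lh->lh_rwlock);
        lh_for_each_bucket(lh, lhb, i) {
                hlist_for_each_safe(hnode, next, &lhb->lhb_head) {
                        hlist_del_init(hnode);
                        if (LHO(lh) && LHP(lh, exit))
                                LHP(lh, exit)(hnode); /* conn_exit(): frees */
                }
        }
        write_unlock(&lh->lh_rwlock);
}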
/* switch connection, don't mind if it's same as the current one */
if (imp->imp_connection)
- ptlrpc_put_connection(imp->imp_connection);
+ ptlrpc_connection_put(imp->imp_connection);
imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
dlmexp = class_conn2export(&imp->imp_dlm_handle);
LASSERT(dlmexp != NULL);
if (dlmexp->exp_connection)
- ptlrpc_put_connection(dlmexp->exp_connection);
+ ptlrpc_connection_put(dlmexp->exp_connection);
dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn);
class_export_put(dlmexp);
ptlrpc_at_set_reply(req, flags);
if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
- conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL);
+ conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL);
else
conn = ptlrpc_connection_addref(req->rq_export->exp_connection);
atomic_dec (&svc->srv_outstanding_replies);
ptlrpc_req_drop_rs(req);
}
- ptlrpc_put_connection(conn);
+ ptlrpc_connection_put(conn);
return rc;
}
RETURN(rc);
cleanup_phase = 2;
- rc = ptlrpc_init_connection();
+ rc = ptlrpc_connection_init();
if (rc)
GOTO(cleanup, rc);
cleanup_phase = 3;
- ptlrpc_put_connection_superhack = ptlrpc_put_connection;
+ ptlrpc_put_connection_superhack = ptlrpc_connection_put;
rc = ptlrpc_start_pinger();
if (rc)
case 4:
ptlrpc_stop_pinger();
case 3:
- ptlrpc_cleanup_connection();
+ ptlrpc_connection_fini();
case 2:
ptlrpc_exit_portals();
case 1:
ldlm_exit();
ptlrpc_stop_pinger();
ptlrpc_exit_portals();
- ptlrpc_cleanup_connection();
+ ptlrpc_connection_fini();
}
/* connection.c */
-EXPORT_SYMBOL(ptlrpc_dump_connections);
-EXPORT_SYMBOL(ptlrpc_readdress_connection);
-EXPORT_SYMBOL(ptlrpc_get_connection);
-EXPORT_SYMBOL(ptlrpc_put_connection);
+EXPORT_SYMBOL(ptlrpc_connection_get);
+EXPORT_SYMBOL(ptlrpc_connection_put);
EXPORT_SYMBOL(ptlrpc_connection_addref);
-EXPORT_SYMBOL(ptlrpc_init_connection);
-EXPORT_SYMBOL(ptlrpc_cleanup_connection);
+EXPORT_SYMBOL(ptlrpc_connection_init);
+EXPORT_SYMBOL(ptlrpc_connection_fini);
/* niobuf.c */
EXPORT_SYMBOL(ptlrpc_start_bulk_transfer);