Whamcloud - gitweb
LU-8130 lov: convert lo[v|d]_pool to use rhashtable 62/32662/13
author NeilBrown <neilb@suse.com>
Fri, 24 Jan 2020 15:26:34 +0000 (10:26 -0500)
committer Oleg Drokin <green@whamcloud.com>
Fri, 14 Feb 2020 05:49:46 +0000 (05:49 +0000)
The pools hashtable can be implemented using
the rhashtable implementation in lib.
This has the benefit that lookups are lock-free.

We need to use kfree_rcu() to free a pool so
that a lookup racing with a deletion will not access
freed memory.

rhashtable has no combined lookup-and-delete interface,
but as the lookup is lockless and the chains are short,
this brings little cost.  Even if a lookup finds a pool,
we must be prepared for the delete to fail to find it,
as we might race with another thread doing a delete.

We use atomic_inc_not_zero() after finding a pool in the
hash table and if that fails, we must have raced with a
deletion, so we treat the lookup as a failure.

Use hashlen_string() rather than a hand-crafted hash
function.
Note that both the pool_name and the search key are
guaranteed to be NUL-terminated.

Based on

Linux-commit: 055ed193b190edac539f37a66699b02eae3a19a9

with the port of server-side pool handling to rhashtables.

Change-Id: Ia5b4cbbd17515ea43a473e91719b3665f46b0d0a
Signed-off-by: NeilBrown <neilb@suse.com>
Reviewed-on: https://review.whamcloud.com/32662
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd.h
lustre/include/obd_support.h
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_pool.c
lustre/lov/lov_internal.h
lustre/lov/lov_obd.c
lustre/lov/lov_pool.c

index 78f9217..8461df7 100644 (file)
@@ -401,7 +401,7 @@ struct lov_obd {
        __u32                   lov_tgt_size;   /* size of tgts array */
        int                     lov_connects;
        int                     lov_pool_count;
        __u32                   lov_tgt_size;   /* size of tgts array */
        int                     lov_connects;
        int                     lov_pool_count;
-       struct cfs_hash        *lov_pools_hash_body; /* used for key access */
+       struct rhashtable       lov_pools_hash_body; /* used for key access */
        struct list_head        lov_pool_list;  /* used for sequential access */
        struct proc_dir_entry  *lov_pool_proc_entry;
        enum lustre_sec_part    lov_sp_me;
        struct list_head        lov_pool_list;  /* used for sequential access */
        struct proc_dir_entry  *lov_pool_proc_entry;
        enum lustre_sec_part    lov_sp_me;
index b23a6a7..3e2d73c 100644 (file)
@@ -71,9 +71,6 @@ extern atomic_long_t obd_dirty_pages;
 extern char obd_jobid_var[];
 
 /* Some hash init argument constants */
 extern char obd_jobid_var[];
 
 /* Some hash init argument constants */
-#define HASH_POOLS_BKT_BITS 3
-#define HASH_POOLS_CUR_BITS 3
-#define HASH_POOLS_MAX_BITS 7
 #define HASH_NID_BKT_BITS 5
 #define HASH_NID_CUR_BITS 7
 #define HASH_NID_MAX_BITS 12
 #define HASH_NID_BKT_BITS 5
 #define HASH_NID_CUR_BITS 7
 #define HASH_NID_MAX_BITS 12
index 65ac3b9..4aa2b10 100644 (file)
@@ -59,12 +59,16 @@ struct pool_desc {
        struct lu_tgt_pool       pool_obds;     /* pool members */
        atomic_t                 pool_refcount;
        struct lu_qos_rr         pool_rr;
        struct lu_tgt_pool       pool_obds;     /* pool members */
        atomic_t                 pool_refcount;
        struct lu_qos_rr         pool_rr;
-       struct hlist_node        pool_hash;     /* access by poolname */
+       struct rhash_head        pool_hash;     /* access by poolname */
        struct list_head         pool_list;
        struct list_head         pool_list;
+       struct rcu_head          pool_rcu;
        struct proc_dir_entry   *pool_proc_entry;
        struct obd_device       *pool_lobd;     /* owner */
 };
 
        struct proc_dir_entry   *pool_proc_entry;
        struct obd_device       *pool_lobd;     /* owner */
 };
 
+int lod_pool_hash_init(struct rhashtable *tbl);
+void lod_pool_hash_destroy(struct rhashtable *tbl);
+
 #define pool_tgt_count(p) ((p)->pool_obds.op_count)
 #define pool_tgt_array(p)  ((p)->pool_obds.op_array)
 #define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem)
 #define pool_tgt_count(p) ((p)->pool_obds.op_count)
 #define pool_tgt_array(p)  ((p)->pool_obds.op_array)
 #define pool_tgt_rw_sem(p) ((p)->pool_obds.op_rw_sem)
@@ -118,7 +122,7 @@ struct lod_device {
 
        /* OST pool data */
        int                     lod_pool_count;
 
        /* OST pool data */
        int                     lod_pool_count;
-       struct cfs_hash        *lod_pools_hash_body; /* used for key access */
+       struct rhashtable       lod_pools_hash_body; /* used for key access */
        struct list_head        lod_pool_list; /* used for sequential access */
        struct proc_dir_entry  *lod_pool_proc_entry;
 
        struct list_head        lod_pool_list; /* used for sequential access */
        struct proc_dir_entry  *lod_pool_proc_entry;
 
@@ -607,7 +611,6 @@ int lod_tgt_pool_extend(struct lu_tgt_pool *op, unsigned int min_count);
 struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname);
 void lod_pool_putref(struct pool_desc *pool);
 int lod_pool_del(struct obd_device *obd, char *poolname);
 struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname);
 void lod_pool_putref(struct pool_desc *pool);
 int lod_pool_del(struct obd_device *obd, char *poolname);
-extern struct cfs_hash_ops pool_hash_operations;
 int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool);
 int lod_pool_new(struct obd_device *obd, char *poolname);
 int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname);
 int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool);
 int lod_pool_new(struct obd_device *obd, char *poolname);
 int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname);
index f601afe..37f2b2c 100644 (file)
@@ -2109,14 +2109,9 @@ int lod_pools_init(struct lod_device *lod, struct lustre_cfg *lcfg)
        lod->lod_sp_me = LUSTRE_SP_CLI;
 
        /* Set up OST pool environment */
        lod->lod_sp_me = LUSTRE_SP_CLI;
 
        /* Set up OST pool environment */
-       lod->lod_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS,
-                                                  HASH_POOLS_MAX_BITS,
-                                                  HASH_POOLS_BKT_BITS, 0,
-                                                  CFS_HASH_MIN_THETA,
-                                                  CFS_HASH_MAX_THETA,
-                                                  &pool_hash_operations,
-                                                  CFS_HASH_DEFAULT);
-       if (lod->lod_pools_hash_body == NULL)
+       lod->lod_pool_count = 0;
+       rc = lod_pool_hash_init(&lod->lod_pools_hash_body);
+       if (rc)
                RETURN(-ENOMEM);
 
        INIT_LIST_HEAD(&lod->lod_pool_list);
                RETURN(-ENOMEM);
 
        INIT_LIST_HEAD(&lod->lod_pool_list);
@@ -2146,7 +2141,7 @@ out_mdt_rr_pool:
 out_mdt_pool:
        lod_tgt_pool_free(&lod->lod_mdt_descs.ltd_tgt_pool);
 out_hash:
 out_mdt_pool:
        lod_tgt_pool_free(&lod->lod_mdt_descs.ltd_tgt_pool);
 out_hash:
-       cfs_hash_putref(lod->lod_pools_hash_body);
+       lod_pool_hash_destroy(&lod->lod_pools_hash_body);
 
        return rc;
 }
 
        return rc;
 }
@@ -2173,7 +2168,7 @@ int lod_pools_fini(struct lod_device *lod)
                lod_pool_del(obd, pool->pool_name);
        }
 
                lod_pool_del(obd, pool->pool_name);
        }
 
-       cfs_hash_putref(lod->lod_pools_hash_body);
+       lod_pool_hash_destroy(&lod->lod_pools_hash_body);
        lod_tgt_pool_free(&lod->lod_ost_descs.ltd_qos.lq_rr.lqr_pool);
        lod_tgt_pool_free(&lod->lod_ost_descs.ltd_tgt_pool);
        lod_tgt_pool_free(&lod->lod_mdt_descs.ltd_qos.lq_rr.lqr_pool);
        lod_tgt_pool_free(&lod->lod_ost_descs.ltd_qos.lq_rr.lqr_pool);
        lod_tgt_pool_free(&lod->lod_ost_descs.ltd_tgt_pool);
        lod_tgt_pool_free(&lod->lod_mdt_descs.ltd_qos.lq_rr.lqr_pool);
index 51cff07..04d81dd 100644 (file)
@@ -58,6 +58,7 @@
 #define DEBUG_SUBSYSTEM S_LOV
 
 #include <libcfs/libcfs.h>
 #define DEBUG_SUBSYSTEM S_LOV
 
 #include <libcfs/libcfs.h>
+#include <libcfs/linux/linux-hash.h>
 #include <obd.h>
 #include "lod_internal.h"
 
 #include <obd.h>
 #include "lod_internal.h"
 
@@ -96,130 +97,38 @@ void lod_pool_putref(struct pool_desc *pool)
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
        if (atomic_dec_and_test(&pool->pool_refcount)) {
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
        if (atomic_dec_and_test(&pool->pool_refcount)) {
-               LASSERT(hlist_unhashed(&pool->pool_hash));
                LASSERT(list_empty(&pool->pool_list));
                LASSERT(pool->pool_proc_entry == NULL);
                lod_tgt_pool_free(&(pool->pool_rr.lqr_pool));
                lod_tgt_pool_free(&(pool->pool_obds));
                LASSERT(list_empty(&pool->pool_list));
                LASSERT(pool->pool_proc_entry == NULL);
                lod_tgt_pool_free(&(pool->pool_rr.lqr_pool));
                lod_tgt_pool_free(&(pool->pool_obds));
-               OBD_FREE_PTR(pool);
+               kfree_rcu(pool, pool_rcu);
                EXIT;
        }
 }
 
                EXIT;
        }
 }
 
-/**
- * Drop the refcount in cases where the caller holds a spinlock.
- *
- * This is needed if the caller cannot be blocked while freeing memory.
- * It assumes that there is some other known refcount held on the \a pool
- * and the memory cannot actually be freed, but the refcounting needs to
- * be kept accurate.
- *
- * \param[in] pool     pool descriptor on which to drop reference
- */
-static void pool_putref_locked(struct pool_desc *pool)
+static u32 pool_hashfh(const void *data, u32 len, u32 seed)
 {
 {
-       CDEBUG(D_INFO, "pool %p\n", pool);
-       LASSERT(atomic_read(&pool->pool_refcount) > 1);
+       const char *pool_name = data;
 
 
-       atomic_dec(&pool->pool_refcount);
+       return hashlen_hash(cfs_hashlen_string((void *)(unsigned long)seed,
+                                              pool_name));
 }
 
 }
 
-/*
- * Group of functions needed for cfs_hash implementation.  This
- * includes pool lookup, refcounting, and cleanup.
- */
-
-/**
- * Hash the pool name for use by the cfs_hash handlers.
- *
- * Use the standard DJB2 hash function for ASCII strings in Lustre.
- *
- * \param[in] hash_body        hash structure where this key is embedded (unused)
- * \param[in] key      key to be hashed (in this case the pool name)
- * \param[in] mask     bitmask to limit the hash value to the desired size
- *
- * \retval             computed hash value from \a key and limited by \a mask
- */
-static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key,
-                        unsigned mask)
-{
-       return cfs_hash_djb2_hash(key, strnlen(key, LOV_MAXPOOLNAME), mask);
-}
-
-/**
- * Return the actual key (pool name) from the hashed \a hnode.
- *
- * Allows extracting the key name when iterating over all hash entries.
- *
- * \param[in] hnode    hash node found by lookup or iteration
- *
- * \retval             char array referencing the pool name (no refcount)
- */
-static void *pool_key(struct hlist_node *hnode)
+static int pool_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
 {
 {
-       struct pool_desc *pool;
+       const struct pool_desc *pool = obj;
+       const char *pool_name = arg->key;
 
 
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-       return pool->pool_name;
+       return strcmp(pool_name, pool->pool_name);
 }
 
 }
 
-/**
- * Check if the specified hash key matches the hash node.
- *
- * This is needed in case there is a hash key collision, allowing the hash
- * table lookup/iteration to distinguish between the two entries.
- *
- * \param[in] key      key (pool name) being searched for
- * \param[in] compared current entry being compared
- *
- * \retval             0 if \a key is the same as the key of \a compared
- * \retval             1 if \a key is different from the key of \a compared
- */
-static int pool_hashkey_keycmp(const void *key, struct hlist_node *compared)
-{
-       return !strncmp(key, pool_key(compared), LOV_MAXPOOLNAME);
-}
-
-/**
- * Return the actual pool data structure from the hash table entry.
- *
- * Once the hash table entry is found, extract the pool data from it.
- * The return type of this function is void * because it needs to be
- * assigned to the generic hash operations table.
- *
- * \param[in] hnode    hash table entry
- *
- * \retval             struct pool_desc for the specified \a hnode
- */
-static void *pool_hashobject(struct hlist_node *hnode)
-{
-       return hlist_entry(hnode, struct pool_desc, pool_hash);
-}
-
-static void pool_hashrefcount_get(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-       struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-       pool_getref(pool);
-}
-
-static void pool_hashrefcount_put_locked(struct cfs_hash *hs,
-                                        struct hlist_node *hnode)
-{
-       struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-       pool_putref_locked(pool);
-}
-
-struct cfs_hash_ops pool_hash_operations = {
-       .hs_hash        = pool_hashfn,
-       .hs_key         = pool_key,
-       .hs_keycmp      = pool_hashkey_keycmp,
-       .hs_object      = pool_hashobject,
-       .hs_get         = pool_hashrefcount_get,
-       .hs_put_locked  = pool_hashrefcount_put_locked,
+static const struct rhashtable_params pools_hash_params = {
+       .key_len        = 1, /* actually variable */
+       .key_offset     = offsetof(struct pool_desc, pool_name),
+       .head_offset    = offsetof(struct pool_desc, pool_hash),
+       .hashfn         = pool_hashfh,
+       .obj_cmpfn      = pool_cmpfn,
+       .automatic_shrinking = true,
 };
 
 /*
 };
 
 /*
@@ -626,6 +535,23 @@ int lod_tgt_pool_free(struct lu_tgt_pool *op)
        RETURN(0);
 }
 
        RETURN(0);
 }
 
+static void pools_hash_exit(void *vpool, void *data)
+{
+       struct pool_desc *pool = vpool;
+
+       lod_pool_putref(pool);
+}
+
+int lod_pool_hash_init(struct rhashtable *tbl)
+{
+       return rhashtable_init(tbl, &pools_hash_params);
+}
+
+void lod_pool_hash_destroy(struct rhashtable *tbl)
+{
+       rhashtable_free_and_destroy(tbl, pools_hash_exit, NULL);
+}
+
 /**
  * Allocate a new pool for the specified device.
  *
 /**
  * Allocate a new pool for the specified device.
  *
@@ -650,7 +576,8 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
        if (strlen(poolname) > LOV_MAXPOOLNAME)
                RETURN(-ENAMETOOLONG);
 
        if (strlen(poolname) > LOV_MAXPOOLNAME)
                RETURN(-ENAMETOOLONG);
 
-       OBD_ALLOC_PTR(new_pool);
+       /* OBD_ALLOC_* doesn't work with direct kfree_rcu use */
+       new_pool = kmalloc(sizeof(*new_pool), GFP_KERNEL);
        if (new_pool == NULL)
                RETURN(-ENOMEM);
 
        if (new_pool == NULL)
                RETURN(-ENOMEM);
 
@@ -667,8 +594,6 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
        if (rc)
                GOTO(out_free_pool_obds, rc);
 
        if (rc)
                GOTO(out_free_pool_obds, rc);
 
-       INIT_HLIST_NODE(&new_pool->pool_hash);
-
 #ifdef CONFIG_PROC_FS
        pool_getref(new_pool);
        new_pool->pool_proc_entry = lprocfs_add_simple(lod->lod_pool_proc_entry,
 #ifdef CONFIG_PROC_FS
        pool_getref(new_pool);
        new_pool->pool_proc_entry = lprocfs_add_simple(lod->lod_pool_proc_entry,
@@ -689,11 +614,19 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
        lod->lod_pool_count++;
        spin_unlock(&obd->obd_dev_lock);
 
        lod->lod_pool_count++;
        spin_unlock(&obd->obd_dev_lock);
 
-       /* add to find only when it fully ready  */
-       rc = cfs_hash_add_unique(lod->lod_pools_hash_body, poolname,
-                                &new_pool->pool_hash);
-       if (rc)
-               GOTO(out_err, rc = -EEXIST);
+       /* Add to hash table only when it is fully ready. */
+       rc = rhashtable_lookup_insert_fast(&lod->lod_pools_hash_body,
+                                          &new_pool->pool_hash,
+                                          pools_hash_params);
+       if (rc) {
+               if (rc != -EEXIST)
+                       /*
+                        * Hide -E2BIG and -EBUSY which
+                        * are not helpful.
+                        */
+                       rc = -ENOMEM;
+               GOTO(out_err, rc);
+       }
 
        CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
                        poolname, lod->lod_pool_count);
 
        CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
                        poolname, lod->lod_pool_count);
@@ -731,8 +664,15 @@ int lod_pool_del(struct obd_device *obd, char *poolname)
        ENTRY;
 
        /* lookup and kill hash reference */
        ENTRY;
 
        /* lookup and kill hash reference */
-       pool = cfs_hash_del_key(lod->lod_pools_hash_body, poolname);
-       if (pool == NULL)
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && rhashtable_remove_fast(&lod->lod_pools_hash_body,
+                                          &pool->pool_hash,
+                                          pools_hash_params) != 0)
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
                RETURN(-ENOENT);
 
        if (pool->pool_proc_entry != NULL) {
                RETURN(-ENOENT);
 
        if (pool->pool_proc_entry != NULL) {
@@ -773,8 +713,13 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
        int rc = -EINVAL;
        ENTRY;
 
        int rc = -EINVAL;
        ENTRY;
 
-       pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
-       if (pool == NULL)
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
                RETURN(-ENOENT);
 
        obd_str2uuid(&ost_uuid, ostname);
                RETURN(-ENOENT);
 
        obd_str2uuid(&ost_uuid, ostname);
@@ -831,8 +776,14 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
        int rc = -EINVAL;
        ENTRY;
 
        int rc = -EINVAL;
        ENTRY;
 
-       pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
-       if (pool == NULL)
+       /* lookup and kill hash reference */
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
                RETURN(-ENOENT);
 
        obd_str2uuid(&ost_uuid, ostname);
                RETURN(-ENOENT);
 
        obd_str2uuid(&ost_uuid, ostname);
@@ -913,10 +864,15 @@ struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname)
 
        pool = NULL;
        if (poolname[0] != '\0') {
 
        pool = NULL;
        if (poolname[0] != '\0') {
-               pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
-               if (pool == NULL)
-                       CDEBUG(D_CONFIG, "%s: request for an unknown pool ("
-                              LOV_POOLNAMEF")\n",
+               rcu_read_lock();
+               pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                        pools_hash_params);
+               if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+                       pool = NULL;
+               rcu_read_unlock();
+               if (!pool)
+                       CDEBUG(D_CONFIG,
+                              "%s: request for an unknown pool (" LOV_POOLNAMEF ")\n",
                               lod->lod_child_exp->exp_obd->obd_name, poolname);
                if (pool != NULL && pool_tgt_count(pool) == 0) {
                        CDEBUG(D_CONFIG, "%s: request for an empty pool ("
                               lod->lod_child_exp->exp_obd->obd_name, poolname);
                if (pool != NULL && pool_tgt_count(pool) == 0) {
                        CDEBUG(D_CONFIG, "%s: request for an empty pool ("
index 2307d10..ae80402 100644 (file)
@@ -236,12 +236,16 @@ struct pool_desc {
        char                     pool_name[LOV_MAXPOOLNAME + 1];
        struct lu_tgt_pool       pool_obds;
        atomic_t                 pool_refcount;
        char                     pool_name[LOV_MAXPOOLNAME + 1];
        struct lu_tgt_pool       pool_obds;
        atomic_t                 pool_refcount;
-       struct hlist_node        pool_hash;     /* access by poolname */
+       struct rhash_head        pool_hash;     /* access by poolname */
        struct list_head         pool_list;     /* serial access */
        struct list_head         pool_list;     /* serial access */
+       struct rcu_head          pool_rcu;
        struct proc_dir_entry   *pool_proc_entry;
        struct obd_device       *pool_lobd;     /* owner */
 };
 
        struct proc_dir_entry   *pool_proc_entry;
        struct obd_device       *pool_lobd;     /* owner */
 };
 
+int lov_pool_hash_init(struct rhashtable *tbl);
+void lov_pool_hash_destroy(struct rhashtable *tbl);
+
 struct lov_request {
        struct obd_info          rq_oi;
        struct lov_request_set  *rq_rqset;
 struct lov_request {
        struct obd_info          rq_oi;
        struct lov_request_set  *rq_rqset;
@@ -331,8 +335,6 @@ extern struct lu_device_type lov_device_type;
 
 #define LOV_MDC_TGT_MAX 256
 
 
 #define LOV_MDC_TGT_MAX 256
 
-/* pools */
-extern struct cfs_hash_ops pool_hash_operations;
 /* lu_tgt_pool methods */
 int lov_ost_pool_init(struct lu_tgt_pool *op, unsigned int count);
 int lov_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count);
 /* lu_tgt_pool methods */
 int lov_ost_pool_init(struct lu_tgt_pool *op, unsigned int count);
 int lov_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count);
index 2bf865f..a66b697 100644 (file)
@@ -759,15 +759,12 @@ int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 
        init_rwsem(&lov->lov_notify_lock);
 
 
        init_rwsem(&lov->lov_notify_lock);
 
-        lov->lov_pools_hash_body = cfs_hash_create("POOLS", HASH_POOLS_CUR_BITS,
-                                                   HASH_POOLS_MAX_BITS,
-                                                   HASH_POOLS_BKT_BITS, 0,
-                                                   CFS_HASH_MIN_THETA,
-                                                   CFS_HASH_MAX_THETA,
-                                                   &pool_hash_operations,
-                                                   CFS_HASH_DEFAULT);
        INIT_LIST_HEAD(&lov->lov_pool_list);
         lov->lov_pool_count = 0;
        INIT_LIST_HEAD(&lov->lov_pool_list);
         lov->lov_pool_count = 0;
+       rc = lov_pool_hash_init(&lov->lov_pools_hash_body);
+       if (rc)
+               GOTO(out, rc);
+
         rc = lov_ost_pool_init(&lov->lov_packed, 0);
         if (rc)
                GOTO(out, rc);
         rc = lov_ost_pool_init(&lov->lov_packed, 0);
         if (rc)
                GOTO(out, rc);
@@ -804,7 +801,7 @@ static int lov_cleanup(struct obd_device *obd)
                /* coverity[overrun-buffer-val] */
                 lov_pool_del(obd, pool->pool_name);
         }
                /* coverity[overrun-buffer-val] */
                 lov_pool_del(obd, pool->pool_name);
         }
-        cfs_hash_putref(lov->lov_pools_hash_body);
+       lov_pool_hash_destroy(&lov->lov_pools_hash_body);
         lov_ost_pool_free(&lov->lov_packed);
 
        lprocfs_obd_cleanup(obd);
         lov_ost_pool_free(&lov->lov_packed);
 
        lprocfs_obd_cleanup(obd);
index 8f69478..78bc92e 100644 (file)
@@ -41,6 +41,7 @@
 #define DEBUG_SUBSYSTEM S_LOV
 
 #include <libcfs/libcfs.h>
 #define DEBUG_SUBSYSTEM S_LOV
 
 #include <libcfs/libcfs.h>
+#include <libcfs/linux/linux-hash.h>
 
 #include <obd.h>
 #include "lov_internal.h"
 
 #include <obd.h>
 #include "lov_internal.h"
 #define pool_tgt(_p, _i) \
                _p->pool_lobd->u.lov.lov_tgts[_p->pool_obds.op_array[_i]]
 
 #define pool_tgt(_p, _i) \
                _p->pool_lobd->u.lov.lov_tgts[_p->pool_obds.op_array[_i]]
 
+static u32 pool_hashfh(const void *data, u32 len, u32 seed)
+{
+       const char *pool_name = data;
+
+       return hashlen_hash(cfs_hashlen_string((void *)(unsigned long)seed,
+                                              pool_name));
+}
+
+static int pool_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+       const struct pool_desc *pool = obj;
+       const char *pool_name = arg->key;
+
+       return strcmp(pool_name, pool->pool_name);
+}
+
+static const struct rhashtable_params pools_hash_params = {
+       .key_len        = 1, /* actually variable */
+       .key_offset     = offsetof(struct pool_desc, pool_name),
+       .head_offset    = offsetof(struct pool_desc, pool_hash),
+       .hashfn         = pool_hashfh,
+       .obj_cmpfn      = pool_cmpfn,
+       .automatic_shrinking = true,
+};
+
 static void lov_pool_getref(struct pool_desc *pool)
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
 static void lov_pool_getref(struct pool_desc *pool)
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
@@ -58,98 +84,14 @@ static void lov_pool_putref(struct pool_desc *pool)
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
        if (atomic_dec_and_test(&pool->pool_refcount)) {
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
        if (atomic_dec_and_test(&pool->pool_refcount)) {
-               LASSERT(hlist_unhashed(&pool->pool_hash));
                LASSERT(list_empty(&pool->pool_list));
                LASSERT(pool->pool_proc_entry == NULL);
                lov_ost_pool_free(&(pool->pool_obds));
                LASSERT(list_empty(&pool->pool_list));
                LASSERT(pool->pool_proc_entry == NULL);
                lov_ost_pool_free(&(pool->pool_obds));
-               OBD_FREE_PTR(pool);
+               kfree_rcu(pool, pool_rcu);
                EXIT;
        }
 }
 
                EXIT;
        }
 }
 
-static void lov_pool_putref_locked(struct pool_desc *pool)
-{
-       CDEBUG(D_INFO, "pool %p\n", pool);
-       LASSERT(atomic_read(&pool->pool_refcount) > 1);
-
-       atomic_dec(&pool->pool_refcount);
-}
-
-/*
- * hash function using a Rotating Hash algorithm
- * Knuth, D. The Art of Computer Programming,
- * Volume 3: Sorting and Searching,
- * Chapter 6.4.
- * Addison Wesley, 1973
- */
-static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key,
-                        unsigned mask)
-{
-        int i;
-        __u32 result;
-        char *poolname;
-
-        result = 0;
-        poolname = (char *)key;
-        for (i = 0; i < LOV_MAXPOOLNAME; i++) {
-                if (poolname[i] == '\0')
-                        break;
-                result = (result << 4)^(result >> 28) ^  poolname[i];
-        }
-        return (result % mask);
-}
-
-static void *pool_key(struct hlist_node *hnode)
-{
-        struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-        return (pool->pool_name);
-}
-
-static int
-pool_hashkey_keycmp(const void *key, struct hlist_node *compared_hnode)
-{
-        char *pool_name;
-        struct pool_desc *pool;
-
-        pool_name = (char *)key;
-       pool = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
-        return !strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
-}
-
-static void *pool_hashobject(struct hlist_node *hnode)
-{
-       return hlist_entry(hnode, struct pool_desc, pool_hash);
-}
-
-static void pool_hashrefcount_get(struct cfs_hash *hs, struct hlist_node *hnode)
-{
-        struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-        lov_pool_getref(pool);
-}
-
-static void pool_hashrefcount_put_locked(struct cfs_hash *hs,
-                                        struct hlist_node *hnode)
-{
-        struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-        lov_pool_putref_locked(pool);
-}
-
-struct cfs_hash_ops pool_hash_operations = {
-        .hs_hash        = pool_hashfn,
-        .hs_key         = pool_key,
-        .hs_keycmp      = pool_hashkey_keycmp,
-        .hs_object      = pool_hashobject,
-        .hs_get         = pool_hashrefcount_get,
-        .hs_put_locked  = pool_hashrefcount_put_locked,
-
-};
-
 #ifdef CONFIG_PROC_FS
 /*
  * pool /proc seq_file methods
 #ifdef CONFIG_PROC_FS
 /*
  * pool /proc seq_file methods
@@ -421,6 +363,22 @@ int lov_ost_pool_free(struct lu_tgt_pool *op)
        RETURN(0);
 }
 
        RETURN(0);
 }
 
+static void pools_hash_exit(void *vpool, void *data)
+{
+       struct pool_desc *pool = vpool;
+
+       lov_pool_putref(pool);
+}
+
+int lov_pool_hash_init(struct rhashtable *tbl)
+{
+       return rhashtable_init(tbl, &pools_hash_params);
+}
+
+void lov_pool_hash_destroy(struct rhashtable *tbl)
+{
+       rhashtable_free_and_destroy(tbl, pools_hash_exit, NULL);
+}
 
 int lov_pool_new(struct obd_device *obd, char *poolname)
 {
 
 int lov_pool_new(struct obd_device *obd, char *poolname)
 {
@@ -434,7 +392,8 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
         if (strlen(poolname) > LOV_MAXPOOLNAME)
                 RETURN(-ENAMETOOLONG);
 
         if (strlen(poolname) > LOV_MAXPOOLNAME)
                 RETURN(-ENAMETOOLONG);
 
-        OBD_ALLOC_PTR(new_pool);
+       /* OBD_ALLOC doesn't work with direct use of kfree_rcu */
+       new_pool = kmalloc(sizeof(*new_pool), GFP_KERNEL);
         if (new_pool == NULL)
                 RETURN(-ENOMEM);
 
         if (new_pool == NULL)
                 RETURN(-ENOMEM);
 
@@ -448,8 +407,6 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
        if (rc)
                GOTO(out_err, rc);
 
        if (rc)
                GOTO(out_err, rc);
 
-       INIT_HLIST_NODE(&new_pool->pool_hash);
-
 #ifdef CONFIG_PROC_FS
        /* get ref for /proc file */
         lov_pool_getref(new_pool);
 #ifdef CONFIG_PROC_FS
        /* get ref for /proc file */
         lov_pool_getref(new_pool);
@@ -470,11 +427,19 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
        lov->lov_pool_count++;
        spin_unlock(&obd->obd_dev_lock);
 
        lov->lov_pool_count++;
        spin_unlock(&obd->obd_dev_lock);
 
-        /* add to find only when it fully ready  */
-        rc = cfs_hash_add_unique(lov->lov_pools_hash_body, poolname,
-                                 &new_pool->pool_hash);
-        if (rc)
-                GOTO(out_err, rc = -EEXIST);
+       /* Add to hash table only when it is fully ready. */
+       rc = rhashtable_lookup_insert_fast(&lov->lov_pools_hash_body,
+                                          &new_pool->pool_hash,
+                                          pools_hash_params);
+       if (rc) {
+               if (rc != -EEXIST)
+                       /*
+                        * Hide -E2BIG and -EBUSY which
+                        * are not helpful.
+                        */
+                       rc = -ENOMEM;
+               GOTO(out_err, rc);
+       }
 
         CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
                poolname, lov->lov_pool_count);
 
         CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
                poolname, lov->lov_pool_count);
@@ -501,10 +466,17 @@ int lov_pool_del(struct obd_device *obd, char *poolname)
 
         lov = &(obd->u.lov);
 
 
         lov = &(obd->u.lov);
 
-        /* lookup and kill hash reference */
-        pool = cfs_hash_del_key(lov->lov_pools_hash_body, poolname);
-        if (pool == NULL)
-                RETURN(-ENOENT);
+       /* lookup and kill hash reference */
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && rhashtable_remove_fast(&lov->lov_pools_hash_body,
+                                          &pool->pool_hash,
+                                          pools_hash_params) != 0)
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
+               RETURN(-ENOENT);
 
         if (pool->pool_proc_entry != NULL) {
                 CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
 
         if (pool->pool_proc_entry != NULL) {
                 CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
@@ -535,9 +507,14 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 
         lov = &(obd->u.lov);
 
 
         lov = &(obd->u.lov);
 
-        pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
-        if (pool == NULL)
-                RETURN(-ENOENT);
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
+               RETURN(-ENOENT);
 
         obd_str2uuid(&ost_uuid, ostname);
 
 
         obd_str2uuid(&ost_uuid, ostname);
 
@@ -581,9 +558,15 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 
         lov = &(obd->u.lov);
 
 
         lov = &(obd->u.lov);
 
-        pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
-        if (pool == NULL)
-                RETURN(-ENOENT);
+       /* lookup and kill hash reference */
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
+               RETURN(-ENOENT);
 
         obd_str2uuid(&ost_uuid, ostname);
 
 
         obd_str2uuid(&ost_uuid, ostname);