Whamcloud - gitweb
LU-8130 lov: convert lo[v|d]_pool to use rhashtable
[fs/lustre-release.git] / lustre / lov / lov_pool.c
index 37be01e..78bc92e 100644 (file)
  * in the LICENSE file that accompanied this code).
  *
  * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see [sun.com URL with a
- * copy of GPLv2].
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -45,6 +41,7 @@
 #define DEBUG_SUBSYSTEM S_LOV
 
 #include <libcfs/libcfs.h>
+#include <libcfs/linux/linux-hash.h>
 
 #include <obd.h>
 #include "lov_internal.h"
 #define pool_tgt(_p, _i) \
                _p->pool_lobd->u.lov.lov_tgts[_p->pool_obds.op_array[_i]]
 
+static u32 pool_hashfh(const void *data, u32 len, u32 seed)
+{ /*
+   * Hash callback installed as .hashfn in pools_hash_params.
+   * @data is treated as a NUL-terminated pool name; @len is not used.
+   * @seed is cast to a pointer-sized value and passed as the first
+   * argument of cfs_hashlen_string() - presumably the hash salt
+   * (TODO confirm against its definition). The result is reduced to
+   * a u32 with hashlen_hash().
+   */
+       const char *pool_name = data;
+
+       return hashlen_hash(cfs_hashlen_string((void *)(unsigned long)seed,
+                                              pool_name));
+}
+
+static int pool_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{ /*
+   * Comparison callback installed as .obj_cmpfn in pools_hash_params.
+   * Compares the key being looked up (arg->key, used as a pool name
+   * string) with the pool_name of the pool_desc at @obj.
+   * Returns the strcmp() result, i.e. 0 exactly when the names match.
+   * Note the sense differs from the removed pool_hashkey_keycmp(),
+   * which returned !strncmp().
+   */
+       const struct pool_desc *pool = obj;
+       const char *pool_name = arg->key;
+
+       return strcmp(pool_name, pool->pool_name);
+}
+
+static const struct rhashtable_params pools_hash_params = { /*
+   * rhashtable parameters for the table of struct pool_desc keyed by
+   * pool name. Passed to rhashtable_init() and to the lookup, insert
+   * and remove calls elsewhere in this patch.
+   */
+       .key_len        = 1, /* actually variable */
+       .key_offset     = offsetof(struct pool_desc, pool_name), /* key is the pool_name member */
+       .head_offset    = offsetof(struct pool_desc, pool_hash), /* hash linkage is the pool_hash member */
+       .hashfn         = pool_hashfh, /* hashes the name as a string */
+       .obj_cmpfn      = pool_cmpfn, /* strcmp()-based name comparison */
+       .automatic_shrinking = true,
+};
+
 /*
  * Take a reference on @pool: log the pointer at D_INFO level, then
  * atomically increment pool->pool_refcount.
  */
 static void lov_pool_getref(struct pool_desc *pool)
 {
         CDEBUG(D_INFO, "pool %p\n", pool);
         atomic_inc(&pool->pool_refcount);
 }
 
-void lov_pool_putref(struct pool_desc *pool) 
+static void lov_pool_putref(struct pool_desc *pool)
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
        if (atomic_dec_and_test(&pool->pool_refcount)) {
-               LASSERT(hlist_unhashed(&pool->pool_hash));
                LASSERT(list_empty(&pool->pool_list));
                LASSERT(pool->pool_proc_entry == NULL);
                lov_ost_pool_free(&(pool->pool_obds));
-               OBD_FREE_PTR(pool);
+               kfree_rcu(pool, pool_rcu);
                EXIT;
        }
 }
 
-void lov_pool_putref_locked(struct pool_desc *pool)
-{
-       CDEBUG(D_INFO, "pool %p\n", pool);
-       LASSERT(atomic_read(&pool->pool_refcount) > 1);
-
-       atomic_dec(&pool->pool_refcount);
-}
-
-/*
- * hash function using a Rotating Hash algorithm
- * Knuth, D. The Art of Computer Programming,
- * Volume 3: Sorting and Searching,
- * Chapter 6.4.
- * Addison Wesley, 1973
- */
-static __u32 pool_hashfn(cfs_hash_t *hash_body, const void *key, unsigned mask)
-{
-        int i;
-        __u32 result;
-        char *poolname;
-
-        result = 0;
-        poolname = (char *)key;
-        for (i = 0; i < LOV_MAXPOOLNAME; i++) {
-                if (poolname[i] == '\0')
-                        break;
-                result = (result << 4)^(result >> 28) ^  poolname[i];
-        }
-        return (result % mask);
-}
-
-static void *pool_key(struct hlist_node *hnode)
-{
-        struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-        return (pool->pool_name);
-}
-
-static int
-pool_hashkey_keycmp(const void *key, struct hlist_node *compared_hnode)
-{
-        char *pool_name;
-        struct pool_desc *pool;
-
-        pool_name = (char *)key;
-       pool = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
-        return !strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
-}
-
-static void *pool_hashobject(struct hlist_node *hnode)
-{
-       return hlist_entry(hnode, struct pool_desc, pool_hash);
-}
-
-static void pool_hashrefcount_get(cfs_hash_t *hs, struct hlist_node *hnode)
-{
-        struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-        lov_pool_getref(pool);
-}
-
-static void pool_hashrefcount_put_locked(cfs_hash_t *hs,
-                                        struct hlist_node *hnode)
-{
-        struct pool_desc *pool;
-
-       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-        lov_pool_putref_locked(pool);
-}
-
-cfs_hash_ops_t pool_hash_operations = {
-        .hs_hash        = pool_hashfn,
-        .hs_key         = pool_key,
-        .hs_keycmp      = pool_hashkey_keycmp,
-        .hs_object      = pool_hashobject,
-        .hs_get         = pool_hashrefcount_get,
-        .hs_put_locked  = pool_hashrefcount_put_locked,
-
-};
-
-#ifdef LPROCFS
-/* ifdef needed for liblustre support */
+#ifdef CONFIG_PROC_FS
 /*
  * pool /proc seq_file methods
  */
@@ -184,14 +122,11 @@ static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
 
         /* iterate to find a non empty entry */
         prev_idx = iter->idx;
-       down_read(&pool_tgt_rw_sem(iter->pool));
         iter->idx++;
-        if (iter->idx == pool_tgt_count(iter->pool)) {
+       if (iter->idx >= pool_tgt_count(iter->pool)) {
                 iter->idx = prev_idx; /* we stay on the last entry */
-               up_read(&pool_tgt_rw_sem(iter->pool));
                 return NULL;
         }
-       up_read(&pool_tgt_rw_sem(iter->pool));
         (*pos)++;
         /* return != NULL to continue */
         return iter;
@@ -222,6 +157,7 @@ static void *pool_proc_start(struct seq_file *s, loff_t *pos)
          * we can free it at stop() */
         /* /!\ do not forget to restore it to pool before freeing it */
         s->private = iter;
+       down_read(&pool_tgt_rw_sem(pool));
         if (*pos > 0) {
                 loff_t i;
                 void *ptr;
@@ -243,13 +179,13 @@ static void pool_proc_stop(struct seq_file *s, void *v)
          * calling start() method (see seq_read() from fs/seq_file.c)
          * we have to free only if s->private is an iterator */
         if ((iter) && (iter->magic == POOL_IT_MAGIC)) {
+               up_read(&pool_tgt_rw_sem(iter->pool));
                 /* we restore s->private so next call to pool_proc_start()
                  * will work */
                 s->private = iter->pool;
                 lov_pool_putref(iter->pool);
                 OBD_FREE_PTR(iter);
         }
-        return;
 }
 
 static int pool_proc_show(struct seq_file *s, void *v)
@@ -261,9 +197,7 @@ static int pool_proc_show(struct seq_file *s, void *v)
        LASSERT(iter->pool != NULL);
        LASSERT(iter->idx <= pool_tgt_count(iter->pool));
 
-       down_read(&pool_tgt_rw_sem(iter->pool));
         tgt = pool_tgt(iter->pool, iter->idx);
-       up_read(&pool_tgt_rw_sem(iter->pool));
         if (tgt)
                 seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
 
@@ -295,7 +229,7 @@ static struct file_operations pool_proc_operations = {
         .llseek         = seq_lseek,
         .release        = seq_release,
 };
-#endif /* LPROCFS */
+#endif /* CONFIG_PROC_FS */
 
 void lov_dump_pool(int level, struct pool_desc *pool)
 {
@@ -320,50 +254,51 @@ void lov_dump_pool(int level, struct pool_desc *pool)
 }
 
 #define LOV_POOL_INIT_COUNT 2
-int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
+int lov_ost_pool_init(struct lu_tgt_pool *op, unsigned int count)
 {
-        ENTRY;
+       ENTRY;
 
-        if (count == 0)
-                count = LOV_POOL_INIT_COUNT;
-        op->op_array = NULL;
-        op->op_count = 0;
+       if (count == 0)
+               count = LOV_POOL_INIT_COUNT;
+       op->op_array = NULL;
+       op->op_count = 0;
        init_rwsem(&op->op_rw_sem);
-        op->op_size = count;
-        OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
-        if (op->op_array == NULL) {
-                op->op_size = 0;
-                RETURN(-ENOMEM);
-        }
-        EXIT;
-        return 0;
+       op->op_size = count * sizeof(op->op_array[0]);
+       OBD_ALLOC(op->op_array, op->op_size);
+       if (op->op_array == NULL) {
+               op->op_size = 0;
+               RETURN(-ENOMEM);
+       }
+       EXIT;
+       return 0;
 }
 
 /* Caller must hold op->op_rw_sem for writing */
-int lov_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
+int lov_ost_pool_extend(struct lu_tgt_pool *op, unsigned int min_count)
 {
-        __u32 *new;
-        int new_size;
-
-        LASSERT(min_count != 0);
-
-        if (op->op_count < op->op_size)
-                return 0;
-
-        new_size = max(min_count, 2 * op->op_size);
-        OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
-        if (new == NULL)
-                return -ENOMEM;
-
-        /* copy old array to new one */
-        memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
-        OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
-        op->op_array = new;
-        op->op_size = new_size;
-        return 0;
+       __u32 *new;
+       __u32 new_size;
+
+       LASSERT(min_count != 0);
+
+       if (op->op_count * sizeof(op->op_array[0]) < op->op_size)
+               return 0;
+
+       new_size = max_t(__u32, min_count * sizeof(op->op_array[0]),
+                        2 * op->op_size);
+       OBD_ALLOC(new, new_size);
+       if (new == NULL)
+               return -ENOMEM;
+
+       /* copy old array to new one */
+       memcpy(new, op->op_array, op->op_size);
+       OBD_FREE(op->op_array, op->op_size);
+       op->op_array = new;
+       op->op_size = new_size;
+       return 0;
 }
 
-int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
+int lov_ost_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count)
 {
         int rc = 0, i;
         ENTRY;
@@ -388,7 +323,7 @@ out:
         return rc;
 }
 
-int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
+int lov_ost_pool_remove(struct lu_tgt_pool *op, __u32 idx)
 {
         int i;
         ENTRY;
@@ -410,24 +345,40 @@ int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
         RETURN(-EINVAL);
 }
 
-int lov_ost_pool_free(struct ost_pool *op)
+int lov_ost_pool_free(struct lu_tgt_pool *op)
 {
-        ENTRY;
+       ENTRY;
 
-        if (op->op_size == 0)
-                RETURN(0);
+       if (op->op_size == 0)
+               RETURN(0);
 
        down_write(&op->op_rw_sem);
 
-        OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
-        op->op_array = NULL;
-        op->op_count = 0;
-        op->op_size = 0;
+       OBD_FREE(op->op_array, op->op_size);
+       op->op_array = NULL;
+       op->op_count = 0;
+       op->op_size = 0;
 
        up_write(&op->op_rw_sem);
-        RETURN(0);
+       RETURN(0);
 }
 
+static void pools_hash_exit(void *vpool, void *data)
+{ /*
+   * Per-element callback handed to rhashtable_free_and_destroy() by
+   * lov_pool_hash_destroy(). @vpool is used as a struct pool_desc and
+   * one reference on it is dropped via lov_pool_putref().
+   * @data is not used.
+   */
+       struct pool_desc *pool = vpool;
+
+       lov_pool_putref(pool);
+}
+
+int lov_pool_hash_init(struct rhashtable *tbl)
+{ /*
+   * Initialise @tbl as a pool-name hash table using pools_hash_params.
+   * Returns the value returned by rhashtable_init().
+   */
+       return rhashtable_init(tbl, &pools_hash_params);
+}
+
+void lov_pool_hash_destroy(struct rhashtable *tbl)
+{ /*
+   * Tear down @tbl with rhashtable_free_and_destroy(), which is given
+   * pools_hash_exit() as the per-element callback and NULL as its
+   * extra argument.
+   */
+       rhashtable_free_and_destroy(tbl, pools_hash_exit, NULL);
+}
 
 int lov_pool_new(struct obd_device *obd, char *poolname)
 {
@@ -441,7 +392,8 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
         if (strlen(poolname) > LOV_MAXPOOLNAME)
                 RETURN(-ENAMETOOLONG);
 
-        OBD_ALLOC_PTR(new_pool);
+       /* OBD_ALLOC doesn't work with direct use of kfree_rcu */
+       new_pool = kmalloc(sizeof(*new_pool), GFP_KERNEL);
         if (new_pool == NULL)
                 RETURN(-ENOMEM);
 
@@ -455,21 +407,19 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
        if (rc)
                GOTO(out_err, rc);
 
-       INIT_HLIST_NODE(&new_pool->pool_hash);
-
-#ifdef LPROCFS
-        /* we need this assert seq_file is not implementated for liblustre */
-        /* get ref for /proc file */
+#ifdef CONFIG_PROC_FS
+       /* get ref for /proc file */
         lov_pool_getref(new_pool);
-        new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
-                                                       poolname, new_pool,
-                                                       &pool_proc_operations);
-        if (IS_ERR(new_pool->pool_proc_entry)) {
-                CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
-                new_pool->pool_proc_entry = NULL;
-                lov_pool_putref(new_pool);
-        }
-        CDEBUG(D_INFO, "pool %p - proc %p\n", new_pool, new_pool->pool_proc_entry);
+       new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
+                                                      poolname, new_pool,
+                                                      &pool_proc_operations);
+       if (IS_ERR(new_pool->pool_proc_entry)) {
+               CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
+               new_pool->pool_proc_entry = NULL;
+               lov_pool_putref(new_pool);
+       }
+       CDEBUG(D_INFO, "pool %p - proc %p\n",
+              new_pool, new_pool->pool_proc_entry);
 #endif
 
        spin_lock(&obd->obd_dev_lock);
@@ -477,11 +427,19 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
        lov->lov_pool_count++;
        spin_unlock(&obd->obd_dev_lock);
 
-        /* add to find only when it fully ready  */
-        rc = cfs_hash_add_unique(lov->lov_pools_hash_body, poolname,
-                                 &new_pool->pool_hash);
-        if (rc)
-                GOTO(out_err, rc = -EEXIST);
+       /* Add to hash table only when it is fully ready. */
+       rc = rhashtable_lookup_insert_fast(&lov->lov_pools_hash_body,
+                                          &new_pool->pool_hash,
+                                          pools_hash_params);
+       if (rc) {
+               if (rc != -EEXIST)
+                       /*
+                        * Hide -E2BIG and -EBUSY which
+                        * are not helpful.
+                        */
+                       rc = -ENOMEM;
+               GOTO(out_err, rc);
+       }
 
         CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
                poolname, lov->lov_pool_count);
@@ -508,10 +466,17 @@ int lov_pool_del(struct obd_device *obd, char *poolname)
 
         lov = &(obd->u.lov);
 
-        /* lookup and kill hash reference */
-        pool = cfs_hash_del_key(lov->lov_pools_hash_body, poolname);
-        if (pool == NULL)
-                RETURN(-ENOENT);
+       /* lookup and kill hash reference */
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && rhashtable_remove_fast(&lov->lov_pools_hash_body,
+                                          &pool->pool_hash,
+                                          pools_hash_params) != 0)
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
+               RETURN(-ENOENT);
 
         if (pool->pool_proc_entry != NULL) {
                 CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
@@ -542,15 +507,20 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 
         lov = &(obd->u.lov);
 
-        pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
-        if (pool == NULL)
-                RETURN(-ENOENT);
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
+               RETURN(-ENOENT);
 
         obd_str2uuid(&ost_uuid, ostname);
 
 
         /* search ost in lov array */
-        obd_getref(obd);
+       lov_tgts_getref(obd);
         for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
                 if (!lov->lov_tgts[lov_idx])
                         continue;
@@ -571,9 +541,10 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 
         EXIT;
 out:
-        obd_putref(obd);
-        lov_pool_putref(pool);
-        return rc;
+       lov_tgts_putref(obd);
+       lov_pool_putref(pool);
+
+       return rc;
 }
 
 int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
@@ -587,13 +558,19 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 
         lov = &(obd->u.lov);
 
-        pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
-        if (pool == NULL)
-                RETURN(-ENOENT);
+       /* look up the pool and take a reference on it */
+       rcu_read_lock();
+       pool = rhashtable_lookup(&lov->lov_pools_hash_body, poolname,
+                                pools_hash_params);
+       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
+               pool = NULL;
+       rcu_read_unlock();
+       if (!pool)
+               RETURN(-ENOENT);
 
         obd_str2uuid(&ost_uuid, ostname);
 
-        obd_getref(obd);
+       lov_tgts_getref(obd);
         /* search ost in lov array, to get index */
         for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
                 if (!lov->lov_tgts[lov_idx])
@@ -615,7 +592,8 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 
         EXIT;
 out:
-        obd_putref(obd);
-        lov_pool_putref(pool);
-        return rc;
+       lov_tgts_putref(obd);
+       lov_pool_putref(pool);
+
+       return rc;
 }