Whamcloud - gitweb
LU-4381 lov: to not hold sub locks at initialization
[fs/lustre-release.git] / lustre / lov / lov_pool.c
index 764a494..c017ab4 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -38,6 +38,8 @@
  * OST pool methods
  *
  * Author: Jacques-Charles LAFOUCRIERE <jc.lafoucriere@cea.fr>
+ * Author: Alex Lyashkov <Alexey.Lyashkov@Sun.COM>
+ * Author: Nathaniel Rutman <Nathan.Rutman@Sun.COM>
  */
 
 #define DEBUG_SUBSYSTEM S_LOV
 #include <obd.h>
 #include "lov_internal.h"
 
-static void lov_pool_getref(struct pool_desc *pool) {
-        atomic_inc(&pool->pool_refcount);
+#define pool_tgt(_p, _i) \
+               _p->pool_lobd->u.lov.lov_tgts[_p->pool_obds.op_array[_i]]
+
+static void lov_pool_getref(struct pool_desc *pool)
+{
+        CDEBUG(D_INFO, "pool %p\n", pool);
+        cfs_atomic_inc(&pool->pool_refcount);
 }
 
-static void lov_pool_putref(struct pool_desc *pool) {
-        if (atomic_dec_and_test(&pool->pool_refcount)) {
-                lov_ost_pool_free(&(pool->pool_rr.lqr_pool));
+void lov_pool_putref(struct pool_desc *pool) 
+{
+        CDEBUG(D_INFO, "pool %p\n", pool);
+        if (cfs_atomic_dec_and_test(&pool->pool_refcount)) {
+                LASSERT(cfs_hlist_unhashed(&pool->pool_hash));
+                LASSERT(cfs_list_empty(&pool->pool_list));
+                LASSERT(pool->pool_proc_entry == NULL);
                 lov_ost_pool_free(&(pool->pool_obds));
                 OBD_FREE_PTR(pool);
+                EXIT;
         }
 }
 
+void lov_pool_putref_locked(struct pool_desc *pool)
+{
+        CDEBUG(D_INFO, "pool %p\n", pool);
+        LASSERT(cfs_atomic_read(&pool->pool_refcount) > 1);
+
+        cfs_atomic_dec(&pool->pool_refcount);
+}
 
 /*
  * hash function using a Rotating Hash algorithm
@@ -71,7 +90,7 @@ static void lov_pool_putref(struct pool_desc *pool) {
  * Chapter 6.4.
  * Addison Wesley, 1973
  */
-static __u32 pool_hashfn(lustre_hash_t *hash_body, void *key, unsigned mask)
+static __u32 pool_hashfn(cfs_hash_t *hash_body, const void *key, unsigned mask)
 {
         int i;
         __u32 result;
@@ -87,50 +106,54 @@ static __u32 pool_hashfn(lustre_hash_t *hash_body, void *key, unsigned mask)
         return (result % mask);
 }
 
-static void *pool_key(struct hlist_node *hnode)
+static void *pool_key(cfs_hlist_node_t *hnode)
 {
         struct pool_desc *pool;
 
-        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
+        pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
         return (pool->pool_name);
 }
 
-static int pool_hashkey_compare(void *key, struct hlist_node *compared_hnode)
+static int pool_hashkey_keycmp(const void *key, cfs_hlist_node_t *compared_hnode)
 {
         char *pool_name;
         struct pool_desc *pool;
-        int rc;
 
         pool_name = (char *)key;
-        pool = hlist_entry(compared_hnode, struct pool_desc, pool_hash);
-        rc = strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
-        return (!rc);
+        pool = cfs_hlist_entry(compared_hnode, struct pool_desc, pool_hash);
+        return !strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
+}
+
+static void *pool_hashobject(cfs_hlist_node_t *hnode)
+{
+        return cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
 }
 
-static void *pool_hashrefcount_get(struct hlist_node *hnode)
+static void pool_hashrefcount_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
 {
         struct pool_desc *pool;
 
-        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
+        pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
         lov_pool_getref(pool);
-        return (pool);
 }
 
-static void *pool_hashrefcount_put(struct hlist_node *hnode)
+static void pool_hashrefcount_put_locked(cfs_hash_t *hs,
+                                         cfs_hlist_node_t *hnode)
 {
         struct pool_desc *pool;
 
-        pool = hlist_entry(hnode, struct pool_desc, pool_hash);
-        lov_pool_putref(pool);
-        return (pool);
+        pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
+        lov_pool_putref_locked(pool);
 }
 
-lustre_hash_ops_t pool_hash_operations = {
-        .lh_hash        = pool_hashfn,
-        .lh_key         = pool_key,
-        .lh_compare     = pool_hashkey_compare,
-        .lh_get         = pool_hashrefcount_get,
-        .lh_put         = pool_hashrefcount_put,
+cfs_hash_ops_t pool_hash_operations = {
+        .hs_hash        = pool_hashfn,
+        .hs_key         = pool_key,
+        .hs_keycmp      = pool_hashkey_keycmp,
+        .hs_object      = pool_hashobject,
+        .hs_get         = pool_hashrefcount_get,
+        .hs_put_locked  = pool_hashrefcount_put_locked,
+
 };
 
 #ifdef LPROCFS
@@ -164,14 +187,14 @@ static void *pool_proc_next(struct seq_file *s, void *v, loff_t *pos)
 
         /* iterate to find a non empty entry */
         prev_idx = iter->idx;
-        down_read(&pool_tgt_rw_sem(iter->pool));
+       down_read(&pool_tgt_rw_sem(iter->pool));
         iter->idx++;
         if (iter->idx == pool_tgt_count(iter->pool)) {
                 iter->idx = prev_idx; /* we stay on the last entry */
-                up_read(&pool_tgt_rw_sem(iter->pool));
+               up_read(&pool_tgt_rw_sem(iter->pool));
                 return NULL;
         }
-        up_read(&pool_tgt_rw_sem(iter->pool));
+       up_read(&pool_tgt_rw_sem(iter->pool));
         (*pos)++;
         /* return != NULL to continue */
         return iter;
@@ -241,9 +264,9 @@ static int pool_proc_show(struct seq_file *s, void *v)
         LASSERT(iter->pool != NULL);
         LASSERT(iter->idx <= pool_tgt_count(iter->pool));
 
-        down_read(&pool_tgt_rw_sem(iter->pool));
+       down_read(&pool_tgt_rw_sem(iter->pool));
         tgt = pool_tgt(iter->pool, iter->idx);
-        up_read(&pool_tgt_rw_sem(iter->pool));
+       up_read(&pool_tgt_rw_sem(iter->pool));
         if (tgt)
                 seq_printf(s, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
 
@@ -264,7 +287,7 @@ static int pool_proc_open(struct inode *inode, struct file *file)
         rc = seq_open(file, &pool_proc_ops);
         if (!rc) {
                 struct seq_file *s = file->private_data;
-                s->private = PROC_I(inode)->pde->data;
+               s->private = PDE_DATA(inode);
         }
         return rc;
 }
@@ -285,7 +308,7 @@ void lov_dump_pool(int level, struct pool_desc *pool)
 
         CDEBUG(level, "pool "LOV_POOLNAMEF" has %d members\n",
                pool->pool_name, pool->pool_obds.op_count);
-        down_read(&pool_tgt_rw_sem(pool));
+       down_read(&pool_tgt_rw_sem(pool));
 
         for (i = 0; i < pool_tgt_count(pool) ; i++) {
                 if (!pool_tgt(pool, i) || !(pool_tgt(pool, i))->ltd_exp)
@@ -295,24 +318,27 @@ void lov_dump_pool(int level, struct pool_desc *pool)
                        obd_uuid2str(&((pool_tgt(pool, i))->ltd_uuid)));
         }
 
-        up_read(&pool_tgt_rw_sem(pool));
+       up_read(&pool_tgt_rw_sem(pool));
         lov_pool_putref(pool);
 }
 
 #define LOV_POOL_INIT_COUNT 2
 int lov_ost_pool_init(struct ost_pool *op, unsigned int count)
 {
+        ENTRY;
+
         if (count == 0)
                 count = LOV_POOL_INIT_COUNT;
         op->op_array = NULL;
         op->op_count = 0;
-        init_rwsem(&op->op_rw_sem);
+       init_rwsem(&op->op_rw_sem);
         op->op_size = count;
         OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
         if (op->op_array == NULL) {
                 op->op_size = 0;
-                return -ENOMEM;
+                RETURN(-ENOMEM);
         }
+        EXIT;
         return 0;
 }
 
@@ -345,7 +371,7 @@ int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
         int rc = 0, i;
         ENTRY;
 
-        down_write(&op->op_rw_sem);
+       down_write(&op->op_rw_sem);
 
         rc = lov_ost_pool_extend(op, min_count);
         if (rc)
@@ -359,45 +385,50 @@ int lov_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
         /* OST not found, so we add it */
         op->op_array[op->op_count] = idx;
         op->op_count++;
+        EXIT;
 out:
-        up_write(&op->op_rw_sem);
+       up_write(&op->op_rw_sem);
         return rc;
 }
 
 int lov_ost_pool_remove(struct ost_pool *op, __u32 idx)
 {
         int i;
+        ENTRY;
 
-        down_write(&op->op_rw_sem);
+       down_write(&op->op_rw_sem);
 
         for (i = 0; i < op->op_count; i++) {
                 if (op->op_array[i] == idx) {
                         memmove(&op->op_array[i], &op->op_array[i + 1],
                                 (op->op_count - i - 1) * sizeof(op->op_array[0]));
                         op->op_count--;
-                        up_write(&op->op_rw_sem);
+                       up_write(&op->op_rw_sem);
+                        EXIT;
                         return 0;
                 }
         }
 
-        up_write(&op->op_rw_sem);
-        return -EINVAL;
+       up_write(&op->op_rw_sem);
+        RETURN(-EINVAL);
 }
 
 int lov_ost_pool_free(struct ost_pool *op)
 {
+        ENTRY;
+
         if (op->op_size == 0)
-                return 0;
+                RETURN(0);
 
-        down_write(&op->op_rw_sem);
+       down_write(&op->op_rw_sem);
 
         OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
         op->op_array = NULL;
         op->op_count = 0;
         op->op_size = 0;
 
-        up_write(&op->op_rw_sem);
-        return 0;
+       up_write(&op->op_rw_sem);
+        RETURN(0);
 }
 
 
@@ -419,61 +450,62 @@ int lov_pool_new(struct obd_device *obd, char *poolname)
 
         strncpy(new_pool->pool_name, poolname, LOV_MAXPOOLNAME);
         new_pool->pool_name[LOV_MAXPOOLNAME] = '\0';
-        new_pool->pool_lov = lov;
+       new_pool->pool_lobd = obd;
         /* the ref count is initialized to 1 because a pool, once created,
          * stays in use until it is deleted
          */
-        atomic_set(&new_pool->pool_refcount, 1);
+        cfs_atomic_set(&new_pool->pool_refcount, 1);
         rc = lov_ost_pool_init(&new_pool->pool_obds, 0);
         if (rc)
                GOTO(out_err, rc);
 
-        memset(&(new_pool->pool_rr), 0, sizeof(struct lov_qos_rr));
-        rc = lov_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
-        if (rc) {
-                lov_ost_pool_free(&new_pool->pool_obds);
-                GOTO(out_err, rc);
-        }
-
-        INIT_HLIST_NODE(&new_pool->pool_hash);
-        rc = lustre_hash_add_unique(lov->lov_pools_hash_body, poolname,
-                                    &new_pool->pool_hash);
-        if (rc) {
-                lov_ost_pool_free(&new_pool->pool_rr.lqr_pool);
-                lov_ost_pool_free(&new_pool->pool_obds);
-                GOTO(out_err, rc = -EEXIST);
-        }
-
-        spin_lock(&obd->obd_dev_lock);
-        list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
-        lov->lov_pool_count++;
-
-        spin_unlock(&obd->obd_dev_lock);
-
-        CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
-               poolname, lov->lov_pool_count);
+        CFS_INIT_HLIST_NODE(&new_pool->pool_hash);
 
 #ifdef LPROCFS
-        /* ifdef needed for liblustre */
+        /* this ifdef is needed because seq_file is not implemented for liblustre */
         /* get ref for /proc file */
         lov_pool_getref(new_pool);
         new_pool->pool_proc_entry = lprocfs_add_simple(lov->lov_pool_proc_entry,
-                                                       poolname, NULL, NULL,
-                                                       new_pool,
-                                                       &pool_proc_operations);
+                                                       poolname,
+#ifndef HAVE_ONLY_PROCFS_SEQ
+                                                       NULL, NULL,
 #endif
-
+                                                       new_pool,
+                                                       &pool_proc_operations);
         if (IS_ERR(new_pool->pool_proc_entry)) {
                 CWARN("Cannot add proc pool entry "LOV_POOLNAMEF"\n", poolname);
                 new_pool->pool_proc_entry = NULL;
                 lov_pool_putref(new_pool);
         }
+        CDEBUG(D_INFO, "pool %p - proc %p\n", new_pool, new_pool->pool_proc_entry);
+#endif
+
+       spin_lock(&obd->obd_dev_lock);
+       cfs_list_add_tail(&new_pool->pool_list, &lov->lov_pool_list);
+       lov->lov_pool_count++;
+       spin_unlock(&obd->obd_dev_lock);
+
+        /* add to the hash, making the pool findable, only when it is fully ready */
+        rc = cfs_hash_add_unique(lov->lov_pools_hash_body, poolname,
+                                 &new_pool->pool_hash);
+        if (rc)
+                GOTO(out_err, rc = -EEXIST);
+
+        CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
+               poolname, lov->lov_pool_count);
 
         RETURN(0);
 
 out_err:
-        OBD_FREE_PTR(new_pool);
-        return rc;
+       spin_lock(&obd->obd_dev_lock);
+       cfs_list_del_init(&new_pool->pool_list);
+       lov->lov_pool_count--;
+       spin_unlock(&obd->obd_dev_lock);
+        lprocfs_remove(&new_pool->pool_proc_entry);
+       lov_ost_pool_free(&new_pool->pool_obds);
+       OBD_FREE_PTR(new_pool);
+
+       return rc;
 }
 
 int lov_pool_del(struct obd_device *obd, char *poolname)
@@ -484,36 +516,26 @@ int lov_pool_del(struct obd_device *obd, char *poolname)
 
         lov = &(obd->u.lov);
 
-        spin_lock(&obd->obd_dev_lock);
-
-        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
-        if (pool == NULL) {
-                spin_unlock(&obd->obd_dev_lock);
+        /* lookup and kill hash reference */
+        pool = cfs_hash_del_key(lov->lov_pools_hash_body, poolname);
+        if (pool == NULL)
                 RETURN(-ENOENT);
-        }
 
-#ifdef LPROCFS
         if (pool->pool_proc_entry != NULL) {
-                remove_proc_entry(pool->pool_proc_entry->name,
-                                  pool->pool_proc_entry->parent);
-                /* remove ref for /proc file */
+                CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
+                lprocfs_remove(&pool->pool_proc_entry);
                 lov_pool_putref(pool);
         }
-#endif
 
-        lustre_hash_del_key(lov->lov_pools_hash_body, poolname);
-        list_del_init(&pool->pool_list);
+       spin_lock(&obd->obd_dev_lock);
+       cfs_list_del_init(&pool->pool_list);
+       lov->lov_pool_count--;
+       spin_unlock(&obd->obd_dev_lock);
 
-        lov->lov_pool_count--;
-        lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
-        spin_unlock(&obd->obd_dev_lock);
+       /* release last reference */
+       lov_pool_putref(pool);
 
-        /* remove ref got when pool was created in memory
-         * pool will be freed when refount will reach 0
-         */
-        lov_pool_putref(pool);
-
-        RETURN(0);
+       RETURN(0);
 }
 
 
@@ -522,13 +544,13 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
         struct obd_uuid ost_uuid;
         struct lov_obd *lov;
         struct pool_desc *pool;
-        unsigned int i, lov_idx;
+        unsigned int lov_idx;
         int rc;
         ENTRY;
 
         lov = &(obd->u.lov);
 
-        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
+        pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
         if (pool == NULL)
                 RETURN(-ENOENT);
 
@@ -536,35 +558,29 @@ int lov_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 
 
         /* search ost in lov array */
-        mutex_down(&lov->lov_lock);
-        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
-                if (!lov->lov_tgts[i])
+        obd_getref(obd);
+        for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
+                if (!lov->lov_tgts[lov_idx])
                         continue;
-                if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
+                if (obd_uuid_equals(&ost_uuid,
+                                    &(lov->lov_tgts[lov_idx]->ltd_uuid)))
                         break;
         }
-
         /* test if ost found in lov */
-        if (i == lov->desc.ld_tgt_count) {
-                mutex_up(&lov->lov_lock);
+        if (lov_idx == lov->desc.ld_tgt_count)
                 GOTO(out, rc = -EINVAL);
-        }
-        mutex_up(&lov->lov_lock);
-
-        lov_idx = i;
 
         rc = lov_ost_pool_add(&pool->pool_obds, lov_idx, lov->lov_tgt_size);
         if (rc)
                 GOTO(out, rc);
 
-        pool->pool_rr.lqr_dirty = 1;
-
         CDEBUG(D_CONFIG, "Added %s to "LOV_POOLNAMEF" as member %d\n",
                ostname, poolname,  pool_tgt_count(pool));
 
         EXIT;
 out:
-        lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
+        obd_putref(obd);
+        lov_pool_putref(pool);
         return rc;
 }
 
@@ -573,97 +589,41 @@ int lov_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
         struct obd_uuid ost_uuid;
         struct lov_obd *lov;
         struct pool_desc *pool;
-        unsigned int i, lov_idx;
+        unsigned int lov_idx;
         int rc = 0;
         ENTRY;
 
         lov = &(obd->u.lov);
 
-        spin_lock(&obd->obd_dev_lock);
-        pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
-        if (pool == NULL) {
-                spin_unlock(&obd->obd_dev_lock);
+        pool = cfs_hash_lookup(lov->lov_pools_hash_body, poolname);
+        if (pool == NULL)
                 RETURN(-ENOENT);
-        }
 
         obd_str2uuid(&ost_uuid, ostname);
 
+        obd_getref(obd);
         /* search ost in lov array, to get index */
-        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
-                if (!lov->lov_tgts[i])
+        for (lov_idx = 0; lov_idx < lov->desc.ld_tgt_count; lov_idx++) {
+                if (!lov->lov_tgts[lov_idx])
                         continue;
 
-                if (obd_uuid_equals(&ost_uuid, &(lov->lov_tgts[i]->ltd_uuid)))
+                if (obd_uuid_equals(&ost_uuid,
+                                    &(lov->lov_tgts[lov_idx]->ltd_uuid)))
                         break;
         }
 
         /* test if ost found in lov */
-        if (i == lov->desc.ld_tgt_count) {
-                spin_unlock(&obd->obd_dev_lock);
+        if (lov_idx == lov->desc.ld_tgt_count)
                 GOTO(out, rc = -EINVAL);
-        }
-
-        spin_unlock(&obd->obd_dev_lock);
-
-        lov_idx = i;
 
         lov_ost_pool_remove(&pool->pool_obds, lov_idx);
 
-        pool->pool_rr.lqr_dirty = 1;
-
         CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
                poolname);
 
         EXIT;
 out:
-        lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
-        return rc;
-}
-
-int lov_check_index_in_pool(__u32 idx, struct pool_desc *pool)
-{
-        int i, rc;
-        ENTRY;
-
-        /* caller may no have a ref on pool if it got the pool
-         * without calling lov_find_pool() (e.g. go through the lov pool
-         * list)
-         */
-        lov_pool_getref(pool);
-
-        down_read(&pool_tgt_rw_sem(pool));
-
-        for (i = 0; i < pool_tgt_count(pool); i++) {
-                if (pool_tgt_array(pool)[i] == idx)
-                        GOTO(out, rc = 0);
-        }
-        rc = -ENOENT;
-        EXIT;
-out:
-        up_read(&pool_tgt_rw_sem(pool));
-
+        obd_putref(obd);
         lov_pool_putref(pool);
         return rc;
 }
-
-struct pool_desc *lov_find_pool(struct lov_obd *lov, char *poolname)
-{
-        struct pool_desc *pool;
-
-        pool = NULL;
-        if (poolname[0] != '\0') {
-                pool = lustre_hash_lookup(lov->lov_pools_hash_body, poolname);
-                if (pool == NULL)
-                        CWARN("Request for an unknown pool ("LOV_POOLNAMEF")\n",
-                              poolname);
-                if ((pool != NULL) && (pool_tgt_count(pool) == 0)) {
-                        CWARN("Request for an empty pool ("LOV_POOLNAMEF")\n",
-                               poolname);
-                        /* pool is ignored, so we remove ref on it */
-                        lh_put(lov->lov_pools_hash_body, &pool->pool_hash);
-                        pool = NULL;
-                }
-        }
-        return pool;
-}
-