Whamcloud - gitweb
LU-12624 lod: alloc dir stripes by QoS
[fs/lustre-release.git] / lustre / lod / lod_pool.c
index 6eaa6d7..51cff07 100644 (file)
@@ -23,7 +23,7 @@
  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, 2014 Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -96,11 +96,11 @@ void lod_pool_putref(struct pool_desc *pool)
 {
        CDEBUG(D_INFO, "pool %p\n", pool);
        if (atomic_dec_and_test(&pool->pool_refcount)) {
-               LASSERT(cfs_hlist_unhashed(&pool->pool_hash));
-               LASSERT(cfs_list_empty(&pool->pool_list));
+               LASSERT(hlist_unhashed(&pool->pool_hash));
+               LASSERT(list_empty(&pool->pool_list));
                LASSERT(pool->pool_proc_entry == NULL);
-               lod_ost_pool_free(&(pool->pool_rr.lqr_pool));
-               lod_ost_pool_free(&(pool->pool_obds));
+               lod_tgt_pool_free(&(pool->pool_rr.lqr_pool));
+               lod_tgt_pool_free(&(pool->pool_obds));
                OBD_FREE_PTR(pool);
                EXIT;
        }
@@ -132,11 +132,7 @@ static void pool_putref_locked(struct pool_desc *pool)
 /**
  * Hash the pool name for use by the cfs_hash handlers.
  *
- * hash function using a Rotating Hash algorithm
- * Knuth, D. The Art of Computer Programming,
- * Volume 3: Sorting and Searching,
- * Chapter 6.4.
- * Addison Wesley, 1973
+ * Use the standard DJB2 hash function for ASCII strings in Lustre.
  *
  * \param[in] hash_body        hash structure where this key is embedded (unused)
  * \param[in] key      key to be hashed (in this case the pool name)
@@ -144,20 +140,10 @@ static void pool_putref_locked(struct pool_desc *pool)
  *
  * \retval             computed hash value from \a key and limited by \a mask
  */
-static __u32 pool_hashfn(cfs_hash_t *hash_body, const void *key, unsigned mask)
+static __u32 pool_hashfn(struct cfs_hash *hash_body, const void *key,
+                        unsigned mask)
 {
-       int i;
-       __u32 result;
-       char *poolname;
-
-       result = 0;
-       poolname = (char *)key;
-       for (i = 0; i < LOV_MAXPOOLNAME; i++) {
-               if (poolname[i] == '\0')
-                       break;
-               result = (result << 4) ^ (result >> 28) ^ poolname[i];
-       }
-       return result % mask;
+       return cfs_hash_djb2_hash(key, strnlen(key, LOV_MAXPOOLNAME), mask);
 }
 
 /**
@@ -169,11 +155,11 @@ static __u32 pool_hashfn(cfs_hash_t *hash_body, const void *key, unsigned mask)
  *
  * \retval             char array referencing the pool name (no refcount)
  */
-static void *pool_key(cfs_hlist_node_t *hnode)
+static void *pool_key(struct hlist_node *hnode)
 {
        struct pool_desc *pool;
 
-       pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
+       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
        return pool->pool_name;
 }
 
@@ -189,14 +175,9 @@ static void *pool_key(cfs_hlist_node_t *hnode)
  * \retval             1 if \a key is the same as the key of \a compared
  * \retval             0 if \a key is different from the key of \a compared
  */
-static int pool_hashkey_keycmp(const void *key, cfs_hlist_node_t *compared_hnode)
+static int pool_hashkey_keycmp(const void *key, struct hlist_node *compared)
 {
-       char *pool_name;
-       struct pool_desc *pool;
-
-       pool_name = (char *)key;
-       pool = cfs_hlist_entry(compared_hnode, struct pool_desc, pool_hash);
-       return !strncmp(pool_name, pool->pool_name, LOV_MAXPOOLNAME);
+       return !strncmp(key, pool_key(compared), LOV_MAXPOOLNAME);
 }
 
 /**
@@ -210,29 +191,29 @@ static int pool_hashkey_keycmp(const void *key, cfs_hlist_node_t *compared_hnode
  *
  * \retval             struct pool_desc for the specified \a hnode
  */
-static void *pool_hashobject(cfs_hlist_node_t *hnode)
+static void *pool_hashobject(struct hlist_node *hnode)
 {
-       return cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
+       return hlist_entry(hnode, struct pool_desc, pool_hash);
 }
 
-static void pool_hashrefcount_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
+static void pool_hashrefcount_get(struct cfs_hash *hs, struct hlist_node *hnode)
 {
        struct pool_desc *pool;
 
-       pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
+       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
        pool_getref(pool);
 }
 
-static void pool_hashrefcount_put_locked(cfs_hash_t *hs,
-                                        cfs_hlist_node_t *hnode)
+static void pool_hashrefcount_put_locked(struct cfs_hash *hs,
+                                        struct hlist_node *hnode)
 {
        struct pool_desc *pool;
 
-       pool = cfs_hlist_entry(hnode, struct pool_desc, pool_hash);
+       pool = hlist_entry(hnode, struct pool_desc, pool_hash);
        pool_putref_locked(pool);
 }
 
-cfs_hash_ops_t pool_hash_operations = {
+struct cfs_hash_ops pool_hash_operations = {
        .hs_hash        = pool_hashfn,
        .hs_key         = pool_key,
        .hs_keycmp      = pool_hashkey_keycmp,
@@ -247,8 +228,8 @@ cfs_hash_ops_t pool_hash_operations = {
 
 #define POOL_IT_MAGIC 0xB001CEA0
 struct lod_pool_iterator {
-       int               lpi_magic;    /* POOL_IT_MAGIC */
-       int               lpi_idx;      /* from 0 to pool_tgt_size - 1 */
+       unsigned int      lpi_magic;    /* POOL_IT_MAGIC */
+       unsigned int      lpi_idx;      /* from 0 to pool_tgt_count - 1 */
        struct pool_desc *lpi_pool;
 };
 
@@ -279,16 +260,15 @@ static void *pool_proc_next(struct seq_file *seq, void *v, loff_t *pos)
        if (*pos >= pool_tgt_count(iter->lpi_pool))
                return NULL;
 
+       OBD_FAIL_TIMEOUT(OBD_FAIL_OST_LIST_ASSERT, cfs_fail_val);
+
        /* iterate to find a non empty entry */
        prev_idx = iter->lpi_idx;
-       down_read(&pool_tgt_rw_sem(iter->lpi_pool));
        iter->lpi_idx++;
-       if (iter->lpi_idx == pool_tgt_count(iter->lpi_pool)) {
+       if (iter->lpi_idx >= pool_tgt_count(iter->lpi_pool)) {
                iter->lpi_idx = prev_idx; /* we stay on the last entry */
-               up_read(&pool_tgt_rw_sem(iter->lpi_pool));
                return NULL;
        }
-       up_read(&pool_tgt_rw_sem(iter->lpi_pool));
        (*pos)++;
        /* return != NULL to continue */
        return iter;
@@ -331,6 +311,7 @@ static void *pool_proc_start(struct seq_file *seq, loff_t *pos)
        iter->lpi_idx = 0;
 
        seq->private = iter;
+       down_read(&pool_tgt_rw_sem(pool));
        if (*pos > 0) {
                loff_t i;
                void *ptr;
@@ -365,6 +346,7 @@ static void pool_proc_stop(struct seq_file *seq, void *v)
        struct lod_pool_iterator *iter = seq->private;
 
        if (iter != NULL && iter->lpi_magic == POOL_IT_MAGIC) {
+               up_read(&pool_tgt_rw_sem(iter->lpi_pool));
                seq->private = iter->lpi_pool;
                lod_pool_putref(iter->lpi_pool);
                OBD_FREE_PTR(iter);
@@ -388,9 +370,7 @@ static int pool_proc_show(struct seq_file *seq, void *v)
        LASSERT(iter->lpi_pool != NULL);
        LASSERT(iter->lpi_idx <= pool_tgt_count(iter->lpi_pool));
 
-       down_read(&pool_tgt_rw_sem(iter->lpi_pool));
        tgt = pool_tgt(iter->lpi_pool, iter->lpi_idx);
-       up_read(&pool_tgt_rw_sem(iter->lpi_pool));
        if (tgt != NULL)
                seq_printf(seq, "%s\n", obd_uuid2str(&(tgt->ltd_uuid)));
 
@@ -449,7 +429,7 @@ static struct file_operations pool_proc_operations = {
  */
 void lod_dump_pool(int level, struct pool_desc *pool)
 {
-       int i;
+       unsigned int i;
 
        pool_getref(pool);
 
@@ -484,7 +464,7 @@ void lod_dump_pool(int level, struct pool_desc *pool)
  * \retval             negative error number on failure
  */
 #define POOL_INIT_COUNT 2
-int lod_ost_pool_init(struct ost_pool *op, unsigned int count)
+int lod_tgt_pool_init(struct lu_tgt_pool *op, unsigned int count)
 {
        ENTRY;
 
@@ -493,8 +473,8 @@ int lod_ost_pool_init(struct ost_pool *op, unsigned int count)
        op->op_array = NULL;
        op->op_count = 0;
        init_rwsem(&op->op_rw_sem);
-       op->op_size = count;
-       OBD_ALLOC(op->op_array, op->op_size * sizeof(op->op_array[0]));
+       op->op_size = count * sizeof(op->op_array[0]);
+       OBD_ALLOC(op->op_array, op->op_size);
        if (op->op_array == NULL) {
                op->op_size = 0;
                RETURN(-ENOMEM);
@@ -516,24 +496,25 @@ int lod_ost_pool_init(struct ost_pool *op, unsigned int count)
  * \retval             0 on success
  * \retval             negative error number on failure.
  */
-int lod_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
+int lod_tgt_pool_extend(struct lu_tgt_pool *op, unsigned int min_count)
 {
        __u32 *new;
-       int new_size;
+       __u32 new_size;
 
        LASSERT(min_count != 0);
 
-       if (op->op_count < op->op_size)
+       if (op->op_count * sizeof(op->op_array[0]) < op->op_size)
                return 0;
 
-       new_size = max(min_count, 2 * op->op_size);
-       OBD_ALLOC(new, new_size * sizeof(op->op_array[0]));
+       new_size = max_t(__u32, min_count * sizeof(op->op_array[0]),
+                        2 * op->op_size);
+       OBD_ALLOC(new, new_size);
        if (new == NULL)
                return -ENOMEM;
 
        /* copy old array to new one */
-       memcpy(new, op->op_array, op->op_size * sizeof(op->op_array[0]));
-       OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
+       memcpy(new, op->op_array, op->op_size);
+       OBD_FREE(op->op_array, op->op_size);
        op->op_array = new;
        op->op_size = new_size;
 
@@ -553,14 +534,15 @@ int lod_ost_pool_extend(struct ost_pool *op, unsigned int min_count)
  * \retval             0 if target could be added to the pool
  * \retval             negative error if target \a idx was not added
  */
-int lod_ost_pool_add(struct ost_pool *op, __u32 idx, unsigned int min_count)
+int lod_tgt_pool_add(struct lu_tgt_pool *op, __u32 idx, unsigned int min_count)
 {
-       int rc = 0, i;
+       unsigned int i;
+       int rc = 0;
        ENTRY;
 
        down_write(&op->op_rw_sem);
 
-       rc = lod_ost_pool_extend(op, min_count);
+       rc = lod_tgt_pool_extend(op, min_count);
        if (rc)
                GOTO(out, rc);
 
@@ -592,9 +574,9 @@ out:
  * \retval             0 on success
  * \retval             negative error number on failure
  */
-int lod_ost_pool_remove(struct ost_pool *op, __u32 idx)
+int lod_tgt_pool_remove(struct lu_tgt_pool *op, __u32 idx)
 {
-       int i;
+       unsigned int i;
        ENTRY;
 
        down_write(&op->op_rw_sem);
@@ -626,7 +608,7 @@ int lod_ost_pool_remove(struct ost_pool *op, __u32 idx)
  *
  * \retval             0 on success or if pool was already freed
  */
-int lod_ost_pool_free(struct ost_pool *op)
+int lod_tgt_pool_free(struct lu_tgt_pool *op)
 {
        ENTRY;
 
@@ -635,7 +617,7 @@ int lod_ost_pool_free(struct ost_pool *op)
 
        down_write(&op->op_rw_sem);
 
-       OBD_FREE(op->op_array, op->op_size * sizeof(op->op_array[0]));
+       OBD_FREE(op->op_array, op->op_size);
        op->op_array = NULL;
        op->op_count = 0;
        op->op_size = 0;
@@ -675,25 +657,22 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
        strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name));
        new_pool->pool_lobd = obd;
        atomic_set(&new_pool->pool_refcount, 1);
-       rc = lod_ost_pool_init(&new_pool->pool_obds, 0);
+       rc = lod_tgt_pool_init(&new_pool->pool_obds, 0);
        if (rc)
                GOTO(out_err, rc);
 
-       memset(&new_pool->pool_rr, 0, sizeof(new_pool->pool_rr));
-       rc = lod_ost_pool_init(&new_pool->pool_rr.lqr_pool, 0);
+       lu_qos_rr_init(&new_pool->pool_rr);
+
+       rc = lod_tgt_pool_init(&new_pool->pool_rr.lqr_pool, 0);
        if (rc)
                GOTO(out_free_pool_obds, rc);
 
        INIT_HLIST_NODE(&new_pool->pool_hash);
 
-#ifdef LPROCFS
+#ifdef CONFIG_PROC_FS
        pool_getref(new_pool);
        new_pool->pool_proc_entry = lprocfs_add_simple(lod->lod_pool_proc_entry,
-                                                      poolname,
-#ifndef HAVE_ONLY_PROCFS_SEQ
-                                                      NULL, NULL,
-#endif
-                                                      new_pool,
+                                                      poolname, new_pool,
                                                       &pool_proc_operations);
        if (IS_ERR(new_pool->pool_proc_entry)) {
                CDEBUG(D_CONFIG, "%s: cannot add proc entry "LOV_POOLNAMEF"\n",
@@ -706,7 +685,7 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
 #endif
 
        spin_lock(&obd->obd_dev_lock);
-       cfs_list_add_tail(&new_pool->pool_list, &lod->lod_pool_list);
+       list_add_tail(&new_pool->pool_list, &lod->lod_pool_list);
        lod->lod_pool_count++;
        spin_unlock(&obd->obd_dev_lock);
 
@@ -723,15 +702,15 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
 
 out_err:
        spin_lock(&obd->obd_dev_lock);
-       cfs_list_del_init(&new_pool->pool_list);
+       list_del_init(&new_pool->pool_list);
        lod->lod_pool_count--;
        spin_unlock(&obd->obd_dev_lock);
 
        lprocfs_remove(&new_pool->pool_proc_entry);
 
-       lod_ost_pool_free(&new_pool->pool_rr.lqr_pool);
+       lod_tgt_pool_free(&new_pool->pool_rr.lqr_pool);
 out_free_pool_obds:
-       lod_ost_pool_free(&new_pool->pool_obds);
+       lod_tgt_pool_free(&new_pool->pool_obds);
        OBD_FREE_PTR(new_pool);
        return rc;
 }
@@ -763,7 +742,7 @@ int lod_pool_del(struct obd_device *obd, char *poolname)
        }
 
        spin_lock(&obd->obd_dev_lock);
-       cfs_list_del_init(&pool->pool_list);
+       list_del_init(&pool->pool_list);
        lod->lod_pool_count--;
        spin_unlock(&obd->obd_dev_lock);
 
@@ -787,11 +766,11 @@ int lod_pool_del(struct obd_device *obd, char *poolname)
  */
 int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 {
-       struct lod_device       *lod = lu2lod_dev(obd->obd_lu_dev);
-       struct obd_uuid          ost_uuid;
-       struct pool_desc        *pool;
-       unsigned int             idx;
-       int                      rc = -EINVAL;
+       struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev);
+       struct obd_uuid ost_uuid;
+       struct pool_desc *pool;
+       struct lu_tgt_desc *tgt;
+       int rc = -EINVAL;
        ENTRY;
 
        pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
@@ -802,8 +781,8 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
 
        /* search ost in lod array */
        lod_getref(&lod->lod_ost_descs);
-       lod_foreach_ost(lod, idx) {
-               if (obd_uuid_equals(&ost_uuid, &OST_TGT(lod, idx)->ltd_uuid)) {
+       lod_foreach_ost(lod, tgt) {
+               if (obd_uuid_equals(&ost_uuid, &tgt->ltd_uuid)) {
                        rc = 0;
                        break;
                }
@@ -812,7 +791,8 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
        if (rc)
                GOTO(out, rc);
 
-       rc = lod_ost_pool_add(&pool->pool_obds, idx, lod->lod_osts_size);
+       rc = lod_tgt_pool_add(&pool->pool_obds, tgt->ltd_index,
+                             lod->lod_ost_count);
        if (rc)
                GOTO(out, rc);
 
@@ -844,11 +824,11 @@ out:
  */
 int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
 {
-       struct lod_device       *lod = lu2lod_dev(obd->obd_lu_dev);
-       struct obd_uuid          ost_uuid;
-       struct pool_desc        *pool;
-       unsigned int             idx;
-       int                      rc = -EINVAL;
+       struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev);
+       struct lu_tgt_desc *ost;
+       struct obd_uuid ost_uuid;
+       struct pool_desc *pool;
+       int rc = -EINVAL;
        ENTRY;
 
        pool = cfs_hash_lookup(lod->lod_pools_hash_body, poolname);
@@ -858,8 +838,8 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
        obd_str2uuid(&ost_uuid, ostname);
 
        lod_getref(&lod->lod_ost_descs);
-       cfs_foreach_bit(lod->lod_ost_bitmap, idx) {
-               if (obd_uuid_equals(&ost_uuid, &OST_TGT(lod, idx)->ltd_uuid)) {
+       lod_foreach_ost(lod, ost) {
+               if (obd_uuid_equals(&ost_uuid, &ost->ltd_uuid)) {
                        rc = 0;
                        break;
                }
@@ -869,8 +849,7 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
        if (rc)
                GOTO(out, rc);
 
-       lod_ost_pool_remove(&pool->pool_obds, idx);
-
+       lod_tgt_pool_remove(&pool->pool_obds, ost->ltd_index);
        pool->pool_rr.lqr_dirty = 1;
 
        CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
@@ -897,7 +876,8 @@ out:
  */
 int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool)
 {
-       int i, rc;
+       unsigned int i;
+       int rc;
        ENTRY;
 
        pool_getref(pool);