Whamcloud - gitweb
LU-16743 lod: create stripe with correct attr
[fs/lustre-release.git] / lustre / lod / lod_pool.c
index 2f038c6..3f05f3f 100644 (file)
@@ -99,8 +99,8 @@ void lod_pool_putref(struct pool_desc *pool)
        if (atomic_dec_and_test(&pool->pool_refcount)) {
                LASSERT(list_empty(&pool->pool_list));
                LASSERT(pool->pool_proc_entry == NULL);
-               tgt_pool_free(&(pool->pool_rr.lqr_pool));
-               tgt_pool_free(&(pool->pool_obds));
+               lu_tgt_pool_free(&(pool->pool_rr.lqr_pool));
+               lu_tgt_pool_free(&(pool->pool_obds));
                kfree_rcu(pool, pool_rcu);
                EXIT;
        }
@@ -170,7 +170,7 @@ static void *pool_proc_next(struct seq_file *seq, void *v, loff_t *pos)
        if (*pos > pool_tgt_count(iter->lpi_pool))
                return NULL;
 
-       OBD_FAIL_TIMEOUT(OBD_FAIL_OST_LIST_ASSERT, cfs_fail_val);
+       CFS_FAIL_TIMEOUT(OBD_FAIL_OST_LIST_ASSERT, cfs_fail_val);
 
        /* iterate to find a non empty entry */
        prev_idx = iter->lpi_idx;
@@ -312,16 +312,16 @@ static int pool_proc_open(struct inode *inode, struct file *file)
        rc = seq_open(file, &pool_proc_ops);
        if (!rc) {
                struct seq_file *seq = file->private_data;
-               seq->private = PDE_DATA(inode);
+               seq->private = pde_data(inode);
        }
        return rc;
 }
 
 const static struct proc_ops pool_proc_operations = {
-       .open           = pool_proc_open,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = seq_release,
+       .proc_open      = pool_proc_open,
+       .proc_read      = seq_read,
+       .proc_lseek     = seq_lseek,
+       .proc_release   = seq_release,
 };
 
 /**
@@ -376,6 +376,60 @@ void lod_pool_hash_destroy(struct rhashtable *tbl)
        rhashtable_free_and_destroy(tbl, pools_hash_exit, NULL);
 }
 
+/**
+ * Check whether a pool with the given name is present in the pool hash.
+ *
+ * Only the hash lookup is performed, inside an RCU read-side section; no
+ * reference is taken on the pool, so the answer is a snapshot.
+ *
+ * \param[in] lod      LOD device whose lod_pools_hash_body is searched
+ * \param[in] poolname hash key to look up
+ *
+ * \retval true   an entry was found
+ * \retval false  no entry was found
+ */
+bool lod_pool_exists(struct lod_device *lod, char *poolname)
+{
+       bool found;
+
+       rcu_read_lock();
+       found = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                 pools_hash_params) != NULL;
+       rcu_read_unlock();
+
+       return found;
+}
+
+/**
+ * Look up a pool by name and take a reference on it.
+ *
+ * The lookup and the reference grab both happen inside one RCU read-side
+ * section. A pool whose refcount has already dropped to zero is treated as
+ * not found. Callers in this file drop the reference with lod_pool_putref().
+ *
+ * \param[in] lod      LOD device whose lod_pools_hash_body is searched
+ * \param[in] poolname hash key to look up
+ *
+ * \retval pool with pool_refcount incremented
+ * \retval NULL if no entry was found or its refcount was already zero
+ */
+struct pool_desc *lod_pool_find(struct lod_device *lod, char *poolname)
+{
+       struct pool_desc *found;
+
+       rcu_read_lock();
+       found = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
+                                 pools_hash_params);
+       if (found != NULL && atomic_inc_not_zero(&found->pool_refcount) == 0)
+               found = NULL;
+       rcu_read_unlock();
+
+       return found;
+}
+
+/**
+ * seq_file show handler for a pool's target weights.
+ *
+ * Resolves the LOD device from the pool stored in the seq_file's private
+ * pointer and hands the pool's target list to lod_tgt_weights_seq_show().
+ *
+ * \param[in] m    seq_file whose private pointer is the pool descriptor
+ * \param[in] data unused
+ *
+ * \retval whatever lod_tgt_weights_seq_show() returns
+ */
+static int lod_ost_pool_weights_seq_show(struct seq_file *m, void *data)
+{
+       struct pool_desc *pd = m->private;
+       struct obd_device *obd = pd->pool_lobd;
+
+       return lod_tgt_weights_seq_show(m, lu2lod_dev(obd->obd_lu_dev),
+                                       &pd->pool_obds, false);
+}
+
+/**
+ * Write handler paired with lod_ost_pool_weights_seq_show().
+ *
+ * Forwards the user buffer to lod_tgt_weights_seq_write() for the pool stored
+ * in the seq_file's private pointer.
+ *
+ * \param[in] file  open file; private_data is the seq_file
+ * \param[in] buf   user-space buffer
+ * \param[in] count number of bytes in \a buf
+ * \param[in] off   file offset (not used by this wrapper)
+ *
+ * \retval whatever lod_tgt_weights_seq_write() returns
+ */
+static ssize_t
+lod_ost_pool_weights_seq_write(struct file *file, const char __user *buf,
+                              size_t count, loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct pool_desc *pd = seq->private;
+       struct obd_device *obd = pd->pool_lobd;
+
+       return lod_tgt_weights_seq_write(seq, buf, count,
+                                        lu2lod_dev(obd->obd_lu_dev),
+                                        &pd->pool_obds, false);
+}
+LDEBUGFS_SEQ_FOPS(lod_ost_pool_weights);
+
+/* debugfs files added to each pool's directory; a zeroed entry ends the list */
+static struct ldebugfs_vars ldebugfs_lod_pool_vars[] = {
+       {
+               .name           = "qos_ost_weights",
+               .fops           = &lod_ost_pool_weights_fops,
+               .proc_mode      = 0444,
+       },
+       { 0 }
+};
+
 /**
  * Allocate a new pool for the specified device.
  *
@@ -401,20 +455,22 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
                RETURN(-ENAMETOOLONG);
 
        /* OBD_ALLOC_* doesn't work with direct kfree_rcu use */
-       new_pool = kmalloc(sizeof(*new_pool), GFP_KERNEL);
+       new_pool = kzalloc(sizeof(*new_pool), GFP_KERNEL);
        if (new_pool == NULL)
                RETURN(-ENOMEM);
 
        strlcpy(new_pool->pool_name, poolname, sizeof(new_pool->pool_name));
+       new_pool->pool_spill_target[0] = '\0';
+       atomic_set(&new_pool->pool_spill_hit, 0);
        new_pool->pool_lobd = obd;
        atomic_set(&new_pool->pool_refcount, 1);
-       rc = tgt_pool_init(&new_pool->pool_obds, 0);
+       rc = lu_tgt_pool_init(&new_pool->pool_obds, 0);
        if (rc)
-               GOTO(out_err, rc);
+               GOTO(out_free_pool, rc);
 
        lu_qos_rr_init(&new_pool->pool_rr);
 
-       rc = tgt_pool_init(&new_pool->pool_rr.lqr_pool, 0);
+       rc = lu_tgt_pool_init(&new_pool->pool_rr.lqr_pool, 0);
        if (rc)
                GOTO(out_free_pool_obds, rc);
 
@@ -429,6 +485,17 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
                new_pool->pool_proc_entry = NULL;
                lod_pool_putref(new_pool);
        }
+
+       pool_getref(new_pool);
+       new_pool->pool_spill_proc_entry =
+               lprocfs_register(poolname, lod->lod_spill_proc_entry,
+                       lprocfs_lod_spill_vars, new_pool);
+       if (IS_ERR(new_pool->pool_spill_proc_entry)) {
+               rc = PTR_ERR(new_pool->pool_spill_proc_entry);
+               new_pool->pool_spill_proc_entry = NULL;
+               lod_pool_putref(new_pool);
+       }
+
        CDEBUG(D_INFO, "pool %p - proc %p\n", new_pool,
               new_pool->pool_proc_entry);
 #endif
@@ -452,6 +519,11 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
                GOTO(out_err, rc);
        }
 
+       new_pool->pool_debugfs = debugfs_create_dir(poolname,
+                                                   lod->lod_pool_debugfs);
+       ldebugfs_add_vars(new_pool->pool_debugfs, ldebugfs_lod_pool_vars,
+                         new_pool);
+
        CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
                        poolname, lod->lod_pool_count);
 
@@ -463,11 +535,13 @@ out_err:
        lod->lod_pool_count--;
        spin_unlock(&obd->obd_dev_lock);
 
+       lprocfs_remove(&new_pool->pool_spill_proc_entry);
        lprocfs_remove(&new_pool->pool_proc_entry);
 
-       tgt_pool_free(&new_pool->pool_rr.lqr_pool);
+       lu_tgt_pool_free(&new_pool->pool_rr.lqr_pool);
 out_free_pool_obds:
-       tgt_pool_free(&new_pool->pool_obds);
+       lu_tgt_pool_free(&new_pool->pool_obds);
+out_free_pool:
        OBD_FREE_PTR(new_pool);
        return rc;
 }
@@ -499,11 +573,18 @@ int lod_pool_del(struct obd_device *obd, char *poolname)
        if (!pool)
                RETURN(-ENOENT);
 
+       debugfs_remove_recursive(pool->pool_debugfs);
+
        if (pool->pool_proc_entry != NULL) {
                CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
                lprocfs_remove(&pool->pool_proc_entry);
                lod_pool_putref(pool);
        }
+       if (pool->pool_spill_proc_entry != NULL) {
+               CDEBUG(D_INFO, "proc entry %p\n", pool->pool_spill_proc_entry);
+               lprocfs_remove(&pool->pool_spill_proc_entry);
+               lod_pool_putref(pool);
+       }
 
        spin_lock(&obd->obd_dev_lock);
        list_del_init(&pool->pool_list);
@@ -537,12 +618,7 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
        int rc = -EINVAL;
        ENTRY;
 
-       rcu_read_lock();
-       pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
-                                pools_hash_params);
-       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
-               pool = NULL;
-       rcu_read_unlock();
+       pool = lod_pool_find(lod, poolname);
        if (!pool)
                RETURN(-ENOENT);
 
@@ -560,8 +636,8 @@ int lod_pool_add(struct obd_device *obd, char *poolname, char *ostname)
        if (rc)
                GOTO(out, rc);
 
-       rc = tgt_pool_add(&pool->pool_obds, tgt->ltd_index,
-                             lod->lod_ost_count);
+       rc = lu_tgt_pool_add(&pool->pool_obds, tgt->ltd_index,
+                            lod->lod_ost_count);
        if (rc)
                GOTO(out, rc);
 
@@ -601,12 +677,7 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
        ENTRY;
 
        /* lookup and kill hash reference */
-       rcu_read_lock();
-       pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
-                                pools_hash_params);
-       if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
-               pool = NULL;
-       rcu_read_unlock();
+       pool = lod_pool_find(lod, poolname);
        if (!pool)
                RETURN(-ENOENT);
 
@@ -624,7 +695,7 @@ int lod_pool_remove(struct obd_device *obd, char *poolname, char *ostname)
        if (rc)
                GOTO(out, rc);
 
-       tgt_pool_remove(&pool->pool_obds, ost->ltd_index);
+       lu_tgt_pool_remove(&pool->pool_obds, ost->ltd_index);
        set_bit(LQ_DIRTY, &pool->pool_rr.lqr_flags);
 
        CDEBUG(D_CONFIG, "%s removed from "LOV_POOLNAMEF"\n", ostname,
@@ -654,7 +725,7 @@ int lod_check_index_in_pool(__u32 idx, struct pool_desc *pool)
        int rc;
 
        pool_getref(pool);
-       rc = tgt_check_index(idx, &pool->pool_obds);
+       rc = lu_tgt_check_index(idx, &pool->pool_obds);
        lod_pool_putref(pool);
        return rc;
 }
@@ -673,27 +744,95 @@ struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname)
 {
        struct pool_desc *pool;
 
-       pool = NULL;
-       if (poolname[0] != '\0') {
-               rcu_read_lock();
-               pool = rhashtable_lookup(&lod->lod_pools_hash_body, poolname,
-                                        pools_hash_params);
-               if (pool && !atomic_inc_not_zero(&pool->pool_refcount))
-                       pool = NULL;
-               rcu_read_unlock();
-               if (!pool)
-                       CDEBUG(D_CONFIG,
-                              "%s: request for an unknown pool (" LOV_POOLNAMEF ")\n",
-                              lod->lod_child_exp->exp_obd->obd_name, poolname);
-               if (pool != NULL && pool_tgt_count(pool) == 0) {
-                       CDEBUG(D_CONFIG, "%s: request for an empty pool ("
-                              LOV_POOLNAMEF")\n",
-                              lod->lod_child_exp->exp_obd->obd_name, poolname);
-                       /* pool is ignored, so we remove ref on it */
-                       lod_pool_putref(pool);
-                       pool = NULL;
-               }
+       if (poolname[0] == '\0' || lov_pool_is_reserved(poolname))
+               return NULL;
+
+       pool = lod_pool_find(lod, poolname);
+       if (!pool)
+               CDEBUG(D_CONFIG,
+                      "%s: request for an unknown pool (" LOV_POOLNAMEF ")\n",
+                      lod->lod_child_exp->exp_obd->obd_name, poolname);
+       if (pool != NULL && pool_tgt_count(pool) == 0) {
+               CDEBUG(D_CONFIG, "%s: request for an empty pool ("
+                      LOV_POOLNAMEF")\n",
+                      lod->lod_child_exp->exp_obd->obd_name, poolname);
+               /* pool is ignored, so we remove ref on it */
+               lod_pool_putref(pool);
+               pool = NULL;
        }
+
        return pool;
 }
 
+/**
+ * Re-evaluate whether a pool has filled past its spill threshold.
+ *
+ * Returns immediately while the cached decision has not expired
+ * (pool_spill_expire) or when pool_spill_threshold_pct is zero. Otherwise it
+ * calls lod_qos_statfs_update(), sums capacity and free space over the pool's
+ * OSTs that are present in lod_ost_bitmap and active, and sets
+ * pool_spill_is_active when the used bytes reach the threshold percentage of
+ * the total. The next re-evaluation is deferred by ld_qos_maxage.
+ *
+ * Note that when no OST contributes, both sums are zero, the comparison
+ * "0 >= 0" holds and the pool is marked spill-active.
+ *
+ * \param[in] env   execution environment passed to lod_qos_statfs_update()
+ * \param[in] lod   LOD device owning the OST descriptors
+ * \param[in] pool  pool whose spill state is refreshed
+ */
+void lod_spill_target_refresh(const struct lu_env *env, struct lod_device *lod,
+                             struct pool_desc *pool)
+{
+       __u64 avail_bytes = 0, total_bytes = 0;
+       __u64 threshold_bytes;
+       struct lu_tgt_pool *osts;
+       int i;
+
+       if (ktime_get_seconds() < pool->pool_spill_expire)
+               return;
+
+       if (pool->pool_spill_threshold_pct == 0)
+               return;
+
+       lod_qos_statfs_update(env, lod, &lod->lod_ost_descs);
+
+       down_write(&pool_tgt_rw_sem(pool));
+       /* re-check under the lock: the expiry may have been pushed meanwhile */
+       if (ktime_get_seconds() < pool->pool_spill_expire)
+               goto out_sem;
+       pool->pool_spill_expire = ktime_get_seconds() +
+               lod->lod_ost_descs.ltd_lov_desc.ld_qos_maxage;
+
+       osts = &(pool->pool_obds);
+       for (i = 0; i < osts->op_count; i++) {
+               int idx = osts->op_array[i];
+               struct lod_tgt_desc *tgt;
+               struct obd_statfs *sfs;
+
+               if (!test_bit(idx, lod->lod_ost_bitmap))
+                       continue;
+               tgt = OST_TGT(lod, idx);
+               if (!tgt->ltd_active)
+                       continue;
+               sfs = &tgt->ltd_statfs;
+
+               avail_bytes += sfs->os_bavail * sfs->os_bsize;
+               total_bytes += sfs->os_blocks * sfs->os_bsize;
+       }
+
+       /*
+        * Same value as total_bytes * pct / 100, but built from the quotient
+        * and remainder of total_bytes / 100 so that the intermediate product
+        * cannot wrap __u64 on very large pools (for pct up to 100).
+        */
+       threshold_bytes = total_bytes / 100 * pool->pool_spill_threshold_pct +
+               total_bytes % 100 * pool->pool_spill_threshold_pct / 100;
+       pool->pool_spill_is_active =
+               total_bytes - avail_bytes >= threshold_bytes;
+
+out_sem:
+       up_write(&pool_tgt_rw_sem(pool));
+}
+
+/**
+ * Redirect a pool name to its spill target while the pool is over threshold.
+ *
+ * As long as the pool named by \a *poolname exists and, after
+ * lod_spill_target_refresh(), is marked spill-active, \a *poolname is
+ * replaced with that pool's pool_spill_target via lod_set_pool(), the pool's
+ * pool_spill_hit counter is bumped, and the check repeats for the new name.
+ *
+ * XXX: the walk is not bounded; a cycle of spill targets that are all active
+ * would never terminate. Consider a better scheme to detect loops.
+ *
+ * \param[in]     env       execution environment for the refresh
+ * \param[in]     lod       LOD device owning the pools
+ * \param[in,out] poolname  pool name to check; may be replaced
+ */
+void lod_check_and_spill_pool(const struct lu_env *env, struct lod_device *lod,
+                             char **poolname)
+{
+       struct pool_desc *pool;
+
+       if (!poolname)
+               return;
+
+       /*
+        * *poolname is re-validated on every hop: lod_set_pool() rewrites it
+        * and its failure behaviour is not visible here, so do not hand a
+        * NULL or empty name to the hash lookup.
+        */
+       while (*poolname && (*poolname)[0] != '\0') {
+               bool spill;
+
+               pool = lod_pool_find(lod, *poolname);
+               if (!pool)
+                       return;
+
+               lod_spill_target_refresh(env, lod, pool);
+               spill = pool->pool_spill_is_active;
+               if (spill) {
+                       /* copy the target name while we still hold the ref */
+                       lod_set_pool(poolname, pool->pool_spill_target);
+                       atomic_inc(&pool->pool_spill_hit);
+               }
+               lod_pool_putref(pool);
+
+               if (!spill)
+                       return;
+       }
+}