Whamcloud - gitweb
LU-16501 lod: add qos_ost_weights to debugfs 74/50074/6
author Sergey Cheremencev <scherementsev@ddn.com>
Mon, 20 Feb 2023 17:27:52 +0000 (20:27 +0300)
committer Oleg Drokin <green@whamcloud.com>
Tue, 21 Mar 2023 23:11:14 +0000 (23:11 +0000)
The patch adds the files qos_ost_weights and qos_mdt_weights
to the lod directory in debugfs. A qos_ost_weights file is
also added for each OST pool, in a new directory lod/pool.

    lod.<fsname>-MDT*-mdtlov.qos_mdt_weights
    lod.<fsname>-MDT*-mdtlov.qos_ost_weights
    lod.<fsname>-MDT*-mdtlov.pool.<pool>.qos_ost_weights

These files provide target and server weights, penalties and other
data needed to debug QOS allocator imbalance issues in YAML:

- { ost_idx: 0, tgt_weight: 1137680, tgt_penalty: 0,
    tgt_penalty_per_obj: 115544, tgt_avail: 1137680,
    tgt_last_used: 1677104866, svr_nid: 192.168.100.31@tcp,
    svr_bavail: 2070560, svr_iavail: 1, svr_penalty: 0,
    svr_penalty_per_obj: 52572, svr_last_used: 1677104866 }

Writing to qos_ost_weights/qos_mdt_weights resets
tgt_weight, tgt_penalty and svr_penalty.

The patch also adds sanity_205f to check YAML output.

Signed-off-by: Sergey Cheremencev <scherementsev@ddn.com>
Change-Id: I27e3f5abeb2f31b1c445658be035ec7e76c1572e
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50074
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lod/lod_internal.h
lustre/lod/lod_pool.c
lustre/lod/lproc_lod.c
lustre/tests/sanity.sh

index 09863d9..8e41dd4 100644 (file)
@@ -71,6 +71,7 @@ struct pool_desc {
        char                     pool_spill_target[LOV_MAXPOOLNAME + 1];
        bool                     pool_same_space; /* targets in pool balanced*/
        time64_t                 pool_same_space_expire; /*uses ld_qos_maxage*/
+       struct dentry           *pool_debugfs;
 };
 
 struct lod_device;
@@ -155,6 +156,7 @@ struct lod_device {
 
        struct proc_dir_entry *lod_symlink;
        struct dentry          *lod_debugfs;
+       struct dentry          *lod_pool_debugfs;
 
        /* ROOT object, used to fetch FS default striping */
        struct lod_object      *lod_md_root;
@@ -817,5 +819,10 @@ void lod_check_and_spill_pool(const struct lu_env *env, struct lod_device *lod,
 void lod_spill_target_refresh(const struct lu_env *env, struct lod_device *lod,
                              struct pool_desc *pool);
 struct pool_desc *lod_pool_find(struct lod_device *lod, char *poolname);
+/*
+ * QOS weight reporting helpers shared by the per-device and per-pool
+ * debugfs files.  \a tgts is the pool of target indices to act on and
+ * \a is_mdt selects whether those indices are looked up as MDTs (true)
+ * or OSTs (false).
+ */
+int lod_tgt_weights_seq_show(struct seq_file *m, struct lod_device *lod,
+                            struct lu_tgt_pool *tgts, bool is_mdt);
+int lod_tgt_weights_seq_write(struct seq_file *m, const char __user *buf,
+                             size_t count, struct lod_device *lod,
+                             struct lu_tgt_pool *tgts, bool is_mdt);
 extern struct lprocfs_vars lprocfs_lod_spill_vars[];
 #endif
index 9db33cd..e0671b0 100644 (file)
@@ -401,6 +401,35 @@ struct pool_desc *lod_pool_find(struct lod_device *lod, char *poolname)
        rcu_read_unlock();
        return pool;
 }
+
+/**
+ * seq_file show handler for a pool's qos_ost_weights file.
+ *
+ * \param[in] m     seq_file whose private data is the pool descriptor
+ * \param[in] data  unused
+ *
+ * \retval  value returned by lod_tgt_weights_seq_show() for the pool's OSTs
+ */
+static int lod_ost_pool_weights_seq_show(struct seq_file *m, void *data)
+{
+       struct pool_desc *pd = m->private;
+       struct lod_device *dev;
+
+       dev = lu2lod_dev(pd->pool_lobd->obd_lu_dev);
+       return lod_tgt_weights_seq_show(m, dev, &pd->pool_obds, false);
+}
+
+/**
+ * Write handler for a pool's qos_ost_weights file.
+ *
+ * Forwards to lod_tgt_weights_seq_write() with the pool's OST list.
+ *
+ * \param[in] file   open file; its private data is the seq_file
+ * \param[in] buf    user buffer, passed through to the common helper
+ * \param[in] count  number of bytes written by the caller
+ * \param[in] off    unused
+ *
+ * \retval  value returned by lod_tgt_weights_seq_write()
+ */
+static ssize_t
+lod_ost_pool_weights_seq_write(struct file *file, const char __user *buf,
+                              size_t count, loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct pool_desc *pd = seq->private;
+       struct lod_device *dev;
+
+       dev = lu2lod_dev(pd->pool_lobd->obd_lu_dev);
+       return lod_tgt_weights_seq_write(seq, buf, count, dev,
+                                        &pd->pool_obds, false);
+}
+LDEBUGFS_SEQ_FOPS(lod_ost_pool_weights);
+
+/*
+ * debugfs files created in each pool's directory.
+ * NOTE(review): the mode is read-only although the fops include a write
+ * handler - confirm this is intended.
+ */
+static struct ldebugfs_vars ldebugfs_lod_pool_vars[] = {
+       {
+               .name           = "qos_ost_weights",
+               .fops           = &lod_ost_pool_weights_fops,
+               .proc_mode      = 0444,
+       },
+       { 0 }
+};
+
 /**
  * Allocate a new pool for the specified device.
  *
@@ -490,6 +519,11 @@ int lod_pool_new(struct obd_device *obd, char *poolname)
                GOTO(out_err, rc);
        }
 
+       new_pool->pool_debugfs = debugfs_create_dir(poolname,
+                                                   lod->lod_pool_debugfs);
+       ldebugfs_add_vars(new_pool->pool_debugfs, ldebugfs_lod_pool_vars,
+                         new_pool);
+
        CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n",
                        poolname, lod->lod_pool_count);
 
@@ -539,6 +573,8 @@ int lod_pool_del(struct obd_device *obd, char *poolname)
        if (!pool)
                RETURN(-ENOENT);
 
+       debugfs_remove_recursive(pool->pool_debugfs);
+
        if (pool->pool_proc_entry != NULL) {
                CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry);
                lprocfs_remove(&pool->pool_proc_entry);
index d7004a3..6447eee 100644 (file)
@@ -1356,6 +1356,114 @@ static struct attribute *lod_attrs[] = {
 
 KOBJ_ATTRIBUTE_GROUPS(lod); /* creates lod_groups from lod_attrs */
 
+/**
+ * Print QOS weights and penalties for every target listed in \a tgts.
+ *
+ * One YAML flow mapping is emitted per target: the target's own QOS
+ * fields first, then those of the server record it points to.
+ *
+ * \param[in] m       seq_file to print into
+ * \param[in] lod     LOD device used to look the targets up
+ * \param[in] tgts    pool of target indices to report
+ * \param[in] is_mdt  true to look indices up as MDTs, false as OSTs
+ *
+ * \retval 0 always
+ */
+int lod_tgt_weights_seq_show(struct seq_file *m, struct lod_device *lod,
+                            struct lu_tgt_pool *tgts, bool is_mdt)
+{
+       const char *idx_name = is_mdt ? "mdt_idx" : "ost_idx";
+       int n;
+
+       /* empty pool: nothing to print (checked before taking the lock) */
+       if (tgts->op_count == 0)
+               return 0;
+
+       down_read(&tgts->op_rw_sem);
+       for (n = 0; n < tgts->op_count; n++) {
+               u32 idx = tgts->op_array[n];
+               struct lod_tgt_desc *tgt;
+               struct lu_svr_qos *svr;
+
+               if (is_mdt)
+                       tgt = MDT_TGT(lod, idx);
+               else
+                       tgt = OST_TGT(lod, idx);
+               svr = tgt->ltd_qos.ltq_svr;
+
+               seq_printf(m, "- { %s: %d, tgt_weight: %llu, tgt_penalty: %llu, tgt_penalty_per_obj: %llu, tgt_avail: %llu, tgt_last_used: %llu, svr_nid: %s, svr_bavail: %llu, svr_iavail: %llu, svr_penalty: %llu, svr_penalty_per_obj: %llu, svr_last_used: %llu }\n",
+                          idx_name, tgt->ltd_index,
+                          tgt->ltd_qos.ltq_weight,
+                          tgt->ltd_qos.ltq_penalty,
+                          tgt->ltd_qos.ltq_penalty_per_obj,
+                          tgt->ltd_qos.ltq_avail,
+                          tgt->ltd_qos.ltq_used,
+                          svr->lsq_uuid.uuid,
+                          svr->lsq_bavail,
+                          svr->lsq_iavail,
+                          svr->lsq_penalty,
+                          svr->lsq_penalty_per_obj,
+                          svr->lsq_used);
+       }
+       up_read(&tgts->op_rw_sem);
+
+       return 0;
+}
+
+/**
+ * Reset QOS weights and penalties for every target listed in \a tgts.
+ *
+ * The content written by the user is ignored; any write zeroes the
+ * target weight, the target penalty and the server penalty of each
+ * listed target, then flags the matching QOS state as dirty.
+ *
+ * \param[in] m       seq_file of the written file (unused)
+ * \param[in] buf     user buffer (unused)
+ * \param[in] count   number of bytes written by the caller
+ * \param[in] lod     LOD device used to look the targets up
+ * \param[in] tgts    pool of target indices to reset
+ * \param[in] is_mdt  true to look indices up as MDTs, false as OSTs
+ *
+ * \retval \a count always, so the whole write is consumed
+ */
+int lod_tgt_weights_seq_write(struct seq_file *m, const char __user *buf,
+                             size_t count, struct lod_device *lod,
+                             struct lu_tgt_pool *tgts, bool is_mdt)
+{
+       /*
+        * Lock and dirty the QOS state that owns the targets being reset:
+        * the MDT descriptors for qos_mdt_weights, the OST descriptors
+        * otherwise.
+        */
+       struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
+                                           &lod->lod_ost_descs;
+       int i;
+
+       if (!tgts->op_count)
+               return count;
+
+       down_read(&tgts->op_rw_sem);
+       down_write(&ltd->ltd_qos.lq_rw_sem);
+       for (i = 0; i < tgts->op_count; i++) {
+               u32 *op_array = tgts->op_array;
+               struct lod_tgt_desc *tgt = is_mdt ? MDT_TGT(lod, op_array[i]) :
+                                                OST_TGT(lod, op_array[i]);
+
+               tgt->ltd_qos.ltq_weight = 0;
+               tgt->ltd_qos.ltq_penalty = 0;
+               tgt->ltd_qos.ltq_svr->lsq_penalty = 0;
+       }
+       /* presumably makes the allocator recompute penalties - confirm */
+       set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
+       up_write(&ltd->ltd_qos.lq_rw_sem);
+       up_read(&tgts->op_rw_sem);
+
+       return count;
+}
+
+/**
+ * seq_file show handler for the device-wide qos_mdt_weights file.
+ *
+ * \param[in] m     seq_file whose private data is the LOD device
+ * \param[in] data  unused
+ *
+ * \retval  value returned by lod_tgt_weights_seq_show() for all MDTs
+ */
+static int lod_mdt_weights_seq_show(struct seq_file *m, void *data)
+{
+       struct lod_device *dev = m->private;
+       struct lu_tgt_pool *mdts;
+
+       mdts = &dev->lod_mdt_descs.ltd_tgt_pool;
+       return lod_tgt_weights_seq_show(m, dev, mdts, true);
+}
+
+/**
+ * Write handler for the device-wide qos_mdt_weights file.
+ *
+ * Forwards to lod_tgt_weights_seq_write() with the device's MDT pool.
+ *
+ * \param[in] file   open file; its private data is the seq_file
+ * \param[in] buf    user buffer, passed through to the common helper
+ * \param[in] count  number of bytes written by the caller
+ * \param[in] off    unused
+ *
+ * \retval  value returned by lod_tgt_weights_seq_write()
+ */
+static ssize_t
+lod_mdt_weights_seq_write(struct file *file, const char __user *buf,
+                     size_t count, loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct lod_device *dev = seq->private;
+       struct lu_tgt_pool *mdts;
+
+       mdts = &dev->lod_mdt_descs.ltd_tgt_pool;
+       return lod_tgt_weights_seq_write(seq, buf, count, dev, mdts, true);
+}
+LDEBUGFS_SEQ_FOPS(lod_mdt_weights);
+
+/**
+ * seq_file show handler for the device-wide qos_ost_weights file.
+ *
+ * \param[in] m     seq_file whose private data is the LOD device
+ * \param[in] data  unused
+ *
+ * \retval  value returned by lod_tgt_weights_seq_show() for all OSTs
+ */
+static int lod_ost_weights_seq_show(struct seq_file *m, void *data)
+{
+       struct lod_device *dev = m->private;
+       struct lu_tgt_pool *osts;
+
+       osts = &dev->lod_ost_descs.ltd_tgt_pool;
+       return lod_tgt_weights_seq_show(m, dev, osts, false);
+}
+
+/**
+ * Write handler for the device-wide qos_ost_weights file.
+ *
+ * Forwards to lod_tgt_weights_seq_write() with the device's OST pool.
+ *
+ * \param[in] file   open file; its private data is the seq_file
+ * \param[in] buf    user buffer, passed through to the common helper
+ * \param[in] count  number of bytes written by the caller
+ * \param[in] off    unused
+ *
+ * \retval  value returned by lod_tgt_weights_seq_write()
+ */
+static ssize_t
+lod_ost_weights_seq_write(struct file *file, const char __user *buf,
+                     size_t count, loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct lod_device *dev = seq->private;
+       struct lu_tgt_pool *osts;
+
+       osts = &dev->lod_ost_descs.ltd_tgt_pool;
+       return lod_tgt_weights_seq_write(seq, buf, count, dev, osts, false);
+}
+LDEBUGFS_SEQ_FOPS(lod_ost_weights);
+
+/*
+ * debugfs files created in the LOD device directory.
+ * NOTE(review): the modes are read-only although the fops include write
+ * handlers - confirm this is intended.
+ */
+static struct ldebugfs_vars ldebugfs_lod_vars[] = {
+       {
+               .name           = "qos_mdt_weights",
+               .fops           = &lod_mdt_weights_fops,
+               .proc_mode      = 0444,
+       },
+       {
+               .name           = "qos_ost_weights",
+               .fops           = &lod_ost_weights_fops,
+               .proc_mode      = 0444,
+       },
+       { 0 }
+};
+
 /**
  * Initialize procfs entries for LOD.
  *
@@ -1444,12 +1552,18 @@ int lod_procfs_init(struct lod_device *lod)
        obd->obd_debugfs_entry = debugfs_create_dir(obd->obd_name,
                                                    obd->obd_type->typ_debugfs_entry);
 
+
+       ldebugfs_add_vars(obd->obd_debugfs_entry, ldebugfs_lod_vars, lod);
+
        lod->lod_debugfs = ldebugfs_add_symlink(obd->obd_name, "lov",
                                                "../lod/%s", obd->obd_name);
        if (!lod->lod_debugfs)
                CERROR("%s: failed to create LOV debugfs symlink\n",
                       obd->obd_name);
 
+       lod->lod_pool_debugfs = debugfs_create_dir("pool",
+                                                  obd->obd_debugfs_entry);
+
        type = container_of(lov, struct obd_type, typ_kobj);
        if (!type->typ_procroot)
                RETURN(0);
@@ -1490,7 +1604,9 @@ void lod_procfs_fini(struct lod_device *lod)
                kobject_put(lov);
        }
 
+       debugfs_remove_recursive(lod->lod_pool_debugfs);
        debugfs_remove_recursive(lod->lod_debugfs);
+       debugfs_remove_recursive(obd->obd_debugfs_entry);
 
        if (obd->obd_proc_entry) {
                lprocfs_remove(&obd->obd_proc_entry);
index d23cd11..5316c1e 100755 (executable)
@@ -19517,6 +19517,16 @@ test_205e() {
 }
 run_test 205e "verify the output of lljobstat"
 
+test_205f() {
+       verify_yaml_available || skip_env "YAML verification not installed"
+
+       # check both qos_ost_weights and qos_mdt_weights
+       do_facet mds1 $LCTL get_param -n lod.*.qos*weights
+       do_facet mds1 $LCTL get_param -n lod.*.qos*weights | verify_yaml ||
+               error "qos_ost_weights is not valid YAML"
+}
+run_test 205f "verify qos_ost_weights YAML format "
+
 # LU-1480, LU-1773 and LU-1657
 test_206() {
        mkdir -p $DIR/$tdir
@@ -29623,7 +29633,6 @@ test_906() {
 }
 run_test 906 "Simple test for io_uring I/O engine via fio"
 
-
 complete $SECONDS
 [ -f $EXT2_DEV ] && rm $EXT2_DEV || true
 check_and_cleanup_lustre