From a44956f0d57d45109959fc83a32764628adf4446 Mon Sep 17 00:00:00 2001 From: Sergey Cheremencev Date: Mon, 20 Feb 2023 20:27:52 +0300 Subject: [PATCH] LU-16501 lod: add qos_ost_weights to debugfs The patch adds the files qos_ost_weights and qos_mdt_weights to the lod directory in debugfs. A qos_ost_weights file is also added for each OST pool in a new directory lod/pool. lod.<fsname>-MDT*-mdtlov.qos_mdt_weights lod.<fsname>-MDT*-mdtlov.qos_ost_weights lod.<fsname>-MDT*-mdtlov.pool.<poolname>.qos_ost_weights These files provide target and server weights, penalties and other data needed to debug QOS allocator imbalance issues, in YAML format: - { ost_idx: 0, tgt_weight: 1137680, tgt_penalty: 0, tgt_penalty_per_obj: 115544, tgt_avail: 1137680, tgt_last_used: 1677104866, svr_nid: 192.168.100.31@tcp, svr_bavail: 2070560, svr_iavail: 1, svr_penalty: 0, svr_penalty_per_obj: 52572, svr_last_used: 1677104866 } Writing to qos_ost_weights/qos_mdt_weights resets tgt_weight, tgt_penalty and svr_penalty. The patch also adds sanity test_205f to check the YAML output. 
Signed-off-by: Sergey Cheremencev Change-Id: I27e3f5abeb2f31b1c445658be035ec7e76c1572e Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50074 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/lod/lod_internal.h | 7 +++ lustre/lod/lod_pool.c | 36 ++++++++++++++ lustre/lod/lproc_lod.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++ lustre/tests/sanity.sh | 11 ++++- 4 files changed, 169 insertions(+), 1 deletion(-) diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index 09863d9..8e41dd4 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -71,6 +71,7 @@ struct pool_desc { char pool_spill_target[LOV_MAXPOOLNAME + 1]; bool pool_same_space; /* targets in pool balanced*/ time64_t pool_same_space_expire; /*uses ld_qos_maxage*/ + struct dentry *pool_debugfs; }; struct lod_device; @@ -155,6 +156,7 @@ struct lod_device { struct proc_dir_entry *lod_symlink; struct dentry *lod_debugfs; + struct dentry *lod_pool_debugfs; /* ROOT object, used to fetch FS default striping */ struct lod_object *lod_md_root; @@ -817,5 +819,10 @@ void lod_check_and_spill_pool(const struct lu_env *env, struct lod_device *lod, void lod_spill_target_refresh(const struct lu_env *env, struct lod_device *lod, struct pool_desc *pool); struct pool_desc *lod_pool_find(struct lod_device *lod, char *poolname); +int lod_tgt_weights_seq_show(struct seq_file *m, struct lod_device *lod, + struct lu_tgt_pool *tgts, bool mdt); +int lod_tgt_weights_seq_write(struct seq_file *m, const char __user *buf, + size_t count, struct lod_device *lod, + struct lu_tgt_pool *tgts, bool is_mdt); extern struct lprocfs_vars lprocfs_lod_spill_vars[]; #endif diff --git a/lustre/lod/lod_pool.c b/lustre/lod/lod_pool.c index 9db33cd..e0671b0 100644 --- a/lustre/lod/lod_pool.c +++ b/lustre/lod/lod_pool.c @@ -401,6 +401,35 @@ struct pool_desc *lod_pool_find(struct lod_device *lod, char *poolname) 
rcu_read_unlock(); return pool; } + +static int lod_ost_pool_weights_seq_show(struct seq_file *m, void *data) +{ + struct pool_desc *pool = m->private; + struct lod_device *lod = lu2lod_dev(pool->pool_lobd->obd_lu_dev); + + return lod_tgt_weights_seq_show(m, lod, &pool->pool_obds, false); +} + +static ssize_t +lod_ost_pool_weights_seq_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct pool_desc *pool = m->private; + struct lod_device *lod = lu2lod_dev(pool->pool_lobd->obd_lu_dev); + + return lod_tgt_weights_seq_write(m, buf, count, lod, &pool->pool_obds, + false); +} +LDEBUGFS_SEQ_FOPS(lod_ost_pool_weights); + +static struct ldebugfs_vars ldebugfs_lod_pool_vars[] = { + { .name = "qos_ost_weights", + .fops = &lod_ost_pool_weights_fops, + .proc_mode = 0444 }, + { 0 } +}; + /** * Allocate a new pool for the specified device. * @@ -490,6 +519,11 @@ int lod_pool_new(struct obd_device *obd, char *poolname) GOTO(out_err, rc); } + new_pool->pool_debugfs = debugfs_create_dir(poolname, + lod->lod_pool_debugfs); + ldebugfs_add_vars(new_pool->pool_debugfs, ldebugfs_lod_pool_vars, + new_pool); + CDEBUG(D_CONFIG, LOV_POOLNAMEF" is pool #%d\n", poolname, lod->lod_pool_count); @@ -539,6 +573,8 @@ int lod_pool_del(struct obd_device *obd, char *poolname) if (!pool) RETURN(-ENOENT); + debugfs_remove_recursive(pool->pool_debugfs); + if (pool->pool_proc_entry != NULL) { CDEBUG(D_INFO, "proc entry %p\n", pool->pool_proc_entry); lprocfs_remove(&pool->pool_proc_entry); diff --git a/lustre/lod/lproc_lod.c b/lustre/lod/lproc_lod.c index d7004a3..6447eee 100644 --- a/lustre/lod/lproc_lod.c +++ b/lustre/lod/lproc_lod.c @@ -1356,6 +1356,114 @@ static struct attribute *lod_attrs[] = { KOBJ_ATTRIBUTE_GROUPS(lod); /* creates lod_groups from lod_attrs */ +int lod_tgt_weights_seq_show(struct seq_file *m, struct lod_device *lod, + struct lu_tgt_pool *tgts, bool is_mdt) +{ + int i; + + if (!tgts->op_count) + return 0; + 
+ down_read(&tgts->op_rw_sem); + for (i = 0; i < tgts->op_count; i++) { + u32 *op_array = tgts->op_array; + struct lod_tgt_desc *tgt = is_mdt ? MDT_TGT(lod, op_array[i]) : + OST_TGT(lod, op_array[i]); + struct lu_svr_qos *svr = tgt->ltd_qos.ltq_svr; + + seq_printf(m, "- { %s: %d, tgt_weight: %llu, tgt_penalty: %llu, tgt_penalty_per_obj: %llu, tgt_avail: %llu, tgt_last_used: %llu, svr_nid: %s, svr_bavail: %llu, svr_iavail: %llu, svr_penalty: %llu, svr_penalty_per_obj: %llu, svr_last_used: %llu }\n", + is_mdt ? "mdt_idx" : "ost_idx", tgt->ltd_index, + tgt->ltd_qos.ltq_weight, + tgt->ltd_qos.ltq_penalty, + tgt->ltd_qos.ltq_penalty_per_obj, + tgt->ltd_qos.ltq_avail, tgt->ltd_qos.ltq_used, + svr->lsq_uuid.uuid, svr->lsq_bavail, svr->lsq_iavail, + svr->lsq_penalty, svr->lsq_penalty_per_obj, + svr->lsq_used); + } + up_read(&tgts->op_rw_sem); + + return 0; +} + +int lod_tgt_weights_seq_write(struct seq_file *m, const char __user *buf, + size_t count, struct lod_device *lod, + struct lu_tgt_pool *tgts, bool is_mdt) +{ + int i; + + if (!tgts->op_count) + return count; + + down_read(&tgts->op_rw_sem); + down_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem); + for (i = 0; i < tgts->op_count; i++) { + u32 *op_array = tgts->op_array; + struct lod_tgt_desc *tgt = is_mdt ? 
MDT_TGT(lod, op_array[i]) : + OST_TGT(lod, op_array[i]); + + tgt->ltd_qos.ltq_weight = 0; + tgt->ltd_qos.ltq_penalty = 0; + tgt->ltd_qos.ltq_svr->lsq_penalty = 0; + } + set_bit(LQ_DIRTY, &lod->lod_ost_descs.ltd_qos.lq_flags); + up_write(&lod->lod_ost_descs.ltd_qos.lq_rw_sem); + up_read(&tgts->op_rw_sem); + + return count; +} + +static int lod_mdt_weights_seq_show(struct seq_file *m, void *data) +{ + struct lod_device *lod = m->private; + struct lu_tgt_pool *tgts = &lod->lod_mdt_descs.ltd_tgt_pool; + + return lod_tgt_weights_seq_show(m, lod, tgts, true); +} + +static ssize_t +lod_mdt_weights_seq_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + + struct seq_file *m = file->private_data; + struct lod_device *lod = m->private; + struct lu_tgt_pool *tgts = &lod->lod_mdt_descs.ltd_tgt_pool; + + return lod_tgt_weights_seq_write(m, buf, count, lod, tgts, true); +} +LDEBUGFS_SEQ_FOPS(lod_mdt_weights); + +static int lod_ost_weights_seq_show(struct seq_file *m, void *data) +{ + struct lod_device *lod = m->private; + struct lu_tgt_pool *tgts = &lod->lod_ost_descs.ltd_tgt_pool; + + return lod_tgt_weights_seq_show(m, lod, tgts, false); +} + +static ssize_t +lod_ost_weights_seq_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct lod_device *lod = m->private; + struct lu_tgt_pool *tgts = &lod->lod_ost_descs.ltd_tgt_pool; + + return lod_tgt_weights_seq_write(m, buf, count, lod, tgts, false); +} +LDEBUGFS_SEQ_FOPS(lod_ost_weights); + +static struct ldebugfs_vars ldebugfs_lod_vars[] = { + { .name = "qos_mdt_weights", + .fops = &lod_mdt_weights_fops, + .proc_mode = 0444 }, + { .name = "qos_ost_weights", + .fops = &lod_ost_weights_fops, + .proc_mode = 0444 }, + { 0 } +}; + /** * Initialize procfs entries for LOD. 
* @@ -1444,12 +1552,18 @@ int lod_procfs_init(struct lod_device *lod) obd->obd_debugfs_entry = debugfs_create_dir(obd->obd_name, obd->obd_type->typ_debugfs_entry); + + ldebugfs_add_vars(obd->obd_debugfs_entry, ldebugfs_lod_vars, lod); + lod->lod_debugfs = ldebugfs_add_symlink(obd->obd_name, "lov", "../lod/%s", obd->obd_name); if (!lod->lod_debugfs) CERROR("%s: failed to create LOV debugfs symlink\n", obd->obd_name); + lod->lod_pool_debugfs = debugfs_create_dir("pool", + obd->obd_debugfs_entry); + type = container_of(lov, struct obd_type, typ_kobj); if (!type->typ_procroot) RETURN(0); @@ -1490,7 +1604,9 @@ void lod_procfs_fini(struct lod_device *lod) kobject_put(lov); } + debugfs_remove_recursive(lod->lod_pool_debugfs); debugfs_remove_recursive(lod->lod_debugfs); + debugfs_remove_recursive(obd->obd_debugfs_entry); if (obd->obd_proc_entry) { lprocfs_remove(&obd->obd_proc_entry); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d23cd11..5316c1e 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -19517,6 +19517,16 @@ test_205e() { } run_test 205e "verify the output of lljobstat" +test_205f() { + verify_yaml_available || skip_env "YAML verification not installed" + + # check both qos_ost_weights and qos_mdt_weights + do_facet mds1 $LCTL get_param -n lod.*.qos*weights + do_facet mds1 $LCTL get_param -n lod.*.qos*weights | verify_yaml || + error "qos_ost_weights is not valid YAML" +} +run_test 205f "verify qos_ost_weights YAML format " + # LU-1480, LU-1773 and LU-1657 test_206() { mkdir -p $DIR/$tdir @@ -29623,7 +29633,6 @@ test_906() { } run_test 906 "Simple test for io_uring I/O engine via fio" - complete $SECONDS [ -f $EXT2_DEV ] && rm $EXT2_DEV || true check_and_cleanup_lustre -- 1.8.3.1