From c4719ee350a0beaebfb2ac934c2416aed6fe9257 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Sat, 6 Nov 2021 15:16:49 -0400 Subject: [PATCH] LU-15216 lmv: improve MDT QOS space balance When MDTs are not balanced, QOS code tries to keep subdirectory creation local to the same MDT when it is deep in the directory tree, to avoid creating too many remote directories, but the existing weight to stay on the parent MDT until 50% of other MDTs is too radical, and causes mkdirs to be "stuck" on the same MDT. * remove "lq_threshold_rr" from above calculation because the check in ltd_qos_is_usable() handles this, so use only "dir_depth". * the factor is changed to "16 / (dir_depth + 10)", then it's less likely to stick to the parent MDT for top levels, while more likely to stay on the parent MDT for low levels: depth=0 -> 160%, depth=4 -> 114%, depth=6 -> 100%, depth=8 -> 88%, depth=12 -> 72% * rename lli_depth to lli_dir_depth to make usage more clear. Lustre-change: https://review.whamcloud.com/45544 Lustre-commit: TBD (from 95398b056f7a88ec7830da353170e8993cecf036) Signed-off-by: Lai Siyao Change-Id: Iec6b77919b630d4baee6d54bee7bdb8ca9fb8574 Reviewed-on: https://review.whamcloud.com/45556 Tested-by: jenkins Reviewed-by: Hongchao Zhang Reviewed-by: Andreas Dilger Tested-by: Andreas Dilger --- lustre/llite/dir.c | 2 +- lustre/llite/llite_internal.h | 2 +- lustre/llite/llite_lib.c | 5 +++-- lustre/llite/namei.c | 6 +++--- lustre/lmv/lmv_obd.c | 7 ++++--- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 2e82137..9803b15 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -479,7 +479,7 @@ static int ll_dir_setdirstripe(struct dentry *dparent, struct lmv_user_md *lump, if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - op_data->op_dir_depth = ll_i2info(parent)->lli_depth; + op_data->op_dir_depth = ll_i2info(parent)->lli_dir_depth; if (ll_sbi_has_encrypt(sbi) && (IS_ENCRYPTED(parent) || diff --git 
a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index c647726..92146cb 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -170,7 +170,7 @@ struct ll_inode_info { * -- I am the owner of dir statahead. */ pid_t lli_opendir_pid; /* directory depth to ROOT */ - unsigned short lli_depth; + unsigned short lli_dir_depth; /* stat will try to access statahead entries or start * statahead if this flag is set, and this flag will be * set upon dir open, and cleared when dir is closed, diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 00dd545..1698cbd 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -2486,8 +2486,9 @@ void ll_update_dir_depth(struct inode *dir, struct inode *inode) return; lli = ll_i2info(inode); - lli->lli_depth = ll_i2info(dir)->lli_depth + 1; - CDEBUG(D_INODE, DFID" depth %hu\n", PFID(&lli->lli_fid), lli->lli_depth); + lli->lli_dir_depth = ll_i2info(dir)->lli_dir_depth + 1; + CDEBUG(D_INODE, DFID" depth %hu\n", + PFID(&lli->lli_fid), lli->lli_dir_depth); } int ll_read_inode2(struct inode *inode, void *opaque) diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index d30638e..d7bfd03 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -1465,7 +1465,7 @@ static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir) struct ll_inode_info *lli = ll_i2info(dir); struct lmv_stripe_md *lsm; - op_data->op_dir_depth = lli->lli_depth; + op_data->op_dir_depth = lli->lli_dir_depth; /* parent directory is striped */ if (unlikely(lli->lli_lsm_md)) @@ -1494,11 +1494,11 @@ static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir) if (lsm->lsm_md_max_inherit != LMV_INHERIT_NONE && (lsm->lsm_md_max_inherit == LMV_INHERIT_UNLIMITED || - lsm->lsm_md_max_inherit >= lli->lli_depth)) { + lsm->lsm_md_max_inherit >= lli->lli_dir_depth)) { op_data->op_flags |= MF_QOS_MKDIR; if (lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE && 
(lsm->lsm_md_max_inherit_rr == LMV_INHERIT_RR_UNLIMITED || - lsm->lsm_md_max_inherit_rr >= lli->lli_depth)) + lsm->lsm_md_max_inherit_rr >= lli->lli_dir_depth)) op_data->op_flags |= MF_RR_MKDIR; CDEBUG(D_INODE, DFID" requests qos mkdir %#x\n", PFID(&lli->lli_fid), op_data->op_flags); diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 4d0ad4f..7593cb7 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1502,10 +1502,11 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt, /* if current MDT has above-average space, within range of the QOS * threshold, stay on the same MDT to avoid creating needless remote - * MDT directories. It's more likely for low level directories. + * MDT directories. It's more likely for low level directories. + * "16 / (dir_depth + 10)" is the factor to make it less likely for + * top level directories, while more likely for low levels. */ - rand = total_avail * (256 - lmv->lmv_qos.lq_threshold_rr) / - (total_usable * 256 * (1 + dir_depth / 4)); + rand = total_avail * 16 / (total_usable * (dir_depth + 10)); if (cur && cur->ltd_qos.ltq_avail >= rand) { tgt = cur; GOTO(unlock, rc = 0); -- 1.8.3.1