From: Andreas Dilger
Date: Sun, 25 Apr 2021 11:02:19 +0000 (-0600)
Subject: LU-13439 lmv: qos stay on current MDT if less full
X-Git-Tag: 2.14.52~75
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=3f6fc483013da443b1494d81efe2d271ac67f901

LU-13439 lmv: qos stay on current MDT if less full

Keep "space balanced" subdirectories on the parent MDT if it is less
full than average, since it doesn't make sense to select another MDT
which may occasionally be *more* full.  This also reduces random
"MDT jumping" and needless remote directories.

Reduce the QOS threshold for space balanced LMV layouts, so that the
MDTs don't become too imbalanced before trying to fix the problem.

Change the LUSTRE_OPC_MKDIR opcode to be 1 instead of 0, so it can be
seen that a valid opcode has been stored into the structure.

Signed-off-by: Lai Siyao
Signed-off-by: Andreas Dilger
Change-Id: Iab34c7eade03d761aa16b08f409f7e5d69cd70bd
Reviewed-on: https://review.whamcloud.com/43445
Tested-by: jenkins
Reviewed-by: Mike Pershin
Reviewed-by: Hongchao Zhang
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---

diff --git a/lustre/include/obd.h b/lustre/include/obd.h
index a7a3daf..ed93ccf 100644
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -850,11 +850,11 @@ enum md_cli_flags {
 };
 
 enum md_op_code {
-	LUSTRE_OPC_MKDIR	= 0,
-	LUSTRE_OPC_SYMLINK	= 1,
-	LUSTRE_OPC_MKNOD	= 2,
-	LUSTRE_OPC_CREATE	= 3,
-	LUSTRE_OPC_ANY		= 5,
+	LUSTRE_OPC_MKDIR	= 1,
+	LUSTRE_OPC_SYMLINK,
+	LUSTRE_OPC_MKNOD,
+	LUSTRE_OPC_CREATE,
+	LUSTRE_OPC_ANY,
 };
 
 /**
diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h
index 9297f0d..308a526 100644
--- a/lustre/include/uapi/linux/lustre/lustre_user.h
+++ b/lustre/include/uapi/linux/lustre/lustre_user.h
@@ -718,6 +718,12 @@ struct fsxattr {
 #define LOV_OFFSET_DEFAULT	((__u16)-1)
 #define LMV_OFFSET_DEFAULT	((__u32)-1)
 
+#define LOV_QOS_DEF_THRESHOLD_RR_PCT	17
+#define LMV_QOS_DEF_THRESHOLD_RR_PCT	5
+
+#define LOV_QOS_DEF_PRIO_FREE		90
+#define LMV_QOS_DEF_PRIO_FREE		90
+
 static inline bool lov_pattern_supported(__u32 pattern)
 {
 	return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c
index 5236e44..d231ed9 100644
--- a/lustre/lmv/lmv_obd.c
+++ b/lustre/lmv/lmv_obd.c
@@ -1461,9 +1461,10 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
 
 static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
 {
-	struct lu_tgt_desc *tgt;
+	struct lu_tgt_desc *tgt, *cur = NULL;
 	__u64 total_weight = 0;
 	__u64 cur_weight = 0;
+	int total_usable = 0;
 	__u64 rand;
 	int rc;
 
@@ -1482,15 +1483,29 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
 		GOTO(unlock, tgt = ERR_PTR(rc));
 
 	lmv_foreach_tgt(lmv, tgt) {
-		tgt->ltd_qos.ltq_usable = 0;
-		if (!tgt->ltd_exp || !tgt->ltd_active)
+		if (!tgt->ltd_exp || !tgt->ltd_active) {
+			tgt->ltd_qos.ltq_usable = 0;
 			continue;
+		}
 
 		tgt->ltd_qos.ltq_usable = 1;
 		lu_tgt_qos_weight_calc(tgt);
+		if (tgt->ltd_index == *mdt) {
+			cur = tgt;
+			cur_weight = tgt->ltd_qos.ltq_weight;
+		}
 		total_weight += tgt->ltd_qos.ltq_weight;
+		total_usable++;
+	}
+
+	/* if current MDT has higher-than-average space, stay on same MDT */
+	rand = total_weight / total_usable;
+	if (cur_weight >= rand) {
+		tgt = cur;
+		GOTO(unlock, rc = 0);
 	}
 
+	cur_weight = 0;
 	rand = lu_prandom_u64_max(total_weight);
 
 	lmv_foreach_connected_tgt(lmv, tgt) {
diff --git a/lustre/obdclass/lu_tgt_descs.c b/lustre/obdclass/lu_tgt_descs.c
index 04e2330..1e84857 100644
--- a/lustre/obdclass/lu_tgt_descs.c
+++ b/lustre/obdclass/lu_tgt_descs.c
@@ -273,13 +273,21 @@ int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt)
 	init_rwsem(&ltd->ltd_qos.lq_rw_sem);
 	set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
 	set_bit(LQ_RESET, &ltd->ltd_qos.lq_flags);
-	/* Default priority is toward free space balance */
-	ltd->ltd_qos.lq_prio_free = 232;
-	/* Default threshold for rr (roughly 17%) */
-	ltd->ltd_qos.lq_threshold_rr = 43;
 	ltd->ltd_is_mdt = is_mdt;
-	if (is_mdt)
+	/* MDT imbalance threshold is low to balance across MDTs
+	 * relatively quickly, because each directory may result
+	 * in a large number of files/subdirs created therein.
+	 */
+	if (is_mdt) {
 		ltd->ltd_lmv_desc.ld_pattern = LMV_HASH_TYPE_DEFAULT;
+		ltd->ltd_qos.lq_prio_free = LMV_QOS_DEF_PRIO_FREE * 256 / 100;
+		ltd->ltd_qos.lq_threshold_rr =
+			LMV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+	} else {
+		ltd->ltd_qos.lq_prio_free = LOV_QOS_DEF_PRIO_FREE * 256 / 100;
+		ltd->ltd_qos.lq_threshold_rr =
+			LOV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+	}
 
 	return 0;
 }
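
To illustrate the selection rule that the lmv_obd.c hunk above adds to
lmv_locate_tgt_qos(): every usable MDT gets a free-space weight, and the
directory stays on the parent's current MDT whenever that MDT's weight is
at least the average weight, so a weighted-random pick is only made when
the current MDT is fuller than average.  The following is a minimal
standalone sketch of that rule, not Lustre code: struct mdt_weight,
pick_mdt() and the sample weights are made-up names and values, and
rand() stands in for lu_prandom_u64_max().

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct mdt_weight {
	int      index;   /* MDT index */
	int      usable;  /* connected and active? */
	uint64_t weight;  /* larger means more free space */
};

/* Return the index of the MDT on which to create the new directory. */
static int pick_mdt(struct mdt_weight *mdts, int count, int current)
{
	uint64_t total_weight = 0, cur_weight = 0, avg, rand_pt, running = 0;
	int total_usable = 0, i;

	for (i = 0; i < count; i++) {
		if (!mdts[i].usable)
			continue;
		if (mdts[i].index == current)
			cur_weight = mdts[i].weight;
		total_weight += mdts[i].weight;
		total_usable++;
	}
	if (total_usable == 0)
		return -1;

	/* Stay on the current MDT if it is no fuller than average. */
	avg = total_weight / total_usable;
	if (cur_weight >= avg)
		return current;

	/* Otherwise fall back to weighted-random selection. */
	rand_pt = (uint64_t)rand() % total_weight;
	for (i = 0; i < count; i++) {
		if (!mdts[i].usable)
			continue;
		running += mdts[i].weight;
		if (running > rand_pt)
			return mdts[i].index;
	}
	return current;
}

int main(void)
{
	struct mdt_weight mdts[] = {
		{ 0, 1, 800 },	/* plenty of free space */
		{ 1, 1, 300 },	/* fairly full */
		{ 2, 1, 100 },	/* nearly full */
	};

	/* Parent on MDT0: above-average weight, so the directory stays. */
	printf("parent on MDT0 -> MDT%d\n", pick_mdt(mdts, 3, 0));
	/* Parent on MDT2: below average, so a weighted-random MDT wins. */
	printf("parent on MDT2 -> MDT%d\n", pick_mdt(mdts, 3, 2));
	return 0;
}

In the real patch the loop also handles targets that are not usable,
returns a struct lu_tgt_desc * rather than an index, and takes the QoS
locks; none of that is shown here.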
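The lu_tgt_descs.c hunk replaces the hard-coded defaults (lq_prio_free =
232, lq_threshold_rr = 43) with named percentage constants scaled onto
the same 0..256 range, as the "* 256 / 100" conversion shows.  A quick
standalone check of that arithmetic (again not Lustre code) gives the
resulting raw values:

#include <stdio.h>

#define LOV_QOS_DEF_THRESHOLD_RR_PCT	17
#define LMV_QOS_DEF_THRESHOLD_RR_PCT	5
#define LOV_QOS_DEF_PRIO_FREE		90
#define LMV_QOS_DEF_PRIO_FREE		90

int main(void)
{
	/* Old hard-coded defaults were lq_prio_free = 232 and
	 * lq_threshold_rr = 43 ("roughly 17%" of 256).
	 */
	printf("OST prio_free    = %d\n", LOV_QOS_DEF_PRIO_FREE * 256 / 100);        /* 230 */
	printf("OST threshold_rr = %d\n", LOV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100); /* 43 */
	printf("MDT prio_free    = %d\n", LMV_QOS_DEF_PRIO_FREE * 256 / 100);        /* 230 */
	printf("MDT threshold_rr = %d\n", LMV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100); /* 12 */
	return 0;
}

So the OST defaults keep roughly their previous effective values, while
MDTs now start space-weighted balancing once the imbalance exceeds about
5% instead of about 17%, consistent with the commit message above.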