From: Andreas Dilger Date: Fri, 3 Feb 2023 10:14:39 +0000 (-0700) Subject: LU-16501 tgt: skip free inodes in OST weights X-Git-Tag: 2.15.55~159 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=511bf2f4ccd1482d6f2380942d43cc3e08b8e25b;p=fs%2Flustre-release.git LU-16501 tgt: skip free inodes in OST weights In lu_tgt_qos_weight_calc() calculate the target weight consistently with how the per-OST and per-OSS penalty calculation is done in ltd_qos_penalties_calc(). Otherwise, the QOS weighting calculations combine two different units, which incorrectly weighs allocations on OST with more free inodes over those with more free space. Fixes: d3090bb2b486 ("LU-11213 lod: share object alloc QoS code with LMV") Signed-off-by: Andreas Dilger Change-Id: I1ccc52d7ad5dc440ae48403ba129efd6a0a51c33 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49890 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Artem Blagodarenko Reviewed-by: Lai Siyao Reviewed-by: Sergey Cheremencev Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 03db2eb..8c5a979 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -1675,6 +1675,18 @@ struct lu_tgt_desc { ltd_connecting:1; /* target is connecting */ }; +static inline __u64 tgt_statfs_bavail(struct lu_tgt_desc *tgt) +{ + struct obd_statfs *statfs = &tgt->ltd_statfs; + + return statfs->os_bavail * statfs->os_bsize; +} + +static inline __u64 tgt_statfs_iavail(struct lu_tgt_desc *tgt) +{ + return tgt->ltd_statfs.os_ffree; +} + /* number of pointers at 2nd level */ #define TGT_PTRS_PER_BLOCK (PAGE_SIZE / sizeof(void *)) /* number of pointers at 1st level - only need as many as max OST/MDT count */ @@ -1739,7 +1751,7 @@ struct lu_tgt_descs { u64 lu_prandom_u64_max(u64 ep_ro); int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd); int lu_qos_del_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd); -void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt); +void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt, bool is_mdt); int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt); void lu_tgt_descs_fini(struct lu_tgt_descs *ltd); diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 84cc639..e16d24c 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1545,7 +1545,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, } tgt->ltd_qos.ltq_usable = 1; - lu_tgt_qos_weight_calc(tgt); + lu_tgt_qos_weight_calc(tgt, true); if (tgt->ltd_index == op_data->op_mds) cur = tgt; total_avail += tgt->ltd_qos.ltq_avail; @@ -1647,7 +1647,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_lf(struct lmv_obd *lmv) } tgt->ltd_qos.ltq_usable = 1; - lu_tgt_qos_weight_calc(tgt); + lu_tgt_qos_weight_calc(tgt, true); avail += tgt->ltd_qos.ltq_avail; if (!min || min->ltd_qos.ltq_avail > tgt->ltd_qos.ltq_avail) min = tgt; diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 8c13592..badc064 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -1444,8 +1444,7 @@ static int lod_pool_qos_penalties_calc(struct lod_device *lod, if (!ost->ltd_active) continue; - ba = ost->ltd_statfs.os_bavail * ost->ltd_statfs.os_bsize; - ba >>= 8; + ba = tgt_statfs_bavail(ost) >> 8; if (!ba) continue; @@ -1455,9 +1454,9 @@ static int lod_pool_qos_penalties_calc(struct lod_device *lod, /* * per-ost penalty is - * prio * bavail * iavail / (num_tgt - 1) / 2 + * prio * bavail / (num_tgt - 1) / prio_max / 2 */ - ost->ltd_qos.ltq_penalty_per_obj = prio_wide * ba >> 8; + ost->ltd_qos.ltq_penalty_per_obj = prio_wide * ba >> 9; do_div(ost->ltd_qos.ltq_penalty_per_obj, num_active); age = (now - ost->ltd_qos.ltq_used) >> 3; @@ -1641,7 +1640,7 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo, continue; ost->ltd_qos.ltq_usable = 1; - lu_tgt_qos_weight_calc(ost); + lu_tgt_qos_weight_calc(ost, false); total_weight += ost->ltd_qos.ltq_weight; good_osts++; @@ -1887,7 +1886,7 @@ int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo, continue; mdt->ltd_qos.ltq_usable = 1; - lu_tgt_qos_weight_calc(mdt); + lu_tgt_qos_weight_calc(mdt, true); total_weight += mdt->ltd_qos.ltq_weight; good_mdts++; diff --git a/lustre/obdclass/lu_tgt_descs.c b/lustre/obdclass/lu_tgt_descs.c index bfb5816..f2cffcc 100644 --- a/lustre/obdclass/lu_tgt_descs.c +++ b/lustre/obdclass/lu_tgt_descs.c @@ -206,33 +206,26 @@ out: } EXPORT_SYMBOL(lu_qos_del_tgt); -static inline __u64 tgt_statfs_bavail(struct lu_tgt_desc *tgt) -{ - struct obd_statfs *statfs = &tgt->ltd_statfs; - - return statfs->os_bavail * statfs->os_bsize; -} - -static inline __u64 tgt_statfs_iavail(struct lu_tgt_desc *tgt) -{ - return tgt->ltd_statfs.os_ffree; -} - /** * Calculate weight for a given tgt. * - * The final tgt weight is bavail >> 16 * iavail >> 8 minus the tgt and server - * penalties. See ltd_qos_penalties_calc() for how penalties are calculated. + * The final tgt weight uses only free space for OSTs, but combines + * both free space and inodes for MDTs, minus tgt and server penalties. + * See ltd_qos_penalties_calc() for how penalties are calculated. * * \param[in] tgt target descriptor + * \param[in] is_mdt target table is for MDT selection (use inodes) */ -void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt) +void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt, bool is_mdt) { struct lu_tgt_qos *ltq = &tgt->ltd_qos; __u64 penalty; - ltq->ltq_avail = (tgt_statfs_bavail(tgt) >> 16) * - (tgt_statfs_iavail(tgt) >> 8); + if (is_mdt) + ltq->ltq_avail = (tgt_statfs_bavail(tgt) >> 16) * + (tgt_statfs_iavail(tgt) >> 8); + else + ltq->ltq_avail = tgt_statfs_bavail(tgt) >> 8; penalty = ltq->ltq_penalty + ltq->ltq_svr->lsq_penalty; if (ltq->ltq_avail < penalty) ltq->ltq_weight = 0; @@ -521,14 +514,13 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd) /* * per-tgt penalty is - * prio * bavail * iavail / (num_tgt - 1) / 2 + * prio * bavail * iavail / (num_tgt - 1) / prio_max / 2 */ - tgt->ltd_qos.ltq_penalty_per_obj = prio_wide * ba * ia >> 8; + tgt->ltd_qos.ltq_penalty_per_obj = prio_wide * ba * ia >> 9; do_div(tgt->ltd_qos.ltq_penalty_per_obj, num_active); - tgt->ltd_qos.ltq_penalty_per_obj >>= 1; age = (now - tgt->ltd_qos.ltq_used) >> 3; - if (test_bit(LQ_RESET, &qos->lq_flags) || + if (test_bit(LQ_RESET, &qos->lq_flags) || age > 32 * desc->ld_qos_maxage) tgt->ltd_qos.ltq_penalty = 0; else if (age > desc->ld_qos_maxage) @@ -564,7 +556,7 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd) svr->lsq_penalty_per_obj >>= 1; age = (now - svr->lsq_used) >> 3; - if (test_bit(LQ_RESET, &qos->lq_flags) || + if (test_bit(LQ_RESET, &qos->lq_flags) || age > 32 * desc->ld_qos_maxage) svr->lsq_penalty = 0; else if (age > desc->ld_qos_maxage) @@ -572,14 +564,11 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd) svr->lsq_penalty >>= age / desc->ld_qos_maxage; } - clear_bit(LQ_DIRTY, &qos->lq_flags); - clear_bit(LQ_RESET, &qos->lq_flags); /* * If each tgt has almost same free space, do rr allocation for better * creation performance */ - clear_bit(LQ_SAME_SPACE, &qos->lq_flags); if (((ba_max * (QOS_THRESHOLD_MAX - qos->lq_threshold_rr)) / QOS_THRESHOLD_MAX) < ba_min && ((ia_max * (QOS_THRESHOLD_MAX - qos->lq_threshold_rr)) / @@ -587,7 +576,11 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd) set_bit(LQ_SAME_SPACE, &qos->lq_flags); /* Reset weights for the next time we enter qos mode */ set_bit(LQ_RESET, &qos->lq_flags); + } else { + clear_bit(LQ_SAME_SPACE, &qos->lq_flags); + clear_bit(LQ_RESET, &qos->lq_flags); } + clear_bit(LQ_DIRTY, &qos->lq_flags); rc = 0; out: @@ -664,7 +657,7 @@ int ltd_qos_update(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt, else ltq->ltq_penalty -= ltq->ltq_penalty_per_obj; - lu_tgt_qos_weight_calc(tgt); + lu_tgt_qos_weight_calc(tgt, ltd->ltd_is_mdt); /* Recalc the total weight of usable osts */ if (ltq->ltq_usable)