Whamcloud - gitweb
LU-16501 tgt: skip free inodes in OST weights 90/49890/4
author Andreas Dilger <adilger@whamcloud.com>
Fri, 3 Feb 2023 10:14:39 +0000 (03:14 -0700)
committer Oleg Drokin <green@whamcloud.com>
Tue, 14 Feb 2023 06:03:27 +0000 (06:03 +0000)
In lu_tgt_qos_weight_calc() calculate the target weight consistently
with how the per-OST and per-OSS penalty calculation is done in
ltd_qos_penalties_calc().  Otherwise, the QOS weighting calculations
combine two different units, which incorrectly weighs allocations on
OSTs with more free inodes over those with more free space.

Fixes: d3090bb2b486 ("LU-11213 lod: share object alloc QoS code with LMV")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I1ccc52d7ad5dc440ae48403ba129efd6a0a51c33
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49890
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Sergey Cheremencev <scherementsev@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lu_object.h
lustre/lmv/lmv_obd.c
lustre/lod/lod_qos.c
lustre/obdclass/lu_tgt_descs.c

index 03db2eb..8c5a979 100644 (file)
@@ -1675,6 +1675,18 @@ struct lu_tgt_desc {
                           ltd_connecting:1; /* target is connecting */
 };
 
+static inline __u64 tgt_statfs_bavail(struct lu_tgt_desc *tgt)
+{
+       struct obd_statfs *statfs = &tgt->ltd_statfs;
+
+       return statfs->os_bavail * statfs->os_bsize;
+}
+
+static inline __u64 tgt_statfs_iavail(struct lu_tgt_desc *tgt)
+{
+       return tgt->ltd_statfs.os_ffree;
+}
+
 /* number of pointers at 2nd level */
 #define TGT_PTRS_PER_BLOCK     (PAGE_SIZE / sizeof(void *))
 /* number of pointers at 1st level - only need as many as max OST/MDT count */
@@ -1739,7 +1751,7 @@ struct lu_tgt_descs {
 u64 lu_prandom_u64_max(u64 ep_ro);
 int lu_qos_add_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd);
 int lu_qos_del_tgt(struct lu_qos *qos, struct lu_tgt_desc *ltd);
-void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt);
+void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt, bool is_mdt);
 
 int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt);
 void lu_tgt_descs_fini(struct lu_tgt_descs *ltd);
index 84cc639..e16d24c 100644 (file)
@@ -1545,7 +1545,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv,
                }
 
                tgt->ltd_qos.ltq_usable = 1;
-               lu_tgt_qos_weight_calc(tgt);
+               lu_tgt_qos_weight_calc(tgt, true);
                if (tgt->ltd_index == op_data->op_mds)
                        cur = tgt;
                total_avail += tgt->ltd_qos.ltq_avail;
@@ -1647,7 +1647,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_lf(struct lmv_obd *lmv)
                }
 
                tgt->ltd_qos.ltq_usable = 1;
-               lu_tgt_qos_weight_calc(tgt);
+               lu_tgt_qos_weight_calc(tgt, true);
                avail += tgt->ltd_qos.ltq_avail;
                if (!min || min->ltd_qos.ltq_avail > tgt->ltd_qos.ltq_avail)
                        min = tgt;
index 8c13592..badc064 100644 (file)
@@ -1444,8 +1444,7 @@ static int lod_pool_qos_penalties_calc(struct lod_device *lod,
                if (!ost->ltd_active)
                        continue;
 
-               ba = ost->ltd_statfs.os_bavail * ost->ltd_statfs.os_bsize;
-               ba >>= 8;
+               ba = tgt_statfs_bavail(ost) >> 8;
                if (!ba)
                        continue;
 
@@ -1455,9 +1454,9 @@ static int lod_pool_qos_penalties_calc(struct lod_device *lod,
 
                /*
                 * per-ost penalty is
-                * prio * bavail * iavail / (num_tgt - 1) / 2
+                * prio * bavail / (num_tgt - 1) / prio_max / 2
                 */
-               ost->ltd_qos.ltq_penalty_per_obj = prio_wide * ba >> 8;
+               ost->ltd_qos.ltq_penalty_per_obj = prio_wide * ba >> 9;
                do_div(ost->ltd_qos.ltq_penalty_per_obj, num_active);
 
                age = (now - ost->ltd_qos.ltq_used) >> 3;
@@ -1641,7 +1640,7 @@ static int lod_ost_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                        continue;
 
                ost->ltd_qos.ltq_usable = 1;
-               lu_tgt_qos_weight_calc(ost);
+               lu_tgt_qos_weight_calc(ost, false);
                total_weight += ost->ltd_qos.ltq_weight;
 
                good_osts++;
@@ -1887,7 +1886,7 @@ int lod_mdt_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                        continue;
 
                mdt->ltd_qos.ltq_usable = 1;
-               lu_tgt_qos_weight_calc(mdt);
+               lu_tgt_qos_weight_calc(mdt, true);
                total_weight += mdt->ltd_qos.ltq_weight;
 
                good_mdts++;
index bfb5816..f2cffcc 100644 (file)
@@ -206,33 +206,26 @@ out:
 }
 EXPORT_SYMBOL(lu_qos_del_tgt);
 
-static inline __u64 tgt_statfs_bavail(struct lu_tgt_desc *tgt)
-{
-       struct obd_statfs *statfs = &tgt->ltd_statfs;
-
-       return statfs->os_bavail * statfs->os_bsize;
-}
-
-static inline __u64 tgt_statfs_iavail(struct lu_tgt_desc *tgt)
-{
-       return tgt->ltd_statfs.os_ffree;
-}
-
 /**
  * Calculate weight for a given tgt.
  *
- * The final tgt weight is bavail >> 16 * iavail >> 8 minus the tgt and server
- * penalties.  See ltd_qos_penalties_calc() for how penalties are calculated.
+ * The final tgt weight uses only free space for OSTs, but combines
+ * both free space and inodes for MDTs, minus tgt and server penalties.
+ * See ltd_qos_penalties_calc() for how penalties are calculated.
  *
  * \param[in] tgt      target descriptor
+ * \param[in] is_mdt   target table is for MDT selection (use inodes)
  */
-void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt)
+void lu_tgt_qos_weight_calc(struct lu_tgt_desc *tgt, bool is_mdt)
 {
        struct lu_tgt_qos *ltq = &tgt->ltd_qos;
        __u64 penalty;
 
-       ltq->ltq_avail = (tgt_statfs_bavail(tgt) >> 16) *
-                        (tgt_statfs_iavail(tgt) >> 8);
+       if (is_mdt)
+               ltq->ltq_avail = (tgt_statfs_bavail(tgt) >> 16) *
+                                (tgt_statfs_iavail(tgt) >> 8);
+       else
+               ltq->ltq_avail = tgt_statfs_bavail(tgt) >> 8;
        penalty = ltq->ltq_penalty + ltq->ltq_svr->lsq_penalty;
        if (ltq->ltq_avail < penalty)
                ltq->ltq_weight = 0;
@@ -521,14 +514,13 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd)
 
                /*
                 * per-tgt penalty is
-                * prio * bavail * iavail / (num_tgt - 1) / 2
+                * prio * bavail * iavail / (num_tgt - 1) / prio_max / 2
                 */
-               tgt->ltd_qos.ltq_penalty_per_obj = prio_wide * ba * ia >> 8;
+               tgt->ltd_qos.ltq_penalty_per_obj = prio_wide * ba * ia >> 9;
                do_div(tgt->ltd_qos.ltq_penalty_per_obj, num_active);
-               tgt->ltd_qos.ltq_penalty_per_obj >>= 1;
 
                age = (now - tgt->ltd_qos.ltq_used) >> 3;
-               if (test_bit(LQ_RESET, &qos->lq_flags) || 
+               if (test_bit(LQ_RESET, &qos->lq_flags) ||
                    age > 32 * desc->ld_qos_maxage)
                        tgt->ltd_qos.ltq_penalty = 0;
                else if (age > desc->ld_qos_maxage)
@@ -564,7 +556,7 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd)
                svr->lsq_penalty_per_obj >>= 1;
 
                age = (now - svr->lsq_used) >> 3;
-               if (test_bit(LQ_RESET, &qos->lq_flags) || 
+               if (test_bit(LQ_RESET, &qos->lq_flags) ||
                    age > 32 * desc->ld_qos_maxage)
                        svr->lsq_penalty = 0;
                else if (age > desc->ld_qos_maxage)
@@ -572,14 +564,11 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd)
                        svr->lsq_penalty >>= age / desc->ld_qos_maxage;
        }
 
-       clear_bit(LQ_DIRTY, &qos->lq_flags);
-       clear_bit(LQ_RESET, &qos->lq_flags);
 
        /*
         * If each tgt has almost same free space, do rr allocation for better
         * creation performance
         */
-       clear_bit(LQ_SAME_SPACE, &qos->lq_flags);
        if (((ba_max * (QOS_THRESHOLD_MAX - qos->lq_threshold_rr)) /
            QOS_THRESHOLD_MAX) < ba_min &&
            ((ia_max * (QOS_THRESHOLD_MAX - qos->lq_threshold_rr)) /
@@ -587,7 +576,11 @@ int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd)
                set_bit(LQ_SAME_SPACE, &qos->lq_flags);
                /* Reset weights for the next time we enter qos mode */
                set_bit(LQ_RESET, &qos->lq_flags);
+       } else {
+               clear_bit(LQ_SAME_SPACE, &qos->lq_flags);
+               clear_bit(LQ_RESET, &qos->lq_flags);
        }
+       clear_bit(LQ_DIRTY, &qos->lq_flags);
        rc = 0;
 
 out:
@@ -664,7 +657,7 @@ int ltd_qos_update(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt,
                else
                        ltq->ltq_penalty -= ltq->ltq_penalty_per_obj;
 
-               lu_tgt_qos_weight_calc(tgt);
+               lu_tgt_qos_weight_calc(tgt, ltd->ltd_is_mdt);
 
                /* Recalc the total weight of usable osts */
                if (ltq->ltq_usable)