Whamcloud - gitweb
LU-13439 lmv: qos stay on current MDT if less full
author Andreas Dilger <adilger@whamcloud.com>
Sun, 25 Apr 2021 11:02:19 +0000 (05:02 -0600)
committer Andreas Dilger <adilger@whamcloud.com>
Wed, 5 May 2021 04:02:06 +0000 (04:02 +0000)
Keep "space balanced" subdirectories on the parent MDT if it is less
full than average, since it doesn't make sense to select another MDT
which may occasionally be *more* full.  This also reduces random
"MDT jumping" and needless remote directories.

Reduce the QOS threshold for space balanced LMV layouts, so that the
MDTs don't become too imbalanced before trying to fix the problem.

Change the LUSTRE_OPC_MKDIR opcode to be 1 instead of 0, so it can
be seen that a valid opcode has been stored into the structure.

Lustre-change: https://review.whamcloud.com/43445
Lustre-commit: 3f6fc483013da443b1494d81efe2d271ac67f901

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Iab34c7eade03d761aa16b08f409f7e5d69cd70bd
Reviewed-on: https://review.whamcloud.com/43431
Tested-by: jenkins <devops@whamcloud.com>
lustre/include/obd.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lmv/lmv_obd.c
lustre/obdclass/lu_tgt_descs.c

index 37cd841..d15eb21 100644 (file)
@@ -852,11 +852,11 @@ enum md_cli_flags {
 };
 
 enum md_op_code {
-       LUSTRE_OPC_MKDIR        = 0,
-       LUSTRE_OPC_SYMLINK      = 1,
-       LUSTRE_OPC_MKNOD        = 2,
-       LUSTRE_OPC_CREATE       = 3,
-       LUSTRE_OPC_ANY          = 5,
+       LUSTRE_OPC_MKDIR = 1,
+       LUSTRE_OPC_SYMLINK,
+       LUSTRE_OPC_MKNOD,
+       LUSTRE_OPC_CREATE,
+       LUSTRE_OPC_ANY,
 };
 
 /**
index e0c270c..5393bfb 100644 (file)
@@ -719,6 +719,12 @@ struct fsxattr {
 #define LOV_OFFSET_DEFAULT      ((__u16)-1)
 #define LMV_OFFSET_DEFAULT      ((__u32)-1)
 
+#define LOV_QOS_DEF_THRESHOLD_RR_PCT   17
+#define LMV_QOS_DEF_THRESHOLD_RR_PCT    5
+
+#define LOV_QOS_DEF_PRIO_FREE          90
+#define LMV_QOS_DEF_PRIO_FREE          90
+
 static inline bool lov_pattern_supported(__u32 pattern)
 {
        pattern &= ~LOV_PATTERN_F_RELEASED;
index 8b59b1b..71375ac 100644 (file)
@@ -1462,9 +1462,10 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
 
 static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
 {
-       struct lu_tgt_desc *tgt;
+       struct lu_tgt_desc *tgt, *cur = NULL;
        __u64 total_weight = 0;
        __u64 cur_weight = 0;
+       int total_usable = 0;
        __u64 rand;
        int rc;
 
@@ -1483,15 +1484,29 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
                GOTO(unlock, tgt = ERR_PTR(rc));
 
        lmv_foreach_tgt(lmv, tgt) {
-               tgt->ltd_qos.ltq_usable = 0;
-               if (!tgt->ltd_exp || !tgt->ltd_active)
+               if (!tgt->ltd_exp || !tgt->ltd_active) {
+                       tgt->ltd_qos.ltq_usable = 0;
                        continue;
+               }
 
                tgt->ltd_qos.ltq_usable = 1;
                lu_tgt_qos_weight_calc(tgt);
+               if (tgt->ltd_index == *mdt) {
+                       cur = tgt;
+                       cur_weight = tgt->ltd_qos.ltq_weight;
+               }
                total_weight += tgt->ltd_qos.ltq_weight;
+               total_usable++;
+       }
+
+       /* if current MDT has higher-than-average space, stay on same MDT */
+       rand = total_weight / total_usable;
+       if (cur_weight >= rand) {
+               tgt = cur;
+               GOTO(unlock, rc = 0);
        }
 
+       cur_weight = 0;
        rand = lu_prandom_u64_max(total_weight);
 
        lmv_foreach_connected_tgt(lmv, tgt) {
index 30b2bc1..7756e3d 100644 (file)
@@ -277,13 +277,21 @@ int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt)
        init_rwsem(&ltd->ltd_qos.lq_rw_sem);
        ltd->ltd_qos.lq_dirty = 1;
        ltd->ltd_qos.lq_reset = 1;
-       /* Default priority is toward free space balance */
-       ltd->ltd_qos.lq_prio_free = 232;
-       /* Default threshold for rr (roughly 17%) */
-       ltd->ltd_qos.lq_threshold_rr = 43;
        ltd->ltd_is_mdt = is_mdt;
-       if (is_mdt)
+       /* MDT imbalance threshold is low to balance across MDTs
+        * relatively quickly, because each directory may result
+        * in a large number of files/subdirs created therein.
+        */
+       if (is_mdt) {
                ltd->ltd_lmv_desc.ld_pattern = LMV_HASH_TYPE_DEFAULT;
+               ltd->ltd_qos.lq_prio_free = LMV_QOS_DEF_PRIO_FREE * 256 / 100;
+               ltd->ltd_qos.lq_threshold_rr =
+                       LMV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+       } else {
+               ltd->ltd_qos.lq_prio_free = LOV_QOS_DEF_PRIO_FREE * 256 / 100;
+               ltd->ltd_qos.lq_threshold_rr =
+                       LOV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+       }
 
        lu_qos_rr_init(&ltd->ltd_qos.lq_rr);