Whamcloud - gitweb
LU-13439 lmv: qos stay on current MDT if less full 45/43445/9
author Andreas Dilger <adilger@whamcloud.com>
Sun, 25 Apr 2021 11:02:19 +0000 (05:02 -0600)
committer Oleg Drokin <green@whamcloud.com>
Wed, 5 May 2021 02:52:20 +0000 (02:52 +0000)
Keep "space balanced" subdirectories on the parent MDT if it is less
full than average, since it doesn't make sense to select another MDT
which may occasionally be *more* full.  This also reduces random
"MDT jumping" and needless remote directories.

Reduce the QOS threshold for space balanced LMV layouts, so that the
MDTs don't become too imbalanced before trying to fix the problem.

Change the LUSTRE_OPC_MKDIR opcode to be 1 instead of 0, so it can
be seen that a valid opcode has been stored into the structure.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Iab34c7eade03d761aa16b08f409f7e5d69cd70bd
Reviewed-on: https://review.whamcloud.com/43445
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lmv/lmv_obd.c
lustre/obdclass/lu_tgt_descs.c

index a7a3daf..ed93ccf 100644 (file)
@@ -850,11 +850,11 @@ enum md_cli_flags {
 };
 
 enum md_op_code {
 };
 
 enum md_op_code {
-       LUSTRE_OPC_MKDIR        = 0,
-       LUSTRE_OPC_SYMLINK      = 1,
-       LUSTRE_OPC_MKNOD        = 2,
-       LUSTRE_OPC_CREATE       = 3,
-       LUSTRE_OPC_ANY          = 5,
+       LUSTRE_OPC_MKDIR = 1,
+       LUSTRE_OPC_SYMLINK,
+       LUSTRE_OPC_MKNOD,
+       LUSTRE_OPC_CREATE,
+       LUSTRE_OPC_ANY,
 };
 
 /**
 };
 
 /**
index 9297f0d..308a526 100644 (file)
@@ -718,6 +718,12 @@ struct fsxattr {
 #define LOV_OFFSET_DEFAULT      ((__u16)-1)
 #define LMV_OFFSET_DEFAULT      ((__u32)-1)
 
 #define LOV_OFFSET_DEFAULT      ((__u16)-1)
 #define LMV_OFFSET_DEFAULT      ((__u32)-1)
 
+#define LOV_QOS_DEF_THRESHOLD_RR_PCT   17
+#define LMV_QOS_DEF_THRESHOLD_RR_PCT    5
+
+#define LOV_QOS_DEF_PRIO_FREE          90
+#define LMV_QOS_DEF_PRIO_FREE          90
+
 static inline bool lov_pattern_supported(__u32 pattern)
 {
        return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
 static inline bool lov_pattern_supported(__u32 pattern)
 {
        return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
index 5236e44..d231ed9 100644 (file)
@@ -1461,9 +1461,10 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
 
 static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
 {
 
 static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
 {
-       struct lu_tgt_desc *tgt;
+       struct lu_tgt_desc *tgt, *cur = NULL;
        __u64 total_weight = 0;
        __u64 cur_weight = 0;
        __u64 total_weight = 0;
        __u64 cur_weight = 0;
+       int total_usable = 0;
        __u64 rand;
        int rc;
 
        __u64 rand;
        int rc;
 
@@ -1482,15 +1483,29 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
                GOTO(unlock, tgt = ERR_PTR(rc));
 
        lmv_foreach_tgt(lmv, tgt) {
                GOTO(unlock, tgt = ERR_PTR(rc));
 
        lmv_foreach_tgt(lmv, tgt) {
-               tgt->ltd_qos.ltq_usable = 0;
-               if (!tgt->ltd_exp || !tgt->ltd_active)
+               if (!tgt->ltd_exp || !tgt->ltd_active) {
+                       tgt->ltd_qos.ltq_usable = 0;
                        continue;
                        continue;
+               }
 
                tgt->ltd_qos.ltq_usable = 1;
                lu_tgt_qos_weight_calc(tgt);
 
                tgt->ltd_qos.ltq_usable = 1;
                lu_tgt_qos_weight_calc(tgt);
+               if (tgt->ltd_index == *mdt) {
+                       cur = tgt;
+                       cur_weight = tgt->ltd_qos.ltq_weight;
+               }
                total_weight += tgt->ltd_qos.ltq_weight;
                total_weight += tgt->ltd_qos.ltq_weight;
+               total_usable++;
+       }
+
+       /* if current MDT has higher-than-average space, stay on same MDT */
+       rand = total_weight / total_usable;
+       if (cur_weight >= rand) {
+               tgt = cur;
+               GOTO(unlock, rc = 0);
        }
 
        }
 
+       cur_weight = 0;
        rand = lu_prandom_u64_max(total_weight);
 
        lmv_foreach_connected_tgt(lmv, tgt) {
        rand = lu_prandom_u64_max(total_weight);
 
        lmv_foreach_connected_tgt(lmv, tgt) {
index 04e2330..1e84857 100644 (file)
@@ -273,13 +273,21 @@ int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt)
        init_rwsem(&ltd->ltd_qos.lq_rw_sem);
        set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
        set_bit(LQ_RESET, &ltd->ltd_qos.lq_flags);
        init_rwsem(&ltd->ltd_qos.lq_rw_sem);
        set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
        set_bit(LQ_RESET, &ltd->ltd_qos.lq_flags);
-       /* Default priority is toward free space balance */
-       ltd->ltd_qos.lq_prio_free = 232;
-       /* Default threshold for rr (roughly 17%) */
-       ltd->ltd_qos.lq_threshold_rr = 43;
        ltd->ltd_is_mdt = is_mdt;
        ltd->ltd_is_mdt = is_mdt;
-       if (is_mdt)
+       /* MDT imbalance threshold is low to balance across MDTs
+        * relatively quickly, because each directory may result
+        * in a large number of files/subdirs created therein.
+        */
+       if (is_mdt) {
                ltd->ltd_lmv_desc.ld_pattern = LMV_HASH_TYPE_DEFAULT;
                ltd->ltd_lmv_desc.ld_pattern = LMV_HASH_TYPE_DEFAULT;
+               ltd->ltd_qos.lq_prio_free = LMV_QOS_DEF_PRIO_FREE * 256 / 100;
+               ltd->ltd_qos.lq_threshold_rr =
+                       LMV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+       } else {
+               ltd->ltd_qos.lq_prio_free = LOV_QOS_DEF_PRIO_FREE * 256 / 100;
+               ltd->ltd_qos.lq_threshold_rr =
+                       LOV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+       }
 
        return 0;
 }
 
        return 0;
 }