Whamcloud - gitweb
LU-17334 lmv: exclude newly added MDT in mkdir
author: Lai Siyao <lai.siyao@whamcloud.com>
Thu, 18 Jan 2024 15:59:25 +0000 (10:59 -0500)
committer: Andreas Dilger <adilger@whamcloud.com>
Sun, 31 Mar 2024 15:35:49 +0000 (15:35 +0000)
Exclude newly added MDTs from QoS mkdir for 30 seconds, in case
connections between MDTs are not ready yet, which may cause lookups to fail.

Lustre-change: https://review.whamcloud.com/53860
Lustre-commit: a2b08583a1dc8ab18c4ea4a4b900870761a5c252

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: Ibb5e6eda29ddfff8f66708d72e33453a96f5e7ef
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54608
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/lustre_import.h
lustre/include/obd.h
lustre/lmv/lmv_obd.c
lustre/ptlrpc/import.c

index 066972a..d8371d3 100644 (file)
@@ -347,6 +347,7 @@ struct obd_import {
                                /* adaptive timeout data */
        struct imp_at           imp_at;
        time64_t                imp_last_reply_time;    /* for health check */
+       time64_t                imp_setup_time;
        __u32                   imp_conn_restricted_net;
 };
 
index fa45b72..3ce6bbd 100644 (file)
@@ -474,6 +474,7 @@ struct lmv_obd {
        struct kobject          *lmv_tgts_kobj;
        void                    *lmv_cache;
 
+       time64_t                lmv_setup_time;
        __u32                   lmv_qos_rr_index; /* next round-robin MDT idx */
        struct rhashtable       lmv_qos_exclude_hash;
        struct list_head        lmv_qos_exclude_list;
index 7e9415e..a0c248c 100644 (file)
@@ -1513,6 +1513,8 @@ static int lmv_get_root(struct obd_export *exp, const char *fileset,
                RETURN(-ENODEV);
 
        rc = md_get_root(tgt->ltd_exp, fileset, fid);
+       if (!rc)
+               lmv->lmv_setup_time = ktime_get_seconds();
        RETURN(rc);
 }
 
@@ -1623,10 +1625,23 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
        RETURN(rc);
 }
 
+/*
+ * Tell whether \a tgt may be chosen as the MDT for a new directory.
+ *
+ * A target is usable when it has an export, is active, its cached statfs
+ * state does not carry OS_STATFS_NOCREATE, and in addition either
+ *  - its import was set up more than half of ld_qos_maxage seconds before
+ *    \a now, i.e. it is not a newly connected target, or
+ *  - this LMV was set up less than twice ld_qos_maxage seconds before
+ *    \a now (NOTE(review): presumably so that right after mount, when every
+ *    import is new, not all targets get excluded -- confirm).
+ *
+ * A target whose export yields no import is treated as not usable.
+ *
+ * \param[in] lmv      LMV device, supplies ld_qos_maxage and lmv_setup_time
+ * \param[in] tgt      target descriptor to test, must not be NULL
+ * \param[in] now      current time in seconds, sampled once by the caller
+ *
+ * \retval true        \a tgt may be used
+ * \retval false       \a tgt must be skipped
+ */
+static inline bool tgt_qos_is_usable(struct lmv_obd *lmv,
+                                    struct lu_tgt_desc *tgt, time64_t now)
+{
+       u32 maxage = lmv->lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage;
+       struct obd_import *imp;
+
+       if (!tgt->ltd_exp || !tgt->ltd_active ||
+           (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE))
+               return false;
+
+       /* only look up the import once ltd_exp is known to be non-NULL */
+       imp = class_exp2cliimp(tgt->ltd_exp);
+       if (!imp)
+               return false;
+
+       return now - imp->imp_setup_time > (maxage >> 1) ||
+              now - lmv->lmv_setup_time < (maxage << 1);
+}
+
 static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv,
                                              struct md_op_data *op_data)
 {
        struct lu_tgt_desc *tgt, *cur = NULL;
+       time64_t now = ktime_get_seconds();
        __u64 total_avail = 0;
        __u64 total_weight = 0;
        __u64 cur_weight = 0;
@@ -1649,13 +1664,12 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv,
                GOTO(unlock, tgt = ERR_PTR(rc));
 
        lmv_foreach_tgt(lmv, tgt) {
-               if (!tgt->ltd_exp || !tgt->ltd_active ||
-                   (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) {
+               if (!tgt_qos_is_usable(lmv, tgt, now)) {
                        tgt->ltd_qos.ltq_usable = 0;
                        continue;
                }
                /* update one hour overdue statfs */
-               if (ktime_get_seconds() - tgt->ltd_statfs_age >
+               if (now - tgt->ltd_statfs_age >
                    60 * lmv->lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage)
                        lmv_statfs_check_update(lmv2obd_dev(lmv), tgt);
                tgt->ltd_qos.ltq_usable = 1;
@@ -1711,6 +1725,7 @@ unlock:
 
 static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv)
 {
+       time64_t now = ktime_get_seconds();
        struct lu_tgt_desc *tgt;
        int i;
        int index;
@@ -1722,8 +1737,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv)
                index = (i + lmv->lmv_qos_rr_index) %
                        lmv->lmv_mdt_descs.ltd_tgts_size;
                tgt = lmv_tgt(lmv, index);
-               if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
-                   (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE))
+               if (!tgt || !tgt_qos_is_usable(lmv, tgt, now))
                        continue;
 
                lmv->lmv_qos_rr_index = (tgt->ltd_index + 1) %
@@ -2065,11 +2079,15 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv,
                if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) &&
                    !lmv_op_default_rr_mkdir(op_data) &&
                    !lmv_op_user_qos_mkdir(op_data) &&
-                   !(tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE))
+                   !(tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE)) {
                        /* if not necessary, don't create remote directory. */
                        tgt = tmp;
-               else
+               } else {
                        tgt = lmv_locate_tgt_rr(lmv);
+                       /* if no MDT chosen, use parent MDT */
+                       if (IS_ERR(tgt))
+                               tgt = tmp;
+               }
                if (!IS_ERR(tgt))
                        lmv_statfs_check_update(lmv2obd_dev(lmv), tgt);
        }
index 2f2cccf..0b5accd 100644 (file)
@@ -1060,6 +1060,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
        imp->imp_pingable = 1;
        imp->imp_force_reconnect = 0;
        imp->imp_force_verify = 0;
+       imp->imp_setup_time = ktime_get_seconds();
 
        imp->imp_connect_data = *ocd;