Whamcloud - gitweb
LU-17334 lmv: exclude newly added MDT in mkdir 60/53860/3
author: Lai Siyao <lai.siyao@whamcloud.com>
Thu, 18 Jan 2024 15:59:25 +0000 (10:59 -0500)
committer: Oleg Drokin <green@whamcloud.com>
Sat, 23 Mar 2024 05:53:24 +0000 (05:53 +0000)
Exclude a newly added MDT from QoS mkdir for 30 seconds, in case the
connections between MDTs are not ready yet, which may cause lookups to fail.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: Ibb5e6eda29ddfff8f66708d72e33453a96f5e7ef
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53860
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_import.h
lustre/include/obd.h
lustre/lmv/lmv_obd.c
lustre/ptlrpc/import.c

index b19a933..8231fd3 100644 (file)
@@ -345,6 +345,7 @@ struct obd_import {
                                /* adaptive timeout data */
        struct imp_at           imp_at;
        time64_t                imp_last_reply_time;    /* for health check */
+       time64_t                imp_setup_time;
        __u32                   imp_conn_restricted_net;
 };
 
index c16d7ef..62baa2a 100644 (file)
@@ -445,6 +445,7 @@ struct lmv_obd {
        struct kobject          *lmv_tgts_kobj;
        void                    *lmv_cache;
 
+       time64_t                lmv_setup_time;
        __u32                   lmv_qos_rr_index; /* next round-robin MDT idx */
        struct rhashtable       lmv_qos_exclude_hash;
        struct list_head        lmv_qos_exclude_list;
index 9fed3a8..de7487e 100644 (file)
@@ -1535,6 +1535,8 @@ static int lmv_get_root(struct obd_export *exp, const char *fileset,
                RETURN(-ENODEV);
 
        rc = md_get_root(tgt->ltd_exp, fileset, fid);
+       if (!rc)
+               lmv->lmv_setup_time = ktime_get_seconds();
        RETURN(rc);
 }
 
@@ -1645,10 +1647,23 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
        RETURN(rc);
 }
 
+/*
+ * Check whether @tgt may be chosen as the target of a new directory.
+ *
+ * \param lmv  LMV device; supplies ld_qos_maxage and lmv_setup_time
+ * \param tgt  MDT descriptor to examine
+ * \param now  current time in seconds, as sampled by the caller
+ *
+ * \retval true   @tgt has an export, is active, does not report
+ *                OS_STATFS_NOCREATE, and either its import's
+ *                imp_setup_time is more than half of ld_qos_maxage in the
+ *                past, or lmv_setup_time is less than twice ld_qos_maxage
+ *                in the past
+ * \retval false  otherwise
+ */
+static inline bool tgt_qos_is_usable(struct lmv_obd *lmv,
+                                    struct lu_tgt_desc *tgt, time64_t now)
+{
+       struct obd_import *imp;
+       u32 maxage = lmv->lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage;
+
+       /*
+        * Callers rely on this helper to filter out targets without an
+        * export, so ltd_exp must be tested before it is handed to
+        * class_exp2cliimp().
+        */
+       if (!tgt->ltd_exp || !tgt->ltd_active)
+               return false;
+
+       if (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)
+               return false;
+
+       /* NOTE(review): assumes a non-NULL import for a valid export - confirm */
+       imp = class_exp2cliimp(tgt->ltd_exp);
+
+       return now - imp->imp_setup_time > (maxage >> 1) ||
+              now - lmv->lmv_setup_time < (maxage << 1);
+}
+
 static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv,
                                              struct md_op_data *op_data)
 {
        struct lu_tgt_desc *tgt, *cur = NULL;
+       time64_t now = ktime_get_seconds();
        __u64 total_avail = 0;
        __u64 total_weight = 0;
        __u64 cur_weight = 0;
@@ -1671,13 +1686,12 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv,
                GOTO(unlock, tgt = ERR_PTR(rc));
 
        lmv_foreach_tgt(lmv, tgt) {
-               if (!tgt->ltd_exp || !tgt->ltd_active ||
-                   (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE)) {
+               if (!tgt_qos_is_usable(lmv, tgt, now)) {
                        tgt->ltd_qos.ltq_usable = 0;
                        continue;
                }
                /* update one hour overdue statfs */
-               if (ktime_get_seconds() - tgt->ltd_statfs_age >
+               if (now - tgt->ltd_statfs_age >
                    60 * lmv->lmv_mdt_descs.ltd_lmv_desc.ld_qos_maxage)
                        lmv_statfs_check_update(lmv2obd_dev(lmv), tgt);
                tgt->ltd_qos.ltq_usable = 1;
@@ -1733,6 +1747,7 @@ unlock:
 
 static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv)
 {
+       time64_t now = ktime_get_seconds();
        struct lu_tgt_desc *tgt;
        int i;
        int index;
@@ -1744,8 +1759,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv)
                index = (i + lmv->lmv_qos_rr_index) %
                        lmv->lmv_mdt_descs.ltd_tgts_size;
                tgt = lmv_tgt(lmv, index);
-               if (!tgt || !tgt->ltd_exp || !tgt->ltd_active ||
-                   (tgt->ltd_statfs.os_state & OS_STATFS_NOCREATE))
+               if (!tgt || !tgt_qos_is_usable(lmv, tgt, now))
                        continue;
 
                lmv->lmv_qos_rr_index = (tgt->ltd_index + 1) %
@@ -2088,11 +2102,15 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv,
                if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) &&
                    !lmv_op_default_rr_mkdir(op_data) &&
                    !lmv_op_user_qos_mkdir(op_data) &&
-                   !(tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE))
+                   !(tmp->ltd_statfs.os_state & OS_STATFS_NOCREATE)) {
                        /* if not necessary, don't create remote directory. */
                        tgt = tmp;
-               else
+               } else {
                        tgt = lmv_locate_tgt_rr(lmv);
+                       /* if no MDT chosen, use parent MDT */
+                       if (IS_ERR(tgt))
+                               tgt = tmp;
+               }
                if (!IS_ERR(tgt))
                        lmv_statfs_check_update(lmv2obd_dev(lmv), tgt);
        }
index c8432d1..d89b3c2 100644 (file)
@@ -1059,6 +1059,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
        imp->imp_pingable = 1;
        imp->imp_force_reconnect = 0;
        imp->imp_force_verify = 0;
+       imp->imp_setup_time = ktime_get_seconds();
 
        imp->imp_connect_data = *ocd;