Whamcloud - gitweb
LU-15788 lmv: try another MDT if statfs failed 52/47152/6
authorAlexander Boyko <alexander.boyko@hpe.com>
Wed, 27 Apr 2022 08:36:49 +0000 (04:36 -0400)
committerOleg Drokin <green@whamcloud.com>
Sat, 11 Jun 2022 05:29:45 +0000 (05:29 +0000)
With the lazystatfs option, statfs could fail if MDT0 is offline.
This leads to MPICH->IOR failures during FOFB tests. In a DNE setup,
a client can get the statfs data from a different MDT instead.

HPE-bug-id: LUS-10581
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: Icec83aba0c3ddbc749b782787b1b52faadf34a3e
Reviewed-on: https://review.whamcloud.com/47152
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lmv/lmv_obd.c
lustre/tests/recovery-small.sh

index 3a1405b..5b8c03d 100644 (file)
@@ -1239,6 +1239,7 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
        __u32 i;
        __u32 idx;
        int rc = 0;
+       int err = 0;
 
        ENTRY;
 
@@ -1260,6 +1261,10 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
                if (rc) {
                        CERROR("%s: can't stat MDS #%d: rc = %d\n",
                               tgt->ltd_exp->exp_obd->obd_name, i, rc);
+                       err = rc;
+                       /* Try another MDT */
+                       if (flags & OBD_STATFS_SUM)
+                               continue;
                        GOTO(out_free_temp, rc);
                }
 
@@ -1273,7 +1278,7 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
                         * clients can be mounted as long as MDT0 is in
                         * service */
                        *osfs = *temp;
-                       break;
+                       GOTO(out_free_temp, rc);
                }
 
                if (i == 0) {
@@ -1286,11 +1291,12 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp,
                        osfs->os_granted += temp->os_granted;
                }
        }
-
-       EXIT;
+       /* Some MDTs returned no statfs data, so the result is incomplete */
+       if (err)
+               rc = err;
 out_free_temp:
        OBD_FREE(temp, sizeof(*temp));
-       return rc;
+       RETURN(rc);
 }
 
 static int lmv_statfs_update(void *cookie, int rc)
index ad27535..dbaa788 100755 (executable)
@@ -3280,6 +3280,29 @@ test_149() {
 }
 run_test 149 "skip orphan removal at umount"
 
+test_150() {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
+       local lazystatfs
+       local max
+
+       lazystatfs=$($LCTL get_param -n llite.$FSNAME-*.lazystatfs | head -1)
+       max=$($LCTL get_param -n llite.$FSNAME-*.statahead_max | head -1)
+
+       $LCTL set_param llite.$FSNAME-*.lazystatfs=1
+       $LCTL set_param llite.$FSNAME-*.statahead_max=0
+       stack_trap "$LCTL set_param llite.$FSNAME-*.lazystatfs=$lazystatfs" EXIT
+       stack_trap "$LCTL set_param llite.$FSNAME-*.statahead_max=$max" EXIT
+       # stop mds1 (MDT0, per the test description), where one of the stripes is located
+       stop mds1 -f
+
+       stack_trap "start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS && \
+               wait_recovery_complete mds1 && clients_up && true" EXIT
+
+       df $MOUNT || error "statfs failed"
+       return 0
+}
+run_test 150 "statfs when MDT0 offline with lazystatfs option"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status