From 57f3262baa7d8931176a81cde05bc057facfc3b6 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Wed, 27 Apr 2022 04:36:49 -0400 Subject: [PATCH] LU-15788 lmv: try another MDT if statfs failed With the lazystatfs option, statfs could fail if MDT0 is offline. This leads to MPICH->IOR failures during FOFB tests. In a DNE setup, a client can get the statfs data from a different MDT. HPE-bug-id: LUS-10581 Signed-off-by: Alexander Boyko Change-Id: Icec83aba0c3ddbc749b782787b1b52faadf34a3e Reviewed-on: https://review.whamcloud.com/47152 Tested-by: jenkins Reviewed-by: Andreas Dilger Reviewed-by: Alexander Zarochentsev Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/lmv/lmv_obd.c | 14 ++++++++++---- lustre/tests/recovery-small.sh | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 3a1405b..5b8c03d 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1239,6 +1239,7 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp, __u32 i; __u32 idx; int rc = 0; + int err = 0; ENTRY; @@ -1260,6 +1261,10 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp, if (rc) { CERROR("%s: can't stat MDS #%d: rc = %d\n", tgt->ltd_exp->exp_obd->obd_name, i, rc); + err = rc; + /* Try another MDT */ + if (flags & OBD_STATFS_SUM) + continue; GOTO(out_free_temp, rc); } @@ -1273,7 +1278,7 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp, * clients can be mounted as long as MDT0 is in * service */ *osfs = *temp; - break; + GOTO(out_free_temp, rc); } if (i == 0) { @@ -1286,11 +1291,12 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp, osfs->os_granted += temp->os_granted; } } - - EXIT; + /* Stats are missing from some MDTs, so the data is incomplete */ + if (err) + rc = err; out_free_temp: OBD_FREE(temp, sizeof(*temp)); - return rc; + RETURN(rc); } static int lmv_statfs_update(void *cookie, int rc) diff --git 
a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index ad27535..dbaa788 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -3280,6 +3280,29 @@ test_149() { } run_test 149 "skip orphan removal at umount" +test_150() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" + local lazystatfs + local max + + lazystatfs=$($LCTL get_param -n llite.$FSNAME-*.lazystatfs | head -1) + max=$($LCTL get_param -n llite.$FSNAME-*.statahead_max | head -1) + + $LCTL set_param llite.$FSNAME-*.lazystatfs=1 + $LCTL set_param llite.$FSNAME-*.statahead_max=0 + stack_trap "$LCTL set_param llite.$FSNAME-*.lazystatfs=$lazystatfs" EXIT + stack_trap "$LCTL set_param llite.$FSNAME-*.statahead_max=$max" EXIT + # stop mds1 (MDT0) so that statfs has to be answered by another MDT + stop mds1 -f + + stack_trap "start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS && \ + wait_recovery_complete mds1 && clients_up && true" EXIT + + df $MOUNT || error "statfs failed" + return 0 +} +run_test 150 "statfs when MDT0 offline with lazystatfs option" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1