From 53d323fbde29cf228c353784de272bbc99fcca33 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Thu, 16 Nov 2023 17:57:24 -0500 Subject: [PATCH] LU-17306 ofd: return error for reconnection During the orphan cleanup phase, a reconnection leaves the last ID unsynchronized between the MDT and the OST. This means that the MDT could assign non-existent objects to a client for a file create operation. ofd_create_hdl()) capstor-OST0087: dropping old orphan cleanup request MDS LAST_ID [0x2540000400:0xb6941:0x0] (747841) is 352 behind OST LAST_ID [0x2540000400:0xb6aa1:0x0] (748193), trust the OST The recovery-small test 144c reproduces the bug where the MDT loses synchronization with the OST. Lustre-change: https://review.whamcloud.com/53195 Lustre-commit: TBD (from 1f0deff150a3087a974adbac687a5019f6c0e39d) Fixes: 63e17799a3 ("LU-8367 osp: enable replay for precreation request") HPE-bug-id: LUS-11969 Signed-off-by: Alexander Boyko Change-Id: I22c3d3b3db2acc9ad8f1b978b234afe7d3eef51d Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53341 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/obd_support.h | 1 + lustre/ofd/ofd_dev.c | 5 +++-- lustre/tests/recovery-small.sh | 46 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index cb2c18b..c47272e 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -362,6 +362,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_WR_ATTR_DELAY 0x250 #define OBD_FAIL_OST_RESTART_IO 0x251 #define OBD_FAIL_OST_OPCODE 0x253 +#define OBD_FAIL_OST_DELORPHAN_DELAY 0x254 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 1b06a99..dcaa49e 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -1553,12 +1553,13 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) (oa->o_flags & OBD_FL_DELORPHAN)) { 
exp->exp_filter_data.fed_lastid_gen = ofd->ofd_lastid_gen; + CFS_FAIL_TIMEOUT(OBD_FAIL_OST_DELORPHAN_DELAY, cfs_fail_val); /* destroy orphans */ if (lustre_msg_get_conn_cnt(tgt_ses_req(tsi)->rq_reqmsg) < exp->exp_conn_cnt) { CERROR("%s: dropping old orphan cleanup request\n", ofd_name(ofd)); - GOTO(out_nolock, rc = 0); + GOTO(out_nolock, rc = -ESTALE); } /* This causes inflight precreates to abort and drop lock */ oseq->os_destroys_in_progress = 1; @@ -1606,7 +1607,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) exp->exp_conn_cnt) { CERROR("%s: dropping old precreate request\n", ofd_name(ofd)); - GOTO(out, rc = 0); + GOTO(out, rc = -ESTALE); } /* only precreate if seq is 0, IDIF or normal and also o_id * must be specfied */ diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 29f2f7a..5e2fa61 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -3114,6 +3114,52 @@ test_144a() { } run_test 144a "MDT failover should stop precreation threads" +test_144c() { + (( $OST1_VERSION >= $(version_code 2.14.0.119) )) || + skip "need OSS >= v2.14.0.119 for reconnect fix" + + [ "$PARALLEL" == "yes" ] && skip "skip parallel run" + remote_mds_nodsh && skip "remote MDS with nodsh" + remote_ost_nodsh && skip "remote OST with nodsh" + local rc + + # grow the precreation window + mkdir_on_mdt0 $DIR/$tdir + $LFS setstripe -c 1 -i 0 $DIR/$tdir + createmany -o $DIR/$tdir/$tfile 9000 + + stop mds1 +#define OBD_FAIL_OST_DELORPHAN_DELAY 0x254 + # delay the delorphan request by 5 seconds, until the reconnection + do_facet ost1 $LCTL set_param fail_loc=0x0000254 fail_val=5 + + start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "mds1 start fail" + + wait_recovery_complete mds1 || error "MDS recovery not done" + + sleep 1 + # reconnect + do_facet mds1 $LCTL --device $FSNAME-OST0000-osc-MDT0000 recover + + do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0 + + # the first and second orphan requests are both delayed by 5 seconds + sleep 12 + + local 
testid=$(echo $TESTNAME | tr '_' ' ') + + do_facet ost1 "dmesg | tac | sed '/$testid/,$ d'" | + grep "trust the OST" + rc=$? + if (( rc == 0 )); then + remount_facet mds1 + error "LAST_ID synchronization failed" + else + return 0 + fi +} +run_test 144c "reconnection during orphan cleanup shouldn't lose LAST_ID synchronization" + test_145() { [ $MDSCOUNT -lt 3 ] && skip "needs >= 3 MDTs" [ $(facet_active_host mds2) = $(facet_active_host mds3) ] && -- 1.8.3.1