Whamcloud - gitweb
LU-17306 ofd: return error for reconnection
authorAlexander Boyko <alexander.boyko@hpe.com>
Thu, 16 Nov 2023 22:57:24 +0000 (17:57 -0500)
committerAndreas Dilger <adilger@whamcloud.com>
Tue, 12 Dec 2023 05:38:47 +0000 (05:38 +0000)
During the orphan cleanup phase, a reconnection leaves the last ID
unsynchronized between the MDT and the OST. This means that the MDT could
assign non-existent objects to a client for a file create operation.

ofd_create_hdl()) capstor-OST0087: dropping old orphan cleanup request
MDS LAST_ID [0x2540000400:0xb6941:0x0] (747841) is 352 behind OST
    LAST_ID [0x2540000400:0xb6aa1:0x0] (748193), trust the OST

recovery-small test 144c reproduces the bug where the MDT lost
synchronization with the OST.

Lustre-change: https://review.whamcloud.com/53195
Lustre-commit: TBD (from 1f0deff150a3087a974adbac687a5019f6c0e39d)

Fixes: 63e17799a3 ("LU-8367 osp: enable replay for precreation request")
HPE-bug-id: LUS-11969
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I22c3d3b3db2acc9ad8f1b978b234afe7d3eef51d
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53341
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/obd_support.h
lustre/ofd/ofd_dev.c
lustre/tests/recovery-small.sh

index cb2c18b..c47272e 100644 (file)
@@ -362,6 +362,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_WR_ATTR_DELAY      0x250
 #define OBD_FAIL_OST_RESTART_IO                 0x251
 #define OBD_FAIL_OST_OPCODE             0x253
+#define OBD_FAIL_OST_DELORPHAN_DELAY    0x254
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
index 1b06a99..dcaa49e 100644 (file)
@@ -1553,12 +1553,13 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
            (oa->o_flags & OBD_FL_DELORPHAN)) {
                exp->exp_filter_data.fed_lastid_gen = ofd->ofd_lastid_gen;
 
+               CFS_FAIL_TIMEOUT(OBD_FAIL_OST_DELORPHAN_DELAY, cfs_fail_val);
                /* destroy orphans */
                if (lustre_msg_get_conn_cnt(tgt_ses_req(tsi)->rq_reqmsg) <
                    exp->exp_conn_cnt) {
                        CERROR("%s: dropping old orphan cleanup request\n",
                               ofd_name(ofd));
-                       GOTO(out_nolock, rc = 0);
+                       GOTO(out_nolock, rc = -ESTALE);
                }
                /* This causes inflight precreates to abort and drop lock */
                oseq->os_destroys_in_progress = 1;
@@ -1606,7 +1607,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                    exp->exp_conn_cnt) {
                        CERROR("%s: dropping old precreate request\n",
                               ofd_name(ofd));
-                       GOTO(out, rc = 0);
+                       GOTO(out, rc = -ESTALE);
                }
                /* only precreate if seq is 0, IDIF or normal and also o_id
                 * must be specfied */
index 29f2f7a..5e2fa61 100755 (executable)
@@ -3114,6 +3114,52 @@ test_144a() {
 }
 run_test 144a "MDT failover should stop precreation threads"
 
+test_144c() {
+       (( $OST1_VERSION >= $(version_code 2.14.0.119) )) ||
+               skip "need OSS >= v2.14.0.119 for reconnect fix"
+
+       [ "$PARALLEL" == "yes" ] && skip "skip parallel run"
+       remote_mds_nodsh && skip "remote MDS with nodsh"
+       remote_ost_nodsh && skip "remote OST with nodsh"
+       local rc
+
+       # create many files on OST index 0 to enlarge the precreation window
+       mkdir_on_mdt0 $DIR/$tdir
+       $LFS setstripe -c 1 -i 0 $DIR/$tdir
+       createmany -o $DIR/$tdir/$tfile 9000
+
+       stop mds1
+#define OBD_FAIL_OST_DELORPHAN_DELAY     0x254
+       # delay the delorphan request by 5 seconds (fail_val) so that the
+       # reconnection below happens while the request is still pending
+       do_facet ost1 $LCTL set_param fail_loc=0x0000254 fail_val=5
+
+       start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "mds1 start fail"
+
+       wait_recovery_complete mds1 || error "MDS recovery not done"
+
+       sleep 1
+       # trigger a reconnection from mds1 to OST0000
+       do_facet mds1 $LCTL --device $FSNAME-OST0000-osc-MDT0000 recover
+
+       do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0
+
+       # the first and second orphan requests are delayed for 5 seconds,
+       # so wait long enough for both to be handled
+       sleep 12
+
+       local testid=$(echo $TESTNAME | tr '_' ' ')
+
+       do_facet ost1 "dmesg | tac | sed '/$testid/,$ d'" |
+               grep "trust the OST"
+       rc=$?
+       if (( rc == 0 )); then
+               remount_facet mds1
+               error "LAST_ID synchronization failed"
+       else
+               return 0
+       fi
+}
+run_test 144c "reconnection during orphan cleanup shouldn't lose LAST_ID synchronization"
+
 test_145() {
        [ $MDSCOUNT -lt 3 ] && skip "needs >= 3 MDTs"
        [ $(facet_active_host mds2) = $(facet_active_host mds3) ] &&