Whamcloud - gitweb
LU-17306 ofd: return error for reconnection 95/53195/5
authorAlexander Boyko <alexander.boyko@hpe.com>
Thu, 16 Nov 2023 22:57:24 +0000 (17:57 -0500)
committerOleg Drokin <green@whamcloud.com>
Wed, 20 Dec 2023 01:44:15 +0000 (01:44 +0000)
During the orphan cleanup phase, a reconnection leaves the last ID
unsynchronized between the MDT and the OST. As a result, the MDT could
assign non-existent objects to a client for a file create operation.

ofd_create_hdl()) capstor-OST0087: dropping old orphan cleanup request
MDS LAST_ID [0x2540000400:0xb6941:0x0] (747841) is 352 behind OST
    LAST_ID [0x2540000400:0xb6aa1:0x0] (748193), trust the OST

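Return -ESTALE instead of 0 for orphan cleanup and precreate requests
that carry an older connection count. recovery-small test 144c
reproduces the bug where the MDT loses synchronization with the OST.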

Fixes: 63e17799a3 ("LU-8367 osp: enable replay for precreation request")
HPE-bug-id: LUS-11969
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I22c3d3b3db2acc9ad8f1b978b234afe7d3eef51d
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53195
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/ofd/ofd_dev.c
lustre/tests/recovery-small.sh

index bf64d21..0a42aa7 100644 (file)
@@ -365,6 +365,7 @@ extern bool obd_enable_health_write;
 #define OBD_FAIL_OST_WR_ATTR_DELAY      0x250
 #define OBD_FAIL_OST_RESTART_IO                 0x251
 #define OBD_FAIL_OST_OPCODE             0x253
+#define OBD_FAIL_OST_DELORPHAN_DELAY    0x254
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
index b495b5c..77c91ac 100644 (file)
@@ -1516,12 +1516,13 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
            (oa->o_flags & OBD_FL_DELORPHAN)) {
                exp->exp_filter_data.fed_lastid_gen = ofd->ofd_lastid_gen;
 
+               CFS_FAIL_TIMEOUT(OBD_FAIL_OST_DELORPHAN_DELAY, cfs_fail_val);
                /* destroy orphans */
                if (lustre_msg_get_conn_cnt(tgt_ses_req(tsi)->rq_reqmsg) <
                    exp->exp_conn_cnt) {
                        CERROR("%s: dropping old orphan cleanup request\n",
                               ofd_name(ofd));
-                       GOTO(out_nolock, rc = 0);
+                       GOTO(out_nolock, rc = -ESTALE);
                }
                /* This causes inflight precreates to abort and drop lock */
                oseq->os_destroys_in_progress = 1;
@@ -1569,7 +1570,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                    exp->exp_conn_cnt) {
                        CERROR("%s: dropping old precreate request\n",
                               ofd_name(ofd));
-                       GOTO(out, rc = 0);
+                       GOTO(out, rc = -ESTALE);
                }
                /* only precreate if seq is 0, IDIF or normal and also o_id
                 * must be specfied */
index bba1987..2a5f9a1 100755 (executable)
@@ -3166,6 +3166,51 @@ test_144b() {
 }
 run_test 144b "orphan cleanup shouldn't be blocked for no objects+failover situation"
 
+test_144c() {
+       [ "$PARALLEL" == "yes" ] && skip "skip parallel run"
+       remote_mds_nodsh && skip "remote MDS with nodsh"
+       remote_ost_nodsh && skip "remote OST with nodsh"
+        (( OST1_VERSION >= $(version_code 2.15.59.53) )) ||
+               skip "need OSS >= v2_15_59.53 for reconnect fix"
+       local rc
+
+       # create many single-stripe files on OST index 0 to increase the precreation window
+       mkdir_on_mdt0 $DIR/$tdir
+       $LFS setstripe -c 1 -i 0 $DIR/$tdir
+       createmany -o $DIR/$tdir/$tfile 9000
+
+       stop mds1
+#define OBD_FAIL_OST_DELORPHAN_DELAY     0x254
+       # delay delorphan requests on ost1 by 5 seconds (fail_val) so the reconnect below overlaps them
+       do_facet ost1 $LCTL set_param fail_loc=0x0000254 fail_val=5
+
+       start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "mds1 start fail"
+
+       wait_recovery_complete mds1 || error "MDS recovery not done"
+
+       sleep 1
+       # trigger a reconnect from MDT0000 to OST0000 while the delorphan request is still delayed
+       do_facet mds1 $LCTL --device $FSNAME-OST0000-osc-MDT0000 recover
+
+       do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0
+
+       # wait out the first and second orphan requests, each delayed for 5 seconds
+       sleep 12
+
+       local testid=${TESTNAME//_/ }
+
+       do_facet ost1 "dmesg | tac | sed '/$testid/,$ d'" |
+               grep "trust the OST"
+       rc=$?
+       if (( rc == 0 )); then
+               remount_facet mds1
+               error "LAST_ID synchronization failed"
+       else
+               return 0
+       fi
+}
+run_test 144c "reconnection during orphan cleanup shouldn't lose LAST_ID synchronization"
+
 test_145() {
        [ $MDSCOUNT -lt 3 ] && skip "needs >= 3 MDTs"
        [ $(facet_active_host mds2) = $(facet_active_host mds3) ] &&