Whamcloud - gitweb
LU-16943 tests: fix replay-single/135 under hard failure mode 08/51608/4
author Jian Yu <yujian@whamcloud.com>
Fri, 14 Jul 2023 06:04:42 +0000 (14:04 +0800)
committer Oleg Drokin <green@whamcloud.com>
Thu, 19 Oct 2023 13:48:30 +0000 (13:48 +0000)
This patch fixes replay-single test_135() to load the libcfs module
on the failover partner node, avoiding the 'fail_val' setting error.
It also fixes the issue that not all of the OSTs are mounted after
failing back ost1.

Lustre-change: https://review.whamcloud.com/51574
Lustre-commit: 74140e5df4c094f7f0e923e1b82c464b18e8a7cc

Test-Parameters: trivial testlist=replay-single
Test-Parameters: trivial fstype=zfs testlist=replay-single

Test-Parameters: trivial env=FAILURE_MODE=HARD \
    clientcount=4 mdtcount=1 mdscount=2 osscount=2 \
    austeroptions=-R failover=true iscsi=1 \
    testlist=replay-single

Change-Id: Id46c722a6db9d832829a739f41f7462b32a6d9d9
Signed-off-by: Jian Yu <yujian@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51608
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Alex Deiter <alex.deiter@gmail.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/tests/replay-single.sh
lustre/tests/test-framework.sh

index c05570a..f631b73 100755 (executable)
@@ -4977,6 +4977,8 @@ test_135() {
 
        #define OBD_FAIL_TGT_REPLAY_RECONNECT     0x32d
        # Make sure lock replay server side never completes and errors out.
+       do_rpc_nodes $(facet_active_host ost1) \
+               load_module ../libcfs/libcfs/libcfs
        do_facet ost1 "$LCTL set_param fail_val=20"
        do_facet ost1 "$LCTL set_param fail_loc=0x32d"
 
@@ -4993,8 +4995,13 @@ test_135() {
        change_active ost1
        wait_for_facet ost1
 
+       do_rpc_nodes $(facet_active_host ost1) \
+               load_module ../libcfs/libcfs/libcfs
        do_facet ost1 "$LCTL set_param fail_loc=0"
        mount_facet ost1
+       unmountoss
+       mountoss
+       clients_up || clients_up || error "$LFS df $MOUNT failed"
        echo blah > $DIR/$tdir/file.test2
 
        rm -rf $DIR/$tdir
index 825b718..2c92993 100755 (executable)
@@ -5033,6 +5033,15 @@ mountmds() {
        done
 }
 
+unmountoss() { # Stop every OST facet (ost1..ost$OSTCOUNT) and remove each facet's $TMP/ost<N>active file.
+       local num # OST index; seq yields 1 through $OSTCOUNT
+
+       for num in $(seq $OSTCOUNT); do
+               stop ost$num -f # -f is handed to stop(); presumably forces the unmount -- TODO confirm
+               rm -f $TMP/ost${num}active # NOTE(review): presumably the recorded active-host marker for this facet -- verify
+       done
+}
+
 mountoss() {
        local num
        local devname