Whamcloud - gitweb
LU-15816 tests: use correct ost host to manage failure 48/49248/2
author Mr NeilBrown <neilb@suse.de>
Fri, 25 Nov 2022 05:13:20 +0000 (16:13 +1100)
committer Oleg Drokin <green@whamcloud.com>
Tue, 13 Dec 2022 16:08:47 +0000 (16:08 +0000)
sanity test_398m sets up striping across 2 OSTs.  It ensures that
failing IO to either OST individually will fail the total IO.

However it sends the command to fail IO for the second OST (OST1) to
the host managing the first OST (ost1).  If the first 2 OSTs are on
the same host, this works.  If not, it fails.

Also, the error messages when testing the second stripe say "first
stripe".

Test-Parameters: trivial env=ONLY=398m
Signed-off-by: Mr NeilBrown <neilb@suse.de>
Change-Id: Ic7085dab2610fa2c044a966fd8de40def0438ca4
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49248
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/tests/sanity.sh

index 4487174..ae70409 100755 (executable)
@@ -25315,6 +25315,7 @@ test_398m() { #  LU-13798
        # Set up failure on OST0, the first stripe:
        #define OBD_FAIL_OST_BRW_WRITE_BULK     0x20e
        #NB: Fail val is ost # + 1, because we cannot use cfs_fail_val = 0
+       # OST0 is on ost1, OST1 is on ost2.
        # So this fail_val specifies OST0
        do_facet ost1 $LCTL set_param fail_loc=0x20e fail_val=1
        stack_trap "do_facet ost1 $LCTL set_param fail_loc=0"
@@ -25340,13 +25341,13 @@ test_398m() { #  LU-13798
        # Clear file contents, maintain striping
        echo > $DIR/$tfile
        # Set up failure on OST1, second stripe:
-       do_facet ost1 $LCTL set_param fail_loc=0x20e fail_val=2
-       stack_trap "do_facet ost1 $LCTL set_param fail_loc=0"
+       do_facet ost2 $LCTL set_param fail_loc=0x20e fail_val=2
+       stack_trap "do_facet ost2 $LCTL set_param fail_loc=0"
 
        dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct &&
-               error "parallel dio write with failure on first stripe succeeded"
+               error "parallel dio write with failure on second stripe succeeded"
        stack_trap "rm -f $DIR/$tfile"
-       do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0
+       do_facet ost2 $LCTL set_param fail_loc=0 fail_val=0
 
        # Place data in file for read
        dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct ||
@@ -25356,7 +25357,7 @@ test_398m() { #  LU-13798
        #define OBD_FAIL_OST_BRW_READ_BULK       0x20f
        do_facet ost2 $LCTL set_param fail_loc=0x20f fail_val=2
        dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 iflag=direct &&
-               error "parallel dio read with error on first stripe succeeded"
+               error "parallel dio read with error on second stripe succeeded"
        rm -f $DIR/$tfile.2
        do_facet ost2 $LCTL set_param fail_loc=0 fail_val=0
 }