From f54759cd90d282862a30d7a3a9680d786b979dca Mon Sep 17 00:00:00 2001
From: Lei Feng
Date: Wed, 26 Jul 2023 17:08:21 +0800
Subject: [PATCH] LU-16984 tests: replay-dual/31 checks file from DIR2

Move replay-dual/test_31 to correct place.
In replay-dual/test_31, check file existence from DIR2.
Add more messages for diagnosis.

Lustre-change: https://review.whamcloud.com/51762
Lustre-commit: TBD (from 025aa6e78ca6d3f4fc872b2b616034ec5b28952a)

Fixes: 07764c4eeb ("LU-16953 tests: wait longer in replay-dual/test_31")
Signed-off-by: Lei Feng
Test-Parameters: trivial testlist=replay-dual env=ONLY=31,ONLY_REPEAT=100
Change-Id: Iee679ee94ac2cb51baad1651bfaddf452fafdbd1
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51764
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
---
Review notes (this area, between the three-dash line and the diff, is not
part of the commit message):

 * Hunk 1 (+74 lines) inserts test_31 between "run_test 29" and test_32().
   Hunk 2 (-65 lines) deletes the previous copy of test_31, which sat after
   "run_test 33" and immediately before complete_test.
 * Apart from the move, the only code change is the wait that follows the
   first multiop.  The old copy polled with an unbounded
   "while [ ! -f $DIR1/$tdir/mdtdir/$tfile ]" loop.  The new copy loops at
   most 10 times, tests "-w" on the $DIR2 path of the same file, echoes
   whether the file is ready on every pass, and sleeps 0.5 between passes.
 * The new loop does not report an error when all 10 passes find the file
   not writable; execution continues with the second multiop on $DIR2.
 * The remaining added lines are blank lines placed before the new loop,
   after it, and after the second "multiops+=($!)".
 * NOTE(review): "Lustre-commit: TBD" is still a placeholder in the commit
   message above - confirm whether it should name the landed commit.

 lustre/tests/replay-dual.sh | 139 +++++++++++++++++++++++---------------------
 1 file changed, 74 insertions(+), 65 deletions(-)

diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index 2695b1b..f239319 100755
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -1126,6 +1126,80 @@ test_29() {
 }
 run_test 29 "replay vs update with the same xid"
 
+test_31() {
+	mkdir_on_mdt0 $DIR1/$tdir
+	$LFS setstripe -c 1 -i 0 $DIR1/$tdir
+	for (( i=0; i < 10; i++ )) ; do
+		mkdir -p $DIR1/$tdir/d.${i}
+	done
+	mkdir $DIR1/$tdir/mdtdir
+	$LFS setstripe -E 1M -L mdt $DIR1/$tdir/mdtdir
+
+	# failover has to take longer than blocking timeout involved
+	# by second multiop below which is set to obd_timeout/2 by
+	# disabling AT
+	local timeout=$(do_facet mds1 $LCTL get_param -n timeout)
+
+	timeout=$((timeout / 2 + 5))
+	fail ost1 $timeout &
+	local failpid=$!
+
+	sleep 1
+
+	# consume preallocated objects, precreate thread will be awakened
+	consume_precreations $DIR1/$tdir mds1 0 1
+
+	# disable AT so that blocking timeout gets set to obd_timeout/2
+	local amm=$(at_max_get mds1)
+
+	at_max_set 0 mds1
+	stack_trap "at_max_set $amm mds1"
+
+	declare -a multiops
+
+	#define OBD_FAIL_LLITE_XATTR_PAUSE 0x1420
+	$LCTL set_param fail_loc=0x80001420
+	$MULTIOP $DIR1/$tdir/mdtdir/$tfile Osw4096c &
+	multiops+=($!)
+
+	for (( i=0; i<10; i++ )); do
+		if [ -w $DIR2/$tdir/mdtdir/$tfile ]; then
+			echo "file $DIR2/$tdir/mdtdir/$tfile is ready"
+			break
+		else
+			echo "file $DIR2/$tdir/mdtdir/$tfile is not ready, wait 0.5 second..."
+			sleep 0.5
+		fi
+	done
+
+	$MULTIOP $DIR2/$tdir/mdtdir/$tfile oO_WRONLY:w4096c &
+	multiops+=($!)
+
+	sleep 0.5
+	local mmrif=$($LCTL get_param -n \
+		mdc.$FSNAME-MDT0000-mdc-*.max_mod_rpcs_in_flight | tail -1)
+	# these are blocked by precreation until ost failover is in progress
+	for (( i=0; i < $mmrif; i++ )) ; do
+		$MULTIOP $DIR1/$tdir/d.${i}/parallel Oc &
+		multiops+=($!)
+	done
+	wait $failpid
+	local failed=0
+
+	echo "pids: ${multiops[@]}"
+	for pid in ${multiops[@]}; do
+		local rc=0
+
+		wait $pid || rc=$?
+		if (( $rc != 0 )); then
+			echo "wait $pid failed, rc = $rc"
+			((failed++))
+		fi
+	done
+	((failed == 0)) || error "$failed multiops failed"
+}
+run_test 31 "deadlock on file_remove_privs and occupied mod rpc slots"
+
 test_32() {
 	(( $MDSCOUNT < 2 )) && skip_env "needs >= 2 MDTs"
 
@@ -1207,71 +1281,6 @@ test_33() { # LU-15935
 }
 run_test 33 "Check for OBD_INCOMPAT_MULTI_RPCS in last_rcvd after abort_recovery"
 
-test_31() {
-	mkdir_on_mdt0 $DIR1/$tdir
-	$LFS setstripe -c 1 -i 0 $DIR1/$tdir
-	for (( i=0; i < 10; i++ )) ; do
-		mkdir -p $DIR1/$tdir/d.${i}
-	done
-	mkdir $DIR1/$tdir/mdtdir
-	$LFS setstripe -E 1M -L mdt $DIR1/$tdir/mdtdir
-
-	# failover has to take longer than blocking timeout involved
-	# by second multiop below which is set to obd_timeout/2 by
-	# disabling AT
-	local timeout=$(do_facet mds1 $LCTL get_param -n timeout)
-
-	timeout=$((timeout / 2 + 5))
-	fail ost1 $timeout &
-	local failpid=$!
-
-	sleep 1
-
-	# consume preallocated objects, precreate thread will be awakened
-	consume_precreations $DIR1/$tdir mds1 0 1
-
-	# disable AT so that blocking timeout gets set to obd_timeout/2
-	local amm=$(at_max_get mds1)
-
-	at_max_set 0 mds1
-	stack_trap "at_max_set $amm mds1"
-
-	declare -a multiops
-
-	#define OBD_FAIL_LLITE_XATTR_PAUSE 0x1420
-	$LCTL set_param fail_loc=0x80001420
-	$MULTIOP $DIR1/$tdir/mdtdir/$tfile Osw4096c &
-	multiops+=($!)
-	while [ ! -f $DIR1/$tdir/mdtdir/$tfile ]; do
-		sleep 0.5
-	done
-	$MULTIOP $DIR2/$tdir/mdtdir/$tfile oO_WRONLY:w4096c &
-	multiops+=($!)
-	sleep 0.5
-	local mmrif=$($LCTL get_param -n \
-		mdc.$FSNAME-MDT0000-mdc-*.max_mod_rpcs_in_flight | tail -1)
-	# these are blocked by precreation until ost failover is in progress
-	for (( i=0; i < $mmrif; i++ )) ; do
-		$MULTIOP $DIR1/$tdir/d.${i}/parallel Oc &
-		multiops+=($!)
-	done
-	wait $failpid
-	local failed=0
-
-	echo "pids: ${multiops[@]}"
-	for pid in ${multiops[@]}; do
-		local rc=0
-
-		wait $pid || rc=$?
-		if (( $rc != 0 )); then
-			echo "wait $pid failed, rc = $rc"
-			((failed++))
-		fi
-	done
-	((failed == 0)) || error "$failed multiops failed"
-}
-run_test 31 "deadlock on file_remove_privs and occupied mod rpc slots"
-
 complete_test $SECONDS
 SLEEP=$((SECONDS - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
-- 
1.8.3.1