X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Freplay-dual.sh;h=1af2e4d99460981025dd7dc25bca7fe40004d755;hp=9131e9d2f0001f7a06b0e931f51f205c8686c2cf;hb=879e8d045057941ae0a5117d096f53975ef12ad0;hpb=961cea154f7f4dd4d780f86a71d99d6a12ceb80b diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 9131e9d..1af2e4d 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -5,6 +5,9 @@ set -e # bug number: 10124 ALWAYS_EXCEPT="15c $REPLAY_DUAL_EXCEPT" +LFS=${LFS:-lfs} +SETSTRIPE=${SETSTRIPE:-"$LFS setstripe"} +GETSTRIPE=${GETSTRIPE:-"$LFS getstripe"} SAVE_PWD=$PWD PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} @@ -13,13 +16,6 @@ CLEANUP=${CLEANUP:-""} MOUNT_2=${MOUNT_2:-"yes"} . $LUSTRE/tests/test-framework.sh -if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then - CONFIG_EXCEPTIONS="17" - echo -n "Several ost services on one ost node are used with FAILURE_MODE=$FAILURE_MODE. " - echo "Except the tests: $CONFIG_EXCEPTIONS" - ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS" -fi - init_test_env $@ . 
${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging @@ -42,6 +38,44 @@ rm -rf $DIR/[df][0-9]* [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE +# LU-482 Avert LVM and VM inability to flush caches in pre .33 kernels +if [ $LINUX_VERSION_CODE -lt $(kernel_version 2 6 33) ]; then + sync; sleep 5; sync; sleep 5; sync; sleep 5 +fi + +test_0a() { + touch $MOUNT2/$tfile-A # force sync FLD/SEQ update before barrier + replay_barrier $SINGLEMDS +#define OBD_FAIL_PTLRPC_FINISH_REPLAY | OBD_FAIL_ONCE + touch $MOUNT2/$tfile + createmany -o $MOUNT1/$tfile- 50 + $LCTL set_param fail_loc=0x80000514 + facet_failover $SINGLEMDS + client_up || return 1 + umount -f $MOUNT2 + client_up || return 1 + zconf_mount `hostname` $MOUNT2 || error "mount2 fais" + unlinkmany $MOUNT1/$tfile- 50 || return 2 + rm $MOUNT2/$tfile || return 3 + rm $MOUNT2/$tfile-A || return 4 +} +run_test 0a "expired recovery with lost client" + +test_0b() { + replay_barrier $SINGLEMDS + touch $MOUNT2/$tfile + touch $MOUNT1/$tfile-2 + umount $MOUNT2 + facet_failover $SINGLEMDS + umount -f $MOUNT1 + zconf_mount `hostname` $MOUNT1 || error "mount1 fais" + zconf_mount `hostname` $MOUNT2 || error "mount2 fais" + checkstat $MOUNT1/$tfile-2 && return 1 + checkstat $MOUNT2/$tfile && return 2 + return 0 +} +run_test 0b "lost client during waiting for next transno" + test_1() { touch $MOUNT1/a replay_barrier $SINGLEMDS @@ -250,41 +284,29 @@ test_13() { } run_test 13 "close resend timeout" -test_14a() { - replay_barrier $SINGLEMDS - createmany -o $MOUNT1/$tfile- 25 - createmany -o $MOUNT2/$tfile-2- 1 - createmany -o $MOUNT1/$tfile-3- 25 - umount $MOUNT2 - - facet_failover $SINGLEMDS - # expect failover to fail due to missing client 2 - client_evicted || return 1 - sleep 1 - - # first 25 files should have been replayed - unlinkmany $MOUNT1/$tfile- 25 || return 2 - - zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail" - return 0 -} -run_test 14a "timeouts waiting for lost client during replay" +# test 
14a removed after 18143 because it shouldn't fail anymore and does the same +# as test_15a test_14b() { + wait_mds_ost_sync + wait_destroy_complete BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'` mkdir -p $MOUNT1/$tdir + $SETSTRIPE -o 0 $MOUNT1/$tdir replay_barrier $SINGLEMDS - createmany -o $MOUNT1/$tfile- 5 - echo "data" > $MOUNT2/$tdir/$tfile-2 - createmany -o $MOUNT1/$tfile-3- 5 + createmany -o $MOUNT1/$tdir/$tfile- 5 + + $SETSTRIPE -o 0 $MOUNT2/f14b-3 + echo "data" > $MOUNT2/f14b-3 + createmany -o $MOUNT1/$tdir/$tfile-3- 5 umount $MOUNT2 fail $SINGLEMDS wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" # first 25 files should have been replayed - unlinkmany $MOUNT1/$tfile- 5 || return 2 - unlinkmany $MOUNT1/$tfile-3- 5 || return 3 + unlinkmany $MOUNT1/$tdir/$tfile- 5 || return 2 + unlinkmany $MOUNT1/$tdir/$tfile-3- 5 || return 3 zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail" @@ -532,9 +554,9 @@ run_test 21b "commit on sharing, two clients" # end commit on sharing tests -equals_msg `basename $0`: test complete, cleaning up +complete $(basename $0) $SECONDS SLEEP=$((`date +%s` - $NOW)) [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP [ "$MOUNTED2" = yes ] && zconf_umount $HOSTNAME $MOUNT2 || true check_and_cleanup_lustre -[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true +exit_status