X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Freplay-dual.sh;h=f9bbbc0fb7c8af1e146297a57481e929ca4a61f3;hp=44e257a90e937611944c31a22ab65f72ab139363;hb=b72c4e8d0d1e7b34b67cc7dfa0c2d8da72cfe00d;hpb=eb7c28ff977f4e0a280558aa74e23f2a9ab0ea0c diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 44e257a..f9bbbc0 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -1,28 +1,21 @@ #!/bin/bash +# -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*- +# vim:shiftwidth=4:softtabstop=4:tabstop=4: set -e -# bug number: 10124 -ALWAYS_EXCEPT="15c $REPLAY_DUAL_EXCEPT" +# bug number: LU-2012 10124 +ALWAYS_EXCEPT="14b 15c $REPLAY_DUAL_EXCEPT" -LFS=${LFS:-lfs} -SETSTRIPE=${SETSTRIPE:-"$LFS setstripe"} -GETSTRIPE=${GETSTRIPE:-"$LFS getstripe"} SAVE_PWD=$PWD PTLDEBUG=${PTLDEBUG:--1} LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} SETUP=${SETUP:-""} CLEANUP=${CLEANUP:-""} MOUNT_2=${MOUNT_2:-"yes"} +export MULTIOP=${MULTIOP:-multiop} . $LUSTRE/tests/test-framework.sh -if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then - CONFIG_EXCEPTIONS="17" - echo -n "Several ost services on one ost node are used with FAILURE_MODE=$FAILURE_MODE. " - echo "Except the tests: $CONFIG_EXCEPTIONS" - ALWAYS_EXCEPT="$ALWAYS_EXCEPT $CONFIG_EXCEPTIONS" -fi - init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging @@ -35,7 +28,7 @@ build_test_filter check_and_setup_lustre MOUNTED=$(mounted_lustre_filesystems) -if ! $(echo $MOUNTED | grep -w -q $MOUNT2); then +if ! $(echo $MOUNTED' ' | grep -w -q $MOUNT2' '); then zconf_mount $HOSTNAME $MOUNT2 MOUNTED2=yes fi @@ -45,6 +38,66 @@ rm -rf $DIR/[df][0-9]* [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE +# LU-482 Avert LVM and VM inability to flush caches in pre .33 kernels +if [ $LINUX_VERSION_CODE -lt $(version_code 2.6.33) ]; then + sync + do_facet $SINGLEMDS "sync; sleep 10; sync; sleep 10; sync" +fi + +LU482_FAILED=$(mktemp -u $TMP/$TESTSUITE.lu482.XXXXXX) +test_0a() { + echo "Check file is LU482_FAILED=$LU482_FAILED" + touch $MOUNT2/$tfile-A # force sync FLD/SEQ update before barrier + replay_barrier $SINGLEMDS +#define OBD_FAIL_PTLRPC_FINISH_REPLAY | OBD_FAIL_ONCE + touch $MOUNT2/$tfile + createmany -o $MOUNT1/$tfile- 50 + $LCTL set_param fail_loc=0x80000514 + facet_failover $SINGLEMDS + [ -f "$LU482_FAILED" ] && skip "LU-482 failure" && return 0 + client_up || return 1 + umount -f $MOUNT2 + client_up || return 1 + zconf_mount `hostname` $MOUNT2 || error "mount2 fais" + unlinkmany $MOUNT1/$tfile- 50 || return 2 + rm $MOUNT2/$tfile || return 3 + rm $MOUNT2/$tfile-A || return 4 +} +run_test 0a "expired recovery with lost client" + +if [ -f "$LU482_FAILED" ]; then + log "Found check file $LU482_FAILED, aborting test script" + rm -vf "$LU482_FAILED" + complete $SECONDS + do_nodes $CLIENTS umount -f $MOUNT2 || true + do_nodes $CLIENTS umount -f $MOUNT || true + # copied from stopall, but avoid the MDS recovery + for num in `seq $OSTCOUNT`; do + stop ost$num -f + rm -f $TMP/ost${num}active + done + if ! combined_mgs_mds ; then + stop mgs + fi + + exit_status +fi + +test_0b() { + replay_barrier $SINGLEMDS + touch $MOUNT2/$tfile + touch $MOUNT1/$tfile-2 + umount $MOUNT2 + facet_failover $SINGLEMDS + umount -f $MOUNT1 + zconf_mount `hostname` $MOUNT1 || error "mount1 fais" + zconf_mount `hostname` $MOUNT2 || error "mount2 fais" + checkstat $MOUNT1/$tfile-2 && return 1 + checkstat $MOUNT2/$tfile && return 2 + return 0 +} +run_test 0b "lost client during waiting for next transno" + test_1() { touch $MOUNT1/a replay_barrier $SINGLEMDS @@ -214,7 +267,7 @@ test_12() { multiop_bg_pause $DIR/$tfile mo_c || return 1 MULTIPID=$! -#define OBD_FAIL_LDLM_ENQUEUE 0x302 +#define OBD_FAIL_LDLM_ENQUEUE_NET 0x302 do_facet $SINGLEMDS lctl set_param fail_loc=0x80000302 facet_failover $SINGLEMDS do_facet $SINGLEMDS lctl set_param fail_loc=0 @@ -257,36 +310,39 @@ run_test 13 "close resend timeout" # as test_15a test_14b() { - wait_mds_ost_sync - wait_destroy_complete - BEFOREUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'` - mkdir -p $MOUNT1/$tdir - $SETSTRIPE -o 0 $MOUNT1/$tdir - replay_barrier $SINGLEMDS - createmany -o $MOUNT1/$tdir/$tfile- 5 + wait_mds_ost_sync + wait_delete_completed - $SETSTRIPE -o 0 $MOUNT2/f14b-3 - echo "data" > $MOUNT2/f14b-3 - createmany -o $MOUNT1/$tdir/$tfile-3- 5 - umount $MOUNT2 + local BEFOREUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }') - fail $SINGLEMDS - wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" + mkdir -p $MOUNT1/$tdir + $SETSTRIPE -i 0 $MOUNT1/$tdir + replay_barrier $SINGLEMDS + createmany -o $MOUNT1/$tdir/$tfile- 5 - # first 25 files should have been replayed - unlinkmany $MOUNT1/$tdir/$tfile- 5 || return 2 - unlinkmany $MOUNT1/$tdir/$tfile-3- 5 || return 3 + $SETSTRIPE -i 0 $MOUNT2/$tfile-2 + dd if=/dev/zero of=$MOUNT2/$tfile-2 bs=1M count=5 + createmany -o $MOUNT1/$tdir/$tfile-3- 5 + umount $MOUNT2 - zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail" + fail $SINGLEMDS + wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" - wait_mds_ost_sync || return 4 - wait_destroy_complete || return 5 + # first set of files should have been replayed + unlinkmany $MOUNT1/$tdir/$tfile- 5 || error "first unlinks failed" + unlinkmany $MOUNT1/$tdir/$tfile-3- 5 || error "second unlinks failed" - AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'` - log "before $BEFOREUSED, after $AFTERUSED" - [ $AFTERUSED -ne $BEFOREUSED ] && \ - error "after $AFTERUSED > before $BEFOREUSED" && return 4 - return 0 + zconf_mount $HOSTNAME $MOUNT2 || error "mount $MOUNT2 failed" + [ -f $MOUNT2/$tfile-2 ] && error "$MOUNT2/$tfile-2 exists!" + + wait_mds_ost_sync || error "wait_mds_ost_sync failed" + wait_delete_completed || error "wait_delete_complete failed" + + local AFTERUSED=$(df -P $DIR | tail -1 | awk '{ print $3 }') + log "before $BEFOREUSED, after $AFTERUSED" + # leave some margin for some files/dirs to be modified (OI, llog, etc) + [ $AFTERUSED -gt $((BEFOREUSED + 128)) ] && + error "after $AFTERUSED > before $BEFOREUSED" || true } run_test 14b "delete ost orphans if gap occured in objids due to VBR" @@ -373,7 +429,7 @@ test_18() { # bug 3822 - evicting client with enqueued lock NOW=`date +%s` do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b # hold enqueue sleep 1 -#define OBD_FAIL_LDLM_BL_CALLBACK 0x305 +#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305 do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict cancel_lru_locks mdc usleep 500 # wait to ensure first client is one that will be evicted @@ -523,9 +579,9 @@ run_test 21b "commit on sharing, two clients" # end commit on sharing tests -equals_msg `basename $0`: test complete, cleaning up +complete $SECONDS SLEEP=$((`date +%s` - $NOW)) [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP [ "$MOUNTED2" = yes ] && zconf_umount $HOSTNAME $MOUNT2 || true check_and_cleanup_lustre -[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true +exit_status