From 0ce535cc4b058d1066fa4f4b4339486200ca2b67 Mon Sep 17 00:00:00 2001
From: Andreas Dilger
Date: Thu, 10 May 2012 16:56:46 -0600
Subject: [PATCH] LU-482 tests: abort replay-dual if test_0a fails

Due to repeated and annoying LU-482 failures of replay-dual.sh test_0a,
abort the whole test script if test_0a fails. While this is not ideal,
it is better than disabling replay-dual.sh entirely.

Until bug TT-554 is fixed to display "SKIP" results from a test in
Maloo, we should still be able to detect the LU-482 failures because
replay-dual test will complete in a few seconds instead of thousands.

Signed-off-by: Andreas Dilger
Change-Id: I4bc6b62e0028b908f0fc8afaab0030b0fcdf500c
Reviewed-on: http://review.whamcloud.com/2731
Tested-by: Hudson
Reviewed-by: Yu Jian
Reviewed-by: Li Wei
Reviewed-by: Oleg Drokin
Tested-by: Maloo
---
 lustre/tests/replay-dual.sh    | 43 ++++++++++++++++++++++++++----------------
 lustre/tests/test-framework.sh | 21 ++++++++++++++++-----
 2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh
index 08a9662..2ac97fc 100755
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -1,5 +1,4 @@
 #!/bin/bash
-
 set -e
 
 # bug number: 10124
@@ -37,28 +36,40 @@ rm -rf $DIR/[df][0-9]*
 
 # LU-482 Avert LVM and VM inability to flush caches in pre .33 kernels
 if [ $LINUX_VERSION_CODE -lt $(version_code 2.6.33) ]; then
-        sync
-        do_facet $SINGLEMDS sync
+	sync
+	do_facet $SINGLEMDS "sync; sleep 10; sync; sleep 10; sync"
 fi
 
+LU482_FAILED=$(mktemp -u $TMP/$TESTSUITE.lu482.XXXXXX)
 test_0a() {
-    touch $MOUNT2/$tfile-A # force sync FLD/SEQ update before barrier
-    replay_barrier $SINGLEMDS
+	echo "Check file is LU482_FAILED=$LU482_FAILED"
+	touch $MOUNT2/$tfile-A # force sync FLD/SEQ update before barrier
+	replay_barrier $SINGLEMDS
 #define OBD_FAIL_PTLRPC_FINISH_REPLAY | OBD_FAIL_ONCE
-    touch $MOUNT2/$tfile
-    createmany -o $MOUNT1/$tfile- 50
-    $LCTL set_param fail_loc=0x80000514
-    facet_failover $SINGLEMDS
-    client_up || return 1
-    umount -f $MOUNT2
-    client_up || return 1
-    zconf_mount `hostname` $MOUNT2 || error "mount2 fais"
-    unlinkmany $MOUNT1/$tfile- 50 || return 2
-    rm $MOUNT2/$tfile || return 3
-    rm $MOUNT2/$tfile-A || return 4
+	touch $MOUNT2/$tfile
+	createmany -o $MOUNT1/$tfile- 50
+	$LCTL set_param fail_loc=0x80000514
+	facet_failover $SINGLEMDS
+	[ -f "$LU482_FAILED" ] && skip "LU-482 failure" && return 0
+	client_up || return 1
+	umount -f $MOUNT2
+	client_up || return 1
+	zconf_mount `hostname` $MOUNT2 || error "mount2 fais"
+	unlinkmany $MOUNT1/$tfile- 50 || return 2
+	rm $MOUNT2/$tfile || return 3
+	rm $MOUNT2/$tfile-A || return 4
 }
 run_test 0a "expired recovery with lost client"
 
+if [ -f "$LU482_FAILED" ]; then
+	log "Found check file $LU482_FAILED, aborting test script"
+	rm -vf "$LU482_FAILED"
+	complete $(basename $0) $SECONDS
+	[ "$MOUNTED2" = yes ] && zconf_umount $HOSTNAME $MOUNT2 || true
+	check_and_cleanup_lustre
+	exit_status
+fi
+
 test_0b() {
     replay_barrier $SINGLEMDS
     touch $MOUNT2/$tfile
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index a7fdf95..9f90481 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -649,12 +649,21 @@ set_default_debug_facet () {
 
 # Facet functions
 mount_facets () {
-    local facets=${1:-$(get_facets)}
-    local facet
+	local facets=${1:-$(get_facets)}
+	local facet
 
-    for facet in ${facets//,/ }; do
-        mount_facet $facet || error "Restart of $facet failed!"
-    done
+	for facet in ${facets//,/ }; do
+		mount_facet $facet
+		local RC=$?
+		[ $RC -eq 0 ] && continue
+
+		if [ "$TESTSUITE.$TESTNAME" = "replay-dual.test_0a" ]; then
+			skip "Restart of $facet failed!." && touch $LU482_FAILED
+		else
+			error "Restart of $facet failed!"
+		fi
+		return $RC
+	done
 }
 
 mount_facet() {
@@ -667,6 +676,8 @@ mount_facet() {
     echo "Starting ${facet}: ${!opt} $@ ${!dev} $mntpt"
     do_facet ${facet} "mkdir -p $mntpt; mount -t lustre ${!opt} $@ ${!dev} $mntpt"
     RC=${PIPESTATUS[0]}
+	# to allow testing LU-482 error handling in mount_facets() and test_0a()
+	[ -f $TMP/test-lu482-trigger ] && RC=2
     if [ $RC -ne 0 ]; then
         echo "mount -t lustre $@ ${!dev} $mntpt"
         echo "Start of ${!dev} on ${facet} failed ${RC}"
-- 
1.8.3.1