Whamcloud - gitweb
LU-482 tests: abort replay-dual if test_0a fails
author Andreas Dilger <adilger@whamcloud.com>
Thu, 10 May 2012 22:56:46 +0000 (16:56 -0600)
committer Oleg Drokin <green@whamcloud.com>
Sat, 19 May 2012 03:05:08 +0000 (23:05 -0400)
Due to repeated and annoying LU-482 failures of replay-dual.sh
test_0a, abort the whole test script if test_0a fails.  While this
is not ideal, it is better than disabling replay-dual.sh entirely.

Until bug TT-554 is fixed so that Maloo displays "SKIP" results from a
test, we should still be able to detect the LU-482 failures, because the
replay-dual test will complete in a few seconds instead of thousands of seconds.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I4bc6b62e0028b908f0fc8afaab0030b0fcdf500c
Reviewed-on: http://review.whamcloud.com/2731
Tested-by: Hudson
Reviewed-by: Yu Jian <yujian@whamcloud.com>
Reviewed-by: Li Wei <liwei@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
lustre/tests/replay-dual.sh
lustre/tests/test-framework.sh

index 08a9662..2ac97fc 100755 (executable)
@@ -1,5 +1,4 @@
 #!/bin/bash
-
 set -e
 
 # bug number:  10124
@@ -37,28 +36,40 @@ rm -rf $DIR/[df][0-9]*
 
 # LU-482 Avert LVM and VM inability to flush caches in pre .33 kernels
 if [ $LINUX_VERSION_CODE -lt $(version_code 2.6.33) ]; then
-    sync
-    do_facet $SINGLEMDS sync
+       sync
+       do_facet $SINGLEMDS "sync; sleep 10; sync; sleep 10; sync"
 fi
 
+LU482_FAILED=$(mktemp -u $TMP/$TESTSUITE.lu482.XXXXXX)
 test_0a() {
-    touch $MOUNT2/$tfile-A # force sync FLD/SEQ update before barrier
-    replay_barrier $SINGLEMDS
+       echo "Check file is LU482_FAILED=$LU482_FAILED"
+       touch $MOUNT2/$tfile-A # force sync FLD/SEQ update before barrier
+       replay_barrier $SINGLEMDS
 #define OBD_FAIL_PTLRPC_FINISH_REPLAY | OBD_FAIL_ONCE
-    touch $MOUNT2/$tfile
-    createmany -o $MOUNT1/$tfile- 50
-    $LCTL set_param fail_loc=0x80000514
-    facet_failover $SINGLEMDS
-    client_up || return 1
-    umount -f $MOUNT2
-    client_up || return 1
-    zconf_mount `hostname` $MOUNT2 || error "mount2 fais"
-    unlinkmany $MOUNT1/$tfile- 50 || return 2
-    rm $MOUNT2/$tfile || return 3
-    rm $MOUNT2/$tfile-A || return 4
+       touch $MOUNT2/$tfile
+       createmany -o $MOUNT1/$tfile- 50
+       $LCTL set_param fail_loc=0x80000514
+       facet_failover $SINGLEMDS
+       [ -f "$LU482_FAILED" ] && skip "LU-482 failure" && return 0
+       client_up || return 1
+       umount -f $MOUNT2
+       client_up || return 1
+       zconf_mount `hostname` $MOUNT2 || error "mount2 fais"
+       unlinkmany $MOUNT1/$tfile- 50 || return 2
+       rm $MOUNT2/$tfile || return 3
+       rm $MOUNT2/$tfile-A || return 4
 }
 run_test 0a "expired recovery with lost client"
 
+if [ -f "$LU482_FAILED" ]; then
+       log "Found check file $LU482_FAILED, aborting test script"
+       rm -vf "$LU482_FAILED"
+       complete $(basename $0) $SECONDS
+       [ "$MOUNTED2" = yes ] && zconf_umount $HOSTNAME $MOUNT2 || true
+       check_and_cleanup_lustre
+       exit_status
+fi
+
 test_0b() {
     replay_barrier $SINGLEMDS
     touch $MOUNT2/$tfile
index a7fdf95..9f90481 100644 (file)
@@ -649,12 +649,21 @@ set_default_debug_facet () {
 
 # Facet functions
 mount_facets () {
-    local facets=${1:-$(get_facets)}
-    local facet
+       local facets=${1:-$(get_facets)}
+       local facet
 
-    for facet in ${facets//,/ }; do
-        mount_facet $facet || error "Restart of $facet failed!"
-    done
+       for facet in ${facets//,/ }; do
+               mount_facet $facet
+               local RC=$?
+               [ $RC -eq 0 ] && continue
+
+               if [ "$TESTSUITE.$TESTNAME" = "replay-dual.test_0a" ]; then
+                       skip "Restart of $facet failed!." && touch $LU482_FAILED
+               else
+                       error "Restart of $facet failed!"
+               fi
+               return $RC
+       done
 }
 
 mount_facet() {
@@ -667,6 +676,8 @@ mount_facet() {
     echo "Starting ${facet}: ${!opt} $@ ${!dev} $mntpt"
     do_facet ${facet} "mkdir -p $mntpt; mount -t lustre ${!opt} $@ ${!dev} $mntpt"
     RC=${PIPESTATUS[0]}
+    # to allow testing LU-482 error handling in mount_facets() and test_0a()
+    [ -f $TMP/test-lu482-trigger ] && RC=2
     if [ $RC -ne 0 ]; then
         echo "mount -t lustre $@ ${!dev} $mntpt"
         echo "Start of ${!dev} on ${facet} failed ${RC}"