Whamcloud - gitweb
LU-8333 test: Add replay-dual 21b to ALWAYS_EXCEPT
[fs/lustre-release.git] / lustre / tests / replay-dual.sh
index fa775ab..0a25100 100755 (executable)
@@ -2,8 +2,8 @@
 
 set -e
 
-# bug number:  LU-2012 10124
-ALWAYS_EXCEPT="14b     15c   $REPLAY_DUAL_EXCEPT"
+# bug number:  LU-2012 10124 LU-7372 LU-8333
+ALWAYS_EXCEPT="14b     15c   26      21b     $REPLAY_DUAL_EXCEPT"
 
 SAVE_PWD=$PWD
 PTLDEBUG=${PTLDEBUG:--1}
@@ -20,6 +20,7 @@ init_logging
 
 remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
 
+#                                   7  (min)"
 [ "$SLOW" = "no" ] && EXCEPT_SLOW="21b"
 
 [[ $(facet_fstype $SINGLEMDS) == zfs ]] &&
@@ -440,7 +441,7 @@ test_18() { # bug 3822 - evicting client with enqueued lock
        touch $MOUNT1/$tdir/${tfile}0 || error "touch file failed"
        statmany -s $MOUNT1/$tdir/$tfile 1 500 &
        OPENPID=$!
-       NOW=$(date +%s)
+       NOW=$SECONDS
        #define OBD_FAIL_LDLM_ENQUEUE_BLOCKED    0x30b
        do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b  # hold enqueue
        sleep 1
@@ -577,15 +578,15 @@ test_21b() {
 
        test_21b_sub $facet || error "Not all renames are replayed. COS=$COS"
 
-       # COS disabled (should fail)
-       COS=0
-       do_facet $facet lctl set_param mdt.*.commit_on_sharing=$COS
-
        # there is still a window when transactions may be written to disk
        # before the mds device is set R/O. To avoid such a rare test failure,
        # the check is repeated several times.
+       COS=0
        local n_attempts=1
        while true; do
+               # COS disabled (should fail)
+               do_facet $facet lctl set_param mdt.*.commit_on_sharing=$COS
+
                test_21b_sub $facet || break
                n_attempts=$((n_attempts + 1))
                [ $n_attempts -gt 3 ] &&
@@ -920,8 +921,110 @@ test_25() {
 }
 run_test 25 "replay|resend"
 
+cleanup_26() {
+       trap 0
+       kill -9 $tar_26_pid
+       kill -9 $dbench_26_pid
+       killall -9 dbench
+}
+
+test_26() {
+       local clients=${CLIENTS:-$HOSTNAME}
+
+       zconf_mount_clients $clients $MOUNT
+
+       local duration=600
+       [ "$SLOW" = "no" ] && duration=200
+       # HARD failure mode reboots the node, which takes time: run for 900s
+       [ "$FAILURE_MODE" = HARD ] && duration=900
+
+       local start_ts=$SECONDS
+       local rc=0
+
+       trap cleanup_26 EXIT
+       (
+               local tar_dir=$DIR/$tdir/run_tar
+               while true; do
+                       test_mkdir -p -c$MDSCOUNT $tar_dir || break
+                       if [ $MDSCOUNT -ge 2 ]; then
+                               $LFS setdirstripe -D -c$MDSCOUNT $tar_dir ||
+                                       error "set default dirstripe failed"
+                       fi
+                       cd $tar_dir || break
+                       tar cf - /etc | tar xf - || error "tar failed"
+                       cd $DIR/$tdir || break
+                       rm -rf $tar_dir || break
+               done
+       )&
+       tar_26_pid=$!
+       echo "Started tar $tar_26_pid"
+
+       (
+               local dbench_dir=$DIR2/$tdir/run_dbench
+               while true; do
+                       test_mkdir -p -c$MDSCOUNT $dbench_dir || break
+                       if [ $MDSCOUNT -ge 2 ]; then
+                               $LFS setdirstripe -D -c$MDSCOUNT $dbench_dir ||
+                                       error "set default dirstripe failed"
+                       fi
+                       cd $dbench_dir || break
+                       rundbench 1 -D $dbench_dir -t 100 &>/dev/null || break
+                       cd $DIR/$tdir || break
+                       rm -rf $dbench_dir || break
+               done
+       )&
+       dbench_26_pid=$!
+       echo "Started dbench $dbench_26_pid"
+
+       local num_failovers=0
+       local fail_index=1
+       while [ $((SECONDS - start_ts)) -lt $duration ]; do
+               kill -0 $tar_26_pid || error "tar $tar_26_pid missing"
+               kill -0 $dbench_26_pid || error "dbench $dbench_26_pid missing"
+               sleep 2
+               replay_barrier mds$fail_index
+               sleep 2 # give clients time to do operations
+               # Increment the number of failovers
+               num_failovers=$((num_failovers + 1))
+               log "$TESTNAME fail mds$fail_index $num_failovers times"
+               fail mds$fail_index
+               if [ $fail_index -ge $MDSCOUNT ]; then
+                       fail_index=1
+               else
+                       fail_index=$((fail_index + 1))
+               fi
+       done
+       # stop the client loads
+       kill -0 $tar_26_pid || error "tar $tar_26_pid stopped"
+       kill -0 $dbench_26_pid || error "dbench $dbench_26_pid stopped"
+       cleanup_26 || true
+}
+run_test 26 "dbench and tar with mds failover"
+
+test_28() {
+       $SETSTRIPE -i 0 -c 1 $DIR2/$tfile
+       dd if=/dev/zero of=$DIR2/$tfile bs=4096 count=1
+
+       #define OBD_FAIL_LDLM_SRV_BL_AST         0x324
+       do_facet ost1 $LCTL set_param fail_loc=0x80000324
+
+       dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 &
+       local pid=$!
+       sleep 2
+
+       #define OBD_FAIL_LDLM_GRANT_CHECK        0x32a
+       do_facet ost1 $LCTL set_param fail_loc=0x32a
+
+       fail ost1
+
+       sleep 2
+       cancel_lru_locks OST0000-osc
+       wait $pid || error "dd failed"
+}
+run_test 28 "lock replay should be ordered: waiting after granted"
+
 complete $SECONDS
-SLEEP=$((`date +%s` - $NOW))
+SLEEP=$((SECONDS - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
 [ "$MOUNTED2" = yes ] && zconf_umount $HOSTNAME $MOUNT2 || true
 check_and_cleanup_lustre