+# Test 25: replay|resend.
+# The first client takes a lock on the file, the second client queues a
+# write behind it, a fail_loc is armed on ost1 and ost1 is failed over so
+# that the replayed waiting lock has to be resent.
+test_25() {
+	local resend_fail_loc
+
+	cancel_lru_locks osc
+
+	$SETSTRIPE -i 0 -c 1 $DIR/$tfile
+
+	# get lock for the 1st client
+	if ! dd if=/dev/zero of=$DIR/$tfile count=1 >/dev/null; then
+		error "failed to write data"
+	fi
+
+	# get waiting locks for the 2nd client; multiop runs in the background
+	# through the second mount point
+	drop_ldlm_cancel "multiop $DIR2/$tfile Ow512" &
+	sleep 1
+
+	# failover, replay and resend replayed waiting locks;
+	# the fail_loc to arm depends on the version running on ost1
+	#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213
+	resend_fail_loc=0x80000213
+	if [ $(lustre_version_code ost1) -ge $(version_code 2.6.90) ]; then
+		#define OBD_FAIL_LDLM_SRV_CP_AST 0x325
+		resend_fail_loc=0x80000325
+	fi
+	do_facet ost1 lctl set_param fail_loc=$resend_fail_loc
+
+	fail ost1
+
+	# multiop does not finish because CP AST is skipped;
+	# it is ok to kill it in the test, because CP AST is already re-sent
+	# and it does not hang forever in real life
+	killall multiop
+	# reap the background job before returning
+	wait
+}
+run_test 25 "replay|resend"
+
+cleanup_26() {
+ trap 0
+ kill -9 $tar_26_pid
+ kill -9 $dbench_26_pid
+ killall -9 dbench
+}
+
+test_26() {
+ local clients=${CLIENTS:-$HOSTNAME}
+
+ zconf_mount_clients $clients $MOUNT
+
+ local duration=600
+ [ "$SLOW" = "no" ] && duration=200
+ # set duration to 900 because it takes some time to boot node
+ [ "$FAILURE_MODE" = HARD ] && duration=900
+
+ local start_ts=$SECONDS
+ local rc=0
+
+ trap cleanup_26 EXIT
+ (
+ local tar_dir=$DIR/$tdir/run_tar
+ while true; do
+ test_mkdir -p -c$MDSCOUNT $tar_dir || break
+ if [ $MDSCOUNT -ge 2 ]; then
+ $LFS setdirstripe -D -c$MDSCOUNT $tar_dir ||
+ error "set default dirstripe failed"
+ fi
+ cd $tar_dir || break
+ tar cf - /etc | tar xf - || error "tar failed"
+ cd $DIR/$tdir || break
+ rm -rf $tar_dir || break
+ done
+ )&
+ tar_26_pid=$!
+ echo "Started tar $tar_26_pid"
+
+ (
+ local dbench_dir=$DIR2/$tdir/run_dbench
+ while true; do
+ test_mkdir -p -c$MDSCOUNT $dbench_dir || break
+ if [ $MDSCOUNT -ge 2 ]; then
+ $LFS setdirstripe -D -c$MDSCOUNT $dbench_dir ||
+ error "set default dirstripe failed"
+ fi
+ cd $dbench_dir || break
+ rundbench 1 -D $dbench_dir -t 100 > /dev/null 2&>1 ||
+ break
+ cd $DIR/$tdir || break
+ rm -rf $dbench_dir || break
+ done
+ )&
+ dbench_26_pid=$!
+ echo "Started dbench $dbench_26_pid"
+
+ local num_failovers=0
+ local fail_index=1
+ while [ $((SECONDS - start_ts)) -lt $duration ]; do
+ kill -0 $tar_26_pid || error "tar $tar_26_pid missing"
+ kill -0 $dbench_26_pid || error "dbench $dbench_26_pid missing"
+ sleep 2
+ replay_barrier mds$fail_index
+ sleep 2 # give clients a time to do operations
+ # Increment the number of failovers
+ num_failovers=$((num_failovers + 1))
+ log "$TESTNAME fail mds$fail_index $num_failovers times"
+ fail mds$fail_index
+ if [ $fail_index -ge $MDSCOUNT ]; then
+ fail_index=1
+ else
+ fail_index=$((fail_index + 1))
+ fi
+ done
+ # stop the client loads
+ kill -0 $tar_26_pid || error "tar $tar_26_pid stopped"
+ kill -0 $dbench_26_pid || error "dbench $dbench_26_pid stopped"
+ cleanup_26 || true
+}
+run_test 26 "dbench and tar with mds failover"
+
+test_28() {
+ $SETSTRIPE -i 0 -c 1 $DIR2/$tfile
+ dd if=/dev/zero of=$DIR2/$tfile bs=4096 count=1
+
+ #define OBD_FAIL_LDLM_SRV_BL_AST 0x324
+ do_facet ost1 $LCTL set_param fail_loc=0x80000324
+
+ dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 &
+ local pid=$!
+ sleep 2
+
+ #define OBD_FAIL_LDLM_GRANT_CHECK 0x32a
+ do_facet ost1 $LCTL set_param fail_loc=0x32a
+
+ fail ost1
+
+ sleep 2
+ cancel_lru_locks OST0000-osc
+ wait $pid || error "dd failed"
+}
+run_test 28 "lock replay should be ordered: waiting after granted"
+