+random_double_fail_mdt() {
+ local max_index=$1
+ local duration=$2
+ local monitor_pid=$3
+ local elapsed
+ local start_ts=$(date +%s)
+ local num_failovers=0
+ local fail_index
+ local second_index
+
+ elapsed=$(($(date +%s) - start_ts))
+ while [ $elapsed -lt $duration ]; do
+ fail_index=$((RANDOM%max_index + 1))
+ if [ $fail_index -eq $max_index ]; then
+ second_index=1
+ else
+ second_index=$((fail_index + 1))
+ fi
+ kill -0 $monitor_pid ||
+ error "$monitor_pid stopped"
+ sleep 120
+ replay_barrier mds$fail_index
+ replay_barrier mds$second_index
+ sleep 10
+ # Increment the number of failovers
+ num_failovers=$((num_failovers+1))
+ log "fail mds$fail_index mds$second_index $num_failovers times"
+ fail mds${fail_index},mds${second_index}
+ elapsed=$(($(date +%s) - start_ts))
+ done
+}
+
+test_71a () {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local clients=${CLIENTS:-$HOSTNAME}
+ local rc=0
+
+ zconf_mount_clients $clients $MOUNT
+
+ local duration=300
+ [ "$SLOW" = "no" ] && duration=180
+ # set duration to 900 because it takes some time to boot node
+ [ "$FAILURE_MODE" = HARD ] && duration=900
+
+ mkdir -p $DIR/$tdir
+
+ local elapsed
+ local start_ts=$(date +%s)
+
+ trap cleanup_71a EXIT
+ (
+ while true; do
+ $LFS mkdir -i0 -c2 $DIR/$tdir/test
+ rmdir $DIR/$tdir/test
+ done
+ )&
+ mkdir_71a_pid=$!
+ echo "Started $mkdir_71a_pid"
+
+ random_double_fail_mdt 2 $duration $mkdir_71a_pid
+ kill -0 $mkdir_71a_pid || error "mkdir/rmdir $mkdir_71a_pid stopped"
+
+ cleanup_71a
+ true
+}
+run_test 71a "mkdir/rmdir striped dir with 2 mdts recovery"
+
+test_73a() {
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$!
+ rm -f $DIR/$tfile
+
+ replay_barrier $SINGLEMDS
+ #define OBD_FAIL_LDLM_ENQUEUE_NET 0x302
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000302"
+ fail $SINGLEMDS
+ kill -USR1 $pid
+ wait $pid || error "multiop pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist"
+ return 0
+}
+run_test 73a "open(O_CREAT), unlink, replay, reconnect before open replay, close"
+
+test_73b() {
+ multiop_bg_pause $DIR/$tfile O_tSc ||
+ error "multiop_bg_pause $DIR/$tfile failed"
+ pid=$!
+ rm -f $DIR/$tfile
+
+ replay_barrier $SINGLEMDS
+ #define OBD_FAIL_LDLM_REPLY 0x30c
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000030c"
+ fail $SINGLEMDS
+ kill -USR1 $pid
+ wait $pid || error "multiop pid failed"
+ [ -e $DIR/$tfile ] && error "file $DIR/$tfile should not exist"
+ return 0
+}
+run_test 73b "open(O_CREAT), unlink, replay, reconnect at open_replay reply, close"
+
+# bug 18554
+test_74() {
+ local clients=${CLIENTS:-$HOSTNAME}
+
+ zconf_umount_clients $clients $MOUNT
+ stop ost1
+ facet_failover $SINGLEMDS
+ zconf_mount_clients $clients $MOUNT
+ mount_facet ost1
+ touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+ rm $DIR/$tfile || error "rm $DIR/$tfile failed"
+ clients_up || error "client evicted: $?"
+ return 0
+}
+run_test 74 "Ensure applications don't fail waiting for OST recovery"
+
+# Verify that $remote_dir, and a file created inside it, both report MDT
+# index 1 from "$GETSTRIPE -M".
+#
+# Reads $remote_dir from the caller's scope (it is not passed as an
+# argument).  Creates 20 files f-0..f-19 under $remote_dir with createmany.
+# Calls error() on any failure; returns 0 otherwise.
+remote_dir_check_80() {
+	local want_idx=1
+	local dir_idx
+	local file_idx
+
+	if ! dir_idx=$($GETSTRIPE -M $remote_dir); then
+		error "$GETSTRIPE -M $remote_dir failed"
+	fi
+	if ! [ $dir_idx -eq $want_idx ]; then
+		error "$dir_idx != $want_idx"
+	fi
+
+	if ! createmany -o $remote_dir/f-%d 20; then
+		error "creation failed"
+	fi
+
+	if ! file_idx=$($GETSTRIPE -M $remote_dir/f-1); then
+		error "$GETSTRIPE -M $remote_dir/f-1 failed"
+	fi
+	if ! [ $file_idx -eq $want_idx ]; then
+		error "$file_idx != $want_idx"
+	fi
+
+	return 0
+}
+
+test_80a() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_OUT_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ fail mds${MDTIDX}
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80a "DNE: create remote dir, drop update rep from MDT0, fail MDT0"
+
+test_80b() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80b "DNE: create remote dir, drop update rep from MDT0, fail MDT1"
+
+test_80c() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX}
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80c "DNE: create remote dir, drop update rep from MDT1, fail MDT[0,1]"
+
+test_80d() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX},mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80d "DNE: create remote dir, drop update rep from MDT1, fail 2 MDTs"
+
+test_80e() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
+
+ replay_barrier mds1
+ fail mds${MDTIDX}
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80e "DNE: create remote dir, drop MDT1 rep, fail MDT0"
+
+test_80f() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds2
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80f "DNE: create remote dir, drop MDT1 rep, fail MDT1"
+
+test_80g() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX}
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80g "DNE: create remote dir, drop MDT1 rep, fail MDT0, then MDT1"
+
+test_80h() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ $LFS mkdir -i $MDTIDX $remote_dir &
+ local CLIENT_PID=$!
+
+ # sleep 3 seconds to make sure MDTs are failed after
+ # lfs mkdir -i has finished on all of MDTs.
+ sleep 3
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX},mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "remote dir creation failed"
+
+ remote_dir_check_80 || error "remote dir check failed"
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 80h "DNE: create remote dir, drop MDT1 rep, fail 2 MDTs"
+
+test_81a() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ $LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+ touch $remote_dir || error "touch $remote_dir failed"
+ # OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ rmdir $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds2
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "rm remote dir failed"
+
+ stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 81a "DNE: unlink remote dir, drop MDT0 update rep, fail MDT1"
+
+test_81b() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ $LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+ # OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ rmdir $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ fail mds${MDTIDX}
+
+ wait $CLIENT_PID || error "rm remote dir failed"
+
+ stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 81b "DNE: unlink remote dir, drop MDT0 update reply, fail MDT0"
+
+test_81c() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ $LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+ # OBD_FAIL_OBJ_UPDATE_NET_REP 0x1701
+ do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+ rmdir $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX}
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "rm remote dir failed"
+
+ stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 81c "DNE: unlink remote dir, drop MDT0 update reply, fail MDT0,MDT1"
+
+# Create a remote directory on MDT index 1, then rmdir it in the background
+# with fail_loc 0x1701 set on mds1; put replay barriers on mds1 and mds2
+# and fail both over in a single "fail" call.  The rmdir must succeed and
+# the directory must be gone afterwards.
+#
+# Skipped with fewer than 2 MDTs.
+test_81d() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_OBJ_UPDATE_NET_REP       0x1701
+	do_facet mds${MDTIDX} lctl set_param fail_loc=0x1701
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds1
+	replay_barrier mds2
+	fail mds${MDTIDX},mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "2&>/dev/null" would be parsed as an extra operand "2" followed by
+	# "&>/dev/null", making stat fail on the missing file "2" so that this
+	# check could never trigger; use a plain "&>" redirection instead
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81d "DNE: unlink remote dir, drop MDT0 update reply,  fail 2 MDTs"
+
+test_81e() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ $LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ rmdir $remote_dir &
+ local CLIENT_PID=$!
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0
+
+ replay_barrier mds1
+ fail mds${MDTIDX}
+
+ wait $CLIENT_PID || error "rm remote dir failed"
+
+ stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 81e "DNE: unlink remote dir, drop MDT1 req reply, fail MDT0"
+
+test_81f() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ $LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ rmdir $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds2
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "rm remote dir failed"
+
+ stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 81f "DNE: unlink remote dir, drop MDT1 req reply, fail MDT1"
+
+test_81g() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ ([ $FAILURE_MODE == "HARD" ] &&
+ [ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
+ skip "MDTs needs to be on diff hosts for HARD fail mode" &&
+ return 0
+
+ local MDTIDX=1
+ local remote_dir=$DIR/$tdir/remote_dir
+
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+ $LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+ # OBD_FAIL_MDS_REINT_NET_REP 0x119
+ do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+ rmdir $remote_dir &
+ local CLIENT_PID=$!
+
+ replay_barrier mds1
+ replay_barrier mds2
+ fail mds${MDTIDX}
+ fail mds$((MDTIDX + 1))
+
+ wait $CLIENT_PID || error "rm remote dir failed"
+
+ stat $remote_dir 2&>/dev/null && error "$remote_dir still exist!"
+
+ rm -rf $DIR/$tdir || error "rmdir failed"
+
+ return 0
+}
+run_test 81g "DNE: unlink remote dir, drop req reply, fail M0, then M1"
+
+# Create a remote directory on MDT index 1, then rmdir it in the background
+# with fail_loc 0x119 set on mds2; put replay barriers on mds1 and mds2 and
+# fail both over in a single "fail" call.  The rmdir must succeed and the
+# directory must be gone afterwards.
+#
+# Skipped with fewer than 2 MDTs.
+test_81h() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	local MDTIDX=1
+	local remote_dir=$DIR/$tdir/remote_dir
+
+	mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+	$LFS mkdir -i $MDTIDX $remote_dir || error "lfs mkdir failed"
+
+	# OBD_FAIL_MDS_REINT_NET_REP       0x119
+	do_facet mds$((MDTIDX + 1)) lctl set_param fail_loc=0x119
+	rmdir $remote_dir &
+	local CLIENT_PID=$!
+
+	replay_barrier mds1
+	replay_barrier mds2
+	fail mds${MDTIDX},mds$((MDTIDX + 1))
+
+	wait $CLIENT_PID || error "rm remote dir failed"
+
+	# "2&>/dev/null" would be parsed as an extra operand "2" followed by
+	# "&>/dev/null", making stat fail on the missing file "2" so that this
+	# check could never trigger; use a plain "&>" redirection instead
+	stat $remote_dir &>/dev/null && error "$remote_dir still exist!"
+
+	rm -rf $DIR/$tdir || error "rmdir failed"
+
+	return 0
+}
+run_test 81h "DNE: unlink remote dir, drop request reply, fail 2 MDTs"
+
+test_84a() {
+#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000144"
+ createmany -o $DIR/$tfile- 1 &
+ PID=$!
+ mds_evict_client
+ wait $PID
+ client_up || client_up || true # reconnect
+}
+run_test 84a "stale open during export disconnect"
+
+test_85a() { #bug 16774
+ lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+
+ for i in $(seq 100); do
+ echo "tag-$i" > $DIR/$tfile-$i
+ grep -q "tag-$i" $DIR/$tfile-$i || error "f2-$i"
+ done
+
+ lov_id=$(lctl dl | grep "clilov")
+ addr=$(echo $lov_id | awk '{print $4}' | awk -F '-' '{print $NF}')
+ count=$(lctl get_param -n \
+ ldlm.namespaces.*MDT0000*$addr.lock_unused_count)
+ echo "before recovery: unused locks count = $count"
+
+ fail $SINGLEMDS
+
+ count2=$(lctl get_param -n \
+ ldlm.namespaces.*MDT0000*$addr.lock_unused_count)
+ echo "after recovery: unused locks count = $count2"
+
+ if [ $count2 -ge $count ]; then
+ error "unused locks are not canceled"
+ fi
+}
+run_test 85a "check the cancellation of unused locks during recovery(IBITS)"
+
+test_85b() { #bug 16774
+ lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+
+ create_pool $FSNAME.$TESTNAME ||
+ error "unable to create pool $TESTNAME"
+ do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $FSNAME-OST0000 ||
+ error "unable to add pool $TESTNAME"
+
+ $SETSTRIPE -c 1 -p $FSNAME.$TESTNAME $DIR
+
+ for i in $(seq 100); do
+ dd if=/dev/urandom of=$DIR/$tfile-$i bs=4096 \
+ count=32 >/dev/null 2>&1
+ done
+
+ cancel_lru_locks osc
+
+ for i in $(seq 100); do
+ dd if=$DIR/$tfile-$i of=/dev/null bs=4096 \
+ count=32 >/dev/null 2>&1
+ done
+
+ lov_id=$(lctl dl | grep "clilov")
+ addr=$(echo $lov_id | awk '{print $4}' | awk -F '-' '{print $NF}')
+ count=$(lctl get_param -n \
+ ldlm.namespaces.*OST0000*$addr.lock_unused_count)
+ echo "before recovery: unused locks count = $count"
+ [ $count -ne 0 ] || error "unused locks ($count) should be zero"
+
+ fail ost1
+
+ count2=$(lctl get_param \
+ -n ldlm.namespaces.*OST0000*$addr.lock_unused_count)
+ echo "after recovery: unused locks count = $count2"
+
+ do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $FSNAME-OST0000 ||
+ error "unable to remove pool $TESTNAME"
+ do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME ||
+ error "unable to destroy the pool $TESTNAME"
+
+ if [ $count2 -ge $count ]; then
+ error "unused locks are not canceled"
+ fi
+}
+run_test 85b "check the cancellation of unused locks during recovery(EXTENT)"
+
+# Unmount the clients, write 0 to mdt.${FSNAME}-MDT*.exports.clear on
+# $SINGLEMDS, remount that facet and mount the clients again.
+test_86() {
+	local client_nodes=${CLIENTS:-$HOSTNAME}
+
+	zconf_umount_clients $client_nodes $MOUNT
+
+	# clear the per-export data on the MDS, then restart it
+	do_facet $SINGLEMDS lctl set_param mdt.${FSNAME}-MDT*.exports.clear=0
+	remount_facet $SINGLEMDS
+
+	zconf_mount_clients $client_nodes $MOUNT
+}
+run_test 86 "umount server after clear nid_stats should not hit LBUG"
+
+test_87a() {
+ do_facet ost1 "lctl set_param -n obdfilter.${ost1_svc}.sync_journal 0"
+
+ replay_barrier ost1
+ $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+ dd if=/dev/urandom of=$DIR/$tfile bs=1024k count=8 ||
+ error "dd to $DIR/$tfile failed"
+ cksum=$(md5sum $DIR/$tfile | awk '{print $1}')
+ cancel_lru_locks osc
+ fail ost1
+ dd if=$DIR/$tfile of=/dev/null bs=1024k count=8 || error "Cannot read"
+ cksum2=$(md5sum $DIR/$tfile | awk '{print $1}')
+ if [ $cksum != $cksum2 ] ; then
+ error "New checksum $cksum2 does not match original $cksum"
+ fi
+}
+run_test 87a "write replay"
+
+test_87b() {
+ do_facet ost1 "lctl set_param -n obdfilter.${ost1_svc}.sync_journal 0"
+
+ replay_barrier ost1
+ $SETSTRIPE -i 0 -c 1 $DIR/$tfile
+ dd if=/dev/urandom of=$DIR/$tfile bs=1024k count=8 ||
+ error "dd to $DIR/$tfile failed"
+ sleep 1 # Give it a chance to flush dirty data
+ echo TESTTEST | dd of=$DIR/$tfile bs=1 count=8 seek=64
+ cksum=$(md5sum $DIR/$tfile | awk '{print $1}')
+ cancel_lru_locks osc
+ fail ost1
+ dd if=$DIR/$tfile of=/dev/null bs=1024k count=8 || error "Cannot read"
+ cksum2=$(md5sum $DIR/$tfile | awk '{print $1}')
+ if [ $cksum != $cksum2 ] ; then
+ error "New checksum $cksum2 does not match original $cksum"
+ fi
+}
+run_test 87b "write replay with changed data (checksum resend)"