local osc2dev=`lctl get_param -n devices | grep ${ost2_svc}-osc- | egrep -v 'MDT' | awk '{print $1}'`
$LCTL --device $osc2dev deactivate || return 3
# my understanding is that there should be nothing in the page
- # cache after the client reconnects?
+ # cache after the client reconnects?
rc=0
pgcache_empty || rc=2
$LCTL --device $osc2dev activate
# allow recovery to complete
sleep $((TIMEOUT + 2))
# my understanding is that there should be nothing in the page
- # cache after the client reconnects?
+ # cache after the client reconnects?
rc=0
pgcache_empty || rc=2
rm -f $f $TMP/$tfile
rc=$?
[ $rc -eq 0 ] && error "multiop didn't fail enqueue: rc $rc" || true
}
-run_test 20a "ldlm_handle_enqueue error (should return error)"
+run_test 20a "ldlm_handle_enqueue error (should return error)"
test_20b() { # bug 2986 - ldlm_handle_enqueue error during open
remote_ost_nodsh && skip "remote OST with nodsh" && return 0
test_22() {
f1=$DIR/${tfile}-1
f2=$DIR/${tfile}-2
-
+
do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000115"
$MULTIOP $f2 Oc &
close_pid=$!
# PING_INTERVAL max(obd_timeout / 4, 1U)
# PING_EVICT_TIMEOUT (PING_INTERVAL * 6)
- # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
- # But if there's a race to start the evictor from various obds,
+ # evictor takes PING_EVICT_TIMEOUT + 3 * PING_INTERVAL to evict.
+ # But if there's a race to start the evictor from various obds,
# the loser might have to wait for the next ping.
# = 9 * PING_INTERVAL + PING_INTERVAL
# = 10 PING_INTERVAL = 10 obd_timeout / 4 = 2.5 obd_timeout
facet_failover $SINGLEMDS
#no crashes allowed!
kill -USR1 $CLIENT_PID
- wait $CLIENT_PID
+ wait $CLIENT_PID
true
FAILURE_MODE=$save_FAILURE_MODE
}
# client process should see no problems even though MDS went down
sleep $TIMEOUT
kill -USR1 $CLIENT_PID
- wait $CLIENT_PID
+ wait $CLIENT_PID
rc=$?
echo writemany returned $rc
#these may fail because of eviction due to slow AST response.
run_test 56 "do not fail on getattr resend"
test_57_helper() {
- # no oscs means no client or mdt
+ # no oscs means no client or mdt
while lctl get_param osc.*.* > /dev/null 2>&1; do
: # loop until proc file is removed
done
$LFS setstripe -c 1 -i 0 $DIR/$tdir
replay_barrier $SINGLEMDS
- createmany -o $DIR/$tdir/$tfile-%d 10
+ createmany -o $DIR/$tdir/$tfile-%d 10
local oid=$(do_facet ost1 "lctl get_param -n \
obdfilter.${ost1_svc}.last_id" | sed -e 's/.*://')
}
run_test 154b "restore update llog after failed recovery"
+# test_156: after a client is evicted by ost1 during recovery, make sure
+# ost1 does not log an "ofd_obd_disconnect: tot_granted" message.
+#
+# Flow: shorten obd_timeout to 5s, write 1MiB synchronously after a replay
+# barrier on ost1, pause replay on the client with fail_loc 0x536, fail
+# over ost1, then verify from recovery_status that exactly one client did
+# not complete recovery and grep the OST dmesg for the miscount message.
+#
+# Globals read: OST1_VERSION, LCTL, LFS, MULTIOP, DIR, MOUNT, FSNAME,
+#               tfile, TESTNAME
+test_156()
+{
+	# Per the skip messages this check replaces, the tgt_granted miscount
+	# fix is expected in OST versions >= 2.15.60.90 and also in the range
+	# 2.14.0-ddn141 <= version < 2.15.0.  The two ranges are disjoint, so
+	# they have to be OR-ed in a single guard: two consecutive
+	# "cond || skip" checks require both at once and skip unconditionally.
+	local need="OST version >= 2.15.60.90, or >= 2.14.0-ddn141 and < 2.15.0,"
+
+	(( OST1_VERSION >= $(version_code v2_15_60-90-g9df01eee75) ||
+	   ( OST1_VERSION >= $(version_code 2.14.0-ddn141) &&
+	     OST1_VERSION < $(version_code 2.15.0) ) )) ||
+		skip "Need $need for tgt_granted miscount fix"
+
+	# on failover recovery time hard will be 9 * 5
+	# Declare and assign separately so a failed read is not masked by
+	# "local"; an empty value would later be restored as "timeout=".
+	local saved_timeout
+
+	saved_timeout=$(do_facet ost1 $LCTL get_param -n timeout) ||
+		error "failed to get obd_timeout"
+
+	do_facet mgs $LCTL set_param -P timeout=5 ||
+		error "failed to set obd_timeout"
+	stack_trap "do_facet mgs $LCTL set_param -P timeout=$saved_timeout" \
+		EXIT
+
+	$LFS setstripe -c 1 -i 0 $DIR/$tfile || error "setstripe failed"
+
+	# this is to sync last_rcvd, so that the client will have to
+	# send replay on recovery
+	$LFS df $MOUNT
+	do_facet ost1 sync
+
+	replay_barrier ost1
+
+	$MULTIOP $DIR/$tfile oO_RDWR:O_SYNC:w1048576c || error "multiop failed"
+
+	# delay write replay for 45 sec (OBD_RECOVERY_TIME_HARD) to
+	# get the client evicted as not sending replays
+
+#define OBD_FAIL_PTLRPC_REPLAY_PAUSE	0x536
+	$LCTL set_param fail_loc=0x80000536 fail_val=45
+
+	fail ost1
+
+	# check that ost1 evicted the client in recovery:
+	# the completed_clients value is split on '/' into two numbers and
+	# the first must be exactly one less than the second
+	local clients
+
+	clients=($(do_facet ost1 \
+		$LCTL get_param -n obdfilter.$FSNAME-OST0000.recovery_status |
+		awk '/completed_clients/ { print $2 }' | tr '/' '\n'))
+	(( ${clients[0]:-0} + 1 == ${clients[1]:-0} )) ||
+		error "client not evicted by ost1"
+
+	# Only inspect dmesg lines logged after this test's name last appears:
+	# reverse the log, drop everything from the first match to the end.
+	local testid=$(echo $TESTNAME | tr '_' ' ')
+
+	if do_facet ost1 dmesg | tac | sed "/$testid/,$ d" |
+		grep "ofd_obd_disconnect: tot_granted"; then
+		error "grant miscount"
+	fi
+}
+run_test 156 "tot_granted miscount after client eviction"
+
complete_test $SECONDS
check_and_cleanup_lustre
exit_status