to += drt - left;
} else if (!extend && (drt > to)) {
to = drt;
- /* reduce drt by already passed time */
- drt -= obd->obd_recovery_timeout - left;
}
if (to > obd->obd_recovery_time_hard)
to = obd->obd_recovery_time_hard;
- if (obd->obd_recovery_timeout < to ||
- obd->obd_recovery_timeout == obd->obd_recovery_time_hard) {
+ if (obd->obd_recovery_timeout < to) {
obd->obd_recovery_timeout = to;
- cfs_timer_arm(&obd->obd_recovery_timer,
- cfs_time_shift(drt));
+ end = obd->obd_recovery_start + to;
+ cfs_timer_arm(&obd->obd_recovery_timer,
+ cfs_time_shift(end - now));
}
spin_unlock(&obd->obd_dev_lock);
CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n",
- obd->obd_name, (unsigned)drt);
+ obd->obd_name, (unsigned)cfs_time_sub(end, now));
}
/* Reset the timer with each new client connection */
int (*health_check)(struct obd_export *))
{
repeat:
+ if ((obd->obd_recovery_start != 0) && (cfs_time_current_sec() >=
+ (obd->obd_recovery_start + obd->obd_recovery_time_hard))) {
+ CWARN("recovery is aborted by hard timeout\n");
+ obd->obd_abort_recovery = 1;
+ }
+
wait_event(obd->obd_next_transno_waitq, check_routine(obd));
if (obd->obd_abort_recovery) {
CWARN("recovery is aborted, evict exports in recovery\n");
obd->obd_next_recovery_transno);
CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+ /** This delay is needed to extend the recovery window beyond
+ * recovery_time_soft. Extending is only possible at the end of the
+ * recovery window (see handle_recovery_req for more details).
+ */
+ CFS_FAIL_TIMEOUT_MS(OBD_FAIL_TGT_REPLAY_DELAY, 300);
if (target_recovery_overseer(obd, check_for_next_transno,
exp_req_replay_healthy)) {
run_test 83 "ENOSPACE on OST doesn't cause message VFS: \
Busy inodes after unmount ..."
+recovery_time_min() { # print 2 * RECONNECT_DELAY_MAX for current $TIMEOUT
+ local CONNECTION_SWITCH_MIN=5
+ local CONNECTION_SWITCH_INC=5
+ local CONNECTION_SWITCH_MAX
+ local RECONNECT_DELAY_MAX
+ local INITIAL_CONNECT_TIMEOUT
+ local max
+ local TO_20
+
+ # CONNECTION_SWITCH_MAX = min(50, max(CONNECTION_SWITCH_MIN, TIMEOUT))
+ (($CONNECTION_SWITCH_MIN>$TIMEOUT)) && \
+ max=$CONNECTION_SWITCH_MIN || max=$TIMEOUT
+ (($max<50)) && CONNECTION_SWITCH_MAX=$max || CONNECTION_SWITCH_MAX=50
+
+ # INITIAL_CONNECT_TIMEOUT = max(CONNECTION_SWITCH_MIN,
+ #                               TIMEOUT / 20)
+ TO_20=$(($TIMEOUT/20))
+ (($CONNECTION_SWITCH_MIN>$TO_20)) && \
+ INITIAL_CONNECT_TIMEOUT=$CONNECTION_SWITCH_MIN || \
+ INITIAL_CONNECT_TIMEOUT=$TO_20
+
+ RECONNECT_DELAY_MAX=$(($CONNECTION_SWITCH_MAX+$CONNECTION_SWITCH_INC+ \
+ $INITIAL_CONNECT_TIMEOUT))
+ echo $((2*$RECONNECT_DELAY_MAX))
+}
+
+test_83() { # check that recovery does not outlast recovery_time_hard
+ local facet=$SINGLEMDS
+ local num=$(echo $facet | tr -d "mds")
+ local dev=$(mdsdevname $num)
+ local time_min=$(recovery_time_min) # used for both soft and hard limit
+ local recovery_duration
+ local completed_clients
+
+ echo "start mds service on `facet_active_host $facet`"
+ start $facet ${dev} $MDS_MOUNT_OPTS \
+ "-o recovery_time_hard=$time_min,recovery_time_soft=$time_min"\
+ $@ || return 94
+
+ start_ost
+ start_ost2
+
+ echo "recovery_time_hard $time_min, recovery_time_soft $time_min, \
+ timeout $TIMEOUT"
+
+ mount_client $MOUNT1 || error "mount failed"
+ mount_client $MOUNT2 || error "mount failed"
+
+ replay_barrier $SINGLEMDS
+ createmany -o $DIR1/$tfile-%d 1000
+
+ # The recovery window can only be extended near its end, so skip
+ # the first 5 requests (fail_val) and then delay request handling.
+ #define OBD_FAIL_TGT_REPLAY_DELAY 0x709 | FAIL_SKIP
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x20000709"
+ do_facet $SINGLEMDS "lctl set_param fail_val=5"
+
+ facet_failover $SINGLEMDS || error "failover: $?"
+ client_up
+
+ echo "recovery status"
+ do_facet $SINGLEMDS "$LCTL get_param -n \
+ mdt.$FSNAME-MDT0000.recovery_status"
+
+ recovery_duration=$(do_facet $SINGLEMDS "$LCTL get_param -n \
+ mdt.$FSNAME-MDT0000.recovery_status" | \
+ grep recovery_duration |awk '{print $2}')
+ (($recovery_duration>$time_min)) && \
+ error "recovery_duration > recovery_time_hard"
+ completed_clients=$(do_facet $SINGLEMDS "$LCTL get_param -n \
+ mdt.$FSNAME-MDT0000.recovery_status" | \
+ grep completed_clients |awk '{print $2}')
+ [ "$completed_clients" = "1/2" ] || \
+ error "completed_clients != 1/2: "$completed_clients
+
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0" # clear the fail point
+ umount_client $MOUNT1
+ umount_client $MOUNT2
+
+ stop_ost
+ stop_ost2
+ stop_mds
+}
+run_test 83 "check recovery_hard_time" # NOTE(review): 83 also used above
+
if ! combined_mgs_mds ; then
stop mgs
fi