From 08f61e0baaa1d44a50aa6ad048ffc31f091362e0 Mon Sep 17 00:00:00 2001 From: Jinshan Xiong Date: Thu, 25 Aug 2011 17:30:50 -0700 Subject: [PATCH] ORNL-28: Set recovery timeout correctly make sure recovery window uses timeout value from lustre config; in current implementation this piece of code is totally wrong since it just disregards timeout configuration. Change-Id: I0cb0d777569cccd96f30da11834c6e333a673816 Signed-off-by: Jinshan Xiong Reviewed-on: http://review.whamcloud.com/1292 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre_lib.h | 1 + lustre/ldlm/ldlm_lib.c | 132 ++++++++++++++++++++++---------------------- lustre/mdt/mdt_handler.c | 7 ++- lustre/obdclass/obd_mount.c | 8 +-- lustre/obdfilter/filter.c | 17 ++++++ 5 files changed, 92 insertions(+), 73 deletions(-) diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 6283648..de7b96c 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -97,6 +97,7 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req); struct l_wait_info; +void target_start_recovery_timer(struct obd_device *obd); void target_cancel_recovery_timer(struct obd_device *obd); void target_stop_recovery_thread(struct obd_device *obd); void target_cleanup_recovery(struct obd_device *obd); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index ab65cb0..60a7e3a 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -672,10 +672,17 @@ void target_client_add_cb(struct obd_device *obd, __u64 transno, void *cb_data, } EXPORT_SYMBOL(target_client_add_cb); +#ifdef __KERNEL__ static void -target_start_and_reset_recovery_timer(struct obd_device *obd, - struct ptlrpc_request *req, - int new_client); +check_and_extend_recovery_timer(struct obd_device *obd, + struct ptlrpc_request *req); +#else +static inline void +check_and_extend_recovery_timer(struct obd_device *obd, + struct ptlrpc_request *req) +{ +} +#endif int target_handle_connect(struct 
ptlrpc_request *req) { @@ -901,10 +908,10 @@ no_export: export, (long)cfs_time_current_sec(), export ? (long)export->exp_last_request_time : 0); - /* If this is the first time a client connects, - * reset the recovery timer */ - if (rc == 0 && target->obd_recovering) - target_start_and_reset_recovery_timer(target, req, !export); + /* If a client with an existing export (re)connects during recovery, + * extend the recovery timer if needed */ + if (rc == 0 && target->obd_recovering && export) + check_and_extend_recovery_timer(target, req); /* We want to handle EALREADY but *not* -EALREADY from * target_handle_reconnect(), return reconnection state in a flag */ @@ -1301,7 +1308,6 @@ static void abort_lock_replay_queue(struct obd_device *obd) target_request_copy_put(req); } } -#endif /* Called from a cleanup function if the device is being cleaned up forcefully. The exports should all have been disconnected already, @@ -1359,56 +1365,48 @@ void target_cancel_recovery_timer(struct obd_device *obd) cfs_timer_disarm(&obd->obd_recovery_timer); } -/* extend = 1 means require at least "duration" seconds left in the timer, - extend = 0 means set the total duration (start_recovery_timer) */ -static void reset_recovery_timer(struct obd_device *obd, int duration, - int extend) +void target_start_recovery_timer(struct obd_device *obd) { - cfs_time_t now = cfs_time_current_sec(); - cfs_duration_t left; - - cfs_spin_lock(&obd->obd_recovery_task_lock); + cfs_spin_lock(&obd->obd_dev_lock); if (!obd->obd_recovering || obd->obd_abort_recovery) { - cfs_spin_unlock(&obd->obd_recovery_task_lock); + cfs_spin_unlock(&obd->obd_dev_lock); return; } - left = cfs_time_sub(obd->obd_recovery_end, now); - - if (extend && (duration > left)) - obd->obd_recovery_timeout += duration - left; - else if (!extend && (duration > obd->obd_recovery_timeout)) - /* Track the client's largest expected replay time */ - obd->obd_recovery_timeout = duration; - - /* Hard limit of obd_recovery_time_hard which should not happen */ - if 
(obd->obd_recovery_timeout > obd->obd_recovery_time_hard) - obd->obd_recovery_timeout = obd->obd_recovery_time_hard; - - obd->obd_recovery_end = obd->obd_recovery_start + - obd->obd_recovery_timeout; - if (!cfs_timer_is_armed(&obd->obd_recovery_timer) || - cfs_time_before(now, obd->obd_recovery_end)) { - left = cfs_time_sub(obd->obd_recovery_end, now); - cfs_timer_arm(&obd->obd_recovery_timer, cfs_time_shift(left)); + if (cfs_timer_is_armed(&obd->obd_recovery_timer)) { + cfs_spin_unlock(&obd->obd_dev_lock); + return; } - cfs_spin_unlock(&obd->obd_recovery_task_lock); - CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n", - obd->obd_name, (unsigned)left); + + cfs_timer_arm(&obd->obd_recovery_timer, + cfs_time_shift(obd->obd_recovery_timeout)); + obd->obd_recovery_start = cfs_time_current_sec(); + cfs_spin_unlock(&obd->obd_dev_lock); + CDEBUG(D_HA, "%s: starting recovery timer\n", obd->obd_name); } +EXPORT_SYMBOL(target_start_recovery_timer); -static void check_and_start_recovery_timer(struct obd_device *obd) +/* extend recovery window so that at least @drt seconds are left. 
*/ +static void extend_recovery_timer(struct obd_device *obd, int drt) { - cfs_spin_lock(&obd->obd_recovery_task_lock); - if (cfs_timer_is_armed(&obd->obd_recovery_timer)) { - cfs_spin_unlock(&obd->obd_recovery_task_lock); + cfs_time_t now = cfs_time_current_sec(); + cfs_duration_t left; + + if (!cfs_timer_is_armed(&obd->obd_recovery_timer)) { + cfs_spin_lock(&obd->obd_dev_lock); + if (obd->obd_recovery_timeout < drt) + obd->obd_recovery_timeout = drt; + cfs_spin_unlock(&obd->obd_dev_lock); return; } - CDEBUG(D_HA, "%s: starting recovery timer\n", obd->obd_name); - obd->obd_recovery_start = cfs_time_current_sec(); - cfs_spin_unlock(&obd->obd_recovery_task_lock); - reset_recovery_timer(obd, obd->obd_recovery_timeout, 0); + left = obd->obd_recovery_timeout; + left -= cfs_time_sub(now, obd->obd_recovery_start); + if (drt > left) { + cfs_timer_arm(&obd->obd_recovery_timer, cfs_time_shift(drt)); + CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n", + obd->obd_name, (unsigned)drt); + } } /* Reset the timer with each new client connection */ @@ -1423,20 +1421,19 @@ static void check_and_start_recovery_timer(struct obd_device *obd) */ static void -target_start_and_reset_recovery_timer(struct obd_device *obd, - struct ptlrpc_request *req, - int new_client) +check_and_extend_recovery_timer(struct obd_device *obd, + struct ptlrpc_request *req) { int service_time = lustre_msg_get_service_time(req->rq_reqmsg); + struct obd_device_target *obt = &obd->u.obt; + struct lustre_sb_info *lsi; - if (!new_client && service_time) + if (service_time) /* Teach server about old server's estimates, as first guess * at how long new requests will take. 
*/ + at_measured(&req->rq_rqbd->rqbd_service->srv_at_estimate, service_time); - check_and_start_recovery_timer(obd); - /* convert the service time to rpc timeout, * reuse service_time to limit stack usage */ service_time = at_est2timeout(service_time); @@ -1444,14 +1441,18 @@ target_start_and_reset_recovery_timer(struct obd_device *obd, /* We expect other clients to timeout within service_time, then try * to reconnect, then try the failover server. The max delay between * connect attempts is SWITCH_MAX + SWITCH_INC + INITIAL */ - service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + - INITIAL_CONNECT_TIMEOUT); - if (service_time > obd->obd_recovery_timeout && !new_client) - reset_recovery_timer(obd, service_time, 0); + service_time += 2 * INITIAL_CONNECT_TIMEOUT; + + LASSERT(obt->obt_magic == OBT_MAGIC); + lsi = s2lsi(obt->obt_sb); + if (!(lsi->lsi_flags & LSI_IR_CAPABLE)) + service_time += 2 * (CONNECTION_SWITCH_MAX + + CONNECTION_SWITCH_INC); + service_time -= obd->obd_recovery_timeout; + if (service_time > 0) + extend_recovery_timer(obd, service_time); } -#ifdef __KERNEL__ - /** Health checking routines */ static inline int exp_connect_healthy(struct obd_export *exp) { @@ -1614,7 +1615,7 @@ repeat: * reset timer, recovery will proceed with versions now, * timeout is set just to handle reconnection delays */ - reset_recovery_timer(obd, RECONNECT_DELAY_MAX, 1); + extend_recovery_timer(obd, RECONNECT_DELAY_MAX); /** Wait for recovery events again, after evicting bad clients */ goto repeat; } @@ -1732,13 +1733,15 @@ static int handle_recovery_req(struct ptlrpc_thread *thread, lu_context_fini(&req->rq_recov_session); /* don't reset timer for final stage */ if (!exp_finished(req->rq_export)) { + int to = obd_timeout; + /** * Add request timeout to the recovery time so next request from * this client may come in recovery time */ - reset_recovery_timer(class_exp2obd(req->rq_export), - AT_OFF ? 
obd_timeout : - lustre_msg_get_timeout(req->rq_reqmsg), 1); + if (!AT_OFF) + to = lustre_msg_get_timeout(req->rq_reqmsg); + extend_recovery_timer(class_exp2obd(req->rq_export), to); } reqcopy_put: RETURN(rc); @@ -1943,11 +1946,6 @@ void target_recovery_init(struct lu_target *lut, svc_handler_t handler) obd->obd_recovery_start = 0; obd->obd_recovery_end = 0; - /* both values can be get from mount data already */ - if (obd->obd_recovery_timeout == 0) - obd->obd_recovery_timeout = OBD_RECOVERY_TIME_SOFT; - if (obd->obd_recovery_time_hard == 0) - obd->obd_recovery_time_hard = OBD_RECOVERY_TIME_HARD; cfs_timer_init(&obd->obd_recovery_timer, target_recovery_expired, obd); target_start_recovery_thread(lut, handler); } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index e1cdd5c..8a1f9d4 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5297,11 +5297,11 @@ static int mdt_upcall(const struct lu_env *env, struct md_device *md, RETURN(rc); } -static int mdt_obd_notify(struct obd_device *host, +static int mdt_obd_notify(struct obd_device *obd, struct obd_device *watched, enum obd_notify_event ev, void *data) { - struct mdt_device *mdt = mdt_dev(host->obd_lu_dev); + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); #ifdef HAVE_QUOTA_SUPPORT struct md_device *next = mdt->mdt_child; #endif @@ -5309,6 +5309,9 @@ static int mdt_obd_notify(struct obd_device *host, switch (ev) { case OBD_NOTIFY_CONFIG: + /* reset recovery timeout in case it has already started */ + target_start_recovery_timer(obd); + mdt_allow_cli(mdt, (unsigned long)data); #ifdef HAVE_QUOTA_SUPPORT diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 4a8b91f..6e18246 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1295,11 +1295,11 @@ out_mgc: server_notify_target(sb, obd); - /* log has been fully processed */ - obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG); - /* calculate recovery timeout, do it after 
lustre_process_log */ server_calc_timeout(lsi, obd); + + /* log has been fully processed */ + obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG); } RETURN(rc); @@ -1916,7 +1916,7 @@ void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd) } /* we're done */ - obd->obd_recovery_timeout = soft; + obd->obd_recovery_timeout = max(obd->obd_recovery_timeout, soft); obd->obd_recovery_time_hard = hard; obd->obd_recovery_ir_factor = factor; } diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index e740d16..07413a2 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -4702,6 +4702,22 @@ static int filter_process_config(struct obd_device *obd, obd_count len, return rc; } +static int filter_notify(struct obd_device *obd, + struct obd_device *unused, + enum obd_notify_event ev, void *data) +{ + switch (ev) { + case OBD_NOTIFY_CONFIG: + /* reset recovery timeout in case it has already started */ + target_start_recovery_timer(obd); + break; + default: + CDEBUG(D_INFO, "%s: Unhandled notification %#x\n", + obd->obd_name, ev); + } + return 0; +} + static struct lvfs_callback_ops filter_lvfs_ops = { l_fid2dentry: filter_lvfs_fid2dentry, }; @@ -4736,6 +4752,7 @@ static struct obd_ops filter_obd_ops = { .o_iocontrol = filter_iocontrol, .o_health_check = filter_health_check, .o_process_config = filter_process_config, + .o_notify = filter_notify, }; quota_interface_t *filter_quota_interface_ref; -- 1.8.3.1