From 8d4c3cd3a2a45cc1295897713ba41c5cc36597a2 Mon Sep 17 00:00:00 2001 From: jacob Date: Fri, 27 May 2005 21:34:59 +0000 Subject: [PATCH] b=6377 r=adilger Use obd_recovering instead of obd_recoverable_clients to check if the obd is in recovery. abort_recovery didn't set obd_recoverable_clients to 0 (which it now does), so if recovery was aborted, the server would never ping-evict clients (which can lead to extra exports, possibly requiring recovery to be aborted). --- lustre/include/linux/obd_class.h | 3 +++ lustre/ldlm/ldlm_lib.c | 1 + lustre/obdclass/genops.c | 48 ++++++++++++++++++++++++++-------------- lustre/obdclass/obd_config.c | 4 +++- 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 00d3e86..f306d12 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -80,6 +80,9 @@ void oig_complete_one(struct obd_io_group *oig, void oig_release(struct obd_io_group *oig); int oig_wait(struct obd_io_group *oig); +/* buf should be len PTL_NALFMT_SIZE */ +char *obd_export_nid2str(struct obd_export *exp, char *buf); + /* config.c */ int class_process_config(struct lustre_cfg *lcfg); int class_attach(struct lustre_cfg *lcfg); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 7b3706e..0db65ce 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -886,6 +886,7 @@ void target_abort_recovery(void *data) return; } obd->obd_recovering = obd->obd_abort_recovery = 0; + obd->obd_recoverable_clients = 0; target_cancel_recovery_timer(obd); spin_unlock_bh(&obd->obd_processing_task_lock); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index a109d1a..cb8b088 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1076,27 +1076,15 @@ static int ping_evictor_main(void *arg) if (expire_time > exp->exp_last_request_time) { char ipbuf[PTL_NALFMT_SIZE]; - struct ptlrpc_peer *peer; - - peer = exp->exp_connection ? - &exp->exp_connection->c_peer : NULL; - - if (peer && peer->peer_ni) { - portals_nid2str(peer->peer_ni->pni_number, - peer->peer_id.nid, - ipbuf); - } LCONSOLE_WARN("%s hasn't heard from %s in %ld " "seconds. I think it's dead, " "and I am evicting it.\n", obd->obd_name, - (peer && peer->peer_ni) ? - ipbuf : - (char *)exp->exp_client_uuid.uuid, - (long)(CURRENT_SECONDS - - exp->exp_last_request_time)); - + obd_export_nid2str(exp, ipbuf), + (long)(CURRENT_SECONDS - + exp->exp_last_request_time)); + ping_evictor_fail_export(exp); } else { /* List is sorted, so everyone below is ok */ @@ -1155,6 +1143,9 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) { struct obd_export *oldest_exp; time_t oldest_time; + + ENTRY; + LASSERT(exp); /* Compensate for slow machines, etc, by faking our request time @@ -1177,6 +1168,7 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) if (list_empty(&exp->exp_obd_chain_timed)) { /* this one is not timed */ spin_unlock(&exp->exp_obd->obd_dev_lock); + EXIT; return; } @@ -1187,9 +1179,11 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) oldest_time = oldest_exp->exp_last_request_time; spin_unlock(&exp->exp_obd->obd_dev_lock); - if (exp->exp_obd->obd_recoverable_clients > 0) + if (exp->exp_obd->obd_recovering) { /* be nice to everyone during recovery */ + EXIT; return; + } /* Note - racing to start/reset the obd_eviction timer is safe */ if (exp->exp_obd->obd_eviction_timer == 0) { @@ -1216,5 +1210,25 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) exp->exp_obd->obd_eviction_timer = 0; } } + + EXIT; } +char *obd_export_nid2str(struct obd_export *exp, char *ipbuf) +{ + struct ptlrpc_peer *peer; + + peer = exp->exp_connection + ? &exp->exp_connection->c_peer + : NULL; + + if (peer && peer->peer_ni) { + portals_nid2str(peer->peer_ni->pni_number, + peer->peer_id.nid, + ipbuf); + } else { + snprintf(ipbuf, PTL_NALFMT_SIZE, "(no nid)"); + } + + return ipbuf; +} diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 409964c..a012b65 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -266,6 +266,7 @@ int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg) static void dump_exports(struct obd_device *obd) { struct obd_export *exp, *n; + char ipbuf[PTL_NALFMT_SIZE]; list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) { struct ptlrpc_reply_state *rs; @@ -279,8 +280,9 @@ static void dump_exports(struct obd_device *obd) nreplies++; } - CDEBUG(D_IOCTL, "%s: %p %s %d %d %d: %p %s\n", + CDEBUG(D_IOCTL, "%s: %p %s %s %d %d %d: %p %s\n", obd->obd_name, exp, exp->exp_client_uuid.uuid, + obd_export_nid2str(exp, ipbuf), atomic_read(&exp->exp_refcount), exp->exp_failed, nreplies, first_reply, nreplies > 3 ? "..." : ""); -- 1.8.3.1