From d572822bd36b94c149f6e9e055a53f89d34406d4 Mon Sep 17 00:00:00 2001 From: "Alexander.Boyko" Date: Wed, 21 Nov 2012 10:32:54 +0400 Subject: [PATCH] LU-2368 recovery: fix for obd_stale_clients counter class_fail_export() occurred during the recovery process, when the MDS connected to the OST from a new IP (failover happened), and caused perpetual recovery. - in class_fail_export() update obd_stale_clients counter if recovery is in progress. Signed-off-by: Mikhail Pershin Signed-off-by: Alexander Boyko Xyratex-bug-id: MRP-738 Change-Id: I2b2fe5853a9b0713a0a9357713f612b331505c6f Reviewed-on: http://review.whamcloud.com/4641 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Keith Mannthey Reviewed-by: Keith Mannthey Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_lib.c | 7 ++----- lustre/obdclass/genops.c | 36 +++++++++++++++++++++--------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index b0e4294..18a4a61 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1558,11 +1558,8 @@ static int check_for_clients(struct obd_device *obd) if (obd->obd_abort_recovery || obd->obd_recovery_expired) return 1; LASSERT(obd->obd_connected_clients <= obd->obd_max_recoverable_clients); - if (obd->obd_no_conn == 0 && - obd->obd_connected_clients + obd->obd_stale_clients == - obd->obd_max_recoverable_clients) - return 1; - return 0; + return (obd->obd_connected_clients + obd->obd_stale_clients == + obd->obd_max_recoverable_clients); } static int check_for_next_transno(struct obd_device *obd) diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index fad1e8e..3c3d256 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -718,12 +718,20 @@ void class_export_recovery_cleanup(struct obd_export *exp) cfs_spin_lock(&obd->obd_recovery_task_lock); if (exp->exp_delayed) obd->obd_delayed_clients--; - if (obd->obd_recovering && exp->exp_in_recovery) { - 
cfs_spin_lock(&exp->exp_lock); - exp->exp_in_recovery = 0; - cfs_spin_unlock(&exp->exp_lock); - LASSERT(obd->obd_connected_clients); - obd->obd_connected_clients--; + if (obd->obd_recovering) { + if (exp->exp_in_recovery) { + cfs_spin_lock(&exp->exp_lock); + exp->exp_in_recovery = 0; + cfs_spin_unlock(&exp->exp_lock); + LASSERT(obd->obd_connected_clients); + obd->obd_connected_clients--; + } + /* if called during recovery then should update + * obd_stale_clients counter, + * lightweight exports are not counted */ + if (exp->exp_failed && + (exp->exp_connect_flags & OBD_CONNECT_LIGHTWEIGHT) == 0) + exp->exp_obd->obd_stale_clients++; } cfs_spin_unlock(&obd->obd_recovery_task_lock); /** Cleanup req replay fields */ @@ -1263,7 +1271,7 @@ void class_disconnect_stale_exports(struct obd_device *obd, continue; cfs_spin_lock(&exp->exp_lock); - if (test_export(exp)) { + if (exp->exp_failed || test_export(exp)) { cfs_spin_unlock(&exp->exp_lock); continue; } @@ -1278,15 +1286,13 @@ void class_disconnect_stale_exports(struct obd_device *obd, libcfs_nid2str(exp->exp_connection->c_peer.nid)); print_export_data(exp, "EVICTING", 0); } - cfs_spin_unlock(&obd->obd_dev_lock); + cfs_spin_unlock(&obd->obd_dev_lock); - if (evicted) { - LCONSOLE_WARN("%s: disconnecting %d stale clients\n", - obd->obd_name, evicted); - obd->obd_stale_clients += evicted; - } - class_disconnect_export_list(&work_list, exp_flags_from_obd(obd) | - OBD_OPT_ABORT_RECOV); + if (evicted) + LCONSOLE_WARN("%s: disconnecting %d stale clients\n", + obd->obd_name, evicted); + class_disconnect_export_list(&work_list, exp_flags_from_obd(obd) | + OBD_OPT_ABORT_RECOV); EXIT; } EXPORT_SYMBOL(class_disconnect_stale_exports); -- 1.8.3.1