From 6fcd05ba750e4eac627721752da8d2265b1405f8 Mon Sep 17 00:00:00 2001
From: nathan
Date: Thu, 9 Aug 2007 15:36:59 +0000
Subject: [PATCH] b=13147 i=tappro i=fanyong block reactivating mgc import
 until all deactivates complete Only an issue when failing back MDT/MGS to
 itself (testing)

---
Reviewer notes (free text placed after the three-dash separator and before
the first file diff; it is not part of the commit message or of any hunk):

 * lustre_import.h adds the counter imp_inval_count to struct obd_import;
   class_new_import() sets it to 0.
 * ptlrpc_invalidate_import() increments the counter right after its local
   declarations and, immediately before its closing brace, decrements it
   and signals imp_recovery_waitq.  The middle of that function is not
   part of this patch, so whether every exit path reaches the decrement
   cannot be confirmed from here.
 * mgc_request.c moves the reconnect sequence out of mgc_set_info_async()
   into the new mgc_reconnect_import().  After invalidating and
   disconnecting the import, the helper waits on imp_recovery_waitq until
   imp_inval_count reads 0, then clears obd_no_recov, activates the import
   and starts recovery.  Two comments from the old inline code ("can't put
   these in obdclass, module loop" and "See client_disconnect_export") are
   dropped by the move.
 * NOTE(review): when l_wait_event() returns non-zero the helper only logs
   "Interrupted, inval=%d" and still goes on to activate the import;
   confirm that proceeding while invalidates are outstanding is intended.
 * NOTE(review): mgc_reconnect_import() always returns 0, the result of
   ptlrpc_recover_import() is discarded, and the single caller added here
   ignores the return value.
 * NOTE(review): mgc_reconnect_import() is added without "static" and this
   patch adds no prototype for it; confirm whether other files use it.
 * NOTE(review): cfs_waitq_signal() presumably wakes a single waiter;
   verify that is sufficient if several threads can wait at once.
 * test-framework.sh: facet_failover() now calls
   error "Restart of $facet failed" when the start command fails.
 * Line breaks and leading indentation in this file were reconstructed
   from the hunk line counts; whitespace inside context lines may differ
   from the target tree, so verify before applying.

 lustre/ChangeLog               |  5 +++++
 lustre/include/lustre_import.h |  1 +
 lustre/mgc/mgc_request.c       | 43 ++++++++++++++++++++++++++++--------------
 lustre/obdclass/genops.c       |  1 +
 lustre/ptlrpc/import.c         |  5 +++++
 lustre/tests/test-framework.sh |  2 +-
 6 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/lustre/ChangeLog b/lustre/ChangeLog
index edc8a2f..1a9d540 100644
--- a/lustre/ChangeLog
+++ b/lustre/ChangeLog
@@ -79,6 +79,11 @@ Details : Port older jbd statistics patch for sles10
          should be installed. It is versioned separately from Lustre and
          may be released separately in future.
 
+Severity : minor
+Bugzilla : 13147
+Description: block reactivating mgc import until all deactivates complete
+Details : Fix race when failing back MDT/MGS to itself (testing)
+
 Severity : enhancement
 Bugzilla : 12194
 Description: add optional extra BUILD_VERSION info
diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h
index b9dcf85..542d073 100644
--- a/lustre/include/lustre_import.h
+++ b/lustre/include/lustre_import.h
@@ -70,6 +70,7 @@ struct obd_import {
 
         atomic_t imp_inflight;
         atomic_t imp_replay_inflight;
+        atomic_t imp_inval_count;
         enum lustre_imp_state imp_state;
         int imp_generation;
         __u32 imp_conn_cnt;
diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c
index e251dde..5de0f95 100644
--- a/lustre/mgc/mgc_request.c
+++ b/lustre/mgc/mgc_request.c
@@ -730,6 +730,33 @@ static int mgc_target_register(struct obd_export *exp,
         RETURN(rc);
 }
 
+int mgc_reconnect_import(struct obd_import *imp)
+{
+        /* Force a new connect attempt */
+        ptlrpc_invalidate_import(imp);
+        /* Do a fresh connect next time by zeroing the handle */
+        ptlrpc_disconnect_import(imp, 1);
+        /* Wait for all invalidate calls to finish */
+        if (atomic_read(&imp->imp_inval_count) > 0) {
+                int rc;
+                struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
+                rc = l_wait_event(imp->imp_recovery_waitq,
+                                  (atomic_read(&imp->imp_inval_count) == 0),
+                                  &lwi);
+                if (rc)
+                        CERROR("Interrupted, inval=%d\n",
+                               atomic_read(&imp->imp_inval_count));
+        }
+
+        /* Allow reconnect attempts */
+        imp->imp_obd->obd_no_recov = 0;
+        /* Remove 'invalid' flag */
+        ptlrpc_activate_import(imp);
+        /* Attempt a new connect */
+        ptlrpc_recover_import(imp, NULL);
+        return 0;
+}
+
 int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
                        void *key, obd_count vallen, void *val,
                        struct ptlrpc_request_set *set)
@@ -767,20 +794,8 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
                        imp->imp_replayable, imp->imp_obd->obd_replayable,
                        ptlrpc_import_state_name(imp->imp_state));
                 /* Resurrect if we previously died */
-                if (imp->imp_invalid || value > 1) {
-                        /* Force a new connect attempt */
-                        /* (can't put these in obdclass, module loop) */
-                        ptlrpc_invalidate_import(imp);
-                        /* Do a fresh connect next time by zeroing the handle */
-                        ptlrpc_disconnect_import(imp, 1);
-                        /* See client_disconnect_export */
-                        /* Allow reconnect attempts */
-                        imp->imp_obd->obd_no_recov = 0;
-                        /* Remove 'invalid' flag */
-                        ptlrpc_activate_import(imp);
-                        /* Attempt a new connect */
-                        ptlrpc_recover_import(imp, NULL);
-                }
+                if (imp->imp_invalid || value > 1)
+                        mgc_reconnect_import(imp);
                 RETURN(0);
         }
         /* FIXME move this to mgc_process_config */
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c
index e3b4446..9538ee3 100644
--- a/lustre/obdclass/genops.c
+++ b/lustre/obdclass/genops.c
@@ -842,6 +842,7 @@ struct obd_import *class_new_import(struct obd_device *obd)
         atomic_set(&imp->imp_refcount, 2);
         atomic_set(&imp->imp_inflight, 0);
         atomic_set(&imp->imp_replay_inflight, 0);
+        atomic_set(&imp->imp_inval_count, 0);
         CFS_INIT_LIST_HEAD(&imp->imp_conn_list);
         CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link);
         class_handle_hash(&imp->imp_handle, import_handle_addref);
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
index cb0209d..0778530 100644
--- a/lustre/ptlrpc/import.c
+++ b/lustre/ptlrpc/import.c
@@ -200,6 +200,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
         struct l_wait_info lwi;
         int rc;
 
+        atomic_inc(&imp->imp_inval_count);
+
         ptlrpc_deactivate_import(imp);
 
         LASSERT(imp->imp_invalid);
@@ -217,6 +219,9 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
 
         obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
         sptlrpc_import_flush_all_ctx(imp);
+
+        atomic_dec(&imp->imp_inval_count);
+        cfs_waitq_signal(&imp->imp_recovery_waitq);
 }
 
 /* unset imp_invalid */
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index a0f0320..37864d5 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -498,7 +498,7 @@ facet_failover() {
     wait_for $facet
     local dev=${facet}_dev
     local opt=${facet}_opt
-    start $facet ${!dev} ${!opt}
+    start $facet ${!dev} ${!opt} || error "Restart of $facet failed"
 }
 
 obd_name() {
-- 
1.8.3.1