From d0446a8c44f38a276e6d52e7e5f35d844c71e482 Mon Sep 17 00:00:00 2001 From: ccooper Date: Thu, 16 Sep 2004 20:27:50 +0000 Subject: [PATCH] b=3969 r=adilger - landing b1_2_bug3969 --- lustre/ChangeLog | 1 + lustre/include/linux/lustre_ha.h | 2 +- lustre/ldlm/ldlm_lib.c | 2 +- lustre/ptlrpc/import.c | 48 +++++++++++++++++++++++++++++++--------- lustre/ptlrpc/recover.c | 2 +- 5 files changed, 42 insertions(+), 13 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index df9f65d..39abe20 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -34,6 +34,7 @@ - don't match INVALID dentries from d_lookup and spin (3784) - hold dcache_lock while marking dentries INVALID and hashing (4255) - fix invalid assertion in ptlrpc_set_wait (3880) + - create a new thread to do import eviction to avoid deadlock (3969) * miscellania - add libwrap support for the TCP acceptor (3996) - add /proc/sys/portals/routes for non-root route listing (3994) diff --git a/lustre/include/linux/lustre_ha.h b/lustre/include/linux/lustre_ha.h index fe83b7d9..eb44bbe 100644 --- a/lustre/include/linux/lustre_ha.h +++ b/lustre/include/linux/lustre_ha.h @@ -20,7 +20,7 @@ void ptlrpc_wake_delayed(struct obd_import *imp); int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid); int ptlrpc_set_import_active(struct obd_import *imp, int active); void ptlrpc_deactivate_import(struct obd_import *imp); -void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc); +void ptlrpc_invalidate_import(struct obd_import *imp); void ptlrpc_fail_import(struct obd_import *imp, int generation); void ptlrpc_fail_export(struct obd_export *exp); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 2cfe883..b3690b5 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -318,7 +318,7 @@ int client_disconnect_export(struct obd_export *exp, int failover) /* Yeah, obd_no_recov also (mainly) means "forced shutdown". 
*/ if (obd->obd_no_recov) - ptlrpc_invalidate_import(imp, 0); + ptlrpc_invalidate_import(imp); else rc = ptlrpc_disconnect_import(imp); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 40e7d72..77ffc9f 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -149,10 +149,9 @@ void ptlrpc_deactivate_import(struct obd_import *imp) * waiting for requests to complete. Ugly, yes, but I don't see an * cleaner way right now. */ -void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc) +void ptlrpc_invalidate_import(struct obd_import *imp) { struct l_wait_info lwi; - int inflight = 0; int rc; if (!imp->imp_invalid) @@ -160,19 +159,17 @@ void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc) LASSERT(imp->imp_invalid); - if (in_rpc) - inflight = 1; /* wait for all requests to error out and call completion callbacks */ lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), NULL, NULL, NULL); rc = l_wait_event(imp->imp_recovery_waitq, - (atomic_read(&imp->imp_inflight) == inflight), + (atomic_read(&imp->imp_inflight) == 0), &lwi); if (rc) - CERROR("%s: rc = %d waiting for callback (%d != %d)\n", + CERROR("%s: rc = %d waiting for callback (%d != 0)\n", imp->imp_target_uuid.uuid, rc, - atomic_read(&imp->imp_inflight), inflight); + atomic_read(&imp->imp_inflight)); obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE); } @@ -493,6 +490,35 @@ static int signal_completed_replay(struct obd_import *imp) RETURN(0); } +static int ptlrpc_invalidate_import_thread(void *data) +{ + struct obd_import *imp = data; + unsigned long flags; + + ENTRY; + + lock_kernel(); + ptlrpc_daemonize(); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + THREAD_NAME(current->comm, sizeof(current->comm), "ll_imp_inval"); + unlock_kernel(); + + CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n", + imp->imp_obd->obd_name, imp->imp_target_uuid.uuid, 
imp->imp_connection->c_remote_uuid.uuid); + + ptlrpc_invalidate_import(imp); + + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + ptlrpc_import_recovery_state_machine(imp); + + RETURN(0); +} + int ptlrpc_import_recovery_state_machine(struct obd_import *imp) { int rc = 0; @@ -503,9 +529,11 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid); - ptlrpc_invalidate_import(imp, 1); - - IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + rc = kernel_thread(ptlrpc_invalidate_import_thread, imp, + CLONE_VM | CLONE_FILES); + if (rc < 0) + CERROR("error starting invalidate thread: %d\n", rc); + RETURN(rc); } if (imp->imp_state == LUSTRE_IMP_REPLAY) { diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 9c8b56e..824481b 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -316,7 +316,7 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) /* When deactivating, mark import invalid, and abort in-flight * requests. */ if (!active) { - ptlrpc_invalidate_import(imp, 0); + ptlrpc_invalidate_import(imp); imp->imp_deactive = 1; } -- 1.8.3.1