From 676c8912ee3ce99c1e3d5dd05788f698451de659 Mon Sep 17 00:00:00 2001 From: nathan Date: Fri, 28 Apr 2006 20:04:03 +0000 Subject: [PATCH] Branch b1_5 Narrow the window on connect / shutdown race --- lustre/ldlm/ldlm_lib.c | 15 +++++++++++++-- lustre/ldlm/ldlm_lockd.c | 5 +---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 0b9945d..fd3fa69 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -511,7 +511,7 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) { - struct obd_device *target; + struct obd_device *target, *targref = NULL; struct obd_export *export = NULL; struct obd_import *revimp; struct lustre_handle conn; @@ -556,6 +556,11 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) GOTO(out, rc = -ENODEV); } + /* Make sure the target isn't cleaned up while we're here. Yes, + there's still a race between the above check and our incref here. + Really, class_uuid2obd should take the ref. */ + targref = class_incref(target); + LASSERT_REQSWAB (req, 1); str = lustre_msg_string(req->rq_reqmsg, 1, sizeof(cluuid) - 1); if (str == NULL) { @@ -715,7 +720,11 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) * drop any previous reference the request had, but we don't want * that to go to zero before we get our new export reference. */ export = class_conn2export(&conn); - LASSERT(export != NULL); + + /* It's possible that the connection fails if this target is shutting + down. */ + if (!export) + GOTO(out, rc = -ENODEV); /* If the client and the server are the same node, we will already * have an export that really points to the client's DLM export, @@ -775,6 +784,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) out: if (export) export->exp_connecting = 0; + if (targref) + class_decref(targref); if (rc) req->rq_status = rc; RETURN(rc); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index b801f01..3e150d5 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1585,10 +1585,7 @@ static int ldlm_setup(void) spin_lock_init(&waiting_locks_spinlock); cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0); - /* Using CLONE_FILES instead of CLONE_FS here causes failures in - conf-sanity test 21. But using CLONE_FS can cause problems - if the daemonize happens between push/pop_ctxt... */ - rc = cfs_kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS); + rc = cfs_kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FILES); if (rc < 0) { CERROR("Cannot start ldlm expired-lock thread: %d\n", rc); GOTO(out_thread, rc); -- 1.8.3.1