Whamcloud - gitweb
Branch b1_5
author nathan <nathan>
Fri, 28 Apr 2006 20:04:03 +0000 (20:04 +0000)
committer nathan <nathan>
Fri, 28 Apr 2006 20:04:03 +0000 (20:04 +0000)
Narrow the window on connect / shutdown race

lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lockd.c

index 0b9945d..fd3fa69 100644 (file)
@@ -511,7 +511,7 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
 
 int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
 {
-        struct obd_device *target;
+        struct obd_device *target, *targref = NULL;
         struct obd_export *export = NULL;
         struct obd_import *revimp;
         struct lustre_handle conn;
@@ -556,6 +556,11 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                 GOTO(out, rc = -ENODEV);
         }
 
+        /* Make sure the target isn't cleaned up while we're here. Yes, 
+           there's still a race between the above check and our incref here. 
+           Really, class_uuid2obd should take the ref. */
+        targref = class_incref(target);
+
         LASSERT_REQSWAB (req, 1);
         str = lustre_msg_string(req->rq_reqmsg, 1, sizeof(cluuid) - 1);
         if (str == NULL) {
@@ -715,7 +720,11 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
          * drop any previous reference the request had, but we don't want
          * that to go to zero before we get our new export reference. */
         export = class_conn2export(&conn);
-        LASSERT(export != NULL);
+
+        /* It's possible that the connection fails if this target is shutting
+           down. */
+        if (!export)
+                GOTO(out, rc = -ENODEV);
 
         /* If the client and the server are the same node, we will already
          * have an export that really points to the client's DLM export,
@@ -775,6 +784,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
 out:
         if (export)
                 export->exp_connecting = 0;
+        if (targref) 
+                class_decref(targref);
         if (rc)
                 req->rq_status = rc;
         RETURN(rc);
index b801f01..3e150d5 100644 (file)
@@ -1585,10 +1585,7 @@ static int ldlm_setup(void)
         spin_lock_init(&waiting_locks_spinlock);
         cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0);
 
-        /* Using CLONE_FILES instead of CLONE_FS here causes failures in 
-           conf-sanity test 21.  But using CLONE_FS can cause problems
-           if the daemonize happens between push/pop_ctxt... */
-        rc = cfs_kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS);
+        rc = cfs_kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FILES);
         if (rc < 0) {
                 CERROR("Cannot start ldlm expired-lock thread: %d\n", rc);
                 GOTO(out_thread, rc);