Whamcloud - gitweb
LU-11128 ptlrpc: new request vs disconnect race 80/32980/35
authorAlex Zhuravlev <bzzz@whamcloud.com>
Fri, 10 Aug 2018 17:03:11 +0000 (21:03 +0400)
committerOleg Drokin <green@whamcloud.com>
Tue, 2 Oct 2018 21:23:02 +0000 (21:23 +0000)
A new request can race with the disconnect-by-idle process.
The disconnect code detects this state and initiates a new connection.

Change-Id: I6acc913c371f2ae63f27151edce457c2a13118c5
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/32980
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ptlrpc/client.c
lustre/ptlrpc/import.c

index a3b8504..a167146 100644 (file)
@@ -867,6 +867,12 @@ ptlrpc_request_alloc_internal(struct obd_import *imp,
         struct ptlrpc_request *request;
        int connect = 0;
 
+       request = __ptlrpc_request_alloc(imp, pool);
+       if (request == NULL)
+               return NULL;
+
+       /* initiate connection if needed when the import has been
+        * referenced by the new request to avoid races with disconnect */
        if (unlikely(imp->imp_state == LUSTRE_IMP_IDLE)) {
                int rc;
                CDEBUG_LIMIT(imp->imp_idle_debug,
@@ -883,16 +889,14 @@ ptlrpc_request_alloc_internal(struct obd_import *imp,
                spin_unlock(&imp->imp_lock);
                if (connect) {
                        rc = ptlrpc_connect_import(imp);
-                       if (rc < 0)
+                       if (rc < 0) {
+                               ptlrpc_request_free(request);
                                return NULL;
+                       }
                        ptlrpc_pinger_add_import(imp);
                }
        }
 
-       request = __ptlrpc_request_alloc(imp, pool);
-       if (request == NULL)
-               return NULL;
-
         req_capsule_init(&request->rq_pill, request, RCL_CLIENT);
         req_capsule_set(&request->rq_pill, format);
         return request;
@@ -2346,7 +2350,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                 /* wait until all complete, interrupted, or an in-flight
                  * req times out */
                CDEBUG(D_RPCTRACE, "set %p going to sleep for %lld seconds\n",
-                       set, timeout);
+                       set, timeout);
 
                if ((timeout == 0 && !signal_pending(current)) ||
                    set->set_allow_intr)
index 241c1a7..8ca1dec 100644 (file)
@@ -1665,13 +1665,38 @@ static int ptlrpc_disconnect_idle_interpret(const struct lu_env *env,
                                            void *data, int rc)
 {
        struct obd_import *imp = req->rq_import;
+       int connect = 0;
+
+       DEBUG_REQ(D_HA, req, "inflight=%d, refcount=%d: rc = %d\n",
+                 atomic_read(&imp->imp_inflight),
+                 atomic_read(&imp->imp_refcount), rc);
 
-       LASSERT(imp->imp_state == LUSTRE_IMP_CONNECTING);
        spin_lock(&imp->imp_lock);
-       IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_IDLE);
-       memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+       /* DISCONNECT reply can be late and another connection can just
+        * be initiated. so we have to abort disconnection. */
+       if (req->rq_import_generation == imp->imp_generation &&
+           imp->imp_state != LUSTRE_IMP_CLOSED) {
+               LASSERTF(imp->imp_state == LUSTRE_IMP_CONNECTING,
+                        "%s\n", ptlrpc_import_state_name(imp->imp_state));
+               imp->imp_state = LUSTRE_IMP_IDLE;
+               memset(&imp->imp_remote_handle, 0,
+                      sizeof(imp->imp_remote_handle));
+               /* take our DISCONNECT into account */
+               if (atomic_read(&imp->imp_inflight) > 1) {
+                       imp->imp_generation++;
+                       imp->imp_initiated_at = imp->imp_generation;
+                       IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_NEW);
+                       connect = 1;
+               }
+       }
        spin_unlock(&imp->imp_lock);
 
+       if (connect) {
+               rc = ptlrpc_connect_import(imp);
+               if (rc >= 0)
+                       ptlrpc_pinger_add_import(imp);
+       }
+
        return 0;
 }