Whamcloud - gitweb
LU-8500 ldlm: fix export reference problem
[fs/lustre-release.git] / lustre / ptlrpc / import.c
index 6d798ca..ef0df00 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2011, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -372,10 +372,9 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
                                                  "still on delayed list");
                                }
 
-                               CERROR("%s: RPCs in \"%s\" phase found (%d). "
+                               CERROR("%s: Unregistering RPCs found (%d). "
                                       "Network is sluggish? Waiting them "
                                       "to error out.\n", cli_tgt,
-                                      ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
                                       atomic_read(&imp->imp_unregistering));
                        }
                        spin_unlock(&imp->imp_lock);
@@ -518,7 +517,7 @@ static int import_select_connection(struct obd_import *imp)
        }
 
        list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
-                CDEBUG(D_HA, "%s: connect to NID %s last attempt "LPU64"\n",
+               CDEBUG(D_HA, "%s: connect to NID %s last attempt %llu\n",
                        imp->imp_obd->obd_name,
                        libcfs_nid2str(conn->oic_conn->c_peer.nid),
                        conn->oic_last_attempt);
@@ -647,19 +646,19 @@ static int ptlrpc_first_transno(struct obd_import *imp, __u64 *transno)
  */
 int ptlrpc_connect_import(struct obd_import *imp)
 {
-        struct obd_device *obd = imp->imp_obd;
-        int initial_connect = 0;
-        int set_transno = 0;
-        __u64 committed_before_reconnect = 0;
-        struct ptlrpc_request *request;
-        char *bufs[] = { NULL,
-                         obd2cli_tgt(imp->imp_obd),
-                         obd->obd_uuid.uuid,
-                         (char *)&imp->imp_dlm_handle,
-                         (char *)&imp->imp_connect_data };
-        struct ptlrpc_connect_async_args *aa;
-        int rc;
-        ENTRY;
+       struct obd_device *obd = imp->imp_obd;
+       int initial_connect = 0;
+       int set_transno = 0;
+       __u64 committed_before_reconnect = 0;
+       struct ptlrpc_request *request;
+       char *bufs[] = { NULL,
+                        obd2cli_tgt(imp->imp_obd),
+                        obd->obd_uuid.uuid,
+                        (char *)&imp->imp_dlm_handle,
+                        (char *)&imp->imp_connect_data };
+       struct ptlrpc_connect_async_args *aa;
+       int rc;
+       ENTRY;
 
        spin_lock(&imp->imp_lock);
        if (imp->imp_state == LUSTRE_IMP_CLOSED) {
@@ -670,97 +669,99 @@ int ptlrpc_connect_import(struct obd_import *imp)
                spin_unlock(&imp->imp_lock);
                CERROR("already connected\n");
                RETURN(0);
-       } else if (imp->imp_state == LUSTRE_IMP_CONNECTING) {
+       } else if (imp->imp_state == LUSTRE_IMP_CONNECTING ||
+                  imp->imp_connected) {
                spin_unlock(&imp->imp_lock);
                CERROR("already connecting\n");
                RETURN(-EALREADY);
        }
 
-        IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
+       IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CONNECTING);
 
-        imp->imp_conn_cnt++;
-        imp->imp_resend_replay = 0;
+       imp->imp_conn_cnt++;
+       imp->imp_resend_replay = 0;
 
-        if (!lustre_handle_is_used(&imp->imp_remote_handle))
-                initial_connect = 1;
-        else
-                committed_before_reconnect = imp->imp_peer_committed_transno;
+       if (!lustre_handle_is_used(&imp->imp_remote_handle))
+               initial_connect = 1;
+       else
+               committed_before_reconnect = imp->imp_peer_committed_transno;
 
-        set_transno = ptlrpc_first_transno(imp,
-                                           &imp->imp_connect_data.ocd_transno);
+       set_transno = ptlrpc_first_transno(imp,
+                                          &imp->imp_connect_data.ocd_transno);
        spin_unlock(&imp->imp_lock);
 
-        rc = import_select_connection(imp);
-        if (rc)
-                GOTO(out, rc);
+       rc = import_select_connection(imp);
+       if (rc)
+               GOTO(out, rc);
 
        rc = sptlrpc_import_sec_adapt(imp, NULL, NULL);
-        if (rc)
-                GOTO(out, rc);
+       if (rc)
+               GOTO(out, rc);
 
-        /* Reset connect flags to the originally requested flags, in case
-         * the server is updated on-the-fly we will get the new features. */
-        imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig;
+       /* Reset connect flags to the originally requested flags, in case
+        * the server is updated on-the-fly we will get the new features. */
+       imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig;
+       imp->imp_connect_data.ocd_connect_flags2 = imp->imp_connect_flags2_orig;
        /* Reset ocd_version each time so the server knows the exact versions */
        imp->imp_connect_data.ocd_version = LUSTRE_VERSION_CODE;
-        imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
-        imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
-
-        rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd,
-                           &obd->obd_uuid, &imp->imp_connect_data, NULL);
-        if (rc)
-                GOTO(out, rc);
-
-        request = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT);
-        if (request == NULL)
-                GOTO(out, rc = -ENOMEM);
-
-        rc = ptlrpc_request_bufs_pack(request, LUSTRE_OBD_VERSION,
-                                      imp->imp_connect_op, bufs, NULL);
-        if (rc) {
-                ptlrpc_request_free(request);
-                GOTO(out, rc);
-        }
+       imp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
+       imp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+
+       rc = obd_reconnect(NULL, imp->imp_obd->obd_self_export, obd,
+                          &obd->obd_uuid, &imp->imp_connect_data, NULL);
+       if (rc)
+               GOTO(out, rc);
 
-        /* Report the rpc service time to the server so that it knows how long
-         * to wait for clients to join recovery */
-        lustre_msg_set_service_time(request->rq_reqmsg,
-                                    at_timeout2est(request->rq_timeout));
-
-        /* The amount of time we give the server to process the connect req.
-         * import_select_connection will increase the net latency on
-         * repeated reconnect attempts to cover slow networks.
-         * We override/ignore the server rpc completion estimate here,
-         * which may be large if this is a reconnect attempt */
-        request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
-        lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
-
-        request->rq_no_resend = request->rq_no_delay = 1;
-        request->rq_send_state = LUSTRE_IMP_CONNECTING;
-        /* Allow a slightly larger reply for future growth compatibility */
-        req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
-                             sizeof(struct obd_connect_data)+16*sizeof(__u64));
-        ptlrpc_request_set_replen(request);
-        request->rq_interpret_reply = ptlrpc_connect_interpret;
-
-        CLASSERT(sizeof (*aa) <= sizeof (request->rq_async_args));
-        aa = ptlrpc_req_async_args(request);
-        memset(aa, 0, sizeof *aa);
-
-        aa->pcaa_peer_committed = committed_before_reconnect;
-        aa->pcaa_initial_connect = initial_connect;
-
-        if (aa->pcaa_initial_connect) {
+       request = ptlrpc_request_alloc(imp, &RQF_MDS_CONNECT);
+       if (request == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       rc = ptlrpc_request_bufs_pack(request, LUSTRE_OBD_VERSION,
+                                     imp->imp_connect_op, bufs, NULL);
+       if (rc) {
+               ptlrpc_request_free(request);
+               GOTO(out, rc);
+       }
+
+       /* Report the rpc service time to the server so that it knows how long
+        * to wait for clients to join recovery */
+       lustre_msg_set_service_time(request->rq_reqmsg,
+                                   at_timeout2est(request->rq_timeout));
+
+       /* The amount of time we give the server to process the connect req.
+        * import_select_connection will increase the net latency on
+        * repeated reconnect attempts to cover slow networks.
+        * We override/ignore the server rpc completion estimate here,
+        * which may be large if this is a reconnect attempt */
+       request->rq_timeout = INITIAL_CONNECT_TIMEOUT;
+       lustre_msg_set_timeout(request->rq_reqmsg, request->rq_timeout);
+
+       request->rq_no_resend = request->rq_no_delay = 1;
+       request->rq_send_state = LUSTRE_IMP_CONNECTING;
+       /* Allow a slightly larger reply for future growth compatibility */
+       req_capsule_set_size(&request->rq_pill, &RMF_CONNECT_DATA, RCL_SERVER,
+                            sizeof(struct obd_connect_data)+16*sizeof(__u64));
+       ptlrpc_request_set_replen(request);
+       request->rq_interpret_reply = ptlrpc_connect_interpret;
+
+       CLASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
+       aa = ptlrpc_req_async_args(request);
+       memset(aa, 0, sizeof *aa);
+
+       aa->pcaa_peer_committed = committed_before_reconnect;
+       aa->pcaa_initial_connect = initial_connect;
+
+       if (aa->pcaa_initial_connect) {
                spin_lock(&imp->imp_lock);
                imp->imp_replayable = 1;
                spin_unlock(&imp->imp_lock);
-                lustre_msg_add_op_flags(request->rq_reqmsg,
-                                        MSG_CONNECT_INITIAL);
-        }
+               lustre_msg_add_op_flags(request->rq_reqmsg,
+                                       MSG_CONNECT_INITIAL);
+       }
 
-        if (set_transno)
-                lustre_msg_add_op_flags(request->rq_reqmsg,
-                                        MSG_CONNECT_TRANSNO);
+       if (set_transno)
+               lustre_msg_add_op_flags(request->rq_reqmsg,
+                                       MSG_CONNECT_TRANSNO);
 
        DEBUG_REQ(D_RPCTRACE, request, "(re)connect request (timeout %d)",
                  request->rq_timeout);
@@ -803,7 +804,7 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp,
            !(ocd->ocd_connect_flags & OBD_CONNECT_IBITS)) {
                LCONSOLE_WARN("%s: MDS %s does not support ibits "
                              "lock, either very old or invalid: "
-                             "requested "LPX64", replied "LPX64"\n",
+                             "requested %#llx, replied %#llx\n",
                              imp->imp_obd->obd_name,
                              imp->imp_connection->c_remote_uuid.uuid,
                              imp->imp_connect_flags_orig,
@@ -917,7 +918,7 @@ static int ptlrpc_connect_set_flags(struct obd_import *imp,
         * disable lru_resize, etc. */
        if (old_connect_flags != exp_connect_flags(exp) || init_connect) {
                CDEBUG(D_HA, "%s: Resetting ns_connect_flags to server "
-                            "flags: "LPX64"\n", imp->imp_obd->obd_name,
+                            "flags: %#llx\n", imp->imp_obd->obd_name,
                             ocd->ocd_connect_flags);
                imp->imp_obd->obd_namespace->ns_connect_flags =
                        ocd->ocd_connect_flags;
@@ -1009,11 +1010,16 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                ptlrpc_maybe_ping_import_soon(imp);
                GOTO(out, rc);
        }
+
+       /* LU-7558: indicate that we are interpreting the connect reply;
+        * ptlrpc_connect_import() will not try to reconnect until
+        * the interpret callback finishes. */
+       imp->imp_connected = 1;
        spin_unlock(&imp->imp_lock);
 
-        LASSERT(imp->imp_conn_current);
+       LASSERT(imp->imp_conn_current);
 
-        msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
+       msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
 
        ret = req_capsule_get_size(&request->rq_pill, &RMF_CONNECT_DATA,
                                   RCL_SERVER);
@@ -1043,16 +1049,33 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
 
        spin_unlock(&imp->imp_lock);
 
+       if (!exp) {
+               /* This could happen if export is cleaned during the
+                  connect attempt */
+               CERROR("%s: missing export after connect\n",
+                      imp->imp_obd->obd_name);
+               GOTO(out, rc = -ENODEV);
+       }
+
        /* check that server granted subset of flags we asked for. */
        if ((ocd->ocd_connect_flags & imp->imp_connect_flags_orig) !=
            ocd->ocd_connect_flags) {
-               CERROR("%s: Server didn't granted asked subset of flags: "
-                      "asked="LPX64" grranted="LPX64"\n",
-                      imp->imp_obd->obd_name,imp->imp_connect_flags_orig,
+               CERROR("%s: Server didn't grant requested subset of flags: "
+                      "asked=%#llx granted=%#llx\n",
+                      imp->imp_obd->obd_name, imp->imp_connect_flags_orig,
                       ocd->ocd_connect_flags);
                GOTO(out, rc = -EPROTO);
        }
 
+       if ((ocd->ocd_connect_flags2 & imp->imp_connect_flags2_orig) !=
+           ocd->ocd_connect_flags2) {
+               CERROR("%s: Server didn't grant requested subset of flags2: "
+                      "asked=%#llx granted=%#llx\n",
+                      imp->imp_obd->obd_name, imp->imp_connect_flags2_orig,
+                      ocd->ocd_connect_flags2);
+               GOTO(out, rc = -EPROTO);
+       }
+
        if (!(imp->imp_connect_flags_orig & OBD_CONNECT_LIGHTWEIGHT) &&
            (imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS) &&
            (imp->imp_connect_flags_orig & OBD_CONNECT_FID) &&
@@ -1079,13 +1102,6 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                }
        }
 
-       if (!exp) {
-               /* This could happen if export is cleaned during the
-                  connect attempt */
-               CERROR("%s: missing export after connect\n",
-                      imp->imp_obd->obd_name);
-               GOTO(out, rc = -ENODEV);
-       }
        old_connect_flags = exp_connect_flags(exp);
        exp->exp_connect_data = *ocd;
        imp->imp_obd->obd_self_export->exp_connect_data = *ocd;
@@ -1144,7 +1160,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                 if (!memcmp(&old_hdl, lustre_msg_get_handle(request->rq_repmsg),
                             sizeof (old_hdl))) {
                         LCONSOLE_WARN("Reconnect to %s (at @%s) failed due "
-                                      "bad handle "LPX64"\n",
+                                     "bad handle %#llx\n",
                                       obd2cli_tgt(imp->imp_obd),
                                       imp->imp_connection->c_remote_uuid.uuid,
                                       imp->imp_dlm_handle.cookie);
@@ -1166,7 +1182,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                          * with server again */
                         if ((MSG_CONNECT_RECOVERING & msg_flags)) {
                                 CDEBUG(level,"%s@%s changed server handle from "
-                                       LPX64" to "LPX64
+                                      "%#llx to %#llx"
                                        " but is still in recovery\n",
                                        obd2cli_tgt(imp->imp_obd),
                                        imp->imp_connection->c_remote_uuid.uuid,
@@ -1176,7 +1192,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                         } else {
                                 LCONSOLE_WARN("Evicted from %s (at %s) "
                                               "after server handle changed from "
-                                              LPX64" to "LPX64"\n",
+                                             "%#llx to %#llx\n",
                                               obd2cli_tgt(imp->imp_obd),
                                               imp->imp_connection-> \
                                               c_remote_uuid.uuid,
@@ -1222,6 +1238,7 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
                 imp->imp_remote_handle =
                                 *lustre_msg_get_handle(request->rq_repmsg);
                 imp->imp_last_replay_transno = 0;
+               imp->imp_replay_cursor = &imp->imp_committed_list;
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
         } else {
                 DEBUG_REQ(D_HA, request, "%s: evicting (reconnect/recover flags"
@@ -1240,8 +1257,8 @@ static int ptlrpc_connect_interpret(const struct lu_env *env,
         if (lustre_msg_get_last_committed(request->rq_repmsg) > 0 &&
             lustre_msg_get_last_committed(request->rq_repmsg) <
             aa->pcaa_peer_committed) {
-                CERROR("%s went back in time (transno "LPD64
-                       " was previously committed, server now claims "LPD64
+               CERROR("%s went back in time (transno %lld"
+                      " was previously committed, server now claims %lld"
                        ")!  See https://bugzilla.lustre.org/show_bug.cgi?"
                        "id=9646\n",
                        obd2cli_tgt(imp->imp_obd), aa->pcaa_peer_committed,
@@ -1257,12 +1274,18 @@ finish:
                       obd2cli_tgt(imp->imp_obd),
                       imp->imp_connection->c_remote_uuid.uuid);
                ptlrpc_connect_import(imp);
+               spin_lock(&imp->imp_lock);
+               imp->imp_connected = 0;
                imp->imp_connect_tried = 1;
+               spin_unlock(&imp->imp_lock);
                RETURN(0);
        }
 
 out:
+       spin_lock(&imp->imp_lock);
+       imp->imp_connected = 0;
        imp->imp_connect_tried = 1;
+       spin_unlock(&imp->imp_lock);
 
         if (rc != 0) {
                 IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON);