Whamcloud - gitweb
b=1803
author rread <rread>
Fri, 22 Aug 2003 02:18:59 +0000 (02:18 +0000)
committer rread <rread>
Fri, 22 Aug 2003 02:18:59 +0000 (02:18 +0000)
r=shaver

New import state machine, as documented on the lustre wiki in
ImportStates.

A new function, ptlrpc_connect_import, performs all import connects
and moves the import from the DISCON state to either FULL, EVICTED,
REPLAY, or RECOVER, depending on the situation. Unlike the levels, the
states are now exact, and the request->rq_send_state must match the
import state to be sent.

Passes recovery/01, replay-small, and replay-dual.

lustre/ldlm/ldlm_lib.c
lustre/mgmt/mgmt_cli.c
lustre/ptlrpc/import.c [new file with mode: 0644]
lustre/ptlrpc/pinger.c

index 568a97d..5c6b620 100644 (file)
 #include <linux/lustre_mds.h>
 #include <linux/lustre_net.h>
 
-int client_import_connect(struct lustre_handle *dlm_handle,
+int client_connect_import(struct lustre_handle *dlm_handle,
                           struct obd_device *obd,
                           struct obd_uuid *cluuid)
 {
         struct client_obd *cli = &obd->u.cli;
         struct obd_import *imp = cli->cl_import;
         struct obd_export *exp;
-        struct ptlrpc_request *request;
-        /* XXX maybe this is a good time to create a connect struct? */
-        int rc, size[] = {sizeof(imp->imp_target_uuid),
-                          sizeof(obd->obd_uuid),
-                          sizeof(*dlm_handle)};
-        char *tmp[] = {imp->imp_target_uuid.uuid,
-                       obd->obd_uuid.uuid,
-                       (char *)dlm_handle};
-        int msg_flags;
-
+        int rc;
         ENTRY;
+
         down(&cli->cl_sem);
         rc = class_connect(dlm_handle, obd, cluuid);
         if (rc)
@@ -68,44 +60,32 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         if (obd->obd_namespace == NULL)
                 GOTO(out_disco, rc = -ENOMEM);
 
-        request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
-        if (!request)
-                GOTO(out_ldlm, rc = -ENOMEM);
-
-        request->rq_level = LUSTRE_CONN_NEW;
-        request->rq_replen = lustre_msg_size(0, NULL);
-
-        lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER);
-
         imp->imp_dlm_handle = *dlm_handle;
-
-        imp->imp_conn_cnt++; 
-        imp->imp_level = LUSTRE_CONN_CON;
-        rc = ptlrpc_queue_wait(request);
-        if (rc) {
-                class_disconnect(dlm_handle, 0);
-                GOTO(out_req, rc);
+        imp->imp_state = LUSTRE_IMP_DISCON;
+        
+        rc = ptlrpc_connect_import(imp);
+        if (rc != 0) {
+                LASSERT (imp->imp_state == LUSTRE_IMP_DISCON);
+                GOTO(out_ldlm, rc);
         }
 
+        LASSERT (imp->imp_state == LUSTRE_IMP_FULL);
+
         exp = class_conn2export(dlm_handle);
-        exp->exp_connection = ptlrpc_connection_addref(request->rq_connection);
+        exp->exp_connection = ptlrpc_connection_addref(imp->imp_connection);
         class_export_put(exp);
 
-        msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
-        if (msg_flags & MSG_CONNECT_REPLAYABLE) {
-                imp->imp_replayable = 1;
+        if (imp->imp_replayable) {
                 CDEBUG(D_HA, "connected to replayable target: %s\n",
                        imp->imp_target_uuid.uuid);
                 ptlrpc_pinger_add_import(imp);
         }
-        imp->imp_level = LUSTRE_CONN_FULL;
-        imp->imp_remote_handle = request->rq_repmsg->handle;
+
         CDEBUG(D_HA, "local import: %p, remote handle: "LPX64"\n", imp,
                imp->imp_remote_handle.cookie);
 
         EXIT;
-out_req:
-        ptlrpc_req_finished(request);
+
         if (rc) {
 out_ldlm:
                 ldlm_namespace_free(obd->obd_namespace);
@@ -119,13 +99,12 @@ out_sem:
         return rc;
 }
 
-int client_import_disconnect(struct lustre_handle *dlm_handle, int failover)
+int client_disconnect_import(struct lustre_handle *dlm_handle, int failover)
 {
         struct obd_device *obd = class_conn2obd(dlm_handle);
         struct client_obd *cli = &obd->u.cli;
         struct obd_import *imp = cli->cl_import;
-        struct ptlrpc_request *request = NULL;
-        int rc = 0, err, rq_opc;
+        int rc = 0, err;
         ENTRY;
 
         if (!obd) {
@@ -134,16 +113,6 @@ int client_import_disconnect(struct lustre_handle *dlm_handle, int failover)
                 RETURN(-EINVAL);
         }
 
-        switch (imp->imp_connect_op) {
-        case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
-        case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
-        case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
-        default:
-                CERROR("don't know how to disconnect from %s (connect_op %d)\n",
-                       imp->imp_target_uuid.uuid, imp->imp_connect_op);
-                RETURN(-EINVAL);
-        }
-
         down(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
@@ -155,6 +124,9 @@ int client_import_disconnect(struct lustre_handle *dlm_handle, int failover)
         if (cli->cl_conn_count)
                 GOTO(out_no_disconnect, rc = 0);
 
+        if (imp->imp_replayable)
+                ptlrpc_pinger_del_import(imp);
+
         if (obd->obd_namespace != NULL) {
                 /* obd_no_recov == local only */
                 ldlm_cli_cancel_unused(obd->obd_namespace, NULL,
@@ -167,15 +139,11 @@ int client_import_disconnect(struct lustre_handle *dlm_handle, int failover)
         if (obd->obd_no_recov) {
                 ptlrpc_set_import_active(imp, 0);
         } else {
-                request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL);
-                if (request) {
-                        request->rq_replen = lustre_msg_size(0, NULL);
-                        rc = ptlrpc_queue_wait(request);
-                        ptlrpc_req_finished(request);
-                }
+                rc = ptlrpc_disconnect_import(imp);
         }
-        if (imp->imp_replayable)
-                ptlrpc_pinger_del_import(imp);
+        
+        imp->imp_state = LUSTRE_IMP_NEW;
+
 
         EXIT;
 
@@ -375,7 +343,7 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
         dlmimp->imp_remote_handle = conn;
         dlmimp->imp_obd = target;
         dlmimp->imp_dlm_fake = 1;
-        dlmimp->imp_level = LUSTRE_CONN_FULL;
+        dlmimp->imp_state = LUSTRE_IMP_FULL;
         class_import_put(dlmimp);
 out:
         if (rc)
index 1ba8805..9d4183a 100644 (file)
@@ -104,7 +104,7 @@ static int mgmtcli_connect_to_svc(struct obd_device *obd)
         ENTRY;
 
         /* Connect to ourselves, and thusly to the mgmt service. */
-        rc = client_import_connect(&mc->mc_ping_handle, obd, &obd->obd_uuid);
+        rc = client_connect_import(&mc->mc_ping_handle, obd, &obd->obd_uuid);
         if (rc) {
                 CERROR("failed to connect to mgmt svc: %d\n", rc);
                 (void)client_obd_cleanup(obd, 0);
@@ -126,7 +126,7 @@ static int mgmtcli_connect_to_svc(struct obd_device *obd)
                 CERROR("can't start thread to ping mgmt svc %s: %d\n",
                        mc->mc_import->imp_target_uuid.uuid, rc);
                 OBD_FREE(mc->mc_ping_thread, sizeof (*mc->mc_ping_thread));
-                (void)client_import_disconnect(&mc->mc_ping_handle, 0);
+                (void)client_disconnect_import(&mc->mc_ping_handle, 0);
                 RETURN(rc);
         }
         l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING, &lwi);
@@ -143,7 +143,7 @@ static int mgmtcli_disconnect_from_svc(struct obd_device *obd)
         int rc;
 
         ENTRY;
-        rc = client_import_disconnect(&mc->mc_ping_handle, 0);
+        rc = client_disconnect_import(&mc->mc_ping_handle, 0);
         if (rc) {
                 CERROR("can't disconnect from %s: %d (%s)\n",
                        imp->imp_target_uuid.uuid, rc,
diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c
new file mode 100644 (file)
index 0000000..650b65a
--- /dev/null
@@ -0,0 +1,204 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *   Author: Mike Shaver <shaver@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_RPC
+#ifdef __KERNEL__
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/kmod.h>
+#else
+# include <liblustre.h>
+#endif
+
+#include <linux/obd_support.h>
+#include <linux/lustre_ha.h>
+#include <linux/lustre_net.h>
+#include <linux/lustre_import.h>
+#include <linux/lustre_export.h>
+#include <linux/obd.h>
+#include <linux/obd_class.h>
+
+#include "ptlrpc_internal.h"
+
+/* Send a connect RPC for @imp and move the import out of LUSTRE_IMP_DISCON.
+ *
+ * The import must be in LUSTRE_IMP_DISCON on entry (asserted), unless a
+ * connect is already in flight (LUSTRE_IMP_CONNECTING), in which case
+ * -EALREADY is returned and nothing is changed.
+ *
+ * On success (0) the import state is chosen from the reply:
+ *   - first connect (imp_remote_handle.cookie was 0): LUSTRE_IMP_FULL
+ *   - reply has MSG_CONNECT_RECONNECT:                LUSTRE_IMP_RECOVER
+ *   - reply has MSG_CONNECT_RECOVERING:               LUSTRE_IMP_REPLAY
+ *   - otherwise:                                      LUSTRE_IMP_EVICTED
+ * On failure (-ENOMEM if the request cannot be allocated, the
+ * ptlrpc_queue_wait() error, or -ENOTCONN when a reconnect reply carries an
+ * all-zero handle) the state is put back to LUSTRE_IMP_DISCON.
+ *
+ * NOTE(review): this function exits through RETURN() but has no ENTRY.
+ *
+ * should this take an imp_sem to ensure connect is single threaded? */
+int ptlrpc_connect_import(struct obd_import *imp)
+{
+        struct obd_device *obd = imp->imp_obd;
+        int msg_flags;
+        int initial_connect = 0;
+        int rc;
+        __u64 committed_before_reconnect = 0;
+        struct ptlrpc_request *request;
+        struct lustre_handle old_hdl;
+        int size[] = {sizeof(imp->imp_target_uuid),
+                                 sizeof(obd->obd_uuid),
+                                 sizeof(imp->imp_dlm_handle)};
+        char *tmp[] = {imp->imp_target_uuid.uuid,
+                       obd->obd_uuid.uuid,
+                       (char *)&imp->imp_dlm_handle};
+        unsigned long flags;
+        /* Claim the import for this connect attempt while holding imp_lock. */
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        if (imp->imp_state == LUSTRE_IMP_CONNECTING) {
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+                RETURN(-EALREADY);
+        } else {
+                LASSERT(imp->imp_state == LUSTRE_IMP_DISCON);
+        }
+        CDEBUG(D_HA, "%s: new state: CONNECTING\n", 
+               imp->imp_client->cli_name);
+        imp->imp_state = LUSTRE_IMP_CONNECTING;
+        imp->imp_conn_cnt++; 
+        if (imp->imp_remote_handle.cookie == 0) {
+                initial_connect = 1; /* no remote handle yet: first connect */
+        } else {
+                committed_before_reconnect = imp->imp_peer_committed_transno;
+        }
+        spin_unlock_irqrestore(&imp->imp_lock, flags);
+        /* Request carries three buffers: target uuid, our uuid, dlm handle. */
+        request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
+        if (!request)
+                GOTO(out, rc = -ENOMEM);
+        /* Send state equals the import state set above (CONNECTING). */
+        request->rq_send_state = LUSTRE_IMP_CONNECTING;
+        request->rq_replen = lustre_msg_size(0, NULL);
+
+        // lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER);
+        /* A non-zero rc from the RPC skips every state update below. */
+        rc = ptlrpc_queue_wait(request);
+        if (rc) {
+                GOTO(free_req, rc);
+        }
+
+        msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
+        /* First connect: record replayability and the reply handle, go FULL. */
+        if (initial_connect) {
+                CDEBUG(D_HA, "%s: new state: FULL\n", 
+                       imp->imp_client->cli_name);
+                if (msg_flags & MSG_CONNECT_REPLAYABLE)
+                        imp->imp_replayable = 1;
+                imp->imp_remote_handle = request->rq_repmsg->handle;
+                imp->imp_state = LUSTRE_IMP_FULL;
+                GOTO(free_req, rc = 0);
+        }
+
+        /* Determine what recovery state to move the import to. */
+        if (MSG_CONNECT_RECONNECT & msg_flags) {
+                memset(&old_hdl, 0, sizeof(old_hdl)); /* all-zero reference */
+                if (!memcmp(&old_hdl, &request->rq_repmsg->handle,
+                            sizeof (old_hdl))) {
+                        CERROR("%s@%s didn't like our handle "LPX64
+                               ", failed\n", imp->imp_target_uuid.uuid,
+                               imp->imp_connection->c_remote_uuid.uuid,
+                               imp->imp_dlm_handle.cookie);
+                        GOTO(free_req, rc = -ENOTCONN);
+                }
+                /* Adopt the reply handle if it differs from the stored one. */
+                if (memcmp(&imp->imp_remote_handle, &request->rq_repmsg->handle,
+                           sizeof(imp->imp_remote_handle))) {
+                        CERROR("%s@%s changed handle from "LPX64" to "LPX64
+                               "; copying, but this may foreshadow disaster\n",
+                               imp->imp_target_uuid.uuid,
+                               imp->imp_connection->c_remote_uuid.uuid,
+                               imp->imp_remote_handle.cookie,
+                               request->rq_repmsg->handle.cookie);
+                        imp->imp_remote_handle = request->rq_repmsg->handle;
+                } else {
+                        CERROR("reconnected to %s@%s after partition\n",
+                               imp->imp_target_uuid.uuid, 
+                               imp->imp_connection->c_remote_uuid.uuid);
+                }
+                CDEBUG(D_HA, "%s: new state: RECOVER\n", 
+                       imp->imp_client->cli_name);
+                imp->imp_state = LUSTRE_IMP_RECOVER;
+        } 
+        else if (MSG_CONNECT_RECOVERING & msg_flags) {
+                CDEBUG(D_HA, "%s: new state: REPLAY\n", 
+                       imp->imp_client->cli_name);
+                LASSERT(imp->imp_replayable);
+                imp->imp_state = LUSTRE_IMP_RECOVER; /* overwritten below */
+                imp->imp_remote_handle = request->rq_repmsg->handle;
+                imp->imp_state = LUSTRE_IMP_REPLAY;
+        } 
+        else {
+                CDEBUG(D_HA, "%s: new state: EVICTED\n", 
+                       imp->imp_client->cli_name);
+                imp->imp_remote_handle = request->rq_repmsg->handle;
+                imp->imp_state = LUSTRE_IMP_EVICTED;
+        }
+        
+        /* Sanity checks for a reconnected import.  Both checks below only
+         * log an error; neither changes rc or the import state. */
+        if (!(imp->imp_replayable) != 
+             !(msg_flags & MSG_CONNECT_REPLAYABLE)) {
+                CERROR("imp_replayable flag does not match server "
+                       "after reconnect. We should LBUG right here.\n");
+        }
+        /* Compare against the transno saved before the request was sent. */
+        if (request->rq_repmsg->last_committed < committed_before_reconnect) {
+                CERROR("%s went back in time (transno "LPD64
+                       " was previously committed, server now claims "LPD64
+                       ")! is shared storage not coherent?\n",
+                       imp->imp_target_uuid.uuid,
+                       committed_before_reconnect,
+                       request->rq_repmsg->last_committed);
+        }
+
+ free_req:
+        ptlrpc_req_finished(request);
+        /* Any failure puts the import back in DISCON (set without imp_lock). */
+ out:
+        if (rc != 0)
+                imp->imp_state = LUSTRE_IMP_DISCON;
+        RETURN(rc);
+}
+
+
+/* Send the disconnect RPC matching imp->imp_connect_op and mark @imp
+ * disconnected.
+ *
+ * Returns -EINVAL, leaving the import untouched, if imp_connect_op is not
+ * one of OST_CONNECT, MDS_CONNECT or MGMT_CONNECT.  Otherwise returns the
+ * result of ptlrpc_queue_wait(), or 0 if no request could be allocated; in
+ * both of those cases the import is set to LUSTRE_IMP_DISCON and its remote
+ * handle is zeroed, whether or not the RPC succeeded. */
+int ptlrpc_disconnect_import(struct obd_import *imp)
+{
+        struct ptlrpc_request *request;
+        int rq_opc;
+        int rc = 0;
+        ENTRY;
+        /* Map the connect opcode to its matching disconnect opcode. */
+        switch (imp->imp_connect_op) {
+        case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
+        case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
+        case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
+        default:
+                CERROR("don't know how to disconnect from %s (connect_op %d)\n",
+                       imp->imp_target_uuid.uuid, imp->imp_connect_op);
+                RETURN(-EINVAL);
+        }
+        /* NOTE(review): a NULL request skips the RPC and rc stays 0. */
+        request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL);
+        if (request) {
+                request->rq_replen = lustre_msg_size(0, NULL);
+                rc = ptlrpc_queue_wait(request);
+                ptlrpc_req_finished(request);
+        }
+        /* A zero cookie makes ptlrpc_connect_import() treat the next connect as initial. */
+        imp->imp_state = LUSTRE_IMP_DISCON;
+        memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle));
+        RETURN(rc);
+}
+
index c81fb51..77d6fc3 100644 (file)
@@ -149,10 +149,10 @@ static int ptlrpc_pinger_main(void *arg)
                                 /* Add a ping. */
                                 spin_lock_irqsave(&imp->imp_lock, flags);
                                 generation = imp->imp_generation;
-                                level = imp->imp_level;
+                                level = imp->imp_state;
                                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-                                if (level != LUSTRE_CONN_FULL) {
+                                if (level != LUSTRE_IMP_FULL) {
                                         CDEBUG(D_HA,
                                                "not pinging %s (in recovery)\n",
                                                imp->imp_target_uuid.uuid);
@@ -167,7 +167,7 @@ static int ptlrpc_pinger_main(void *arg)
                                 }
                                 req->rq_no_resend = 1;
                                 req->rq_replen = lustre_msg_size(0, NULL);
-                                req->rq_level = LUSTRE_CONN_FULL;
+                                req->rq_send_state = LUSTRE_IMP_FULL;
                                 req->rq_phase = RQ_PHASE_RPC;
                                 req->rq_import_generation = generation;
                                 ptlrpc_set_add_req(set, req);