Whamcloud - gitweb
- Split import reconnection and replay (OSC only needs to reconnect).
author shaver <shaver>
Sat, 19 Oct 2002 11:19:14 +0000 (11:19 +0000)
committer shaver <shaver>
Sat, 19 Oct 2002 11:19:14 +0000 (11:19 +0000)
- Restore recovd-management of server-side connections (why did I take that out
  in the first place?).
- Abort inflight, uncommitted and waiting requests for imports that are
  invalidated by reconnection to OST.  (Generates -EIO, or should!)
- Mete out harsh, harsh justice to locks held by recovery-invalidated imports.
- Remove LBUG()s now that some error returns from ldlm_cli_cancel are "normal".
- Cancel locks on disconnection from OST (really filter).
- Ignore replies that come in after we've started recovery on a given connection.
  The socknal's implicit retransmission can otherwise cause us worlds and worlds
  of hurt.
- Wake up when we get aborted, and return -EIO.
- Remove connections from the recovd's care when they go unused.  (Which might
  actually happen, once Phil lands his refcounting fixes!)

13 files changed:
lustre/include/linux/lustre_ha.h
lustre/include/linux/lustre_import.h
lustre/lib/target.c
lustre/llite/recover.c
lustre/llite/super.c
lustre/mdc/mdc_request.c
lustre/obdclass/genops.c
lustre/obdfilter/filter.c
lustre/ptlrpc/client.c
lustre/ptlrpc/connection.c
lustre/ptlrpc/recovd.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/rpc.c

index 09610b2..8afa1a8 100644 (file)
@@ -9,6 +9,7 @@
 
 struct recovd_data;
 struct recovd_obd;
+struct obd_import;
 struct ptlrpc_connection;
 
 /* rd_phase/rd_next_phase values */
@@ -32,6 +33,7 @@ struct ptlrpc_connection;
 typedef int (*ptlrpc_recovery_cb_t)(struct recovd_data *, int);
 
 struct recovd_data {
+        /* you must hold recovd->recovd_lock when touching rd_managed_chain */
         struct list_head     rd_managed_chain;
         ptlrpc_recovery_cb_t rd_recover;
         struct recovd_obd   *rd_recovd;
@@ -50,6 +52,7 @@ int recovd_cleanup(struct recovd_obd *mgr);
 extern struct recovd_obd *ptlrpc_recovd;
 
 int ptlrpc_run_recovery_upcall(struct ptlrpc_connection *conn);
-int ptlrpc_reconnect_and_replay(struct ptlrpc_connection *conn);
+int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc);
+int ptlrpc_replay(struct ptlrpc_connection *conn);
 
 #endif
index aa3e3d7..3a183e4 100644 (file)
@@ -20,7 +20,6 @@ struct obd_import {
         struct list_head          imp_chain;
         struct obd_device        *imp_obd;
         /* XXX need a UUID here, I think
-         * XXX what about client_obd.cl_target_uuid?
          */
 };
 
index 94665e0..8786ee8 100644 (file)
@@ -90,6 +90,8 @@ int target_handle_connect(struct ptlrpc_request *req)
         spin_lock(&export->exp_connection->c_lock);
         list_add(&export->exp_conn_chain, &export->exp_connection->c_exports);
         spin_unlock(&export->exp_connection->c_lock);
+        recovd_conn_manage(export->exp_connection, ptlrpc_recovd,
+                           target_revoke_connection);
 
         dlmimp = &export->exp_ldlm_data.led_import;
         dlmimp->imp_connection = req->rq_connection;
@@ -98,7 +100,6 @@ int target_handle_connect(struct ptlrpc_request *req)
         dlmimp->imp_handle.cookie = req->rq_reqmsg->cookie;
         dlmimp->imp_obd = /* LDLM! */ NULL;
         
-#warning Peter: is this the right place to upgrade the server connection level?
         req->rq_connection->c_level = LUSTRE_CONN_FULL;
 out:
         req->rq_status = rc;
@@ -137,6 +138,8 @@ static int target_disconnect_client(struct ptlrpc_connection *conn)
                 if (rc)
                         CERROR("disconnecting export %p failed: %d\n", exp, rc);
         }
+
+        /* XXX spank the connection (it's frozen in _RECOVD for now!) */
         RETURN(0);
 }
 
index 1d2f5ad..e88cedf 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <linux/lustre_lite.h>
 #include <linux/lustre_ha.h>
+#include <linux/lustre_dlm.h>
+#include <linux/lustre_idl.h>
 
 static int ll_retry_recovery(struct ptlrpc_connection *conn)
 {
@@ -17,6 +19,99 @@ static int ll_retry_recovery(struct ptlrpc_connection *conn)
     RETURN(0);
 }
 
+/*
+ * Abort the requests that belong to imp and are still queued on its
+ * connection.
+ *
+ * Sending list: a request already flagged PTL_RPC_FL_REPLIED is released
+ * with ptlrpc_req_finished(); any other request is flagged PTL_RPC_FL_ERR
+ * and its rq_wait_for_rep waiters are woken.
+ * Delayed list: every request is flagged PTL_RPC_FL_ERR and its waiters are
+ * woken.
+ *
+ * The lists hang off the connection, which can carry several imports
+ * (see conn->c_imports), so requests whose rq_import is not imp are left
+ * untouched.
+ *
+ * XXX looks a lot like super.c:invalidate_request_list, don't it?
+ */
+static void abort_inflight_for_import(struct obd_import *imp)
+{
+        struct list_head *tmp, *n;
+
+        /* _safe walk: presumably ptlrpc_req_finished() can unlink the entry */
+        list_for_each_safe(tmp, n, &imp->imp_connection->c_sending_head) {
+                struct ptlrpc_request *req =
+                        list_entry(tmp, struct ptlrpc_request, rq_list);
+
+                /* the connection is shared; only this import is invalid */
+                if (req->rq_import != imp)
+                        continue;
+
+                if (req->rq_flags & PTL_RPC_FL_REPLIED) {
+                        /* no need to replay, just discard */
+                        CERROR("uncommitted req xid "LPD64" op %d to OST %s\n",
+                               (unsigned long long)req->rq_xid,
+                               req->rq_reqmsg->opc,
+                               imp->imp_obd->u.cli.cl_target_uuid);
+                        ptlrpc_req_finished(req);
+                } else {
+                        CERROR("inflight req xid "LPD64" op %d to OST %s\n",
+                               (unsigned long long)req->rq_xid,
+                               req->rq_reqmsg->opc,
+                               imp->imp_obd->u.cli.cl_target_uuid);
+
+                        req->rq_flags |= PTL_RPC_FL_ERR;
+                        wake_up(&req->rq_wait_for_rep);
+                }
+        }
+
+        list_for_each_safe(tmp, n, &imp->imp_connection->c_delayed_head) {
+                struct ptlrpc_request *req =
+                        list_entry(tmp, struct ptlrpc_request, rq_list);
+
+                /* same filter as above: other imports' requests stay queued */
+                if (req->rq_import != imp)
+                        continue;
+
+                CERROR("aborting waiting req xid "LPD64" op %d to OST %s\n",
+                       (unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
+                       imp->imp_obd->u.cli.cl_target_uuid);
+                req->rq_flags |= PTL_RPC_FL_ERR;
+                wake_up(&req->rq_wait_for_rep);
+        }
+}
+
+/*
+ * Recover one OST import: dump and then clean up the LDLM namespace of the
+ * import's obd (the 1 passed to ldlm_namespace_cleanup() means "no network
+ * ops"), call abort_inflight_for_import(), and finally send OST_CONNECT via
+ * ptlrpc_reconnect_import().
+ *
+ * The reconnect result is explicitly discarded here, so callers cannot
+ * tell whether the OST was actually reached.
+ */
+static void reconnect_ost(struct obd_import *imp)
+{
+        struct ldlm_namespace *ns = imp->imp_obd->obd_namespace;
+        
+        CDEBUG(D_HA, "invalidating all locks for OST imp %p (to %s):\n",
+               imp, imp->imp_connection->c_remote_uuid);
+        ldlm_namespace_dump(ns);
+        ldlm_namespace_cleanup(ns, 1 /* no network ops */);
+
+        abort_inflight_for_import(imp);
+
+        (void)ptlrpc_reconnect_import(imp, OST_CONNECT);
+}
+
+/*
+ * Reconnect every import on conn.
+ *
+ * c_level is first dropped to LUSTRE_CONN_CON.  An import whose obd type
+ * provides o_brw is treated as an OST and goes through reconnect_ost();
+ * any other import is reconnected with MDS_CONNECT, and a successful
+ * reconnect there marks the connection as needing replay.
+ *
+ * Returns 0 after raising c_level to LUSTRE_CONN_FULL when no import needs
+ * replay; otherwise sets c_level to LUSTRE_CONN_RECOVD and returns the
+ * result of ptlrpc_replay().
+ */
+static int ll_reconnect(struct ptlrpc_connection *conn)
+{
+        struct list_head *tmp;
+        int need_replay = 0;
+
+        ENTRY;
+
+        /* XXX c_lock semantics! */
+        conn->c_level = LUSTRE_CONN_CON;
+
+        /* XXX this code MUST be shared with class_obd_connect! */
+        list_for_each(tmp, &conn->c_imports) {
+                struct obd_import *imp = list_entry(tmp, struct obd_import,
+                                                    imp_chain);
+                if (imp->imp_obd->obd_type->typ_ops->o_brw) {
+                        /* XXX what to do if we fail? */
+                        reconnect_ost(imp);
+                } else {
+                        int rc = ptlrpc_reconnect_import(imp, MDS_CONNECT);
+                        if (!rc)
+                                need_replay = 1;
+                        /* make sure we don't try to replay for dead imps?
+                         *
+                         * else imp->imp_connection = NULL;
+                         *
+                         */
+                }
+        }
+
+        if (!need_replay) {
+                /* all done! */
+                conn->c_level = LUSTRE_CONN_FULL;
+                RETURN(0);
+        }
+        
+        conn->c_level = LUSTRE_CONN_RECOVD;
+        /* this will replay, up the c_level, recovd_conn_fixed and continue reqs.
+         * also, makes a mean cup of coffee.
+         */
+        RETURN(ptlrpc_replay(conn));
+}
+
 int ll_recover(struct recovd_data *rd, int phase)
 {
         struct ptlrpc_connection *conn = class_rd2conn(rd);
@@ -28,7 +123,7 @@ int ll_recover(struct recovd_data *rd, int phase)
             case PTLRPC_RECOVD_PHASE_PREPARE:
                 RETURN(ptlrpc_run_recovery_upcall(conn));
             case PTLRPC_RECOVD_PHASE_RECOVER:
-                RETURN(ptlrpc_reconnect_and_replay(conn));
+                RETURN(ll_reconnect(conn));
             case PTLRPC_RECOVD_PHASE_FAILURE:
                 RETURN(ll_retry_recovery(conn));
         }
index 53c8015..d8bf664 100644 (file)
@@ -533,7 +533,7 @@ static inline void invalidate_request_list(struct list_head *req_list)
         list_for_each_safe(tmp, n, req_list) {
                 struct ptlrpc_request *req = 
                         list_entry(tmp, struct ptlrpc_request, rq_list);
-                CERROR("invalidating req xid %d op %d to %s:%d\n",
+                CERROR("invalidating req xid "LPD64" op %d to %s:%d\n",
                        (unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
                        req->rq_connection->c_remote_uuid,
                        req->rq_import->imp_client->cli_request_portal);
index af165ea..1fb85e2 100644 (file)
@@ -175,8 +175,8 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                 ldlm_lock2handle(lock, &lockh);
                 rc = ldlm_cli_cancel(&lockh);
                 if (rc < 0) {
-                        CERROR("ldlm_cli_cancel: %d\n", rc);
-                        LBUG();
+                        CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
+                        RETURN(rc);
                 }
                 break;
         case LDLM_CB_CANCELING:
index 487c932..6750092 100644 (file)
@@ -342,8 +342,8 @@ struct obd_export *class_conn2export(struct lustre_handle *conn)
                 RETURN(NULL);
 
         if (export->exp_cookie != conn->cookie)
-                return NULL;
-        return export;
+                RETURN(NULL);
+        RETURN(export);
 } /* class_conn2export */
 
 struct obd_device *class_conn2obd(struct lustre_handle *conn)
index 62f3954..f87a392 100644 (file)
@@ -353,9 +353,12 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
 
 static int filter_disconnect(struct lustre_handle *conn)
 {
+        struct obd_export *export = class_conn2export(conn);
         int rc;
         ENTRY;
 
+        ldlm_cancel_locks_for_export(export);
+
         rc = class_disconnect(conn);
         if (!rc)
                 MOD_DEC_USE_COUNT;
index 2459ad3..d86c807 100644 (file)
@@ -251,6 +251,21 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req)
         int rc = 0;
 
         if (req->rq_repmsg != NULL) {
+                struct ptlrpc_connection *conn = req->rq_import->imp_connection;
+                spin_lock(&conn->c_lock);
+                if (req->rq_level > conn->c_level) {
+                        CDEBUG(D_HA,
+                               "rep to xid "LPD64" op %d to %s:%d: "
+                               "recovery started, ignoring (%d > %d)\n",
+                               (unsigned long long)req->rq_xid,
+                               req->rq_reqmsg->opc, conn->c_remote_uuid,
+                               req->rq_import->imp_client->cli_request_portal,
+                               req->rq_level, conn->c_level);
+                        req->rq_repmsg = NULL;
+                        spin_unlock(&conn->c_lock);
+                        GOTO(out, rc = 0);
+                }
+                spin_unlock(&conn->c_lock);
                 req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
                 req->rq_flags |= PTL_RPC_FL_REPLIED;
                 GOTO(out, rc = 1);
@@ -261,6 +276,11 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req)
                 GOTO(out, rc = 1);
         }
 
+        if (req->rq_flags & PTL_RPC_FL_ERR) {
+                CERROR("-- ABORTED --\n");
+                GOTO(out, rc = 1);
+        }
+
  out:
         CDEBUG(D_NET, "req = %p, rc = %d\n", req, rc);
         return rc;
@@ -575,6 +595,11 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
                        req->rq_connection->c_remote_uuid,
                        req->rq_import->imp_client->cli_request_portal);
 
+        if (req->rq_flags & PTL_RPC_FL_ERR) {
+                ptlrpc_abort(req);
+                GOTO(out, rc = -EIO);
+        }
+
         /* Don't resend if we were interrupted. */
         if ((req->rq_flags & (PTL_RPC_FL_RESEND | PTL_RPC_FL_INTR)) ==
             PTL_RPC_FL_RESEND) {
index a5528fd..df2a2c2 100644 (file)
@@ -121,6 +121,7 @@ int ptlrpc_put_connection(struct ptlrpc_connection *c)
         CDEBUG(D_INFO, "connection=%p refcount %d\n",
                c, atomic_read(&c->c_refcount) - 1);
         if (atomic_dec_and_test(&c->c_refcount)) {
+                recovd_conn_unmanage(c);
                 spin_lock(&conn_lock);
                 list_del(&c->c_link);
                 list_add(&c->c_link, &conn_unused_list);
index 02df21a..336633f 100644 (file)
@@ -85,6 +85,24 @@ void recovd_conn_manage(struct ptlrpc_connection *conn,
         EXIT;
 }
 
+/*
+ * Take conn out of its recovd's care.
+ *
+ * When the connection has a recovd attached (rd_recovd set), it is unlinked
+ * from rd_managed_chain under recovd_lock and detached from that recovd;
+ * otherwise the unlink is skipped.  In both cases the recovery callback is
+ * cleared and the phase state is reset: rd_phase to RD_IDLE and
+ * rd_next_phase to RD_TROUBLED.
+ */
+void recovd_conn_unmanage(struct ptlrpc_connection *conn)
+{
+        struct recovd_data *rd = &conn->c_recovd_data;
+        struct recovd_obd *recovd = rd->rd_recovd;
+        ENTRY;
+
+        if (recovd) {
+                /* rd_managed_chain may only be touched under recovd_lock */
+                spin_lock(&recovd->recovd_lock);
+                list_del(&rd->rd_managed_chain);
+                spin_unlock(&recovd->recovd_lock);
+                rd->rd_recovd = NULL;
+        }
+        /* should be safe enough, right? */
+        rd->rd_recover = NULL;
+        /* reset both the current phase and the phase to enter next */
+        rd->rd_phase = RD_IDLE;
+        rd->rd_next_phase = RD_TROUBLED;
+        EXIT;
+}
+
 void recovd_conn_fail(struct ptlrpc_connection *conn)
 {
         struct recovd_data *rd = &conn->c_recovd_data;
@@ -137,7 +155,6 @@ void recovd_conn_fixed(struct ptlrpc_connection *conn)
         EXIT;
 }
 
-
 static int recovd_check_event(struct recovd_obd *recovd)
 {
         int rc = 0;
index 0b3a1b8..a4fb6c7 100644 (file)
 #include <linux/lustre_net.h>
 #include <linux/obd.h>
 
-static int ptlrpc_reconnect(struct ptlrpc_connection *conn) 
+int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc)
 {
-        struct list_head *tmp;
-        int rc = -EINVAL;
-
-        /* XXX c_lock semantics! */
-        conn->c_level = LUSTRE_CONN_CON;
-
-        /* XXX this code MUST be shared with class_obd_connect! */
-        list_for_each(tmp, &conn->c_imports) {
-                struct obd_import *imp = list_entry(tmp, struct obd_import,
-                                                    imp_chain);
-                struct obd_device *obd = imp->imp_obd;
-                struct client_obd *cli = &obd->u.cli;
-                int rq_opc = (obd->obd_type->typ_ops->o_brw)
-                        ? OST_CONNECT : MDS_CONNECT;
-                int size[] = { sizeof(cli->cl_target_uuid),
-                               sizeof(obd->obd_uuid) };
-                char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
-                struct lustre_handle old_hdl;
-                struct ptlrpc_request *request; 
-                struct obd_export *ldlmexp;
-
-                LASSERT(imp->imp_connection == conn);
-                request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
-                request->rq_level = LUSTRE_CONN_NEW;
-                request->rq_replen = lustre_msg_size(0, NULL);
-                /*
-                 * This address is the export that represents our client-side
-                 * LDLM service (for ASTs).  We should only have one on this
-                 * list, so we just grab the first one.
-                 *
-                 * XXX tear down export, call class_obd_connect!
-                 */
-                ldlmexp = list_entry(obd->obd_exports.next, struct obd_export,
-                                     exp_obd_chain);
-                request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
-                request->rq_reqmsg->cookie = ldlmexp->exp_cookie;
-                rc = ptlrpc_queue_wait(request);
-                rc = ptlrpc_check_status(request, rc);
-                if (rc) {
-                        CERROR("cannot connect to %s@%s: rc = %d\n",
-                               cli->cl_target_uuid, conn->c_remote_uuid, rc);
-                        ptlrpc_free_req(request);
-                        GOTO(out_disc, rc = -ENOTCONN);
-                }
+        struct obd_device *obd = imp->imp_obd;
+        struct client_obd *cli = &obd->u.cli;
+        int size[] = { sizeof(cli->cl_target_uuid), sizeof(obd->obd_uuid) };
+        char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
+        struct ptlrpc_connection *conn = imp->imp_connection;
+        struct lustre_handle old_hdl;
+        struct ptlrpc_request *request; 
+        struct obd_export *ldlmexp;
+        int rc;
+
+        request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
+        request->rq_level = LUSTRE_CONN_NEW;
+        request->rq_replen = lustre_msg_size(0, NULL);
+        /*
 
-                old_hdl = imp->imp_handle;
-                imp->imp_handle.addr = request->rq_repmsg->addr;
-                imp->imp_handle.cookie = request->rq_repmsg->cookie;
-                CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n",
-                       cli->cl_target_uuid, conn->c_remote_uuid,
-                       imp->imp_handle.addr, imp->imp_handle.cookie,
-                       old_hdl.addr, old_hdl.cookie);
-                ptlrpc_req_finished(request);
+         * This address is the export that represents our client-side LDLM
+         * service (for ASTs).  We should only have one on this list, so we
+         * just grab the first one.
+         *
+         * XXX tear down export, call class_obd_connect?
+         */
+        ldlmexp = list_entry(obd->obd_exports.next, struct obd_export,
+                             exp_obd_chain);
+        request->rq_reqmsg->addr = (__u64)(unsigned long)ldlmexp;
+        request->rq_reqmsg->cookie = ldlmexp->exp_cookie;
+        rc = ptlrpc_queue_wait(request);
+        rc = ptlrpc_check_status(request, rc);
+        if (rc) {
+                CERROR("cannot connect to %s@%s: rc = %d\n",
+                       cli->cl_target_uuid, conn->c_remote_uuid, rc);
+                ptlrpc_free_req(request);
+                GOTO(out_disc, rc = -ENOTCONN);
         }
-        conn->c_level = LUSTRE_CONN_RECOVD;
+        
+        old_hdl = imp->imp_handle;
+        imp->imp_handle.addr = request->rq_repmsg->addr;
+        imp->imp_handle.cookie = request->rq_repmsg->cookie;
+        CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n",
+               cli->cl_target_uuid, conn->c_remote_uuid,
+               imp->imp_handle.addr, imp->imp_handle.cookie,
+               old_hdl.addr, old_hdl.cookie);
+        ptlrpc_req_finished(request);
 
  out_disc:
         return rc;
@@ -121,19 +108,13 @@ int ptlrpc_run_recovery_upcall(struct ptlrpc_connection *conn)
         RETURN(0);
 }
 
-int ptlrpc_reconnect_and_replay(struct ptlrpc_connection *conn)
+int ptlrpc_replay(struct ptlrpc_connection *conn)
 {
         int rc = 0;
         struct list_head *tmp, *pos;
         struct ptlrpc_request *req;
         ENTRY;
 
-        /* 1. reconnect */
-        rc = ptlrpc_reconnect(conn);
-        if (rc)
-                RETURN(rc);
-        
-        /* 2. walk the request list */
         spin_lock(&conn->c_lock);
 
         CDEBUG(D_HA, "connection %p to %s has last_xid "LPD64"\n",
index 98be8ca..cb89c76 100644 (file)
@@ -247,7 +247,8 @@ EXPORT_SYMBOL(lustre_msg_buf);
 
 /* recover.c */
 EXPORT_SYMBOL(ptlrpc_run_recovery_upcall);
-EXPORT_SYMBOL(ptlrpc_reconnect_and_replay);
+EXPORT_SYMBOL(ptlrpc_reconnect_import);
+EXPORT_SYMBOL(ptlrpc_replay);
 
 MODULE_AUTHOR("Cluster File Systems, Inc <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Request Processor v1.0");