X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fptlrpc%2Frpc.c;h=25d644953d56d2cb361703e27094a427389fbf39;hb=762e5b7708660eb86c614b6ac1c5aaa6bf7acc4c;hp=742e4607801103c34dd7b83d9e217279cd937709;hpb=cc4bd2873eab77210c69df32be466dae78479ad1;p=fs%2Flustre-release.git

diff --git a/lustre/ptlrpc/rpc.c b/lustre/ptlrpc/rpc.c
index 742e460..25d6449 100644
--- a/lustre/ptlrpc/rpc.c
+++ b/lustre/ptlrpc/rpc.c
@@ -28,6 +28,7 @@
 #include <linux/obd_class.h>
 #include <linux/lustre_lib.h>
 #include <linux/lustre_ha.h>
+#include <linux/lustre_net.h>
 #include <linux/init.h>
 
 extern int ptlrpc_init_portals(void);
@@ -76,10 +77,10 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
 
         ENTRY;
 
-        if (cmd != OBD_IOC_RECOVD_NEWCONN)
-                RETURN(0);
+        if (cmd != OBD_IOC_RECOVD_NEWCONN && cmd != OBD_IOC_RECOVD_FAILCONN)
+                RETURN(-EINVAL); /* XXX ENOSYS? */
         
-        /* Find the connection that's been rebuilt. */
+        /* Find the connection that's been rebuilt or has failed. */
         spin_lock(&recovd->recovd_lock);
         list_for_each(tmp, &recovd->recovd_troubled_items) {
                 conn = list_entry(tmp, struct ptlrpc_connection,
@@ -92,13 +93,49 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
                 conn = NULL;
         }
 
-        if (!conn)
-                GOTO(out, rc = -EINVAL);
+        if (!conn) {
+                if (cmd == OBD_IOC_RECOVD_NEWCONN)
+                        GOTO(out, rc = -EINVAL);
+                /* XXX macroize/inline and share with loop above */
+                list_for_each(tmp, &recovd->recovd_managed_items) {
+                        conn = list_entry(tmp, struct ptlrpc_connection,
+                                          c_recovd_data.rd_managed_chain);
+                        
+                        LASSERT(conn->c_recovd_data.rd_recovd == recovd);
+                        
+                        if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+                                break;
+                        conn = NULL;
+                }
+                if (!conn)
+                        GOTO(out, rc = -EINVAL);
+        }
 
-        if (conn->c_recovd_data.rd_phase != RECOVD_PREPARING)
-                GOTO(out, rc = -EALREADY);
+        if (cmd == OBD_IOC_RECOVD_FAILCONN) {
+                spin_unlock(&recovd->recovd_lock);
+                recovd_conn_fail(conn);
+                spin_lock(&recovd->recovd_lock);
+
+                /* Jump straight to the "failed" phase of recovery. */
+                conn->c_recovd_data.rd_phase = RD_FAILED;
+                goto out;
+        }
 
+
+        /* else (NEWCONN) */
         spin_lock(&conn->c_lock);
+
+        /* whatever happens, reset the INVALID flag */
+        conn->c_flags &= ~CONN_INVALID;
+
+        /* XXX is this a good check?  should we allow readdressing of
+         * XXX conns that aren't in recovery?
+         */
+        if (conn->c_recovd_data.rd_phase != RD_PREPARING) {
+                spin_unlock(&conn->c_lock);
+                GOTO(out, rc = -EALREADY);
+        }
+
         if (data->ioc_inllen2) {
                 CERROR("conn %p UUID change %s -> %s\n",
                        conn, conn->c_remote_uuid, data->ioc_inlbuf2);
@@ -110,20 +147,26 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
         ptlrpc_readdress_connection(conn, conn->c_remote_uuid);
         spin_unlock(&conn->c_lock);
         
-        conn->c_recovd_data.rd_phase = RECOVD_PREPARED;
+        conn->c_recovd_data.rd_phase = RD_PREPARED;
         wake_up(&recovd->recovd_waitq);
  out:
         spin_unlock(&recovd->recovd_lock);
         RETURN(rc);
 }
 
+static int connmgr_connect(struct lustre_handle *conn, struct obd_device *src,
+                           obd_uuid_t cluuid, struct recovd_obd *recovd,
+                           ptlrpc_recovery_cb_t recover)
+{       /* o_connect hook: recovd and recover are accepted but unused here */
+        return class_connect(conn, src, cluuid); /* result passed through */
+}
 
 /* use obd ops to offer management infrastructure */
 static struct obd_ops recovd_obd_ops = {
         o_setup:       connmgr_setup,
         o_cleanup:     connmgr_cleanup,
         o_iocontrol:   connmgr_iocontrol,
-        o_connect:     class_connect,
+        o_connect:     connmgr_connect, /* forwards to class_connect */
         o_disconnect:  class_disconnect
 };
 
@@ -201,8 +244,9 @@ EXPORT_SYMBOL(lustre_msg_size);
 EXPORT_SYMBOL(lustre_unpack_msg);
 EXPORT_SYMBOL(lustre_msg_buf);
 
-EXPORT_SYMBOL(ll_recover);
-
+/* recover.c */
+EXPORT_SYMBOL(ptlrpc_run_recovery_upcall);
+EXPORT_SYMBOL(ptlrpc_reconnect_and_replay);
 
 MODULE_AUTHOR("Cluster File Systems, Inc <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Request Processor v1.0");