Whamcloud - gitweb
Landing of b_recovery (at last).
[fs/lustre-release.git] / lustre / ptlrpc / rpc.c
index 659845b..7263ac0 100644 (file)
  */
 
 #define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_RPC
 
 #include <linux/module.h>
+#include <linux/obd_support.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_ha.h>
+#include <linux/lustre_net.h>
+#include <linux/init.h>
+#include <linux/lprocfs_status.h>
 
-#define DEBUG_SUBSYSTEM S_RPC
 
-#include <linux/lustre_ha.h>
 
 extern int ptlrpc_init_portals(void);
 extern void ptlrpc_exit_portals(void);
 
+extern struct lprocfs_vars status_var_nm_1[];
+extern struct lprocfs_vars status_class_var[];
+
 int connmgr_setup(struct obd_device *obddev, obd_count len, void *buf)
 {
         struct recovd_obd *recovd = &obddev->u.recovd;
         int err;
         ENTRY;
 
+        MOD_INC_USE_COUNT; /* taken before setup; dropped below if recovd_setup() fails, otherwise by connmgr_cleanup() */
         memset(recovd, 0, sizeof(*recovd));
 
-        OBD_ALLOC(recovd->recovd_client, sizeof(*recovd->recovd_client));
-        if (!recovd)
-                RETURN(-ENOMEM);
-
         err = recovd_setup(recovd);
-        if (err)
-                GOTO(err_free, err);
-
-        recovd->recovd_service =
-                ptlrpc_init_svc(128 * 1024,CONNMGR_REQUEST_PORTAL,
-                                CONNMGR_REPLY_PORTAL, "self", connmgr_handle);
-        if (!recovd->recovd_service) {
-                CERROR("failed to start service\n");
-                GOTO(err_recovd, err = -EINVAL);
-        }
-
-        ptlrpc_init_client(NULL, NULL, CONNMGR_REQUEST_PORTAL, 
-                           CONNMGR_REPLY_PORTAL, recovd->recovd_client);
-
-        err = ptlrpc_start_thread(obddev, recovd->recovd_service, "lustre_connmgr");
         if (err) {
-                CERROR("cannot start thread\n");
-                GOTO(err_svc, err);
+                MOD_DEC_USE_COUNT; /* undo the increment taken at the top of this function */
+                RETURN(err); /* recovd_setup()'s error code is passed through unchanged */
         }
 
-        MOD_INC_USE_COUNT;
-        ptlrpc_connmgr = recovd;
         RETURN(0);
-
- err_svc: 
-        rpc_unregister_service(recovd->recovd_service);
- err_recovd: 
-        recovd_cleanup(recovd); 
- err_free:
-        if (recovd->recovd_client)
-                OBD_FREE(recovd->recovd_client, sizeof(*recovd->recovd_client));
-        RETURN(err);
 }
 
 int connmgr_cleanup(struct obd_device *dev)
@@ -83,48 +63,128 @@ int connmgr_cleanup(struct obd_device *dev)
         struct recovd_obd *recovd = &dev->u.recovd;
         int err;
 
-        err = recovd_cleanup(recovd); 
-        if (err) 
+        err = recovd_cleanup(recovd);
+        if (err)
                 LBUG();
 
-        ptlrpc_stop_thread(recovd->recovd_service);
-        rpc_unregister_service(recovd->recovd_service);
-        if (!list_empty(&recovd->recovd_service->srv_reqs)) {
-                // XXX reply with errors and clean up
-                CERROR("Request list not empty!\n");
-        }
-
-        OBD_FREE(recovd->recovd_service, sizeof(*recovd->recovd_service));
-        ptlrpc_cleanup_client(recovd->recovd_client);
-        OBD_FREE(recovd->recovd_client, sizeof(*recovd->recovd_client));
         MOD_DEC_USE_COUNT;
         RETURN(0);
 }
 
-
-int connmgr_iocontrol(int cmd, struct obd_conn *conn, int len, void *karg,
-                         void *uarg)
+int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg,
+                      void *uarg) /* len and uarg are not referenced in this body; karg is read as struct obd_ioctl_data */
 {
-        struct recovd_obd *recovd = &conn->oc_dev->u.recovd;
+        struct ptlrpc_connection *conn = NULL;
+        struct obd_device *obd = class_conn2obd(hdl);
+        struct recovd_obd *recovd = &obd->u.recovd; /* NOTE(review): obd is not checked for NULL -- confirm class_conn2obd() cannot fail here */
+        struct obd_ioctl_data *data = karg;
+        struct list_head *tmp;
+        int rc = 0; /* returned as-is unless a GOTO below overrides it */
 
         ENTRY;
-        if (cmd == OBD_RECOVD_NEWCONN) { 
-                spin_lock(&recovd->recovd_lock);
-                recovd->recovd_flags |= RECOVD_UPCALL_ANSWER;
-                recovd->recovd_wakeup_flag = 1;
-                wake_up(&recovd->recovd_waitq);
+
+        if (cmd != OBD_IOC_RECOVD_NEWCONN && cmd != OBD_IOC_RECOVD_FAILCONN)
+                RETURN(-EINVAL); /* XXX ENOSYS? */
+        
+        /* Find the connection that's been rebuilt or has failed. */
+        spin_lock(&recovd->recovd_lock); /* released at out:; dropped briefly around recovd_conn_fail() below */
+        list_for_each(tmp, &recovd->recovd_troubled_items) {
+                conn = list_entry(tmp, struct ptlrpc_connection,
+                                  c_recovd_data.rd_managed_chain);
+
+                LASSERT(conn->c_recovd_data.rd_recovd == recovd); /* sanity */
+
+                if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1)) /* match on the UUID string passed in ioc_inlbuf1 */
+                        break;
+                conn = NULL; /* no match: leave conn NULL so falling off the loop means "not found" */
+        }
+
+        if (!conn) {
+                if (cmd == OBD_IOC_RECOVD_NEWCONN)
+                        GOTO(out, rc = -EINVAL); /* NEWCONN is only accepted for conns on the troubled list */
+                /* XXX macroize/inline and share with loop above */
+                list_for_each(tmp, &recovd->recovd_managed_items) { /* FAILCONN: fall back to the managed list */
+                        conn = list_entry(tmp, struct ptlrpc_connection,
+                                          c_recovd_data.rd_managed_chain);
+                        
+                        LASSERT(conn->c_recovd_data.rd_recovd == recovd);
+                        
+                        if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
+                                break;
+                        conn = NULL;
+                }
+                if (!conn)
+                        GOTO(out, rc = -EINVAL); /* UUID found on neither list */
+        }
+
+        if (cmd == OBD_IOC_RECOVD_FAILCONN) {
                 spin_unlock(&recovd->recovd_lock);
-                EXIT;
+                recovd_conn_fail(conn); /* called without recovd_lock; presumably it takes that lock itself -- TODO confirm */
+                spin_lock(&recovd->recovd_lock); /* re-taken so the phase write and the unlock at out: stay balanced */
+
+                /* Jump straight to the "failed" phase of recovery. */
+                conn->c_recovd_data.rd_phase = RD_FAILED;
+                goto out; /* rc is still 0 here */
         }
-        return 0;
+
+
+        /* else (NEWCONN) */
+        spin_lock(&conn->c_lock); /* nested inside recovd_lock */
+
+        /* whatever happens, reset the INVALID flag */
+        conn->c_flags &= ~CONN_INVALID;
+
+        /* XXX is this a good check?  should we allow readdressing of
+         * XXX conns that aren't in recovery?
+         */
+        if (conn->c_recovd_data.rd_phase != RD_PREPARING) {
+                spin_unlock(&conn->c_lock);
+                GOTO(out, rc = -EALREADY); /* CONN_INVALID has already been cleared on this path */
+        }
+
+        if (data->ioc_inllen2) { /* a non-zero second inline buffer carries a replacement UUID */
+                CERROR("conn %p UUID change %s -> %s\n",
+                       conn, conn->c_remote_uuid, data->ioc_inlbuf2);
+                strcpy(conn->c_remote_uuid, data->ioc_inlbuf2); /* NOTE(review): unbounded copy; nothing here checks that ioc_inlbuf2 fits in c_remote_uuid */
+        } else {
+                CERROR("conn %p UUID %s reconnected\n", conn,
+                       conn->c_remote_uuid);
+        }
+        ptlrpc_readdress_connection(conn, conn->c_remote_uuid); /* called with both c_lock and recovd_lock held */
+        spin_unlock(&conn->c_lock);
+        
+        conn->c_recovd_data.rd_phase = RD_PREPARED; /* written after c_lock is released, still under recovd_lock */
+        wake_up(&recovd->recovd_waitq);
+ out:
+        spin_unlock(&recovd->recovd_lock); /* every path reaching out: holds recovd_lock */
+        RETURN(rc);
 }
 
+static int connmgr_connect(struct lustre_handle *conn, struct obd_device *src,
+                           obd_uuid_t cluuid, struct recovd_obd *recovd,
+                           ptlrpc_recovery_cb_t recover)
+{ /* o_connect handler: recovd and recover are accepted but ignored; the call is forwarded to class_connect() */
+        return class_connect(conn, src, cluuid); /* class_connect()'s result is returned unchanged */
+}
 
+int connmgr_attach(struct obd_device *dev, obd_count len, void *data)
+{ /* o_attach handler: len and data are not used */
+        return lprocfs_reg_obd(dev, status_var_nm_1, dev); /* dev is passed as both first and last argument, with the extern status_var_nm_1 table; result returned unchanged */
+}
+
+int conmgr_detach(struct obd_device *dev) /* NOTE(review): spelled "conmgr" (one n), unlike the connmgr_* siblings; recovd_obd_ops refers to it by this spelling */
+{
+        return lprocfs_dereg_obd(dev); /* o_detach counterpart of the lprocfs_reg_obd() call in connmgr_attach(); result returned unchanged */
+}
 /* use obd ops to offer management infrastructure */
 static struct obd_ops recovd_obd_ops = {
+        o_attach:      connmgr_attach,
+        o_detach:      conmgr_detach, /* sic: the handler is defined with this single-n spelling */
         o_setup:       connmgr_setup,
         o_cleanup:     connmgr_cleanup,
-        o_iocontrol:     connmgr_iocontrol,
+        o_iocontrol:   connmgr_iocontrol,
+        o_connect:     connmgr_connect, /* wrapper that discards its recovd/recover arguments and calls class_connect() */
+        o_disconnect:  class_disconnect /* the generic class_disconnect is installed directly, with no connmgr wrapper */
 };
 
 static int __init ptlrpc_init(void)
@@ -134,20 +194,88 @@ static int __init ptlrpc_init(void)
         if (rc) 
                 RETURN(rc);
         ptlrpc_init_connection();
-        obd_register_type(&recovd_obd_ops, LUSTRE_HA_NAME);
+        rc = class_register_type(&recovd_obd_ops, status_class_var,
+                                 LUSTRE_HA_NAME);
+        if (rc) 
+                RETURN(rc);
+        ptlrpc_put_connection_superhack = ptlrpc_put_connection;
         return 0;
 }
 
 static void __exit ptlrpc_exit(void)
 {
-        obd_unregister_type(LUSTRE_HA_NAME);
+        class_unregister_type(LUSTRE_HA_NAME); /* pairs with the class_register_type(..., LUSTRE_HA_NAME) call in ptlrpc_init() */
         ptlrpc_exit_portals();
         ptlrpc_cleanup_connection();
 }
 
-MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+/* recovd.c */
+EXPORT_SYMBOL(ptlrpc_recovd);
+EXPORT_SYMBOL(recovd_conn_fail);
+EXPORT_SYMBOL(recovd_conn_manage);
+EXPORT_SYMBOL(recovd_conn_fixed);
+EXPORT_SYMBOL(recovd_setup);
+EXPORT_SYMBOL(recovd_cleanup);
+
+/* connection.c */
+EXPORT_SYMBOL(ptlrpc_readdress_connection);
+EXPORT_SYMBOL(ptlrpc_get_connection);
+EXPORT_SYMBOL(ptlrpc_put_connection);
+EXPORT_SYMBOL(ptlrpc_connection_addref);
+EXPORT_SYMBOL(ptlrpc_init_connection);
+EXPORT_SYMBOL(ptlrpc_cleanup_connection);
+
+/* niobuf.c */
+EXPORT_SYMBOL(ptlrpc_send_bulk);
+EXPORT_SYMBOL(ptlrpc_register_bulk);
+EXPORT_SYMBOL(ptlrpc_abort_bulk);
+EXPORT_SYMBOL(ptlrpc_reply);
+EXPORT_SYMBOL(ptlrpc_error);
+EXPORT_SYMBOL(ptlrpc_resend_req);
+EXPORT_SYMBOL(ptl_send_rpc);
+EXPORT_SYMBOL(ptlrpc_link_svc_me);
+EXPORT_SYMBOL(obd_brw_set_free);
+EXPORT_SYMBOL(obd_brw_set_new);
+EXPORT_SYMBOL(obd_brw_set_add);
+
+/* client.c */
+EXPORT_SYMBOL(ptlrpc_init_client);
+EXPORT_SYMBOL(ptlrpc_cleanup_client);
+EXPORT_SYMBOL(ptlrpc_req_to_uuid);
+EXPORT_SYMBOL(ptlrpc_uuid_to_connection);
+EXPORT_SYMBOL(ptlrpc_queue_wait);
+EXPORT_SYMBOL(ptlrpc_continue_req);
+EXPORT_SYMBOL(ptlrpc_replay_req);
+EXPORT_SYMBOL(ptlrpc_restart_req);
+EXPORT_SYMBOL(ptlrpc_prep_req);
+EXPORT_SYMBOL(ptlrpc_free_req);
+EXPORT_SYMBOL(ptlrpc_req_finished);
+EXPORT_SYMBOL(ptlrpc_prep_bulk);
+EXPORT_SYMBOL(ptlrpc_free_bulk);
+EXPORT_SYMBOL(ptlrpc_prep_bulk_page);
+EXPORT_SYMBOL(ptlrpc_free_bulk_page);
+EXPORT_SYMBOL(ll_brw_sync_wait);
+
+/* service.c */
+EXPORT_SYMBOL(ptlrpc_init_svc);
+EXPORT_SYMBOL(ptlrpc_stop_all_threads);
+EXPORT_SYMBOL(ptlrpc_start_thread);
+EXPORT_SYMBOL(ptlrpc_unregister_service);
+
+/* pack_generic.c */
+EXPORT_SYMBOL(lustre_pack_msg);
+EXPORT_SYMBOL(lustre_msg_size);
+EXPORT_SYMBOL(lustre_unpack_msg);
+EXPORT_SYMBOL(lustre_msg_buf);
+
+/* recover.c */
+EXPORT_SYMBOL(ptlrpc_run_recovery_upcall);
+EXPORT_SYMBOL(ptlrpc_reconnect_import);
+EXPORT_SYMBOL(ptlrpc_replay);
+
+MODULE_AUTHOR("Cluster File Systems, Inc <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Request Processor v1.0");
-MODULE_LICENSE("GPL"); 
+MODULE_LICENSE("GPL");
 
 module_init(ptlrpc_init);
 module_exit(ptlrpc_exit);