Whamcloud - gitweb
Avoid allocating 'event' and 'request' on the stack (saving some 350 bytes),
[fs/lustre-release.git] / lustre / ptlrpc / recovd.c
index a663a44..3f10733 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  linux/mds/handler.c
+ *  obd/rpc/recovd.c
  *
  *  Lustre High Availability Daemon
  *
  *
  */
 
-#define EXPORT_SYMTAB
-
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/locks.h>
-#include <linux/kmod.h>
-#include <linux/quotaops.h>
-#include <asm/unistd.h>
-#include <asm/uaccess.h>
-
 #define DEBUG_SUBSYSTEM S_RPC
 
 #include <linux/lustre_lite.h>
 #include <linux/lustre_ha.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-
-struct connmgr_obd *ptlrpc_connmgr; 
-
-void connmgr_cli_manage(struct connmgr_obd *mgr, struct ptlrpc_client *cli)
-{
-        ENTRY;
-        cli->cli_ha_mgr = mgr;
-        spin_lock(&mgr->mgr_lock);
-        list_add(&cli->cli_ha_item, &mgr->mgr_connections_lh); 
-        spin_unlock(&mgr->mgr_lock); 
-        EXIT;
-}
+#include <linux/obd_support.h>
 
-
-void connmgr_cli_fail(struct ptlrpc_client *cli)
+void recovd_conn_manage(struct ptlrpc_connection *conn,
+                        struct recovd_obd *recovd, ptlrpc_recovery_cb_t recover) /*
+ * Place a connection under the management of a recovery daemon.
+ *
+ * Stores recovd and the recover callback in conn->c_recovd_data, then links
+ * that recovd_data onto recovd->recovd_managed_items under recovd_lock.
+ */
 {
+        struct recovd_data *rd = &conn->c_recovd_data;
         ENTRY;
-        spin_lock(&cli->cli_ha_mgr->mgr_lock); 
-        cli->cli_ha_mgr->mgr_flags |= SVC_HA_EVENT;
-        list_del(&cli->cli_ha_item);
-        list_add(&cli->cli_ha_item, &cli->cli_ha_mgr->mgr_troubled_lh); 
-        spin_unlock(&cli->cli_ha_mgr->mgr_lock); 
-        wake_up(&cli->cli_ha_mgr->mgr_waitq);
-        EXIT;
-}
-
-int connmgr_upcall(void)
-{
-        char *argv[2];
-        char *envp[3];
-
-        argv[0] = "/usr/src/obd/utils/ha_assist.sh";
-        argv[1] = NULL;
 
-        envp [0] = "HOME=/";
-        envp [1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-        envp [2] = NULL;
-
-        return call_usermodehelper(argv[0], argv, envp);
-}
+        rd->rd_recovd = recovd;
+        rd->rd_recover = recover; /* called by recovd_handle_event() once per
+                                   * recovery phase */
 
-static void connmgr_unpack_body(struct ptlrpc_request *req)
-{
-        struct connmgr_body *b = lustre_msg_buf(req->rq_repmsg, 0);
-        if (b == NULL)
-                LBUG();
+        spin_lock(&recovd->recovd_lock);
+        list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items);
+        spin_unlock(&recovd->recovd_lock);
 
-        b->generation = NTOH__u32(b->generation);
+        EXIT;
 }
 
-int connmgr_connect(struct connmgr_obd *mgr, 
-                    struct ptlrpc_connection *conn)
+void recovd_conn_fail(struct ptlrpc_connection *conn) /*
+ * Report a connection as failed to its recovery daemon.
+ *
+ * Under recovd_lock, unlinks conn->c_recovd_data from the list it is on and
+ * appends it to the daemon's recovd_troubled_items list, then wakes
+ * recovd_waitq.  If the connection has no recovd assigned, an error is logged
+ * and nothing else happens.
+ */
 {
-        struct ptlrpc_request *req;
-        struct ptlrpc_client *cl;
-        struct connmgr_body *body;
-        int rc, size = sizeof(*body);
+        struct recovd_data *rd = &conn->c_recovd_data;
+        struct recovd_obd *recovd = rd->rd_recovd;
         ENTRY;
 
-        if (!mgr) { 
-                CERROR("no manager\n"); 
-                LBUG();
+        if (!recovd) {
+                CERROR("no recovd for connection %p\n", conn);
+                return; /* NOTE(review): leaves without the EXIT that pairs
+                         * with ENTRY above -- confirm that is harmless */
         }
-        cl = mgr->mgr_client;
-
-        req = ptlrpc_prep_req(cl, conn, CONNMGR_CONNECT, 1, &size, NULL);
-        if (!req)
-                GOTO(out, rc = -ENOMEM);
-
-        body = lustre_msg_buf(req->rq_reqmsg, 0);
-        body->generation = HTON__u32(conn->c_generation);
 
-        req->rq_replen = lustre_msg_size(1, &size);
+        spin_lock(&recovd->recovd_lock);
+        list_del(&rd->rd_managed_chain);
+        list_add_tail(&rd->rd_managed_chain, &recovd->recovd_troubled_items); /*
+         * appended at the tail: recovd_handle_event() picks the entry at the
+         * head of recovd_troubled_items */
+        spin_unlock(&recovd->recovd_lock);
 
-        rc = ptlrpc_queue_wait(req);
-        rc = ptlrpc_check_status(req, rc);
-
-        if (!rc) {
-                connmgr_unpack_body(req);
-                body = lustre_msg_buf(req->rq_repmsg, 0);
-                CDEBUG(D_NET, "mode: %o\n", body->generation);
-        }
+        wake_up(&recovd->recovd_waitq);
 
         EXIT;
- out:
-        return rc;
 }
 
-
-int connmgr_handle_connect(struct ptlrpc_request *req)
+/* Move a connection's recovd_data back onto the managed list: unlinks
+ * conn->c_recovd_data from the list it is currently on and adds it to
+ * rd_recovd->recovd_managed_items.
+ *
+ * This function must be called with conn->c_lock held.
+ *
+ * NOTE(review): unlike recovd_conn_manage() and recovd_conn_fail(), the list
+ * manipulation here is not done under recovd_lock -- confirm the caller
+ * provides the exclusion needed against the recovd thread.
+ */
+void recovd_conn_fixed(struct ptlrpc_connection *conn)
 {
-        struct connmgr_body *body;
-        int rc, size = sizeof(*body);
+        struct recovd_data *rd = &conn->c_recovd_data;
         ENTRY;
 
-        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc) {
-                CERROR("connmgr: out of memory\n");
-                req->rq_status = -ENOMEM;
-                RETURN(0);
-        }
-
-        body = lustre_msg_buf(req->rq_reqmsg, 0);
-        connmgr_unpack_body(req); 
+        list_del(&rd->rd_managed_chain);
+        list_add(&rd->rd_managed_chain, &rd->rd_recovd->recovd_managed_items);
 
-        printk("incoming generation %d\n", body->generation);
-        body = lustre_msg_buf(req->rq_repmsg, 0);
-        body->generation = 4711;
-        RETURN(0);
+        EXIT;
 }
 
-int connmgr_handle(struct obd_device *dev,
-                   struct ptlrpc_service *svc,
-                   struct ptlrpc_request *req)
+/* Wake-up condition used by recovd_main() in its wait_event() call.
+ *
+ * Takes recovd_lock and returns 1 when any of the following holds:
+ *   - recovd_phase is RECOVD_IDLE and recovd_troubled_items is non-empty,
+ *   - RECOVD_STOPPING is set in recovd_flags,
+ *   - RECOVD_FAILED is set in recovd_flags,
+ *   - recovd_phase equals recovd_next_phase.
+ * Returns 0 otherwise.  The lock is released before returning.
+ */
+static int recovd_check_event(struct recovd_obd *recovd)
 {
-        int rc;
+        int rc = 0;
         ENTRY;
 
-        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
-        if (rc) { 
-                CERROR("lustre_mds: Invalid request\n");
-                GOTO(out, rc);
-        }
+        spin_lock(&recovd->recovd_lock);
 
-        if (req->rq_reqmsg->type != PTL_RPC_REQUEST) {
-                CERROR("lustre_mds: wrong packet type sent %d\n",
-                       req->rq_reqmsg->type);
-                GOTO(out, rc = -EINVAL);
+        if (recovd->recovd_phase == RECOVD_IDLE &&
+            !list_empty(&recovd->recovd_troubled_items)) {
+                GOTO(out, rc = 1);
         }
 
-        switch (req->rq_reqmsg->opc) {
-        case CONNMGR_CONNECT:
-                CDEBUG(D_INODE, "getattr\n");
-                OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NET, 0);
-                rc = connmgr_handle_connect(req);
-                break;
+        if (recovd->recovd_flags & RECOVD_STOPPING)
+                GOTO(out, rc = 1);
 
-        default:
-                rc = ptlrpc_error(svc, req);
-                RETURN(rc);
+        if (recovd->recovd_flags & RECOVD_FAILED) {
+                LASSERT(recovd->recovd_phase != RECOVD_IDLE && 
+                        recovd->recovd_current_rd); /* a failure is only
+                         * expected while a recovery is in progress */
+                GOTO(out, rc = 1);
         }
 
-        EXIT;
-out:
-        if (rc) {
-                ptlrpc_error(svc, req);
-        } else {
-                CDEBUG(D_NET, "sending reply\n");
-                ptlrpc_reply(svc, req);
-        }
+        if (recovd->recovd_phase == recovd->recovd_next_phase)
+                GOTO(out, rc = 1); /* NOTE(review): no code visible in this
+                                    * file advances recovd_phase to
+                                    * recovd_next_phase; presumably done from
+                                    * the recovery callback side -- confirm */
 
-        return 0;
+ out:
+        spin_unlock(&recovd->recovd_lock);
+        RETURN(rc);
 }
 
-
-static int recovd_check_event(struct connmgr_obd *mgr)
+static int recovd_handle_event(struct recovd_obd *recovd) /*
+ * Advance the recovery state machine by one step.
+ *
+ * recovd_main() calls this with recovd_lock held.  The lock is dropped around
+ * every rd_recover() callback and re-taken afterwards, so it is held again
+ * when this function returns.
+ *
+ * Returns 1 after running the failure path (RECOVD_FAILED was set, or a phase
+ * callback returned non-zero); returns 0 otherwise.  recovd_main() ignores
+ * the return value.
+ */
 {
-        int rc = 0; 
+        struct recovd_data *rd;
+        int rc;
         ENTRY;
 
-        spin_lock(&mgr->mgr_lock);
-
-        if (!(mgr->mgr_flags & MGR_WORKING) && 
-            !list_empty(&mgr->mgr_troubled_lh) ) {
-
-                CERROR("connection in trouble - state: WORKING, upcall\n"); 
-                mgr->mgr_flags = MGR_WORKING;
+        if (recovd->recovd_flags & RECOVD_FAILED) {
 
-                mgr->mgr_waketime = CURRENT_TIME; 
-                mgr->mgr_timeout = 5 * HZ;
-                schedule_timeout(mgr->mgr_timeout); 
+                LASSERT(recovd->recovd_phase != RECOVD_IDLE && 
+                        recovd->recovd_current_rd);
 
-        }
-
-        if (mgr->mgr_flags & MGR_WORKING &&
-            CURRENT_TIME <= mgr->mgr_waketime + mgr->mgr_timeout ) { 
-                CERROR("WORKING: new event\n");
+                rd = recovd->recovd_current_rd;
+        cb_failed: /* also entered by goto when a phase callback in the switch
+                    * below returns non-zero; rd is already set there */
+                CERROR("recovery FAILED for rd %p (conn %p), recovering\n",
+                       rd, class_rd2conn(rd));
 
-                mgr->mgr_waketime = CURRENT_TIME; 
-                schedule_timeout(mgr->mgr_timeout); 
-        }
+                list_add(&rd->rd_managed_chain, &recovd->recovd_managed_items); /*
+                 * rd was unlinked from the troubled list when its recovery
+                 * was started (RECOVD_IDLE case below) */
+                spin_unlock(&recovd->recovd_lock);
+                rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_FAILURE); /* result is
+                                                                  * ignored */
+                spin_lock(&recovd->recovd_lock);
+                recovd->recovd_phase = RECOVD_IDLE;
+                recovd->recovd_next_phase = RECOVD_PREPARING;
+                
+                recovd->recovd_flags &= ~RECOVD_FAILED;
 
-        if (mgr->mgr_flags & MGR_STOPPING) { 
-                CERROR("ha mgr stopping\n");
-                rc = 1;
+                RETURN(1);
         }
 
-        spin_unlock(&mgr->mgr_lock); 
-        RETURN(rc);
-}
-
-int recovd_handle_event(struct connmgr_obd *mgr)
-{
-
-        spin_lock(&mgr->mgr_lock);
-
-        if (!(mgr->mgr_flags & MGR_WORKING) && 
-            !list_empty(&mgr->mgr_troubled_lh) ) {
+        switch (recovd->recovd_phase) {
+            case RECOVD_IDLE:
+                if (recovd->recovd_current_rd ||
+                    list_empty(&recovd->recovd_troubled_items))
+                        break;
+                rd = list_entry(recovd->recovd_troubled_items.next,
+                                struct recovd_data, rd_managed_chain);
+                
+                list_del(&rd->rd_managed_chain);
+                if (!rd->rd_recover)
+                        LBUG();
+
+                CERROR("starting recovery for rd %p (conn %p)\n",
+                       rd, class_rd2conn(rd));
+                recovd->recovd_current_rd = rd; /* NOTE(review): nothing
+                 * visible in this file resets recovd_current_rd to NULL, and
+                 * the check at the top of this case refuses to start a new
+                 * recovery while it is set -- confirm it is cleared
+                 * elsewhere */
+                recovd->recovd_flags &= ~RECOVD_FAILED;
+                recovd->recovd_phase = RECOVD_PREPARING;
+
+                spin_unlock(&recovd->recovd_lock);
+                rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_PREPARE);
+                spin_lock(&recovd->recovd_lock);
+                if (rc)
+                        goto cb_failed;
+                
+                recovd->recovd_next_phase = RECOVD_PREPARED;
+                break;
 
-                CERROR("connection in trouble - state: WORKING, upcall\n"); 
-                mgr->mgr_flags = MGR_WORKING;
+            case RECOVD_PREPARED:
+                rd = recovd->recovd_current_rd;
+                recovd->recovd_phase = RECOVD_RECOVERING;
 
+                CERROR("recovery prepared for rd %p (conn %p), recovering\n",
+                       rd, class_rd2conn(rd));
 
-                connmgr_upcall();
-                mgr->mgr_waketime = CURRENT_TIME; 
-                mgr->mgr_timeout = 5 * HZ;
-                schedule_timeout(mgr->mgr_timeout); 
+                spin_unlock(&recovd->recovd_lock);
+                rc = rd->rd_recover(rd, PTLRPC_RECOVD_PHASE_RECOVER);
+                spin_lock(&recovd->recovd_lock);
+                if (rc)
+                        goto cb_failed;
+                
+                recovd->recovd_next_phase = RECOVD_RECOVERED;
+                break;
 
-        }
+            case RECOVD_RECOVERED:
+                rd = recovd->recovd_current_rd;
+                recovd->recovd_phase = RECOVD_IDLE;
+                recovd->recovd_next_phase = RECOVD_PREPARING;
 
-        if (mgr->mgr_flags & MGR_WORKING &&
-            CURRENT_TIME <= mgr->mgr_waketime + mgr->mgr_timeout ) { 
-                CERROR("WORKING: new event\n");
+                CERROR("recovery complete for rd %p (conn %p), recovering\n",
+                       rd, class_rd2conn(rd)); /* NOTE(review): the message
+                        * ends in "recovering" although the daemon returns to
+                        * RECOVD_IDLE here; the same suffix appears in the
+                        * FAILED message above -- looks copied, confirm */
+                break;
 
-                mgr->mgr_waketime = CURRENT_TIME; 
-                schedule_timeout(mgr->mgr_timeout); 
+            default:
+                break;
         }
 
-        spin_unlock(&mgr->mgr_lock);
-        return 0;
+        RETURN(0);
 }
 
 static int recovd_main(void *arg)
 {
-        struct connmgr_thread *data = (struct connmgr_thread *)arg;
-        struct connmgr_obd *mgr = data->mgr;
+        struct recovd_obd *recovd = (struct recovd_obd *)arg;
 
         ENTRY;
 
@@ -268,71 +199,82 @@ static int recovd_main(void *arg)
         recalc_sigpending(current);
         spin_unlock_irq(&current->sigmask_lock);
 
-        sprintf(current->comm, data->name);
+        sprintf(current->comm, "lustre_recovd");
+        unlock_kernel();
 
         /* Record that the  thread is running */
-        mgr->mgr_thread = current;
-        mgr->mgr_flags = MGR_RUNNING;
-        wake_up(&mgr->mgr_ctl_waitq);
+        recovd->recovd_thread = current;
+        recovd->recovd_flags = RECOVD_IDLE;
+        wake_up(&recovd->recovd_ctl_waitq);
 
         /* And now, loop forever on requests */
         while (1) {
-                wait_event_interruptible(mgr->mgr_waitq, 
-                                         recovd_check_event(mgr));
+                wait_event(recovd->recovd_waitq, recovd_check_event(recovd));
 
-                spin_lock(&mgr->mgr_lock);
-                if (mgr->mgr_flags & MGR_STOPPING) {
-                        spin_unlock(&mgr->mgr_lock);
-                        CERROR("lustre_hamgr quitting\n"); 
+                spin_lock(&recovd->recovd_lock);
+
+                if (recovd->recovd_flags & RECOVD_STOPPING) {
+                        spin_unlock(&recovd->recovd_lock);
+                        CERROR("lustre_recovd stopping\n");
                         EXIT;
                         break;
                 }
 
-                recovd_handle_event(mgr); 
-                spin_unlock(&mgr->mgr_lock);
+                recovd_handle_event(recovd);
+                spin_unlock(&recovd->recovd_lock);
         }
 
-        mgr->mgr_thread = NULL;
-        mgr->mgr_flags = MGR_STOPPED;
-        wake_up(&mgr->mgr_ctl_waitq);
+        recovd->recovd_thread = NULL;
+        recovd->recovd_flags = RECOVD_STOPPED;
+        wake_up(&recovd->recovd_ctl_waitq);
         CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
         RETURN(0);
 }
 
-int recovd_setup(struct connmgr_obd *mgr)
+int recovd_setup(struct recovd_obd *recovd) /*
+ * Initialise a recovery daemon and start its kernel thread.
+ *
+ * Initialises the managed/troubled lists, the lock and the three wait queues,
+ * sets recovd_next_phase to RECOVD_PREPARING, starts recovd_main() as a
+ * kernel thread and waits on recovd_ctl_waitq for it to come up.  Afterwards
+ * it installs recovd_conn_fail as class_signal_connection_failure and
+ * publishes the daemon in ptlrpc_recovd.
+ *
+ * Returns 0 on success, or -EINVAL if the thread could not be started (the
+ * error code returned by kernel_thread() itself is not propagated).
+ */
 {
-        struct connmgr_thread d;
         int rc;
-        ENTRY;
+        extern void (*class_signal_connection_failure)
+                (struct ptlrpc_connection *);
 
-        INIT_LIST_HEAD(&mgr->mgr_connections_lh);
-        INIT_LIST_HEAD(&mgr->mgr_troubled_lh);
-        spin_lock_init(&mgr->mgr_lock); 
+        ENTRY;
 
-        d.mgr = mgr;
-        d.name = "lustre_recovd";
+        INIT_LIST_HEAD(&recovd->recovd_managed_items);
+        INIT_LIST_HEAD(&recovd->recovd_troubled_items);
+        spin_lock_init(&recovd->recovd_lock);
 
-        init_waitqueue_head(&mgr->mgr_waitq);
-        init_waitqueue_head(&mgr->mgr_ctl_waitq);
+        init_waitqueue_head(&recovd->recovd_waitq);
+        init_waitqueue_head(&recovd->recovd_recovery_waitq);
+        init_waitqueue_head(&recovd->recovd_ctl_waitq);
 
-        rc = kernel_thread(recovd_main, (void *) &d,
+        recovd->recovd_next_phase = RECOVD_PREPARING;
+        
+        rc = kernel_thread(recovd_main, (void *)recovd,
                            CLONE_VM | CLONE_FS | CLONE_FILES);
         if (rc < 0) {
                 CERROR("cannot start thread\n");
                 RETURN(-EINVAL);
         }
-        wait_event(mgr->mgr_ctl_waitq, mgr->mgr_flags & MGR_RUNNING);
+        wait_event(recovd->recovd_ctl_waitq,
+                   recovd->recovd_phase == RECOVD_IDLE); /* NOTE(review):
+                    * recovd_main() stores RECOVD_IDLE in recovd_flags, not in
+                    * recovd_phase, before waking this queue; this condition
+                    * therefore depends on recovd_phase already being
+                    * RECOVD_IDLE -- confirm which field was intended */
 
-        RETURN(0); 
-}
+        /* exported and called by obdclass timeout handlers */
+        class_signal_connection_failure = recovd_conn_fail;
+        ptlrpc_recovd = recovd;
 
+        RETURN(0);
+}
 
-int recovd_cleanup(struct connmgr_obd *mgr)
+int recovd_cleanup(struct recovd_obd *recovd) /*
+ * Ask the recovd thread to stop and wait until it has done so.
+ *
+ * Under recovd_lock, overwrites recovd_flags with RECOVD_STOPPING (any other
+ * flag bits are discarded) and wakes recovd_waitq.  Then sleeps on
+ * recovd_ctl_waitq until RECOVD_STOPPED shows up in recovd_flags, which
+ * recovd_main() sets just before it exits.  Always returns 0.
+ *
+ * NOTE(review): RETURN is used below without a matching ENTRY at the top of
+ * the function -- confirm that is acceptable for the debug macros.
+ */
 {
-        mgr->mgr_flags = MGR_STOPPING;
+        spin_lock(&recovd->recovd_lock);
+        recovd->recovd_flags = RECOVD_STOPPING;
+        wake_up(&recovd->recovd_waitq);
+        spin_unlock(&recovd->recovd_lock);
 
-        wake_up(&mgr->mgr_waitq);
-        wait_event_interruptible(mgr->mgr_ctl_waitq,
-                                 (mgr->mgr_flags & MGR_STOPPED));
+        wait_event(recovd->recovd_ctl_waitq,
+                   (recovd->recovd_flags & RECOVD_STOPPED));
         RETURN(0);
 }
+/* Recovery daemon instance; recovd_setup() stores the daemon it has started
+ * here.  NOTE(review): defined after its use in recovd_setup(), so a
+ * declaration is presumably provided by one of the included headers --
+ * confirm.
+ */
+struct recovd_obd *ptlrpc_recovd;