Whamcloud - gitweb
- More of Peter's additions for the ha manager. This doesn't seem to break much -
authorbraam <braam>
Wed, 10 Apr 2002 19:25:43 +0000 (19:25 +0000)
committerbraam <braam>
Wed, 10 Apr 2002 19:25:43 +0000 (19:25 +0000)
  but this code hasn't been tested much.

lustre/include/linux/lustre_ha.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_net.h
lustre/ldlm/ldlm_lockd.c
lustre/llite/llite_ha.c
lustre/llite/super.c
lustre/mdc/mdc_request.c
lustre/osc/osc_request.c
lustre/ptlrpc/client.c

index 715055b..652a64c 100644 (file)
@@ -1,10 +1,12 @@
+#ifndef _LUSTRE_HA_H
+#define _LUSTRE_HA_H
 
 #define MGR_STOPPING   1
 #define MGR_RUNNING    2
 #define MGR_STOPPED    4
 #define MGR_KILLED     8
 #define MGR_EVENT      16
-#define MGR_RECOVERING 32
+#define MGR_WORKING    32
 #define MGR_SIGNAL     64
 
 struct lustre_ha_mgr {
@@ -13,6 +15,9 @@ struct lustre_ha_mgr {
         wait_queue_head_t   mgr_waitq;
         wait_queue_head_t   mgr_ctl_waitq;
         spinlock_t          mgr_lock;
+        time_t              mgr_waketime;
+        struct list_head    mgr_connections_lh;  /* connections managed by the mgr */
+        struct list_head    mgr_troubled_lh;  /* connections in trouble */
 };
 
 struct lustre_ha_thread { 
@@ -20,3 +25,11 @@ struct lustre_ha_thread {
         struct lustre_ha_mgr *mgr; 
         struct obd_device    *dev;
 }; 
+
+int llite_ha_cleanup(struct lustre_ha_mgr *mgr);
+struct lustre_ha_mgr *llite_ha_setup(void);
+void llite_ha_conn_fail(struct ptlrpc_client *cli);
+void llite_ha_conn_manage(struct lustre_ha_mgr *mgr, struct ptlrpc_client *cli);
+
+
+#endif
index 41a222b..df66e70 100644 (file)
@@ -16,6 +16,7 @@
 
 #include <linux/lustre_net.h>
 #include <linux/lustre_mds.h>
+#include <linux/lustre_ha.h>
 #include <linux/obdo.h>
 
 #define LUSTRE_LITE_NAME "llite"
@@ -45,6 +46,7 @@ struct ll_sb_info {
         struct ptlrpc_client     ll_mds_client;
         struct lustre_peer       ll_mds_peer;
         struct ptlrpc_client     ll_ost_client;
+        struct lustre_ha_mgr    *ll_ha_mgr;
         struct lustre_peer       ll_ost_peer;
 };
 
index bd37fce..78bd73b 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/obd_class.h>
 #include <portals/p30.h>
 #include <linux/lustre_idl.h>
+#include <linux/lustre_ha.h>
 
 /* FOO_REQUEST_PORTAL is for incoming requests on the FOO
  * FOO_REPLY_PORTAL   is for incoming replies on the FOO
@@ -79,6 +80,8 @@ struct ptlrpc_client {
         __u32 cli_epoch;       /* changes when peer changes */
         __u32 cli_bootcount;   /* peer's boot count */ 
         struct semaphore cli_rpc_sem;
+        struct list_head cli_ha_item; 
+        struct lustre_ha_mgr *cli_ha_mgr;
 };
 
 /* These do double-duty in rq_type and rq_flags */
@@ -112,6 +115,7 @@ struct ptlrpc_request {
         char *rq_bulkbuf;
         int rq_bulklen;
 
+        time_t rq_time;
         void * rq_reply_handle;
         wait_queue_head_t rq_wait_for_rep;
 
@@ -199,8 +203,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, struct ptlrpc_client *cl);
 void ptlrpc_link_svc_me(struct ptlrpc_service *service, int i);
 
 /* rpc/client.c */
-void ptlrpc_init_client(int dev, int req_portal, int rep_portal,
-                        struct ptlrpc_client *cl);
+void ptlrpc_init_client(struct lustre_ha_mgr *mgr, int req_portal, int rep_portal,
+                       struct ptlrpc_client *cl);
 int ptlrpc_connect_client(char *uuid, struct ptlrpc_client *cl,
                           struct lustre_peer *peer);
 int ptlrpc_queue_wait(struct ptlrpc_client *cl, struct ptlrpc_request *req);
index 7995590..8662db0 100644 (file)
@@ -237,7 +237,19 @@ static int ldlm_iocontrol(int cmd, struct obd_conn *conn, int len, void *karg,
         if (_IOC_TYPE(cmd) != IOC_LDLM_TYPE || _IOC_NR(cmd) < IOC_LDLM_MIN_NR ||
             _IOC_NR(cmd) > IOC_LDLM_MAX_NR) {
                 CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
-                       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+                                _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+                EXIT;
+                return -EINVAL;
+        }
+
+#if 0
+        /* XX phil -- put the peer back in */
+
+        ptlrpc_init_client(NULL, LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, &cl);
+        err = ptlrpc_connect_client("ldlm", &cl, NULL);
+#endif
+        if (err) {
+                CERROR("cannot create client\n");
                 RETURN(-EINVAL);
         }
 
index 2657d33..6a86554 100644 (file)
@@ -3,7 +3,7 @@
  *
  *  linux/mds/handler.c
  *
- *  Lustre Metadata Server (mds) request handler
+ *  Lustre High Availability Daemon
  *
  *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
  *
@@ -12,8 +12,6 @@
  *
  *  by Peter Braam <braam@clusterfs.com>
  *
- *  This server is single threaded at present (but can easily be multi threaded)
- *
  */
 
 #define EXPORT_SYMTAB
@@ -23,6 +21,7 @@
 #include <linux/fs.h>
 #include <linux/stat.h>
 #include <linux/locks.h>
+#include <linux/kmod.h>
 #include <linux/quotaops.h>
 #include <asm/unistd.h>
 #include <asm/uaccess.h>
 
+/*
+ * Wake-up predicate for the HA manager thread; llite_ha_main() passes it as
+ * the condition of wait_event_interruptible() on mgr->mgr_waitq.
+ *
+ * Returns 1 when the manager should run (a connection is on the troubled
+ * list while the manager is not yet MGR_WORKING, or MGR_STOPPING is set),
+ * 0 otherwise.  mgr_flags and mgr_waketime are updated under mgr_lock.
+ *
+ * NOTE(review): schedule_timeout() is called below while mgr_lock is held;
+ * scheduling with a spinlock held looks unintended -- confirm.
+ */
 static int lustre_ha_check_event(struct lustre_ha_mgr *mgr)
 {
+        int rc = 0; 
+        ENTRY;
+
+        spin_lock(&mgr->mgr_lock); 
+        /* First trouble report: switch to MGR_WORKING and record the time. */
+        if (!(mgr->mgr_flags & MGR_WORKING) && 
+            !list_empty(&mgr->mgr_troubled_lh) ) {
+                mgr->mgr_flags |= MGR_WORKING;
+                mgr->mgr_waketime = CURRENT_TIME; 
+                schedule_timeout(4*HZ); 
+                CERROR("connection in trouble\n"); 
+                rc = 1;
+        }
+
+        /* NOTE(review): `!` binds tighter than `&`, so the test below is
+         * (!mgr->mgr_flags) & MGR_WORKING.  !x is 0 or 1 and MGR_WORKING is
+         * 32, so it is always 0 and this branch can never run.  Either
+         * !(mgr->mgr_flags & MGR_WORKING) or (mgr->mgr_flags & MGR_WORKING)
+         * may have been meant -- confirm the intent before changing it. */
+        if (!mgr->mgr_flags & MGR_WORKING &&
+            CURRENT_TIME >= mgr->mgr_waketime ) { 
+                CERROR("woken up once more\n");
+                mgr->mgr_waketime = CURRENT_TIME; 
+                schedule_timeout(4*HZ); 
+                rc = 1;
+        }
+
+        /* A stop request always wakes the manager. */
+        if (mgr->mgr_flags & MGR_STOPPING) { 
+                CERROR("ha mgr stopping\n");
+                rc = 1;
+        }
 
-        
-        return 1;
+        spin_unlock(&mgr->mgr_lock); 
+        RETURN(rc);
 }
 
 
+/*
+ * Run the HA helper script through call_usermodehelper() with a small fixed
+ * environment (HOME and PATH only) and hand back that call's return value.
+ */
+static int llite_ha_upcall(void)
+{
+        char *helper_argv[] = { "/usr/src/obd/utils/ha_assist.sh", NULL };
+        char *helper_envp[] = {
+                "HOME=/",
+                "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+                NULL
+        };
+
+        return call_usermodehelper(helper_argv[0], helper_argv, helper_envp);
+}
+
 static int llite_ha_main(void *arg)
 {
         struct lustre_ha_thread *data = (struct lustre_ha_thread *)arg;
@@ -68,67 +107,59 @@ static int llite_ha_main(void *arg)
                 wait_event_interruptible(mgr->mgr_waitq, 
                                          lustre_ha_check_event(mgr));
 
-                spin_lock(&mgr->mgr_lock);
-                schedule_timeout(5 * HZ); 
-                if (mgr->mgr_flags & MGR_SIGNAL) {
-                        spin_unlock(&mgr->mgr_lock);
-                        EXIT;
-                        break;
-                }
-
                 if (mgr->mgr_flags & MGR_STOPPING) {
                         spin_unlock(&mgr->mgr_lock);
+                        CERROR("lustre_hamgr quitting\n"); 
                         EXIT;
                         break;
                 }
 
-                if (mgr->mgr_flags & MGR_EVENT) {
-                        mgr->mgr_flags = MGR_RUNNING;
-
-                        /* FIXME: If we move to an event-driven model,
-                         * we should put the request on the stack of
-                         * mds_handle instead. */
-                        CERROR("MGR event\n"); 
-                        continue;
-                }
-
-                CERROR("unknown break in service");
+                spin_lock(&mgr->mgr_lock);
+                CERROR("lustre_hamgr woken up\n"); 
+                llite_ha_upcall();
+                schedule_timeout(5 * HZ);
                 spin_unlock(&mgr->mgr_lock);
-                EXIT;
-                break;
         }
 
         mgr->mgr_thread = NULL;
         mgr->mgr_flags = MGR_STOPPED;
         wake_up(&mgr->mgr_ctl_waitq);
         CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
-        return 0;
+        RETURN(0);
 }
 
-
-int llite_ha_setup(struct obd_device *dev, struct lustre_ha_mgr *mgr,
-                   char *name)
+/*
+ * Allocate a lustre_ha_mgr, initialise its lists, lock and wait queues, and
+ * start a kernel thread named "lustre_hamgr" running llite_ha_main().
+ *
+ * Returns the new manager once mgr_flags shows MGR_RUNNING, or NULL if the
+ * allocation or the thread creation failed.  llite_ha_cleanup() frees it.
+ *
+ * NOTE(review): mgr_flags is never assigned here before the wait below;
+ * this presumably relies on PORTAL_ALLOC returning zeroed memory -- confirm.
+ */
+struct lustre_ha_mgr *llite_ha_setup(void)
 {
         struct lustre_ha_thread d;
+        struct lustre_ha_mgr *mgr;
         int rc;
         ENTRY;
 
-        d.dev = dev;
+        PORTAL_ALLOC(mgr, sizeof(*mgr));
+        if (!mgr) { 
+                CERROR("out of memory\n");
+                LBUG();
+                RETURN(NULL); 
+        }
+        INIT_LIST_HEAD(&mgr->mgr_connections_lh);
+        INIT_LIST_HEAD(&mgr->mgr_troubled_lh);
+        spin_lock_init(&mgr->mgr_lock); 
+
         d.mgr = mgr;
-        d.name = name;
+        d.name = "lustre_hamgr";
 
         init_waitqueue_head(&mgr->mgr_waitq);
-
         init_waitqueue_head(&mgr->mgr_ctl_waitq);
+
         rc = kernel_thread(llite_ha_main, (void *) &d,
                            CLONE_VM | CLONE_FS | CLONE_FILES);
         if (rc < 0) {
                 CERROR("cannot start thread\n");
-                RETURN(-EINVAL);
+                /* No thread was created, so nothing else references mgr:
+                 * free it here instead of leaking it. */
+                PORTAL_FREE(mgr, sizeof(*mgr));
+                RETURN(NULL);
         }
+        /* d is on this stack frame and was handed to the new thread;
+         * presumably this wait keeps it valid until the thread is up. */
         wait_event(mgr->mgr_ctl_waitq, mgr->mgr_flags & MGR_RUNNING);
 
-        RETURN(0);
+        RETURN(mgr);
 }
 
 
@@ -139,5 +170,6 @@ int llite_ha_cleanup(struct lustre_ha_mgr *mgr)
         wake_up(&mgr->mgr_waitq);
         wait_event_interruptible(mgr->mgr_ctl_waitq,
                                  (mgr->mgr_flags & MGR_STOPPED));
-        return 0;
+        PORTAL_FREE(mgr, sizeof(*mgr));
+        RETURN(0);
 }
index 50bb08f..a897d38 100644 (file)
@@ -41,6 +41,8 @@ extern struct address_space_operations ll_aops;
 extern struct address_space_operations ll_dir_aops;
 struct super_operations ll_super_operations;
 
+static struct lustre_ha_mgr *llite_ha_mgr;
+
 static char *ll_read_opt(const char *opt, char *data)
 {
         char *value;
@@ -134,7 +136,7 @@ static struct super_block * ll_read_super(struct super_block *sb,
         }
 
         /* the first parameter should become an mds device no */
-        ptlrpc_init_client(-1, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
+        ptlrpc_init_client(llite_ha_mgr, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
                            &sbi->ll_mds_client);
         err = ptlrpc_connect_client("mds", &sbi->ll_mds_client,
                                     &sbi->ll_mds_peer);
@@ -145,6 +147,7 @@ static struct super_block * ll_read_super(struct super_block *sb,
 
         sbi->ll_super = sb;
         sbi->ll_rootino = 2;
+        sbi->ll_ha_mgr = llite_ha_mgr;
 
         sb->s_maxbytes = 1LL << 36;
         sb->s_blocksize = PAGE_SIZE;
@@ -419,13 +422,15 @@ static int __init init_lustre_lite(void)
         if (ll_file_data_slab == NULL)
                 return -ENOMEM;
 
+        llite_ha_mgr = llite_ha_setup();
         return register_filesystem(&lustre_lite_fs_type);
 }
 
+/*
+ * Module exit: unregister the filesystem, stop the HA manager if one was
+ * started, destroy the file-data slab and unregister the OBD type.
+ */
 static void __exit exit_lustre_lite(void)
 {
-        kmem_cache_destroy(ll_file_data_slab);
         unregister_filesystem(&lustre_lite_fs_type);
+        /* llite_ha_setup() returns NULL on failure and init_lustre_lite()
+         * does not check for that, so there may be no manager to stop;
+         * llite_ha_cleanup() dereferences its argument. */
+        if (llite_ha_mgr != NULL)
+                llite_ha_cleanup(llite_ha_mgr);
+        kmem_cache_destroy(ll_file_data_slab);
         obd_unregister_type(LUSTRE_LITE_NAME);
 }
 
index b2ee0ce..55630b9 100644 (file)
@@ -240,7 +240,7 @@ static int request_ioctl(struct inode *inode, struct file *file,
                 RETURN(-EINVAL);
         }
 
-        ptlrpc_init_client(-1, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &cl);
+        ptlrpc_init_client(NULL, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &cl);
         err = ptlrpc_connect_client("mds", &cl, &peer);
         if (err) {
                 CERROR("cannot create client\n");
index 2928d3c..4d2007a 100644 (file)
@@ -583,15 +583,15 @@ static int osc_setup(struct obd_device *obddev, obd_count len,
         struct osc_obd *osc = &obddev->u.osc;
         struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf;
         int rc;
-        int dev = data->ioc_dev;
         ENTRY;
 
         OBD_ALLOC(osc->osc_client, sizeof(*osc->osc_client));
         if (osc->osc_client == NULL)
                 RETURN(-ENOMEM);
 
-        ptlrpc_init_client(dev, OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
+        ptlrpc_init_client(NULL, OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
                                    osc->osc_client);
+
         rc = ptlrpc_connect_client("ost", osc->osc_client, &osc->osc_peer);
 
         if (rc == 0)
index 73d1324..5a6a5c1 100644 (file)
 #include <linux/obd_class.h>
 #include <linux/lustre_net.h>
 
-void ptlrpc_init_client(int dev, int req_portal, int rep_portal,
-                        struct ptlrpc_client *cl)
+/*
+ * Put a client connection under an HA manager: record the manager in the
+ * client and link the client onto the manager's list of managed
+ * connections while holding mgr_lock.
+ */
+void llite_ha_conn_manage(struct lustre_ha_mgr *mgr, struct ptlrpc_client *cli)
+{
+        struct list_head *managed = &mgr->mgr_connections_lh;
+
+        ENTRY;
+        cli->cli_ha_mgr = mgr;
+
+        spin_lock(&mgr->mgr_lock);
+        list_add(&cli->cli_ha_item, managed);
+        spin_unlock(&mgr->mgr_lock);
+        EXIT;
+}
+
+/*
+ * Report a failed client connection: under the manager's mgr_lock, unlink
+ * the client from whichever list it is on and link it onto the troubled
+ * list, then wake anyone sleeping on the manager's mgr_waitq.
+ *
+ * cli->cli_ha_mgr is dereferenced unconditionally.
+ */
+void llite_ha_conn_fail(struct ptlrpc_client *cli)
+{
+        struct lustre_ha_mgr *mgr;
+
+        ENTRY;
+        mgr = cli->cli_ha_mgr;
+
+        spin_lock(&mgr->mgr_lock);
+        list_del(&cli->cli_ha_item);
+        list_add(&cli->cli_ha_item, &mgr->mgr_troubled_lh);
+        spin_unlock(&mgr->mgr_lock);
+
+        wake_up(&mgr->mgr_waitq);
+        EXIT;
+}
+
+void ptlrpc_init_client(struct lustre_ha_mgr *mgr, int req_portal, int rep_portal,
+                          struct ptlrpc_client *cl)
 {
         memset(cl, 0, sizeof(*cl));
         spin_lock_init(&cl->cli_lock);
+        cl->cli_ha_mgr = mgr;
+        if (mgr)
+                llite_ha_conn_manage(mgr, cl);
         cl->cli_xid = 1;
         cl->cli_generation = 1;
         cl->cli_epoch = 1;
@@ -100,12 +124,16 @@ struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
                 CERROR("cannot pack request %d\n", rc);
                 RETURN(NULL);
         }
+        request->rq_time = CURRENT_TIME;
         request->rq_type = PTL_RPC_REQUEST;
         memcpy(&request->rq_peer, peer, sizeof(*peer));
         request->rq_reqmsg = (struct lustre_msg *)request->rq_reqbuf;
         request->rq_reqmsg->opc = HTON__u32(opcode);
         request->rq_reqmsg->xid = HTON__u32(request->rq_xid);
         request->rq_reqmsg->type = HTON__u32(request->rq_type);
+        request->rq_client = cl;
+        request->rq_req_portal = cl->cli_request_portal;
+        request->rq_reply_portal = cl->cli_reply_portal;
 
         RETURN(request);
 }
@@ -124,11 +152,19 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req)
 {
         int rc = 0;
 
+        schedule_timeout(3 * HZ);  /* 3 second timeout */
         if (req->rq_repbuf != NULL) {
                 req->rq_flags = PTL_RPC_REPLY;
                 GOTO(out, rc = 1);
         }
 
+        if (CURRENT_TIME - req->rq_time >= 3) { 
+                CERROR("-- REQ TIMEOUT --\n"); 
+                if (req->rq_client && req->rq_client->cli_ha_mgr)
+                        llite_ha_conn_fail(req->rq_client); 
+                return 0;
+        }
+
         if (sigismember(&(current->pending.signal), SIGKILL) ||
             sigismember(&(current->pending.signal), SIGTERM) ||
             sigismember(&(current->pending.signal), SIGINT)) {
@@ -195,9 +231,6 @@ int ptlrpc_queue_wait(struct ptlrpc_client *cl, struct ptlrpc_request *req)
 
         init_waitqueue_head(&req->rq_wait_for_rep);
 
-        req->rq_client = cl;
-        req->rq_req_portal = cl->cli_request_portal;
-        req->rq_reply_portal = cl->cli_reply_portal;
         rc = ptl_send_rpc(req, cl);
         if (rc) {
                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);