but this code hasn't been tested much.
+#ifndef _LUSTRE_HA_H
+#define _LUSTRE_HA_H
#define MGR_STOPPING 1
#define MGR_RUNNING 2
#define MGR_STOPPED 4
#define MGR_KILLED 8
#define MGR_EVENT 16
-#define MGR_RECOVERING 32
+#define MGR_WORKING 32
#define MGR_SIGNAL 64
struct lustre_ha_mgr {
wait_queue_head_t mgr_waitq;
wait_queue_head_t mgr_ctl_waitq;
spinlock_t mgr_lock;
+ time_t mgr_waketime; /* set to CURRENT_TIME by lustre_ha_check_event() when it reports an event */
+ struct list_head mgr_connections_lh; /* connections managed by the mgr */
+ struct list_head mgr_troubled_lh; /* connections in trouble */
};
struct lustre_ha_thread {
struct lustre_ha_mgr *mgr;
struct obd_device *dev;
};
+
+int llite_ha_cleanup(struct lustre_ha_mgr *mgr);
+struct lustre_ha_mgr *llite_ha_setup(void);
+void llite_ha_conn_fail(struct ptlrpc_client *cli);
+void llite_ha_conn_manage(struct lustre_ha_mgr *mgr, struct ptlrpc_client *cli);
+
+
+#endif
#include <linux/lustre_net.h>
#include <linux/lustre_mds.h>
+#include <linux/lustre_ha.h>
#include <linux/obdo.h>
#define LUSTRE_LITE_NAME "llite"
struct ptlrpc_client ll_mds_client;
struct lustre_peer ll_mds_peer;
struct ptlrpc_client ll_ost_client;
+ struct lustre_ha_mgr *ll_ha_mgr;
struct lustre_peer ll_ost_peer;
};
#include <linux/obd_class.h>
#include <portals/p30.h>
#include <linux/lustre_idl.h>
+#include <linux/lustre_ha.h>
/* FOO_REQUEST_PORTAL is for incoming requests on the FOO
* FOO_REPLY_PORTAL is for incoming replies on the FOO
__u32 cli_epoch; /* changes when peer changes */
__u32 cli_bootcount; /* peer's boot count */
struct semaphore cli_rpc_sem;
+ struct list_head cli_ha_item;
+ struct lustre_ha_mgr *cli_ha_mgr;
};
/* These do double-duty in rq_type and rq_flags */
char *rq_bulkbuf;
int rq_bulklen;
+ time_t rq_time;
void * rq_reply_handle;
wait_queue_head_t rq_wait_for_rep;
void ptlrpc_link_svc_me(struct ptlrpc_service *service, int i);
/* rpc/client.c */
-void ptlrpc_init_client(int dev, int req_portal, int rep_portal,
- struct ptlrpc_client *cl);
+void ptlrpc_init_client(struct lustre_ha_mgr *mgr, int req_portal, int rep_portal,
+ struct ptlrpc_client *cl);
int ptlrpc_connect_client(char *uuid, struct ptlrpc_client *cl,
struct lustre_peer *peer);
int ptlrpc_queue_wait(struct ptlrpc_client *cl, struct ptlrpc_request *req);
if (_IOC_TYPE(cmd) != IOC_LDLM_TYPE || _IOC_NR(cmd) < IOC_LDLM_MIN_NR ||
_IOC_NR(cmd) > IOC_LDLM_MAX_NR) {
CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+ _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+ EXIT;
+ return -EINVAL;
+ }
+
+#if 0
+ /* XX phil -- put the peer back in */
+
+ ptlrpc_init_client(NULL, LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, &cl);
+ err = ptlrpc_connect_client("ldlm", &cl, NULL);
+#endif
+ if (err) {
+ CERROR("cannot create client\n");
RETURN(-EINVAL);
}
*
* linux/mds/handler.c
*
- * Lustre Metadata Server (mds) request handler
+ * Lustre High Availability Daemon
*
* Copyright (C) 2001, 2002 Cluster File Systems, Inc.
*
*
* by Peter Braam <braam@clusterfs.com>
*
- * This server is single threaded at present (but can easily be multi threaded)
- *
*/
#define EXPORT_SYMTAB
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/locks.h>
+#include <linux/kmod.h>
#include <linux/quotaops.h>
#include <asm/unistd.h>
#include <asm/uaccess.h>
static int lustre_ha_check_event(struct lustre_ha_mgr *mgr)
{
+ int rc = 0; /* set to 1 when one of the checks below finds something to report */
+ ENTRY;
+
+ spin_lock(&mgr->mgr_lock); /* held across every flag and list check below */
+ if (!(mgr->mgr_flags & MGR_WORKING) &&
+ !list_empty(&mgr->mgr_troubled_lh) ) {
+ mgr->mgr_flags |= MGR_WORKING; /* troubled list is non-empty and the manager was not yet marked working */
+ mgr->mgr_waketime = CURRENT_TIME;
+ schedule_timeout(4*HZ); /* NOTE(review): called with mgr_lock held and from inside a wait-queue condition -- confirm this is safe */
+ CERROR("connection in trouble\n");
+ rc = 1;
+ }
+
+ if (!mgr->mgr_flags & MGR_WORKING && /* NOTE(review): '!' binds tighter than '&', so this is (!mgr_flags) & 32, which is always 0; the branch never runs -- confirm the intended test */
+ CURRENT_TIME >= mgr->mgr_waketime ) {
+ CERROR("woken up once more\n");
+ mgr->mgr_waketime = CURRENT_TIME;
+ schedule_timeout(4*HZ); /* NOTE(review): same concern as above -- mgr_lock is still held here */
+ rc = 1;
+ }
+
+ if (mgr->mgr_flags & MGR_STOPPING) { /* a stop request is reported as an event too */
+ CERROR("ha mgr stopping\n");
+ rc = 1;
+ }
-
- return 1;
+ spin_unlock(&mgr->mgr_lock);
+ RETURN(rc); /* used as the wait_event_interruptible() condition in llite_ha_main() */
}
+/*
+ * Run the ha_assist.sh helper script through call_usermodehelper() with a
+ * fixed HOME and PATH, and hand back that call's return value.
+ */
+static int llite_ha_upcall(void)
+{
+        char *helper_argv[] = {
+                "/usr/src/obd/utils/ha_assist.sh",
+                NULL
+        };
+        char *helper_envp[] = {
+                "HOME=/",
+                "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+                NULL
+        };
+
+        return call_usermodehelper(helper_argv[0], helper_argv, helper_envp);
+}
+
static int llite_ha_main(void *arg)
{
struct lustre_ha_thread *data = (struct lustre_ha_thread *)arg;
wait_event_interruptible(mgr->mgr_waitq,
lustre_ha_check_event(mgr));
- spin_lock(&mgr->mgr_lock);
- schedule_timeout(5 * HZ);
- if (mgr->mgr_flags & MGR_SIGNAL) {
- spin_unlock(&mgr->mgr_lock);
- EXIT;
- break;
- }
-
if (mgr->mgr_flags & MGR_STOPPING) {
spin_unlock(&mgr->mgr_lock);
+ CERROR("lustre_hamgr quitting\n"); /* NOTE(review): the spin_unlock() just above has no matching spin_lock() in the lines visible here now that the earlier one is removed -- confirm */
EXIT;
break;
}
- if (mgr->mgr_flags & MGR_EVENT) {
- mgr->mgr_flags = MGR_RUNNING;
-
- /* FIXME: If we move to an event-driven model,
- * we should put the request on the stack of
- * mds_handle instead. */
- CERROR("MGR event\n");
- continue;
- }
-
- CERROR("unknown break in service");
+ spin_lock(&mgr->mgr_lock); /* NOTE(review): lock stays held across llite_ha_upcall() and schedule_timeout() below, both of which may sleep -- confirm */
+ CERROR("lustre_hamgr woken up\n");
+ llite_ha_upcall(); /* return value of the upcall is not checked */
+ schedule_timeout(5 * HZ);
spin_unlock(&mgr->mgr_lock);
- EXIT;
- break;
}
mgr->mgr_thread = NULL;
mgr->mgr_flags = MGR_STOPPED;
wake_up(&mgr->mgr_ctl_waitq);
CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
- return 0;
+ RETURN(0); /* reached after mgr_flags is set to MGR_STOPPED and mgr_ctl_waitq is woken */
}
-
-int llite_ha_setup(struct obd_device *dev, struct lustre_ha_mgr *mgr,
- char *name)
+/*
+ * Allocate an HA manager, initialise its lists, lock and wait queues, start
+ * the llite_ha_main thread and wait until mgr_flags has MGR_RUNNING set.
+ *
+ * Returns the new manager, or NULL if the allocation or the thread start
+ * failed.  When the thread cannot be started the manager is freed before
+ * returning, so nothing is leaked on that path.
+ */
+struct lustre_ha_mgr *llite_ha_setup(void)
{
struct lustre_ha_thread d;
+ struct lustre_ha_mgr *mgr;
int rc;
ENTRY;
- d.dev = dev;
+ PORTAL_ALLOC(mgr, sizeof(*mgr));
+ if (!mgr) {
+ CERROR("out of memory\n");
+ LBUG();
+ RETURN(NULL);
+ }
+ INIT_LIST_HEAD(&mgr->mgr_connections_lh);
+ INIT_LIST_HEAD(&mgr->mgr_troubled_lh);
+ spin_lock_init(&mgr->mgr_lock);
+
d.mgr = mgr;
- d.name = name;
+ d.name = "lustre_hamgr";
init_waitqueue_head(&mgr->mgr_waitq);
-
init_waitqueue_head(&mgr->mgr_ctl_waitq);
+
rc = kernel_thread(llite_ha_main, (void *) &d,
CLONE_VM | CLONE_FS | CLONE_FILES);
if (rc < 0) {
CERROR("cannot start thread\n");
- RETURN(-EINVAL);
+ /* no thread was started for this manager: release it here */
+ PORTAL_FREE(mgr, sizeof(*mgr));
+ RETURN(NULL);
}
+ /* d is on this stack and its address was handed to the new thread */
wait_event(mgr->mgr_ctl_waitq, mgr->mgr_flags & MGR_RUNNING);
- RETURN(0);
+ RETURN(mgr);
}
wake_up(&mgr->mgr_waitq);
wait_event_interruptible(mgr->mgr_ctl_waitq,
(mgr->mgr_flags & MGR_STOPPED));
- return 0;
+ PORTAL_FREE(mgr, sizeof(*mgr));
+ RETURN(0);
}
extern struct address_space_operations ll_dir_aops;
struct super_operations ll_super_operations;
+static struct lustre_ha_mgr *llite_ha_mgr;
+
static char *ll_read_opt(const char *opt, char *data)
{
char *value;
}
/* the first parameter should become an mds device no */
- ptlrpc_init_client(-1, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
+ ptlrpc_init_client(llite_ha_mgr, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
&sbi->ll_mds_client);
err = ptlrpc_connect_client("mds", &sbi->ll_mds_client,
&sbi->ll_mds_peer);
sbi->ll_super = sb;
sbi->ll_rootino = 2;
+ sbi->ll_ha_mgr = llite_ha_mgr;
sb->s_maxbytes = 1LL << 36;
sb->s_blocksize = PAGE_SIZE;
if (ll_file_data_slab == NULL)
return -ENOMEM;
+ llite_ha_mgr = llite_ha_setup();
return register_filesystem(&lustre_lite_fs_type);
}
static void __exit exit_lustre_lite(void)
{
- kmem_cache_destroy(ll_file_data_slab);
unregister_filesystem(&lustre_lite_fs_type);
+ /*
+ * llite_ha_setup() returns NULL on failure and module init stores that
+ * result unchecked, so only tear the manager down if one exists.
+ */
+ if (llite_ha_mgr != NULL)
+ llite_ha_cleanup(llite_ha_mgr);
+ kmem_cache_destroy(ll_file_data_slab);
obd_unregister_type(LUSTRE_LITE_NAME);
}
RETURN(-EINVAL);
}
- ptlrpc_init_client(-1, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &cl);
+ ptlrpc_init_client(NULL, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &cl);
err = ptlrpc_connect_client("mds", &cl, &peer);
if (err) {
CERROR("cannot create client\n");
struct osc_obd *osc = &obddev->u.osc;
struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf;
int rc;
- int dev = data->ioc_dev;
ENTRY;
OBD_ALLOC(osc->osc_client, sizeof(*osc->osc_client));
if (osc->osc_client == NULL)
RETURN(-ENOMEM);
- ptlrpc_init_client(dev, OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
+ ptlrpc_init_client(NULL, OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
osc->osc_client);
+
rc = ptlrpc_connect_client("ost", osc->osc_client, &osc->osc_peer);
if (rc == 0)
#include <linux/obd_class.h>
#include <linux/lustre_net.h>
-void ptlrpc_init_client(int dev, int req_portal, int rep_portal,
- struct ptlrpc_client *cl)
+/*
+ * Put a client under the control of an HA manager: record the manager in
+ * the client and link the client onto the manager's connection list.
+ */
+void llite_ha_conn_manage(struct lustre_ha_mgr *mgr, struct ptlrpc_client *cli)
+{
+        struct list_head *managed = &mgr->mgr_connections_lh;
+
+        ENTRY;
+        cli->cli_ha_mgr = mgr;
+
+        /* the list insertion is done under mgr_lock */
+        spin_lock(&mgr->mgr_lock);
+        list_add(&cli->cli_ha_item, managed);
+        spin_unlock(&mgr->mgr_lock);
+        EXIT;
+}
+
+/*
+ * Report that a client's connection has failed: move the client onto its
+ * manager's troubled list and wake the manager's wait queue.
+ *
+ * A client that has no manager (cli_ha_mgr == NULL) is left untouched.
+ */
+void llite_ha_conn_fail(struct ptlrpc_client *cli)
+{
+        struct lustre_ha_mgr *mgr = cli->cli_ha_mgr;
+
+        ENTRY;
+        /* ptlrpc_init_client() may be given a NULL manager; such a client
+         * was never linked onto any list, so there is nothing to move */
+        if (mgr == NULL) {
+                EXIT;
+                return;
+        }
+
+        spin_lock(&mgr->mgr_lock);
+        list_del(&cli->cli_ha_item);
+        list_add(&cli->cli_ha_item, &mgr->mgr_troubled_lh);
+        spin_unlock(&mgr->mgr_lock);
+        wake_up(&mgr->mgr_waitq);
+        EXIT;
+}
+
+void ptlrpc_init_client(struct lustre_ha_mgr *mgr, int req_portal, int rep_portal,
+ struct ptlrpc_client *cl)
{
memset(cl, 0, sizeof(*cl));
spin_lock_init(&cl->cli_lock);
+ cl->cli_ha_mgr = mgr;
+ if (mgr)
+ llite_ha_conn_manage(mgr, cl);
cl->cli_xid = 1;
cl->cli_generation = 1;
cl->cli_epoch = 1;
CERROR("cannot pack request %d\n", rc);
RETURN(NULL);
}
+ request->rq_time = CURRENT_TIME;
request->rq_type = PTL_RPC_REQUEST;
memcpy(&request->rq_peer, peer, sizeof(*peer));
request->rq_reqmsg = (struct lustre_msg *)request->rq_reqbuf;
request->rq_reqmsg->opc = HTON__u32(opcode);
request->rq_reqmsg->xid = HTON__u32(request->rq_xid);
request->rq_reqmsg->type = HTON__u32(request->rq_type);
+ request->rq_client = cl;
+ request->rq_req_portal = cl->cli_request_portal;
+ request->rq_reply_portal = cl->cli_reply_portal;
RETURN(request);
}
{
int rc = 0;
+ schedule_timeout(3 * HZ); /* 3 second timeout */
if (req->rq_repbuf != NULL) {
req->rq_flags = PTL_RPC_REPLY;
GOTO(out, rc = 1);
}
+ if (CURRENT_TIME - req->rq_time >= 3) {
+ CERROR("-- REQ TIMEOUT --\n");
+ if (req->rq_client && req->rq_client->cli_ha_mgr)
+ llite_ha_conn_fail(req->rq_client);
+ return 0;
+ }
+
if (sigismember(&(current->pending.signal), SIGKILL) ||
sigismember(&(current->pending.signal), SIGTERM) ||
sigismember(&(current->pending.signal), SIGINT)) {
init_waitqueue_head(&req->rq_wait_for_rep);
- req->rq_client = cl;
- req->rq_req_portal = cl->cli_request_portal;
- req->rq_reply_portal = cl->cli_reply_portal;
rc = ptl_send_rpc(req, cl);
if (rc) {
CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);