From: braam Date: Wed, 10 Apr 2002 19:25:43 +0000 (+0000) Subject: - More Peter's additions for the ha manager. This doesn't seem to break much - X-Git-Tag: v1_7_100~5777 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=9c856b9cd52de9edb028917044f5126bf1a2fabb;p=fs%2Flustre-release.git - More Peter's additions for the ha manager. This doesn't seem to break much - but this code hasn't been tested much. --- diff --git a/lustre/include/linux/lustre_ha.h b/lustre/include/linux/lustre_ha.h index 715055b..652a64c 100644 --- a/lustre/include/linux/lustre_ha.h +++ b/lustre/include/linux/lustre_ha.h @@ -1,10 +1,12 @@ +#ifndef _LUSTRE_HA_H +#define _LUSTRE_HA_H #define MGR_STOPPING 1 #define MGR_RUNNING 2 #define MGR_STOPPED 4 #define MGR_KILLED 8 #define MGR_EVENT 16 -#define MGR_RECOVERING 32 +#define MGR_WORKING 32 #define MGR_SIGNAL 64 struct lustre_ha_mgr { @@ -13,6 +15,9 @@ struct lustre_ha_mgr { wait_queue_head_t mgr_waitq; wait_queue_head_t mgr_ctl_waitq; spinlock_t mgr_lock; + time_t mgr_waketime; + struct list_head mgr_connections_lh; /* connections managed by the mgr */ + struct list_head mgr_troubled_lh; /* connections in trouble */ }; struct lustre_ha_thread { @@ -20,3 +25,11 @@ struct lustre_ha_thread { struct lustre_ha_mgr *mgr; struct obd_device *dev; }; + +int llite_ha_cleanup(struct lustre_ha_mgr *mgr); +struct lustre_ha_mgr *llite_ha_setup(void); +void llite_ha_conn_fail(struct ptlrpc_client *cli); +void llite_ha_conn_manage(struct lustre_ha_mgr *mgr, struct ptlrpc_client *cli); + + +#endif diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index 41a222b..df66e70 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -16,6 +16,7 @@ #include #include +#include #include #define LUSTRE_LITE_NAME "llite" @@ -45,6 +46,7 @@ struct ll_sb_info { struct ptlrpc_client ll_mds_client; struct lustre_peer ll_mds_peer; struct ptlrpc_client ll_ost_client; + struct lustre_ha_mgr *ll_ha_mgr; struct lustre_peer ll_ost_peer; }; diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index bd37fce..78bd73b 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -28,6 +28,7 @@ #include #include #include +#include /* FOO_REQUEST_PORTAL is for incoming requests on the FOO * FOO_REPLY_PORTAL is for incoming replies on the FOO @@ -79,6 +80,8 @@ struct ptlrpc_client { __u32 cli_epoch; /* changes when peer changes */ __u32 cli_bootcount; /* peer's boot count */ struct semaphore cli_rpc_sem; + struct list_head cli_ha_item; + struct lustre_ha_mgr *cli_ha_mgr; }; /* These do double-duty in rq_type and rq_flags */ @@ -112,6 +115,7 @@ struct ptlrpc_request { char *rq_bulkbuf; int rq_bulklen; + time_t rq_time; void * rq_reply_handle; wait_queue_head_t rq_wait_for_rep; @@ -199,8 +203,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, struct ptlrpc_client *cl); void ptlrpc_link_svc_me(struct ptlrpc_service *service, int i); /* rpc/client.c */ -void ptlrpc_init_client(int dev, int req_portal, int rep_portal, - struct ptlrpc_client *cl); +void ptlrpc_init_client(struct lustre_ha_mgr *mgr, int req_portal, int rep_portal, + struct ptlrpc_client *cl); int ptlrpc_connect_client(char *uuid, struct ptlrpc_client *cl, struct lustre_peer *peer); int ptlrpc_queue_wait(struct ptlrpc_client *cl, struct ptlrpc_request *req); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 7995590..8662db0 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -237,7 +237,19 @@ static int ldlm_iocontrol(int cmd, struct obd_conn *conn, int len, void *karg, if (_IOC_TYPE(cmd) != IOC_LDLM_TYPE || _IOC_NR(cmd) < IOC_LDLM_MIN_NR || _IOC_NR(cmd) > IOC_LDLM_MAX_NR) { CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); + _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); + EXIT; + return -EINVAL; + } + +#if 0 + /* XX phil -- put the peer back in */ + + ptlrpc_init_client(NULL, LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, &cl); + err = ptlrpc_connect_client("ldlm", &cl, NULL); +#endif + if (err) { + CERROR("cannot create client\n"); RETURN(-EINVAL); } diff --git a/lustre/llite/llite_ha.c b/lustre/llite/llite_ha.c index 2657d33..6a86554 100644 --- a/lustre/llite/llite_ha.c +++ b/lustre/llite/llite_ha.c @@ -3,7 +3,7 @@ * * linux/mds/handler.c * - * Lustre Metadata Server (mds) request handler + * Lustre High Availability Daemon * * Copyright (C) 2001, 2002 Cluster File Systems, Inc. * @@ -12,8 +12,6 @@ * * by Peter Braam * - * This server is single threaded at present (but can easily be multi threaded) - * */ #define EXPORT_SYMTAB @@ -23,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -36,12 +35,52 @@ static int lustre_ha_check_event(struct lustre_ha_mgr *mgr) { + int rc = 0; + ENTRY; + + spin_lock(&mgr->mgr_lock); + if (!(mgr->mgr_flags & MGR_WORKING) && + !list_empty(&mgr->mgr_troubled_lh) ) { + mgr->mgr_flags |= MGR_WORKING; + mgr->mgr_waketime = CURRENT_TIME; + schedule_timeout(4*HZ); + CERROR("connection in trouble\n"); + rc = 1; + } + + if (!mgr->mgr_flags & MGR_WORKING && + CURRENT_TIME >= mgr->mgr_waketime ) { + CERROR("woken up once more\n"); + mgr->mgr_waketime = CURRENT_TIME; + schedule_timeout(4*HZ); + rc = 1; + } + + if (mgr->mgr_flags & MGR_STOPPING) { + CERROR("ha mgr stopping\n"); + rc = 1; + } - - return 1; + spin_unlock(&mgr->mgr_lock); + RETURN(rc); } +static int llite_ha_upcall(void) +{ + char *argv[2]; + char *envp[3]; + + argv[0] = "/usr/src/obd/utils/ha_assist.sh"; + argv[1] = NULL; + + envp [0] = "HOME=/"; + envp [1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp [2] = NULL; + + return call_usermodehelper(argv[0], argv, envp); +} + static int llite_ha_main(void *arg) { struct lustre_ha_thread *data = (struct lustre_ha_thread *)arg; @@ -68,67 +107,59 @@ static int llite_ha_main(void *arg) wait_event_interruptible(mgr->mgr_waitq, lustre_ha_check_event(mgr)); - spin_lock(&mgr->mgr_lock); - schedule_timeout(5 * HZ); - if (mgr->mgr_flags & MGR_SIGNAL) { - spin_unlock(&mgr->mgr_lock); - EXIT; - break; - } - if (mgr->mgr_flags & MGR_STOPPING) { spin_unlock(&mgr->mgr_lock); + CERROR("lustre_hamgr quitting\n"); EXIT; break; } - if (mgr->mgr_flags & MGR_EVENT) { - mgr->mgr_flags = MGR_RUNNING; - - /* FIXME: If we move to an event-driven model, - * we should put the request on the stack of - * mds_handle instead. */ - CERROR("MGR event\n"); - continue; - } - - CERROR("unknown break in service"); + spin_lock(&mgr->mgr_lock); + CERROR("lustre_hamgr woken up\n"); + llite_ha_upcall(); + schedule_timeout(5 * HZ); spin_unlock(&mgr->mgr_lock); - EXIT; - break; } mgr->mgr_thread = NULL; mgr->mgr_flags = MGR_STOPPED; wake_up(&mgr->mgr_ctl_waitq); CDEBUG(D_NET, "mgr exiting process %d\n", current->pid); - return 0; + RETURN(0); } - -int llite_ha_setup(struct obd_device *dev, struct lustre_ha_mgr *mgr, - char *name) +struct lustre_ha_mgr *llite_ha_setup(void) { struct lustre_ha_thread d; + struct lustre_ha_mgr *mgr; int rc; ENTRY; - d.dev = dev; + PORTAL_ALLOC(mgr, sizeof(*mgr)); + if (!mgr) { + CERROR("out of memory\n"); + LBUG(); + RETURN(NULL); + } + INIT_LIST_HEAD(&mgr->mgr_connections_lh); + INIT_LIST_HEAD(&mgr->mgr_troubled_lh); + spin_lock_init(&mgr->mgr_lock); + d.mgr = mgr; - d.name = name; + d.name = "lustre_hamgr"; init_waitqueue_head(&mgr->mgr_waitq); - init_waitqueue_head(&mgr->mgr_ctl_waitq); + rc = kernel_thread(llite_ha_main, (void *) &d, CLONE_VM | CLONE_FS | CLONE_FILES); if (rc < 0) { CERROR("cannot start thread\n"); - RETURN(-EINVAL); + RETURN(NULL); } wait_event(mgr->mgr_ctl_waitq, mgr->mgr_flags & MGR_RUNNING); - RETURN(0); + RETURN(mgr); } @@ -139,5 +170,6 @@ int llite_ha_cleanup(struct lustre_ha_mgr *mgr) wake_up(&mgr->mgr_waitq); wait_event_interruptible(mgr->mgr_ctl_waitq, (mgr->mgr_flags & MGR_STOPPED)); - return 0; + PORTAL_FREE(mgr, sizeof(*mgr)); + RETURN(0); } diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 50bb08f..a897d38 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -41,6 +41,8 @@ extern struct address_space_operations ll_aops; extern struct address_space_operations ll_dir_aops; struct super_operations ll_super_operations; +static struct lustre_ha_mgr *llite_ha_mgr; + static char *ll_read_opt(const char *opt, char *data) { char *value; @@ -134,7 +136,7 @@ static struct super_block * ll_read_super(struct super_block *sb, } /* the first parameter should become an mds device no */ - ptlrpc_init_client(-1, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, + ptlrpc_init_client(llite_ha_mgr, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &sbi->ll_mds_client); err = ptlrpc_connect_client("mds", &sbi->ll_mds_client, &sbi->ll_mds_peer); @@ -145,6 +147,7 @@ static struct super_block * ll_read_super(struct super_block *sb, sbi->ll_super = sb; sbi->ll_rootino = 2; + sbi->ll_ha_mgr = llite_ha_mgr; sb->s_maxbytes = 1LL << 36; sb->s_blocksize = PAGE_SIZE; @@ -419,13 +422,15 @@ static int __init init_lustre_lite(void) if (ll_file_data_slab == NULL) return -ENOMEM; + llite_ha_mgr = llite_ha_setup(); return register_filesystem(&lustre_lite_fs_type); } static void __exit exit_lustre_lite(void) { - kmem_cache_destroy(ll_file_data_slab); unregister_filesystem(&lustre_lite_fs_type); + llite_ha_cleanup(llite_ha_mgr); + kmem_cache_destroy(ll_file_data_slab); obd_unregister_type(LUSTRE_LITE_NAME); } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index b2ee0ce..55630b9 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -240,7 +240,7 @@ static int request_ioctl(struct inode *inode, struct file *file, RETURN(-EINVAL); } - ptlrpc_init_client(-1, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &cl); + ptlrpc_init_client(NULL, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &cl); err = ptlrpc_connect_client("mds", &cl, &peer); if (err) { CERROR("cannot create client\n"); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 2928d3c..4d2007a 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -583,15 +583,15 @@ static int osc_setup(struct obd_device *obddev, obd_count len, struct osc_obd *osc = &obddev->u.osc; struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf; int rc; - int dev = data->ioc_dev; ENTRY; OBD_ALLOC(osc->osc_client, sizeof(*osc->osc_client)); if (osc->osc_client == NULL) RETURN(-ENOMEM); - ptlrpc_init_client(dev, OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, + ptlrpc_init_client(NULL, OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, osc->osc_client); + rc = ptlrpc_connect_client("ost", osc->osc_client, &osc->osc_peer); if (rc == 0) diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 73d1324..5a6a5c1 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -33,11 +33,35 @@ #include #include -void ptlrpc_init_client(int dev, int req_portal, int rep_portal, - struct ptlrpc_client *cl) +void llite_ha_conn_manage(struct lustre_ha_mgr *mgr, struct ptlrpc_client *cli) +{ + ENTRY; + cli->cli_ha_mgr = mgr; + spin_lock(&mgr->mgr_lock); + list_add(&cli->cli_ha_item, &mgr->mgr_connections_lh); + spin_unlock(&mgr->mgr_lock); + EXIT; +} + +void llite_ha_conn_fail(struct ptlrpc_client *cli) +{ + ENTRY; + spin_lock(&cli->cli_ha_mgr->mgr_lock); + list_del(&cli->cli_ha_item); + list_add(&cli->cli_ha_item, &cli->cli_ha_mgr->mgr_troubled_lh); + spin_unlock(&cli->cli_ha_mgr->mgr_lock); + wake_up(&cli->cli_ha_mgr->mgr_waitq); + EXIT; +} + +void ptlrpc_init_client(struct lustre_ha_mgr *mgr, int req_portal, int rep_portal, + struct ptlrpc_client *cl) { memset(cl, 0, sizeof(*cl)); spin_lock_init(&cl->cli_lock); + cl->cli_ha_mgr = mgr; + if (mgr) + llite_ha_conn_manage(mgr, cl); cl->cli_xid = 1; cl->cli_generation = 1; cl->cli_epoch = 1; @@ -100,12 +124,16 @@ struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl, CERROR("cannot pack request %d\n", rc); RETURN(NULL); } + request->rq_time = CURRENT_TIME; request->rq_type = PTL_RPC_REQUEST; memcpy(&request->rq_peer, peer, sizeof(*peer)); request->rq_reqmsg = (struct lustre_msg *)request->rq_reqbuf; request->rq_reqmsg->opc = HTON__u32(opcode); request->rq_reqmsg->xid = HTON__u32(request->rq_xid); request->rq_reqmsg->type = HTON__u32(request->rq_type); + request->rq_client = cl; + request->rq_req_portal = cl->cli_request_portal; + request->rq_reply_portal = cl->cli_reply_portal; RETURN(request); } @@ -124,11 +152,19 @@ static int ptlrpc_check_reply(struct ptlrpc_request *req) { int rc = 0; + schedule_timeout(3 * HZ); /* 3 second timeout */ if (req->rq_repbuf != NULL) { req->rq_flags = PTL_RPC_REPLY; GOTO(out, rc = 1); } + if (CURRENT_TIME - req->rq_time >= 3) { + CERROR("-- REQ TIMEOUT --\n"); + if (req->rq_client && req->rq_client->cli_ha_mgr) + llite_ha_conn_fail(req->rq_client); + return 0; + } + if (sigismember(&(current->pending.signal), SIGKILL) || sigismember(&(current->pending.signal), SIGTERM) || sigismember(&(current->pending.signal), SIGINT)) { @@ -195,9 +231,6 @@ int ptlrpc_queue_wait(struct ptlrpc_client *cl, struct ptlrpc_request *req) init_waitqueue_head(&req->rq_wait_for_rep); - req->rq_client = cl; - req->rq_req_portal = cl->cli_request_portal; - req->rq_reply_portal = cl->cli_reply_portal; rc = ptl_send_rpc(req, cl); if (rc) { CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);