From e976ee72602477e54d26693aaeb84709ea5fd38f Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Tue, 26 Jun 2012 15:37:47 +0400 Subject: [PATCH] LU-1239 ldlm: cascading client reconnects It may happen that - MDS is overloaded with enqueues, they consume all the threads on MDS_REQUEST portal and waiting for a lock a client holds; - that client tries to re-connect but MDS is out of threads and re-connection fails; - other clients are waiting for their enqueue completions, they try to ping MDS if it is still alive, but despite the fact it is a HP-rpc, there is no thread reserved for it. Thus, other clients get timed out as well. Ensure each service which handles HP-rpc has an extra thread reserved for them; make MDS_CONNECT and OST_CONNECT HP-rpc. Change-Id: I295aec6a2d2fb614d4b5f037068a3dcdda1a8b09 Signed-off-by: Vitaly Fertman Reviewed-by: Alexey Lyashkov Reviewed-by: Andrew Perepechko Xyratex-bug-id: MRP-455 Reviewed-on: http://review.whamcloud.com/2355 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lustre_net.h | 1 + lustre/ldlm/ldlm_lockd.c | 3 ++- lustre/mdt/mdt_handler.c | 8 ++++++++ lustre/ost/ost_handler.c | 3 +++ lustre/ptlrpc/events.c | 1 + lustre/ptlrpc/service.c | 34 +++++++++++++++++++++++++++------- 6 files changed, 42 insertions(+), 8 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 77f1a98..5566584 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -1856,6 +1856,7 @@ void ptlrpc_save_lock(struct ptlrpc_request *req, void ptlrpc_commit_replies(struct obd_export *exp); void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs); void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs); +int ptlrpc_hpreq_handler(struct ptlrpc_request *req); struct ptlrpc_service *ptlrpc_register_service( struct ptlrpc_service_conf *conf, struct proc_dir_entry *proc_entry); diff --git a/lustre/ldlm/ldlm_lockd.c 
b/lustre/ldlm/ldlm_lockd.c index 7a601e8..7833e72 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -2202,7 +2202,8 @@ static int ldlm_cancel_hpreq_check(struct ptlrpc_request *req) static struct ptlrpc_hpreq_ops ldlm_cancel_hpreq_ops = { .hpreq_lock_match = ldlm_cancel_hpreq_lock_match, - .hpreq_check = ldlm_cancel_hpreq_check + .hpreq_check = ldlm_cancel_hpreq_check, + .hpreq_fini = NULL, }; static int ldlm_hpreq_handler(struct ptlrpc_request *req) diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index f25a5e1..a39ce6e 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -3991,6 +3991,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) .psc_ops = { .so_req_handler = mdt_regular_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = ptlrpc_hpreq_handler, }, }; m->mdt_regular_service = ptlrpc_register_service(&conf, procfs_entry); @@ -4034,6 +4035,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) .psc_ops = { .so_req_handler = mdt_readpage_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = NULL, }, }; m->mdt_readpage_service = ptlrpc_register_service(&conf, procfs_entry); @@ -4080,6 +4082,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) .psc_ops = { .so_req_handler = mdt_regular_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = NULL, }, }; m->mdt_setattr_service = ptlrpc_register_service(&conf, procfs_entry); @@ -4115,6 +4118,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) .psc_ops = { .so_req_handler = mdt_mdsc_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = NULL, }, }; m->mdt_mdsc_service = ptlrpc_register_service(&conf, procfs_entry); @@ -4150,6 +4154,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) .psc_ops = { .so_req_handler = mdt_mdss_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = NULL, }, }; m->mdt_mdss_service = ptlrpc_register_service(&conf, procfs_entry); 
@@ -4187,6 +4192,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) .psc_ops = { .so_req_handler = mdt_dtss_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = NULL, }, }; m->mdt_dtss_service = ptlrpc_register_service(&conf, procfs_entry); @@ -4220,6 +4226,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) .psc_ops = { .so_req_handler = mdt_fld_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = NULL, }, }; m->mdt_fld_service = ptlrpc_register_service(&conf, procfs_entry); @@ -4256,6 +4263,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m) .psc_ops = { .so_req_handler = mdt_xmds_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = ptlrpc_hpreq_handler, }, }; m->mdt_xmds_service = ptlrpc_register_service(&conf, procfs_entry); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index ec56759..b9405cb 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -2480,6 +2480,7 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg) .psc_ops = { .so_req_handler = ost_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = ptlrpc_hpreq_handler, }, }; ost->ost_service = ptlrpc_register_service(&svc_conf, @@ -2518,6 +2519,7 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg) .psc_ops = { .so_req_handler = ost_handle, .so_req_printer = target_print_req, + .so_hpreq_handler = NULL, }, }; ost->ost_create_service = ptlrpc_register_service(&svc_conf, diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 6de9c20..c85ee23 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -334,6 +334,7 @@ void 
request_in_callback(lnet_event_t *ev) req->rq_phase = RQ_PHASE_NEW; cfs_spin_lock_init(&req->rq_lock); CFS_INIT_LIST_HEAD(&req->rq_timed_list); + CFS_INIT_LIST_HEAD(&req->rq_exp_list); cfs_atomic_set(&req->rq_refcount, 1); if (ev->type == LNET_EVENT_PUT) CDEBUG(D_INFO, "incoming req@%p x"LPU64" msgsize %u\n", diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index b07dbe8..28bb89a 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1475,6 +1475,32 @@ static void ptlrpc_hpreq_fini(struct ptlrpc_request *req) EXIT; } +static int ptlrpc_hpreq_check(struct ptlrpc_request *req) +{ + return 1; +} + +static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = { + .hpreq_lock_match = NULL, + .hpreq_check = ptlrpc_hpreq_check, + .hpreq_fini = NULL, +}; + +/* Hi-Priority RPC check by RPC operation code. */ +int ptlrpc_hpreq_handler(struct ptlrpc_request *req) +{ + int opc = lustre_msg_get_opc(req->rq_reqmsg); + + /* Check for export to let only reconnects for not yet evicted + * export to become a HP rpc. */ + if ((req->rq_export != NULL) && + (opc == OBD_PING || opc == MDS_CONNECT || opc == OST_CONNECT)) + req->rq_ops = &ptlrpc_hpreq_common; + + return 0; +} +EXPORT_SYMBOL(ptlrpc_hpreq_handler); + /** * Make the request a high priority one. * @@ -1527,13 +1553,7 @@ void ptlrpc_hpreq_reorder(struct ptlrpc_request *req) static int ptlrpc_server_hpreq_check(struct ptlrpc_service *svc, struct ptlrpc_request *req) { - ENTRY; - - /* Check by request opc. */ - if (OBD_PING == lustre_msg_get_opc(req->rq_reqmsg)) - RETURN(1); - - RETURN(ptlrpc_hpreq_init(svc, req)); + return ptlrpc_hpreq_init(svc, req); } /** Check if a request is a high priority one. */ -- 1.8.3.1