Whamcloud - gitweb
LU-1239 ldlm: cascading client reconnects
author Vitaly Fertman <vitaly_fertman@xyratex.com>
Tue, 26 Jun 2012 11:37:47 +0000 (15:37 +0400)
committer Oleg Drokin <green@whamcloud.com>
Thu, 5 Jul 2012 18:26:42 +0000 (14:26 -0400)
It may happen that:
- the MDS is overloaded with enqueues; they consume all the threads on
  the MDS_REQUEST portal while waiting for a lock that a client holds;
- that client tries to reconnect, but the MDS is out of threads and
  the reconnection fails;
- other clients are waiting for their enqueue completions; they ping
  the MDS to check whether it is still alive, but although the ping is
  an HP RPC, there is no thread reserved for it. Thus, the other
  clients time out as well.

Ensure each service that handles HP RPCs has an extra thread reserved
for them; make MDS_CONNECT and OST_CONNECT HP RPCs.

Change-Id: I295aec6a2d2fb614d4b5f037068a3dcdda1a8b09
Signed-off-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Reviewed-by: Alexey Lyashkov <alexey_lyashkov@xyratex.com>
Reviewed-by: Andrew Perepechko <Andrew_Perepechko@xyratex.com>
Xyratex-bug-id: MRP-455
Reviewed-on: http://review.whamcloud.com/2355
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_net.h
lustre/ldlm/ldlm_lockd.c
lustre/mdt/mdt_handler.c
lustre/ost/ost_handler.c
lustre/ptlrpc/events.c
lustre/ptlrpc/service.c

index 77f1a98..5566584 100644 (file)
@@ -1856,6 +1856,7 @@ void ptlrpc_save_lock(struct ptlrpc_request *req,
 void ptlrpc_commit_replies(struct obd_export *exp);
 void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs);
 void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs);
+int ptlrpc_hpreq_handler(struct ptlrpc_request *req);
 struct ptlrpc_service *ptlrpc_register_service(
                                struct ptlrpc_service_conf *conf,
                                struct proc_dir_entry *proc_entry);
index 7a601e8..7833e72 100644 (file)
@@ -2202,7 +2202,8 @@ static int ldlm_cancel_hpreq_check(struct ptlrpc_request *req)
 
 static struct ptlrpc_hpreq_ops ldlm_cancel_hpreq_ops = {
         .hpreq_lock_match = ldlm_cancel_hpreq_lock_match,
-        .hpreq_check      = ldlm_cancel_hpreq_check
+       .hpreq_check      = ldlm_cancel_hpreq_check,
+       .hpreq_fini       = NULL,
 };
 
 static int ldlm_hpreq_handler(struct ptlrpc_request *req)
index f25a5e1..a39ce6e 100644 (file)
@@ -3991,6 +3991,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
                .psc_ops                = {
                        .so_req_handler         = mdt_regular_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = ptlrpc_hpreq_handler,
                },
        };
        m->mdt_regular_service = ptlrpc_register_service(&conf, procfs_entry);
@@ -4034,6 +4035,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
                .psc_ops                = {
                        .so_req_handler         = mdt_readpage_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
                },
        };
        m->mdt_readpage_service = ptlrpc_register_service(&conf, procfs_entry);
@@ -4080,6 +4082,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
                .psc_ops                = {
                        .so_req_handler         = mdt_regular_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
                },
        };
        m->mdt_setattr_service = ptlrpc_register_service(&conf, procfs_entry);
@@ -4115,6 +4118,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
                .psc_ops                = {
                        .so_req_handler         = mdt_mdsc_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
                },
        };
        m->mdt_mdsc_service = ptlrpc_register_service(&conf, procfs_entry);
@@ -4150,6 +4154,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
                .psc_ops                = {
                        .so_req_handler         = mdt_mdss_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
                },
         };
        m->mdt_mdss_service = ptlrpc_register_service(&conf, procfs_entry);
@@ -4187,6 +4192,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
                .psc_ops                = {
                        .so_req_handler         = mdt_dtss_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
                },
         };
        m->mdt_dtss_service = ptlrpc_register_service(&conf, procfs_entry);
@@ -4220,6 +4226,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
                .psc_ops                = {
                        .so_req_handler         = mdt_fld_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
                },
        };
        m->mdt_fld_service = ptlrpc_register_service(&conf, procfs_entry);
@@ -4256,6 +4263,7 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
                .psc_ops                = {
                        .so_req_handler         = mdt_xmds_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = ptlrpc_hpreq_handler,
                },
        };
        m->mdt_xmds_service = ptlrpc_register_service(&conf, procfs_entry);
index ec56759..b9405cb 100644 (file)
@@ -2480,6 +2480,7 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                .psc_ops                = {
                        .so_req_handler         = ost_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = ptlrpc_hpreq_handler,
                },
        };
        ost->ost_service = ptlrpc_register_service(&svc_conf,
@@ -2518,6 +2519,7 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                .psc_ops                = {
                        .so_req_handler         = ost_handle,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
                },
        };
        ost->ost_create_service = ptlrpc_register_service(&svc_conf,
@@ -2586,6 +2588,7 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                        .so_req_handler         = ost_handle,
                        .so_hpreq_handler       = ost_hpreq_handler,
                        .so_req_printer         = target_print_req,
+                       .so_hpreq_handler       = NULL,
                },
        };
        ost->ost_io_service = ptlrpc_register_service(&svc_conf,
index 6de9c20..c85ee23 100644 (file)
@@ -334,6 +334,7 @@ void request_in_callback(lnet_event_t *ev)
         req->rq_phase = RQ_PHASE_NEW;
         cfs_spin_lock_init(&req->rq_lock);
         CFS_INIT_LIST_HEAD(&req->rq_timed_list);
+       CFS_INIT_LIST_HEAD(&req->rq_exp_list);
         cfs_atomic_set(&req->rq_refcount, 1);
         if (ev->type == LNET_EVENT_PUT)
                 CDEBUG(D_INFO, "incoming req@%p x"LPU64" msgsize %u\n",
index b07dbe8..28bb89a 100644 (file)
@@ -1475,6 +1475,32 @@ static void ptlrpc_hpreq_fini(struct ptlrpc_request *req)
         EXIT;
 }
 
+static int ptlrpc_hpreq_check(struct ptlrpc_request *req)
+{
+       return 1;
+}
+
+static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = {
+       .hpreq_lock_match  = NULL,
+       .hpreq_check       = ptlrpc_hpreq_check,
+       .hpreq_fini        = NULL,
+};
+
+/* Hi-Priority RPC check by RPC operation code. */
+int ptlrpc_hpreq_handler(struct ptlrpc_request *req)
+{
+       int opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+       /* Check for export to let only reconnects for not yet evicted
+        * export to become a HP rpc. */
+       if ((req->rq_export != NULL) &&
+           (opc == OBD_PING || opc == MDS_CONNECT || opc == OST_CONNECT))
+               req->rq_ops = &ptlrpc_hpreq_common;
+
+       return 0;
+}
+EXPORT_SYMBOL(ptlrpc_hpreq_handler);
+
 /**
  * Make the request a high priority one.
  *
@@ -1527,13 +1553,7 @@ void ptlrpc_hpreq_reorder(struct ptlrpc_request *req)
 static int ptlrpc_server_hpreq_check(struct ptlrpc_service *svc,
                                      struct ptlrpc_request *req)
 {
-        ENTRY;
-
-        /* Check by request opc. */
-        if (OBD_PING == lustre_msg_get_opc(req->rq_reqmsg))
-                RETURN(1);
-
-        RETURN(ptlrpc_hpreq_init(svc, req));
+       return ptlrpc_hpreq_init(svc, req);
 }
 
 /** Check if a request is a high priority one. */