Whamcloud - gitweb
LU-15118 ldlm: no free thread to process resend request 72/45272/2
Author: Andriy Skulysh <c17819@cray.com>
Wed, 14 Oct 2020 09:01:51 +0000 (12:01 +0300)
Committer: Oleg Drokin <green@whamcloud.com>
Wed, 27 Oct 2021 00:38:35 +0000 (00:38 +0000)
MDS grants lock request and sends a reply,
input request queue can be filled immediately
with conflicting lock enqueue request.
So there isn't any free thread to process resend of
first lock enqueue request if the client has failed
to receive the reply.

Process lock enqueue resends with existing lock on MDS
in high priority queue.

Change-Id: If7b94690100b44c774dc14231ed4902f701ed807
HPE-bug-id: LUS-9444
Signed-off-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Alexander Zarochentsev <c17826@cray.com>
Reviewed-by: Vitaly Fertman <c17818@cray.com>
Reviewed-by: Andrew Perepechko <c17827@cray.com>
Reviewed-on: https://review.whamcloud.com/45272
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mdt/mdt_mds.c

index 85b5b13..e94fed6 100644 (file)
@@ -159,6 +159,56 @@ static void mds_stop_ptlrpc_service(struct mds_device *m)
        EXIT;
 }
 
+/*
+ * High-priority classification check for resent LDLM_ENQUEUE requests.
+ *
+ * Per the commit intent: if the server already granted the lock but the
+ * client never saw the reply, the client's resend must not starve behind
+ * a full queue of conflicting enqueues, so it is promoted to the HP queue.
+ *
+ * Returns 1 when the request is a pure resend (MSG_RESENT set without
+ * MSG_REPLAY) of an enqueue whose lock already exists on this export and
+ * is fully granted in the requested mode; 0 otherwise; -EFAULT if the
+ * DLM request body cannot be unpacked.
+ */
+static int ldlm_enqueue_hpreq_check(struct ptlrpc_request *req)
+{
+       struct ldlm_request *dlm_req;
+       int rc = 0;
+       ENTRY;
+
+       /* Only pure resends qualify: replays (and replayed resends) are
+        * handled by the normal replay path, not promoted here. */
+       if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_REPLAY|MSG_RESENT)) !=
+           MSG_RESENT)
+               RETURN(0);
+
+       /* Unpack the DLM request body so the lock handle can be read. */
+       req_capsule_init(&req->rq_pill, req, RCL_SERVER);
+       req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE);
+       dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
+       if (dlm_req == NULL)
+               RETURN(-EFAULT);
+
+       if (dlm_req->lock_count > 0) {
+               struct ldlm_lock *lock;
+
+               /* Look the handle up in the export's lock hash; a hit
+                * returns a referenced lock, released below. */
+               lock = cfs_hash_lookup(req->rq_export->exp_lock_hash,
+                                       (void *)&dlm_req->lock_handle[0]);
+
+               DEBUG_REQ(D_RPCTRACE, req, "lock %p cookie 0x%llx",
+                       lock, dlm_req->lock_handle[0].cookie);
+               if (lock != NULL) {
+                       /* The lock exists and is already granted in the
+                        * requested mode: the original enqueue succeeded,
+                        * only the reply was lost, so fast-path the resend. */
+                       rc = lock->l_granted_mode == lock->l_req_mode;
+                       if (rc)
+                               LDLM_DEBUG(lock, "hpreq resend");
+                       LDLM_LOCK_RELEASE(lock);
+               }
+       }
+
+       RETURN(rc);
+}
+
+/* HP ops for resent LDLM_ENQUEUE requests: classification only — no
+ * lock-match or fini callbacks are needed for this case. */
+static struct ptlrpc_hpreq_ops ldlm_enqueue_hpreq_ops = {
+       .hpreq_lock_match = NULL,
+       .hpreq_check      = ldlm_enqueue_hpreq_check,
+       .hpreq_fini       = NULL,
+};
+
+/*
+ * Per-request HP handler for the MDS regular service (installed as
+ * .so_hpreq_handler): LDLM_ENQUEUE requests get the resend-aware
+ * ldlm_enqueue_hpreq_ops; everything else falls through to the generic
+ * ptlrpc HP handler.
+ *
+ * NOTE(review): ptlrpc_hpreq_handler()'s return value is discarded and
+ * this function always returns 0 — presumably the generic handler only
+ * sets req->rq_ops as a side effect; confirm against its definition.
+ */
+static int mds_hpreq_handler(struct ptlrpc_request *req)
+{
+       if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_ENQUEUE)
+               req->rq_ops = &ldlm_enqueue_hpreq_ops;
+       else
+               ptlrpc_hpreq_handler(req);
+       return 0;
+}
+
 static int mds_start_ptlrpc_service(struct mds_device *m)
 {
        static struct ptlrpc_service_conf conf;
@@ -203,7 +253,7 @@ static int mds_start_ptlrpc_service(struct mds_device *m)
                .psc_ops                = {
                        .so_req_handler         = tgt_request_handle,
                        .so_req_printer         = target_print_req,
-                       .so_hpreq_handler       = ptlrpc_hpreq_handler,
+                       .so_hpreq_handler       = mds_hpreq_handler,
                },
        };
        m->mds_regular_service = ptlrpc_register_service(&conf, &obd->obd_kset,