From: Andriy Skulysh
Date: Wed, 14 Oct 2020 09:01:51 +0000 (+0300)
Subject: LU-15118 ldlm: no free thread to process resend request
X-Git-Tag: 2.14.56~126
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=7e8f25ed3cd8b5435f92ba8b343aabfe0a180c5b;ds=sidebyside

LU-15118 ldlm: no free thread to process resend request

MDS grants lock request and sends a reply, input request queue can be
filled immediately with conflicting lock enqueue request. So there isn't
any free thread to process resend of first lock enqueue request if the
client has failed to receive the reply.

Process lock enqueue resends with existing lock on MDS in high priority
queue.

Change-Id: If7b94690100b44c774dc14231ed4902f701ed807
HPE-bug-id: LUS-9444
Signed-off-by: Andriy Skulysh
Reviewed-by: Alexander Boyko
Reviewed-by: Alexander Zarochentsev
Reviewed-by: Vitaly Fertman
Reviewed-by: Andrew Perepechko
Reviewed-on: https://review.whamcloud.com/45272
Tested-by: jenkins
Reviewed-by: Andrew Perepechko
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---

diff --git a/lustre/mdt/mdt_mds.c b/lustre/mdt/mdt_mds.c
index 85b5b13..e94fed6 100644
--- a/lustre/mdt/mdt_mds.c
+++ b/lustre/mdt/mdt_mds.c
@@ -159,6 +159,56 @@ static void mds_stop_ptlrpc_service(struct mds_device *m)
 	EXIT;
 }
 
+static int ldlm_enqueue_hpreq_check(struct ptlrpc_request *req)
+{
+	struct ldlm_request *dlm_req;
+	int rc = 0;
+	ENTRY;
+
+	if ((lustre_msg_get_flags(req->rq_reqmsg) & (MSG_REPLAY|MSG_RESENT)) !=
+	    MSG_RESENT)
+		RETURN(0);
+
+	req_capsule_init(&req->rq_pill, req, RCL_SERVER);
+	req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE);
+	dlm_req = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
+	if (dlm_req == NULL)
+		RETURN(-EFAULT);
+
+	if (dlm_req->lock_count > 0) {
+		struct ldlm_lock *lock;
+
+		lock = cfs_hash_lookup(req->rq_export->exp_lock_hash,
+				       (void *)&dlm_req->lock_handle[0]);
+
+		DEBUG_REQ(D_RPCTRACE, req, "lock %p cookie 0x%llx",
+			  lock, dlm_req->lock_handle[0].cookie);
+		if (lock != NULL) {
+			rc = lock->l_granted_mode == lock->l_req_mode;
+			if (rc)
+				LDLM_DEBUG(lock, "hpreq resend");
+			LDLM_LOCK_RELEASE(lock);
+		}
+	}
+
+	RETURN(rc);
+}
+
+static struct ptlrpc_hpreq_ops ldlm_enqueue_hpreq_ops = {
+	.hpreq_lock_match = NULL,
+	.hpreq_check = ldlm_enqueue_hpreq_check,
+	.hpreq_fini = NULL,
+};
+
+static int mds_hpreq_handler(struct ptlrpc_request *req)
+{
+	if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_ENQUEUE)
+		req->rq_ops = &ldlm_enqueue_hpreq_ops;
+	else
+		ptlrpc_hpreq_handler(req);
+	return 0;
+}
+
 static int mds_start_ptlrpc_service(struct mds_device *m)
 {
 	static struct ptlrpc_service_conf conf;
@@ -203,7 +253,7 @@ static int mds_start_ptlrpc_service(struct mds_device *m)
 		.psc_ops = {
 			.so_req_handler = tgt_request_handle,
 			.so_req_printer = target_print_req,
-			.so_hpreq_handler = ptlrpc_hpreq_handler,
+			.so_hpreq_handler = mds_hpreq_handler,
 		},
 	};
 	m->mds_regular_service = ptlrpc_register_service(&conf, &obd->obd_kset,