From: Vitaly Fertman
Date: Thu, 9 Oct 2014 15:18:34 +0000 (-0400)
Subject: LU-5579 ldlm: re-sent enqueue vs lock destroy race
X-Git-Tag: 2.6.54~5
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=e77a7afe261cc2a9393adf4ffe59401c830d16ec;ds=sidebyside

LU-5579 ldlm: re-sent enqueue vs lock destroy race

Upon lock enqueue re-send, lock is pinned by ldlm_handle_enqueue0,
however it may race with client eviction or even lock cancel (if
a reply for the original RPC finally reached the client) and the
lock cannot be found by cookie anymore:

 ASSERTION( lock != NULL ) failed: Invalid lock handle

Signed-off-by: Vitaly Fertman
Change-Id: I9d8156bf78a1b83ac22ffaa1148feb43bef37b1a
Xyratex-bug-id: MRP-2094
Reviewed-on: http://review.whamcloud.com/11839
Tested-by: Jenkins
Reviewed-by: James Simmons
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---

diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c
index 204ebd5..c1e5ea8 100644
--- a/lustre/mdt/mdt_handler.c
+++ b/lustre/mdt/mdt_handler.c
@@ -1351,7 +1351,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
 		rc = mdt_check_resent_lock(info, child, lhc);
 		if (rc < 0) {
-			RETURN(-EPROTO);
+			RETURN(rc);
 		} else if (rc > 0) {
 			mdt_lock_handle_init(lhc);
 			mdt_lock_reg_init(lhc, LCK_PR);
@@ -2329,8 +2329,14 @@ int mdt_check_resent_lock(struct mdt_thread_info *info,
 		lock = ldlm_handle2lock(&lhc->mlh_reg_lh);
 		LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT);
-		LASSERTF(lock != NULL, "Invalid lock handle "LPX64"\n",
-			 lhc->mlh_reg_lh.cookie);
+		if (lock == NULL) {
+			/* Lock is pinned by ldlm_handle_enqueue0() as it is
+			 * a resend case, however, it could be already destroyed
+			 * due to client eviction or a raced cancel RPC. */
+			LDLM_DEBUG_NOLOCK("Invalid lock handle "LPX64"\n",
+					  lhc->mlh_reg_lh.cookie);
+			RETURN(-ESTALE);
+		}
 		if (!fid_res_name_eq(mdt_object_fid(mo),
 				     &lock->l_resource->lr_name)) {