Whamcloud - gitweb
LU-14999 mdt: Deadlock on parent during resend 85/44885/9
authorAndriy Skulysh <c17819@cray.com>
Sun, 6 Sep 2020 09:21:14 +0000 (12:21 +0300)
committerOleg Drokin <green@whamcloud.com>
Wed, 3 Nov 2021 02:38:13 +0000 (02:38 +0000)
Parent-child lock order gets broken during resend as
there is child lock already but there isn't parent lock
and MDS tries to lock it again.

Don't lookup child on resend, extract fid from the lock instead.

Change-Id: I443648a8162e770c63fd087dd534c27e7c637c40
HPE-bug-id: LUS-9306
Signed-off-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexander Boyko <c17825@cray.com>
Reviewed-by: Alexander Zarochentsev <c17826@cray.com>
Reviewed-on: https://review.whamcloud.com/44885
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Vitaly Fertman <vitaly.fertman@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mdt/mdt_handler.c

index a11a9bf..3040be6 100644 (file)
@@ -2175,7 +2175,25 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                GOTO(out_child, rc = -ESTALE);
        }
 
                GOTO(out_child, rc = -ESTALE);
        }
 
-       if (!(info->mti_body->mbo_valid & OBD_MD_NAMEHASH) &&
+       if (!child && is_resent) {
+               lock = ldlm_handle2lock(&lhc->mlh_reg_lh);
+               if (lock == NULL) {
+                       /* Lock is pinned by ldlm_handle_enqueue0() as it is
+                        * a resend case, however, it could be already destroyed
+                        * due to client eviction or a raced cancel RPC.
+                        */
+                       LDLM_DEBUG_NOLOCK("Invalid lock handle %#llx",
+                                         lhc->mlh_reg_lh.cookie);
+                       RETURN(-ESTALE);
+               }
+               fid_extract_from_res_name(child_fid,
+                                         &lock->l_resource->lr_name);
+               LDLM_LOCK_PUT(lock);
+               child = mdt_object_find(info->mti_env, info->mti_mdt,
+                                       child_fid);
+               if (IS_ERR(child))
+                       RETURN(PTR_ERR(child));
+       } else if (!(info->mti_body->mbo_valid & OBD_MD_NAMEHASH) &&
            lu_name_is_valid(lname)) {
                if (info->mti_body->mbo_valid == OBD_MD_FLID) {
                        rc = mdt_raw_lookup(info, parent, lname);
            lu_name_is_valid(lname)) {
                if (info->mti_body->mbo_valid == OBD_MD_FLID) {
                        rc = mdt_raw_lookup(info, parent, lname);
@@ -2300,19 +2318,22 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
        /* finally, we can get attr for child. */
        rc = mdt_getattr_internal(info, child, ma_need);
        if (unlikely(rc != 0)) {
        /* finally, we can get attr for child. */
        rc = mdt_getattr_internal(info, child, ma_need);
        if (unlikely(rc != 0)) {
-               mdt_object_unlock(info, child, lhc, 1);
+               if (!is_resent)
+                       mdt_object_unlock(info, child, lhc, 1);
                GOTO(out_child, rc);
        }
 
        rc = mdt_pack_secctx_in_reply(info, child);
        if (unlikely(rc)) {
                GOTO(out_child, rc);
        }
 
        rc = mdt_pack_secctx_in_reply(info, child);
        if (unlikely(rc)) {
-               mdt_object_unlock(info, child, lhc, 1);
+               if (!is_resent)
+                       mdt_object_unlock(info, child, lhc, 1);
                GOTO(out_child, rc);
        }
 
        rc = mdt_pack_encctx_in_reply(info, child);
        if (unlikely(rc)) {
                GOTO(out_child, rc);
        }
 
        rc = mdt_pack_encctx_in_reply(info, child);
        if (unlikely(rc)) {
-               mdt_object_unlock(info, child, lhc, 1);
+               if (!is_resent)
+                       mdt_object_unlock(info, child, lhc, 1);
                GOTO(out_child, rc);
        }
 
                GOTO(out_child, rc);
        }