Whamcloud - gitweb
LU-16958 llite: migrate deadlock on not responding lock cancel 88/52388/13
authorBobi Jam <bobijam@whamcloud.com>
Wed, 13 Sep 2023 17:24:19 +0000 (01:24 +0800)
committerOleg Drokin <green@whamcloud.com>
Sat, 18 Nov 2023 21:40:48 +0000 (21:40 +0000)
lfs migrate race makes MDS hang with following backtrace

[ 3683.248584] [<0>] ldlm_completion_ast+0x78d/0x8e0 [ptlrpc]
[ 3683.250122] [<0>] ldlm_cli_enqueue_local+0x2fd/0x840 [ptlrpc]
[ 3683.251363] [<0>] mdt_object_local_lock+0x50e/0xb10 [mdt]
[ 3683.252615] [<0>] mdt_object_lock_internal+0x187/0x430 [mdt]
[ 3683.253793] [<0>] mdt_object_lock_try+0x22/0xa0 [mdt]
[ 3683.254857] [<0>] mdt_getattr_name_lock+0x1317/0x1dc0 [mdt]
[ 3683.256016] [<0>] mdt_intent_getattr+0x264/0x440 [mdt]
[ 3683.257105] [<0>] mdt_intent_opc+0x452/0xa80 [mdt]
[ 3683.258126] [<0>] mdt_intent_policy+0x1fd/0x390 [mdt]
[ 3683.259191] [<0>] ldlm_lock_enqueue+0x469/0xa90 [ptlrpc]
[ 3683.260350] [<0>] ldlm_handle_enqueue0+0x61a/0x16c0 [ptlrpc]
[ 3683.261596] [<0>] tgt_enqueue+0xa4/0x200 [ptlrpc]
[ 3683.262662] [<0>] tgt_request_handle+0xc9c/0x1a40 [ptlrpc]
[ 3683.263860] [<0>] ptlrpc_server_handle_request+0x323/0xbd0 [ptlrpc]
[ 3683.265220] [<0>] ptlrpc_main+0xbf3/0x1540 [ptlrpc]
[ 3683.266303] [<0>] kthread+0x134/0x150
[ 3683.267111] [<0>] ret_from_fork+0x35/0x40

The deadlock happens as follows:
T1:
vvp_io_init()
  ->ll_layout_refresh() <= take lli_layout_mutex
  ->ll_layout_intent()
  ->ll_take_md_lock()  <= take the CR layout lock ref
  ->ll_layout_conf()
    ->vvp_prune()
    ->vvp_inode_ops() <= release lli_layout_mtex
    ->vvp_inode_ops() <= try to acquire lli_layout_mutex
    -> racer wait here for T2
T2:
->ll_file_write_iter()
  ->vvp_io_init()
    ->ll_layout_refresh() <= take lli_layout_mutex
    ->ll_layout_intent() <= Request layout from MDT
    -> racer wait from server...

And server want to cancel the CR layout lock T1 hold, and it won't
happen. Also T1 could has take extent ldlm lock while waiting
lli_layout_mutex hold by T2, and ofd_destroy_hdl does not get the
lock cancellation response from T1.

lli_layout_mutex is only needed for enqueuing layout lock from server,
so that ll_layout_conf() does not involve with lli_layout_mutex.

Fixes: 8f2c1592c3 ("LU-16958 llite: migrate vs regular ops deadlock")
Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: Ib94de2c63544c3a962199aa0537418255980ae8c
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52388
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/cl_object.h
lustre/llite/file.c
lustre/llite/vvp_object.c
lustre/lov/lov_cl_internal.h
lustre/lov/lov_object.c

index 6689a30..e24e0cb 100644 (file)
@@ -306,8 +306,6 @@ enum coo_inode_opc {
        COIO_INODE_UNLOCK,
        COIO_SIZE_LOCK,
        COIO_SIZE_UNLOCK,
-       COIO_LAYOUT_LOCK,
-       COIO_LAYOUT_UNLOCK,
 };
 
 /**
index 5178a27..d33f115 100644 (file)
@@ -6323,10 +6323,6 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
        LASSERT(fid_is_sane(ll_inode2fid(inode)));
        LASSERT(S_ISREG(inode->i_mode));
 
-       /* take layout lock mutex to enqueue layout lock exclusively. */
-       mutex_lock(&lli->lli_layout_mutex);
-       lli->lli_layout_lock_owner = current;
-
        while (1) {
                /* mostly layout lock is caching on the local side, so try to
                 * match it before grabbing layout lock mutex. */
@@ -6341,15 +6337,16 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
                        break;
                }
 
+               /* take layout lock mutex to enqueue layout lock exclusively. */
+               mutex_lock(&lli->lli_layout_mutex);
                rc = ll_layout_intent(inode, &intent);
+               mutex_unlock(&lli->lli_layout_mutex);
                if (rc != 0)
                        break;
        }
 
        if (rc == 0)
                *gen = ll_layout_version_get(lli);
-       lli->lli_layout_lock_owner = NULL;
-       mutex_unlock(&lli->lli_layout_mutex);
 
        RETURN(rc);
 }
index 42a5148..a931f66 100644 (file)
@@ -271,20 +271,6 @@ static int vvp_inode_ops(const struct lu_env *env, struct cl_object *obj,
                else
                        rc = -ENOLCK;
                break;
-       case COIO_LAYOUT_LOCK:
-               if (lli->lli_layout_lock_owner != current) {
-                       mutex_lock(&lli->lli_layout_mutex);
-                       lli->lli_layout_lock_owner = current;
-               }
-               break;
-       case COIO_LAYOUT_UNLOCK:
-               if (lli->lli_layout_lock_owner == current) {
-                       lli->lli_layout_lock_owner = NULL;
-                       mutex_unlock(&lli->lli_layout_mutex);
-               } else {
-                       rc = -ENOLCK;
-               }
-               break;
        default:
                rc = -EINVAL;
                break;
index a2f5b45..0346ab2 100644 (file)
@@ -242,6 +242,7 @@ struct lov_mirror_entry {
 enum lov_object_flags {
        /* Layout is invalid, set when layout lock is lost */
        LO_LAYOUT_INVALID       = 0x1,
+       LO_NEED_INODE_LOCK      = 0x2,
 };
 
 /**
index 787f2e1..a265e6a 100644 (file)
@@ -1310,8 +1310,11 @@ static int lov_layout_change(const struct lu_env *unused,
        new_ops = &lov_dispatch[llt];
 
        rc = cl_object_prune(env, &lov->lo_cl);
-       if (rc != 0)
+       if (rc != 0) {
+               if (rc == -EAGAIN)
+                       set_bit(LO_NEED_INODE_LOCK, &lov->lo_obj_flags);
                GOTO(out, rc);
+       }
 
        rc = old_ops->llo_delete(env, lov, &lov->u);
        if (rc != 0)
@@ -1394,13 +1397,12 @@ out_lsm:
 static int lov_conf_set(const struct lu_env *env, struct cl_object *obj,
                        const struct cl_object_conf *conf)
 {
-       struct lov_stripe_md    *lsm = NULL;
-       struct lov_object       *lov = cl2lov(obj);
-       int                      result = 0;
+       struct lov_stripe_md *lsm = NULL;
+       struct lov_object *lov = cl2lov(obj);
        struct cl_object *top = cl_object_top(obj);
-       bool unlock_inode = false;
-       bool lock_inode_size = false;
-       bool lock_layout = false;
+       bool lock_inode = false;
+       bool inode_size_locked = false;
+       int result = 0;
        ENTRY;
 
        if (conf->coc_opc == OBJECT_CONF_SET &&
@@ -1471,9 +1473,11 @@ retry:
                GOTO(out, result = -ERESTARTSYS);
        }
 
+       clear_bit(LO_NEED_INODE_LOCK, &lov->lo_obj_flags);
        result = lov_layout_change(env, lov, lsm, conf);
        if (result) {
-               if (result == -EAGAIN) {
+               if (result == -EAGAIN &&
+                   test_bit(LO_NEED_INODE_LOCK, &lov->lo_obj_flags)) {
                        /**
                         * we need unlocked lov conf and get inode lock.
                         * It's possible we have already taken inode's size
@@ -1481,42 +1485,28 @@ retry:
                         * order, lest deadlock happens:
                         *   inode lock        (ll_inode_lock())
                         *   inode size lock   (ll_inode_size_lock())
-                        *   inode layout lock (ll_layout_refresh())
                         *   lov conf lock     (lov_conf_lock())
                         *
                         * e.g.
                         *   vfs_setxattr                inode locked
                         *     ll_lov_setstripe_ea_info  inode size locked
                         *       ll_prep_inode
-                        *         ll_file_inode_init
-                        *           cl_conf_set
-                        *             lov_conf_set      lov conf locked
-                        *
-                        *   ll_migrate                  inode locked
-                        *     ...
-                        *       ll_layout_refresh       inode layout locked
-                        *         ll_layout_conf
+                        *         cl_file_inode_init
                         *           cl_conf_set
                         *             lov_conf_set      lov conf locked
                         */
                        lov_conf_unlock(lov);
-                       if (cl_object_inode_ops(env, top, COIO_LAYOUT_UNLOCK,
-                                               NULL) == 0)
-                               lock_layout = true;
                        if (cl_object_inode_ops(env, top, COIO_SIZE_UNLOCK,
                                                NULL) == 0)
-                               lock_inode_size = true;
+                               inode_size_locked = true;
 
                        /* take lock in order */
                        if (cl_object_inode_ops(
                                        env, top, COIO_INODE_LOCK, NULL) == 0)
-                               unlock_inode = true;
-                       if (lock_inode_size)
+                               lock_inode = true;
+                       if (inode_size_locked)
                                cl_object_inode_ops(env, top, COIO_SIZE_LOCK,
                                                    NULL);
-                       if (lock_layout)
-                               cl_object_inode_ops(env, top, COIO_LAYOUT_LOCK,
-                                                   NULL);
                        goto retry;
                }
                set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags);
@@ -1527,7 +1517,7 @@ retry:
 
 out:
        lov_conf_unlock(lov);
-       if (unlock_inode)
+       if (lock_inode)
                cl_object_inode_ops(env, top, COIO_INODE_UNLOCK, NULL);
 out_lsm:
        lov_lsm_put(lsm);