From 37646c74bf884c535149d530af840d728814792b Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Thu, 14 Sep 2023 01:24:19 +0800 Subject: [PATCH] LU-16958 llite: migrate deadlock on not responding lock cancel lfs migrate race makes MDS hang with following backtrace [ 3683.248584] [<0>] ldlm_completion_ast+0x78d/0x8e0 [ptlrpc] [ 3683.250122] [<0>] ldlm_cli_enqueue_local+0x2fd/0x840 [ptlrpc] [ 3683.251363] [<0>] mdt_object_local_lock+0x50e/0xb10 [mdt] [ 3683.252615] [<0>] mdt_object_lock_internal+0x187/0x430 [mdt] [ 3683.253793] [<0>] mdt_object_lock_try+0x22/0xa0 [mdt] [ 3683.254857] [<0>] mdt_getattr_name_lock+0x1317/0x1dc0 [mdt] [ 3683.256016] [<0>] mdt_intent_getattr+0x264/0x440 [mdt] [ 3683.257105] [<0>] mdt_intent_opc+0x452/0xa80 [mdt] [ 3683.258126] [<0>] mdt_intent_policy+0x1fd/0x390 [mdt] [ 3683.259191] [<0>] ldlm_lock_enqueue+0x469/0xa90 [ptlrpc] [ 3683.260350] [<0>] ldlm_handle_enqueue0+0x61a/0x16c0 [ptlrpc] [ 3683.261596] [<0>] tgt_enqueue+0xa4/0x200 [ptlrpc] [ 3683.262662] [<0>] tgt_request_handle+0xc9c/0x1a40 [ptlrpc] [ 3683.263860] [<0>] ptlrpc_server_handle_request+0x323/0xbd0 [ptlrpc] [ 3683.265220] [<0>] ptlrpc_main+0xbf3/0x1540 [ptlrpc] [ 3683.266303] [<0>] kthread+0x134/0x150 [ 3683.267111] [<0>] ret_from_fork+0x35/0x40 The deadlock happens as follows: T1: vvp_io_init() ->ll_layout_refresh() <= take lli_layout_mutex ->ll_layout_intent() ->ll_take_md_lock() <= take the CR layout lock ref ->ll_layout_conf() ->vvp_prune() ->vvp_inode_ops() <= release lli_layout_mtex ->vvp_inode_ops() <= try to acquire lli_layout_mutex -> racer wait here for T2 T2: ->ll_file_write_iter() ->vvp_io_init() ->ll_layout_refresh() <= take lli_layout_mutex ->ll_layout_intent() <= Request layout from MDT -> racer wait from server... And server want to cancel the CR layout lock T1 hold, and it won't happen. Also T1 could has take extent ldlm lock while waiting lli_layout_mutex hold by T2, and ofd_destroy_hdl does not get the lock cancellation response from T1. lli_layout_mutex is only needed for enqueuing layout lock from server, so that ll_layout_conf() does not involve with lli_layout_mutex. Fixes: 8f2c1592c3 ("LU-16958 llite: migrate vs regular ops deadlock") Signed-off-by: Bobi Jam Change-Id: Ib94de2c63544c3a962199aa0537418255980ae8c Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52388 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Qian Yingjin Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/cl_object.h | 2 -- lustre/llite/file.c | 9 +++------ lustre/llite/vvp_object.c | 14 -------------- lustre/lov/lov_cl_internal.h | 1 + lustre/lov/lov_object.c | 44 +++++++++++++++++--------------------------- 5 files changed, 21 insertions(+), 49 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 6689a30..e24e0cb 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -306,8 +306,6 @@ enum coo_inode_opc { COIO_INODE_UNLOCK, COIO_SIZE_LOCK, COIO_SIZE_UNLOCK, - COIO_LAYOUT_LOCK, - COIO_LAYOUT_UNLOCK, }; /** diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 5178a27..d33f115 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -6323,10 +6323,6 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) LASSERT(fid_is_sane(ll_inode2fid(inode))); LASSERT(S_ISREG(inode->i_mode)); - /* take layout lock mutex to enqueue layout lock exclusively. */ - mutex_lock(&lli->lli_layout_mutex); - lli->lli_layout_lock_owner = current; - while (1) { /* mostly layout lock is caching on the local side, so try to * match it before grabbing layout lock mutex. */ @@ -6341,15 +6337,16 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) break; } + /* take layout lock mutex to enqueue layout lock exclusively. */ + mutex_lock(&lli->lli_layout_mutex); rc = ll_layout_intent(inode, &intent); + mutex_unlock(&lli->lli_layout_mutex); if (rc != 0) break; } if (rc == 0) *gen = ll_layout_version_get(lli); - lli->lli_layout_lock_owner = NULL; - mutex_unlock(&lli->lli_layout_mutex); RETURN(rc); } diff --git a/lustre/llite/vvp_object.c b/lustre/llite/vvp_object.c index 42a5148..a931f66 100644 --- a/lustre/llite/vvp_object.c +++ b/lustre/llite/vvp_object.c @@ -271,20 +271,6 @@ static int vvp_inode_ops(const struct lu_env *env, struct cl_object *obj, else rc = -ENOLCK; break; - case COIO_LAYOUT_LOCK: - if (lli->lli_layout_lock_owner != current) { - mutex_lock(&lli->lli_layout_mutex); - lli->lli_layout_lock_owner = current; - } - break; - case COIO_LAYOUT_UNLOCK: - if (lli->lli_layout_lock_owner == current) { - lli->lli_layout_lock_owner = NULL; - mutex_unlock(&lli->lli_layout_mutex); - } else { - rc = -ENOLCK; - } - break; default: rc = -EINVAL; break; diff --git a/lustre/lov/lov_cl_internal.h b/lustre/lov/lov_cl_internal.h index a2f5b45..0346ab2 100644 --- a/lustre/lov/lov_cl_internal.h +++ b/lustre/lov/lov_cl_internal.h @@ -242,6 +242,7 @@ struct lov_mirror_entry { enum lov_object_flags { /* Layout is invalid, set when layout lock is lost */ LO_LAYOUT_INVALID = 0x1, + LO_NEED_INODE_LOCK = 0x2, }; /** diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 787f2e1..a265e6a 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -1310,8 +1310,11 @@ static int lov_layout_change(const struct lu_env *unused, new_ops = &lov_dispatch[llt]; rc = cl_object_prune(env, &lov->lo_cl); - if (rc != 0) + if (rc != 0) { + if (rc == -EAGAIN) + set_bit(LO_NEED_INODE_LOCK, &lov->lo_obj_flags); GOTO(out, rc); + } rc = old_ops->llo_delete(env, lov, &lov->u); if (rc != 0) @@ -1394,13 +1397,12 @@ out_lsm: static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, const struct cl_object_conf *conf) { - struct lov_stripe_md *lsm = NULL; - struct lov_object *lov = cl2lov(obj); - int result = 0; + struct lov_stripe_md *lsm = NULL; + struct lov_object *lov = cl2lov(obj); struct cl_object *top = cl_object_top(obj); - bool unlock_inode = false; - bool lock_inode_size = false; - bool lock_layout = false; + bool lock_inode = false; + bool inode_size_locked = false; + int result = 0; ENTRY; if (conf->coc_opc == OBJECT_CONF_SET && @@ -1471,9 +1473,11 @@ retry: GOTO(out, result = -ERESTARTSYS); } + clear_bit(LO_NEED_INODE_LOCK, &lov->lo_obj_flags); result = lov_layout_change(env, lov, lsm, conf); if (result) { - if (result == -EAGAIN) { + if (result == -EAGAIN && + test_bit(LO_NEED_INODE_LOCK, &lov->lo_obj_flags)) { /** * we need unlocked lov conf and get inode lock. * It's possible we have already taken inode's size @@ -1481,42 +1485,28 @@ retry: * order, lest deadlock happens: * inode lock (ll_inode_lock()) * inode size lock (ll_inode_size_lock()) - * inode layout lock (ll_layout_refresh()) * lov conf lock (lov_conf_lock()) * * e.g. * vfs_setxattr inode locked * ll_lov_setstripe_ea_info inode size locked * ll_prep_inode - * ll_file_inode_init - * cl_conf_set - * lov_conf_set lov conf locked - * - * ll_migrate inode locked - * ... - * ll_layout_refresh inode layout locked - * ll_layout_conf + * cl_file_inode_init * cl_conf_set * lov_conf_set lov conf locked */ lov_conf_unlock(lov); - if (cl_object_inode_ops(env, top, COIO_LAYOUT_UNLOCK, - NULL) == 0) - lock_layout = true; if (cl_object_inode_ops(env, top, COIO_SIZE_UNLOCK, NULL) == 0) - lock_inode_size = true; + inode_size_locked = true; /* take lock in order */ if (cl_object_inode_ops( env, top, COIO_INODE_LOCK, NULL) == 0) - unlock_inode = true; - if (lock_inode_size) + lock_inode = true; + if (inode_size_locked) cl_object_inode_ops(env, top, COIO_SIZE_LOCK, NULL); - if (lock_layout) - cl_object_inode_ops(env, top, COIO_LAYOUT_LOCK, - NULL); goto retry; } set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); @@ -1527,7 +1517,7 @@ retry: out: lov_conf_unlock(lov); - if (unlock_inode) + if (lock_inode) cl_object_inode_ops(env, top, COIO_INODE_UNLOCK, NULL); out_lsm: lov_lsm_put(lsm); -- 1.8.3.1