From ec54a4bbe37d4cf507a5c13466627d21d85c5f55 Mon Sep 17 00:00:00 2001 From: Andriy Skulysh Date: Tue, 12 Nov 2024 12:03:42 -0800 Subject: [PATCH] LU-16770 llite: prune object without layout lock first lov_layout_change() calls cl_object_prune() before changing layout. It may lead to eviction from MDT in case of a slow response from an OST. To reduce the risk of eviction, call cl_object_prune() without layout lock held before calling lov_layout_change(). vvp_prune() attempts to sync and truncate page cache pages. osc_page_delete() may encounter page cache pages in non-clean state during truncate because there's a race window between sync and truncate. Writes may slip into this window and generate dirty or writeback pages. This window is usually protected with a special truncate semaphore, e.g. when truncate is requested from the truncate syscall. Let's use this semaphore to avoid write vs truncate race in vvp_prune(). Lustre-change: https://review.whamcloud.com/50742 Lustre-commit: 9c453ba6d9a0152aa75e92b8372d54a758a10b18 HPE-bug-id: LUS-9927, LUS-11612 Signed-off-by: Andriy Skulysh Change-Id: Ie2ee29ea1e792e1b34b6de068ff2b84fd8f52f2a Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56987 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Zhenyu Xu Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/include/cl_object.h | 1 + lustre/llite/file.c | 30 +++++++++++++++++++++++++++--- lustre/llite/llite_lib.c | 1 + lustre/llite/namei.c | 1 + lustre/lov/lov_object.c | 5 +++++ 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index d44d59b..97be190 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -262,6 +262,7 @@ struct cl_object_conf { * Layout lock handle. */ struct ldlm_lock *coc_lock; + bool coc_try; /** * Operation to handle layout, OBJECT_CONF_XYZ. 
*/ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index f803077..7b73a2a 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -5944,7 +5944,7 @@ out: * in this function. */ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, - struct inode *inode) + struct inode *inode, bool try) { struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -5992,6 +5992,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode, conf.coc_opc = OBJECT_CONF_SET; conf.coc_inode = inode; conf.coc_lock = lock; + conf.coc_try = try; conf.u.coc_layout.lb_buf = lock->l_lvb_data; conf.u.coc_layout.lb_len = lock->l_lvb_len; rc = ll_layout_conf(inode, &conf); @@ -6013,11 +6014,32 @@ out: conf.coc_inode = inode; rc = ll_layout_conf(inode, &conf); if (rc == 0) - rc = -EAGAIN; + rc = -ERESTARTSYS; CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n", sbi->ll_fsname, PFID(&lli->lli_fid), rc); } + + if (rc == -ERESTARTSYS) { + __u16 refcheck; + struct lu_env *env; + struct cl_object * obj = lli->lli_clob; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + RETURN(PTR_ERR(env)); + + CDEBUG(D_INODE, "prune without lock "DFID"\n", + PFID(lu_object_fid(&obj->co_lu))); + + trunc_sem_down_write(&lli->lli_trunc_sem); + cl_object_prune(env, obj); + trunc_sem_up_write(&lli->lli_trunc_sem); + cl_env_put(env, &refcheck); + + rc = -EAGAIN; + } + RETURN(rc); } @@ -6096,6 +6118,7 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) }; enum ldlm_mode mode; int rc; + bool try = true; ENTRY; *gen = ll_layout_version_get(lli); @@ -6117,7 +6140,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen) LCK_CR | LCK_CW | LCK_PR | LCK_PW | LCK_EX); if (mode != 0) { /* hit cached lock */ - rc = ll_layout_lock_set(&lockh, mode, inode); + rc = ll_layout_lock_set(&lockh, mode, inode, try); + try = false; if (rc == -EAGAIN) continue; break; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c 
index 6addf6c..b504993 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -3219,6 +3219,7 @@ int ll_prep_inode(struct inode **inode, struct req_capsule *pill, conf.coc_opc = OBJECT_CONF_SET; conf.coc_inode = *inode; conf.coc_lock = lock; + conf.coc_try = false; conf.u.coc_layout = md.layout; (void)ll_layout_conf(*inode, &conf); } diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 148b748..a7d772d 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -317,6 +317,7 @@ static void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel) struct cl_object_conf conf = { .coc_opc = OBJECT_CONF_INVALIDATE, .coc_inode = inode, + .coc_try = false, }; rc = ll_layout_conf(inode, &conf); diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 88107f8..980bf1d 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -1441,6 +1441,11 @@ static int lov_conf_set(const struct lu_env *env, struct cl_object *obj, GOTO(out, result = -EBUSY); } + if (conf->coc_try) { + set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); + GOTO(out, result = -ERESTARTSYS); + } + result = lov_layout_change(env, lov, lsm, conf); if (result) set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags); -- 1.8.3.1