Whamcloud - gitweb
LU-16770 llite: prune object without layout lock first 42/50742/6
authorAndriy Skulysh <andriy.skulysh@hpe.com>
Tue, 26 Jul 2022 11:10:43 +0000 (14:10 +0300)
committerOleg Drokin <green@whamcloud.com>
Mon, 7 Aug 2023 03:49:06 +0000 (03:49 +0000)
lov_layout_change() calls cl_object_prune() before
changing the layout. This may lead to eviction from the MDT
in case of a slow response from an OST.

To reduce the risk of eviction, call cl_object_prune()
without the layout lock held before calling lov_layout_change().

vvp_prune() attempts to sync and truncate page cache pages.
osc_page_delete() may encounter page cache pages in non-clean state
during truncate because there's a race window between sync and truncate.
Writes may slip into this window and generate dirty or writeback pages.

This window is usually protected with a special truncate semaphore e.g.
when truncate is requested from the truncate syscall.

Let's use this semaphore to avoid write vs truncate race in vvp_prune().

Change-Id: Ie2ee29ea1e792e1b34b6de068ff2b84fd8f52f2a
HPE-bug-id: LUS-9927, LUS-11612
Signed-off-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Vitaly Fertman <c17818@cray.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50742
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/cl_object.h
lustre/llite/file.c
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/lov/lov_object.c

index 3bc3afa..bdad8d8 100644 (file)
@@ -264,6 +264,7 @@ struct cl_object_conf {
         * Layout lock handle.
         */
        struct ldlm_lock         *coc_lock;
+       bool                     coc_try;
        /**
         * Operation to handle layout, OBJECT_CONF_XYZ.
         */
index 81bffd9..d365b0b 100644 (file)
@@ -6118,7 +6118,7 @@ out:
  * in this function.
  */
 static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
-                             struct inode *inode)
+                             struct inode *inode, bool try)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
        struct ll_sb_info    *sbi = ll_i2sbi(inode);
@@ -6166,6 +6166,7 @@ static int ll_layout_lock_set(struct lustre_handle *lockh, enum ldlm_mode mode,
        conf.coc_opc = OBJECT_CONF_SET;
        conf.coc_inode = inode;
        conf.coc_lock = lock;
+       conf.coc_try = try;
        conf.u.coc_layout.lb_buf = lock->l_lvb_data;
        conf.u.coc_layout.lb_len = lock->l_lvb_len;
        rc = ll_layout_conf(inode, &conf);
@@ -6187,11 +6188,32 @@ out:
                conf.coc_inode = inode;
                rc = ll_layout_conf(inode, &conf);
                if (rc == 0)
-                       rc = -EAGAIN;
+                       rc = -ERESTARTSYS;
 
                CDEBUG(D_INODE, "%s file="DFID" waiting layout return: %d\n",
                       sbi->ll_fsname, PFID(&lli->lli_fid), rc);
        }
+
+       if (rc == -ERESTARTSYS) {
+               __u16 refcheck;
+               struct lu_env *env;
+               struct cl_object * obj = lli->lli_clob;
+
+               env = cl_env_get(&refcheck);
+               if (IS_ERR(env))
+                       RETURN(PTR_ERR(env));
+
+               CDEBUG(D_INODE, "prune without lock "DFID"\n",
+                               PFID(lu_object_fid(&obj->co_lu)));
+
+               trunc_sem_down_write(&lli->lli_trunc_sem);
+               cl_object_prune(env, obj);
+               trunc_sem_up_write(&lli->lli_trunc_sem);
+               cl_env_put(env, &refcheck);
+
+               rc = -EAGAIN;
+       }
+
        RETURN(rc);
 }
 
@@ -6270,6 +6292,7 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
        };
        enum ldlm_mode mode;
        int rc;
+       bool try = true;
        ENTRY;
 
        *gen = ll_layout_version_get(lli);
@@ -6292,7 +6315,8 @@ int ll_layout_refresh(struct inode *inode, __u32 *gen)
                                       LCK_CR | LCK_CW | LCK_PR |
                                       LCK_PW | LCK_EX);
                if (mode != 0) { /* hit cached lock */
-                       rc = ll_layout_lock_set(&lockh, mode, inode);
+                       rc = ll_layout_lock_set(&lockh, mode, inode, try);
+                       try = false;
                        if (rc == -EAGAIN)
                                continue;
                        break;
index 591d522..8c289bb 100644 (file)
@@ -3618,6 +3618,7 @@ int ll_prep_inode(struct inode **inode, struct req_capsule *pill,
                        conf.coc_opc = OBJECT_CONF_SET;
                        conf.coc_inode = *inode;
                        conf.coc_lock = lock;
+                       conf.coc_try = false;
                        conf.u.coc_layout = md.layout;
                        (void)ll_layout_conf(*inode, &conf);
                }
index 548bd1f..e7a5cef 100644 (file)
@@ -319,6 +319,7 @@ static void ll_lock_cancel_bits(struct ldlm_lock *lock, __u64 to_cancel)
                struct cl_object_conf conf = {
                        .coc_opc = OBJECT_CONF_INVALIDATE,
                        .coc_inode = inode,
+                       .coc_try = false,
                };
 
                rc = ll_layout_conf(inode, &conf);
index 023c348..8093bdc 100644 (file)
@@ -1435,6 +1435,11 @@ retry:
                GOTO(out, result = -EBUSY);
        }
 
+       if (conf->coc_try) {
+               set_bit(LO_LAYOUT_INVALID, &lov->lo_obj_flags);
+               GOTO(out, result = -ERESTARTSYS);
+       }
+
        result = lov_layout_change(env, lov, lsm, conf);
        if (result) {
                if (result == -EAGAIN) {