Whamcloud - gitweb
LU-16637 llite: tolerate fresh page cache pages after truncate 54/53554/4
author Andrew Perepechko <andrew.perepechko@hpe.com>
Tue, 26 Dec 2023 17:02:12 +0000 (20:02 +0300)
committer Oleg Drokin <green@whamcloud.com>
Tue, 23 Jan 2024 05:31:50 +0000 (05:31 +0000)
Truncate called by ll_layout_refresh() can race with a fast read
or tiny write, which can add an uninitialized non-uptodate page
into the page cache.

We want to avoid expensive locking for this rare case, so if any
pages are left over in the cache after truncate, just check that
they are not uptodate, not dirty, and do not have any
filesystem-specific information attached to them.

Change-Id: I8cadc022a3d1822a585f32e1a765e59ad0ff434d
Signed-off-by: Andrew Perepechko <andrew.perepechko@hpe.com>
HPE-bug-id: LUS-11937
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53554
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Zhenyu Xu <bobijam@hotmail.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/llite/llite_lib.c
lustre/llite/rw26.c
lustre/tests/sanity.sh
lustre/utils/lfs.c

index 465382d..fba127e 100644 (file)
@@ -634,6 +634,7 @@ extern bool obd_enable_health_write;
 #define OBD_FAIL_LOV_COMP_MAGIC                            0x1426
 #define OBD_FAIL_LOV_COMP_PATTERN                  0x1427
 #define OBD_FAIL_LOV_INVALID_OSTIDX                0x1428
+#define OBD_FAIL_LLITE_DELAY_TRUNCATE              0x1430
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index c190b8e..7580a4c 100644 (file)
@@ -3046,6 +3046,8 @@ void ll_truncate_inode_pages_final(struct inode *inode)
 
        truncate_inode_pages_final(mapping);
 
+       CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_DELAY_TRUNCATE, 5);
+
        /* Workaround for LU-118: Note nrpages may not be totally updated when
         * truncate_inode_pages() returns, as there can be a page in the process
         * of deletion (inside __delete_from_page_cache()) in the specified
@@ -3060,12 +3062,38 @@ void ll_truncate_inode_pages_final(struct inode *inode)
                ll_xa_unlock_irqrestore(&mapping->i_pages, flags);
        } /* Workaround end */
 
-       LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu "
-                "state %#lx, lli_flags %#lx, "
-                "see https://jira.whamcloud.com/browse/LU-118\n",
-                ll_i2sbi(inode)->ll_fsname,
-                PFID(ll_inode2fid(inode)), inode, nrpages,
-                inode->i_state, ll_i2info(inode)->lli_flags);
+       if (nrpages) {
+#ifdef HAVE_XARRAY_SUPPORT
+               XA_STATE(xas, &mapping->i_pages, 0);
+               struct page *page;
+#endif
+               CWARN("%s: inode="DFID"(%p) nrpages=%lu "
+                        "state %#lx, lli_flags %#lx, "
+                        "see https://jira.whamcloud.com/browse/LU-118\n",
+                        ll_i2sbi(inode)->ll_fsname,
+                        PFID(ll_inode2fid(inode)), inode, nrpages,
+                        inode->i_state, ll_i2info(inode)->lli_flags);
+#ifdef HAVE_XARRAY_SUPPORT
+               rcu_read_lock();
+               xas_for_each(&xas, page, ULONG_MAX) {
+                       if (xas_retry(&xas, page))
+                               continue;
+
+                       if (xa_is_value(page))
+                               continue;
+
+                       /*
+                        * We can only have non-uptodate pages
+                        * without internal state at this point
+                        */
+                       LASSERTF(!PageUptodate(page) &&
+                                !PageDirty(page) &&
+                                !PagePrivate(page),
+                                "%p", page);
+               }
+               rcu_read_unlock();
+#endif
+       }
 }
 
 int ll_read_inode2(struct inode *inode, void *opaque)
index 298b8f1..5586468 100644 (file)
@@ -820,7 +820,8 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
 
        lcc = ll_cl_find(inode);
        if (lcc == NULL) {
-               vmpage = grab_cache_page_nowait(mapping, index);
+               /* do not allocate a page, only find & lock */
+               vmpage = find_lock_page(mapping, index);
                result = ll_tiny_write_begin(vmpage, mapping);
                GOTO(out, result);
        }
index be98d8a..71deb1d 100755 (executable)
@@ -29950,6 +29950,23 @@ test_440() {
 }
 run_test 440 "bash completion for lfs, lctl"
 
+test_442() {
+       local pid1
+       local pid2
+       mkdir -p $DIR/$tdir
+       multiop $DIR/$tdir/$tfile.1 O_w1 & pid1=$!
+       multiop $DIR/$tdir/$tfile.1 O_w1 & pid2=$!
+       sleep 1
+       touch $DIR/$tdir/$tfile.2
+       $LFS swap_layouts -n $DIR/$tdir/$tfile.1 $DIR/$tdir/$tfile.2
+       $LCTL set_param fail_loc=0x1430
+       kill -USR1 $pid1
+       sleep 1
+       kill -USR1 $pid2
+       wait
+}
+run_test 442 "truncate vs read/write should not panic"
+
 prep_801() {
        [[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] ||
        [[ $OST1_VERSION -lt $(version_code 2.9.55) ]] &&
index c0dbbc9..4566994 100644 (file)
@@ -10742,12 +10742,17 @@ static int lfs_hsm_cancel(int argc, char **argv)
 
 static int lfs_swap_layouts(int argc, char **argv)
 {
-       if (argc != 3)
+       int noxtime = 0;
+
+       if (argc == 4 && !strcmp(argv[1], "-n"))
+               noxtime = 1;
+       else if (argc != 3)
                return CMD_HELP;
 
-       return llapi_swap_layouts(argv[1], argv[2], 0, 0,
-                                 SWAP_LAYOUTS_KEEP_MTIME |
-                                 SWAP_LAYOUTS_KEEP_ATIME);
+       return llapi_swap_layouts(argv[1+noxtime], argv[2+noxtime],
+                                 0, 0, noxtime ? 0 :
+                                 (SWAP_LAYOUTS_KEEP_MTIME |
+                                 SWAP_LAYOUTS_KEEP_ATIME));
 }
 
 static const char *const ladvise_names[] = LU_LADVISE_NAMES;