Whamcloud - gitweb
LU-16637 llite: tolerate fresh page cache pages after truncate
author: Andrew Perepechko <andrew.perepechko@hpe.com>
Tue, 26 Dec 2023 17:02:12 +0000 (20:02 +0300)
committer: Andreas Dilger <adilger@whamcloud.com>
Thu, 18 Jan 2024 09:24:48 +0000 (09:24 +0000)
Truncate called by ll_layout_refresh() can race with a fast read
or tiny write, which can add an uninitialized non-uptodate page
into the page cache.

We want to avoid expensive locking for this rare case, so if any
pages are left over in the cache after truncate, just check that
those pages are not uptodate, not dirty, and do not have any
filesystem-specific information attached to them.

Lustre-change: https://review.whamcloud.com/53554
Lustre-commit: TBD (from f4c8d44a7c2f0fbc2c74d1832ff63c5216c22c38)

Change-Id: I8cadc022a3d1822a585f32e1a765e59ad0ff434d
Signed-off-by: Andrew Perepechko <andrew.perepechko@hpe.com>
HPE-bug-id: LUS-11937
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53611
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/obd_support.h
lustre/llite/llite_lib.c
lustre/llite/rw26.c
lustre/tests/sanity.sh
lustre/utils/lfs.c

index 2a365a6..b4a0e27 100644 (file)
@@ -637,6 +637,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LOV_COMP_MAGIC                            0x1426
 #define OBD_FAIL_LOV_COMP_PATTERN                  0x1427
 #define OBD_FAIL_LOV_INVALID_OSTIDX                0x1428
+#define OBD_FAIL_LLITE_DELAY_TRUNCATE              0x1430
 
 #define OBD_FAIL_FID_INDIR     0x1501
 #define OBD_FAIL_FID_INLMA     0x1502
index 2d8a988..116ccde 100644 (file)
@@ -2766,6 +2766,8 @@ void ll_truncate_inode_pages_final(struct inode *inode)
 
        truncate_inode_pages_final(mapping);
 
+       CFS_FAIL_TIMEOUT(OBD_FAIL_LLITE_DELAY_TRUNCATE, 5);
+
        /* Workaround for LU-118: Note nrpages may not be totally updated when
         * truncate_inode_pages() returns, as there can be a page in the process
         * of deletion (inside __delete_from_page_cache()) in the specified
@@ -2780,12 +2782,38 @@ void ll_truncate_inode_pages_final(struct inode *inode)
                ll_xa_unlock_irqrestore(&mapping->i_pages, flags);
        } /* Workaround end */
 
-       LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu "
-                "state %#lx, lli_flags %#lx, "
-                "see https://jira.whamcloud.com/browse/LU-118\n",
-                ll_i2sbi(inode)->ll_fsname,
-                PFID(ll_inode2fid(inode)), inode, nrpages,
-                inode->i_state, ll_i2info(inode)->lli_flags);
+       if (nrpages) {
+#ifdef HAVE_XARRAY_SUPPORT
+               XA_STATE(xas, &mapping->i_pages, 0);
+               struct page *page;
+#endif
+               CWARN("%s: inode="DFID"(%p) nrpages=%lu "
+                        "state %#lx, lli_flags %#lx, "
+                        "see https://jira.whamcloud.com/browse/LU-118\n",
+                        ll_i2sbi(inode)->ll_fsname,
+                        PFID(ll_inode2fid(inode)), inode, nrpages,
+                        inode->i_state, ll_i2info(inode)->lli_flags);
+#ifdef HAVE_XARRAY_SUPPORT
+               rcu_read_lock();
+               xas_for_each(&xas, page, ULONG_MAX) {
+                       if (xas_retry(&xas, page))
+                               continue;
+
+                       if (xa_is_value(page))
+                               continue;
+
+                       /*
+                        * We can only have non-uptodate pages
+                        * without internal state at this point
+                        */
+                       LASSERTF(!PageUptodate(page) &&
+                                !PageDirty(page) &&
+                                !PagePrivate(page),
+                                "%p", page);
+               }
+               rcu_read_unlock();
+#endif
+       }
 }
 
 int ll_read_inode2(struct inode *inode, void *opaque)
index 3230592..004a092 100644 (file)
@@ -749,7 +749,8 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
 
        lcc = ll_cl_find(inode);
        if (lcc == NULL) {
-               vmpage = grab_cache_page_nowait(mapping, index);
+               /* do not allocate a page, only find & lock */
+               vmpage = find_lock_page(mapping, index);
                result = ll_tiny_write_begin(vmpage, mapping);
                GOTO(out, result);
        }
index 094ef21..60a224a 100755 (executable)
@@ -28242,6 +28242,23 @@ test_440() {
 }
 run_test 440 "bash completion for lfs, lctl"
 
+test_442() {
+       local pid1
+       local pid2
+       mkdir -p $DIR/$tdir
+       multiop $DIR/$tdir/$tfile.1 O_w1 & pid1=$!
+       multiop $DIR/$tdir/$tfile.1 O_w1 & pid2=$!
+       sleep 1
+       touch $DIR/$tdir/$tfile.2
+       $LFS swap_layouts -n $DIR/$tdir/$tfile.1 $DIR/$tdir/$tfile.2
+       $LCTL set_param fail_loc=0x1430
+       kill -USR1 $pid1
+       sleep 1
+       kill -USR1 $pid2
+       wait
+}
+run_test 442 "truncate vs read/write should not panic"
+
 test_450() {
        remote_ost_nodsh && skip "remote OST with nodsh" && return
        local mntdev
index b4d5653..d0338f3 100644 (file)
@@ -10522,12 +10522,17 @@ static int lfs_hsm_cancel(int argc, char **argv)
 
 static int lfs_swap_layouts(int argc, char **argv)
 {
-       if (argc != 3)
+       int noxtime = 0;
+
+       if (argc == 4 && !strcmp(argv[1], "-n"))
+               noxtime = 1;
+       else if (argc != 3)
                return CMD_HELP;
 
-       return llapi_swap_layouts(argv[1], argv[2], 0, 0,
-                                 SWAP_LAYOUTS_KEEP_MTIME |
-                                 SWAP_LAYOUTS_KEEP_ATIME);
+       return llapi_swap_layouts(argv[1+noxtime], argv[2+noxtime],
+                                 0, 0, noxtime ? 0 :
+                                 (SWAP_LAYOUTS_KEEP_MTIME |
+                                 SWAP_LAYOUTS_KEEP_ATIME));
 }
 
 static const char *const ladvise_names[] = LU_LADVISE_NAMES;