Whamcloud - gitweb
LU-16160 revert: "llite: clear stale page's uptodate bit"
authorAndreas Dilger <adilger@whamcloud.com>
Fri, 6 Jan 2023 18:19:33 +0000 (18:19 +0000)
committerAndreas Dilger <adilger@whamcloud.com>
Thu, 12 Jan 2023 01:08:18 +0000 (01:08 +0000)
This reverts commit 451b4ac514dd03c4fe91726da2f95a1f5575a5a6
which caused a bug in cl_page_own() race with ll_releasepage()
and cl_pagevec_put() assertion failure.

Lustre-change: https://review.whamcloud.com/49541
Lustre-commit: TBD (from ef330e09a59da0df2de153ecdb2e7d8729cd6b63)

Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: Icdb8c60f4d992c9976670e1b06c5bab5ef3a3954
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49576
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/cl_object.h
lustre/llite/rw.c
lustre/llite/vvp_io.c
lustre/llite/vvp_page.c
lustre/tests/rw_seq_cst_vs_drop_caches.c
lustre/tests/sanityn.sh

index 05a9265..4393749 100644 (file)
@@ -768,16 +768,9 @@ struct cl_page {
          * creation.
          */
        enum cl_page_type       cp_type:CP_TYPE_BITS; /* 32 bits */
-                               /* fault page read grab extra reference */
-       unsigned                 cp_fault_ref:1,
-                                /**
-                                 * if fault page got delete before returned to
-                                 * filemap_fault(), defer the vmpage detach/put
-                                 * until filemap_fault() has been handled.
-                                 */
-                                cp_defer_detach:1;
        /* which slab kmem index this memory allocated from */
        short int               cp_kmem_index; /* 48 bits */
+       unsigned int            cp_unused1:16;  /* 64 bits */
 
        /**
         * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
@@ -2490,11 +2483,6 @@ static inline int cl_io_is_mkwrite(const struct cl_io *io)
        return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
 }
 
-static inline int cl_io_is_pagefault(const struct cl_io *io)
-{
-       return io->ci_type == CIT_FAULT && !io->u.ci_fault.ft_mkwrite;
-}
-
 /**
  * True, iff \a io is a truncate(2).
  */
index 2f53992..5c1fa74 100644 (file)
@@ -1975,7 +1975,6 @@ int ll_readpage(struct file *file, struct page *vmpage)
        page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
        if (!IS_ERR(page)) {
                LASSERT(page->cp_type == CPT_CACHEABLE);
-
                if (likely(!PageUptodate(vmpage))) {
                        cl_page_assume(env, io, page);
 
@@ -1985,15 +1984,7 @@ int ll_readpage(struct file *file, struct page *vmpage)
                        unlock_page(vmpage);
                        result = 0;
                }
-               if (cl_io_is_pagefault(io) && result == 0) {
-                       /**
-                        * page fault, retain the cl_page reference until
-                        * vvp_io_kernel_fault() release it.
-                        */
-                       page->cp_fault_ref = 1;
-               } else {
-                       cl_page_put(env, page);
-               }
+               cl_page_put(env, page);
        } else {
                unlock_page(vmpage);
                result = PTR_ERR(page);
index 1a98421..7b12507 100644 (file)
@@ -1389,42 +1389,14 @@ static void vvp_io_rw_end(const struct lu_env *env,
        trunc_sem_up_read(&lli->lli_trunc_sem);
 }
 
-static void detach_and_deref_page(struct cl_page *clp, struct page *vmpage)
-{
-       if (!clp->cp_defer_detach)
-               return;
-
-       /**
-        * cl_page_delete0() took a vmpage reference, but not unlink the vmpage
-        * from its cl_page.
-        */
-       clp->cp_defer_detach = 0;
-       ClearPagePrivate(vmpage);
-       vmpage->private = 0;
-
-       put_page(vmpage);
-       atomic_dec(&clp->cp_ref);
-}
-
-static int vvp_io_kernel_fault(const struct lu_env *env,
-                              struct vvp_fault_io *cfio)
+static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
 {
        struct vm_fault *vmf = cfio->ft_vmf;
-       struct file *vmff = cfio->ft_vma->vm_file;
-       struct address_space *mapping = vmff->f_mapping;
-       struct inode *inode = mapping->host;
-       struct page *vmpage = NULL;
-       struct cl_page *clp = NULL;
-       int rc = 0;
-       ENTRY;
 
-       ll_inode_size_lock(inode);
-retry:
        cfio->ft_flags = ll_filemap_fault(cfio->ft_vma, vmf);
        cfio->ft_flags_valid = 1;
 
        if (vmf->page) {
-               /* success, vmpage is locked */
                LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
                               get_vmf_address(vmf));
                if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
@@ -1434,100 +1406,25 @@ retry:
 
                cfio->ft_vmpage = vmf->page;
 
-               /**
-                * ll_filemap_fault()->ll_readpage() could get an extra cl_page
-                * reference. So we have to get the cl_page's to check its
-                * cp_fault_ref and drop the reference later.
-                */
-               clp = cl_vmpage_page(vmf->page, NULL);
-
-               GOTO(unlock, rc = 0);
-       }
-
-       /* filemap_fault() fails, vmpage is not locked */
-       if (clp == NULL) {
-               vmpage = find_get_page(mapping, vmf->pgoff);
-               if (vmpage) {
-                       lock_page(vmpage);
-                       clp = cl_vmpage_page(vmpage, NULL);
-                       unlock_page(vmpage);
-               }
+               return 0;
        }
 
        if (cfio->ft_flags & VM_FAULT_SIGBUS) {
-               pgoff_t max_idx;
-
-               /**
-                * ll_filemap_fault()->ll_readpage() could fill vmpage
-                * correctly, and unlock the vmpage, while memory pressure or
-                * truncate could detach cl_page from vmpage, and kernel
-                * filemap_fault() will wait_on_page_locked(vmpage) and find
-                * out that the vmpage has been cleared its uptodate bit,
-                * so it returns VM_FAULT_SIGBUS.
-                *
-                * In this case, we'd retry the filemap_fault()->ll_readpage()
-                * to rebuild the cl_page and fill vmpage with uptodated data.
-                */
-               if (likely(vmpage)) {
-                       bool need_retry = false;
-
-                       if (clp) {
-                               if (clp->cp_defer_detach) {
-                                       detach_and_deref_page(clp, vmpage);
-                                       /**
-                                        * check i_size to make sure it's not
-                                        * over EOF, we don't want to call
-                                        * filemap_fault() repeatedly since it
-                                        * returns VM_FAULT_SIGBUS without even
-                                        * trying if vmf->pgoff is over EOF.
-                                        */
-                                       max_idx = DIV_ROUND_UP(
-                                               i_size_read(inode), PAGE_SIZE);
-                                       if (vmf->pgoff < max_idx)
-                                               need_retry = true;
-                               }
-                               if (clp->cp_fault_ref) {
-                                       clp->cp_fault_ref = 0;
-                                       /* ref not released in ll_readpage() */
-                                       cl_page_put(env, clp);
-                               }
-                               if (need_retry)
-                                       goto retry;
-                       }
-               }
-
                CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", get_vmf_address(vmf));
-               GOTO(unlock, rc = -EFAULT);
+               return -EFAULT;
        }
 
        if (cfio->ft_flags & VM_FAULT_OOM) {
                CDEBUG(D_PAGE, "got addr %p - OOM\n", get_vmf_address(vmf));
-               GOTO(unlock, rc = -ENOMEM);
+               return -ENOMEM;
        }
 
        if (cfio->ft_flags & VM_FAULT_RETRY)
-               GOTO(unlock, rc = -EAGAIN);
+               return -EAGAIN;
 
        CERROR("unknown error in page fault %d\n", cfio->ft_flags);
-       rc = -EINVAL;
-unlock:
-       ll_inode_size_unlock(inode);
-       if (clp) {
-               if (clp->cp_defer_detach && vmpage)
-                       detach_and_deref_page(clp, vmpage);
-
-               /* additional cl_page ref has been taken in ll_readpage() */
-               if (clp->cp_fault_ref) {
-                       clp->cp_fault_ref = 0;
-                       /* ref not released in ll_readpage() */
-                       cl_page_put(env, clp);
-               }
-               /* ref taken in this function */
-               cl_page_put(env, clp);
-       }
-       if (vmpage)
-               put_page(vmpage);
-       return rc;
+
+       return -EINVAL;
 }
 
 static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
@@ -1582,7 +1479,7 @@ static int vvp_io_fault_start(const struct lu_env *env,
                LASSERT(cfio->ft_vmpage != NULL);
                lock_page(cfio->ft_vmpage);
        } else {
-               result = vvp_io_kernel_fault(env, cfio);
+               result = vvp_io_kernel_fault(cfio);
                if (result != 0)
                        RETURN(result);
        }
index 4bab3fb..7028526 100644 (file)
@@ -164,36 +164,17 @@ static void vvp_page_delete(const struct lu_env *env,
        LASSERT(PageLocked(vmpage));
        LASSERT((struct cl_page *)vmpage->private == page);
 
-       /**
-        * clear vmpage uptodate bit, since ll_read_ahead_pages()->
-        * ll_read_ahead_page() could pick up this stale vmpage and
-        * take it as uptodated.
-        */
-       ClearPageUptodate(vmpage);
-       /**
-        * vvp_io_kernel_fault()->ll_readpage() set vpg_fault_ref
-        * and need it to check cl_page to retry the page fault read.
-        */
-       if (page->cp_fault_ref) {
-               page->cp_defer_detach = 1;
-               /**
-                * get a vmpage reference, so that filemap_fault()
-                * won't free it from pagecache.
-                */
-               get_page(vmpage);
-       } else {
-               /* Drop the reference count held in vvp_page_init */
-               refc = atomic_dec_return(&page->cp_ref);
-               LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
 
-               ClearPagePrivate(vmpage);
-               vmpage->private = 0;
-               /*
-                * Reference from vmpage to cl_page is removed, but the
-                * reference back is still here. It is removed later in
-                * vvp_page_fini().
-                */
-       }
+       /* Drop the reference count held in vvp_page_init */
+       refc = atomic_dec_return(&page->cp_ref);
+       LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
+
+       ClearPagePrivate(vmpage);
+       vmpage->private = 0;
+       /*
+        * Reference from vmpage to cl_page is removed, but the reference back
+        * is still here. It is removed later in vvp_page_fini().
+        */
 }
 
 static void vvp_page_export(const struct lu_env *env,
@@ -290,11 +271,6 @@ static void vvp_page_completion_read(const struct lu_env *env,
                ll_ra_count_put(ll_i2sbi(inode), 1);
 
        if (ioret == 0)  {
-               /**
-                * vpg_defer_uptodate is used for readahead page, and the
-                * vmpage Uptodate bit is deferred to set in ll_readpage/
-                * ll_io_read_page.
-                */
                if (!vpg->vpg_defer_uptodate)
                        cl_page_export(env, page, 1);
        } else if (vpg->vpg_defer_uptodate) {
index 3cb9aba..5f988bb 100644 (file)
@@ -11,7 +11,7 @@
 #include <pthread.h>
 
 /*
- * Usage: rw_seq_cst_vs_drop_caches [-m] /mnt/lustre/file0 /mnt/lustre2/file0
+ * Usage: rw_seq_cst_vs_drop_caches /mnt/lustre/file0 /mnt/lustre2/file0
 
  * Race reads of the same file on two client mounts vs writes and drop
  * caches to detect sequential consistency violations. Run
  * which case the wait status ($?) will be 134.
 */
 
-int mmap_mode;         /* -m flag */
-
-void usage(void)
-{
-       fprintf(stdout,
-               "%s: rw_seq_cst_vs_drop_caches [-m] /mnt/lustre/file0 /mnt/lustre2/file0\n"
-"      -m : use mmap to read/write file\n", __func__);
-}
-
 #define handle_error(msg)      \
        do { perror(msg); exit(EXIT_FAILURE); } while (0)
 
@@ -37,39 +28,23 @@ static int fd[2] = { -1, -1 };
  */
 static uint64_t u, u_max = UINT64_MAX / 2;
 static uint64_t v[2];
-char *ptr;
 
 static void *access_thread_start(void *unused)
 {
-       char *ptr2 = NULL;
        ssize_t rc;
        int i;
 
-       if (mmap_mode) {
-               ptr2 = mmap(NULL, sizeof(v[1]), PROT_READ,
-                           MAP_PRIVATE | MAP_POPULATE, fd[1], 0);
-               if (ptr2 == MAP_FAILED)
-                       handle_error("mmap");
-       }
-
        do {
                for (i = 0; i < 2; i++) {
-                       if (mmap_mode) {
-                               memcpy(&v[i], i == 0 ? ptr : ptr2,
-                                      sizeof(v[i]));
-                       } else {
-                               rc = pread(fd[i], &v[i], sizeof(v[i]), 0);
-                               if (rc < 0 || rc != sizeof(v[i]))
-                                       handle_error("pread");
-                       }
+                       rc = pread(fd[i], &v[i], sizeof(v[i]), 0);
+                       if (rc < 0 || rc != sizeof(v[i]))
+                               handle_error("pread");
                }
        } while (v[0] <= v[1]);
 
        fprintf(stderr, "error: u = %"PRIu64", v = %"PRIu64", %"PRIu64"\n",
                u, v[0], v[1]);
 
-       if (mmap_mode)
-               munmap(ptr2, sizeof(v[i]));
        abort();
 }
 
@@ -81,26 +56,12 @@ int main(int argc, char *argv[])
        pthread_t access_thread;
        struct stat st[2];
        ssize_t rc;
-       int i, ch;
+       int i;
 
        setvbuf(stderr, stderr_buf, _IOLBF, sizeof(stderr_buf));
 
-       while ((ch = getopt(argc, argv, "m")) >= 0) {
-               switch (ch) {
-               case 'm':
-                       mmap_mode = 1;
-                       break;
-               default:
-                       usage();
-                       exit(EXIT_FAILURE);
-               }
-       }
-
-       argc -= optind;
-       argv += optind;
-
-       if (argc != 2) {
-               usage();
+       if (argc != 3) {
+               fprintf(stderr, "Usage: %s /mnt/lustre/file0 /mnt/lustre2/file0\n", argv[0]);
                exit(EXIT_FAILURE);
        }
 
@@ -108,7 +69,7 @@ int main(int argc, char *argv[])
        assert(!(drop_caches_fd < 0));
 
        for (i = 0; i < 2; i++) {
-               fd[i] = open(argv[i], O_RDWR|O_CREAT|O_TRUNC, 0666);
+               fd[i] = open(argv[i + 1], O_RDWR|O_CREAT|O_TRUNC, 0666);
                if (fd[i] < 0)
                        handle_error("open");
 
@@ -125,33 +86,18 @@ int main(int argc, char *argv[])
                exit(EXIT_FAILURE);
        }
 
-       if (mmap_mode) {
-               if (ftruncate(fd[0], sizeof(u)) < 0)
-                       handle_error("ftruncate");
-
-               ptr = mmap(NULL, sizeof(u), PROT_READ|PROT_WRITE, MAP_SHARED,
-                          fd[0], 0);
-               if (ptr == MAP_FAILED)
-                       handle_error("mmap");
-               memcpy(ptr, &u, sizeof(u));
-       } else {
-               rc = pwrite(fd[0], &u, sizeof(u), 0);
-               if (rc < 0 || rc != sizeof(u))
-                       handle_error("pwrite");
-       }
+       rc = pwrite(fd[0], &u, sizeof(u), 0);
+       if (rc < 0 || rc != sizeof(u))
+               handle_error("pwrite");
 
        rc = pthread_create(&access_thread, NULL, &access_thread_start, NULL);
        if (rc != 0)
                handle_error("pthread_create");
 
        for (u = 1; u <= u_max; u++) {
-               if (mmap_mode) {
-                       memcpy(ptr, &u, sizeof(u));
-               } else {
-                       rc = pwrite(fd[0], &u, sizeof(u), 0);
-                       if (rc < 0 || rc != sizeof(u))
-                               handle_error("pwrite");
-               }
+               rc = pwrite(fd[0], &u, sizeof(u), 0);
+               if (rc < 0 || rc != sizeof(u))
+                       handle_error("pwrite");
 
                rc = write(drop_caches_fd, "3\n", 2);
                if (rc < 0 || rc != 2)
@@ -166,8 +112,5 @@ int main(int argc, char *argv[])
        if (rc != 0)
                handle_error("pthread_join");
 
-       if (mmap_mode)
-               munmap(ptr, sizeof(u));
-
        return 0;
 }
index daec5cc..324482d 100755 (executable)
@@ -18,8 +18,8 @@ init_test_env $@
 init_logging
 
 ALWAYS_EXCEPT="$SANITYN_EXCEPT "
-# bug number for skipped test:  LU-7105
-ALWAYS_EXCEPT+="                28 "
+# bug number for skipped test:  LU-7105 LU-14541
+ALWAYS_EXCEPT+="                28      16f"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 if [ $mds1_FSTYPE = "zfs" ]; then
@@ -579,35 +579,6 @@ test_16f() { # LU-14541
 }
 run_test 16f "rw sequential consistency vs drop_caches"
 
-test_16g() {
-       local file1=$DIR1/$tfile
-       local file2=$DIR2/$tfile
-       local duration=20
-       local status
-
-       timeout --preserve-status --signal=USR1 $duration \
-               rw_seq_cst_vs_drop_caches -m $file1 $file2
-       status=$?
-
-       case $((status & 0x7f)) in
-               0)
-                       echo OK # Computers must be fast now.
-                       ;;
-               6) # SIGABRT
-                       error "sequential consistency violation detected"
-                       ;;
-               10) # SIGUSR1
-                       echo TIMEOUT # This is fine.
-                       ;;
-               *)
-                       error "strange status '$status'"
-                       ;;
-       esac
-
-       rm -f $file1
-}
-run_test 16g "mmap rw sequential consistency vs drop_caches"
-
 test_16h() {
        local tf=$DIR/$tdir/$tfile
        local tf2=$DIR2/$tdir/$tfile