From 3903f67feb56e4351f22a9d7f12802d70346e3f5 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Fri, 6 Jan 2023 18:19:33 +0000 Subject: [PATCH] LU-16160 revert: "llite: clear stale page's uptodate bit" This reverts commit 451b4ac514dd03c4fe91726da2f95a1f5575a5a6 which caused a bug in cl_page_own() race with ll_releasepage() and cl_pagevec_put() assertion failure. Lustre-change: https://review.whamcloud.com/49541 Lustre-commit: TBD (from ef330e09a59da0df2de153ecdb2e7d8729cd6b63) Signed-off-by: Bobi Jam Change-Id: Icdb8c60f4d992c9976670e1b06c5bab5ef3a3954 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/49576 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/cl_object.h | 14 +--- lustre/llite/rw.c | 11 +-- lustre/llite/vvp_io.c | 119 +++---------------------------- lustre/llite/vvp_page.c | 44 +++--------- lustre/tests/rw_seq_cst_vs_drop_caches.c | 85 ++++------------------ lustre/tests/sanityn.sh | 33 +-------- 6 files changed, 36 insertions(+), 270 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 05a9265..4393749 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -768,16 +768,9 @@ struct cl_page { * creation. */ enum cl_page_type cp_type:CP_TYPE_BITS; /* 32 bits */ - /* fault page read grab extra reference */ - unsigned cp_fault_ref:1, - /** - * if fault page got delete before returned to - * filemap_fault(), defer the vmpage detach/put - * until filemap_fault() has been handled. - */ - cp_defer_detach:1; /* which slab kmem index this memory allocated from */ short int cp_kmem_index; /* 48 bits */ + unsigned int cp_unused1:16; /* 64 bits */ /** * Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned @@ -2490,11 +2483,6 @@ static inline int cl_io_is_mkwrite(const struct cl_io *io) return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite; } -static inline int cl_io_is_pagefault(const struct cl_io *io) -{ - return io->ci_type == CIT_FAULT && !io->u.ci_fault.ft_mkwrite; -} - /** * True, iff \a io is a truncate(2). */ diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 2f53992..5c1fa74 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -1975,7 +1975,6 @@ int ll_readpage(struct file *file, struct page *vmpage) page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (!IS_ERR(page)) { LASSERT(page->cp_type == CPT_CACHEABLE); - if (likely(!PageUptodate(vmpage))) { cl_page_assume(env, io, page); @@ -1985,15 +1984,7 @@ int ll_readpage(struct file *file, struct page *vmpage) unlock_page(vmpage); result = 0; } - if (cl_io_is_pagefault(io) && result == 0) { - /** - * page fault, retain the cl_page reference until - * vvp_io_kernel_fault() release it. - */ - page->cp_fault_ref = 1; - } else { - cl_page_put(env, page); - } + cl_page_put(env, page); } else { unlock_page(vmpage); result = PTR_ERR(page); diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 1a98421..7b12507 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1389,42 +1389,14 @@ static void vvp_io_rw_end(const struct lu_env *env, trunc_sem_up_read(&lli->lli_trunc_sem); } -static void detach_and_deref_page(struct cl_page *clp, struct page *vmpage) -{ - if (!clp->cp_defer_detach) - return; - - /** - * cl_page_delete0() took a vmpage reference, but not unlink the vmpage - * from its cl_page. - */ - clp->cp_defer_detach = 0; - ClearPagePrivate(vmpage); - vmpage->private = 0; - - put_page(vmpage); - atomic_dec(&clp->cp_ref); -} - -static int vvp_io_kernel_fault(const struct lu_env *env, - struct vvp_fault_io *cfio) +static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) { struct vm_fault *vmf = cfio->ft_vmf; - struct file *vmff = cfio->ft_vma->vm_file; - struct address_space *mapping = vmff->f_mapping; - struct inode *inode = mapping->host; - struct page *vmpage = NULL; - struct cl_page *clp = NULL; - int rc = 0; - ENTRY; - ll_inode_size_lock(inode); -retry: cfio->ft_flags = ll_filemap_fault(cfio->ft_vma, vmf); cfio->ft_flags_valid = 1; if (vmf->page) { - /* success, vmpage is locked */ LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n", get_vmf_address(vmf)); if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) { @@ -1434,100 +1406,25 @@ retry: cfio->ft_vmpage = vmf->page; - /** - * ll_filemap_fault()->ll_readpage() could get an extra cl_page - * reference. So we have to get the cl_page's to check its - * cp_fault_ref and drop the reference later. - */ - clp = cl_vmpage_page(vmf->page, NULL); - - GOTO(unlock, rc = 0); - } - - /* filemap_fault() fails, vmpage is not locked */ - if (clp == NULL) { - vmpage = find_get_page(mapping, vmf->pgoff); - if (vmpage) { - lock_page(vmpage); - clp = cl_vmpage_page(vmpage, NULL); - unlock_page(vmpage); - } + return 0; } if (cfio->ft_flags & VM_FAULT_SIGBUS) { - pgoff_t max_idx; - - /** - * ll_filemap_fault()->ll_readpage() could fill vmpage - * correctly, and unlock the vmpage, while memory pressure or - * truncate could detach cl_page from vmpage, and kernel - * filemap_fault() will wait_on_page_locked(vmpage) and find - * out that the vmpage has been cleared its uptodate bit, - * so it returns VM_FAULT_SIGBUS. - * - * In this case, we'd retry the filemap_fault()->ll_readpage() - * to rebuild the cl_page and fill vmpage with uptodated data. - */ - if (likely(vmpage)) { - bool need_retry = false; - - if (clp) { - if (clp->cp_defer_detach) { - detach_and_deref_page(clp, vmpage); - /** - * check i_size to make sure it's not - * over EOF, we don't want to call - * filemap_fault() repeatedly since it - * returns VM_FAULT_SIGBUS without even - * trying if vmf->pgoff is over EOF. - */ - max_idx = DIV_ROUND_UP( - i_size_read(inode), PAGE_SIZE); - if (vmf->pgoff < max_idx) - need_retry = true; - } - if (clp->cp_fault_ref) { - clp->cp_fault_ref = 0; - /* ref not released in ll_readpage() */ - cl_page_put(env, clp); - } - if (need_retry) - goto retry; - } - } - CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", get_vmf_address(vmf)); - GOTO(unlock, rc = -EFAULT); + return -EFAULT; } if (cfio->ft_flags & VM_FAULT_OOM) { CDEBUG(D_PAGE, "got addr %p - OOM\n", get_vmf_address(vmf)); - GOTO(unlock, rc = -ENOMEM); + return -ENOMEM; } if (cfio->ft_flags & VM_FAULT_RETRY) - GOTO(unlock, rc = -EAGAIN); + return -EAGAIN; CERROR("unknown error in page fault %d\n", cfio->ft_flags); - rc = -EINVAL; -unlock: - ll_inode_size_unlock(inode); - if (clp) { - if (clp->cp_defer_detach && vmpage) - detach_and_deref_page(clp, vmpage); - - /* additional cl_page ref has been taken in ll_readpage() */ - if (clp->cp_fault_ref) { - clp->cp_fault_ref = 0; - /* ref not released in ll_readpage() */ - cl_page_put(env, clp); - } - /* ref taken in this function */ - cl_page_put(env, clp); - } - if (vmpage) - put_page(vmpage); - return rc; + + return -EINVAL; } static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io, @@ -1582,7 +1479,7 @@ static int vvp_io_fault_start(const struct lu_env *env, LASSERT(cfio->ft_vmpage != NULL); lock_page(cfio->ft_vmpage); } else { - result = vvp_io_kernel_fault(env, cfio); + result = vvp_io_kernel_fault(cfio); if (result != 0) RETURN(result); } diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index 4bab3fb..7028526 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -164,36 +164,17 @@ static void vvp_page_delete(const struct lu_env *env, LASSERT(PageLocked(vmpage)); LASSERT((struct cl_page *)vmpage->private == page); - /** - * clear vmpage uptodate bit, since ll_read_ahead_pages()-> - * ll_read_ahead_page() could pick up this stale vmpage and - * take it as uptodated. - */ - ClearPageUptodate(vmpage); - /** - * vvp_io_kernel_fault()->ll_readpage() set vpg_fault_ref - * and need it to check cl_page to retry the page fault read. - */ - if (page->cp_fault_ref) { - page->cp_defer_detach = 1; - /** - * get a vmpage reference, so that filemap_fault() - * won't free it from pagecache. - */ - get_page(vmpage); - } else { - /* Drop the reference count held in vvp_page_init */ - refc = atomic_dec_return(&page->cp_ref); - LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc); - ClearPagePrivate(vmpage); - vmpage->private = 0; - /* - * Reference from vmpage to cl_page is removed, but the - * reference back is still here. It is removed later in - * vvp_page_fini(). - */ - } + /* Drop the reference count held in vvp_page_init */ + refc = atomic_dec_return(&page->cp_ref); + LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc); + + ClearPagePrivate(vmpage); + vmpage->private = 0; + /* + * Reference from vmpage to cl_page is removed, but the reference back + * is still here. It is removed later in vvp_page_fini(). + */ } static void vvp_page_export(const struct lu_env *env, @@ -290,11 +271,6 @@ static void vvp_page_completion_read(const struct lu_env *env, ll_ra_count_put(ll_i2sbi(inode), 1); if (ioret == 0) { - /** - * vpg_defer_uptodate is used for readahead page, and the - * vmpage Uptodate bit is deferred to set in ll_readpage/ - * ll_io_read_page. - */ if (!vpg->vpg_defer_uptodate) cl_page_export(env, page, 1); } else if (vpg->vpg_defer_uptodate) { diff --git a/lustre/tests/rw_seq_cst_vs_drop_caches.c b/lustre/tests/rw_seq_cst_vs_drop_caches.c index 3cb9aba..5f988bb 100644 --- a/lustre/tests/rw_seq_cst_vs_drop_caches.c +++ b/lustre/tests/rw_seq_cst_vs_drop_caches.c @@ -11,7 +11,7 @@ #include /* - * Usage: rw_seq_cst_vs_drop_caches [-m] /mnt/lustre/file0 /mnt/lustre2/file0 + * Usage: rw_seq_cst_vs_drop_caches /mnt/lustre/file0 /mnt/lustre2/file0 * Race reads of the same file on two client mounts vs writes and drop * caches to detect sequential consistency violations. Run @@ -19,15 +19,6 @@ * which case the wait status ($?) will be 134. */ -int mmap_mode; /* -m flag */ - -void usage(void) -{ - fprintf(stdout, - "%s: rw_seq_cst_vs_drop_caches [-m] /mnt/lustre/file0 /mnt/lustre2/file0\n" -" -m : use mmap to read/write file\n", __func__); -} - #define handle_error(msg) \ do { perror(msg); exit(EXIT_FAILURE); } while (0) @@ -37,39 +28,23 @@ static int fd[2] = { -1, -1 }; */ static uint64_t u, u_max = UINT64_MAX / 2; static uint64_t v[2]; -char *ptr; static void *access_thread_start(void *unused) { - char *ptr2 = NULL; ssize_t rc; int i; - if (mmap_mode) { - ptr2 = mmap(NULL, sizeof(v[1]), PROT_READ, - MAP_PRIVATE | MAP_POPULATE, fd[1], 0); - if (ptr2 == MAP_FAILED) - handle_error("mmap"); - } - do { for (i = 0; i < 2; i++) { - if (mmap_mode) { - memcpy(&v[i], i == 0 ? ptr : ptr2, - sizeof(v[i])); - } else { - rc = pread(fd[i], &v[i], sizeof(v[i]), 0); - if (rc < 0 || rc != sizeof(v[i])) - handle_error("pread"); - } + rc = pread(fd[i], &v[i], sizeof(v[i]), 0); + if (rc < 0 || rc != sizeof(v[i])) + handle_error("pread"); } } while (v[0] <= v[1]); fprintf(stderr, "error: u = %"PRIu64", v = %"PRIu64", %"PRIu64"\n", u, v[0], v[1]); - if (mmap_mode) - munmap(ptr2, sizeof(v[i])); abort(); } @@ -81,26 +56,12 @@ int main(int argc, char *argv[]) pthread_t access_thread; struct stat st[2]; ssize_t rc; - int i, ch; + int i; setvbuf(stderr, stderr_buf, _IOLBF, sizeof(stderr_buf)); - while ((ch = getopt(argc, argv, "m")) >= 0) { - switch (ch) { - case 'm': - mmap_mode = 1; - break; - default: - usage(); - exit(EXIT_FAILURE); - } - } - - argc -= optind; - argv += optind; - - if (argc != 2) { - usage(); + if (argc != 3) { + fprintf(stderr, "Usage: %s /mnt/lustre/file0 /mnt/lustre2/file0\n", argv[0]); exit(EXIT_FAILURE); } @@ -108,7 +69,7 @@ int main(int argc, char *argv[]) assert(!(drop_caches_fd < 0)); for (i = 0; i < 2; i++) { - fd[i] = open(argv[i], O_RDWR|O_CREAT|O_TRUNC, 0666); + fd[i] = open(argv[i + 1], O_RDWR|O_CREAT|O_TRUNC, 0666); if (fd[i] < 0) handle_error("open"); @@ -125,33 +86,18 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - if (mmap_mode) { - if (ftruncate(fd[0], sizeof(u)) < 0) - handle_error("ftruncate"); - - ptr = mmap(NULL, sizeof(u), PROT_READ|PROT_WRITE, MAP_SHARED, - fd[0], 0); - if (ptr == MAP_FAILED) - handle_error("mmap"); - memcpy(ptr, &u, sizeof(u)); - } else { - rc = pwrite(fd[0], &u, sizeof(u), 0); - if (rc < 0 || rc != sizeof(u)) - handle_error("pwrite"); - } + rc = pwrite(fd[0], &u, sizeof(u), 0); + if (rc < 0 || rc != sizeof(u)) + handle_error("pwrite"); rc = pthread_create(&access_thread, NULL, &access_thread_start, NULL); if (rc != 0) handle_error("pthread_create"); for (u = 1; u <= u_max; u++) { - if (mmap_mode) { - memcpy(ptr, &u, sizeof(u)); - } else { - rc = pwrite(fd[0], &u, sizeof(u), 0); - if (rc < 0 || rc != sizeof(u)) - handle_error("pwrite"); - } + rc = pwrite(fd[0], &u, sizeof(u), 0); + if (rc < 0 || rc != sizeof(u)) + handle_error("pwrite"); rc = write(drop_caches_fd, "3\n", 2); if (rc < 0 || rc != 2) @@ -166,8 +112,5 @@ int main(int argc, char *argv[]) if (rc != 0) handle_error("pthread_join"); - if (mmap_mode) - munmap(ptr, sizeof(u)); - return 0; } diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index daec5cc..324482d 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -18,8 +18,8 @@ init_test_env $@ init_logging ALWAYS_EXCEPT="$SANITYN_EXCEPT " -# bug number for skipped test: LU-7105 -ALWAYS_EXCEPT+=" 28 " +# bug number for skipped test: LU-7105 LU-14541 +ALWAYS_EXCEPT+=" 28 16f" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! if [ $mds1_FSTYPE = "zfs" ]; then @@ -579,35 +579,6 @@ test_16f() { # LU-14541 } run_test 16f "rw sequential consistency vs drop_caches" -test_16g() { - local file1=$DIR1/$tfile - local file2=$DIR2/$tfile - local duration=20 - local status - - timeout --preserve-status --signal=USR1 $duration \ - rw_seq_cst_vs_drop_caches -m $file1 $file2 - status=$? - - case $((status & 0x7f)) in - 0) - echo OK # Computers must be fast now. - ;; - 6) # SIGABRT - error "sequential consistency violation detected" - ;; - 10) # SIGUSR1 - echo TIMEOUT # This is fine. - ;; - *) - error "strange status '$status'" - ;; - esac - - rm -f $file1 -} -run_test 16g "mmap rw sequential consistency vs drop_caches" - test_16h() { local tf=$DIR/$tdir/$tfile local tf2=$DIR2/$tdir/$tfile -- 1.8.3.1