* creation.
*/
enum cl_page_type cp_type:CP_TYPE_BITS; /* 32 bits */
- /* fault page read grab extra reference */
- unsigned cp_fault_ref:1,
- /**
- * if fault page got delete before returned to
- * filemap_fault(), defer the vmpage detach/put
- * until filemap_fault() has been handled.
- */
- cp_defer_detach:1;
/* which slab kmem index this memory allocated from */
short int cp_kmem_index; /* 48 bits */
+ unsigned int cp_unused1:16; /* 64 bits */
/**
* Owning IO in cl_page_state::CPS_OWNED state. Sub-page can be owned
return io->ci_type == CIT_FAULT && io->u.ci_fault.ft_mkwrite;
}
-static inline int cl_io_is_pagefault(const struct cl_io *io)
-{
- return io->ci_type == CIT_FAULT && !io->u.ci_fault.ft_mkwrite;
-}
-
/**
* True, iff \a io is a truncate(2).
*/
page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
if (!IS_ERR(page)) {
LASSERT(page->cp_type == CPT_CACHEABLE);
-
if (likely(!PageUptodate(vmpage))) {
cl_page_assume(env, io, page);
unlock_page(vmpage);
result = 0;
}
- if (cl_io_is_pagefault(io) && result == 0) {
- /**
- * page fault, retain the cl_page reference until
- * vvp_io_kernel_fault() release it.
- */
- page->cp_fault_ref = 1;
- } else {
- cl_page_put(env, page);
- }
+ cl_page_put(env, page);
} else {
unlock_page(vmpage);
result = PTR_ERR(page);
trunc_sem_up_read(&lli->lli_trunc_sem);
}
-static void detach_and_deref_page(struct cl_page *clp, struct page *vmpage)
-{
- if (!clp->cp_defer_detach)
- return;
-
- /**
- * cl_page_delete0() took a vmpage reference, but not unlink the vmpage
- * from its cl_page.
- */
- clp->cp_defer_detach = 0;
- ClearPagePrivate(vmpage);
- vmpage->private = 0;
-
- put_page(vmpage);
- atomic_dec(&clp->cp_ref);
-}
-
-static int vvp_io_kernel_fault(const struct lu_env *env,
- struct vvp_fault_io *cfio)
+static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
{
struct vm_fault *vmf = cfio->ft_vmf;
- struct file *vmff = cfio->ft_vma->vm_file;
- struct address_space *mapping = vmff->f_mapping;
- struct inode *inode = mapping->host;
- struct page *vmpage = NULL;
- struct cl_page *clp = NULL;
- int rc = 0;
- ENTRY;
- ll_inode_size_lock(inode);
-retry:
cfio->ft_flags = ll_filemap_fault(cfio->ft_vma, vmf);
cfio->ft_flags_valid = 1;
if (vmf->page) {
- /* success, vmpage is locked */
LL_CDEBUG_PAGE(D_PAGE, vmf->page, "got addr %p type NOPAGE\n",
get_vmf_address(vmf));
if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
cfio->ft_vmpage = vmf->page;
- /**
- * ll_filemap_fault()->ll_readpage() could get an extra cl_page
- * reference. So we have to get the cl_page's to check its
- * cp_fault_ref and drop the reference later.
- */
- clp = cl_vmpage_page(vmf->page, NULL);
-
- GOTO(unlock, rc = 0);
- }
-
- /* filemap_fault() fails, vmpage is not locked */
- if (clp == NULL) {
- vmpage = find_get_page(mapping, vmf->pgoff);
- if (vmpage) {
- lock_page(vmpage);
- clp = cl_vmpage_page(vmpage, NULL);
- unlock_page(vmpage);
- }
+ return 0;
}
if (cfio->ft_flags & VM_FAULT_SIGBUS) {
- pgoff_t max_idx;
-
- /**
- * ll_filemap_fault()->ll_readpage() could fill vmpage
- * correctly, and unlock the vmpage, while memory pressure or
- * truncate could detach cl_page from vmpage, and kernel
- * filemap_fault() will wait_on_page_locked(vmpage) and find
- * out that the vmpage has been cleared its uptodate bit,
- * so it returns VM_FAULT_SIGBUS.
- *
- * In this case, we'd retry the filemap_fault()->ll_readpage()
- * to rebuild the cl_page and fill vmpage with uptodated data.
- */
- if (likely(vmpage)) {
- bool need_retry = false;
-
- if (clp) {
- if (clp->cp_defer_detach) {
- detach_and_deref_page(clp, vmpage);
- /**
- * check i_size to make sure it's not
- * over EOF, we don't want to call
- * filemap_fault() repeatedly since it
- * returns VM_FAULT_SIGBUS without even
- * trying if vmf->pgoff is over EOF.
- */
- max_idx = DIV_ROUND_UP(
- i_size_read(inode), PAGE_SIZE);
- if (vmf->pgoff < max_idx)
- need_retry = true;
- }
- if (clp->cp_fault_ref) {
- clp->cp_fault_ref = 0;
- /* ref not released in ll_readpage() */
- cl_page_put(env, clp);
- }
- if (need_retry)
- goto retry;
- }
- }
-
CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", get_vmf_address(vmf));
- GOTO(unlock, rc = -EFAULT);
+ return -EFAULT;
}
if (cfio->ft_flags & VM_FAULT_OOM) {
CDEBUG(D_PAGE, "got addr %p - OOM\n", get_vmf_address(vmf));
- GOTO(unlock, rc = -ENOMEM);
+ return -ENOMEM;
}
if (cfio->ft_flags & VM_FAULT_RETRY)
- GOTO(unlock, rc = -EAGAIN);
+ return -EAGAIN;
CERROR("unknown error in page fault %d\n", cfio->ft_flags);
- rc = -EINVAL;
-unlock:
- ll_inode_size_unlock(inode);
- if (clp) {
- if (clp->cp_defer_detach && vmpage)
- detach_and_deref_page(clp, vmpage);
-
- /* additional cl_page ref has been taken in ll_readpage() */
- if (clp->cp_fault_ref) {
- clp->cp_fault_ref = 0;
- /* ref not released in ll_readpage() */
- cl_page_put(env, clp);
- }
- /* ref taken in this function */
- cl_page_put(env, clp);
- }
- if (vmpage)
- put_page(vmpage);
- return rc;
+
+ return -EINVAL;
}
static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
LASSERT(cfio->ft_vmpage != NULL);
lock_page(cfio->ft_vmpage);
} else {
- result = vvp_io_kernel_fault(env, cfio);
+ result = vvp_io_kernel_fault(cfio);
if (result != 0)
RETURN(result);
}
LASSERT(PageLocked(vmpage));
LASSERT((struct cl_page *)vmpage->private == page);
- /**
- * clear vmpage uptodate bit, since ll_read_ahead_pages()->
- * ll_read_ahead_page() could pick up this stale vmpage and
- * take it as uptodated.
- */
- ClearPageUptodate(vmpage);
- /**
- * vvp_io_kernel_fault()->ll_readpage() set vpg_fault_ref
- * and need it to check cl_page to retry the page fault read.
- */
- if (page->cp_fault_ref) {
- page->cp_defer_detach = 1;
- /**
- * get a vmpage reference, so that filemap_fault()
- * won't free it from pagecache.
- */
- get_page(vmpage);
- } else {
- /* Drop the reference count held in vvp_page_init */
- refc = atomic_dec_return(&page->cp_ref);
- LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
- ClearPagePrivate(vmpage);
- vmpage->private = 0;
- /*
- * Reference from vmpage to cl_page is removed, but the
- * reference back is still here. It is removed later in
- * vvp_page_fini().
- */
- }
+ /* Drop the reference count held in vvp_page_init */
+ refc = atomic_dec_return(&page->cp_ref);
+ LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
+
+ ClearPagePrivate(vmpage);
+ vmpage->private = 0;
+ /*
+ * Reference from vmpage to cl_page is removed, but the reference back
+ * is still here. It is removed later in vvp_page_fini().
+ */
}
static void vvp_page_export(const struct lu_env *env,
ll_ra_count_put(ll_i2sbi(inode), 1);
if (ioret == 0) {
- /**
- * vpg_defer_uptodate is used for readahead page, and the
- * vmpage Uptodate bit is deferred to set in ll_readpage/
- * ll_io_read_page.
- */
if (!vpg->vpg_defer_uptodate)
cl_page_export(env, page, 1);
} else if (vpg->vpg_defer_uptodate) {
#include <pthread.h>
/*
- * Usage: rw_seq_cst_vs_drop_caches [-m] /mnt/lustre/file0 /mnt/lustre2/file0
+ * Usage: rw_seq_cst_vs_drop_caches /mnt/lustre/file0 /mnt/lustre2/file0
* Race reads of the same file on two client mounts vs writes and drop
* caches to detect sequential consistency violations. Run
* which case the wait status ($?) will be 134.
*/
-int mmap_mode; /* -m flag */
-
-void usage(void)
-{
- fprintf(stdout,
- "%s: rw_seq_cst_vs_drop_caches [-m] /mnt/lustre/file0 /mnt/lustre2/file0\n"
-" -m : use mmap to read/write file\n", __func__);
-}
-
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
*/
static uint64_t u, u_max = UINT64_MAX / 2;
static uint64_t v[2];
-char *ptr;
static void *access_thread_start(void *unused)
{
- char *ptr2 = NULL;
ssize_t rc;
int i;
- if (mmap_mode) {
- ptr2 = mmap(NULL, sizeof(v[1]), PROT_READ,
- MAP_PRIVATE | MAP_POPULATE, fd[1], 0);
- if (ptr2 == MAP_FAILED)
- handle_error("mmap");
- }
-
do {
for (i = 0; i < 2; i++) {
- if (mmap_mode) {
- memcpy(&v[i], i == 0 ? ptr : ptr2,
- sizeof(v[i]));
- } else {
- rc = pread(fd[i], &v[i], sizeof(v[i]), 0);
- if (rc < 0 || rc != sizeof(v[i]))
- handle_error("pread");
- }
+ rc = pread(fd[i], &v[i], sizeof(v[i]), 0);
+ if (rc < 0 || rc != sizeof(v[i]))
+ handle_error("pread");
}
} while (v[0] <= v[1]);
fprintf(stderr, "error: u = %"PRIu64", v = %"PRIu64", %"PRIu64"\n",
u, v[0], v[1]);
- if (mmap_mode)
- munmap(ptr2, sizeof(v[i]));
abort();
}
pthread_t access_thread;
struct stat st[2];
ssize_t rc;
- int i, ch;
+ int i;
setvbuf(stderr, stderr_buf, _IOLBF, sizeof(stderr_buf));
- while ((ch = getopt(argc, argv, "m")) >= 0) {
- switch (ch) {
- case 'm':
- mmap_mode = 1;
- break;
- default:
- usage();
- exit(EXIT_FAILURE);
- }
- }
-
- argc -= optind;
- argv += optind;
-
- if (argc != 2) {
- usage();
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s /mnt/lustre/file0 /mnt/lustre2/file0\n", argv[0]);
exit(EXIT_FAILURE);
}
assert(!(drop_caches_fd < 0));
for (i = 0; i < 2; i++) {
- fd[i] = open(argv[i], O_RDWR|O_CREAT|O_TRUNC, 0666);
+ fd[i] = open(argv[i + 1], O_RDWR|O_CREAT|O_TRUNC, 0666);
if (fd[i] < 0)
handle_error("open");
exit(EXIT_FAILURE);
}
- if (mmap_mode) {
- if (ftruncate(fd[0], sizeof(u)) < 0)
- handle_error("ftruncate");
-
- ptr = mmap(NULL, sizeof(u), PROT_READ|PROT_WRITE, MAP_SHARED,
- fd[0], 0);
- if (ptr == MAP_FAILED)
- handle_error("mmap");
- memcpy(ptr, &u, sizeof(u));
- } else {
- rc = pwrite(fd[0], &u, sizeof(u), 0);
- if (rc < 0 || rc != sizeof(u))
- handle_error("pwrite");
- }
+ rc = pwrite(fd[0], &u, sizeof(u), 0);
+ if (rc < 0 || rc != sizeof(u))
+ handle_error("pwrite");
rc = pthread_create(&access_thread, NULL, &access_thread_start, NULL);
if (rc != 0)
handle_error("pthread_create");
for (u = 1; u <= u_max; u++) {
- if (mmap_mode) {
- memcpy(ptr, &u, sizeof(u));
- } else {
- rc = pwrite(fd[0], &u, sizeof(u), 0);
- if (rc < 0 || rc != sizeof(u))
- handle_error("pwrite");
- }
+ rc = pwrite(fd[0], &u, sizeof(u), 0);
+ if (rc < 0 || rc != sizeof(u))
+ handle_error("pwrite");
rc = write(drop_caches_fd, "3\n", 2);
if (rc < 0 || rc != 2)
if (rc != 0)
handle_error("pthread_join");
- if (mmap_mode)
- munmap(ptr, sizeof(u));
-
return 0;
}
init_logging
ALWAYS_EXCEPT="$SANITYN_EXCEPT "
-# bug number for skipped test: LU-7105
-ALWAYS_EXCEPT+=" 28 "
+# bug number for skipped test: LU-7105 LU-14541
+ALWAYS_EXCEPT+=" 28 16f"
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
if [ $mds1_FSTYPE = "zfs" ]; then
}
run_test 16f "rw sequential consistency vs drop_caches"
-test_16g() {
- local file1=$DIR1/$tfile
- local file2=$DIR2/$tfile
- local duration=20
- local status
-
- timeout --preserve-status --signal=USR1 $duration \
- rw_seq_cst_vs_drop_caches -m $file1 $file2
- status=$?
-
- case $((status & 0x7f)) in
- 0)
- echo OK # Computers must be fast now.
- ;;
- 6) # SIGABRT
- error "sequential consistency violation detected"
- ;;
- 10) # SIGUSR1
- echo TIMEOUT # This is fine.
- ;;
- *)
- error "strange status '$status'"
- ;;
- esac
-
- rm -f $file1
-}
-run_test 16g "mmap rw sequential consistency vs drop_caches"
-
test_16h() {
local tf=$DIR/$tdir/$tfile
local tf2=$DIR2/$tdir/$tfile