}
#ifdef HAVE_VM_OPS_USE_VM_FAULT_ONLY
-# define ll_filemap_fault(vma, vmf) filemap_fault(vmf)
+# define __ll_filemap_fault(vma, vmf) filemap_fault(vmf)
#else
-# define ll_filemap_fault(vma, vmf) filemap_fault(vma, vmf)
+# define __ll_filemap_fault(vma, vmf) filemap_fault(vma, vmf)
#endif
#ifndef HAVE_CURRENT_TIME
#include <linux/compat.h>
#include <linux/aio.h>
#include <linux/parser.h>
+#include <linux/seqlock.h>
#include <lustre_compat.h>
#include <lustre_crypto.h>
#include <range_lock.h>
struct mutex lli_xattrs_enq_lock;
struct list_head lli_xattrs; /* ll_xattr_entry->xe_list */
struct list_head lli_lccs; /* list of ll_cl_context */
+ seqlock_t lli_page_inv_lock;
};
#ifndef HAVE_USER_NAMESPACE_ARG
bool ll_foreign_is_openable(struct dentry *dentry, unsigned int flags);
bool ll_foreign_is_removable(struct dentry *dentry, bool unset);
+int ll_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+
#endif /* LLITE_INTERNAL_H */
memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid));
/* ll_cl_context initialize */
INIT_LIST_HEAD(&lli->lli_lccs);
+ seqlock_init(&lli->lli_page_inv_lock);
}
#define MAX_STRING_SIZE 128
return result;
}
+int ll_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vma->vm_file);
+ int ret;
+ unsigned int seq;
+
+ /* this seqlock lets us notice if a page has been deleted on this inode
+ * during the fault process, allowing us to catch an erroneous SIGBUS
+ * See LU-16160
+ */
+ do {
+ seq = read_seqbegin(&ll_i2info(inode)->lli_page_inv_lock);
+ ret = __ll_filemap_fault(vma, vmf);
+ } while (read_seqretry(&ll_i2info(inode)->lli_page_inv_lock, seq) &&
+ (ret & VM_FAULT_SIGBUS));
+
+ return ret;
+}
+
/**
* Lustre implementation of a vm_operations_struct::fault() method, called by
* VM to server page fault (both in kernel and user space).
generic_error_remove_page(vmpage->mapping, vmpage);
}
-static void vvp_page_delete(const struct lu_env *env,
- const struct cl_page_slice *slice)
-{
- struct page *vmpage = cl2vm_page(slice);
- struct cl_page *page = slice->cpl_page;
- int refc;
-
- LASSERT(PageLocked(vmpage));
- LASSERT((struct cl_page *)vmpage->private == page);
-
-
- /* Drop the reference count held in vvp_page_init */
- refc = atomic_dec_return(&page->cp_ref);
- LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);
-
- ClearPagePrivate(vmpage);
- vmpage->private = 0;
- /*
- * Reference from vmpage to cl_page is removed, but the reference back
- * is still here. It is removed later in vvp_page_fini().
- */
-}
-
static void vvp_page_export(const struct lu_env *env,
const struct cl_page_slice *slice,
int uptodate)
return 0;
}
+static void vvp_page_delete(const struct lu_env *env,
+ const struct cl_page_slice *slice)
+{
+ struct cl_page *cp = slice->cpl_page;
+
+ if (cp->cp_type == CPT_CACHEABLE) {
+ struct page *vmpage = cp->cp_vmpage;
+ struct inode *inode = vmpage->mapping->host;
+
+ LASSERT(PageLocked(vmpage));
+ LASSERT((struct cl_page *)vmpage->private == cp);
+
+ /* Drop the reference count held in vvp_page_init */
+ atomic_dec(&cp->cp_ref);
+
+ ClearPagePrivate(vmpage);
+ vmpage->private = 0;
+
+ /* clearpageuptodate prevents the page being read by the
+ * kernel after it has been deleted from Lustre, which avoids
+ * potential stale data reads. The seqlock allows us to see
+ * that a page was potentially deleted and catch the resulting
+ * SIGBUS - see ll_filemap_fault() (LU-16160) */
+ write_seqlock(&ll_i2info(inode)->lli_page_inv_lock);
+ ClearPageUptodate(vmpage);
+ write_sequnlock(&ll_i2info(inode)->lli_page_inv_lock);
+
+ /*
+ * The reference from vmpage to cl_page is removed,
+ * but the reference back is still here. It is removed
+ * later in cl_page_free().
+ */
+ }
+}
+
/**
* Handles page transfer errors at VM level.
*
#include <pthread.h>
/*
- * Usage: rw_seq_cst_vs_drop_caches /mnt/lustre/file0 /mnt/lustre2/file0
+ * Usage: rw_seq_cst_vs_drop_caches [-m] /mnt/lustre/file0 /mnt/lustre2/file0
* Race reads of the same file on two client mounts vs writes and drop
* caches to detect sequential consistency violations. Run
* which case the wait status ($?) will be 134.
*/
+int mmap_mode; /* -m flag */
+
+void usage(void)
+{
+ fprintf(stdout,
+ "%s: rw_seq_cst_vs_drop_caches [-m] /mnt/lustre/file0 /mnt/lustre2/file0\n"
+" -m : use mmap to read/write file\n", __func__);
+}
+
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
*/
static uint64_t u, u_max = UINT64_MAX / 2;
static uint64_t v[2];
+char *ptr;
static void *access_thread_start(void *unused)
{
+ char *ptr2 = NULL;
ssize_t rc;
int i;
+ if (mmap_mode) {
+ ptr2 = mmap(NULL, sizeof(v[1]), PROT_READ,
+ MAP_PRIVATE | MAP_POPULATE, fd[1], 0);
+ if (ptr2 == MAP_FAILED)
+ handle_error("mmap");
+ }
+
do {
for (i = 0; i < 2; i++) {
- rc = pread(fd[i], &v[i], sizeof(v[i]), 0);
- if (rc < 0 || rc != sizeof(v[i]))
- handle_error("pread");
+ if (mmap_mode) {
+ memcpy(&v[i], i == 0 ? ptr : ptr2,
+ sizeof(v[i]));
+ } else {
+ rc = pread(fd[i], &v[i], sizeof(v[i]), 0);
+ if (rc < 0 || rc != sizeof(v[i]))
+ handle_error("pread");
+ }
}
} while (v[0] <= v[1]);
fprintf(stderr, "error: u = %"PRIu64", v = %"PRIu64", %"PRIu64"\n",
u, v[0], v[1]);
+ if (mmap_mode)
+ munmap(ptr2, sizeof(v[i]));
abort();
}
pthread_t access_thread;
struct stat st[2];
ssize_t rc;
- int i;
+ int i, ch;
setvbuf(stderr, stderr_buf, _IOLBF, sizeof(stderr_buf));
- if (argc != 3) {
- fprintf(stderr, "Usage: %s /mnt/lustre/file0 /mnt/lustre2/file0\n", argv[0]);
+ while ((ch = getopt(argc, argv, "m")) >= 0) {
+ switch (ch) {
+ case 'm':
+ mmap_mode = 1;
+ break;
+ default:
+ usage();
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 2) {
+ usage();
exit(EXIT_FAILURE);
}
assert(!(drop_caches_fd < 0));
for (i = 0; i < 2; i++) {
- fd[i] = open(argv[i + 1], O_RDWR|O_CREAT|O_TRUNC, 0666);
+ fd[i] = open(argv[i], O_RDWR|O_CREAT|O_TRUNC, 0666);
if (fd[i] < 0)
handle_error("open");
exit(EXIT_FAILURE);
}
- rc = pwrite(fd[0], &u, sizeof(u), 0);
- if (rc < 0 || rc != sizeof(u))
- handle_error("pwrite");
+ if (mmap_mode) {
+ if (ftruncate(fd[0], sizeof(u)) < 0)
+ handle_error("ftruncate");
+
+ ptr = mmap(NULL, sizeof(u), PROT_READ|PROT_WRITE, MAP_SHARED,
+ fd[0], 0);
+ if (ptr == MAP_FAILED)
+ handle_error("mmap");
+ memcpy(ptr, &u, sizeof(u));
+ } else {
+ rc = pwrite(fd[0], &u, sizeof(u), 0);
+ if (rc < 0 || rc != sizeof(u))
+ handle_error("pwrite");
+ }
rc = pthread_create(&access_thread, NULL, &access_thread_start, NULL);
if (rc != 0)
handle_error("pthread_create");
for (u = 1; u <= u_max; u++) {
- rc = pwrite(fd[0], &u, sizeof(u), 0);
- if (rc < 0 || rc != sizeof(u))
- handle_error("pwrite");
+ if (mmap_mode) {
+ memcpy(ptr, &u, sizeof(u));
+ } else {
+ rc = pwrite(fd[0], &u, sizeof(u), 0);
+ if (rc < 0 || rc != sizeof(u))
+ handle_error("pwrite");
+ }
rc = write(drop_caches_fd, "3\n", 2);
if (rc < 0 || rc != 2)
if (rc != 0)
handle_error("pthread_join");
+ if (mmap_mode)
+ munmap(ptr, sizeof(u));
+
return 0;
}
init_logging
ALWAYS_EXCEPT="$SANITYN_EXCEPT "
-# bug number for skipped test: LU-7105 LU-14541
-ALWAYS_EXCEPT+=" 28 16f"
+# bug number for skipped test: LU-7105
+ALWAYS_EXCEPT+=" 28 "
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# skip tests for PPC until they are fixed
}
run_test 16f "rw sequential consistency vs drop_caches"
+test_16g() {
+ local file1=$DIR1/$tfile
+ local file2=$DIR2/$tfile
+ local duration=20
+ local status
+
+ timeout --preserve-status --signal=USR1 $duration \
+ rw_seq_cst_vs_drop_caches -m $file1 $file2
+ status=$?
+
+ case $((status & 0x7f)) in
+ 0)
+ echo OK # Computers must be fast now.
+ ;;
+ 6) # SIGABRT
+ error "sequential consistency violation detected"
+ ;;
+ 10) # SIGUSR1
+ echo TIMEOUT # This is fine.
+ ;;
+ *)
+ error "strange status '$status'"
+ ;;
+ esac
+
+ rm -f $file1
+}
+run_test 16g "mmap rw sequential consistency vs drop_caches"
+
test_17() { # bug 3513, 3667
remote_ost_nodsh && skip "remote OST with nodsh" && return