Whamcloud - gitweb
LU-16160 llite: SIGBUS is possible on a race with page reclaim 47/49647/2
authorAndrew Perepechko <andrew.perepechko@hpe.com>
Sun, 15 Jan 2023 16:55:58 +0000 (11:55 -0500)
committerOleg Drokin <green@whamcloud.com>
Fri, 27 Jan 2023 00:32:24 +0000 (00:32 +0000)
We can restart fault handling if page truncation happens
in parallel with the fault handler.

Change-Id: I6e60783e3334f87e799dc8b0e6e63d0bb358a236
Signed-off-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Signed-off-by: Patrick Farrell <farr0186@gmail.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49647
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_compat.h
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/llite_mmap.c
lustre/llite/vvp_page.c
lustre/obdclass/cl_page.c
lustre/tests/sanityn.sh

index b4efb01..965d331 100644 (file)
@@ -397,9 +397,9 @@ static inline void __user *get_vmf_address(struct vm_fault *vmf)
 }
 
 #ifdef HAVE_VM_OPS_USE_VM_FAULT_ONLY
-# define ll_filemap_fault(vma, vmf) filemap_fault(vmf)
+# define __ll_filemap_fault(vma, vmf) filemap_fault(vmf)
 #else
-# define ll_filemap_fault(vma, vmf) filemap_fault(vma, vmf)
+# define __ll_filemap_fault(vma, vmf) filemap_fault(vma, vmf)
 #endif
 
 #ifndef HAVE_CURRENT_TIME
index 45ab7b1..bdbfbb0 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/compat.h>
 #include <linux/aio.h>
 #include <linux/parser.h>
+#include <linux/seqlock.h>
 #include <lustre_compat.h>
 #include <lustre_crypto.h>
 #include <range_lock.h>
@@ -279,6 +280,7 @@ struct ll_inode_info {
        struct mutex                    lli_xattrs_enq_lock;
        struct list_head                lli_xattrs; /* ll_xattr_entry->xe_list */
        struct list_head                lli_lccs; /* list of ll_cl_context */
+       seqlock_t                       lli_page_inv_lock;
 };
 
 #ifndef HAVE_USER_NAMESPACE_ARG
@@ -1822,4 +1824,6 @@ int ll_manage_foreign(struct inode *inode, struct lustre_md *lmd);
 bool ll_foreign_is_openable(struct dentry *dentry, unsigned int flags);
 bool ll_foreign_is_removable(struct dentry *dentry, bool unset);
 
+int ll_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+
 #endif /* LLITE_INTERNAL_H */
index 13b6dcb..114c84c 100644 (file)
@@ -1256,6 +1256,7 @@ void ll_lli_init(struct ll_inode_info *lli)
        memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid));
        /* ll_cl_context initialize */
        INIT_LIST_HEAD(&lli->lli_lccs);
+       seqlock_init(&lli->lli_page_inv_lock);
 }
 
 #define MAX_STRING_SIZE 128
index 8da0702..af70ae6 100644 (file)
@@ -250,6 +250,25 @@ static inline int to_fault_error(int result)
        return result;
 }
 
+int ll_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct inode *inode = file_inode(vma->vm_file);
+       int ret;
+       unsigned int seq;
+
+       /* this seqlock lets us notice if a page has been deleted on this inode
+        * during the fault process, allowing us to catch an erroneous SIGBUS
+        * See LU-16160
+        */
+       do {
+               seq = read_seqbegin(&ll_i2info(inode)->lli_page_inv_lock);
+               ret = __ll_filemap_fault(vma, vmf);
+       } while (read_seqretry(&ll_i2info(inode)->lli_page_inv_lock, seq) &&
+                (ret & VM_FAULT_SIGBUS));
+
+       return ret;
+}
+
 /**
  * Lustre implementation of a vm_operations_struct::fault() method, called by
  * VM to server page fault (both in kernel and user space).
index b892013..ccd228e 100644 (file)
@@ -63,6 +63,41 @@ static void vvp_page_discard(const struct lu_env *env,
                ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);
 }
 
+static void vvp_page_delete(const struct lu_env *env,
+                           const struct cl_page_slice *slice)
+{
+       struct cl_page *cp = slice->cpl_page;
+
+       if (cp->cp_type == CPT_CACHEABLE) {
+               struct page *vmpage = cp->cp_vmpage;
+               struct inode *inode = vmpage->mapping->host;
+
+               LASSERT(PageLocked(vmpage));
+               LASSERT((struct cl_page *)vmpage->private == cp);
+
+               /* Drop the reference count held in vvp_page_init */
+               refcount_dec(&cp->cp_ref);
+
+               ClearPagePrivate(vmpage);
+               vmpage->private = 0;
+
+               /* clearpageuptodate prevents the page being read by the
+                * kernel after it has been deleted from Lustre, which avoids
+                * potential stale data reads.  The seqlock allows us to see
+                * that a page was potentially deleted and catch the resulting
+                * SIGBUS - see ll_filemap_fault() (LU-16160) */
+               write_seqlock(&ll_i2info(inode)->lli_page_inv_lock);
+               ClearPageUptodate(vmpage);
+               write_sequnlock(&ll_i2info(inode)->lli_page_inv_lock);
+
+               /*
+                * The reference from vmpage to cl_page is removed,
+                * but the reference back is still here. It is removed
+                * later in cl_page_free().
+                */
+       }
+}
+
 /**
  * Handles page transfer errors at VM level.
  *
@@ -159,6 +194,7 @@ static void vvp_page_completion_write(const struct lu_env *env,
 }
 
 static const struct cl_page_operations vvp_page_ops = {
+       .cpo_delete        = vvp_page_delete,
        .cpo_discard       = vvp_page_discard,
        .io = {
                [CRT_READ] = {
index 92ba1d6..45b2cfe 100644 (file)
@@ -804,7 +804,6 @@ EXPORT_SYMBOL(cl_page_discard);
  */
 static void __cl_page_delete(const struct lu_env *env, struct cl_page *cp)
 {
-       struct page *vmpage;
        const struct cl_page_slice *slice;
        int i;
 
@@ -822,24 +821,6 @@ static void __cl_page_delete(const struct lu_env *env, struct cl_page *cp)
                        (*slice->cpl_ops->cpo_delete)(env, slice);
        }
 
-       if (cp->cp_type == CPT_CACHEABLE) {
-               vmpage = cp->cp_vmpage;
-               LASSERT(PageLocked(vmpage));
-               LASSERT((struct cl_page *)vmpage->private == cp);
-
-               /* Drop the reference count held in vvp_page_init */
-               refcount_dec(&cp->cp_ref);
-
-               ClearPagePrivate(vmpage);
-               vmpage->private = 0;
-
-               /*
-                * The reference from vmpage to cl_page is removed,
-                * but the reference back is still here. It is removed
-                * later in cl_page_free().
-                */
-       }
-
        EXIT;
 }
 
index 8019af2..c324b60 100755 (executable)
@@ -18,8 +18,8 @@ init_test_env $@
 init_logging
 
 ALWAYS_EXCEPT="$SANITYN_EXCEPT "
-# bug number for skipped test:  LU-7105 LU-14541 LU-16160
-ALWAYS_EXCEPT+="                28      16f      16g "
+# bug number for skipped test:  LU-7105
+ALWAYS_EXCEPT+="                28"
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
 if [ $mds1_FSTYPE = "zfs" ]; then