From: Andrew Perepechko Date: Sun, 15 Jan 2023 16:55:58 +0000 (-0500) Subject: LU-16160 llite: SIGBUS is possible on a race with page reclaim X-Git-Tag: 2.15.54~63 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=b4da788a819f82d35b685d6ee7f02809c05ca005 LU-16160 llite: SIGBUS is possible on a race with page reclaim We can restart fault handling if page truncation happens in parallel with the fault handler. Change-Id: I6e60783e3334f87e799dc8b0e6e63d0bb358a236 Signed-off-by: Andrew Perepechko Signed-off-by: Patrick Farrell Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49647 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h index b4efb01..965d331 100644 --- a/lustre/include/lustre_compat.h +++ b/lustre/include/lustre_compat.h @@ -397,9 +397,9 @@ static inline void __user *get_vmf_address(struct vm_fault *vmf) } #ifdef HAVE_VM_OPS_USE_VM_FAULT_ONLY -# define ll_filemap_fault(vma, vmf) filemap_fault(vmf) +# define __ll_filemap_fault(vma, vmf) filemap_fault(vmf) #else -# define ll_filemap_fault(vma, vmf) filemap_fault(vma, vmf) +# define __ll_filemap_fault(vma, vmf) filemap_fault(vma, vmf) #endif #ifndef HAVE_CURRENT_TIME diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 45ab7b1..bdbfbb0 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -279,6 +280,7 @@ struct ll_inode_info { struct mutex lli_xattrs_enq_lock; struct list_head lli_xattrs; /* ll_xattr_entry->xe_list */ struct list_head lli_lccs; /* list of ll_cl_context */ + seqlock_t lli_page_inv_lock; }; #ifndef HAVE_USER_NAMESPACE_ARG @@ -1822,4 +1824,6 @@ int ll_manage_foreign(struct inode *inode, struct lustre_md *lmd); bool ll_foreign_is_openable(struct dentry *dentry, unsigned int flags); bool ll_foreign_is_removable(struct dentry *dentry, bool unset); +int ll_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf); + #endif /* LLITE_INTERNAL_H */ diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 13b6dcb..114c84c 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1256,6 +1256,7 @@ void ll_lli_init(struct ll_inode_info *lli) memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid)); /* ll_cl_context initialize */ INIT_LIST_HEAD(&lli->lli_lccs); + seqlock_init(&lli->lli_page_inv_lock); } #define MAX_STRING_SIZE 128 diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index 8da0702..af70ae68 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -250,6 +250,25 @@ static inline int to_fault_error(int result) return result; } +int ll_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + int ret; + unsigned int seq; + + /* this seqlock lets us notice if a page has been deleted on this inode + * during the fault process, allowing us to catch an erroneous SIGBUS + * See LU-16160 + */ + do { + seq = read_seqbegin(&ll_i2info(inode)->lli_page_inv_lock); + ret = __ll_filemap_fault(vma, vmf); + } while (read_seqretry(&ll_i2info(inode)->lli_page_inv_lock, seq) && + (ret & VM_FAULT_SIGBUS)); + + return ret; +} + /** * Lustre implementation of a vm_operations_struct::fault() method, called by * VM to server page fault (both in kernel and user space). diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index b892013..ccd228e 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -63,6 +63,41 @@ static void vvp_page_discard(const struct lu_env *env, ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED); } +static void vvp_page_delete(const struct lu_env *env, + const struct cl_page_slice *slice) +{ + struct cl_page *cp = slice->cpl_page; + + if (cp->cp_type == CPT_CACHEABLE) { + struct page *vmpage = cp->cp_vmpage; + struct inode *inode = vmpage->mapping->host; + + LASSERT(PageLocked(vmpage)); + LASSERT((struct cl_page *)vmpage->private == cp); + + /* Drop the reference count held in vvp_page_init */ + refcount_dec(&cp->cp_ref); + + ClearPagePrivate(vmpage); + vmpage->private = 0; + + /* clearpageuptodate prevents the page being read by the + * kernel after it has been deleted from Lustre, which avoids + * potential stale data reads. The seqlock allows us to see + * that a page was potentially deleted and catch the resulting + * SIGBUS - see ll_filemap_fault() (LU-16160) */ + write_seqlock(&ll_i2info(inode)->lli_page_inv_lock); + ClearPageUptodate(vmpage); + write_sequnlock(&ll_i2info(inode)->lli_page_inv_lock); + + /* + * The reference from vmpage to cl_page is removed, + * but the reference back is still here. It is removed + * later in cl_page_free(). + */ + } +} + /** * Handles page transfer errors at VM level. * @@ -159,6 +194,7 @@ static void vvp_page_completion_write(const struct lu_env *env, } static const struct cl_page_operations vvp_page_ops = { + .cpo_delete = vvp_page_delete, .cpo_discard = vvp_page_discard, .io = { [CRT_READ] = { diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index 92ba1d6..45b2cfe 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -804,7 +804,6 @@ EXPORT_SYMBOL(cl_page_discard); */ static void __cl_page_delete(const struct lu_env *env, struct cl_page *cp) { - struct page *vmpage; const struct cl_page_slice *slice; int i; @@ -822,24 +821,6 @@ static void __cl_page_delete(const struct lu_env *env, struct cl_page *cp) (*slice->cpl_ops->cpo_delete)(env, slice); } - if (cp->cp_type == CPT_CACHEABLE) { - vmpage = cp->cp_vmpage; - LASSERT(PageLocked(vmpage)); - LASSERT((struct cl_page *)vmpage->private == cp); - - /* Drop the reference count held in vvp_page_init */ - refcount_dec(&cp->cp_ref); - - ClearPagePrivate(vmpage); - vmpage->private = 0; - - /* - * The reference from vmpage to cl_page is removed, - * but the reference back is still here. It is removed - * later in cl_page_free(). - */ - } - EXIT; } diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 8019af2..c324b60 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -18,8 +18,8 @@ init_test_env $@ init_logging ALWAYS_EXCEPT="$SANITYN_EXCEPT " -# bug number for skipped test: LU-7105 LU-14541 LU-16160 -ALWAYS_EXCEPT+=" 28 16f 16g " +# bug number for skipped test: LU-7105 +ALWAYS_EXCEPT+=" 28" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! if [ $mds1_FSTYPE = "zfs" ]; then