From 5309e108582c692f3b60705818fddc4a3b3b1345 Mon Sep 17 00:00:00 2001
From: Mr NeilBrown
Date: Fri, 16 Oct 2020 17:18:29 +1100
Subject: [PATCH] LU-13783 libcfs: switch from ->mmap_sem to mmap_lock()

In Linux 5.8, ->mmap_sem is gone and the preferred interface for
locking the mmap is the suite of mmap_*lock() functions. So provide
those functions when not available, and use them as needed in Lustre.

Signed-off-by: Mr NeilBrown
Change-Id: I4ce3959f9e93eae10a7b7db03e2b0a1525723138
Reviewed-on: https://review.whamcloud.com/40288
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: James Simmons
Reviewed-by: Andreas Dilger
---
 libcfs/autoconf/lustre-libcfs.m4        | 20 ++++++++++++++
 libcfs/include/libcfs/linux/linux-mem.h | 32 ++++++++++++++++++++++
 libcfs/libcfs/linux/linux-curproc.c     |  8 +++---
 lustre/llite/llite_internal.h           | 12 ++++-----
 lustre/llite/llite_mmap.c               | 27 ++++++++++---------
 lustre/llite/pcc.c                      |  6 ++---
 lustre/llite/rw26.c                     |  4 +--
 lustre/llite/vvp_io.c                   | 48 ++++++++++++++++-----------------
 8 files changed, 105 insertions(+), 52 deletions(-)

diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4
index 529b534..396f7a5 100644
--- a/libcfs/autoconf/lustre-libcfs.m4
+++ b/libcfs/autoconf/lustre-libcfs.m4
@@ -1312,6 +1312,24 @@ nr_unstable_nfs_exists, [
 EXTRA_KCFLAGS="$tmp_flags"
 ]) # LIBCFS_HAVE_NR_UNSTABLE_NFS
 
+#
+# LIBCFS_HAVE_MMAP_LOCK
+#
+# kernel v5.8-rc1~83^2~24
+#   mmap locking API: rename mmap_sem to mmap_lock
+#
+AC_DEFUN([LIBCFS_HAVE_MMAP_LOCK], [
+LB_CHECK_COMPILE([if mmap_lock API is available],
+mmap_write_lock, [
+	#include <linux/mm.h>
+],[
+	mmap_write_lock(NULL);
+],[
+	AC_DEFINE(HAVE_MMAP_LOCK, 1,
+		[mmap_lock API is available.])
+])
+]) # LIBCFS_HAVE_MMAP_LOCK
+
 AC_DEFUN([LIBCFS_PROG_LINUX_SRC], [] )
 AC_DEFUN([LIBCFS_PROG_LINUX_RESULTS], [])
 
@@ -1420,6 +1438,8 @@ LIBCFS_LOOKUP_USER_KEY
 LIBCFS_FORCE_SIG_WITH_TASK
 LIBCFS_CACHE_DETAIL_WRITERS
 LIBCFS_HAVE_NR_UNSTABLE_NFS
+# 5.8
+LIBCFS_HAVE_MMAP_LOCK
 ]) # LIBCFS_PROG_LINUX
 
 #
diff --git a/libcfs/include/libcfs/linux/linux-mem.h b/libcfs/include/libcfs/linux/linux-mem.h
index 80668b1..956de36 100644
--- a/libcfs/include/libcfs/linux/linux-mem.h
+++ b/libcfs/include/libcfs/linux/linux-mem.h
@@ -150,4 +150,36 @@ void remove_shrinker(struct shrinker *shrinker)
 	kfree(shrinker);
 }
 
+#ifndef HAVE_MMAP_LOCK
+static inline void mmap_write_lock(struct mm_struct *mm)
+{
+	down_write(&mm->mmap_sem);
+}
+
+static inline bool mmap_write_trylock(struct mm_struct *mm)
+{
+	return down_write_trylock(&mm->mmap_sem) != 0;
+}
+
+static inline void mmap_write_unlock(struct mm_struct *mm)
+{
+	up_write(&mm->mmap_sem);
+}
+
+static inline void mmap_read_lock(struct mm_struct *mm)
+{
+	down_read(&mm->mmap_sem);
+}
+
+static inline bool mmap_read_trylock(struct mm_struct *mm)
+{
+	return down_read_trylock(&mm->mmap_sem) != 0;
+}
+
+static inline void mmap_read_unlock(struct mm_struct *mm)
+{
+	up_read(&mm->mmap_sem);
+}
+#endif
+
 #endif /* __LINUX_CFS_MEM_H__ */
diff --git a/libcfs/libcfs/linux/linux-curproc.c b/libcfs/libcfs/linux/linux-curproc.c
index 5413a60..e6cdb63 100644
--- a/libcfs/libcfs/linux/linux-curproc.c
+++ b/libcfs/libcfs/linux/linux-curproc.c
@@ -117,11 +117,11 @@ static int cfs_access_process_vm(struct task_struct *tsk,
 	struct page *page;
 	void *old_buf = buf;
 
-	/* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
-	 * which is already holding mmap_sem for writes. If some other
+	/* Avoid deadlocks on mmap_lock if called from sys_mmap_pgoff(),
+	 * which is already holding mmap_lock for writes. If some other
 	 * thread gets the write lock in the meantime, this thread will
 	 * block, but at least it won't deadlock on itself. LU-1735 */
-	if (down_read_trylock(&mm->mmap_sem) == 0)
+	if (!mmap_read_trylock(mm))
 		return -EDEADLK;
 
 	/* ignore errors, just check how much was successfully transferred */
@@ -159,7 +159,7 @@ static int cfs_access_process_vm(struct task_struct *tsk,
 		buf += bytes;
 		addr += bytes;
 	}
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 
 	return buf - old_buf;
 }
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index 16e4fdd..8801fc0 100644
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -279,11 +279,11 @@ static inline void ll_trunc_sem_init(struct ll_trunc_sem *sem)
  *
  * We must take lli_trunc_sem in read mode on entry in to various i/o paths
  * in Lustre, in order to exclude truncates. Some of these paths then need to
- * take the mmap_sem, while still holding the trunc_sem. The problem is that
- * page faults hold the mmap_sem when calling in to Lustre, and then must also
+ * take the mmap_lock, while still holding the trunc_sem. The problem is that
+ * page faults hold the mmap_lock when calling in to Lustre, and then must also
  * take the trunc_sem to exclude truncate.
  *
- * This means the locking order for trunc_sem and mmap_sem is sometimes AB,
+ * This means the locking order for trunc_sem and mmap_lock is sometimes AB,
  * sometimes BA. This is almost OK because in both cases, we take the trunc
  * sem for read, so it doesn't block.
  *
@@ -293,9 +293,9 @@ static inline void ll_trunc_sem_init(struct ll_trunc_sem *sem)
  *
  * So we have, on our truncate sem, in order (where 'reader' and 'writer' refer
  * to the mode in which they take the semaphore):
- * reader (holding mmap_sem, needs truncate_sem)
+ * reader (holding mmap_lock, needs truncate_sem)
  * writer
- * reader (holding truncate sem, waiting for mmap_sem)
+ * reader (holding truncate sem, waiting for mmap_lock)
  *
  * And so the readers deadlock.
  *
@@ -305,7 +305,7 @@ static inline void ll_trunc_sem_init(struct ll_trunc_sem *sem)
  * of the order they arrived in.
  *
  * down_read_nowait is only used in the page fault case, where we already hold
- * the mmap_sem. This is because otherwise repeated read and write operations
+ * the mmap_lock. This is because otherwise repeated read and write operations
  * (which take the truncate sem) could prevent a truncate from ever starting.
  * This could still happen with page faults, but without an even more complex
  * mechanism, this is unavoidable.
diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c
index 8ac6698..ee41cdf 100644
--- a/lustre/llite/llite_mmap.c
+++ b/lustre/llite/llite_mmap.c
@@ -54,21 +54,22 @@ void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma,
 struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
                                size_t count)
 {
-        struct vm_area_struct *vma, *ret = NULL;
-        ENTRY;
+	struct vm_area_struct *vma, *ret = NULL;
+	ENTRY;
 
-        /* mmap_sem must have been held by caller. */
-        LASSERT(!down_write_trylock(&mm->mmap_sem));
+	/* mmap_lock must have been held by caller. */
+	LASSERT(!mmap_write_trylock(mm));
 
-        for(vma = find_vma(mm, addr);
-            vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-                if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
-                    vma->vm_flags & VM_SHARED) {
-                        ret = vma;
-                        break;
-                }
-        }
-        RETURN(ret);
+	for (vma = find_vma(mm, addr);
+	     vma != NULL && vma->vm_start < (addr + count);
+	     vma = vma->vm_next) {
+		if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
+		    vma->vm_flags & VM_SHARED) {
+			ret = vma;
+			break;
+		}
+	}
+	RETURN(ret);
 }
 
 /**
diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c
index 2606407..35d3962 100644
--- a/lustre/llite/pcc.c
+++ b/lustre/llite/pcc.c
@@ -1954,7 +1954,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 		       "%s: PCC backend fs not support ->page_mkwrite()\n",
 		       ll_i2sbi(inode)->ll_fsname);
 		pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE);
-		up_read(&mm->mmap_sem);
+		mmap_read_unlock(mm);
 		*cached = true;
 		RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
 	}
@@ -1981,7 +1981,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	 */
 	if (page->mapping == pcc_file->f_mapping) {
 		*cached = true;
-		up_read(&mm->mmap_sem);
+		mmap_read_unlock(mm);
 		RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
 	}
 
@@ -1995,7 +1995,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PCC_DETACH_MKWRITE)) {
 		pcc_io_fini(inode);
 		pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE);
-		up_read(&mm->mmap_sem);
+		mmap_read_unlock(mm);
 		RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
 	}
 
diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index b873113..b332d08 100644
--- a/lustre/llite/rw26.c
+++ b/lustre/llite/rw26.c
@@ -228,10 +228,10 @@ static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
 	if (*pages == NULL)
 		return -ENOMEM;
 
-	down_read(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
 	result = get_user_pages(current, current->mm, addr, page_count,
 				rw == READ, 0, *pages, NULL);
-	up_read(&current->mm->mmap_sem);
+	mmap_read_unlock(current->mm);
 
 	if (unlikely(result != page_count)) {
 		ll_free_user_pages(*pages, page_count);
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c
index cb30a0d..468fb7a 100644
--- a/lustre/llite/vvp_io.c
+++ b/lustre/llite/vvp_io.c
@@ -464,17 +464,17 @@ static int vvp_mmap_locks(const struct lu_env *env,
 		addr = (unsigned long)iov.iov_base;
 		count = iov.iov_len;
 
-                if (count == 0)
-                        continue;
+		if (count == 0)
+			continue;
 
 		count += addr & ~PAGE_MASK;
 		addr &= PAGE_MASK;
 
-                down_read(&mm->mmap_sem);
-                while((vma = our_vma(mm, addr, count)) != NULL) {
+		mmap_read_lock(mm);
+		while ((vma = our_vma(mm, addr, count)) != NULL) {
 			struct dentry *de = file_dentry(vma->vm_file);
 			struct inode *inode = de->d_inode;
-                        int flags = CEF_MUST;
+			int flags = CEF_MUST;
 
 			if (ll_file_nolock(vma->vm_file)) {
 				/*
@@ -484,24 +484,24 @@ static int vvp_mmap_locks(const struct lu_env *env,
 				break;
 			}
 
-                        /*
-                         * XXX: Required lock mode can be weakened: CIT_WRITE
-                         * io only ever reads user level buffer, and CIT_READ
-                         * only writes on it.
- */ - policy_from_vma(&policy, vma, addr, count); - descr->cld_mode = vvp_mode_from_vma(vma); - descr->cld_obj = ll_i2info(inode)->lli_clob; - descr->cld_start = cl_index(descr->cld_obj, - policy.l_extent.start); - descr->cld_end = cl_index(descr->cld_obj, - policy.l_extent.end); - descr->cld_enq_flags = flags; - result = cl_io_lock_alloc_add(env, io, descr); - - CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n", - descr->cld_mode, descr->cld_start, - descr->cld_end); + /* + * XXX: Required lock mode can be weakened: CIT_WRITE + * io only ever reads user level buffer, and CIT_READ + * only writes on it. + */ + policy_from_vma(&policy, vma, addr, count); + descr->cld_mode = vvp_mode_from_vma(vma); + descr->cld_obj = ll_i2info(inode)->lli_clob; + descr->cld_start = cl_index(descr->cld_obj, + policy.l_extent.start); + descr->cld_end = cl_index(descr->cld_obj, + policy.l_extent.end); + descr->cld_enq_flags = flags; + result = cl_io_lock_alloc_add(env, io, descr); + + CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n", + descr->cld_mode, descr->cld_start, + descr->cld_end); if (result < 0) break; @@ -512,7 +512,7 @@ static int vvp_mmap_locks(const struct lu_env *env, count -= vma->vm_end - addr; addr = vma->vm_end; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); if (result < 0) break; } -- 1.8.3.1