LU-13783 libcfs: switch from ->mmap_sem to mmap_lock() 88/40288/5
author     Mr NeilBrown <neilb@suse.de>
           Fri, 16 Oct 2020 06:18:29 +0000 (17:18 +1100)
committer  Oleg Drokin <green@whamcloud.com>
           Fri, 26 Feb 2021 21:05:57 +0000 (21:05 +0000)
In Linux 5.8, ->mmap_sem is gone and the preferred interface
for locking the mmap is the suite of mmap_*lock() functions.

So provide those functions when not available, and use them
as needed in Lustre.
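
For illustration only (not part of this patch): a minimal sketch of how a
converted caller looks, assuming a 5.8-era kernel where mm->mmap is still a
linked list of VMAs; example_count_vmas() is a hypothetical helper, not a
Lustre function:

    /* Hypothetical helper: count the VMAs of an address space.  With the
     * compat wrappers added below, this builds both on kernels that
     * provide the mmap_lock API and on older ones where the wrappers fall
     * back to mm->mmap_sem.
     */
    #include <linux/mm.h>

    static int example_count_vmas(struct mm_struct *mm)
    {
            struct vm_area_struct *vma;
            int count = 0;

            mmap_read_lock(mm);     /* was: down_read(&mm->mmap_sem) */
            for (vma = mm->mmap; vma != NULL; vma = vma->vm_next)
                    count++;
            mmap_read_unlock(mm);   /* was: up_read(&mm->mmap_sem) */

            return count;
    }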

Signed-off-by: Mr NeilBrown <neilb@suse.de>
Change-Id: I4ce3959f9e93eae10a7b7db03e2b0a1525723138
Reviewed-on: https://review.whamcloud.com/40288
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
libcfs/autoconf/lustre-libcfs.m4
libcfs/include/libcfs/linux/linux-mem.h
libcfs/libcfs/linux/linux-curproc.c
lustre/llite/llite_internal.h
lustre/llite/llite_mmap.c
lustre/llite/pcc.c
lustre/llite/rw26.c
lustre/llite/vvp_io.c

diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4
index 529b534..396f7a5 100644
@@ -1312,6 +1312,24 @@ nr_unstable_nfs_exists, [
 EXTRA_KCFLAGS="$tmp_flags"
 ]) # LIBCFS_HAVE_NR_UNSTABLE_NFS
 
+#
+# LIBCFS_HAVE_MMAP_LOCK
+#
+# kernel v5.8-rc1~83^2~24
+# mmap locking API: rename mmap_sem to mmap_lock
+#
+AC_DEFUN([LIBCFS_HAVE_MMAP_LOCK], [
+LB_CHECK_COMPILE([if mmap_lock API is available],
+mmap_write_lock, [
+       #include <linux/mm.h>
+],[
+       mmap_write_lock(NULL);
+],[
+       AC_DEFINE(HAVE_MMAP_LOCK, 1,
+               [mmap_lock API is available.])
+])
+]) # LIBCFS_HAVE_MMAP_LOCK
+
 AC_DEFUN([LIBCFS_PROG_LINUX_SRC], [] )
 AC_DEFUN([LIBCFS_PROG_LINUX_RESULTS], [])
 
@@ -1420,6 +1438,8 @@ LIBCFS_LOOKUP_USER_KEY
 LIBCFS_FORCE_SIG_WITH_TASK
 LIBCFS_CACHE_DETAIL_WRITERS
 LIBCFS_HAVE_NR_UNSTABLE_NFS
+# 5.8
+LIBCFS_HAVE_MMAP_LOCK
 ]) # LIBCFS_PROG_LINUX
 
 #
diff --git a/libcfs/include/libcfs/linux/linux-mem.h b/libcfs/include/libcfs/linux/linux-mem.h
index 80668b1..956de36 100644
@@ -150,4 +150,36 @@ void remove_shrinker(struct shrinker *shrinker)
         kfree(shrinker);
 }
 
+#ifndef HAVE_MMAP_LOCK
+static inline void mmap_write_lock(struct mm_struct *mm)
+{
+       down_write(&mm->mmap_sem);
+}
+
+static inline bool mmap_write_trylock(struct mm_struct *mm)
+{
+       return down_write_trylock(&mm->mmap_sem) != 0;
+}
+
+static inline void mmap_write_unlock(struct mm_struct *mm)
+{
+       up_write(&mm->mmap_sem);
+}
+
+static inline void mmap_read_lock(struct mm_struct *mm)
+{
+       down_read(&mm->mmap_sem);
+}
+
+static inline bool mmap_read_trylock(struct mm_struct *mm)
+{
+       return down_read_trylock(&mm->mmap_sem) != 0;
+}
+
+static inline void mmap_read_unlock(struct mm_struct *mm)
+{
+       up_read(&mm->mmap_sem);
+}
+#endif
+
 #endif /* __LINUX_CFS_MEM_H__ */
diff --git a/libcfs/libcfs/linux/linux-curproc.c b/libcfs/libcfs/linux/linux-curproc.c
index 5413a60..e6cdb63 100644
@@ -117,11 +117,11 @@ static int cfs_access_process_vm(struct task_struct *tsk,
        struct page *page;
        void *old_buf = buf;
 
-       /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(),
-        * which is already holding mmap_sem for writes.  If some other
+       /* Avoid deadlocks on mmap_lock if called from sys_mmap_pgoff(),
+        * which is already holding mmap_lock for writes.  If some other
         * thread gets the write lock in the meantime, this thread will
         * block, but at least it won't deadlock on itself.  LU-1735 */
-       if (down_read_trylock(&mm->mmap_sem) == 0)
+       if (!mmap_read_trylock(mm))
                return -EDEADLK;
 
        /* ignore errors, just check how much was successfully transferred */
@@ -159,7 +159,7 @@ static int cfs_access_process_vm(struct task_struct *tsk,
                buf += bytes;
                addr += bytes;
        }
-       up_read(&mm->mmap_sem);
+       mmap_read_unlock(mm);
 
        return buf - old_buf;
 }
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index 16e4fdd..8801fc0 100644
@@ -279,11 +279,11 @@ static inline void ll_trunc_sem_init(struct ll_trunc_sem *sem)
  *
  * We must take lli_trunc_sem in read mode on entry in to various i/o paths
  * in Lustre, in order to exclude truncates.  Some of these paths then need to
- * take the mmap_sem, while still holding the trunc_sem.  The problem is that
- * page faults hold the mmap_sem when calling in to Lustre, and then must also
+ * take the mmap_lock, while still holding the trunc_sem.  The problem is that
+ * page faults hold the mmap_lock when calling in to Lustre, and then must also
  * take the trunc_sem to exclude truncate.
  *
- * This means the locking order for trunc_sem and mmap_sem is sometimes AB,
+ * This means the locking order for trunc_sem and mmap_lock is sometimes AB,
  * sometimes BA.  This is almost OK because in both cases, we take the trunc
  * sem for read, so it doesn't block.
  *
@@ -293,9 +293,9 @@ static inline void ll_trunc_sem_init(struct ll_trunc_sem *sem)
  *
  * So we have, on our truncate sem, in order (where 'reader' and 'writer' refer
  * to the mode in which they take the semaphore):
- * reader (holding mmap_sem, needs truncate_sem)
+ * reader (holding mmap_lock, needs truncate_sem)
  * writer
- * reader (holding truncate sem, waiting for mmap_sem)
+ * reader (holding truncate sem, waiting for mmap_lock)
  *
  * And so the readers deadlock.
  *
@@ -305,7 +305,7 @@ static inline void ll_trunc_sem_init(struct ll_trunc_sem *sem)
  * of the order they arrived in.
  *
  * down_read_nowait is only used in the page fault case, where we already hold
- * the mmap_sem.  This is because otherwise repeated read and write operations
+ * the mmap_lock.  This is because otherwise repeated read and write operations
  * (which take the truncate sem) could prevent a truncate from ever starting.
  * This could still happen with page faults, but without an even more complex
  * mechanism, this is unavoidable.
diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c
index 8ac6698..ee41cdf 100644
@@ -54,21 +54,22 @@ void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma,
 struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
                                size_t count)
 {
-        struct vm_area_struct *vma, *ret = NULL;
-        ENTRY;
+       struct vm_area_struct *vma, *ret = NULL;
+       ENTRY;
 
-        /* mmap_sem must have been held by caller. */
-        LASSERT(!down_write_trylock(&mm->mmap_sem));
+       /* mmap_lock must have been held by caller. */
+       LASSERT(!mmap_write_trylock(mm));
 
-        for(vma = find_vma(mm, addr);
-            vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-                if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
-                    vma->vm_flags & VM_SHARED) {
-                        ret = vma;
-                        break;
-                }
-        }
-        RETURN(ret);
+       for (vma = find_vma(mm, addr);
+            vma != NULL && vma->vm_start < (addr + count);
+            vma = vma->vm_next) {
+               if (vma->vm_ops && vma->vm_ops == &ll_file_vm_ops &&
+                   vma->vm_flags & VM_SHARED) {
+                       ret = vma;
+                       break;
+               }
+       }
+       RETURN(ret);
 }
 
 /**
diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c
index 2606407..35d3962 100644
@@ -1954,7 +1954,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                       "%s: PCC backend fs not support ->page_mkwrite()\n",
                       ll_i2sbi(inode)->ll_fsname);
                pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE);
-               up_read(&mm->mmap_sem);
+               mmap_read_unlock(mm);
                *cached = true;
                RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
        }
@@ -1981,7 +1981,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                 */
                if (page->mapping == pcc_file->f_mapping) {
                        *cached = true;
-                       up_read(&mm->mmap_sem);
+                       mmap_read_unlock(mm);
                        RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
                }
 
@@ -1995,7 +1995,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
        if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PCC_DETACH_MKWRITE)) {
                pcc_io_fini(inode);
                pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE);
-               up_read(&mm->mmap_sem);
+               mmap_read_unlock(mm);
                RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
        }
 
diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index b873113..b332d08 100644
@@ -228,10 +228,10 @@ static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
        if (*pages == NULL)
                return -ENOMEM;
 
-       down_read(&current->mm->mmap_sem);
+       mmap_read_lock(current->mm);
        result = get_user_pages(current, current->mm, addr, page_count,
                                rw == READ, 0, *pages, NULL);
-       up_read(&current->mm->mmap_sem);
+       mmap_read_unlock(current->mm);
 
        if (unlikely(result != page_count)) {
                ll_free_user_pages(*pages, page_count);
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c
index cb30a0d..468fb7a 100644
@@ -464,17 +464,17 @@ static int vvp_mmap_locks(const struct lu_env *env,
                addr = (unsigned long)iov.iov_base;
                count = iov.iov_len;
 
-                if (count == 0)
-                        continue;
+               if (count == 0)
+                       continue;
 
                count += addr & ~PAGE_MASK;
                addr &= PAGE_MASK;
 
-                down_read(&mm->mmap_sem);
-                while((vma = our_vma(mm, addr, count)) != NULL) {
+               mmap_read_lock(mm);
+               while ((vma = our_vma(mm, addr, count)) != NULL) {
                        struct dentry *de = file_dentry(vma->vm_file);
                        struct inode *inode = de->d_inode;
-                        int flags = CEF_MUST;
+                       int flags = CEF_MUST;
 
                        if (ll_file_nolock(vma->vm_file)) {
                                /*
@@ -484,24 +484,24 @@ static int vvp_mmap_locks(const struct lu_env *env,
                                break;
                        }
 
-                        /*
-                         * XXX: Required lock mode can be weakened: CIT_WRITE
-                         * io only ever reads user level buffer, and CIT_READ
-                         * only writes on it.
-                         */
-                        policy_from_vma(&policy, vma, addr, count);
-                        descr->cld_mode = vvp_mode_from_vma(vma);
-                        descr->cld_obj = ll_i2info(inode)->lli_clob;
-                        descr->cld_start = cl_index(descr->cld_obj,
-                                                    policy.l_extent.start);
-                        descr->cld_end = cl_index(descr->cld_obj,
-                                                  policy.l_extent.end);
-                        descr->cld_enq_flags = flags;
-                        result = cl_io_lock_alloc_add(env, io, descr);
-
-                        CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
-                               descr->cld_mode, descr->cld_start,
-                               descr->cld_end);
+                       /*
+                        * XXX: Required lock mode can be weakened: CIT_WRITE
+                        * io only ever reads user level buffer, and CIT_READ
+                        * only writes on it.
+                        */
+                       policy_from_vma(&policy, vma, addr, count);
+                       descr->cld_mode = vvp_mode_from_vma(vma);
+                       descr->cld_obj = ll_i2info(inode)->lli_clob;
+                       descr->cld_start = cl_index(descr->cld_obj,
+                                                   policy.l_extent.start);
+                       descr->cld_end = cl_index(descr->cld_obj,
+                                                 policy.l_extent.end);
+                       descr->cld_enq_flags = flags;
+                       result = cl_io_lock_alloc_add(env, io, descr);
+
+                       CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
+                              descr->cld_mode, descr->cld_start,
+                              descr->cld_end);
 
                        if (result < 0)
                                break;
@@ -512,7 +512,7 @@ static int vvp_mmap_locks(const struct lu_env *env,
                        count -= vma->vm_end - addr;
                        addr = vma->vm_end;
                }
-               up_read(&mm->mmap_sem);
+               mmap_read_unlock(mm);
                if (result < 0)
                        break;
        }