LU-6260 llite: add support for direct IO api changes 65/14665/9
author Al Viro <viro@zeniv.linux.org.uk>
Wed, 20 May 2015 17:44:26 +0000 (13:44 -0400)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 28 May 2015 22:13:44 +0000 (22:13 +0000)
For 3.16 and later kernels the direct_IO API changed, which
breaks the Lustre build. This patch adds support back
for direct IO on newer kernels. Backported from the
upstream Lustre client.
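
In rough terms the patch picks between three shapes of ll_direct_IO()
at configure time; HAVE_DIRECTIO_ITER and HAVE_IOV_ITER_RW come from
the new autoconf checks below. A sketch of the three prototypes (the
real code folds the first two together with an #ifndef around the rw
argument):

    /* 4.1+: rw is gone, the direction comes from the iov_iter */
    ssize_t ll_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                         loff_t file_offset);

    /* 3.16 .. 4.0: iov_iter based, but still takes rw */
    ssize_t ll_direct_IO(int rw, struct kiocb *iocb,
                         struct iov_iter *iter, loff_t file_offset);

    /* pre-3.16: iovec based */
    ssize_t ll_direct_IO(int rw, struct kiocb *iocb,
                         const struct iovec *iov, loff_t file_offset,
                         unsigned long nr_segs);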

------------------------------------------------------
Linux-commit: d8d3d94b80aa1a1c0ca75c58b8abdc7356f38418
pass iov_iter to ->direct_IO()

unmodified, for now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
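
For context, the prototype change in struct address_space_operations
was roughly:

    /* before 3.16 */
    ssize_t (*direct_IO)(int rw, struct kiocb *iocb,
                         const struct iovec *iov, loff_t offset,
                         unsigned long nr_segs);

    /* 3.16 and later */
    ssize_t (*direct_IO)(int rw, struct kiocb *iocb,
                         struct iov_iter *iter, loff_t offset);
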
------------------------------------------------------
Linux-commit: a6cbcd4a4a85e2fdb0b3344b88df2e8b3d526b9e
get rid of pointless iov_length() in ->direct_IO()

all callers have iov_length(iter->iov, iter->nr_segs) == iov_iter_count(iter)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
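
In this patch that turns the length computation at the top of
ll_direct_IO into a single helper call:

    /* old: recompute the total from the segment array */
    long count = iov_length(iov, nr_segs);

    /* new: the iterator already tracks its remaining byte count */
    ssize_t count = iov_iter_count(iter);
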
------------------------------------------------------
Linux-commit: 886a39115005ced8b15ab067c9c2a8d546b40a5e

new primitive: iov_iter_alignment()

returns a value that is as badly aligned as the worst remaining
segment in the iov_iter.  Use it instead of open-coded equivalents.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
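
The typical use, as in the new ll_direct_IO below, is one
page-alignment check covering every remaining segment at once:

    /* fail unless all remaining user buffers are page aligned */
    if (iov_iter_alignment(iter) & ~PAGE_MASK)
            return -EINVAL;
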
------------------------------------------------------
Linux-commit: 91f79c43d1b54d7154b118860d81b39bad07dfff

new helper: iov_iter_get_pages_alloc()

same as iov_iter_get_pages(), except that pages array is allocated
(kmalloc if possible, vmalloc if that fails) and left for caller to
free.  Lustre and NFS ->direct_IO() switched to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
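
A sketch of the calling convention as the new ll_direct_IO uses it:
the helper pins the user pages, allocates the pages array itself, and
returns the number of bytes covered; the caller derives the page count
and frees everything (ll_free_user_pages is the existing helper in
rw26.c):

    struct page **pages;
    size_t offs;
    ssize_t result;

    result = iov_iter_get_pages_alloc(iter, &pages,
                                      iov_iter_count(iter), &offs);
    if (result > 0) {
            int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);

            /* ... submit the I/O covering these n pages ... */
            ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ);
    }
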
------------------------------------------------------
Linux-commit: 1d8cb70c7bdda47125ed551fc390aa9597c5f264
drivers: staging: lustre: Fix space required after that ',' errors

Fix checkpatch.pl space required after that ',' errors

Signed-off-by: Greg Donald <gdonald@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
------------------------------------------------------
Linux-commit: ef96fdddcd386c88fee1f2078a174943472b615e
staging: lustre: lustre: llite: use DIV_ROUND_UP

    The kernel.h macro DIV_ROUND_UP performs the computation (((n) + (d) - 1) /
    (d)) but is perhaps more readable.

    Coccinelle script used :

    // <smpl>
    @haskernel@
    @@

    #include <linux/kernel.h>

    @depends on haskernel@
    expression n,d;
    @@
    (
    - (n + d - 1) / d
    + DIV_ROUND_UP(n,d)
    |
    - (n + (d - 1)) / d
    + DIV_ROUND_UP(n,d)
    )
    @depends on haskernel@
    expression n,d;
    @@
    - DIV_ROUND_UP(n,(d))
    + DIV_ROUND_UP(n,d)
    // </smpl>
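
    As used in this patch, for example:

        /* pages needed to cover result bytes starting at offset offs */
        int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
        /* same as: (result + offs + PAGE_SIZE - 1) / PAGE_SIZE */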

Signed-off-by: Tapasweni Pathak <tapaswenipathak@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
------------------------------------------------------
Linux-commit: 9fb186cf6907ba4e873d8396d7e5abfa4f22ca4e

Staging: Lustre: rw26.c: include according to checkpatch.pl

Signed-off-by: Georges-Axel Jaloyan <georges-axel.jaloyan@ens.fr>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
------------------------------------------------------
Linux-commit: 6f67376318abea58589ebe6d69dffeabb6f6c26a

direct_IO: use iov_iter_rw() instead of rw everywhere

The rw parameter to direct_IO is redundant with iov_iter->type, and
treated slightly differently just about everywhere it's used: some users
do rw & WRITE, and others do rw == WRITE where they should be doing a
bitwise check. Simplify this with the new iov_iter_rw() helper, which
always returns either READ or WRITE.

Signed-off-by: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
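
Concretely, direction tests change along these lines (this is the
locking check from the new ll_direct_IO below):

    /* old: some callers wrote rw & WRITE, others rw == WRITE */
    if (rw == READ)
            mutex_lock(&inode->i_mutex);

    /* new: iov_iter_rw() always returns exactly READ or WRITE */
    if (iov_iter_rw(iter) == READ)
            mutex_lock(&inode->i_mutex);
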
------------------------------------------------------
Linux-commit: 22c6186ecea0be9eff1c399298ad36e94a59995f

direct_IO: remove rw from a_ops->direct_IO()

Now that no one is using rw, remove it completely.

Signed-off-by: Omar Sandoval <osandov@osandov.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
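
After this change the 4.1+ method is simply:

    ssize_t (*direct_IO)(struct kiocb *iocb, struct iov_iter *iter,
                         loff_t offset);

The backport keeps older kernels working with the small shim in
rw26.c below, which maps iov_iter_rw(iter) back to the old rw
argument when HAVE_IOV_ITER_RW is not defined.
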
------------------------------------------------------

Change-Id: I683e02a0952aec2d8fe387cf3ddaf75010858c7c
Signed-off-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-on: http://review.whamcloud.com/14665
Tested-by: Jenkins
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Bob Glossman <bob.glossman@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Thomas Stibor <t.stibor@gsi.de>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/autoconf/lustre-core.m4
lustre/llite/rw26.c

diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4
index f30920b..37f1fe9 100644
@@ -1620,6 +1620,47 @@ vfs_rename_6args, [
 ]) # LC_VFS_RENAME_6ARGS
 
 #
+# LC_DIRECTIO_USE_ITER
+#
+# 3.16 kernel changes direct IO to use iov_iter
+#
+AC_DEFUN([LC_DIRECTIO_USE_ITER], [
+LB_CHECK_COMPILE([if direct IO uses iov_iter],
+direct_io_iter, [
+       #include <linux/fs.h>
+],[
+       struct address_space_operations ops;
+       struct iov_iter *iter = NULL;
+       loff_t offset = 0;
+
+       ops.direct_IO(0, NULL, iter, offset);
+],[
+       AC_DEFINE(HAVE_DIRECTIO_ITER, 1,
+               [direct IO uses iov_iter])
+])
+]) # LC_DIRECTIO_USE_ITER
+
+#
+# LC_IOV_ITER_RW
+#
+# 4.1 kernel has iov_iter_rw
+#
+AC_DEFUN([LC_IOV_ITER_RW], [
+LB_CHECK_COMPILE([if iov_iter_rw exists],
+iov_iter_rw, [
+       #include <linux/fs.h>
+       #include <linux/uio.h>
+],[
+       struct iov_iter *iter = NULL;
+
+       iov_iter_rw(iter);
+],[
+       AC_DEFINE(HAVE_IOV_ITER_RW, 1,
+               [iov_iter_rw exists])
+])
+]) # LC_IOV_ITER_RW
+
+#
 # LC_PROG_LINUX
 #
 # Lustre linux kernel checks
@@ -1749,6 +1790,12 @@ AC_DEFUN([LC_PROG_LINUX], [
        # 3.15
        LC_VFS_RENAME_6ARGS
 
+       # 3.16
+       LC_DIRECTIO_USE_ITER
+
+       # 4.1.0
+       LC_IOV_ITER_RW
+
        #
        AS_IF([test "x$enable_server" != xno], [
                LC_FUNC_DEV_SET_RDONLY
diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index 6085a20..d3fbda3 100644
@@ -339,20 +339,19 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
 }
 EXPORT_SYMBOL(ll_direct_rw_pages);
 
-static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
-                                   int rw, struct inode *inode,
-                                   struct address_space *mapping,
-                                   size_t size, loff_t file_offset,
-                                   struct page **pages, int page_count)
+static ssize_t
+ll_direct_IO_seg(const struct lu_env *env, struct cl_io *io, int rw,
+                struct inode *inode, size_t size, loff_t file_offset,
+                struct page **pages, int page_count)
 {
-    struct ll_dio_pages pvec = { .ldp_pages        = pages,
-                                 .ldp_nr           = page_count,
-                                 .ldp_size         = size,
-                                 .ldp_offsets      = NULL,
-                                 .ldp_start_offset = file_offset
-                               };
-
-    return ll_direct_rw_pages(env, io, rw, inode, &pvec);
+       struct ll_dio_pages pvec = { .ldp_pages         = pages,
+                                    .ldp_nr            = page_count,
+                                    .ldp_size          = size,
+                                    .ldp_offsets       = NULL,
+                                    .ldp_start_offset  = file_offset
+                                  };
+
+       return ll_direct_rw_pages(env, io, rw, inode, &pvec);
 }
 
 #ifdef KMALLOC_MAX_SIZE
@@ -368,27 +367,138 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
  * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. */
 #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
                      ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
-                               const struct iovec *iov, loff_t file_offset,
-                               unsigned long nr_segs)
+
+#ifndef HAVE_IOV_ITER_RW
+# define iov_iter_rw(iter)     rw
+#endif
+
+#if defined(HAVE_DIRECTIO_ITER) || defined(HAVE_IOV_ITER_RW)
+static ssize_t
+ll_direct_IO(
+# ifndef HAVE_IOV_ITER_RW
+            int rw,
+# endif
+            struct kiocb *iocb, struct iov_iter *iter,
+            loff_t file_offset)
 {
-        struct lu_env *env;
-        struct cl_io *io;
-        struct file *file = iocb->ki_filp;
-        struct inode *inode = file->f_mapping->host;
-        long count = iov_length(iov, nr_segs);
-        long tot_bytes = 0, result = 0;
-        unsigned long seg = 0;
-        long size = MAX_DIO_SIZE;
-        int refcheck;
-        ENTRY;
+       struct lu_env *env;
+       struct cl_io *io;
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       ssize_t count = iov_iter_count(iter);
+       ssize_t tot_bytes = 0, result = 0;
+       size_t size = MAX_DIO_SIZE;
+       int refcheck;
+
+       /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
+       if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
+               return -EINVAL;
+
+       CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
+              "offset=%lld=%llx, pages %zd (max %lu)\n",
+              PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
+              file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
+              MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
+
+       /* Check that all user buffers are aligned as well */
+       if (iov_iter_alignment(iter) & ~PAGE_MASK)
+               return -EINVAL;
+
+       env = cl_env_get(&refcheck);
+       LASSERT(!IS_ERR(env));
+       io = vvp_env_io(env)->vui_cl.cis_io;
+       LASSERT(io != NULL);
+
+       /* 0. Need locking between buffered and direct access, and race with
+        *    size changing by concurrent truncates and writes.
+        * 1. Need inode mutex to operate transient pages.
+        */
+       if (iov_iter_rw(iter) == READ)
+               mutex_lock(&inode->i_mutex);
+
+       while (iov_iter_count(iter)) {
+               struct page **pages;
+               size_t offs;
+
+               count = min_t(size_t, iov_iter_count(iter), size);
+               if (iov_iter_rw(iter) == READ) {
+                       if (file_offset >= i_size_read(inode))
+                               break;
+
+                       if (file_offset + count > i_size_read(inode))
+                               count = i_size_read(inode) - file_offset;
+               }
+
+               result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+               if (likely(result > 0)) {
+                       int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
+
+                       result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
+                                                 inode, result, file_offset,
+                                                 pages, n);
+                       ll_free_user_pages(pages, n,
+                                          iov_iter_rw(iter) == READ);
+
+               }
+               if (unlikely(result <= 0)) {
+                       /* If we can't allocate a large enough buffer
+                        * for the request, shrink it to a smaller
+                        * PAGE_SIZE multiple and try again.
+                        * We should always be able to kmalloc for a
+                        * page worth of page pointers = 4MB on i386. */
+                       if (result == -ENOMEM &&
+                           size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+                                   PAGE_CACHE_SIZE) {
+                               size = ((((size / 2) - 1) |
+                                       ~PAGE_MASK) + 1) & PAGE_MASK;
+                               CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
+                                      size);
+                               continue;
+                       }
+
+                       GOTO(out, result);
+               }
+
+               iov_iter_advance(iter, result);
+               tot_bytes += result;
+               file_offset += result;
+       }
+out:
+       if (iov_iter_rw(iter) == READ)
+               mutex_unlock(&inode->i_mutex);
+
+       if (tot_bytes > 0) {
+               struct vvp_io *vio = vvp_env_io(env);
+
+               /* no commit async for direct IO */
+               vio->u.write.vui_written += tot_bytes;
+       }
+
+       cl_env_put(env, &refcheck);
+       return tot_bytes ? : result;
+}
+#else /* !HAVE_DIRECTIO_ITER && !HAVE_IOV_ITER_RW */
+static ssize_t
+ll_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+            loff_t file_offset, unsigned long nr_segs)
+{
+       struct lu_env *env;
+       struct cl_io *io;
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       ssize_t count = iov_length(iov, nr_segs);
+       ssize_t tot_bytes = 0, result = 0;
+       unsigned long seg = 0;
+       size_t size = MAX_DIO_SIZE;
+       int refcheck;
+       ENTRY;
 
         /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
        if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
                 RETURN(-EINVAL);
 
-       CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%lu (max %lu), "
-              "offset=%lld=%llx, pages %lu (max %lu)\n",
+       CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
+              "offset=%lld=%llx, pages %zd (max %lu)\n",
               PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
               file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
               MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
@@ -406,7 +516,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
         LASSERT(io != NULL);
 
         for (seg = 0; seg < nr_segs; seg++) {
-                long iov_left = iov[seg].iov_len;
+               size_t iov_left = iov[seg].iov_len;
                 unsigned long user_addr = (unsigned long)iov[seg].iov_base;
 
                 if (rw == READ) {
@@ -419,7 +529,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
                 while (iov_left > 0) {
                         struct page **pages;
                         int page_count, max_pages = 0;
-                        long bytes;
+                       size_t bytes;
 
                         bytes = min(size, iov_left);
                         page_count = ll_get_user_pages(rw, user_addr, bytes,
@@ -427,10 +537,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
                         if (likely(page_count > 0)) {
                                 if (unlikely(page_count <  max_pages))
                                        bytes = page_count << PAGE_CACHE_SHIFT;
-                                result = ll_direct_IO_26_seg(env, io, rw, inode,
-                                                             file->f_mapping,
-                                                             bytes, file_offset,
-                                                             pages, page_count);
+                               result = ll_direct_IO_seg(env, io, rw, inode,
+                                                         bytes, file_offset,
+                                                         pages, page_count);
                                 ll_free_user_pages(pages, max_pages, rw==READ);
                         } else if (page_count == 0) {
                                 GOTO(out, result = -EFAULT);
@@ -449,7 +558,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
                                         size = ((((size / 2) - 1) |
                                                 ~PAGE_CACHE_MASK) + 1) &
                                                PAGE_CACHE_MASK;
-                                        CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+                                       CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                                                size);
                                         continue;
                                 }
@@ -474,6 +583,7 @@ out:
        cl_env_put(env, &refcheck);
        RETURN(tot_bytes ? tot_bytes : result);
 }
+#endif /* HAVE_DIRECTIO_ITER || HAVE_IOV_ITER_RW */
 
 /**
  * Prepare partially written-to page for a write.
@@ -688,22 +798,22 @@ static int ll_migratepage(struct address_space *mapping,
 #ifndef MS_HAS_NEW_AOPS
 const struct address_space_operations ll_aops = {
        .readpage       = ll_readpage,
-        .direct_IO      = ll_direct_IO_26,
-        .writepage      = ll_writepage,
-       .writepages     = ll_writepages,
-        .set_page_dirty = __set_page_dirty_nobuffers,
-        .write_begin    = ll_write_begin,
-        .write_end      = ll_write_end,
-        .invalidatepage = ll_invalidatepage,
-        .releasepage    = (void *)ll_releasepage,
+       .direct_IO      = ll_direct_IO,
+       .writepage      = ll_writepage,
+       .writepages     = ll_writepages,
+       .set_page_dirty = __set_page_dirty_nobuffers,
+       .write_begin    = ll_write_begin,
+       .write_end      = ll_write_end,
+       .invalidatepage = ll_invalidatepage,
+       .releasepage    = (void *)ll_releasepage,
 #ifdef CONFIG_MIGRATION
-        .migratepage    = ll_migratepage,
+       .migratepage    = ll_migratepage,
 #endif
 };
 #else
 const struct address_space_operations_ext ll_aops = {
        .orig_aops.readpage             = ll_readpage,
-       .orig_aops.direct_IO            = ll_direct_IO_26,
+       .orig_aops.direct_IO            = ll_direct_IO,
        .orig_aops.writepage            = ll_writepage,
        .orig_aops.writepages           = ll_writepages,
        .orig_aops.set_page_dirty       = __set_page_dirty_nobuffers,