From 98883bd3e2cc48a3a5674ce140ad74bece37df05 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 20 May 2015 13:44:26 -0400
Subject: [PATCH] LU-6260 llite: add support for direct IO api changes

For the 3.16 kernels the direct_IO api changed which breaks the lustre
build. This patch adds support back for direct IO on newer kernels.
Backported from the upstream lustre client.

------------------------------------------------------
Linux-commit: d8d3d94b80aa1a1c0ca75c58b8abdc7356f38418

pass iov_iter to ->direct_IO() unmodified, for now

Signed-off-by: Al Viro
------------------------------------------------------
Linux-commit: a6cbcd4a4a85e2fdb0b3344b88df2e8b3d526b9e

get rid of pointless iov_length() in ->direct_IO()

all callers have iov_length(iter->iov, iter->nr_segs) == iov_iter_count(iter)

Signed-off-by: Al Viro
------------------------------------------------------
Linux-commit: 886a39115005ced8b15ab067c9c2a8d546b40a5e

new primitive: iov_iter_alignment()

returns the value aligned as badly as the worst remaining segment in
iov_iter is. Use instead of open-coded equivalents.

Signed-off-by: Al Viro
------------------------------------------------------
Linux-commit: 91f79c43d1b54d7154b118860d81b39bad07dfff

new helper: iov_iter_get_pages_alloc()

same as iov_iter_get_pages(), except that pages array is allocated
(kmalloc if possible, vmalloc if that fails) and left for caller to
free. Lustre and NFS ->direct_IO() switched to it.

Signed-off-by: Al Viro
------------------------------------------------------
Linux-commit: 1d8cb70c7bdda47125ed551fc390aa9597c5f264

drivers: staging: lustre: Fix space required after that ',' errors

Fix checkpatch.pl space required after that ',' errors

Signed-off-by: Greg Donald
Signed-off-by: Greg Kroah-Hartman
------------------------------------------------------
Linux-commit: ef96fdddcd386c88fee1f2078a174943472b615e

staging: lustre: lustre: llite: use DIV_ROUND_UP

The kernel.h macro DIV_ROUND_UP performs the computation
(((n) + (d) - 1) / (d)) but is perhaps more readable.

Coccinelle script used :

//
@haskernel@
@@

@depends on haskernel@
expression n,d;
@@
(
- (n + d - 1) / d
+ DIV_ROUND_UP(n,d)
|
- (n + (d - 1)) / d
+ DIV_ROUND_UP(n,d)
)

@depends on haskernel@
expression n,d;
expression n,d;
@@

- DIV_ROUND_UP(n,(d))
+ DIV_ROUND_UP(n,d)
//

Signed-off-by: Tapasweni Pathak
Signed-off-by: Greg Kroah-Hartman
------------------------------------------------------
Linux-commit: 9fb186cf6907ba4e873d8396d7e5abfa4f22ca4e

Staging: Lustre: rw26.c: include according to checkpatch.pl

Signed-off-by: Georges-Axel Jaloyan
Signed-off-by: Greg Kroah-Hartman
------------------------------------------------------
Linux-commit: 6f67376318abea58589ebe6d69dffeabb6f6c26a

direct_IO: use iov_iter_rw() instead of rw everywhere

The rw parameter to direct_IO is redundant with iov_iter->type, and
treated slightly differently just about everywhere it's used: some
users do rw & WRITE, and others do rw == WRITE where they should be
doing a bitwise check. Simplify this with the new iov_iter_rw()
helper, which always returns either READ or WRITE.

Signed-off-by: Omar Sandoval
Signed-off-by: Al Viro
------------------------------------------------------
Linux-commit: 22c6186ecea0be9eff1c399298ad36e94a59995f

direct_IO: remove rw from a_ops->direct_IO()

Now that no one is using rw, remove it completely.

Signed-off-by: Omar Sandoval
Signed-off-by: Al Viro
------------------------------------------------------
Change-Id: I683e02a0952aec2d8fe387cf3ddaf75010858c7c
Signed-off-by: James Simmons
Reviewed-on: http://review.whamcloud.com/14665
Tested-by: Jenkins
Reviewed-by: John L. Hammond
Reviewed-by: Bob Glossman
Tested-by: Maloo
Reviewed-by: Thomas Stibor
Reviewed-by: Oleg Drokin
---
 lustre/autoconf/lustre-core.m4 |  47 ++++++++++
 lustre/llite/rw26.c            | 200 +++++++++++++++++++++++++++++++----------
 2 files changed, 202 insertions(+), 45 deletions(-)

diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4
index f30920b..37f1fe90 100644
--- a/lustre/autoconf/lustre-core.m4
+++ b/lustre/autoconf/lustre-core.m4
@@ -1620,6 +1620,47 @@ vfs_rename_6args, [
 ]) # LC_VFS_RENAME_6ARGS
 
 #
+# LC_DIRECTIO_USE_ITER
+#
+# 3.16 kernel changes direct IO to use iov_iter
+#
+AC_DEFUN([LC_DIRECTIO_USE_ITER], [
+LB_CHECK_COMPILE([if direct IO uses iov_iter],
+direct_io_iter, [
+	#include <linux/fs.h>
+],[
+	struct address_space_operations ops;
+	struct iov_iter *iter = NULL;
+	loff_t offset = 0;
+
+	ops.direct_IO(0, NULL, iter, offset);
+],[
+	AC_DEFINE(HAVE_DIRECTIO_ITER, 1,
+		[direct IO uses iov_iter])
+])
+]) # LC_DIRECTIO_USE_ITER
+
+#
+# LC_IOV_ITER_RW
+#
+# 4.1 kernel has iov_iter_rw
+#
+AC_DEFUN([LC_IOV_ITER_RW], [
+LB_CHECK_COMPILE([if iov_iter_rw exist],
+iov_iter_rw, [
+	#include <linux/fs.h>
+	#include <linux/uio.h>
+],[
+	struct iov_iter *iter = NULL;
+
+	iov_iter_rw(iter);
+],[
+	AC_DEFINE(HAVE_IOV_ITER_RW, 1,
+		[iov_iter_rw exist])
+])
+]) # LC_IOV_ITER_RW
+
+#
 # LC_PROG_LINUX
 #
 # Lustre linux kernel checks
@@ -1749,6 +1790,12 @@ AC_DEFUN([LC_PROG_LINUX], [
 	# 3.15
 	LC_VFS_RENAME_6ARGS
 
+	# 3.16
+	LC_DIRECTIO_USE_ITER
+
+	# 4.1.0
+	LC_IOV_ITER_RW
+
 	#
 	AS_IF([test "x$enable_server" != xno], [
 		LC_FUNC_DEV_SET_RDONLY
diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index 6085a20..d3fbda3 100644
--- a/lustre/llite/rw26.c
+++ b/lustre/llite/rw26.c
@@ -339,20 +339,19 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
 }
 EXPORT_SYMBOL(ll_direct_rw_pages);
 
-static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
-                                   int rw, struct inode *inode,
-                                   struct address_space *mapping,
-                                   size_t size, loff_t file_offset,
-                                   struct page **pages, int page_count)
+static ssize_t
+ll_direct_IO_seg(const struct lu_env *env, struct cl_io *io, int rw,
+		 struct inode *inode, size_t size, loff_t file_offset,
+		 struct page **pages, int page_count)
 {
-        struct ll_dio_pages pvec = { .ldp_pages = pages,
-                                     .ldp_nr = page_count,
-                                     .ldp_size = size,
-                                     .ldp_offsets = NULL,
-                                     .ldp_start_offset = file_offset
-                                   };
-
-        return ll_direct_rw_pages(env, io, rw, inode, &pvec);
+	struct ll_dio_pages pvec = { .ldp_pages = pages,
+				     .ldp_nr = page_count,
+				     .ldp_size = size,
+				     .ldp_offsets = NULL,
+				     .ldp_start_offset = file_offset
+				   };
+
+	return ll_direct_rw_pages(env, io, rw, inode, &pvec);
 }
 
 #ifdef KMALLOC_MAX_SIZE
@@ -368,27 +367,138 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
  * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc.
  */
 #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
 		      ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
-                               const struct iovec *iov, loff_t file_offset,
-                               unsigned long nr_segs)
+
+#ifndef HAVE_IOV_ITER_RW
+# define iov_iter_rw(iter) rw
+#endif
+
+#if defined(HAVE_DIRECTIO_ITER) || defined(HAVE_IOV_ITER_RW)
+static ssize_t
+ll_direct_IO(
+# ifndef HAVE_IOV_ITER_RW
+	     int rw,
+# endif
+	     struct kiocb *iocb, struct iov_iter *iter,
+	     loff_t file_offset)
 {
-        struct lu_env *env;
-        struct cl_io *io;
-        struct file *file = iocb->ki_filp;
-        struct inode *inode = file->f_mapping->host;
-        long count = iov_length(iov, nr_segs);
-        long tot_bytes = 0, result = 0;
-        unsigned long seg = 0;
-        long size = MAX_DIO_SIZE;
-        int refcheck;
-        ENTRY;
+	struct lu_env *env;
+	struct cl_io *io;
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t count = iov_iter_count(iter);
+	ssize_t tot_bytes = 0, result = 0;
+	size_t size = MAX_DIO_SIZE;
+	int refcheck;
+
+	/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
+	if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
+		return -EINVAL;
+
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
+	       "offset=%lld=%llx, pages %zd (max %lu)\n",
+	       PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
+	       file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
+	       MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
+
+	/* Check that all user buffers are aligned as well */
+	if (iov_iter_alignment(iter) & ~PAGE_MASK)
+		return -EINVAL;
+
+	env = cl_env_get(&refcheck);
+	LASSERT(!IS_ERR(env));
+	io = vvp_env_io(env)->vui_cl.cis_io;
+	LASSERT(io != NULL);
+
+	/* 0. Need locking between buffered and direct access. and race with
+	 * size changing by concurrent truncates and writes.
+	 * 1. Need inode mutex to operate transient pages.
+	 */
+	if (iov_iter_rw(iter) == READ)
+		mutex_lock(&inode->i_mutex);
+
+	while (iov_iter_count(iter)) {
+		struct page **pages;
+		size_t offs;
+
+		count = min_t(size_t, iov_iter_count(iter), size);
+		if (iov_iter_rw(iter) == READ) {
+			if (file_offset >= i_size_read(inode))
+				break;
+
+			if (file_offset + count > i_size_read(inode))
+				count = i_size_read(inode) - file_offset;
+		}
+
+		result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+		if (likely(result > 0)) {
+			int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
+
+			result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
+						  inode, result, file_offset,
+						  pages, n);
+			ll_free_user_pages(pages, n,
+					   iov_iter_rw(iter) == READ);
+
+		}
+		if (unlikely(result <= 0)) {
+			/* If we can't allocate a large enough buffer
+			 * for the request, shrink it to a smaller
+			 * PAGE_SIZE multiple and try again.
+			 * We should always be able to kmalloc for a
+			 * page worth of page pointers = 4MB on i386. */
+			if (result == -ENOMEM &&
+			    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+				   PAGE_CACHE_SIZE) {
+				size = ((((size / 2) - 1) |
+					 ~PAGE_MASK) + 1) & PAGE_MASK;
+				CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
+				       size);
+				continue;
+			}
+
+			GOTO(out, result);
+		}
+
+		iov_iter_advance(iter, result);
+		tot_bytes += result;
+		file_offset += result;
+	}
+out:
+	if (iov_iter_rw(iter) == READ)
+		mutex_unlock(&inode->i_mutex);
+
+	if (tot_bytes > 0) {
+		struct vvp_io *vio = vvp_env_io(env);
+
+		/* no commit async for direct IO */
+		vio->u.write.vui_written += tot_bytes;
+	}
+
+	cl_env_put(env, &refcheck);
+	return tot_bytes ? : result;
+}
+#else /* !HAVE_DIRECTIO_ITER && !HAVE_IOV_ITER_RW */
+static ssize_t
+ll_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+	     loff_t file_offset, unsigned long nr_segs)
+{
+	struct lu_env *env;
+	struct cl_io *io;
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t count = iov_length(iov, nr_segs);
+	ssize_t tot_bytes = 0, result = 0;
+	unsigned long seg = 0;
+	size_t size = MAX_DIO_SIZE;
+	int refcheck;
+	ENTRY;
 	/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
 	if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
 		RETURN(-EINVAL);
 
-	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%lu (max %lu), "
-	       "offset=%lld=%llx, pages %lu (max %lu)\n",
+	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
+	       "offset=%lld=%llx, pages %zd (max %lu)\n",
 	       PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
 	       file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
 	       MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
@@ -406,7 +516,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 	LASSERT(io != NULL);
 
 	for (seg = 0; seg < nr_segs; seg++) {
-		long iov_left = iov[seg].iov_len;
+		size_t iov_left = iov[seg].iov_len;
 		unsigned long user_addr = (unsigned long)iov[seg].iov_base;
 
 		if (rw == READ) {
@@ -419,7 +529,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 		while (iov_left > 0) {
 			struct page **pages;
 			int page_count, max_pages = 0;
-			long bytes;
+			size_t bytes;
 
 			bytes = min(size, iov_left);
 			page_count = ll_get_user_pages(rw, user_addr, bytes,
@@ -427,10 +537,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 			if (likely(page_count > 0)) {
 				if (unlikely(page_count < max_pages))
 					bytes = page_count << PAGE_CACHE_SHIFT;
-				result = ll_direct_IO_26_seg(env, io, rw, inode,
-							     file->f_mapping,
-							     bytes, file_offset,
-							     pages, page_count);
+				result = ll_direct_IO_seg(env, io, rw, inode,
+							  bytes, file_offset,
+							  pages, page_count);
 				ll_free_user_pages(pages, max_pages, rw==READ);
 			} else if (page_count == 0) {
 				GOTO(out, result = -EFAULT);
@@ -449,7 +558,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 					size = ((((size / 2) - 1) |
 						 ~PAGE_CACHE_MASK) + 1) &
 						PAGE_CACHE_MASK;
-					CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+					CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
 					       size);
 					continue;
 				}
@@ -474,6 +583,7 @@ out:
 	cl_env_put(env, &refcheck);
 	RETURN(tot_bytes ? tot_bytes : result);
 }
+#endif /* HAVE_DIRECTIO_ITER || HAVE_IOV_ITER_RW */
 
 /**
  * Prepare partially written-to page for a write.
@@ -688,22 +798,22 @@ static int ll_migratepage(struct address_space *mapping,
 #ifndef MS_HAS_NEW_AOPS
 const struct address_space_operations ll_aops = {
 	.readpage	= ll_readpage,
-	.direct_IO      = ll_direct_IO_26,
-	.writepage      = ll_writepage,
-	.writepages     = ll_writepages,
-	.set_page_dirty = __set_page_dirty_nobuffers,
-	.write_begin    = ll_write_begin,
-	.write_end      = ll_write_end,
-	.invalidatepage = ll_invalidatepage,
-	.releasepage    = (void *)ll_releasepage,
+	.direct_IO	= ll_direct_IO,
+	.writepage	= ll_writepage,
+	.writepages	= ll_writepages,
+	.set_page_dirty	= __set_page_dirty_nobuffers,
+	.write_begin	= ll_write_begin,
+	.write_end	= ll_write_end,
+	.invalidatepage	= ll_invalidatepage,
+	.releasepage	= (void *)ll_releasepage,
 #ifdef CONFIG_MIGRATION
-	.migratepage    = ll_migratepage,
+	.migratepage	= ll_migratepage,
 #endif
 };
 #else
 const struct address_space_operations_ext ll_aops = {
 	.orig_aops.readpage	= ll_readpage,
-	.orig_aops.direct_IO	= ll_direct_IO_26,
+	.orig_aops.direct_IO	= ll_direct_IO,
 	.orig_aops.writepage	= ll_writepage,
 	.orig_aops.writepages	= ll_writepages,
 	.orig_aops.set_page_dirty = __set_page_dirty_nobuffers,
-- 
1.8.3.1
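
Note (illustrative sketch, not part of the patch): the new ll_direct_IO()
converts the byte count returned by iov_iter_get_pages_alloc() into a page
count with DIV_ROUND_UP(result + offs, PAGE_SIZE). The standalone program
below only demonstrates that this is the same computation as the open-coded
(n + d - 1) / d form mentioned in the ef96fddd commit message above; the
PAGE_SIZE value and the sample numbers are made up for the example.

	#include <assert.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096UL	/* stand-in for the kernel constant */
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		/* hypothetical values: bytes mapped by iov_iter_get_pages_alloc()
		 * and the offset of the data within the first page */
		unsigned long result = 5000;
		unsigned long offs = 100;

		/* open-coded rounding vs. the DIV_ROUND_UP macro */
		unsigned long open_coded = (result + offs + PAGE_SIZE - 1) / PAGE_SIZE;
		unsigned long n = DIV_ROUND_UP(result + offs, PAGE_SIZE);

		assert(n == open_coded);
		printf("pages needed for %lu bytes at offset %lu: %lu\n",
		       result, offs, n);
		return 0;
	}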