Whamcloud - gitweb
LU-6215 llite: remove obsolete conditional code
[fs/lustre-release.git] / lustre / llite / rw26.c
index 6aedd06..b362532 100644 (file)
@@ -88,7 +88,7 @@ static void ll_invalidatepage(struct page *vmpage,
         struct cl_page   *page;
         struct cl_object *obj;
 
-        int refcheck;
+       __u16 refcheck;
 
         LASSERT(PageLocked(vmpage));
         LASSERT(!PageWriteback(vmpage));
@@ -186,53 +186,6 @@ static int ll_releasepage(struct page *vmpage, RELEASEPAGE_ARG_TYPE gfp_mask)
 
 #define MAX_DIRECTIO_SIZE 2*1024*1024*1024UL
 
-static inline int ll_get_user_pages(int rw, unsigned long user_addr,
-                                    size_t size, struct page ***pages,
-                                    int *max_pages)
-{
-        int result = -ENOMEM;
-
-        /* set an arbitrary limit to prevent arithmetic overflow */
-        if (size > MAX_DIRECTIO_SIZE) {
-                *pages = NULL;
-                return -EFBIG;
-        }
-
-       *max_pages = (user_addr + size + PAGE_CACHE_SIZE - 1) >>
-                    PAGE_CACHE_SHIFT;
-       *max_pages -= user_addr >> PAGE_CACHE_SHIFT;
-
-        OBD_ALLOC_LARGE(*pages, *max_pages * sizeof(**pages));
-        if (*pages) {
-                down_read(&current->mm->mmap_sem);
-                result = get_user_pages(current, current->mm, user_addr,
-                                        *max_pages, (rw == READ), 0, *pages,
-                                        NULL);
-                up_read(&current->mm->mmap_sem);
-                if (unlikely(result <= 0))
-                        OBD_FREE_LARGE(*pages, *max_pages * sizeof(**pages));
-        }
-
-        return result;
-}
-
-/*  ll_free_user_pages - tear down page struct array
- *  @pages: array of page struct pointers underlying target buffer */
-static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
-{
-        int i;
-
-        for (i = 0; i < npages; i++) {
-                if (pages[i] == NULL)
-                        break;
-                if (do_dirty)
-                        set_page_dirty_lock(pages[i]);
-                page_cache_release(pages[i]);
-        }
-
-        OBD_FREE_LARGE(pages, npages * sizeof(*pages));
-}
-
 ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
                            int rw, struct inode *inode,
                            struct ll_dio_pages *pv)
@@ -339,20 +292,40 @@ ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
 }
 EXPORT_SYMBOL(ll_direct_rw_pages);
 
-static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
-                                   int rw, struct inode *inode,
-                                   struct address_space *mapping,
-                                   size_t size, loff_t file_offset,
-                                   struct page **pages, int page_count)
+static ssize_t
+ll_direct_IO_seg(const struct lu_env *env, struct cl_io *io, int rw,
+                struct inode *inode, size_t size, loff_t file_offset,
+                struct page **pages, int page_count)
 {
-    struct ll_dio_pages pvec = { .ldp_pages        = pages,
-                                 .ldp_nr           = page_count,
-                                 .ldp_size         = size,
-                                 .ldp_offsets      = NULL,
-                                 .ldp_start_offset = file_offset
-                               };
-
-    return ll_direct_rw_pages(env, io, rw, inode, &pvec);
+       struct ll_dio_pages pvec = { .ldp_pages         = pages,
+                                    .ldp_nr            = page_count,
+                                    .ldp_size          = size,
+                                    .ldp_offsets       = NULL,
+                                    .ldp_start_offset  = file_offset
+                                  };
+
+       return ll_direct_rw_pages(env, io, rw, inode, &pvec);
+}
+
+/*  ll_free_user_pages - tear down page struct array
+ *  @pages: array of page struct pointers underlying target buffer */
+static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+       int i;
+
+       for (i = 0; i < npages; i++) {
+               if (pages[i] == NULL)
+                       break;
+               if (do_dirty)
+                       set_page_dirty_lock(pages[i]);
+               page_cache_release(pages[i]);
+       }
+
+#if defined(HAVE_DIRECTIO_ITER) || defined(HAVE_IOV_ITER_RW)
+       kvfree(pages);
+#else
+       OBD_FREE_LARGE(pages, npages * sizeof(*pages));
+#endif
 }
 
 #ifdef KMALLOC_MAX_SIZE
@@ -368,39 +341,177 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
  * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. */
 #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
                      ~(DT_MAX_BRW_SIZE - 1))
-static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
-                               const struct iovec *iov, loff_t file_offset,
-                               unsigned long nr_segs)
+
+#ifndef HAVE_IOV_ITER_RW
+# define iov_iter_rw(iter)     rw
+#endif
+
+#if defined(HAVE_DIRECTIO_ITER) || defined(HAVE_IOV_ITER_RW)
+static ssize_t
+ll_direct_IO(
+# ifndef HAVE_IOV_ITER_RW
+            int rw,
+# endif
+            struct kiocb *iocb, struct iov_iter *iter,
+            loff_t file_offset)
+{
+       struct lu_env *env;
+       struct cl_io *io;
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       ssize_t count = iov_iter_count(iter);
+       ssize_t tot_bytes = 0, result = 0;
+       size_t size = MAX_DIO_SIZE;
+       __u16 refcheck;
+
+       /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
+       if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
+               return -EINVAL;
+
+       CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
+              "offset=%lld=%llx, pages %zd (max %lu)\n",
+              PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
+              file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
+              MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
+
+       /* Check that all user buffers are aligned as well */
+       if (iov_iter_alignment(iter) & ~PAGE_MASK)
+               return -EINVAL;
+
+       env = cl_env_get(&refcheck);
+       LASSERT(!IS_ERR(env));
+       io = vvp_env_io(env)->vui_cl.cis_io;
+       LASSERT(io != NULL);
+
+       /* 0. Need locking between buffered and direct access. and race with
+        *    size changing by concurrent truncates and writes.
+        * 1. Need inode mutex to operate transient pages.
+        */
+       if (iov_iter_rw(iter) == READ)
+               mutex_lock(&inode->i_mutex);
+
+       while (iov_iter_count(iter)) {
+               struct page **pages;
+               size_t offs;
+
+               count = min_t(size_t, iov_iter_count(iter), size);
+               if (iov_iter_rw(iter) == READ) {
+                       if (file_offset >= i_size_read(inode))
+                               break;
+
+                       if (file_offset + count > i_size_read(inode))
+                               count = i_size_read(inode) - file_offset;
+               }
+
+               result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+               if (likely(result > 0)) {
+                       int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
+
+                       result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
+                                                 inode, result, file_offset,
+                                                 pages, n);
+                       ll_free_user_pages(pages, n,
+                                          iov_iter_rw(iter) == READ);
+
+               }
+               if (unlikely(result <= 0)) {
+                       /* If we can't allocate a large enough buffer
+                        * for the request, shrink it to a smaller
+                        * PAGE_SIZE multiple and try again.
+                        * We should always be able to kmalloc for a
+                        * page worth of page pointers = 4MB on i386. */
+                       if (result == -ENOMEM &&
+                           size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+                                   PAGE_CACHE_SIZE) {
+                               size = ((((size / 2) - 1) |
+                                       ~PAGE_MASK) + 1) & PAGE_MASK;
+                               CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
+                                      size);
+                               continue;
+                       }
+
+                       GOTO(out, result);
+               }
+
+               iov_iter_advance(iter, result);
+               tot_bytes += result;
+               file_offset += result;
+       }
+out:
+       if (iov_iter_rw(iter) == READ)
+               mutex_unlock(&inode->i_mutex);
+
+       if (tot_bytes > 0) {
+               struct vvp_io *vio = vvp_env_io(env);
+
+               /* no commit async for direct IO */
+               vio->u.write.vui_written += tot_bytes;
+       }
+
+       cl_env_put(env, &refcheck);
+       return tot_bytes ? : result;
+}
+#else /* !HAVE_DIRECTIO_ITER && !HAVE_IOV_ITER_RW */
+
+static inline int ll_get_user_pages(int rw, unsigned long user_addr,
+                                   size_t size, struct page ***pages,
+                                   int *max_pages)
 {
-        struct lu_env *env;
-        struct cl_io *io;
-        struct file *file = iocb->ki_filp;
-        struct inode *inode = file->f_mapping->host;
-        long count = iov_length(iov, nr_segs);
-        long tot_bytes = 0, result = 0;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        unsigned long seg = 0;
-        long size = MAX_DIO_SIZE;
-        int refcheck;
-        ENTRY;
-
-       if (!lli->lli_has_smd)
-                RETURN(-EBADF);
+       int result = -ENOMEM;
+
+       /* set an arbitrary limit to prevent arithmetic overflow */
+       if (size > MAX_DIRECTIO_SIZE) {
+               *pages = NULL;
+               return -EFBIG;
+       }
+
+       *max_pages = (user_addr + size + PAGE_CACHE_SIZE - 1) >>
+                     PAGE_CACHE_SHIFT;
+       *max_pages -= user_addr >> PAGE_CACHE_SHIFT;
+
+       OBD_ALLOC_LARGE(*pages, *max_pages * sizeof(**pages));
+       if (*pages) {
+               down_read(&current->mm->mmap_sem);
+               result = get_user_pages(current, current->mm, user_addr,
+                                       *max_pages, (rw == READ), 0, *pages,
+                                       NULL);
+               up_read(&current->mm->mmap_sem);
+               if (unlikely(result <= 0))
+                       OBD_FREE_LARGE(*pages, *max_pages * sizeof(**pages));
+       }
+
+       return result;
+}
+
+static ssize_t
+ll_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+            loff_t file_offset, unsigned long nr_segs)
+{
+       struct lu_env *env;
+       struct cl_io *io;
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       ssize_t count = iov_length(iov, nr_segs);
+       ssize_t tot_bytes = 0, result = 0;
+       unsigned long seg = 0;
+       size_t size = MAX_DIO_SIZE;
+       __u16 refcheck;
+       ENTRY;
 
         /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
-        if ((file_offset & ~CFS_PAGE_MASK) || (count & ~CFS_PAGE_MASK))
+       if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
                 RETURN(-EINVAL);
 
-       CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%lu (max %lu), "
-              "offset=%lld=%llx, pages %lu (max %lu)\n",
+       CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
+              "offset=%lld=%llx, pages %zd (max %lu)\n",
               PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
               file_offset, file_offset, count >> PAGE_CACHE_SHIFT,
               MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
 
         /* Check that all user buffers are aligned as well */
         for (seg = 0; seg < nr_segs; seg++) {
-                if (((unsigned long)iov[seg].iov_base & ~CFS_PAGE_MASK) ||
-                    (iov[seg].iov_len & ~CFS_PAGE_MASK))
+               if (((unsigned long)iov[seg].iov_base & ~PAGE_MASK) ||
+                   (iov[seg].iov_len & ~PAGE_MASK))
                         RETURN(-EINVAL);
         }
 
@@ -410,7 +521,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
         LASSERT(io != NULL);
 
         for (seg = 0; seg < nr_segs; seg++) {
-                long iov_left = iov[seg].iov_len;
+               size_t iov_left = iov[seg].iov_len;
                 unsigned long user_addr = (unsigned long)iov[seg].iov_base;
 
                 if (rw == READ) {
@@ -423,7 +534,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
                 while (iov_left > 0) {
                         struct page **pages;
                         int page_count, max_pages = 0;
-                        long bytes;
+                       size_t bytes;
 
                         bytes = min(size, iov_left);
                         page_count = ll_get_user_pages(rw, user_addr, bytes,
@@ -431,10 +542,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
                         if (likely(page_count > 0)) {
                                 if (unlikely(page_count <  max_pages))
                                        bytes = page_count << PAGE_CACHE_SHIFT;
-                                result = ll_direct_IO_26_seg(env, io, rw, inode,
-                                                             file->f_mapping,
-                                                             bytes, file_offset,
-                                                             pages, page_count);
+                               result = ll_direct_IO_seg(env, io, rw, inode,
+                                                         bytes, file_offset,
+                                                         pages, page_count);
                                 ll_free_user_pages(pages, max_pages, rw==READ);
                         } else if (page_count == 0) {
                                 GOTO(out, result = -EFAULT);
@@ -451,9 +561,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
                                    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
                                           PAGE_CACHE_SIZE) {
                                         size = ((((size / 2) - 1) |
-                                                 ~CFS_PAGE_MASK) + 1) &
-                                                CFS_PAGE_MASK;
-                                        CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+                                                ~PAGE_CACHE_MASK) + 1) &
+                                               PAGE_CACHE_MASK;
+                                       CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                                                size);
                                         continue;
                                 }
@@ -478,6 +588,7 @@ out:
        cl_env_put(env, &refcheck);
        RETURN(tot_bytes ? tot_bytes : result);
 }
+#endif /* HAVE_DIRECTIO_ITER || HAVE_IOV_ITER_RW */
 
 /**
  * Prepare partially written-to page for a write.
@@ -485,7 +596,7 @@ out:
 static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
                                   struct cl_page *pg)
 {
-       struct cl_attr *attr   = ccc_env_thread_attr(env);
+       struct cl_attr *attr   = vvp_env_thread_attr(env);
        struct cl_object *obj  = io->ci_obj;
        struct vvp_page *vpg   = cl_object_page_slice(obj, pg);
        loff_t          offset = cl_offset(obj, vvp_index(vpg));
@@ -533,8 +644,10 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
        CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);
 
        lcc = ll_cl_find(file);
-       if (lcc == NULL)
+       if (lcc == NULL) {
+               io = NULL;
                GOTO(out, result = -EIO);
+       }
 
        env = lcc->lcc_env;
        io  = lcc->lcc_io;
@@ -605,6 +718,8 @@ out:
                        unlock_page(vmpage);
                        page_cache_release(vmpage);
                }
+               if (io)
+                       io->ci_result = result;
        } else {
                *pagep = vmpage;
                *fsdata = lcc;
@@ -670,9 +785,11 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
                unplug = true;
        }
        if (unplug ||
-           file->f_flags & O_SYNC || IS_SYNC(file->f_dentry->d_inode))
+           file->f_flags & O_SYNC || IS_SYNC(file->f_path.dentry->d_inode))
                result = vvp_io_write_commit(env, io);
 
+       if (result < 0)
+               io->ci_result = result;
        RETURN(result >= 0 ? copied : result);
 }
 
@@ -689,34 +806,17 @@ static int ll_migratepage(struct address_space *mapping,
 }
 #endif
 
-#ifndef MS_HAS_NEW_AOPS
 const struct address_space_operations ll_aops = {
        .readpage       = ll_readpage,
-        .direct_IO      = ll_direct_IO_26,
-        .writepage      = ll_writepage,
-       .writepages     = ll_writepages,
-        .set_page_dirty = __set_page_dirty_nobuffers,
-        .write_begin    = ll_write_begin,
-        .write_end      = ll_write_end,
-        .invalidatepage = ll_invalidatepage,
-        .releasepage    = (void *)ll_releasepage,
+       .direct_IO      = ll_direct_IO,
+       .writepage      = ll_writepage,
+       .writepages     = ll_writepages,
+       .set_page_dirty = __set_page_dirty_nobuffers,
+       .write_begin    = ll_write_begin,
+       .write_end      = ll_write_end,
+       .invalidatepage = ll_invalidatepage,
+       .releasepage    = (void *)ll_releasepage,
 #ifdef CONFIG_MIGRATION
-        .migratepage    = ll_migratepage,
+       .migratepage    = ll_migratepage,
 #endif
 };
-#else
-const struct address_space_operations_ext ll_aops = {
-       .orig_aops.readpage             = ll_readpage,
-       .orig_aops.direct_IO            = ll_direct_IO_26,
-       .orig_aops.writepage            = ll_writepage,
-       .orig_aops.writepages           = ll_writepages,
-       .orig_aops.set_page_dirty       = __set_page_dirty_nobuffers,
-       .orig_aops.invalidatepage       = ll_invalidatepage,
-       .orig_aops.releasepage          = ll_releasepage,
-#ifdef CONFIG_MIGRATION
-       .orig_aops.migratepage          = ll_migratepage,
-#endif
-       .write_begin                    = ll_write_begin,
-       .write_end                      = ll_write_end
-};
-#endif