+#ifdef HAVE_DIRECTIO_2ARGS
+ loff_t file_offset = iocb->ki_pos;
+#endif
+ struct ll_cl_context *lcc;
+ const struct lu_env *env;
+ struct cl_io *io;
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t count = iov_iter_count(iter);
+ ssize_t tot_bytes = 0, result = 0;
+ size_t size = MAX_DIO_SIZE;
+
+ /* Check EOF by ourselves */
+ if (iov_iter_rw(iter) == READ && file_offset >= i_size_read(inode))
+ return 0;
+ /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
+ if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
+ return -EINVAL;
+
+ CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
+ "offset=%lld=%llx, pages %zd (max %lu)\n",
+ PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
+ file_offset, file_offset, count >> PAGE_SHIFT,
+ MAX_DIO_SIZE >> PAGE_SHIFT);
+
+ /* Check that all user buffers are aligned as well */
+ if (iov_iter_alignment(iter) & ~PAGE_MASK)
+ return -EINVAL;
+
+ lcc = ll_cl_find(file);
+ if (lcc == NULL)
+ RETURN(-EIO);
+
+ env = lcc->lcc_env;
+ LASSERT(!IS_ERR(env));
+ io = lcc->lcc_io;
+ LASSERT(io != NULL);
+
+ /* 0. Need locking between buffered and direct access. and race with
+ * size changing by concurrent truncates and writes.
+ * 1. Need inode mutex to operate transient pages.
+ */
+ if (iov_iter_rw(iter) == READ)
+ inode_lock(inode);
+
+ while (iov_iter_count(iter)) {
+ struct page **pages;
+ size_t offs;
+
+ count = min_t(size_t, iov_iter_count(iter), size);
+ if (iov_iter_rw(iter) == READ) {
+ if (file_offset >= i_size_read(inode))
+ break;
+
+ if (file_offset + count > i_size_read(inode))
+ count = i_size_read(inode) - file_offset;
+ }
+
+ result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+ if (likely(result > 0)) {
+ int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);
+
+ result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
+ inode, result, file_offset,
+ pages, n);
+ ll_free_user_pages(pages, n,
+ iov_iter_rw(iter) == READ);
+
+ }
+ if (unlikely(result <= 0)) {
+ /* If we can't allocate a large enough buffer
+ * for the request, shrink it to a smaller
+ * PAGE_SIZE multiple and try again.
+ * We should always be able to kmalloc for a
+ * page worth of page pointers = 4MB on i386. */
+ if (result == -ENOMEM &&
+ size > (PAGE_SIZE / sizeof(*pages)) *
+ PAGE_SIZE) {
+ size = ((((size / 2) - 1) |
+ ~PAGE_MASK) + 1) & PAGE_MASK;
+ CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
+ size);
+ continue;
+ }
+
+ GOTO(out, result);
+ }
+
+ iov_iter_advance(iter, result);
+ tot_bytes += result;
+ file_offset += result;
+ }
+out:
+ if (iov_iter_rw(iter) == READ)
+ inode_unlock(inode);
+
+ if (tot_bytes > 0) {
+ struct vvp_io *vio = vvp_env_io(env);
+
+ /* no commit async for direct IO */
+ vio->u.write.vui_written += tot_bytes;
+ }
+
+ return tot_bytes ? : result;
+}
+#else /* !HAVE_DIRECTIO_ITER && !HAVE_IOV_ITER_RW */
+
+/* Pin the user pages backing [user_addr, user_addr + size) for direct I/O.
+ *
+ * Allocates *pages (caller frees via ll_free_user_pages() on success; freed
+ * here automatically when pinning fails) and fills it with the pinned pages.
+ *
+ * \param rw        READ or WRITE; READ means the pages will be written to,
+ *                  hence get_user_pages() is called with write=1
+ * \param user_addr start of the user buffer (need not be page aligned)
+ * \param size      length of the user buffer in bytes
+ * \param pages     out: newly allocated array of pinned page pointers,
+ *                  NULL on -EFBIG
+ * \param max_pages out: number of entries allocated in *pages
+ *
+ * \retval >0       number of pages actually pinned
+ * \retval -EFBIG   request exceeds MAX_DIRECTIO_SIZE
+ * \retval -ENOMEM  array allocation failed
+ * \retval <=0      other get_user_pages() failure, propagated
+ */
+static inline int ll_get_user_pages(int rw, unsigned long user_addr,
+				    size_t size, struct page ***pages,
+				    int *max_pages)
+{
+	int result = -ENOMEM;
+
+	/* set an arbitrary limit to prevent arithmetic overflow */
+	if (size > MAX_DIRECTIO_SIZE) {
+		*pages = NULL;
+		return -EFBIG;
+	}
+
+	/* number of pages spanned by [user_addr, user_addr + size),
+	 * accounting for a non-page-aligned start address */
+	*max_pages = (user_addr + size + PAGE_SIZE - 1) >>
+		     PAGE_SHIFT;
+	*max_pages -= user_addr >> PAGE_SHIFT;
+
+	OBD_ALLOC_LARGE(*pages, *max_pages * sizeof(**pages));
+	if (*pages) {
+		/* mmap_sem must be held across get_user_pages() */
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					*max_pages, (rw == READ), 0, *pages,
+					NULL);
+		up_read(&current->mm->mmap_sem);
+		if (unlikely(result <= 0))
+			OBD_FREE_LARGE(*pages, *max_pages * sizeof(**pages));
+	}
+
+	return result;
+}
+
+static ssize_t
+ll_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t file_offset, unsigned long nr_segs)
+{
+ struct ll_cl_context *lcc;
+ const struct lu_env *env;
+ struct cl_io *io;
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t count = iov_length(iov, nr_segs);
+ ssize_t tot_bytes = 0, result = 0;
+ unsigned long seg = 0;
+ size_t size = MAX_DIO_SIZE;
+ ENTRY;