X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Frw26.c;h=6fb6d537fe4ca5b669b6b24f5cb1593f33f8d22d;hp=d6b619610339d77cc1b91de36f637711283a6b91;hb=1e4d10af3909452b0eee1f99010d80aeb01d42a7;hpb=f71a539c3e41bae750bcb54b0f9159670148176b

diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index d6b6196..6fb6d53 100644
--- a/lustre/llite/rw26.c
+++ b/lustre/llite/rw26.c
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/lustre/llite/rw26.c
  *
@@ -228,10 +227,10 @@ static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
         if (*pages == NULL)
                 return -ENOMEM;
 
-        down_read(&current->mm->mmap_sem);
+        mmap_read_lock(current->mm);
         result = get_user_pages(current, current->mm, addr, page_count,
                                 rw == READ, 0, *pages, NULL);
-        up_read(&current->mm->mmap_sem);
+        mmap_read_unlock(current->mm);
 
         if (unlikely(result != page_count)) {
                 ll_free_user_pages(*pages, page_count);
@@ -250,12 +249,12 @@ static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
 
 /* iov_iter_alignment() is introduced in 3.16 similar to HAVE_DIO_ITER */
 #if defined(HAVE_DIO_ITER)
-static unsigned long ll_iov_iter_alignment(const struct iov_iter *i)
+static unsigned long iov_iter_alignment_vfs(const struct iov_iter *i)
 {
         return iov_iter_alignment(i);
 }
 #else /* copied from alignment_iovec() */
-static unsigned long ll_iov_iter_alignment(const struct iov_iter *i)
+static unsigned long iov_iter_alignment_vfs(const struct iov_iter *i)
 {
         const struct iovec *iov = i->iov;
         unsigned long res;
@@ -282,6 +281,35 @@ static unsigned long ll_iov_iter_alignment(const struct iov_iter *i)
 }
 #endif
 
+/*
+ * Lustre could relax a bit for alignment, io count is not
+ * necessary page alignment.
+ */
+static unsigned long ll_iov_iter_alignment(struct iov_iter *i)
+{
+        size_t orig_size = i->count;
+        size_t count = orig_size & ~PAGE_MASK;
+        unsigned long res;
+
+        if (!count)
+                return iov_iter_alignment_vfs(i);
+
+        if (orig_size > PAGE_SIZE) {
+                iov_iter_truncate(i, orig_size - count);
+                res = iov_iter_alignment_vfs(i);
+                iov_iter_reexpand(i, orig_size);
+
+                return res;
+        }
+
+        res = iov_iter_alignment_vfs(i);
+        /* start address is page aligned */
+        if ((res & ~PAGE_MASK) == orig_size)
+                return PAGE_SIZE;
+
+        return res;
+}
+
 /** direct IO pages */
 struct ll_dio_pages {
         struct cl_dio_aio       *ldp_aio;
@@ -329,16 +357,28 @@ ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
                 }
 
                 page->cp_sync_io = anchor;
-                cl_2queue_add(queue, page);
+                if (inode && IS_ENCRYPTED(inode)) {
+                        /* In case of Direct IO on encrypted file, we need to
+                         * add a reference to the inode on the cl_page.
+                         * This info is required by llcrypt to proceed
+                         * to encryption/decryption.
+                         * This is safe because we know these pages are private
+                         * to the thread doing the Direct IO.
+                         */
+                        page->cp_inode = inode;
+                }
+                /* We keep the refcount from cl_page_find, so we don't need
+                 * another one here
+                 */
+                cl_2queue_add(queue, page, false);
                 /*
                  * Set page clip to tell transfer formation engine
                  * that page has to be sent even if it is beyond KMS.
                  */
-                cl_page_clip(env, page, 0, min(size, page_size));
+                if (size < page_size)
+                        cl_page_clip(env, page, 0, size);
                 ++io_pages;
 
-                /* drop the reference count for cl_page_find */
-                cl_page_put(env, page);
                 offset += page_size;
                 size -= page_size;
         }
@@ -346,6 +386,11 @@ ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
                 int iot = rw == READ ? CRT_READ : CRT_WRITE;
 
                 atomic_add(io_pages, &anchor->csi_sync_nr);
+                /*
+                 * Avoid out-of-order execution of adding inflight
+                 * modifications count and io submit.
+                 */
+                smp_mb();
                 rc = cl_io_submit_rw(env, io, iot, queue);
                 if (rc == 0) {
                         cl_page_list_splice(&queue->c2_qout,
@@ -398,18 +443,15 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
         size_t count = iov_iter_count(iter);
         ssize_t tot_bytes = 0, result = 0;
         loff_t file_offset = iocb->ki_pos;
-
-        /* if file is encrypted, return 0 so that we fall back to buffered IO */
-        if (IS_ENCRYPTED(inode))
-                return 0;
+        struct vvp_io *vio;
 
         /* Check EOF by ourselves */
         if (rw == READ && file_offset >= i_size_read(inode))
                 return 0;
 
         /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
-        if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
-                return -EINVAL;
+        if (file_offset & ~PAGE_MASK)
+                RETURN(-EINVAL);
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
                "offset=%lld=%llx, pages %zd (max %lu)\n",
@@ -419,7 +461,7 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
 
         /* Check that all user buffers are aligned as well */
         if (ll_iov_iter_alignment(iter) & ~PAGE_MASK)
-                return -EINVAL;
+                RETURN(-EINVAL);
 
         lcc = ll_cl_find(file);
         if (lcc == NULL)
@@ -427,19 +469,13 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
 
         env = lcc->lcc_env;
         LASSERT(!IS_ERR(env));
+        vio = vvp_env_io(env);
         io = lcc->lcc_io;
         LASSERT(io != NULL);
 
-        aio = cl_aio_alloc(iocb);
-        if (!aio)
-                RETURN(-ENOMEM);
-
-        /* 0. Need locking between buffered and direct access. and race with
-         *    size changing by concurrent truncates and writes.
-         * 1. Need inode mutex to operate transient pages.
-         */
-        if (rw == READ)
-                inode_lock(inode);
+        aio = io->ci_aio;
+        LASSERT(aio);
+        LASSERT(aio->cda_iocb == iocb);
 
         while (iov_iter_count(iter)) {
                 struct ll_dio_pages pvec = { .ldp_aio = aio };
@@ -476,30 +512,36 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
         }
 
 out:
-        aio->cda_bytes = tot_bytes;
-        cl_sync_io_note(env, &aio->cda_sync, result);
+        aio->cda_bytes += tot_bytes;
+
+        if (rw == WRITE)
+                vio->u.readwrite.vui_written += tot_bytes;
+        else
+                vio->u.readwrite.vui_read += tot_bytes;
 
-        if (is_sync_kiocb(iocb)) {
+        /* We cannot do async submission - for AIO or regular DIO - unless
+         * lockless because it causes us to release the lock early.
+         *
+         * There are also several circumstances in which we must disable
+         * parallel DIO, so we check if it is enabled.
+         *
+         * The check for "is_sync_kiocb" excludes AIO, which does not need to
+         * be disabled in these situations.
+         */
+        if (io->ci_dio_lock || (is_sync_kiocb(iocb) && !io->ci_parallel_dio)) {
                 ssize_t rc2;
 
-                rc2 = cl_sync_io_wait(env, &aio->cda_sync, 0);
+                /* Wait here rather than doing async submission */
+                rc2 = cl_sync_io_wait_recycle(env, &aio->cda_sync, 0, 0);
                 if (result == 0 && rc2)
                         result = rc2;
 
-                if (result == 0) {
-                        struct vvp_io *vio = vvp_env_io(env);
-                        /* no commit async for direct IO */
-                        vio->u.write.vui_written += tot_bytes;
+                if (result == 0)
                         result = tot_bytes;
-                }
-                cl_aio_free(aio);
-        } else {
+        } else if (result == 0) {
                 result = -EIOCBQUEUED;
         }
 
-        if (rw == READ)
-                inode_unlock(inode);
-
         return result;
 }
 
@@ -649,12 +691,12 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
                         GOTO(out, result = -EBUSY);
 
                 /**
-                 * Direct read can fall back to buffered read, but DIO is done
+                 * Direct write can fall back to buffered read, but DIO is done
                  * with lockless i/o, and buffered requires LDLM locking, so
                  * in this case we must restart without lockless.
                  */
-                if (!io->ci_ignore_lockless) {
-                        io->ci_ignore_lockless = 1;
+                if (!io->ci_dio_lock) {
+                        io->ci_dio_lock = 1;
                         io->ci_need_restart = 1;
                         GOTO(out, result = -ENOLCK);
                 }
@@ -666,7 +708,7 @@ again:
         if (unlikely(vmpage == NULL ||
                      PageDirty(vmpage) || PageWriteback(vmpage))) {
                 struct vvp_io *vio = vvp_env_io(env);
-                struct cl_page_list *plist = &vio->u.write.vui_queue;
+                struct cl_page_list *plist = &vio->u.readwrite.vui_queue;
 
                 /* if the page is already in dirty cache, we have to commit
                  * the pages right now; otherwise, it may cause deadlock
@@ -827,17 +869,17 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
 
         LASSERT(cl_page_is_owned(page, io));
         if (copied > 0) {
-                struct cl_page_list *plist = &vio->u.write.vui_queue;
+                struct cl_page_list *plist = &vio->u.readwrite.vui_queue;
 
                 lcc->lcc_page = NULL; /* page will be queued */
 
                 /* Add it into write queue */
-                cl_page_list_add(plist, page);
+                cl_page_list_add(plist, page, true);
                 if (plist->pl_nr == 1) /* first page */
-                        vio->u.write.vui_from = from;
+                        vio->u.readwrite.vui_from = from;
                 else
                         LASSERT(from == 0);
-                vio->u.write.vui_to = from + copied;
+                vio->u.readwrite.vui_to = from + copied;
 
                 /* To address the deadlock in balance_dirty_pages() where
                  * this dirty page may be written back in the same thread. */
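
Illustration (not part of the patch above): with the new ll_iov_iter_alignment() and the removal of the "count & ~PAGE_MASK" test in ll_direct_IO_impl(), only the file offset and the user buffer address still have to be page aligned for direct I/O from the llite perspective; the byte count itself may end on a partial page. A minimal userspace sketch of such a request follows, assuming a 4 KiB PAGE_SIZE and a hypothetical test file /mnt/lustre/dio_test; how the request is handled past the llite layer is outside the scope of this sketch.

    #define _GNU_SOURCE     /* for O_DIRECT */
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void)
    {
            void *buf;
            ssize_t rc;
            /* hypothetical test file on a Lustre client mount */
            int fd = open("/mnt/lustre/dio_test", O_RDONLY | O_DIRECT);

            if (fd < 0)
                    return 1;
            /* page-aligned buffer, as still required by the alignment check */
            if (posix_memalign(&buf, 4096, 4096) != 0)
                    return 1;

            /* page-aligned offset, deliberately unaligned 100-byte count */
            rc = pread(fd, buf, 100, 0);
            printf("pread returned %zd\n", rc);

            free(buf);
            close(fd);
            return rc < 0;
    }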
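
Illustration (not part of the patch above): mmap_read_lock()/mmap_read_unlock(), used in the ll_get_user_pages() hunk, only exist on kernels with the mmap locking API (v5.8 and later). On older kernels that still expose mm->mmap_sem, a build would typically go through a compat shim along the following lines; the HAVE_MMAP_LOCK guard is a hypothetical configure-time define for this sketch, not necessarily the macro the Lustre tree actually uses.

    #include <linux/mm_types.h>
    #include <linux/rwsem.h>

    #ifndef HAVE_MMAP_LOCK  /* hypothetical guard: pre-5.8 kernels */
    static inline void mmap_read_lock(struct mm_struct *mm)
    {
            /* older kernels protect the VMA tree with mm->mmap_sem */
            down_read(&mm->mmap_sem);
    }

    static inline void mmap_read_unlock(struct mm_struct *mm)
    {
            up_read(&mm->mmap_sem);
    }
    #endif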