X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Frw26.c;h=6fb6d537fe4ca5b669b6b24f5cb1593f33f8d22d;hp=d6b619610339d77cc1b91de36f637711283a6b91;hb=1e4d10af3909452b0eee1f99010d80aeb01d42a7;hpb=f71a539c3e41bae750bcb54b0f9159670148176b

diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index d6b6196..6fb6d53 100644
--- a/lustre/llite/rw26.c
+++ b/lustre/llite/rw26.c
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/lustre/llite/rw26.c
  *
@@ -228,10 +227,10 @@ static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
         if (*pages == NULL)
                 return -ENOMEM;
 
-        down_read(&current->mm->mmap_sem);
+        mmap_read_lock(current->mm);
         result = get_user_pages(current, current->mm, addr, page_count,
                                 rw == READ, 0, *pages, NULL);
-        up_read(&current->mm->mmap_sem);
+        mmap_read_unlock(current->mm);
 
         if (unlikely(result != page_count)) {
                 ll_free_user_pages(*pages, page_count);
@@ -250,12 +249,12 @@ static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
 
 /* iov_iter_alignment() is introduced in 3.16 similar to HAVE_DIO_ITER */
 #if defined(HAVE_DIO_ITER)
-static unsigned long ll_iov_iter_alignment(const struct iov_iter *i)
+static unsigned long iov_iter_alignment_vfs(const struct iov_iter *i)
 {
         return iov_iter_alignment(i);
 }
 #else /* copied from alignment_iovec() */
-static unsigned long ll_iov_iter_alignment(const struct iov_iter *i)
+static unsigned long iov_iter_alignment_vfs(const struct iov_iter *i)
 {
         const struct iovec *iov = i->iov;
         unsigned long res;
@@ -282,6 +281,35 @@ static unsigned long ll_iov_iter_alignment(const struct iov_iter *i)
 }
 #endif
 
+/*
+ * Lustre could relax a bit for alignment, io count is not
+ * necessary page alignment.
+ */
+static unsigned long ll_iov_iter_alignment(struct iov_iter *i)
+{
+        size_t orig_size = i->count;
+        size_t count = orig_size & ~PAGE_MASK;
+        unsigned long res;
+
+        if (!count)
+                return iov_iter_alignment_vfs(i);
+
+        if (orig_size > PAGE_SIZE) {
+                iov_iter_truncate(i, orig_size - count);
+                res = iov_iter_alignment_vfs(i);
+                iov_iter_reexpand(i, orig_size);
+
+                return res;
+        }
+
+        res = iov_iter_alignment_vfs(i);
+        /* start address is page aligned */
+        if ((res & ~PAGE_MASK) == orig_size)
+                return PAGE_SIZE;
+
+        return res;
+}
+
 /** direct IO pages */
 struct ll_dio_pages {
         struct cl_dio_aio       *ldp_aio;
@@ -329,16 +357,28 @@ ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
                 }
 
                 page->cp_sync_io = anchor;
-                cl_2queue_add(queue, page);
+                if (inode && IS_ENCRYPTED(inode)) {
+                        /* In case of Direct IO on encrypted file, we need to
+                         * add a reference to the inode on the cl_page.
+                         * This info is required by llcrypt to proceed
+                         * to encryption/decryption.
+                         * This is safe because we know these pages are private
+                         * to the thread doing the Direct IO.
+                         */
+                        page->cp_inode = inode;
+                }
+                /* We keep the refcount from cl_page_find, so we don't need
+                 * another one here
+                 */
+                cl_2queue_add(queue, page, false);
                 /*
                  * Set page clip to tell transfer formation engine
                  * that page has to be sent even if it is beyond KMS.
                  */
-                cl_page_clip(env, page, 0, min(size, page_size));
+                if (size < page_size)
+                        cl_page_clip(env, page, 0, size);
                 ++io_pages;
 
-                /* drop the reference count for cl_page_find */
-                cl_page_put(env, page);
                 offset += page_size;
                 size -= page_size;
         }
@@ -346,6 +386,11 @@ ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
                 int iot = rw == READ ? CRT_READ : CRT_WRITE;
 
                 atomic_add(io_pages, &anchor->csi_sync_nr);
+                /*
+                 * Avoid out-of-order execution of adding inflight
+                 * modifications count and io submit.
+                 */
+                smp_mb();
                 rc = cl_io_submit_rw(env, io, iot, queue);
                 if (rc == 0) {
                         cl_page_list_splice(&queue->c2_qout,
@@ -398,18 +443,15 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
         size_t count = iov_iter_count(iter);
         ssize_t tot_bytes = 0, result = 0;
         loff_t file_offset = iocb->ki_pos;
-
-        /* if file is encrypted, return 0 so that we fall back to buffered IO */
-        if (IS_ENCRYPTED(inode))
-                return 0;
+        struct vvp_io *vio;
 
         /* Check EOF by ourselves */
         if (rw == READ && file_offset >= i_size_read(inode))
                 return 0;
 
         /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
-        if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
-                return -EINVAL;
+        if (file_offset & ~PAGE_MASK)
+                RETURN(-EINVAL);
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
                "offset=%lld=%llx, pages %zd (max %lu)\n",
@@ -419,7 +461,7 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
 
         /* Check that all user buffers are aligned as well */
         if (ll_iov_iter_alignment(iter) & ~PAGE_MASK)
-                return -EINVAL;
+                RETURN(-EINVAL);
 
         lcc = ll_cl_find(file);
         if (lcc == NULL)
@@ -427,19 +469,13 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
 
         env = lcc->lcc_env;
         LASSERT(!IS_ERR(env));
+        vio = vvp_env_io(env);
         io = lcc->lcc_io;
         LASSERT(io != NULL);
 
-        aio = cl_aio_alloc(iocb);
-        if (!aio)
-                RETURN(-ENOMEM);
-
-        /* 0. Need locking between buffered and direct access. and race with
-         *    size changing by concurrent truncates and writes.
-         * 1. Need inode mutex to operate transient pages.
-         */
-        if (rw == READ)
-                inode_lock(inode);
+        aio = io->ci_aio;
+        LASSERT(aio);
+        LASSERT(aio->cda_iocb == iocb);
 
         while (iov_iter_count(iter)) {
                 struct ll_dio_pages pvec = { .ldp_aio = aio };
@@ -476,30 +512,36 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
         }
 
 out:
-        aio->cda_bytes = tot_bytes;
-        cl_sync_io_note(env, &aio->cda_sync, result);
+        aio->cda_bytes += tot_bytes;
+
+        if (rw == WRITE)
+                vio->u.readwrite.vui_written += tot_bytes;
+        else
+                vio->u.readwrite.vui_read += tot_bytes;
 
-        if (is_sync_kiocb(iocb)) {
+        /* We cannot do async submission - for AIO or regular DIO - unless
+         * lockless because it causes us to release the lock early.
+         *
+         * There are also several circumstances in which we must disable
+         * parallel DIO, so we check if it is enabled.
+         *
+         * The check for "is_sync_kiocb" excludes AIO, which does not need to
+         * be disabled in these situations.
+         */
+        if (io->ci_dio_lock || (is_sync_kiocb(iocb) && !io->ci_parallel_dio)) {
                 ssize_t rc2;
 
-                rc2 = cl_sync_io_wait(env, &aio->cda_sync, 0);
+                /* Wait here rather than doing async submission */
+                rc2 = cl_sync_io_wait_recycle(env, &aio->cda_sync, 0, 0);
                 if (result == 0 && rc2)
                         result = rc2;
 
-                if (result == 0) {
-                        struct vvp_io *vio = vvp_env_io(env);
-                        /* no commit async for direct IO */
-                        vio->u.write.vui_written += tot_bytes;
+                if (result == 0)
                         result = tot_bytes;
-                }
-                cl_aio_free(aio);
-        } else {
+        } else if (result == 0) {
                 result = -EIOCBQUEUED;
         }
 
-        if (rw == READ)
-                inode_unlock(inode);
-
         return result;
 }
 
@@ -649,12 +691,12 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
                         GOTO(out, result = -EBUSY);
 
                 /**
-                 * Direct read can fall back to buffered read, but DIO is done
+                 * Direct write can fall back to buffered read, but DIO is done
                  * with lockless i/o, and buffered requires LDLM locking, so
                  * in this case we must restart without lockless.
                  */
-                if (!io->ci_ignore_lockless) {
-                        io->ci_ignore_lockless = 1;
+                if (!io->ci_dio_lock) {
+                        io->ci_dio_lock = 1;
                         io->ci_need_restart = 1;
                         GOTO(out, result = -ENOLCK);
                 }
@@ -666,7 +708,7 @@ again:
         if (unlikely(vmpage == NULL ||
                      PageDirty(vmpage) || PageWriteback(vmpage))) {
                 struct vvp_io *vio = vvp_env_io(env);
-                struct cl_page_list *plist = &vio->u.write.vui_queue;
+                struct cl_page_list *plist = &vio->u.readwrite.vui_queue;
 
                 /* if the page is already in dirty cache, we have to commit
                  * the pages right now; otherwise, it may cause deadlock
@@ -827,17 +869,17 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
 
         LASSERT(cl_page_is_owned(page, io));
         if (copied > 0) {
-                struct cl_page_list *plist = &vio->u.write.vui_queue;
+                struct cl_page_list *plist = &vio->u.readwrite.vui_queue;
 
                 lcc->lcc_page = NULL; /* page will be queued */
 
                 /* Add it into write queue */
-                cl_page_list_add(plist, page);
+                cl_page_list_add(plist, page, true);
                 if (plist->pl_nr == 1) /* first page */
-                        vio->u.write.vui_from = from;
+                        vio->u.readwrite.vui_from = from;
                 else
                         LASSERT(from == 0);
-                vio->u.write.vui_to = from + copied;
+                vio->u.readwrite.vui_to = from + copied;
 
                 /* To address the deadlock in balance_dirty_pages() where
                  * this dirty page may be written back in the same thread. */
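
Illustration (not part of the patch above): with the new ll_iov_iter_alignment() and the removal of the "count & ~PAGE_MASK" test in ll_direct_IO_impl(), only the file offset and the user buffer address still have to be page aligned for direct I/O from the llite perspective; the byte count itself may end on a partial page. A minimal userspace sketch of such a request follows, assuming a 4 KiB PAGE_SIZE and a hypothetical test file /mnt/lustre/dio_test; how the request is handled past the llite layer is outside the scope of this sketch.

    #define _GNU_SOURCE     /* for O_DIRECT */
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void)
    {
            void *buf;
            ssize_t rc;
            /* hypothetical test file on a Lustre client mount */
            int fd = open("/mnt/lustre/dio_test", O_RDONLY | O_DIRECT);

            if (fd < 0)
                    return 1;
            /* page-aligned buffer, as still required by the alignment check */
            if (posix_memalign(&buf, 4096, 4096) != 0)
                    return 1;

            /* page-aligned offset, deliberately unaligned 100-byte count */
            rc = pread(fd, buf, 100, 0);
            printf("pread returned %zd\n", rc);

            free(buf);
            close(fd);
            return rc < 0;
    }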
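
Illustration (not part of the patch above): mmap_read_lock()/mmap_read_unlock(), used in the ll_get_user_pages() hunk, only exist on kernels with the mmap locking API (v5.8 and later). On older kernels that still expose mm->mmap_sem, a build would typically go through a compat shim along the following lines; the HAVE_MMAP_LOCK guard is a hypothetical configure-time define for this sketch, not necessarily the macro the Lustre tree actually uses.

    #include <linux/mm_types.h>
    #include <linux/rwsem.h>

    #ifndef HAVE_MMAP_LOCK  /* hypothetical guard: pre-5.8 kernels */
    static inline void mmap_read_lock(struct mm_struct *mm)
    {
            /* older kernels protect the VMA tree with mm->mmap_sem */
            down_read(&mm->mmap_sem);
    }

    static inline void mmap_read_unlock(struct mm_struct *mm)
    {
            up_read(&mm->mmap_sem);
    }
    #endif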