Whamcloud - gitweb
LU-13799 llite: Move free user pages
authorPatrick Farrell <pfarrell@whamcloud.com>
Wed, 19 Jan 2022 15:46:47 +0000 (10:46 -0500)
committerAndreas Dilger <adilger@whamcloud.com>
Wed, 9 Mar 2022 17:23:51 +0000 (17:23 +0000)
It is incorrect to release our reference on the user pages
before we're done with them - We need to keep it until the
i/o is complete, otherwise we access them after releasing
our reference.  This has not caused any known bugs so far,
but it's still wrong.

So only drop these references when we free the aio struct,
which is only freed once i/o is complete.

Also rename free_user_pages to release_user_pages, because
it does not free them - it just releases our reference.

This also helps performance by moving free_user_pages to
the daemon threads.  This is a 5-10% boost.

This patch reduces i/o time in ms/GiB by:
Write: 18 ms/GiB
Read: 19 ms/GiB

Totals:
Write: 180 ms/GiB
Read: 178 ms/GiB

mpirun -np 1  $IOR -w -r -t 64M -b 64G -o ./iorfile --posix.odirect

With previous patches in series:
write     5183 MiB/s
read      5201 MiB/s

Plus this patch:
write        5702 MiB/s
read         5756 MiB/s

Lustre-change: https://review.whamcloud.com/39443
Lustre-commit: 7f9b8465bc1125e51aa29cdc27db9a9d6fdc0b89

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I5cf2201e5fd4eeee5b4c996de51d3a6a5394ae34
Reviewed-on: https://review.whamcloud.com/44685
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/cl_object.h
lustre/llite/rw26.c
lustre/obdclass/cl_io.c

index 0e35e32..a7a04dc 100644 (file)
@@ -2607,6 +2607,19 @@ struct cl_sync_io {
        struct cl_dio_aio      *csi_aio;
 };
 
+/** direct IO pages */
+struct ll_dio_pages {
+       /*
+        * page array to be written. we don't support
+        * partial pages except the last one.
+        */
+       struct page             **ldp_pages;
+       /** # of pages in the array. */
+       size_t                  ldp_count;
+       /* the file offset of the first page. */
+       loff_t                  ldp_file_offset;
+};
+
 /** To support Direct AIO */
 struct cl_dio_aio {
        struct cl_sync_io       cda_sync;
@@ -2615,10 +2628,18 @@ struct cl_dio_aio {
        struct kiocb            *cda_iocb;
        ssize_t                 cda_bytes;
        struct cl_dio_aio       *cda_ll_aio;
+       struct ll_dio_pages     cda_dio_pages;
        unsigned                cda_no_aio_complete:1,
                                cda_no_aio_free:1;
 };
 
+#if defined(HAVE_DIRECTIO_ITER) || defined(HAVE_IOV_ITER_RW) || \
+       defined(HAVE_DIRECTIO_2ARGS)
+#define HAVE_DIO_ITER 1
+#endif
+
+void ll_release_user_pages(struct page **pages, int npages);
+
 /** @} cl_sync_io */
 
 /** \defgroup cl_env cl_env
index 86e0a8f..f189af3 100644 (file)
@@ -163,32 +163,6 @@ static int ll_releasepage(struct page *vmpage, RELEASEPAGE_ARG_TYPE gfp_mask)
        return result;
 }
 
-#if defined(HAVE_DIRECTIO_ITER) || defined(HAVE_IOV_ITER_RW) || \
-       defined(HAVE_DIRECTIO_2ARGS)
-#define HAVE_DIO_ITER 1
-#endif
-
-/*
- * ll_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- */
-static void ll_free_user_pages(struct page **pages, int npages)
-{
-       int i;
-
-       for (i = 0; i < npages; i++) {
-               if (!pages[i])
-                       break;
-               put_page(pages[i]);
-       }
-
-#if defined(HAVE_DIO_ITER)
-       kvfree(pages);
-#else
-       OBD_FREE_PTR_ARRAY_LARGE(pages, npages);
-#endif
-}
-
 static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
                                struct page ***pages, ssize_t *npages,
                                size_t maxsize)
@@ -234,7 +208,7 @@ static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
        mmap_read_unlock(current->mm);
 
        if (unlikely(result != page_count)) {
-               ll_free_user_pages(*pages, page_count);
+               ll_release_user_pages(*pages, page_count);
                *pages = NULL;
 
                if (result >= 0)
@@ -311,28 +285,15 @@ static unsigned long ll_iov_iter_alignment(struct iov_iter *i)
        return res;
 }
 
-/** direct IO pages */
-struct ll_dio_pages {
-       struct cl_dio_aio       *ldp_aio;
-       /*
-        * page array to be written. we don't support
-        * partial pages except the last one.
-        */
-       struct page             **ldp_pages;
-       /** # of pages in the array. */
-       size_t                  ldp_count;
-       /* the file offset of the first page. */
-       loff_t                  ldp_file_offset;
-};
-
 static int
 ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
-                  int rw, struct inode *inode, struct ll_dio_pages *pv)
+                  int rw, struct inode *inode, struct cl_dio_aio *aio)
 {
+       struct ll_dio_pages *pv = &aio->cda_dio_pages;
        struct cl_page    *page;
        struct cl_2queue  *queue = &io->ci_queue;
        struct cl_object  *obj = io->ci_obj;
-       struct cl_sync_io *anchor = &pv->ldp_aio->cda_sync;
+       struct cl_sync_io *anchor = &aio->cda_sync;
        loff_t offset   = pv->ldp_file_offset;
        int io_pages    = 0;
        size_t page_size = cl_page_size(obj);
@@ -393,8 +354,7 @@ ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
                smp_mb();
                rc = cl_io_submit_rw(env, io, iot, queue);
                if (rc == 0) {
-                       cl_page_list_splice(&queue->c2_qout,
-                                       &pv->ldp_aio->cda_pages);
+                       cl_page_list_splice(&queue->c2_qout, &aio->cda_pages);
                } else {
                        atomic_add(-queue->c2_qin.pl_nr,
                                   &anchor->csi_sync_nr);
@@ -479,7 +439,7 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
        LASSERT(ll_aio->cda_iocb == iocb);
 
        while (iov_iter_count(iter)) {
-               struct ll_dio_pages pvec = {};
+               struct ll_dio_pages *pvec;
                struct page **pages;
 
                count = min_t(size_t, iov_iter_count(iter), MAX_DIO_SIZE);
@@ -497,26 +457,26 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
                ldp_aio = cl_aio_alloc(iocb, ll_i2info(inode)->lli_clob, ll_aio);
                if (!ldp_aio)
                        GOTO(out, result = -ENOMEM);
-               pvec.ldp_aio = ldp_aio;
+
+               pvec = &ldp_aio->cda_dio_pages;
 
                result = ll_get_user_pages(rw, iter, &pages,
-                                          &pvec.ldp_count, count);
+                                          &pvec->ldp_count, count);
                if (unlikely(result <= 0)) {
                        cl_sync_io_note(env, &ldp_aio->cda_sync, result);
                        GOTO(out, result);
                }
 
                count = result;
-               pvec.ldp_file_offset = file_offset;
-               pvec.ldp_pages = pages;
+               pvec->ldp_file_offset = file_offset;
+               pvec->ldp_pages = pages;
 
                result = ll_direct_rw_pages(env, io, count,
-                                           rw, inode, &pvec);
+                                           rw, inode, ldp_aio);
                /* We've submitted pages and can now remove the extra
                 * reference for that
                 */
                cl_sync_io_note(env, &ldp_aio->cda_sync, result);
-               ll_free_user_pages(pages, pvec.ldp_count);
 
                if (unlikely(result < 0))
                        GOTO(out, result);
index 8f6bdf3..d6b19db 100644 (file)
@@ -1220,8 +1220,11 @@ static void cl_aio_end(const struct lu_env *env, struct cl_sync_io *anchor)
        if (!aio->cda_no_aio_complete)
                aio_complete(aio->cda_iocb, ret ?: aio->cda_bytes, 0);
 
-       if (aio->cda_ll_aio)
+       if (aio->cda_ll_aio) {
+               ll_release_user_pages(aio->cda_dio_pages.ldp_pages,
+                                     aio->cda_dio_pages.ldp_count);
                cl_sync_io_note(env, &aio->cda_ll_aio->cda_sync, ret);
+       }
 
        EXIT;
 }
@@ -1278,6 +1281,32 @@ void cl_aio_free(const struct lu_env *env, struct cl_dio_aio *aio)
 }
 EXPORT_SYMBOL(cl_aio_free);
 
+/*
+ * ll_release_user_pages - tear down page struct array
+ * @pages: array of page struct pointers underlying target buffer
+ */
+void ll_release_user_pages(struct page **pages, int npages)
+{
+       int i;
+
+       if (npages == 0) {
+               LASSERT(!pages);
+               return;
+       }
+
+       for (i = 0; i < npages; i++) {
+               if (!pages[i])
+                       break;
+               put_page(pages[i]);
+       }
+
+#if defined(HAVE_DIO_ITER)
+       kvfree(pages);
+#else
+       OBD_FREE_PTR_ARRAY_LARGE(pages, npages);
+#endif
+}
+EXPORT_SYMBOL(ll_release_user_pages);
 
 /**
  * Indicate that transfer of a single page completed.