LU-9920 vvp: dirty pages with pagevec

author Patrick Farrell <pfarrell@whamcloud.com>

Fri, 13 Sep 2019 19:27:40 +0000 (15:27 -0400)

committer Oleg Drokin <green@whamcloud.com>

Mon, 30 Sep 2019 23:12:12 +0000 (23:12 +0000)
author Patrick Farrell <pfarrell@whamcloud.com>
Fri, 13 Sep 2019 19:27:40 +0000 (15:27 -0400)
committer Oleg Drokin <green@whamcloud.com>
Mon, 30 Sep 2019 23:12:12 +0000 (23:12 +0000)
diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4

index 3995f81..3dc3537 100644 (file)
--- a/lustre/autoconf/lustre-core.m4
+++ b/lustre/autoconf/lustre-core.m4
@@ -1874,6 +1874,23 @@ bio_endio, [
  ]) # LC_BIO_ENDIO_USES_ONE_ARG
  
  #
+# LC_ACCOUNT_PAGE_DIRTIED_3ARGS
+#
+# Kernel version 4.2: account_page_dirtied() takes 3 arguments
+#
+AC_DEFUN([LC_ACCOUNT_PAGE_DIRTIED_3ARGS], [
+LB_CHECK_COMPILE([if 'account_page_dirtied' with 3 args exists],
+account_page_dirtied, [
+       #include <linux/mm.h>
+],[
+       account_page_dirtied(NULL, NULL, NULL);
+],[
+       AC_DEFINE(HAVE_ACCOUNT_PAGE_DIRTIED_3ARGS, 1,
+               [account_page_dirtied takes three arguments])
+])
+]) # LC_ACCOUNT_PAGE_DIRTIED_3ARGS
+
+#
  # LC_HAVE_INTERVAL_EXP_BLK_INTEGRITY
  #
  # 4.3 replace interval with interval_exp in 'struct blk_integrity'
@@ -2225,6 +2242,23 @@ EXTRA_KCFLAGS="$tmp_flags"
  ]) # LC_D_IN_LOOKUP
  
  #
+# LC_LOCK_PAGE_MEMCG
+#
+# Kernel version 4.6 adds lock_page_memcg
+#
+AC_DEFUN([LC_LOCK_PAGE_MEMCG], [
+LB_CHECK_COMPILE([if 'lock_page_memcg' is defined],
+lock_page_memcg, [
+       #include <linux/memcontrol.h>
+],[
+       lock_page_memcg(NULL);
+],[
+       AC_DEFINE(HAVE_LOCK_PAGE_MEMCG, 1,
+               [lock_page_memcg is defined])
+])
+]) # LC_LOCK_PAGE_MEMCG
+
+#
  # LC_DIRECTIO_2ARGS
  #
  # Kernel version 4.7 commit c8b8e32d700fe943a935e435ae251364d016c497
@@ -3036,6 +3070,7 @@ AC_DEFUN([LC_PROG_LINUX], [
         # 4.2
         LC_BIO_ENDIO_USES_ONE_ARG
         LC_SYMLINK_OPS_USE_NAMEIDATA
+       LC_ACCOUNT_PAGE_DIRTIED_3ARGS
  
         # 4.3
         LC_HAVE_INTERVAL_EXP_BLK_INTEGRITY
@@ -3061,6 +3096,7 @@ AC_DEFUN([LC_PROG_LINUX], [
         # 4.6
         LC_HAVE_IN_COMPAT_SYSCALL
         LC_HAVE_XATTR_HANDLER_INODE_PARAM
+       LC_LOCK_PAGE_MEMCG
  
         # 4.7
         LC_D_IN_LOOKUP
diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h

index 695e87b..b475955 100644 (file)
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -1472,7 +1472,7 @@ struct cl_io_slice {
  };
  
  typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
-                             struct cl_page *);
+                             struct pagevec *);
  
  struct cl_read_ahead {
         /* Maximum page index the readahead window will end.
diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h

index 0c0b3ba..ad73cb7 100644 (file)
--- a/lustre/include/lustre_compat.h
+++ b/lustre/include/lustre_compat.h
@@ -755,8 +755,16 @@ static inline bool bdev_integrity_enabled(struct block_device *bdev, int rw)
  #define page_tree i_pages
  #else
  #define i_pages tree_lock
-#define xa_lock_irq(lockp) spin_lock_irq(lockp)
-#define xa_unlock_irq(lockp) spin_unlock_irq(lockp)
+#endif
+
+#ifndef xa_lock_irqsave
+#define xa_lock_irqsave(lockp, flags) spin_lock_irqsave(lockp, flags)
+#define xa_unlock_irqrestore(lockp, flags) spin_unlock_irqrestore(lockp, flags)
+#endif
+
+#ifndef HAVE_LOCK_PAGE_MEMCG
+#define lock_page_memcg(page) do {} while (0)
+#define unlock_page_memcg(page) do {} while (0)
  #endif
  
  #ifndef KMEM_CACHE_USERCOPY
diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h

index 0c2cc11..bcd6b85 100644 (file)
--- a/lustre/include/lustre_osc.h
+++ b/lustre/include/lustre_osc.h
@@ -592,9 +592,9 @@ int osc_set_async_flags(struct osc_object *obj, struct osc_page *opg,
  int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
                         struct page *page, loff_t offset);
  int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
-                      struct osc_page *ops);
-int osc_page_cache_add(const struct lu_env *env,
-                      const struct cl_page_slice *slice, struct cl_io *io);
+                      struct osc_page *ops, cl_commit_cbt cb);
+int osc_page_cache_add(const struct lu_env *env, struct osc_page *opg,
+                      struct cl_io *io, cl_commit_cbt cb);
  int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
                             struct osc_page *ops);
  int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c

index 49b9d4c..38bd1d4 100644 (file)
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -2232,6 +2232,8 @@ void ll_delete_inode(struct inode *inode)
         struct ll_inode_info *lli = ll_i2info(inode);
         struct address_space *mapping = &inode->i_data;
         unsigned long nrpages;
+       unsigned long flags;
+
         ENTRY;
  
         if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL) {
@@ -2256,9 +2258,9 @@ void ll_delete_inode(struct inode *inode)
          */
         nrpages = mapping->nrpages;
         if (nrpages) {
-               xa_lock_irq(&mapping->i_pages);
+               xa_lock_irqsave(&mapping->i_pages, flags);
                 nrpages = mapping->nrpages;
-               xa_unlock_irq(&mapping->i_pages);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
         } /* Workaround end */
  
         LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, "
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c

index 15fcc92..8f8b724 100644 (file)
--- a/lustre/llite/vvp_io.c
+++ b/lustre/llite/vvp_io.c
@@ -39,6 +39,8 @@
  
  
  #include <obd.h>
+#include <linux/pagevec.h>
+#include <linux/memcontrol.h>
  #include "llite_internal.h"
  #include "vvp_internal.h"
  #include <libcfs/linux/linux-misc.h>
@@ -916,19 +918,114 @@ static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
         RETURN(bytes > 0 ? bytes : rc);
  }
  
+/* Mark every page in @pvec dirty under a single mapping lock acquisition.
+ * Derived from the kernel's set_page_dirty() and __set_page_dirty_nobuffers();
+ * last kernel change to that area: b93b016313b3ba8003c3b8bb71f569af91f19fc7
+ *
+ * Current with Linus' tip of tree (7/13/2019):
+ * v5.2-rc4-224-ge01e060fe0
+ * Includes backwards compat for 3.x and 4.x kernels relating to memcg
+ * handling and the rename of the radix tree to xarray. */
+void vvp_set_pagevec_dirty(struct pagevec *pvec)
+{
+       struct page *page = pvec->pages[0];
+       struct address_space *mapping = page->mapping;
+#if defined HAVE_ACCOUNT_PAGE_DIRTIED_3ARGS
+       struct mem_cgroup *memcg;
+#endif
+       unsigned long flags;
+       int count = pagevec_count(pvec);
+       int dirtied = 0;
+       int i = 0;
+
+       ENTRY;
+
+       /* From set_page_dirty */
+       for (i = 0; i < count; i++)
+               ClearPageReclaim(pvec->pages[i]);
+
+       LASSERTF(page->mapping,
+                "mapping must be set. page %p, page->private (cl_page) %p",
+                page, (void *) page->private);
+
+       /* Rest of code derived from __set_page_dirty_nobuffers */
+       xa_lock_irqsave(&mapping->i_pages, flags);
+
+       /* Notes on differences with __set_page_dirty_nobuffers:
+        * 1. We don't need to call page_mapping because we know this is a page
+        * cache page.
+        * 2. We have the pages locked, so there is no need for the careful
+        * mapping/mapping2 dance.
+        * 3. No mapping is impossible. (Race w/truncate mentioned in
+        * dirty_nobuffers should be impossible because we hold the page lock.)
+        * 4. All mappings are the same because i/o is only to one file.
+        * 5. We invert the lock order on lock_page_memcg(page) and the mapping
+        * xa_lock, but this is the only function that should use that pair of
+        * locks and it can't race because Lustre locks pages throughout i/o.
+        */
+       for (i = 0; i < count; i++) {
+               page = pvec->pages[i];
+               lock_page_memcg(page);
+               if (TestSetPageDirty(page)) {
+                       unlock_page_memcg(page);
+                       continue;
+               }
+               LASSERTF(page->mapping == mapping,
+                        "all pages must have the same mapping.  page %p, mapping %p, first mapping %p\n",
+                        page, page->mapping, mapping);
+               WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+#ifdef HAVE_ACCOUNT_PAGE_DIRTIED_3ARGS
+               memcg = mem_cgroup_begin_page_stat(page);
+               account_page_dirtied(page, mapping, memcg);
+               mem_cgroup_end_page_stat(memcg);
+#else
+               account_page_dirtied(page, mapping);
+#endif
+               radix_tree_tag_set(&mapping->page_tree, page_index(page),
+                                  PAGECACHE_TAG_DIRTY);
+               dirtied++;
+               unlock_page_memcg(page);
+       }
+       xa_unlock_irqrestore(&mapping->i_pages, flags);
+
+       CDEBUG(D_VFSTRACE, "mapping %p, count %d, dirtied %d\n", mapping,
+              count, dirtied);
+
+       if (mapping->host && dirtied) {
+               /* !PageAnon && !swapper_space */
+               __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+       }
+
+       EXIT;
+}
+
  static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
-                               struct cl_page *page)
+                                 struct pagevec *pvec)
  {
-       struct page *vmpage = page->cp_vmpage;
+       int count = 0;
+       int i = 0;
+
+       ENTRY;
  
-       SetPageUptodate(vmpage);
-       set_page_dirty(vmpage);
+       count = pagevec_count(pvec);
+       LASSERT(count > 0);
  
-       cl_page_disown(env, io, page);
+       for (i = 0; i < count; i++) {
+               struct page *vmpage = pvec->pages[i];
+               SetPageUptodate(vmpage);
+       }
+
+       vvp_set_pagevec_dirty(pvec);
  
-       /* held in ll_cl_init() */
-       lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
-       cl_page_put(env, page);
+       for (i = 0; i < count; i++) {
+               struct page *vmpage = pvec->pages[i];
+               struct cl_page *page = (struct cl_page *) vmpage->private;
+               cl_page_disown(env, io, page);
+               lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
+               cl_page_put(env, page);
+       }
+
+       EXIT;
  }
  
  /* make sure the page list is contiguous */
@@ -1204,9 +1301,9 @@ static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
  }
  
  static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
-                                   struct cl_page *page)
+                                   struct pagevec *pvec)
  {
-       set_page_dirty(page->cp_vmpage);
+       vvp_set_pagevec_dirty(pvec);
  }
  
  static int vvp_io_fault_start(const struct lu_env *env,
diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c

index e08bf89..e029696 100644 (file)
--- a/lustre/mdc/mdc_request.c
+++ b/lustre/mdc/mdc_request.c
@@ -1134,16 +1134,17 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
          */
         unsigned long offset = hash_x_index(*hash, hash64);
         struct page *page;
+       unsigned long flags;
         int found;
  
-       xa_lock_irq(&mapping->i_pages);
+       xa_lock_irqsave(&mapping->i_pages, flags);
         found = radix_tree_gang_lookup(&mapping->page_tree,
                                        (void **)&page, offset, 1);
         if (found > 0 && !radix_tree_exceptional_entry(page)) {
                 struct lu_dirpage *dp;
  
                 get_page(page);
-               xa_unlock_irq(&mapping->i_pages);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
                 /*
                  * In contrast to find_lock_page() we are sure that directory
                  * page cannot be truncated (while DLM lock is held) and,
@@ -1192,7 +1193,7 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
                         page = ERR_PTR(-EIO);
                 }
         } else {
-               xa_unlock_irq(&mapping->i_pages);
+               xa_unlock_irqrestore(&mapping->i_pages, flags);
                 page = NULL;
         }
         return page;
diff --git a/lustre/obdclass/cl_io.c b/lustre/obdclass/cl_io.c

index 207dd95..661b26e 100644 (file)
--- a/lustre/obdclass/cl_io.c
+++ b/lustre/obdclass/cl_io.c
@@ -586,8 +586,8 @@ EXPORT_SYMBOL(cl_io_read_ahead);
   * \see cl_io_operations::cio_commit_async()
   */
  int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
-                       struct cl_page_list *queue, int from, int to,
-                       cl_commit_cbt cb)
+                      struct cl_page_list *queue, int from, int to,
+                      cl_commit_cbt cb)
  {
         const struct cl_io_slice *scan;
         int result = 0;
diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c

index 2acc4a0..07ed6b7 100644 (file)
--- a/lustre/obdecho/echo_client.c
+++ b/lustre/obdecho/echo_client.c
@@ -1310,16 +1310,23 @@ static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
  }
  
  static void echo_commit_callback(const struct lu_env *env, struct cl_io *io,
-                                struct cl_page *page)
+                                struct pagevec *pvec)
  {
         struct echo_thread_info *info;
         struct cl_2queue        *queue;
+       int i = 0;
  
         info = echo_env_info(env);
         LASSERT(io == &info->eti_io);
  
         queue = &info->eti_queue;
-       cl_page_list_add(&queue->c2_qout, page);
+
+       for (i = 0; i < pagevec_count(pvec); i++) {
+               struct page *vmpage = pvec->pages[i];
+               struct cl_page *page = (struct cl_page *)vmpage->private;
+
+               cl_page_list_add(&queue->c2_qout, page);
+       }
  }
  
  static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c

index 8cf0158..665a63a 100644 (file)
--- a/lustre/osc/osc_cache.c
+++ b/lustre/osc/osc_cache.c
@@ -2356,13 +2356,14 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
  EXPORT_SYMBOL(osc_prep_async_page);
  
  int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
-                      struct osc_page *ops)
+                      struct osc_page *ops, cl_commit_cbt cb)
  {
         struct osc_io *oio = osc_env_io(env);
         struct osc_extent     *ext = NULL;
         struct osc_async_page *oap = &ops->ops_oap;
         struct client_obd     *cli = oap->oap_cli;
         struct osc_object     *osc = oap->oap_obj;
+       struct pagevec        *pvec = &osc_env_info(env)->oti_pagevec;
         pgoff_t index;
         unsigned int tmp;
         unsigned int grants = 0;
@@ -2481,7 +2482,14 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
  
                 rc = 0;
                 if (grants == 0) {
-                       /* we haven't allocated grant for this page. */
+                       /* We haven't allocated grant for this page, and we
+                        * must not hold a page lock while we do enter_cache,
+                        * so we must mark dirty & unlock any pages in the
+                        * write commit pagevec. */
+                       if (pagevec_count(pvec)) {
+                               cb(env, io, pvec);
+                               pagevec_reinit(pvec);
+                       }
                         rc = osc_enter_cache(env, cli, oap, tmp);
                         if (rc == 0)
                                 grants = tmp;
diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c

index 0d2bf4f..b4a196f 100644 (file)
--- a/lustre/osc/osc_io.c
+++ b/lustre/osc/osc_io.c
@@ -39,6 +39,7 @@
  
  #include <lustre_obdo.h>
  #include <lustre_osc.h>
+#include <linux/pagevec.h>
  
  #include "osc_internal.h"
  
@@ -289,6 +290,7 @@ int osc_io_commit_async(const struct lu_env *env,
         struct cl_page  *page;
         struct cl_page  *last_page;
         struct osc_page *opg;
+       struct pagevec  *pvec = &osc_env_info(env)->oti_pagevec;
         int result = 0;
         ENTRY;
  
@@ -308,6 +310,8 @@ int osc_io_commit_async(const struct lu_env *env,
                 }
         }
  
+       ll_pagevec_init(pvec, 0);
+
         while (qin->pl_nr > 0) {
                 struct osc_async_page *oap;
  
@@ -327,7 +331,7 @@ int osc_io_commit_async(const struct lu_env *env,
  
                 /* The page may be already in dirty cache. */
                 if (list_empty(&oap->oap_pending_item)) {
-                       result = osc_page_cache_add(env, &opg->ops_cl, io);
+                       result = osc_page_cache_add(env, opg, io, cb);
                         if (result != 0)
                                 break;
                 }
@@ -337,11 +341,20 @@ int osc_io_commit_async(const struct lu_env *env,
  
                 cl_page_list_del(env, qin, page);
  
-               (*cb)(env, io, page);
-               /* Can't access page any more. Page can be in transfer and
-                * complete at any time. */
+               /* if there are no more slots, do the callback & reinit */
+               if (pagevec_add(pvec, page->cp_vmpage) == 0) {
+                       (*cb)(env, io, pvec);
+                       pagevec_reinit(pvec);
+               }
         }
  
+       /* Clean up any partially full pagevecs */
+       if (pagevec_count(pvec) != 0)
+               (*cb)(env, io, pvec);
+
+       /* Can't access these pages any more. Pages can be in transfer and
+        * complete at any time. */
+
         /* for sync write, kernel will wait for this page to be flushed before
          * osc_io_end() is called, so release it earlier.
          * for mkwrite(), it's known there is no further pages. */
diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c

index caa9f59..a5321e3 100644 (file)
--- a/lustre/osc/osc_page.c
+++ b/lustre/osc/osc_page.c
@@ -87,15 +87,14 @@ static void osc_page_transfer_add(const struct lu_env *env,
         osc_lru_use(osc_cli(obj), opg);
  }
  
-int osc_page_cache_add(const struct lu_env *env,
-                       const struct cl_page_slice *slice, struct cl_io *io)
+int osc_page_cache_add(const struct lu_env *env, struct osc_page *opg,
+                      struct cl_io *io, cl_commit_cbt cb)
  {
-       struct osc_page *opg = cl2osc_page(slice);
         int result;
         ENTRY;
  
         osc_page_transfer_get(opg, "transfer\0cache");
-       result = osc_queue_async_io(env, io, opg);
+       result = osc_queue_async_io(env, io, opg, cb);
         if (result != 0)
                 osc_page_transfer_put(env, opg);
         else
author	Patrick Farrell <pfarrell@whamcloud.com>
	Fri, 13 Sep 2019 19:27:40 +0000 (15:27 -0400)
committer	Oleg Drokin <green@whamcloud.com>
	Mon, 30 Sep 2019 23:12:12 +0000 (23:12 +0000)
lustre/autoconf/lustre-core.m4		patch \| blob \| history
lustre/include/cl_object.h		patch \| blob \| history
lustre/include/lustre_compat.h		patch \| blob \| history
lustre/include/lustre_osc.h		patch \| blob \| history
lustre/llite/llite_lib.c		patch \| blob \| history
lustre/llite/vvp_io.c		patch \| blob \| history
lustre/mdc/mdc_request.c		patch \| blob \| history
lustre/obdclass/cl_io.c		patch \| blob \| history
lustre/obdecho/echo_client.c		patch \| blob \| history
lustre/osc/osc_cache.c		patch \| blob \| history
lustre/osc/osc_io.c		patch \| blob \| history
lustre/osc/osc_page.c		patch \| blob \| history