LU-1666 obdclass: reduce lock contention on coh_page_guard
[fs/lustre-release.git] lustre/obdclass/cl_page.c
index b60943f..ea160eb 100644
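
The patch replaces the single per-object spinlock cl_object_header::coh_page_guard with a per-page spinlock cl_page::cp_lock for protecting a page's reference count and state, so releases of different pages of the same object no longer contend on one lock. A rough structural sketch of the idea, using generic Linux locking primitives rather than the libcfs cfs_* wrappers the patch itself uses (the struct names and fields below are illustrative, not the real Lustre definitions):

    #include <linux/atomic.h>
    #include <linux/spinlock.h>

    /* Before: one guard in the object header serializes refcount/state
     * updates for every page of that object. */
    struct object_header_sketch {
            spinlock_t      page_guard;     /* cf. coh_page_guard */
    };

    /* After: each page carries its own lock for its refcount and state;
     * the child/parent links are instead protected by the VM page lock. */
    struct page_sketch {
            atomic_t        ref;            /* cf. cl_page::cp_ref  */
            spinlock_t      lock;           /* cf. cl_page::cp_lock */
    };
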
@@ -39,9 +39,6 @@
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 
 #include <libcfs/libcfs.h>
 #include <obd_class.h>
@@ -95,7 +92,7 @@ static struct lu_kmem_descr cl_page_caches[] = {
 
 /**
  * Internal version of cl_page_top, it should be called with page referenced,
- * or coh_page_guard held.
+ * or cp_lock held.
  */
 static struct cl_page *cl_page_top_trusted(struct cl_page *page)
 {
@@ -137,10 +134,8 @@ cl_page_at_trusted(const struct cl_page *page,
         const struct cl_page_slice *slice;
 
 #ifdef INVARIANT_CHECK
-        struct cl_object_header *ch = cl_object_header(page->cp_obj);
-
         if (!cfs_atomic_read(&page->cp_ref))
-                LASSERT_SPIN_LOCKED(&ch->coh_page_guard);
+                LASSERT_SPIN_LOCKED(&page->cp_lock);
 #endif
         ENTRY;
 
@@ -347,6 +342,7 @@ static int cl_page_alloc(const struct lu_env *env, struct cl_object *o,
                                                      "cl_page", page);
                 page->cp_index = ind;
                 cl_page_state_set_trust(page, CPS_CACHED);
+               cfs_spin_lock_init(&page->cp_lock);
                 page->cp_type = type;
                 CFS_INIT_LIST_HEAD(&page->cp_layers);
                 CFS_INIT_LIST_HEAD(&page->cp_batch);
@@ -418,6 +414,11 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
         /* fast path. */
         if (type == CPT_CACHEABLE) {
+               /* cl_page::cp_lock is used to protect the page state and
+                * refcount, but an external lock is needed to protect the
+                * child/parent relationship, so the vmpage lock must be
+                * held for this purpose. */
+               KLASSERT(PageLocked(vmpage));
                 /*
                  * cl_vmpage_page() can be called here without any locks as
                  *
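
Because cp_lock covers only the refcount and state, the child/parent chain of a cacheable page is protected solely by the VM page lock, which is why the fast path above now asserts PageLocked(). A hypothetical caller fragment, assuming cl_page_find() is the exported wrapper around cl_page_find0() shown here and that it returns an ERR_PTR on failure:

    /* Hypothetical lookup of a CPT_CACHEABLE page: the vmpage is locked
     * first so the child/parent chain cannot change underneath us. */
    lock_page(vmpage);
    page = cl_page_find(env, obj, idx, vmpage, CPT_CACHEABLE);
    if (!IS_ERR(page)) {
            /* ... use the cl_page while the vmpage stays locked ... */
            cl_page_put(env, page);
    }
    unlock_page(vmpage);
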
@@ -651,7 +652,6 @@ EXPORT_SYMBOL(cl_page_get);
  */
 void cl_page_put(const struct lu_env *env, struct cl_page *page)
 {
-        struct cl_object_header *hdr;
         struct cl_site *site = cl_object_site(page->cp_obj);
 
         PASSERT(env, page, cfs_atomic_read(&page->cp_ref) > !!page->cp_parent);
@@ -660,19 +660,18 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page)
         CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
                        cfs_atomic_read(&page->cp_ref));
 
-        hdr = cl_object_header(cl_object_top(page->cp_obj));
-        if (cfs_atomic_dec_and_lock(&page->cp_ref, &hdr->coh_page_guard)) {
+        if (cfs_atomic_dec_and_lock(&page->cp_ref, &page->cp_lock)) {
                 cfs_atomic_dec(&site->cs_pages.cs_busy);
                 /* We're going to access the page w/o a reference, but it's
-                 * ok because we have grabbed the lock coh_page_guard, which
+                 * ok because we hold cp_lock, which
                  * means nobody is able to free this page behind us.
                  */
                 if (page->cp_state == CPS_FREEING) {
                         /* We drop the page reference and check the page state
-                         * inside the coh_page_guard. So that if it gets here,
+                         * inside cp_lock, so if we get here,
                          * it is the REALLY last reference to this page.
                          */
-                        cfs_spin_unlock(&hdr->coh_page_guard);
+                        cfs_spin_unlock(&page->cp_lock);
 
                         LASSERT(cfs_atomic_read(&page->cp_ref) == 0);
                         PASSERT(env, page, page->cp_owner == NULL);
@@ -686,7 +685,7 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page)
                         EXIT;
                         return;
                 }
-                cfs_spin_unlock(&hdr->coh_page_guard);
+                cfs_spin_unlock(&page->cp_lock);
         }
 
         EXIT;
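
The release path above is the dec-and-lock idiom: the reference count is decremented atomically, and the per-page lock is taken only when the count reaches zero, so the FREEING check runs under a lock no other page shares. A condensed illustration with generic kernel primitives (names are illustrative; the real code checks cp_state and calls cl_page_free()):

    #include <linux/atomic.h>
    #include <linux/spinlock.h>

    enum sketch_state { SKETCH_CACHED, SKETCH_FREEING };

    static void sketch_put(atomic_t *ref, spinlock_t *lock,
                           const enum sketch_state *state)
    {
            if (!atomic_dec_and_lock(ref, lock))
                    return;                 /* other references remain */

            /* Count is zero and we hold the per-page lock, so nobody can
             * free or resurrect the page behind our back. */
            if (*state == SKETCH_FREEING) {
                    spin_unlock(lock);
                    /* genuinely the last reference: free the page here */
                    return;
            }
            spin_unlock(lock);              /* page stays cached */
    }
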
@@ -721,8 +720,8 @@ EXPORT_SYMBOL(cl_page_vmpage);
  */
 struct cl_page *cl_vmpage_page(cfs_page_t *vmpage, struct cl_object *obj)
 {
-        struct cl_page *page;
-        struct cl_object_header *hdr;
+       struct cl_page *top;
+       struct cl_page *page;
 
         ENTRY;
         KLASSERT(PageLocked(vmpage));
@@ -737,16 +736,18 @@ struct cl_page *cl_vmpage_page(cfs_page_t *vmpage, struct cl_object *obj)
          * This loop assumes that ->private points to the top-most page. This
          * can be rectified easily.
          */
-        hdr = cl_object_header(cl_object_top(obj));
-        cfs_spin_lock(&hdr->coh_page_guard);
-        for (page = (void *)vmpage->private;
-             page != NULL; page = page->cp_child) {
+        top = (struct cl_page *)vmpage->private;
+       if (top == NULL)
+               RETURN(NULL);
+
+       cfs_spin_lock(&top->cp_lock);
+        for (page = top; page != NULL; page = page->cp_child) {
                 if (cl_object_same(page->cp_obj, obj)) {
                         cl_page_get_trust(page);
                         break;
                 }
         }
-        cfs_spin_unlock(&hdr->coh_page_guard);
+        cfs_spin_unlock(&top->cp_lock);
         LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
         RETURN(page);
 }
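
cl_vmpage_page() now pins the chain by taking the top page's cp_lock instead of the object-wide guard; the calling convention is unchanged: the vmpage must be locked, and the returned cl_page (if any) carries a reference. A hypothetical usage fragment:

    /* Hypothetical caller: vmpage is already locked (the function asserts
     * PageLocked()); drop the returned reference with cl_page_put(). */
    page = cl_vmpage_page(vmpage, obj);
    if (page != NULL) {
            /* ... inspect or use the matching cl_page ... */
            cl_page_put(env, page);
    }
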
@@ -1026,15 +1027,14 @@ EXPORT_SYMBOL(cl_page_own_try);
 void cl_page_assume(const struct lu_env *env,
                     struct cl_io *io, struct cl_page *pg)
 {
-        PASSERT(env, pg, pg->cp_owner == NULL);
         PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
-        PINVRNT(env, pg, cl_page_invariant(pg));
 
         ENTRY;
         pg = cl_page_top(pg);
         io = cl_io_top(io);
 
         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
+        PASSERT(env, pg, pg->cp_owner == NULL);
         pg->cp_owner = io;
         pg->cp_task = current;
         cl_page_owner_set(pg);
@@ -1362,13 +1362,6 @@ void cl_page_completion(const struct lu_env *env,
                 pg->cp_sync_io = NULL;
                 cl_sync_io_note(anchor, ioret);
         }
-
-        /* Don't assert the page writeback bit here because the lustre file
-         * may be as a backend of swap space. in this case, the page writeback
-         * is set by VM, and obvious we shouldn't clear it at all. Fortunately
-         * this type of pages are all TRANSIENT pages. */
-        KLASSERT(ergo(pg->cp_type == CPT_CACHEABLE,
-                      !PageWriteback(cl_page_vmpage(env, pg))));
         EXIT;
 }
 EXPORT_SYMBOL(cl_page_completion);
@@ -1410,32 +1403,61 @@ EXPORT_SYMBOL(cl_page_make_ready);
  * its queues.
  *
  * \pre  cl_page_is_owned(pg, io)
- * \post ergo(result == 0,
- *            pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT)
+ * \post cl_page_is_owned(pg, io)
  *
  * \see cl_page_operations::cpo_cache_add()
  */
 int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
                       struct cl_page *pg, enum cl_req_type crt)
 {
-        int result;
+       const struct cl_page_slice *scan;
+       int result = 0;
 
-        PINVRNT(env, pg, crt < CRT_NR);
-        PINVRNT(env, pg, cl_page_is_owned(pg, io));
-        PINVRNT(env, pg, cl_page_invariant(pg));
+       PINVRNT(env, pg, crt < CRT_NR);
+       PINVRNT(env, pg, cl_page_is_owned(pg, io));
+       PINVRNT(env, pg, cl_page_invariant(pg));
 
-        ENTRY;
-        result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_cache_add));
-        if (result == 0) {
-                cl_page_owner_clear(pg);
-                cl_page_state_set(env, pg, CPS_CACHED);
-        }
-        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
-        RETURN(result);
+       ENTRY;
+
+       cfs_list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
+               if (scan->cpl_ops->io[crt].cpo_cache_add == NULL)
+                       continue;
+
+               result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
+               if (result != 0)
+                       break;
+       }
+       CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
+       RETURN(result);
 }
 EXPORT_SYMBOL(cl_page_cache_add);
 
 /**
+ * Called when a page is being written back at the kernel's request.
+ *
+ * \pre  cl_page_is_owned(pg, io)
+ * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
+ *
+ * \see cl_page_operations::cpo_flush()
+ */
+int cl_page_flush(const struct lu_env *env, struct cl_io *io,
+                 struct cl_page *pg)
+{
+       int result;
+
+       PINVRNT(env, pg, cl_page_is_owned(pg, io));
+       PINVRNT(env, pg, cl_page_invariant(pg));
+
+       ENTRY;
+
+       result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
+
+       CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
+       RETURN(result);
+}
+EXPORT_SYMBOL(cl_page_flush);
+
+/**
  * Checks whether the page is protected by an extent lock of at least the
  * required mode.
  *
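
The new cl_page_flush() entry point dispatches through cl_page_invoke() to the cl_page_operations::cpo_flush() method, so a layer participates simply by filling that slot. A sketch of what a layer-side implementation might look like, assuming cpo_flush() follows the same (env, slice, io) calling convention shown for cpo_cache_add() above (the layer name and body are hypothetical, and the Lustre cl_object definitions are assumed to be in scope):

    /* Hypothetical layer method for the new cpo_flush() hook: start
     * writeback of this layer's backing page.  A non-zero return is
     * propagated back to the caller of cl_page_flush(). */
    static int demo_page_flush(const struct lu_env *env,
                               const struct cl_page_slice *slice,
                               struct cl_io *io)
    {
            /* ... queue the underlying vmpage for writeback ... */
            return 0;
    }

    static const struct cl_page_operations demo_page_ops = {
            .cpo_flush = demo_page_flush,
            /* other page methods as required by the layer */
    };
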
@@ -1488,6 +1510,7 @@ int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
          * function, we just make cl_page_list functions happy. -jay
          */
         io->ci_obj = obj;
+       io->ci_ignore_layout = 1;
         result = cl_io_init(env, io, CIT_MISC, obj);
         if (result != 0) {
                 cl_io_fini(env, io);