Whamcloud - gitweb
LU-481 Don't store 'transient' page in radix tree
[fs/lustre-release.git] / lustre / obdclass / cl_page.c
index 1f9aedd..0a466e9 100644 (file)
@@ -26,7 +26,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  */
 /*
@@ -99,7 +99,6 @@ static struct lu_kmem_descr cl_page_caches[] = {
  */
 static struct cl_page *cl_page_top_trusted(struct cl_page *page)
 {
-        LASSERT(cl_is_page(page));
         while (page->cp_parent != NULL)
                 page = page->cp_parent;
         return page;
@@ -118,7 +117,6 @@ static struct cl_page *cl_page_top_trusted(struct cl_page *page)
  */
 static void cl_page_get_trust(struct cl_page *page)
 {
-        LASSERT(cl_is_page(page));
         /*
          * Checkless version for trusted users.
          */
@@ -171,7 +169,6 @@ struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
 
         page = radix_tree_lookup(&hdr->coh_tree, index);
         if (page != NULL) {
-                LASSERT(cl_is_page(page));
                 cl_page_get_trust(page);
         }
         return page;
@@ -221,16 +218,13 @@ void cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
                 idx = pvec[nr - 1]->cp_index + 1;
                 for (i = 0, j = 0; i < nr; ++i) {
                         page = pvec[i];
-                        PASSERT(env, page, cl_is_page(page));
                         pvec[i] = NULL;
+
+                        LASSERT(page->cp_type == CPT_CACHEABLE);
                         if (page->cp_index > end)
                                 break;
                         if (page->cp_state == CPS_FREEING)
                                 continue;
-                        if (page->cp_type == CPT_TRANSIENT) {
-                                /* God, we found a transient page!*/
-                                continue;
-                        }
 
                         slice = cl_page_at_trusted(page, dtype);
                         /*
@@ -288,7 +282,6 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
         struct cl_object *obj  = page->cp_obj;
         struct cl_site   *site = cl_object_site(obj);
 
-        PASSERT(env, page, cl_is_page(page));
         PASSERT(env, page, cfs_list_empty(&page->cp_batch));
         PASSERT(env, page, page->cp_owner == NULL);
         PASSERT(env, page, page->cp_req == NULL);
@@ -306,7 +299,10 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
                 slice->cpl_ops->cpo_fini(env, slice);
         }
         cfs_atomic_dec(&site->cs_pages.cs_total);
+
+#ifdef LUSTRE_PAGESTATE_TRACKING
         cfs_atomic_dec(&site->cs_pages_state[page->cp_state]);
+#endif
         lu_object_ref_del_at(&obj->co_lu, page->cp_obj_ref, "cl_page", page);
         cl_object_put(env, obj);
         lu_ref_fini(&page->cp_reference);
@@ -359,8 +355,7 @@ static int cl_page_alloc(const struct lu_env *env, struct cl_object *o,
                                 err = o->co_ops->coo_page_init(env, o,
                                                                page, vmpage);
                                 if (err != NULL) {
-                                        cl_page_state_set_trust(page,
-                                                                CPS_FREEING);
+                                        cl_page_delete0(env, page, 0);
                                         cl_page_free(env, page);
                                         page = err;
                                         break;
@@ -370,7 +365,10 @@ static int cl_page_alloc(const struct lu_env *env, struct cl_object *o,
                 if (err == NULL) {
                         cfs_atomic_inc(&site->cs_pages.cs_busy);
                         cfs_atomic_inc(&site->cs_pages.cs_total);
+
+#ifdef LUSTRE_PAGESTATE_TRACKING
                         cfs_atomic_inc(&site->cs_pages_state[CPS_CACHED]);
+#endif
                         cfs_atomic_inc(&site->cs_pages.cs_created);
                         result = 0;
                 }
@@ -397,7 +395,7 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                                      enum cl_page_type type,
                                      struct cl_page *parent)
 {
-        struct cl_page          *page;
+        struct cl_page          *page = NULL;
         struct cl_page          *ghost = NULL;
         struct cl_object_header *hdr;
         struct cl_site          *site = cl_object_site(o);
@@ -411,7 +409,7 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
         hdr = cl_object_header(o);
         cfs_atomic_inc(&site->cs_pages.cs_lookup);
 
-        CDEBUG(D_PAGE, "%lu@"DFID" %p %lu %i\n",
+        CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
                idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
         /* fast path. */
         if (type == CPT_CACHEABLE) {
@@ -430,11 +428,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                              cl_page_vmpage(env, page) == vmpage &&
                              (void *)radix_tree_lookup(&hdr->coh_tree,
                                                        idx) == page));
-        } else {
-                cfs_spin_lock(&hdr->coh_page_guard);
-                page = cl_page_lookup(hdr, idx);
-                cfs_spin_unlock(&hdr->coh_page_guard);
         }
+
         if (page != NULL) {
                 cfs_atomic_inc(&site->cs_pages.cs_hit);
                 RETURN(page);
@@ -444,6 +439,16 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
         err = cl_page_alloc(env, o, idx, vmpage, type, &page);
         if (err != 0)
                 RETURN(page);
+
+        if (type == CPT_TRANSIENT) {
+                if (parent) {
+                        LASSERT(page->cp_parent == NULL);
+                        page->cp_parent = parent;
+                        parent->cp_child = page;
+                }
+                RETURN(page);
+        }
+
         /*
          * XXX optimization: use radix_tree_preload() here, and change tree
          * gfp mask to GFP_KERNEL in cl_object_header_init().
@@ -466,27 +471,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                  *     which is very useful during diagnosing and debugging.
                  */
                 page = ERR_PTR(err);
-                if (err == -EEXIST) {
-                        /*
-                         * XXX in case of a lookup for CPT_TRANSIENT page,
-                         * nothing protects a CPT_CACHEABLE page from being
-                         * concurrently moved into CPS_FREEING state.
-                         */
-                        page = cl_page_lookup(hdr, idx);
-                        PASSERT(env, page, page != NULL);
-                        if (page->cp_type == CPT_TRANSIENT &&
-                            type == CPT_CACHEABLE) {
-                                /* XXX: We should make sure that inode sem
-                                 * keeps being held in the lifetime of
-                                 * transient pages, so it is impossible to
-                                 * have conflicting transient pages.
-                                 */
-                                cfs_spin_unlock(&hdr->coh_page_guard);
-                                cl_page_put(env, page);
-                                cfs_spin_lock(&hdr->coh_page_guard);
-                                page = ERR_PTR(-EBUSY);
-                        }
-                }
+                CL_PAGE_DEBUG(D_ERROR, env, ghost,
+                              "fail to insert into radix tree: %d\n", err);
         } else {
                 if (parent) {
                         LASSERT(page->cp_parent == NULL);
@@ -529,7 +515,6 @@ static inline int cl_page_invariant(const struct cl_page *pg)
         struct cl_page          *child;
         struct cl_io            *owner;
 
-        LASSERT(cl_is_page(pg));
         /*
          * Page invariant is protected by a VM lock.
          */
@@ -563,7 +548,9 @@ static void cl_page_state_set0(const struct lu_env *env,
                                struct cl_page *page, enum cl_page_state state)
 {
         enum cl_page_state old;
+#ifdef LUSTRE_PAGESTATE_TRACKING
         struct cl_site *site = cl_object_site(page->cp_obj);
+#endif
 
         /*
          * Matrix of allowed state transitions [old][new], for sanity
@@ -610,14 +597,16 @@ static void cl_page_state_set0(const struct lu_env *env,
         ENTRY;
         old = page->cp_state;
         PASSERT(env, page, allowed_transitions[old][state]);
-        CL_PAGE_HEADER(D_TRACE, env, page, "%i -> %i\n", old, state);
+        CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
         for (; page != NULL; page = page->cp_child) {
                 PASSERT(env, page, page->cp_state == old);
                 PASSERT(env, page,
                         equi(state == CPS_OWNED, page->cp_owner != NULL));
 
+#ifdef LUSTRE_PAGESTATE_TRACKING
                 cfs_atomic_dec(&site->cs_pages_state[page->cp_state]);
                 cfs_atomic_inc(&site->cs_pages_state[state]);
+#endif
                 cl_page_state_set_trust(page, state);
         }
         EXIT;
@@ -664,7 +653,7 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page)
         PASSERT(env, page, cfs_atomic_read(&page->cp_ref) > !!page->cp_parent);
 
         ENTRY;
-        CL_PAGE_HEADER(D_TRACE, env, page, "%i\n",
+        CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
                        cfs_atomic_read(&page->cp_ref));
 
         hdr = cl_object_header(cl_object_top(page->cp_obj));
@@ -754,7 +743,7 @@ struct cl_page *cl_vmpage_page(cfs_page_t *vmpage, struct cl_object *obj)
                 }
         }
         cfs_spin_unlock(&hdr->coh_page_guard);
-        LASSERT(ergo(page, cl_is_page(page) && page->cp_type == CPT_CACHEABLE));
+        LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
         RETURN(page);
 }
 EXPORT_SYMBOL(cl_vmpage_page);
@@ -770,20 +759,6 @@ struct cl_page *cl_page_top(struct cl_page *page)
 }
 EXPORT_SYMBOL(cl_page_top);
 
-/**
- * Returns true if \a addr is an address of an allocated cl_page. Used in
- * assertions. This check is optimistically imprecise, i.e., it occasionally
- * returns true for the incorrect addresses, but if it returns false, then the
- * address is guaranteed to be incorrect. (Should be named cl_pagep().)
- *
- * \see cl_is_lock()
- */
-int cl_is_page(const void *addr)
-{
-        return cfs_mem_is_in_cache(addr, cl_page_kmem);
-}
-EXPORT_SYMBOL(cl_is_page);
-
 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
                                        const struct lu_device_type *dtype)
 {
@@ -1169,23 +1144,25 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
         cl_page_export(env, pg, 0);
         cl_page_state_set0(env, pg, CPS_FREEING);
 
-        if (!radix)
-                /*
-                 * !radix means that @pg is not yet in the radix tree, skip
-                 * removing it.
-                 */
-                tmp = pg->cp_child;
-        for (; tmp != NULL; tmp = tmp->cp_child) {
-                void                    *value;
-                struct cl_object_header *hdr;
-
-                hdr = cl_object_header(tmp->cp_obj);
-                cfs_spin_lock(&hdr->coh_page_guard);
-                value = radix_tree_delete(&hdr->coh_tree, tmp->cp_index);
-                PASSERT(env, tmp, value == tmp);
-                PASSERT(env, tmp, hdr->coh_pages > 0);
-                hdr->coh_pages--;
-                cfs_spin_unlock(&hdr->coh_page_guard);
+        if (tmp->cp_type == CPT_CACHEABLE) {
+                if (!radix)
+                        /* !radix means that @pg is not yet in the radix tree,
+                         * skip removing it.
+                         */
+                        tmp = pg->cp_child;
+                for (; tmp != NULL; tmp = tmp->cp_child) {
+                        void                    *value;
+                        struct cl_object_header *hdr;
+
+                        hdr = cl_object_header(tmp->cp_obj);
+                        cfs_spin_lock(&hdr->coh_page_guard);
+                        value = radix_tree_delete(&hdr->coh_tree,
+                                                  tmp->cp_index);
+                        PASSERT(env, tmp, value == tmp);
+                        PASSERT(env, tmp, hdr->coh_pages > 0);
+                        hdr->coh_pages--;
+                        cfs_spin_unlock(&hdr->coh_page_guard);
+                }
         }
 
         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
@@ -1335,7 +1312,7 @@ int cl_page_prep(const struct lu_env *env, struct cl_io *io,
         KLASSERT(ergo(crt == CRT_WRITE && pg->cp_type == CPT_CACHEABLE,
                       equi(result == 0,
                            PageWriteback(cl_page_vmpage(env, pg)))));
-        CL_PAGE_HEADER(D_TRACE, env, pg, "%i %i\n", crt, result);
+        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
         return result;
 }
 EXPORT_SYMBOL(cl_page_prep);
@@ -1367,7 +1344,7 @@ void cl_page_completion(const struct lu_env *env,
         PINVRNT(env, pg, cl_page_invariant(pg));
 
         ENTRY;
-        CL_PAGE_HEADER(D_TRACE, env, pg, "%i %i\n", crt, ioret);
+        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
         if (crt == CRT_READ && ioret == 0) {
                 PASSERT(env, pg, !(pg->cp_flags & CPF_READ_COMPLETED));
                 pg->cp_flags |= CPF_READ_COMPLETED;
@@ -1417,7 +1394,7 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
                 PASSERT(env, pg, pg->cp_state == CPS_CACHED);
                 cl_page_io_start(env, pg, crt);
         }
-        CL_PAGE_HEADER(D_TRACE, env, pg, "%i %i\n", crt, result);
+        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
         RETURN(result);
 }
 EXPORT_SYMBOL(cl_page_make_ready);
@@ -1450,7 +1427,7 @@ int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
                 cl_page_owner_clear(pg);
                 cl_page_state_set(env, pg, CPS_CACHED);
         }
-        CL_PAGE_HEADER(D_TRACE, env, pg, "%i %i\n", crt, result);
+        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
         RETURN(result);
 }
 EXPORT_SYMBOL(cl_page_cache_add);
@@ -1539,7 +1516,7 @@ void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
 {
         PINVRNT(env, pg, cl_page_invariant(pg));
 
-        CL_PAGE_HEADER(D_TRACE, env, pg, "%i %i\n", from, to);
+        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
                        (const struct lu_env *,
                         const struct cl_page_slice *,int, int),