LU-481 Don't store 'transient' page in radix tree
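With this change, CPT_TRANSIENT cl_pages are no longer inserted into (or looked up from) the object's radix tree: cl_page_find0() allocates a transient page, links the parent/child pair and returns it directly, while only CPT_CACHEABLE pages are published in coh_tree under coh_page_guard, and cl_page_delete0() likewise removes only cacheable pages from the tree. The -EEXIST special-casing for a transient/cacheable collision and the cl_is_page() slab check become unnecessary and are dropped. A minimal illustration of the resulting behaviour follows; it is a sketch only, not part of the patch, and the env/obj/idx/vmpage variables are assumed to come from the caller's context:

        /* Illustrative sketch (not in the patch): after this change a
         * transient page is reachable only through the reference returned
         * by cl_page_find(); it is never published in hdr->coh_tree, so a
         * later cl_page_lookup() on the same index can only return a
         * cacheable page. */
        struct cl_page *trans;

        trans = cl_page_find(env, obj, idx, vmpage, CPT_TRANSIENT);
        if (!IS_ERR(trans)) {
                /* ... use the transient page, e.g. for direct I/O ... */
                cl_page_put(env, trans);
        }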
[fs/lustre-release.git] lustre/obdclass/cl_page.c
index 76fa7ca..0a466e9 100644
@@ -99,7 +99,6 @@ static struct lu_kmem_descr cl_page_caches[] = {
  */
 static struct cl_page *cl_page_top_trusted(struct cl_page *page)
 {
-        LASSERT(cl_is_page(page));
         while (page->cp_parent != NULL)
                 page = page->cp_parent;
         return page;
@@ -118,7 +117,6 @@ static struct cl_page *cl_page_top_trusted(struct cl_page *page)
  */
 static void cl_page_get_trust(struct cl_page *page)
 {
-        LASSERT(cl_is_page(page));
         /*
          * Checkless version for trusted users.
          */
@@ -171,7 +169,6 @@ struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
 
         page = radix_tree_lookup(&hdr->coh_tree, index);
         if (page != NULL) {
-                LASSERT(cl_is_page(page));
                 cl_page_get_trust(page);
         }
         return page;
@@ -221,16 +218,13 @@ void cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
                 idx = pvec[nr - 1]->cp_index + 1;
                 for (i = 0, j = 0; i < nr; ++i) {
                         page = pvec[i];
-                        PASSERT(env, page, cl_is_page(page));
                         pvec[i] = NULL;
+
+                        LASSERT(page->cp_type == CPT_CACHEABLE);
                         if (page->cp_index > end)
                                 break;
                         if (page->cp_state == CPS_FREEING)
                                 continue;
-                        if (page->cp_type == CPT_TRANSIENT) {
-                                /* God, we found a transient page!*/
-                                continue;
-                        }
 
                         slice = cl_page_at_trusted(page, dtype);
                         /*
@@ -288,7 +282,6 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
         struct cl_object *obj  = page->cp_obj;
         struct cl_site   *site = cl_object_site(obj);
 
-        PASSERT(env, page, cl_is_page(page));
         PASSERT(env, page, cfs_list_empty(&page->cp_batch));
         PASSERT(env, page, page->cp_owner == NULL);
         PASSERT(env, page, page->cp_req == NULL);
@@ -362,8 +355,7 @@ static int cl_page_alloc(const struct lu_env *env, struct cl_object *o,
                                 err = o->co_ops->coo_page_init(env, o,
                                                                page, vmpage);
                                 if (err != NULL) {
-                                        cl_page_state_set_trust(page,
-                                                                CPS_FREEING);
+                                        cl_page_delete0(env, page, 0);
                                         cl_page_free(env, page);
                                         page = err;
                                         break;
@@ -403,7 +395,7 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                                      enum cl_page_type type,
                                      struct cl_page *parent)
 {
-        struct cl_page          *page;
+        struct cl_page          *page = NULL;
         struct cl_page          *ghost = NULL;
         struct cl_object_header *hdr;
         struct cl_site          *site = cl_object_site(o);
@@ -417,7 +409,7 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
         hdr = cl_object_header(o);
         cfs_atomic_inc(&site->cs_pages.cs_lookup);
 
-        CDEBUG(D_PAGE, "%lu@"DFID" %p %lu %d\n",
+        CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
                idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
         /* fast path. */
         if (type == CPT_CACHEABLE) {
@@ -436,11 +428,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                              cl_page_vmpage(env, page) == vmpage &&
                              (void *)radix_tree_lookup(&hdr->coh_tree,
                                                        idx) == page));
-        } else {
-                cfs_spin_lock(&hdr->coh_page_guard);
-                page = cl_page_lookup(hdr, idx);
-                cfs_spin_unlock(&hdr->coh_page_guard);
         }
+
         if (page != NULL) {
                 cfs_atomic_inc(&site->cs_pages.cs_hit);
                 RETURN(page);
@@ -450,6 +439,16 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
         err = cl_page_alloc(env, o, idx, vmpage, type, &page);
         if (err != 0)
                 RETURN(page);
+
+        if (type == CPT_TRANSIENT) {
+                if (parent) {
+                        LASSERT(page->cp_parent == NULL);
+                        page->cp_parent = parent;
+                        parent->cp_child = page;
+                }
+                RETURN(page);
+        }
+
         /*
          * XXX optimization: use radix_tree_preload() here, and change tree
          * gfp mask to GFP_KERNEL in cl_object_header_init().
@@ -472,27 +471,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                  *     which is very useful during diagnosing and debugging.
                  */
                 page = ERR_PTR(err);
-                if (err == -EEXIST) {
-                        /*
-                         * XXX in case of a lookup for CPT_TRANSIENT page,
-                         * nothing protects a CPT_CACHEABLE page from being
-                         * concurrently moved into CPS_FREEING state.
-                         */
-                        page = cl_page_lookup(hdr, idx);
-                        PASSERT(env, page, page != NULL);
-                        if (page->cp_type == CPT_TRANSIENT &&
-                            type == CPT_CACHEABLE) {
-                                /* XXX: We should make sure that inode sem
-                                 * keeps being held in the lifetime of
-                                 * transient pages, so it is impossible to
-                                 * have conflicting transient pages.
-                                 */
-                                cfs_spin_unlock(&hdr->coh_page_guard);
-                                cl_page_put(env, page);
-                                cfs_spin_lock(&hdr->coh_page_guard);
-                                page = ERR_PTR(-EBUSY);
-                        }
-                }
+                CL_PAGE_DEBUG(D_ERROR, env, ghost,
+                              "fail to insert into radix tree: %d\n", err);
         } else {
                 if (parent) {
                         LASSERT(page->cp_parent == NULL);
@@ -535,7 +515,6 @@ static inline int cl_page_invariant(const struct cl_page *pg)
         struct cl_page          *child;
         struct cl_io            *owner;
 
-        LASSERT(cl_is_page(pg));
         /*
          * Page invariant is protected by a VM lock.
          */
@@ -764,7 +743,7 @@ struct cl_page *cl_vmpage_page(cfs_page_t *vmpage, struct cl_object *obj)
                 }
         }
         cfs_spin_unlock(&hdr->coh_page_guard);
-        LASSERT(ergo(page, cl_is_page(page) && page->cp_type == CPT_CACHEABLE));
+        LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
         RETURN(page);
 }
 EXPORT_SYMBOL(cl_vmpage_page);
@@ -780,20 +759,6 @@ struct cl_page *cl_page_top(struct cl_page *page)
 }
 EXPORT_SYMBOL(cl_page_top);
 
-/**
- * Returns true if \a addr is an address of an allocated cl_page. Used in
- * assertions. This check is optimistically imprecise, i.e., it occasionally
- * returns true for the incorrect addresses, but if it returns false, then the
- * address is guaranteed to be incorrect. (Should be named cl_pagep().)
- *
- * \see cl_is_lock()
- */
-int cl_is_page(const void *addr)
-{
-        return cfs_mem_is_in_cache(addr, cl_page_kmem);
-}
-EXPORT_SYMBOL(cl_is_page);
-
 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
                                        const struct lu_device_type *dtype)
 {
@@ -1179,23 +1144,25 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
         cl_page_export(env, pg, 0);
         cl_page_state_set0(env, pg, CPS_FREEING);
 
-        if (!radix)
-                /*
-                 * !radix means that @pg is not yet in the radix tree, skip
-                 * removing it.
-                 */
-                tmp = pg->cp_child;
-        for (; tmp != NULL; tmp = tmp->cp_child) {
-                void                    *value;
-                struct cl_object_header *hdr;
-
-                hdr = cl_object_header(tmp->cp_obj);
-                cfs_spin_lock(&hdr->coh_page_guard);
-                value = radix_tree_delete(&hdr->coh_tree, tmp->cp_index);
-                PASSERT(env, tmp, value == tmp);
-                PASSERT(env, tmp, hdr->coh_pages > 0);
-                hdr->coh_pages--;
-                cfs_spin_unlock(&hdr->coh_page_guard);
+        if (tmp->cp_type == CPT_CACHEABLE) {
+                if (!radix)
+                        /* !radix means that @pg is not yet in the radix tree,
+                         * skip removing it.
+                         */
+                        tmp = pg->cp_child;
+                for (; tmp != NULL; tmp = tmp->cp_child) {
+                        void                    *value;
+                        struct cl_object_header *hdr;
+
+                        hdr = cl_object_header(tmp->cp_obj);
+                        cfs_spin_lock(&hdr->coh_page_guard);
+                        value = radix_tree_delete(&hdr->coh_tree,
+                                                  tmp->cp_index);
+                        PASSERT(env, tmp, value == tmp);
+                        PASSERT(env, tmp, hdr->coh_pages > 0);
+                        hdr->coh_pages--;
+                        cfs_spin_unlock(&hdr->coh_page_guard);
+                }
         }
 
         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),