Whamcloud - gitweb
LU-481 Don't store 'transient' page in radix tree
authorNiu Yawei <niu@whamcloud.com>
Fri, 8 Jul 2011 06:44:00 +0000 (23:44 -0700)
committerOleg Drokin <green@whamcloud.com>
Wed, 13 Jul 2011 17:28:07 +0000 (10:28 -0700)
- We currently store both 'transient' page and inode page in the
  same radix tree, which will cause trouble for the race handling
  of concurrent dio and buffered read, imagine the following case:

  dio created a 'transient' page for a given file offset in the
  radix tree, while a concurrent buffered read on the same offset
  happened, the reader will try to find the exsting cached page by
  searching the radix tree, however, the 'transient' page is found,
  and the read will fail for -EBUSY at the end.

  To make the situation worse, there are two level of radix trees for
  a give file page (object and sub-object), above race can happen
  in both levels and we have to make sure page type consistence
  between these two levels.

  Actually, it doesn't make sense to store these disposable 'transient'
  page in the radix tree, so we just remove them in this patch.

- In cl_page_alloc(), if the coo_page_init() fails, we should call
  cl_page_delete0() to break the linkage between vmpage and cl_page
  before calling cl_page_free().

Signed-off-by: Niu Yawei <niu@whamcloud.com>
Change-Id: If2fa85495e6e78b330571b3348ac55a7796a4a9f
Reviewed-on: http://review.whamcloud.com/1072
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Jinshan Xiong <jay@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lov/lov_page.c
lustre/obdclass/cl_page.c

index f850589..deb06af 100644 (file)
@@ -184,19 +184,9 @@ struct cl_page *lov_page_init_raid0(const struct lu_env *env,
                 lpg->lps_invalid = 0;
                 result = NULL;
         } else {
-                /*
-                 * This is only possible when TRANSIENT page
-                 * is being created, and CACHEABLE sub-page
-                 * (attached to already existing top-page) has
-                 * been found. Tell cl_page_find() to use
-                 * existing page.
-                 */
-                LASSERT(subpage->cp_type == CPT_CACHEABLE);
-                LASSERT(page->cp_type == CPT_TRANSIENT);
-                /* TODO: this is problematic, what if the page is being freed? */
-                result = cl_page_top(subpage);
-                cl_page_get(result);
-                cl_page_put(env, subpage);
+                CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n");
+                CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n");
+                LASSERT(0);
         }
 
         EXIT;
index 926035e..0a466e9 100644 (file)
@@ -219,14 +219,12 @@ void cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
                 for (i = 0, j = 0; i < nr; ++i) {
                         page = pvec[i];
                         pvec[i] = NULL;
+
+                        LASSERT(page->cp_type == CPT_CACHEABLE);
                         if (page->cp_index > end)
                                 break;
                         if (page->cp_state == CPS_FREEING)
                                 continue;
-                        if (page->cp_type == CPT_TRANSIENT) {
-                                /* God, we found a transient page!*/
-                                continue;
-                        }
 
                         slice = cl_page_at_trusted(page, dtype);
                         /*
@@ -357,8 +355,7 @@ static int cl_page_alloc(const struct lu_env *env, struct cl_object *o,
                                 err = o->co_ops->coo_page_init(env, o,
                                                                page, vmpage);
                                 if (err != NULL) {
-                                        cl_page_state_set_trust(page,
-                                                                CPS_FREEING);
+                                        cl_page_delete0(env, page, 0);
                                         cl_page_free(env, page);
                                         page = err;
                                         break;
@@ -398,7 +395,7 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                                      enum cl_page_type type,
                                      struct cl_page *parent)
 {
-        struct cl_page          *page;
+        struct cl_page          *page = NULL;
         struct cl_page          *ghost = NULL;
         struct cl_object_header *hdr;
         struct cl_site          *site = cl_object_site(o);
@@ -431,11 +428,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                              cl_page_vmpage(env, page) == vmpage &&
                              (void *)radix_tree_lookup(&hdr->coh_tree,
                                                        idx) == page));
-        } else {
-                cfs_spin_lock(&hdr->coh_page_guard);
-                page = cl_page_lookup(hdr, idx);
-                cfs_spin_unlock(&hdr->coh_page_guard);
         }
+
         if (page != NULL) {
                 cfs_atomic_inc(&site->cs_pages.cs_hit);
                 RETURN(page);
@@ -445,6 +439,16 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
         err = cl_page_alloc(env, o, idx, vmpage, type, &page);
         if (err != 0)
                 RETURN(page);
+
+        if (type == CPT_TRANSIENT) {
+                if (parent) {
+                        LASSERT(page->cp_parent == NULL);
+                        page->cp_parent = parent;
+                        parent->cp_child = page;
+                }
+                RETURN(page);
+        }
+
         /*
          * XXX optimization: use radix_tree_preload() here, and change tree
          * gfp mask to GFP_KERNEL in cl_object_header_init().
@@ -467,27 +471,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
                  *     which is very useful during diagnosing and debugging.
                  */
                 page = ERR_PTR(err);
-                if (err == -EEXIST) {
-                        /*
-                         * XXX in case of a lookup for CPT_TRANSIENT page,
-                         * nothing protects a CPT_CACHEABLE page from being
-                         * concurrently moved into CPS_FREEING state.
-                         */
-                        page = cl_page_lookup(hdr, idx);
-                        PASSERT(env, page, page != NULL);
-                        if (page->cp_type == CPT_TRANSIENT &&
-                            type == CPT_CACHEABLE) {
-                                /* XXX: We should make sure that inode sem
-                                 * keeps being held in the lifetime of
-                                 * transient pages, so it is impossible to
-                                 * have conflicting transient pages.
-                                 */
-                                cfs_spin_unlock(&hdr->coh_page_guard);
-                                cl_page_put(env, page);
-                                cfs_spin_lock(&hdr->coh_page_guard);
-                                page = ERR_PTR(-EBUSY);
-                        }
-                }
+                CL_PAGE_DEBUG(D_ERROR, env, ghost,
+                              "fail to insert into radix tree: %d\n", err);
         } else {
                 if (parent) {
                         LASSERT(page->cp_parent == NULL);
@@ -1159,23 +1144,25 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
         cl_page_export(env, pg, 0);
         cl_page_state_set0(env, pg, CPS_FREEING);
 
-        if (!radix)
-                /*
-                 * !radix means that @pg is not yet in the radix tree, skip
-                 * removing it.
-                 */
-                tmp = pg->cp_child;
-        for (; tmp != NULL; tmp = tmp->cp_child) {
-                void                    *value;
-                struct cl_object_header *hdr;
-
-                hdr = cl_object_header(tmp->cp_obj);
-                cfs_spin_lock(&hdr->coh_page_guard);
-                value = radix_tree_delete(&hdr->coh_tree, tmp->cp_index);
-                PASSERT(env, tmp, value == tmp);
-                PASSERT(env, tmp, hdr->coh_pages > 0);
-                hdr->coh_pages--;
-                cfs_spin_unlock(&hdr->coh_page_guard);
+        if (tmp->cp_type == CPT_CACHEABLE) {
+                if (!radix)
+                        /* !radix means that @pg is not yet in the radix tree,
+                         * skip removing it.
+                         */
+                        tmp = pg->cp_child;
+                for (; tmp != NULL; tmp = tmp->cp_child) {
+                        void                    *value;
+                        struct cl_object_header *hdr;
+
+                        hdr = cl_object_header(tmp->cp_obj);
+                        cfs_spin_lock(&hdr->coh_page_guard);
+                        value = radix_tree_delete(&hdr->coh_tree,
+                                                  tmp->cp_index);
+                        PASSERT(env, tmp, value == tmp);
+                        PASSERT(env, tmp, hdr->coh_pages > 0);
+                        hdr->coh_pages--;
+                        cfs_spin_unlock(&hdr->coh_page_guard);
+                }
         }
 
         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),