From 9e213f7975423b69eae06b1e561516e6b26a2c72 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Thu, 7 Jul 2011 23:44:00 -0700 Subject: [PATCH] LU-481 Don't store 'transient' page in radix tree - We currently store both 'transient' page and inode page in the same radix tree, which will cause trouble for the race handling of concurrent dio and buffered read, imagine the following case: dio created a 'transient' page for a given file offset in the radix tree, while a concurrent buffered read on the same offset happened, the reader will try to find the exsting cached page by searching the radix tree, however, the 'transient' page is found, and the read will fail for -EBUSY at the end. To make the situation worse, there are two level of radix trees for a give file page (object and sub-object), above race can happen in both levels and we have to make sure page type consistence between these two levels. Actually, it doesn't make sense to store these disposable 'transient' page in the radix tree, so we just remove them in this patch. - In cl_page_alloc(), if the coo_page_init() fails, we should call cl_page_delete0() to break the linkage between vmpage and cl_page before calling cl_page_free(). Signed-off-by: Niu Yawei Change-Id: If2fa85495e6e78b330571b3348ac55a7796a4a9f Reviewed-on: http://review.whamcloud.com/1072 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Jinshan Xiong Reviewed-by: Oleg Drokin --- lustre/lov/lov_page.c | 16 ++------- lustre/obdclass/cl_page.c | 85 ++++++++++++++++++++--------------------------- 2 files changed, 39 insertions(+), 62 deletions(-) diff --git a/lustre/lov/lov_page.c b/lustre/lov/lov_page.c index f850589..deb06af 100644 --- a/lustre/lov/lov_page.c +++ b/lustre/lov/lov_page.c @@ -184,19 +184,9 @@ struct cl_page *lov_page_init_raid0(const struct lu_env *env, lpg->lps_invalid = 0; result = NULL; } else { - /* - * This is only possible when TRANSIENT page - * is being created, and CACHEABLE sub-page - * (attached to already existing top-page) has - * been found. Tell cl_page_find() to use - * existing page. - */ - LASSERT(subpage->cp_type == CPT_CACHEABLE); - LASSERT(page->cp_type == CPT_TRANSIENT); - /* TODO: this is problematic, what if the page is being freed? */ - result = cl_page_top(subpage); - cl_page_get(result); - cl_page_put(env, subpage); + CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n"); + CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n"); + LASSERT(0); } EXIT; diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index 926035e..0a466e9 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -219,14 +219,12 @@ void cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, for (i = 0, j = 0; i < nr; ++i) { page = pvec[i]; pvec[i] = NULL; + + LASSERT(page->cp_type == CPT_CACHEABLE); if (page->cp_index > end) break; if (page->cp_state == CPS_FREEING) continue; - if (page->cp_type == CPT_TRANSIENT) { - /* God, we found a transient page!*/ - continue; - } slice = cl_page_at_trusted(page, dtype); /* @@ -357,8 +355,7 @@ static int cl_page_alloc(const struct lu_env *env, struct cl_object *o, err = o->co_ops->coo_page_init(env, o, page, vmpage); if (err != NULL) { - cl_page_state_set_trust(page, - CPS_FREEING); + cl_page_delete0(env, page, 0); cl_page_free(env, page); page = err; break; @@ -398,7 +395,7 @@ static struct cl_page *cl_page_find0(const struct lu_env *env, enum cl_page_type type, struct cl_page *parent) { - struct cl_page *page; + struct cl_page *page = NULL; struct cl_page *ghost = NULL; struct cl_object_header *hdr; struct cl_site *site = cl_object_site(o); @@ -431,11 +428,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env, cl_page_vmpage(env, page) == vmpage && (void *)radix_tree_lookup(&hdr->coh_tree, idx) == page)); - } else { - cfs_spin_lock(&hdr->coh_page_guard); - page = cl_page_lookup(hdr, idx); - cfs_spin_unlock(&hdr->coh_page_guard); } + if (page != NULL) { cfs_atomic_inc(&site->cs_pages.cs_hit); RETURN(page); @@ -445,6 +439,16 @@ static struct cl_page *cl_page_find0(const struct lu_env *env, err = cl_page_alloc(env, o, idx, vmpage, type, &page); if (err != 0) RETURN(page); + + if (type == CPT_TRANSIENT) { + if (parent) { + LASSERT(page->cp_parent == NULL); + page->cp_parent = parent; + parent->cp_child = page; + } + RETURN(page); + } + /* * XXX optimization: use radix_tree_preload() here, and change tree * gfp mask to GFP_KERNEL in cl_object_header_init(). @@ -467,27 +471,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env, * which is very useful during diagnosing and debugging. */ page = ERR_PTR(err); - if (err == -EEXIST) { - /* - * XXX in case of a lookup for CPT_TRANSIENT page, - * nothing protects a CPT_CACHEABLE page from being - * concurrently moved into CPS_FREEING state. - */ - page = cl_page_lookup(hdr, idx); - PASSERT(env, page, page != NULL); - if (page->cp_type == CPT_TRANSIENT && - type == CPT_CACHEABLE) { - /* XXX: We should make sure that inode sem - * keeps being held in the lifetime of - * transient pages, so it is impossible to - * have conflicting transient pages. - */ - cfs_spin_unlock(&hdr->coh_page_guard); - cl_page_put(env, page); - cfs_spin_lock(&hdr->coh_page_guard); - page = ERR_PTR(-EBUSY); - } - } + CL_PAGE_DEBUG(D_ERROR, env, ghost, + "fail to insert into radix tree: %d\n", err); } else { if (parent) { LASSERT(page->cp_parent == NULL); @@ -1159,23 +1144,25 @@ static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg, cl_page_export(env, pg, 0); cl_page_state_set0(env, pg, CPS_FREEING); - if (!radix) - /* - * !radix means that @pg is not yet in the radix tree, skip - * removing it. - */ - tmp = pg->cp_child; - for (; tmp != NULL; tmp = tmp->cp_child) { - void *value; - struct cl_object_header *hdr; - - hdr = cl_object_header(tmp->cp_obj); - cfs_spin_lock(&hdr->coh_page_guard); - value = radix_tree_delete(&hdr->coh_tree, tmp->cp_index); - PASSERT(env, tmp, value == tmp); - PASSERT(env, tmp, hdr->coh_pages > 0); - hdr->coh_pages--; - cfs_spin_unlock(&hdr->coh_page_guard); + if (tmp->cp_type == CPT_CACHEABLE) { + if (!radix) + /* !radix means that @pg is not yet in the radix tree, + * skip removing it. + */ + tmp = pg->cp_child; + for (; tmp != NULL; tmp = tmp->cp_child) { + void *value; + struct cl_object_header *hdr; + + hdr = cl_object_header(tmp->cp_obj); + cfs_spin_lock(&hdr->coh_page_guard); + value = radix_tree_delete(&hdr->coh_tree, + tmp->cp_index); + PASSERT(env, tmp, value == tmp); + PASSERT(env, tmp, hdr->coh_pages > 0); + hdr->coh_pages--; + cfs_spin_unlock(&hdr->coh_page_guard); + } } CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete), -- 1.8.3.1