- We currently store both 'transient' page and inode page in the
same radix tree, which will cause trouble for the race handling
of concurrent dio and buffered read, imagine the following case:
dio created a 'transient' page for a given file offset in the
radix tree, while a concurrent buffered read on the same offset
happened, the reader will try to find the exsting cached page by
searching the radix tree, however, the 'transient' page is found,
and the read will fail for -EBUSY at the end.
To make the situation worse, there are two level of radix trees for
a give file page (object and sub-object), above race can happen
in both levels and we have to make sure page type consistence
between these two levels.
Actually, it doesn't make sense to store these disposable 'transient'
page in the radix tree, so we just remove them in this patch.
- In cl_page_alloc(), if the coo_page_init() fails, we should call
cl_page_delete0() to break the linkage between vmpage and cl_page
before calling cl_page_free().
Signed-off-by: Niu Yawei <niu@whamcloud.com>
Change-Id: If2fa85495e6e78b330571b3348ac55a7796a4a9f
Reviewed-on: http://review.whamcloud.com/1072
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Jinshan Xiong <jay@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lpg->lps_invalid = 0;
result = NULL;
} else {
lpg->lps_invalid = 0;
result = NULL;
} else {
- /*
- * This is only possible when TRANSIENT page
- * is being created, and CACHEABLE sub-page
- * (attached to already existing top-page) has
- * been found. Tell cl_page_find() to use
- * existing page.
- */
- LASSERT(subpage->cp_type == CPT_CACHEABLE);
- LASSERT(page->cp_type == CPT_TRANSIENT);
- /* TODO: this is problematic, what if the page is being freed? */
- result = cl_page_top(subpage);
- cl_page_get(result);
- cl_page_put(env, subpage);
+ CL_PAGE_DEBUG(D_ERROR, env, page, "parent page\n");
+ CL_PAGE_DEBUG(D_ERROR, env, subpage, "child page\n");
+ LASSERT(0);
for (i = 0, j = 0; i < nr; ++i) {
page = pvec[i];
pvec[i] = NULL;
for (i = 0, j = 0; i < nr; ++i) {
page = pvec[i];
pvec[i] = NULL;
+
+ LASSERT(page->cp_type == CPT_CACHEABLE);
if (page->cp_index > end)
break;
if (page->cp_state == CPS_FREEING)
continue;
if (page->cp_index > end)
break;
if (page->cp_state == CPS_FREEING)
continue;
- if (page->cp_type == CPT_TRANSIENT) {
- /* God, we found a transient page!*/
- continue;
- }
slice = cl_page_at_trusted(page, dtype);
/*
slice = cl_page_at_trusted(page, dtype);
/*
err = o->co_ops->coo_page_init(env, o,
page, vmpage);
if (err != NULL) {
err = o->co_ops->coo_page_init(env, o,
page, vmpage);
if (err != NULL) {
- cl_page_state_set_trust(page,
- CPS_FREEING);
+ cl_page_delete0(env, page, 0);
cl_page_free(env, page);
page = err;
break;
cl_page_free(env, page);
page = err;
break;
enum cl_page_type type,
struct cl_page *parent)
{
enum cl_page_type type,
struct cl_page *parent)
{
+ struct cl_page *page = NULL;
struct cl_page *ghost = NULL;
struct cl_object_header *hdr;
struct cl_site *site = cl_object_site(o);
struct cl_page *ghost = NULL;
struct cl_object_header *hdr;
struct cl_site *site = cl_object_site(o);
cl_page_vmpage(env, page) == vmpage &&
(void *)radix_tree_lookup(&hdr->coh_tree,
idx) == page));
cl_page_vmpage(env, page) == vmpage &&
(void *)radix_tree_lookup(&hdr->coh_tree,
idx) == page));
- } else {
- cfs_spin_lock(&hdr->coh_page_guard);
- page = cl_page_lookup(hdr, idx);
- cfs_spin_unlock(&hdr->coh_page_guard);
if (page != NULL) {
cfs_atomic_inc(&site->cs_pages.cs_hit);
RETURN(page);
if (page != NULL) {
cfs_atomic_inc(&site->cs_pages.cs_hit);
RETURN(page);
err = cl_page_alloc(env, o, idx, vmpage, type, &page);
if (err != 0)
RETURN(page);
err = cl_page_alloc(env, o, idx, vmpage, type, &page);
if (err != 0)
RETURN(page);
+
+ if (type == CPT_TRANSIENT) {
+ if (parent) {
+ LASSERT(page->cp_parent == NULL);
+ page->cp_parent = parent;
+ parent->cp_child = page;
+ }
+ RETURN(page);
+ }
+
/*
* XXX optimization: use radix_tree_preload() here, and change tree
* gfp mask to GFP_KERNEL in cl_object_header_init().
/*
* XXX optimization: use radix_tree_preload() here, and change tree
* gfp mask to GFP_KERNEL in cl_object_header_init().
* which is very useful during diagnosing and debugging.
*/
page = ERR_PTR(err);
* which is very useful during diagnosing and debugging.
*/
page = ERR_PTR(err);
- if (err == -EEXIST) {
- /*
- * XXX in case of a lookup for CPT_TRANSIENT page,
- * nothing protects a CPT_CACHEABLE page from being
- * concurrently moved into CPS_FREEING state.
- */
- page = cl_page_lookup(hdr, idx);
- PASSERT(env, page, page != NULL);
- if (page->cp_type == CPT_TRANSIENT &&
- type == CPT_CACHEABLE) {
- /* XXX: We should make sure that inode sem
- * keeps being held in the lifetime of
- * transient pages, so it is impossible to
- * have conflicting transient pages.
- */
- cfs_spin_unlock(&hdr->coh_page_guard);
- cl_page_put(env, page);
- cfs_spin_lock(&hdr->coh_page_guard);
- page = ERR_PTR(-EBUSY);
- }
- }
+ CL_PAGE_DEBUG(D_ERROR, env, ghost,
+ "fail to insert into radix tree: %d\n", err);
} else {
if (parent) {
LASSERT(page->cp_parent == NULL);
} else {
if (parent) {
LASSERT(page->cp_parent == NULL);
cl_page_export(env, pg, 0);
cl_page_state_set0(env, pg, CPS_FREEING);
cl_page_export(env, pg, 0);
cl_page_state_set0(env, pg, CPS_FREEING);
- if (!radix)
- /*
- * !radix means that @pg is not yet in the radix tree, skip
- * removing it.
- */
- tmp = pg->cp_child;
- for (; tmp != NULL; tmp = tmp->cp_child) {
- void *value;
- struct cl_object_header *hdr;
-
- hdr = cl_object_header(tmp->cp_obj);
- cfs_spin_lock(&hdr->coh_page_guard);
- value = radix_tree_delete(&hdr->coh_tree, tmp->cp_index);
- PASSERT(env, tmp, value == tmp);
- PASSERT(env, tmp, hdr->coh_pages > 0);
- hdr->coh_pages--;
- cfs_spin_unlock(&hdr->coh_page_guard);
+ if (tmp->cp_type == CPT_CACHEABLE) {
+ if (!radix)
+ /* !radix means that @pg is not yet in the radix tree,
+ * skip removing it.
+ */
+ tmp = pg->cp_child;
+ for (; tmp != NULL; tmp = tmp->cp_child) {
+ void *value;
+ struct cl_object_header *hdr;
+
+ hdr = cl_object_header(tmp->cp_obj);
+ cfs_spin_lock(&hdr->coh_page_guard);
+ value = radix_tree_delete(&hdr->coh_tree,
+ tmp->cp_index);
+ PASSERT(env, tmp, value == tmp);
+ PASSERT(env, tmp, hdr->coh_pages > 0);
+ hdr->coh_pages--;
+ cfs_spin_unlock(&hdr->coh_page_guard);
+ }
}
CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),
}
CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_delete),