From c4f8279e40f9cbc6b266db19f239d8e175da089c Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Thu, 31 Jan 2013 16:12:42 +0400 Subject: [PATCH] LU-2722 clio: directIO thread races with completion thread ll_direct_IO_26()) ASSERTION( obj->cob_transient_pages == 0 ) failed The directIO thread submits pages for transfer in osc_io_submit() and waits for their completion. Upon completion, osc_completion() is called, which notifies waiters that the IO is completed and only after that puts the page. The original directIO thread wakes up and may hit the assertion on the last page, which has not yet been put by osc_completion(). Signed-off-by: Vitaly Fertman Change-Id: Ib912e913885a9ff2cb1e9c865ae2003a676c2b5b Xyratex-bug-id: MRP-838 Reviewed-on: http://review.whamcloud.com/5223 Reviewed-by: Jinshan Xiong Tested-by: Hudson Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Oleg Drokin --- lustre/obdclass/cl_page.c | 11 ++++++++++- lustre/osc/osc_cache.c | 11 +++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index b34555a..29a570c 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -1319,8 +1319,17 @@ void cl_page_completion(const struct lu_env *env, LASSERT(cl_page_is_vmlocked(env, pg)); LASSERT(pg->cp_sync_io == anchor); pg->cp_sync_io = NULL; + } + /* + * As page->cp_obj is pinned by a reference from page->cp_req, it is + * safe to call cl_page_put() without risking object destruction in a + * non-blocking context. 
+ */ + cl_page_put(env, pg); + + if (anchor) cl_sync_io_note(anchor, ioret); - } + EXIT; } EXPORT_SYMBOL(cl_page_completion); diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index fc896eb..37b249f 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -1283,8 +1283,6 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, opg->ops_submit_time = 0; srvlock = oap->oap_brw_flags & OBD_BRW_SRVLOCK; - cl_page_completion(env, page, crt, rc); - /* statistic */ if (rc == 0 && srvlock) { struct lu_device *ld = opg->ops_cl.cpl_obj->co_lu.lo_dev; @@ -1303,12 +1301,9 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, * reference counter protects page from concurrent reclaim. */ lu_ref_del(&page->cp_reference, "transfer", page); - /* - * As page->cp_obj is pinned by a reference from page->cp_req, it is - * safe to call cl_page_put() without risking object destruction in a - * non-blocking context. - */ - cl_page_put(env, page); + + cl_page_completion(env, page, crt, rc); + RETURN(0); } -- 1.8.3.1