From 9f604f5da06a2aa428388905ca9158b128e5016c Mon Sep 17 00:00:00 2001 From: Eric Mei Date: Wed, 3 Mar 2010 17:04:36 -0800 Subject: [PATCH] b=21106 retry page_gang_lookup. If a client has cached a huge number of pages, cl_lock_page_out() may hog the CPU for too long without releasing it, especially at a high debug level. This patch breaks up the process with a voluntary resched. r=jay r=wangdi --- lustre/include/cl_object.h | 3 ++- lustre/obdclass/cl_lock.c | 19 ++++++++++--------- lustre/obdclass/cl_page.c | 40 +++++++++++++++++++++++++++++----------- lustre/osc/osc_lock.c | 3 ++- 4 files changed, 43 insertions(+), 22 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 5081a74..58ef32a 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -2661,7 +2661,8 @@ void cl_page_gang_lookup(const struct lu_env *env, struct cl_io *io, pgoff_t start, pgoff_t end, struct cl_page_list *plist, - int nonblock); + int nonblock, + int *resched); struct cl_page *cl_page_find (const struct lu_env *env, struct cl_object *obj, pgoff_t idx, struct page *vmpage, diff --git a/lustre/obdclass/cl_lock.c b/lustre/obdclass/cl_lock.c index 575a54d..6535e3a 100644 --- a/lustre/obdclass/cl_lock.c +++ b/lustre/obdclass/cl_lock.c @@ -1927,6 +1927,7 @@ int cl_lock_page_out(const struct lu_env *env, struct cl_lock *lock, struct cl_2queue *queue = &info->clt_queue; struct cl_lock_descr *descr = &lock->cll_descr; long page_count; + int nonblock = 1, resched; int result; LINVRNT(cl_lock_invariant(env, lock)); @@ -1934,13 +1935,14 @@ int cl_lock_page_out(const struct lu_env *env, struct cl_lock *lock, io->ci_obj = cl_object_top(descr->cld_obj); result = cl_io_init(env, io, CIT_MISC, io->ci_obj); - if (result == 0) { - int nonblock = 1; + if (result != 0) + GOTO(out, result); -restart: + do { cl_2queue_init(queue); cl_page_gang_lookup(env, descr->cld_obj, io, descr->cld_start, - descr->cld_end, &queue->c2_qin, 
nonblock, + &resched); page_count = queue->c2_qin.pl_nr; if (page_count > 0) { result = cl_page_list_unmap(env, io, &queue->c2_qin); @@ -1963,11 +1965,10 @@ restart: } cl_2queue_fini(env, queue); - if (nonblock) { - nonblock = 0; - goto restart; - } - } + if (resched) + cfs_cond_resched(); + } while (resched || nonblock--); +out: cl_io_fini(env, io); RETURN(result); } diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index 603184e..1f9aedd 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -181,12 +181,17 @@ EXPORT_SYMBOL(cl_page_lookup); /** * Returns a list of pages by a given [start, end] of \a obj. * + * \param resched If not NULL, then we give up before hogging CPU for too + * long and set *resched = 1, in that case caller should implement a retry + * logic. + * * Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely * crucial in the face of [offset, EOF] locks. */ void cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, struct cl_io *io, pgoff_t start, pgoff_t end, - struct cl_page_list *queue, int nonblock) + struct cl_page_list *queue, int nonblock, + int *resched) { struct cl_object_header *hdr; struct cl_page *page; @@ -202,6 +207,8 @@ void cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, struct cl_page *pg); ENTRY; + if (resched != NULL) + *resched = 0; page_own = nonblock ? 
cl_page_own_try : cl_page_own; idx = start; @@ -266,6 +273,10 @@ void cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj, cfs_spin_lock(&hdr->coh_page_guard); if (nr < CLT_PVEC_SIZE) break; + if (resched != NULL && cfs_need_resched()) { + *resched = 1; + break; + } } cfs_spin_unlock(&hdr->coh_page_guard); EXIT; @@ -1477,6 +1488,7 @@ int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj) struct cl_object *obj = cl_object_top(clobj); struct cl_io *io; struct cl_page_list *plist; + int resched; int result; ENTRY; @@ -1495,16 +1507,22 @@ int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj) RETURN(io->ci_result); } - cl_page_list_init(plist); - cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF, plist, 0); - /* - * Since we're purging the pages of an object, we don't care - * the possible outcomes of the following functions. - */ - cl_page_list_unmap(env, io, plist); - cl_page_list_discard(env, io, plist); - cl_page_list_disown(env, io, plist); - cl_page_list_fini(env, plist); + do { + cl_page_list_init(plist); + cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF, plist, 0, + &resched); + /* + * Since we're purging the pages of an object, we don't care + * the possible outcomes of the following functions. 
+ */ + cl_page_list_unmap(env, io, plist); + cl_page_list_discard(env, io, plist); + cl_page_list_disown(env, io, plist); + cl_page_list_fini(env, plist); + + if (resched) + cfs_cond_resched(); + } while (resched); cl_io_fini(env, io); RETURN(result); diff --git a/lustre/osc/osc_lock.c b/lustre/osc/osc_lock.c index b96b962..bc552de 100644 --- a/lustre/osc/osc_lock.c +++ b/lustre/osc/osc_lock.c @@ -1411,7 +1411,8 @@ static int osc_lock_has_pages(struct osc_lock *olck) io->ci_obj = cl_object_top(obj); cl_io_init(env, io, CIT_MISC, io->ci_obj); cl_page_gang_lookup(env, obj, io, - descr->cld_start, descr->cld_end, plist, 0); + descr->cld_start, descr->cld_end, plist, 0, + NULL); cl_lock_page_list_fixup(env, io, lock, plist); if (plist->pl_nr > 0) { CL_LOCK_DEBUG(D_ERROR, env, lock, "still has pages\n"); -- 1.8.3.1