From 110d8d4952a9de607cf21f648d75e0b05ef0cee1 Mon Sep 17 00:00:00 2001
From: Jinshan Xiong
Date: Mon, 4 Nov 2013 19:39:58 -0800
Subject: [PATCH] LU-3321 obdclass: Add a preallocated percpu cl_env

This change adds support for a single preallocated cl_env per CPU
which can be used in circumstances where reschedule is not possible.
Currently this interface is only used by the ll_releasepage function.

Signed-off-by: Jinshan Xiong
Signed-off-by: Prakash Surya
Change-Id: I14a06294f0c2caae8806d7da134a8076f75ddc81
Reviewed-on: http://review.whamcloud.com/8174
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Lai Siyao
Reviewed-by: Bobi Jam
Reviewed-by: Oleg Drokin
---
 libcfs/include/libcfs/user-prim.h |  13 +++++
 lustre/include/cl_object.h        |  11 ++++
 lustre/llite/rw26.c               |  97 ++++++++++++++++++---------------
 lustre/obdclass/cl_lock.c         |   1 -
 lustre/obdclass/cl_object.c       | 110 +++++++++++++++++++++++++++++++++++++-
 lustre/obdclass/cl_page.c         |   1 -
 6 files changed, 187 insertions(+), 46 deletions(-)

diff --git a/libcfs/include/libcfs/user-prim.h b/libcfs/include/libcfs/user-prim.h
index f073ba5..33e0f62 100644
--- a/libcfs/include/libcfs/user-prim.h
+++ b/libcfs/include/libcfs/user-prim.h
@@ -71,6 +71,19 @@ typedef struct proc_dir_entry cfs_proc_dir_entry_t;
 #ifndef num_possible_cpus
 # define num_possible_cpus() 1
 #endif
+#ifndef get_cpu
+# define get_cpu() 0
+#endif
+#ifndef put_cpu
+# define put_cpu() do {} while (0)
+#endif
+#ifndef NR_CPUS
+# define NR_CPUS 1
+#endif
+#ifndef for_each_possible_cpu
+# define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+#endif
+
 /*
  * Wait Queue.
  */
diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index f57e2ac..6c9bdf0 100644
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -2769,6 +2769,15 @@ static inline void *cl_object_page_slice(struct cl_object *clob,
 	return (void *)((char *)page + clob->co_slice_off);
 }
 
+/**
+ * Return refcount of cl_object.
+ */
+static inline int cl_object_refc(struct cl_object *clob)
+{
+	struct lu_object_header *header = clob->co_lu.lo_header;
+	return cfs_atomic_read(&header->loh_ref);
+}
+
 /** @} cl_object */
 
 /** \defgroup cl_page cl_page
@@ -3250,6 +3259,8 @@ void cl_env_reexit (void *cookie);
 void cl_env_implant (struct lu_env *env, int *refcheck);
 void cl_env_unplant (struct lu_env *env, int *refcheck);
 unsigned cl_env_cache_purge(unsigned nr);
+struct lu_env *cl_env_percpu_get (void);
+void cl_env_percpu_put (struct lu_env *env);
 
 /** @} cl_env */
 
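The calling convention for the pair declared above is the one ll_releasepage() adopts below: cl_env_percpu_get() calls get_cpu(), which disables preemption, so the caller has exclusive use of this CPU's preallocated env and must not sleep until cl_env_percpu_put() drops it via put_cpu(). A minimal sketch of a hypothetical atomic-context caller (illustrative only, not part of the patch):

	static int example_atomic_callback(void)
	{
		struct lu_env *env;
		void *cookie;
		int result = 0;

		cookie = cl_env_reenter();  /* park any env already bound
					     * to this thread */
		env = cl_env_percpu_get();  /* get_cpu(): preemption off */

		/* ... use env; no sleeping or rescheduling allowed ... */

		cl_env_percpu_put(env);     /* put_cpu(): preemption back on */
		cl_env_reexit(cookie);
		return result;
	}
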
diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index 0a4b9a9..18098dc 100644
--- a/lustre/llite/rw26.c
+++ b/lustre/llite/rw26.c
@@ -118,49 +118,62 @@ static void ll_invalidatepage(struct page *vmpage, unsigned long offset)
 #endif
 static int ll_releasepage(struct page *vmpage, RELEASEPAGE_ARG_TYPE gfp_mask)
 {
-	struct cl_env_nest nest;
-	struct lu_env *env;
-	struct cl_object *obj;
-	struct cl_page *page;
-	struct address_space *mapping;
-	int result;
+	struct lu_env *env;
+	void *cookie;
+	struct cl_object *obj;
+	struct cl_page *page;
+	struct address_space *mapping;
+	int result = 0;
+
+	LASSERT(PageLocked(vmpage));
+	if (PageWriteback(vmpage) || PageDirty(vmpage))
+		return 0;
+
+	mapping = vmpage->mapping;
+	if (mapping == NULL)
+		return 1;
+
+	obj = ll_i2info(mapping->host)->lli_clob;
+	if (obj == NULL)
+		return 1;
+
+	/* 1 for caller, 1 for cl_page and 1 for page cache */
+	if (page_count(vmpage) > 3)
+		return 0;
+
+	page = cl_vmpage_page(vmpage, obj);
+	if (page == NULL)
+		return 1;
+
+	cookie = cl_env_reenter();
+	env = cl_env_percpu_get();
+	LASSERT(!IS_ERR(env));
+
+	if (!cl_page_in_use(page)) {
+		result = 1;
+		cl_page_delete(env, page);
+	}
 
-	LASSERT(PageLocked(vmpage));
-	if (PageWriteback(vmpage) || PageDirty(vmpage))
-		return 0;
-
-	mapping = vmpage->mapping;
-	if (mapping == NULL)
-		return 1;
-
-	obj = ll_i2info(mapping->host)->lli_clob;
-	if (obj == NULL)
-		return 1;
-
-	/* 1 for page allocator, 1 for cl_page and 1 for page cache */
-	if (page_count(vmpage) > 3)
-		return 0;
-
-	/* TODO: determine what gfp should be used by @gfp_mask. */
-	env = cl_env_nested_get(&nest);
-	if (IS_ERR(env))
-		/* If we can't allocate an env we won't call cl_page_put()
-		 * later on which further means it's impossible to drop
-		 * page refcount by cl_page, so ask kernel to not free
-		 * this page. */
-		return 0;
-
-	page = cl_vmpage_page(vmpage, obj);
-	result = page == NULL;
-	if (page != NULL) {
-		if (!cl_page_in_use(page)) {
-			result = 1;
-			cl_page_delete(env, page);
-		}
-		cl_page_put(env, page);
-	}
-	cl_env_nested_put(&nest, env);
-	return result;
+	/* To use the percpu env array, the call path must not be
+	 * rescheduled; otherwise the percpu array will be messed up if
+	 * ll_releasepage() is called again on the same CPU.
+	 *
+	 * If this page holds the last refcount of the cl_object, the
+	 * following call path may cause a reschedule:
+	 *	cl_page_put -> cl_page_free -> cl_object_put ->
+	 *	lu_object_put -> lu_object_free -> lov_delete_raid0 ->
+	 *	cl_locks_prune.
+	 *
+	 * However, the kernel can't get rid of this inode until all pages
+	 * have been cleaned up.  Since we hold the page lock here, it is
+	 * safe to assume we won't enter the object delete path.
+	 */
+	LASSERT(cl_object_refc(obj) > 1);
+	cl_page_put(env, page);
+
+	cl_env_percpu_put(env);
+	cl_env_reexit(cookie);
+	return result;
 }
 
 static int ll_set_page_dirty(struct page *vmpage)
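The magic number in the page_count() test above encodes the reference accounting at that point in ll_releasepage(); spelled out (an illustrative breakdown, matching the one-line comment in the patch):

	/*
	 * References to vmpage when the check runs:
	 *   1 taken by the caller of ->releasepage()
	 *   1 held by the cl_page shadowing this vmpage
	 *   1 held by the page cache (vmpage->mapping)
	 *
	 * page_count(vmpage) == 3: only bookkeeping references remain,
	 *   so it is worth trying to release the page;
	 * page_count(vmpage) >  3: some other user still holds the page,
	 *   so return 0 and ask the kernel not to free it.
	 */
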
diff --git a/lustre/obdclass/cl_lock.c b/lustre/obdclass/cl_lock.c
index a2a1624..2a71cc3 100644
--- a/lustre/obdclass/cl_lock.c
+++ b/lustre/obdclass/cl_lock.c
@@ -268,7 +268,6 @@ static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
 
 	ENTRY;
 	cl_lock_trace(D_DLMTRACE, env, "free lock", lock);
-	might_sleep();
 	while (!cfs_list_empty(&lock->cll_layers)) {
 		struct cl_lock_slice *slice;
 
diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c
index 6ad9b0a..78bc76c 100644
--- a/lustre/obdclass/cl_object.c
+++ b/lustre/obdclass/cl_object.c
@@ -426,6 +426,8 @@ int cache_stats_print(const struct cache_stats *cs,
 	return nob;
 }
 
+static void cl_env_percpu_refill(void);
+
 /**
  * Initialize client site.
  *
@@ -445,8 +447,9 @@ int cl_site_init(struct cl_site *s, struct cl_device *d)
 		cfs_atomic_set(&s->cs_pages_state[0], 0);
 		for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
 			cfs_atomic_set(&s->cs_locks_state[i], 0);
-	}
-	return result;
+		cl_env_percpu_refill();
+	}
+	return result;
 }
 EXPORT_SYMBOL(cl_site_init);
 
@@ -1112,6 +1115,103 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
 }
 EXPORT_SYMBOL(cl_lvb2attr);
 
+static struct cl_env cl_env_percpu[NR_CPUS];
+
+static int cl_env_percpu_init(void)
+{
+	struct cl_env *cle;
+	int tags = LCT_REMEMBER | LCT_NOREF;
+	int i, j;
+	int rc = 0;
+
+	for_each_possible_cpu(i) {
+		struct lu_env *env;
+
+		cle = &cl_env_percpu[i];
+		env = &cle->ce_lu;
+
+		CFS_INIT_LIST_HEAD(&cle->ce_linkage);
+		cle->ce_magic = &cl_env_init0;
+		rc = lu_env_init(env, LCT_CL_THREAD | tags);
+		if (rc == 0) {
+			rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags);
+			if (rc == 0) {
+				lu_context_enter(&cle->ce_ses);
+				env->le_ses = &cle->ce_ses;
+			} else {
+				lu_env_fini(env);
+			}
+		}
+		if (rc != 0)
+			break;
+	}
+	if (rc != 0) {
+		/* Indices 0 to i (excluding i) were correctly initialized,
+		 * thus we must uninitialize up to i, the rest are undefined. */
+		for (j = 0; j < i; j++) {
+			cle = &cl_env_percpu[j];
+			lu_context_exit(&cle->ce_ses);
+			lu_context_fini(&cle->ce_ses);
+			lu_env_fini(&cle->ce_lu);
+		}
+	}
+
+	return rc;
+}
+
+static void cl_env_percpu_fini(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct cl_env *cle = &cl_env_percpu[i];
+
+		lu_context_exit(&cle->ce_ses);
+		lu_context_fini(&cle->ce_ses);
+		lu_env_fini(&cle->ce_lu);
+	}
+}
+
+static void cl_env_percpu_refill(void)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		lu_env_refill(&cl_env_percpu[i].ce_lu);
+}
+
+void cl_env_percpu_put(struct lu_env *env)
+{
+	struct cl_env *cle;
+	int cpu;
+
+	cpu = smp_processor_id();
+	cle = cl_env_container(env);
+	LASSERT(cle == &cl_env_percpu[cpu]);
+
+	cle->ce_ref--;
+	LASSERT(cle->ce_ref == 0);
+
+	CL_ENV_DEC(busy);
+	cl_env_detach(cle);
+	cle->ce_debug = NULL;
+
+	put_cpu();
+}
+EXPORT_SYMBOL(cl_env_percpu_put);
+
+struct lu_env *cl_env_percpu_get(void)
+{
+	struct cl_env *cle;
+
+	cle = &cl_env_percpu[get_cpu()];
+	cl_env_init0(cle, __builtin_return_address(0));
+
+	cl_env_attach(cle);
+	return &cle->ce_lu;
+}
+EXPORT_SYMBOL(cl_env_percpu_get);
+
 /*****************************************************************************
  *
  * Temporary prototype thing: mirror obd-devices into cl devices.
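The error path of cl_env_percpu_init() above uses the usual partial-unwind idiom: the loop breaks with i naming the slot that failed, so exactly slots 0..i-1 must be torn down, and the cleanup loop has to index with j rather than i. A self-contained sketch of the same idiom, with a hypothetical resource standing in for the per-CPU envs (plain C, not Lustre code):

	#include <stdio.h>

	#define NSLOTS 4

	/* Hypothetical resource; slot 2 is made to fail for the demo. */
	static int slot_init(int i)  { return i == 2 ? -1 : 0; }
	static void slot_fini(int i) { printf("fini slot %d\n", i); }

	static int slots_init(void)
	{
		int i, j, rc = 0;

		for (i = 0; i < NSLOTS; i++) {
			rc = slot_init(i);
			if (rc != 0)
				break;
		}
		if (rc != 0) {
			/* Slots 0..i-1 initialized successfully; unwind
			 * exactly those, indexing with j, not i. */
			for (j = 0; j < i; j++)
				slot_fini(j);
		}
		return rc;
	}

	int main(void)
	{
		return slots_init() ? 1 : 0;	/* prints "fini slot 0",
						 * then "fini slot 1" */
	}
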
@@ -1267,6 +1367,11 @@ int cl_global_init(void)
 	if (result)
 		goto out_lock;
 
+	result = cl_env_percpu_init();
+	if (result)
+		/* no cl_env_percpu_fini on error */
+		goto out_lock;
+
 	return 0;
 out_lock:
 	cl_lock_fini();
@@ -1284,6 +1389,7 @@ out_store:
  */
 void cl_global_fini(void)
 {
+	cl_env_percpu_fini();
 	cl_lock_fini();
 	cl_page_fini();
 	lu_context_key_degister(&cl_key);
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c
index a8f46ea..0f02723 100644
--- a/lustre/obdclass/cl_page.c
+++ b/lustre/obdclass/cl_page.c
@@ -158,7 +158,6 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
 	PASSERT(env, page, page->cp_state == CPS_FREEING);
 
 	ENTRY;
-	might_sleep();
 	while (!cfs_list_empty(&page->cp_layers)) {
 		struct cl_page_slice *slice;
 
-- 
1.8.3.1