From f7a81d4797933d179f9955bb0821779d3ac9a8fe Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Mon, 6 May 2013 23:38:21 +0800 Subject: [PATCH] LU-3281 osc: some cleanup to reduce stack overflow chance ptlrpcd_add_req() will wake_up other process, do not hold a spinlock before calling ptlrpcd_queue_work()->ptlrpcd_add_req(). If current process is allocating memory, memory shrinker could get to osc_lru_del(), don't call osc_lru_shrink() further since it could lead a long calling chain. Use static string OES_STRINGS in OSC_EXTENT_DUMP() to reduce stack footprint. Alloc crattr on heap for osc_build_rpc() to reduce stack footprint. Signed-off-by: Bobi Jam Change-Id: I1a1ce0b46850773a2ae45ce16be6b708adc40ab8 Reviewed-on: http://review.whamcloud.com/6270 Tested-by: Hudson Reviewed-by: Jinshan Xiong Reviewed-by: Keith Mannthey Reviewed-by: Andreas Dilger Tested-by: Maloo --- lustre/osc/osc_cache.c | 46 ++++++++--------- lustre/osc/osc_cl_internal.h | 2 - lustre/osc/osc_page.c | 3 +- lustre/osc/osc_request.c | 116 +++++++++++++++++++++++-------------------- 4 files changed, 87 insertions(+), 80 deletions(-) diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 0d61628..0602242 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -99,22 +99,23 @@ static inline char list_empty_marker(cfs_list_t *list) #define EXTSTR "[%lu -> %lu/%lu]" #define EXTPARA(ext) (ext)->oe_start, (ext)->oe_end, (ext)->oe_max_end +static const char *oes_strings[] = { + "inv", "active", "cache", "locking", "lockdone", "rpc", "trunc", NULL }; #define OSC_EXTENT_DUMP(lvl, extent, fmt, ...) do { \ struct osc_extent *__ext = (extent); \ - const char *__str[] = OES_STRINGS; \ char __buf[16]; \ \ CDEBUG(lvl, \ "extent %p@{" EXTSTR ", " \ "[%d|%d|%c|%s|%s|%p], [%d|%d|%c|%c|%p|%u|%p]} " fmt, \ - /* ----- extent part 0 ----- */ \ + /* ----- extent part 0 ----- */ \ __ext, EXTPARA(__ext), \ /* ----- part 1 ----- */ \ cfs_atomic_read(&__ext->oe_refc), \ cfs_atomic_read(&__ext->oe_users), \ list_empty_marker(&__ext->oe_link), \ - __str[__ext->oe_state], ext_flags(__ext, __buf), \ + oes_strings[__ext->oe_state], ext_flags(__ext, __buf), \ __ext->oe_obj, \ /* ----- part 2 ----- */ \ __ext->oe_grants, __ext->oe_nr_pages, \ @@ -128,10 +129,10 @@ static inline char list_empty_marker(cfs_list_t *list) #undef EASSERTF #define EASSERTF(expr, ext, fmt, args...) do { \ if (!(expr)) { \ - OSC_EXTENT_DUMP(D_ERROR, (ext), fmt, ##args); \ - osc_extent_tree_dump(D_ERROR, (ext)->oe_obj); \ + OSC_EXTENT_DUMP(D_ERROR, (ext), fmt, ##args); \ + osc_extent_tree_dump(D_ERROR, (ext)->oe_obj); \ LASSERT(expr); \ - } \ + } \ } while (0) #undef EASSERT @@ -2124,27 +2125,24 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli, static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli, struct osc_object *osc, pdl_policy_t pol, int async) { - int has_rpcs = 1; int rc = 0; - client_obd_list_lock(&cli->cl_loi_list_lock); - if (osc != NULL) - has_rpcs = __osc_list_maint(cli, osc); - if (has_rpcs) { - if (!async) { - /* disable osc_lru_shrink() temporarily to avoid - * potential stack overrun problem. LU-2859 */ - cfs_atomic_inc(&cli->cl_lru_shrinkers); - osc_check_rpcs(env, cli, pol); - cfs_atomic_dec(&cli->cl_lru_shrinkers); - } else { - CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", - cli); - LASSERT(cli->cl_writeback_work != NULL); - rc = ptlrpcd_queue_work(cli->cl_writeback_work); - } + if (osc != NULL && osc_list_maint(cli, osc) == 0) + return 0; + + if (!async) { + /* disable osc_lru_shrink() temporarily to avoid + * potential stack overrun problem. LU-2859 */ + cfs_atomic_inc(&cli->cl_lru_shrinkers); + client_obd_list_lock(&cli->cl_loi_list_lock); + osc_check_rpcs(env, cli, pol); + client_obd_list_unlock(&cli->cl_loi_list_lock); + cfs_atomic_dec(&cli->cl_lru_shrinkers); + } else { + CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli); + LASSERT(cli->cl_writeback_work != NULL); + rc = ptlrpcd_queue_work(cli->cl_writeback_work); } - client_obd_list_unlock(&cli->cl_loi_list_lock); return rc; } diff --git a/lustre/osc/osc_cl_internal.h b/lustre/osc/osc_cl_internal.h index 9b1aca7..0dea7b5 100644 --- a/lustre/osc/osc_cl_internal.h +++ b/lustre/osc/osc_cl_internal.h @@ -596,8 +596,6 @@ enum osc_extent_state { OES_TRUNC = 6, /** being truncated */ OES_STATE_MAX }; -#define OES_STRINGS { "inv", "active", "cache", "locking", "lockdone", "rpc", \ - "trunc", NULL } /** * osc_extent data to manage dirty pages. diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index df51fc5..6e89354 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -809,7 +809,8 @@ static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del) * stealing one of them. * cl_lru_shrinkers is to avoid recursive call in case * we're already in the context of osc_lru_shrink(). */ - if (cfs_atomic_read(&cli->cl_lru_shrinkers) == 0) + if (cfs_atomic_read(&cli->cl_lru_shrinkers) == 0 && + !cfs_memory_pressure_get()) osc_lru_shrink(cli, osc_cache_too_much(cli)); cfs_waitq_signal(&osc_lru_waitq); } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 31c1e39..f73f218 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2049,21 +2049,26 @@ static int brw_interpret(const struct lu_env *env, int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, cfs_list_t *ext_list, int cmd, pdl_policy_t pol) { - struct ptlrpc_request *req = NULL; - struct osc_extent *ext; + struct ptlrpc_request *req = NULL; + struct osc_extent *ext; + struct brw_page **pga = NULL; + struct osc_brw_async_args *aa = NULL; + struct obdo *oa = NULL; + struct osc_async_page *oap; + struct osc_async_page *tmp; + struct cl_req *clerq = NULL; + enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : + CRT_READ; + struct ldlm_lock *lock = NULL; + struct cl_req_attr *crattr = NULL; + obd_off starting_offset = OBD_OBJECT_EOF; + obd_off ending_offset = 0; + int mpflag = 0; + int mem_tight = 0; + int page_count = 0; + int i; + int rc; CFS_LIST_HEAD(rpc_list); - struct brw_page **pga = NULL; - struct osc_brw_async_args *aa = NULL; - struct obdo *oa = NULL; - struct osc_async_page *oap; - struct osc_async_page *tmp; - struct cl_req *clerq = NULL; - enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ; - struct ldlm_lock *lock = NULL; - struct cl_req_attr crattr; - obd_off starting_offset = OBD_OBJECT_EOF; - obd_off ending_offset = 0; - int i, rc, mpflag = 0, mem_tight = 0, page_count = 0; ENTRY; LASSERT(!cfs_list_empty(ext_list)); @@ -2091,7 +2096,10 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, if (mem_tight) mpflag = cfs_memory_pressure_get_and_set(); - memset(&crattr, 0, sizeof crattr); + OBD_ALLOC(crattr, sizeof(*crattr)); + if (crattr == NULL) + GOTO(out, rc = -ENOMEM); + OBD_ALLOC(pga, sizeof(*pga) * page_count); if (pga == NULL) GOTO(out, rc = -ENOMEM); @@ -2105,42 +2113,40 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, struct cl_page *page = oap2cl_page(oap); if (clerq == NULL) { clerq = cl_req_alloc(env, page, crt, - 1 /* only 1-object rpcs for - * now */); + 1 /* only 1-object rpcs for now */); if (IS_ERR(clerq)) GOTO(out, rc = PTR_ERR(clerq)); lock = oap->oap_ldlm_lock; } if (mem_tight) oap->oap_brw_flags |= OBD_BRW_MEMALLOC; - pga[i] = &oap->oap_brw_page; - pga[i]->off = oap->oap_obj_off + oap->oap_page_off; - CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", - pga[i]->pg, cfs_page_index(oap->oap_page), oap, pga[i]->flag); - i++; - cl_req_page_add(env, clerq, page); - } + pga[i] = &oap->oap_brw_page; + pga[i]->off = oap->oap_obj_off + oap->oap_page_off; + CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", + pga[i]->pg, cfs_page_index(oap->oap_page), oap, + pga[i]->flag); + i++; + cl_req_page_add(env, clerq, page); + } - /* always get the data for the obdo for the rpc */ + /* always get the data for the obdo for the rpc */ LASSERT(clerq != NULL); - crattr.cra_oa = oa; - crattr.cra_capa = NULL; - memset(crattr.cra_jobid, 0, JOBSTATS_JOBID_SIZE); - cl_req_attr_set(env, clerq, &crattr, ~0ULL); - if (lock) { - oa->o_handle = lock->l_remote_handle; - oa->o_valid |= OBD_MD_FLHANDLE; - } + crattr->cra_oa = oa; + cl_req_attr_set(env, clerq, crattr, ~0ULL); + if (lock) { + oa->o_handle = lock->l_remote_handle; + oa->o_valid |= OBD_MD_FLHANDLE; + } - rc = cl_req_prep(env, clerq); - if (rc != 0) { - CERROR("cl_req_prep failed: %d\n", rc); + rc = cl_req_prep(env, clerq); + if (rc != 0) { + CERROR("cl_req_prep failed: %d\n", rc); GOTO(out, rc); } sort_brw_pages(pga, page_count); rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count, - pga, &req, crattr.cra_capa, 1, 0); + pga, &req, crattr->cra_capa, 1, 0); if (rc != 0) { CERROR("prep_req failed: %d\n", rc); GOTO(out, rc); @@ -2148,17 +2154,17 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, req->rq_interpret_reply = brw_interpret; if (mem_tight != 0) - req->rq_memalloc = 1; + req->rq_memalloc = 1; - /* Need to update the timestamps after the request is built in case - * we race with setattr (locally or in queue at OST). If OST gets - * later setattr before earlier BRW (as determined by the request xid), - * the OST will not use BRW timestamps. Sadly, there is no obvious - * way to do this in a single call. bug 10150 */ - cl_req_attr_set(env, clerq, &crattr, - OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME); + /* Need to update the timestamps after the request is built in case + * we race with setattr (locally or in queue at OST). If OST gets + * later setattr before earlier BRW (as determined by the request xid), + * the OST will not use BRW timestamps. Sadly, there is no obvious + * way to do this in a single call. bug 10150 */ + cl_req_attr_set(env, clerq, crattr, + OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME); - lustre_msg_set_jobid(req->rq_reqmsg, crattr.cra_jobid); + lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid); CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); @@ -2225,16 +2231,20 @@ out: if (mem_tight != 0) cfs_memory_pressure_restore(mpflag); - capa_put(crattr.cra_capa); + if (crattr != NULL) { + capa_put(crattr->cra_capa); + OBD_FREE(crattr, sizeof(*crattr)); + } + if (rc != 0) { LASSERT(req == NULL); - if (oa) - OBDO_FREE(oa); - if (pga) - OBD_FREE(pga, sizeof(*pga) * page_count); - /* this should happen rarely and is pretty bad, it makes the - * pending list not follow the dirty order */ + if (oa) + OBDO_FREE(oa); + if (pga) + OBD_FREE(pga, sizeof(*pga) * page_count); + /* this should happen rarely and is pretty bad, it makes the + * pending list not follow the dirty order */ while (!cfs_list_empty(ext_list)) { ext = cfs_list_entry(ext_list->next, struct osc_extent, oe_link); -- 1.8.3.1