Whamcloud - gitweb
LU-3281 osc: some cleanup to reduce stack overflow chance
authorBobi Jam <bobijam.xu@intel.com>
Mon, 6 May 2013 15:38:21 +0000 (23:38 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 8 May 2013 18:12:04 +0000 (14:12 -0400)
ptlrpcd_add_req() will wake_up other process, do not hold a spinlock
before calling ptlrpcd_queue_work()->ptlrpcd_add_req().

If current process is allocating memory, memory shrinker could get to
osc_lru_del(), don't call osc_lru_shrink() further since it could
lead a long calling chain.

Use static string OES_STRINGS in OSC_EXTENT_DUMP() to reduce stack
footprint.

Alloc crattr on heap for osc_build_rpc() to reduce stack footprint.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: I1a1ce0b46850773a2ae45ce16be6b708adc40ab8
Reviewed-on: http://review.whamcloud.com/6270
Tested-by: Hudson
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Keith Mannthey <keith.mannthey@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
lustre/osc/osc_cache.c
lustre/osc/osc_cl_internal.h
lustre/osc/osc_page.c
lustre/osc/osc_request.c

index 0d61628..0602242 100644 (file)
@@ -99,22 +99,23 @@ static inline char list_empty_marker(cfs_list_t *list)
 
 #define EXTSTR       "[%lu -> %lu/%lu]"
 #define EXTPARA(ext) (ext)->oe_start, (ext)->oe_end, (ext)->oe_max_end
 
 #define EXTSTR       "[%lu -> %lu/%lu]"
 #define EXTPARA(ext) (ext)->oe_start, (ext)->oe_end, (ext)->oe_max_end
+static const char *oes_strings[] = {
+       "inv", "active", "cache", "locking", "lockdone", "rpc", "trunc", NULL };
 
 #define OSC_EXTENT_DUMP(lvl, extent, fmt, ...) do {                          \
        struct osc_extent *__ext = (extent);                                  \
 
 #define OSC_EXTENT_DUMP(lvl, extent, fmt, ...) do {                          \
        struct osc_extent *__ext = (extent);                                  \
-       const char *__str[] = OES_STRINGS;                                    \
        char __buf[16];                                                       \
                                                                              \
        CDEBUG(lvl,                                                           \
                "extent %p@{" EXTSTR ", "                                     \
                "[%d|%d|%c|%s|%s|%p], [%d|%d|%c|%c|%p|%u|%p]} " fmt,          \
        char __buf[16];                                                       \
                                                                              \
        CDEBUG(lvl,                                                           \
                "extent %p@{" EXTSTR ", "                                     \
                "[%d|%d|%c|%s|%s|%p], [%d|%d|%c|%c|%p|%u|%p]} " fmt,          \
-               /* ----- extent part 0 ----- */                               \
+               /* ----- extent part 0 ----- */                               \
                __ext, EXTPARA(__ext),                                        \
                /* ----- part 1 ----- */                                      \
                cfs_atomic_read(&__ext->oe_refc),                             \
                cfs_atomic_read(&__ext->oe_users),                            \
                list_empty_marker(&__ext->oe_link),                           \
                __ext, EXTPARA(__ext),                                        \
                /* ----- part 1 ----- */                                      \
                cfs_atomic_read(&__ext->oe_refc),                             \
                cfs_atomic_read(&__ext->oe_users),                            \
                list_empty_marker(&__ext->oe_link),                           \
-               __str[__ext->oe_state], ext_flags(__ext, __buf),              \
+               oes_strings[__ext->oe_state], ext_flags(__ext, __buf),        \
                __ext->oe_obj,                                                \
                /* ----- part 2 ----- */                                      \
                __ext->oe_grants, __ext->oe_nr_pages,                         \
                __ext->oe_obj,                                                \
                /* ----- part 2 ----- */                                      \
                __ext->oe_grants, __ext->oe_nr_pages,                         \
@@ -128,10 +129,10 @@ static inline char list_empty_marker(cfs_list_t *list)
 #undef EASSERTF
 #define EASSERTF(expr, ext, fmt, args...) do {                         \
        if (!(expr)) {                                                  \
 #undef EASSERTF
 #define EASSERTF(expr, ext, fmt, args...) do {                         \
        if (!(expr)) {                                                  \
-               OSC_EXTENT_DUMP(D_ERROR, (ext), fmt, ##args);            \
-               osc_extent_tree_dump(D_ERROR, (ext)->oe_obj);            \
+               OSC_EXTENT_DUMP(D_ERROR, (ext), fmt, ##args);           \
+               osc_extent_tree_dump(D_ERROR, (ext)->oe_obj);           \
                LASSERT(expr);                                          \
                LASSERT(expr);                                          \
-       }                                                                    \
+       }                                                               \
 } while (0)
 
 #undef EASSERT
 } while (0)
 
 #undef EASSERT
@@ -2124,27 +2125,24 @@ static void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli,
 static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
                          struct osc_object *osc, pdl_policy_t pol, int async)
 {
 static int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
                          struct osc_object *osc, pdl_policy_t pol, int async)
 {
-       int has_rpcs = 1;
        int rc = 0;
 
        int rc = 0;
 
-       client_obd_list_lock(&cli->cl_loi_list_lock);
-       if (osc != NULL)
-               has_rpcs = __osc_list_maint(cli, osc);
-       if (has_rpcs) {
-               if (!async) {
-                       /* disable osc_lru_shrink() temporarily to avoid
-                        * potential stack overrun problem. LU-2859 */
-                       cfs_atomic_inc(&cli->cl_lru_shrinkers);
-                       osc_check_rpcs(env, cli, pol);
-                       cfs_atomic_dec(&cli->cl_lru_shrinkers);
-               } else {
-                       CDEBUG(D_CACHE, "Queue writeback work for client %p.\n",
-                              cli);
-                       LASSERT(cli->cl_writeback_work != NULL);
-                       rc = ptlrpcd_queue_work(cli->cl_writeback_work);
-               }
+       if (osc != NULL && osc_list_maint(cli, osc) == 0)
+               return 0;
+
+       if (!async) {
+               /* disable osc_lru_shrink() temporarily to avoid
+                * potential stack overrun problem. LU-2859 */
+               cfs_atomic_inc(&cli->cl_lru_shrinkers);
+               client_obd_list_lock(&cli->cl_loi_list_lock);
+               osc_check_rpcs(env, cli, pol);
+               client_obd_list_unlock(&cli->cl_loi_list_lock);
+               cfs_atomic_dec(&cli->cl_lru_shrinkers);
+       } else {
+               CDEBUG(D_CACHE, "Queue writeback work for client %p.\n", cli);
+               LASSERT(cli->cl_writeback_work != NULL);
+               rc = ptlrpcd_queue_work(cli->cl_writeback_work);
        }
        }
-       client_obd_list_unlock(&cli->cl_loi_list_lock);
        return rc;
 }
 
        return rc;
 }
 
index 9b1aca7..0dea7b5 100644 (file)
@@ -596,8 +596,6 @@ enum osc_extent_state {
        OES_TRUNC     = 6, /** being truncated */
        OES_STATE_MAX
 };
        OES_TRUNC     = 6, /** being truncated */
        OES_STATE_MAX
 };
-#define OES_STRINGS { "inv", "active", "cache", "locking", "lockdone", "rpc", \
-                     "trunc", NULL }
 
 /**
  * osc_extent data to manage dirty pages.
 
 /**
  * osc_extent data to manage dirty pages.
index df51fc5..6e89354 100644 (file)
@@ -809,7 +809,8 @@ static void osc_lru_del(struct client_obd *cli, struct osc_page *opg, bool del)
                         * stealing one of them.
                         * cl_lru_shrinkers is to avoid recursive call in case
                         * we're already in the context of osc_lru_shrink(). */
                         * stealing one of them.
                         * cl_lru_shrinkers is to avoid recursive call in case
                         * we're already in the context of osc_lru_shrink(). */
-                       if (cfs_atomic_read(&cli->cl_lru_shrinkers) == 0)
+                       if (cfs_atomic_read(&cli->cl_lru_shrinkers) == 0 &&
+                           !cfs_memory_pressure_get())
                                osc_lru_shrink(cli, osc_cache_too_much(cli));
                        cfs_waitq_signal(&osc_lru_waitq);
                }
                                osc_lru_shrink(cli, osc_cache_too_much(cli));
                        cfs_waitq_signal(&osc_lru_waitq);
                }
index 31c1e39..f73f218 100644 (file)
@@ -2049,21 +2049,26 @@ static int brw_interpret(const struct lu_env *env,
 int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                  cfs_list_t *ext_list, int cmd, pdl_policy_t pol)
 {
 int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                  cfs_list_t *ext_list, int cmd, pdl_policy_t pol)
 {
-       struct ptlrpc_request *req = NULL;
-       struct osc_extent *ext;
+       struct ptlrpc_request           *req = NULL;
+       struct osc_extent               *ext;
+       struct brw_page                 **pga = NULL;
+       struct osc_brw_async_args       *aa = NULL;
+       struct obdo                     *oa = NULL;
+       struct osc_async_page           *oap;
+       struct osc_async_page           *tmp;
+       struct cl_req                   *clerq = NULL;
+       enum cl_req_type                crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE :
+                                                                     CRT_READ;
+       struct ldlm_lock                *lock = NULL;
+       struct cl_req_attr              *crattr = NULL;
+       obd_off                         starting_offset = OBD_OBJECT_EOF;
+       obd_off                         ending_offset = 0;
+       int                             mpflag = 0;
+       int                             mem_tight = 0;
+       int                             page_count = 0;
+       int                             i;
+       int                             rc;
        CFS_LIST_HEAD(rpc_list);
        CFS_LIST_HEAD(rpc_list);
-       struct brw_page **pga = NULL;
-       struct osc_brw_async_args *aa = NULL;
-        struct obdo *oa = NULL;
-        struct osc_async_page *oap;
-        struct osc_async_page *tmp;
-        struct cl_req *clerq = NULL;
-        enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
-        struct ldlm_lock *lock = NULL;
-        struct cl_req_attr crattr;
-       obd_off starting_offset = OBD_OBJECT_EOF;
-       obd_off ending_offset = 0;
-       int i, rc, mpflag = 0, mem_tight = 0, page_count = 0;
 
        ENTRY;
        LASSERT(!cfs_list_empty(ext_list));
 
        ENTRY;
        LASSERT(!cfs_list_empty(ext_list));
@@ -2091,7 +2096,10 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
        if (mem_tight)
                mpflag = cfs_memory_pressure_get_and_set();
 
        if (mem_tight)
                mpflag = cfs_memory_pressure_get_and_set();
 
-       memset(&crattr, 0, sizeof crattr);
+       OBD_ALLOC(crattr, sizeof(*crattr));
+       if (crattr == NULL)
+               GOTO(out, rc = -ENOMEM);
+
        OBD_ALLOC(pga, sizeof(*pga) * page_count);
        if (pga == NULL)
                GOTO(out, rc = -ENOMEM);
        OBD_ALLOC(pga, sizeof(*pga) * page_count);
        if (pga == NULL)
                GOTO(out, rc = -ENOMEM);
@@ -2105,42 +2113,40 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                struct cl_page *page = oap2cl_page(oap);
                if (clerq == NULL) {
                        clerq = cl_req_alloc(env, page, crt,
                struct cl_page *page = oap2cl_page(oap);
                if (clerq == NULL) {
                        clerq = cl_req_alloc(env, page, crt,
-                                            1 /* only 1-object rpcs for
-                                               * now */);
+                                            1 /* only 1-object rpcs for now */);
                        if (IS_ERR(clerq))
                                GOTO(out, rc = PTR_ERR(clerq));
                        lock = oap->oap_ldlm_lock;
                }
                if (mem_tight)
                        oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
                        if (IS_ERR(clerq))
                                GOTO(out, rc = PTR_ERR(clerq));
                        lock = oap->oap_ldlm_lock;
                }
                if (mem_tight)
                        oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
-                pga[i] = &oap->oap_brw_page;
-                pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
-                CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
-                       pga[i]->pg, cfs_page_index(oap->oap_page), oap, pga[i]->flag);
-                i++;
-                cl_req_page_add(env, clerq, page);
-        }
+               pga[i] = &oap->oap_brw_page;
+               pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
+               CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
+                      pga[i]->pg, cfs_page_index(oap->oap_page), oap,
+                      pga[i]->flag);
+               i++;
+               cl_req_page_add(env, clerq, page);
+       }
 
 
-        /* always get the data for the obdo for the rpc */
+       /* always get the data for the obdo for the rpc */
        LASSERT(clerq != NULL);
        LASSERT(clerq != NULL);
-       crattr.cra_oa = oa;
-       crattr.cra_capa = NULL;
-       memset(crattr.cra_jobid, 0, JOBSTATS_JOBID_SIZE);
-        cl_req_attr_set(env, clerq, &crattr, ~0ULL);
-        if (lock) {
-                oa->o_handle = lock->l_remote_handle;
-                oa->o_valid |= OBD_MD_FLHANDLE;
-        }
+       crattr->cra_oa = oa;
+       cl_req_attr_set(env, clerq, crattr, ~0ULL);
+       if (lock) {
+               oa->o_handle = lock->l_remote_handle;
+               oa->o_valid |= OBD_MD_FLHANDLE;
+       }
 
 
-        rc = cl_req_prep(env, clerq);
-        if (rc != 0) {
-                CERROR("cl_req_prep failed: %d\n", rc);
+       rc = cl_req_prep(env, clerq);
+       if (rc != 0) {
+               CERROR("cl_req_prep failed: %d\n", rc);
                GOTO(out, rc);
        }
 
        sort_brw_pages(pga, page_count);
        rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
                GOTO(out, rc);
        }
 
        sort_brw_pages(pga, page_count);
        rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
-                       pga, &req, crattr.cra_capa, 1, 0);
+                       pga, &req, crattr->cra_capa, 1, 0);
        if (rc != 0) {
                CERROR("prep_req failed: %d\n", rc);
                GOTO(out, rc);
        if (rc != 0) {
                CERROR("prep_req failed: %d\n", rc);
                GOTO(out, rc);
@@ -2148,17 +2154,17 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 
        req->rq_interpret_reply = brw_interpret;
        if (mem_tight != 0)
 
        req->rq_interpret_reply = brw_interpret;
        if (mem_tight != 0)
-                req->rq_memalloc = 1;
+               req->rq_memalloc = 1;
 
 
-        /* Need to update the timestamps after the request is built in case
-         * we race with setattr (locally or in queue at OST).  If OST gets
-         * later setattr before earlier BRW (as determined by the request xid),
-         * the OST will not use BRW timestamps.  Sadly, there is no obvious
-         * way to do this in a single call.  bug 10150 */
-        cl_req_attr_set(env, clerq, &crattr,
-                        OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME);
+       /* Need to update the timestamps after the request is built in case
+        * we race with setattr (locally or in queue at OST).  If OST gets
+        * later setattr before earlier BRW (as determined by the request xid),
+        * the OST will not use BRW timestamps.  Sadly, there is no obvious
+        * way to do this in a single call.  bug 10150 */
+       cl_req_attr_set(env, clerq, crattr,
+                       OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME);
 
 
-       lustre_msg_set_jobid(req->rq_reqmsg, crattr.cra_jobid);
+       lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
 
        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
        aa = ptlrpc_req_async_args(req);
 
        CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
        aa = ptlrpc_req_async_args(req);
@@ -2225,16 +2231,20 @@ out:
        if (mem_tight != 0)
                cfs_memory_pressure_restore(mpflag);
 
        if (mem_tight != 0)
                cfs_memory_pressure_restore(mpflag);
 
-       capa_put(crattr.cra_capa);
+       if (crattr != NULL) {
+               capa_put(crattr->cra_capa);
+               OBD_FREE(crattr, sizeof(*crattr));
+       }
+
        if (rc != 0) {
                LASSERT(req == NULL);
 
        if (rc != 0) {
                LASSERT(req == NULL);
 
-                if (oa)
-                        OBDO_FREE(oa);
-                if (pga)
-                        OBD_FREE(pga, sizeof(*pga) * page_count);
-                /* this should happen rarely and is pretty bad, it makes the
-                 * pending list not follow the dirty order */
+               if (oa)
+                       OBDO_FREE(oa);
+               if (pga)
+                       OBD_FREE(pga, sizeof(*pga) * page_count);
+               /* this should happen rarely and is pretty bad, it makes the
+                * pending list not follow the dirty order */
                while (!cfs_list_empty(ext_list)) {
                        ext = cfs_list_entry(ext_list->next, struct osc_extent,
                                             oe_link);
                while (!cfs_list_empty(ext_list)) {
                        ext = cfs_list_entry(ext_list->next, struct osc_extent,
                                             oe_link);