From 1f2311cdec223772d84243ec60ae67ad2161f2af Mon Sep 17 00:00:00 2001 From: yangsheng Date: Thu, 28 Apr 2011 00:03:04 +0800 Subject: [PATCH] LU-234 OOM killer causes node hang. b=18213 Handle the signal to avoid process hang. Change-Id: Ic730d98fb812bc9dbb5249847a6115a84d67758b Signed-off-by: Yang Sheng Reviewed-on: http://review.whamcloud.com/470 Reviewed-by: Fan Yong Tested-by: Hudson Reviewed-by: Oleg Drokin --- lustre/mdc/mdc_internal.h | 2 +- lustre/mdc/mdc_lib.c | 15 ++++++++++++--- lustre/mdc/mdc_locks.c | 17 ++++++++++++++--- lustre/osc/osc_request.c | 2 +- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 785f8f4..d2711ad 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -74,7 +74,7 @@ void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data); void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data, const char *old, int oldlen, const char *new, int newlen); void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data); -void mdc_enter_request(struct client_obd *cli); +int mdc_enter_request(struct client_obd *cli); void mdc_exit_request(struct client_obd *cli); /* mdc/mdc_locks.c */ diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 1b6757d5..56d3f73 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -501,21 +501,30 @@ static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) /* We record requests in flight in cli->cl_r_in_flight here. * There is only one write rpc possible in mdc anyway. If this to change * in the future - the code may need to be revisited. */ -void mdc_enter_request(struct client_obd *cli) +int mdc_enter_request(struct client_obd *cli) { + int rc = 0; struct mdc_cache_waiter mcw; - struct l_wait_info lwi = { 0 }; + struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); client_obd_list_lock(&cli->cl_loi_list_lock); if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { cfs_list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); cfs_waitq_init(&mcw.mcw_waitq); client_obd_list_unlock(&cli->cl_loi_list_lock); - l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi); + rc = l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi); + if (rc) { + client_obd_list_lock(&cli->cl_loi_list_lock); + if (cfs_list_empty(&mcw.mcw_entry)) + cli->cl_r_in_flight--; + cfs_list_del_init(&mcw.mcw_entry); + client_obd_list_unlock(&cli->cl_loi_list_lock); + } } else { cli->cl_r_in_flight++; client_obd_list_unlock(&cli->cl_loi_list_lock); } + return rc; } void mdc_exit_request(struct client_obd *cli) diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index c3d242d..7ec8408 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -693,7 +693,12 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, * rpcs in flight counter. We do not do flock request limiting, though*/ if (it) { mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it); - mdc_enter_request(&obddev->u.cli); + rc = mdc_enter_request(&obddev->u.cli); + if (rc != 0) { + mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it); + ptlrpc_req_finished(req); + RETURN(rc); + } } rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, policy, &flags, NULL, @@ -1023,7 +1028,7 @@ int mdc_intent_getattr_async(struct obd_export *exp, .l_inodebits = { MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE } }; - int rc; + int rc = 0; int flags = LDLM_FL_HAS_INTENT; ENTRY; @@ -1036,11 +1041,17 @@ int mdc_intent_getattr_async(struct obd_export *exp, if (!req) RETURN(-ENOMEM); - mdc_enter_request(&obddev->u.cli); + rc = mdc_enter_request(&obddev->u.cli); + if (rc != 0) { + ptlrpc_req_finished(req); + RETURN(rc); + } + rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, &policy, &flags, NULL, 0, &minfo->mi_lockh, 1); if (rc < 0) { mdc_exit_request(&obddev->u.cli); + ptlrpc_req_finished(req); RETURN(rc); } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 02c3ace..91939d0 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2866,7 +2866,7 @@ static int osc_enter_cache(const struct lu_env *env, struct osc_async_page *oap) { struct osc_cache_waiter ocw; - struct l_wait_info lwi = { 0 }; + struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); ENTRY; -- 1.8.3.1