From: johann Date: Mon, 28 Jan 2008 10:52:50 +0000 (+0000) Subject: Branch HEAD X-Git-Tag: v1_7_0_51~286 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=81344484a5f570ad0ecc530ac3666fb1d99469ed;p=fs%2Flustre-release.git Branch HEAD b=13843 i=adilger i=shadow A lot of unlink operations with concurrent I/O can lead to a deadlock causing evictions. To address the problem, the number of outstanding OST_DESTROY requests is now throttled to max_rpcs_in_flight per OSC. --- diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 929fdfa..d933b24 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -636,6 +636,15 @@ Details : After ELC code landed, it is now improper to enqueue any mds locks under och_sem, because enqueue might want to decide to cancel open locks for same inode we are holding och_sem for. +Severity : normal +Bugzilla : 13843 +Description: Client eviction while running blogbench +Details : A lot of unlink operations with concurrent I/O can lead to a + deadlock causing evictions. To address the problem, the number of + outstanding OST_DESTROY requests is now throttled to + max_rpcs_in_flight per OSC and LDLM_FL_DISCARD_DATA blocking + callbacks are processed in priority. + -------------------------------------------------------------------------------- 2007-08-10 Cluster File Systems, Inc. 
diff --git a/lustre/include/obd.h b/lustre/include/obd.h index edad05b..91afc47 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -447,6 +447,10 @@ struct client_obd { struct obd_histogram cl_read_offset_hist; struct obd_histogram cl_write_offset_hist; + /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ + atomic_t cl_destroy_in_flight; + cfs_waitq_t cl_destroy_waitq; + struct mdc_rpc_lock *cl_rpc_lock; struct mdc_rpc_lock *cl_setattr_lock; struct mdc_rpc_lock *cl_close_lock; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 7d4d43a..79c3fc9 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -276,6 +276,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) spin_lock_init(&cli->cl_write_page_hist.oh_lock); spin_lock_init(&cli->cl_read_offset_hist.oh_lock); spin_lock_init(&cli->cl_write_offset_hist.oh_lock); + cfs_waitq_init(&cli->cl_destroy_waitq); + atomic_set(&cli->cl_destroy_in_flight, 0); #ifdef ENABLE_CHECKSUM cli->cl_checksum = 1; #endif diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index ba98d4e..5a760bd 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -594,6 +594,34 @@ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa, RETURN(count); } +static int osc_destroy_interpret(struct ptlrpc_request *req, void *data, + int rc) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + + atomic_dec(&cli->cl_destroy_in_flight); + cfs_waitq_signal(&cli->cl_destroy_waitq); + return 0; +} + +static int osc_can_send_destroy(struct client_obd *cli) +{ + if (atomic_inc_return(&cli->cl_destroy_in_flight) <= + cli->cl_max_rpcs_in_flight) { + /* The destroy request can be sent */ + return 1; + } + if (atomic_dec_return(&cli->cl_destroy_in_flight) < + cli->cl_max_rpcs_in_flight) { + /* + * The counter has been modified between the two atomic + * operations. 
+ */ + cfs_waitq_signal(&cli->cl_destroy_waitq); + } + return 0; +} + /* Destroy requests can be async always on the client, and we don't even really * care about the return code since the client cannot do anything at all about * a destroy failure. @@ -613,6 +641,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, struct ost_body *body; int size[3] = { sizeof(struct ptlrpc_body), sizeof(*body), 0 }; int count, bufcount = 2; + struct client_obd *cli = &exp->exp_obd->u.cli; ENTRY; if (!oa) { @@ -630,6 +659,7 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, RETURN(-ENOMEM); req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ + req->rq_interpret_reply = osc_destroy_interpret; body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) @@ -639,6 +669,18 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa, ptlrpc_req_set_repsize(req, 2, size); + if (!osc_can_send_destroy(cli)) { + struct l_wait_info lwi = { 0 }; + + /* + * Wait until the number of on-going destroy RPCs drops + * under max_rpcs_in_flight + */ + l_wait_event_exclusive(cli->cl_destroy_waitq, + osc_can_send_destroy(cli), &lwi); + } + + /* Do not wait for response */ ptlrpcd_add_req(req); RETURN(0); }