From 9d7344649b533f19d2a7499764d60a23823f6030 Mon Sep 17 00:00:00 2001
From: Johann Lombardi
Date: Mon, 1 Sep 2014 15:03:51 +0200
Subject: [PATCH 1/1] LU-5556 target: limit bulk transfer time

Messages lost during bulk transfer are not resent, so there is no point
in waiting for a very long time (up to at_max/600s has been seen).
This patch adds a new static timeout for the bulk transfer (100s by
default).

Signed-off-by: Johann Lombardi
Change-Id: I3926a7a8f2bce4cbd00b8fe54094a8e9cbec1508
Signed-off-by: Mikhail Pershin
Reviewed-on: http://review.whamcloud.com/12242
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---
 lustre/include/obd_support.h         |   1 +
 lustre/ldlm/ldlm_lib.c               | 131 ++++++++++++++++++-----------------
 lustre/obdclass/class_obd.c          |   3 +
 lustre/obdclass/linux/linux-sysctl.c |  14 ++++
 lustre/ost/ost_handler.c             |   9 ++-
 5 files changed, 93 insertions(+), 65 deletions(-)

diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index a43bfbd..eaed5a2 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -68,6 +68,7 @@ extern unsigned int obd_timeout;	/* seconds */
 extern unsigned int ldlm_timeout;	/* seconds */
 extern unsigned int obd_timeout_set;
 extern unsigned int ldlm_timeout_set;
+extern unsigned int bulk_timeout;
 extern unsigned int at_min;
 extern unsigned int at_max;
 extern unsigned int at_history;
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index c871f94..654dc76 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -2648,9 +2648,12 @@ static inline char *bulk2type(struct ptlrpc_bulk_desc *desc)
 int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
                    struct l_wait_info *lwi)
 {
-        struct ptlrpc_request *req = desc->bd_req;
-        int rc = 0;
-        ENTRY;
+	struct ptlrpc_request	*req = desc->bd_req;
+	time_t			 start = cfs_time_current_sec();
+	time_t			 deadline;
+	int			 rc = 0;
+
+	ENTRY;
 
 	/* If there is eviction in progress, wait for it to finish. */
 	if (unlikely(cfs_atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
@@ -2671,66 +2674,70 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
 			rc = ptlrpc_start_bulk_transfer(desc);
 	}
 
-	if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) {
-		ptlrpc_abort_bulk(desc);
-	} else if (rc == 0) {
-		time_t start = cfs_time_current_sec();
-		do {
-			long timeoutl = req->rq_deadline - cfs_time_current_sec();
-			cfs_duration_t timeout = timeoutl <= 0 ?
-				CFS_TICK : cfs_time_seconds(timeoutl);
-			*lwi = LWI_TIMEOUT_INTERVAL(timeout,
-						    cfs_time_seconds(1),
-						    target_bulk_timeout,
-						    desc);
-			rc = l_wait_event(desc->bd_waitq,
-					  !ptlrpc_server_bulk_active(desc) ||
-					  exp->exp_failed ||
-					  exp->exp_abort_active_req,
-					  lwi);
-			LASSERT(rc == 0 || rc == -ETIMEDOUT);
-			/* Wait again if we changed deadline. */
-		} while ((rc == -ETIMEDOUT) &&
-			 (req->rq_deadline > cfs_time_current_sec()));
-
-		if (rc == -ETIMEDOUT) {
-			DEBUG_REQ(D_ERROR, req,
-				  "timeout on bulk %s after %ld%+lds",
-				  bulk2type(desc),
-				  req->rq_deadline - start,
-				  cfs_time_current_sec() -
-				  req->rq_deadline);
-			ptlrpc_abort_bulk(desc);
-		} else if (exp->exp_failed) {
-			DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",
-				  bulk2type(desc));
-			rc = -ENOTCONN;
-			ptlrpc_abort_bulk(desc);
-		} else if (exp->exp_abort_active_req) {
-			DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
-				  bulk2type(desc));
-			/* We don't reply anyway. */
-			rc = -ETIMEDOUT;
-			ptlrpc_abort_bulk(desc);
-		} else if (desc->bd_failure ||
-			   desc->bd_nob_transferred != desc->bd_nob) {
-			DEBUG_REQ(D_ERROR, req, "%s bulk %s %d(%d)",
-				  desc->bd_failure ?
-				  "network error on" : "truncated",
-				  bulk2type(desc),
-				  desc->bd_nob_transferred,
-				  desc->bd_nob);
-			/* XXX Should this be a different errno? */
-			rc = -ETIMEDOUT;
-		} else if (desc->bd_type == BULK_GET_SINK) {
-			rc = sptlrpc_svc_unwrap_bulk(req, desc);
-		}
-	} else {
-		DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
-			  bulk2type(desc), rc);
-	}
+	if (rc < 0) {
+		DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
+			  bulk2type(desc), rc);
+		RETURN(rc);
+	}
 
-	RETURN(rc);
+	if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) {
+		ptlrpc_abort_bulk(desc);
+		RETURN(0);
+	}
+
+	/* limit actual bulk transfer to bulk_timeout seconds */
+	deadline = start + bulk_timeout;
+	if (deadline > req->rq_deadline)
+		deadline = req->rq_deadline;
+
+	do {
+		long timeoutl = deadline - cfs_time_current_sec();
+		cfs_duration_t timeout = timeoutl <= 0 ?
+					 CFS_TICK : cfs_time_seconds(timeoutl);
+
+		*lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1),
+					    target_bulk_timeout, desc);
+		rc = l_wait_event(desc->bd_waitq,
+				  !ptlrpc_server_bulk_active(desc) ||
+				  exp->exp_failed ||
+				  exp->exp_abort_active_req, lwi);
+		LASSERT(rc == 0 || rc == -ETIMEDOUT);
+		/* Wait again if we changed rq_deadline. */
+		deadline = start + bulk_timeout;
+		if (deadline > req->rq_deadline)
+			deadline = req->rq_deadline;
+	} while ((rc == -ETIMEDOUT) &&
+		 (deadline > cfs_time_current_sec()));
+
+	if (rc == -ETIMEDOUT) {
+		DEBUG_REQ(D_ERROR, req, "timeout on bulk %s after %ld%+lds",
+			  bulk2type(desc), deadline - start,
+			  cfs_time_current_sec() - deadline);
+		ptlrpc_abort_bulk(desc);
+	} else if (exp->exp_failed) {
+		DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",
+			  bulk2type(desc));
+		rc = -ENOTCONN;
+		ptlrpc_abort_bulk(desc);
+	} else if (exp->exp_abort_active_req) {
+		DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
+			  bulk2type(desc));
+		/* We don't reply anyway. */
+		rc = -ETIMEDOUT;
+		ptlrpc_abort_bulk(desc);
+	} else if (desc->bd_failure ||
+		   desc->bd_nob_transferred != desc->bd_nob) {
+		DEBUG_REQ(D_ERROR, req, "%s bulk %s %d(%d)",
+			  desc->bd_failure ? "network error on" : "truncated",
+			  bulk2type(desc), desc->bd_nob_transferred,
+			  desc->bd_nob);
+		/* XXX Should this be a different errno? */
+		rc = -ETIMEDOUT;
+	} else if (desc->bd_type == BULK_GET_SINK) {
+		rc = sptlrpc_svc_unwrap_bulk(req, desc);
+	}
+
+	RETURN(rc);
 }
 EXPORT_SYMBOL(target_bulk_io);
 
diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c
index 8d2c1ac..06e2c16 100644
--- a/lustre/obdclass/class_obd.c
+++ b/lustre/obdclass/class_obd.c
@@ -94,6 +94,9 @@ unsigned int obd_timeout_set;
 EXPORT_SYMBOL(obd_timeout_set);
 unsigned int ldlm_timeout_set;
 EXPORT_SYMBOL(ldlm_timeout_set);
+/* bulk transfer timeout, give up after 100s by default */
+unsigned int bulk_timeout = 100; /* seconds */
+EXPORT_SYMBOL(bulk_timeout);
 /* Adaptive timeout defs here instead of ptlrpc module for /proc/sys/ access */
 unsigned int at_min = 0;
 EXPORT_SYMBOL(at_min);
diff --git a/lustre/obdclass/linux/linux-sysctl.c b/lustre/obdclass/linux/linux-sysctl.c
index 25c7179..6a06687 100644
--- a/lustre/obdclass/linux/linux-sysctl.c
+++ b/lustre/obdclass/linux/linux-sysctl.c
@@ -79,6 +79,7 @@ enum {
 	OBD_AT_EXTRA,
 	OBD_AT_EARLY_MARGIN,
 	OBD_AT_HISTORY,
+	OBD_BULK_TIMEOUT,	/* bulk transfer timeout */
 };
 
 #else
@@ -104,6 +105,7 @@ enum {
 #define OBD_AT_EXTRA		CTL_UNNUMBERED
 #define OBD_AT_EARLY_MARGIN	CTL_UNNUMBERED
 #define OBD_AT_HISTORY		CTL_UNNUMBERED
+#define OBD_BULK_TIMEOUT	CTL_UNNUMBERED
 
 #endif
 
@@ -288,6 +290,10 @@ int LL_PROC_PROTO(proc_alloc_fail_rate)
 }
 #endif
 
+int LL_PROC_PROTO(proc_bulk_timeout)
+{
+	return proc_dointvec(table, write, buffer, lenp, ppos);
+}
 int LL_PROC_PROTO(proc_at_min)
 {
 	return ll_proc_dointvec(table, write, filp, buffer, lenp, ppos);
@@ -401,6 +407,14 @@ static struct ctl_table obd_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_max_dirty_pages_in_mb
 	},
+	{
+		INIT_CTL_NAME(OBD_BULK_TIMEOUT)
+		.procname	= "bulk_timeout",
+		.data		= &bulk_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_bulk_timeout
+	},
 	{
 		INIT_CTL_NAME(OBD_AT_MIN)
 		.procname	= "at_min",
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c
index ae982d0..ed4a5fb 100644
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -2010,9 +2010,12 @@ static int ost_rw_hpreq_lock_match(struct ptlrpc_request *req,
 	if (!ostid_res_name_eq(&ioo->ioo_oid, &lock->l_resource->lr_name))
 		RETURN(0);
 
-        mode = LCK_PW;
-        if (opc == OST_READ)
-                mode |= LCK_PR;
+	/* a bulk write can only hold a reference on a PW extent lock */
+	mode = LCK_PW;
+	if (opc == OST_READ)
+		/* whereas a bulk read can be protected by either a PR or PW
+		 * extent lock */
+		mode |= LCK_PR;
 
 	if (!(lock->l_granted_mode & mode))
 		RETURN(0);
-- 
1.8.3.1
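
For readers following the ldlm_lib.c hunk outside the Lustre tree, the core
of the change is clamping the bulk wait deadline to the new bulk_timeout
tunable instead of waiting until req->rq_deadline. Below is a minimal
standalone sketch of that clamping only; plain libc time() stands in for
cfs_time_current_sec(), and the bulk_deadline() helper plus the 600s example
value are illustrative and not part of the patch.

#include <stdio.h>
#include <time.h>

/* Illustrative default matching the patch: give up on bulk I/O after 100s. */
static unsigned int bulk_timeout = 100;	/* seconds */

/*
 * Effective wait deadline for one bulk transfer: the sooner of
 * "start + bulk_timeout" and the overall request deadline
 * (req->rq_deadline in the patch).  target_bulk_io() recomputes this on
 * every loop iteration so a later change of rq_deadline is still honoured,
 * but the bulk_timeout cap is never exceeded.
 */
static time_t bulk_deadline(time_t start, time_t rq_deadline)
{
	time_t deadline = start + bulk_timeout;

	if (deadline > rq_deadline)
		deadline = rq_deadline;
	return deadline;
}

int main(void)
{
	time_t start = time(NULL);
	/* Example: adaptive timeouts pushed rq_deadline 600s out (~at_max). */
	time_t rq_deadline = start + 600;

	printf("wait at most %ld seconds for the bulk transfer\n",
	       (long)(bulk_deadline(start, rq_deadline) - start));
	return 0;
}

At runtime the cap is exposed through the bulk_timeout entry registered in
obd_table above (mode 0644, handled by proc_bulk_timeout), so it can be
adjusted through the sysctl interface without rebuilding the modules.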