When the server drops a duplicate request instead of processing it, the
client and the server have different deadlines for the same request: the
server operates with the first copy and the client with the second.
This patch adds request deadline updates if a duplicate request is
found.
A fix for LU-8420 changed the lock callback prolong calculation to use
the request deadline when the service estimate has changed since the
request was created. Using an outdated deadline may cause an
insufficient prolong timeout and subsequent client eviction.
Signed-off-by: Alexander Boyko <c17825@cray.com>
Signed-off-by: Vladimir Saveliev <c17830@cray.com>
Change-Id: I55725d396f50d864687248df46e7882290fc21ca
Cray-bug-id: MRP-3720 MRP-4289
Reviewed-on: https://review.whamcloud.com/31910
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Vitaly Fertman <c17818@cray.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
/* Check if we are already handling earlier incarnation of this request.
* Called under &req->rq_export->exp_rpc_lock locked */
/* Check if we are already handling earlier incarnation of this request.
* Called under &req->rq_export->exp_rpc_lock locked */
-static int ptlrpc_server_check_resend_in_progress(struct ptlrpc_request *req)
+static struct ptlrpc_request*
+ptlrpc_server_check_resend_in_progress(struct ptlrpc_request *req)
{
struct ptlrpc_request *tmp = NULL;
if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ||
(atomic_read(&req->rq_export->exp_rpc_count) == 0))
{
struct ptlrpc_request *tmp = NULL;
if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) ||
(atomic_read(&req->rq_export->exp_rpc_count) == 0))
/* bulk request are aborted upon reconnect, don't try to
* find a match */
if (req->rq_bulk_write || req->rq_bulk_read)
/* bulk request are aborted upon reconnect, don't try to
* find a match */
if (req->rq_bulk_write || req->rq_bulk_read)
/* This list should not be longer than max_requests in
* flights on the client, so it is not all that long.
/* This list should not be longer than max_requests in
* flights on the client, so it is not all that long.
if (tmp->rq_xid == req->rq_xid)
goto found;
}
if (tmp->rq_xid == req->rq_xid)
goto found;
}
found:
DEBUG_REQ(D_HA, req, "Found duplicate req in processing");
DEBUG_REQ(D_HA, tmp, "Request being processed");
found:
DEBUG_REQ(D_HA, req, "Found duplicate req in processing");
DEBUG_REQ(D_HA, tmp, "Request being processed");
+ struct ptlrpc_request *orig;
ENTRY;
rc = ptlrpc_server_hpreq_init(svcpt, req);
ENTRY;
rc = ptlrpc_server_hpreq_init(svcpt, req);
/* do search for duplicated xid and the adding to the list
* atomically */
spin_lock_bh(&exp->exp_rpc_lock);
/* do search for duplicated xid and the adding to the list
* atomically */
spin_lock_bh(&exp->exp_rpc_lock);
- rc = ptlrpc_server_check_resend_in_progress(req);
- if (rc < 0) {
+ orig = ptlrpc_server_check_resend_in_progress(req);
+ if (orig && likely(atomic_inc_not_zero(&orig->rq_refcount))) {
+ bool linked;
+
spin_unlock_bh(&exp->exp_rpc_lock);
spin_unlock_bh(&exp->exp_rpc_lock);
+ /*
+ * When the client resends a request and the server has
+ * the previous copy of it, we need to update deadlines,
+ * to be sure that the client and the server have equal
+ * request deadlines.
+ */
+
+ spin_lock(&orig->rq_rqbd->rqbd_svcpt->scp_at_lock);
+ linked = orig->rq_at_linked;
+ if (likely(linked))
+ ptlrpc_at_remove_timed(orig);
+ spin_unlock(&orig->rq_rqbd->rqbd_svcpt->scp_at_lock);
+ orig->rq_deadline = req->rq_deadline;
+ if (likely(linked))
+ ptlrpc_at_add_timed(orig);
+ ptlrpc_server_drop_request(orig);
ptlrpc_nrs_req_finalize(req);
ptlrpc_nrs_req_finalize(req);
}
if (hp || req->rq_ops != NULL)
}
if (hp || req->rq_ops != NULL)