From bb75072cb679bf52e00537c19e42f8e4e95255b6 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Tue, 5 Jul 2016 04:41:59 +0800 Subject: [PATCH] LU-6808 ptlrpc: properly set "rq_xid" for 4MB IO The commit d099fdd6 replaced "rq_xid" with "rq_mbits" as the matchbits for bulk data transfer. To be interoperable with old servers, it introduced a new connection flag: OBD_CONNECT_BULK_MBITS. If the server does not support this feature, then "rq_xid" is set to the same value as "rq_mbits". Unfortunately, it forgot to handle multiple bulk operations, for example 4MB IO. If a new client wants to make a 4MB IO with an old server, it may send a small "rq_xid" to the old server, so the old server will regard it as a 1MB IO or 2MB IO. The data transfer then cannot complete because only part of the data is transferred, and the client will time out and retry again and again. Test-Parameters: alwaysuploadlogs testlist=sanity envdefinitions=ONLY=224c ossjob=lustre-b2_7_fe mdsjob=lustre-b2_7_fe ossbuildno=95 mdsbuildno=95 mdsdistro=el6.7 ossdistro=el6.7 Signed-off-by: Fan Yong Change-Id: I9b1c0de13674f16443bef2b454c491e6c72b8ab3 Reviewed-on: http://review.whamcloud.com/22373 Tested-by: Jenkins Reviewed-by: Jinshan Xiong Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Liang Zhen Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_lib.c | 3 +-- lustre/ptlrpc/client.c | 8 ++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index e05873c..95e566d 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -3174,8 +3174,7 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc, if (req->rq_bulk_read) rc = sptlrpc_svc_wrap_bulk(req, desc); - if ((exp->exp_connect_data.ocd_connect_flags & - OBD_CONNECT_BULK_MBITS) != 0) + if (OCD_HAS_FLAG(&exp->exp_connect_data, BULK_MBITS)) req->rq_mbits = lustre_msg_get_mbits(req->rq_reqmsg); else /* old version, bulk matchbits is rq_xid */ 
req->rq_mbits = req->rq_xid; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 53a8603..59f4cc5 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -3249,8 +3249,7 @@ void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req) } else { /* needs to generate a new matchbits for resend */ __u64 old_mbits = req->rq_mbits; - if ((bd->bd_import->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_BULK_MBITS) != 0) { + if (OCD_HAS_FLAG(&bd->bd_import->imp_connect_data, BULK_MBITS)){ req->rq_mbits = ptlrpc_next_xid(); } else {/* old version transfers rq_xid to peer as matchbits */ spin_lock(&req->rq_import->imp_lock); @@ -3268,6 +3267,11 @@ void ptlrpc_set_bulk_mbits(struct ptlrpc_request *req) * see LU-1431 */ req->rq_mbits += ((bd->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV) - 1; + + /* Set rq_xid as rq_mbits to indicate the final bulk for the old + * server which does not support OBD_CONNECT_BULK_MBITS. LU-6808 */ + if (!OCD_HAS_FLAG(&bd->bd_import->imp_connect_data, BULK_MBITS)) + req->rq_xid = req->rq_mbits; } /** -- 1.8.3.1