From 00c72ab6bb432ee1312282eed3dfae23ab8d0b42 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Fri, 15 Jun 2018 05:02:36 -0400 Subject: [PATCH] LU-11117 ptlrpc: don't zero request handle LNet can retransmit a request at any time if it has not been replied to. ptlrpc_resend_req zeroes the request handle and ptlrpc_send_rpc sets it. If a retransmission happens with a zeroed handle, the client can't find a valid export by handle, sets rq_export to NULL and replies with ENOTCONN. The server evicts the client on this error. client (nid x.x.x.x@tcp) returned error from blocking AST (req status -107 rc -107), evict it Signed-off-by: Alexander Boyko Cray-bug-id: LUS-6037 Change-Id: I198666d386fea99b46994f965c1519acb5743d75 Reviewed-on: https://review.whamcloud.com/32781 Reviewed-by: Mike Pershin Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alexey Lyashkov Reviewed-by: Oleg Drokin --- lustre/ptlrpc/client.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index d0e8d78..047eea7 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -2785,7 +2785,7 @@ void ptlrpc_cleanup_client(struct obd_import *imp) */ void ptlrpc_resend_req(struct ptlrpc_request *req) { - DEBUG_REQ(D_HA, req, "going to resend"); + DEBUG_REQ(D_HA, req, "going to resend"); spin_lock(&req->rq_lock); /* Request got reply but linked to the import list still. @@ -2796,14 +2796,13 @@ void ptlrpc_resend_req(struct ptlrpc_request *req) return; } - lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 }); - req->rq_status = -EAGAIN; + req->rq_status = -EAGAIN; - req->rq_resend = 1; - req->rq_net_err = 0; - req->rq_timedout = 0; + req->rq_resend = 1; + req->rq_net_err = 0; + req->rq_timedout = 0; - ptlrpc_client_wake_req(req); + ptlrpc_client_wake_req(req); spin_unlock(&req->rq_lock); } -- 1.8.3.1