LU-13984 ptlrpc: throttle RPC resend if network error

author Aurelien Degremont <degremoa@amazon.com>

Wed, 23 Sep 2020 19:20:08 +0000 (19:20 +0000)

committer Oleg Drokin <green@whamcloud.com>

Tue, 3 Nov 2020 03:40:12 +0000 (03:40 +0000)
author Aurelien Degremont <degremoa@amazon.com>
Wed, 23 Sep 2020 19:20:08 +0000 (19:20 +0000)
committer Oleg Drokin <green@whamcloud.com>
Tue, 3 Nov 2020 03:40:12 +0000 (03:40 +0000)
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c

index a902a5b..368732a 100644 (file)
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -2001,6 +2001,27 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
                                         GOTO(interpret, req->rq_status);
                                 }
  
+                               /* don't resend too fast in case of network
+                                * errors.
+                                */
+                               if (ktime_get_real_seconds() < (req->rq_sent + 1)
+                                   && req->rq_net_err && req->rq_timedout) {
+
+                                       DEBUG_REQ(D_INFO, req,
+                                                 "throttle request");
+                                       /* Don't try to resend RPC right away
+                                        * as it is likely it will fail again
+                                        * and ptlrpc_check_set() will be
+                                        * called again, keeping this thread
+                                        * busy. Instead, wait for the next
+                                        * timeout. Flag it as resend to
+                                        * ensure we don't wait too long.
+                                        */
+                                       req->rq_resend = 1;
+                                       spin_unlock(&imp->imp_lock);
+                                       continue;
+                               }
+
                                 list_move_tail(&req->rq_list,
                                                &imp->imp_sending_list);
author	Aurelien Degremont <degremoa@amazon.com>
	Wed, 23 Sep 2020 19:20:08 +0000 (19:20 +0000)
committer	Oleg Drokin <green@whamcloud.com>
	Tue, 3 Nov 2020 03:40:12 +0000 (03:40 +0000)