Whamcloud - gitweb
LU-18072 ptlrpc: do not search for duplicate cancel requests 43/56843/5
authorOleg Drokin <green@whamcloud.com>
Thu, 31 Oct 2024 19:29:46 +0000 (15:29 -0400)
committerOleg Drokin <green@whamcloud.com>
Fri, 25 Apr 2025 00:51:15 +0000 (00:51 +0000)
Cancel requests don't have any max-inflight limitation, so they really
can arrive in huge numbers. If they also have the resent flag set, that
leads to a lot of very expensive duplicate searches that are totally
unneeded, so let's skip the check for cancels.

Change-Id: Id4be03a3c9406867adcdcfd31ed91ecc7b12f700
Signed-off-by: Oleg Drokin <green@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56843
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
lustre/ptlrpc/service.c

index 89bae0d..67ff96f 100644 (file)
@@ -1951,7 +1951,8 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
 {
        int rc;
        bool hp;
-       struct ptlrpc_request *orig;
+       struct ptlrpc_request *orig = NULL;
+       int opc;
 
        ENTRY;
 
@@ -1962,6 +1963,8 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
        hp = rc > 0;
        ptlrpc_nrs_req_initialize(svcpt, req, hp);
 
+       opc = lustre_msg_get_opc(req->rq_reqmsg);
+
        while (req->rq_export != NULL) {
                struct obd_export *exp = req->rq_export;
 
@@ -1970,16 +1973,25 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
                 * atomically
                 */
                spin_lock_bh(&exp->exp_rpc_lock);
+
+               /* Cancels are unbounded unlimited requests, they are also
+                * stateless, so we don't really want to search for duplicates
+                * as that can take a really long time (under spinlock at that).
+                * There might be other requests like this and we might want to
+                * make this code a bit more generic, but this should plug
+                * the most obvious hole for now */
+               if (opc != LDLM_CANCEL) {
 #ifdef HAVE_SERVER_SUPPORT
-               ptlrpc_server_mark_in_progress_obsolete(req);
+                       ptlrpc_server_mark_in_progress_obsolete(req);
 #endif
-               orig = ptlrpc_server_check_resend_in_progress(req);
-               if (orig && CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_RESEND_RACE)) {
-                       spin_unlock_bh(&exp->exp_rpc_lock);
+                       orig = ptlrpc_server_check_resend_in_progress(req);
+                       if (orig && CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_RESEND_RACE)) {
+                               spin_unlock_bh(&exp->exp_rpc_lock);
 
-                       CFS_RACE(OBD_FAIL_PTLRPC_RESEND_RACE);
-                       msleep(4 * MSEC_PER_SEC);
-                       continue;
+                               CFS_RACE(OBD_FAIL_PTLRPC_RESEND_RACE);
+                               msleep(4 * MSEC_PER_SEC);
+                               continue;
+                       }
                }
 
                if (orig && likely(atomic_inc_not_zero(&orig->rq_refcount))) {