From 41bb553efcf58fcfb6bdd427a41c655c191480e0 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Thu, 31 Oct 2024 15:29:46 -0400 Subject: [PATCH] LU-18072 ptlrpc: do not search for duplicate cancel requests Cancel requests don't have any max inflight limitations, so they really could arrive in huge numbers, and if they also have a resent flag, that leads to a lot of very expensive duplicate searches that are, at the same time, totally unneeded, so let's skip the check for cancels. Change-Id: Id4be03a3c9406867adcdcfd31ed91ecc7b12f700 Signed-off-by: Oleg Drokin Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56843 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andriy Skulysh Reviewed-by: Mikhail Pershin Reviewed-by: Olaf Faaland --- lustre/ptlrpc/service.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 89bae0d..67ff96f 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1951,7 +1951,8 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, { int rc; bool hp; - struct ptlrpc_request *orig; + struct ptlrpc_request *orig = NULL; + int opc; ENTRY; @@ -1962,6 +1963,8 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, hp = rc > 0; ptlrpc_nrs_req_initialize(svcpt, req, hp); + opc = lustre_msg_get_opc(req->rq_reqmsg); + while (req->rq_export != NULL) { struct obd_export *exp = req->rq_export; @@ -1970,16 +1973,25 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, * atomically */ spin_lock_bh(&exp->exp_rpc_lock); + + /* Cancels are unbounded unlimited requests, they are also + * stateless, so we don't really want to search for duplicates + * as that can take a really long time (under spinlock at that).
+ There might be other requests like this and we might want to + make this code a bit more generic, but this should plug + the most obvious hole for now */ + if (opc != LDLM_CANCEL) { #ifdef HAVE_SERVER_SUPPORT - ptlrpc_server_mark_in_progress_obsolete(req); + ptlrpc_server_mark_in_progress_obsolete(req); #endif - orig = ptlrpc_server_check_resend_in_progress(req); - if (orig && CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_RESEND_RACE)) { - spin_unlock_bh(&exp->exp_rpc_lock); + orig = ptlrpc_server_check_resend_in_progress(req); + if (orig && CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_RESEND_RACE)) { + spin_unlock_bh(&exp->exp_rpc_lock); - CFS_RACE(OBD_FAIL_PTLRPC_RESEND_RACE); - msleep(4 * MSEC_PER_SEC); - continue; + CFS_RACE(OBD_FAIL_PTLRPC_RESEND_RACE); + msleep(4 * MSEC_PER_SEC); + continue; + } } if (orig && likely(atomic_inc_not_zero(&orig->rq_refcount))) { -- 1.8.3.1