From 92b6fbdd2266846bfc22663841a0805ba7b823d3 Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Mon, 13 May 2024 10:48:56 +0300 Subject: [PATCH] LU-16430 ptlrpc: racy rq_obsolete bit modification Racy bit modification causes assertion failure in ptlrpc_at_remove_timed(): ASSERTION( !list_empty(&req->rq_srv.sr_timed_list) ) rq_obsolete is a bit field, so its modification isn't atomic and should be done under rq_lock. Lustre-Commit: 14ac768fd9633c5cf4474555170e5042c71a135b Lustre-Change: https://review.whamcloud.com/49505 Change-Id: Ib1d3ad189a78b71ecf5b01585478922e984c9568 Signed-off-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/55086 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/lustre_net.h | 2 +- lustre/ptlrpc/service.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index e5d63c8..38b0376 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -1006,7 +1006,7 @@ struct ptlrpc_request { rq_allow_intr:1; /** @} */ - /** server-side flags @{ */ + /** server-side flags are serialized by rq_lock @{ */ unsigned int rq_hp:1, /**< high priority RPC */ rq_at_linked:1, /**< link into service's srv_at_array */ diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 25325d3..d5debae 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1670,7 +1670,9 @@ found: #ifdef HAVE_SERVER_SUPPORT static void ptlrpc_server_mark_obsolete(struct ptlrpc_request *req) { + spin_lock(&req->rq_lock); req->rq_obsolete = 1; + spin_unlock(&req->rq_lock); } static void @@ -1856,7 +1858,9 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, ptlrpc_nrs_req_finalize(req); /* don't mark slot unused for resend in progress */ + spin_lock(&req->rq_lock); req->rq_obsolete = 1; + spin_unlock(&req->rq_lock); RETURN(-EBUSY); } -- 1.8.3.1