From: Andriy Skulysh Date: Thu, 24 Nov 2022 13:18:04 +0000 (+0200) Subject: LU-16430 ptlrpc: racy rq_obsolete bit modification X-Git-Tag: 2.15.57~8 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=14ac768fd9633c5cf4474555170e5042c71a135b;p=fs%2Flustre-release.git LU-16430 ptlrpc: racy rq_obsolete bit modification Racy bit modification causes an assertion failure in ptlrpc_at_remove_timed(): ASSERTION( !list_empty(&req->rq_srv.sr_timed_list) ) rq_obsolete is a bit field, so its modification isn't atomic and should be done under rq_lock. Change-Id: Ib1d3ad189a78b71ecf5b01585478922e984c9568 HPE-bug-id: LUS-11368 Fixes: 23773b32bf ("LU-11444 ptlrpc: resend may corrupt the data") Signed-off-by: Andriy Skulysh Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49505 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexander Zarochentsev Reviewed-by: Neil Brown Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 6b700eb..71cd242 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -1008,7 +1008,7 @@ struct ptlrpc_request { rq_allow_intr:1; /** @} */ - /** server-side flags @{ */ + /** server-side flags are serialized by rq_lock @{ */ unsigned int rq_hp:1, /**< high priority RPC */ rq_at_linked:1, /**< link into service's srv_at_array */ diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 6c88753..7e3268f 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1688,7 +1688,9 @@ found: #ifdef HAVE_SERVER_SUPPORT static void ptlrpc_server_mark_obsolete(struct ptlrpc_request *req) { + spin_lock(&req->rq_lock); req->rq_obsolete = 1; + spin_unlock(&req->rq_lock); } static void @@ -1875,7 +1877,9 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt, ptlrpc_nrs_req_finalize(req); /* don't mark slot unused for resend in progress */ + spin_lock(&req->rq_lock); req->rq_obsolete = 1; + spin_unlock(&req->rq_lock); RETURN(-EBUSY); }