Whamcloud - gitweb
LU-16430 ptlrpc: racy rq_obsolete bit modification 05/49505/6
author Andriy Skulysh <andriy.skulysh@hpe.com>
Thu, 24 Nov 2022 13:18:04 +0000 (15:18 +0200)
committer Oleg Drokin <green@whamcloud.com>
Tue, 1 Aug 2023 06:14:27 +0000 (06:14 +0000)
Racy bit modification causes assertion failure in
ptlrpc_at_remove_timed():
ASSERTION( !list_empty(&req->rq_srv.sr_timed_list) )

rq_obsolete is a bit field, so its modification
isn't atomic; it should only be changed under rq_lock.

Change-Id: Ib1d3ad189a78b71ecf5b01585478922e984c9568
HPE-bug-id: LUS-11368
Fixes:  23773b32bf ("LU-11444 ptlrpc: resend may corrupt the data")
Signed-off-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49505
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alexander Zarochentsev <alexander.zarochentsev@hpe.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_net.h
lustre/ptlrpc/service.c

index 6b700eb..71cd242 100644 (file)
@@ -1008,7 +1008,7 @@ struct ptlrpc_request {
                rq_allow_intr:1;
        /** @} */
 
-       /** server-side flags @{ */
+       /** server-side flags are serialized by rq_lock @{ */
        unsigned int
                rq_hp:1,                /**< high priority RPC */
                rq_at_linked:1,         /**< link into service's srv_at_array */
index 6c88753..7e3268f 100644 (file)
@@ -1688,7 +1688,9 @@ found:
 #ifdef HAVE_SERVER_SUPPORT
 static void ptlrpc_server_mark_obsolete(struct ptlrpc_request *req)
 {
+       spin_lock(&req->rq_lock);
        req->rq_obsolete = 1;
+       spin_unlock(&req->rq_lock);
 }
 
 static void
@@ -1875,7 +1877,9 @@ static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
                        ptlrpc_nrs_req_finalize(req);
 
                        /* don't mark slot unused for resend in progress */
+                       spin_lock(&req->rq_lock);
                        req->rq_obsolete = 1;
+                       spin_unlock(&req->rq_lock);
 
                        RETURN(-EBUSY);
                }