From 050420cf767986abce6d9bad8d2c0ea85fbbe571 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Wed, 1 May 2024 11:33:33 -0500 Subject: [PATCH] LU-17840 kfilnd: Race between peer del RKEY reuse kfilnd_peer object deletion is a two step process. First a flag (kfilnd_peer::kp_remove_peer = 1) is atomically set in the object to mark it for removal via a call to kfilnd_peer_del(). Then, the next caller of kfilnd_peer_put() will atomically modify this flag (kfilnd_peer::kp_remove_peer = 2) again to denote that it is removing the peer from the rhashtable before actually removing the object. The window between marking a peer for deletion and removing it from the peer cache allows a race where an RKEY may be re-used. For example: Thread 1: Posts tagged receive with RKEY based on peerA::kp_local_session_key X and tn_mr_key Y Thread 1: Cancels tagged receive Thread 1: kfilnd_peer_del() -> peerA::kp_remove_peer = 1 Thread 2: kfilnd_peer_put() -> peerA::kp_remove_peer = 2 Thread 1: kfilnd_peer_put() -> kfilnd_tn_finalize() -> releases tn_mr_key Y Thread 3: allocates tn_mr_key Y Thread 3: Fetches peerA with kp_local_session_key X Thread 2: Removes peerA from rhashtable At this point, thread 3 has the same RKEY used by thread 1. The fix is to check on the peer lookup path whether a peer found in the rhashtable has been marked for removal. If it has then we perform the lookup again. We do this in a loop until either no peer is found, or a peer is found that has not been marked for removal. To reduce the size of this window, the process for kfilnd_peer deletion is modified so that the first thread to call kfilnd_peer_del() will also remove the peer from the rhashtable. HPE-bug-id: LUS-12312 Test-Parameters: trivial testlist=sanity-lnet Signed-off-by: Chris Horn Change-Id: Ibbbb38cd5ee2d90956791f8350dafbee5fe5d888 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55071 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Ian Ziemba Reviewed-by: Ron Gredvig Reviewed-by: Oleg Drokin --- lnet/klnds/kfilnd/kfilnd_peer.c | 42 ++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/lnet/klnds/kfilnd/kfilnd_peer.c b/lnet/klnds/kfilnd/kfilnd_peer.c index 6c66739..8ad2dce 100644 --- a/lnet/klnds/kfilnd/kfilnd_peer.c +++ b/lnet/klnds/kfilnd/kfilnd_peer.c @@ -56,20 +56,36 @@ static void kfilnd_peer_free(void *ptr, void *arg) } /** - * kfilnd_peer_del() - Mark a peer for deletion + * kfilnd_peer_del() - Delete a peer from the peer cache. kp_remove_peer is used + * to prevent more than one thread from deleting the peer at once, and it + * informs threads on the allocation path that this peer is being deleted. When + * the peer is removed from the peer cache its allocation reference is returned + * and lnet is notified that this peer is down. * @kp: Peer to be deleted */ static void kfilnd_peer_del(struct kfilnd_peer *kp) { + + rcu_read_lock(); + if (atomic_cmpxchg(&kp->kp_remove_peer, 0, 1) == 0) { struct lnet_nid peer_nid; + rhashtable_remove_fast(&kp->kp_dev->peer_cache, &kp->kp_node, + peer_cache_params); + /* Return allocation reference */ + refcount_dec(&kp->kp_cnt); + + rcu_read_unlock(); + lnet_nid4_to_nid(kp->kp_nid, &peer_nid); - CDEBUG(D_NET, "%s(%p):0x%llx marked for removal from peer cache\n", + CDEBUG(D_NET, "%s(%p):0x%llx removed from peer cache\n", libcfs_nidstr(&peer_nid), kp, kp->kp_addr); lnet_notify(kp->kp_dev->kfd_ni, &peer_nid, false, false, kp->kp_last_alive); + } else { + rcu_read_unlock(); } } @@ -158,22 +174,8 @@ void kfilnd_peer_tn_failed(struct kfilnd_peer *kp, int error, bool delete) */ void kfilnd_peer_put(struct kfilnd_peer *kp) { - rcu_read_lock(); - - /* Return allocation reference if the peer was marked for removal. */ - if (atomic_cmpxchg(&kp->kp_remove_peer, 1, 2) == 1) { - rhashtable_remove_fast(&kp->kp_dev->peer_cache, &kp->kp_node, - peer_cache_params); - refcount_dec(&kp->kp_cnt); - - CDEBUG(D_NET, "%s(%p):0x%llx removed from peer cache\n", - libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr); - } - if (refcount_dec_and_test(&kp->kp_cnt)) kfilnd_peer_free(kp, NULL); - - rcu_read_unlock(); } u16 kfilnd_peer_target_rx_base(struct kfilnd_peer *kp) @@ -210,8 +212,14 @@ again: kp = NULL; rcu_read_unlock(); - if (kp) + if (kp) { + if (atomic_read(&kp->kp_remove_peer)) { + kfilnd_peer_put(kp); + goto again; + } + return kp; + } /* Allocate a new peer for the cache. */ kp = kzalloc(sizeof(*kp), GFP_KERNEL); -- 1.8.3.1