From f054443e9c03b71a934fd7d05475fb55e50be8c8 Mon Sep 17 00:00:00 2001 From: Chris Horn Date: Tue, 7 Nov 2023 14:14:42 -0700 Subject: [PATCH] LU-17271 kfilnd: Protect RKEY for bulk Put/Get The initiator of a bulk Put/Get generates an RKEY based on the the values of the struct kfilnd_tn::tn_mr_key and struct kfilnd_peer::kp_local_session_key. kp_local_session_key is assigned at peer creation, and tn_mr_key is assigned when the kfilnd_tn is allocated. A bulk Put/Get can fail in various ways such that the target of the operation may have a reference to the RKEY, but the originator cannot know the state of the operation at the target. In these cases, the initiator must ensure that the RKEY is not re-used. To accomplish this, we need to delete the target peer from the originator's peer cache to ensure that subsequent bulk Put/Get operations will use a new kp_local_session_key, and thus avoid re-using any old RKEY values. HPE-bug-id: LUS-11972 Test-Parameters: trivial Signed-off-by: Chris Horn Change-Id: If270a2df745ee88c35addc8194cdb160cb373c3e Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53028 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Ron Gredvig Reviewed-by: Ian Ziemba Reviewed-by: Oleg Drokin --- lnet/klnds/kfilnd/kfilnd_peer.c | 45 +++++++++++++++++++--------------- lnet/klnds/kfilnd/kfilnd_peer.h | 3 +-- lnet/klnds/kfilnd/kfilnd_tn.c | 54 +++++++++++++++++++++++++++++++++++------ 3 files changed, 72 insertions(+), 30 deletions(-) diff --git a/lnet/klnds/kfilnd/kfilnd_peer.c b/lnet/klnds/kfilnd/kfilnd_peer.c index 412d2ee..7288208 100644 --- a/lnet/klnds/kfilnd/kfilnd_peer.c +++ b/lnet/klnds/kfilnd/kfilnd_peer.c @@ -56,6 +56,24 @@ static void kfilnd_peer_free(void *ptr, void *arg) } /** + * kfilnd_peer_del() - Mark a peer for deletion + * @kp: Peer to be deleted + */ +static void kfilnd_peer_del(struct kfilnd_peer *kp) +{ + if (atomic_cmpxchg(&kp->kp_remove_peer, 0, 1) == 0) { + struct lnet_nid peer_nid; + + lnet_nid4_to_nid(kp->kp_nid, &peer_nid); + CDEBUG(D_NET, "%s(%p):0x%llx marked for removal from peer cache\n", + libcfs_nidstr(&peer_nid), kp, kp->kp_addr); + + lnet_notify(kp->kp_dev->kfd_ni, &peer_nid, false, false, + kp->kp_last_alive); + } +} + +/** * kfilnd_peer_purge_old_peer() - Delete the specified peer from the cache * if we haven't heard from it within 5x LND timeouts. * @kp: The peer to be checked or purged @@ -115,36 +133,23 @@ static void kfilnd_peer_down(struct kfilnd_peer *kp) /** * kfilnd_peer_tn_failed() - A transaction with this peer has failed. Mark the - * peer as either stale or down depending on the provided error value. - * @kp: The peer to be marked down or stale + * peer as either stale or down depending on the provided error value. If + * @delete is true we also delete the peer from the cache. + * @kp: The peer to be marked down, stale, or deleted. * @error: An errno indicating why the transaction failed. + * @delete: Whether to delete the peer * Note: We currently only consider EHOSTUNREACH which corresponds to * C_RC_UNDELIVERABLE, and ENOTCONN which corresponds to C_RC_VNI_NOT_FOUND. */ -void kfilnd_peer_tn_failed(struct kfilnd_peer *kp, int error) +void kfilnd_peer_tn_failed(struct kfilnd_peer *kp, int error, bool delete) { if (error == -EHOSTUNREACH || error == -ENOTCONN) kfilnd_peer_down(kp); else kfilnd_peer_stale(kp); -} - -/** - * kfilnd_peer_del() - Mark a peer for deletion - * @kp: Peer to be deleted - */ -void kfilnd_peer_del(struct kfilnd_peer *kp) -{ - if (atomic_cmpxchg(&kp->kp_remove_peer, 0, 1) == 0) { - struct lnet_nid peer_nid; - lnet_nid4_to_nid(kp->kp_nid, &peer_nid); - CDEBUG(D_NET, "%s(%p):0x%llx marked for removal from peer cache\n", - libcfs_nidstr(&peer_nid), kp, kp->kp_addr); - - lnet_notify(kp->kp_dev->kfd_ni, &peer_nid, false, false, - kp->kp_last_alive); - } + if (delete) + kfilnd_peer_del(kp); } /** diff --git a/lnet/klnds/kfilnd/kfilnd_peer.h b/lnet/klnds/kfilnd/kfilnd_peer.h index 963d802..a723c6a 100644 --- a/lnet/klnds/kfilnd/kfilnd_peer.h +++ b/lnet/klnds/kfilnd/kfilnd_peer.h @@ -34,7 +34,6 @@ #include "kfilnd.h" -void kfilnd_peer_del(struct kfilnd_peer *kp); void kfilnd_peer_put(struct kfilnd_peer *kp); struct kfilnd_peer *kfilnd_peer_get(struct kfilnd_dev *dev, lnet_nid_t nid); void kfilnd_peer_alive(struct kfilnd_peer *kp); @@ -43,6 +42,6 @@ void kfilnd_peer_init(struct kfilnd_dev *dev); kfi_addr_t kfilnd_peer_get_kfi_addr(struct kfilnd_peer *kp); u16 kfilnd_peer_target_rx_base(struct kfilnd_peer *kp); void kfilnd_peer_process_hello(struct kfilnd_peer *kp, struct kfilnd_msg *msg); -void kfilnd_peer_tn_failed(struct kfilnd_peer *kp, int error); +void kfilnd_peer_tn_failed(struct kfilnd_peer *kp, int error, bool delete); #endif /* _KFILND_PEER_ */ diff --git a/lnet/klnds/kfilnd/kfilnd_tn.c b/lnet/klnds/kfilnd/kfilnd_tn.c index 0b802fa..243ca29 100644 --- a/lnet/klnds/kfilnd/kfilnd_tn.c +++ b/lnet/klnds/kfilnd/kfilnd_tn.c @@ -919,7 +919,10 @@ static int kfilnd_tn_state_imm_send(struct kfilnd_transaction *tn, hstatus = LNET_MSG_STATUS_REMOTE_ERROR; kfilnd_tn_status_update(tn, status, hstatus); - kfilnd_peer_tn_failed(tn->tn_kp, status); + /* RKEY is not involved in immediate sends, so no need to + * delete peer + */ + kfilnd_peer_tn_failed(tn->tn_kp, status, false); if (tn->msg_type == KFILND_MSG_HELLO_REQ) kfilnd_peer_clear_hello_pending(tn->tn_kp); break; @@ -1095,7 +1098,8 @@ static int kfilnd_tn_state_wait_comp(struct kfilnd_transaction *tn, CFS_FAIL_CHECK(CFS_KFI_FAIL_WAIT_SEND_COMP3)) { hstatus = LNET_MSG_STATUS_REMOTE_ERROR; kfilnd_tn_status_update(tn, -EIO, hstatus); - kfilnd_peer_tn_failed(tn->tn_kp, -EIO); + /* Don't delete peer on debug/test path */ + kfilnd_peer_tn_failed(tn->tn_kp, -EIO, false); kfilnd_tn_state_change(tn, TN_STATE_FAIL); break; } @@ -1128,7 +1132,15 @@ static int kfilnd_tn_state_wait_comp(struct kfilnd_transaction *tn, hstatus = LNET_MSG_STATUS_REMOTE_ERROR; kfilnd_tn_status_update(tn, status, hstatus); - kfilnd_peer_tn_failed(tn->tn_kp, status); + /* The bulk request message failed, however, there is an edge + * case where the last request packet of a message is received + * at the target successfully, but the corresponding response + * packet is repeatedly dropped. This results in the target + * generating a success completion event but the initiator + * generating an error completion event. Due to this, we have to + * delete the peer here to protect the RKEY. + */ + kfilnd_peer_tn_failed(tn->tn_kp, status, true); /* Need to cancel the tagged receive to prevent resources from * being leaked. @@ -1162,6 +1174,10 @@ static int kfilnd_tn_state_wait_comp(struct kfilnd_transaction *tn, case TN_EVENT_TAG_RX_FAIL: kfilnd_tn_status_update(tn, status, LNET_MSG_STATUS_LOCAL_ERROR); + /* The target may hold a reference to the RKEY, so we need to + * delete the peer to protect it + */ + kfilnd_peer_tn_failed(tn->tn_kp, status, true); kfilnd_tn_state_change(tn, TN_STATE_FAIL); break; @@ -1187,7 +1203,10 @@ static int kfilnd_tn_state_wait_send_comp(struct kfilnd_transaction *tn, case TN_EVENT_TX_FAIL: kfilnd_tn_status_update(tn, status, LNET_MSG_STATUS_NETWORK_TIMEOUT); - kfilnd_peer_tn_failed(tn->tn_kp, status); + /* The bulk request message was never queued so we do not need + * to delete the peer + */ + kfilnd_peer_tn_failed(tn->tn_kp, status, false); break; default: KFILND_TN_ERROR(tn, "Invalid %s event", tn_event_to_str(event)); @@ -1220,7 +1239,11 @@ static int kfilnd_tn_state_wait_tag_rma_comp(struct kfilnd_transaction *tn, hstatus = LNET_MSG_STATUS_REMOTE_ERROR; kfilnd_tn_status_update(tn, status, hstatus); - kfilnd_peer_tn_failed(tn->tn_kp, status); + /* This event occurrs at the target of a bulk LNetPut/Get. + * Since the target did not generate the RKEY, we needn't + * delete the peer. + */ + kfilnd_peer_tn_failed(tn->tn_kp, status, false); break; default: @@ -1301,7 +1324,11 @@ static int kfilnd_tn_state_wait_tag_comp(struct kfilnd_transaction *tn, hstatus = LNET_MSG_STATUS_REMOTE_ERROR; kfilnd_tn_status_update(tn, status, hstatus); - kfilnd_peer_tn_failed(tn->tn_kp, status); + /* This event occurrs at the target of a bulk LNetPut/Get. + * Since the target did not generate the RKEY, we needn't + * delete the peer. + */ + kfilnd_peer_tn_failed(tn->tn_kp, status, false); break; case TN_EVENT_TAG_TX_OK: @@ -1327,7 +1354,8 @@ static int kfilnd_tn_state_fail(struct kfilnd_transaction *tn, switch (event) { case TN_EVENT_TX_FAIL: - kfilnd_peer_tn_failed(tn->tn_kp, status); + /* Prior TN states will have deleted the peer if necessary */ + kfilnd_peer_tn_failed(tn->tn_kp, status, false); break; case TN_EVENT_TX_OK: @@ -1373,12 +1401,22 @@ static int kfilnd_tn_state_wait_timeout_tag_comp(struct kfilnd_transaction *tn, case TN_EVENT_TAG_RX_CANCEL: kfilnd_tn_status_update(tn, -ETIMEDOUT, LNET_MSG_STATUS_NETWORK_TIMEOUT); - kfilnd_peer_tn_failed(tn->tn_kp, -ETIMEDOUT); + /* We've cancelled locally, but the target may still have a ref + * on the RKEY. Delete the peer to protect it. + */ + kfilnd_peer_tn_failed(tn->tn_kp, -ETIMEDOUT, true); break; case TN_EVENT_TAG_RX_FAIL: kfilnd_tn_status_update(tn, status, LNET_MSG_STATUS_LOCAL_ERROR); + /* The initiator of a bulk LNetPut/Get eagerly sends the bulk + * request message to the target without ensuring the tagged + * receive buffer is posted. Thus, the target could be issuing + * kfi_write/read operations using the tagged receive buffer + * RKEY, and we need to delete this peer to protect the it. + */ + kfilnd_peer_tn_failed(tn->tn_kp, status, true); break; case TN_EVENT_TAG_RX_OK: -- 1.8.3.1