From: Serguei Smirnov Date: Fri, 23 Sep 2022 19:29:59 +0000 (-0700) Subject: LU-16184 o2iblnd: fix deadline for tx on peer queue X-Git-Tag: 2.15.53~167 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=4c89ee7d7b098c7f1e6566f49fa2940db577518d;p=fs%2Flustre-release.git LU-16184 o2iblnd: fix deadline for tx on peer queue In o2iblnd, deadline is checked for txs on peer queue, but not set prior to adding the tx to the queue. This may cause the tx to be dropped unnecessarily with "Timed out tx for ..." warning. Fix it by setting the tx_deadline when adding tx to peer queue. Test-Parameters: trivial Signed-off-by: Serguei Smirnov Change-Id: Ie7cf5590b440b60f71527049953a64bb31d53578 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48640 Reviewed-by: Cyril Bordage Reviewed-by: Frank Sehr Reviewed-by: Amir Shehata Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 9c2b574..a18b1b78 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1529,6 +1529,7 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) int rc; int i; struct lnet_ioctl_config_o2iblnd_tunables *tunables; + s64 timeout_ns; /* If I get here, I've committed to send, so I complete the tx with * failure on any problems @@ -1556,6 +1557,7 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) return; } + timeout_ns = kiblnd_timeout() * NSEC_PER_SEC; read_unlock(g_lock); /* Re-try with a write lock */ write_lock(g_lock); @@ -1565,9 +1567,12 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) if (list_empty(&peer_ni->ibp_conns)) { /* found a peer_ni, but it's still connecting... */ LASSERT(kiblnd_peer_connecting(peer_ni)); - if (tx != NULL) + if (tx != NULL) { + tx->tx_deadline = ktime_add_ns(ktime_get(), + timeout_ns); list_add_tail(&tx->tx_list, &peer_ni->ibp_tx_queue); + } write_unlock_irqrestore(g_lock, flags); } else { conn = kiblnd_get_conn_locked(peer_ni); @@ -1604,9 +1609,12 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) if (list_empty(&peer2->ibp_conns)) { /* found a peer_ni, but it's still connecting... */ LASSERT(kiblnd_peer_connecting(peer2)); - if (tx != NULL) + if (tx != NULL) { + tx->tx_deadline = ktime_add_ns(ktime_get(), + timeout_ns); list_add_tail(&tx->tx_list, &peer2->ibp_tx_queue); + } write_unlock_irqrestore(g_lock, flags); } else { conn = kiblnd_get_conn_locked(peer2); @@ -1631,8 +1639,10 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) /* always called with a ref on ni, which prevents ni being shutdown */ LASSERT(((struct kib_net *)ni->ni_data)->ibn_shutdown == 0); - if (tx != NULL) + if (tx != NULL) { + tx->tx_deadline = ktime_add_ns(ktime_get(), timeout_ns); list_add_tail(&tx->tx_list, &peer_ni->ibp_tx_queue); + } kiblnd_peer_addref(peer_ni); hash_add(kiblnd_data.kib_peers, &peer_ni->ibp_list, nid);