From 21e6b6edbc95eef46c241335147818f1df871fa5 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Mon, 28 Nov 2016 15:47:23 +0800 Subject: [PATCH] LU-8867 ksocklnd: ignore timedout TX on closing connection The ksocklnd reaper thread always tries to close the connection of the first timed-out zero-copy TX. This is wrong if that connection is already being closed, because the reaper will then see the same TX again and again, and will never find the other timed-out zero-copy TXs and close the connections for them. Change-Id: I4552f556f17910857e589a8ae27682d976fcd991 Signed-off-by: Liang Zhen Reviewed-on: https://review.whamcloud.com/23973 Tested-by: Jenkins Reviewed-by: Doug Oucharek Tested-by: Maloo Reviewed-by: Oleg Drokin --- lnet/klnds/socklnd/socklnd_cb.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index a813cdd..3b8fa74 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -2422,9 +2422,10 @@ ksocknal_check_peer_timeouts (int idx) read_lock(&ksocknal_data.ksnd_global_lock); list_for_each_entry(peer, peers, ksnp_list) { - cfs_time_t deadline = 0; - int resid = 0; - int n = 0; + ksock_tx_t *tx_stale; + cfs_time_t deadline = 0; + int resid = 0; + int n = 0; if (ksocknal_send_keepalive_locked(peer) != 0) { read_unlock(&ksocknal_data.ksnd_global_lock); @@ -2468,6 +2469,7 @@ ksocknal_check_peer_timeouts (int idx) if (list_empty(&peer->ksnp_zc_req_list)) continue; + tx_stale = NULL; spin_lock(&peer->ksnp_lock); list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) { if (!cfs_time_aftereq(cfs_time_current(), @@ -2477,28 +2479,28 @@ ksocknal_check_peer_timeouts (int idx) if (tx->tx_conn->ksnc_closing) continue; n++; + if (tx_stale == NULL) + tx_stale = tx; } - if (n == 0) { + if (tx_stale == NULL) { spin_unlock(&peer->ksnp_lock); - continue; - } + continue; + } - tx = list_entry(peer->ksnp_zc_req_list.next, - ksock_tx_t, 
tx_zc_list); - deadline = tx->tx_deadline; - resid = tx->tx_resid; - conn = tx->tx_conn; - ksocknal_conn_addref(conn); + deadline = tx_stale->tx_deadline; + resid = tx_stale->tx_resid; + conn = tx_stale->tx_conn; + ksocknal_conn_addref(conn); spin_unlock(&peer->ksnp_lock); read_unlock(&ksocknal_data.ksnd_global_lock); - CERROR("Total %d stale ZC_REQs for peer %s detected; the " - "oldest(%p) timed out %ld secs ago, " - "resid: %d, wmem: %d\n", - n, libcfs_nid2str(peer->ksnp_id.nid), tx, - cfs_duration_sec(cfs_time_current() - deadline), + CERROR("Total %d stale ZC_REQs for peer %s detected; the " + "oldest(%p) timed out %ld secs ago, " + "resid: %d, wmem: %d\n", + n, libcfs_nid2str(peer->ksnp_id.nid), tx_stale, + cfs_duration_sec(cfs_time_current() - deadline), resid, conn->ksnc_sock->sk->sk_wmem_queued); ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); -- 1.8.3.1