Sometimes a connection can't be established for a long time
due to rejections, producing a cycle of reconnections.
Unlike the connection, the peer is not removed in each iteration.
Thus, until the connection becomes established, txs remain in
peer->ibp_tx_queue. This patch adds tx_deadline checking
for txs on the peer's tx_queue.
Change-Id: Id2623285c735d1dff40ec755a5c8d20e9c62e60a
Signed-off-by: Sergey Cheremencev <sergey.cheremencev@seagate.com>
Seagate-bug-id: MRP-4056
Reviewed-on: https://review.whamcloud.com/25376
Tested-by: Jenkins
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: Amir Shehata <amir.shehata@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
{
struct list_head closes = LIST_HEAD_INIT(closes);
struct list_head checksends = LIST_HEAD_INIT(checksends);
{
struct list_head closes = LIST_HEAD_INIT(closes);
struct list_head checksends = LIST_HEAD_INIT(checksends);
+ struct list_head timedout_txs = LIST_HEAD_INIT(timedout_txs);
struct list_head *peers = &kiblnd_data.kib_peers[idx];
struct list_head *ptmp;
kib_peer_ni_t *peer_ni;
kib_conn_t *conn;
struct list_head *peers = &kiblnd_data.kib_peers[idx];
struct list_head *ptmp;
kib_peer_ni_t *peer_ni;
kib_conn_t *conn;
struct list_head *ctmp;
unsigned long flags;
/* NB. We expect to have a look at all the peers and not find any
* RDMAs to time out, so we just use a shared lock while we
* take a look... */
struct list_head *ctmp;
unsigned long flags;
/* NB. We expect to have a look at all the peers and not find any
* RDMAs to time out, so we just use a shared lock while we
* take a look... */
- read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
list_for_each(ptmp, peers) {
peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list);
list_for_each(ptmp, peers) {
peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list);
+ /* Check tx_deadline */
+ list_for_each_entry_safe(tx, tx_tmp, &peer_ni->ibp_tx_queue, tx_list) {
+ if (cfs_time_aftereq(jiffies, tx->tx_deadline)) {
+ CWARN("Timed out tx for %s: %lu seconds\n",
+ libcfs_nid2str(peer_ni->ibp_nid),
+ cfs_duration_sec(jiffies - tx->tx_deadline));
+ list_move(&tx->tx_list, &timedout_txs);
+ }
+ }
+
list_for_each(ctmp, &peer_ni->ibp_conns) {
int timedout;
int sendnoop;
list_for_each(ctmp, &peer_ni->ibp_conns) {
int timedout;
int sendnoop;
- read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+ write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ if (!list_empty(&timedout_txs))
+ kiblnd_txlist_done(&timedout_txs, -ETIMEDOUT);
/* Handle timeout by closing the whole
* connection. We can only be sure RDMA activity
/* Handle timeout by closing the whole
* connection. We can only be sure RDMA activity