/* if we NETERROR, make sure it is rate limited */
if (!kgnilnd_conn_clean_errno(error) &&
- peer->gnp_down == GNILND_RCA_NODE_UP) {
+ peer->gnp_state != GNILND_PEER_DOWN) {
CNETERR("closing conn to %s: error %d\n",
libcfs_nid2str(peer->gnp_nid), error);
} else {
logmsg = (nlive + nrdma + nq_rdma);
if (logmsg) {
- if (conn->gnc_peer->gnp_down == GNILND_RCA_NODE_UP) {
- CNETERR("Closed conn 0x%p->%s (errno %d, peer errno %d): "
- "canceled %d TX, %d/%d RDMA\n",
- conn, libcfs_nid2str(conn->gnc_peer->gnp_nid),
- conn->gnc_error, conn->gnc_peer_error,
- nlive, nq_rdma, nrdma);
- } else {
- CDEBUG(D_NET, "Closed conn 0x%p->%s (errno %d,"
- " peer errno %d): canceled %d TX, %d/%d RDMA\n",
- conn, libcfs_nid2str(conn->gnc_peer->gnp_nid),
- conn->gnc_error, conn->gnc_peer_error,
- nlive, nq_rdma, nrdma);
- }
+ int level = conn->gnc_peer->gnp_state == GNILND_PEER_UP ?
+ D_NETERROR : D_NET;
+ CDEBUG(level, "Closed conn 0x%p->%s (errno %d,"
+ " peer errno %d): canceled %d TX, %d/%d RDMA\n",
+ conn, libcfs_nid2str(conn->gnc_peer->gnp_nid),
+ conn->gnc_error, conn->gnc_peer_error,
+ nlive, nq_rdma, nrdma);
}
kgnilnd_destroy_conn_ep(conn);
return -ENOMEM;
}
peer->gnp_nid = nid;
- peer->gnp_down = node_state;
+ peer->gnp_state = node_state;
/* translate from nid to nic addr & store */
rc = kgnilnd_nid_to_nicaddrs(LNET_NIDADDR(nid), 1, &peer->gnp_host_id);
write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
/* Don't add a peer for node up events */
- if (down == GNILND_RCA_NODE_UP) {
+ if (down == GNILND_PEER_UP)
return 0;
- }
/* find any valid net - we don't care which one... */
down_read(&kgnilnd_data.kgn_net_rw_sem);
}
}
- peer->gnp_down = down;
+ peer->gnp_state = down;
- if (down == GNILND_RCA_NODE_DOWN) {
+ if (down == GNILND_PEER_DOWN) {
kgn_conn_t *conn;
peer->gnp_down_event_time = jiffies;
write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
- if (down == GNILND_RCA_NODE_DOWN) {
+ if (down == GNILND_PEER_DOWN) {
/* using ENETRESET so we don't get messages from
* kgnilnd_tx_done
*/
#define GNILND_DEL_PEER 1
#define GNILND_CLEAR_PURGATORY 2
-#define GNILND_RCA_NODE_UP 0
-#define GNILND_RCA_NODE_DOWN 1
-#define GNILND_RCA_NODE_UNKNOWN 2
+#define GNILND_PEER_UP 0
+#define GNILND_PEER_DOWN 1
+#define GNILND_PEER_TIMED_OUT 2
+#define GNILND_PEER_UNKNOWN 3
/* defines for reverse RDMA states */
#define GNILND_REVERSE_NONE 0
int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */
int *kgn_reg_fail_timeout; /* registration failure timeout */
int *kgn_thread_affinity; /* bind scheduler threads to cpus */
+ int *kgn_to_reconn_disable;/* disable reconnect after timeout */
int *kgn_thread_safe; /* use thread safe kgni API */
} kgn_tunables_t;
unsigned long gnp_reconnect_time; /* get_seconds() when reconnect OK */
unsigned long gnp_reconnect_interval; /* exponential backoff */
atomic_t gnp_dirty_eps; /* # of old but yet to be destroyed EPs from conns */
- int gnp_down; /* rca says peer down */
+ int gnp_state; /* up/down/timedout */
unsigned long gnp_down_event_time; /* time peer down */
unsigned long gnp_up_event_time; /* time peer back up */
} kgn_peer_t;
}
/* don't create a connection if the peer is marked down */
- if (peer->gnp_down == GNILND_RCA_NODE_DOWN) {
+ if (peer->gnp_state != GNILND_PEER_UP) {
read_unlock(&kgnilnd_data.kgn_peer_conn_lock);
rc = -ENETRESET;
GOTO(no_peer, rc);
kgnilnd_add_peer_locked(target->nid, new_peer, &peer);
/* don't create a connection if the peer is not up */
- if (peer->gnp_down != GNILND_RCA_NODE_UP) {
+ if (peer->gnp_state != GNILND_PEER_UP) {
write_unlock(&kgnilnd_data.kgn_peer_conn_lock);
rc = -ENETRESET;
GOTO(no_peer, rc);
if (time_after_eq(now, newest_last_rx + timeout)) {
uint32_t level = D_CONSOLE|D_NETERROR;
- if (conn->gnc_peer->gnp_down == GNILND_RCA_NODE_DOWN) {
+ if (conn->gnc_peer->gnp_state == GNILND_PEER_DOWN) {
level = D_NET;
}
GNIDBG_CONN(level, conn,
conn->gnc_close_recvd = GNILND_CLOSE_INJECT1;
conn->gnc_peer_error = -ETIMEDOUT;
}
+
+ if (*kgnilnd_tunables.kgn_to_reconn_disable &&
+ rc == -ETIMEDOUT) {
+ peer->gnp_state = GNILND_PEER_TIMED_OUT;
+ CDEBUG(D_WARNING, "%s conn timed out, will "
+ "reconnect upon request from peer\n",
+ libcfs_nid2str(conn->gnc_peer->gnp_nid));
+ }
/* Once we mark closed, any of the scheduler threads could
* get it and move through before we hit the fail loc code */
kgnilnd_close_conn_locked(conn, rc);
/* Don't reconnect if we are still trying to clear out old conns.
* This prevents us sending traffic on the new mbox before ensuring we are done
* with the old one */
- reconnect = (peer->gnp_down == GNILND_RCA_NODE_UP) &&
+ reconnect = (peer->gnp_state == GNILND_PEER_UP) &&
(atomic_read(&peer->gnp_dirty_eps) == 0);
/* fast reconnect after a timeout */
/* assume this is a new peer - it makes locking cleaner when it isn't */
/* no holding kgn_net_rw_sem - already are at the kgnilnd_dgram_mover level */
- rc = kgnilnd_create_peer_safe(&new_peer, her_nid, NULL, GNILND_RCA_NODE_UP);
+ rc = kgnilnd_create_peer_safe(&new_peer, her_nid, NULL, GNILND_PEER_UP);
if (rc != 0) {
CERROR("Can't create peer for %s\n", libcfs_nid2str(her_nid));
return rc;
}
}
- if (peer->gnp_down == GNILND_RCA_NODE_DOWN) {
+ if (peer->gnp_state == GNILND_PEER_DOWN) {
CNETERR("Received connection request from down nid %s\n",
libcfs_nid2str(her_nid));
- peer->gnp_down = GNILND_RCA_NODE_UP;
}
+ peer->gnp_state = GNILND_PEER_UP;
nstale = kgnilnd_close_stale_conns_locked(peer, conn);
/* either way with peer (new or existing), we are ok with ref counts here as the
module_param(reg_fail_timeout, int, 0644);
MODULE_PARM_DESC(reg_fail_timeout, "fmablk registration timeout LBUG");
+static int to_reconn_disable;
+module_param(to_reconn_disable, int, 0644);
+MODULE_PARM_DESC(to_reconn_disable,
+ "Timed out connection waits for peer before reconnecting");
+
kgn_tunables_t kgnilnd_tunables = {
.kgn_min_reconnect_interval = &min_reconnect_interval,
.kgn_max_reconnect_interval = &max_reconnect_interval,
.kgn_thread_affinity = &thread_affinity,
.kgn_thread_safe = &thread_safe,
.kgn_reg_fail_timeout = &reg_fail_timeout,
+ .kgn_to_reconn_disable = &to_reconn_disable,
.kgn_max_purgatory = &max_conn_purg
};
read_unlock(&kgnilnd_data.kgn_peer_conn_lock);
- seq_printf(s, "%p->%s [%d] %s NIC 0x%x q %d conn %c purg %d "
- "last %d@%dms dgram %d@%dms "
- "reconn %dms to %lus \n",
+ seq_printf(s, "%p->%s [%d] %s NIC 0x%x q %d conn %c purg %d last %d@%dms dgram %d@%dms reconn %dms to %lus \n",
peer, libcfs_nid2str(peer->gnp_nid),
atomic_read(&peer->gnp_refcount),
- (peer->gnp_down == GNILND_RCA_NODE_DOWN) ? "down" : "up",
+ (peer->gnp_state == GNILND_PEER_DOWN) ? "down" :
+ peer->gnp_state == GNILND_PEER_TIMED_OUT ? "timedout" : "up",
peer->gnp_host_id,
kgnilnd_count_list(&peer->gnp_tx_queue),
conn_str,
}
if (krca_get_message(&rca_krt, &event) == 0) {
- int node_down = GNILND_RCA_NODE_UNKNOWN;
+ int node_down = GNILND_PEER_UNKNOWN;
rs_state_t state;
LIST_HEAD(zombies);
switch (event.ev_id) {
case ec_node_available:
CDEBUG(D_INFO, "ec_node_available\n");
- node_down = GNILND_RCA_NODE_UP;
+ node_down = GNILND_PEER_UP;
break;
case ec_node_failed:
CDEBUG(D_INFO, "ec_node_failed\n");
"ec_node_failed ignored\n");
break;
}
- node_down = GNILND_RCA_NODE_DOWN;
+ node_down = GNILND_PEER_DOWN;
break;
case ec_node_unavailable:
state = RSN_GET_FLD(event.ev_gen.svid_node.rsn_intval, STATE);
" RS_CS_READY state\n");
break;
}
- node_down = GNILND_RCA_NODE_DOWN;
+ node_down = GNILND_PEER_DOWN;
break;
default:
CDEBUG(D_INFO, "unknown event\n");
/* if we get an event we don't know about, just go ahead
* and wait for another event */
- if (node_down == GNILND_RCA_NODE_UNKNOWN) {
+ if (node_down == GNILND_PEER_UNKNOWN)
continue;
- }
nid = RSN_GET_FLD(event.ev_gen.svid_node.rs_node_flat,
NID);
kgnilnd_get_node_state(__u32 nid)
{
int i;
- int rc = GNILND_RCA_NODE_UNKNOWN;
+ int rc = GNILND_PEER_UNKNOWN;
int ret;
rs_node_array_t nlist;
rs_node_t *na = NULL;
for (i = 0; i < nlist.na_len; i++) {
if ((rca_nid_t)RSN_GET_FLD(na[i].rs_node_flat, NID) == nid) {
rc = RSN_GET_FLD(na[i].rs_node_flat, STATE) == RS_CS_READY ?
- GNILND_RCA_NODE_UP : GNILND_RCA_NODE_DOWN;
+ GNILND_PEER_UP : GNILND_PEER_DOWN;
break;
}
}
int
kgnilnd_get_node_state(__u32 nid)
{
- return GNILND_RCA_NODE_UP;
+ return GNILND_PEER_UP;
}
#endif /* GNILND_USE_RCA */