- shared routing enhancements: peer health detection.
Description:
Details :
+Severity : normal
+Bugzilla : 16186
+Description: One down Lustre FS hangs ALL mounted Lustre filesystems
+Details : Shared routing enhancements - peer health detection.
+
Severity : enhancement
Bugzilla : 14132
Description: acceptor.c cleanup
}
int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, time_t when);
+void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, time_t when);
int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid);
int lnet_check_routes(void);
int lnet_del_route(__u32 net, lnet_nid_t gw_nid);
/* notification of peer health */
void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
+ /* query of peer aliveness */
+ void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, time_t *when);
+
#if defined(__KERNEL__) || defined(HAVE_LIBPTHREAD)
/* accept a new connection */
int (*lnd_accept)(struct lnet_ni *ni, cfs_socket_t *sock);
int ni_txcredits; /* # tx credits free */
int ni_mintxcredits; /* lowest it's been */
int ni_peertxcredits; /* # per-peer send credits */
+ int ni_peertimeout; /* seconds to consider peer dead */
lnet_nid_t ni_nid; /* interface's NID */
void *ni_data; /* instance-specific data */
lnd_t *ni_lnd; /* procedural interface */
int lp_alive_count; /* # times router went dead<->alive */
long lp_txqnob; /* bytes queued for sending */
time_t lp_timestamp; /* time of last aliveness news */
+ time_t lp_last_alive; /* when I was last alive */
time_t lp_ping_timestamp; /* time of last ping attempt */
time_t lp_ping_deadline; /* != 0 if ping reply expected */
lnet_ni_t *lp_ni; /* interface peer is on */
.lnd_startup = kiblnd_startup,
.lnd_shutdown = kiblnd_shutdown,
.lnd_ctl = kiblnd_ctl,
+ .lnd_query = kiblnd_query,
.lnd_send = kiblnd_send,
.lnd_recv = kiblnd_recv,
};
peer->ibp_ni = ni;
peer->ibp_nid = nid;
peer->ibp_error = 0;
- peer->ibp_last_alive = cfs_time_current();
+ peer->ibp_last_alive = 0;
atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */
INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */
}
void
+/* lnd_query() handler for o2iblnd: report in *when the wall-clock time
+ * (seconds) at which 'nid' was last known alive; *when is left untouched
+ * when there is no aliveness news.  If the peer is unknown, kick off
+ * peer creation and connection establishment (NULL tx) so that later
+ * queries have fresh data. */
+kiblnd_query (lnet_ni_t *ni, lnet_nid_t nid, time_t *when)
+{
+ cfs_time_t last_alive = 0;
+ rwlock_t *glock = &kiblnd_data.kib_global_lock;
+ kib_peer_t *peer;
+ unsigned long flags;
+
+ read_lock_irqsave(glock, flags);
+
+ peer = kiblnd_find_peer_locked(nid);
+ if (peer != NULL) {
+ LASSERT (peer->ibp_connecting > 0 || /* creating conns */
+ peer->ibp_accepting > 0 ||
+ !list_empty(&peer->ibp_conns)); /* active conn */
+ last_alive = peer->ibp_last_alive;
+ }
+
+ read_unlock_irqrestore(glock, flags);
+
+ /* ibp_last_alive is in jiffies; convert the age to seconds and
+ * subtract from current wall-clock seconds for the caller */
+ if (last_alive != 0)
+ *when = cfs_time_current_sec() -
+ cfs_duration_sec(cfs_time_current() - last_alive);
+
+ /* peer is not persistent in hash, trigger peer creation
+ * and connection establishment with a NULL tx */
+ if (peer == NULL)
+ kiblnd_launch_tx(ni, NULL, nid);
+ return;
+}
+
+void
kiblnd_free_pages (kib_pages_t *p)
{
int npages = p->ibp_npages;
ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits;
ni->ni_peertxcredits = *kiblnd_tunables.kib_peercredits;
+ ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout;
spin_lock_init(&net->ibn_tx_lock);
INIT_LIST_HEAD(&net->ibn_idle_txs);
int *kib_ntx; /* # tx descs */
int *kib_credits; /* # concurrent sends */
int *kib_peercredits; /* # concurrent sends to 1 peer */
+ int *kib_peertimeout; /* seconds to consider peer dead */
char **kib_default_ipif; /* default IPoIB interface */
int *kib_retry_count;
int *kib_rnr_retry_count;
int kiblnd_startup (lnet_ni_t *ni);
void kiblnd_shutdown (lnet_ni_t *ni);
int kiblnd_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg);
+void kiblnd_query (struct lnet_ni *ni, lnet_nid_t nid, time_t *when);
int kiblnd_tunables_init(void);
void kiblnd_tunables_fini(void);
int kiblnd_init_rdma (lnet_ni_t *ni, kib_tx_t *tx, int type,
int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie);
+void kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid);
void kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn);
void kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn);
void kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob);
/* If I get here, I've committed to send, so I complete the tx with
* failure on any problems */
- LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */
- LASSERT (tx->tx_nwrq > 0); /* work items have been set up */
+ LASSERT (tx == NULL || tx->tx_conn == NULL); /* only set when assigned a conn */
+ LASSERT (tx == NULL || tx->tx_nwrq > 0); /* work items have been set up */
/* First time, just use a read lock since I expect to find my peer
* connected */
read_unlock_irqrestore(g_lock, flags);
- kiblnd_queue_tx(tx, conn);
+ if (tx != NULL)
+ kiblnd_queue_tx(tx, conn);
kiblnd_conn_decref(conn); /* ...to here */
return;
}
/* found a peer, but it's still connecting... */
LASSERT (peer->ibp_connecting != 0 ||
peer->ibp_accepting != 0);
- list_add_tail (&tx->tx_list, &peer->ibp_tx_queue);
+ if (tx != NULL)
+ list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
write_unlock_irqrestore(g_lock, flags);
} else {
conn = kiblnd_get_conn_locked(peer);
kiblnd_conn_addref(conn); /* 1 ref for me... */
write_unlock_irqrestore(g_lock, flags);
-
- kiblnd_queue_tx(tx, conn);
+
+ if (tx != NULL)
+ kiblnd_queue_tx(tx, conn);
kiblnd_conn_decref(conn); /* ...to here */
}
return;
rc = kiblnd_create_peer(ni, &peer, nid);
if (rc != 0) {
CERROR("Can't create peer %s\n", libcfs_nid2str(nid));
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kiblnd_tx_done(ni, tx);
+ if (tx != NULL) {
+ tx->tx_status = -EHOSTUNREACH;
+ tx->tx_waiting = 0;
+ kiblnd_tx_done(ni, tx);
+ }
return;
}
/* found a peer, but it's still connecting... */
LASSERT (peer2->ibp_connecting != 0 ||
peer2->ibp_accepting != 0);
- list_add_tail (&tx->tx_list, &peer2->ibp_tx_queue);
+ if (tx != NULL)
+ list_add_tail(&tx->tx_list, &peer2->ibp_tx_queue);
write_unlock_irqrestore(g_lock, flags);
} else {
conn = kiblnd_get_conn_locked(peer2);
kiblnd_conn_addref(conn); /* 1 ref for me... */
write_unlock_irqrestore(g_lock, flags);
-
- kiblnd_queue_tx(tx, conn);
+
+ if (tx != NULL)
+ kiblnd_queue_tx(tx, conn);
kiblnd_conn_decref(conn); /* ...to here */
}
/* always called with a ref on ni, which prevents ni being shutdown */
LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0);
- list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
+ if (tx != NULL)
+ list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
kiblnd_peer_addref(peer);
list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
CFS_MODULE_PARM(peer_credits, "i", int, 0444,
"# concurrent sends to 1 peer");
+static int peer_timeout = 0;
+CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
+ "Seconds without aliveness news to declare peer dead (<=0 to disable)");
+
static char *ipif_name = "ib0";
CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
"IPoIB interface name");
.kib_ntx = &ntx,
.kib_credits = &credits,
.kib_peercredits = &peer_credits,
+ .kib_peertimeout = &peer_timeout,
.kib_default_ipif = &ipif_name,
.kib_retry_count = &retry_count,
.kib_rnr_retry_count = &rnr_retry_count,
O2IBLND_NTX,
O2IBLND_CREDITS,
O2IBLND_PEER_CREDITS,
+ O2IBLND_PEER_TIMEOUT,
O2IBLND_IPIF_BASENAME,
O2IBLND_RETRY_COUNT,
O2IBLND_RNR_RETRY_COUNT,
#define O2IBLND_NTX CTL_UNNUMBERED
#define O2IBLND_CREDITS CTL_UNNUMBERED
#define O2IBLND_PEER_CREDITS CTL_UNNUMBERED
+#define O2IBLND_PEER_TIMEOUT CTL_UNNUMBERED
#define O2IBLND_IPIF_BASENAME CTL_UNNUMBERED
#define O2IBLND_RETRY_COUNT CTL_UNNUMBERED
#define O2IBLND_RNR_RETRY_COUNT CTL_UNNUMBERED
.proc_handler = &proc_dointvec
},
{
+ .ctl_name = O2IBLND_PEER_TIMEOUT,
+ .procname = "peer_timeout",
+ .data = &peer_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec
+ },
+ {
.ctl_name = O2IBLND_IPIF_BASENAME,
.procname = "ipif_name",
.data = ipif_basename_space,
.lnd_startup = kptllnd_startup,
.lnd_shutdown = kptllnd_shutdown,
.lnd_ctl = kptllnd_ctl,
+ .lnd_query = kptllnd_query,
.lnd_send = kptllnd_send,
.lnd_recv = kptllnd_recv,
.lnd_eager_recv = kptllnd_eager_recv,
return rc;
}
+void
+/* lnd_query() handler for ptllnd: report in *when the wall-clock time
+ * (seconds) at which 'nid' was last known alive; *when is left
+ * untouched when there is no aliveness news or the target cannot be
+ * found. */
+kptllnd_query (lnet_ni_t *ni, lnet_nid_t nid, time_t *when)
+{
+ kptl_peer_t *peer = NULL;
+ lnet_process_id_t id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID};
+ unsigned long flags;
+
+ /* NB: kptllnd_find_target connects to peer if necessary */
+ if (kptllnd_find_target(&peer, id) != 0)
+ return;
+
+ spin_lock_irqsave(&peer->peer_lock, flags);
+ /* peer_last_alive is in jiffies; convert age to seconds */
+ if (peer->peer_last_alive != 0)
+ *when = cfs_time_current_sec() -
+ cfs_duration_sec(cfs_time_current() -
+ peer->peer_last_alive);
+ spin_unlock_irqrestore(&peer->peer_lock, flags);
+ /* drop the ref obtained from kptllnd_find_target() */
+ kptllnd_peer_decref(peer);
+ return;
+}
+
int
kptllnd_startup (lnet_ni_t *ni)
{
int kptllnd_startup(lnet_ni_t *ni);
void kptllnd_shutdown(lnet_ni_t *ni);
int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
+void kptllnd_query (struct lnet_ni *ni, lnet_nid_t nid, time_t *when);
int kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int kptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
int delayed, unsigned int niov,
peer->peer_state = PEER_STATE_ALLOCATED;
peer->peer_error = 0;
- peer->peer_last_alive = cfs_time_current();
+ peer->peer_last_alive = 0;
peer->peer_id = lpid;
peer->peer_ptlid = ppid;
peer->peer_credits = 1; /* enough for HELLO */
peer->ksnp_closing = 0;
peer->ksnp_accepting = 0;
peer->ksnp_proto = NULL;
+ peer->ksnp_last_alive = 0;
peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
}
void
+/* lnd_query() handler for socklnd: report in *when the wall-clock time
+ * (seconds) at which 'nid' was last known alive; *when is left
+ * untouched when there is no news.  As a side effect, scans the peer's
+ * connections for newly ACKed data to refresh ksnp_last_alive, and
+ * (re)launches connections when a connectable route exists so future
+ * queries have fresh data. */
+ksocknal_query (lnet_ni_t *ni, lnet_nid_t nid, time_t *when)
+{
+ int connect = 1;
+ cfs_time_t last_alive = 0;
+ ksock_peer_t *peer = NULL;
+ rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
+ lnet_process_id_t id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID};
+
+ read_lock(glock);
+
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL) {
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+ int bufnob;
+
+ list_for_each (tmp, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+ bufnob = libcfs_sock_wmem_queued(conn->ksnc_sock);
+
+ /* send buffer shrank since last sample, so the peer
+ * has consumed (ACKed) data: it is alive right now */
+ if (bufnob < conn->ksnc_tx_bufnob) {
+ /* something got ACKed */
+ conn->ksnc_tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_tx_bufnob = bufnob;
+ }
+ }
+
+ last_alive = peer->ksnp_last_alive;
+ if (ksocknal_find_connectable_route_locked(peer) == NULL)
+ connect = 0;
+ }
+
+ read_unlock(glock);
+
+ /* ksnp_last_alive is in jiffies; convert age to seconds */
+ if (last_alive != 0)
+ *when = cfs_time_current_sec() -
+ cfs_duration_sec(cfs_time_current() - last_alive);
+
+ if (!connect)
+ return;
+
+ /* ensure the peer exists, then start connecting all its routes */
+ ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
+
+ write_lock_bh(glock);
+
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL)
+ ksocknal_launch_all_connections_locked(peer);
+
+ write_unlock_bh(glock);
+ return;
+}
+
+void
ksocknal_push_peer (ksock_peer_t *peer)
{
int index;
ni->ni_data = net;
ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits;
+ ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
if (ni->ni_interfaces[0] == NULL) {
rc = ksocknal_enumerate_interfaces(net);
the_ksocklnd.lnd_send = ksocknal_send;
the_ksocklnd.lnd_recv = ksocknal_recv;
the_ksocklnd.lnd_notify = ksocknal_notify;
+ the_ksocklnd.lnd_query = ksocknal_query;
the_ksocklnd.lnd_accept = ksocknal_accept;
rc = ksocknal_tunables_init();
int *ksnd_keepalive_intvl; /* time between probes */
int *ksnd_credits; /* # concurrent sends */
int *ksnd_peercredits; /* # concurrent sends to 1 peer */
+ int *ksnd_peertimeout; /* seconds to consider peer dead */
int *ksnd_enable_csum; /* enable check sum */
int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
unsigned int *ksnd_zc_min_payload; /* minimum zero copy payload size */
typedef struct ksock_peer
{
struct list_head ksnp_list; /* stash on global peer list */
+ cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */
lnet_process_id_t ksnp_id; /* who's on the other end(s) */
cfs_atomic_t ksnp_refcount; /* # users */
int ksnp_sharecount; /* lconf usage counter */
struct list_head ksnp_tx_queue; /* waiting packets */
cfs_spinlock_t ksnp_lock; /* serialize, NOT safe in g_lock */
struct list_head ksnp_zc_req_list; /* zero copy requests wait for ACK */
- cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */
cfs_time_t ksnp_send_keepalive; /* time to send keepalive */
lnet_ni_t *ksnp_ni; /* which network */
int ksnp_n_passive_ips; /* # of... */
extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error);
extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive);
+extern void ksocknal_query (struct lnet_ni *ni, lnet_nid_t nid, time_t *when);
extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg);
extern void ksocknal_thread_fini (void);
+extern void ksocknal_launch_all_connections_locked (ksock_peer_t *peer);
+extern ksock_route_t *ksocknal_find_connectable_route_locked (ksock_peer_t *peer);
extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer);
extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
extern int ksocknal_scheduler (void *arg);
cfs_spin_unlock_bh (&ksocknal_data.ksnd_connd_lock);
}
+void
+/* Launch a connection attempt on every route of 'peer' that currently
+ * needs one; loops until no connectable route remains. */
+ksocknal_launch_all_connections_locked (ksock_peer_t *peer)
+{
+ ksock_route_t *route;
+
+ /* called holding write lock on ksnd_global_lock */
+ for (;;) {
+ /* launch any/all connections that need it */
+ route = ksocknal_find_connectable_route_locked(peer);
+ if (route == NULL)
+ return;
+
+ ksocknal_launch_connection_locked(route);
+ }
+}
+
ksock_conn_t *
ksocknal_find_conn_locked(ksock_peer_t *peer, ksock_tx_t *tx, int nonblk)
{
/* First packet starts the timeout */
conn->ksnc_tx_deadline =
cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ if (conn->ksnc_tx_bufnob > 0) /* something got ACKed */
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
conn->ksnc_tx_bufnob = 0;
cfs_mb(); /* order with adding to tx_queue */
}
{
ksock_peer_t *peer;
ksock_conn_t *conn;
- ksock_route_t *route;
cfs_rwlock_t *g_lock;
int retry;
int rc;
}
}
- for (;;) {
- /* launch any/all connections that need it */
- route = ksocknal_find_connectable_route_locked (peer);
- if (route == NULL)
- break;
-
- ksocknal_launch_connection_locked (route);
- }
+ ksocknal_launch_all_connections_locked(peer);
conn = ksocknal_find_conn_locked(peer, tx, tx->tx_nonblk);
if (conn != NULL) {
SOCKLND_TIMEOUT = 1,
SOCKLND_CREDITS,
SOCKLND_PEER_CREDITS,
+ SOCKLND_PEER_TIMEOUT,
SOCKLND_NCONNDS,
SOCKLND_RECONNECTS_MIN,
SOCKLND_RECONNECTS_MAX,
#define SOCKLND_TIMEOUT CTL_UNNUMBERED
#define SOCKLND_CREDITS CTL_UNNUMBERED
#define SOCKLND_PEER_CREDITS CTL_UNNUMBERED
+#define SOCKLND_PEER_TIMEOUT CTL_UNNUMBERED
#define SOCKLND_NCONNDS CTL_UNNUMBERED
#define SOCKLND_RECONNECTS_MIN CTL_UNNUMBERED
#define SOCKLND_RECONNECTS_MAX CTL_UNNUMBERED
.strategy = &sysctl_intvec,
},
{
+ /* read-only: peer_timeout is settable only as a module parameter */
+ .ctl_name = SOCKLND_PEER_TIMEOUT,
+ .procname = "peer_timeout",
+ .data = &ksocknal_tunables.ksnd_peertimeout,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
.ctl_name = SOCKLND_NCONNDS,
.procname = "nconnds",
.data = &ksocknal_tunables.ksnd_nconnds,
CFS_MODULE_PARM(peer_credits, "i", int, 0444,
"# concurrent sends to 1 peer");
+static int peer_timeout = 0;
+CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
+ "Seconds without aliveness news to declare peer dead (<=0 to disable)");
+
static int nconnds = 4;
CFS_MODULE_PARM(nconnds, "i", int, 0444,
"# connection daemons");
ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl;
ksocknal_tunables.ksnd_credits = &credits;
ksocknal_tunables.ksnd_peercredits = &peer_credits;
+ ksocknal_tunables.ksnd_peertimeout = &peer_timeout;
ksocknal_tunables.ksnd_enable_csum = &enable_csum;
ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error;
ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload;
goto failed;
}
+ LASSERT (ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
+
list_del(&ni->ni_list);
LNET_LOCK();
ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits;
- CDEBUG(D_LNI, "Added LNI %s [%d/%d]\n",
+ CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d]\n",
libcfs_nid2str(ni->ni_nid),
- ni->ni_peertxcredits, ni->ni_txcredits);
+ ni->ni_peertxcredits, ni->ni_txcredits,
+ ni->ni_peertimeout);
nicount++;
}
return rc;
}
+/* Ask the LND for the latest aliveness news about 'lp' and fold it into
+ * lp_last_alive (only updated when the LND actually has news). */
+/* NB: caller shall hold a ref on 'lp' as I'd drop LNET_LOCK */
+void
+lnet_ni_peer_alive(lnet_peer_t *lp)
+{
+ time_t last_alive = 0;
+ lnet_ni_t *ni = lp->lp_ni;
+
+ LASSERT (ni != NULL);
+ LASSERT (ni->ni_peertimeout > 0);
+ LASSERT (ni->ni_lnd->lnd_query != NULL);
+
+ /* the LND callback runs outside LNET_LOCK (see NB above) */
+ LNET_UNLOCK();
+ (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
+ LNET_LOCK();
+
+ if (last_alive != 0) /* NI has updated timestamp */
+ lp->lp_last_alive = last_alive;
+ return;
+}
+
+/* Decide whether 'lp' counts as alive at time 'now' (seconds): its
+ * aliveness news must be younger than ni_peertimeout seconds.  May
+ * refresh an obsolete lp_alive flag via lnet_notify_locked(). */
+/* NB: always called with LNET_LOCK held */
+static inline int
+lnet_peer_is_alive (lnet_peer_t *lp, time_t now)
+{
+ lnet_ni_t *ni = lp->lp_ni;
+ time_t deadline;
+ int alive;
+
+ LASSERT (ni != NULL);
+ LASSERT (ni->ni_peertimeout > 0);
+
+ /* an explicit "down" notification (lp_timestamp) at least as recent
+ * as the last known-alive time wins: report dead */
+ if (!lp->lp_alive && lp->lp_alive_count > 0 &&
+ cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
+ return 0;
+
+ deadline = cfs_time_add(lp->lp_last_alive, ni->ni_peertimeout);
+ alive = cfs_time_after(deadline, now);
+ if (alive && !lp->lp_alive) /* update obsolete lp_alive */
+ lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+
+ return alive;
+}
+
+/* Full aliveness check for 'lp': fast path on cached state, then a
+ * fresh LND query before finally declaring the peer dead. */
+/* NB: returns 1 when alive, 0 when dead, negative when error;
+ * may drop the LNET_LOCK */
+int
+lnet_peer_alive_locked (lnet_peer_t *lp)
+{
+ lnet_ni_t *ni = lp->lp_ni;
+ time_t now = cfs_time_current_sec();
+
+ LASSERT (ni != NULL);
+
+ if (ni->ni_peertimeout <= 0) /* disabled */
+ return -ENODEV;
+
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ /* peer appears dead, query LND for latest aliveness news */
+ lnet_ni_peer_alive(lp);
+
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ /* still no news: record the peer as down as of lp_last_alive */
+ lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
+ return 0;
+}
+
+/* Aliveness check by NID: look up the peer, delegate to
+ * lnet_peer_alive_locked(), drop the lookup ref. */
+/* NB: returns 1 when alive, 0 when dead, negative when error;
+ * may drop the LNET_LOCK */
+int
+lnet_nid_alive_locked (lnet_nid_t nid)
+{
+ int rc;
+ lnet_peer_t *lp;
+
+ rc = lnet_nid2peer_locked(&lp, nid);
+ if (rc != 0) {
+ /* NB: the original lookup error 'rc' is logged but flattened
+ * to -ENOENT for the caller */
+ CERROR("Error %d looking up %s\n", rc, libcfs_nid2str(nid));
+ return -ENOENT;
+ }
+
+ rc = lnet_peer_alive_locked(lp);
+ lnet_peer_decref_locked(lp);
+ return rc;
+}
+
int
lnet_post_send_locked (lnet_msg_t *msg, int do_send)
{
/* lnet_send is going to LNET_UNLOCK immediately after this, so it sets
* do_send FALSE and I don't do the unlock/send/lock bit. I return
- * EAGAIN if msg blocked and 0 if sent or OK to send */
+ * EAGAIN if msg blocked, EHOSTUNREACH if msg_txpeer appears dead, and
+ * 0 if sent or OK to send */
lnet_peer_t *lp = msg->msg_txpeer;
lnet_ni_t *ni = lp->lp_ni;
LASSERT (!do_send || msg->msg_delayed);
LASSERT (!msg->msg_receiving);
+ /* NB 'lp' is always the next hop */
+ if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
+ lnet_peer_alive_locked(lp) == 0) {
+ LNET_UNLOCK();
+
+ CDEBUG(D_NETERROR, "Dropping message for %s: peer not alive\n",
+ libcfs_id2str(msg->msg_target));
+ if (do_send)
+ lnet_finalize(ni, msg, -EHOSTUNREACH);
+
+ LNET_LOCK();
+ return EHOSTUNREACH;
+ }
+
if (!msg->msg_peertxcredit) {
LASSERT ((lp->lp_txcredits < 0) == !list_empty(&lp->lp_txq));
{
/* lnet_parse is going to LNET_UNLOCK immediately after this, so it
* sets do_recv FALSE and I don't do the unlock/send/lock bit. I
- * return EAGAIN if msg blocked and 0 if sent or OK to send */
+ * return EAGAIN if msg blocked and 0 if received or OK to receive */
lnet_peer_t *lp = msg->msg_rxpeer;
lnet_rtrbufpool_t *rbp;
lnet_rtrbuf_t *rb;
rc = lnet_post_send_locked(msg, 0);
LNET_UNLOCK();
+ if (rc == EHOSTUNREACH)
+ return -EHOSTUNREACH;
+
if (rc == 0)
lnet_ni_send(src_ni, msg);
int rc = 0;
int for_me;
lnet_msg_t *msg;
+ lnet_pid_t dest_pid;
lnet_nid_t dest_nid;
lnet_nid_t src_nid;
__u32 payload_length;
type = le32_to_cpu(hdr->type);
src_nid = le64_to_cpu(hdr->src_nid);
dest_nid = le64_to_cpu(hdr->dest_nid);
+ dest_pid = le32_to_cpu(hdr->dest_pid);
payload_length = le32_to_cpu(hdr->payload_length);
for_me = (ni->ni_nid == dest_nid);
LASSERT (for_me);
#else
if (!for_me) {
- msg->msg_target.pid = le32_to_cpu(hdr->dest_pid);
+ msg->msg_target.pid = dest_pid;
msg->msg_target.nid = dest_nid;
msg->msg_routing = 1;
msg->msg_offset = 0;
goto free_drop;
}
}
-
lnet_commit_routedmsg(msg);
rc = lnet_post_routed_recv_locked(msg, 0);
LNET_UNLOCK();
msg->msg_hdr.src_nid = src_nid;
msg->msg_hdr.src_pid = le32_to_cpu(msg->msg_hdr.src_pid);
msg->msg_hdr.dest_nid = dest_nid;
- msg->msg_hdr.dest_pid = le32_to_cpu(msg->msg_hdr.dest_pid);
+ msg->msg_hdr.dest_pid = dest_pid;
msg->msg_hdr.payload_length = payload_length;
msg->msg_ev.sender = from_nid;
CFS_INIT_LIST_HEAD(&lp->lp_txq);
CFS_INIT_LIST_HEAD(&lp->lp_rtrq);
- lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
lp->lp_notify = 0;
lp->lp_notifylnd = 0;
lp->lp_notifying = 0;
lp->lp_alive_count = 0;
lp->lp_timestamp = 0;
+ lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */
+ lp->lp_last_alive = cfs_time_current_sec(); /* assumes alive */
lp->lp_ping_timestamp = 0;
lp->lp_nid = nid;
lp->lp_refcount = 2; /* 1 for caller; 1 for hash */
void
lnet_debug_peer(lnet_nid_t nid)
{
+ char *aliveness = "NA";
int rc;
lnet_peer_t *lp;
LNET_LOCK();
-
+
rc = lnet_nid2peer_locked(&lp, nid);
if (rc != 0) {
LNET_UNLOCK();
return;
}
+ if (lnet_isrouter(lp) || lp->lp_ni->ni_peertimeout > 0)
+ aliveness = lp->lp_alive ? "up" : "down";
+
CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
- libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
- !lnet_isrouter(lp) ? "~rtr" : (lp->lp_alive ? "up" : "down"),
- lp->lp_ni->ni_peertxcredits,
- lp->lp_rtrcredits, lp->lp_minrtrcredits,
+ libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
+ aliveness, lp->lp_ni->ni_peertxcredits,
+ lp->lp_rtrcredits, lp->lp_minrtrcredits,
lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
lnet_peer_decref_locked(lp);
return -EOPNOTSUPP;
}
+void
+/* No-op stub for builds where peer-notification bookkeeping is compiled
+ * out.  NOTE(review): the enclosing #if head is outside this hunk (only
+ * the #endif below is visible) — confirm which configuration this
+ * branch covers. */
+lnet_notify_locked (lnet_peer_t *lp, int notifylnd, int alive, time_t when)
+{
+ return;
+}
+
#endif
static void
LNET_UNLOCK();
- seq_printf(s,
+ seq_printf(s,
"%-4d %7d %9d %6s %12lu %s\n", nrefs, nrtrrefs,
alive_cnt, alive ? "up" : "down",
last_ping, libcfs_nid2str(nid));
lnet_peer_seq_show (struct seq_file *s, void *iter)
{
lnet_peer_seq_iterator_t *lpsi = iter;
+ char *aliveness = "NA";
lnet_peer_t *lp;
lnet_nid_t nid;
int maxcr;
int txcr;
int minrtrcr;
int rtrcr;
- int alive;
- int rtr;
int txqnob;
int nrefs;
mintxcr = lp->lp_mintxcredits;
rtrcr = lp->lp_rtrcredits;
minrtrcr = lp->lp_minrtrcredits;
- rtr = lnet_isrouter(lp);
- alive = lp->lp_alive;
txqnob = lp->lp_txqnob;
nrefs = lp->lp_refcount;
+ if (lnet_isrouter(lp) || lp->lp_ni->ni_peertimeout > 0)
+ aliveness = lp->lp_alive ? "up" : "down";
+
LNET_UNLOCK();
seq_printf(s, "%-24s %4d %5s %5d %5d %5d %5d %5d %d\n",
- libcfs_nid2str(nid), nrefs,
- !rtr ? "~rtr" : (alive ? "up" : "down"),
+ libcfs_nid2str(nid), nrefs, aliveness,
maxcr, rtrcr, minrtrcr, txcr, mintxcr, txqnob);
return 0;
}