/* notification of peer down */
void (*lnd_notify_peer_down)(lnet_nid_t peer);
- /* query of peer aliveness */
- void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, time64_t *when);
-
/* accept a new connection */
int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock);
};
.lnd_send = kgnilnd_send,
.lnd_recv = kgnilnd_recv,
.lnd_eager_recv = kgnilnd_eager_recv,
- .lnd_query = kgnilnd_query,
};
kgn_data_t kgnilnd_data;
return rc;
}
-void
-kgnilnd_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when)
-{
- kgn_net_t *net = ni->ni_data;
- kgn_tx_t *tx;
- kgn_peer_t *peer = NULL;
- kgn_conn_t *conn = NULL;
- struct lnet_process_id id = {
- .nid = nid,
- .pid = LNET_PID_LUSTRE,
- };
- ENTRY;
-
- /* I expect to find him, so only take a read lock */
- read_lock(&kgnilnd_data.kgn_peer_conn_lock);
- peer = kgnilnd_find_peer_locked(nid);
- if (peer != NULL) {
- /* LIE if in a quiesce - we will update the timeouts after,
- * but we don't want sends failing during it */
- if (kgnilnd_data.kgn_quiesce_trigger) {
- *when = ktime_get_seconds();
- read_unlock(&kgnilnd_data.kgn_peer_conn_lock);
- GOTO(out, 0);
- }
-
- /* Update to best guess, might refine on later checks */
- *when = peer->gnp_last_alive;
-
- /* we have a peer, how about a conn? */
- conn = kgnilnd_find_conn_locked(peer);
-
- if (conn == NULL) {
- /* if there is no conn, check peer last errno to see if clean disconnect
- * - if it was, we lie to LNet because we believe a TX would complete
- * on reconnect */
- if (kgnilnd_conn_clean_errno(peer->gnp_last_errno)) {
- *when = ktime_get_seconds();
- }
- /* we still want to fire a TX and new conn in this case */
- } else {
- /* gnp_last_alive is valid, run for the hills */
- read_unlock(&kgnilnd_data.kgn_peer_conn_lock);
- GOTO(out, 0);
- }
- }
- /* if we get here, either we have no peer or no conn for him, so fire off
- * new TX to trigger conn setup */
- read_unlock(&kgnilnd_data.kgn_peer_conn_lock);
-
- /* if we couldn't find him, we'll fire up a TX and get connected -
- * if we don't do this, after ni_peer_timeout, LNet will declare him dead.
- * So really we treat kgnilnd_query as a bit of a 'connect now' type
- * event because it'll only do this when it wants to send
- *
- * Use a real TX for this to get the proper gnp_tx_queue behavior, etc
- * normally we'd use kgnilnd_send_ctlmsg for this, but we don't really
- * care that this goes out quickly since we already know we need a new conn
- * formed */
- if (CFS_FAIL_CHECK(CFS_FAIL_GNI_NOOP_SEND))
- return;
-
- tx = kgnilnd_new_tx_msg(GNILND_MSG_NOOP, ni->ni_nid);
- if (tx != NULL) {
- kgnilnd_launch_tx(tx, net, &id);
- }
-out:
- CDEBUG(D_NETTRACE, "peer 0x%p->%s when %lld\n", peer,
- libcfs_nid2str(nid), *when);
- EXIT;
-}
-
int
kgnilnd_dev_init(kgn_device_t *dev)
{
void kgnilnd_free_phys_fmablk(kgn_device_t *device);
int kgnilnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg);
-void kgnilnd_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when);
int kgnilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
int kgnilnd_eager_recv(struct lnet_ni *ni, void *private,
struct lnet_msg *lntmsg, void **new_private);
}
static void
-kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when)
-{
- time64_t last_alive = 0;
- time64_t now = ktime_get_seconds();
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_peer_ni *peer_ni;
- unsigned long flags;
-
- read_lock_irqsave(glock, flags);
-
- peer_ni = kiblnd_find_peer_locked(ni, nid);
- if (peer_ni != NULL)
- last_alive = peer_ni->ibp_last_alive;
-
- read_unlock_irqrestore(glock, flags);
-
- if (last_alive != 0)
- *when = last_alive;
-
- /* peer_ni is not persistent in hash, trigger peer_ni creation
- * and connection establishment with a NULL tx */
- if (peer_ni == NULL)
- kiblnd_launch_tx(ni, NULL, nid);
-
- CDEBUG(D_NET, "peer_ni %s %p, alive %lld secs ago\n",
- libcfs_nid2str(nid), peer_ni,
- last_alive ? now - last_alive : -1);
-}
-
-static void
kiblnd_free_pages(struct kib_pages *p)
{
int npages = p->ibp_npages;
.lnd_startup = kiblnd_startup,
.lnd_shutdown = kiblnd_shutdown,
.lnd_ctl = kiblnd_ctl,
- .lnd_query = kiblnd_query,
.lnd_send = kiblnd_send,
.lnd_recv = kiblnd_recv,
};
}
write_lock_irqsave(glock, flags);
- /* retry connection if it's still needed and no other connection
- * attempts (active or passive) are in progress
- * NB: reconnect is still needed even when ibp_tx_queue is
- * empty if ibp_version != version because reconnect may be
- * initiated by kiblnd_query() */
+ /* retry connection if it's still needed and no other connection
+ * attempts (active or passive) are in progress
+ * NB: reconnect is still needed even when ibp_tx_queue is
+ * empty if ibp_version != version, because a reconnect may be
+ * initiated to renegotiate the connection version.
+ */
reconnect = (!list_empty(&peer_ni->ibp_tx_queue) ||
peer_ni->ibp_version != version) &&
peer_ni->ibp_connecting &&
* if we have autroutes, and these connect on demand. */
}
-void
-ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when)
-{
- int connect = 1;
- time64_t last_alive = 0;
- time64_t now = ktime_get_seconds();
- struct ksock_peer_ni *peer_ni = NULL;
- rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
- struct lnet_process_id id = {
- .nid = nid,
- .pid = LNET_PID_LUSTRE,
- };
-
- read_lock(glock);
-
- peer_ni = ksocknal_find_peer_locked(ni, id);
- if (peer_ni != NULL) {
- struct list_head *tmp;
- struct ksock_conn *conn;
- int bufnob;
-
- list_for_each(tmp, &peer_ni->ksnp_conns) {
- conn = list_entry(tmp, struct ksock_conn, ksnc_list);
- bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-
- if (bufnob < conn->ksnc_tx_bufnob) {
- /* something got ACKed */
- conn->ksnc_tx_deadline = ktime_get_seconds() +
- lnet_get_lnd_timeout();
- peer_ni->ksnp_last_alive = now;
- conn->ksnc_tx_bufnob = bufnob;
- }
- }
-
- last_alive = peer_ni->ksnp_last_alive;
- if (ksocknal_find_connectable_route_locked(peer_ni) == NULL)
- connect = 0;
- }
-
- read_unlock(glock);
-
- if (last_alive != 0)
- *when = last_alive;
-
- CDEBUG(D_NET, "peer_ni %s %p, alive %lld secs ago, connect %d\n",
- libcfs_nid2str(nid), peer_ni,
- last_alive ? now - last_alive : -1,
- connect);
-
- if (!connect)
- return;
-
- ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());
-
- write_lock_bh(glock);
-
- peer_ni = ksocknal_find_peer_locked(ni, id);
- if (peer_ni != NULL)
- ksocknal_launch_all_connections_locked(peer_ni);
-
- write_unlock_bh(glock);
-}
-
static void
ksocknal_push_peer(struct ksock_peer_ni *peer_ni)
{
.lnd_send = ksocknal_send,
.lnd_recv = ksocknal_recv,
.lnd_notify_peer_down = ksocknal_notify_gw_down,
- .lnd_query = ksocknal_query,
.lnd_accept = ksocknal_accept,
};
if (rc < 0)
goto failed1;
- LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
- ni->ni_net->net_lnd->lnd_query != NULL);
-
lnet_ni_addref(ni);
list_add_tail(&ni->ni_netlist, &local_ni_list);