void lnet_mt_event_handler(struct lnet_event *event);
-int lnet_notify(struct lnet_ni *ni, lnet_nid_t peer, int alive,
+int lnet_notify(struct lnet_ni *ni, lnet_nid_t peer, bool alive, bool reset,
time64_t when);
void lnet_notify_locked(struct lnet_peer_ni *lp, int notifylnd, int alive,
time64_t when);
}
static inline void
+lnet_set_healthv(atomic_t *healthv, int value)
+{
+ atomic_set(healthv, value); /* unconditional atomic store of 'value'
+			       * into *healthv; this helper applies
+			       * no bound check of its own */
+}
+
+static inline void
lnet_inc_healthv(atomic_t *healthv)
{
atomic_add_unless(healthv, 1, LNET_MAX_HEALTH_VALUE);
int (*lnd_eager_recv)(struct lnet_ni *ni, void *private,
struct lnet_msg *msg, void **new_privatep);
- /* notification of peer health */
- void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive);
+ /* notification of peer down */
+ void (*lnd_notify_peer_down)(lnet_nid_t peer);
/* query of peer aliveness */
void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, time64_t *when);
ktime_get_seconds() - peer->gnp_last_alive);
lnet_notify(net->gnn_ni, peer_nid, alive,
+ (alive) ? true : false,
peer->gnp_last_alive);
kgnilnd_net_decref(net);
/* Notify LNET that we now have a working connection to this peer.
* This is a Cray extension to the "standard" LND behavior.
*/
- lnet_notify(peer->gnp_net->gnn_ni, peer->gnp_nid, 1,
+ lnet_notify(peer->gnp_net->gnn_ni, peer->gnp_nid, true, true,
ktime_get_seconds());
/* drop our 'hold' ref */
static void
kiblnd_peer_notify(struct kib_peer_ni *peer_ni)
{
- int error = 0;
+ int error = 0; /* Purpose of this function: if the peer is idle and
+		  * has a recorded ibp_error, consume that error and
+		  * call lnet_notify() with alive=false.  'error' holds
+		  * the consumed value; 0 means nothing to report. */
time64_t last_alive = 0;
- unsigned long flags;
+ unsigned long flags;
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
if (kiblnd_peer_idle(peer_ni) && peer_ni->ibp_error != 0) {
- error = peer_ni->ibp_error;
- peer_ni->ibp_error = 0;
+ error = peer_ni->ibp_error;
+ peer_ni->ibp_error = 0; /* cleared once copied into 'error'.
+			   * NOTE(review): this store is made while
+			   * only the read side of kib_global_lock is
+			   * held -- confirm that is intended. */
- last_alive = peer_ni->ibp_last_alive;
- }
+ last_alive = peer_ni->ibp_last_alive; /* passed to lnet_notify() as 'when' */
+ }
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- if (error != 0)
- lnet_notify(peer_ni->ibp_ni,
- peer_ni->ibp_nid, 0, last_alive);
+ if (error != 0) /* evaluated after the lock has been released */
+ lnet_notify(peer_ni->ibp_ni,
+ peer_ni->ibp_nid, false, false, last_alive); /* alive=false, reset=false */
}
void
read_unlock(&ksocknal_data.ksnd_global_lock);
if (notify)
- lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid, 0,
- last_alive);
+ lnet_notify(peer_ni->ksnp_ni, peer_ni->ksnp_id.nid,
+ false, false, last_alive);
}
void
}
void
-ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive)
+ksocknal_notify_gw_down(lnet_nid_t gw_nid)
{
/* The router is telling me she's been notified of a change in
* gateway state....
.pid = LNET_PID_ANY,
};
- CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
- alive ? "up" : "down");
+ CDEBUG(D_NET, "gw %s down\n", libcfs_nid2str(gw_nid));
- if (!alive) {
- /* If the gateway crashed, close all open connections... */
- ksocknal_close_matching_conns (id, 0);
- return;
- }
+ /* If the gateway crashed, close all open connections... */
+ ksocknal_close_matching_conns(id, 0);
+ return;
- /* ...otherwise do nothing. We can only establish new connections
- * if we have autroutes, and these connect on demand. */
+ /* We can only establish new connections
+ * if we have autroutes, and these connect on demand. */
}
void
the_ksocklnd.lnd_ctl = ksocknal_ctl;
the_ksocklnd.lnd_send = ksocknal_send;
the_ksocklnd.lnd_recv = ksocknal_recv;
- the_ksocklnd.lnd_notify = ksocknal_notify;
+ the_ksocklnd.lnd_notify_peer_down = ksocknal_notify_gw_down;
the_ksocklnd.lnd_query = ksocknal_query;
the_ksocklnd.lnd_accept = ksocknal_accept;
extern void ksocknal_queue_tx_locked(struct ksock_tx *tx, struct ksock_conn *conn);
extern void ksocknal_txlist_done(struct lnet_ni *ni, struct list_head *txlist,
int error);
-extern void ksocknal_notify(struct lnet_ni *ni, lnet_nid_t gw_nid, int alive);
+extern void ksocknal_notify_gw_down(lnet_nid_t gw_nid); /* installed as lnd_notify_peer_down */
extern void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when);
extern int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name);
extern void ksocknal_thread_fini(void);
* that deadline to the wall clock.
*/
deadline += ktime_get_seconds();
- return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
+ return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, false,
deadline);
}
lnet_rtrpools_free(1);
}
+/*
+ * Invoke the LND's lnd_notify_peer_down callback for 'nid' on the net that
+ * 'ni' belongs to.  'ni' is dereferenced unconditionally, so it must not
+ * be NULL.
+ */
+static inline void
+lnet_notify_peer_down(struct lnet_ni *ni, lnet_nid_t nid)
+{
+	void (*peer_down_cb)(lnet_nid_t peer);
+
+	peer_down_cb = ni->ni_net->net_lnd->lnd_notify_peer_down;
+
+	/* The callback is optional: nothing to do when it is not set. */
+	if (peer_down_cb == NULL)
+		return;
+
+	peer_down_cb(nid);
+}
+
+/*
+ * ni: local NI used to communicate with the peer
+ * nid: peer NID
+ * alive: true if peer is alive, false otherwise
+ * reset: reset health value. This is requested by the LND.
+ * when: notification time.
+ */
int
-lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, int alive, time64_t when)
+lnet_notify(struct lnet_ni *ni, lnet_nid_t nid, bool alive, bool reset,
+ time64_t when)
{
- struct lnet_peer_ni *lp = NULL;
+ struct lnet_peer_ni *lpni = NULL;
time64_t now = ktime_get_seconds();
- int cpt = lnet_cpt_of_nid(nid, ni);
+ int cpt;
LASSERT (!in_interrupt ());
return 0;
}
- lnet_net_lock(cpt);
+ /* must lock 0 since this is used for synchronization */
+ lnet_net_lock(0);
if (the_lnet.ln_state != LNET_STATE_RUNNING) {
- lnet_net_unlock(cpt);
+ lnet_net_unlock(0);
return -ESHUTDOWN;
}
- lp = lnet_find_peer_ni_locked(nid);
- if (lp == NULL) {
+ lpni = lnet_find_peer_ni_locked(nid);
+ if (lpni == NULL) {
/* nid not found */
- lnet_net_unlock(cpt);
+ lnet_net_unlock(0);
CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
return 0;
}
- /*
- * It is possible for this function to be called for the same peer
- * but with different NIs. We want to synchronize the notification
- * between the different calls. So we will use the lpni_cpt to
- * grab the net lock.
- */
- if (lp->lpni_cpt != cpt) {
- lnet_net_unlock(cpt);
- cpt = lp->lpni_cpt;
- lnet_net_lock(cpt);
+ if (alive) {
+ if (reset)
+ lnet_set_healthv(&lpni->lpni_healthv,
+ LNET_MAX_HEALTH_VALUE);
+ else
+ lnet_inc_healthv(&lpni->lpni_healthv);
+ } else {
+ lnet_handle_remote_failure_locked(lpni);
}
- lnet_peer_ni_decref_locked(lp);
+ /* recalculate aliveness */
+ alive = lnet_is_peer_ni_alive(lpni);
+ lnet_net_unlock(0);
+ if (ni != NULL && !alive)
+ lnet_notify_peer_down(ni, lpni->lpni_nid);
+
+ cpt = lpni->lpni_cpt;
+ lnet_net_lock(cpt);
+ lnet_peer_ni_decref_locked(lpni);
lnet_net_unlock(cpt);
+
return 0;
}
EXPORT_SYMBOL(lnet_notify);