+/* NB: caller shall hold a ref on 'lp' as I'd drop LNET_LOCK */
+void
+lnet_ni_peer_alive(lnet_peer_t *lp)
+{
+ cfs_time_t last_alive = 0;
+ lnet_ni_t *ni = lp->lp_ni;
+
+ LASSERT (lnet_peer_aliveness_enabled(lp));
+ LASSERT (ni->ni_lnd->lnd_query != NULL);
+ LASSERT (the_lnet.ln_routing == 1);
+
+ LNET_UNLOCK();
+ (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
+ LNET_LOCK();
+
+ lp->lp_last_query = cfs_time_current();
+
+ if (last_alive != 0) /* NI has updated timestamp */
+ lp->lp_last_alive = last_alive;
+}
+
+/* NB: always called with LNET_LOCK held */
+static inline int
+lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
+{
+ int alive;
+ cfs_time_t deadline;
+
+ LASSERT (lnet_peer_aliveness_enabled(lp));
+ LASSERT (the_lnet.ln_routing == 1);
+
+ /* Trust lnet_notify() if it has more recent aliveness news, but
+ * ignore the initial assumed death (see lnet_peers_start_down()).
+ */
+ if (!lp->lp_alive && lp->lp_alive_count > 0 &&
+ cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
+ return 0;
+
+ deadline = cfs_time_add(lp->lp_last_alive,
+ cfs_time_seconds(lp->lp_ni->ni_peertimeout));
+ alive = cfs_time_after(deadline, now);
+
+ /* Update obsolete lp_alive except for routers assumed to be dead
+ * initially, because router checker would update aliveness in this
+ * case, and moreover lp_last_alive at peer creation is assumed.
+ */
+ if (alive && !lp->lp_alive &&
+ !(lnet_isrouter(lp) && lp->lp_alive_count == 0))
+ lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+
+ return alive;
+}
+
+
+/* NB: returns 1 when alive, 0 when dead, negative when error;
+ * may drop the LNET_LOCK */
+int
+lnet_peer_alive_locked (lnet_peer_t *lp)
+{
+ cfs_time_t now = cfs_time_current();
+
+ /* LU-630: only router checks peer health. */
+ if (the_lnet.ln_routing == 0)
+ return 1;
+
+ if (!lnet_peer_aliveness_enabled(lp))
+ return -ENODEV;
+
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ /* Peer appears dead, but we should avoid frequent NI queries (at
+ * most once per lnet_queryinterval seconds). */
+ if (lp->lp_last_query != 0) {
+ static const int lnet_queryinterval = 1;
+
+ cfs_time_t next_query =
+ cfs_time_add(lp->lp_last_query,
+ cfs_time_seconds(lnet_queryinterval));
+
+ if (cfs_time_before(now, next_query)) {
+ if (lp->lp_alive)
+ CWARN("Unexpected aliveness of peer %s: "
+ "%d < %d (%d/%d)\n",
+ libcfs_nid2str(lp->lp_nid),
+ (int)now, (int)next_query,
+ lnet_queryinterval,
+ lp->lp_ni->ni_peertimeout);
+ return 0;
+ }
+ }
+
+ /* query NI for latest aliveness news */
+ lnet_ni_peer_alive(lp);
+
+ if (lnet_peer_is_alive(lp, now))
+ return 1;
+
+ lnet_notify_locked(lp, 0, 0, lp->lp_last_alive);
+ return 0;
+}
+