int alive;
cfs_time_t deadline;
- LASSERT (lnet_peer_aliveness_enabled(lp));
-
/* Trust lnet_notify() if it has more recent aliveness news, but
* ignore the initial assumed death (see lnet_peers_start_down()).
*/
int
lnet_peer_alive_locked (lnet_peer_t *lp)
{
- cfs_time_t now = cfs_time_current();
-
- if (!lnet_peer_aliveness_enabled(lp))
- return -ENODEV;
+ cfs_time_t now = cfs_time_current();
+ bool query;
- if (lnet_peer_is_alive(lp, now))
- return 1;
+ if (!lnet_peer_aliveness_enabled(lp))
+ return -ENODEV;
- /* Peer appears dead, but we should avoid frequent NI queries (at
- * most once per lnet_queryinterval seconds). */
- if (lp->lp_last_query != 0) {
- static const int lnet_queryinterval = 1;
-
- cfs_time_t next_query =
- cfs_time_add(lp->lp_last_query,
- cfs_time_seconds(lnet_queryinterval));
-
- if (cfs_time_before(now, next_query)) {
- if (lp->lp_alive)
- CWARN("Unexpected aliveness of peer %s: "
- "%d < %d (%d/%d)\n",
- libcfs_nid2str(lp->lp_nid),
- (int)now, (int)next_query,
- lnet_queryinterval,
- lp->lp_ni->ni_peertimeout);
- return 0;
- }
+ if (lp->lp_last_query == 0) {
+ query = true;
+ } else {
+ /* Peer appears dead, but we should avoid frequent NI queries
+ * (at most once per ni_query_interval seconds). */
+ static const int ni_query_interval = 1;
+ cfs_time_t next_query;
+
+ next_query = cfs_time_add(lp->lp_last_query,
+ cfs_time_seconds(ni_query_interval));
+ query = cfs_time_aftereq(now, next_query);
}
/* query NI for latest aliveness news */
- lnet_ni_query_locked(lp->lp_ni, lp);
+ if (query)
+ lnet_ni_query_locked(lp->lp_ni, lp);
if (lnet_peer_is_alive(lp, now))
return 1;
/* NB 'lp' is always the next hop */
if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
- lnet_peer_alive_locked(lp) == 0) {
+ lnet_peer_alive_locked(lp) == 0 &&
+ !lnet_msg_is_rc_ping(msg)) { /* send RC ping even for dead router */
the_lnet.ln_counters[cpt]->drop_count++;
the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
lnet_net_unlock(cpt);
lnet_peer_t *p1 = r1->lr_gateway;
lnet_peer_t *p2 = r2->lr_gateway;
+ if (p1->lp_ni->ni_peertimeout > 0 &&
+ p2->lp_ni->ni_peertimeout > 0) {
+ /* if a router has queued bytes but no aliveness update for
+ * the last 10 seconds, it could be potentially dead or
+ * congested, so we prefer not to choose it even if its status
+ * is still alive.
+ */
+ int router_slow = cfs_time_seconds(10);
+ bool r1_slow;
+ bool r2_slow;
+ cfs_time_t now = cfs_time_current();
+
+ r1_slow = p1->lp_txqnob != 0 &&
+ cfs_time_aftereq(now, p1->lp_last_alive + router_slow);
+ r2_slow = p2->lp_txqnob != 0 &&
+ cfs_time_aftereq(now, p2->lp_last_alive + router_slow);
+
+ if (!r1_slow && r2_slow)
+ return 1;
+
+ if (r1_slow && !r2_slow)
+ return -1;
+ }
+
if (r1->lr_priority < r2->lr_priority)
return 1;
lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
void *private, int rdma_req)
{
- int rc = 0;
- int cpt;
- int for_me;
struct lnet_msg *msg;
- lnet_pid_t dest_pid;
- lnet_nid_t dest_nid;
- lnet_nid_t src_nid;
- __u32 payload_length;
- __u32 type;
-
- LASSERT (!in_interrupt ());
+ lnet_peer_t *rxpeer;
+ lnet_pid_t dest_pid;
+ lnet_nid_t dest_nid;
+ lnet_nid_t src_nid;
+ __u32 payload_length;
+ __u32 type;
+ int for_me;
+ int cpt;
+ int rc = 0;
+
+ LASSERT(!in_interrupt());
type = le32_to_cpu(hdr->type);
src_nid = le64_to_cpu(hdr->src_nid);
}
lnet_msg_commit(msg, cpt);
+ /* LND just notified me of an incoming message from rxpeer, so assume
+ * it is alive */
+ rxpeer = msg->msg_rxpeer;
+ rxpeer->lp_last_alive = rxpeer->lp_last_query = cfs_time_current();
+ if (!rxpeer->lp_alive)
+ lnet_notify_locked(rxpeer, 0, 1, rxpeer->lp_last_alive);
+
+ if (lnet_isrouter(msg->msg_rxpeer) &&
+ LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
+ lnet_router_ni_update_locked(msg->msg_rxpeer,
+ LNET_NIDNET(src_nid));
+ }
/* message delay simulation */
if (unlikely(!list_empty(&the_lnet.ln_delay_rules) &&