__u32 lpni_sel_priority;
/* number of preferred NIDs in lpni_pref_nids */
__u32 lpni_pref_nnids;
+ /* Whether some thread is processing an lnet_notify() event for this
+ * peer NI
+ */
+ bool lpni_notifying;
+ /* Timestamp of the last lnet_notify() event for this peer NI */
+ time64_t lpni_timestamp;
+ /* Whether we've received an lnet_notify() event for this peer NI */
+ bool lpni_notified;
};
/* Preferred path added due to traffic on non-MR peer_ni */
CDEBUG(D_NET, "Peer %s(%p):0x%llx new -> wait response\n",
libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr);
} else if (msg->type == KFILND_MSG_HELLO_RSP) {
+ struct lnet_nid nid;
+
kp->kp_version = msg->proto.hello.version;
atomic_set(&kp->kp_state, KP_STATE_UPTODATE);
CDEBUG(D_NET,
libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
msg->proto.hello.version);
kfilnd_peer_clear_hello_state(kp);
+
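+ /* A HELLO response proves this peer NI is alive; tell LNet so peer
+ * aliveness and route health can be updated
+ */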
+ lnet_nid4_to_nid(kp->kp_nid, &nid);
+ lnet_notify(kp->kp_dev->kfd_ni, &nid, true, false,
+ kp->kp_last_alive);
}
}
if (!lnet_peer_aliveness_enabled(lpni))
return 0;
- /* If we're resending a message, let's attempt to send it even if
- * the peer is down to fulfill our resend quota on the message
- */
- if (msg->msg_retry_count > 0)
- return 0;
-
- /* try and send recovery messages regardless */
- if (msg->msg_recovery)
- return 0;
-
- /* always send any responses */
- if (lnet_msg_is_response(msg))
- return 0;
-
/* always send non-routed messages */
if (!msg->msg_routing)
return 0;
- /* assume peer_ni is alive as long as we're within the configured
- * peer timeout
- */
- if (ktime_get_seconds() <
- (lpni->lpni_last_alive +
- lpni->lpni_net->net_tunables.lct_peer_timeout))
+ if (lnet_is_peer_ni_alive(lpni))
return 0;
- if (!lnet_is_peer_ni_alive(lpni))
- return -EHOSTUNREACH;
-
- return 0;
+ return -EHOSTUNREACH;
}
/**
* from it. The ping response reports back the ns_status which is
* marked on the remote as up or down and we cache it here.
*/
- msg->msg_rxpeer->lpni_ns_status = LNET_NI_STATUS_UP;
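+ /* lpni_ns_status is updated under lpni_lock; only take the lock
+ * when the cached status actually changes
+ */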
+ if (unlikely(msg->msg_rxpeer->lpni_ns_status != LNET_NI_STATUS_UP)) {
+ spin_lock(&msg->msg_rxpeer->lpni_lock);
+ msg->msg_rxpeer->lpni_ns_status = LNET_NI_STATUS_UP;
+ spin_unlock(&msg->msg_rxpeer->lpni_lock);
+ }
lnet_msg_commit(msg, cpt);
lnet_incr_hstats(ni, lpni, hstatus);
/* For remote failures, health/recovery/resends are not needed
* if the peer only has a single interface. Special case for
- * routers where we rely on health feature to manage route
- * aliveness. NB: lp_nnis does _not_ include the lolnd, so a
- * single-rail node would have lp_nnis == 1.
+ * routers where we rely on health feature to manage route and
+ * peer aliveness. NB: unlike pb_nnis above, lp_nnis does _not_
+ * include the lolnd, so a single-rail node would have
+ * lp_nnis == 1.
*/
if (lpni && lpni->lpni_peer_net &&
lpni->lpni_peer_net->lpn_peer &&
lpni->lpni_peer_net->lpn_peer->lp_nnis <= 1) {
attempt_remote_resend = false;
- if (!lnet_isrouter(lpni))
+ if (!(lnet_isrouter(lpni) || the_lnet.ln_routing))
handle_remote_health = false;
}
/* Do not put my interfaces into peer NI recovery. They should
lpni->lpni_nid = *nid;
lpni->lpni_cpt = cpt;
atomic_set(&lpni->lpni_healthv, LNET_MAX_HEALTH_VALUE);
+ lpni->lpni_notified = false;
net = lnet_get_net_locked(LNET_NID_NET(nid));
lpni->lpni_net = net;
return nnis;
}
-static inline void handle_disc_lpni_health(struct lnet_peer_ni *lpni)
+static inline void handle_disc_lpni_health(struct lnet_peer_ni *lpni,
+ __u32 new_status)
{
- if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN) {
+ __u32 old_status;
+
+ spin_lock(&lpni->lpni_lock);
+ old_status = lpni->lpni_ns_status;
+ lpni->lpni_ns_status = new_status;
+ spin_unlock(&lpni->lpni_lock);
+
+ /* Decrement health when transitioning from UP to DOWN */
+ if (old_status != new_status && new_status == LNET_NI_STATUS_DOWN) {
lnet_net_lock(0);
lnet_handle_remote_failure_locked(lpni);
lnet_net_unlock(0);
- } else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP &&
- !lpni->lpni_last_alive)
+ } else if (new_status == LNET_NI_STATUS_UP && !lpni->lpni_last_alive) {
+ /* Set health to max if the initial status is UP */
atomic_set(&lpni->lpni_healthv, LNET_MAX_HEALTH_VALUE);
+ }
}
/*
int i;
int j;
int rc;
- __u32 old_st;
flags = LNET_PEER_DISCOVERED;
if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL)
*/
lpni = lnet_peer_ni_find_locked(&curnis[i]);
if (lpni) {
- old_st = lpni->lpni_ns_status;
- lpni->lpni_ns_status = *stp;
- if (old_st != lpni->lpni_ns_status)
- handle_disc_lpni_health(lpni);
+ handle_disc_lpni_health(lpni, *stp);
lnet_peer_ni_decref_locked(lpni);
}
break;
}
lpni = lnet_peer_ni_find_locked(&addnis[i].ns_nid);
if (lpni) {
- lpni->lpni_ns_status = addnis[i].ns_status;
- handle_disc_lpni_health(lpni);
+ handle_disc_lpni_health(lpni, addnis[i].ns_status);
lnet_peer_ni_decref_locked(lpni);
}
}
lnet_rtrpools_free(1);
}
-static inline void
-lnet_notify_peer_down(struct lnet_ni *ni, struct lnet_nid *nid)
-{
- if (ni->ni_net->net_lnd->lnd_notify_peer_down != NULL)
- (ni->ni_net->net_lnd->lnd_notify_peer_down)(nid);
-}
-
/*
* ni: local NI used to communicate with the peer
* nid: peer NID
struct lnet_route *route;
struct lnet_peer *lp;
time64_t now = ktime_get_seconds();
- int cpt;
+ int cpt = lnet_nid2cpt(nid, ni);
+ unsigned int ns_status = alive ? LNET_NI_STATUS_UP :
+ LNET_NI_STATUS_DOWN;
LASSERT(!in_interrupt());
return -EINVAL;
}
- if (ni != NULL && !alive && /* LND telling me she's down */
- !auto_down) { /* auto-down disabled */
+ if (ni && !alive && /* LND telling me she's down */
+ !auto_down) { /* auto-down disabled */
CDEBUG(D_NET, "Auto-down disabled\n");
return 0;
}
- /* must lock 0 since this is used for synchronization */
- lnet_net_lock(0);
+ lnet_net_lock(cpt);
if (the_lnet.ln_state != LNET_STATE_RUNNING) {
- lnet_net_unlock(0);
+ lnet_net_unlock(cpt);
return -ESHUTDOWN;
}
lpni = lnet_peer_ni_find_locked(nid);
- if (lpni == NULL) {
+ if (!lpni) {
/* nid not found */
- lnet_net_unlock(0);
+ lnet_net_unlock(cpt);
CDEBUG(D_NET, "%s not found\n", libcfs_nidstr(nid));
return 0;
}
- if (alive) {
- if (reset) {
- lpni->lpni_ns_status = LNET_NI_STATUS_UP;
- lnet_set_lpni_healthv_locked(lpni,
- LNET_MAX_HEALTH_VALUE);
- } else {
- lnet_inc_lpni_healthv_locked(lpni);
- }
- } else if (reset) {
- lpni->lpni_ns_status = LNET_NI_STATUS_DOWN;
+ if (lpni->lpni_cpt != cpt) {
+ lnet_net_unlock(cpt);
+ cpt = lpni->lpni_cpt;
+ lnet_net_lock(cpt);
}
- /* recalculate aliveness */
- alive = lnet_is_peer_ni_alive(lpni);
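+ /* A death report from the LND cannot pre-date the last time this
+ * peer NI was known to be alive
+ */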
+ if (ni && !alive && when < lpni->lpni_last_alive)
+ when = lpni->lpni_last_alive;
+
+ if (lpni->lpni_timestamp > when) {
+ CDEBUG(D_NET, "Out of date\n");
+ goto out_lpni_decref;
+ }
+
+ lpni->lpni_timestamp = when;
lp = lpni->lpni_peer_net->lpn_peer;
- /* If this is an LNet router then update route aliveness */
+ /* If this peer NI belongs to an LNet router then update the associated
+ * route aliveness. We update before taking lpni_lock below to avoid
+ * holding both lpni_lock and lp_lock which is taken in
+ * lnet_is_discovery_disabled().
+ */
if (lp->lp_rtr_refcount) {
if (reset)
/* reset flag indicates gateway peer went up or down */
/* If discovery is disabled, locally or on the gateway, then
* any routes using lpni as next-hop need to be updated
- *
- * NB: We can get many notifications while a route is down, so
- * we try and avoid the expensive net_lock/EX here for the
- * common case of receiving duplicate lnet_notify() calls (i.e.
- * only grab EX lock when we actually need to update the route
- * aliveness).
*/
if (lnet_is_discovery_disabled(lp)) {
list_for_each_entry(route, &lp->lp_routes, lr_gwlist) {
}
}
- lnet_net_unlock(0);
+ spin_lock(&lpni->lpni_lock);
- if (ni != NULL && !alive)
- lnet_notify_peer_down(ni, &lpni->lpni_nid);
+ /* Depending on lnet_peers_start_down()/check_routers_before_use the
+ * lpni_ns_status may initialize to either UP or DOWN. Thus, the
+ * first notification that we receive may match the existing status.
+ * We do not want to ignore this notification.
+ */
+ if (lpni->lpni_notified && lpni->lpni_ns_status == ns_status) {
+ CDEBUG(D_NET, "Old news\n");
+ goto out_lpni_unlock;
+ }
- cpt = lpni->lpni_cpt;
- lnet_net_lock(cpt);
+ lpni->lpni_notified = true;
+ lpni->lpni_ns_status = ns_status;
+
+ while (lpni->lpni_notifying) {
+ /* Previous event is being processed */
+ spin_unlock(&lpni->lpni_lock);
+ lnet_net_unlock(cpt);
+ schedule();
+ lnet_net_lock(cpt);
+ spin_lock(&lpni->lpni_lock);
+ }
+
+ lpni->lpni_notifying = true;
+
+ if (alive && reset)
+ lnet_set_lpni_healthv_locked(lpni, LNET_MAX_HEALTH_VALUE);
+
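+ /* Drop the locks around the LND callback; lpni_notifying keeps
+ * concurrent notifications from racing through this section
+ */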
+ if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN &&
+ ni && ni->ni_net->net_lnd->lnd_notify_peer_down) {
+ spin_unlock(&lpni->lpni_lock);
+ lnet_net_unlock(cpt);
+
+ (ni->ni_net->net_lnd->lnd_notify_peer_down)(&lpni->lpni_nid);
+
+ lnet_net_lock(cpt);
+ spin_lock(&lpni->lpni_lock);
+ }
+
+ lpni->lpni_notifying = false;
+
+out_lpni_unlock:
+ spin_unlock(&lpni->lpni_lock);
+out_lpni_decref:
lnet_peer_ni_decref_locked(lpni);
lnet_net_unlock(cpt);
}
run_test 226 "test missing route for 1 of 2 routers"
+test_227() {
+ local opts="lnet_peer_discovery_disabled=1 lnet_health_sensitivity=0"
+ opts+=" lnet_transaction_timeout=10"
+
+ [[ $NETTYPE != kfi* ]] || skip "kfi doesn't support drop rules"
+
+ setup_router_test -p 2 $opts || return $?
+
+ do_basic_rtr_test || return $?
+
+ do_node ${RPEERS[0]} $LNETCTL lnet unconfigure ||
+ error "Failed to unconfigure lnet on ${RPEERS[0]}"
+
+ local rpeer_nids=( ${RPEER_NIDS[${RPEERS[0]}]} )
+
+ do_lnetctl ping ${rpeer_nids[0]} &&
+ error "Expected ping to fail"
+
+ do_lnetctl ping ${rpeer_nids[0]} &&
+ error "Expected ping to fail"
+
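+ # Sum the dropped GET counts reported for this peer by the router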
+ local dropped=$(do_node ${ROUTERS[0]} \
+ $LNETCTL peer show -v 2 --nid ${rpeer_nids[0]} |
+ grep -A 2 dropped_stats |
+ awk '/get:/{print $2}' |
+ xargs echo |
+ sed 's/ /\+/g' | bc)
+
+ ((dropped > 0)) ||
+ error "Expected dropped > 0 found $dropped"
+
+ cleanup_router_test
+}
+run_test 227 "Check router peer health w/DD disabled"
+
test_230() {
[[ ${NETTYPE} == tcp* ]] || skip "Need tcp NETTYPE"