continue;
if (best_ni)
- CDEBUG(D_NET, "compare ni %s [c:%d, d:%d, s:%d, p:%u, g:%u] with best_ni %s [c:%d, d:%d, s:%d, p:%u, g:%u]\n",
+ CDEBUG(D_NET, "compare ni %s [c:%d, d:%d, s:%d, p:%u, g:%u, h:%d] with best_ni %s [c:%d, d:%d, s:%d, p:%u, g:%u, h:%d]\n",
libcfs_nidstr(&ni->ni_nid), ni_credits, distance,
- ni->ni_seq, ni_sel_prio, ni_dev_prio,
+ ni->ni_seq, ni_sel_prio, ni_dev_prio, ni_healthv,
(best_ni) ? libcfs_nidstr(&best_ni->ni_nid)
: "not selected", best_credits, shortest_distance,
(best_ni) ? best_ni->ni_seq : 0,
- best_sel_prio, best_dev_prio);
+ best_sel_prio, best_dev_prio, best_healthv);
else
goto select_ni;
return best_ni;
}
+/*
+ * Report whether @msg is a PUT or GET addressed to LNET_RESERVED_PORTAL.
+ * Every other message type, and any other portal index, yields false.
+ */
+static bool
+lnet_reserved_msg(struct lnet_msg *msg)
+{
+	if (msg->msg_type == LNET_MSG_PUT)
+		return msg->msg_hdr.msg.put.ptl_index == LNET_RESERVED_PORTAL;
+
+	if (msg->msg_type == LNET_MSG_GET)
+		return msg->msg_hdr.msg.get.ptl_index == LNET_RESERVED_PORTAL;
+
+	return false;
+}
+
/*
* Traffic to the LNET_RESERVED_PORTAL may not trigger peer discovery,
* because such traffic is required to perform discovery. We therefore
static bool
lnet_msg_discovery(struct lnet_msg *msg)
{
- if (msg->msg_type == LNET_MSG_PUT) {
- if (msg->msg_hdr.msg.put.ptl_index != LNET_RESERVED_PORTAL)
- return true;
- } else if (msg->msg_type == LNET_MSG_GET) {
- if (msg->msg_hdr.msg.get.ptl_index != LNET_RESERVED_PORTAL)
- return true;
- }
- return false;
+	/*
+	 * True only when the message is neither reserved-portal traffic
+	 * nor a response (as reported by lnet_msg_is_response()).
+	 */
+	return !lnet_reserved_msg(msg) && !lnet_msg_is_response(msg);
}
#define SRC_SPEC 0x0001
lnet_select_preferred_best_ni(struct lnet_send_data *sd)
{
struct lnet_ni *best_ni = NULL;
- struct lnet_peer_ni *best_lpni = sd->sd_best_lpni;
/*
* We must use a consistent source address when sending to a
*
* So we need to pick the NI the peer prefers for this
* particular network.
+ *
+ * An exception is traffic on LNET_RESERVED_PORTAL. Internal LNet
+ * traffic doesn't care which source NI is used, and we don't actually
+ * want to restrict local recovery pings to a single source NI.
*/
+ if (!lnet_reserved_msg(sd->sd_msg))
+ best_ni = lnet_find_existing_preferred_best_ni(sd->sd_best_lpni,
+ sd->sd_cpt);
- best_ni = lnet_find_existing_preferred_best_ni(sd->sd_best_lpni,
- sd->sd_cpt);
-
- /* if best_ni is still not set just pick one */
- if (!best_ni) {
- best_ni =
- lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
+ if (!best_ni)
+ best_ni = lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
sd->sd_msg,
sd->sd_md_cpt);
- /* If there is no best_ni we don't have a route */
- if (!best_ni) {
- CERROR("no path to %s from net %s\n",
- libcfs_nidstr(&best_lpni->lpni_nid),
- libcfs_net2str(best_lpni->lpni_net->net_id));
- return -EHOSTUNREACH;
- }
+
+ /* If there is no best_ni we don't have a route */
+ if (!best_ni) {
+ CERROR("no path to %s from net %s\n",
+ libcfs_nidstr(&sd->sd_best_lpni->lpni_nid),
+ libcfs_net2str(sd->sd_best_lpni->lpni_net->net_id));
+ return -EHOSTUNREACH;
}
sd->sd_best_ni = best_ni;
}
do_lnetctl() {
+	# Hand the whole command line to "$LCTL mark" as a single word;
+	# "$*" joins the arguments, whereas "$@" inside a quoted string
+	# would split the text into several arguments.
+	$LCTL mark "$LNETCTL $*"
echo "$LNETCTL $@"
$LNETCTL "$@"
}
}
run_test 217 "Don't leak memory when discovering peer with nnis <= 1"
+# Configure two tcp NIs, confirm each answers a ping, then add a
+# local_error drop rule with the first NID as both source and
+# destination. A ping to that NID must now fail. Afterwards poll
+# "net show -v 2" (up to five one-second waits) until two NIs report
+# 'health value: 1000', remove all drop rules, and fail the test if
+# the count is still not two.
+test_218() {
+	reinit_dlc || return $?
+
+	[[ ${#INTERFACES[@]} -lt 2 ]] &&
+		skip "Need two LNet interfaces"
+
+	add_net "tcp" "${INTERFACES[0]}" || return $?
+
+	local nid1=$($LCTL list_nids | head -n 1)
+
+	do_lnetctl ping "$nid1" ||
+		error "ping failed"
+
+	add_net "tcp" "${INTERFACES[1]}" || return $?
+
+	local nid2=$($LCTL list_nids | tail --lines 1)
+
+	do_lnetctl ping "$nid2" ||
+		error "ping failed"
+
+	$LCTL net_drop_add -s "$nid1" -d "$nid1" -e local_error -r 1
+
+	do_lnetctl ping "$nid1" &&
+		error "ping should have failed"
+
+	local health_recovered
+	local i
+
+	for i in $(seq 1 5); do
+		health_recovered=$($LNETCTL net show -v 2 |
+				   grep -c 'health value: 1000')
+
+		if [[ $health_recovered -ne 2 ]]; then
+			echo "Wait 1 second for health to recover"
+			sleep 1
+		else
+			break
+		fi
+	done
+
+	# Sample once more so the verdict reflects the state after the
+	# final wait of the loop above.
+	health_recovered=$($LNETCTL net show -v 2 |
+			   grep -c 'health value: 1000')
+
+	# Remove the drop rules before any error() call can end the test.
+	$LCTL net_drop_del -a
+
+	# Print the diagnostics, then fail unconditionally. Chaining
+	# error() behind the grep pipeline with && would skip the failure
+	# whenever grep matched no lines.
+	if [[ $health_recovered -ne 2 ]]; then
+		do_lnetctl net show -v 2 | grep -E -e nid -e health
+		error "Health hasn't recovered"
+	fi
+
+	return 0
+}
+run_test 218 "Local recovery pings should exercise all available paths"
+
test_230() {
# LU-12815
echo "Check valid values; Should succeed"