return rc;
}
+/* returns true if this message should be dropped */
static bool
-lnet_is_peer_deadline_passed(struct lnet_peer_ni *lpni, time64_t now)
+lnet_check_message_drop(struct lnet_ni *ni, struct lnet_peer_ni *lpni,
+ struct lnet_msg *msg)
{
- time64_t deadline;
-
- deadline = lpni->lpni_last_alive +
- lpni->lpni_net->net_tunables.lct_peer_timeout;
-
- /*
- * assume peer_ni is alive as long as we're within the configured
- * peer timeout
- */
- if (deadline > now)
+ if (msg->msg_target.pid & LNET_PID_USERFLAG)
return false;
- return true;
-}
-
-/* NB: returns 1 when alive, 0 when dead, negative when error;
- * may drop the lnet_net_lock */
-static int
-lnet_peer_alive_locked(struct lnet_ni *ni, struct lnet_peer_ni *lpni,
- struct lnet_msg *msg)
-{
- time64_t now = ktime_get_seconds();
-
if (!lnet_peer_aliveness_enabled(lpni))
- return -ENODEV;
+ return false;
- /*
- * If we're resending a message, let's attempt to send it even if
+ /* If we're resending a message, let's attempt to send it even if
* the peer is down to fulfill our resend quota on the message
*/
if (msg->msg_retry_count > 0)
- return 1;
+ return false;
/* try and send recovery messages irregardless */
if (msg->msg_recovery)
- return 1;
+ return false;
/* always send any responses */
if (lnet_msg_is_response(msg))
- return 1;
-
- if (!lnet_is_peer_deadline_passed(lpni, now))
- return true;
+ return false;
- return lnet_is_peer_ni_alive(lpni);
+ /* assume peer_ni is alive as long as we're within the configured
+ * peer timeout
+ */
+ return ktime_get_seconds() >=
+ (lpni->lpni_last_alive +
+ lpni->lpni_net->net_tunables.lct_peer_timeout);
}
/**
LASSERT(!nid_same(&lp->lpni_nid, &the_lnet.ln_loni->ni_nid));
/* NB 'lp' is always the next hop */
- if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
- lnet_peer_alive_locked(ni, lp, msg) == 0) {
+ if (lnet_check_message_drop(ni, lp, msg)) {
the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++;
the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length +=
msg->msg_len;
local node=$1
local net=$2
local if=$3
+ local opts=$4
- do_rpc_nodes $node "$LNETCTL net add --net $net --if $if" ||
+ do_rpc_nodes $node "$LNETCTL net add --net $net --if $if $opts" ||
error "add $net on interface $if on node $node failed rc=$?"
}
LOCAL_NET=${NETTYPE}1
REMOTE_NET=${NETTYPE}2
setup_router_test() {
- local mod_opts="$@"
+ local mod_opts="$1"
+ local rtr_net_opts="$2"
if [[ ${#RPEER_INTERFACES[@]} -eq 0 ]]; then
init_router_test_vars ||
do_nodes $all_nodes "$LNETCTL lnet configure" ||
error "Failed to initialize DLC"
- do_net_add $ROUTER $LOCAL_NET ${ROUTER_INTERFACES[0]} ||
+ do_net_add $ROUTER $LOCAL_NET ${ROUTER_INTERFACES[0]} $rtr_net_opts ||
return $?
do_net_add $ROUTER $REMOTE_NET ${ROUTER_INTERFACES[0]} ||
test_223() {
local opts="avoid_asym_router_failure=1 lnet_peer_discovery_disabled=1"
- setup_router_test $opts || return $?
+ setup_router_test "$opts" || return $?
do_aarf_enabled_test || return $?
test_225() {
local opts="avoid_asym_router_failure=0 lnet_peer_discovery_disabled=1"
- setup_router_test $opts || return $?
+ setup_router_test "$opts" || return $?
do_aarf_disabled_test || return $?
}
run_test 225 "Check avoid_asym_router_failure=0 w/DD disabled"
+do_rtr_peer_health_test() {
+ local expected="$1"
+
+ do_node $ROUTER "$LNETCTL set routing 1" ||
+ error "Unable to enable routing on $ROUTER"
+
+ do_route_add $HOSTNAME $REMOTE_NET ${ROUTER_NIDS[0]} ||
+ return $?
+
+ do_route_add $RPEER $LOCAL_NET ${ROUTER_NIDS[1]} ||
+ return $?
+
+ check_router_ni_status "up" "up" ||
+ return $?
+
+ check_route_aliveness "$HOSTNAME" "up" ||
+ return $?
+
+ check_route_aliveness "$RPEER" "up" ||
+ return $?
+
+ do_lnetctl ping ${RPEER_NIDS[0]} ||
+ error "Failed to ping ${RPEER_NIDS[0]}"
+
+ do_node $RPEER "$LNETCTL ping ${LNIDS[0]}" ||
+ error "$RPEER failed to ping ${LNIDS[0]}"
+
+ # Stop LNet on local host
+ do_lnetctl lnet unconfigure ||
+ error "Failed to stop LNet rc=$?"
+
+ check_router_ni_status "down" "up" ||
+ return $?
+
+ check_route_aliveness "$RPEER" "up" ||
+ return $?
+
+ # The NI used to send the message to the destination will be the
+ # router's NI on LOCAL_NET, so that's the drop count that will be
+ # incremented
+ local d1=$(do_node $ROUTER $LNETCTL net show -v --net $LOCAL_NET | \
+ awk '/drop_count:/{print $NF}')
+
+ # Ping from RPEER to local host should be dropped by the router
+ do_node $RPEER "$LCTL ping ${LNIDS[0]}" &&
+ error "$RPEER expected ping to fail"
+
+ local d2=$(do_node $ROUTER $LNETCTL net show -v --net $LOCAL_NET | \
+ awk '/drop_count:/{print $NF}')
+
+ [[ $((d2 - d1)) -ne $expected ]] &&
+ error "Expected drop count change by $expected: $d1 -> $d2"
+
+ return 0
+}
+
+test_226() {
+ setup_router_test avoid_asym_router_failure=0 --peer-timeout=10 ||
+ return $?
+
+ do_rtr_peer_health_test 1 ||
+ return $?
+
+ cleanup_router_test ||
+ return $?
+}
+run_test 226 "Check router peer health enabled"
+
+test_227() {
+ setup_router_test avoid_asym_router_failure=0 --peer-timeout=0 ||
+ return $?
+
+ do_rtr_peer_health_test 0 ||
+ return $?
+
+ cleanup_router_test ||
+ return $?
+}
+run_test 227 "Check router peer health disabled"
+
test_230() {
# LU-12815
echo "Check valid values; Should succeed"