return rc;
}
-/* returns true if this message should be dropped */
-static bool
+/* Returns:
+ * -ETIMEDOUT if the message deadline has been exceeded
+ * -EHOSTUNREACH if the peer is down
+ * 0 if this message should not be dropped
+ */
+static int
lnet_check_message_drop(struct lnet_ni *ni, struct lnet_peer_ni *lpni,
struct lnet_msg *msg)
{
+ /* Drop message if we've exceeded the message deadline */
+ if (ktime_after(ktime_get(), msg->msg_deadline))
+ return -ETIMEDOUT;
+
if (msg->msg_target.pid & LNET_PID_USERFLAG)
- return false;
+ return 0;
if (!lnet_peer_aliveness_enabled(lpni))
- return false;
+ return 0;
/* If we're resending a message, let's attempt to send it even if
* the peer is down to fulfill our resend quota on the message
*/
if (msg->msg_retry_count > 0)
- return false;
+ return 0;
- /* try and send recovery messages irregardless */
+ /* try and send recovery messages regardless */
if (msg->msg_recovery)
- return false;
+ return 0;
/* always send any responses */
if (lnet_msg_is_response(msg))
- return false;
+ return 0;
/* always send non-routed messages */
if (!msg->msg_routing)
- return false;
+ return 0;
/* assume peer_ni is alive as long as we're within the configured
* peer timeout
*/
- return ktime_get_seconds() >=
- (lpni->lpni_last_alive +
- lpni->lpni_net->net_tunables.lct_peer_timeout);
+ if (ktime_get_seconds() >=
+ (lpni->lpni_last_alive +
+ lpni->lpni_net->net_tunables.lct_peer_timeout))
+ return -EHOSTUNREACH;
+
+ return 0;
}
/**
struct lnet_ni *ni = msg->msg_txni;
int cpt = msg->msg_tx_cpt;
struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
+ int rc;
/* non-lnet_send() callers have checked before */
LASSERT(!do_send || msg->msg_tx_delayed);
LASSERT(!nid_same(&lp->lpni_nid, &the_lnet.ln_loni->ni_nid));
/* NB 'lp' is always the next hop */
- if (lnet_check_message_drop(ni, lp, msg)) {
+ rc = lnet_check_message_drop(ni, lp, msg);
+ if (rc) {
the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++;
the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length +=
msg->msg_len;
msg->msg_type,
LNET_STATS_TYPE_DROP);
- CNETERR("Dropping message for %s: peer not alive\n",
- libcfs_idstr(&msg->msg_target));
- msg->msg_health_status = LNET_MSG_STATUS_REMOTE_DROPPED;
+ if (rc == -EHOSTUNREACH) {
+ CNETERR("Dropping message for %s: peer not alive\n",
+ libcfs_idstr(&msg->msg_target));
+ msg->msg_health_status = LNET_MSG_STATUS_REMOTE_DROPPED;
+ } else {
+ CNETERR("Dropping message for %s: exceeded message deadline\n",
+ libcfs_idstr(&msg->msg_target));
+ msg->msg_health_status =
+ LNET_MSG_STATUS_NETWORK_TIMEOUT;
+ }
+
if (do_send)
- lnet_finalize(msg, -EHOSTUNREACH);
+ lnet_finalize(msg, rc);
lnet_net_lock(cpt);
- return -EHOSTUNREACH;
+ return rc;
}
if (msg->msg_md != NULL &&
}
run_test 252 "Ping to down peer should unlink quickly"
+do_expired_message_drop_test() {
+ local rnid lnid old_tto
+
+ old_tto=$($LNETCTL global show |
+ awk '/transaction_timeout:/{print $NF}')
+
+ [[ -z $old_tto ]] &&
+ error "Cannot determine LNet transaction timeout"
+
+ local tto=10
+
+ do_lnetctl set transaction_timeout "${tto}" ||
+ error "Failed to set transaction_timeout"
+
+ # We want to consume all peer credits for at least transaction_timeout
+ # seconds
+ local delay
+
+ delay=$((tto + 1))
+
+ for lnid in "${LNIDS[@]}"; do
+ for rnid in "${RNIDS[@]}"; do
+ $LCTL net_delay_add -s "${lnid}" -d "${rnid}" -l "${delay}" -r 1
+ done
+ done
+
+ local pc
+
+ pc=$($LNETCTL peer show -v --nid "${RNIDS[0]}" |
+ awk '/max_ni_tx_credits:/{print $NF}' |
+ xargs echo |
+ sed 's/ /\+/g' | bc)
+
+ echo "Found $pc peer_credits for ${RNIDS[0]}"
+
+ local i
+
+ for i in $(seq 1 "${pc}"); do
+ $LNETCTL ping --timeout $((delay+2)) "${RNIDS[0]}" 1>/dev/null &
+ done
+
+ echo "Issued ${pc} pings to ${RNIDS[0]}"
+
+ # This ping should be queued on peer NI tx credit
+ $LNETCTL ping --timeout $((delay+2)) "${RNIDS[0]}" &
+
+ sleep ${delay}
+
+ $LCTL net_delay_del -a
+
+ wait
+
+ # Messages sent from the delay list do not go through
+ # lnet_post_send_locked(), thus we should only have a single drop
+ local dropped
+
+ dropped=$($LNETCTL peer show -v 2 --nid "${RNIDS[0]}" |
+ grep -A 2 dropped_stats |
+ awk '/get:/{print $2}' |
+ xargs echo |
+ sed 's/ /\+/g' | bc)
+
+ [[ $dropped -ne 1 ]] &&
+ error "Expect 1 dropped GET but found $dropped"
+
+ do_lnetctl set transaction_timeout "${old_tto}"
+
+ return 0
+}
+
+test_253() {
+ setup_health_test false || return $?
+
+ do_expired_message_drop_test || return $?
+
+ cleanup_health_test
+}
+run_test 253 "Message delayed beyond deadline should be dropped (single-rail)"
+
+test_254() {
+ setup_health_test true || return $?
+
+ do_expired_message_drop_test || return $?
+
+ cleanup_health_test
+}
+run_test 254 "Message delayed beyond deadline should be dropped (multi-rail)"
+
test_300() {
# LU-13274
local header