Whamcloud - gitweb
LU-16303 lnet: Drop LNet message if deadline exceeded 78/49078/5
authorChris Horn <chris.horn@hpe.com>
Mon, 7 Nov 2022 22:06:32 +0000 (15:06 -0700)
committerOleg Drokin <green@whamcloud.com>
Tue, 29 Nov 2022 07:14:13 +0000 (07:14 +0000)
The LNet message deadline is set when a message is committed for
sending. A message can be queued while waiting for send credit(s)
after it has been committed. Thus, it is possible for a message
deadline to be exceeded while on the queue. We should check for this
when posting messages to LND layer.

HPE-bug-id: LUS-11333
Test-Parameters: trivial testlist=sanity-lnet env=ONLY=253,ONLY_REPEAT=100
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I1315b2351536e63b9d4f22d9336a57415031e0c7
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49078
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/lib-move.c
lnet/lnet/lib-msg.c
lustre/tests/sanity-lnet.sh

index 4bc7199..81b8688 100644 (file)
@@ -769,41 +769,52 @@ lnet_ni_eager_recv(struct lnet_ni *ni, struct lnet_msg *msg)
        return rc;
 }
 
-/* returns true if this message should be dropped */
-static bool
+/* Returns:
+ *  -ETIMEDOUT if the message deadline has been exceeded
+ *  -EHOSTUNREACH if the peer is down
+ *  0 if this message should not be dropped
+ */
+static int
 lnet_check_message_drop(struct lnet_ni *ni, struct lnet_peer_ni *lpni,
                        struct lnet_msg *msg)
 {
+       /* Drop message if we've exceeded the message deadline */
+       if (ktime_after(ktime_get(), msg->msg_deadline))
+               return -ETIMEDOUT;
+
        if (msg->msg_target.pid & LNET_PID_USERFLAG)
-               return false;
+               return 0;
 
        if (!lnet_peer_aliveness_enabled(lpni))
-               return false;
+               return 0;
 
        /* If we're resending a message, let's attempt to send it even if
         * the peer is down to fulfill our resend quota on the message
         */
        if (msg->msg_retry_count > 0)
-               return false;
+               return 0;
 
-       /* try and send recovery messages irregardless */
+       /* try and send recovery messages regardless */
        if (msg->msg_recovery)
-               return false;
+               return 0;
 
        /* always send any responses */
        if (lnet_msg_is_response(msg))
-               return false;
+               return 0;
 
        /* always send non-routed messages */
        if (!msg->msg_routing)
-               return false;
+               return 0;
 
        /* assume peer_ni is alive as long as we're within the configured
         * peer timeout
         */
-       return ktime_get_seconds() >=
-               (lpni->lpni_last_alive +
-                lpni->lpni_net->net_tunables.lct_peer_timeout);
+       if (ktime_get_seconds() >=
+           (lpni->lpni_last_alive +
+            lpni->lpni_net->net_tunables.lct_peer_timeout))
+               return -EHOSTUNREACH;
+
+       return 0;
 }
 
 /**
@@ -824,6 +835,7 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send)
        struct lnet_ni          *ni = msg->msg_txni;
        int                     cpt = msg->msg_tx_cpt;
        struct lnet_tx_queue    *tq = ni->ni_tx_queues[cpt];
+       int rc;
 
        /* non-lnet_send() callers have checked before */
        LASSERT(!do_send || msg->msg_tx_delayed);
@@ -835,7 +847,8 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send)
                LASSERT(!nid_same(&lp->lpni_nid, &the_lnet.ln_loni->ni_nid));
 
        /* NB 'lp' is always the next hop */
-       if (lnet_check_message_drop(ni, lp, msg)) {
+       rc = lnet_check_message_drop(ni, lp, msg);
+       if (rc) {
                the_lnet.ln_counters[cpt]->lct_common.lcc_drop_count++;
                the_lnet.ln_counters[cpt]->lct_common.lcc_drop_length +=
                        msg->msg_len;
@@ -849,14 +862,22 @@ lnet_post_send_locked(struct lnet_msg *msg, int do_send)
                                        msg->msg_type,
                                        LNET_STATS_TYPE_DROP);
 
-               CNETERR("Dropping message for %s: peer not alive\n",
-                       libcfs_idstr(&msg->msg_target));
-               msg->msg_health_status = LNET_MSG_STATUS_REMOTE_DROPPED;
+               if (rc == -EHOSTUNREACH) {
+                       CNETERR("Dropping message for %s: peer not alive\n",
+                               libcfs_idstr(&msg->msg_target));
+                       msg->msg_health_status = LNET_MSG_STATUS_REMOTE_DROPPED;
+               } else {
+                       CNETERR("Dropping message for %s: exceeded message deadline\n",
+                               libcfs_idstr(&msg->msg_target));
+                       msg->msg_health_status =
+                               LNET_MSG_STATUS_NETWORK_TIMEOUT;
+               }
+
                if (do_send)
-                       lnet_finalize(msg, -EHOSTUNREACH);
+                       lnet_finalize(msg, rc);
 
                lnet_net_lock(cpt);
-               return -EHOSTUNREACH;
+               return rc;
        }
 
        if (msg->msg_md != NULL &&
index 245499e..7362b3d 100644 (file)
@@ -782,7 +782,7 @@ lnet_health_check(struct lnet_msg *msg)
                lo = true;
 
        if (hstatus != LNET_MSG_STATUS_OK &&
-           ktime_compare(ktime_get(), msg->msg_deadline) >= 0)
+           ktime_after(ktime_get(), msg->msg_deadline))
                return -1;
 
        /*
index c351531..5d1a70f 100755 (executable)
@@ -3173,6 +3173,94 @@ test_252() {
 }
 run_test 252 "Ping to down peer should unlink quickly"
 
+do_expired_message_drop_test() {
+       local rnid lnid old_tto
+
+       old_tto=$($LNETCTL global show |
+                 awk '/transaction_timeout:/{print $NF}')
+
+       [[ -z $old_tto ]] &&
+               error "Cannot determine LNet transaction timeout"
+
+       local tto=10
+
+       do_lnetctl set transaction_timeout "${tto}" ||
+               error "Failed to set transaction_timeout"
+
+       # We want to consume all peer credits for at least transaction_timeout
+       # seconds
+       local delay
+
+       delay=$((tto + 1))
+
+       for lnid in "${LNIDS[@]}"; do
+               for rnid in "${RNIDS[@]}"; do
+                       $LCTL net_delay_add -s "${lnid}" -d "${rnid}" -l "${delay}" -r 1
+               done
+       done
+
+       local pc
+
+       pc=$($LNETCTL peer show -v --nid "${RNIDS[0]}" |
+                       awk '/max_ni_tx_credits:/{print $NF}' |
+                       xargs echo |
+                       sed 's/ /\+/g' | bc)
+
+       echo "Found $pc peer_credits for ${RNIDS[0]}"
+
+       local i
+
+       for i in $(seq 1 "${pc}"); do
+               $LNETCTL ping --timeout $((delay+2)) "${RNIDS[0]}" 1>/dev/null &
+       done
+
+       echo "Issued ${pc} pings to ${RNIDS[0]}"
+
+       # This ping should be queued on peer NI tx credit
+       $LNETCTL ping --timeout $((delay+2)) "${RNIDS[0]}" &
+
+       sleep ${delay}
+
+       $LCTL net_delay_del -a
+
+       wait
+
+       # Messages sent from the delay list do not go through
+       # lnet_post_send_locked(), thus we should only have a single drop
+       local dropped
+
+       dropped=$($LNETCTL peer show -v 2 --nid "${RNIDS[0]}" |
+                       grep -A 2 dropped_stats |
+                       awk '/get:/{print $2}' |
+                       xargs echo |
+                       sed 's/ /\+/g' | bc)
+
+       [[ $dropped -ne 1 ]] &&
+               error "Expect 1 dropped GET but found $dropped"
+
+       do_lnetctl set transaction_timeout "${old_tto}"
+
+       return 0
+}
+
+test_253() {
+       setup_health_test false || return $?
+
+       do_expired_message_drop_test || return $?
+
+       cleanup_health_test
+}
+run_test 253 "Message delayed beyond deadline should be dropped (single-rail)"
+
+test_254() {
+       setup_health_test true || return $?
+
+       do_expired_message_drop_test || return $?
+
+       cleanup_health_test
+}
+run_test 254 "Message delayed beyond deadline should be dropped (multi-rail)"
+
 test_300() {
        # LU-13274
        local header