Whamcloud - gitweb
LU-15446 lnet: Don't use pref NI for reserved portal 78/46078/4
author Chris Horn <chris.horn@hpe.com>
Wed, 12 Jan 2022 19:19:21 +0000 (19:19 +0000)
committer Oleg Drokin <green@whamcloud.com>
Mon, 7 Feb 2022 04:43:29 +0000 (04:43 +0000)
Don't use the preferred NI when sending traffic on the LNet reserved
portal. This allows local recovery pings to utilize any local NI as
source in the case where we do not have a multi-rail peer entry for
the local host. This is typically the case when MR is not being
configured statically (i.e. when discovery is being used for MR
configuration).

lnet_get_best_ni() was modified to include health values of the NIs
being compared in its debug output.

HPE-bug-id: LUS-10658
Test-Parameters: trivial testlist=sanity-lnet
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I38f5760bf034f698b7f44ffa89aa91c4f5d4b9ea
Reviewed-on: https://review.whamcloud.com/46078
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/lib-move.c
lustre/tests/sanity-lnet.sh

index 289a482..f273d90 100644 (file)
@@ -1703,13 +1703,13 @@ lnet_get_best_ni(struct lnet_net *local_net, struct lnet_ni *best_ni,
                        continue;
 
                if (best_ni)
-                       CDEBUG(D_NET, "compare ni %s [c:%d, d:%d, s:%d, p:%u, g:%u] with best_ni %s [c:%d, d:%d, s:%d, p:%u, g:%u]\n",
+                       CDEBUG(D_NET, "compare ni %s [c:%d, d:%d, s:%d, p:%u, g:%u, h:%d] with best_ni %s [c:%d, d:%d, s:%d, p:%u, g:%u, h:%d]\n",
                               libcfs_nidstr(&ni->ni_nid), ni_credits, distance,
-                              ni->ni_seq, ni_sel_prio, ni_dev_prio,
+                              ni->ni_seq, ni_sel_prio, ni_dev_prio, ni_healthv,
                               (best_ni) ? libcfs_nidstr(&best_ni->ni_nid)
                               : "not selected", best_credits, shortest_distance,
                               (best_ni) ? best_ni->ni_seq : 0,
-                              best_sel_prio, best_dev_prio);
+                              best_sel_prio, best_dev_prio, best_healthv);
                else
                        goto select_ni;
 
@@ -1756,6 +1756,19 @@ select_ni:
        return best_ni;
 }
 
+static bool
+lnet_reserved_msg(struct lnet_msg *msg)
+{
+       if (msg->msg_type == LNET_MSG_PUT) {
+               if (msg->msg_hdr.msg.put.ptl_index == LNET_RESERVED_PORTAL)
+                       return true;
+       } else if (msg->msg_type == LNET_MSG_GET) {
+               if (msg->msg_hdr.msg.get.ptl_index == LNET_RESERVED_PORTAL)
+                       return true;
+       }
+       return false;
+}
+
 /*
  * Traffic to the LNET_RESERVED_PORTAL may not trigger peer discovery,
  * because such traffic is required to perform discovery. We therefore
@@ -1767,14 +1780,7 @@ select_ni:
 static bool
 lnet_msg_discovery(struct lnet_msg *msg)
 {
-       if (msg->msg_type == LNET_MSG_PUT) {
-               if (msg->msg_hdr.msg.put.ptl_index != LNET_RESERVED_PORTAL)
-                       return true;
-       } else if (msg->msg_type == LNET_MSG_GET) {
-               if (msg->msg_hdr.msg.get.ptl_index != LNET_RESERVED_PORTAL)
-                       return true;
-       }
-       return false;
+       return !(lnet_reserved_msg(msg) || lnet_msg_is_response(msg));
 }
 
 #define SRC_SPEC       0x0001
@@ -2543,7 +2549,6 @@ static int
 lnet_select_preferred_best_ni(struct lnet_send_data *sd)
 {
        struct lnet_ni *best_ni = NULL;
-       struct lnet_peer_ni *best_lpni = sd->sd_best_lpni;
 
        /*
         * We must use a consistent source address when sending to a
@@ -2554,25 +2559,27 @@ lnet_select_preferred_best_ni(struct lnet_send_data *sd)
         *
         * So we need to pick the NI the peer prefers for this
         * particular network.
+        *
+        * An exception is traffic on LNET_RESERVED_PORTAL. Internal LNet
+        * traffic doesn't care which source NI is used, and we don't actually
+        * want to restrict local recovery pings to a single source NI.
         */
+       if (!lnet_reserved_msg(sd->sd_msg))
+               best_ni = lnet_find_existing_preferred_best_ni(sd->sd_best_lpni,
+                                                              sd->sd_cpt);
 
-       best_ni = lnet_find_existing_preferred_best_ni(sd->sd_best_lpni,
-                                                      sd->sd_cpt);
-
-       /* if best_ni is still not set just pick one */
-       if (!best_ni) {
-               best_ni =
-                 lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
+       if (!best_ni)
+               best_ni = lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
                                                sd->sd_best_lpni->lpni_peer_net,
                                                sd->sd_msg,
                                                sd->sd_md_cpt);
-               /* If there is no best_ni we don't have a route */
-               if (!best_ni) {
-                       CERROR("no path to %s from net %s\n",
-                               libcfs_nidstr(&best_lpni->lpni_nid),
-                               libcfs_net2str(best_lpni->lpni_net->net_id));
-                       return -EHOSTUNREACH;
-               }
+
+       /* If there is no best_ni we don't have a route */
+       if (!best_ni) {
+               CERROR("no path to %s from net %s\n",
+                       libcfs_nidstr(&sd->sd_best_lpni->lpni_nid),
+                       libcfs_net2str(sd->sd_best_lpni->lpni_net->net_id));
+               return -EHOSTUNREACH;
        }
 
        sd->sd_best_ni = best_ni;
index 72e28eb..c2d6f34 100755 (executable)
@@ -92,6 +92,7 @@ load_lnet() {
 }
 
 do_lnetctl() {
+       $LCTL mark "$LNETCTL $@"
        echo "$LNETCTL $@"
        $LNETCTL "$@"
 }
@@ -2348,6 +2349,59 @@ test_217() {
 }
 run_test 217 "Don't leak memory when discovering peer with nnis <= 1"
 
+test_218() {
+       reinit_dlc || return $?
+
+       [[ ${#INTERFACES[@]} -lt 2 ]] &&
+               skip "Need two LNet interfaces"
+
+       add_net "tcp" "${INTERFACES[0]}" || return $?
+
+       local nid1=$($LCTL list_nids | head -n 1)
+
+       do_lnetctl ping $nid1 ||
+               error "ping failed"
+
+       add_net "tcp" "${INTERFACES[1]}" || return $?
+
+       local nid2=$($LCTL list_nids | tail --lines 1)
+
+       do_lnetctl ping $nid2 ||
+               error "ping failed"
+
+       $LCTL net_drop_add -s $nid1 -d $nid1 -e local_error -r 1
+
+       do_lnetctl ping $nid1 &&
+               error "ping should have failed"
+
+       local health_recovered
+       local i
+
+       for i in $(seq 1 5); do
+               health_recovered=$($LNETCTL net show -v 2 |
+                                  grep -c 'health value: 1000')
+
+               if [[ $health_recovered -ne 2 ]]; then
+                       echo "Wait 1 second for health to recover"
+                       sleep 1
+               else
+                       break
+               fi
+       done
+
+       health_recovered=$($LNETCTL net show -v 2 |
+                          grep -c 'health value: 1000')
+
+       $LCTL net_drop_del -a
+
+       [[ $health_recovered -ne 2 ]] &&
+               do_lnetctl net show -v 2 | egrep -e nid -e health &&
+               error "Health hasn't recovered"
+
+       return 0
+}
+run_test 218 "Local recovery pings should exercise all available paths"
+
 test_230() {
        # LU-12815
        echo "Check valid values; Should succeed"