Whamcloud - gitweb
LU-17476 lnet: prefer to use bits only to match ME 43/53843/8
authorSerguei Smirnov <ssmirnov@whamcloud.com>
Sat, 27 Jan 2024 20:17:34 +0000 (12:17 -0800)
committerOleg Drokin <green@whamcloud.com>
Mon, 4 Mar 2024 20:03:54 +0000 (20:03 +0000)
In some cases, it has been observed that a reply will arrive
at the portal with the correct match bits, but is dropped by
lnet_parse_put().  This appears to happen with LNet Multi-Rail
peers, each having two separate NIDs.

If a reply arrives with matchbits available and matching, but
the NIDs don't match, confirm the match if the NIDs are found
to belong to the same peer.  This will only happen in cases
where the reply would be dropped entirely, causing hundreds of
seconds of delay until the RPC is resent, so the extra overhead
of checking for a peer match before dropping the request is
only in the error path and minimal compared to the alternative.

Add CFS_FAIL_CHECK() for exercising the match NIDs code.

That is in a hot codepath, but CFS_FAIL_CHECK() is marked unlikely()
and this check is in the error case and _should_ only be hit when
the message would have been dropped anyway, so it seems unlikely to
impact performance in any meaningful way.

Test-Parameters: testlist=sanity env=ONLY=350,ONLY_REPEAT=10
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I10e1a2142539ddf5dabc26ce962cec1f2cfcf3db
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53843
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/lib-ptl.c
lustre/tests/sanity.sh

index 8043e2c..68a48ae 100644 (file)
@@ -38,6 +38,7 @@
 
 /* LNET has 0xeXXX */
 #define CFS_FAIL_PTLRPC_OST_BULK_CB2   0xe000
+#define CFS_FAIL_MATCH_MD_NID          0xe001
 
 #include <linux/netdevice.h>
 
index df065e2..68dd1cc 100644 (file)
@@ -152,19 +152,67 @@ lnet_try_match_md(struct lnet_libmd *md,
        if ((md->md_options & info->mi_opc) == 0)
                return LNET_MATCHMD_NONE;
 
-       /* mismatched ME nid/pid? */
-       if (!LNET_NID_IS_ANY(&me->me_match_id.nid) &&
-           !nid_same(&me->me_match_id.nid, &info->mi_id.nid))
-               return LNET_MATCHMD_NONE;
-
-       if (me->me_match_id.pid != LNET_PID_ANY &&
-           me->me_match_id.pid != info->mi_id.pid)
-               return LNET_MATCHMD_NONE;
-
        /* mismatched ME matchbits? */
        if (((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) != 0)
                return LNET_MATCHMD_NONE;
 
+       /* mismatched ME nid/pid? */
+       if (me->me_match_bits & ~me->me_ignore_bits) {
+               /* try to accept match based on bits only */
+               if ((!LNET_NID_IS_ANY(&me->me_match_id.nid) &&
+                    !nid_same(&me->me_match_id.nid, &info->mi_id.nid)) ||
+                   CFS_FAIL_CHECK(CFS_FAIL_MATCH_MD_NID)) {
+                       struct lnet_peer *lp_me, *lp_peer;
+
+                       /* check if ME NID matches another NID of same peer */
+                       lp_me = lnet_find_peer(&me->me_match_id.nid);
+                       lp_peer = lnet_find_peer(&info->mi_id.nid);
+
+                       if (lp_me && lp_peer && (lp_me == lp_peer)) {
+                               /* Shouldn't happen, but better than dropping
+                                * message entirely. Print warning so we know
+                                * it happens, and something needs to be fixed.
+                                */
+                               CWARN("message from %s matched %llu with NID mismatch %s accepted (same peer %pK)\n",
+                                     libcfs_idstr(&info->mi_id),
+                                     info->mi_mbits,
+                                     libcfs_nidstr(&me->me_match_id.nid),
+                                     lp_me);
+                               lnet_peer_decref_locked(lp_me);
+                               lnet_peer_decref_locked(lp_peer);
+                       } else {
+                               CNETERR("message from %s matched %llu with NID mismatch %s rejected (different peer %pK != %pK)\n",
+                                       libcfs_idstr(&info->mi_id),
+                                       info->mi_mbits,
+                                       libcfs_nidstr(&me->me_match_id.nid),
+                                       lp_me, lp_peer);
+                               if (lp_me)
+                                       lnet_peer_decref_locked(lp_me);
+                               if (lp_peer)
+                                       lnet_peer_decref_locked(lp_peer);
+
+                               return LNET_MATCHMD_NONE;
+                       }
+               }
+
+               if (me->me_match_id.pid != LNET_PID_ANY &&
+                   me->me_match_id.pid != info->mi_id.pid) {
+                       CNETERR("message from %s matched %llu with PID mismatch %s rejected\n",
+                               libcfs_idstr(&info->mi_id), info->mi_mbits,
+                               libcfs_idstr(&me->me_match_id));
+                       return LNET_MATCHMD_NONE;
+               }
+       } else {
+               /* there were no bits to match, reject on nid/pid mismatch */
+               if (!LNET_NID_IS_ANY(&me->me_match_id.nid) &&
+                   !nid_same(&me->me_match_id.nid, &info->mi_id.nid))
+                       return LNET_MATCHMD_NONE;
+
+               if (me->me_match_id.pid != LNET_PID_ANY &&
+                   me->me_match_id.pid != info->mi_id.pid)
+                       return LNET_MATCHMD_NONE;
+       }
+
        /* Hurrah! This _is_ a match; check it out... */
 
        if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0)
index 91a5010..e67d765 100755 (executable)
@@ -27335,6 +27335,23 @@ test_319() {
 }
 run_test 319 "lost lease lock on migrate error"
 
+test_350() {
+       local mdts=$(comma_list $(mdts_nodes))
+
+       mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+       stack_trap "rm -r $DIR/$tdir"
+
+       #force 1/100 of replies to take "NID mismatch" codepath
+       #define CFS_FAIL_MATCH_MD_NID 0xe001  CFS_FAIL_SOME 0x10000000
+       do_nodes $mdts $LCTL set_param fail_loc=0x1000e001 fail_val=100
+
+       while ls -lR $DIR/$tdir > /dev/null; do :; done &
+       stack_trap "killall -9 ls || killall -9 ls"
+
+       cp -a /etc $DIR/$tdir || error "cp failed"
+}
+run_test 350 "force NID mismatch path to be exercised"
+
 test_360() {
        (( $OST1_VERSION >= $(version_code 2.15.58.96) )) ||
                skip "Need OST version at least 2.15.58.96"