Whamcloud - gitweb
LU-17476 lnet: prefer to use bits only to match ME
authorSerguei Smirnov <ssmirnov@whamcloud.com>
Sat, 27 Jan 2024 20:17:34 +0000 (12:17 -0800)
committerAndreas Dilger <adilger@whamcloud.com>
Fri, 2 Feb 2024 16:09:33 +0000 (16:09 +0000)
In some cases, it has been observed that a reply will arrive
at the portal with the correct match bits, but is dropped by
lnet_parse_put().  This appears to happen with LNet Multi-Rail
peers, each having two separate NIDs.

If a reply arrives with matchbits available and matching, but
the NIDs don't match, confirm the match if the NIDs are found
to belong to the same peer.  This will only happen in cases
where the reply would be dropped entirely, causing hundreds of
seconds of delay until the RPC is resent, so the extra overhead
of checking for a peer match before dropping the request is
only in the error path and minimal compared to the alternative.

Add CFS_FAIL_CHECK() for exercising the match NIDs code.

That is in a hot codepath, but CFS_FAIL_CHECK() is marked unlikely()
and this check is in the error case and _should_ only be hit when the
message would have been dropped anyway, so it seems unlikely to impact
performance in any meaningful way.

Lustre-change: https://review.whamcloud.com/53843
Lustre-commit: TBD (from 3360e892750d1bf4f2b7ceab60d9a637b3e649ad)

Test-Parameters: testlist=sanity-lnet env=ONLY=350,ONLY_REPEAT=10
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I10e1a2142539ddf5dabc26ce962cec1f2cfcf3db
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53846
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/lib-ptl.c [changed mode: 0644->0755]
lustre/tests/sanity.sh

index b23cf32..c2223c6 100644 (file)
@@ -39,6 +39,7 @@
 
 /* LNET has 0xeXXX */
 #define CFS_FAIL_PTLRPC_OST_BULK_CB2   0xe000
+#define CFS_FAIL_MATCH_MD_NID          0xe001
 
 #include <linux/netdevice.h>
 
old mode 100644 (file)
new mode 100755 (executable)
index abdd6ae..1032ea7
@@ -153,19 +153,67 @@ lnet_try_match_md(struct lnet_libmd *md,
        if ((md->md_options & info->mi_opc) == 0)
                return LNET_MATCHMD_NONE;
 
-       /* mismatched ME nid/pid? */
-       if (me->me_match_id.nid != LNET_NID_ANY &&
-           me->me_match_id.nid != info->mi_id.nid)
-               return LNET_MATCHMD_NONE;
-
-       if (me->me_match_id.pid != LNET_PID_ANY &&
-           me->me_match_id.pid != info->mi_id.pid)
-               return LNET_MATCHMD_NONE;
-
        /* mismatched ME matchbits? */
        if (((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) != 0)
                return LNET_MATCHMD_NONE;
 
+       /* mismatched ME nid/pid? */
+       if (me->me_match_bits & ~me->me_ignore_bits) {
+               /* try to accept match based on bits only */
+               if ((me->me_match_id.nid != LNET_NID_ANY &&
+                    me->me_match_id.nid != info->mi_id.nid) ||
+                   CFS_FAIL_CHECK(CFS_FAIL_MATCH_MD_NID)) {
+                       struct lnet_peer *lp_me, *lp_peer;
+
+                       /* check if ME NID matches another NID of same peer */
+                       lp_me = lnet_find_peer(me->me_match_id.nid);
+                       lp_peer = lnet_find_peer(info->mi_id.nid);
+
+                       if (lp_me && lp_peer && lp_me == lp_peer) {
+                               /* Shouldn't happen, but better than dropping
+                                * message entirely. Print warning so we know
+                                * it happens, and something needs to be fixed.
+                                */
+                               CWARN("message from %s matched %llu with NID mismatch %s accepted (same peer %pK)\n",
+                                     libcfs_id2str(info->mi_id),
+                                     info->mi_mbits,
+                                     libcfs_nid2str(me->me_match_id.nid),
+                                     lp_me);
+                               lnet_peer_decref_locked(lp_me);
+                               lnet_peer_decref_locked(lp_peer);
+                       } else {
+                               CNETERR("message from %s matched %llu but NID mismatch %s rejected (different peer %pK != %pK)\n",
+                                       libcfs_id2str(info->mi_id),
+                                       info->mi_mbits,
+                                       libcfs_nid2str(me->me_match_id.nid),
+                                       lp_me, lp_peer);
+                               if (lp_me)
+                                       lnet_peer_decref_locked(lp_me);
+                               if (lp_peer)
+                                       lnet_peer_decref_locked(lp_peer);
+
+                               return LNET_MATCHMD_NONE;
+                       }
+               }
+
+               if (me->me_match_id.pid != LNET_PID_ANY &&
+                   me->me_match_id.pid != info->mi_id.pid) {
+                       CNETERR("message from %s matched %llu but PID mismatch %s rejected\n",
+                               libcfs_id2str(info->mi_id), info->mi_mbits,
+                               libcfs_id2str(me->me_match_id));
+                       return LNET_MATCHMD_NONE;
+               }
+       } else {
+               /* there were no bits to match, reject on NID/PID mismatch */
+               if (me->me_match_id.nid != LNET_NID_ANY &&
+                   me->me_match_id.nid != info->mi_id.nid)
+                       return LNET_MATCHMD_NONE;
+
+               if (me->me_match_id.pid != LNET_PID_ANY &&
+                   me->me_match_id.pid != info->mi_id.pid)
+                       return LNET_MATCHMD_NONE;
+       }
+
        /* Hurrah! This _is_ a match; check it out... */
 
        if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0)
index 34cfb93..23d25c7 100755 (executable)
@@ -25320,6 +25320,23 @@ test_319() {
 }
 run_test 319 "lost lease lock on migrate error"
 
+test_350() {
+       local mdts=$(comma_list $(mdts_nodes))
+
+       mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+       stack_trap "rm -r $DIR/$tdir"
+
+       #force 1/100 of replies to take "NID mismatch" codepath
+       #define CFS_FAIL_MATCH_MD_NID 0xe001  CFS_FAIL_SOME 0x10000000
+       do_nodes $mdts $LCTL set_param fail_loc=0x1000e001 fail_val=100
+
+       while ls -lR $DIR/$tdir > /dev/null; do :; done &
+       stack_trap "killall -9 ls || killall -9 ls"
+
+       cp -a /etc $DIR/$tdir || error "cp failed"
+}
+run_test 350 "force NID mismatch path to be exercised"
+
 test_360() {
        (( $OST1_VERSION >= $(version_code 2.14.0.114) )) ||
                skip "Need OST version at least 2.14.0.114"