Whamcloud - gitweb
LU-17476 lnet: use bits only to match ME in all cases
author Serguei Smirnov <ssmirnov@whamcloud.com>
Fri, 16 Feb 2024 19:01:21 +0000 (11:01 -0800)
committer Andreas Dilger <adilger@whamcloud.com>
Sat, 9 Mar 2024 07:47:25 +0000 (07:47 +0000)
If the NIDs belong to the same peer and the match bits match,
declare a match even when the match bits are not available
or are ignored.

Lustre-change: https://review.whamcloud.com/54082
Lustre-commit: a7ae2e5515879dc31e87106314d35dc439a2c50d

Test-Parameters: testlist=sanity env=ONLY=350,ONLY_REPEAT=10
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I394c492381a2d069b34516c473220192df05fbd2
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/54277
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lnet/lnet/lib-ptl.c

index 1032ea7..2886703 100644 (file)
@@ -157,61 +157,48 @@ lnet_try_match_md(struct lnet_libmd *md,
        if (((me->me_match_bits ^ info->mi_mbits) & ~me->me_ignore_bits) != 0)
                return LNET_MATCHMD_NONE;
 
-       /* mismatched ME nid/pid? */
-       if (me->me_match_bits & ~me->me_ignore_bits) {
-               /* try to accept match based on bits only */
-               if ((me->me_match_id.nid != LNET_NID_ANY &&
-                    me->me_match_id.nid != info->mi_id.nid) ||
-                   CFS_FAIL_CHECK(CFS_FAIL_MATCH_MD_NID)) {
-                       struct lnet_peer *lp_me, *lp_peer;
-
-                       /* check if ME NID matches another NID of same peer */
-                       lp_me = lnet_find_peer(me->me_match_id.nid);
-                       lp_peer = lnet_find_peer(info->mi_id.nid);
-
-                       if (lp_me && lp_peer && lp_me == lp_peer) {
-                               /* Shouldn't happen, but better than dropping
-                                * message entirely. Print warning so we know
-                                * it happens, and something needs to be fixed.
-                                */
-                               CWARN("message from %s matched %llu with NID mismatch %s accepted (same peer %pK)\n",
-                                     libcfs_id2str(info->mi_id),
-                                     info->mi_mbits,
-                                     libcfs_nid2str(me->me_match_id.nid),
-                                     lp_me);
+       /* mismatched PID? */
+       if (me->me_match_id.pid != LNET_PID_ANY &&
+           me->me_match_id.pid != info->mi_id.pid)
+               return LNET_MATCHMD_NONE;
+
+       /* try to accept match based on bits only */
+       if ((me->me_match_id.nid != LNET_NID_ANY &&
+            me->me_match_id.nid != info->mi_id.nid) ||
+           (me->me_match_id.nid != LNET_NID_ANY &&
+            CFS_FAIL_CHECK(CFS_FAIL_MATCH_MD_NID))) {
+               struct lnet_peer *lp_me, *lp_peer;
+
+               /* check if ME NID matches another NID of same peer */
+               lp_me = lnet_find_peer(me->me_match_id.nid);
+               lp_peer = lnet_find_peer(info->mi_id.nid);
+
+               if (lp_me && lp_peer && lp_me == lp_peer) {
+                       /* Shouldn't happen, but better than dropping
+                        * message entirely. Print warning so we know
+                        * it happens, and something needs to be fixed.
+                        */
+                       CWARN("message from %s matched %llu with NID mismatch %s accepted (same peer %pK)\n",
+                             libcfs_id2str(info->mi_id),
+                             info->mi_mbits,
+                             libcfs_nid2str(me->me_match_id.nid),
+                             lp_me);
+
                                lnet_peer_decref_locked(lp_me);
                                lnet_peer_decref_locked(lp_peer);
-                       } else {
-                               CNETERR("message from %s matched %llu but NID mismatch %s rejected (different peer %pK != %pK)\n",
-                                       libcfs_id2str(info->mi_id),
-                                       info->mi_mbits,
-                                       libcfs_nid2str(me->me_match_id.nid),
-                                       lp_me, lp_peer);
-                               if (lp_me)
-                                       lnet_peer_decref_locked(lp_me);
-                               if (lp_peer)
-                                       lnet_peer_decref_locked(lp_peer);
-
-                               return LNET_MATCHMD_NONE;
-                       }
-               }
+               } else {
+                       CWARN("message from %s matched %llu but NID mismatch %s rejected (different peer %pK != %pK)\n",
+                               libcfs_id2str(info->mi_id),
+                               info->mi_mbits,
+                               libcfs_nid2str(me->me_match_id.nid),
+                               lp_me, lp_peer);
+                       if (lp_me)
+                               lnet_peer_decref_locked(lp_me);
+                       if (lp_peer)
+                               lnet_peer_decref_locked(lp_peer);
 
-               if (me->me_match_id.pid != LNET_PID_ANY &&
-                   me->me_match_id.pid != info->mi_id.pid) {
-                       CNETERR("message from %s matched %llu but PID mismatch %s rejected\n",
-                               libcfs_id2str(info->mi_id), info->mi_mbits,
-                               libcfs_id2str(me->me_match_id));
                        return LNET_MATCHMD_NONE;
                }
-       } else {
-               /* there were no bits to match, reject on NID/PID mismatch */
-               if (me->me_match_id.nid != LNET_NID_ANY &&
-                   me->me_match_id.nid != info->mi_id.nid)
-                       return LNET_MATCHMD_NONE;
-
-               if (me->me_match_id.pid != LNET_PID_ANY &&
-                   me->me_match_id.pid != info->mi_id.pid)
-                       return LNET_MATCHMD_NONE;
        }
 
        /* Hurrah! This _is_ a match; check it out... */