Whamcloud - gitweb
LU-14668 lnet: add 'lock_prim_nid' lnet module parameter 34/51134/4
author Serguei Smirnov <ssmirnov@whamcloud.com>
Tue, 28 Feb 2023 23:02:20 +0000 (15:02 -0800)
committer Oleg Drokin <green@whamcloud.com>
Wed, 2 Aug 2023 06:20:45 +0000 (06:20 +0000)
Add 'lock_prim_nid' lnet module parameter to allow control
of how Lustre peer primary NID is selected.
If set to 1 (default), the NID specified by Lustre when
calling LNet API is designated as primary for the peer,
allowing for non-blocking discovery in the background.
If set to 0, peer discovery is blocking until complete
and the NID listed first in discovery response is designated
as primary.

Lustre-change: https://review.whamcloud.com/50159
Lustre-commit: fc7a0d6013b46ebc17cdfdccc04a5d1d92c6af24

Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I6ed1cb0c637f4aa7a7340a6f01819ba9a85858f4
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51134
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/api-ni.c
lnet/lnet/peer.c

index 5805586..5821d20 100644 (file)
@@ -615,6 +615,7 @@ extern int alive_router_check_interval;
 extern int live_router_check_interval;
 extern int dead_router_check_interval;
 extern int portal_rotor;
+extern int lock_prim_nid;
 
 void lnet_mt_event_handler(struct lnet_event *event);
 
index 3a56ea8..fa7d702 100644 (file)
@@ -243,6 +243,11 @@ module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
 MODULE_PARM_DESC(lnet_response_tracking,
                 "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
 
+int lock_prim_nid = 1;
+module_param(lock_prim_nid, int, 0444);
+MODULE_PARM_DESC(lock_prim_nid,
+                "Whether nid passed down by Lustre is locked as primary");
+
 #define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
                                  (LNET_RETRY_COUNT_DEFAULT + 1))
 unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
index 33a16f8..4eb7a44 100644 (file)
@@ -1411,6 +1411,7 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids)
        lnet_nid_t pnid = 0;
        bool mr;
        int i, rc;
+       int flags = lock_prim_nid ? LNET_PEER_LOCK_PRIMARY : 0;
 
        if (!nids || num_nids < 1)
                return -EINVAL;
@@ -1431,7 +1432,7 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids)
                if (!pnid) {
                        pnid = nids[i];
                        rc = lnet_add_peer_ni(pnid, LNET_NID_ANY, mr,
-                                             LNET_PEER_LOCK_PRIMARY);
+                                             flags);
                        if (rc == -EALREADY) {
                                struct lnet_peer *lp;
 
@@ -1447,10 +1448,10 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids)
                        }
                } else if (lnet_peer_discovery_disabled) {
                        rc = lnet_add_peer_ni(nids[i], LNET_NID_ANY, mr,
-                                             LNET_PEER_LOCK_PRIMARY);
+                                             flags);
                } else {
                        rc = lnet_add_peer_ni(pnid, nids[i], mr,
-                                             LNET_PEER_LOCK_PRIMARY);
+                                             flags);
                }
 
                if (rc && rc != -EEXIST)
@@ -1493,37 +1494,54 @@ LNetPrimaryNID(lnet_nid_t nid)
         * down then this discovery can introduce long delays into the mount
         * process, so skip it if it isn't necessary.
         */
+again:
        spin_lock(&lp->lp_lock);
-       if (!lnet_peer_discovery_disabled &&
-           (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) ||
-            !lnet_peer_is_uptodate_locked(lp))) {
-               /* force a full discovery cycle */
-               lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH |
-                               LNET_PEER_LOCK_PRIMARY;
+       if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid)
+               lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+
+       /* DD disabled, nothing to do */
+       if (lnet_peer_discovery_disabled) {
+               nid = lnet_nid_to_nid4(&lp->lp_primary_nid);
                spin_unlock(&lp->lp_lock);
+               goto out_decref;
+       }
 
-               /* start discovery in the background. Messages to that
-                * peer will not go through until the discovery is
-                * complete
-                */
-               rc = lnet_discover_peer_locked(lpni, cpt, false);
-               if (rc)
-                       goto out_decref;
-               /* The lpni (or lp) for this NID may have changed and our ref is
-                * the only thing keeping the old one around. Release the ref
-                * and lookup the lpni again
-                */
-               lnet_peer_ni_decref_locked(lpni);
-               lpni = lnet_find_peer_ni_locked(nid);
-               if (!lpni) {
-                       rc = -ENOENT;
-                       goto out_unlock;
-               }
-               lp = lpni->lpni_peer_net->lpn_peer;
-       } else {
+       /* Peer already up to date, nothing to do */
+       if (lnet_peer_is_uptodate_locked(lp)) {
+               nid = lnet_nid_to_nid4(&lp->lp_primary_nid);
                spin_unlock(&lp->lp_lock);
+               goto out_decref;
+       }
+       spin_unlock(&lp->lp_lock);
+
+       /* If primary nid locking is enabled, discovery is performed
+        * in the background.
+        * If primary nid locking is disabled, discovery blocks here.
+        * Messages to the peer will not go through until the discovery is
+        * complete.
+        */
+       if (lock_prim_nid)
+               rc = lnet_discover_peer_locked(lpni, cpt, false);
+       else
+               rc = lnet_discover_peer_locked(lpni, cpt, true);
+       if (rc)
+               goto out_decref;
+
+       /* The lpni (or lp) for this NID may have changed and our ref is
+        * the only thing keeping the old one around. Release the ref
+        * and lookup the lpni again
+        */
+       lnet_peer_ni_decref_locked(lpni);
+       lpni = lnet_find_peer_ni_locked(nid);
+       if (!lpni) {
+               rc = -ENOENT;
+               goto out_unlock;
        }
-       primary_nid = lnet_nid_to_nid4(&lp->lp_primary_nid);
+       lp = lpni->lpni_peer_net->lpn_peer;
+
+       if (!lock_prim_nid && !lnet_is_discovery_disabled(lp))
+               goto again;
+       nid = lnet_nid_to_nid4(&lp->lp_primary_nid);
 out_decref:
        lnet_peer_ni_decref_locked(lpni);
 out_unlock:
@@ -1614,7 +1632,6 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp,
                ptable->pt_peers++;
        }
 
-
        /* Update peer state */
        spin_lock(&lp->lp_lock);
        if (flags & LNET_PEER_CONFIGURED) {
@@ -1692,10 +1709,8 @@ lnet_peer_add(lnet_nid_t nid4, unsigned int flags)
                                rc = -EPERM;
                        goto out;
                } else if (lp->lp_state & LNET_PEER_LOCK_PRIMARY) {
-                       if (nid_same(&lp->lp_primary_nid, &nid)) {
+                       if (nid_same(&lp->lp_primary_nid, &nid))
                                rc = -EEXIST;
-                               goto out;
-                       }
                        /* we're trying to recreate an existing peer which
                         * has already been created and its primary
                         * locked. This is likely due to two servers
@@ -1703,8 +1718,19 @@ lnet_peer_add(lnet_nid_t nid4, unsigned int flags)
                         * to that node with the primary NID which was
                         * first added by Lustre
                         */
-                       rc = -EALREADY;
+                       else
+                               rc = -EALREADY;
                        goto out;
+               } else if (!(flags &
+                          (LNET_PEER_LOCK_PRIMARY | LNET_PEER_CONFIGURED))) {
+                       /* if not recreating peer as configured and
+                        * not locking primary nid, no need to
+                        * do anything if primary nid is not being changed
+                        */
+                       if (nid_same(&lp->lp_primary_nid, &nid)) {
+                               rc = -EEXIST;
+                               goto out;
+                       }
                }
                /* Delete and recreate the peer.
                 * We can get here:
@@ -2011,6 +2037,14 @@ __must_hold(&the_lnet.ln_api_mutex)
        lnet_peer_ni_decref_locked(lpni);
        lp = lpni->lpni_peer_net->lpn_peer;
 
+       /* Peer must have been configured. */
+       if ((flags & LNET_PEER_CONFIGURED) &&
+           !(lp->lp_state & LNET_PEER_CONFIGURED)) {
+               CDEBUG(D_NET, "peer %s was not configured\n",
+                      libcfs_nid2str(prim_nid));
+               return -ENOENT;
+       }
+
        /* Primary NID must match */
        if (lnet_nid_to_nid4(&lp->lp_primary_nid) != prim_nid) {
                CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
@@ -2026,9 +2060,7 @@ __must_hold(&the_lnet.ln_api_mutex)
                return -EPERM;
        }
 
-       if ((flags & LNET_PEER_LOCK_PRIMARY) &&
-           (lnet_peer_is_uptodate(lp) &&
-            (lp->lp_state & LNET_PEER_LOCK_PRIMARY))) {
+       if (lnet_peer_is_uptodate(lp) && !(flags & LNET_PEER_CONFIGURED)) {
                CDEBUG(D_NET,
                       "Don't add temporary peer NI for uptodate peer %s\n",
                       libcfs_nidstr(&lp->lp_primary_nid));