Whamcloud - gitweb
LU-14668 lnet: add 'lock_prim_nid' lnet module parameter 59/50159/8
author: Serguei Smirnov <ssmirnov@whamcloud.com>
Tue, 28 Feb 2023 23:02:20 +0000 (15:02 -0800)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 28 Mar 2023 22:16:18 +0000 (22:16 +0000)
Add the 'lock_prim_nid' lnet module parameter to control
how the primary NID of a Lustre peer is selected.
If set to 1 (default), the NID specified by Lustre when
calling the LNet API is designated as primary for the peer,
allowing for non-blocking discovery in the background.
If set to 0, peer discovery blocks until it is complete,
and the NID listed first in the discovery response is designated
as primary.

Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I6ed1cb0c637f4aa7a7340a6f01819ba9a85858f4
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50159
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Chris Horn <chris.horn@hpe.com>
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/lnet/lib-lnet.h
lnet/lnet/api-ni.c
lnet/lnet/peer.c

index c8b8d4d..056386a 100644 (file)
@@ -616,6 +616,7 @@ extern int alive_router_check_interval;
 extern int live_router_check_interval;
 extern int dead_router_check_interval;
 extern int portal_rotor;
+extern int lock_prim_nid;
 
 void lnet_mt_event_handler(struct lnet_event *event);
 
index da8e767..3c63f94 100644 (file)
@@ -268,6 +268,11 @@ module_param_call(lnet_response_tracking, response_tracking_set, param_get_int,
 MODULE_PARM_DESC(lnet_response_tracking,
                 "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)");
 
+int lock_prim_nid = 1;
+module_param(lock_prim_nid, int, 0444);
+MODULE_PARM_DESC(lock_prim_nid,
+                "Whether nid passed down by Lustre is locked as primary");
+
 #define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \
                                  (LNET_RETRY_COUNT_DEFAULT + 1))
 unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT;
index 5e617d8..86c9cc3 100644 (file)
@@ -1355,6 +1355,7 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids)
        struct lnet_nid pnid = LNET_ANY_NID;
        bool mr;
        int i, rc;
+       int flags = lock_prim_nid ? LNET_PEER_LOCK_PRIMARY : 0;
 
        if (!nids || num_nids < 1)
                return -EINVAL;
@@ -1377,8 +1378,7 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids)
                lnet_nid4_to_nid(nids[i], &nid);
                if (LNET_NID_IS_ANY(&pnid)) {
                        lnet_nid4_to_nid(nids[i], &pnid);
-                       rc = lnet_add_peer_ni(&pnid, &LNET_ANY_NID, mr,
-                                             LNET_PEER_LOCK_PRIMARY);
+                       rc = lnet_add_peer_ni(&pnid, &LNET_ANY_NID, mr, flags);
                        if (rc == -EALREADY) {
                                struct lnet_peer *lp;
 
@@ -1394,12 +1394,10 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids)
                        }
                } else if (lnet_peer_discovery_disabled) {
                        lnet_nid4_to_nid(nids[i], &nid);
-                       rc = lnet_add_peer_ni(&nid, &LNET_ANY_NID, mr,
-                                             LNET_PEER_LOCK_PRIMARY);
+                       rc = lnet_add_peer_ni(&nid, &LNET_ANY_NID, mr, flags);
                } else {
                        lnet_nid4_to_nid(nids[i], &nid);
-                       rc = lnet_add_peer_ni(&pnid, &nid, mr,
-                                             LNET_PEER_LOCK_PRIMARY);
+                       rc = lnet_add_peer_ni(&pnid, &nid, mr, flags);
                }
 
                if (rc && rc != -EEXIST)
@@ -1441,36 +1439,53 @@ void LNetPrimaryNID(struct lnet_nid *nid)
         * down then this discovery can introduce long delays into the mount
         * process, so skip it if it isn't necessary.
         */
+again:
        spin_lock(&lp->lp_lock);
-       if (!lnet_peer_discovery_disabled &&
-           (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) ||
-            !lnet_peer_is_uptodate_locked(lp))) {
-               /* force a full discovery cycle */
-               lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH |
-                               LNET_PEER_LOCK_PRIMARY;
+       if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid)
+               lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
+
+       /* DD disabled, nothing to do */
+       if (lnet_peer_discovery_disabled) {
+               *nid = lp->lp_primary_nid;
                spin_unlock(&lp->lp_lock);
+               goto out_decref;
+       }
 
-               /* start discovery in the background. Messages to that
-                * peer will not go through until the discovery is
-                * complete
-                */
-               rc = lnet_discover_peer_locked(lpni, cpt, false);
-               if (rc)
-                       goto out_decref;
-               /* The lpni (or lp) for this NID may have changed and our ref is
-                * the only thing keeping the old one around. Release the ref
-                * and lookup the lpni again
-                */
-               lnet_peer_ni_decref_locked(lpni);
-               lpni = lnet_peer_ni_find_locked(nid);
-               if (!lpni) {
-                       rc = -ENOENT;
-                       goto out_unlock;
-               }
-               lp = lpni->lpni_peer_net->lpn_peer;
-       } else {
+       /* Peer already up to date, nothing to do */
+       if (lnet_peer_is_uptodate_locked(lp)) {
+               *nid = lp->lp_primary_nid;
                spin_unlock(&lp->lp_lock);
+               goto out_decref;
        }
+       spin_unlock(&lp->lp_lock);
+
+       /* If primary nid locking is enabled, discovery is performed
+        * in the background.
+        * If primary nid locking is disabled, discovery blocks here.
+        * Messages to the peer will not go through until the discovery is
+        * complete.
+        */
+       if (lock_prim_nid)
+               rc = lnet_discover_peer_locked(lpni, cpt, false);
+       else
+               rc = lnet_discover_peer_locked(lpni, cpt, true);
+       if (rc)
+               goto out_decref;
+
+       /* The lpni (or lp) for this NID may have changed and our ref is
+        * the only thing keeping the old one around. Release the ref
+        * and lookup the lpni again
+        */
+       lnet_peer_ni_decref_locked(lpni);
+       lpni = lnet_peer_ni_find_locked(nid);
+       if (!lpni) {
+               rc = -ENOENT;
+               goto out_unlock;
+       }
+       lp = lpni->lpni_peer_net->lpn_peer;
+
+       if (!lock_prim_nid && !lnet_is_discovery_disabled(lp))
+               goto again;
        *nid = lp->lp_primary_nid;
 out_decref:
        lnet_peer_ni_decref_locked(lpni);
@@ -1561,7 +1576,6 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp,
                ptable->pt_peers++;
        }
 
-
        /* Update peer state */
        spin_lock(&lp->lp_lock);
        if (flags & LNET_PEER_CONFIGURED) {
@@ -1638,10 +1652,8 @@ lnet_peer_add(struct lnet_nid *nid, unsigned int flags)
                                rc = -EPERM;
                        goto out;
                } else if (lp->lp_state & LNET_PEER_LOCK_PRIMARY) {
-                       if (nid_same(&lp->lp_primary_nid, nid)) {
+                       if (nid_same(&lp->lp_primary_nid, nid))
                                rc = -EEXIST;
-                               goto out;
-                       }
                        /* we're trying to recreate an existing peer which
                         * has already been created and its primary
                         * locked. This is likely due to two servers
@@ -1649,8 +1661,18 @@ lnet_peer_add(struct lnet_nid *nid, unsigned int flags)
                         * to that node with the primary NID which was
                         * first added by Lustre
                         */
-                       rc = -EALREADY;
+                       else
+                               rc = -EALREADY;
                        goto out;
+               } else if (!(flags & (LNET_PEER_LOCK_PRIMARY | LNET_PEER_CONFIGURED))) {
+                       /* if not recreating peer as configured and
+                        * not locking primary nid, no need to
+                        * do anything if primary nid is not being changed
+                        */
+                       if (nid_same(&lp->lp_primary_nid, nid)) {
+                               rc = -EEXIST;
+                               goto out;
+                       }
                }
                /* Delete and recreate the peer.
                 * We can get here:
@@ -1963,6 +1985,14 @@ __must_hold(&the_lnet.ln_api_mutex)
        lnet_peer_ni_decref_locked(lpni);
        lp = lpni->lpni_peer_net->lpn_peer;
 
+       /* Peer must have been configured. */
+       if ((flags & LNET_PEER_CONFIGURED) &&
+           !(lp->lp_state & LNET_PEER_CONFIGURED)) {
+               CDEBUG(D_NET, "peer %s was not configured\n",
+                      libcfs_nidstr(prim_nid));
+               return -ENOENT;
+       }
+
        /* Primary NID must match */
        if (!nid_same(&lp->lp_primary_nid, prim_nid)) {
                CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n",
@@ -1978,8 +2008,7 @@ __must_hold(&the_lnet.ln_api_mutex)
                return -EPERM;
        }
 
-       if ((flags & LNET_PEER_LOCK_PRIMARY) &&
-           (lnet_peer_is_uptodate(lp) && (lp->lp_state & LNET_PEER_LOCK_PRIMARY))) {
+       if (lnet_peer_is_uptodate(lp) && !(flags & LNET_PEER_CONFIGURED)) {
                CDEBUG(D_NET,
                       "Don't add temporary peer NI for uptodate peer %s\n",
                       libcfs_nidstr(&lp->lp_primary_nid));