From 6cfc8e55a2e77c9c91b81a8842e2cbd886025298 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Tue, 28 Feb 2023 15:02:20 -0800 Subject: [PATCH] LU-14668 lnet: add 'lock_prim_nid" lnet module parameter Add 'lock_prim_nid' lnet module parameter to allow control of how Lustre peer primary NID is selected. If set to 1 (default), the NID specified by Lustre when calling LNet API is designated as primary for the peer, allowing for non-blocking discovery in the background. If set to 0, peer discovery is blocking until complete and the NID listed first in discovery response is designated as primary. Lustre-change: https://review.whamcloud.com/50159 Lustre-commit: fc7a0d6013b46ebc17cdfdccc04a5d1d92c6af24 Signed-off-by: Serguei Smirnov Change-Id: I6ed1cb0c637f4aa7a7340a6f01819ba9a85858f4 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51134 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 1 + lnet/lnet/api-ni.c | 5 ++ lnet/lnet/peer.c | 106 ++++++++++++++++++++++++++++--------------- 3 files changed, 75 insertions(+), 37 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 5805586..5821d20 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -615,6 +615,7 @@ extern int alive_router_check_interval; extern int live_router_check_interval; extern int dead_router_check_interval; extern int portal_rotor; +extern int lock_prim_nid; void lnet_mt_event_handler(struct lnet_event *event); diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 3a56ea8..fa7d702 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -243,6 +243,11 @@ module_param_call(lnet_response_tracking, response_tracking_set, param_get_int, MODULE_PARM_DESC(lnet_response_tracking, "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)"); +int lock_prim_nid = 1; +module_param(lock_prim_nid, int, 0444); +MODULE_PARM_DESC(lock_prim_nid, + "Whether nid passed down by Lustre is locked as primary"); + #define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \ (LNET_RETRY_COUNT_DEFAULT + 1)) unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 33a16f8..4eb7a44 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -1411,6 +1411,7 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids) lnet_nid_t pnid = 0; bool mr; int i, rc; + int flags = lock_prim_nid ? LNET_PEER_LOCK_PRIMARY : 0; if (!nids || num_nids < 1) return -EINVAL; @@ -1431,7 +1432,7 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids) if (!pnid) { pnid = nids[i]; rc = lnet_add_peer_ni(pnid, LNET_NID_ANY, mr, - LNET_PEER_LOCK_PRIMARY); + flags); if (rc == -EALREADY) { struct lnet_peer *lp; @@ -1447,10 +1448,10 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids) } } else if (lnet_peer_discovery_disabled) { rc = lnet_add_peer_ni(nids[i], LNET_NID_ANY, mr, - LNET_PEER_LOCK_PRIMARY); + flags); } else { rc = lnet_add_peer_ni(pnid, nids[i], mr, - LNET_PEER_LOCK_PRIMARY); + flags); } if (rc && rc != -EEXIST) @@ -1493,37 +1494,54 @@ LNetPrimaryNID(lnet_nid_t nid) * down then this discovery can introduce long delays into the mount * process, so skip it if it isn't necessary. */ +again: spin_lock(&lp->lp_lock); - if (!lnet_peer_discovery_disabled && - (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) || - !lnet_peer_is_uptodate_locked(lp))) { - /* force a full discovery cycle */ - lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH | - LNET_PEER_LOCK_PRIMARY; + if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid) + lp->lp_state |= LNET_PEER_LOCK_PRIMARY; + + /* DD disabled, nothing to do */ + if (lnet_peer_discovery_disabled) { + nid = lnet_nid_to_nid4(&lp->lp_primary_nid); spin_unlock(&lp->lp_lock); + goto out_decref; + } - /* start discovery in the background. Messages to that - * peer will not go through until the discovery is - * complete - */ - rc = lnet_discover_peer_locked(lpni, cpt, false); - if (rc) - goto out_decref; - /* The lpni (or lp) for this NID may have changed and our ref is - * the only thing keeping the old one around. Release the ref - * and lookup the lpni again - */ - lnet_peer_ni_decref_locked(lpni); - lpni = lnet_find_peer_ni_locked(nid); - if (!lpni) { - rc = -ENOENT; - goto out_unlock; - } - lp = lpni->lpni_peer_net->lpn_peer; - } else { + /* Peer already up to date, nothing to do */ + if (lnet_peer_is_uptodate_locked(lp)) { + nid = lnet_nid_to_nid4(&lp->lp_primary_nid); spin_unlock(&lp->lp_lock); + goto out_decref; + } + spin_unlock(&lp->lp_lock); + + /* If primary nid locking is enabled, discovery is performed + * in the background. + * If primary nid locking is disabled, discovery blocks here. + * Messages to the peer will not go through until the discovery is + * complete. + */ + if (lock_prim_nid) + rc = lnet_discover_peer_locked(lpni, cpt, false); + else + rc = lnet_discover_peer_locked(lpni, cpt, true); + if (rc) + goto out_decref; + + /* The lpni (or lp) for this NID may have changed and our ref is + * the only thing keeping the old one around. Release the ref + * and lookup the lpni again + */ + lnet_peer_ni_decref_locked(lpni); + lpni = lnet_find_peer_ni_locked(nid); + if (!lpni) { + rc = -ENOENT; + goto out_unlock; } - primary_nid = lnet_nid_to_nid4(&lp->lp_primary_nid); + lp = lpni->lpni_peer_net->lpn_peer; + + if (!lock_prim_nid && !lnet_is_discovery_disabled(lp)) + goto again; + nid = lnet_nid_to_nid4(&lp->lp_primary_nid); out_decref: lnet_peer_ni_decref_locked(lpni); out_unlock: @@ -1614,7 +1632,6 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp, ptable->pt_peers++; } - /* Update peer state */ spin_lock(&lp->lp_lock); if (flags & LNET_PEER_CONFIGURED) { @@ -1692,10 +1709,8 @@ lnet_peer_add(lnet_nid_t nid4, unsigned int flags) rc = -EPERM; goto out; } else if (lp->lp_state & LNET_PEER_LOCK_PRIMARY) { - if (nid_same(&lp->lp_primary_nid, &nid)) { + if (nid_same(&lp->lp_primary_nid, &nid)) rc = -EEXIST; - goto out; - } /* we're trying to recreate an existing peer which * has already been created and its primary * locked. This is likely due to two servers @@ -1703,8 +1718,19 @@ lnet_peer_add(lnet_nid_t nid4, unsigned int flags) * to that node with the primary NID which was * first added by Lustre */ - rc = -EALREADY; + else + rc = -EALREADY; goto out; + } else if (!(flags & + (LNET_PEER_LOCK_PRIMARY | LNET_PEER_CONFIGURED))) { + /* if not recreating peer as configured and + * not locking primary nid, no need to + * do anything if primary nid is not being changed + */ + if (nid_same(&lp->lp_primary_nid, &nid)) { + rc = -EEXIST; + goto out; + } } /* Delete and recreate the peer. * We can get here: @@ -2011,6 +2037,14 @@ __must_hold(&the_lnet.ln_api_mutex) lnet_peer_ni_decref_locked(lpni); lp = lpni->lpni_peer_net->lpn_peer; + /* Peer must have been configured. */ + if ((flags & LNET_PEER_CONFIGURED) && + !(lp->lp_state & LNET_PEER_CONFIGURED)) { + CDEBUG(D_NET, "peer %s was not configured\n", + libcfs_nid2str(prim_nid)); + return -ENOENT; + } + /* Primary NID must match */ if (lnet_nid_to_nid4(&lp->lp_primary_nid) != prim_nid) { CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n", @@ -2026,9 +2060,7 @@ __must_hold(&the_lnet.ln_api_mutex) return -EPERM; } - if ((flags & LNET_PEER_LOCK_PRIMARY) && - (lnet_peer_is_uptodate(lp) && - (lp->lp_state & LNET_PEER_LOCK_PRIMARY))) { + if (lnet_peer_is_uptodate(lp) && !(flags & LNET_PEER_CONFIGURED)) { CDEBUG(D_NET, "Don't add temporary peer NI for uptodate peer %s\n", libcfs_nidstr(&lp->lp_primary_nid)); -- 1.8.3.1