From fc7a0d6013b46ebc17cdfdccc04a5d1d92c6af24 Mon Sep 17 00:00:00 2001 From: Serguei Smirnov Date: Tue, 28 Feb 2023 15:02:20 -0800 Subject: [PATCH] LU-14668 lnet: add 'lock_prim_nid' lnet module parameter Add 'lock_prim_nid' lnet module parameter to allow control of how Lustre peer primary NID is selected. If set to 1 (default), the NID specified by Lustre when calling LNet API is designated as primary for the peer, allowing for non-blocking discovery in the background. If set to 0, peer discovery is blocking until complete and the NID listed first in discovery response is designated as primary. Signed-off-by: Serguei Smirnov Change-Id: I6ed1cb0c637f4aa7a7340a6f01819ba9a85858f4 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50159 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Chris Horn Reviewed-by: Frank Sehr Reviewed-by: Cyril Bordage Reviewed-by: Oleg Drokin --- lnet/include/lnet/lib-lnet.h | 1 + lnet/lnet/api-ni.c | 5 +++ lnet/lnet/peer.c | 105 +++++++++++++++++++++++++++---------------- 3 files changed, 73 insertions(+), 38 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index c8b8d4d..056386a 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -616,6 +616,7 @@ extern int alive_router_check_interval; extern int live_router_check_interval; extern int dead_router_check_interval; extern int portal_rotor; +extern int lock_prim_nid; void lnet_mt_event_handler(struct lnet_event *event); diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index da8e767..3c63f94 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -268,6 +268,11 @@ module_param_call(lnet_response_tracking, response_tracking_set, param_get_int, MODULE_PARM_DESC(lnet_response_tracking, "(0|1|2|3) LNet Internal Only|GET Reply only|PUT ACK only|Full Tracking (default)"); +int lock_prim_nid = 1; +module_param(lock_prim_nid, int, 0444); +MODULE_PARM_DESC(lock_prim_nid, + "Whether nid passed down by Lustre is locked 
as primary"); + #define LNET_LND_TIMEOUT_DEFAULT ((LNET_TRANSACTION_TIMEOUT_DEFAULT - 1) / \ (LNET_RETRY_COUNT_DEFAULT + 1)) unsigned int lnet_lnd_timeout = LNET_LND_TIMEOUT_DEFAULT; diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 5e617d8..86c9cc3 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -1355,6 +1355,7 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids) struct lnet_nid pnid = LNET_ANY_NID; bool mr; int i, rc; + int flags = lock_prim_nid ? LNET_PEER_LOCK_PRIMARY : 0; if (!nids || num_nids < 1) return -EINVAL; @@ -1377,8 +1378,7 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids) lnet_nid4_to_nid(nids[i], &nid); if (LNET_NID_IS_ANY(&pnid)) { lnet_nid4_to_nid(nids[i], &pnid); - rc = lnet_add_peer_ni(&pnid, &LNET_ANY_NID, mr, - LNET_PEER_LOCK_PRIMARY); + rc = lnet_add_peer_ni(&pnid, &LNET_ANY_NID, mr, flags); if (rc == -EALREADY) { struct lnet_peer *lp; @@ -1394,12 +1394,10 @@ LNetAddPeer(lnet_nid_t *nids, __u32 num_nids) } } else if (lnet_peer_discovery_disabled) { lnet_nid4_to_nid(nids[i], &nid); - rc = lnet_add_peer_ni(&nid, &LNET_ANY_NID, mr, - LNET_PEER_LOCK_PRIMARY); + rc = lnet_add_peer_ni(&nid, &LNET_ANY_NID, mr, flags); } else { lnet_nid4_to_nid(nids[i], &nid); - rc = lnet_add_peer_ni(&pnid, &nid, mr, - LNET_PEER_LOCK_PRIMARY); + rc = lnet_add_peer_ni(&pnid, &nid, mr, flags); } if (rc && rc != -EEXIST) @@ -1441,36 +1439,53 @@ void LNetPrimaryNID(struct lnet_nid *nid) * down then this discovery can introduce long delays into the mount * process, so skip it if it isn't necessary. 
*/ +again: spin_lock(&lp->lp_lock); - if (!lnet_peer_discovery_disabled && - (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) || - !lnet_peer_is_uptodate_locked(lp))) { - /* force a full discovery cycle */ - lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH | - LNET_PEER_LOCK_PRIMARY; + if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY) && lock_prim_nid) + lp->lp_state |= LNET_PEER_LOCK_PRIMARY; + + /* DD disabled, nothing to do */ + if (lnet_peer_discovery_disabled) { + *nid = lp->lp_primary_nid; spin_unlock(&lp->lp_lock); + goto out_decref; + } - /* start discovery in the background. Messages to that - * peer will not go through until the discovery is - * complete - */ - rc = lnet_discover_peer_locked(lpni, cpt, false); - if (rc) - goto out_decref; - /* The lpni (or lp) for this NID may have changed and our ref is - * the only thing keeping the old one around. Release the ref - * and lookup the lpni again - */ - lnet_peer_ni_decref_locked(lpni); - lpni = lnet_peer_ni_find_locked(nid); - if (!lpni) { - rc = -ENOENT; - goto out_unlock; - } - lp = lpni->lpni_peer_net->lpn_peer; - } else { + /* Peer already up to date, nothing to do */ + if (lnet_peer_is_uptodate_locked(lp)) { + *nid = lp->lp_primary_nid; spin_unlock(&lp->lp_lock); + goto out_decref; } + spin_unlock(&lp->lp_lock); + + /* If primary nid locking is enabled, discovery is performed + * in the background. + * If primary nid locking is disabled, discovery blocks here. + * Messages to the peer will not go through until the discovery is + * complete. + */ + if (lock_prim_nid) + rc = lnet_discover_peer_locked(lpni, cpt, false); + else + rc = lnet_discover_peer_locked(lpni, cpt, true); + if (rc) + goto out_decref; + + /* The lpni (or lp) for this NID may have changed and our ref is + * the only thing keeping the old one around. 
Release the ref + * and lookup the lpni again + */ + lnet_peer_ni_decref_locked(lpni); + lpni = lnet_peer_ni_find_locked(nid); + if (!lpni) { + rc = -ENOENT; + goto out_unlock; + } + lp = lpni->lpni_peer_net->lpn_peer; + + if (!lock_prim_nid && !lnet_is_discovery_disabled(lp)) + goto again; *nid = lp->lp_primary_nid; out_decref: lnet_peer_ni_decref_locked(lpni); @@ -1561,7 +1576,6 @@ lnet_peer_attach_peer_ni(struct lnet_peer *lp, ptable->pt_peers++; } - /* Update peer state */ spin_lock(&lp->lp_lock); if (flags & LNET_PEER_CONFIGURED) { @@ -1638,10 +1652,8 @@ lnet_peer_add(struct lnet_nid *nid, unsigned int flags) rc = -EPERM; goto out; } else if (lp->lp_state & LNET_PEER_LOCK_PRIMARY) { - if (nid_same(&lp->lp_primary_nid, nid)) { + if (nid_same(&lp->lp_primary_nid, nid)) rc = -EEXIST; - goto out; - } /* we're trying to recreate an existing peer which * has already been created and its primary * locked. This is likely due to two servers @@ -1649,8 +1661,18 @@ lnet_peer_add(struct lnet_nid *nid, unsigned int flags) * to that node with the primary NID which was * first added by Lustre */ - rc = -EALREADY; + else + rc = -EALREADY; goto out; + } else if (!(flags & (LNET_PEER_LOCK_PRIMARY | LNET_PEER_CONFIGURED))) { + /* if not recreating peer as configured and + * not locking primary nid, no need to + * do anything if primary nid is not being changed + */ + if (nid_same(&lp->lp_primary_nid, nid)) { + rc = -EEXIST; + goto out; + } } /* Delete and recreate the peer. * We can get here: @@ -1963,6 +1985,14 @@ __must_hold(&the_lnet.ln_api_mutex) lnet_peer_ni_decref_locked(lpni); lp = lpni->lpni_peer_net->lpn_peer; + /* Peer must have been configured. 
*/ + if ((flags & LNET_PEER_CONFIGURED) && + !(lp->lp_state & LNET_PEER_CONFIGURED)) { + CDEBUG(D_NET, "peer %s was not configured\n", + libcfs_nidstr(prim_nid)); + return -ENOENT; + } + /* Primary NID must match */ if (!nid_same(&lp->lp_primary_nid, prim_nid)) { CDEBUG(D_NET, "prim_nid %s is not primary for peer %s\n", @@ -1978,8 +2008,7 @@ __must_hold(&the_lnet.ln_api_mutex) return -EPERM; } - if ((flags & LNET_PEER_LOCK_PRIMARY) && - (lnet_peer_is_uptodate(lp) && (lp->lp_state & LNET_PEER_LOCK_PRIMARY))) { + if (lnet_peer_is_uptodate(lp) && !(flags & LNET_PEER_CONFIGURED)) { CDEBUG(D_NET, "Don't add temporary peer NI for uptodate peer %s\n", libcfs_nidstr(&lp->lp_primary_nid)); -- 1.8.3.1