From 51b2c0f75f727f0562b3145015357cbff5cbb3b5 Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Fri, 15 Feb 2019 12:45:28 -0800 Subject: [PATCH] LU-9121 lnet: foundation patch for selection mod Add the priority and preferred NIDs fields in the lnet_ni, lnet_net, lnet_peer_net and lnet_peer_ni. Switched the implementation of the preferred NIDs list to a list_head instead of an array, because the code is more straightforward. There is more memory overhead due to list_head, but these lists are expected to be small, so I chose code simplicity over memory. Test-Parameters: trivial testlist=lnet-selftest,sanity-lnet Signed-off-by: Amir Shehata Signed-off-by: Serguei Smirnov Change-Id: I0c75855b736345c25e1604083eee2b65d38ef28d Reviewed-on: https://review.whamcloud.com/34350 Reviewed-by: Chris Horn --- lnet/include/lnet/lib-types.h | 27 ++++++-- lnet/lnet/config.c | 4 ++ lnet/lnet/peer.c | 145 ++++++++++++++++++++++-------------------- 3 files changed, 103 insertions(+), 73 deletions(-) diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index dc85f34..385fcd3 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -62,6 +62,7 @@ * All local and peer NIs created have their health default to this value. 
*/ #define LNET_MAX_HEALTH_VALUE 1000 +#define LNET_MAX_SELECTION_PRIORITY UINT_MAX /* forward refs */ struct lnet_libmd; @@ -369,15 +370,15 @@ struct lnet_net { * lnet/include/lnet/nidstr.h */ __u32 net_id; - /* priority of the network */ - __u32 net_prio; - /* total number of CPTs in the array */ __u32 net_ncpts; /* cumulative CPTs of all NIs in this net */ __u32 *net_cpts; + /* relative net selection priority */ + __u32 net_sel_priority; + /* network tunables */ struct lnet_ioctl_config_lnd_cmn_tunables net_tunables; @@ -404,6 +405,9 @@ struct lnet_net { /* protects access to net_last_alive */ spinlock_t net_lock; + + /* list of router nids preferred for this network */ + struct list_head net_rtr_pref_nids; }; struct lnet_ni { @@ -483,6 +487,9 @@ struct lnet_ni { */ atomic_t ni_fatal_error_on; + /* the relative selection priority of this NI */ + __u32 ni_sel_priority; + /* * equivalent interfaces to use * This is an array because socklnd bonding can still be configured @@ -514,6 +521,11 @@ struct lnet_ping_buffer { #define LNET_PING_INFO_TO_BUFFER(PINFO) \ container_of((PINFO), struct lnet_ping_buffer, pb_info) +struct lnet_nid_list { + struct list_head nl_list; + lnet_nid_t nl_nid; +}; + struct lnet_peer_ni { /* chain on lpn_peer_nis */ struct list_head lpni_peer_nis; @@ -573,8 +585,12 @@ struct lnet_peer_ni { /* preferred local nids: if only one, use lpni_pref.nid */ union lpni_pref { lnet_nid_t nid; - lnet_nid_t *nids; + struct list_head nids; } lpni_pref; + /* list of router nids preferred for this peer NI */ + struct list_head lpni_rtr_pref_nids; + /* The relative selection priority of this peer NI */ + __u32 lpni_sel_priority; /* number of preferred NIDs in lnpi_pref_nids */ __u32 lpni_pref_nnids; }; @@ -768,6 +784,9 @@ struct lnet_peer_net { /* selection sequence number */ __u32 lpn_seq; + /* relative peer net selection priority */ + __u32 lpn_sel_priority; + /* reference count */ atomic_t lpn_refcount; }; diff --git a/lnet/lnet/config.c 
b/lnet/lnet/config.c index 746253b..610100a 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -373,11 +373,14 @@ lnet_net_alloc(__u32 net_id, struct list_head *net_list) INIT_LIST_HEAD(&net->net_ni_list); INIT_LIST_HEAD(&net->net_ni_added); INIT_LIST_HEAD(&net->net_ni_zombie); + INIT_LIST_HEAD(&net->net_rtr_pref_nids); spin_lock_init(&net->net_lock); net->net_id = net_id; net->net_last_alive = ktime_get_real_seconds(); + net->net_sel_priority = LNET_MAX_SELECTION_PRIORITY; + /* initialize global paramters to undefiend */ net->net_tunables.lct_peer_timeout = -1; net->net_tunables.lct_max_tx_credits = -1; @@ -481,6 +484,7 @@ lnet_ni_alloc_common(struct lnet_net *net, char *iface) ni->ni_net_ns = get_net(&init_net); ni->ni_state = LNET_NI_STATE_INIT; + ni->ni_sel_priority = LNET_MAX_SELECTION_PRIORITY; list_add_tail(&ni->ni_netlist, &net->net_ni_added); /* diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index a6943e7..826589f 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -166,8 +166,10 @@ lnet_peer_ni_alloc(lnet_nid_t nid) INIT_LIST_HEAD(&lpni->lpni_peer_nis); INIT_LIST_HEAD(&lpni->lpni_recovery); INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list); + INIT_LIST_HEAD(&lpni->lpni_rtr_pref_nids); LNetInvalidateMDHandle(&lpni->lpni_recovery_ping_mdh); atomic_set(&lpni->lpni_refcount, 1); + lpni->lpni_sel_priority = LNET_MAX_SELECTION_PRIORITY; spin_lock_init(&lpni->lpni_lock); @@ -217,6 +219,7 @@ lnet_peer_net_alloc(__u32 net_id) INIT_LIST_HEAD(&lpn->lpn_peer_nets); INIT_LIST_HEAD(&lpn->lpn_peer_nis); lpn->lpn_net_id = net_id; + lpn->lpn_sel_priority = LNET_MAX_SELECTION_PRIORITY; CDEBUG(D_NET, "%p net %s\n", lpn, libcfs_net2str(lpn->lpn_net_id)); @@ -903,14 +906,14 @@ lnet_push_update_to_peers(int force) bool lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid) { - int i; + struct lnet_nid_list *ne; if (lpni->lpni_pref_nnids == 0) return false; if (lpni->lpni_pref_nnids == 1) return lpni->lpni_pref.nid == nid; - for (i = 0; i < 
lpni->lpni_pref_nnids; i++) { - if (lpni->lpni_pref.nids[i] == nid) + list_for_each_entry(ne, &lpni->lpni_pref.nids, nl_list) { + if (ne->nl_nid == nid) return true; } return false; @@ -982,11 +985,10 @@ lnet_peer_clr_non_mr_pref_nids(struct lnet_peer *lp) int lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid) { - lnet_nid_t *nids = NULL; - lnet_nid_t *oldnids = NULL; struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer; - int size; - int i; + struct lnet_nid_list *ne1 = NULL; + struct lnet_nid_list *ne2 = NULL; + lnet_nid_t tmp_nid = LNET_NID_ANY; int rc = 0; if (nid == LNET_NID_ANY) { @@ -1000,29 +1002,47 @@ lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid) } /* A non-MR node may have only one preferred NI per peer_ni */ - if (lpni->lpni_pref_nnids > 0) { - if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) { - rc = -EPERM; - goto out; - } + if (lpni->lpni_pref_nnids > 0 && + !(lp->lp_state & LNET_PEER_MULTI_RAIL)) { + rc = -EPERM; + goto out; } + /* add the new preferred nid to the list of preferred nids */ if (lpni->lpni_pref_nnids != 0) { - size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1); - LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size); - if (!nids) { + size_t alloc_size = sizeof(*ne1); + + if (lpni->lpni_pref_nnids == 1) { + tmp_nid = lpni->lpni_pref.nid; + INIT_LIST_HEAD(&lpni->lpni_pref.nids); + } + + list_for_each_entry(ne1, &lpni->lpni_pref.nids, nl_list) { + if (ne1->nl_nid == nid) { + rc = -EEXIST; + goto out; + } + } + + LIBCFS_CPT_ALLOC(ne1, lnet_cpt_table(), lpni->lpni_cpt, + alloc_size); + if (!ne1) { rc = -ENOMEM; goto out; } - for (i = 0; i < lpni->lpni_pref_nnids; i++) { - if (lpni->lpni_pref.nids[i] == nid) { - LIBCFS_FREE(nids, size); - rc = -EEXIST; + + /* move the originally stored nid to the list */ + if (lpni->lpni_pref_nnids == 1) { + LIBCFS_CPT_ALLOC(ne2, lnet_cpt_table(), + lpni->lpni_cpt, alloc_size); + if (!ne2) { + rc = -ENOMEM; goto out; } - nids[i] = lpni->lpni_pref.nids[i]; + 
INIT_LIST_HEAD(&ne2->nl_list); + ne2->nl_nid = tmp_nid; } - nids[i] = nid; + ne1->nl_nid = nid; } lnet_net_lock(LNET_LOCK_EX); @@ -1030,18 +1050,15 @@ lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid) if (lpni->lpni_pref_nnids == 0) { lpni->lpni_pref.nid = nid; } else { - oldnids = lpni->lpni_pref.nids; - lpni->lpni_pref.nids = nids; + if (ne2) + list_add_tail(&ne2->nl_list, &lpni->lpni_pref.nids); + list_add_tail(&ne1->nl_list, &lpni->lpni_pref.nids); } lpni->lpni_pref_nnids++; lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF; spin_unlock(&lpni->lpni_lock); lnet_net_unlock(LNET_LOCK_EX); - if (oldnids) { - size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1); - CFS_FREE_PTR_ARRAY(oldnids, size); - } out: if (rc == -EEXIST && (lpni->lpni_state & LNET_PEER_NI_NON_MR_PREF)) { spin_lock(&lpni->lpni_lock); @@ -1056,11 +1073,8 @@ out: int lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid) { - lnet_nid_t *nids = NULL; - lnet_nid_t *oldnids = NULL; struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer; - int size; - int i, j; + struct lnet_nid_list *ne = NULL; int rc = 0; if (lpni->lpni_pref_nnids == 0) { @@ -1073,55 +1087,42 @@ lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid) rc = -ENOENT; goto out; } - } else if (lpni->lpni_pref_nnids == 2) { - if (lpni->lpni_pref.nids[0] != nid && - lpni->lpni_pref.nids[1] != nid) { - rc = -ENOENT; - goto out; - } } else { - size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1); - LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size); - if (!nids) { - rc = -ENOMEM; - goto out; - } - for (i = 0, j = 0; i < lpni->lpni_pref_nnids; i++) { - if (lpni->lpni_pref.nids[i] != nid) - continue; - nids[j++] = lpni->lpni_pref.nids[i]; - } - /* Check if we actually removed a nid. 
*/ - if (j == lpni->lpni_pref_nnids) { - LIBCFS_FREE(nids, size); - rc = -ENOENT; - goto out; + list_for_each_entry(ne, &lpni->lpni_pref.nids, nl_list) { + if (ne->nl_nid == nid) + goto remove_nid_entry; } + rc = -ENOENT; + ne = NULL; + goto out; } +remove_nid_entry: lnet_net_lock(LNET_LOCK_EX); spin_lock(&lpni->lpni_lock); - if (lpni->lpni_pref_nnids == 1) { + if (lpni->lpni_pref_nnids == 1) lpni->lpni_pref.nid = LNET_NID_ANY; - } else if (lpni->lpni_pref_nnids == 2) { - oldnids = lpni->lpni_pref.nids; - if (oldnids[0] == nid) - lpni->lpni_pref.nid = oldnids[1]; - else - lpni->lpni_pref.nid = oldnids[2]; - } else { - oldnids = lpni->lpni_pref.nids; - lpni->lpni_pref.nids = nids; + else { + list_del_init(&ne->nl_list); + if (lpni->lpni_pref_nnids == 2) { + struct lnet_nid_list *ne, *tmp; + + list_for_each_entry_safe(ne, tmp, + &lpni->lpni_pref.nids, + nl_list) { + lpni->lpni_pref.nid = ne->nl_nid; + list_del_init(&ne->nl_list); + LIBCFS_FREE(ne, sizeof(*ne)); + } + } } lpni->lpni_pref_nnids--; lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF; spin_unlock(&lpni->lpni_lock); lnet_net_unlock(LNET_LOCK_EX); - if (oldnids) { - size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1); - CFS_FREE_PTR_ARRAY(oldnids, size); - } + if (ne) + LIBCFS_FREE(ne, sizeof(*ne)); out: CDEBUG(D_NET, "peer %s nid %s: %d\n", libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid), rc); @@ -1719,9 +1720,15 @@ lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni) spin_unlock(&ptable->pt_zombie_lock); } - if (lpni->lpni_pref_nnids > 1) - CFS_FREE_PTR_ARRAY(lpni->lpni_pref.nids, lpni->lpni_pref_nnids); + if (lpni->lpni_pref_nnids > 1) { + struct lnet_nid_list *ne, *tmp; + list_for_each_entry_safe(ne, tmp, &lpni->lpni_pref.nids, + nl_list) { + list_del_init(&ne->nl_list); + LIBCFS_FREE(ne, sizeof(*ne)); + } + } LIBCFS_FREE(lpni, sizeof(*lpni)); if (lpn) -- 1.8.3.1