Whamcloud - gitweb
LU-9121 lnet: foundation patch for selection mod
author: Amir Shehata <ashehata@whamcloud.com>
Fri, 15 Feb 2019 20:45:28 +0000 (12:45 -0800)
committer: Amir Shehata <ashehata@whamcloud.com>
Mon, 22 Feb 2021 17:10:56 +0000 (09:10 -0800)
Add the priority and preferred NIDs fields to lnet_ni,
lnet_net, lnet_peer_net and lnet_peer_ni. Switch
the implementation of the preferred NIDs list from an array to a
list_head, because the resulting code is more straightforward.
There is more memory overhead due to list_head, but these lists
are expected to be small, so I chose code simplicity over memory.

Test-Parameters: trivial testlist=lnet-selftest,sanity-lnet
Signed-off-by: Amir Shehata <ashehata@whamcloud.com>
Signed-off-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Change-Id: I0c75855b736345c25e1604083eee2b65d38ef28d
Reviewed-on: https://review.whamcloud.com/34350
Reviewed-by: Chris Horn <chris.horn@hpe.com>
lnet/include/lnet/lib-types.h
lnet/lnet/config.c
lnet/lnet/peer.c

index dc85f34..385fcd3 100644 (file)
@@ -62,6 +62,7 @@
  * All local and peer NIs created have their health default to this value.
  */
 #define LNET_MAX_HEALTH_VALUE 1000
+#define LNET_MAX_SELECTION_PRIORITY UINT_MAX
 
 /* forward refs */
 struct lnet_libmd;
@@ -369,15 +370,15 @@ struct lnet_net {
         * lnet/include/lnet/nidstr.h */
        __u32                   net_id;
 
-       /* priority of the network */
-       __u32                   net_prio;
-
        /* total number of CPTs in the array */
        __u32                   net_ncpts;
 
        /* cumulative CPTs of all NIs in this net */
        __u32                   *net_cpts;
 
+       /* relative net selection priority */
+       __u32                   net_sel_priority;
+
        /* network tunables */
        struct lnet_ioctl_config_lnd_cmn_tunables net_tunables;
 
@@ -404,6 +405,9 @@ struct lnet_net {
 
        /* protects access to net_last_alive */
        spinlock_t              net_lock;
+
+       /* list of router nids preferred for this network */
+       struct list_head        net_rtr_pref_nids;
 };
 
 struct lnet_ni {
@@ -483,6 +487,9 @@ struct lnet_ni {
         */
        atomic_t                ni_fatal_error_on;
 
+       /* the relative selection priority of this NI */
+       __u32                   ni_sel_priority;
+
        /*
         * equivalent interfaces to use
         * This is an array because socklnd bonding can still be configured
@@ -514,6 +521,11 @@ struct lnet_ping_buffer {
 #define LNET_PING_INFO_TO_BUFFER(PINFO)        \
        container_of((PINFO), struct lnet_ping_buffer, pb_info)
 
+struct lnet_nid_list {
+       struct list_head nl_list;
+       lnet_nid_t nl_nid;
+};
+
 struct lnet_peer_ni {
        /* chain on lpn_peer_nis */
        struct list_head        lpni_peer_nis;
@@ -573,8 +585,12 @@ struct lnet_peer_ni {
        /* preferred local nids: if only one, use lpni_pref.nid */
        union lpni_pref {
                lnet_nid_t      nid;
-               lnet_nid_t      *nids;
+               struct list_head nids;
        } lpni_pref;
+       /* list of router nids preferred for this peer NI */
+       struct list_head        lpni_rtr_pref_nids;
+       /* The relative selection priority of this peer NI */
+       __u32                   lpni_sel_priority;
        /* number of preferred NIDs in lnpi_pref_nids */
        __u32                   lpni_pref_nnids;
 };
@@ -768,6 +784,9 @@ struct lnet_peer_net {
        /* selection sequence number */
        __u32                   lpn_seq;
 
+       /* relative peer net selection priority */
+       __u32                   lpn_sel_priority;
+
        /* reference count */
        atomic_t                lpn_refcount;
 };
index 746253b..610100a 100644 (file)
@@ -373,11 +373,14 @@ lnet_net_alloc(__u32 net_id, struct list_head *net_list)
        INIT_LIST_HEAD(&net->net_ni_list);
        INIT_LIST_HEAD(&net->net_ni_added);
        INIT_LIST_HEAD(&net->net_ni_zombie);
+       INIT_LIST_HEAD(&net->net_rtr_pref_nids);
        spin_lock_init(&net->net_lock);
 
        net->net_id = net_id;
        net->net_last_alive = ktime_get_real_seconds();
 
+       net->net_sel_priority = LNET_MAX_SELECTION_PRIORITY;
+
        /* initialize global paramters to undefiend */
        net->net_tunables.lct_peer_timeout = -1;
        net->net_tunables.lct_max_tx_credits = -1;
@@ -481,6 +484,7 @@ lnet_ni_alloc_common(struct lnet_net *net, char *iface)
                ni->ni_net_ns = get_net(&init_net);
 
        ni->ni_state = LNET_NI_STATE_INIT;
+       ni->ni_sel_priority = LNET_MAX_SELECTION_PRIORITY;
        list_add_tail(&ni->ni_netlist, &net->net_ni_added);
 
        /*
index a6943e7..826589f 100644 (file)
@@ -166,8 +166,10 @@ lnet_peer_ni_alloc(lnet_nid_t nid)
        INIT_LIST_HEAD(&lpni->lpni_peer_nis);
        INIT_LIST_HEAD(&lpni->lpni_recovery);
        INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list);
+       INIT_LIST_HEAD(&lpni->lpni_rtr_pref_nids);
        LNetInvalidateMDHandle(&lpni->lpni_recovery_ping_mdh);
        atomic_set(&lpni->lpni_refcount, 1);
+       lpni->lpni_sel_priority = LNET_MAX_SELECTION_PRIORITY;
 
        spin_lock_init(&lpni->lpni_lock);
 
@@ -217,6 +219,7 @@ lnet_peer_net_alloc(__u32 net_id)
        INIT_LIST_HEAD(&lpn->lpn_peer_nets);
        INIT_LIST_HEAD(&lpn->lpn_peer_nis);
        lpn->lpn_net_id = net_id;
+       lpn->lpn_sel_priority = LNET_MAX_SELECTION_PRIORITY;
 
        CDEBUG(D_NET, "%p net %s\n", lpn, libcfs_net2str(lpn->lpn_net_id));
 
@@ -903,14 +906,14 @@ lnet_push_update_to_peers(int force)
 bool
 lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid)
 {
-       int i;
+       struct lnet_nid_list *ne;
 
        if (lpni->lpni_pref_nnids == 0)
                return false;
        if (lpni->lpni_pref_nnids == 1)
                return lpni->lpni_pref.nid == nid;
-       for (i = 0; i < lpni->lpni_pref_nnids; i++) {
-               if (lpni->lpni_pref.nids[i] == nid)
+       list_for_each_entry(ne, &lpni->lpni_pref.nids, nl_list) {
+               if (ne->nl_nid == nid)
                        return true;
        }
        return false;
@@ -982,11 +985,10 @@ lnet_peer_clr_non_mr_pref_nids(struct lnet_peer *lp)
 int
 lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
 {
-       lnet_nid_t *nids = NULL;
-       lnet_nid_t *oldnids = NULL;
        struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer;
-       int size;
-       int i;
+       struct lnet_nid_list *ne1 = NULL;
+       struct lnet_nid_list *ne2 = NULL;
+       lnet_nid_t tmp_nid = LNET_NID_ANY;
        int rc = 0;
 
        if (nid == LNET_NID_ANY) {
@@ -1000,29 +1002,47 @@ lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
        }
 
        /* A non-MR node may have only one preferred NI per peer_ni */
-       if (lpni->lpni_pref_nnids > 0) {
-               if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
-                       rc = -EPERM;
-                       goto out;
-               }
+       if (lpni->lpni_pref_nnids > 0 &&
+           !(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
+               rc = -EPERM;
+               goto out;
        }
 
+       /* add the new preferred nid to the list of preferred nids */
        if (lpni->lpni_pref_nnids != 0) {
-               size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
-               LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size);
-               if (!nids) {
+               size_t alloc_size = sizeof(*ne1);
+
+               if (lpni->lpni_pref_nnids == 1) {
+                       tmp_nid = lpni->lpni_pref.nid;
+                       INIT_LIST_HEAD(&lpni->lpni_pref.nids);
+               }
+
+               list_for_each_entry(ne1, &lpni->lpni_pref.nids, nl_list) {
+                       if (ne1->nl_nid == nid) {
+                               rc = -EEXIST;
+                               goto out;
+                       }
+               }
+
+               LIBCFS_CPT_ALLOC(ne1, lnet_cpt_table(), lpni->lpni_cpt,
+                                alloc_size);
+               if (!ne1) {
                        rc = -ENOMEM;
                        goto out;
                }
-               for (i = 0; i < lpni->lpni_pref_nnids; i++) {
-                       if (lpni->lpni_pref.nids[i] == nid) {
-                               LIBCFS_FREE(nids, size);
-                               rc = -EEXIST;
+
+               /* move the originally stored nid to the list */
+               if (lpni->lpni_pref_nnids == 1) {
+                       LIBCFS_CPT_ALLOC(ne2, lnet_cpt_table(),
+                               lpni->lpni_cpt, alloc_size);
+                       if (!ne2) {
+                               rc = -ENOMEM;
                                goto out;
                        }
-                       nids[i] = lpni->lpni_pref.nids[i];
+                       INIT_LIST_HEAD(&ne2->nl_list);
+                       ne2->nl_nid = tmp_nid;
                }
-               nids[i] = nid;
+               ne1->nl_nid = nid;
        }
 
        lnet_net_lock(LNET_LOCK_EX);
@@ -1030,18 +1050,15 @@ lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
        if (lpni->lpni_pref_nnids == 0) {
                lpni->lpni_pref.nid = nid;
        } else {
-               oldnids = lpni->lpni_pref.nids;
-               lpni->lpni_pref.nids = nids;
+               if (ne2)
+                       list_add_tail(&ne2->nl_list, &lpni->lpni_pref.nids);
+               list_add_tail(&ne1->nl_list, &lpni->lpni_pref.nids);
        }
        lpni->lpni_pref_nnids++;
        lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
        spin_unlock(&lpni->lpni_lock);
        lnet_net_unlock(LNET_LOCK_EX);
 
-       if (oldnids) {
-               size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
-               CFS_FREE_PTR_ARRAY(oldnids, size);
-       }
 out:
        if (rc == -EEXIST && (lpni->lpni_state & LNET_PEER_NI_NON_MR_PREF)) {
                spin_lock(&lpni->lpni_lock);
@@ -1056,11 +1073,8 @@ out:
 int
 lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
 {
-       lnet_nid_t *nids = NULL;
-       lnet_nid_t *oldnids = NULL;
        struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer;
-       int size;
-       int i, j;
+       struct lnet_nid_list *ne = NULL;
        int rc = 0;
 
        if (lpni->lpni_pref_nnids == 0) {
@@ -1073,55 +1087,42 @@ lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
                        rc = -ENOENT;
                        goto out;
                }
-       } else if (lpni->lpni_pref_nnids == 2) {
-               if (lpni->lpni_pref.nids[0] != nid &&
-                   lpni->lpni_pref.nids[1] != nid) {
-                       rc = -ENOENT;
-                       goto out;
-               }
        } else {
-               size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
-               LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size);
-               if (!nids) {
-                       rc = -ENOMEM;
-                       goto out;
-               }
-               for (i = 0, j = 0; i < lpni->lpni_pref_nnids; i++) {
-                       if (lpni->lpni_pref.nids[i] != nid)
-                               continue;
-                       nids[j++] = lpni->lpni_pref.nids[i];
-               }
-               /* Check if we actually removed a nid. */
-               if (j == lpni->lpni_pref_nnids) {
-                       LIBCFS_FREE(nids, size);
-                       rc = -ENOENT;
-                       goto out;
+               list_for_each_entry(ne, &lpni->lpni_pref.nids, nl_list) {
+                       if (ne->nl_nid == nid)
+                               goto remove_nid_entry;
                }
+               rc = -ENOENT;
+               ne = NULL;
+               goto out;
        }
 
+remove_nid_entry:
        lnet_net_lock(LNET_LOCK_EX);
        spin_lock(&lpni->lpni_lock);
-       if (lpni->lpni_pref_nnids == 1) {
+       if (lpni->lpni_pref_nnids == 1)
                lpni->lpni_pref.nid = LNET_NID_ANY;
-       } else if (lpni->lpni_pref_nnids == 2) {
-               oldnids = lpni->lpni_pref.nids;
-               if (oldnids[0] == nid)
-                       lpni->lpni_pref.nid = oldnids[1];
-               else
-                       lpni->lpni_pref.nid = oldnids[2];
-       } else {
-               oldnids = lpni->lpni_pref.nids;
-               lpni->lpni_pref.nids = nids;
+       else {
+               list_del_init(&ne->nl_list);
+               if (lpni->lpni_pref_nnids == 2) {
+                       struct lnet_nid_list *ne, *tmp;
+
+                       list_for_each_entry_safe(ne, tmp,
+                                                &lpni->lpni_pref.nids,
+                                                nl_list) {
+                               lpni->lpni_pref.nid = ne->nl_nid;
+                               list_del_init(&ne->nl_list);
+                               LIBCFS_FREE(ne, sizeof(*ne));
+                       }
+               }
        }
        lpni->lpni_pref_nnids--;
        lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
        spin_unlock(&lpni->lpni_lock);
        lnet_net_unlock(LNET_LOCK_EX);
 
-       if (oldnids) {
-               size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
-               CFS_FREE_PTR_ARRAY(oldnids, size);
-       }
+       if (ne)
+               LIBCFS_FREE(ne, sizeof(*ne));
 out:
        CDEBUG(D_NET, "peer %s nid %s: %d\n",
               libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid), rc);
@@ -1719,9 +1720,15 @@ lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni)
                spin_unlock(&ptable->pt_zombie_lock);
        }
 
-       if (lpni->lpni_pref_nnids > 1)
-               CFS_FREE_PTR_ARRAY(lpni->lpni_pref.nids, lpni->lpni_pref_nnids);
+       if (lpni->lpni_pref_nnids > 1) {
+               struct lnet_nid_list *ne, *tmp;
 
+               list_for_each_entry_safe(ne, tmp, &lpni->lpni_pref.nids,
+                                        nl_list) {
+                       list_del_init(&ne->nl_list);
+                       LIBCFS_FREE(ne, sizeof(*ne));
+               }
+       }
        LIBCFS_FREE(lpni, sizeof(*lpni));
 
        if (lpn)