Whamcloud - gitweb
LU-13235 lnet: copy the correct amount of CPTs to lnet_cpts
[fs/lustre-release.git] / lnet / lnet / config.c
index bf88011..7b7d775 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2012, 2015, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -35,6 +31,9 @@
  */
 
 #define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/ctype.h>
+#include <linux/inetdevice.h>
 #include <linux/nsproxy.h>
 #include <net/net_namespace.h>
 #include <lnet/lib-lnet.h>
@@ -50,8 +49,11 @@ static int lnet_tbnob = 0;                   /* track text buf allocation */
 #define LNET_MAX_TEXTBUF_NOB    (64<<10)       /* bound allocation */
 #define LNET_SINGLE_TEXTBUF_NOB  (4<<10)
 
+#define SPACESTR " \t\v\r\n"
+#define DELIMITERS ":()[]"
+
 static void
-lnet_syntax(char *name, char *str, int offset, int width)
+lnet_syntax(const char *name, const char *str, int offset, int width)
 {
        static char dots[LNET_SINGLE_TEXTBUF_NOB];
        static char dashes[LNET_SINGLE_TEXTBUF_NOB];
@@ -80,20 +82,217 @@ lnet_issep (char c)
        }
 }
 
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
+/*
+ * Report whether no net on @netlist carries the id @net_id.
+ *
+ * Returns true when @netlist is NULL or holds no net with a matching
+ * net_id.  Returns false on the first match; in that case, if @net is
+ * non-NULL, *net is set to the matching entry.
+ */
+bool
+lnet_net_unique(__u32 net_id, struct list_head *netlist,
+               struct lnet_net **net)
+{
+       struct lnet_net *cur;
+
+       if (netlist == NULL)
+               return true;
+
+       list_for_each_entry(cur, netlist, net_list) {
+               if (cur->net_id != net_id)
+                       continue;
+
+               /* duplicate found: hand it back if the caller asked */
+               if (net)
+                       *net = cur;
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * Check that no NI already added to a network uses the interface
+ * @iface.
+ *
+ * Walks @nilist (NIs linked through ni_netlist) and compares @iface with
+ * the first interface name of every NI.  Returns false if an NI with the
+ * same name is found, true otherwise.
+ *
+ * The names are compared in full: a prefix comparison bounded by
+ * strlen(iface) would wrongly treat e.g. "eth1" as a duplicate of an
+ * existing "eth10".
+ */
+bool
+lnet_ni_unique_net(struct list_head *nilist, char *iface)
 {
        struct list_head *tmp;
-       lnet_ni_t        *ni;
+       struct lnet_ni *ni;
 
        list_for_each(tmp, nilist) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
+               ni = list_entry(tmp, struct lnet_ni, ni_netlist);
 
-               if (LNET_NIDNET(ni->ni_nid) == net)
-                       return 0;
+               /* an NI with no interface name recorded cannot clash */
+               if (ni->ni_interfaces[0] != NULL &&
+                   strcmp(ni->ni_interfaces[0], iface) == 0)
+                       return false;
        }
 
-       return 1;
+       return true;
+}
+
+/*
+ * Check that @iface is not already one of the interfaces of a single NI.
+ * This is only a consideration if use_tcp_bonding is set.
+ *
+ * Returns false if any non-NULL entry of @iface_list equals @iface, true
+ * otherwise.  The names are compared in full: a prefix comparison
+ * bounded by strlen(iface) would wrongly treat e.g. "eth1" as a
+ * duplicate of an existing "eth10".
+ */
+static bool
+lnet_ni_unique_ni(char *iface_list[LNET_INTERFACES_NUM], char *iface)
+{
+       int i;
+
+       for (i = 0; i < LNET_INTERFACES_NUM; i++) {
+               if (iface_list[i] != NULL &&
+                   strcmp(iface_list[i], iface) == 0)
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Report whether @value occurs among the first @size entries of @array.
+ *
+ * Returns true when the value is found and false otherwise (including
+ * when @size is 0, in which case @array is not dereferenced).
+ */
+static bool
+in_array(__u32 *array, __u32 size, __u32 value)
+{
+       __u32 i;
+
+       for (i = 0; i < size; i++) {
+               if (array[i] == value)
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Merge the CPT list of an NI into the CPT list of the net it is on.
+ *
+ * @cpts:  CPT array of the NI, or NULL if the NI exists on all CPTs
+ * @ncpts: number of entries in @cpts
+ * @net:   net whose net_cpts/net_ncpts are updated
+ *
+ * A net whose net_ncpts equals LNET_CPT_NUMBER is left untouched.  A
+ * NULL @cpts puts the net into that state, with net_cpts freed and set to
+ * NULL.  Otherwise every entry of @cpts for which in_array() returns
+ * false against net->net_cpts is appended to a reallocated net->net_cpts.
+ *
+ * Returns 0 on success, or -ENOMEM if an allocation fails, in which case
+ * the net's CPT list is left unchanged.
+ */
+static int
+lnet_net_append_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net)
+{
+       __u32 *added_cpts = NULL;
+       __u32 i, j = 0;
+       int rc = 0;
+
+       /*
+        * no need to go further since a subset of the NIs already exist on
+        * all CPTs
+        */
+       if (net->net_ncpts == LNET_CPT_NUMBER)
+               return 0;
+
+       if (cpts == NULL) {
+               /* there is an NI which will exist on all CPTs */
+               if (net->net_cpts != NULL)
+                       LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) *
+                                   net->net_ncpts);
+               net->net_cpts = NULL;
+               net->net_ncpts = LNET_CPT_NUMBER;
+               return 0;
+       }
+
+       /* an empty array has nothing to merge; avoid zero-sized allocations */
+       if (ncpts == 0)
+               return 0;
+
+       if (net->net_cpts == NULL) {
+               /* first CPT list for this net: keep a private copy of it */
+               LIBCFS_ALLOC(net->net_cpts, sizeof(*net->net_cpts) * ncpts);
+               if (net->net_cpts == NULL)
+                       return -ENOMEM;
+               memcpy(net->net_cpts, cpts, ncpts * sizeof(*net->net_cpts));
+               net->net_ncpts = ncpts;
+               return 0;
+       }
+
+       /*
+        * The loop below stores at most ncpts entries, so size the scratch
+        * array by ncpts.  Sizing it by LNET_CPT_NUMBER would be overrun
+        * by a caller passing more than LNET_CPT_NUMBER entries.
+        */
+       LIBCFS_ALLOC(added_cpts, sizeof(*added_cpts) * ncpts);
+       if (added_cpts == NULL)
+               return -ENOMEM;
+
+       for (i = 0; i < ncpts; i++) {
+               if (!in_array(net->net_cpts, net->net_ncpts, cpts[i])) {
+                       added_cpts[j] = cpts[i];
+                       j++;
+               }
+       }
+
+       /* append the new cpts if any to the list of cpts in the net */
+       if (j > 0) {
+               __u32 *array = NULL, *loc;
+               __u32 total_entries = j + net->net_ncpts;
+
+               LIBCFS_ALLOC(array, sizeof(*net->net_cpts) * total_entries);
+               if (array == NULL) {
+                       rc = -ENOMEM;
+                       goto failed;
+               }
+
+               /* old entries first, then the collected ones after them */
+               memcpy(array, net->net_cpts,
+                      net->net_ncpts * sizeof(*net->net_cpts));
+               loc = array + net->net_ncpts;
+               memcpy(loc, added_cpts, j * sizeof(*net->net_cpts));
+
+               LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) *
+                           net->net_ncpts);
+               net->net_ncpts = total_entries;
+               net->net_cpts = array;
+       }
+
+failed:
+       /* the free size must match the allocation size used above */
+       LIBCFS_FREE(added_cpts, sizeof(*added_cpts) * ncpts);
+
+       return rc;
+}
+
+/*
+ * Recompute the CPT list of @net after one of its NIs has been removed.
+ *
+ * @cpts:  CPT array of the removed NI (not used by this function)
+ * @ncpts: number of CPTs the removed NI existed on; 0 means the NI never
+ *         had its CPTs set up and nothing is done
+ * @net:   net whose net_cpts/net_ncpts are rebuilt from the NIs left on
+ *         net->net_ni_list
+ */
+static void
+lnet_net_remove_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net)
+{
+       struct lnet_ni *ni;
+       int rc;
+
+       /*
+        * Operation Assumption:
+        *      This function is called after an NI has been removed from
+        *      its parent net.
+        *
+        * if we're removing an NI which exists on all CPTs then
+        * we have to check if any of the other NIs on this net also
+        * exists on all CPTs. If none, then we need to build our Net CPT
+        * list based on the remaining NIs.
+        *
+        * If the NI being removed exist on a subset of the CPTs then we
+        * also rebuild the Net CPT list based on the remaining NIs, which
+        * should result in the expected Net CPT list.
+        */
+
+       /*
+        * sometimes this function can be called due to some failure
+        * creating an NI, before any of the cpts are allocated, so check
+        * for that case and don't do anything
+        */
+       if (ncpts == 0)
+               return;
+
+       if (ncpts == LNET_CPT_NUMBER) {
+               /*
+                * first iteration through the NI list in the net to see
+                * if any of the NIs exist on all the CPTs. If one is
+                * found then our job is done.
+                */
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       if (ni->ni_ncpts == LNET_CPT_NUMBER)
+                               return;
+               }
+       }
+
+       /*
+        * Rebuild the Net CPT list again, thereby including only the
+        * CPTs which the remaining NIs are associated with.
+        */
+       if (net->net_cpts != NULL) {
+               LIBCFS_FREE(net->net_cpts,
+                       sizeof(*net->net_cpts) * net->net_ncpts);
+               net->net_cpts = NULL;
+       }
+
+       /*
+        * Start the rebuild from an empty list. lnet_net_append_cpts()
+        * returns without doing anything while net_ncpts is
+        * LNET_CPT_NUMBER, so a stale count left behind by the removed NI
+        * would prevent the list from ever being rebuilt.
+        */
+       net->net_ncpts = 0;
+
+       list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+               rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts,
+                                         net);
+               if (rc != 0) {
+                       CERROR("Out of Memory\n");
+                       /*
+                        * do our best to keep on going. Delete
+                        * the net cpts and set it to NULL. This
+                        * way we can keep on going but less
+                        * efficiently, since memory accesses might be
+                        * across CPT lines.
+                        */
+                       if (net->net_cpts != NULL) {
+                               LIBCFS_FREE(net->net_cpts,
+                                               sizeof(*net->net_cpts) *
+                                               net->net_ncpts);
+                               net->net_cpts = NULL;
+                       }
+                       /*
+                        * Fall back to "all CPTs" even when the very
+                        * first append failed and no list was built.
+                        */
+                       net->net_ncpts = LNET_CPT_NUMBER;
+                       return;
+               }
+       }
 }
 
 void
@@ -101,6 +300,8 @@ lnet_ni_free(struct lnet_ni *ni)
 {
        int i;
 
+       lnet_net_remove_cpts(ni->ni_cpts, ni->ni_ncpts, ni->ni_net);
+
        if (ni->ni_refs != NULL)
                cfs_percpt_free(ni->ni_refs);
 
@@ -110,10 +311,7 @@ lnet_ni_free(struct lnet_ni *ni)
        if (ni->ni_cpts != NULL)
                cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
 
-       if (ni->ni_lnd_tunables != NULL)
-               LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables));
-
-       for (i = 0; i < LNET_MAX_INTERFACES &&
+       for (i = 0; i < LNET_INTERFACES_NUM &&
                    ni->ni_interfaces[i] != NULL; i++) {
                LIBCFS_FREE(ni->ni_interfaces[i],
                            strlen(ni->ni_interfaces[i]) + 1);
@@ -126,29 +324,144 @@ lnet_ni_free(struct lnet_ni *ni)
        LIBCFS_FREE(ni, sizeof(*ni));
 }
 
-lnet_ni_t *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
+/*
+ * Free @net together with every NI still linked to it.
+ *
+ * The zombie list of the net must already be empty.  NIs on net_ni_added
+ * and on net_ni_list are unlinked and freed, then the net's CPT array
+ * (if any) and the net itself are released.
+ */
+void
+lnet_net_free(struct lnet_net *net)
+{
+       struct lnet_ni *ni;
+       struct lnet_ni *next;
+
+       LASSERT(list_empty(&net->net_ni_zombie));
+
+       /*
+        * NIs that were allocated but never added, which can happen if
+        * there is a failure on net startup
+        */
+       list_for_each_entry_safe(ni, next, &net->net_ni_added, ni_netlist) {
+               list_del_init(&ni->ni_netlist);
+               lnet_ni_free(ni);
+       }
+
+       /* NIs which have been started */
+       list_for_each_entry_safe(ni, next, &net->net_ni_list, ni_netlist) {
+               list_del_init(&ni->ni_netlist);
+               lnet_ni_free(ni);
+       }
+
+       if (net->net_cpts != NULL)
+               LIBCFS_FREE(net->net_cpts,
+                           sizeof(*net->net_cpts) * net->net_ncpts);
+
+       LIBCFS_FREE(net, sizeof(*net));
+}
+
+/*
+ * Allocate and initialise a net with id @net_id.
+ *
+ * Returns NULL if lnet_net_unique() finds a net with the same id on
+ * @net_list or if the allocation fails.  Otherwise the new net is
+ * returned, appended to @net_list when @net_list is non-NULL.
+ */
+struct lnet_net *
+lnet_net_alloc(__u32 net_id, struct list_head *net_list)
+{
+       struct lnet_net *new_net;
+
+       if (!lnet_net_unique(net_id, net_list, NULL)) {
+               CERROR("Duplicate net %s. Ignore\n",
+                      libcfs_net2str(net_id));
+               return NULL;
+       }
+
+       LIBCFS_ALLOC(new_net, sizeof(*new_net));
+       if (!new_net) {
+               CERROR("Out of memory creating network %s\n",
+                      libcfs_net2str(net_id));
+               return NULL;
+       }
+
+       new_net->net_id = net_id;
+       new_net->net_last_alive = ktime_get_real_seconds();
+
+       spin_lock_init(&new_net->net_lock);
+       INIT_LIST_HEAD(&new_net->net_list);
+       INIT_LIST_HEAD(&new_net->net_ni_list);
+       INIT_LIST_HEAD(&new_net->net_ni_added);
+       INIT_LIST_HEAD(&new_net->net_ni_zombie);
+
+       /* -1 marks each net-wide tunable as undefined */
+       new_net->net_tunables.lct_peer_timeout = -1;
+       new_net->net_tunables.lct_max_tx_credits = -1;
+       new_net->net_tunables.lct_peer_tx_credits = -1;
+       new_net->net_tunables.lct_peer_rtr_credits = -1;
+
+       if (net_list != NULL)
+               list_add_tail(&new_net->net_list, net_list);
+
+       return new_net;
+}
+
+/*
+ * Record the interface name @iface in the first free slot of
+ * ni->ni_interfaces[].
+ *
+ * The name is copied into separately allocated memory so that there is
+ * no dependency on the caller's tokens string; the copy is freed when
+ * the NI is freed.
+ *
+ * Returns 0 on success, -ENOMEM if @ni is NULL or the copy cannot be
+ * allocated, and -EINVAL if lnet_ni_unique_ni() rejects @iface or all
+ * LNET_INTERFACES_NUM slots are already in use.
+ */
+static int
+lnet_ni_add_interface(struct lnet_ni *ni, char *iface)
+{
+       size_t name_size;
+       int slot;
+
+       if (!ni)
+               return -ENOMEM;
+
+       if (!lnet_ni_unique_ni(ni->ni_interfaces, iface))
+               return -EINVAL;
+
+       /* look for the first unused entry */
+       for (slot = 0; slot < LNET_INTERFACES_NUM; slot++) {
+               if (ni->ni_interfaces[slot] == NULL)
+                       break;
+       }
+
+       if (slot == LNET_INTERFACES_NUM) {
+               LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
+                                  "for net %s\n",
+                                  libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+               return -EINVAL;
+       }
+
+       /* room for the name and its terminating NUL */
+       name_size = strlen(iface) + 1;
+
+       LIBCFS_ALLOC(ni->ni_interfaces[slot], name_size);
+       if (ni->ni_interfaces[slot] == NULL) {
+               CERROR("Can't allocate net interface name\n");
+               return -ENOMEM;
+       }
+
+       memcpy(ni->ni_interfaces[slot], iface, name_size);
+
+       return 0;
+}
+
+static struct lnet_ni *
+lnet_ni_alloc_common(struct lnet_net *net, char *iface)
 {
        struct lnet_tx_queue    *tq;
        struct lnet_ni          *ni;
-       int                     rc;
        int                     i;
 
-       if (!lnet_net_unique(net, nilist)) {
-               LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
-                                  libcfs_net2str(net));
-               return NULL;
-       }
+       if (iface != NULL)
+               /* make sure that this NI is unique in the net it's
+                * being added to */
+               if (!lnet_ni_unique_net(&net->net_ni_added, iface))
+                       return NULL;
 
        LIBCFS_ALLOC(ni, sizeof(*ni));
        if (ni == NULL) {
-               CERROR("Out of memory creating network %s\n",
-                      libcfs_net2str(net));
+               CERROR("Out of memory creating network interface %s%s\n",
+                      libcfs_net2str(net->net_id),
+                      (iface != NULL) ? iface : "");
                return NULL;
        }
 
        spin_lock_init(&ni->ni_lock);
-       INIT_LIST_HEAD(&ni->ni_cptlist);
+       INIT_LIST_HEAD(&ni->ni_netlist);
+       INIT_LIST_HEAD(&ni->ni_recovery);
+       LNetInvalidateMDHandle(&ni->ni_ping_mdh);
        ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
                                       sizeof(*ni->ni_refs[0]));
        if (ni->ni_refs == NULL)
@@ -162,14 +475,53 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
        cfs_percpt_for_each(tq, i, ni->ni_tx_queues)
                INIT_LIST_HEAD(&tq->tq_delayed);
 
-       if (el == NULL) {
+       ni->ni_net = net;
+       /* LND will fill in the address part of the NID */
+       ni->ni_nid = LNET_MKNID(net->net_id, 0);
+
+       /* Store net namespace in which current ni is being created */
+       if (current->nsproxy && current->nsproxy->net_ns)
+               ni->ni_net_ns = get_net(current->nsproxy->net_ns);
+       else
+               ni->ni_net_ns = get_net(&init_net);
+
+       ni->ni_state = LNET_NI_STATE_INIT;
+       list_add_tail(&ni->ni_netlist, &net->net_ni_added);
+
+       /*
+        * if an interface name is provided then make sure to add in that
+        * interface name in NI
+        */
+       if (iface)
+               if (lnet_ni_add_interface(ni, iface) != 0)
+                       goto failed;
+
+       return ni;
+failed:
+       lnet_ni_free(ni);
+       return NULL;
+}
+
+/* allocate and add to the provided network */
+struct lnet_ni *
+lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface)
+{
+       struct lnet_ni          *ni;
+       int                     rc;
+
+       ni = lnet_ni_alloc_common(net, iface);
+       if (!ni)
+               return NULL;
+
+       if (!el) {
                ni->ni_cpts  = NULL;
                ni->ni_ncpts = LNET_CPT_NUMBER;
        } else {
                rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
                if (rc <= 0) {
-                       CERROR("Failed to set CPTs for NI %s: %d\n",
-                              libcfs_net2str(net), rc);
+                       CERROR("Failed to set CPTs for NI %s(%s): %d\n",
+                              libcfs_net2str(net->net_id),
+                              (iface != NULL) ? iface : "", rc);
                        goto failed;
                }
 
@@ -182,35 +534,66 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
                ni->ni_ncpts = rc;
        }
 
-       /* LND will fill in the address part of the NID */
-       ni->ni_nid = LNET_MKNID(net, 0);
+       rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
+       if (rc != 0)
+               goto failed;
 
-       /* Store net namespace in which current ni is being created */
-       if (current->nsproxy->net_ns != NULL)
-               ni->ni_net_ns = get_net(current->nsproxy->net_ns);
-       else
-               ni->ni_net_ns = NULL;
+       return ni;
+failed:
+       lnet_ni_free(ni);
+       return NULL;
+}
+
+/*
+ * Allocate an NI on @net whose CPTs are given as a plain array.
+ *
+ * @net:   net the NI is allocated on (via lnet_ni_alloc_common())
+ * @cpts:  array of @ncpts CPT numbers, copied into the NI
+ * @ncpts: number of entries in @cpts; 0 means the NI exists on all CPTs
+ * @iface: interface name handed to lnet_ni_alloc_common(), may be NULL
+ *
+ * The NI's CPTs are merged into the net's CPT list.  Returns the new NI,
+ * or NULL on failure.
+ */
+struct lnet_ni *
+lnet_ni_alloc_w_cpt_array(struct lnet_net *net, __u32 *cpts, __u32 ncpts,
+                         char *iface)
+{
+       struct lnet_ni          *ni;
+       int                     rc;
+
+       ni = lnet_ni_alloc_common(net, iface);
+       if (!ni)
+               return NULL;
+
+       if (ncpts == 0) {
+               ni->ni_cpts  = NULL;
+               ni->ni_ncpts = LNET_CPT_NUMBER;
+       } else {
+               size_t array_size = ncpts * sizeof(ni->ni_cpts[0]);
+               LIBCFS_ALLOC(ni->ni_cpts, array_size);
+               if (ni->ni_cpts == NULL)
+                       goto failed;
+               memcpy(ni->ni_cpts, cpts, array_size);
+               ni->ni_ncpts = ncpts;
+       }
+
+       rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
+       if (rc != 0)
+               goto failed;
 
-       ni->ni_last_alive = cfs_time_current_sec();
-       list_add_tail(&ni->ni_list, nilist);
        return ni;
- failed:
+failed:
+       /*
+        * lnet_ni_alloc_common() linked the NI onto net->net_ni_added.
+        * Unlink it before freeing so the net is not left with a dangling
+        * list entry.
+        * NOTE(review): assumes lnet_ni_free() does not unlink the NI
+        * itself; list_del_init() keeps a later unlink harmless.
+        */
+       list_del_init(&ni->ni_netlist);
        lnet_ni_free(ni);
        return NULL;
 }
 
+/*
+ * Parse the networks string and create the matching nets, together with
+ * their NIs, on netlist.
+ */
 int
-lnet_parse_networks(struct list_head *nilist, char *networks)
+lnet_parse_networks(struct list_head *netlist, char *networks,
+                   bool use_tcp_bonding)
 {
-       struct cfs_expr_list *el = NULL;
+       struct cfs_expr_list *net_el = NULL;
+       struct cfs_expr_list *ni_el = NULL;
        int             tokensize;
        char            *tokens;
        char            *str;
-       char            *tmp;
-       struct lnet_ni  *ni;
-       __u32           net;
+       struct lnet_net *net;
+       struct lnet_ni  *ni = NULL;
+       __u32           net_id;
        int             nnets = 0;
-       struct list_head *temp_node;
 
        if (networks == NULL) {
                CERROR("networks string is undefined\n");
@@ -233,173 +616,238 @@ lnet_parse_networks(struct list_head *nilist, char *networks)
        }
 
        memcpy(tokens, networks, tokensize);
-       str = tmp = tokens;
-
-       while (str != NULL && *str != 0) {
-               char    *comma = strchr(str, ',');
-               char    *bracket = strchr(str, '(');
-               char    *square = strchr(str, '[');
-               char    *iface;
-               int     niface;
-               int     rc;
-
-               /* NB we don't check interface conflicts here; it's the LNDs
-                * responsibility (if it cares at all) */
-
-               if (square != NULL && (comma == NULL || square < comma)) {
-                       /* i.e: o2ib0(ib0)[1,2], number between square
-                        * brackets are CPTs this NI needs to be bond */
-                       if (bracket != NULL && bracket > square) {
-                               tmp = square;
+       str = tokens;
+
+       /*
+        * Main parser loop.
+        *
+        * NB we don't check interface conflicts here; it's the LNDs
+        * responsibility (if it cares at all)
+        */
+       do {
+               char *nistr;
+               char *elstr;
+               char *name;
+               int rc;
+
+               /*
+                * Parse a network string into its components.
+                *
+                * <name>{"("...")"}{"["<el>"]"}
+                */
+
+               /* Network name (mandatory) */
+               while (isspace(*str))
+                       *str++ = '\0';
+               if (!*str)
+                       break;
+               name = str;
+               str += strcspn(str, SPACESTR ":()[],");
+               while (isspace(*str))
+                       *str++ = '\0';
+
+               /* Interface list (optional) */
+               if (*str == '(') {
+                       *str++ = '\0';
+                       nistr = str;
+                       str += strcspn(str, ")");
+                       if (*str != ')') {
+                               str = nistr;
                                goto failed_syntax;
                        }
+                       do {
+                               *str++ = '\0';
+                       } while (isspace(*str));
+               } else {
+                       nistr = NULL;
+               }
 
-                       tmp = strchr(square, ']');
-                       if (tmp == NULL) {
-                               tmp = square;
+               /* CPT expression (optional) */
+               if (*str == '[') {
+                       elstr = str;
+                       str += strcspn(str, "]");
+                       if (*str != ']') {
+                               str = elstr;
                                goto failed_syntax;
                        }
-
-                       rc = cfs_expr_list_parse(square, tmp - square + 1,
-                                                0, LNET_CPT_NUMBER - 1, &el);
+                       rc = cfs_expr_list_parse(elstr, str - elstr + 1,
+                                               0, LNET_CPT_NUMBER - 1,
+                                               &net_el);
                        if (rc != 0) {
-                               tmp = square;
+                               str = elstr;
                                goto failed_syntax;
                        }
-
-                       while (square <= tmp)
-                               *square++ = ' ';
+                       *elstr = '\0';
+                       do {
+                               *str++ = '\0';
+                       } while (isspace(*str));
                }
 
-               if (bracket == NULL ||
-                   (comma != NULL && comma < bracket)) {
-
-                       /* no interface list specified */
-
-                       if (comma != NULL)
-                               *comma++ = 0;
-                       net = libcfs_str2net(cfs_trimwhite(str));
-
-                       if (net == LNET_NIDNET(LNET_NID_ANY)) {
-                               LCONSOLE_ERROR_MSG(0x113, "Unrecognised network"
-                                                  " type\n");
-                               tmp = str;
-                               goto failed_syntax;
-                       }
+               /* Bad delimiters */
+               if (*str && (strchr(DELIMITERS, *str) != NULL))
+                       goto failed_syntax;
 
-                       if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
-                           lnet_ni_alloc(net, el, nilist) == NULL)
-                               goto failed;
+               /* go to the next net if it exists */
+               str += strcspn(str, ",");
+               if (*str == ',')
+                       *str++ = '\0';
+
+               /*
+                * At this point the name is properly terminated.
+                */
+               net_id = libcfs_str2net(name);
+               if (net_id == LNET_NIDNET(LNET_NID_ANY)) {
+                       LCONSOLE_ERROR_MSG(0x113,
+                                       "Unrecognised network type\n");
+                       str = name;
+                       goto failed_syntax;
+               }
 
-                       if (el != NULL) {
-                               cfs_expr_list_free(el);
-                               el = NULL;
+               if (LNET_NETTYP(net_id) == LOLND) {
+                       /* Loopback is implicit, and there can be only one. */
+                       if (net_el) {
+                               cfs_expr_list_free(net_el);
+                               net_el = NULL;
                        }
-
-                       str = comma;
+                       /* Should we error out instead? */
                        continue;
                }
 
-               *bracket = 0;
-               net = libcfs_str2net(cfs_trimwhite(str));
-               if (net == LNET_NIDNET(LNET_NID_ANY)) {
-                       tmp = str;
-                       goto failed_syntax;
-               }
+               /*
+                * All network parameters are now known.
+                */
+               nnets++;
 
-               ni = lnet_ni_alloc(net, el, nilist);
-               if (ni == NULL)
+               /* always allocate a net, since we will eventually add an
+                * interface to it, or we will fail, in which case we'll
+                * just delete it */
+               net = lnet_net_alloc(net_id, netlist);
+               if (IS_ERR_OR_NULL(net))
                        goto failed;
 
-               if (el != NULL) {
-                       cfs_expr_list_free(el);
-                       el = NULL;
-               }
-
-               niface = 0;
-               iface = bracket + 1;
+               if (!nistr ||
+                   (use_tcp_bonding && LNET_NETTYP(net_id) == SOCKLND)) {
+                       /*
+                        * No interface list was specified, allocate a
+                        * ni using the defaults.
+                        */
+                       ni = lnet_ni_alloc(net, net_el, NULL);
+                       if (IS_ERR_OR_NULL(ni))
+                               goto failed;
 
-               bracket = strchr(iface, ')');
-               if (bracket == NULL) {
-                       tmp = iface;
-                       goto failed_syntax;
+                       if (!nistr) {
+                               if (net_el) {
+                                       cfs_expr_list_free(net_el);
+                                       net_el = NULL;
+                               }
+                               continue;
+                       }
                }
 
-               *bracket = 0;
                do {
-                       comma = strchr(iface, ',');
-                       if (comma != NULL)
-                               *comma++ = 0;
+                       elstr = NULL;
+
+                       /* Interface name (mandatory) */
+                       while (isspace(*nistr))
+                               *nistr++ = '\0';
+                       name = nistr;
+                       nistr += strcspn(nistr, SPACESTR "[],");
+                       while (isspace(*nistr))
+                               *nistr++ = '\0';
+
+                       /* CPT expression (optional) */
+                       if (*nistr == '[') {
+                               elstr = nistr;
+                               nistr += strcspn(nistr, "]");
+                               if (*nistr != ']') {
+                                       str = elstr;
+                                       goto failed_syntax;
+                               }
+                               rc = cfs_expr_list_parse(elstr,
+                                                       nistr - elstr + 1,
+                                                       0, LNET_CPT_NUMBER - 1,
+                                                       &ni_el);
+                               if (rc != 0) {
+                                       str = elstr;
+                                       goto failed_syntax;
+                               }
+                               *elstr = '\0';
+                               do {
+                                       *nistr++ = '\0';
+                               } while (isspace(*nistr));
+                       } else {
+                               ni_el = net_el;
+                       }
 
-                       iface = cfs_trimwhite(iface);
-                       if (*iface == 0) {
-                               tmp = iface;
+                       /*
+                        * End of single interface specification,
+                        * advance to the start of the next one, if
+                        * any.
+                        */
+                       if (*nistr == ',') {
+                               do {
+                                       *nistr++ = '\0';
+                               } while (isspace(*nistr));
+                               if (!*nistr) {
+                                       str = nistr;
+                                       goto failed_syntax;
+                               }
+                       } else if (*nistr) {
+                               str = nistr;
                                goto failed_syntax;
                        }
 
-                       if (niface == LNET_MAX_INTERFACES) {
-                               LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
-                                                  "for net %s\n",
-                                                  libcfs_net2str(net));
-                               goto failed;
+                       /*
+                        * At this point the name is properly terminated.
+                        */
+                       if (!*name) {
+                               str = name;
+                               goto failed_syntax;
                        }
 
-                       /* Allocate a separate piece of memory and copy
-                        * into it the string, so we don't have
-                        * a depencency on the tokens string.  This way we
-                        * can free the tokens at the end of the function.
-                        * The newly allocated ni_interfaces[] can be
-                        * freed when freeing the NI */
-                       LIBCFS_ALLOC(ni->ni_interfaces[niface],
-                                    strlen(iface) + 1);
-                       if (ni->ni_interfaces[niface] == NULL) {
-                               CERROR("Can't allocate net interface name\n");
-                               goto failed;
+                       if (use_tcp_bonding &&
+                           LNET_NETTYP(net->net_id) == SOCKLND) {
+                               rc = lnet_ni_add_interface(ni, name);
+                               if (rc != 0)
+                                       goto failed;
+                       } else {
+                               ni = lnet_ni_alloc(net, ni_el, name);
+                               if (IS_ERR_OR_NULL(ni))
+                                       goto failed;
                        }
-                       strncpy(ni->ni_interfaces[niface], iface,
-                               strlen(iface));
-                       niface++;
-                       iface = comma;
-               } while (iface != NULL);
-
-               str = bracket + 1;
-               comma = strchr(bracket + 1, ',');
-               if (comma != NULL) {
-                       *comma = 0;
-                       str = cfs_trimwhite(str);
-                       if (*str != 0) {
-                               tmp = str;
-                               goto failed_syntax;
+
+                       if (ni_el) {
+                               if (ni_el != net_el) {
+                                       cfs_expr_list_free(ni_el);
+                                       ni_el = NULL;
+                               }
                        }
-                       str = comma + 1;
-                       continue;
-               }
+               } while (*nistr);
 
-               str = cfs_trimwhite(str);
-               if (*str != 0) {
-                       tmp = str;
-                       goto failed_syntax;
+               if (net_el) {
+                       cfs_expr_list_free(net_el);
+                       net_el = NULL;
                }
-       }
-
-       list_for_each(temp_node, nilist)
-               nnets++;
+       } while (*str);
 
        LIBCFS_FREE(tokens, tokensize);
        return nnets;
 
  failed_syntax:
-       lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
+       lnet_syntax("networks", networks, (int)(str - tokens), strlen(str));
  failed:
-       while (!list_empty(nilist)) {
-               ni = list_entry(nilist->next, lnet_ni_t, ni_list);
+       /* free the net list and all the nis on each net */
+       while (!list_empty(netlist)) {
+               net = list_entry(netlist->next, struct lnet_net, net_list);
 
-               list_del(&ni->ni_list);
-               lnet_ni_free(ni);
+               list_del_init(&net->net_list);
+               lnet_net_free(net);
        }
 
-       if (el != NULL)
-               cfs_expr_list_free(el);
+       if (ni_el && ni_el != net_el)
+               cfs_expr_list_free(ni_el);
+       if (net_el)
+               cfs_expr_list_free(net_el);
 
        LIBCFS_FREE(tokens, tokensize);
 
@@ -454,31 +902,14 @@ lnet_free_text_bufs(struct list_head *tbs)
        }
 }
 
-void
-lnet_print_text_bufs(struct list_head *tbs)
-{
-       struct list_head *tmp;
-       struct lnet_text_buf  *ltb;
-
-       list_for_each(tmp, tbs) {
-               ltb = list_entry(tmp, struct lnet_text_buf, ltb_list);
-
-               CDEBUG(D_WARNING, "%s\n", ltb->ltb_text);
-       }
-
-       CDEBUG(D_WARNING, "%d allocated\n", lnet_tbnob);
-}
-
 static int
 lnet_str2tbs_sep(struct list_head *tbs, char *str)
 {
-       struct list_head  pending;
-       char             *sep;
-       int               nob;
-       int               i;
-       struct lnet_text_buf  *ltb;
-
-       INIT_LIST_HEAD(&pending);
+       LIST_HEAD(pending);
+       char *sep;
+       int nob;
+       int i;
+       struct lnet_text_buf *ltb;
 
        /* Split 'str' into separate commands */
        for (;;) {
@@ -556,7 +987,7 @@ static int
 lnet_str2tbs_expand(struct list_head *tbs, char *str)
 {
        char              num[16];
-       struct list_head  pending;
+       LIST_HEAD(pending);
        char             *sep;
        char             *sep2;
        char             *parsed;
@@ -568,8 +999,6 @@ lnet_str2tbs_expand(struct list_head *tbs, char *str)
        int               nob;
        int               scanned;
 
-       INIT_LIST_HEAD(&pending);
-
        sep = strchr(str, '[');
        if (sep == NULL)                        /* nothing to expand */
                return 0;
@@ -679,10 +1108,10 @@ static int
 lnet_parse_route (char *str, int *im_a_router)
 {
        /* static scratch buffer OK (single threaded) */
-       static char       cmd[LNET_SINGLE_TEXTBUF_NOB];
+       static char cmd[LNET_SINGLE_TEXTBUF_NOB];
 
-       struct list_head  nets;
-       struct list_head  gateways;
+       LIST_HEAD(nets);
+       LIST_HEAD(gateways);
        struct list_head *tmp1;
        struct list_head *tmp2;
        __u32             net;
@@ -697,9 +1126,6 @@ lnet_parse_route (char *str, int *im_a_router)
        int               got_hops = 0;
        unsigned int      priority = 0;
 
-       INIT_LIST_HEAD(&gateways);
-       INIT_LIST_HEAD(&nets);
-
        /* save a copy of the string for error messages */
        strncpy(cmd, str, sizeof(cmd));
        cmd[sizeof(cmd) - 1] = '\0';
@@ -799,7 +1225,7 @@ lnet_parse_route (char *str, int *im_a_router)
                                continue;
                        }
 
-                       rc = lnet_add_route(net, hops, nid, priority);
+                       rc = lnet_add_route(net, hops, nid, priority, 1);
                        if (rc != 0 && rc != -EEXIST && rc != -EHOSTUNREACH) {
                                CERROR("Can't create route "
                                       "to %s via %s\n",
@@ -844,13 +1270,11 @@ lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router)
 int
 lnet_parse_routes (char *routes, int *im_a_router)
 {
-       struct list_head tbs;
-       int              rc = 0;
+       LIST_HEAD(tbs);
+       int rc = 0;
 
        *im_a_router = 0;
 
-       INIT_LIST_HEAD(&tbs);
-
        if (lnet_str2tbs_sep(&tbs, routes) < 0) {
                CERROR("Error parsing routes\n");
                rc = -EINVAL;
@@ -865,7 +1289,7 @@ lnet_parse_routes (char *routes, int *im_a_router)
 static int
 lnet_match_network_token(char *token, int len, __u32 *ipaddrs, int nip)
 {
-       struct list_head list = LIST_HEAD_INIT(list);
+       LIST_HEAD(list);
        int             rc;
        int             i;
 
@@ -1043,9 +1467,9 @@ lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
        static char       networks[LNET_SINGLE_TEXTBUF_NOB];
        static char       source[LNET_SINGLE_TEXTBUF_NOB];
 
-       struct list_head  raw_entries;
-       struct list_head  matched_nets;
-       struct list_head  current_nets;
+       LIST_HEAD(raw_entries);
+       LIST_HEAD(matched_nets);
+       LIST_HEAD(current_nets);
        struct list_head *t;
        struct list_head *t2;
        struct lnet_text_buf  *tb;
@@ -1057,15 +1481,12 @@ lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
        int               dup;
        int               rc;
 
-       INIT_LIST_HEAD(&raw_entries);
        if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) {
                CERROR("Error parsing ip2nets\n");
                LASSERT(lnet_tbnob == 0);
                return -EINVAL;
        }
 
-       INIT_LIST_HEAD(&matched_nets);
-       INIT_LIST_HEAD(&current_nets);
        networks[0] = 0;
        count = 0;
        len = 0;
@@ -1127,12 +1548,11 @@ lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
                list_for_each_safe(t, t2, &current_nets) {
                        tb = list_entry(t, struct lnet_text_buf, ltb_list);
 
-                       list_del(&tb->ltb_list);
-                       list_add_tail(&tb->ltb_list, &matched_nets);
+                       list_move_tail(&tb->ltb_list, &matched_nets);
 
-                       len += snprintf(networks + len, sizeof(networks) - len,
-                                       "%s%s", (len == 0) ? "" : ",",
-                                       tb->ltb_text);
+                       len += scnprintf(networks + len, sizeof(networks) - len,
+                                        "%s%s", (len == 0) ? "" : ",",
+                                        tb->ltb_text);
 
                        if (len >= sizeof(networks)) {
                                CERROR("Too many matched networks\n");
@@ -1156,111 +1576,139 @@ lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip)
        *networksp = networks;
        return count;
 }
-
-static void
-lnet_ipaddr_free_enumeration(__u32 *ipaddrs, int nip)
-{
-       LIBCFS_FREE(ipaddrs, nip * sizeof(*ipaddrs));
-}
-
-static int
-lnet_ipaddr_enumerate (__u32 **ipaddrsp)
+/*
+ * kernel 5.3: commit ef11db3310e272d3d8dbe8739e0770820dd20e52
+ * added in_dev_for_each_ifa_rtnl and in_dev_for_each_ifa_rcu
+ * and removed for_ifa and endfor_ifa.
+ * Use the _rtnl variant as the current locking is rtnl.
+ */
+#ifdef in_dev_for_each_ifa_rtnl
+#define DECLARE_CONST_IN_IFADDR(ifa)           const struct in_ifaddr *ifa
+#define endfor_ifa(in_dev)
+#else
+#define DECLARE_CONST_IN_IFADDR(ifa)
+#define in_dev_for_each_ifa_rtnl(ifa, in_dev)  for_ifa((in_dev))
+#endif
+
+int lnet_inet_enumerate(struct lnet_inetdev **dev_list, struct net *ns)
 {
-       int        up;
-       __u32      netmask;
-       __u32     *ipaddrs;
-       __u32     *ipaddrs2;
-       int        nip;
-       char     **ifnames;
-       int        nif = lnet_ipif_enumerate(&ifnames);
-       int        i;
-       int        rc;
-
-       if (nif <= 0)
-               return nif;
-
-       LIBCFS_ALLOC(ipaddrs, nif * sizeof(*ipaddrs));
-       if (ipaddrs == NULL) {
-               CERROR("Can't allocate ipaddrs[%d]\n", nif);
-               lnet_ipif_free_enumeration(ifnames, nif);
-               return -ENOMEM;
-       }
-
-       for (i = nip = 0; i < nif; i++) {
-               if (!strcmp(ifnames[i], "lo"))
+       struct lnet_inetdev *ifaces = NULL;
+       struct net_device *dev;
+       int nalloc = 0;
+       int nip = 0;
+       DECLARE_CONST_IN_IFADDR(ifa);
+
+       rtnl_lock();
+       for_each_netdev(ns, dev) {
+               int flags = dev_get_flags(dev);
+               struct in_device *in_dev;
+               int node_id;
+               int cpt;
+
+               if (flags & IFF_LOOPBACK) /* skip the loopback IF */
                        continue;
 
-               rc = lnet_ipif_query(ifnames[i], &up,
-                                      &ipaddrs[nip], &netmask);
-               if (rc != 0) {
-                       CWARN("Can't query interface %s: %d\n",
-                             ifnames[i], rc);
+               if (!(flags & IFF_UP)) {
+                       CWARN("lnet: Ignoring interface %s: it's down\n",
+                             dev->name);
                        continue;
                }
 
-               if (!up) {
-                       CWARN("Ignoring interface %s: it's down\n",
-                             ifnames[i]);
+               in_dev = __in_dev_get_rtnl(dev);
+               if (!in_dev) {
+                       CWARN("lnet: Interface %s has no IPv4 status.\n",
+                             dev->name);
                        continue;
                }
 
-               nip++;
-       }
-
-       lnet_ipif_free_enumeration(ifnames, nif);
-
-       if (nip == nif) {
-               *ipaddrsp = ipaddrs;
-       } else {
-               if (nip > 0) {
-                       LIBCFS_ALLOC(ipaddrs2, nip * sizeof(*ipaddrs2));
-                       if (ipaddrs2 == NULL) {
-                               CERROR("Can't allocate ipaddrs[%d]\n", nip);
-                               nip = -ENOMEM;
-                       } else {
-                               memcpy(ipaddrs2, ipaddrs,
-                                       nip * sizeof(*ipaddrs));
-                               *ipaddrsp = ipaddrs2;
-                               rc = nip;
+               node_id = dev_to_node(&dev->dev);
+               cpt = cfs_cpt_of_node(lnet_cpt_table(), node_id);
+
+               in_dev_for_each_ifa_rtnl(ifa, in_dev) {
+                       if (nip >= nalloc) {
+                               struct lnet_inetdev *tmp;
+
+                               nalloc += LNET_INTERFACES_NUM;
+                               tmp = krealloc(ifaces, nalloc * sizeof(*tmp),
+                                              GFP_KERNEL);
+                               if (!tmp) {
+                                       kfree(ifaces);
+                                       ifaces = NULL;
+                                       nip = -ENOMEM;
+                                       goto unlock_rtnl;
+                               }
+                               ifaces = tmp;
                        }
+
+                       ifaces[nip].li_cpt = cpt;
+                       ifaces[nip].li_flags = flags;
+                       ifaces[nip].li_ipaddr = ntohl(ifa->ifa_local);
+                       ifaces[nip].li_netmask = ntohl(ifa->ifa_mask);
+                       strlcpy(ifaces[nip].li_name, ifa->ifa_label,
+                               sizeof(ifaces[nip].li_name));
+                       nip++;
                }
-               lnet_ipaddr_free_enumeration(ipaddrs, nif);
+               endfor_ifa(in_dev);
        }
+unlock_rtnl:
+       rtnl_unlock();
+
+       if (nip == 0) {
+               CERROR("lnet: Can't find any usable interfaces, rc = -ENOENT\n");
+               nip = -ENOENT;
+       }
+
+       *dev_list = ifaces;
        return nip;
 }
+EXPORT_SYMBOL(lnet_inet_enumerate);
 
 int
 lnet_parse_ip2nets (char **networksp, char *ip2nets)
 {
+       struct lnet_inetdev *ifaces = NULL;
        __u32     *ipaddrs = NULL;
-       int        nip = lnet_ipaddr_enumerate(&ipaddrs);
+       int nip;
        int        rc;
+       int i;
 
+       if (current->nsproxy && current->nsproxy->net_ns)
+               nip = lnet_inet_enumerate(&ifaces, current->nsproxy->net_ns);
+       else
+               nip = lnet_inet_enumerate(&ifaces, &init_net);
        if (nip < 0) {
-               LCONSOLE_ERROR_MSG(0x117, "Error %d enumerating local IP "
-                                  "interfaces for ip2nets to match\n", nip);
+               if (nip != -ENOENT) {
+                       LCONSOLE_ERROR_MSG(0x117,
+                                          "Error %d enumerating local IP interfaces for ip2nets to match\n",
+                                          nip);
+               } else {
+                       LCONSOLE_ERROR_MSG(0x118,
+                                          "No local IP interfaces for ip2nets to match\n");
+               }
                return nip;
        }
 
-       if (nip == 0) {
-               LCONSOLE_ERROR_MSG(0x118, "No local IP interfaces "
-                                  "for ip2nets to match\n");
-               return -ENOENT;
+       LIBCFS_ALLOC(ipaddrs, nip * sizeof(*ipaddrs));
+       if (!ipaddrs) {
+               rc = -ENOMEM;
+               CERROR("lnet: Can't allocate ipaddrs[%d], rc = %d\n",
+                      nip, rc);
+               goto out_free_addrs;
        }
 
-       rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
-       lnet_ipaddr_free_enumeration(ipaddrs, nip);
+       for (i = 0; i < nip; i++)
+               ipaddrs[i] = ifaces[i].li_ipaddr;
 
+       rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip);
        if (rc < 0) {
                LCONSOLE_ERROR_MSG(0x119, "Error %d parsing ip2nets\n", rc);
-               return rc;
-       }
-
-       if (rc == 0) {
+       } else if (rc == 0) {
                LCONSOLE_ERROR_MSG(0x11a, "ip2nets does not match "
                                   "any local IP interfaces\n");
-               return -ENOENT;
+               rc = -ENOENT;
        }
-
-       return 0;
+       LIBCFS_FREE(ipaddrs, nip * sizeof(*ipaddrs));
+out_free_addrs:
+       kfree(ifaces);
+       return rc > 0 ? 0 : rc;
 }