Whamcloud - gitweb
LU-7734 lnet: Multi-Rail local NI split
[fs/lustre-release.git] / lnet / lnet / api-ni.c
index 270629d..4910d3a 100644 (file)
@@ -57,6 +57,11 @@ static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
 module_param(rnet_htable_size, int, 0444);
 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
 
+static int use_tcp_bonding = false;
+module_param(use_tcp_bonding, int, 0444);
+MODULE_PARM_DESC(use_tcp_bonding,
+                "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
+
 static int lnet_ping(lnet_process_id_t id, signed long timeout,
                     lnet_process_id_t __user *ids, int n_ids);
 
@@ -584,9 +589,7 @@ lnet_prepare(lnet_pid_t requested_pid)
        the_lnet.ln_pid = requested_pid;
 
        INIT_LIST_HEAD(&the_lnet.ln_test_peers);
-       INIT_LIST_HEAD(&the_lnet.ln_nis);
-       INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
-       INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
+       INIT_LIST_HEAD(&the_lnet.ln_nets);
        INIT_LIST_HEAD(&the_lnet.ln_routers);
        INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
        INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
@@ -667,9 +670,7 @@ lnet_unprepare (void)
 
        LASSERT(the_lnet.ln_refcount == 0);
        LASSERT(list_empty(&the_lnet.ln_test_peers));
-       LASSERT(list_empty(&the_lnet.ln_nis));
-       LASSERT(list_empty(&the_lnet.ln_nis_cpt));
-       LASSERT(list_empty(&the_lnet.ln_nis_zombie));
+       LASSERT(list_empty(&the_lnet.ln_nets));
 
        lnet_portals_destroy();
 
@@ -700,18 +701,17 @@ lnet_unprepare (void)
 }
 
 lnet_ni_t  *
-lnet_net2ni_locked(__u32 net, int cpt)
+lnet_net2ni_locked(__u32 net_id, int cpt)
 {
-       struct list_head *tmp;
-       lnet_ni_t        *ni;
+       struct lnet_ni   *ni;
+       struct lnet_net  *net;
 
        LASSERT(cpt != LNET_LOCK_EX);
 
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
-
-               if (LNET_NIDNET(ni->ni_nid) == net) {
-                       lnet_ni_addref_locked(ni, cpt);
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               if (net->net_id == net_id) {
+                       ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+                                       ni_netlist);
                        return ni;
                }
        }
@@ -732,6 +732,19 @@ lnet_net2ni(__u32 net)
 }
 EXPORT_SYMBOL(lnet_net2ni);
 
+struct lnet_net *
+lnet_get_net_locked(__u32 net_id)
+{
+       struct lnet_net  *net;
+
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               if (net->net_id == net_id)
+                       return net;
+       }
+
+       return NULL;
+}
+
 static unsigned int
 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
 {
@@ -752,31 +765,41 @@ lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
 }
 
 int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
+lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
 {
-       struct lnet_ni *ni;
+       struct lnet_net *net;
 
        /* must called with hold of lnet_net_lock */
        if (LNET_CPT_NUMBER == 1)
                return 0; /* the only one */
 
-       /* take lnet_net_lock(any) would be OK */
-       if (!list_empty(&the_lnet.ln_nis_cpt)) {
-               list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
-                       if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
-                               continue;
+       /*
+        * If NI is provided then use the CPT identified in the NI cpt
+        * list if one exists. If one doesn't exist, then that NI is
+        * associated with all CPTs and it follows that the net it belongs
+        * to is implicitly associated with all CPTs, so just hash the nid
+        * and return that.
+        */
+       if (ni != NULL) {
+               if (ni->ni_cpts != NULL)
+                       return ni->ni_cpts[lnet_nid_cpt_hash(nid,
+                                                            ni->ni_ncpts)];
+               else
+                       return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+       }
 
-                       LASSERT(ni->ni_cpts != NULL);
-                       return ni->ni_cpts[lnet_nid_cpt_hash
-                                          (nid, ni->ni_ncpts)];
-               }
+       /* no NI provided so look at the net */
+       net = lnet_get_net_locked(LNET_NIDNET(nid));
+
+       if (net != NULL && net->net_cpts != NULL) {
+               return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
        }
 
        return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
 }
 
 int
-lnet_cpt_of_nid(lnet_nid_t nid)
+lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
 {
        int     cpt;
        int     cpt2;
@@ -784,11 +807,10 @@ lnet_cpt_of_nid(lnet_nid_t nid)
        if (LNET_CPT_NUMBER == 1)
                return 0; /* the only one */
 
-       if (list_empty(&the_lnet.ln_nis_cpt))
-               return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
        cpt = lnet_net_lock_current();
-       cpt2 = lnet_cpt_of_nid_locked(nid);
+
+       cpt2 = lnet_cpt_of_nid_locked(nid, ni);
+
        lnet_net_unlock(cpt);
 
        return cpt2;
@@ -796,42 +818,66 @@ lnet_cpt_of_nid(lnet_nid_t nid)
 EXPORT_SYMBOL(lnet_cpt_of_nid);
 
 int
-lnet_islocalnet(__u32 net)
+lnet_islocalnet(__u32 net_id)
 {
-       struct lnet_ni  *ni;
+       struct lnet_net *net;
        int             cpt;
+       bool            local;
 
        cpt = lnet_net_lock_current();
 
-       ni = lnet_net2ni_locked(net, cpt);
-       if (ni != NULL)
-               lnet_ni_decref_locked(ni, cpt);
+       net = lnet_get_net_locked(net_id);
+
+       local = net != NULL;
 
        lnet_net_unlock(cpt);
 
-       return ni != NULL;
+       return local;
+}
+
+bool
+lnet_is_ni_healthy_locked(struct lnet_ni *ni)
+{
+       if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
+           ni->ni_state == LNET_NI_STATE_DEGRADED)
+               return true;
+
+       return false;
 }
 
 lnet_ni_t  *
 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
 {
+       struct lnet_net  *net;
        struct lnet_ni   *ni;
-       struct list_head *tmp;
 
        LASSERT(cpt != LNET_LOCK_EX);
 
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
-
-               if (ni->ni_nid == nid) {
-                       lnet_ni_addref_locked(ni, cpt);
-                       return ni;
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       if (ni->ni_nid == nid)
+                               return ni;
                }
        }
 
        return NULL;
 }
 
+lnet_ni_t *
+lnet_nid2ni_addref(lnet_nid_t nid)
+{
+       lnet_ni_t *ni;
+
+       lnet_net_lock(0);
+       ni = lnet_nid2ni_locked(nid, 0);
+       if (ni)
+               lnet_ni_addref_locked(ni, 0);
+       lnet_net_unlock(0);
+
+       return ni;
+}
+EXPORT_SYMBOL(lnet_nid2ni_addref);
+
 int
 lnet_islocalnid(lnet_nid_t nid)
 {
@@ -840,27 +886,24 @@ lnet_islocalnid(lnet_nid_t nid)
 
        cpt = lnet_net_lock_current();
        ni = lnet_nid2ni_locked(nid, cpt);
-       if (ni != NULL)
-               lnet_ni_decref_locked(ni, cpt);
        lnet_net_unlock(cpt);
 
        return ni != NULL;
 }
 
 int
-lnet_count_acceptor_nis (void)
+lnet_count_acceptor_nets(void)
 {
        /* Return the # of NIs that need the acceptor. */
        int              count = 0;
-       struct list_head *tmp;
-       struct lnet_ni   *ni;
+       struct lnet_net  *net;
        int              cpt;
 
        cpt = lnet_net_lock_current();
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
-
-               if (ni->ni_lnd->lnd_accept != NULL)
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               /* all socklnd type networks should have the acceptor
+                * thread started */
+               if (net->net_lnd->lnd_accept != NULL)
                        count++;
        }
 
@@ -891,15 +934,30 @@ lnet_ping_info_create(int num_ni)
 }
 
 static inline int
+lnet_get_net_ni_count_locked(struct lnet_net *net)
+{
+       struct lnet_ni  *ni;
+       int             count = 0;
+
+       list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+               count++;
+
+       return count;
+}
+
+static inline int
 lnet_get_ni_count(void)
 {
-       struct lnet_ni *ni;
-       int            count = 0;
+       struct lnet_ni  *ni;
+       struct lnet_net *net;
+       int             count = 0;
 
        lnet_net_lock(0);
 
-       list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
-               count++;
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+                       count++;
+       }
 
        lnet_net_unlock(0);
 
@@ -917,14 +975,17 @@ lnet_ping_info_free(struct lnet_ping_info *pinfo)
 static void
 lnet_ping_info_destroy(void)
 {
+       struct lnet_net *net;
        struct lnet_ni  *ni;
 
        lnet_net_lock(LNET_LOCK_EX);
 
-       list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-               lnet_ni_lock(ni);
-               ni->ni_status = NULL;
-               lnet_ni_unlock(ni);
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       lnet_ni_lock(ni);
+                       ni->ni_status = NULL;
+                       lnet_ni_unlock(ni);
+               }
        }
 
        lnet_ping_info_free(the_lnet.ln_ping_info);
@@ -1029,24 +1090,29 @@ static void
 lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
 {
        int                     i;
-       lnet_ni_t               *ni;
+       struct lnet_ni          *ni;
+       struct lnet_net         *net;
        struct lnet_ni_status *ns;
 
        i = 0;
-       list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-               LASSERT(i < ping_info->pi_nnis);
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       LASSERT(i < ping_info->pi_nnis);
 
-               ns = &ping_info->pi_ni[i];
+                       ns = &ping_info->pi_ni[i];
 
-               ns->ns_nid = ni->ni_nid;
+                       ns->ns_nid = ni->ni_nid;
 
-               lnet_ni_lock(ni);
-               ns->ns_status = (ni->ni_status != NULL) ?
-                               ni->ni_status->ns_status : LNET_NI_STATUS_UP;
-               ni->ni_status = ns;
-               lnet_ni_unlock(ni);
+                       lnet_ni_lock(ni);
+                       ns->ns_status = (ni->ni_status != NULL) ?
+                                       ni->ni_status->ns_status :
+                                               LNET_NI_STATUS_UP;
+                       ni->ni_status = ns;
+                       lnet_ni_unlock(ni);
+
+                       i++;
+               }
 
-               i++;
        }
 }
 
@@ -1101,11 +1167,11 @@ lnet_ni_tq_credits(lnet_ni_t *ni)
        LASSERT(ni->ni_ncpts >= 1);
 
        if (ni->ni_ncpts == 1)
-               return ni->ni_maxtxcredits;
+               return ni->ni_net->net_tunables.lct_max_tx_credits;
 
-       credits = ni->ni_maxtxcredits / ni->ni_ncpts;
-       credits = max(credits, 8 * ni->ni_peertxcredits);
-       credits = min(credits, ni->ni_maxtxcredits);
+       credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
+       credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
+       credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
 
        return credits;
 }
@@ -1119,37 +1185,43 @@ lnet_ni_unlink_locked(lnet_ni_t *ni)
        }
 
        /* move it to zombie list and nobody can find it anymore */
-       LASSERT(!list_empty(&ni->ni_list));
-       list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
-       lnet_ni_decref_locked(ni, 0);   /* drop ln_nis' ref */
+       LASSERT(!list_empty(&ni->ni_netlist));
+       list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
+       lnet_ni_decref_locked(ni, 0);
 }
 
 static void
-lnet_clear_zombies_nis_locked(void)
+lnet_clear_zombies_nis_locked(struct lnet_net *net)
 {
        int             i;
        int             islo;
        lnet_ni_t       *ni;
+       struct list_head *zombie_list = &net->net_ni_zombie;
 
-       /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
-        * and shut them down in guaranteed thread context */
+       /*
+        * Now wait for the NIs I just nuked to show up on the zombie
+        * list and shut them down in guaranteed thread context
+        */
        i = 2;
-       while (!list_empty(&the_lnet.ln_nis_zombie)) {
+       while (!list_empty(zombie_list)) {
                int     *ref;
                int     j;
 
-               ni = list_entry(the_lnet.ln_nis_zombie.next,
-                               lnet_ni_t, ni_list);
-               list_del_init(&ni->ni_list);
+               ni = list_entry(zombie_list->next,
+                               lnet_ni_t, ni_netlist);
+               list_del_init(&ni->ni_netlist);
+               /* the ni should be in deleting state. If it's not it's
+                * a bug */
+               LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
                cfs_percpt_for_each(ref, j, ni->ni_refs) {
                        if (*ref == 0)
                                continue;
                        /* still busy, add it back to zombie list */
-                       list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
+                       list_add(&ni->ni_netlist, zombie_list);
                        break;
                }
 
-               if (!list_empty(&ni->ni_list)) {
+               if (!list_empty(&ni->ni_netlist)) {
                        lnet_net_unlock(LNET_LOCK_EX);
                        ++i;
                        if ((i & (-i)) == i) {
@@ -1163,16 +1235,12 @@ lnet_clear_zombies_nis_locked(void)
                        continue;
                }
 
-               ni->ni_lnd->lnd_refcount--;
                lnet_net_unlock(LNET_LOCK_EX);
 
-               islo = ni->ni_lnd->lnd_type == LOLND;
+               islo = ni->ni_net->net_lnd->lnd_type == LOLND;
 
                LASSERT(!in_interrupt());
-               (ni->ni_lnd->lnd_shutdown)(ni);
-
-               /* can't deref lnd anymore now; it might have unregistered
-                * itself...  */
+               (net->net_lnd->lnd_shutdown)(ni);
 
                if (!islo)
                        CDEBUG(D_LNI, "Removed LNI %s\n",
@@ -1184,60 +1252,15 @@ lnet_clear_zombies_nis_locked(void)
        }
 }
 
-static void
-lnet_shutdown_lndnis(void)
-{
-       int             i;
-       lnet_ni_t       *ni;
-
-       /* NB called holding the global mutex */
-
-       /* All quiet on the API front */
-       LASSERT(!the_lnet.ln_shutdown);
-       LASSERT(the_lnet.ln_refcount == 0);
-       LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
-       lnet_net_lock(LNET_LOCK_EX);
-       the_lnet.ln_shutdown = 1;       /* flag shutdown */
-
-       /* Unlink NIs from the global table */
-       while (!list_empty(&the_lnet.ln_nis)) {
-               ni = list_entry(the_lnet.ln_nis.next,
-                               lnet_ni_t, ni_list);
-               lnet_ni_unlink_locked(ni);
-       }
-
-       /* Drop the cached loopback NI. */
-       if (the_lnet.ln_loni != NULL) {
-               lnet_ni_decref_locked(the_lnet.ln_loni, 0);
-               the_lnet.ln_loni = NULL;
-       }
-
-       lnet_net_unlock(LNET_LOCK_EX);
-
-       /* Clear lazy portals and drop delayed messages which hold refs
-        * on their lnet_msg_t::msg_rxpeer */
-       for (i = 0; i < the_lnet.ln_nportals; i++)
-               LNetClearLazyPortal(i);
-
-       /* Clear the peer table and wait for all peers to go (they hold refs on
-        * their NIs) */
-       lnet_peer_tables_cleanup(NULL);
-
-       lnet_net_lock(LNET_LOCK_EX);
-
-       lnet_clear_zombies_nis_locked();
-       the_lnet.ln_shutdown = 0;
-       lnet_net_unlock(LNET_LOCK_EX);
-}
-
 /* shutdown down the NI and release refcount */
 static void
 lnet_shutdown_lndni(struct lnet_ni *ni)
 {
        int i;
+       struct lnet_net *net = ni->ni_net;
 
        lnet_net_lock(LNET_LOCK_EX);
+       ni->ni_state = LNET_NI_STATE_DELETING;
        lnet_ni_unlink_locked(ni);
        lnet_net_unlock(LNET_LOCK_EX);
 
@@ -1249,147 +1272,131 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
        lnet_peer_tables_cleanup(ni);
 
        lnet_net_lock(LNET_LOCK_EX);
-       lnet_clear_zombies_nis_locked();
+       lnet_clear_zombies_nis_locked(net);
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
+static void
+lnet_shutdown_lndnet(struct lnet_net *net)
 {
-       struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
-       int                     rc = -EINVAL;
-       __u32                   lnd_type;
-       lnd_t                   *lnd;
-       struct lnet_tx_queue    *tq;
-       int                     i;
+       struct lnet_ni *ni;
 
-       lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+       lnet_net_lock(LNET_LOCK_EX);
 
-       LASSERT(libcfs_isknown_lnd(lnd_type));
+       net->net_state = LNET_NET_STATE_DELETING;
 
-       if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
-           lnd_type == IIBLND || lnd_type == VIBLND) {
-               CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
-               goto failed0;
+       list_del_init(&net->net_list);
+
+       while (!list_empty(&net->net_ni_list)) {
+               ni = list_entry(net->net_ni_list.next,
+                               lnet_ni_t, ni_netlist);
+               lnet_net_unlock(LNET_LOCK_EX);
+               lnet_shutdown_lndni(ni);
+               lnet_net_lock(LNET_LOCK_EX);
        }
 
-       /* Make sure this new NI is unique. */
-       lnet_net_lock(LNET_LOCK_EX);
-       rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
+       /*
+        * decrement ref count on lnd only when the entire network goes
+        * away
+        */
+       net->net_lnd->lnd_refcount--;
+
        lnet_net_unlock(LNET_LOCK_EX);
 
-       if (!rc) {
-               if (lnd_type == LOLND) {
-                       lnet_ni_free(ni);
-                       return 0;
-               }
+       lnet_net_free(net);
+}
 
-               CERROR("Net %s is not unique\n",
-                      libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+static void
+lnet_shutdown_lndnets(void)
+{
+       struct lnet_net *net;
 
-               rc = -EEXIST;
-               goto failed0;
-       }
+       /* NB called holding the global mutex */
 
-       mutex_lock(&the_lnet.ln_lnd_mutex);
-       lnd = lnet_find_lnd_by_type(lnd_type);
+       /* All quiet on the API front */
+       LASSERT(!the_lnet.ln_shutdown);
+       LASSERT(the_lnet.ln_refcount == 0);
 
-       if (lnd == NULL) {
-               mutex_unlock(&the_lnet.ln_lnd_mutex);
-               rc = request_module("%s", libcfs_lnd2modname(lnd_type));
-               mutex_lock(&the_lnet.ln_lnd_mutex);
+       lnet_net_lock(LNET_LOCK_EX);
+       the_lnet.ln_shutdown = 1;       /* flag shutdown */
 
-               lnd = lnet_find_lnd_by_type(lnd_type);
-               if (lnd == NULL) {
-                       mutex_unlock(&the_lnet.ln_lnd_mutex);
-                       CERROR("Can't load LND %s, module %s, rc=%d\n",
-                              libcfs_lnd2str(lnd_type),
-                              libcfs_lnd2modname(lnd_type), rc);
-#ifndef HAVE_MODULE_LOADING_SUPPORT
-                       LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
-                                          "compiled with kernel module "
-                                          "loading support.");
-#endif
-                       rc = -EINVAL;
-                       goto failed0;
-               }
+       while (!list_empty(&the_lnet.ln_nets)) {
+               /*
+                * move the nets to the zombie list to avoid them being
+                * picked up for new work. LONET is also included in the
+                * Nets that will be moved to the zombie list
+                */
+               net = list_entry(the_lnet.ln_nets.next,
+                                struct lnet_net, net_list);
+               list_move(&net->net_list, &the_lnet.ln_net_zombie);
        }
 
-       lnet_net_lock(LNET_LOCK_EX);
-       lnd->lnd_refcount++;
+       /* Drop the cached loopback Net. */
+       if (the_lnet.ln_loni != NULL) {
+               lnet_ni_decref_locked(the_lnet.ln_loni, 0);
+               the_lnet.ln_loni = NULL;
+       }
        lnet_net_unlock(LNET_LOCK_EX);
 
-       ni->ni_lnd = lnd;
+       /* iterate through the net zombie list and delete each net */
+       while (!list_empty(&the_lnet.ln_net_zombie)) {
+               net = list_entry(the_lnet.ln_net_zombie.next,
+                                struct lnet_net, net_list);
+               lnet_shutdown_lndnet(net);
+       }
 
-       if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
-               lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+       lnet_net_lock(LNET_LOCK_EX);
+       the_lnet.ln_shutdown = 0;
+       lnet_net_unlock(LNET_LOCK_EX);
+}
 
-       if (lnd_tunables != NULL) {
-               LIBCFS_ALLOC(ni->ni_lnd_tunables,
-                            sizeof(*ni->ni_lnd_tunables));
-               if (ni->ni_lnd_tunables == NULL) {
-                       mutex_unlock(&the_lnet.ln_lnd_mutex);
-                       rc = -ENOMEM;
-                       goto failed0;
-               }
-               memcpy(ni->ni_lnd_tunables, lnd_tunables,
-                      sizeof(*ni->ni_lnd_tunables));
-       }
+static int
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
+{
+       int                     rc = -EINVAL;
+       struct lnet_tx_queue    *tq;
+       int                     i;
+       struct lnet_net         *net = ni->ni_net;
 
-       /* If given some LND tunable parameters, parse those now to
-        * override the values in the NI structure. */
-       if (conf) {
-               if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
-                       ni->ni_peerrtrcredits =
-                               conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
-               if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
-                       ni->ni_peertimeout =
-                               conf->cfg_config_u.cfg_net.net_peer_timeout;
-               if (conf->cfg_config_u.cfg_net.net_peer_tx_credits >= 0)
-                       ni->ni_peertxcredits =
-                               conf->cfg_config_u.cfg_net.net_peer_tx_credits;
-               if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
-                       ni->ni_maxtxcredits =
-                               conf->cfg_config_u.cfg_net.net_max_tx_credits;
+       mutex_lock(&the_lnet.ln_lnd_mutex);
+
+       if (tun) {
+               memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
+               ni->ni_lnd_tunables_set = true;
        }
 
-       rc = (lnd->lnd_startup)(ni);
+       rc = (net->net_lnd->lnd_startup)(ni);
 
        mutex_unlock(&the_lnet.ln_lnd_mutex);
 
        if (rc != 0) {
                LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
-                                  rc, libcfs_lnd2str(lnd->lnd_type));
+                                  rc, libcfs_lnd2str(net->net_lnd->lnd_type));
                lnet_net_lock(LNET_LOCK_EX);
-               lnd->lnd_refcount--;
+               net->net_lnd->lnd_refcount--;
                lnet_net_unlock(LNET_LOCK_EX);
                goto failed0;
        }
 
-       LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
-
-       lnet_net_lock(LNET_LOCK_EX);
-       /* refcount for ln_nis */
-       lnet_ni_addref_locked(ni, 0);
-       list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
-       if (ni->ni_cpts != NULL) {
-               lnet_ni_addref_locked(ni, 0);
-               list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
-       }
-
-       lnet_net_unlock(LNET_LOCK_EX);
+       ni->ni_state = LNET_NI_STATE_ACTIVE;
 
-       if (lnd->lnd_type == LOLND) {
+       /* We keep a reference on the loopback net through the loopback NI */
+       if (net->net_lnd->lnd_type == LOLND) {
                lnet_ni_addref(ni);
                LASSERT(the_lnet.ln_loni == NULL);
                the_lnet.ln_loni = ni;
+               ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
+               ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
+               ni->ni_net->net_tunables.lct_max_tx_credits = 0;
+               ni->ni_net->net_tunables.lct_peer_timeout = 0;
                return 0;
        }
 
-       if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) {
+       if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
+           ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
                LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
-                                  libcfs_lnd2str(lnd->lnd_type),
-                                  ni->ni_peertxcredits == 0 ?
+                                  libcfs_lnd2str(net->net_lnd->lnd_type),
+                                  ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
                                        "" : "per-peer ");
                /* shutdown the NI since if we get here then it must've already
                 * been started
@@ -1405,9 +1412,11 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
        }
 
        CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
-               libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
+               libcfs_nid2str(ni->ni_nid),
+               ni->ni_net->net_tunables.lct_peer_tx_credits,
                lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
-               ni->ni_peerrtrcredits, ni->ni_peertimeout);
+               ni->ni_net->net_tunables.lct_peer_rtr_credits,
+               ni->ni_net->net_tunables.lct_peer_timeout);
 
        return 0;
 failed0:
@@ -1416,26 +1425,216 @@ failed0:
 }
 
 static int
-lnet_startup_lndnis(struct list_head *nilist)
+lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
 {
        struct lnet_ni          *ni;
+       struct lnet_net         *net_l = NULL;
+       struct list_head        local_ni_list;
        int                     rc;
        int                     ni_count = 0;
+       __u32                   lnd_type;
+       lnd_t                   *lnd;
+       int                     peer_timeout =
+               net->net_tunables.lct_peer_timeout;
+       int                     maxtxcredits =
+               net->net_tunables.lct_max_tx_credits;
+       int                     peerrtrcredits =
+               net->net_tunables.lct_peer_rtr_credits;
+
+       INIT_LIST_HEAD(&local_ni_list);
+
+       /*
+        * make sure that this net is unique. If it isn't then
+        * we are adding interfaces to an already existing network, and
+        * 'net' is just a convenient way to pass in the list.
+        * if it is unique we need to find the LND and load it if
+        * necessary.
+        */
+       if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
+               lnd_type = LNET_NETTYP(net->net_id);
+
+               LASSERT(libcfs_isknown_lnd(lnd_type));
+
+               if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
+                   lnd_type == IIBLND || lnd_type == VIBLND) {
+                       CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
+                       rc = -EINVAL;
+                       goto failed0;
+               }
+
+               mutex_lock(&the_lnet.ln_lnd_mutex);
+               lnd = lnet_find_lnd_by_type(lnd_type);
+
+               if (lnd == NULL) {
+                       mutex_unlock(&the_lnet.ln_lnd_mutex);
+                       rc = request_module("%s", libcfs_lnd2modname(lnd_type));
+                       mutex_lock(&the_lnet.ln_lnd_mutex);
+
+                       lnd = lnet_find_lnd_by_type(lnd_type);
+                       if (lnd == NULL) {
+                               mutex_unlock(&the_lnet.ln_lnd_mutex);
+                               CERROR("Can't load LND %s, module %s, rc=%d\n",
+                               libcfs_lnd2str(lnd_type),
+                               libcfs_lnd2modname(lnd_type), rc);
+#ifndef HAVE_MODULE_LOADING_SUPPORT
+                               LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
+                                               "compiled with kernel module "
+                                               "loading support.");
+#endif
+                               rc = -EINVAL;
+                               goto failed0;
+                       }
+               }
+
+               lnet_net_lock(LNET_LOCK_EX);
+               lnd->lnd_refcount++;
+               lnet_net_unlock(LNET_LOCK_EX);
+
+               net->net_lnd = lnd;
+
+               mutex_unlock(&the_lnet.ln_lnd_mutex);
+
+               net_l = net;
+       }
+
+       /*
+        * net_l: if the network being added is unique then net_l
+        *        will point to that network
+        *        if the network being added is not unique then
+        *        net_l points to the existing network.
+        *
+        * When we enter the loop below, we'll pick NIs off he
+        * network beign added and start them up, then add them to
+        * a local ni list. Once we've successfully started all
+        * the NIs then we join the local NI list (of started up
+        * networks) with the net_l->net_ni_list, which should
+        * point to the correct network to add the new ni list to
+        *
+        * If any of the new NIs fail to start up, then we want to
+        * iterate through the local ni list, which should include
+        * any NIs which were successfully started up, and shut
+        * them down.
+        *
+        * After than we want to delete the network being added,
+        * to avoid a memory leak.
+        */
+
+       /*
+        * When a network uses TCP bonding then all its interfaces
+        * must be specified when the network is first defined: the
+        * TCP bonding code doesn't allow for interfaces to be added
+        * or removed.
+        */
+       if (net_l != net && net_l != NULL && use_tcp_bonding &&
+           LNET_NETTYP(net_l->net_id) == SOCKLND) {
+               rc = -EINVAL;
+               goto failed0;
+       }
+
+       while (!list_empty(&net->net_ni_added)) {
+               ni = list_entry(net->net_ni_added.next, struct lnet_ni,
+                               ni_netlist);
+               list_del_init(&ni->ni_netlist);
+
+               /* make sure that the the NI we're about to start
+                * up is actually unique. if it's not fail. */
+               if (!lnet_ni_unique_net(&net_l->net_ni_list,
+                                       ni->ni_interfaces[0])) {
+                       rc = -EINVAL;
+                       goto failed1;
+               }
+
+               /* adjust the pointer the parent network, just in case it
+                * the net is a duplicate */
+               ni->ni_net = net_l;
 
-       while (!list_empty(nilist)) {
-               ni = list_entry(nilist->next, lnet_ni_t, ni_list);
-               list_del(&ni->ni_list);
-               rc = lnet_startup_lndni(ni, NULL);
+               rc = lnet_startup_lndni(ni, tun);
+
+               LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
+                       ni->ni_net->net_lnd->lnd_query != NULL);
 
                if (rc < 0)
-                       goto failed;
+                       goto failed1;
+
+               lnet_ni_addref(ni);
+               list_add_tail(&ni->ni_netlist, &local_ni_list);
 
                ni_count++;
        }
 
+       lnet_net_lock(LNET_LOCK_EX);
+       list_splice_tail(&local_ni_list, &net_l->net_ni_list);
+       lnet_net_unlock(LNET_LOCK_EX);
+
+       /* if the network is not unique then we don't want to keep
+        * it around after we're done. Free it. Otherwise add that
+        * net to the global the_lnet.ln_nets */
+       if (net_l != net && net_l != NULL) {
+               /*
+                * TODO - note. currently the tunables can not be updated
+                * once added
+                */
+               lnet_net_free(net);
+       } else {
+               net->net_state = LNET_NET_STATE_ACTIVE;
+               /*
+                * restore tunables after it has been overwitten by the
+                * lnd
+                */
+               if (peer_timeout != -1)
+                       net->net_tunables.lct_peer_timeout = peer_timeout;
+               if (maxtxcredits != -1)
+                       net->net_tunables.lct_max_tx_credits = maxtxcredits;
+               if (peerrtrcredits != -1)
+                       net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
+
+               lnet_net_lock(LNET_LOCK_EX);
+               list_add_tail(&net->net_list, &the_lnet.ln_nets);
+               lnet_net_unlock(LNET_LOCK_EX);
+       }
+
+       return ni_count;
+
+failed1:
+       /*
+        * shutdown the new NIs that are being started up
+        * free the NET being started
+        */
+       while (!list_empty(&local_ni_list)) {
+               ni = list_entry(local_ni_list.next, struct lnet_ni,
+                               ni_netlist);
+
+               lnet_shutdown_lndni(ni);
+       }
+
+failed0:
+       lnet_net_free(net);
+
+       return rc;
+}
+
+static int
+lnet_startup_lndnets(struct list_head *netlist)
+{
+       struct lnet_net         *net;
+       int                     rc;
+       int                     ni_count = 0;
+
+       while (!list_empty(netlist)) {
+               net = list_entry(netlist->next, struct lnet_net, net_list);
+               list_del_init(&net->net_list);
+
+               rc = lnet_startup_lndnet(net, NULL);
+
+               if (rc < 0)
+                       goto failed;
+
+               ni_count += rc;
+       }
+
        return ni_count;
 failed:
-       lnet_shutdown_lndnis();
+       lnet_shutdown_lndnets();
 
        return rc;
 }
@@ -1483,6 +1682,7 @@ int lnet_lib_init(void)
        the_lnet.ln_refcount = 0;
        LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
        INIT_LIST_HEAD(&the_lnet.ln_lnds);
+       INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
        INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
        INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
 
@@ -1543,6 +1743,7 @@ LNetNIInit(lnet_pid_t requested_pid)
        struct lnet_ping_info   *pinfo;
        lnet_handle_md_t        md_handle;
        struct list_head        net_head;
+       struct lnet_net         *net;
 
        INIT_LIST_HEAD(&net_head);
 
@@ -1562,8 +1763,15 @@ LNetNIInit(lnet_pid_t requested_pid)
                return rc;
        }
 
-       /* Add in the loopback network */
-       if (lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head) == NULL) {
+       /* create a network for Loopback network */
+       net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
+       if (net == NULL) {
+               rc = -ENOMEM;
+               goto err_empty_list;
+       }
+
+       /* Add in the loopback NI */
+       if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
                rc = -ENOMEM;
                goto err_empty_list;
        }
@@ -1575,13 +1783,13 @@ LNetNIInit(lnet_pid_t requested_pid)
         * in this case.  On cleanup in case of failure only clean up
         * routes if it has been loaded */
        if (!the_lnet.ln_nis_from_mod_params) {
-               rc = lnet_parse_networks(&net_head,
-                                        lnet_get_networks());
+               rc = lnet_parse_networks(&net_head, lnet_get_networks(),
+                                        use_tcp_bonding);
                if (rc < 0)
                        goto err_empty_list;
        }
 
-       ni_count = lnet_startup_lndnis(&net_head);
+       ni_count = lnet_startup_lndnets(&net_head);
        if (ni_count < 0) {
                rc = ni_count;
                goto err_empty_list;
@@ -1634,17 +1842,17 @@ err_destroy_routes:
        if (!the_lnet.ln_nis_from_mod_params)
                lnet_destroy_routes();
 err_shutdown_lndnis:
-       lnet_shutdown_lndnis();
+       lnet_shutdown_lndnets();
 err_empty_list:
        lnet_unprepare();
        LASSERT(rc < 0);
        mutex_unlock(&the_lnet.ln_api_mutex);
        while (!list_empty(&net_head)) {
-               struct lnet_ni *ni;
+               struct lnet_net *net;
 
-               ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-               list_del_init(&ni->ni_list);
-               lnet_ni_free(ni);
+               net = list_entry(net_head.next, struct lnet_net, net_list);
+               list_del_init(&net->net_list);
+               lnet_net_free(net);
        }
        return rc;
 }
@@ -1682,7 +1890,7 @@ LNetNIFini()
 
                lnet_acceptor_stop();
                lnet_destroy_routes();
-               lnet_shutdown_lndnis();
+               lnet_shutdown_lndnets();
                lnet_unprepare();
        }
 
@@ -1732,10 +1940,14 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
        }
 
        config->cfg_nid = ni->ni_nid;
-       config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
-       config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
-       config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
-       config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
+       config->cfg_config_u.cfg_net.net_peer_timeout =
+               ni->ni_net->net_tunables.lct_peer_timeout;
+       config->cfg_config_u.cfg_net.net_max_tx_credits =
+               ni->ni_net->net_tunables.lct_max_tx_credits;
+       config->cfg_config_u.cfg_net.net_peer_tx_credits =
+               ni->ni_net->net_tunables.lct_peer_tx_credits;
+       config->cfg_config_u.cfg_net.net_peer_rtr_credits =
+               ni->ni_net->net_tunables.lct_peer_rtr_credits;
 
        net_config->ni_status = ni->ni_status->ns_status;
 
@@ -1757,46 +1969,99 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
        if (config->cfg_hdr.ioc_len > min_size)
                tunable_size = config->cfg_hdr.ioc_len - min_size;
 
-       /* Don't copy to much data to user space */
-       min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
+       /* Don't copy too much data to user space */
+       min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
        lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
 
-       if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
-               memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
+       if (lnd_cfg && min_size) {
+               memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
                config->cfg_config_u.cfg_net.net_interface_count = 1;
 
                /* Tell user land that kernel side has less data */
-               if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
+               if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
                        min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
                        config->cfg_hdr.ioc_len -= min_size;
                }
        }
 }
 
-static int
+struct lnet_ni *
+lnet_get_ni_idx_locked(int idx)
+{
+       struct lnet_ni          *ni;
+       struct lnet_net         *net;
+
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       if (idx-- == 0)
+                               return ni;
+               }
+       }
+
+       return NULL;
+}
+
+struct lnet_ni *
+lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
+{
+       struct lnet_ni          *ni;
+       struct lnet_net         *net = mynet;
+
+       if (prev == NULL) {
+               if (net == NULL)
+                       net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
+                                       net_list);
+               ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+                               ni_netlist);
+
+               return ni;
+       }
+
+       if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
+               /* if you reached the end of the ni list and the net is
+                * specified, then there are no more nis in that net */
+               if (net != NULL)
+                       return NULL;
+
+               /* we reached the end of this net ni list. move to the
+                * next net */
+               if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
+                       /* no more nets and no more NIs. */
+                       return NULL;
+
+               /* get the next net */
+               net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
+                                net_list);
+               /* get the ni on it */
+               ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+                               ni_netlist);
+
+               return ni;
+       }
+
+       /* there are more nis left */
+       ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
+
+       return ni;
+}
+
+int
 lnet_get_net_config(struct lnet_ioctl_config_data *config)
 {
        struct lnet_ni *ni;
-       struct list_head *tmp;
-       int idx = config->cfg_count;
+       int cpt;
        int rc = -ENOENT;
-       int cpt, i = 0;
-
-       if (unlikely(!config->cfg_bulk))
-               return -EINVAL;
+       int idx = config->cfg_count;
 
        cpt = lnet_net_lock_current();
 
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               if (i++ != idx)
-                       continue;
+       ni = lnet_get_ni_idx_locked(idx);
 
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
+       if (ni != NULL) {
+               rc = 0;
                lnet_ni_lock(ni);
                lnet_fill_ni_info(ni, config);
                lnet_ni_unlock(ni);
-               rc = 0;
-               break;
        }
 
        lnet_net_unlock(cpt);
@@ -1809,29 +2074,36 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
        char                    *nets = conf->cfg_config_u.cfg_net.net_intf;
        struct lnet_ping_info   *pinfo;
        lnet_handle_md_t        md_handle;
-       struct lnet_ni          *ni;
+       struct lnet_net         *net;
        struct list_head        net_head;
        int                     rc;
        lnet_remotenet_t        *rnet;
+       int                     net_ni_count;
+       int                     num_acceptor_nets;
+       __u32                   net_type;
+       struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
 
        INIT_LIST_HEAD(&net_head);
 
-       /* Create a ni structure for the network string */
-       rc = lnet_parse_networks(&net_head, nets);
+       if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
+               lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+
+       /* Create a net/ni structures for the network string */
+       rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
        if (rc <= 0)
                return rc == 0 ? -EINVAL : rc;
 
        mutex_lock(&the_lnet.ln_api_mutex);
 
        if (rc > 1) {
-               rc = -EINVAL; /* only add one interface per call */
+               rc = -EINVAL; /* only add one network per call */
                goto failed0;
        }
 
-       ni = list_entry(net_head.next, struct lnet_ni, ni_list);
+       net = list_entry(net_head.next, struct lnet_net, net_list);
 
        lnet_net_lock(LNET_LOCK_EX);
-       rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
+       rnet = lnet_find_rnet_locked(net->net_id);
        lnet_net_unlock(LNET_LOCK_EX);
        /* make sure that the net added doesn't invalidate the current
         * configuration LNet is keeping */
@@ -1842,23 +2114,66 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
                goto failed0;
        }
 
-       rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
+       /*
+        * make sure you calculate the correct number of slots in the ping
+        * info. Since the ping info is a flattened list of all the NIs,
+        * we should allocate enough slots to accomodate the number of NIs
+        * which will be added.
+        *
+        * We can use lnet_get_net_ni_count_locked() since the net is not
+        * on a public list yet, so locking is not a problem
+        */
+       net_ni_count = lnet_get_net_ni_count_locked(net);
+
+       rc = lnet_ping_info_setup(&pinfo, &md_handle,
+                                 net_ni_count + lnet_get_ni_count(),
                                  false);
        if (rc != 0)
                goto failed0;
 
-       list_del_init(&ni->ni_list);
+       list_del_init(&net->net_list);
 
-       rc = lnet_startup_lndni(ni, conf);
-       if (rc != 0)
+       if (lnd_tunables)
+               memcpy(&net->net_tunables,
+                      &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
+
+       /*
+        * before starting this network get a count of the current TCP
+        * networks which require the acceptor thread running. If that
+        * count is == 0 before we start up this network, then we'd want to
+        * start up the acceptor thread after starting up this network
+        */
+       num_acceptor_nets = lnet_count_acceptor_nets();
+
+       /*
+        * lnd_startup_lndnet() can deallocate 'net' even if it it returns
+        * success, because we endded up adding interfaces to an existing
+        * network. So grab the net_type now
+        */
+       net_type = LNET_NETTYP(net->net_id);
+
+       rc = lnet_startup_lndnet(net,
+                                (lnd_tunables) ? &lnd_tunables->lt_tun : NULL);
+       if (rc < 0)
                goto failed1;
 
-       if (ni->ni_lnd->lnd_accept != NULL) {
+       /*
+        * Start the acceptor thread if this is the first network
+        * being added that requires the thread.
+        */
+       if (net_type == SOCKLND && num_acceptor_nets == 0)
+       {
                rc = lnet_acceptor_start();
                if (rc < 0) {
-                       /* shutdown the ni that we just started */
+                       /* shutdown the net that we just started */
                        CERROR("Failed to start up acceptor thread\n");
-                       lnet_shutdown_lndni(ni);
+                       /*
+                        * Note that if we needed to start the acceptor
+                        * thread, then 'net' must have been the first TCP
+                        * network, therefore was unique, and therefore
+                        * wasn't deallocated by lnet_startup_lndnet()
+                        */
+                       lnet_shutdown_lndnet(net);
                        goto failed1;
                }
        }
@@ -1874,51 +2189,53 @@ failed1:
 failed0:
        mutex_unlock(&the_lnet.ln_api_mutex);
        while (!list_empty(&net_head)) {
-               ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-               list_del_init(&ni->ni_list);
-               lnet_ni_free(ni);
+               net = list_entry(net_head.next, struct lnet_net, net_list);
+               list_del_init(&net->net_list);
+               lnet_net_free(net);
        }
        return rc;
 }
 
 int
-lnet_dyn_del_ni(__u32 net)
+lnet_dyn_del_ni(__u32 net_id)
 {
-       lnet_ni_t        *ni;
+       struct lnet_net  *net;
        struct lnet_ping_info *pinfo;
        lnet_handle_md_t  md_handle;
        int               rc;
+       int               net_ni_count;
 
        /* don't allow userspace to shutdown the LOLND */
-       if (LNET_NETTYP(net) == LOLND)
+       if (LNET_NETTYP(net_id) == LOLND)
                return -EINVAL;
 
        mutex_lock(&the_lnet.ln_api_mutex);
-       /* create and link a new ping info, before removing the old one */
-       rc = lnet_ping_info_setup(&pinfo, &md_handle,
-                                 lnet_get_ni_count() - 1, false);
-       if (rc != 0)
-               goto out;
 
-       ni = lnet_net2ni(net);
-       if (ni == NULL) {
+       lnet_net_lock(0);
+
+       net = lnet_get_net_locked(net_id);
+       if (net == NULL) {
                rc = -EINVAL;
-               goto failed;
+               goto out;
        }
 
-       /* decrement the reference counter taken by lnet_net2ni() */
-       lnet_ni_decref_locked(ni, 0);
+       net_ni_count = lnet_get_net_ni_count_locked(net);
 
-       lnet_shutdown_lndni(ni);
+       lnet_net_unlock(0);
 
-       if (lnet_count_acceptor_nis() == 0)
+       /* create and link a new ping info, before removing the old one */
+       rc = lnet_ping_info_setup(&pinfo, &md_handle,
+                                 lnet_get_ni_count() - net_ni_count, false);
+       if (rc != 0)
+               goto out;
+
+       lnet_shutdown_lndnet(net);
+
+       if (lnet_count_acceptor_nets() == 0)
                lnet_acceptor_stop();
 
        lnet_ping_target_update(pinfo, md_handle);
-       goto out;
-failed:
-       lnet_ping_md_unlink(pinfo, &md_handle);
-       lnet_ping_info_free(pinfo);
+
 out:
        mutex_unlock(&the_lnet.ln_api_mutex);
 
@@ -2137,12 +2454,11 @@ LNetCtl(unsigned int cmd, void *arg)
                if (ni == NULL)
                        return -EINVAL;
 
-               if (ni->ni_lnd->lnd_ctl == NULL)
+               if (ni->ni_net->net_lnd->lnd_ctl == NULL)
                        rc = -EINVAL;
                else
-                       rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
+                       rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
 
-               lnet_ni_decref(ni);
                return rc;
        }
        /* not reached */
@@ -2170,7 +2486,7 @@ int
 LNetGetId(unsigned int index, lnet_process_id_t *id)
 {
        struct lnet_ni   *ni;
-       struct list_head *tmp;
+       struct lnet_net  *net;
        int               cpt;
        int               rc = -ENOENT;
 
@@ -2178,16 +2494,16 @@ LNetGetId(unsigned int index, lnet_process_id_t *id)
 
        cpt = lnet_net_lock_current();
 
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               if (index-- != 0)
-                       continue;
-
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       if (index-- != 0)
+                               continue;
 
-               id->nid = ni->ni_nid;
-               id->pid = the_lnet.ln_pid;
-               rc = 0;
-               break;
+                       id->nid = ni->ni_nid;
+                       id->pid = the_lnet.ln_pid;
+                       rc = 0;
+                       break;
+               }
        }
 
        lnet_net_unlock(cpt);