Whamcloud - gitweb
LU-7734 lnet: Multi-Rail local NI split
author Amir Shehata <amir.shehata@intel.com>
Sat, 12 Dec 2015 04:02:54 +0000 (20:02 -0800)
committer Amir Shehata <amir.shehata@intel.com>
Wed, 25 Jan 2017 02:38:08 +0000 (18:38 -0800)
This patch allows the configuration of multiple NIs under one Net.
It is now possible to have multiple NIDs on the same network:
   Ex: <ip1>@tcp, <ip2>@tcp.
This can be configured using the following syntax:
   Ex: tcp(eth0, eth1)

The data structures for the example above can be visualized
as follows:

               NET(tcp)
                |
        -----------------
        |               |
      NI(eth0)        NI(eth1)

For more details, refer to the Multi-Rail Requirements and HLD
documents.

Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Change-Id: Id7c73b9b811a3082b61e53b9e9f95743188cbd51
Reviewed-on: http://review.whamcloud.com/18274
Tested-by: Jenkins
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Olaf Weber <olaf@sgi.com>
22 files changed:
lnet/include/lnet/lib-dlc.h
lnet/include/lnet/lib-lnet.h
lnet/include/lnet/lib-types.h
lnet/klnds/gnilnd/gnilnd.c
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c
lnet/klnds/o2iblnd/o2iblnd_modparams.c
lnet/klnds/socklnd/socklnd.c
lnet/lnet/acceptor.c
lnet/lnet/api-ni.c
lnet/lnet/config.c
lnet/lnet/lib-move.c
lnet/lnet/lib-ptl.c
lnet/lnet/lo.c
lnet/lnet/net_fault.c
lnet/lnet/peer.c
lnet/lnet/router.c
lnet/lnet/router_proc.c
lnet/selftest/brw_test.c
lnet/selftest/framework.c
lnet/selftest/selftest.h

index 436d9e8..69247f3 100644 (file)
 
 struct lnet_ioctl_config_lnd_cmn_tunables {
        __u32 lct_version;
 
 struct lnet_ioctl_config_lnd_cmn_tunables {
        __u32 lct_version;
-       __u32 lct_peer_timeout;
-       __u32 lct_peer_tx_credits;
-       __u32 lct_peer_rtr_credits;
-       __u32 lct_max_tx_credits;
+       __s32 lct_peer_timeout;
+       __s32 lct_peer_tx_credits;
+       __s32 lct_peer_rtr_credits;
+       __s32 lct_max_tx_credits;
 };
 
 struct lnet_ioctl_config_o2iblnd_tunables {
 };
 
 struct lnet_ioctl_config_o2iblnd_tunables {
@@ -56,11 +56,15 @@ struct lnet_ioctl_config_o2iblnd_tunables {
        __u32 pad;
 };
 
        __u32 pad;
 };
 
+struct lnet_lnd_tunables {
+       union {
+               struct lnet_ioctl_config_o2iblnd_tunables lnd_o2ib;
+       } lnd_tun_u;
+};
+
 struct lnet_ioctl_config_lnd_tunables {
        struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
 struct lnet_ioctl_config_lnd_tunables {
        struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
-       union {
-               struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
-       } lt_tun_u;
+       struct lnet_lnd_tunables lt_tun;
 };
 
 struct lnet_ioctl_net_config {
 };
 
 struct lnet_ioctl_net_config {
index 3b29582..6c12c35 100644 (file)
@@ -279,24 +279,6 @@ lnet_me_free(lnet_me_t *me)
        kmem_cache_free(lnet_mes_cachep, me);
 }
 
        kmem_cache_free(lnet_mes_cachep, me);
 }
 
-static inline lnet_msg_t *
-lnet_msg_alloc(void)
-{
-       lnet_msg_t *msg;
-
-       LIBCFS_ALLOC(msg, sizeof(*msg));
-
-       /* no need to zero, LIBCFS_ALLOC does for us */
-       return (msg);
-}
-
-static inline void
-lnet_msg_free(lnet_msg_t *msg)
-{
-       LASSERT(!msg->msg_onactivelist);
-       LIBCFS_FREE(msg, sizeof(*msg));
-}
-
 lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec,
                                     __u64 cookie);
 void lnet_res_lh_initialize(struct lnet_res_container *rec,
 lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec,
                                     __u64 cookie);
 void lnet_res_lh_initialize(struct lnet_res_container *rec,
@@ -454,9 +436,40 @@ lnet_ni_decref(lnet_ni_t *ni)
        lnet_net_unlock(0);
 }
 
        lnet_net_unlock(0);
 }
 
-void lnet_ni_free(lnet_ni_t *ni);
-lnet_ni_t *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist);
+static inline lnet_msg_t *
+lnet_msg_alloc(void)
+{
+       lnet_msg_t *msg;
+
+       LIBCFS_ALLOC(msg, sizeof(*msg));
+
+       /* no need to zero, LIBCFS_ALLOC does for us */
+       return (msg);
+}
+
+static inline void
+lnet_msg_free(lnet_msg_t *msg)
+{
+       LASSERT(!msg->msg_onactivelist);
+
+       /* Make sure we have no references to an NI. */
+       if (msg->msg_txni)
+               lnet_ni_decref_locked(msg->msg_txni, msg->msg_tx_cpt);
+       if (msg->msg_rxni)
+               lnet_ni_decref_locked(msg->msg_rxni, msg->msg_rx_cpt);
+
+       LIBCFS_FREE(msg, sizeof(*msg));
+}
+
+void lnet_ni_free(struct lnet_ni *ni);
+void lnet_net_free(struct lnet_net *net);
+
+struct lnet_net *
+lnet_net_alloc(__u32 net_type, struct list_head *netlist);
+
+struct lnet_ni *
+lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el,
+             char *iface);
 
 static inline int
 lnet_nid2peerhash(lnet_nid_t nid)
 
 static inline int
 lnet_nid2peerhash(lnet_nid_t nid)
@@ -475,11 +488,13 @@ lnet_net2rnethash(__u32 net)
 extern lnd_t the_lolnd;
 extern int avoid_asym_router_failure;
 
 extern lnd_t the_lolnd;
 extern int avoid_asym_router_failure;
 
-extern int lnet_cpt_of_nid_locked(lnet_nid_t nid);
-extern int lnet_cpt_of_nid(lnet_nid_t nid);
+extern int lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni);
+extern int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni);
 extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
 extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
+extern lnet_ni_t *lnet_nid2ni_addref(lnet_nid_t nid);
 extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt);
 extern lnet_ni_t *lnet_net2ni(__u32 net);
 extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt);
 extern lnet_ni_t *lnet_net2ni(__u32 net);
+bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
 
 int lnet_lib_init(void);
 void lnet_lib_exit(void);
 
 int lnet_lib_init(void);
 void lnet_lib_exit(void);
@@ -496,6 +511,9 @@ void lnet_destroy_routes(void);
 int lnet_get_route(int idx, __u32 *net, __u32 *hops,
                   lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
 int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
 int lnet_get_route(int idx, __u32 *net, __u32 *hops,
                   lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
 int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
+struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet,
+                                       struct lnet_ni *prev);
+struct lnet_ni *lnet_get_ni_idx_locked(int idx);
 
 struct libcfs_ioctl_handler {
        struct list_head item;
 
 struct libcfs_ioctl_handler {
        struct list_head item;
@@ -521,11 +539,12 @@ int  lnet_rtrpools_adjust(int tiny, int small, int large);
 int lnet_rtrpools_enable(void);
 void lnet_rtrpools_disable(void);
 void lnet_rtrpools_free(int keep_pools);
 int lnet_rtrpools_enable(void);
 void lnet_rtrpools_disable(void);
 void lnet_rtrpools_free(int keep_pools);
-lnet_remotenet_t *lnet_find_net_locked (__u32 net);
+lnet_remotenet_t *lnet_find_rnet_locked(__u32 net);
 int lnet_dyn_add_ni(lnet_pid_t requested_pid,
                    struct lnet_ioctl_config_data *conf);
 int lnet_dyn_del_ni(__u32 net);
 int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
 int lnet_dyn_add_ni(lnet_pid_t requested_pid,
                    struct lnet_ioctl_config_data *conf);
 int lnet_dyn_del_ni(__u32 net);
 int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
+struct lnet_net *lnet_get_net_locked(__u32 net_id);
 
 int lnet_islocalnid(lnet_nid_t nid);
 int lnet_islocalnet(__u32 net);
 
 int lnet_islocalnid(lnet_nid_t nid);
 int lnet_islocalnet(__u32 net);
@@ -731,8 +750,8 @@ void lnet_unregister_lnd(lnd_t *lnd);
 int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
                 __u32 local_ip, __u32 peer_ip, int peer_port);
 void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
 int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
                 __u32 local_ip, __u32 peer_ip, int peer_port);
 void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
-                               __u32 peer_ip, int port);
-int lnet_count_acceptor_nis(void);
+                                __u32 peer_ip, int port);
+int lnet_count_acceptor_nets(void);
 int lnet_acceptor_timeout(void);
 int lnet_acceptor_port(void);
 int lnet_acceptor_start(void);
 int lnet_acceptor_timeout(void);
 int lnet_acceptor_port(void);
 int lnet_acceptor_start(void);
@@ -754,7 +773,7 @@ int lnet_sock_connect(struct socket **sockp, int *fatal,
                        __u32 peer_ip, int peer_port);
 
 int lnet_peers_start_down(void);
                        __u32 peer_ip, int peer_port);
 
 int lnet_peers_start_down(void);
-int lnet_peer_buffer_credits(lnet_ni_t *ni);
+int lnet_peer_buffer_credits(struct lnet_net *net);
 
 int lnet_router_checker_start(void);
 void lnet_router_checker_stop(void);
 
 int lnet_router_checker_start(void);
 void lnet_router_checker_stop(void);
@@ -763,8 +782,11 @@ void lnet_swap_pinginfo(struct lnet_ping_info *info);
 
 int lnet_parse_ip2nets(char **networksp, char *ip2nets);
 int lnet_parse_routes(char *route_str, int *im_a_router);
 
 int lnet_parse_ip2nets(char **networksp, char *ip2nets);
 int lnet_parse_routes(char *route_str, int *im_a_router);
-int lnet_parse_networks(struct list_head *nilist, char *networks);
-int lnet_net_unique(__u32 net, struct list_head *nilist);
+int lnet_parse_networks(struct list_head *nilist, char *networks,
+                       bool use_tcp_bonding);
+bool lnet_net_unique(__u32 net_id, struct list_head *nilist,
+                    struct lnet_net **net);
+bool lnet_ni_unique_net(struct list_head *nilist, char *iface);
 
 int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
 lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
 
 int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
 lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
index fca5ace..601488f 100644 (file)
@@ -87,34 +87,37 @@ typedef struct lnet_msg {
        /* ready for pending on RX delay list */
        unsigned int            msg_rx_ready_delay:1;
 
        /* ready for pending on RX delay list */
        unsigned int            msg_rx_ready_delay:1;
 
-       unsigned int          msg_vmflush:1;      /* VM trying to free memory */
-       unsigned int          msg_target_is_router:1; /* sending to a router */
-       unsigned int          msg_routing:1;      /* being forwarded */
-       unsigned int          msg_ack:1;          /* ack on finalize (PUT) */
-       unsigned int          msg_sending:1;      /* outgoing message */
-       unsigned int          msg_receiving:1;    /* being received */
-       unsigned int          msg_txcredit:1;     /* taken an NI send credit */
-       unsigned int          msg_peertxcredit:1; /* taken a peer send credit */
-       unsigned int          msg_rtrcredit:1;    /* taken a globel router credit */
-       unsigned int          msg_peerrtrcredit:1; /* taken a peer router credit */
-       unsigned int          msg_onactivelist:1; /* on the activelist */
+       unsigned int          msg_vmflush:1;      /* VM trying to free memory */
+       unsigned int          msg_target_is_router:1; /* sending to a router */
+       unsigned int          msg_routing:1;      /* being forwarded */
+       unsigned int          msg_ack:1;          /* ack on finalize (PUT) */
+       unsigned int          msg_sending:1;      /* outgoing message */
+       unsigned int          msg_receiving:1;    /* being received */
+       unsigned int          msg_txcredit:1;     /* taken an NI send credit */
+       unsigned int          msg_peertxcredit:1; /* taken a peer send credit */
+       unsigned int          msg_rtrcredit:1;    /* taken a globel router credit */
+       unsigned int          msg_peerrtrcredit:1; /* taken a peer router credit */
+       unsigned int          msg_onactivelist:1; /* on the activelist */
        unsigned int          msg_rdma_get:1;
 
        unsigned int          msg_rdma_get:1;
 
-       struct lnet_peer     *msg_txpeer;         /* peer I'm sending to */
-       struct lnet_peer     *msg_rxpeer;         /* peer I received from */
+       struct lnet_peer     *msg_txpeer;         /* peer I'm sending to */
+       struct lnet_peer     *msg_rxpeer;         /* peer I received from */
 
 
-       void                 *msg_private;
+       void                 *msg_private;
        struct lnet_libmd    *msg_md;
        struct lnet_libmd    *msg_md;
-
-       unsigned int          msg_len;
-       unsigned int          msg_wanted;
-       unsigned int          msg_offset;
-       unsigned int          msg_niov;
+       /* the NI the message was sent or received over */
+       struct lnet_ni       *msg_txni;
+       struct lnet_ni       *msg_rxni;
+
+       unsigned int          msg_len;
+       unsigned int          msg_wanted;
+       unsigned int          msg_offset;
+       unsigned int          msg_niov;
        struct kvec          *msg_iov;
        struct kvec          *msg_iov;
-       lnet_kiov_t          *msg_kiov;
+       lnet_kiov_t          *msg_kiov;
 
 
-       lnet_event_t          msg_ev;
-       lnet_hdr_t            msg_hdr;
+       lnet_event_t          msg_ev;
+       lnet_hdr_t            msg_hdr;
 } lnet_msg_t;
 
 
 } lnet_msg_t;
 
 
@@ -263,29 +266,123 @@ struct lnet_tx_queue {
        struct list_head        tq_delayed;     /* delayed TXs */
 };
 
        struct list_head        tq_delayed;     /* delayed TXs */
 };
 
+enum lnet_net_state {
+       /* set when net block is allocated */
+       LNET_NET_STATE_INIT = 0,
+       /* set when NIs in net are started successfully */
+       LNET_NET_STATE_ACTIVE,
+       /* set if all NIs in net are in FAILED state */
+       LNET_NET_STATE_INACTIVE,
+       /* set when shutting down a NET */
+       LNET_NET_STATE_DELETING
+};
+
+enum lnet_ni_state {
+       /* set when NI block is allocated */
+       LNET_NI_STATE_INIT = 0,
+       /* set when NI is started successfully */
+       LNET_NI_STATE_ACTIVE,
+       /* set when LND notifies NI failed */
+       LNET_NI_STATE_FAILED,
+       /* set when LND notifies NI degraded */
+       LNET_NI_STATE_DEGRADED,
+       /* set when shuttding down NI */
+       LNET_NI_STATE_DELETING
+};
+
+struct lnet_net {
+       /* chain on the ln_nets */
+       struct list_head        net_list;
+
+       /* net ID, which is compoed of
+        * (net_type << 16) | net_num.
+        * net_type can be one of the enumarated types defined in
+        * lnet/include/lnet/nidstr.h */
+       __u32                   net_id;
+
+       /* priority of the network */
+       __u32                   net_prio;
+
+       /* total number of CPTs in the array */
+       __u32                   net_ncpts;
+
+       /* cumulative CPTs of all NIs in this net */
+       __u32                   *net_cpts;
+
+       /* network tunables */
+       struct lnet_ioctl_config_lnd_cmn_tunables net_tunables;
+
+       /*
+        * boolean to indicate that the tunables have been set and
+        * shouldn't be reset
+        */
+       bool                    net_tunables_set;
+
+       /* procedural interface */
+       lnd_t                   *net_lnd;
+
+       /* list of NIs on this net */
+       struct list_head        net_ni_list;
+
+       /* list of NIs being added, but not started yet */
+       struct list_head        net_ni_added;
+
+       /* dying LND instances */
+       struct list_head        net_ni_zombie;
+
+       /* network state */
+       enum lnet_net_state     net_state;
+};
+
 typedef struct lnet_ni {
 typedef struct lnet_ni {
+       /* chain on the lnet_net structure */
+       struct list_head        ni_netlist;
+
+       /* chain on net_ni_cpt */
+       struct list_head        ni_cptlist;
+
        spinlock_t              ni_lock;
        spinlock_t              ni_lock;
-       struct list_head        ni_list;        /* chain on ln_nis */
-       struct list_head        ni_cptlist;     /* chain on ln_nis_cpt */
-       int                     ni_maxtxcredits; /* # tx credits  */
-       /* # per-peer send credits */
-       int                     ni_peertxcredits;
-       /* # per-peer router buffer credits */
-       int                     ni_peerrtrcredits;
-       /* seconds to consider peer dead */
-       int                     ni_peertimeout;
-       int                     ni_ncpts;       /* number of CPTs */
-       __u32                   *ni_cpts;       /* bond NI on some CPTs */
-       lnet_nid_t              ni_nid;         /* interface's NID */
-       void                    *ni_data;       /* instance-specific data */
-       lnd_t                   *ni_lnd;        /* procedural interface */
-       struct lnet_tx_queue    **ni_tx_queues; /* percpt TX queues */
-       int                     **ni_refs;      /* percpt reference count */
-       time64_t                ni_last_alive;  /* when I was last alive */
-       struct lnet_ni_status   *ni_status;     /* my health status */
+
+       /* number of CPTs */
+       int                     ni_ncpts;
+
+       /* bond NI on some CPTs */
+       __u32                   *ni_cpts;
+
+       /* interface's NID */
+       lnet_nid_t              ni_nid;
+
+       /* instance-specific data */
+       void                    *ni_data;
+
+       /* percpt TX queues */
+       struct lnet_tx_queue    **ni_tx_queues;
+
+       /* percpt reference count */
+       int                     **ni_refs;
+
+       /* when I was last alive */
+       long                    ni_last_alive;
+
+       /* pointer to parent network */
+       struct lnet_net         *ni_net;
+
+       /* my health status */
+       lnet_ni_status_t        *ni_status;
+
+       /* NI FSM */
+       enum lnet_ni_state      ni_state;
+
        /* per NI LND tunables */
        /* per NI LND tunables */
-       struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
-       /* equivalent interfaces to use */
+       struct lnet_lnd_tunables ni_lnd_tunables;
+
+       /* lnd tunables set explicitly */
+       bool ni_lnd_tunables_set;
+
+       /*
+        * equivalent interfaces to use
+        * This is an array because socklnd bonding can still be configured
+        */
        char                    *ni_interfaces[LNET_MAX_INTERFACES];
        struct net              *ni_net_ns;     /* original net namespace */
 } lnet_ni_t;
        char                    *ni_interfaces[LNET_MAX_INTERFACES];
        struct net              *ni_net_ns;     /* original net namespace */
 } lnet_ni_t;
@@ -362,8 +459,8 @@ typedef struct lnet_peer {
        cfs_time_t              lp_last_alive;
        /* when lp_ni was queried last time */
        cfs_time_t              lp_last_query;
        cfs_time_t              lp_last_alive;
        /* when lp_ni was queried last time */
        cfs_time_t              lp_last_query;
-       /* interface peer is on */
-       lnet_ni_t               *lp_ni;
+       /* network peer is on */
+       struct lnet_net         *lp_net;
        lnet_nid_t              lp_nid;         /* peer's NID */
        int                     lp_refcount;    /* # refs */
        int                     lp_cpt;         /* CPT this peer attached on */
        lnet_nid_t              lp_nid;         /* peer's NID */
        int                     lp_refcount;    /* # refs */
        int                     lp_cpt;         /* CPT this peer attached on */
@@ -392,7 +489,7 @@ struct lnet_peer_table {
 /* peer aliveness is enabled only on routers for peers in a network where the
  * lnet_ni_t::ni_peertimeout has been set to a positive value */
 #define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
 /* peer aliveness is enabled only on routers for peers in a network where the
  * lnet_ni_t::ni_peertimeout has been set to a positive value */
 #define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
-                                        (lp)->lp_ni->ni_peertimeout > 0)
+                                        (lp)->lp_net->net_tunables.lct_peer_timeout > 0)
 
 typedef struct {
        struct list_head        lr_list;        /* chain on net */
 
 typedef struct {
        struct list_head        lr_list;        /* chain on net */
@@ -470,6 +567,7 @@ enum {
 struct lnet_match_info {
        __u64                   mi_mbits;
        lnet_process_id_t       mi_id;
 struct lnet_match_info {
        __u64                   mi_mbits;
        lnet_process_id_t       mi_id;
+       unsigned int            mi_cpt;
        unsigned int            mi_opc;
        unsigned int            mi_portal;
        unsigned int            mi_rlength;
        unsigned int            mi_opc;
        unsigned int            mi_portal;
        unsigned int            mi_rlength;
@@ -597,13 +695,12 @@ typedef struct
        struct list_head                ln_test_peers;
        struct list_head                ln_drop_rules;
        struct list_head                ln_delay_rules;
        struct list_head                ln_test_peers;
        struct list_head                ln_drop_rules;
        struct list_head                ln_delay_rules;
-
-       struct list_head                ln_nis;         /* LND instances */
-       /* NIs bond on specific CPT(s) */
-       struct list_head                ln_nis_cpt;
-       /* dying LND instances */
-       struct list_head                ln_nis_zombie;
-       lnet_ni_t                       *ln_loni;       /* the loopback NI */
+       /* LND instances */
+       struct list_head                ln_nets;
+       /* the loopback NI */
+       struct lnet_ni                  *ln_loni;
+       /* network zombie list */
+       struct list_head                ln_net_zombie;
 
        /* remote networks with routes to them */
        struct list_head                *ln_remote_nets_hash;
 
        /* remote networks with routes to them */
        struct list_head                *ln_remote_nets_hash;
index 4e1d708..2922f5e 100644 (file)
@@ -2684,9 +2684,9 @@ kgnilnd_startup(lnet_ni_t *ni)
        kgn_net_t        *net;
        ENTRY;
 
        kgn_net_t        *net;
        ENTRY;
 
-       LASSERTF(ni->ni_lnd == &the_kgnilnd,
+       LASSERTF(ni->ni_net->net_lnd == &the_kgnilnd,
                "bad LND 0x%p != the_kgnilnd @ 0x%p\n",
                "bad LND 0x%p != the_kgnilnd @ 0x%p\n",
-               ni->ni_lnd, &the_kgnilnd);
+               ni->ni_net->net_lnd, &the_kgnilnd);
 
        if (kgnilnd_data.kgn_init == GNILND_INIT_NOTHING) {
                rc = kgnilnd_base_startup();
 
        if (kgnilnd_data.kgn_init == GNILND_INIT_NOTHING) {
                rc = kgnilnd_base_startup();
index ee5a01f..1c3e2d2 100644 (file)
@@ -317,7 +317,7 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
 {
        kib_peer_t      *peer;
        kib_net_t       *net = ni->ni_data;
 {
        kib_peer_t      *peer;
        kib_net_t       *net = ni->ni_data;
-       int             cpt = lnet_cpt_of_nid(nid);
+       int             cpt = lnet_cpt_of_nid(nid, ni);
        unsigned long   flags;
 
        LASSERT(net != NULL);
        unsigned long   flags;
 
        LASSERT(net != NULL);
@@ -334,7 +334,7 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
        peer->ibp_error = 0;
        peer->ibp_last_alive = 0;
        peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
        peer->ibp_error = 0;
        peer->ibp_last_alive = 0;
        peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
-       peer->ibp_queue_depth = ni->ni_peertxcredits;
+       peer->ibp_queue_depth = ni->ni_net->net_tunables.lct_peer_tx_credits;
        atomic_set(&peer->ibp_refcount, 1);     /* 1 ref for caller */
 
        INIT_LIST_HEAD(&peer->ibp_list);        /* not in the peer table yet */
        atomic_set(&peer->ibp_refcount, 1);     /* 1 ref for caller */
 
        INIT_LIST_HEAD(&peer->ibp_list);        /* not in the peer table yet */
@@ -722,7 +722,7 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
 
        dev = net->ibn_dev;
 
 
        dev = net->ibn_dev;
 
-       cpt = lnet_cpt_of_nid(peer->ibp_nid);
+       cpt = lnet_cpt_of_nid(peer->ibp_nid, peer->ibp_ni);
        sched = kiblnd_data.kib_scheds[cpt];
 
        LASSERT(sched->ibs_nthreads > 0);
        sched = kiblnd_data.kib_scheds[cpt];
 
        LASSERT(sched->ibs_nthreads > 0);
@@ -1391,7 +1391,7 @@ kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd,
        int     mod;
        __u16   nfrags;
 
        int     mod;
        __u16   nfrags;
 
-       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+       tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
        mod = tunables->lnd_map_on_demand;
        nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod;
 
        mod = tunables->lnd_map_on_demand;
        nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod;
 
@@ -2395,7 +2395,7 @@ kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts, int ncpts)
        int             rc;
        int             i;
 
        int             rc;
        int             i;
 
-       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+       tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
        if (tunables->lnd_map_on_demand == 0) {
 
        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
        if (tunables->lnd_map_on_demand == 0) {
@@ -3177,7 +3177,7 @@ kiblnd_startup (lnet_ni_t *ni)
         int                       rc;
        int                       newdev;
 
         int                       rc;
        int                       newdev;
 
-        LASSERT (ni->ni_lnd == &the_o2iblnd);
+        LASSERT (ni->ni_net->net_lnd == &the_o2iblnd);
 
         if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
                 rc = kiblnd_base_startup();
 
         if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
                 rc = kiblnd_base_startup();
index a617b63..c398b84 100644 (file)
@@ -782,7 +782,7 @@ kiblnd_cfg_rdma_frags(struct lnet_ni *ni)
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
        int mod;
 
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
        int mod;
 
-       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+       tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
        mod = tunables->lnd_map_on_demand;
        return mod != 0 ? mod : IBLND_MAX_RDMA_FRAGS;
 }
        mod = tunables->lnd_map_on_demand;
        return mod != 0 ? mod : IBLND_MAX_RDMA_FRAGS;
 }
@@ -801,7 +801,7 @@ kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
        int concurrent_sends;
 
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
        int concurrent_sends;
 
-       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+       tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
        concurrent_sends = tunables->lnd_concurrent_sends;
 
        if (version == IBLND_MSG_VERSION_1) {
        concurrent_sends = tunables->lnd_concurrent_sends;
 
        if (version == IBLND_MSG_VERSION_1) {
@@ -941,7 +941,7 @@ kiblnd_need_noop(kib_conn_t *conn)
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
 
        LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
 
        LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
-       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+       tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
         if (conn->ibc_outstanding_credits <
            IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
 
         if (conn->ibc_outstanding_credits <
            IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
index a72bdba..d352dd3 100644 (file)
@@ -116,7 +116,7 @@ kiblnd_get_idle_tx(lnet_ni_t *ni, lnet_nid_t target)
        kib_tx_t                *tx;
        kib_tx_poolset_t        *tps;
 
        kib_tx_t                *tx;
        kib_tx_poolset_t        *tps;
 
-       tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
+       tps = net->ibn_tx_ps[lnet_cpt_of_nid(target, ni)];
        node = kiblnd_pool_alloc_node(&tps->tps_poolset);
         if (node == NULL)
                 return NULL;
        node = kiblnd_pool_alloc_node(&tps->tps_poolset);
         if (node == NULL)
                 return NULL;
@@ -2243,75 +2243,75 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
                __u32 ip = ntohl(peer_addr->sin_addr.s_addr);
                CERROR("Peer's port (%pI4h:%hu) is not privileged\n",
                       &ip, ntohs(peer_addr->sin_port));
                __u32 ip = ntohl(peer_addr->sin_addr.s_addr);
                CERROR("Peer's port (%pI4h:%hu) is not privileged\n",
                       &ip, ntohs(peer_addr->sin_port));
-                goto failed;
-        }
+               goto failed;
+       }
 
 
-        if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
-                CERROR("Short connection request\n");
-                goto failed;
-        }
+       if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
+               CERROR("Short connection request\n");
+               goto failed;
+       }
 
 
-        /* Future protocol version compatibility support!  If the
-         * o2iblnd-specific protocol changes, or when LNET unifies
-         * protocols over all LNDs, the initial connection will
-         * negotiate a protocol version.  I trap this here to avoid
-         * console errors; the reject tells the peer which protocol I
-         * speak. */
-        if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
-            reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
-                goto failed;
-        if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
-            reqmsg->ibm_version != IBLND_MSG_VERSION &&
-            reqmsg->ibm_version != IBLND_MSG_VERSION_1)
-                goto failed;
-        if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
-            reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
-            reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
-                goto failed;
+       /* Future protocol version compatibility support!  If the
+        * o2iblnd-specific protocol changes, or when LNET unifies
+        * protocols over all LNDs, the initial connection will
+        * negotiate a protocol version.  I trap this here to avoid
+        * console errors; the reject tells the peer which protocol I
+        * speak. */
+       if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
+           reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
+               goto failed;
+       if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
+           reqmsg->ibm_version != IBLND_MSG_VERSION &&
+           reqmsg->ibm_version != IBLND_MSG_VERSION_1)
+               goto failed;
+       if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
+           reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
+           reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
+               goto failed;
 
 
-        rc = kiblnd_unpack_msg(reqmsg, priv_nob);
-        if (rc != 0) {
-                CERROR("Can't parse connection request: %d\n", rc);
-                goto failed;
-        }
+       rc = kiblnd_unpack_msg(reqmsg, priv_nob);
+       if (rc != 0) {
+               CERROR("Can't parse connection request: %d\n", rc);
+               goto failed;
+       }
 
 
-        nid = reqmsg->ibm_srcnid;
-        ni  = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
+       nid = reqmsg->ibm_srcnid;
+       ni  = lnet_nid2ni_addref(reqmsg->ibm_dstnid);
 
 
-        if (ni != NULL) {
-                net = (kib_net_t *)ni->ni_data;
-                rej.ibr_incarnation = net->ibn_incarnation;
-        }
+       if (ni != NULL) {
+               net = (kib_net_t *)ni->ni_data;
+               rej.ibr_incarnation = net->ibn_incarnation;
+       }
 
 
-        if (ni == NULL ||                         /* no matching net */
-            ni->ni_nid != reqmsg->ibm_dstnid ||   /* right NET, wrong NID! */
-            net->ibn_dev != ibdev) {              /* wrong device */
+       if (ni == NULL ||                         /* no matching net */
+           ni->ni_nid != reqmsg->ibm_dstnid ||   /* right NET, wrong NID! */
+           net->ibn_dev != ibdev) {              /* wrong device */
                CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): "
                CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): "
-                       "bad dst nid %s\n", libcfs_nid2str(nid),
-                       ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
-                       ibdev->ibd_ifname, ibdev->ibd_nnets,
+                      "bad dst nid %s\n", libcfs_nid2str(nid),
+                      ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
+                      ibdev->ibd_ifname, ibdev->ibd_nnets,
                        &ibdev->ibd_ifip,
                        &ibdev->ibd_ifip,
-                       libcfs_nid2str(reqmsg->ibm_dstnid));
+                      libcfs_nid2str(reqmsg->ibm_dstnid));
 
 
-                goto failed;
-        }
+               goto failed;
+       }
 
        /* check time stamp as soon as possible */
 
        /* check time stamp as soon as possible */
-        if (reqmsg->ibm_dststamp != 0 &&
-            reqmsg->ibm_dststamp != net->ibn_incarnation) {
-                CWARN("Stale connection request\n");
-                rej.ibr_why = IBLND_REJECT_CONN_STALE;
-                goto failed;
-        }
+       if (reqmsg->ibm_dststamp != 0 &&
+           reqmsg->ibm_dststamp != net->ibn_incarnation) {
+               CWARN("Stale connection request\n");
+               rej.ibr_why = IBLND_REJECT_CONN_STALE;
+               goto failed;
+       }
 
 
-        /* I can accept peer's version */
-        version = reqmsg->ibm_version;
+       /* I can accept peer's version */
+       version = reqmsg->ibm_version;
 
 
-        if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
-                CERROR("Unexpected connreq msg type: %x from %s\n",
-                       reqmsg->ibm_type, libcfs_nid2str(nid));
-                goto failed;
-        }
+       if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
+               CERROR("Unexpected connreq msg type: %x from %s\n",
+                      reqmsg->ibm_type, libcfs_nid2str(nid));
+               goto failed;
+       }
 
        if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
            kiblnd_msg_queue_size(version, ni)) {
 
        if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
            kiblnd_msg_queue_size(version, ni)) {
@@ -2575,14 +2575,14 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version,
                 break;
 
        case IBLND_REJECT_RDMA_FRAGS: {
                 break;
 
        case IBLND_REJECT_RDMA_FRAGS: {
-               struct lnet_ioctl_config_lnd_tunables *tunables;
+               struct lnet_ioctl_config_o2iblnd_tunables *tunables;
 
                if (!cp) {
                        reason = "can't negotiate max frags";
                        goto out;
                }
 
                if (!cp) {
                        reason = "can't negotiate max frags";
                        goto out;
                }
-               tunables = peer->ibp_ni->ni_lnd_tunables;
-               if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
+               tunables = &peer->ibp_ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
+               if (!tunables->lnd_map_on_demand) {
                        reason = "map_on_demand must be enabled";
                        goto out;
                }
                        reason = "map_on_demand must be enabled";
                        goto out;
                }
index 1466dd9..54a81b5 100644 (file)
@@ -164,7 +164,7 @@ kiblnd_msg_queue_size(int version, lnet_ni_t *ni)
        if (version == IBLND_MSG_VERSION_1)
                return IBLND_MSG_QUEUE_SIZE_V1;
        else if (ni)
        if (version == IBLND_MSG_VERSION_1)
                return IBLND_MSG_QUEUE_SIZE_V1;
        else if (ni)
-               return ni->ni_peertxcredits;
+               return ni->ni_net->net_tunables.lct_peer_tx_credits;
        else
                return peer_credits;
 }
        else
                return peer_credits;
 }
@@ -173,21 +173,17 @@ int
 kiblnd_tunables_setup(lnet_ni_t *ni)
 {
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
 kiblnd_tunables_setup(lnet_ni_t *ni)
 {
        struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+       struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables;
 
        /*
         * if there was no tunables specified, setup the tunables to be
         * defaulted
         */
 
        /*
         * if there was no tunables specified, setup the tunables to be
         * defaulted
         */
-       if (!ni->ni_lnd_tunables) {
-               LIBCFS_ALLOC(ni->ni_lnd_tunables,
-                            sizeof(*ni->ni_lnd_tunables));
-               if (!ni->ni_lnd_tunables)
-                       return -ENOMEM;
-
-               memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
+       if (!ni->ni_lnd_tunables_set)
+               memcpy(&ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib,
                       &default_tunables, sizeof(*tunables));
                       &default_tunables, sizeof(*tunables));
-       }
-       tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
+       tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
        /* Current API version */
        tunables->lnd_version = 0;
 
        /* Current API version */
        tunables->lnd_version = 0;
@@ -198,35 +194,39 @@ kiblnd_tunables_setup(lnet_ni_t *ni)
                return -EINVAL;
        }
 
                return -EINVAL;
        }
 
-       if (!ni->ni_peertimeout)
-               ni->ni_peertimeout = peer_timeout;
+       net_tunables = &ni->ni_net->net_tunables;
 
 
-       if (!ni->ni_maxtxcredits)
-               ni->ni_maxtxcredits = credits;
+       if (net_tunables->lct_peer_timeout == -1)
+               net_tunables->lct_peer_timeout = peer_timeout;
 
 
-       if (!ni->ni_peertxcredits)
-               ni->ni_peertxcredits = peer_credits;
+       if (net_tunables->lct_max_tx_credits == -1)
+               net_tunables->lct_max_tx_credits = credits;
 
 
-       if (!ni->ni_peerrtrcredits)
-               ni->ni_peerrtrcredits = peer_buffer_credits;
+       if (net_tunables->lct_peer_tx_credits == -1)
+               net_tunables->lct_peer_tx_credits = peer_credits;
 
 
-       if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
-               ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
+       if (net_tunables->lct_peer_rtr_credits == -1)
+               net_tunables->lct_peer_rtr_credits = peer_buffer_credits;
 
 
-       if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
-               ni->ni_peertxcredits = IBLND_CREDITS_MAX;
+       if (net_tunables->lct_peer_tx_credits < IBLND_CREDITS_DEFAULT)
+               net_tunables->lct_peer_tx_credits = IBLND_CREDITS_DEFAULT;
 
 
-       if (ni->ni_peertxcredits > credits)
-               ni->ni_peertxcredits = credits;
+       if (net_tunables->lct_peer_tx_credits > IBLND_CREDITS_MAX)
+               net_tunables->lct_peer_tx_credits = IBLND_CREDITS_MAX;
+
+       if (net_tunables->lct_peer_tx_credits >
+           net_tunables->lct_max_tx_credits)
+               net_tunables->lct_peer_tx_credits =
+                       net_tunables->lct_max_tx_credits;
 
        if (!tunables->lnd_peercredits_hiw)
                tunables->lnd_peercredits_hiw = peer_credits_hiw;
 
 
        if (!tunables->lnd_peercredits_hiw)
                tunables->lnd_peercredits_hiw = peer_credits_hiw;
 
-       if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
-               tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
+       if (tunables->lnd_peercredits_hiw < net_tunables->lct_peer_tx_credits / 2)
+               tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits / 2;
 
 
-       if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
-               tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
+       if (tunables->lnd_peercredits_hiw >= net_tunables->lct_peer_tx_credits)
+               tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits - 1;
 
        if (tunables->lnd_map_on_demand < 0 ||
            tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
 
        if (tunables->lnd_map_on_demand < 0 ||
            tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
@@ -243,22 +243,24 @@ kiblnd_tunables_setup(lnet_ni_t *ni)
                if (tunables->lnd_map_on_demand > 0 &&
                    tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
                        tunables->lnd_concurrent_sends =
                if (tunables->lnd_map_on_demand > 0 &&
                    tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
                        tunables->lnd_concurrent_sends =
-                                               ni->ni_peertxcredits * 2;
+                                       net_tunables->lct_peer_tx_credits * 2;
                } else {
                } else {
-                       tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
+                       tunables->lnd_concurrent_sends =
+                               net_tunables->lct_peer_tx_credits;
                }
        }
 
                }
        }
 
-       if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
-               tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
+       if (tunables->lnd_concurrent_sends > net_tunables->lct_peer_tx_credits * 2)
+               tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits * 2;
 
 
-       if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
-               tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
+       if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits / 2)
+               tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits / 2;
 
 
-       if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
+       if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits) {
                CWARN("Concurrent sends %d is lower than message "
                      "queue size: %d, performance may drop slightly.\n",
                CWARN("Concurrent sends %d is lower than message "
                      "queue size: %d, performance may drop slightly.\n",
-                     tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
+                     tunables->lnd_concurrent_sends,
+                     net_tunables->lct_peer_tx_credits);
        }
 
        if (!tunables->lnd_fmr_pool_size)
        }
 
        if (!tunables->lnd_fmr_pool_size)
index 33c34cd..bed371c 100644 (file)
@@ -98,7 +98,7 @@ ksocknal_destroy_route (ksock_route_t *route)
 static int
 ksocknal_create_peer(ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
 {
 static int
 ksocknal_create_peer(ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
 {
-       int             cpt = lnet_cpt_of_nid(id.nid);
+       int             cpt = lnet_cpt_of_nid(id.nid, ni);
        ksock_net_t     *net = ni->ni_data;
        ksock_peer_t    *peer;
 
        ksock_net_t     *net = ni->ni_data;
        ksock_peer_t    *peer;
 
@@ -1117,7 +1117,7 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route,
         LASSERT (conn->ksnc_proto != NULL);
         LASSERT (peerid.nid != LNET_NID_ANY);
 
         LASSERT (conn->ksnc_proto != NULL);
         LASSERT (peerid.nid != LNET_NID_ANY);
 
-       cpt = lnet_cpt_of_nid(peerid.nid);
+       cpt = lnet_cpt_of_nid(peerid.nid, ni);
 
         if (active) {
                 ksocknal_peer_addref(peer);
 
         if (active) {
                 ksocknal_peer_addref(peer);
@@ -2775,7 +2775,7 @@ ksocknal_startup (lnet_ni_t *ni)
         int           rc;
         int           i;
 
         int           rc;
         int           i;
 
-        LASSERT (ni->ni_lnd == &the_ksocklnd);
+        LASSERT (ni->ni_net->net_lnd == &the_ksocklnd);
 
         if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
                 rc = ksocknal_base_startup();
 
         if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
                 rc = ksocknal_base_startup();
@@ -2790,10 +2790,17 @@ ksocknal_startup (lnet_ni_t *ni)
        spin_lock_init(&net->ksnn_lock);
         net->ksnn_incarnation = ksocknal_new_incarnation();
         ni->ni_data = net;
        spin_lock_init(&net->ksnn_lock);
         net->ksnn_incarnation = ksocknal_new_incarnation();
         ni->ni_data = net;
-        ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
-        ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
-        ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
-        ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
+       if (!ni->ni_net->net_tunables_set) {
+               ni->ni_net->net_tunables.lct_peer_timeout =
+                       *ksocknal_tunables.ksnd_peertimeout;
+               ni->ni_net->net_tunables.lct_max_tx_credits =
+                       *ksocknal_tunables.ksnd_credits;
+               ni->ni_net->net_tunables.lct_peer_tx_credits =
+                       *ksocknal_tunables.ksnd_peertxcredits;
+               ni->ni_net->net_tunables.lct_peer_rtr_credits =
+                       *ksocknal_tunables.ksnd_peerrtrcredits;
+               ni->ni_net->net_tunables_set = true;
+       }
 
         if (ni->ni_interfaces[0] == NULL) {
                 rc = ksocknal_enumerate_interfaces(net);
 
         if (ni->ni_interfaces[0] == NULL) {
                 rc = ksocknal_enumerate_interfaces(net);
index 4de013a..8230ceb 100644 (file)
@@ -310,8 +310,8 @@ lnet_accept(struct socket *sock, __u32 magic)
        if (flip)
                __swab64s(&cr.acr_nid);
 
        if (flip)
                __swab64s(&cr.acr_nid);
 
-       ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
-       if (ni == NULL ||               /* no matching net */
+       ni = lnet_nid2ni_addref(cr.acr_nid);
+       if (ni == NULL ||               /* no matching net */
            ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
                if (ni != NULL)
                        lnet_ni_decref(ni);
            ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
                if (ni != NULL)
                        lnet_ni_decref(ni);
@@ -321,7 +321,7 @@ lnet_accept(struct socket *sock, __u32 magic)
                return -EPERM;
        }
 
                return -EPERM;
        }
 
-       if (ni->ni_lnd->lnd_accept == NULL) {
+       if (ni->ni_net->net_lnd->lnd_accept == NULL) {
                /* This catches a request for the loopback LND */
                lnet_ni_decref(ni);
                LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h "
                /* This catches a request for the loopback LND */
                lnet_ni_decref(ni);
                LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h "
@@ -333,7 +333,7 @@ lnet_accept(struct socket *sock, __u32 magic)
        CDEBUG(D_NET, "Accept %s from %pI4h\n",
               libcfs_nid2str(cr.acr_nid), &peer_ip);
 
        CDEBUG(D_NET, "Accept %s from %pI4h\n",
               libcfs_nid2str(cr.acr_nid), &peer_ip);
 
-       rc = ni->ni_lnd->lnd_accept(ni, sock);
+       rc = ni->ni_net->net_lnd->lnd_accept(ni, sock);
 
        lnet_ni_decref(ni);
        return rc;
 
        lnet_ni_decref(ni);
        return rc;
@@ -476,7 +476,7 @@ lnet_acceptor_start(void)
        if (rc <= 0)
                return rc;
 
        if (rc <= 0)
                return rc;
 
-       if (lnet_count_acceptor_nis() == 0)  /* not required */
+       if (lnet_count_acceptor_nets() == 0)  /* not required */
                return 0;
 
        task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
                return 0;
 
        task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
index 270629d..4910d3a 100644 (file)
@@ -57,6 +57,11 @@ static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT;
 module_param(rnet_htable_size, int, 0444);
 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
 
 module_param(rnet_htable_size, int, 0444);
 MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
 
+static int use_tcp_bonding = false;
+module_param(use_tcp_bonding, int, 0444);
+MODULE_PARM_DESC(use_tcp_bonding,
+                "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
+
 static int lnet_ping(lnet_process_id_t id, signed long timeout,
                     lnet_process_id_t __user *ids, int n_ids);
 
 static int lnet_ping(lnet_process_id_t id, signed long timeout,
                     lnet_process_id_t __user *ids, int n_ids);
 
@@ -584,9 +589,7 @@ lnet_prepare(lnet_pid_t requested_pid)
        the_lnet.ln_pid = requested_pid;
 
        INIT_LIST_HEAD(&the_lnet.ln_test_peers);
        the_lnet.ln_pid = requested_pid;
 
        INIT_LIST_HEAD(&the_lnet.ln_test_peers);
-       INIT_LIST_HEAD(&the_lnet.ln_nis);
-       INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
-       INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
+       INIT_LIST_HEAD(&the_lnet.ln_nets);
        INIT_LIST_HEAD(&the_lnet.ln_routers);
        INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
        INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
        INIT_LIST_HEAD(&the_lnet.ln_routers);
        INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
        INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
@@ -667,9 +670,7 @@ lnet_unprepare (void)
 
        LASSERT(the_lnet.ln_refcount == 0);
        LASSERT(list_empty(&the_lnet.ln_test_peers));
 
        LASSERT(the_lnet.ln_refcount == 0);
        LASSERT(list_empty(&the_lnet.ln_test_peers));
-       LASSERT(list_empty(&the_lnet.ln_nis));
-       LASSERT(list_empty(&the_lnet.ln_nis_cpt));
-       LASSERT(list_empty(&the_lnet.ln_nis_zombie));
+       LASSERT(list_empty(&the_lnet.ln_nets));
 
        lnet_portals_destroy();
 
 
        lnet_portals_destroy();
 
@@ -700,18 +701,17 @@ lnet_unprepare (void)
 }
 
 lnet_ni_t  *
 }
 
 lnet_ni_t  *
-lnet_net2ni_locked(__u32 net, int cpt)
+lnet_net2ni_locked(__u32 net_id, int cpt)
 {
 {
-       struct list_head *tmp;
-       lnet_ni_t        *ni;
+       struct lnet_ni   *ni;
+       struct lnet_net  *net;
 
        LASSERT(cpt != LNET_LOCK_EX);
 
 
        LASSERT(cpt != LNET_LOCK_EX);
 
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
-
-               if (LNET_NIDNET(ni->ni_nid) == net) {
-                       lnet_ni_addref_locked(ni, cpt);
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               if (net->net_id == net_id) {
+                       ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+                                       ni_netlist);
                        return ni;
                }
        }
                        return ni;
                }
        }
@@ -732,6 +732,19 @@ lnet_net2ni(__u32 net)
 }
 EXPORT_SYMBOL(lnet_net2ni);
 
 }
 EXPORT_SYMBOL(lnet_net2ni);
 
+struct lnet_net *
+lnet_get_net_locked(__u32 net_id)
+{
+       struct lnet_net  *net;
+
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               if (net->net_id == net_id)
+                       return net;
+       }
+
+       return NULL;
+}
+
 static unsigned int
 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
 {
 static unsigned int
 lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
 {
@@ -752,31 +765,41 @@ lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
 }
 
 int
 }
 
 int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
+lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
 {
 {
-       struct lnet_ni *ni;
+       struct lnet_net *net;
 
        /* must called with hold of lnet_net_lock */
        if (LNET_CPT_NUMBER == 1)
                return 0; /* the only one */
 
 
        /* must called with hold of lnet_net_lock */
        if (LNET_CPT_NUMBER == 1)
                return 0; /* the only one */
 
-       /* take lnet_net_lock(any) would be OK */
-       if (!list_empty(&the_lnet.ln_nis_cpt)) {
-               list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
-                       if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
-                               continue;
+       /*
+        * If NI is provided then use the CPT identified in the NI cpt
+        * list if one exists. If one doesn't exist, then that NI is
+        * associated with all CPTs and it follows that the net it belongs
+        * to is implicitly associated with all CPTs, so just hash the nid
+        * and return that.
+        */
+       if (ni != NULL) {
+               if (ni->ni_cpts != NULL)
+                       return ni->ni_cpts[lnet_nid_cpt_hash(nid,
+                                                            ni->ni_ncpts)];
+               else
+                       return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+       }
 
 
-                       LASSERT(ni->ni_cpts != NULL);
-                       return ni->ni_cpts[lnet_nid_cpt_hash
-                                          (nid, ni->ni_ncpts)];
-               }
+       /* no NI provided so look at the net */
+       net = lnet_get_net_locked(LNET_NIDNET(nid));
+
+       if (net != NULL && net->net_cpts != NULL) {
+               return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
        }
 
        return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
 }
 
 int
        }
 
        return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
 }
 
 int
-lnet_cpt_of_nid(lnet_nid_t nid)
+lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
 {
        int     cpt;
        int     cpt2;
 {
        int     cpt;
        int     cpt2;
@@ -784,11 +807,10 @@ lnet_cpt_of_nid(lnet_nid_t nid)
        if (LNET_CPT_NUMBER == 1)
                return 0; /* the only one */
 
        if (LNET_CPT_NUMBER == 1)
                return 0; /* the only one */
 
-       if (list_empty(&the_lnet.ln_nis_cpt))
-               return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
        cpt = lnet_net_lock_current();
        cpt = lnet_net_lock_current();
-       cpt2 = lnet_cpt_of_nid_locked(nid);
+
+       cpt2 = lnet_cpt_of_nid_locked(nid, ni);
+
        lnet_net_unlock(cpt);
 
        return cpt2;
        lnet_net_unlock(cpt);
 
        return cpt2;
@@ -796,42 +818,66 @@ lnet_cpt_of_nid(lnet_nid_t nid)
 EXPORT_SYMBOL(lnet_cpt_of_nid);
 
 int
 EXPORT_SYMBOL(lnet_cpt_of_nid);
 
 int
-lnet_islocalnet(__u32 net)
+lnet_islocalnet(__u32 net_id)
 {
 {
-       struct lnet_ni  *ni;
+       struct lnet_net *net;
        int             cpt;
        int             cpt;
+       bool            local;
 
        cpt = lnet_net_lock_current();
 
 
        cpt = lnet_net_lock_current();
 
-       ni = lnet_net2ni_locked(net, cpt);
-       if (ni != NULL)
-               lnet_ni_decref_locked(ni, cpt);
+       net = lnet_get_net_locked(net_id);
+
+       local = net != NULL;
 
        lnet_net_unlock(cpt);
 
 
        lnet_net_unlock(cpt);
 
-       return ni != NULL;
+       return local;
+}
+
+bool
+lnet_is_ni_healthy_locked(struct lnet_ni *ni)
+{
+       if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
+           ni->ni_state == LNET_NI_STATE_DEGRADED)
+               return true;
+
+       return false;
 }
 
 lnet_ni_t  *
 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
 {
 }
 
 lnet_ni_t  *
 lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
 {
+       struct lnet_net  *net;
        struct lnet_ni   *ni;
        struct lnet_ni   *ni;
-       struct list_head *tmp;
 
        LASSERT(cpt != LNET_LOCK_EX);
 
 
        LASSERT(cpt != LNET_LOCK_EX);
 
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
-
-               if (ni->ni_nid == nid) {
-                       lnet_ni_addref_locked(ni, cpt);
-                       return ni;
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       if (ni->ni_nid == nid)
+                               return ni;
                }
        }
 
        return NULL;
 }
 
                }
        }
 
        return NULL;
 }
 
+lnet_ni_t *
+lnet_nid2ni_addref(lnet_nid_t nid)
+{
+       lnet_ni_t *ni;
+
+       lnet_net_lock(0);
+       ni = lnet_nid2ni_locked(nid, 0);
+       if (ni)
+               lnet_ni_addref_locked(ni, 0);
+       lnet_net_unlock(0);
+
+       return ni;
+}
+EXPORT_SYMBOL(lnet_nid2ni_addref);
+
 int
 lnet_islocalnid(lnet_nid_t nid)
 {
 int
 lnet_islocalnid(lnet_nid_t nid)
 {
@@ -840,27 +886,24 @@ lnet_islocalnid(lnet_nid_t nid)
 
        cpt = lnet_net_lock_current();
        ni = lnet_nid2ni_locked(nid, cpt);
 
        cpt = lnet_net_lock_current();
        ni = lnet_nid2ni_locked(nid, cpt);
-       if (ni != NULL)
-               lnet_ni_decref_locked(ni, cpt);
        lnet_net_unlock(cpt);
 
        return ni != NULL;
 }
 
 int
        lnet_net_unlock(cpt);
 
        return ni != NULL;
 }
 
 int
-lnet_count_acceptor_nis (void)
+lnet_count_acceptor_nets(void)
 {
        /* Return the # of NIs that need the acceptor. */
        int              count = 0;
 {
        /* Return the # of NIs that need the acceptor. */
        int              count = 0;
-       struct list_head *tmp;
-       struct lnet_ni   *ni;
+       struct lnet_net  *net;
        int              cpt;
 
        cpt = lnet_net_lock_current();
        int              cpt;
 
        cpt = lnet_net_lock_current();
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
-
-               if (ni->ni_lnd->lnd_accept != NULL)
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               /* all socklnd type networks should have the acceptor
+                * thread started */
+               if (net->net_lnd->lnd_accept != NULL)
                        count++;
        }
 
                        count++;
        }
 
@@ -891,15 +934,30 @@ lnet_ping_info_create(int num_ni)
 }
 
 static inline int
 }
 
 static inline int
+lnet_get_net_ni_count_locked(struct lnet_net *net)
+{
+       struct lnet_ni  *ni;
+       int             count = 0;
+
+       list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+               count++;
+
+       return count;
+}
+
+static inline int
 lnet_get_ni_count(void)
 {
 lnet_get_ni_count(void)
 {
-       struct lnet_ni *ni;
-       int            count = 0;
+       struct lnet_ni  *ni;
+       struct lnet_net *net;
+       int             count = 0;
 
        lnet_net_lock(0);
 
 
        lnet_net_lock(0);
 
-       list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
-               count++;
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+                       count++;
+       }
 
        lnet_net_unlock(0);
 
 
        lnet_net_unlock(0);
 
@@ -917,14 +975,17 @@ lnet_ping_info_free(struct lnet_ping_info *pinfo)
 static void
 lnet_ping_info_destroy(void)
 {
 static void
 lnet_ping_info_destroy(void)
 {
+       struct lnet_net *net;
        struct lnet_ni  *ni;
 
        lnet_net_lock(LNET_LOCK_EX);
 
        struct lnet_ni  *ni;
 
        lnet_net_lock(LNET_LOCK_EX);
 
-       list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-               lnet_ni_lock(ni);
-               ni->ni_status = NULL;
-               lnet_ni_unlock(ni);
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       lnet_ni_lock(ni);
+                       ni->ni_status = NULL;
+                       lnet_ni_unlock(ni);
+               }
        }
 
        lnet_ping_info_free(the_lnet.ln_ping_info);
        }
 
        lnet_ping_info_free(the_lnet.ln_ping_info);
@@ -1029,24 +1090,29 @@ static void
 lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
 {
        int                     i;
 lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
 {
        int                     i;
-       lnet_ni_t               *ni;
+       struct lnet_ni          *ni;
+       struct lnet_net         *net;
        struct lnet_ni_status *ns;
 
        i = 0;
        struct lnet_ni_status *ns;
 
        i = 0;
-       list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-               LASSERT(i < ping_info->pi_nnis);
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       LASSERT(i < ping_info->pi_nnis);
 
 
-               ns = &ping_info->pi_ni[i];
+                       ns = &ping_info->pi_ni[i];
 
 
-               ns->ns_nid = ni->ni_nid;
+                       ns->ns_nid = ni->ni_nid;
 
 
-               lnet_ni_lock(ni);
-               ns->ns_status = (ni->ni_status != NULL) ?
-                               ni->ni_status->ns_status : LNET_NI_STATUS_UP;
-               ni->ni_status = ns;
-               lnet_ni_unlock(ni);
+                       lnet_ni_lock(ni);
+                       ns->ns_status = (ni->ni_status != NULL) ?
+                                       ni->ni_status->ns_status :
+                                               LNET_NI_STATUS_UP;
+                       ni->ni_status = ns;
+                       lnet_ni_unlock(ni);
+
+                       i++;
+               }
 
 
-               i++;
        }
 }
 
        }
 }
 
@@ -1101,11 +1167,11 @@ lnet_ni_tq_credits(lnet_ni_t *ni)
        LASSERT(ni->ni_ncpts >= 1);
 
        if (ni->ni_ncpts == 1)
        LASSERT(ni->ni_ncpts >= 1);
 
        if (ni->ni_ncpts == 1)
-               return ni->ni_maxtxcredits;
+               return ni->ni_net->net_tunables.lct_max_tx_credits;
 
 
-       credits = ni->ni_maxtxcredits / ni->ni_ncpts;
-       credits = max(credits, 8 * ni->ni_peertxcredits);
-       credits = min(credits, ni->ni_maxtxcredits);
+       credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
+       credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
+       credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
 
        return credits;
 }
 
        return credits;
 }
@@ -1119,37 +1185,43 @@ lnet_ni_unlink_locked(lnet_ni_t *ni)
        }
 
        /* move it to zombie list and nobody can find it anymore */
        }
 
        /* move it to zombie list and nobody can find it anymore */
-       LASSERT(!list_empty(&ni->ni_list));
-       list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
-       lnet_ni_decref_locked(ni, 0);   /* drop ln_nis' ref */
+       LASSERT(!list_empty(&ni->ni_netlist));
+       list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
+       lnet_ni_decref_locked(ni, 0);
 }
 
 static void
 }
 
 static void
-lnet_clear_zombies_nis_locked(void)
+lnet_clear_zombies_nis_locked(struct lnet_net *net)
 {
        int             i;
        int             islo;
        lnet_ni_t       *ni;
 {
        int             i;
        int             islo;
        lnet_ni_t       *ni;
+       struct list_head *zombie_list = &net->net_ni_zombie;
 
 
-       /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
-        * and shut them down in guaranteed thread context */
+       /*
+        * Now wait for the NIs I just nuked to show up on the zombie
+        * list and shut them down in guaranteed thread context
+        */
        i = 2;
        i = 2;
-       while (!list_empty(&the_lnet.ln_nis_zombie)) {
+       while (!list_empty(zombie_list)) {
                int     *ref;
                int     j;
 
                int     *ref;
                int     j;
 
-               ni = list_entry(the_lnet.ln_nis_zombie.next,
-                               lnet_ni_t, ni_list);
-               list_del_init(&ni->ni_list);
+               ni = list_entry(zombie_list->next,
+                               lnet_ni_t, ni_netlist);
+               list_del_init(&ni->ni_netlist);
+               /* the ni should be in deleting state. If it's not it's
+                * a bug */
+               LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
                cfs_percpt_for_each(ref, j, ni->ni_refs) {
                        if (*ref == 0)
                                continue;
                        /* still busy, add it back to zombie list */
                cfs_percpt_for_each(ref, j, ni->ni_refs) {
                        if (*ref == 0)
                                continue;
                        /* still busy, add it back to zombie list */
-                       list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
+                       list_add(&ni->ni_netlist, zombie_list);
                        break;
                }
 
                        break;
                }
 
-               if (!list_empty(&ni->ni_list)) {
+               if (!list_empty(&ni->ni_netlist)) {
                        lnet_net_unlock(LNET_LOCK_EX);
                        ++i;
                        if ((i & (-i)) == i) {
                        lnet_net_unlock(LNET_LOCK_EX);
                        ++i;
                        if ((i & (-i)) == i) {
@@ -1163,16 +1235,12 @@ lnet_clear_zombies_nis_locked(void)
                        continue;
                }
 
                        continue;
                }
 
-               ni->ni_lnd->lnd_refcount--;
                lnet_net_unlock(LNET_LOCK_EX);
 
                lnet_net_unlock(LNET_LOCK_EX);
 
-               islo = ni->ni_lnd->lnd_type == LOLND;
+               islo = ni->ni_net->net_lnd->lnd_type == LOLND;
 
                LASSERT(!in_interrupt());
 
                LASSERT(!in_interrupt());
-               (ni->ni_lnd->lnd_shutdown)(ni);
-
-               /* can't deref lnd anymore now; it might have unregistered
-                * itself...  */
+               (net->net_lnd->lnd_shutdown)(ni);
 
                if (!islo)
                        CDEBUG(D_LNI, "Removed LNI %s\n",
 
                if (!islo)
                        CDEBUG(D_LNI, "Removed LNI %s\n",
@@ -1184,60 +1252,15 @@ lnet_clear_zombies_nis_locked(void)
        }
 }
 
        }
 }
 
-static void
-lnet_shutdown_lndnis(void)
-{
-       int             i;
-       lnet_ni_t       *ni;
-
-       /* NB called holding the global mutex */
-
-       /* All quiet on the API front */
-       LASSERT(!the_lnet.ln_shutdown);
-       LASSERT(the_lnet.ln_refcount == 0);
-       LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
-       lnet_net_lock(LNET_LOCK_EX);
-       the_lnet.ln_shutdown = 1;       /* flag shutdown */
-
-       /* Unlink NIs from the global table */
-       while (!list_empty(&the_lnet.ln_nis)) {
-               ni = list_entry(the_lnet.ln_nis.next,
-                               lnet_ni_t, ni_list);
-               lnet_ni_unlink_locked(ni);
-       }
-
-       /* Drop the cached loopback NI. */
-       if (the_lnet.ln_loni != NULL) {
-               lnet_ni_decref_locked(the_lnet.ln_loni, 0);
-               the_lnet.ln_loni = NULL;
-       }
-
-       lnet_net_unlock(LNET_LOCK_EX);
-
-       /* Clear lazy portals and drop delayed messages which hold refs
-        * on their lnet_msg_t::msg_rxpeer */
-       for (i = 0; i < the_lnet.ln_nportals; i++)
-               LNetClearLazyPortal(i);
-
-       /* Clear the peer table and wait for all peers to go (they hold refs on
-        * their NIs) */
-       lnet_peer_tables_cleanup(NULL);
-
-       lnet_net_lock(LNET_LOCK_EX);
-
-       lnet_clear_zombies_nis_locked();
-       the_lnet.ln_shutdown = 0;
-       lnet_net_unlock(LNET_LOCK_EX);
-}
-
 /* shutdown down the NI and release refcount */
 static void
 lnet_shutdown_lndni(struct lnet_ni *ni)
 {
        int i;
 /* shutdown down the NI and release refcount */
 static void
 lnet_shutdown_lndni(struct lnet_ni *ni)
 {
        int i;
+       struct lnet_net *net = ni->ni_net;
 
        lnet_net_lock(LNET_LOCK_EX);
 
        lnet_net_lock(LNET_LOCK_EX);
+       ni->ni_state = LNET_NI_STATE_DELETING;
        lnet_ni_unlink_locked(ni);
        lnet_net_unlock(LNET_LOCK_EX);
 
        lnet_ni_unlink_locked(ni);
        lnet_net_unlock(LNET_LOCK_EX);
 
@@ -1249,147 +1272,131 @@ lnet_shutdown_lndni(struct lnet_ni *ni)
        lnet_peer_tables_cleanup(ni);
 
        lnet_net_lock(LNET_LOCK_EX);
        lnet_peer_tables_cleanup(ni);
 
        lnet_net_lock(LNET_LOCK_EX);
-       lnet_clear_zombies_nis_locked();
+       lnet_clear_zombies_nis_locked(net);
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
        lnet_net_unlock(LNET_LOCK_EX);
 }
 
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
+static void
+lnet_shutdown_lndnet(struct lnet_net *net)
 {
 {
-       struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
-       int                     rc = -EINVAL;
-       __u32                   lnd_type;
-       lnd_t                   *lnd;
-       struct lnet_tx_queue    *tq;
-       int                     i;
+       struct lnet_ni *ni;
 
 
-       lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+       lnet_net_lock(LNET_LOCK_EX);
 
 
-       LASSERT(libcfs_isknown_lnd(lnd_type));
+       net->net_state = LNET_NET_STATE_DELETING;
 
 
-       if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
-           lnd_type == IIBLND || lnd_type == VIBLND) {
-               CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
-               goto failed0;
+       list_del_init(&net->net_list);
+
+       while (!list_empty(&net->net_ni_list)) {
+               ni = list_entry(net->net_ni_list.next,
+                               lnet_ni_t, ni_netlist);
+               lnet_net_unlock(LNET_LOCK_EX);
+               lnet_shutdown_lndni(ni);
+               lnet_net_lock(LNET_LOCK_EX);
        }
 
        }
 
-       /* Make sure this new NI is unique. */
-       lnet_net_lock(LNET_LOCK_EX);
-       rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
+       /*
+        * decrement ref count on lnd only when the entire network goes
+        * away
+        */
+       net->net_lnd->lnd_refcount--;
+
        lnet_net_unlock(LNET_LOCK_EX);
 
        lnet_net_unlock(LNET_LOCK_EX);
 
-       if (!rc) {
-               if (lnd_type == LOLND) {
-                       lnet_ni_free(ni);
-                       return 0;
-               }
+       lnet_net_free(net);
+}
 
 
-               CERROR("Net %s is not unique\n",
-                      libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+static void
+lnet_shutdown_lndnets(void)
+{
+       struct lnet_net *net;
 
 
-               rc = -EEXIST;
-               goto failed0;
-       }
+       /* NB called holding the global mutex */
 
 
-       mutex_lock(&the_lnet.ln_lnd_mutex);
-       lnd = lnet_find_lnd_by_type(lnd_type);
+       /* All quiet on the API front */
+       LASSERT(!the_lnet.ln_shutdown);
+       LASSERT(the_lnet.ln_refcount == 0);
 
 
-       if (lnd == NULL) {
-               mutex_unlock(&the_lnet.ln_lnd_mutex);
-               rc = request_module("%s", libcfs_lnd2modname(lnd_type));
-               mutex_lock(&the_lnet.ln_lnd_mutex);
+       lnet_net_lock(LNET_LOCK_EX);
+       the_lnet.ln_shutdown = 1;       /* flag shutdown */
 
 
-               lnd = lnet_find_lnd_by_type(lnd_type);
-               if (lnd == NULL) {
-                       mutex_unlock(&the_lnet.ln_lnd_mutex);
-                       CERROR("Can't load LND %s, module %s, rc=%d\n",
-                              libcfs_lnd2str(lnd_type),
-                              libcfs_lnd2modname(lnd_type), rc);
-#ifndef HAVE_MODULE_LOADING_SUPPORT
-                       LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
-                                          "compiled with kernel module "
-                                          "loading support.");
-#endif
-                       rc = -EINVAL;
-                       goto failed0;
-               }
+       while (!list_empty(&the_lnet.ln_nets)) {
+               /*
+                * move the nets to the zombie list to avoid them being
+                * picked up for new work. LONET is also included in the
+                * Nets that will be moved to the zombie list
+                */
+               net = list_entry(the_lnet.ln_nets.next,
+                                struct lnet_net, net_list);
+               list_move(&net->net_list, &the_lnet.ln_net_zombie);
        }
 
        }
 
-       lnet_net_lock(LNET_LOCK_EX);
-       lnd->lnd_refcount++;
+       /* Drop the cached loopback Net. */
+       if (the_lnet.ln_loni != NULL) {
+               lnet_ni_decref_locked(the_lnet.ln_loni, 0);
+               the_lnet.ln_loni = NULL;
+       }
        lnet_net_unlock(LNET_LOCK_EX);
 
        lnet_net_unlock(LNET_LOCK_EX);
 
-       ni->ni_lnd = lnd;
+       /* iterate through the net zombie list and delete each net */
+       while (!list_empty(&the_lnet.ln_net_zombie)) {
+               net = list_entry(the_lnet.ln_net_zombie.next,
+                                struct lnet_net, net_list);
+               lnet_shutdown_lndnet(net);
+       }
 
 
-       if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
-               lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+       lnet_net_lock(LNET_LOCK_EX);
+       the_lnet.ln_shutdown = 0;
+       lnet_net_unlock(LNET_LOCK_EX);
+}
 
 
-       if (lnd_tunables != NULL) {
-               LIBCFS_ALLOC(ni->ni_lnd_tunables,
-                            sizeof(*ni->ni_lnd_tunables));
-               if (ni->ni_lnd_tunables == NULL) {
-                       mutex_unlock(&the_lnet.ln_lnd_mutex);
-                       rc = -ENOMEM;
-                       goto failed0;
-               }
-               memcpy(ni->ni_lnd_tunables, lnd_tunables,
-                      sizeof(*ni->ni_lnd_tunables));
-       }
+static int
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
+{
+       int                     rc = -EINVAL;
+       struct lnet_tx_queue    *tq;
+       int                     i;
+       struct lnet_net         *net = ni->ni_net;
 
 
-       /* If given some LND tunable parameters, parse those now to
-        * override the values in the NI structure. */
-       if (conf) {
-               if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
-                       ni->ni_peerrtrcredits =
-                               conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
-               if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
-                       ni->ni_peertimeout =
-                               conf->cfg_config_u.cfg_net.net_peer_timeout;
-               if (conf->cfg_config_u.cfg_net.net_peer_tx_credits >= 0)
-                       ni->ni_peertxcredits =
-                               conf->cfg_config_u.cfg_net.net_peer_tx_credits;
-               if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
-                       ni->ni_maxtxcredits =
-                               conf->cfg_config_u.cfg_net.net_max_tx_credits;
+       mutex_lock(&the_lnet.ln_lnd_mutex);
+
+       if (tun) {
+               memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
+               ni->ni_lnd_tunables_set = true;
        }
 
        }
 
-       rc = (lnd->lnd_startup)(ni);
+       rc = (net->net_lnd->lnd_startup)(ni);
 
        mutex_unlock(&the_lnet.ln_lnd_mutex);
 
        if (rc != 0) {
                LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
 
        mutex_unlock(&the_lnet.ln_lnd_mutex);
 
        if (rc != 0) {
                LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
-                                  rc, libcfs_lnd2str(lnd->lnd_type));
+                                  rc, libcfs_lnd2str(net->net_lnd->lnd_type));
                lnet_net_lock(LNET_LOCK_EX);
                lnet_net_lock(LNET_LOCK_EX);
-               lnd->lnd_refcount--;
+               net->net_lnd->lnd_refcount--;
                lnet_net_unlock(LNET_LOCK_EX);
                goto failed0;
        }
 
                lnet_net_unlock(LNET_LOCK_EX);
                goto failed0;
        }
 
-       LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
-
-       lnet_net_lock(LNET_LOCK_EX);
-       /* refcount for ln_nis */
-       lnet_ni_addref_locked(ni, 0);
-       list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
-       if (ni->ni_cpts != NULL) {
-               lnet_ni_addref_locked(ni, 0);
-               list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
-       }
-
-       lnet_net_unlock(LNET_LOCK_EX);
+       ni->ni_state = LNET_NI_STATE_ACTIVE;
 
 
-       if (lnd->lnd_type == LOLND) {
+       /* We keep a reference on the loopback net through the loopback NI */
+       if (net->net_lnd->lnd_type == LOLND) {
                lnet_ni_addref(ni);
                LASSERT(the_lnet.ln_loni == NULL);
                the_lnet.ln_loni = ni;
                lnet_ni_addref(ni);
                LASSERT(the_lnet.ln_loni == NULL);
                the_lnet.ln_loni = ni;
+               ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
+               ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
+               ni->ni_net->net_tunables.lct_max_tx_credits = 0;
+               ni->ni_net->net_tunables.lct_peer_timeout = 0;
                return 0;
        }
 
                return 0;
        }
 
-       if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) {
+       if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
+           ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
                LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
                LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
-                                  libcfs_lnd2str(lnd->lnd_type),
-                                  ni->ni_peertxcredits == 0 ?
+                                  libcfs_lnd2str(net->net_lnd->lnd_type),
+                                  ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
                                        "" : "per-peer ");
                /* shutdown the NI since if we get here then it must've already
                 * been started
                                        "" : "per-peer ");
                /* shutdown the NI since if we get here then it must've already
                 * been started
@@ -1405,9 +1412,11 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
        }
 
        CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
        }
 
        CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
-               libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
+               libcfs_nid2str(ni->ni_nid),
+               ni->ni_net->net_tunables.lct_peer_tx_credits,
                lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
                lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
-               ni->ni_peerrtrcredits, ni->ni_peertimeout);
+               ni->ni_net->net_tunables.lct_peer_rtr_credits,
+               ni->ni_net->net_tunables.lct_peer_timeout);
 
        return 0;
 failed0:
 
        return 0;
 failed0:
@@ -1416,26 +1425,216 @@ failed0:
 }
 
 static int
 }
 
 static int
-lnet_startup_lndnis(struct list_head *nilist)
+lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
 {
        struct lnet_ni          *ni;
 {
        struct lnet_ni          *ni;
+       struct lnet_net         *net_l = NULL;
+       struct list_head        local_ni_list;
        int                     rc;
        int                     ni_count = 0;
        int                     rc;
        int                     ni_count = 0;
+       __u32                   lnd_type;
+       lnd_t                   *lnd;
+       int                     peer_timeout =
+               net->net_tunables.lct_peer_timeout;
+       int                     maxtxcredits =
+               net->net_tunables.lct_max_tx_credits;
+       int                     peerrtrcredits =
+               net->net_tunables.lct_peer_rtr_credits;
+
+       INIT_LIST_HEAD(&local_ni_list);
+
+       /*
+        * make sure that this net is unique. If it isn't then
+        * we are adding interfaces to an already existing network, and
+        * 'net' is just a convenient way to pass in the list.
+        * if it is unique we need to find the LND and load it if
+        * necessary.
+        */
+       if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
+               lnd_type = LNET_NETTYP(net->net_id);
+
+               LASSERT(libcfs_isknown_lnd(lnd_type));
+
+               if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
+                   lnd_type == IIBLND || lnd_type == VIBLND) {
+                       CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
+                       rc = -EINVAL;
+                       goto failed0;
+               }
+
+               mutex_lock(&the_lnet.ln_lnd_mutex);
+               lnd = lnet_find_lnd_by_type(lnd_type);
+
+               if (lnd == NULL) {
+                       mutex_unlock(&the_lnet.ln_lnd_mutex);
+                       rc = request_module("%s", libcfs_lnd2modname(lnd_type));
+                       mutex_lock(&the_lnet.ln_lnd_mutex);
+
+                       lnd = lnet_find_lnd_by_type(lnd_type);
+                       if (lnd == NULL) {
+                               mutex_unlock(&the_lnet.ln_lnd_mutex);
+                               CERROR("Can't load LND %s, module %s, rc=%d\n",
+                               libcfs_lnd2str(lnd_type),
+                               libcfs_lnd2modname(lnd_type), rc);
+#ifndef HAVE_MODULE_LOADING_SUPPORT
+                               LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
+                                               "compiled with kernel module "
+                                               "loading support.");
+#endif
+                               rc = -EINVAL;
+                               goto failed0;
+                       }
+               }
+
+               lnet_net_lock(LNET_LOCK_EX);
+               lnd->lnd_refcount++;
+               lnet_net_unlock(LNET_LOCK_EX);
+
+               net->net_lnd = lnd;
+
+               mutex_unlock(&the_lnet.ln_lnd_mutex);
+
+               net_l = net;
+       }
+
+       /*
+        * net_l: if the network being added is unique then net_l
+        *        will point to that network
+        *        if the network being added is not unique then
+        *        net_l points to the existing network.
+        *
+        * When we enter the loop below, we'll pick NIs off the
+        * network being added and start them up, then add them to
+        * a local ni list. Once we've successfully started all
+        * the NIs then we join the local NI list (of started up
+        * networks) with the net_l->net_ni_list, which should
+        * point to the correct network to add the new ni list to
+        *
+        * If any of the new NIs fail to start up, then we want to
+        * iterate through the local ni list, which should include
+        * any NIs which were successfully started up, and shut
+        * them down.
+        *
+        * After that we want to delete the network being added,
+        * to avoid a memory leak.
+        */
+
+       /*
+        * When a network uses TCP bonding then all its interfaces
+        * must be specified when the network is first defined: the
+        * TCP bonding code doesn't allow for interfaces to be added
+        * or removed.
+        */
+       if (net_l != net && net_l != NULL && use_tcp_bonding &&
+           LNET_NETTYP(net_l->net_id) == SOCKLND) {
+               rc = -EINVAL;
+               goto failed0;
+       }
+
+       while (!list_empty(&net->net_ni_added)) {
+               ni = list_entry(net->net_ni_added.next, struct lnet_ni,
+                               ni_netlist);
+               list_del_init(&ni->ni_netlist);
+
+               /* make sure that the NI we're about to start
+                * up is actually unique. If it's not, fail. */
+               if (!lnet_ni_unique_net(&net_l->net_ni_list,
+                                       ni->ni_interfaces[0])) {
+                       rc = -EINVAL;
+                       goto failed1;
+               }
+
+               /* adjust the pointer to the parent network, just in case
+                * the net is a duplicate */
+               ni->ni_net = net_l;
 
 
-       while (!list_empty(nilist)) {
-               ni = list_entry(nilist->next, lnet_ni_t, ni_list);
-               list_del(&ni->ni_list);
-               rc = lnet_startup_lndni(ni, NULL);
+               rc = lnet_startup_lndni(ni, tun);
+
+               LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
+                       ni->ni_net->net_lnd->lnd_query != NULL);
 
                if (rc < 0)
 
                if (rc < 0)
-                       goto failed;
+                       goto failed1;
+
+               lnet_ni_addref(ni);
+               list_add_tail(&ni->ni_netlist, &local_ni_list);
 
                ni_count++;
        }
 
 
                ni_count++;
        }
 
+       lnet_net_lock(LNET_LOCK_EX);
+       list_splice_tail(&local_ni_list, &net_l->net_ni_list);
+       lnet_net_unlock(LNET_LOCK_EX);
+
+       /* if the network is not unique then we don't want to keep
+        * it around after we're done. Free it. Otherwise add that
+        * net to the global the_lnet.ln_nets */
+       if (net_l != net && net_l != NULL) {
+               /*
+                * TODO - note. currently the tunables can not be updated
+                * once added
+                */
+               lnet_net_free(net);
+       } else {
+               net->net_state = LNET_NET_STATE_ACTIVE;
+               /*
+                * restore tunables after they have been overwritten by the
+                * lnd
+                */
+               if (peer_timeout != -1)
+                       net->net_tunables.lct_peer_timeout = peer_timeout;
+               if (maxtxcredits != -1)
+                       net->net_tunables.lct_max_tx_credits = maxtxcredits;
+               if (peerrtrcredits != -1)
+                       net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
+
+               lnet_net_lock(LNET_LOCK_EX);
+               list_add_tail(&net->net_list, &the_lnet.ln_nets);
+               lnet_net_unlock(LNET_LOCK_EX);
+       }
+
+       return ni_count;
+
+failed1:
+       /*
+        * shutdown the new NIs that are being started up
+        * free the NET being started
+        */
+       while (!list_empty(&local_ni_list)) {
+               ni = list_entry(local_ni_list.next, struct lnet_ni,
+                               ni_netlist);
+
+               lnet_shutdown_lndni(ni);
+       }
+
+failed0:
+       lnet_net_free(net);
+
+       return rc;
+}
+
+static int
+lnet_startup_lndnets(struct list_head *netlist)
+{
+       struct lnet_net         *net;
+       int                     rc;
+       int                     ni_count = 0;
+
+       while (!list_empty(netlist)) {
+               net = list_entry(netlist->next, struct lnet_net, net_list);
+               list_del_init(&net->net_list);
+
+               rc = lnet_startup_lndnet(net, NULL);
+
+               if (rc < 0)
+                       goto failed;
+
+               ni_count += rc;
+       }
+
        return ni_count;
 failed:
        return ni_count;
 failed:
-       lnet_shutdown_lndnis();
+       lnet_shutdown_lndnets();
 
        return rc;
 }
 
        return rc;
 }
@@ -1483,6 +1682,7 @@ int lnet_lib_init(void)
        the_lnet.ln_refcount = 0;
        LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
        INIT_LIST_HEAD(&the_lnet.ln_lnds);
        the_lnet.ln_refcount = 0;
        LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
        INIT_LIST_HEAD(&the_lnet.ln_lnds);
+       INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
        INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
        INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
 
        INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
        INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
 
@@ -1543,6 +1743,7 @@ LNetNIInit(lnet_pid_t requested_pid)
        struct lnet_ping_info   *pinfo;
        lnet_handle_md_t        md_handle;
        struct list_head        net_head;
        struct lnet_ping_info   *pinfo;
        lnet_handle_md_t        md_handle;
        struct list_head        net_head;
+       struct lnet_net         *net;
 
        INIT_LIST_HEAD(&net_head);
 
 
        INIT_LIST_HEAD(&net_head);
 
@@ -1562,8 +1763,15 @@ LNetNIInit(lnet_pid_t requested_pid)
                return rc;
        }
 
                return rc;
        }
 
-       /* Add in the loopback network */
-       if (lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head) == NULL) {
+       /* create a network for Loopback network */
+       net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
+       if (net == NULL) {
+               rc = -ENOMEM;
+               goto err_empty_list;
+       }
+
+       /* Add in the loopback NI */
+       if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
                rc = -ENOMEM;
                goto err_empty_list;
        }
                rc = -ENOMEM;
                goto err_empty_list;
        }
@@ -1575,13 +1783,13 @@ LNetNIInit(lnet_pid_t requested_pid)
         * in this case.  On cleanup in case of failure only clean up
         * routes if it has been loaded */
        if (!the_lnet.ln_nis_from_mod_params) {
         * in this case.  On cleanup in case of failure only clean up
         * routes if it has been loaded */
        if (!the_lnet.ln_nis_from_mod_params) {
-               rc = lnet_parse_networks(&net_head,
-                                        lnet_get_networks());
+               rc = lnet_parse_networks(&net_head, lnet_get_networks(),
+                                        use_tcp_bonding);
                if (rc < 0)
                        goto err_empty_list;
        }
 
                if (rc < 0)
                        goto err_empty_list;
        }
 
-       ni_count = lnet_startup_lndnis(&net_head);
+       ni_count = lnet_startup_lndnets(&net_head);
        if (ni_count < 0) {
                rc = ni_count;
                goto err_empty_list;
        if (ni_count < 0) {
                rc = ni_count;
                goto err_empty_list;
@@ -1634,17 +1842,17 @@ err_destroy_routes:
        if (!the_lnet.ln_nis_from_mod_params)
                lnet_destroy_routes();
 err_shutdown_lndnis:
        if (!the_lnet.ln_nis_from_mod_params)
                lnet_destroy_routes();
 err_shutdown_lndnis:
-       lnet_shutdown_lndnis();
+       lnet_shutdown_lndnets();
 err_empty_list:
        lnet_unprepare();
        LASSERT(rc < 0);
        mutex_unlock(&the_lnet.ln_api_mutex);
        while (!list_empty(&net_head)) {
 err_empty_list:
        lnet_unprepare();
        LASSERT(rc < 0);
        mutex_unlock(&the_lnet.ln_api_mutex);
        while (!list_empty(&net_head)) {
-               struct lnet_ni *ni;
+               struct lnet_net *net;
 
 
-               ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-               list_del_init(&ni->ni_list);
-               lnet_ni_free(ni);
+               net = list_entry(net_head.next, struct lnet_net, net_list);
+               list_del_init(&net->net_list);
+               lnet_net_free(net);
        }
        return rc;
 }
        }
        return rc;
 }
@@ -1682,7 +1890,7 @@ LNetNIFini()
 
                lnet_acceptor_stop();
                lnet_destroy_routes();
 
                lnet_acceptor_stop();
                lnet_destroy_routes();
-               lnet_shutdown_lndnis();
+               lnet_shutdown_lndnets();
                lnet_unprepare();
        }
 
                lnet_unprepare();
        }
 
@@ -1732,10 +1940,14 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
        }
 
        config->cfg_nid = ni->ni_nid;
        }
 
        config->cfg_nid = ni->ni_nid;
-       config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
-       config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
-       config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
-       config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
+       config->cfg_config_u.cfg_net.net_peer_timeout =
+               ni->ni_net->net_tunables.lct_peer_timeout;
+       config->cfg_config_u.cfg_net.net_max_tx_credits =
+               ni->ni_net->net_tunables.lct_max_tx_credits;
+       config->cfg_config_u.cfg_net.net_peer_tx_credits =
+               ni->ni_net->net_tunables.lct_peer_tx_credits;
+       config->cfg_config_u.cfg_net.net_peer_rtr_credits =
+               ni->ni_net->net_tunables.lct_peer_rtr_credits;
 
        net_config->ni_status = ni->ni_status->ns_status;
 
 
        net_config->ni_status = ni->ni_status->ns_status;
 
@@ -1757,46 +1969,99 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config)
        if (config->cfg_hdr.ioc_len > min_size)
                tunable_size = config->cfg_hdr.ioc_len - min_size;
 
        if (config->cfg_hdr.ioc_len > min_size)
                tunable_size = config->cfg_hdr.ioc_len - min_size;
 
-       /* Don't copy to much data to user space */
-       min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
+       /* Don't copy too much data to user space */
+       min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
        lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
 
        lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
 
-       if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
-               memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
+       if (lnd_cfg && min_size) {
+               memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
                config->cfg_config_u.cfg_net.net_interface_count = 1;
 
                /* Tell user land that kernel side has less data */
                config->cfg_config_u.cfg_net.net_interface_count = 1;
 
                /* Tell user land that kernel side has less data */
-               if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
+               if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
                        min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
                        config->cfg_hdr.ioc_len -= min_size;
                }
        }
 }
 
                        min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
                        config->cfg_hdr.ioc_len -= min_size;
                }
        }
 }
 
-static int
+struct lnet_ni *
+lnet_get_ni_idx_locked(int idx)
+{
+       struct lnet_ni          *ni;
+       struct lnet_net         *net;
+
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       if (idx-- == 0)
+                               return ni;
+               }
+       }
+
+       return NULL;
+}
+
+struct lnet_ni *
+lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
+{
+       struct lnet_ni          *ni;
+       struct lnet_net         *net = mynet;
+
+       if (prev == NULL) {
+               if (net == NULL)
+                       net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
+                                       net_list);
+               ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+                               ni_netlist);
+
+               return ni;
+       }
+
+       if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
+               /* if you reached the end of the ni list and the net is
+                * specified, then there are no more nis in that net */
+               if (net != NULL)
+                       return NULL;
+
+               /* we reached the end of this net ni list. move to the
+                * next net */
+               if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
+                       /* no more nets and no more NIs. */
+                       return NULL;
+
+               /* get the next net */
+               net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
+                                net_list);
+               /* get the ni on it */
+               ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+                               ni_netlist);
+
+               return ni;
+       }
+
+       /* there are more nis left */
+       ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
+
+       return ni;
+}
+
+int
 lnet_get_net_config(struct lnet_ioctl_config_data *config)
 {
        struct lnet_ni *ni;
 lnet_get_net_config(struct lnet_ioctl_config_data *config)
 {
        struct lnet_ni *ni;
-       struct list_head *tmp;
-       int idx = config->cfg_count;
+       int cpt;
        int rc = -ENOENT;
        int rc = -ENOENT;
-       int cpt, i = 0;
-
-       if (unlikely(!config->cfg_bulk))
-               return -EINVAL;
+       int idx = config->cfg_count;
 
        cpt = lnet_net_lock_current();
 
 
        cpt = lnet_net_lock_current();
 
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               if (i++ != idx)
-                       continue;
+       ni = lnet_get_ni_idx_locked(idx);
 
 
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
+       if (ni != NULL) {
+               rc = 0;
                lnet_ni_lock(ni);
                lnet_fill_ni_info(ni, config);
                lnet_ni_unlock(ni);
                lnet_ni_lock(ni);
                lnet_fill_ni_info(ni, config);
                lnet_ni_unlock(ni);
-               rc = 0;
-               break;
        }
 
        lnet_net_unlock(cpt);
        }
 
        lnet_net_unlock(cpt);
@@ -1809,29 +2074,36 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
        char                    *nets = conf->cfg_config_u.cfg_net.net_intf;
        struct lnet_ping_info   *pinfo;
        lnet_handle_md_t        md_handle;
        char                    *nets = conf->cfg_config_u.cfg_net.net_intf;
        struct lnet_ping_info   *pinfo;
        lnet_handle_md_t        md_handle;
-       struct lnet_ni          *ni;
+       struct lnet_net         *net;
        struct list_head        net_head;
        int                     rc;
        lnet_remotenet_t        *rnet;
        struct list_head        net_head;
        int                     rc;
        lnet_remotenet_t        *rnet;
+       int                     net_ni_count;
+       int                     num_acceptor_nets;
+       __u32                   net_type;
+       struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
 
        INIT_LIST_HEAD(&net_head);
 
 
        INIT_LIST_HEAD(&net_head);
 
-       /* Create a ni structure for the network string */
-       rc = lnet_parse_networks(&net_head, nets);
+       if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
+               lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+
+       /* Create a net/ni structures for the network string */
+       rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
        if (rc <= 0)
                return rc == 0 ? -EINVAL : rc;
 
        mutex_lock(&the_lnet.ln_api_mutex);
 
        if (rc > 1) {
        if (rc <= 0)
                return rc == 0 ? -EINVAL : rc;
 
        mutex_lock(&the_lnet.ln_api_mutex);
 
        if (rc > 1) {
-               rc = -EINVAL; /* only add one interface per call */
+               rc = -EINVAL; /* only add one network per call */
                goto failed0;
        }
 
                goto failed0;
        }
 
-       ni = list_entry(net_head.next, struct lnet_ni, ni_list);
+       net = list_entry(net_head.next, struct lnet_net, net_list);
 
        lnet_net_lock(LNET_LOCK_EX);
 
        lnet_net_lock(LNET_LOCK_EX);
-       rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
+       rnet = lnet_find_rnet_locked(net->net_id);
        lnet_net_unlock(LNET_LOCK_EX);
        /* make sure that the net added doesn't invalidate the current
         * configuration LNet is keeping */
        lnet_net_unlock(LNET_LOCK_EX);
        /* make sure that the net added doesn't invalidate the current
         * configuration LNet is keeping */
@@ -1842,23 +2114,66 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf)
                goto failed0;
        }
 
                goto failed0;
        }
 
-       rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
+       /*
+        * make sure you calculate the correct number of slots in the ping
+        * info. Since the ping info is a flattened list of all the NIs,
+        * we should allocate enough slots to accommodate the number of NIs
+        * which will be added.
+        *
+        * We can use lnet_get_net_ni_count_locked() since the net is not
+        * on a public list yet, so locking is not a problem
+        */
+       net_ni_count = lnet_get_net_ni_count_locked(net);
+
+       rc = lnet_ping_info_setup(&pinfo, &md_handle,
+                                 net_ni_count + lnet_get_ni_count(),
                                  false);
        if (rc != 0)
                goto failed0;
 
                                  false);
        if (rc != 0)
                goto failed0;
 
-       list_del_init(&ni->ni_list);
+       list_del_init(&net->net_list);
 
 
-       rc = lnet_startup_lndni(ni, conf);
-       if (rc != 0)
+       if (lnd_tunables)
+               memcpy(&net->net_tunables,
+                      &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
+
+       /*
+        * before starting this network get a count of the current TCP
+        * networks which require the acceptor thread running. If that
+        * count is == 0 before we start up this network, then we'd want to
+        * start up the acceptor thread after starting up this network
+        */
+       num_acceptor_nets = lnet_count_acceptor_nets();
+
+       /*
+        * lnet_startup_lndnet() can deallocate 'net' even if it returns
+        * success, because we ended up adding interfaces to an existing
+        * network. So grab the net_type now
+        */
+       net_type = LNET_NETTYP(net->net_id);
+
+       rc = lnet_startup_lndnet(net,
+                                (lnd_tunables) ? &lnd_tunables->lt_tun : NULL);
+       if (rc < 0)
                goto failed1;
 
                goto failed1;
 
-       if (ni->ni_lnd->lnd_accept != NULL) {
+       /*
+        * Start the acceptor thread if this is the first network
+        * being added that requires the thread.
+        */
+       if (net_type == SOCKLND && num_acceptor_nets == 0)
+       {
                rc = lnet_acceptor_start();
                if (rc < 0) {
                rc = lnet_acceptor_start();
                if (rc < 0) {
-                       /* shutdown the ni that we just started */
+                       /* shutdown the net that we just started */
                        CERROR("Failed to start up acceptor thread\n");
                        CERROR("Failed to start up acceptor thread\n");
-                       lnet_shutdown_lndni(ni);
+                       /*
+                        * Note that if we needed to start the acceptor
+                        * thread, then 'net' must have been the first TCP
+                        * network, therefore was unique, and therefore
+                        * wasn't deallocated by lnet_startup_lndnet()
+                        */
+                       lnet_shutdown_lndnet(net);
                        goto failed1;
                }
        }
                        goto failed1;
                }
        }
@@ -1874,51 +2189,53 @@ failed1:
 failed0:
        mutex_unlock(&the_lnet.ln_api_mutex);
        while (!list_empty(&net_head)) {
 failed0:
        mutex_unlock(&the_lnet.ln_api_mutex);
        while (!list_empty(&net_head)) {
-               ni = list_entry(net_head.next, struct lnet_ni, ni_list);
-               list_del_init(&ni->ni_list);
-               lnet_ni_free(ni);
+               net = list_entry(net_head.next, struct lnet_net, net_list);
+               list_del_init(&net->net_list);
+               lnet_net_free(net);
        }
        return rc;
 }
 
 int
        }
        return rc;
 }
 
 int
-lnet_dyn_del_ni(__u32 net)
+lnet_dyn_del_ni(__u32 net_id)
 {
 {
-       lnet_ni_t        *ni;
+       struct lnet_net  *net;
        struct lnet_ping_info *pinfo;
        lnet_handle_md_t  md_handle;
        int               rc;
        struct lnet_ping_info *pinfo;
        lnet_handle_md_t  md_handle;
        int               rc;
+       int               net_ni_count;
 
        /* don't allow userspace to shutdown the LOLND */
 
        /* don't allow userspace to shutdown the LOLND */
-       if (LNET_NETTYP(net) == LOLND)
+       if (LNET_NETTYP(net_id) == LOLND)
                return -EINVAL;
 
        mutex_lock(&the_lnet.ln_api_mutex);
                return -EINVAL;
 
        mutex_lock(&the_lnet.ln_api_mutex);
-       /* create and link a new ping info, before removing the old one */
-       rc = lnet_ping_info_setup(&pinfo, &md_handle,
-                                 lnet_get_ni_count() - 1, false);
-       if (rc != 0)
-               goto out;
 
 
-       ni = lnet_net2ni(net);
-       if (ni == NULL) {
+       lnet_net_lock(0);
+
+       net = lnet_get_net_locked(net_id);
+       if (net == NULL) {
                rc = -EINVAL;
                rc = -EINVAL;
-               goto failed;
+               goto out;
        }
 
        }
 
-       /* decrement the reference counter taken by lnet_net2ni() */
-       lnet_ni_decref_locked(ni, 0);
+       net_ni_count = lnet_get_net_ni_count_locked(net);
 
 
-       lnet_shutdown_lndni(ni);
+       lnet_net_unlock(0);
 
 
-       if (lnet_count_acceptor_nis() == 0)
+       /* create and link a new ping info, before removing the old one */
+       rc = lnet_ping_info_setup(&pinfo, &md_handle,
+                                 lnet_get_ni_count() - net_ni_count, false);
+       if (rc != 0)
+               goto out;
+
+       lnet_shutdown_lndnet(net);
+
+       if (lnet_count_acceptor_nets() == 0)
                lnet_acceptor_stop();
 
        lnet_ping_target_update(pinfo, md_handle);
                lnet_acceptor_stop();
 
        lnet_ping_target_update(pinfo, md_handle);
-       goto out;
-failed:
-       lnet_ping_md_unlink(pinfo, &md_handle);
-       lnet_ping_info_free(pinfo);
+
 out:
        mutex_unlock(&the_lnet.ln_api_mutex);
 
 out:
        mutex_unlock(&the_lnet.ln_api_mutex);
 
@@ -2137,12 +2454,11 @@ LNetCtl(unsigned int cmd, void *arg)
                if (ni == NULL)
                        return -EINVAL;
 
                if (ni == NULL)
                        return -EINVAL;
 
-               if (ni->ni_lnd->lnd_ctl == NULL)
+               if (ni->ni_net->net_lnd->lnd_ctl == NULL)
                        rc = -EINVAL;
                else
                        rc = -EINVAL;
                else
-                       rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
+                       rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
 
 
-               lnet_ni_decref(ni);
                return rc;
        }
        /* not reached */
                return rc;
        }
        /* not reached */
@@ -2170,7 +2486,7 @@ int
 LNetGetId(unsigned int index, lnet_process_id_t *id)
 {
        struct lnet_ni   *ni;
 LNetGetId(unsigned int index, lnet_process_id_t *id)
 {
        struct lnet_ni   *ni;
-       struct list_head *tmp;
+       struct lnet_net  *net;
        int               cpt;
        int               rc = -ENOENT;
 
        int               cpt;
        int               rc = -ENOENT;
 
@@ -2178,16 +2494,16 @@ LNetGetId(unsigned int index, lnet_process_id_t *id)
 
        cpt = lnet_net_lock_current();
 
 
        cpt = lnet_net_lock_current();
 
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               if (index-- != 0)
-                       continue;
-
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
+       list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       if (index-- != 0)
+                               continue;
 
 
-               id->nid = ni->ni_nid;
-               id->pid = the_lnet.ln_pid;
-               rc = 0;
-               break;
+                       id->nid = ni->ni_nid;
+                       id->pid = the_lnet.ln_pid;
+                       rc = 0;
+                       break;
+               }
        }
 
        lnet_net_unlock(cpt);
        }
 
        lnet_net_unlock(cpt);
index ba8b879..34889e6 100644 (file)
@@ -46,8 +46,11 @@ static int lnet_tbnob = 0;                   /* track text buf allocation */
 #define LNET_MAX_TEXTBUF_NOB    (64<<10)       /* bound allocation */
 #define LNET_SINGLE_TEXTBUF_NOB  (4<<10)
 
 #define LNET_MAX_TEXTBUF_NOB    (64<<10)       /* bound allocation */
 #define LNET_SINGLE_TEXTBUF_NOB  (4<<10)
 
+#define SPACESTR " \t\v\r\n"
+#define DELIMITERS ":()[]"
+
 static void
 static void
-lnet_syntax(char *name, char *str, int offset, int width)
+lnet_syntax(const char *name, const char *str, int offset, int width)
 {
        static char dots[LNET_SINGLE_TEXTBUF_NOB];
        static char dashes[LNET_SINGLE_TEXTBUF_NOB];
 {
        static char dots[LNET_SINGLE_TEXTBUF_NOB];
        static char dashes[LNET_SINGLE_TEXTBUF_NOB];
@@ -76,20 +79,212 @@ lnet_issep (char c)
        }
 }
 
        }
 }
 
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
+/*
+ * Look for net_id on netlist.
+ *
+ * Returns true when no net on the list carries net_id. Otherwise returns
+ * false and, if 'net' is not NULL, stores the matching entry in *net.
+ */
+bool
+lnet_net_unique(__u32 net_id, struct list_head *netlist,
+               struct lnet_net **net)
+{
+       struct lnet_net *cur;
+
+       list_for_each_entry(cur, netlist, net_list) {
+               if (cur->net_id != net_id)
+                       continue;
+
+               /* hand the existing net back to callers which asked for it */
+               if (net != NULL)
+                       *net = cur;
+               return false;
+       }
+
+       return true;
+}
+
+/* check that the NI is unique within the list of NIs already added to
+ * a network. Only the first interface of each NI on nilist is compared
+ * against iface, and the names must match exactly to count as a
+ * duplicate, so "eth1" does not collide with "eth10".
+ * Returns true if iface is unique, false if it is already present */
+bool
+lnet_ni_unique_net(struct list_head *nilist, char *iface)
 {
        struct list_head *tmp;
 {
        struct list_head *tmp;
-       lnet_ni_t        *ni;
+       struct lnet_ni *ni;
 
        list_for_each(tmp, nilist) {
 
        list_for_each(tmp, nilist) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
+               ni = list_entry(tmp, struct lnet_ni, ni_netlist);
 
 
-               if (LNET_NIDNET(ni->ni_nid) == net)
-                       return 0;
+               if (ni->ni_interfaces[0] != NULL &&
+                   strcmp(ni->ni_interfaces[0], iface) == 0)
+                       return false;
        }
 
        }
 
-       return 1;
+       return true;
+}
+
+/* check that the interface name is not already present among the
+ * interfaces of the same NI. Names must match exactly to count as a
+ * duplicate, so "eth1" does not collide with "eth10".
+ * This is only a consideration if use_tcp_bonding is set.
+ * Returns true if iface is unique, false if it is already listed */
+static bool
+lnet_ni_unique_ni(char *iface_list[LNET_MAX_INTERFACES], char *iface)
+{
+       int i;
+
+       for (i = 0; i < LNET_MAX_INTERFACES; i++) {
+               /* unused slots are NULL and can never match */
+               if (iface_list[i] != NULL &&
+                   strcmp(iface_list[i], iface) == 0)
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Report whether 'value' occurs among the first 'size' entries of 'array'.
+ *
+ * Returns true when a matching entry is found and false otherwise
+ * (including when size is 0).
+ */
+static bool
+in_array(__u32 *array, __u32 size, __u32 value)
+{
+       __u32 i;
+
+       for (i = 0; i < size; i++) {
+               if (array[i] == value)
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Merge the CPT list of an NI into the CPT list kept on its net.
+ *
+ * cpts/ncpts describe the NI's CPTs; cpts == NULL means the NI exists on
+ * all CPTs. A net which covers all CPTs is recorded as net_cpts == NULL
+ * with net_ncpts == LNET_CPT_NUMBER. Otherwise every entry of cpts which
+ * in_array() does not report as already present in net_cpts is appended.
+ *
+ * Returns 0 on success, or -ENOMEM if an allocation fails, in which case
+ * the net's CPT list is left as it was.
+ */
+static int
+lnet_net_append_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net)
+{
+       __u32 *added_cpts = NULL;
+       __u32 i, j = 0;
+       int rc = 0;
+
+       /*
+        * no need to go further since a subset of the NIs already exist on
+        * all CPTs
+        */
+       if (net->net_ncpts == LNET_CPT_NUMBER)
+               return 0;
+
+       if (cpts == NULL) {
+               /* there is an NI which will exist on all CPTs */
+               if (net->net_cpts != NULL)
+                       LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) *
+                                   net->net_ncpts);
+               net->net_cpts = NULL;
+               net->net_ncpts = LNET_CPT_NUMBER;
+               return 0;
+       }
+
+       if (net->net_cpts == NULL) {
+               /* first NI with an explicit CPT list: take a private copy */
+               LIBCFS_ALLOC(net->net_cpts, sizeof(*net->net_cpts) * ncpts);
+               if (net->net_cpts == NULL)
+                       return -ENOMEM;
+               /* memcpy() takes a byte count, not a number of entries */
+               memcpy(net->net_cpts, cpts, sizeof(*net->net_cpts) * ncpts);
+               /* record the length so later merges and frees are sized right */
+               net->net_ncpts = ncpts;
+               return 0;
+       }
+
+       /* scratch array for the entries of cpts which must be appended */
+       LIBCFS_ALLOC(added_cpts, sizeof(*added_cpts) * LNET_CPT_NUMBER);
+       if (added_cpts == NULL)
+               return -ENOMEM;
+
+       for (i = 0; i < ncpts; i++) {
+               if (!in_array(net->net_cpts, net->net_ncpts, cpts[i])) {
+                       added_cpts[j] = cpts[i];
+                       j++;
+               }
+       }
+
+       /* append the new cpts if any to the list of cpts in the net */
+       if (j > 0) {
+               __u32 *array = NULL, *loc;
+               __u32 total_entries = j + net->net_ncpts;
+
+               LIBCFS_ALLOC(array, sizeof(*net->net_cpts) * total_entries);
+               if (array == NULL) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               /* old entries first, then the additions; sizes are in bytes */
+               memcpy(array, net->net_cpts,
+                      sizeof(*net->net_cpts) * net->net_ncpts);
+               loc = array + net->net_ncpts;
+               memcpy(loc, added_cpts, sizeof(*added_cpts) * j);
+
+               LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) *
+                           net->net_ncpts);
+               net->net_ncpts = total_entries;
+               net->net_cpts = array;
+       }
+
+out:
+       LIBCFS_FREE(added_cpts, sizeof(*added_cpts) * LNET_CPT_NUMBER);
+
+       return rc;
+}
+
+/*
+ * Recompute the CPT list of 'net' after an NI with 'ncpts' CPTs has been
+ * taken off it. 'cpts' is not read by this function; only 'ncpts' is
+ * used to decide what has to be done.
+ */
+static void
+lnet_net_remove_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net)
+{
+       struct lnet_ni *ni;
+       int rc;
+
+       /*
+        * Operation Assumption:
+        *      This function is called after an NI has been removed from
+        *      its parent net.
+        *
+        * if we're removing an NI which exists on all CPTs then
+        * we have to check if any of the other NIs on this net also
+        * exists on all CPTs. If none, then we need to build our Net CPT
+        * list based on the remaining NIs.
+        *
+        * If the NI being removed exists on a subset of the CPTs then we
+        * also rebuild the Net CPT list based on the remaining NIs, which
+        * should result in the expected Net CPT list.
+        */
+
+       /*
+        * sometimes this function can be called due to some failure
+        * creating an NI, before any of the cpts are allocated, so check
+        * for that case and don't do anything
+        */
+       if (ncpts == 0)
+               return;
+
+       if (ncpts == LNET_CPT_NUMBER) {
+               /*
+                * first iteration through the NI list in the net to see
+                * if any of the NIs exist on all the CPTs. If one is
+                * found then our job is done.
+                */
+               list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+                       if (ni->ni_ncpts == LNET_CPT_NUMBER)
+                               return;
+               }
+       }
+
+       /*
+        * Rebuild the Net CPT list again, thereby including only the
+        * CPTs which the remaining NIs are associated with.
+        *
+        * NOTE(review): net_ncpts is not reset when net_cpts is dropped
+        * here. If it still equals LNET_CPT_NUMBER, lnet_net_append_cpts()
+        * returns immediately and nothing is rebuilt - confirm this is
+        * intended.
+        */
+       if (net->net_cpts != NULL) {
+               LIBCFS_FREE(net->net_cpts,
+                       sizeof(*net->net_cpts) * net->net_ncpts);
+               net->net_cpts = NULL;
+       }
+
+       list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+               rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts,
+                                         net);
+               if (rc != 0) {
+                       CERROR("Out of Memory\n");
+                       /*
+                        * do our best to keep on going. Delete
+                        * the net cpts and set it to NULL. This
+                        * way we can keep on going but less
+                        * efficiently, since memory accesses might be
+                        * across CPT lines.
+                        */
+                       if (net->net_cpts != NULL) {
+                               LIBCFS_FREE(net->net_cpts,
+                                               sizeof(*net->net_cpts) *
+                                               net->net_ncpts);
+                               net->net_cpts = NULL;
+                               net->net_ncpts = LNET_CPT_NUMBER;
+                       }
+                       return;
+               }
+       }
 }
 
 void
 }
 
 void
@@ -97,6 +292,8 @@ lnet_ni_free(struct lnet_ni *ni)
 {
        int i;
 
 {
        int i;
 
+       lnet_net_remove_cpts(ni->ni_cpts, ni->ni_ncpts, ni->ni_net);
+
        if (ni->ni_refs != NULL)
                cfs_percpt_free(ni->ni_refs);
 
        if (ni->ni_refs != NULL)
                cfs_percpt_free(ni->ni_refs);
 
@@ -106,9 +303,6 @@ lnet_ni_free(struct lnet_ni *ni)
        if (ni->ni_cpts != NULL)
                cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
 
        if (ni->ni_cpts != NULL)
                cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
 
-       if (ni->ni_lnd_tunables != NULL)
-               LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables));
-
        for (i = 0; i < LNET_MAX_INTERFACES &&
                    ni->ni_interfaces[i] != NULL; i++) {
                LIBCFS_FREE(ni->ni_interfaces[i],
        for (i = 0; i < LNET_MAX_INTERFACES &&
                    ni->ni_interfaces[i] != NULL; i++) {
                LIBCFS_FREE(ni->ni_interfaces[i],
@@ -122,29 +316,143 @@ lnet_ni_free(struct lnet_ni *ni)
        LIBCFS_FREE(ni, sizeof(*ni));
 }
 
        LIBCFS_FREE(ni, sizeof(*ni));
 }
 
+/*
+ * Release a net: free every NI still linked on its net_ni_added and
+ * net_ni_list lists, then its CPT array, then the net itself. The
+ * zombie list is asserted to be empty on entry.
+ */
+void
+lnet_net_free(struct lnet_net *net)
+{
+       struct lnet_ni *ni;
+
+       LASSERT(list_empty(&net->net_ni_zombie));
+
+       /*
+        * NIs which were allocated but never moved off the added list,
+        * e.g. because net startup failed part way through
+        */
+       while (!list_empty(&net->net_ni_added)) {
+               ni = list_entry(net->net_ni_added.next, struct lnet_ni,
+                               ni_netlist);
+               list_del_init(&ni->ni_netlist);
+               lnet_ni_free(ni);
+       }
+
+       /* NIs which made it onto the net's NI list */
+       while (!list_empty(&net->net_ni_list)) {
+               ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+                               ni_netlist);
+               list_del_init(&ni->ni_netlist);
+               lnet_ni_free(ni);
+       }
+
+       if (net->net_cpts != NULL)
+               LIBCFS_FREE(net->net_cpts,
+                           sizeof(*net->net_cpts) * net->net_ncpts);
+
+       LIBCFS_FREE(net, sizeof(*net));
+}
+
+/*
+ * Allocate a net for net_id and link it at the tail of net_list.
+ *
+ * Returns the new net, or NULL if net_list already holds a net with
+ * this id or if the allocation fails. The net starts in
+ * LNET_NET_STATE_INIT with all common tunables set to -1 (undefined).
+ */
+struct lnet_net *
+lnet_net_alloc(__u32 net_id, struct list_head *net_list)
+{
+       struct lnet_net *new_net;
+
+       /* refuse a second net with the same id on this list */
+       if (!lnet_net_unique(net_id, net_list, NULL)) {
+               CERROR("Duplicate net %s. Ignore\n",
+                      libcfs_net2str(net_id));
+               return NULL;
+       }
+
+       LIBCFS_ALLOC(new_net, sizeof(*new_net));
+       if (new_net == NULL) {
+               CERROR("Out of memory creating network %s\n",
+                      libcfs_net2str(net_id));
+               return NULL;
+       }
+
+       new_net->net_id = net_id;
+       new_net->net_state = LNET_NET_STATE_INIT;
+
+       /* common tunables start out undefined */
+       new_net->net_tunables.lct_peer_timeout = -1;
+       new_net->net_tunables.lct_max_tx_credits = -1;
+       new_net->net_tunables.lct_peer_tx_credits = -1;
+       new_net->net_tunables.lct_peer_rtr_credits = -1;
+
+       INIT_LIST_HEAD(&new_net->net_ni_list);
+       INIT_LIST_HEAD(&new_net->net_ni_added);
+       INIT_LIST_HEAD(&new_net->net_ni_zombie);
+       INIT_LIST_HEAD(&new_net->net_list);
+
+       list_add_tail(&new_net->net_list, net_list);
+
+       return new_net;
+}
+
+/*
+ * Store a private copy of the interface name 'iface' in the first free
+ * slot of ni->ni_interfaces[].
+ *
+ * Returns 0 on success, -ENOMEM if ni is NULL or the copy cannot be
+ * allocated, and -EINVAL if lnet_ni_unique_ni() rejects the name or all
+ * LNET_MAX_INTERFACES slots are already in use.
+ */
+static int
+lnet_ni_add_interface(struct lnet_ni *ni, char *iface)
+{
+       int slot;
+
+       if (ni == NULL)
+               return -ENOMEM;
+
+       if (!lnet_ni_unique_ni(ni->ni_interfaces, iface))
+               return -EINVAL;
+
+       /* locate the first unused entry */
+       for (slot = 0; slot < LNET_MAX_INTERFACES; slot++) {
+               if (ni->ni_interfaces[slot] == NULL)
+                       break;
+       }
+
+       if (slot >= LNET_MAX_INTERFACES) {
+               LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
+                                  "for net %s\n",
+                                  libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+               return -EINVAL;
+       }
+
+       /*
+        * The name is duplicated rather than referenced so that the NI
+        * does not depend on the caller's tokens string, which can then
+        * be freed. The copy is released when the NI is freed.
+        */
+       LIBCFS_ALLOC(ni->ni_interfaces[slot],
+                    strlen(iface) + 1);
+       if (ni->ni_interfaces[slot] == NULL) {
+               CERROR("Can't allocate net interface name\n");
+               return -ENOMEM;
+       }
+
+       /* length includes the terminating NUL */
+       strncpy(ni->ni_interfaces[slot], iface,
+               strlen(iface) + 1);
+
+       return 0;
+}
+
+/* allocate and add to the provided network */
 lnet_ni_t *
 lnet_ni_t *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
+lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface)
 {
        struct lnet_tx_queue    *tq;
        struct lnet_ni          *ni;
        int                     rc;
        int                     i;
 
 {
        struct lnet_tx_queue    *tq;
        struct lnet_ni          *ni;
        int                     rc;
        int                     i;
 
-       if (!lnet_net_unique(net, nilist)) {
-               LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
-                                  libcfs_net2str(net));
-               return NULL;
-       }
+       if (iface != NULL)
+               /* make sure that this NI is unique in the net it's
+                * being added to */
+               if (!lnet_ni_unique_net(&net->net_ni_added, iface))
+                       return NULL;
 
        LIBCFS_ALLOC(ni, sizeof(*ni));
        if (ni == NULL) {
 
        LIBCFS_ALLOC(ni, sizeof(*ni));
        if (ni == NULL) {
-               CERROR("Out of memory creating network %s\n",
-                      libcfs_net2str(net));
+               CERROR("Out of memory creating network interface %s%s\n",
+                      libcfs_net2str(net->net_id),
+                      (iface != NULL) ? iface : "");
                return NULL;
        }
 
        spin_lock_init(&ni->ni_lock);
        INIT_LIST_HEAD(&ni->ni_cptlist);
                return NULL;
        }
 
        spin_lock_init(&ni->ni_lock);
        INIT_LIST_HEAD(&ni->ni_cptlist);
+       INIT_LIST_HEAD(&ni->ni_netlist);
        ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
                                       sizeof(*ni->ni_refs[0]));
        if (ni->ni_refs == NULL)
        ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
                                       sizeof(*ni->ni_refs[0]));
        if (ni->ni_refs == NULL)
@@ -164,8 +472,9 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
        } else {
                rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
                if (rc <= 0) {
        } else {
                rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
                if (rc <= 0) {
-                       CERROR("Failed to set CPTs for NI %s: %d\n",
-                              libcfs_net2str(net), rc);
+                       CERROR("Failed to set CPTs for NI %s(%s): %d\n",
+                              libcfs_net2str(net->net_id),
+                              (iface != NULL) ? iface : "", rc);
                        goto failed;
                }
 
                        goto failed;
                }
 
@@ -178,8 +487,9 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
                ni->ni_ncpts = rc;
        }
 
                ni->ni_ncpts = rc;
        }
 
+       ni->ni_net = net;
        /* LND will fill in the address part of the NID */
        /* LND will fill in the address part of the NID */
-       ni->ni_nid = LNET_MKNID(net, 0);
+       ni->ni_nid = LNET_MKNID(net->net_id, 0);
 
        /* Store net namespace in which current ni is being created */
        if (current->nsproxy->net_ns != NULL)
 
        /* Store net namespace in which current ni is being created */
        if (current->nsproxy->net_ns != NULL)
@@ -188,25 +498,41 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
                ni->ni_net_ns = NULL;
 
        ni->ni_last_alive = ktime_get_real_seconds();
                ni->ni_net_ns = NULL;
 
        ni->ni_last_alive = ktime_get_real_seconds();
-       list_add_tail(&ni->ni_list, nilist);
+       ni->ni_state = LNET_NI_STATE_INIT;
+       rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
+       if (rc != 0)
+               goto failed;
+       list_add_tail(&ni->ni_netlist, &net->net_ni_added);
+
+       /* if an interface name is provided then make sure to add in that
+        * interface name in NI */
+       if (iface != NULL)
+               if (lnet_ni_add_interface(ni, iface) != 0)
+                       goto failed;
+
        return ni;
        return ni;
- failed:
+failed:
        lnet_ni_free(ni);
        return NULL;
 }
 
        lnet_ni_free(ni);
        return NULL;
 }
 
+/*
+ * Parse the networks string and create the matching set of nets, each
+ * carrying its NIs, on netlist.
+ */
 int
 int
-lnet_parse_networks(struct list_head *nilist, char *networks)
+lnet_parse_networks(struct list_head *netlist, char *networks,
+                   bool use_tcp_bonding)
 {
 {
-       struct cfs_expr_list *el = NULL;
+       struct cfs_expr_list *net_el = NULL;
+       struct cfs_expr_list *ni_el = NULL;
        int             tokensize;
        char            *tokens;
        char            *str;
        int             tokensize;
        char            *tokens;
        char            *str;
-       char            *tmp;
-       struct lnet_ni  *ni;
-       __u32           net;
+       struct lnet_net *net;
+       struct lnet_ni  *ni = NULL;
+       __u32           net_id;
        int             nnets = 0;
        int             nnets = 0;
-       struct list_head *temp_node;
 
        if (networks == NULL) {
                CERROR("networks string is undefined\n");
 
        if (networks == NULL) {
                CERROR("networks string is undefined\n");
@@ -229,173 +555,238 @@ lnet_parse_networks(struct list_head *nilist, char *networks)
        }
 
        memcpy(tokens, networks, tokensize);
        }
 
        memcpy(tokens, networks, tokensize);
-       str = tmp = tokens;
-
-       while (str != NULL && *str != 0) {
-               char    *comma = strchr(str, ',');
-               char    *bracket = strchr(str, '(');
-               char    *square = strchr(str, '[');
-               char    *iface;
-               int     niface;
-               int     rc;
-
-               /* NB we don't check interface conflicts here; it's the LNDs
-                * responsibility (if it cares at all) */
-
-               if (square != NULL && (comma == NULL || square < comma)) {
-                       /* i.e: o2ib0(ib0)[1,2], number between square
-                        * brackets are CPTs this NI needs to be bond */
-                       if (bracket != NULL && bracket > square) {
-                               tmp = square;
+       str = tokens;
+
+       /*
+        * Main parser loop.
+        *
+        * NB we don't check interface conflicts here; it's the LNDs
+        * responsibility (if it cares at all)
+        */
+       do {
+               char *nistr;
+               char *elstr;
+               char *name;
+               int rc;
+
+               /*
+                * Parse a network string into its components.
+                *
+                * <name>{"("...")"}{"["<el>"]"}
+                */
+
+               /* Network name (mandatory) */
+               while (isspace(*str))
+                       *str++ = '\0';
+               if (!*str)
+                       break;
+               name = str;
+               str += strcspn(str, SPACESTR ":()[],");
+               while (isspace(*str))
+                       *str++ = '\0';
+
+               /* Interface list (optional) */
+               if (*str == '(') {
+                       *str++ = '\0';
+                       nistr = str;
+                       str += strcspn(str, ")");
+                       if (*str != ')') {
+                               str = nistr;
                                goto failed_syntax;
                        }
                                goto failed_syntax;
                        }
+                       do {
+                               *str++ = '\0';
+                       } while (isspace(*str));
+               } else {
+                       nistr = NULL;
+               }
 
 
-                       tmp = strchr(square, ']');
-                       if (tmp == NULL) {
-                               tmp = square;
+               /* CPT expression (optional) */
+               if (*str == '[') {
+                       elstr = str;
+                       str += strcspn(str, "]");
+                       if (*str != ']') {
+                               str = elstr;
                                goto failed_syntax;
                        }
                                goto failed_syntax;
                        }
-
-                       rc = cfs_expr_list_parse(square, tmp - square + 1,
-                                                0, LNET_CPT_NUMBER - 1, &el);
+                       rc = cfs_expr_list_parse(elstr, str - elstr + 1,
+                                               0, LNET_CPT_NUMBER - 1,
+                                               &net_el);
                        if (rc != 0) {
                        if (rc != 0) {
-                               tmp = square;
+                               str = elstr;
                                goto failed_syntax;
                        }
                                goto failed_syntax;
                        }
-
-                       while (square <= tmp)
-                               *square++ = ' ';
+                       *elstr = '\0';
+                       do {
+                               *str++ = '\0';
+                       } while (isspace(*str));
                }
 
                }
 
-               if (bracket == NULL ||
-                   (comma != NULL && comma < bracket)) {
-
-                       /* no interface list specified */
-
-                       if (comma != NULL)
-                               *comma++ = 0;
-                       net = libcfs_str2net(cfs_trimwhite(str));
-
-                       if (net == LNET_NIDNET(LNET_NID_ANY)) {
-                               LCONSOLE_ERROR_MSG(0x113, "Unrecognised network"
-                                                  " type\n");
-                               tmp = str;
-                               goto failed_syntax;
-                       }
+               /* Bad delimiters */
+               if (*str && (strchr(DELIMITERS, *str) != NULL))
+                       goto failed_syntax;
 
 
-                       if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
-                           lnet_ni_alloc(net, el, nilist) == NULL)
-                               goto failed;
+               /* go to the next net if it exists */
+               str += strcspn(str, ",");
+               if (*str == ',')
+                       *str++ = '\0';
+
+               /*
+                * At this point the name is properly terminated.
+                */
+               net_id = libcfs_str2net(name);
+               if (net_id == LNET_NIDNET(LNET_NID_ANY)) {
+                       LCONSOLE_ERROR_MSG(0x113,
+                                       "Unrecognised network type\n");
+                       str = name;
+                       goto failed_syntax;
+               }
 
 
-                       if (el != NULL) {
-                               cfs_expr_list_free(el);
-                               el = NULL;
+               if (LNET_NETTYP(net_id) == LOLND) {
+                       /* Loopback is implicit, and there can be only one. */
+                       if (net_el) {
+                               cfs_expr_list_free(net_el);
+                               net_el = NULL;
                        }
                        }
-
-                       str = comma;
+                       /* Should we error out instead? */
                        continue;
                }
 
                        continue;
                }
 
-               *bracket = 0;
-               net = libcfs_str2net(cfs_trimwhite(str));
-               if (net == LNET_NIDNET(LNET_NID_ANY)) {
-                       tmp = str;
-                       goto failed_syntax;
-               }
+               /*
+                * All network parameters are now known.
+                */
+               nnets++;
 
 
-               ni = lnet_ni_alloc(net, el, nilist);
-               if (ni == NULL)
+               /* always allocate a net, since we will eventually add an
+                * interface to it, or we will fail, in which case we'll
+                * just delete it */
+               net = lnet_net_alloc(net_id, netlist);
+               if (IS_ERR_OR_NULL(net))
                        goto failed;
 
                        goto failed;
 
-               if (el != NULL) {
-                       cfs_expr_list_free(el);
-                       el = NULL;
-               }
-
-               niface = 0;
-               iface = bracket + 1;
+               if (!nistr ||
+                   (use_tcp_bonding && LNET_NETTYP(net_id) == SOCKLND)) {
+                       /*
+                        * No interface list was specified, allocate a
+                        * ni using the defaults.
+                        */
+                       ni = lnet_ni_alloc(net, net_el, NULL);
+                       if (IS_ERR_OR_NULL(ni))
+                               goto failed;
 
 
-               bracket = strchr(iface, ')');
-               if (bracket == NULL) {
-                       tmp = iface;
-                       goto failed_syntax;
+                       if (!nistr) {
+                               if (net_el) {
+                                       cfs_expr_list_free(net_el);
+                                       net_el = NULL;
+                               }
+                               continue;
+                       }
                }
 
                }
 
-               *bracket = 0;
                do {
                do {
-                       comma = strchr(iface, ',');
-                       if (comma != NULL)
-                               *comma++ = 0;
+                       elstr = NULL;
+
+                       /* Interface name (mandatory) */
+                       while (isspace(*nistr))
+                               *nistr++ = '\0';
+                       name = nistr;
+                       nistr += strcspn(nistr, SPACESTR "[],");
+                       while (isspace(*nistr))
+                               *nistr++ = '\0';
+
+                       /* CPT expression (optional) */
+                       if (*nistr == '[') {
+                               elstr = nistr;
+                               nistr += strcspn(nistr, "]");
+                               if (*nistr != ']') {
+                                       str = elstr;
+                                       goto failed_syntax;
+                               }
+                               rc = cfs_expr_list_parse(elstr,
+                                                       nistr - elstr + 1,
+                                                       0, LNET_CPT_NUMBER - 1,
+                                                       &ni_el);
+                               if (rc != 0) {
+                                       str = elstr;
+                                       goto failed_syntax;
+                               }
+                               *elstr = '\0';
+                               do {
+                                       *nistr++ = '\0';
+                               } while (isspace(*nistr));
+                       } else {
+                               ni_el = net_el;
+                       }
 
 
-                       iface = cfs_trimwhite(iface);
-                       if (*iface == 0) {
-                               tmp = iface;
+                       /*
+                        * End of single interface specification,
+                        * advance to the start of the next one, if
+                        * any.
+                        */
+                       if (*nistr == ',') {
+                               do {
+                                       *nistr++ = '\0';
+                               } while (isspace(*nistr));
+                               if (!*nistr) {
+                                       str = nistr;
+                                       goto failed_syntax;
+                               }
+                       } else if (*nistr) {
+                               str = nistr;
                                goto failed_syntax;
                        }
 
                                goto failed_syntax;
                        }
 
-                       if (niface == LNET_MAX_INTERFACES) {
-                               LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
-                                                  "for net %s\n",
-                                                  libcfs_net2str(net));
-                               goto failed;
+                       /*
+                        * At this point the name is properly terminated.
+                        */
+                       if (!*name) {
+                               str = name;
+                               goto failed_syntax;
                        }
 
                        }
 
-                       /* Allocate a separate piece of memory and copy
-                        * into it the string, so we don't have
-                        * a depencency on the tokens string.  This way we
-                        * can free the tokens at the end of the function.
-                        * The newly allocated ni_interfaces[] can be
-                        * freed when freeing the NI */
-                       LIBCFS_ALLOC(ni->ni_interfaces[niface],
-                                    strlen(iface) + 1);
-                       if (ni->ni_interfaces[niface] == NULL) {
-                               CERROR("Can't allocate net interface name\n");
-                               goto failed;
+                       if (use_tcp_bonding &&
+                           LNET_NETTYP(net->net_id) == SOCKLND) {
+                               rc = lnet_ni_add_interface(ni, name);
+                               if (rc != 0)
+                                       goto failed;
+                       } else {
+                               ni = lnet_ni_alloc(net, ni_el, name);
+                               if (IS_ERR_OR_NULL(ni))
+                                       goto failed;
                        }
                        }
-                       strncpy(ni->ni_interfaces[niface], iface,
-                               strlen(iface));
-                       niface++;
-                       iface = comma;
-               } while (iface != NULL);
-
-               str = bracket + 1;
-               comma = strchr(bracket + 1, ',');
-               if (comma != NULL) {
-                       *comma = 0;
-                       str = cfs_trimwhite(str);
-                       if (*str != 0) {
-                               tmp = str;
-                               goto failed_syntax;
+
+                       if (ni_el) {
+                               if (ni_el != net_el) {
+                                       cfs_expr_list_free(ni_el);
+                                       ni_el = NULL;
+                               }
                        }
                        }
-                       str = comma + 1;
-                       continue;
-               }
+               } while (*nistr);
 
 
-               str = cfs_trimwhite(str);
-               if (*str != 0) {
-                       tmp = str;
-                       goto failed_syntax;
+               if (net_el) {
+                       cfs_expr_list_free(net_el);
+                       net_el = NULL;
                }
                }
-       }
-
-       list_for_each(temp_node, nilist)
-               nnets++;
+       } while (*str);
 
        LIBCFS_FREE(tokens, tokensize);
        return nnets;
 
  failed_syntax:
 
        LIBCFS_FREE(tokens, tokensize);
        return nnets;
 
  failed_syntax:
-       lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
+       lnet_syntax("networks", networks, (int)(str - tokens), strlen(str));
  failed:
  failed:
-       while (!list_empty(nilist)) {
-               ni = list_entry(nilist->next, lnet_ni_t, ni_list);
+       /* free the net list and all the nis on each net */
+       while (!list_empty(netlist)) {
+               net = list_entry(netlist->next, struct lnet_net, net_list);
 
 
-               list_del(&ni->ni_list);
-               lnet_ni_free(ni);
+               list_del_init(&net->net_list);
+               lnet_net_free(net);
        }
 
        }
 
-       if (el != NULL)
-               cfs_expr_list_free(el);
+       if (ni_el && ni_el != net_el)
+               cfs_expr_list_free(ni_el);
+       if (net_el)
+               cfs_expr_list_free(net_el);
 
        LIBCFS_FREE(tokens, tokensize);
 
 
        LIBCFS_FREE(tokens, tokensize);
 
index d93d061..e9a63eb 100644 (file)
@@ -584,13 +584,14 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
                        iov  = msg->msg_iov;
                        kiov = msg->msg_kiov;
 
                        iov  = msg->msg_iov;
                        kiov = msg->msg_kiov;
 
-                       LASSERT(niov > 0);
-                       LASSERT((iov == NULL) != (kiov == NULL));
+                       LASSERT (niov > 0);
+                       LASSERT ((iov == NULL) != (kiov == NULL));
                }
        }
 
                }
        }
 
-       rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed,
-                                   niov, iov, kiov, offset, mlen, rlen);
+       rc = (ni->ni_net->net_lnd->lnd_recv)(ni, private, msg, delayed,
+                                            niov, iov, kiov, offset, mlen,
+                                            rlen);
        if (rc < 0)
                lnet_finalize(ni, msg, rc);
 }
        if (rc < 0)
                lnet_finalize(ni, msg, rc);
 }
@@ -645,7 +646,7 @@ lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg)
        LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
                 (msg->msg_txcredit && msg->msg_peertxcredit));
 
        LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
                 (msg->msg_txcredit && msg->msg_peertxcredit));
 
-       rc = (ni->ni_lnd->lnd_send)(ni, priv, msg);
+       rc = (ni->ni_net->net_lnd->lnd_send)(ni, priv, msg);
        if (rc < 0)
                lnet_finalize(ni, msg, rc);
 }
        if (rc < 0)
                lnet_finalize(ni, msg, rc);
 }
@@ -658,11 +659,11 @@ lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg)
        LASSERT(!msg->msg_sending);
        LASSERT(msg->msg_receiving);
        LASSERT(!msg->msg_rx_ready_delay);
        LASSERT(!msg->msg_sending);
        LASSERT(msg->msg_receiving);
        LASSERT(!msg->msg_rx_ready_delay);
-       LASSERT(ni->ni_lnd->lnd_eager_recv != NULL);
+       LASSERT(ni->ni_net->net_lnd->lnd_eager_recv != NULL);
 
        msg->msg_rx_ready_delay = 1;
 
        msg->msg_rx_ready_delay = 1;
-       rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
-                                         &msg->msg_private);
+       rc = (ni->ni_net->net_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
+                                                 &msg->msg_private);
        if (rc != 0) {
                CERROR("recv from %s / send to %s aborted: "
                       "eager_recv failed %d\n",
        if (rc != 0) {
                CERROR("recv from %s / send to %s aborted: "
                       "eager_recv failed %d\n",
@@ -681,10 +682,10 @@ lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp)
        cfs_time_t last_alive = 0;
 
        LASSERT(lnet_peer_aliveness_enabled(lp));
        cfs_time_t last_alive = 0;
 
        LASSERT(lnet_peer_aliveness_enabled(lp));
-       LASSERT(ni->ni_lnd->lnd_query != NULL);
+       LASSERT(ni->ni_net->net_lnd->lnd_query != NULL);
 
        lnet_net_unlock(lp->lp_cpt);
 
        lnet_net_unlock(lp->lp_cpt);
-       (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
+       (ni->ni_net->net_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
        lnet_net_lock(lp->lp_cpt);
 
        lp->lp_last_query = cfs_time_current();
        lnet_net_lock(lp->lp_cpt);
 
        lp->lp_last_query = cfs_time_current();
@@ -697,23 +698,27 @@ lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp)
 static inline int
 lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
 {
 static inline int
 lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
 {
-       int        alive;
+       int        alive;
        cfs_time_t deadline;
 
        cfs_time_t deadline;
 
-       LASSERT(lnet_peer_aliveness_enabled(lp));
+       LASSERT (lnet_peer_aliveness_enabled(lp));
 
 
-       /* Trust lnet_notify() if it has more recent aliveness news, but
+       /*
+        * Trust lnet_notify() if it has more recent aliveness news, but
         * ignore the initial assumed death (see lnet_peers_start_down()).
         */
        if (!lp->lp_alive && lp->lp_alive_count > 0 &&
            cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
                return 0;
 
         * ignore the initial assumed death (see lnet_peers_start_down()).
         */
        if (!lp->lp_alive && lp->lp_alive_count > 0 &&
            cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
                return 0;
 
-       deadline = cfs_time_add(lp->lp_last_alive,
-                               cfs_time_seconds(lp->lp_ni->ni_peertimeout));
+       deadline =
+         cfs_time_add(lp->lp_last_alive,
+                      cfs_time_seconds(lp->lp_net->net_tunables.
+                                       lct_peer_timeout));
        alive = cfs_time_after(deadline, now);
 
        alive = cfs_time_after(deadline, now);
 
-       /* Update obsolete lp_alive except for routers assumed to be dead
+       /*
+        * Update obsolete lp_alive except for routers assumed to be dead
         * initially, because router checker would update aliveness in this
         * case, and moreover lp_last_alive at peer creation is assumed.
         */
         * initially, because router checker would update aliveness in this
         * case, and moreover lp_last_alive at peer creation is assumed.
         */
@@ -728,7 +733,7 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
 /* NB: returns 1 when alive, 0 when dead, negative when error;
  *     may drop the lnet_net_lock */
 static int
 /* NB: returns 1 when alive, 0 when dead, negative when error;
  *     may drop the lnet_net_lock */
 static int
-lnet_peer_alive_locked (lnet_peer_t *lp)
+lnet_peer_alive_locked (struct lnet_ni *ni, lnet_peer_t *lp)
 {
        cfs_time_t now = cfs_time_current();
 
 {
        cfs_time_t now = cfs_time_current();
 
@@ -738,8 +743,10 @@ lnet_peer_alive_locked (lnet_peer_t *lp)
        if (lnet_peer_is_alive(lp, now))
                return 1;
 
        if (lnet_peer_is_alive(lp, now))
                return 1;
 
-       /* Peer appears dead, but we should avoid frequent NI queries (at
-        * most once per lnet_queryinterval seconds). */
+       /*
+        * Peer appears dead, but we should avoid frequent NI queries (at
+        * most once per lnet_queryinterval seconds).
+        */
        if (lp->lp_last_query != 0) {
                static const int lnet_queryinterval = 1;
 
        if (lp->lp_last_query != 0) {
                static const int lnet_queryinterval = 1;
 
@@ -754,13 +761,13 @@ lnet_peer_alive_locked (lnet_peer_t *lp)
                                      libcfs_nid2str(lp->lp_nid),
                                      (int)now, (int)next_query,
                                      lnet_queryinterval,
                                      libcfs_nid2str(lp->lp_nid),
                                      (int)now, (int)next_query,
                                      lnet_queryinterval,
-                                     lp->lp_ni->ni_peertimeout);
+                                     lp->lp_net->net_tunables.lct_peer_timeout);
                        return 0;
                }
        }
 
        /* query NI for latest aliveness news */
                        return 0;
                }
        }
 
        /* query NI for latest aliveness news */
-       lnet_ni_query_locked(lp->lp_ni, lp);
+       lnet_ni_query_locked(ni, lp);
 
        if (lnet_peer_is_alive(lp, now))
                return 1;
 
        if (lnet_peer_is_alive(lp, now))
                return 1;
@@ -784,7 +791,7 @@ static int
 lnet_post_send_locked(lnet_msg_t *msg, int do_send)
 {
        lnet_peer_t             *lp = msg->msg_txpeer;
 lnet_post_send_locked(lnet_msg_t *msg, int do_send)
 {
        lnet_peer_t             *lp = msg->msg_txpeer;
-       lnet_ni_t               *ni = lp->lp_ni;
+       lnet_ni_t               *ni = msg->msg_txni;
        int                     cpt = msg->msg_tx_cpt;
        struct lnet_tx_queue    *tq = ni->ni_tx_queues[cpt];
 
        int                     cpt = msg->msg_tx_cpt;
        struct lnet_tx_queue    *tq = ni->ni_tx_queues[cpt];
 
@@ -795,7 +802,7 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send)
 
        /* NB 'lp' is always the next hop */
        if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
 
        /* NB 'lp' is always the next hop */
        if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
-           lnet_peer_alive_locked(lp) == 0) {
+           lnet_peer_alive_locked(ni, lp) == 0) {
                the_lnet.ln_counters[cpt]->drop_count++;
                the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
                lnet_net_unlock(cpt);
                the_lnet.ln_counters[cpt]->drop_count++;
                the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
                lnet_net_unlock(cpt);
@@ -954,7 +961,7 @@ lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv)
                int cpt = msg->msg_rx_cpt;
 
                lnet_net_unlock(cpt);
                int cpt = msg->msg_rx_cpt;
 
                lnet_net_unlock(cpt);
-               lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
+               lnet_ni_recv(msg->msg_rxni, msg->msg_private, msg, 1,
                             0, msg->msg_len, msg->msg_len);
                lnet_net_lock(cpt);
        }
                             0, msg->msg_len, msg->msg_len);
                lnet_net_lock(cpt);
        }
@@ -966,9 +973,10 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg)
 {
        lnet_peer_t     *txpeer = msg->msg_txpeer;
        lnet_msg_t      *msg2;
 {
        lnet_peer_t     *txpeer = msg->msg_txpeer;
        lnet_msg_t      *msg2;
+       struct lnet_ni  *txni = msg->msg_txni;
 
        if (msg->msg_txcredit) {
 
        if (msg->msg_txcredit) {
-               struct lnet_ni       *ni = txpeer->lp_ni;
+               struct lnet_ni       *ni = msg->msg_txni;
                struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
 
                /* give back NI txcredits */
                struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
 
                /* give back NI txcredits */
@@ -983,7 +991,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg)
                                          lnet_msg_t, msg_list);
                        list_del(&msg2->msg_list);
 
                                          lnet_msg_t, msg_list);
                        list_del(&msg2->msg_list);
 
-                       LASSERT(msg2->msg_txpeer->lp_ni == ni);
+                       LASSERT(msg2->msg_txni == ni);
                        LASSERT(msg2->msg_tx_delayed);
 
                        (void) lnet_post_send_locked(msg2, 1);
                        LASSERT(msg2->msg_tx_delayed);
 
                        (void) lnet_post_send_locked(msg2, 1);
@@ -1013,6 +1021,11 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg)
                }
        }
 
                }
        }
 
+       if (txni != NULL) {
+               msg->msg_txni = NULL;
+               lnet_ni_decref_locked(txni, msg->msg_tx_cpt);
+       }
+
        if (txpeer != NULL) {
                msg->msg_txpeer = NULL;
                lnet_peer_decref_locked(txpeer);
        if (txpeer != NULL) {
                msg->msg_txpeer = NULL;
                lnet_peer_decref_locked(txpeer);
@@ -1047,7 +1060,7 @@ lnet_drop_routed_msgs_locked(struct list_head *list, int cpt)
        lnet_net_unlock(cpt);
 
        list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
        lnet_net_unlock(cpt);
 
        list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
-               lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
+               lnet_ni_recv(msg->msg_rxni, msg->msg_private, NULL,
                             0, 0, 0, msg->msg_hdr.payload_length);
                list_del_init(&msg->msg_list);
                lnet_finalize(NULL, msg, -ECANCELED);
                             0, 0, 0, msg->msg_hdr.payload_length);
                list_del_init(&msg->msg_list);
                lnet_finalize(NULL, msg, -ECANCELED);
@@ -1060,6 +1073,7 @@ void
 lnet_return_rx_credits_locked(lnet_msg_t *msg)
 {
        lnet_peer_t     *rxpeer = msg->msg_rxpeer;
 lnet_return_rx_credits_locked(lnet_msg_t *msg)
 {
        lnet_peer_t     *rxpeer = msg->msg_rxpeer;
+       struct lnet_ni  *rxni = msg->msg_rxni;
        lnet_msg_t      *msg2;
 
        if (msg->msg_rtrcredit) {
        lnet_msg_t      *msg2;
 
        if (msg->msg_rtrcredit) {
@@ -1129,6 +1143,10 @@ routing_off:
                        (void) lnet_post_routed_recv_locked(msg2, 1);
                }
        }
                        (void) lnet_post_routed_recv_locked(msg2, 1);
                }
        }
+       if (rxni != NULL) {
+               msg->msg_rxni = NULL;
+               lnet_ni_decref_locked(rxni, msg->msg_rx_cpt);
+       }
        if (rxpeer != NULL) {
                msg->msg_rxpeer = NULL;
                lnet_peer_decref_locked(rxpeer);
        if (rxpeer != NULL) {
                msg->msg_rxpeer = NULL;
                lnet_peer_decref_locked(rxpeer);
@@ -1174,7 +1192,8 @@ lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2)
 }
 
 static lnet_peer_t *
 }
 
 static lnet_peer_t *
-lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
+lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target,
+                      lnet_nid_t rtr_nid)
 {
        lnet_remotenet_t        *rnet;
        lnet_route_t            *route;
 {
        lnet_remotenet_t        *rnet;
        lnet_route_t            *route;
@@ -1187,7 +1206,7 @@ lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
        /* If @rtr_nid is not LNET_NID_ANY, return the gateway with
         * rtr_nid nid, otherwise find the best gateway I can use */
 
        /* If @rtr_nid is not LNET_NID_ANY, return the gateway with
         * rtr_nid nid, otherwise find the best gateway I can use */
 
-       rnet = lnet_find_net_locked(LNET_NIDNET(target));
+       rnet = lnet_find_rnet_locked(LNET_NIDNET(target));
        if (rnet == NULL)
                return NULL;
 
        if (rnet == NULL)
                return NULL;
 
@@ -1199,7 +1218,7 @@ lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
                if (!lnet_is_route_alive(route))
                        continue;
 
                if (!lnet_is_route_alive(route))
                        continue;
 
-               if (ni != NULL && lp->lp_ni != ni)
+               if (net != NULL && lp->lp_net != net)
                        continue;
 
                if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
                        continue;
 
                if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
@@ -1254,14 +1273,13 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
        msg->msg_sending = 1;
 
        LASSERT(!msg->msg_tx_committed);
        msg->msg_sending = 1;
 
        LASSERT(!msg->msg_tx_committed);
-       cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
+       local_ni = lnet_net2ni(LNET_NIDNET(dst_nid));
+       cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid,
+                             local_ni);
  again:
  again:
-       lnet_net_lock(cpt);
-
-       if (the_lnet.ln_shutdown) {
-               lnet_net_unlock(cpt);
+       if (the_lnet.ln_shutdown)
                return -ESHUTDOWN;
                return -ESHUTDOWN;
-       }
+       lnet_net_lock(cpt);
 
        if (src_nid == LNET_NID_ANY) {
                src_ni = NULL;
 
        if (src_nid == LNET_NID_ANY) {
                src_ni = NULL;
@@ -1284,11 +1302,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
                if (src_ni == NULL) {
                        src_ni = local_ni;
                        src_nid = src_ni->ni_nid;
                if (src_ni == NULL) {
                        src_ni = local_ni;
                        src_nid = src_ni->ni_nid;
-               } else if (src_ni == local_ni) {
-                       lnet_ni_decref_locked(local_ni, cpt);
-               } else {
-                       lnet_ni_decref_locked(local_ni, cpt);
-                       lnet_ni_decref_locked(src_ni, cpt);
+               } else if (src_ni != local_ni) {
                        lnet_net_unlock(cpt);
                        LCONSOLE_WARN("No route to %s via from %s\n",
                                      libcfs_nid2str(dst_nid),
                        lnet_net_unlock(cpt);
                        LCONSOLE_WARN("No route to %s via from %s\n",
                                      libcfs_nid2str(dst_nid),
@@ -1306,16 +1320,10 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
                        /* No send credit hassles with LOLND */
                        lnet_net_unlock(cpt);
                        lnet_ni_send(src_ni, msg);
                        /* No send credit hassles with LOLND */
                        lnet_net_unlock(cpt);
                        lnet_ni_send(src_ni, msg);
-
-                       lnet_net_lock(cpt);
-                       lnet_ni_decref_locked(src_ni, cpt);
-                       lnet_net_unlock(cpt);
                        return 0;
                }
 
                rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
                        return 0;
                }
 
                rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
-               /* lp has ref on src_ni; lose mine */
-               lnet_ni_decref_locked(src_ni, cpt);
                if (rc != 0) {
                        lnet_net_unlock(cpt);
                        LCONSOLE_WARN("Error %d finding peer %s\n", rc,
                if (rc != 0) {
                        lnet_net_unlock(cpt);
                        LCONSOLE_WARN("Error %d finding peer %s\n", rc,
@@ -1323,13 +1331,13 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
                        /* ENOMEM or shutting down */
                        return rc;
                }
                        /* ENOMEM or shutting down */
                        return rc;
                }
-               LASSERT(lp->lp_ni == src_ni);
+               LASSERT (lp->lp_net == src_ni->ni_net);
        } else {
                /* sending to a remote network */
        } else {
                /* sending to a remote network */
-               lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
+               lp = lnet_find_route_locked(src_ni != NULL ?
+                                           src_ni->ni_net : NULL,
+                                           dst_nid, rtr_nid);
                if (lp == NULL) {
                if (lp == NULL) {
-                       if (src_ni != NULL)
-                               lnet_ni_decref_locked(src_ni, cpt);
                        lnet_net_unlock(cpt);
 
                        LCONSOLE_WARN("No route to %s via %s "
                        lnet_net_unlock(cpt);
 
                        LCONSOLE_WARN("No route to %s via %s "
@@ -1344,10 +1352,8 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
                 * pre-determined router, this can happen if router table
                 * was changed when we release the lock */
                if (rtr_nid != lp->lp_nid) {
                 * pre-determined router, this can happen if router table
                 * was changed when we release the lock */
                if (rtr_nid != lp->lp_nid) {
-                       cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
+                       cpt2 = lp->lp_cpt;
                        if (cpt2 != cpt) {
                        if (cpt2 != cpt) {
-                               if (src_ni != NULL)
-                                       lnet_ni_decref_locked(src_ni, cpt);
                                lnet_net_unlock(cpt);
 
                                rtr_nid = lp->lp_nid;
                                lnet_net_unlock(cpt);
 
                                rtr_nid = lp->lp_nid;
@@ -1361,11 +1367,11 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
                       lnet_msgtyp2str(msg->msg_type), msg->msg_len);
 
                if (src_ni == NULL) {
                       lnet_msgtyp2str(msg->msg_type), msg->msg_len);
 
                if (src_ni == NULL) {
-                       src_ni = lp->lp_ni;
+                       src_ni = lnet_get_next_ni_locked(lp->lp_net, NULL);
+                       LASSERT(src_ni != NULL);
                        src_nid = src_ni->ni_nid;
                } else {
                        src_nid = src_ni->ni_nid;
                } else {
-                       LASSERT(src_ni == lp->lp_ni);
-                       lnet_ni_decref_locked(src_ni, cpt);
+                       LASSERT (src_ni->ni_net == lp->lp_net);
                }
 
                lnet_peer_addref_locked(lp);
                }
 
                lnet_peer_addref_locked(lp);
@@ -1389,7 +1395,10 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid)
        LASSERT(!msg->msg_txcredit);
        LASSERT(msg->msg_txpeer == NULL);
 
        LASSERT(!msg->msg_txcredit);
        LASSERT(msg->msg_txpeer == NULL);
 
-       msg->msg_txpeer = lp;                   /* msg takes my ref on lp */
+       msg->msg_txpeer = lp;                   /* msg takes my ref on lp */
+       /* set the NI for this message */
+       msg->msg_txni = src_ni;
+       lnet_ni_addref_locked(msg->msg_txni, cpt);
 
        rc = lnet_post_send_locked(msg, 0);
        lnet_net_unlock(cpt);
 
        rc = lnet_post_send_locked(msg, 0);
        lnet_net_unlock(cpt);
@@ -1453,8 +1462,9 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg)
        info.mi_rlength = hdr->payload_length;
        info.mi_roffset = hdr->msg.put.offset;
        info.mi_mbits   = hdr->msg.put.match_bits;
        info.mi_rlength = hdr->payload_length;
        info.mi_roffset = hdr->msg.put.offset;
        info.mi_mbits   = hdr->msg.put.match_bits;
+       info.mi_cpt     = msg->msg_rxpeer->lp_cpt;
 
 
-       msg->msg_rx_ready_delay = ni->ni_lnd->lnd_eager_recv == NULL;
+       msg->msg_rx_ready_delay = ni->ni_net->net_lnd->lnd_eager_recv == NULL;
        ready_delay = msg->msg_rx_ready_delay;
 
  again:
        ready_delay = msg->msg_rx_ready_delay;
 
  again:
@@ -1687,7 +1697,7 @@ lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg)
 
        if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
            lnet_msg2bufpool(msg)->rbp_credits <= 0) {
 
        if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
            lnet_msg2bufpool(msg)->rbp_credits <= 0) {
-               if (ni->ni_lnd->lnd_eager_recv == NULL) {
+               if (ni->ni_net->net_lnd->lnd_eager_recv == NULL) {
                        msg->msg_rx_ready_delay = 1;
                } else {
                        lnet_net_unlock(msg->msg_rx_cpt);
                        msg->msg_rx_ready_delay = 1;
                } else {
                        lnet_net_unlock(msg->msg_rx_cpt);
@@ -1832,7 +1842,7 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
        payload_length = le32_to_cpu(hdr->payload_length);
 
        for_me = (ni->ni_nid == dest_nid);
        payload_length = le32_to_cpu(hdr->payload_length);
 
        for_me = (ni->ni_nid == dest_nid);
-       cpt = lnet_cpt_of_nid(from_nid);
+       cpt = lnet_cpt_of_nid(from_nid, ni);
 
        switch (type) {
        case LNET_MSG_ACK:
 
        switch (type) {
        case LNET_MSG_ACK:
@@ -1994,6 +2004,8 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
                        return 0;
                goto drop;
        }
                        return 0;
                goto drop;
        }
+       msg->msg_rxni = ni;
+       lnet_ni_addref_locked(ni, cpt);
 
        if (lnet_isrouter(msg->msg_rxpeer)) {
                lnet_peer_set_alive(msg->msg_rxpeer);
 
        if (lnet_isrouter(msg->msg_rxpeer)) {
                lnet_peer_set_alive(msg->msg_rxpeer);
@@ -2078,7 +2090,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
                 * called lnet_drop_message(), so I just hang onto msg as well
                 * until that's done */
 
                 * called lnet_drop_message(), so I just hang onto msg as well
                 * until that's done */
 
-               lnet_drop_message(msg->msg_rxpeer->lp_ni,
+               lnet_drop_message(msg->msg_rxni,
                                  msg->msg_rxpeer->lp_cpt,
                                  msg->msg_private, msg->msg_len);
                /*
                                  msg->msg_rxpeer->lp_cpt,
                                  msg->msg_private, msg->msg_len);
                /*
@@ -2086,7 +2098,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason)
                 * but we still should give error code so lnet_msg_decommit()
                 * can skip counters operations and other checks.
                 */
                 * but we still should give error code so lnet_msg_decommit()
                 * can skip counters operations and other checks.
                 */
-               lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
+               lnet_finalize(msg->msg_rxni, msg, -ENOENT);
        }
 }
 
        }
 }
 
@@ -2109,6 +2121,7 @@ lnet_recv_delayed_msg_list(struct list_head *head)
                LASSERT(msg->msg_rx_delayed);
                LASSERT(msg->msg_md != NULL);
                LASSERT(msg->msg_rxpeer != NULL);
                LASSERT(msg->msg_rx_delayed);
                LASSERT(msg->msg_md != NULL);
                LASSERT(msg->msg_rxpeer != NULL);
+               LASSERT(msg->msg_rxni != NULL);
                LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
 
                CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
                LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
 
                CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
@@ -2118,7 +2131,7 @@ lnet_recv_delayed_msg_list(struct list_head *head)
                        msg->msg_hdr.msg.put.offset,
                        msg->msg_hdr.payload_length);
 
                        msg->msg_hdr.msg.put.offset,
                        msg->msg_hdr.payload_length);
 
-               lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
+               lnet_recv_put(msg->msg_rxni, msg);
        }
 }
 
        }
 }
 
@@ -2303,7 +2316,7 @@ lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *getmsg)
        lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
        lnet_res_unlock(cpt);
 
        lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
        lnet_res_unlock(cpt);
 
-       cpt = lnet_cpt_of_nid(peer_id.nid);
+       cpt = lnet_cpt_of_nid(peer_id.nid, ni);
 
        lnet_net_lock(cpt);
        lnet_msg_commit(msg, cpt);
 
        lnet_net_lock(cpt);
        lnet_msg_commit(msg, cpt);
@@ -2314,7 +2327,7 @@ lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *getmsg)
        return msg;
 
  drop:
        return msg;
 
  drop:
-       cpt = lnet_cpt_of_nid(peer_id.nid);
+       cpt = lnet_cpt_of_nid(peer_id.nid, ni);
 
        lnet_net_lock(cpt);
        the_lnet.ln_counters[cpt]->drop_count++;
 
        lnet_net_lock(cpt);
        the_lnet.ln_counters[cpt]->drop_count++;
@@ -2461,7 +2474,7 @@ int
 LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
 {
        struct list_head        *e;
 LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
 {
        struct list_head        *e;
-       struct lnet_ni          *ni;
+       struct lnet_ni          *ni = NULL;
        lnet_remotenet_t        *rnet;
        __u32                   dstnet = LNET_NIDNET(dstnid);
        int                     hops;
        lnet_remotenet_t        *rnet;
        __u32                   dstnet = LNET_NIDNET(dstnid);
        int                     hops;
@@ -2478,9 +2491,7 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
 
        cpt = lnet_net_lock_current();
 
 
        cpt = lnet_net_lock_current();
 
-       list_for_each(e, &the_lnet.ln_nis) {
-               ni = list_entry(e, lnet_ni_t, ni_list);
-
+       while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
                if (ni->ni_nid == dstnid) {
                        if (srcnidp != NULL)
                                *srcnidp = dstnid;
                if (ni->ni_nid == dstnid) {
                        if (srcnidp != NULL)
                                *srcnidp = dstnid;
@@ -2540,8 +2551,12 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
 
                        LASSERT(shortest != NULL);
                        hops = shortest_hops;
 
                        LASSERT(shortest != NULL);
                        hops = shortest_hops;
-                       if (srcnidp != NULL)
-                               *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
+                       if (srcnidp != NULL) {
+                               ni = lnet_get_next_ni_locked(
+                                       shortest->lr_gateway->lp_net,
+                                       NULL);
+                               *srcnidp = ni->ni_nid;
+                       }
                        if (orderp != NULL)
                                *orderp = order;
                        lnet_net_unlock(cpt);
                        if (orderp != NULL)
                                *orderp = order;
                        lnet_net_unlock(cpt);
index acba755..cddd7de 100644 (file)
@@ -222,7 +222,7 @@ lnet_match2mt(struct lnet_portal *ptl, lnet_process_id_t id, __u64 mbits)
 
        /* if it's a unique portal, return match-table hashed by NID */
        return lnet_ptl_is_unique(ptl) ?
 
        /* if it's a unique portal, return match-table hashed by NID */
        return lnet_ptl_is_unique(ptl) ?
-              ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
+              ptl->ptl_mtables[lnet_cpt_of_nid(id.nid, NULL)] : NULL;
 }
 
 struct lnet_match_table *
 }
 
 struct lnet_match_table *
@@ -292,7 +292,7 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg)
 
        rotor = ptl->ptl_rotor++; /* get round-robin factor */
        if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
 
        rotor = ptl->ptl_rotor++; /* get round-robin factor */
        if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
-               cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
+               cpt = info->mi_cpt;
        else
                cpt = rotor % LNET_CPT_NUMBER;
 
        else
                cpt = rotor % LNET_CPT_NUMBER;
 
@@ -941,7 +941,7 @@ lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason)
                /* grab all messages which are on the NI passed in */
                list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
                                         msg_list) {
                /* grab all messages which are on the NI passed in */
                list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
                                         msg_list) {
-                       if (msg->msg_rxpeer->lp_ni == ni)
+                       if (msg->msg_txni == ni || msg->msg_rxni == ni)
                                list_move(&msg->msg_list, &zombies);
                }
        } else {
                                list_move(&msg->msg_list, &zombies);
                }
        } else {
index 673f9b3..cda649b 100644 (file)
@@ -94,7 +94,7 @@ lolnd_shutdown(lnet_ni_t *ni)
 static int
 lolnd_startup (lnet_ni_t *ni)
 {
 static int
 lolnd_startup (lnet_ni_t *ni)
 {
-       LASSERT (ni->ni_lnd == &the_lolnd);
+       LASSERT (ni->ni_net->net_lnd == &the_lolnd);
        LASSERT (!lolnd_instanced);
        lolnd_instanced = 1;
 
        LASSERT (!lolnd_instanced);
        lolnd_instanced = 1;
 
index 083b169..91c9c6b 100644 (file)
@@ -617,8 +617,9 @@ delayed_msg_process(struct list_head *msg_list, bool drop)
 
                msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
                LASSERT(msg->msg_rxpeer != NULL);
 
                msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
                LASSERT(msg->msg_rxpeer != NULL);
+               LASSERT(msg->msg_rxni != NULL);
 
 
-               ni = msg->msg_rxpeer->lp_ni;
+               ni = msg->msg_rxni;
                cpt = msg->msg_rx_cpt;
 
                list_del_init(&msg->msg_list);
                cpt = msg->msg_rx_cpt;
 
                list_del_init(&msg->msg_list);
index 523d5b3..236f63b 100644 (file)
@@ -110,7 +110,7 @@ lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable)
        for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
                list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
                                         lp_hashlist) {
        for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
                list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
                                         lp_hashlist) {
-                       if (ni != NULL && ni != lp->lp_ni)
+                       if (ni != NULL && ni->ni_net != lp->lp_net)
                                continue;
                        list_del_init(&lp->lp_hashlist);
                        /* Lose hash table's ref */
                                continue;
                        list_del_init(&lp->lp_hashlist);
                        /* Lose hash table's ref */
@@ -152,7 +152,7 @@ lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable,
        for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
                list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
                                         lp_hashlist) {
        for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
                list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
                                         lp_hashlist) {
-                       if (ni != lp->lp_ni)
+                       if (ni->ni_net != lp->lp_net)
                                continue;
 
                        if (lp->lp_rtr_refcount == 0)
                                continue;
 
                        if (lp->lp_rtr_refcount == 0)
@@ -224,8 +224,7 @@ lnet_destroy_peer_locked(lnet_peer_t *lp)
        LASSERT(ptable->pt_number > 0);
        ptable->pt_number--;
 
        LASSERT(ptable->pt_number > 0);
        ptable->pt_number--;
 
-       lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
-       lp->lp_ni = NULL;
+       lp->lp_net = NULL;
 
        list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
        LASSERT(ptable->pt_zombies > 0);
 
        list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
        LASSERT(ptable->pt_zombies > 0);
@@ -265,7 +264,7 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
                return -ESHUTDOWN;
 
        /* cpt can be LNET_LOCK_EX if it's called from router functions */
                return -ESHUTDOWN;
 
        /* cpt can be LNET_LOCK_EX if it's called from router functions */
-       cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
+       cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid, NULL);
 
        ptable = the_lnet.ln_peer_tables[cpt2];
        lp = lnet_find_peer_locked(ptable, nid);
 
        ptable = the_lnet.ln_peer_tables[cpt2];
        lp = lnet_find_peer_locked(ptable, nid);
@@ -330,16 +329,11 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt)
                goto out;
        }
 
                goto out;
        }
 
-       lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
-       if (lp->lp_ni == NULL) {
-               rc = -EHOSTUNREACH;
-               goto out;
-       }
-
+       lp->lp_net = lnet_get_net_locked(LNET_NIDNET(lp->lp_nid));
        lp->lp_txcredits    =
        lp->lp_txcredits    =
-       lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
+       lp->lp_mintxcredits = lp->lp_net->net_tunables.lct_peer_tx_credits;
        lp->lp_rtrcredits    =
        lp->lp_rtrcredits    =
-       lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
+       lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_net);
 
        list_add_tail(&lp->lp_hashlist,
                      &ptable->pt_hash[lnet_nid2peerhash(nid)]);
 
        list_add_tail(&lp->lp_hashlist,
                      &ptable->pt_hash[lnet_nid2peerhash(nid)]);
@@ -362,7 +356,7 @@ lnet_debug_peer(lnet_nid_t nid)
        int             rc;
        int             cpt;
 
        int             rc;
        int             cpt;
 
-       cpt = lnet_cpt_of_nid(nid);
+       cpt = lnet_cpt_of_nid(nid, NULL);
        lnet_net_lock(cpt);
 
        rc = lnet_nid2peer_locked(&lp, nid, cpt);
        lnet_net_lock(cpt);
 
        rc = lnet_nid2peer_locked(&lp, nid, cpt);
@@ -377,7 +371,7 @@ lnet_debug_peer(lnet_nid_t nid)
 
        CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
               libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
 
        CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
               libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
-              aliveness, lp->lp_ni->ni_peertxcredits,
+              aliveness, lp->lp_net->net_tunables.lct_peer_tx_credits,
               lp->lp_rtrcredits, lp->lp_minrtrcredits,
               lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
 
               lp->lp_rtrcredits, lp->lp_minrtrcredits,
               lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
 
@@ -431,7 +425,8 @@ int lnet_get_peer_info(__u32 peer_index, __u64 *nid,
 
                        *nid = lp->lp_nid;
                        *refcount = lp->lp_refcount;
 
                        *nid = lp->lp_nid;
                        *refcount = lp->lp_refcount;
-                       *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits;
+                       *ni_peer_tx_credits =
+                               lp->lp_net->net_tunables.lct_peer_tx_credits;
                        *peer_tx_credits = lp->lp_txcredits;
                        *peer_rtr_credits = lp->lp_rtrcredits;
                        *peer_min_rtr_credits = lp->lp_mintxcredits;
                        *peer_tx_credits = lp->lp_txcredits;
                        *peer_rtr_credits = lp->lp_rtrcredits;
                        *peer_min_rtr_credits = lp->lp_mintxcredits;
index 3ae2ba3..b4c5842 100644 (file)
@@ -55,17 +55,17 @@ module_param(auto_down, int, 0444);
 MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
 
 int
 MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
 
 int
-lnet_peer_buffer_credits(lnet_ni_t *ni)
+lnet_peer_buffer_credits(struct lnet_net *net)
 {
        /* NI option overrides LNet default */
 {
        /* NI option overrides LNet default */
-       if (ni->ni_peerrtrcredits > 0)
-               return ni->ni_peerrtrcredits;
+       if (net->net_tunables.lct_peer_rtr_credits > 0)
+               return net->net_tunables.lct_peer_rtr_credits;
        if (peer_buffer_credits > 0)
                return peer_buffer_credits;
 
        /* As an approximation, allow this peer the same number of router
         * buffers as it is allowed outstanding sends */
        if (peer_buffer_credits > 0)
                return peer_buffer_credits;
 
        /* As an approximation, allow this peer the same number of router
         * buffers as it is allowed outstanding sends */
-       return ni->ni_peertxcredits;
+       return net->net_tunables.lct_peer_tx_credits;
 }
 
 /* forward ref's */
 }
 
 /* forward ref's */
@@ -148,13 +148,14 @@ lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp)
                lp->lp_notifylnd = 0;
                lp->lp_notify    = 0;
 
                lp->lp_notifylnd = 0;
                lp->lp_notify    = 0;
 
-               if (notifylnd && ni->ni_lnd->lnd_notify != NULL) {
+               if (notifylnd && ni->ni_net->net_lnd->lnd_notify != NULL) {
                        lnet_net_unlock(lp->lp_cpt);
 
                        /* A new notification could happen now; I'll handle it
                         * when control returns to me */
 
                        lnet_net_unlock(lp->lp_cpt);
 
                        /* A new notification could happen now; I'll handle it
                         * when control returns to me */
 
-                       (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);
+                       (ni->ni_net->net_lnd->lnd_notify)(ni, lp->lp_nid,
+                                                         alive);
 
                        lnet_net_lock(lp->lp_cpt);
                }
 
                        lnet_net_lock(lp->lp_cpt);
                }
@@ -216,7 +217,7 @@ lnet_rtr_decref_locked(lnet_peer_t *lp)
 }
 
 lnet_remotenet_t *
 }
 
 lnet_remotenet_t *
-lnet_find_net_locked (__u32 net)
+lnet_find_rnet_locked(__u32 net)
 {
        lnet_remotenet_t *rnet;
        struct list_head *tmp;
 {
        lnet_remotenet_t *rnet;
        struct list_head *tmp;
@@ -240,8 +241,7 @@ static void lnet_shuffle_seed(void)
        __u32 lnd_type;
        __u32 seed[2];
        struct timespec64 ts;
        __u32 lnd_type;
        __u32 seed[2];
        struct timespec64 ts;
-       lnet_ni_t *ni;
-       struct list_head *tmp;
+       lnet_ni_t *ni = NULL;
 
        if (seeded)
                return;
 
        if (seeded)
                return;
@@ -250,8 +250,7 @@ static void lnet_shuffle_seed(void)
 
        /* Nodes with small feet have little entropy
         * the NID for this node gives the most entropy in the low bits */
 
        /* Nodes with small feet have little entropy
         * the NID for this node gives the most entropy in the low bits */
-       list_for_each(tmp, &the_lnet.ln_nis) {
-               ni = list_entry(tmp, lnet_ni_t, ni_list);
+       while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
                lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
 
                if (lnd_type != LOLND)
                lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
 
                if (lnd_type != LOLND)
@@ -356,7 +355,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
 
        LASSERT(!the_lnet.ln_shutdown);
 
 
        LASSERT(!the_lnet.ln_shutdown);
 
-       rnet2 = lnet_find_net_locked(net);
+       rnet2 = lnet_find_rnet_locked(net);
        if (rnet2 == NULL) {
                /* new network */
                list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
        if (rnet2 == NULL) {
                /* new network */
                list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
@@ -381,12 +380,12 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway,
                lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
                lnet_add_route_to_rnet(rnet2, route);
 
                lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
                lnet_add_route_to_rnet(rnet2, route);
 
-               ni = route->lr_gateway->lp_ni;
+               ni = lnet_get_next_ni_locked(route->lr_gateway->lp_net, NULL);
                lnet_net_unlock(LNET_LOCK_EX);
 
                /* XXX Assume alive */
                lnet_net_unlock(LNET_LOCK_EX);
 
                /* XXX Assume alive */
-               if (ni->ni_lnd->lnd_notify != NULL)
-                       (ni->ni_lnd->lnd_notify)(ni, gateway, 1);
+               if (ni->ni_net->net_lnd->lnd_notify != NULL)
+                       (ni->ni_net->net_lnd->lnd_notify)(ni, gateway, 1);
 
                lnet_net_lock(LNET_LOCK_EX);
        }
 
                lnet_net_lock(LNET_LOCK_EX);
        }
@@ -444,8 +443,8 @@ lnet_check_routes(void)
                                        continue;
                                }
 
                                        continue;
                                }
 
-                               if (route->lr_gateway->lp_ni ==
-                                   route2->lr_gateway->lp_ni)
+                               if (route->lr_gateway->lp_net ==
+                                   route2->lr_gateway->lp_net)
                                        continue;
 
                                nid1 = route->lr_gateway->lp_nid;
                                        continue;
 
                                nid1 = route->lr_gateway->lp_nid;
@@ -833,8 +832,8 @@ lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net)
 static void
 lnet_update_ni_status_locked(void)
 {
 static void
 lnet_update_ni_status_locked(void)
 {
-       lnet_ni_t       *ni;
-       time64_t now;
+       lnet_ni_t       *ni = NULL;
+       time64_t        now;
        int             timeout;
 
        LASSERT(the_lnet.ln_routing);
        int             timeout;
 
        LASSERT(the_lnet.ln_routing);
@@ -843,8 +842,8 @@ lnet_update_ni_status_locked(void)
                  MAX(live_router_check_interval, dead_router_check_interval);
 
        now = ktime_get_real_seconds();
                  MAX(live_router_check_interval, dead_router_check_interval);
 
        now = ktime_get_real_seconds();
-       list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
-               if (ni->ni_lnd->lnd_type == LOLND)
+       while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
+               if (ni->ni_net->net_lnd->lnd_type == LOLND)
                        continue;
 
                if (now < ni->ni_last_alive + timeout)
                        continue;
 
                if (now < ni->ni_last_alive + timeout)
@@ -977,8 +976,9 @@ static void
 lnet_ping_router_locked (lnet_peer_t *rtr)
 {
        lnet_rc_data_t *rcd = NULL;
 lnet_ping_router_locked (lnet_peer_t *rtr)
 {
        lnet_rc_data_t *rcd = NULL;
-       cfs_time_t      now = cfs_time_current();
-       int             secs;
+       cfs_time_t      now = cfs_time_current();
+       int             secs;
+       struct lnet_ni  *ni;
 
        lnet_peer_addref_locked(rtr);
 
 
        lnet_peer_addref_locked(rtr);
 
@@ -987,7 +987,8 @@ lnet_ping_router_locked (lnet_peer_t *rtr)
                lnet_notify_locked(rtr, 1, 0, now);
 
        /* Run any outstanding notifications */
                lnet_notify_locked(rtr, 1, 0, now);
 
        /* Run any outstanding notifications */
-       lnet_ni_notify_locked(rtr->lp_ni, rtr);
+       ni = lnet_get_next_ni_locked(rtr->lp_net, NULL);
+       lnet_ni_notify_locked(ni, rtr);
 
        if (!lnet_isrouter(rtr) ||
            the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
 
        if (!lnet_isrouter(rtr) ||
            the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
@@ -1242,7 +1243,7 @@ rescan:
                list_for_each(entry, &the_lnet.ln_routers) {
                        rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
 
                list_for_each(entry, &the_lnet.ln_routers) {
                        rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
 
-                       cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
+                       cpt2 = rtr->lp_cpt;
                        if (cpt != cpt2) {
                                lnet_net_unlock(cpt);
                                cpt = cpt2;
                        if (cpt != cpt2) {
                                lnet_net_unlock(cpt);
                                cpt = cpt2;
@@ -1718,7 +1719,7 @@ lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when)
 {
        struct lnet_peer        *lp = NULL;
        cfs_time_t              now = cfs_time_current();
 {
        struct lnet_peer        *lp = NULL;
        cfs_time_t              now = cfs_time_current();
-       int                     cpt = lnet_cpt_of_nid(nid);
+       int                     cpt = lnet_cpt_of_nid(nid, ni);
 
        LASSERT (!in_interrupt ());
 
 
        LASSERT (!in_interrupt ());
 
index efec11b..0d2d670 100644 (file)
@@ -490,19 +490,19 @@ proc_lnet_peers(struct ctl_table *table, int write, void __user *buffer,
                        p = NULL;
                        hoff = 1;
                        hash++;
                        p = NULL;
                        hoff = 1;
                        hash++;
-               }
+                }
 
                if (peer != NULL) {
 
                if (peer != NULL) {
-                       lnet_nid_t nid       = peer->lp_nid;
-                       int        nrefs     = peer->lp_refcount;
-                       int        lastalive = -1;
-                       char      *aliveness = "NA";
-                       int        maxcr     = peer->lp_ni->ni_peertxcredits;
-                       int        txcr      = peer->lp_txcredits;
-                       int        mintxcr   = peer->lp_mintxcredits;
-                       int        rtrcr     = peer->lp_rtrcredits;
-                       int        minrtrcr  = peer->lp_minrtrcredits;
-                       int        txqnob    = peer->lp_txqnob;
+                       lnet_nid_t nid       = peer->lp_nid;
+                       int nrefs     = peer->lp_refcount;
+                       int lastalive = -1;
+                       char *aliveness = "NA";
+                       int maxcr = peer->lp_net->net_tunables.lct_peer_tx_credits;
+                       int txcr = peer->lp_txcredits;
+                       int mintxcr = peer->lp_mintxcredits;
+                       int rtrcr = peer->lp_rtrcredits;
+                       int minrtrcr = peer->lp_minrtrcredits;
+                       int txqnob = peer->lp_txqnob;
 
                        if (lnet_isrouter(peer) ||
                            lnet_peer_aliveness_enabled(peer))
 
                        if (lnet_isrouter(peer) ||
                            lnet_peer_aliveness_enabled(peer))
@@ -656,27 +656,14 @@ proc_lnet_nis(struct ctl_table *table, int write, void __user *buffer,
                              "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
                              "nid", "status", "alive", "refs", "peer",
                              "rtr", "max", "tx", "min");
                              "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
                              "nid", "status", "alive", "refs", "peer",
                              "rtr", "max", "tx", "min");
-               LASSERT(tmpstr + tmpsiz - s > 0);
+               LASSERT (tmpstr + tmpsiz - s > 0);
        } else {
        } else {
-               struct list_head  *n;
-               lnet_ni_t         *ni   = NULL;
-               int                skip = *ppos - 1;
+               lnet_ni_t         *ni   = NULL;
+               int                skip = *ppos - 1;
 
                lnet_net_lock(0);
 
 
                lnet_net_lock(0);
 
-               n = the_lnet.ln_nis.next;
-
-               while (n != &the_lnet.ln_nis) {
-                       lnet_ni_t *a_ni = list_entry(n, lnet_ni_t, ni_list);
-
-                       if (skip == 0) {
-                               ni = a_ni;
-                               break;
-                       }
-
-                       skip--;
-                       n = n->next;
-               }
+               ni = lnet_get_ni_idx_locked(skip);
 
                if (ni != NULL) {
                        struct lnet_tx_queue    *tq;
 
                if (ni != NULL) {
                        struct lnet_tx_queue    *tq;
@@ -690,7 +677,7 @@ proc_lnet_nis(struct ctl_table *table, int write, void __user *buffer,
                                last_alive = now - ni->ni_last_alive;
 
                        /* @lo forever alive */
                                last_alive = now - ni->ni_last_alive;
 
                        /* @lo forever alive */
-                       if (ni->ni_lnd->lnd_type == LOLND)
+                       if (ni->ni_net->net_lnd->lnd_type == LOLND)
                                last_alive = 0;
 
                        lnet_ni_lock(ni);
                                last_alive = 0;
 
                        lnet_ni_lock(ni);
@@ -718,8 +705,8 @@ proc_lnet_nis(struct ctl_table *table, int write, void __user *buffer,
                                      "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
                                      libcfs_nid2str(ni->ni_nid), stat,
                                      last_alive, *ni->ni_refs[i],
                                      "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
                                      libcfs_nid2str(ni->ni_nid), stat,
                                      last_alive, *ni->ni_refs[i],
-                                     ni->ni_peertxcredits,
-                                     ni->ni_peerrtrcredits,
+                                     ni->ni_net->net_tunables.lct_peer_tx_credits,
+                                     ni->ni_net->net_tunables.lct_peer_rtr_credits,
                                      tq->tq_credits_max,
                                      tq->tq_credits, tq->tq_credits_min);
                                if (i != 0)
                                      tq->tq_credits_max,
                                      tq->tq_credits, tq->tq_credits_min);
                                if (i != 0)
index bfabc6c..51e3254 100644 (file)
@@ -120,7 +120,7 @@ brw_client_init (sfw_test_instance_t *tsi)
                return -EINVAL;
 
        list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
                return -EINVAL;
 
        list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
-               bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
+               bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL),
                                       off, npg, len, opc == LST_BRW_READ);
                if (bulk == NULL) {
                        brw_client_fini(tsi);
                                       off, npg, len, opc == LST_BRW_READ);
                if (bulk == NULL) {
                        brw_client_fini(tsi);
index ae43d09..50cf411 100644 (file)
@@ -1032,7 +1032,8 @@ sfw_run_batch (sfw_batch_t *tsb)
                        wi = &tsu->tsu_worker;
                        swi_init_workitem(wi, tsu, sfw_run_test,
                                          lst_sched_test[\
                        wi = &tsu->tsu_worker;
                        swi_init_workitem(wi, tsu, sfw_run_test,
                                          lst_sched_test[\
-                                         lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
+                                         lnet_cpt_of_nid(tsu->tsu_dest.nid,
+                                                         NULL)]);
                        swi_schedule_workitem(wi);
                }
        }
                        swi_schedule_workitem(wi);
                }
        }
index 970f130..9fecaad 100644 (file)
@@ -524,7 +524,7 @@ srpc_init_client_rpc(srpc_client_rpc_t *rpc, lnet_process_id_t peer,
 
        INIT_LIST_HEAD(&rpc->crpc_list);
        swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc,
 
        INIT_LIST_HEAD(&rpc->crpc_list);
        swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc,
-                         lst_sched_test[lnet_cpt_of_nid(peer.nid)]);
+                         lst_sched_test[lnet_cpt_of_nid(peer.nid, NULL)]);
        spin_lock_init(&rpc->crpc_lock);
        atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */
 
        spin_lock_init(&rpc->crpc_lock);
        atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */