This patch allows the configuration of multiple NIs under one Net.
It is now possible to have multiple NIDs on the same network:
Ex: <ip1>@tcp, <ip2>@tcp.
This can be configured using the following syntax:
Ex: tcp(eth0, eth1)
The data structures for the example above can be visualized
as follows
NET(tcp)
|
-----------------
| |
NI(eth0) NI(eth1)
For more details refer to the Multi-Rail Requirements and HLD
documents.
Signed-off-by: Amir Shehata <amir.shehata@intel.com>
Change-Id: Id7c73b9b811a3082b61e53b9e9f95743188cbd51
Reviewed-on: http://review.whamcloud.com/18274
Tested-by: Jenkins
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Olaf Weber <olaf@sgi.com>
struct lnet_ioctl_config_lnd_cmn_tunables {
__u32 lct_version;
- __u32 lct_peer_timeout;
- __u32 lct_peer_tx_credits;
- __u32 lct_peer_rtr_credits;
- __u32 lct_max_tx_credits;
+ __s32 lct_peer_timeout;
+ __s32 lct_peer_tx_credits;
+ __s32 lct_peer_rtr_credits;
+ __s32 lct_max_tx_credits;
};
struct lnet_ioctl_config_o2iblnd_tunables {
__u32 pad;
};
+struct lnet_lnd_tunables {
+ union {
+ struct lnet_ioctl_config_o2iblnd_tunables lnd_o2ib;
+ } lnd_tun_u;
+};
+
struct lnet_ioctl_config_lnd_tunables {
struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn;
- union {
- struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib;
- } lt_tun_u;
+ struct lnet_lnd_tunables lt_tun;
};
struct lnet_ioctl_net_config {
kmem_cache_free(lnet_mes_cachep, me);
}
-static inline lnet_msg_t *
-lnet_msg_alloc(void)
-{
- lnet_msg_t *msg;
-
- LIBCFS_ALLOC(msg, sizeof(*msg));
-
- /* no need to zero, LIBCFS_ALLOC does for us */
- return (msg);
-}
-
-static inline void
-lnet_msg_free(lnet_msg_t *msg)
-{
- LASSERT(!msg->msg_onactivelist);
- LIBCFS_FREE(msg, sizeof(*msg));
-}
-
lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec,
__u64 cookie);
void lnet_res_lh_initialize(struct lnet_res_container *rec,
lnet_net_unlock(0);
}
-void lnet_ni_free(lnet_ni_t *ni);
-lnet_ni_t *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist);
+static inline lnet_msg_t *
+lnet_msg_alloc(void)
+{
+ lnet_msg_t *msg;
+
+ LIBCFS_ALLOC(msg, sizeof(*msg));
+
+ /* no need to zero, LIBCFS_ALLOC does for us */
+ return (msg);
+}
+
+static inline void
+lnet_msg_free(lnet_msg_t *msg)
+{
+ LASSERT(!msg->msg_onactivelist);
+
+ /* Make sure we have no references to an NI. */
+ if (msg->msg_txni)
+ lnet_ni_decref_locked(msg->msg_txni, msg->msg_tx_cpt);
+ if (msg->msg_rxni)
+ lnet_ni_decref_locked(msg->msg_rxni, msg->msg_rx_cpt);
+
+ LIBCFS_FREE(msg, sizeof(*msg));
+}
+
+void lnet_ni_free(struct lnet_ni *ni);
+void lnet_net_free(struct lnet_net *net);
+
+struct lnet_net *
+lnet_net_alloc(__u32 net_type, struct list_head *netlist);
+
+struct lnet_ni *
+lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el,
+ char *iface);
static inline int
lnet_nid2peerhash(lnet_nid_t nid)
extern lnd_t the_lolnd;
extern int avoid_asym_router_failure;
-extern int lnet_cpt_of_nid_locked(lnet_nid_t nid);
-extern int lnet_cpt_of_nid(lnet_nid_t nid);
+extern int lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni);
+extern int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni);
extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt);
+extern lnet_ni_t *lnet_nid2ni_addref(lnet_nid_t nid);
extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt);
extern lnet_ni_t *lnet_net2ni(__u32 net);
+bool lnet_is_ni_healthy_locked(struct lnet_ni *ni);
int lnet_lib_init(void);
void lnet_lib_exit(void);
int lnet_get_route(int idx, __u32 *net, __u32 *hops,
lnet_nid_t *gateway, __u32 *alive, __u32 *priority);
int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg);
+struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet,
+ struct lnet_ni *prev);
+struct lnet_ni *lnet_get_ni_idx_locked(int idx);
struct libcfs_ioctl_handler {
struct list_head item;
int lnet_rtrpools_enable(void);
void lnet_rtrpools_disable(void);
void lnet_rtrpools_free(int keep_pools);
-lnet_remotenet_t *lnet_find_net_locked (__u32 net);
+lnet_remotenet_t *lnet_find_rnet_locked(__u32 net);
int lnet_dyn_add_ni(lnet_pid_t requested_pid,
struct lnet_ioctl_config_data *conf);
int lnet_dyn_del_ni(__u32 net);
int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
+struct lnet_net *lnet_get_net_locked(__u32 net_id);
int lnet_islocalnid(lnet_nid_t nid);
int lnet_islocalnet(__u32 net);
int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid,
__u32 local_ip, __u32 peer_ip, int peer_port);
void lnet_connect_console_error(int rc, lnet_nid_t peer_nid,
- __u32 peer_ip, int port);
-int lnet_count_acceptor_nis(void);
+ __u32 peer_ip, int port);
+int lnet_count_acceptor_nets(void);
int lnet_acceptor_timeout(void);
int lnet_acceptor_port(void);
int lnet_acceptor_start(void);
__u32 peer_ip, int peer_port);
int lnet_peers_start_down(void);
-int lnet_peer_buffer_credits(lnet_ni_t *ni);
+int lnet_peer_buffer_credits(struct lnet_net *net);
int lnet_router_checker_start(void);
void lnet_router_checker_stop(void);
int lnet_parse_ip2nets(char **networksp, char *ip2nets);
int lnet_parse_routes(char *route_str, int *im_a_router);
-int lnet_parse_networks(struct list_head *nilist, char *networks);
-int lnet_net_unique(__u32 net, struct list_head *nilist);
+int lnet_parse_networks(struct list_head *nilist, char *networks,
+ bool use_tcp_bonding);
+bool lnet_net_unique(__u32 net_id, struct list_head *nilist,
+ struct lnet_net **net);
+bool lnet_ni_unique_net(struct list_head *nilist, char *iface);
int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt);
lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable,
/* ready for pending on RX delay list */
unsigned int msg_rx_ready_delay:1;
- unsigned int msg_vmflush:1; /* VM trying to free memory */
- unsigned int msg_target_is_router:1; /* sending to a router */
- unsigned int msg_routing:1; /* being forwarded */
- unsigned int msg_ack:1; /* ack on finalize (PUT) */
- unsigned int msg_sending:1; /* outgoing message */
- unsigned int msg_receiving:1; /* being received */
- unsigned int msg_txcredit:1; /* taken an NI send credit */
- unsigned int msg_peertxcredit:1; /* taken a peer send credit */
- unsigned int msg_rtrcredit:1; /* taken a globel router credit */
- unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
- unsigned int msg_onactivelist:1; /* on the activelist */
+ unsigned int msg_vmflush:1; /* VM trying to free memory */
+ unsigned int msg_target_is_router:1; /* sending to a router */
+ unsigned int msg_routing:1; /* being forwarded */
+ unsigned int msg_ack:1; /* ack on finalize (PUT) */
+ unsigned int msg_sending:1; /* outgoing message */
+ unsigned int msg_receiving:1; /* being received */
+ unsigned int msg_txcredit:1; /* taken an NI send credit */
+ unsigned int msg_peertxcredit:1; /* taken a peer send credit */
+ unsigned int msg_rtrcredit:1; /* taken a global router credit */
+ unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */
+ unsigned int msg_onactivelist:1; /* on the activelist */
unsigned int msg_rdma_get:1;
- struct lnet_peer *msg_txpeer; /* peer I'm sending to */
- struct lnet_peer *msg_rxpeer; /* peer I received from */
+ struct lnet_peer *msg_txpeer; /* peer I'm sending to */
+ struct lnet_peer *msg_rxpeer; /* peer I received from */
- void *msg_private;
+ void *msg_private;
struct lnet_libmd *msg_md;
-
- unsigned int msg_len;
- unsigned int msg_wanted;
- unsigned int msg_offset;
- unsigned int msg_niov;
+ /* the NI the message was sent or received over */
+ struct lnet_ni *msg_txni;
+ struct lnet_ni *msg_rxni;
+
+ unsigned int msg_len;
+ unsigned int msg_wanted;
+ unsigned int msg_offset;
+ unsigned int msg_niov;
struct kvec *msg_iov;
- lnet_kiov_t *msg_kiov;
+ lnet_kiov_t *msg_kiov;
- lnet_event_t msg_ev;
- lnet_hdr_t msg_hdr;
+ lnet_event_t msg_ev;
+ lnet_hdr_t msg_hdr;
} lnet_msg_t;
struct list_head tq_delayed; /* delayed TXs */
};
+enum lnet_net_state {
+ /* set when net block is allocated */
+ LNET_NET_STATE_INIT = 0,
+ /* set when NIs in net are started successfully */
+ LNET_NET_STATE_ACTIVE,
+ /* set if all NIs in net are in FAILED state */
+ LNET_NET_STATE_INACTIVE,
+ /* set when shutting down a NET */
+ LNET_NET_STATE_DELETING
+};
+
+enum lnet_ni_state {
+ /* set when NI block is allocated */
+ LNET_NI_STATE_INIT = 0,
+ /* set when NI is started successfully */
+ LNET_NI_STATE_ACTIVE,
+ /* set when LND notifies NI failed */
+ LNET_NI_STATE_FAILED,
+ /* set when LND notifies NI degraded */
+ LNET_NI_STATE_DEGRADED,
+ /* set when shutting down NI */
+ LNET_NI_STATE_DELETING
+};
+
+struct lnet_net {
+ /* chain on the ln_nets */
+ struct list_head net_list;
+
+ /* net ID, which is composed of
+ * (net_type << 16) | net_num.
+ * net_type can be one of the enumerated types defined in
+ * lnet/include/lnet/nidstr.h */
+ __u32 net_id;
+
+ /* priority of the network */
+ __u32 net_prio;
+
+ /* total number of CPTs in the array */
+ __u32 net_ncpts;
+
+ /* cumulative CPTs of all NIs in this net */
+ __u32 *net_cpts;
+
+ /* network tunables */
+ struct lnet_ioctl_config_lnd_cmn_tunables net_tunables;
+
+ /*
+ * boolean to indicate that the tunables have been set and
+ * shouldn't be reset
+ */
+ bool net_tunables_set;
+
+ /* procedural interface */
+ lnd_t *net_lnd;
+
+ /* list of NIs on this net */
+ struct list_head net_ni_list;
+
+ /* list of NIs being added, but not started yet */
+ struct list_head net_ni_added;
+
+ /* dying LND instances */
+ struct list_head net_ni_zombie;
+
+ /* network state */
+ enum lnet_net_state net_state;
+};
+
typedef struct lnet_ni {
+ /* chain on the lnet_net structure */
+ struct list_head ni_netlist;
+
+ /* chain on net_ni_cpt */
+ struct list_head ni_cptlist;
+
spinlock_t ni_lock;
- struct list_head ni_list; /* chain on ln_nis */
- struct list_head ni_cptlist; /* chain on ln_nis_cpt */
- int ni_maxtxcredits; /* # tx credits */
- /* # per-peer send credits */
- int ni_peertxcredits;
- /* # per-peer router buffer credits */
- int ni_peerrtrcredits;
- /* seconds to consider peer dead */
- int ni_peertimeout;
- int ni_ncpts; /* number of CPTs */
- __u32 *ni_cpts; /* bond NI on some CPTs */
- lnet_nid_t ni_nid; /* interface's NID */
- void *ni_data; /* instance-specific data */
- lnd_t *ni_lnd; /* procedural interface */
- struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */
- int **ni_refs; /* percpt reference count */
- time64_t ni_last_alive; /* when I was last alive */
- struct lnet_ni_status *ni_status; /* my health status */
+
+ /* number of CPTs */
+ int ni_ncpts;
+
+ /* bond NI on some CPTs */
+ __u32 *ni_cpts;
+
+ /* interface's NID */
+ lnet_nid_t ni_nid;
+
+ /* instance-specific data */
+ void *ni_data;
+
+ /* percpt TX queues */
+ struct lnet_tx_queue **ni_tx_queues;
+
+ /* percpt reference count */
+ int **ni_refs;
+
+ /* when I was last alive */
+ long ni_last_alive;
+
+ /* pointer to parent network */
+ struct lnet_net *ni_net;
+
+ /* my health status */
+ lnet_ni_status_t *ni_status;
+
+ /* NI FSM */
+ enum lnet_ni_state ni_state;
+
/* per NI LND tunables */
- struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables;
- /* equivalent interfaces to use */
+ struct lnet_lnd_tunables ni_lnd_tunables;
+
+ /* lnd tunables set explicitly */
+ bool ni_lnd_tunables_set;
+
+ /*
+ * equivalent interfaces to use
+ * This is an array because socklnd bonding can still be configured
+ */
char *ni_interfaces[LNET_MAX_INTERFACES];
struct net *ni_net_ns; /* original net namespace */
} lnet_ni_t;
cfs_time_t lp_last_alive;
/* when lp_ni was queried last time */
cfs_time_t lp_last_query;
- /* interface peer is on */
- lnet_ni_t *lp_ni;
+ /* network peer is on */
+ struct lnet_net *lp_net;
lnet_nid_t lp_nid; /* peer's NID */
int lp_refcount; /* # refs */
int lp_cpt; /* CPT this peer attached on */
/* peer aliveness is enabled only on routers for peers in a network where the
* lnet_ni_t::ni_peertimeout has been set to a positive value */
#define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \
- (lp)->lp_ni->ni_peertimeout > 0)
+ (lp)->lp_net->net_tunables.lct_peer_timeout > 0)
typedef struct {
struct list_head lr_list; /* chain on net */
struct lnet_match_info {
__u64 mi_mbits;
lnet_process_id_t mi_id;
+ unsigned int mi_cpt;
unsigned int mi_opc;
unsigned int mi_portal;
unsigned int mi_rlength;
struct list_head ln_test_peers;
struct list_head ln_drop_rules;
struct list_head ln_delay_rules;
-
- struct list_head ln_nis; /* LND instances */
- /* NIs bond on specific CPT(s) */
- struct list_head ln_nis_cpt;
- /* dying LND instances */
- struct list_head ln_nis_zombie;
- lnet_ni_t *ln_loni; /* the loopback NI */
+ /* LND instances */
+ struct list_head ln_nets;
+ /* the loopback NI */
+ struct lnet_ni *ln_loni;
+ /* network zombie list */
+ struct list_head ln_net_zombie;
/* remote networks with routes to them */
struct list_head *ln_remote_nets_hash;
kgn_net_t *net;
ENTRY;
- LASSERTF(ni->ni_lnd == &the_kgnilnd,
+ LASSERTF(ni->ni_net->net_lnd == &the_kgnilnd,
"bad LND 0x%p != the_kgnilnd @ 0x%p\n",
- ni->ni_lnd, &the_kgnilnd);
+ ni->ni_net->net_lnd, &the_kgnilnd);
if (kgnilnd_data.kgn_init == GNILND_INIT_NOTHING) {
rc = kgnilnd_base_startup();
{
kib_peer_t *peer;
kib_net_t *net = ni->ni_data;
- int cpt = lnet_cpt_of_nid(nid);
+ int cpt = lnet_cpt_of_nid(nid, ni);
unsigned long flags;
LASSERT(net != NULL);
peer->ibp_error = 0;
peer->ibp_last_alive = 0;
peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni);
- peer->ibp_queue_depth = ni->ni_peertxcredits;
+ peer->ibp_queue_depth = ni->ni_net->net_tunables.lct_peer_tx_credits;
atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */
INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */
dev = net->ibn_dev;
- cpt = lnet_cpt_of_nid(peer->ibp_nid);
+ cpt = lnet_cpt_of_nid(peer->ibp_nid, peer->ibp_ni);
sched = kiblnd_data.kib_scheds[cpt];
LASSERT(sched->ibs_nthreads > 0);
int mod;
__u16 nfrags;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
mod = tunables->lnd_map_on_demand;
nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod;
int rc;
int i;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
if (tunables->lnd_map_on_demand == 0) {
int rc;
int newdev;
- LASSERT (ni->ni_lnd == &the_o2iblnd);
+ LASSERT (ni->ni_net->net_lnd == &the_o2iblnd);
if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) {
rc = kiblnd_base_startup();
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
int mod;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
mod = tunables->lnd_map_on_demand;
return mod != 0 ? mod : IBLND_MAX_RDMA_FRAGS;
}
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
int concurrent_sends;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
concurrent_sends = tunables->lnd_concurrent_sends;
if (version == IBLND_MSG_VERSION_1) {
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
if (conn->ibc_outstanding_credits <
IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
kib_tx_t *tx;
kib_tx_poolset_t *tps;
- tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)];
+ tps = net->ibn_tx_ps[lnet_cpt_of_nid(target, ni)];
node = kiblnd_pool_alloc_node(&tps->tps_poolset);
if (node == NULL)
return NULL;
__u32 ip = ntohl(peer_addr->sin_addr.s_addr);
CERROR("Peer's port (%pI4h:%hu) is not privileged\n",
&ip, ntohs(peer_addr->sin_port));
- goto failed;
- }
+ goto failed;
+ }
- if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
- CERROR("Short connection request\n");
- goto failed;
- }
+ if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
+ CERROR("Short connection request\n");
+ goto failed;
+ }
- /* Future protocol version compatibility support! If the
- * o2iblnd-specific protocol changes, or when LNET unifies
- * protocols over all LNDs, the initial connection will
- * negotiate a protocol version. I trap this here to avoid
- * console errors; the reject tells the peer which protocol I
- * speak. */
- if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
- reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
- goto failed;
- if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
- reqmsg->ibm_version != IBLND_MSG_VERSION &&
- reqmsg->ibm_version != IBLND_MSG_VERSION_1)
- goto failed;
- if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
- reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
- goto failed;
+ /* Future protocol version compatibility support! If the
+ * o2iblnd-specific protocol changes, or when LNET unifies
+ * protocols over all LNDs, the initial connection will
+ * negotiate a protocol version. I trap this here to avoid
+ * console errors; the reject tells the peer which protocol I
+ * speak. */
+ if (reqmsg->ibm_magic == LNET_PROTO_MAGIC ||
+ reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
+ goto failed;
+ if (reqmsg->ibm_magic == IBLND_MSG_MAGIC &&
+ reqmsg->ibm_version != IBLND_MSG_VERSION &&
+ reqmsg->ibm_version != IBLND_MSG_VERSION_1)
+ goto failed;
+ if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) &&
+ reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) &&
+ reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1))
+ goto failed;
- rc = kiblnd_unpack_msg(reqmsg, priv_nob);
- if (rc != 0) {
- CERROR("Can't parse connection request: %d\n", rc);
- goto failed;
- }
+ rc = kiblnd_unpack_msg(reqmsg, priv_nob);
+ if (rc != 0) {
+ CERROR("Can't parse connection request: %d\n", rc);
+ goto failed;
+ }
- nid = reqmsg->ibm_srcnid;
- ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid));
+ nid = reqmsg->ibm_srcnid;
+ ni = lnet_nid2ni_addref(reqmsg->ibm_dstnid);
- if (ni != NULL) {
- net = (kib_net_t *)ni->ni_data;
- rej.ibr_incarnation = net->ibn_incarnation;
- }
+ if (ni != NULL) {
+ net = (kib_net_t *)ni->ni_data;
+ rej.ibr_incarnation = net->ibn_incarnation;
+ }
- if (ni == NULL || /* no matching net */
- ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
- net->ibn_dev != ibdev) { /* wrong device */
+ if (ni == NULL || /* no matching net */
+ ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */
+ net->ibn_dev != ibdev) { /* wrong device */
CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): "
- "bad dst nid %s\n", libcfs_nid2str(nid),
- ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
- ibdev->ibd_ifname, ibdev->ibd_nnets,
+ "bad dst nid %s\n", libcfs_nid2str(nid),
+ ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid),
+ ibdev->ibd_ifname, ibdev->ibd_nnets,
&ibdev->ibd_ifip,
- libcfs_nid2str(reqmsg->ibm_dstnid));
+ libcfs_nid2str(reqmsg->ibm_dstnid));
- goto failed;
- }
+ goto failed;
+ }
/* check time stamp as soon as possible */
- if (reqmsg->ibm_dststamp != 0 &&
- reqmsg->ibm_dststamp != net->ibn_incarnation) {
- CWARN("Stale connection request\n");
- rej.ibr_why = IBLND_REJECT_CONN_STALE;
- goto failed;
- }
+ if (reqmsg->ibm_dststamp != 0 &&
+ reqmsg->ibm_dststamp != net->ibn_incarnation) {
+ CWARN("Stale connection request\n");
+ rej.ibr_why = IBLND_REJECT_CONN_STALE;
+ goto failed;
+ }
- /* I can accept peer's version */
- version = reqmsg->ibm_version;
+ /* I can accept peer's version */
+ version = reqmsg->ibm_version;
- if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
- CERROR("Unexpected connreq msg type: %x from %s\n",
- reqmsg->ibm_type, libcfs_nid2str(nid));
- goto failed;
- }
+ if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) {
+ CERROR("Unexpected connreq msg type: %x from %s\n",
+ reqmsg->ibm_type, libcfs_nid2str(nid));
+ goto failed;
+ }
if (reqmsg->ibm_u.connparams.ibcp_queue_depth >
kiblnd_msg_queue_size(version, ni)) {
break;
case IBLND_REJECT_RDMA_FRAGS: {
- struct lnet_ioctl_config_lnd_tunables *tunables;
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
if (!cp) {
reason = "can't negotiate max frags";
goto out;
}
- tunables = peer->ibp_ni->ni_lnd_tunables;
- if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) {
+ tunables = &peer->ibp_ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
+ if (!tunables->lnd_map_on_demand) {
reason = "map_on_demand must be enabled";
goto out;
}
if (version == IBLND_MSG_VERSION_1)
return IBLND_MSG_QUEUE_SIZE_V1;
else if (ni)
- return ni->ni_peertxcredits;
+ return ni->ni_net->net_tunables.lct_peer_tx_credits;
else
return peer_credits;
}
kiblnd_tunables_setup(lnet_ni_t *ni)
{
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables;
/*
* if there was no tunables specified, setup the tunables to be
* defaulted
*/
- if (!ni->ni_lnd_tunables) {
- LIBCFS_ALLOC(ni->ni_lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- if (!ni->ni_lnd_tunables)
- return -ENOMEM;
-
- memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib,
+ if (!ni->ni_lnd_tunables_set)
+ memcpy(&ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib,
&default_tunables, sizeof(*tunables));
- }
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
/* Current API version */
tunables->lnd_version = 0;
return -EINVAL;
}
- if (!ni->ni_peertimeout)
- ni->ni_peertimeout = peer_timeout;
+ net_tunables = &ni->ni_net->net_tunables;
- if (!ni->ni_maxtxcredits)
- ni->ni_maxtxcredits = credits;
+ if (net_tunables->lct_peer_timeout == -1)
+ net_tunables->lct_peer_timeout = peer_timeout;
- if (!ni->ni_peertxcredits)
- ni->ni_peertxcredits = peer_credits;
+ if (net_tunables->lct_max_tx_credits == -1)
+ net_tunables->lct_max_tx_credits = credits;
- if (!ni->ni_peerrtrcredits)
- ni->ni_peerrtrcredits = peer_buffer_credits;
+ if (net_tunables->lct_peer_tx_credits == -1)
+ net_tunables->lct_peer_tx_credits = peer_credits;
- if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT)
- ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT;
+ if (net_tunables->lct_peer_rtr_credits == -1)
+ net_tunables->lct_peer_rtr_credits = peer_buffer_credits;
- if (ni->ni_peertxcredits > IBLND_CREDITS_MAX)
- ni->ni_peertxcredits = IBLND_CREDITS_MAX;
+ if (net_tunables->lct_peer_tx_credits < IBLND_CREDITS_DEFAULT)
+ net_tunables->lct_peer_tx_credits = IBLND_CREDITS_DEFAULT;
- if (ni->ni_peertxcredits > credits)
- ni->ni_peertxcredits = credits;
+ if (net_tunables->lct_peer_tx_credits > IBLND_CREDITS_MAX)
+ net_tunables->lct_peer_tx_credits = IBLND_CREDITS_MAX;
+
+ if (net_tunables->lct_peer_tx_credits >
+ net_tunables->lct_max_tx_credits)
+ net_tunables->lct_peer_tx_credits =
+ net_tunables->lct_max_tx_credits;
if (!tunables->lnd_peercredits_hiw)
tunables->lnd_peercredits_hiw = peer_credits_hiw;
- if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2)
- tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2;
+ if (tunables->lnd_peercredits_hiw < net_tunables->lct_peer_tx_credits / 2)
+ tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits / 2;
- if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits)
- tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1;
+ if (tunables->lnd_peercredits_hiw >= net_tunables->lct_peer_tx_credits)
+ tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits - 1;
if (tunables->lnd_map_on_demand < 0 ||
tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) {
if (tunables->lnd_map_on_demand > 0 &&
tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) {
tunables->lnd_concurrent_sends =
- ni->ni_peertxcredits * 2;
+ net_tunables->lct_peer_tx_credits * 2;
} else {
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits;
+ tunables->lnd_concurrent_sends =
+ net_tunables->lct_peer_tx_credits;
}
}
- if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2)
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2;
+ if (tunables->lnd_concurrent_sends > net_tunables->lct_peer_tx_credits * 2)
+ tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits * 2;
- if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2)
- tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2;
+ if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits / 2)
+ tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits / 2;
- if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) {
+ if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits) {
CWARN("Concurrent sends %d is lower than message "
"queue size: %d, performance may drop slightly.\n",
- tunables->lnd_concurrent_sends, ni->ni_peertxcredits);
+ tunables->lnd_concurrent_sends,
+ net_tunables->lct_peer_tx_credits);
}
if (!tunables->lnd_fmr_pool_size)
static int
ksocknal_create_peer(ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
{
- int cpt = lnet_cpt_of_nid(id.nid);
+ int cpt = lnet_cpt_of_nid(id.nid, ni);
ksock_net_t *net = ni->ni_data;
ksock_peer_t *peer;
LASSERT (conn->ksnc_proto != NULL);
LASSERT (peerid.nid != LNET_NID_ANY);
- cpt = lnet_cpt_of_nid(peerid.nid);
+ cpt = lnet_cpt_of_nid(peerid.nid, ni);
if (active) {
ksocknal_peer_addref(peer);
int rc;
int i;
- LASSERT (ni->ni_lnd == &the_ksocklnd);
+ LASSERT (ni->ni_net->net_lnd == &the_ksocklnd);
if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
rc = ksocknal_base_startup();
spin_lock_init(&net->ksnn_lock);
net->ksnn_incarnation = ksocknal_new_incarnation();
ni->ni_data = net;
- ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
- ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
- ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits;
- ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
+ if (!ni->ni_net->net_tunables_set) {
+ ni->ni_net->net_tunables.lct_peer_timeout =
+ *ksocknal_tunables.ksnd_peertimeout;
+ ni->ni_net->net_tunables.lct_max_tx_credits =
+ *ksocknal_tunables.ksnd_credits;
+ ni->ni_net->net_tunables.lct_peer_tx_credits =
+ *ksocknal_tunables.ksnd_peertxcredits;
+ ni->ni_net->net_tunables.lct_peer_rtr_credits =
+ *ksocknal_tunables.ksnd_peerrtrcredits;
+ ni->ni_net->net_tunables_set = true;
+ }
if (ni->ni_interfaces[0] == NULL) {
rc = ksocknal_enumerate_interfaces(net);
if (flip)
__swab64s(&cr.acr_nid);
- ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid));
- if (ni == NULL || /* no matching net */
+ ni = lnet_nid2ni_addref(cr.acr_nid);
+ if (ni == NULL || /* no matching net */
ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */
if (ni != NULL)
lnet_ni_decref(ni);
return -EPERM;
}
- if (ni->ni_lnd->lnd_accept == NULL) {
+ if (ni->ni_net->net_lnd->lnd_accept == NULL) {
/* This catches a request for the loopback LND */
lnet_ni_decref(ni);
LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h "
CDEBUG(D_NET, "Accept %s from %pI4h\n",
libcfs_nid2str(cr.acr_nid), &peer_ip);
- rc = ni->ni_lnd->lnd_accept(ni, sock);
+ rc = ni->ni_net->net_lnd->lnd_accept(ni, sock);
lnet_ni_decref(ni);
return rc;
if (rc <= 0)
return rc;
- if (lnet_count_acceptor_nis() == 0) /* not required */
+ if (lnet_count_acceptor_nets() == 0) /* not required */
return 0;
task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure,
module_param(rnet_htable_size, int, 0444);
MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table");
+static int use_tcp_bonding = false;
+module_param(use_tcp_bonding, int, 0444);
+MODULE_PARM_DESC(use_tcp_bonding,
+ "Set to 1 to use socklnd bonding. 0 to use Multi-Rail");
+
static int lnet_ping(lnet_process_id_t id, signed long timeout,
lnet_process_id_t __user *ids, int n_ids);
the_lnet.ln_pid = requested_pid;
INIT_LIST_HEAD(&the_lnet.ln_test_peers);
- INIT_LIST_HEAD(&the_lnet.ln_nis);
- INIT_LIST_HEAD(&the_lnet.ln_nis_cpt);
- INIT_LIST_HEAD(&the_lnet.ln_nis_zombie);
+ INIT_LIST_HEAD(&the_lnet.ln_nets);
INIT_LIST_HEAD(&the_lnet.ln_routers);
INIT_LIST_HEAD(&the_lnet.ln_drop_rules);
INIT_LIST_HEAD(&the_lnet.ln_delay_rules);
LASSERT(the_lnet.ln_refcount == 0);
LASSERT(list_empty(&the_lnet.ln_test_peers));
- LASSERT(list_empty(&the_lnet.ln_nis));
- LASSERT(list_empty(&the_lnet.ln_nis_cpt));
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
+ LASSERT(list_empty(&the_lnet.ln_nets));
lnet_portals_destroy();
}
lnet_ni_t *
-lnet_net2ni_locked(__u32 net, int cpt)
+lnet_net2ni_locked(__u32 net_id, int cpt)
{
- struct list_head *tmp;
- lnet_ni_t *ni;
+ struct lnet_ni *ni;
+ struct lnet_net *net;
LASSERT(cpt != LNET_LOCK_EX);
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (LNET_NIDNET(ni->ni_nid) == net) {
- lnet_ni_addref_locked(ni, cpt);
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ if (net->net_id == net_id) {
+ ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+ ni_netlist);
return ni;
}
}
}
EXPORT_SYMBOL(lnet_net2ni);
+struct lnet_net *
+lnet_get_net_locked(__u32 net_id)
+{
+ struct lnet_net *net;
+
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ if (net->net_id == net_id)
+ return net;
+ }
+
+ return NULL;
+}
+
static unsigned int
lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
{
}
int
-lnet_cpt_of_nid_locked(lnet_nid_t nid)
+lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni)
{
- struct lnet_ni *ni;
+ struct lnet_net *net;
/* must called with hold of lnet_net_lock */
if (LNET_CPT_NUMBER == 1)
return 0; /* the only one */
- /* take lnet_net_lock(any) would be OK */
- if (!list_empty(&the_lnet.ln_nis_cpt)) {
- list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) {
- if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid))
- continue;
+ /*
+ * If NI is provided then use the CPT identified in the NI cpt
+ * list if one exists. If one doesn't exist, then that NI is
+ * associated with all CPTs and it follows that the net it belongs
+ * to is implicitly associated with all CPTs, so just hash the nid
+ * and return that.
+ */
+ if (ni != NULL) {
+ if (ni->ni_cpts != NULL)
+ return ni->ni_cpts[lnet_nid_cpt_hash(nid,
+ ni->ni_ncpts)];
+ else
+ return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
+ }
- LASSERT(ni->ni_cpts != NULL);
- return ni->ni_cpts[lnet_nid_cpt_hash
- (nid, ni->ni_ncpts)];
- }
+ /* no NI provided so look at the net */
+ net = lnet_get_net_locked(LNET_NIDNET(nid));
+
+ if (net != NULL && net->net_cpts != NULL) {
+ return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)];
}
return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
}
int
-lnet_cpt_of_nid(lnet_nid_t nid)
+lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni)
{
int cpt;
int cpt2;
if (LNET_CPT_NUMBER == 1)
return 0; /* the only one */
- if (list_empty(&the_lnet.ln_nis_cpt))
- return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER);
-
cpt = lnet_net_lock_current();
- cpt2 = lnet_cpt_of_nid_locked(nid);
+
+ cpt2 = lnet_cpt_of_nid_locked(nid, ni);
+
lnet_net_unlock(cpt);
return cpt2;
EXPORT_SYMBOL(lnet_cpt_of_nid);
int
-lnet_islocalnet(__u32 net)
+lnet_islocalnet(__u32 net_id)
{
- struct lnet_ni *ni;
+ struct lnet_net *net;
int cpt;
+ bool local;
cpt = lnet_net_lock_current();
- ni = lnet_net2ni_locked(net, cpt);
- if (ni != NULL)
- lnet_ni_decref_locked(ni, cpt);
+ net = lnet_get_net_locked(net_id);
+
+ local = net != NULL;
lnet_net_unlock(cpt);
- return ni != NULL;
+ return local;
+}
+
+bool
+lnet_is_ni_healthy_locked(struct lnet_ni *ni)
+{
+ if (ni->ni_state == LNET_NI_STATE_ACTIVE ||
+ ni->ni_state == LNET_NI_STATE_DEGRADED)
+ return true;
+
+ return false;
}
lnet_ni_t *
lnet_nid2ni_locked(lnet_nid_t nid, int cpt)
{
+ struct lnet_net *net;
struct lnet_ni *ni;
- struct list_head *tmp;
LASSERT(cpt != LNET_LOCK_EX);
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (ni->ni_nid == nid) {
- lnet_ni_addref_locked(ni, cpt);
- return ni;
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ if (ni->ni_nid == nid)
+ return ni;
}
}
return NULL;
}
+lnet_ni_t *
+lnet_nid2ni_addref(lnet_nid_t nid)
+{
+ lnet_ni_t *ni;
+
+ lnet_net_lock(0);
+ ni = lnet_nid2ni_locked(nid, 0);
+ if (ni)
+ lnet_ni_addref_locked(ni, 0);
+ lnet_net_unlock(0);
+
+ return ni;
+}
+EXPORT_SYMBOL(lnet_nid2ni_addref);
+
int
lnet_islocalnid(lnet_nid_t nid)
{
cpt = lnet_net_lock_current();
ni = lnet_nid2ni_locked(nid, cpt);
- if (ni != NULL)
- lnet_ni_decref_locked(ni, cpt);
lnet_net_unlock(cpt);
return ni != NULL;
}
int
-lnet_count_acceptor_nis (void)
+lnet_count_acceptor_nets(void)
{
/* Return the # of NIs that need the acceptor. */
int count = 0;
- struct list_head *tmp;
- struct lnet_ni *ni;
+ struct lnet_net *net;
int cpt;
cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
-
- if (ni->ni_lnd->lnd_accept != NULL)
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ /* all socklnd type networks should have the acceptor
+ * thread started */
+ if (net->net_lnd->lnd_accept != NULL)
count++;
}
}
static inline int
+lnet_get_net_ni_count_locked(struct lnet_net *net)
+{
+ struct lnet_ni *ni;
+ int count = 0;
+
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+ count++;
+
+ return count;
+}
+
+static inline int
lnet_get_ni_count(void)
{
- struct lnet_ni *ni;
- int count = 0;
+ struct lnet_ni *ni;
+ struct lnet_net *net;
+ int count = 0;
lnet_net_lock(0);
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list)
- count++;
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
+ count++;
+ }
lnet_net_unlock(0);
static void
lnet_ping_info_destroy(void)
{
+ struct lnet_net *net;
struct lnet_ni *ni;
lnet_net_lock(LNET_LOCK_EX);
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- lnet_ni_lock(ni);
- ni->ni_status = NULL;
- lnet_ni_unlock(ni);
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ lnet_ni_lock(ni);
+ ni->ni_status = NULL;
+ lnet_ni_unlock(ni);
+ }
}
lnet_ping_info_free(the_lnet.ln_ping_info);
lnet_ping_info_install_locked(struct lnet_ping_info *ping_info)
{
int i;
- lnet_ni_t *ni;
+ struct lnet_ni *ni;
+ struct lnet_net *net;
struct lnet_ni_status *ns;
i = 0;
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- LASSERT(i < ping_info->pi_nnis);
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ LASSERT(i < ping_info->pi_nnis);
- ns = &ping_info->pi_ni[i];
+ ns = &ping_info->pi_ni[i];
- ns->ns_nid = ni->ni_nid;
+ ns->ns_nid = ni->ni_nid;
- lnet_ni_lock(ni);
- ns->ns_status = (ni->ni_status != NULL) ?
- ni->ni_status->ns_status : LNET_NI_STATUS_UP;
- ni->ni_status = ns;
- lnet_ni_unlock(ni);
+ lnet_ni_lock(ni);
+ ns->ns_status = (ni->ni_status != NULL) ?
+ ni->ni_status->ns_status :
+ LNET_NI_STATUS_UP;
+ ni->ni_status = ns;
+ lnet_ni_unlock(ni);
+
+ i++;
+ }
- i++;
}
}
LASSERT(ni->ni_ncpts >= 1);
if (ni->ni_ncpts == 1)
- return ni->ni_maxtxcredits;
+ return ni->ni_net->net_tunables.lct_max_tx_credits;
- credits = ni->ni_maxtxcredits / ni->ni_ncpts;
- credits = max(credits, 8 * ni->ni_peertxcredits);
- credits = min(credits, ni->ni_maxtxcredits);
+ credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts;
+ credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits);
+ credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits);
return credits;
}
}
/* move it to zombie list and nobody can find it anymore */
- LASSERT(!list_empty(&ni->ni_list));
- list_move(&ni->ni_list, &the_lnet.ln_nis_zombie);
- lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */
+ LASSERT(!list_empty(&ni->ni_netlist));
+ list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie);
+ lnet_ni_decref_locked(ni, 0);
}
static void
-lnet_clear_zombies_nis_locked(void)
+lnet_clear_zombies_nis_locked(struct lnet_net *net)
{
int i;
int islo;
lnet_ni_t *ni;
+ struct list_head *zombie_list = &net->net_ni_zombie;
- /* Now wait for the NI's I just nuked to show up on ln_zombie_nis
- * and shut them down in guaranteed thread context */
+ /*
+ * Now wait for the NIs I just nuked to show up on the zombie
+ * list and shut them down in guaranteed thread context
+ */
i = 2;
- while (!list_empty(&the_lnet.ln_nis_zombie)) {
+ while (!list_empty(zombie_list)) {
int *ref;
int j;
- ni = list_entry(the_lnet.ln_nis_zombie.next,
- lnet_ni_t, ni_list);
- list_del_init(&ni->ni_list);
+ ni = list_entry(zombie_list->next,
+ lnet_ni_t, ni_netlist);
+ list_del_init(&ni->ni_netlist);
+ /* the ni should be in deleting state. If it's not it's
+ * a bug */
+ LASSERT(ni->ni_state == LNET_NI_STATE_DELETING);
cfs_percpt_for_each(ref, j, ni->ni_refs) {
if (*ref == 0)
continue;
/* still busy, add it back to zombie list */
- list_add(&ni->ni_list, &the_lnet.ln_nis_zombie);
+ list_add(&ni->ni_netlist, zombie_list);
break;
}
- if (!list_empty(&ni->ni_list)) {
+ if (!list_empty(&ni->ni_netlist)) {
lnet_net_unlock(LNET_LOCK_EX);
++i;
if ((i & (-i)) == i) {
continue;
}
- ni->ni_lnd->lnd_refcount--;
lnet_net_unlock(LNET_LOCK_EX);
- islo = ni->ni_lnd->lnd_type == LOLND;
+ islo = ni->ni_net->net_lnd->lnd_type == LOLND;
LASSERT(!in_interrupt());
- (ni->ni_lnd->lnd_shutdown)(ni);
-
- /* can't deref lnd anymore now; it might have unregistered
- * itself... */
+ (net->net_lnd->lnd_shutdown)(ni);
if (!islo)
CDEBUG(D_LNI, "Removed LNI %s\n",
}
}
-static void
-lnet_shutdown_lndnis(void)
-{
- int i;
- lnet_ni_t *ni;
-
- /* NB called holding the global mutex */
-
- /* All quiet on the API front */
- LASSERT(!the_lnet.ln_shutdown);
- LASSERT(the_lnet.ln_refcount == 0);
- LASSERT(list_empty(&the_lnet.ln_nis_zombie));
-
- lnet_net_lock(LNET_LOCK_EX);
- the_lnet.ln_shutdown = 1; /* flag shutdown */
-
- /* Unlink NIs from the global table */
- while (!list_empty(&the_lnet.ln_nis)) {
- ni = list_entry(the_lnet.ln_nis.next,
- lnet_ni_t, ni_list);
- lnet_ni_unlink_locked(ni);
- }
-
- /* Drop the cached loopback NI. */
- if (the_lnet.ln_loni != NULL) {
- lnet_ni_decref_locked(the_lnet.ln_loni, 0);
- the_lnet.ln_loni = NULL;
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
-
- /* Clear lazy portals and drop delayed messages which hold refs
- * on their lnet_msg_t::msg_rxpeer */
- for (i = 0; i < the_lnet.ln_nportals; i++)
- LNetClearLazyPortal(i);
-
- /* Clear the peer table and wait for all peers to go (they hold refs on
- * their NIs) */
- lnet_peer_tables_cleanup(NULL);
-
- lnet_net_lock(LNET_LOCK_EX);
-
- lnet_clear_zombies_nis_locked();
- the_lnet.ln_shutdown = 0;
- lnet_net_unlock(LNET_LOCK_EX);
-}
-
/* shutdown down the NI and release refcount */
static void
lnet_shutdown_lndni(struct lnet_ni *ni)
{
int i;
+ struct lnet_net *net = ni->ni_net;
lnet_net_lock(LNET_LOCK_EX);
+ ni->ni_state = LNET_NI_STATE_DELETING;
lnet_ni_unlink_locked(ni);
lnet_net_unlock(LNET_LOCK_EX);
lnet_peer_tables_cleanup(ni);
lnet_net_lock(LNET_LOCK_EX);
- lnet_clear_zombies_nis_locked();
+ lnet_clear_zombies_nis_locked(net);
lnet_net_unlock(LNET_LOCK_EX);
}
-static int
-lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf)
+static void
+lnet_shutdown_lndnet(struct lnet_net *net)
{
- struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
- int rc = -EINVAL;
- __u32 lnd_type;
- lnd_t *lnd;
- struct lnet_tx_queue *tq;
- int i;
+ struct lnet_ni *ni;
- lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
+ lnet_net_lock(LNET_LOCK_EX);
- LASSERT(libcfs_isknown_lnd(lnd_type));
+ net->net_state = LNET_NET_STATE_DELETING;
- if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
- lnd_type == IIBLND || lnd_type == VIBLND) {
- CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
- goto failed0;
+ list_del_init(&net->net_list);
+
+ while (!list_empty(&net->net_ni_list)) {
+ ni = list_entry(net->net_ni_list.next,
+ lnet_ni_t, ni_netlist);
+ lnet_net_unlock(LNET_LOCK_EX);
+ lnet_shutdown_lndni(ni);
+ lnet_net_lock(LNET_LOCK_EX);
}
- /* Make sure this new NI is unique. */
- lnet_net_lock(LNET_LOCK_EX);
- rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis);
+ /*
+ * decrement ref count on lnd only when the entire network goes
+ * away
+ */
+ net->net_lnd->lnd_refcount--;
+
lnet_net_unlock(LNET_LOCK_EX);
- if (!rc) {
- if (lnd_type == LOLND) {
- lnet_ni_free(ni);
- return 0;
- }
+ lnet_net_free(net);
+}
- CERROR("Net %s is not unique\n",
- libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+static void
+lnet_shutdown_lndnets(void)
+{
+ struct lnet_net *net;
- rc = -EEXIST;
- goto failed0;
- }
+ /* NB called holding the global mutex */
- mutex_lock(&the_lnet.ln_lnd_mutex);
- lnd = lnet_find_lnd_by_type(lnd_type);
+ /* All quiet on the API front */
+ LASSERT(!the_lnet.ln_shutdown);
+ LASSERT(the_lnet.ln_refcount == 0);
- if (lnd == NULL) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = request_module("%s", libcfs_lnd2modname(lnd_type));
- mutex_lock(&the_lnet.ln_lnd_mutex);
+ lnet_net_lock(LNET_LOCK_EX);
+ the_lnet.ln_shutdown = 1; /* flag shutdown */
- lnd = lnet_find_lnd_by_type(lnd_type);
- if (lnd == NULL) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- CERROR("Can't load LND %s, module %s, rc=%d\n",
- libcfs_lnd2str(lnd_type),
- libcfs_lnd2modname(lnd_type), rc);
-#ifndef HAVE_MODULE_LOADING_SUPPORT
- LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
- "compiled with kernel module "
- "loading support.");
-#endif
- rc = -EINVAL;
- goto failed0;
- }
+ while (!list_empty(&the_lnet.ln_nets)) {
+ /*
+ * move the nets to the zombie list to avoid them being
+ * picked up for new work. LONET is also included in the
+ * Nets that will be moved to the zombie list
+ */
+ net = list_entry(the_lnet.ln_nets.next,
+ struct lnet_net, net_list);
+ list_move(&net->net_list, &the_lnet.ln_net_zombie);
}
- lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount++;
+ /* Drop the cached loopback Net. */
+ if (the_lnet.ln_loni != NULL) {
+ lnet_ni_decref_locked(the_lnet.ln_loni, 0);
+ the_lnet.ln_loni = NULL;
+ }
lnet_net_unlock(LNET_LOCK_EX);
- ni->ni_lnd = lnd;
+ /* iterate through the net zombie list and delete each net */
+ while (!list_empty(&the_lnet.ln_net_zombie)) {
+ net = list_entry(the_lnet.ln_net_zombie.next,
+ struct lnet_net, net_list);
+ lnet_shutdown_lndnet(net);
+ }
- if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
- lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+ lnet_net_lock(LNET_LOCK_EX);
+ the_lnet.ln_shutdown = 0;
+ lnet_net_unlock(LNET_LOCK_EX);
+}
- if (lnd_tunables != NULL) {
- LIBCFS_ALLOC(ni->ni_lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- if (ni->ni_lnd_tunables == NULL) {
- mutex_unlock(&the_lnet.ln_lnd_mutex);
- rc = -ENOMEM;
- goto failed0;
- }
- memcpy(ni->ni_lnd_tunables, lnd_tunables,
- sizeof(*ni->ni_lnd_tunables));
- }
+static int
+lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun)
+{
+ int rc = -EINVAL;
+ struct lnet_tx_queue *tq;
+ int i;
+ struct lnet_net *net = ni->ni_net;
- /* If given some LND tunable parameters, parse those now to
- * override the values in the NI structure. */
- if (conf) {
- if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0)
- ni->ni_peerrtrcredits =
- conf->cfg_config_u.cfg_net.net_peer_rtr_credits;
- if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0)
- ni->ni_peertimeout =
- conf->cfg_config_u.cfg_net.net_peer_timeout;
- if (conf->cfg_config_u.cfg_net.net_peer_tx_credits >= 0)
- ni->ni_peertxcredits =
- conf->cfg_config_u.cfg_net.net_peer_tx_credits;
- if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0)
- ni->ni_maxtxcredits =
- conf->cfg_config_u.cfg_net.net_max_tx_credits;
+ mutex_lock(&the_lnet.ln_lnd_mutex);
+
+ if (tun) {
+ memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun));
+ ni->ni_lnd_tunables_set = true;
}
- rc = (lnd->lnd_startup)(ni);
+ rc = (net->net_lnd->lnd_startup)(ni);
mutex_unlock(&the_lnet.ln_lnd_mutex);
if (rc != 0) {
LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n",
- rc, libcfs_lnd2str(lnd->lnd_type));
+ rc, libcfs_lnd2str(net->net_lnd->lnd_type));
lnet_net_lock(LNET_LOCK_EX);
- lnd->lnd_refcount--;
+ net->net_lnd->lnd_refcount--;
lnet_net_unlock(LNET_LOCK_EX);
goto failed0;
}
- LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL);
-
- lnet_net_lock(LNET_LOCK_EX);
- /* refcount for ln_nis */
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_list, &the_lnet.ln_nis);
- if (ni->ni_cpts != NULL) {
- lnet_ni_addref_locked(ni, 0);
- list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt);
- }
-
- lnet_net_unlock(LNET_LOCK_EX);
+ ni->ni_state = LNET_NI_STATE_ACTIVE;
- if (lnd->lnd_type == LOLND) {
+ /* We keep a reference on the loopback net through the loopback NI */
+ if (net->net_lnd->lnd_type == LOLND) {
lnet_ni_addref(ni);
LASSERT(the_lnet.ln_loni == NULL);
the_lnet.ln_loni = ni;
+ ni->ni_net->net_tunables.lct_peer_tx_credits = 0;
+ ni->ni_net->net_tunables.lct_peer_rtr_credits = 0;
+ ni->ni_net->net_tunables.lct_max_tx_credits = 0;
+ ni->ni_net->net_tunables.lct_peer_timeout = 0;
return 0;
}
- if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) {
+ if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ||
+ ni->ni_net->net_tunables.lct_max_tx_credits == 0) {
LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n",
- libcfs_lnd2str(lnd->lnd_type),
- ni->ni_peertxcredits == 0 ?
+ libcfs_lnd2str(net->net_lnd->lnd_type),
+ ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ?
"" : "per-peer ");
/* shutdown the NI since if we get here then it must've already
* been started
}
CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n",
- libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits,
+ libcfs_nid2str(ni->ni_nid),
+ ni->ni_net->net_tunables.lct_peer_tx_credits,
lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER,
- ni->ni_peerrtrcredits, ni->ni_peertimeout);
+ ni->ni_net->net_tunables.lct_peer_rtr_credits,
+ ni->ni_net->net_tunables.lct_peer_timeout);
return 0;
failed0:
}
static int
-lnet_startup_lndnis(struct list_head *nilist)
+lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun)
{
struct lnet_ni *ni;
+ struct lnet_net *net_l = NULL;
+ struct list_head local_ni_list;
int rc;
int ni_count = 0;
+ __u32 lnd_type;
+ lnd_t *lnd;
+ int peer_timeout =
+ net->net_tunables.lct_peer_timeout;
+ int maxtxcredits =
+ net->net_tunables.lct_max_tx_credits;
+ int peerrtrcredits =
+ net->net_tunables.lct_peer_rtr_credits;
+
+ INIT_LIST_HEAD(&local_ni_list);
+
+ /*
+ * make sure that this net is unique. If it isn't then
+ * we are adding interfaces to an already existing network, and
+ * 'net' is just a convenient way to pass in the list.
+ * if it is unique we need to find the LND and load it if
+ * necessary.
+ */
+ if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) {
+ lnd_type = LNET_NETTYP(net->net_id);
+
+ LASSERT(libcfs_isknown_lnd(lnd_type));
+
+ if (lnd_type == CIBLND || lnd_type == OPENIBLND ||
+ lnd_type == IIBLND || lnd_type == VIBLND) {
+ CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type));
+ rc = -EINVAL;
+ goto failed0;
+ }
+
+ mutex_lock(&the_lnet.ln_lnd_mutex);
+ lnd = lnet_find_lnd_by_type(lnd_type);
+
+ if (lnd == NULL) {
+ mutex_unlock(&the_lnet.ln_lnd_mutex);
+ rc = request_module("%s", libcfs_lnd2modname(lnd_type));
+ mutex_lock(&the_lnet.ln_lnd_mutex);
+
+ lnd = lnet_find_lnd_by_type(lnd_type);
+ if (lnd == NULL) {
+ mutex_unlock(&the_lnet.ln_lnd_mutex);
+ CERROR("Can't load LND %s, module %s, rc=%d\n",
+ libcfs_lnd2str(lnd_type),
+ libcfs_lnd2modname(lnd_type), rc);
+#ifndef HAVE_MODULE_LOADING_SUPPORT
+ LCONSOLE_ERROR_MSG(0x104, "Your kernel must be "
+ "compiled with kernel module "
+ "loading support.");
+#endif
+ rc = -EINVAL;
+ goto failed0;
+ }
+ }
+
+ lnet_net_lock(LNET_LOCK_EX);
+ lnd->lnd_refcount++;
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ net->net_lnd = lnd;
+
+ mutex_unlock(&the_lnet.ln_lnd_mutex);
+
+ net_l = net;
+ }
+
+ /*
+ * net_l: if the network being added is unique then net_l
+ * will point to that network
+ * if the network being added is not unique then
+ * net_l points to the existing network.
+ *
+ * When we enter the loop below, we'll pick NIs off the
+ * network being added and start them up, then add them to
+ * a local ni list. Once we've successfully started all
+ * the NIs then we join the local NI list (of started up
+ * networks) with the net_l->net_ni_list, which should
+ * point to the correct network to add the new ni list to
+ *
+ * If any of the new NIs fail to start up, then we want to
+ * iterate through the local ni list, which should include
+ * any NIs which were successfully started up, and shut
+ * them down.
+ *
+ * After that we want to delete the network being added,
+ * to avoid a memory leak.
+ */
+
+ /*
+ * When a network uses TCP bonding then all its interfaces
+ * must be specified when the network is first defined: the
+ * TCP bonding code doesn't allow for interfaces to be added
+ * or removed.
+ */
+ if (net_l != net && net_l != NULL && use_tcp_bonding &&
+ LNET_NETTYP(net_l->net_id) == SOCKLND) {
+ rc = -EINVAL;
+ goto failed0;
+ }
+
+ while (!list_empty(&net->net_ni_added)) {
+ ni = list_entry(net->net_ni_added.next, struct lnet_ni,
+ ni_netlist);
+ list_del_init(&ni->ni_netlist);
+
+ /* make sure that the NI we're about to start
+ * up is actually unique. If it's not, fail. */
+ if (!lnet_ni_unique_net(&net_l->net_ni_list,
+ ni->ni_interfaces[0])) {
+ rc = -EINVAL;
+ goto failed1;
+ }
+
+ /* adjust the pointer to the parent network, just in case
+ * the net is a duplicate */
+ ni->ni_net = net_l;
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, lnet_ni_t, ni_list);
- list_del(&ni->ni_list);
- rc = lnet_startup_lndni(ni, NULL);
+ rc = lnet_startup_lndni(ni, tun);
+
+ LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 ||
+ ni->ni_net->net_lnd->lnd_query != NULL);
if (rc < 0)
- goto failed;
+ goto failed1;
+
+ lnet_ni_addref(ni);
+ list_add_tail(&ni->ni_netlist, &local_ni_list);
ni_count++;
}
+ lnet_net_lock(LNET_LOCK_EX);
+ list_splice_tail(&local_ni_list, &net_l->net_ni_list);
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ /* if the network is not unique then we don't want to keep
+ * it around after we're done. Free it. Otherwise add that
+ * net to the global the_lnet.ln_nets */
+ if (net_l != net && net_l != NULL) {
+ /*
+ * TODO - note. currently the tunables can not be updated
+ * once added
+ */
+ lnet_net_free(net);
+ } else {
+ net->net_state = LNET_NET_STATE_ACTIVE;
+ /*
+ * restore tunables after they have been overwritten by the
+ * lnd
+ */
+ if (peer_timeout != -1)
+ net->net_tunables.lct_peer_timeout = peer_timeout;
+ if (maxtxcredits != -1)
+ net->net_tunables.lct_max_tx_credits = maxtxcredits;
+ if (peerrtrcredits != -1)
+ net->net_tunables.lct_peer_rtr_credits = peerrtrcredits;
+
+ lnet_net_lock(LNET_LOCK_EX);
+ list_add_tail(&net->net_list, &the_lnet.ln_nets);
+ lnet_net_unlock(LNET_LOCK_EX);
+ }
+
+ return ni_count;
+
+failed1:
+ /*
+ * shutdown the new NIs that are being started up
+ * free the NET being started
+ */
+ while (!list_empty(&local_ni_list)) {
+ ni = list_entry(local_ni_list.next, struct lnet_ni,
+ ni_netlist);
+
+ lnet_shutdown_lndni(ni);
+ }
+
+failed0:
+ lnet_net_free(net);
+
+ return rc;
+}
+
+static int
+lnet_startup_lndnets(struct list_head *netlist)
+{
+ struct lnet_net *net;
+ int rc;
+ int ni_count = 0;
+
+ while (!list_empty(netlist)) {
+ net = list_entry(netlist->next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
+
+ rc = lnet_startup_lndnet(net, NULL);
+
+ if (rc < 0)
+ goto failed;
+
+ ni_count += rc;
+ }
+
return ni_count;
failed:
- lnet_shutdown_lndnis();
+ lnet_shutdown_lndnets();
return rc;
}
the_lnet.ln_refcount = 0;
LNetInvalidateHandle(&the_lnet.ln_rc_eqh);
INIT_LIST_HEAD(&the_lnet.ln_lnds);
+ INIT_LIST_HEAD(&the_lnet.ln_net_zombie);
INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie);
INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow);
struct lnet_ping_info *pinfo;
lnet_handle_md_t md_handle;
struct list_head net_head;
+ struct lnet_net *net;
INIT_LIST_HEAD(&net_head);
return rc;
}
- /* Add in the loopback network */
- if (lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head) == NULL) {
+ /* create a network for Loopback network */
+ net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head);
+ if (net == NULL) {
+ rc = -ENOMEM;
+ goto err_empty_list;
+ }
+
+ /* Add in the loopback NI */
+ if (lnet_ni_alloc(net, NULL, NULL) == NULL) {
rc = -ENOMEM;
goto err_empty_list;
}
* in this case. On cleanup in case of failure only clean up
* routes if it has been loaded */
if (!the_lnet.ln_nis_from_mod_params) {
- rc = lnet_parse_networks(&net_head,
- lnet_get_networks());
+ rc = lnet_parse_networks(&net_head, lnet_get_networks(),
+ use_tcp_bonding);
if (rc < 0)
goto err_empty_list;
}
- ni_count = lnet_startup_lndnis(&net_head);
+ ni_count = lnet_startup_lndnets(&net_head);
if (ni_count < 0) {
rc = ni_count;
goto err_empty_list;
if (!the_lnet.ln_nis_from_mod_params)
lnet_destroy_routes();
err_shutdown_lndnis:
- lnet_shutdown_lndnis();
+ lnet_shutdown_lndnets();
err_empty_list:
lnet_unprepare();
LASSERT(rc < 0);
mutex_unlock(&the_lnet.ln_api_mutex);
while (!list_empty(&net_head)) {
- struct lnet_ni *ni;
+ struct lnet_net *net;
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
+ net = list_entry(net_head.next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
+ lnet_net_free(net);
}
return rc;
}
lnet_acceptor_stop();
lnet_destroy_routes();
- lnet_shutdown_lndnis();
+ lnet_shutdown_lndnets();
lnet_unprepare();
}
}
config->cfg_nid = ni->ni_nid;
- config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout;
- config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits;
- config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits;
- config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits;
+ config->cfg_config_u.cfg_net.net_peer_timeout =
+ ni->ni_net->net_tunables.lct_peer_timeout;
+ config->cfg_config_u.cfg_net.net_max_tx_credits =
+ ni->ni_net->net_tunables.lct_max_tx_credits;
+ config->cfg_config_u.cfg_net.net_peer_tx_credits =
+ ni->ni_net->net_tunables.lct_peer_tx_credits;
+ config->cfg_config_u.cfg_net.net_peer_rtr_credits =
+ ni->ni_net->net_tunables.lct_peer_rtr_credits;
net_config->ni_status = ni->ni_status->ns_status;
if (config->cfg_hdr.ioc_len > min_size)
tunable_size = config->cfg_hdr.ioc_len - min_size;
- /* Don't copy to much data to user space */
- min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables));
+ /* Don't copy too much data to user space */
+ min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables));
lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk;
- if (ni->ni_lnd_tunables && lnd_cfg && min_size) {
- memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size);
+ if (lnd_cfg && min_size) {
+ memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size);
config->cfg_config_u.cfg_net.net_interface_count = 1;
/* Tell user land that kernel side has less data */
- if (tunable_size > sizeof(*ni->ni_lnd_tunables)) {
+ if (tunable_size > sizeof(ni->ni_lnd_tunables)) {
min_size = tunable_size - sizeof(ni->ni_lnd_tunables);
config->cfg_hdr.ioc_len -= min_size;
}
}
}
-static int
+struct lnet_ni *
+lnet_get_ni_idx_locked(int idx)
+{
+ struct lnet_ni *ni;
+ struct lnet_net *net;
+
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ if (idx-- == 0)
+ return ni;
+ }
+ }
+
+ return NULL;
+}
+
+struct lnet_ni *
+lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
+{
+ struct lnet_ni *ni;
+ struct lnet_net *net = mynet;
+
+ if (prev == NULL) {
+ if (net == NULL)
+ net = list_entry(the_lnet.ln_nets.next, struct lnet_net,
+ net_list);
+ ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+ ni_netlist);
+
+ return ni;
+ }
+
+ if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) {
+ /* if you reached the end of the ni list and the net is
+ * specified, then there are no more nis in that net */
+ if (net != NULL)
+ return NULL;
+
+ /* we reached the end of this net ni list. move to the
+ * next net */
+ if (prev->ni_net->net_list.next == &the_lnet.ln_nets)
+ /* no more nets and no more NIs. */
+ return NULL;
+
+ /* get the next net */
+ net = list_entry(prev->ni_net->net_list.next, struct lnet_net,
+ net_list);
+ /* get the ni on it */
+ ni = list_entry(net->net_ni_list.next, struct lnet_ni,
+ ni_netlist);
+
+ return ni;
+ }
+
+ /* there are more nis left */
+ ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist);
+
+ return ni;
+}
+
+int
lnet_get_net_config(struct lnet_ioctl_config_data *config)
{
struct lnet_ni *ni;
- struct list_head *tmp;
- int idx = config->cfg_count;
+ int cpt;
int rc = -ENOENT;
- int cpt, i = 0;
-
- if (unlikely(!config->cfg_bulk))
- return -EINVAL;
+ int idx = config->cfg_count;
cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (i++ != idx)
- continue;
+ ni = lnet_get_ni_idx_locked(idx);
- ni = list_entry(tmp, lnet_ni_t, ni_list);
+ if (ni != NULL) {
+ rc = 0;
lnet_ni_lock(ni);
lnet_fill_ni_info(ni, config);
lnet_ni_unlock(ni);
- rc = 0;
- break;
}
lnet_net_unlock(cpt);
char *nets = conf->cfg_config_u.cfg_net.net_intf;
struct lnet_ping_info *pinfo;
lnet_handle_md_t md_handle;
- struct lnet_ni *ni;
+ struct lnet_net *net;
struct list_head net_head;
int rc;
lnet_remotenet_t *rnet;
+ int net_ni_count;
+ int num_acceptor_nets;
+ __u32 net_type;
+ struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL;
INIT_LIST_HEAD(&net_head);
- /* Create a ni structure for the network string */
- rc = lnet_parse_networks(&net_head, nets);
+ if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf))
+ lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk;
+
+ /* Create a net/ni structures for the network string */
+ rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding);
if (rc <= 0)
return rc == 0 ? -EINVAL : rc;
mutex_lock(&the_lnet.ln_api_mutex);
if (rc > 1) {
- rc = -EINVAL; /* only add one interface per call */
+ rc = -EINVAL; /* only add one network per call */
goto failed0;
}
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
+ net = list_entry(net_head.next, struct lnet_net, net_list);
lnet_net_lock(LNET_LOCK_EX);
- rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid));
+ rnet = lnet_find_rnet_locked(net->net_id);
lnet_net_unlock(LNET_LOCK_EX);
/* make sure that the net added doesn't invalidate the current
* configuration LNet is keeping */
goto failed0;
}
- rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(),
+ /*
+ * make sure you calculate the correct number of slots in the ping
+ * info. Since the ping info is a flattened list of all the NIs,
+ * we should allocate enough slots to accommodate the number of NIs
+ * which will be added.
+ *
+ * We can use lnet_get_net_ni_count_locked() since the net is not
+ * on a public list yet, so locking is not a problem
+ */
+ net_ni_count = lnet_get_net_ni_count_locked(net);
+
+ rc = lnet_ping_info_setup(&pinfo, &md_handle,
+ net_ni_count + lnet_get_ni_count(),
false);
if (rc != 0)
goto failed0;
- list_del_init(&ni->ni_list);
+ list_del_init(&net->net_list);
- rc = lnet_startup_lndni(ni, conf);
- if (rc != 0)
+ if (lnd_tunables)
+ memcpy(&net->net_tunables,
+ &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn));
+
+ /*
+ * before starting this network get a count of the current TCP
+ * networks which require the acceptor thread running. If that
+ * count is == 0 before we start up this network, then we'd want to
+ * start up the acceptor thread after starting up this network
+ */
+ num_acceptor_nets = lnet_count_acceptor_nets();
+
+ /*
+ * lnet_startup_lndnet() can deallocate 'net' even if it returns
+ * success, because we ended up adding interfaces to an existing
+ * network. So grab the net_type now
+ */
+ net_type = LNET_NETTYP(net->net_id);
+
+ rc = lnet_startup_lndnet(net,
+ (lnd_tunables) ? &lnd_tunables->lt_tun : NULL);
+ if (rc < 0)
goto failed1;
- if (ni->ni_lnd->lnd_accept != NULL) {
+ /*
+ * Start the acceptor thread if this is the first network
+ * being added that requires the thread.
+ */
+ if (net_type == SOCKLND && num_acceptor_nets == 0)
+ {
rc = lnet_acceptor_start();
if (rc < 0) {
- /* shutdown the ni that we just started */
+ /* shutdown the net that we just started */
CERROR("Failed to start up acceptor thread\n");
- lnet_shutdown_lndni(ni);
+ /*
+ * Note that if we needed to start the acceptor
+ * thread, then 'net' must have been the first TCP
+ * network, therefore was unique, and therefore
+ * wasn't deallocated by lnet_startup_lndnet()
+ */
+ lnet_shutdown_lndnet(net);
goto failed1;
}
}
failed0:
mutex_unlock(&the_lnet.ln_api_mutex);
while (!list_empty(&net_head)) {
- ni = list_entry(net_head.next, struct lnet_ni, ni_list);
- list_del_init(&ni->ni_list);
- lnet_ni_free(ni);
+ net = list_entry(net_head.next, struct lnet_net, net_list);
+ list_del_init(&net->net_list);
+ lnet_net_free(net);
}
return rc;
}
int
-lnet_dyn_del_ni(__u32 net)
+lnet_dyn_del_ni(__u32 net_id)
{
- lnet_ni_t *ni;
+ struct lnet_net *net;
struct lnet_ping_info *pinfo;
lnet_handle_md_t md_handle;
int rc;
+ int net_ni_count;
/* don't allow userspace to shutdown the LOLND */
- if (LNET_NETTYP(net) == LOLND)
+ if (LNET_NETTYP(net_id) == LOLND)
return -EINVAL;
mutex_lock(&the_lnet.ln_api_mutex);
- /* create and link a new ping info, before removing the old one */
- rc = lnet_ping_info_setup(&pinfo, &md_handle,
- lnet_get_ni_count() - 1, false);
- if (rc != 0)
- goto out;
- ni = lnet_net2ni(net);
- if (ni == NULL) {
+ lnet_net_lock(0);
+
+ net = lnet_get_net_locked(net_id);
+ if (net == NULL) {
rc = -EINVAL;
- goto failed;
+ goto out;
}
- /* decrement the reference counter taken by lnet_net2ni() */
- lnet_ni_decref_locked(ni, 0);
+ net_ni_count = lnet_get_net_ni_count_locked(net);
- lnet_shutdown_lndni(ni);
+ lnet_net_unlock(0);
- if (lnet_count_acceptor_nis() == 0)
+ /* create and link a new ping info, before removing the old one */
+ rc = lnet_ping_info_setup(&pinfo, &md_handle,
+ lnet_get_ni_count() - net_ni_count, false);
+ if (rc != 0)
+ goto out;
+
+ lnet_shutdown_lndnet(net);
+
+ if (lnet_count_acceptor_nets() == 0)
lnet_acceptor_stop();
lnet_ping_target_update(pinfo, md_handle);
- goto out;
-failed:
- lnet_ping_md_unlink(pinfo, &md_handle);
- lnet_ping_info_free(pinfo);
+
out:
mutex_unlock(&the_lnet.ln_api_mutex);
if (ni == NULL)
return -EINVAL;
- if (ni->ni_lnd->lnd_ctl == NULL)
+ if (ni->ni_net->net_lnd->lnd_ctl == NULL)
rc = -EINVAL;
else
- rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg);
+ rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg);
- lnet_ni_decref(ni);
return rc;
}
/* not reached */
LNetGetId(unsigned int index, lnet_process_id_t *id)
{
struct lnet_ni *ni;
- struct list_head *tmp;
+ struct lnet_net *net;
int cpt;
int rc = -ENOENT;
cpt = lnet_net_lock_current();
- list_for_each(tmp, &the_lnet.ln_nis) {
- if (index-- != 0)
- continue;
-
- ni = list_entry(tmp, lnet_ni_t, ni_list);
+ list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ if (index-- != 0)
+ continue;
- id->nid = ni->ni_nid;
- id->pid = the_lnet.ln_pid;
- rc = 0;
- break;
+ id->nid = ni->ni_nid;
+ id->pid = the_lnet.ln_pid;
+ rc = 0;
+ break;
+ }
}
lnet_net_unlock(cpt);
#define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */
#define LNET_SINGLE_TEXTBUF_NOB (4<<10)
+#define SPACESTR " \t\v\r\n"
+#define DELIMITERS ":()[]"
+
static void
-lnet_syntax(char *name, char *str, int offset, int width)
+lnet_syntax(const char *name, const char *str, int offset, int width)
{
static char dots[LNET_SINGLE_TEXTBUF_NOB];
static char dashes[LNET_SINGLE_TEXTBUF_NOB];
}
}
-int
-lnet_net_unique(__u32 net, struct list_head *nilist)
+bool
+lnet_net_unique(__u32 net_id, struct list_head *netlist,
+ struct lnet_net **net)
+{
+ struct lnet_net *net_l;
+
+ list_for_each_entry(net_l, netlist, net_list) {
+ if (net_l->net_id == net_id) {
+ if (net != NULL)
+ *net = net_l;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* check that the NI is unique within the list of NIs already added to
+ * a network */
+bool
+lnet_ni_unique_net(struct list_head *nilist, char *iface)
 {
 	struct list_head *tmp;
-	lnet_ni_t *ni;
+	struct lnet_ni *ni;
 	list_for_each(tmp, nilist) {
-		ni = list_entry(tmp, lnet_ni_t, ni_list);
+		ni = list_entry(tmp, struct lnet_ni, ni_netlist);
-		if (LNET_NIDNET(ni->ni_nid) == net)
-			return 0;
+		/* full-string compare: a strncmp() bounded by
+		 * strlen(iface) would falsely match "eth1" against an
+		 * existing "eth10" */
+		if (ni->ni_interfaces[0] != NULL &&
+		    strcmp(ni->ni_interfaces[0], iface) == 0)
+			return false;
 	}
-	return 1;
+	return true;
+}
+
+/* check that the NI is unique within the interfaces of the same NI.
+ * This is only a consideration if use_tcp_bonding is set */
+static bool
+lnet_ni_unique_ni(char *iface_list[LNET_MAX_INTERFACES], char *iface)
+{
+	int i;
+	for (i = 0; i < LNET_MAX_INTERFACES; i++) {
+		/* full-string compare: a strncmp() bounded by
+		 * strlen(iface) would falsely match "eth1" against an
+		 * existing "eth10" */
+		if (iface_list[i] != NULL &&
+		    strcmp(iface_list[i], iface) == 0)
+			return false;
+	}
+
+	return true;
+}
+
+/* Return true iff @value is present in @array[0..size-1].
+ *
+ * NOTE(review): the returns were previously inverted, which made the
+ * !in_array() test in lnet_net_append_cpts() append CPTs that were
+ * already present and drop genuinely new ones. */
+static bool
+in_array(__u32 *array, __u32 size, __u32 value)
+{
+	int i;
+
+	for (i = 0; i < size; i++) {
+		if (array[i] == value)
+			return true;
+	}
+
+	return false;
+}
+
+/* Merge the CPTs of a new NI (@cpts, @ncpts) into the CPT list of its
+ * parent @net.  A NULL @cpts means the NI exists on all CPTs, in which
+ * case the net's explicit CPT list is dropped.  Returns 0 on success,
+ * -ENOMEM on allocation failure. */
+static int
+lnet_net_append_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net)
+{
+	__u32 *added_cpts = NULL;
+	int i, j = 0, rc = 0;
+
+	/*
+	 * no need to go further since a subset of the NIs already exist on
+	 * all CPTs
+	 */
+	if (net->net_ncpts == LNET_CPT_NUMBER)
+		return 0;
+
+	if (cpts == NULL) {
+		/* there is an NI which will exist on all CPTs */
+		if (net->net_cpts != NULL)
+			LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) *
+				    net->net_ncpts);
+		net->net_cpts = NULL;
+		net->net_ncpts = LNET_CPT_NUMBER;
+		return 0;
+	}
+
+	if (net->net_cpts == NULL) {
+		LIBCFS_ALLOC(net->net_cpts, sizeof(*net->net_cpts) * ncpts);
+		if (net->net_cpts == NULL)
+			return -ENOMEM;
+		/* memcpy takes a byte count: copy ncpts elements */
+		memcpy(net->net_cpts, cpts, ncpts * sizeof(*net->net_cpts));
+		return 0;
+	}
+
+	LIBCFS_ALLOC(added_cpts, sizeof(*added_cpts) * LNET_CPT_NUMBER);
+	if (added_cpts == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ncpts; i++) {
+		if (!in_array(net->net_cpts, net->net_ncpts, cpts[i])) {
+			added_cpts[j] = cpts[i];
+			j++;
+		}
+	}
+
+	/* append the new cpts if any to the list of cpts in the net */
+	if (j > 0) {
+		__u32 *array = NULL, *loc;
+		__u32 total_entries = j + net->net_ncpts;
+
+		LIBCFS_ALLOC(array, sizeof(*net->net_cpts) * total_entries);
+		if (array == NULL) {
+			rc = -ENOMEM;
+			goto failed;
+		}
+
+		/* sizes in bytes, matching the LIBCFS_ALLOC above */
+		memcpy(array, net->net_cpts,
+		       net->net_ncpts * sizeof(*net->net_cpts));
+		loc = array + net->net_ncpts;
+		memcpy(loc, added_cpts, j * sizeof(*added_cpts));
+
+		LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) *
+			    net->net_ncpts);
+		net->net_ncpts = total_entries;
+		net->net_cpts = array;
+	}
+
+failed:
+	/* also reached on success: scratch buffer is always freed */
+	LIBCFS_FREE(added_cpts, sizeof(*added_cpts) * LNET_CPT_NUMBER);
+
+	return rc;
+}
+
+/* Rebuild @net's CPT list after an NI with (@cpts, @ncpts) has been
+ * removed from its parent net. */
+static void
+lnet_net_remove_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net)
+{
+	struct lnet_ni *ni;
+	int rc;
+
+	/*
+	 * Operation Assumption:
+	 * This function is called after an NI has been removed from
+	 * its parent net.
+	 *
+	 * if we're removing an NI which exists on all CPTs then
+	 * we have to check if any of the other NIs on this net also
+	 * exists on all CPTs. If none, then we need to build our Net CPT
+	 * list based on the remaining NIs.
+	 *
+	 * If the NI being removed exists on a subset of the CPTs then we
+	 * also rebuild the Net CPT list based on the remaining NIs, which
+	 * should result in the expected Net CPT list.
+	 */
+
+	/*
+	 * sometimes this function can be called due to some failure
+	 * creating an NI, before any of the cpts are allocated, so check
+	 * for that case and don't do anything
+	 */
+	if (ncpts == 0)
+		return;
+
+	if (ncpts == LNET_CPT_NUMBER) {
+		/*
+		 * first iteration through the NI list in the net to see
+		 * if any of the NIs exist on all the CPTs. If one is
+		 * found then our job is done.
+		 */
+		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+			if (ni->ni_ncpts == LNET_CPT_NUMBER)
+				return;
+		}
+	}
+
+	/*
+	 * Rebuild the Net CPT list again, thereby only including only the
+	 * CPTs which the remaining NIs are associated with.
+	 */
+	if (net->net_cpts != NULL) {
+		LIBCFS_FREE(net->net_cpts,
+			sizeof(*net->net_cpts) * net->net_ncpts);
+		net->net_cpts = NULL;
+	}
+
+	list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+		rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts,
+					  net);
+		if (rc != 0) {
+			CERROR("Out of Memory\n");
+			/*
+			 * do our best to keep on going. Delete
+			 * the net cpts and set it to NULL. This
+			 * way we can keep on going but less
+			 * efficiently, since memory accesses might be
+			 * across CPT lines.
+			 */
+			if (net->net_cpts != NULL) {
+				LIBCFS_FREE(net->net_cpts,
+					sizeof(*net->net_cpts) *
+					net->net_ncpts);
+				net->net_cpts = NULL;
+				net->net_ncpts = LNET_CPT_NUMBER;
+			}
+			return;
+		}
+	}
+}
void
{
int i;
+ lnet_net_remove_cpts(ni->ni_cpts, ni->ni_ncpts, ni->ni_net);
+
if (ni->ni_refs != NULL)
cfs_percpt_free(ni->ni_refs);
if (ni->ni_cpts != NULL)
cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts);
- if (ni->ni_lnd_tunables != NULL)
- LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables));
-
for (i = 0; i < LNET_MAX_INTERFACES &&
ni->ni_interfaces[i] != NULL; i++) {
LIBCFS_FREE(ni->ni_interfaces[i],
LIBCFS_FREE(ni, sizeof(*ni));
}
+/* Free @net and every NI still attached to it: NIs left on
+ * net_ni_added (e.g. after a startup failure) and NIs on net_ni_list
+ * (already started).  The zombie list must already be empty. */
+void
+lnet_net_free(struct lnet_net *net)
+{
+	struct list_head *tmp, *tmp2;
+	struct lnet_ni *ni;
+
+	LASSERT(list_empty(&net->net_ni_zombie));
+
+	/*
+	 * delete any NIs that haven't been added yet. This could happen
+	 * if there is a failure on net startup
+	 */
+	list_for_each_safe(tmp, tmp2, &net->net_ni_added) {
+		ni = list_entry(tmp, struct lnet_ni, ni_netlist);
+		list_del_init(&ni->ni_netlist);
+		lnet_ni_free(ni);
+	}
+
+	/* delete any NIs which have been started. */
+	list_for_each_safe(tmp, tmp2, &net->net_ni_list) {
+		ni = list_entry(tmp, struct lnet_ni, ni_netlist);
+		list_del_init(&ni->ni_netlist);
+		lnet_ni_free(ni);
+	}
+
+	if (net->net_cpts != NULL)
+		LIBCFS_FREE(net->net_cpts,
+			    sizeof(*net->net_cpts) * net->net_ncpts);
+
+	LIBCFS_FREE(net, sizeof(*net));
+}
+
+/* Allocate a new lnet_net for @net_id and append it to @net_list.
+ * Returns NULL when a net with the same ID is already on the list or
+ * on allocation failure. */
+struct lnet_net *
+lnet_net_alloc(__u32 net_id, struct list_head *net_list)
+{
+	struct lnet_net *net;
+
+	if (!lnet_net_unique(net_id, net_list, NULL)) {
+		CERROR("Duplicate net %s. Ignore\n",
+		       libcfs_net2str(net_id));
+		return NULL;
+	}
+
+	LIBCFS_ALLOC(net, sizeof(*net));
+	if (net == NULL) {
+		CERROR("Out of memory creating network %s\n",
+		       libcfs_net2str(net_id));
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&net->net_list);
+	INIT_LIST_HEAD(&net->net_ni_list);
+	INIT_LIST_HEAD(&net->net_ni_added);
+	INIT_LIST_HEAD(&net->net_ni_zombie);
+
+	net->net_id = net_id;
+	net->net_state = LNET_NET_STATE_INIT;
+
+	/* initialize global parameters to undefined (-1 means "use the
+	 * LND/LNet default") */
+	net->net_tunables.lct_peer_timeout = -1;
+	net->net_tunables.lct_max_tx_credits = -1;
+	net->net_tunables.lct_peer_tx_credits = -1;
+	net->net_tunables.lct_peer_rtr_credits = -1;
+
+	list_add_tail(&net->net_list, net_list);
+
+	return net;
+}
+
+/* Record @iface as an additional interface name on @ni, copying the
+ * string into newly allocated memory.  Returns 0 on success, -EINVAL
+ * for a duplicate name or too many interfaces, -ENOMEM on allocation
+ * failure.  NOTE(review): a NULL @ni also yields -ENOMEM — -EINVAL may
+ * be more accurate there; confirm against callers. */
+static int
+lnet_ni_add_interface(struct lnet_ni *ni, char *iface)
+{
+	int niface = 0;
+
+	if (ni == NULL)
+		return -ENOMEM;
+
+	if (!lnet_ni_unique_ni(ni->ni_interfaces, iface))
+		return -EINVAL;
+
+	/* Allocate a separate piece of memory and copy
+	 * into it the string, so we don't have
+	 * a dependency on the tokens string. This way we
+	 * can free the tokens at the end of the function.
+	 * The newly allocated ni_interfaces[] can be
+	 * freed when freeing the NI */
+	while (niface < LNET_MAX_INTERFACES &&
+	       ni->ni_interfaces[niface] != NULL)
+		niface++;
+
+	if (niface >= LNET_MAX_INTERFACES) {
+		LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
+				   "for net %s\n",
+				   libcfs_net2str(LNET_NIDNET(ni->ni_nid)));
+		return -EINVAL;
+	}
+
+	LIBCFS_ALLOC(ni->ni_interfaces[niface],
+		     strlen(iface) + 1);
+
+	if (ni->ni_interfaces[niface] == NULL) {
+		CERROR("Can't allocate net interface name\n");
+		return -ENOMEM;
+	}
+
+	/* size includes the NUL, so the copy is always terminated */
+	strncpy(ni->ni_interfaces[niface], iface,
+		strlen(iface) + 1);
+
+	return 0;
+}
+
+/* allocate and add to the provided network */
lnet_ni_t *
-lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist)
+lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface)
{
struct lnet_tx_queue *tq;
struct lnet_ni *ni;
int rc;
int i;
- if (!lnet_net_unique(net, nilist)) {
- LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n",
- libcfs_net2str(net));
- return NULL;
- }
+ if (iface != NULL)
+ /* make sure that this NI is unique in the net it's
+ * being added to */
+ if (!lnet_ni_unique_net(&net->net_ni_added, iface))
+ return NULL;
LIBCFS_ALLOC(ni, sizeof(*ni));
if (ni == NULL) {
- CERROR("Out of memory creating network %s\n",
- libcfs_net2str(net));
+ CERROR("Out of memory creating network interface %s%s\n",
+ libcfs_net2str(net->net_id),
+ (iface != NULL) ? iface : "");
return NULL;
}
spin_lock_init(&ni->ni_lock);
INIT_LIST_HEAD(&ni->ni_cptlist);
+ INIT_LIST_HEAD(&ni->ni_netlist);
ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(),
sizeof(*ni->ni_refs[0]));
if (ni->ni_refs == NULL)
} else {
rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts);
if (rc <= 0) {
- CERROR("Failed to set CPTs for NI %s: %d\n",
- libcfs_net2str(net), rc);
+ CERROR("Failed to set CPTs for NI %s(%s): %d\n",
+ libcfs_net2str(net->net_id),
+ (iface != NULL) ? iface : "", rc);
goto failed;
}
ni->ni_ncpts = rc;
}
+ ni->ni_net = net;
/* LND will fill in the address part of the NID */
- ni->ni_nid = LNET_MKNID(net, 0);
+ ni->ni_nid = LNET_MKNID(net->net_id, 0);
/* Store net namespace in which current ni is being created */
if (current->nsproxy->net_ns != NULL)
ni->ni_net_ns = NULL;
ni->ni_last_alive = ktime_get_real_seconds();
- list_add_tail(&ni->ni_list, nilist);
+ ni->ni_state = LNET_NI_STATE_INIT;
+ rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net);
+ if (rc != 0)
+ goto failed;
+ list_add_tail(&ni->ni_netlist, &net->net_ni_added);
+
+ /* if an interface name is provided then make sure to add in that
+ * interface name in NI */
+ if (iface != NULL)
+ if (lnet_ni_add_interface(ni, iface) != 0)
+ goto failed;
+
return ni;
- failed:
+failed:
lnet_ni_free(ni);
return NULL;
}
+/*
+ * Parse the networks string and create the matching set of NIs on the
+ * nilist.
+ */
int
-lnet_parse_networks(struct list_head *nilist, char *networks)
+lnet_parse_networks(struct list_head *netlist, char *networks,
+ bool use_tcp_bonding)
{
- struct cfs_expr_list *el = NULL;
+ struct cfs_expr_list *net_el = NULL;
+ struct cfs_expr_list *ni_el = NULL;
int tokensize;
char *tokens;
char *str;
- char *tmp;
- struct lnet_ni *ni;
- __u32 net;
+ struct lnet_net *net;
+ struct lnet_ni *ni = NULL;
+ __u32 net_id;
int nnets = 0;
- struct list_head *temp_node;
if (networks == NULL) {
CERROR("networks string is undefined\n");
}
memcpy(tokens, networks, tokensize);
- str = tmp = tokens;
-
- while (str != NULL && *str != 0) {
- char *comma = strchr(str, ',');
- char *bracket = strchr(str, '(');
- char *square = strchr(str, '[');
- char *iface;
- int niface;
- int rc;
-
- /* NB we don't check interface conflicts here; it's the LNDs
- * responsibility (if it cares at all) */
-
- if (square != NULL && (comma == NULL || square < comma)) {
- /* i.e: o2ib0(ib0)[1,2], number between square
- * brackets are CPTs this NI needs to be bond */
- if (bracket != NULL && bracket > square) {
- tmp = square;
+ str = tokens;
+
+ /*
+ * Main parser loop.
+ *
+ * NB we don't check interface conflicts here; it's the LNDs
+ * responsibility (if it cares at all)
+ */
+ do {
+ char *nistr;
+ char *elstr;
+ char *name;
+ int rc;
+
+ /*
+ * Parse a network string into its components.
+ *
+ * <name>{"("...")"}{"["<el>"]"}
+ */
+
+ /* Network name (mandatory) */
+ while (isspace(*str))
+ *str++ = '\0';
+ if (!*str)
+ break;
+ name = str;
+ str += strcspn(str, SPACESTR ":()[],");
+ while (isspace(*str))
+ *str++ = '\0';
+
+ /* Interface list (optional) */
+ if (*str == '(') {
+ *str++ = '\0';
+ nistr = str;
+ str += strcspn(str, ")");
+ if (*str != ')') {
+ str = nistr;
goto failed_syntax;
}
+ do {
+ *str++ = '\0';
+ } while (isspace(*str));
+ } else {
+ nistr = NULL;
+ }
- tmp = strchr(square, ']');
- if (tmp == NULL) {
- tmp = square;
+ /* CPT expression (optional) */
+ if (*str == '[') {
+ elstr = str;
+ str += strcspn(str, "]");
+ if (*str != ']') {
+ str = elstr;
goto failed_syntax;
}
-
- rc = cfs_expr_list_parse(square, tmp - square + 1,
- 0, LNET_CPT_NUMBER - 1, &el);
+ rc = cfs_expr_list_parse(elstr, str - elstr + 1,
+ 0, LNET_CPT_NUMBER - 1,
+ &net_el);
if (rc != 0) {
- tmp = square;
+ str = elstr;
goto failed_syntax;
}
-
- while (square <= tmp)
- *square++ = ' ';
+ *elstr = '\0';
+ do {
+ *str++ = '\0';
+ } while (isspace(*str));
}
- if (bracket == NULL ||
- (comma != NULL && comma < bracket)) {
-
- /* no interface list specified */
-
- if (comma != NULL)
- *comma++ = 0;
- net = libcfs_str2net(cfs_trimwhite(str));
-
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- LCONSOLE_ERROR_MSG(0x113, "Unrecognised network"
- " type\n");
- tmp = str;
- goto failed_syntax;
- }
+ /* Bad delimiters */
+ if (*str && (strchr(DELIMITERS, *str) != NULL))
+ goto failed_syntax;
- if (LNET_NETTYP(net) != LOLND && /* LO is implicit */
- lnet_ni_alloc(net, el, nilist) == NULL)
- goto failed;
+	/* go to the next net if it exists */
+ str += strcspn(str, ",");
+ if (*str == ',')
+ *str++ = '\0';
+
+ /*
+ * At this point the name is properly terminated.
+ */
+ net_id = libcfs_str2net(name);
+ if (net_id == LNET_NIDNET(LNET_NID_ANY)) {
+ LCONSOLE_ERROR_MSG(0x113,
+ "Unrecognised network type\n");
+ str = name;
+ goto failed_syntax;
+ }
- if (el != NULL) {
- cfs_expr_list_free(el);
- el = NULL;
+ if (LNET_NETTYP(net_id) == LOLND) {
+ /* Loopback is implicit, and there can be only one. */
+ if (net_el) {
+ cfs_expr_list_free(net_el);
+ net_el = NULL;
}
-
- str = comma;
+ /* Should we error out instead? */
continue;
}
- *bracket = 0;
- net = libcfs_str2net(cfs_trimwhite(str));
- if (net == LNET_NIDNET(LNET_NID_ANY)) {
- tmp = str;
- goto failed_syntax;
- }
+ /*
+	 * All network parameters are now known.
+ */
+ nnets++;
- ni = lnet_ni_alloc(net, el, nilist);
- if (ni == NULL)
+ /* always allocate a net, since we will eventually add an
+ * interface to it, or we will fail, in which case we'll
+ * just delete it */
+ net = lnet_net_alloc(net_id, netlist);
+ if (IS_ERR_OR_NULL(net))
goto failed;
- if (el != NULL) {
- cfs_expr_list_free(el);
- el = NULL;
- }
-
- niface = 0;
- iface = bracket + 1;
+ if (!nistr ||
+ (use_tcp_bonding && LNET_NETTYP(net_id) == SOCKLND)) {
+ /*
+ * No interface list was specified, allocate a
+ * ni using the defaults.
+ */
+ ni = lnet_ni_alloc(net, net_el, NULL);
+ if (IS_ERR_OR_NULL(ni))
+ goto failed;
- bracket = strchr(iface, ')');
- if (bracket == NULL) {
- tmp = iface;
- goto failed_syntax;
+ if (!nistr) {
+ if (net_el) {
+ cfs_expr_list_free(net_el);
+ net_el = NULL;
+ }
+ continue;
+ }
}
- *bracket = 0;
do {
- comma = strchr(iface, ',');
- if (comma != NULL)
- *comma++ = 0;
+ elstr = NULL;
+
+ /* Interface name (mandatory) */
+ while (isspace(*nistr))
+ *nistr++ = '\0';
+ name = nistr;
+ nistr += strcspn(nistr, SPACESTR "[],");
+ while (isspace(*nistr))
+ *nistr++ = '\0';
+
+ /* CPT expression (optional) */
+ if (*nistr == '[') {
+ elstr = nistr;
+ nistr += strcspn(nistr, "]");
+ if (*nistr != ']') {
+ str = elstr;
+ goto failed_syntax;
+ }
+ rc = cfs_expr_list_parse(elstr,
+ nistr - elstr + 1,
+ 0, LNET_CPT_NUMBER - 1,
+ &ni_el);
+ if (rc != 0) {
+ str = elstr;
+ goto failed_syntax;
+ }
+ *elstr = '\0';
+ do {
+ *nistr++ = '\0';
+ } while (isspace(*nistr));
+ } else {
+ ni_el = net_el;
+ }
- iface = cfs_trimwhite(iface);
- if (*iface == 0) {
- tmp = iface;
+ /*
+			 * End of single interface specification,
+ * advance to the start of the next one, if
+ * any.
+ */
+ if (*nistr == ',') {
+ do {
+ *nistr++ = '\0';
+ } while (isspace(*nistr));
+ if (!*nistr) {
+ str = nistr;
+ goto failed_syntax;
+ }
+ } else if (*nistr) {
+ str = nistr;
goto failed_syntax;
}
- if (niface == LNET_MAX_INTERFACES) {
- LCONSOLE_ERROR_MSG(0x115, "Too many interfaces "
- "for net %s\n",
- libcfs_net2str(net));
- goto failed;
+ /*
+ * At this point the name is properly terminated.
+ */
+ if (!*name) {
+ str = name;
+ goto failed_syntax;
}
- /* Allocate a separate piece of memory and copy
- * into it the string, so we don't have
- * a depencency on the tokens string. This way we
- * can free the tokens at the end of the function.
- * The newly allocated ni_interfaces[] can be
- * freed when freeing the NI */
- LIBCFS_ALLOC(ni->ni_interfaces[niface],
- strlen(iface) + 1);
- if (ni->ni_interfaces[niface] == NULL) {
- CERROR("Can't allocate net interface name\n");
- goto failed;
+ if (use_tcp_bonding &&
+ LNET_NETTYP(net->net_id) == SOCKLND) {
+ rc = lnet_ni_add_interface(ni, name);
+ if (rc != 0)
+ goto failed;
+ } else {
+ ni = lnet_ni_alloc(net, ni_el, name);
+ if (IS_ERR_OR_NULL(ni))
+ goto failed;
}
- strncpy(ni->ni_interfaces[niface], iface,
- strlen(iface));
- niface++;
- iface = comma;
- } while (iface != NULL);
-
- str = bracket + 1;
- comma = strchr(bracket + 1, ',');
- if (comma != NULL) {
- *comma = 0;
- str = cfs_trimwhite(str);
- if (*str != 0) {
- tmp = str;
- goto failed_syntax;
+
+ if (ni_el) {
+ if (ni_el != net_el) {
+ cfs_expr_list_free(ni_el);
+ ni_el = NULL;
+ }
}
- str = comma + 1;
- continue;
- }
+ } while (*nistr);
- str = cfs_trimwhite(str);
- if (*str != 0) {
- tmp = str;
- goto failed_syntax;
+ if (net_el) {
+ cfs_expr_list_free(net_el);
+ net_el = NULL;
}
- }
-
- list_for_each(temp_node, nilist)
- nnets++;
+ } while (*str);
LIBCFS_FREE(tokens, tokensize);
return nnets;
failed_syntax:
- lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp));
+ lnet_syntax("networks", networks, (int)(str - tokens), strlen(str));
failed:
- while (!list_empty(nilist)) {
- ni = list_entry(nilist->next, lnet_ni_t, ni_list);
+ /* free the net list and all the nis on each net */
+ while (!list_empty(netlist)) {
+ net = list_entry(netlist->next, struct lnet_net, net_list);
- list_del(&ni->ni_list);
- lnet_ni_free(ni);
+ list_del_init(&net->net_list);
+ lnet_net_free(net);
}
- if (el != NULL)
- cfs_expr_list_free(el);
+ if (ni_el && ni_el != net_el)
+ cfs_expr_list_free(ni_el);
+ if (net_el)
+ cfs_expr_list_free(net_el);
LIBCFS_FREE(tokens, tokensize);
iov = msg->msg_iov;
kiov = msg->msg_kiov;
- LASSERT(niov > 0);
- LASSERT((iov == NULL) != (kiov == NULL));
+ LASSERT (niov > 0);
+ LASSERT ((iov == NULL) != (kiov == NULL));
}
}
- rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed,
- niov, iov, kiov, offset, mlen, rlen);
+ rc = (ni->ni_net->net_lnd->lnd_recv)(ni, private, msg, delayed,
+ niov, iov, kiov, offset, mlen,
+ rlen);
if (rc < 0)
lnet_finalize(ni, msg, rc);
}
LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND ||
(msg->msg_txcredit && msg->msg_peertxcredit));
- rc = (ni->ni_lnd->lnd_send)(ni, priv, msg);
+ rc = (ni->ni_net->net_lnd->lnd_send)(ni, priv, msg);
if (rc < 0)
lnet_finalize(ni, msg, rc);
}
LASSERT(!msg->msg_sending);
LASSERT(msg->msg_receiving);
LASSERT(!msg->msg_rx_ready_delay);
- LASSERT(ni->ni_lnd->lnd_eager_recv != NULL);
+ LASSERT(ni->ni_net->net_lnd->lnd_eager_recv != NULL);
msg->msg_rx_ready_delay = 1;
- rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
- &msg->msg_private);
+ rc = (ni->ni_net->net_lnd->lnd_eager_recv)(ni, msg->msg_private, msg,
+ &msg->msg_private);
if (rc != 0) {
CERROR("recv from %s / send to %s aborted: "
"eager_recv failed %d\n",
cfs_time_t last_alive = 0;
LASSERT(lnet_peer_aliveness_enabled(lp));
- LASSERT(ni->ni_lnd->lnd_query != NULL);
+ LASSERT(ni->ni_net->net_lnd->lnd_query != NULL);
lnet_net_unlock(lp->lp_cpt);
- (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
+ (ni->ni_net->net_lnd->lnd_query)(ni, lp->lp_nid, &last_alive);
lnet_net_lock(lp->lp_cpt);
lp->lp_last_query = cfs_time_current();
static inline int
lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now)
{
- int alive;
+ int alive;
cfs_time_t deadline;
- LASSERT(lnet_peer_aliveness_enabled(lp));
+ LASSERT (lnet_peer_aliveness_enabled(lp));
- /* Trust lnet_notify() if it has more recent aliveness news, but
+ /*
+ * Trust lnet_notify() if it has more recent aliveness news, but
* ignore the initial assumed death (see lnet_peers_start_down()).
*/
if (!lp->lp_alive && lp->lp_alive_count > 0 &&
cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive))
return 0;
- deadline = cfs_time_add(lp->lp_last_alive,
- cfs_time_seconds(lp->lp_ni->ni_peertimeout));
+ deadline =
+ cfs_time_add(lp->lp_last_alive,
+ cfs_time_seconds(lp->lp_net->net_tunables.
+ lct_peer_timeout));
alive = cfs_time_after(deadline, now);
- /* Update obsolete lp_alive except for routers assumed to be dead
+ /*
+ * Update obsolete lp_alive except for routers assumed to be dead
* initially, because router checker would update aliveness in this
* case, and moreover lp_last_alive at peer creation is assumed.
*/
/* NB: returns 1 when alive, 0 when dead, negative when error;
* may drop the lnet_net_lock */
static int
-lnet_peer_alive_locked (lnet_peer_t *lp)
+lnet_peer_alive_locked (struct lnet_ni *ni, lnet_peer_t *lp)
{
cfs_time_t now = cfs_time_current();
if (lnet_peer_is_alive(lp, now))
return 1;
- /* Peer appears dead, but we should avoid frequent NI queries (at
- * most once per lnet_queryinterval seconds). */
+ /*
+ * Peer appears dead, but we should avoid frequent NI queries (at
+ * most once per lnet_queryinterval seconds).
+ */
if (lp->lp_last_query != 0) {
static const int lnet_queryinterval = 1;
libcfs_nid2str(lp->lp_nid),
(int)now, (int)next_query,
lnet_queryinterval,
- lp->lp_ni->ni_peertimeout);
+ lp->lp_net->net_tunables.lct_peer_timeout);
return 0;
}
}
/* query NI for latest aliveness news */
- lnet_ni_query_locked(lp->lp_ni, lp);
+ lnet_ni_query_locked(ni, lp);
if (lnet_peer_is_alive(lp, now))
return 1;
lnet_post_send_locked(lnet_msg_t *msg, int do_send)
{
lnet_peer_t *lp = msg->msg_txpeer;
- lnet_ni_t *ni = lp->lp_ni;
+ lnet_ni_t *ni = msg->msg_txni;
int cpt = msg->msg_tx_cpt;
struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt];
/* NB 'lp' is always the next hop */
if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 &&
- lnet_peer_alive_locked(lp) == 0) {
+ lnet_peer_alive_locked(ni, lp) == 0) {
the_lnet.ln_counters[cpt]->drop_count++;
the_lnet.ln_counters[cpt]->drop_length += msg->msg_len;
lnet_net_unlock(cpt);
int cpt = msg->msg_rx_cpt;
lnet_net_unlock(cpt);
- lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1,
+ lnet_ni_recv(msg->msg_rxni, msg->msg_private, msg, 1,
0, msg->msg_len, msg->msg_len);
lnet_net_lock(cpt);
}
{
lnet_peer_t *txpeer = msg->msg_txpeer;
lnet_msg_t *msg2;
+ struct lnet_ni *txni = msg->msg_txni;
if (msg->msg_txcredit) {
- struct lnet_ni *ni = txpeer->lp_ni;
+ struct lnet_ni *ni = msg->msg_txni;
struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt];
/* give back NI txcredits */
lnet_msg_t, msg_list);
list_del(&msg2->msg_list);
- LASSERT(msg2->msg_txpeer->lp_ni == ni);
+ LASSERT(msg2->msg_txni == ni);
LASSERT(msg2->msg_tx_delayed);
(void) lnet_post_send_locked(msg2, 1);
}
}
+ if (txni != NULL) {
+ msg->msg_txni = NULL;
+ lnet_ni_decref_locked(txni, msg->msg_tx_cpt);
+ }
+
if (txpeer != NULL) {
msg->msg_txpeer = NULL;
lnet_peer_decref_locked(txpeer);
lnet_net_unlock(cpt);
list_for_each_entry_safe(msg, tmp, &drop, msg_list) {
- lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL,
+ lnet_ni_recv(msg->msg_rxni, msg->msg_private, NULL,
0, 0, 0, msg->msg_hdr.payload_length);
list_del_init(&msg->msg_list);
lnet_finalize(NULL, msg, -ECANCELED);
lnet_return_rx_credits_locked(lnet_msg_t *msg)
{
lnet_peer_t *rxpeer = msg->msg_rxpeer;
+ struct lnet_ni *rxni = msg->msg_rxni;
lnet_msg_t *msg2;
if (msg->msg_rtrcredit) {
(void) lnet_post_routed_recv_locked(msg2, 1);
}
}
+ if (rxni != NULL) {
+ msg->msg_rxni = NULL;
+ lnet_ni_decref_locked(rxni, msg->msg_rx_cpt);
+ }
if (rxpeer != NULL) {
msg->msg_rxpeer = NULL;
lnet_peer_decref_locked(rxpeer);
}
static lnet_peer_t *
-lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid)
+lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target,
+ lnet_nid_t rtr_nid)
{
lnet_remotenet_t *rnet;
lnet_route_t *route;
/* If @rtr_nid is not LNET_NID_ANY, return the gateway with
* rtr_nid nid, otherwise find the best gateway I can use */
- rnet = lnet_find_net_locked(LNET_NIDNET(target));
+ rnet = lnet_find_rnet_locked(LNET_NIDNET(target));
if (rnet == NULL)
return NULL;
if (!lnet_is_route_alive(route))
continue;
- if (ni != NULL && lp->lp_ni != ni)
+ if (net != NULL && lp->lp_net != net)
continue;
if (lp->lp_nid == rtr_nid) /* it's pre-determined router */
msg->msg_sending = 1;
LASSERT(!msg->msg_tx_committed);
- cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid);
+ local_ni = lnet_net2ni(LNET_NIDNET(dst_nid));
+ cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid,
+ local_ni);
again:
- lnet_net_lock(cpt);
-
- if (the_lnet.ln_shutdown) {
- lnet_net_unlock(cpt);
+ if (the_lnet.ln_shutdown)
return -ESHUTDOWN;
- }
+ lnet_net_lock(cpt);
if (src_nid == LNET_NID_ANY) {
src_ni = NULL;
if (src_ni == NULL) {
src_ni = local_ni;
src_nid = src_ni->ni_nid;
- } else if (src_ni == local_ni) {
- lnet_ni_decref_locked(local_ni, cpt);
- } else {
- lnet_ni_decref_locked(local_ni, cpt);
- lnet_ni_decref_locked(src_ni, cpt);
+ } else if (src_ni != local_ni) {
lnet_net_unlock(cpt);
LCONSOLE_WARN("No route to %s via from %s\n",
libcfs_nid2str(dst_nid),
/* No send credit hassles with LOLND */
lnet_net_unlock(cpt);
lnet_ni_send(src_ni, msg);
-
- lnet_net_lock(cpt);
- lnet_ni_decref_locked(src_ni, cpt);
- lnet_net_unlock(cpt);
return 0;
}
rc = lnet_nid2peer_locked(&lp, dst_nid, cpt);
- /* lp has ref on src_ni; lose mine */
- lnet_ni_decref_locked(src_ni, cpt);
if (rc != 0) {
lnet_net_unlock(cpt);
LCONSOLE_WARN("Error %d finding peer %s\n", rc,
/* ENOMEM or shutting down */
return rc;
}
- LASSERT(lp->lp_ni == src_ni);
+ LASSERT (lp->lp_net == src_ni->ni_net);
} else {
/* sending to a remote network */
- lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid);
+ lp = lnet_find_route_locked(src_ni != NULL ?
+ src_ni->ni_net : NULL,
+ dst_nid, rtr_nid);
if (lp == NULL) {
- if (src_ni != NULL)
- lnet_ni_decref_locked(src_ni, cpt);
lnet_net_unlock(cpt);
LCONSOLE_WARN("No route to %s via %s "
* pre-determined router, this can happen if router table
* was changed when we release the lock */
if (rtr_nid != lp->lp_nid) {
- cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid);
+ cpt2 = lp->lp_cpt;
if (cpt2 != cpt) {
- if (src_ni != NULL)
- lnet_ni_decref_locked(src_ni, cpt);
lnet_net_unlock(cpt);
rtr_nid = lp->lp_nid;
lnet_msgtyp2str(msg->msg_type), msg->msg_len);
if (src_ni == NULL) {
- src_ni = lp->lp_ni;
+ src_ni = lnet_get_next_ni_locked(lp->lp_net, NULL);
+ LASSERT(src_ni != NULL);
src_nid = src_ni->ni_nid;
} else {
- LASSERT(src_ni == lp->lp_ni);
- lnet_ni_decref_locked(src_ni, cpt);
+ LASSERT (src_ni->ni_net == lp->lp_net);
}
lnet_peer_addref_locked(lp);
LASSERT(!msg->msg_txcredit);
LASSERT(msg->msg_txpeer == NULL);
- msg->msg_txpeer = lp; /* msg takes my ref on lp */
+ msg->msg_txpeer = lp; /* msg takes my ref on lp */
+ /* set the NI for this message */
+ msg->msg_txni = src_ni;
+ lnet_ni_addref_locked(msg->msg_txni, cpt);
rc = lnet_post_send_locked(msg, 0);
lnet_net_unlock(cpt);
info.mi_rlength = hdr->payload_length;
info.mi_roffset = hdr->msg.put.offset;
info.mi_mbits = hdr->msg.put.match_bits;
+ info.mi_cpt = msg->msg_rxpeer->lp_cpt;
- msg->msg_rx_ready_delay = ni->ni_lnd->lnd_eager_recv == NULL;
+ msg->msg_rx_ready_delay = ni->ni_net->net_lnd->lnd_eager_recv == NULL;
ready_delay = msg->msg_rx_ready_delay;
again:
if (msg->msg_rxpeer->lp_rtrcredits <= 0 ||
lnet_msg2bufpool(msg)->rbp_credits <= 0) {
- if (ni->ni_lnd->lnd_eager_recv == NULL) {
+ if (ni->ni_net->net_lnd->lnd_eager_recv == NULL) {
msg->msg_rx_ready_delay = 1;
} else {
lnet_net_unlock(msg->msg_rx_cpt);
payload_length = le32_to_cpu(hdr->payload_length);
for_me = (ni->ni_nid == dest_nid);
- cpt = lnet_cpt_of_nid(from_nid);
+ cpt = lnet_cpt_of_nid(from_nid, ni);
switch (type) {
case LNET_MSG_ACK:
return 0;
goto drop;
}
+ msg->msg_rxni = ni;
+ lnet_ni_addref_locked(ni, cpt);
if (lnet_isrouter(msg->msg_rxpeer)) {
lnet_peer_set_alive(msg->msg_rxpeer);
* called lnet_drop_message(), so I just hang onto msg as well
* until that's done */
- lnet_drop_message(msg->msg_rxpeer->lp_ni,
+ lnet_drop_message(msg->msg_rxni,
msg->msg_rxpeer->lp_cpt,
msg->msg_private, msg->msg_len);
/*
* but we still should give error code so lnet_msg_decommit()
* can skip counters operations and other checks.
*/
- lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT);
+ lnet_finalize(msg->msg_rxni, msg, -ENOENT);
}
}
LASSERT(msg->msg_rx_delayed);
LASSERT(msg->msg_md != NULL);
LASSERT(msg->msg_rxpeer != NULL);
+ LASSERT(msg->msg_rxni != NULL);
LASSERT(msg->msg_hdr.type == LNET_MSG_PUT);
CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d "
msg->msg_hdr.msg.put.offset,
msg->msg_hdr.payload_length);
- lnet_recv_put(msg->msg_rxpeer->lp_ni, msg);
+ lnet_recv_put(msg->msg_rxni, msg);
}
}
lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length);
lnet_res_unlock(cpt);
- cpt = lnet_cpt_of_nid(peer_id.nid);
+ cpt = lnet_cpt_of_nid(peer_id.nid, ni);
lnet_net_lock(cpt);
lnet_msg_commit(msg, cpt);
return msg;
drop:
- cpt = lnet_cpt_of_nid(peer_id.nid);
+ cpt = lnet_cpt_of_nid(peer_id.nid, ni);
lnet_net_lock(cpt);
the_lnet.ln_counters[cpt]->drop_count++;
LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp)
{
struct list_head *e;
- struct lnet_ni *ni;
+ struct lnet_ni *ni = NULL;
lnet_remotenet_t *rnet;
__u32 dstnet = LNET_NIDNET(dstnid);
int hops;
cpt = lnet_net_lock_current();
- list_for_each(e, &the_lnet.ln_nis) {
- ni = list_entry(e, lnet_ni_t, ni_list);
-
+ while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
if (ni->ni_nid == dstnid) {
if (srcnidp != NULL)
*srcnidp = dstnid;
LASSERT(shortest != NULL);
hops = shortest_hops;
- if (srcnidp != NULL)
- *srcnidp = shortest->lr_gateway->lp_ni->ni_nid;
+ if (srcnidp != NULL) {
+ ni = lnet_get_next_ni_locked(
+ shortest->lr_gateway->lp_net,
+ NULL);
+ *srcnidp = ni->ni_nid;
+ }
if (orderp != NULL)
*orderp = order;
lnet_net_unlock(cpt);
/* if it's a unique portal, return match-table hashed by NID */
return lnet_ptl_is_unique(ptl) ?
- ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL;
+ ptl->ptl_mtables[lnet_cpt_of_nid(id.nid, NULL)] : NULL;
}
struct lnet_match_table *
rotor = ptl->ptl_rotor++; /* get round-robin factor */
if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed)
- cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid);
+ cpt = info->mi_cpt;
else
cpt = rotor % LNET_CPT_NUMBER;
/* grab all messages which are on the NI passed in */
list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed,
msg_list) {
- if (msg->msg_rxpeer->lp_ni == ni)
+ if (msg->msg_txni == ni || msg->msg_rxni == ni)
list_move(&msg->msg_list, &zombies);
}
} else {
static int
lolnd_startup (lnet_ni_t *ni)
{
- LASSERT (ni->ni_lnd == &the_lolnd);
+ LASSERT (ni->ni_net->net_lnd == &the_lolnd);
LASSERT (!lolnd_instanced);
lolnd_instanced = 1;
msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
LASSERT(msg->msg_rxpeer != NULL);
+ LASSERT(msg->msg_rxni != NULL);
- ni = msg->msg_rxpeer->lp_ni;
+ ni = msg->msg_rxni;
cpt = msg->msg_rx_cpt;
list_del_init(&msg->msg_list);
for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
lp_hashlist) {
- if (ni != NULL && ni != lp->lp_ni)
+ if (ni != NULL && ni->ni_net != lp->lp_net)
continue;
list_del_init(&lp->lp_hashlist);
/* Lose hash table's ref */
for (i = 0; i < LNET_PEER_HASH_SIZE; i++) {
list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i],
lp_hashlist) {
- if (ni != lp->lp_ni)
+ if (ni->ni_net != lp->lp_net)
continue;
if (lp->lp_rtr_refcount == 0)
LASSERT(ptable->pt_number > 0);
ptable->pt_number--;
- lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt);
- lp->lp_ni = NULL;
+ lp->lp_net = NULL;
list_add(&lp->lp_hashlist, &ptable->pt_deathrow);
LASSERT(ptable->pt_zombies > 0);
return -ESHUTDOWN;
/* cpt can be LNET_LOCK_EX if it's called from router functions */
- cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid);
+ cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid, NULL);
ptable = the_lnet.ln_peer_tables[cpt2];
lp = lnet_find_peer_locked(ptable, nid);
goto out;
}
- lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2);
- if (lp->lp_ni == NULL) {
- rc = -EHOSTUNREACH;
- goto out;
- }
-
+ lp->lp_net = lnet_get_net_locked(LNET_NIDNET(lp->lp_nid));
lp->lp_txcredits =
- lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits;
+ lp->lp_mintxcredits = lp->lp_net->net_tunables.lct_peer_tx_credits;
lp->lp_rtrcredits =
- lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni);
+ lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_net);
list_add_tail(&lp->lp_hashlist,
&ptable->pt_hash[lnet_nid2peerhash(nid)]);
int rc;
int cpt;
- cpt = lnet_cpt_of_nid(nid);
+ cpt = lnet_cpt_of_nid(nid, NULL);
lnet_net_lock(cpt);
rc = lnet_nid2peer_locked(&lp, nid, cpt);
CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n",
libcfs_nid2str(lp->lp_nid), lp->lp_refcount,
- aliveness, lp->lp_ni->ni_peertxcredits,
+ aliveness, lp->lp_net->net_tunables.lct_peer_tx_credits,
lp->lp_rtrcredits, lp->lp_minrtrcredits,
lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob);
*nid = lp->lp_nid;
*refcount = lp->lp_refcount;
- *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits;
+ *ni_peer_tx_credits =
+ lp->lp_net->net_tunables.lct_peer_tx_credits;
*peer_tx_credits = lp->lp_txcredits;
*peer_rtr_credits = lp->lp_rtrcredits;
*peer_min_rtr_credits = lp->lp_mintxcredits;
MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
int
-lnet_peer_buffer_credits(lnet_ni_t *ni)
+lnet_peer_buffer_credits(struct lnet_net *net)
{
/* NI option overrides LNet default */
- if (ni->ni_peerrtrcredits > 0)
- return ni->ni_peerrtrcredits;
+ if (net->net_tunables.lct_peer_rtr_credits > 0)
+ return net->net_tunables.lct_peer_rtr_credits;
if (peer_buffer_credits > 0)
return peer_buffer_credits;
/* As an approximation, allow this peer the same number of router
* buffers as it is allowed outstanding sends */
- return ni->ni_peertxcredits;
+ return net->net_tunables.lct_peer_tx_credits;
}
/* forward ref's */
lp->lp_notifylnd = 0;
lp->lp_notify = 0;
- if (notifylnd && ni->ni_lnd->lnd_notify != NULL) {
+ if (notifylnd && ni->ni_net->net_lnd->lnd_notify != NULL) {
lnet_net_unlock(lp->lp_cpt);
/* A new notification could happen now; I'll handle it
* when control returns to me */
- (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);
+ (ni->ni_net->net_lnd->lnd_notify)(ni, lp->lp_nid,
+ alive);
lnet_net_lock(lp->lp_cpt);
}
}
lnet_remotenet_t *
-lnet_find_net_locked (__u32 net)
+lnet_find_rnet_locked(__u32 net)
{
lnet_remotenet_t *rnet;
struct list_head *tmp;
__u32 lnd_type;
__u32 seed[2];
struct timespec64 ts;
- lnet_ni_t *ni;
- struct list_head *tmp;
+ lnet_ni_t *ni = NULL;
if (seeded)
return;
/* Nodes with small feet have little entropy
* the NID for this node gives the most entropy in the low bits */
- list_for_each(tmp, &the_lnet.ln_nis) {
- ni = list_entry(tmp, lnet_ni_t, ni_list);
+ while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
if (lnd_type != LOLND)
LASSERT(!the_lnet.ln_shutdown);
- rnet2 = lnet_find_net_locked(net);
+ rnet2 = lnet_find_rnet_locked(net);
if (rnet2 == NULL) {
/* new network */
list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
lnet_add_route_to_rnet(rnet2, route);
- ni = route->lr_gateway->lp_ni;
+ ni = lnet_get_next_ni_locked(route->lr_gateway->lp_net, NULL);
lnet_net_unlock(LNET_LOCK_EX);
/* XXX Assume alive */
- if (ni->ni_lnd->lnd_notify != NULL)
- (ni->ni_lnd->lnd_notify)(ni, gateway, 1);
+ if (ni->ni_net->net_lnd->lnd_notify != NULL)
+ (ni->ni_net->net_lnd->lnd_notify)(ni, gateway, 1);
lnet_net_lock(LNET_LOCK_EX);
}
continue;
}
- if (route->lr_gateway->lp_ni ==
- route2->lr_gateway->lp_ni)
+ if (route->lr_gateway->lp_net ==
+ route2->lr_gateway->lp_net)
continue;
nid1 = route->lr_gateway->lp_nid;
static void
lnet_update_ni_status_locked(void)
{
- lnet_ni_t *ni;
- time64_t now;
+ lnet_ni_t *ni = NULL;
+ time64_t now;
int timeout;
LASSERT(the_lnet.ln_routing);
MAX(live_router_check_interval, dead_router_check_interval);
now = ktime_get_real_seconds();
- list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
- if (ni->ni_lnd->lnd_type == LOLND)
+ while ((ni = lnet_get_next_ni_locked(NULL, ni))) {
+ if (ni->ni_net->net_lnd->lnd_type == LOLND)
continue;
if (now < ni->ni_last_alive + timeout)
lnet_ping_router_locked (lnet_peer_t *rtr)
{
lnet_rc_data_t *rcd = NULL;
- cfs_time_t now = cfs_time_current();
- int secs;
+ cfs_time_t now = cfs_time_current();
+ int secs;
+ struct lnet_ni *ni;
lnet_peer_addref_locked(rtr);
lnet_notify_locked(rtr, 1, 0, now);
/* Run any outstanding notifications */
- lnet_ni_notify_locked(rtr->lp_ni, rtr);
+ ni = lnet_get_next_ni_locked(rtr->lp_net, NULL);
+ lnet_ni_notify_locked(ni, rtr);
if (!lnet_isrouter(rtr) ||
the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
list_for_each(entry, &the_lnet.ln_routers) {
rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
- cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
+ cpt2 = rtr->lp_cpt;
if (cpt != cpt2) {
lnet_net_unlock(cpt);
cpt = cpt2;
{
struct lnet_peer *lp = NULL;
cfs_time_t now = cfs_time_current();
- int cpt = lnet_cpt_of_nid(nid);
+ int cpt = lnet_cpt_of_nid(nid, ni);
LASSERT (!in_interrupt ());
p = NULL;
hoff = 1;
hash++;
- }
+ }
if (peer != NULL) {
- lnet_nid_t nid = peer->lp_nid;
- int nrefs = peer->lp_refcount;
- int lastalive = -1;
- char *aliveness = "NA";
- int maxcr = peer->lp_ni->ni_peertxcredits;
- int txcr = peer->lp_txcredits;
- int mintxcr = peer->lp_mintxcredits;
- int rtrcr = peer->lp_rtrcredits;
- int minrtrcr = peer->lp_minrtrcredits;
- int txqnob = peer->lp_txqnob;
+ lnet_nid_t nid = peer->lp_nid;
+ int nrefs = peer->lp_refcount;
+ int lastalive = -1;
+ char *aliveness = "NA";
+ int maxcr = peer->lp_net->net_tunables.lct_peer_tx_credits;
+ int txcr = peer->lp_txcredits;
+ int mintxcr = peer->lp_mintxcredits;
+ int rtrcr = peer->lp_rtrcredits;
+ int minrtrcr = peer->lp_minrtrcredits;
+ int txqnob = peer->lp_txqnob;
if (lnet_isrouter(peer) ||
lnet_peer_aliveness_enabled(peer))
"%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n",
"nid", "status", "alive", "refs", "peer",
"rtr", "max", "tx", "min");
- LASSERT(tmpstr + tmpsiz - s > 0);
+ LASSERT (tmpstr + tmpsiz - s > 0);
} else {
- struct list_head *n;
- lnet_ni_t *ni = NULL;
- int skip = *ppos - 1;
+ lnet_ni_t *ni = NULL;
+ int skip = *ppos - 1;
lnet_net_lock(0);
- n = the_lnet.ln_nis.next;
-
- while (n != &the_lnet.ln_nis) {
- lnet_ni_t *a_ni = list_entry(n, lnet_ni_t, ni_list);
-
- if (skip == 0) {
- ni = a_ni;
- break;
- }
-
- skip--;
- n = n->next;
- }
+ ni = lnet_get_ni_idx_locked(skip);
if (ni != NULL) {
struct lnet_tx_queue *tq;
last_alive = now - ni->ni_last_alive;
/* @lo forever alive */
- if (ni->ni_lnd->lnd_type == LOLND)
+ if (ni->ni_net->net_lnd->lnd_type == LOLND)
last_alive = 0;
lnet_ni_lock(ni);
"%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n",
libcfs_nid2str(ni->ni_nid), stat,
last_alive, *ni->ni_refs[i],
- ni->ni_peertxcredits,
- ni->ni_peerrtrcredits,
+ ni->ni_net->net_tunables.lct_peer_tx_credits,
+ ni->ni_net->net_tunables.lct_peer_rtr_credits,
tq->tq_credits_max,
tq->tq_credits, tq->tq_credits_min);
if (i != 0)
return -EINVAL;
list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) {
- bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid),
+ bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL),
off, npg, len, opc == LST_BRW_READ);
if (bulk == NULL) {
brw_client_fini(tsi);
wi = &tsu->tsu_worker;
swi_init_workitem(wi, tsu, sfw_run_test,
lst_sched_test[\
- lnet_cpt_of_nid(tsu->tsu_dest.nid)]);
+ lnet_cpt_of_nid(tsu->tsu_dest.nid,
+ NULL)]);
swi_schedule_workitem(wi);
}
}
INIT_LIST_HEAD(&rpc->crpc_list);
swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc,
- lst_sched_test[lnet_cpt_of_nid(peer.nid)]);
+ lst_sched_test[lnet_cpt_of_nid(peer.nid, NULL)]);
spin_lock_init(&rpc->crpc_lock);
atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */