From 8cbb8cd3e771e7f7e0f99cafc19fad32770dc015 Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Fri, 11 Dec 2015 20:02:54 -0800 Subject: [PATCH] LU-7734 lnet: Multi-Rail local NI split This patch allows the configuration of multiple NIs under one Net. It is now possible to have multiple NIDs on the same network: Ex: @tcp, @tcp. This can be configured using the following syntax: Ex: tcp(eth0, eth1) The data structures for the example above can be visualized as follows NET(tcp) | ----------------- | | NI(eth0) NI(eth1) For more details refer to the Mult-Rail Requirements and HLD documents Signed-off-by: Amir Shehata Change-Id: Id7c73b9b811a3082b61e53b9e9f95743188cbd51 Reviewed-on: http://review.whamcloud.com/18274 Tested-by: Jenkins Reviewed-by: Doug Oucharek Tested-by: Maloo Reviewed-by: Olaf Weber --- lnet/include/lnet/lib-dlc.h | 18 +- lnet/include/lnet/lib-lnet.h | 80 ++- lnet/include/lnet/lib-types.h | 201 +++++-- lnet/klnds/gnilnd/gnilnd.c | 4 +- lnet/klnds/o2iblnd/o2iblnd.c | 12 +- lnet/klnds/o2iblnd/o2iblnd.h | 6 +- lnet/klnds/o2iblnd/o2iblnd_cb.c | 120 ++-- lnet/klnds/o2iblnd/o2iblnd_modparams.c | 74 +-- lnet/klnds/socklnd/socklnd.c | 21 +- lnet/lnet/acceptor.c | 10 +- lnet/lnet/api-ni.c | 972 ++++++++++++++++++++++----------- lnet/lnet/config.c | 701 ++++++++++++++++++------ lnet/lnet/lib-move.c | 159 +++--- lnet/lnet/lib-ptl.c | 6 +- lnet/lnet/lo.c | 2 +- lnet/lnet/net_fault.c | 3 +- lnet/lnet/peer.c | 27 +- lnet/lnet/router.c | 53 +- lnet/lnet/router_proc.c | 49 +- lnet/selftest/brw_test.c | 2 +- lnet/selftest/framework.c | 3 +- lnet/selftest/selftest.h | 2 +- 22 files changed, 1682 insertions(+), 843 deletions(-) diff --git a/lnet/include/lnet/lib-dlc.h b/lnet/include/lnet/lib-dlc.h index 436d9e8..69247f3 100644 --- a/lnet/include/lnet/lib-dlc.h +++ b/lnet/include/lnet/lib-dlc.h @@ -39,10 +39,10 @@ struct lnet_ioctl_config_lnd_cmn_tunables { __u32 lct_version; - __u32 lct_peer_timeout; - __u32 lct_peer_tx_credits; - __u32 lct_peer_rtr_credits; - __u32 lct_max_tx_credits; + __s32 lct_peer_timeout; + __s32 lct_peer_tx_credits; + __s32 lct_peer_rtr_credits; + __s32 lct_max_tx_credits; }; struct lnet_ioctl_config_o2iblnd_tunables { @@ -56,11 +56,15 @@ struct lnet_ioctl_config_o2iblnd_tunables { __u32 pad; }; +struct lnet_lnd_tunables { + union { + struct lnet_ioctl_config_o2iblnd_tunables lnd_o2ib; + } lnd_tun_u; +}; + struct lnet_ioctl_config_lnd_tunables { struct lnet_ioctl_config_lnd_cmn_tunables lt_cmn; - union { - struct lnet_ioctl_config_o2iblnd_tunables lt_o2ib; - } lt_tun_u; + struct lnet_lnd_tunables lt_tun; }; struct lnet_ioctl_net_config { diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 3b29582..6c12c35 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -279,24 +279,6 @@ lnet_me_free(lnet_me_t *me) kmem_cache_free(lnet_mes_cachep, me); } -static inline lnet_msg_t * -lnet_msg_alloc(void) -{ - lnet_msg_t *msg; - - LIBCFS_ALLOC(msg, sizeof(*msg)); - - /* no need to zero, LIBCFS_ALLOC does for us */ - return (msg); -} - -static inline void -lnet_msg_free(lnet_msg_t *msg) -{ - LASSERT(!msg->msg_onactivelist); - LIBCFS_FREE(msg, sizeof(*msg)); -} - lnet_libhandle_t *lnet_res_lh_lookup(struct lnet_res_container *rec, __u64 cookie); void lnet_res_lh_initialize(struct lnet_res_container *rec, @@ -454,9 +436,40 @@ lnet_ni_decref(lnet_ni_t *ni) lnet_net_unlock(0); } -void lnet_ni_free(lnet_ni_t *ni); -lnet_ni_t * -lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist); +static inline lnet_msg_t * 
+lnet_msg_alloc(void) +{ + lnet_msg_t *msg; + + LIBCFS_ALLOC(msg, sizeof(*msg)); + + /* no need to zero, LIBCFS_ALLOC does for us */ + return (msg); +} + +static inline void +lnet_msg_free(lnet_msg_t *msg) +{ + LASSERT(!msg->msg_onactivelist); + + /* Make sure we have no references to an NI. */ + if (msg->msg_txni) + lnet_ni_decref_locked(msg->msg_txni, msg->msg_tx_cpt); + if (msg->msg_rxni) + lnet_ni_decref_locked(msg->msg_rxni, msg->msg_rx_cpt); + + LIBCFS_FREE(msg, sizeof(*msg)); +} + +void lnet_ni_free(struct lnet_ni *ni); +void lnet_net_free(struct lnet_net *net); + +struct lnet_net * +lnet_net_alloc(__u32 net_type, struct list_head *netlist); + +struct lnet_ni * +lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, + char *iface); static inline int lnet_nid2peerhash(lnet_nid_t nid) @@ -475,11 +488,13 @@ lnet_net2rnethash(__u32 net) extern lnd_t the_lolnd; extern int avoid_asym_router_failure; -extern int lnet_cpt_of_nid_locked(lnet_nid_t nid); -extern int lnet_cpt_of_nid(lnet_nid_t nid); +extern int lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni); +extern int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni); extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt); +extern lnet_ni_t *lnet_nid2ni_addref(lnet_nid_t nid); extern lnet_ni_t *lnet_net2ni_locked(__u32 net, int cpt); extern lnet_ni_t *lnet_net2ni(__u32 net); +bool lnet_is_ni_healthy_locked(struct lnet_ni *ni); int lnet_lib_init(void); void lnet_lib_exit(void); @@ -496,6 +511,9 @@ void lnet_destroy_routes(void); int lnet_get_route(int idx, __u32 *net, __u32 *hops, lnet_nid_t *gateway, __u32 *alive, __u32 *priority); int lnet_get_rtr_pool_cfg(int idx, struct lnet_ioctl_pool_cfg *pool_cfg); +struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet, + struct lnet_ni *prev); +struct lnet_ni *lnet_get_ni_idx_locked(int idx); struct libcfs_ioctl_handler { struct list_head item; @@ -521,11 +539,12 @@ int lnet_rtrpools_adjust(int tiny, int small, int large); int lnet_rtrpools_enable(void); void lnet_rtrpools_disable(void); void lnet_rtrpools_free(int keep_pools); -lnet_remotenet_t *lnet_find_net_locked (__u32 net); +lnet_remotenet_t *lnet_find_rnet_locked(__u32 net); int lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf); int lnet_dyn_del_ni(__u32 net); int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason); +struct lnet_net *lnet_get_net_locked(__u32 net_id); int lnet_islocalnid(lnet_nid_t nid); int lnet_islocalnet(__u32 net); @@ -731,8 +750,8 @@ void lnet_unregister_lnd(lnd_t *lnd); int lnet_connect(struct socket **sockp, lnet_nid_t peer_nid, __u32 local_ip, __u32 peer_ip, int peer_port); void lnet_connect_console_error(int rc, lnet_nid_t peer_nid, - __u32 peer_ip, int port); -int lnet_count_acceptor_nis(void); + __u32 peer_ip, int port); +int lnet_count_acceptor_nets(void); int lnet_acceptor_timeout(void); int lnet_acceptor_port(void); int lnet_acceptor_start(void); @@ -754,7 +773,7 @@ int lnet_sock_connect(struct socket **sockp, int *fatal, __u32 peer_ip, int peer_port); int lnet_peers_start_down(void); -int lnet_peer_buffer_credits(lnet_ni_t *ni); +int lnet_peer_buffer_credits(struct lnet_net *net); int lnet_router_checker_start(void); void lnet_router_checker_stop(void); @@ -763,8 +782,11 @@ void lnet_swap_pinginfo(struct lnet_ping_info *info); int lnet_parse_ip2nets(char **networksp, char *ip2nets); int lnet_parse_routes(char *route_str, int *im_a_router); -int lnet_parse_networks(struct list_head *nilist, char *networks); -int 
lnet_net_unique(__u32 net, struct list_head *nilist); +int lnet_parse_networks(struct list_head *nilist, char *networks, + bool use_tcp_bonding); +bool lnet_net_unique(__u32 net_id, struct list_head *nilist, + struct lnet_net **net); +bool lnet_ni_unique_net(struct list_head *nilist, char *iface); int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt); lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable, diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index fca5ace..601488f 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -87,34 +87,37 @@ typedef struct lnet_msg { /* ready for pending on RX delay list */ unsigned int msg_rx_ready_delay:1; - unsigned int msg_vmflush:1; /* VM trying to free memory */ - unsigned int msg_target_is_router:1; /* sending to a router */ - unsigned int msg_routing:1; /* being forwarded */ - unsigned int msg_ack:1; /* ack on finalize (PUT) */ - unsigned int msg_sending:1; /* outgoing message */ - unsigned int msg_receiving:1; /* being received */ - unsigned int msg_txcredit:1; /* taken an NI send credit */ - unsigned int msg_peertxcredit:1; /* taken a peer send credit */ - unsigned int msg_rtrcredit:1; /* taken a globel router credit */ - unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ - unsigned int msg_onactivelist:1; /* on the activelist */ + unsigned int msg_vmflush:1; /* VM trying to free memory */ + unsigned int msg_target_is_router:1; /* sending to a router */ + unsigned int msg_routing:1; /* being forwarded */ + unsigned int msg_ack:1; /* ack on finalize (PUT) */ + unsigned int msg_sending:1; /* outgoing message */ + unsigned int msg_receiving:1; /* being received */ + unsigned int msg_txcredit:1; /* taken an NI send credit */ + unsigned int msg_peertxcredit:1; /* taken a peer send credit */ + unsigned int msg_rtrcredit:1; /* taken a globel router credit */ + unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ + unsigned int msg_onactivelist:1; /* on the activelist */ unsigned int msg_rdma_get:1; - struct lnet_peer *msg_txpeer; /* peer I'm sending to */ - struct lnet_peer *msg_rxpeer; /* peer I received from */ + struct lnet_peer *msg_txpeer; /* peer I'm sending to */ + struct lnet_peer *msg_rxpeer; /* peer I received from */ - void *msg_private; + void *msg_private; struct lnet_libmd *msg_md; - - unsigned int msg_len; - unsigned int msg_wanted; - unsigned int msg_offset; - unsigned int msg_niov; + /* the NI the message was sent or received over */ + struct lnet_ni *msg_txni; + struct lnet_ni *msg_rxni; + + unsigned int msg_len; + unsigned int msg_wanted; + unsigned int msg_offset; + unsigned int msg_niov; struct kvec *msg_iov; - lnet_kiov_t *msg_kiov; + lnet_kiov_t *msg_kiov; - lnet_event_t msg_ev; - lnet_hdr_t msg_hdr; + lnet_event_t msg_ev; + lnet_hdr_t msg_hdr; } lnet_msg_t; @@ -263,29 +266,123 @@ struct lnet_tx_queue { struct list_head tq_delayed; /* delayed TXs */ }; +enum lnet_net_state { + /* set when net block is allocated */ + LNET_NET_STATE_INIT = 0, + /* set when NIs in net are started successfully */ + LNET_NET_STATE_ACTIVE, + /* set if all NIs in net are in FAILED state */ + LNET_NET_STATE_INACTIVE, + /* set when shutting down a NET */ + LNET_NET_STATE_DELETING +}; + +enum lnet_ni_state { + /* set when NI block is allocated */ + LNET_NI_STATE_INIT = 0, + /* set when NI is started successfully */ + LNET_NI_STATE_ACTIVE, + /* set when LND notifies NI failed */ + LNET_NI_STATE_FAILED, + /* set when LND notifies NI degraded */ + 
LNET_NI_STATE_DEGRADED, + /* set when shuttding down NI */ + LNET_NI_STATE_DELETING +}; + +struct lnet_net { + /* chain on the ln_nets */ + struct list_head net_list; + + /* net ID, which is compoed of + * (net_type << 16) | net_num. + * net_type can be one of the enumarated types defined in + * lnet/include/lnet/nidstr.h */ + __u32 net_id; + + /* priority of the network */ + __u32 net_prio; + + /* total number of CPTs in the array */ + __u32 net_ncpts; + + /* cumulative CPTs of all NIs in this net */ + __u32 *net_cpts; + + /* network tunables */ + struct lnet_ioctl_config_lnd_cmn_tunables net_tunables; + + /* + * boolean to indicate that the tunables have been set and + * shouldn't be reset + */ + bool net_tunables_set; + + /* procedural interface */ + lnd_t *net_lnd; + + /* list of NIs on this net */ + struct list_head net_ni_list; + + /* list of NIs being added, but not started yet */ + struct list_head net_ni_added; + + /* dying LND instances */ + struct list_head net_ni_zombie; + + /* network state */ + enum lnet_net_state net_state; +}; + typedef struct lnet_ni { + /* chain on the lnet_net structure */ + struct list_head ni_netlist; + + /* chain on net_ni_cpt */ + struct list_head ni_cptlist; + spinlock_t ni_lock; - struct list_head ni_list; /* chain on ln_nis */ - struct list_head ni_cptlist; /* chain on ln_nis_cpt */ - int ni_maxtxcredits; /* # tx credits */ - /* # per-peer send credits */ - int ni_peertxcredits; - /* # per-peer router buffer credits */ - int ni_peerrtrcredits; - /* seconds to consider peer dead */ - int ni_peertimeout; - int ni_ncpts; /* number of CPTs */ - __u32 *ni_cpts; /* bond NI on some CPTs */ - lnet_nid_t ni_nid; /* interface's NID */ - void *ni_data; /* instance-specific data */ - lnd_t *ni_lnd; /* procedural interface */ - struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ - int **ni_refs; /* percpt reference count */ - time64_t ni_last_alive; /* when I was last alive */ - struct lnet_ni_status *ni_status; /* my health status */ + + /* number of CPTs */ + int ni_ncpts; + + /* bond NI on some CPTs */ + __u32 *ni_cpts; + + /* interface's NID */ + lnet_nid_t ni_nid; + + /* instance-specific data */ + void *ni_data; + + /* percpt TX queues */ + struct lnet_tx_queue **ni_tx_queues; + + /* percpt reference count */ + int **ni_refs; + + /* when I was last alive */ + long ni_last_alive; + + /* pointer to parent network */ + struct lnet_net *ni_net; + + /* my health status */ + lnet_ni_status_t *ni_status; + + /* NI FSM */ + enum lnet_ni_state ni_state; + /* per NI LND tunables */ - struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables; - /* equivalent interfaces to use */ + struct lnet_lnd_tunables ni_lnd_tunables; + + /* lnd tunables set explicitly */ + bool ni_lnd_tunables_set; + + /* + * equivalent interfaces to use + * This is an array because socklnd bonding can still be configured + */ char *ni_interfaces[LNET_MAX_INTERFACES]; struct net *ni_net_ns; /* original net namespace */ } lnet_ni_t; @@ -362,8 +459,8 @@ typedef struct lnet_peer { cfs_time_t lp_last_alive; /* when lp_ni was queried last time */ cfs_time_t lp_last_query; - /* interface peer is on */ - lnet_ni_t *lp_ni; + /* network peer is on */ + struct lnet_net *lp_net; lnet_nid_t lp_nid; /* peer's NID */ int lp_refcount; /* # refs */ int lp_cpt; /* CPT this peer attached on */ @@ -392,7 +489,7 @@ struct lnet_peer_table { /* peer aliveness is enabled only on routers for peers in a network where the * lnet_ni_t::ni_peertimeout has been set to a positive value */ #define 
lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \ - (lp)->lp_ni->ni_peertimeout > 0) + (lp)->lp_net->net_tunables.lct_peer_timeout > 0) typedef struct { struct list_head lr_list; /* chain on net */ @@ -470,6 +567,7 @@ enum { struct lnet_match_info { __u64 mi_mbits; lnet_process_id_t mi_id; + unsigned int mi_cpt; unsigned int mi_opc; unsigned int mi_portal; unsigned int mi_rlength; @@ -597,13 +695,12 @@ typedef struct struct list_head ln_test_peers; struct list_head ln_drop_rules; struct list_head ln_delay_rules; - - struct list_head ln_nis; /* LND instances */ - /* NIs bond on specific CPT(s) */ - struct list_head ln_nis_cpt; - /* dying LND instances */ - struct list_head ln_nis_zombie; - lnet_ni_t *ln_loni; /* the loopback NI */ + /* LND instances */ + struct list_head ln_nets; + /* the loopback NI */ + struct lnet_ni *ln_loni; + /* network zombie list */ + struct list_head ln_net_zombie; /* remote networks with routes to them */ struct list_head *ln_remote_nets_hash; diff --git a/lnet/klnds/gnilnd/gnilnd.c b/lnet/klnds/gnilnd/gnilnd.c index 4e1d708..2922f5e 100644 --- a/lnet/klnds/gnilnd/gnilnd.c +++ b/lnet/klnds/gnilnd/gnilnd.c @@ -2684,9 +2684,9 @@ kgnilnd_startup(lnet_ni_t *ni) kgn_net_t *net; ENTRY; - LASSERTF(ni->ni_lnd == &the_kgnilnd, + LASSERTF(ni->ni_net->net_lnd == &the_kgnilnd, "bad LND 0x%p != the_kgnilnd @ 0x%p\n", - ni->ni_lnd, &the_kgnilnd); + ni->ni_net->net_lnd, &the_kgnilnd); if (kgnilnd_data.kgn_init == GNILND_INIT_NOTHING) { rc = kgnilnd_base_startup(); diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index ee5a01f..1c3e2d2 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -317,7 +317,7 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) { kib_peer_t *peer; kib_net_t *net = ni->ni_data; - int cpt = lnet_cpt_of_nid(nid); + int cpt = lnet_cpt_of_nid(nid, ni); unsigned long flags; LASSERT(net != NULL); @@ -334,7 +334,7 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) peer->ibp_error = 0; peer->ibp_last_alive = 0; peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni); - peer->ibp_queue_depth = ni->ni_peertxcredits; + peer->ibp_queue_depth = ni->ni_net->net_tunables.lct_peer_tx_credits; atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ @@ -722,7 +722,7 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, dev = net->ibn_dev; - cpt = lnet_cpt_of_nid(peer->ibp_nid); + cpt = lnet_cpt_of_nid(peer->ibp_nid, peer->ibp_ni); sched = kiblnd_data.kib_scheds[cpt]; LASSERT(sched->ibs_nthreads > 0); @@ -1391,7 +1391,7 @@ kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd, int mod; __u16 nfrags; - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; mod = tunables->lnd_map_on_demand; nfrags = (negotiated_nfrags != -1) ? 
negotiated_nfrags : mod; @@ -2395,7 +2395,7 @@ kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts, int ncpts) int rc; int i; - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); if (tunables->lnd_map_on_demand == 0) { @@ -3177,7 +3177,7 @@ kiblnd_startup (lnet_ni_t *ni) int rc; int newdev; - LASSERT (ni->ni_lnd == &the_o2iblnd); + LASSERT (ni->ni_net->net_lnd == &the_o2iblnd); if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) { rc = kiblnd_base_startup(); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index a617b63..c398b84 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -782,7 +782,7 @@ kiblnd_cfg_rdma_frags(struct lnet_ni *ni) struct lnet_ioctl_config_o2iblnd_tunables *tunables; int mod; - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; mod = tunables->lnd_map_on_demand; return mod != 0 ? mod : IBLND_MAX_RDMA_FRAGS; } @@ -801,7 +801,7 @@ kiblnd_concurrent_sends(int version, struct lnet_ni *ni) struct lnet_ioctl_config_o2iblnd_tunables *tunables; int concurrent_sends; - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; concurrent_sends = tunables->lnd_concurrent_sends; if (version == IBLND_MSG_VERSION_1) { @@ -941,7 +941,7 @@ kiblnd_need_noop(kib_conn_t *conn) struct lnet_ioctl_config_o2iblnd_tunables *tunables; LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; if (conn->ibc_outstanding_credits < IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) && diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index a72bdba..d352dd3 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -116,7 +116,7 @@ kiblnd_get_idle_tx(lnet_ni_t *ni, lnet_nid_t target) kib_tx_t *tx; kib_tx_poolset_t *tps; - tps = net->ibn_tx_ps[lnet_cpt_of_nid(target)]; + tps = net->ibn_tx_ps[lnet_cpt_of_nid(target, ni)]; node = kiblnd_pool_alloc_node(&tps->tps_poolset); if (node == NULL) return NULL; @@ -2243,75 +2243,75 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) __u32 ip = ntohl(peer_addr->sin_addr.s_addr); CERROR("Peer's port (%pI4h:%hu) is not privileged\n", &ip, ntohs(peer_addr->sin_port)); - goto failed; - } + goto failed; + } - if (priv_nob < offsetof(kib_msg_t, ibm_type)) { - CERROR("Short connection request\n"); - goto failed; - } + if (priv_nob < offsetof(kib_msg_t, ibm_type)) { + CERROR("Short connection request\n"); + goto failed; + } - /* Future protocol version compatibility support! If the - * o2iblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will - * negotiate a protocol version. I trap this here to avoid - * console errors; the reject tells the peer which protocol I - * speak. 
*/ - if (reqmsg->ibm_magic == LNET_PROTO_MAGIC || - reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) - goto failed; - if (reqmsg->ibm_magic == IBLND_MSG_MAGIC && - reqmsg->ibm_version != IBLND_MSG_VERSION && - reqmsg->ibm_version != IBLND_MSG_VERSION_1) - goto failed; - if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) && - reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) && - reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1)) - goto failed; + /* Future protocol version compatibility support! If the + * o2iblnd-specific protocol changes, or when LNET unifies + * protocols over all LNDs, the initial connection will + * negotiate a protocol version. I trap this here to avoid + * console errors; the reject tells the peer which protocol I + * speak. */ + if (reqmsg->ibm_magic == LNET_PROTO_MAGIC || + reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) + goto failed; + if (reqmsg->ibm_magic == IBLND_MSG_MAGIC && + reqmsg->ibm_version != IBLND_MSG_VERSION && + reqmsg->ibm_version != IBLND_MSG_VERSION_1) + goto failed; + if (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) && + reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION) && + reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION_1)) + goto failed; - rc = kiblnd_unpack_msg(reqmsg, priv_nob); - if (rc != 0) { - CERROR("Can't parse connection request: %d\n", rc); - goto failed; - } + rc = kiblnd_unpack_msg(reqmsg, priv_nob); + if (rc != 0) { + CERROR("Can't parse connection request: %d\n", rc); + goto failed; + } - nid = reqmsg->ibm_srcnid; - ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid)); + nid = reqmsg->ibm_srcnid; + ni = lnet_nid2ni_addref(reqmsg->ibm_dstnid); - if (ni != NULL) { - net = (kib_net_t *)ni->ni_data; - rej.ibr_incarnation = net->ibn_incarnation; - } + if (ni != NULL) { + net = (kib_net_t *)ni->ni_data; + rej.ibr_incarnation = net->ibn_incarnation; + } - if (ni == NULL || /* no matching net */ - ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */ - net->ibn_dev != ibdev) { /* wrong device */ + if (ni == NULL || /* no matching net */ + ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */ + net->ibn_dev != ibdev) { /* wrong device */ CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): " - "bad dst nid %s\n", libcfs_nid2str(nid), - ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid), - ibdev->ibd_ifname, ibdev->ibd_nnets, + "bad dst nid %s\n", libcfs_nid2str(nid), + ni == NULL ? 
"NA" : libcfs_nid2str(ni->ni_nid), + ibdev->ibd_ifname, ibdev->ibd_nnets, &ibdev->ibd_ifip, - libcfs_nid2str(reqmsg->ibm_dstnid)); + libcfs_nid2str(reqmsg->ibm_dstnid)); - goto failed; - } + goto failed; + } /* check time stamp as soon as possible */ - if (reqmsg->ibm_dststamp != 0 && - reqmsg->ibm_dststamp != net->ibn_incarnation) { - CWARN("Stale connection request\n"); - rej.ibr_why = IBLND_REJECT_CONN_STALE; - goto failed; - } + if (reqmsg->ibm_dststamp != 0 && + reqmsg->ibm_dststamp != net->ibn_incarnation) { + CWARN("Stale connection request\n"); + rej.ibr_why = IBLND_REJECT_CONN_STALE; + goto failed; + } - /* I can accept peer's version */ - version = reqmsg->ibm_version; + /* I can accept peer's version */ + version = reqmsg->ibm_version; - if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - reqmsg->ibm_type, libcfs_nid2str(nid)); - goto failed; - } + if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) { + CERROR("Unexpected connreq msg type: %x from %s\n", + reqmsg->ibm_type, libcfs_nid2str(nid)); + goto failed; + } if (reqmsg->ibm_u.connparams.ibcp_queue_depth > kiblnd_msg_queue_size(version, ni)) { @@ -2575,14 +2575,14 @@ kiblnd_check_reconnect(kib_conn_t *conn, int version, break; case IBLND_REJECT_RDMA_FRAGS: { - struct lnet_ioctl_config_lnd_tunables *tunables; + struct lnet_ioctl_config_o2iblnd_tunables *tunables; if (!cp) { reason = "can't negotiate max frags"; goto out; } - tunables = peer->ibp_ni->ni_lnd_tunables; - if (!tunables->lt_tun_u.lt_o2ib.lnd_map_on_demand) { + tunables = &peer->ibp_ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; + if (!tunables->lnd_map_on_demand) { reason = "map_on_demand must be enabled"; goto out; } diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/lnet/klnds/o2iblnd/o2iblnd_modparams.c index 1466dd9..54a81b5 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -164,7 +164,7 @@ kiblnd_msg_queue_size(int version, lnet_ni_t *ni) if (version == IBLND_MSG_VERSION_1) return IBLND_MSG_QUEUE_SIZE_V1; else if (ni) - return ni->ni_peertxcredits; + return ni->ni_net->net_tunables.lct_peer_tx_credits; else return peer_credits; } @@ -173,21 +173,17 @@ int kiblnd_tunables_setup(lnet_ni_t *ni) { struct lnet_ioctl_config_o2iblnd_tunables *tunables; + struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables; /* * if there was no tunables specified, setup the tunables to be * defaulted */ - if (!ni->ni_lnd_tunables) { - LIBCFS_ALLOC(ni->ni_lnd_tunables, - sizeof(*ni->ni_lnd_tunables)); - if (!ni->ni_lnd_tunables) - return -ENOMEM; - - memcpy(&ni->ni_lnd_tunables->lt_tun_u.lt_o2ib, + if (!ni->ni_lnd_tunables_set) + memcpy(&ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib, &default_tunables, sizeof(*tunables)); - } - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; /* Current API version */ tunables->lnd_version = 0; @@ -198,35 +194,39 @@ kiblnd_tunables_setup(lnet_ni_t *ni) return -EINVAL; } - if (!ni->ni_peertimeout) - ni->ni_peertimeout = peer_timeout; + net_tunables = &ni->ni_net->net_tunables; - if (!ni->ni_maxtxcredits) - ni->ni_maxtxcredits = credits; + if (net_tunables->lct_peer_timeout == -1) + net_tunables->lct_peer_timeout = peer_timeout; - if (!ni->ni_peertxcredits) - ni->ni_peertxcredits = peer_credits; + if (net_tunables->lct_max_tx_credits == -1) + net_tunables->lct_max_tx_credits = credits; - if (!ni->ni_peerrtrcredits) - ni->ni_peerrtrcredits = peer_buffer_credits; + if (net_tunables->lct_peer_tx_credits == -1) + 
net_tunables->lct_peer_tx_credits = peer_credits; - if (ni->ni_peertxcredits < IBLND_CREDITS_DEFAULT) - ni->ni_peertxcredits = IBLND_CREDITS_DEFAULT; + if (net_tunables->lct_peer_rtr_credits == -1) + net_tunables->lct_peer_rtr_credits = peer_buffer_credits; - if (ni->ni_peertxcredits > IBLND_CREDITS_MAX) - ni->ni_peertxcredits = IBLND_CREDITS_MAX; + if (net_tunables->lct_peer_tx_credits < IBLND_CREDITS_DEFAULT) + net_tunables->lct_peer_tx_credits = IBLND_CREDITS_DEFAULT; - if (ni->ni_peertxcredits > credits) - ni->ni_peertxcredits = credits; + if (net_tunables->lct_peer_tx_credits > IBLND_CREDITS_MAX) + net_tunables->lct_peer_tx_credits = IBLND_CREDITS_MAX; + + if (net_tunables->lct_peer_tx_credits > + net_tunables->lct_max_tx_credits) + net_tunables->lct_peer_tx_credits = + net_tunables->lct_max_tx_credits; if (!tunables->lnd_peercredits_hiw) tunables->lnd_peercredits_hiw = peer_credits_hiw; - if (tunables->lnd_peercredits_hiw < ni->ni_peertxcredits / 2) - tunables->lnd_peercredits_hiw = ni->ni_peertxcredits / 2; + if (tunables->lnd_peercredits_hiw < net_tunables->lct_peer_tx_credits / 2) + tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits / 2; - if (tunables->lnd_peercredits_hiw >= ni->ni_peertxcredits) - tunables->lnd_peercredits_hiw = ni->ni_peertxcredits - 1; + if (tunables->lnd_peercredits_hiw >= net_tunables->lct_peer_tx_credits) + tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits - 1; if (tunables->lnd_map_on_demand < 0 || tunables->lnd_map_on_demand > IBLND_MAX_RDMA_FRAGS) { @@ -243,22 +243,24 @@ kiblnd_tunables_setup(lnet_ni_t *ni) if (tunables->lnd_map_on_demand > 0 && tunables->lnd_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8) { tunables->lnd_concurrent_sends = - ni->ni_peertxcredits * 2; + net_tunables->lct_peer_tx_credits * 2; } else { - tunables->lnd_concurrent_sends = ni->ni_peertxcredits; + tunables->lnd_concurrent_sends = + net_tunables->lct_peer_tx_credits; } } - if (tunables->lnd_concurrent_sends > ni->ni_peertxcredits * 2) - tunables->lnd_concurrent_sends = ni->ni_peertxcredits * 2; + if (tunables->lnd_concurrent_sends > net_tunables->lct_peer_tx_credits * 2) + tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits * 2; - if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits / 2) - tunables->lnd_concurrent_sends = ni->ni_peertxcredits / 2; + if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits / 2) + tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits / 2; - if (tunables->lnd_concurrent_sends < ni->ni_peertxcredits) { + if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits) { CWARN("Concurrent sends %d is lower than message " "queue size: %d, performance may drop slightly.\n", - tunables->lnd_concurrent_sends, ni->ni_peertxcredits); + tunables->lnd_concurrent_sends, + net_tunables->lct_peer_tx_credits); } if (!tunables->lnd_fmr_pool_size) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 33c34cd..bed371c 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -98,7 +98,7 @@ ksocknal_destroy_route (ksock_route_t *route) static int ksocknal_create_peer(ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) { - int cpt = lnet_cpt_of_nid(id.nid); + int cpt = lnet_cpt_of_nid(id.nid, ni); ksock_net_t *net = ni->ni_data; ksock_peer_t *peer; @@ -1117,7 +1117,7 @@ ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, LASSERT (conn->ksnc_proto != NULL); LASSERT (peerid.nid != LNET_NID_ANY); - cpt = 
lnet_cpt_of_nid(peerid.nid); + cpt = lnet_cpt_of_nid(peerid.nid, ni); if (active) { ksocknal_peer_addref(peer); @@ -2775,7 +2775,7 @@ ksocknal_startup (lnet_ni_t *ni) int rc; int i; - LASSERT (ni->ni_lnd == &the_ksocklnd); + LASSERT (ni->ni_net->net_lnd == &the_ksocklnd); if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { rc = ksocknal_base_startup(); @@ -2790,10 +2790,17 @@ ksocknal_startup (lnet_ni_t *ni) spin_lock_init(&net->ksnn_lock); net->ksnn_incarnation = ksocknal_new_incarnation(); ni->ni_data = net; - ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout; - ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits; - ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits; - ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits; + if (!ni->ni_net->net_tunables_set) { + ni->ni_net->net_tunables.lct_peer_timeout = + *ksocknal_tunables.ksnd_peertimeout; + ni->ni_net->net_tunables.lct_max_tx_credits = + *ksocknal_tunables.ksnd_credits; + ni->ni_net->net_tunables.lct_peer_tx_credits = + *ksocknal_tunables.ksnd_peertxcredits; + ni->ni_net->net_tunables.lct_peer_rtr_credits = + *ksocknal_tunables.ksnd_peerrtrcredits; + ni->ni_net->net_tunables_set = true; + } if (ni->ni_interfaces[0] == NULL) { rc = ksocknal_enumerate_interfaces(net); diff --git a/lnet/lnet/acceptor.c b/lnet/lnet/acceptor.c index 4de013a..8230ceb 100644 --- a/lnet/lnet/acceptor.c +++ b/lnet/lnet/acceptor.c @@ -310,8 +310,8 @@ lnet_accept(struct socket *sock, __u32 magic) if (flip) __swab64s(&cr.acr_nid); - ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid)); - if (ni == NULL || /* no matching net */ + ni = lnet_nid2ni_addref(cr.acr_nid); + if (ni == NULL || /* no matching net */ ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */ if (ni != NULL) lnet_ni_decref(ni); @@ -321,7 +321,7 @@ lnet_accept(struct socket *sock, __u32 magic) return -EPERM; } - if (ni->ni_lnd->lnd_accept == NULL) { + if (ni->ni_net->net_lnd->lnd_accept == NULL) { /* This catches a request for the loopback LND */ lnet_ni_decref(ni); LCONSOLE_ERROR_MSG(0x121, "Refusing connection from %pI4h " @@ -333,7 +333,7 @@ lnet_accept(struct socket *sock, __u32 magic) CDEBUG(D_NET, "Accept %s from %pI4h\n", libcfs_nid2str(cr.acr_nid), &peer_ip); - rc = ni->ni_lnd->lnd_accept(ni, sock); + rc = ni->ni_net->net_lnd->lnd_accept(ni, sock); lnet_ni_decref(ni); return rc; @@ -476,7 +476,7 @@ lnet_acceptor_start(void) if (rc <= 0) return rc; - if (lnet_count_acceptor_nis() == 0) /* not required */ + if (lnet_count_acceptor_nets() == 0) /* not required */ return 0; task = kthread_run(lnet_acceptor, (void *)(uintptr_t)secure, diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 270629d..4910d3a 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -57,6 +57,11 @@ static int rnet_htable_size = LNET_REMOTE_NETS_HASH_DEFAULT; module_param(rnet_htable_size, int, 0444); MODULE_PARM_DESC(rnet_htable_size, "size of remote network hash table"); +static int use_tcp_bonding = false; +module_param(use_tcp_bonding, int, 0444); +MODULE_PARM_DESC(use_tcp_bonding, + "Set to 1 to use socklnd bonding. 
0 to use Multi-Rail"); + static int lnet_ping(lnet_process_id_t id, signed long timeout, lnet_process_id_t __user *ids, int n_ids); @@ -584,9 +589,7 @@ lnet_prepare(lnet_pid_t requested_pid) the_lnet.ln_pid = requested_pid; INIT_LIST_HEAD(&the_lnet.ln_test_peers); - INIT_LIST_HEAD(&the_lnet.ln_nis); - INIT_LIST_HEAD(&the_lnet.ln_nis_cpt); - INIT_LIST_HEAD(&the_lnet.ln_nis_zombie); + INIT_LIST_HEAD(&the_lnet.ln_nets); INIT_LIST_HEAD(&the_lnet.ln_routers); INIT_LIST_HEAD(&the_lnet.ln_drop_rules); INIT_LIST_HEAD(&the_lnet.ln_delay_rules); @@ -667,9 +670,7 @@ lnet_unprepare (void) LASSERT(the_lnet.ln_refcount == 0); LASSERT(list_empty(&the_lnet.ln_test_peers)); - LASSERT(list_empty(&the_lnet.ln_nis)); - LASSERT(list_empty(&the_lnet.ln_nis_cpt)); - LASSERT(list_empty(&the_lnet.ln_nis_zombie)); + LASSERT(list_empty(&the_lnet.ln_nets)); lnet_portals_destroy(); @@ -700,18 +701,17 @@ lnet_unprepare (void) } lnet_ni_t * -lnet_net2ni_locked(__u32 net, int cpt) +lnet_net2ni_locked(__u32 net_id, int cpt) { - struct list_head *tmp; - lnet_ni_t *ni; + struct lnet_ni *ni; + struct lnet_net *net; LASSERT(cpt != LNET_LOCK_EX); - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (LNET_NIDNET(ni->ni_nid) == net) { - lnet_ni_addref_locked(ni, cpt); + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + if (net->net_id == net_id) { + ni = list_entry(net->net_ni_list.next, struct lnet_ni, + ni_netlist); return ni; } } @@ -732,6 +732,19 @@ lnet_net2ni(__u32 net) } EXPORT_SYMBOL(lnet_net2ni); +struct lnet_net * +lnet_get_net_locked(__u32 net_id) +{ + struct lnet_net *net; + + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + if (net->net_id == net_id) + return net; + } + + return NULL; +} + static unsigned int lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number) { @@ -752,31 +765,41 @@ lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number) } int -lnet_cpt_of_nid_locked(lnet_nid_t nid) +lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni) { - struct lnet_ni *ni; + struct lnet_net *net; /* must called with hold of lnet_net_lock */ if (LNET_CPT_NUMBER == 1) return 0; /* the only one */ - /* take lnet_net_lock(any) would be OK */ - if (!list_empty(&the_lnet.ln_nis_cpt)) { - list_for_each_entry(ni, &the_lnet.ln_nis_cpt, ni_cptlist) { - if (LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) - continue; + /* + * If NI is provided then use the CPT identified in the NI cpt + * list if one exists. If one doesn't exist, then that NI is + * associated with all CPTs and it follows that the net it belongs + * to is implicitly associated with all CPTs, so just hash the nid + * and return that. 
+ */ + if (ni != NULL) { + if (ni->ni_cpts != NULL) + return ni->ni_cpts[lnet_nid_cpt_hash(nid, + ni->ni_ncpts)]; + else + return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); + } - LASSERT(ni->ni_cpts != NULL); - return ni->ni_cpts[lnet_nid_cpt_hash - (nid, ni->ni_ncpts)]; - } + /* no NI provided so look at the net */ + net = lnet_get_net_locked(LNET_NIDNET(nid)); + + if (net != NULL && net->net_cpts != NULL) { + return net->net_cpts[lnet_nid_cpt_hash(nid, net->net_ncpts)]; } return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); } int -lnet_cpt_of_nid(lnet_nid_t nid) +lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni) { int cpt; int cpt2; @@ -784,11 +807,10 @@ lnet_cpt_of_nid(lnet_nid_t nid) if (LNET_CPT_NUMBER == 1) return 0; /* the only one */ - if (list_empty(&the_lnet.ln_nis_cpt)) - return lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); - cpt = lnet_net_lock_current(); - cpt2 = lnet_cpt_of_nid_locked(nid); + + cpt2 = lnet_cpt_of_nid_locked(nid, ni); + lnet_net_unlock(cpt); return cpt2; @@ -796,42 +818,66 @@ lnet_cpt_of_nid(lnet_nid_t nid) EXPORT_SYMBOL(lnet_cpt_of_nid); int -lnet_islocalnet(__u32 net) +lnet_islocalnet(__u32 net_id) { - struct lnet_ni *ni; + struct lnet_net *net; int cpt; + bool local; cpt = lnet_net_lock_current(); - ni = lnet_net2ni_locked(net, cpt); - if (ni != NULL) - lnet_ni_decref_locked(ni, cpt); + net = lnet_get_net_locked(net_id); + + local = net != NULL; lnet_net_unlock(cpt); - return ni != NULL; + return local; +} + +bool +lnet_is_ni_healthy_locked(struct lnet_ni *ni) +{ + if (ni->ni_state == LNET_NI_STATE_ACTIVE || + ni->ni_state == LNET_NI_STATE_DEGRADED) + return true; + + return false; } lnet_ni_t * lnet_nid2ni_locked(lnet_nid_t nid, int cpt) { + struct lnet_net *net; struct lnet_ni *ni; - struct list_head *tmp; LASSERT(cpt != LNET_LOCK_EX); - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (ni->ni_nid == nid) { - lnet_ni_addref_locked(ni, cpt); - return ni; + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { + if (ni->ni_nid == nid) + return ni; } } return NULL; } +lnet_ni_t * +lnet_nid2ni_addref(lnet_nid_t nid) +{ + lnet_ni_t *ni; + + lnet_net_lock(0); + ni = lnet_nid2ni_locked(nid, 0); + if (ni) + lnet_ni_addref_locked(ni, 0); + lnet_net_unlock(0); + + return ni; +} +EXPORT_SYMBOL(lnet_nid2ni_addref); + int lnet_islocalnid(lnet_nid_t nid) { @@ -840,27 +886,24 @@ lnet_islocalnid(lnet_nid_t nid) cpt = lnet_net_lock_current(); ni = lnet_nid2ni_locked(nid, cpt); - if (ni != NULL) - lnet_ni_decref_locked(ni, cpt); lnet_net_unlock(cpt); return ni != NULL; } int -lnet_count_acceptor_nis (void) +lnet_count_acceptor_nets(void) { /* Return the # of NIs that need the acceptor. 
*/ int count = 0; - struct list_head *tmp; - struct lnet_ni *ni; + struct lnet_net *net; int cpt; cpt = lnet_net_lock_current(); - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (ni->ni_lnd->lnd_accept != NULL) + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + /* all socklnd type networks should have the acceptor + * thread started */ + if (net->net_lnd->lnd_accept != NULL) count++; } @@ -891,15 +934,30 @@ lnet_ping_info_create(int num_ni) } static inline int +lnet_get_net_ni_count_locked(struct lnet_net *net) +{ + struct lnet_ni *ni; + int count = 0; + + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) + count++; + + return count; +} + +static inline int lnet_get_ni_count(void) { - struct lnet_ni *ni; - int count = 0; + struct lnet_ni *ni; + struct lnet_net *net; + int count = 0; lnet_net_lock(0); - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) - count++; + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) + count++; + } lnet_net_unlock(0); @@ -917,14 +975,17 @@ lnet_ping_info_free(struct lnet_ping_info *pinfo) static void lnet_ping_info_destroy(void) { + struct lnet_net *net; struct lnet_ni *ni; lnet_net_lock(LNET_LOCK_EX); - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - lnet_ni_lock(ni); - ni->ni_status = NULL; - lnet_ni_unlock(ni); + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { + lnet_ni_lock(ni); + ni->ni_status = NULL; + lnet_ni_unlock(ni); + } } lnet_ping_info_free(the_lnet.ln_ping_info); @@ -1029,24 +1090,29 @@ static void lnet_ping_info_install_locked(struct lnet_ping_info *ping_info) { int i; - lnet_ni_t *ni; + struct lnet_ni *ni; + struct lnet_net *net; struct lnet_ni_status *ns; i = 0; - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - LASSERT(i < ping_info->pi_nnis); + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { + LASSERT(i < ping_info->pi_nnis); - ns = &ping_info->pi_ni[i]; + ns = &ping_info->pi_ni[i]; - ns->ns_nid = ni->ni_nid; + ns->ns_nid = ni->ni_nid; - lnet_ni_lock(ni); - ns->ns_status = (ni->ni_status != NULL) ? - ni->ni_status->ns_status : LNET_NI_STATUS_UP; - ni->ni_status = ns; - lnet_ni_unlock(ni); + lnet_ni_lock(ni); + ns->ns_status = (ni->ni_status != NULL) ? 
+ ni->ni_status->ns_status : + LNET_NI_STATUS_UP; + ni->ni_status = ns; + lnet_ni_unlock(ni); + + i++; + } - i++; } } @@ -1101,11 +1167,11 @@ lnet_ni_tq_credits(lnet_ni_t *ni) LASSERT(ni->ni_ncpts >= 1); if (ni->ni_ncpts == 1) - return ni->ni_maxtxcredits; + return ni->ni_net->net_tunables.lct_max_tx_credits; - credits = ni->ni_maxtxcredits / ni->ni_ncpts; - credits = max(credits, 8 * ni->ni_peertxcredits); - credits = min(credits, ni->ni_maxtxcredits); + credits = ni->ni_net->net_tunables.lct_max_tx_credits / ni->ni_ncpts; + credits = max(credits, 8 * ni->ni_net->net_tunables.lct_peer_tx_credits); + credits = min(credits, ni->ni_net->net_tunables.lct_max_tx_credits); return credits; } @@ -1119,37 +1185,43 @@ lnet_ni_unlink_locked(lnet_ni_t *ni) } /* move it to zombie list and nobody can find it anymore */ - LASSERT(!list_empty(&ni->ni_list)); - list_move(&ni->ni_list, &the_lnet.ln_nis_zombie); - lnet_ni_decref_locked(ni, 0); /* drop ln_nis' ref */ + LASSERT(!list_empty(&ni->ni_netlist)); + list_move(&ni->ni_netlist, &ni->ni_net->net_ni_zombie); + lnet_ni_decref_locked(ni, 0); } static void -lnet_clear_zombies_nis_locked(void) +lnet_clear_zombies_nis_locked(struct lnet_net *net) { int i; int islo; lnet_ni_t *ni; + struct list_head *zombie_list = &net->net_ni_zombie; - /* Now wait for the NI's I just nuked to show up on ln_zombie_nis - * and shut them down in guaranteed thread context */ + /* + * Now wait for the NIs I just nuked to show up on the zombie + * list and shut them down in guaranteed thread context + */ i = 2; - while (!list_empty(&the_lnet.ln_nis_zombie)) { + while (!list_empty(zombie_list)) { int *ref; int j; - ni = list_entry(the_lnet.ln_nis_zombie.next, - lnet_ni_t, ni_list); - list_del_init(&ni->ni_list); + ni = list_entry(zombie_list->next, + lnet_ni_t, ni_netlist); + list_del_init(&ni->ni_netlist); + /* the ni should be in deleting state. If it's not it's + * a bug */ + LASSERT(ni->ni_state == LNET_NI_STATE_DELETING); cfs_percpt_for_each(ref, j, ni->ni_refs) { if (*ref == 0) continue; /* still busy, add it back to zombie list */ - list_add(&ni->ni_list, &the_lnet.ln_nis_zombie); + list_add(&ni->ni_netlist, zombie_list); break; } - if (!list_empty(&ni->ni_list)) { + if (!list_empty(&ni->ni_netlist)) { lnet_net_unlock(LNET_LOCK_EX); ++i; if ((i & (-i)) == i) { @@ -1163,16 +1235,12 @@ lnet_clear_zombies_nis_locked(void) continue; } - ni->ni_lnd->lnd_refcount--; lnet_net_unlock(LNET_LOCK_EX); - islo = ni->ni_lnd->lnd_type == LOLND; + islo = ni->ni_net->net_lnd->lnd_type == LOLND; LASSERT(!in_interrupt()); - (ni->ni_lnd->lnd_shutdown)(ni); - - /* can't deref lnd anymore now; it might have unregistered - * itself... */ + (net->net_lnd->lnd_shutdown)(ni); if (!islo) CDEBUG(D_LNI, "Removed LNI %s\n", @@ -1184,60 +1252,15 @@ lnet_clear_zombies_nis_locked(void) } } -static void -lnet_shutdown_lndnis(void) -{ - int i; - lnet_ni_t *ni; - - /* NB called holding the global mutex */ - - /* All quiet on the API front */ - LASSERT(!the_lnet.ln_shutdown); - LASSERT(the_lnet.ln_refcount == 0); - LASSERT(list_empty(&the_lnet.ln_nis_zombie)); - - lnet_net_lock(LNET_LOCK_EX); - the_lnet.ln_shutdown = 1; /* flag shutdown */ - - /* Unlink NIs from the global table */ - while (!list_empty(&the_lnet.ln_nis)) { - ni = list_entry(the_lnet.ln_nis.next, - lnet_ni_t, ni_list); - lnet_ni_unlink_locked(ni); - } - - /* Drop the cached loopback NI. 
*/ - if (the_lnet.ln_loni != NULL) { - lnet_ni_decref_locked(the_lnet.ln_loni, 0); - the_lnet.ln_loni = NULL; - } - - lnet_net_unlock(LNET_LOCK_EX); - - /* Clear lazy portals and drop delayed messages which hold refs - * on their lnet_msg_t::msg_rxpeer */ - for (i = 0; i < the_lnet.ln_nportals; i++) - LNetClearLazyPortal(i); - - /* Clear the peer table and wait for all peers to go (they hold refs on - * their NIs) */ - lnet_peer_tables_cleanup(NULL); - - lnet_net_lock(LNET_LOCK_EX); - - lnet_clear_zombies_nis_locked(); - the_lnet.ln_shutdown = 0; - lnet_net_unlock(LNET_LOCK_EX); -} - /* shutdown down the NI and release refcount */ static void lnet_shutdown_lndni(struct lnet_ni *ni) { int i; + struct lnet_net *net = ni->ni_net; lnet_net_lock(LNET_LOCK_EX); + ni->ni_state = LNET_NI_STATE_DELETING; lnet_ni_unlink_locked(ni); lnet_net_unlock(LNET_LOCK_EX); @@ -1249,147 +1272,131 @@ lnet_shutdown_lndni(struct lnet_ni *ni) lnet_peer_tables_cleanup(ni); lnet_net_lock(LNET_LOCK_EX); - lnet_clear_zombies_nis_locked(); + lnet_clear_zombies_nis_locked(net); lnet_net_unlock(LNET_LOCK_EX); } -static int -lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) +static void +lnet_shutdown_lndnet(struct lnet_net *net) { - struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL; - int rc = -EINVAL; - __u32 lnd_type; - lnd_t *lnd; - struct lnet_tx_queue *tq; - int i; + struct lnet_ni *ni; - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); + lnet_net_lock(LNET_LOCK_EX); - LASSERT(libcfs_isknown_lnd(lnd_type)); + net->net_state = LNET_NET_STATE_DELETING; - if (lnd_type == CIBLND || lnd_type == OPENIBLND || - lnd_type == IIBLND || lnd_type == VIBLND) { - CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type)); - goto failed0; + list_del_init(&net->net_list); + + while (!list_empty(&net->net_ni_list)) { + ni = list_entry(net->net_ni_list.next, + lnet_ni_t, ni_netlist); + lnet_net_unlock(LNET_LOCK_EX); + lnet_shutdown_lndni(ni); + lnet_net_lock(LNET_LOCK_EX); } - /* Make sure this new NI is unique. 
*/ - lnet_net_lock(LNET_LOCK_EX); - rc = lnet_net_unique(LNET_NIDNET(ni->ni_nid), &the_lnet.ln_nis); + /* + * decrement ref count on lnd only when the entire network goes + * away + */ + net->net_lnd->lnd_refcount--; + lnet_net_unlock(LNET_LOCK_EX); - if (!rc) { - if (lnd_type == LOLND) { - lnet_ni_free(ni); - return 0; - } + lnet_net_free(net); +} - CERROR("Net %s is not unique\n", - libcfs_net2str(LNET_NIDNET(ni->ni_nid))); +static void +lnet_shutdown_lndnets(void) +{ + struct lnet_net *net; - rc = -EEXIST; - goto failed0; - } + /* NB called holding the global mutex */ - mutex_lock(&the_lnet.ln_lnd_mutex); - lnd = lnet_find_lnd_by_type(lnd_type); + /* All quiet on the API front */ + LASSERT(!the_lnet.ln_shutdown); + LASSERT(the_lnet.ln_refcount == 0); - if (lnd == NULL) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - rc = request_module("%s", libcfs_lnd2modname(lnd_type)); - mutex_lock(&the_lnet.ln_lnd_mutex); + lnet_net_lock(LNET_LOCK_EX); + the_lnet.ln_shutdown = 1; /* flag shutdown */ - lnd = lnet_find_lnd_by_type(lnd_type); - if (lnd == NULL) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - CERROR("Can't load LND %s, module %s, rc=%d\n", - libcfs_lnd2str(lnd_type), - libcfs_lnd2modname(lnd_type), rc); -#ifndef HAVE_MODULE_LOADING_SUPPORT - LCONSOLE_ERROR_MSG(0x104, "Your kernel must be " - "compiled with kernel module " - "loading support."); -#endif - rc = -EINVAL; - goto failed0; - } + while (!list_empty(&the_lnet.ln_nets)) { + /* + * move the nets to the zombie list to avoid them being + * picked up for new work. LONET is also included in the + * Nets that will be moved to the zombie list + */ + net = list_entry(the_lnet.ln_nets.next, + struct lnet_net, net_list); + list_move(&net->net_list, &the_lnet.ln_net_zombie); } - lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount++; + /* Drop the cached loopback Net. */ + if (the_lnet.ln_loni != NULL) { + lnet_ni_decref_locked(the_lnet.ln_loni, 0); + the_lnet.ln_loni = NULL; + } lnet_net_unlock(LNET_LOCK_EX); - ni->ni_lnd = lnd; + /* iterate through the net zombie list and delete each net */ + while (!list_empty(&the_lnet.ln_net_zombie)) { + net = list_entry(the_lnet.ln_net_zombie.next, + struct lnet_net, net_list); + lnet_shutdown_lndnet(net); + } - if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf)) - lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk; + lnet_net_lock(LNET_LOCK_EX); + the_lnet.ln_shutdown = 0; + lnet_net_unlock(LNET_LOCK_EX); +} - if (lnd_tunables != NULL) { - LIBCFS_ALLOC(ni->ni_lnd_tunables, - sizeof(*ni->ni_lnd_tunables)); - if (ni->ni_lnd_tunables == NULL) { - mutex_unlock(&the_lnet.ln_lnd_mutex); - rc = -ENOMEM; - goto failed0; - } - memcpy(ni->ni_lnd_tunables, lnd_tunables, - sizeof(*ni->ni_lnd_tunables)); - } +static int +lnet_startup_lndni(struct lnet_ni *ni, struct lnet_lnd_tunables *tun) +{ + int rc = -EINVAL; + struct lnet_tx_queue *tq; + int i; + struct lnet_net *net = ni->ni_net; - /* If given some LND tunable parameters, parse those now to - * override the values in the NI structure. 
*/ - if (conf) { - if (conf->cfg_config_u.cfg_net.net_peer_rtr_credits >= 0) - ni->ni_peerrtrcredits = - conf->cfg_config_u.cfg_net.net_peer_rtr_credits; - if (conf->cfg_config_u.cfg_net.net_peer_timeout >= 0) - ni->ni_peertimeout = - conf->cfg_config_u.cfg_net.net_peer_timeout; - if (conf->cfg_config_u.cfg_net.net_peer_tx_credits >= 0) - ni->ni_peertxcredits = - conf->cfg_config_u.cfg_net.net_peer_tx_credits; - if (conf->cfg_config_u.cfg_net.net_max_tx_credits >= 0) - ni->ni_maxtxcredits = - conf->cfg_config_u.cfg_net.net_max_tx_credits; + mutex_lock(&the_lnet.ln_lnd_mutex); + + if (tun) { + memcpy(&ni->ni_lnd_tunables, tun, sizeof(*tun)); + ni->ni_lnd_tunables_set = true; } - rc = (lnd->lnd_startup)(ni); + rc = (net->net_lnd->lnd_startup)(ni); mutex_unlock(&the_lnet.ln_lnd_mutex); if (rc != 0) { LCONSOLE_ERROR_MSG(0x105, "Error %d starting up LNI %s\n", - rc, libcfs_lnd2str(lnd->lnd_type)); + rc, libcfs_lnd2str(net->net_lnd->lnd_type)); lnet_net_lock(LNET_LOCK_EX); - lnd->lnd_refcount--; + net->net_lnd->lnd_refcount--; lnet_net_unlock(LNET_LOCK_EX); goto failed0; } - LASSERT(ni->ni_peertimeout <= 0 || lnd->lnd_query != NULL); - - lnet_net_lock(LNET_LOCK_EX); - /* refcount for ln_nis */ - lnet_ni_addref_locked(ni, 0); - list_add_tail(&ni->ni_list, &the_lnet.ln_nis); - if (ni->ni_cpts != NULL) { - lnet_ni_addref_locked(ni, 0); - list_add_tail(&ni->ni_cptlist, &the_lnet.ln_nis_cpt); - } - - lnet_net_unlock(LNET_LOCK_EX); + ni->ni_state = LNET_NI_STATE_ACTIVE; - if (lnd->lnd_type == LOLND) { + /* We keep a reference on the loopback net through the loopback NI */ + if (net->net_lnd->lnd_type == LOLND) { lnet_ni_addref(ni); LASSERT(the_lnet.ln_loni == NULL); the_lnet.ln_loni = ni; + ni->ni_net->net_tunables.lct_peer_tx_credits = 0; + ni->ni_net->net_tunables.lct_peer_rtr_credits = 0; + ni->ni_net->net_tunables.lct_max_tx_credits = 0; + ni->ni_net->net_tunables.lct_peer_timeout = 0; return 0; } - if (ni->ni_peertxcredits == 0 || ni->ni_maxtxcredits == 0) { + if (ni->ni_net->net_tunables.lct_peer_tx_credits == 0 || + ni->ni_net->net_tunables.lct_max_tx_credits == 0) { LCONSOLE_ERROR_MSG(0x107, "LNI %s has no %scredits\n", - libcfs_lnd2str(lnd->lnd_type), - ni->ni_peertxcredits == 0 ? + libcfs_lnd2str(net->net_lnd->lnd_type), + ni->ni_net->net_tunables.lct_peer_tx_credits == 0 ? "" : "per-peer "); /* shutdown the NI since if we get here then it must've already * been started @@ -1405,9 +1412,11 @@ lnet_startup_lndni(struct lnet_ni *ni, struct lnet_ioctl_config_data *conf) } CDEBUG(D_LNI, "Added LNI %s [%d/%d/%d/%d]\n", - libcfs_nid2str(ni->ni_nid), ni->ni_peertxcredits, + libcfs_nid2str(ni->ni_nid), + ni->ni_net->net_tunables.lct_peer_tx_credits, lnet_ni_tq_credits(ni) * LNET_CPT_NUMBER, - ni->ni_peerrtrcredits, ni->ni_peertimeout); + ni->ni_net->net_tunables.lct_peer_rtr_credits, + ni->ni_net->net_tunables.lct_peer_timeout); return 0; failed0: @@ -1416,26 +1425,216 @@ failed0: } static int -lnet_startup_lndnis(struct list_head *nilist) +lnet_startup_lndnet(struct lnet_net *net, struct lnet_lnd_tunables *tun) { struct lnet_ni *ni; + struct lnet_net *net_l = NULL; + struct list_head local_ni_list; int rc; int ni_count = 0; + __u32 lnd_type; + lnd_t *lnd; + int peer_timeout = + net->net_tunables.lct_peer_timeout; + int maxtxcredits = + net->net_tunables.lct_max_tx_credits; + int peerrtrcredits = + net->net_tunables.lct_peer_rtr_credits; + + INIT_LIST_HEAD(&local_ni_list); + + /* + * make sure that this net is unique. 
If it isn't then + * we are adding interfaces to an already existing network, and + * 'net' is just a convenient way to pass in the list. + * if it is unique we need to find the LND and load it if + * necessary. + */ + if (lnet_net_unique(net->net_id, &the_lnet.ln_nets, &net_l)) { + lnd_type = LNET_NETTYP(net->net_id); + + LASSERT(libcfs_isknown_lnd(lnd_type)); + + if (lnd_type == CIBLND || lnd_type == OPENIBLND || + lnd_type == IIBLND || lnd_type == VIBLND) { + CERROR("LND %s obsoleted\n", libcfs_lnd2str(lnd_type)); + rc = -EINVAL; + goto failed0; + } + + mutex_lock(&the_lnet.ln_lnd_mutex); + lnd = lnet_find_lnd_by_type(lnd_type); + + if (lnd == NULL) { + mutex_unlock(&the_lnet.ln_lnd_mutex); + rc = request_module("%s", libcfs_lnd2modname(lnd_type)); + mutex_lock(&the_lnet.ln_lnd_mutex); + + lnd = lnet_find_lnd_by_type(lnd_type); + if (lnd == NULL) { + mutex_unlock(&the_lnet.ln_lnd_mutex); + CERROR("Can't load LND %s, module %s, rc=%d\n", + libcfs_lnd2str(lnd_type), + libcfs_lnd2modname(lnd_type), rc); +#ifndef HAVE_MODULE_LOADING_SUPPORT + LCONSOLE_ERROR_MSG(0x104, "Your kernel must be " + "compiled with kernel module " + "loading support."); +#endif + rc = -EINVAL; + goto failed0; + } + } + + lnet_net_lock(LNET_LOCK_EX); + lnd->lnd_refcount++; + lnet_net_unlock(LNET_LOCK_EX); + + net->net_lnd = lnd; + + mutex_unlock(&the_lnet.ln_lnd_mutex); + + net_l = net; + } + + /* + * net_l: if the network being added is unique then net_l + * will point to that network + * if the network being added is not unique then + * net_l points to the existing network. + * + * When we enter the loop below, we'll pick NIs off he + * network beign added and start them up, then add them to + * a local ni list. Once we've successfully started all + * the NIs then we join the local NI list (of started up + * networks) with the net_l->net_ni_list, which should + * point to the correct network to add the new ni list to + * + * If any of the new NIs fail to start up, then we want to + * iterate through the local ni list, which should include + * any NIs which were successfully started up, and shut + * them down. + * + * After than we want to delete the network being added, + * to avoid a memory leak. + */ + + /* + * When a network uses TCP bonding then all its interfaces + * must be specified when the network is first defined: the + * TCP bonding code doesn't allow for interfaces to be added + * or removed. + */ + if (net_l != net && net_l != NULL && use_tcp_bonding && + LNET_NETTYP(net_l->net_id) == SOCKLND) { + rc = -EINVAL; + goto failed0; + } + + while (!list_empty(&net->net_ni_added)) { + ni = list_entry(net->net_ni_added.next, struct lnet_ni, + ni_netlist); + list_del_init(&ni->ni_netlist); + + /* make sure that the the NI we're about to start + * up is actually unique. if it's not fail. 
*/ + if (!lnet_ni_unique_net(&net_l->net_ni_list, + ni->ni_interfaces[0])) { + rc = -EINVAL; + goto failed1; + } + + /* adjust the pointer the parent network, just in case it + * the net is a duplicate */ + ni->ni_net = net_l; - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, lnet_ni_t, ni_list); - list_del(&ni->ni_list); - rc = lnet_startup_lndni(ni, NULL); + rc = lnet_startup_lndni(ni, tun); + + LASSERT(ni->ni_net->net_tunables.lct_peer_timeout <= 0 || + ni->ni_net->net_lnd->lnd_query != NULL); if (rc < 0) - goto failed; + goto failed1; + + lnet_ni_addref(ni); + list_add_tail(&ni->ni_netlist, &local_ni_list); ni_count++; } + lnet_net_lock(LNET_LOCK_EX); + list_splice_tail(&local_ni_list, &net_l->net_ni_list); + lnet_net_unlock(LNET_LOCK_EX); + + /* if the network is not unique then we don't want to keep + * it around after we're done. Free it. Otherwise add that + * net to the global the_lnet.ln_nets */ + if (net_l != net && net_l != NULL) { + /* + * TODO - note. currently the tunables can not be updated + * once added + */ + lnet_net_free(net); + } else { + net->net_state = LNET_NET_STATE_ACTIVE; + /* + * restore tunables after it has been overwitten by the + * lnd + */ + if (peer_timeout != -1) + net->net_tunables.lct_peer_timeout = peer_timeout; + if (maxtxcredits != -1) + net->net_tunables.lct_max_tx_credits = maxtxcredits; + if (peerrtrcredits != -1) + net->net_tunables.lct_peer_rtr_credits = peerrtrcredits; + + lnet_net_lock(LNET_LOCK_EX); + list_add_tail(&net->net_list, &the_lnet.ln_nets); + lnet_net_unlock(LNET_LOCK_EX); + } + + return ni_count; + +failed1: + /* + * shutdown the new NIs that are being started up + * free the NET being started + */ + while (!list_empty(&local_ni_list)) { + ni = list_entry(local_ni_list.next, struct lnet_ni, + ni_netlist); + + lnet_shutdown_lndni(ni); + } + +failed0: + lnet_net_free(net); + + return rc; +} + +static int +lnet_startup_lndnets(struct list_head *netlist) +{ + struct lnet_net *net; + int rc; + int ni_count = 0; + + while (!list_empty(netlist)) { + net = list_entry(netlist->next, struct lnet_net, net_list); + list_del_init(&net->net_list); + + rc = lnet_startup_lndnet(net, NULL); + + if (rc < 0) + goto failed; + + ni_count += rc; + } + return ni_count; failed: - lnet_shutdown_lndnis(); + lnet_shutdown_lndnets(); return rc; } @@ -1483,6 +1682,7 @@ int lnet_lib_init(void) the_lnet.ln_refcount = 0; LNetInvalidateHandle(&the_lnet.ln_rc_eqh); INIT_LIST_HEAD(&the_lnet.ln_lnds); + INIT_LIST_HEAD(&the_lnet.ln_net_zombie); INIT_LIST_HEAD(&the_lnet.ln_rcd_zombie); INIT_LIST_HEAD(&the_lnet.ln_rcd_deathrow); @@ -1543,6 +1743,7 @@ LNetNIInit(lnet_pid_t requested_pid) struct lnet_ping_info *pinfo; lnet_handle_md_t md_handle; struct list_head net_head; + struct lnet_net *net; INIT_LIST_HEAD(&net_head); @@ -1562,8 +1763,15 @@ LNetNIInit(lnet_pid_t requested_pid) return rc; } - /* Add in the loopback network */ - if (lnet_ni_alloc(LNET_MKNET(LOLND, 0), NULL, &net_head) == NULL) { + /* create a network for Loopback network */ + net = lnet_net_alloc(LNET_MKNET(LOLND, 0), &net_head); + if (net == NULL) { + rc = -ENOMEM; + goto err_empty_list; + } + + /* Add in the loopback NI */ + if (lnet_ni_alloc(net, NULL, NULL) == NULL) { rc = -ENOMEM; goto err_empty_list; } @@ -1575,13 +1783,13 @@ LNetNIInit(lnet_pid_t requested_pid) * in this case. 
On cleanup in case of failure only clean up * routes if it has been loaded */ if (!the_lnet.ln_nis_from_mod_params) { - rc = lnet_parse_networks(&net_head, - lnet_get_networks()); + rc = lnet_parse_networks(&net_head, lnet_get_networks(), + use_tcp_bonding); if (rc < 0) goto err_empty_list; } - ni_count = lnet_startup_lndnis(&net_head); + ni_count = lnet_startup_lndnets(&net_head); if (ni_count < 0) { rc = ni_count; goto err_empty_list; @@ -1634,17 +1842,17 @@ err_destroy_routes: if (!the_lnet.ln_nis_from_mod_params) lnet_destroy_routes(); err_shutdown_lndnis: - lnet_shutdown_lndnis(); + lnet_shutdown_lndnets(); err_empty_list: lnet_unprepare(); LASSERT(rc < 0); mutex_unlock(&the_lnet.ln_api_mutex); while (!list_empty(&net_head)) { - struct lnet_ni *ni; + struct lnet_net *net; - ni = list_entry(net_head.next, struct lnet_ni, ni_list); - list_del_init(&ni->ni_list); - lnet_ni_free(ni); + net = list_entry(net_head.next, struct lnet_net, net_list); + list_del_init(&net->net_list); + lnet_net_free(net); } return rc; } @@ -1682,7 +1890,7 @@ LNetNIFini() lnet_acceptor_stop(); lnet_destroy_routes(); - lnet_shutdown_lndnis(); + lnet_shutdown_lndnets(); lnet_unprepare(); } @@ -1732,10 +1940,14 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config) } config->cfg_nid = ni->ni_nid; - config->cfg_config_u.cfg_net.net_peer_timeout = ni->ni_peertimeout; - config->cfg_config_u.cfg_net.net_max_tx_credits = ni->ni_maxtxcredits; - config->cfg_config_u.cfg_net.net_peer_tx_credits = ni->ni_peertxcredits; - config->cfg_config_u.cfg_net.net_peer_rtr_credits = ni->ni_peerrtrcredits; + config->cfg_config_u.cfg_net.net_peer_timeout = + ni->ni_net->net_tunables.lct_peer_timeout; + config->cfg_config_u.cfg_net.net_max_tx_credits = + ni->ni_net->net_tunables.lct_max_tx_credits; + config->cfg_config_u.cfg_net.net_peer_tx_credits = + ni->ni_net->net_tunables.lct_peer_tx_credits; + config->cfg_config_u.cfg_net.net_peer_rtr_credits = + ni->ni_net->net_tunables.lct_peer_rtr_credits; net_config->ni_status = ni->ni_status->ns_status; @@ -1757,46 +1969,99 @@ lnet_fill_ni_info(struct lnet_ni *ni, struct lnet_ioctl_config_data *config) if (config->cfg_hdr.ioc_len > min_size) tunable_size = config->cfg_hdr.ioc_len - min_size; - /* Don't copy to much data to user space */ - min_size = min(tunable_size, sizeof(*ni->ni_lnd_tunables)); + /* Don't copy too much data to user space */ + min_size = min(tunable_size, sizeof(ni->ni_lnd_tunables)); lnd_cfg = (struct lnet_ioctl_config_lnd_tunables *)net_config->cfg_bulk; - if (ni->ni_lnd_tunables && lnd_cfg && min_size) { - memcpy(lnd_cfg, ni->ni_lnd_tunables, min_size); + if (lnd_cfg && min_size) { + memcpy(&lnd_cfg->lt_tun, &ni->ni_lnd_tunables, min_size); config->cfg_config_u.cfg_net.net_interface_count = 1; /* Tell user land that kernel side has less data */ - if (tunable_size > sizeof(*ni->ni_lnd_tunables)) { + if (tunable_size > sizeof(ni->ni_lnd_tunables)) { min_size = tunable_size - sizeof(ni->ni_lnd_tunables); config->cfg_hdr.ioc_len -= min_size; } } } -static int +struct lnet_ni * +lnet_get_ni_idx_locked(int idx) +{ + struct lnet_ni *ni; + struct lnet_net *net; + + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { + if (idx-- == 0) + return ni; + } + } + + return NULL; +} + +struct lnet_ni * +lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev) +{ + struct lnet_ni *ni; + struct lnet_net *net = mynet; + + if (prev == NULL) { + if (net == NULL) + net = 
list_entry(the_lnet.ln_nets.next, struct lnet_net, + net_list); + ni = list_entry(net->net_ni_list.next, struct lnet_ni, + ni_netlist); + + return ni; + } + + if (prev->ni_netlist.next == &prev->ni_net->net_ni_list) { + /* if you reached the end of the ni list and the net is + * specified, then there are no more nis in that net */ + if (net != NULL) + return NULL; + + /* we reached the end of this net ni list. move to the + * next net */ + if (prev->ni_net->net_list.next == &the_lnet.ln_nets) + /* no more nets and no more NIs. */ + return NULL; + + /* get the next net */ + net = list_entry(prev->ni_net->net_list.next, struct lnet_net, + net_list); + /* get the ni on it */ + ni = list_entry(net->net_ni_list.next, struct lnet_ni, + ni_netlist); + + return ni; + } + + /* there are more nis left */ + ni = list_entry(prev->ni_netlist.next, struct lnet_ni, ni_netlist); + + return ni; +} + +int lnet_get_net_config(struct lnet_ioctl_config_data *config) { struct lnet_ni *ni; - struct list_head *tmp; - int idx = config->cfg_count; + int cpt; int rc = -ENOENT; - int cpt, i = 0; - - if (unlikely(!config->cfg_bulk)) - return -EINVAL; + int idx = config->cfg_count; cpt = lnet_net_lock_current(); - list_for_each(tmp, &the_lnet.ln_nis) { - if (i++ != idx) - continue; + ni = lnet_get_ni_idx_locked(idx); - ni = list_entry(tmp, lnet_ni_t, ni_list); + if (ni != NULL) { + rc = 0; lnet_ni_lock(ni); lnet_fill_ni_info(ni, config); lnet_ni_unlock(ni); - rc = 0; - break; } lnet_net_unlock(cpt); @@ -1809,29 +2074,36 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf) char *nets = conf->cfg_config_u.cfg_net.net_intf; struct lnet_ping_info *pinfo; lnet_handle_md_t md_handle; - struct lnet_ni *ni; + struct lnet_net *net; struct list_head net_head; int rc; lnet_remotenet_t *rnet; + int net_ni_count; + int num_acceptor_nets; + __u32 net_type; + struct lnet_ioctl_config_lnd_tunables *lnd_tunables = NULL; INIT_LIST_HEAD(&net_head); - /* Create a ni structure for the network string */ - rc = lnet_parse_networks(&net_head, nets); + if (conf && conf->cfg_hdr.ioc_len > sizeof(*conf)) + lnd_tunables = (struct lnet_ioctl_config_lnd_tunables *)conf->cfg_bulk; + + /* Create a net/ni structures for the network string */ + rc = lnet_parse_networks(&net_head, nets, use_tcp_bonding); if (rc <= 0) return rc == 0 ? -EINVAL : rc; mutex_lock(&the_lnet.ln_api_mutex); if (rc > 1) { - rc = -EINVAL; /* only add one interface per call */ + rc = -EINVAL; /* only add one network per call */ goto failed0; } - ni = list_entry(net_head.next, struct lnet_ni, ni_list); + net = list_entry(net_head.next, struct lnet_net, net_list); lnet_net_lock(LNET_LOCK_EX); - rnet = lnet_find_net_locked(LNET_NIDNET(ni->ni_nid)); + rnet = lnet_find_rnet_locked(net->net_id); lnet_net_unlock(LNET_LOCK_EX); /* make sure that the net added doesn't invalidate the current * configuration LNet is keeping */ @@ -1842,23 +2114,66 @@ lnet_dyn_add_ni(lnet_pid_t requested_pid, struct lnet_ioctl_config_data *conf) goto failed0; } - rc = lnet_ping_info_setup(&pinfo, &md_handle, 1 + lnet_get_ni_count(), + /* + * make sure you calculate the correct number of slots in the ping + * info. Since the ping info is a flattened list of all the NIs, + * we should allocate enough slots to accomodate the number of NIs + * which will be added. 
+ * + * We can use lnet_get_net_ni_count_locked() since the net is not + * on a public list yet, so locking is not a problem + */ + net_ni_count = lnet_get_net_ni_count_locked(net); + + rc = lnet_ping_info_setup(&pinfo, &md_handle, + net_ni_count + lnet_get_ni_count(), false); if (rc != 0) goto failed0; - list_del_init(&ni->ni_list); + list_del_init(&net->net_list); - rc = lnet_startup_lndni(ni, conf); - if (rc != 0) + if (lnd_tunables) + memcpy(&net->net_tunables, + &lnd_tunables->lt_cmn, sizeof(lnd_tunables->lt_cmn)); + + /* + * before starting this network get a count of the current TCP + * networks which require the acceptor thread running. If that + * count is == 0 before we start up this network, then we'd want to + * start up the acceptor thread after starting up this network + */ + num_acceptor_nets = lnet_count_acceptor_nets(); + + /* + * lnd_startup_lndnet() can deallocate 'net' even if it it returns + * success, because we endded up adding interfaces to an existing + * network. So grab the net_type now + */ + net_type = LNET_NETTYP(net->net_id); + + rc = lnet_startup_lndnet(net, + (lnd_tunables) ? &lnd_tunables->lt_tun : NULL); + if (rc < 0) goto failed1; - if (ni->ni_lnd->lnd_accept != NULL) { + /* + * Start the acceptor thread if this is the first network + * being added that requires the thread. + */ + if (net_type == SOCKLND && num_acceptor_nets == 0) + { rc = lnet_acceptor_start(); if (rc < 0) { - /* shutdown the ni that we just started */ + /* shutdown the net that we just started */ CERROR("Failed to start up acceptor thread\n"); - lnet_shutdown_lndni(ni); + /* + * Note that if we needed to start the acceptor + * thread, then 'net' must have been the first TCP + * network, therefore was unique, and therefore + * wasn't deallocated by lnet_startup_lndnet() + */ + lnet_shutdown_lndnet(net); goto failed1; } } @@ -1874,51 +2189,53 @@ failed1: failed0: mutex_unlock(&the_lnet.ln_api_mutex); while (!list_empty(&net_head)) { - ni = list_entry(net_head.next, struct lnet_ni, ni_list); - list_del_init(&ni->ni_list); - lnet_ni_free(ni); + net = list_entry(net_head.next, struct lnet_net, net_list); + list_del_init(&net->net_list); + lnet_net_free(net); } return rc; } int -lnet_dyn_del_ni(__u32 net) +lnet_dyn_del_ni(__u32 net_id) { - lnet_ni_t *ni; + struct lnet_net *net; struct lnet_ping_info *pinfo; lnet_handle_md_t md_handle; int rc; + int net_ni_count; /* don't allow userspace to shutdown the LOLND */ - if (LNET_NETTYP(net) == LOLND) + if (LNET_NETTYP(net_id) == LOLND) return -EINVAL; mutex_lock(&the_lnet.ln_api_mutex); - /* create and link a new ping info, before removing the old one */ - rc = lnet_ping_info_setup(&pinfo, &md_handle, - lnet_get_ni_count() - 1, false); - if (rc != 0) - goto out; - ni = lnet_net2ni(net); - if (ni == NULL) { + lnet_net_lock(0); + + net = lnet_get_net_locked(net_id); + if (net == NULL) { rc = -EINVAL; - goto failed; + goto out; } - /* decrement the reference counter taken by lnet_net2ni() */ - lnet_ni_decref_locked(ni, 0); + net_ni_count = lnet_get_net_ni_count_locked(net); - lnet_shutdown_lndni(ni); + lnet_net_unlock(0); - if (lnet_count_acceptor_nis() == 0) + /* create and link a new ping info, before removing the old one */ + rc = lnet_ping_info_setup(&pinfo, &md_handle, + lnet_get_ni_count() - net_ni_count, false); + if (rc != 0) + goto out; + + lnet_shutdown_lndnet(net); + + if (lnet_count_acceptor_nets() == 0) lnet_acceptor_stop(); lnet_ping_target_update(pinfo, md_handle); - goto out; -failed: - lnet_ping_md_unlink(pinfo, &md_handle); - 
lnet_ping_info_free(pinfo); + out: mutex_unlock(&the_lnet.ln_api_mutex); @@ -2137,12 +2454,11 @@ LNetCtl(unsigned int cmd, void *arg) if (ni == NULL) return -EINVAL; - if (ni->ni_lnd->lnd_ctl == NULL) + if (ni->ni_net->net_lnd->lnd_ctl == NULL) rc = -EINVAL; else - rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg); + rc = ni->ni_net->net_lnd->lnd_ctl(ni, cmd, arg); - lnet_ni_decref(ni); return rc; } /* not reached */ @@ -2170,7 +2486,7 @@ int LNetGetId(unsigned int index, lnet_process_id_t *id) { struct lnet_ni *ni; - struct list_head *tmp; + struct lnet_net *net; int cpt; int rc = -ENOENT; @@ -2178,16 +2494,16 @@ LNetGetId(unsigned int index, lnet_process_id_t *id) cpt = lnet_net_lock_current(); - list_for_each(tmp, &the_lnet.ln_nis) { - if (index-- != 0) - continue; - - ni = list_entry(tmp, lnet_ni_t, ni_list); + list_for_each_entry(net, &the_lnet.ln_nets, net_list) { + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { + if (index-- != 0) + continue; - id->nid = ni->ni_nid; - id->pid = the_lnet.ln_pid; - rc = 0; - break; + id->nid = ni->ni_nid; + id->pid = the_lnet.ln_pid; + rc = 0; + break; + } } lnet_net_unlock(cpt); diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c index ba8b879..34889e6 100644 --- a/lnet/lnet/config.c +++ b/lnet/lnet/config.c @@ -46,8 +46,11 @@ static int lnet_tbnob = 0; /* track text buf allocation */ #define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */ #define LNET_SINGLE_TEXTBUF_NOB (4<<10) +#define SPACESTR " \t\v\r\n" +#define DELIMITERS ":()[]" + static void -lnet_syntax(char *name, char *str, int offset, int width) +lnet_syntax(const char *name, const char *str, int offset, int width) { static char dots[LNET_SINGLE_TEXTBUF_NOB]; static char dashes[LNET_SINGLE_TEXTBUF_NOB]; @@ -76,20 +79,212 @@ lnet_issep (char c) } } -int -lnet_net_unique(__u32 net, struct list_head *nilist) +bool +lnet_net_unique(__u32 net_id, struct list_head *netlist, + struct lnet_net **net) +{ + struct lnet_net *net_l; + + list_for_each_entry(net_l, netlist, net_list) { + if (net_l->net_id == net_id) { + if (net != NULL) + *net = net_l; + return false; + } + } + + return true; +} + +/* check that the NI is unique within the list of NIs already added to + * a network */ +bool +lnet_ni_unique_net(struct list_head *nilist, char *iface) { struct list_head *tmp; - lnet_ni_t *ni; + struct lnet_ni *ni; list_for_each(tmp, nilist) { - ni = list_entry(tmp, lnet_ni_t, ni_list); + ni = list_entry(tmp, struct lnet_ni, ni_netlist); - if (LNET_NIDNET(ni->ni_nid) == net) - return 0; + if (ni->ni_interfaces[0] != NULL && + strncmp(ni->ni_interfaces[0], iface, strlen(iface)) == 0) + return false; } - return 1; + return true; +} + +/* check that the NI is unique to the interfaces with in the same NI. 
+ * This is only a consideration if use_tcp_bonding is set */ +static bool +lnet_ni_unique_ni(char *iface_list[LNET_MAX_INTERFACES], char *iface) +{ + int i; + for (i = 0; i < LNET_MAX_INTERFACES; i++) { + if (iface_list[i] != NULL && + strncmp(iface_list[i], iface, strlen(iface)) == 0) + return false; + } + + return true; +} + +static bool +in_array(__u32 *array, __u32 size, __u32 value) +{ + int i; + + for (i = 0; i < size; i++) { + if (array[i] == value) + return false; + } + + return true; +} + +static int +lnet_net_append_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net) +{ + __u32 *added_cpts = NULL; + int i, j = 0, rc = 0; + + /* + * no need to go further since a subset of the NIs already exist on + * all CPTs + */ + if (net->net_ncpts == LNET_CPT_NUMBER) + return 0; + + if (cpts == NULL) { + /* there is an NI which will exist on all CPTs */ + if (net->net_cpts != NULL) + LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) * + net->net_ncpts); + net->net_cpts = NULL; + net->net_ncpts = LNET_CPT_NUMBER; + return 0; + } + + if (net->net_cpts == NULL) { + LIBCFS_ALLOC(net->net_cpts, sizeof(*net->net_cpts) * ncpts); + if (net->net_cpts == NULL) + return -ENOMEM; + memcpy(net->net_cpts, cpts, ncpts); + return 0; + } + + LIBCFS_ALLOC(added_cpts, sizeof(*added_cpts) * LNET_CPT_NUMBER); + if (added_cpts == NULL) + return -ENOMEM; + + for (i = 0; i < ncpts; i++) { + if (!in_array(net->net_cpts, net->net_ncpts, cpts[i])) { + added_cpts[j] = cpts[i]; + j++; + } + } + + /* append the new cpts if any to the list of cpts in the net */ + if (j > 0) { + __u32 *array = NULL, *loc; + __u32 total_entries = j + net->net_ncpts; + + LIBCFS_ALLOC(array, sizeof(*net->net_cpts) * total_entries); + if (array == NULL) { + rc = -ENOMEM; + goto failed; + } + + memcpy(array, net->net_cpts, net->net_ncpts); + loc = array + net->net_ncpts; + memcpy(loc, added_cpts, j); + + LIBCFS_FREE(net->net_cpts, sizeof(*net->net_cpts) * + net->net_ncpts); + net->net_ncpts = total_entries; + net->net_cpts = array; + } + +failed: + LIBCFS_FREE(added_cpts, sizeof(*added_cpts) * LNET_CPT_NUMBER); + + return rc; +} + +static void +lnet_net_remove_cpts(__u32 *cpts, __u32 ncpts, struct lnet_net *net) +{ + struct lnet_ni *ni; + int rc; + + /* + * Operation Assumption: + * This function is called after an NI has been removed from + * its parent net. + * + * if we're removing an NI which exists on all CPTs then + * we have to check if any of the other NIs on this net also + * exist on all CPTs. If none, then we need to build our Net CPT + * list based on the remaining NIs. + * + * If the NI being removed exists on a subset of the CPTs then we + * also rebuild the Net CPT list based on the remaining NIs, which + * should result in the expected Net CPT list. + */ + + /* + * sometimes this function can be called due to some failure + * creating an NI, before any of the cpts are allocated, so check + * for that case and don't do anything + */ + if (ncpts == 0) + return; + + if (ncpts == LNET_CPT_NUMBER) { + /* + * first iteration through the NI list in the net to see + * if any of the NIs exist on all the CPTs. If one is + * found then our job is done. + */ + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { + if (ni->ni_ncpts == LNET_CPT_NUMBER) + return; + } + } + + /* + * Rebuild the Net CPT list again, including only the + * CPTs which the remaining NIs are associated with.
+ */ + if (net->net_cpts != NULL) { + LIBCFS_FREE(net->net_cpts, + sizeof(*net->net_cpts) * net->net_ncpts); + net->net_cpts = NULL; + } + + list_for_each_entry(ni, &net->net_ni_list, ni_netlist) { + rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, + net); + if (rc != 0) { + CERROR("Out of Memory\n"); + /* + * do our best to keep on going. Delete + * the net cpts and set it to NULL. This + * way we can keep on going but less + * efficiently, since memory accesses might be + * accross CPT lines. + */ + if (net->net_cpts != NULL) { + LIBCFS_FREE(net->net_cpts, + sizeof(*net->net_cpts) * + net->net_ncpts); + net->net_cpts = NULL; + net->net_ncpts = LNET_CPT_NUMBER; + } + return; + } + } } void @@ -97,6 +292,8 @@ lnet_ni_free(struct lnet_ni *ni) { int i; + lnet_net_remove_cpts(ni->ni_cpts, ni->ni_ncpts, ni->ni_net); + if (ni->ni_refs != NULL) cfs_percpt_free(ni->ni_refs); @@ -106,9 +303,6 @@ lnet_ni_free(struct lnet_ni *ni) if (ni->ni_cpts != NULL) cfs_expr_list_values_free(ni->ni_cpts, ni->ni_ncpts); - if (ni->ni_lnd_tunables != NULL) - LIBCFS_FREE(ni->ni_lnd_tunables, sizeof(*ni->ni_lnd_tunables)); - for (i = 0; i < LNET_MAX_INTERFACES && ni->ni_interfaces[i] != NULL; i++) { LIBCFS_FREE(ni->ni_interfaces[i], @@ -122,29 +316,143 @@ lnet_ni_free(struct lnet_ni *ni) LIBCFS_FREE(ni, sizeof(*ni)); } +void +lnet_net_free(struct lnet_net *net) +{ + struct list_head *tmp, *tmp2; + struct lnet_ni *ni; + + LASSERT(list_empty(&net->net_ni_zombie)); + + /* + * delete any nis that haven't been added yet. This could happen + * if there is a failure on net startup + */ + list_for_each_safe(tmp, tmp2, &net->net_ni_added) { + ni = list_entry(tmp, struct lnet_ni, ni_netlist); + list_del_init(&ni->ni_netlist); + lnet_ni_free(ni); + } + + /* delete any nis which have been started. */ + list_for_each_safe(tmp, tmp2, &net->net_ni_list) { + ni = list_entry(tmp, struct lnet_ni, ni_netlist); + list_del_init(&ni->ni_netlist); + lnet_ni_free(ni); + } + + if (net->net_cpts != NULL) + LIBCFS_FREE(net->net_cpts, + sizeof(*net->net_cpts) * net->net_ncpts); + + LIBCFS_FREE(net, sizeof(*net)); +} + +struct lnet_net * +lnet_net_alloc(__u32 net_id, struct list_head *net_list) +{ + struct lnet_net *net; + + if (!lnet_net_unique(net_id, net_list, NULL)) { + CERROR("Duplicate net %s. Ignore\n", + libcfs_net2str(net_id)); + return NULL; + } + + LIBCFS_ALLOC(net, sizeof(*net)); + if (net == NULL) { + CERROR("Out of memory creating network %s\n", + libcfs_net2str(net_id)); + return NULL; + } + + INIT_LIST_HEAD(&net->net_list); + INIT_LIST_HEAD(&net->net_ni_list); + INIT_LIST_HEAD(&net->net_ni_added); + INIT_LIST_HEAD(&net->net_ni_zombie); + + net->net_id = net_id; + net->net_state = LNET_NET_STATE_INIT; + + /* initialize global paramters to undefiend */ + net->net_tunables.lct_peer_timeout = -1; + net->net_tunables.lct_max_tx_credits = -1; + net->net_tunables.lct_peer_tx_credits = -1; + net->net_tunables.lct_peer_rtr_credits = -1; + + list_add_tail(&net->net_list, net_list); + + return net; +} + +static int +lnet_ni_add_interface(struct lnet_ni *ni, char *iface) +{ + int niface = 0; + + if (ni == NULL) + return -ENOMEM; + + if (!lnet_ni_unique_ni(ni->ni_interfaces, iface)) + return -EINVAL; + + /* Allocate a separate piece of memory and copy + * into it the string, so we don't have + * a depencency on the tokens string. This way we + * can free the tokens at the end of the function. 
+ * The newly allocated ni_interfaces[] can be + * freed when freeing the NI */ + while (niface < LNET_MAX_INTERFACES && + ni->ni_interfaces[niface] != NULL) + niface++; + + if (niface >= LNET_MAX_INTERFACES) { + LCONSOLE_ERROR_MSG(0x115, "Too many interfaces " + "for net %s\n", + libcfs_net2str(LNET_NIDNET(ni->ni_nid))); + return -EINVAL; + } + + LIBCFS_ALLOC(ni->ni_interfaces[niface], + strlen(iface) + 1); + + if (ni->ni_interfaces[niface] == NULL) { + CERROR("Can't allocate net interface name\n"); + return -ENOMEM; + } + + strncpy(ni->ni_interfaces[niface], iface, + strlen(iface) + 1); + + return 0; +} + +/* allocate and add to the provided network */ lnet_ni_t * -lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) +lnet_ni_alloc(struct lnet_net *net, struct cfs_expr_list *el, char *iface) { struct lnet_tx_queue *tq; struct lnet_ni *ni; int rc; int i; - if (!lnet_net_unique(net, nilist)) { - LCONSOLE_ERROR_MSG(0x111, "Duplicate network specified: %s\n", - libcfs_net2str(net)); - return NULL; - } + if (iface != NULL) + /* make sure that this NI is unique in the net it's + * being added to */ + if (!lnet_ni_unique_net(&net->net_ni_added, iface)) + return NULL; LIBCFS_ALLOC(ni, sizeof(*ni)); if (ni == NULL) { - CERROR("Out of memory creating network %s\n", - libcfs_net2str(net)); + CERROR("Out of memory creating network interface %s%s\n", + libcfs_net2str(net->net_id), + (iface != NULL) ? iface : ""); return NULL; } spin_lock_init(&ni->ni_lock); INIT_LIST_HEAD(&ni->ni_cptlist); + INIT_LIST_HEAD(&ni->ni_netlist); ni->ni_refs = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*ni->ni_refs[0])); if (ni->ni_refs == NULL) @@ -164,8 +472,9 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) } else { rc = cfs_expr_list_values(el, LNET_CPT_NUMBER, &ni->ni_cpts); if (rc <= 0) { - CERROR("Failed to set CPTs for NI %s: %d\n", - libcfs_net2str(net), rc); + CERROR("Failed to set CPTs for NI %s(%s): %d\n", + libcfs_net2str(net->net_id), + (iface != NULL) ? iface : "", rc); goto failed; } @@ -178,8 +487,9 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) ni->ni_ncpts = rc; } + ni->ni_net = net; /* LND will fill in the address part of the NID */ - ni->ni_nid = LNET_MKNID(net, 0); + ni->ni_nid = LNET_MKNID(net->net_id, 0); /* Store net namespace in which current ni is being created */ if (current->nsproxy->net_ns != NULL) @@ -188,25 +498,41 @@ lnet_ni_alloc(__u32 net, struct cfs_expr_list *el, struct list_head *nilist) ni->ni_net_ns = NULL; ni->ni_last_alive = ktime_get_real_seconds(); - list_add_tail(&ni->ni_list, nilist); + ni->ni_state = LNET_NI_STATE_INIT; + rc = lnet_net_append_cpts(ni->ni_cpts, ni->ni_ncpts, net); + if (rc != 0) + goto failed; + list_add_tail(&ni->ni_netlist, &net->net_ni_added); + + /* if an interface name is provided then make sure to add in that + * interface name in NI */ + if (iface != NULL) + if (lnet_ni_add_interface(ni, iface) != 0) + goto failed; + return ni; - failed: +failed: lnet_ni_free(ni); return NULL; } +/* + * Parse the networks string and create the matching set of NIs on the + * nilist. 
+ */ int -lnet_parse_networks(struct list_head *nilist, char *networks) +lnet_parse_networks(struct list_head *netlist, char *networks, + bool use_tcp_bonding) { - struct cfs_expr_list *el = NULL; + struct cfs_expr_list *net_el = NULL; + struct cfs_expr_list *ni_el = NULL; int tokensize; char *tokens; char *str; - char *tmp; - struct lnet_ni *ni; - __u32 net; + struct lnet_net *net; + struct lnet_ni *ni = NULL; + __u32 net_id; int nnets = 0; - struct list_head *temp_node; if (networks == NULL) { CERROR("networks string is undefined\n"); @@ -229,173 +555,238 @@ lnet_parse_networks(struct list_head *nilist, char *networks) } memcpy(tokens, networks, tokensize); - str = tmp = tokens; - - while (str != NULL && *str != 0) { - char *comma = strchr(str, ','); - char *bracket = strchr(str, '('); - char *square = strchr(str, '['); - char *iface; - int niface; - int rc; - - /* NB we don't check interface conflicts here; it's the LNDs - * responsibility (if it cares at all) */ - - if (square != NULL && (comma == NULL || square < comma)) { - /* i.e: o2ib0(ib0)[1,2], number between square - * brackets are CPTs this NI needs to be bond */ - if (bracket != NULL && bracket > square) { - tmp = square; + str = tokens; + + /* + * Main parser loop. + * + * NB we don't check interface conflicts here; it's the LNDs + * responsibility (if it cares at all) + */ + do { + char *nistr; + char *elstr; + char *name; + int rc; + + /* + * Parse a network string into its components. + * + * {"("...")"}{"[""]"} + */ + + /* Network name (mandatory) */ + while (isspace(*str)) + *str++ = '\0'; + if (!*str) + break; + name = str; + str += strcspn(str, SPACESTR ":()[],"); + while (isspace(*str)) + *str++ = '\0'; + + /* Interface list (optional) */ + if (*str == '(') { + *str++ = '\0'; + nistr = str; + str += strcspn(str, ")"); + if (*str != ')') { + str = nistr; goto failed_syntax; } + do { + *str++ = '\0'; + } while (isspace(*str)); + } else { + nistr = NULL; + } - tmp = strchr(square, ']'); - if (tmp == NULL) { - tmp = square; + /* CPT expression (optional) */ + if (*str == '[') { + elstr = str; + str += strcspn(str, "]"); + if (*str != ']') { + str = elstr; goto failed_syntax; } - - rc = cfs_expr_list_parse(square, tmp - square + 1, - 0, LNET_CPT_NUMBER - 1, &el); + rc = cfs_expr_list_parse(elstr, str - elstr + 1, + 0, LNET_CPT_NUMBER - 1, + &net_el); if (rc != 0) { - tmp = square; + str = elstr; goto failed_syntax; } - - while (square <= tmp) - *square++ = ' '; + *elstr = '\0'; + do { + *str++ = '\0'; + } while (isspace(*str)); } - if (bracket == NULL || - (comma != NULL && comma < bracket)) { - - /* no interface list specified */ - - if (comma != NULL) - *comma++ = 0; - net = libcfs_str2net(cfs_trimwhite(str)); - - if (net == LNET_NIDNET(LNET_NID_ANY)) { - LCONSOLE_ERROR_MSG(0x113, "Unrecognised network" - " type\n"); - tmp = str; - goto failed_syntax; - } + /* Bad delimiters */ + if (*str && (strchr(DELIMITERS, *str) != NULL)) + goto failed_syntax; - if (LNET_NETTYP(net) != LOLND && /* LO is implicit */ - lnet_ni_alloc(net, el, nilist) == NULL) - goto failed; + /* go to the next net if it exits */ + str += strcspn(str, ","); + if (*str == ',') + *str++ = '\0'; + + /* + * At this point the name is properly terminated. 
+ */ + net_id = libcfs_str2net(name); + if (net_id == LNET_NIDNET(LNET_NID_ANY)) { + LCONSOLE_ERROR_MSG(0x113, + "Unrecognised network type\n"); + str = name; + goto failed_syntax; + } - if (el != NULL) { - cfs_expr_list_free(el); - el = NULL; + if (LNET_NETTYP(net_id) == LOLND) { + /* Loopback is implicit, and there can be only one. */ + if (net_el) { + cfs_expr_list_free(net_el); + net_el = NULL; } - - str = comma; + /* Should we error out instead? */ continue; } - *bracket = 0; - net = libcfs_str2net(cfs_trimwhite(str)); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - tmp = str; - goto failed_syntax; - } + /* + * All network paramaters are now known. + */ + nnets++; - ni = lnet_ni_alloc(net, el, nilist); - if (ni == NULL) + /* always allocate a net, since we will eventually add an + * interface to it, or we will fail, in which case we'll + * just delete it */ + net = lnet_net_alloc(net_id, netlist); + if (IS_ERR_OR_NULL(net)) goto failed; - if (el != NULL) { - cfs_expr_list_free(el); - el = NULL; - } - - niface = 0; - iface = bracket + 1; + if (!nistr || + (use_tcp_bonding && LNET_NETTYP(net_id) == SOCKLND)) { + /* + * No interface list was specified, allocate a + * ni using the defaults. + */ + ni = lnet_ni_alloc(net, net_el, NULL); + if (IS_ERR_OR_NULL(ni)) + goto failed; - bracket = strchr(iface, ')'); - if (bracket == NULL) { - tmp = iface; - goto failed_syntax; + if (!nistr) { + if (net_el) { + cfs_expr_list_free(net_el); + net_el = NULL; + } + continue; + } } - *bracket = 0; do { - comma = strchr(iface, ','); - if (comma != NULL) - *comma++ = 0; + elstr = NULL; + + /* Interface name (mandatory) */ + while (isspace(*nistr)) + *nistr++ = '\0'; + name = nistr; + nistr += strcspn(nistr, SPACESTR "[],"); + while (isspace(*nistr)) + *nistr++ = '\0'; + + /* CPT expression (optional) */ + if (*nistr == '[') { + elstr = nistr; + nistr += strcspn(nistr, "]"); + if (*nistr != ']') { + str = elstr; + goto failed_syntax; + } + rc = cfs_expr_list_parse(elstr, + nistr - elstr + 1, + 0, LNET_CPT_NUMBER - 1, + &ni_el); + if (rc != 0) { + str = elstr; + goto failed_syntax; + } + *elstr = '\0'; + do { + *nistr++ = '\0'; + } while (isspace(*nistr)); + } else { + ni_el = net_el; + } - iface = cfs_trimwhite(iface); - if (*iface == 0) { - tmp = iface; + /* + * End of single interface specificaton, + * advance to the start of the next one, if + * any. + */ + if (*nistr == ',') { + do { + *nistr++ = '\0'; + } while (isspace(*nistr)); + if (!*nistr) { + str = nistr; + goto failed_syntax; + } + } else if (*nistr) { + str = nistr; goto failed_syntax; } - if (niface == LNET_MAX_INTERFACES) { - LCONSOLE_ERROR_MSG(0x115, "Too many interfaces " - "for net %s\n", - libcfs_net2str(net)); - goto failed; + /* + * At this point the name is properly terminated. + */ + if (!*name) { + str = name; + goto failed_syntax; } - /* Allocate a separate piece of memory and copy - * into it the string, so we don't have - * a depencency on the tokens string. This way we - * can free the tokens at the end of the function. 
- * The newly allocated ni_interfaces[] can be - * freed when freeing the NI */ - LIBCFS_ALLOC(ni->ni_interfaces[niface], - strlen(iface) + 1); - if (ni->ni_interfaces[niface] == NULL) { - CERROR("Can't allocate net interface name\n"); - goto failed; + if (use_tcp_bonding && + LNET_NETTYP(net->net_id) == SOCKLND) { + rc = lnet_ni_add_interface(ni, name); + if (rc != 0) + goto failed; + } else { + ni = lnet_ni_alloc(net, ni_el, name); + if (IS_ERR_OR_NULL(ni)) + goto failed; } - strncpy(ni->ni_interfaces[niface], iface, - strlen(iface)); - niface++; - iface = comma; - } while (iface != NULL); - - str = bracket + 1; - comma = strchr(bracket + 1, ','); - if (comma != NULL) { - *comma = 0; - str = cfs_trimwhite(str); - if (*str != 0) { - tmp = str; - goto failed_syntax; + + if (ni_el) { + if (ni_el != net_el) { + cfs_expr_list_free(ni_el); + ni_el = NULL; + } } - str = comma + 1; - continue; - } + } while (*nistr); - str = cfs_trimwhite(str); - if (*str != 0) { - tmp = str; - goto failed_syntax; + if (net_el) { + cfs_expr_list_free(net_el); + net_el = NULL; } - } - - list_for_each(temp_node, nilist) - nnets++; + } while (*str); LIBCFS_FREE(tokens, tokensize); return nnets; failed_syntax: - lnet_syntax("networks", networks, (int)(tmp - tokens), strlen(tmp)); + lnet_syntax("networks", networks, (int)(str - tokens), strlen(str)); failed: - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, lnet_ni_t, ni_list); + /* free the net list and all the nis on each net */ + while (!list_empty(netlist)) { + net = list_entry(netlist->next, struct lnet_net, net_list); - list_del(&ni->ni_list); - lnet_ni_free(ni); + list_del_init(&net->net_list); + lnet_net_free(net); } - if (el != NULL) - cfs_expr_list_free(el); + if (ni_el && ni_el != net_el) + cfs_expr_list_free(ni_el); + if (net_el) + cfs_expr_list_free(net_el); LIBCFS_FREE(tokens, tokensize); diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index d93d061..e9a63eb 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -584,13 +584,14 @@ lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, iov = msg->msg_iov; kiov = msg->msg_kiov; - LASSERT(niov > 0); - LASSERT((iov == NULL) != (kiov == NULL)); + LASSERT (niov > 0); + LASSERT ((iov == NULL) != (kiov == NULL)); } } - rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed, - niov, iov, kiov, offset, mlen, rlen); + rc = (ni->ni_net->net_lnd->lnd_recv)(ni, private, msg, delayed, + niov, iov, kiov, offset, mlen, + rlen); if (rc < 0) lnet_finalize(ni, msg, rc); } @@ -645,7 +646,7 @@ lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg) LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND || (msg->msg_txcredit && msg->msg_peertxcredit)); - rc = (ni->ni_lnd->lnd_send)(ni, priv, msg); + rc = (ni->ni_net->net_lnd->lnd_send)(ni, priv, msg); if (rc < 0) lnet_finalize(ni, msg, rc); } @@ -658,11 +659,11 @@ lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg) LASSERT(!msg->msg_sending); LASSERT(msg->msg_receiving); LASSERT(!msg->msg_rx_ready_delay); - LASSERT(ni->ni_lnd->lnd_eager_recv != NULL); + LASSERT(ni->ni_net->net_lnd->lnd_eager_recv != NULL); msg->msg_rx_ready_delay = 1; - rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg, - &msg->msg_private); + rc = (ni->ni_net->net_lnd->lnd_eager_recv)(ni, msg->msg_private, msg, + &msg->msg_private); if (rc != 0) { CERROR("recv from %s / send to %s aborted: " "eager_recv failed %d\n", @@ -681,10 +682,10 @@ lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp) cfs_time_t last_alive = 0; LASSERT(lnet_peer_aliveness_enabled(lp)); - 
LASSERT(ni->ni_lnd->lnd_query != NULL); + LASSERT(ni->ni_net->net_lnd->lnd_query != NULL); lnet_net_unlock(lp->lp_cpt); - (ni->ni_lnd->lnd_query)(ni, lp->lp_nid, &last_alive); + (ni->ni_net->net_lnd->lnd_query)(ni, lp->lp_nid, &last_alive); lnet_net_lock(lp->lp_cpt); lp->lp_last_query = cfs_time_current(); @@ -697,23 +698,27 @@ lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp) static inline int lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) { - int alive; + int alive; cfs_time_t deadline; - LASSERT(lnet_peer_aliveness_enabled(lp)); + LASSERT (lnet_peer_aliveness_enabled(lp)); - /* Trust lnet_notify() if it has more recent aliveness news, but + /* + * Trust lnet_notify() if it has more recent aliveness news, but * ignore the initial assumed death (see lnet_peers_start_down()). */ if (!lp->lp_alive && lp->lp_alive_count > 0 && cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive)) return 0; - deadline = cfs_time_add(lp->lp_last_alive, - cfs_time_seconds(lp->lp_ni->ni_peertimeout)); + deadline = + cfs_time_add(lp->lp_last_alive, + cfs_time_seconds(lp->lp_net->net_tunables. + lct_peer_timeout)); alive = cfs_time_after(deadline, now); - /* Update obsolete lp_alive except for routers assumed to be dead + /* + * Update obsolete lp_alive except for routers assumed to be dead * initially, because router checker would update aliveness in this * case, and moreover lp_last_alive at peer creation is assumed. */ @@ -728,7 +733,7 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) /* NB: returns 1 when alive, 0 when dead, negative when error; * may drop the lnet_net_lock */ static int -lnet_peer_alive_locked (lnet_peer_t *lp) +lnet_peer_alive_locked (struct lnet_ni *ni, lnet_peer_t *lp) { cfs_time_t now = cfs_time_current(); @@ -738,8 +743,10 @@ lnet_peer_alive_locked (lnet_peer_t *lp) if (lnet_peer_is_alive(lp, now)) return 1; - /* Peer appears dead, but we should avoid frequent NI queries (at - * most once per lnet_queryinterval seconds). */ + /* + * Peer appears dead, but we should avoid frequent NI queries (at + * most once per lnet_queryinterval seconds). 
+ */ if (lp->lp_last_query != 0) { static const int lnet_queryinterval = 1; @@ -754,13 +761,13 @@ lnet_peer_alive_locked (lnet_peer_t *lp) libcfs_nid2str(lp->lp_nid), (int)now, (int)next_query, lnet_queryinterval, - lp->lp_ni->ni_peertimeout); + lp->lp_net->net_tunables.lct_peer_timeout); return 0; } } /* query NI for latest aliveness news */ - lnet_ni_query_locked(lp->lp_ni, lp); + lnet_ni_query_locked(ni, lp); if (lnet_peer_is_alive(lp, now)) return 1; @@ -784,7 +791,7 @@ static int lnet_post_send_locked(lnet_msg_t *msg, int do_send) { lnet_peer_t *lp = msg->msg_txpeer; - lnet_ni_t *ni = lp->lp_ni; + lnet_ni_t *ni = msg->msg_txni; int cpt = msg->msg_tx_cpt; struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt]; @@ -795,7 +802,7 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) /* NB 'lp' is always the next hop */ if ((msg->msg_target.pid & LNET_PID_USERFLAG) == 0 && - lnet_peer_alive_locked(lp) == 0) { + lnet_peer_alive_locked(ni, lp) == 0) { the_lnet.ln_counters[cpt]->drop_count++; the_lnet.ln_counters[cpt]->drop_length += msg->msg_len; lnet_net_unlock(cpt); @@ -954,7 +961,7 @@ lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv) int cpt = msg->msg_rx_cpt; lnet_net_unlock(cpt); - lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1, + lnet_ni_recv(msg->msg_rxni, msg->msg_private, msg, 1, 0, msg->msg_len, msg->msg_len); lnet_net_lock(cpt); } @@ -966,9 +973,10 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) { lnet_peer_t *txpeer = msg->msg_txpeer; lnet_msg_t *msg2; + struct lnet_ni *txni = msg->msg_txni; if (msg->msg_txcredit) { - struct lnet_ni *ni = txpeer->lp_ni; + struct lnet_ni *ni = msg->msg_txni; struct lnet_tx_queue *tq = ni->ni_tx_queues[msg->msg_tx_cpt]; /* give back NI txcredits */ @@ -983,7 +991,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) lnet_msg_t, msg_list); list_del(&msg2->msg_list); - LASSERT(msg2->msg_txpeer->lp_ni == ni); + LASSERT(msg2->msg_txni == ni); LASSERT(msg2->msg_tx_delayed); (void) lnet_post_send_locked(msg2, 1); @@ -1013,6 +1021,11 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) } } + if (txni != NULL) { + msg->msg_txni = NULL; + lnet_ni_decref_locked(txni, msg->msg_tx_cpt); + } + if (txpeer != NULL) { msg->msg_txpeer = NULL; lnet_peer_decref_locked(txpeer); @@ -1047,7 +1060,7 @@ lnet_drop_routed_msgs_locked(struct list_head *list, int cpt) lnet_net_unlock(cpt); list_for_each_entry_safe(msg, tmp, &drop, msg_list) { - lnet_ni_recv(msg->msg_rxpeer->lp_ni, msg->msg_private, NULL, + lnet_ni_recv(msg->msg_rxni, msg->msg_private, NULL, 0, 0, 0, msg->msg_hdr.payload_length); list_del_init(&msg->msg_list); lnet_finalize(NULL, msg, -ECANCELED); @@ -1060,6 +1073,7 @@ void lnet_return_rx_credits_locked(lnet_msg_t *msg) { lnet_peer_t *rxpeer = msg->msg_rxpeer; + struct lnet_ni *rxni = msg->msg_rxni; lnet_msg_t *msg2; if (msg->msg_rtrcredit) { @@ -1129,6 +1143,10 @@ routing_off: (void) lnet_post_routed_recv_locked(msg2, 1); } } + if (rxni != NULL) { + msg->msg_rxni = NULL; + lnet_ni_decref_locked(rxni, msg->msg_rx_cpt); + } if (rxpeer != NULL) { msg->msg_rxpeer = NULL; lnet_peer_decref_locked(rxpeer); @@ -1174,7 +1192,8 @@ lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2) } static lnet_peer_t * -lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid) +lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target, + lnet_nid_t rtr_nid) { lnet_remotenet_t *rnet; lnet_route_t *route; @@ -1187,7 +1206,7 @@ lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid) /* If @rtr_nid is not LNET_NID_ANY, return the 
gateway with * rtr_nid nid, otherwise find the best gateway I can use */ - rnet = lnet_find_net_locked(LNET_NIDNET(target)); + rnet = lnet_find_rnet_locked(LNET_NIDNET(target)); if (rnet == NULL) return NULL; @@ -1199,7 +1218,7 @@ lnet_find_route_locked(lnet_ni_t *ni, lnet_nid_t target, lnet_nid_t rtr_nid) if (!lnet_is_route_alive(route)) continue; - if (ni != NULL && lp->lp_ni != ni) + if (net != NULL && lp->lp_net != net) continue; if (lp->lp_nid == rtr_nid) /* it's pre-determined router */ @@ -1254,14 +1273,13 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) msg->msg_sending = 1; LASSERT(!msg->msg_tx_committed); - cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid); + local_ni = lnet_net2ni(LNET_NIDNET(dst_nid)); + cpt = lnet_cpt_of_nid(rtr_nid == LNET_NID_ANY ? dst_nid : rtr_nid, + local_ni); again: - lnet_net_lock(cpt); - - if (the_lnet.ln_shutdown) { - lnet_net_unlock(cpt); + if (the_lnet.ln_shutdown) return -ESHUTDOWN; - } + lnet_net_lock(cpt); if (src_nid == LNET_NID_ANY) { src_ni = NULL; @@ -1284,11 +1302,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) if (src_ni == NULL) { src_ni = local_ni; src_nid = src_ni->ni_nid; - } else if (src_ni == local_ni) { - lnet_ni_decref_locked(local_ni, cpt); - } else { - lnet_ni_decref_locked(local_ni, cpt); - lnet_ni_decref_locked(src_ni, cpt); + } else if (src_ni != local_ni) { lnet_net_unlock(cpt); LCONSOLE_WARN("No route to %s via from %s\n", libcfs_nid2str(dst_nid), @@ -1306,16 +1320,10 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) /* No send credit hassles with LOLND */ lnet_net_unlock(cpt); lnet_ni_send(src_ni, msg); - - lnet_net_lock(cpt); - lnet_ni_decref_locked(src_ni, cpt); - lnet_net_unlock(cpt); return 0; } rc = lnet_nid2peer_locked(&lp, dst_nid, cpt); - /* lp has ref on src_ni; lose mine */ - lnet_ni_decref_locked(src_ni, cpt); if (rc != 0) { lnet_net_unlock(cpt); LCONSOLE_WARN("Error %d finding peer %s\n", rc, @@ -1323,13 +1331,13 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) /* ENOMEM or shutting down */ return rc; } - LASSERT(lp->lp_ni == src_ni); + LASSERT (lp->lp_net == src_ni->ni_net); } else { /* sending to a remote network */ - lp = lnet_find_route_locked(src_ni, dst_nid, rtr_nid); + lp = lnet_find_route_locked(src_ni != NULL ? 
+ src_ni->ni_net : NULL, + dst_nid, rtr_nid); if (lp == NULL) { - if (src_ni != NULL) - lnet_ni_decref_locked(src_ni, cpt); lnet_net_unlock(cpt); LCONSOLE_WARN("No route to %s via %s " @@ -1344,10 +1352,8 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) * pre-determined router, this can happen if router table * was changed when we release the lock */ if (rtr_nid != lp->lp_nid) { - cpt2 = lnet_cpt_of_nid_locked(lp->lp_nid); + cpt2 = lp->lp_cpt; if (cpt2 != cpt) { - if (src_ni != NULL) - lnet_ni_decref_locked(src_ni, cpt); lnet_net_unlock(cpt); rtr_nid = lp->lp_nid; @@ -1361,11 +1367,11 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) lnet_msgtyp2str(msg->msg_type), msg->msg_len); if (src_ni == NULL) { - src_ni = lp->lp_ni; + src_ni = lnet_get_next_ni_locked(lp->lp_net, NULL); + LASSERT(src_ni != NULL); src_nid = src_ni->ni_nid; } else { - LASSERT(src_ni == lp->lp_ni); - lnet_ni_decref_locked(src_ni, cpt); + LASSERT (src_ni->ni_net == lp->lp_net); } lnet_peer_addref_locked(lp); @@ -1389,7 +1395,10 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) LASSERT(!msg->msg_txcredit); LASSERT(msg->msg_txpeer == NULL); - msg->msg_txpeer = lp; /* msg takes my ref on lp */ + msg->msg_txpeer = lp; /* msg takes my ref on lp */ + /* set the NI for this message */ + msg->msg_txni = src_ni; + lnet_ni_addref_locked(msg->msg_txni, cpt); rc = lnet_post_send_locked(msg, 0); lnet_net_unlock(cpt); @@ -1453,8 +1462,9 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) info.mi_rlength = hdr->payload_length; info.mi_roffset = hdr->msg.put.offset; info.mi_mbits = hdr->msg.put.match_bits; + info.mi_cpt = msg->msg_rxpeer->lp_cpt; - msg->msg_rx_ready_delay = ni->ni_lnd->lnd_eager_recv == NULL; + msg->msg_rx_ready_delay = ni->ni_net->net_lnd->lnd_eager_recv == NULL; ready_delay = msg->msg_rx_ready_delay; again: @@ -1687,7 +1697,7 @@ lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg) if (msg->msg_rxpeer->lp_rtrcredits <= 0 || lnet_msg2bufpool(msg)->rbp_credits <= 0) { - if (ni->ni_lnd->lnd_eager_recv == NULL) { + if (ni->ni_net->net_lnd->lnd_eager_recv == NULL) { msg->msg_rx_ready_delay = 1; } else { lnet_net_unlock(msg->msg_rx_cpt); @@ -1832,7 +1842,7 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, payload_length = le32_to_cpu(hdr->payload_length); for_me = (ni->ni_nid == dest_nid); - cpt = lnet_cpt_of_nid(from_nid); + cpt = lnet_cpt_of_nid(from_nid, ni); switch (type) { case LNET_MSG_ACK: @@ -1994,6 +2004,8 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, return 0; goto drop; } + msg->msg_rxni = ni; + lnet_ni_addref_locked(ni, cpt); if (lnet_isrouter(msg->msg_rxpeer)) { lnet_peer_set_alive(msg->msg_rxpeer); @@ -2078,7 +2090,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason) * called lnet_drop_message(), so I just hang onto msg as well * until that's done */ - lnet_drop_message(msg->msg_rxpeer->lp_ni, + lnet_drop_message(msg->msg_rxni, msg->msg_rxpeer->lp_cpt, msg->msg_private, msg->msg_len); /* @@ -2086,7 +2098,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason) * but we still should give error code so lnet_msg_decommit() * can skip counters operations and other checks. 
*/ - lnet_finalize(msg->msg_rxpeer->lp_ni, msg, -ENOENT); + lnet_finalize(msg->msg_rxni, msg, -ENOENT); } } @@ -2109,6 +2121,7 @@ lnet_recv_delayed_msg_list(struct list_head *head) LASSERT(msg->msg_rx_delayed); LASSERT(msg->msg_md != NULL); LASSERT(msg->msg_rxpeer != NULL); + LASSERT(msg->msg_rxni != NULL); LASSERT(msg->msg_hdr.type == LNET_MSG_PUT); CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d " @@ -2118,7 +2131,7 @@ lnet_recv_delayed_msg_list(struct list_head *head) msg->msg_hdr.msg.put.offset, msg->msg_hdr.payload_length); - lnet_recv_put(msg->msg_rxpeer->lp_ni, msg); + lnet_recv_put(msg->msg_rxni, msg); } } @@ -2303,7 +2316,7 @@ lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *getmsg) lnet_msg_attach_md(msg, getmd, getmd->md_offset, getmd->md_length); lnet_res_unlock(cpt); - cpt = lnet_cpt_of_nid(peer_id.nid); + cpt = lnet_cpt_of_nid(peer_id.nid, ni); lnet_net_lock(cpt); lnet_msg_commit(msg, cpt); @@ -2314,7 +2327,7 @@ lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *getmsg) return msg; drop: - cpt = lnet_cpt_of_nid(peer_id.nid); + cpt = lnet_cpt_of_nid(peer_id.nid, ni); lnet_net_lock(cpt); the_lnet.ln_counters[cpt]->drop_count++; @@ -2461,7 +2474,7 @@ int LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) { struct list_head *e; - struct lnet_ni *ni; + struct lnet_ni *ni = NULL; lnet_remotenet_t *rnet; __u32 dstnet = LNET_NIDNET(dstnid); int hops; @@ -2478,9 +2491,7 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) cpt = lnet_net_lock_current(); - list_for_each(e, &the_lnet.ln_nis) { - ni = list_entry(e, lnet_ni_t, ni_list); - + while ((ni = lnet_get_next_ni_locked(NULL, ni))) { if (ni->ni_nid == dstnid) { if (srcnidp != NULL) *srcnidp = dstnid; @@ -2540,8 +2551,12 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) LASSERT(shortest != NULL); hops = shortest_hops; - if (srcnidp != NULL) - *srcnidp = shortest->lr_gateway->lp_ni->ni_nid; + if (srcnidp != NULL) { + ni = lnet_get_next_ni_locked( + shortest->lr_gateway->lp_net, + NULL); + *srcnidp = ni->ni_nid; + } if (orderp != NULL) *orderp = order; lnet_net_unlock(cpt); diff --git a/lnet/lnet/lib-ptl.c b/lnet/lnet/lib-ptl.c index acba755..cddd7de 100644 --- a/lnet/lnet/lib-ptl.c +++ b/lnet/lnet/lib-ptl.c @@ -222,7 +222,7 @@ lnet_match2mt(struct lnet_portal *ptl, lnet_process_id_t id, __u64 mbits) /* if it's a unique portal, return match-table hashed by NID */ return lnet_ptl_is_unique(ptl) ? 
- ptl->ptl_mtables[lnet_cpt_of_nid(id.nid)] : NULL; + ptl->ptl_mtables[lnet_cpt_of_nid(id.nid, NULL)] : NULL; } struct lnet_match_table * @@ -292,7 +292,7 @@ lnet_mt_of_match(struct lnet_match_info *info, struct lnet_msg *msg) rotor = ptl->ptl_rotor++; /* get round-robin factor */ if (portal_rotor == LNET_PTL_ROTOR_HASH_RT && routed) - cpt = lnet_cpt_of_nid(msg->msg_hdr.src_nid); + cpt = info->mi_cpt; else cpt = rotor % LNET_CPT_NUMBER; @@ -941,7 +941,7 @@ lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason) /* grab all messages which are on the NI passed in */ list_for_each_entry_safe(msg, tmp, &ptl->ptl_msg_delayed, msg_list) { - if (msg->msg_rxpeer->lp_ni == ni) + if (msg->msg_txni == ni || msg->msg_rxni == ni) list_move(&msg->msg_list, &zombies); } } else { diff --git a/lnet/lnet/lo.c b/lnet/lnet/lo.c index 673f9b3..cda649b 100644 --- a/lnet/lnet/lo.c +++ b/lnet/lnet/lo.c @@ -94,7 +94,7 @@ lolnd_shutdown(lnet_ni_t *ni) static int lolnd_startup (lnet_ni_t *ni) { - LASSERT (ni->ni_lnd == &the_lolnd); + LASSERT (ni->ni_net->net_lnd == &the_lolnd); LASSERT (!lolnd_instanced); lolnd_instanced = 1; diff --git a/lnet/lnet/net_fault.c b/lnet/lnet/net_fault.c index 083b169..91c9c6b 100644 --- a/lnet/lnet/net_fault.c +++ b/lnet/lnet/net_fault.c @@ -617,8 +617,9 @@ delayed_msg_process(struct list_head *msg_list, bool drop) msg = list_entry(msg_list->next, struct lnet_msg, msg_list); LASSERT(msg->msg_rxpeer != NULL); + LASSERT(msg->msg_rxni != NULL); - ni = msg->msg_rxpeer->lp_ni; + ni = msg->msg_rxni; cpt = msg->msg_rx_cpt; list_del_init(&msg->msg_list); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 523d5b3..236f63b 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -110,7 +110,7 @@ lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable) for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], lp_hashlist) { - if (ni != NULL && ni != lp->lp_ni) + if (ni != NULL && ni->ni_net != lp->lp_net) continue; list_del_init(&lp->lp_hashlist); /* Lose hash table's ref */ @@ -152,7 +152,7 @@ lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable, for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], lp_hashlist) { - if (ni != lp->lp_ni) + if (ni->ni_net != lp->lp_net) continue; if (lp->lp_rtr_refcount == 0) @@ -224,8 +224,7 @@ lnet_destroy_peer_locked(lnet_peer_t *lp) LASSERT(ptable->pt_number > 0); ptable->pt_number--; - lnet_ni_decref_locked(lp->lp_ni, lp->lp_cpt); - lp->lp_ni = NULL; + lp->lp_net = NULL; list_add(&lp->lp_hashlist, &ptable->pt_deathrow); LASSERT(ptable->pt_zombies > 0); @@ -265,7 +264,7 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) return -ESHUTDOWN; /* cpt can be LNET_LOCK_EX if it's called from router functions */ - cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid); + cpt2 = cpt != LNET_LOCK_EX ? 
cpt : lnet_cpt_of_nid_locked(nid, NULL); ptable = the_lnet.ln_peer_tables[cpt2]; lp = lnet_find_peer_locked(ptable, nid); @@ -330,16 +329,11 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) goto out; } - lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid), cpt2); - if (lp->lp_ni == NULL) { - rc = -EHOSTUNREACH; - goto out; - } - + lp->lp_net = lnet_get_net_locked(LNET_NIDNET(lp->lp_nid)); lp->lp_txcredits = - lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; + lp->lp_mintxcredits = lp->lp_net->net_tunables.lct_peer_tx_credits; lp->lp_rtrcredits = - lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_ni); + lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_net); list_add_tail(&lp->lp_hashlist, &ptable->pt_hash[lnet_nid2peerhash(nid)]); @@ -362,7 +356,7 @@ lnet_debug_peer(lnet_nid_t nid) int rc; int cpt; - cpt = lnet_cpt_of_nid(nid); + cpt = lnet_cpt_of_nid(nid, NULL); lnet_net_lock(cpt); rc = lnet_nid2peer_locked(&lp, nid, cpt); @@ -377,7 +371,7 @@ lnet_debug_peer(lnet_nid_t nid) CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n", libcfs_nid2str(lp->lp_nid), lp->lp_refcount, - aliveness, lp->lp_ni->ni_peertxcredits, + aliveness, lp->lp_net->net_tunables.lct_peer_tx_credits, lp->lp_rtrcredits, lp->lp_minrtrcredits, lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob); @@ -431,7 +425,8 @@ int lnet_get_peer_info(__u32 peer_index, __u64 *nid, *nid = lp->lp_nid; *refcount = lp->lp_refcount; - *ni_peer_tx_credits = lp->lp_ni->ni_peertxcredits; + *ni_peer_tx_credits = + lp->lp_net->net_tunables.lct_peer_tx_credits; *peer_tx_credits = lp->lp_txcredits; *peer_rtr_credits = lp->lp_rtrcredits; *peer_min_rtr_credits = lp->lp_mintxcredits; diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index 3ae2ba3..b4c5842 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -55,17 +55,17 @@ module_param(auto_down, int, 0444); MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error"); int -lnet_peer_buffer_credits(lnet_ni_t *ni) +lnet_peer_buffer_credits(struct lnet_net *net) { /* NI option overrides LNet default */ - if (ni->ni_peerrtrcredits > 0) - return ni->ni_peerrtrcredits; + if (net->net_tunables.lct_peer_rtr_credits > 0) + return net->net_tunables.lct_peer_rtr_credits; if (peer_buffer_credits > 0) return peer_buffer_credits; /* As an approximation, allow this peer the same number of router * buffers as it is allowed outstanding sends */ - return ni->ni_peertxcredits; + return net->net_tunables.lct_peer_tx_credits; } /* forward ref's */ @@ -148,13 +148,14 @@ lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp) lp->lp_notifylnd = 0; lp->lp_notify = 0; - if (notifylnd && ni->ni_lnd->lnd_notify != NULL) { + if (notifylnd && ni->ni_net->net_lnd->lnd_notify != NULL) { lnet_net_unlock(lp->lp_cpt); /* A new notification could happen now; I'll handle it * when control returns to me */ - (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive); + (ni->ni_net->net_lnd->lnd_notify)(ni, lp->lp_nid, + alive); lnet_net_lock(lp->lp_cpt); } @@ -216,7 +217,7 @@ lnet_rtr_decref_locked(lnet_peer_t *lp) } lnet_remotenet_t * -lnet_find_net_locked (__u32 net) +lnet_find_rnet_locked(__u32 net) { lnet_remotenet_t *rnet; struct list_head *tmp; @@ -240,8 +241,7 @@ static void lnet_shuffle_seed(void) __u32 lnd_type; __u32 seed[2]; struct timespec64 ts; - lnet_ni_t *ni; - struct list_head *tmp; + lnet_ni_t *ni = NULL; if (seeded) return; @@ -250,8 +250,7 @@ static void lnet_shuffle_seed(void) /* Nodes with small feet have little entropy * the NID for this node gives the most entropy 
in the low bits */ - list_for_each(tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); + while ((ni = lnet_get_next_ni_locked(NULL, ni))) { lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); if (lnd_type != LOLND) @@ -356,7 +355,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, LASSERT(!the_lnet.ln_shutdown); - rnet2 = lnet_find_net_locked(net); + rnet2 = lnet_find_rnet_locked(net); if (rnet2 == NULL) { /* new network */ list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net)); @@ -381,12 +380,12 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */ lnet_add_route_to_rnet(rnet2, route); - ni = route->lr_gateway->lp_ni; + ni = lnet_get_next_ni_locked(route->lr_gateway->lp_net, NULL); lnet_net_unlock(LNET_LOCK_EX); /* XXX Assume alive */ - if (ni->ni_lnd->lnd_notify != NULL) - (ni->ni_lnd->lnd_notify)(ni, gateway, 1); + if (ni->ni_net->net_lnd->lnd_notify != NULL) + (ni->ni_net->net_lnd->lnd_notify)(ni, gateway, 1); lnet_net_lock(LNET_LOCK_EX); } @@ -444,8 +443,8 @@ lnet_check_routes(void) continue; } - if (route->lr_gateway->lp_ni == - route2->lr_gateway->lp_ni) + if (route->lr_gateway->lp_net == + route2->lr_gateway->lp_net) continue; nid1 = route->lr_gateway->lp_nid; @@ -833,8 +832,8 @@ lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net) static void lnet_update_ni_status_locked(void) { - lnet_ni_t *ni; - time64_t now; + lnet_ni_t *ni = NULL; + time64_t now; int timeout; LASSERT(the_lnet.ln_routing); @@ -843,8 +842,8 @@ lnet_update_ni_status_locked(void) MAX(live_router_check_interval, dead_router_check_interval); now = ktime_get_real_seconds(); - list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) { - if (ni->ni_lnd->lnd_type == LOLND) + while ((ni = lnet_get_next_ni_locked(NULL, ni))) { + if (ni->ni_net->net_lnd->lnd_type == LOLND) continue; if (now < ni->ni_last_alive + timeout) @@ -977,8 +976,9 @@ static void lnet_ping_router_locked (lnet_peer_t *rtr) { lnet_rc_data_t *rcd = NULL; - cfs_time_t now = cfs_time_current(); - int secs; + cfs_time_t now = cfs_time_current(); + int secs; + struct lnet_ni *ni; lnet_peer_addref_locked(rtr); @@ -987,7 +987,8 @@ lnet_ping_router_locked (lnet_peer_t *rtr) lnet_notify_locked(rtr, 1, 0, now); /* Run any outstanding notifications */ - lnet_ni_notify_locked(rtr->lp_ni, rtr); + ni = lnet_get_next_ni_locked(rtr->lp_net, NULL); + lnet_ni_notify_locked(ni, rtr); if (!lnet_isrouter(rtr) || the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) { @@ -1242,7 +1243,7 @@ rescan: list_for_each(entry, &the_lnet.ln_routers) { rtr = list_entry(entry, lnet_peer_t, lp_rtr_list); - cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid); + cpt2 = rtr->lp_cpt; if (cpt != cpt2) { lnet_net_unlock(cpt); cpt = cpt2; @@ -1718,7 +1719,7 @@ lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when) { struct lnet_peer *lp = NULL; cfs_time_t now = cfs_time_current(); - int cpt = lnet_cpt_of_nid(nid); + int cpt = lnet_cpt_of_nid(nid, ni); LASSERT (!in_interrupt ()); diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index efec11b..0d2d670 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -490,19 +490,19 @@ proc_lnet_peers(struct ctl_table *table, int write, void __user *buffer, p = NULL; hoff = 1; hash++; - } + } if (peer != NULL) { - lnet_nid_t nid = peer->lp_nid; - int nrefs = peer->lp_refcount; - int lastalive = -1; - char *aliveness = "NA"; - int maxcr = peer->lp_ni->ni_peertxcredits; - int txcr = peer->lp_txcredits; - int mintxcr = 
peer->lp_mintxcredits; - int rtrcr = peer->lp_rtrcredits; - int minrtrcr = peer->lp_minrtrcredits; - int txqnob = peer->lp_txqnob; + lnet_nid_t nid = peer->lp_nid; + int nrefs = peer->lp_refcount; + int lastalive = -1; + char *aliveness = "NA"; + int maxcr = peer->lp_net->net_tunables.lct_peer_tx_credits; + int txcr = peer->lp_txcredits; + int mintxcr = peer->lp_mintxcredits; + int rtrcr = peer->lp_rtrcredits; + int minrtrcr = peer->lp_minrtrcredits; + int txqnob = peer->lp_txqnob; if (lnet_isrouter(peer) || lnet_peer_aliveness_enabled(peer)) @@ -656,27 +656,14 @@ proc_lnet_nis(struct ctl_table *table, int write, void __user *buffer, "%-24s %6s %5s %4s %4s %4s %5s %5s %5s\n", "nid", "status", "alive", "refs", "peer", "rtr", "max", "tx", "min"); - LASSERT(tmpstr + tmpsiz - s > 0); + LASSERT (tmpstr + tmpsiz - s > 0); } else { - struct list_head *n; - lnet_ni_t *ni = NULL; - int skip = *ppos - 1; + lnet_ni_t *ni = NULL; + int skip = *ppos - 1; lnet_net_lock(0); - n = the_lnet.ln_nis.next; - - while (n != &the_lnet.ln_nis) { - lnet_ni_t *a_ni = list_entry(n, lnet_ni_t, ni_list); - - if (skip == 0) { - ni = a_ni; - break; - } - - skip--; - n = n->next; - } + ni = lnet_get_ni_idx_locked(skip); if (ni != NULL) { struct lnet_tx_queue *tq; @@ -690,7 +677,7 @@ proc_lnet_nis(struct ctl_table *table, int write, void __user *buffer, last_alive = now - ni->ni_last_alive; /* @lo forever alive */ - if (ni->ni_lnd->lnd_type == LOLND) + if (ni->ni_net->net_lnd->lnd_type == LOLND) last_alive = 0; lnet_ni_lock(ni); @@ -718,8 +705,8 @@ proc_lnet_nis(struct ctl_table *table, int write, void __user *buffer, "%-24s %6s %5d %4d %4d %4d %5d %5d %5d\n", libcfs_nid2str(ni->ni_nid), stat, last_alive, *ni->ni_refs[i], - ni->ni_peertxcredits, - ni->ni_peerrtrcredits, + ni->ni_net->net_tunables.lct_peer_tx_credits, + ni->ni_net->net_tunables.lct_peer_rtr_credits, tq->tq_credits_max, tq->tq_credits, tq->tq_credits_min); if (i != 0) diff --git a/lnet/selftest/brw_test.c b/lnet/selftest/brw_test.c index bfabc6c..51e3254 100644 --- a/lnet/selftest/brw_test.c +++ b/lnet/selftest/brw_test.c @@ -120,7 +120,7 @@ brw_client_init (sfw_test_instance_t *tsi) return -EINVAL; list_for_each_entry(tsu, &tsi->tsi_units, tsu_list) { - bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid), + bulk = srpc_alloc_bulk(lnet_cpt_of_nid(tsu->tsu_dest.nid, NULL), off, npg, len, opc == LST_BRW_READ); if (bulk == NULL) { brw_client_fini(tsi); diff --git a/lnet/selftest/framework.c b/lnet/selftest/framework.c index ae43d09..50cf411 100644 --- a/lnet/selftest/framework.c +++ b/lnet/selftest/framework.c @@ -1032,7 +1032,8 @@ sfw_run_batch (sfw_batch_t *tsb) wi = &tsu->tsu_worker; swi_init_workitem(wi, tsu, sfw_run_test, lst_sched_test[\ - lnet_cpt_of_nid(tsu->tsu_dest.nid)]); + lnet_cpt_of_nid(tsu->tsu_dest.nid, + NULL)]); swi_schedule_workitem(wi); } } diff --git a/lnet/selftest/selftest.h b/lnet/selftest/selftest.h index 970f130..9fecaad 100644 --- a/lnet/selftest/selftest.h +++ b/lnet/selftest/selftest.h @@ -524,7 +524,7 @@ srpc_init_client_rpc(srpc_client_rpc_t *rpc, lnet_process_id_t peer, INIT_LIST_HEAD(&rpc->crpc_list); swi_init_workitem(&rpc->crpc_wi, rpc, srpc_send_rpc, - lst_sched_test[lnet_cpt_of_nid(peer.nid)]); + lst_sched_test[lnet_cpt_of_nid(peer.nid, NULL)]); spin_lock_init(&rpc->crpc_lock); atomic_set(&rpc->crpc_refcount, 1); /* 1 ref for caller */ -- 1.8.3.1
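
For callers being converted to the per-net NI lists introduced above, the iteration pattern looks roughly like the following sketch. The helpers lnet_get_next_ni_locked(), lnet_net_lock_current() and lnet_net_unlock() are the ones used in the hunks above (see LNetDist() and lnet_get_net_config()); the CDEBUG loop body is only illustrative and not part of the patch.

	/* sketch: walk local NIs under the net lock */
	struct lnet_ni *ni = NULL;
	int cpt = lnet_net_lock_current();

	/* a NULL net walks every NI on every local net; passing a
	 * specific struct lnet_net restricts the walk to that net */
	while ((ni = lnet_get_next_ni_locked(NULL, ni)) != NULL)
		CDEBUG(D_NET, "local NI %s\n", libcfs_nid2str(ni->ni_nid));

	lnet_net_unlock(cpt);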