From 58091af960fed0cc16e2b96e54ee6e2f8791d9c1 Mon Sep 17 00:00:00 2001 From: Amir Shehata Date: Fri, 11 Dec 2015 20:02:54 -0800 Subject: [PATCH] LU-7734 lnet: Multi-Rail peer split Split the peer structure into peer/peer_net/peer_ni, as described in the Multi-Rail HLD. Removed deathrow list in peers, instead peers are immediately deleted. deathrow complicates memory management for peers to little gain. Moved to LNET_LOCK_EX for any operations which will modify the peer tables. And CPT locks for any operations which read the peer tables. Therefore there is no need to use lnet_cpt_of_nid() to calculate the CPT of the peer NID, instead we use lnet_nid_cpt_hash() to distribute peers across multiple CPTs. It is no longer true that peers and NIs would exist on the same CPT. In the new design peers and NIs don't have a 1-1 relationship. You can send to the same peer from several NIs, which can exist on separate CPTs. Signed-off-by: Amir Shehata Change-Id: Ida41d830d38d0ab2bb551476e4a8866d52a25fe2 Reviewed-on: http://review.whamcloud.com/18293 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Olaf Weber Reviewed-by: Doug Oucharek --- lnet/include/lnet/lib-lnet.h | 45 ++--- lnet/include/lnet/lib-types.h | 107 ++++++++---- lnet/lnet/api-ni.c | 3 +- lnet/lnet/lib-move.c | 204 +++++++++++------------ lnet/lnet/peer.c | 380 ++++++++++++++++++++++++++---------------- lnet/lnet/router.c | 288 ++++++++++++++++---------------- lnet/lnet/router_proc.c | 66 ++++---- 7 files changed, 613 insertions(+), 480 deletions(-) diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index 6c12c35..0d3fc99 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -72,9 +72,9 @@ extern lnet_t the_lnet; /* THE network */ static inline int lnet_is_route_alive(lnet_route_t *route) { - if (!route->lr_gateway->lp_alive) + if (!route->lr_gateway->lpni_alive) return 0; /* gateway is down */ - if ((route->lr_gateway->lp_ping_feats & + if
((route->lr_gateway->lpni_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0) return 1; /* no NI status, assume it's alive */ /* has NI status, check # down NIs */ @@ -379,27 +379,27 @@ lnet_handle2me(lnet_handle_me_t *handle) } static inline void -lnet_peer_addref_locked(lnet_peer_t *lp) +lnet_peer_ni_addref_locked(struct lnet_peer_ni *lp) { - LASSERT(lp->lp_refcount > 0); - lp->lp_refcount++; + LASSERT (atomic_read(&lp->lpni_refcount) > 0); + atomic_inc(&lp->lpni_refcount); } -extern void lnet_destroy_peer_locked(lnet_peer_t *lp); +extern void lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lp); static inline void -lnet_peer_decref_locked(lnet_peer_t *lp) +lnet_peer_ni_decref_locked(struct lnet_peer_ni *lp) { - LASSERT(lp->lp_refcount > 0); - lp->lp_refcount--; - if (lp->lp_refcount == 0) - lnet_destroy_peer_locked(lp); + LASSERT (atomic_read(&lp->lpni_refcount) > 0); + atomic_dec(&lp->lpni_refcount); + if (atomic_read(&lp->lpni_refcount) == 0) + lnet_destroy_peer_ni_locked(lp); } static inline int -lnet_isrouter(lnet_peer_t *lp) +lnet_isrouter(struct lnet_peer_ni *lp) { - return lp->lp_rtr_refcount != 0; + return lp->lpni_rtr_refcount != 0; } static inline void @@ -488,6 +488,7 @@ lnet_net2rnethash(__u32 net) extern lnd_t the_lolnd; extern int avoid_asym_router_failure; +extern unsigned int lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number); extern int lnet_cpt_of_nid_locked(lnet_nid_t nid, struct lnet_ni *ni); extern int lnet_cpt_of_nid(lnet_nid_t nid, struct lnet_ni *ni); extern lnet_ni_t *lnet_nid2ni_locked(lnet_nid_t nid, int cpt); @@ -502,7 +503,8 @@ void lnet_lib_exit(void); extern int portal_rotor; int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, cfs_time_t when); -void lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when); +void lnet_notify_locked(struct lnet_peer_ni *lp, int notifylnd, int alive, + cfs_time_t when); int lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway_nid, unsigned int priority); int 
lnet_check_routes(void); @@ -777,7 +779,7 @@ int lnet_peer_buffer_credits(struct lnet_net *net); int lnet_router_checker_start(void); void lnet_router_checker_stop(void); -void lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net); +void lnet_router_ni_update_locked(struct lnet_peer_ni *gw, __u32 net); void lnet_swap_pinginfo(struct lnet_ping_info *info); int lnet_parse_ip2nets(char **networksp, char *ip2nets); @@ -788,9 +790,8 @@ bool lnet_net_unique(__u32 net_id, struct list_head *nilist, struct lnet_net **net); bool lnet_ni_unique_net(struct list_head *nilist, char *iface); -int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt); -lnet_peer_t *lnet_find_peer_locked(struct lnet_peer_table *ptable, - lnet_nid_t nid); +int lnet_nid2peerni_locked(struct lnet_peer_ni **lpp, lnet_nid_t nid, int cpt); +struct lnet_peer_ni *lnet_find_peer_ni_locked(lnet_nid_t nid, int cpt); void lnet_peer_tables_cleanup(lnet_ni_t *ni); void lnet_peer_tables_destroy(void); int lnet_peer_tables_create(void); @@ -803,11 +804,11 @@ int lnet_get_peer_info(__u32 peer_index, __u64 *nid, __u32 *peer_tx_qnob); static inline void -lnet_peer_set_alive(lnet_peer_t *lp) +lnet_peer_set_alive(struct lnet_peer_ni *lp) { - lp->lp_last_alive = lp->lp_last_query = cfs_time_current(); - if (!lp->lp_alive) - lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); + lp->lpni_last_alive = lp->lpni_last_query = cfs_time_current(); + if (!lp->lpni_alive) + lnet_notify_locked(lp, 0, 1, lp->lpni_last_alive); } #endif diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index 601488f..357f67a 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -100,8 +100,8 @@ typedef struct lnet_msg { unsigned int msg_onactivelist:1; /* on the activelist */ unsigned int msg_rdma_get:1; - struct lnet_peer *msg_txpeer; /* peer I'm sending to */ - struct lnet_peer *msg_rxpeer; /* peer I received from */ + struct lnet_peer_ni *msg_txpeer; /* peer I'm sending to */ + struct 
lnet_peer_ni *msg_rxpeer; /* peer I received from */ void *msg_private; struct lnet_libmd *msg_md; @@ -414,63 +414,96 @@ typedef struct { /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ struct list_head rcd_list; lnet_handle_md_t rcd_mdh; /* ping buffer MD */ - struct lnet_peer *rcd_gateway; /* reference to gateway */ + struct lnet_peer_ni *rcd_gateway; /* reference to gateway */ struct lnet_ping_info *rcd_pinginfo; /* ping buffer */ } lnet_rc_data_t; -typedef struct lnet_peer { +struct lnet_peer_ni { + /* chain on peer_net */ + struct list_head lpni_on_peer_net_list; + /* chain on peer hash */ - struct list_head lp_hashlist; + struct list_head lpni_hashlist; /* messages blocking for tx credits */ - struct list_head lp_txq; + struct list_head lpni_txq; /* messages blocking for router credits */ - struct list_head lp_rtrq; + struct list_head lpni_rtrq; /* chain on router list */ - struct list_head lp_rtr_list; + struct list_head lpni_rtr_list; + /* pointer to peer net I'm part of */ + struct lnet_peer_net *lpni_peer_net; /* # tx credits available */ - int lp_txcredits; + int lpni_txcredits; /* low water mark */ - int lp_mintxcredits; + int lpni_mintxcredits; /* # router credits */ - int lp_rtrcredits; + int lpni_rtrcredits; /* low water mark */ - int lp_minrtrcredits; + int lpni_minrtrcredits; /* alive/dead? */ - unsigned int lp_alive:1; + unsigned int lpni_alive:1; /* notification outstanding? */ - unsigned int lp_notify:1; + unsigned int lpni_notify:1; /* outstanding notification for LND?
*/ - unsigned int lp_notifylnd:1; + unsigned int lpni_notifylnd:1; /* some thread is handling notification */ - unsigned int lp_notifying:1; + unsigned int lpni_notifying:1; /* SEND event outstanding from ping */ - unsigned int lp_ping_notsent; + unsigned int lpni_ping_notsent; /* # times router went dead<->alive */ - int lp_alive_count; + int lpni_alive_count; /* bytes queued for sending */ - long lp_txqnob; + long lpni_txqnob; /* time of last aliveness news */ - cfs_time_t lp_timestamp; + cfs_time_t lpni_timestamp; /* time of last ping attempt */ - cfs_time_t lp_ping_timestamp; + cfs_time_t lpni_ping_timestamp; /* != 0 if ping reply expected */ - cfs_time_t lp_ping_deadline; + cfs_time_t lpni_ping_deadline; /* when I was last alive */ - cfs_time_t lp_last_alive; - /* when lp_ni was queried last time */ - cfs_time_t lp_last_query; + cfs_time_t lpni_last_alive; + /* when lpni_ni was queried last time */ + cfs_time_t lpni_last_query; /* network peer is on */ - struct lnet_net *lp_net; - lnet_nid_t lp_nid; /* peer's NID */ - int lp_refcount; /* # refs */ - int lp_cpt; /* CPT this peer attached on */ + struct lnet_net *lpni_net; + /* peer's NID */ + lnet_nid_t lpni_nid; + /* # refs */ + atomic_t lpni_refcount; + /* CPT this peer attached on */ + int lpni_cpt; /* # refs from lnet_route_t::lr_gateway */ - int lp_rtr_refcount; + int lpni_rtr_refcount; /* returned RC ping features */ - unsigned int lp_ping_feats; - struct list_head lp_routes; /* routers on this peer */ - lnet_rc_data_t *lp_rcd; /* router checker state */ -} lnet_peer_t; + unsigned int lpni_ping_feats; + struct list_head lpni_routes; /* routers on this peer */ + lnet_rc_data_t *lpni_rcd; /* router checker state */ +}; + +struct lnet_peer { + /* chain on global peer list */ + struct list_head lp_on_lnet_peer_list; + + /* list of peer nets */ + struct list_head lp_peer_nets; + + /* primary NID of the peer */ + lnet_nid_t lp_primary_nid; +}; + +struct lnet_peer_net { + /* chain on peer block */ + struct 
list_head lpn_on_peer_list; + + /* list of peer_nis on this network */ + struct list_head lpn_peer_nis; + + /* pointer to the peer I'm part of */ + struct lnet_peer *lpn_peer; + + /* Net ID */ + __u32 lpn_net_id; +}; /* peer hash size */ #define LNET_PEER_HASH_BITS 9 @@ -489,12 +522,12 @@ struct lnet_peer_table { /* peer aliveness is enabled only on routers for peers in a network where the * lnet_ni_t::ni_peertimeout has been set to a positive value */ #define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \ - (lp)->lp_net->net_tunables.lct_peer_timeout > 0) + (lp)->lpni_net->net_tunables.lct_peer_timeout > 0) typedef struct { struct list_head lr_list; /* chain on net */ struct list_head lr_gwlist; /* chain on gateway */ - lnet_peer_t *lr_gateway; /* router node */ + struct lnet_peer_ni *lr_gateway; /* router node */ __u32 lr_net; /* remote network number */ int lr_seq; /* sequence for round-robin */ unsigned int lr_downis; /* number of down NIs */ @@ -691,6 +724,8 @@ typedef struct struct lnet_msg_container **ln_msg_containers; lnet_counters_t **ln_counters; struct lnet_peer_table **ln_peer_tables; + /* list of configured or discovered peers */ + struct list_head ln_peers; /* failure simulation */ struct list_head ln_test_peers; struct list_head ln_drop_rules; diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 4910d3a..75116ad 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -589,6 +589,7 @@ lnet_prepare(lnet_pid_t requested_pid) the_lnet.ln_pid = requested_pid; INIT_LIST_HEAD(&the_lnet.ln_test_peers); + INIT_LIST_HEAD(&the_lnet.ln_peers); INIT_LIST_HEAD(&the_lnet.ln_nets); INIT_LIST_HEAD(&the_lnet.ln_routers); INIT_LIST_HEAD(&the_lnet.ln_drop_rules); @@ -745,7 +746,7 @@ lnet_get_net_locked(__u32 net_id) return NULL; } -static unsigned int +unsigned int lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number) { __u64 key = nid; diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index e9a63eb..904c7c2 100644 --- 
a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -667,7 +667,7 @@ lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg) if (rc != 0) { CERROR("recv from %s / send to %s aborted: " "eager_recv failed %d\n", - libcfs_nid2str(msg->msg_rxpeer->lp_nid), + libcfs_nid2str(msg->msg_rxpeer->lpni_nid), libcfs_id2str(msg->msg_target), rc); LASSERT(rc < 0); /* required by my callers */ } @@ -677,26 +677,26 @@ lnet_ni_eager_recv(lnet_ni_t *ni, lnet_msg_t *msg) /* NB: caller shall hold a ref on 'lp' as I'd drop lnet_net_lock */ static void -lnet_ni_query_locked(lnet_ni_t *ni, lnet_peer_t *lp) +lnet_ni_query_locked(lnet_ni_t *ni, struct lnet_peer_ni *lp) { cfs_time_t last_alive = 0; LASSERT(lnet_peer_aliveness_enabled(lp)); LASSERT(ni->ni_net->net_lnd->lnd_query != NULL); - lnet_net_unlock(lp->lp_cpt); - (ni->ni_net->net_lnd->lnd_query)(ni, lp->lp_nid, &last_alive); - lnet_net_lock(lp->lp_cpt); + lnet_net_unlock(lp->lpni_cpt); + (ni->ni_net->net_lnd->lnd_query)(ni, lp->lpni_nid, &last_alive); + lnet_net_lock(lp->lpni_cpt); - lp->lp_last_query = cfs_time_current(); + lp->lpni_last_query = cfs_time_current(); if (last_alive != 0) /* NI has updated timestamp */ - lp->lp_last_alive = last_alive; + lp->lpni_last_alive = last_alive; } /* NB: always called with lnet_net_lock held */ static inline int -lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) +lnet_peer_is_alive (struct lnet_peer_ni *lp, cfs_time_t now) { int alive; cfs_time_t deadline; @@ -707,24 +707,24 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) * Trust lnet_notify() if it has more recent aliveness news, but * ignore the initial assumed death (see lnet_peers_start_down()). */ - if (!lp->lp_alive && lp->lp_alive_count > 0 && - cfs_time_aftereq(lp->lp_timestamp, lp->lp_last_alive)) + if (!lp->lpni_alive && lp->lpni_alive_count > 0 && + cfs_time_aftereq(lp->lpni_timestamp, lp->lpni_last_alive)) return 0; deadline = - cfs_time_add(lp->lp_last_alive, - cfs_time_seconds(lp->lp_net->net_tunables. 
+ cfs_time_add(lp->lpni_last_alive, + cfs_time_seconds(lp->lpni_net->net_tunables. lct_peer_timeout)); alive = cfs_time_after(deadline, now); /* * Update obsolete lp_alive except for routers assumed to be dead * initially, because router checker would update aliveness in this - * case, and moreover lp_last_alive at peer creation is assumed. + * case, and moreover lpni_last_alive at peer creation is assumed. */ - if (alive && !lp->lp_alive && - !(lnet_isrouter(lp) && lp->lp_alive_count == 0)) - lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); + if (alive && !lp->lpni_alive && + !(lnet_isrouter(lp) && lp->lpni_alive_count == 0)) + lnet_notify_locked(lp, 0, 1, lp->lpni_last_alive); return alive; } @@ -733,7 +733,7 @@ lnet_peer_is_alive (lnet_peer_t *lp, cfs_time_t now) /* NB: returns 1 when alive, 0 when dead, negative when error; * may drop the lnet_net_lock */ static int -lnet_peer_alive_locked (struct lnet_ni *ni, lnet_peer_t *lp) +lnet_peer_alive_locked (struct lnet_ni *ni, struct lnet_peer_ni *lp) { cfs_time_t now = cfs_time_current(); @@ -747,21 +747,21 @@ lnet_peer_alive_locked (struct lnet_ni *ni, lnet_peer_t *lp) * Peer appears dead, but we should avoid frequent NI queries (at * most once per lnet_queryinterval seconds). 
*/ - if (lp->lp_last_query != 0) { + if (lp->lpni_last_query != 0) { static const int lnet_queryinterval = 1; cfs_time_t next_query = - cfs_time_add(lp->lp_last_query, + cfs_time_add(lp->lpni_last_query, cfs_time_seconds(lnet_queryinterval)); if (cfs_time_before(now, next_query)) { - if (lp->lp_alive) + if (lp->lpni_alive) CWARN("Unexpected aliveness of peer %s: " "%d < %d (%d/%d)\n", - libcfs_nid2str(lp->lp_nid), + libcfs_nid2str(lp->lpni_nid), (int)now, (int)next_query, lnet_queryinterval, - lp->lp_net->net_tunables.lct_peer_timeout); + lp->lpni_net->net_tunables.lct_peer_timeout); return 0; } } @@ -772,7 +772,7 @@ lnet_peer_alive_locked (struct lnet_ni *ni, lnet_peer_t *lp) if (lnet_peer_is_alive(lp, now)) return 1; - lnet_notify_locked(lp, 0, 0, lp->lp_last_alive); + lnet_notify_locked(lp, 0, 0, lp->lpni_last_alive); return 0; } @@ -790,8 +790,8 @@ lnet_peer_alive_locked (struct lnet_ni *ni, lnet_peer_t *lp) static int lnet_post_send_locked(lnet_msg_t *msg, int do_send) { - lnet_peer_t *lp = msg->msg_txpeer; - lnet_ni_t *ni = msg->msg_txni; + struct lnet_peer_ni *lp = msg->msg_txpeer; + struct lnet_ni *ni = msg->msg_txni; int cpt = msg->msg_tx_cpt; struct lnet_tx_queue *tq = ni->ni_tx_queues[cpt]; @@ -831,19 +831,19 @@ lnet_post_send_locked(lnet_msg_t *msg, int do_send) } if (!msg->msg_peertxcredit) { - LASSERT((lp->lp_txcredits < 0) == - !list_empty(&lp->lp_txq)); + LASSERT((lp->lpni_txcredits < 0) == + !list_empty(&lp->lpni_txq)); msg->msg_peertxcredit = 1; - lp->lp_txqnob += msg->msg_len + sizeof(lnet_hdr_t); - lp->lp_txcredits--; + lp->lpni_txqnob += msg->msg_len + sizeof(lnet_hdr_t); + lp->lpni_txcredits--; - if (lp->lp_txcredits < lp->lp_mintxcredits) - lp->lp_mintxcredits = lp->lp_txcredits; + if (lp->lpni_txcredits < lp->lpni_mintxcredits) + lp->lpni_mintxcredits = lp->lpni_txcredits; - if (lp->lp_txcredits < 0) { + if (lp->lpni_txcredits < 0) { msg->msg_tx_delayed = 1; - list_add_tail(&msg->msg_list, &lp->lp_txq); + list_add_tail(&msg->msg_list, 
&lp->lpni_txq); return LNET_CREDIT_WAIT; } } @@ -901,34 +901,34 @@ lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv) * sets do_recv FALSE and I don't do the unlock/send/lock bit. * I return LNET_CREDIT_WAIT if msg blocked and LNET_CREDIT_OK if * received or OK to receive */ - lnet_peer_t *lp = msg->msg_rxpeer; + struct lnet_peer_ni *lp = msg->msg_rxpeer; lnet_rtrbufpool_t *rbp; - lnet_rtrbuf_t *rb; + lnet_rtrbuf_t *rb; - LASSERT(msg->msg_iov == NULL); - LASSERT(msg->msg_kiov == NULL); - LASSERT(msg->msg_niov == 0); - LASSERT(msg->msg_routing); - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_sending); + LASSERT (msg->msg_iov == NULL); + LASSERT (msg->msg_kiov == NULL); + LASSERT (msg->msg_niov == 0); + LASSERT (msg->msg_routing); + LASSERT (msg->msg_receiving); + LASSERT (!msg->msg_sending); /* non-lnet_parse callers only receive delayed messages */ LASSERT(!do_recv || msg->msg_rx_delayed); if (!msg->msg_peerrtrcredit) { - LASSERT((lp->lp_rtrcredits < 0) == - !list_empty(&lp->lp_rtrq)); + LASSERT((lp->lpni_rtrcredits < 0) == + !list_empty(&lp->lpni_rtrq)); msg->msg_peerrtrcredit = 1; - lp->lp_rtrcredits--; - if (lp->lp_rtrcredits < lp->lp_minrtrcredits) - lp->lp_minrtrcredits = lp->lp_rtrcredits; + lp->lpni_rtrcredits--; + if (lp->lpni_rtrcredits < lp->lpni_minrtrcredits) + lp->lpni_minrtrcredits = lp->lpni_rtrcredits; - if (lp->lp_rtrcredits < 0) { + if (lp->lpni_rtrcredits < 0) { /* must have checked eager_recv before here */ LASSERT(msg->msg_rx_ready_delay); msg->msg_rx_delayed = 1; - list_add_tail(&msg->msg_list, &lp->lp_rtrq); + list_add_tail(&msg->msg_list, &lp->lpni_rtrq); return LNET_CREDIT_WAIT; } } @@ -971,9 +971,9 @@ lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv) void lnet_return_tx_credits_locked(lnet_msg_t *msg) { - lnet_peer_t *txpeer = msg->msg_txpeer; - lnet_msg_t *msg2; - struct lnet_ni *txni = msg->msg_txni; + struct lnet_peer_ni *txpeer = msg->msg_txpeer; + struct lnet_ni *txni = msg->msg_txni; + lnet_msg_t *msg2; if 
(msg->msg_txcredit) { struct lnet_ni *ni = msg->msg_txni; @@ -1002,16 +1002,16 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) /* give back peer txcredits */ msg->msg_peertxcredit = 0; - LASSERT((txpeer->lp_txcredits < 0) == - !list_empty(&txpeer->lp_txq)); + LASSERT((txpeer->lpni_txcredits < 0) == + !list_empty(&txpeer->lpni_txq)); - txpeer->lp_txqnob -= msg->msg_len + sizeof(lnet_hdr_t); - LASSERT(txpeer->lp_txqnob >= 0); + txpeer->lpni_txqnob -= msg->msg_len + sizeof(lnet_hdr_t); + LASSERT (txpeer->lpni_txqnob >= 0); - txpeer->lp_txcredits++; - if (txpeer->lp_txcredits <= 0) { - msg2 = list_entry(txpeer->lp_txq.next, - lnet_msg_t, msg_list); + txpeer->lpni_txcredits++; + if (txpeer->lpni_txcredits <= 0) { + msg2 = list_entry(txpeer->lpni_txq.next, + lnet_msg_t, msg_list); list_del(&msg2->msg_list); LASSERT(msg2->msg_txpeer == txpeer); @@ -1028,7 +1028,7 @@ lnet_return_tx_credits_locked(lnet_msg_t *msg) if (txpeer != NULL) { msg->msg_txpeer = NULL; - lnet_peer_decref_locked(txpeer); + lnet_peer_ni_decref_locked(txpeer); } } @@ -1072,9 +1072,9 @@ lnet_drop_routed_msgs_locked(struct list_head *list, int cpt) void lnet_return_rx_credits_locked(lnet_msg_t *msg) { - lnet_peer_t *rxpeer = msg->msg_rxpeer; - struct lnet_ni *rxni = msg->msg_rxni; - lnet_msg_t *msg2; + struct lnet_peer_ni *rxpeer = msg->msg_rxpeer; + struct lnet_ni *rxni = msg->msg_rxni; + lnet_msg_t *msg2; if (msg->msg_rtrcredit) { /* give back global router credits */ @@ -1125,18 +1125,18 @@ routing_off: /* give back peer router credits */ msg->msg_peerrtrcredit = 0; - LASSERT((rxpeer->lp_rtrcredits < 0) == - !list_empty(&rxpeer->lp_rtrq)); + LASSERT((rxpeer->lpni_rtrcredits < 0) == + !list_empty(&rxpeer->lpni_rtrq)); - rxpeer->lp_rtrcredits++; + rxpeer->lpni_rtrcredits++; /* drop all messages which are queued to be routed on that * peer. 
*/ if (!the_lnet.ln_routing) { - lnet_drop_routed_msgs_locked(&rxpeer->lp_rtrq, + lnet_drop_routed_msgs_locked(&rxpeer->lpni_rtrq, msg->msg_rx_cpt); - } else if (rxpeer->lp_rtrcredits <= 0) { - msg2 = list_entry(rxpeer->lp_rtrq.next, + } else if (rxpeer->lpni_rtrcredits <= 0) { + msg2 = list_entry(rxpeer->lpni_rtrq.next, lnet_msg_t, msg_list); list_del(&msg2->msg_list); @@ -1149,15 +1149,15 @@ routing_off: } if (rxpeer != NULL) { msg->msg_rxpeer = NULL; - lnet_peer_decref_locked(rxpeer); + lnet_peer_ni_decref_locked(rxpeer); } } static int lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2) { - lnet_peer_t *p1 = r1->lr_gateway; - lnet_peer_t *p2 = r2->lr_gateway; + struct lnet_peer_ni *p1 = r1->lr_gateway; + struct lnet_peer_ni *p2 = r2->lr_gateway; int r1_hops = (r1->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r1->lr_hops; int r2_hops = (r2->lr_hops == LNET_UNDEFINED_HOPS) ? 1 : r2->lr_hops; @@ -1173,16 +1173,16 @@ lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2) if (r1_hops > r2_hops) return -ERANGE; - if (p1->lp_txqnob < p2->lp_txqnob) + if (p1->lpni_txqnob < p2->lpni_txqnob) return 1; - if (p1->lp_txqnob > p2->lp_txqnob) + if (p1->lpni_txqnob > p2->lpni_txqnob) return -ERANGE; - if (p1->lp_txcredits > p2->lp_txcredits) + if (p1->lpni_txcredits > p2->lpni_txcredits) return 1; - if (p1->lp_txcredits < p2->lp_txcredits) + if (p1->lpni_txcredits < p2->lpni_txcredits) return -ERANGE; if (r1->lr_seq - r2->lr_seq <= 0) @@ -1191,7 +1191,7 @@ lnet_compare_routes(lnet_route_t *r1, lnet_route_t *r2) return -ERANGE; } -static lnet_peer_t * +static struct lnet_peer_ni * lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target, lnet_nid_t rtr_nid) { @@ -1199,8 +1199,8 @@ lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target, lnet_route_t *route; lnet_route_t *best_route; lnet_route_t *last_route; - struct lnet_peer *lp_best; - struct lnet_peer *lp; + struct lnet_peer_ni *lpni_best; + struct lnet_peer_ni *lp; int rc; /* If @rtr_nid is not LNET_NID_ANY, 
return the gateway with @@ -1210,7 +1210,7 @@ lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target, if (rnet == NULL) return NULL; - lp_best = NULL; + lpni_best = NULL; best_route = last_route = NULL; list_for_each_entry(route, &rnet->lrn_routes, lr_list) { lp = route->lr_gateway; @@ -1218,15 +1218,15 @@ lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target, if (!lnet_is_route_alive(route)) continue; - if (net != NULL && lp->lp_net != net) + if (net != NULL && lp->lpni_net != net) continue; - if (lp->lp_nid == rtr_nid) /* it's pre-determined router */ + if (lp->lpni_nid == rtr_nid) /* it's pre-determined router */ return lp; - if (lp_best == NULL) { + if (lpni_best == NULL) { best_route = last_route = route; - lp_best = lp; + lpni_best = lp; continue; } @@ -1239,7 +1239,7 @@ lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target, continue; best_route = route; - lp_best = lp; + lpni_best = lp; } /* set sequence number on the best router to the latest sequence + 1 @@ -1247,7 +1247,7 @@ lnet_find_route_locked(struct lnet_net *net, lnet_nid_t target, * harmless and functional */ if (best_route != NULL) best_route->lr_seq = last_route->lr_seq + 1; - return lp_best; + return lpni_best; } int @@ -1256,7 +1256,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) lnet_nid_t dst_nid = msg->msg_target.nid; struct lnet_ni *src_ni; struct lnet_ni *local_ni; - struct lnet_peer *lp; + struct lnet_peer_ni *lp; int cpt; int cpt2; int rc; @@ -1323,7 +1323,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) return 0; } - rc = lnet_nid2peer_locked(&lp, dst_nid, cpt); + rc = lnet_nid2peerni_locked(&lp, dst_nid, cpt); if (rc != 0) { lnet_net_unlock(cpt); LCONSOLE_WARN("Error %d finding peer %s\n", rc, @@ -1331,7 +1331,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) /* ENOMEM or shutting down */ return rc; } - LASSERT (lp->lp_net == src_ni->ni_net); + LASSERT (lp->lpni_net == src_ni->ni_net); } else 
{ /* sending to a remote network */ lp = lnet_find_route_locked(src_ni != NULL ? @@ -1351,30 +1351,30 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) * it's possible that rtr_nid isn't LNET_NID_ANY and lp isn't * pre-determined router, this can happen if router table * was changed when we release the lock */ - if (rtr_nid != lp->lp_nid) { - cpt2 = lp->lp_cpt; + if (rtr_nid != lp->lpni_nid) { + cpt2 = lp->lpni_cpt; if (cpt2 != cpt) { lnet_net_unlock(cpt); - rtr_nid = lp->lp_nid; + rtr_nid = lp->lpni_nid; cpt = cpt2; goto again; } } CDEBUG(D_NET, "Best route to %s via %s for %s %d\n", - libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lp_nid), + libcfs_nid2str(dst_nid), libcfs_nid2str(lp->lpni_nid), lnet_msgtyp2str(msg->msg_type), msg->msg_len); if (src_ni == NULL) { - src_ni = lnet_get_next_ni_locked(lp->lp_net, NULL); + src_ni = lnet_get_next_ni_locked(lp->lpni_net, NULL); LASSERT(src_ni != NULL); src_nid = src_ni->ni_nid; } else { - LASSERT (src_ni->ni_net == lp->lp_net); + LASSERT (src_ni->ni_net == lp->lpni_net); } - lnet_peer_addref_locked(lp); + lnet_peer_ni_addref_locked(lp); LASSERT(src_nid != LNET_NID_ANY); lnet_msg_commit(msg, cpt); @@ -1385,7 +1385,7 @@ lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg, lnet_nid_t rtr_nid) } msg->msg_target_is_router = 1; - msg->msg_target.nid = lp->lp_nid; + msg->msg_target.nid = lp->lpni_nid; msg->msg_target.pid = LNET_PID_LUSTRE; } @@ -1462,7 +1462,7 @@ lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) info.mi_rlength = hdr->payload_length; info.mi_roffset = hdr->msg.put.offset; info.mi_mbits = hdr->msg.put.match_bits; - info.mi_cpt = msg->msg_rxpeer->lp_cpt; + info.mi_cpt = msg->msg_rxpeer->lpni_cpt; msg->msg_rx_ready_delay = ni->ni_net->net_lnd->lnd_eager_recv == NULL; ready_delay = msg->msg_rx_ready_delay; @@ -1695,7 +1695,7 @@ lnet_parse_forward_locked(lnet_ni_t *ni, lnet_msg_t *msg) if (!the_lnet.ln_routing) return -ECANCELED; - if (msg->msg_rxpeer->lp_rtrcredits <= 0 || + if 
(msg->msg_rxpeer->lpni_rtrcredits <= 0 || lnet_msg2bufpool(msg)->rbp_credits <= 0) { if (ni->ni_net->net_lnd->lnd_eager_recv == NULL) { msg->msg_rx_ready_delay = 1; @@ -1991,7 +1991,7 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, } lnet_net_lock(cpt); - rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid, cpt); + rc = lnet_nid2peerni_locked(&msg->msg_rxpeer, from_nid, cpt); if (rc != 0) { lnet_net_unlock(cpt); CERROR("%s, src %s: Dropping %s " @@ -2091,7 +2091,7 @@ lnet_drop_delayed_msg_list(struct list_head *head, char *reason) * until that's done */ lnet_drop_message(msg->msg_rxni, - msg->msg_rxpeer->lp_cpt, + msg->msg_rxpeer->lpni_cpt, msg->msg_private, msg->msg_len); /* * NB: message will not generate event because w/o attached MD, @@ -2553,7 +2553,7 @@ LNetDist(lnet_nid_t dstnid, lnet_nid_t *srcnidp, __u32 *orderp) hops = shortest_hops; if (srcnidp != NULL) { ni = lnet_get_next_ni_locked( - shortest->lr_gateway->lp_net, + shortest->lr_gateway->lpni_net, NULL); *srcnidp = ni->ni_nid; } diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c index 236f63b..6a6f56b 100644 --- a/lnet/lnet/peer.c +++ b/lnet/lnet/peer.c @@ -53,8 +53,6 @@ lnet_peer_tables_create(void) } cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - INIT_LIST_HEAD(&ptable->pt_deathrow); - LIBCFS_CPT_ALLOC(hash, lnet_cpt_table(), i, LNET_PEER_HASH_SIZE * sizeof(*hash)); if (hash == NULL) { @@ -87,8 +85,6 @@ lnet_peer_tables_destroy(void) if (hash == NULL) /* not intialized */ break; - LASSERT(list_empty(&ptable->pt_deathrow)); - ptable->pt_hash = NULL; for (j = 0; j < LNET_PEER_HASH_SIZE; j++) LASSERT(list_empty(&hash[j])); @@ -103,25 +99,25 @@ lnet_peer_tables_destroy(void) static void lnet_peer_table_cleanup_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable) { - int i; - lnet_peer_t *lp; - lnet_peer_t *tmp; + int i; + struct lnet_peer_ni *lp; + struct lnet_peer_ni *tmp; for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], - 
lp_hashlist) { - if (ni != NULL && ni->ni_net != lp->lp_net) + lpni_hashlist) { + if (ni != NULL && ni->ni_net != lp->lpni_net) continue; - list_del_init(&lp->lp_hashlist); + list_del_init(&lp->lpni_hashlist); /* Lose hash table's ref */ ptable->pt_zombies++; - lnet_peer_decref_locked(lp); + lnet_peer_ni_decref_locked(lp); } } } static void -lnet_peer_table_deathrow_wait_locked(struct lnet_peer_table *ptable, +lnet_peer_table_finalize_wait_locked(struct lnet_peer_table *ptable, int cpt_locked) { int i; @@ -144,24 +140,24 @@ static void lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable, int cpt_locked) { - lnet_peer_t *lp; - lnet_peer_t *tmp; - lnet_nid_t lp_nid; - int i; + struct lnet_peer_ni *lp; + struct lnet_peer_ni *tmp; + lnet_nid_t lpni_nid; + int i; for (i = 0; i < LNET_PEER_HASH_SIZE; i++) { list_for_each_entry_safe(lp, tmp, &ptable->pt_hash[i], - lp_hashlist) { - if (ni->ni_net != lp->lp_net) + lpni_hashlist) { + if (ni->ni_net != lp->lpni_net) continue; - if (lp->lp_rtr_refcount == 0) + if (lp->lpni_rtr_refcount == 0) continue; - lp_nid = lp->lp_nid; + lpni_nid = lp->lpni_nid; lnet_net_unlock(cpt_locked); - lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lp_nid); + lnet_del_route(LNET_NIDNET(LNET_NID_ANY), lpni_nid); lnet_net_lock(cpt_locked); } } @@ -170,113 +166,199 @@ lnet_peer_table_del_rtrs_locked(lnet_ni_t *ni, struct lnet_peer_table *ptable, void lnet_peer_tables_cleanup(lnet_ni_t *ni) { - int i; - struct lnet_peer_table *ptable; - lnet_peer_t *lp; - struct list_head deathrow; - - INIT_LIST_HEAD(&deathrow); + int i; + struct lnet_peer_table *ptable; LASSERT(the_lnet.ln_shutdown || ni != NULL); /* If just deleting the peers for a NI, get rid of any routes these * peers are gateways for. 
*/ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); + lnet_net_lock(LNET_LOCK_EX); lnet_peer_table_del_rtrs_locked(ni, ptable, i); - lnet_net_unlock(i); + lnet_net_unlock(LNET_LOCK_EX); } - /* Start the process of moving the applicable peers to - * deathrow. */ + /* Start the cleanup process */ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); + lnet_net_lock(LNET_LOCK_EX); lnet_peer_table_cleanup_locked(ni, ptable); - lnet_net_unlock(i); + lnet_net_unlock(LNET_LOCK_EX); } - /* Cleanup all entries on deathrow. */ + /* Wait until all peers have been destroyed. */ cfs_percpt_for_each(ptable, i, the_lnet.ln_peer_tables) { - lnet_net_lock(i); - lnet_peer_table_deathrow_wait_locked(ptable, i); - list_splice_init(&ptable->pt_deathrow, &deathrow); - lnet_net_unlock(i); + lnet_net_lock(LNET_LOCK_EX); + lnet_peer_table_finalize_wait_locked(ptable, i); + lnet_net_unlock(LNET_LOCK_EX); } +} + +static struct lnet_peer_ni * +lnet_get_peer_ni_locked(struct lnet_peer_table *ptable, lnet_nid_t nid) +{ + struct list_head *peers; + struct lnet_peer_ni *lp; + + LASSERT(!the_lnet.ln_shutdown); - while (!list_empty(&deathrow)) { - lp = list_entry(deathrow.next, lnet_peer_t, lp_hashlist); - list_del(&lp->lp_hashlist); - LIBCFS_FREE(lp, sizeof(*lp)); + peers = &ptable->pt_hash[lnet_nid2peerhash(nid)]; + list_for_each_entry(lp, peers, lpni_hashlist) { + if (lp->lpni_nid == nid) { + lnet_peer_ni_addref_locked(lp); + return lp; + } } + + return NULL; } -void -lnet_destroy_peer_locked(lnet_peer_t *lp) +struct lnet_peer_ni * +lnet_find_peer_ni_locked(lnet_nid_t nid, int cpt) { + struct lnet_peer_ni *lpni; struct lnet_peer_table *ptable; - LASSERT(lp->lp_refcount == 0); - LASSERT(lp->lp_rtr_refcount == 0); - LASSERT(list_empty(&lp->lp_txq)); - LASSERT(list_empty(&lp->lp_hashlist)); - LASSERT(lp->lp_txqnob == 0); + ptable = the_lnet.ln_peer_tables[cpt]; + lpni = lnet_get_peer_ni_locked(ptable, nid); - ptable = 
the_lnet.ln_peer_tables[lp->lp_cpt]; - LASSERT(ptable->pt_number > 0); - ptable->pt_number--; + return lpni; +} - lp->lp_net = NULL; +static void +lnet_try_destroy_peer_hierarchy_locked(struct lnet_peer_ni *lpni) +{ + struct lnet_peer_net *peer_net; + struct lnet_peer *peer; - list_add(&lp->lp_hashlist, &ptable->pt_deathrow); - LASSERT(ptable->pt_zombies > 0); - ptable->pt_zombies--; + /* TODO: could the below situation happen? accessing an already + * destroyed peer? */ + if (lpni->lpni_peer_net == NULL || + lpni->lpni_peer_net->lpn_peer == NULL) + return; + + peer_net = lpni->lpni_peer_net; + peer = lpni->lpni_peer_net->lpn_peer; + + list_del_init(&lpni->lpni_on_peer_net_list); + lpni->lpni_peer_net = NULL; + + /* if peer_net is empty, then remove it from the peer */ + if (list_empty(&peer_net->lpn_peer_nis)) { + list_del_init(&peer_net->lpn_on_peer_list); + peer_net->lpn_peer = NULL; + LIBCFS_FREE(peer_net, sizeof(*peer_net)); + + /* if the peer is empty then remove it from the + * the_lnet.ln_peers */ + if (list_empty(&peer->lp_peer_nets)) { + list_del_init(&peer->lp_on_lnet_peer_list); + LIBCFS_FREE(peer, sizeof(*peer)); + } + } } -lnet_peer_t * -lnet_find_peer_locked(struct lnet_peer_table *ptable, lnet_nid_t nid) +static int +lnet_build_peer_hierarchy(struct lnet_peer_ni *lpni) { - struct list_head *peers; - lnet_peer_t *lp; + struct lnet_peer *peer; + struct lnet_peer_net *peer_net; + __u32 lpni_net = LNET_NIDNET(lpni->lpni_nid); - LASSERT(!the_lnet.ln_shutdown); + peer = NULL; + peer_net = NULL; - peers = &ptable->pt_hash[lnet_nid2peerhash(nid)]; - list_for_each_entry(lp, peers, lp_hashlist) { - if (lp->lp_nid == nid) { - lnet_peer_addref_locked(lp); - return lp; - } + LIBCFS_ALLOC(peer, sizeof(*peer)); + if (peer == NULL) + return -ENOMEM; + + LIBCFS_ALLOC(peer_net, sizeof(*peer_net)); + if (peer_net == NULL) { + LIBCFS_FREE(peer, sizeof(*peer)); + return -ENOMEM; } - return NULL; + INIT_LIST_HEAD(&peer->lp_on_lnet_peer_list); + 
INIT_LIST_HEAD(&peer->lp_peer_nets); + INIT_LIST_HEAD(&peer_net->lpn_on_peer_list); + INIT_LIST_HEAD(&peer_net->lpn_peer_nis); + + /* build the hierarchy */ + peer_net->lpn_net_id = lpni_net; + peer_net->lpn_peer = peer; + lpni->lpni_peer_net = peer_net; + peer->lp_primary_nid = lpni->lpni_nid; + list_add_tail(&peer_net->lpn_on_peer_list, &peer->lp_peer_nets); + list_add_tail(&lpni->lpni_on_peer_net_list, &peer_net->lpn_peer_nis); + list_add_tail(&peer->lp_on_lnet_peer_list, &the_lnet.ln_peers); + + return 0; +} + +void +lnet_destroy_peer_ni_locked(struct lnet_peer_ni *lpni) +{ + struct lnet_peer_table *ptable; + + LASSERT(atomic_read(&lpni->lpni_refcount) == 0); + LASSERT(lpni->lpni_rtr_refcount == 0); + LASSERT(list_empty(&lpni->lpni_txq)); + LASSERT(list_empty(&lpni->lpni_hashlist)); + LASSERT(lpni->lpni_txqnob == 0); + LASSERT(lpni->lpni_peer_net != NULL); + LASSERT(lpni->lpni_peer_net->lpn_peer != NULL); + + ptable = the_lnet.ln_peer_tables[lpni->lpni_cpt]; + LASSERT(ptable->pt_number > 0); + ptable->pt_number--; + + lpni->lpni_net = NULL; + + lnet_try_destroy_peer_hierarchy_locked(lpni); + + LIBCFS_FREE(lpni, sizeof(*lpni)); + + LASSERT(ptable->pt_zombies > 0); + ptable->pt_zombies--; } int -lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) +lnet_nid2peerni_locked(struct lnet_peer_ni **lpnip, lnet_nid_t nid, int cpt) { struct lnet_peer_table *ptable; - lnet_peer_t *lp = NULL; - lnet_peer_t *lp2; + struct lnet_peer_ni *lpni = NULL; + struct lnet_peer_ni *lpni2; int cpt2; int rc = 0; - *lpp = NULL; + *lpnip = NULL; if (the_lnet.ln_shutdown) /* it's shutting down */ return -ESHUTDOWN; - /* cpt can be LNET_LOCK_EX if it's called from router functions */ - cpt2 = cpt != LNET_LOCK_EX ? cpt : lnet_cpt_of_nid_locked(nid, NULL); + /* + * calculate cpt2 with the standard hash function + * This cpt2 becomes the slot where we'll find or create the peer. 
+ */ + cpt2 = lnet_nid_cpt_hash(nid, LNET_CPT_NUMBER); - ptable = the_lnet.ln_peer_tables[cpt2]; - lp = lnet_find_peer_locked(ptable, nid); - if (lp != NULL) { - *lpp = lp; - return 0; + /* + * Any changes to the peer tables happen under exclusive write + * lock. Any reads to the peer tables can be done via a standard + * CPT read lock. + */ + if (cpt != LNET_LOCK_EX) { + lnet_net_unlock(cpt); + lnet_net_lock(LNET_LOCK_EX); } - if (!list_empty(&ptable->pt_deathrow)) { - lp = list_entry(ptable->pt_deathrow.next, - lnet_peer_t, lp_hashlist); - list_del(&lp->lp_hashlist); + ptable = the_lnet.ln_peer_tables[cpt2]; + lpni = lnet_get_peer_ni_locked(ptable, nid); + if (lpni != NULL) { + *lpnip = lpni; + if (cpt != LNET_LOCK_EX) { + lnet_net_unlock(LNET_LOCK_EX); + lnet_net_lock(cpt); + } + return 0; } /* @@ -284,82 +366,88 @@ lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid, int cpt) * and destroyed locks and peer-table before I finish the allocation */ ptable->pt_number++; - lnet_net_unlock(cpt); + lnet_net_unlock(LNET_LOCK_EX); - if (lp != NULL) - memset(lp, 0, sizeof(*lp)); - else - LIBCFS_CPT_ALLOC(lp, lnet_cpt_table(), cpt2, sizeof(*lp)); + LIBCFS_CPT_ALLOC(lpni, lnet_cpt_table(), cpt2, sizeof(*lpni)); - if (lp == NULL) { + if (lpni == NULL) { rc = -ENOMEM; lnet_net_lock(cpt); goto out; } - INIT_LIST_HEAD(&lp->lp_txq); - INIT_LIST_HEAD(&lp->lp_rtrq); - INIT_LIST_HEAD(&lp->lp_routes); - - lp->lp_notify = 0; - lp->lp_notifylnd = 0; - lp->lp_notifying = 0; - lp->lp_alive_count = 0; - lp->lp_timestamp = 0; - lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! 
*/ - lp->lp_last_alive = cfs_time_current(); /* assumes alive */ - lp->lp_last_query = 0; /* haven't asked NI yet */ - lp->lp_ping_timestamp = 0; - lp->lp_ping_feats = LNET_PING_FEAT_INVAL; - lp->lp_nid = nid; - lp->lp_cpt = cpt2; - lp->lp_refcount = 2; /* 1 for caller; 1 for hash */ - lp->lp_rtr_refcount = 0; + INIT_LIST_HEAD(&lpni->lpni_txq); + INIT_LIST_HEAD(&lpni->lpni_rtrq); + INIT_LIST_HEAD(&lpni->lpni_routes); - lnet_net_lock(cpt); + lpni->lpni_alive = !lnet_peers_start_down(); /* 1 bit!! */ + lpni->lpni_last_alive = cfs_time_current(); /* assumes alive */ + lpni->lpni_ping_feats = LNET_PING_FEAT_INVAL; + lpni->lpni_nid = nid; + lpni->lpni_cpt = cpt2; + atomic_set(&lpni->lpni_refcount, 2); /* 1 for caller; 1 for hash */ + + rc = lnet_build_peer_hierarchy(lpni); + if (rc != 0) + goto out; + + lnet_net_lock(LNET_LOCK_EX); if (the_lnet.ln_shutdown) { rc = -ESHUTDOWN; goto out; } - lp2 = lnet_find_peer_locked(ptable, nid); - if (lp2 != NULL) { - *lpp = lp2; + lpni2 = lnet_get_peer_ni_locked(ptable, nid); + if (lpni2 != NULL) { + *lpnip = lpni2; goto out; } - lp->lp_net = lnet_get_net_locked(LNET_NIDNET(lp->lp_nid)); - lp->lp_txcredits = - lp->lp_mintxcredits = lp->lp_net->net_tunables.lct_peer_tx_credits; - lp->lp_rtrcredits = - lp->lp_minrtrcredits = lnet_peer_buffer_credits(lp->lp_net); + lpni->lpni_net = lnet_get_net_locked(LNET_NIDNET(lpni->lpni_nid)); + lpni->lpni_txcredits = + lpni->lpni_mintxcredits = + lpni->lpni_net->net_tunables.lct_peer_tx_credits; + lpni->lpni_rtrcredits = + lpni->lpni_minrtrcredits = + lnet_peer_buffer_credits(lpni->lpni_net); - list_add_tail(&lp->lp_hashlist, - &ptable->pt_hash[lnet_nid2peerhash(nid)]); + list_add_tail(&lpni->lpni_hashlist, + &ptable->pt_hash[lnet_nid2peerhash(nid)]); ptable->pt_version++; - *lpp = lp; + *lpnip = lpni; + + if (cpt != LNET_LOCK_EX) { + lnet_net_unlock(LNET_LOCK_EX); + lnet_net_lock(cpt); + } return 0; out: - if (lp != NULL) - list_add(&lp->lp_hashlist, &ptable->pt_deathrow); + if (lpni != NULL) { + 
lnet_try_destroy_peer_hierarchy_locked(lpni); + LIBCFS_FREE(lpni, sizeof(*lpni)); + } ptable->pt_number--; + if (cpt != LNET_LOCK_EX) { + lnet_net_unlock(LNET_LOCK_EX); + lnet_net_lock(cpt); + } return rc; } void lnet_debug_peer(lnet_nid_t nid) { - char *aliveness = "NA"; - lnet_peer_t *lp; - int rc; - int cpt; + char *aliveness = "NA"; + struct lnet_peer_ni *lp; + int rc; + int cpt; cpt = lnet_cpt_of_nid(nid, NULL); lnet_net_lock(cpt); - rc = lnet_nid2peer_locked(&lp, nid, cpt); + rc = lnet_nid2peerni_locked(&lp, nid, cpt); if (rc != 0) { lnet_net_unlock(cpt); CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid)); @@ -367,15 +455,15 @@ lnet_debug_peer(lnet_nid_t nid) } if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp)) - aliveness = lp->lp_alive ? "up" : "down"; + aliveness = lp->lpni_alive ? "up" : "down"; CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n", - libcfs_nid2str(lp->lp_nid), lp->lp_refcount, - aliveness, lp->lp_net->net_tunables.lct_peer_tx_credits, - lp->lp_rtrcredits, lp->lp_minrtrcredits, - lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob); + libcfs_nid2str(lp->lpni_nid), atomic_read(&lp->lpni_refcount), + aliveness, lp->lpni_net->net_tunables.lct_peer_tx_credits, + lp->lpni_rtrcredits, lp->lpni_minrtrcredits, + lp->lpni_txcredits, lp->lpni_mintxcredits, lp->lpni_txqnob); - lnet_peer_decref_locked(lp); + lnet_peer_ni_decref_locked(lp); lnet_net_unlock(cpt); } @@ -387,11 +475,11 @@ int lnet_get_peer_info(__u32 peer_index, __u64 *nid, __u32 *peer_rtr_credits, __u32 *peer_min_rtr_credits, __u32 *peer_tx_qnob) { - struct lnet_peer_table *peer_table; - lnet_peer_t *lp; - int j; - int lncpt; - bool found = false; + struct lnet_peer_table *peer_table; + struct lnet_peer_ni *lp; + int j; + int lncpt; + bool found = false; /* get the number of CPTs */ lncpt = cfs_percpt_number(the_lnet.ln_peer_tables); @@ -413,7 +501,7 @@ int lnet_get_peer_info(__u32 peer_index, __u64 *nid, for (j = 0; j < LNET_PEER_HASH_SIZE && !found; j++) { struct 
list_head *peers = &peer_table->pt_hash[j]; - list_for_each_entry(lp, peers, lp_hashlist) { + list_for_each_entry(lp, peers, lpni_hashlist) { if (peer_index-- > 0) continue; @@ -421,16 +509,16 @@ int lnet_get_peer_info(__u32 peer_index, __u64 *nid, if (lnet_isrouter(lp) || lnet_peer_aliveness_enabled(lp)) snprintf(aliveness, LNET_MAX_STR_LEN, - lp->lp_alive ? "up" : "down"); + lp->lpni_alive ? "up" : "down"); - *nid = lp->lp_nid; - *refcount = lp->lp_refcount; + *nid = lp->lpni_nid; + *refcount = atomic_read(&lp->lpni_refcount); *ni_peer_tx_credits = - lp->lp_net->net_tunables.lct_peer_tx_credits; - *peer_tx_credits = lp->lp_txcredits; - *peer_rtr_credits = lp->lp_rtrcredits; - *peer_min_rtr_credits = lp->lp_mintxcredits; - *peer_tx_qnob = lp->lp_txqnob; + lp->lpni_net->net_tunables.lct_peer_tx_credits; + *peer_tx_credits = lp->lpni_txcredits; + *peer_rtr_credits = lp->lpni_rtrcredits; + *peer_min_rtr_credits = lp->lpni_mintxcredits; + *peer_tx_qnob = lp->lpni_txqnob; found = true; } diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index b4c5842..252a548 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -98,120 +98,122 @@ lnet_peers_start_down(void) } void -lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, cfs_time_t when) +lnet_notify_locked(struct lnet_peer_ni *lp, int notifylnd, int alive, + cfs_time_t when) { - if (cfs_time_before(when, lp->lp_timestamp)) { /* out of date information */ + if (cfs_time_before(when, lp->lpni_timestamp)) { /* out of date information */ CDEBUG(D_NET, "Out of date\n"); return; } - lp->lp_timestamp = when; /* update timestamp */ - lp->lp_ping_deadline = 0; /* disable ping timeout */ + lp->lpni_timestamp = when; /* update timestamp */ + lp->lpni_ping_deadline = 0; /* disable ping timeout */ - if (lp->lp_alive_count != 0 && /* got old news */ - (!lp->lp_alive) == (!alive)) { /* new date for old news */ + if (lp->lpni_alive_count != 0 && /* got old news */ + (!lp->lpni_alive) == (!alive)) { /* new date for old 
news */ CDEBUG(D_NET, "Old news\n"); return; } /* Flag that notification is outstanding */ - lp->lp_alive_count++; - lp->lp_alive = !(!alive); /* 1 bit! */ - lp->lp_notify = 1; - lp->lp_notifylnd |= notifylnd; - if (lp->lp_alive) - lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */ + lp->lpni_alive_count++; + lp->lpni_alive = !(!alive); /* 1 bit! */ + lp->lpni_notify = 1; + lp->lpni_notifylnd |= notifylnd; + if (lp->lpni_alive) + lp->lpni_ping_feats = LNET_PING_FEAT_INVAL; /* reset */ - CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive); + CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lpni_nid), alive); } static void -lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp) +lnet_ni_notify_locked(lnet_ni_t *ni, struct lnet_peer_ni *lp) { - int alive; - int notifylnd; + int alive; + int notifylnd; /* Notify only in 1 thread at any time to ensure ordered notification. * NB individual events can be missed; the only guarantee is that you * always get the most recent news */ - if (lp->lp_notifying || ni == NULL) + if (lp->lpni_notifying || ni == NULL) return; - lp->lp_notifying = 1; + lp->lpni_notifying = 1; - while (lp->lp_notify) { - alive = lp->lp_alive; - notifylnd = lp->lp_notifylnd; + while (lp->lpni_notify) { + alive = lp->lpni_alive; + notifylnd = lp->lpni_notifylnd; - lp->lp_notifylnd = 0; - lp->lp_notify = 0; + lp->lpni_notifylnd = 0; + lp->lpni_notify = 0; if (notifylnd && ni->ni_net->net_lnd->lnd_notify != NULL) { - lnet_net_unlock(lp->lp_cpt); + lnet_net_unlock(lp->lpni_cpt); /* A new notification could happen now; I'll handle it * when control returns to me */ - (ni->ni_net->net_lnd->lnd_notify)(ni, lp->lp_nid, + (ni->ni_net->net_lnd->lnd_notify)(ni, lp->lpni_nid, alive); - lnet_net_lock(lp->lp_cpt); + lnet_net_lock(lp->lpni_cpt); } } - lp->lp_notifying = 0; + lp->lpni_notifying = 0; } static void -lnet_rtr_addref_locked(lnet_peer_t *lp) +lnet_rtr_addref_locked(struct lnet_peer_ni *lp) { - LASSERT(lp->lp_refcount > 0); - 
LASSERT(lp->lp_rtr_refcount >= 0); + LASSERT(atomic_read(&lp->lpni_refcount) > 0); + LASSERT(lp->lpni_rtr_refcount >= 0); /* lnet_net_lock must be exclusively locked */ - lp->lp_rtr_refcount++; - if (lp->lp_rtr_refcount == 1) { + lp->lpni_rtr_refcount++; + if (lp->lpni_rtr_refcount == 1) { struct list_head *pos; /* a simple insertion sort */ list_for_each_prev(pos, &the_lnet.ln_routers) { - lnet_peer_t *rtr = list_entry(pos, lnet_peer_t, - lp_rtr_list); + struct lnet_peer_ni *rtr = + list_entry(pos, struct lnet_peer_ni, + lpni_rtr_list); - if (rtr->lp_nid < lp->lp_nid) + if (rtr->lpni_nid < lp->lpni_nid) break; } - list_add(&lp->lp_rtr_list, pos); + list_add(&lp->lpni_rtr_list, pos); /* addref for the_lnet.ln_routers */ - lnet_peer_addref_locked(lp); + lnet_peer_ni_addref_locked(lp); the_lnet.ln_routers_version++; } } static void -lnet_rtr_decref_locked(lnet_peer_t *lp) +lnet_rtr_decref_locked(struct lnet_peer_ni *lp) { - LASSERT(lp->lp_refcount > 0); - LASSERT(lp->lp_rtr_refcount > 0); + LASSERT(atomic_read(&lp->lpni_refcount) > 0); + LASSERT(lp->lpni_rtr_refcount > 0); /* lnet_net_lock must be exclusively locked */ - lp->lp_rtr_refcount--; - if (lp->lp_rtr_refcount == 0) { - LASSERT(list_empty(&lp->lp_routes)); + lp->lpni_rtr_refcount--; + if (lp->lpni_rtr_refcount == 0) { + LASSERT(list_empty(&lp->lpni_routes)); - if (lp->lp_rcd != NULL) { - list_add(&lp->lp_rcd->rcd_list, + if (lp->lpni_rcd != NULL) { + list_add(&lp->lpni_rcd->rcd_list, &the_lnet.ln_rcd_deathrow); - lp->lp_rcd = NULL; + lp->lpni_rcd = NULL; } - list_del(&lp->lp_rtr_list); + list_del(&lp->lpni_rtr_list); /* decref for the_lnet.ln_routers */ - lnet_peer_decref_locked(lp); + lnet_peer_ni_decref_locked(lp); the_lnet.ln_routers_version++; } } @@ -285,7 +287,7 @@ lnet_add_route_to_rnet(lnet_remotenet_t *rnet, lnet_route_t *route) offset--; } list_add(&route->lr_list, e); - list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes); + list_add(&route->lr_gwlist, &route->lr_gateway->lpni_routes); 
the_lnet.ln_remote_nets_version++; lnet_rtr_addref_locked(route->lr_gateway); @@ -338,7 +340,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, lnet_net_lock(LNET_LOCK_EX); - rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX); + rc = lnet_nid2peerni_locked(&route->lr_gateway, gateway, LNET_LOCK_EX); if (rc != 0) { lnet_net_unlock(LNET_LOCK_EX); @@ -373,14 +375,14 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, } /* our lookups must be true */ - LASSERT(route2->lr_gateway->lp_nid != gateway); + LASSERT(route2->lr_gateway->lpni_nid != gateway); } if (add_route) { - lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */ + lnet_peer_ni_addref_locked(route->lr_gateway); /* +1 for notify */ lnet_add_route_to_rnet(rnet2, route); - ni = lnet_get_next_ni_locked(route->lr_gateway->lp_net, NULL); + ni = lnet_get_next_ni_locked(route->lr_gateway->lpni_net, NULL); lnet_net_unlock(LNET_LOCK_EX); /* XXX Assume alive */ @@ -391,7 +393,7 @@ lnet_add_route(__u32 net, __u32 hops, lnet_nid_t gateway, } /* -1 for notify or !add_route */ - lnet_peer_decref_locked(route->lr_gateway); + lnet_peer_ni_decref_locked(route->lr_gateway); lnet_net_unlock(LNET_LOCK_EX); rc = 0; @@ -443,12 +445,12 @@ lnet_check_routes(void) continue; } - if (route->lr_gateway->lp_net == - route2->lr_gateway->lp_net) + if (route->lr_gateway->lpni_net == + route2->lr_gateway->lpni_net) continue; - nid1 = route->lr_gateway->lp_nid; - nid2 = route2->lr_gateway->lp_nid; + nid1 = route->lr_gateway->lpni_nid; + nid2 = route2->lr_gateway->lpni_nid; net = rnet->lrn_net; lnet_net_unlock(cpt); @@ -470,7 +472,7 @@ lnet_check_routes(void) int lnet_del_route(__u32 net, lnet_nid_t gw_nid) { - struct lnet_peer *gateway; + struct lnet_peer_ni *gateway; lnet_remotenet_t *rnet; lnet_route_t *route; struct list_head *e1; @@ -504,7 +506,7 @@ again: gateway = route->lr_gateway; if (!(gw_nid == LNET_NID_ANY || - gw_nid == gateway->lp_nid)) + gw_nid == gateway->lpni_nid)) continue; 
list_del(&route->lr_list); @@ -517,7 +519,7 @@ again: rnet = NULL; lnet_rtr_decref_locked(gateway); - lnet_peer_decref_locked(gateway); + lnet_peer_ni_decref_locked(gateway); lnet_net_unlock(LNET_LOCK_EX); @@ -607,7 +609,7 @@ lnet_get_route(int idx, __u32 *net, __u32 *hops, *net = rnet->lrn_net; *hops = route->lr_hops; *priority = route->lr_priority; - *gateway = route->lr_gateway->lp_nid; + *gateway = route->lr_gateway->lpni_nid; *alive = lnet_is_route_alive(route); lnet_net_unlock(cpt); return 0; @@ -646,10 +648,10 @@ static void lnet_parse_rc_info(lnet_rc_data_t *rcd) { struct lnet_ping_info *info = rcd->rcd_pinginfo; - struct lnet_peer *gw = rcd->rcd_gateway; + struct lnet_peer_ni *gw = rcd->rcd_gateway; lnet_route_t *rte; - if (!gw->lp_alive) + if (!gw->lpni_alive) return; if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) @@ -658,27 +660,27 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) /* NB always racing with network! */ if (info->pi_magic != LNET_PROTO_PING_MAGIC) { CDEBUG(D_NET, "%s: Unexpected magic %08x\n", - libcfs_nid2str(gw->lp_nid), info->pi_magic); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; + libcfs_nid2str(gw->lpni_nid), info->pi_magic); + gw->lpni_ping_feats = LNET_PING_FEAT_INVAL; return; } - gw->lp_ping_feats = info->pi_features; - if ((gw->lp_ping_feats & LNET_PING_FEAT_MASK) == 0) { + gw->lpni_ping_feats = info->pi_features; + if ((gw->lpni_ping_feats & LNET_PING_FEAT_MASK) == 0) { CDEBUG(D_NET, "%s: Unexpected features 0x%x\n", - libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats); + libcfs_nid2str(gw->lpni_nid), gw->lpni_ping_feats); return; /* nothing I can understand */ } - if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0) + if ((gw->lpni_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0) return; /* can't carry NI status info */ - list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { + list_for_each_entry(rte, &gw->lpni_routes, lr_gwlist) { int down = 0; int up = 0; int i; - if ((gw->lp_ping_feats & LNET_PING_FEAT_RTE_DISABLED) != 0) { + if 
((gw->lpni_ping_feats & LNET_PING_FEAT_RTE_DISABLED) != 0) { rte->lr_downis = 1; continue; } @@ -689,8 +691,8 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) if (nid == LNET_NID_ANY) { CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n", - libcfs_nid2str(gw->lp_nid)); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; + libcfs_nid2str(gw->lpni_nid)); + gw->lpni_ping_feats = LNET_PING_FEAT_INVAL; return; } @@ -711,8 +713,8 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) } CDEBUG(D_NET, "%s: Unexpected status 0x%x\n", - libcfs_nid2str(gw->lp_nid), stat->ns_status); - gw->lp_ping_feats = LNET_PING_FEAT_INVAL; + libcfs_nid2str(gw->lpni_nid), stat->ns_status); + gw->lpni_ping_feats = LNET_PING_FEAT_INVAL; return; } @@ -733,7 +735,7 @@ static void lnet_router_checker_event(lnet_event_t *event) { lnet_rc_data_t *rcd = event->md.user_ptr; - struct lnet_peer *lp; + struct lnet_peer_ni *lp; LASSERT(rcd != NULL); @@ -751,14 +753,14 @@ lnet_router_checker_event(lnet_event_t *event) /* NB: it's called with holding lnet_res_lock, we have a few * places need to hold both locks at the same time, please take * care of lock ordering */ - lnet_net_lock(lp->lp_cpt); - if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) { + lnet_net_lock(lp->lpni_cpt); + if (!lnet_isrouter(lp) || lp->lpni_rcd != rcd) { /* ignore if no longer a router or rcd is replaced */ goto out; } if (event->type == LNET_EVENT_SEND) { - lp->lp_ping_notsent = 0; + lp->lpni_ping_notsent = 0; if (event->status == 0) goto out; } @@ -779,15 +781,15 @@ lnet_router_checker_event(lnet_event_t *event) lnet_parse_rc_info(rcd); out: - lnet_net_unlock(lp->lp_cpt); + lnet_net_unlock(lp->lpni_cpt); } static void lnet_wait_known_routerstate(void) { - lnet_peer_t *rtr; + struct lnet_peer_ni *rtr; struct list_head *entry; - int all_known; + int all_known; LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); @@ -796,9 +798,10 @@ lnet_wait_known_routerstate(void) all_known = 1; list_for_each(entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, lnet_peer_t, 
lp_rtr_list); + rtr = list_entry(entry, struct lnet_peer_ni, + lpni_rtr_list); - if (rtr->lp_alive_count == 0) { + if (rtr->lpni_alive_count == 0) { all_known = 0; break; } @@ -815,12 +818,12 @@ lnet_wait_known_routerstate(void) } void -lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net) +lnet_router_ni_update_locked(struct lnet_peer_ni *gw, __u32 net) { lnet_route_t *rte; - if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) { - list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) { + if ((gw->lpni_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) { + list_for_each_entry(rte, &gw->lpni_routes, lr_gwlist) { if (rte->lr_net == net) { rte->lr_downis = 0; break; @@ -877,10 +880,10 @@ lnet_destroy_rc_data(lnet_rc_data_t *rcd) LASSERT(LNetHandleIsInvalid(rcd->rcd_mdh)); if (rcd->rcd_gateway != NULL) { - int cpt = rcd->rcd_gateway->lp_cpt; + int cpt = rcd->rcd_gateway->lpni_cpt; lnet_net_lock(cpt); - lnet_peer_decref_locked(rcd->rcd_gateway); + lnet_peer_ni_decref_locked(rcd->rcd_gateway); lnet_net_unlock(cpt); } @@ -891,14 +894,14 @@ lnet_destroy_rc_data(lnet_rc_data_t *rcd) } static lnet_rc_data_t * -lnet_create_rc_data_locked(lnet_peer_t *gateway) +lnet_create_rc_data_locked(struct lnet_peer_ni *gateway) { lnet_rc_data_t *rcd = NULL; struct lnet_ping_info *pi; int rc; int i; - lnet_net_unlock(gateway->lp_cpt); + lnet_net_unlock(gateway->lpni_cpt); LIBCFS_ALLOC(rcd, sizeof(*rcd)); if (rcd == NULL) @@ -932,21 +935,21 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway) } LASSERT(rc == 0); - lnet_net_lock(gateway->lp_cpt); + lnet_net_lock(gateway->lpni_cpt); /* router table changed or someone has created rcd for this gateway */ - if (!lnet_isrouter(gateway) || gateway->lp_rcd != NULL) { - lnet_net_unlock(gateway->lp_cpt); + if (!lnet_isrouter(gateway) || gateway->lpni_rcd != NULL) { + lnet_net_unlock(gateway->lpni_cpt); goto out; } - lnet_peer_addref_locked(gateway); + lnet_peer_ni_addref_locked(gateway); rcd->rcd_gateway = gateway; - gateway->lp_rcd = rcd; - 
gateway->lp_ping_notsent = 0; + gateway->lpni_rcd = rcd; + gateway->lpni_ping_notsent = 0; return rcd; - out: +out: if (rcd != NULL) { if (!LNetHandleIsInvalid(rcd->rcd_mdh)) { rc = LNetMDUnlink(rcd->rcd_mdh); @@ -955,16 +958,16 @@ lnet_create_rc_data_locked(lnet_peer_t *gateway) lnet_destroy_rc_data(rcd); } - lnet_net_lock(gateway->lp_cpt); - return gateway->lp_rcd; + lnet_net_lock(gateway->lpni_cpt); + return gateway->lpni_rcd; } static int -lnet_router_check_interval (lnet_peer_t *rtr) +lnet_router_check_interval (struct lnet_peer_ni *rtr) { int secs; - secs = rtr->lp_alive ? live_router_check_interval : + secs = rtr->lpni_alive ? live_router_check_interval : dead_router_check_interval; if (secs < 0) secs = 0; @@ -973,32 +976,32 @@ lnet_router_check_interval (lnet_peer_t *rtr) } static void -lnet_ping_router_locked (lnet_peer_t *rtr) +lnet_ping_router_locked (struct lnet_peer_ni *rtr) { lnet_rc_data_t *rcd = NULL; cfs_time_t now = cfs_time_current(); int secs; struct lnet_ni *ni; - lnet_peer_addref_locked(rtr); + lnet_peer_ni_addref_locked(rtr); - if (rtr->lp_ping_deadline != 0 && /* ping timed out? */ - cfs_time_after(now, rtr->lp_ping_deadline)) + if (rtr->lpni_ping_deadline != 0 && /* ping timed out? */ + cfs_time_after(now, rtr->lpni_ping_deadline)) lnet_notify_locked(rtr, 1, 0, now); /* Run any outstanding notifications */ - ni = lnet_get_next_ni_locked(rtr->lp_net, NULL); + ni = lnet_get_next_ni_locked(rtr->lpni_net, NULL); lnet_ni_notify_locked(ni, rtr); if (!lnet_isrouter(rtr) || the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) { /* router table changed or router checker is shutting down */ - lnet_peer_decref_locked(rtr); + lnet_peer_ni_decref_locked(rtr); return; } - rcd = rtr->lp_rcd != NULL ? - rtr->lp_rcd : lnet_create_rc_data_locked(rtr); + rcd = rtr->lpni_rcd != NULL ? 
+ rtr->lpni_rcd : lnet_create_rc_data_locked(rtr); if (rcd == NULL) return; @@ -1007,43 +1010,43 @@ lnet_ping_router_locked (lnet_peer_t *rtr) CDEBUG(D_NET, "rtr %s %d: deadline %lu ping_notsent %d alive %d " - "alive_count %d lp_ping_timestamp %lu\n", - libcfs_nid2str(rtr->lp_nid), secs, - rtr->lp_ping_deadline, rtr->lp_ping_notsent, - rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp); + "alive_count %d lpni_ping_timestamp %lu\n", + libcfs_nid2str(rtr->lpni_nid), secs, + rtr->lpni_ping_deadline, rtr->lpni_ping_notsent, + rtr->lpni_alive, rtr->lpni_alive_count, rtr->lpni_ping_timestamp); - if (secs != 0 && !rtr->lp_ping_notsent && - cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp, + if (secs != 0 && !rtr->lpni_ping_notsent && + cfs_time_after(now, cfs_time_add(rtr->lpni_ping_timestamp, cfs_time_seconds(secs)))) { - int rc; + int rc; lnet_process_id_t id; lnet_handle_md_t mdh; - id.nid = rtr->lp_nid; + id.nid = rtr->lpni_nid; id.pid = LNET_PID_LUSTRE; CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id)); - rtr->lp_ping_notsent = 1; - rtr->lp_ping_timestamp = now; + rtr->lpni_ping_notsent = 1; + rtr->lpni_ping_timestamp = now; mdh = rcd->rcd_mdh; - if (rtr->lp_ping_deadline == 0) { - rtr->lp_ping_deadline = + if (rtr->lpni_ping_deadline == 0) { + rtr->lpni_ping_deadline = cfs_time_shift(router_ping_timeout); } - lnet_net_unlock(rtr->lp_cpt); + lnet_net_unlock(rtr->lpni_cpt); rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL, LNET_PROTO_PING_MATCHBITS, 0); - lnet_net_lock(rtr->lp_cpt); + lnet_net_lock(rtr->lpni_cpt); if (rc != 0) - rtr->lp_ping_notsent = 0; /* no event pending */ + rtr->lpni_ping_notsent = 0; /* no event pending */ } - lnet_peer_decref_locked(rtr); + lnet_peer_ni_decref_locked(rtr); return; } @@ -1122,7 +1125,7 @@ lnet_prune_rc_data(int wait_unlink) { lnet_rc_data_t *rcd; lnet_rc_data_t *tmp; - lnet_peer_t *lp; + struct lnet_peer_ni *lp; struct list_head head; int i = 2; @@ -1138,14 +1141,14 @@ lnet_prune_rc_data(int wait_unlink) 
if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) { /* router checker is stopping, prune all */ list_for_each_entry(lp, &the_lnet.ln_routers, - lp_rtr_list) { - if (lp->lp_rcd == NULL) + lpni_rtr_list) { + if (lp->lpni_rcd == NULL) continue; - LASSERT(list_empty(&lp->lp_rcd->rcd_list)); - list_add(&lp->lp_rcd->rcd_list, + LASSERT(list_empty(&lp->lpni_rcd->rcd_list)); + list_add(&lp->lpni_rcd->rcd_list, &the_lnet.ln_rcd_deathrow); - lp->lp_rcd = NULL; + lp->lpni_rcd = NULL; } } @@ -1226,8 +1229,8 @@ lnet_router_checker_active(void) static int lnet_router_checker(void *arg) { - lnet_peer_t *rtr; - struct list_head *entry; + struct lnet_peer_ni *rtr; + struct list_head *entry; cfs_block_allsigs(); @@ -1241,9 +1244,10 @@ rescan: version = the_lnet.ln_routers_version; list_for_each(entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, lnet_peer_t, lp_rtr_list); + rtr = list_entry(entry, struct lnet_peer_ni, + lpni_rtr_list); - cpt2 = rtr->lp_cpt; + cpt2 = rtr->lpni_cpt; if (cpt != cpt2) { lnet_net_unlock(cpt); cpt = cpt2; @@ -1717,7 +1721,7 @@ lnet_rtrpools_disable(void) int lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when) { - struct lnet_peer *lp = NULL; + struct lnet_peer_ni *lp = NULL; cfs_time_t now = cfs_time_current(); int cpt = lnet_cpt_of_nid(nid, ni); @@ -1759,7 +1763,7 @@ lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when) return -ESHUTDOWN; } - lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid); + lp = lnet_find_peer_ni_locked(nid, cpt); if (lp == NULL) { /* nid not found */ lnet_net_unlock(cpt); @@ -1771,15 +1775,15 @@ lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, cfs_time_t when) * if he notifies us about dead peer. 
For example ksocklnd can * call us with when == _time_when_the_node_was_booted_ if * no connections were successfully established */ - if (ni != NULL && !alive && when < lp->lp_last_alive) - when = lp->lp_last_alive; + if (ni != NULL && !alive && when < lp->lpni_last_alive) + when = lp->lpni_last_alive; lnet_notify_locked(lp, ni == NULL, alive, when); if (ni != NULL) lnet_ni_notify_locked(ni, lp); - lnet_peer_decref_locked(lp); + lnet_peer_ni_decref_locked(lp); lnet_net_unlock(cpt); return 0; diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index 0d2d670..b2dfe42 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -224,8 +224,8 @@ proc_lnet_routes(struct ctl_table *table, int write, void __user *buffer, __u32 net = rnet->lrn_net; __u32 hops = route->lr_hops; unsigned int priority = route->lr_priority; - lnet_nid_t nid = route->lr_gateway->lp_nid; - int alive = lnet_is_route_alive(route); + lnet_nid_t nid = route->lr_gateway->lpni_nid; + int alive = lnet_is_route_alive(route); s += snprintf(s, tmpstr + tmpsiz - s, "%-8s %4u %8u %7s %s\n", @@ -300,7 +300,7 @@ proc_lnet_routers(struct ctl_table *table, int write, void __user *buffer, *ppos = LNET_PROC_POS_MAKE(0, ver, 0, off); } else { struct list_head *r; - struct lnet_peer *peer = NULL; + struct lnet_peer_ni *peer = NULL; int skip = off - 1; lnet_net_lock(0); @@ -315,8 +315,9 @@ proc_lnet_routers(struct ctl_table *table, int write, void __user *buffer, r = the_lnet.ln_routers.next; while (r != &the_lnet.ln_routers) { - lnet_peer_t *lp = list_entry(r, lnet_peer_t, - lp_rtr_list); + struct lnet_peer_ni *lp = + list_entry(r, struct lnet_peer_ni, + lpni_rtr_list); if (skip == 0) { peer = lp; @@ -328,22 +329,22 @@ proc_lnet_routers(struct ctl_table *table, int write, void __user *buffer, } if (peer != NULL) { - lnet_nid_t nid = peer->lp_nid; + lnet_nid_t nid = peer->lpni_nid; cfs_time_t now = cfs_time_current(); - cfs_time_t deadline = peer->lp_ping_deadline; - int nrefs = 
peer->lp_refcount; - int nrtrrefs = peer->lp_rtr_refcount; - int alive_cnt = peer->lp_alive_count; - int alive = peer->lp_alive; - int pingsent = !peer->lp_ping_notsent; + cfs_time_t deadline = peer->lpni_ping_deadline; + int nrefs = atomic_read(&peer->lpni_refcount); + int nrtrrefs = peer->lpni_rtr_refcount; + int alive_cnt = peer->lpni_alive_count; + int alive = peer->lpni_alive; + int pingsent = !peer->lpni_ping_notsent; int last_ping = cfs_duration_sec(cfs_time_sub(now, - peer->lp_ping_timestamp)); + peer->lpni_ping_timestamp)); int down_ni = 0; lnet_route_t *rtr; - if ((peer->lp_ping_feats & + if ((peer->lpni_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) { - list_for_each_entry(rtr, &peer->lp_routes, + list_for_each_entry(rtr, &peer->lpni_routes, lr_gwlist) { /* downis on any route should be the * number of downis on the gateway */ @@ -396,6 +397,8 @@ proc_lnet_routers(struct ctl_table *table, int write, void __user *buffer, return rc; } +/* TODO: there should be no direct access to ptable. 
We should add a set + * of APIs that give access to the ptable and its members */ static int proc_lnet_peers(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -437,7 +440,7 @@ proc_lnet_peers(struct ctl_table *table, int write, void __user *buffer, hoff++; } else { - struct lnet_peer *peer; + struct lnet_peer_ni *peer; struct list_head *p; int skip; again: @@ -461,15 +464,16 @@ proc_lnet_peers(struct ctl_table *table, int write, void __user *buffer, p = ptable->pt_hash[hash].next; while (p != &ptable->pt_hash[hash]) { - lnet_peer_t *lp = list_entry(p, lnet_peer_t, - lp_hashlist); + struct lnet_peer_ni *lp = + list_entry(p, struct lnet_peer_ni, + lpni_hashlist); if (skip == 0) { peer = lp; /* minor optimization: start from idx+1 * on next iteration if we've just - * drained lp_hashlist */ - if (lp->lp_hashlist.next == + * drained lpni_hashlist */ + if (lp->lpni_hashlist.next == &ptable->pt_hash[hash]) { hoff = 1; hash++; @@ -481,7 +485,7 @@ proc_lnet_peers(struct ctl_table *table, int write, void __user *buffer, } skip--; - p = lp->lp_hashlist.next; + p = lp->lpni_hashlist.next; } if (peer != NULL) @@ -493,26 +497,26 @@ proc_lnet_peers(struct ctl_table *table, int write, void __user *buffer, } if (peer != NULL) { - lnet_nid_t nid = peer->lp_nid; - int nrefs = peer->lp_refcount; + lnet_nid_t nid = peer->lpni_nid; + int nrefs = atomic_read(&peer->lpni_refcount); int lastalive = -1; char *aliveness = "NA"; - int maxcr = peer->lp_net->net_tunables.lct_peer_tx_credits; - int txcr = peer->lp_txcredits; - int mintxcr = peer->lp_mintxcredits; - int rtrcr = peer->lp_rtrcredits; - int minrtrcr = peer->lp_minrtrcredits; - int txqnob = peer->lp_txqnob; + int maxcr = peer->lpni_net->net_tunables.lct_peer_tx_credits; + int txcr = peer->lpni_txcredits; + int mintxcr = peer->lpni_mintxcredits; + int rtrcr = peer->lpni_rtrcredits; + int minrtrcr = peer->lpni_minrtrcredits; + int txqnob = peer->lpni_txqnob; if (lnet_isrouter(peer) || 
lnet_peer_aliveness_enabled(peer)) - aliveness = peer->lp_alive ? "up" : "down"; + aliveness = peer->lpni_alive ? "up" : "down"; if (lnet_peer_aliveness_enabled(peer)) { cfs_time_t now = cfs_time_current(); cfs_duration_t delta; - delta = cfs_time_sub(now, peer->lp_last_alive); + delta = cfs_time_sub(now, peer->lpni_last_alive); lastalive = cfs_duration_sec(delta); /* No need to mess up peers contents with -- 1.8.3.1