From c66668387a11492ef95482add340321cb646c961 Mon Sep 17 00:00:00 2001
From: Mr NeilBrown
Date: Mon, 6 Jul 2020 08:34:40 -0400
Subject: [PATCH] LU-12678 o2iblnd: convert peers hash table to hashtable.h

Using a hashtable.h hashtable, rather than bespoke code, has several
advantages:
- the table is composed of hlist_head rather than list_head, so it
  consumes less memory (though we need to make it a little bigger, as
  its size must be a power of 2)
- there are existing macros for easily walking the whole table
- it uses a "real" hash function rather than "mod a prime number".

In some ways, rhashtable might be even better, but it can change the
ordering of objects in the table at arbitrary moments, and that could
hurt the user-space API.  It also does not support the partitioned
walking that kiblnd_check_conns() depends on.

Note that new peers are inserted at the top of a hash chain, rather
than appended at the end.  I don't think that should be a problem.

Various whitespace cleanups are included as well.  (Illustrative
sketches of the hashtable.h interfaces used here follow the patch.)

Signed-off-by: Mr NeilBrown
Change-Id: I2917024835abdd327c7da11dee3fd369570a9671
Reviewed-on: https://review.whamcloud.com/39303
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Chris Horn
Reviewed-by: James Simmons
Reviewed-by: Oleg Drokin
---
 lnet/klnds/o2iblnd/o2iblnd.c    | 243 +++++++++++++--------------
 lnet/klnds/o2iblnd/o2iblnd.h    |  21 +--
 lnet/klnds/o2iblnd/o2iblnd_cb.c | 354 ++++++++++++++++++++--------------
 3 files changed, 291 insertions(+), 327 deletions(-)

diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index b2dbb44..152173b 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -324,10 +324,10 @@ kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp, LASSERT(nid != LNET_NID_ANY); LIBCFS_CPT_ALLOC(peer_ni, lnet_cpt_table(), cpt, sizeof(*peer_ni)); - if (peer_ni == NULL) { - CERROR("Cannot allocate peer_ni\n"); - return -ENOMEM; - } + if (!peer_ni) { + CERROR("Cannot allocate peer_ni\n"); + return -ENOMEM; + } peer_ni->ibp_ni = ni; peer_ni->ibp_nid = nid; @@ -338,7 +338,7 @@ kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp, peer_ni->ibp_queue_depth_mod = 0; /* try to use the default */ atomic_set(&peer_ni->ibp_refcount, 1); /* 1 ref for caller */ - INIT_LIST_HEAD(&peer_ni->ibp_list); /* not in the peer_ni table yet */ + INIT_HLIST_NODE(&peer_ni->ibp_list); INIT_LIST_HEAD(&peer_ni->ibp_conns); INIT_LIST_HEAD(&peer_ni->ibp_tx_queue); @@ -372,7 +372,8 @@ kiblnd_destroy_peer(struct kib_peer_ni *peer_ni) /* NB a peer_ni's connections keep a reference on their peer_ni until * they are destroyed, so we can be assured that _all_ state to do * with this peer_ni has been cleaned up when its refcount drops to - * zero. */ + * zero.
+ */ if (atomic_dec_and_test(&net->ibn_npeers)) wake_up_var(&net->ibn_npeers); } @@ -381,14 +382,12 @@ struct kib_peer_ni * kiblnd_find_peer_locked(struct lnet_ni *ni, lnet_nid_t nid) { /* the caller is responsible for accounting the additional reference - * that this creates */ - struct list_head *peer_list = kiblnd_nid2peerlist(nid); - struct list_head *tmp; - struct kib_peer_ni *peer_ni; - - list_for_each(tmp, peer_list) { + * that this creates + */ + struct kib_peer_ni *peer_ni; - peer_ni = list_entry(tmp, struct kib_peer_ni, ibp_list); + hash_for_each_possible(kiblnd_data.kib_peers, peer_ni, + ibp_list, nid) { LASSERT(!kiblnd_peer_idle(peer_ni)); /* @@ -415,10 +414,10 @@ kiblnd_unlink_peer_locked(struct kib_peer_ni *peer_ni) { LASSERT(list_empty(&peer_ni->ibp_conns)); - LASSERT (kiblnd_peer_active(peer_ni)); - list_del_init(&peer_ni->ibp_list); - /* lose peerlist's ref */ - kiblnd_peer_decref(peer_ni); + LASSERT(kiblnd_peer_active(peer_ni)); + hlist_del_init(&peer_ni->ibp_list); + /* lose peerlist's ref */ + kiblnd_peer_decref(peer_ni); } static int @@ -426,32 +425,25 @@ kiblnd_get_peer_info(struct lnet_ni *ni, int index, lnet_nid_t *nidp, int *count) { struct kib_peer_ni *peer_ni; - struct list_head *ptmp; int i; unsigned long flags; read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - - list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { - - peer_ni = list_entry(ptmp, struct kib_peer_ni, ibp_list); - LASSERT(!kiblnd_peer_idle(peer_ni)); + hash_for_each(kiblnd_data.kib_peers, i, peer_ni, ibp_list) { + LASSERT(!kiblnd_peer_idle(peer_ni)); - if (peer_ni->ibp_ni != ni) - continue; + if (peer_ni->ibp_ni != ni) + continue; - if (index-- > 0) - continue; + if (index-- > 0) + continue; - *nidp = peer_ni->ibp_nid; - *count = atomic_read(&peer_ni->ibp_refcount); + *nidp = peer_ni->ibp_nid; + *count = atomic_read(&peer_ni->ibp_refcount); - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return 0; - } + read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); + return 0; } read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); @@ -480,27 +472,27 @@ static int kiblnd_del_peer(struct lnet_ni *ni, lnet_nid_t nid) { LIST_HEAD(zombies); - struct list_head *ptmp; - struct list_head *pnxt; - struct kib_peer_ni *peer_ni; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; + struct hlist_node *pnxt; + struct kib_peer_ni *peer_ni; + int lo; + int hi; + int i; + unsigned long flags; + int rc = -ENOENT; write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (nid != LNET_NID_ANY) { - lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - } else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } + if (nid != LNET_NID_ANY) { + lo = hash_min(nid, HASH_BITS(kiblnd_data.kib_peers)); + hi = lo; + } else { + lo = 0; + hi = HASH_SIZE(kiblnd_data.kib_peers) - 1; + } for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer_ni = list_entry(ptmp, struct kib_peer_ni, ibp_list); + hlist_for_each_entry_safe(peer_ni, pnxt, + &kiblnd_data.kib_peers[i], ibp_list) { LASSERT(!kiblnd_peer_idle(peer_ni)); if (peer_ni->ibp_ni != ni) @@ -531,39 +523,34 @@ kiblnd_del_peer(struct lnet_ni *ni, lnet_nid_t nid) static struct kib_conn * kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index) { - struct kib_peer_ni *peer_ni; - struct list_head *ptmp; + struct kib_peer_ni *peer_ni; struct kib_conn *conn; - struct list_head *ctmp; - int i; - unsigned long flags; + struct 
list_head *ctmp; + int i; + unsigned long flags; read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { + hash_for_each(kiblnd_data.kib_peers, i, peer_ni, ibp_list) { + LASSERT(!kiblnd_peer_idle(peer_ni)); - peer_ni = list_entry(ptmp, struct kib_peer_ni, ibp_list); - LASSERT(!kiblnd_peer_idle(peer_ni)); + if (peer_ni->ibp_ni != ni) + continue; - if (peer_ni->ibp_ni != ni) + list_for_each(ctmp, &peer_ni->ibp_conns) { + if (index-- > 0) continue; - list_for_each(ctmp, &peer_ni->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, struct kib_conn, ibc_list); - kiblnd_conn_addref(conn); - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return conn; - } + conn = list_entry(ctmp, struct kib_conn, ibc_list); + kiblnd_conn_addref(conn); + read_unlock_irqrestore(&kiblnd_data.kib_global_lock, + flags); + return conn; } } read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return NULL; + return NULL; } static void @@ -1100,28 +1087,27 @@ kiblnd_close_stale_conns_locked(struct kib_peer_ni *peer_ni, static int kiblnd_close_matching_conns(struct lnet_ni *ni, lnet_nid_t nid) { - struct kib_peer_ni *peer_ni; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - unsigned long flags; - int count = 0; + struct kib_peer_ni *peer_ni; + struct hlist_node *pnxt; + int lo; + int hi; + int i; + unsigned long flags; + int count = 0; write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (nid != LNET_NID_ANY) - lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - else { + if (nid != LNET_NID_ANY) { + lo = hash_min(nid, HASH_BITS(kiblnd_data.kib_peers)); + hi = lo; + } else { lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; + hi = HASH_SIZE(kiblnd_data.kib_peers) - 1; } for (i = lo; i <= hi; i++) { - list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - - peer_ni = list_entry(ptmp, struct kib_peer_ni, ibp_list); + hlist_for_each_entry_safe(peer_ni, pnxt, + &kiblnd_data.kib_peers[i], ibp_list) { LASSERT(!kiblnd_peer_idle(peer_ni)); if (peer_ni->ibp_ni != ni) @@ -2936,24 +2922,23 @@ kiblnd_destroy_dev(struct kib_dev *dev) static void kiblnd_base_shutdown(void) { - struct kib_sched_info *sched; - int i; + struct kib_sched_info *sched; + struct kib_peer_ni *peer_ni; + int i; LASSERT(list_empty(&kiblnd_data.kib_devs)); CDEBUG(D_MALLOC, "before LND base cleanup: kmem %lld\n", libcfs_kmem_read()); - switch (kiblnd_data.kib_init) { - default: - LBUG(); + switch (kiblnd_data.kib_init) { + default: + LBUG(); - case IBLND_INIT_ALL: - case IBLND_INIT_DATA: - LASSERT (kiblnd_data.kib_peers != NULL); - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - LASSERT(list_empty(&kiblnd_data.kib_peers[i])); - } + case IBLND_INIT_ALL: + case IBLND_INIT_DATA: + hash_for_each(kiblnd_data.kib_peers, i, peer_ni, ibp_list) + LASSERT(0); LASSERT(list_empty(&kiblnd_data.kib_connd_zombies)); LASSERT(list_empty(&kiblnd_data.kib_connd_conns)); LASSERT(list_empty(&kiblnd_data.kib_reconn_list)); @@ -2964,7 +2949,8 @@ kiblnd_base_shutdown(void) /* NB: we really want to stop scheduler threads net by net * instead of the whole module, this should be improved - * with dynamic configuration LNet */ + * with dynamic configuration LNet. 
+ */ cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) wake_up_all(&sched->ibs_waitq); @@ -2977,13 +2963,9 @@ kiblnd_base_shutdown(void) atomic_read(&kiblnd_data.kib_nthreads)); /* fall through */ - case IBLND_INIT_NOTHING: - break; - } - - if (kiblnd_data.kib_peers) - CFS_FREE_PTR_ARRAY(kiblnd_data.kib_peers, - kiblnd_data.kib_peer_hash_size); + case IBLND_INIT_NOTHING: + break; + } if (kiblnd_data.kib_scheds != NULL) cfs_percpt_free(kiblnd_data.kib_scheds); @@ -3065,9 +3047,9 @@ out: static int kiblnd_base_startup(struct net *ns) { - struct kib_sched_info *sched; - int rc; - int i; + struct kib_sched_info *sched; + int rc; + int i; LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING); @@ -3081,14 +3063,7 @@ kiblnd_base_startup(struct net *ns) INIT_LIST_HEAD(&kiblnd_data.kib_devs); INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs); - kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE; - CFS_ALLOC_PTR_ARRAY(kiblnd_data.kib_peers, - kiblnd_data.kib_peer_hash_size); - if (kiblnd_data.kib_peers == NULL) - goto failed; - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); + hash_init(kiblnd_data.kib_peers); spin_lock_init(&kiblnd_data.kib_connd_lock); INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); @@ -3125,36 +3100,36 @@ kiblnd_base_startup(struct net *ns) sched->ibs_cpt = i; } - kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR; + kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR; - /* lists/ptrs/locks initialised */ - kiblnd_data.kib_init = IBLND_INIT_DATA; - /*****************************************************/ + /* lists/ptrs/locks initialised */ + kiblnd_data.kib_init = IBLND_INIT_DATA; + /*****************************************************/ rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd"); - if (rc != 0) { - CERROR("Can't spawn o2iblnd connd: %d\n", rc); - goto failed; - } + if (rc != 0) { + CERROR("Can't spawn o2iblnd connd: %d\n", rc); + goto failed; + } if (*kiblnd_tunables.kib_dev_failover != 0) rc = kiblnd_thread_start(kiblnd_failover_thread, ns, "kiblnd_failover"); - if (rc != 0) { - CERROR("Can't spawn o2iblnd failover thread: %d\n", rc); - goto failed; - } + if (rc != 0) { + CERROR("Can't spawn o2iblnd failover thread: %d\n", rc); + goto failed; + } - /* flag everything initialised */ - kiblnd_data.kib_init = IBLND_INIT_ALL; - /*****************************************************/ + /* flag everything initialised */ + kiblnd_data.kib_init = IBLND_INIT_ALL; + /*****************************************************/ - return 0; + return 0; failed: - kiblnd_base_shutdown(); - return -ENETDOWN; + kiblnd_base_shutdown(); + return -ENETDOWN; } static int diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index 60d1590..3e4013f 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -93,7 +93,7 @@ #include #include "o2iblnd-idl.h" -#define IBLND_PEER_HASH_SIZE 101 /* # peer_ni lists */ +#define IBLND_PEER_HASH_BITS 7 /* log2 of # peer_ni lists */ #define IBLND_N_SCHED 2 #define IBLND_N_SCHED_HIGH 4 @@ -439,9 +439,7 @@ struct kib_data { /* stabilize net/dev/peer_ni/conn ops */ rwlock_t kib_global_lock; /* hash table of all my known peers */ - struct list_head *kib_peers; - /* size of kib_peers */ - int kib_peer_hash_size; + DECLARE_HASHTABLE(kib_peers, IBLND_PEER_HASH_BITS); /* the connd task (serialisation assertions) */ void *kib_connd; /* connections to setup/teardown */ @@ -642,8 +640,8 @@ struct kib_conn { #define IBLND_CONN_DISCONNECTED 5 /* disconnected */ struct kib_peer_ni { 
- /* stash on global peer_ni list */ - struct list_head ibp_list; + /* on peer_ni hash chain */ + struct hlist_node ibp_list; /* who's on the other end(s) */ lnet_nid_t ibp_nid; /* LNet interface */ @@ -814,20 +812,11 @@ kiblnd_peer_idle(struct kib_peer_ni *peer_ni) return !kiblnd_peer_connecting(peer_ni) && list_empty(&peer_ni->ibp_conns); } -static inline struct list_head * -kiblnd_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = - ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size; - - return &kiblnd_data.kib_peers[hash]; -} - static inline int kiblnd_peer_active(struct kib_peer_ni *peer_ni) { /* Am I in the peer_ni hash table? */ - return !list_empty(&peer_ni->ibp_list); + return !hlist_unhashed(&peer_ni->ibp_list); } static inline struct kib_conn * diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 6036756..658d0ad 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1494,47 +1494,49 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) struct kib_peer_ni *peer2; struct kib_conn *conn; rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - unsigned long flags; - int rc; - int i; + unsigned long flags; + int rc; + int i; struct lnet_ioctl_config_o2iblnd_tunables *tunables; - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ + /* If I get here, I've committed to send, so I complete the tx with + * failure on any problems + */ - LASSERT (tx == NULL || tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx == NULL || tx->tx_nwrq > 0); /* work items have been set up */ + LASSERT(!tx || !tx->tx_conn); /* only set when assigned a conn */ + LASSERT(!tx || tx->tx_nwrq > 0); /* work items have been set up */ - /* First time, just use a read lock since I expect to find my peer_ni - * connected */ + /* First time, just use a read lock since I expect to find my peer_ni + * connected + */ read_lock_irqsave(g_lock, flags); - peer_ni = kiblnd_find_peer_locked(ni, nid); + peer_ni = kiblnd_find_peer_locked(ni, nid); if (peer_ni != NULL && !list_empty(&peer_ni->ibp_conns)) { - /* Found a peer_ni with an established connection */ - conn = kiblnd_get_conn_locked(peer_ni); - kiblnd_conn_addref(conn); /* 1 ref for me... */ + /* Found a peer_ni with an established connection */ + conn = kiblnd_get_conn_locked(peer_ni); + kiblnd_conn_addref(conn); /* 1 ref for me... */ read_unlock_irqrestore(g_lock, flags); - if (tx != NULL) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - return; - } + if (tx != NULL) + kiblnd_queue_tx(tx, conn); + kiblnd_conn_decref(conn); /* ...to here */ + return; + } read_unlock(g_lock); /* Re-try with a write lock */ write_lock(g_lock); - peer_ni = kiblnd_find_peer_locked(ni, nid); - if (peer_ni != NULL) { + peer_ni = kiblnd_find_peer_locked(ni, nid); + if (peer_ni != NULL) { if (list_empty(&peer_ni->ibp_conns)) { - /* found a peer_ni, but it's still connecting... */ + /* found a peer_ni, but it's still connecting... 
*/ LASSERT(kiblnd_peer_connecting(peer_ni)); - if (tx != NULL) + if (tx != NULL) list_add_tail(&tx->tx_list, - &peer_ni->ibp_tx_queue); + &peer_ni->ibp_tx_queue); write_unlock_irqrestore(g_lock, flags); } else { conn = kiblnd_get_conn_locked(peer_ni); @@ -1542,12 +1544,12 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) write_unlock_irqrestore(g_lock, flags); - if (tx != NULL) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - return; - } + if (tx != NULL) + kiblnd_queue_tx(tx, conn); + kiblnd_conn_decref(conn); /* ...to here */ + } + return; + } write_unlock_irqrestore(g_lock, flags); @@ -1566,14 +1568,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) write_lock_irqsave(g_lock, flags); - peer2 = kiblnd_find_peer_locked(ni, nid); - if (peer2 != NULL) { + peer2 = kiblnd_find_peer_locked(ni, nid); + if (peer2 != NULL) { if (list_empty(&peer2->ibp_conns)) { - /* found a peer_ni, but it's still connecting... */ + /* found a peer_ni, but it's still connecting... */ LASSERT(kiblnd_peer_connecting(peer2)); - if (tx != NULL) + if (tx != NULL) list_add_tail(&tx->tx_list, - &peer2->ibp_tx_queue); + &peer2->ibp_tx_queue); write_unlock_irqrestore(g_lock, flags); } else { conn = kiblnd_get_conn_locked(peer2); @@ -1581,14 +1583,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) write_unlock_irqrestore(g_lock, flags); - if (tx != NULL) - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } + if (tx != NULL) + kiblnd_queue_tx(tx, conn); + kiblnd_conn_decref(conn); /* ...to here */ + } - kiblnd_peer_decref(peer_ni); - return; - } + kiblnd_peer_decref(peer_ni); + return; + } /* Brand new peer_ni */ LASSERT(peer_ni->ibp_connecting == 0); @@ -1601,14 +1603,14 @@ kiblnd_launch_tx(struct lnet_ni *ni, struct kib_tx *tx, lnet_nid_t nid) if (tx != NULL) list_add_tail(&tx->tx_list, &peer_ni->ibp_tx_queue); - kiblnd_peer_addref(peer_ni); - list_add_tail(&peer_ni->ibp_list, kiblnd_nid2peerlist(nid)); + kiblnd_peer_addref(peer_ni); + hash_add(kiblnd_data.kib_peers, &peer_ni->ibp_list, nid); write_unlock_irqrestore(g_lock, flags); for (i = 0; i < tunables->lnd_conns_per_peer; i++) kiblnd_connect_peer(peer_ni); - kiblnd_peer_decref(peer_ni); + kiblnd_peer_decref(peer_ni); } int @@ -2386,7 +2388,7 @@ kiblnd_reject(struct rdma_cm_id *cmid, struct kib_rej *rej) static int kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) { - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; + rwlock_t *g_lock = &kiblnd_data.kib_global_lock; struct kib_msg *reqmsg = priv; struct kib_msg *ackmsg; struct kib_dev *ibdev; @@ -2395,27 +2397,27 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) struct kib_conn *conn; struct lnet_ni *ni = NULL; struct kib_net *net = NULL; - lnet_nid_t nid; - struct rdma_conn_param cp; + lnet_nid_t nid; + struct rdma_conn_param cp; struct kib_rej rej; - int version = IBLND_MSG_VERSION; - unsigned long flags; - int rc; - struct sockaddr_in *peer_addr; - LASSERT (!in_interrupt()); + int version = IBLND_MSG_VERSION; + unsigned long flags; + int rc; + struct sockaddr_in *peer_addr; + LASSERT(!in_interrupt()); /* cmid inherits 'context' from the corresponding listener id */ ibdev = cmid->context; LASSERT(ibdev); - memset(&rej, 0, sizeof(rej)); - rej.ibr_magic = IBLND_MSG_MAGIC; - rej.ibr_why = IBLND_REJECT_FATAL; - rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE; + memset(&rej, 0, sizeof(rej)); + rej.ibr_magic = IBLND_MSG_MAGIC; + rej.ibr_why 
= IBLND_REJECT_FATAL; + rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE; - peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr); - if (*kiblnd_tunables.kib_require_priv_port && - ntohs(peer_addr->sin_port) >= PROT_SOCK) { + peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr); + if (*kiblnd_tunables.kib_require_priv_port && + ntohs(peer_addr->sin_port) >= PROT_SOCK) { __u32 ip = ntohl(peer_addr->sin_addr.s_addr); CERROR("peer_ni's port (%pI4h:%hu) is not privileged\n", &ip, ntohs(peer_addr->sin_port)); @@ -2462,17 +2464,16 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) if (ni == NULL || /* no matching net */ ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! */ net->ibn_dev != ibdev) { /* wrong device */ - CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): " - "bad dst nid %s\n", libcfs_nid2str(nid), - ni == NULL ? "NA" : libcfs_nid2str(ni->ni_nid), + CERROR("Can't accept conn from %s on %s (%s:%d:%pI4h): bad dst nid %s\n", libcfs_nid2str(nid), + ni ? libcfs_nid2str(ni->ni_nid) : "NA", ibdev->ibd_ifname, ibdev->ibd_nnets, - &ibdev->ibd_ifip, + &ibdev->ibd_ifip, libcfs_nid2str(reqmsg->ibm_dstnid)); goto failed; } - /* check time stamp as soon as possible */ + /* check time stamp as soon as possible */ if (reqmsg->ibm_dststamp != 0 && reqmsg->ibm_dststamp != net->ibn_incarnation) { CWARN("Stale connection request\n"); @@ -2491,8 +2492,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) if (reqmsg->ibm_u.connparams.ibcp_queue_depth > kiblnd_msg_queue_size(version, ni)) { - CERROR("Can't accept conn from %s, queue depth too large: " - " %d (<=%d wanted)\n", + CERROR("Can't accept conn from %s, queue depth too large: %d (<=%d wanted)\n", libcfs_nid2str(nid), reqmsg->ibm_u.connparams.ibcp_queue_depth, kiblnd_msg_queue_size(version, ni)); @@ -2505,8 +2505,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) if (reqmsg->ibm_u.connparams.ibcp_max_frags > IBLND_MAX_RDMA_FRAGS) { - CWARN("Can't accept conn from %s (version %x): " - "max_frags %d too large (%d wanted)\n", + CWARN("Can't accept conn from %s (version %x): max_frags %d too large (%d wanted)\n", libcfs_nid2str(nid), version, reqmsg->ibm_u.connparams.ibcp_max_frags, IBLND_MAX_RDMA_FRAGS); @@ -2518,9 +2517,7 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) } else if (reqmsg->ibm_u.connparams.ibcp_max_frags < IBLND_MAX_RDMA_FRAGS && net->ibn_fmr_ps == NULL) { - CWARN("Can't accept conn from %s (version %x): " - "max_frags %d incompatible without FMR pool " - "(%d wanted)\n", + CWARN("Can't accept conn from %s (version %x): max_frags %d incompatible without FMR pool (%d wanted)\n", libcfs_nid2str(nid), version, reqmsg->ibm_u.connparams.ibcp_max_frags, IBLND_MAX_RDMA_FRAGS); @@ -2531,13 +2528,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) goto failed; } - if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("Can't accept %s: message size %d too big (%d max)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - goto failed; - } + if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { + CERROR("Can't accept %s: message size %d too big (%d max)\n", + libcfs_nid2str(nid), + reqmsg->ibm_u.connparams.ibcp_max_msg_size, + IBLND_MSG_SIZE); + goto failed; + } /* assume 'nid' is a new peer_ni; create */ rc = kiblnd_create_peer(ni, &peer_ni, nid); @@ -2553,16 +2550,16 @@ kiblnd_passive_connect(struct 
rdma_cm_id *cmid, void *priv, int priv_nob) write_lock_irqsave(g_lock, flags); - peer2 = kiblnd_find_peer_locked(ni, nid); - if (peer2 != NULL) { - if (peer2->ibp_version == 0) { - peer2->ibp_version = version; - peer2->ibp_incarnation = reqmsg->ibm_srcstamp; - } + peer2 = kiblnd_find_peer_locked(ni, nid); + if (peer2 != NULL) { + if (peer2->ibp_version == 0) { + peer2->ibp_version = version; + peer2->ibp_incarnation = reqmsg->ibm_srcstamp; + } - /* not the guy I've talked with */ - if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp || - peer2->ibp_version != version) { + /* not the guy I've talked with */ + if (peer2->ibp_incarnation != reqmsg->ibm_srcstamp || + peer2->ibp_version != version) { kiblnd_close_peer_conns_locked(peer2, -ESTALE); if (kiblnd_peer_active(peer2)) { @@ -2575,10 +2572,10 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) libcfs_nid2str(nid), peer2->ibp_version, version, peer2->ibp_incarnation, reqmsg->ibm_srcstamp); - kiblnd_peer_decref(peer_ni); - rej.ibr_why = IBLND_REJECT_CONN_STALE; - goto failed; - } + kiblnd_peer_decref(peer_ni); + rej.ibr_why = IBLND_REJECT_CONN_STALE; + goto failed; + } /* Tie-break connection race in favour of the higher NID. * If we keep running into a race condition multiple times, @@ -2620,78 +2617,80 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob) peer2->ibp_queue_depth = peer_ni->ibp_queue_depth; write_unlock_irqrestore(g_lock, flags); - kiblnd_peer_decref(peer_ni); - peer_ni = peer2; - } else { - /* Brand new peer_ni */ - LASSERT (peer_ni->ibp_accepting == 0); - LASSERT (peer_ni->ibp_version == 0 && - peer_ni->ibp_incarnation == 0); + kiblnd_peer_decref(peer_ni); + peer_ni = peer2; + } else { + /* Brand new peer_ni */ + LASSERT(peer_ni->ibp_accepting == 0); + LASSERT(peer_ni->ibp_version == 0 && + peer_ni->ibp_incarnation == 0); - peer_ni->ibp_accepting = 1; - peer_ni->ibp_version = version; - peer_ni->ibp_incarnation = reqmsg->ibm_srcstamp; + peer_ni->ibp_accepting = 1; + peer_ni->ibp_version = version; + peer_ni->ibp_incarnation = reqmsg->ibm_srcstamp; - /* I have a ref on ni that prevents it being shutdown */ - LASSERT (net->ibn_shutdown == 0); + /* I have a ref on ni that prevents it being shutdown */ + LASSERT(net->ibn_shutdown == 0); - kiblnd_peer_addref(peer_ni); - list_add_tail(&peer_ni->ibp_list, kiblnd_nid2peerlist(nid)); + kiblnd_peer_addref(peer_ni); + hash_add(kiblnd_data.kib_peers, &peer_ni->ibp_list, nid); write_unlock_irqrestore(g_lock, flags); - } + } - conn = kiblnd_create_conn(peer_ni, cmid, IBLND_CONN_PASSIVE_WAIT, version); - if (conn == NULL) { - kiblnd_peer_connect_failed(peer_ni, 0, -ENOMEM); - kiblnd_peer_decref(peer_ni); - rej.ibr_why = IBLND_REJECT_NO_RESOURCES; - goto failed; - } + conn = kiblnd_create_conn(peer_ni, cmid, IBLND_CONN_PASSIVE_WAIT, + version); + if (!conn) { + kiblnd_peer_connect_failed(peer_ni, 0, -ENOMEM); + kiblnd_peer_decref(peer_ni); + rej.ibr_why = IBLND_REJECT_NO_RESOURCES; + goto failed; + } - /* conn now "owns" cmid, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. */ + /* conn now "owns" cmid, so I return success from here on to ensure the + * CM callback doesn't destroy cmid. 
+ */ conn->ibc_incarnation = reqmsg->ibm_srcstamp; conn->ibc_credits = conn->ibc_queue_depth; conn->ibc_reserved_credits = conn->ibc_queue_depth; LASSERT(conn->ibc_credits + conn->ibc_reserved_credits + IBLND_OOB_MSGS(version) <= IBLND_RX_MSGS(conn)); - ackmsg = &conn->ibc_connvars->cv_msg; - memset(ackmsg, 0, sizeof(*ackmsg)); + ackmsg = &conn->ibc_connvars->cv_msg; + memset(ackmsg, 0, sizeof(*ackmsg)); - kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, - sizeof(ackmsg->ibm_u.connparams)); + kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, + sizeof(ackmsg->ibm_u.connparams)); ackmsg->ibm_u.connparams.ibcp_queue_depth = conn->ibc_queue_depth; ackmsg->ibm_u.connparams.ibcp_max_frags = conn->ibc_max_frags; ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp); + kiblnd_pack_msg(ni, ackmsg, version, 0, nid, reqmsg->ibm_srcstamp); - memset(&cp, 0, sizeof(cp)); - cp.private_data = ackmsg; - cp.private_data_len = ackmsg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; + memset(&cp, 0, sizeof(cp)); + cp.private_data = ackmsg; + cp.private_data_len = ackmsg->ibm_nob; + cp.responder_resources = 0; /* No atomic ops or RDMA reads */ + cp.initiator_depth = 0; + cp.flow_control = 1; + cp.retry_count = *kiblnd_tunables.kib_retry_count; + cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); + CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); - rc = rdma_accept(cmid, &cp); - if (rc != 0) { - CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); - rej.ibr_version = version; - rej.ibr_why = IBLND_REJECT_FATAL; + rc = rdma_accept(cmid, &cp); + if (rc != 0) { + CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); + rej.ibr_version = version; + rej.ibr_why = IBLND_REJECT_FATAL; - kiblnd_reject(cmid, &rej); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } + kiblnd_reject(cmid, &rej); + kiblnd_connreq_done(conn, rc); + kiblnd_conn_decref(conn); + } - lnet_ni_decref(ni); - return 0; + lnet_ni_decref(ni); + return 0; failed: if (ni != NULL) { @@ -3355,22 +3354,20 @@ kiblnd_check_conns (int idx) LIST_HEAD(closes); LIST_HEAD(checksends); LIST_HEAD(timedout_txs); - struct list_head *peers = &kiblnd_data.kib_peers[idx]; - struct list_head *ptmp; + struct hlist_head *peers = &kiblnd_data.kib_peers[idx]; struct kib_peer_ni *peer_ni; - struct kib_conn *conn; + struct kib_conn *conn; struct kib_tx *tx, *tx_tmp; struct list_head *ctmp; - unsigned long flags; + unsigned long flags; /* NB. We expect to have a look at all the peers and not find any * RDMAs to time out, so we just use a shared lock while we - * take a look... */ + * take a look... 
+ */ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - list_for_each(ptmp, peers) { - peer_ni = list_entry(ptmp, struct kib_peer_ni, ibp_list); - + hlist_for_each_entry(peer_ni, peers, ibp_list) { /* Check tx_deadline */ list_for_each_entry_safe(tx, tx_tmp, &peer_ni->ibp_tx_queue, tx_list) { if (ktime_compare(ktime_get(), tx->tx_deadline) >= 0) { @@ -3400,10 +3397,10 @@ kiblnd_check_conns (int idx) } if (timedout) { - CERROR("Timed out RDMA with %s (%lld): " - "c: %u, oc: %u, rc: %u\n", + CERROR("Timed out RDMA with %s (%lld): c: %u, oc: %u, rc: %u\n", libcfs_nid2str(peer_ni->ibp_nid), - ktime_get_seconds() - peer_ni->ibp_last_alive, + ktime_get_seconds() + - peer_ni->ibp_last_alive, conn->ibc_credits, conn->ibc_outstanding_credits, conn->ibc_reserved_credits); @@ -3426,7 +3423,8 @@ kiblnd_check_conns (int idx) /* Handle timeout by closing the whole * connection. We can only be sure RDMA activity - * has ceased once the QP has been modified. */ + * has ceased once the QP has been modified. + */ while (!list_empty(&closes)) { conn = list_entry(closes.next, struct kib_conn, ibc_connd_list); @@ -3437,7 +3435,8 @@ kiblnd_check_conns (int idx) /* In case we have enough credits to return via a * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ + * free to do it last time... + */ while (!list_empty(&checksends)) { conn = list_entry(checksends.next, struct kib_conn, ibc_connd_list); @@ -3478,15 +3477,15 @@ kiblnd_disconnect_conn(struct kib_conn *conn) int kiblnd_connd (void *arg) { - spinlock_t *lock= &kiblnd_data.kib_connd_lock; + spinlock_t *lock = &kiblnd_data.kib_connd_lock; wait_queue_entry_t wait; - unsigned long flags; + unsigned long flags; struct kib_conn *conn; - int timeout; - int i; - int dropped_lock; - int peer_index = 0; - unsigned long deadline = jiffies; + int timeout; + int i; + int dropped_lock; + int peer_index = 0; + unsigned long deadline = jiffies; init_wait(&wait); kiblnd_data.kib_connd = current; @@ -3496,7 +3495,7 @@ kiblnd_connd (void *arg) while (!kiblnd_data.kib_shutdown) { int reconn = 0; - dropped_lock = 0; + dropped_lock = 0; if (!list_empty(&kiblnd_data.kib_connd_zombies)) { struct kib_peer_ni *peer_ni = NULL; @@ -3548,7 +3547,7 @@ kiblnd_connd (void *arg) if (wait) list_add_tail(&conn->ibc_list, &kiblnd_data.kib_connd_waits); - } + } while (reconn < KIB_RECONN_BREAK) { if (kiblnd_data.kib_reconn_sec != @@ -3591,24 +3590,25 @@ kiblnd_connd (void *arg) &kiblnd_data.kib_connd_waits); } - /* careful with the jiffy wrap... */ - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kiblnd_data.kib_peer_hash_size; + /* careful with the jiffy wrap... */ + timeout = (int)(deadline - jiffies); + if (timeout <= 0) { + const int n = 4; + const int p = 1; + int chunk = HASH_SIZE(kiblnd_data.kib_peers); unsigned int lnd_timeout; spin_unlock_irqrestore(lock, flags); - dropped_lock = 1; + dropped_lock = 1; - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer_ni table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
*/ + /* Time to check for RDMA timeouts on a few more + * peers: I do checks every 'p' seconds on a + * proportion of the peer_ni table and I need to check + * every connection 'n' times within a timeout + * interval, to ensure I detect a timeout on any + * connection within (n+1)/n times the timeout + * interval. + */ lnd_timeout = kiblnd_timeout(); if (lnd_timeout > n * p) @@ -3619,7 +3619,7 @@ kiblnd_connd (void *arg) for (i = 0; i < chunk; i++) { kiblnd_check_conns(peer_index); peer_index = (peer_index + 1) % - kiblnd_data.kib_peer_hash_size; + HASH_SIZE(kiblnd_data.kib_peers); } deadline += cfs_time_seconds(p); -- 1.8.3.1
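
The sketches below are editorial illustrations, not part of the patch;
every "demo_" name is hypothetical.  All are plain kernel C against
<linux/hashtable.h>.

First, the basic pattern the conversion relies on: declare, init, add,
single-bucket lookup, and unlink.  struct demo_peer stands in for
struct kib_peer_ni, a bare u64 for lnet_nid_t, and locking is omitted
(the real code holds kib_global_lock around each of these operations).
On the memory point from the commit message: a bucket is now a
one-pointer hlist_head instead of a two-pointer list_head, so on a
64-bit build the 128 buckets cost 128 * 8 = 1024 bytes, against
101 * 16 = 1616 for the old list_head table.

#include <linux/hashtable.h>
#include <linux/slab.h>
#include <linux/types.h>

#define DEMO_PEER_HASH_BITS 7	/* 2^7 = 128 buckets, cf. IBLND_PEER_HASH_BITS */

static DECLARE_HASHTABLE(demo_peers, DEMO_PEER_HASH_BITS);

struct demo_peer {
	struct hlist_node	dp_list;	/* hash chain linkage, cf. ibp_list */
	u64			dp_nid;		/* lookup key, cf. ibp_nid */
};

static void demo_init(void)
{
	hash_init(demo_peers);		/* cf. kiblnd_base_startup() */
}

static struct demo_peer *demo_peer_find(u64 nid)
{
	struct demo_peer *peer;

	/* walks only the one bucket "nid" hashes to; colliding keys
	 * share a chain, hence the explicit compare,
	 * cf. kiblnd_find_peer_locked()
	 */
	hash_for_each_possible(demo_peers, peer, dp_list, nid)
		if (peer->dp_nid == nid)
			return peer;
	return NULL;
}

static int demo_peer_add(u64 nid)
{
	struct demo_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL);

	if (!peer)
		return -ENOMEM;
	peer->dp_nid = nid;
	INIT_HLIST_NODE(&peer->dp_list);	/* unhashed until added */
	/* hash_add() links at the head of the chain - the insertion
	 * order change called out in the commit message
	 */
	hash_add(demo_peers, &peer->dp_list, nid);
	return 0;
}

static void demo_peer_unlink(struct demo_peer *peer)
{
	/* hlist_unhashed() is the new "am I in the table?" test,
	 * cf. kiblnd_peer_active(); hlist_del_init() restores the
	 * unhashed state, cf. kiblnd_unlink_peer_locked()
	 */
	if (!hlist_unhashed(&peer->dp_list))
		hlist_del_init(&peer->dp_list);
}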
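
Second, the partitioned walking that ruled out rhashtable: a
hashtable.h bucket is an ordinary hlist_head, so a caller can scan one
chain at a time and resume later.  This sketch (hypothetical demo_*
names, building on the table above) mirrors how kiblnd_connd() feeds
bucket indices to kiblnd_check_conns() a chunk per pass, wrapping with
HASH_SIZE() where the old code used kib_peer_hash_size.

#include <linux/printk.h>

static void demo_check_bucket(int idx)
{
	struct demo_peer *peer;

	/* plain hlist iteration over a single chain,
	 * cf. kiblnd_check_conns(idx)
	 */
	hlist_for_each_entry(peer, &demo_peers[idx], dp_list)
		pr_info("bucket %d: peer %llu\n", idx,
			(unsigned long long)peer->dp_nid);
}

static void demo_scan_chunk(int *next_bucket, int chunk)
{
	int i;

	/* check a few buckets now, pick up where we left off next time */
	for (i = 0; i < chunk; i++) {
		demo_check_bucket(*next_bucket);
		*next_bucket = (*next_bucket + 1) % HASH_SIZE(demo_peers);
	}
}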
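
Third, the [lo, hi] bucket-range idiom kept by kiblnd_del_peer() and
kiblnd_close_matching_conns(): hash_min() with HASH_BITS() names the
single bucket a given key was filed under (it is the same hash that
hash_add() applies), while a wildcard widens the range to the whole
table.  Hypothetical demo_* names again; DEMO_NID_ANY stands in for
LNET_NID_ANY.

#define DEMO_NID_ANY ((u64)-1)

static int demo_del_peer(u64 nid)
{
	struct hlist_node *pnxt;
	struct demo_peer *peer;
	int lo, hi, i;
	int rc = -ENOENT;

	if (nid != DEMO_NID_ANY) {
		/* one specific key lives in exactly one bucket */
		lo = hash_min(nid, HASH_BITS(demo_peers));
		hi = lo;
	} else {
		/* wildcard: sweep every bucket */
		lo = 0;
		hi = HASH_SIZE(demo_peers) - 1;
	}

	for (i = lo; i <= hi; i++) {
		/* _safe variant because entries are unlinked mid-walk */
		hlist_for_each_entry_safe(peer, pnxt,
					  &demo_peers[i], dp_list) {
			if (nid != DEMO_NID_ANY && peer->dp_nid != nid)
				continue;
			hlist_del_init(&peer->dp_list);
			kfree(peer);
			rc = 0;
		}
	}
	return rc;
}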