X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fo2iblnd%2Fo2iblnd.c;h=17f0596fd5f6913746e068736c65e62ed98b9968;hb=d8792a7dab933def57f6069296234ad48ea0da09;hp=e919008d442d0f2d7c0f57f87915ce0d5c812478;hpb=d10200a80770f0029d1d665af954187b9ad883df;p=fs%2Flustre-release.git diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index e919008..17f0596 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -255,7 +255,7 @@ kiblnd_unpack_msg(kib_msg_t *msg, int nob) msg->ibm_cksum = msg_cksum; if (flip) { - /* leave magic unflipped as a clue to peer endianness */ + /* leave magic unflipped as a clue to peer_ni endianness */ msg->ibm_version = version; CLASSERT (sizeof(msg->ibm_type) == 1); CLASSERT (sizeof(msg->ibm_credits) == 1); @@ -313,33 +313,33 @@ kiblnd_unpack_msg(kib_msg_t *msg, int nob) } int -kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) +kiblnd_create_peer(lnet_ni_t *ni, kib_peer_ni_t **peerp, lnet_nid_t nid) { - kib_peer_t *peer; + kib_peer_ni_t *peer_ni; kib_net_t *net = ni->ni_data; - int cpt = lnet_cpt_of_nid(nid); + int cpt = lnet_cpt_of_nid(nid, ni); unsigned long flags; LASSERT(net != NULL); LASSERT(nid != LNET_NID_ANY); - LIBCFS_CPT_ALLOC(peer, lnet_cpt_table(), cpt, sizeof(*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); + LIBCFS_CPT_ALLOC(peer_ni, lnet_cpt_table(), cpt, sizeof(*peer_ni)); + if (peer_ni == NULL) { + CERROR("Cannot allocate peer_ni\n"); return -ENOMEM; } - peer->ibp_ni = ni; - peer->ibp_nid = nid; - peer->ibp_error = 0; - peer->ibp_last_alive = 0; - peer->ibp_max_frags = kiblnd_cfg_rdma_frags(peer->ibp_ni); - peer->ibp_queue_depth = ni->ni_peertxcredits; - atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ + peer_ni->ibp_ni = ni; + peer_ni->ibp_nid = nid; + peer_ni->ibp_error = 0; + peer_ni->ibp_last_alive = 0; + peer_ni->ibp_max_frags = kiblnd_cfg_rdma_frags(peer_ni->ibp_ni); + peer_ni->ibp_queue_depth = ni->ni_net->net_tunables.lct_peer_tx_credits; + atomic_set(&peer_ni->ibp_refcount, 1); /* 1 ref for caller */ - INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD(&peer->ibp_conns); - INIT_LIST_HEAD(&peer->ibp_tx_queue); + INIT_LIST_HEAD(&peer_ni->ibp_list); /* not in the peer_ni table yet */ + INIT_LIST_HEAD(&peer_ni->ibp_conns); + INIT_LIST_HEAD(&peer_ni->ibp_tx_queue); write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); @@ -351,72 +351,79 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - *peerp = peer; + *peerp = peer_ni; return 0; } void -kiblnd_destroy_peer (kib_peer_t *peer) +kiblnd_destroy_peer (kib_peer_ni_t *peer_ni) { - kib_net_t *net = peer->ibp_ni->ni_data; + kib_net_t *net = peer_ni->ibp_ni->ni_data; LASSERT(net != NULL); - LASSERT (atomic_read(&peer->ibp_refcount) == 0); - LASSERT(!kiblnd_peer_active(peer)); - LASSERT(kiblnd_peer_idle(peer)); - LASSERT(list_empty(&peer->ibp_tx_queue)); + LASSERT (atomic_read(&peer_ni->ibp_refcount) == 0); + LASSERT(!kiblnd_peer_active(peer_ni)); + LASSERT(kiblnd_peer_idle(peer_ni)); + LASSERT(list_empty(&peer_ni->ibp_tx_queue)); - LIBCFS_FREE(peer, sizeof(*peer)); + LIBCFS_FREE(peer_ni, sizeof(*peer_ni)); - /* NB a peer's connections keep a reference on their peer until + /* NB a peer_ni's connections keep a reference on their peer_ni until * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to + * with 
this peer has been cleaned up when its refcount drops to
+	 * with this peer_ni has been cleaned up when its refcount drops to
 	 * zero. */
 	atomic_dec(&net->ibn_npeers);
 }
 
-kib_peer_t *
-kiblnd_find_peer_locked (lnet_nid_t nid)
+kib_peer_ni_t *
+kiblnd_find_peer_locked(struct lnet_ni *ni, lnet_nid_t nid)
 {
 	/* the caller is responsible for accounting the additional reference
 	 * that this creates */
 	struct list_head	*peer_list = kiblnd_nid2peerlist(nid);
 	struct list_head	*tmp;
-	kib_peer_t		*peer;
+	kib_peer_ni_t		*peer_ni;
 
 	list_for_each(tmp, peer_list) {
-		peer = list_entry(tmp, kib_peer_t, ibp_list);
-		LASSERT(!kiblnd_peer_idle(peer));
-
-		if (peer->ibp_nid != nid)
+		peer_ni = list_entry(tmp, kib_peer_ni_t, ibp_list);
+		LASSERT(!kiblnd_peer_idle(peer_ni));
+
+		/*
+		 * Match a peer if its NID and the NID of the local NI it
+		 * communicates over are the same. Otherwise don't match
+		 * the peer, which will result in a new lnd peer being
+		 * created.
+		 */
+		if (peer_ni->ibp_nid != nid ||
+		    peer_ni->ibp_ni->ni_nid != ni->ni_nid)
 			continue;
 
-		CDEBUG(D_NET, "got peer [%p] -> %s (%d) version: %x\n",
-		       peer, libcfs_nid2str(nid),
-		       atomic_read(&peer->ibp_refcount),
-		       peer->ibp_version);
-		return peer;
+		CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d) version: %x\n",
+		       peer_ni, libcfs_nid2str(nid),
+		       atomic_read(&peer_ni->ibp_refcount),
+		       peer_ni->ibp_version);
+		return peer_ni;
 	}
 	return NULL;
 }
 
 void
-kiblnd_unlink_peer_locked (kib_peer_t *peer)
+kiblnd_unlink_peer_locked (kib_peer_ni_t *peer_ni)
 {
-	LASSERT(list_empty(&peer->ibp_conns));
+	LASSERT(list_empty(&peer_ni->ibp_conns));
 
-	LASSERT (kiblnd_peer_active(peer));
-	list_del_init(&peer->ibp_list);
+	LASSERT (kiblnd_peer_active(peer_ni));
+	list_del_init(&peer_ni->ibp_list);
 	/* lose peerlist's ref */
-	kiblnd_peer_decref(peer);
+	kiblnd_peer_decref(peer_ni);
 }
 
 static int
 kiblnd_get_peer_info(lnet_ni_t *ni, int index,
 		     lnet_nid_t *nidp, int *count)
 {
-	kib_peer_t		*peer;
+	kib_peer_ni_t		*peer_ni;
 	struct list_head	*ptmp;
 	int			 i;
 	unsigned long		 flags;
@@ -427,17 +434,17 @@ kiblnd_get_peer_info(lnet_ni_t *ni, int index,
 
 		list_for_each(ptmp, &kiblnd_data.kib_peers[i]) {
 
-			peer = list_entry(ptmp, kib_peer_t, ibp_list);
-			LASSERT(!kiblnd_peer_idle(peer));
+			peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list);
+			LASSERT(!kiblnd_peer_idle(peer_ni));
 
-			if (peer->ibp_ni != ni)
+			if (peer_ni->ibp_ni != ni)
 				continue;
 
 			if (index-- > 0)
 				continue;
 
-			*nidp = peer->ibp_nid;
-			*count = atomic_read(&peer->ibp_refcount);
+			*nidp = peer_ni->ibp_nid;
+			*count = atomic_read(&peer_ni->ibp_refcount);
 
 			read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
 					       flags);
@@ -450,23 +457,23 @@ kiblnd_get_peer_info(lnet_ni_t *ni, int index,
 }
 
 static void
-kiblnd_del_peer_locked (kib_peer_t *peer)
+kiblnd_del_peer_locked (kib_peer_ni_t *peer_ni)
 {
 	struct list_head	*ctmp;
 	struct list_head	*cnxt;
 	kib_conn_t		*conn;
 
-	if (list_empty(&peer->ibp_conns)) {
-		kiblnd_unlink_peer_locked(peer);
+	if (list_empty(&peer_ni->ibp_conns)) {
+		kiblnd_unlink_peer_locked(peer_ni);
 	} else {
-		list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) {
+		list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) {
 			conn = list_entry(ctmp, kib_conn_t, ibc_list);
 			kiblnd_close_conn_locked(conn, 0);
 		}
-		/* NB closing peer's last conn unlinked it. */
+		/* NB closing peer_ni's last conn unlinked it. */
 	}
-	/* NB peer now unlinked; might even be freed if the peer table had the
+	/* NB peer_ni now unlinked; might even be freed if the peer_ni table had the
 	 * last ref on it. 
*/ } @@ -476,7 +483,7 @@ kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid) struct list_head zombies = LIST_HEAD_INIT(zombies); struct list_head *ptmp; struct list_head *pnxt; - kib_peer_t *peer; + kib_peer_ni_t *peer_ni; int lo; int hi; int i; @@ -494,23 +501,23 @@ kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid) for (i = lo; i <= hi; i++) { list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); + peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list); + LASSERT(!kiblnd_peer_idle(peer_ni)); - if (peer->ibp_ni != ni) + if (peer_ni->ibp_ni != ni) continue; - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) + if (!(nid == LNET_NID_ANY || peer_ni->ibp_nid == nid)) continue; - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT(list_empty(&peer->ibp_conns)); + if (!list_empty(&peer_ni->ibp_tx_queue)) { + LASSERT(list_empty(&peer_ni->ibp_conns)); - list_splice_init(&peer->ibp_tx_queue, + list_splice_init(&peer_ni->ibp_tx_queue, &zombies); } - kiblnd_del_peer_locked(peer); + kiblnd_del_peer_locked(peer_ni); rc = 0; /* matched something */ } } @@ -525,7 +532,7 @@ kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid) static kib_conn_t * kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index) { - kib_peer_t *peer; + kib_peer_ni_t *peer_ni; struct list_head *ptmp; kib_conn_t *conn; struct list_head *ctmp; @@ -537,13 +544,13 @@ kiblnd_get_conn_by_idx(lnet_ni_t *ni, int index) for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { list_for_each(ptmp, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); + peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list); + LASSERT(!kiblnd_peer_idle(peer_ni)); - if (peer->ibp_ni != ni) + if (peer_ni->ibp_ni != ni) continue; - list_for_each(ctmp, &peer->ibp_conns) { + list_for_each(ctmp, &peer_ni->ibp_conns) { if (index-- > 0) continue; @@ -692,18 +699,18 @@ kiblnd_get_completion_vector(kib_conn_t *conn, int cpt) } kib_conn_t * -kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, +kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid, int state, int version) { /* CAVEAT EMPTOR: * If the new conn is created successfully it takes over the caller's - * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself - * is destroyed. On failure, the caller's ref on 'peer' remains and + * ref on 'peer_ni'. It also "owns" 'cmid' and destroys it when it itself + * is destroyed. On failure, the caller's ref on 'peer_ni' remains and * she must dispose of 'cmid'. (Actually I'd block forever if I tried * to destroy 'cmid' here since I'm called from the CM which still has * its ref on 'cmid'). 
*/ rwlock_t *glock = &kiblnd_data.kib_global_lock; - kib_net_t *net = peer->ibp_ni->ni_data; + kib_net_t *net = peer_ni->ibp_ni->ni_data; kib_dev_t *dev; struct ib_qp_init_attr *init_qp_attr; struct kib_sched_info *sched; @@ -722,7 +729,7 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, dev = net->ibn_dev; - cpt = lnet_cpt_of_nid(peer->ibp_nid); + cpt = lnet_cpt_of_nid(peer_ni->ibp_nid, peer_ni->ibp_ni); sched = kiblnd_data.kib_scheds[cpt]; LASSERT(sched->ibs_nthreads > 0); @@ -731,24 +738,24 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid, sizeof(*init_qp_attr)); if (init_qp_attr == NULL) { CERROR("Can't allocate qp_attr for %s\n", - libcfs_nid2str(peer->ibp_nid)); + libcfs_nid2str(peer_ni->ibp_nid)); goto failed_0; } LIBCFS_CPT_ALLOC(conn, lnet_cpt_table(), cpt, sizeof(*conn)); if (conn == NULL) { CERROR("Can't allocate connection for %s\n", - libcfs_nid2str(peer->ibp_nid)); + libcfs_nid2str(peer_ni->ibp_nid)); goto failed_1; } conn->ibc_state = IBLND_CONN_INIT; conn->ibc_version = version; - conn->ibc_peer = peer; /* I take the caller's ref */ + conn->ibc_peer = peer_ni; /* I take the caller's ref */ cmid->context = conn; /* for future CM callbacks */ conn->ibc_cmid = cmid; - conn->ibc_max_frags = peer->ibp_max_frags; - conn->ibc_queue_depth = peer->ibp_queue_depth; + conn->ibc_max_frags = peer_ni->ibp_max_frags; + conn->ibc_queue_depth = peer_ni->ibp_queue_depth; INIT_LIST_HEAD(&conn->ibc_early_rxs); INIT_LIST_HEAD(&conn->ibc_tx_noops); @@ -921,7 +928,7 @@ void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn) { struct rdma_cm_id *cmid = conn->ibc_cmid; - kib_peer_t *peer = conn->ibc_peer; + kib_peer_ni_t *peer_ni = conn->ibc_peer; int rc; LASSERT (!in_interrupt()); @@ -975,9 +982,9 @@ kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn) /* See CAVEAT EMPTOR above in kiblnd_create_conn */ if (conn->ibc_state != IBLND_CONN_INIT) { - kib_net_t *net = peer->ibp_ni->ni_data; + kib_net_t *net = peer_ni->ibp_ni->ni_data; - kiblnd_peer_decref(peer); + kiblnd_peer_decref(peer_ni); rdma_destroy_id(cmid); atomic_dec(&net->ibn_nconns); } @@ -987,19 +994,19 @@ kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn) } int -kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why) +kiblnd_close_peer_conns_locked(kib_peer_ni_t *peer_ni, int why) { kib_conn_t *conn; struct list_head *ctmp; struct list_head *cnxt; int count = 0; - list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { + list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) { conn = list_entry(ctmp, kib_conn_t, ibc_list); CDEBUG(D_NET, "Closing conn -> %s, " "version: %x, reason: %d\n", - libcfs_nid2str(peer->ibp_nid), + libcfs_nid2str(peer_ni->ibp_nid), conn->ibc_version, why); kiblnd_close_conn_locked(conn, why); @@ -1010,7 +1017,7 @@ kiblnd_close_peer_conns_locked(kib_peer_t *peer, int why) } int -kiblnd_close_stale_conns_locked(kib_peer_t *peer, +kiblnd_close_stale_conns_locked(kib_peer_ni_t *peer_ni, int version, __u64 incarnation) { kib_conn_t *conn; @@ -1018,7 +1025,7 @@ kiblnd_close_stale_conns_locked(kib_peer_t *peer, struct list_head *cnxt; int count = 0; - list_for_each_safe(ctmp, cnxt, &peer->ibp_conns) { + list_for_each_safe(ctmp, cnxt, &peer_ni->ibp_conns) { conn = list_entry(ctmp, kib_conn_t, ibc_list); if (conn->ibc_version == version && @@ -1027,7 +1034,7 @@ kiblnd_close_stale_conns_locked(kib_peer_t *peer, CDEBUG(D_NET, "Closing stale conn -> %s version: %x, " "incarnation:%#llx(%x, %#llx)\n", - libcfs_nid2str(peer->ibp_nid), + libcfs_nid2str(peer_ni->ibp_nid), conn->ibc_version, 
conn->ibc_incarnation, version, incarnation); @@ -1041,7 +1048,7 @@ kiblnd_close_stale_conns_locked(kib_peer_t *peer, static int kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid) { - kib_peer_t *peer; + kib_peer_ni_t *peer_ni; struct list_head *ptmp; struct list_head *pnxt; int lo; @@ -1062,16 +1069,16 @@ kiblnd_close_matching_conns(lnet_ni_t *ni, lnet_nid_t nid) for (i = lo; i <= hi; i++) { list_for_each_safe(ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT(!kiblnd_peer_idle(peer)); + peer_ni = list_entry(ptmp, kib_peer_ni_t, ibp_list); + LASSERT(!kiblnd_peer_idle(peer_ni)); - if (peer->ibp_ni != ni) + if (peer_ni->ibp_ni != ni) continue; - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) + if (!(nid == LNET_NID_ANY || nid == peer_ni->ibp_nid)) continue; - count += kiblnd_close_peer_conns_locked(peer, 0); + count += kiblnd_close_peer_conns_locked(peer_ni, 0); } } @@ -1144,27 +1151,27 @@ kiblnd_query(lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when) cfs_time_t last_alive = 0; cfs_time_t now = cfs_time_current(); rwlock_t *glock = &kiblnd_data.kib_global_lock; - kib_peer_t *peer; + kib_peer_ni_t *peer_ni; unsigned long flags; read_lock_irqsave(glock, flags); - peer = kiblnd_find_peer_locked(nid); - if (peer != NULL) - last_alive = peer->ibp_last_alive; + peer_ni = kiblnd_find_peer_locked(ni, nid); + if (peer_ni != NULL) + last_alive = peer_ni->ibp_last_alive; read_unlock_irqrestore(glock, flags); if (last_alive != 0) *when = last_alive; - /* peer is not persistent in hash, trigger peer creation + /* peer_ni is not persistent in hash, trigger peer_ni creation * and connection establishment with a NULL tx */ - if (peer == NULL) + if (peer_ni == NULL) kiblnd_launch_tx(ni, NULL, nid); - CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago\n", - libcfs_nid2str(nid), peer, + CDEBUG(D_NET, "peer_ni %s %p, alive %ld secs ago\n", + libcfs_nid2str(nid), peer_ni, last_alive ? cfs_duration_sec(now - last_alive) : -1); return; } @@ -1391,7 +1398,7 @@ kiblnd_find_rd_dma_mr(struct lnet_ni *ni, kib_rdma_desc_t *rd, int mod; __u16 nfrags; - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; mod = tunables->lnd_map_on_demand; nfrags = (negotiated_nfrags != -1) ? negotiated_nfrags : mod; @@ -1536,7 +1543,10 @@ static int kiblnd_alloc_freg_pool(kib_fmr_poolset_t *fps, kib_fmr_pool_t *fpo) goto out_middle; } - frd->frd_valid = true; + /* There appears to be a bug in MLX5 code where you must + * invalidate the rkey of a new FastReg pool before first + * using it. Thus, I am marking the FRD invalid here. 
*/ + frd->frd_valid = false; list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); fpo->fast_reg.fpo_pool_size++; @@ -2392,7 +2402,7 @@ kiblnd_net_init_pools(kib_net_t *net, lnet_ni_t *ni, __u32 *cpts, int ncpts) int rc; int i; - tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib; read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); if (tunables->lnd_map_on_demand == 0) { @@ -2923,7 +2933,7 @@ kiblnd_shutdown (lnet_ni_t *ni) /* nuke all existing peers within this net */ kiblnd_del_peer(ni, LNET_NID_ANY); - /* Wait for all peer state to clean up */ + /* Wait for all peer_ni state to clean up */ i = 2; while (atomic_read(&net->ibn_npeers) != 0) { i++; @@ -3173,8 +3183,9 @@ kiblnd_startup (lnet_ni_t *ni) unsigned long flags; int rc; int newdev; + int node_id; - LASSERT (ni->ni_lnd == &the_o2iblnd); + LASSERT (ni->ni_net->net_lnd == &the_o2iblnd); if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) { rc = kiblnd_base_startup(); @@ -3216,13 +3227,16 @@ kiblnd_startup (lnet_ni_t *ni) newdev = ibdev == NULL; /* hmm...create kib_dev even for alias */ if (ibdev == NULL || strcmp(&ibdev->ibd_ifname[0], ifname) != 0) - ibdev = kiblnd_create_dev(ifname); + ibdev = kiblnd_create_dev(ifname); - if (ibdev == NULL) - goto failed; + if (ibdev == NULL) + goto failed; + + node_id = dev_to_node(ibdev->ibd_hdev->ibh_ibdev->dma_device); + ni->dev_cpt = cfs_cpt_of_node(lnet_cpt_table(), node_id); - net->ibn_dev = ibdev; - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip); + net->ibn_dev = ibdev; + ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip); rc = kiblnd_dev_start_threads(ibdev, newdev, ni->ni_cpts, ni->ni_ncpts);
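
The substantive change above, beyond the kib_peer_t -> kib_peer_ni_t rename, is the lookup in kiblnd_find_peer_locked(): under Multi-Rail a remote NID alone no longer identifies a unique peer structure, because several local NIs can reach the same remote NID, so the lookup now matches on the (remote NID, local NI NID) pair. A minimal user-space sketch of that predicate, using simplified stand-in structs rather than the kernel's kib_peer_ni_t and struct lnet_ni (all names below are illustrative, not the LND's):

#include <stddef.h>
#include <stdint.h>

typedef uint64_t lnet_nid_t;

/* Simplified stand-ins for struct lnet_ni and kib_peer_ni_t. */
struct ni_sketch {
	lnet_nid_t ni_nid;		/* NID of this local interface */
};

struct peer_ni_sketch {
	lnet_nid_t ibp_nid;		/* remote peer's NID */
	struct ni_sketch *ibp_ni;	/* local NI this peer_ni is bound to */
	struct peer_ni_sketch *next;	/* hash-chain link (a list_head in the LND) */
};

/* Mirror of the new match rule: both the remote NID and the local NI's
 * NID must agree; otherwise the caller falls through and a new peer_ni
 * is created for this (remote, local) pairing. */
static struct peer_ni_sketch *
find_peer_ni(struct peer_ni_sketch *chain, const struct ni_sketch *ni,
	     lnet_nid_t nid)
{
	struct peer_ni_sketch *p;

	for (p = chain; p != NULL; p = p->next) {
		if (p->ibp_nid != nid || p->ibp_ni->ni_nid != ni->ni_nid)
			continue;
		return p;
	}
	return NULL;
}

Before this change the loop matched on ibp_nid alone, which under Multi-Rail would have shared one peer structure between distinct local interfaces.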
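The CAVEAT EMPTOR comment on kiblnd_create_conn() describes a reference handoff that is easy to get wrong: on success the new conn takes over the caller's reference on peer_ni, while on failure the caller keeps its reference and must dispose of the cmid itself. A compact user-space sketch of the same ownership pattern (hypothetical names; stdatomic stands in for the kernel's atomic_t):

#include <stdatomic.h>
#include <stdlib.h>

struct peer_sketch {
	atomic_int refcount;		/* caller holds one ref on entry */
};

struct conn_sketch {
	struct peer_sketch *peer;	/* owned ref, released on destroy */
};

/* On success the conn owns the caller's ref; on failure the caller's
 * ref is untouched and the caller must still release it. */
static struct conn_sketch *create_conn(struct peer_sketch *peer)
{
	struct conn_sketch *conn = malloc(sizeof(*conn));

	if (conn == NULL)
		return NULL;		/* caller still owns its ref */
	conn->peer = peer;		/* handoff: no extra ref taken */
	return conn;
}

static void destroy_conn(struct conn_sketch *conn)
{
	/* dropping the conn's ref mirrors kiblnd_peer_decref() */
	atomic_fetch_sub(&conn->peer->refcount, 1);
	free(conn);
}

This is why kiblnd_destroy_conn() above calls kiblnd_peer_decref(peer_ni) and destroys the cmid only when the conn has progressed past IBLND_CONN_INIT, i.e. only once it has actually taken ownership of both.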
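The one behavioural change visible in the pool code is in kiblnd_alloc_freg_pool(): freshly allocated fast-registration descriptors now start with frd_valid = false, so the mapping path will chain a local-invalidate of the rkey in front of the first fast-registration work request, working around the MLX5 behaviour described in the new comment. A sketch of the decision the flag drives (illustrative only; the real code builds IB_WR_LOCAL_INV and fast-reg work requests and posts them to the QP, and the lifecycle below is simplified):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for one fast-reg descriptor (the LND's frd). */
struct frd_sketch {
	bool frd_valid;		/* rkey known-good? false forces an invalidate */
};

/* Map-path sketch: when the descriptor is not known to be valid,
 * which after the change above includes the very first use of a new
 * pool entry, an invalidate WR is chained before the registration WR. */
static void map_with_frd(struct frd_sketch *frd)
{
	if (!frd->frd_valid)
		printf("chain IB_WR_LOCAL_INV ahead of the fast-reg WR\n");
	printf("post fast-reg WR for the buffer\n");
	frd->frd_valid = true;	/* simplified: assume registration succeeds */
}

int main(void)
{
	struct frd_sketch frd = { .frd_valid = false };	/* as allocated now */

	map_with_frd(&frd);	/* first use: invalidate + register */
	map_with_frd(&frd);	/* later use: register only */
	return 0;
}

Initializing frd_valid to true, as the old code did, skipped that first invalidate and, per the comment, could leave a brand-new MLX5 rkey unusable.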