diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c
index 35a6132..2da50e2 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/lnet/klnds/o2iblnd/o2iblnd.c
@@ -27,7 +27,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -352,7 +352,7 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
         CFS_INIT_LIST_HEAD(&peer->ibp_conns);
         CFS_INIT_LIST_HEAD(&peer->ibp_tx_queue);

-        cfs_write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

         /* always called with a ref on ni, which prevents ni being shutdown */
         LASSERT (net->ibn_shutdown == 0);
@@ -360,7 +360,7 @@ kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid)
         /* npeers only grows with the global lock held */
         cfs_atomic_inc(&net->ibn_npeers);

-        cfs_write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

         *peerp = peer;
         return 0;
@@ -437,7 +437,7 @@ kiblnd_get_peer_info (lnet_ni_t *ni, int index,
         int                i;
         unsigned long      flags;

-        cfs_read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

         for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
@@ -457,13 +457,13 @@ kiblnd_get_peer_info (lnet_ni_t *ni, int index,
                         *nidp = peer->ibp_nid;
                         *count = cfs_atomic_read(&peer->ibp_refcount);

-                        cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
-                                                   flags);
-                        return 0;
-                }
-        }
+                        read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+                                               flags);
+                        return 0;
+                }
+        }

-        cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
         return -ENOENT;
 }
@@ -501,7 +501,7 @@ kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid)
         unsigned long      flags;
         int                rc = -ENOENT;

-        cfs_write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

         if (nid != LNET_NID_ANY) {
                 lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
@@ -535,7 +535,7 @@ kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid)
                 }
         }

-        cfs_write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

         kiblnd_txlist_done(ni, &zombies, -EIO);
@@ -552,7 +552,7 @@ kiblnd_get_conn_by_idx (lnet_ni_t *ni, int index)
         int                i;
         unsigned long      flags;

-        cfs_read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

         for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) {
                 cfs_list_for_each (ptmp, &kiblnd_data.kib_peers[i]) {
@@ -572,14 +572,14 @@ kiblnd_get_conn_by_idx (lnet_ni_t *ni, int index)
                                 conn = cfs_list_entry(ctmp, kib_conn_t, ibc_list);
                                 kiblnd_conn_addref(conn);
-                                cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
-                                                           flags);
-                                return conn;
-                        }
-                }
-        }
+                                read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+                                                       flags);
+                                return conn;
+                        }
+                }
+        }

-        cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
         return NULL;
 }
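Note: the hunks above, and most of the hunks that follow, mechanically replace the libcfs locking wrappers (cfs_write_lock_irqsave(), cfs_read_unlock_irqrestore(), and friends) with the native kernel rwlock API. As a minimal, self-contained sketch of the irq-safe reader/writer pattern the converted code follows; the lock and the demo_* names are illustrative, not part of the patch:

    #include <linux/spinlock.h>

    /* demo_lock/demo_state are illustrative stand-ins, not patch code */
    static DEFINE_RWLOCK(demo_lock);
    static int demo_state;

    /* reader side: many readers may hold the lock concurrently */
    static int demo_read_state(void)
    {
            unsigned long flags;
            int val;

            read_lock_irqsave(&demo_lock, flags);
            val = demo_state;
            read_unlock_irqrestore(&demo_lock, flags);
            return val;
    }

    /* writer side: excludes readers and other writers */
    static void demo_set_state(int val)
    {
            unsigned long flags;

            write_lock_irqsave(&demo_lock, flags);
            demo_state = val;
            write_unlock_irqrestore(&demo_lock, flags);
    }

The *_irqsave/*_irqrestore variants are used because kib_global_lock can be taken from contexts where interrupts must stay disabled; the flags argument preserves the caller's interrupt state across the critical section.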
@@ -606,10 +606,10 @@ kiblnd_debug_tx (kib_tx_t *tx)
 void
 kiblnd_debug_conn (kib_conn_t *conn)
 {
-        cfs_list_t        *tmp;
-        int                i;
+        cfs_list_t      *tmp;
+        int             i;

-        cfs_spin_lock(&conn->ibc_lock);
+        spin_lock(&conn->ibc_lock);

         CDEBUG(D_CONSOLE, "conn[%d] %p [version %x] -> %s: \n",
                cfs_atomic_read(&conn->ibc_refcount), conn,
@@ -648,7 +648,7 @@ kiblnd_debug_conn (kib_conn_t *conn)
         for (i = 0; i < IBLND_RX_MSGS(conn->ibc_version); i++)
                 kiblnd_debug_rx(&conn->ibc_rxs[i]);

-        cfs_spin_unlock(&conn->ibc_lock);
+        spin_unlock(&conn->ibc_lock);
 }

 int
@@ -725,9 +725,9 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
          * she must dispose of 'cmid'. (Actually I'd block forever if I tried
          * to destroy 'cmid' here since I'm called from the CM which still has
          * its ref on 'cmid'). */
-        cfs_rwlock_t           *glock = &kiblnd_data.kib_global_lock;
+        rwlock_t               *glock = &kiblnd_data.kib_global_lock;
         kib_net_t              *net = peer->ibp_ni->ni_data;
-        kib_dev_t              *dev = net->ibn_dev;
+        kib_dev_t              *dev;
         struct ib_qp_init_attr *init_qp_attr;
         struct kib_sched_info  *sched;
         kib_conn_t             *conn;
@@ -740,6 +740,8 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
         LASSERT(net != NULL);
         LASSERT(!cfs_in_interrupt());

+        dev = net->ibn_dev;
+
         cpt = lnet_cpt_of_nid(peer->ibp_nid);
         sched = kiblnd_data.kib_scheds[cpt];
@@ -772,42 +774,42 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
         CFS_INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
         CFS_INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
         CFS_INIT_LIST_HEAD(&conn->ibc_active_txs);
-        cfs_spin_lock_init(&conn->ibc_lock);
+        spin_lock_init(&conn->ibc_lock);

         LIBCFS_CPT_ALLOC(conn->ibc_connvars, lnet_cpt_table(), cpt,
                          sizeof(*conn->ibc_connvars));
-        if (conn->ibc_connvars == NULL) {
-                CERROR("Can't allocate in-progress connection state\n");
-                goto failed_2;
-        }
+        if (conn->ibc_connvars == NULL) {
+                CERROR("Can't allocate in-progress connection state\n");
+                goto failed_2;
+        }

-        cfs_write_lock_irqsave(glock, flags);
-        if (dev->ibd_failover) {
-                cfs_write_unlock_irqrestore(glock, flags);
-                CERROR("%s: failover in progress\n", dev->ibd_ifname);
-                goto failed_2;
-        }
+        write_lock_irqsave(glock, flags);
+        if (dev->ibd_failover) {
+                write_unlock_irqrestore(glock, flags);
+                CERROR("%s: failover in progress\n", dev->ibd_ifname);
+                goto failed_2;
+        }

-        if (dev->ibd_hdev->ibh_ibdev != cmid->device) {
-                /* wakeup failover thread and teardown connection */
-                if (kiblnd_dev_can_failover(dev)) {
-                        cfs_list_add_tail(&dev->ibd_fail_list,
-                                          &kiblnd_data.kib_failed_devs);
-                        cfs_waitq_signal(&kiblnd_data.kib_failover_waitq);
-                }
+        if (dev->ibd_hdev->ibh_ibdev != cmid->device) {
+                /* wakeup failover thread and teardown connection */
+                if (kiblnd_dev_can_failover(dev)) {
+                        cfs_list_add_tail(&dev->ibd_fail_list,
+                                          &kiblnd_data.kib_failed_devs);
+                        wake_up(&kiblnd_data.kib_failover_waitq);
+                }

-                cfs_write_unlock_irqrestore(glock, flags);
-                CERROR("cmid HCA(%s), kib_dev(%s) need failover\n",
-                       cmid->device->name, dev->ibd_ifname);
-                goto failed_2;
-        }
+                write_unlock_irqrestore(glock, flags);
+                CERROR("cmid HCA(%s), kib_dev(%s) need failover\n",
+                       cmid->device->name, dev->ibd_ifname);
+                goto failed_2;
+        }

         kiblnd_hdev_addref_locked(dev->ibd_hdev);
         conn->ibc_hdev = dev->ibd_hdev;

         kiblnd_setup_mtu_locked(cmid);

-        cfs_write_unlock_irqrestore(glock, flags);
+        write_unlock_irqrestore(glock, flags);

         LIBCFS_CPT_ALLOC(conn->ibc_rxs, lnet_cpt_table(), cpt,
                          IBLND_RX_MSGS(version) * sizeof(kib_rx_t));
@@ -818,10 +820,10 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,

         rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, cpt,
                                 IBLND_RX_MSG_PAGES(version));
-        if (rc != 0)
-                goto failed_2;
+        if (rc != 0)
+                goto failed_2;

-        kiblnd_map_rx_descs(conn);
+        kiblnd_map_rx_descs(conn);

 #ifdef HAVE_OFED_IB_COMP_VECTOR
         cq = ib_create_cq(cmid->device,
@@ -886,9 +888,9 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,

                 /* correct # of posted buffers
                  * NB locking needed now I'm racing with completion */
-                cfs_spin_lock_irqsave(&sched->ibs_lock, flags);
+                spin_lock_irqsave(&sched->ibs_lock, flags);
                 conn->ibc_nrx -= IBLND_RX_MSGS(version) - i;
-                cfs_spin_unlock_irqrestore(&sched->ibs_lock, flags);
+                spin_unlock_irqrestore(&sched->ibs_lock, flags);

                 /* cmid will be destroyed by CM(ofed) after cm_callback
                  * returned, so we can't refer it anymore
@@ -1053,7 +1055,7 @@ kiblnd_close_matching_conns (lnet_ni_t *ni, lnet_nid_t nid)
         unsigned long       flags;
         int                 count = 0;

-        cfs_write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

         if (nid != LNET_NID_ANY)
                 lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers;
@@ -1080,7 +1082,7 @@ kiblnd_close_matching_conns (lnet_ni_t *ni, lnet_nid_t nid)
                 }
         }

-        cfs_write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

         /* wildcards always succeed */
         if (nid == LNET_NID_ANY)
@@ -1146,13 +1148,13 @@ kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
 void
 kiblnd_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
 {
-        cfs_time_t         last_alive = 0;
-        cfs_time_t         now = cfs_time_current();
-        cfs_rwlock_t      *glock = &kiblnd_data.kib_global_lock;
-        kib_peer_t        *peer;
-        unsigned long      flags;
+        cfs_time_t      last_alive = 0;
+        cfs_time_t      now = cfs_time_current();
+        rwlock_t        *glock = &kiblnd_data.kib_global_lock;
+        kib_peer_t      *peer;
+        unsigned long   flags;

-        cfs_read_lock_irqsave(glock, flags);
+        read_lock_irqsave(glock, flags);

         peer = kiblnd_find_peer_locked(nid);
         if (peer != NULL) {
@@ -1162,7 +1164,7 @@ kiblnd_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
                         last_alive = peer->ibp_last_alive;
         }

-        cfs_read_unlock_irqrestore(glock, flags);
+        read_unlock_irqrestore(glock, flags);

         if (last_alive != 0)
                 *when = last_alive;
@@ -1186,7 +1188,7 @@ kiblnd_free_pages(kib_pages_t *p)

         for (i = 0; i < npages; i++) {
                 if (p->ibp_pages[i] != NULL)
-                        cfs_free_page(p->ibp_pages[i]);
+                        __free_page(p->ibp_pages[i]);
         }

         LIBCFS_FREE(p, offsetof(kib_pages_t, ibp_pages[npages]));
@@ -1210,7 +1212,7 @@ kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages)

         for (i = 0; i < npages; i++) {
                 p->ibp_pages[i] = cfs_page_cpt_alloc(lnet_cpt_table(), cpt,
-                                                     CFS_ALLOC_IO);
+                                                     __GFP_IO);
                 if (p->ibp_pages[i] == NULL) {
                         CERROR("Can't allocate page %d of %d\n", i, npages);
                         kiblnd_free_pages(p);
@@ -1317,22 +1319,23 @@ kiblnd_current_hdev(kib_dev_t *dev)
         unsigned long  flags;
         int            i = 0;

-        cfs_read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-        while (dev->ibd_failover) {
-                cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-                if (i++ % 50 == 0)
-                        CDEBUG(D_NET, "Wait for dev(%s) failover\n", dev->ibd_ifname);
-                cfs_schedule_timeout(cfs_time_seconds(1) / 100);
+        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        while (dev->ibd_failover) {
+                read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+                if (i++ % 50 == 0)
+                        CDEBUG(D_NET, "%s: Wait for failover\n",
+                               dev->ibd_ifname);
+                schedule_timeout(cfs_time_seconds(1) / 100);

-                cfs_read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-        }
+                read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        }

-        kiblnd_hdev_addref_locked(dev->ibd_hdev);
-        hdev = dev->ibd_hdev;
+        kiblnd_hdev_addref_locked(dev->ibd_hdev);
+        hdev = dev->ibd_hdev;

-        cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

-        return hdev;
+        return hdev;
 }
@@ -1341,7 +1344,7 @@ kiblnd_map_tx_pool(kib_tx_pool_t *tpo)
         kib_pages_t    *txpgs = tpo->tpo_tx_pages;
         kib_pool_t     *pool = &tpo->tpo_pool;
         kib_net_t      *net = pool->po_owner->ps_net;
-        kib_dev_t      *dev = net->ibn_dev;
+        kib_dev_t      *dev;
         struct page    *page;
         kib_tx_t       *tx;
         int             page_offset;
@@ -1350,6 +1353,8 @@ kiblnd_map_tx_pool(kib_tx_pool_t *tpo)

         LASSERT (net != NULL);

+        dev = net->ibn_dev;
+
         /* pre-mapped messages are not bigger than 1 page */
         CLASSERT (IBLND_MSG_SIZE <= PAGE_SIZE);
@@ -1525,7 +1530,7 @@ kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps, cfs_list_t *zombies)
         if (fps->fps_net == NULL) /* intialized? */
                 return;

-        cfs_spin_lock(&fps->fps_lock);
+        spin_lock(&fps->fps_lock);

         while (!cfs_list_empty(&fps->fps_pool_list)) {
                 kib_fmr_pool_t *fpo = cfs_list_entry(fps->fps_pool_list.next,
@@ -1538,7 +1543,7 @@ kiblnd_fail_fmr_poolset(kib_fmr_poolset_t *fps, cfs_list_t *zombies)
                         cfs_list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
         }

-        cfs_spin_unlock(&fps->fps_lock);
+        spin_unlock(&fps->fps_lock);
 }

 static void
@@ -1563,7 +1568,7 @@ kiblnd_init_fmr_poolset(kib_fmr_poolset_t *fps, int cpt, kib_net_t *net,
         fps->fps_cpt = cpt;
         fps->fps_pool_size = pool_size;
         fps->fps_flush_trigger = flush_trigger;
-        cfs_spin_lock_init(&fps->fps_lock);
+        spin_lock_init(&fps->fps_lock);

         CFS_INIT_LIST_HEAD(&fps->fps_pool_list);
         CFS_INIT_LIST_HEAD(&fps->fps_failed_pool_list);
@@ -1605,7 +1610,7 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
         fmr->fmr_pool = NULL;
         fmr->fmr_pfmr = NULL;

-        cfs_spin_lock(&fps->fps_lock);
+        spin_lock(&fps->fps_lock);
         fpo->fpo_map_count --; /* decref the pool */

         cfs_list_for_each_entry_safe(fpo, tmp, &fps->fps_pool_list, fpo_list) {
@@ -1618,7 +1623,7 @@ kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status)
                         fps->fps_version ++;
                 }
         }
-        cfs_spin_unlock(&fps->fps_lock);
+        spin_unlock(&fps->fps_lock);

         if (!cfs_list_empty(&zombies))
                 kiblnd_destroy_fmr_pool_list(&zombies);
@@ -1634,12 +1639,12 @@ kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
         int                 rc;

 again:
-        cfs_spin_lock(&fps->fps_lock);
-        version = fps->fps_version;
-        cfs_list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
-                fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
-                fpo->fpo_map_count ++;
-                cfs_spin_unlock(&fps->fps_lock);
+        spin_lock(&fps->fps_lock);
+        version = fps->fps_version;
+        cfs_list_for_each_entry(fpo, &fps->fps_pool_list, fpo_list) {
+                fpo->fpo_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+                fpo->fpo_map_count++;
+                spin_unlock(&fps->fps_lock);

                 pfmr = ib_fmr_pool_map_phys(fpo->fpo_fmr_pool,
                                             pages, npages, iov);
@@ -1649,51 +1654,51 @@ kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
                         return 0;
                 }

-                cfs_spin_lock(&fps->fps_lock);
-                fpo->fpo_map_count --;
-                if (PTR_ERR(pfmr) != -EAGAIN) {
-                        cfs_spin_unlock(&fps->fps_lock);
-                        return PTR_ERR(pfmr);
-                }
+                spin_lock(&fps->fps_lock);
+                fpo->fpo_map_count--;
+                if (PTR_ERR(pfmr) != -EAGAIN) {
+                        spin_unlock(&fps->fps_lock);
+                        return PTR_ERR(pfmr);
+                }

-                /* EAGAIN and ... */
-                if (version != fps->fps_version) {
-                        cfs_spin_unlock(&fps->fps_lock);
-                        goto again;
-                }
-        }
+                /* EAGAIN and ... */
+                if (version != fps->fps_version) {
+                        spin_unlock(&fps->fps_lock);
+                        goto again;
+                }
+        }

-        if (fps->fps_increasing) {
-                cfs_spin_unlock(&fps->fps_lock);
-                CDEBUG(D_NET, "Another thread is allocating new "
-                       "FMR pool, waiting for her to complete\n");
-                cfs_schedule();
-                goto again;
+        if (fps->fps_increasing) {
+                spin_unlock(&fps->fps_lock);
+                CDEBUG(D_NET, "Another thread is allocating new "
+                       "FMR pool, waiting for her to complete\n");
+                schedule();
+                goto again;

-        }
+        }

-        if (cfs_time_before(cfs_time_current(), fps->fps_next_retry)) {
-                /* someone failed recently */
-                cfs_spin_unlock(&fps->fps_lock);
-                return -EAGAIN;
-        }
+        if (cfs_time_before(cfs_time_current(), fps->fps_next_retry)) {
+                /* someone failed recently */
+                spin_unlock(&fps->fps_lock);
+                return -EAGAIN;
+        }

-        fps->fps_increasing = 1;
-        cfs_spin_unlock(&fps->fps_lock);
+        fps->fps_increasing = 1;
+        spin_unlock(&fps->fps_lock);

-        CDEBUG(D_NET, "Allocate new FMR pool\n");
-        rc = kiblnd_create_fmr_pool(fps, &fpo);
-        cfs_spin_lock(&fps->fps_lock);
-        fps->fps_increasing = 0;
-        if (rc == 0) {
-                fps->fps_version ++;
-                cfs_list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
-        } else {
-                fps->fps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
-        }
-        cfs_spin_unlock(&fps->fps_lock);
+        CDEBUG(D_NET, "Allocate new FMR pool\n");
+        rc = kiblnd_create_fmr_pool(fps, &fpo);
+        spin_lock(&fps->fps_lock);
+        fps->fps_increasing = 0;
+        if (rc == 0) {
+                fps->fps_version++;
+                cfs_list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
+        } else {
+                fps->fps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
+        }
+        spin_unlock(&fps->fps_lock);

-        goto again;
+        goto again;
 }
@@ -1737,7 +1742,7 @@ kiblnd_fail_poolset(kib_poolset_t *ps, cfs_list_t *zombies)
         if (ps->ps_net == NULL) /* intialized? */
                 return;

-        cfs_spin_lock(&ps->ps_lock);
+        spin_lock(&ps->ps_lock);
         while (!cfs_list_empty(&ps->ps_pool_list)) {
                 kib_pool_t *po = cfs_list_entry(ps->ps_pool_list.next,
                                                 kib_pool_t, po_list);
@@ -1748,7 +1753,7 @@ kiblnd_fail_poolset(kib_poolset_t *ps, cfs_list_t *zombies)
                 else
                         cfs_list_add(&po->po_list, &ps->ps_failed_pool_list);
         }
-        cfs_spin_unlock(&ps->ps_lock);
+        spin_unlock(&ps->ps_lock);
 }

 static void
@@ -1780,8 +1785,10 @@ kiblnd_init_poolset(kib_poolset_t *ps, int cpt,
         ps->ps_node_init    = nd_init;
         ps->ps_node_fini    = nd_fini;
         ps->ps_pool_size    = size;
-        strncpy(ps->ps_name, name, IBLND_POOL_NAME_LEN);
-        cfs_spin_lock_init(&ps->ps_lock);
+        if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
+            >= sizeof(ps->ps_name))
+                return -E2BIG;
+        spin_lock_init(&ps->ps_lock);

         CFS_INIT_LIST_HEAD(&ps->ps_pool_list);
         CFS_INIT_LIST_HEAD(&ps->ps_failed_pool_list);
@@ -1812,7 +1819,7 @@ kiblnd_pool_free_node(kib_pool_t *pool, cfs_list_t *node)
         kib_pool_t     *tmp;
         cfs_time_t      now = cfs_time_current();

-        cfs_spin_lock(&ps->ps_lock);
+        spin_lock(&ps->ps_lock);

         if (ps->ps_node_fini != NULL)
                 ps->ps_node_fini(pool, node);
@@ -1829,10 +1836,10 @@ kiblnd_pool_free_node(kib_pool_t *pool, cfs_list_t *node)
                 if (kiblnd_pool_is_idle(pool, now))
                         cfs_list_move(&pool->po_list, &zombies);
         }
-        cfs_spin_unlock(&ps->ps_lock);
+        spin_unlock(&ps->ps_lock);

-        if (!cfs_list_empty(&zombies))
-                kiblnd_destroy_pool_list(&zombies);
+        if (!cfs_list_empty(&zombies))
+                kiblnd_destroy_pool_list(&zombies);
 }

 cfs_list_t *
@@ -1843,7 +1850,7 @@ kiblnd_pool_alloc_node(kib_poolset_t *ps)
         int             rc;

 again:
-        cfs_spin_lock(&ps->ps_lock);
+        spin_lock(&ps->ps_lock);
         cfs_list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
                 if (cfs_list_empty(&pool->po_free_list))
                         continue;
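Note: the kiblnd_init_poolset() hunk above replaces strncpy(), which neither guarantees NUL termination nor reports overflow, with strlcpy(), which always terminates and returns the length of the source string. A return value greater than or equal to the destination size therefore signals truncation, which the patch now turns into -E2BIG instead of silently storing a clipped pool name. A self-contained sketch of the idiom; struct demo_ps and demo_set_name() are hypothetical, not patch code:

    #include <linux/errno.h>
    #include <linux/string.h>

    struct demo_ps {                        /* mirrors the fixed-size ps_name */
            char ps_name[32];
    };

    static int demo_set_name(struct demo_ps *ps, const char *name)
    {
            /* strlcpy() returns strlen(name); a result >= sizeof(dst)
             * means the copy did not fit and was truncated */
            if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name)) >=
                sizeof(ps->ps_name))
                    return -E2BIG;
            return 0;
    }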
@@ -1857,35 +1864,35 @@ kiblnd_pool_alloc_node(kib_poolset_t *ps)
                         /* still hold the lock */
                         ps->ps_node_init(pool, node);
                 }
-                cfs_spin_unlock(&ps->ps_lock);
-                return node;
-        }
+                spin_unlock(&ps->ps_lock);
+                return node;
+        }

-        /* no available tx pool and ... */
-        if (ps->ps_increasing) {
-                /* another thread is allocating a new pool */
-                cfs_spin_unlock(&ps->ps_lock);
+        /* no available tx pool and ... */
+        if (ps->ps_increasing) {
+                /* another thread is allocating a new pool */
+                spin_unlock(&ps->ps_lock);
                 CDEBUG(D_NET, "Another thread is allocating new "
                        "%s pool, waiting for her to complete\n", ps->ps_name);
-                cfs_schedule();
+                schedule();
                 goto again;
         }

-        if (cfs_time_before(cfs_time_current(), ps->ps_next_retry)) {
-                /* someone failed recently */
-                cfs_spin_unlock(&ps->ps_lock);
-                return NULL;
-        }
+        if (cfs_time_before(cfs_time_current(), ps->ps_next_retry)) {
+                /* someone failed recently */
+                spin_unlock(&ps->ps_lock);
+                return NULL;
+        }

-        ps->ps_increasing = 1;
-        cfs_spin_unlock(&ps->ps_lock);
+        ps->ps_increasing = 1;
+        spin_unlock(&ps->ps_lock);

-        CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);
+        CDEBUG(D_NET, "%s pool exhausted, allocate new pool\n", ps->ps_name);

-        rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);
+        rc = ps->ps_pool_create(ps, ps->ps_pool_size, &pool);

-        cfs_spin_lock(&ps->ps_lock);
+        spin_lock(&ps->ps_lock);
         ps->ps_increasing = 0;
         if (rc == 0) {
                 cfs_list_add_tail(&pool->po_list, &ps->ps_pool_list);
@@ -1894,9 +1901,9 @@ kiblnd_pool_alloc_node(kib_poolset_t *ps)
                 CERROR("Can't allocate new %s pool because out of memory\n",
                        ps->ps_name);
         }
-        cfs_spin_unlock(&ps->ps_lock);
+        spin_unlock(&ps->ps_lock);

-        goto again;
+        goto again;
 }
@@ -2147,6 +2154,8 @@ kiblnd_create_tx_pool(kib_poolset_t *ps, int size, kib_pool_t **pp_po)
                 if (tx->tx_frags == NULL)
                         break;

+                sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS);
+
                 LIBCFS_CPT_ALLOC(tx->tx_wrq, lnet_cpt_table(), ps->ps_cpt,
                                  (1 + IBLND_MAX_RDMA_FRAGS) *
                                  sizeof(*tx->tx_wrq));
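Note: the kiblnd_create_tx_pool() hunk above adds a previously missing sg_init_table() call on the freshly allocated tx_frags array. A scatterlist must be zeroed and have its end marker set before it is walked; skipping this leaves sg chaining and end detection reading uninitialized memory. A minimal sketch; DEMO_MAX_FRAGS and demo_alloc_frags() are illustrative stand-ins:

    #include <linux/errno.h>
    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    #define DEMO_MAX_FRAGS 16           /* stand-in for IBLND_MAX_RDMA_FRAGS */

    static int demo_alloc_frags(struct scatterlist **sgp)
    {
            struct scatterlist *sg;

            sg = kmalloc(DEMO_MAX_FRAGS * sizeof(*sg), GFP_KERNEL);
            if (sg == NULL)
                    return -ENOMEM;

            /* zero every entry and mark the last one as the end of the
             * table; required before the sg list is ever traversed */
            sg_init_table(sg, DEMO_MAX_FRAGS);

            *sgp = sg;
            return 0;
    }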
@@ -2236,45 +2245,29 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
         int             rc;
         int             i;

-        net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
-                                          sizeof(kib_tx_poolset_t));
-        if (net->ibn_tx_ps == NULL) {
-                CERROR("Failed to allocate tx pool array\n");
-                return -ENOMEM;
-        }
-
-        for (i = 0; i < ncpts; i++) {
-                cpt = (cpts == NULL) ? i : cpts[i];
-                rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset,
-                                         cpt, net, "TX",
-                                         kiblnd_tx_pool_size(ncpts),
-                                         kiblnd_create_tx_pool,
-                                         kiblnd_destroy_tx_pool,
-                                         kiblnd_tx_init, NULL);
-                if (rc != 0) {
-                        CERROR("Failed to initialize TX pool\n");
-                        goto failed;
-                }
-        }
-
-        cfs_read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
         if (*kiblnd_tunables.kib_map_on_demand == 0 &&
             net->ibn_dev->ibd_hdev->ibh_nmrs == 1) {
-                cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
+                read_unlock_irqrestore(&kiblnd_data.kib_global_lock,
                                        flags);
-                return 0;
+                goto create_tx_pool;
         }

-        cfs_read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

         if (*kiblnd_tunables.kib_fmr_pool_size <
             *kiblnd_tunables.kib_ntx / 4) {
                 CERROR("Can't set fmr pool size (%d) < ntx / 4(%d)\n",
                        *kiblnd_tunables.kib_fmr_pool_size,
                        *kiblnd_tunables.kib_ntx / 4);
+                rc = -EINVAL;
                 goto failed;
         }

+        /* TX pool must be created later than FMR/PMR, see LU-2268
+         * for details */
+        LASSERT(net->ibn_tx_ps == NULL);
+
         /* premapping can fail if ibd_nmr > 1, so we always create
          * FMR/PMR pool and map-on-demand if premapping failed */

@@ -2282,6 +2275,7 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
                                            sizeof(kib_fmr_poolset_t));
         if (net->ibn_fmr_ps == NULL) {
                 CERROR("Failed to allocate FMR pool array\n");
+                rc = -ENOMEM;
                 goto failed;
         }

@@ -2292,18 +2286,30 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
                                              kiblnd_fmr_flush_trigger(ncpts));
                 if (rc == -ENOSYS && i == 0) /* no FMR */
                         break; /* create PMR pool */
-                if (rc != 0)
-                        goto failed; /* a real error */
+
+                if (rc != 0) { /* a real error */
+                        CERROR("Can't initialize FMR pool for CPT %d: %d\n",
+                               cpt, rc);
+                        goto failed;
+                }
+        }
+
+        if (i > 0) {
+                LASSERT(i == ncpts);
+                goto create_tx_pool;
         }

         cfs_percpt_free(net->ibn_fmr_ps);
         net->ibn_fmr_ps = NULL;

+        CWARN("Device does not support FMR, failing back to PMR\n");
+
         if (*kiblnd_tunables.kib_pmr_pool_size <
             *kiblnd_tunables.kib_ntx / 4) {
                 CERROR("Can't set pmr pool size (%d) < ntx / 4(%d)\n",
                        *kiblnd_tunables.kib_pmr_pool_size,
                        *kiblnd_tunables.kib_ntx / 4);
+                rc = -EINVAL;
                 goto failed;
         }

@@ -2311,6 +2317,7 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
                                            sizeof(kib_pmr_poolset_t));
         if (net->ibn_pmr_ps == NULL) {
                 CERROR("Failed to allocate PMR pool array\n");
+                rc = -ENOMEM;
                 goto failed;
         }

@@ -2321,14 +2328,41 @@ kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
                                          kiblnd_pmr_pool_size(ncpts),
                                          kiblnd_create_pmr_pool,
                                          kiblnd_destroy_pmr_pool, NULL, NULL);
-                if (rc != 0)
+                if (rc != 0) {
+                        CERROR("Can't initialize PMR pool for CPT %d: %d\n",
+                               cpt, rc);
                         goto failed;
+                }
         }

-        return 0;
+ create_tx_pool:
+        net->ibn_tx_ps = cfs_percpt_alloc(lnet_cpt_table(),
+                                          sizeof(kib_tx_poolset_t));
+        if (net->ibn_tx_ps == NULL) {
+                CERROR("Failed to allocate tx pool array\n");
+                rc = -ENOMEM;
+                goto failed;
+        }
+
+        for (i = 0; i < ncpts; i++) {
+                cpt = (cpts == NULL) ? i : cpts[i];
+                rc = kiblnd_init_poolset(&net->ibn_tx_ps[cpt]->tps_poolset,
+                                         cpt, net, "TX",
+                                         kiblnd_tx_pool_size(ncpts),
+                                         kiblnd_create_tx_pool,
+                                         kiblnd_destroy_tx_pool,
+                                         kiblnd_tx_init, NULL);
+                if (rc != 0) {
+                        CERROR("Can't initialize TX pool for CPT %d: %d\n",
+                               cpt, rc);
+                        goto failed;
+                }
+        }
+
+        return 0;
 failed:
         kiblnd_net_fini_pools(net);
+        LASSERT(rc != 0);
         return rc;
 }
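Note: the restructured kiblnd_net_init_pools() above does two things. Every failure path now sets rc explicitly before jumping to failed (the old code could reach the label with a stale rc, hence the added LASSERT(rc != 0)), and the TX pool is created last, after the FMR/PMR pools it is mapped against, per the LU-2268 ordering noted in the added comment. A compilable sketch of that shape, with every demo_* type and helper hypothetical:

    #include <linux/errno.h>
    #include <linux/types.h>

    struct demo_net { bool have_fmr; bool have_tx; };

    static int demo_init_fmr(struct demo_net *net)
    { net->have_fmr = true; return 0; }

    static int demo_init_tx(struct demo_net *net)
    { net->have_tx = true; return 0; }

    static void demo_fini_pools(struct demo_net *net)
    { net->have_tx = net->have_fmr = false; }

    static int demo_init_pools(struct demo_net *net)
    {
            int rc;

            /* mapping pools first: TX descriptors are mapped against
             * them, so the TX pool must be created afterwards */
            rc = demo_init_fmr(net);
            if (rc != 0)
                    goto failed;

            rc = demo_init_tx(net);
            if (rc != 0)
                    goto failed;

            return 0;
    failed:
            demo_fini_pools(net);   /* safe on a partially built net */
            return rc;              /* rc is always set on this path */
    }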
@@ -2591,13 +2625,13 @@ kiblnd_dev_failover(kib_dev_t *dev)
                  * because we can fail to create new listener.
                  * But we have to close it now, otherwise rdma_bind_addr
                  * will return EADDRINUSE... How crap! */
-                cfs_write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+                write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

-                cmid = dev->ibd_hdev->ibh_cmid;
-                /* make next schedule of kiblnd_dev_need_failover
-                 * will return 1 for me */
-                dev->ibd_hdev->ibh_cmid = NULL;
-                cfs_write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+                cmid = dev->ibd_hdev->ibh_cmid;
+                /* make next schedule of kiblnd_dev_need_failover()
+                 * return 1 for me */
+                dev->ibd_hdev->ibh_cmid = NULL;
+                write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

                 rdma_destroy_id(cmid);
         }
@@ -2659,7 +2693,7 @@ kiblnd_dev_failover(kib_dev_t *dev)
                 goto out;
         }

-        cfs_write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);

         old = dev->ibd_hdev;
         dev->ibd_hdev = hdev; /* take over the refcount */
@@ -2681,7 +2715,7 @@ kiblnd_dev_failover(kib_dev_t *dev)
                 }
         }

-        cfs_write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 out:
         if (!cfs_list_empty(&zombie_tpo))
                 kiblnd_destroy_pool_list(&zombie_tpo);
@@ -2797,20 +2831,20 @@ kiblnd_base_shutdown(void)
         LASSERT (cfs_list_empty(&kiblnd_data.kib_connd_zombies));
         LASSERT (cfs_list_empty(&kiblnd_data.kib_connd_conns));

-        /* flag threads to terminate; wake and wait for them to die */
-        kiblnd_data.kib_shutdown = 1;
+        /* flag threads to terminate; wake and wait for them to die */
+        kiblnd_data.kib_shutdown = 1;

         /* NB: we really want to stop scheduler threads net by net
          * instead of the whole module, this should be improved
          * with dynamic configuration LNet */
         cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds)
-                cfs_waitq_broadcast(&sched->ibs_waitq);
+                wake_up_all(&sched->ibs_waitq);

-        cfs_waitq_broadcast(&kiblnd_data.kib_connd_waitq);
-        cfs_waitq_broadcast(&kiblnd_data.kib_failover_waitq);
+        wake_up_all(&kiblnd_data.kib_connd_waitq);
+        wake_up_all(&kiblnd_data.kib_failover_waitq);

-        i = 2;
-        while (cfs_atomic_read(&kiblnd_data.kib_nthreads) != 0) {
+        i = 2;
+        while (cfs_atomic_read(&kiblnd_data.kib_nthreads) != 0) {
                 i++;
                 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
*/ "Waiting for %d threads to terminate\n", @@ -2836,15 +2870,15 @@ kiblnd_base_shutdown(void) CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n", cfs_atomic_read(&libcfs_kmemory)); - kiblnd_data.kib_init = IBLND_INIT_NOTHING; - PORTAL_MODULE_UNUSE; + kiblnd_data.kib_init = IBLND_INIT_NOTHING; + module_put(THIS_MODULE); } void kiblnd_shutdown (lnet_ni_t *ni) { kib_net_t *net = ni->ni_data; - cfs_rwlock_t *g_lock = &kiblnd_data.kib_global_lock; + rwlock_t *g_lock = &kiblnd_data.kib_global_lock; int i; unsigned long flags; @@ -2856,9 +2890,9 @@ kiblnd_shutdown (lnet_ni_t *ni) CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n", cfs_atomic_read(&libcfs_kmemory)); - cfs_write_lock_irqsave(g_lock, flags); - net->ibn_shutdown = 1; - cfs_write_unlock_irqrestore(g_lock, flags); + write_lock_irqsave(g_lock, flags); + net->ibn_shutdown = 1; + write_unlock_irqrestore(g_lock, flags); switch (net->ibn_init) { default: @@ -2881,11 +2915,11 @@ kiblnd_shutdown (lnet_ni_t *ni) kiblnd_net_fini_pools(net); - cfs_write_lock_irqsave(g_lock, flags); - LASSERT (net->ibn_dev->ibd_nnets > 0); - net->ibn_dev->ibd_nnets--; - cfs_list_del(&net->ibn_list); - cfs_write_unlock_irqrestore(g_lock, flags); + write_lock_irqsave(g_lock, flags); + LASSERT(net->ibn_dev->ibd_nnets > 0); + net->ibn_dev->ibd_nnets--; + cfs_list_del(&net->ibn_list); + write_unlock_irqrestore(g_lock, flags); /* fall through */ @@ -2922,10 +2956,10 @@ kiblnd_base_startup(void) LASSERT (kiblnd_data.kib_init == IBLND_INIT_NOTHING); - PORTAL_MODULE_USE; - memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */ + try_module_get(THIS_MODULE); + memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */ - cfs_rwlock_init(&kiblnd_data.kib_global_lock); + rwlock_init(&kiblnd_data.kib_global_lock); CFS_INIT_LIST_HEAD(&kiblnd_data.kib_devs); CFS_INIT_LIST_HEAD(&kiblnd_data.kib_failed_devs); @@ -2940,11 +2974,11 @@ kiblnd_base_startup(void) for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) CFS_INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); - cfs_spin_lock_init(&kiblnd_data.kib_connd_lock); - CFS_INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); - CFS_INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); - cfs_waitq_init(&kiblnd_data.kib_connd_waitq); - cfs_waitq_init(&kiblnd_data.kib_failover_waitq); + spin_lock_init(&kiblnd_data.kib_connd_lock); + CFS_INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); + CFS_INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); + init_waitqueue_head(&kiblnd_data.kib_connd_waitq); + init_waitqueue_head(&kiblnd_data.kib_failover_waitq); kiblnd_data.kib_scheds = cfs_percpt_alloc(lnet_cpt_table(), sizeof(*sched)); @@ -2954,9 +2988,9 @@ kiblnd_base_startup(void) cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) { int nthrs; - cfs_spin_lock_init(&sched->ibs_lock); + spin_lock_init(&sched->ibs_lock); CFS_INIT_LIST_HEAD(&sched->ibs_conns); - cfs_waitq_init(&sched->ibs_waitq); + init_waitqueue_head(&sched->ibs_waitq); nthrs = cfs_cpt_weight(lnet_cpt_table(), i); if (*kiblnd_tunables.kib_nscheds > 0) { @@ -2977,14 +3011,15 @@ kiblnd_base_startup(void) kiblnd_data.kib_init = IBLND_INIT_DATA; /*****************************************************/ - rc = kiblnd_thread_start(kiblnd_connd, NULL); + rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd"); if (rc != 0) { CERROR("Can't spawn o2iblnd connd: %d\n", rc); goto failed; } - if (*kiblnd_tunables.kib_dev_failover != 0) - rc = kiblnd_thread_start(kiblnd_failover_thread, NULL); + if (*kiblnd_tunables.kib_dev_failover != 0) + rc = 
@@ -2977,14 +3011,15 @@ kiblnd_base_startup(void)
         kiblnd_data.kib_init = IBLND_INIT_DATA;
         /*****************************************************/

-        rc = kiblnd_thread_start(kiblnd_connd, NULL);
+        rc = kiblnd_thread_start(kiblnd_connd, NULL, "kiblnd_connd");
         if (rc != 0) {
                 CERROR("Can't spawn o2iblnd connd: %d\n", rc);
                 goto failed;
         }

-        if (*kiblnd_tunables.kib_dev_failover != 0)
-                rc = kiblnd_thread_start(kiblnd_failover_thread, NULL);
+        if (*kiblnd_tunables.kib_dev_failover != 0)
+                rc = kiblnd_thread_start(kiblnd_failover_thread, NULL,
+                                         "kiblnd_failover");

         if (rc != 0) {
                 CERROR("Can't spawn o2iblnd failover thread: %d\n", rc);
@@ -3026,9 +3061,11 @@ kiblnd_start_schedulers(struct kib_sched_info *sched)

         for (i = 0; i < nthrs; i++) {
                 long    id;
-
+                char    name[20];
                 id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
-                rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id);
+                snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld",
+                         KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
+                rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name);
                 if (rc == 0)
                         continue;
@@ -3124,10 +3161,10 @@ kiblnd_startup (lnet_ni_t *ni)
         if (net == NULL)
                 goto failed;

-        memset(net, 0, sizeof(*net));
+        memset(net, 0, sizeof(*net));

-        cfs_gettimeofday(&tv);
-        net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+        do_gettimeofday(&tv);
+        net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;

         ni->ni_peertimeout = *kiblnd_tunables.kib_peertimeout;
         ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits;
@@ -3177,17 +3214,17 @@ kiblnd_startup (lnet_ni_t *ni)
                 goto failed;
         }

-        cfs_write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
-        ibdev->ibd_nnets++;
-        cfs_list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
-        cfs_write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+        write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
+        ibdev->ibd_nnets++;
+        cfs_list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
+        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);

         net->ibn_init = IBLND_INIT_ALL;

         return 0;

 failed:
-        if (net->ibn_dev == NULL && ibdev != NULL)
+        if (net != NULL && net->ibn_dev == NULL && ibdev != NULL)
                 kiblnd_destroy_dev(ibdev);

         kiblnd_shutdown(ni);
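Note: kiblnd_thread_start() now takes a thread name, and the kiblnd_start_schedulers() hunk builds per-thread names of the form kiblnd_sd_<cpt>_<tid> so the schedulers are distinguishable in ps/top. In the kernel this kind of naming is typically carried by kthread_run()'s printf-style name argument; a minimal sketch with hypothetical demo_* names, not the patch's actual kiblnd_thread_start() implementation:

    #include <linux/err.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int demo_worker(void *arg)
    {
            /* idle politely until someone calls kthread_stop() */
            while (!kthread_should_stop())
                    schedule_timeout_interruptible(HZ);
            return 0;
    }

    static int demo_start_worker(int cpt, int tid)
    {
            struct task_struct *task;
            char name[20];

            snprintf(name, sizeof(name), "demo_sd_%02d_%02d", cpt, tid);
            /* "%s" guards against '%' characters in the built name */
            task = kthread_run(demo_worker, NULL, "%s", name);
            return IS_ERR(task) ? PTR_ERR(task) : 0;
    }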