return 1;
}
+/*
+ * Return the scheduler bound to @cpt.
+ *
+ * A CPT may own no CPUs (e.g. a NUMA node configured without any
+ * associated CPUs), in which case its scheduler was started with no
+ * service threads.  Fall back to the first scheduler in the system
+ * that does have threads; return NULL only when none exists at all.
+ */
+static struct kib_sched_info *
+kiblnd_get_scheduler(int cpt)
+{
+	struct kib_sched_info *sched = kiblnd_data.kib_scheds[cpt];
+	int i;
+
+	/* Fast path: the CPT-native scheduler is usable. */
+	if (sched->ibs_nthreads > 0)
+		return sched;
+
+	/* Fallback: scan every per-CPT scheduler for a usable one. */
+	cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) {
+		if (sched->ibs_nthreads <= 0)
+			continue;
+
+		CDEBUG(D_NET, "scheduler[%d] has no threads. selected scheduler[%d]\n",
+		       cpt, sched->ibs_cpt);
+		return sched;
+	}
+
+	return NULL;
+}
+
kib_conn_t *
kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid,
int state, int version)
dev = net->ibn_dev;
cpt = lnet_cpt_of_nid(peer_ni->ibp_nid, peer_ni->ibp_ni);
- sched = kiblnd_data.kib_scheds[cpt];
+ sched = kiblnd_get_scheduler(cpt);
+
+ if (sched == NULL) {
+ CERROR("no schedulers available. node is unhealthy\n");
+ goto failed_0;
+ }
- LASSERT(sched->ibs_nthreads > 0);
+ /*
+ * The cpt might have changed if we ended up selecting a non cpt
+ * native scheduler. So use the scheduler's cpt instead.
+ */
+ cpt = sched->ibs_cpt;
LIBCFS_CPT_ALLOC(init_qp_attr, lnet_cpt_table(), cpt,
sizeof(*init_qp_attr));
goto failed_2;
}
- init_qp_attr->event_handler = kiblnd_qp_event;
- init_qp_attr->qp_context = conn;
+ init_qp_attr->event_handler = kiblnd_qp_event;
+ init_qp_attr->qp_context = conn;
init_qp_attr->cap.max_send_wr = IBLND_SEND_WRS(conn);
init_qp_attr->cap.max_recv_wr = IBLND_RECV_WRS(conn);
- init_qp_attr->cap.max_send_sge = 1;
- init_qp_attr->cap.max_recv_sge = 1;
- init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
- init_qp_attr->qp_type = IB_QPT_RC;
- init_qp_attr->send_cq = cq;
- init_qp_attr->recv_cq = cq;
+ init_qp_attr->cap.max_send_sge = *kiblnd_tunables.kib_wrq_sge;
+ init_qp_attr->cap.max_recv_sge = 1;
+ init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+ init_qp_attr->qp_type = IB_QPT_RC;
+ init_qp_attr->send_cq = cq;
+ init_qp_attr->recv_cq = cq;
conn->ibc_sched = sched;
} while (rc);
if (rc) {
- CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d\n",
- rc, init_qp_attr->cap.max_send_wr,
- init_qp_attr->cap.max_recv_wr);
+ CERROR("Can't create QP: %d, send_wr: %d, recv_wr: %d, "
+ "send_sge: %d, recv_sge: %d\n",
+ rc, init_qp_attr->cap.max_send_wr,
+ init_qp_attr->cap.max_recv_wr,
+ init_qp_attr->cap.max_send_sge,
+ init_qp_attr->cap.max_recv_sge);
goto failed_2;
}
break;
}
- LASSERT (conn->ibc_cmid != NULL);
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- if (conn->ibc_cmid->route.path_rec == NULL)
- data->ioc_u32[0] = 0; /* iWarp has no path MTU */
- else
- data->ioc_u32[0] =
- ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
- kiblnd_conn_decref(conn);
- break;
+ LASSERT(conn->ibc_cmid != NULL);
+ data->ioc_nid = conn->ibc_peer->ibp_nid;
+ if (conn->ibc_cmid->route.path_rec == NULL)
+ data->ioc_u32[0] = 0; /* iWarp has no path MTU */
+ else
+ data->ioc_u32[0] =
+ ib_mtu_enum_to_int(conn->ibc_cmid->route.path_rec->mtu);
+ kiblnd_conn_decref(conn);
+ break;
}
case IOC_LIBCFS_CLOSE_CONNECTION: {
rc = kiblnd_close_matching_conns(ni, data->ioc_nid);
return 0;
}
spin_unlock(&fps->fps_lock);
- rc = -EBUSY;
+ rc = -EAGAIN;
}
spin_lock(&fps->fps_lock);
goto out;
for (i = 0; i < pool->po_size; i++) {
- kib_tx_t *tx = &tpo->tpo_tx_descs[i];
+ kib_tx_t *tx = &tpo->tpo_tx_descs[i];
+ int wrq_sge = *kiblnd_tunables.kib_wrq_sge;
list_del(&tx->tx_list);
if (tx->tx_pages != NULL)
LIBCFS_FREE(tx->tx_wrq,
(1 + IBLND_MAX_RDMA_FRAGS) *
sizeof(*tx->tx_wrq));
- if (tx->tx_sge != NULL)
- LIBCFS_FREE(tx->tx_sge,
- (1 + IBLND_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_sge));
+ if (tx->tx_sge != NULL)
+ LIBCFS_FREE(tx->tx_sge,
+ (1 + IBLND_MAX_RDMA_FRAGS) * wrq_sge *
+ sizeof(*tx->tx_sge));
if (tx->tx_rd != NULL)
LIBCFS_FREE(tx->tx_rd,
offsetof(kib_rdma_desc_t,
memset(tpo->tpo_tx_descs, 0, size * sizeof(kib_tx_t));
for (i = 0; i < size; i++) {
- kib_tx_t *tx = &tpo->tpo_tx_descs[i];
+ kib_tx_t *tx = &tpo->tpo_tx_descs[i];
+ int wrq_sge = *kiblnd_tunables.kib_wrq_sge;
tx->tx_pool = tpo;
if (ps->ps_net->ibn_fmr_ps != NULL) {
break;
LIBCFS_CPT_ALLOC(tx->tx_sge, lnet_cpt_table(), ps->ps_cpt,
- (1 + IBLND_MAX_RDMA_FRAGS) *
+ (1 + IBLND_MAX_RDMA_FRAGS) * wrq_sge *
sizeof(*tx->tx_sge));
if (tx->tx_sge == NULL)
break;
hdev->ibh_cmid = cmid;
hdev->ibh_ibdev = cmid->device;
-#ifdef HAVE_IB_GET_DMA_MR
- pd = ib_alloc_pd(cmid->device);
-#else
+#ifdef HAVE_IB_ALLOC_PD_2ARGS
pd = ib_alloc_pd(cmid->device, 0);
+#else
+ pd = ib_alloc_pd(cmid->device);
#endif
if (IS_ERR(pd)) {
rc = PTR_ERR(pd);
kiblnd_tunables_setup(ni);
- if (ni->ni_interfaces[0] != NULL) {
- /* Use the IPoIB interface specified in 'networks=' */
+ if (ni->ni_interfaces[0] != NULL) {
+ /* Use the IPoIB interface specified in 'networks=' */
- CLASSERT (LNET_MAX_INTERFACES > 1);
- if (ni->ni_interfaces[1] != NULL) {
- CERROR("Multiple interfaces not supported\n");
- goto failed;
- }
+ CLASSERT(LNET_NUM_INTERFACES > 1);
+ if (ni->ni_interfaces[1] != NULL) {
+ CERROR("Multiple interfaces not supported\n");
+ goto failed;
+ }
- ifname = ni->ni_interfaces[0];
- } else {
- ifname = *kiblnd_tunables.kib_default_ipif;
- }
+ ifname = ni->ni_interfaces[0];
+ } else {
+ ifname = *kiblnd_tunables.kib_default_ipif;
+ }
if (strlen(ifname) >= sizeof(ibdev->ibd_ifname)) {
CERROR("IPoIB interface name too long: %s\n", ifname);