*/
#include <asm/page.h>
+#include <linux/ethtool.h>
#include <linux/inetdevice.h>
#include "o2iblnd.h"
{
struct kib_net *net = ni->ni_data;
- /* CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously. */
- msg->ibm_magic = IBLND_MSG_MAGIC;
- msg->ibm_version = version;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = ni->ni_nid;
- msg->ibm_srcstamp = net->ibn_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
-
- if (*kiblnd_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
- }
+ /* CAVEAT EMPTOR! all message fields not set here should have been
+ * initialised previously.
+ */
+ msg->ibm_magic = IBLND_MSG_MAGIC;
+ msg->ibm_version = version;
+ /* ibm_type */
+ msg->ibm_credits = credits;
+ /* ibm_nob */
+ msg->ibm_cksum = 0;
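+	/* the wire message still carries the 4-byte NID format, so
+	 * convert from the (possibly large) in-core NID
+	 */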
+ msg->ibm_srcnid = lnet_nid_to_nid4(&ni->ni_nid);
+ msg->ibm_srcstamp = net->ibn_incarnation;
+ msg->ibm_dstnid = dstnid;
+ msg->ibm_dststamp = dststamp;
+
+ if (*kiblnd_tunables.kib_cksum) {
+ /* NB ibm_cksum zero while computing cksum */
+ msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob);
+ }
}
int kiblnd_unpack_msg(struct kib_msg *msg, int nob)
* created.
*/
if (peer_ni->ibp_nid != nid ||
- peer_ni->ibp_ni->ni_nid != ni->ni_nid)
+ !nid_same(&peer_ni->ibp_ni->ni_nid, &ni->ni_nid))
continue;
CDEBUG(D_NET, "got peer_ni [%p] -> %s (%d) version: %x\n",
struct kib_fmr_pool *fpo)
{
struct ib_fmr_pool_param param = {
- .max_pages_per_fmr = LNET_MAX_IOV,
+ .max_pages_per_fmr = IBLND_MAX_RDMA_FRAGS,
.page_shift = PAGE_SHIFT,
.access = (IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE),
#ifndef HAVE_IB_MAP_MR_SG
frd->frd_frpl = ib_alloc_fast_reg_page_list(fpo->fpo_hdev->ibh_ibdev,
- LNET_MAX_IOV);
+ IBLND_MAX_RDMA_FRAGS);
if (IS_ERR(frd->frd_frpl)) {
rc = PTR_ERR(frd->frd_frpl);
CERROR("Failed to allocate ib_fast_reg_page_list: %d\n",
#ifdef HAVE_IB_ALLOC_FAST_REG_MR
frd->frd_mr = ib_alloc_fast_reg_mr(fpo->fpo_hdev->ibh_pd,
- LNET_MAX_IOV);
+ IBLND_MAX_RDMA_FRAGS);
#else
/*
* it is expected to get here if this is an MLX-5 card.
#else
IB_MR_TYPE_MEM_REG,
#endif
- LNET_MAX_IOV);
+ IBLND_MAX_RDMA_FRAGS);
if ((*kiblnd_tunables.kib_use_fastreg_gaps == 1) &&
(dev_caps & IBLND_DEV_CAPS_FASTREG_GAPS_SUPPORT))
CWARN("using IB_MR_TYPE_SG_GAPS, expect a performance drop\n");
if (frd) {
frd->frd_valid = false;
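+		/* no fast-reg work request has been posted for this
+		 * descriptor yet
+		 */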
+ frd->frd_posted = false;
fmr->fmr_frd = NULL;
spin_lock(&fps->fps_lock);
list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list);
fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
fmr->fmr_frd = frd;
fmr->fmr_pool = fpo;
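+	/* the registration WR for this mapping has not been posted yet */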
+ frd->frd_posted = false;
return 0;
}
spin_unlock(&fps->fps_lock);
CFS_FREE_PTR_ARRAY(tx->tx_pages, LNET_MAX_IOV);
if (tx->tx_frags != NULL)
CFS_FREE_PTR_ARRAY(tx->tx_frags,
- (1 + IBLND_MAX_RDMA_FRAGS));
+ IBLND_MAX_RDMA_FRAGS);
if (tx->tx_wrq != NULL)
CFS_FREE_PTR_ARRAY(tx->tx_wrq,
- (1 + IBLND_MAX_RDMA_FRAGS));
+ IBLND_MAX_RDMA_FRAGS);
if (tx->tx_sge != NULL)
CFS_FREE_PTR_ARRAY(tx->tx_sge,
- (1 + IBLND_MAX_RDMA_FRAGS) *
+ IBLND_MAX_RDMA_FRAGS *
wrq_sge);
if (tx->tx_rd != NULL)
LIBCFS_FREE(tx->tx_rd,
}
LIBCFS_CPT_ALLOC(tx->tx_frags, lnet_cpt_table(), ps->ps_cpt,
- (1 + IBLND_MAX_RDMA_FRAGS) *
+ IBLND_MAX_RDMA_FRAGS *
sizeof(*tx->tx_frags));
if (tx->tx_frags == NULL)
break;
- sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS + 1);
+ sg_init_table(tx->tx_frags, IBLND_MAX_RDMA_FRAGS);
LIBCFS_CPT_ALLOC(tx->tx_wrq, lnet_cpt_table(), ps->ps_cpt,
- (1 + IBLND_MAX_RDMA_FRAGS) *
+ IBLND_MAX_RDMA_FRAGS *
sizeof(*tx->tx_wrq));
if (tx->tx_wrq == NULL)
break;
LIBCFS_CPT_ALLOC(tx->tx_sge, lnet_cpt_table(), ps->ps_cpt,
- (1 + IBLND_MAX_RDMA_FRAGS) * wrq_sge *
+ IBLND_MAX_RDMA_FRAGS * wrq_sge *
sizeof(*tx->tx_sge));
if (tx->tx_sge == NULL)
break;
list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) {
if (val)
CDEBUG(D_NETERROR, "Fatal device error for NI %s\n",
- libcfs_nid2str(net->ibn_ni->ni_nid));
+ libcfs_nidstr(&net->ibn_ni->ni_nid));
atomic_set(&net->ibn_ni->ni_fatal_error_on, val);
}
}
return 0;
}
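+/* Return 1 if @dev reports link up, 0 if the interface is not running
+ * or the link is down, and -1 if the device does not expose link
+ * state through its ethtool ops.
+ */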
+static int kiblnd_get_link_status(struct net_device *dev)
+{
+ int ret = -1;
+
+ LASSERT(dev);
+
+ if (!netif_running(dev))
+ ret = 0;
+	/* Some devices may not provide link settings */
+ else if (dev->ethtool_ops->get_link)
+ ret = dev->ethtool_ops->get_link(dev);
+
+ return ret;
+}
+
static int
kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns)
{
LIST_HEAD(zombie_tpo);
LIST_HEAD(zombie_ppo);
LIST_HEAD(zombie_fpo);
- struct rdma_cm_id *cmid = NULL;
+ struct rdma_cm_id *cmid = NULL;
struct kib_hca_dev *hdev = NULL;
struct kib_hca_dev *old;
- struct ib_pd *pd;
+ struct ib_pd *pd;
struct kib_net *net;
- struct sockaddr_in addr;
- unsigned long flags;
- int rc = 0;
+ struct sockaddr_in addr;
+ struct net_device *netdev;
+ unsigned long flags;
+ int rc = 0;
int i;
- LASSERT (*kiblnd_tunables.kib_dev_failover > 1 ||
- dev->ibd_can_failover ||
- dev->ibd_hdev == NULL);
+ LASSERT(*kiblnd_tunables.kib_dev_failover > 1 ||
+ dev->ibd_can_failover ||
+ dev->ibd_hdev == NULL);
rc = kiblnd_dev_need_failover(dev, ns);
- if (rc <= 0)
- goto out;
+ if (rc <= 0)
+ goto out;
- if (dev->ibd_hdev != NULL &&
- dev->ibd_hdev->ibh_cmid != NULL) {
- /* XXX it's not good to close old listener at here,
- * because we can fail to create new listener.
- * But we have to close it now, otherwise rdma_bind_addr
- * will return EADDRINUSE... How crap! */
+ if (dev->ibd_hdev != NULL &&
+ dev->ibd_hdev->ibh_cmid != NULL) {
+		/* XXX it's not good to close the old listener here,
+		 * because we can fail to create the new one. But we
+		 * have to close it now, otherwise rdma_bind_addr
+		 * will return EADDRINUSE.
+		 */
write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
cmid = dev->ibd_hdev->ibh_cmid;
dev->ibd_hdev->ibh_cmid = NULL;
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
- rdma_destroy_id(cmid);
- }
+ rdma_destroy_id(cmid);
+ }
cmid = kiblnd_rdma_create_id(ns, kiblnd_cm_callback, dev, RDMA_PS_TCP,
IB_QPT_RC);
- if (IS_ERR(cmid)) {
- rc = PTR_ERR(cmid);
- CERROR("Failed to create cmid for failover: %d\n", rc);
- goto out;
- }
+ if (IS_ERR(cmid)) {
+ rc = PTR_ERR(cmid);
+ CERROR("Failed to create cmid for failover: %d\n", rc);
+ goto out;
+ }
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip);
- addr.sin_port = htons(*kiblnd_tunables.kib_service);
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip);
+ addr.sin_port = htons(*kiblnd_tunables.kib_service);
- /* Bind to failover device or port */
- rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr);
+ /* Bind to failover device or port */
+ rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr);
if (rc != 0 || cmid->device == NULL) {
CERROR("Failed to bind %s:%pI4h to device(%p): %d\n",
dev->ibd_ifname, &dev->ibd_ifip,
cmid->device, rc);
- rdma_destroy_id(cmid);
- goto out;
- }
+ rdma_destroy_id(cmid);
+ goto out;
+ }
LIBCFS_ALLOC(hdev, sizeof(*hdev));
- if (hdev == NULL) {
- CERROR("Failed to allocate kib_hca_dev\n");
- rdma_destroy_id(cmid);
- rc = -ENOMEM;
- goto out;
- }
+ if (hdev == NULL) {
+ CERROR("Failed to allocate kib_hca_dev\n");
+ rdma_destroy_id(cmid);
+ rc = -ENOMEM;
+ goto out;
+ }
- atomic_set(&hdev->ibh_ref, 1);
- hdev->ibh_dev = dev;
- hdev->ibh_cmid = cmid;
- hdev->ibh_ibdev = cmid->device;
+ atomic_set(&hdev->ibh_ref, 1);
+ hdev->ibh_dev = dev;
+ hdev->ibh_cmid = cmid;
+ hdev->ibh_ibdev = cmid->device;
hdev->ibh_port = cmid->port_num;
#ifdef HAVE_IB_ALLOC_PD_2ARGS
goto out;
}
- hdev->ibh_pd = pd;
+ hdev->ibh_pd = pd;
- rc = rdma_listen(cmid, 0);
- if (rc != 0) {
- CERROR("Can't start new listener: %d\n", rc);
- goto out;
- }
+ rc = rdma_listen(cmid, 0);
+ if (rc != 0) {
+ CERROR("Can't start new listener: %d\n", rc);
+ goto out;
+ }
rc = kiblnd_hdev_get_attr(hdev);
if (rc != 0) {
if (hdev != NULL)
kiblnd_hdev_decref(hdev);
- if (rc != 0)
+ if (rc != 0) {
dev->ibd_failed_failover++;
- else
+ } else {
dev->ibd_failed_failover = 0;
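+		/* failover succeeded; clear the NI fatal state only if
+		 * the interface now reports its link as up
+		 */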
+ rcu_read_lock();
+ netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname);
+ if (netdev && (kiblnd_get_link_status(netdev) == 1))
+ kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0);
+ rcu_read_unlock();
+ }
+
return rc;
}
wait_var_event_warning(&net->ibn_npeers,
atomic_read(&net->ibn_npeers) == 0,
"%s: waiting for %d peers to disconnect\n",
- libcfs_nid2str(ni->ni_nid),
+ libcfs_nidstr(&ni->ni_nid),
atomic_read(&net->ibn_npeers));
kiblnd_net_fini_pools(net);
}
for (i = 0; i < nthrs; i++) {
- long id;
- char name[20];
- id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
- snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld",
- KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
- rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name);
+ long id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i);
+
+ rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id,
+ "kiblnd_sd_%02ld_%02ld",
+ KIB_THREAD_CPT(id), KIB_THREAD_TID(id));
if (rc == 0)
continue;
}
net->ibn_dev = ibdev;
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip);
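+	/* keep the network part; store the interface IPv4 address,
+	 * in network byte order, in the first word of the NID address
+	 */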
+ ni->ni_nid.nid_addr[0] = cpu_to_be32(ibdev->ibd_ifip);
ni->ni_dev_cpt = ifaces[i].li_cpt;
.lnd_ctl = kiblnd_ctl,
.lnd_send = kiblnd_send,
.lnd_recv = kiblnd_recv,
+ .lnd_get_dev_prio = kiblnd_get_dev_prio,
};
static void ko2inlnd_assert_wire_constants(void)