#include "o2iblnd.h"
-static struct lnet_lnd the_o2iblnd;
+static const struct lnet_lnd the_o2iblnd;
struct kib_data kiblnd_data;
if (flip) {
/* leave magic unflipped as a clue to peer_ni endianness */
msg->ibm_version = version;
- CLASSERT (sizeof(msg->ibm_type) == 1);
- CLASSERT (sizeof(msg->ibm_credits) == 1);
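+	/* single-byte fields need no byte swap */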
+ BUILD_BUG_ON(sizeof(msg->ibm_type) != 1);
+ BUILD_BUG_ON(sizeof(msg->ibm_credits) != 1);
msg->ibm_nob = msg_nob;
__swab64s(&msg->ibm_srcnid);
__swab64s(&msg->ibm_srcstamp);
}
static void
-kiblnd_query(struct lnet_ni *ni, lnet_nid_t nid, time64_t *when)
-{
- time64_t last_alive = 0;
- time64_t now = ktime_get_seconds();
- rwlock_t *glock = &kiblnd_data.kib_global_lock;
- struct kib_peer_ni *peer_ni;
- unsigned long flags;
-
- read_lock_irqsave(glock, flags);
-
- peer_ni = kiblnd_find_peer_locked(ni, nid);
- if (peer_ni != NULL)
- last_alive = peer_ni->ibp_last_alive;
-
- read_unlock_irqrestore(glock, flags);
-
- if (last_alive != 0)
- *when = last_alive;
-
- /* peer_ni is not persistent in hash, trigger peer_ni creation
- * and connection establishment with a NULL tx */
- if (peer_ni == NULL)
- kiblnd_launch_tx(ni, NULL, nid);
-
- CDEBUG(D_NET, "peer_ni %s %p, alive %lld secs ago\n",
- libcfs_nid2str(nid), peer_ni,
- last_alive ? now - last_alive : -1);
-}
-
-static void
kiblnd_free_pages(struct kib_pages *p)
{
int npages = p->ibp_npages;
if (i++ % 50 == 0)
CDEBUG(D_NET, "%s: Wait for failover\n",
dev->ibd_ifname);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(cfs_time_seconds(1) / 100);
+ schedule_timeout_interruptible(cfs_time_seconds(1) / 100);
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
}
dev = net->ibn_dev;
- /* pre-mapped messages are not bigger than 1 page */
- CLASSERT (IBLND_MSG_SIZE <= PAGE_SIZE);
+ /* pre-mapped messages are not bigger than 1 page */
+ BUILD_BUG_ON(IBLND_MSG_SIZE > PAGE_SIZE);
- /* No fancy arithmetic when we do the buffer calculations */
- CLASSERT (PAGE_SIZE % IBLND_MSG_SIZE == 0);
+ /* No fancy arithmetic when we do the buffer calculations */
+ BUILD_BUG_ON(PAGE_SIZE % IBLND_MSG_SIZE != 0);
tpo->tpo_hdev = kiblnd_current_hdev(dev);
fpo_list);
fpo->fpo_failed = 1;
- list_del(&fpo->fpo_list);
if (fpo->fpo_map_count == 0)
- list_add(&fpo->fpo_list, zombies);
+ list_move(&fpo->fpo_list, zombies);
else
- list_add(&fpo->fpo_list, &fps->fps_failed_pool_list);
+ list_move(&fpo->fpo_list, &fps->fps_failed_pool_list);
}
spin_unlock(&fps->fps_lock);
struct kib_pool, po_list);
po->po_failed = 1;
- list_del(&po->po_list);
if (po->po_allocated == 0)
- list_add(&po->po_list, zombies);
+ list_move(&po->po_list, zombies);
else
- list_add(&po->po_list, &ps->ps_failed_pool_list);
+ list_move(&po->po_list, &ps->ps_failed_pool_list);
}
spin_unlock(&ps->ps_lock);
}
"trips = %d\n",
ps->ps_name, interval, trips);
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(interval);
+ schedule_timeout_interruptible(interval);
if (interval < cfs_time_seconds(1))
interval *= 2;
pool->po_size * sizeof(struct kib_tx));
out:
kiblnd_fini_pool(pool);
- LIBCFS_FREE(tpo, sizeof(struct kib_tx_pool));
+ CFS_FREE_PTR(tpo);
}
static int kiblnd_tx_pool_size(struct lnet_ni *ni, int ncpts)
npg = (size * IBLND_MSG_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;
if (kiblnd_alloc_pages(&tpo->tpo_tx_pages, ps->ps_cpt, npg) != 0) {
CERROR("Can't allocate tx pages: %d\n", npg);
- LIBCFS_FREE(tpo, sizeof(struct kib_tx_pool));
+ CFS_FREE_PTR(tpo);
return -ENOMEM;
}
}
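+/* Query the state of the HCA port backing this device and cache it in
+ * ibh_state, so the health check can tell whether the port is usable */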
static int
+kiblnd_port_get_attr(struct kib_hca_dev *hdev)
+{
+ struct ib_port_attr *port_attr;
+ int rc;
+ unsigned long flags;
+ rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
+
+ LIBCFS_ALLOC(port_attr, sizeof(*port_attr));
+ if (port_attr == NULL) {
+ CDEBUG(D_NETERROR, "Out of memory\n");
+ return -ENOMEM;
+ }
+
+ rc = ib_query_port(hdev->ibh_ibdev, hdev->ibh_port, port_attr);
+
+ write_lock_irqsave(g_lock, flags);
+
+ if (rc == 0)
+ hdev->ibh_state = port_attr->state == IB_PORT_ACTIVE
+ ? IBLND_DEV_PORT_ACTIVE
+ : IBLND_DEV_PORT_DOWN;
+
+ write_unlock_irqrestore(g_lock, flags);
+ LIBCFS_FREE(port_attr, sizeof(*port_attr));
+
+ if (rc != 0) {
+ CDEBUG(D_NETERROR, "Failed to query IB port: %d\n", rc);
+ return rc;
+ }
+ return 0;
+}
+
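+/* Raise or clear the fatal-error flag on every NI attached to this
+ * device; both callers hold kib_global_lock for writing, which keeps
+ * the ibd_nets list stable */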
+static inline void
+kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val)
+{
+ struct kib_net *net;
+
+	/* for health check: LNet reads ni_fatal_error_on when it
+	 * reports NI status */
+ list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) {
+ if (val)
+ CDEBUG(D_NETERROR, "Fatal device error for NI %s\n",
+ libcfs_nid2str(net->ibn_ni->ni_nid));
+ atomic_set(&net->ibn_ni->ni_fatal_error_on, val);
+ }
+}
+
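+/* Async IB event callback: track device and port state transitions,
+ * updating the cached ibh_state and each NI's fatal flag to match */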
+void
+kiblnd_event_handler(struct ib_event_handler *handler, struct ib_event *event)
+{
+ rwlock_t *g_lock = &kiblnd_data.kib_global_lock;
+ struct kib_hca_dev *hdev;
+ unsigned long flags;
+
+ hdev = container_of(handler, struct kib_hca_dev, ibh_event_handler);
+
+ write_lock_irqsave(g_lock, flags);
+
+ switch (event->event) {
+ case IB_EVENT_DEVICE_FATAL:
+ CDEBUG(D_NET, "IB device fatal\n");
+ hdev->ibh_state = IBLND_DEV_FATAL;
+ kiblnd_set_ni_fatal_on(hdev, 1);
+ break;
+ case IB_EVENT_PORT_ACTIVE:
+ CDEBUG(D_NET, "IB port active\n");
+ if (event->element.port_num == hdev->ibh_port) {
+ hdev->ibh_state = IBLND_DEV_PORT_ACTIVE;
+ kiblnd_set_ni_fatal_on(hdev, 0);
+ }
+ break;
+ case IB_EVENT_PORT_ERR:
+ CDEBUG(D_NET, "IB port err\n");
+ if (event->element.port_num == hdev->ibh_port) {
+ hdev->ibh_state = IBLND_DEV_PORT_DOWN;
+ kiblnd_set_ni_fatal_on(hdev, 1);
+ }
+ break;
+ default:
+ break;
+ }
+ write_unlock_irqrestore(g_lock, flags);
+}
+
+static int
kiblnd_hdev_get_attr(struct kib_hca_dev *hdev)
{
struct ib_device_attr *dev_attr;
int rc = 0;
+ int rc2 = 0;
/* It's safe to assume a HCA can handle a page size
* matching that of the native system */
rc = -ENOSYS;
}
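+	/* also cache the initial port state for the health check */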
+ rc2 = kiblnd_port_get_attr(hdev);
+ if (rc2 != 0)
+ return rc2;
+
if (rc != 0)
rc = -EINVAL;
void
kiblnd_hdev_destroy(struct kib_hca_dev *hdev)
{
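+	/* the event handler is only registered once the HCA is fully
+	 * set up, hence the NULL check */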
+ if (hdev->ibh_event_handler.device != NULL)
+ ib_unregister_event_handler(&hdev->ibh_event_handler);
+
#ifdef HAVE_IB_GET_DMA_MR
kiblnd_hdev_cleanup_mrs(hdev);
#endif
hdev->ibh_dev = dev;
hdev->ibh_cmid = cmid;
hdev->ibh_ibdev = cmid->device;
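+	/* remember the port: ib_query_port() needs it and async port
+	 * events are filtered against it */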
+ hdev->ibh_port = cmid->port_num;
#ifdef HAVE_IB_ALLOC_PD_2ARGS
pd = ib_alloc_pd(cmid->device, 0);
}
#endif
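+	/* subscribe to device and port state events for the health check */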
+ INIT_IB_EVENT_HANDLER(&hdev->ibh_event_handler,
+ hdev->ibh_ibdev, kiblnd_event_handler);
+ ib_register_event_handler(&hdev->ibh_event_handler);
+
write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
old = dev->ibd_hdev;
CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
"Waiting for %d threads to terminate\n",
atomic_read(&kiblnd_data.kib_nthreads));
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(cfs_time_seconds(1));
+ schedule_timeout_uninterruptible(cfs_time_seconds(1));
}
/* fall through */
"%s: waiting for %d peers to disconnect\n",
libcfs_nid2str(ni->ni_nid),
atomic_read(&net->ibn_npeers));
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(cfs_time_seconds(1));
+ schedule_timeout_uninterruptible(cfs_time_seconds(1));
}
kiblnd_net_fini_pools(net);
LASSERT(kiblnd_data.kib_init == IBLND_INIT_NOTHING);
- try_module_get(THIS_MODULE);
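+	/* fails only while the module is being unloaded; don't ignore it */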
+ if (!try_module_get(THIS_MODULE))
+ goto failed;
+
memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */
rwlock_init(&kiblnd_data.kib_global_lock);
goto failed;
}
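+	/* back-pointer used by kiblnd_set_ni_fatal_on() */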
+ net->ibn_ni = ni;
net->ibn_incarnation = ktime_get_real_ns() / NSEC_PER_USEC;
kiblnd_tunables_setup(ni);
write_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
ibdev->ibd_nnets++;
list_add_tail(&net->ibn_list, &ibdev->ibd_nets);
+	/* for health check: if the port is already down, mark the new
+	 * NI fatal right away */
+ if (ibdev->ibd_hdev->ibh_state == IBLND_DEV_PORT_DOWN)
+ kiblnd_set_ni_fatal_on(ibdev->ibd_hdev, 1);
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
net->ibn_init = IBLND_INIT_ALL;
return -ENETDOWN;
}
-static struct lnet_lnd the_o2iblnd = {
+static const struct lnet_lnd the_o2iblnd = {
.lnd_type = O2IBLND,
.lnd_startup = kiblnd_startup,
.lnd_shutdown = kiblnd_shutdown,
.lnd_ctl = kiblnd_ctl,
- .lnd_query = kiblnd_query,
.lnd_send = kiblnd_send,
.lnd_recv = kiblnd_recv,
};
{
int rc;
- CLASSERT(sizeof(struct kib_msg) <= IBLND_MSG_SIZE);
- CLASSERT(offsetof(struct kib_msg,
- ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) <=
- IBLND_MSG_SIZE);
- CLASSERT(offsetof(struct kib_msg,
- ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS])
- <= IBLND_MSG_SIZE);
+ BUILD_BUG_ON(sizeof(struct kib_msg) > IBLND_MSG_SIZE);
+ BUILD_BUG_ON(offsetof(struct kib_msg,
+ ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) >
+ IBLND_MSG_SIZE);
+ BUILD_BUG_ON(offsetof(struct kib_msg,
+ ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) >
+ IBLND_MSG_SIZE);
rc = kiblnd_tunables_init();
if (rc != 0)