* Author: Eric Barton <eric@bartonsoftware.com>
*/
+#include <linux/module.h>
+#include <linux/kernel.h>
+
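+/* lockdep_is_held() takes a const pointer on some kernels; redefine it to cast
+ * away the const qualifier on the dep_map so it can be used on both */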
+#if defined(NEED_LOCKDEP_IS_HELD_DISCARD_CONST) \
+ && defined(CONFIG_LOCKDEP) \
+ && defined(lockdep_is_held)
+#undef lockdep_is_held
+ #define lockdep_is_held(lock) \
+ lock_is_held((struct lockdep_map *)&(lock)->dep_map)
+#endif
+
#ifdef HAVE_COMPAT_RDMA
#include <linux/compat-2.6.h>
#undef NEED_KTIME_GET_REAL_NS
#endif
+/* MOFED has its own bitmap_alloc backport */
+#define HAVE_BITMAP_ALLOC 1
+
#endif
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/kmod.h>
#include <linux/sysctl.h>
#include <linux/pci.h>
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,32)
-#include <linux/pci-dma.h>
-#endif
#include <net/sock.h>
#include <linux/in.h>
#include <lnet/lib-lnet.h>
#define IBLND_PEER_HASH_SIZE 101 /* # peer_ni lists */
-/* # scheduler loops before reschedule */
-#define IBLND_RESCHED 100
#define IBLND_N_SCHED 2
#define IBLND_N_SCHED_HIGH 4
struct kib_tunables {
int *kib_dev_failover; /* HCA failover */
unsigned int *kib_service; /* IB service number */
- int *kib_min_reconnect_interval; /* first failed connection retry... */
- int *kib_max_reconnect_interval; /* ...exponentially increasing to this */
int *kib_cksum; /* checksum struct kib_msg? */
int *kib_timeout; /* comms timeout (seconds) */
int *kib_keepalive; /* keepalive timeout (seconds) */
- int *kib_ntx; /* # tx descs */
char **kib_default_ipif; /* default IPoIB interface */
int *kib_retry_count;
int *kib_rnr_retry_count;
#define IBLND_CREDITS_MAX ((typeof(((struct kib_msg *) 0)->ibm_credits)) - 1) /* Max # of peer_ni credits */
/* when to eagerly return credits */
-#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
+#define IBLND_CREDITS_HIGHWATER(t, conn) ((conn->ibc_version) == IBLND_MSG_VERSION_1 ? \
IBLND_CREDIT_HIGHWATER_V1 : \
- t->lnd_peercredits_hiw)
+ min(t->lnd_peercredits_hiw, (__u32)conn->ibc_queue_depth - 1))
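+/* rdma_create_id() has grown arguments over time: 5-arg kernels take a network
+ * namespace, 4-arg kernels take a QP type but no namespace, older kernels take neither */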
#ifdef HAVE_RDMA_CREATE_ID_5ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
- cb, dev, \
- ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+ rdma_create_id((ns) ? (ns) : &init_net, cb, dev, ps, qpt)
#else
# ifdef HAVE_RDMA_CREATE_ID_4ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, \
- ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+ rdma_create_id(cb, dev, ps, qpt)
# else
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+ rdma_create_id(cb, dev, ps)
# endif
#endif
#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
/* per send: one LNet msg WR plus its transfer chain */
-#define IBLND_CQ_ENTRIES(c) \
- (IBLND_RECV_WRS(c) + 2 * c->ibc_queue_depth)
+#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + kiblnd_send_wrs(c))
struct kib_hca_dev;
int ibh_page_shift; /* page shift of current HCA */
int ibh_page_size; /* page size of current HCA */
__u64 ibh_page_mask; /* page mask of current HCA */
- int ibh_mr_shift; /* bits shift of max MR size */
__u64 ibh_mr_size; /* size of MR */
+ int ibh_max_qp_wr; /* max # of work requests per QP */
#ifdef HAVE_IB_GET_DMA_MR
struct ib_mr *ibh_mrs; /* global MR */
#endif
struct ib_pd *ibh_pd; /* PD */
+ u8 ibh_port; /* port number */
+ struct ib_event_handler
+ ibh_event_handler; /* IB event handler */
+ int ibh_state; /* device status */
+#define IBLND_DEV_PORT_DOWN 0
+#define IBLND_DEV_PORT_ACTIVE 1
+#define IBLND_DEV_FATAL 2
struct kib_dev *ibh_dev; /* owner */
atomic_t ibh_ref; /* refcount */
};
time64_t fpo_deadline; /* deadline of this pool */
int fpo_failed; /* fmr pool is failed */
int fpo_map_count; /* # of mapped FMR */
+ bool fpo_is_fmr; /* True if FMR pools allocated */
};
struct kib_fmr {
u32 fmr_key;
};
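+/* Oracle's OFED variant of ib_fmr_pool_map_phys() takes an extra trailing argument;
+ * hide the difference behind kib_fmr_pool_map() */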
+#ifdef HAVE_ORACLE_OFED_EXTENSIONS
+#define kib_fmr_pool_map(pool, pgs, n, iov) \
+ ib_fmr_pool_map_phys((pool), (pgs), (n), (iov), NULL)
+#else
+#define kib_fmr_pool_map(pool, pgs, n, iov) \
+ ib_fmr_pool_map_phys((pool), (pgs), (n), (iov))
+#endif
+
struct kib_net {
/* chain on struct kib_dev::ibd_nets */
struct list_head ibn_list;
struct kib_fmr_poolset **ibn_fmr_ps; /* fmr pool-set */
struct kib_dev *ibn_dev; /* underlying IB device */
+ struct lnet_ni *ibn_ni; /* LNet interface */
};
#define KIB_THREAD_SHIFT 16
struct kib_conn *rx_conn;
/* # bytes received (-1 while posted) */
int rx_nob;
- /* completion status */
- enum ib_wc_status rx_status;
/* message buffer (host vaddr) */
struct kib_msg *rx_msg;
/* message buffer (I/O addr) */
__u64 rx_msgaddr;
/* for dma_unmap_single() */
- DECLARE_PCI_UNMAP_ADDR(rx_msgunmap);
+ DEFINE_DMA_UNMAP_ADDR(rx_msgunmap);
/* receive work item... */
struct ib_recv_wr rx_wrq;
/* ...and its memory */
/* message buffer (I/O addr) */
__u64 tx_msgaddr;
/* for dma_unmap_single() */
- DECLARE_PCI_UNMAP_ADDR(tx_msgunmap);
+ DEFINE_DMA_UNMAP_ADDR(tx_msgunmap);
/** sge for tx_msgaddr */
struct ib_sge tx_msgsge;
/* # send work items */
struct list_head ibc_tx_queue_rsrvd;
/* active tx awaiting completion */
struct list_head ibc_active_txs;
+ /* zombie tx awaiting done */
+ struct list_head ibc_zombie_txs;
/* serialise */
spinlock_t ibc_lock;
/* the rx descs */
int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
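+/* use the o2iblnd timeout if one was configured, otherwise fall back to the global LND timeout */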
+static inline int kiblnd_timeout(void)
+{
+ return *kiblnd_tunables.kib_timeout ? *kiblnd_tunables.kib_timeout :
+ lnet_get_lnd_timeout();
+}
+
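+/* clamp the configured concurrent sends; version 1 peers are limited to
+ * [IBLND_MSG_QUEUE_SIZE_V1 / 2, IBLND_MSG_QUEUE_SIZE_V1 * 2] */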
+static inline int
+kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
+{
+ struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+ int concurrent_sends;
+
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
+ concurrent_sends = tunables->lnd_concurrent_sends;
+
+ if (version == IBLND_MSG_VERSION_1) {
+ if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
+ return IBLND_MSG_QUEUE_SIZE_V1 * 2;
+
+ if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
+ return IBLND_MSG_QUEUE_SIZE_V1 / 2;
+ }
+
+ return concurrent_sends;
+}
+
static inline void
kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev)
{
tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
if (conn->ibc_outstanding_credits <
- IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
+ IBLND_CREDITS_HIGHWATER(tunables, conn) &&
!kiblnd_send_keepalive(conn))
return 0; /* No need to send NOOP */
return rd->rd_frags[index].rf_addr;
}
-static inline __u32
+static inline int
kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
{
return rd->rd_frags[index].rf_nob;
ib_dma_unmap_sg(dev, sg, nents, direction);
}
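+/* kernels without ib_sg_dma_address()/ib_sg_dma_len() map them to the generic
+ * scatterlist accessors */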
+#ifndef HAVE_IB_SG_DMA_ADDRESS
+#include <linux/scatterlist.h>
+#define ib_sg_dma_address(dev, sg) sg_dma_address(sg)
+#define ib_sg_dma_len(dev, sg) sg_dma_len(sg)
+#endif
+
static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
struct scatterlist *sg)
{
#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
+void kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs);
void kiblnd_map_rx_descs(struct kib_conn *conn);
void kiblnd_unmap_rx_descs(struct kib_conn *conn);
void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node);
struct rdma_cm_event *event);
int kiblnd_translate_mtu(int value);
-int kiblnd_dev_failover(struct kib_dev *dev);
+int kiblnd_dev_failover(struct kib_dev *dev, struct net *ns);
int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
lnet_nid_t nid);
void kiblnd_destroy_peer(struct kib_peer_ni *peer);
int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
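+/* receive buffers are described by bio_vec fragments only; the separate kvec
+ * has been dropped from the LND receive path */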
int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
- int delayed, unsigned int niov, struct kvec *iov,
- lnet_kiov_t *kiov, unsigned int offset, unsigned int mlen,
+ int delayed, unsigned int niov,
+ struct bio_vec *kiov, unsigned int offset, unsigned int mlen,
unsigned int rlen);