* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2013, Intel Corporation.
+ * Copyright (c) 2011, 2015, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Author: Eric Barton <eric@bartonsoftware.com>
*/
+#ifdef HAVE_COMPAT_RDMA
+#include <linux/compat-2.6.h>
+#endif
+
#include <linux/version.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/pci-dma.h>
#endif
-#ifdef HAVE_COMPAT_RDMA
-#include <linux/compat-2.6.h>
-#endif
-
#include <net/sock.h>
#include <linux/in.h>
int *kib_ib_mtu; /* IB MTU */
int *kib_map_on_demand; /* map-on-demand if RD has more fragments
* than this value, 0 disable map-on-demand */
- int *kib_pmr_pool_size; /* # physical MR in pool */
int *kib_fmr_pool_size; /* # FMRs in pool */
int *kib_fmr_flush_trigger; /* When to trigger FMR flush */
int *kib_fmr_cache; /* enable FMR pool cache? */
/* Pools (shared by connections on each CPT) */
/* These pools can grow at runtime, so don't need give a very large value */
#define IBLND_TX_POOL 256
-#define IBLND_PMR_POOL 256
#define IBLND_FMR_POOL 256
#define IBLND_FMR_POOL_FLUSH 192
-/* TX messages (shared by all connections) */
-#define IBLND_TX_MSGS() (*kiblnd_tunables.kib_ntx)
-
/* RX messages (per connection) */
-#define IBLND_RX_MSGS(v) (IBLND_MSG_QUEUE_SIZE(v) * 2 + IBLND_OOB_MSGS(v))
-#define IBLND_RX_MSG_BYTES(v) (IBLND_RX_MSGS(v) * IBLND_MSG_SIZE)
-#define IBLND_RX_MSG_PAGES(v) ((IBLND_RX_MSG_BYTES(v) + PAGE_SIZE - 1) / PAGE_SIZE)
+#define IBLND_RX_MSGS(c) \
+ ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version))
+#define IBLND_RX_MSG_BYTES(c) (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE)
+#define IBLND_RX_MSG_PAGES(c) \
+ ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE)
/* WRs and CQEs (per connection) */
-#define IBLND_RECV_WRS(v) IBLND_RX_MSGS(v)
-#define IBLND_SEND_WRS(v) ((IBLND_RDMA_FRAGS(v) + 1) * IBLND_CONCURRENT_SENDS(v))
-#define IBLND_CQ_ENTRIES(v) (IBLND_RECV_WRS(v) + IBLND_SEND_WRS(v))
+#define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c)
+#define IBLND_SEND_WRS(c) \
+ ((c->ibc_max_frags + 1) * IBLND_CONCURRENT_SENDS(c->ibc_version))
+#define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c))
struct kib_hca_dev;
__u64 ibh_page_mask; /* page mask of current HCA */
int ibh_mr_shift; /* bits shift of max MR size */
__u64 ibh_mr_size; /* size of MR */
- int ibh_nmrs; /* # of global MRs */
- struct ib_mr **ibh_mrs; /* global MR */
+ struct ib_mr *ibh_mrs; /* global MR */
struct ib_pd *ibh_pd; /* PD */
kib_dev_t *ibh_dev; /* owner */
atomic_t ibh_ref; /* refcount */
struct page *ibp_pages[0]; /* page array */
} kib_pages_t;
-struct kib_pmr_pool;
-
-typedef struct {
- struct list_head pmr_list; /* chain node */
- struct ib_phys_buf *pmr_ipb; /* physical buffer */
- struct ib_mr *pmr_mr; /* IB MR */
- struct kib_pmr_pool *pmr_pool; /* owner of this MR */
- __u64 pmr_iova; /* Virtual I/O address */
- int pmr_refcount; /* reference count */
-} kib_phys_mr_t;
-
struct kib_pool;
struct kib_poolset;
kib_pages_t *tpo_tx_pages; /* premapped tx msg pages */
} kib_tx_pool_t;
-typedef struct {
- kib_poolset_t pps_poolset; /* pool-set */
-} kib_pmr_poolset_t;
-
-typedef struct kib_pmr_pool {
- struct kib_hca_dev *ppo_hdev; /* device for this pool */
- kib_pool_t ppo_pool; /* pool */
-} kib_pmr_pool_t;
-
typedef struct
{
spinlock_t fps_lock; /* serialize */
kib_tx_poolset_t **ibn_tx_ps; /* tx pool-set */
kib_fmr_poolset_t **ibn_fmr_ps; /* fmr pool-set */
- kib_pmr_poolset_t **ibn_pmr_ps; /* pmr pool-set */
kib_dev_t *ibn_dev; /* underlying IB device */
} kib_net_t;
#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */
#define IBLND_REJECT_CONN_STALE 5 /* stale peer */
-#define IBLND_REJECT_RDMA_FRAGS 6 /* Fatal: peer's rdma frags can't match mine */
-#define IBLND_REJECT_MSG_QUEUE_SIZE 7 /* Fatal: peer's msg queue size can't match mine */
+/* peer's rdma frags doesn't match mine */
+#define IBLND_REJECT_RDMA_FRAGS 6
+/* peer's msg queue size doesn't match mine */
+#define IBLND_REJECT_MSG_QUEUE_SIZE 7
/***********************************************************************/
struct scatterlist *tx_frags;
/* rdma phys page addrs */
__u64 *tx_pages;
- union {
- /* MR for physical buffer */
- kib_phys_mr_t *pmr;
- /* FMR */
- kib_fmr_t fmr;
- } tx_u;
+ /* FMR */
+ kib_fmr_t fmr;
/* dma direction */
int tx_dmadir;
} kib_tx_t;
int ibc_reserved_credits;
/* set on comms error */
int ibc_comms_error;
+ /* connections queue depth */
+ __u16 ibc_queue_depth;
+ /* connections max frags */
+ __u16 ibc_max_frags;
/* receive buffers owned */
- unsigned int ibc_nrx:16;
+ unsigned short ibc_nrx;
+ /** rejected by connection race */
+ unsigned short ibc_conn_race:1;
/* scheduled for attention */
- unsigned int ibc_scheduled:1;
+ unsigned short ibc_scheduled:1;
/* CQ callback fired */
- unsigned int ibc_ready:1;
+ unsigned short ibc_ready:1;
/* time of last send */
unsigned long ibc_last_send;
/** link chain for kiblnd_check_conns only */
int ibp_error;
/* when (in jiffies) I was last alive */
cfs_time_t ibp_last_alive;
+ /* max map_on_demand */
+ __u16 ibp_max_frags;
+ /* max_peer_credits */
+ __u16 ibp_queue_depth;
} kib_peer_t;
extern kib_data_t kiblnd_data;
{
return (*kiblnd_tunables.kib_keepalive > 0) &&
cfs_time_after(jiffies, conn->ibc_last_send +
- *kiblnd_tunables.kib_keepalive*HZ);
+ msecs_to_jiffies(*kiblnd_tunables.kib_keepalive *
+ MSEC_PER_SEC));
}
static inline int
offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
}
-#ifdef HAVE_OFED_IB_DMA_MAP
-
static inline __u64
kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
{
#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-#else
-
-static inline __u64
-kiblnd_dma_mapping_error(struct ib_device *dev, dma_addr_t dma_addr)
-{
- return dma_mapping_error(dma_addr);
-}
-
-static inline dma_addr_t kiblnd_dma_map_single(struct ib_device *dev,
- void *msg, size_t size,
- enum dma_data_direction direction)
-{
- return dma_map_single(dev->dma_device, msg, size, direction);
-}
-
-static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
- dma_addr_t addr, size_t size,
- enum dma_data_direction direction)
-{
- dma_unmap_single(dev->dma_device, addr, size, direction);
-}
-
-#define KIBLND_UNMAP_ADDR_SET(p, m, a) pci_unmap_addr_set(p, m, a)
-#define KIBLND_UNMAP_ADDR(p, m, a) pci_unmap_addr(p, m)
-
-static inline int kiblnd_dma_map_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- return dma_map_sg(dev->dma_device, sg, nents, direction);
-}
-
-static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- return dma_unmap_sg(dev->dma_device, sg, nents, direction);
-}
-
-
-static inline dma_addr_t kiblnd_sg_dma_address(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg_dma_address(sg);
-}
-
-
-static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg_dma_len(sg);
-}
-
-#define KIBLND_CONN_PARAM(e) ((e)->private_data)
-#define KIBLND_CONN_PARAM_LEN(e) ((e)->private_data_len)
-
-#endif
-
struct ib_mr *kiblnd_find_rd_dma_mr(kib_hca_dev_t *hdev,
- kib_rdma_desc_t *rd);
-struct ib_mr *kiblnd_find_dma_mr(kib_hca_dev_t *hdev,
- __u64 addr, __u64 size);
+ kib_rdma_desc_t *rd,
+ int negotiated_nfrags);
void kiblnd_map_rx_descs(kib_conn_t *conn);
void kiblnd_unmap_rx_descs(kib_conn_t *conn);
-int kiblnd_map_tx(lnet_ni_t *ni, kib_tx_t *tx,
- kib_rdma_desc_t *rd, int nfrags);
-void kiblnd_unmap_tx(lnet_ni_t *ni, kib_tx_t *tx);
void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
int npages, __u64 iov, kib_fmr_t *fmr);
void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
-int kiblnd_pmr_pool_map(kib_pmr_poolset_t *pps, kib_hca_dev_t *hdev,
- kib_rdma_desc_t *rd, __u64 *iova, kib_phys_mr_t **pp_pmr);
-void kiblnd_pmr_pool_unmap(kib_phys_mr_t *pmr);
-
-int kiblnd_startup (lnet_ni_t *ni);
-void kiblnd_shutdown (lnet_ni_t *ni);
-int kiblnd_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg);
-void kiblnd_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);
-
int kiblnd_tunables_init(void);
void kiblnd_tunables_fini(void);
int kiblnd_failover_thread (void *arg);
int kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages);
-void kiblnd_free_pages (kib_pages_t *p);
int kiblnd_cm_callback(struct rdma_cm_id *cmid,
struct rdma_cm_event *event);
int kiblnd_translate_mtu(int value);
int kiblnd_dev_failover(kib_dev_t *dev);
-int kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
+int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
void kiblnd_destroy_peer (kib_peer_t *peer);
+void kiblnd_connect_peer(kib_peer_t *peer);
void kiblnd_destroy_dev (kib_dev_t *dev);
void kiblnd_unlink_peer_locked (kib_peer_t *peer);
-void kiblnd_peer_alive (kib_peer_t *peer);
kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
-void kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error);
int kiblnd_close_stale_conns_locked (kib_peer_t *peer,
int version, __u64 incarnation);
int kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why);
-void kiblnd_connreq_done(kib_conn_t *conn, int status);
-kib_conn_t *kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid,
- int state, int version);
+kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
+ int state, int version);
void kiblnd_destroy_conn (kib_conn_t *conn);
void kiblnd_close_conn (kib_conn_t *conn, int error);
void kiblnd_close_conn_locked (kib_conn_t *conn, int error);
-int kiblnd_init_rdma (kib_conn_t *conn, kib_tx_t *tx, int type,
- int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie);
-
void kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid);
-void kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn);
-void kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn);
-void kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob);
void kiblnd_txlist_done(lnet_ni_t *ni, struct list_head *txlist, int status);
-void kiblnd_check_sends (kib_conn_t *conn);
void kiblnd_qp_event(struct ib_event *event, void *arg);
void kiblnd_cq_event(struct ib_event *event, void *arg);
int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int niov, struct kvec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int mlen, unsigned int rlen);