X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fo2iblnd%2Fo2iblnd.h;h=8ff3b8ce0f2257968bb941db448daa46f5a9985e;hb=7c8ad11ef08f0f2f886004ae4a56f67722c16d5c;hp=ddee13d46cd6587338095c047d81264444644644e81;hpb=51965d73a7ab75bc0f11c7c180d35a68f5571670;p=fs%2Flustre-release.git

diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h
index ddee13d..8ff3b8c 100644
--- a/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/lnet/klnds/o2iblnd/o2iblnd.h
@@ -34,6 +34,17 @@
  * Author: Eric Barton
  */
 
+#include 
+#include 
+
+#if defined(NEED_LOCKDEP_IS_HELD_DISCARD_CONST) \
+	&& defined(CONFIG_LOCKDEP) \
+	&& defined(lockdep_is_held)
+#undef lockdep_is_held
+	#define lockdep_is_held(lock) \
+		lock_is_held((struct lockdep_map *)&(lock)->dep_map)
+#endif
+
 #ifdef HAVE_COMPAT_RDMA
 #include 
 
@@ -41,11 +52,11 @@
 #undef NEED_KTIME_GET_REAL_NS
 #endif
 
+/* MOFED has its own bitmap_alloc backport */
+#define HAVE_BITMAP_ALLOC 1
+
 #endif
 
-#include 
-#include 
-#include 
 #include 
 #include 
 #include 
@@ -65,9 +76,6 @@
 #include 
 #include 
 #include 
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,32)
-#include 
-#endif
 #include 
 #include 
@@ -83,8 +91,6 @@
 #include 
 
 #define IBLND_PEER_HASH_SIZE		101	/* # peer_ni lists */
-/* # scheduler loops before reschedule */
-#define IBLND_RESCHED			100
 
 #define IBLND_N_SCHED			2
 #define IBLND_N_SCHED_HIGH		4
@@ -92,12 +98,9 @@ struct kib_tunables {
 	int		*kib_dev_failover;	/* HCA failover */
 	unsigned int	*kib_service;		/* IB service number */
-	int		*kib_min_reconnect_interval; /* first failed connection retry... */
-	int		*kib_max_reconnect_interval; /* ...exponentially increasing to this */
 	int		*kib_cksum;		/* checksum struct kib_msg? */
 	int		*kib_timeout;		/* comms timeout (seconds) */
 	int		*kib_keepalive;		/* keepalive timeout (seconds) */
-	int		*kib_ntx;		/* # tx descs */
 	char		**kib_default_ipif;	/* default IPoIB interface */
 	int		*kib_retry_count;
 	int		*kib_rnr_retry_count;
@@ -119,20 +122,20 @@ extern struct kib_tunables kiblnd_tunables;
 
 #define IBLND_CREDITS_MAX	((typeof(((struct kib_msg *) 0)->ibm_credits)) - 1) /* Max # of peer_ni credits */
 
 /* when eagerly to return credits */
-#define IBLND_CREDITS_HIGHWATER(t, v)	((v) == IBLND_MSG_VERSION_1 ? \
-					 IBLND_CREDIT_HIGHWATER_V1 : \
-					 t->lnd_peercredits_hiw)
+#define IBLND_CREDITS_HIGHWATER(t, conn) ((conn->ibc_version) == IBLND_MSG_VERSION_1 ? \
+					  IBLND_CREDIT_HIGHWATER_V1 : \
+					  min(t->lnd_peercredits_hiw, (__u32)conn->ibc_queue_depth - 1))
 
 #ifdef HAVE_RDMA_CREATE_ID_5ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
-								 cb, dev, \
-								 ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+	rdma_create_id((ns) ? (ns) : &init_net, cb, dev, ps, qpt)
 #else
 # ifdef HAVE_RDMA_CREATE_ID_4ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, \
-								 ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+	rdma_create_id(cb, dev, ps, qpt)
 # else
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+	rdma_create_id(cb, dev, ps)
 # endif
 #endif
 
@@ -162,8 +165,7 @@ extern struct kib_tunables kiblnd_tunables;
 #define IBLND_RECV_WRS(c)	IBLND_RX_MSGS(c)
 
 /* 2 = LNet msg + Transfer chain */
-#define IBLND_CQ_ENTRIES(c)	\
-	(IBLND_RECV_WRS(c) + 2 * c->ibc_queue_depth)
+#define IBLND_CQ_ENTRIES(c)	(IBLND_RECV_WRS(c) + kiblnd_send_wrs(c))
 
 struct kib_hca_dev;
 
@@ -206,12 +208,19 @@ struct kib_hca_dev {
 	int			ibh_page_shift;	/* page shift of current HCA */
 	int			ibh_page_size;	/* page size of current HCA */
 	__u64			ibh_page_mask;	/* page mask of current HCA */
-	int			ibh_mr_shift;	/* bits shift of max MR size */
 	__u64			ibh_mr_size;	/* size of MR */
+	int			ibh_max_qp_wr;	/* maximum work requests size */
 #ifdef HAVE_IB_GET_DMA_MR
 	struct ib_mr		*ibh_mrs;	/* global MR */
 #endif
 	struct ib_pd		*ibh_pd;	/* PD */
+	u8			ibh_port;	/* port number */
+	struct ib_event_handler
+				ibh_event_handler; /* IB event handler */
+	int			ibh_state;	/* device status */
+#define IBLND_DEV_PORT_DOWN	0
+#define IBLND_DEV_PORT_ACTIVE	1
+#define IBLND_DEV_FATAL		2
 	struct kib_dev		*ibh_dev;	/* owner */
 	atomic_t		ibh_ref;	/* refcount */
 };
@@ -359,6 +368,14 @@ struct kib_fmr {
 	u32			fmr_key;
 };
 
+#ifdef HAVE_ORACLE_OFED_EXTENSIONS
+#define kib_fmr_pool_map(pool, pgs, n, iov) \
+	ib_fmr_pool_map_phys((pool), (pgs), (n), (iov), NULL)
+#else
+#define kib_fmr_pool_map(pool, pgs, n, iov) \
+	ib_fmr_pool_map_phys((pool), (pgs), (n), (iov))
+#endif
+
 struct kib_net {
 	/* chain on struct kib_dev::ibd_nets */
 	struct list_head	ibn_list;
@@ -373,6 +390,7 @@ struct kib_net {
 	struct kib_fmr_poolset	**ibn_fmr_ps;	/* fmr pool-set */
 
 	struct kib_dev		*ibn_dev;	/* underlying IB device */
+	struct lnet_ni		*ibn_ni;	/* LNet interface */
 };
 
 #define KIB_THREAD_SHIFT		16
@@ -558,14 +576,12 @@ struct kib_rx {			/* receive message */
 	struct kib_conn		*rx_conn;
 	/* # bytes received (-1 while posted) */
 	int			rx_nob;
-	/* completion status */
-	enum ib_wc_status	rx_status;
 	/* message buffer (host vaddr) */
 	struct kib_msg		*rx_msg;
 	/* message buffer (I/O addr) */
 	__u64			rx_msgaddr;
 	/* for dma_unmap_single() */
-	DECLARE_PCI_UNMAP_ADDR(rx_msgunmap);
+	DEFINE_DMA_UNMAP_ADDR(rx_msgunmap);
 	/* receive work item... */
 	struct ib_recv_wr	rx_wrq;
 	/* ...and its memory */
@@ -605,7 +621,7 @@ struct kib_tx {			/* transmit message */
 	/* message buffer (I/O addr) */
 	__u64			tx_msgaddr;
 	/* for dma_unmap_single() */
-	DECLARE_PCI_UNMAP_ADDR(tx_msgunmap);
+	DEFINE_DMA_UNMAP_ADDR(tx_msgunmap);
 	/** sge for tx_msgaddr */
 	struct ib_sge		tx_msgsge;
 	/* # send work items */
@@ -696,6 +712,8 @@ struct kib_conn {
 	struct list_head	ibc_tx_queue_rsrvd;
 	/* active tx awaiting completion */
 	struct list_head	ibc_active_txs;
+	/* zombie tx awaiting done */
+	struct list_head	ibc_zombie_txs;
 	/* serialise */
 	spinlock_t		ibc_lock;
 	/* the rx descs */
@@ -750,6 +768,8 @@ struct kib_peer_ni {
 	unsigned char		ibp_races;
 	/* # consecutive reconnection attempts to this peer */
 	unsigned int		ibp_reconnected;
+	/* number of total active retries */
+	unsigned int		ibp_retries;
 	/* errno on closing this peer_ni */
 	int			ibp_error;
 	/* max map_on_demand */
@@ -777,6 +797,32 @@ extern void kiblnd_hdev_destroy(struct kib_hca_dev *hdev);
 
 int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
 
+static inline int kiblnd_timeout(void)
+{
+	return *kiblnd_tunables.kib_timeout ? *kiblnd_tunables.kib_timeout :
+		lnet_get_lnd_timeout();
+}
+
+static inline int
+kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
+{
+	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+	int concurrent_sends;
+
+	tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
+	concurrent_sends = tunables->lnd_concurrent_sends;
+
+	if (version == IBLND_MSG_VERSION_1) {
+		if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2)
+			return IBLND_MSG_QUEUE_SIZE_V1 * 2;
+
+		if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2)
+			return IBLND_MSG_QUEUE_SIZE_V1 / 2;
+	}
+
+	return concurrent_sends;
+}
+
 static inline void
 kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev)
 {
@@ -916,7 +962,7 @@ kiblnd_need_noop(struct kib_conn *conn)
 	tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
 	if (conn->ibc_outstanding_credits <
-	    IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
+	    IBLND_CREDITS_HIGHWATER(tunables, conn) &&
 	    !kiblnd_send_keepalive(conn))
 		return 0; /* No need to send NOOP */
@@ -1032,7 +1078,7 @@ kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index)
 	return rd->rd_frags[index].rf_addr;
 }
 
-static inline __u32
+static inline int
 kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
 {
 	return rd->rd_frags[index].rf_nob;
@@ -1105,6 +1151,12 @@ static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
 	ib_dma_unmap_sg(dev, sg, nents, direction);
 }
 
+#ifndef HAVE_IB_SG_DMA_ADDRESS
+#include 
+#define ib_sg_dma_address(dev, sg)	sg_dma_address(sg)
+#define ib_sg_dma_len(dev, sg)		sg_dma_len(sg)
+#endif
+
 static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
 					  struct scatterlist *sg)
 {
@@ -1124,6 +1176,7 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
 
 #define KIBLND_CONN_PARAM(e)		((e)->param.conn.private_data)
 #define KIBLND_CONN_PARAM_LEN(e)	((e)->param.conn.private_data_len)
 
+void kiblnd_abort_txs(struct kib_conn *conn, struct list_head *txs);
 void kiblnd_map_rx_descs(struct kib_conn *conn);
 void kiblnd_unmap_rx_descs(struct kib_conn *conn);
 void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node);
@@ -1148,7 +1201,7 @@ int kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event);
 
 int kiblnd_translate_mtu(int value);
-int kiblnd_dev_failover(struct kib_dev *dev);
+int kiblnd_dev_failover(struct kib_dev *dev, struct net *ns);
 
 int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
 		       lnet_nid_t nid);
 void kiblnd_destroy_peer(struct kib_peer_ni *peer);
@@ -1182,7 +1235,7 @@ int kiblnd_post_rx(struct kib_rx *rx, int credit);
 
 int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
 int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-		int delayed, unsigned int niov, struct kvec *iov,
-		lnet_kiov_t *kiov, unsigned int offset, unsigned int mlen,
+		int delayed, unsigned int niov,
+		struct bio_vec *kiov, unsigned int offset, unsigned int mlen,
 		unsigned int rlen);
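Note on the IBLND_CREDITS_HIGHWATER() rework above: the macro now receives
the whole connection rather than only the protocol version, so for version-2
peers the high-water mark at which credits are eagerly returned is capped at
the negotiated queue depth minus one. The standalone sketch below is not part
of the patch; the version constants and sample values are illustrative
(IBLND_CREDIT_HIGHWATER_V1 is 7 in the real header), but it mimics the same
clamping:

#include <stdio.h>

#define IBLND_MSG_VERSION_1		1	/* illustrative value only */
#define IBLND_MSG_VERSION_2		2	/* illustrative value only */
#define IBLND_CREDIT_HIGHWATER_V1	7	/* fixed high-water for V1 peers */

/* mirrors min(t->lnd_peercredits_hiw, (__u32)conn->ibc_queue_depth - 1) */
static unsigned int
credits_highwater(int version, unsigned int peercredits_hiw,
		  unsigned int queue_depth)
{
	if (version == IBLND_MSG_VERSION_1)
		return IBLND_CREDIT_HIGHWATER_V1;

	return peercredits_hiw < queue_depth - 1 ?
	       peercredits_hiw : queue_depth - 1;
}

int main(void)
{
	/* hypothetical tunable lnd_peercredits_hiw = 64 against a
	 * connection that negotiated a queue depth of only 8 */
	printf("hiw = %u\n", credits_highwater(IBLND_MSG_VERSION_2, 64, 8));
	return 0;	/* prints "hiw = 7" */
}

Without the cap, a connection whose queue depth was negotiated down could be
asked to accumulate more outstanding credits than its receive queue holds;
the clamped value is what kiblnd_need_noop() now compares against when it
decides whether a NOOP must be sent to return credits.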