Whamcloud - gitweb
LU-12901 o2iblnd: retry qp creation with reduced queue depth
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd.h
index 7a20963..2f7573c 100644 (file)
  * Author: Eric Barton <eric@bartonsoftware.com>
  */
 
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#if defined(NEED_LOCKDEP_IS_HELD_DISCARD_CONST) \
+ && defined(CONFIG_LOCKDEP) \
+ && defined(lockdep_is_held)
+#undef lockdep_is_held
+       #define lockdep_is_held(lock) \
+               lock_is_held((struct lockdep_map *)&(lock)->dep_map)
+#endif
+
 #ifdef HAVE_COMPAT_RDMA
 #include <linux/compat-2.6.h>
 
 #undef NEED_KTIME_GET_REAL_NS
 #endif
 
+/* MOFED has its own bitmap_alloc backport */
+#define HAVE_BITMAP_ALLOC 1
+
 #endif
 
-#include <linux/module.h>
-#include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/string.h>
@@ -79,8 +91,6 @@
 #include <lnet/lib-lnet.h>
 
 #define IBLND_PEER_HASH_SIZE           101     /* # peer_ni lists */
-/* # scheduler loops before reschedule */
-#define IBLND_RESCHED                  100
 
 #define IBLND_N_SCHED                  2
 #define IBLND_N_SCHED_HIGH             4
 struct kib_tunables {
        int              *kib_dev_failover;     /* HCA failover */
        unsigned int     *kib_service;          /* IB service number */
-       int              *kib_min_reconnect_interval; /* first failed connection retry... */
-       int              *kib_max_reconnect_interval; /* ...exponentially increasing to this */
        int              *kib_cksum;            /* checksum struct kib_msg? */
        int              *kib_timeout;          /* comms timeout (seconds) */
        int              *kib_keepalive;        /* keepalive timeout (seconds) */
-       int              *kib_ntx;              /* # tx descs */
        char            **kib_default_ipif;     /* default IPoIB interface */
        int              *kib_retry_count;
        int              *kib_rnr_retry_count;
@@ -115,21 +122,20 @@ extern struct kib_tunables  kiblnd_tunables;
 #define IBLND_CREDITS_MAX          ((typeof(((struct kib_msg *) 0)->ibm_credits)) - 1)  /* Max # of peer_ni credits */
 
 /* when eagerly to return credits */
-#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
+#define IBLND_CREDITS_HIGHWATER(t, conn) ((conn->ibc_version) == IBLND_MSG_VERSION_1 ? \
                                        IBLND_CREDIT_HIGHWATER_V1 : \
-                                       t->lnd_peercredits_hiw)
+                       min(t->lnd_peercredits_hiw, (__u32)conn->ibc_queue_depth - 1))
 
 #ifdef HAVE_RDMA_CREATE_ID_5ARG
-# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(ns, cb, \
-                                                                   dev, ps, \
-                                                                   qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+        rdma_create_id((ns) ? (ns) : &init_net, cb, dev, ps, qpt)
 #else
 # ifdef HAVE_RDMA_CREATE_ID_4ARG
-#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(cb, dev, \
-                                                                    ps, qpt)
+#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+         rdma_create_id(cb, dev, ps, qpt)
 # else
-#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) rdma_create_id(cb, dev, \
-                                                                    ps)
+#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+         rdma_create_id(cb, dev, ps)
 # endif
 #endif
 
@@ -202,12 +208,19 @@ struct kib_hca_dev {
        int                  ibh_page_shift;    /* page shift of current HCA */
        int                  ibh_page_size;     /* page size of current HCA */
        __u64                ibh_page_mask;     /* page mask of current HCA */
-       int                  ibh_mr_shift;      /* bits shift of max MR size */
        __u64                ibh_mr_size;       /* size of MR */
+       int                  ibh_max_qp_wr;     /* maximum work requests size */
 #ifdef HAVE_IB_GET_DMA_MR
        struct ib_mr        *ibh_mrs;           /* global MR */
 #endif
        struct ib_pd        *ibh_pd;            /* PD */
+       u8                   ibh_port;          /* port number */
+       struct ib_event_handler
+                            ibh_event_handler; /* IB event handler */
+       int                  ibh_state;         /* device status */
+#define IBLND_DEV_PORT_DOWN     0
+#define IBLND_DEV_PORT_ACTIVE   1
+#define IBLND_DEV_FATAL         2
        struct kib_dev           *ibh_dev;           /* owner */
        atomic_t             ibh_ref;           /* refcount */
 };
@@ -377,6 +390,7 @@ struct kib_net {
        struct kib_fmr_poolset  **ibn_fmr_ps;   /* fmr pool-set */
 
        struct kib_dev          *ibn_dev;       /* underlying IB device */
+       struct lnet_ni          *ibn_ni;        /* LNet interface */
 };
 
 #define KIB_THREAD_SHIFT               16
@@ -423,6 +437,8 @@ struct kib_data {
        struct list_head        kib_reconn_list;
        /* peers wait for reconnection */
        struct list_head        kib_reconn_wait;
+       /* connections wait for completion */
+       struct list_head        kib_connd_waits;
        /*
         * The second that peers are pulled out from \a kib_reconn_wait
         * for reconnection.
@@ -562,8 +578,6 @@ struct kib_rx {                                     /* receive message */
        struct kib_conn        *rx_conn;
        /* # bytes received (-1 while posted) */
        int                     rx_nob;
-       /* completion status */
-       enum ib_wc_status       rx_status;
        /* message buffer (host vaddr) */
        struct kib_msg         *rx_msg;
        /* message buffer (I/O addr) */
@@ -678,6 +692,8 @@ struct kib_conn {
        __u16                   ibc_queue_depth;
        /* connections max frags */
        __u16                   ibc_max_frags;
+       /* count of timeout txs waiting on cq */
+       __u16                   ibc_waits;
        /* receive buffers owned */
        unsigned int            ibc_nrx:16;
        /* scheduled for attention */
@@ -756,12 +772,16 @@ struct kib_peer_ni {
        unsigned char           ibp_races;
        /* # consecutive reconnection attempts to this peer */
        unsigned int            ibp_reconnected;
+       /* number of total active retries */
+       unsigned int            ibp_retries;
        /* errno on closing this peer_ni */
        int                     ibp_error;
        /* max map_on_demand */
        __u16                   ibp_max_frags;
        /* max_peer_credits */
        __u16                   ibp_queue_depth;
+       /* reduced value which allows conn to be created if max fails */
+       __u16                   ibp_queue_depth_mod;
 };
 
 #ifndef HAVE_IB_INC_RKEY
@@ -783,6 +803,12 @@ extern void kiblnd_hdev_destroy(struct kib_hca_dev *hdev);
 
 int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
 
+static inline int kiblnd_timeout(void)
+{
+       return *kiblnd_tunables.kib_timeout ? *kiblnd_tunables.kib_timeout :
+               lnet_get_lnd_timeout();
+}
+
 static inline int
 kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
 {
@@ -942,7 +968,7 @@ kiblnd_need_noop(struct kib_conn *conn)
        tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
         if (conn->ibc_outstanding_credits <
-           IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
+           IBLND_CREDITS_HIGHWATER(tunables, conn) &&
             !kiblnd_send_keepalive(conn))
                 return 0; /* No need to send NOOP */
 
@@ -1058,7 +1084,7 @@ kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index)
         return rd->rd_frags[index].rf_addr;
 }
 
-static inline __u32
+static inline int
 kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
 {
         return rd->rd_frags[index].rf_nob;
@@ -1133,8 +1159,8 @@ static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
 
 #ifndef HAVE_IB_SG_DMA_ADDRESS
 #include <linux/scatterlist.h>
-#define ib_sg_dma_address(dev, sg)     sg_dma_address((dev), (sg))
-#define ib_sg_dma_len(dev, sg)         sg_dma_len((dev), (sg))
+#define ib_sg_dma_address(dev, sg)     sg_dma_address(sg)
+#define ib_sg_dma_len(dev, sg)         sg_dma_len(sg)
 #endif
 
 static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
@@ -1215,7 +1241,7 @@ int kiblnd_post_rx(struct kib_rx *rx, int credit);
 
 int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
 int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-               int delayed, unsigned int niov, struct kvec *iov,
-               lnet_kiov_t *kiov, unsigned int offset, unsigned int mlen,
+               int delayed, unsigned int niov,
+               struct bio_vec *kiov, unsigned int offset, unsigned int mlen,
                unsigned int rlen);