Whamcloud - gitweb
LU-13972 o2iblnd: Don't retry indefinitely
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd.h
index b0ca501..8ff3b8c 100644 (file)
  * Author: Eric Barton <eric@bartonsoftware.com>
  */
 
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#if defined(NEED_LOCKDEP_IS_HELD_DISCARD_CONST) \
+ && defined(CONFIG_LOCKDEP) \
+ && defined(lockdep_is_held)
+#undef lockdep_is_held /* drop the existing macro so it can be redefined below */
+       #define lockdep_is_held(lock) \
+               lock_is_held((struct lockdep_map *)&(lock)->dep_map) /* explicit cast to a non-const lockdep_map pointer */
+#endif /* NEED_LOCKDEP_IS_HELD_DISCARD_CONST && CONFIG_LOCKDEP && lockdep_is_held */
+
 #ifdef HAVE_COMPAT_RDMA
 #include <linux/compat-2.6.h>
 
 #undef NEED_KTIME_GET_REAL_NS
 #endif
 
+/* MOFED has its own bitmap_alloc backport */
+#define HAVE_BITMAP_ALLOC 1
+
 #endif
 
-#include <linux/module.h>
-#include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/string.h>
@@ -79,8 +91,6 @@
 #include <lnet/lib-lnet.h>
 
 #define IBLND_PEER_HASH_SIZE           101     /* # peer_ni lists */
-/* # scheduler loops before reschedule */
-#define IBLND_RESCHED                  100
 
 #define IBLND_N_SCHED                  2
 #define IBLND_N_SCHED_HIGH             4
 struct kib_tunables {
        int              *kib_dev_failover;     /* HCA failover */
        unsigned int     *kib_service;          /* IB service number */
-       int              *kib_min_reconnect_interval; /* first failed connection retry... */
-       int              *kib_max_reconnect_interval; /* ...exponentially increasing to this */
        int              *kib_cksum;            /* checksum struct kib_msg? */
        int              *kib_timeout;          /* comms timeout (seconds) */
        int              *kib_keepalive;        /* keepalive timeout (seconds) */
-       int              *kib_ntx;              /* # tx descs */
        char            **kib_default_ipif;     /* default IPoIB interface */
        int              *kib_retry_count;
        int              *kib_rnr_retry_count;
@@ -115,20 +122,20 @@ extern struct kib_tunables  kiblnd_tunables;
 #define IBLND_CREDITS_MAX          ((typeof(((struct kib_msg *) 0)->ibm_credits)) - 1)  /* Max # of peer_ni credits */
 
 /* when to eagerly return credits */
-#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
+#define IBLND_CREDITS_HIGHWATER(t, conn) (((conn)->ibc_version) == IBLND_MSG_VERSION_1 ? \
                                        IBLND_CREDIT_HIGHWATER_V1 : \
-                                       t->lnd_peercredits_hiw)
+                       min((t)->lnd_peercredits_hiw, (__u32)(conn)->ibc_queue_depth - 1)) /* version-1 connections use the fixed mark; others use the tunable capped at queue depth - 1; NB: conn may be evaluated twice */
 
 #ifdef HAVE_RDMA_CREATE_ID_5ARG
-# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
-                                                               cb, dev, \
-                                                               ps, qpt)
+# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+        rdma_create_id((ns) ? (ns) : &init_net, cb, dev, ps, qpt) /* a NULL ns falls back to &init_net */
 #else /* !HAVE_RDMA_CREATE_ID_5ARG */
 # ifdef HAVE_RDMA_CREATE_ID_4ARG
-#  define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, \
-                                                                ps, qpt)
+#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+         rdma_create_id(cb, dev, ps, qpt) /* ns is accepted but not passed on */
 # else /* neither the 5-arg nor the 4-arg variant: 3-arg rdma_create_id() */
-#  define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+#  define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
+         rdma_create_id(cb, dev, ps) /* ns and qpt are accepted but not passed on */
 # endif /* HAVE_RDMA_CREATE_ID_4ARG */
 #endif /* HAVE_RDMA_CREATE_ID_5ARG */
 
@@ -201,12 +208,19 @@ struct kib_hca_dev {
        int                  ibh_page_shift;    /* page shift of current HCA */
        int                  ibh_page_size;     /* page size of current HCA */
        __u64                ibh_page_mask;     /* page mask of current HCA */
-       int                  ibh_mr_shift;      /* bits shift of max MR size */
        __u64                ibh_mr_size;       /* size of MR */
+       int                  ibh_max_qp_wr;     /* maximum work requests size */
 #ifdef HAVE_IB_GET_DMA_MR
        struct ib_mr        *ibh_mrs;           /* global MR */
 #endif
        struct ib_pd        *ibh_pd;            /* PD */
+       u8                   ibh_port;          /* port number */
+       struct ib_event_handler
+                            ibh_event_handler; /* IB event handler */
+       int                  ibh_state;         /* device status */
+#define IBLND_DEV_PORT_DOWN     0
+#define IBLND_DEV_PORT_ACTIVE   1
+#define IBLND_DEV_FATAL         2
        struct kib_dev           *ibh_dev;           /* owner */
        atomic_t             ibh_ref;           /* refcount */
 };
@@ -376,6 +390,7 @@ struct kib_net {
        struct kib_fmr_poolset  **ibn_fmr_ps;   /* fmr pool-set */
 
        struct kib_dev          *ibn_dev;       /* underlying IB device */
+       struct lnet_ni          *ibn_ni;        /* LNet interface */
 };
 
 #define KIB_THREAD_SHIFT               16
@@ -561,8 +576,6 @@ struct kib_rx {                                     /* receive message */
        struct kib_conn        *rx_conn;
        /* # bytes received (-1 while posted) */
        int                     rx_nob;
-       /* completion status */
-       enum ib_wc_status       rx_status;
        /* message buffer (host vaddr) */
        struct kib_msg         *rx_msg;
        /* message buffer (I/O addr) */
@@ -755,6 +768,8 @@ struct kib_peer_ni {
        unsigned char           ibp_races;
        /* # consecutive reconnection attempts to this peer */
        unsigned int            ibp_reconnected;
+       /* number of total active retries */
+       unsigned int            ibp_retries;
        /* errno on closing this peer_ni */
        int                     ibp_error;
        /* max map_on_demand */
@@ -782,6 +797,12 @@ extern void kiblnd_hdev_destroy(struct kib_hca_dev *hdev);
 
 int kiblnd_msg_queue_size(int version, struct lnet_ni *ni);
 
+static inline int kiblnd_timeout(void) /* timeout to use: the kib_timeout tunable when non-zero, else lnet_get_lnd_timeout() */
+{
+       return *kiblnd_tunables.kib_timeout ? *kiblnd_tunables.kib_timeout : /* a non-zero tunable takes precedence */
+               lnet_get_lnd_timeout(); /* tunable is 0: fall back to lnet_get_lnd_timeout() */
+}
+
 static inline int
 kiblnd_concurrent_sends(int version, struct lnet_ni *ni)
 {
@@ -941,7 +962,7 @@ kiblnd_need_noop(struct kib_conn *conn)
        tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
 
         if (conn->ibc_outstanding_credits <
-           IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
+           IBLND_CREDITS_HIGHWATER(tunables, conn) &&
             !kiblnd_send_keepalive(conn))
                 return 0; /* No need to send NOOP */
 
@@ -1057,7 +1078,7 @@ kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index)
         return rd->rd_frags[index].rf_addr;
 }
 
-static inline __u32
+static inline int
 kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
 {
         return rd->rd_frags[index].rf_nob;
@@ -1132,8 +1153,8 @@ static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
 
 #ifndef HAVE_IB_SG_DMA_ADDRESS /* define ib_sg_dma_*() wrappers when HAVE_IB_SG_DMA_ADDRESS is not set */
 #include <linux/scatterlist.h>
-#define ib_sg_dma_address(dev, sg)     sg_dma_address((dev), (sg))
-#define ib_sg_dma_len(dev, sg)         sg_dma_len((dev), (sg))
+#define ib_sg_dma_address(dev, sg)     sg_dma_address(sg) /* dev is accepted but ignored */
+#define ib_sg_dma_len(dev, sg)         sg_dma_len(sg) /* dev is accepted but ignored */
 #endif /* !HAVE_IB_SG_DMA_ADDRESS */
 
 static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
@@ -1180,7 +1201,7 @@ int  kiblnd_cm_callback(struct rdma_cm_id *cmid,
                         struct rdma_cm_event *event);
 int  kiblnd_translate_mtu(int value);
 
-int  kiblnd_dev_failover(struct kib_dev *dev);
+int  kiblnd_dev_failover(struct kib_dev *dev, struct net *ns);
 int kiblnd_create_peer(struct lnet_ni *ni, struct kib_peer_ni **peerp,
                       lnet_nid_t nid);
 void kiblnd_destroy_peer(struct kib_peer_ni *peer);
@@ -1214,7 +1235,7 @@ int kiblnd_post_rx(struct kib_rx *rx, int credit);
 
 int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
 int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
-               int delayed, unsigned int niov, struct kvec *iov,
-               lnet_kiov_t *kiov, unsigned int offset, unsigned int mlen,
+               int delayed, unsigned int niov,
+               struct bio_vec *kiov, unsigned int offset, unsigned int mlen,
                unsigned int rlen);