Whamcloud - gitweb
LU-78 o2iblnd: kiblnd_check_conns can deadlock
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd.h
index bac486d..59fd3d0 100644 (file)
@@ -125,6 +125,8 @@ typedef struct
 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
         cfs_sysctl_table_header_t *kib_sysctl;  /* sysctl interface */
 #endif
+        int              *kib_require_priv_port;/* accept only privileged ports */
+        int              *kib_use_priv_port;    /* use privileged port for active connect */
 } kib_tunables_t;
 
 extern kib_tunables_t  kiblnd_tunables;
@@ -142,6 +144,12 @@ extern kib_tunables_t  kiblnd_tunables;
                                      IBLND_CREDIT_HIGHWATER_V1 : \
                                      *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
 
+#ifdef HAVE_RDMA_CREATE_ID_4ARG
+#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)
+#else
+#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+#endif
+
 static inline int
 kiblnd_concurrent_sends_v1(void)
 {
@@ -187,12 +195,20 @@ kiblnd_concurrent_sends_v1(void)
 
 struct kib_hca_dev;
 
+/* o2iblnd can run over an aliased interface */
+#ifdef IFALIASZ
+#define KIB_IFNAME_SIZE              IFALIASZ
+#else
+#define KIB_IFNAME_SIZE              256
+#endif
+
 typedef struct
 {
         cfs_list_t           ibd_list;          /* chain on kib_devs */
         cfs_list_t           ibd_fail_list;     /* chain on kib_failed_devs */
         __u32                ibd_ifip;          /* IPoIB interface IP */
-        char                 ibd_ifname[32];    /* IPoIB interface name */
+        /** IPoIB interface name */
+        char                 ibd_ifname[KIB_IFNAME_SIZE];
         int                  ibd_nnets;         /* # nets extant */
 
         cfs_time_t           ibd_next_failover;
@@ -219,7 +235,10 @@ typedef struct kib_hca_dev
         cfs_atomic_t         ibh_ref;           /* refcount */
 } kib_hca_dev_t;
 
-#define IBLND_POOL_DEADLINE     300             /* # of seconds to keep pool alive */
+/** # of seconds to keep pool alive */
+#define IBLND_POOL_DEADLINE     300
+/** # of seconds to retry if allocation failed */
+#define IBLND_POOL_RETRY        1
 
 typedef struct
 {
@@ -553,40 +572,43 @@ typedef struct kib_connvars
 
 typedef struct kib_conn
 {
-        struct kib_peer   *ibc_peer;          /* owning peer */
-        kib_hca_dev_t     *ibc_hdev;           /* HCA bound on */
-        cfs_list_t         ibc_list;          /* stash on peer's conn list */
-        cfs_list_t         ibc_sched_list;    /* schedule for attention */
-        cfs_list_t         ibc_connd_list;    /* kiblnd_check_conns only */
-        __u16              ibc_version;       /* version of connection */
-        __u64              ibc_incarnation;   /* which instance of the peer */
-        cfs_atomic_t       ibc_refcount;      /* # users */
-        int                ibc_state;         /* what's happening */
-        int                ibc_nsends_posted; /* # uncompleted sends */
-        int                ibc_noops_posted;  /* # uncompleted NOOPs */
-        int                ibc_credits;       /* # credits I have */
-        int                ibc_outstanding_credits; /* # credits to return */
-        int                ibc_reserved_credits;/* # ACK/DONE msg credits */
-        int                ibc_retry_noop;    /* need to retry returning credits */
-        int                ibc_comms_error;   /* set on comms error */
-        int                ibc_nrx:16;        /* receive buffers owned */
-        int                ibc_scheduled:1;   /* scheduled for attention */
-        int                ibc_ready:1;       /* CQ callback fired */
-        unsigned long      ibc_last_send;     /* time of last send */
-        cfs_list_t         ibc_early_rxs;     /* rxs completed before ESTABLISHED */
-        cfs_list_t         ibc_tx_noops;       /* IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */
-        cfs_list_t         ibc_tx_queue;       /* sends that need a credit */
-        cfs_list_t         ibc_tx_queue_nocred;/* sends that don't need a credit */
-        cfs_list_t         ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */
-        cfs_list_t         ibc_active_txs;     /* active tx awaiting completion */
-        cfs_spinlock_t     ibc_lock;           /* serialise */
-        kib_rx_t          *ibc_rxs;            /* the rx descs */
-        kib_pages_t       *ibc_rx_pages;       /* premapped rx msg pages */
-
-        struct rdma_cm_id *ibc_cmid;           /* CM id */
-        struct ib_cq      *ibc_cq;             /* completion queue */
-
-        kib_connvars_t    *ibc_connvars;       /* in-progress connection state */
+        struct kib_peer     *ibc_peer;          /* owning peer */
+        kib_hca_dev_t       *ibc_hdev;          /* HCA bound on */
+        cfs_list_t           ibc_list;          /* stash on peer's conn list */
+        cfs_list_t           ibc_sched_list;    /* schedule for attention */
+        __u16                ibc_version;       /* version of connection */
+        __u64                ibc_incarnation;   /* which instance of the peer */
+        cfs_atomic_t         ibc_refcount;      /* # users */
+        int                  ibc_state;         /* what's happening */
+        int                  ibc_nsends_posted; /* # uncompleted sends */
+        int                  ibc_noops_posted;  /* # uncompleted NOOPs */
+        int                  ibc_credits;       /* # credits I have */
+        int                  ibc_outstanding_credits; /* # credits to return */
+        int                  ibc_reserved_credits;/* # ACK/DONE msg credits */
+        int                  ibc_comms_error;   /* set on comms error */
+        int                  ibc_nrx:16;        /* receive buffers owned */
+        int                  ibc_scheduled:1;   /* scheduled for attention */
+        int                  ibc_ready:1;       /* CQ callback fired */
+        /* time of last send */
+        unsigned long        ibc_last_send;
+        /** link chain for kiblnd_check_conns only */
+        cfs_list_t           ibc_connd_list;
+        /** rxs completed before ESTABLISHED */
+        cfs_list_t           ibc_early_rxs;
+        /** IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */
+        cfs_list_t           ibc_tx_noops;
+        cfs_list_t           ibc_tx_queue;       /* sends that need a credit */
+        cfs_list_t           ibc_tx_queue_nocred;/* sends that don't need a credit */
+        cfs_list_t           ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */
+        cfs_list_t           ibc_active_txs;     /* active tx awaiting completion */
+        cfs_spinlock_t       ibc_lock;           /* serialise */
+        kib_rx_t            *ibc_rxs;            /* the rx descs */
+        kib_pages_t         *ibc_rx_pages;       /* premapped rx msg pages */
+
+        struct rdma_cm_id   *ibc_cmid;           /* CM id */
+        struct ib_cq        *ibc_cq;             /* completion queue */
+
+        kib_connvars_t      *ibc_connvars;       /* in-progress connection state */
 } kib_conn_t;
 
 #define IBLND_CONN_INIT               0         /* being initialised */
@@ -721,7 +743,7 @@ kiblnd_send_keepalive(kib_conn_t *conn)
 }
 
 static inline int
-kiblnd_send_noop(kib_conn_t *conn)
+kiblnd_need_noop(kib_conn_t *conn)
 {
         LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED);
 
@@ -735,11 +757,12 @@ kiblnd_send_noop(kib_conn_t *conn)
                         return 0; /* NOOP can be piggybacked */
 
                 /* No tx to piggyback NOOP onto or no credit to send a tx */
-                return (cfs_list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 0);
+                return (cfs_list_empty(&conn->ibc_tx_queue) ||
+                        conn->ibc_credits == 0);
         }
 
-        if (!cfs_list_empty(&conn->ibc_tx_noops) ||       /* NOOP already queued */
-            !cfs_list_empty(&conn->ibc_tx_queue_nocred) || /* can be piggybacked */
+        if (!cfs_list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
+            !cfs_list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
             conn->ibc_credits == 0)                    /* no credit */
                 return 0;