Whamcloud - gitweb
LU-8734 gnilnd: Handle dla credits exhaustion
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd.h
index 785142c..9589ae8 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2009-2012 Cray, Inc.
  *
- * Copyright (c) 2013, 2014, Intel Corporation.
+ * Copyright (c) 2014, 2016, Intel Corporation.
  *
  *   Derived from work by: Eric Barton <eric@bartonsoftware.com>
  *   Author: Nic Henke <nic@cray.com>
@@ -28,9 +28,6 @@
 #ifndef _GNILND_GNILND_H_
 #define _GNILND_GNILND_H_
 
-#ifdef HAVE_COMPAT_RDMA
-#include <linux/compat-2.6.h>
-#endif
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -64,7 +61,6 @@
 
 #define DEBUG_SUBSYSTEM S_LND
 
-#include <libcfs/linux/kp30.h>
 #include <libcfs/libcfs.h>
 #include <lnet/lnet.h>
 #include <lnet/lib-lnet.h>
 #define GNILND_LASTRX(conn) (time_after(conn->gnc_last_rx, conn->gnc_last_rx_cq) \
                                ? conn->gnc_last_rx : conn->gnc_last_rx_cq)
 
+/* fmablk registration failures timeout before failing node */
+#define GNILND_REGFAILTO_DISABLE  -1
+
 /************************************************************************
  * Enum, flag and tag data
  */
 #define GNILND_DEL_PEER              1
 #define GNILND_CLEAR_PURGATORY       2
 
-#define GNILND_RCA_NODE_UP           0
-#define GNILND_RCA_NODE_DOWN         1
-#define GNILND_RCA_NODE_UNKNOWN      2
+#define GNILND_PEER_UP               0
+#define GNILND_PEER_DOWN             1
+#define GNILND_PEER_TIMED_OUT        2
+#define GNILND_PEER_UNKNOWN          3
 
 /* defines for reverse RDMA states */
 #define GNILND_REVERSE_NONE            0
@@ -395,12 +395,12 @@ typedef struct {
 } WIRE_ATTR kgn_rdma_desc_t;
 
 typedef struct {
-       lnet_hdr_t        gnim_hdr;             /* LNet header */
+       struct lnet_hdr   gnim_hdr;             /* LNet header */
        /* LNet payload is in FMA "Message Data" */
 } WIRE_ATTR kgn_immediate_msg_t;
 
 typedef struct {
-       lnet_hdr_t        gnprm_hdr;            /* LNet header */
+       struct lnet_hdr   gnprm_hdr;            /* LNet header */
        __u64             gnprm_cookie;         /* opaque completion cookie */
 } WIRE_ATTR kgn_putreq_msg_t;
 
@@ -412,7 +412,7 @@ typedef struct {
 } WIRE_ATTR kgn_putack_msg_t;
 
 typedef struct {
-       lnet_hdr_t        gngm_hdr;             /* LNet header */
+       struct lnet_hdr   gngm_hdr;             /* LNet header */
        __u64             gngm_cookie;          /* opaque completion cookie */
        __u16             gngm_payload_cksum;   /* checksum for put msg */
        kgn_rdma_desc_t   gngm_desc;            /* sender's sink buffer */
@@ -457,11 +457,12 @@ typedef struct kgn_tunables {
        int              *kgn_max_immediate;    /* immediate payload breakpoint */
        int              *kgn_checksum;         /* checksum data */
        int              *kgn_checksum_dump;    /* dump raw data to D_INFO log when checksumming */
-       int              *kgn_bte_dlvr_mode;    /* BTE delivery mode mask */
+       int              *kgn_bte_put_dlvr_mode; /* BTE Put delivery mode */
+       int              *kgn_bte_get_dlvr_mode; /* BTE Get delivery mode */
        int              *kgn_bte_relaxed_ordering; /* relaxed ordering (PASSPW) on BTE transfers */
        int              *kgn_ptag;             /* PTAG for cdm_create */
        int              *kgn_pkey;             /* PKEY for cdm_create */
-       int              *kgn_max_retransmits;  /* max number of FMA retransmits */
+       int              *kgn_max_retransmits;  /* max number of FMA retransmits before entering delay list */
        int              *kgn_nwildcard;        /* # wildcard per net to post */
        int              *kgn_nice;             /* nice value for kgnilnd threads */
        int              *kgn_rdmaq_intervals;  /* # intervals per second for rdmaq throttle */
@@ -485,11 +486,10 @@ typedef struct kgn_tunables {
        int     *kgn_fast_reconn;      /* fast reconnection on conn timeout */
        int     *kgn_efault_lbug;      /* LBUG on receiving an EFAULT */
        int     *kgn_max_purgatory;    /* # conns/peer to keep in purgatory */
+       int     *kgn_reg_fail_timeout; /* registration failure timeout */
        int     *kgn_thread_affinity;  /* bind scheduler threads to cpus */
+       int     *kgn_to_reconn_disable;/* disable reconnect after timeout */
        int     *kgn_thread_safe;      /* use thread safe kgni API */
-#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-       struct ctl_table_header *kgn_sysctl;  /* sysctl interface */
-#endif
 } kgn_tunables_t;
 
 typedef struct kgn_mbox_info {
@@ -541,6 +541,7 @@ typedef struct kgn_device {
        atomic_t                gnd_neps;         /* # EP allocated to conns */
        short                   gnd_ready;        /* stuff to do in scheduler thread */
        struct list_head        gnd_ready_conns;  /* connections ready to tx/rx */
+       struct list_head        gnd_delay_conns;  /* connections in need of dla/or smsg credits */
        struct list_head        gnd_map_tx;       /* TX: needing buffer mapping */
        wait_queue_head_t       gnd_waitq;        /* scheduler wakeup */
        spinlock_t              gnd_lock;         /* serialise gnd_ready_conns */
@@ -706,6 +707,7 @@ typedef struct kgn_conn {
        struct list_head    gnc_schedlist;      /* schedule (on gnd_?_conns) for attention */
        struct list_head    gnc_fmaq;           /* txs queued for FMA */
        struct list_head    gnc_mdd_list;       /* hold list for MDD on hard conn reset */
+       struct list_head    gnc_delaylist;      /* If on this list schedule anytime we get interrupted */
        __u64               gnc_peerstamp;      /* peer's unique stamp */
        __u64               gnc_peer_connstamp; /* peer's unique connection stamp */
        __u64               gnc_my_connstamp;   /* my unique connection stamp */
@@ -780,7 +782,7 @@ typedef struct kgn_peer {
        unsigned long       gnp_reconnect_time;         /* get_seconds() when reconnect OK */
        unsigned long       gnp_reconnect_interval;     /* exponential backoff */
        atomic_t            gnp_dirty_eps;              /* # of old but yet to be destroyed EPs from conns */
-       int                 gnp_down;                   /* rca says peer down */
+       int                 gnp_state;                  /* up/down/timedout */
        unsigned long       gnp_down_event_time;        /* time peer down */
        unsigned long       gnp_up_event_time;          /* time peer back up */
 } kgn_peer_t;
@@ -879,7 +881,8 @@ extern kgn_tunables_t     kgnilnd_tunables;
 
 extern void kgnilnd_destroy_peer(kgn_peer_t *peer);
 extern void kgnilnd_destroy_conn(kgn_conn_t *conn);
-extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld);
+extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held);
+extern int _kgnilnd_schedule_delay_conn(kgn_conn_t *conn);
 
 /* Macro wrapper for _kgnilnd_schedule_conn. This will store the function
  * and the line of the calling function to allow us to debug problematic
@@ -887,10 +890,20 @@ extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line
  * the location manually.
  */
 #define kgnilnd_schedule_conn(conn)                                    \
-       _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0);
+       _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0, 0);
 
 #define kgnilnd_schedule_conn_refheld(conn, refheld)                   \
-       _kgnilnd_schedule_conn(conn, __func__, __LINE__, refheld);
+       _kgnilnd_schedule_conn(conn, __func__, __LINE__, refheld, 0);
+
+#define kgnilnd_schedule_conn_nolock(conn)                             \
+       _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0, 1);
+
+
+/* Macro wrapper for _kgnilnd_schedule_delay_conn. This will allow us to store
+ * extra data if we need to.
+ */
+#define kgnilnd_schedule_delay_conn(conn) \
+       _kgnilnd_schedule_delay_conn(conn);
 
 static inline void
 kgnilnd_thread_fini(void)
@@ -970,12 +983,18 @@ static inline int kgnilnd_trylock(struct mutex *cq_lock,
 
 static inline void *kgnilnd_vzalloc(int size)
 {
-       void *ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOFS | __GFP_ZERO,
+       void *ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_NORETRY | __GFP_ZERO,
                              PAGE_KERNEL);
        LIBCFS_ALLOC_POST(ret, size);
        return ret;
 }
 
+static inline void kgnilnd_vfree(void *ptr, int size)
+{
+       libcfs_kmem_dec(ptr, size);
+       vfree(ptr);
+}
+
 /* Copied from DEBUG_REQ in Lustre - the dance is needed to save stack space */
 
 extern void
@@ -996,7 +1015,7 @@ do {                                                                          \
 #define GNIDBG_MSG(level, msg, fmt, args...)                                  \
 do {                                                                          \
        if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) {                   \
-           static cfs_debug_limit_state_t cdls;                              \
+           static struct cfs_debug_limit_state cdls;                         \
            LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls);                \
            kgnilnd_debug_msg(&msgdata, level, &cdls, msg,                    \
                              "$$ "fmt" from %s ", ## args,                   \
@@ -1013,7 +1032,7 @@ do {                                                                          \
 #define GNIDBG_TOMSG(level, msg, fmt, args...)                                \
 do {                                                                          \
        if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) {                   \
-           static cfs_debug_limit_state_t cdls;                              \
+           static struct cfs_debug_limit_state cdls;                         \
            LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls);                \
            kgnilnd_debug_msg(&msgdata, level, &cdls, msg,                    \
                              "$$ "fmt" ", ## args);                          \
@@ -1042,7 +1061,7 @@ do {                                                                           \
 #define GNIDBG_CONN(level, conn, fmt, args...)                                  \
 do {                                                                            \
        if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) {                     \
-           static cfs_debug_limit_state_t cdls;                                \
+           static struct cfs_debug_limit_state cdls;                           \
            LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls);                  \
            kgnilnd_debug_conn(&msgdata, level, &cdls, conn,                    \
                               "$$ "fmt" ", ## args);                           \
@@ -1071,7 +1090,7 @@ do {                                                                           \
 #define GNIDBG_TX(level, tx, fmt, args...)                                      \
 do {                                                                            \
        if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) {                     \
-           static cfs_debug_limit_state_t cdls;                                \
+           static struct cfs_debug_limit_state cdls;                           \
            LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls);                  \
            kgnilnd_debug_tx(&msgdata, level, &cdls, tx,                        \
                              "$$ "fmt" ", ## args);                            \
@@ -1741,7 +1760,7 @@ int kgnilnd_eager_recv(lnet_ni_t *ni, void *private,
                        lnet_msg_t *lntmsg, void **new_private);
 int kgnilnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
                int delayed, unsigned int niov,
-               struct iovec *iov, lnet_kiov_t *kiov,
+               struct kvec *iov, lnet_kiov_t *kiov,
                unsigned int offset, unsigned int mlen, unsigned int rlen);
 
 __u16 kgnilnd_cksum_kiov(unsigned int nkiov, lnet_kiov_t *kiov, unsigned int offset, unsigned int nob, int dump_blob);
@@ -1758,7 +1777,7 @@ kgn_tx_t *kgnilnd_new_tx_msg(int type, lnet_nid_t source);
 void kgnilnd_tx_done(kgn_tx_t *tx, int completion);
 void kgnilnd_txlist_done(struct list_head *txlist, int error);
 void kgnilnd_unlink_peer_locked(kgn_peer_t *peer);
-int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld);
+int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held);
 int kgnilnd_schedule_process_conn(kgn_conn_t *conn, int sched_intent);
 
 void kgnilnd_schedule_dgram(kgn_device_t *dev);
@@ -1807,7 +1826,6 @@ int kgnilnd_start_rca_thread(void);
 int kgnilnd_get_node_state(__u32 nid);
 
 int kgnilnd_tunables_init(void);
-void kgnilnd_tunables_fini(void);
 void kgnilnd_init_msg(kgn_msg_t *msg, int type, lnet_nid_t source);
 
 void kgnilnd_bump_timeouts(__u32 nap_time, char *reason);