*
* Copyright (C) 2009-2012 Cray, Inc.
*
- * Copyright (c) 2014, Intel Corporation.
+ * Copyright (c) 2014, 2016, Intel Corporation.
*
* Derived from work by: Eric Barton <eric@bartonsoftware.com>
* Author: Nic Henke <nic@cray.com>
#ifndef _GNILND_GNILND_H_
#define _GNILND_GNILND_H_
-#ifdef HAVE_COMPAT_RDMA
-#include <linux/compat-2.6.h>
-#endif
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#define DEBUG_SUBSYSTEM S_LND
-#include <libcfs/linux/kp30.h>
#include <libcfs/libcfs.h>
#include <lnet/lnet.h>
#include <lnet/lib-lnet.h>
/* Most recent inbound traffic on @conn: the later (per time_after()) of the
 * last FMA rx timestamp and the last CQ rx timestamp.
 * The macro argument is parenthesized so expression arguments bind correctly
 * with the -> operator.
 */
#define GNILND_LASTRX(conn) (time_after((conn)->gnc_last_rx, (conn)->gnc_last_rx_cq) \
				? (conn)->gnc_last_rx : (conn)->gnc_last_rx_cq)
+/* fmablk registration failures timeout before failing node */
+#define GNILND_REGFAILTO_DISABLE -1
+
/************************************************************************
* Enum, flag and tag data
*/
#define GNILND_DEL_PEER 1
#define GNILND_CLEAR_PURGATORY 2
-#define GNILND_RCA_NODE_UP 0
-#define GNILND_RCA_NODE_DOWN 1
-#define GNILND_RCA_NODE_UNKNOWN 2
+#define GNILND_PEER_UP 0
+#define GNILND_PEER_DOWN 1
+#define GNILND_PEER_TIMED_OUT 2
+#define GNILND_PEER_UNKNOWN 3
/* defines for reverse RDMA states */
#define GNILND_REVERSE_NONE 0
} WIRE_ATTR kgn_rdma_desc_t;
/* Wire format of an immediate message: only the LNet header is described
 * here; the payload travels inline in the FMA "Message Data" (see below).
 */
typedef struct {
- lnet_hdr_t gnim_hdr; /* LNet header */
+ struct lnet_hdr gnim_hdr; /* LNet header */
	/* LNet payload is in FMA "Message Data" */
} WIRE_ATTR kgn_immediate_msg_t;
/* Wire format of a PUT request: LNet header plus an opaque cookie used to
 * match the eventual completion back to this request.
 */
typedef struct {
- lnet_hdr_t gnprm_hdr; /* LNet header */
+ struct lnet_hdr gnprm_hdr; /* LNet header */
	__u64 gnprm_cookie; /* opaque completion cookie */
} WIRE_ATTR kgn_putreq_msg_t;
} WIRE_ATTR kgn_putack_msg_t;
typedef struct {
- lnet_hdr_t gngm_hdr; /* LNet header */
+ struct lnet_hdr gngm_hdr; /* LNet header */
__u64 gngm_cookie; /* opaque completion cookie */
__u16 gngm_payload_cksum; /* checksum for put msg */
kgn_rdma_desc_t gngm_desc; /* sender's sink buffer */
int *kgn_max_immediate; /* immediate payload breakpoint */
int *kgn_checksum; /* checksum data */
int *kgn_checksum_dump; /* dump raw data to D_INFO log when checksumming */
- int *kgn_bte_dlvr_mode; /* BTE delivery mode mask */
+ int *kgn_bte_put_dlvr_mode; /* BTE Put delivery mode */
+ int *kgn_bte_get_dlvr_mode; /* BTE Get delivery mode */
int *kgn_bte_relaxed_ordering; /* relaxed ordering (PASSPW) on BTE transfers */
int *kgn_ptag; /* PTAG for cdm_create */
int *kgn_pkey; /* PKEY for cdm_create */
- int *kgn_max_retransmits; /* max number of FMA retransmits */
+ int *kgn_max_retransmits; /* max number of FMA retransmits before entering delay list */
int *kgn_nwildcard; /* # wildcard per net to post */
int *kgn_nice; /* nice value for kgnilnd threads */
int *kgn_rdmaq_intervals; /* # intervals per second for rdmaq throttle */
int *kgn_fast_reconn; /* fast reconnection on conn timeout */
int *kgn_efault_lbug; /* LBUG on receiving an EFAULT */
int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */
+ int *kgn_reg_fail_timeout; /* registration failure timeout */
int *kgn_thread_affinity; /* bind scheduler threads to cpus */
+ int *kgn_to_reconn_disable;/* disable reconnect after timeout */
int *kgn_thread_safe; /* use thread safe kgni API */
-#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
- struct ctl_table_header *kgn_sysctl; /* sysctl interface */
-#endif
} kgn_tunables_t;
typedef struct kgn_mbox_info {
atomic_t gnd_neps; /* # EP allocated to conns */
short gnd_ready; /* stuff to do in scheduler thread */
struct list_head gnd_ready_conns; /* connections ready to tx/rx */
+ struct list_head gnd_delay_conns; /* connections in need of dla/or smsg credits */
struct list_head gnd_map_tx; /* TX: needing buffer mapping */
wait_queue_head_t gnd_waitq; /* scheduler wakeup */
spinlock_t gnd_lock; /* serialise gnd_ready_conns */
struct list_head gnc_schedlist; /* schedule (on gnd_?_conns) for attention */
struct list_head gnc_fmaq; /* txs queued for FMA */
struct list_head gnc_mdd_list; /* hold list for MDD on hard conn reset */
+ struct list_head gnc_delaylist; /* If on this list schedule anytime we get interrupted */
__u64 gnc_peerstamp; /* peer's unique stamp */
__u64 gnc_peer_connstamp; /* peer's unique connection stamp */
__u64 gnc_my_connstamp; /* my unique connection stamp */
unsigned long gnp_reconnect_time; /* get_seconds() when reconnect OK */
unsigned long gnp_reconnect_interval; /* exponential backoff */
atomic_t gnp_dirty_eps; /* # of old but yet to be destroyed EPs from conns */
- int gnp_down; /* rca says peer down */
+ int gnp_state; /* up/down/timedout */
unsigned long gnp_down_event_time; /* time peer down */
unsigned long gnp_up_event_time; /* time peer back up */
} kgn_peer_t;
extern void kgnilnd_destroy_peer(kgn_peer_t *peer);
extern void kgnilnd_destroy_conn(kgn_conn_t *conn);
-extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld);
+extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held);
+extern int _kgnilnd_schedule_delay_conn(kgn_conn_t *conn);
/* Macro wrapper for _kgnilnd_schedule_conn. This stores the function name
 * and line number of the caller so that problematic call sites can be
 * identified manually when debugging.
 */
#define kgnilnd_schedule_conn(conn) \
- _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0);
+ _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0, 0)
#define kgnilnd_schedule_conn_refheld(conn, refheld) \
- _kgnilnd_schedule_conn(conn, __func__, __LINE__, refheld);
+ _kgnilnd_schedule_conn(conn, __func__, __LINE__, refheld, 0)
+
+/* As kgnilnd_schedule_conn(), but the caller indicates the scheduler lock
+ * is already held (lock_held = 1).
+ *
+ * Note: no trailing semicolon in these expansions, so the macros behave as
+ * ordinary expressions at the call site (callers supply the ';'); a macro
+ * body ending in ';' would break brace-less if/else call sites and discard
+ * the int return of _kgnilnd_schedule_conn().
+ */
+#define kgnilnd_schedule_conn_nolock(conn) \
+ _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0, 1)
+
+
+/* Macro wrapper for _kgnilnd_schedule_delay_conn. This will allow us to store
+ * extra data if we need to.
+ */
+#define kgnilnd_schedule_delay_conn(conn) \
+ _kgnilnd_schedule_delay_conn(conn)
static inline void
kgnilnd_thread_fini(void)
return ret;
}
+/* Release @ptr with vfree() after decrementing the libcfs kernel-memory
+ * accounting counter by @size.
+ * NOTE(review): assumes @ptr was obtained from a matching vmalloc-based
+ * accounting wrapper — confirm against the allocation path.
+ */
+static inline void kgnilnd_vfree(void *ptr, int size)
+{
+ libcfs_kmem_dec(ptr, size);
+ vfree(ptr);
+}
+
/* Copied from DEBUG_REQ in Lustre - the dance is needed to save stack space */
extern void
#define GNIDBG_MSG(level, msg, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_msg(&msgdata, level, &cdls, msg, \
"$$ "fmt" from %s ", ## args, \
#define GNIDBG_TOMSG(level, msg, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_msg(&msgdata, level, &cdls, msg, \
"$$ "fmt" ", ## args); \
#define GNIDBG_CONN(level, conn, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_conn(&msgdata, level, &cdls, conn, \
"$$ "fmt" ", ## args); \
#define GNIDBG_TX(level, tx, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_tx(&msgdata, level, &cdls, tx, \
"$$ "fmt" ", ## args); \
void kgnilnd_tx_done(kgn_tx_t *tx, int completion);
void kgnilnd_txlist_done(struct list_head *txlist, int error);
void kgnilnd_unlink_peer_locked(kgn_peer_t *peer);
-int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld);
+int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held);
int kgnilnd_schedule_process_conn(kgn_conn_t *conn, int sched_intent);
void kgnilnd_schedule_dgram(kgn_device_t *dev);
int kgnilnd_get_node_state(__u32 nid);
int kgnilnd_tunables_init(void);
-void kgnilnd_tunables_fini(void);
void kgnilnd_init_msg(kgn_msg_t *msg, int type, lnet_nid_t source);
void kgnilnd_bump_timeouts(__u32 nap_time, char *reason);