*
* Copyright (C) 2009-2012 Cray, Inc.
*
- * Copyright (c) 2013, 2014, Intel Corporation.
+ * Copyright (c) 2014, Intel Corporation.
*
* Derived from work by: Eric Barton <eric@bartonsoftware.com>
* Author: Nic Henke <nic@cray.com>
#define DEBUG_SUBSYSTEM S_LND
-#include <libcfs/linux/kp30.h>
#include <libcfs/libcfs.h>
#include <lnet/lnet.h>
#include <lnet/lib-lnet.h>
/* fixed constants */
#define GNILND_MAXDEVS 1 /* max # of GNI devices currently supported */
#define GNILND_MBOX_CREDITS 256 /* number of credits per mailbox */
-#define GNILND_COOKIE 0xa3579 /* cookie used by along with ptag by GNI */
#define GNILND_CONN_MAGIC 0xa100f /* magic value for verifying connection validity */
/* checksum values */
#define GNILND_CHECKSUM_OFF 0 /* checksum turned off */
#define GNILND_COMPUTE 1 /* compute image */
#define GNILND_FAST_RECONNECT 1 /* Fast Reconnect option */
#else
-#define GNILND_SCHED_THREADS 3 /* default # of kgnilnd_scheduler threads */
#define GNILND_FMABLK 1024 /* default number of mboxes per fmablk */
#define GNILND_SCHED_NICE -20 /* default nice value for scheduler threads */
#define GNILND_COMPUTE 0 /* service image */
/* Max number of connections to keep in purgatory per peer */
#define GNILND_PURGATORY_MAX 5
+/* Closing, don't put in purgatory */
+#define GNILND_NOPURG 222
/* payload size to add to the base mailbox size
* This is subtracting 2 from the concurrent_sends as 4 messages are included in the size
#define GNILND_LASTRX(conn) (time_after(conn->gnc_last_rx, conn->gnc_last_rx_cq) \
? conn->gnc_last_rx : conn->gnc_last_rx_cq)
+/* fmablk registration failures timeout before failing node */
+#define GNILND_REGFAILTO_DISABLE -1
+
/************************************************************************
* Enum, flag and tag data
*/
int *kgn_max_immediate; /* immediate payload breakpoint */
int *kgn_checksum; /* checksum data */
int *kgn_checksum_dump; /* dump raw data to D_INFO log when checksumming */
- int *kgn_bte_dlvr_mode; /* BTE delivery mode mask */
+ int *kgn_bte_put_dlvr_mode; /* BTE Put delivery mode */
+ int *kgn_bte_get_dlvr_mode; /* BTE Get delivery mode */
int *kgn_bte_relaxed_ordering; /* relaxed ordering (PASSPW) on BTE transfers */
int *kgn_ptag; /* PTAG for cdm_create */
+ int *kgn_pkey; /* PKEY for cdm_create */
int *kgn_max_retransmits; /* max number of FMA retransmits */
int *kgn_nwildcard; /* # wildcard per net to post */
int *kgn_nice; /* nice value for kgnilnd threads */
int *kgn_fast_reconn; /* fast reconnection on conn timeout */
int *kgn_efault_lbug; /* LBUG on receiving an EFAULT */
int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */
-#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kgn_sysctl; /* sysctl interface */
-#endif
+ int *kgn_reg_fail_timeout; /* registration failure timeout */
+ int *kgn_thread_affinity; /* bind scheduler threads to cpus */
+ int *kgn_thread_safe; /* use thread safe kgni API */
} kgn_tunables_t;
typedef struct kgn_mbox_info {
atomic_t gnd_n_schedule;
atomic_t gnd_canceled_dgrams; /* # of outstanding cancels */
struct rw_semaphore gnd_conn_sem; /* serialize connection changes/data movement */
+ void *gnd_smdd_hold_buf; /* buffer to keep smdd */
+ gni_mem_handle_t gnd_smdd_hold_hndl; /* buffer mem handle */
} kgn_device_t;
typedef struct kgn_net {
atomic_t gnc_sched_noop; /* # sched triggered NOOP */
unsigned int gnc_timeout; /* infer peer death if no rx for this many seconds */
__u32 gnc_cqid; /* my completion callback id (non-unique) */
- __u32 gnc_tx_seq; /* tx msg sequence number */
- __u32 gnc_rx_seq; /* rx msg sequence number */
+ atomic_t gnc_tx_seq; /* tx msg sequence number */
+ atomic_t gnc_rx_seq; /* rx msg sequence number */
+ struct mutex gnc_smsg_mutex; /* tx smsg sequence serialization */
+ struct mutex gnc_rdma_mutex; /* tx rdma sequence serialization */
__u64 gnc_tx_retrans; /* # retrans on SMSG */
atomic_t gnc_nlive_fma; /* # live FMA */
atomic_t gnc_nq_rdma; /* # queued (on device) RDMA */
atomic_t kgn_rev_copy_buff; /* # of REV rdma buffer copies */
struct socket *kgn_sock; /* for Apollo */
unsigned long free_pages_limit; /* # of free pages reserve from fma block allocations */
+ int kgn_enable_gl_mutex; /* kgni api mtx enable */
} kgn_data_t;
extern kgn_data_t kgnilnd_data;
#define kgnilnd_schedule_conn(conn) \
_kgnilnd_schedule_conn(conn, __func__, __LINE__, 0);
-#define kgnilnd_schedule_conn_refheld(conn, refheld) \
+#define kgnilnd_schedule_conn_refheld(conn, refheld) \
_kgnilnd_schedule_conn(conn, __func__, __LINE__, refheld);
-static inline int
-kgnilnd_thread_start(int(*fn)(void *arg), void *arg, char *name, int id)
+/*
+ * Decrement the kgnilnd_data.kgn_nthreads counter by one.
+ * NOTE(review): presumably called by a kgnilnd thread as it exits, to
+ * balance an increment made when the thread was started - confirm
+ * against kgnilnd_thread_start() and the callers.
+ */
+static inline void
+kgnilnd_thread_fini(void)
{
- struct task_struct *thrd = kthread_run(fn, arg, "%s_%02d", name, id);
- if (IS_ERR(thrd))
- return PTR_ERR(thrd);
+ atomic_dec(&kgnilnd_data.kgn_nthreads);
+}
- atomic_inc(&kgnilnd_data.kgn_nthreads);
- return 0;
+/*
+ * Take @lock only while kgnilnd_data.kgn_enable_gl_mutex is non-zero;
+ * when the flag is clear this is a no-op and @lock is left untouched.
+ */
+static inline void kgnilnd_gl_mutex_lock(struct mutex *lock)
+{
+	if (!kgnilnd_data.kgn_enable_gl_mutex)
+		return;
+
+	mutex_lock(lock);
}
-static inline void
-kgnilnd_thread_fini(void)
+/*
+ * Release @lock only while kgnilnd_data.kgn_enable_gl_mutex is non-zero,
+ * i.e. under the same condition kgnilnd_gl_mutex_lock() acquires it.
+ * When the flag is clear this is a no-op.
+ */
+static inline void kgnilnd_gl_mutex_unlock(struct mutex *lock)
{
- atomic_dec(&kgnilnd_data.kgn_nthreads);
+	if (!kgnilnd_data.kgn_enable_gl_mutex)
+		return;
+
+	mutex_unlock(lock);
+}
+
+/*
+ * Take @lock only while kgnilnd_data.kgn_enable_gl_mutex is zero - the
+ * inverse of the condition used by kgnilnd_gl_mutex_lock().  When the
+ * flag is set this is a no-op.
+ */
+static inline void kgnilnd_conn_mutex_lock(struct mutex *lock)
+{
+	if (kgnilnd_data.kgn_enable_gl_mutex)
+		return;
+
+	mutex_lock(lock);
+}
+
+/*
+ * Release @lock only while kgnilnd_data.kgn_enable_gl_mutex is zero,
+ * i.e. under the same condition kgnilnd_conn_mutex_lock() acquires it.
+ * When the flag is set this is a no-op.
+ */
+static inline void kgnilnd_conn_mutex_unlock(struct mutex *lock)
+{
+	if (kgnilnd_data.kgn_enable_gl_mutex)
+		return;
+
+	mutex_unlock(lock);
}
/* like mutex_trylock but with a jiffies spinner. This is to allow certain
* This function must not be used in interrupt context. The
* mutex must be released by the same task that acquired it.
*/
-static inline int kgnilnd_mutex_trylock(struct mutex *lock)
+static inline int __kgnilnd_mutex_trylock(struct mutex *lock)
{
int ret;
unsigned long timeout;
return 0;
}
+/*
+ * Flag-gated front end for __kgnilnd_mutex_trylock().
+ *
+ * While kgnilnd_data.kgn_enable_gl_mutex is zero, @lock is not touched
+ * and 1 is returned unconditionally (presumably "acquired", following
+ * the mutex_trylock() convention - confirm).  Otherwise the result of
+ * __kgnilnd_mutex_trylock(@lock) is returned as-is.
+ */
+static inline int kgnilnd_mutex_trylock(struct mutex *lock)
+{
+	return kgnilnd_data.kgn_enable_gl_mutex ?
+		__kgnilnd_mutex_trylock(lock) : 1;
+}
+
+/*
+ * Try exactly one of the two mutexes via __kgnilnd_mutex_trylock():
+ * @cq_lock while kgnilnd_data.kgn_enable_gl_mutex is non-zero,
+ * @c_lock otherwise.  The other mutex is never touched.
+ *
+ * Returns whatever __kgnilnd_mutex_trylock() returns for the chosen lock.
+ */
+static inline int kgnilnd_trylock(struct mutex *cq_lock,
+				  struct mutex *c_lock)
+{
+	struct mutex *target = c_lock;
+
+	if (kgnilnd_data.kgn_enable_gl_mutex)
+		target = cq_lock;
+
+	return __kgnilnd_mutex_trylock(target);
+}
+
+/*
+ * Allocate @size bytes with __vmalloc(), requesting zeroed memory
+ * (__GFP_ZERO) with GFP_NOIO, __GFP_NORETRY and __GFP_HIGHMEM, mapped
+ * PAGE_KERNEL.  The result is then handed to LIBCFS_ALLOC_POST()
+ * (presumably libcfs allocation accounting/logging - confirm).
+ *
+ * Returns the pointer obtained from __vmalloc(); callers should expect
+ * NULL on allocation failure.  Pair with kgnilnd_vfree().
+ */
+static inline void *kgnilnd_vzalloc(int size)
+{
+	/* keep the name "ret": LIBCFS_ALLOC_POST may stringify its argument */
+	void *ret;
+
+	ret = __vmalloc(size,
+			__GFP_HIGHMEM | GFP_NOIO | __GFP_NORETRY | __GFP_ZERO,
+			PAGE_KERNEL);
+	LIBCFS_ALLOC_POST(ret, size);
+
+	return ret;
+}
+
+/*
+ * Free a buffer with vfree(), first calling libcfs_kmem_dec(@ptr, @size)
+ * (presumably to back out the libcfs memory accounting for @size bytes
+ * done at allocation time - confirm).  @size should therefore match the
+ * size passed to kgnilnd_vzalloc() for this buffer.
+ */
+static inline void kgnilnd_vfree(void *ptr, int size)
+{
+ libcfs_kmem_dec(ptr, size);
+ vfree(ptr);
+}
+
/* Copied from DEBUG_REQ in Lustre - the dance is needed to save stack space */
extern void
#define GNIDBG_MSG(level, msg, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_msg(&msgdata, level, &cdls, msg, \
"$$ "fmt" from %s ", ## args, \
#define GNIDBG_TOMSG(level, msg, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_msg(&msgdata, level, &cdls, msg, \
"$$ "fmt" ", ## args); \
#define GNIDBG_CONN(level, conn, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_conn(&msgdata, level, &cdls, conn, \
"$$ "fmt" ", ## args); \
#define GNIDBG_TX(level, tx, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_tx(&msgdata, level, &cdls, tx, \
"$$ "fmt" ", ## args); \
lnet_msg_t *lntmsg, void **new_private);
int kgnilnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
+ struct kvec *iov, lnet_kiov_t *kiov,
unsigned int offset, unsigned int mlen, unsigned int rlen);
__u16 kgnilnd_cksum_kiov(unsigned int nkiov, lnet_kiov_t *kiov, unsigned int offset, unsigned int nob, int dump_blob);
int kgnilnd_scheduler(void *arg);
int kgnilnd_dgram_mover(void *arg);
int kgnilnd_rca(void *arg);
+int kgnilnd_thread_start(int(*fn)(void *arg), void *arg, char *name, int id);
int kgnilnd_create_conn(kgn_conn_t **connp, kgn_device_t *dev);
int kgnilnd_conn_isdup_locked(kgn_peer_t *peer, kgn_conn_t *newconn);
int kgnilnd_get_node_state(__u32 nid);
int kgnilnd_tunables_init(void);
-void kgnilnd_tunables_fini(void);
void kgnilnd_init_msg(kgn_msg_t *msg, int type, lnet_nid_t source);
void kgnilnd_bump_timeouts(__u32 nap_time, char *reason);
#undef DO_TYPE
-/* API wrapper functions - include late to pick up all of the other defines */
-#include "gnilnd_api_wrap.h"
-
/* pulls in tunables per platform and adds in nid/nic conversion
* if RCA wasn't available at build time */
#include "gnilnd_hss_ops.h"
+/* API wrapper functions - include late to pick up all of the other defines */
+#include "gnilnd_api_wrap.h"
#if defined(CONFIG_CRAY_GEMINI)
#include "gnilnd_gemini.h"
#error "Undefined Network Hardware Type"
#endif
+extern uint32_t kgni_driver_version;
+
+/*
+ * Choose the locking mode recorded in kgnilnd_data.kgn_enable_gl_mutex.
+ *
+ * The flag starts at 1 (kgnilnd_gl_mutex_*() wrappers take their locks,
+ * kgnilnd_conn_mutex_*() wrappers are no-ops) and is switched to 0 only
+ * when all of the following hold:
+ *   - the kgni_driver_version symbol can be resolved,
+ *   - its value is at least
+ *     GNI_VERSION_CHECK(0, GNILND_KGNI_TS_MINOR_VER, 0xb9),
+ *   - the kgn_thread_safe tunable is non-zero.
+ *
+ * The driver version is copied into a local while the symbol_get()
+ * reference is held and the reference is dropped exactly once, so the
+ * symbol's storage is never read after symbol_put().
+ */
+static inline void
+kgnilnd_check_kgni_version(void)
+{
+	uint32_t *kdv;
+	uint32_t version;
+
+	kgnilnd_data.kgn_enable_gl_mutex = 1;
+	kdv = symbol_get(kgni_driver_version);
+	if (!kdv) {
+		LCONSOLE_INFO("Not using thread safe locking -"
+			" no symbol kgni_driver_version\n");
+		return;
+	}
+
+	/* snapshot while the reference still pins the owning module */
+	version = *kdv;
+	symbol_put(kgni_driver_version);
+
+	/* Thread-safe kgni implemented in minor ver 0x44/45, code rev 0xb9 */
+	if (version < GNI_VERSION_CHECK(0, GNILND_KGNI_TS_MINOR_VER, 0xb9)) {
+		LCONSOLE_INFO("Not using thread safe locking, gni version 0x%x,"
+			" need >= 0x%x\n", version,
+			GNI_VERSION_CHECK(0, GNILND_KGNI_TS_MINOR_VER, 0xb9));
+		return;
+	}
+
+	/* driver is new enough, but the admin may still have opted out */
+	if (!*kgnilnd_tunables.kgn_thread_safe)
+		return;
+
+	/* Use thread-safe locking */
+	kgnilnd_data.kgn_enable_gl_mutex = 0;
+}
+
#endif /* _GNILND_GNILND_H_ */