*
* Copyright (C) 2009-2012 Cray, Inc.
*
- * Copyright (c) 2013, 2014, Intel Corporation.
+ * Copyright (c) 2014, 2016, Intel Corporation.
*
* Derived from work by: Eric Barton <eric@bartonsoftware.com>
* Author: Nic Henke <nic@cray.com>
#ifndef _GNILND_GNILND_H_
#define _GNILND_GNILND_H_
-#ifdef HAVE_COMPAT_RDMA
-#include <linux/compat-2.6.h>
-#endif
+#define DEBUG_SUBSYSTEM S_LND
+
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/in.h>
#include <linux/nmi.h>
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/linux/kp30.h>
-#include <libcfs/libcfs.h>
-#include <lnet/lnet.h>
#include <lnet/lib-lnet.h>
#include <gni_pub.h>
-#include "gnilnd_version.h"
+
+/* Convert a duration expressed in jiffies to whole seconds (truncating).
+ * Local replacement for the removed libcfs cfs_duration_sec() helper. */
+static inline time_t cfs_duration_sec(long duration_jiffies)
+{
+ return jiffies_to_msecs(duration_jiffies) / MSEC_PER_SEC;
+}
+
+#ifdef CONFIG_SLAB
+#define GNILND_MBOX_SIZE KMALLOC_MAX_SIZE
+#else
+#define GNILND_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
+ (MAX_ORDER + PAGE_SHIFT - 1) : 25)
+#define GNILND_SHIFT_MAX GNILND_SHIFT_HIGH
+#define GNILND_MBOX_SIZE (1UL << GNILND_SHIFT_MAX)
+#endif
/* tunables determined at compile time */
(cfs_time_seconds(*kgnilnd_tunables.kgn_timeout * \
*kgnilnd_tunables.kgn_timeout))
+/* Should we use the no_retry flag with vzalloc */
+#define GNILND_VZALLOC_RETRY 0
+
/* reaper thread wakup interval */
#define GNILND_REAPER_THREAD_WAKE 1
/* reaper thread checks each conn NCHECKS time every kgnilnd_data.kgn_new_min_timeout */
#define GNILND_SCHED_NICE 0 /* default nice value for scheduler threads */
#define GNILND_COMPUTE 1 /* compute image */
#define GNILND_FAST_RECONNECT 1 /* Fast Reconnect option */
+#define GNILND_DEFAULT_CREDITS 64 /* Default number of simultaneous transmits */
#else
#define GNILND_FMABLK 1024 /* default number of mboxes per fmablk */
#define GNILND_SCHED_NICE -20 /* default nice value for scheduler threads */
#define GNILND_COMPUTE 0 /* service image */
#define GNILND_FAST_RECONNECT 0 /* Fast Reconnect option */
+#define GNILND_DEFAULT_CREDITS 256 /* Default number of simultaneous transmits */
#endif
/* EXTRA_BITS are there to allow us to hide NOOP/CLOSE and anything else out of band */
/* need sane upper bound to limit copy overhead */
#define GNILND_MAX_IMMEDIATE (64<<10)
+/* allow for 4M transfers over gni. Note 2.5M used by DVS */
+#define GNILND_MAX_IOV 1024
/* Max number of connections to keep in purgatory per peer */
#define GNILND_PURGATORY_MAX 5
+/* Closing, don't put in purgatory */
+#define GNILND_NOPURG 222
/* payload size to add to the base mailbox size
* This is subtracting 2 from the concurrent_sends as 4 messages are included in the size
#define GNILND_LASTRX(conn) (time_after(conn->gnc_last_rx, conn->gnc_last_rx_cq) \
? conn->gnc_last_rx : conn->gnc_last_rx_cq)
+/* fmablk registration failures timeout before failing node */
+#define GNILND_REGFAILTO_DISABLE -1
+
/************************************************************************
* Enum, flag and tag data
*/
#define GNILND_BUF_IMMEDIATE_KIOV 2 /* immediate data */
#define GNILND_BUF_PHYS_UNMAPPED 3 /* physical: not mapped yet */
#define GNILND_BUF_PHYS_MAPPED 4 /* physical: mapped already */
-#define GNILND_BUF_VIRT_UNMAPPED 5 /* virtual: not mapped yet */
-#define GNILND_BUF_VIRT_MAPPED 6 /* virtual: mapped already */
#define GNILND_TX_WAITING_REPLY (1<<1) /* expecting to receive reply */
#define GNILND_TX_WAITING_COMPLETION (1<<2) /* waiting for smsg_send to complete */
#define GNILND_DEL_PEER 1
#define GNILND_CLEAR_PURGATORY 2
-#define GNILND_RCA_NODE_UP 0
-#define GNILND_RCA_NODE_DOWN 1
-#define GNILND_RCA_NODE_UNKNOWN 2
+#define GNILND_PEER_UP 0
+#define GNILND_PEER_DOWN 1
+#define GNILND_PEER_TIMED_OUT 2
+#define GNILND_PEER_UNKNOWN 3
/* defines for reverse RDMA states */
#define GNILND_REVERSE_NONE 0
v2:
* - added checksum to FMA
 * moved seq before payload
- * WIRE_ATTR added for alignment
+ * __packed added for alignment
v3:
* added gnm_payload_len for FMA payload size
v4:
__u32 gnpr_host_id; /* ph. host ID of the NIC */
__u32 gnpr_cqid; /* cqid I want peer to use when sending events to me */
gni_smsg_attr_t gnpr_smsg_attr; /* my short msg. attributes */
-} WIRE_ATTR kgn_gniparams_t;
+} __packed kgn_gniparams_t;
typedef struct kgn_nak_data {
__s32 gnnd_errno; /* errno reason for NAK */
-} WIRE_ATTR kgn_nak_data_t;
+} __packed kgn_nak_data_t;
/* the first bits of the connreq struct CANNOT CHANGE FORM EVER
* without breaking the ability for us to properly NAK someone */
kgn_gniparams_t gncr_gnparams; /* sender's endpoint info */
kgn_nak_data_t gncr_nakdata; /* data (rc, etc) for NAK */
};
-} WIRE_ATTR kgn_connreq_t;
+} __packed kgn_connreq_t;
typedef struct {
gni_mem_handle_t gnrd_key;
__u64 gnrd_addr;
__u32 gnrd_nob;
-} WIRE_ATTR kgn_rdma_desc_t;
+} __packed kgn_rdma_desc_t;
typedef struct {
- lnet_hdr_t gnim_hdr; /* LNet header */
+ struct lnet_hdr_nid4 gnim_hdr; /* LNet header */
/* LNet payload is in FMA "Message Data" */
-} WIRE_ATTR kgn_immediate_msg_t;
+} __packed kgn_immediate_msg_t;
typedef struct {
- lnet_hdr_t gnprm_hdr; /* LNet header */
- __u64 gnprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR kgn_putreq_msg_t;
+ struct lnet_hdr_nid4 gnprm_hdr; /* LNet header */
+ __u64 gnprm_cookie; /* opaque completion cookie */
+} __packed kgn_putreq_msg_t;
typedef struct {
__u64 gnpam_src_cookie; /* reflected completion cookie */
__u64 gnpam_dst_cookie; /* opaque completion cookie */
__u16 gnpam_payload_cksum; /* checksum for get msg */
kgn_rdma_desc_t gnpam_desc; /* sender's sink buffer */
-} WIRE_ATTR kgn_putack_msg_t;
+} __packed kgn_putack_msg_t;
typedef struct {
- lnet_hdr_t gngm_hdr; /* LNet header */
- __u64 gngm_cookie; /* opaque completion cookie */
- __u16 gngm_payload_cksum; /* checksum for put msg */
- kgn_rdma_desc_t gngm_desc; /* sender's sink buffer */
-} WIRE_ATTR kgn_get_msg_t;
+ struct lnet_hdr_nid4 gngm_hdr; /* LNet header */
+ __u64 gngm_cookie; /* opaque completion cookie */
+ __u16 gngm_payload_cksum; /* checksum for put msg */
+ kgn_rdma_desc_t gngm_desc; /* sender's sink buffer */
+} __packed kgn_get_msg_t;
typedef struct {
int gncm_retval; /* error on NAK, size on REQ */
__u64 gncm_cookie; /* reflected completion cookie */
-} WIRE_ATTR kgn_completion_msg_t;
+} __packed kgn_completion_msg_t;
typedef struct { /* NB must fit in FMA "Prefix" */
__u32 gnm_magic; /* I'm an gni message */
kgn_get_msg_t get;
kgn_completion_msg_t completion;
} gnm_u;
-} WIRE_ATTR kgn_msg_t;
+} __packed kgn_msg_t;
/************************************************************************
* runtime tunable data
int *kgn_max_immediate; /* immediate payload breakpoint */
int *kgn_checksum; /* checksum data */
int *kgn_checksum_dump; /* dump raw data to D_INFO log when checksumming */
- int *kgn_bte_dlvr_mode; /* BTE delivery mode mask */
+ int *kgn_bte_put_dlvr_mode; /* BTE Put delivery mode */
+ int *kgn_bte_get_dlvr_mode; /* BTE Get delivery mode */
int *kgn_bte_relaxed_ordering; /* relaxed ordering (PASSPW) on BTE transfers */
int *kgn_ptag; /* PTAG for cdm_create */
int *kgn_pkey; /* PKEY for cdm_create */
- int *kgn_max_retransmits; /* max number of FMA retransmits */
+ int *kgn_max_retransmits; /* max number of FMA retransmits before entering delay list */
int *kgn_nwildcard; /* # wildcard per net to post */
int *kgn_nice; /* nice value for kgnilnd threads */
int *kgn_rdmaq_intervals; /* # intervals per second for rdmaq throttle */
int *kgn_fast_reconn; /* fast reconnection on conn timeout */
int *kgn_efault_lbug; /* LBUG on receiving an EFAULT */
int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */
+ int *kgn_reg_fail_timeout; /* registration failure timeout */
int *kgn_thread_affinity; /* bind scheduler threads to cpus */
-#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kgn_sysctl; /* sysctl interface */
-#endif
+ int *kgn_to_reconn_disable;/* disable reconnect after timeout */
+ int *kgn_thread_safe; /* use thread safe kgni API */
+ int *kgn_vzalloc_noretry; /* Should we pass the noretry flag */
} kgn_tunables_t;
typedef struct kgn_mbox_info {
atomic_t gnd_neps; /* # EP allocated to conns */
short gnd_ready; /* stuff to do in scheduler thread */
struct list_head gnd_ready_conns; /* connections ready to tx/rx */
+ struct list_head gnd_delay_conns; /* connections in need of dla/or smsg credits */
struct list_head gnd_map_tx; /* TX: needing buffer mapping */
wait_queue_head_t gnd_waitq; /* scheduler wakeup */
spinlock_t gnd_lock; /* serialise gnd_ready_conns */
atomic64_t gnd_nbytes_map; /* bytes of total GART maps - fma, tx, etc */
__u32 gnd_map_nphys; /* # TX phys mappings */
__u32 gnd_map_physnop; /* # TX phys pages mapped */
- __u32 gnd_map_nvirt; /* # TX virt mappings */
- __u64 gnd_map_virtnob; /* # TX virt bytes mapped */
spinlock_t gnd_map_lock; /* serialize gnd_map_XXX */
unsigned long gnd_next_map; /* next mapping attempt in jiffies */
int gnd_map_attempt; /* last map attempt # */
atomic_t gnd_n_schedule;
atomic_t gnd_canceled_dgrams; /* # of outstanding cancels */
struct rw_semaphore gnd_conn_sem; /* serialize connection changes/data movement */
+ void *gnd_smdd_hold_buf; /* buffer to keep smdd */
+ gni_mem_handle_t gnd_smdd_hold_hndl; /* buffer mem handle */
} kgn_device_t;
typedef struct kgn_net {
struct list_head gnn_list; /* chain on kgni_data::kgn_nets */
kgn_device_t *gnn_dev; /* device for this net */
- lnet_ni_t *gnn_ni; /* network interface instance */
+ struct lnet_ni *gnn_ni; /* network interface instance */
atomic_t gnn_refcount; /* # current references */
int gnn_shutdown; /* lnd_shutdown set */
__u16 gnn_netnum; /* stash netnum for quicker lookup */
kgn_tx_list_state_t tx_list_state;/* where in state machine is this TX ? */
struct list_head *tx_list_p; /* pointer to current list */
struct kgn_conn *tx_conn; /* owning conn */
- lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */
+ struct lnet_msg *tx_lntmsg[2]; /* ptl msgs to finalize on completion */
unsigned long tx_qtime; /* when tx started to wait for something (jiffies) */
unsigned long tx_cred_wait; /* time spend waiting for smsg creds */
struct list_head tx_map_list; /* list entry on device map list */
struct list_head gnc_schedlist; /* schedule (on gnd_?_conns) for attention */
struct list_head gnc_fmaq; /* txs queued for FMA */
struct list_head gnc_mdd_list; /* hold list for MDD on hard conn reset */
+ struct list_head gnc_delaylist; /* If on this list schedule anytime we get interrupted */
__u64 gnc_peerstamp; /* peer's unique stamp */
__u64 gnc_peer_connstamp; /* peer's unique connection stamp */
__u64 gnc_my_connstamp; /* my unique connection stamp */
kgn_fma_memblock_t *gnc_fma_blk; /* pointer to fma block for our mailbox */
gni_smsg_attr_t gnpr_smsg_attr; /* my short msg. attributes */
spinlock_t gnc_tx_lock; /* protect tx alloc/free */
- __u8 gnc_tx_bits[GNILND_MAX_MSG_ID/8]; /* bit table for tx id */
+ unsigned long gnc_tx_bits[(GNILND_MAX_MSG_ID/8)/sizeof(unsigned long)]; /* bit table for tx id */
int gnc_next_tx; /* next tx to use in tx_ref_table */
kgn_tx_t **gnc_tx_ref_table; /* table of TX descriptors for this conn */
int gnc_mbox_id; /* id of mbox in fma_blk */
short gnp_connecting; /* connection forming */
short gnp_pending_unlink; /* need last conn close to trigger unlink */
int gnp_last_errno; /* last error conn saw */
- unsigned long gnp_last_alive; /* last time I had valid comms */
+ time64_t gnp_last_alive; /* last time I had valid comms */
int gnp_last_dgram_errno; /* last error dgrams saw */
unsigned long gnp_last_dgram_time; /* last time I tried to connect */
unsigned long gnp_reconnect_time; /* get_seconds() when reconnect OK */
unsigned long gnp_reconnect_interval; /* exponential backoff */
atomic_t gnp_dirty_eps; /* # of old but yet to be destroyed EPs from conns */
- int gnp_down; /* rca says peer down */
+ int gnp_state; /* up/down/timedout */
unsigned long gnp_down_event_time; /* time peer down */
unsigned long gnp_up_event_time; /* time peer back up */
} kgn_peer_t;
typedef struct kgn_rx {
kgn_conn_t *grx_conn; /* connection */
kgn_msg_t *grx_msg; /* message */
- lnet_msg_t *grx_lntmsg; /* lnet msg for this rx (eager only) */
+ struct lnet_msg *grx_lntmsg; /* lnet msg for this rx (eager only) */
int grx_eager; /* if eager, we copied msg to somewhere */
- struct timespec grx_received; /* time this msg received */
+ struct timespec64 grx_received; /* time this msg received */
} kgn_rx_t;
typedef struct kgn_data {
atomic_t kgn_rev_offset; /* # of REV rdma w/misaligned offsets */
atomic_t kgn_rev_length; /* # of REV rdma have misaligned len */
atomic_t kgn_rev_copy_buff; /* # of REV rdma buffer copies */
- struct socket *kgn_sock; /* for Apollo */
unsigned long free_pages_limit; /* # of free pages reserve from fma block allocations */
int kgn_enable_gl_mutex; /* kgni api mtx enable */
} kgn_data_t;
extern void kgnilnd_destroy_peer(kgn_peer_t *peer);
extern void kgnilnd_destroy_conn(kgn_conn_t *conn);
-extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld);
+extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held);
+extern int _kgnilnd_schedule_delay_conn(kgn_conn_t *conn);
+
+/* Effective connection timeout in seconds: the kgn_timeout module
+ * tunable when non-zero, otherwise the LNet-wide LND timeout. */
+static inline int kgnilnd_timeout(void)
+{
+ return *kgnilnd_tunables.kgn_timeout ?
+ *kgnilnd_tunables.kgn_timeout :
+ lnet_get_lnd_timeout();
+}
/* Macro wrapper for _kgnilnd_schedule_conn. This will store the function
* and the line of the calling function to allow us to debug problematic
* the location manually.
*/
#define kgnilnd_schedule_conn(conn) \
- _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0);
+ _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0, 0);
#define kgnilnd_schedule_conn_refheld(conn, refheld) \
- _kgnilnd_schedule_conn(conn, __func__, __LINE__, refheld);
+ _kgnilnd_schedule_conn(conn, __func__, __LINE__, refheld, 0);
+
+#define kgnilnd_schedule_conn_nolock(conn) \
+ _kgnilnd_schedule_conn(conn, __func__, __LINE__, 0, 1);
+
+
+/* Macro wrapper for _kgnilnd_schedule_delay_conn. This will allow us to store
+ * extra data if we need to.
+ */
+#define kgnilnd_schedule_delay_conn(conn) \
+ _kgnilnd_schedule_delay_conn(conn);
static inline void
kgnilnd_thread_fini(void)
atomic_dec(&kgnilnd_data.kgn_nthreads);
}
-static inline int kgnilnd_gl_mutex_trylock(struct mutex *lock)
-{
- if (kgnilnd_data.kgn_enable_gl_mutex)
- return mutex_trylock(lock);
- else
- return 1;
-}
-
static inline void kgnilnd_gl_mutex_lock(struct mutex *lock)
{
if (kgnilnd_data.kgn_enable_gl_mutex)
* This function must not be used in interrupt context. The
* mutex must be released by the same task that acquired it.
*/
-static inline int kgnilnd_mutex_trylock(struct mutex *lock)
+static inline int __kgnilnd_mutex_trylock(struct mutex *lock)
{
int ret;
unsigned long timeout;
- if (!kgnilnd_data.kgn_enable_gl_mutex)
- return 1;
-
LASSERT(!in_interrupt());
for (timeout = jiffies + 1; time_before(jiffies, timeout);) {
return 0;
}
+/* Trylock wrapper: a no-op success when the global kgni mutex is
+ * disabled, otherwise defers to __kgnilnd_mutex_trylock(). */
+static inline int kgnilnd_mutex_trylock(struct mutex *lock)
+{
+ if (!kgnilnd_data.kgn_enable_gl_mutex)
+ return 1;
+
+ return __kgnilnd_mutex_trylock(lock);
+}
+
+/* Trylock whichever mutex applies for the current locking mode:
+ * cq_lock when the global kgni mutex is enabled, c_lock otherwise. */
+static inline int kgnilnd_trylock(struct mutex *cq_lock,
+ struct mutex *c_lock)
+{
+ if (kgnilnd_data.kgn_enable_gl_mutex)
+ return __kgnilnd_mutex_trylock(cq_lock);
+ else
+ return __kgnilnd_mutex_trylock(c_lock);
+}
+
+/* Zeroed vmalloc with GFP_NOIO for use on I/O paths; when the
+ * kgn_vzalloc_noretry tunable is set, __GFP_NORETRY is added so the
+ * allocator fails fast instead of retrying under memory pressure.
+ * Returns NULL on failure; pair with kgnilnd_vfree(). */
+static inline void *kgnilnd_vzalloc(int size)
+{
+ void *ret;
+ if (*kgnilnd_tunables.kgn_vzalloc_noretry)
+ ret = __ll_vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_ZERO |
+ __GFP_NORETRY);
+ else
+ ret = __ll_vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_ZERO);
+
+ /* record the allocation in libcfs memory accounting */
+ LIBCFS_ALLOC_POST(ret, size, "alloc");
+ return ret;
+}
+
+/* Free memory obtained from kgnilnd_vzalloc(), updating libcfs
+ * allocation accounting before the vfree. */
+static inline void kgnilnd_vfree(void *ptr, int size)
+{
+ LIBCFS_FREE_PRE(ptr, size, "vfree");
+ vfree(ptr);
+}
+
+/* as of kernel version 4.2, set_mb is replaced with smp_store_mb */
+#ifndef set_mb
+#define set_mb smp_store_mb
+#endif
+
/* Copied from DEBUG_REQ in Lustre - the dance is needed to save stack space */
extern void
#define GNIDBG_MSG(level, msg, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_msg(&msgdata, level, &cdls, msg, \
"$$ "fmt" from %s ", ## args, \
#define GNIDBG_TOMSG(level, msg, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_msg(&msgdata, level, &cdls, msg, \
"$$ "fmt" ", ## args); \
#define GNIDBG_CONN(level, conn, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_conn(&msgdata, level, &cdls, conn, \
"$$ "fmt" ", ## args); \
#define GNIDBG_TX(level, tx, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING | D_NETERROR)) { \
- static cfs_debug_limit_state_t cdls; \
+ static struct cfs_debug_limit_state cdls; \
LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, level, &cdls); \
kgnilnd_debug_tx(&msgdata, level, &cdls, tx, \
"$$ "fmt" ", ## args); \
#error "this code uses actions inside LASSERT for ref counting"
#endif
-#define kgnilnd_admin_addref(atomic) \
-do { \
- int val = atomic_inc_return(&atomic); \
- LASSERTF(val > 0, #atomic " refcount %d\n", val); \
- CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \
+/* Take an admin reference on the given atomic counter, asserting the
+ * result stays positive and tracing the new value. */
+#define kgnilnd_admin_addref(atomic) \
+do { \
+ int val = atomic_inc_return(&atomic); \
+ LASSERTF(val > 0, #atomic " refcount %d\n", val); \
+ CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \
} while (0)
-#define kgnilnd_admin_decref(atomic) \
-do { \
- int val = atomic_dec_return(&atomic); \
- LASSERTF(val >=0, #atomic " refcount %d\n", val); \
- CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \
+/* Drop an admin reference; when the count reaches zero, wake anyone
+ * waiting on kgnilnd_data (e.g. shutdown waiting for refs to drain). */
+#define kgnilnd_admin_decref(atomic) \
+do { \
+ int val = atomic_dec_return(&atomic); \
+ LASSERTF(val >= 0, #atomic " refcount %d\n", val); \
+ CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \
+ if (!val) \
+ wake_up_var(&kgnilnd_data); \
}while (0)
-#define kgnilnd_net_addref(net) \
-do { \
- int val = atomic_inc_return(&net->gnn_refcount); \
- LASSERTF(val > 1, "net %p refcount %d\n", net, val); \
- CDEBUG(D_NETTRACE, "net %p->%s++ (%d)\n", net, \
- libcfs_nid2str(net->gnn_ni->ni_nid), val); \
+/* Take a reference on a kgn_net_t; val > 1 asserts the caller already
+ * held at least one reference before incrementing. */
+#define kgnilnd_net_addref(net) \
+do { \
+ int val = atomic_inc_return(&net->gnn_refcount); \
+ LASSERTF(val > 1, "net %p refcount %d\n", net, val); \
+ CDEBUG(D_NETTRACE, "net %p->%s++ (%d)\n", net, \
+ libcfs_nidstr(&net->gnn_ni->ni_nid), val); \
} while (0)
-#define kgnilnd_net_decref(net) \
-do { \
- int val = atomic_dec_return(&net->gnn_refcount); \
- LASSERTF(val >= 0, "net %p refcount %d\n", net, val); \
- CDEBUG(D_NETTRACE, "net %p->%s-- (%d)\n", net, \
- libcfs_nid2str(net->gnn_ni->ni_nid), val); \
+/* Drop a reference on a kgn_net_t, asserting the count never goes
+ * negative and tracing the new value. */
+#define kgnilnd_net_decref(net) \
+do { \
+ int val = atomic_dec_return(&net->gnn_refcount); \
+ LASSERTF(val >= 0, "net %p refcount %d\n", net, val); \
+ CDEBUG(D_NETTRACE, "net %p->%s-- (%d)\n", net, \
+ libcfs_nidstr(&net->gnn_ni->ni_nid), val); \
} while (0)
#define kgnilnd_peer_addref(peer) \
if (conn->gnc_peer) {
loopback = conn->gnc_peer->gnp_nid ==
- conn->gnc_peer->gnp_net->gnn_ni->ni_nid;
+ lnet_nid_to_nid4(&conn->gnc_peer->gnp_net->gnn_ni->ni_nid);
} else {
/* short circuit - a conn that didn't complete
* setup never needs a purgatory hold */
static inline int
kgnilnd_tx_mapped(kgn_tx_t *tx)
{
-	return (tx->tx_buftype == GNILND_BUF_VIRT_MAPPED ||
-	        tx->tx_buftype == GNILND_BUF_PHYS_MAPPED);
+ /* virtual mappings were removed; only physical counts as mapped now */
+ return tx->tx_buftype == GNILND_BUF_PHYS_MAPPED;
}
static inline struct list_head *
return -ESHUTDOWN;
}
- list_for_each_entry(net, kgnilnd_netnum2netlist(LNET_NETNUM(LNET_NIDNET(nid))), gnn_list) {
- if (!net->gnn_shutdown && LNET_NIDNET(net->gnn_ni->ni_nid) == LNET_NIDNET(nid)) {
+ list_for_each_entry(net,
+ kgnilnd_netnum2netlist(LNET_NETNUM(LNET_NIDNET(nid))),
+ gnn_list) {
+ if (!net->gnn_shutdown &&
+ LNET_NID_NET(&net->gnn_ni->ni_nid) == LNET_NIDNET(nid)) {
kgnilnd_net_addref(net);
up_read(&kgnilnd_data.kgn_net_rw_sem);
*netp = net;
int kgnilnd_dev_init(kgn_device_t *dev);
void kgnilnd_dev_fini(kgn_device_t *dev);
-int kgnilnd_startup(lnet_ni_t *ni);
-void kgnilnd_shutdown(lnet_ni_t *ni);
+int kgnilnd_startup(struct lnet_ni *ni);
+void kgnilnd_shutdown(struct lnet_ni *ni);
int kgnilnd_base_startup(void);
void kgnilnd_base_shutdown(void);
void kgnilnd_unmap_fma_blocks(kgn_device_t *device);
void kgnilnd_free_phys_fmablk(kgn_device_t *device);
-int kgnilnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-void kgnilnd_query(lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when);
-int kgnilnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kgnilnd_eager_recv(lnet_ni_t *ni, void *private,
- lnet_msg_t *lntmsg, void **new_private);
-int kgnilnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+int kgnilnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg);
+int kgnilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
+int kgnilnd_eager_recv(struct lnet_ni *ni, void *private,
+ struct lnet_msg *lntmsg, void **new_private);
+int kgnilnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
+ struct bio_vec *kiov,
unsigned int offset, unsigned int mlen, unsigned int rlen);
-__u16 kgnilnd_cksum_kiov(unsigned int nkiov, lnet_kiov_t *kiov, unsigned int offset, unsigned int nob, int dump_blob);
+__u16 kgnilnd_cksum_kiov(unsigned int nkiov, struct bio_vec *kiov,
+ unsigned int offset, unsigned int nob, int dump_blob);
/* purgatory functions */
void kgnilnd_add_purgatory_locked(kgn_conn_t *conn, kgn_peer_t *peer);
void kgnilnd_tx_done(kgn_tx_t *tx, int completion);
void kgnilnd_txlist_done(struct list_head *txlist, int error);
void kgnilnd_unlink_peer_locked(kgn_peer_t *peer);
-int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld);
+int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held);
int kgnilnd_schedule_process_conn(kgn_conn_t *conn, int sched_intent);
void kgnilnd_schedule_dgram(kgn_device_t *dev);
void kgnilnd_peer_increase_reconnect_locked(kgn_peer_t *peer);
void kgnilnd_queue_reply(kgn_conn_t *conn, kgn_tx_t *tx);
void kgnilnd_queue_tx(kgn_conn_t *conn, kgn_tx_t *tx);
-void kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, lnet_process_id_t *target);
+void kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net,
+ struct lnet_processid *target);
int kgnilnd_send_mapped_tx(kgn_tx_t *tx, int try_map_if_full);
void kgnilnd_consume_rx(kgn_rx_t *rx);
void kgnilnd_schedule_device(kgn_device_t *dev);
void kgnilnd_device_callback(__u32 devid, __u64 arg);
-void kgnilnd_schedule_device_timer(unsigned long arg);
+void kgnilnd_schedule_device_timer(cfs_timer_cb_arg_t data);
+void kgnilnd_schedule_device_timer_rd(cfs_timer_cb_arg_t data);
int kgnilnd_reaper(void *arg);
int kgnilnd_scheduler(void *arg);
int kgnilnd_get_node_state(__u32 nid);
int kgnilnd_tunables_init(void);
-void kgnilnd_tunables_fini(void);
void kgnilnd_init_msg(kgn_msg_t *msg, int type, lnet_nid_t source);
void kgnilnd_bump_timeouts(__u32 nap_time, char *reason);
/* pulls in tunables per platform and adds in nid/nic conversion
* if RCA wasn't available at build time */
#include "gnilnd_hss_ops.h"
+/* API wrapper functions - include late to pick up all of the other defines */
+#include "gnilnd_api_wrap.h"
#if defined(CONFIG_CRAY_GEMINI)
#include "gnilnd_gemini.h"
#error "Undefined Network Hardware Type"
#endif
-/* API wrapper functions - include late to pick up all of the other defines */
-#include "gnilnd_api_wrap.h"
+extern uint32_t kgni_driver_version;
+
+/* Decide whether the thread-safe kgni API can be used and set
+ * kgnilnd_data.kgn_enable_gl_mutex accordingly.  The default is the
+ * serializing global mutex (= 1); it is only disabled when BOTH the
+ * running kgni driver is new enough (>= 0.GNILND_KGNI_TS_MINOR_VER.0xb9)
+ * AND the admin requested it via the kgn_thread_safe tunable.
+ * symbol_get() doubles as a presence probe for old drivers that do not
+ * export kgni_driver_version. */
+static inline void
+kgnilnd_check_kgni_version(void)
+{
+ uint32_t *kdv;
+
+ kgnilnd_data.kgn_enable_gl_mutex = 1;
+ kdv = symbol_get(kgni_driver_version);
+ if (!kdv) {
+ LCONSOLE_INFO("Not using thread safe locking -"
+ " no symbol kgni_driver_version\n");
+ return;
+ }
+
+ /* Thread-safe kgni implemented in minor ver 0x44/45, code rev 0xb9 */
+ if (*kdv < GNI_VERSION_CHECK(0, GNILND_KGNI_TS_MINOR_VER, 0xb9)) {
+ symbol_put(kgni_driver_version);
+ LCONSOLE_INFO("Not using thread safe locking, gni version 0x%x,"
+ " need >= 0x%x\n", *kdv,
+ GNI_VERSION_CHECK(0, GNILND_KGNI_TS_MINOR_VER, 0xb9));
+ return;
+ }
+
+ /* done probing; release the module reference taken by symbol_get() */
+ symbol_put(kgni_driver_version);
+
+ if (!*kgnilnd_tunables.kgn_thread_safe) {
+ return;
+ }
+
+ /* Use thread-safe locking */
+ kgnilnd_data.kgn_enable_gl_mutex = 0;
+}
#endif /* _GNILND_GNILND_H_ */