*
* Copyright (C) 2009-2012 Cray, Inc.
*
+ * Copyright (c) 2013, 2014, Intel Corporation.
+ *
* Derived from work by: Eric Barton <eric@bartonsoftware.com>
* Author: Nic Henke <nic@cray.com>
* Author: James Shimek <jshimek@cray.com>
#ifndef _GNILND_GNILND_H_
#define _GNILND_GNILND_H_
+#ifdef HAVE_COMPAT_RDMA
+#include <linux/compat-2.6.h>
+#endif
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/time.h>
#include <asm/timex.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#define GNILND_FMABLK 64 /* default number of mboxes per fmablk */
#define GNILND_SCHED_NICE 0 /* default nice value for scheduler threads */
#define GNILND_COMPUTE 1 /* compute image */
+#define GNILND_FAST_RECONNECT 1 /* Fast Reconnect option */
#else
#define GNILND_SCHED_THREADS 3 /* default # of kgnilnd_scheduler threads */
#define GNILND_FMABLK 1024 /* default number of mboxes per fmablk */
#define GNILND_SCHED_NICE -20 /* default nice value for scheduler threads */
#define GNILND_COMPUTE 0 /* service image */
+#define GNILND_FAST_RECONNECT 0 /* Fast Reconnect option */
#endif
/* EXTRA_BITS are there to allow us to hide NOOP/CLOSE and anything else out of band */
/* need sane upper bound to limit copy overhead */
#define GNILND_MAX_IMMEDIATE (64<<10)
+/* Max number of connections to keep in purgatory per peer */
+#define GNILND_PURGATORY_MAX 5
+
/* payload size to add to the base mailbox size
* This is subtracting 2 from the concurrent_sends as 4 messages are included in the size
* gni_smsg_buff_size_needed calculates, the MAX_PAYLOAD is added to
int *kgn_sched_nice; /* nice value for kgnilnd scheduler threads */
int *kgn_reverse_rdma; /* Reverse RDMA setting */
int *kgn_eager_credits; /* allocated eager buffers */
- int *kgn_efault_lbug; /* Should we LBUG on receiving an EFAULT */
+ int *kgn_fast_reconn; /* fast reconnection on conn timeout */
+ int *kgn_efault_lbug; /* LBUG on receiving an EFAULT */
+ int *kgn_max_purgatory; /* # conns/peer to keep in purgatory */
#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
cfs_sysctl_table_header_t *kgn_sysctl; /* sysctl interface */
#endif
int gnd_id; /* device id, also index in kgn_devices */
__u32 gnd_nid; /* ph host ID translated to NID */
struct list_head gnd_fma_buffs; /* list of FMA memory blocks */
- struct semaphore gnd_fmablk_sem; /* semaphore for FMA block memory alloc/free */
+ struct mutex gnd_fmablk_mutex; /* mutex for FMA block memory alloc/free */
spinlock_t gnd_fmablk_lock; /* lock for mbox alloc/release */
atomic_t gnd_nfmablk; /* # of fmablk live */
atomic_t gnd_fmablk_vers; /* gnd_fma_bufs stamp */
int gnd_dgram_ready; /* dgrams need movin' */
struct list_head *gnd_dgrams; /* nid hash to dgrams */
atomic_t gnd_ndgrams; /* # dgrams extant */
- atomic_t gnd_nwcdgrams; /* # wildcard dgrams to post on device */
+ atomic_t gnd_nwcdgrams; /* # wildcard dgrams to post */
spinlock_t gnd_dgram_lock; /* serialize gnd_dgrams */
struct list_head gnd_map_list; /* list of all mapped regions */
int gnd_map_version; /* version flag for map list */
atomic_t gnc_sched_noop; /* # sched triggered NOOP */
unsigned int gnc_timeout; /* infer peer death if no rx for this many seconds */
__u32 gnc_cqid; /* my completion callback id (non-unique) */
- __u32 gnc_tx_seq; /* tx msg sequence number */
- __u32 gnc_rx_seq; /* rx msg sequence number */
+ atomic_t gnc_tx_seq; /* tx msg sequence number */
+ atomic_t gnc_rx_seq; /* rx msg sequence number */
+ struct mutex gnc_smsg_mutex; /* tx smsg sequence serialization */
+ struct mutex gnc_rdma_mutex; /* tx rdma sequence serialization */
__u64 gnc_tx_retrans; /* # retrans on SMSG */
atomic_t gnc_nlive_fma; /* # live FMA */
atomic_t gnc_nq_rdma; /* # queued (on device) RDMA */
wait_queue_head_t kgn_ruhroh_waitq; /* ruhroh thread wakeup */
int kgn_quiesce_trigger; /* should we quiesce ? */
atomic_t kgn_nquiesce; /* how many quiesced ? */
- struct semaphore kgn_quiesce_sem; /* serialize ruhroh task, startup and shutdown */
+ struct mutex kgn_quiesce_mutex; /* serialize ruhroh task, startup and shutdown */
int kgn_needs_reset; /* we need stack reset */
/* These next three members implement communication from gnilnd into
wait_queue_head_t kgn_reaper_waitq; /* reaper sleeps here */
spinlock_t kgn_reaper_lock; /* serialise */
- struct kmem_cache *kgn_rx_cache; /* rx descriptor space */
- struct kmem_cache *kgn_tx_cache; /* tx descriptor memory */
- struct kmem_cache *kgn_tx_phys_cache; /* tx phys descriptor memory */
+ struct kmem_cache *kgn_rx_cache; /* rx descriptor space */
+ struct kmem_cache *kgn_tx_cache; /* tx descriptor memory */
+ struct kmem_cache *kgn_tx_phys_cache; /* tx phys descriptor memory */
atomic_t kgn_ntx; /* # tx in use */
- struct kmem_cache *kgn_dgram_cache; /* outgoing datagrams */
+ struct kmem_cache *kgn_dgram_cache; /* outgoing datagrams */
struct page ***kgn_cksum_map_pages; /* page arrays for mapping pages on checksum */
- __u64 kgn_cksum_npages; /* Number of pages allocated for checksumming */
+ __u64 kgn_cksum_npages; /* # pages alloc'd for checksumming */
atomic_t kgn_nvmap_cksum; /* # times we vmapped for checksums */
atomic_t kgn_nvmap_short; /* # times we vmapped for short kiov */
atomic_t kgn_npending_unlink; /* # of peers pending unlink */
atomic_t kgn_npending_conns; /* # of conns with pending closes */
atomic_t kgn_npending_detach; /* # of conns with a pending detach */
- unsigned long kgn_last_scheduled; /* last time schedule was called in a sched thread */
- unsigned long kgn_last_condresched; /* last time cond_resched was called in a sched thread */
- atomic_t kgn_rev_offset; /* number of time REV rdma have been misaligned offsets */
- atomic_t kgn_rev_length; /* Number of times REV rdma have been misaligned lengths */
- atomic_t kgn_rev_copy_buff; /* Number of times REV rdma have had to make a copy buffer */
+ unsigned long kgn_last_scheduled; /* last time schedule was called */
+ unsigned long kgn_last_condresched; /* last time cond_resched was called */
+ atomic_t kgn_rev_offset; /* # of REV rdma w/misaligned offsets */
+ atomic_t kgn_rev_length; /* # of REV rdma w/misaligned lengths */
+ atomic_t kgn_rev_copy_buff; /* # of REV rdma buffer copies */
struct socket *kgn_sock; /* for Apollo */
+ unsigned long free_pages_limit; /* # of free pages reserved from fma block allocations */
+ int kgn_enable_gl_mutex; /* kgni api mtx enable */
} kgn_data_t;
extern kgn_data_t kgnilnd_data;
atomic_dec(&kgnilnd_data.kgn_nthreads);
}
+/* Try to take @lock, but only when kgnilnd_data.kgn_enable_gl_mutex is set.
+ *
+ * Returns whatever mutex_trylock() returns when the flag is non-zero.
+ * When the flag is zero the mutex is left untouched and 1 is returned,
+ * i.e. the caller is told to proceed as if the lock had been acquired.
+ */
+static inline int kgnilnd_gl_mutex_trylock(struct mutex *lock)
+{
+	/* locking disabled: report success without touching the mutex */
+	if (!kgnilnd_data.kgn_enable_gl_mutex)
+		return 1;
+
+	return mutex_trylock(lock);
+}
+
+/* Take @lock with mutex_lock() when kgnilnd_data.kgn_enable_gl_mutex is
+ * non-zero; a no-op otherwise.
+ */
+static inline void kgnilnd_gl_mutex_lock(struct mutex *lock)
+{
+	/* locking disabled: nothing to acquire */
+	if (!kgnilnd_data.kgn_enable_gl_mutex)
+		return;
+
+	mutex_lock(lock);
+}
+
+/* Release @lock with mutex_unlock() when kgnilnd_data.kgn_enable_gl_mutex
+ * is non-zero; a no-op otherwise.
+ */
+static inline void kgnilnd_gl_mutex_unlock(struct mutex *lock)
+{
+	/* locking disabled: nothing was acquired, so nothing to release */
+	if (!kgnilnd_data.kgn_enable_gl_mutex)
+		return;
+
+	mutex_unlock(lock);
+}
+
+/* Take @lock with mutex_lock() only when kgnilnd_data.kgn_enable_gl_mutex
+ * is zero; a no-op when that flag is set.
+ */
+static inline void kgnilnd_conn_mutex_lock(struct mutex *lock)
+{
+	/* flag set: this lock is skipped */
+	if (kgnilnd_data.kgn_enable_gl_mutex)
+		return;
+
+	mutex_lock(lock);
+}
+
+/* Release @lock with mutex_unlock() only when
+ * kgnilnd_data.kgn_enable_gl_mutex is zero; a no-op when that flag is set.
+ */
+static inline void kgnilnd_conn_mutex_unlock(struct mutex *lock)
+{
+	/* flag set: the matching lock call was skipped, so skip the unlock */
+	if (kgnilnd_data.kgn_enable_gl_mutex)
+		return;
+
+	mutex_unlock(lock);
+}
+
/* like mutex_trylock but with a jiffies spinner. This is to allow certain
* parts of the code to avoid a scheduler trip when the mutex is held
*
int ret;
unsigned long timeout;
+ if (!kgnilnd_data.kgn_enable_gl_mutex)
+ return 1;
+
LASSERT(!in_interrupt());
for (timeout = jiffies + 1; time_before(jiffies, timeout);) {
atomic_inc(&kgnilnd_data.kgn_nquiesce); \
CDEBUG(D_NET, "Waiting for thread pause to be over...\n"); \
while (kgnilnd_data.kgn_quiesce_trigger) { \
- set_current_state(TASK_INTERRUPTIBLE); \
- schedule_timeout(HZ); \
+ msleep_interruptible(MSEC_PER_SEC); \
} \
/* Mom, my homework is done */ \
CDEBUG(D_NET, "Waking up from thread pause\n"); \
void kgnilnd_cancel_peer_connect_locked(kgn_peer_t *peer, struct list_head *zombies);
int kgnilnd_close_stale_conns_locked(kgn_peer_t *peer, kgn_conn_t *newconn);
void kgnilnd_peer_alive(kgn_peer_t *peer);
-void kgnilnd_peer_notify(kgn_peer_t *peer, int error);
+void kgnilnd_peer_notify(kgn_peer_t *peer, int error, int alive);
void kgnilnd_close_conn_locked(kgn_conn_t *conn, int error);
void kgnilnd_close_conn(kgn_conn_t *conn, int error);
void kgnilnd_complete_closed_conn(kgn_conn_t *conn);
#undef DO_TYPE
-/* API wrapper functions - include late to pick up all of the other defines */
-#include "gnilnd_api_wrap.h"
-
/* pulls in tunables per platform and adds in nid/nic conversion
* if RCA wasn't available at build time */
#include "gnilnd_hss_ops.h"
#error "Undefined Network Hardware Type"
#endif
+/* API wrapper functions - include late to pick up all of the other defines */
+#include "gnilnd_api_wrap.h"
+
#endif /* _GNILND_GNILND_H_ */