#define DEBUG_SUBSYSTEM S_LND
-#include <libcfs/libcfs.h>
-#include <lnet/lnet.h>
#include <lnet/lib-lnet.h>
#include <gni_pub.h>
-#include "gnilnd_version.h"
+
+/*
+ * Convert a duration expressed in jiffies to whole seconds
+ * (truncating), going through milliseconds so HZ need not divide
+ * evenly into one second. Local replacement for the removed libcfs
+ * helper of the same name.
+ */
+static inline time_t cfs_duration_sec(long duration_jiffies)
+{
+ return jiffies_to_msecs(duration_jiffies) / MSEC_PER_SEC;
+}
+
+#ifdef CONFIG_SLAB
+#define GNILND_MBOX_SIZE KMALLOC_MAX_SIZE
+#else
+#define GNILND_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
+ (MAX_ORDER + PAGE_SHIFT - 1) : 25)
+#define GNILND_SHIFT_MAX GNILND_SHIFT_HIGH
+#define GNILND_MBOX_SIZE (1UL << GNILND_SHIFT_MAX)
+#endif
/* tunables determined at compile time */
(cfs_time_seconds(*kgnilnd_tunables.kgn_timeout * \
*kgnilnd_tunables.kgn_timeout))
+/* Should we use the no_retry flag with vzalloc */
+#define GNILND_VZALLOC_RETRY 0
+
/* reaper thread wakup interval */
#define GNILND_REAPER_THREAD_WAKE 1
/* reaper thread checks each conn NCHECKS time every kgnilnd_data.kgn_new_min_timeout */
#define GNILND_SCHED_NICE 0 /* default nice value for scheduler threads */
#define GNILND_COMPUTE 1 /* compute image */
#define GNILND_FAST_RECONNECT 1 /* Fast Reconnect option */
+#define GNILND_DEFAULT_CREDITS 64 /* Default number of simultaneous transmits */
#else
#define GNILND_FMABLK 1024 /* default number of mboxes per fmablk */
#define GNILND_SCHED_NICE -20 /* default nice value for scheduler threads */
#define GNILND_COMPUTE 0 /* service image */
#define GNILND_FAST_RECONNECT 0 /* Fast Reconnect option */
+#define GNILND_DEFAULT_CREDITS 256 /* Default number of simultaneous transmits */
#endif
/* EXTRA_BITS are there to allow us to hide NOOP/CLOSE and anything else out of band */
#define GNILND_BUF_IMMEDIATE_KIOV 2 /* immediate data */
#define GNILND_BUF_PHYS_UNMAPPED 3 /* physical: not mapped yet */
#define GNILND_BUF_PHYS_MAPPED 4 /* physical: mapped already */
-#define GNILND_BUF_VIRT_UNMAPPED 5 /* virtual: not mapped yet */
-#define GNILND_BUF_VIRT_MAPPED 6 /* virtual: mapped already */
#define GNILND_TX_WAITING_REPLY (1<<1) /* expecting to receive reply */
#define GNILND_TX_WAITING_COMPLETION (1<<2) /* waiting for smsg_send to complete */
int *kgn_thread_affinity; /* bind scheduler threads to cpus */
int *kgn_to_reconn_disable;/* disable reconnect after timeout */
int *kgn_thread_safe; /* use thread safe kgni API */
+ int *kgn_vzalloc_noretry; /* Should we pass the noretry flag */
} kgn_tunables_t;
typedef struct kgn_mbox_info {
atomic64_t gnd_nbytes_map; /* bytes of total GART maps - fma, tx, etc */
__u32 gnd_map_nphys; /* # TX phys mappings */
__u32 gnd_map_physnop; /* # TX phys pages mapped */
- __u32 gnd_map_nvirt; /* # TX virt mappings */
- __u64 gnd_map_virtnob; /* # TX virt bytes mapped */
spinlock_t gnd_map_lock; /* serialize gnd_map_XXX */
unsigned long gnd_next_map; /* next mapping attempt in jiffies */
int gnd_map_attempt; /* last map attempt # */
typedef struct kgn_net {
struct list_head gnn_list; /* chain on kgni_data::kgn_nets */
kgn_device_t *gnn_dev; /* device for this net */
- lnet_ni_t *gnn_ni; /* network interface instance */
+ struct lnet_ni *gnn_ni; /* network interface instance */
atomic_t gnn_refcount; /* # current references */
int gnn_shutdown; /* lnd_shutdown set */
__u16 gnn_netnum; /* stash netnum for quicker lookup */
kgn_tx_list_state_t tx_list_state;/* where in state machine is this TX ? */
struct list_head *tx_list_p; /* pointer to current list */
struct kgn_conn *tx_conn; /* owning conn */
- lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */
+ struct lnet_msg *tx_lntmsg[2]; /* ptl msgs to finalize on completion */
unsigned long tx_qtime; /* when tx started to wait for something (jiffies) */
unsigned long tx_cred_wait; /* time spend waiting for smsg creds */
struct list_head tx_map_list; /* list entry on device map list */
kgn_fma_memblock_t *gnc_fma_blk; /* pointer to fma block for our mailbox */
gni_smsg_attr_t gnpr_smsg_attr; /* my short msg. attributes */
spinlock_t gnc_tx_lock; /* protect tx alloc/free */
- __u8 gnc_tx_bits[GNILND_MAX_MSG_ID/8]; /* bit table for tx id */
+ unsigned long gnc_tx_bits[(GNILND_MAX_MSG_ID/8)/sizeof(unsigned long)]; /* bit table for tx id */
int gnc_next_tx; /* next tx to use in tx_ref_table */
kgn_tx_t **gnc_tx_ref_table; /* table of TX descriptors for this conn */
int gnc_mbox_id; /* id of mbox in fma_blk */
short gnp_connecting; /* connection forming */
short gnp_pending_unlink; /* need last conn close to trigger unlink */
int gnp_last_errno; /* last error conn saw */
- unsigned long gnp_last_alive; /* last time I had valid comms */
+ time64_t gnp_last_alive; /* last time I had valid comms */
int gnp_last_dgram_errno; /* last error dgrams saw */
unsigned long gnp_last_dgram_time; /* last time I tried to connect */
unsigned long gnp_reconnect_time; /* get_seconds() when reconnect OK */
typedef struct kgn_rx {
kgn_conn_t *grx_conn; /* connection */
kgn_msg_t *grx_msg; /* message */
- lnet_msg_t *grx_lntmsg; /* lnet msg for this rx (eager only) */
+ struct lnet_msg *grx_lntmsg; /* lnet msg for this rx (eager only) */
int grx_eager; /* if eager, we copied msg to somewhere */
struct timespec grx_received; /* time this msg received */
} kgn_rx_t;
atomic_t kgn_rev_offset; /* # of REV rdma w/misaligned offsets */
atomic_t kgn_rev_length; /* # of REV rdma have misaligned len */
atomic_t kgn_rev_copy_buff; /* # of REV rdma buffer copies */
- struct socket *kgn_sock; /* for Apollo */
unsigned long free_pages_limit; /* # of free pages reserve from fma block allocations */
int kgn_enable_gl_mutex; /* kgni api mtx enable */
} kgn_data_t;
+/*
+ * Zeroed vmalloc wrapper for gnilnd allocations.
+ * Uses GFP_NOIO so the allocation cannot recurse into the I/O path,
+ * and adds __GFP_NORETRY only when the kgn_vzalloc_noretry tunable is
+ * set, letting the allocator try harder by default.
+ */
static inline void *kgnilnd_vzalloc(int size)
{
- void *ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_NORETRY | __GFP_ZERO,
-		      PAGE_KERNEL);
+ void *ret;
+ if (*kgnilnd_tunables.kgn_vzalloc_noretry)
+ ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_NORETRY |
+ __GFP_ZERO,
+ PAGE_KERNEL);
+ else
+ ret = __vmalloc(size, __GFP_HIGHMEM | GFP_NOIO | __GFP_ZERO,
+ PAGE_KERNEL);
+
 LIBCFS_ALLOC_POST(ret, size);
 return ret;
}
vfree(ptr);
}
+/* as of kernel version 4.2, set_mb is replaced with smp_store_mb */
+#ifndef set_mb
+#define set_mb smp_store_mb
+#endif
+
/* Copied from DEBUG_REQ in Lustre - the dance is needed to save stack space */
extern void
#error "this code uses actions inside LASSERT for ref counting"
#endif
-#define kgnilnd_admin_addref(atomic) \
-do { \
- int val = atomic_inc_return(&atomic); \
- LASSERTF(val > 0, #atomic " refcount %d\n", val); \
- CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \
+/* Take an admin reference: bump the counter and assert it went positive
+ * (a zero/negative result means the increment raced with a bad decref). */
+#define kgnilnd_admin_addref(atomic) \
+do { \
+ int val = atomic_inc_return(&atomic); \
+ LASSERTF(val > 0, #atomic " refcount %d\n", val); \
+ CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \
} while (0)
-#define kgnilnd_admin_decref(atomic) \
-do { \
- int val = atomic_dec_return(&atomic); \
- LASSERTF(val >=0, #atomic " refcount %d\n", val); \
- CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \
+/* Drop an admin reference: assert the counter never goes negative, and
+ * on the final release wake anyone blocked in a wait_var_event() on
+ * kgnilnd_data (presumably the shutdown path -- confirm at call sites). */
+#define kgnilnd_admin_decref(atomic) \
+do { \
+ int val = atomic_dec_return(&atomic); \
+ LASSERTF(val >= 0, #atomic " refcount %d\n", val); \
+ CDEBUG(D_NETTRACE, #atomic " refcount %d\n", val); \
+ if (!val) \
+ wake_up_var(&kgnilnd_data); \
}while (0)
#define kgnilnd_net_addref(net) \
+/* Nonzero when the TX buffer is in a mapped state. Virtual-mapping
+ * support (GNILND_BUF_VIRT_*) has been removed, so only the physical
+ * mapped state counts now. */
static inline int
kgnilnd_tx_mapped(kgn_tx_t *tx)
{
- return (tx->tx_buftype == GNILND_BUF_VIRT_MAPPED ||
-  tx->tx_buftype == GNILND_BUF_PHYS_MAPPED);
+ return tx->tx_buftype == GNILND_BUF_PHYS_MAPPED;
}
static inline struct list_head *
int kgnilnd_dev_init(kgn_device_t *dev);
void kgnilnd_dev_fini(kgn_device_t *dev);
-int kgnilnd_startup(lnet_ni_t *ni);
-void kgnilnd_shutdown(lnet_ni_t *ni);
+int kgnilnd_startup(struct lnet_ni *ni);
+void kgnilnd_shutdown(struct lnet_ni *ni);
int kgnilnd_base_startup(void);
void kgnilnd_base_shutdown(void);
void kgnilnd_unmap_fma_blocks(kgn_device_t *device);
void kgnilnd_free_phys_fmablk(kgn_device_t *device);
-int kgnilnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-void kgnilnd_query(lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when);
-int kgnilnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kgnilnd_eager_recv(lnet_ni_t *ni, void *private,
- lnet_msg_t *lntmsg, void **new_private);
-int kgnilnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+int kgnilnd_ctl(struct lnet_ni *ni, unsigned int cmd, void *arg);
+int kgnilnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
+int kgnilnd_eager_recv(struct lnet_ni *ni, void *private,
+ struct lnet_msg *lntmsg, void **new_private);
+int kgnilnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
int delayed, unsigned int niov,
- struct kvec *iov, lnet_kiov_t *kiov,
+ struct bio_vec *kiov,
unsigned int offset, unsigned int mlen, unsigned int rlen);
-__u16 kgnilnd_cksum_kiov(unsigned int nkiov, lnet_kiov_t *kiov, unsigned int offset, unsigned int nob, int dump_blob);
+__u16 kgnilnd_cksum_kiov(unsigned int nkiov, struct bio_vec *kiov,
+ unsigned int offset, unsigned int nob, int dump_blob);
/* purgatory functions */
void kgnilnd_add_purgatory_locked(kgn_conn_t *conn, kgn_peer_t *peer);
void kgnilnd_peer_increase_reconnect_locked(kgn_peer_t *peer);
void kgnilnd_queue_reply(kgn_conn_t *conn, kgn_tx_t *tx);
void kgnilnd_queue_tx(kgn_conn_t *conn, kgn_tx_t *tx);
-void kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, lnet_process_id_t *target);
+void kgnilnd_launch_tx(kgn_tx_t *tx, kgn_net_t *net, struct lnet_process_id *target);
int kgnilnd_send_mapped_tx(kgn_tx_t *tx, int try_map_if_full);
void kgnilnd_consume_rx(kgn_rx_t *rx);