* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Author: Eric Barton <eric@bartonsoftware.com>
*/
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
+#include <linux/version.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/unistd.h>
#include <linux/uio.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <linux/kmod.h>
#include <linux/sysctl.h>
#include <linux/pci.h>
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,32)
+#include <linux/pci-dma.h>
+#endif
#include <net/sock.h>
#include <linux/in.h>
#include <lnet/lib-lnet.h>
#include <lnet/lnet-sysctl.h>
+#ifdef HAVE_COMPAT_RDMA
+#include <linux/compat-2.6.h>
+#endif
#include <rdma/rdma_cm.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_verbs.h>
typedef struct
{
- int *kib_dev_failover; /* HCA failover */
- unsigned int *kib_service; /* IB service number */
- int *kib_min_reconnect_interval; /* first failed connection retry... */
- int *kib_max_reconnect_interval; /* ...exponentially increasing to this */
- int *kib_cksum; /* checksum kib_msg_t? */
- int *kib_timeout; /* comms timeout (seconds) */
- int *kib_keepalive; /* keepalive timeout (seconds) */
- int *kib_ntx; /* # tx descs */
- int *kib_credits; /* # concurrent sends */
- int *kib_peertxcredits; /* # concurrent sends to 1 peer */
- int *kib_peerrtrcredits; /* # per-peer router buffer credits */
- int *kib_peercredits_hiw; /* # when eagerly to return credits */
- int *kib_peertimeout; /* seconds to consider peer dead */
- char **kib_default_ipif; /* default IPoIB interface */
- int *kib_retry_count;
- int *kib_rnr_retry_count;
- int *kib_concurrent_sends; /* send work queue sizing */
- int *kib_ib_mtu; /* IB MTU */
- int *kib_map_on_demand; /* map-on-demand if RD has more fragments
- * than this value, 0 disable map-on-demand */
- int *kib_pmr_pool_size; /* # physical MR in pool */
- int *kib_fmr_pool_size; /* # FMRs in pool */
- int *kib_fmr_flush_trigger; /* When to trigger FMR flush */
- int *kib_fmr_cache; /* enable FMR pool cache? */
+ int *kib_dev_failover; /* HCA failover */
+ unsigned int *kib_service; /* IB service number */
+ int *kib_min_reconnect_interval; /* first failed connection retry... */
+ int *kib_max_reconnect_interval; /* ...exponentially increasing to this */
+ int *kib_cksum; /* checksum kib_msg_t? */
+ int *kib_timeout; /* comms timeout (seconds) */
+ int *kib_keepalive; /* keepalive timeout (seconds) */
+ int *kib_ntx; /* # tx descs */
+ int *kib_credits; /* # concurrent sends */
+ int *kib_peertxcredits; /* # concurrent sends to 1 peer */
+ int *kib_peerrtrcredits; /* # per-peer router buffer credits */
+ int *kib_peercredits_hiw; /* # when eagerly to return credits */
+ int *kib_peertimeout; /* seconds to consider peer dead */
+ char **kib_default_ipif; /* default IPoIB interface */
+ int *kib_retry_count; /* NOTE(review): presumably the IB transport retry count - confirm */
+ int *kib_rnr_retry_count; /* NOTE(review): presumably the IB RNR (receiver-not-ready) retry count - confirm */
+ int *kib_concurrent_sends; /* send work queue sizing */
+ int *kib_ib_mtu; /* IB MTU */
+ int *kib_map_on_demand; /* map-on-demand if RD has more fragments
+ * than this value; 0 disables map-on-demand */
+ int *kib_pmr_pool_size; /* # physical MR in pool */
+ int *kib_fmr_pool_size; /* # FMRs in pool */
+ int *kib_fmr_flush_trigger; /* When to trigger FMR flush */
+ int *kib_fmr_cache; /* enable FMR pool cache? */
#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */
+ struct ctl_table_header *kib_sysctl; /* sysctl interface */
#endif
- int *kib_require_priv_port;/* accept only privileged ports */
- int *kib_use_priv_port; /* use privileged port for active connect */
+ int *kib_require_priv_port;/* accept only privileged ports */
+ int *kib_use_priv_port; /* use privileged port for active connect */
/* # threads on each CPT */
int *kib_nscheds;
} kib_tunables_t;
typedef struct kib_poolset
{
- cfs_spinlock_t ps_lock; /* serialize */
+ spinlock_t ps_lock; /* serialize */
struct kib_net *ps_net; /* network it belongs to */
char ps_name[IBLND_POOL_NAME_LEN]; /* pool set name */
cfs_list_t ps_pool_list; /* list of pools */
typedef struct
{
- cfs_spinlock_t fps_lock; /* serialize */
+ spinlock_t fps_lock; /* serialize */
struct kib_net *fps_net; /* IB network */
cfs_list_t fps_pool_list; /* FMR pool list */
cfs_list_t fps_failed_pool_list; /* FMR pool list */
struct kib_sched_info {
/* serialise */
- cfs_spinlock_t ibs_lock;
+ spinlock_t ibs_lock;
/* schedulers sleep here */
- cfs_waitq_t ibs_waitq;
+ wait_queue_head_t ibs_waitq;
/* conns to check for rx completions */
cfs_list_t ibs_conns;
/* number of scheduler threads */
/* list head of failed devices */
cfs_list_t kib_failed_devs;
/* schedulers sleep here */
- cfs_waitq_t kib_failover_waitq;
+ wait_queue_head_t kib_failover_waitq;
cfs_atomic_t kib_nthreads; /* # live threads */
/* stabilize net/dev/peer/conn ops */
- cfs_rwlock_t kib_global_lock;
+ rwlock_t kib_global_lock;
/* hash table of all my known peers */
cfs_list_t *kib_peers;
/* size of kib_peers */
/* connections with zero refcount */
cfs_list_t kib_connd_zombies;
/* connection daemon sleeps here */
- cfs_waitq_t kib_connd_waitq;
- cfs_spinlock_t kib_connd_lock; /* serialise */
+ wait_queue_head_t kib_connd_waitq;
+ spinlock_t kib_connd_lock; /* serialise */
struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
/* percpt data for schedulers */
struct kib_sched_info **kib_scheds;
int ibc_outstanding_credits; /* # credits to return */
int ibc_reserved_credits;/* # ACK/DONE msg credits */
int ibc_comms_error; /* set on comms error */
- int ibc_nrx:16; /* receive buffers owned */
- int ibc_scheduled:1; /* scheduled for attention */
- int ibc_ready:1; /* CQ callback fired */
+ unsigned int ibc_nrx:16; /* receive buffers owned */
+ unsigned int ibc_scheduled:1; /* scheduled for attention */
+ unsigned int ibc_ready:1; /* CQ callback fired */
/* time of last send */
unsigned long ibc_last_send;
/** link chain for kiblnd_check_conns only */
cfs_list_t ibc_tx_queue_nocred;/* sends that don't need a credit */
cfs_list_t ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */
cfs_list_t ibc_active_txs; /* active tx awaiting completion */
- cfs_spinlock_t ibc_lock; /* serialise */
+ spinlock_t ibc_lock; /* serialise */
kib_rx_t *ibc_rxs; /* the rx descs */
kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */
cfs_atomic_inc(&(conn)->ibc_refcount); \
} while (0)
-#define kiblnd_conn_decref(conn) \
-do { \
- unsigned long flags; \
- \
- CDEBUG(D_NET, "conn[%p] (%d)--\n", \
- (conn), cfs_atomic_read(&(conn)->ibc_refcount)); \
- LASSERT_ATOMIC_POS(&(conn)->ibc_refcount); \
- if (cfs_atomic_dec_and_test(&(conn)->ibc_refcount)) { \
- cfs_spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \
- cfs_list_add_tail(&(conn)->ibc_list, \
- &kiblnd_data.kib_connd_zombies); \
- cfs_waitq_signal(&kiblnd_data.kib_connd_waitq); \
- cfs_spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
- } \
+#define kiblnd_conn_decref(conn) /* drop one ref on conn->ibc_refcount; conn is evaluated more than once */ \
+do { \
+ unsigned long flags; /* IRQ state saved/restored around kib_connd_lock */ \
+ \
+ CDEBUG(D_NET, "conn[%p] (%d)--\n", \
+ (conn), cfs_atomic_read(&(conn)->ibc_refcount)); /* logs the count before the decrement */ \
+ LASSERT_ATOMIC_POS(&(conn)->ibc_refcount); /* NOTE(review): presumably asserts the count is still positive - confirm */ \
+ if (cfs_atomic_dec_and_test(&(conn)->ibc_refcount)) { /* NOTE(review): presumably taken only when the count reaches zero - confirm */ \
+ spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \
+ cfs_list_add_tail(&(conn)->ibc_list, /* queue conn on the zombie list ("connections with zero refcount") */ \
+ &kiblnd_data.kib_connd_zombies); \
+ wake_up(&kiblnd_data.kib_connd_waitq); /* wake sleepers on the connd waitq ("connection daemon sleeps here") */ \
+ spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
+ } \
} while (0)
#define kiblnd_peer_addref(peer) \
static inline int
kiblnd_send_keepalive(kib_conn_t *conn)
{
- return (*kiblnd_tunables.kib_keepalive > 0) &&
- cfs_time_after(jiffies, conn->ibc_last_send +
- *kiblnd_tunables.kib_keepalive*CFS_HZ);
+ return (*kiblnd_tunables.kib_keepalive > 0) && /* always 0 when the keepalive tunable is <= 0 */
+ cfs_time_after(jiffies, conn->ibc_last_send + /* NOTE(review): presumably true once jiffies is past last send + interval - confirm cfs_time_after semantics */
+ *kiblnd_tunables.kib_keepalive*HZ); /* tunable is documented in seconds; multiplied by HZ to compare against jiffies */
}
static inline int
int kiblnd_connd (void *arg);
int kiblnd_scheduler(void *arg);
-int kiblnd_thread_start (int (*fn)(void *arg), void *arg);
+int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name); /* NOTE(review): name presumably labels the started thread - confirm against the definition */
int kiblnd_failover_thread (void *arg);
int kiblnd_alloc_pages(kib_pages_t **pp, int cpt, int npages);