*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2015, Intel Corporation.
+ * Copyright (c) 2011, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <lnet/lnet.h>
#include <lnet/lib-lnet.h>
-#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */
+#define IBLND_PEER_HASH_SIZE 101 /* # peer_ni lists */
/* # scheduler loops before reschedule */
#define IBLND_RESCHED 100
int *kib_retry_count;
int *kib_rnr_retry_count;
int *kib_ib_mtu; /* IB MTU */
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- struct ctl_table_header *kib_sysctl; /* sysctl interface */
-#endif
int *kib_require_priv_port;/* accept only privileged ports */
int *kib_use_priv_port; /* use privileged port for active connect */
/* # threads on each CPT */
#define IBLND_MSG_QUEUE_SIZE_V1 8 /* V1 only : # messages/RDMAs in-flight */
#define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when eagerly to return credits */
-#define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */
-#define IBLND_CREDITS_MAX ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1) /* Max # of peer credits */
+#define IBLND_CREDITS_DEFAULT 8 /* default # of peer_ni credits */
+#define IBLND_CREDITS_MAX ((typeof(((kib_msg_t*) 0)->ibm_credits)) - 1) /* Max # of peer_ni credits */
/* when eagerly to return credits */
#define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \
IBLND_CREDIT_HIGHWATER_V1 : \
t->lnd_peercredits_hiw)
-#ifdef HAVE_RDMA_CREATE_ID_4ARG
-#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)
+#ifdef HAVE_RDMA_CREATE_ID_5ARG
+# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \
+ cb, dev, \
+ ps, qpt)
#else
-#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+# ifdef HAVE_RDMA_CREATE_ID_4ARG
+# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, \
+ ps, qpt)
+# else
+# define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps)
+# endif
#endif
/* 2 OOB shall suffice for 1 keepalive and 1 returning credits */
cfs_time_t fps_next_retry;
} kib_fmr_poolset_t;
+#ifndef HAVE_IB_RDMA_WR
+struct ib_rdma_wr {
+ struct ib_send_wr wr;
+};
+#endif
+
struct kib_fast_reg_descriptor { /* For fast registration */
struct list_head frd_list;
- struct ib_send_wr frd_inv_wr;
- struct ib_send_wr frd_fastreg_wr;
- struct ib_mr *frd_mr;
+ struct ib_rdma_wr frd_inv_wr;
+#ifdef HAVE_IB_MAP_MR_SG
+ struct ib_reg_wr frd_fastreg_wr;
+#else
+ struct ib_rdma_wr frd_fastreg_wr;
struct ib_fast_reg_page_list *frd_frpl;
+#endif
+ struct ib_mr *frd_mr;
bool frd_valid;
};
/* schedulers sleep here */
wait_queue_head_t kib_failover_waitq;
atomic_t kib_nthreads; /* # live threads */
- /* stabilize net/dev/peer/conn ops */
+ /* stabilize net/dev/peer_ni/conn ops */
rwlock_t kib_global_lock;
/* hash table of all my known peers */
struct list_head *kib_peers;
* The second that peers are pulled out from \a kib_reconn_wait
* for reconnection.
*/
- unsigned int kib_reconn_sec;
+ time64_t kib_reconn_sec;
/* connection daemon sleeps here */
wait_queue_head_t kib_connd_waitq;
spinlock_t kib_connd_lock; /* serialise */
__u16 ibr_version; /* sender's version */
__u8 ibr_why; /* reject reason */
__u8 ibr_padding; /* padding */
- __u64 ibr_incarnation; /* incarnation of peer */
+ __u64 ibr_incarnation; /* incarnation of peer_ni */
kib_connparams_t ibr_cp; /* connection parameters */
} WIRE_ATTR kib_rej_t;
#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */
#define IBLND_REJECT_FATAL 3 /* Anything else */
-#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */
-#define IBLND_REJECT_CONN_STALE 5 /* stale peer */
+#define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer_ni */
+#define IBLND_REJECT_CONN_STALE 5 /* stale peer_ni */
-/* peer's rdma frags doesn't match mine */
+/* peer_ni's rdma frags doesn't match mine */
#define IBLND_REJECT_RDMA_FRAGS 6
-/* peer's msg queue size doesn't match mine */
+/* peer_ni's msg queue size doesn't match mine */
#define IBLND_REJECT_MSG_QUEUE_SIZE 7
/***********************************************************************/
#define IBLND_POSTRX_DONT_POST 0 /* don't post */
#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */
-#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
+#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer_ni back 1 credit */
#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give myself back 1 reserved credit */
typedef struct kib_tx /* transmit message */
short tx_sending;
/* queued for sending */
short tx_queued;
- /* waiting for peer */
+ /* waiting for peer_ni */
short tx_waiting;
/* LNET completion status */
int tx_status;
/* # send work items */
int tx_nwrq;
/* send work items... */
- struct ib_send_wr *tx_wrq;
+ struct ib_rdma_wr *tx_wrq;
/* ...and their memory */
struct ib_sge *tx_sge;
/* rdma descriptor */
{
/* scheduler information */
struct kib_sched_info *ibc_sched;
- /* owning peer */
+ /* owning peer_ni */
struct kib_peer *ibc_peer;
/* HCA bound on */
kib_hca_dev_t *ibc_hdev;
- /* stash on peer's conn list */
+ /* stash on peer_ni's conn list */
struct list_head ibc_list;
/* schedule for attention */
struct list_head ibc_sched_list;
typedef struct kib_peer
{
- /* stash on global peer list */
+ /* stash on global peer_ni list */
struct list_head ibp_list;
/* who's on the other end(s) */
lnet_nid_t ibp_nid;
struct list_head ibp_conns;
/* msgs waiting for a conn */
struct list_head ibp_tx_queue;
- /* incarnation of peer */
+ /* incarnation of peer_ni */
__u64 ibp_incarnation;
/* when (in jiffies) I was last alive */
cfs_time_t ibp_last_alive;
/* # users */
atomic_t ibp_refcount;
- /* version of peer */
+ /* version of peer_ni */
__u16 ibp_version;
/* current passive connection attempts */
unsigned short ibp_accepting;
/* current active connection attempts */
unsigned short ibp_connecting;
- /* reconnect this peer later */
+ /* reconnect this peer_ni later */
unsigned short ibp_reconnecting:1;
+ /* counter of how many times we triggered a conn race */
+ unsigned char ibp_races;
/* # consecutive reconnection attempts to this peer */
unsigned int ibp_reconnected;
- /* errno on closing this peer */
+ /* errno on closing this peer_ni */
int ibp_error;
/* max map_on_demand */
__u16 ibp_max_frags;
/* max_peer_credits */
__u16 ibp_queue_depth;
-} kib_peer_t;
+} kib_peer_ni_t;
#ifndef HAVE_IB_INC_RKEY
/**
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
int mod;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
mod = tunables->lnd_map_on_demand;
return mod != 0 ? mod : IBLND_MAX_RDMA_FRAGS;
}
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
int concurrent_sends;
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
concurrent_sends = tunables->lnd_concurrent_sends;
if (version == IBLND_MSG_VERSION_1) {
} \
} while (0)
-#define kiblnd_peer_addref(peer) \
+#define kiblnd_peer_addref(peer_ni) \
do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- atomic_inc(&(peer)->ibp_refcount); \
+ CDEBUG(D_NET, "peer_ni[%p] -> %s (%d)++\n", \
+ (peer_ni), libcfs_nid2str((peer_ni)->ibp_nid), \
+ atomic_read (&(peer_ni)->ibp_refcount)); \
+ atomic_inc(&(peer_ni)->ibp_refcount); \
} while (0)
-#define kiblnd_peer_decref(peer) \
+#define kiblnd_peer_decref(peer_ni) \
do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT_ATOMIC_POS(&(peer)->ibp_refcount); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kiblnd_destroy_peer(peer); \
+ CDEBUG(D_NET, "peer_ni[%p] -> %s (%d)--\n", \
+ (peer_ni), libcfs_nid2str((peer_ni)->ibp_nid), \
+ atomic_read (&(peer_ni)->ibp_refcount)); \
+ LASSERT_ATOMIC_POS(&(peer_ni)->ibp_refcount); \
+ if (atomic_dec_and_test(&(peer_ni)->ibp_refcount)) \
+ kiblnd_destroy_peer(peer_ni); \
} while (0)
static inline bool
-kiblnd_peer_connecting(kib_peer_t *peer)
+kiblnd_peer_connecting(kib_peer_ni_t *peer_ni)
{
- return peer->ibp_connecting != 0 ||
- peer->ibp_reconnecting != 0 ||
- peer->ibp_accepting != 0;
+ return peer_ni->ibp_connecting != 0 ||
+ peer_ni->ibp_reconnecting != 0 ||
+ peer_ni->ibp_accepting != 0;
}
static inline bool
-kiblnd_peer_idle(kib_peer_t *peer)
+kiblnd_peer_idle(kib_peer_ni_t *peer_ni)
{
- return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
+ return !kiblnd_peer_connecting(peer_ni) && list_empty(&peer_ni->ibp_conns);
}
static inline struct list_head *
}
static inline int
-kiblnd_peer_active (kib_peer_t *peer)
+kiblnd_peer_active (kib_peer_ni_t *peer_ni)
{
- /* Am I in the peer hash table? */
- return !list_empty(&peer->ibp_list);
+ /* Am I in the peer_ni hash table? */
+ return !list_empty(&peer_ni->ibp_list);
}
static inline kib_conn_t *
-kiblnd_get_conn_locked (kib_peer_t *peer)
+kiblnd_get_conn_locked (kib_peer_ni_t *peer_ni)
{
- LASSERT(!list_empty(&peer->ibp_conns));
+ LASSERT(!list_empty(&peer_ni->ibp_conns));
/* just return the first connection */
- return list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list);
+ return list_entry(peer_ni->ibp_conns.next, kib_conn_t, ibc_list);
}
static inline int
struct lnet_ioctl_config_o2iblnd_tunables *tunables;
LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
- tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;
+ tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
if (conn->ibc_outstanding_credits <
IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
void kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node);
struct list_head *kiblnd_pool_alloc_node(kib_poolset_t *ps);
-int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, __u64 *pages, int npages,
- __u32 nob, __u64 iov, bool is_rx, kib_fmr_t *fmr);
+int kiblnd_fmr_pool_map(kib_fmr_poolset_t *fps, kib_tx_t *tx,
+ kib_rdma_desc_t *rd, __u32 nob, __u64 iov,
+ kib_fmr_t *fmr);
void kiblnd_fmr_pool_unmap(kib_fmr_t *fmr, int status);
int kiblnd_tunables_setup(struct lnet_ni *ni);
int kiblnd_tunables_init(void);
-void kiblnd_tunables_fini(void);
int kiblnd_connd (void *arg);
int kiblnd_scheduler(void *arg);
int kiblnd_translate_mtu(int value);
int kiblnd_dev_failover(kib_dev_t *dev);
-int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
-void kiblnd_destroy_peer (kib_peer_t *peer);
-bool kiblnd_reconnect_peer(kib_peer_t *peer);
+int kiblnd_create_peer(lnet_ni_t *ni, kib_peer_ni_t **peerp, lnet_nid_t nid);
+void kiblnd_destroy_peer (kib_peer_ni_t *peer);
+bool kiblnd_reconnect_peer(kib_peer_ni_t *peer);
void kiblnd_destroy_dev (kib_dev_t *dev);
-void kiblnd_unlink_peer_locked (kib_peer_t *peer);
-kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
-int kiblnd_close_stale_conns_locked (kib_peer_t *peer,
+void kiblnd_unlink_peer_locked (kib_peer_ni_t *peer_ni);
+kib_peer_ni_t *kiblnd_find_peer_locked(struct lnet_ni *ni, lnet_nid_t nid);
+int kiblnd_close_stale_conns_locked (kib_peer_ni_t *peer_ni,
int version, __u64 incarnation);
-int kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why);
+int kiblnd_close_peer_conns_locked (kib_peer_ni_t *peer_ni, int why);
-kib_conn_t *kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
+kib_conn_t *kiblnd_create_conn(kib_peer_ni_t *peer_ni, struct rdma_cm_id *cmid,
int state, int version);
void kiblnd_destroy_conn(kib_conn_t *conn, bool free_conn);
void kiblnd_close_conn (kib_conn_t *conn, int error);