tbd Sun Microsystems, Inc.
- * version 1.6.8
+ * version 1.8.1
* Support for networks:
socklnd - any kernel supported by Lustre,
qswlnd - Qsnet kernel modules 5.20 and later,
mxlnd - MX 1.2.1 or later,
ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
-Severity :
-Bugzilla :
-Description:
-Details :
+Severity :
+Bugzilla :
+Description:
+Details :
---------------------------------------------------------------------------
+-------------------------------------------------------------------------------
-01-15-2009 Sun Microsystems, Inc.
- * version 1.6.7
+12-31-2008 Sun Microsystems, Inc.
+ * version 1.8.0
* Support for networks:
socklnd - any kernel supported by Lustre,
qswlnd - Qsnet kernel modules 5.20 and later,
Description:
Details :
+Severity : major
+Bugzilla : 15983
+Description: workaround for OOM from o2iblnd
+Details    : OFED needs to allocate a large contiguous chunk of memory for
+             the QP while creating a connection for o2iblnd; OOM can happen
+             if no such contiguous memory chunk is available.
+             QP size is determined by the concurrent_sends and max_fragments
+             settings of o2iblnd. Users may now specify a smaller value for
+             concurrent_sends (e.g. concurrent_sends=7), which decreases the
+             memory block size required to create the QP.
---------------------------------------------------------------------------
+Severity : major
+Bugzilla : 15093
+Description: support zero-copy receive for Chelsio devices
+Details    : The Chelsio driver can support zero-copy receive for iov[1]
+             if it is contiguous and large enough.
-2008-08-31 Sun Microsystems, Inc.
- * version 1.6.6
- * Support for networks:
- socklnd - any kernel supported by Lustre,
- qswlnd - Qsnet kernel modules 5.20 and later,
- openiblnd - IbGold 1.8.2,
- o2iblnd - OFED 1.1, 1.2.0, 1.2.5, and 1.3
- viblnd - Voltaire ibhost 3.4.5 and later,
- ciblnd - Topspin 3.2.0,
- iiblnd - Infiniserv 3.3 + PathBits patch,
- gmlnd - GM 2.1.22 and later,
- mxlnd - MX 1.2.1 or later,
- ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x
+Severity : normal
+Bugzilla : 13490
+Description: fix credit flow deadlock in uptllnd
-Severity :
-Bugzilla :
-Description:
-Details :
+Severity : normal
+Bugzilla : 16308
+Description: finalize network operations in a reasonable time
+Details    : conf-sanity test_32a couldn't stop the OST and MDS because it
+             tried to access a non-existent peer and the TCP connect took
+             quite a long time to time out.
+
+Severity : major
+Bugzilla : 16338
+Description: Continuous recovery on 33 of 413 nodes after Lustre OSS failure
+Details    : A lost reference on a conn prevents the peer from being
+             destroyed, which could block new peer creation once the peer
+             count has reached its upper limit.
+
+Severity : normal
+Bugzilla : 16102
+Description: LNET Selftest results in Soft lockup on OSS CPU
+Details    : only hits when 8 or more o2ib clients are involved and a session
+             is torn down with 'lst end_session' without a preceding
+             'lst stop'.
+
+Severity : minor
+Bugzilla : 16321
+Description: concurrent_sends in IB LNDs should not be changeable at run time
+Details : concurrent_sends in IB LNDs should not be changeable at run time
Severity : normal
Bugzilla : 15272
AC_MSG_RESULT([disabled])
else
o2ib_found=false
+
for O2IBPATH in $O2IBPATHS; do
if test \( -f ${O2IBPATH}/include/rdma/rdma_cm.h -a \
-f ${O2IBPATH}/include/rdma/ib_cm.h -a \
-f ${O2IBPATH}/include/rdma/ib_fmr_pool.h \); then
o2ib_found=true
break
- fi
+ fi
done
+
if ! $o2ib_found; then
AC_MSG_RESULT([no])
case $ENABLEO2IB in
fi
fi
- # version checking is a hack and isn't reliable,
- # we need verify it with each new ofed release
-
- if grep -q ib_dma_map_single \
- ${O2IBPATH}/include/rdma/ib_verbs.h; then
- if grep -q comp_vector \
- ${O2IBPATH}/include/rdma/ib_verbs.h; then
- IBLND_OFED_VERSION="1025"
- else
- IBLND_OFED_VERSION="1020"
- fi
- else
- IBLND_OFED_VERSION="1010"
- fi
+ LB_LINUX_TRY_COMPILE([
+ #include <linux/version.h>
+ #include <linux/pci.h>
+ #if !HAVE_GFP_T
+ typedef int gfp_t;
+ #endif
+ #include <rdma/ib_verbs.h>
+ ],[
+ ib_dma_map_single(NULL, NULL, 0, 0);
+ return 0;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_OFED_IB_DMA_MAP, 1,
+ [ib_dma_map_single defined])
+ ],[
+        AC_MSG_RESULT(no)
+ ])
- AC_DEFINE_UNQUOTED(IBLND_OFED_VERSION, $IBLND_OFED_VERSION,
- [OFED version])
+ LB_LINUX_TRY_COMPILE([
+ #include <linux/version.h>
+ #include <linux/pci.h>
+ #if !HAVE_GFP_T
+ typedef int gfp_t;
+ #endif
+ #include <rdma/ib_verbs.h>
+ ],[
+ ib_create_cq(NULL, NULL, NULL, NULL, 0, 0);
+ return 0;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_OFED_IB_COMP_VECTOR, 1,
+ [has completion vector])
+ ],[
+        AC_MSG_RESULT(no)
+ ])
EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
fi
AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT, 1,
[kmem_cache_destroy(cachep) return int])
],[
- AC_MSG_RESULT(NO)
+ AC_MSG_RESULT(no)
])
])
AC_DEFINE(HAVE_ATOMIC_PANIC_NOTIFIER, 1,
[panic_notifier_list is atomic_notifier_head])
],[
- AC_MSG_RESULT(NO)
+ AC_MSG_RESULT(no)
])
])
AC_DEFINE(HAVE_3ARGS_INIT_WORK, 1,
[INIT_WORK use 3 args and store data inside])
],[
- AC_MSG_RESULT(NO)
+ AC_MSG_RESULT(no)
])
])
AC_DEFINE(HAVE_2ARGS_REGISTER_SYSCTL, 1,
[register_sysctl_table want 2 args])
],[
- AC_MSG_RESULT(NO)
+ AC_MSG_RESULT(no)
])
])
AC_DEFINE(HAVE_KMEM_CACHE, 1,
[kernel has struct kmem_cache])
],[
- AC_MSG_RESULT(NO)
+ AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DTOR, 1,
[kmem_cache_create has dtor argument])
],[
- AC_MSG_RESULT(NO)
+ AC_MSG_RESULT(no)
])
])
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
#ifndef __LIBCFS_LIBCFS_H__
#define __LIBCFS_LIBCFS_H__
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
#ifndef __LIBCFS_LINUX_KP30_H__
#define __LIBCFS_LINUX_KP30_H__
.procname = "concurrent_sends",
.data = &concurrent_sends,
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0444,
.proc_handler = &proc_dointvec
},
{0}
static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
{
.ctl_name = CTL_IIBLND,
- .procname = "openibnal",
+ .procname = "iibnal",
.data = NULL,
.maxlen = 0,
.mode = 0555,
}
}
-#if (IBLND_OFED_VERSION == 1025)
+#ifdef HAVE_OFED_IB_COMP_VECTOR
cq = ib_create_cq(cmid->device,
kiblnd_cq_completion, kiblnd_cq_event, conn,
IBLND_CQ_ENTRIES(), 0);
break;
}
- if (conn->ibc_cmid->qp != NULL)
- rdma_destroy_qp(conn->ibc_cmid);
+ if (cmid->qp != NULL)
+ rdma_destroy_qp(cmid);
if (conn->ibc_cq != NULL) {
rc = ib_destroy_cq(conn->ibc_cq);
LASSERT (rx->rx_nob >= 0); /* not posted */
- kiblnd_dma_unmap_single(conn->ibc_cmid->device,
+ kiblnd_dma_unmap_single(cmid->device,
KIBLND_UNMAP_ADDR(rx, rx_msgunmap,
rx->rx_msgaddr),
IBLND_MSG_SIZE, DMA_FROM_DEVICE);
}
#endif
-#if (IBLND_OFED_VERSION == 1020) || (IBLND_OFED_VERSION == 1025)
+#ifdef HAVE_OFED_IB_DMA_MAP
static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
void *msg, size_t size,
#define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data)
#define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len)
-#elif (IBLND_OFED_VERSION == 1010)
+#else
static inline dma_addr_t kiblnd_dma_map_single(struct ib_device *dev,
void *msg, size_t size,
if (failed) {
if (conn->ibc_state == IBLND_CONN_ESTABLISHED)
CDEBUG(D_NETERROR, "Tx -> %s cookie "LPX64
- "sending %d waiting %d: failed %d\n",
+ " sending %d waiting %d: failed %d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid),
tx->tx_cookie, tx->tx_sending, tx->tx_waiting,
status);
if (rc == 0) {
rc = ib_req_notify_cq(conn->ibc_cq,
IB_CQ_NEXT_COMP);
- LASSERT (rc >= 0);
+ if (rc < 0) {
+ CWARN("%s: ib_req_notify_cq failed: %d, "
+ "closing connection\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+ kiblnd_close_conn(conn, -EIO);
+ kiblnd_conn_decref(conn);
+ spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
+ continue;
+ }
rc = ib_poll_cq(conn->ibc_cq, 1, &wc);
}
- LASSERT (rc >= 0);
+ if (rc < 0) {
+ CWARN("%s: ib_poll_cq failed: %d, "
+ "closing connection\n",
+ libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
+ kiblnd_close_conn(conn, -EIO);
+ kiblnd_conn_decref(conn);
+ spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags);
+ continue;
+ }
spin_lock_irqsave(&kiblnd_data.kib_sched_lock,
flags);
.procname = "concurrent_sends",
.data = &concurrent_sends,
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0444,
.proc_handler = &proc_dointvec
},
{
if (*kiblnd_tunables.kib_concurrent_sends > IBLND_RX_MSGS)
*kiblnd_tunables.kib_concurrent_sends = IBLND_RX_MSGS;
- if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE)
- *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE;
+ if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE / 2)
+ *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE / 2;
+
+ if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE) {
+ CWARN("Concurrent sends %d is lower than message queue size: %d, "
+ "performance may drop slightly.\n",
+ *kiblnd_tunables.kib_concurrent_sends, IBLND_MSG_QUEUE_SIZE);
+ }
return 0;
}
}
memset (conn, 0, sizeof (*conn));
+
conn->ksnc_peer = NULL;
conn->ksnc_route = NULL;
conn->ksnc_sock = sock;
}
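+/* Complete (and drop) any zero-copy requests still pinned to this conn's
+ * peer; called once the socket has been released, so any buffered data has
+ * already been aborted. */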
void
+ksocknal_finalize_zcreq(ksock_conn_t *conn)
+{
+ ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_tx_t *tx;
+ ksock_tx_t *tmp;
+ CFS_LIST_HEAD (zlist);
+
+ /* NB safe to finalize TXs because closing of socket will
+ * abort all buffered data */
+ LASSERT (conn->ksnc_sock == NULL);
+
+ cfs_spin_lock(&peer->ksnp_lock);
+
+ list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
+ if (tx->tx_conn != conn)
+ continue;
+
+ LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0);
+
+ tx->tx_msg.ksm_zc_req_cookie = 0;
+ list_del(&tx->tx_zc_list);
+ list_add(&tx->tx_zc_list, &zlist);
+ }
+
+ cfs_spin_unlock(&peer->ksnp_lock);
+
+ while (!list_empty(&zlist)) {
+ tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
+
+ list_del(&tx->tx_zc_list);
+ ksocknal_tx_decref(tx);
+ }
+}
+
+void
ksocknal_terminate_conn (ksock_conn_t *conn)
{
/* This gets called by the reaper (guaranteed thread context) to
ksock_peer_t *peer = conn->ksnc_peer;
ksock_sched_t *sched = conn->ksnc_scheduler;
int failed = 0;
- struct list_head *tmp;
- struct list_head *nxt;
- ksock_tx_t *tx;
- LIST_HEAD (zlist);
LASSERT(conn->ksnc_closing);
cfs_spin_unlock_bh (&sched->kss_lock);
- cfs_spin_lock(&peer->ksnp_lock);
-
- list_for_each_safe(tmp, nxt, &peer->ksnp_zc_req_list) {
- tx = list_entry(tmp, ksock_tx_t, tx_zc_list);
-
- if (tx->tx_conn != conn)
- continue;
-
- LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0);
-
- tx->tx_msg.ksm_zc_req_cookie = 0;
- list_del(&tx->tx_zc_list);
- list_add(&tx->tx_zc_list, &zlist);
- }
-
- cfs_spin_unlock(&peer->ksnp_lock);
-
- list_for_each_safe(tmp, nxt, &zlist) {
- tx = list_entry(tmp, ksock_tx_t, tx_zc_list);
-
- list_del(&tx->tx_zc_list);
- ksocknal_tx_decref(tx);
- }
-
/* serialise with callbacks */
cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
struct list_head kss_zombie_noop_txs; /* zombie noop tx list */
cfs_waitq_t kss_waitq; /* where scheduler sleeps */
int kss_nconns; /* # connections assigned to this scheduler */
+#if !SOCKNAL_SINGLE_FRAG_RX
+ struct page *kss_rx_scratch_pgs[LNET_MAX_IOV];
+#endif
+#if !SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_SINGLE_FRAG_RX
+ struct iovec kss_scratch_iov[LNET_MAX_IOV];
+#endif
+
} ksock_sched_t;
typedef struct
int *ksnd_enable_csum; /* enable check sum */
int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */
unsigned int *ksnd_zc_min_frag; /* minimum zero copy frag size */
+ int *ksnd_zc_recv; /* enable ZC receive (for Chelsio TOE) */
+ int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to enable ZC receive */
#ifdef CPU_AFFINITY
int *ksnd_irq_affinity; /* enable IRQ affinity? */
#endif
lnet_kiov_t *tx_kiov; /* packet page frags */
struct ksock_conn *tx_conn; /* owning conn */
lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */
+ cfs_time_t tx_deadline; /* when (in jiffies) tx times out */
ksock_msg_t tx_msg; /* socklnd message buffer */
int tx_desc_size; /* size of this descriptor */
union {
cfs_atomic_t ksnc_tx_nob; /* # bytes queued */
int ksnc_tx_ready; /* write space */
int ksnc_tx_scheduled; /* being progressed */
-
-#if !SOCKNAL_SINGLE_FRAG_RX
- struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV];
-#endif
-#if !SOCKNAL_SINGLE_FRAG_TX
- struct iovec ksnc_tx_scratch_iov[LNET_MAX_IOV];
-#endif
} ksock_conn_t;
typedef struct ksock_route
}
extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn);
+extern void ksocknal_finalize_zcreq(ksock_conn_t *conn);
static inline void
ksocknal_conn_decref (ksock_conn_t *conn)
LASSERT (conn->ksnc_closing);
libcfs_sock_release(conn->ksnc_sock);
conn->ksnc_sock = NULL;
+ ksocknal_finalize_zcreq(conn);
}
}
cfs_spin_lock(&peer->ksnp_lock);
+ /* ZC_REQ is going to be pinned to the peer */
+ tx->tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
LASSERT (tx->tx_msg.ksm_zc_req_cookie == 0);
tx->tx_msg.ksm_zc_req_cookie = peer->ksnp_zc_next_cookie++;
list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list);
tx->tx_conn = conn;
ksocknal_conn_addref(conn); /* +1 ref for tx */
- /*
- * NB Darwin: SOCK_WMEM_QUEUED()->sock_getsockopt() will take
- * a blockable lock(socket lock), so SOCK_WMEM_QUEUED can't be
- * put in spinlock.
+ /*
+ * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__
+ * but they're used inside spinlocks a lot.
*/
bufnob = libcfs_sock_wmem_queued(conn->ksnc_sock);
cfs_spin_lock_bh (&sched->kss_lock);
if (peer->ksnp_accepting > 0 ||
ksocknal_find_connecting_route_locked (peer) != NULL) {
+ /* the message is going to be pinned to the peer */
+ tx->tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
/* Queue the message until a connection is established */
list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
cfs_write_unlock_bh (g_lock);
__swab64s(&conn->ksnc_msg.ksm_zc_ack_cookie);
}
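+        /* Any type other than NOOP or LNET is a protocol error: drop the
+         * packet and close this connection and its siblings. */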
+ if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP &&
+ conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) {
+ CERROR("%s: Unknown message type: %x\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ conn->ksnc_msg.ksm_type);
+ ksocknal_new_packet(conn, 0);
+ ksocknal_close_conn_and_siblings(conn, -EPROTO);
+ return (-EPROTO);
+ }
+
if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP &&
conn->ksnc_msg.ksm_csum != 0 && /* has checksum */
conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) {
ksocknal_new_packet (conn, 0);
return 0; /* NOOP is done and just return */
}
- LASSERT (conn->ksnc_msg.ksm_type == KSOCK_MSG_LNET);
conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER;
conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t);
return (NULL);
}
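+/* Move timed-out txs from the peer's queue to a private list under the
+ * global write lock, then complete them outside the lock. */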
+static inline void
+ksocknal_flush_stale_txs(ksock_peer_t *peer)
+{
+ ksock_tx_t *tx;
+ CFS_LIST_HEAD (stale_txs);
+
+ cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock);
+
+ while (!list_empty (&peer->ksnp_tx_queue)) {
+ tx = list_entry (peer->ksnp_tx_queue.next,
+ ksock_tx_t, tx_list);
+
+ if (!cfs_time_aftereq(cfs_time_current(),
+ tx->tx_deadline))
+ break;
+
+ list_del (&tx->tx_list);
+ list_add_tail (&tx->tx_list, &stale_txs);
+ }
+
+ cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1);
+}
+
void
ksocknal_check_peer_timeouts (int idx)
{
ksocknal_conn_decref(conn);
goto again;
}
+
+ /* we can't process stale txs right here because we're
+ * holding only shared lock */
+ if (!list_empty (&peer->ksnp_tx_queue)) {
+ ksock_tx_t *tx = list_entry (peer->ksnp_tx_queue.next,
+ ksock_tx_t, tx_list);
+
+ if (cfs_time_aftereq(cfs_time_current(),
+ tx->tx_deadline)) {
+
+ ksocknal_peer_addref(peer);
+ cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_flush_stale_txs(peer);
+
+ ksocknal_peer_decref(peer);
+ goto again;
+ }
+ }
}
+ /* print out warnings about stale ZC_REQs */
+ list_for_each_entry(peer, peers, ksnp_list) {
+ ksock_tx_t *tx;
+ int n = 0;
+
+ list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) {
+ if (!cfs_time_aftereq(cfs_time_current(),
+ tx->tx_deadline))
+ break;
+ n++;
+ }
+
+ if (n != 0) {
+ tx = list_entry (peer->ksnp_zc_req_list.next,
+ ksock_tx_t, tx_zc_list);
+ CWARN("Stale ZC_REQs for peer %s detected: %d; the "
+ "oldest (%p) timed out %ld secs ago\n",
+ libcfs_nid2str(peer->ksnp_id.nid), n, tx,
+ cfs_duration_sec(cfs_time_current() -
+ tx->tx_deadline));
+ }
+ }
+
cfs_read_unlock (&ksocknal_data.ksnd_global_lock);
}
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_niov;
#endif
struct msghdr msg = {
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_nkiov;
#endif
struct msghdr msg = {
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
#endif
struct iovec *iov = conn->ksnc_rx_iov;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_nkiov;
#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_niov;
#endif
struct socket *sock = conn->ksnc_sock;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_nkiov;
#endif
struct socket *sock = conn->ksnc_sock;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
#endif
struct iovec *iov = conn->ksnc_rx_iov;
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_nkiov;
#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
SOCKLND_KEEPALIVE_INTVL,
SOCKLND_BACKOFF_INIT,
SOCKLND_BACKOFF_MAX,
- SOCKLND_PROTOCOL
+ SOCKLND_PROTOCOL,
+ SOCKLND_ZERO_COPY_RECV,
+ SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS
};
#else
#define SOCKLND_BACKOFF_INIT CTL_UNNUMBERED
#define SOCKLND_BACKOFF_MAX CTL_UNNUMBERED
#define SOCKLND_PROTOCOL CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY_RECV CTL_UNNUMBERED
+#define SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS CTL_UNNUMBERED
#endif
static cfs_sysctl_table_t ksocknal_ctl_table[] = {
.strategy = &sysctl_intvec,
},
{
+ .ctl_name = SOCKLND_ZERO_COPY_RECV,
+ .procname = "zero_copy_recv",
+ .data = &ksocknal_tunables.ksnd_zc_recv,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+
+ {
+ .ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS,
+                .procname = "zero_copy_recv_min_nfrags",
+ .data = &ksocknal_tunables.ksnd_zc_recv_min_nfrags,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
.ctl_name = SOCKLND_TYPED,
.procname = "typed",
.data = &ksocknal_tunables.ksnd_typed_conns,
int
ksocknal_lib_tunables_init ()
{
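+        /* clamp zc_recv_min_nfrags into the supported range [2, LNET_MAX_IOV] */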
+ if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2)
+ *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2;
+ if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV)
+ *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV;
+
ksocknal_tunables.ksnd_sysctl =
cfs_register_sysctl_table(ksocknal_top_ctl_table, 0);
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_niov;
#endif
struct msghdr msg = {
#ifdef CONFIG_HIGHMEM
#warning "XXX risk of kmap deadlock on multiple frags..."
#endif
- struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = tx->tx_nkiov;
#endif
struct msghdr msg = {
struct iovec *scratchiov = &scratch;
unsigned int niov = 1;
#else
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
unsigned int niov = conn->ksnc_rx_niov;
#endif
struct iovec *iov = conn->ksnc_rx_iov;
return rc;
}
+static void
+ksocknal_lib_kiov_vunmap(void *addr)
+{
+ if (addr == NULL)
+ return;
+
+ vunmap(addr);
+}
+
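+/* If zero-copy receive is enabled and the kiov fragments tile contiguous
+ * whole pages, map them with vmap() into a single iovec; return the mapped
+ * address, or NULL to fall back to per-page kmap(). */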
+static void *
+ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov,
+ struct iovec *iov, struct page **pages)
+{
+ void *addr;
+ int nob;
+ int i;
+
+ if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL)
+ return NULL;
+
+ LASSERT (niov <= LNET_MAX_IOV);
+
+ if (niov < 2 ||
+ niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags)
+ return NULL;
+
+ for (nob = i = 0; i < niov; i++) {
+ if ((kiov[i].kiov_offset != 0 && i > 0) ||
+ (kiov[i].kiov_offset + kiov[i].kiov_len != CFS_PAGE_SIZE && i < niov - 1))
+ return NULL;
+
+ pages[i] = kiov[i].kiov_page;
+ nob += kiov[i].kiov_len;
+ }
+
+ addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL);
+ if (addr == NULL)
+ return NULL;
+
+ iov->iov_base = addr + kiov[0].kiov_offset;
+ iov->iov_len = nob;
+
+ return addr;
+}
+
int
ksocknal_lib_recv_kiov (ksock_conn_t *conn)
{
#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
- struct iovec scratch;
- struct iovec *scratchiov = &scratch;
- unsigned int niov = 1;
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ struct page **pages = NULL;
+ unsigned int niov = 1;
#else
#ifdef CONFIG_HIGHMEM
#warning "XXX risk of kmap deadlock on multiple frags..."
#endif
- struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
- unsigned int niov = conn->ksnc_rx_nkiov;
+ struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
+ struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs;
+ unsigned int niov = conn->ksnc_rx_nkiov;
#endif
lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
struct msghdr msg = {
.msg_name = NULL,
.msg_namelen = 0,
.msg_iov = scratchiov,
- .msg_iovlen = niov,
.msg_control = NULL,
.msg_controllen = 0,
.msg_flags = 0
int i;
int rc;
void *base;
+ void *addr;
int sum;
int fragnob;
/* NB we can't trust socket ops to either consume our iovs
* or leave them alone. */
- for (nob = i = 0; i < niov; i++) {
- scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
- nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
+ nob = scratchiov[0].iov_len;
+ msg.msg_iovlen = 1;
+
+ } else {
+ for (nob = i = 0; i < niov; i++) {
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
+ kiov[i].kiov_offset;
+ }
+ msg.msg_iovlen = niov;
}
+
LASSERT (nob <= conn->ksnc_rx_nob_wanted);
set_fs (KERNEL_DS);
kunmap(kiov[i].kiov_page);
}
}
- for (i = 0; i < niov; i++)
- kunmap(kiov[i].kiov_page);
+
+ if (addr != NULL) {
+ ksocknal_lib_kiov_vunmap(addr);
+ } else {
+ for (i = 0; i < niov; i++)
+ kunmap(kiov[i].kiov_page);
+ }
return (rc);
}
CFS_MODULE_PARM(zc_min_frag, "i", int, 0644,
"minimum fragment to zero copy");
+static unsigned int zc_recv = 0;
+CFS_MODULE_PARM(zc_recv, "i", int, 0644,
+ "enable ZC recv for Chelsio driver");
+
+static unsigned int zc_recv_min_nfrags = 16;
+CFS_MODULE_PARM(zc_recv_min_nfrags, "i", int, 0644,
+ "minimum # of fragments to enable ZC recv");
+
#ifdef SOCKNAL_BACKOFF
static int backoff_init = 3;
CFS_MODULE_PARM(backoff_init, "i", int, 0644,
.ksnd_enable_csum = &enable_csum,
.ksnd_inject_csum_error = &inject_csum_error,
.ksnd_zc_min_frag = &zc_min_frag,
+ .ksnd_zc_recv = &zc_recv,
+ .ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags,
#ifdef CPU_AFFINITY
.ksnd_irq_affinity = &enable_irq_affinity,
#endif
}
void
-kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
+kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
{
/* CAVEAT EMPTOR! all message fields not set here should have been
msg_version != IBNAL_MSG_VERSION)
return -EPROTO;
} else if (msg_version != expected_version) {
- CERROR("Bad version: %x(%x expected)\n",
+ CERROR("Bad version: %x(%x expected)\n",
msg_version, expected_version);
return -EPROTO;
}
return -EPROTO;
}
msg->ibm_cksum = msg_cksum;
-
+
if (flip) {
/* leave magic unflipped as a clue to peer endianness */
msg->ibm_version = msg_version;
__swab64s(&msg->ibm_dststamp);
__swab64s(&msg->ibm_seq);
}
-
+
if (msg->ibm_srcnid == LNET_NID_ANY) {
CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
return -EPROTO;
default:
CERROR("Unknown message type %x\n", msg->ibm_type);
return -EPROTO;
-
+
case IBNAL_MSG_NOOP:
break;
__swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
__swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
}
-
+
n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
+ CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
n, IBNAL_MAX_RDMA_FRAGS);
return -EPROTO;
}
-
+
if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
(int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
}
-#else
+#else
if (flip) {
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
+ CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
n, IBNAL_MAX_RDMA_FRAGS);
return -EPROTO;
}
-
+
if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
(int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
return -EPROTO;
}
-
+
if (flip)
for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
__swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
LASSERT (kibnal_data.kib_listen_handle == NULL);
- kibnal_data.kib_listen_handle =
+ kibnal_data.kib_listen_handle =
cm_create_cep(cm_cep_transp_rc);
if (kibnal_data.kib_listen_handle == NULL) {
CERROR ("Can't create listen CEP\n");
return -ENOMEM;
}
- CDEBUG(D_NET, "Created CEP %p for listening\n",
+ CDEBUG(D_NET, "Created CEP %p for listening\n",
kibnal_data.kib_listen_handle);
memset(&info, 0, sizeof(info));
- info.listen_addr.end_pt.sid =
+ info.listen_addr.end_pt.sid =
(__u64)(*kibnal_tunables.kib_service_number);
cmrc = cm_listen(kibnal_data.kib_listen_handle, &info,
kibnal_listen_callback, NULL);
if (cmrc == cm_stat_success)
return 0;
-
+
CERROR ("cm_listen error: %d\n", cmrc);
cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
cm_return_t cmrc;
LASSERT (kibnal_data.kib_listen_handle != NULL);
-
+
cmrc = cm_cancel(kibnal_data.kib_listen_handle);
if (cmrc != cm_stat_success)
CERROR ("Error %d stopping listener\n", cmrc);
cfs_pause(cfs_time_seconds(1)/10); /* ensure no more callbacks */
-
+
cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle);
if (cmrc != vv_return_ok)
CERROR ("Error %d destroying CEP\n", cmrc);
/* npeers only grows with the global lock held */
atomic_inc(&kibnal_data.kib_npeers);
}
-
+
write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
if (rc != 0) {
CERROR("Can't create peer: %s\n",
- (rc == -ESHUTDOWN) ? "shutting down" :
+ (rc == -ESHUTDOWN) ? "shutting down" :
"too many peers");
LIBCFS_FREE(peer, sizeof(*peer));
} else {
*peerp = peer;
}
-
+
return rc;
}
LASSERT (peer->ibp_accepting == 0);
LASSERT (list_empty (&peer->ibp_conns));
LASSERT (list_empty (&peer->ibp_tx_queue));
-
+
LIBCFS_FREE (peer, sizeof (*peer));
/* NB a peer's connections keep a reference on their peer until
CDEBUG(D_NET, "%s at %u.%u.%u.%u\n",
libcfs_nid2str(nid), HIPQUAD(ip));
-
+
if (nid == LNET_NID_ANY)
return (-EINVAL);
peer->ibp_ip = ip;
peer->ibp_persistence++;
-
+
write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
return (0);
}
{
struct list_head *tmp;
int i;
-
+
spin_lock(&conn->ibc_lock);
-
- CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n",
- atomic_read(&conn->ibc_refcount), conn,
+
+ CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n",
+ atomic_read(&conn->ibc_refcount), conn,
libcfs_nid2str(conn->ibc_peer->ibp_nid));
CDEBUG(D_CONSOLE, " txseq "LPD64" rxseq "LPD64" state %d \n",
conn->ibc_txseq, conn->ibc_rxseq, conn->ibc_state);
CDEBUG(D_CONSOLE, " nposted %d cred %d o_cred %d r_cred %d\n",
- conn->ibc_nsends_posted, conn->ibc_credits,
+ conn->ibc_nsends_posted, conn->ibc_credits,
conn->ibc_outstanding_credits, conn->ibc_reserved_credits);
CDEBUG(D_CONSOLE, " disc %d comms_err %d\n",
conn->ibc_disconnect, conn->ibc_comms_error);
CDEBUG(D_CONSOLE, " early_rxs:\n");
list_for_each(tmp, &conn->ibc_early_rxs)
kibnal_debug_rx(list_entry(tmp, kib_rx_t, rx_list));
-
+
CDEBUG(D_CONSOLE, " tx_queue_nocred:\n");
list_for_each(tmp, &conn->ibc_tx_queue_nocred)
kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
CDEBUG(D_CONSOLE, " tx_queue:\n");
list_for_each(tmp, &conn->ibc_tx_queue)
kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
+
CDEBUG(D_CONSOLE, " active_txs:\n");
list_for_each(tmp, &conn->ibc_active_txs)
kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list));
-
+
CDEBUG(D_CONSOLE, " rxs:\n");
for (i = 0; i < IBNAL_RX_MSGS; i++)
kibnal_debug_rx(&conn->ibc_rxs[i]);
kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state)
{
static vv_qp_attr_t attr;
-
+
kib_connvars_t *cv = conn->ibc_connvars;
vv_return_t vvrc;
-
+
/* Only called by connd => static OK */
LASSERT (!in_interrupt());
LASSERT (current == kibnal_data.kib_connd);
memset(&attr, 0, sizeof(attr));
-
+
switch (new_state) {
default:
LBUG();
-
+
case vv_qp_state_init: {
struct vv_qp_modify_init_st *init = &attr.modify.params.init;
init->access_control = vv_acc_r_mem_read |
vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? */
- attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX |
+ attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX |
VV_QP_AT_PHY_PORT_NUM |
VV_QP_AT_ACCESS_CON_F;
break;
// XXX sdp sets VV_QP_AT_OP_F but no actual optional options
- attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC |
+ attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC |
VV_QP_AT_DEST_QP |
- VV_QP_AT_R_PSN |
+ VV_QP_AT_R_PSN |
VV_QP_AT_MIN_RNR_NAK_T |
VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM |
VV_QP_AT_OP_F;
rts->retry_num = *kibnal_tunables.kib_retry_cnt;
rts->rnr_num = *kibnal_tunables.kib_rnr_cnt;
rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD;
-
+
attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN |
VV_QP_AT_L_ACK_T |
VV_QP_AT_RETRY_NUM |
attr.modify.vv_qp_attr_mask = 0;
break;
}
-
+
attr.modify.qp_modify_into_state = new_state;
attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE;
-
+
vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL);
if (vvrc != vv_return_ok) {
- CERROR("Can't modify qp -> %s state to %d: %d\n",
+ CERROR("Can't modify qp -> %s state to %d: %d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid),
new_state, vvrc);
return -EIO;
}
-
+
return 0;
}
/* Only the connd creates conns => single threaded */
LASSERT(!in_interrupt());
LASSERT(current == kibnal_data.kib_connd);
-
+
LIBCFS_ALLOC(conn, sizeof (*conn));
if (conn == NULL) {
CERROR ("Can't allocate connection\n");
INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
INIT_LIST_HEAD (&conn->ibc_active_txs);
spin_lock_init (&conn->ibc_lock);
-
+
atomic_inc (&kibnal_data.kib_nconns);
/* well not really, but I call destroy() on failure, which decrements */
vv_r_key_t r_key;
rx->rx_conn = conn;
- rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
+ rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
page_offset);
vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca,
reqattr.create.qp_type = vv_qp_type_r_conn;
reqattr.create.cq_send_h = kibnal_data.kib_cq;
reqattr.create.cq_receive_h = kibnal_data.kib_cq;
- reqattr.create.send_max_outstand_wr = (1 + IBNAL_MAX_RDMA_FRAGS) *
+ reqattr.create.send_max_outstand_wr = (1 + IBNAL_MAX_RDMA_FRAGS) *
(*kibnal_tunables.kib_concurrent_sends);
reqattr.create.receive_max_outstand_wr = IBNAL_RX_MSGS;
reqattr.create.max_scatgat_per_send_wr = 1;
conn->ibc_state = IBNAL_CONN_INIT_QP;
conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num;
- if (rspattr.create_return.receive_max_outstand_wr <
+ if (rspattr.create_return.receive_max_outstand_wr <
IBNAL_RX_MSGS ||
- rspattr.create_return.send_max_outstand_wr <
+ rspattr.create_return.send_max_outstand_wr <
(1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends)) {
CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n",
- IBNAL_RX_MSGS,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
+ IBNAL_RX_MSGS,
+ (1 + IBNAL_MAX_RDMA_FRAGS) *
(*kibnal_tunables.kib_concurrent_sends),
rspattr.create_return.receive_max_outstand_wr,
rspattr.create_return.send_max_outstand_wr);
/* 1 ref for caller */
atomic_set (&conn->ibc_refcount, 1);
return (conn);
-
+
failed:
kibnal_destroy_conn (conn);
return (NULL);
/* Only the connd does this (i.e. single threaded) */
LASSERT (!in_interrupt());
LASSERT (current == kibnal_data.kib_connd);
-
+
CDEBUG (D_NET, "connection %p\n", conn);
LASSERT (atomic_read (&conn->ibc_refcount) == 0);
if (vvrc != vv_return_ok)
CERROR("Can't destroy QP: %d\n", vvrc);
/* fall through */
-
+
case IBNAL_CONN_INIT_NOTHING:
break;
}
- if (conn->ibc_rx_pages != NULL)
+ if (conn->ibc_rx_pages != NULL)
kibnal_free_pages(conn->ibc_rx_pages);
if (conn->ibc_rxs != NULL)
- LIBCFS_FREE(conn->ibc_rxs,
+ LIBCFS_FREE(conn->ibc_rxs,
IBNAL_RX_MSGS * sizeof(kib_rx_t));
if (conn->ibc_connvars != NULL)
CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n",
libcfs_nid2str(peer->ibp_nid),
conn->ibc_incarnation, incarnation);
-
+
count++;
kibnal_close_conn_locked (conn, -ESTALE);
}
/* wildcards always succeed */
if (nid == LNET_NID_ANY)
return (0);
-
+
return (count == 0 ? -ENOENT : 0);
}
{
int npages = p->ibp_npages;
int i;
-
+
for (i = 0; i < npages; i++)
if (p->ibp_pages[i] != NULL)
__free_page(p->ibp_pages[i]);
-
+
LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
}
memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
p->ibp_npages = npages;
-
+
for (i = 0; i < npages; i++) {
p->ibp_pages[i] = alloc_page (GFP_KERNEL);
if (p->ibp_pages[i] == NULL) {
}
int
-kibnal_alloc_tx_descs (void)
+kibnal_alloc_tx_descs (void)
{
int i;
-
+
LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
IBNAL_TX_MSGS() * sizeof(kib_tx_t));
if (kibnal_data.kib_tx_descs == NULL)
return -ENOMEM;
-
+
memset(kibnal_data.kib_tx_descs, 0,
IBNAL_TX_MSGS() * sizeof(kib_tx_t));
if (tx->tx_pages == NULL)
return -ENOMEM;
#else
- LIBCFS_ALLOC(tx->tx_wrq,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
+ LIBCFS_ALLOC(tx->tx_wrq,
+ (1 + IBNAL_MAX_RDMA_FRAGS) *
sizeof(*tx->tx_wrq));
if (tx->tx_wrq == NULL)
return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_gl,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
+
+ LIBCFS_ALLOC(tx->tx_gl,
+ (1 + IBNAL_MAX_RDMA_FRAGS) *
sizeof(*tx->tx_gl));
if (tx->tx_gl == NULL)
return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
+
+ LIBCFS_ALLOC(tx->tx_rd,
+ offsetof(kib_rdma_desc_t,
rd_frags[IBNAL_MAX_RDMA_FRAGS]));
if (tx->tx_rd == NULL)
return -ENOMEM;
}
void
-kibnal_free_tx_descs (void)
+kibnal_free_tx_descs (void)
{
int i;
sizeof(*tx->tx_pages));
#else
if (tx->tx_wrq != NULL)
- LIBCFS_FREE(tx->tx_wrq,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
+ LIBCFS_FREE(tx->tx_wrq,
+ (1 + IBNAL_MAX_RDMA_FRAGS) *
sizeof(*tx->tx_wrq));
if (tx->tx_gl != NULL)
- LIBCFS_FREE(tx->tx_gl,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
+ LIBCFS_FREE(tx->tx_gl,
+ (1 + IBNAL_MAX_RDMA_FRAGS) *
sizeof(*tx->tx_gl));
if (tx->tx_rd != NULL)
- LIBCFS_FREE(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
+ LIBCFS_FREE(tx->tx_rd,
+ offsetof(kib_rdma_desc_t,
rd_frags[IBNAL_MAX_RDMA_FRAGS]));
#endif
}
#if IBNAL_USE_FMR
void
-kibnal_free_fmrs (int n)
+kibnal_free_fmrs (int n)
{
int i;
vv_return_t vvrc;
/* No fancy arithmetic when we do the buffer calculations */
CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
- rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
+ rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
IBNAL_TX_MSG_PAGES(), 0);
if (rc != 0)
return (rc);
LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
}
}
-
+
return (0);
}
LASSERT (ni == kibnal_data.kib_ni);
LASSERT (ni->ni_data == &kibnal_data);
-
+
CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
atomic_read (&libcfs_kmemory));
kibnal_async_callback);
if (vvrc != vv_return_ok)
CERROR("vv_dell_async_event_cb error: %d\n", vvrc);
-
+
/* fall through */
case IBNAL_INIT_HCA:
cfs_pause(cfs_time_seconds(1));
}
/* fall through */
-
+
case IBNAL_INIT_NOTHING:
break;
}
if (kibnal_data.kib_peers != NULL)
LIBCFS_FREE (kibnal_data.kib_peers,
- sizeof (struct list_head) *
+ sizeof (struct list_head) *
kibnal_data.kib_peer_hash_size);
CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
/* Found a suitable port. Get its GUID and PKEY. */
tbl_count = 1;
- vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca,
+ vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca,
port_num, &tbl_count,
&kibnal_data.kib_port_gid);
if (vvrc != vv_return_ok) {
CERROR("vv_get_port_gid_tbl failed "
- "for %s port %d: %d\n",
+ "for %s port %d: %d\n",
hca_name, port_num, vvrc);
continue;
}
tbl_count = 1;
- vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca,
+ vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca,
port_num, &tbl_count,
&kibnal_data.kib_port_pkey);
if (vvrc != vv_return_ok) {
}
CDEBUG(D_NET, "Using %s port %d - GID="LPX64":"LPX64"\n",
- hca_name, kibnal_data.kib_port,
- kibnal_data.kib_port_gid.scope.g.subnet,
+ hca_name, kibnal_data.kib_port,
+ kibnal_data.kib_port_gid.scope.g.subnet,
kibnal_data.kib_port_gid.scope.g.eui64);
/*****************************************************/
__u32 nentries;
vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
- kibnal_cq_callback,
+ kibnal_cq_callback,
NULL, /* context */
&kibnal_data.kib_cq, &nentries);
if (vvrc != 0) {
kibnal_data.kib_init = IBNAL_INIT_CQ;
if (nentries < IBNAL_CQ_ENTRIES()) {
- CERROR ("CQ only has %d entries, need %d\n",
+ CERROR ("CQ only has %d entries, need %d\n",
nentries, IBNAL_CQ_ENTRIES());
goto failed;
}
- vvrc = vv_request_completion_notification(kibnal_data.kib_hca,
- kibnal_data.kib_cq,
+ vvrc = vv_request_completion_notification(kibnal_data.kib_hca,
+ kibnal_data.kib_cq,
vv_next_solicit_unsolicit_event);
if (vvrc != 0) {
CERROR ("Failed to re-arm completion queue: %d\n", rc);
failed:
CDEBUG(D_NET, "kibnal_startup failed\n");
- kibnal_shutdown (ni);
+ kibnal_shutdown (ni);
return (-ENETDOWN);
}
vibnal_assert_wire_constants();
- CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
+ CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
<= cm_REQ_priv_data_len);
- CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
+ CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)
<= cm_REP_priv_data_len);
CLASSERT (sizeof(kib_msg_t) <= IBNAL_MSG_SIZE);
#if !IBNAL_USE_FMR
LASSERT (conn->ibc_state >= IBNAL_CONN_INIT);
LASSERT (rx->rx_nob >= 0); /* not posted */
- CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n",
+ CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n",
rx->rx_wrq.scatgat_list->length,
rx->rx_wrq.scatgat_list->l_key,
KIBNAL_SG2ADDR(rx->rx_wrq.scatgat_list->v_address));
spin_unlock(&conn->ibc_lock);
- CERROR ("post rx -> %s failed %d\n",
+ CERROR ("post rx -> %s failed %d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc);
rc = -EIO;
- kibnal_close_conn(rx->rx_conn, rc);
+ kibnal_close_conn(conn, rc);
/* No more posts for this rx; so lose its ref */
kibnal_conn_decref(conn);
return rc;
case IBNAL_MSG_PUT_REQ:
if (mlen == 0) {
lnet_finalize(ni, lntmsg, 0);
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0,
+ kibnal_send_completion(conn, IBNAL_MSG_PUT_NAK, 0,
rxmsg->ibm_u.putreq.ibprm_cookie);
break;
}
libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
kibnal_tx_done(tx);
/* tell peer it's over */
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc,
+ kibnal_send_completion(conn, IBNAL_MSG_PUT_NAK, rc,
rxmsg->ibm_u.putreq.ibprm_cookie);
break;
}
kibnal_reply(ni, rx, lntmsg);
} else {
/* GET didn't match anything */
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE,
- -ENODATA,
+ kibnal_send_completion(conn, IBNAL_MSG_GET_DONE, -ENODATA,
rxmsg->ibm_u.get.ibgm_cookie);
}
break;
write_unlock_irqrestore(g_lock, flags);
CWARN("Conn race %s\n",
- libcfs_nid2str(peer2->ibp_nid));
+ libcfs_nid2str(rxmsg.ibm_srcnid));
kibnal_peer_decref(peer);
reason = IBNAL_REJECT_CONN_RACE;
if (conn != NULL) {
LASSERT (rc != 0);
kibnal_connreq_done(conn, 0, rc);
+ kibnal_conn_decref(conn);
} else {
cm_destroy_cep(cep);
}
path->pkey, &cv->cv_pkey_index);
if (vvrc != vv_return_ok) {
CWARN("pkey2pkey_index failed for %s @ %u.%u.%u.%u: %d\n",
- libcfs_nid2str(peer->ibp_nid),
+ libcfs_nid2str(peer->ibp_nid),
HIPQUAD(peer->ibp_ip), vvrc);
goto failed;
}
&path->slid);
if (vvrc != vv_return_ok) {
CWARN("port_num2base_lid failed for %s @ %u.%u.%u.%u: %d\n",
- libcfs_nid2str(peer->ibp_ip),
+ libcfs_nid2str(peer->ibp_ip),
HIPQUAD(peer->ibp_ip), vvrc);
goto failed;
}
.procname = "concurrent_sends",
.data = &concurrent_sends,
.maxlen = sizeof(int),
- .mode = 0644,
+ .mode = 0444,
.proc_handler = &proc_dointvec
},
#if IBNAL_USE_FMR
cfs_time_current_sec(), (long)arg);
printk(KERN_ALERT "LustreError: dumping log to %s\n",
debug_file_name);
+
tracefile_dump_all_pages(debug_file_name);
libcfs_run_debug_log_upcall(debug_file_name);
}
"'ip2nets' but not both at once\n");
return NULL;
}
-
+
if (*ip2nets != 0) {
rc = lnet_parse_ip2nets(&nets, ip2nets);
return (rc == 0) ? nets : NULL;
if (!strcmp(portals_compatibility, "strong")) {
return 2;
LCONSOLE_WARN("Starting in strong portals-compatible mode\n");
- }
+ }
LCONSOLE_ERROR_MSG(0x102, "portals_compatibility=\"%s\" not supported\n",
portals_compatibility);
lnet_get_routes(void)
{
char *str = getenv("LNET_ROUTES");
-
+
return (str == NULL) ? "" : str;
}
str = default_networks;
*str = 0;
sep = "";
-
+
list_for_each (tmp, &the_lnet.ln_lnds) {
- lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
-
- nob = snprintf(str, len, "%s%s", sep,
- libcfs_lnd2str(lnd->lnd_type));
- len -= nob;
- if (len < 0) {
- /* overflowed the string; leave it where it was */
- *str = 0;
- break;
- }
-
- str += nob;
- sep = ",";
+ lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list);
+
+ nob = snprintf(str, len, "%s%s", sep,
+ libcfs_lnd2str(lnd->lnd_type));
+ len -= nob;
+ if (len < 0) {
+ /* overflowed the string; leave it where it was */
+ *str = 0;
+ break;
+ }
+
+ str += nob;
+ sep = ",";
}
return default_networks;
}
lnd_t *
-lnet_find_lnd_by_type (int type)
+lnet_find_lnd_by_type (int type)
{
lnd_t *lnd;
struct list_head *tmp;
if (lnd->lnd_type == type)
return lnd;
}
-
+
return NULL;
}
LASSERT (the_lnet.ln_init);
LASSERT (libcfs_isknown_lnd(lnd->lnd_type));
LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL);
-
+
list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds);
lnd->lnd_refcount = 0;
LASSERT (the_lnet.ln_init);
LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd);
LASSERT (lnd->lnd_refcount == 0);
-
+
list_del (&lnd->lnd_list);
CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type));
LASSERT (count == fl->fl_nobjs);
LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
- memset (fl, 0, sizeof (fl));
+ memset (fl, 0, sizeof (*fl));
}
int
}
int
-lnet_setup_handle_hash (void)
+lnet_setup_handle_hash (void)
{
int i;
-
+
/* Arbitrary choice of hash table size */
#ifdef __KERNEL__
the_lnet.ln_lh_hash_size = CFS_PAGE_SIZE / sizeof (struct list_head);
the_lnet.ln_lh_hash_size * sizeof (struct list_head));
if (the_lnet.ln_lh_hash_table == NULL)
return (-ENOMEM);
-
+
for (i = 0; i < the_lnet.ln_lh_hash_size; i++)
CFS_INIT_LIST_HEAD (&the_lnet.ln_lh_hash_table[i]);
the_lnet.ln_next_object_cookie = LNET_COOKIE_TYPES;
-
+
return (0);
}
{
if (the_lnet.ln_lh_hash_table == NULL)
return;
-
+
LIBCFS_FREE(the_lnet.ln_lh_hash_table,
the_lnet.ln_lh_hash_size * sizeof (struct list_head));
}
lnet_libhandle_t *
-lnet_lookup_cookie (__u64 cookie, int type)
+lnet_lookup_cookie (__u64 cookie, int type)
{
/* ALWAYS called with LNET_LOCK held */
struct list_head *list;
if ((cookie & (LNET_COOKIE_TYPES - 1)) != type)
return (NULL);
-
+
hash = ((unsigned int)cookie) % the_lnet.ln_lh_hash_size;
list = &the_lnet.ln_lh_hash_table[hash];
-
+
list_for_each (el, list) {
lnet_libhandle_t *lh = list_entry (el, lnet_libhandle_t,
lh_hash_chain);
-
+
if (lh->lh_cookie == cookie)
return (lh);
}
-
+
return (NULL);
}
void
-lnet_initialise_handle (lnet_libhandle_t *lh, int type)
+lnet_initialise_handle (lnet_libhandle_t *lh, int type)
{
/* ALWAYS called with LNET_LOCK held */
unsigned int hash;
LASSERT (type >= 0 && type < LNET_COOKIE_TYPES);
lh->lh_cookie = the_lnet.ln_next_object_cookie | type;
the_lnet.ln_next_object_cookie += LNET_COOKIE_TYPES;
-
+
hash = ((unsigned int)lh->lh_cookie) % the_lnet.ln_lh_hash_size;
list_add (&lh->lh_hash_chain, &the_lnet.ln_lh_hash_table[hash]);
}
the_lnet.ln_nfinalizers = num_online_cpus();
LIBCFS_ALLOC(the_lnet.ln_finalizers,
- the_lnet.ln_nfinalizers *
+ the_lnet.ln_nfinalizers *
sizeof(*the_lnet.ln_finalizers));
if (the_lnet.ln_finalizers == NULL) {
CERROR("Can't allocate ln_finalizers\n");
{
#ifdef __KERNEL__
int i;
-
+
for (i = 0; i < the_lnet.ln_nfinalizers; i++)
LASSERT (the_lnet.ln_finalizers[i] == NULL);
lnet_server_mode() {
the_lnet.ln_server_mode_flag = 1;
}
-#endif
+#endif
int
lnet_prepare(lnet_pid_t requested_pid)
#else
if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */
LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0);
-
+
if (cfs_curproc_uid())/* Only root can run user-space server */
return -EPERM;
the_lnet.ln_pid = requested_pid;
/* My PID must be unique on this node and flag I'm userspace */
the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG;
- }
+ }
#endif
rc = lnet_descriptor_setup();
if (rc != 0)
goto failed0;
- memset(&the_lnet.ln_counters, 0,
+ memset(&the_lnet.ln_counters, 0,
sizeof(the_lnet.ln_counters));
CFS_INIT_LIST_HEAD (&the_lnet.ln_active_msgs);
goto failed2;
the_lnet.ln_nportals = MAX_PORTALS;
- LIBCFS_ALLOC(the_lnet.ln_portals,
- the_lnet.ln_nportals *
+ LIBCFS_ALLOC(the_lnet.ln_portals,
+ the_lnet.ln_nportals *
sizeof(*the_lnet.ln_portals));
if (the_lnet.ln_portals == NULL) {
rc = -ENOMEM;
}
return 0;
-
+
failed3:
lnet_fini_finalizers();
failed2:
lnet_unprepare (void)
{
int idx;
-
+
/* NB no LNET_LOCK since this is the last reference. All LND instances
* have shut down already, so it is safe to unlink and free all
* descriptors, even those that appear committed to a network op (eg MD
LASSERT (list_empty(&the_lnet.ln_nis));
LASSERT (list_empty(&the_lnet.ln_zombie_nis));
LASSERT (the_lnet.ln_nzombie_nis == 0);
-
+
for (idx = 0; idx < the_lnet.ln_nportals; idx++) {
LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq));
return ni;
}
}
-
+
return NULL;
}
lnet_islocalnet (__u32 net)
{
lnet_ni_t *ni;
-
+
LNET_LOCK();
ni = lnet_net2ni_locked(net);
if (ni != NULL)
return ni;
}
}
-
+
return NULL;
}
lnet_islocalnid (lnet_nid_t nid)
{
lnet_ni_t *ni;
-
+
LNET_LOCK();
ni = lnet_nid2ni_locked(nid);
if (ni != NULL)
count++;
}
}
-
+
LNET_UNLOCK();
#endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */
}
libcfs_setnet0alias(lnd->lnd_type);
}
-
+
nicount++;
}
case IOC_LIBCFS_FAIL_NID:
return lnet_fail_nid(data->ioc_nid, data->ioc_count);
-
+
case IOC_LIBCFS_ADD_ROUTE:
- rc = lnet_add_route(data->ioc_net, data->ioc_count,
+ rc = lnet_add_route(data->ioc_net, data->ioc_count,
data->ioc_nid);
return (rc != 0) ? rc : lnet_check_routes();
-
+
case IOC_LIBCFS_DEL_ROUTE:
return lnet_del_route(data->ioc_net, data->ioc_nid);
case IOC_LIBCFS_GET_ROUTE:
- return lnet_get_route(data->ioc_count,
- &data->ioc_net, &data->ioc_count,
+ return lnet_get_route(data->ioc_count,
+ &data->ioc_net, &data->ioc_count,
&data->ioc_nid, &data->ioc_flags);
case IOC_LIBCFS_NOTIFY_ROUTER:
- return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
+ return lnet_notify(NULL, data->ioc_nid, data->ioc_flags,
(time_t)data->ioc_u64[0]);
case IOC_LIBCFS_PORTALS_COMPATIBILITY:
rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]);
if (rc < 0 && rc != -EHOSTUNREACH)
return rc;
-
+
data->ioc_u32[0] = rc;
return 0;
} else {
(void)ni->ni_lnd->lnd_ctl(ni, cmd, arg);
}
-
+
lnet_ni_decref(ni);
}
return 0;
}
-
+
default:
ni = lnet_net2ni(data->ioc_net);
if (ni == NULL)
list_for_each(tmp, &the_lnet.ln_nis) {
if (index-- != 0)
continue;
-
+
ni = list_entry(tmp, lnet_ni_t, ni_list);
id->nid = ni->ni_nid;
int n;
int infosz;
int i;
-
+
for (n = 0; ; n++) {
rc = LNetGetId(n, &id);
if (rc == -ENOENT)
LASSERT (rc == 0);
the_lnet.ln_ping_info->pi_nid[i] = id.nid;
}
-
+
/* We can have a tiny EQ since we only need to see the unlink event on
* teardown, which by definition is the last one! */
rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq);
}
if (nob < offsetof(lnet_ping_info_t, pi_nid[0])) {
- CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
+ CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id),
nob, (int)offsetof(lnet_ping_info_t, pi_nid[0]));
goto out_1;
}
n_ids = info->pi_nnids;
if (nob < offsetof(lnet_ping_info_t, pi_nid[n_ids])) {
- CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
+ CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id),
nob, (int)offsetof(lnet_ping_info_t, pi_nid[n_ids]));
goto out_1;
}
}
if (eq->eq_refcount != 0) {
+ CDEBUG(D_NET, "Event queue (%d) busy on destroy.\n",
+ eq->eq_refcount);
LNET_UNLOCK();
return (-EBUSY);
}
gettimeofday(&then, NULL);
ts.tv_sec = then.tv_sec + timeout_ms/1000;
- ts.tv_nsec = then.tv_usec * 1000 +
+ ts.tv_nsec = then.tv_usec * 1000 +
(timeout_ms%1000) * 1000000;
if (ts.tv_nsec >= 1000000000) {
ts.tv_sec++;
LASSERT (the_lnet.ln_init);
LASSERT (the_lnet.ln_refcount > 0);
-
+
if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
umd.length > LNET_MAX_IOV) /* too many fragments */
return -EINVAL;
+ if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0)
+ return -EINVAL;
+
md = lnet_md_alloc(&umd);
if (md == NULL)
return -ENOMEM;
LASSERT (the_lnet.ln_init);
LASSERT (the_lnet.ln_refcount > 0);
-
+
if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 &&
umd.length > LNET_MAX_IOV) /* too many fragments */
return -EINVAL;
+ if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) != 0)
+ return -EINVAL;
+
md = lnet_md_alloc(&umd);
if (md == NULL)
return -ENOMEM;
int
LNetMEAttach(unsigned int portal,
- lnet_process_id_t match_id,
+ lnet_process_id_t match_id,
__u64 match_bits, __u64 ignore_bits,
- lnet_unlink_t unlink, lnet_ins_pos_t pos,
+ lnet_unlink_t unlink, lnet_ins_pos_t pos,
lnet_handle_me_t *handle)
{
lnet_me_t *me;
LASSERT (the_lnet.ln_init);
LASSERT (the_lnet.ln_refcount > 0);
-
+
if (portal >= the_lnet.ln_nportals)
return -EINVAL;
return 0;
}
-int
-LNetMEInsert(lnet_handle_me_t current_meh,
- lnet_process_id_t match_id,
+int
+LNetMEInsert(lnet_handle_me_t current_meh,
+ lnet_process_id_t match_id,
__u64 match_bits, __u64 ignore_bits,
lnet_unlink_t unlink, lnet_ins_pos_t pos,
lnet_handle_me_t *handle)
lnet_me_t *current_me;
lnet_me_t *new_me;
- LASSERT (the_lnet.ln_init);
+ LASSERT (the_lnet.ln_init);
LASSERT (the_lnet.ln_refcount > 0);
-
+
new_me = lnet_me_alloc();
if (new_me == NULL)
return -ENOMEM;
lnet_initialise_handle (&new_me->me_lh, LNET_COOKIE_TYPE_ME);
if (pos == LNET_INS_AFTER)
- list_add_tail(&new_me->me_list, ¤t_me->me_list);
- else
list_add(&new_me->me_list, ¤t_me->me_list);
+ else
+ list_add_tail(&new_me->me_list, ¤t_me->me_list);
lnet_me2handle(handle, new_me);
/* forward ref */
static void lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg);
-static void lnet_drop_delayed_put(lnet_msg_t *msg, char *reason);
#define LNET_MATCHMD_NONE 0 /* Didn't match */
#define LNET_MATCHMD_OK 1 /* Matched OK */
LNET_UNLOCK();
+ msg->msg_ev.type = LNET_EVENT_GET;
+ msg->msg_ev.target.pid = hdr->dest_pid;
+ msg->msg_ev.target.nid = hdr->dest_nid;
+ msg->msg_ev.hdr_data = 0;
+
reply_wmd = hdr->msg.get.return_wmd;
lnet_prep_send(msg, LNET_MSG_REPLY, src, offset, mlength);
msg->msg_hdr.msg.reply.dst_wmd = reply_wmd;
- msg->msg_ev.type = LNET_EVENT_GET;
- msg->msg_ev.target.pid = hdr->dest_pid;
- msg->msg_ev.target.nid = hdr->dest_nid;
- msg->msg_ev.hdr_data = 0;
-
if (rdma_get) {
/* The LND completes the REPLY from her recv procedure */
lnet_ni_recv(ni, msg->msg_private, msg, 0,
/* NB handles only looked up by creator (no flips) */
md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd);
- if (md == NULL || md->md_threshold == 0) {
+ if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
CDEBUG(D_NETERROR, "%s: Dropping REPLY from %s for %s "
"MD "LPX64"."LPX64"\n",
libcfs_nid2str(ni->ni_nid), libcfs_id2str(src),
(md == NULL) ? "invalid" : "inactive",
hdr->msg.reply.dst_wmd.wh_interface_cookie,
hdr->msg.reply.dst_wmd.wh_object_cookie);
+ if (md != NULL && md->md_me != NULL)
+ CERROR("REPLY MD also attached to portal %d\n",
+ md->md_me->me_portal);
LNET_UNLOCK();
return ENOENT; /* +ve: OK but no match */
/* NB handles only looked up by creator (no flips) */
md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd);
- if (md == NULL || md->md_threshold == 0) {
+ if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
/* Don't moan; this is expected */
CDEBUG(D_NET,
"%s: Dropping ACK from %s to %s MD "LPX64"."LPX64"\n",
(md == NULL) ? "invalid" : "inactive",
hdr->msg.ack.dst_wmd.wh_interface_cookie,
hdr->msg.ack.dst_wmd.wh_object_cookie);
+ if (md != NULL && md->md_me != NULL)
+ CERROR("Source MD also attached to portal %d\n",
+ md->md_me->me_portal);
+
LNET_UNLOCK();
return ENOENT; /* +ve! */
}
LNET_LOCK();
md = lnet_handle2md(&mdh);
- if (md == NULL || md->md_threshold == 0) {
+ if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
lnet_msg_free(msg);
- LNET_UNLOCK();
- CERROR("Dropping PUT to %s: MD invalid\n",
- libcfs_id2str(target));
+ CERROR("Dropping PUT ("LPU64":%d:%s): MD (%d) invalid\n",
+ match_bits, portal, libcfs_id2str(target),
+ md == NULL ? -1 : md->md_threshold);
+ if (md != NULL && md->md_me != NULL)
+ CERROR("Source MD also attached to portal %d\n",
+ md->md_me->me_portal);
+
+ LNET_UNLOCK();
return -ENOENT;
}
LNET_LOCK();
md = lnet_handle2md(&mdh);
- if (md == NULL || md->md_threshold == 0) {
+ if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) {
lnet_msg_free(msg);
- LNET_UNLOCK();
- CERROR("Dropping GET to %s: MD invalid\n",
- libcfs_id2str(target));
+ CERROR("Dropping GET ("LPU64":%d:%s): MD (%d) invalid\n",
+ match_bits, portal, libcfs_id2str(target),
+ md == NULL ? -1 : md->md_threshold);
+ if (md != NULL && md->md_me != NULL)
+ CERROR("REPLY MD also attached to portal %d\n",
+ md->md_me->me_portal);
+
+ LNET_UNLOCK();
return -ENOENT;
}
#if defined(__KERNEL__) && defined(LNET_ROUTER)
#include <linux/seq_file.h>
-#include <linux/lustre_compat25.h>
/* this is really lnet_proc.c */
static srpc_peer_t *
srpc_nid2peer (lnet_nid_t nid)
{
- srpc_peer_t *peer;
- srpc_peer_t *new_peer;
+ srpc_peer_t *peer;
+ srpc_peer_t *new_peer;
spin_lock(&srpc_data.rpc_glock);
peer = srpc_find_peer_locked(nid);
if (peer != NULL)
return peer;
-
+
new_peer = srpc_create_peer(nid);
spin_lock(&srpc_data.rpc_glock);
spin_unlock(&srpc_data.rpc_glock);
return NULL;
}
-
+
list_add_tail(&new_peer->stp_list, srpc_nid2peerlist(nid));
spin_unlock(&srpc_data.rpc_glock);
return new_peer;
}
int
-srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
+srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len,
int options, lnet_process_id_t peer, lnet_nid_t self,
lnet_handle_md_t *mdh, srpc_event_t *ev)
{
else
portal = SRPC_FRAMEWORK_REQUEST_PORTAL;
- rc = srpc_post_active_rdma(portal, service, buf, len,
+ rc = srpc_post_active_rdma(portal, service, buf, len,
LNET_MD_OP_PUT, peer,
LNET_NID_ANY, mdh, ev);
return rc;
spin_unlock(&sv->sv_lock);
LIBCFS_FREE(buf, sizeof(*buf));
spin_lock(&sv->sv_lock);
- return rc;
+ return rc;
}
int
msg = &rpc->srpc_reqstbuf->buf_msg;
reply = &rpc->srpc_replymsg.msg_body.reply;
- if (msg->msg_version != SRPC_MSG_VERSION &&
- msg->msg_version != __swab32(SRPC_MSG_VERSION)) {
+ if (msg->msg_magic == 0) {
+ /* moaned already in srpc_lnet_ev_handler */
+ rc = EBADMSG;
+ } else if (msg->msg_version != SRPC_MSG_VERSION &&
+ msg->msg_version != __swab32(SRPC_MSG_VERSION)) {
CWARN ("Version mismatch: %u, %u expected, from %s\n",
msg->msg_version, SRPC_MSG_VERSION,
libcfs_id2str(rpc->srpc_peer));
}
}
case SWI_STATE_BULK_STARTED:
- LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired);
+ /* we cannot LASSERT ev_fired right here because it
+ * may be set only upon an event with unlinked==1 */
if (rpc->srpc_bulk != NULL) {
rc = ev->ev_status;
rc = (*sv->sv_bulk_ready) (rpc, rc);
if (rc != 0) {
- srpc_server_rpc_done(rpc, rc);
- return 1;
+ if (ev->ev_fired) {
+ srpc_server_rpc_done(rpc, rc);
+ return 1;
+ }
+
+ rpc->srpc_status = rc;
+ wi->wi_state = SWI_STATE_BULK_ERRORED;
+ LNetMDUnlink(rpc->srpc_bulk->bk_mdh);
+ return 0; /* wait for UNLINK event */
}
}
+ LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired);
+
wi->wi_state = SWI_STATE_REPLY_SUBMITTED;
rc = srpc_send_reply(rpc);
if (rc == 0)
wi->wi_state = SWI_STATE_DONE;
srpc_server_rpc_done(rpc, ev->ev_status);
return 1;
+
+ case SWI_STATE_BULK_ERRORED:
+ LASSERT (rpc->srpc_bulk != NULL && ev->ev_fired);
+ LASSERT (rpc->srpc_status != 0);
+
+ srpc_server_rpc_done(rpc, rpc->srpc_status);
+ return 1;
}
return 0;
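
The new SWI_STATE_BULK_ERRORED handling above defers completion of a failed bulk transfer until LNet has delivered the unlink event for the bulk MD. A small hypothetical sketch of that idiom, independent of the selftest code; my_rpc_t, my_rpc_done() and my_eq_handler() are illustrative names only:

    typedef struct {
            int rpc_status;
    } my_rpc_t;

    static void my_rpc_done(my_rpc_t *rpc, int status);

    static void
    my_eq_handler(lnet_event_t *ev)
    {
            my_rpc_t *rpc = ev->md.user_ptr;

            rpc->rpc_status = (ev->type == LNET_EVENT_UNLINK) ?
                              -EINTR : ev->status;

            /* After LNetMDUnlink() the MD may still generate events (e.g. a
             * late REPLY); only complete once an event arrives with
             * ev->unlinked set, i.e. the MD is really gone. */
            if (ev->unlinked)
                    my_rpc_done(rpc, rpc->rpc_status);
    }
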
CFS_INIT_LIST_HEAD(&timer->stt_list);
timer->stt_data = rpc;
timer->stt_func = srpc_client_rpc_expired;
- timer->stt_expires = cfs_time_add(rpc->crpc_timeout,
+ timer->stt_expires = cfs_time_add(rpc->crpc_timeout,
cfs_time_current_sec());
stt_add_timer(timer);
return;
}
-/*
+/*
* Called with rpc->crpc_lock held.
*
* Upon exit the RPC expiry timer is not queued and the handler is not
* running on any CPU. */
void
srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc)
-{
+{
/* timer not planted or already exploded */
if (rpc->crpc_timeout == 0) return;
while (rpc->crpc_timeout != 0) {
spin_unlock(&rpc->crpc_lock);
- cfs_schedule();
+ cfs_schedule();
spin_lock(&rpc->crpc_lock);
}
* No one can schedule me now since:
* - RPC timer has been defused.
* - all LNet events have been fired.
- * - crpc_closed has been set, preventing srpc_abort_rpc from
+ * - crpc_closed has been set, preventing srpc_abort_rpc from
* scheduling me.
* Cancel pending schedules and prevent future schedule attempts:
*/
case SWI_STATE_REQUEST_SUBMITTED:
/* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any
- * order; however, they're processed in a strict order:
+ * order; however, they're processed in a strict order:
* rqt, rpy, and bulk. */
if (!rpc->crpc_reqstev.ev_fired) break;
rc = rpc->crpc_replyev.ev_status;
if (rc != 0) break;
- if ((reply->msg_type != type &&
+ if ((reply->msg_type != type &&
reply->msg_type != __swab32(type)) ||
(reply->msg_magic != SRPC_MSG_MAGIC &&
reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
{
srpc_client_rpc_t *rpc;
- LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t,
+ LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t,
crpc_bulk.bk_iovs[nbulkiov]));
if (rpc == NULL)
return NULL;
}
/* when in kernel always called with LNET_LOCK() held, and in thread context */
-void
+void
srpc_lnet_ev_handler (lnet_event_t *ev)
{
srpc_event_t *rpcev = ev->md.user_ptr;
srpc_service_t *sv;
srpc_msg_t *msg;
srpc_msg_type_t type;
+ int fired_flag = 1;
LASSERT (!in_interrupt());
LASSERT (rpcev->ev_fired == 0);
rpcev->ev_fired = 1;
- rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
+ rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
-EINTR : ev->status;
swi_schedule_workitem(&crpc->crpc_wi);
LASSERT (sv->sv_nposted_msg >= 0);
if (sv->sv_shuttingdown) {
- /* Leave buffer on sv->sv_posted_msgq since
+ /* Leave buffer on sv->sv_posted_msgq since
* srpc_finish_service needs to traverse it. */
spin_unlock(&sv->sv_lock);
break;
type = srpc_service2request(sv->sv_id);
if (ev->status != 0 || ev->mlength != sizeof(*msg) ||
- (msg->msg_type != type &&
+ (msg->msg_type != type &&
msg->msg_type != __swab32(type)) ||
(msg->msg_magic != SRPC_MSG_MAGIC &&
msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) {
ev->status, ev->mlength,
msg->msg_type, msg->msg_magic);
- /* NB might drop sv_lock in srpc_service_recycle_buffer,
- * sv_nposted_msg++ as an implicit reference to prevent
- * sv from disappearing under me */
- sv->sv_nposted_msg++;
- srpc_service_recycle_buffer(sv, buffer);
- sv->sv_nposted_msg--;
- spin_unlock(&sv->sv_lock);
-
- if (ev->status == 0) { /* status!=0 counted already */
- spin_lock(&srpc_data.rpc_glock);
- srpc_data.rpc_counters.errors++;
- spin_unlock(&srpc_data.rpc_glock);
- }
- break;
+ /* NB can't call srpc_service_recycle_buffer here since
+ * it may call LNetM[DE]Attach. The invalid magic tells
+ * srpc_handle_rpc to drop this RPC */
+ msg->msg_magic = 0;
}
if (!list_empty(&sv->sv_free_rpcq)) {
ev->type == LNET_EVENT_REPLY ||
ev->type == LNET_EVENT_UNLINK);
- if (ev->type == LNET_EVENT_SEND &&
- ev->status == 0 && !ev->unlinked)
- break; /* wait for the final LNET_EVENT_REPLY */
-
+ if (ev->type == LNET_EVENT_SEND && !ev->unlinked) {
+ if (ev->status == 0)
+ break; /* wait for the final LNET_EVENT_REPLY */
+ else
+ fired_flag = 0; /* LNET_EVENT_REPLY may arrive
+ (optimized GET case) */
+ }
case SRPC_BULK_PUT_SENT:
if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) {
spin_lock(&srpc_data.rpc_glock);
LASSERT (rpcev == &srpc->srpc_ev);
spin_lock(&sv->sv_lock);
- rpcev->ev_fired = 1;
- rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
+ if (fired_flag)
+ rpcev->ev_fired = 1;
+
+ rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ?
-EINTR : ev->status;
+
srpc_schedule_server_rpc(srpc);
spin_unlock(&sv->sv_lock);
break;
#define SWI_STATE_REQUEST_SENT 4
#define SWI_STATE_REPLY_RECEIVED 5
#define SWI_STATE_BULK_STARTED 6
+#define SWI_STATE_BULK_ERRORED 7
#define SWI_STATE_DONE 10
/* forward refs */
* serialized with respect to itself.
* - no CPU affinity, a workitem does not necessarily run on the same CPU
* that schedules it. However, this might change in the future.
- * - if a workitem is scheduled again before it has a chance to run, it
+ * - if a workitem is scheduled again before it has a chance to run, it
* runs only once.
- * - if a workitem is scheduled while it runs, it runs again after it
- * completes; this ensures that events occurring while other events are
- * being processed receive due attention. This behavior also allows a
+ * - if a workitem is scheduled while it runs, it runs again after it
+ * completes; this ensures that events occurring while other events are
+ * being processed receive due attention. This behavior also allows a
* workitem to reschedule itself.
*
* Usage notes:
typedef struct {
int (*tso_init)(struct sfw_test_instance *tsi); /* initialize test client */
void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */
- int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
+ int (*tso_prep_rpc)(struct sfw_test_unit *tsu,
lnet_process_id_t dest,
srpc_client_rpc_t **rpc); /* prep a test's rpc */
void (*tso_done_rpc)(struct sfw_test_unit *tsu,
} tsi_u;
} sfw_test_instance_t;
-/* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at
+/* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at
* the end of pages are not used */
#define SFW_MAX_CONCUR LST_MAX_CONCUR
#define SFW_ID_PER_PAGE (CFS_PAGE_SIZE / sizeof(lnet_process_id_t))
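
For concreteness: with a hypothetical 4096-byte CFS_PAGE_SIZE and a 16-byte lnet_process_id_t, SFW_ID_PER_PAGE is 256 and no page space is wasted; with a 12-byte id (as on some 32-bit builds) 341 ids fit per page and the trailing 4 bytes go unused, which is what the XXX note above refers to.
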
int sfw_alloc_pages(srpc_server_rpc_t *rpc, int npages, int sink);
srpc_client_rpc_t *
-srpc_create_client_rpc(lnet_process_id_t peer, int service,
+srpc_create_client_rpc(lnet_process_id_t peer, int service,
int nbulkiov, int bulklen,
void (*rpc_done)(srpc_client_rpc_t *),
void (*rpc_fini)(srpc_client_rpc_t *), void *priv);
return;
}
-static inline const char *
+static inline const char *
swi_state2str (int state)
{
#define STATE2STR(x) case x: return #x
switch(state) {
- default:
+ default:
LBUG();
STATE2STR(SWI_STATE_NEWBORN);
STATE2STR(SWI_STATE_REPLY_SUBMITTED);
STATE2STR(SWI_STATE_REQUEST_SENT);
STATE2STR(SWI_STATE_REPLY_RECEIVED);
STATE2STR(SWI_STATE_BULK_STARTED);
+ STATE2STR(SWI_STATE_BULK_ERRORED);
STATE2STR(SWI_STATE_DONE);
}
#undef STATE2STR
*
* Author: Eric Barton <eeb@bartonsoftware.com>
*/
- */
#include "ptllnd.h"
.lnd_type = PTLLND,
.lnd_startup = ptllnd_startup,
.lnd_shutdown = ptllnd_shutdown,
- .lnd_ctl = ptllnd_ctl,
+ .lnd_ctl = ptllnd_ctl,
.lnd_send = ptllnd_send,
.lnd_recv = ptllnd_recv,
.lnd_eager_recv = ptllnd_eager_recv,
.lnd_notify = ptllnd_notify,
.lnd_wait = ptllnd_wait,
- .lnd_setasync = ptllnd_setasync,
+ .lnd_setasync = ptllnd_setasync,
};
static int ptllnd_ni_count = 0;
void
ptllnd_history_fini(void)
{
- ptllnd_he_t *he;
-
- while (!list_empty(&ptllnd_idle_history)) {
- he = list_entry(ptllnd_idle_history.next,
- ptllnd_he_t, he_list);
-
- list_del(&he->he_list);
- LIBCFS_FREE(he, sizeof(*he));
- }
-
- while (!list_empty(&ptllnd_history_list)) {
- he = list_entry(ptllnd_history_list.next,
- ptllnd_he_t, he_list);
-
- list_del(&he->he_list);
- LIBCFS_FREE(he, sizeof(*he));
- }
+ ptllnd_he_t *he;
+
+ while (!list_empty(&ptllnd_idle_history)) {
+ he = list_entry(ptllnd_idle_history.next,
+ ptllnd_he_t, he_list);
+
+ list_del(&he->he_list);
+ LIBCFS_FREE(he, sizeof(*he));
+ }
+
+ while (!list_empty(&ptllnd_history_list)) {
+ he = list_entry(ptllnd_history_list.next,
+ ptllnd_he_t, he_list);
+
+ list_del(&he->he_list);
+ LIBCFS_FREE(he, sizeof(*he));
+ }
}
int
ptllnd_history_init(void)
{
- int i;
- ptllnd_he_t *he;
- int n;
- int rc;
-
- CFS_INIT_LIST_HEAD(&ptllnd_idle_history);
- CFS_INIT_LIST_HEAD(&ptllnd_history_list);
-
- rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0);
- if (rc != 0)
- return rc;
-
- for (i = 0; i < n; i++) {
- LIBCFS_ALLOC(he, sizeof(*he));
- if (he == NULL) {
- ptllnd_history_fini();
- return -ENOMEM;
- }
-
- list_add(&he->he_list, &ptllnd_idle_history);
- }
-
- PTLLND_HISTORY("Init");
-
- return 0;
+ int i;
+ ptllnd_he_t *he;
+ int n;
+ int rc;
+
+ CFS_INIT_LIST_HEAD(&ptllnd_idle_history);
+ CFS_INIT_LIST_HEAD(&ptllnd_history_list);
+
+ rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0);
+ if (rc != 0)
+ return rc;
+
+ for (i = 0; i < n; i++) {
+ LIBCFS_ALLOC(he, sizeof(*he));
+ if (he == NULL) {
+ ptllnd_history_fini();
+ return -ENOMEM;
+ }
+
+ list_add(&he->he_list, &ptllnd_idle_history);
+ }
+
+ PTLLND_HISTORY("Init");
+
+ return 0;
}
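
The size of the ring is taken from the PTLLND_HISTORY tunable read through ptllnd_parse_int_tunable() above (presumably an environment variable in this userspace LND); with the default of 0 no entries are pre-allocated, so ptllnd_history() below finds both lists empty and records nothing.
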
void
ptllnd_history(const char *fn, const char *file, const int line,
- const char *fmt, ...)
+ const char *fmt, ...)
{
- static int seq;
-
+ static int seq;
+
va_list ap;
- ptllnd_he_t *he;
-
- if (!list_empty(&ptllnd_idle_history)) {
- he = list_entry(ptllnd_idle_history.next,
- ptllnd_he_t, he_list);
- } else if (!list_empty(&ptllnd_history_list)) {
- he = list_entry(ptllnd_history_list.next,
- ptllnd_he_t, he_list);
- } else {
- return;
- }
-
- list_del(&he->he_list);
- list_add_tail(&he->he_list, &ptllnd_history_list);
-
- he->he_seq = seq++;
- he->he_fn = fn;
- he->he_file = file;
- he->he_line = line;
- gettimeofday(&he->he_time, NULL);
-
- va_start(ap, fmt);
- vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap);
- va_end(ap);
+ ptllnd_he_t *he;
+
+ if (!list_empty(&ptllnd_idle_history)) {
+ he = list_entry(ptllnd_idle_history.next,
+ ptllnd_he_t, he_list);
+ } else if (!list_empty(&ptllnd_history_list)) {
+ he = list_entry(ptllnd_history_list.next,
+ ptllnd_he_t, he_list);
+ } else {
+ return;
+ }
+
+ list_del(&he->he_list);
+ list_add_tail(&he->he_list, &ptllnd_history_list);
+
+ he->he_seq = seq++;
+ he->he_fn = fn;
+ he->he_file = file;
+ he->he_line = line;
+ gettimeofday(&he->he_time, NULL);
+
+ va_start(ap, fmt);
+ vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap);
+ va_end(ap);
}
void
ptllnd_dump_history(void)
{
- ptllnd_he_t *he;
+ ptllnd_he_t *he;
+
+ PTLLND_HISTORY("dumping...");
- PTLLND_HISTORY("dumping...");
-
- while (!list_empty(&ptllnd_history_list)) {
- he = list_entry(ptllnd_history_list.next,
- ptllnd_he_t, he_list);
+ while (!list_empty(&ptllnd_history_list)) {
+ he = list_entry(ptllnd_history_list.next,
+ ptllnd_he_t, he_list);
- list_del(&he->he_list);
-
- CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq,
- (int)he->he_time.tv_sec, (int)he->he_time.tv_usec,
- he->he_file, he->he_line, he->he_fn, he->he_msg);
+ list_del(&he->he_list);
- list_add_tail(&he->he_list, &ptllnd_idle_history);
- }
+ CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq,
+ (int)he->he_time.tv_sec, (int)he->he_time.tv_usec,
+ he->he_file, he->he_line, he->he_fn, he->he_msg);
- PTLLND_HISTORY("complete");
+ list_add_tail(&he->he_list, &ptllnd_idle_history);
+ }
+
+ PTLLND_HISTORY("complete");
}
-void
+void
ptllnd_assert_wire_constants (void)
{
/* Wire protocol assertions generated by 'wirecheck'
int rc;
int temp;
- /* Other tunable defaults depend on this */
- rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0);
- if (rc != 0)
- return rc;
+ /* Other tunable defaults depend on this */
+ rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0);
+ if (rc != 0)
+ return rc;
rc = ptllnd_parse_int_tunable(&plni->plni_portal,
"PTLLND_PORTAL", PTLLND_PORTAL);
"PTLLND_PEERCREDITS", PTLLND_PEERCREDITS);
if (rc != 0)
return rc;
+ /* kptl_msg_t::ptlm_credits is only a __u8 */
+ if (plni->plni_peer_credits > 255) {
+ CERROR("PTLLND_PEERCREDITS must be <= 255\n");
+ return -EINVAL;
+ }
rc = ptllnd_parse_int_tunable(&max_msg_size,
"PTLLND_MAX_MSG_SIZE",
if (rc != 0)
return rc;
- rc = ptllnd_parse_int_tunable(&plni->plni_checksum,
- "PTLLND_CHECKSUM", 0);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history,
- "PTLLND_TX_HISTORY",
- plni->plni_debug ? 1024 : 0);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch,
- "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak,
- "PTLLND_ABORT_ON_NAK", 0);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak,
- "PTLLND_DUMP_ON_NAK", plni->plni_debug);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval,
- "PTLLND_WATCHDOG_INTERVAL", 1);
- if (rc != 0)
- return rc;
- if (plni->plni_watchdog_interval <= 0)
- plni->plni_watchdog_interval = 1;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_timeout,
- "PTLLND_TIMEOUT", 50);
- if (rc != 0)
- return rc;
-
- rc = ptllnd_parse_int_tunable(&plni->plni_long_wait,
- "PTLLND_LONG_WAIT",
- plni->plni_debug ? 5 : plni->plni_timeout);
- if (rc != 0)
- return rc;
- plni->plni_long_wait *= 1000; /* convert to mS */
+ rc = ptllnd_parse_int_tunable(&plni->plni_checksum,
+ "PTLLND_CHECKSUM", 0);
+ if (rc != 0)
+ return rc;
+
+ rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history,
+ "PTLLND_TX_HISTORY",
+ plni->plni_debug ? 1024 : 0);
+ if (rc != 0)
+ return rc;
+
+ rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch,
+ "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1);
+ if (rc != 0)
+ return rc;
+
+ rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak,
+ "PTLLND_ABORT_ON_NAK", 0);
+ if (rc != 0)
+ return rc;
+
+ rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak,
+ "PTLLND_DUMP_ON_NAK", plni->plni_debug);
+ if (rc != 0)
+ return rc;
+
+ rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval,
+ "PTLLND_WATCHDOG_INTERVAL", 1);
+ if (rc != 0)
+ return rc;
+ if (plni->plni_watchdog_interval <= 0)
+ plni->plni_watchdog_interval = 1;
+
+ rc = ptllnd_parse_int_tunable(&plni->plni_timeout,
+ "PTLLND_TIMEOUT", 50);
+ if (rc != 0)
+ return rc;
+
+ rc = ptllnd_parse_int_tunable(&plni->plni_long_wait,
+ "PTLLND_LONG_WAIT",
+ plni->plni_debug ? 5 : plni->plni_timeout);
+ if (rc != 0)
+ return rc;
+ plni->plni_long_wait *= 1000; /* convert to mS */
plni->plni_max_msg_size = max_msg_size & ~7;
if (plni->plni_max_msg_size < PTLLND_MIN_BUFFER_SIZE)
plni->plni_max_msg_size = PTLLND_MIN_BUFFER_SIZE;
- CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
- CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
+ CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
+ CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
plni->plni_buffer_size = plni->plni_max_msg_size * msgs_per_buffer;
CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers);
- plni->plni_nmsgs += delta;
- LASSERT(plni->plni_nmsgs >= 0);
-
+ plni->plni_nmsgs += delta;
+ LASSERT(plni->plni_nmsgs >= 0);
+
nmsgs = plni->plni_nmsgs + plni->plni_msgs_spare;
nbufs = (nmsgs * plni->plni_max_msg_size + plni->plni_buffer_size - 1) /
LASSERT (plni->plni_nbuffers > 0);
if (buf->plb_posted) {
- time_t start = cfs_time_current_sec();
- int w = plni->plni_long_wait;
+ time_t start = cfs_time_current_sec();
+ int w = plni->plni_long_wait;
LASSERT (plni->plni_nposted_buffers > 0);
#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
(void) PtlMDUnlink(buf->plb_md);
- while (buf->plb_posted) {
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds to unlink buffer\n",
- (int)(cfs_time_current_sec() - start));
- w *= 2;
- }
- ptllnd_wait(ni, w);
- }
+ while (buf->plb_posted) {
+ if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+ CWARN("Waited %ds to unlink buffer\n",
+ (int)(cfs_time_current_sec() - start));
+ w *= 2;
+ }
+ ptllnd_wait(ni, w);
+ }
#else
while (buf->plb_posted) {
rc = PtlMDUnlink(buf->plb_md);
break;
}
LASSERT (rc == PTL_MD_IN_USE);
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds to unlink buffer\n",
- cfs_time_current_sec() - start);
- w *= 2;
- }
- ptllnd_wait(ni, w);
+ if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+ CWARN("Waited %ds to unlink buffer\n",
+ cfs_time_current_sec() - start);
+ w *= 2;
+ }
+ ptllnd_wait(ni, w);
}
#endif
}
int
ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
{
- switch (cmd) {
- case IOC_LIBCFS_DEBUG_PEER:
- ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg));
- return 0;
-
- default:
- return -EINVAL;
- }
+ switch (cmd) {
+ case IOC_LIBCFS_DEBUG_PEER:
+ ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg));
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
}
__u64
{
ptllnd_ni_t *plni = ni->ni_data;
int rc;
- time_t start = cfs_time_current_sec();
- int w = plni->plni_long_wait;
+ time_t start = cfs_time_current_sec();
+ int w = plni->plni_long_wait;
LASSERT (ptllnd_ni_count == 1);
- plni->plni_max_tx_history = 0;
+ plni->plni_max_tx_history = 0;
- ptllnd_cull_tx_history(plni);
+ ptllnd_cull_tx_history(plni);
ptllnd_close_peers(ni);
ptllnd_destroy_buffers(ni);
while (plni->plni_npeers > 0) {
- if (w > 0 && cfs_time_current_sec() > start + w/1000) {
- CWARN("Waited %ds for peers to shutdown\n",
- (int)(cfs_time_current_sec() - start));
- w *= 2;
- }
+ if (w > 0 && cfs_time_current_sec() > start + w/1000) {
+ CWARN("Waited %ds for peers to shutdown\n",
+ (int)(cfs_time_current_sec() - start));
+ w *= 2;
+ }
ptllnd_wait(ni, w);
- }
+ }
LASSERT (plni->plni_ntxs == 0);
LASSERT (plni->plni_nrxs == 0);
ptllnd_ni_t *plni;
int rc;
- /* could get limits from portals I guess... */
- ni->ni_maxtxcredits =
- ni->ni_peertxcredits = 1000;
+ /* could get limits from portals I guess... */
+ ni->ni_maxtxcredits =
+ ni->ni_peertxcredits = 1000;
if (ptllnd_ni_count != 0) {
CERROR("Can't have > 1 instance of ptllnd\n");
ptllnd_ni_count++;
- rc = ptllnd_history_init();
- if (rc != 0) {
- CERROR("Can't init history\n");
- goto failed0;
- }
-
+ rc = ptllnd_history_init();
+ if (rc != 0) {
+ CERROR("Can't init history\n");
+ goto failed0;
+ }
+
LIBCFS_ALLOC(plni, sizeof(*plni));
if (plni == NULL) {
CERROR("Can't allocate ptllnd state\n");
plni->plni_stamp = ptllnd_get_timestamp();
plni->plni_nrxs = 0;
plni->plni_ntxs = 0;
- plni->plni_ntx_history = 0;
- plni->plni_watchdog_peeridx = 0;
- plni->plni_watchdog_nextt = cfs_time_current_sec();
+ plni->plni_ntx_history = 0;
+ plni->plni_watchdog_peeridx = 0;
+ plni->plni_watchdog_nextt = cfs_time_current_sec();
CFS_INIT_LIST_HEAD(&plni->plni_zombie_txs);
CFS_INIT_LIST_HEAD(&plni->plni_tx_history);
NULL, NULL, &plni->plni_nih);
if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
CERROR("PtlNIInit failed: %s(%d)\n",
- ptllnd_errtype2str(rc), rc);
+ ptllnd_errtype2str(rc), rc);
rc = -ENODEV;
goto failed2;
}
PTL_EQ_HANDLER_NONE, &plni->plni_eqh);
if (rc != PTL_OK) {
CERROR("PtlEQAlloc failed: %s(%d)\n",
- ptllnd_errtype2str(rc), rc);
+ ptllnd_errtype2str(rc), rc);
rc = -ENODEV;
goto failed3;
}
/*
* Fetch the Portals NID
*/
- rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id);
+ rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id);
if (rc != PTL_OK) {
CERROR ("PtlGetID failed : %s(%d)\n",
- ptllnd_errtype2str(rc), rc);
+ ptllnd_errtype2str(rc), rc);
rc = -EINVAL;
goto failed4;
}
if (rc != 0)
goto failed4;
- return 0;
+ return 0;
failed4:
ptllnd_destroy_buffers(ni);
failed1:
LIBCFS_FREE(plni, sizeof(*plni));
failed0:
- ptllnd_history_fini();
+ ptllnd_history_fini();
ptllnd_ni_count--;
CDEBUG(D_NET, "<<< rc=%d\n",rc);
return rc;
int plp_max_msg_size;
int plp_refcount;
+ int plp_sent_hello:1;
int plp_recvd_hello:1;
int plp_closing:1;
__u64 plp_match;
__u64 plp_stamp;
struct list_head plp_txq;
+ struct list_head plp_noopq;
struct list_head plp_activeq;
} ptllnd_peer_t;
static inline lnet_nid_t
ptllnd_ptl2lnetnid(lnet_ni_t *ni, ptl_nid_t portals_nid)
{
- return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid);
+ return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid);
}
static inline ptl_nid_t
ptllnd_lnet2ptlnid(lnet_nid_t lnet_nid)
{
- return LNET_NIDADDR(lnet_nid);
+ return LNET_NIDADDR(lnet_nid);
}
/*
{
ptllnd_peer_t *peer = tx->tx_peer;
+ LASSERT (tx->tx_type != PTLLND_MSG_TYPE_NOOP);
+
ptllnd_set_tx_deadline(tx);
list_add_tail(&tx->tx_list, &peer->plp_txq);
ptllnd_check_sends(peer);
static int idx = 0;
char *str = strs[idx++];
-
+
if (idx >= sizeof(strs)/sizeof(strs[0]))
idx = 0;
LASSERT (peer->plp_closing);
LASSERT (plni->plni_npeers > 0);
LASSERT (list_empty(&peer->plp_txq));
+ LASSERT (list_empty(&peer->plp_noopq));
LASSERT (list_empty(&peer->plp_activeq));
plni->plni_npeers--;
LIBCFS_FREE(peer, sizeof(*peer));
peer->plp_closing = 1;
if (!list_empty(&peer->plp_txq) ||
+ !list_empty(&peer->plp_noopq) ||
!list_empty(&peer->plp_activeq) ||
error != 0) {
CWARN("Closing %s\n", libcfs_id2str(peer->plp_id));
if (plni->plni_debug)
ptllnd_dump_debug(ni, peer->plp_id);
}
-
+
ptllnd_abort_txs(plni, &peer->plp_txq);
+ ptllnd_abort_txs(plni, &peer->plp_noopq);
ptllnd_abort_txs(plni, &peer->plp_activeq);
list_del(&peer->plp_list);
{
ptllnd_ni_t *plni = ni->ni_data;
unsigned int hash = LNET_NIDADDR(id.nid) % plni->plni_peer_hash_size;
- struct list_head *tmp;
ptllnd_peer_t *plp;
ptllnd_tx_t *tx;
int rc;
LASSERT (LNET_NIDNET(id.nid) == LNET_NIDNET(ni->ni_nid));
- list_for_each(tmp, &plni->plni_peer_hash[hash]) {
- plp = list_entry(tmp, ptllnd_peer_t, plp_list);
-
+ list_for_each_entry (plp, &plni->plni_peer_hash[hash], plp_list) {
if (plp->plp_id.nid == id.nid &&
plp->plp_id.pid == id.pid) {
ptllnd_peer_addref(plp);
plp->plp_extra_lazy_credits = 0;
plp->plp_match = 0;
plp->plp_stamp = 0;
+ plp->plp_sent_hello = 0;
plp->plp_recvd_hello = 0;
plp->plp_closing = 0;
plp->plp_refcount = 1;
CFS_INIT_LIST_HEAD(&plp->plp_list);
CFS_INIT_LIST_HEAD(&plp->plp_txq);
+ CFS_INIT_LIST_HEAD(&plp->plp_noopq);
CFS_INIT_LIST_HEAD(&plp->plp_activeq);
ptllnd_peer_addref(plp);
{
struct list_head *e;
int n = 0;
-
+
list_for_each(e, q) {
n++;
}
-
+
return n;
}
const char *
-ptllnd_tx_typestr(int type)
+ptllnd_tx_typestr(int type)
{
switch (type) {
case PTLLND_RDMA_WRITE:
return "rdma_write";
-
+
case PTLLND_RDMA_READ:
return "rdma_read";
case PTLLND_MSG_TYPE_PUT:
return "put_req";
-
+
case PTLLND_MSG_TYPE_GET:
return "get_req";
}
void
-ptllnd_debug_tx(ptllnd_tx_t *tx)
+ptllnd_debug_tx(ptllnd_tx_t *tx)
{
CDEBUG(D_WARNING, "%s %s b %ld.%06ld/%ld.%06ld"
" r %ld.%06ld/%ld.%06ld status %d\n",
ptllnd_tx_typestr(tx->tx_type),
libcfs_id2str(tx->tx_peer->plp_id),
- tx->tx_bulk_posted.tv_sec, tx->tx_bulk_posted.tv_usec,
+ tx->tx_bulk_posted.tv_sec, tx->tx_bulk_posted.tv_usec,
tx->tx_bulk_done.tv_sec, tx->tx_bulk_done.tv_usec,
tx->tx_req_posted.tv_sec, tx->tx_req_posted.tv_usec,
tx->tx_req_done.tv_sec, tx->tx_req_done.tv_usec,
ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id)
{
ptllnd_peer_t *plp = ptllnd_find_peer(ni, id, 0);
- struct list_head *tmp;
ptllnd_ni_t *plni = ni->ni_data;
ptllnd_tx_t *tx;
-
+
if (plp == NULL) {
CDEBUG(D_WARNING, "No peer %s\n", libcfs_id2str(id));
return;
}
-
- CDEBUG(D_WARNING, "%s %s%s [%d] "LPU64".%06d m "LPU64" q %d/%d c %d/%d+%d(%d)\n",
- libcfs_id2str(id),
- plp->plp_recvd_hello ? "H" : "_",
- plp->plp_closing ? "C" : "_",
- plp->plp_refcount,
- plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000),
- plp->plp_match,
- ptllnd_count_q(&plp->plp_txq),
- ptllnd_count_q(&plp->plp_activeq),
- plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits,
- plni->plni_peer_credits + plp->plp_lazy_credits);
+
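+ /* One line per peer: id, flags (H = HELLO received, C = closing),
+ * [refcount], stamp as sec.usec, m <match bits>, q = depth of
+ * txq/noopq/activeq, c = credits/outstanding+sent(per-peer total) */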
+ CWARN("%s %s%s [%d] "LPU64".%06d m "LPU64" q %d/%d/%d c %d/%d+%d(%d)\n",
+ libcfs_id2str(id),
+ plp->plp_recvd_hello ? "H" : "_",
+ plp->plp_closing ? "C" : "_",
+ plp->plp_refcount,
+ plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000),
+ plp->plp_match,
+ ptllnd_count_q(&plp->plp_txq),
+ ptllnd_count_q(&plp->plp_noopq),
+ ptllnd_count_q(&plp->plp_activeq),
+ plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits,
+ plni->plni_peer_credits + plp->plp_lazy_credits);
CDEBUG(D_WARNING, "txq:\n");
- list_for_each (tmp, &plp->plp_txq) {
- tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
+ list_for_each_entry (tx, &plp->plp_txq, tx_list) {
+ ptllnd_debug_tx(tx);
+ }
+
+ CDEBUG(D_WARNING, "noopq:\n");
+ list_for_each_entry (tx, &plp->plp_noopq, tx_list) {
ptllnd_debug_tx(tx);
}
CDEBUG(D_WARNING, "activeq:\n");
- list_for_each (tmp, &plp->plp_activeq) {
- tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
+ list_for_each_entry (tx, &plp->plp_activeq, tx_list) {
ptllnd_debug_tx(tx);
}
CDEBUG(D_WARNING, "zombies:\n");
- list_for_each (tmp, &plni->plni_zombie_txs) {
- tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
+ list_for_each_entry (tx, &plni->plni_zombie_txs, tx_list) {
if (tx->tx_peer->plp_id.nid == id.nid &&
tx->tx_peer->plp_id.pid == id.pid)
ptllnd_debug_tx(tx);
}
-
+
CDEBUG(D_WARNING, "history:\n");
- list_for_each (tmp, &plni->plni_tx_history) {
- tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
+ list_for_each_entry (tx, &plni->plni_tx_history, tx_list) {
if (tx->tx_peer->plp_id.nid == id.nid &&
tx->tx_peer->plp_id.pid == id.pid)
ptllnd_debug_tx(tx);
}
-
+
ptllnd_peer_decref(plp);
}
id.nid = nid;
id.pid = LUSTRE_SRV_LNET_PID;
-
+
peer = ptllnd_find_peer(ni, id, 1);
if (peer == NULL)
return;
libcfs_id2str(id));
w *= 2;
}
-
+
ptllnd_wait(ni, w);
}
-
+
ptllnd_peer_decref(peer);
}
{
ptllnd_peer_t *peer = ptllnd_find_peer(ni, id, nasync > 0);
int rc;
-
+
if (peer == NULL)
return -ENOMEM;
nasync -= peer->plp_extra_lazy_credits;
peer->plp_extra_lazy_credits = 0;
-
+
rc = ptllnd_size_buffers(ni, nasync);
if (rc == 0) {
peer->plp_lazy_credits += nasync;
}
ptllnd_close_peer(peer, tx->tx_status);
}
-
+
ptllnd_abort_tx(tx, &tx->tx_reqmdh);
ptllnd_abort_tx(tx, &tx->tx_bulkmdh);
plni->plni_ntx_history++;
list_add_tail(&tx->tx_list, &plni->plni_tx_history);
-
+
ptllnd_cull_tx_history(plni);
}
piov[npiov].iov_base = iov[npiov].iov_base + temp_offset;
piov[npiov].iov_len = iov[npiov].iov_len - temp_offset;
-
+
if (piov[npiov].iov_len >= resid) {
piov[npiov].iov_len = resid;
npiov++;
return -ENOMEM;
}
+static inline int
+ptllnd_peer_send_noop (ptllnd_peer_t *peer)
+{
+ ptllnd_ni_t *plni = peer->plp_ni->ni_data;
+
+ if (!peer->plp_sent_hello ||
+ peer->plp_credits == 0 ||
+ !list_empty(&peer->plp_noopq) ||
+ peer->plp_outstanding_credits < PTLLND_CREDIT_HIGHWATER(plni))
+ return 0;
+
+ /* No tx to piggyback NOOP onto or no credit to send a tx */
+ return (list_empty(&peer->plp_txq) || peer->plp_credits == 1);
+}
+
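As a worked example of ptllnd_peer_send_noop() above: assuming PTLLND_CREDIT_HIGHWATER(plni) is one below the per-peer credit count (e.g. 7 with 8 peer credits), a standalone NOOP gets queued on plp_noopq only once the HELLO has been sent, no NOOP is already queued, at least one send credit is left, 7 or more credits are owed back to the peer, and either there is no regular tx on plp_txq to piggyback them on or only the single reserved credit remains.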
void
ptllnd_check_sends(ptllnd_peer_t *peer)
{
- lnet_ni_t *ni = peer->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
+ ptllnd_ni_t *plni = peer->plp_ni->ni_data;
ptllnd_tx_t *tx;
ptl_md_t md;
ptl_handle_md_t mdh;
peer->plp_outstanding_credits, peer->plp_sent_credits,
plni->plni_peer_credits + peer->plp_lazy_credits);
- if (list_empty(&peer->plp_txq) &&
- peer->plp_outstanding_credits >= PTLLND_CREDIT_HIGHWATER(plni) &&
- peer->plp_credits != 0) {
-
+ if (ptllnd_peer_send_noop(peer)) {
tx = ptllnd_new_tx(peer, PTLLND_MSG_TYPE_NOOP, 0);
CDEBUG(D_NET, "NOOP tx=%p\n",tx);
if (tx == NULL) {
libcfs_id2str(peer->plp_id));
} else {
ptllnd_set_tx_deadline(tx);
- list_add_tail(&tx->tx_list, &peer->plp_txq);
+ list_add_tail(&tx->tx_list, &peer->plp_noopq);
}
}
- while (!list_empty(&peer->plp_txq)) {
- tx = list_entry(peer->plp_txq.next, ptllnd_tx_t, tx_list);
+ for (;;) {
+ if (!list_empty(&peer->plp_noopq)) {
+ LASSERT (peer->plp_sent_hello);
+ tx = list_entry(peer->plp_noopq.next,
+ ptllnd_tx_t, tx_list);
+ } else if (!list_empty(&peer->plp_txq)) {
+ tx = list_entry(peer->plp_txq.next,
+ ptllnd_tx_t, tx_list);
+ } else {
+ /* nothing to send right now */
+ break;
+ }
LASSERT (tx->tx_msgsize > 0);
<= plni->plni_peer_credits + peer->plp_lazy_credits);
LASSERT (peer->plp_credits >= 0);
+ /* say HELLO first */
+ if (!peer->plp_sent_hello) {
+ LASSERT (list_empty(&peer->plp_noopq));
+ LASSERT (tx->tx_type == PTLLND_MSG_TYPE_HELLO);
+
+ peer->plp_sent_hello = 1;
+ }
+
if (peer->plp_credits == 0) { /* no credits */
PTLLND_HISTORY("%s[%d/%d+%d(%d)]: no creds for %p",
libcfs_id2str(peer->plp_id),
peer->plp_lazy_credits, tx);
break;
}
-
- if (peer->plp_credits == 1 && /* last credit reserved for */
- peer->plp_outstanding_credits == 0) { /* returning credits */
+
+ /* Last/Initial credit reserved for NOOP/HELLO */
+ if (peer->plp_credits == 1 &&
+ tx->tx_type != PTLLND_MSG_TYPE_NOOP &&
+ tx->tx_type != PTLLND_MSG_TYPE_HELLO) {
PTLLND_HISTORY("%s[%d/%d+%d(%d)]: too few creds for %p",
libcfs_id2str(peer->plp_id),
peer->plp_credits,
peer->plp_lazy_credits, tx);
break;
}
-
+
list_del(&tx->tx_list);
list_add_tail(&tx->tx_list, &peer->plp_activeq);
ptllnd_msgtype2str(tx->tx_type),tx->tx_type);
if (tx->tx_type == PTLLND_MSG_TYPE_NOOP &&
- (!list_empty(&peer->plp_txq) ||
- peer->plp_outstanding_credits <
- PTLLND_CREDIT_HIGHWATER(plni))) {
+ !ptllnd_peer_send_noop(peer)) {
/* redundant NOOP */
ptllnd_tx_done(tx);
continue;
LASSERT (tx->tx_type != PTLLND_RDMA_WRITE &&
tx->tx_type != PTLLND_RDMA_READ);
-
+
tx->tx_reqmdh = mdh;
gettimeofday(&tx->tx_req_posted, NULL);
LASSERT (msg->msg_niov <= PTL_MD_MAX_IOV); /* !!! */
- CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n",
+ CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n",
lnet_msgtyp2str(msg->msg_type),
msg->msg_niov, msg->msg_offset, msg->msg_len,
libcfs_nid2str(msg->msg_target.nid),
libcfs_id2str(msg->msg_target));
return -EHOSTUNREACH;
}
-
+
plp = ptllnd_find_peer(ni, msg->msg_target, 1);
if (plp == NULL)
return -ENOMEM;
ptllnd_rx_done(ptllnd_rx_t *rx)
{
ptllnd_peer_t *plp = rx->rx_peer;
- lnet_ni_t *ni = plp->plp_ni;
- ptllnd_ni_t *plni = ni->ni_data;
+ ptllnd_ni_t *plni = plp->plp_ni->ni_data;
plp->plp_outstanding_credits++;
plp->plp_sent_credits,
plni->plni_peer_credits + plp->plp_lazy_credits, rx);
- ptllnd_check_sends(rx->rx_peer);
+ ptllnd_check_sends(plp);
LASSERT (plni->plni_nrxs > 0);
plni->plni_nrxs--;
msg_version = flip ? __swab16(msg->ptlm_version) : msg->ptlm_version;
if (msg_version != PTLLND_MSG_VERSION) {
- CERROR("Bad protocol version %04x from %s: %04x expected\n",
+ CERROR("Bad protocol version %04x from %s: %04x expected\n",
(__u32)msg_version, ptllnd_ptlid2str(initiator), PTLLND_MSG_VERSION);
if (plni->plni_abort_on_protocol_mismatch)
msg->ptlm_version = msg_version;
msg->ptlm_cksum = msg_cksum;
-
+
if (flip) {
/* NB stamps are opaque cookies */
__swab32s(&msg->ptlm_nob);
__swab32s(&msg->ptlm_srcpid);
__swab32s(&msg->ptlm_dstpid);
}
-
+
srcid.nid = msg->ptlm_srcnid;
srcid.pid = msg->ptlm_srcpid;
}
if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) {
- CERROR("NAK from %s (%s)\n",
+ CERROR("NAK from %s (%s)\n",
libcfs_id2str(srcid),
ptllnd_ptlid2str(initiator));
if (plni->plni_dump_on_nak)
ptllnd_dump_debug(ni, srcid);
-
+
if (plni->plni_abort_on_nak)
abort();
-
+
return;
}
-
+
if (msg->ptlm_dstnid != ni->ni_nid ||
msg->ptlm_dstpid != the_lnet.ln_pid) {
CERROR("Bad dstid %s (%s expected) from %s\n",
__swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size);
}
break;
-
+
case PTLLND_MSG_TYPE_NOOP:
break;
if (plp->plp_sent_credits == 0) {
CERROR("%s[%d/%d+%d(%d)]: unexpected message\n",
libcfs_id2str(plp->plp_id),
- plp->plp_credits, plp->plp_outstanding_credits,
+ plp->plp_credits, plp->plp_outstanding_credits,
plp->plp_sent_credits,
plni->plni_peer_credits + plp->plp_lazy_credits);
return;
}
plp->plp_sent_credits--;
-
+
/* No check for credit overflow - the peer may post new buffers after
* the startup handshake. */
- if (msg->ptlm_credits > 0) {
- plp->plp_credits += msg->ptlm_credits;
- ptllnd_check_sends(plp);
- }
+ plp->plp_credits += msg->ptlm_credits;
/* All OK so far; assume the message is good... */
break;
}
+ if (msg->ptlm_credits > 0)
+ ptllnd_check_sends(plp);
+
ptllnd_peer_decref(plp);
}
/* Portals can't force message alignment - someone sending an
* odd-length message could misalign subsequent messages */
if ((event->mlength & 7) != 0) {
- CERROR("Message from %s has odd length %llu: "
+ CERROR("Message from %s has odd length %u: "
"probable version incompatibility\n",
ptllnd_ptlid2str(event->initiator),
event->mlength);
LASSERT (!isreq != !isbulk); /* always one and only 1 match */
PTLLND_HISTORY("%s[%d/%d+%d(%d)]: TX done %p %s%s",
- libcfs_id2str(tx->tx_peer->plp_id),
+ libcfs_id2str(tx->tx_peer->plp_id),
tx->tx_peer->plp_credits,
tx->tx_peer->plp_outstanding_credits,
tx->tx_peer->plp_sent_credits,
ptllnd_find_timed_out_tx(ptllnd_peer_t *peer)
{
time_t now = cfs_time_current_sec();
- struct list_head *tmp;
+ ptllnd_tx_t *tx;
+
+ list_for_each_entry (tx, &peer->plp_txq, tx_list) {
+ if (tx->tx_deadline < now)
+ return tx;
+ }
- list_for_each(tmp, &peer->plp_txq) {
- ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
+ list_for_each_entry (tx, &peer->plp_noopq, tx_list) {
if (tx->tx_deadline < now)
return tx;
}
-
- list_for_each(tmp, &peer->plp_activeq) {
- ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list);
-
+
+ list_for_each_entry (tx, &peer->plp_activeq, tx_list) {
if (tx->tx_deadline < now)
return tx;
}
ptllnd_check_peer(ptllnd_peer_t *peer)
{
ptllnd_tx_t *tx = ptllnd_find_timed_out_tx(peer);
-
+
if (tx == NULL)
return;
-
+
CERROR("%s: timed out\n", libcfs_id2str(peer->plp_id));
ptllnd_close_peer(peer, -ETIMEDOUT);
}
for (i = 0; i < chunk; i++) {
hashlist = &plni->plni_peer_hash[plni->plni_watchdog_peeridx];
-
+
list_for_each_safe(tmp, nxt, hashlist) {
ptllnd_check_peer(list_entry(tmp, ptllnd_peer_t, plp_list));
}
-
+
plni->plni_watchdog_peeridx = (plni->plni_watchdog_peeridx + 1) %
plni->plni_peer_hash_size;
}
struct timeval then;
struct timeval now;
struct timeval deadline;
-
+
ptllnd_ni_t *plni = ni->ni_data;
ptllnd_tx_t *tx;
ptl_event_t event;
for (;;) {
gettimeofday(&then, NULL);
-
+
rc = PtlEQPoll(&plni->plni_eqh, 1, timeout, &event, &which);
gettimeofday(&now, NULL);
ptllnd_watchdog(ni, now.tv_sec);
LASSERT (now.tv_sec < plni->plni_watchdog_nextt);
}
-
+
if (now.tv_sec > deadline.tv_sec || /* timeout expired */
(now.tv_sec == deadline.tv_sec &&
now.tv_usec >= deadline.tv_usec))
continue;
}
-
+
LASSERT (rc == PTL_OK || rc == PTL_EQ_DROPPED);
if (rc == PTL_EQ_DROPPED)
#define __USE_FILE_OFFSET64
#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
+#define _GNU_SOURCE
#endif
#include <stdio.h>
"pinger", "filter", "", "echo",
"ldlm", "lov", "lquota", "",
"", "", "", "lmv",
- "", "sec", "gss", "",
+ "", "sec", "gss", "",
"mgc", "mgs", "fid", "fld", NULL};
static const char *libcfs_debug_masks[] =
{"trace", "inode", "super", "ext2",
if (rc != 0) {
fprintf(stderr, "Write to %s failed: %s\n",
procpath, strerror(errno));
- return rc;
}
+
dbg_close_ctlhandle(fd);
- return 0;
+
+ return rc;
}
static void applymask_all(unsigned int subs_mask, unsigned int debug_mask)
*linevp = linev;
*lenp = nlen;
}
- linev[used] = line;
+ linev[used] = line;
return 1;
}
line->text = p;
if (!add_rec(line, &linev, &linev_len, kept)) {
- fprintf(stderr, "malloc failed; printing accumulated "
+ fprintf(stderr, "malloc failed; printing accumulated "
"records and exiting.\n");
break;
- }
+ }
kept++;
}
strcpy(filename, argv[1]);
else
sprintf(filename, "/tmp/lustre-log."CFS_TIME_T".%u",
- time(NULL),getpid());
+ time(NULL),getpid());
if (stat(filename, &st) == 0 && S_ISREG(st.st_mode))
unlink(filename);
if (rc != 0) {
fprintf(stderr, "write(%s) failed: %s\n", filename,
strerror(errno));
- close(fd);
+ dbg_close_ctlhandle(fd);
return 1;
}
dbg_close_ctlhandle(fd);