From dc127060d83351d5d0583bc9a4371ecc4dd73db4 Mon Sep 17 00:00:00 2001 From: johann Date: Thu, 18 Dec 2008 23:56:46 +0000 Subject: [PATCH] Land b1_x_lnet_gate onto b1_x (20081219_0014) b=18078 Land the b1_x_lnet_gate to b1_x (used by b1_6/b1_8) since we are going to retire b1_8_gate. --- lnet/ChangeLog | 76 ++++--- lnet/autoconf/lustre-lnet.m4 | 66 ++++-- lnet/include/libcfs/libcfs.h | 3 + lnet/include/libcfs/linux/kp30.h | 3 + lnet/klnds/iiblnd/iiblnd_modparams.c | 4 +- lnet/klnds/o2iblnd/o2iblnd.c | 8 +- lnet/klnds/o2iblnd/o2iblnd.h | 4 +- lnet/klnds/o2iblnd/o2iblnd_cb.c | 22 +- lnet/klnds/o2iblnd/o2iblnd_modparams.c | 12 +- lnet/klnds/socklnd/socklnd.c | 64 +++--- lnet/klnds/socklnd/socklnd.h | 19 +- lnet/klnds/socklnd/socklnd_cb.c | 93 +++++++- lnet/klnds/socklnd/socklnd_lib-darwin.c | 16 +- lnet/klnds/socklnd/socklnd_lib-linux.c | 119 ++++++++-- lnet/klnds/socklnd/socklnd_modparams.c | 10 + lnet/klnds/viblnd/viblnd.c | 198 ++++++++-------- lnet/klnds/viblnd/viblnd_cb.c | 20 +- lnet/klnds/viblnd/viblnd_modparams.c | 2 +- lnet/libcfs/debug.c | 1 + lnet/lnet/api-ni.c | 130 +++++------ lnet/lnet/lib-eq.c | 4 +- lnet/lnet/lib-md.c | 10 +- lnet/lnet/lib-me.c | 20 +- lnet/lnet/lib-move.c | 48 ++-- lnet/lnet/router_proc.c | 1 - lnet/selftest/rpc.c | 105 +++++---- lnet/selftest/selftest.h | 20 +- lnet/ulnds/ptllnd/ptllnd.c | 388 ++++++++++++++++---------------- lnet/ulnds/ptllnd/ptllnd.h | 6 +- lnet/ulnds/ptllnd/ptllnd_cb.c | 236 ++++++++++--------- lnet/utils/debug.c | 19 +- 31 files changed, 1037 insertions(+), 690 deletions(-) diff --git a/lnet/ChangeLog b/lnet/ChangeLog index 72baa91..b650e33 100644 --- a/lnet/ChangeLog +++ b/lnet/ChangeLog @@ -1,5 +1,5 @@ tbd Sun Microsystems, Inc. - * version 1.6.8 + * version 1.8.1 * Support for networks: socklnd - any kernel supported by Lustre, qswlnd - Qsnet kernel modules 5.20 and later, @@ -12,16 +12,16 @@ tbd Sun Microsystems, Inc. 
mxlnd - MX 1.2.1 or later, ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x -Severity : -Bugzilla : -Description: -Details : +Severity : +Bugzilla : +Description: +Details : --------------------------------------------------------------------------- +------------------------------------------------------------------------------- -01-15-2009 Sun Microsystems, Inc. - * version 1.6.7 +12-31-2008 Sun Microsystems, Inc. + * version 1.8.0 * Support for networks: socklnd - any kernel supported by Lustre, qswlnd - Qsnet kernel modules 5.20 and later, @@ -39,27 +39,51 @@ Bugzilla : Description: Details : +Severity : major +Bugzilla : 15983 +Description: workaround for OOM from o2iblnd +Details : OFED needs to allocate a big chunk of memory for the QP while creating + a connection for o2iblnd; OOM can happen if no such contiguous + memory chunk is available. + QP size is decided by concurrent_sends and max_fragments of + o2iblnd; we now permit the user to specify a smaller value for + concurrent_sends of o2iblnd (e.g. concurrent_sends=7), which + will decrease the memory block size required to create the QP. --------------------------------------------------------------------------- +Severity : major +Bugzilla : 15093 +Description: Support Zerocopy receive of Chelsio device +Details : Chelsio driver can support zerocopy for iov[1] if it's + contiguous and large enough. -2008-08-31 Sun Microsystems, Inc. 
- * version 1.6.6 - * Support for networks: - socklnd - any kernel supported by Lustre, - qswlnd - Qsnet kernel modules 5.20 and later, - openiblnd - IbGold 1.8.2, - o2iblnd - OFED 1.1, 1.2.0, 1.2.5, and 1.3 - viblnd - Voltaire ibhost 3.4.5 and later, - ciblnd - Topspin 3.2.0, - iiblnd - Infiniserv 3.3 + PathBits patch, - gmlnd - GM 2.1.22 and later, - mxlnd - MX 1.2.1 or later, - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x +Severity : normal +Bugzilla : 13490 +Description: fix credit flow deadlock in uptllnd -Severity : -Bugzilla : -Description: -Details : +Severity : normal +Bugzilla : 16308 +Description: finalize network operation in reasonable time +Details : conf-sanity test_32a couldn't stop ost and mds because it + tried to access non-existent peer and tcp connect took + quite long before timing out. + +Severity : major +Bugzilla : 16338 +Description: Continuous recovery on 33 of 413 nodes after lustre oss failure +Details : Lost reference on conn prevents peer from being destroyed, which + could prevent new peer creation if peer count has reached upper + limit. + +Severity : normal +Bugzilla : 16102 +Description: LNET Selftest results in Soft lockup on OSS CPU +Details : only hits when 8 or more o2ib clients involved and a session is + torn down with 'lst end_session' without preceding 'lst stop'. 
+ +Severity : minor +Bugzilla : 16321 +Description: concurrent_sends in IB LNDs should not be changeable at run time +Details : concurrent_sends in IB LNDs should not be changeable at run time Severity : normal Bugzilla : 15272 diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index 69e6adf..b7c02e7 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -523,6 +523,7 @@ if test $ENABLEO2IB -eq 0; then AC_MSG_RESULT([disabled]) else o2ib_found=false + for O2IBPATH in $O2IBPATHS; do if test \( -f ${O2IBPATH}/include/rdma/rdma_cm.h -a \ -f ${O2IBPATH}/include/rdma/ib_cm.h -a \ @@ -530,8 +531,9 @@ else -f ${O2IBPATH}/include/rdma/ib_fmr_pool.h \); then o2ib_found=true break - fi + fi done + if ! $o2ib_found; then AC_MSG_RESULT([no]) case $ENABLEO2IB in @@ -599,23 +601,41 @@ else fi fi - # version checking is a hack and isn't reliable, - # we need verify it with each new ofed release - - if grep -q ib_dma_map_single \ - ${O2IBPATH}/include/rdma/ib_verbs.h; then - if grep -q comp_vector \ - ${O2IBPATH}/include/rdma/ib_verbs.h; then - IBLND_OFED_VERSION="1025" - else - IBLND_OFED_VERSION="1020" - fi - else - IBLND_OFED_VERSION="1010" - fi + LB_LINUX_TRY_COMPILE([ + #include + #include + #if !HAVE_GFP_T + typedef int gfp_t; + #endif + #include + ],[ + ib_dma_map_single(NULL, NULL, 0, 0); + return 0; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_OFED_IB_DMA_MAP, 1, + [ib_dma_map_single defined]) + ],[ + AC_MSG_RESULT(NO) + ]) - AC_DEFINE_UNQUOTED(IBLND_OFED_VERSION, $IBLND_OFED_VERSION, - [OFED version]) + LB_LINUX_TRY_COMPILE([ + #include + #include + #if !HAVE_GFP_T + typedef int gfp_t; + #endif + #include + ],[ + ib_create_cq(NULL, NULL, NULL, NULL, 0, 0); + return 0; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_OFED_IB_COMP_VECTOR, 1, + [has completion vector]) + ],[ + AC_MSG_RESULT(NO) + ]) EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" fi @@ -1193,7 +1213,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT, 1, 
[kmem_cache_destroy(cachep) return int]) ],[ - AC_MSG_RESULT(NO) + AC_MSG_RESULT(no) ]) ]) @@ -1212,7 +1232,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFINE(HAVE_ATOMIC_PANIC_NOTIFIER, 1, [panic_notifier_list is atomic_notifier_head]) ],[ - AC_MSG_RESULT(NO) + AC_MSG_RESULT(no) ]) ]) @@ -1231,7 +1251,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFINE(HAVE_3ARGS_INIT_WORK, 1, [INIT_WORK use 3 args and store data inside]) ],[ - AC_MSG_RESULT(NO) + AC_MSG_RESULT(no) ]) ]) @@ -1248,7 +1268,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFINE(HAVE_2ARGS_REGISTER_SYSCTL, 1, [register_sysctl_table want 2 args]) ],[ - AC_MSG_RESULT(NO) + AC_MSG_RESULT(no) ]) ]) @@ -1270,7 +1290,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFINE(HAVE_KMEM_CACHE, 1, [kernel has struct kmem_cache]) ],[ - AC_MSG_RESULT(NO) + AC_MSG_RESULT(no) ]) EXTRA_KCFLAGS="$tmp_flags" ]) @@ -1286,7 +1306,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DTOR, 1, [kmem_cache_create has dtor argument]) ],[ - AC_MSG_RESULT(NO) + AC_MSG_RESULT(no) ]) ]) diff --git a/lnet/include/libcfs/libcfs.h b/lnet/include/libcfs/libcfs.h index 4a158c1..5db6433 100644 --- a/lnet/include/libcfs/libcfs.h +++ b/lnet/include/libcfs/libcfs.h @@ -34,6 +34,9 @@ * Lustre is a trademark of Sun Microsystems, Inc. */ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ #ifndef __LIBCFS_LIBCFS_H__ #define __LIBCFS_LIBCFS_H__ diff --git a/lnet/include/libcfs/linux/kp30.h b/lnet/include/libcfs/linux/kp30.h index b620d55..ac90aa0 100644 --- a/lnet/include/libcfs/linux/kp30.h +++ b/lnet/include/libcfs/linux/kp30.h @@ -34,6 +34,9 @@ * Lustre is a trademark of Sun Microsystems, Inc. 
*/ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ #ifndef __LIBCFS_LINUX_KP30_H__ #define __LIBCFS_LINUX_KP30_H__ diff --git a/lnet/klnds/iiblnd/iiblnd_modparams.c b/lnet/klnds/iiblnd/iiblnd_modparams.c index 4e02eee..8e7212d 100644 --- a/lnet/klnds/iiblnd/iiblnd_modparams.c +++ b/lnet/klnds/iiblnd/iiblnd_modparams.c @@ -267,7 +267,7 @@ static cfs_sysctl_table_t kibnal_ctl_table[] = { .procname = "concurrent_sends", .data = &concurrent_sends, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = &proc_dointvec }, {0} @@ -276,7 +276,7 @@ static cfs_sysctl_table_t kibnal_ctl_table[] = { static cfs_sysctl_table_t kibnal_top_ctl_table[] = { { .ctl_name = CTL_IIBLND, - .procname = "openibnal", + .procname = "iibnal", .data = NULL, .maxlen = 0, .mode = 0555, diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 313f690..0416c7c 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -738,7 +738,7 @@ kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid, int state) } } -#if (IBLND_OFED_VERSION == 1025) +#ifdef HAVE_OFED_IB_COMP_VECTOR cq = ib_create_cq(cmid->device, kiblnd_cq_completion, kiblnd_cq_event, conn, IBLND_CQ_ENTRIES(), 0); @@ -892,8 +892,8 @@ kiblnd_destroy_conn (kib_conn_t *conn) break; } - if (conn->ibc_cmid->qp != NULL) - rdma_destroy_qp(conn->ibc_cmid); + if (cmid->qp != NULL) + rdma_destroy_qp(cmid); if (conn->ibc_cq != NULL) { rc = ib_destroy_cq(conn->ibc_cq); @@ -909,7 +909,7 @@ kiblnd_destroy_conn (kib_conn_t *conn) LASSERT (rx->rx_nob >= 0); /* not posted */ - kiblnd_dma_unmap_single(conn->ibc_cmid->device, + kiblnd_dma_unmap_single(cmid->device, KIBLND_UNMAP_ADDR(rx, rx_msgunmap, rx->rx_msgaddr), IBLND_MSG_SIZE, DMA_FROM_DEVICE); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h index f699983..74ae887 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ b/lnet/klnds/o2iblnd/o2iblnd.h @@ -614,7 +614,7 
@@ kiblnd_rd_size (kib_rdma_desc_t *rd) } #endif -#if (IBLND_OFED_VERSION == 1020) || (IBLND_OFED_VERSION == 1025) +#ifdef HAVE_OFED_IB_DMA_MAP static inline __u64 kiblnd_dma_map_single(struct ib_device *dev, void *msg, size_t size, @@ -666,7 +666,7 @@ static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev, #define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data) #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len) -#elif (IBLND_OFED_VERSION == 1010) +#else static inline dma_addr_t kiblnd_dma_map_single(struct ib_device *dev, void *msg, size_t size, diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 867c67f..7bef169 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -1090,7 +1090,7 @@ kiblnd_tx_complete (kib_tx_t *tx, int status) if (failed) { if (conn->ibc_state == IBLND_CONN_ESTABLISHED) CDEBUG(D_NETERROR, "Tx -> %s cookie "LPX64 - "sending %d waiting %d: failed %d\n", + " sending %d waiting %d: failed %d\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), tx->tx_cookie, tx->tx_sending, tx->tx_waiting, status); @@ -3152,12 +3152,28 @@ kiblnd_scheduler(void *arg) if (rc == 0) { rc = ib_req_notify_cq(conn->ibc_cq, IB_CQ_NEXT_COMP); - LASSERT (rc >= 0); + if (rc < 0) { + CWARN("%s: ib_req_notify_cq failed: %d, " + "closing connection\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); + kiblnd_close_conn(conn, -EIO); + kiblnd_conn_decref(conn); + spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); + continue; + } rc = ib_poll_cq(conn->ibc_cq, 1, &wc); } - LASSERT (rc >= 0); + if (rc < 0) { + CWARN("%s: ib_poll_cq failed: %d, " + "closing connection\n", + libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); + kiblnd_close_conn(conn, -EIO); + kiblnd_conn_decref(conn); + spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); + continue; + } spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c 
b/lnet/klnds/o2iblnd/o2iblnd_modparams.c index 707b1f1..dff7e7c 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ b/lnet/klnds/o2iblnd/o2iblnd_modparams.c @@ -255,7 +255,7 @@ static cfs_sysctl_table_t kiblnd_ctl_table[] = { .procname = "concurrent_sends", .data = &concurrent_sends, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = &proc_dointvec }, { @@ -355,8 +355,14 @@ kiblnd_tunables_init (void) if (*kiblnd_tunables.kib_concurrent_sends > IBLND_RX_MSGS) *kiblnd_tunables.kib_concurrent_sends = IBLND_RX_MSGS; - if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE) - *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE; + if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE / 2) + *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE / 2; + + if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE) { + CWARN("Concurrent sends %d is lower than message queue size: %d, " + "performance may drop slightly.\n", + *kiblnd_tunables.kib_concurrent_sends, IBLND_MSG_QUEUE_SIZE); + } return 0; } diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index fd916ba..b29eff1 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -1060,6 +1060,7 @@ ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, } memset (conn, 0, sizeof (*conn)); + conn->ksnc_peer = NULL; conn->ksnc_route = NULL; conn->ksnc_sock = sock; @@ -1506,6 +1507,41 @@ ksocknal_peer_failed (ksock_peer_t *peer) } void +ksocknal_finalize_zcreq(ksock_conn_t *conn) +{ + ksock_peer_t *peer = conn->ksnc_peer; + ksock_tx_t *tx; + ksock_tx_t *tmp; + CFS_LIST_HEAD (zlist); + + /* NB safe to finalize TXs because closing of socket will + * abort all buffered data */ + LASSERT (conn->ksnc_sock == NULL); + + cfs_spin_lock(&peer->ksnp_lock); + + list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) { + if (tx->tx_conn != conn) + continue; + + LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0); + + 
tx->tx_msg.ksm_zc_req_cookie = 0; + list_del(&tx->tx_zc_list); + list_add(&tx->tx_zc_list, &zlist); + } + + cfs_spin_unlock(&peer->ksnp_lock); + + while (!list_empty(&zlist)) { + tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list); + + list_del(&tx->tx_zc_list); + ksocknal_tx_decref(tx); + } +} + +void ksocknal_terminate_conn (ksock_conn_t *conn) { /* This gets called by the reaper (guaranteed thread context) to @@ -1515,10 +1551,6 @@ ksocknal_terminate_conn (ksock_conn_t *conn) ksock_peer_t *peer = conn->ksnc_peer; ksock_sched_t *sched = conn->ksnc_scheduler; int failed = 0; - struct list_head *tmp; - struct list_head *nxt; - ksock_tx_t *tx; - LIST_HEAD (zlist); LASSERT(conn->ksnc_closing); @@ -1541,30 +1573,6 @@ ksocknal_terminate_conn (ksock_conn_t *conn) cfs_spin_unlock_bh (&sched->kss_lock); - cfs_spin_lock(&peer->ksnp_lock); - - list_for_each_safe(tmp, nxt, &peer->ksnp_zc_req_list) { - tx = list_entry(tmp, ksock_tx_t, tx_zc_list); - - if (tx->tx_conn != conn) - continue; - - LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0); - - tx->tx_msg.ksm_zc_req_cookie = 0; - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); - } - - cfs_spin_unlock(&peer->ksnp_lock); - - list_for_each_safe(tmp, nxt, &zlist) { - tx = list_entry(tmp, ksock_tx_t, tx_zc_list); - - list_del(&tx->tx_zc_list); - ksocknal_tx_decref(tx); - } - /* serialise with callbacks */ cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index 43d2fcd..7c5cf9f 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -75,6 +75,13 @@ typedef struct /* per scheduler state */ struct list_head kss_zombie_noop_txs; /* zombie noop tx list */ cfs_waitq_t kss_waitq; /* where scheduler sleeps */ int kss_nconns; /* # connections assigned to this scheduler */ +#if !SOCKNAL_SINGLE_FRAG_RX + struct page *kss_rx_scratch_pgs[LNET_MAX_IOV]; +#endif +#if !SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_SINGLE_FRAG_RX + struct iovec 
kss_scratch_iov[LNET_MAX_IOV]; +#endif + } ksock_sched_t; typedef struct @@ -113,6 +120,8 @@ typedef struct int *ksnd_enable_csum; /* enable check sum */ int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */ unsigned int *ksnd_zc_min_frag; /* minimum zero copy frag size */ + int *ksnd_zc_recv; /* enable ZC receive (for Chelsio TOE) */ + int *ksnd_zc_recv_min_nfrags; /* minimum # of fragments to enable ZC receive */ #ifdef CPU_AFFINITY int *ksnd_irq_affinity; /* enable IRQ affinity? */ #endif @@ -210,6 +219,7 @@ typedef struct /* transmit packet */ lnet_kiov_t *tx_kiov; /* packet page frags */ struct ksock_conn *tx_conn; /* owning conn */ lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */ + cfs_time_t tx_deadline; /* when (in jiffies) tx times out */ ksock_msg_t tx_msg; /* socklnd message buffer */ int tx_desc_size; /* size of this descriptor */ union { @@ -293,13 +303,6 @@ typedef struct ksock_conn cfs_atomic_t ksnc_tx_nob; /* # bytes queued */ int ksnc_tx_ready; /* write space */ int ksnc_tx_scheduled; /* being progressed */ - -#if !SOCKNAL_SINGLE_FRAG_RX - struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV]; -#endif -#if !SOCKNAL_SINGLE_FRAG_TX - struct iovec ksnc_tx_scratch_iov[LNET_MAX_IOV]; -#endif } ksock_conn_t; typedef struct ksock_route @@ -401,6 +404,7 @@ ksocknal_conn_addref (ksock_conn_t *conn) } extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn); +extern void ksocknal_finalize_zcreq(ksock_conn_t *conn); static inline void ksocknal_conn_decref (ksock_conn_t *conn) @@ -434,6 +438,7 @@ ksocknal_connsock_decref (ksock_conn_t *conn) LASSERT (conn->ksnc_closing); libcfs_sock_release(conn->ksnc_sock); conn->ksnc_sock = NULL; + ksocknal_finalize_zcreq(conn); } } diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 6fe8111..4c3f704 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -444,6 +444,10 @@ ksocknal_check_zc_req(ksock_tx_t *tx) 
cfs_spin_lock(&peer->ksnp_lock); + /* ZC_REQ is going to be pinned to the peer */ + tx->tx_deadline = + cfs_time_shift(*ksocknal_tunables.ksnd_timeout); + LASSERT (tx->tx_msg.ksm_zc_req_cookie == 0); tx->tx_msg.ksm_zc_req_cookie = peer->ksnp_zc_next_cookie++; list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list); @@ -738,10 +742,9 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) tx->tx_conn = conn; ksocknal_conn_addref(conn); /* +1 ref for tx */ - /* - * NB Darwin: SOCK_WMEM_QUEUED()->sock_getsockopt() will take - * a blockable lock(socket lock), so SOCK_WMEM_QUEUED can't be - * put in spinlock. + /* + * FIXME: SOCK_WMEM_QUEUED and SOCK_ERROR could block in __DARWIN8__ + * but they're used inside spinlocks a lot. */ bufnob = libcfs_sock_wmem_queued(conn->ksnc_sock); cfs_spin_lock_bh (&sched->kss_lock); @@ -961,6 +964,10 @@ ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) if (peer->ksnp_accepting > 0 || ksocknal_find_connecting_route_locked (peer) != NULL) { + /* the message is going to be pinned to the peer */ + tx->tx_deadline = + cfs_time_shift(*ksocknal_tunables.ksnd_timeout); + /* Queue the message until a connection is established */ list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); cfs_write_unlock_bh (g_lock); @@ -1291,6 +1298,16 @@ ksocknal_process_receive (ksock_conn_t *conn) __swab64s(&conn->ksnc_msg.ksm_zc_ack_cookie); } + if (conn->ksnc_msg.ksm_type != KSOCK_MSG_NOOP && + conn->ksnc_msg.ksm_type != KSOCK_MSG_LNET) { + CERROR("%s: Unknown message type: %x\n", + libcfs_id2str(conn->ksnc_peer->ksnp_id), + conn->ksnc_msg.ksm_type); + ksocknal_new_packet(conn, 0); + ksocknal_close_conn_and_siblings(conn, -EPROTO); + return (-EPROTO); + } + if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP && conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { @@ -1322,7 +1339,6 @@ ksocknal_process_receive (ksock_conn_t *conn) ksocknal_new_packet (conn, 0); return 0; /* NOOP is done 
and just return */ } - LASSERT (conn->ksnc_msg.ksm_type == KSOCK_MSG_LNET); conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t); @@ -2615,6 +2631,31 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) return (NULL); } +static inline void +ksocknal_flush_stale_txs(ksock_peer_t *peer) +{ + ksock_tx_t *tx; + CFS_LIST_HEAD (stale_txs); + + cfs_write_lock_bh (&ksocknal_data.ksnd_global_lock); + + while (!list_empty (&peer->ksnp_tx_queue)) { + tx = list_entry (peer->ksnp_tx_queue.next, + ksock_tx_t, tx_list); + + if (!cfs_time_aftereq(cfs_time_current(), + tx->tx_deadline)) + break; + + list_del (&tx->tx_list); + list_add_tail (&tx->tx_list, &stale_txs); + } + + cfs_write_unlock_bh (&ksocknal_data.ksnd_global_lock); + + ksocknal_txlist_done(peer->ksnp_ni, &stale_txs, 1); +} + void ksocknal_check_peer_timeouts (int idx) { @@ -2644,8 +2685,50 @@ ksocknal_check_peer_timeouts (int idx) ksocknal_conn_decref(conn); goto again; } + + /* we can't process stale txs right here because we're + * holding only shared lock */ + if (!list_empty (&peer->ksnp_tx_queue)) { + ksock_tx_t *tx = list_entry (peer->ksnp_tx_queue.next, + ksock_tx_t, tx_list); + + if (cfs_time_aftereq(cfs_time_current(), + tx->tx_deadline)) { + + ksocknal_peer_addref(peer); + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); + + ksocknal_flush_stale_txs(peer); + + ksocknal_peer_decref(peer); + goto again; + } + } } + /* print out warnings about stale ZC_REQs */ + list_for_each_entry(peer, peers, ksnp_list) { + ksock_tx_t *tx; + int n = 0; + + list_for_each_entry(tx, &peer->ksnp_zc_req_list, tx_zc_list) { + if (!cfs_time_aftereq(cfs_time_current(), + tx->tx_deadline)) + break; + n++; + } + + if (n != 0) { + tx = list_entry (peer->ksnp_zc_req_list.next, + ksock_tx_t, tx_zc_list); + CWARN("Stale ZC_REQs for peer %s detected: %d; the " + "oldest (%p) timed out %ld secs ago\n", + libcfs_nid2str(peer->ksnp_id.nid), n, tx, + cfs_duration_sec(cfs_time_current() - + 
tx->tx_deadline)); + } + } + cfs_read_unlock (&ksocknal_data.ksnd_global_lock); } diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.c b/lnet/klnds/socklnd/socklnd_lib-darwin.c index fbb2a5b..70e4294 100644 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.c +++ b/lnet/klnds/socklnd/socklnd_lib-darwin.c @@ -215,7 +215,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_niov; #endif struct msghdr msg = { @@ -260,7 +260,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_nkiov; #endif struct msghdr msg = { @@ -302,7 +302,7 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_niov; #endif struct iovec *iov = conn->ksnc_rx_iov; @@ -342,7 +342,7 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_nkiov; #endif lnet_kiov_t *kiov = conn->ksnc_rx_kiov; @@ -544,7 +544,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_niov; #endif struct socket *sock = conn->ksnc_sock; @@ 
-600,7 +600,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_nkiov; #endif struct socket *sock = conn->ksnc_sock; @@ -738,7 +738,7 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_niov; #endif struct iovec *iov = conn->ksnc_rx_iov; @@ -792,7 +792,7 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_nkiov; #endif lnet_kiov_t *kiov = conn->ksnc_rx_kiov; diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.c b/lnet/klnds/socklnd/socklnd_lib-linux.c index 800d4f5..5b0a9e9 100644 --- a/lnet/klnds/socklnd/socklnd_lib-linux.c +++ b/lnet/klnds/socklnd/socklnd_lib-linux.c @@ -60,7 +60,9 @@ enum { SOCKLND_KEEPALIVE_INTVL, SOCKLND_BACKOFF_INIT, SOCKLND_BACKOFF_MAX, - SOCKLND_PROTOCOL + SOCKLND_PROTOCOL, + SOCKLND_ZERO_COPY_RECV, + SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS }; #else @@ -84,6 +86,8 @@ enum { #define SOCKLND_BACKOFF_INIT CTL_UNNUMBERED #define SOCKLND_BACKOFF_MAX CTL_UNNUMBERED #define SOCKLND_PROTOCOL CTL_UNNUMBERED +#define SOCKLND_ZERO_COPY_RECV CTL_UNNUMBERED +#define SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS CTL_UNNUMBERED #endif static cfs_sysctl_table_t ksocknal_ctl_table[] = { @@ -160,6 +164,25 @@ static cfs_sysctl_table_t ksocknal_ctl_table[] = { .strategy = &sysctl_intvec, }, { + .ctl_name = SOCKLND_ZERO_COPY_RECV, + .procname = "zero_copy_recv", + .data = &ksocknal_tunables.ksnd_zc_recv, + .maxlen = sizeof 
(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + + { + .ctl_name = SOCKLND_ZERO_COPY_RECV_MIN_NFRAGS, + .procname = "zero_copy_recv", + .data = &ksocknal_tunables.ksnd_zc_recv_min_nfrags, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec, + }, + { .ctl_name = SOCKLND_TYPED, .procname = "typed", .data = &ksocknal_tunables.ksnd_typed_conns, @@ -292,6 +315,11 @@ cfs_sysctl_table_t ksocknal_top_ctl_table[] = { int ksocknal_lib_tunables_init () { + if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags < 2) + *ksocknal_tunables.ksnd_zc_recv_min_nfrags = 2; + if (*ksocknal_tunables.ksnd_zc_recv_min_nfrags > LNET_MAX_IOV) + *ksocknal_tunables.ksnd_zc_recv_min_nfrags = LNET_MAX_IOV; + ksocknal_tunables.ksnd_sysctl = cfs_register_sysctl_table(ksocknal_top_ctl_table, 0); @@ -451,7 +479,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_niov; #endif struct msghdr msg = { @@ -524,7 +552,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) #ifdef CONFIG_HIGHMEM #warning "XXX risk of kmap deadlock on multiple frags..." 
#endif - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = tx->tx_nkiov; #endif struct msghdr msg = { @@ -585,7 +613,7 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn) struct iovec *scratchiov = &scratch; unsigned int niov = 1; #else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; unsigned int niov = conn->ksnc_rx_niov; #endif struct iovec *iov = conn->ksnc_rx_iov; @@ -645,26 +673,72 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn) return rc; } +static void +ksocknal_lib_kiov_vunmap(void *addr) +{ + if (addr == NULL) + return; + + vunmap(addr); +} + +static void * +ksocknal_lib_kiov_vmap(lnet_kiov_t *kiov, int niov, + struct iovec *iov, struct page **pages) +{ + void *addr; + int nob; + int i; + + if (!*ksocknal_tunables.ksnd_zc_recv || pages == NULL) + return NULL; + + LASSERT (niov <= LNET_MAX_IOV); + + if (niov < 2 || + niov < *ksocknal_tunables.ksnd_zc_recv_min_nfrags) + return NULL; + + for (nob = i = 0; i < niov; i++) { + if ((kiov[i].kiov_offset != 0 && i > 0) || + (kiov[i].kiov_offset + kiov[i].kiov_len != CFS_PAGE_SIZE && i < niov - 1)) + return NULL; + + pages[i] = kiov[i].kiov_page; + nob += kiov[i].kiov_len; + } + + addr = vmap(pages, niov, VM_MAP, PAGE_KERNEL); + if (addr == NULL) + return NULL; + + iov->iov_base = addr + kiov[0].kiov_offset; + iov->iov_len = nob; + + return addr; +} + int ksocknal_lib_recv_kiov (ksock_conn_t *conn) { #if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; + struct iovec scratch; + struct iovec *scratchiov = &scratch; + struct page **pages = NULL; + unsigned int niov = 1; #else #ifdef CONFIG_HIGHMEM #warning "XXX risk of kmap deadlock on multiple frags..." 
#endif - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_nkiov; + struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov; + struct page **pages = conn->ksnc_scheduler->kss_rx_scratch_pgs; + unsigned int niov = conn->ksnc_rx_nkiov; #endif lnet_kiov_t *kiov = conn->ksnc_rx_kiov; struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0, .msg_iov = scratchiov, - .msg_iovlen = niov, .msg_control = NULL, .msg_controllen = 0, .msg_flags = 0 @@ -674,15 +748,25 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn) int i; int rc; void *base; + void *addr; int sum; int fragnob; /* NB we can't trust socket ops to either consume our iovs * or leave them alone. */ - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; + if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) { + nob = scratchiov[0].iov_len; + msg.msg_iovlen = 1; + + } else { + for (nob = i = 0; i < niov; i++) { + nob += scratchiov[i].iov_len = kiov[i].kiov_len; + scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + + kiov[i].kiov_offset; + } + msg.msg_iovlen = niov; } + LASSERT (nob <= conn->ksnc_rx_nob_wanted); set_fs (KERNEL_DS); @@ -709,8 +793,13 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn) kunmap(kiov[i].kiov_page); } } - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); + + if (addr != NULL) { + ksocknal_lib_kiov_vunmap(addr); + } else { + for (i = 0; i < niov; i++) + kunmap(kiov[i].kiov_page); + } return (rc); } diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index a8cba44..32ffa3c 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -117,6 +117,14 @@ static unsigned int zc_min_frag = (2<<10); CFS_MODULE_PARM(zc_min_frag, "i", int, 0644, "minimum fragment to zero copy"); +static unsigned int zc_recv = 0; +CFS_MODULE_PARM(zc_recv, "i", int, 
0644, + "enable ZC recv for Chelsio driver"); + +static unsigned int zc_recv_min_nfrags = 16; +CFS_MODULE_PARM(zc_recv_min_nfrags, "i", int, 0644, + "minimum # of fragments to enable ZC recv"); + #ifdef SOCKNAL_BACKOFF static int backoff_init = 3; CFS_MODULE_PARM(backoff_init, "i", int, 0644, @@ -152,6 +160,8 @@ ksock_tunables_t ksocknal_tunables = { .ksnd_enable_csum = &enable_csum, .ksnd_inject_csum_error = &inject_csum_error, .ksnd_zc_min_frag = &zc_min_frag, + .ksnd_zc_recv = &zc_recv, + .ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags, #ifdef CPU_AFFINITY .ksnd_irq_affinity = &enable_irq_affinity, #endif diff --git a/lnet/klnds/viblnd/viblnd.c b/lnet/klnds/viblnd/viblnd.c index 8e15389..ce47a6c 100644 --- a/lnet/klnds/viblnd/viblnd.c +++ b/lnet/klnds/viblnd/viblnd.c @@ -191,7 +191,7 @@ kibnal_init_msg(kib_msg_t *msg, int type, int body_nob) } void -kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, +kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, lnet_nid_t dstnid, __u64 dststamp, __u64 seq) { /* CAVEAT EMPTOR! 
all message fields not set here should have been @@ -260,7 +260,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) msg_version != IBNAL_MSG_VERSION) return -EPROTO; } else if (msg_version != expected_version) { - CERROR("Bad version: %x(%x expected)\n", + CERROR("Bad version: %x(%x expected)\n", msg_version, expected_version); return -EPROTO; } @@ -286,7 +286,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) return -EPROTO; } msg->ibm_cksum = msg_cksum; - + if (flip) { /* leave magic unflipped as a clue to peer endianness */ msg->ibm_version = msg_version; @@ -299,7 +299,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) __swab64s(&msg->ibm_dststamp); __swab64s(&msg->ibm_seq); } - + if (msg->ibm_srcnid == LNET_NID_ANY) { CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); return -EPROTO; @@ -309,7 +309,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) default: CERROR("Unknown message type %x\n", msg->ibm_type); return -EPROTO; - + case IBNAL_MSG_NOOP: break; @@ -346,14 +346,14 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag); } - + n = msg->ibm_u.putack.ibpam_rd.rd_nfrag; if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", + CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", n, IBNAL_MAX_RDMA_FRAGS); return -EPROTO; } - + if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) { CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])); @@ -382,7 +382,7 @@ kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob); __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); } -#else +#else if (flip) { __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag); @@ -390,17 +390,17 @@ 
kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) n = msg->ibm_u.get.ibgm_rd.rd_nfrag; if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", + CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", n, IBNAL_MAX_RDMA_FRAGS); return -EPROTO; } - + if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) { CERROR("Short GET_REQ: %d(%d)\n", msg_nob, (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])); return -EPROTO; } - + if (flip) for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) { __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob); @@ -448,25 +448,25 @@ kibnal_start_listener (lnet_ni_t *ni) LASSERT (kibnal_data.kib_listen_handle == NULL); - kibnal_data.kib_listen_handle = + kibnal_data.kib_listen_handle = cm_create_cep(cm_cep_transp_rc); if (kibnal_data.kib_listen_handle == NULL) { CERROR ("Can't create listen CEP\n"); return -ENOMEM; } - CDEBUG(D_NET, "Created CEP %p for listening\n", + CDEBUG(D_NET, "Created CEP %p for listening\n", kibnal_data.kib_listen_handle); memset(&info, 0, sizeof(info)); - info.listen_addr.end_pt.sid = + info.listen_addr.end_pt.sid = (__u64)(*kibnal_tunables.kib_service_number); cmrc = cm_listen(kibnal_data.kib_listen_handle, &info, kibnal_listen_callback, NULL); if (cmrc == cm_stat_success) return 0; - + CERROR ("cm_listen error: %d\n", cmrc); cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle); @@ -482,13 +482,13 @@ kibnal_stop_listener(lnet_ni_t *ni) cm_return_t cmrc; LASSERT (kibnal_data.kib_listen_handle != NULL); - + cmrc = cm_cancel(kibnal_data.kib_listen_handle); if (cmrc != cm_stat_success) CERROR ("Error %d stopping listener\n", cmrc); cfs_pause(cfs_time_seconds(1)/10); /* ensure no more callbacks */ - + cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle); if (cmrc != vv_return_ok) CERROR ("Error %d destroying CEP\n", cmrc); @@ -536,18 +536,18 @@ kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid) /* npeers only grows with the global 
lock held */ atomic_inc(&kibnal_data.kib_npeers); } - + write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); if (rc != 0) { CERROR("Can't create peer: %s\n", - (rc == -ESHUTDOWN) ? "shutting down" : + (rc == -ESHUTDOWN) ? "shutting down" : "too many peers"); LIBCFS_FREE(peer, sizeof(*peer)); } else { *peerp = peer; } - + return rc; } @@ -561,7 +561,7 @@ kibnal_destroy_peer (kib_peer_t *peer) LASSERT (peer->ibp_accepting == 0); LASSERT (list_empty (&peer->ibp_conns)); LASSERT (list_empty (&peer->ibp_tx_queue)); - + LIBCFS_FREE (peer, sizeof (*peer)); /* NB a peer's connections keep a reference on their peer until @@ -660,7 +660,7 @@ kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip) CDEBUG(D_NET, "%s at %u.%u.%u.%u\n", libcfs_nid2str(nid), HIPQUAD(ip)); - + if (nid == LNET_NID_ANY) return (-EINVAL); @@ -686,7 +686,7 @@ kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip) peer->ibp_ip = ip; peer->ibp_persistence++; - + write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); return (0); } @@ -831,16 +831,16 @@ kibnal_debug_conn (kib_conn_t *conn) { struct list_head *tmp; int i; - + spin_lock(&conn->ibc_lock); - - CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n", - atomic_read(&conn->ibc_refcount), conn, + + CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n", + atomic_read(&conn->ibc_refcount), conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); CDEBUG(D_CONSOLE, " txseq "LPD64" rxseq "LPD64" state %d \n", conn->ibc_txseq, conn->ibc_rxseq, conn->ibc_state); CDEBUG(D_CONSOLE, " nposted %d cred %d o_cred %d r_cred %d\n", - conn->ibc_nsends_posted, conn->ibc_credits, + conn->ibc_nsends_posted, conn->ibc_credits, conn->ibc_outstanding_credits, conn->ibc_reserved_credits); CDEBUG(D_CONSOLE, " disc %d comms_err %d\n", conn->ibc_disconnect, conn->ibc_comms_error); @@ -848,7 +848,7 @@ kibnal_debug_conn (kib_conn_t *conn) CDEBUG(D_CONSOLE, " early_rxs:\n"); list_for_each(tmp, &conn->ibc_early_rxs) kibnal_debug_rx(list_entry(tmp, kib_rx_t, rx_list)); - + CDEBUG(D_CONSOLE, " 
tx_queue_nocred:\n"); list_for_each(tmp, &conn->ibc_tx_queue_nocred) kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); @@ -860,11 +860,11 @@ kibnal_debug_conn (kib_conn_t *conn) CDEBUG(D_CONSOLE, " tx_queue:\n"); list_for_each(tmp, &conn->ibc_tx_queue) kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - + CDEBUG(D_CONSOLE, " active_txs:\n"); list_for_each(tmp, &conn->ibc_active_txs) kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - + CDEBUG(D_CONSOLE, " rxs:\n"); for (i = 0; i < IBNAL_RX_MSGS; i++) kibnal_debug_rx(&conn->ibc_rxs[i]); @@ -876,20 +876,20 @@ int kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state) { static vv_qp_attr_t attr; - + kib_connvars_t *cv = conn->ibc_connvars; vv_return_t vvrc; - + /* Only called by connd => static OK */ LASSERT (!in_interrupt()); LASSERT (current == kibnal_data.kib_connd); memset(&attr, 0, sizeof(attr)); - + switch (new_state) { default: LBUG(); - + case vv_qp_state_init: { struct vv_qp_modify_init_st *init = &attr.modify.params.init; @@ -899,7 +899,7 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state) init->access_control = vv_acc_r_mem_read | vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? 
*/ - attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX | + attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX | VV_QP_AT_PHY_PORT_NUM | VV_QP_AT_ACCESS_CON_F; break; @@ -928,9 +928,9 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state) // XXX sdp sets VV_QP_AT_OP_F but no actual optional options - attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC | + attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC | VV_QP_AT_DEST_QP | - VV_QP_AT_R_PSN | + VV_QP_AT_R_PSN | VV_QP_AT_MIN_RNR_NAK_T | VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM | VV_QP_AT_OP_F; @@ -944,7 +944,7 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state) rts->retry_num = *kibnal_tunables.kib_retry_cnt; rts->rnr_num = *kibnal_tunables.kib_rnr_cnt; rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD; - + attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN | VV_QP_AT_L_ACK_T | VV_QP_AT_RETRY_NUM | @@ -957,18 +957,18 @@ kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state) attr.modify.vv_qp_attr_mask = 0; break; } - + attr.modify.qp_modify_into_state = new_state; attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE; - + vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL); if (vvrc != vv_return_ok) { - CERROR("Can't modify qp -> %s state to %d: %d\n", + CERROR("Can't modify qp -> %s state to %d: %d\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), new_state, vvrc); return -EIO; } - + return 0; } @@ -988,7 +988,7 @@ kibnal_create_conn (cm_cep_handle_t cep) /* Only the connd creates conns => single threaded */ LASSERT(!in_interrupt()); LASSERT(current == kibnal_data.kib_connd); - + LIBCFS_ALLOC(conn, sizeof (*conn)); if (conn == NULL) { CERROR ("Can't allocate connection\n"); @@ -1006,7 +1006,7 @@ kibnal_create_conn (cm_cep_handle_t cep) INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd); INIT_LIST_HEAD (&conn->ibc_active_txs); spin_lock_init (&conn->ibc_lock); - + atomic_inc (&kibnal_data.kib_nconns); /* well not really, but I call destroy() on failure, which decrements */ @@ -1040,7 +1040,7 @@ kibnal_create_conn 
(cm_cep_handle_t cep) vv_r_key_t r_key; rx->rx_conn = conn; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + + rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset); vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca, @@ -1069,7 +1069,7 @@ kibnal_create_conn (cm_cep_handle_t cep) reqattr.create.qp_type = vv_qp_type_r_conn; reqattr.create.cq_send_h = kibnal_data.kib_cq; reqattr.create.cq_receive_h = kibnal_data.kib_cq; - reqattr.create.send_max_outstand_wr = (1 + IBNAL_MAX_RDMA_FRAGS) * + reqattr.create.send_max_outstand_wr = (1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends); reqattr.create.receive_max_outstand_wr = IBNAL_RX_MSGS; reqattr.create.max_scatgat_per_send_wr = 1; @@ -1089,13 +1089,13 @@ kibnal_create_conn (cm_cep_handle_t cep) conn->ibc_state = IBNAL_CONN_INIT_QP; conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num; - if (rspattr.create_return.receive_max_outstand_wr < + if (rspattr.create_return.receive_max_outstand_wr < IBNAL_RX_MSGS || - rspattr.create_return.send_max_outstand_wr < + rspattr.create_return.send_max_outstand_wr < (1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends)) { CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n", - IBNAL_RX_MSGS, - (1 + IBNAL_MAX_RDMA_FRAGS) * + IBNAL_RX_MSGS, + (1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends), rspattr.create_return.receive_max_outstand_wr, rspattr.create_return.send_max_outstand_wr); @@ -1108,7 +1108,7 @@ kibnal_create_conn (cm_cep_handle_t cep) /* 1 ref for caller */ atomic_set (&conn->ibc_refcount, 1); return (conn); - + failed: kibnal_destroy_conn (conn); return (NULL); @@ -1122,7 +1122,7 @@ kibnal_destroy_conn (kib_conn_t *conn) /* Only the connd does this (i.e. 
single threaded) */ LASSERT (!in_interrupt()); LASSERT (current == kibnal_data.kib_connd); - + CDEBUG (D_NET, "connection %p\n", conn); LASSERT (atomic_read (&conn->ibc_refcount) == 0); @@ -1154,16 +1154,16 @@ kibnal_destroy_conn (kib_conn_t *conn) if (vvrc != vv_return_ok) CERROR("Can't destroy QP: %d\n", vvrc); /* fall through */ - + case IBNAL_CONN_INIT_NOTHING: break; } - if (conn->ibc_rx_pages != NULL) + if (conn->ibc_rx_pages != NULL) kibnal_free_pages(conn->ibc_rx_pages); if (conn->ibc_rxs != NULL) - LIBCFS_FREE(conn->ibc_rxs, + LIBCFS_FREE(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof(kib_rx_t)); if (conn->ibc_connvars != NULL) @@ -1212,7 +1212,7 @@ kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n", libcfs_nid2str(peer->ibp_nid), conn->ibc_incarnation, incarnation); - + count++; kibnal_close_conn_locked (conn, -ESTALE); } @@ -1262,7 +1262,7 @@ kibnal_close_matching_conns (lnet_nid_t nid) /* wildcards always succeed */ if (nid == LNET_NID_ANY) return (0); - + return (count == 0 ? 
-ENOENT : 0); } @@ -1335,11 +1335,11 @@ kibnal_free_pages (kib_pages_t *p) { int npages = p->ibp_npages; int i; - + for (i = 0; i < npages; i++) if (p->ibp_pages[i] != NULL) __free_page(p->ibp_pages[i]); - + LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); } @@ -1357,7 +1357,7 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write) memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages])); p->ibp_npages = npages; - + for (i = 0; i < npages; i++) { p->ibp_pages[i] = alloc_page (GFP_KERNEL); if (p->ibp_pages[i] == NULL) { @@ -1372,15 +1372,15 @@ kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write) } int -kibnal_alloc_tx_descs (void) +kibnal_alloc_tx_descs (void) { int i; - + LIBCFS_ALLOC (kibnal_data.kib_tx_descs, IBNAL_TX_MSGS() * sizeof(kib_tx_t)); if (kibnal_data.kib_tx_descs == NULL) return -ENOMEM; - + memset(kibnal_data.kib_tx_descs, 0, IBNAL_TX_MSGS() * sizeof(kib_tx_t)); @@ -1393,20 +1393,20 @@ kibnal_alloc_tx_descs (void) if (tx->tx_pages == NULL) return -ENOMEM; #else - LIBCFS_ALLOC(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * + LIBCFS_ALLOC(tx->tx_wrq, + (1 + IBNAL_MAX_RDMA_FRAGS) * sizeof(*tx->tx_wrq)); if (tx->tx_wrq == NULL) return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * + + LIBCFS_ALLOC(tx->tx_gl, + (1 + IBNAL_MAX_RDMA_FRAGS) * sizeof(*tx->tx_gl)); if (tx->tx_gl == NULL) return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_rd, - offsetof(kib_rdma_desc_t, + + LIBCFS_ALLOC(tx->tx_rd, + offsetof(kib_rdma_desc_t, rd_frags[IBNAL_MAX_RDMA_FRAGS])); if (tx->tx_rd == NULL) return -ENOMEM; @@ -1417,7 +1417,7 @@ kibnal_alloc_tx_descs (void) } void -kibnal_free_tx_descs (void) +kibnal_free_tx_descs (void) { int i; @@ -1433,18 +1433,18 @@ kibnal_free_tx_descs (void) sizeof(*tx->tx_pages)); #else if (tx->tx_wrq != NULL) - LIBCFS_FREE(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * + LIBCFS_FREE(tx->tx_wrq, + (1 + IBNAL_MAX_RDMA_FRAGS) * sizeof(*tx->tx_wrq)); if (tx->tx_gl != NULL) - LIBCFS_FREE(tx->tx_gl, - (1 + 
IBNAL_MAX_RDMA_FRAGS) * + LIBCFS_FREE(tx->tx_gl, + (1 + IBNAL_MAX_RDMA_FRAGS) * sizeof(*tx->tx_gl)); if (tx->tx_rd != NULL) - LIBCFS_FREE(tx->tx_rd, - offsetof(kib_rdma_desc_t, + LIBCFS_FREE(tx->tx_rd, + offsetof(kib_rdma_desc_t, rd_frags[IBNAL_MAX_RDMA_FRAGS])); #endif } @@ -1455,7 +1455,7 @@ kibnal_free_tx_descs (void) #if IBNAL_USE_FMR void -kibnal_free_fmrs (int n) +kibnal_free_fmrs (int n) { int i; vv_return_t vvrc; @@ -1494,7 +1494,7 @@ kibnal_setup_tx_descs (void) /* No fancy arithmetic when we do the buffer calculations */ CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0); - rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, + rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, IBNAL_TX_MSG_PAGES(), 0); if (rc != 0) return (rc); @@ -1550,7 +1550,7 @@ kibnal_setup_tx_descs (void) LASSERT (ipage <= IBNAL_TX_MSG_PAGES()); } } - + return (0); } @@ -1562,7 +1562,7 @@ kibnal_shutdown (lnet_ni_t *ni) LASSERT (ni == kibnal_data.kib_ni); LASSERT (ni->ni_data == &kibnal_data); - + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&libcfs_kmemory)); @@ -1614,7 +1614,7 @@ kibnal_shutdown (lnet_ni_t *ni) kibnal_async_callback); if (vvrc != vv_return_ok) CERROR("vv_dell_async_event_cb error: %d\n", vvrc); - + /* fall through */ case IBNAL_INIT_HCA: @@ -1649,7 +1649,7 @@ kibnal_shutdown (lnet_ni_t *ni) cfs_pause(cfs_time_seconds(1)); } /* fall through */ - + case IBNAL_INIT_NOTHING: break; } @@ -1658,7 +1658,7 @@ kibnal_shutdown (lnet_ni_t *ni) if (kibnal_data.kib_peers != NULL) LIBCFS_FREE (kibnal_data.kib_peers, - sizeof (struct list_head) * + sizeof (struct list_head) * kibnal_data.kib_peer_hash_size); CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", @@ -1873,18 +1873,18 @@ kibnal_startup (lnet_ni_t *ni) /* Found a suitable port. Get its GUID and PKEY. 
*/ tbl_count = 1; - vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca, + vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca, port_num, &tbl_count, &kibnal_data.kib_port_gid); if (vvrc != vv_return_ok) { CERROR("vv_get_port_gid_tbl failed " - "for %s port %d: %d\n", + "for %s port %d: %d\n", hca_name, port_num, vvrc); continue; } tbl_count = 1; - vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca, + vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca, port_num, &tbl_count, &kibnal_data.kib_port_pkey); if (vvrc != vv_return_ok) { @@ -1912,8 +1912,8 @@ kibnal_startup (lnet_ni_t *ni) } CDEBUG(D_NET, "Using %s port %d - GID="LPX64":"LPX64"\n", - hca_name, kibnal_data.kib_port, - kibnal_data.kib_port_gid.scope.g.subnet, + hca_name, kibnal_data.kib_port, + kibnal_data.kib_port_gid.scope.g.subnet, kibnal_data.kib_port_gid.scope.g.eui64); /*****************************************************/ @@ -1947,7 +1947,7 @@ kibnal_startup (lnet_ni_t *ni) __u32 nentries; vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(), - kibnal_cq_callback, + kibnal_cq_callback, NULL, /* context */ &kibnal_data.kib_cq, &nentries); if (vvrc != 0) { @@ -1959,13 +1959,13 @@ kibnal_startup (lnet_ni_t *ni) kibnal_data.kib_init = IBNAL_INIT_CQ; if (nentries < IBNAL_CQ_ENTRIES()) { - CERROR ("CQ only has %d entries, need %d\n", + CERROR ("CQ only has %d entries, need %d\n", nentries, IBNAL_CQ_ENTRIES()); goto failed; } - vvrc = vv_request_completion_notification(kibnal_data.kib_hca, - kibnal_data.kib_cq, + vvrc = vv_request_completion_notification(kibnal_data.kib_hca, + kibnal_data.kib_cq, vv_next_solicit_unsolicit_event); if (vvrc != 0) { CERROR ("Failed to re-arm completion queue: %d\n", rc); @@ -1987,7 +1987,7 @@ kibnal_startup (lnet_ni_t *ni) failed: CDEBUG(D_NET, "kibnal_startup failed\n"); - kibnal_shutdown (ni); + kibnal_shutdown (ni); return (-ENETDOWN); } @@ -2005,9 +2005,9 @@ kibnal_module_init (void) vibnal_assert_wire_constants(); - CLASSERT (offsetof(kib_msg_t, ibm_u) + 
sizeof(kib_connparams_t) + CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) <= cm_REQ_priv_data_len); - CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) + CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) <= cm_REP_priv_data_len); CLASSERT (sizeof(kib_msg_t) <= IBNAL_MSG_SIZE); #if !IBNAL_USE_FMR diff --git a/lnet/klnds/viblnd/viblnd_cb.c b/lnet/klnds/viblnd/viblnd_cb.c index 8f016bc..0528b0ed 100644 --- a/lnet/klnds/viblnd/viblnd_cb.c +++ b/lnet/klnds/viblnd/viblnd_cb.c @@ -177,7 +177,7 @@ kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit) LASSERT (conn->ibc_state >= IBNAL_CONN_INIT); LASSERT (rx->rx_nob >= 0); /* not posted */ - CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n", + CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n", rx->rx_wrq.scatgat_list->length, rx->rx_wrq.scatgat_list->l_key, KIBNAL_SG2ADDR(rx->rx_wrq.scatgat_list->v_address)); @@ -211,10 +211,10 @@ kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit) spin_unlock(&conn->ibc_lock); - CERROR ("post rx -> %s failed %d\n", + CERROR ("post rx -> %s failed %d\n", libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc); rc = -EIO; - kibnal_close_conn(rx->rx_conn, rc); + kibnal_close_conn(conn, rc); /* No more posts for this rx; so lose its ref */ kibnal_conn_decref(conn); return rc; @@ -1756,7 +1756,7 @@ kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, case IBNAL_MSG_PUT_REQ: if (mlen == 0) { lnet_finalize(ni, lntmsg, 0); - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0, + kibnal_send_completion(conn, IBNAL_MSG_PUT_NAK, 0, rxmsg->ibm_u.putreq.ibprm_cookie); break; } @@ -1786,7 +1786,7 @@ kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); kibnal_tx_done(tx); /* tell peer it's over */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc, + kibnal_send_completion(conn, IBNAL_MSG_PUT_NAK, rc, rxmsg->ibm_u.putreq.ibprm_cookie); break; } @@ -1818,8 
+1818,7 @@ kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, kibnal_reply(ni, rx, lntmsg); } else { /* GET didn't match anything */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE, - -ENODATA, + kibnal_send_completion(conn, IBNAL_MSG_GET_DONE, -ENODATA, rxmsg->ibm_u.get.ibgm_cookie); } break; @@ -2494,7 +2493,7 @@ kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq) write_unlock_irqrestore(g_lock, flags); CWARN("Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); + libcfs_nid2str(rxmsg.ibm_srcnid)); kibnal_peer_decref(peer); reason = IBNAL_REJECT_CONN_RACE; @@ -2632,6 +2631,7 @@ kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq) if (conn != NULL) { LASSERT (rc != 0); kibnal_connreq_done(conn, 0, rc); + kibnal_conn_decref(conn); } else { cm_destroy_cep(cep); } @@ -3072,7 +3072,7 @@ kibnal_arp_done (kib_conn_t *conn) path->pkey, &cv->cv_pkey_index); if (vvrc != vv_return_ok) { CWARN("pkey2pkey_index failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_nid), + libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), vvrc); goto failed; } @@ -3102,7 +3102,7 @@ kibnal_arp_done (kib_conn_t *conn) &path->slid); if (vvrc != vv_return_ok) { CWARN("port_num2base_lid failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_ip), + libcfs_nid2str(peer->ibp_ip), HIPQUAD(peer->ibp_ip), vvrc); goto failed; } diff --git a/lnet/klnds/viblnd/viblnd_modparams.c b/lnet/klnds/viblnd/viblnd_modparams.c index 2903ba5..bb2e9f6 100644 --- a/lnet/klnds/viblnd/viblnd_modparams.c +++ b/lnet/klnds/viblnd/viblnd_modparams.c @@ -336,7 +336,7 @@ static cfs_sysctl_table_t kibnal_ctl_table[] = { .procname = "concurrent_sends", .data = &concurrent_sends, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = &proc_dointvec }, #if IBNAL_USE_FMR diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 6adaa83..1f61363 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -438,6 +438,7 @@ void 
libcfs_debug_dumplog_internal(void *arg) cfs_time_current_sec(), (long)arg); printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); + tracefile_dump_all_pages(debug_file_name); libcfs_run_debug_log_upcall(debug_file_name); } diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 838e814..8918fea 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -80,7 +80,7 @@ lnet_get_networks(void) "'ip2nets' but not both at once\n"); return NULL; } - + if (*ip2nets != 0) { rc = lnet_parse_ip2nets(&nets, ip2nets); return (rc == 0) ? nets : NULL; @@ -107,7 +107,7 @@ lnet_get_portals_compatibility(void) if (!strcmp(portals_compatibility, "strong")) { return 2; LCONSOLE_WARN("Starting in strong portals-compatible mode\n"); - } + } LCONSOLE_ERROR_MSG(0x102, "portals_compatibility=\"%s\" not supported\n", portals_compatibility); @@ -134,7 +134,7 @@ char * lnet_get_routes(void) { char *str = getenv("LNET_ROUTES"); - + return (str == NULL) ? "" : str; } @@ -175,21 +175,21 @@ lnet_get_networks (void) str = default_networks; *str = 0; sep = ""; - + list_for_each (tmp, &the_lnet.ln_lnds) { - lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list); - - nob = snprintf(str, len, "%s%s", sep, - libcfs_lnd2str(lnd->lnd_type)); - len -= nob; - if (len < 0) { - /* overflowed the string; leave it where it was */ - *str = 0; - break; - } - - str += nob; - sep = ","; + lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list); + + nob = snprintf(str, len, "%s%s", sep, + libcfs_lnd2str(lnd->lnd_type)); + len -= nob; + if (len < 0) { + /* overflowed the string; leave it where it was */ + *str = 0; + break; + } + + str += nob; + sep = ","; } return default_networks; @@ -332,7 +332,7 @@ void lnet_assert_wire_constants (void) } lnd_t * -lnet_find_lnd_by_type (int type) +lnet_find_lnd_by_type (int type) { lnd_t *lnd; struct list_head *tmp; @@ -344,7 +344,7 @@ lnet_find_lnd_by_type (int type) if (lnd->lnd_type == type) return lnd; } - + return NULL; } @@ -356,7 +356,7 @@ lnet_register_lnd (lnd_t 
*lnd) LASSERT (the_lnet.ln_init); LASSERT (libcfs_isknown_lnd(lnd->lnd_type)); LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL); - + list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds); lnd->lnd_refcount = 0; @@ -373,7 +373,7 @@ lnet_unregister_lnd (lnd_t *lnd) LASSERT (the_lnet.ln_init); LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd); LASSERT (lnd->lnd_refcount == 0); - + list_del (&lnd->lnd_list); CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type)); @@ -439,7 +439,7 @@ lnet_freelist_fini (lnet_freelist_t *fl) LASSERT (count == fl->fl_nobjs); LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); - memset (fl, 0, sizeof (fl)); + memset (fl, 0, sizeof (*fl)); } int @@ -507,10 +507,10 @@ lnet_create_interface_cookie (void) } int -lnet_setup_handle_hash (void) +lnet_setup_handle_hash (void) { int i; - + /* Arbitrary choice of hash table size */ #ifdef __KERNEL__ the_lnet.ln_lh_hash_size = CFS_PAGE_SIZE / sizeof (struct list_head); @@ -521,12 +521,12 @@ lnet_setup_handle_hash (void) the_lnet.ln_lh_hash_size * sizeof (struct list_head)); if (the_lnet.ln_lh_hash_table == NULL) return (-ENOMEM); - + for (i = 0; i < the_lnet.ln_lh_hash_size; i++) CFS_INIT_LIST_HEAD (&the_lnet.ln_lh_hash_table[i]); the_lnet.ln_next_object_cookie = LNET_COOKIE_TYPES; - + return (0); } @@ -535,13 +535,13 @@ lnet_cleanup_handle_hash (void) { if (the_lnet.ln_lh_hash_table == NULL) return; - + LIBCFS_FREE(the_lnet.ln_lh_hash_table, the_lnet.ln_lh_hash_size * sizeof (struct list_head)); } lnet_libhandle_t * -lnet_lookup_cookie (__u64 cookie, int type) +lnet_lookup_cookie (__u64 cookie, int type) { /* ALWAYS called with LNET_LOCK held */ struct list_head *list; @@ -550,23 +550,23 @@ lnet_lookup_cookie (__u64 cookie, int type) if ((cookie & (LNET_COOKIE_TYPES - 1)) != type) return (NULL); - + hash = ((unsigned int)cookie) % the_lnet.ln_lh_hash_size; list = &the_lnet.ln_lh_hash_table[hash]; - + list_for_each (el, list) { lnet_libhandle_t *lh = list_entry (el, 
lnet_libhandle_t, lh_hash_chain); - + if (lh->lh_cookie == cookie) return (lh); } - + return (NULL); } void -lnet_initialise_handle (lnet_libhandle_t *lh, int type) +lnet_initialise_handle (lnet_libhandle_t *lh, int type) { /* ALWAYS called with LNET_LOCK held */ unsigned int hash; @@ -574,7 +574,7 @@ lnet_initialise_handle (lnet_libhandle_t *lh, int type) LASSERT (type >= 0 && type < LNET_COOKIE_TYPES); lh->lh_cookie = the_lnet.ln_next_object_cookie | type; the_lnet.ln_next_object_cookie += LNET_COOKIE_TYPES; - + hash = ((unsigned int)lh->lh_cookie) % the_lnet.ln_lh_hash_size; list_add (&lh->lh_hash_chain, &the_lnet.ln_lh_hash_table[hash]); } @@ -595,7 +595,7 @@ lnet_init_finalizers(void) the_lnet.ln_nfinalizers = num_online_cpus(); LIBCFS_ALLOC(the_lnet.ln_finalizers, - the_lnet.ln_nfinalizers * + the_lnet.ln_nfinalizers * sizeof(*the_lnet.ln_finalizers)); if (the_lnet.ln_finalizers == NULL) { CERROR("Can't allocate ln_finalizers\n"); @@ -617,7 +617,7 @@ lnet_fini_finalizers(void) { #ifdef __KERNEL__ int i; - + for (i = 0; i < the_lnet.ln_nfinalizers; i++) LASSERT (the_lnet.ln_finalizers[i] == NULL); @@ -639,7 +639,7 @@ void lnet_server_mode() { the_lnet.ln_server_mode_flag = 1; } -#endif +#endif int lnet_prepare(lnet_pid_t requested_pid) @@ -658,7 +658,7 @@ lnet_prepare(lnet_pid_t requested_pid) #else if (the_lnet.ln_server_mode_flag) {/* server case (uOSS) */ LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0); - + if (cfs_curproc_uid())/* Only root can run user-space server */ return -EPERM; the_lnet.ln_pid = requested_pid; @@ -667,14 +667,14 @@ lnet_prepare(lnet_pid_t requested_pid) /* My PID must be unique on this node and flag I'm userspace */ the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG; - } + } #endif rc = lnet_descriptor_setup(); if (rc != 0) goto failed0; - memset(&the_lnet.ln_counters, 0, + memset(&the_lnet.ln_counters, 0, sizeof(the_lnet.ln_counters)); CFS_INIT_LIST_HEAD (&the_lnet.ln_active_msgs); @@ -703,8 +703,8 @@ lnet_prepare(lnet_pid_t 
requested_pid) goto failed2; the_lnet.ln_nportals = MAX_PORTALS; - LIBCFS_ALLOC(the_lnet.ln_portals, - the_lnet.ln_nportals * + LIBCFS_ALLOC(the_lnet.ln_portals, + the_lnet.ln_nportals * sizeof(*the_lnet.ln_portals)); if (the_lnet.ln_portals == NULL) { rc = -ENOMEM; @@ -718,7 +718,7 @@ lnet_prepare(lnet_pid_t requested_pid) } return 0; - + failed3: lnet_fini_finalizers(); failed2: @@ -734,7 +734,7 @@ int lnet_unprepare (void) { int idx; - + /* NB no LNET_LOCK since this is the last reference. All LND instances * have shut down already, so it is safe to unlink and free all * descriptors, even those that appear committed to a network op (eg MD @@ -747,7 +747,7 @@ lnet_unprepare (void) LASSERT (list_empty(&the_lnet.ln_nis)); LASSERT (list_empty(&the_lnet.ln_zombie_nis)); LASSERT (the_lnet.ln_nzombie_nis == 0); - + for (idx = 0; idx < the_lnet.ln_nportals; idx++) { LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq)); @@ -816,7 +816,7 @@ lnet_net2ni_locked (__u32 net) return ni; } } - + return NULL; } @@ -824,7 +824,7 @@ int lnet_islocalnet (__u32 net) { lnet_ni_t *ni; - + LNET_LOCK(); ni = lnet_net2ni_locked(net); if (ni != NULL) @@ -848,7 +848,7 @@ lnet_nid2ni_locked (lnet_nid_t nid) return ni; } } - + return NULL; } @@ -856,7 +856,7 @@ int lnet_islocalnid (lnet_nid_t nid) { lnet_ni_t *ni; - + LNET_LOCK(); ni = lnet_nid2ni_locked(nid); if (ni != NULL) @@ -890,7 +890,7 @@ lnet_count_acceptor_nis (lnet_ni_t **first_ni) count++; } } - + LNET_UNLOCK(); #endif /* defined(__KERNEL__) || defined(HAVE_LIBPTHREAD) */ @@ -1133,7 +1133,7 @@ lnet_startup_lndnis (void) } libcfs_setnet0alias(lnd->lnd_type); } - + nicount++; } @@ -1337,21 +1337,21 @@ LNetCtl(unsigned int cmd, void *arg) case IOC_LIBCFS_FAIL_NID: return lnet_fail_nid(data->ioc_nid, data->ioc_count); - + case IOC_LIBCFS_ADD_ROUTE: - rc = lnet_add_route(data->ioc_net, data->ioc_count, + rc = lnet_add_route(data->ioc_net, data->ioc_count, data->ioc_nid); return (rc != 0) ? 
rc : lnet_check_routes(); - + case IOC_LIBCFS_DEL_ROUTE: return lnet_del_route(data->ioc_net, data->ioc_nid); case IOC_LIBCFS_GET_ROUTE: - return lnet_get_route(data->ioc_count, - &data->ioc_net, &data->ioc_count, + return lnet_get_route(data->ioc_count, + &data->ioc_net, &data->ioc_count, &data->ioc_nid, &data->ioc_flags); case IOC_LIBCFS_NOTIFY_ROUTER: - return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, + return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, (time_t)data->ioc_u64[0]); case IOC_LIBCFS_PORTALS_COMPATIBILITY: @@ -1361,7 +1361,7 @@ LNetCtl(unsigned int cmd, void *arg) rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]); if (rc < 0 && rc != -EHOSTUNREACH) return rc; - + data->ioc_u32[0] = rc; return 0; @@ -1399,12 +1399,12 @@ LNetCtl(unsigned int cmd, void *arg) } else { (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg); } - + lnet_ni_decref(ni); } return 0; } - + default: ni = lnet_net2ni(data->ioc_net); if (ni == NULL) @@ -1436,7 +1436,7 @@ LNetGetId(unsigned int index, lnet_process_id_t *id) list_for_each(tmp, &the_lnet.ln_nis) { if (index-- != 0) continue; - + ni = list_entry(tmp, lnet_ni_t, ni_list); id->nid = ni->ni_nid; @@ -1467,7 +1467,7 @@ lnet_ping_target_init(void) int n; int infosz; int i; - + for (n = 0; ; n++) { rc = LNetGetId(n, &id); if (rc == -ENOENT) @@ -1493,7 +1493,7 @@ lnet_ping_target_init(void) LASSERT (rc == 0); the_lnet.ln_ping_info->pi_nid[i] = id.nid; } - + /* We can have a tiny EQ since we only need to see the unlink event on * teardown, which by definition is the last one! 
*/ rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq); @@ -1734,7 +1734,7 @@ lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_i } if (nob < offsetof(lnet_ping_info_t, pi_nid[0])) { - CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), + CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), nob, (int)offsetof(lnet_ping_info_t, pi_nid[0])); goto out_1; } @@ -1743,7 +1743,7 @@ lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_i n_ids = info->pi_nnids; if (nob < offsetof(lnet_ping_info_t, pi_nid[n_ids])) { - CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), + CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), nob, (int)offsetof(lnet_ping_info_t, pi_nid[n_ids])); goto out_1; } diff --git a/lnet/lnet/lib-eq.c b/lnet/lnet/lib-eq.c index a1d2de6..4ca84f3 100644 --- a/lnet/lnet/lib-eq.c +++ b/lnet/lnet/lib-eq.c @@ -118,6 +118,8 @@ LNetEQFree(lnet_handle_eq_t eqh) } if (eq->eq_refcount != 0) { + CDEBUG(D_NET, "Event queue (%d) busy on destroy.\n", + eq->eq_refcount); LNET_UNLOCK(); return (-EBUSY); } @@ -311,7 +313,7 @@ LNetEQPoll (lnet_handle_eq_t *eventqs, int neq, int timeout_ms, gettimeofday(&then, NULL); ts.tv_sec = then.tv_sec + timeout_ms/1000; - ts.tv_nsec = then.tv_usec * 1000 + + ts.tv_nsec = then.tv_usec * 1000 + (timeout_ms%1000) * 1000000; if (ts.tv_nsec >= 1000000000) { ts.tv_sec++; diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c index a7e14ff..39fa978 100644 --- a/lnet/lnet/lib-md.c +++ b/lnet/lnet/lib-md.c @@ -222,11 +222,14 @@ LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, LASSERT (the_lnet.ln_init); LASSERT (the_lnet.ln_refcount > 0); - + if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 && umd.length > LNET_MAX_IOV) /* too many fragments */ return -EINVAL; + if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) == 0) + return -EINVAL; + md = lnet_md_alloc(&umd); if (md == NULL) return -ENOMEM; @@ -268,11 +271,14 @@ 
LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle) LASSERT (the_lnet.ln_init); LASSERT (the_lnet.ln_refcount > 0); - + if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 && umd.length > LNET_MAX_IOV) /* too many fragments */ return -EINVAL; + if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT)) != 0) + return -EINVAL; + md = lnet_md_alloc(&umd); if (md == NULL) return -ENOMEM; diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c index 66b3d84..90131a1 100644 --- a/lnet/lnet/lib-me.c +++ b/lnet/lnet/lib-me.c @@ -44,16 +44,16 @@ int LNetMEAttach(unsigned int portal, - lnet_process_id_t match_id, + lnet_process_id_t match_id, __u64 match_bits, __u64 ignore_bits, - lnet_unlink_t unlink, lnet_ins_pos_t pos, + lnet_unlink_t unlink, lnet_ins_pos_t pos, lnet_handle_me_t *handle) { lnet_me_t *me; LASSERT (the_lnet.ln_init); LASSERT (the_lnet.ln_refcount > 0); - + if (portal >= the_lnet.ln_nportals) return -EINVAL; @@ -84,9 +84,9 @@ LNetMEAttach(unsigned int portal, return 0; } -int -LNetMEInsert(lnet_handle_me_t current_meh, - lnet_process_id_t match_id, +int +LNetMEInsert(lnet_handle_me_t current_meh, + lnet_process_id_t match_id, __u64 match_bits, __u64 ignore_bits, lnet_unlink_t unlink, lnet_ins_pos_t pos, lnet_handle_me_t *handle) @@ -94,9 +94,9 @@ LNetMEInsert(lnet_handle_me_t current_meh, lnet_me_t *current_me; lnet_me_t *new_me; - LASSERT (the_lnet.ln_init); + LASSERT (the_lnet.ln_init); LASSERT (the_lnet.ln_refcount > 0); - + new_me = lnet_me_alloc(); if (new_me == NULL) return -ENOMEM; @@ -121,9 +121,9 @@ LNetMEInsert(lnet_handle_me_t current_meh, lnet_initialise_handle (&new_me->me_lh, LNET_COOKIE_TYPE_ME); if (pos == LNET_INS_AFTER) - list_add_tail(&new_me->me_list, ¤t_me->me_list); - else list_add(&new_me->me_list, ¤t_me->me_list); + else + list_add_tail(&new_me->me_list, ¤t_me->me_list); lnet_me2handle(handle, new_me); diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index ccad196..26fccb4 100644 --- a/lnet/lnet/lib-move.c 
+++ b/lnet/lnet/lib-move.c @@ -48,7 +48,6 @@ CFS_MODULE_PARM(local_nid_dist_zero, "i", int, 0444, /* forward ref */ static void lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg); -static void lnet_drop_delayed_put(lnet_msg_t *msg, char *reason); #define LNET_MATCHMD_NONE 0 /* Didn't match */ #define LNET_MATCHMD_OK 1 /* Matched OK */ @@ -1711,17 +1710,17 @@ lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get) LNET_UNLOCK(); + msg->msg_ev.type = LNET_EVENT_GET; + msg->msg_ev.target.pid = hdr->dest_pid; + msg->msg_ev.target.nid = hdr->dest_nid; + msg->msg_ev.hdr_data = 0; + reply_wmd = hdr->msg.get.return_wmd; lnet_prep_send(msg, LNET_MSG_REPLY, src, offset, mlength); msg->msg_hdr.msg.reply.dst_wmd = reply_wmd; - msg->msg_ev.type = LNET_EVENT_GET; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.hdr_data = 0; - if (rdma_get) { /* The LND completes the REPLY from her recv procedure */ lnet_ni_recv(ni, msg->msg_private, msg, 0, @@ -1759,13 +1758,16 @@ lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg) /* NB handles only looked up by creator (no flips) */ md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd); - if (md == NULL || md->md_threshold == 0) { + if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) { CDEBUG(D_NETERROR, "%s: Dropping REPLY from %s for %s " "MD "LPX64"."LPX64"\n", libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), (md == NULL) ? 
"invalid" : "inactive", hdr->msg.reply.dst_wmd.wh_interface_cookie, hdr->msg.reply.dst_wmd.wh_object_cookie); + if (md != NULL && md->md_me != NULL) + CERROR("REPLY MD also attached to portal %d\n", + md->md_me->me_portal); LNET_UNLOCK(); return ENOENT; /* +ve: OK but no match */ @@ -1832,7 +1834,7 @@ lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg) /* NB handles only looked up by creator (no flips) */ md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd); - if (md == NULL || md->md_threshold == 0) { + if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) { /* Don't moan; this is expected */ CDEBUG(D_NET, "%s: Dropping ACK from %s to %s MD "LPX64"."LPX64"\n", @@ -1840,6 +1842,10 @@ lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg) (md == NULL) ? "invalid" : "inactive", hdr->msg.ack.dst_wmd.wh_interface_cookie, hdr->msg.ack.dst_wmd.wh_object_cookie); + if (md != NULL && md->md_me != NULL) + CERROR("Source MD also attached to portal %d\n", + md->md_me->me_portal); + LNET_UNLOCK(); return ENOENT; /* +ve! */ } @@ -2206,12 +2212,17 @@ LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, LNET_LOCK(); md = lnet_handle2md(&mdh); - if (md == NULL || md->md_threshold == 0) { + if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) { lnet_msg_free(msg); - LNET_UNLOCK(); - CERROR("Dropping PUT to %s: MD invalid\n", - libcfs_id2str(target)); + CERROR("Dropping PUT ("LPU64":%d:%s): MD (%d) invalid\n", + match_bits, portal, libcfs_id2str(target), + md == NULL ? 
-1 : md->md_threshold); + if (md != NULL && md->md_me != NULL) + CERROR("Source MD also attached to portal %d\n", + md->md_me->me_portal); + + LNET_UNLOCK(); return -ENOENT; } @@ -2383,12 +2394,17 @@ LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, LNET_LOCK(); md = lnet_handle2md(&mdh); - if (md == NULL || md->md_threshold == 0) { + if (md == NULL || md->md_threshold == 0 || md->md_me != NULL) { lnet_msg_free(msg); - LNET_UNLOCK(); - CERROR("Dropping GET to %s: MD invalid\n", - libcfs_id2str(target)); + CERROR("Dropping GET ("LPU64":%d:%s): MD (%d) invalid\n", + match_bits, portal, libcfs_id2str(target), + md == NULL ? -1 : md->md_threshold); + if (md != NULL && md->md_me != NULL) + CERROR("REPLY MD also attached to portal %d\n", + md->md_me->me_portal); + + LNET_UNLOCK(); return -ENOENT; } diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c index c02fc2f..58733f9 100644 --- a/lnet/lnet/router_proc.c +++ b/lnet/lnet/router_proc.c @@ -28,7 +28,6 @@ #if defined(__KERNEL__) && defined(LNET_ROUTER) #include -#include /* this is really lnet_proc.c */ diff --git a/lnet/selftest/rpc.c b/lnet/selftest/rpc.c index dbf828a..8066bde 100644 --- a/lnet/selftest/rpc.c +++ b/lnet/selftest/rpc.c @@ -233,8 +233,8 @@ srpc_find_peer_locked (lnet_nid_t nid) static srpc_peer_t * srpc_nid2peer (lnet_nid_t nid) { - srpc_peer_t *peer; - srpc_peer_t *new_peer; + srpc_peer_t *peer; + srpc_peer_t *new_peer; spin_lock(&srpc_data.rpc_glock); peer = srpc_find_peer_locked(nid); @@ -242,7 +242,7 @@ srpc_nid2peer (lnet_nid_t nid) if (peer != NULL) return peer; - + new_peer = srpc_create_peer(nid); spin_lock(&srpc_data.rpc_glock); @@ -260,7 +260,7 @@ srpc_nid2peer (lnet_nid_t nid) spin_unlock(&srpc_data.rpc_glock); return NULL; } - + list_add_tail(&new_peer->stp_list, srpc_nid2peerlist(nid)); spin_unlock(&srpc_data.rpc_glock); return new_peer; @@ -410,7 +410,7 @@ srpc_post_passive_rdma(int portal, __u64 matchbits, void *buf, } int -srpc_post_active_rdma(int portal, __u64 matchbits, void 
*buf, int len, +srpc_post_active_rdma(int portal, __u64 matchbits, void *buf, int len, int options, lnet_process_id_t peer, lnet_nid_t self, lnet_handle_md_t *mdh, srpc_event_t *ev) { @@ -473,7 +473,7 @@ srpc_post_active_rqtbuf(lnet_process_id_t peer, int service, void *buf, else portal = SRPC_FRAMEWORK_REQUEST_PORTAL; - rc = srpc_post_active_rdma(portal, service, buf, len, + rc = srpc_post_active_rdma(portal, service, buf, len, LNET_MD_OP_PUT, peer, LNET_NID_ANY, mdh, ev); return rc; @@ -541,7 +541,7 @@ srpc_service_post_buffer (srpc_service_t *sv, srpc_buffer_t *buf) spin_unlock(&sv->sv_lock); LIBCFS_FREE(buf, sizeof(*buf)); spin_lock(&sv->sv_lock); - return rc; + return rc; } int @@ -924,8 +924,11 @@ srpc_handle_rpc (swi_workitem_t *wi) msg = &rpc->srpc_reqstbuf->buf_msg; reply = &rpc->srpc_replymsg.msg_body.reply; - if (msg->msg_version != SRPC_MSG_VERSION && - msg->msg_version != __swab32(SRPC_MSG_VERSION)) { + if (msg->msg_magic == 0) { + /* moaned already in srpc_lnet_ev_handler */ + rc = EBADMSG; + } else if (msg->msg_version != SRPC_MSG_VERSION && + msg->msg_version != __swab32(SRPC_MSG_VERSION)) { CWARN ("Version mismatch: %u, %u expected, from %s\n", msg->msg_version, SRPC_MSG_VERSION, libcfs_id2str(rpc->srpc_peer)); @@ -953,7 +956,8 @@ srpc_handle_rpc (swi_workitem_t *wi) } } case SWI_STATE_BULK_STARTED: - LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired); + /* we cannot LASSERT ev_fired right here because it + * may be set only upon an event with unlinked==1 */ if (rpc->srpc_bulk != NULL) { rc = ev->ev_status; @@ -962,11 +966,20 @@ srpc_handle_rpc (swi_workitem_t *wi) rc = (*sv->sv_bulk_ready) (rpc, rc); if (rc != 0) { - srpc_server_rpc_done(rpc, rc); - return 1; + if (ev->ev_fired) { + srpc_server_rpc_done(rpc, rc); + return 1; + } + + rpc->srpc_status = rc; + wi->wi_state = SWI_STATE_BULK_ERRORED; + LNetMDUnlink(rpc->srpc_bulk->bk_mdh); + return 0; /* wait for UNLINK event */ } } + LASSERT (rpc->srpc_bulk == NULL || ev->ev_fired); + wi->wi_state = 
SWI_STATE_REPLY_SUBMITTED; rc = srpc_send_reply(rpc); if (rc == 0) @@ -980,6 +993,13 @@ srpc_handle_rpc (swi_workitem_t *wi) wi->wi_state = SWI_STATE_DONE; srpc_server_rpc_done(rpc, ev->ev_status); return 1; + + case SWI_STATE_BULK_ERRORED: + LASSERT (rpc->srpc_bulk != NULL && ev->ev_fired); + LASSERT (rpc->srpc_status != 0); + + srpc_server_rpc_done(rpc, rpc->srpc_status); + return 1; } return 0; @@ -1017,20 +1037,20 @@ srpc_add_client_rpc_timer (srpc_client_rpc_t *rpc) CFS_INIT_LIST_HEAD(&timer->stt_list); timer->stt_data = rpc; timer->stt_func = srpc_client_rpc_expired; - timer->stt_expires = cfs_time_add(rpc->crpc_timeout, + timer->stt_expires = cfs_time_add(rpc->crpc_timeout, cfs_time_current_sec()); stt_add_timer(timer); return; } -/* +/* * Called with rpc->crpc_lock held. * * Upon exit the RPC expiry timer is not queued and the handler is not * running on any CPU. */ void srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc) -{ +{ /* timer not planted or already exploded */ if (rpc->crpc_timeout == 0) return; @@ -1042,7 +1062,7 @@ srpc_del_client_rpc_timer (srpc_client_rpc_t *rpc) while (rpc->crpc_timeout != 0) { spin_unlock(&rpc->crpc_lock); - cfs_schedule(); + cfs_schedule(); spin_lock(&rpc->crpc_lock); } @@ -1110,7 +1130,7 @@ srpc_client_rpc_done (srpc_client_rpc_t *rpc, int status) * No one can schedule me now since: * - RPC timer has been defused. * - all LNet events have been fired. - * - crpc_closed has been set, preventing srpc_abort_rpc from + * - crpc_closed has been set, preventing srpc_abort_rpc from * scheduling me. * Cancel pending schedules and prevent future schedule attempts: */ @@ -1168,7 +1188,7 @@ srpc_send_rpc (swi_workitem_t *wi) case SWI_STATE_REQUEST_SUBMITTED: /* CAVEAT EMPTOR: rqtev, rpyev, and bulkev may come in any - * order; however, they're processed in a strict order: + * order; however, they're processed in a strict order: * rqt, rpy, and bulk. 
*/ if (!rpc->crpc_reqstev.ev_fired) break; @@ -1185,7 +1205,7 @@ srpc_send_rpc (swi_workitem_t *wi) rc = rpc->crpc_replyev.ev_status; if (rc != 0) break; - if ((reply->msg_type != type && + if ((reply->msg_type != type && reply->msg_type != __swab32(type)) || (reply->msg_magic != SRPC_MSG_MAGIC && reply->msg_magic != __swab32(SRPC_MSG_MAGIC))) { @@ -1254,7 +1274,7 @@ srpc_create_client_rpc (lnet_process_id_t peer, int service, { srpc_client_rpc_t *rpc; - LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t, + LIBCFS_ALLOC(rpc, offsetof(srpc_client_rpc_t, crpc_bulk.bk_iovs[nbulkiov])); if (rpc == NULL) return NULL; @@ -1403,7 +1423,7 @@ srpc_send_reply (srpc_server_rpc_t *rpc) } /* when in kernel always called with LNET_LOCK() held, and in thread context */ -void +void srpc_lnet_ev_handler (lnet_event_t *ev) { srpc_event_t *rpcev = ev->md.user_ptr; @@ -1413,6 +1433,7 @@ srpc_lnet_ev_handler (lnet_event_t *ev) srpc_service_t *sv; srpc_msg_t *msg; srpc_msg_type_t type; + int fired_flag = 1; LASSERT (!in_interrupt()); @@ -1445,7 +1466,7 @@ srpc_lnet_ev_handler (lnet_event_t *ev) LASSERT (rpcev->ev_fired == 0); rpcev->ev_fired = 1; - rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? + rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? -EINTR : ev->status; swi_schedule_workitem(&crpc->crpc_wi); @@ -1473,7 +1494,7 @@ srpc_lnet_ev_handler (lnet_event_t *ev) LASSERT (sv->sv_nposted_msg >= 0); if (sv->sv_shuttingdown) { - /* Leave buffer on sv->sv_posted_msgq since + /* Leave buffer on sv->sv_posted_msgq since * srpc_finish_service needs to traverse it. 
*/ spin_unlock(&sv->sv_lock); break; @@ -1484,7 +1505,7 @@ srpc_lnet_ev_handler (lnet_event_t *ev) type = srpc_service2request(sv->sv_id); if (ev->status != 0 || ev->mlength != sizeof(*msg) || - (msg->msg_type != type && + (msg->msg_type != type && msg->msg_type != __swab32(type)) || (msg->msg_magic != SRPC_MSG_MAGIC && msg->msg_magic != __swab32(SRPC_MSG_MAGIC))) { @@ -1494,20 +1515,10 @@ srpc_lnet_ev_handler (lnet_event_t *ev) ev->status, ev->mlength, msg->msg_type, msg->msg_magic); - /* NB might drop sv_lock in srpc_service_recycle_buffer, - * sv_nposted_msg++ as an implicit reference to prevent - * sv from disappearing under me */ - sv->sv_nposted_msg++; - srpc_service_recycle_buffer(sv, buffer); - sv->sv_nposted_msg--; - spin_unlock(&sv->sv_lock); - - if (ev->status == 0) { /* status!=0 counted already */ - spin_lock(&srpc_data.rpc_glock); - srpc_data.rpc_counters.errors++; - spin_unlock(&srpc_data.rpc_glock); - } - break; + /* NB can't call srpc_service_recycle_buffer here since + * it may call LNetM[DE]Attach. The invalid magic tells + * srpc_handle_rpc to drop this RPC */ + msg->msg_magic = 0; } if (!list_empty(&sv->sv_free_rpcq)) { @@ -1534,10 +1545,13 @@ srpc_lnet_ev_handler (lnet_event_t *ev) ev->type == LNET_EVENT_REPLY || ev->type == LNET_EVENT_UNLINK); - if (ev->type == LNET_EVENT_SEND && - ev->status == 0 && !ev->unlinked) - break; /* wait for the final LNET_EVENT_REPLY */ - + if (ev->type == LNET_EVENT_SEND && !ev->unlinked) { + if (ev->status == 0) + break; /* wait for the final LNET_EVENT_REPLY */ + else + fired_flag = 0; /* LNET_EVENT_REPLY may arrive + (optimized GET case) */ + } case SRPC_BULK_PUT_SENT: if (ev->status == 0 && ev->type != LNET_EVENT_UNLINK) { spin_lock(&srpc_data.rpc_glock); @@ -1556,9 +1570,12 @@ srpc_lnet_ev_handler (lnet_event_t *ev) LASSERT (rpcev == &srpc->srpc_ev); spin_lock(&sv->sv_lock); - rpcev->ev_fired = 1; - rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? 
+ if (fired_flag) + rpcev->ev_fired = 1; + + rpcev->ev_status = (ev->type == LNET_EVENT_UNLINK) ? -EINTR : ev->status; + srpc_schedule_server_rpc(srpc); spin_unlock(&sv->sv_lock); break; diff --git a/lnet/selftest/selftest.h b/lnet/selftest/selftest.h index 8dff8d8..cdfa933 100644 --- a/lnet/selftest/selftest.h +++ b/lnet/selftest/selftest.h @@ -90,6 +90,7 @@ typedef struct { volatile int counter; } atomic_t; #define SWI_STATE_REQUEST_SENT 4 #define SWI_STATE_REPLY_RECEIVED 5 #define SWI_STATE_BULK_STARTED 6 +#define SWI_STATE_BULK_ERRORED 7 #define SWI_STATE_DONE 10 /* forward refs */ @@ -105,11 +106,11 @@ struct sfw_test_instance; * serialized with respect to itself. * - no CPU affinity, a workitem does not necessarily run on the same CPU * that schedules it. However, this might change in the future. - * - if a workitem is scheduled again before it has a chance to run, it + * - if a workitem is scheduled again before it has a chance to run, it * runs only once. - * - if a workitem is scheduled while it runs, it runs again after it - * completes; this ensures that events occurring while other events are - * being processed receive due attention. This behavior also allows a + * - if a workitem is scheduled while it runs, it runs again after it + * completes; this ensures that events occurring while other events are + * being processed receive due attention. This behavior also allows a * workitem to reschedule itself. 
* * Usage notes: @@ -389,7 +390,7 @@ typedef struct { typedef struct { int (*tso_init)(struct sfw_test_instance *tsi); /* intialize test client */ void (*tso_fini)(struct sfw_test_instance *tsi); /* finalize test client */ - int (*tso_prep_rpc)(struct sfw_test_unit *tsu, + int (*tso_prep_rpc)(struct sfw_test_unit *tsu, lnet_process_id_t dest, srpc_client_rpc_t **rpc); /* prep a tests rpc */ void (*tso_done_rpc)(struct sfw_test_unit *tsu, @@ -422,7 +423,7 @@ typedef struct sfw_test_instance { } tsi_u; } sfw_test_instance_t; -/* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at +/* XXX: trailing (CFS_PAGE_SIZE % sizeof(lnet_process_id_t)) bytes at * the end of pages are not used */ #define SFW_MAX_CONCUR LST_MAX_CONCUR #define SFW_ID_PER_PAGE (CFS_PAGE_SIZE / sizeof(lnet_process_id_t)) @@ -459,7 +460,7 @@ void sfw_add_bulk_page(srpc_bulk_t *bk, cfs_page_t *pg, int i); int sfw_alloc_pages(srpc_server_rpc_t *rpc, int npages, int sink); srpc_client_rpc_t * -srpc_create_client_rpc(lnet_process_id_t peer, int service, +srpc_create_client_rpc(lnet_process_id_t peer, int service, int nbulkiov, int bulklen, void (*rpc_done)(srpc_client_rpc_t *), void (*rpc_fini)(srpc_client_rpc_t *), void *priv); @@ -547,12 +548,12 @@ srpc_init_client_rpc (srpc_client_rpc_t *rpc, lnet_process_id_t peer, return; } -static inline const char * +static inline const char * swi_state2str (int state) { #define STATE2STR(x) case x: return #x switch(state) { - default: + default: LBUG(); STATE2STR(SWI_STATE_NEWBORN); STATE2STR(SWI_STATE_REPLY_SUBMITTED); @@ -561,6 +562,7 @@ swi_state2str (int state) STATE2STR(SWI_STATE_REQUEST_SENT); STATE2STR(SWI_STATE_REPLY_RECEIVED); STATE2STR(SWI_STATE_BULK_STARTED); + STATE2STR(SWI_STATE_BULK_ERRORED); STATE2STR(SWI_STATE_DONE); } #undef STATE2STR diff --git a/lnet/ulnds/ptllnd/ptllnd.c b/lnet/ulnds/ptllnd/ptllnd.c index 7000a5e..3b504e7 100644 --- a/lnet/ulnds/ptllnd/ptllnd.c +++ b/lnet/ulnds/ptllnd/ptllnd.c @@ -37,7 +37,6 @@ * * Author: Eric 
Barton */ - */ #include "ptllnd.h" @@ -45,13 +44,13 @@ lnd_t the_ptllnd = { .lnd_type = PTLLND, .lnd_startup = ptllnd_startup, .lnd_shutdown = ptllnd_shutdown, - .lnd_ctl = ptllnd_ctl, + .lnd_ctl = ptllnd_ctl, .lnd_send = ptllnd_send, .lnd_recv = ptllnd_recv, .lnd_eager_recv = ptllnd_eager_recv, .lnd_notify = ptllnd_notify, .lnd_wait = ptllnd_wait, - .lnd_setasync = ptllnd_setasync, + .lnd_setasync = ptllnd_setasync, }; static int ptllnd_ni_count = 0; @@ -62,112 +61,112 @@ static struct list_head ptllnd_history_list; void ptllnd_history_fini(void) { - ptllnd_he_t *he; - - while (!list_empty(&ptllnd_idle_history)) { - he = list_entry(ptllnd_idle_history.next, - ptllnd_he_t, he_list); - - list_del(&he->he_list); - LIBCFS_FREE(he, sizeof(*he)); - } - - while (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); - - list_del(&he->he_list); - LIBCFS_FREE(he, sizeof(*he)); - } + ptllnd_he_t *he; + + while (!list_empty(&ptllnd_idle_history)) { + he = list_entry(ptllnd_idle_history.next, + ptllnd_he_t, he_list); + + list_del(&he->he_list); + LIBCFS_FREE(he, sizeof(*he)); + } + + while (!list_empty(&ptllnd_history_list)) { + he = list_entry(ptllnd_history_list.next, + ptllnd_he_t, he_list); + + list_del(&he->he_list); + LIBCFS_FREE(he, sizeof(*he)); + } } int ptllnd_history_init(void) { - int i; - ptllnd_he_t *he; - int n; - int rc; - - CFS_INIT_LIST_HEAD(&ptllnd_idle_history); - CFS_INIT_LIST_HEAD(&ptllnd_history_list); - - rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0); - if (rc != 0) - return rc; - - for (i = 0; i < n; i++) { - LIBCFS_ALLOC(he, sizeof(*he)); - if (he == NULL) { - ptllnd_history_fini(); - return -ENOMEM; - } - - list_add(&he->he_list, &ptllnd_idle_history); - } - - PTLLND_HISTORY("Init"); - - return 0; + int i; + ptllnd_he_t *he; + int n; + int rc; + + CFS_INIT_LIST_HEAD(&ptllnd_idle_history); + CFS_INIT_LIST_HEAD(&ptllnd_history_list); + + rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0); 
+ if (rc != 0) + return rc; + + for (i = 0; i < n; i++) { + LIBCFS_ALLOC(he, sizeof(*he)); + if (he == NULL) { + ptllnd_history_fini(); + return -ENOMEM; + } + + list_add(&he->he_list, &ptllnd_idle_history); + } + + PTLLND_HISTORY("Init"); + + return 0; } void ptllnd_history(const char *fn, const char *file, const int line, - const char *fmt, ...) + const char *fmt, ...) { - static int seq; - + static int seq; + va_list ap; - ptllnd_he_t *he; - - if (!list_empty(&ptllnd_idle_history)) { - he = list_entry(ptllnd_idle_history.next, - ptllnd_he_t, he_list); - } else if (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); - } else { - return; - } - - list_del(&he->he_list); - list_add_tail(&he->he_list, &ptllnd_history_list); - - he->he_seq = seq++; - he->he_fn = fn; - he->he_file = file; - he->he_line = line; - gettimeofday(&he->he_time, NULL); - - va_start(ap, fmt); - vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap); - va_end(ap); + ptllnd_he_t *he; + + if (!list_empty(&ptllnd_idle_history)) { + he = list_entry(ptllnd_idle_history.next, + ptllnd_he_t, he_list); + } else if (!list_empty(&ptllnd_history_list)) { + he = list_entry(ptllnd_history_list.next, + ptllnd_he_t, he_list); + } else { + return; + } + + list_del(&he->he_list); + list_add_tail(&he->he_list, &ptllnd_history_list); + + he->he_seq = seq++; + he->he_fn = fn; + he->he_file = file; + he->he_line = line; + gettimeofday(&he->he_time, NULL); + + va_start(ap, fmt); + vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap); + va_end(ap); } void ptllnd_dump_history(void) { - ptllnd_he_t *he; + ptllnd_he_t *he; + + PTLLND_HISTORY("dumping..."); - PTLLND_HISTORY("dumping..."); - - while (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); + while (!list_empty(&ptllnd_history_list)) { + he = list_entry(ptllnd_history_list.next, + ptllnd_he_t, he_list); - list_del(&he->he_list); - - CDEBUG(D_WARNING, "%d 
%d.%06d (%s:%d:%s()) %s\n", he->he_seq, - (int)he->he_time.tv_sec, (int)he->he_time.tv_usec, - he->he_file, he->he_line, he->he_fn, he->he_msg); + list_del(&he->he_list); - list_add_tail(&he->he_list, &ptllnd_idle_history); - } + CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq, + (int)he->he_time.tv_sec, (int)he->he_time.tv_usec, + he->he_file, he->he_line, he->he_fn, he->he_msg); - PTLLND_HISTORY("complete"); + list_add_tail(&he->he_list, &ptllnd_idle_history); + } + + PTLLND_HISTORY("complete"); } -void +void ptllnd_assert_wire_constants (void) { /* Wire protocol assertions generated by 'wirecheck' @@ -273,10 +272,10 @@ ptllnd_get_tunables(lnet_ni_t *ni) int rc; int temp; - /* Other tunable defaults depend on this */ - rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0); - if (rc != 0) - return rc; + /* Other tunable defaults depend on this */ + rc = ptllnd_parse_int_tunable(&plni->plni_debug, "PTLLND_DEBUG", 0); + if (rc != 0) + return rc; rc = ptllnd_parse_int_tunable(&plni->plni_portal, "PTLLND_PORTAL", PTLLND_PORTAL); @@ -293,6 +292,11 @@ ptllnd_get_tunables(lnet_ni_t *ni) "PTLLND_PEERCREDITS", PTLLND_PEERCREDITS); if (rc != 0) return rc; + /* kptl_msg_t::ptlm_credits is only a __u8 */ + if (plni->plni_peer_credits > 255) { + CERROR("PTLLND_PEERCREDITS must be <= 255\n"); + return -EINVAL; + } rc = ptllnd_parse_int_tunable(&max_msg_size, "PTLLND_MAX_MSG_SIZE", @@ -321,56 +325,56 @@ ptllnd_get_tunables(lnet_ni_t *ni) if (rc != 0) return rc; - rc = ptllnd_parse_int_tunable(&plni->plni_checksum, - "PTLLND_CHECKSUM", 0); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history, - "PTLLND_TX_HISTORY", - plni->plni_debug ? 
1024 : 0); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch, - "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak, - "PTLLND_ABORT_ON_NAK", 0); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak, - "PTLLND_DUMP_ON_NAK", plni->plni_debug); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval, - "PTLLND_WATCHDOG_INTERVAL", 1); - if (rc != 0) - return rc; - if (plni->plni_watchdog_interval <= 0) - plni->plni_watchdog_interval = 1; - - rc = ptllnd_parse_int_tunable(&plni->plni_timeout, - "PTLLND_TIMEOUT", 50); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_long_wait, - "PTLLND_LONG_WAIT", - plni->plni_debug ? 5 : plni->plni_timeout); - if (rc != 0) - return rc; - plni->plni_long_wait *= 1000; /* convert to mS */ + rc = ptllnd_parse_int_tunable(&plni->plni_checksum, + "PTLLND_CHECKSUM", 0); + if (rc != 0) + return rc; + + rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history, + "PTLLND_TX_HISTORY", + plni->plni_debug ? 
1024 : 0); + if (rc != 0) + return rc; + + rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_protocol_mismatch, + "PTLLND_ABORT_ON_PROTOCOL_MISMATCH", 1); + if (rc != 0) + return rc; + + rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak, + "PTLLND_ABORT_ON_NAK", 0); + if (rc != 0) + return rc; + + rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak, + "PTLLND_DUMP_ON_NAK", plni->plni_debug); + if (rc != 0) + return rc; + + rc = ptllnd_parse_int_tunable(&plni->plni_watchdog_interval, + "PTLLND_WATCHDOG_INTERVAL", 1); + if (rc != 0) + return rc; + if (plni->plni_watchdog_interval <= 0) + plni->plni_watchdog_interval = 1; + + rc = ptllnd_parse_int_tunable(&plni->plni_timeout, + "PTLLND_TIMEOUT", 50); + if (rc != 0) + return rc; + + rc = ptllnd_parse_int_tunable(&plni->plni_long_wait, + "PTLLND_LONG_WAIT", + plni->plni_debug ? 5 : plni->plni_timeout); + if (rc != 0) + return rc; + plni->plni_long_wait *= 1000; /* convert to mS */ plni->plni_max_msg_size = max_msg_size & ~7; if (plni->plni_max_msg_size < PTLLND_MIN_BUFFER_SIZE) plni->plni_max_msg_size = PTLLND_MIN_BUFFER_SIZE; - CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0); - CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE); + CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0); + CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE); plni->plni_buffer_size = plni->plni_max_msg_size * msgs_per_buffer; @@ -442,9 +446,9 @@ ptllnd_size_buffers (lnet_ni_t *ni, int delta) CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers); CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers); - plni->plni_nmsgs += delta; - LASSERT(plni->plni_nmsgs >= 0); - + plni->plni_nmsgs += delta; + LASSERT(plni->plni_nmsgs >= 0); + nmsgs = plni->plni_nmsgs + plni->plni_msgs_spare; nbufs = (nmsgs * plni->plni_max_msg_size + plni->plni_buffer_size - 1) / @@ -491,22 +495,22 @@ ptllnd_destroy_buffers (lnet_ni_t *ni) LASSERT (plni->plni_nbuffers > 0); if (buf->plb_posted) { - time_t start = cfs_time_current_sec(); 
- int w = plni->plni_long_wait; + time_t start = cfs_time_current_sec(); + int w = plni->plni_long_wait; LASSERT (plni->plni_nposted_buffers > 0); #ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS (void) PtlMDUnlink(buf->plb_md); - while (buf->plb_posted) { - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds to unlink buffer\n", - (int)(cfs_time_current_sec() - start)); - w *= 2; - } - ptllnd_wait(ni, w); - } + while (buf->plb_posted) { + if (w > 0 && cfs_time_current_sec() > start + w/1000) { + CWARN("Waited %ds to unlink buffer\n", + (int)(cfs_time_current_sec() - start)); + w *= 2; + } + ptllnd_wait(ni, w); + } #else while (buf->plb_posted) { rc = PtlMDUnlink(buf->plb_md); @@ -516,12 +520,12 @@ ptllnd_destroy_buffers (lnet_ni_t *ni) break; } LASSERT (rc == PTL_MD_IN_USE); - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds to unlink buffer\n", - cfs_time_current_sec() - start); - w *= 2; - } - ptllnd_wait(ni, w); + if (w > 0 && cfs_time_current_sec() > start + w/1000) { + CWARN("Waited %ds to unlink buffer\n", + cfs_time_current_sec() - start); + w *= 2; + } + ptllnd_wait(ni, w); } #endif } @@ -591,14 +595,14 @@ ptllnd_close_peers (lnet_ni_t *ni) int ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) { - switch (cmd) { - case IOC_LIBCFS_DEBUG_PEER: - ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg)); - return 0; - - default: - return -EINVAL; - } + switch (cmd) { + case IOC_LIBCFS_DEBUG_PEER: + ptllnd_dump_debug(ni, *((lnet_process_id_t *)arg)); + return 0; + + default: + return -EINVAL; + } } __u64 @@ -616,25 +620,25 @@ ptllnd_shutdown (lnet_ni_t *ni) { ptllnd_ni_t *plni = ni->ni_data; int rc; - time_t start = cfs_time_current_sec(); - int w = plni->plni_long_wait; + time_t start = cfs_time_current_sec(); + int w = plni->plni_long_wait; LASSERT (ptllnd_ni_count == 1); - plni->plni_max_tx_history = 0; + plni->plni_max_tx_history = 0; - ptllnd_cull_tx_history(plni); + ptllnd_cull_tx_history(plni); 
ptllnd_close_peers(ni); ptllnd_destroy_buffers(ni); while (plni->plni_npeers > 0) { - if (w > 0 && cfs_time_current_sec() > start + w/1000) { - CWARN("Waited %ds for peers to shutdown\n", - (int)(cfs_time_current_sec() - start)); - w *= 2; - } + if (w > 0 && cfs_time_current_sec() > start + w/1000) { + CWARN("Waited %ds for peers to shutdown\n", + (int)(cfs_time_current_sec() - start)); + w *= 2; + } ptllnd_wait(ni, w); - } + } LASSERT (plni->plni_ntxs == 0); LASSERT (plni->plni_nrxs == 0); @@ -656,9 +660,9 @@ ptllnd_startup (lnet_ni_t *ni) ptllnd_ni_t *plni; int rc; - /* could get limits from portals I guess... */ - ni->ni_maxtxcredits = - ni->ni_peertxcredits = 1000; + /* could get limits from portals I guess... */ + ni->ni_maxtxcredits = + ni->ni_peertxcredits = 1000; if (ptllnd_ni_count != 0) { CERROR("Can't have > 1 instance of ptllnd\n"); @@ -667,12 +671,12 @@ ptllnd_startup (lnet_ni_t *ni) ptllnd_ni_count++; - rc = ptllnd_history_init(); - if (rc != 0) { - CERROR("Can't init history\n"); - goto failed0; - } - + rc = ptllnd_history_init(); + if (rc != 0) { + CERROR("Can't init history\n"); + goto failed0; + } + LIBCFS_ALLOC(plni, sizeof(*plni)); if (plni == NULL) { CERROR("Can't allocate ptllnd state\n"); @@ -685,9 +689,9 @@ ptllnd_startup (lnet_ni_t *ni) plni->plni_stamp = ptllnd_get_timestamp(); plni->plni_nrxs = 0; plni->plni_ntxs = 0; - plni->plni_ntx_history = 0; - plni->plni_watchdog_peeridx = 0; - plni->plni_watchdog_nextt = cfs_time_current_sec(); + plni->plni_ntx_history = 0; + plni->plni_watchdog_peeridx = 0; + plni->plni_watchdog_nextt = cfs_time_current_sec(); CFS_INIT_LIST_HEAD(&plni->plni_zombie_txs); CFS_INIT_LIST_HEAD(&plni->plni_tx_history); @@ -714,7 +718,7 @@ ptllnd_startup (lnet_ni_t *ni) NULL, NULL, &plni->plni_nih); if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CERROR("PtlNIInit failed: %s(%d)\n", - ptllnd_errtype2str(rc), rc); + ptllnd_errtype2str(rc), rc); rc = -ENODEV; goto failed2; } @@ -723,7 +727,7 @@ ptllnd_startup (lnet_ni_t *ni) 
PTL_EQ_HANDLER_NONE, &plni->plni_eqh); if (rc != PTL_OK) { CERROR("PtlEQAlloc failed: %s(%d)\n", - ptllnd_errtype2str(rc), rc); + ptllnd_errtype2str(rc), rc); rc = -ENODEV; goto failed3; } @@ -731,10 +735,10 @@ ptllnd_startup (lnet_ni_t *ni) /* * Fetch the Portals NID */ - rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id); + rc = PtlGetId(plni->plni_nih, &plni->plni_portals_id); if (rc != PTL_OK) { CERROR ("PtlGetID failed : %s(%d)\n", - ptllnd_errtype2str(rc), rc); + ptllnd_errtype2str(rc), rc); rc = -EINVAL; goto failed4; } @@ -754,7 +758,7 @@ ptllnd_startup (lnet_ni_t *ni) if (rc != 0) goto failed4; - return 0; + return 0; failed4: ptllnd_destroy_buffers(ni); @@ -766,7 +770,7 @@ ptllnd_startup (lnet_ni_t *ni) failed1: LIBCFS_FREE(plni, sizeof(*plni)); failed0: - ptllnd_history_fini(); + ptllnd_history_fini(); ptllnd_ni_count--; CDEBUG(D_NET, "<<< rc=%d\n",rc); return rc; diff --git a/lnet/ulnds/ptllnd/ptllnd.h b/lnet/ulnds/ptllnd/ptllnd.h index 87697e7..b8198b2 100644 --- a/lnet/ulnds/ptllnd/ptllnd.h +++ b/lnet/ulnds/ptllnd/ptllnd.h @@ -140,11 +140,13 @@ typedef struct int plp_max_msg_size; int plp_refcount; + int plp_sent_hello:1; int plp_recvd_hello:1; int plp_closing:1; __u64 plp_match; __u64 plp_stamp; struct list_head plp_txq; + struct list_head plp_noopq; struct list_head plp_activeq; } ptllnd_peer_t; @@ -271,13 +273,13 @@ ptllnd_peer_decref (ptllnd_peer_t *peer) static inline lnet_nid_t ptllnd_ptl2lnetnid(lnet_ni_t *ni, ptl_nid_t portals_nid) { - return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid); + return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid); } static inline ptl_nid_t ptllnd_lnet2ptlnid(lnet_nid_t lnet_nid) { - return LNET_NIDADDR(lnet_nid); + return LNET_NIDADDR(lnet_nid); } /* diff --git a/lnet/ulnds/ptllnd/ptllnd_cb.c b/lnet/ulnds/ptllnd/ptllnd_cb.c index da6d277..c8431df 100644 --- a/lnet/ulnds/ptllnd/ptllnd_cb.c +++ b/lnet/ulnds/ptllnd/ptllnd_cb.c @@ -55,6 +55,8 @@ ptllnd_post_tx(ptllnd_tx_t *tx) { ptllnd_peer_t *peer = 
tx->tx_peer; + LASSERT (tx->tx_type != PTLLND_MSG_TYPE_NOOP); + ptllnd_set_tx_deadline(tx); list_add_tail(&tx->tx_list, &peer->plp_txq); ptllnd_check_sends(peer); @@ -67,7 +69,7 @@ ptllnd_ptlid2str(ptl_process_id_t id) static int idx = 0; char *str = strs[idx++]; - + if (idx >= sizeof(strs)/sizeof(strs[0])) idx = 0; @@ -88,6 +90,7 @@ ptllnd_destroy_peer(ptllnd_peer_t *peer) LASSERT (peer->plp_closing); LASSERT (plni->plni_npeers > 0); LASSERT (list_empty(&peer->plp_txq)); + LASSERT (list_empty(&peer->plp_noopq)); LASSERT (list_empty(&peer->plp_activeq)); plni->plni_npeers--; LIBCFS_FREE(peer, sizeof(*peer)); @@ -117,14 +120,16 @@ ptllnd_close_peer(ptllnd_peer_t *peer, int error) peer->plp_closing = 1; if (!list_empty(&peer->plp_txq) || + !list_empty(&peer->plp_noopq) || !list_empty(&peer->plp_activeq) || error != 0) { CWARN("Closing %s\n", libcfs_id2str(peer->plp_id)); if (plni->plni_debug) ptllnd_dump_debug(ni, peer->plp_id); } - + ptllnd_abort_txs(plni, &peer->plp_txq); + ptllnd_abort_txs(plni, &peer->plp_noopq); ptllnd_abort_txs(plni, &peer->plp_activeq); list_del(&peer->plp_list); @@ -136,16 +141,13 @@ ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create) { ptllnd_ni_t *plni = ni->ni_data; unsigned int hash = LNET_NIDADDR(id.nid) % plni->plni_peer_hash_size; - struct list_head *tmp; ptllnd_peer_t *plp; ptllnd_tx_t *tx; int rc; LASSERT (LNET_NIDNET(id.nid) == LNET_NIDNET(ni->ni_nid)); - list_for_each(tmp, &plni->plni_peer_hash[hash]) { - plp = list_entry(tmp, ptllnd_peer_t, plp_list); - + list_for_each_entry (plp, &plni->plni_peer_hash[hash], plp_list) { if (plp->plp_id.nid == id.nid && plp->plp_id.pid == id.pid) { ptllnd_peer_addref(plp); @@ -184,11 +186,13 @@ ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create) plp->plp_extra_lazy_credits = 0; plp->plp_match = 0; plp->plp_stamp = 0; + plp->plp_sent_hello = 0; plp->plp_recvd_hello = 0; plp->plp_closing = 0; plp->plp_refcount = 1; CFS_INIT_LIST_HEAD(&plp->plp_list); 
CFS_INIT_LIST_HEAD(&plp->plp_txq); + CFS_INIT_LIST_HEAD(&plp->plp_noopq); CFS_INIT_LIST_HEAD(&plp->plp_activeq); ptllnd_peer_addref(plp); @@ -221,27 +225,27 @@ ptllnd_count_q(struct list_head *q) { struct list_head *e; int n = 0; - + list_for_each(e, q) { n++; } - + return n; } const char * -ptllnd_tx_typestr(int type) +ptllnd_tx_typestr(int type) { switch (type) { case PTLLND_RDMA_WRITE: return "rdma_write"; - + case PTLLND_RDMA_READ: return "rdma_read"; case PTLLND_MSG_TYPE_PUT: return "put_req"; - + case PTLLND_MSG_TYPE_GET: return "get_req"; @@ -260,13 +264,13 @@ ptllnd_tx_typestr(int type) } void -ptllnd_debug_tx(ptllnd_tx_t *tx) +ptllnd_debug_tx(ptllnd_tx_t *tx) { CDEBUG(D_WARNING, "%s %s b %ld.%06ld/%ld.%06ld" " r %ld.%06ld/%ld.%06ld status %d\n", ptllnd_tx_typestr(tx->tx_type), libcfs_id2str(tx->tx_peer->plp_id), - tx->tx_bulk_posted.tv_sec, tx->tx_bulk_posted.tv_usec, + tx->tx_bulk_posted.tv_sec, tx->tx_bulk_posted.tv_usec, tx->tx_bulk_done.tv_sec, tx->tx_bulk_done.tv_usec, tx->tx_req_posted.tv_sec, tx->tx_req_posted.tv_usec, tx->tx_req_done.tv_sec, tx->tx_req_done.tv_usec, @@ -277,59 +281,56 @@ void ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id) { ptllnd_peer_t *plp = ptllnd_find_peer(ni, id, 0); - struct list_head *tmp; ptllnd_ni_t *plni = ni->ni_data; ptllnd_tx_t *tx; - + if (plp == NULL) { CDEBUG(D_WARNING, "No peer %s\n", libcfs_id2str(id)); return; } - - CDEBUG(D_WARNING, "%s %s%s [%d] "LPU64".%06d m "LPU64" q %d/%d c %d/%d+%d(%d)\n", - libcfs_id2str(id), - plp->plp_recvd_hello ? "H" : "_", - plp->plp_closing ? "C" : "_", - plp->plp_refcount, - plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000), - plp->plp_match, - ptllnd_count_q(&plp->plp_txq), - ptllnd_count_q(&plp->plp_activeq), - plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits); + + CWARN("%s %s%s [%d] "LPU64".%06d m "LPU64" q %d/%d/%d c %d/%d+%d(%d)\n", + libcfs_id2str(id), + plp->plp_recvd_hello ? 
"H" : "_", + plp->plp_closing ? "C" : "_", + plp->plp_refcount, + plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000), + plp->plp_match, + ptllnd_count_q(&plp->plp_txq), + ptllnd_count_q(&plp->plp_noopq), + ptllnd_count_q(&plp->plp_activeq), + plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits, + plni->plni_peer_credits + plp->plp_lazy_credits); CDEBUG(D_WARNING, "txq:\n"); - list_for_each (tmp, &plp->plp_txq) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - + list_for_each_entry (tx, &plp->plp_txq, tx_list) { + ptllnd_debug_tx(tx); + } + + CDEBUG(D_WARNING, "noopq:\n"); + list_for_each_entry (tx, &plp->plp_noopq, tx_list) { ptllnd_debug_tx(tx); } CDEBUG(D_WARNING, "activeq:\n"); - list_for_each (tmp, &plp->plp_activeq) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - + list_for_each_entry (tx, &plp->plp_activeq, tx_list) { ptllnd_debug_tx(tx); } CDEBUG(D_WARNING, "zombies:\n"); - list_for_each (tmp, &plni->plni_zombie_txs) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - + list_for_each_entry (tx, &plni->plni_zombie_txs, tx_list) { if (tx->tx_peer->plp_id.nid == id.nid && tx->tx_peer->plp_id.pid == id.pid) ptllnd_debug_tx(tx); } - + CDEBUG(D_WARNING, "history:\n"); - list_for_each (tmp, &plni->plni_tx_history) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - + list_for_each_entry (tx, &plni->plni_tx_history, tx_list) { if (tx->tx_peer->plp_id.nid == id.nid && tx->tx_peer->plp_id.pid == id.pid) ptllnd_debug_tx(tx); } - + ptllnd_peer_decref(plp); } @@ -354,7 +355,7 @@ ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive) id.nid = nid; id.pid = LUSTRE_SRV_LNET_PID; - + peer = ptllnd_find_peer(ni, id, 1); if (peer == NULL) return; @@ -367,10 +368,10 @@ ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive) libcfs_id2str(id)); w *= 2; } - + ptllnd_wait(ni, w); } - + ptllnd_peer_decref(peer); } @@ -379,7 +380,7 @@ ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int nasync) { ptllnd_peer_t *peer = ptllnd_find_peer(ni, id, nasync > 0); 
int rc; - + if (peer == NULL) return -ENOMEM; @@ -404,7 +405,7 @@ ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int nasync) nasync -= peer->plp_extra_lazy_credits; peer->plp_extra_lazy_credits = 0; - + rc = ptllnd_size_buffers(ni, nasync); if (rc == 0) { peer->plp_lazy_credits += nasync; @@ -597,7 +598,7 @@ ptllnd_tx_done(ptllnd_tx_t *tx) } ptllnd_close_peer(peer, tx->tx_status); } - + ptllnd_abort_tx(tx, &tx->tx_reqmdh); ptllnd_abort_tx(tx, &tx->tx_bulkmdh); @@ -619,7 +620,7 @@ ptllnd_tx_done(ptllnd_tx_t *tx) plni->plni_ntx_history++; list_add_tail(&tx->tx_list, &plni->plni_tx_history); - + ptllnd_cull_tx_history(plni); } @@ -663,7 +664,7 @@ ptllnd_set_txiov(ptllnd_tx_t *tx, piov[npiov].iov_base = iov[npiov].iov_base + temp_offset; piov[npiov].iov_len = iov[npiov].iov_len - temp_offset; - + if (piov[npiov].iov_len >= resid) { piov[npiov].iov_len = resid; npiov++; @@ -759,11 +760,25 @@ ptllnd_post_buffer(ptllnd_buffer_t *buf) return -ENOMEM; } +static inline int +ptllnd_peer_send_noop (ptllnd_peer_t *peer) +{ + ptllnd_ni_t *plni = peer->plp_ni->ni_data; + + if (!peer->plp_sent_hello || + peer->plp_credits == 0 || + !list_empty(&peer->plp_noopq) || + peer->plp_outstanding_credits < PTLLND_CREDIT_HIGHWATER(plni)) + return 0; + + /* No tx to piggyback NOOP onto or no credit to send a tx */ + return (list_empty(&peer->plp_txq) || peer->plp_credits == 1); +} + void ptllnd_check_sends(ptllnd_peer_t *peer) { - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; + ptllnd_ni_t *plni = peer->plp_ni->ni_data; ptllnd_tx_t *tx; ptl_md_t md; ptl_handle_md_t mdh; @@ -774,10 +789,7 @@ ptllnd_check_sends(ptllnd_peer_t *peer) peer->plp_outstanding_credits, peer->plp_sent_credits, plni->plni_peer_credits + peer->plp_lazy_credits); - if (list_empty(&peer->plp_txq) && - peer->plp_outstanding_credits >= PTLLND_CREDIT_HIGHWATER(plni) && - peer->plp_credits != 0) { - + if (ptllnd_peer_send_noop(peer)) { tx = ptllnd_new_tx(peer, PTLLND_MSG_TYPE_NOOP, 0); CDEBUG(D_NET, 
"NOOP tx=%p\n",tx); if (tx == NULL) { @@ -785,12 +797,22 @@ ptllnd_check_sends(ptllnd_peer_t *peer) libcfs_id2str(peer->plp_id)); } else { ptllnd_set_tx_deadline(tx); - list_add_tail(&tx->tx_list, &peer->plp_txq); + list_add_tail(&tx->tx_list, &peer->plp_noopq); } } - while (!list_empty(&peer->plp_txq)) { - tx = list_entry(peer->plp_txq.next, ptllnd_tx_t, tx_list); + for (;;) { + if (!list_empty(&peer->plp_noopq)) { + LASSERT (peer->plp_sent_hello); + tx = list_entry(peer->plp_noopq.next, + ptllnd_tx_t, tx_list); + } else if (!list_empty(&peer->plp_txq)) { + tx = list_entry(peer->plp_txq.next, + ptllnd_tx_t, tx_list); + } else { + /* nothing to send right now */ + break; + } LASSERT (tx->tx_msgsize > 0); @@ -800,6 +822,14 @@ ptllnd_check_sends(ptllnd_peer_t *peer) <= plni->plni_peer_credits + peer->plp_lazy_credits); LASSERT (peer->plp_credits >= 0); + /* say HELLO first */ + if (!peer->plp_sent_hello) { + LASSERT (list_empty(&peer->plp_noopq)); + LASSERT (tx->tx_type == PTLLND_MSG_TYPE_HELLO); + + peer->plp_sent_hello = 1; + } + if (peer->plp_credits == 0) { /* no credits */ PTLLND_HISTORY("%s[%d/%d+%d(%d)]: no creds for %p", libcfs_id2str(peer->plp_id), @@ -810,9 +840,11 @@ ptllnd_check_sends(ptllnd_peer_t *peer) peer->plp_lazy_credits, tx); break; } - - if (peer->plp_credits == 1 && /* last credit reserved for */ - peer->plp_outstanding_credits == 0) { /* returning credits */ + + /* Last/Initial credit reserved for NOOP/HELLO */ + if (peer->plp_credits == 1 && + tx->tx_type != PTLLND_MSG_TYPE_NOOP && + tx->tx_type != PTLLND_MSG_TYPE_HELLO) { PTLLND_HISTORY("%s[%d/%d+%d(%d)]: too few creds for %p", libcfs_id2str(peer->plp_id), peer->plp_credits, @@ -822,7 +854,7 @@ ptllnd_check_sends(ptllnd_peer_t *peer) peer->plp_lazy_credits, tx); break; } - + list_del(&tx->tx_list); list_add_tail(&tx->tx_list, &peer->plp_activeq); @@ -830,9 +862,7 @@ ptllnd_check_sends(ptllnd_peer_t *peer) ptllnd_msgtype2str(tx->tx_type),tx->tx_type); if (tx->tx_type == PTLLND_MSG_TYPE_NOOP && 
- (!list_empty(&peer->plp_txq) || - peer->plp_outstanding_credits < - PTLLND_CREDIT_HIGHWATER(plni))) { + !ptllnd_peer_send_noop(peer)) { /* redundant NOOP */ ptllnd_tx_done(tx); continue; @@ -878,7 +908,7 @@ ptllnd_check_sends(ptllnd_peer_t *peer) LASSERT (tx->tx_type != PTLLND_RDMA_WRITE && tx->tx_type != PTLLND_RDMA_READ); - + tx->tx_reqmdh = mdh; gettimeofday(&tx->tx_req_posted, NULL); @@ -1130,7 +1160,7 @@ ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg) LASSERT (msg->msg_niov <= PTL_MD_MAX_IOV); /* !!! */ - CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n", + CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n", lnet_msgtyp2str(msg->msg_type), msg->msg_niov, msg->msg_offset, msg->msg_len, libcfs_nid2str(msg->msg_target.nid), @@ -1141,7 +1171,7 @@ ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg) libcfs_id2str(msg->msg_target)); return -EHOSTUNREACH; } - + plp = ptllnd_find_peer(ni, msg->msg_target, 1); if (plp == NULL) return -ENOMEM; @@ -1223,8 +1253,7 @@ void ptllnd_rx_done(ptllnd_rx_t *rx) { ptllnd_peer_t *plp = rx->rx_peer; - lnet_ni_t *ni = plp->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; + ptllnd_ni_t *plni = plp->plp_ni->ni_data; plp->plp_outstanding_credits++; @@ -1234,7 +1263,7 @@ ptllnd_rx_done(ptllnd_rx_t *rx) plp->plp_sent_credits, plni->plni_peer_credits + plp->plp_lazy_credits, rx); - ptllnd_check_sends(rx->rx_peer); + ptllnd_check_sends(plp); LASSERT (plni->plni_nrxs > 0); plni->plni_nrxs--; @@ -1337,7 +1366,7 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, msg_version = flip ? 
__swab16(msg->ptlm_version) : msg->ptlm_version; if (msg_version != PTLLND_MSG_VERSION) { - CERROR("Bad protocol version %04x from %s: %04x expected\n", + CERROR("Bad protocol version %04x from %s: %04x expected\n", (__u32)msg_version, ptllnd_ptlid2str(initiator), PTLLND_MSG_VERSION); if (plni->plni_abort_on_protocol_mismatch) @@ -1366,7 +1395,7 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, msg->ptlm_version = msg_version; msg->ptlm_cksum = msg_cksum; - + if (flip) { /* NB stamps are opaque cookies */ __swab32s(&msg->ptlm_nob); @@ -1375,7 +1404,7 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, __swab32s(&msg->ptlm_srcpid); __swab32s(&msg->ptlm_dstpid); } - + srcid.nid = msg->ptlm_srcnid; srcid.pid = msg->ptlm_srcpid; @@ -1387,19 +1416,19 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, } if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) { - CERROR("NAK from %s (%s)\n", + CERROR("NAK from %s (%s)\n", libcfs_id2str(srcid), ptllnd_ptlid2str(initiator)); if (plni->plni_dump_on_nak) ptllnd_dump_debug(ni, srcid); - + if (plni->plni_abort_on_nak) abort(); - + return; } - + if (msg->ptlm_dstnid != ni->ni_nid || msg->ptlm_dstpid != the_lnet.ln_pid) { CERROR("Bad dstid %s (%s expected) from %s\n", @@ -1459,7 +1488,7 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size); } break; - + case PTLLND_MSG_TYPE_NOOP: break; @@ -1509,19 +1538,16 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, if (plp->plp_sent_credits == 0) { CERROR("%s[%d/%d+%d(%d)]: unexpected message\n", libcfs_id2str(plp->plp_id), - plp->plp_credits, plp->plp_outstanding_credits, + plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits, plni->plni_peer_credits + plp->plp_lazy_credits); return; } plp->plp_sent_credits--; - + /* No check for credit overflow - the peer may post new buffers after * the startup handshake. 
*/ - if (msg->ptlm_credits > 0) { - plp->plp_credits += msg->ptlm_credits; - ptllnd_check_sends(plp); - } + plp->plp_credits += msg->ptlm_credits; /* All OK so far; assume the message is good... */ @@ -1551,6 +1577,9 @@ ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, break; } + if (msg->ptlm_credits > 0) + ptllnd_check_sends(plp); + ptllnd_peer_decref(plp); } @@ -1580,7 +1609,7 @@ ptllnd_buf_event (lnet_ni_t *ni, ptl_event_t *event) /* Portals can't force message alignment - someone sending an * odd-length message could misalign subsequent messages */ if ((event->mlength & 7) != 0) { - CERROR("Message from %s has odd length %llu: " + CERROR("Message from %s has odd length %u: " "probable version incompatibility\n", ptllnd_ptlid2str(event->initiator), event->mlength); @@ -1655,7 +1684,7 @@ ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event) LASSERT (!isreq != !isbulk); /* always one and only 1 match */ PTLLND_HISTORY("%s[%d/%d+%d(%d)]: TX done %p %s%s", - libcfs_id2str(tx->tx_peer->plp_id), + libcfs_id2str(tx->tx_peer->plp_id), tx->tx_peer->plp_credits, tx->tx_peer->plp_outstanding_credits, tx->tx_peer->plp_sent_credits, @@ -1728,18 +1757,19 @@ ptllnd_tx_t * ptllnd_find_timed_out_tx(ptllnd_peer_t *peer) { time_t now = cfs_time_current_sec(); - struct list_head *tmp; + ptllnd_tx_t *tx; + + list_for_each_entry (tx, &peer->plp_txq, tx_list) { + if (tx->tx_deadline < now) + return tx; + } - list_for_each(tmp, &peer->plp_txq) { - ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list); - + list_for_each_entry (tx, &peer->plp_noopq, tx_list) { if (tx->tx_deadline < now) return tx; } - - list_for_each(tmp, &peer->plp_activeq) { - ptllnd_tx_t *tx = list_entry(tmp, ptllnd_tx_t, tx_list); - + + list_for_each_entry (tx, &peer->plp_activeq, tx_list) { if (tx->tx_deadline < now) return tx; } @@ -1751,10 +1781,10 @@ void ptllnd_check_peer(ptllnd_peer_t *peer) { ptllnd_tx_t *tx = ptllnd_find_timed_out_tx(peer); - + if (tx == NULL) return; - + CERROR("%s: timed 
out\n", libcfs_id2str(peer->plp_id)); ptllnd_close_peer(peer, -ETIMEDOUT); } @@ -1788,11 +1818,11 @@ ptllnd_watchdog (lnet_ni_t *ni, time_t now) for (i = 0; i < chunk; i++) { hashlist = &plni->plni_peer_hash[plni->plni_watchdog_peeridx]; - + list_for_each_safe(tmp, nxt, hashlist) { ptllnd_check_peer(list_entry(tmp, ptllnd_peer_t, plp_list)); } - + plni->plni_watchdog_peeridx = (plni->plni_watchdog_peeridx + 1) % plni->plni_peer_hash_size; } @@ -1811,7 +1841,7 @@ ptllnd_wait (lnet_ni_t *ni, int milliseconds) struct timeval then; struct timeval now; struct timeval deadline; - + ptllnd_ni_t *plni = ni->ni_data; ptllnd_tx_t *tx; ptl_event_t event; @@ -1841,7 +1871,7 @@ ptllnd_wait (lnet_ni_t *ni, int milliseconds) for (;;) { gettimeofday(&then, NULL); - + rc = PtlEQPoll(&plni->plni_eqh, 1, timeout, &event, &which); gettimeofday(&now, NULL); @@ -1862,7 +1892,7 @@ ptllnd_wait (lnet_ni_t *ni, int milliseconds) ptllnd_watchdog(ni, now.tv_sec); LASSERT (now.tv_sec < plni->plni_watchdog_nextt); } - + if (now.tv_sec > deadline.tv_sec || /* timeout expired */ (now.tv_sec == deadline.tv_sec && now.tv_usec >= deadline.tv_usec)) @@ -1878,7 +1908,7 @@ ptllnd_wait (lnet_ni_t *ni, int milliseconds) continue; } - + LASSERT (rc == PTL_OK || rc == PTL_EQ_DROPPED); if (rc == PTL_EQ_DROPPED) diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index 88272b0..6442e09 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -40,7 +40,7 @@ #define __USE_FILE_OFFSET64 #ifndef _GNU_SOURCE -#define _GNU_SOURCE +#define _GNU_SOURCE #endif #include @@ -89,7 +89,7 @@ static const char *libcfs_debug_subsystems[] = "pinger", "filter", "", "echo", "ldlm", "lov", "lquota", "", "", "", "", "lmv", - "", "sec", "gss", "", + "", "sec", "gss", "", "mgc", "mgs", "fid", "fld", NULL}; static const char *libcfs_debug_masks[] = {"trace", "inode", "super", "ext2", @@ -280,10 +280,11 @@ static int applymask(char* procpath, int value) if (rc != 0) { fprintf(stderr, "Write to %s failed: %s\n", procpath, 
strerror(errno)); - return rc; } + dbg_close_ctlhandle(fd); - return 0; + + return rc; } static void applymask_all(unsigned int subs_mask, unsigned int debug_mask) @@ -389,7 +390,7 @@ static int add_rec(struct dbg_line *line, struct dbg_line ***linevp, int *lenp, *linevp = linev; *lenp = nlen; } - linev[used] = line; + linev[used] = line; return 1; } @@ -456,10 +457,10 @@ static int parse_buffer(FILE *in, FILE *out) line->text = p; if (!add_rec(line, &linev, &linev_len, kept)) { - fprintf(stderr, "malloc failed; printing accumulated " + fprintf(stderr, "malloc failed; printing accumulated " "records and exiting.\n"); break; - } + } kept++; } @@ -499,7 +500,7 @@ int jt_dbg_debug_kernel(int argc, char **argv) strcpy(filename, argv[1]); else sprintf(filename, "/tmp/lustre-log."CFS_TIME_T".%u", - time(NULL),getpid()); + time(NULL),getpid()); if (stat(filename, &st) == 0 && S_ISREG(st.st_mode)) unlink(filename); @@ -515,7 +516,7 @@ int jt_dbg_debug_kernel(int argc, char **argv) if (rc != 0) { fprintf(stderr, "write(%s) failed: %s\n", filename, strerror(errno)); - close(fd); + dbg_close_ctlhandle(fd); return 1; } dbg_close_ctlhandle(fd); -- 1.8.3.1