From f569e36c2c16090108dfdeaf380c3dd237c546ec Mon Sep 17 00:00:00 2001 From: phil Date: Mon, 25 Aug 2003 19:26:27 +0000 Subject: [PATCH] b=1820 This patch from eeb tracks outstanding tx descriptors and provides an ioctl to fetch them; should be useful for tracking down cases where bug 1031 is not resolved by loading the modules (see also bug 1827) --- lnet/include/linux/kp30.h | 1 + lnet/include/lnet/lnetctl.h | 1 + lnet/include/lnet/ptlctl.h | 1 + lnet/klnds/qswlnd/qswlnd.c | 49 ++++++++++++++++-- lnet/klnds/qswlnd/qswlnd.h | 90 +++++++++++++++++---------------- lnet/klnds/qswlnd/qswlnd_cb.c | 64 +++++++++++------------ lnet/utils/portals.c | 40 +++++++++++++++ lnet/utils/ptlctl.c | 1 + lustre/portals/include/linux/kp30.h | 1 + lustre/portals/include/portals/ptlctl.h | 1 + lustre/portals/knals/qswnal/qswnal.c | 49 ++++++++++++++++-- lustre/portals/knals/qswnal/qswnal.h | 90 +++++++++++++++++---------------- lustre/portals/knals/qswnal/qswnal_cb.c | 64 +++++++++++------------ lustre/portals/utils/portals.c | 40 +++++++++++++++ lustre/portals/utils/ptlctl.c | 1 + lustre/utils/lctl.c | 2 + 16 files changed, 335 insertions(+), 160 deletions(-) diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 85fe8e7..750d16c 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -887,6 +887,7 @@ extern ptl_handle_ni_t kscimacnal_ni; #define NAL_CMD_DEL_AUTOCONN 105 #define NAL_CMD_ADD_AUTOCONN 106 #define NAL_CMD_GET_AUTOCONN 107 +#define NAL_CMD_GET_TXDESC 108 enum { DEBUG_DAEMON_START = 1, diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index ffe7e5b..8278111 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -41,6 +41,7 @@ int jt_ptl_print_connections (int argc, char **argv); int jt_ptl_connect(int argc, char **argv); int jt_ptl_disconnect(int argc, char **argv); int jt_ptl_push_connection(int argc, char **argv); +int jt_ptl_print_active_txs(int argc, char **argv); int jt_ptl_ping(int argc, 
char **argv); int jt_ptl_shownid(int argc, char **argv); int jt_ptl_mynid(int argc, char **argv); diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index ffe7e5b..8278111 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -41,6 +41,7 @@ int jt_ptl_print_connections (int argc, char **argv); int jt_ptl_connect(int argc, char **argv); int jt_ptl_disconnect(int argc, char **argv); int jt_ptl_push_connection(int argc, char **argv); +int jt_ptl_print_active_txs(int argc, char **argv); int jt_ptl_ping(int argc, char **argv); int jt_ptl_shownid(int argc, char **argv); int jt_ptl_mynid(int argc, char **argv); diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c index 1a8fb74..b5e1e39 100644 --- a/lnet/klnds/qswlnd/qswlnd.c +++ b/lnet/klnds/qswlnd/qswlnd.c @@ -112,11 +112,49 @@ kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, } int +kqswnal_get_tx_desc (struct portal_ioctl_data *data) +{ + unsigned long flags; + struct list_head *tmp; + kqswnal_tx_t *ktx; + int index = data->ioc_count; + int rc = -ENOENT; + + spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); + + list_for_each (tmp, &kqswnal_data.kqn_activetxds) { + if (index-- != 0) + continue; + + ktx = list_entry (tmp, kqswnal_tx_t, ktx_list); + + data->ioc_pbuf1 = (char *)ktx; + data->ioc_count = NTOH__u32(ktx->ktx_wire_hdr->type); + data->ioc_size = NTOH__u32(PTL_HDR_LENGTH(ktx->ktx_wire_hdr)); + data->ioc_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid); + data->ioc_nid2 = ktx->ktx_nid; + data->ioc_misc = ktx->ktx_launcher; + data->ioc_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) | + ((!ktx->ktx_forwarding) ? 0 : 2) | + ((!ktx->ktx_isnblk) ? 
0 : 4); + + rc = 0; + break; + } + + spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); + return (rc); +} + +int kqswnal_cmd (struct portal_ioctl_data *data, void *private) { LASSERT (data != NULL); switch (data->ioc_nal_cmd) { + case NAL_CMD_GET_TXDESC: + return (kqswnal_get_tx_desc (data)); + case NAL_CMD_REGISTER_MYNID: CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n", data->ioc_nid - kqswnal_data.kqn_elanid, @@ -318,6 +356,7 @@ kqswnal_initialise (void) INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds); + INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds); spin_lock_init (&kqswnal_data.kqn_idletxd_lock); init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq); INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq); @@ -458,12 +497,12 @@ kqswnal_initialise (void) ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */ ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */ - if (i < KQSW_NTXMSGS) - ktx->ktx_idle = &kqswnal_data.kqn_idletxds; - else - ktx->ktx_idle = &kqswnal_data.kqn_nblk_idletxds; + INIT_LIST_HEAD (&ktx->ktx_delayed_list); - list_add_tail (&ktx->ktx_list, ktx->ktx_idle); + ktx->ktx_isnblk = (i >= KQSW_NTXMSGS); + list_add_tail (&ktx->ktx_list, + ktx->ktx_isnblk ? 
&kqswnal_data.kqn_nblk_idletxds : + &kqswnal_data.kqn_idletxds); } /**********************************************************************/ diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index 85e585b..294f3e5 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -73,20 +73,20 @@ #include #include -#define KQSW_CHECKSUM 0 +#define KQSW_CHECKSUM 0 #if KQSW_CHECKSUM typedef unsigned long kqsw_csum_t; -#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t)) +#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t)) #else -#define KQSW_CSUM_SIZE 0 +#define KQSW_CSUM_SIZE 0 #endif -#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE) +#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE) /* * Elan NAL */ -#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */ -#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */ +#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */ +#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */ /* NB small/large message sizes are GLOBAL constants */ /* @@ -94,51 +94,51 @@ typedef unsigned long kqsw_csum_t; * NB no mention of PAGE_SIZE for interoperability */ #if PTL_LARGE_MTU -# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */ +# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */ #else -# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL will cope with */ +# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL will cope with */ #endif -#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */ +#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */ -#define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ +#define KQSW_TX_MAXCONTIG (1<<10) /* largest payload 
that gets made contiguous on transmit */ -#define KQSW_NTXMSGS 8 /* # normal transmit messages */ -#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */ +#define KQSW_NTXMSGS 8 /* # normal transmit messages */ +#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */ -#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ -#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ +#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ +#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ -#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */ -#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */ +#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */ +#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */ -#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ +#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ /* * derived constants */ -#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG) +#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG) /* The pre-allocated tx buffer (hdr + small payload) */ -#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1) +#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1) /* Reserve elan address space for pre-allocated and pre-mapped transmit * buffer and a full payload too. 
Extra pages allow for page alignment */ -#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) +#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) /* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) +#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) -#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD)) +#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD)) /* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) +#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) /* biggest complete packet we can receive (or transmit) */ typedef struct { struct list_head krx_list; /* enqueue -> thread */ - EP_RCVR *krx_eprx; /* port to post receives to */ + EP_RCVR *krx_eprx; /* port to post receives to */ EP_RXD *krx_rxd; /* receive descriptor (for repost) */ E3_Addr krx_elanaddr; /* Elan address of buffer (contiguous in elan vm) */ int krx_npages; /* # pages in receive buffer */ @@ -150,24 +150,25 @@ typedef struct typedef struct { - struct list_head ktx_list; /* enqueue idle/delayed */ - struct list_head *ktx_idle; /* where to put when idle */ - char ktx_state; /* What I'm doing */ + struct list_head ktx_list; /* enqueue idle/active */ + struct list_head ktx_delayed_list; /* enqueue delayedtxds */ + unsigned int ktx_isnblk:1; /* reserved descriptor? (unsigned: a signed 1-bit field cannot hold 1) 
*/ + unsigned int ktx_forwarding:1; /* forwarding (rather than local send) */ uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */ int ktx_npages; /* pages reserved for mapping messages */ int ktx_nmappedpages; /* # pages mapped for current message */ - EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */ + EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */ int ktx_niov; /* # message frags */ int ktx_port; /* destination ep port */ ptl_nid_t ktx_nid; /* destination node */ void *ktx_args[2]; /* completion passthru */ - E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ + E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */ -} kqswnal_tx_t; -#define KTX_IDLE 0 /* MUST BE ZERO (so zeroed ktx is idle) */ -#define KTX_SENDING 1 /* local send */ -#define KTX_FORWARDING 2 /* routing a packet */ + /* debug/info fields */ + pid_t ktx_launcher; /* pid of launching process */ + ptl_hdr_t *ktx_wire_hdr; /* portals header (wire endian) */ +} kqswnal_tx_t; typedef struct { @@ -179,7 +180,8 @@ typedef struct kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ struct list_head kqn_idletxds; /* transmit descriptors free to use */ - struct list_head kqn_nblk_idletxds; /* reserve of */ + struct list_head kqn_nblk_idletxds; /* reserved free transmit descriptors */ + struct list_head kqn_activetxds; /* transmit descriptors being used */ spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */ struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */ @@ -193,12 +195,12 @@ typedef struct spinlock_t kqn_statelock; /* cb_cli/cb_sti */ nal_cb_t *kqn_cb; /* -> kqswnal_lib */ - EP_DEV *kqn_epdev; /* elan device */ - EP_XMTR *kqn_eptx; /* elan transmitter */ - EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ - EP_RCVR 
*kqn_eprx_large; /* elan receiver (large messages) */ - ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ - ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ + EP_DEV *kqn_epdev; /* elan device */ + EP_XMTR *kqn_eptx; /* elan transmitter */ + EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ + EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */ + ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ + ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ kpr_router_t kqn_router; /* connection to Kernel Portals Router module */ ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */ @@ -207,10 +209,10 @@ typedef struct } kqswnal_data_t; /* kqn_init state */ -#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ -#define KQN_INIT_DATA 1 -#define KQN_INIT_PTL 2 -#define KQN_INIT_ALL 3 +#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ +#define KQN_INIT_DATA 1 +#define KQN_INIT_PTL 2 +#define KQN_INIT_ALL 3 extern nal_cb_t kqswnal_lib; extern nal_t kqswnal_api; diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index a6b4b93..ede36d1 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -296,22 +296,23 @@ void kqswnal_put_idle_tx (kqswnal_tx_t *ktx) { kpr_fwd_desc_t *fwd = NULL; - struct list_head *idle = ktx->ktx_idle; unsigned long flags; - kqswnal_unmap_tx (ktx); /* release temporary mappings */ - ktx->ktx_state = KTX_IDLE; + kqswnal_unmap_tx (ktx); /* release temporary mappings */ spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - list_add (&ktx->ktx_list, idle); + list_del (&ktx->ktx_list); /* take off active list */ - /* reserved for non-blocking tx */ - if (idle == &kqswnal_data.kqn_nblk_idletxds) { + if (ktx->ktx_isnblk) { + /* reserved for non-blocking tx */ + list_add (&ktx->ktx_list, &kqswnal_data.kqn_nblk_idletxds); spin_unlock_irqrestore 
(&kqswnal_data.kqn_idletxd_lock, flags); return; } + list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds); + /* anything blocking for a tx descriptor? */ if (!list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */ { @@ -357,7 +358,6 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) if (!list_empty (&kqswnal_data.kqn_idletxds)) { ktx = list_entry (kqswnal_data.kqn_idletxds.next, kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); break; } @@ -379,7 +379,6 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next, kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); break; } @@ -392,6 +391,12 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) !list_empty (&kqswnal_data.kqn_idletxds)); } + if (ktx != NULL) { + list_del (&ktx->ktx_list); + list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds); + ktx->ktx_launcher = current->pid; + } + spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */ @@ -402,20 +407,12 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) void kqswnal_tx_done (kqswnal_tx_t *ktx, int error) { - switch (ktx->ktx_state) { - case KTX_FORWARDING: /* router asked me to forward this packet */ + if (ktx->ktx_forwarding) /* router asked me to forward this packet */ kpr_fwd_done (&kqswnal_data.kqn_router, (kpr_fwd_desc_t *)ktx->ktx_args[0], error); - break; - - case KTX_SENDING: /* packet sourced locally */ + else /* packet sourced locally */ lib_finalize (&kqswnal_lib, ktx->ktx_args[0], (lib_msg_t *)ktx->ktx_args[1]); - break; - - default: - LASSERT (0); - } kqswnal_put_idle_tx (ktx); } @@ -467,7 +464,7 @@ kqswnal_launch (kqswnal_tx_t *ktx) spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_delayedtxds); + list_add_tail (&ktx->ktx_delayed_list, &kqswnal_data.kqn_delayedtxds); if (waitqueue_active 
(&kqswnal_data.kqn_sched_waitq)) wake_up (&kqswnal_data.kqn_sched_waitq); @@ -612,6 +609,7 @@ kqswnal_sendmsg (nal_cb_t *nal, } memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ + ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer; #if KQSW_CHECKSUM csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr)); @@ -666,12 +664,12 @@ kqswnal_sendmsg (nal_cb_t *nal, } } - ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ? - EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; - ktx->ktx_nid = nid; - ktx->ktx_state = KTX_SENDING; /* => lib_finalize() on completion */ - ktx->ktx_args[0] = private; - ktx->ktx_args[1] = cookie; + ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ? + EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; + ktx->ktx_nid = nid; + ktx->ktx_forwarding = 0; /* => lib_finalize() on completion */ + ktx->ktx_args[0] = private; + ktx->ktx_args[1] = cookie; rc = kqswnal_launch (ktx); if (rc != 0) { /* failed? */ @@ -766,6 +764,8 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) ktx->ktx_iov[0].Base = ktx->ktx_ebuffer; /* already mapped */ ktx->ktx_iov[0].Len = nob; ktx->ktx_niov = 1; + + ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer; } else { @@ -774,13 +774,15 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) rc = kqswnal_map_tx_iov (ktx, nob, niov, iov); if (rc != 0) goto failed; + + ktx->ktx_wire_hdr = (ptl_hdr_t *)iov[0].iov_base; } - ktx->ktx_port = (nob <= (sizeof (ptl_hdr_t) + KQSW_SMALLPAYLOAD)) ? - EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; - ktx->ktx_nid = nid; - ktx->ktx_state = KTX_FORWARDING; /* kpr_put_packet() on completion */ - ktx->ktx_args[0] = fwd; + ktx->ktx_port = (nob <= (sizeof (ptl_hdr_t) + KQSW_SMALLPAYLOAD)) ? 
+ EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; + ktx->ktx_nid = nid; + ktx->ktx_forwarding = 1; + ktx->ktx_args[0] = fwd; rc = kqswnal_launch (ktx); if (rc == 0) @@ -1156,7 +1158,7 @@ kqswnal_scheduler (void *arg) { ktx = list_entry(kqswnal_data.kqn_delayedtxds.next, - kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); + kqswnal_tx_t, ktx_delayed_list); /* delayed txds are queued via ktx_delayed_list */ + list_del_init (&ktx->ktx_delayed_list); spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags); diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 6a9030c..4a05234 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -848,6 +848,46 @@ int jt_ptl_push_connection (int argc, char **argv) return 0; } +int +jt_ptl_print_active_txs (int argc, char **argv) +{ + struct portal_ioctl_data data; + int index; + int rc; + + if (!g_nal_is_compatible (argv[0], QSWNAL, 0)) + return -1; + + for (index = 0;;index++) { + PORTAL_IOC_INIT (data); + data.ioc_nal = g_nal; + data.ioc_nal_cmd = NAL_CMD_GET_TXDESC; + data.ioc_count = index; + + rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); + if (rc != 0) + break; + + printf ("%p: %5s payload %6d bytes to "LPX64" via "LPX64" by pid %6d: %s,%s,%s\n", + data.ioc_pbuf1, + data.ioc_count == PTL_MSG_ACK ? "ACK" : + data.ioc_count == PTL_MSG_PUT ? "PUT" : + data.ioc_count == PTL_MSG_GET ? "GET" : + data.ioc_count == PTL_MSG_REPLY ? "REPLY" : "", + data.ioc_size, + data.ioc_nid, + data.ioc_nid2, + data.ioc_misc, + (data.ioc_flags & 1) ? "delayed" : "active", + (data.ioc_flags & 2) ? "forwarding" : "sending", + (data.ioc_flags & 4) ? 
"nblk" : "normal"); + } + + if (index == 0) + printf ("\n"); + return 0; +} + int jt_ptl_ping(int argc, char **argv) { int rc; diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c index 50d9c87..c083e48 100644 --- a/lnet/utils/ptlctl.c +++ b/lnet/utils/ptlctl.c @@ -37,6 +37,7 @@ command_t list[] = { {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [xi])"}, {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"}, {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"}, + {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"}, {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"}, {"shownid", jt_ptl_shownid, 0, "print the local NID"}, {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"}, diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 85fe8e7..750d16c 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -887,6 +887,7 @@ extern ptl_handle_ni_t kscimacnal_ni; #define NAL_CMD_DEL_AUTOCONN 105 #define NAL_CMD_ADD_AUTOCONN 106 #define NAL_CMD_GET_AUTOCONN 107 +#define NAL_CMD_GET_TXDESC 108 enum { DEBUG_DAEMON_START = 1, diff --git a/lustre/portals/include/portals/ptlctl.h b/lustre/portals/include/portals/ptlctl.h index ffe7e5b..8278111 100644 --- a/lustre/portals/include/portals/ptlctl.h +++ b/lustre/portals/include/portals/ptlctl.h @@ -41,6 +41,7 @@ int jt_ptl_print_connections (int argc, char **argv); int jt_ptl_connect(int argc, char **argv); int jt_ptl_disconnect(int argc, char **argv); int jt_ptl_push_connection(int argc, char **argv); +int jt_ptl_print_active_txs(int argc, char **argv); int jt_ptl_ping(int argc, char **argv); int jt_ptl_shownid(int argc, char **argv); int jt_ptl_mynid(int argc, char **argv); diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c index 
1a8fb74..b5e1e39 100644 --- a/lustre/portals/knals/qswnal/qswnal.c +++ b/lustre/portals/knals/qswnal/qswnal.c @@ -112,11 +112,49 @@ kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, } int +kqswnal_get_tx_desc (struct portal_ioctl_data *data) +{ + unsigned long flags; + struct list_head *tmp; + kqswnal_tx_t *ktx; + int index = data->ioc_count; + int rc = -ENOENT; + + spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); + + list_for_each (tmp, &kqswnal_data.kqn_activetxds) { + if (index-- != 0) + continue; + + ktx = list_entry (tmp, kqswnal_tx_t, ktx_list); + + data->ioc_pbuf1 = (char *)ktx; + data->ioc_count = NTOH__u32(ktx->ktx_wire_hdr->type); + data->ioc_size = NTOH__u32(PTL_HDR_LENGTH(ktx->ktx_wire_hdr)); + data->ioc_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid); + data->ioc_nid2 = ktx->ktx_nid; + data->ioc_misc = ktx->ktx_launcher; + data->ioc_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) | + ((!ktx->ktx_forwarding) ? 0 : 2) | + ((!ktx->ktx_isnblk) ? 
0 : 4); + + rc = 0; + break; + } + + spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); + return (rc); +} + +int kqswnal_cmd (struct portal_ioctl_data *data, void *private) { LASSERT (data != NULL); switch (data->ioc_nal_cmd) { + case NAL_CMD_GET_TXDESC: + return (kqswnal_get_tx_desc (data)); + case NAL_CMD_REGISTER_MYNID: CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n", data->ioc_nid - kqswnal_data.kqn_elanid, @@ -318,6 +356,7 @@ kqswnal_initialise (void) INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds); + INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds); spin_lock_init (&kqswnal_data.kqn_idletxd_lock); init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq); INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq); @@ -458,12 +497,12 @@ kqswnal_initialise (void) ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */ ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */ - if (i < KQSW_NTXMSGS) - ktx->ktx_idle = &kqswnal_data.kqn_idletxds; - else - ktx->ktx_idle = &kqswnal_data.kqn_nblk_idletxds; + INIT_LIST_HEAD (&ktx->ktx_delayed_list); - list_add_tail (&ktx->ktx_list, ktx->ktx_idle); + ktx->ktx_isnblk = (i >= KQSW_NTXMSGS); + list_add_tail (&ktx->ktx_list, + ktx->ktx_isnblk ? 
&kqswnal_data.kqn_nblk_idletxds : + &kqswnal_data.kqn_idletxds); } /**********************************************************************/ diff --git a/lustre/portals/knals/qswnal/qswnal.h b/lustre/portals/knals/qswnal/qswnal.h index 85e585b..294f3e5 100644 --- a/lustre/portals/knals/qswnal/qswnal.h +++ b/lustre/portals/knals/qswnal/qswnal.h @@ -73,20 +73,20 @@ #include #include -#define KQSW_CHECKSUM 0 +#define KQSW_CHECKSUM 0 #if KQSW_CHECKSUM typedef unsigned long kqsw_csum_t; -#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t)) +#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t)) #else -#define KQSW_CSUM_SIZE 0 +#define KQSW_CSUM_SIZE 0 #endif -#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE) +#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE) /* * Elan NAL */ -#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */ -#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */ +#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */ +#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */ /* NB small/large message sizes are GLOBAL constants */ /* @@ -94,51 +94,51 @@ typedef unsigned long kqsw_csum_t; * NB no mention of PAGE_SIZE for interoperability */ #if PTL_LARGE_MTU -# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */ +# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */ #else -# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL will cope with */ +# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL will cope with */ #endif -#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */ +#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */ -#define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ +#define 
KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ -#define KQSW_NTXMSGS 8 /* # normal transmit messages */ -#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */ +#define KQSW_NTXMSGS 8 /* # normal transmit messages */ +#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */ -#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ -#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ +#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ +#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ -#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */ -#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */ +#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */ +#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */ -#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ +#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ /* * derived constants */ -#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG) +#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG) /* The pre-allocated tx buffer (hdr + small payload) */ -#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1) +#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1) /* Reserve elan address space for pre-allocated and pre-mapped transmit * buffer and a full payload too. 
Extra pages allow for page alignment */ -#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) +#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) /* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) +#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) -#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD)) +#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD)) /* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) +#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) /* biggest complete packet we can receive (or transmit) */ typedef struct { struct list_head krx_list; /* enqueue -> thread */ - EP_RCVR *krx_eprx; /* port to post receives to */ + EP_RCVR *krx_eprx; /* port to post receives to */ EP_RXD *krx_rxd; /* receive descriptor (for repost) */ E3_Addr krx_elanaddr; /* Elan address of buffer (contiguous in elan vm) */ int krx_npages; /* # pages in receive buffer */ @@ -150,24 +150,25 @@ typedef struct typedef struct { - struct list_head ktx_list; /* enqueue idle/delayed */ - struct list_head *ktx_idle; /* where to put when idle */ - char ktx_state; /* What I'm doing */ + struct list_head ktx_list; /* enqueue idle/active */ + struct list_head ktx_delayed_list; /* enqueue delayedtxds */ + unsigned int ktx_isnblk:1; /* reserved descriptor? (unsigned: a signed 1-bit field cannot hold 1) 
*/ + unsigned int ktx_forwarding:1; /* forwarding (rather than local send) */ uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */ int ktx_npages; /* pages reserved for mapping messages */ int ktx_nmappedpages; /* # pages mapped for current message */ - EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */ + EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */ int ktx_niov; /* # message frags */ int ktx_port; /* destination ep port */ ptl_nid_t ktx_nid; /* destination node */ void *ktx_args[2]; /* completion passthru */ - E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ + E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */ -} kqswnal_tx_t; -#define KTX_IDLE 0 /* MUST BE ZERO (so zeroed ktx is idle) */ -#define KTX_SENDING 1 /* local send */ -#define KTX_FORWARDING 2 /* routing a packet */ + /* debug/info fields */ + pid_t ktx_launcher; /* pid of launching process */ + ptl_hdr_t *ktx_wire_hdr; /* portals header (wire endian) */ +} kqswnal_tx_t; typedef struct { @@ -179,7 +180,8 @@ typedef struct kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ struct list_head kqn_idletxds; /* transmit descriptors free to use */ - struct list_head kqn_nblk_idletxds; /* reserve of */ + struct list_head kqn_nblk_idletxds; /* reserved free transmit descriptors */ + struct list_head kqn_activetxds; /* transmit descriptors being used */ spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */ struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */ @@ -193,12 +195,12 @@ typedef struct spinlock_t kqn_statelock; /* cb_cli/cb_sti */ nal_cb_t *kqn_cb; /* -> kqswnal_lib */ - EP_DEV *kqn_epdev; /* elan device */ - EP_XMTR *kqn_eptx; /* elan transmitter */ - EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ - EP_RCVR 
*kqn_eprx_large; /* elan receiver (large messages) */ - ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ - ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ + EP_DEV *kqn_epdev; /* elan device */ + EP_XMTR *kqn_eptx; /* elan transmitter */ + EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ + EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */ + ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ + ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ kpr_router_t kqn_router; /* connection to Kernel Portals Router module */ ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */ @@ -207,10 +209,10 @@ typedef struct } kqswnal_data_t; /* kqn_init state */ -#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ -#define KQN_INIT_DATA 1 -#define KQN_INIT_PTL 2 -#define KQN_INIT_ALL 3 +#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ +#define KQN_INIT_DATA 1 +#define KQN_INIT_PTL 2 +#define KQN_INIT_ALL 3 extern nal_cb_t kqswnal_lib; extern nal_t kqswnal_api; diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c index a6b4b93..ede36d1 100644 --- a/lustre/portals/knals/qswnal/qswnal_cb.c +++ b/lustre/portals/knals/qswnal/qswnal_cb.c @@ -296,22 +296,23 @@ void kqswnal_put_idle_tx (kqswnal_tx_t *ktx) { kpr_fwd_desc_t *fwd = NULL; - struct list_head *idle = ktx->ktx_idle; unsigned long flags; - kqswnal_unmap_tx (ktx); /* release temporary mappings */ - ktx->ktx_state = KTX_IDLE; + kqswnal_unmap_tx (ktx); /* release temporary mappings */ spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - list_add (&ktx->ktx_list, idle); + list_del (&ktx->ktx_list); /* take off active list */ - /* reserved for non-blocking tx */ - if (idle == &kqswnal_data.kqn_nblk_idletxds) { + if (ktx->ktx_isnblk) { + /* reserved for non-blocking tx */ + list_add (&ktx->ktx_list, &kqswnal_data.kqn_nblk_idletxds); 
spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); return; } + list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds); + /* anything blocking for a tx descriptor? */ if (!list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */ { @@ -357,7 +358,6 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) if (!list_empty (&kqswnal_data.kqn_idletxds)) { ktx = list_entry (kqswnal_data.kqn_idletxds.next, kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); break; } @@ -379,7 +379,6 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next, kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); break; } @@ -392,6 +391,12 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) !list_empty (&kqswnal_data.kqn_idletxds)); } + if (ktx != NULL) { + list_del (&ktx->ktx_list); + list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds); + ktx->ktx_launcher = current->pid; + } + spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */ @@ -402,20 +407,12 @@ kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) void kqswnal_tx_done (kqswnal_tx_t *ktx, int error) { - switch (ktx->ktx_state) { - case KTX_FORWARDING: /* router asked me to forward this packet */ + if (ktx->ktx_forwarding) /* router asked me to forward this packet */ kpr_fwd_done (&kqswnal_data.kqn_router, (kpr_fwd_desc_t *)ktx->ktx_args[0], error); - break; - - case KTX_SENDING: /* packet sourced locally */ + else /* packet sourced locally */ lib_finalize (&kqswnal_lib, ktx->ktx_args[0], (lib_msg_t *)ktx->ktx_args[1]); - break; - - default: - LASSERT (0); - } kqswnal_put_idle_tx (ktx); } @@ -467,7 +464,7 @@ kqswnal_launch (kqswnal_tx_t *ktx) spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_delayedtxds); + list_add_tail (&ktx->ktx_delayed_list, &kqswnal_data.kqn_delayedtxds); if (waitqueue_active 
(&kqswnal_data.kqn_sched_waitq)) wake_up (&kqswnal_data.kqn_sched_waitq); @@ -612,6 +609,7 @@ kqswnal_sendmsg (nal_cb_t *nal, } memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ + ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer; #if KQSW_CHECKSUM csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr)); @@ -666,12 +664,12 @@ kqswnal_sendmsg (nal_cb_t *nal, } } - ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ? - EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; - ktx->ktx_nid = nid; - ktx->ktx_state = KTX_SENDING; /* => lib_finalize() on completion */ - ktx->ktx_args[0] = private; - ktx->ktx_args[1] = cookie; + ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ? + EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; + ktx->ktx_nid = nid; + ktx->ktx_forwarding = 0; /* => lib_finalize() on completion */ + ktx->ktx_args[0] = private; + ktx->ktx_args[1] = cookie; rc = kqswnal_launch (ktx); if (rc != 0) { /* failed? */ @@ -766,6 +764,8 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) ktx->ktx_iov[0].Base = ktx->ktx_ebuffer; /* already mapped */ ktx->ktx_iov[0].Len = nob; ktx->ktx_niov = 1; + + ktx->ktx_wire_hdr = (ptl_hdr_t *)ktx->ktx_buffer; } else { @@ -774,13 +774,15 @@ kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) rc = kqswnal_map_tx_iov (ktx, nob, niov, iov); if (rc != 0) goto failed; + + ktx->ktx_wire_hdr = (ptl_hdr_t *)iov[0].iov_base; } - ktx->ktx_port = (nob <= (sizeof (ptl_hdr_t) + KQSW_SMALLPAYLOAD)) ? - EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; - ktx->ktx_nid = nid; - ktx->ktx_state = KTX_FORWARDING; /* kpr_put_packet() on completion */ - ktx->ktx_args[0] = fwd; + ktx->ktx_port = (nob <= (sizeof (ptl_hdr_t) + KQSW_SMALLPAYLOAD)) ? 
+ EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; + ktx->ktx_nid = nid; + ktx->ktx_forwarding = 1; + ktx->ktx_args[0] = fwd; rc = kqswnal_launch (ktx); if (rc == 0) @@ -1156,7 +1158,7 @@ kqswnal_scheduler (void *arg) { ktx = list_entry(kqswnal_data.kqn_delayedtxds.next, kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); + list_del_init (&ktx->ktx_delayed_list); spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags); diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index 6a9030c..4a05234 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -848,6 +848,46 @@ int jt_ptl_push_connection (int argc, char **argv) return 0; } +int +jt_ptl_print_active_txs (int argc, char **argv) +{ + struct portal_ioctl_data data; + int index; + int rc; + + if (!g_nal_is_compatible (argv[0], QSWNAL, 0)) + return -1; + + for (index = 0;;index++) { + PORTAL_IOC_INIT (data); + data.ioc_nal = g_nal; + data.ioc_nal_cmd = NAL_CMD_GET_TXDESC; + data.ioc_count = index; + + rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); + if (rc != 0) + break; + + printf ("%p: %5s payload %6d bytes to "LPX64" via "LPX64" by pid %6d: %s,%s,%s\n", + data.ioc_pbuf1, + data.ioc_count == PTL_MSG_ACK ? "ACK" : + data.ioc_count == PTL_MSG_PUT ? "PUT" : + data.ioc_count == PTL_MSG_GET ? "GET" : + data.ioc_count == PTL_MSG_REPLY ? "REPLY" : "", + data.ioc_size, + data.ioc_nid, + data.ioc_nid2, + data.ioc_misc, + (data.ioc_flags & 1) ? "delayed" : "active", + (data.ioc_flags & 2) ? "forwarding" : "sending", + (data.ioc_flags & 4) ? 
"nblk" : "normal"); + } + + if (index == 0) + printf ("\n"); + return 0; +} + int jt_ptl_ping(int argc, char **argv) { int rc; diff --git a/lustre/portals/utils/ptlctl.c b/lustre/portals/utils/ptlctl.c index 50d9c87..c083e48 100644 --- a/lustre/portals/utils/ptlctl.c +++ b/lustre/portals/utils/ptlctl.c @@ -37,6 +37,7 @@ command_t list[] = { {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [xi])"}, {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"}, {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"}, + {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"}, {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"}, {"shownid", jt_ptl_shownid, 0, "print the local NID"}, {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"}, diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 9c1beb0..a90eeee 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -77,6 +77,8 @@ command_t cmdlist[] = { "usage: connect [ix]"}, {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid\n" "usage: disconnect []"}, + {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)\n" + "usage: active_tx"}, {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local nid. " "The nid defaults to hostname for tcp networks and is automatically " "setup for elan/myrinet/scimac networks.\n" -- 1.8.3.1