X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fqswlnd%2Fqswlnd.h;h=438edc682fc94cda7c7ede5ad7e5e7f13cbe26b9;hp=0d8e2fa61d59de54787c60beab6f12e81d5aaef1;hb=c39489126f88bb5b30643ebb11c72fbe9f9d2241;hpb=a14f741223ce7e957b591201a6de2d5ec57ae7fa diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h index 0d8e2fa..438edc6 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -24,35 +24,40 @@ #ifndef _QSWNAL_H #define _QSWNAL_H -#define EXPORT_SYMTAB - -#ifdef PROPRIETARY_ELAN -# include -#else -# include +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB #endif +#include #undef printf /* nasty QSW #define */ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#if MULTIRAIL_EKC +# include +#else +# include +# include +# include +# include +# include +# include +# include +# include +# include +#endif #include #include #include #include #include -#include +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#include /* wait_on_buffer */ +#else +#include /* wait_on_buffer */ +#endif #include #include #include @@ -64,14 +69,16 @@ #include #include #include -#include +#include #include #define DEBUG_SUBSYSTEM S_QSWNAL #include +#include #include #include +#include #define KQSW_CHECKSUM 0 #if KQSW_CHECKSUM @@ -83,22 +90,10 @@ typedef unsigned long kqsw_csum_t; #define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE) /* - * Elan NAL - */ -#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */ -#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */ -/* NB small/large message sizes are GLOBAL constants */ - -/* * Performance Tuning defines * NB no mention of PAGE_SIZE for interoperability */ -#if PTL_LARGE_MTU -# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */ -#else -# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL
will cope with */ -#endif - +#define KQSW_MAXPAYLOAD PTL_MTU /* biggest message this NAL will cope with */ #define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */ #define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ @@ -114,6 +109,10 @@ typedef unsigned long kqsw_csum_t; #define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ +#define KQSW_OPTIMIZED_GETS 1 /* optimize gets >= this size */ +#define KQSW_OPTIMIZED_PUTS (32<<10) /* optimize puts >= this size */ +#define KQSW_COPY_SMALL_FWD 0 /* copy small fwd messages to pre-mapped buffer? */ + /* * derived constants */ @@ -134,48 +133,94 @@ typedef unsigned long kqsw_csum_t; #define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) /* biggest complete packet we can receive (or transmit) */ +/* Remote memory descriptor */ +typedef struct +{ + __u32 kqrmd_nfrag; /* # frags */ +#if MULTIRAIL_EKC + EP_NMD kqrmd_frag[0]; /* actual frags */ +#else + EP_IOVEC kqrmd_frag[0]; /* actual frags */ +#endif +} kqswnal_remotemd_t; typedef struct { struct list_head krx_list; /* enqueue -> thread */ EP_RCVR *krx_eprx; /* port to post receives to */ EP_RXD *krx_rxd; /* receive descriptor (for repost) */ - E3_Addr krx_elanaddr; /* Elan address of buffer (contiguous in elan vm) */ +#if MULTIRAIL_EKC + EP_NMD krx_elanbuffer; /* contiguous Elan buffer */ +#else + E3_Addr krx_elanbuffer; /* contiguous Elan buffer */ +#endif int krx_npages; /* # pages in receive buffer */ int krx_nob; /* Number Of Bytes received into buffer */ + int krx_rpc_reply_needed; /* peer waiting for EKC RPC reply */ + int krx_rpc_reply_status; /* what status to send */ + int krx_state; /* what this RX is doing */ + atomic_t krx_refcount; /* how to tell when rpc is done */ kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */ - struct page *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */ - struct iovec krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */ + ptl_kiov_t
krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */ } kqswnal_rx_t; +#define KRX_POSTED 1 /* receiving */ +#define KRX_PARSE 2 /* ready to be parsed */ +#define KRX_COMPLETING 3 /* waiting to be completed */ + + typedef struct { struct list_head ktx_list; /* enqueue idle/active */ struct list_head ktx_delayed_list; /* enqueue delayedtxds */ - int ktx_isnblk:1; /* reserved descriptor? */ - int ktx_forwarding:1; /* forwarding (rather than local send) */ + unsigned int ktx_isnblk:1; /* reserved descriptor? */ + unsigned int ktx_state:7; /* What I'm doing */ + unsigned int ktx_firsttmpfrag:1; /* ktx_frags[0] is in my ebuffer ? 0 : 1 */ uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */ int ktx_npages; /* pages reserved for mapping messages */ int ktx_nmappedpages; /* # pages mapped for current message */ - EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */ - int ktx_niov; /* # message frags */ int ktx_port; /* destination ep port */ ptl_nid_t ktx_nid; /* destination node */ - void *ktx_args[2]; /* completion passthru */ - E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ + void *ktx_args[3]; /* completion passthru */ char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */ unsigned long ktx_launchtime; /* when (in jiffies) the transmit was launched */ /* debug/info fields */ pid_t ktx_launcher; /* pid of launching process */ - ptl_hdr_t *ktx_wire_hdr; /* portals header (wire endian) */ + + int ktx_nfrag; /* # message frags */ +#if MULTIRAIL_EKC + int ktx_rail; /* preferred rail */ + EP_NMD ktx_ebuffer; /* elan mapping of ktx_buffer */ + EP_NMD ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */ +#else + E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ + EP_IOVEC ktx_frags[EP_MAXFRAG];/* msg frags (elan vaddrs) */ +#endif } kqswnal_tx_t; +#define KTX_IDLE 0 /* on kqn_(nblk_)idletxds */ +#define KTX_FORWARDING 1 /* sending a forwarded packet */ +#define KTX_SENDING 2 /* normal send */
+#define KTX_GETTING 3 /* sending optimised get */ +#define KTX_PUTTING 4 /* sending optimised put */ +#define KTX_RDMAING 5 /* handling optimised put/get */ + +typedef struct +{ + /* dynamic tunables... */ + int kqn_optimized_puts; /* optimized PUTs? */ + int kqn_optimized_gets; /* optimized GETs? */ +#if CONFIG_SYSCTL + struct ctl_table_header *kqn_sysctl; /* sysctl interface */ +#endif +} kqswnal_tunables_t; + typedef struct { char kqn_init; /* what's been initialised */ char kqn_shuttingdown; /* I'm trying to shut down */ - atomic_t kqn_nthreads; /* # threads still running */ + atomic_t kqn_nthreads; /* # threads running */ kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */ kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ @@ -186,6 +231,7 @@ typedef struct spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */ struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */ + atomic_t kqn_pending_txs; /* # transmits being prepped */ spinlock_t kqn_sched_lock; /* serialise packet schedulers */ wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */ @@ -194,35 +240,44 @@ typedef struct struct list_head kqn_delayedfwds; /* delayed forwards */ struct list_head kqn_delayedtxds; /* delayed transmits */ - spinlock_t kqn_statelock; /* cb_cli/cb_sti */ - nal_cb_t *kqn_cb; /* -> kqswnal_lib */ - EP_DEV *kqn_epdev; /* elan device */ +#if MULTIRAIL_EKC + EP_SYS *kqn_ep; /* elan system */ + EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */ + EP_NMH *kqn_ep_rx_nmh; /* elan reserved rx vaddrs */ +#else + EP_DEV *kqn_ep; /* elan device */ + ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ + ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ +#endif EP_XMTR *kqn_eptx; /* elan transmitter */ EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ EP_RCVR *kqn_eprx_large; /* elan receiver (large
messages) */ - ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ - ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ kpr_router_t kqn_router; /* connection to Kernel Portals Router module */ ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */ int kqn_nnodes; /* this cluster's size */ int kqn_elanid; /* this nodes's elan ID */ + + EP_STATUSBLK kqn_rpc_success; /* preset RPC reply status blocks */ + EP_STATUSBLK kqn_rpc_failed; } kqswnal_data_t; /* kqn_init state */ #define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ #define KQN_INIT_DATA 1 -#define KQN_INIT_PTL 2 +#define KQN_INIT_LIB 2 #define KQN_INIT_ALL 3 -extern nal_cb_t kqswnal_lib; -extern nal_t kqswnal_api; -extern kqswnal_data_t kqswnal_data; +extern lib_nal_t kqswnal_lib; +extern nal_t kqswnal_api; +extern kqswnal_tunables_t kqswnal_tunables; +extern kqswnal_data_t kqswnal_data; extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg); extern void kqswnal_rxhandler(EP_RXD *rxd); extern int kqswnal_scheduler (void *); extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); +extern void kqswnal_rx_done (kqswnal_rx_t *krx); static inline ptl_nid_t kqswnal_elanid2nid (int elanid) @@ -241,11 +296,10 @@ kqswnal_nid2elanid (ptl_nid_t nid) return (nid - kqswnal_data.kqn_nid_offset); } -static inline void -kqswnal_requeue_rx (kqswnal_rx_t *krx) +static inline ptl_nid_t +kqswnal_rx_nid(kqswnal_rx_t *krx) { - ep_requeue_receive (krx->krx_rxd, kqswnal_rxhandler, krx, - krx->krx_elanaddr, krx->krx_npages * PAGE_SIZE); + return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd))); } static inline int @@ -270,4 +324,51 @@ static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob) } #endif +static inline void kqswnal_rx_decref (kqswnal_rx_t *krx) +{ + LASSERT (atomic_read (&krx->krx_refcount) > 0); + if (atomic_dec_and_test (&krx->krx_refcount)) + kqswnal_rx_done(krx); +} + +#if MULTIRAIL_EKC +# ifndef EP_RAILMASK_ALL +# error
"old (unsupported) version of EKC headers" +# endif +#else +/* multirail defines these in */ +#define EP_MSG_SVC_PORTALS_SMALL (0x10) /* Portals over elan port number (small payloads) */ +#define EP_MSG_SVC_PORTALS_LARGE (0x11) /* Portals over elan port number (large payloads) */ +/* NB small/large message sizes are GLOBAL constants */ + +/* A minimal attempt to minimise inline #ifdeffing */ + +#define EP_SUCCESS ESUCCESS +#define EP_ENOMEM ENOMEM + +static inline EP_XMTR * +ep_alloc_xmtr(EP_DEV *e) +{ + return (ep_alloc_large_xmtr(e)); +} + +static inline EP_RCVR * +ep_alloc_rcvr(EP_DEV *e, int svc, int nenv) +{ + return (ep_install_large_rcvr(e, svc, nenv)); +} + +static inline void +ep_free_xmtr(EP_XMTR *x) +{ + ep_free_large_xmtr(x); +} + +static inline void +ep_free_rcvr(EP_RCVR *r) +{ + ep_remove_large_rcvr(r); +} +#endif + #endif /* _QSWNAL_H */