1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * Basic library routines.
28 # define EXPORT_SYMTAB
31 #ifdef PROPRIETARY_ELAN
32 # include <qsw/kernel.h>
34 # include <qsnet/kernel.h>
37 #undef printf /* nasty QSW #define */
39 #include <linux/config.h>
40 #include <linux/module.h>
43 # include <elan/epcomms.h>
45 # include <elan3/elanregs.h>
46 # include <elan3/elandev.h>
47 # include <elan3/elanvp.h>
48 # include <elan3/elan3mmu.h>
49 # include <elan3/elanctxt.h>
50 # include <elan3/elandebug.h>
51 # include <elan3/urom_addrs.h>
52 # include <elan3/busops.h>
53 # include <elan3/kcomm.h>
56 #include <linux/kernel.h>
58 #include <linux/string.h>
59 #include <linux/stat.h>
60 #include <linux/errno.h>
61 #include <linux/locks.h>
62 #include <linux/unistd.h>
64 #include <linux/uio.h>
66 #include <asm/system.h>
67 #include <asm/uaccess.h>
70 #include <linux/file.h>
71 #include <linux/stat.h>
72 #include <linux/list.h>
73 #include <linux/sysctl.h>
74 #include <asm/segment.h>
76 #define DEBUG_SUBSYSTEM S_QSWNAL
78 #include <linux/kp30.h>
79 #include <portals/p30.h>
80 #include <portals/lib-p30.h>
/* Checksum configuration; KQSW_CHECKSUM is set to 0 here.
 * NOTE(review): KQSW_CSUM_SIZE is defined twice below (room for two
 * kqsw_csum_t values, or 0).  Presumably these are alternatives selected by
 * KQSW_CHECKSUM, but the selecting preprocessor conditionals are not visible
 * in this view -- confirm against the full file. */
82 #define KQSW_CHECKSUM 0
84 typedef unsigned long kqsw_csum_t;
85 #define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t))
87 #define KQSW_CSUM_SIZE 0
/* Bytes of header per message: the portals header plus KQSW_CSUM_SIZE */
89 #define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE)
92 * Performance Tuning defines
93 * NB no mention of PAGE_SIZE for interoperability
/* Payload limits, descriptor/envelope counts and feature switches.
 * KQSW_SMALLPAYLOAD is sized so that header + small payload total 4KB. */
95 #define KQSW_MAXPAYLOAD PTL_MTU
96 #define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */
98 #define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */
100 #define KQSW_NTXMSGS 8 /* # normal transmit messages */
101 #define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */
103 #define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */
104 #define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */
106 #define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */
107 #define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */
109 #define KQSW_RESCHED 100 /* # busy loops that force the scheduler to yield */
111 #define KQSW_OPTIMIZED_GETS 1 /* optimized gets? */
112 #define KQSW_COPY_SMALL_FWD 0 /* copy small fwd messages to pre-mapped buffer? */
/* Buffer geometry derived from KQSW_HDR_SIZE and the payload limits.
 * Each define below is followed by the comment that describes it.
 * NOTE(review): btopr() is not defined in this file; presumably it converts
 * a byte count to pages, rounding up -- confirm. */
118 #define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG)
119 /* The pre-allocated tx buffer (hdr + small payload) */
121 #define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1)
122 /* Reserve elan address space for pre-allocated and pre-mapped transmit
123 * buffer and a full payload too. Extra pages allow for page alignment */
125 #define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD))
126 /* receive hdr/payload always contiguous and page aligned */
127 #define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE)
129 #define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD))
130 /* receive hdr/payload always contiguous and page aligned */
131 #define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE)
132 /* biggest complete packet we can receive (or transmit) */
134 /* Remote memory descriptor */
/* A fragment count followed by a zero-length trailing array of fragments.
 * NOTE(review): kqrmd_frag is declared twice (EP_NMD and EP_IOVEC); presumably
 * these are alternatives chosen by a preprocessor conditional that is not
 * visible in this view -- confirm against the full file. */
137 __u32 kqrmd_nfrag; /* # frags */
139 EP_NMD kqrmd_frag[0]; /* actual frags */
141 EP_IOVEC kqrmd_frag[0]; /* actual frags */
143 } kqswnal_remotemd_t;
/* Receive descriptor fields (krx_ prefix).
 * NOTE(review): the struct's opening and closing lines are not visible in this
 * view; presumably this is kqswnal_rx_t, the type used for kqn_rxds and by
 * kqswnal_rx_done()/kqswnal_requeue_rx() -- confirm.
 * NOTE(review): krx_elanbuffer is declared twice (EP_NMD and E3_Addr);
 * presumably alternatives under a conditional that is not visible here. */
147 struct list_head krx_list; /* enqueue -> thread */
148 EP_RCVR *krx_eprx; /* port to post receives to */
149 EP_RXD *krx_rxd; /* receive descriptor (for repost) */
151 EP_NMD krx_elanbuffer; /* contiguous Elan buffer */
153 E3_Addr krx_elanbuffer; /* contiguous Elan buffer */
155 int krx_npages; /* # pages in receive buffer */
156 int krx_nob; /* Number Of Bytes received into buffer */
157 int krx_rpc_reply_needed; /* peer waiting for EKC RPC reply */
158 int krx_rpc_reply_sent; /* rpc reply sent */
159 atomic_t krx_refcount; /* how to tell when rpc is done */
160 kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */
/* Both arrays are sized for the large receive buffer (KQSW_NRXMSGPAGES_LARGE) */
161 struct page *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */
162 struct iovec krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */
/* Transmit descriptor fields (ktx_ prefix).
 * NOTE(review): the struct's opening and closing lines are not visible in this
 * view; presumably this is kqswnal_tx_t, the type used for kqn_txds -- confirm.
 * NOTE(review): ktx_ebuffer and ktx_frags are each declared twice (EP_NMD vs
 * E3_Addr/EP_IOVEC); presumably alternatives under a conditional that is not
 * visible here. */
167 struct list_head ktx_list; /* enqueue idle/active */
168 struct list_head ktx_delayed_list; /* enqueue delayedtxds */
169 unsigned int ktx_isnblk:1; /* reserved descriptor? */
170 unsigned int ktx_state:7; /* What I'm doing */
171 unsigned int ktx_firsttmpfrag:1; /* 0 if ktx_frags[0] is in my ebuffer, else 1 */
172 uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */
173 int ktx_npages; /* pages reserved for mapping messages */
174 int ktx_nmappedpages; /* # pages mapped for current message */
175 int ktx_port; /* destination ep port */
176 ptl_nid_t ktx_nid; /* destination node */
177 void *ktx_args[2]; /* completion passthru */
178 char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */
179 unsigned long ktx_launchtime; /* when (in jiffies) the transmit was launched */
181 /* debug/info fields */
182 pid_t ktx_launcher; /* pid of launching process */
183 ptl_hdr_t *ktx_wire_hdr; /* portals header (wire endian) */
185 int ktx_nfrag; /* # message frags */
187 EP_NMD ktx_ebuffer; /* elan mapping of ktx_buffer */
188 EP_NMD ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */
190 E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */
191 EP_IOVEC ktx_frags[EP_MAXFRAG];/* msg frags (elan vaddrs) */
/* Transmit descriptor states; presumably the values stored in ktx_state
 * (they fit its 7-bit field) -- confirm against the users of ktx_state. */
195 #define KTX_IDLE 0 /* on kqn_(nblk_)idletxds */
196 #define KTX_SENDING 1 /* local send */
197 #define KTX_FORWARDING 2 /* routing a packet */
198 #define KTX_GETTING 3 /* local optimised get */
/* Global NAL state fields (kqn_ prefix).
 * NOTE(review): the struct's opening and closing lines are not visible in this
 * view; presumably this is kqswnal_data_t, the type of the global
 * kqswnal_data whose kqn_nid_offset/kqn_nnodes are read by the NID helpers.
 * NOTE(review): kqn_ep is declared twice (EP_SYS * and EP_DEV *), each
 * followed by its own pair of reserved-vaddr handles; presumably alternatives
 * under a conditional that is not visible here. */
202 char kqn_init; /* what's been initialised */
203 char kqn_shuttingdown; /* I'm trying to shut down */
204 atomic_t kqn_nthreads; /* # threads not terminated */
205 atomic_t kqn_nthreads_running;/* # threads still running */
207 int kqn_optimized_gets; /* optimized GETs? */
208 int kqn_copy_small_fwd; /* fwd small msgs from pre-allocated buffer? */
211 struct ctl_table_header *kqn_sysctl; /* sysctl interface */
213 kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */
214 kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */
216 struct list_head kqn_idletxds; /* transmit descriptors free to use */
217 struct list_head kqn_nblk_idletxds; /* reserved free transmit descriptors */
218 struct list_head kqn_activetxds; /* transmit descriptors being used */
219 spinlock_t kqn_idletxd_lock; /* serialise idle txd access */
220 wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */
221 struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */
223 spinlock_t kqn_sched_lock; /* serialise packet schedulers */
224 wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */
226 struct list_head kqn_readyrxds; /* rxds full of data */
227 struct list_head kqn_delayedfwds; /* delayed forwards */
228 struct list_head kqn_delayedtxds; /* delayed transmits */
230 spinlock_t kqn_statelock; /* cb_cli/cb_sti */
231 nal_cb_t *kqn_cb; /* -> kqswnal_lib */
233 EP_SYS *kqn_ep; /* elan system */
234 EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */
235 EP_NMH *kqn_ep_rx_nmh; /* elan reserved rx vaddrs */
237 EP_DEV *kqn_ep; /* elan device */
238 ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */
239 ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */
241 EP_XMTR *kqn_eptx; /* elan transmitter */
242 EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */
243 EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */
244 kpr_router_t kqn_router; /* connection to Kernel Portals Router module */
246 ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */
247 int kqn_nnodes; /* this cluster's size */
248 int kqn_elanid; /* this node's elan ID */
/* Initialisation stages; presumably the values recorded in kqn_init
 * ("what's been initialised") -- confirm against the init/teardown code. */
252 #define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */
253 #define KQN_INIT_DATA 1
254 #define KQN_INIT_PTL 2
255 #define KQN_INIT_ALL 3
/* Globals shared across the NAL's source files (declared extern here) */
257 extern nal_cb_t kqswnal_lib;
258 extern nal_t kqswnal_api;
259 extern kqswnal_data_t kqswnal_data;
261 /* global pre-prepared replies to keep off the stack */
262 extern EP_STATUSBLK kqswnal_rpc_success;
263 extern EP_STATUSBLK kqswnal_rpc_failed;
/* Functions declared extern here; kqswnal_requeue_rx() is the one called by
 * kqswnal_rx_done() when the last reference on an rx descriptor is dropped. */
265 extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
266 extern void kqswnal_rxhandler(EP_RXD *rxd);
267 extern int kqswnal_scheduler (void *);
268 extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
269 extern void kqswnal_dma_reply_complete (EP_RXD *rxd);
270 extern void kqswnal_requeue_rx (kqswnal_rx_t *krx);
/* Map an elan ID to a portals NID by adding this cluster's NID offset
 * (kqswnal_data.kqn_nid_offset).  No range check is performed here. */
272 static inline ptl_nid_t
273 kqswnal_elanid2nid (int elanid)
275 return (kqswnal_data.kqn_nid_offset + elanid);
/* Map a portals NID to an elan ID by subtracting this cluster's NID offset.
 * NIDs outside [kqn_nid_offset, kqn_nid_offset + kqn_nnodes) are detected by
 * the range check below.
 * NOTE(review): the return type and the statement executed for an
 * out-of-cluster NID are not visible in this view -- confirm against the
 * full file. */
279 kqswnal_nid2elanid (ptl_nid_t nid)
281 /* not in this cluster? */
282 if (nid < kqswnal_data.kqn_nid_offset ||
283 nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes)
286 return (nid - kqswnal_data.kqn_nid_offset);
/* Count the pages touched by the nob bytes starting at base: the page index
 * of the first byte and of the last byte (base + nob - 1) are computed with
 * PAGE_SHIFT and the inclusive difference is returned.
 * NOTE(review): with nob == 0 and a page-aligned base, last_page works out to
 * first_page - 1 and the LASSERT fires; presumably callers always pass
 * nob > 0 -- confirm. */
290 kqswnal_pages_spanned (void *base, int nob)
292 unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT;
293 unsigned long last_page = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT;
295 LASSERT (last_page >= first_page); /* can't wrap address space */
296 return (last_page - first_page + 1);
/* Checksum helper taking a running sum, a buffer and a byte count; the buffer
 * is accessed through an unsigned char pointer.
 * NOTE(review): the remainder of the body is not visible in this view, so the
 * accumulation rule is not documented here -- confirm against the full file. */
300 static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob)
302 unsigned char *ptr = (unsigned char *)base;
/* Drop one reference on a receive descriptor.  The caller must hold a
 * reference (asserted); when the count reaches zero the descriptor is handed
 * to kqswnal_requeue_rx(). */
311 static inline void kqswnal_rx_done (kqswnal_rx_t *krx)
313 LASSERT (atomic_read (&krx->krx_refcount) > 0);
314 if (atomic_dec_and_test (&krx->krx_refcount))
315 kqswnal_requeue_rx(krx);
320 #ifndef EP_RAILMASK_ALL
321 # error "old (unsupported) version of EKC headers"
/* Try to combine two network memory descriptors that are adjacent in address
 * space.  The descriptors are first checked for matching node id and for at
 * least one common rail.  If b starts exactly where a ends (or a starts
 * exactly where b ends) and merged is non-NULL, merged receives the address
 * of whichever descriptor comes first, the summed length, and an attribute
 * built from the node id and the rails common to both.  A NULL merged skips
 * the store.
 * NOTE(review): the return type and every return statement (including those
 * taken when a check fails) are not visible in this view -- confirm the
 * success/failure convention against the full file. */
325 ep_nmd_merge (EP_NMD *merged, EP_NMD *a, EP_NMD *b)
327 if (EP_NMD_NODEID(a) != EP_NMD_NODEID(b)) /* not generated on the same node */
330 if ((EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b)) == 0) /* no common rails */
/* b immediately follows a */
333 if (b->nmd_addr == (a->nmd_addr + a->nmd_len)) {
334 if (merged != NULL) {
335 merged->nmd_addr = a->nmd_addr;
336 merged->nmd_len = a->nmd_len + b->nmd_len;
337 merged->nmd_attr = EP_NMD_ATTR(EP_NMD_NODEID(a), EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b));
/* a immediately follows b */
342 if (a->nmd_addr == (b->nmd_addr + b->nmd_len)) {
343 if (merged != NULL) {
344 merged->nmd_addr = b->nmd_addr;
345 merged->nmd_len = b->nmd_len + a->nmd_len;
346 merged->nmd_attr = EP_NMD_ATTR(EP_NMD_NODEID(b), EP_NMD_RAILMASK(a) & EP_NMD_RAILMASK(b));
354 /* multirail defines these in <elan/epcomms.h> */
355 #define EP_MSG_SVC_PORTALS_SMALL (0x10) /* Portals over elan port number (small payloads) */
356 #define EP_MSG_SVC_PORTALS_LARGE (0x11) /* Portals over elan port number (large payloads) */
357 /* NB small/large message sizes are GLOBAL constants */
359 /* A minimal attempt to minimise inline #ifdeffing */
/* Alias the EP_ status names onto ESUCCESS and ENOMEM */
361 #define EP_SUCCESS ESUCCESS
362 #define EP_ENOMEM ENOMEM
/* Allocate a transmitter on device e; forwards to ep_alloc_large_xmtr()
 * and returns its result unchanged. */
364 static inline EP_XMTR *
365 ep_alloc_xmtr(EP_DEV *e)
367 return (ep_alloc_large_xmtr(e));
/* Allocate a receiver on device e for service svc with nenv envelopes;
 * forwards to ep_install_large_rcvr() and returns its result unchanged. */
370 static inline EP_RCVR *
371 ep_alloc_rcvr(EP_DEV *e, int svc, int nenv)
373 return (ep_install_large_rcvr(e, svc, nenv));
/* Release transmitter x; forwards to ep_free_large_xmtr().
 * NOTE(review): the return-type line is not visible in this view. */
377 ep_free_xmtr(EP_XMTR *x)
379 ep_free_large_xmtr(x);
/* Release receiver r; forwards to ep_remove_large_rcvr().
 * NOTE(review): the return-type line is not visible in this view. */
383 ep_free_rcvr(EP_RCVR *r)
385 ep_remove_large_rcvr(r);
389 #endif /* _QSWNAL_H */