/*
 * Copyright (C) 2002 Cluster File Systems, Inc.
 *   Author: Eric Barton <eric@bartonsoftware.com>
 *
 * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
 * W. Marcus Miller - Based on ksocknal
 *
 * This file is part of Portals, http://www.sf.net/projects/lustre/
 *
 * Portals is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Portals is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Portals; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
27 ptl_handle_ni_t kqswnal_ni;
29 kqswnal_data_t kqswnal_data;
31 kpr_nal_interface_t kqswnal_router_interface = {
34 kprni_fwd: kqswnal_fwd_packet,
35 kprni_notify: NULL, /* we're connectionless */
39 #define QSWNAL_SYSCTL 201
41 #define QSWNAL_SYSCTL_OPTIMIZED_GETS 1
42 #define QSWNAL_SYSCTL_COPY_SMALL_FWD 2
44 static ctl_table kqswnal_ctl_table[] = {
45 {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
46 &kqswnal_data.kqn_optimized_gets, sizeof (int),
47 0644, NULL, &proc_dointvec},
48 {QSWNAL_SYSCTL_COPY_SMALL_FWD, "copy_small_fwd",
49 &kqswnal_data.kqn_copy_small_fwd, sizeof (int),
50 0644, NULL, &proc_dointvec},
54 static ctl_table kqswnal_top_ctl_table[] = {
55 {QSWNAL_SYSCTL, "qswnal", NULL, 0, 0555, kqswnal_ctl_table},
61 kqswnal_forward(nal_t *nal,
63 void *args, size_t args_len,
64 void *ret, size_t ret_len)
66 kqswnal_data_t *k = nal->nal_data;
67 nal_cb_t *nal_cb = k->kqn_cb;
69 LASSERT (nal == &kqswnal_api);
70 LASSERT (k == &kqswnal_data);
71 LASSERT (nal_cb == &kqswnal_lib);
73 lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
78 kqswnal_lock (nal_t *nal, unsigned long *flags)
80 kqswnal_data_t *k = nal->nal_data;
81 nal_cb_t *nal_cb = k->kqn_cb;
83 LASSERT (nal == &kqswnal_api);
84 LASSERT (k == &kqswnal_data);
85 LASSERT (nal_cb == &kqswnal_lib);
87 nal_cb->cb_cli(nal_cb,flags);
91 kqswnal_unlock(nal_t *nal, unsigned long *flags)
93 kqswnal_data_t *k = nal->nal_data;
94 nal_cb_t *nal_cb = k->kqn_cb;
96 LASSERT (nal == &kqswnal_api);
97 LASSERT (k == &kqswnal_data);
98 LASSERT (nal_cb == &kqswnal_lib);
100 nal_cb->cb_sti(nal_cb,flags);
104 kqswnal_shutdown(nal_t *nal, int ni)
106 CDEBUG (D_NET, "shutdown\n");
108 LASSERT (nal == &kqswnal_api);
113 kqswnal_yield(nal_t *nal, unsigned long *flags, int milliseconds)
115 /* NB called holding statelock */
117 unsigned long now = jiffies;
119 CDEBUG (D_NET, "yield\n");
121 if (milliseconds == 0) {
122 if (current->need_resched)
127 init_waitqueue_entry(&wait, current);
128 set_current_state(TASK_INTERRUPTIBLE);
129 add_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
131 kqswnal_unlock(nal, flags);
133 if (milliseconds < 0)
136 schedule_timeout((milliseconds * HZ) / 1000);
138 kqswnal_lock(nal, flags);
140 remove_wait_queue(&kqswnal_data.kqn_yield_waitq, &wait);
142 if (milliseconds > 0) {
143 milliseconds -= ((jiffies - now) * 1000) / HZ;
144 if (milliseconds < 0)
148 return (milliseconds);
152 kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
153 ptl_pid_t requested_pid)
155 ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid);
156 int nnids = kqswnal_data.kqn_nnodes;
158 CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids);
160 lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size);
162 return (&kqswnal_api);
166 kqswnal_get_tx_desc (struct portals_cfg *pcfg)
169 struct list_head *tmp;
171 int index = pcfg->pcfg_count;
174 spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
176 list_for_each (tmp, &kqswnal_data.kqn_activetxds) {
180 ktx = list_entry (tmp, kqswnal_tx_t, ktx_list);
182 pcfg->pcfg_pbuf1 = (char *)ktx;
183 pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type);
184 pcfg->pcfg_size = NTOH__u32(ktx->ktx_wire_hdr->payload_length);
185 pcfg->pcfg_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid);
186 pcfg->pcfg_nid2 = ktx->ktx_nid;
187 pcfg->pcfg_misc = ktx->ktx_launcher;
188 pcfg->pcfg_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) |
189 (!ktx->ktx_isnblk ? 0 : 2) |
190 (ktx->ktx_state << 2);
195 spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
200 kqswnal_cmd (struct portals_cfg *pcfg, void *private)
202 LASSERT (pcfg != NULL);
204 switch (pcfg->pcfg_command) {
205 case NAL_CMD_GET_TXDESC:
206 return (kqswnal_get_tx_desc (pcfg));
208 case NAL_CMD_REGISTER_MYNID:
209 CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n",
210 pcfg->pcfg_nid - kqswnal_data.kqn_elanid,
211 kqswnal_data.kqn_nid_offset);
212 kqswnal_data.kqn_nid_offset =
213 pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
214 kqswnal_lib.ni.nid = pcfg->pcfg_nid;
/* Tear down the NAL, undoing as much of kqswnal_initialise() as the
 * kqn_init state machine says was completed.
 * NOTE(review): this text appears to have lost lines in extraction
 * (missing braces, switch case labels and the #if MULTIRAIL_EKC/#else
 * conditionals around the EKC calls) -- reconcile with pristine source
 * before building. */
kqswnal_finalise (void)
        /* unwind only as far as initialisation got */
        switch (kqswnal_data.kqn_init)
        /* fully initialised: detach external interfaces (sysctl, API
         * symbol, command handler) so nothing new arrives during teardown */
        if (kqswnal_data.kqn_sysctl != NULL)
                unregister_sysctl_table (kqswnal_data.kqn_sysctl);

        PORTAL_SYMBOL_UNREGISTER (kqswnal_ni);
        kportal_nal_unregister(QSWNAL);

        case KQN_INIT_NOTHING:

        /**********************************************************************/
        /* Tell router we're shutting down.  Any router calls my threads
         * make will now fail immediately and the router will stop calling
         * into me. */
        kpr_shutdown (&kqswnal_data.kqn_router);

        /**********************************************************************/
        /* Signal the start of shutdown... */
        spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
        kqswnal_data.kqn_shuttingdown = 1;
        spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);

        wake_up_all(&kqswnal_data.kqn_idletxd_waitq);

        /**********************************************************************/
        /* wait for sends that have allocated a tx desc to launch or give up */
        while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) {
                CDEBUG(D_NET, "waiting for %d pending sends\n",
                       atomic_read (&kqswnal_data.kqn_pending_txs));
                set_current_state (TASK_UNINTERRUPTIBLE);
                schedule_timeout (HZ);

        /**********************************************************************/
        /* close elan comms */

        /* Shut down receivers first; rx callbacks might try sending... */
        if (kqswnal_data.kqn_eprx_small != NULL)
                ep_free_rcvr (kqswnal_data.kqn_eprx_small);

        if (kqswnal_data.kqn_eprx_large != NULL)
                ep_free_rcvr (kqswnal_data.kqn_eprx_large);

        /* NB ep_free_rcvr() returns only after we've freed off all receive
         * buffers (see shutdown handling in kqswnal_requeue_rx()).  This
         * means we must have completed any messages we passed to
         * lib_parse() or kpr_fwd_start(). */

        if (kqswnal_data.kqn_eptx != NULL)
                ep_free_xmtr (kqswnal_data.kqn_eptx);

        /* NB ep_free_xmtr() returns only after all outstanding transmits
         * have called their callback... */
        LASSERT(list_empty(&kqswnal_data.kqn_activetxds));

        /* "Old" EKC just pretends to shutdown cleanly but actually
         * provides no guarantees */
        if (kqswnal_data.kqn_eprx_small != NULL)
                ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);

        if (kqswnal_data.kqn_eprx_large != NULL)
                ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);

        /* wait for transmits to complete */
        while (!list_empty(&kqswnal_data.kqn_activetxds)) {
                CWARN("waiting for active transmits to complete\n");
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(HZ);

        if (kqswnal_data.kqn_eptx != NULL)
                ep_free_large_xmtr (kqswnal_data.kqn_eptx);

        /**********************************************************************/
        /* flag threads to terminate, wake them and wait for them to die */
        kqswnal_data.kqn_shuttingdown = 2;
        wake_up_all (&kqswnal_data.kqn_sched_waitq);

        while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
                CDEBUG(D_NET, "waiting for %d threads to terminate\n",
                       atomic_read (&kqswnal_data.kqn_nthreads));
                set_current_state (TASK_UNINTERRUPTIBLE);
                schedule_timeout (HZ);

        /**********************************************************************/
        /* No more threads.  No more portals, router or comms callbacks!
         * I control the horizontals and the verticals... */

        LASSERT (list_empty (&kqswnal_data.kqn_readyrxds));
        LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds));
        LASSERT (list_empty (&kqswnal_data.kqn_delayedfwds));

        /**********************************************************************/
        /* Complete any blocked forwarding packets, with error status */

        while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
                kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
                                                  kpr_fwd_desc_t, kprfd_list);
                list_del (&fwd->kprfd_list);
                kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -ESHUTDOWN);

        /**********************************************************************/
        /* finalise router and portals lib */

        kpr_deregister (&kqswnal_data.kqn_router);

        PtlNIFini (kqswnal_ni);
        lib_fini (&kqswnal_lib);

        /**********************************************************************/
        /* Unmap message buffers and free all descriptors and buffers */

        /* FTTB, we need to unmap any remaining mapped memory.  When
         * ep_dvma_release() get fixed (and releases any mappings in the
         * region), we can delete all the code from here --------> */

        if (kqswnal_data.kqn_txds != NULL) {
                for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) {
                        kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];

                        /* If ktx has a buffer, it got mapped; unmap now.
                         * NB only the pre-mapped stuff is still mapped
                         * since all tx descs must be idle */
                        if (ktx->ktx_buffer != NULL)
                                ep_dvma_unload(kqswnal_data.kqn_ep,
                                               kqswnal_data.kqn_ep_tx_nmh,

        if (kqswnal_data.kqn_rxds != NULL) {
                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                        /* If krx_kiov[0].kiov_page got allocated, it got mapped.
                         * NB subsequent pages get merged */
                        if (krx->krx_kiov[0].kiov_page != NULL)
                                ep_dvma_unload(kqswnal_data.kqn_ep,
                                               kqswnal_data.kqn_ep_rx_nmh,
                                               &krx->krx_elanbuffer);
        /* <----------- to here */

        if (kqswnal_data.kqn_ep_rx_nmh != NULL)
                ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);

        if (kqswnal_data.kqn_ep_tx_nmh != NULL)
                ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);

        if (kqswnal_data.kqn_eprxdmahandle != NULL)
                elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eprxdmahandle, 0,
                                  KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                  KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);

                elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eprxdmahandle);

        if (kqswnal_data.kqn_eptxdmahandle != NULL)
                elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eptxdmahandle, 0,
                                  KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +

                elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eptxdmahandle);

        if (kqswnal_data.kqn_txds != NULL)
                for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
                        kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];

                        if (ktx->ktx_buffer != NULL)
                                PORTAL_FREE(ktx->ktx_buffer,
                                            KQSW_TX_BUFFER_SIZE);

                PORTAL_FREE(kqswnal_data.kqn_txds,
                            sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +

        if (kqswnal_data.kqn_rxds != NULL)
                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                        for (j = 0; j < krx->krx_npages; j++)
                                if (krx->krx_kiov[j].kiov_page != NULL)
                                        __free_page (krx->krx_kiov[j].kiov_page);

                PORTAL_FREE(kqswnal_data.kqn_rxds,
                            sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
                                                    KQSW_NRXMSGS_LARGE));

        /* resets flags, pointers to NULL etc */
        memset(&kqswnal_data, 0, sizeof (kqswnal_data));

        CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));

        printk (KERN_INFO "Lustre: Routing QSW NAL unloaded (final mem %d)\n",
                atomic_read(&portal_kmemory));
/* Bring up the QSW NAL: attach to the Elan kernel comms, reserve DMA
 * space, allocate and map tx/rx descriptors, start scheduler threads,
 * and register with portals and the router.  kqn_init records how far
 * we got so kqswnal_finalise() can unwind a partial init.
 * Fixes in this revision: the rx dvma-reserve result is now checked via
 * kqn_ep_rx_nmh (was kqn_ep_tx_nmh -- copy/paste bug), and the tx
 * elan3_dma_reserve failure message now says "tx" (was "rx").
 * NOTE(review): lines appear to have been lost in extraction (braces,
 * some local declarations and #if MULTIRAIL_EKC/#else markers) --
 * reconcile with pristine source before building. */
kqswnal_initialise (void)
        EP_RAILMASK       all_rails = EP_RAILMASK_ALL;
        ELAN3_DMA_REQUEST dmareq;
        int               pkmem = atomic_read(&portal_kmemory);

        LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);

        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));

        /* hook the API object up to this NAL's entry points */
        kqswnal_api.forward  = kqswnal_forward;
        kqswnal_api.shutdown = kqswnal_shutdown;
        kqswnal_api.yield    = kqswnal_yield;
        kqswnal_api.validate = NULL;            /* our api validate is a NOOP */
        kqswnal_api.lock     = kqswnal_lock;
        kqswnal_api.unlock   = kqswnal_unlock;
        kqswnal_api.nal_data = &kqswnal_data;

        kqswnal_lib.nal_data = &kqswnal_data;

        /* canned RPC replies */
        memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success));
        memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed));
        kqswnal_rpc_failed.Data[0] = -ECONNREFUSED;
        kqswnal_rpc_failed.Status = -ECONNREFUSED;

        /* ensure all pointers NULL etc */
        memset (&kqswnal_data, 0, sizeof (kqswnal_data));

        kqswnal_data.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
        kqswnal_data.kqn_copy_small_fwd = KQSW_COPY_SMALL_FWD;

        kqswnal_data.kqn_cb = &kqswnal_lib;

        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
        spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
        init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);

        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);

        spin_lock_init (&kqswnal_data.kqn_sched_lock);
        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);

        spin_lock_init (&kqswnal_data.kqn_statelock);
        init_waitqueue_head (&kqswnal_data.kqn_yield_waitq);

        /* pointers/lists/locks initialised */
        kqswnal_data.kqn_init = KQN_INIT_DATA;

        kqswnal_data.kqn_ep = ep_system();
        if (kqswnal_data.kqn_ep == NULL) {
                CERROR("Can't initialise EKC\n");

        if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
                CERROR("Can't get elan ID\n");

        /**********************************************************************/
        /* Find the first Elan device */

        kqswnal_data.kqn_ep = ep_device (0);
        if (kqswnal_data.kqn_ep == NULL)
                CERROR ("Can't get elan device 0\n");

        kqswnal_data.kqn_nid_offset = 0;
        kqswnal_data.kqn_nnodes     = ep_numnodes (kqswnal_data.kqn_ep);
        kqswnal_data.kqn_elanid     = ep_nodeid (kqswnal_data.kqn_ep);

        /**********************************************************************/
        /* Get the transmitter */

        kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep);
        if (kqswnal_data.kqn_eptx == NULL)
                CERROR ("Can't allocate transmitter\n");

        /**********************************************************************/
        /* Get the receivers */

        kqswnal_data.kqn_eprx_small = ep_alloc_rcvr (kqswnal_data.kqn_ep,
                                                     EP_MSG_SVC_PORTALS_SMALL,
                                                     KQSW_EP_ENVELOPES_SMALL);
        if (kqswnal_data.kqn_eprx_small == NULL)
                CERROR ("Can't install small msg receiver\n");

        kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep,
                                                     EP_MSG_SVC_PORTALS_LARGE,
                                                     KQSW_EP_ENVELOPES_LARGE);
        if (kqswnal_data.kqn_eprx_large == NULL)
                CERROR ("Can't install large msg receiver\n");

        /**********************************************************************/
        /* Reserve Elan address space for transmit descriptors NB we may
         * either send the contents of associated buffers immediately, or
         * map them for the peer to suck/blow... */

        kqswnal_data.kqn_ep_tx_nmh =
                ep_dvma_reserve(kqswnal_data.kqn_ep,
                                KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
        if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
                CERROR("Can't reserve tx dma space\n");

        dmareq.Waitfn   = DDI_DMA_SLEEP;
        dmareq.ElanAddr = (E3_Addr) 0;
        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;

        rc = elan3_dma_reserve(kqswnal_data.kqn_ep->DmaState,
                               KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
                               &dmareq, &kqswnal_data.kqn_eptxdmahandle);
        if (rc != DDI_SUCCESS)
                CERROR ("Can't reserve tx dma space\n"); /* was "rx": copy/paste */

        /**********************************************************************/
        /* Reserve Elan address space for receive buffers */

        kqswnal_data.kqn_ep_rx_nmh =
                ep_dvma_reserve(kqswnal_data.kqn_ep,
                                KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
        /* check the handle just assigned (was kqn_ep_tx_nmh: copy/paste bug) */
        if (kqswnal_data.kqn_ep_rx_nmh == NULL) {
                CERROR("Can't reserve rx dma space\n");

        dmareq.Waitfn   = DDI_DMA_SLEEP;
        dmareq.ElanAddr = (E3_Addr) 0;
        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;

        rc = elan3_dma_reserve (kqswnal_data.kqn_ep->DmaState,
                                KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
                                &dmareq, &kqswnal_data.kqn_eprxdmahandle);
        if (rc != DDI_SUCCESS)
                CERROR ("Can't reserve rx dma space\n");

        /**********************************************************************/
        /* Allocate/Initialise transmit descriptors */

        PORTAL_ALLOC(kqswnal_data.kqn_txds,
                     sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
        if (kqswnal_data.kqn_txds == NULL)

        /* clear flags, null pointers etc */
        memset(kqswnal_data.kqn_txds, 0,
               sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
        for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
                kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
                int           basepage = i * KQSW_NTXMSGPAGES;

                PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
                if (ktx->ktx_buffer == NULL)

                /* Map pre-allocated buffer NOW, to save latency on transmit */
                premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
                                                        KQSW_TX_BUFFER_SIZE);
                ep_dvma_load(kqswnal_data.kqn_ep, NULL,
                             ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
                             kqswnal_data.kqn_ep_tx_nmh, basepage,
                             &all_rails, &ktx->ktx_ebuffer);

                elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
                                       kqswnal_data.kqn_eptxdmahandle,
                                       ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
                                       basepage, &ktx->ktx_ebuffer);

                ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
                ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */

                INIT_LIST_HEAD (&ktx->ktx_delayed_list);

                ktx->ktx_state = KTX_IDLE;
                ktx->ktx_isnblk = (i >= KQSW_NTXMSGS);
                list_add_tail (&ktx->ktx_list,
                               ktx->ktx_isnblk ? &kqswnal_data.kqn_nblk_idletxds :
                                                 &kqswnal_data.kqn_idletxds);

        /**********************************************************************/
        /* Allocate/Initialise receive descriptors */

        PORTAL_ALLOC (kqswnal_data.kqn_rxds,
                      sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
        if (kqswnal_data.kqn_rxds == NULL)

        memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
               sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));

        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                /* the first KQSW_NRXMSGS_SMALL descriptors belong to the
                 * small-message receiver, the rest to the large one */
                if (i < KQSW_NRXMSGS_SMALL)
                        krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
                        krx->krx_eprx   = kqswnal_data.kqn_eprx_small;

                        krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
                        krx->krx_eprx   = kqswnal_data.kqn_eprx_large;

                LASSERT (krx->krx_npages > 0);
                for (j = 0; j < krx->krx_npages; j++)
                        struct page *page = alloc_page(GFP_KERNEL);

                        krx->krx_kiov[j].kiov_page = page;
                        LASSERT(page_address(page) != NULL);

                        ep_dvma_load(kqswnal_data.kqn_ep, NULL,
                                     PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
                                     elan_page_idx, &all_rails, &elanbuffer);

                        krx->krx_elanbuffer = elanbuffer;

                        rc = ep_nmd_merge(&krx->krx_elanbuffer,
                                          &krx->krx_elanbuffer,
                        /* NB contiguous mapping */

                        elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
                                              kqswnal_data.kqn_eprxdmahandle,
                                              PAGE_SIZE, elan_page_idx,

                        krx->krx_elanbuffer = elanbuffer;

                        /* NB contiguous mapping */
                        LASSERT (elanbuffer == krx->krx_elanbuffer + j * PAGE_SIZE);

        LASSERT (elan_page_idx ==
                 (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
                 (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));

        /**********************************************************************/
        /* Network interface ready to initialise */

        rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni);
                CERROR ("PtlNIInit failed %d\n", rc);

        kqswnal_data.kqn_init = KQN_INIT_PTL;

        /**********************************************************************/
        /* Queue receives, now that it's OK to run their completion callbacks */

        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                /* NB this enqueue can allocate/sleep (attr == 0) */
                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
                                      &krx->krx_elanbuffer, 0);
                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
                                      krx->krx_npages * PAGE_SIZE, 0);
                if (rc != EP_SUCCESS)
                        CERROR ("failed ep_queue_receive %d\n", rc);

        /**********************************************************************/
        /* Spawn scheduling threads */
        for (i = 0; i < smp_num_cpus; i++)
                rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
                        CERROR ("failed to spawn scheduling thread: %d\n", rc);

        /**********************************************************************/
        /* Connect to the router */
        rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
        CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc);

        rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL);
                CERROR ("Can't initialise command interface (rc = %d)\n", rc);

        /* Press on regardless even if registering sysctl doesn't work */
        kqswnal_data.kqn_sysctl = register_sysctl_table (kqswnal_top_ctl_table, 0);

        PORTAL_SYMBOL_REGISTER(kqswnal_ni);
        kqswnal_data.kqn_init = KQN_INIT_ALL;

        printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d "
               "(Routing %s, initial mem %d)\n",
               kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes,
               kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
/* Module metadata */
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01");
MODULE_LICENSE("GPL");

/* kqswnal_initialise()/kqswnal_finalise() run at module load/unload */
module_init (kqswnal_initialise);
module_exit (kqswnal_finalise);

/* make the NI handle linkable from other kernel modules */
EXPORT_SYMBOL (kqswnal_ni);