/*
 * Copyright (C) 2002 Cluster File Systems, Inc.
 *   Author: Eric Barton <eric@bartonsoftware.com>
 *
 * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
 * W. Marcus Miller - Based on ksocknal
 *
 * This file is part of Portals, http://www.sf.net/projects/lustre/
 *
 * Portals is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Portals is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Portals; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "qswnal.h"
nal_t                   kqswnal_api;
kqswnal_data_t          kqswnal_data;
ptl_handle_ni_t         kqswnal_ni;
kqswnal_tunables_t      kqswnal_tunables;
kpr_nal_interface_t kqswnal_router_interface = {
        kprni_nalid:    QSWNAL,
        kprni_arg:      NULL,
        kprni_fwd:      kqswnal_fwd_packet,
        kprni_notify:   NULL,                   /* we're connectionless */
};
#define QSWNAL_SYSCTL  201

#define QSWNAL_SYSCTL_OPTIMIZED_GETS     1
#define QSWNAL_SYSCTL_COPY_SMALL_FWD     2
#define QSWNAL_SYSCTL_OPTIMIZED_PUTS     3      /* distinct index for "optimized_puts" below */
static ctl_table kqswnal_ctl_table[] = {
        {QSWNAL_SYSCTL_OPTIMIZED_PUTS, "optimized_puts",
         &kqswnal_tunables.kqn_optimized_puts, sizeof (int),
         0644, NULL, &proc_dointvec},
        {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
         &kqswnal_tunables.kqn_optimized_gets, sizeof (int),
         0644, NULL, &proc_dointvec},
        {0}
};
static ctl_table kqswnal_top_ctl_table[] = {
        {QSWNAL_SYSCTL, "qswnal", NULL, 0, 0555, kqswnal_ctl_table},
        {0}
};
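
/* NB registering kqswnal_top_ctl_table (see kqswnal_initialise) exposes the
 * tunables as /proc/sys/qswnal/optimized_puts and
 * /proc/sys/qswnal/optimized_gets; proc_dointvec handles the integer
 * reads/writes. */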

int
kqswnal_get_tx_desc (struct portals_cfg *pcfg)
{
        unsigned long      flags;
        struct list_head  *tmp;
        kqswnal_tx_t      *ktx;
        int                index = pcfg->pcfg_count;
        int                rc = -ENOENT;

        spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);

        list_for_each (tmp, &kqswnal_data.kqn_activetxds) {
                if (index-- != 0)
                        continue;

                ktx = list_entry (tmp, kqswnal_tx_t, ktx_list);

                pcfg->pcfg_pbuf1 = (char *)ktx;
                pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type);
                pcfg->pcfg_size  = NTOH__u32(ktx->ktx_wire_hdr->payload_length);
                pcfg->pcfg_nid   = NTOH__u64(ktx->ktx_wire_hdr->dest_nid);
                pcfg->pcfg_nid2  = ktx->ktx_nid;
                pcfg->pcfg_misc  = ktx->ktx_launcher;
                /* flags: bit 0 - on the delayed queue; bit 1 - a non-blocking
                 * descriptor; remaining bits - ktx_state */
                pcfg->pcfg_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) |
                                   (!ktx->ktx_isnblk                    ? 0 : 2) |
                                   (ktx->ktx_state << 2);
                rc = 0;
                break;
        }

        spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
        return (rc);
}
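
/* NB kqswnal_cmd (below) is the configuration entry point registered with
 * libcfs_nal_cmd_register() in kqswnal_startup(); it understands just the
 * NAL_CMD_GET_TXDESC and NAL_CMD_REGISTER_MYNID commands. */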

int
kqswnal_cmd (struct portals_cfg *pcfg, void *private)
{
        LASSERT (pcfg != NULL);

        switch (pcfg->pcfg_command) {
        case NAL_CMD_GET_TXDESC:
                return (kqswnal_get_tx_desc (pcfg));

        case NAL_CMD_REGISTER_MYNID:
                CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n",
                        pcfg->pcfg_nid - kqswnal_data.kqn_elanid,
                        kqswnal_data.kqn_nid_offset);
                kqswnal_data.kqn_nid_offset =
                        pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
                kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
                return (0);

        default:
                return (-EINVAL);
        }
}

void
kqswnal_shutdown(nal_t *nal)
{
        unsigned long flags;
        int           do_lib_fini = 0;

        /* NB The first ref was this module! */
        if (nal->nal_refct != 0) {
                PORTAL_MODULE_UNUSE;
                return;
        }

        CDEBUG (D_NET, "shutdown\n");
        LASSERT (nal == &kqswnal_api);

        switch (kqswnal_data.kqn_init)
        {
        default:
                LASSERT (0);

        case KQN_INIT_ALL:
                libcfs_nal_cmd_unregister(QSWNAL);
                /* fall through */

        case KQN_INIT_LIB:
                do_lib_fini = 1;
                /* fall through */

        case KQN_INIT_DATA:
                break;

        case KQN_INIT_NOTHING:
                return;
        }

        /**********************************************************************/
        /* Tell router we're shutting down.  Any router calls my threads
         * make will now fail immediately and the router will stop calling
         * me. */
        kpr_shutdown (&kqswnal_data.kqn_router);

        /**********************************************************************/
        /* Signal the start of shutdown... */
        spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
        kqswnal_data.kqn_shuttingdown = 1;
        spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);

        wake_up_all(&kqswnal_data.kqn_idletxd_waitq);
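
        /* NB kqn_shuttingdown is two-phase (assumed from its uses here):
         * setting it to 1 makes new senders and idle-tx waiters give up,
         * while the scheduler threads keep running to drain work in
         * progress; it only becomes 2 below, after comms are closed, to
         * make those threads exit. */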

        /**********************************************************************/
        /* wait for sends that have allocated a tx desc to launch or give up */
        while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) {
                CDEBUG(D_NET, "waiting for %d pending sends\n",
                       atomic_read (&kqswnal_data.kqn_pending_txs));
                set_current_state (TASK_UNINTERRUPTIBLE);
                schedule_timeout (HZ);
        }

        /**********************************************************************/
        /* close elan comms */
#if MULTIRAIL_EKC
        /* Shut down receivers first; rx callbacks might try sending... */
        if (kqswnal_data.kqn_eprx_small != NULL)
                ep_free_rcvr (kqswnal_data.kqn_eprx_small);

        if (kqswnal_data.kqn_eprx_large != NULL)
                ep_free_rcvr (kqswnal_data.kqn_eprx_large);

        /* NB ep_free_rcvr() returns only after we've freed off all receive
         * buffers (see shutdown handling in kqswnal_requeue_rx()).  This
         * means we must have completed any messages we passed to
         * lib_parse() or kpr_fwd_start(). */

        if (kqswnal_data.kqn_eptx != NULL)
                ep_free_xmtr (kqswnal_data.kqn_eptx);

        /* NB ep_free_xmtr() returns only after all outstanding transmits
         * have called their callback... */
        LASSERT(list_empty(&kqswnal_data.kqn_activetxds));
198 /* "Old" EKC just pretends to shutdown cleanly but actually
199 * provides no guarantees */
200 if (kqswnal_data.kqn_eprx_small != NULL)
201 ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);
203 if (kqswnal_data.kqn_eprx_large != NULL)
204 ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);
206 /* wait for transmits to complete */
207 while (!list_empty(&kqswnal_data.kqn_activetxds)) {
208 CWARN("waiting for active transmits to complete\n");
209 set_current_state(TASK_UNINTERRUPTIBLE);
210 schedule_timeout(HZ);
213 if (kqswnal_data.kqn_eptx != NULL)
214 ep_free_large_xmtr (kqswnal_data.kqn_eptx);

        /**********************************************************************/
        /* flag threads to terminate, wake them and wait for them to die */
        kqswnal_data.kqn_shuttingdown = 2;
        wake_up_all (&kqswnal_data.kqn_sched_waitq);

        while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
                CDEBUG(D_NET, "waiting for %d threads to terminate\n",
                       atomic_read (&kqswnal_data.kqn_nthreads));
                set_current_state (TASK_UNINTERRUPTIBLE);
                schedule_timeout (HZ);
        }

        /**********************************************************************/
        /* No more threads.  No more portals, router or comms callbacks!
         * I control the horizontals and the verticals... */

        LASSERT (list_empty (&kqswnal_data.kqn_readyrxds));
        LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds));
        LASSERT (list_empty (&kqswnal_data.kqn_delayedfwds));

        /**********************************************************************/
        /* Complete any blocked forwarding packets, with error
         * status */

        while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
        {
                kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
                                                  kpr_fwd_desc_t, kprfd_list);
                list_del (&fwd->kprfd_list);
                kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -ESHUTDOWN);
        }
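
        /* NB (assumed from the queue's name): kqn_idletxd_fwdq holds forwards
         * the router queued while no idle tx descriptor was available;
         * kpr_fwd_done() with -ESHUTDOWN hands them back to the router
         * rather than leaking them. */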

        /**********************************************************************/
        /* finalise router and portals lib */

        kpr_deregister (&kqswnal_data.kqn_router);

        if (do_lib_fini)
                lib_fini (&kqswnal_lib);

        /**********************************************************************/
        /* Unmap message buffers and free all descriptors and buffers
         */

#if MULTIRAIL_EKC
        /* FTTB, we need to unmap any remaining mapped memory.  When
         * ep_dvma_release() gets fixed (and releases any mappings in the
         * region), we can delete all the code from here --------> */

        if (kqswnal_data.kqn_txds != NULL) {
                int  i;

                for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) {
                        kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];

                        /* If ktx has a buffer, it got mapped; unmap now.
                         * NB only the pre-mapped stuff is still mapped
                         * since all tx descs must be idle */

                        if (ktx->ktx_buffer != NULL)
                                ep_dvma_unload(kqswnal_data.kqn_ep,
                                               kqswnal_data.kqn_ep_tx_nmh,
                                               &ktx->ktx_ebuffer);
                }
        }

        if (kqswnal_data.kqn_rxds != NULL) {
                int  i;

                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                        /* If krx_kiov[0].kiov_page got allocated, it got mapped.
                         * NB subsequent pages get merged */

                        if (krx->krx_kiov[0].kiov_page != NULL)
                                ep_dvma_unload(kqswnal_data.kqn_ep,
                                               kqswnal_data.kqn_ep_rx_nmh,
                                               &krx->krx_elanbuffer);
                }
        }
        /* <----------- to here */

        if (kqswnal_data.kqn_ep_rx_nmh != NULL)
                ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);

        if (kqswnal_data.kqn_ep_tx_nmh != NULL)
                ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);
#else
        if (kqswnal_data.kqn_eprxdmahandle != NULL)
        {
                elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eprxdmahandle, 0,
                                  KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                  KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);

                elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eprxdmahandle);
        }

        if (kqswnal_data.kqn_eptxdmahandle != NULL)
        {
                elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eptxdmahandle, 0,
                                  KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +
                                                      KQSW_NNBLK_TXMSGS));

                elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eptxdmahandle);
        }
#endif

        if (kqswnal_data.kqn_txds != NULL)
        {
                int   i;

                for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
                {
                        kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];

                        if (ktx->ktx_buffer != NULL)
                                PORTAL_FREE(ktx->ktx_buffer,
                                            KQSW_TX_BUFFER_SIZE);
                }

                PORTAL_FREE(kqswnal_data.kqn_txds,
                            sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
                                                     KQSW_NNBLK_TXMSGS));
        }

        if (kqswnal_data.kqn_rxds != NULL)
        {
                int   i;
                int   j;

                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
                {
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                        for (j = 0; j < krx->krx_npages; j++)
                                if (krx->krx_kiov[j].kiov_page != NULL)
                                        __free_page (krx->krx_kiov[j].kiov_page);
                }

                PORTAL_FREE(kqswnal_data.kqn_rxds,
                            sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
                                                    KQSW_NRXMSGS_LARGE));
        }

        /* resets flags, pointers to NULL etc */
        memset(&kqswnal_data, 0, sizeof (kqswnal_data));

        CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));

        printk (KERN_INFO "Lustre: Routing QSW NAL unloaded (final mem %d)\n",
                atomic_read(&portal_kmemory));
}

int
kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
                 ptl_ni_limits_t *requested_limits,
                 ptl_ni_limits_t *actual_limits)
{
#if MULTIRAIL_EKC
        EP_RAILMASK       all_rails = EP_RAILMASK_ALL;
#else
        ELAN3_DMA_REQUEST dmareq;
#endif
        int               rc;
        int               i;
        int               elan_page_idx;
        ptl_process_id_t  my_process_id;
        int               pkmem = atomic_read(&portal_kmemory);

        LASSERT (nal == &kqswnal_api);

        if (nal->nal_refct != 0) {
                if (actual_limits != NULL)
                        *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits;
                /* This module got the first ref */
                PORTAL_MODULE_USE;
                return (PTL_OK);
        }

        LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);

        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));

        /* ensure all pointers NULL etc */
        memset (&kqswnal_data, 0, sizeof (kqswnal_data));

        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
        spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
        init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);

        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);

        spin_lock_init (&kqswnal_data.kqn_sched_lock);
        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);

        /* Leave kqn_rpc_success zeroed */
#if MULTIRAIL_EKC
        kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED;
#else
        kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED;
#endif

        /* pointers/lists/locks initialised */
        kqswnal_data.kqn_init = KQN_INIT_DATA;

#if MULTIRAIL_EKC
        kqswnal_data.kqn_ep = ep_system();
        if (kqswnal_data.kqn_ep == NULL) {
                CERROR("Can't initialise EKC\n");
                kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }

        if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
                CERROR("Can't get elan ID\n");
                kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }
#else
        /**********************************************************************/
        /* Find the first Elan device */

        kqswnal_data.kqn_ep = ep_device (0);
        if (kqswnal_data.kqn_ep == NULL)
        {
                CERROR ("Can't get elan device 0\n");
                kqswnal_shutdown(nal);
                return (PTL_IFACE_INVALID);
        }
#endif

        kqswnal_data.kqn_nid_offset = 0;
        kqswnal_data.kqn_nnodes     = ep_numnodes (kqswnal_data.kqn_ep);
        kqswnal_data.kqn_elanid     = ep_nodeid (kqswnal_data.kqn_ep);

        /**********************************************************************/
        /* Get the transmitter */

        kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep);
        if (kqswnal_data.kqn_eptx == NULL)
        {
                CERROR ("Can't allocate transmitter\n");
                kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }

        /**********************************************************************/
        /* Get the receivers */

        kqswnal_data.kqn_eprx_small = ep_alloc_rcvr (kqswnal_data.kqn_ep,
                                                     EP_MSG_SVC_PORTALS_SMALL,
                                                     KQSW_EP_ENVELOPES_SMALL);
        if (kqswnal_data.kqn_eprx_small == NULL)
        {
                CERROR ("Can't install small msg receiver\n");
                kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }

        kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep,
                                                     EP_MSG_SVC_PORTALS_LARGE,
                                                     KQSW_EP_ENVELOPES_LARGE);
        if (kqswnal_data.kqn_eprx_large == NULL)
        {
                CERROR ("Can't install large msg receiver\n");
                kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }

        /**********************************************************************/
        /* Reserve Elan address space for transmit descriptors NB we may
         * either send the contents of associated buffers immediately, or
         * map them for the peer to suck/blow... */
#if MULTIRAIL_EKC
        kqswnal_data.kqn_ep_tx_nmh =
                ep_dvma_reserve(kqswnal_data.kqn_ep,
                                KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
                                EP_PERM_WRITE);
        if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
                CERROR("Can't reserve tx dma space\n");
                kqswnal_shutdown(nal);
                return (PTL_NO_SPACE);
        }
#else
        dmareq.Waitfn   = DDI_DMA_SLEEP;
        dmareq.ElanAddr = (E3_Addr) 0;
        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;

        rc = elan3_dma_reserve(kqswnal_data.kqn_ep->DmaState,
                               KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
                               &dmareq, &kqswnal_data.kqn_eptxdmahandle);
        if (rc != DDI_SUCCESS)
        {
                CERROR ("Can't reserve tx dma space\n");
                kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
#endif

        /**********************************************************************/
        /* Reserve Elan address space for receive buffers */
#if MULTIRAIL_EKC
        kqswnal_data.kqn_ep_rx_nmh =
                ep_dvma_reserve(kqswnal_data.kqn_ep,
                                KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
                                EP_PERM_WRITE);
        if (kqswnal_data.kqn_ep_rx_nmh == NULL) {
                CERROR("Can't reserve rx dma space\n");
                kqswnal_shutdown(nal);
                return (PTL_NO_SPACE);
        }
#else
        dmareq.Waitfn   = DDI_DMA_SLEEP;
        dmareq.ElanAddr = (E3_Addr) 0;
        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;

        rc = elan3_dma_reserve (kqswnal_data.kqn_ep->DmaState,
                                KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
                                &dmareq, &kqswnal_data.kqn_eprxdmahandle);
        if (rc != DDI_SUCCESS)
        {
                CERROR ("Can't reserve rx dma space\n");
                kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }
#endif

        /**********************************************************************/
        /* Allocate/Initialise transmit descriptors */

        PORTAL_ALLOC(kqswnal_data.kqn_txds,
                     sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
        if (kqswnal_data.kqn_txds == NULL)
        {
                kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }

        /* clear flags, null pointers etc */
        memset(kqswnal_data.kqn_txds, 0,
               sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
        for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
        {
                int           premapped_pages;
                kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
                int           basepage = i * KQSW_NTXMSGPAGES;

                PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
                if (ktx->ktx_buffer == NULL)
                {
                        kqswnal_shutdown (nal);
                        return (PTL_NO_SPACE);
                }

                /* Map pre-allocated buffer NOW, to save latency on transmit */
                premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
                                                        KQSW_TX_BUFFER_SIZE);
#if MULTIRAIL_EKC
                ep_dvma_load(kqswnal_data.kqn_ep, NULL,
                             ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
                             kqswnal_data.kqn_ep_tx_nmh, basepage,
                             &all_rails, &ktx->ktx_ebuffer);
#else
                elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
                                       kqswnal_data.kqn_eptxdmahandle,
                                       ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
                                       basepage, &ktx->ktx_ebuffer);
#endif
                ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
                ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */

                INIT_LIST_HEAD (&ktx->ktx_delayed_list);

                ktx->ktx_state = KTX_IDLE;
                ktx->ktx_isnblk = (i >= KQSW_NTXMSGS);
                list_add_tail (&ktx->ktx_list,
                               ktx->ktx_isnblk ? &kqswnal_data.kqn_nblk_idletxds :
                                                 &kqswnal_data.kqn_idletxds);
        }
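
        /* NB (assumed from the list names): the first KQSW_NTXMSGS descriptors
         * go on kqn_idletxds for normal sends, while the remaining
         * KQSW_NNBLK_TXMSGS sit on kqn_nblk_idletxds as a reserve for senders
         * that may not block waiting for a descriptor. */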

        /**********************************************************************/
        /* Allocate/Initialise receive descriptors */

        PORTAL_ALLOC (kqswnal_data.kqn_rxds,
                      sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
        if (kqswnal_data.kqn_rxds == NULL)
        {
                kqswnal_shutdown (nal);
                return (PTL_NO_SPACE);
        }

        memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
               sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));

        elan_page_idx = 0;
        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
        {
#if MULTIRAIL_EKC
                EP_NMD        elanbuffer;
#else
                E3_Addr       elanbuffer;
#endif
                int           j;
                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                if (i < KQSW_NRXMSGS_SMALL)
                {
                        krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
                        krx->krx_eprx   = kqswnal_data.kqn_eprx_small;
                }
                else
                {
                        krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
                        krx->krx_eprx   = kqswnal_data.kqn_eprx_large;
                }

                LASSERT (krx->krx_npages > 0);
                for (j = 0; j < krx->krx_npages; j++)
                {
                        struct page *page = alloc_page(GFP_KERNEL);

                        if (page == NULL) {
                                kqswnal_shutdown (nal);
                                return (PTL_NO_SPACE);
                        }

                        krx->krx_kiov[j].kiov_page = page;
                        LASSERT(page_address(page) != NULL);

#if MULTIRAIL_EKC
                        ep_dvma_load(kqswnal_data.kqn_ep, NULL,
                                     page_address(page),
                                     PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
                                     elan_page_idx, &all_rails, &elanbuffer);

                        if (j == 0) {
                                krx->krx_elanbuffer = elanbuffer;
                        } else {
                                rc = ep_nmd_merge(&krx->krx_elanbuffer,
                                                  &krx->krx_elanbuffer,
                                                  &elanbuffer);
                                /* NB contiguous mapping */
                                LASSERT (rc);
                        }
#else
                        elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
                                              kqswnal_data.kqn_eprxdmahandle,
                                              page_address(page),
                                              PAGE_SIZE, elan_page_idx,
                                              &elanbuffer);
                        if (j == 0)
                                krx->krx_elanbuffer = elanbuffer;

                        /* NB contiguous mapping */
                        LASSERT (elanbuffer == krx->krx_elanbuffer + j * PAGE_SIZE);
#endif
                        elan_page_idx++;
                }
        }
        LASSERT (elan_page_idx ==
                 (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
                 (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));
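
        /* NB the LASSERT above is a sanity check that every receive buffer
         * page consumed exactly one page slot in the reserved Elan mapping;
         * small and large buffers share the one region, indexed by
         * elan_page_idx. */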

        /**********************************************************************/
        /* Network interface ready to initialise */

        my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
        my_process_id.pid = 0;

        rc = lib_init(&kqswnal_lib, nal, my_process_id,
                      requested_limits, actual_limits);
        if (rc != PTL_OK)
        {
                CERROR ("lib_init failed %d\n", rc);
                kqswnal_shutdown (nal);
                return (rc);
        }

        kqswnal_data.kqn_init = KQN_INIT_LIB;

        /**********************************************************************/
        /* Queue receives, now that it's OK to run their completion callbacks */

        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
        {
                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                /* NB this enqueue can allocate/sleep (attr == 0) */
                krx->krx_state = KRX_POSTED;
#if MULTIRAIL_EKC
                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
                                      &krx->krx_elanbuffer, 0);
#else
                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
                                      krx->krx_elanbuffer,
                                      krx->krx_npages * PAGE_SIZE, 0);
#endif
                if (rc != EP_SUCCESS)
                {
                        CERROR ("failed ep_queue_receive %d\n", rc);
                        kqswnal_shutdown (nal);
                        return (PTL_FAIL);
                }
        }
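
        /* NB once a buffer is posted, kqswnal_rxhandler can fire at any
         * moment; that's why lib_init() had to complete first. */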

        /**********************************************************************/
        /* Spawn scheduling threads */
        for (i = 0; i < num_online_cpus(); i++) {
                rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
                if (rc != 0)
                {
                        CERROR ("failed to spawn scheduling thread: %d\n", rc);
                        kqswnal_shutdown (nal);
                        return (PTL_FAIL);
                }
        }

        /**********************************************************************/
        /* Connect to the router */
        rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
        if (rc != 0)
                CDEBUG(D_NET, "Can't initialise routing interface "
                       "(rc = %d): not routing\n", rc);

        rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL);
        if (rc != 0)
        {
                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
                kqswnal_shutdown (nal);
                return (PTL_FAIL);
        }

        kqswnal_data.kqn_init = KQN_INIT_ALL;

        printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d "
               "(Routing %s, initial mem %d)\n",
               kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes,
               kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
               pkmem);

        return (PTL_OK);
}

void
kqswnal_finalise (void)
{
        if (kqswnal_tunables.kqn_sysctl != NULL)
                unregister_sysctl_table (kqswnal_tunables.kqn_sysctl);

        PtlNIFini(kqswnal_ni);

        ptl_unregister_nal(QSWNAL);
}

int
kqswnal_initialise (void)
{
        int rc;

        kqswnal_api.nal_ni_init = kqswnal_startup;
        kqswnal_api.nal_ni_fini = kqswnal_shutdown;

        /* Initialise dynamic tunables to defaults once only */
        kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS;
        kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;

        rc = ptl_register_nal(QSWNAL, &kqswnal_api);
        if (rc != PTL_OK) {
                CERROR("Can't register QSWNAL: %d\n", rc);
                return (-ENOMEM);               /* or something... */
        }

        /* Pure gateways, and the workaround for 'EKC blocks forever until
         * the service is active' want the NAL started up at module load
         * time... */
        rc = PtlNIInit(QSWNAL, 0, NULL, NULL, &kqswnal_ni);
        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
                ptl_unregister_nal(QSWNAL);
                return (-ENODEV);
        }

        /* Press on regardless even if registering sysctl doesn't work */
        kqswnal_tunables.kqn_sysctl =
                register_sysctl_table (kqswnal_top_ctl_table, 0);

        return (0);
}

MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01");
MODULE_LICENSE("GPL");

module_init (kqswnal_initialise);
module_exit (kqswnal_finalise);