2 * Copyright (C) 2002 Cluster File Systems, Inc.
3 * Author: Eric Barton <eric@bartonsoftware.com>
5 * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
6 * W. Marcus Miller - Based on ksocknal
8 * This file is part of Portals, http://www.sf.net/projects/lustre/
10 * Portals is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Portals is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Portals; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Handle of the Portals network interface this NAL creates (see
 * PtlNIInit in kqswnal_initialise); exported at the bottom of the file. */
27 ptl_handle_ni_t kqswnal_ni;
/* Single global instance of all per-module NAL state. */
29 kqswnal_data_t kqswnal_data;
/* Callback table registered with the portals router (kpr_register) so it
 * can hand us packets to forward.  NOTE(review): extraction dropped the
 * initializer's closing lines; code kept verbatim. */
31 kpr_nal_interface_t kqswnal_router_interface = {
34 kprni_fwd: kqswnal_fwd_packet,
35 kprni_notify: NULL, /* we're connectionless */
/* API-level 'forward' entry point: sanity-check that we were handed our
 * own singleton NAL objects, then dispatch the request into the portals
 * library via lib_dispatch().  NOTE(review): the extraction dropped the
 * return type, braces and some parameters here; visible lines are verbatim. */
40 kqswnal_forward(nal_t *nal,
42 void *args, size_t args_len,
43 void *ret, size_t ret_len)
45 kqswnal_data_t *k = nal->nal_data;
46 nal_cb_t *nal_cb = k->kqn_cb;
/* This NAL is a singleton: all three objects must be the module globals. */
48 LASSERT (nal == &kqswnal_api);
49 LASSERT (k == &kqswnal_data);
50 LASSERT (nal_cb == &kqswnal_lib);
52 lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
/* API 'lock' callback: acquire the library's internal lock by delegating
 * to the lib's cb_cli hook, passing 'flags' through for the IRQ state.
 * (cb_cli presumably maps to spin_lock_irqsave in the lib — confirm.) */
57 kqswnal_lock (nal_t *nal, unsigned long *flags)
59 kqswnal_data_t *k = nal->nal_data;
60 nal_cb_t *nal_cb = k->kqn_cb;
/* Singleton sanity checks, same as every other API callback in this file. */
62 LASSERT (nal == &kqswnal_api);
63 LASSERT (k == &kqswnal_data);
64 LASSERT (nal_cb == &kqswnal_lib);
66 nal_cb->cb_cli(nal_cb,flags);
/* API 'unlock' callback: mirror of kqswnal_lock(); releases the library
 * lock via the lib's cb_sti hook, restoring the saved IRQ 'flags'. */
72 kqswnal_unlock(nal_t *nal, unsigned long *flags)
72 kqswnal_data_t *k = nal->nal_data;
73 nal_cb_t *nal_cb = k->kqn_cb;
/* Singleton sanity checks, same as every other API callback in this file. */
75 LASSERT (nal == &kqswnal_api);
76 LASSERT (k == &kqswnal_data);
77 LASSERT (nal_cb == &kqswnal_lib);
79 nal_cb->cb_sti(nal_cb,flags);
/* API 'shutdown' callback.  Only logs and asserts the singleton here;
 * real teardown lives in kqswnal_finalise().  NOTE(review): extraction
 * dropped this function's remaining body lines. */
83 kqswnal_shutdown(nal_t *nal, int ni)
85 CDEBUG (D_NET, "shutdown\n");
87 LASSERT (nal == &kqswnal_api);
/* API 'yield' callback: give up the CPU if a reschedule is pending.
 * current->need_resched is the 2.4-kernel reschedule flag; the schedule()
 * call it guards was dropped by the extraction. */
92 kqswnal_yield( nal_t *nal )
94 CDEBUG (D_NET, "yield\n");
96 if (current->need_resched)
/* Library bootstrap callback (passed to PtlNIInit below): derive this
 * node's NID from its Elan node id, initialise the portals library with
 * it and the cluster size, and return the API object.  The 'interface',
 * 'requested_pid' parameters are unused in the visible code. */
102 kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
103 ptl_pid_t requested_pid)
105 ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid);
106 int nnids = kqswnal_data.kqn_nnodes;
108 CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids);
110 lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size);
112 return (&kqswnal_api);
/* Debug/ioctl helper (NAL_CMD_GET_TXDESC): under the idle-txd lock, walk
 * the active transmit descriptor list to the entry selected by
 * pcfg->pcfg_count, and report its wire header fields (byte-swapped from
 * network order), destination NID, launcher and state flags back through
 * *pcfg.  NOTE(review): extraction dropped the index-matching test and
 * return statements between the visible lines. */
116 kqswnal_get_tx_desc (struct portals_cfg *pcfg)
119 struct list_head *tmp;
121 int index = pcfg->pcfg_count;
124 spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
126 list_for_each (tmp, &kqswnal_data.kqn_activetxds) {
130 ktx = list_entry (tmp, kqswnal_tx_t, ktx_list);
/* Report the descriptor: raw pointer plus decoded wire-header fields. */
132 pcfg->pcfg_pbuf1 = (char *)ktx;
133 pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type);
134 pcfg->pcfg_size = NTOH__u32(ktx->ktx_wire_hdr->payload_length);
135 pcfg->pcfg_nid = NTOH__u64(ktx->ktx_wire_hdr->dest_nid);
136 pcfg->pcfg_nid2 = ktx->ktx_nid;
137 pcfg->pcfg_misc = ktx->ktx_launcher;
/* Pack three facts into pcfg_flags: bit 0 = on the delayed list,
 * bit 1 = non-blocking descriptor, bits 2+ = ktx_state. */
138 pcfg->pcfg_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) |
139 (!ktx->ktx_isnblk ? 0 : 2) |
140 (ktx->ktx_state << 2);
145 spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
/* ioctl-style command dispatcher registered via kportal_nal_register().
 * Handles GET_TXDESC (delegated above) and REGISTER_MYNID, which rebases
 * the NID offset so that (offset + elan id) == the requested NID. */
150 kqswnal_cmd (struct portals_cfg *pcfg, void *private)
152 LASSERT (pcfg != NULL);
154 switch (pcfg->pcfg_command) {
155 case NAL_CMD_GET_TXDESC:
156 return (kqswnal_get_tx_desc (pcfg));
158 case NAL_CMD_REGISTER_MYNID:
159 CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n",
160 pcfg->pcfg_nid - kqswnal_data.kqn_elanid,
161 kqswnal_data.kqn_nid_offset);
/* New offset chosen so this node's own NID becomes pcfg_nid exactly. */
162 kqswnal_data.kqn_nid_offset =
163 pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
164 kqswnal_lib.ni.nid = pcfg->pcfg_nid;
/* Module teardown.  Switches on how far initialisation got (kqn_init)
 * and — presumably by case fall-through, the usual pattern; the
 * intervening case labels were dropped by the extraction — undoes each
 * init stage in reverse order: portals NI, router, threads, elan comms,
 * forwarding queues, DMA mappings, descriptor/buffer memory. */
173 kqswnal_finalise (void)
175 switch (kqswnal_data.kqn_init)
/* Fully initialised: withdraw the public symbol and command handler first
 * so no new work can arrive. */
181 PORTAL_SYMBOL_UNREGISTER (kqswnal_ni);
182 kportal_nal_unregister(QSWNAL);
186 PtlNIFini (kqswnal_ni);
187 lib_fini (&kqswnal_lib);
/* By now nothing should still be transmitting. */
191 LASSERT(list_empty(&kqswnal_data.kqn_activetxds));
194 case KQN_INIT_NOTHING:
198 /**********************************************************************/
199 /* Make router stop her calling me and fail any more call-ins */
200 kpr_shutdown (&kqswnal_data.kqn_router);
202 /**********************************************************************/
203 /* flag threads to terminate, wake them and wait for them to die */
205 kqswnal_data.kqn_shuttingdown = 1;
206 wake_up_all (&kqswnal_data.kqn_sched_waitq);
/* Poll (1s ticks) until every scheduler thread has decremented the count. */
208 while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
209 CDEBUG(D_NET, "waiting for %d threads to terminate\n",
210 atomic_read (&kqswnal_data.kqn_nthreads));
211 set_current_state (TASK_UNINTERRUPTIBLE);
212 schedule_timeout (HZ);
215 /**********************************************************************/
216 /* close elan comms */
218 if (kqswnal_data.kqn_eprx_small != NULL)
219 ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);
221 if (kqswnal_data.kqn_eprx_large != NULL)
222 ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);
224 if (kqswnal_data.kqn_eptx != NULL)
225 ep_free_large_xmtr (kqswnal_data.kqn_eptx);
227 /**********************************************************************/
228 /* No more threads. No more portals, router or comms callbacks!
229 * I control the horizontals and the verticals...
232 /**********************************************************************/
233 /* Complete any blocked forwarding packets with error
/* Fail forwards still parked waiting for an idle tx descriptor... */
236 while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
238 kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
239 kpr_fwd_desc_t, kprfd_list);
240 list_del (&fwd->kprfd_list);
241 kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
/* ...and forwards queued for the (now dead) scheduler threads. */
244 while (!list_empty (&kqswnal_data.kqn_delayedfwds))
246 kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next,
247 kpr_fwd_desc_t, kprfd_list);
248 list_del (&fwd->kprfd_list);
249 kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
252 /**********************************************************************/
253 /* Wait for router to complete any packets I sent her
256 kpr_deregister (&kqswnal_data.kqn_router);
259 /**********************************************************************/
260 /* Unmap message buffers and free all descriptors and buffers
/* Release the receive-side Elan DMA reservation (if it was made). */
263 if (kqswnal_data.kqn_eprxdmahandle != NULL)
265 elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
266 kqswnal_data.kqn_eprxdmahandle, 0,
267 KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
268 KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);
270 elan3_dma_release(kqswnal_data.kqn_epdev->DmaState,
271 kqswnal_data.kqn_eprxdmahandle);
/* Release the transmit-side Elan DMA reservation (if it was made). */
274 if (kqswnal_data.kqn_eptxdmahandle != NULL)
276 elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
277 kqswnal_data.kqn_eptxdmahandle, 0,
278 KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +
281 elan3_dma_release(kqswnal_data.kqn_epdev->DmaState,
282 kqswnal_data.kqn_eptxdmahandle);
/* Free every per-descriptor transmit buffer, then the descriptor array. */
285 if (kqswnal_data.kqn_txds != NULL)
289 for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
291 kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
293 if (ktx->ktx_buffer != NULL)
294 PORTAL_FREE(ktx->ktx_buffer,
295 KQSW_TX_BUFFER_SIZE);
298 PORTAL_FREE(kqswnal_data.kqn_txds,
299 sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
/* Free every receive page, then the receive descriptor array. */
303 if (kqswnal_data.kqn_rxds != NULL)
308 for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
310 kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
312 for (j = 0; j < krx->krx_npages; j++)
313 if (krx->krx_pages[j] != NULL)
314 __free_page (krx->krx_pages[j]);
317 PORTAL_FREE(kqswnal_data.kqn_rxds,
318 sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
319 KQSW_NRXMSGS_LARGE));
322 /* resets flags, pointers to NULL etc */
323 memset(&kqswnal_data, 0, sizeof (kqswnal_data));
325 CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));
327 printk (KERN_INFO "Lustre: Routing QSW NAL unloaded (final mem %d)\n",
328 atomic_read(&portal_kmemory));
/* Module initialisation (module_init entry).  Stages, in order: wire up
 * the API/lib callback tables; zero and init all lists/locks/waitqueues;
 * find Elan device 0 and learn our node id and cluster size; allocate the
 * Elan transmitter and the small/large receivers; reserve Elan DMA space
 * for tx and rx; allocate + pre-map tx descriptors and rx page buffers;
 * bring up the portals NI; queue receives; spawn one scheduler thread per
 * CPU; register with the router and the command interface.  Error-path
 * cleanup lines were dropped by the extraction — presumably each failure
 * calls kqswnal_finalise() and returns; code kept verbatim. */
332 kqswnal_initialise (void)
334 ELAN3_DMA_REQUEST dmareq;
338 int pkmem = atomic_read(&portal_kmemory);
340 LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);
342 CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
/* Install the API callbacks defined earlier in this file. */
344 kqswnal_api.forward = kqswnal_forward;
345 kqswnal_api.shutdown = kqswnal_shutdown;
346 kqswnal_api.yield = kqswnal_yield;
347 kqswnal_api.validate = NULL; /* our api validate is a NOOP */
348 kqswnal_api.lock = kqswnal_lock;
349 kqswnal_api.unlock = kqswnal_unlock;
350 kqswnal_api.nal_data = &kqswnal_data;
352 kqswnal_lib.nal_data = &kqswnal_data;
354 /* ensure all pointers NULL etc */
355 memset (&kqswnal_data, 0, sizeof (kqswnal_data));
357 kqswnal_data.kqn_cb = &kqswnal_lib;
/* Transmit descriptor lists: idle, non-blocking idle, and active. */
359 INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
360 INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
361 INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
362 spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
363 init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
364 INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);
/* Scheduler work queues: delayed forwards/transmits and ready receives. */
366 INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
367 INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
368 INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);
370 spin_lock_init (&kqswnal_data.kqn_sched_lock);
371 init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
373 spin_lock_init (&kqswnal_data.kqn_statelock);
375 /* pointers/lists/locks initialised */
376 kqswnal_data.kqn_init = KQN_INIT_DATA;
378 /**********************************************************************/
379 /* Find the first Elan device */
381 kqswnal_data.kqn_epdev = ep_device (0);
382 if (kqswnal_data.kqn_epdev == NULL)
384 CERROR ("Can't get elan device 0\n");
/* Our identity and the cluster size come straight from the Elan device;
 * kqn_nid_offset stays 0 until NAL_CMD_REGISTER_MYNID rebases it. */
388 kqswnal_data.kqn_nid_offset = 0;
389 kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_epdev);
390 kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_epdev);
392 /**********************************************************************/
393 /* Get the transmitter */
395 kqswnal_data.kqn_eptx = ep_alloc_large_xmtr (kqswnal_data.kqn_epdev);
396 if (kqswnal_data.kqn_eptx == NULL)
398 CERROR ("Can't allocate transmitter\n");
403 /**********************************************************************/
404 /* Get the receivers */
406 kqswnal_data.kqn_eprx_small = ep_install_large_rcvr (kqswnal_data.kqn_epdev,
407 EP_SVC_LARGE_PORTALS_SMALL,
408 KQSW_EP_ENVELOPES_SMALL);
409 if (kqswnal_data.kqn_eprx_small == NULL)
411 CERROR ("Can't install small msg receiver\n");
416 kqswnal_data.kqn_eprx_large = ep_install_large_rcvr (kqswnal_data.kqn_epdev,
417 EP_SVC_LARGE_PORTALS_LARGE,
418 KQSW_EP_ENVELOPES_LARGE);
419 if (kqswnal_data.kqn_eprx_large == NULL)
421 CERROR ("Can't install large msg receiver\n");
426 /**********************************************************************/
427 /* Reserve Elan address space for transmit descriptors NB we may
428 * either send the contents of associated buffers immediately, or
429 * map them for the peer to suck/blow... */
431 dmareq.Waitfn = DDI_DMA_SLEEP;
432 dmareq.ElanAddr = (E3_Addr) 0;
433 dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN;
434 dmareq.Perm = ELAN_PERM_REMOTEWRITE;
436 rc = elan3_dma_reserve(kqswnal_data.kqn_epdev->DmaState,
437 KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
438 &dmareq, &kqswnal_data.kqn_eptxdmahandle);
439 if (rc != DDI_SUCCESS)
/* NOTE(review): message says "rx" but this is the TX reservation failing
 * — looks like a copy/paste slip in the original. */
441 CERROR ("Can't reserve rx dma space\n");
446 /**********************************************************************/
447 /* Reserve Elan address space for receive buffers */
449 dmareq.Waitfn = DDI_DMA_SLEEP;
450 dmareq.ElanAddr = (E3_Addr) 0;
451 dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN;
452 dmareq.Perm = ELAN_PERM_REMOTEWRITE;
454 rc = elan3_dma_reserve (kqswnal_data.kqn_epdev->DmaState,
455 KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
456 KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
457 &dmareq, &kqswnal_data.kqn_eprxdmahandle);
458 if (rc != DDI_SUCCESS)
460 CERROR ("Can't reserve rx dma space\n");
465 /**********************************************************************/
466 /* Allocate/Initialise transmit descriptors */
468 PORTAL_ALLOC(kqswnal_data.kqn_txds,
469 sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
470 if (kqswnal_data.kqn_txds == NULL)
476 /* clear flags, null pointers etc */
477 memset(kqswnal_data.kqn_txds, 0,
478 sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
479 for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
482 kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
/* Each descriptor owns a fixed slice of the reserved tx DMA space. */
483 int basepage = i * KQSW_NTXMSGPAGES;
485 PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
486 if (ktx->ktx_buffer == NULL)
492 /* Map pre-allocated buffer NOW, to save latency on transmit */
493 premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
494 KQSW_TX_BUFFER_SIZE);
496 elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState,
497 kqswnal_data.kqn_eptxdmahandle,
498 ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
499 basepage, &ktx->ktx_ebuffer);
501 ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
502 ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */
504 INIT_LIST_HEAD (&ktx->ktx_delayed_list);
506 ktx->ktx_state = KTX_IDLE;
/* Descriptors past KQSW_NTXMSGS are the non-blocking reserve pool. */
507 ktx->ktx_isnblk = (i >= KQSW_NTXMSGS);
508 list_add_tail (&ktx->ktx_list,
509 ktx->ktx_isnblk ? &kqswnal_data.kqn_nblk_idletxds :
510 &kqswnal_data.kqn_idletxds);
513 /**********************************************************************/
514 /* Allocate/Initialise receive descriptors */
516 PORTAL_ALLOC (kqswnal_data.kqn_rxds,
517 sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
518 if (kqswnal_data.kqn_rxds == NULL)
524 memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
525 sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));
528 for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
532 kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
/* First KQSW_NRXMSGS_SMALL descriptors use the small-message receiver
 * and page count; the rest use the large-message receiver. */
534 if (i < KQSW_NRXMSGS_SMALL)
536 krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
537 krx->krx_eprx = kqswnal_data.kqn_eprx_small;
541 krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
542 krx->krx_eprx = kqswnal_data.kqn_eprx_large;
545 LASSERT (krx->krx_npages > 0);
546 for (j = 0; j < krx->krx_npages; j++)
548 krx->krx_pages[j] = alloc_page(GFP_KERNEL);
549 if (krx->krx_pages[j] == NULL)
555 LASSERT(page_address(krx->krx_pages[j]) != NULL);
/* Map each page into the reserved rx DMA space at elan_page_idx;
 * presumably elan_page_idx advances by one per page (increment line
 * dropped by extraction) — hence the contiguity assertion below. */
557 elan3_dvma_kaddr_load(kqswnal_data.kqn_epdev->DmaState,
558 kqswnal_data.kqn_eprxdmahandle,
559 page_address(krx->krx_pages[j]),
560 PAGE_SIZE, elan_page_idx,
565 krx->krx_elanaddr = elanaddr;
567 /* NB we assume a contiguous */
568 LASSERT (elanaddr == krx->krx_elanaddr + j * PAGE_SIZE);
/* Every reserved rx page must have been consumed — exactly. */
571 LASSERT (elan_page_idx ==
572 (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
573 (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));
575 /**********************************************************************/
576 /* Network interface ready to initialise */
578 rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni);
581 CERROR ("PtlNIInit failed %d\n", rc);
586 kqswnal_data.kqn_init = KQN_INIT_PTL;
588 /**********************************************************************/
589 /* Queue receives, now that it's OK to run their completion callbacks */
591 for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
593 kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
595 /* NB this enqueue can allocate/sleep (attr == 0) */
596 rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
598 krx->krx_npages * PAGE_SIZE, 0);
601 CERROR ("failed ep_queue_receive %d\n", rc);
607 /**********************************************************************/
608 /* Spawn scheduling threads */
609 for (i = 0; i < smp_num_cpus; i++)
611 rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
614 CERROR ("failed to spawn scheduling thread: %d\n", rc);
620 /**********************************************************************/
621 /* Connect to the router */
622 rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
/* Router registration failure is non-fatal: we just don't route. */
623 CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc);
625 rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL);
627 CERROR ("Can't initialise command interface (rc = %d)\n", rc);
632 PORTAL_SYMBOL_REGISTER(kqswnal_ni);
633 kqswnal_data.kqn_init = KQN_INIT_ALL;
635 printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d "
636 "(Routing %s, initial mem %d)\n",
637 kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes,
638 kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
/* Standard kernel module metadata, entry/exit hooks, and the exported
 * NI handle other modules use to talk over this NAL. */
645 MODULE_AUTHOR("W. Marcus Miller <marcusm@llnl.gov>");
646 MODULE_DESCRIPTION("Kernel Quadrics Switch NAL v1.00");
647 MODULE_LICENSE("GPL");
649 module_init (kqswnal_initialise);
650 module_exit (kqswnal_finalise);
652 EXPORT_SYMBOL (kqswnal_ni);