2 * Copyright (C) 2002 Cluster File Systems, Inc.
3 * Author: Eric Barton <eric@bartonsoftware.com>
5 * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
6 * W. Marcus Miller - Based on ksocknal
8 * This file is part of Portals, http://www.sf.net/projects/lustre/
10 * Portals is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Portals is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Portals; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/* Handle of the Portals network interface created by PtlNIInit() below;
 * exported at the bottom of the file via PORTAL_SYMBOL_REGISTER/EXPORT_SYMBOL. */
27 ptl_handle_ni_t kqswnal_ni;
/* The single static instance holding all qswnal state; zeroed on load in
 * kqswnal_initialise() and again on unload in kqswnal_finalise(). */
29 kqswnal_data_t kqswnal_data;
/* Callback table handed to the portals router via kpr_register().
 * NOTE(review): the initializer is truncated in this view — only the
 * forwarding callback and not the closing brace/other members are visible. */
31 kpr_nal_interface_t kqswnal_router_interface = {
34 kprni_fwd: kqswnal_fwd_packet,
/*
 * kqswnal_forward: API-side entry that dispatches a portals request into
 * the library half of the NAL via lib_dispatch().
 * NOTE(review): lines are elided in this view — the return type, opening
 * brace and the parameter supplying 'id' (used below) are not visible.
 */
39 kqswnal_forward(nal_t *nal,
41 void *args, size_t args_len,
42 void *ret, size_t ret_len)
/* Recover the per-NAL state and its library callback table from the handle. */
44 kqswnal_data_t *k = nal->nal_data;
45 nal_cb_t *nal_cb = k->kqn_cb;
/* This NAL is a singleton: the handle must be the static instances above. */
47 LASSERT (nal == &kqswnal_api);
48 LASSERT (k == &kqswnal_data);
49 LASSERT (nal_cb == &kqswnal_lib);
51 lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
/*
 * kqswnal_lock: API-side lock hook; delegates to the library's cb_cli
 * callback, passing 'flags' through (presumably saved IRQ flags — the
 * cb_cli implementation is not visible here; confirm in lib code).
 */
56 kqswnal_lock (nal_t *nal, unsigned long *flags)
58 kqswnal_data_t *k = nal->nal_data;
59 nal_cb_t *nal_cb = k->kqn_cb;
/* Singleton sanity checks, mirroring kqswnal_forward(). */
61 LASSERT (nal == &kqswnal_api);
62 LASSERT (k == &kqswnal_data);
63 LASSERT (nal_cb == &kqswnal_lib);
65 nal_cb->cb_cli(nal_cb,flags);
/*
 * kqswnal_unlock: API-side unlock hook; the exact inverse of
 * kqswnal_lock(), delegating to the library's cb_sti callback.
 */
69 kqswnal_unlock(nal_t *nal, unsigned long *flags)
71 kqswnal_data_t *k = nal->nal_data;
72 nal_cb_t *nal_cb = k->kqn_cb;
/* Singleton sanity checks, mirroring kqswnal_lock(). */
74 LASSERT (nal == &kqswnal_api);
75 LASSERT (k == &kqswnal_data);
76 LASSERT (nal_cb == &kqswnal_lib);
78 nal_cb->cb_sti(nal_cb,flags);
/*
 * kqswnal_shutdown: API shutdown hook for interface 'ni'.
 * NOTE(review): the body is almost entirely elided in this view — only
 * the debug trace and the singleton assertion remain; the actual
 * teardown (if any beyond kqswnal_finalise()) is not visible.
 */
82 kqswnal_shutdown(nal_t *nal, int ni)
84 CDEBUG (D_NET, "shutdown\n");
86 LASSERT (nal == &kqswnal_api);
/*
 * kqswnal_yield: give up the CPU if a reschedule is pending
 * (2.4-kernel style current->need_resched check).
 * NOTE(review): the branch body is elided here — presumably schedule();
 * confirm against the full source.
 */
91 kqswnal_yield( nal_t *nal )
93 CDEBUG (D_NET, "yield\n");
95 if (current->need_resched)
/*
 * kqswnal_init: bootstrap callback passed to PtlNIInit() (see
 * kqswnal_initialise below).  Derives this node's NID from its Elan id,
 * initialises the NAL library with it, and returns the API handle.
 */
101 kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
102 ptl_pid_t requested_pid)
104 ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid);
105 int nnids = kqswnal_data.kqn_nnodes;
107 CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids);
/* NB requested_pid is ignored; lib_init is called with pid 0. */
109 lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size);
111 return (&kqswnal_api);
/*
 * kqswnal_cmd: ioctl-style command handler registered with
 * kportal_nal_register().  Only NAL_CMD_REGISTER_MYNID is visible here:
 * it records the offset between the requested NID and our Elan node id,
 * and updates the library's notion of our NID.
 * NOTE(review): the default case and return statement are elided in this
 * view.
 */
115 kqswnal_cmd (struct portal_ioctl_data *data, void *private)
117 LASSERT (data != NULL);
119 switch (data->ioc_nal_cmd) {
120 case NAL_CMD_REGISTER_MYNID:
121 CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n",
122 data->ioc_nid - kqswnal_data.kqn_elanid,
123 kqswnal_data.kqn_nid_offset);
/* NID = elan id + offset, so store the difference as the new offset. */
124 kqswnal_data.kqn_nid_offset =
125 data->ioc_nid - kqswnal_data.kqn_elanid;
126 kqswnal_lib.ni.nid = data->ioc_nid;
/*
 * kqswnal_finalise: module unload path.  Tears down in reverse order of
 * kqswnal_initialise(), switching on how far initialisation got
 * (kqn_init), then falls through the common teardown: stop the router,
 * kill the scheduler threads, close Elan comms, drain forwarding queues,
 * unmap DMA space and free all tx/rx descriptors, and finally zero the
 * whole state structure.
 * NOTE(review): many lines (case labels, braces, sleeps) are elided in
 * this view; the switch below is only partially visible.
 */
135 kqswnal_finalise (void)
137 switch (kqswnal_data.kqn_init)
/* Fully initialised: unexport the NI symbol first so no new users appear. */
143 PORTAL_SYMBOL_UNREGISTER (kqswnal_ni);
147 PtlNIFini (kqswnal_ni);
148 lib_fini (&kqswnal_lib);
154 case KQN_INIT_NOTHING:
158 /**********************************************************************/
159 /* Stop the router calling into us and fail any further call-ins */
160 kpr_shutdown (&kqswnal_data.kqn_router);
162 /**********************************************************************/
163 /* flag threads to terminate, wake them and wait for them to die */
165 kqswnal_data.kqn_shuttingdown = 1;
166 wake_up_all (&kqswnal_data.kqn_sched_waitq);
/* Poll (1s ticks) until every scheduler thread has decremented the count. */
168 while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
169 CDEBUG(D_NET, "waiting for %d threads to terminate\n",
170 atomic_read (&kqswnal_data.kqn_nthreads));
171 set_current_state (TASK_UNINTERRUPTIBLE);
172 schedule_timeout (HZ);
175 /**********************************************************************/
176 /* close elan comms */
178 if (kqswnal_data.kqn_eprx_small != NULL)
179 ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);
181 if (kqswnal_data.kqn_eprx_large != NULL)
182 ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);
184 if (kqswnal_data.kqn_eptx != NULL)
185 ep_free_large_xmtr (kqswnal_data.kqn_eptx);
187 /**********************************************************************/
188 /* No more threads. No more portals, router or comms callbacks!
189 * I control the horizontals and the verticals...
 */
192 /**********************************************************************/
193 /* Complete any blocked forwarding packets with error */
196 while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
198 kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
199 kpr_fwd_desc_t, kprfd_list);
200 list_del (&fwd->kprfd_list);
201 kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
204 while (!list_empty (&kqswnal_data.kqn_delayedfwds))
206 kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next,
207 kpr_fwd_desc_t, kprfd_list);
208 list_del (&fwd->kprfd_list);
209 kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
212 /**********************************************************************/
213 /* Wait for router to complete any packets I sent her */
216 kpr_deregister (&kqswnal_data.kqn_router);
219 /**********************************************************************/
220 /* Unmap message buffers and free all descriptors and buffers */
223 if (kqswnal_data.kqn_eprxdmahandle != NULL)
225 elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
226 kqswnal_data.kqn_eprxdmahandle, 0,
227 KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
228 KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);
230 elan3_dma_release(kqswnal_data.kqn_epdev->DmaState,
231 kqswnal_data.kqn_eprxdmahandle);
234 if (kqswnal_data.kqn_eptxdmahandle != NULL)
236 elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState,
237 kqswnal_data.kqn_eptxdmahandle, 0,
238 KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +
241 elan3_dma_release(kqswnal_data.kqn_epdev->DmaState,
242 kqswnal_data.kqn_eptxdmahandle);
/* Free each tx descriptor's pre-allocated bounce buffer, then the array. */
245 if (kqswnal_data.kqn_txds != NULL)
249 for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
251 kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
253 if (ktx->ktx_buffer != NULL)
254 PORTAL_FREE(ktx->ktx_buffer,
255 KQSW_TX_BUFFER_SIZE);
258 PORTAL_FREE(kqswnal_data.kqn_txds,
259 sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
/* Free each rx descriptor's pages, then the array. */
263 if (kqswnal_data.kqn_rxds != NULL)
268 for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
270 kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
272 for (j = 0; j < krx->krx_npages; j++)
273 if (krx->krx_pages[j] != NULL)
274 __free_page (krx->krx_pages[j]);
277 PORTAL_FREE(kqswnal_data.kqn_rxds,
278 sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
279 KQSW_NRXMSGS_LARGE));
282 /* resets flags, pointers to NULL etc */
283 memset(&kqswnal_data, 0, sizeof (kqswnal_data));
285 CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));
287 printk (KERN_INFO "Routing QSW NAL unloaded (final mem %d)\n",
288 atomic_read(&portal_kmemory));
/*
 * kqswnal_initialise: module load path.  Wires up the API vtable, zeroes
 * and initialises all shared state, attaches to Elan device 0, allocates
 * the transmitter/receivers, reserves Elan DMA address space, allocates
 * and pre-maps tx/rx descriptors, brings up the Portals NI, queues the
 * receives, spawns one scheduler thread per CPU, and registers with the
 * router and the command interface.  kqn_init tracks progress so
 * kqswnal_finalise() can unwind a partial init.
 * NOTE(review): error-unwind branches, closing braces, and some
 * declarations (rc, i, j, premapped_pages, elan_page_idx, elanaddr) are
 * elided in this view.
 */
292 kqswnal_initialise (void)
294 ELAN3_DMA_REQUEST dmareq;
298 int pkmem = atomic_read(&portal_kmemory);
300 LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);
302 CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
/* Populate the API vtable with the hooks defined above. */
304 kqswnal_api.forward = kqswnal_forward;
305 kqswnal_api.shutdown = kqswnal_shutdown;
306 kqswnal_api.yield = kqswnal_yield;
307 kqswnal_api.validate = NULL; /* our api validate is a NOOP */
308 kqswnal_api.lock = kqswnal_lock;
309 kqswnal_api.unlock = kqswnal_unlock;
310 kqswnal_api.nal_data = &kqswnal_data;
312 kqswnal_lib.nal_data = &kqswnal_data;
314 /* ensure all pointers NULL etc */
315 memset (&kqswnal_data, 0, sizeof (kqswnal_data));
317 kqswnal_data.kqn_cb = &kqswnal_lib;
319 INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
320 INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
321 spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
322 init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
323 INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);
325 INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
326 INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
327 INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);
329 spin_lock_init (&kqswnal_data.kqn_sched_lock);
330 init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
332 spin_lock_init (&kqswnal_data.kqn_statelock);
334 /* pointers/lists/locks initialised */
335 kqswnal_data.kqn_init = KQN_INIT_DATA;
337 /**********************************************************************/
338 /* Find the first Elan device */
340 kqswnal_data.kqn_epdev = ep_device (0);
341 if (kqswnal_data.kqn_epdev == NULL)
343 CERROR ("Can't get elan device 0\n");
347 kqswnal_data.kqn_nid_offset = 0;
348 kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_epdev);
349 kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_epdev);
351 /**********************************************************************/
352 /* Get the transmitter */
354 kqswnal_data.kqn_eptx = ep_alloc_large_xmtr (kqswnal_data.kqn_epdev);
355 if (kqswnal_data.kqn_eptx == NULL)
357 CERROR ("Can't allocate transmitter\n");
362 /**********************************************************************/
363 /* Get the receivers */
365 kqswnal_data.kqn_eprx_small = ep_install_large_rcvr (kqswnal_data.kqn_epdev,
366 EP_SVC_LARGE_PORTALS_SMALL,
367 KQSW_EP_ENVELOPES_SMALL);
368 if (kqswnal_data.kqn_eprx_small == NULL)
370 CERROR ("Can't install small msg receiver\n");
375 kqswnal_data.kqn_eprx_large = ep_install_large_rcvr (kqswnal_data.kqn_epdev,
376 EP_SVC_LARGE_PORTALS_LARGE,
377 KQSW_EP_ENVELOPES_LARGE);
378 if (kqswnal_data.kqn_eprx_large == NULL)
380 CERROR ("Can't install large msg receiver\n");
385 /**********************************************************************/
386 /* Reserve Elan address space for transmit buffers */
388 dmareq.Waitfn = DDI_DMA_SLEEP;
389 dmareq.ElanAddr = (E3_Addr) 0;
390 dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN;
391 dmareq.Perm = ELAN_PERM_REMOTEREAD;
393 rc = elan3_dma_reserve(kqswnal_data.kqn_epdev->DmaState,
394 KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
395 &dmareq, &kqswnal_data.kqn_eptxdmahandle);
396 if (rc != DDI_SUCCESS)
/* NOTE(review): message says "rx" but this is the TX reservation —
 * copy/paste error in the error string; should read "tx dma space". */
398 CERROR ("Can't reserve rx dma space\n");
403 /**********************************************************************/
404 /* Reserve Elan address space for receive buffers */
406 dmareq.Waitfn = DDI_DMA_SLEEP;
407 dmareq.ElanAddr = (E3_Addr) 0;
408 dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN;
409 dmareq.Perm = ELAN_PERM_REMOTEWRITE;
411 rc = elan3_dma_reserve (kqswnal_data.kqn_epdev->DmaState,
412 KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
413 KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
414 &dmareq, &kqswnal_data.kqn_eprxdmahandle);
415 if (rc != DDI_SUCCESS)
417 CERROR ("Can't reserve rx dma space\n");
422 /**********************************************************************/
423 /* Allocate/Initialise transmit descriptors */
425 PORTAL_ALLOC(kqswnal_data.kqn_txds,
426 sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
427 if (kqswnal_data.kqn_txds == NULL)
433 /* clear flags, null pointers etc */
434 memset(kqswnal_data.kqn_txds, 0,
435 sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
436 for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
439 kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
440 int basepage = i * KQSW_NTXMSGPAGES;
442 PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
443 if (ktx->ktx_buffer == NULL)
449 /* Map pre-allocated buffer NOW, to save latency on transmit */
450 premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
451 KQSW_TX_BUFFER_SIZE);
453 elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState,
454 kqswnal_data.kqn_eptxdmahandle,
455 ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
456 basepage, &ktx->ktx_ebuffer);
458 ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
459 ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */
/* First KQSW_NTXMSGS descriptors go on the normal idle list; the rest
 * on the non-blocking idle list. */
461 if (i < KQSW_NTXMSGS)
462 ktx->ktx_idle = &kqswnal_data.kqn_idletxds;
464 ktx->ktx_idle = &kqswnal_data.kqn_nblk_idletxds;
466 list_add_tail (&ktx->ktx_list, ktx->ktx_idle);
469 /**********************************************************************/
470 /* Allocate/Initialise receive descriptors */
472 PORTAL_ALLOC (kqswnal_data.kqn_rxds,
473 sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
474 if (kqswnal_data.kqn_rxds == NULL)
480 memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
481 sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));
/* Small-message descriptors first, then large; each gets its page count
 * and the matching Elan receiver. */
484 for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
488 kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
490 if (i < KQSW_NRXMSGS_SMALL)
492 krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
493 krx->krx_eprx = kqswnal_data.kqn_eprx_small;
497 krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
498 krx->krx_eprx = kqswnal_data.kqn_eprx_large;
501 LASSERT (krx->krx_npages > 0);
502 for (j = 0; j < krx->krx_npages; j++)
504 krx->krx_pages[j] = alloc_page(GFP_KERNEL);
505 if (krx->krx_pages[j] == NULL)
511 LASSERT(page_address(krx->krx_pages[j]) != NULL);
513 elan3_dvma_kaddr_load(kqswnal_data.kqn_epdev->DmaState,
514 kqswnal_data.kqn_eprxdmahandle,
515 page_address(krx->krx_pages[j]),
516 PAGE_SIZE, elan_page_idx,
521 krx->krx_elanaddr = elanaddr;
523 /* NB we assume a contiguous Elan address mapping */
524 LASSERT (elanaddr == krx->krx_elanaddr + j * PAGE_SIZE);
/* Every reserved rx page must have been consumed exactly. */
527 LASSERT (elan_page_idx ==
528 (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
529 (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));
531 /**********************************************************************/
532 /* Network interface ready to initialise */
534 rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni);
537 CERROR ("PtlNIInit failed %d\n", rc);
542 kqswnal_data.kqn_init = KQN_INIT_PTL;
544 /**********************************************************************/
545 /* Queue receives, now that it's OK to run their completion callbacks */
547 for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
549 kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
551 /* NB this enqueue can allocate/sleep (attr == 0) */
552 rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
554 krx->krx_npages * PAGE_SIZE, 0);
557 CERROR ("failed ep_queue_receive %d\n", rc);
563 /**********************************************************************/
564 /* Spawn scheduling threads */
565 for (i = 0; i < smp_num_cpus; i++)
567 rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
570 CERROR ("failed to spawn scheduling thread: %d\n", rc);
576 /**********************************************************************/
577 /* Connect to the router */
578 rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
/* NB router registration failure is non-fatal: we just don't route. */
579 CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc);
581 rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL);
583 CERROR ("Can't initialise command interface (rc = %d)\n", rc);
588 PORTAL_SYMBOL_REGISTER(kqswnal_ni);
589 kqswnal_data.kqn_init = KQN_INIT_ALL;
591 printk(KERN_INFO "Routing QSW NAL loaded on node %d of %d "
592 "(Routing %s, initial mem %d)\n",
593 kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes,
594 kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
/* Module metadata and load/unload hooks. */
601 MODULE_AUTHOR("W. Marcus Miller <marcusm@llnl.gov>");
602 MODULE_DESCRIPTION("Kernel Quadrics Switch NAL v1.00");
603 MODULE_LICENSE("GPL");
605 module_init (kqswnal_initialise);
606 module_exit (kqswnal_finalise);
/* Export the NI handle so other kernel modules can use this interface. */
608 EXPORT_SYMBOL (kqswnal_ni);