/* fs/lustre-release.git: lustre/portals/knals/qswnal/qswnal.c
 * (commit 127bbce6f4584314d298cf845205cba197e253be) */

/*
 * Copyright (C) 2002 Cluster File Systems, Inc.
 *   Author: Eric Barton <eric@bartonsoftware.com>
 *
 * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
 * W. Marcus Miller - Based on ksocknal
 *
 * This file is part of Portals, http://www.sf.net/projects/lustre/
 *
 * Portals is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Portals is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Portals; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include "qswnal.h"

ptl_handle_ni_t         kqswnal_ni;
nal_t                   kqswnal_api;
kqswnal_data_t          kqswnal_data;

kpr_nal_interface_t kqswnal_router_interface = {
        kprni_nalid:    QSWNAL,
        kprni_arg:      NULL,
        kprni_fwd:      kqswnal_fwd_packet,
        kprni_notify:   NULL,                   /* we're connectionless */
};

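/* Runtime tunables, exposed below via sysctl.  Assuming the usual
 * root-level mapping this is /proc/sys/qswnal/, so e.g.
 *   echo 0 > /proc/sys/qswnal/optimized_gets
 * turns off the optimised GET path without reloading the module. */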
#if CONFIG_SYSCTL
#define QSWNAL_SYSCTL  201

#define QSWNAL_SYSCTL_OPTIMIZED_GETS     1
#define QSWNAL_SYSCTL_COPY_SMALL_FWD     2

static ctl_table kqswnal_ctl_table[] = {
        {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
         &kqswnal_data.kqn_optimized_gets, sizeof (int),
         0644, NULL, &proc_dointvec},
        {QSWNAL_SYSCTL_COPY_SMALL_FWD, "copy_small_fwd",
         &kqswnal_data.kqn_copy_small_fwd, sizeof (int),
         0644, NULL, &proc_dointvec},
        {0}
};

static ctl_table kqswnal_top_ctl_table[] = {
        {QSWNAL_SYSCTL, "qswnal", NULL, 0, 0555, kqswnal_ctl_table},
        {0}
};
#endif

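/* API-side glue: api-level requests are funnelled through
 * lib_dispatch() into the library half of the NAL. */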
static int
kqswnal_forward(nal_t   *nal,
                int     id,
                void    *args,  size_t args_len,
                void    *ret,   size_t ret_len)
{
        kqswnal_data_t *k = nal->nal_data;
        nal_cb_t       *nal_cb = k->kqn_cb;

        LASSERT (nal == &kqswnal_api);
        LASSERT (k == &kqswnal_data);
        LASSERT (nal_cb == &kqswnal_lib);

        lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */
        return (PTL_OK);
}

static void
kqswnal_lock (nal_t *nal, unsigned long *flags)
{
        kqswnal_data_t *k = nal->nal_data;
        nal_cb_t       *nal_cb = k->kqn_cb;

        LASSERT (nal == &kqswnal_api);
        LASSERT (k == &kqswnal_data);
        LASSERT (nal_cb == &kqswnal_lib);

        nal_cb->cb_cli(nal_cb,flags);
}

static void
kqswnal_unlock(nal_t *nal, unsigned long *flags)
{
        kqswnal_data_t *k = nal->nal_data;
        nal_cb_t       *nal_cb = k->kqn_cb;

        LASSERT (nal == &kqswnal_api);
        LASSERT (k == &kqswnal_data);
        LASSERT (nal_cb == &kqswnal_lib);

        nal_cb->cb_sti(nal_cb,flags);
}

static int
kqswnal_shutdown(nal_t *nal, int ni)
{
        CDEBUG (D_NET, "shutdown\n");

        LASSERT (nal == &kqswnal_api);
        return (0);
}

static void
kqswnal_yield( nal_t *nal )
{
        CDEBUG (D_NET, "yield\n");

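        /* 2.4-era cooperative yield: current->need_resched is set when
         * a reschedule is pending (later kernels would use
         * cond_resched() here). */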
        if (current->need_resched)
                schedule();
        return;
}

static nal_t *
kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size,
             ptl_pid_t requested_pid)
{
        ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid);
        int       nnids = kqswnal_data.kqn_nnodes;

        CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids);

        lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size);

        return (&kqswnal_api);
}

int
kqswnal_get_tx_desc (struct portals_cfg *pcfg)
{
        unsigned long      flags;
        struct list_head  *tmp;
        kqswnal_tx_t      *ktx;
        int                index = pcfg->pcfg_count;
        int                rc = -ENOENT;

        spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);

        list_for_each (tmp, &kqswnal_data.kqn_activetxds) {
                if (index-- != 0)
                        continue;

                ktx = list_entry (tmp, kqswnal_tx_t, ktx_list);

                pcfg->pcfg_pbuf1 = (char *)ktx;
                pcfg->pcfg_count = NTOH__u32(ktx->ktx_wire_hdr->type);
                pcfg->pcfg_size  = NTOH__u32(ktx->ktx_wire_hdr->payload_length);
                pcfg->pcfg_nid   = NTOH__u64(ktx->ktx_wire_hdr->dest_nid);
                pcfg->pcfg_nid2  = ktx->ktx_nid;
                pcfg->pcfg_misc  = ktx->ktx_launcher;
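                /* Pack state into pcfg_flags: bit 0 = queued on the
                 * delayed list, bit 1 = non-blocking descriptor, and
                 * the remaining bits carry ktx_state. */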
                pcfg->pcfg_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) |
                                  (!ktx->ktx_isnblk                    ? 0 : 2) |
                                  (ktx->ktx_state << 2);
                rc = 0;
                break;
        }

        spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
        return (rc);
}

int
kqswnal_cmd (struct portals_cfg *pcfg, void *private)
{
        LASSERT (pcfg != NULL);

        switch (pcfg->pcfg_command) {
        case NAL_CMD_GET_TXDESC:
                return (kqswnal_get_tx_desc (pcfg));

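        /* NIDs here are Elan IDs plus a constant offset, so recording
         * (pcfg_nid - kqn_elanid) re-bases the NIDs of all peers. */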
        case NAL_CMD_REGISTER_MYNID:
                CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n",
                        pcfg->pcfg_nid - kqswnal_data.kqn_elanid,
                        kqswnal_data.kqn_nid_offset);
                kqswnal_data.kqn_nid_offset =
                        pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
                kqswnal_lib.ni.nid = pcfg->pcfg_nid;
                return (0);

        default:
                return (-EINVAL);
        }
}

void __exit
kqswnal_finalise (void)
{
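        /* Staged teardown: kqn_init records how far initialisation
         * got, and each case below unwinds one stage, falling through
         * to the stages that preceded it. */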
        switch (kqswnal_data.kqn_init)
        {
        default:
                LASSERT (0);

        case KQN_INIT_ALL:
#if CONFIG_SYSCTL
                if (kqswnal_data.kqn_sysctl != NULL)
                        unregister_sysctl_table (kqswnal_data.kqn_sysctl);
#endif
                PORTAL_SYMBOL_UNREGISTER (kqswnal_ni);
                kportal_nal_unregister(QSWNAL);
                /* fall through */

        case KQN_INIT_PTL:
                PtlNIFini (kqswnal_ni);
                lib_fini (&kqswnal_lib);
                /* fall through */

        case KQN_INIT_DATA:
                LASSERT(list_empty(&kqswnal_data.kqn_activetxds));
                break;

        case KQN_INIT_NOTHING:
                return;
        }

        /**********************************************************************/
        /* Stop the router calling me and fail any further call-ins */
        kpr_shutdown (&kqswnal_data.kqn_router);

        /**********************************************************************/
        /* flag threads we've started to terminate and wait for all to ack */

        kqswnal_data.kqn_shuttingdown = 1;
        wake_up_all (&kqswnal_data.kqn_sched_waitq);

        while (atomic_read (&kqswnal_data.kqn_nthreads_running) != 0) {
                CDEBUG(D_NET, "waiting for %d threads to start shutting down\n",
                       atomic_read (&kqswnal_data.kqn_nthreads_running));
                set_current_state (TASK_UNINTERRUPTIBLE);
                schedule_timeout (HZ);
        }

        /**********************************************************************/
        /* close elan comms */
#if MULTIRAIL_EKC
        if (kqswnal_data.kqn_eprx_small != NULL)
                ep_free_rcvr (kqswnal_data.kqn_eprx_small);

        if (kqswnal_data.kqn_eprx_large != NULL)
                ep_free_rcvr (kqswnal_data.kqn_eprx_large);

        if (kqswnal_data.kqn_eptx != NULL)
                ep_free_xmtr (kqswnal_data.kqn_eptx);
#else
        if (kqswnal_data.kqn_eprx_small != NULL)
                ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);

        if (kqswnal_data.kqn_eprx_large != NULL)
                ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);

        if (kqswnal_data.kqn_eptx != NULL)
                ep_free_large_xmtr (kqswnal_data.kqn_eptx);
#endif
        /**********************************************************************/
        /* flag threads to terminate, wake them and wait for them to die */

        kqswnal_data.kqn_shuttingdown = 2;
        wake_up_all (&kqswnal_data.kqn_sched_waitq);

        while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
                CDEBUG(D_NET, "waiting for %d threads to terminate\n",
                       atomic_read (&kqswnal_data.kqn_nthreads));
                set_current_state (TASK_UNINTERRUPTIBLE);
                schedule_timeout (HZ);
        }

        /**********************************************************************/
        /* No more threads.  No more portals, router or comms callbacks!
         * I control the horizontals and the verticals...
         */

#if MULTIRAIL_EKC
        LASSERT (list_empty (&kqswnal_data.kqn_readyrxds));
#endif

        /**********************************************************************/
        /* Complete any blocked forwarding packets with error
         */

        while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
        {
                kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
                                                  kpr_fwd_desc_t, kprfd_list);
                list_del (&fwd->kprfd_list);
                kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
        }

        while (!list_empty (&kqswnal_data.kqn_delayedfwds))
        {
                kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next,
                                                  kpr_fwd_desc_t, kprfd_list);
                list_del (&fwd->kprfd_list);
                kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH);
        }

        /**********************************************************************/
        /* Wait for router to complete any packets I sent her
         */

        kpr_deregister (&kqswnal_data.kqn_router);

        /**********************************************************************/
        /* Unmap message buffers and free all descriptors and buffers
         */

#if MULTIRAIL_EKC
        /* FTTB, we need to unmap any remaining mapped memory.  When
         * ep_dvma_release() gets fixed (and releases any mappings in the
         * region), we can delete all the code from here -------->  */

        if (kqswnal_data.kqn_txds != NULL) {
                int  i;

                for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) {
                        kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];

                        /* If ktx has a buffer, it got mapped; unmap now.
                         * NB only the pre-mapped stuff is still mapped
                         * since all tx descs must be idle */

                        if (ktx->ktx_buffer != NULL)
                                ep_dvma_unload(kqswnal_data.kqn_ep,
                                               kqswnal_data.kqn_ep_tx_nmh,
                                               &ktx->ktx_ebuffer);
                }
        }

        if (kqswnal_data.kqn_rxds != NULL) {
                int   i;

                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                        /* If krx_pages[0] got allocated, it got mapped.
                         * NB subsequent pages get merged */

                        if (krx->krx_pages[0] != NULL)
                                ep_dvma_unload(kqswnal_data.kqn_ep,
                                               kqswnal_data.kqn_ep_rx_nmh,
                                               &krx->krx_elanbuffer);
                }
        }
        /* <----------- to here */

        if (kqswnal_data.kqn_ep_rx_nmh != NULL)
                ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);

        if (kqswnal_data.kqn_ep_tx_nmh != NULL)
                ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);
#else
        if (kqswnal_data.kqn_eprxdmahandle != NULL)
        {
                elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eprxdmahandle, 0,
                                  KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                  KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);

                elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eprxdmahandle);
        }

        if (kqswnal_data.kqn_eptxdmahandle != NULL)
        {
                elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eptxdmahandle, 0,
                                  KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +
                                                      KQSW_NNBLK_TXMSGS));

                elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
                                  kqswnal_data.kqn_eptxdmahandle);
        }
#endif

        if (kqswnal_data.kqn_txds != NULL)
        {
                int   i;

                for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
                {
                        kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];

                        if (ktx->ktx_buffer != NULL)
                                PORTAL_FREE(ktx->ktx_buffer,
                                            KQSW_TX_BUFFER_SIZE);
                }

                PORTAL_FREE(kqswnal_data.kqn_txds,
                            sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
                                                     KQSW_NNBLK_TXMSGS));
        }

        if (kqswnal_data.kqn_rxds != NULL)
        {
                int   i;
                int   j;

                for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
                {
                        kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                        for (j = 0; j < krx->krx_npages; j++)
                                if (krx->krx_pages[j] != NULL)
                                        __free_page (krx->krx_pages[j]);
                }

                PORTAL_FREE(kqswnal_data.kqn_rxds,
                            sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
                                                    KQSW_NRXMSGS_LARGE));
        }

        /* resets flags, pointers to NULL etc */
        memset(&kqswnal_data, 0, sizeof (kqswnal_data));

        CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));

        printk (KERN_INFO "Lustre: Routing QSW NAL unloaded (final mem %d)\n",
                atomic_read(&portal_kmemory));
}

static int __init
kqswnal_initialise (void)
{
#if MULTIRAIL_EKC
        EP_RAILMASK       all_rails = EP_RAILMASK_ALL;
#else
        ELAN3_DMA_REQUEST dmareq;
#endif
        int               rc;
        int               i;
        int               elan_page_idx;
        int               pkmem = atomic_read(&portal_kmemory);

        LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);

        CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));

        kqswnal_api.forward  = kqswnal_forward;
        kqswnal_api.shutdown = kqswnal_shutdown;
        kqswnal_api.yield    = kqswnal_yield;
        kqswnal_api.validate = NULL;            /* our api validate is a NOOP */
        kqswnal_api.lock     = kqswnal_lock;
        kqswnal_api.unlock   = kqswnal_unlock;
        kqswnal_api.nal_data = &kqswnal_data;

        kqswnal_lib.nal_data = &kqswnal_data;

        memset(&kqswnal_rpc_success, 0, sizeof(kqswnal_rpc_success));
        memset(&kqswnal_rpc_failed, 0, sizeof(kqswnal_rpc_failed));
#if MULTIRAIL_EKC
        kqswnal_rpc_failed.Data[0] = -ECONNREFUSED;
#else
        kqswnal_rpc_failed.Status = -ECONNREFUSED;
#endif
        /* ensure all pointers NULL etc */
        memset (&kqswnal_data, 0, sizeof (kqswnal_data));

        kqswnal_data.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
        kqswnal_data.kqn_copy_small_fwd = KQSW_COPY_SMALL_FWD;

        kqswnal_data.kqn_cb = &kqswnal_lib;

        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
        spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
        init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
        INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);

        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
        INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);

        spin_lock_init (&kqswnal_data.kqn_sched_lock);
        init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);

        spin_lock_init (&kqswnal_data.kqn_statelock);

        /* pointers/lists/locks initialised */
        kqswnal_data.kqn_init = KQN_INIT_DATA;

#if MULTIRAIL_EKC
        kqswnal_data.kqn_ep = ep_system();
        if (kqswnal_data.kqn_ep == NULL) {
                CERROR("Can't initialise EKC\n");
                return (-ENODEV);
        }

        if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
                CERROR("Can't get elan ID\n");
                kqswnal_finalise();
                return (-ENODEV);
        }
#else
        /**********************************************************************/
        /* Find the first Elan device */

        kqswnal_data.kqn_ep = ep_device (0);
        if (kqswnal_data.kqn_ep == NULL)
        {
                CERROR ("Can't get elan device 0\n");
                return (-ENODEV);
        }
#endif

        kqswnal_data.kqn_nid_offset = 0;
        kqswnal_data.kqn_nnodes     = ep_numnodes (kqswnal_data.kqn_ep);
        kqswnal_data.kqn_elanid     = ep_nodeid (kqswnal_data.kqn_ep);

        /**********************************************************************/
        /* Get the transmitter */

        kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep);
        if (kqswnal_data.kqn_eptx == NULL)
        {
                CERROR ("Can't allocate transmitter\n");
                kqswnal_finalise ();
                return (-ENOMEM);
        }

        /**********************************************************************/
        /* Get the receivers */

        kqswnal_data.kqn_eprx_small = ep_alloc_rcvr (kqswnal_data.kqn_ep,
                                                     EP_MSG_SVC_PORTALS_SMALL,
                                                     KQSW_EP_ENVELOPES_SMALL);
        if (kqswnal_data.kqn_eprx_small == NULL)
        {
                CERROR ("Can't install small msg receiver\n");
                kqswnal_finalise ();
                return (-ENOMEM);
        }

        kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep,
                                                     EP_MSG_SVC_PORTALS_LARGE,
                                                     KQSW_EP_ENVELOPES_LARGE);
        if (kqswnal_data.kqn_eprx_large == NULL)
        {
                CERROR ("Can't install large msg receiver\n");
                kqswnal_finalise ();
                return (-ENOMEM);
        }

        /**********************************************************************/
553          * either send the contents of associated buffers immediately, or
554          * map them for the peer to suck/blow... */
555 #if MULTIRAIL_EKC
556         kqswnal_data.kqn_ep_tx_nmh = 
557                 ep_dvma_reserve(kqswnal_data.kqn_ep,
558                                 KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
559                                 EP_PERM_WRITE);
560         if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
561                 CERROR("Can't reserve tx dma space\n");
562                 kqswnal_finalise();
563                 return (-ENOMEM);
564         }
565 #else
566         dmareq.Waitfn   = DDI_DMA_SLEEP;
567         dmareq.ElanAddr = (E3_Addr) 0;
568         dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
569         dmareq.Perm     = ELAN_PERM_REMOTEWRITE;
570
571         rc = elan3_dma_reserve(kqswnal_data.kqn_ep->DmaState,
572                               KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
573                               &dmareq, &kqswnal_data.kqn_eptxdmahandle);
574         if (rc != DDI_SUCCESS)
575         {
                CERROR ("Can't reserve tx dma space\n");
                kqswnal_finalise ();
                return (-ENOMEM);
        }
#endif
        /**********************************************************************/
        /* Reserve Elan address space for receive buffers */
#if MULTIRAIL_EKC
        kqswnal_data.kqn_ep_rx_nmh =
                ep_dvma_reserve(kqswnal_data.kqn_ep,
                                KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
                                EP_PERM_WRITE);
        if (kqswnal_data.kqn_ep_rx_nmh == NULL) {
                CERROR("Can't reserve rx dma space\n");
                kqswnal_finalise();
                return (-ENOMEM);
        }
#else
        dmareq.Waitfn   = DDI_DMA_SLEEP;
        dmareq.ElanAddr = (E3_Addr) 0;
        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;

        rc = elan3_dma_reserve (kqswnal_data.kqn_ep->DmaState,
                                KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
                                KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
                                &dmareq, &kqswnal_data.kqn_eprxdmahandle);
        if (rc != DDI_SUCCESS)
        {
                CERROR ("Can't reserve rx dma space\n");
                kqswnal_finalise ();
                return (-ENOMEM);
        }
#endif
        /**********************************************************************/
        /* Allocate/Initialise transmit descriptors */

        PORTAL_ALLOC(kqswnal_data.kqn_txds,
                     sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
        if (kqswnal_data.kqn_txds == NULL)
        {
                kqswnal_finalise ();
                return (-ENOMEM);
        }

        /* clear flags, null pointers etc */
        memset(kqswnal_data.kqn_txds, 0,
               sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
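        /* The first KQSW_NTXMSGS descriptors serve normal sends; the
         * remaining KQSW_NNBLK_TXMSGS are reserved for senders that
         * may not block (ktx_isnblk below). */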
        for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
        {
                int           premapped_pages;
                kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
                int           basepage = i * KQSW_NTXMSGPAGES;

                PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
                if (ktx->ktx_buffer == NULL)
                {
                        kqswnal_finalise ();
                        return (-ENOMEM);
                }

                /* Map pre-allocated buffer NOW, to save latency on transmit */
                premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
                                                        KQSW_TX_BUFFER_SIZE);
#if MULTIRAIL_EKC
                ep_dvma_load(kqswnal_data.kqn_ep, NULL,
                             ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
                             kqswnal_data.kqn_ep_tx_nmh, basepage,
                             &all_rails, &ktx->ktx_ebuffer);
#else
                elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
                                       kqswnal_data.kqn_eptxdmahandle,
                                       ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
                                       basepage, &ktx->ktx_ebuffer);
#endif
                ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
                ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */

                INIT_LIST_HEAD (&ktx->ktx_delayed_list);

                ktx->ktx_state = KTX_IDLE;
                ktx->ktx_isnblk = (i >= KQSW_NTXMSGS);
                list_add_tail (&ktx->ktx_list,
                               ktx->ktx_isnblk ? &kqswnal_data.kqn_nblk_idletxds :
                                                 &kqswnal_data.kqn_idletxds);
        }

        /**********************************************************************/
        /* Allocate/Initialise receive descriptors */

        PORTAL_ALLOC (kqswnal_data.kqn_rxds,
                      sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
        if (kqswnal_data.kqn_rxds == NULL)
        {
                kqswnal_finalise ();
                return (-ENOMEM);
        }

        memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
               sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));

        elan_page_idx = 0;
        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
        {
#if MULTIRAIL_EKC
                EP_NMD        elanbuffer;
#else
                E3_Addr       elanbuffer;
#endif
                int           j;
                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                if (i < KQSW_NRXMSGS_SMALL)
                {
                        krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
                        krx->krx_eprx   = kqswnal_data.kqn_eprx_small;
                }
                else
                {
                        krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
                        krx->krx_eprx   = kqswnal_data.kqn_eprx_large;
                }

                LASSERT (krx->krx_npages > 0);
                for (j = 0; j < krx->krx_npages; j++)
                {
                        krx->krx_pages[j] = alloc_page(GFP_KERNEL);
                        if (krx->krx_pages[j] == NULL)
                        {
                                kqswnal_finalise ();
                                return (-ENOMEM);
                        }

                        LASSERT(page_address(krx->krx_pages[j]) != NULL);

#if MULTIRAIL_EKC
                        ep_dvma_load(kqswnal_data.kqn_ep, NULL,
                                     page_address(krx->krx_pages[j]),
                                     PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
                                     elan_page_idx, &all_rails, &elanbuffer);

                        if (j == 0) {
                                krx->krx_elanbuffer = elanbuffer;
                        } else {
                                rc = ep_nmd_merge(&krx->krx_elanbuffer,
                                                  &krx->krx_elanbuffer,
                                                  &elanbuffer);
                                /* NB contiguous mapping */
                                LASSERT(rc);
                        }
#else
                        elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
                                              kqswnal_data.kqn_eprxdmahandle,
                                              page_address(krx->krx_pages[j]),
                                              PAGE_SIZE, elan_page_idx,
                                              &elanbuffer);
                        if (j == 0)
                                krx->krx_elanbuffer = elanbuffer;

                        /* NB contiguous mapping */
                        LASSERT (elanbuffer == krx->krx_elanbuffer + j * PAGE_SIZE);
#endif
                        elan_page_idx++;
                }
        }
        LASSERT (elan_page_idx ==
                 (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
                 (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));

        /**********************************************************************/
        /* Network interface ready to initialise */

        rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni);
        if (rc != 0)
        {
                CERROR ("PtlNIInit failed %d\n", rc);
                kqswnal_finalise ();
                return (-ENOMEM);
        }

        kqswnal_data.kqn_init = KQN_INIT_PTL;

        /**********************************************************************/
        /* Queue receives, now that it's OK to run their completion callbacks */

        for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
        {
                kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];

                /* NB this enqueue can allocate/sleep (attr == 0) */
#if MULTIRAIL_EKC
                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
                                      &krx->krx_elanbuffer, 0);
#else
                rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
                                      krx->krx_elanbuffer,
                                      krx->krx_npages * PAGE_SIZE, 0);
#endif
                if (rc != EP_SUCCESS)
                {
                        CERROR ("failed ep_queue_receive %d\n", rc);
                        kqswnal_finalise ();
                        return (-ENOMEM);
                }
        }

        /**********************************************************************/
        /* Spawn scheduling threads */
        for (i = 0; i < smp_num_cpus; i++)
        {
                rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
                if (rc != 0)
                {
                        CERROR ("failed to spawn scheduling thread: %d\n", rc);
                        kqswnal_finalise ();
                        return (rc);
                }
        }

        /**********************************************************************/
        /* Connect to the router */
        rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
        if (rc != 0)
                CDEBUG(D_NET, "Can't initialise routing interface "
                       "(rc = %d): not routing\n", rc);

        rc = kportal_nal_register (QSWNAL, &kqswnal_cmd, NULL);
        if (rc != 0) {
                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
                kqswnal_finalise ();
                return (rc);
        }

#if CONFIG_SYSCTL
        /* Press on regardless even if registering sysctl doesn't work */
        kqswnal_data.kqn_sysctl = register_sysctl_table (kqswnal_top_ctl_table, 0);
#endif

        PORTAL_SYMBOL_REGISTER(kqswnal_ni);
        kqswnal_data.kqn_init = KQN_INIT_ALL;

        printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d "
               "(Routing %s, initial mem %d)\n",
               kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes,
               kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
               pkmem);

        return (0);
}

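/* NB: the module is assumed to be built as "kqswnal"; the NAL comes up
 * at module load (e.g. "modprobe kqswnal" once the Quadrics EP/Elan
 * modules are present) and unwinds through kqswnal_finalise() above. */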
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01");
MODULE_LICENSE("GPL");

module_init (kqswnal_initialise);
module_exit (kqswnal_finalise);

EXPORT_SYMBOL (kqswnal_ni);