Whamcloud - gitweb
* Landed b_cray_portals_merge.
[fs/lustre-release.git] / lnet / router / router.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Portals
7  *   http://sourceforge.net/projects/sandiaportals/
8  *
9  *   Portals is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Portals is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Portals; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  *
22  */
23
24 #include "router.h"
25
26 LIST_HEAD(kpr_routes);
27 LIST_HEAD(kpr_gateways);
28 LIST_HEAD(kpr_nals);
29
30 unsigned long long kpr_fwd_bytes;
31 unsigned long      kpr_fwd_packets;
32 unsigned long      kpr_fwd_errors;
33 atomic_t           kpr_queue_depth;
34
35 /* Mostly the tables are read-only (thread and interrupt context)
36  *
37  * Once in a blue moon we register/deregister NALs and add/remove routing
38  * entries (thread context only)... */
39 rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
40
41 kpr_router_interface_t kpr_router_interface = {
42         kprri_register:         kpr_register_nal,
43         kprri_lookup:           kpr_lookup_target,
44         kprri_fwd_start:        kpr_forward_packet,
45         kprri_fwd_done:         kpr_complete_packet,
46         kprri_notify:           kpr_nal_notify,
47         kprri_shutdown:         kpr_shutdown_nal,
48         kprri_deregister:       kpr_deregister_nal,
49 };
50
51 int
52 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
53 {
54         unsigned long      flags;
55         struct list_head  *e;
56         kpr_nal_entry_t   *ne;
57
58         CDEBUG (D_NET, "Registering NAL %d\n", nalif->kprni_nalid);
59
60         PORTAL_ALLOC (ne, sizeof (*ne));
61         if (ne == NULL)
62                 return (-ENOMEM);
63
64         memset (ne, 0, sizeof (*ne));
65         memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif));
66
67         LASSERT (!in_interrupt());
68         write_lock_irqsave (&kpr_rwlock, flags);
69
70         for (e = kpr_nals.next; e != &kpr_nals; e = e->next)
71         {
72                 kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list);
73
74                 if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid)
75                 {
76                         write_unlock_irqrestore (&kpr_rwlock, flags);
77
78                         CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid);
79
80                         PORTAL_FREE (ne, sizeof (*ne));
81                         return (-EEXIST);
82                 }
83         }
84
85         list_add (&ne->kpne_list, &kpr_nals);
86
87         write_unlock_irqrestore (&kpr_rwlock, flags);
88
89         *argp = ne;
90         PORTAL_MODULE_USE;
91         return (0);
92 }
93
94 void
95 kpr_do_upcall (void *arg)
96 {
97         kpr_upcall_t *u = (kpr_upcall_t *)arg;
98         char          nalstr[10];
99         char          nidstr[36];
100         char          whenstr[36];
101         char         *argv[] = {
102                 NULL,
103                 "ROUTER_NOTIFY",
104                 nalstr,
105                 nidstr,
106                 u->kpru_alive ? "up" : "down",
107                 whenstr,
108                 NULL};
109         
110         snprintf (nalstr, sizeof(nalstr), "%d", u->kpru_nal_id);
111         snprintf (nidstr, sizeof(nidstr), LPX64, u->kpru_nid);
112         snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
113
114         portals_run_upcall (argv);
115
116         kfree (u);
117 }
118
119 void
120 kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when)
121 {
122         char str[PTL_NALFMT_SIZE];
123         
124         /* May be in arbitrary context */
125         kpr_upcall_t  *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC);
126
127         if (u == NULL) {
128                 CERROR ("Upcall out of memory: nal %d nid "LPX64" (%s) %s\n",
129                         gw_nalid, gw_nid,
130                         portals_nid2str(gw_nalid, gw_nid, str),
131                         alive ? "up" : "down");
132                 return;
133         }
134
135         u->kpru_nal_id     = gw_nalid;
136         u->kpru_nid        = gw_nid;
137         u->kpru_alive      = alive;
138         u->kpru_when       = when;
139
140         prepare_work (&u->kpru_tq, kpr_do_upcall, u);
141         schedule_work (&u->kpru_tq);
142 }
143
144 int
145 kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
146                int alive, time_t when)
147 {
148         unsigned long        flags;
149         int                  found;
150         kpr_nal_entry_t     *ne = NULL;
151         kpr_gateway_entry_t *ge = NULL;
152         struct timeval       now;
153         struct list_head    *e;
154         struct list_head    *n;
155         char                 str[PTL_NALFMT_SIZE];
156
157         CDEBUG (D_NET, "%s notifying [%d] "LPX64": %s\n", 
158                 byNal ? "NAL" : "userspace", 
159                 gateway_nalid, gateway_nid, alive ? "up" : "down");
160
161         /* can't do predictions... */
162         do_gettimeofday (&now);
163         if (when > now.tv_sec) {
164                 CWARN ("Ignoring prediction from %s of [%d] "LPX64" %s "
165                        "%ld seconds in the future\n", 
166                        byNal ? "NAL" : "userspace", 
167                        gateway_nalid, gateway_nid, 
168                        alive ? "up" : "down",
169                        when - now.tv_sec);
170                 return (EINVAL);
171         }
172
173         LASSERT (when <= now.tv_sec);
174
175         /* Serialise with lookups (i.e. write lock) */
176         write_lock_irqsave(&kpr_rwlock, flags);
177
178         found = 0;
179         list_for_each_safe (e, n, &kpr_gateways) {
180
181                 ge = list_entry(e, kpr_gateway_entry_t, kpge_list);
182                 if ((gateway_nalid != 0 &&
183                      ge->kpge_nalid != gateway_nalid) ||
184                     ge->kpge_nid != gateway_nid)
185                         continue;
186
187                 found = 1;
188                 break;
189         }
190
191         if (!found) {
192                 /* gateway not found */
193                 write_unlock_irqrestore(&kpr_rwlock, flags);
194                 CDEBUG (D_NET, "Gateway not found\n");
195                 return (0);
196         }
197         
198         if (when < ge->kpge_timestamp) {
199                 /* out of date information */
200                 write_unlock_irqrestore (&kpr_rwlock, flags);
201                 CDEBUG (D_NET, "Out of date\n");
202                 return (0);
203         }
204
205         /* update timestamp */
206         ge->kpge_timestamp = when;
207
208         if ((!ge->kpge_alive) == (!alive)) {
209                 /* new date for old news */
210                 write_unlock_irqrestore (&kpr_rwlock, flags);
211                 CDEBUG (D_NET, "Old news\n");
212                 return (0);
213         }
214
215         ge->kpge_alive = alive;
216         CDEBUG(D_NET, "set "LPX64" [%p] %d\n", gateway_nid, ge, alive);
217
218         if (alive) {
219                 /* Reset all gateway weights so the newly-enabled gateway
220                  * doesn't have to play catch-up */
221                 list_for_each_safe (e, n, &kpr_gateways) {
222                         kpr_gateway_entry_t *ge = list_entry(e, kpr_gateway_entry_t,
223                                                              kpge_list);
224                         atomic_set (&ge->kpge_weight, 0);
225                 }
226         }
227
228         found = 0;
229         if (!byNal) {
230                 /* userland notified me: notify NAL? */
231                 ne = kpr_find_nal_entry_locked (ge->kpge_nalid);
232                 if (ne != NULL) {
233                         if (!ne->kpne_shutdown &&
234                             ne->kpne_interface.kprni_notify != NULL) {
235                                 /* take a ref on this NAL until notifying
236                                  * it has completed... */
237                                 atomic_inc (&ne->kpne_refcount);
238                                 found = 1;
239                         }
240                 }
241         }
242
243         write_unlock_irqrestore(&kpr_rwlock, flags);
244
245         if (found) {
246                 ne->kpne_interface.kprni_notify (ne->kpne_interface.kprni_arg,
247                                                  gateway_nid, alive);
248                 /* 'ne' can disappear now... */
249                 atomic_dec (&ne->kpne_refcount);
250         }
251         
252         if (byNal) {
253                 /* It wasn't userland that notified me... */
254                 CWARN ("Upcall: NAL %d NID "LPX64" (%s) is %s\n",
255                        gateway_nalid, gateway_nid,
256                        portals_nid2str(gateway_nalid, gateway_nid, str),
257                        alive ? "alive" : "dead");
258                 kpr_upcall (gateway_nalid, gateway_nid, alive, when);
259         } else {
260                 CDEBUG (D_NET, " NOT Doing upcall\n");
261         }
262         
263         return (0);
264 }
265
266 void
267 kpr_nal_notify (void *arg, ptl_nid_t peer, int alive, time_t when)
268 {
269         kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
270         
271         kpr_do_notify (1, ne->kpne_interface.kprni_nalid, peer, alive, when);
272 }
273
274 void
275 kpr_shutdown_nal (void *arg)
276 {
277         unsigned long    flags;
278         kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
279
280         CDEBUG (D_NET, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
281
282         LASSERT (!ne->kpne_shutdown);
283         LASSERT (!in_interrupt());
284
285         write_lock_irqsave (&kpr_rwlock, flags);
286         ne->kpne_shutdown = 1;
287         write_unlock_irqrestore (&kpr_rwlock, flags);
288 }
289
290 void
291 kpr_deregister_nal (void *arg)
292 {
293         unsigned long     flags;
294         kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
295
296         CDEBUG (D_NET, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
297
298         LASSERT (ne->kpne_shutdown);            /* caller must have issued shutdown already */
299         LASSERT (!in_interrupt());
300
301         write_lock_irqsave (&kpr_rwlock, flags);
302         list_del (&ne->kpne_list);
303         write_unlock_irqrestore (&kpr_rwlock, flags);
304
305         /* Wait until all outstanding messages/notifications have completed */
306         while (atomic_read (&ne->kpne_refcount) != 0)
307         {
308                 CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n",
309                         ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
310
311                 set_current_state (TASK_UNINTERRUPTIBLE);
312                 schedule_timeout (HZ);
313         }
314
315         PORTAL_FREE (ne, sizeof (*ne));
316         PORTAL_MODULE_UNUSE;
317 }
318
319 int
320 kpr_ge_isbetter (kpr_gateway_entry_t *ge1, kpr_gateway_entry_t *ge2)
321 {
322         const int significant_bits = 0x00ffffff;
323         /* We use atomic_t to record/compare route weights for
324          * load-balancing.  Here we limit ourselves to only using
325          * 'significant_bits' when we do an 'after' comparison */
326
327         int    diff = (atomic_read (&ge1->kpge_weight) -
328                        atomic_read (&ge2->kpge_weight)) & significant_bits;
329         int    rc = (diff > (significant_bits >> 1));
330
331         CDEBUG(D_NET, "[%p]"LPX64"=%d %s [%p]"LPX64"=%d\n",
332                ge1, ge1->kpge_nid, atomic_read (&ge1->kpge_weight),
333                rc ? ">" : "<",
334                ge2, ge2->kpge_nid, atomic_read (&ge2->kpge_weight));
335
336         return (rc);
337 }
338
339 void
340 kpr_update_weight (kpr_gateway_entry_t *ge, int nob)
341 {
342         int weight = 1 + (nob + sizeof (ptl_hdr_t)/2)/sizeof (ptl_hdr_t);
343
344         /* We've chosen this route entry (i.e. gateway) to forward payload
345          * of length 'nob'; update the route's weight to make it less
346          * favoured.  Note that the weight is 1 plus the payload size
347          * rounded and scaled to the portals header size, so we get better
348          * use of the significant bits in kpge_weight. */
349
350         CDEBUG(D_NET, "gateway [%p]"LPX64" += %d\n", ge,
351                ge->kpge_nid, weight);
352         
353         atomic_add (weight, &ge->kpge_weight);
354 }
355
356 int
357 kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob,
358                    ptl_nid_t *gateway_nidp)
359 {
360         kpr_nal_entry_t     *ne = (kpr_nal_entry_t *)arg;
361         struct list_head    *e;
362         kpr_route_entry_t   *re;
363         kpr_gateway_entry_t *ge = NULL;
364         int                  rc = -ENOENT;
365
366         /* Caller wants to know if 'target_nid' can be reached via a gateway
367          * ON HER OWN NETWORK */
368
369         CDEBUG (D_NET, "lookup "LPX64" from NAL %d\n", target_nid, 
370                 ne->kpne_interface.kprni_nalid);
371         LASSERT (!in_interrupt());
372
373         read_lock (&kpr_rwlock);
374
375         if (ne->kpne_shutdown) {        /* caller is shutting down */
376                 read_unlock (&kpr_rwlock);
377                 return (-ENOENT);
378         }
379
380         /* Search routes for one that has a gateway to target_nid on the callers network */
381
382         list_for_each (e, &kpr_routes) {
383                 re = list_entry (e, kpr_route_entry_t, kpre_list);
384
385                 if (re->kpre_lo_nid > target_nid ||
386                     re->kpre_hi_nid < target_nid)
387                         continue;
388
389                 /* found table entry */
390
391                 if (re->kpre_gateway->kpge_nalid != ne->kpne_interface.kprni_nalid ||
392                     !re->kpre_gateway->kpge_alive) {
393                         /* different NAL or gateway down */
394                         rc = -EHOSTUNREACH;
395                         continue;
396                 }
397                 
398                 if (ge == NULL ||
399                     kpr_ge_isbetter (re->kpre_gateway, ge))
400                     ge = re->kpre_gateway;
401         }
402
403         if (ge != NULL) {
404                 kpr_update_weight (ge, nob);
405                 *gateway_nidp = ge->kpge_nid;
406                 rc = 0;
407         }
408         
409         read_unlock (&kpr_rwlock);
410
411         /* NB can't deref 're' now; it might have been removed! */
412
413         CDEBUG (D_NET, "lookup "LPX64" from NAL %d: %d ("LPX64")\n",
414                 target_nid, ne->kpne_interface.kprni_nalid, rc,
415                 (rc == 0) ? *gateway_nidp : (ptl_nid_t)0);
416         return (rc);
417 }
418
419 kpr_nal_entry_t *
420 kpr_find_nal_entry_locked (int nal_id)
421 {
422         struct list_head    *e;
423         
424         /* Called with kpr_rwlock held */
425
426         list_for_each (e, &kpr_nals) {
427                 kpr_nal_entry_t *ne = list_entry (e, kpr_nal_entry_t, kpne_list);
428
429                 if (nal_id != ne->kpne_interface.kprni_nalid) /* no match */
430                         continue;
431
432                 return (ne);
433         }
434         
435         return (NULL);
436 }
437
438 void
439 kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
440 {
441         kpr_nal_entry_t     *src_ne = (kpr_nal_entry_t *)arg;
442         ptl_nid_t            target_nid = fwd->kprfd_target_nid;
443         int                  nob = fwd->kprfd_nob;
444         kpr_gateway_entry_t *ge = NULL;
445         kpr_nal_entry_t     *dst_ne = NULL;
446         struct list_head    *e;
447         kpr_route_entry_t   *re;
448         kpr_nal_entry_t     *tmp_ne;
449         int                  rc;
450
451         CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d\n", fwd,
452                 target_nid, src_ne->kpne_interface.kprni_nalid);
453
454         LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
455         LASSERT (!in_interrupt());
456
457         read_lock (&kpr_rwlock);
458
459         kpr_fwd_packets++;                   /* (loose) stats accounting */
460         kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
461
462         if (src_ne->kpne_shutdown) {         /* caller is shutting down */
463                 rc = -ESHUTDOWN;
464                 goto out;
465         }
466
467         fwd->kprfd_router_arg = src_ne;      /* stash caller's nal entry */
468
469         /* Search routes for one that has a gateway to target_nid NOT on the caller's network */
470
471         list_for_each (e, &kpr_routes) {
472                 re = list_entry (e, kpr_route_entry_t, kpre_list);
473
474                 if (re->kpre_lo_nid > target_nid || /* no match */
475                     re->kpre_hi_nid < target_nid)
476                         continue;
477
478                 if (re->kpre_gateway->kpge_nalid == src_ne->kpne_interface.kprni_nalid)
479                         continue;               /* don't route to same NAL */
480
481                 if (!re->kpre_gateway->kpge_alive)
482                         continue;               /* gateway is dead */
483                 
484                 tmp_ne = kpr_find_nal_entry_locked (re->kpre_gateway->kpge_nalid);
485
486                 if (tmp_ne == NULL ||
487                     tmp_ne->kpne_shutdown) {
488                         /* NAL must be registered and not shutting down */
489                         continue;
490                 }
491
492                 if (ge == NULL ||
493                     kpr_ge_isbetter (re->kpre_gateway, ge)) {
494                         ge = re->kpre_gateway;
495                         dst_ne = tmp_ne;
496                 }
497         }
498         
499         if (ge != NULL) {
500                 LASSERT (dst_ne != NULL);
501                 
502                 kpr_update_weight (ge, nob);
503
504                 fwd->kprfd_gateway_nid = ge->kpge_nid;
505                 atomic_inc (&src_ne->kpne_refcount); /* source and dest nals are */
506                 atomic_inc (&dst_ne->kpne_refcount); /* busy until fwd completes */
507                 atomic_inc (&kpr_queue_depth);
508
509                 read_unlock (&kpr_rwlock);
510
511                 CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %d: "
512                         "to "LPX64" on NAL %d\n", 
513                         fwd, target_nid, src_ne->kpne_interface.kprni_nalid,
514                         fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid);
515
516                 dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd);
517                 return;
518         }
519
520         rc = -EHOSTUNREACH;
521  out:
522         kpr_fwd_errors++;
523
524         CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %d: %d\n", 
525                 fwd, target_nid, src_ne->kpne_interface.kprni_nalid, rc);
526
527         (fwd->kprfd_callback)(fwd->kprfd_callback_arg, rc);
528
529         read_unlock (&kpr_rwlock);
530 }
531
532 void
533 kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error)
534 {
535         kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg;
536         kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg;
537
538         CDEBUG (D_NET, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd,
539                 src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error);
540
541         atomic_dec (&dst_ne->kpne_refcount);    /* CAVEAT EMPTOR dst_ne can disappear now!!! */
542
543         (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error);
544
545         CDEBUG (D_NET, "complete(2) [%p] from NAL %d: %d\n", fwd,
546                 src_ne->kpne_interface.kprni_nalid, error);
547
548         atomic_dec (&kpr_queue_depth);
549         atomic_dec (&src_ne->kpne_refcount);    /* CAVEAT EMPTOR src_ne can disappear now!!! */
550 }
551
552 int
553 kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, 
554                ptl_nid_t lo_nid, ptl_nid_t hi_nid)
555 {
556         unsigned long        flags;
557         struct list_head    *e;
558         kpr_route_entry_t   *re;
559         kpr_gateway_entry_t *ge;
560         int                  dup = 0;
561
562         CDEBUG(D_NET, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n",
563                gateway_nalid, gateway_nid, lo_nid, hi_nid);
564
565         if (gateway_nalid == PTL_NID_ANY ||
566             lo_nid == PTL_NID_ANY ||
567             hi_nid == PTL_NID_ANY ||
568             lo_nid > hi_nid)
569                 return (-EINVAL);
570
571         PORTAL_ALLOC (ge, sizeof (*ge));
572         if (ge == NULL)
573                 return (-ENOMEM);
574
575         ge->kpge_nalid = gateway_nalid;
576         ge->kpge_nid   = gateway_nid;
577         ge->kpge_alive = 1;
578         ge->kpge_timestamp = 0;
579         ge->kpge_refcount = 0;
580         atomic_set (&ge->kpge_weight, 0);
581
582         PORTAL_ALLOC (re, sizeof (*re));
583         if (re == NULL) {
584                 PORTAL_FREE (ge, sizeof (*ge));
585                 return (-ENOMEM);
586         }
587
588         re->kpre_lo_nid = lo_nid;
589         re->kpre_hi_nid = hi_nid;
590
591         LASSERT(!in_interrupt());
592         write_lock_irqsave (&kpr_rwlock, flags);
593
594         list_for_each (e, &kpr_gateways) {
595                 kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
596                                                       kpge_list);
597                 
598                 if (ge2->kpge_nalid == gateway_nalid &&
599                     ge2->kpge_nid == gateway_nid) {
600                         PORTAL_FREE (ge, sizeof (*ge));
601                         ge = ge2;
602                         dup = 1;
603                         break;
604                 }
605         }
606
607         if (!dup) {
608                 /* Adding a new gateway... */
609  
610                 list_add (&ge->kpge_list, &kpr_gateways);
611
612                 /* ...zero all gateway weights so this one doesn't have to
613                  * play catch-up */
614
615                 list_for_each (e, &kpr_gateways) {
616                         kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
617                                                               kpge_list);
618                         atomic_set (&ge2->kpge_weight, 0);
619                 }
620                 
621         }
622
623         re->kpre_gateway = ge;
624         ge->kpge_refcount++;
625         list_add (&re->kpre_list, &kpr_routes);
626
627         write_unlock_irqrestore (&kpr_rwlock, flags);
628         return (0);
629 }
630
631 int
632 kpr_sys_notify (int gateway_nalid, ptl_nid_t gateway_nid,
633                 int alive, time_t when)
634 {
635         return (kpr_do_notify (0, gateway_nalid, gateway_nid, alive, when));
636 }
637
638 int
639 kpr_del_route (int gw_nalid, ptl_nid_t gw_nid,
640                ptl_nid_t lo, ptl_nid_t hi)
641 {
642         int                specific = (lo != PTL_NID_ANY);
643         unsigned long      flags;
644         int                rc = -ENOENT;
645         struct list_head  *e;
646         struct list_head  *n;
647
648         CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", 
649                gw_nalid, gw_nid, lo, hi);
650
651         LASSERT(!in_interrupt());
652
653         /* NB Caller may specify either all routes via the given gateway
654          * (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are
655          * actual NIDs) */
656         
657         if (specific ? (hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY))
658                 return (-EINVAL);
659
660         write_lock_irqsave(&kpr_rwlock, flags);
661
662         list_for_each_safe (e, n, &kpr_routes) {
663                 kpr_route_entry_t   *re = list_entry(e, kpr_route_entry_t,
664                                                    kpre_list);
665                 kpr_gateway_entry_t *ge = re->kpre_gateway;
666                 
667                 if (ge->kpge_nalid != gw_nalid ||
668                     ge->kpge_nid != gw_nid ||
669                     (specific && 
670                      (lo != re->kpre_lo_nid || hi != re->kpre_hi_nid)))
671                         continue;
672
673                 rc = 0;
674
675                 if (--ge->kpge_refcount == 0) {
676                         list_del (&ge->kpge_list);
677                         PORTAL_FREE (ge, sizeof (*ge));
678                 }
679
680                 list_del (&re->kpre_list);
681                 PORTAL_FREE(re, sizeof (*re));
682
683                 if (specific)
684                         break;
685         }
686
687         write_unlock_irqrestore(&kpr_rwlock, flags);
688         return (rc);
689 }
690
691 int
692 kpr_get_route (int idx, __u32 *gateway_nalid, ptl_nid_t *gateway_nid,
693                ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, __u32 *alive)
694 {
695         struct list_head  *e;
696
697         LASSERT (!in_interrupt());
698         read_lock(&kpr_rwlock);
699
700         for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
701                 kpr_route_entry_t   *re = list_entry(e, kpr_route_entry_t,
702                                                      kpre_list);
703                 kpr_gateway_entry_t *ge = re->kpre_gateway;
704                 
705                 if (idx-- == 0) {
706                         *gateway_nalid = ge->kpge_nalid;
707                         *gateway_nid = ge->kpge_nid;
708                         *alive = ge->kpge_alive;
709                         *lo_nid = re->kpre_lo_nid;
710                         *hi_nid = re->kpre_hi_nid;
711
712                         read_unlock(&kpr_rwlock);
713                         return (0);
714                 }
715         }
716
717         read_unlock (&kpr_rwlock);
718         return (-ENOENT);
719 }
720
721 static int 
722 kpr_nal_cmd(struct portals_cfg *pcfg, void * private)
723 {
724         int err = -EINVAL;
725         ENTRY;
726
727         switch(pcfg->pcfg_command) {
728         default:
729                 CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
730                 break;
731                 
732         case NAL_CMD_ADD_ROUTE:
733                 CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n",
734                        pcfg->pcfg_nal, pcfg->pcfg_nid, 
735                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
736                 err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
737                                     pcfg->pcfg_nid2, pcfg->pcfg_nid3);
738                 break;
739
740         case NAL_CMD_DEL_ROUTE:
741                 CDEBUG (D_IOCTL, "Removing routes via [%d] "LPU64" : "LPU64" - "LPU64"\n",
742                         pcfg->pcfg_gw_nal, pcfg->pcfg_nid, 
743                         pcfg->pcfg_nid2, pcfg->pcfg_nid3);
744                 err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
745                                      pcfg->pcfg_nid2, pcfg->pcfg_nid3);
746                 break;
747
748         case NAL_CMD_NOTIFY_ROUTER: {
749                 CDEBUG (D_IOCTL, "Notifying peer [%d] "LPU64" %s @ %ld\n",
750                         pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
751                         pcfg->pcfg_flags ? "Enabling" : "Disabling",
752                         (time_t)pcfg->pcfg_nid3);
753                 
754                 err = kpr_sys_notify (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
755                                       pcfg->pcfg_flags, (time_t)pcfg->pcfg_nid3);
756                 break;
757         }
758                 
759         case NAL_CMD_GET_ROUTE:
760                 CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
761                 err = kpr_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
762                                     &pcfg->pcfg_nid, 
763                                     &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
764                                     &pcfg->pcfg_flags);
765                 break;
766         }
767         RETURN(err);
768 }
769
770
771 static void /*__exit*/
772 kpr_finalise (void)
773 {
774         LASSERT (list_empty (&kpr_nals));
775
776         libcfs_nal_cmd_unregister(ROUTER);
777
778         PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
779
780         kpr_proc_fini();
781
782         while (!list_empty (&kpr_routes)) {
783                 kpr_route_entry_t *re = list_entry(kpr_routes.next,
784                                                    kpr_route_entry_t,
785                                                    kpre_list);
786
787                 list_del(&re->kpre_list);
788                 PORTAL_FREE(re, sizeof (*re));
789         }
790
791         CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n",
792                atomic_read(&portal_kmemory));
793 }
794
795 static int __init
796 kpr_initialise (void)
797 {
798         int     rc;
799         
800         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
801                atomic_read(&portal_kmemory));
802
803         kpr_proc_init();
804
805         rc = libcfs_nal_cmd_register(ROUTER, kpr_nal_cmd, NULL);
806         if (rc != 0) {
807                 CERROR("Can't register nal cmd handler\n");
808                 return (rc);
809         }
810         
811         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
812         return (0);
813 }
814
815 MODULE_AUTHOR("Eric Barton");
816 MODULE_DESCRIPTION("Kernel Portals Router v0.01");
817 MODULE_LICENSE("GPL");
818
819 module_init (kpr_initialise);
820 module_exit (kpr_finalise);
821
822 EXPORT_SYMBOL (kpr_router_interface);