Whamcloud - gitweb
merge b_devel into HEAD, which will become 0.7.3
[fs/lustre-release.git] / lustre / portals / router / router.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Portals
7  *   http://sourceforge.net/projects/sandiaportals/
8  *
9  *   Portals is free software; you can redistribute it and/or
10  *   modify it under the terms of version 2 of the GNU General Public
11  *   License as published by the Free Software Foundation.
12  *
13  *   Portals is distributed in the hope that it will be useful,
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *   GNU General Public License for more details.
17  *
18  *   You should have received a copy of the GNU General Public License
19  *   along with Portals; if not, write to the Free Software
20  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  *
22  */
23
24 #include "router.h"
25
26 LIST_HEAD(kpr_routes);
27 LIST_HEAD(kpr_nals);
28
29 unsigned long long kpr_fwd_bytes;
30 unsigned long      kpr_fwd_packets;
31 unsigned long      kpr_fwd_errors;
32 atomic_t           kpr_queue_depth;
33
34 /* Mostly the tables are read-only (thread and interrupt context)
35  *
36  * Once in a blue moon we register/deregister NALs and add/remove routing
37  * entries (thread context only)... */
38 rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
39
40 kpr_router_interface_t kpr_router_interface = {
41         kprri_register:         kpr_register_nal,
42         kprri_lookup:           kpr_lookup_target,
43         kprri_fwd_start:        kpr_forward_packet,
44         kprri_fwd_done:         kpr_complete_packet,
45         kprri_shutdown:         kpr_shutdown_nal,
46         kprri_deregister:       kpr_deregister_nal,
47 };
48
49 kpr_control_interface_t kpr_control_interface = {
50         kprci_add_route:        kpr_add_route,
51         kprci_del_route:        kpr_del_route,
52         kprci_get_route:        kpr_get_route,
53 };
54
55 int
56 kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
57 {
58         unsigned long      flags;
59         struct list_head  *e;
60         kpr_nal_entry_t   *ne;
61
62         CDEBUG (D_OTHER, "Registering NAL %d\n", nalif->kprni_nalid);
63
64         PORTAL_ALLOC (ne, sizeof (*ne));
65         if (ne == NULL)
66                 return (-ENOMEM);
67
68         memset (ne, 0, sizeof (*ne));
69         memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif));
70
71         LASSERT (!in_interrupt());
72         write_lock_irqsave (&kpr_rwlock, flags);
73
74         for (e = kpr_nals.next; e != &kpr_nals; e = e->next)
75         {
76                 kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list);
77
78                 if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid)
79                 {
80                         write_unlock_irqrestore (&kpr_rwlock, flags);
81
82                         CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid);
83
84                         PORTAL_FREE (ne, sizeof (*ne));
85                         return (-EEXIST);
86                 }
87         }
88
89         list_add (&ne->kpne_list, &kpr_nals);
90
91         write_unlock_irqrestore (&kpr_rwlock, flags);
92
93         *argp = ne;
94         PORTAL_MODULE_USE;
95         return (0);
96 }
97
98 void
99 kpr_shutdown_nal (void *arg)
100 {
101         unsigned long    flags;
102         kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
103
104         CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
105
106         LASSERT (!ne->kpne_shutdown);
107         LASSERT (!in_interrupt());
108
109         write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */
110         ne->kpne_shutdown = 1;
111         write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */
112
113         while (atomic_read (&ne->kpne_refcount) != 0)
114         {
115                 CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n",
116                         ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
117
118                 set_current_state (TASK_UNINTERRUPTIBLE);
119                 schedule_timeout (HZ);
120         }
121 }
122
123 void
124 kpr_deregister_nal (void *arg)
125 {
126         unsigned long     flags;
127         kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
128
129         CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
130
131         LASSERT (ne->kpne_shutdown);            /* caller must have issued shutdown already */
132         LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */
133         LASSERT (!in_interrupt());
134
135         write_lock_irqsave (&kpr_rwlock, flags);
136
137         list_del (&ne->kpne_list);
138
139         write_unlock_irqrestore (&kpr_rwlock, flags);
140
141         PORTAL_FREE (ne, sizeof (*ne));
142         PORTAL_MODULE_UNUSE;
143 }
144
145
146 int
147 kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp)
148 {
149         kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
150         struct list_head *e;
151         int               rc = -ENOENT;
152
153         CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d\n", target_nid, ne->kpne_interface.kprni_nalid);
154
155         if (ne->kpne_shutdown)          /* caller is shutting down */
156                 return (-ENOENT);
157
158         read_lock (&kpr_rwlock);
159
160         /* Search routes for one that has a gateway to target_nid on the callers network */
161
162         for (e = kpr_routes.next; e != &kpr_routes; e = e->next)
163         {
164                 kpr_route_entry_t *re = list_entry (e, kpr_route_entry_t, kpre_list);
165
166                 if (re->kpre_lo_nid > target_nid ||
167                     re->kpre_hi_nid < target_nid)
168                         continue;
169
170                 /* found table entry */
171
172                 if (re->kpre_gateway_nalid != ne->kpne_interface.kprni_nalid) /* different NAL */
173                         rc = -EHOSTUNREACH;
174                 else
175                 {
176                         rc = 0;
177                         *gateway_nidp = re->kpre_gateway_nid;
178                 }
179                 break;
180         }
181
182         read_unlock (&kpr_rwlock);
183
184         CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d: %d ("LPX64")\n",
185                 target_nid, ne->kpne_interface.kprni_nalid, rc,
186                 (rc == 0) ? *gateway_nidp : (ptl_nid_t)0);
187         return (rc);
188 }
189
190 void
191 kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
192 {
193         kpr_nal_entry_t  *src_ne = (kpr_nal_entry_t *)arg;
194         ptl_nid_t         target_nid = fwd->kprfd_target_nid;
195         int               nob = fwd->kprfd_nob;
196         struct list_head *e;
197
198         CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d\n", fwd,
199                 target_nid, src_ne->kpne_interface.kprni_nalid);
200
201         LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */
202         LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov));
203         
204         atomic_inc (&kpr_queue_depth);
205         atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */
206
207         kpr_fwd_packets++;                   /* (loose) stats accounting */
208         kpr_fwd_bytes += nob;
209
210         if (src_ne->kpne_shutdown)           /* caller is shutting down */
211                 goto out;
212
213         fwd->kprfd_router_arg = src_ne;      /* stash caller's nal entry */
214
215         read_lock (&kpr_rwlock);
216
217         /* Search routes for one that has a gateway to target_nid NOT on the caller's network */
218
219         for (e = kpr_routes.next; e != &kpr_routes; e = e->next)
220         {
221                 kpr_route_entry_t *re = list_entry (e, kpr_route_entry_t, kpre_list);
222
223                 if (re->kpre_lo_nid > target_nid || /* no match */
224                     re->kpre_hi_nid < target_nid)
225                         continue;
226
227                 CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: match "LPX64" on NAL %d\n", fwd,
228                         target_nid, src_ne->kpne_interface.kprni_nalid,
229                         re->kpre_gateway_nid, re->kpre_gateway_nalid);
230
231                 if (re->kpre_gateway_nalid == src_ne->kpne_interface.kprni_nalid)
232                         break;                  /* don't route to same NAL */
233
234                 /* Search for gateway's NAL's entry */
235
236                 for (e = kpr_nals.next; e != &kpr_nals; e = e->next)
237                 {
238                         kpr_nal_entry_t *dst_ne = list_entry (e, kpr_nal_entry_t, kpne_list);
239
240                         if (re->kpre_gateway_nalid != dst_ne->kpne_interface.kprni_nalid) /* no match */
241                                 continue;
242
243                         if (dst_ne->kpne_shutdown) /* don't route if NAL is shutting down */
244                                 break;
245
246                         fwd->kprfd_gateway_nid = re->kpre_gateway_nid;
247                         atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */
248
249                         read_unlock (&kpr_rwlock);
250
251                         CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: "LPX64" on NAL %d\n", fwd,
252                                 target_nid, src_ne->kpne_interface.kprni_nalid,
253                                 fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid);
254
255                         dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd);
256                         return;
257                 }
258                 break;
259         }
260
261         read_unlock (&kpr_rwlock);
262  out:
263         kpr_fwd_errors++;
264
265         CDEBUG (D_OTHER, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd,
266                 target_nid, src_ne->kpne_interface.kprni_nalid);
267
268         /* Can't find anywhere to forward to */
269         (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH);
270
271         atomic_dec (&kpr_queue_depth);
272         atomic_dec (&src_ne->kpne_refcount);
273 }
274
275 void
276 kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error)
277 {
278         kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg;
279         kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg;
280
281         CDEBUG (D_OTHER, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd,
282                 src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error);
283
284         atomic_dec (&dst_ne->kpne_refcount);    /* CAVEAT EMPTOR dst_ne can disappear now!!! */
285
286         (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error);
287
288         CDEBUG (D_OTHER, "complete(2) [%p] from NAL %d: %d\n", fwd,
289                 src_ne->kpne_interface.kprni_nalid, error);
290
291         atomic_dec (&kpr_queue_depth);
292         atomic_dec (&src_ne->kpne_refcount);    /* CAVEAT EMPTOR src_ne can disappear now!!! */
293 }
294
295 int
296 kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
297                ptl_nid_t hi_nid)
298 {
299         unsigned long      flags;
300         struct list_head  *e;
301         kpr_route_entry_t *re;
302
303         CDEBUG(D_OTHER, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n",
304                gateway_nalid, gateway_nid, lo_nid, hi_nid);
305
306         LASSERT(lo_nid <= hi_nid);
307
308         PORTAL_ALLOC (re, sizeof (*re));
309         if (re == NULL)
310                 return (-ENOMEM);
311
312         re->kpre_gateway_nalid = gateway_nalid;
313         re->kpre_gateway_nid = gateway_nid;
314         re->kpre_lo_nid = lo_nid;
315         re->kpre_hi_nid = hi_nid;
316
317         LASSERT(!in_interrupt());
318         write_lock_irqsave (&kpr_rwlock, flags);
319
320         for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
321                 kpr_route_entry_t *re2 = list_entry(e, kpr_route_entry_t,
322                                                     kpre_list);
323
324                 if (re->kpre_lo_nid > re2->kpre_hi_nid ||
325                     re->kpre_hi_nid < re2->kpre_lo_nid)
326                         continue;
327
328                 CERROR ("Attempt to add duplicate routes ["LPX64" - "LPX64"]"
329                         "to ["LPX64" - "LPX64"]\n",
330                         re->kpre_lo_nid, re->kpre_hi_nid,
331                         re2->kpre_lo_nid, re2->kpre_hi_nid);
332
333                 write_unlock_irqrestore (&kpr_rwlock, flags);
334
335                 PORTAL_FREE (re, sizeof (*re));
336                 return (-EINVAL);
337         }
338
339         list_add (&re->kpre_list, &kpr_routes);
340
341         write_unlock_irqrestore (&kpr_rwlock, flags);
342         return (0);
343 }
344
345 int
346 kpr_del_route (ptl_nid_t nid)
347 {
348         unsigned long      flags;
349         struct list_head  *e;
350
351         CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
352
353         LASSERT(!in_interrupt());
354         write_lock_irqsave(&kpr_rwlock, flags);
355
356         for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
357                 kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t,
358                                                    kpre_list);
359
360                 if (re->kpre_lo_nid > nid || re->kpre_hi_nid < nid)
361                         continue;
362
363                 list_del (&re->kpre_list);
364                 write_unlock_irqrestore(&kpr_rwlock, flags);
365
366                 PORTAL_FREE(re, sizeof (*re));
367                 return (0);
368         }
369
370         write_unlock_irqrestore(&kpr_rwlock, flags);
371         return (-ENOENT);
372 }
373
374 int
375 kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid,
376               ptl_nid_t *lo_nid, ptl_nid_t *hi_nid)
377 {
378         struct list_head  *e;
379
380         read_lock(&kpr_rwlock);
381
382         for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
383                 kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t,
384                                                    kpre_list);
385
386                 if (idx-- == 0) {
387                         *gateway_nalid = re->kpre_gateway_nalid;
388                         *gateway_nid = re->kpre_gateway_nid;
389                         *lo_nid = re->kpre_lo_nid;
390                         *hi_nid = re->kpre_hi_nid;
391
392                         read_unlock(&kpr_rwlock);
393                         return (0);
394                 }
395         }
396
397         read_unlock (&kpr_rwlock);
398         return (-ENOENT);
399 }
400
401 static void /*__exit*/
402 kpr_finalise (void)
403 {
404         LASSERT (list_empty (&kpr_nals));
405
406         while (!list_empty (&kpr_routes)) {
407                 kpr_route_entry_t *re = list_entry(kpr_routes.next,
408                                                    kpr_route_entry_t,
409                                                    kpre_list);
410
411                 list_del(&re->kpre_list);
412                 PORTAL_FREE(re, sizeof (*re));
413         }
414
415         kpr_proc_fini();
416
417         PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
418         PORTAL_SYMBOL_UNREGISTER(kpr_control_interface);
419
420         CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n",
421                atomic_read(&portal_kmemory));
422 }
423
424 static int __init
425 kpr_initialise (void)
426 {
427         CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
428                atomic_read(&portal_kmemory));
429
430         kpr_proc_init();
431
432         PORTAL_SYMBOL_REGISTER(kpr_router_interface);
433         PORTAL_SYMBOL_REGISTER(kpr_control_interface);
434         return (0);
435 }
436
437 MODULE_AUTHOR("Eric Barton");
438 MODULE_DESCRIPTION("Kernel Portals Router v0.01");
439 MODULE_LICENSE("GPL");
440
441 module_init (kpr_initialise);
442 module_exit (kpr_finalise);
443
444 EXPORT_SYMBOL (kpr_control_interface);
445 EXPORT_SYMBOL (kpr_router_interface);