lustre/ptlrpc/events.c (fs/lustre-release.git: land 0.5.20.3 b_devel onto HEAD; b_devel will remain)
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
 *
 *   This file is part of Lustre, http://www.lustre.org.
 *
 *   Lustre is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Lustre is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with Lustre; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#define DEBUG_SUBSYSTEM S_RPC

#ifdef __KERNEL__
#include <linux/module.h>
#else
#include <liblustre.h>
#endif
#include <linux/obd_class.h>
#include <linux/lustre_net.h>

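/* The Portals network interfaces ptlrpc knows about, filled in by
 * ptlrpc_init_portals() below. */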
struct ptlrpc_ni  ptlrpc_interfaces[NAL_MAX_NR];
int               ptlrpc_ninterfaces;

/*
 *  Free the packet when it has gone out
 */
static int request_out_callback(ptl_event_t *ev)
{
        struct ptlrpc_request *req = ev->mem_desc.user_ptr;
        ENTRY;

        /* requests always contiguous */
        LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);

        if (ev->type != PTL_EVENT_SENT) {
                // XXX make sure we understand all events, including ACK's
                CERROR("Unknown event %d\n", ev->type);
                LBUG();
        }

        /* this balances the atomic_inc in ptl_send_rpc */
        ptlrpc_req_finished(req);
        RETURN(1);
}


/*
 *  Free the reply buffer when it has gone out; wake up anyone waiting
 *  for the ACK on that reply.
 */
static int reply_out_callback(ptl_event_t *ev)
{
        ENTRY;

        /* replies always contiguous */
        LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);

        if (ev->type == PTL_EVENT_SENT) {
                OBD_FREE(ev->mem_desc.start, ev->mem_desc.length);
        } else if (ev->type == PTL_EVENT_ACK) {
                struct ptlrpc_request *req = ev->mem_desc.user_ptr;
                if (req->rq_flags & PTL_RPC_FL_WANT_ACK) {
                        req->rq_flags &= ~PTL_RPC_FL_WANT_ACK;
                        wake_up(&req->rq_wait_for_rep);
                } else {
                        DEBUG_REQ(D_ERROR, req,
                                  "ack received for reply, not wanted");
                }
        } else {
                // XXX make sure we understand all events
                CERROR("Unknown event %d\n", ev->type);
                LBUG();
        }

        RETURN(1);
}

/*
 * Wake up the thread waiting for the reply once it comes in.
 */
int reply_in_callback(ptl_event_t *ev)
{
        struct ptlrpc_request *req = ev->mem_desc.user_ptr;
        ENTRY;

        /* replies always contiguous */
        LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);

        if (req->rq_xid == 0x5a5a5a5a5a5a5a5a) {
                CERROR("Reply received for freed request!  Probably a missing "
                       "ptlrpc_abort()\n");
                LBUG();
        }

        if (req->rq_xid != ev->match_bits) {
                CERROR("Reply packet for wrong request\n");
                LBUG();
        }

        if (ev->type == PTL_EVENT_PUT) {
                req->rq_repmsg = ev->mem_desc.start + ev->offset;
                barrier();
                wake_up(&req->rq_wait_for_rep);
        } else {
                // XXX make sure we understand all events, including ACK's
                CERROR("Unknown event %d\n", ev->type);
                LBUG();
        }

        RETURN(1);
}

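/*
 * A request has arrived in one of the service's request buffers.
 * Account for the buffer and its refcount, then wake a service
 * thread to handle it.
 */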
int request_in_callback(ptl_event_t *ev)
{
        struct ptlrpc_request_buffer_desc *rqbd = ev->mem_desc.user_ptr;
        struct ptlrpc_srv_ni  *srv_ni = rqbd->rqbd_srv_ni;
        struct ptlrpc_service *service = srv_ni->sni_service;

        /* requests always contiguous */
        LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);
        /* we only enable puts */
        LASSERT(ev->type == PTL_EVENT_PUT);
        LASSERT(atomic_read(&srv_ni->sni_nrqbds_receiving) > 0);
        LASSERT(atomic_read(&rqbd->rqbd_refcount) > 0);

        if (ev->rlength != ev->mlength)
                CERROR("Warning: Possibly truncated rpc (%d/%d)\n",
                       ev->mlength, ev->rlength);

        if (ptl_is_valid_handle(&ev->unlinked_me)) {
                /* This is the last request to be received into this
                 * request buffer.  We don't bump the refcount, since the
                 * thread servicing this event is effectively taking over
                 * portals' reference.
                 */
#warning ev->unlinked_me.nal_idx is not set properly in a callback
                LASSERT(ev->unlinked_me.handle_idx==rqbd->rqbd_me_h.handle_idx);

                /* we're off the air */
                /* we'll probably start dropping packets in portals soon */
                if (atomic_dec_and_test(&srv_ni->sni_nrqbds_receiving))
                        CERROR("All request buffers busy\n");
        } else {
                /* +1 ref for service thread */
                atomic_inc(&rqbd->rqbd_refcount);
        }

        wake_up(&service->srv_waitq);

        return 0;
}

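/*
 * Source-side callback for a bulk PUT: fires once for SENT and once
 * for the peer's ACK.  When both have arrived, run the per-page
 * callbacks, mark the descriptor sent, wake the waiter and call the
 * registered event handler, if any.
 */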
static int bulk_put_source_callback(ptl_event_t *ev)
{
        struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
        struct ptlrpc_bulk_page *bulk;
        struct list_head        *tmp;
        struct list_head        *next;
        ENTRY;

        CDEBUG(D_NET, "got %s event %d\n",
               (ev->type == PTL_EVENT_SENT) ? "SENT" :
               (ev->type == PTL_EVENT_ACK)  ? "ACK"  : "UNEXPECTED", ev->type);

        LASSERT(ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_ACK);

        LASSERT(atomic_read(&desc->bd_source_callback_count) > 0 &&
                atomic_read(&desc->bd_source_callback_count) <= 2);

        /* 1 fragment for each page always */
        LASSERT(ev->mem_desc.niov == desc->bd_page_count);

        if (atomic_dec_and_test(&desc->bd_source_callback_count)) {
                void (*event_handler)(struct ptlrpc_bulk_desc *);

                list_for_each_safe(tmp, next, &desc->bd_page_list) {
                        bulk = list_entry(tmp, struct ptlrpc_bulk_page,
                                          bp_link);

                        if (bulk->bp_cb != NULL)
                                bulk->bp_cb(bulk);
                }

                /* We need to make a note of whether there's an event handler
                 * before we call wake_up, because if there is no event handler,
                 * 'desc' might be freed before we're scheduled again. */
                event_handler = desc->bd_ptl_ev_hdlr;

                desc->bd_flags |= PTL_BULK_FL_SENT;
                wake_up(&desc->bd_waitq);
                if (event_handler) {
                        LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
                        event_handler(desc);
                }
        }

        RETURN(0);
}

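/*
 * Sink-side callback for a bulk PUT: the peer's data has landed in
 * our pages.  Run the per-page callbacks, mark the descriptor
 * received, wake the waiter and call the registered event handler,
 * if any.
 */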
static int bulk_put_sink_callback(ptl_event_t *ev)
{
        struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
        struct ptlrpc_bulk_page *bulk;
        struct list_head        *tmp;
        struct list_head        *next;
        ptl_size_t               total = 0;
        void                   (*event_handler)(struct ptlrpc_bulk_desc *);
        ENTRY;

        LASSERT(ev->type == PTL_EVENT_PUT);

        /* put with zero offset */
        LASSERT(ev->offset == 0);
        /* used iovs */
        LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0);
        /* 1 fragment for each page always */
        LASSERT(ev->mem_desc.niov == desc->bd_page_count);

        list_for_each_safe (tmp, next, &desc->bd_page_list) {
                bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);

                total += bulk->bp_buflen;

                if (bulk->bp_cb != NULL)
                        bulk->bp_cb(bulk);
        }

        LASSERT(ev->mem_desc.length == total);

        /* We need to make a note of whether there's an event handler
         * before we call wake_up, because if there is no event
         * handler, 'desc' might be freed before we're scheduled again. */
        event_handler = desc->bd_ptl_ev_hdlr;

        desc->bd_flags |= PTL_BULK_FL_RCVD;
        wake_up(&desc->bd_waitq);
        if (event_handler) {
                LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
                event_handler(desc);
        }

        RETURN(1);
}

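/*
 * Source-side callback for a bulk GET: the peer has fetched the data
 * from our pages.  Run the per-page callbacks, mark the descriptor
 * sent, wake the waiter and call the registered event handler, if
 * any.
 */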
static int bulk_get_source_callback(ptl_event_t *ev)
{
        struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
        struct ptlrpc_bulk_page *bulk;
        struct list_head        *tmp;
        struct list_head        *next;
        ptl_size_t               total = 0;
        void                   (*event_handler)(struct ptlrpc_bulk_desc *);
        ENTRY;

        LASSERT(ev->type == PTL_EVENT_GET);

        /* get with zero offset */
        LASSERT(ev->offset == 0);
        /* used iovs */
        LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0);
        /* 1 fragment for each page always */
        LASSERT(ev->mem_desc.niov == desc->bd_page_count);

        list_for_each_safe (tmp, next, &desc->bd_page_list) {
                bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);

                total += bulk->bp_buflen;

                if (bulk->bp_cb != NULL)
                        bulk->bp_cb(bulk);
        }

        LASSERT(ev->mem_desc.length == total);

        /* We need to make a note of whether there's an event handler
         * before we call wake_up, because if there is no event
         * handler, 'desc' might be freed before we're scheduled again. */
        event_handler = desc->bd_ptl_ev_hdlr;

        desc->bd_flags |= PTL_BULK_FL_SENT;
        wake_up(&desc->bd_waitq);
        if (event_handler) {
                LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
                event_handler(desc);
        }

        RETURN(1);
}


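/*
 * Sink-side callback for a bulk GET: fires once for SENT and once
 * for the REPLY carrying the data.  When both have arrived, run the
 * per-page callbacks, mark the descriptor received, wake the waiter
 * and call the registered event handler, if any.
 */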
static int bulk_get_sink_callback(ptl_event_t *ev)
{
        struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
        struct ptlrpc_bulk_page *bulk;
        struct list_head        *tmp;
        struct list_head        *next;
        ENTRY;

        CDEBUG(D_NET, "got %s event %d\n",
               (ev->type == PTL_EVENT_SENT)  ? "SENT"  :
               (ev->type == PTL_EVENT_REPLY) ? "REPLY" : "UNEXPECTED",
               ev->type);

        LASSERT(ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_REPLY);

        LASSERT(atomic_read(&desc->bd_source_callback_count) > 0 &&
                atomic_read(&desc->bd_source_callback_count) <= 2);

        /* 1 fragment for each page always */
        LASSERT(ev->mem_desc.niov == desc->bd_page_count);

        if (atomic_dec_and_test(&desc->bd_source_callback_count)) {
                void (*event_handler)(struct ptlrpc_bulk_desc *);

                list_for_each_safe(tmp, next, &desc->bd_page_list) {
                        bulk = list_entry(tmp, struct ptlrpc_bulk_page,
                                          bp_link);

                        if (bulk->bp_cb != NULL)
                                bulk->bp_cb(bulk);
                }

                /* We need to make a note of whether there's an event handler
                 * before we call wake_up, because if there is no event handler,
                 * 'desc' might be freed before we're scheduled again. */
                event_handler = desc->bd_ptl_ev_hdlr;

                desc->bd_flags |= PTL_BULK_FL_RCVD;
                wake_up(&desc->bd_waitq);
                if (event_handler) {
                        LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
                        event_handler(desc);
                }
        }

        RETURN(0);
}

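/*
 * Map a lustre UUID onto a peer NID and the ptlrpc interface that
 * reaches it.  Returns 0 on success, -ENOENT if no configured
 * interface matches, or the error from lustre_uuid_to_peer().
 */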
int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer)
{
        struct ptlrpc_ni   *pni;
        struct lustre_peer  lpeer;
        int                 i;
        int                 rc = lustre_uuid_to_peer (uuid->uuid, &lpeer);

        if (rc != 0)
                RETURN (rc);

        for (i = 0; i < ptlrpc_ninterfaces; i++) {
                pni = &ptlrpc_interfaces[i];

                if (!memcmp (&lpeer.peer_ni, &pni->pni_ni_h,
                             sizeof (lpeer.peer_ni))) {
                        peer->peer_nid = lpeer.peer_nid;
                        peer->peer_ni = pni;
                        return (0);
                }
        }

        CERROR ("Can't find ptlrpc interface for "LPX64" ni handle %08lx %08lx\n",
                lpeer.peer_nid, lpeer.peer_ni.nal_idx, lpeer.peer_ni.handle_idx);
        return (-ENOENT);
}

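/*
 * Tear down one ptlrpc interface: free its event queues and drop the
 * reference on the NAL module taken in ptlrpc_ni_init().
 */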
void ptlrpc_ni_fini (struct ptlrpc_ni *pni)
{
        PtlEQFree(pni->pni_request_out_eq_h);
        PtlEQFree(pni->pni_reply_out_eq_h);
        PtlEQFree(pni->pni_reply_in_eq_h);
        PtlEQFree(pni->pni_bulk_put_source_eq_h);
        PtlEQFree(pni->pni_bulk_put_sink_eq_h);
        PtlEQFree(pni->pni_bulk_get_source_eq_h);
        PtlEQFree(pni->pni_bulk_get_sink_eq_h);

        inter_module_put(pni->pni_name);
}

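/*
 * Bind to the named NAL module and allocate the event queues whose
 * callbacks drive all ptlrpc and bulk traffic on that interface.
 * Returns -ENOENT if the module isn't loaded, or -ENOMEM if an event
 * queue can't be allocated.
 */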
int ptlrpc_ni_init (char *name, struct ptlrpc_ni *pni)
{
        int              rc;
        ptl_handle_ni_t *nip;

        nip = (ptl_handle_ni_t *)inter_module_get (name);
        if (nip == NULL) {
                CDEBUG (D_NET, "Network interface %s not loaded\n", name);
                return (-ENOENT);
        }

        CDEBUG (D_NET, "init %s: nal_idx %ld\n", name, nip->nal_idx);

        pni->pni_name = name;
        pni->pni_ni_h = *nip;

        ptl_set_inv_handle (&pni->pni_request_out_eq_h);
        ptl_set_inv_handle (&pni->pni_reply_out_eq_h);
        ptl_set_inv_handle (&pni->pni_reply_in_eq_h);
        ptl_set_inv_handle (&pni->pni_bulk_put_source_eq_h);
        ptl_set_inv_handle (&pni->pni_bulk_put_sink_eq_h);
        ptl_set_inv_handle (&pni->pni_bulk_get_source_eq_h);
        ptl_set_inv_handle (&pni->pni_bulk_get_sink_eq_h);

        /* NB We never actually PtlEQGet() out of these event queues since
         * we're only interested in the event callback, so we can just let
         * them wrap.  Their sizes aren't a big deal, apart from providing
         * a little history for debugging... */

        rc = PtlEQAlloc(pni->pni_ni_h, 1024, request_out_callback,
                        &pni->pni_request_out_eq_h);
        if (rc != PTL_OK)
                GOTO (fail, rc = -ENOMEM);

        rc = PtlEQAlloc(pni->pni_ni_h, 1024, reply_out_callback,
                        &pni->pni_reply_out_eq_h);
        if (rc != PTL_OK)
                GOTO (fail, rc = -ENOMEM);

        rc = PtlEQAlloc(pni->pni_ni_h, 1024, reply_in_callback,
                        &pni->pni_reply_in_eq_h);
        if (rc != PTL_OK)
                GOTO (fail, rc = -ENOMEM);

        rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_put_source_callback,
                        &pni->pni_bulk_put_source_eq_h);
        if (rc != PTL_OK)
                GOTO (fail, rc = -ENOMEM);

        rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_put_sink_callback,
                        &pni->pni_bulk_put_sink_eq_h);
        if (rc != PTL_OK)
                GOTO (fail, rc = -ENOMEM);

        rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_get_source_callback,
                        &pni->pni_bulk_get_source_eq_h);
        if (rc != PTL_OK)
                GOTO (fail, rc = -ENOMEM);

        rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_get_sink_callback,
                        &pni->pni_bulk_get_sink_eq_h);
        if (rc != PTL_OK)
                GOTO (fail, rc = -ENOMEM);

        return (0);
 fail:
        CERROR ("Failed to initialise network interface %s: %d\n",
                name, rc);

        /* OK to do complete teardown since we invalidated the handles above... */
        ptlrpc_ni_fini (pni);
        return (rc);
}

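/*
 * Probe every NAL the build knows about and set up a ptlrpc
 * interface for each one that is actually loaded.  Returns -EIO if
 * none could be initialised.
 */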
int ptlrpc_init_portals(void)
{
        /* Add new portals network interface names here.
         * Order is irrelevant! */
        char *ni_names[] = { "kqswnal_ni",
                             "kgmnal_ni",
                             "ksocknal_ni",
                             "ktoenal_ni",
                             "tcpnal_ni",
                             NULL };
        int   rc;
        int   i;

        LASSERT (ptlrpc_ninterfaces == 0);

        for (i = 0; ni_names[i] != NULL; i++) {
                LASSERT (ptlrpc_ninterfaces <
                         sizeof (ptlrpc_interfaces)/sizeof (ptlrpc_interfaces[0]));

                rc = ptlrpc_ni_init (ni_names[i],
                                     &ptlrpc_interfaces[ptlrpc_ninterfaces]);
                if (rc == 0)
                        ptlrpc_ninterfaces++;
        }

        if (ptlrpc_ninterfaces == 0) {
                CERROR("network initialisation failed: is a NAL module loaded?\n");
                return -EIO;
        }
        return 0;
}

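/*
 * Tear down every interface set up by ptlrpc_init_portals().
 */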
void ptlrpc_exit_portals(void)
{
        while (ptlrpc_ninterfaces > 0)
                ptlrpc_ni_fini (&ptlrpc_interfaces[--ptlrpc_ninterfaces]);
}