1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #define DEBUG_SUBSYSTEM S_RPC
26 #include <linux/module.h>
28 #include <liblustre.h>
30 #include <linux/obd_class.h>
31 #include <linux/lustre_net.h>
/* Table of portals network interfaces known to ptlrpc.  ptlrpc_init_portals()
 * initialises slot [ptlrpc_ninterfaces] and then advances the counter;
 * ptlrpc_exit_portals() tears the slots down from the highest index to 0. */
33 struct ptlrpc_ni ptlrpc_interfaces[NAL_MAX_NR];
/* Number of entries at the front of ptlrpc_interfaces[] currently in use. */
34 int ptlrpc_ninterfaces;
37 * Free the packet when it has gone out
/* Event-queue callback for outgoing requests; ptlrpc_ni_init() registers it
 * on pni_request_out_eq_h.  The request is recovered from the MD user
 * pointer and one reference on it is dropped through ptlrpc_req_finished().
 * Any event type other than PTL_EVENT_SENT is logged as unknown.
 * NOTE(review): this listing carries embedded line numbers and the numbering
 * skips (39 -> 41, 49 -> 53), so braces, whatever follows the CERROR, and
 * the return statement are not visible here. */
39 static int request_out_callback(ptl_event_t *ev)
41 struct ptlrpc_request *req = ev->mem_desc.user_ptr;
44 /* requests always contiguous */
45 LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);
47 if (ev->type != PTL_EVENT_SENT) {
48 // XXX make sure we understand all events, including ACK's
49 CERROR("Unknown event %d\n", ev->type);
53 /* this balances the atomic_inc in ptl_send_rpc */
54 ptlrpc_req_finished(req);
60 * Free the packet when it has gone out
/* Event-queue callback for outgoing replies; ptlrpc_ni_init() registers it
 * on pni_reply_out_eq_h.
 *   PTL_EVENT_SENT: the buffer described by the MD (start, length) is freed.
 *   PTL_EVENT_ACK:  if the request carries PTL_RPC_FL_WANT_ACK the flag is
 *                   cleared and rq_wait_for_rep is woken.
 * NOTE(review): the embedded numbering skips (75 -> 77, 78 -> 81, 82 -> 90),
 * so the else keywords, closing braces and return statement are not visible.
 * Presumably the DEBUG_REQ is the branch for an unwanted ack and the CERROR
 * is the branch for any other event type; confirm against the full file. */
62 static int reply_out_callback(ptl_event_t *ev)
66 /* replies always contiguous */
67 LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);
69 if (ev->type == PTL_EVENT_SENT) {
/* the reply has gone out; release its buffer */
70 OBD_FREE(ev->mem_desc.start, ev->mem_desc.length);
71 } else if (ev->type == PTL_EVENT_ACK) {
/* for an ACK the MD user pointer is the request being replied to */
72 struct ptlrpc_request *req = ev->mem_desc.user_ptr;
73 if (req->rq_flags & PTL_RPC_FL_WANT_ACK) {
74 req->rq_flags &= ~PTL_RPC_FL_WANT_ACK;
75 wake_up(&req->rq_wait_for_rep);
77 DEBUG_REQ(D_ERROR, req,
78 "ack received for reply, not wanted");
81 // XXX make sure we understand all events
82 CERROR("Unknown event %d\n", ev->type);
90 * Wake up the thread waiting for the reply once it comes in.
/* Event-queue callback for incoming replies; ptlrpc_ni_init() registers it
 * on pni_reply_in_eq_h.  The request is recovered from the MD user pointer.
 * On a PUT event rq_repmsg is pointed at the received data and the waiter
 * on rq_wait_for_rep is woken.
 * NOTE(review): the embedded numbering skips (101 -> 106, 107 -> 111,
 * 112 -> 114, 117 -> 124), so the tail of the first CERROR, whatever follows
 * each CERROR, the statement on line 113, the else before the final CERROR
 * and the return statement are not visible here. */
92 int reply_in_callback(ptl_event_t *ev)
94 struct ptlrpc_request *req = ev->mem_desc.user_ptr;
97 /* replies always contiguous */
98 LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);
/* NOTE(review): going by the message below, an xid of 0x5a repeated looks
 * like the fill pattern of freed memory; confirm against the allocator. */
100 if (req->rq_xid == 0x5a5a5a5a5a5a5a5a) {
101 CERROR("Reply received for freed request! Probably a missing "
/* the match bits of the event must equal the xid of the request */
106 if (req->rq_xid != ev->match_bits) {
107 CERROR("Reply packet for wrong request\n");
111 if (ev->type == PTL_EVENT_PUT) {
/* the reply data begins ev->offset bytes into the MD buffer */
112 req->rq_repmsg = ev->mem_desc.start + ev->offset;
114 wake_up(&req->rq_wait_for_rep);
116 // XXX make sure we understand all events, including ACK's
117 CERROR("Unknown event %d\n", ev->type);
/* Event callback for a request arriving in a service request buffer.
 * The MD user pointer is the request buffer descriptor (rqbd); rqbd_srv_ni
 * leads to the owning service, whose srv_waitq is woken at the end.
 * Only PUT events are accepted, and both sni_nrqbds_receiving and
 * rqbd_refcount must be positive on entry.
 * NOTE(review): no registration of this callback is visible in this listing.
 * NOTE(review): the embedded numbering skips (145 -> 147, 153 -> 155,
 * 156 -> 159), so the end of the long comment inside the if, the else that
 * presumably separates the last-request path from the refcount bump, the
 * closing braces and the return statement are not visible here. */
124 int request_in_callback(ptl_event_t *ev)
126 struct ptlrpc_request_buffer_desc *rqbd = ev->mem_desc.user_ptr;
127 struct ptlrpc_srv_ni *srv_ni = rqbd->rqbd_srv_ni;
128 struct ptlrpc_service *service = srv_ni->sni_service;
130 /* requests always contiguous */
131 LASSERT((ev->mem_desc.options & PTL_MD_IOV) == 0);
132 /* we only enable puts */
133 LASSERT(ev->type == PTL_EVENT_PUT);
134 LASSERT(atomic_read(&srv_ni->sni_nrqbds_receiving) > 0);
135 LASSERT(atomic_read(&rqbd->rqbd_refcount) > 0);
/* a difference between rlength and mlength is only reported, not acted on */
137 if (ev->rlength != ev->mlength)
138 CERROR("Warning: Possibly truncated rpc (%d/%d)\n",
139 ev->mlength, ev->rlength);
141 if (ptl_is_valid_handle(&ev->unlinked_me)) {
142 /* This is the last request to be received into this
143 * request buffer. We don't bump the refcount, since the
144 * thread servicing this event is effectively taking over
145 * portals' reference.
147 #warning ev->unlinked_me.nal_idx is not set properly in a callback
148 LASSERT(ev->unlinked_me.handle_idx==rqbd->rqbd_me_h.handle_idx);
150 /* we're off the air */
151 /* we'll probably start dropping packets in portals soon */
/* one fewer buffer is receiving; complain when the count reaches zero */
152 if (atomic_dec_and_test(&srv_ni->sni_nrqbds_receiving))
153 CERROR("All request buffers busy\n");
155 /* +1 ref for service thread */
156 atomic_inc(&rqbd->rqbd_refcount);
159 wake_up(&service->srv_waitq);
/* Event-queue callback for the sending side of a bulk put; ptlrpc_ni_init()
 * registers it on pni_bulk_put_source_eq_h.  The bulk descriptor is the MD
 * user pointer.  Only SENT and ACK events are accepted, and
 * bd_source_callback_count must be 1 or 2 on entry.  Each event decrements
 * that counter; the event that brings it to zero walks the page list, sets
 * PTL_BULK_FL_SENT and wakes bd_waitq.
 * NOTE(review): the embedded numbering skips (188 -> 191, 191 -> 195,
 * 201 -> 203, 203 -> 211), so the tail of the list_entry() call, the
 * statement guarded by the bp_cb test, whatever surrounds the final LASSERT,
 * the closing braces and the return statement are not visible here. */
164 static int bulk_put_source_callback(ptl_event_t *ev)
166 struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
167 struct ptlrpc_bulk_page *bulk;
168 struct list_head *tmp;
169 struct list_head *next;
172 CDEBUG(D_NET, "got %s event %d\n",
173 (ev->type == PTL_EVENT_SENT) ? "SENT" :
174 (ev->type == PTL_EVENT_ACK) ? "ACK" : "UNEXPECTED", ev->type);
176 LASSERT(ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_ACK);
178 LASSERT(atomic_read(&desc->bd_source_callback_count) > 0 &&
179 atomic_read(&desc->bd_source_callback_count) <= 2);
181 /* 1 fragment for each page always */
182 LASSERT(ev->mem_desc.niov == desc->bd_page_count);
/* only the event that drops the counter to zero does the completion work */
184 if (atomic_dec_and_test(&desc->bd_source_callback_count)) {
185 void (*event_handler)(struct ptlrpc_bulk_desc *);
187 list_for_each_safe(tmp, next, &desc->bd_page_list) {
188 bulk = list_entry(tmp, struct ptlrpc_bulk_page,
191 if (bulk->bp_cb != NULL)
195 /* We need to make a note of whether there's an event handler
196 * before we call wake_up, because if there is no event handler,
197 * 'desc' might be freed before we're scheduled again. */
198 event_handler = desc->bd_ptl_ev_hdlr;
200 desc->bd_flags |= PTL_BULK_FL_SENT;
201 wake_up(&desc->bd_waitq);
203 LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
/* Event-queue callback for the receiving side of a bulk put;
 * ptlrpc_ni_init() registers it on pni_bulk_put_sink_eq_h.  The bulk
 * descriptor is the MD user pointer.  The event must be a PUT at offset
 * zero on an IOV MD with one fragment per page.  The per-page buffer lengths
 * are summed and must equal the MD length; then PTL_BULK_FL_RCVD is set and
 * bd_waitq is woken.
 * NOTE(review): the embedded numbering skips (235 -> 239, 247 -> 249,
 * 249 -> 256), so the statement guarded by the bp_cb test, whatever
 * surrounds the final LASSERT, the closing braces and the return statement
 * are not visible here. */
211 static int bulk_put_sink_callback(ptl_event_t *ev)
213 struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
214 struct ptlrpc_bulk_page *bulk;
215 struct list_head *tmp;
216 struct list_head *next;
217 ptl_size_t total = 0;
218 void (*event_handler)(struct ptlrpc_bulk_desc *);
221 LASSERT(ev->type == PTL_EVENT_PUT);
223 /* put with zero offset */
224 LASSERT(ev->offset == 0);
226 LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0);
227 /* 1 fragment for each page always */
228 LASSERT(ev->mem_desc.niov == desc->bd_page_count);
/* add up bp_buflen over every page; the sum is checked after the loop */
230 list_for_each_safe (tmp, next, &desc->bd_page_list) {
231 bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
233 total += bulk->bp_buflen;
235 if (bulk->bp_cb != NULL)
239 LASSERT(ev->mem_desc.length == total);
241 /* We need to make a note of whether there's an event handler
242 * before we call wake_up, because if there is no event
243 * handler, 'desc' might be freed before we're scheduled again. */
244 event_handler = desc->bd_ptl_ev_hdlr;
246 desc->bd_flags |= PTL_BULK_FL_RCVD;
247 wake_up(&desc->bd_waitq);
249 LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
/* Event-queue callback for the side that serves a bulk get;
 * ptlrpc_ni_init() registers it on pni_bulk_get_source_eq_h.  The bulk
 * descriptor is the MD user pointer.  The event must be a GET at offset
 * zero on an IOV MD with one fragment per page.  The per-page buffer lengths
 * are summed and must equal the MD length; then PTL_BULK_FL_SENT is set and
 * bd_waitq is woken.
 * NOTE(review): the embedded numbering skips (280 -> 284, 292 -> 294,
 * 294 -> 302), so the statement guarded by the bp_cb test, whatever
 * surrounds the final LASSERT, the closing braces and the return statement
 * are not visible here. */
256 static int bulk_get_source_callback(ptl_event_t *ev)
258 struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
259 struct ptlrpc_bulk_page *bulk;
260 struct list_head *tmp;
261 struct list_head *next;
262 ptl_size_t total = 0;
263 void (*event_handler)(struct ptlrpc_bulk_desc *);
266 LASSERT(ev->type == PTL_EVENT_GET);
268 /* get with zero offset */
269 LASSERT(ev->offset == 0);
271 LASSERT((ev->mem_desc.options & PTL_MD_IOV) != 0);
272 /* 1 fragment for each page always */
273 LASSERT(ev->mem_desc.niov == desc->bd_page_count);
/* add up bp_buflen over every page; the sum is checked after the loop */
275 list_for_each_safe (tmp, next, &desc->bd_page_list) {
276 bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link);
278 total += bulk->bp_buflen;
280 if (bulk->bp_cb != NULL)
284 LASSERT(ev->mem_desc.length == total);
286 /* We need to make a note of whether there's an event handler
287 * before we call wake_up, because if there is no event
288 * handler, 'desc' might be freed before we're scheduled again. */
289 event_handler = desc->bd_ptl_ev_hdlr;
291 desc->bd_flags |= PTL_BULK_FL_SENT;
292 wake_up(&desc->bd_waitq);
294 LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
/* Event-queue callback for the side that issued a bulk get;
 * ptlrpc_ni_init() registers it on pni_bulk_get_sink_eq_h.  The bulk
 * descriptor is the MD user pointer.  Only SENT and REPLY events are
 * accepted.  Each event decrements bd_source_callback_count (which must be
 * 1 or 2 on entry); the event that brings it to zero walks the page list,
 * sets PTL_BULK_FL_RCVD and wakes bd_waitq.
 * NOTE(review): the embedded numbering skips (312 -> 315, 327 -> 330,
 * 330 -> 334, 340 -> 342, 342 -> 350), so the last CDEBUG argument, the tail
 * of the list_entry() call, the statement guarded by the bp_cb test,
 * whatever surrounds the final LASSERT, the closing braces and the return
 * statement are not visible here. */
302 static int bulk_get_sink_callback(ptl_event_t *ev)
304 struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr;
305 struct ptlrpc_bulk_page *bulk;
306 struct list_head *tmp;
307 struct list_head *next;
310 CDEBUG(D_NET, "got %s event %d\n",
311 (ev->type == PTL_EVENT_SENT) ? "SENT" :
312 (ev->type == PTL_EVENT_REPLY) ? "REPLY" : "UNEXPECTED",
315 LASSERT(ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_REPLY);
317 LASSERT(atomic_read(&desc->bd_source_callback_count) > 0 &&
318 atomic_read(&desc->bd_source_callback_count) <= 2);
320 /* 1 fragment for each page always */
321 LASSERT(ev->mem_desc.niov == desc->bd_page_count);
/* only the event that drops the counter to zero does the completion work */
323 if (atomic_dec_and_test(&desc->bd_source_callback_count)) {
324 void (*event_handler)(struct ptlrpc_bulk_desc *);
326 list_for_each_safe(tmp, next, &desc->bd_page_list) {
327 bulk = list_entry(tmp, struct ptlrpc_bulk_page,
330 if (bulk->bp_cb != NULL)
334 /* We need to make a note of whether there's an event handler
335 * before we call wake_up, because if there is no event handler,
336 * 'desc' might be freed before we're scheduled again. */
337 event_handler = desc->bd_ptl_ev_hdlr;
339 desc->bd_flags |= PTL_BULK_FL_RCVD;
340 wake_up(&desc->bd_waitq);
342 LASSERT(desc->bd_ptl_ev_hdlr == event_handler);
/* Resolve an obd_uuid to a ptlrpc_peer.
 * lustre_uuid_to_peer() first maps the uuid string to a lustre_peer.  The
 * interface table is then searched for the entry whose pni_ni_h is bytewise
 * equal to lpeer.peer_ni; on a match peer->peer_nid is copied from the
 * lookup result.  If the loop finds nothing, an error naming the nid and
 * the ni handle is logged.
 * NOTE(review): the embedded numbering skips (355 -> 360, 365 -> 371,
 * 372 -> 376), so the declaration of i, the handling of rc, any further
 * peer fields set on a match, and every return statement are not visible. */
350 int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer)
352 struct ptlrpc_ni *pni;
353 struct lustre_peer lpeer;
355 int rc = lustre_uuid_to_peer (uuid->uuid, &lpeer);
/* look for the initialised interface that owns the ni handle of the peer */
360 for (i = 0; i < ptlrpc_ninterfaces; i++) {
361 pni = &ptlrpc_interfaces[i];
363 if (!memcmp (&lpeer.peer_ni, &pni->pni_ni_h,
364 sizeof (lpeer.peer_ni))) {
365 peer->peer_nid = lpeer.peer_nid;
371 CERROR ("Can't find ptlrpc interface for "LPX64" ni handle %08lx %08lx\n",
372 lpeer.peer_nid, lpeer.peer_ni.nal_idx, lpeer.peer_ni.handle_idx);
/* Tear down one ptlrpc network interface: free its seven event queues and
 * drop the inter-module reference held under pni_name.
 * The results of PtlEQFree() are not checked.  ptlrpc_ni_init() also runs
 * this after logging an initialisation failure, relying on having marked
 * every EQ handle invalid beforehand, so handles that were never allocated
 * can reach PtlEQFree() here.
 * NOTE(review): the embedded numbering skips (376 -> 378, 386 -> 389), so
 * the braces of this function are absent from the listing. */
376 void ptlrpc_ni_fini (struct ptlrpc_ni *pni)
378 PtlEQFree(pni->pni_request_out_eq_h);
379 PtlEQFree(pni->pni_reply_out_eq_h);
380 PtlEQFree(pni->pni_reply_in_eq_h);
381 PtlEQFree(pni->pni_bulk_put_source_eq_h);
382 PtlEQFree(pni->pni_bulk_put_sink_eq_h);
383 PtlEQFree(pni->pni_bulk_get_source_eq_h);
384 PtlEQFree(pni->pni_bulk_get_sink_eq_h);
/* balances the inter_module_get() done in ptlrpc_ni_init() */
386 inter_module_put(pni->pni_name);
/* Initialise one ptlrpc network interface from the NI handle exported
 * under name.
 *   1. inter_module_get(name) yields a pointer to the NI handle.
 *   2. name and a copy of the handle are stored in pni.  pni_name keeps the
 *      pointer passed in (no copy is made), so the string must outlive pni.
 *   3. All seven EQ handles are marked invalid so that a full teardown is
 *      possible from any point of failure.
 *   4. One event queue of size 1024 is allocated per callback.
 * The failure path sets rc to -ENOMEM, logs, and runs ptlrpc_ni_fini().
 * NOTE(review): the embedded numbering skips (389 -> 392, 394 -> 396,
 * 396 -> 400, each 4x9/4x4 -> 4x1/4x6, 451 -> 455, 455 -> 458, 459 -> 463),
 * so the declaration of rc, the test on nip and its return, the test on rc
 * before each GOTO, the success return, the fail label, the last CERROR
 * argument and the final return are not visible here. */
389 int ptlrpc_ni_init (char *name, struct ptlrpc_ni *pni)
392 ptl_handle_ni_t *nip;
394 nip = (ptl_handle_ni_t *)inter_module_get (name);
396 CDEBUG (D_NET, "Network interface %s not loaded\n", name);
400 CDEBUG (D_NET, "init %s: nal_idx %ld\n", name, nip->nal_idx);
402 pni->pni_name = name;
403 pni->pni_ni_h = *nip;
/* invalidate every EQ handle before allocating any of them */
405 ptl_set_inv_handle (&pni->pni_request_out_eq_h);
406 ptl_set_inv_handle (&pni->pni_reply_out_eq_h);
407 ptl_set_inv_handle (&pni->pni_reply_in_eq_h);
408 ptl_set_inv_handle (&pni->pni_bulk_put_source_eq_h);
409 ptl_set_inv_handle (&pni->pni_bulk_put_sink_eq_h);
410 ptl_set_inv_handle (&pni->pni_bulk_get_source_eq_h);
411 ptl_set_inv_handle (&pni->pni_bulk_get_sink_eq_h);
413 /* NB We never actually PtlEQGet() out of these events queues since
414 * we're only interested in the event callback, so we can just let
415 * them wrap. Their sizes aren't a big deal, apart from providing
416 * a little history for debugging... */
418 rc = PtlEQAlloc(pni->pni_ni_h, 1024, request_out_callback,
419 &pni->pni_request_out_eq_h);
421 GOTO (fail, rc = -ENOMEM);
423 rc = PtlEQAlloc(pni->pni_ni_h, 1024, reply_out_callback,
424 &pni->pni_reply_out_eq_h);
426 GOTO (fail, rc = -ENOMEM);
428 rc = PtlEQAlloc(pni->pni_ni_h, 1024, reply_in_callback,
429 &pni->pni_reply_in_eq_h);
431 GOTO (fail, rc = -ENOMEM);
433 rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_put_source_callback,
434 &pni->pni_bulk_put_source_eq_h);
436 GOTO (fail, rc = -ENOMEM);
438 rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_put_sink_callback,
439 &pni->pni_bulk_put_sink_eq_h);
441 GOTO (fail, rc = -ENOMEM);
443 rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_get_source_callback,
444 &pni->pni_bulk_get_source_eq_h);
446 GOTO (fail, rc = -ENOMEM);
448 rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_get_sink_callback,
449 &pni->pni_bulk_get_sink_eq_h);
451 GOTO (fail, rc = -ENOMEM);
455 CERROR ("Failed to initialise network interface %s: %d\n",
458 /* OK to do complete teardown since we invalidated the handles above... */
459 ptlrpc_ni_fini (pni);
/* Bring up ptlrpc on every portals network interface that is available.
 * Must be entered with ptlrpc_ninterfaces == 0.  Walks the NULL-terminated
 * ni_names[] list, calling ptlrpc_ni_init() on slot [ptlrpc_ninterfaces] of
 * ptlrpc_interfaces[], and logs an error if the count is still zero at the
 * end.
 * NOTE(review): the embedded numbering skips (463 -> 465, 467 -> 476,
 * 483 -> 485, 485 -> 488, 489 -> 495), so the remaining ni_names entries,
 * the declarations of i and rc, the test on rc that presumably guards the
 * increment, and the return statements are not visible here. */
463 int ptlrpc_init_portals(void)
465 /* Add new portals network interface names here.
466 * Order is irrelevant! */
467 char *ni_names[] = { "kqswnal_ni",
476 LASSERT (ptlrpc_ninterfaces == 0);
478 for (i = 0; ni_names[i] != NULL; i++) {
/* never run past the end of the interface table */
479 LASSERT (ptlrpc_ninterfaces <
480 sizeof (ptlrpc_interfaces)/sizeof (ptlrpc_interfaces[0]));
482 rc = ptlrpc_ni_init (ni_names[i],
483 &ptlrpc_interfaces[ptlrpc_ninterfaces]);
485 ptlrpc_ninterfaces++;
488 if (ptlrpc_ninterfaces == 0) {
489 CERROR("network initialisation failed: is a NAL module loaded?\n");
/* Shut down every initialised interface, highest slot first.  The counter
 * is decremented before each ptlrpc_ni_fini() call, so it is zero when the
 * loop ends.
 * NOTE(review): the embedded numbering skips (495 -> 497), so the braces of
 * this function are absent from the listing. */
495 void ptlrpc_exit_portals(void)
497 while (ptlrpc_ninterfaces > 0)
498 ptlrpc_ni_fini (&ptlrpc_interfaces[--ptlrpc_ninterfaces]);