1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:cindent:
4 * Copyright (C) 2003 High Performance Computing Center North (HPC2N)
5 * Author: Niklas Edmundsson <nikke@hpc2n.umu.se>
8 * This file is part of Portals, http://www.sf.net/projects/lustre/
10 * Portals is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Portals is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Portals; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "scimacnal.h"
28 kscimacnal_read (nal_cb_t *nal, void *private,
29 void *dst_addr, user_ptr src_addr, size_t len)
31 CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n",
32 nal->ni.nid, (long)len, src_addr, dst_addr );
33 memcpy( dst_addr, src_addr, len );
39 kscimacnal_write(nal_cb_t *nal, void *private,
40 user_ptr dst_addr, void *src_addr, size_t len)
42 CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n",
43 nal->ni.nid, (long)len, src_addr, dst_addr );
44 memcpy( dst_addr, src_addr, len );
50 kscimacnal_malloc(nal_cb_t *nal, size_t len)
54 PORTAL_ALLOC(buf, len);
60 kscimacnal_free(nal_cb_t *nal, void *buf, size_t len)
62 PORTAL_FREE(buf, len);
67 kscimacnal_printf(nal_cb_t *nal, const char *fmt, ...)
72 if (portal_debug & D_NET) {
74 vsnprintf( msg, sizeof(msg), fmt, ap );
77 printk("Lustre: CPUId: %d %s",smp_processor_id(), msg);
83 kscimacnal_cli(nal_cb_t *nal, unsigned long *flags)
85 kscimacnal_data_t *data= nal->nal_data;
87 spin_lock_irqsave(&data->ksci_dispatch_lock,*flags);
92 kscimacnal_sti(nal_cb_t *nal, unsigned long *flags)
94 kscimacnal_data_t *data= nal->nal_data;
96 spin_unlock_irqrestore(&data->ksci_dispatch_lock,*flags);
101 kscimacnal_callback(nal_cb_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev)
103 /* holding ksci_dispatch_lock */
105 if (eq->event_callback != NULL)
106 eq->event_callback(ev);
108 /* We will wake theads sleeping in yield() here, AFTER the
109 * callback, when we implement blocking yield */
113 kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist)
115 /* FIXME: Network distance has a meaning, but is there no easy
116 * way to figure it out (depends on routing) */
118 if ( nal->ni.nid == nid ) {
129 char * get_mac_error(mac_status_t status)
132 case MAC_MSG_STAT_OK:
133 return "MAC_MSG_STAT_OK";
134 case MAC_MSG_STAT_FREED:
135 return "MAC_MSG_STAT_FREED";
136 case MAC_MSG_STAT_ABORTED:
137 return "MAC_MSG_STAT_ABORTED";
138 case MAC_MSG_STAT_TIMEDOUT:
139 return "MAC_MSG_STAT_TIMEDOUT";
140 case MAC_MSG_STAT_NODEUNREACH:
141 return "MAC_MSG_STAT_NODEUNREACH";
142 case MAC_MSG_STAT_NETDOWN:
143 return "MAC_MSG_STAT_NETDOWN";
144 case MAC_MSG_STAT_RESET:
145 return "MAC_MSG_STAT_RESET";
146 case MAC_MSG_STAT_INITFAILED:
147 return "MAC_MSG_STAT_INITFAILED";
148 case MAC_MSG_STAT_SYNCFAILED:
149 return "MAC_MSG_STAT_SYNCFAILED";
150 case MAC_MSG_STAT_BADPROTO:
151 return "MAC_MSG_STAT_BADPROTO";
152 case MAC_MSG_STAT_NOBUFSPACE:
153 return "MAC_MSG_STAT_NOBUFSPACE";
154 case MAC_MSG_STAT_CONGESTION:
155 return "MAC_MSG_STAT_CONGESTION";
156 case MAC_MSG_STAT_OTHER:
157 return "MAC_MSG_STAT_OTHER";
159 return "Unknown error";
164 /* FIXME add routing code here ? */
166 /* Called by ScaMac when transmission is complete (ie. message is released) */
168 kscimacnal_txrelease(mac_mblk_t *msg, mac_msg_status_t status, void *context)
170 kscimacnal_tx_t *ktx = (kscimacnal_tx_t *)context;
173 LASSERT (ktx != NULL);
174 /* Unmap any mapped pages */
175 for(i=0; i<ktx->ktx_nmapped; i++) {
176 kunmap(ktx->ktx_kpages[i]);
179 CDEBUG(D_NET, "kunmapped %d pages\n", ktx->ktx_nmapped);
181 /* Euh, there is no feedback when transmission fails?! */
183 case MAC_MSG_STAT_OK: /* normal */
186 CERROR("%s (%d):\n", get_mac_error(status), status);
191 lib_finalize(ktx->ktx_nal, ktx->ktx_private, ktx->ktx_cookie,
192 (err == 0) ? PTL_OK : PTL_FAIL);
194 PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
/*
 * Common send path for both iovec (kernel-virtual) and kiov (page)
 * payloads.  Copies the caller's stack-resident portals header into the
 * tx descriptor, wraps it in a scimac mblk, appends one mblk per payload
 * fragment (kmap():ing pages as needed — unmapped later in
 * kscimacnal_txrelease), and hands the chain to mac_send().
 * NOTE(review): this region of the file is elided in places; the visible
 * lines below are NOT contiguous — do not edit without the full source.
 */
198 /* Called by portals when it wants to send a message.
199 * Since ScaMAC has it's own TX thread we don't bother setting up our own. */
201 /* FIXME: Read comments in qswnal_cb.c for _sendmsg and fix return-on-error
204 kscimacnal_sendmsg(nal_cb_t *nal,
211 unsigned int payload_niov,
212 struct iovec *payload_iov,
213 ptl_kiov_t *payload_kiov,
216 kscimacnal_tx_t *ktx=NULL;
217 kscimacnal_data_t *ksci = nal->nal_data;
219 int buf_len = sizeof(ptl_hdr_t) + payload_len;
220 mac_mblk_t *msg=NULL, *lastblk, *newblk;
221 unsigned long physaddr;
224 CDEBUG(D_NET, "sending %d bytes from %p/%p to nid "LPX64" niov: %d\n",
225 payload_len, payload_iov, payload_kiov, nid, payload_niov);
227 /* Basic sanity checks */
228 LASSERT(ksci != NULL);
229 LASSERT(hdr != NULL);
230 LASSERT (payload_len == 0 || payload_niov > 0);
231 LASSERT (payload_niov <= PTL_MD_MAX_IOV);
232 /* It must be OK to kmap() if required */
233 LASSERT (payload_kiov == NULL || !in_interrupt ());
234 /* payload is either all vaddrs or all pages */
235 LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
237 /* Do real check if we can send this */
238 if (buf_len > mac_get_mtusize(ksci->ksci_machandle)) {
239 CERROR("kscimacnal:request exceeds TX MTU size (%ld).\n",
240 mac_get_mtusize(ksci->ksci_machandle));
245 /* save transaction info for later finalize and cleanup */
246 PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t)));
251 ktx->ktx_nmapped = 0; /* Start with no mapped pages :) */
253 /* *SIGH* hdr is a stack variable in the calling function, so we
254 * need to copy it to a buffer. Zerocopy magic (or is it just
255 * deferred memcpy?) is annoying sometimes. */
256 memcpy(&ktx->ktx_hdr, hdr, sizeof(ptl_hdr_t));
258 /* First, put the header in the main message mblk */
259 msg = mac_alloc_mblk(&ktx->ktx_hdr, sizeof(ptl_hdr_t),
260 kscimacnal_txrelease, ktx);
262 PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
265 mac_put_mblk(msg, sizeof(ptl_hdr_t));
/* One additional mblk per payload fragment; only the head mblk carries
 * the release callback — scimac runs it once for the whole chain. */
268 /* Allocate additional mblks for each iov as needed.
269 * Essentially lib_copy_(k)iov2buf with a twist or two */
270 while (payload_len > 0)
275 LASSERT (payload_niov > 0);
277 if(payload_iov != NULL) {
278 nob = MIN (payload_iov->iov_len, payload_len);
279 addr = payload_iov->iov_base;
282 nob = MIN (payload_kiov->kiov_len, payload_len);
283 /* Bollocks. We need to handle paged IO for things to
284 * work but there is no good way to do this. We
285 * do it by kmap():ing all pages and keep them
286 * mapped until scimac is done with them. */
287 /* FIXME: kunmap() on error */
288 addr = kmap(payload_kiov->kiov_page);
289 ktx->ktx_kpages[ktx->ktx_nmapped++] =
290 payload_kiov->kiov_page;
292 /* We don't need a callback on the additional mblks,
293 * since all release callbacks seems to be called when
294 * the entire message has been sent */
295 newblk=mac_alloc_mblk(addr, nob, NULL, NULL);
299 PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
302 mac_put_mblk(newblk, nob);
303 mac_link_mblk(lastblk, newblk);
308 if(payload_iov != NULL) {
316 CDEBUG(D_NET, "kmapped %d pages\n", ktx->ktx_nmapped);
319 ktx->ktx_private = private;
320 ktx->ktx_cookie = cookie;
322 CDEBUG(D_NET, "mac_send %d bytes to nid: 0x%Lx\n", buf_len, nid);
/* NOTE(review): nid is folded into a scimac physical address via htonl()
 * here — presumably NIDs map 1:1 to SCI node addresses; confirm. */
324 physaddr = htonl(nid);
326 if((rc=mac_send(ksci->ksci_machandle, msg,
327 (mac_physaddr_t *) &physaddr))) {
328 CERROR("kscimacnal: mac_send() failed, rc=%d\n", rc);
330 PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t)));
339 kscimacnal_send (nal_cb_t *nal,
346 unsigned int payload_niov,
347 struct iovec *payload_iov,
350 return (kscimacnal_sendmsg (nal, private, cookie, hdr, type, nid, pid,
351 payload_niov, payload_iov, NULL, payload_nob));
355 kscimacnal_send_pages (nal_cb_t *nal,
362 unsigned int payload_niov,
363 ptl_kiov_t *payload_kiov,
366 return (kscimacnal_sendmsg (nal, private, cookie, hdr, type, nid, pid,
367 payload_niov, NULL, payload_kiov, payload_nob));
372 kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
374 CERROR ("forwarding not implemented\n");
/*
 * RX entry point, invoked from the ScaMac receive thread for every
 * incoming message.  Filters out control/foreign traffic, validates that
 * at least a full portals header arrived, then either hands the message
 * to lib_parse() (our NID) or logs that forwarding is unsupported.
 * NOTE(review): this region of the file is elided in places; the visible
 * lines below are NOT contiguous — do not edit without the full source.
 */
378 /* Process a received portals packet */
379 /* Called by the ScaMac RX thread when a packet is received */
381 kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type,
384 ptl_hdr_t *hdr = NULL;
387 kscimacnal_data_t *ksci = userdata;
389 LASSERT(ksci != NULL);
391 if ( !ksci->ksci_init || ksci->ksci_shuttingdown ||
392 type == MAC_MSG_TYPE_CTRL || type == MAC_MSG_TYPE_OTHER ) {
393 /* We're not interested in messages not for us, ignore */
398 size = mac_msg_size(msg);
400 CDEBUG(D_NET,"msg %p type %d, size %ld bytes (%ld mblks)\n",
401 msg, type, size, mac_msg_mblks(msg));
/* A message shorter than a portals header cannot be parsed; drop it
 * (silently while shutting down, loudly otherwise). */
403 if( size < sizeof( ptl_hdr_t ) ) {
404 /* XXX what's this for? */
405 if (ksci->ksci_shuttingdown)
407 CERROR("kscimacnal: did not receive complete portal header,"
408 "size= %ld\n", size);
409 /* Free the message before exiting */
414 /* Provide everything we know */
418 krx.userdata = userdata;
420 /* mac_msg_next returns the next mblk with unread data */
421 hdr = mac_get_mblk(mac_msg_next(msg), sizeof(ptl_hdr_t) );
424 CERROR("kscimacnal: no data block in message %p\n", msg);
429 if ( hdr->dest_nid == kscimacnal_lib.ni.nid ) {
430 PROF_START(lib_parse);
431 /* sets wanted_len, iovs etc and calls our callback */
432 lib_parse(&kscimacnal_lib, hdr, &krx);
433 PROF_FINISH(lib_parse);
434 #if 0 /* FIXME: Is it possible to detect this? */
435 } else if (kgmnal_ispeer(hdr->dest_nid)) {
436 /* should have gone direct to peer */
437 CERROR("dropping packet from 0x%llx to 0x%llx:"
438 "target is a peer\n",
439 hdr->src_nid, hdr->dest_nid);
440 kgmnal_requeue_rx(&krx);
441 #endif /* if 0 FIXME */
443 /* forward to gateway */
444 CERROR("forwarding not implemented, mynid=0x%llx dest=0x%llx\n",
445 kscimacnal_lib.ni.nid, hdr->dest_nid);
450 CDEBUG(D_NET, "msg %p: Done\n", msg);
/*
 * Common receive path for iovec and kiov destinations.  Walks the
 * received mblk chain, copying each block's data into the destination
 * fragments with continuation support (a single mblk may span several
 * iov entries and vice versa), kmap():ing/kunmap():ing destination
 * pages as it goes, then calls lib_finalize(PTL_OK).
 * NOTE(review): this region of the file is elided in places; the visible
 * lines below are NOT contiguous — do not edit without the full source.
 */
454 /* Called by portals to process a recieved packet */
456 kscimacnal_recvmsg(nal_cb_t *nal,
465 kscimacnal_rx_t *krx = private;
469 ptl_size_t iovused=0;
472 LASSERT (krx != NULL);
473 LASSERT (krx->msg != NULL);
475 CDEBUG(D_NET,"msg %p: mlen=%d, rlen=%d, niov=%d\n",
476 krx->msg, mlen, rlen, niov);
478 /* What was actually received must be >= what sender claims to have
480 LASSERT (mlen <= rlen); /* something is wrong if this isn't true */
481 if (mac_msg_size(krx->msg) < sizeof(ptl_hdr_t)+mlen) {
482 /* We didn't receive everything lib thinks we did */
483 CERROR("Bad message size: have %d, need %d + %d\n",
484 mac_msg_size(krx->msg), sizeof(ptl_hdr_t), mlen);
488 /* It must be OK to kmap() if required */
489 LASSERT (kiov == NULL || !in_interrupt ());
490 /* Either all pages or all vaddrs */
491 LASSERT (!(kiov != NULL && iov != NULL));
495 /* mac_msg_next returns next mblk with unread data (ie. can
497 while (mlen != 0 && (mblk = mac_msg_next(krx->msg))) {
498 pkt_len = mac_mblk_len(mblk);
499 src = mac_get_mblk(mblk, pkt_len); /* Next unread block */
501 CDEBUG(D_NET,"msg %p: mblk: %p pkt_len: %ld src: %p\n",
502 krx->msg, mblk, pkt_len, src);
504 LASSERT(src != NULL);
506 /* Essentially lib_copy_buf2(k)iov but with continuation
507 * support, we "gracefully" thrash the argument vars ;) */
508 while (pkt_len > 0) {
514 LASSERT(iovused < iov->iov_len);
516 base = iov->iov_base;
519 LASSERT(iovused < kiov->kiov_len);
520 len = kiov->kiov_len;
523 base = kmap(kiov->kiov_page);
527 nob = MIN (len-iovused, pkt_len);
528 CDEBUG(D_NET, "base: %p len: %d src: %p nob: %d "
530 base, len, src, nob, iovused);
532 memcpy (base+iovused, src, nob);
536 if(nob+iovused < len) {
537 /* We didn't use all of the iov */
547 kunmap(kiov->kiov_page);
554 /* Just to make sure the last page is unmapped */
555 if(kiov!=NULL && base!=NULL) {
556 kunmap(kiov->kiov_page);
561 CDEBUG(D_NET, "Calling lib_finalize.\n");
563 PROF_START(lib_finalize);
564 lib_finalize(nal, private, cookie, PTL_OK);
565 PROF_FINISH(lib_finalize);
567 CDEBUG(D_NET, "Done.\n");
574 kscimacnal_recv(nal_cb_t *nal,
582 return (kscimacnal_recvmsg (nal, private, cookie, niov, iov, NULL, mlen, rlen));
587 kscimacnal_recv_pages (nal_cb_t *nal,
595 return (kscimacnal_recvmsg (nal, private, cookie, niov, NULL, kiov, mlen, rlen));
/*
 * NAL callback table handed to the Portals library: wires every cb_*
 * hook to the kscimacnal_* implementation above.  Uses GNU-style
 * "label:" designated initializers (pre-C99 gcc syntax).
 * NOTE(review): the closing "};" lies beyond this chunk of the file.
 */
599 nal_cb_t kscimacnal_lib = {
600 nal_data: &kscimacnal_data, /* NAL private data */
601 cb_send: kscimacnal_send,
602 cb_send_pages: kscimacnal_send_pages,
603 cb_recv: kscimacnal_recv,
604 cb_recv_pages: kscimacnal_recv_pages,
605 cb_read: kscimacnal_read,
606 cb_write: kscimacnal_write,
607 cb_malloc: kscimacnal_malloc,
608 cb_free: kscimacnal_free,
609 cb_printf: kscimacnal_printf,
610 cb_cli: kscimacnal_cli,
611 cb_sti: kscimacnal_sti,
612 cb_callback: kscimacnal_callback,
613 cb_dist: kscimacnal_dist