1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Eric Barton <eric@bartonsoftware.com>
7 * This file is part of Lustre, http://www.lustre.org.
9 * Lustre is free software; you can redistribute it and/or
10 * modify it under the terms of version 2 of the GNU General Public
11 * License as published by the Free Software Foundation.
13 * Lustre is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Lustre; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "openiblnd.h"
/* LND operations table registering this driver's entry points with LNet.
 * NOTE(review): the declaration line (presumably `lnd_t the_kiblnd = {`) and
 * the closing brace are not visible in this listing — confirm in full source. */
30 .lnd_type = OPENIBLND,
32 .lnd_startup = kibnal_startup,
33 .lnd_shutdown = kibnal_shutdown,
34 .lnd_ctl = kibnal_ctl,
35 .lnd_send = kibnal_send,
36 .lnd_recv = kibnal_recv,
37 .lnd_eager_recv = kibnal_eager_recv,
38 .lnd_accept = kibnal_accept,
/* Single module-wide state blob (peers, conns, CQ/PD handles, flags etc). */
41 kib_data_t kibnal_data;
/* Simple rotate-left-and-add 32-bit checksum over 'nob' bytes at 'ptr'.
 * Never returns 0, since 0 on the wire means "no checksum supplied". */
44 kibnal_cksum (void *ptr, int nob)
50 sum = ((sum << 1) | (sum >> 31)) + *c++;
52 /* ensure I don't return 0 (== no checksum) */
53 return (sum == 0) ? 1 : sum;
/* Initialise the common header of an outgoing message: record the message
 * type and total on-the-wire size (fixed header + type-specific body). */
57 kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
60 msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
/* Finalise a message immediately before sending: stamp magic/version,
 * flow-control credits, src/dst NIDs and incarnation stamps, and compute the
 * optional checksum (controlled by the kib_cksum tunable). */
64 kibnal_pack_msg(kib_msg_t *msg, int version, int credits,
65 lnet_nid_t dstnid, __u64 dststamp)
67 /* CAVEAT EMPTOR! all message fields not set here should have been
68 * initialised previously. */
69 msg->ibm_magic = IBNAL_MSG_MAGIC;
70 msg->ibm_version = version;
72 msg->ibm_credits = credits;
75 msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
77 msg->ibm_srcstamp = kibnal_data.kib_incarnation;
78 msg->ibm_dstnid = dstnid;
79 msg->ibm_dststamp = dststamp;
81 if (*kibnal_tunables.kib_cksum) {
82 /* NB ibm_cksum zero while computing cksum */
83 msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
/* Validate and (if peer is opposite-endian) byte-swap an incoming message of
 * 'nob' bytes. Accepts native or byte-swapped magic; checks version (either
 * current or the RDMAREPLYNOTRSRVD compatibility version when the caller
 * passes expected_version == 0), length, and optional checksum, then swabs
 * the header and the type-specific body. ibm_magic is deliberately left
 * unflipped as an endianness clue to later code.
 * NOTE(review): error-return statements are truncated out of this listing;
 * presumably each CERROR path returns -EPROTO — confirm in full source. */
88 kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob)
90 const int hdr_size = offsetof(kib_msg_t, ibm_u);
97 CERROR("Short message: %d\n", nob);
101 if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
103 } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
106 CERROR("Bad magic: %08x\n", msg->ibm_magic);
110 msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
111 if ((expected_version == 0) ?
112 (msg_version != IBNAL_MSG_VERSION &&
113 msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) :
114 (msg_version != expected_version)) {
115 CERROR("Bad version: %x\n", msg_version);
119 if (nob < hdr_size) {
120 CERROR("Short message: %d\n", nob);
124 msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
126 CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
130 /* checksum must be computed with ibm_cksum zero and BEFORE anything
132 msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
134 if (msg_cksum != 0 &&
135 msg_cksum != kibnal_cksum(msg, msg_nob)) {
136 CERROR("Bad checksum\n");
139 msg->ibm_cksum = msg_cksum;
142 /* leave magic unflipped as a clue to peer endianness */
143 msg->ibm_version = msg_version;
144 LASSERT (sizeof(msg->ibm_type) == 1);
145 LASSERT (sizeof(msg->ibm_credits) == 1);
146 msg->ibm_nob = msg_nob;
147 __swab64s(&msg->ibm_srcnid);
148 __swab64s(&msg->ibm_srcstamp);
149 __swab64s(&msg->ibm_dstnid);
150 __swab64s(&msg->ibm_dststamp);
153 if (msg->ibm_srcnid == LNET_NID_ANY) {
154 CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
/* Per-type body validation and swabbing below: each case verifies the body
 * is at least the expected size before touching its fields. */
158 switch (msg->ibm_type) {
160 CERROR("Unknown message type %x\n", msg->ibm_type);
163 case IBNAL_MSG_SVCQRY:
167 case IBNAL_MSG_SVCRSP:
168 if (msg_nob < hdr_size + sizeof(msg->ibm_u.svcrsp)) {
169 CERROR("Short SVCRSP: %d(%d)\n", msg_nob,
170 (int)(hdr_size + sizeof(msg->ibm_u.svcrsp)));
174 __swab64s(&msg->ibm_u.svcrsp.ibsr_svc_id);
175 __swab16s(&msg->ibm_u.svcrsp.ibsr_svc_pkey);
179 case IBNAL_MSG_CONNREQ:
180 case IBNAL_MSG_CONNACK:
181 if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
182 CERROR("Short CONNREQ: %d(%d)\n", msg_nob,
183 (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
187 __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
190 case IBNAL_MSG_IMMEDIATE:
191 if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
192 CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
193 (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
198 case IBNAL_MSG_PUT_RDMA:
199 case IBNAL_MSG_GET_RDMA:
200 if (msg_nob < hdr_size + sizeof(msg->ibm_u.rdma)) {
201 CERROR("Short RDMA req: %d(%d)\n", msg_nob,
202 (int)(hdr_size + sizeof(msg->ibm_u.rdma)));
206 __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key);
207 __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob);
208 __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr);
212 case IBNAL_MSG_PUT_DONE:
213 case IBNAL_MSG_GET_DONE:
214 if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
215 CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
216 (int)(hdr_size + sizeof(msg->ibm_u.completion)));
220 __swab32s(&msg->ibm_u.completion.ibcm_status);
/* Active-connect side: open a TCP connection to the peer's acceptor, send an
 * SVCQRY, and read back the SVCRSP carrying the peer's IB service id/gid/pkey
 * needed to initiate the IB connection. Retries once with the older protocol
 * version (RDMAREPLYNOTRSRVD) on a version mismatch. On success stashes the
 * peer incarnation, svcrsp, and negotiated version in the conn.
 * NOTE(review): several error-path returns/gotos and the retry loop structure
 * are truncated out of this listing — confirm in full source. */
227 kibnal_make_svcqry (kib_conn_t *conn)
229 kib_peer_t *peer = conn->ibc_peer;
230 int version = IBNAL_MSG_VERSION;
237 LASSERT (conn->ibc_connreq != NULL);
238 msg = &conn->ibc_connreq->cr_msg;
241 kibnal_init_msg(msg, IBNAL_MSG_SVCQRY, 0);
242 kibnal_pack_msg(msg, version, 0, peer->ibp_nid, 0);
244 rc = lnet_connect(&sock, peer->ibp_nid,
245 0, peer->ibp_ip, peer->ibp_port);
247 return -ECONNABORTED;
249 rc = libcfs_sock_write(sock, msg, msg->ibm_nob,
250 lnet_acceptor_timeout());
252 CERROR("Error %d sending svcqry to %s at %u.%u.%u.%u/%d\n",
253 rc, libcfs_nid2str(peer->ibp_nid),
254 HIPQUAD(peer->ibp_ip), peer->ibp_port);
258 /* The first 6 bytes are invariably MAGIC + proto version */
259 rc = libcfs_sock_read(sock, msg, 6, *kibnal_tunables.kib_timeout);
261 CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
262 rc, libcfs_nid2str(peer->ibp_nid),
263 HIPQUAD(peer->ibp_ip), peer->ibp_port);
267 if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
268 msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
269 CERROR("Bad magic: %08x from %s at %u.%u.%u.%u/%d\n",
270 msg->ibm_magic, libcfs_nid2str(peer->ibp_nid),
271 HIPQUAD(peer->ibp_ip), peer->ibp_port);
276 msg_version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
277 msg->ibm_version : __swab16(msg->ibm_version);
278 if (msg_version != version) {
279 if (version == IBNAL_MSG_VERSION) {
280 /* retry with previous version */
281 libcfs_sock_release(sock);
282 version = IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD;
286 CERROR("Bad version %x from %s at %u.%u.%u.%u/%d\n",
287 msg_version, libcfs_nid2str(peer->ibp_nid),
288 HIPQUAD(peer->ibp_ip), peer->ibp_port);
293 /* Read in the rest of the message now we know the expected format */
294 nob = offsetof(kib_msg_t, ibm_u) + sizeof(kib_svcrsp_t);
295 rc = libcfs_sock_read(sock, ((char *)msg) + 6, nob - 6,
296 *kibnal_tunables.kib_timeout);
298 CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
299 rc, libcfs_nid2str(peer->ibp_nid),
300 HIPQUAD(peer->ibp_ip), peer->ibp_port);
304 rc = kibnal_unpack_msg(msg, version, nob);
306 CERROR("Error %d unpacking svcrsp from %s at %u.%u.%u.%u/%d\n",
307 rc, libcfs_nid2str(peer->ibp_nid),
308 HIPQUAD(peer->ibp_ip), peer->ibp_port);
312 if (msg->ibm_type != IBNAL_MSG_SVCRSP) {
313 CERROR("Unexpected response type %d from %s at %u.%u.%u.%u/%d\n",
314 msg->ibm_type, libcfs_nid2str(peer->ibp_nid),
315 HIPQUAD(peer->ibp_ip), peer->ibp_port);
/* Sanity: the reply must be addressed to me (NID + incarnation)... */
320 if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
322 msg->ibm_dststamp != kibnal_data.kib_incarnation) {
323 CERROR("Unexpected dst NID/stamp %s/"LPX64" from "
324 "%s at %u.%u.%u.%u/%d\n",
325 libcfs_nid2str(msg->ibm_dstnid), msg->ibm_dststamp,
326 libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip),
/* ...and come from the peer I think I connected to. */
332 if (!lnet_ptlcompat_matchnid(peer->ibp_nid, msg->ibm_srcnid)) {
333 CERROR("Unexpected src NID %s from %s at %u.%u.%u.%u/%d\n",
334 libcfs_nid2str(msg->ibm_srcnid),
335 libcfs_nid2str(peer->ibp_nid),
336 HIPQUAD(peer->ibp_ip), peer->ibp_port);
341 conn->ibc_incarnation = msg->ibm_srcstamp;
342 conn->ibc_connreq->cr_svcrsp = msg->ibm_u.svcrsp;
343 conn->ibc_version = version;
346 libcfs_sock_release(sock);
/* Passive side of the service query: read an SVCQRY from an incoming TCP
 * socket (tolerating acceptor-protocol and older-version peers), validate it,
 * and reply with an SVCRSP describing my IB service id/gid/pkey. The message
 * buffer is heap-allocated and freed on every exit path (tail visible at the
 * end of this listing).
 * NOTE(review): several error-path gotos and the 'reject' flag's assignments
 * are truncated out of this listing — confirm in full source. */
351 kibnal_handle_svcqry (struct socket *sock)
354 unsigned int peer_port;
362 rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
364 CERROR("Can't get peer's IP: %d\n", rc);
368 LIBCFS_ALLOC(msg, sizeof(*msg));
370 CERROR("Can't allocate msgs for %u.%u.%u.%u/%d\n",
371 HIPQUAD(peer_ip), peer_port);
375 rc = libcfs_sock_read(sock, &msg->ibm_magic, sizeof(msg->ibm_magic),
376 lnet_acceptor_timeout());
378 CERROR("Error %d receiving svcqry(1) from %u.%u.%u.%u/%d\n",
379 rc, HIPQUAD(peer_ip), peer_port);
383 if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
384 msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
385 /* Unexpected magic! */
386 if (the_lnet.ln_ptlcompat == 0) {
387 if (msg->ibm_magic == LNET_PROTO_MAGIC ||
388 msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) {
389 /* future protocol version compatibility!
390 * When LNET unifies protocols over all LNDs,
391 * the first thing sent will be a version
392 * query. I send back a reply in my current
393 * protocol to tell her I'm "old" */
394 kibnal_init_msg(msg, 0, 0);
395 kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0,
401 CERROR ("Bad magic(1) %#08x (%#08x expected) from "
402 "%u.%u.%u.%u/%d\n", msg->ibm_magic,
403 IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port);
407 /* When portals compatibility is set, I may be passed a new
408 * connection "blindly" by the acceptor, and I have to
409 * determine if my peer has sent an acceptor connection request
411 rc = lnet_accept(kibnal_data.kib_ni, sock, msg->ibm_magic);
415 /* It was an acceptor connection request!
416 * Now I should see my magic... */
417 rc = libcfs_sock_read(sock, &msg->ibm_magic,
418 sizeof(msg->ibm_magic),
419 lnet_acceptor_timeout());
421 CERROR("Error %d receiving svcqry(2) from %u.%u.%u.%u/%d\n",
422 rc, HIPQUAD(peer_ip), peer_port);
426 if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
427 msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
428 CERROR ("Bad magic(2) %#08x (%#08x expected) from "
429 "%u.%u.%u.%u/%d\n", msg->ibm_magic,
430 IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port);
435 /* Now check version */
437 rc = libcfs_sock_read(sock, &msg->ibm_version, sizeof(msg->ibm_version),
438 lnet_acceptor_timeout());
440 CERROR("Error %d receiving svcqry(3) from %u.%u.%u.%u/%d\n",
441 rc, HIPQUAD(peer_ip), peer_port);
445 version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
446 msg->ibm_version : __swab32(msg->ibm_version);
447 /* Peer is a different protocol version: reply in my current protocol
448 * to tell her I'm "old" */
449 if (version != IBNAL_MSG_VERSION &&
450 version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
451 kibnal_init_msg(msg, 0, 0);
452 kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0, LNET_NID_ANY, 0);
457 /* Now read in all the rest */
458 rc = libcfs_sock_read(sock, &msg->ibm_type,
459 offsetof(kib_msg_t, ibm_u) -
460 offsetof(kib_msg_t, ibm_type),
461 lnet_acceptor_timeout());
463 CERROR("Error %d receiving svcqry(4) from %u.%u.%u.%u/%d\n",
464 rc, HIPQUAD(peer_ip), peer_port);
468 rc = kibnal_unpack_msg(msg, version, offsetof(kib_msg_t, ibm_u));
470 CERROR("Error %d unpacking svcqry from %u.%u.%u.%u/%d\n",
471 rc, HIPQUAD(peer_ip), peer_port);
475 if (msg->ibm_type != IBNAL_MSG_SVCQRY) {
476 CERROR("Unexpected message %d from %u.%u.%u.%u/%d\n",
477 msg->ibm_type, HIPQUAD(peer_ip), peer_port);
481 if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
483 CERROR("Unexpected dstnid %s: expected %s from %u.%u.%u.%u/%d\n",
484 libcfs_nid2str(msg->ibm_dstnid),
485 libcfs_nid2str(kibnal_data.kib_ni->ni_nid),
486 HIPQUAD(peer_ip), peer_port);
/* Save src identity before reusing 'msg' for the reply. */
490 srcnid = msg->ibm_srcnid;
491 srcstamp = msg->ibm_srcstamp;
493 kibnal_init_msg(msg, IBNAL_MSG_SVCRSP, sizeof(msg->ibm_u.svcrsp));
495 msg->ibm_u.svcrsp.ibsr_svc_id = kibnal_data.kib_svc_id;
496 memcpy(msg->ibm_u.svcrsp.ibsr_svc_gid, kibnal_data.kib_svc_gid,
497 sizeof(kibnal_data.kib_svc_gid));
498 msg->ibm_u.svcrsp.ibsr_svc_pkey = kibnal_data.kib_svc_pkey;
500 kibnal_pack_msg(msg, version, 0, srcnid, srcstamp);
503 rc = libcfs_sock_write (sock, msg, msg->ibm_nob,
504 lnet_acceptor_timeout());
505 if (!reject && rc != 0) {
506 /* Only complain if we're not rejecting */
507 CERROR("Error %d replying to svcqry from %u.%u.%u.%u/%d\n",
508 rc, HIPQUAD(peer_ip), peer_port);
513 LIBCFS_FREE(msg, sizeof(*msg));
/* Release the socket held by a queued accept request and free the wrapper. */
517 kibnal_free_acceptsock (kib_acceptsock_t *as)
519 libcfs_sock_release(as->ibas_sock);
520 LIBCFS_FREE(as, sizeof(*as));
/* LNet acceptor callback: wrap the newly-accepted socket and queue it on the
 * connd accept queue for asynchronous handling, then wake connd. */
524 kibnal_accept(lnet_ni_t *ni, struct socket *sock)
526 kib_acceptsock_t *as;
529 LIBCFS_ALLOC(as, sizeof(*as));
531 CERROR("Out of Memory\n");
535 as->ibas_sock = sock;
537 spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
539 list_add_tail(&as->ibas_list, &kibnal_data.kib_connd_acceptq);
540 wake_up(&kibnal_data.kib_connd_waitq);
542 spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
/* Assign an IB CM service id, look up port GID and PKEY from the cached
 * device attributes, and start listening for IB connection requests
 * (kibnal_passive_conn_callback handles them). Returns 0 or -ve errno
 * (error returns truncated in this listing). */
547 kibnal_start_ib_listener (void)
551 LASSERT (kibnal_data.kib_listen_handle == NULL);
553 kibnal_data.kib_svc_id = ib_cm_service_assign();
554 CDEBUG(D_NET, "svc id "LPX64"\n", kibnal_data.kib_svc_id);
556 rc = ib_cached_gid_get(kibnal_data.kib_device,
557 kibnal_data.kib_port, 0,
558 kibnal_data.kib_svc_gid);
560 CERROR("Can't get port %d GID: %d\n",
561 kibnal_data.kib_port, rc);
565 rc = ib_cached_pkey_get(kibnal_data.kib_device,
566 kibnal_data.kib_port, 0,
567 &kibnal_data.kib_svc_pkey);
569 CERROR ("Can't get port %d PKEY: %d\n",
570 kibnal_data.kib_port, rc);
574 rc = ib_cm_listen(kibnal_data.kib_svc_id,
575 TS_IB_CM_SERVICE_EXACT_MASK,
576 kibnal_passive_conn_callback, NULL,
577 &kibnal_data.kib_listen_handle);
579 kibnal_data.kib_listen_handle = NULL;
580 CERROR ("Can't create IB listener: %d\n", rc);
584 LASSERT (kibnal_data.kib_listen_handle != NULL);
/* Stop the IB CM listener started by kibnal_start_ib_listener() and clear
 * the handle; a failure to stop is logged but otherwise ignored. */
589 kibnal_stop_ib_listener (void)
593 LASSERT (kibnal_data.kib_listen_handle != NULL);
595 rc = ib_cm_listen_stop (kibnal_data.kib_listen_handle);
597 CERROR("Error stopping IB listener: %d\n", rc);
599 kibnal_data.kib_listen_handle = NULL;
/* Allocate and initialise a peer object for 'nid' with one reference for the
 * caller. Fails with -EOVERFLOW when the concurrent-peers tunable would be
 * exceeded, or -ESHUTDOWN once shutdown has started (kib_nonewpeers set);
 * the peer-count increment happens under kib_global_lock so kib_npeers only
 * grows while the lock is held. */
603 kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
609 LASSERT (nid != LNET_NID_ANY);
611 LIBCFS_ALLOC(peer, sizeof (*peer));
613 CERROR("Cannot allocate peer\n");
617 memset(peer, 0, sizeof(*peer)); /* zero flags etc */
620 atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */
622 INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */
623 INIT_LIST_HEAD (&peer->ibp_conns);
624 INIT_LIST_HEAD (&peer->ibp_tx_queue);
625 INIT_LIST_HEAD (&peer->ibp_connd_list); /* not queued for connecting */
628 peer->ibp_last_alive = cfs_time_current();
629 peer->ibp_reconnect_interval = 0; /* OK to connect at any time */
631 write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
633 if (atomic_read(&kibnal_data.kib_npeers) >=
634 *kibnal_tunables.kib_concurrent_peers) {
635 rc = -EOVERFLOW; /* !! but at least it distinguishes */
636 } else if (kibnal_data.kib_nonewpeers) {
637 rc = -ESHUTDOWN; /* shutdown has started */
640 /* npeers only grows with kib_global_lock held */
641 atomic_inc(&kibnal_data.kib_npeers);
644 write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
647 CERROR("Can't create peer: %s\n",
648 (rc == -ESHUTDOWN) ? "shutting down" :
650 LIBCFS_FREE(peer, sizeof(*peer));
/* Final teardown of a peer whose refcount has dropped to zero: assert all
 * associated state (conns, queued txs, table membership) is already gone,
 * free the memory, and decrement the global peer count last — see the NB
 * comment below for why that ordering is safe. */
659 kibnal_destroy_peer (kib_peer_t *peer)
661 CDEBUG (D_NET, "peer %s %p deleted\n",
662 libcfs_nid2str(peer->ibp_nid), peer);
664 LASSERT (atomic_read (&peer->ibp_refcount) == 0);
665 LASSERT (peer->ibp_persistence == 0);
666 LASSERT (!kibnal_peer_active(peer));
667 LASSERT (peer->ibp_connecting == 0);
668 LASSERT (peer->ibp_accepting == 0);
669 LASSERT (list_empty (&peer->ibp_connd_list));
670 LASSERT (list_empty (&peer->ibp_conns));
671 LASSERT (list_empty (&peer->ibp_tx_queue));
673 LIBCFS_FREE (peer, sizeof (*peer));
675 /* NB a peer's connections keep a reference on their peer until
676 * they are destroyed, so we can be assured that _all_ state to do
677 * with this peer has been cleaned up when its refcount drops to
679 atomic_dec(&kibnal_data.kib_npeers);
/* Look up 'nid' in its peer hash chain; caller must hold kib_global_lock.
 * Every peer in the table must have a reason to exist (see LASSERT).
 * Returns the peer or NULL (return statements truncated in this listing). */
683 kibnal_find_peer_locked (lnet_nid_t nid)
685 struct list_head *peer_list = kibnal_nid2peerlist (nid);
686 struct list_head *tmp;
689 list_for_each (tmp, peer_list) {
691 peer = list_entry (tmp, kib_peer_t, ibp_list);
693 LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
694 peer->ibp_connecting != 0 || /* creating conns */
695 peer->ibp_accepting != 0 ||
696 !list_empty (&peer->ibp_conns)); /* active conn */
698 if (peer->ibp_nid != nid)
/* Locked wrapper around kibnal_find_peer_locked(): takes the global lock
 * for reading and adds a reference for the caller if the peer is found. */
707 kibnal_get_peer (lnet_nid_t nid)
712 read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
713 peer = kibnal_find_peer_locked (nid);
714 if (peer != NULL) /* +1 ref for caller? */
715 kibnal_peer_addref(peer);
716 read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
/* Remove a peer from the peer table and drop the table's reference; caller
 * holds kib_global_lock for writing and the peer must have no remaining
 * reason to stay (not persistent, no conns). */
722 kibnal_unlink_peer_locked (kib_peer_t *peer)
724 LASSERT (peer->ibp_persistence == 0);
725 LASSERT (list_empty(&peer->ibp_conns));
727 LASSERT (kibnal_peer_active(peer));
728 list_del_init (&peer->ibp_list);
729 /* lose peerlist's ref */
730 kibnal_peer_decref(peer);
/* ioctl helper: walk the whole peer table under the read lock and return the
 * NID/IP/port/persistence of the 'index'-th peer; used by GET_PEER to
 * enumerate peers one index at a time. Returns -ENOENT when index is past
 * the end (return statements truncated in this listing). */
734 kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp,
738 struct list_head *ptmp;
742 read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
744 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
746 list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
748 peer = list_entry (ptmp, kib_peer_t, ibp_list);
749 LASSERT (peer->ibp_persistence != 0 ||
750 peer->ibp_connecting != 0 ||
751 peer->ibp_accepting != 0 ||
752 !list_empty (&peer->ibp_conns));
757 *nidp = peer->ibp_nid;
759 *portp = peer->ibp_port;
760 *persistencep = peer->ibp_persistence;
762 read_unlock_irqrestore(&kibnal_data.kib_global_lock,
768 read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
/* ioctl helper (ADD_PEER): create or find the peer for 'nid', record its
 * acceptor IP/port, and bump its persistence count so it survives connection
 * loss. If the peer already exists the freshly-created one is dropped and
 * the existing entry is updated instead. */
773 kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port)
780 if (nid == LNET_NID_ANY)
783 rc = kibnal_create_peer (&peer, nid);
787 write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
789 peer2 = kibnal_find_peer_locked (nid);
791 kibnal_peer_decref(peer);
794 /* peer table takes existing ref on peer */
795 list_add_tail (&peer->ibp_list,
796 kibnal_nid2peerlist (nid));
800 peer->ibp_port = port;
801 peer->ibp_persistence++;
803 write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
/* Forget a peer: clear persistence, and either unlink it immediately (no
 * conns) or close all its connections — closing the last conn will unlink
 * it as a side effect (see the trailing NB comments). Caller holds
 * kib_global_lock for writing. */
808 kibnal_del_peer_locked (kib_peer_t *peer)
810 struct list_head *ctmp;
811 struct list_head *cnxt;
814 peer->ibp_persistence = 0;
816 if (list_empty(&peer->ibp_conns)) {
817 kibnal_unlink_peer_locked(peer);
819 list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
820 conn = list_entry(ctmp, kib_conn_t, ibc_list);
822 kibnal_close_conn_locked (conn, 0);
824 /* NB peer is no longer persistent; closing its last conn
827 /* NB peer now unlinked; might even be freed if the peer table had the
/* ioctl helper (DEL_PEER): delete the peer matching 'nid', or every peer when
 * nid == LNET_NID_ANY. Queued-but-unsent txs are collected on a local zombie
 * list under the lock and completed with -EIO after it is dropped. Returns
 * -ENOENT if nothing matched. */
832 kibnal_del_peer (lnet_nid_t nid)
835 CFS_LIST_HEAD (zombies);
836 struct list_head *ptmp;
837 struct list_head *pnxt;
844 write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
846 if (nid != LNET_NID_ANY)
847 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
850 hi = kibnal_data.kib_peer_hash_size - 1;
853 for (i = lo; i <= hi; i++) {
854 list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
855 peer = list_entry (ptmp, kib_peer_t, ibp_list);
856 LASSERT (peer->ibp_persistence != 0 ||
857 peer->ibp_connecting != 0 ||
858 peer->ibp_accepting != 0 ||
859 !list_empty (&peer->ibp_conns));
861 if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
864 if (!list_empty(&peer->ibp_tx_queue)) {
865 LASSERT (list_empty(&peer->ibp_conns));
867 list_splice_init(&peer->ibp_tx_queue, &zombies);
870 kibnal_del_peer_locked (peer);
871 rc = 0; /* matched something */
875 write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
877 kibnal_txlist_done(&zombies, -EIO);
/* ioctl helper (GET_CONN): walk every peer's connection list under the read
 * lock and return the 'index'-th connection with a reference added for the
 * caller, or NULL when index is past the end (return statements truncated in
 * this listing). */
883 kibnal_get_conn_by_idx (int index)
886 struct list_head *ptmp;
888 struct list_head *ctmp;
892 read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
894 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
895 list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
897 peer = list_entry (ptmp, kib_peer_t, ibp_list);
898 LASSERT (peer->ibp_persistence > 0 ||
899 peer->ibp_connecting != 0 ||
900 peer->ibp_accepting != 0 ||
901 !list_empty (&peer->ibp_conns));
903 list_for_each (ctmp, &peer->ibp_conns) {
907 conn = list_entry (ctmp, kib_conn_t, ibc_list);
908 kibnal_conn_addref(conn);
909 read_unlock_irqrestore(&kibnal_data.kib_global_lock,
916 read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
/* Allocate and initialise a connection: rx descriptor array, pre-mapped rx
 * message pages, and an RC queue pair moved to the INIT state. Returns the
 * conn with 1 ref for the caller, or NULL on failure (failure paths call
 * kibnal_destroy_conn(), which is why kib_nconns is incremented eagerly —
 * see the comment at that line). */
921 kibnal_create_conn (void)
931 struct ib_qp_create_param qp_create;
932 struct ib_qp_attribute qp_attr;
935 LIBCFS_ALLOC (conn, sizeof (*conn));
937 CERROR ("Can't allocate connection\n");
941 /* zero flags, NULL pointers etc... */
942 memset (conn, 0, sizeof (*conn));
944 INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
945 INIT_LIST_HEAD (&conn->ibc_tx_queue);
946 INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
947 INIT_LIST_HEAD (&conn->ibc_active_txs);
948 spin_lock_init (&conn->ibc_lock);
950 atomic_inc (&kibnal_data.kib_nconns);
951 /* well not really, but I call destroy() on failure, which decrements */
953 LIBCFS_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
954 if (conn->ibc_rxs == NULL)
956 memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
958 rc = kibnal_alloc_pages(&conn->ibc_rx_pages,
960 IB_ACCESS_LOCAL_WRITE);
/* Carve the mapped pages into fixed-size rx message buffers, recording both
 * the kernel virtual address and the registered IB vaddr for each rx. */
964 vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
966 for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
967 struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
968 kib_rx_t *rx = &conn->ibc_rxs[i];
971 rx->rx_vaddr = vaddr;
972 rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
974 vaddr += IBNAL_MSG_SIZE;
975 LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
977 page_offset += IBNAL_MSG_SIZE;
978 LASSERT (page_offset <= PAGE_SIZE);
980 if (page_offset == PAGE_SIZE) {
983 LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
987 /* We can post up to IBLND_MSG_QUEUE_SIZE immediate/req messages and
988 * the same # of ack/nak/rdma+done messages */
990 params.qp_create = (struct ib_qp_create_param) {
992 .max_outstanding_send_request = 3 * IBNAL_MSG_QUEUE_SIZE,
993 .max_outstanding_receive_request = IBNAL_RX_MSGS,
994 .max_send_gather_element = 1,
995 .max_receive_scatter_element = 1,
997 .pd = kibnal_data.kib_pd,
998 .send_queue = kibnal_data.kib_cq,
999 .receive_queue = kibnal_data.kib_cq,
1000 .send_policy = IB_WQ_SIGNAL_SELECTABLE,
1001 .receive_policy = IB_WQ_SIGNAL_SELECTABLE,
1003 .transport = IB_TRANSPORT_RC,
1004 .device_specific = NULL,
1007 rc = ib_qp_create (&params.qp_create, &conn->ibc_qp, &conn->ibc_qpn);
1009 CERROR ("Failed to create queue pair: %d\n", rc);
1013 /* Mark QP created */
1014 conn->ibc_state = IBNAL_CONN_INIT_QP;
1016 params.qp_attr = (struct ib_qp_attribute) {
1017 .state = IB_QP_STATE_INIT,
1018 .port = kibnal_data.kib_port,
1019 .enable_rdma_read = 1,
1020 .enable_rdma_write = 1,
1021 .valid_fields = (IB_QP_ATTRIBUTE_STATE |
1022 IB_QP_ATTRIBUTE_PORT |
1023 IB_QP_ATTRIBUTE_PKEY_INDEX |
1024 IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE),
1026 rc = ib_qp_modify(conn->ibc_qp, &params.qp_attr);
1028 CERROR ("Failed to modify queue pair: %d\n", rc);
1032 /* 1 ref for caller */
1033 atomic_set (&conn->ibc_refcount, 1);
1037 kibnal_destroy_conn (conn);
/* Tear down a connection whose refcount has hit zero: destroy the QP if it
 * got created, free rx pages/descriptors, drop the peer ref, and free the
 * conn. If this was the last conn during shutdown, wake the scheduler and
 * reaper threads so they can exit. The switch falls through from the most-
 * advanced state to undo each stage of kibnal_create_conn(). */
1042 kibnal_destroy_conn (kib_conn_t *conn)
1046 CDEBUG (D_NET, "connection %p\n", conn);
1048 LASSERT (atomic_read (&conn->ibc_refcount) == 0);
1049 LASSERT (list_empty(&conn->ibc_tx_queue));
1050 LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
1051 LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
1052 LASSERT (list_empty(&conn->ibc_active_txs));
1053 LASSERT (conn->ibc_nsends_posted == 0);
1054 LASSERT (conn->ibc_connreq == NULL);
1056 switch (conn->ibc_state) {
1057 case IBNAL_CONN_ZOMBIE:
1058 /* called after connection sequence initiated */
1060 case IBNAL_CONN_INIT_QP:
1061 rc = ib_qp_destroy(conn->ibc_qp);
1063 CERROR("Can't destroy QP: %d\n", rc);
1066 case IBNAL_CONN_INIT_NOTHING:
1073 if (conn->ibc_rx_pages != NULL)
1074 kibnal_free_pages(conn->ibc_rx_pages);
1076 if (conn->ibc_rxs != NULL)
1077 LIBCFS_FREE(conn->ibc_rxs,
1078 IBNAL_RX_MSGS * sizeof(kib_rx_t));
1080 if (conn->ibc_peer != NULL)
1081 kibnal_peer_decref(conn->ibc_peer);
1083 LIBCFS_FREE(conn, sizeof (*conn));
1085 atomic_dec(&kibnal_data.kib_nconns);
1087 if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
1088 kibnal_data.kib_shutdown) {
1089 /* I just nuked the last connection on shutdown; wake up
1090 * everyone so they can exit. */
1091 wake_up_all(&kibnal_data.kib_sched_waitq);
1092 wake_up_all(&kibnal_data.kib_reaper_waitq);
/* Close every connection of 'peer' with reason 'why'; caller holds
 * kib_global_lock for writing. Returns the number closed (return statement
 * truncated in this listing). */
1097 kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
1100 struct list_head *ctmp;
1101 struct list_head *cnxt;
1104 list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
1105 conn = list_entry (ctmp, kib_conn_t, ibc_list);
1108 kibnal_close_conn_locked (conn, why);
/* Close every connection of 'peer' whose incarnation differs from the given
 * one (i.e. connections to a previous instance of the peer); caller holds
 * kib_global_lock for writing. Returns the number closed (return statement
 * truncated in this listing). */
1115 kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
1118 struct list_head *ctmp;
1119 struct list_head *cnxt;
1122 list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
1123 conn = list_entry (ctmp, kib_conn_t, ibc_list);
1125 if (conn->ibc_incarnation == incarnation)
1128 CDEBUG(D_NET, "Closing stale conn %p nid: %s"
1129 " incarnation:"LPX64"("LPX64")\n", conn,
1130 libcfs_nid2str(peer->ibp_nid),
1131 conn->ibc_incarnation, incarnation);
1134 kibnal_close_conn_locked (conn, -ESTALE);
/* ioctl helper (CLOSE_CONNECTION): close all connections to 'nid', or to
 * every peer when nid == LNET_NID_ANY. Wildcard always succeeds; a specific
 * nid returns -ENOENT if no connection matched. */
1141 kibnal_close_matching_conns (lnet_nid_t nid)
1143 unsigned long flags;
1145 struct list_head *ptmp;
1146 struct list_head *pnxt;
1152 write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
1154 if (nid != LNET_NID_ANY)
1155 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
1158 hi = kibnal_data.kib_peer_hash_size - 1;
1161 for (i = lo; i <= hi; i++) {
1162 list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
1164 peer = list_entry (ptmp, kib_peer_t, ibp_list);
1165 LASSERT (peer->ibp_persistence != 0 ||
1166 peer->ibp_connecting != 0 ||
1167 peer->ibp_accepting != 0 ||
1168 !list_empty (&peer->ibp_conns));
1170 if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
1173 count += kibnal_close_peer_conns_locked (peer, 0);
1177 write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
1179 /* wildcards always succeed */
1180 if (nid == LNET_NID_ANY)
1183 return (count == 0 ? -ENOENT : 0);
/* LND ioctl dispatcher (the .lnd_ctl entry point): handles peer enumeration
 * and add/delete, connection enumeration and close, and rejects the obsolete
 * REGISTER_MYNID unless it is a no-op for our own NID. */
1187 kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
1189 struct libcfs_ioctl_data *data = arg;
1192 LASSERT (ni == kibnal_data.kib_ni);
1195 case IOC_LIBCFS_GET_PEER: {
1199 int share_count = 0;
1201 rc = kibnal_get_peer_info(data->ioc_count,
1202 &nid, &ip, &port, &share_count);
1203 data->ioc_nid = nid;
1204 data->ioc_count = share_count;
1205 data->ioc_u32[0] = ip;
1206 data->ioc_u32[1] = port;
1209 case IOC_LIBCFS_ADD_PEER: {
1210 rc = kibnal_add_persistent_peer (data->ioc_nid,
1211 data->ioc_u32[0], /* IP */
1212 data->ioc_u32[1]); /* port */
1215 case IOC_LIBCFS_DEL_PEER: {
1216 rc = kibnal_del_peer (data->ioc_nid);
1219 case IOC_LIBCFS_GET_CONN: {
1220 kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
1226 data->ioc_nid = conn->ibc_peer->ibp_nid;
1227 kibnal_conn_decref(conn);
1231 case IOC_LIBCFS_CLOSE_CONNECTION: {
1232 rc = kibnal_close_matching_conns (data->ioc_nid);
1235 case IOC_LIBCFS_REGISTER_MYNID: {
1236 /* Ignore if this is a noop */
1237 if (data->ioc_nid == ni->ni_nid) {
1240 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
1241 libcfs_nid2str(data->ioc_nid),
1242 libcfs_nid2str(ni->ni_nid));
/* Undo kibnal_alloc_pages(): deregister the IB memory region if it was
 * mapped, free each page, then free the descriptor itself. */
1253 kibnal_free_pages (kib_pages_t *p)
1255 int npages = p->ibp_npages;
1259 if (p->ibp_mapped) {
1260 rc = ib_memory_deregister(p->ibp_handle);
1262 CERROR ("Deregister error: %d\n", rc);
1265 for (i = 0; i < npages; i++)
1266 if (p->ibp_pages[i] != NULL)
1267 __free_page(p->ibp_pages[i]);
1269 LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
/* Allocate 'npages' pages, register them as one physically-addressed IB
 * memory region with the given access flags, and return the descriptor via
 * *pp. Uses a flexible-array kib_pages_t (ibp_pages[npages]); partial
 * allocations are cleaned up through kibnal_free_pages(). The temporary
 * phys_pages array only lives for the registration call. */
1273 kibnal_alloc_pages (kib_pages_t **pp, int npages, int access)
1276 struct ib_physical_buffer *phys_pages;
1280 LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
1282 CERROR ("Can't allocate buffer %d\n", npages);
1286 memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
1287 p->ibp_npages = npages;
1289 for (i = 0; i < npages; i++) {
1290 p->ibp_pages[i] = alloc_page (GFP_KERNEL);
1291 if (p->ibp_pages[i] == NULL) {
1292 CERROR ("Can't allocate page %d of %d\n", i, npages);
1293 kibnal_free_pages(p);
1298 LIBCFS_ALLOC(phys_pages, npages * sizeof(*phys_pages));
1299 if (phys_pages == NULL) {
1300 CERROR ("Can't allocate physarray for %d pages\n", npages);
1301 kibnal_free_pages(p);
1305 for (i = 0; i < npages; i++) {
1306 phys_pages[i].size = PAGE_SIZE;
1307 phys_pages[i].address =
1308 lnet_page2phys(p->ibp_pages[i]);
1312 rc = ib_memory_register_physical(kibnal_data.kib_pd,
1315 npages * PAGE_SIZE, 0,
1321 LIBCFS_FREE(phys_pages, npages * sizeof(*phys_pages));
1324 CERROR ("Error %d mapping %d pages\n", rc, npages);
1325 kibnal_free_pages(p);
/* Allocate and register the shared tx message pages, then carve them into
 * IBNAL_MSG_SIZE buffers — one per tx descriptor — recording each tx's
 * kernel address and registered IB vaddr and queueing it on the idle list.
 * Relies on IBNAL_MSG_SIZE dividing PAGE_SIZE exactly (see LASSERTs). */
1335 kibnal_setup_tx_descs (void)
1338 int page_offset = 0;
1346 /* pre-mapped messages are not bigger than 1 page */
1347 LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
1349 /* No fancy arithmetic when we do the buffer calculations */
1350 LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
1352 rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
1353 IBNAL_TX_MSG_PAGES(),
1354 0); /* local read access only */
1358 vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
1360 for (i = 0; i < IBNAL_TX_MSGS(); i++) {
1361 page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
1362 tx = &kibnal_data.kib_tx_descs[i];
1364 memset (tx, 0, sizeof(*tx)); /* zero flags etc */
1366 tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
1367 tx->tx_vaddr = vaddr;
1368 tx->tx_mapped = KIB_TX_UNMAPPED;
1370 CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n",
1371 i, tx, tx->tx_msg, tx->tx_vaddr);
1373 list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
1375 vaddr += IBNAL_MSG_SIZE;
1376 LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES());
1378 page_offset += IBNAL_MSG_SIZE;
1379 LASSERT (page_offset <= PAGE_SIZE);
1381 if (page_offset == PAGE_SIZE) {
1384 LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
/* The .lnd_shutdown entry point: tear down in reverse order of startup,
 * falling through the switch from the most-advanced init state. Stops new
 * peers, kills the listener, deletes all peers, waits for pending accepts and
 * peer refs to drain, destroys CQ/tx-pages/FMR-pool/PD, then signals all
 * threads to exit and waits for them. The (i & (-i)) == i checks throttle
 * the "still waiting" messages to power-of-2 iterations. */
1392 kibnal_shutdown (lnet_ni_t *ni)
1396 unsigned long flags;
1398 CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
1399 atomic_read (&libcfs_kmemory));
1401 LASSERT(ni == kibnal_data.kib_ni);
1402 LASSERT(ni->ni_data == &kibnal_data);
1404 switch (kibnal_data.kib_init) {
1406 CERROR ("Unexpected state %d\n", kibnal_data.kib_init);
1409 case IBNAL_INIT_ALL:
1410 /* Prevent new peers from being created */
1411 write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
1412 kibnal_data.kib_nonewpeers = 1;
1413 write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
1415 kibnal_stop_ib_listener();
1417 /* Remove all existing peers from the peer table */
1418 kibnal_del_peer(LNET_NID_ANY);
1420 /* Wait for pending conn reqs to be handled */
1422 spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
1423 while (!list_empty(&kibnal_data.kib_connd_acceptq)) {
1424 spin_unlock_irqrestore(&kibnal_data.kib_connd_lock,
1427 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
1428 "waiting for conn reqs to clean up\n");
1429 cfs_pause(cfs_time_seconds(1));
1431 spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
1433 spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
1435 /* Wait for all peer state to clean up */
1437 while (atomic_read(&kibnal_data.kib_npeers) != 0) {
1439 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
1440 "waiting for %d peers to close down\n",
1441 atomic_read(&kibnal_data.kib_npeers));
1442 cfs_pause(cfs_time_seconds(1));
1447 rc = ib_cq_destroy (kibnal_data.kib_cq);
1449 CERROR ("Destroy CQ error: %d\n", rc);
1452 case IBNAL_INIT_TXD:
1453 kibnal_free_pages (kibnal_data.kib_tx_pages);
1456 case IBNAL_INIT_FMR:
1457 rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
1459 CERROR ("Destroy FMR pool error: %d\n", rc);
1463 rc = ib_pd_destroy(kibnal_data.kib_pd);
1465 CERROR ("Destroy PD error: %d\n", rc);
1468 case IBNAL_INIT_DATA:
1469 /* Module refcount only gets to zero when all peers
1470 * have been closed so all lists must be empty */
1471 LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0);
1472 LASSERT (kibnal_data.kib_peers != NULL);
1473 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
1474 LASSERT (list_empty (&kibnal_data.kib_peers[i]));
1476 LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
1477 LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
1478 LASSERT (list_empty (&kibnal_data.kib_sched_txq));
1479 LASSERT (list_empty (&kibnal_data.kib_reaper_conns));
1480 LASSERT (list_empty (&kibnal_data.kib_connd_peers));
1481 LASSERT (list_empty (&kibnal_data.kib_connd_acceptq));
1483 /* flag threads to terminate; wake and wait for them to die */
1484 kibnal_data.kib_shutdown = 1;
1485 wake_up_all (&kibnal_data.kib_sched_waitq);
1486 wake_up_all (&kibnal_data.kib_reaper_waitq);
1487 wake_up_all (&kibnal_data.kib_connd_waitq);
1490 while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
1492 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
1493 "Waiting for %d threads to terminate\n",
1494 atomic_read (&kibnal_data.kib_nthreads));
1495 cfs_pause(cfs_time_seconds(1));
1499 case IBNAL_INIT_NOTHING:
1503 if (kibnal_data.kib_tx_descs != NULL)
1504 LIBCFS_FREE (kibnal_data.kib_tx_descs,
1505 IBNAL_TX_MSGS() * sizeof(kib_tx_t));
1507 if (kibnal_data.kib_peers != NULL)
1508 LIBCFS_FREE (kibnal_data.kib_peers,
1509 sizeof (struct list_head) *
1510 kibnal_data.kib_peer_hash_size);
1512 CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
1513 atomic_read (&libcfs_kmemory));
1515 kibnal_data.kib_init = IBNAL_INIT_NOTHING;
1516 PORTAL_MODULE_UNUSE;
/* Determine the IPoIB interface index (the N in "ibN") corresponding
 * to our HCA/port, presumably by walking all devices/ports up to and
 * including ours and counting — TODO confirm: the tail of this
 * function is not visible in this sampled view. */
kibnal_get_ipoibidx(void)
        /* NB single threaded! */
        static struct ib_port_properties port_props;

        struct ib_device *device;

        for (devidx = 0; devidx <= kibnal_data.kib_hca_idx; devidx++) {
                device = ib_device_get_by_index(devidx);

                if (device == NULL) {
                        CERROR("Can't get IB device %d\n", devidx);

                /* this driver only considers ports 1 and 2 */
                for (port = 1; port <= 2; port++) {
                        /* stop counting once we reach our own hca.port */
                        if (devidx == kibnal_data.kib_hca_idx &&
                            port == kibnal_data.kib_port)

                        rc = ib_port_properties_get(device, port,
1556 kibnal_startup (lnet_ni_t *ni)
1569 LASSERT (ni->ni_lnd == &the_kiblnd);
1571 /* Only 1 instance supported */
1572 if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
1573 CERROR ("Only 1 instance supported\n");
1577 if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
1578 CERROR ("Can't set credits(%d) > ntx(%d)\n",
1579 *kibnal_tunables.kib_credits,
1580 *kibnal_tunables.kib_ntx);
1584 memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
1586 ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
1587 ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
1589 CLASSERT (LNET_MAX_INTERFACES > 1);
1592 kibnal_data.kib_hca_idx = 0; /* default: first HCA */
1593 kibnal_data.kib_port = 0; /* any port */
1595 if (ni->ni_interfaces[0] != NULL) {
1596 /* hca.port specified in 'networks=openib(h.p)' */
1597 if (ni->ni_interfaces[1] != NULL) {
1598 CERROR("Multiple interfaces not supported\n");
1602 nob = strlen(ni->ni_interfaces[0]);
1603 i = sscanf(ni->ni_interfaces[0], "%d.%d%n", &hca, &port, &nob);
1604 if (i >= 2 && nob == strlen(ni->ni_interfaces[0])) {
1605 kibnal_data.kib_hca_idx = hca;
1606 kibnal_data.kib_port = port;
1608 nob = strlen(ni->ni_interfaces[0]);
1609 i = sscanf(ni->ni_interfaces[0], "%d%n", &hca, &nob);
1611 if (i >= 1 && nob == strlen(ni->ni_interfaces[0])) {
1612 kibnal_data.kib_hca_idx = hca;
1614 CERROR("Can't parse interface '%s'\n",
1615 ni->ni_interfaces[0]);
1621 kibnal_data.kib_ni = ni;
1622 ni->ni_data = &kibnal_data;
1624 do_gettimeofday(&tv);
1625 kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
1629 rwlock_init(&kibnal_data.kib_global_lock);
1631 kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
1632 LIBCFS_ALLOC (kibnal_data.kib_peers,
1633 sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
1634 if (kibnal_data.kib_peers == NULL) {
1637 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
1638 INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
1640 spin_lock_init (&kibnal_data.kib_reaper_lock);
1641 INIT_LIST_HEAD (&kibnal_data.kib_reaper_conns);
1642 init_waitqueue_head (&kibnal_data.kib_reaper_waitq);
1644 spin_lock_init (&kibnal_data.kib_connd_lock);
1645 INIT_LIST_HEAD (&kibnal_data.kib_connd_acceptq);
1646 INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
1647 init_waitqueue_head (&kibnal_data.kib_connd_waitq);
1649 spin_lock_init (&kibnal_data.kib_sched_lock);
1650 INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
1651 INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
1652 init_waitqueue_head (&kibnal_data.kib_sched_waitq);
1654 spin_lock_init (&kibnal_data.kib_tx_lock);
1655 INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
1657 LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
1658 IBNAL_TX_MSGS() * sizeof(kib_tx_t));
1659 if (kibnal_data.kib_tx_descs == NULL) {
1660 CERROR ("Can't allocate tx descs\n");
1664 /* lists/ptrs/locks initialised */
1665 kibnal_data.kib_init = IBNAL_INIT_DATA;
1666 /*****************************************************/
1668 for (i = 0; i < IBNAL_N_SCHED; i++) {
1669 rc = kibnal_thread_start (kibnal_scheduler,
1670 (void *)((unsigned long)i));
1672 CERROR("Can't spawn openibnal scheduler[%d]: %d\n",
1678 /* must have at least 2 connds to remain responsive to svcqry while
1680 if (*kibnal_tunables.kib_n_connd < 2)
1681 *kibnal_tunables.kib_n_connd = 2;
1684 for (i = 0; i < *kibnal_tunables.kib_n_connd; i++) {
1685 rc = kibnal_thread_start (kibnal_connd,
1686 (void *)((unsigned long)i));
1688 CERROR("Can't spawn openibnal connd[%d]: %d\n",
1694 rc = kibnal_thread_start (kibnal_reaper, NULL);
1696 CERROR ("Can't spawn openibnal reaper: %d\n", rc);
1700 kibnal_data.kib_device = ib_device_get_by_index(kibnal_data.kib_hca_idx);
1701 if (kibnal_data.kib_device == NULL) {
1702 CERROR ("Can't open ib device %d\n",
1703 kibnal_data.kib_hca_idx);
1707 rc = ib_device_properties_get(kibnal_data.kib_device,
1708 &kibnal_data.kib_device_props);
1710 CERROR ("Can't get device props: %d\n", rc);
1714 CDEBUG(D_NET, "Max Initiator: %d Max Responder %d\n",
1715 kibnal_data.kib_device_props.max_initiator_per_qp,
1716 kibnal_data.kib_device_props.max_responder_per_qp);
1718 if (kibnal_data.kib_port != 0) {
1719 rc = ib_port_properties_get(kibnal_data.kib_device,
1720 kibnal_data.kib_port,
1721 &kibnal_data.kib_port_props);
1723 CERROR("Error %d open port %d on HCA %d\n", rc,
1724 kibnal_data.kib_port,
1725 kibnal_data.kib_hca_idx);
1729 for (i = 1; i <= 2; i++) {
1730 rc = ib_port_properties_get(kibnal_data.kib_device, i,
1731 &kibnal_data.kib_port_props);
1733 kibnal_data.kib_port = i;
1737 if (kibnal_data.kib_port == 0) {
1738 CERROR ("Can't find a port\n");
1743 i = kibnal_get_ipoibidx();
1747 snprintf(ipif_name, sizeof(ipif_name), "%s%d",
1748 *kibnal_tunables.kib_ipif_basename, i);
1749 if (strlen(ipif_name) == sizeof(ipif_name - 1)) {
1750 CERROR("IPoIB interface name %s truncated\n", ipif_name);
1754 rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
1756 CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
1761 CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
1765 ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
1767 rc = ib_pd_create(kibnal_data.kib_device,
1768 NULL, &kibnal_data.kib_pd);
1770 CERROR ("Can't create PD: %d\n", rc);
1774 /* flag PD initialised */
1775 kibnal_data.kib_init = IBNAL_INIT_PD;
1776 /*****************************************************/
1779 const int pool_size = *kibnal_tunables.kib_ntx;
1780 struct ib_fmr_pool_param params = {
1781 .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
1782 .access = (IB_ACCESS_LOCAL_WRITE |
1783 IB_ACCESS_REMOTE_WRITE |
1784 IB_ACCESS_REMOTE_READ),
1785 .pool_size = pool_size,
1786 .dirty_watermark = (pool_size * 3)/4,
1787 .flush_function = NULL,
1791 rc = ib_fmr_pool_create(kibnal_data.kib_pd, ¶ms,
1792 &kibnal_data.kib_fmr_pool);
1794 CERROR ("Can't create FMR pool size %d: %d\n",
1800 /* flag FMR pool initialised */
1801 kibnal_data.kib_init = IBNAL_INIT_FMR;
1803 /*****************************************************/
1805 rc = kibnal_setup_tx_descs();
1807 CERROR ("Can't register tx descs: %d\n", rc);
1811 /* flag TX descs initialised */
1812 kibnal_data.kib_init = IBNAL_INIT_TXD;
1813 /*****************************************************/
1816 struct ib_cq_callback callback = {
1817 .context = IBNAL_CALLBACK_CTXT,
1818 .policy = IB_CQ_PROVIDER_REARM,
1820 .entry = kibnal_callback,
1824 int nentries = IBNAL_CQ_ENTRIES();
1826 rc = ib_cq_create (kibnal_data.kib_device,
1827 &nentries, &callback, NULL,
1828 &kibnal_data.kib_cq);
1830 CERROR ("Can't create CQ: %d\n", rc);
1834 /* I only want solicited events */
1835 rc = ib_cq_request_notification(kibnal_data.kib_cq, 1);
1839 /* flag CQ initialised */
1840 kibnal_data.kib_init = IBNAL_INIT_CQ;
1841 /*****************************************************/
1843 rc = kibnal_start_ib_listener();
1847 /* flag everything initialised */
1848 kibnal_data.kib_init = IBNAL_INIT_ALL;
1849 /*****************************************************/
1854 kibnal_shutdown(ni);
/* Module unload hook: unregister this LND from LNet (LNet shuts down
 * any live NI first) and release the tunables sysctl state. */
kibnal_module_fini (void)
        lnet_unregister_lnd(&the_kiblnd);
        kibnal_tunables_fini();
/* Module load hook: set up tunables, then register the LND so LNet can
 * instantiate NIs via kibnal_startup().  NOTE(review): the rc error
 * check between these two calls is not visible in this sampled view. */
kibnal_module_init (void)
        rc = kibnal_tunables_init();

        lnet_register_lnd(&the_kiblnd);
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
/* NOTE(review): two MODULE_DESCRIPTION lines follow; in the full source
 * they are presumably selected by a preprocessor conditional (Cisco/TS-API
 * vs gen1 OpenIB stack) that is not visible in this sampled view — confirm
 * before editing. */
MODULE_DESCRIPTION("Kernel Cisco IB LND v1.00");
MODULE_DESCRIPTION("Kernel OpenIB(gen1) LND v1.00");
MODULE_LICENSE("GPL");

module_init(kibnal_module_init);
module_exit(kibnal_module_fini);