1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2004 Cluster File Systems, Inc.
5 * Author: Eric Barton <eric@bartonsoftware.com>
7 * This file is part of Lustre, http://www.lustre.org.
9 * Lustre is free software; you can redistribute it and/or
10 * modify it under the terms of version 2 of the GNU General Public
11 * License as published by the Free Software Foundation.
13 * Lustre is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Lustre; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "openiblnd.h"
30 .lnd_type = OPENIBLND,
32 .lnd_startup = kibnal_startup,
33 .lnd_shutdown = kibnal_shutdown,
34 .lnd_ctl = kibnal_ctl,
35 .lnd_send = kibnal_send,
36 .lnd_recv = kibnal_recv,
37 .lnd_eager_recv = kibnal_eager_recv,
38 .lnd_accept = kibnal_accept,
/* Global state for this LND instance (peers table, conn counts, locks,
 * listener handle, tunables snapshot, ...). Single-instance by design. */
41 kib_data_t kibnal_data;
/*
 * kibnal_cksum - simple 32-bit checksum over 'nob' bytes at 'ptr'.
 * Folds each byte into the accumulator after rotating it left by one bit.
 * NOTE(review): this listing is truncated (source line numbers jump); the
 * accumulator/byte-pointer declarations and the loop header are not visible.
 */
44 kibnal_cksum (void *ptr, int nob)
50 sum = ((sum << 1) | (sum >> 31)) + *c++;
52 /* ensure I don't return 0 (== no checksum) */
53 return (sum == 0) ? 1 : sum;
/*
 * kibnal_init_msg - initialise a wire message of the given 'type'.
 * Sets the total on-the-wire byte count to header size + 'body_nob'.
 * NOTE(review): 'type' is presumably stored into msg->ibm_type on a line
 * not visible in this truncated listing -- confirm against full source.
 */
57 kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
60 msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
/*
 * kibnal_pack_msg - fill in the common wire-header fields of 'msg':
 * magic, protocol version, flow-control credits, source NID/incarnation
 * stamp (from the local NI), and the destination NID/stamp supplied by
 * the caller.  If the 'cksum' tunable is set, checksum the whole message
 * (ibm_cksum must be zero while the checksum is computed).
 */
64 kibnal_pack_msg(kib_msg_t *msg, int version, int credits,
65 lnet_nid_t dstnid, __u64 dststamp)
67 /* CAVEAT EMPTOR! all message fields not set here should have been
68 * initialised previously. */
69 msg->ibm_magic = IBNAL_MSG_MAGIC;
70 msg->ibm_version = version;
72 msg->ibm_credits = credits;
75 msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid,
77 msg->ibm_srcstamp = kibnal_data.kib_incarnation;
78 msg->ibm_dstnid = dstnid;
79 msg->ibm_dststamp = dststamp;
81 if (*kibnal_tunables.kib_cksum) {
82 /* NB ibm_cksum zero while computing cksum */
83 msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
/*
 * kibnal_unpack_msg - validate and (if the peer is opposite-endian)
 * byte-swap an incoming wire message in place.
 *
 * Checks, in order: minimum length for the fixed header, magic (a
 * byte-swapped magic marks an opposite-endian peer -- 'flip'), protocol
 * version ('expected_version' == 0 accepts either supported version),
 * declared message size vs. bytes received, and optional checksum
 * (a zero received checksum means "no checksum").  Finally performs
 * per-message-type length checks and swabs of the type-specific payload.
 *
 * NOTE(review): truncated listing -- the 'flip' guard around the swab
 * block, the error-return statements after each CERROR, and the final
 * success return are presumably present but not visible here.
 */
88 kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob)
90 const int hdr_size = offsetof(kib_msg_t, ibm_u);
97 CERROR("Short message: %d\n", nob);
101 if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
103 } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
106 CERROR("Bad magic: %08x\n", msg->ibm_magic);
110 msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
111 if ((expected_version == 0) ?
112 (msg_version != IBNAL_MSG_VERSION &&
113 msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) :
114 (msg_version != expected_version)) {
115 CERROR("Bad version: %x\n", msg_version);
119 if (nob < hdr_size) {
120 CERROR("Short message: %d\n", nob);
124 msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
126 CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
/* a zero msg_cksum means the sender did not checksum the message */
130 /* checksum must be computed with ibm_cksum zero and BEFORE anything
132 msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
134 if (msg_cksum != 0 &&
135 msg_cksum != kibnal_cksum(msg, msg_nob)) {
136 CERROR("Bad checksum\n");
139 msg->ibm_cksum = msg_cksum;
142 /* leave magic unflipped as a clue to peer endianness */
143 msg->ibm_version = msg_version;
144 LASSERT (sizeof(msg->ibm_type) == 1);
145 LASSERT (sizeof(msg->ibm_credits) == 1);
146 msg->ibm_nob = msg_nob;
/* NOTE(review): these swabs are presumably inside an 'if (flip)' block
 * whose opening line is not visible in this truncated listing */
147 __swab64s(&msg->ibm_srcnid);
148 __swab64s(&msg->ibm_srcstamp);
149 __swab64s(&msg->ibm_dstnid);
150 __swab64s(&msg->ibm_dststamp);
153 if (msg->ibm_srcnid == LNET_NID_ANY) {
154 CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
/* per-type payload validation and (when flipped) payload swabbing */
158 switch (msg->ibm_type) {
160 CERROR("Unknown message type %x\n", msg->ibm_type);
163 case IBNAL_MSG_SVCQRY:
167 case IBNAL_MSG_SVCRSP:
168 if (msg_nob < hdr_size + sizeof(msg->ibm_u.svcrsp)) {
169 CERROR("Short SVCRSP: %d(%d)\n", msg_nob,
170 (int)(hdr_size + sizeof(msg->ibm_u.svcrsp)));
174 __swab64s(&msg->ibm_u.svcrsp.ibsr_svc_id);
175 __swab16s(&msg->ibm_u.svcrsp.ibsr_svc_pkey);
179 case IBNAL_MSG_CONNREQ:
180 case IBNAL_MSG_CONNACK:
181 if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
182 CERROR("Short CONNREQ: %d(%d)\n", msg_nob,
183 (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
187 __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
190 case IBNAL_MSG_IMMEDIATE:
191 if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
192 CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
193 (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
198 case IBNAL_MSG_PUT_RDMA:
199 case IBNAL_MSG_GET_RDMA:
200 if (msg_nob < hdr_size + sizeof(msg->ibm_u.rdma)) {
201 CERROR("Short RDMA req: %d(%d)\n", msg_nob,
202 (int)(hdr_size + sizeof(msg->ibm_u.rdma)));
206 __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key);
207 __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob);
208 __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr);
212 case IBNAL_MSG_PUT_DONE:
213 case IBNAL_MSG_GET_DONE:
214 if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
215 CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
216 (int)(hdr_size + sizeof(msg->ibm_u.completion)));
220 __swab32s(&msg->ibm_u.completion.ibcm_status);
/*
 * kibnal_make_svcqry - active-side service query over TCP.
 *
 * Connects to the peer's acceptor, sends an SVCQRY message, and reads
 * back the SVCRSP that carries the peer's IB service id/GID/PKEY needed
 * to set up the IB connection.  The first 6 bytes of the reply (magic +
 * version) are read separately so the expected format of the remainder
 * is known before it is read.  If the peer answers with the older
 * protocol version, the exchange is retried once with that version
 * (presumably via a loop/goto whose lines are not visible in this
 * truncated listing).  On success the peer's incarnation stamp, svcrsp
 * and negotiated version are recorded in the connection.
 *
 * NOTE(review): error-path returns/gotos and the socket-release on
 * failure paths are not visible here -- confirm against full source.
 */
227 kibnal_make_svcqry (kib_conn_t *conn)
229 kib_peer_t *peer = conn->ibc_peer;
230 int version = IBNAL_MSG_VERSION;
237 LASSERT (conn->ibc_connreq != NULL);
238 msg = &conn->ibc_connreq->cr_msg;
241 kibnal_init_msg(msg, IBNAL_MSG_SVCQRY, 0);
242 kibnal_pack_msg(msg, version, 0, peer->ibp_nid, 0);
244 rc = lnet_connect(&sock, peer->ibp_nid,
245 0, peer->ibp_ip, peer->ibp_port);
247 return -ECONNABORTED;
249 rc = libcfs_sock_write(sock, msg, msg->ibm_nob,
250 lnet_acceptor_timeout());
252 CERROR("Error %d sending svcqry to %s at %u.%u.%u.%u/%d\n",
253 rc, libcfs_nid2str(peer->ibp_nid),
254 HIPQUAD(peer->ibp_ip), peer->ibp_port);
258 /* The first 6 bytes are invariably MAGIC + proto version */
259 rc = libcfs_sock_read(sock, msg, 6, *kibnal_tunables.kib_timeout);
261 CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
262 rc, libcfs_nid2str(peer->ibp_nid),
263 HIPQUAD(peer->ibp_ip), peer->ibp_port);
267 if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
268 msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
269 CERROR("Bad magic: %08x from %s at %u.%u.%u.%u/%d\n",
270 msg->ibm_magic, libcfs_nid2str(peer->ibp_nid),
271 HIPQUAD(peer->ibp_ip), peer->ibp_port);
/* byte-swapped magic => opposite-endian peer: swab the version too */
276 msg_version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
277 msg->ibm_version : __swab16(msg->ibm_version);
278 if (msg_version != version) {
279 if (version == IBNAL_MSG_VERSION) {
280 /* retry with previous version */
281 libcfs_sock_release(sock);
282 version = IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD;
286 CERROR("Bad version %x from %s at %u.%u.%u.%u/%d\n",
287 msg_version, libcfs_nid2str(peer->ibp_nid),
288 HIPQUAD(peer->ibp_ip), peer->ibp_port);
293 /* Read in the rest of the message now we know the expected format */
294 nob = offsetof(kib_msg_t, ibm_u) + sizeof(kib_svcrsp_t);
295 rc = libcfs_sock_read(sock, ((char *)msg) + 6, nob - 6,
296 *kibnal_tunables.kib_timeout);
298 CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
299 rc, libcfs_nid2str(peer->ibp_nid),
300 HIPQUAD(peer->ibp_ip), peer->ibp_port);
304 rc = kibnal_unpack_msg(msg, version, nob);
306 CERROR("Error %d unpacking svcrsp from %s at %u.%u.%u.%u/%d\n",
307 rc, libcfs_nid2str(peer->ibp_nid),
308 HIPQUAD(peer->ibp_ip), peer->ibp_port);
312 if (msg->ibm_type != IBNAL_MSG_SVCRSP) {
313 CERROR("Unexpected response type %d from %s at %u.%u.%u.%u/%d\n",
314 msg->ibm_type, libcfs_nid2str(peer->ibp_nid),
315 HIPQUAD(peer->ibp_ip), peer->ibp_port);
/* verify the reply was really addressed to this NI/incarnation */
320 if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
322 msg->ibm_dststamp != kibnal_data.kib_incarnation) {
323 CERROR("Unexpected dst NID/stamp %s/"LPX64" from "
324 "%s at %u.%u.%u.%u/%d\n",
325 libcfs_nid2str(msg->ibm_dstnid), msg->ibm_dststamp,
326 libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip),
332 if (!lnet_ptlcompat_matchnid(peer->ibp_nid, msg->ibm_srcnid)) {
333 CERROR("Unexpected src NID %s from %s at %u.%u.%u.%u/%d\n",
334 libcfs_nid2str(msg->ibm_srcnid),
335 libcfs_nid2str(peer->ibp_nid),
336 HIPQUAD(peer->ibp_ip), peer->ibp_port);
/* success: record what we learned for the IB connection setup */
341 conn->ibc_incarnation = msg->ibm_srcstamp;
342 conn->ibc_connreq->cr_svcrsp = msg->ibm_u.svcrsp;
343 conn->ibc_version = version;
346 libcfs_sock_release(sock);
/*
 * kibnal_handle_svcqry - passive-side handler for an incoming service
 * query on an accepted TCP socket.
 *
 * Reads the peer's magic and version incrementally (so unexpected
 * protocols can be detected and answered early), validates the SVCQRY,
 * then replies with an SVCRSP carrying this node's IB service
 * id/GID/PKEY.  Peers speaking a different/newer protocol get a reply
 * in the current protocol so they can tell this node is "old".
 *
 * NOTE(review): this listing is truncated -- error-path gotos/returns,
 * the 'reject' flag setup, and several closing braces are not visible;
 * confirm control flow against the full source.
 */
351 kibnal_handle_svcqry (struct socket *sock)
354 unsigned int peer_port;
362 rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
364 CERROR("Can't get peer's IP: %d\n", rc);
368 LIBCFS_ALLOC(msg, sizeof(*msg));
370 CERROR("Can't allocate msgs for %u.%u.%u.%u/%d\n",
371 HIPQUAD(peer_ip), peer_port);
/* step 1: read the 4-byte magic */
375 rc = libcfs_sock_read(sock, &msg->ibm_magic, sizeof(msg->ibm_magic),
376 lnet_acceptor_timeout());
378 CERROR("Error %d receiving svcqry(1) from %u.%u.%u.%u/%d\n",
379 rc, HIPQUAD(peer_ip), peer_port);
383 if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
384 msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
385 /* Unexpected magic! */
386 if (the_lnet.ln_ptlcompat == 0) {
387 if (msg->ibm_magic == LNET_PROTO_MAGIC ||
388 msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) {
389 /* future protocol version compatibility!
390 * When LNET unifies protocols over all LNDs,
391 * the first thing sent will be a version
392 * query. I send back a reply in my current
393 * protocol to tell her I'm "old" */
394 kibnal_init_msg(msg, 0, 0);
395 kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0,
401 CERROR ("Bad magic(1) %#08x (%#08x expected) from "
402 "%u.%u.%u.%u/%d\n", msg->ibm_magic,
403 IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port);
407 /* When portals compatibility is set, I may be passed a new
408 * connection "blindly" by the acceptor, and I have to
409 * determine if my peer has sent an acceptor connection request
411 rc = lnet_accept(kibnal_data.kib_ni, sock, msg->ibm_magic);
415 /* It was an acceptor connection request!
416 * Now I should see my magic... */
417 rc = libcfs_sock_read(sock, &msg->ibm_magic,
418 sizeof(msg->ibm_magic),
419 lnet_acceptor_timeout());
421 CERROR("Error %d receiving svcqry(2) from %u.%u.%u.%u/%d\n",
422 rc, HIPQUAD(peer_ip), peer_port);
426 if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
427 msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
428 CERROR ("Bad magic(2) %#08x (%#08x expected) from "
429 "%u.%u.%u.%u/%d\n", msg->ibm_magic,
430 IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port);
435 /* Now check version */
437 rc = libcfs_sock_read(sock, &msg->ibm_version, sizeof(msg->ibm_version),
438 lnet_acceptor_timeout());
440 CERROR("Error %d receiving svcqry(3) from %u.%u.%u.%u/%d\n",
441 rc, HIPQUAD(peer_ip), peer_port);
445 version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
446 msg->ibm_version : __swab32(msg->ibm_version);
447 /* Peer is a different protocol version: reply in my current protocol
448 * to tell her I'm "old" */
449 if (version != IBNAL_MSG_VERSION &&
450 version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
451 kibnal_init_msg(msg, 0, 0);
452 kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0, LNET_NID_ANY, 0);
457 /* Now read in all the rest */
458 rc = libcfs_sock_read(sock, &msg->ibm_type,
459 offsetof(kib_msg_t, ibm_u) -
460 offsetof(kib_msg_t, ibm_type),
461 lnet_acceptor_timeout());
463 CERROR("Error %d receiving svcqry(4) from %u.%u.%u.%u/%d\n",
464 rc, HIPQUAD(peer_ip), peer_port);
468 rc = kibnal_unpack_msg(msg, version, offsetof(kib_msg_t, ibm_u));
470 CERROR("Error %d unpacking svcqry from %u.%u.%u.%u/%d\n",
471 rc, HIPQUAD(peer_ip), peer_port);
475 if (msg->ibm_type != IBNAL_MSG_SVCQRY) {
476 CERROR("Unexpected message %d from %u.%u.%u.%u/%d\n",
477 msg->ibm_type, HIPQUAD(peer_ip), peer_port);
481 if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid,
483 CERROR("Unexpected dstnid %s: expected %s from %u.%u.%u.%u/%d\n",
484 libcfs_nid2str(msg->ibm_dstnid),
485 libcfs_nid2str(kibnal_data.kib_ni->ni_nid),
486 HIPQUAD(peer_ip), peer_port);
/* save src identity before 'msg' is re-initialised for the reply */
490 srcnid = msg->ibm_srcnid;
491 srcstamp = msg->ibm_srcstamp;
493 kibnal_init_msg(msg, IBNAL_MSG_SVCRSP, sizeof(msg->ibm_u.svcrsp));
495 msg->ibm_u.svcrsp.ibsr_svc_id = kibnal_data.kib_svc_id;
496 memcpy(msg->ibm_u.svcrsp.ibsr_svc_gid, kibnal_data.kib_svc_gid,
497 sizeof(kibnal_data.kib_svc_gid));
498 msg->ibm_u.svcrsp.ibsr_svc_pkey = kibnal_data.kib_svc_pkey;
500 kibnal_pack_msg(msg, version, 0, srcnid, srcstamp);
503 rc = libcfs_sock_write (sock, msg, msg->ibm_nob,
504 lnet_acceptor_timeout());
505 if (!reject && rc != 0) {
506 /* Only complain if we're not rejecting */
507 CERROR("Error %d replying to svcqry from %u.%u.%u.%u/%d\n",
508 rc, HIPQUAD(peer_ip), peer_port);
513 LIBCFS_FREE(msg, sizeof(*msg));
/* kibnal_free_acceptsock - release the socket held by an accept-queue
 * entry and free the entry itself. */
517 kibnal_free_acceptsock (kib_acceptsock_t *as)
519 libcfs_sock_release(as->ibas_sock);
520 LIBCFS_FREE(as, sizeof(*as));
/*
 * kibnal_accept - LND accept callback: take ownership of a newly
 * accepted socket, wrap it in a kib_acceptsock_t, queue it for the
 * connection daemon and wake the daemon up.  The connd lock protects
 * the accept queue (irqsave since it is taken in other contexts).
 */
524 kibnal_accept(lnet_ni_t *ni, struct socket *sock)
526 kib_acceptsock_t *as;
529 LIBCFS_ALLOC(as, sizeof(*as));
531 CERROR("Out of Memory\n");
535 as->ibas_sock = sock;
537 spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
539 list_add_tail(&as->ibas_list, &kibnal_data.kib_connd_acceptq);
540 wake_up(&kibnal_data.kib_connd_waitq);
542 spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
/*
 * kibnal_start_ib_listener - assign an IB CM service id, look up the
 * local port's GID and PKEY (cached-index 0), and start listening for
 * incoming IB connection requests via kibnal_passive_conn_callback.
 * On listen failure the handle is reset to NULL so shutdown paths can
 * rely on it being valid iff the listener is active.
 */
547 kibnal_start_ib_listener (void)
551 LASSERT (kibnal_data.kib_listen_handle == NULL);
553 kibnal_data.kib_svc_id = ib_cm_service_assign();
554 CDEBUG(D_NET, "svc id "LPX64"\n", kibnal_data.kib_svc_id);
556 rc = ib_cached_gid_get(kibnal_data.kib_device,
557 kibnal_data.kib_port, 0,
558 kibnal_data.kib_svc_gid);
560 CERROR("Can't get port %d GID: %d\n",
561 kibnal_data.kib_port, rc);
565 rc = ib_cached_pkey_get(kibnal_data.kib_device,
566 kibnal_data.kib_port, 0,
567 &kibnal_data.kib_svc_pkey);
569 CERROR ("Can't get port %d PKEY: %d\n",
570 kibnal_data.kib_port, rc);
574 rc = ib_cm_listen(kibnal_data.kib_svc_id,
575 TS_IB_CM_SERVICE_EXACT_MASK,
576 kibnal_passive_conn_callback, NULL,
577 &kibnal_data.kib_listen_handle);
579 kibnal_data.kib_listen_handle = NULL;
580 CERROR ("Can't create IB listener: %d\n", rc);
584 LASSERT (kibnal_data.kib_listen_handle != NULL);
/* kibnal_stop_ib_listener - stop the IB CM listener started by
 * kibnal_start_ib_listener() and clear the handle (it is cleared even
 * if ib_cm_listen_stop() reports an error, which is only logged). */
589 kibnal_stop_ib_listener (void)
593 LASSERT (kibnal_data.kib_listen_handle != NULL);
595 rc = ib_cm_listen_stop (kibnal_data.kib_listen_handle);
597 CERROR("Error stopping IB listener: %d\n", rc);
599 kibnal_data.kib_listen_handle = NULL;
/*
 * kibnal_create_peer - allocate and initialise a peer object for 'nid'
 * with one reference for the caller.  Under the global lock, refuses
 * creation when the concurrent-peer limit is reached (-EOVERFLOW) or
 * when shutdown has begun (-ESHUTDOWN); otherwise bumps kib_npeers
 * (which only grows while kib_global_lock is held).  On failure the
 * peer is freed and an error returned; on success *peerp is set
 * (presumably -- the success-path lines are not visible in this
 * truncated listing).
 */
603 kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
609 LASSERT (nid != LNET_NID_ANY);
611 LIBCFS_ALLOC(peer, sizeof (*peer));
613 CERROR("Cannot allocate peer\n");
617 memset(peer, 0, sizeof(*peer)); /* zero flags etc */
620 atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */
622 INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */
623 INIT_LIST_HEAD (&peer->ibp_conns);
624 INIT_LIST_HEAD (&peer->ibp_tx_queue);
625 INIT_LIST_HEAD (&peer->ibp_connd_list); /* not queued for connecting */
628 peer->ibp_last_alive = cfs_time_current();
629 peer->ibp_reconnect_interval = 0; /* OK to connect at any time */
631 write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
633 if (atomic_read(&kibnal_data.kib_npeers) >=
634 *kibnal_tunables.kib_concurrent_peers) {
635 rc = -EOVERFLOW; /* !! but at least it distinguishes */
636 } else if (kibnal_data.kib_nonewpeers) {
637 rc = -ESHUTDOWN; /* shutdown has started */
640 /* npeers only grows with kib_global_lock held */
641 atomic_inc(&kibnal_data.kib_npeers);
644 write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
647 CERROR("Can't create peer: %s\n",
648 (rc == -ESHUTDOWN) ? "shutting down" :
650 LIBCFS_FREE(peer, sizeof(*peer));
/*
 * kibnal_destroy_peer - final teardown when a peer's refcount hits 0.
 * Asserts the peer is fully quiescent (not persistent, unhashed, no
 * in-flight connects/accepts, all lists empty), frees it, and only then
 * decrements kib_npeers: connections hold peer refs, so npeers reaching
 * zero guarantees all peer-related state is gone.
 */
659 kibnal_destroy_peer (kib_peer_t *peer)
661 CDEBUG (D_NET, "peer %s %p deleted\n",
662 libcfs_nid2str(peer->ibp_nid), peer);
664 LASSERT (atomic_read (&peer->ibp_refcount) == 0);
665 LASSERT (peer->ibp_persistence == 0);
666 LASSERT (!kibnal_peer_active(peer));
667 LASSERT (peer->ibp_connecting == 0);
668 LASSERT (peer->ibp_accepting == 0);
669 LASSERT (list_empty (&peer->ibp_connd_list));
670 LASSERT (list_empty (&peer->ibp_conns));
671 LASSERT (list_empty (&peer->ibp_tx_queue));
673 LIBCFS_FREE (peer, sizeof (*peer));
675 /* NB a peer's connections keep a reference on their peer until
676 * they are destroyed, so we can be assured that _all_ state to do
677 * with this peer has been cleaned up when its refcount drops to
679 atomic_dec(&kibnal_data.kib_npeers);
/*
 * kibnal_find_peer_locked - look 'nid' up in its peer-table hash chain.
 * Caller must hold kib_global_lock.  Every hashed peer must be "live"
 * (persistent, connecting, accepting, or with an active conn) -- a peer
 * with none of these should have been unlinked already.
 * NOTE(review): the match/return lines are not visible in this
 * truncated listing; presumably returns the peer or NULL.
 */
683 kibnal_find_peer_locked (lnet_nid_t nid)
685 struct list_head *peer_list = kibnal_nid2peerlist (nid);
686 struct list_head *tmp;
689 list_for_each (tmp, peer_list) {
691 peer = list_entry (tmp, kib_peer_t, ibp_list);
693 LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
694 peer->ibp_connecting != 0 || /* creating conns */
695 peer->ibp_accepting != 0 ||
696 !list_empty (&peer->ibp_conns)); /* active conn */
698 if (peer->ibp_nid != nid)
/* kibnal_get_peer - locked wrapper around kibnal_find_peer_locked():
 * takes the global lock for reading, and if the peer exists adds a
 * reference for the caller before returning it. */
707 kibnal_get_peer (lnet_nid_t nid)
712 read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
713 peer = kibnal_find_peer_locked (nid);
714 if (peer != NULL) /* +1 ref for caller? */
715 kibnal_peer_addref(peer);
716 read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
/* kibnal_unlink_peer_locked - remove a non-persistent, connection-less
 * peer from the peer table and drop the table's reference.  Caller
 * holds kib_global_lock for writing. */
722 kibnal_unlink_peer_locked (kib_peer_t *peer)
724 LASSERT (peer->ibp_persistence == 0);
725 LASSERT (list_empty(&peer->ibp_conns));
727 LASSERT (kibnal_peer_active(peer));
728 list_del_init (&peer->ibp_list);
729 /* lose peerlist's ref */
730 kibnal_peer_decref(peer);
/*
 * kibnal_get_peer_info - enumerate the peer table for the GET_PEER
 * ioctl: walk every hash chain under the global read lock and report
 * the 'index'-th peer's NID, IP, port and persistence count.
 * NOTE(review): the index-decrement/compare and the return statements
 * are not visible in this truncated listing; presumably returns 0 on a
 * hit and -ENOENT when 'index' is past the end.
 */
734 kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp,
738 struct list_head *ptmp;
742 read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
744 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
746 list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
748 peer = list_entry (ptmp, kib_peer_t, ibp_list);
749 LASSERT (peer->ibp_persistence != 0 ||
750 peer->ibp_connecting != 0 ||
751 peer->ibp_accepting != 0 ||
752 !list_empty (&peer->ibp_conns));
757 *nidp = peer->ibp_nid;
759 *portp = peer->ibp_port;
760 *persistencep = peer->ibp_persistence;
762 read_unlock_irqrestore(&kibnal_data.kib_global_lock,
768 read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
/*
 * kibnal_add_persistent_peer - ioctl helper: create (or find) the peer
 * for 'nid' and mark it persistent, recording its IP/port.  If the peer
 * already exists the freshly created one is dropped and the existing
 * one is used (the reassignment line is not visible in this truncated
 * listing).  Caller holds a ref on the NI, so shutdown cannot have
 * started (asserted via kib_nonewpeers == 0).
 */
773 kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port)
780 if (nid == LNET_NID_ANY)
783 rc = kibnal_create_peer (&peer, nid);
787 write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
789 /* I'm always called with a reference on kibnal_data.kib_ni
790 * so shutdown can't have started */
791 LASSERT (kibnal_data.kib_nonewpeers == 0);
793 peer2 = kibnal_find_peer_locked (nid);
795 kibnal_peer_decref(peer);
798 /* peer table takes existing ref on peer */
799 list_add_tail (&peer->ibp_list,
800 kibnal_nid2peerlist (nid));
804 peer->ibp_port = port;
805 peer->ibp_persistence++;
807 write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
/*
 * kibnal_del_peer_locked - drop a peer's persistence and close all its
 * connections.  If it has no connections it is unlinked immediately;
 * otherwise closing the last conn will unlink it (since it is no longer
 * persistent).  Caller holds kib_global_lock for writing; the peer may
 * be freed by the time this returns.
 */
812 kibnal_del_peer_locked (kib_peer_t *peer)
814 struct list_head *ctmp;
815 struct list_head *cnxt;
818 peer->ibp_persistence = 0;
820 if (list_empty(&peer->ibp_conns)) {
821 kibnal_unlink_peer_locked(peer);
823 list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
824 conn = list_entry(ctmp, kib_conn_t, ibc_list);
826 kibnal_close_conn_locked (conn, 0);
828 /* NB peer is no longer persistent; closing its last conn
831 /* NB peer now unlinked; might even be freed if the peer table had the
/*
 * kibnal_del_peer - delete the peer matching 'nid', or every peer when
 * nid == LNET_NID_ANY.  Scans the relevant hash chain(s) under the
 * global write lock; any queued TXs on a connection-less peer are moved
 * to a local zombie list and completed with -EIO after the lock is
 * dropped.  Returns 0 if anything matched (rc's -ENOENT initialisation
 * is on a line not visible in this truncated listing).
 */
836 kibnal_del_peer (lnet_nid_t nid)
839 CFS_LIST_HEAD (zombies);
840 struct list_head *ptmp;
841 struct list_head *pnxt;
848 write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
850 if (nid != LNET_NID_ANY)
851 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
854 hi = kibnal_data.kib_peer_hash_size - 1;
857 for (i = lo; i <= hi; i++) {
858 list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
859 peer = list_entry (ptmp, kib_peer_t, ibp_list);
860 LASSERT (peer->ibp_persistence != 0 ||
861 peer->ibp_connecting != 0 ||
862 peer->ibp_accepting != 0 ||
863 !list_empty (&peer->ibp_conns));
865 if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
868 if (!list_empty(&peer->ibp_tx_queue)) {
869 LASSERT (list_empty(&peer->ibp_conns));
871 list_splice_init(&peer->ibp_tx_queue, &zombies);
874 kibnal_del_peer_locked (peer);
875 rc = 0; /* matched something */
879 write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
881 kibnal_txlist_done(&zombies, -EIO);
/*
 * kibnal_get_conn_by_idx - ioctl helper: walk every peer's connection
 * list under the global read lock and return the 'index'-th connection
 * with a reference added for the caller; NULL (presumably, on a line
 * not visible in this truncated listing) when 'index' is out of range.
 */
887 kibnal_get_conn_by_idx (int index)
890 struct list_head *ptmp;
892 struct list_head *ctmp;
896 read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
898 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
899 list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
901 peer = list_entry (ptmp, kib_peer_t, ibp_list);
902 LASSERT (peer->ibp_persistence > 0 ||
903 peer->ibp_connecting != 0 ||
904 peer->ibp_accepting != 0 ||
905 !list_empty (&peer->ibp_conns));
907 list_for_each (ctmp, &peer->ibp_conns) {
911 conn = list_entry (ctmp, kib_conn_t, ibc_list);
912 kibnal_conn_addref(conn);
913 read_unlock_irqrestore(&kibnal_data.kib_global_lock,
920 read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
/*
 * kibnal_create_conn - allocate and initialise a connection object:
 * TX queues and lock, the RX descriptor array, pre-mapped RX message
 * buffers (IBNAL_RX_MSGS messages laid out back-to-back in mapped
 * pages), and an RC queue pair transitioned to the INIT state.
 * Returns the conn with one ref for the caller; on any failure calls
 * kibnal_destroy_conn(), which also undoes the early kib_nconns
 * increment.
 *
 * NOTE(review): "¶ms" in the ib_qp_create/ib_qp_modify calls below
 * is a character-encoding corruption of "&params" (HTML '&para;') --
 * the code as listed will not compile; restore from pristine source.
 */
925 kibnal_create_conn (void)
935 struct ib_qp_create_param qp_create;
936 struct ib_qp_attribute qp_attr;
939 LIBCFS_ALLOC (conn, sizeof (*conn));
941 CERROR ("Can't allocate connection\n");
945 /* zero flags, NULL pointers etc... */
946 memset (conn, 0, sizeof (*conn));
948 INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
949 INIT_LIST_HEAD (&conn->ibc_tx_queue);
950 INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
951 INIT_LIST_HEAD (&conn->ibc_active_txs);
952 spin_lock_init (&conn->ibc_lock);
954 atomic_inc (&kibnal_data.kib_nconns);
955 /* well not really, but I call destroy() on failure, which decrements */
957 LIBCFS_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
958 if (conn->ibc_rxs == NULL)
960 memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
962 rc = kibnal_alloc_pages(&conn->ibc_rx_pages,
964 IB_ACCESS_LOCAL_WRITE);
/* carve the mapped pages into fixed-size RX message buffers */
968 vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
970 for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
971 struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
972 kib_rx_t *rx = &conn->ibc_rxs[i];
975 rx->rx_vaddr = vaddr;
976 rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
978 vaddr += IBNAL_MSG_SIZE;
979 LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
981 page_offset += IBNAL_MSG_SIZE;
982 LASSERT (page_offset <= PAGE_SIZE);
984 if (page_offset == PAGE_SIZE) {
987 LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
991 /* We can post up to IBLND_MSG_QUEUE_SIZE immediate/req messages and
992 * the same # of ack/nak/rdma+done messages */
994 params.qp_create = (struct ib_qp_create_param) {
996 .max_outstanding_send_request = 3 * IBNAL_MSG_QUEUE_SIZE,
997 .max_outstanding_receive_request = IBNAL_RX_MSGS,
998 .max_send_gather_element = 1,
999 .max_receive_scatter_element = 1,
1001 .pd = kibnal_data.kib_pd,
1002 .send_queue = kibnal_data.kib_cq,
1003 .receive_queue = kibnal_data.kib_cq,
1004 .send_policy = IB_WQ_SIGNAL_SELECTABLE,
1005 .receive_policy = IB_WQ_SIGNAL_SELECTABLE,
1007 .transport = IB_TRANSPORT_RC,
1008 .device_specific = NULL,
1011 rc = ib_qp_create (&para;ms.qp_create, &conn->ibc_qp, &conn->ibc_qpn);
1013 CERROR ("Failed to create queue pair: %d\n", rc);
1017 /* Mark QP created */
1018 conn->ibc_state = IBNAL_CONN_INIT_QP;
1020 params.qp_attr = (struct ib_qp_attribute) {
1021 .state = IB_QP_STATE_INIT,
1022 .port = kibnal_data.kib_port,
1023 .enable_rdma_read = 1,
1024 .enable_rdma_write = 1,
1025 .valid_fields = (IB_QP_ATTRIBUTE_STATE |
1026 IB_QP_ATTRIBUTE_PORT |
1027 IB_QP_ATTRIBUTE_PKEY_INDEX |
1028 IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE),
1030 rc = ib_qp_modify(conn->ibc_qp, &para;ms.qp_attr);
1032 CERROR ("Failed to modify queue pair: %d\n", rc);
1036 /* 1 ref for caller */
1037 atomic_set (&conn->ibc_refcount, 1);
1041 kibnal_destroy_conn (conn);
/*
 * kibnal_destroy_conn - free a connection whose refcount has reached 0
 * (also used to unwind kibnal_create_conn() failures).  The switch on
 * ibc_state tears down exactly what was set up: the QP is destroyed for
 * any state at or past INIT_QP (the ZOMBIE case presumably falls
 * through -- intervening lines are not visible in this truncated
 * listing).  RX pages/descriptors and the peer ref are released
 * unconditionally if present.  If this was the last connection during
 * shutdown, wake the scheduler and reaper so they can exit.
 */
1046 kibnal_destroy_conn (kib_conn_t *conn)
1050 CDEBUG (D_NET, "connection %p\n", conn);
1052 LASSERT (atomic_read (&conn->ibc_refcount) == 0);
1053 LASSERT (list_empty(&conn->ibc_tx_queue));
1054 LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
1055 LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
1056 LASSERT (list_empty(&conn->ibc_active_txs));
1057 LASSERT (conn->ibc_nsends_posted == 0);
1058 LASSERT (conn->ibc_connreq == NULL);
1060 switch (conn->ibc_state) {
1061 case IBNAL_CONN_ZOMBIE:
1062 /* called after connection sequence initiated */
1064 case IBNAL_CONN_INIT_QP:
1065 rc = ib_qp_destroy(conn->ibc_qp);
1067 CERROR("Can't destroy QP: %d\n", rc);
1070 case IBNAL_CONN_INIT_NOTHING:
1077 if (conn->ibc_rx_pages != NULL)
1078 kibnal_free_pages(conn->ibc_rx_pages);
1080 if (conn->ibc_rxs != NULL)
1081 LIBCFS_FREE(conn->ibc_rxs,
1082 IBNAL_RX_MSGS * sizeof(kib_rx_t));
1084 if (conn->ibc_peer != NULL)
1085 kibnal_peer_decref(conn->ibc_peer);
1087 LIBCFS_FREE(conn, sizeof (*conn));
1089 atomic_dec(&kibnal_data.kib_nconns);
1091 if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
1092 kibnal_data.kib_shutdown) {
1093 /* I just nuked the last connection on shutdown; wake up
1094 * everyone so they can exit. */
1095 wake_up_all(&kibnal_data.kib_sched_waitq);
1096 wake_up_all(&kibnal_data.kib_reaper_waitq);
/* kibnal_close_peer_conns_locked - close every connection of 'peer'
 * with reason 'why'; caller holds kib_global_lock for writing.
 * Presumably counts and returns the number closed (the counter lines
 * are not visible in this truncated listing). */
1101 kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
1104 struct list_head *ctmp;
1105 struct list_head *cnxt;
1108 list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
1109 conn = list_entry (ctmp, kib_conn_t, ibc_list);
1112 kibnal_close_conn_locked (conn, why);
/* kibnal_close_stale_conns_locked - close (with -ESTALE) every
 * connection of 'peer' whose incarnation stamp differs from the given
 * 'incarnation', i.e. conns surviving from before the peer rebooted.
 * Caller holds kib_global_lock for writing. */
1119 kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
1122 struct list_head *ctmp;
1123 struct list_head *cnxt;
1126 list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
1127 conn = list_entry (ctmp, kib_conn_t, ibc_list);
1129 if (conn->ibc_incarnation == incarnation)
1132 CDEBUG(D_NET, "Closing stale conn %p nid: %s"
1133 " incarnation:"LPX64"("LPX64")\n", conn,
1134 libcfs_nid2str(peer->ibp_nid),
1135 conn->ibc_incarnation, incarnation);
1138 kibnal_close_conn_locked (conn, -ESTALE);
/*
 * kibnal_close_matching_conns - ioctl helper: close all connections of
 * the peer matching 'nid', or of every peer when nid == LNET_NID_ANY.
 * Returns 0 if anything was closed (wildcards always "succeed"),
 * -ENOENT otherwise.
 */
1145 kibnal_close_matching_conns (lnet_nid_t nid)
1147 unsigned long flags;
1149 struct list_head *ptmp;
1150 struct list_head *pnxt;
1156 write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
1158 if (nid != LNET_NID_ANY)
1159 lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
1162 hi = kibnal_data.kib_peer_hash_size - 1;
1165 for (i = lo; i <= hi; i++) {
1166 list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
1168 peer = list_entry (ptmp, kib_peer_t, ibp_list);
1169 LASSERT (peer->ibp_persistence != 0 ||
1170 peer->ibp_connecting != 0 ||
1171 peer->ibp_accepting != 0 ||
1172 !list_empty (&peer->ibp_conns));
1174 if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
1177 count += kibnal_close_peer_conns_locked (peer, 0);
1181 write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
1183 /* wildcards always succeed */
1184 if (nid == LNET_NID_ANY)
1187 return (count == 0 ? -ENOENT : 0);
/*
 * kibnal_ctl - LND ioctl dispatcher (lnd_ctl).  Handles peer
 * enumeration/add/delete, connection enumeration/close, and rejects the
 * obsolete REGISTER_MYNID unless it is a no-op for the current NID.
 * NOTE(review): per-case 'break's, the default case and the final
 * 'return rc' are not visible in this truncated listing.
 */
1191 kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
1193 struct libcfs_ioctl_data *data = arg;
1196 LASSERT (ni == kibnal_data.kib_ni);
1199 case IOC_LIBCFS_GET_PEER: {
1203 int share_count = 0;
1205 rc = kibnal_get_peer_info(data->ioc_count,
1206 &nid, &ip, &port, &share_count);
1207 data->ioc_nid = nid;
1208 data->ioc_count = share_count;
1209 data->ioc_u32[0] = ip;
1210 data->ioc_u32[1] = port;
1213 case IOC_LIBCFS_ADD_PEER: {
1214 rc = kibnal_add_persistent_peer (data->ioc_nid,
1215 data->ioc_u32[0], /* IP */
1216 data->ioc_u32[1]); /* port */
1219 case IOC_LIBCFS_DEL_PEER: {
1220 rc = kibnal_del_peer (data->ioc_nid);
1223 case IOC_LIBCFS_GET_CONN: {
1224 kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
1230 data->ioc_nid = conn->ibc_peer->ibp_nid;
1231 kibnal_conn_decref(conn);
1235 case IOC_LIBCFS_CLOSE_CONNECTION: {
1236 rc = kibnal_close_matching_conns (data->ioc_nid);
1239 case IOC_LIBCFS_REGISTER_MYNID: {
1240 /* Ignore if this is a noop */
1241 if (data->ioc_nid == ni->ni_nid) {
1244 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
1245 libcfs_nid2str(data->ioc_nid),
1246 libcfs_nid2str(ni->ni_nid));
/*
 * kibnal_free_pages - undo kibnal_alloc_pages(): deregister the IB
 * memory region if the buffer was mapped, free each allocated page
 * (tolerating a partially-filled array from a failed allocation), then
 * free the descriptor itself.
 */
1257 kibnal_free_pages (kib_pages_t *p)
1259 int npages = p->ibp_npages;
1263 if (p->ibp_mapped) {
1264 rc = ib_memory_deregister(p->ibp_handle);
1266 CERROR ("Deregister error: %d\n", rc);
1269 for (i = 0; i < npages; i++)
1270 if (p->ibp_pages[i] != NULL)
1271 __free_page(p->ibp_pages[i]);
1273 LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
/*
 * kibnal_alloc_pages - allocate 'npages' kernel pages, register them
 * with the IB device as a single physical memory region with the given
 * 'access' rights, and return the descriptor via *pp (the success-path
 * assignment is on a line not visible in this truncated listing).
 * The temporary physical-buffer array is freed in all cases; on any
 * failure everything allocated so far is released via
 * kibnal_free_pages().
 */
1277 kibnal_alloc_pages (kib_pages_t **pp, int npages, int access)
1280 struct ib_physical_buffer *phys_pages;
1284 LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
1286 CERROR ("Can't allocate buffer %d\n", npages);
1290 memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
1291 p->ibp_npages = npages;
1293 for (i = 0; i < npages; i++) {
1294 p->ibp_pages[i] = alloc_page (GFP_KERNEL);
1295 if (p->ibp_pages[i] == NULL) {
1296 CERROR ("Can't allocate page %d of %d\n", i, npages);
1297 kibnal_free_pages(p);
1302 LIBCFS_ALLOC(phys_pages, npages * sizeof(*phys_pages));
1303 if (phys_pages == NULL) {
1304 CERROR ("Can't allocate physarray for %d pages\n", npages);
1305 kibnal_free_pages(p);
1309 for (i = 0; i < npages; i++) {
1310 phys_pages[i].size = PAGE_SIZE;
1311 phys_pages[i].address =
1312 lnet_page2phys(p->ibp_pages[i]);
1316 rc = ib_memory_register_physical(kibnal_data.kib_pd,
1319 npages * PAGE_SIZE, 0,
1325 LIBCFS_FREE(phys_pages, npages * sizeof(*phys_pages));
1328 CERROR ("Error %d mapping %d pages\n", rc, npages);
1329 kibnal_free_pages(p);
/*
 * kibnal_setup_tx_descs - allocate and map the TX message buffers
 * (local read access only), then carve them into IBNAL_TX_MSGS()
 * fixed-size descriptors laid out back-to-back in the mapped pages and
 * put each descriptor on the idle-TX list.  Messages never straddle a
 * page boundary (asserted: PAGE_SIZE is a multiple of IBNAL_MSG_SIZE).
 */
1339 kibnal_setup_tx_descs (void)
1342 int page_offset = 0;
1350 /* pre-mapped messages are not bigger than 1 page */
1351 LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
1353 /* No fancy arithmetic when we do the buffer calculations */
1354 LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
1356 rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
1357 IBNAL_TX_MSG_PAGES(),
1358 0); /* local read access only */
1362 vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
1364 for (i = 0; i < IBNAL_TX_MSGS(); i++) {
1365 page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
1366 tx = &kibnal_data.kib_tx_descs[i];
1368 memset (tx, 0, sizeof(*tx)); /* zero flags etc */
1370 tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
1371 tx->tx_vaddr = vaddr;
1372 tx->tx_mapped = KIB_TX_UNMAPPED;
1374 CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n",
1375 i, tx, tx->tx_msg, tx->tx_vaddr);
1377 list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
1379 vaddr += IBNAL_MSG_SIZE;
1380 LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES());
1382 page_offset += IBNAL_MSG_SIZE;
1383 LASSERT (page_offset <= PAGE_SIZE);
1385 if (page_offset == PAGE_SIZE) {
1388 LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
/*
 * kibnal_shutdown - lnd_shutdown: tear the LND down in reverse order of
 * initialisation, switching on how far kib_init got (each case falls
 * through to the next-lower init stage).  From a full start this:
 * blocks new peer creation, stops the IB listener, deletes all peers,
 * waits for the connd accept queue and then all peer state to drain
 * (polling once per second, logging at power-of-two intervals), then
 * destroys the CQ, TX buffers, FMR pool and PD, and finally flags and
 * waits for all service threads to exit.  Per-stage error returns are
 * only logged -- shutdown always proceeds.
 */
1396 kibnal_shutdown (lnet_ni_t *ni)
1400 unsigned long flags;
1402 CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
1403 atomic_read (&libcfs_kmemory));
1405 LASSERT(ni == kibnal_data.kib_ni);
1406 LASSERT(ni->ni_data == &kibnal_data);
1408 switch (kibnal_data.kib_init) {
1410 CERROR ("Unexpected state %d\n", kibnal_data.kib_init);
1413 case IBNAL_INIT_ALL:
1414 /* Prevent new peers from being created */
1415 write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
1416 kibnal_data.kib_nonewpeers = 1;
1417 write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
1419 kibnal_stop_ib_listener();
1421 /* Remove all existing peers from the peer table */
1422 kibnal_del_peer(LNET_NID_ANY);
1424 /* Wait for pending conn reqs to be handled */
1426 spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
1427 while (!list_empty(&kibnal_data.kib_connd_acceptq)) {
1428 spin_unlock_irqrestore(&kibnal_data.kib_connd_lock,
1431 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */
1432 "waiting for conn reqs to clean up\n");
1433 cfs_pause(cfs_time_seconds(1));
1435 spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
1437 spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
1439 /* Wait for all peer state to clean up */
1441 while (atomic_read(&kibnal_data.kib_npeers) != 0) {
1443 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
1444 "waiting for %d peers to close down\n",
1445 atomic_read(&kibnal_data.kib_npeers));
1446 cfs_pause(cfs_time_seconds(1));
1451 rc = ib_cq_destroy (kibnal_data.kib_cq);
1453 CERROR ("Destroy CQ error: %d\n", rc);
1456 case IBNAL_INIT_TXD:
1457 kibnal_free_pages (kibnal_data.kib_tx_pages);
1460 case IBNAL_INIT_FMR:
1461 rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
1463 CERROR ("Destroy FMR pool error: %d\n", rc);
1467 rc = ib_pd_destroy(kibnal_data.kib_pd);
1469 CERROR ("Destroy PD error: %d\n", rc);
1472 case IBNAL_INIT_DATA:
1473 /* Module refcount only gets to zero when all peers
1474 * have been closed so all lists must be empty */
1475 LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0);
1476 LASSERT (kibnal_data.kib_peers != NULL);
1477 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
1478 LASSERT (list_empty (&kibnal_data.kib_peers[i]));
1480 LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
1481 LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
1482 LASSERT (list_empty (&kibnal_data.kib_sched_txq));
1483 LASSERT (list_empty (&kibnal_data.kib_reaper_conns));
1484 LASSERT (list_empty (&kibnal_data.kib_connd_peers));
1485 LASSERT (list_empty (&kibnal_data.kib_connd_acceptq));
1487 /* flag threads to terminate; wake and wait for them to die */
1488 kibnal_data.kib_shutdown = 1;
1489 wake_up_all (&kibnal_data.kib_sched_waitq);
1490 wake_up_all (&kibnal_data.kib_reaper_waitq);
1491 wake_up_all (&kibnal_data.kib_connd_waitq);
1494 while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
1496 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
1497 "Waiting for %d threads to terminate\n",
1498 atomic_read (&kibnal_data.kib_nthreads));
1499 cfs_pause(cfs_time_seconds(1));
1503 case IBNAL_INIT_NOTHING:
1507 if (kibnal_data.kib_tx_descs != NULL)
1508 LIBCFS_FREE (kibnal_data.kib_tx_descs,
1509 IBNAL_TX_MSGS() * sizeof(kib_tx_t));
1511 if (kibnal_data.kib_peers != NULL)
1512 LIBCFS_FREE (kibnal_data.kib_peers,
1513 sizeof (struct list_head) *
1514 kibnal_data.kib_peer_hash_size);
1516 CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
1517 atomic_read (&libcfs_kmemory));
1519 kibnal_data.kib_init = IBNAL_INIT_NOTHING;
1520 PORTAL_MODULE_UNUSE;
/*
 * kibnal_get_ipoibidx() -- walk IB devices/ports up to the configured
 * HCA/port to derive the index of the matching IPoIB interface
 * (presumably: each active port gets one "ipoibN" interface, so the
 * index is a count of ports preceding ours -- TODO confirm; the tail of
 * this function is elided from the listing).
 *
 * NOTE(review): numbered listing with elided lines; comments only added.
 */
1524 kibnal_get_ipoibidx(void)
1526 /* NB single threaded! */
1527 static struct ib_port_properties port_props;
1533 struct ib_device *device;
/* Scan every device index up to and including the one we will use. */
1535 for (devidx = 0; devidx <= kibnal_data.kib_hca_idx; devidx++) {
1536 device = ib_device_get_by_index(devidx);
1538 if (device == NULL) {
1539 CERROR("Can't get IB device %d\n", devidx);
/* Ports are numbered from 1; only ports 1 and 2 are considered. */
1543 for (port = 1; port <= 2; port++) {
/* Stop counting once we reach our own device/port pair. */
1544 if (devidx == kibnal_data.kib_hca_idx &&
1545 port == kibnal_data.kib_port)
1548 rc = ib_port_properties_get(device, port,
/*
 * kibnal_startup() -- bring up the openib LND for network interface ni.
 * Initialises in stages, advancing kibnal_data.kib_init after each one
 * (DATA -> PD -> FMR -> TXD -> CQ -> ALL) so that kibnal_shutdown() can
 * unwind exactly as far as we got on any failure path.
 *
 * NOTE(review): numbered listing with elided lines (error checks,
 * braces, returns).  One byte-level defect fixed at listing line 1795:
 * "&para;ms" was mojibake for "&params," ("&para" had been swallowed as
 * an HTML entity by an earlier conversion); restored to "&params,".
 * All other code is untouched.
 */
1560 kibnal_startup (lnet_ni_t *ni)
1573 LASSERT (ni->ni_lnd == &the_kiblnd);
1575 /* Only 1 instance supported */
1576 if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
1577 CERROR ("Only 1 instance supported\n");
/* Credits handed to the network can't exceed the tx descriptor pool. */
1581 if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
1582 CERROR ("Can't set credits(%d) > ntx(%d)\n",
1583 *kibnal_tunables.kib_credits,
1584 *kibnal_tunables.kib_ntx);
1588 memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
1590 ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
1591 ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
1593 CLASSERT (LNET_MAX_INTERFACES > 1);
1596 kibnal_data.kib_hca_idx = 0; /* default: first HCA */
1597 kibnal_data.kib_port = 0; /* any port */
1599 if (ni->ni_interfaces[0] != NULL) {
1600 /* hca.port specified in 'networks=openib(h.p)' */
1601 if (ni->ni_interfaces[1] != NULL) {
1602 CERROR("Multiple interfaces not supported\n");
/* Try "hca.port" first; %n must consume the whole string to accept. */
1606 nob = strlen(ni->ni_interfaces[0]);
1607 i = sscanf(ni->ni_interfaces[0], "%d.%d%n", &hca, &port, &nob);
1608 if (i >= 2 && nob == strlen(ni->ni_interfaces[0])) {
1609 kibnal_data.kib_hca_idx = hca;
1610 kibnal_data.kib_port = port;
/* Fall back to a bare "hca" (any port). */
1612 nob = strlen(ni->ni_interfaces[0]);
1613 i = sscanf(ni->ni_interfaces[0], "%d%n", &hca, &nob);
1615 if (i >= 1 && nob == strlen(ni->ni_interfaces[0])) {
1616 kibnal_data.kib_hca_idx = hca;
1618 CERROR("Can't parse interface '%s'\n",
1619 ni->ni_interfaces[0]);
1625 kibnal_data.kib_ni = ni;
1626 ni->ni_data = &kibnal_data;
/* Incarnation stamp (microseconds since epoch) identifies this boot of
 * the NAL to peers across restarts. */
1628 do_gettimeofday(&tv);
1629 kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
1633 rwlock_init(&kibnal_data.kib_global_lock);
1635 kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
1636 LIBCFS_ALLOC (kibnal_data.kib_peers,
1637 sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
1638 if (kibnal_data.kib_peers == NULL) {
1641 for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
1642 INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
/* Per-subsystem locks, queues and waitqueues: reaper, connd, scheduler,
 * and the idle tx descriptor pool. */
1644 spin_lock_init (&kibnal_data.kib_reaper_lock);
1645 INIT_LIST_HEAD (&kibnal_data.kib_reaper_conns);
1646 init_waitqueue_head (&kibnal_data.kib_reaper_waitq);
1648 spin_lock_init (&kibnal_data.kib_connd_lock);
1649 INIT_LIST_HEAD (&kibnal_data.kib_connd_acceptq);
1650 INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
1651 init_waitqueue_head (&kibnal_data.kib_connd_waitq);
1653 spin_lock_init (&kibnal_data.kib_sched_lock);
1654 INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
1655 INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
1656 init_waitqueue_head (&kibnal_data.kib_sched_waitq);
1658 spin_lock_init (&kibnal_data.kib_tx_lock);
1659 INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
1661 LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
1662 IBNAL_TX_MSGS() * sizeof(kib_tx_t));
1663 if (kibnal_data.kib_tx_descs == NULL) {
1664 CERROR ("Can't allocate tx descs\n");
1668 /* lists/ptrs/locks initialised */
1669 kibnal_data.kib_init = IBNAL_INIT_DATA;
1670 /*****************************************************/
/* Spawn worker threads: schedulers, connds, and one reaper. */
1672 for (i = 0; i < IBNAL_N_SCHED; i++) {
1673 rc = kibnal_thread_start (kibnal_scheduler,
1674 (void *)((unsigned long)i));
1676 CERROR("Can't spawn openibnal scheduler[%d]: %d\n",
1682 /* must have at least 2 connds to remain responsive to svcqry while
1684 if (*kibnal_tunables.kib_n_connd < 2)
1685 *kibnal_tunables.kib_n_connd = 2;
1688 for (i = 0; i < *kibnal_tunables.kib_n_connd; i++) {
1689 rc = kibnal_thread_start (kibnal_connd,
1690 (void *)((unsigned long)i));
1692 CERROR("Can't spawn openibnal connd[%d]: %d\n",
1698 rc = kibnal_thread_start (kibnal_reaper, NULL);
1700 CERROR ("Can't spawn openibnal reaper: %d\n", rc);
/* Open the configured HCA and read its properties. */
1704 kibnal_data.kib_device = ib_device_get_by_index(kibnal_data.kib_hca_idx);
1705 if (kibnal_data.kib_device == NULL) {
1706 CERROR ("Can't open ib device %d\n",
1707 kibnal_data.kib_hca_idx);
1711 rc = ib_device_properties_get(kibnal_data.kib_device,
1712 &kibnal_data.kib_device_props);
1714 CERROR ("Can't get device props: %d\n", rc);
1718 CDEBUG(D_NET, "Max Initiator: %d Max Responder %d\n",
1719 kibnal_data.kib_device_props.max_initiator_per_qp,
1720 kibnal_data.kib_device_props.max_responder_per_qp);
/* Port explicitly configured: verify it; otherwise probe ports 1-2 and
 * take the first that responds. */
1722 if (kibnal_data.kib_port != 0) {
1723 rc = ib_port_properties_get(kibnal_data.kib_device,
1724 kibnal_data.kib_port,
1725 &kibnal_data.kib_port_props);
1727 CERROR("Error %d open port %d on HCA %d\n", rc,
1728 kibnal_data.kib_port,
1729 kibnal_data.kib_hca_idx);
1733 for (i = 1; i <= 2; i++) {
1734 rc = ib_port_properties_get(kibnal_data.kib_device, i,
1735 &kibnal_data.kib_port_props);
1737 kibnal_data.kib_port = i;
1741 if (kibnal_data.kib_port == 0) {
1742 CERROR ("Can't find a port\n");
/* Derive this NI's NID from the IP of the matching IPoIB interface. */
1747 i = kibnal_get_ipoibidx();
1751 snprintf(ipif_name, sizeof(ipif_name), "%s%d",
1752 *kibnal_tunables.kib_ipif_basename, i);
1753 if (strlen(ipif_name) == sizeof(ipif_name) - 1) {
1754 CERROR("IPoIB interface name %s truncated\n", ipif_name);
1758 rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
1760 CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
1765 CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
1769 ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
1771 rc = ib_pd_create(kibnal_data.kib_device,
1772 NULL, &kibnal_data.kib_pd);
1774 CERROR ("Can't create PD: %d\n", rc);
1778 /* flag PD initialised */
1779 kibnal_data.kib_init = IBNAL_INIT_PD;
1780 /*****************************************************/
/* Create the FMR pool: sized to the tx pool, flushing dirty mappings
 * when 3/4 full, with local-write + remote read/write access. */
1783 const int pool_size = *kibnal_tunables.kib_ntx;
1784 struct ib_fmr_pool_param params = {
1785 .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
1786 .access = (IB_ACCESS_LOCAL_WRITE |
1787 IB_ACCESS_REMOTE_WRITE |
1788 IB_ACCESS_REMOTE_READ),
1789 .pool_size = pool_size,
1790 .dirty_watermark = (pool_size * 3)/4,
1791 .flush_function = NULL,
1795 rc = ib_fmr_pool_create(kibnal_data.kib_pd, &params,
1796 &kibnal_data.kib_fmr_pool);
1798 CERROR ("Can't create FMR pool size %d: %d\n",
1804 /* flag FMR pool initialised */
1805 kibnal_data.kib_init = IBNAL_INIT_FMR;
1807 /*****************************************************/
1809 rc = kibnal_setup_tx_descs();
1811 CERROR ("Can't register tx descs: %d\n", rc);
1815 /* flag TX descs initialised */
1816 kibnal_data.kib_init = IBNAL_INIT_TXD;
1817 /*****************************************************/
/* Create the completion queue and arm it for solicited completions;
 * kibnal_callback runs on every CQ event. */
1820 struct ib_cq_callback callback = {
1821 .context = IBNAL_CALLBACK_CTXT,
1822 .policy = IB_CQ_PROVIDER_REARM,
1824 .entry = kibnal_callback,
1828 int nentries = IBNAL_CQ_ENTRIES();
1830 rc = ib_cq_create (kibnal_data.kib_device,
1831 &nentries, &callback, NULL,
1832 &kibnal_data.kib_cq);
1834 CERROR ("Can't create CQ: %d\n", rc);
1838 /* I only want solicited events */
1839 rc = ib_cq_request_notification(kibnal_data.kib_cq, 1);
1843 /* flag CQ initialised */
1844 kibnal_data.kib_init = IBNAL_INIT_CQ;
1845 /*****************************************************/
1847 rc = kibnal_start_ib_listener();
1851 /* flag everything initialised */
1852 kibnal_data.kib_init = IBNAL_INIT_ALL;
1853 /*****************************************************/
/* Common failure path (label elided in listing): unwind whatever got
 * initialised via the staged shutdown. */
1858 kibnal_shutdown(ni);
/*
 * kibnal_module_fini() -- module unload hook (registered via
 * module_exit below): unregister this LND from LNet, then release the
 * tunables state set up by kibnal_module_init().
 * NOTE(review): braces/return elided in this numbered listing.
 */
1863 kibnal_module_fini (void)
1865 lnet_unregister_lnd(&the_kiblnd);
1866 kibnal_tunables_fini();
/*
 * kibnal_module_init() -- module load hook (registered via module_init
 * below): initialise tunables first, then register the LND descriptor
 * with LNet so networks=openib(...) can instantiate it.
 * NOTE(review): the error check on rc and the return statement are
 * elided from this numbered listing (lines 1875-1877, 1879-1880).
 */
1870 kibnal_module_init (void)
1874 rc = kibnal_tunables_init();
1878 lnet_register_lnd(&the_kiblnd);
/* Standard kernel module metadata and entry points. */
1883 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
/* NOTE(review): two MODULE_DESCRIPTION lines appear because the listing
 * elides lines 1884/1886/1888 -- presumably an #ifdef/#else/#endif that
 * selected the Cisco vs. OpenIB(gen1) description at build time; verify
 * against the full source before editing. */
1885 MODULE_DESCRIPTION("Kernel Cisco IB LND v1.00");
1887 MODULE_DESCRIPTION("Kernel OpenIB(gen1) LND v1.00");
1889 MODULE_LICENSE("GPL");
1891 module_init(kibnal_module_init);
1892 module_exit(kibnal_module_fini);