+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/openiblnd/openiblnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "openiblnd.h"
-
-lnd_t the_kiblnd = {
-#ifdef USING_TSAPI
- .lnd_type = CIBLND,
-#else
- .lnd_type = OPENIBLND,
-#endif
- .lnd_startup = kibnal_startup,
- .lnd_shutdown = kibnal_shutdown,
- .lnd_ctl = kibnal_ctl,
- .lnd_send = kibnal_send,
- .lnd_recv = kibnal_recv,
- .lnd_eager_recv = kibnal_eager_recv,
- .lnd_accept = kibnal_accept,
-};
-
-kib_data_t kibnal_data;
-
-__u32
-kibnal_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-void
-kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
-{
- msg->ibm_type = type;
- msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
-}
-
-void
-kibnal_pack_msg(kib_msg_t *msg, int version, int credits,
- lnet_nid_t dstnid, __u64 dststamp)
-{
- /* CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously. */
- msg->ibm_magic = IBNAL_MSG_MAGIC;
- msg->ibm_version = version;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = kibnal_data.kib_ni->ni_nid;
- msg->ibm_srcstamp = kibnal_data.kib_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
-
- if (*kibnal_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
- }
-}
-
-int
-kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob)
-{
- const int hdr_size = offsetof(kib_msg_t, ibm_u);
- __u32 msg_cksum;
- int msg_version;
- int flip;
- int msg_nob;
-
- if (nob < 6) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
- flip = 1;
- } else {
- CERROR("Bad magic: %08x\n", msg->ibm_magic);
- return -EPROTO;
- }
-
- msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
- if ((expected_version == 0) ?
- (msg_version != IBNAL_MSG_VERSION &&
- msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) :
- (msg_version != expected_version)) {
- CERROR("Bad version: %x\n", msg_version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
- if (msg_nob > nob) {
- CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /* checksum must be computed with ibm_cksum zero and BEFORE anything
- * gets flipped */
- msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
- msg->ibm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != kibnal_cksum(msg, msg_nob)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
- msg->ibm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- msg->ibm_version = msg_version;
- LASSERT (sizeof(msg->ibm_type) == 1);
- LASSERT (sizeof(msg->ibm_credits) == 1);
- msg->ibm_nob = msg_nob;
- __swab64s(&msg->ibm_srcnid);
- __swab64s(&msg->ibm_srcstamp);
- __swab64s(&msg->ibm_dstnid);
- __swab64s(&msg->ibm_dststamp);
- }
-
- if (msg->ibm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Unknown message type %x\n", msg->ibm_type);
- return -EPROTO;
-
- case IBNAL_MSG_SVCQRY:
- case IBNAL_MSG_NOOP:
- break;
-
- case IBNAL_MSG_SVCRSP:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.svcrsp)) {
- CERROR("Short SVCRSP: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.svcrsp)));
- return -EPROTO;
- }
- if (flip) {
- __swab64s(&msg->ibm_u.svcrsp.ibsr_svc_id);
- __swab16s(&msg->ibm_u.svcrsp.ibsr_svc_pkey);
- }
- break;
-
- case IBNAL_MSG_CONNREQ:
- case IBNAL_MSG_CONNACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
- CERROR("Short CONNREQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
- return -EPROTO;
- }
- if (flip)
- __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
- CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
- return -EPROTO;
- }
- break;
-
- case IBNAL_MSG_PUT_RDMA:
- case IBNAL_MSG_GET_RDMA:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.rdma)) {
- CERROR("Short RDMA req: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.rdma)));
- return -EPROTO;
- }
- if (flip) {
- __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key);
- __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob);
- __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr);
- }
- break;
-
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
- CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.completion)));
- return -EPROTO;
- }
- if (flip)
- __swab32s(&msg->ibm_u.completion.ibcm_status);
- break;
- }
- return 0;
-}
-
-int
-kibnal_make_svcqry (kib_conn_t *conn)
-{
- kib_peer_t *peer = conn->ibc_peer;
- int version = IBNAL_MSG_VERSION;
- int msg_version;
- kib_msg_t *msg;
- struct socket *sock;
- int rc;
- int nob;
-
- LASSERT (conn->ibc_connreq != NULL);
- msg = &conn->ibc_connreq->cr_msg;
-
- again:
- kibnal_init_msg(msg, IBNAL_MSG_SVCQRY, 0);
- kibnal_pack_msg(msg, version, 0, peer->ibp_nid, 0);
-
- rc = lnet_connect(&sock, peer->ibp_nid,
- 0, peer->ibp_ip, peer->ibp_port);
- if (rc != 0)
- return -ECONNABORTED;
-
- rc = libcfs_sock_write(sock, msg, msg->ibm_nob,
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d sending svcqry to %s at %u.%u.%u.%u/%d\n",
- rc, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- goto out;
- }
-
- /* The first 6 bytes are invariably MAGIC + proto version */
- rc = libcfs_sock_read(sock, msg, 6, *kibnal_tunables.kib_timeout);
- if (rc != 0) {
- CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
- rc, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- goto out;
- }
-
- if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
- msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
- CERROR("Bad magic: %08x from %s at %u.%u.%u.%u/%d\n",
- msg->ibm_magic, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- msg_version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
- msg->ibm_version : __swab16(msg->ibm_version);
- if (msg_version != version) {
- if (version == IBNAL_MSG_VERSION) {
- /* retry with previous version */
- libcfs_sock_release(sock);
- version = IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD;
- goto again;
- }
-
- CERROR("Bad version %x from %s at %u.%u.%u.%u/%d\n",
- msg_version, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- /* Read in the rest of the message now we know the expected format */
- nob = offsetof(kib_msg_t, ibm_u) + sizeof(kib_svcrsp_t);
- rc = libcfs_sock_read(sock, ((char *)msg) + 6, nob - 6,
- *kibnal_tunables.kib_timeout);
- if (rc != 0) {
- CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n",
- rc, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- goto out;
- }
-
- rc = kibnal_unpack_msg(msg, version, nob);
- if (rc != 0) {
- CERROR("Error %d unpacking svcrsp from %s at %u.%u.%u.%u/%d\n",
- rc, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- goto out;
- }
-
- if (msg->ibm_type != IBNAL_MSG_SVCRSP) {
- CERROR("Unexpected response type %d from %s at %u.%u.%u.%u/%d\n",
- msg->ibm_type, libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- if (kibnal_data.kib_ni->ni_nid != msg->ibm_dstnid ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- CERROR("Unexpected dst NID/stamp %s/"LPX64" from "
- "%s at %u.%u.%u.%u/%d\n",
- libcfs_nid2str(msg->ibm_dstnid), msg->ibm_dststamp,
- libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip),
- peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- if (peer->ibp_nid != msg->ibm_srcnid) {
- CERROR("Unexpected src NID %s from %s at %u.%u.%u.%u/%d\n",
- libcfs_nid2str(msg->ibm_srcnid),
- libcfs_nid2str(peer->ibp_nid),
- HIPQUAD(peer->ibp_ip), peer->ibp_port);
- rc = -EPROTO;
- goto out;
- }
-
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_connreq->cr_svcrsp = msg->ibm_u.svcrsp;
- conn->ibc_version = version;
-
- out:
- libcfs_sock_release(sock);
- return rc;
-}
-
-void
-kibnal_handle_svcqry (struct socket *sock)
-{
- __u32 peer_ip;
- unsigned int peer_port;
- kib_msg_t *msg;
- __u64 srcnid;
- __u64 srcstamp;
- int version;
- int reject = 0;
- int rc;
-
- rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
- if (rc != 0) {
- CERROR("Can't get peer's IP: %d\n", rc);
- return;
- }
-
- LIBCFS_ALLOC(msg, sizeof(*msg));
- if (msg == NULL) {
- CERROR("Can't allocate msgs for %u.%u.%u.%u/%d\n",
- HIPQUAD(peer_ip), peer_port);
- return;
- }
-
- rc = libcfs_sock_read(sock, &msg->ibm_magic, sizeof(msg->ibm_magic),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d receiving svcqry(1) from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- if (msg->ibm_magic != IBNAL_MSG_MAGIC &&
- msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
- /* Unexpected magic! */
- if (msg->ibm_magic == LNET_PROTO_MAGIC ||
- msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) {
- /* future protocol version compatibility! When LNET
- * unifies protocols over all LNDs, the first thing
- * sent will be a version query. I send back a reply
- * in my current protocol to tell her I'm "old" */
- kibnal_init_msg(msg, 0, 0);
- kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0,
- LNET_NID_ANY, 0);
- reject = 1;
- goto reply;
- }
-
- CERROR ("Bad magic(1) %#08x (%#08x expected) from "
- "%u.%u.%u.%u/%d\n", msg->ibm_magic,
- IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- /* Now check version */
-
- rc = libcfs_sock_read(sock, &msg->ibm_version, sizeof(msg->ibm_version),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d receiving svcqry(2) from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ?
- msg->ibm_version : __swab16(msg->ibm_version);
- /* Peer is a different protocol version: reply in my current protocol
- * to tell her I'm "old" */
- if (version != IBNAL_MSG_VERSION &&
- version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- kibnal_init_msg(msg, 0, 0);
- kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0, LNET_NID_ANY, 0);
- reject = 1;
- goto reply;
- }
-
- /* Now read in all the rest */
- rc = libcfs_sock_read(sock, &msg->ibm_type,
- offsetof(kib_msg_t, ibm_u) -
- offsetof(kib_msg_t, ibm_type),
- lnet_acceptor_timeout());
- if (rc != 0) {
- CERROR("Error %d receiving svcqry(3) from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- rc = kibnal_unpack_msg(msg, version, offsetof(kib_msg_t, ibm_u));
- if (rc != 0) {
- CERROR("Error %d unpacking svcqry from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- if (msg->ibm_type != IBNAL_MSG_SVCQRY) {
- CERROR("Unexpected message %d from %u.%u.%u.%u/%d\n",
- msg->ibm_type, HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- if (kibnal_data.kib_ni->ni_nid != msg->ibm_dstnid) {
- CERROR("Unexpected dstnid %s: expected %s from %u.%u.%u.%u/%d\n",
- libcfs_nid2str(msg->ibm_dstnid),
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid),
- HIPQUAD(peer_ip), peer_port);
- goto out;
- }
-
- srcnid = msg->ibm_srcnid;
- srcstamp = msg->ibm_srcstamp;
-
- kibnal_init_msg(msg, IBNAL_MSG_SVCRSP, sizeof(msg->ibm_u.svcrsp));
-
- msg->ibm_u.svcrsp.ibsr_svc_id = kibnal_data.kib_svc_id;
- memcpy(msg->ibm_u.svcrsp.ibsr_svc_gid, kibnal_data.kib_svc_gid,
- sizeof(kibnal_data.kib_svc_gid));
- msg->ibm_u.svcrsp.ibsr_svc_pkey = kibnal_data.kib_svc_pkey;
-
- kibnal_pack_msg(msg, version, 0, srcnid, srcstamp);
-
- reply:
- rc = libcfs_sock_write (sock, msg, msg->ibm_nob,
- lnet_acceptor_timeout());
- if (!reject && rc != 0) {
- /* Only complain if we're not rejecting */
- CERROR("Error %d replying to svcqry from %u.%u.%u.%u/%d\n",
- rc, HIPQUAD(peer_ip), peer_port);
- }
-
- out:
- LIBCFS_FREE(msg, sizeof(*msg));
-}
-
-void
-kibnal_free_acceptsock (kib_acceptsock_t *as)
-{
- libcfs_sock_release(as->ibas_sock);
- LIBCFS_FREE(as, sizeof(*as));
-}
-
-int
-kibnal_accept(lnet_ni_t *ni, struct socket *sock)
-{
- kib_acceptsock_t *as;
- unsigned long flags;
-
- LIBCFS_ALLOC(as, sizeof(*as));
- if (as == NULL) {
- CERROR("Out of Memory\n");
- return -ENOMEM;
- }
-
- as->ibas_sock = sock;
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- list_add_tail(&as->ibas_list, &kibnal_data.kib_connd_acceptq);
- wake_up(&kibnal_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
- return 0;
-}
-
-int
-kibnal_start_ib_listener (void)
-{
- int rc;
-
- LASSERT (kibnal_data.kib_listen_handle == NULL);
-
- kibnal_data.kib_svc_id = ib_cm_service_assign();
- CDEBUG(D_NET, "svc id "LPX64"\n", kibnal_data.kib_svc_id);
-
- rc = ib_cached_gid_get(kibnal_data.kib_device,
- kibnal_data.kib_port, 0,
- kibnal_data.kib_svc_gid);
- if (rc != 0) {
- CERROR("Can't get port %d GID: %d\n",
- kibnal_data.kib_port, rc);
- return rc;
- }
-
- rc = ib_cached_pkey_get(kibnal_data.kib_device,
- kibnal_data.kib_port, 0,
- &kibnal_data.kib_svc_pkey);
- if (rc != 0) {
- CERROR ("Can't get port %d PKEY: %d\n",
- kibnal_data.kib_port, rc);
- return rc;
- }
-
- rc = ib_cm_listen(kibnal_data.kib_svc_id,
- TS_IB_CM_SERVICE_EXACT_MASK,
- kibnal_passive_conn_callback, NULL,
- &kibnal_data.kib_listen_handle);
- if (rc != 0) {
- kibnal_data.kib_listen_handle = NULL;
- CERROR ("Can't create IB listener: %d\n", rc);
- return rc;
- }
-
- LASSERT (kibnal_data.kib_listen_handle != NULL);
- return 0;
-}
-
-void
-kibnal_stop_ib_listener (void)
-{
- int rc;
-
- LASSERT (kibnal_data.kib_listen_handle != NULL);
-
- rc = ib_cm_listen_stop (kibnal_data.kib_listen_handle);
- if (rc != 0)
- CERROR("Error stopping IB listener: %d\n", rc);
-
- kibnal_data.kib_listen_handle = NULL;
-}
-
-int
-kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
- int rc;
-
- LASSERT (nid != LNET_NID_ANY);
-
- LIBCFS_ALLOC(peer, sizeof (*peer));
- if (peer == NULL) {
- CERROR("Cannot allocate peer\n");
- return -ENOMEM;
- }
-
- memset(peer, 0, sizeof(*peer)); /* zero flags etc */
-
- peer->ibp_nid = nid;
- atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD (&peer->ibp_conns);
- INIT_LIST_HEAD (&peer->ibp_tx_queue);
- INIT_LIST_HEAD (&peer->ibp_connd_list); /* not queued for connecting */
-
- peer->ibp_error = 0;
- peer->ibp_last_alive = cfs_time_current();
- peer->ibp_reconnect_interval = 0; /* OK to connect at any time */
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (atomic_read(&kibnal_data.kib_npeers) >=
- *kibnal_tunables.kib_concurrent_peers) {
- rc = -EOVERFLOW; /* !! but at least it distinguishes */
- } else if (kibnal_data.kib_nonewpeers) {
- rc = -ESHUTDOWN; /* shutdown has started */
- } else {
- rc = 0;
- /* npeers only grows with kib_global_lock held */
- atomic_inc(&kibnal_data.kib_npeers);
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (rc != 0) {
- CERROR("Can't create peer: %s\n",
- (rc == -ESHUTDOWN) ? "shutting down" :
- "too many peers");
- LIBCFS_FREE(peer, sizeof(*peer));
- } else {
- *peerp = peer;
- }
-
- return rc;
-}
-
-void
-kibnal_destroy_peer (kib_peer_t *peer)
-{
- CDEBUG (D_NET, "peer %s %p deleted\n",
- libcfs_nid2str(peer->ibp_nid), peer);
-
- LASSERT (atomic_read (&peer->ibp_refcount) == 0);
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (!kibnal_peer_active(peer));
- LASSERT (peer->ibp_connecting == 0);
- LASSERT (peer->ibp_accepting == 0);
- LASSERT (list_empty (&peer->ibp_connd_list));
- LASSERT (list_empty (&peer->ibp_conns));
- LASSERT (list_empty (&peer->ibp_tx_queue));
-
- LIBCFS_FREE (peer, sizeof (*peer));
-
- /* NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero. */
- atomic_dec(&kibnal_data.kib_npeers);
-}
-
-kib_peer_t *
-kibnal_find_peer_locked (lnet_nid_t nid)
-{
- struct list_head *peer_list = kibnal_nid2peerlist (nid);
- struct list_head *tmp;
- kib_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry (tmp, kib_peer_t, ibp_list);
-
- LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
- peer->ibp_connecting != 0 || /* creating conns */
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns)); /* active conn */
-
- if (peer->ibp_nid != nid)
- continue;
-
- return (peer);
- }
- return (NULL);
-}
-
-kib_peer_t *
-kibnal_get_peer (lnet_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL) /* +1 ref for caller? */
- kibnal_peer_addref(peer);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- return (peer);
-}
-
-void
-kibnal_unlink_peer_locked (kib_peer_t *peer)
-{
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (list_empty(&peer->ibp_conns));
-
- LASSERT (kibnal_peer_active(peer));
- list_del_init (&peer->ibp_list);
- /* lose peerlist's ref */
- kibnal_peer_decref(peer);
-}
-
-int
-kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp,
- int *persistencep)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- unsigned long flags;
- int i;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
-
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (index-- > 0)
- continue;
-
- *nidp = peer->ibp_nid;
- *ipp = peer->ibp_ip;
- *portp = peer->ibp_port;
- *persistencep = peer->ibp_persistence;
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (0);
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (-ENOENT);
-}
-
-int
-kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port)
-{
- unsigned long flags;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- int rc;
-
- if (nid == LNET_NID_ANY)
- return (-EINVAL);
-
- rc = kibnal_create_peer (&peer, nid);
- if (rc != 0)
- return rc;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- /* I'm always called with a reference on kibnal_data.kib_ni
- * so shutdown can't have started */
- LASSERT (kibnal_data.kib_nonewpeers == 0);
-
- peer2 = kibnal_find_peer_locked (nid);
- if (peer2 != NULL) {
- kibnal_peer_decref(peer);
- peer = peer2;
- } else {
- /* peer table takes existing ref on peer */
- list_add_tail (&peer->ibp_list,
- kibnal_nid2peerlist (nid));
- }
-
- peer->ibp_ip = ip;
- peer->ibp_port = port;
- peer->ibp_persistence++;
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
- return (0);
-}
-
-void
-kibnal_del_peer_locked (kib_peer_t *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- kib_conn_t *conn;
-
- peer->ibp_persistence = 0;
-
- if (list_empty(&peer->ibp_conns)) {
- kibnal_unlink_peer_locked(peer);
- } else {
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
-
- kibnal_close_conn_locked (conn, 0);
- }
- /* NB peer is no longer persistent; closing its last conn
- * unlinked it. */
- }
- /* NB peer now unlinked; might even be freed if the peer table had the
- * last ref on it. */
-}
-
-int
-kibnal_del_peer (lnet_nid_t nid)
-{
- unsigned long flags;
- CFS_LIST_HEAD (zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- kib_peer_t *peer;
- int lo;
- int hi;
- int i;
- int rc = -ENOENT;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
- continue;
-
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT (list_empty(&peer->ibp_conns));
-
- list_splice_init(&peer->ibp_tx_queue, &zombies);
- }
-
- kibnal_del_peer_locked (peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- kibnal_txlist_done(&zombies, -EIO);
-
- return (rc);
-}
-
-kib_conn_t *
-kibnal_get_conn_by_idx (int index)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
- int i;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence > 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- list_for_each (ctmp, &peer->ibp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
- kibnal_conn_addref(conn);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (conn);
- }
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (NULL);
-}
-
-kib_conn_t *
-kibnal_create_conn (void)
-{
- kib_conn_t *conn;
- int i;
- __u64 vaddr = 0;
- __u64 vaddr_base;
- int page_offset;
- int ipage;
- int rc;
- union {
- struct ib_qp_create_param qp_create;
- struct ib_qp_attribute qp_attr;
- } params;
-
- LIBCFS_ALLOC (conn, sizeof (*conn));
- if (conn == NULL) {
- CERROR ("Can't allocate connection\n");
- return (NULL);
- }
-
- /* zero flags, NULL pointers etc... */
- memset (conn, 0, sizeof (*conn));
-
- INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
- INIT_LIST_HEAD (&conn->ibc_tx_queue);
- INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
- INIT_LIST_HEAD (&conn->ibc_active_txs);
- spin_lock_init (&conn->ibc_lock);
-
- atomic_inc (&kibnal_data.kib_nconns);
- /* well not really, but I call destroy() on failure, which decrements */
-
- LIBCFS_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
- if (conn->ibc_rxs == NULL)
- goto failed;
- memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- rc = kibnal_alloc_pages(&conn->ibc_rx_pages,
- IBNAL_RX_MSG_PAGES,
- IB_ACCESS_LOCAL_WRITE);
- if (rc != 0)
- goto failed;
-
- vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
-
- for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
- struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
- kib_rx_t *rx = &conn->ibc_rxs[i];
-
- rx->rx_conn = conn;
- rx->rx_vaddr = vaddr;
- rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
-
- vaddr += IBNAL_MSG_SIZE;
- LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
- }
- }
-
- /* We can post up to IBNAL_RX_MSGS, which may also include an
- * additional RDMA work item */
-
- params.qp_create = (struct ib_qp_create_param) {
- .limit = {
- .max_outstanding_send_request = 2 * IBNAL_RX_MSGS,
- .max_outstanding_receive_request = IBNAL_RX_MSGS,
- .max_send_gather_element = 1,
- .max_receive_scatter_element = 1,
- },
- .pd = kibnal_data.kib_pd,
- .send_queue = kibnal_data.kib_cq,
- .receive_queue = kibnal_data.kib_cq,
- .send_policy = IB_WQ_SIGNAL_SELECTABLE,
- .receive_policy = IB_WQ_SIGNAL_SELECTABLE,
- .rd_domain = 0,
- .transport = IB_TRANSPORT_RC,
- .device_specific = NULL,
- };
-
- rc = ib_qp_create (&params.qp_create, &conn->ibc_qp, &conn->ibc_qpn);
- if (rc != 0) {
- CERROR ("Failed to create queue pair: %d\n", rc);
- goto failed;
- }
-
- /* Mark QP created */
- conn->ibc_state = IBNAL_CONN_INIT_QP;
-
- params.qp_attr = (struct ib_qp_attribute) {
- .state = IB_QP_STATE_INIT,
- .port = kibnal_data.kib_port,
- .enable_rdma_read = 1,
- .enable_rdma_write = 1,
- .valid_fields = (IB_QP_ATTRIBUTE_STATE |
- IB_QP_ATTRIBUTE_PORT |
- IB_QP_ATTRIBUTE_PKEY_INDEX |
- IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE),
- };
- rc = ib_qp_modify(conn->ibc_qp, &params.qp_attr);
- if (rc != 0) {
- CERROR ("Failed to modify queue pair: %d\n", rc);
- goto failed;
- }
-
- /* 1 ref for caller */
- atomic_set (&conn->ibc_refcount, 1);
- return (conn);
-
- failed:
- kibnal_destroy_conn (conn);
- return (NULL);
-}
-
-void
-kibnal_destroy_conn (kib_conn_t *conn)
-{
- int rc;
-
- CDEBUG (D_NET, "connection %p\n", conn);
-
- LASSERT (atomic_read (&conn->ibc_refcount) == 0);
- LASSERT (list_empty(&conn->ibc_tx_queue));
- LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
- LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
- LASSERT (list_empty(&conn->ibc_active_txs));
- LASSERT (conn->ibc_nsends_posted == 0);
- LASSERT (conn->ibc_connreq == NULL);
-
- switch (conn->ibc_state) {
- case IBNAL_CONN_ZOMBIE:
- /* called after connection sequence initiated */
-
- case IBNAL_CONN_INIT_QP:
- rc = ib_qp_destroy(conn->ibc_qp);
- if (rc != 0)
- CERROR("Can't destroy QP: %d\n", rc);
- /* fall through */
-
- case IBNAL_CONN_INIT_NOTHING:
- break;
-
- default:
- LASSERT (0);
- }
-
- if (conn->ibc_rx_pages != NULL)
- kibnal_free_pages(conn->ibc_rx_pages);
-
- if (conn->ibc_rxs != NULL)
- LIBCFS_FREE(conn->ibc_rxs,
- IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- if (conn->ibc_peer != NULL)
- kibnal_peer_decref(conn->ibc_peer);
-
- LIBCFS_FREE(conn, sizeof (*conn));
-
- atomic_dec(&kibnal_data.kib_nconns);
-
- if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
- kibnal_data.kib_shutdown) {
- /* I just nuked the last connection on shutdown; wake up
- * everyone so they can exit. */
- wake_up_all(&kibnal_data.kib_sched_waitq);
- wake_up_all(&kibnal_data.kib_reaper_waitq);
- }
-}
-
-int
-kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- count++;
- kibnal_close_conn_locked (conn, why);
- }
-
- return (count);
-}
-
-int
-kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- if (conn->ibc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET, "Closing stale conn %p nid: %s"
- " incarnation:"LPX64"("LPX64")\n", conn,
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_incarnation, incarnation);
-
- count++;
- kibnal_close_conn_locked (conn, -ESTALE);
- }
-
- return (count);
-}
-
-int
-kibnal_close_matching_conns (lnet_nid_t nid)
-{
- unsigned long flags;
- kib_peer_t *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- int count = 0;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
- continue;
-
- count += kibnal_close_peer_conns_locked (peer, 0);
- }
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return (0);
-
- return (count == 0 ? -ENOENT : 0);
-}
-
-int
-kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
-
- LASSERT (ni == kibnal_data.kib_ni);
-
- switch(cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- __u32 ip = 0;
- int port = 0;
- int share_count = 0;
-
- rc = kibnal_get_peer_info(data->ioc_count,
- &nid, &ip, &port, &share_count);
- data->ioc_nid = nid;
- data->ioc_count = share_count;
- data->ioc_u32[0] = ip;
- data->ioc_u32[1] = port;
- break;
- }
- case IOC_LIBCFS_ADD_PEER: {
- rc = kibnal_add_persistent_peer (data->ioc_nid,
- data->ioc_u32[0], /* IP */
- data->ioc_u32[1]); /* port */
- break;
- }
- case IOC_LIBCFS_DEL_PEER: {
- rc = kibnal_del_peer (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
-
- if (conn == NULL)
- rc = -ENOENT;
- else {
- rc = 0;
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- kibnal_conn_decref(conn);
- }
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kibnal_close_matching_conns (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_REGISTER_MYNID: {
- /* Ignore if this is a noop */
- if (data->ioc_nid == ni->ni_nid) {
- rc = 0;
- } else {
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- rc = -EINVAL;
- }
- break;
- }
- }
-
- return rc;
-}
-
-void
-kibnal_free_pages (kib_pages_t *p)
-{
- int npages = p->ibp_npages;
- int rc;
- int i;
-
- if (p->ibp_mapped) {
- rc = ib_memory_deregister(p->ibp_handle);
- if (rc != 0)
- CERROR ("Deregister error: %d\n", rc);
- }
-
- for (i = 0; i < npages; i++)
- if (p->ibp_pages[i] != NULL)
- __free_page(p->ibp_pages[i]);
-
- LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
-}
-
-/*
- * Allocate 'npages' pages and register them as one physical memory region.
- *
- * pp      out: receives the new page set; written only on success
- * npages  number of pages to allocate and register
- * access  access flags handed unchanged to ib_memory_register_physical()
- *
- * Returns 0 on success, -ENOMEM if any allocation fails, or the error
- * returned by the registration call.  On every failure path whatever was
- * allocated so far is released with kibnal_free_pages().
- */
-int
-kibnal_alloc_pages (kib_pages_t **pp, int npages, int access)
-{
-        kib_pages_t               *p;
-        struct ib_physical_buffer *phys_pages;
-        int                        n;
-        int                        rc;
-
-        LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
-        if (p == NULL) {
-                CERROR ("Can't allocate buffer %d\n", npages);
-                return -ENOMEM;
-        }
-
-        /* all page slots start out NULL and ibp_mapped starts out clear,
-         * so kibnal_free_pages() is safe from here on */
-        memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
-        p->ibp_npages = npages;
-
-        for (n = 0; n < npages; n++) {
-                p->ibp_pages[n] = alloc_page (GFP_KERNEL);
-                if (p->ibp_pages[n] != NULL)
-                        continue;
-
-                CERROR ("Can't allocate page %d of %d\n", n, npages);
-                rc = -ENOMEM;
-                goto failed;
-        }
-
-        /* temporary array describing each page to the registration call */
-        LIBCFS_ALLOC(phys_pages, npages * sizeof(*phys_pages));
-        if (phys_pages == NULL) {
-                CERROR ("Can't allocate physarray for %d pages\n", npages);
-                rc = -ENOMEM;
-                goto failed;
-        }
-
-        for (n = 0; n < npages; n++) {
-                phys_pages[n].address = lnet_page2phys(p->ibp_pages[n]);
-                phys_pages[n].size = PAGE_SIZE;
-        }
-
-        p->ibp_vaddr = 0;
-        rc = ib_memory_register_physical(kibnal_data.kib_pd,
-                                         phys_pages, npages,
-                                         &p->ibp_vaddr,
-                                         npages * PAGE_SIZE, 0,
-                                         access,
-                                         &p->ibp_handle,
-                                         &p->ibp_lkey,
-                                         &p->ibp_rkey);
-
-        /* the description array is not needed once registration returns */
-        LIBCFS_FREE(phys_pages, npages * sizeof(*phys_pages));
-
-        if (rc != 0) {
-                CERROR ("Error %d mapping %d pages\n", rc, npages);
-                goto failed;
-        }
-
-        p->ibp_mapped = 1;
-        *pp = p;
-        return 0;
-
- failed:
-        kibnal_free_pages(p);
-        return rc;
-}
-
-/*
- * Allocate and register the pages backing the pre-mapped tx messages, then
- * initialise every tx descriptor in kibnal_data.kib_tx_descs and queue it
- * on the idle list.
- *
- * Messages are packed back to back; each descriptor records both the
- * kernel address of its message buffer and the matching address inside
- * the registered region.
- *
- * Returns 0 on success or the error from kibnal_alloc_pages().
- */
-int
-kibnal_setup_tx_descs (void)
-{
-        kib_pages_t *txpages;
-        kib_tx_t    *desc;
-        char        *base;
-        __u64        vaddr;
-        __u64        vaddr_base;
-        int          ipage = 0;
-        int          page_offset = 0;
-        int          n;
-        int          rc;
-
-        /* pre-mapped messages are not bigger than 1 page */
-        LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
-
-        /* No fancy arithmetic when we do the buffer calculations */
-        LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
-
-        /* access == 0: local read access only */
-        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
-                                IBNAL_TX_MSG_PAGES(), 0);
-        if (rc != 0)
-                return rc;
-
-        txpages    = kibnal_data.kib_tx_pages;
-        vaddr_base = txpages->ibp_vaddr;
-        vaddr      = vaddr_base;
-
-        for (n = 0; n < IBNAL_TX_MSGS(); n++) {
-                desc = &kibnal_data.kib_tx_descs[n];
-                base = (char *)page_address(txpages->ibp_pages[ipage]);
-
-                memset (desc, 0, sizeof(*desc));     /* zero flags etc */
-
-                desc->tx_msg    = (kib_msg_t *)(base + page_offset);
-                desc->tx_vaddr  = vaddr;
-                desc->tx_mapped = KIB_TX_UNMAPPED;
-
-                CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n",
-                       n, desc, desc->tx_msg, desc->tx_vaddr);
-
-                list_add (&desc->tx_list, &kibnal_data.kib_idle_txs);
-
-                /* advance to the next message slot */
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES());
-
-                page_offset += IBNAL_MSG_SIZE;
-                LASSERT (page_offset <= PAGE_SIZE);
-
-                if (page_offset != PAGE_SIZE)
-                        continue;
-
-                /* current page is full: move on to the next one */
-                page_offset = 0;
-                ipage++;
-                LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
-        }
-
-        return 0;
-}
-
-/*
- * LND shutdown; also used by kibnal_startup() to unwind a failed startup.
- *
- * kibnal_data.kib_init records how far initialisation got.  The switch
- * enters at that stage and falls through every earlier stage, so each
- * resource is released in the reverse order of its creation.  Afterwards
- * the tx descriptor array and the peer hash table are freed, the state is
- * reset to IBNAL_INIT_NOTHING and the module reference is dropped.
- */
-void
-kibnal_shutdown (lnet_ni_t *ni)
-{
-        unsigned long flags;
-        int           nwaits;
-        int           i;
-        int           rc;
-
-        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               atomic_read (&libcfs_kmemory));
-
-        LASSERT(ni == kibnal_data.kib_ni);
-        LASSERT(ni->ni_data == &kibnal_data);
-
-        switch (kibnal_data.kib_init) {
-        default:
-                CERROR ("Unexpected state %d\n", kibnal_data.kib_init);
-                LBUG();
-
-        case IBNAL_INIT_ALL:
-                /* Prevent new peers from being created */
-                write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-                kibnal_data.kib_nonewpeers = 1;
-                write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
-                kibnal_stop_ib_listener();
-
-                /* Remove all existing peers from the peer table */
-                kibnal_del_peer(LNET_NID_ANY);
-
-                /* Wait for pending conn reqs to be handled.  The lock is
-                 * held only while testing the queue; it is dropped around
-                 * the log + sleep.  The counter starts at 3 so a warning
-                 * is only emitted when it reaches a power of 2. */
-                spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-                for (nwaits = 3;
-                     !list_empty(&kibnal_data.kib_connd_acceptq);
-                     nwaits++) {
-                        spin_unlock_irqrestore(&kibnal_data.kib_connd_lock,
-                                               flags);
-
-                        CDEBUG(((nwaits & (-nwaits)) == nwaits) ?
-                               D_WARNING : D_NET, /* 2**n */
-                               "waiting for conn reqs to clean up\n");
-                        cfs_pause(cfs_time_seconds(1));
-
-                        spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-                }
-                spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
-
-                /* Wait for all peer state to clean up */
-                for (nwaits = 3;
-                     atomic_read(&kibnal_data.kib_npeers) != 0;
-                     nwaits++) {
-                        CDEBUG(((nwaits & (-nwaits)) == nwaits) ?
-                               D_WARNING : D_NET, /* power of 2? */
-                               "waiting for %d peers to close down\n",
-                               atomic_read(&kibnal_data.kib_npeers));
-                        cfs_pause(cfs_time_seconds(1));
-                }
-                /* fall through */
-
-        case IBNAL_INIT_CQ:
-                rc = ib_cq_destroy (kibnal_data.kib_cq);
-                if (rc != 0)
-                        CERROR ("Destroy CQ error: %d\n", rc);
-                /* fall through */
-
-        case IBNAL_INIT_TXD:
-                kibnal_free_pages (kibnal_data.kib_tx_pages);
-                /* fall through */
-#if IBNAL_FMR
-        case IBNAL_INIT_FMR:
-                rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
-                if (rc != 0)
-                        CERROR ("Destroy FMR pool error: %d\n", rc);
-                /* fall through */
-#endif
-        case IBNAL_INIT_PD:
-                rc = ib_pd_destroy(kibnal_data.kib_pd);
-                if (rc != 0)
-                        CERROR ("Destroy PD error: %d\n", rc);
-                /* fall through */
-
-        case IBNAL_INIT_DATA:
-                /* Module refcount only gets to zero when all peers
-                 * have been closed so all lists must be empty */
-                LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0);
-                LASSERT (kibnal_data.kib_peers != NULL);
-                for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
-                        LASSERT (list_empty (&kibnal_data.kib_peers[i]));
-                }
-                LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
-                LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
-                LASSERT (list_empty (&kibnal_data.kib_sched_txq));
-                LASSERT (list_empty (&kibnal_data.kib_reaper_conns));
-                LASSERT (list_empty (&kibnal_data.kib_connd_peers));
-                LASSERT (list_empty (&kibnal_data.kib_connd_acceptq));
-
-                /* flag threads to terminate; wake and wait for them to die */
-                kibnal_data.kib_shutdown = 1;
-                wake_up_all (&kibnal_data.kib_sched_waitq);
-                wake_up_all (&kibnal_data.kib_reaper_waitq);
-                wake_up_all (&kibnal_data.kib_connd_waitq);
-
-                for (nwaits = 3;
-                     atomic_read (&kibnal_data.kib_nthreads) != 0;
-                     nwaits++) {
-                        CDEBUG(((nwaits & (-nwaits)) == nwaits) ?
-                               D_WARNING : D_NET, /* power of 2? */
-                               "Waiting for %d threads to terminate\n",
-                               atomic_read (&kibnal_data.kib_nthreads));
-                        cfs_pause(cfs_time_seconds(1));
-                }
-                /* fall through */
-
-        case IBNAL_INIT_NOTHING:
-                break;
-        }
-
-        /* these two may be allocated before kib_init leaves
-         * IBNAL_INIT_NOTHING, so they are released unconditionally */
-        if (kibnal_data.kib_tx_descs != NULL)
-                LIBCFS_FREE (kibnal_data.kib_tx_descs,
-                             IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-
-        if (kibnal_data.kib_peers != NULL)
-                LIBCFS_FREE (kibnal_data.kib_peers,
-                             sizeof (struct list_head) *
-                             kibnal_data.kib_peer_hash_size);
-
-        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               atomic_read (&libcfs_kmemory));
-
-        kibnal_data.kib_init = IBNAL_INIT_NOTHING;
-        PORTAL_MODULE_UNUSE;
-}
-
-/*
- * Work out the index used to name the IPoIB interface for the configured
- * HCA/port (kibnal_data.kib_hca_idx / kib_port).
- *
- * Devices 0..kib_hca_idx are walked in order and ports 1 and 2 of each are
- * probed; every port whose properties can be read before the configured
- * one is reached counts as one interface.
- *
- * Returns the index, or -1 if a device cannot be obtained.  Reaching the
- * end of the walk without meeting the configured port is treated as a bug.
- */
-int
-kibnal_get_ipoibidx(void)
-{
-        /* NB single threaded! */
-        static struct ib_port_properties port_props;
-
-        struct ib_device *dev;
-        int               nfound = 0;
-        int               hca;
-        int               p;
-
-        for (hca = 0; hca <= kibnal_data.kib_hca_idx; hca++) {
-                dev = ib_device_get_by_index(hca);
-                if (dev == NULL) {
-                        CERROR("Can't get IB device %d\n", hca);
-                        return -1;
-                }
-
-                for (p = 1; p <= 2; p++) {
-                        int is_target = (hca == kibnal_data.kib_hca_idx &&
-                                         p == kibnal_data.kib_port);
-
-                        if (is_target)
-                                return nfound;
-
-                        /* only ports that can be queried are counted */
-                        if (ib_port_properties_get(dev, p, &port_props) == 0)
-                                nfound++;
-                }
-        }
-
-        LBUG();
-        return -1;
-}
-
-/*
- * LND startup: bring up the (single) openib instance on 'ni'.
- *
- * Stages, each recorded in kibnal_data.kib_init so kibnal_shutdown() knows
- * how much to undo:
- *   - validate tunables and parse the optional "hca[.port]" interface spec
- *   - allocate the peer hash table and tx descriptors, initialise locks,
- *     lists and wait queues                          (IBNAL_INIT_DATA)
- *   - start scheduler, connd and reaper threads
- *   - open the HCA, pick a port, derive the NID from the IPoIB interface
- *   - create the PD                                  (IBNAL_INIT_PD)
- *   - create the FMR pool when IBNAL_FMR             (IBNAL_INIT_FMR)
- *   - allocate/register tx message pages             (IBNAL_INIT_TXD)
- *   - create the CQ                                  (IBNAL_INIT_CQ)
- *   - start the IB listener                          (IBNAL_INIT_ALL)
- *
- * Returns 0 on success.  Errors detected before the module reference is
- * taken return -EPERM/-EINVAL directly; every later failure is unwound
- * through kibnal_shutdown() and returns -ENETDOWN (-EINVAL for a truncated
- * IPoIB interface name).
- */
-int
-kibnal_startup (lnet_ni_t *ni)
-{
-        char                      ipif_name[32];
-        __u32                     ip;
-        __u32                     netmask;
-        int                       up;
-        struct timeval            tv;
-        int                       rc;
-        int                       hca;
-        int                       port;
-        int                       i;
-        int                       nob;
-
-        LASSERT (ni->ni_lnd == &the_kiblnd);
-
-        /* Only 1 instance supported */
-        if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
-                CERROR ("Only 1 instance supported\n");
-                return -EPERM;
-        }
-
-        if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
-                CERROR ("Can't set credits(%d) > ntx(%d)\n",
-                        *kibnal_tunables.kib_credits,
-                        *kibnal_tunables.kib_ntx);
-                return -EINVAL;
-        }
-
-        memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
-
-        ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
-        ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
-
-        CLASSERT (LNET_MAX_INTERFACES > 1);
-
-        kibnal_data.kib_hca_idx = 0;            /* default: first HCA */
-        kibnal_data.kib_port = 0;               /* any port */
-
-        if (ni->ni_interfaces[0] != NULL) {
-                /* hca.port specified in 'networks=openib(h.p)' */
-                if (ni->ni_interfaces[1] != NULL) {
-                        CERROR("Multiple interfaces not supported\n");
-                        return -EPERM;
-                }
-
-                /* 'nob' is preset to the full length and then overwritten by
-                 * %n with the number of characters consumed; the spec is
-                 * accepted only if the whole string was consumed */
-                nob = strlen(ni->ni_interfaces[0]);
-                i = sscanf(ni->ni_interfaces[0], "%d.%d%n", &hca, &port, &nob);
-                if (i >= 2 && nob == strlen(ni->ni_interfaces[0])) {
-                        kibnal_data.kib_hca_idx = hca;
-                        kibnal_data.kib_port = port;
-                } else {
-                        /* no ".port": try a bare HCA index */
-                        nob = strlen(ni->ni_interfaces[0]);
-                        i = sscanf(ni->ni_interfaces[0], "%d%n", &hca, &nob);
-
-                        if (i >= 1 && nob == strlen(ni->ni_interfaces[0])) {
-                                kibnal_data.kib_hca_idx = hca;
-                        } else {
-                                CERROR("Can't parse interface '%s'\n",
-                                       ni->ni_interfaces[0]);
-                                return -EINVAL;
-                        }
-                }
-        }
-
-        kibnal_data.kib_ni = ni;
-        ni->ni_data = &kibnal_data;
-
-        do_gettimeofday(&tv);
-        kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
-        /* From here on every failure must go through kibnal_shutdown(),
-         * which drops this module reference again */
-        PORTAL_MODULE_USE;
-
-        rwlock_init(&kibnal_data.kib_global_lock);
-
-        kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
-        LIBCFS_ALLOC (kibnal_data.kib_peers,
-                      sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
-        if (kibnal_data.kib_peers == NULL) {
-                goto failed;
-        }
-        for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
-                INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
-
-        spin_lock_init (&kibnal_data.kib_reaper_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_reaper_conns);
-        init_waitqueue_head (&kibnal_data.kib_reaper_waitq);
-
-        spin_lock_init (&kibnal_data.kib_connd_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_connd_acceptq);
-        INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
-        init_waitqueue_head (&kibnal_data.kib_connd_waitq);
-
-        spin_lock_init (&kibnal_data.kib_sched_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
-        init_waitqueue_head (&kibnal_data.kib_sched_waitq);
-
-        spin_lock_init (&kibnal_data.kib_tx_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-
-        LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
-                      IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-        if (kibnal_data.kib_tx_descs == NULL) {
-                CERROR ("Can't allocate tx descs\n");
-                goto failed;
-        }
-
-        /* lists/ptrs/locks initialised */
-        kibnal_data.kib_init = IBNAL_INIT_DATA;
-        /*****************************************************/
-
-        for (i = 0; i < IBNAL_N_SCHED; i++) {
-                rc = kibnal_thread_start (kibnal_scheduler,
-                                          (void *)((unsigned long)i));
-                if (rc != 0) {
-                        CERROR("Can't spawn openibnal scheduler[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
-        /* must have at least 2 connds to remain responsive to svcqry while
-         * connecting */
-        if (*kibnal_tunables.kib_n_connd < 2)
-                *kibnal_tunables.kib_n_connd = 2;
-
-        for (i = 0; i < *kibnal_tunables.kib_n_connd; i++) {
-                rc = kibnal_thread_start (kibnal_connd,
-                                          (void *)((unsigned long)i));
-                if (rc != 0) {
-                        CERROR("Can't spawn openibnal connd[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
-        rc = kibnal_thread_start (kibnal_reaper, NULL);
-        if (rc != 0) {
-                CERROR ("Can't spawn openibnal reaper: %d\n", rc);
-                goto failed;
-        }
-
-        kibnal_data.kib_device = ib_device_get_by_index(kibnal_data.kib_hca_idx);
-        if (kibnal_data.kib_device == NULL) {
-                CERROR ("Can't open ib device %d\n",
-                        kibnal_data.kib_hca_idx);
-                goto failed;
-        }
-
-        rc = ib_device_properties_get(kibnal_data.kib_device,
-                                      &kibnal_data.kib_device_props);
-        if (rc != 0) {
-                CERROR ("Can't get device props: %d\n", rc);
-                goto failed;
-        }
-
-        CDEBUG(D_NET, "Max Initiator: %d Max Responder %d\n",
-               kibnal_data.kib_device_props.max_initiator_per_qp,
-               kibnal_data.kib_device_props.max_responder_per_qp);
-
-        if (kibnal_data.kib_port != 0) {
-                /* explicit port requested: it must be usable */
-                rc = ib_port_properties_get(kibnal_data.kib_device,
-                                            kibnal_data.kib_port,
-                                            &kibnal_data.kib_port_props);
-                if (rc != 0) {
-                        CERROR("Error %d open port %d on HCA %d\n", rc,
-                               kibnal_data.kib_port,
-                               kibnal_data.kib_hca_idx);
-                        goto failed;
-                }
-        } else {
-                /* no port requested: take the first of ports 1..2 whose
-                 * properties can be read */
-                for (i = 1; i <= 2; i++) {
-                        rc = ib_port_properties_get(kibnal_data.kib_device, i,
-                                                    &kibnal_data.kib_port_props);
-                        if (rc == 0) {
-                                kibnal_data.kib_port = i;
-                                break;
-                        }
-                }
-                if (kibnal_data.kib_port == 0) {
-                        CERROR ("Can't find a port\n");
-                        goto failed;
-                }
-        }
-
-        i = kibnal_get_ipoibidx();
-        if (i < 0)
-                goto failed;
-
-        snprintf(ipif_name, sizeof(ipif_name), "%s%d",
-                 *kibnal_tunables.kib_ipif_basename, i);
-        if (strlen(ipif_name) == sizeof(ipif_name) - 1) {
-                CERROR("IPoIB interface name %s truncated\n", ipif_name);
-                /* Threads, allocations and the module reference are already
-                 * held at this point, so unwind them instead of returning
-                 * directly; otherwise kib_init stays != IBNAL_INIT_NOTHING
-                 * and no later startup can succeed. */
-                kibnal_shutdown(ni);
-                return -EINVAL;
-        }
-
-        rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
-        if (rc != 0) {
-                CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
-                goto failed;
-        }
-
-        if (!up) {
-                CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
-                goto failed;
-        }
-
-        /* the NID's address part is the IPoIB interface's IP address */
-        ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
-
-        rc = ib_pd_create(kibnal_data.kib_device,
-                          NULL, &kibnal_data.kib_pd);
-        if (rc != 0) {
-                CERROR ("Can't create PD: %d\n", rc);
-                goto failed;
-        }
-
-        /* flag PD initialised */
-        kibnal_data.kib_init = IBNAL_INIT_PD;
-        /*****************************************************/
-#if IBNAL_FMR
-        {
-                const int pool_size = *kibnal_tunables.kib_ntx;
-                struct ib_fmr_pool_param params = {
-                        .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE,
-                        .access            = (IB_ACCESS_LOCAL_WRITE |
-                                              IB_ACCESS_REMOTE_WRITE |
-                                              IB_ACCESS_REMOTE_READ),
-                        .pool_size         = pool_size,
-                        .dirty_watermark   = (pool_size * 3)/4,
-                        .flush_function    = NULL,
-                        .flush_arg         = NULL,
-                        .cache             = 1,
-                };
-                rc = ib_fmr_pool_create(kibnal_data.kib_pd, &params,
-                                        &kibnal_data.kib_fmr_pool);
-                if (rc != 0) {
-                        CERROR ("Can't create FMR pool size %d: %d\n",
-                                pool_size, rc);
-                        goto failed;
-                }
-        }
-
-        /* flag FMR pool initialised */
-        kibnal_data.kib_init = IBNAL_INIT_FMR;
-#endif
-        /*****************************************************/
-
-        rc = kibnal_setup_tx_descs();
-        if (rc != 0) {
-                CERROR ("Can't register tx descs: %d\n", rc);
-                goto failed;
-        }
-
-        /* flag TX descs initialised */
-        kibnal_data.kib_init = IBNAL_INIT_TXD;
-        /*****************************************************/
-
-        {
-                struct ib_cq_callback callback = {
-                        .context        = IBNAL_CALLBACK_CTXT,
-                        .policy         = IB_CQ_PROVIDER_REARM,
-                        .function       = {
-                                .entry  = kibnal_callback,
-                        },
-                        .arg            = NULL,
-                };
-                int  nentries = IBNAL_CQ_ENTRIES();
-
-                rc = ib_cq_create (kibnal_data.kib_device,
-                                   &nentries, &callback, NULL,
-                                   &kibnal_data.kib_cq);
-                if (rc != 0) {
-                        CERROR ("Can't create CQ: %d\n", rc);
-                        goto failed;
-                }
-
-                /* I only want solicited events */
-                rc = ib_cq_request_notification(kibnal_data.kib_cq, 1);
-                LASSERT (rc == 0);
-        }
-
-        /* flag CQ initialised */
-        kibnal_data.kib_init = IBNAL_INIT_CQ;
-        /*****************************************************/
-
-        rc = kibnal_start_ib_listener();
-        if (rc != 0)
-                goto failed;
-
-        /* flag everything initialised */
-        kibnal_data.kib_init = IBNAL_INIT_ALL;
-        /*****************************************************/
-
-        return 0;
-
- failed:
-        /* kib_init tells kibnal_shutdown() how much there is to undo */
-        kibnal_shutdown(ni);
-        return -ENETDOWN;
-}
-
-/*
- * Module unload hook: unregister the LND from LNet, then release the
- * tunables set up by kibnal_module_init().
- */
-void __exit kibnal_module_fini (void)
-{
-        lnet_unregister_lnd(&the_kiblnd);
-
-        kibnal_tunables_fini();
-}
-
-/*
- * Module load hook: initialise the tunables and, if that succeeds,
- * register the LND with LNet.
- *
- * Returns 0 on success or the error from kibnal_tunables_init().
- */
-int __init
-kibnal_module_init (void)
-{
-        int rc = kibnal_tunables_init();
-
-        /* the LND is only registered once its tunables are available */
-        if (rc == 0)
-                lnet_register_lnd(&the_kiblnd);
-
-        return rc;
-}
-
-/* Module metadata; the description depends on which IB stack this was
- * built against (USING_TSAPI selects the Cisco variant). */
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-#ifndef USING_TSAPI
-MODULE_DESCRIPTION("Kernel OpenIB(gen1) LND v1.00");
-#else
-MODULE_DESCRIPTION("Kernel Cisco IB LND v1.00");
-#endif
-MODULE_LICENSE("GPL");
-
-/* load/unload entry points */
-module_init(kibnal_module_init);
-module_exit(kibnal_module_fini);