/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2006 Cluster File Systems, Inc.
- * Author: Eric Barton <eric@bartonsoftware.com>
+ * GPL HEADER START
*
- * This file is part of Lustre, http://www.lustre.org.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see [sun.com URL with a
+ * copy of GPLv2].
*
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lnet/klnds/o2iblnd/o2iblnd_cb.c
+ *
+ * Author: Eric Barton <eric@bartonsoftware.com>
*/
#include "o2iblnd.h"
conn->ibc_credits += credits;
+ /* This ensures the credit taken by NOOP can be returned */
+ if (msg->ibm_type == IBLND_MSG_NOOP)
+ conn->ibc_outstanding_credits++;
+
spin_unlock(&conn->ibc_lock);
kiblnd_check_sends(conn);
}
break;
case IBLND_MSG_NOOP:
- post_credit = IBLND_POSTRX_PEER_CREDIT;
+ if (credits != 0) /* credit already posted */
+ post_credit = IBLND_POSTRX_NO_CREDIT;
+ else /* a keepalive NOOP */
+ post_credit = IBLND_POSTRX_PEER_CREDIT;
break;
case IBLND_MSG_IMMEDIATE:
LASSERT (net != NULL);
LASSERT (rx->rx_nob < 0); /* was posted */
rx->rx_nob = 0; /* isn't now */
-
+
if (conn->ibc_state > IBLND_CONN_ESTABLISHED)
goto ignore;
conn->ibc_reserved_credits--;
}
- if (list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- (conn->ibc_outstanding_credits >= IBLND_CREDIT_HIGHWATER ||
- kiblnd_send_keepalive(conn))) {
+ if (kiblnd_send_noop(conn)) {
spin_unlock(&conn->ibc_lock);
tx = kiblnd_get_idle_tx(ni);
}
for (;;) {
- if (!list_empty (&conn->ibc_tx_queue_nocred)) {
- tx = list_entry (conn->ibc_tx_queue_nocred.next,
- kib_tx_t, tx_list);
+ if (!list_empty(&conn->ibc_tx_queue_nocred)) {
+ tx = list_entry(conn->ibc_tx_queue_nocred.next,
+ kib_tx_t, tx_list);
consume_cred = 0;
- } else if (!list_empty (&conn->ibc_tx_queue)) {
- tx = list_entry (conn->ibc_tx_queue.next,
- kib_tx_t, tx_list);
+ } else if (!list_empty(&conn->ibc_tx_noops)) {
+ tx = list_entry(conn->ibc_tx_noops.next,
+ kib_tx_t, tx_list);
+ consume_cred = 1;
+ } else if (!list_empty(&conn->ibc_tx_queue)) {
+ tx = list_entry(conn->ibc_tx_queue.next,
+ kib_tx_t, tx_list);
consume_cred = 1;
} else {
/* nothing to send right now */
if (conn->ibc_credits == 0) { /* no credits */
CDEBUG(D_NET, "%s: no credits\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
+ break; /* NB ibc_tx_queue_nocred checked */
}
- if (conn->ibc_credits == 1 && /* last credit reserved for */
- conn->ibc_outstanding_credits == 0) { /* giving back credits */
+ /* Last credit reserved for NOOP */
+ if (conn->ibc_credits == 1 &&
+ tx->tx_msg->ibm_type != IBLND_MSG_NOOP) {
CDEBUG(D_NET, "%s: not using last credit\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
+ break; /* NB ibc_tx_noops checked */
}
}
- list_del (&tx->tx_list);
+ list_del(&tx->tx_list);
tx->tx_queued = 0;
/* NB don't drop ibc_lock before bumping tx_sending */
if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP &&
- (!list_empty(&conn->ibc_tx_queue) ||
- !list_empty(&conn->ibc_tx_queue_nocred) ||
- (conn->ibc_outstanding_credits < IBLND_CREDIT_HIGHWATER &&
- !kiblnd_send_keepalive(conn)))) {
+ !kiblnd_send_noop(conn)) {
/* redundant NOOP */
spin_unlock(&conn->ibc_lock);
kiblnd_tx_done(ni, tx);
dstfrag++;
dstidx++;
}
-
+
tx->tx_nwrq++;
}
break;
case IBLND_MSG_NOOP:
+ q = &conn->ibc_tx_noops;
+ break;
+
case IBLND_MSG_IMMEDIATE:
q = &conn->ibc_tx_queue;
break;
kiblnd_connect_peer (kib_peer_t *peer)
{
struct rdma_cm_id *cmid;
- struct sockaddr_in sockaddr;
+ kib_net_t *net = peer->ibp_ni->ni_data;
+ struct sockaddr_in srcaddr;
+ struct sockaddr_in dstaddr;
int rc;
+ LASSERT (net != NULL);
LASSERT (peer->ibp_connecting > 0);
cmid = rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP);
goto failed;
}
- memset(&sockaddr, 0, sizeof(sockaddr));
- sockaddr.sin_family = AF_INET;
- sockaddr.sin_port = htons(*kiblnd_tunables.kib_service);
- sockaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
+ memset(&srcaddr, 0, sizeof(srcaddr));
+ srcaddr.sin_family = AF_INET;
+ srcaddr.sin_addr.s_addr = htonl(net->ibn_dev->ibd_ifip);
+
+ memset(&dstaddr, 0, sizeof(dstaddr));
+ dstaddr.sin_family = AF_INET;
+ dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
+ dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
kiblnd_peer_addref(peer); /* cmid's ref */
- rc = rdma_resolve_addr(cmid, NULL, (struct sockaddr *)&sockaddr,
+ rc = rdma_resolve_addr(cmid,
+ (struct sockaddr *)&srcaddr,
+ (struct sockaddr *)&dstaddr,
*kiblnd_tunables.kib_timeout * 1000);
if (rc == 0)
return;
LASSERT (peer->ibp_connecting == 0);
peer->ibp_connecting = 1;
+ /* always called with a ref on ni, which prevents ni being shut down */
+ LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0);
+
list_add_tail(&tx->tx_list, &peer->ibp_tx_queue);
kiblnd_peer_addref(peer);
time_t last_alive = 0;
int error = 0;
unsigned long flags;
-
+
read_lock_irqsave(&kiblnd_data.kib_global_lock, flags);
if (list_empty(&peer->ibp_conns) &&
peer->ibp_error != 0) {
error = peer->ibp_error;
peer->ibp_error = 0;
-
+
last_alive = cfs_time_current_sec() -
cfs_duration_sec(cfs_time_current() -
peer->ibp_last_alive);
}
-
+
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
-
+
if (error != 0)
lnet_notify(peer->ibp_ni,
peer->ibp_nid, 0, last_alive);
return; /* already being handled */
if (error == 0 &&
+ list_empty(&conn->ibc_tx_noops) &&
list_empty(&conn->ibc_tx_queue) &&
list_empty(&conn->ibc_tx_queue_rsrvd) &&
list_empty(&conn->ibc_tx_queue_nocred) &&
CDEBUG(D_NET, "closing conn to %s\n",
libcfs_nid2str(peer->ibp_nid));
} else {
- CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s\n",
+ CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s%s\n",
libcfs_nid2str(peer->ibp_nid), error,
list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
+ list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
/* Complete all tx descs not waiting for sends to complete.
* NB we should be safe from RDMA now that the QP has changed state */
+ kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
if (status != 0) {
/* failed to establish connection */
- kiblnd_peer_connect_failed(conn->ibc_peer, active, status);
+ kiblnd_peer_connect_failed(peer, active, status);
kiblnd_finalise_conn(conn);
return;
}
else
peer->ibp_accepting--;
- kiblnd_close_stale_conns_locked(conn->ibc_peer,
- conn->ibc_incarnation);
+ kiblnd_close_stale_conns_locked(peer, conn->ibc_incarnation);
+
+ /* grab pending txs while I have the lock */
+ list_add(&txs, &peer->ibp_tx_queue);
+ list_del_init(&peer->ibp_tx_queue);
if (!kiblnd_peer_active(peer) || /* peer has been deleted */
conn->ibc_comms_error != 0) { /* error has happened already */
+ lnet_ni_t *ni = peer->ibp_ni;
/* start to shut down connection */
kiblnd_close_conn_locked(conn, -ECONNABORTED);
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+ kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
+
return;
}
- /* grab pending txs while I have the lock */
- list_add(&txs, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
/* Schedule blocked txs */
if (reqmsg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) {
CERROR("Can't accept %s: incompatible max_frags %d (%d wanted)\n",
libcfs_nid2str(nid),
- reqmsg->ibm_u.connparams.ibcp_queue_depth,
- IBLND_MSG_QUEUE_SIZE);
+ reqmsg->ibm_u.connparams.ibcp_max_frags,
+ IBLND_MAX_RDMA_FRAGS);
goto failed;
}
LASSERT (peer->ibp_accepting == 0);
peer->ibp_accepting = 1;
+ /* I have a ref on ni that prevents it being shut down */
+ LASSERT (((kib_net_t *)ni->ni_data)->ibn_shutdown == 0);
+
kiblnd_peer_addref(peer);
list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid));
/* conn now "owns" cmid, so I return success from here on to ensure the
* CM callback doesn't destroy cmid. */
- conn->ibc_incarnation = reqmsg->ibm_srcstamp;
- conn->ibc_credits = IBLND_MSG_QUEUE_SIZE;
+ conn->ibc_incarnation = reqmsg->ibm_srcstamp;
+ conn->ibc_credits = IBLND_MSG_QUEUE_SIZE;
conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE;
LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
<= IBLND_RX_MSGS);
if (msg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) {
CERROR("%s has incompatible max_frags %d (%d wanted)\n",
libcfs_nid2str(peer->ibp_nid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- IBLND_MSG_QUEUE_SIZE);
+ msg->ibm_u.connparams.ibcp_max_frags,
+ IBLND_MAX_RDMA_FRAGS);
rc = -EPROTO;
goto failed;
}
goto failed;
}
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = IBLND_MSG_QUEUE_SIZE;
+ conn->ibc_incarnation = msg->ibm_srcstamp;
+ conn->ibc_credits = IBLND_MSG_QUEUE_SIZE;
conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE;
LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
<= IBLND_RX_MSGS);
kiblnd_conn_decref(conn);
return 0;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- LCONSOLE_ERROR_MSG(0x131,
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ LCONSOLE_ERROR_MSG(0x131,
"Received notification of device removal\n"
"Please shutdown LNET to allow this to proceed\n");
/* Can't remove network from underneath LNET for now, so I have
* to ignore this */
- return 0;
- }
+ return 0;
+ }
}
int
} else {
LASSERT (!tx->tx_queued);
LASSERT (tx->tx_waiting || tx->tx_sending != 0);
- }
+ }
if (time_after_eq (jiffies, tx->tx_deadline)) {
timed_out = 1;
kiblnd_conn_timed_out (kib_conn_t *conn)
{
return kiblnd_check_txs(conn, &conn->ibc_tx_queue) ||
+ kiblnd_check_txs(conn, &conn->ibc_tx_noops) ||
kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
kiblnd_check_txs(conn, &conn->ibc_active_txs);