conn->ibc_credits += credits;
+ /* This ensures the credit taken by NOOP can be returned */
+ if (msg->ibm_type == IBLND_MSG_NOOP &&
+ !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
+ conn->ibc_outstanding_credits++;
+
cfs_spin_unlock(&conn->ibc_lock);
kiblnd_check_sends(conn);
}
break;
case IBLND_MSG_NOOP:
- if (IBLND_OOB_CAPABLE(conn->ibc_version))
+ if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
post_credit = IBLND_POSTRX_NO_CREDIT;
- else
+ break;
+ }
+
+ if (credits != 0) /* credit already posted */
+ post_credit = IBLND_POSTRX_NO_CREDIT;
+ else /* a keepalive NOOP */
post_credit = IBLND_POSTRX_PEER_CREDIT;
break;
}
if (credit != 0 && !IBLND_OOB_CAPABLE(ver) &&
- conn->ibc_credits == 1 && /* last credit reserved for */
- conn->ibc_outstanding_credits == 0) { /* giving back credits */
+ conn->ibc_credits == 1 && /* last credit reserved */
+ msg->ibm_type != IBLND_MSG_NOOP) { /* for NOOP */
CDEBUG(D_NET, "%s: not using last credit\n",
libcfs_nid2str(peer->ibp_nid));
return -EAGAIN;
credit = 0;
tx = cfs_list_entry(conn->ibc_tx_queue_nocred.next,
kib_tx_t, tx_list);
+ } else if (!cfs_list_empty(&conn->ibc_tx_noops)) {
+ LASSERT (!IBLND_OOB_CAPABLE(ver));
+ credit = 1;
+ tx = cfs_list_entry(conn->ibc_tx_noops.next,
+ kib_tx_t, tx_list);
} else if (!cfs_list_empty(&conn->ibc_tx_queue)) {
credit = 1;
tx = cfs_list_entry(conn->ibc_tx_queue.next,
if (IBLND_OOB_CAPABLE(conn->ibc_version))
q = &conn->ibc_tx_queue_nocred;
else
- q = &conn->ibc_tx_queue;
+ q = &conn->ibc_tx_noops;
break;
case IBLND_MSG_IMMEDIATE:
kiblnd_check_sends(conn);
}
+/* Resolve 'dstaddr' through 'cmid' while binding the local end to a
+ * privileged (reserved, < PROT_SOCK) source port, so the passive side
+ * can check the request came from a privileged process.
+ * Returns 0 on success; a negative errno on failure (on fall-through
+ * from the loop, the error from the last port tried). */
+static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
+ struct sockaddr_in *srcaddr,
+ struct sockaddr_in *dstaddr,
+ int timeout_ms)
+{
+ unsigned short port;
+ int rc;
+
+#ifdef HAVE_OFED_RDMA_SET_REUSEADDR
+ /* allow the port to be reused */
+ rc = rdma_set_reuseaddr(cmid, 1);
+ if (rc != 0) {
+ CERROR("Unable to set reuse on cmid: %d\n", rc);
+ return rc;
+ }
+#endif
+
+ /* look for a free privileged port */
+ for (port = PROT_SOCK-1; port > 0; port--) {
+ srcaddr->sin_port = htons(port);
+ rc = rdma_resolve_addr(cmid,
+ (struct sockaddr *)srcaddr,
+ (struct sockaddr *)dstaddr,
+ timeout_ms);
+ if (rc == 0) {
+ CDEBUG(D_NET, "bound to port %hu\n", port);
+ return 0;
+ } else if (rc == -EADDRINUSE || rc == -EADDRNOTAVAIL) {
+ /* port already taken (or unusable without
+ * rdma_set_reuseaddr) -- try the next one */
+ CDEBUG(D_NET, "bind to port %hu failed: %d\n",
+ port, rc);
+ } else {
+ /* any other error is fatal; give up */
+ return rc;
+ }
+ }
+
+ /* NOTE(review): loop always runs at least once (PROT_SOCK > 1),
+ * so 'rc' below is the failure from the last port tried */
+ CERROR("Failed to bind to a free privileged port\n");
+#ifndef HAVE_OFED_RDMA_SET_REUSEADDR
+ CERROR("You may need IB verbs that supports rdma_set_reuseaddr()\n");
+#endif
+ return rc;
+}
+
void
kiblnd_connect_peer (kib_peer_t *peer)
{
kiblnd_peer_addref(peer); /* cmid's ref */
- rc = rdma_resolve_addr(cmid,
- (struct sockaddr *)&srcaddr,
- (struct sockaddr *)&dstaddr,
- *kiblnd_tunables.kib_timeout * 1000);
- if (rc == 0) {
- LASSERT (cmid->device != NULL);
- CDEBUG(D_NET, "%s: connection bound to %s:%u.%u.%u.%u:%s\n",
- libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
- HIPQUAD(dev->ibd_ifip), cmid->device->name);
- return;
+ if (*kiblnd_tunables.kib_use_priv_port) {
+ rc = kiblnd_resolve_addr(cmid, &srcaddr, &dstaddr,
+ *kiblnd_tunables.kib_timeout * 1000);
+ } else {
+ rc = rdma_resolve_addr(cmid,
+ (struct sockaddr *)&srcaddr,
+ (struct sockaddr *)&dstaddr,
+ *kiblnd_tunables.kib_timeout * 1000);
+ }
+ if (rc != 0) {
+ /* Can't initiate address resolution: */
+ CERROR("Can't resolve addr for %s: %d\n",
+ libcfs_nid2str(peer->ibp_nid), rc);
+ goto failed2;
}
- /* Can't initiate address resolution: */
- CERROR("Can't resolve addr for %s: %d\n",
- libcfs_nid2str(peer->ibp_nid), rc);
+ LASSERT (cmid->device != NULL);
+ CDEBUG(D_NET, "%s: connection bound to %s:%u.%u.%u.%u:%s\n",
+ libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
+ HIPQUAD(dev->ibd_ifip), cmid->device->name);
+
+ return;
+ failed2:
kiblnd_peer_decref(peer); /* cmid's ref */
rdma_destroy_id(cmid);
failed:
return; /* already being handled */
if (error == 0 &&
+ cfs_list_empty(&conn->ibc_tx_noops) &&
cfs_list_empty(&conn->ibc_tx_queue) &&
cfs_list_empty(&conn->ibc_tx_queue_rsrvd) &&
cfs_list_empty(&conn->ibc_tx_queue_nocred) &&
CDEBUG(D_NET, "closing conn to %s\n",
libcfs_nid2str(peer->ibp_nid));
} else {
- CNETERR("Closing conn to %s: error %d%s%s%s%s\n",
+ CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
libcfs_nid2str(peer->ibp_nid), error,
cfs_list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
+ cfs_list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
cfs_list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
cfs_list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
cfs_list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
/* Complete all tx descs not waiting for sends to complete.
* NB we should be safe from RDMA now that the QP has changed state */
+ kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
int version = IBLND_MSG_VERSION;
unsigned long flags;
int rc;
-
+ struct sockaddr_in *peer_addr;
LASSERT (!cfs_in_interrupt());
/* cmid inherits 'context' from the corresponding listener id */
rej.ibr_why = IBLND_REJECT_FATAL;
rej.ibr_cp.ibcp_max_msg_size = IBLND_MSG_SIZE;
+ peer_addr = (struct sockaddr_in *)&(cmid->route.addr.dst_addr);
+ if (*kiblnd_tunables.kib_require_priv_port &&
+ ntohs(peer_addr->sin_port) >= PROT_SOCK) {
+ __u32 ip = ntohl(peer_addr->sin_addr.s_addr);
+ CERROR("Peer's port (%u.%u.%u.%u:%hu) is not privileged\n",
+ HIPQUAD(ip), ntohs(peer_addr->sin_port));
+ goto failed;
+ }
+
if (priv_nob < offsetof(kib_msg_t, ibm_type)) {
CERROR("Short connection request\n");
goto failed;
kiblnd_conn_timed_out (kib_conn_t *conn)
{
return kiblnd_check_txs(conn, &conn->ibc_tx_queue) ||
+ kiblnd_check_txs(conn, &conn->ibc_tx_noops) ||
kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
kiblnd_check_txs(conn, &conn->ibc_active_txs);