Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd_cb.c
index 3f271df..dfe6919 100644 (file)
@@ -376,6 +376,10 @@ kiblnd_handle_rx (kib_rx_t *rx)
 
                 conn->ibc_credits += credits;
 
+                /* This ensures the credit taken by NOOP can be returned */
+                if (msg->ibm_type == IBLND_MSG_NOOP)
+                        conn->ibc_outstanding_credits++;
+
                 spin_unlock(&conn->ibc_lock);
                 kiblnd_check_sends(conn);
         }
@@ -389,7 +393,10 @@ kiblnd_handle_rx (kib_rx_t *rx)
                 break;
 
         case IBLND_MSG_NOOP:
-                post_credit = IBLND_POSTRX_PEER_CREDIT;
+                if (credits != 0) /* credit already posted */
+                        post_credit = IBLND_POSTRX_NO_CREDIT;
+                else              /* a keepalive NOOP */
+                        post_credit = IBLND_POSTRX_PEER_CREDIT;
                 break;
 
         case IBLND_MSG_IMMEDIATE:
@@ -887,10 +894,7 @@ kiblnd_check_sends (kib_conn_t *conn)
                 conn->ibc_reserved_credits--;
         }
 
-        if (list_empty(&conn->ibc_tx_queue) &&
-            list_empty(&conn->ibc_tx_queue_nocred) &&
-            (conn->ibc_outstanding_credits >= IBLND_CREDIT_HIGHWATER ||
-             kiblnd_send_keepalive(conn))) {
+        if (kiblnd_send_noop(conn)) {
                 spin_unlock(&conn->ibc_lock);
 
                 tx = kiblnd_get_idle_tx(ni);
@@ -904,13 +908,17 @@ kiblnd_check_sends (kib_conn_t *conn)
         }
 
         for (;;) {
-                if (!list_empty (&conn->ibc_tx_queue_nocred)) {
-                        tx = list_entry (conn->ibc_tx_queue_nocred.next, 
-                                         kib_tx_t, tx_list);
+                if (!list_empty(&conn->ibc_tx_queue_nocred)) {
+                        tx = list_entry(conn->ibc_tx_queue_nocred.next, 
+                                        kib_tx_t, tx_list);
                         consume_cred = 0;
-                } else if (!list_empty (&conn->ibc_tx_queue)) {
-                        tx = list_entry (conn->ibc_tx_queue.next,
-                                         kib_tx_t, tx_list);
+                } else if (!list_empty(&conn->ibc_tx_noops)) {
+                        tx = list_entry(conn->ibc_tx_noops.next,
+                                        kib_tx_t, tx_list);
+                        consume_cred = 1;
+                } else if (!list_empty(&conn->ibc_tx_queue)) {
+                        tx = list_entry(conn->ibc_tx_queue.next,
+                                        kib_tx_t, tx_list);
                         consume_cred = 1;
                 } else {
                         /* nothing to send right now */
@@ -939,27 +947,25 @@ kiblnd_check_sends (kib_conn_t *conn)
                         if (conn->ibc_credits == 0) {   /* no credits */
                                 CDEBUG(D_NET, "%s: no credits\n",
                                        libcfs_nid2str(conn->ibc_peer->ibp_nid));
-                                break;
+                                break; /* NB ibc_tx_queue_nocred checked */
                         }
 
-                        if (conn->ibc_credits == 1 &&   /* last credit reserved for */
-                            conn->ibc_outstanding_credits == 0) { /* giving back credits */
+                        /* Last credit reserved for NOOP */
+                        if (conn->ibc_credits == 1 &&
+                            tx->tx_msg->ibm_type != IBLND_MSG_NOOP) {
                                 CDEBUG(D_NET, "%s: not using last credit\n",
                                        libcfs_nid2str(conn->ibc_peer->ibp_nid));
-                                break;
+                                break; /* NB ibc_tx_noops checked */
                         }
                 }
 
-                list_del (&tx->tx_list);
+                list_del(&tx->tx_list);
                 tx->tx_queued = 0;
 
                 /* NB don't drop ibc_lock before bumping tx_sending */
 
                 if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP &&
-                    (!list_empty(&conn->ibc_tx_queue) ||
-                     !list_empty(&conn->ibc_tx_queue_nocred) ||
-                     (conn->ibc_outstanding_credits < IBLND_CREDIT_HIGHWATER &&
-                      !kiblnd_send_keepalive(conn)))) {
+                    !kiblnd_send_noop(conn)) {
                         /* redundant NOOP */
                         spin_unlock(&conn->ibc_lock);
                         kiblnd_tx_done(ni, tx);
@@ -1304,6 +1310,9 @@ kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
                 break;
 
         case IBLND_MSG_NOOP:
+                q = &conn->ibc_tx_noops;
+                break;
+
         case IBLND_MSG_IMMEDIATE:
                 q = &conn->ibc_tx_queue;
                 break;
@@ -1326,9 +1335,12 @@ void
 kiblnd_connect_peer (kib_peer_t *peer)
 {
         struct rdma_cm_id *cmid;
-        struct sockaddr_in sockaddr;
+        kib_net_t         *net = peer->ibp_ni->ni_data;
+        struct sockaddr_in srcaddr;
+        struct sockaddr_in dstaddr;
         int                rc;
 
+        LASSERT (net != NULL);
         LASSERT (peer->ibp_connecting > 0);
 
         cmid = rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP);
@@ -1339,14 +1351,20 @@ kiblnd_connect_peer (kib_peer_t *peer)
                 goto failed;
         }
 
-        memset(&sockaddr, 0, sizeof(sockaddr));
-        sockaddr.sin_family = AF_INET;
-        sockaddr.sin_port = htons(*kiblnd_tunables.kib_service);
-        sockaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
+        memset(&srcaddr, 0, sizeof(srcaddr));
+        srcaddr.sin_family = AF_INET;
+        srcaddr.sin_addr.s_addr = htonl(net->ibn_dev->ibd_ifip);
+
+        memset(&dstaddr, 0, sizeof(dstaddr));
+        dstaddr.sin_family = AF_INET;
+        dstaddr.sin_port = htons(*kiblnd_tunables.kib_service);
+        dstaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid));
 
         kiblnd_peer_addref(peer);               /* cmid's ref */
 
-        rc = rdma_resolve_addr(cmid, NULL, (struct sockaddr *)&sockaddr,
+        rc = rdma_resolve_addr(cmid,
+                               (struct sockaddr *)&srcaddr,
+                               (struct sockaddr *)&dstaddr,
                                *kiblnd_tunables.kib_timeout * 1000);
         if (rc == 0)
                 return;
@@ -1897,6 +1915,7 @@ kiblnd_close_conn_locked (kib_conn_t *conn, int error)
                 return; /* already being handled  */
 
         if (error == 0 &&
+            list_empty(&conn->ibc_tx_noops) &&
             list_empty(&conn->ibc_tx_queue) &&
             list_empty(&conn->ibc_tx_queue_rsrvd) &&
             list_empty(&conn->ibc_tx_queue_nocred) &&
@@ -1904,9 +1923,10 @@ kiblnd_close_conn_locked (kib_conn_t *conn, int error)
                 CDEBUG(D_NET, "closing conn to %s\n", 
                        libcfs_nid2str(peer->ibp_nid));
         } else {
-                CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s\n",
+                CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s%s\n",
                        libcfs_nid2str(peer->ibp_nid), error,
                        list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
+                       list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
                        list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
                        list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
                        list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
@@ -2021,6 +2041,7 @@ kiblnd_finalise_conn (kib_conn_t *conn)
         /* Complete all tx descs not waiting for sends to complete.
          * NB we should be safe from RDMA now that the QP has changed state */
 
+        kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
         kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
         kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
         kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
@@ -2108,7 +2129,7 @@ kiblnd_connreq_done(kib_conn_t *conn, int status)
 
         if (status != 0) {
                 /* failed to establish connection */
-                kiblnd_peer_connect_failed(conn->ibc_peer, active, status);
+                kiblnd_peer_connect_failed(peer, active, status);
                 kiblnd_finalise_conn(conn);
                 return;
         }
@@ -2129,22 +2150,25 @@ kiblnd_connreq_done(kib_conn_t *conn, int status)
         else
                 peer->ibp_accepting--;
 
-        kiblnd_close_stale_conns_locked(conn->ibc_peer,
-                                        conn->ibc_incarnation);
+        kiblnd_close_stale_conns_locked(peer, conn->ibc_incarnation);
+
+        /* grab pending txs while I have the lock */
+        list_add(&txs, &peer->ibp_tx_queue);
+        list_del_init(&peer->ibp_tx_queue);
 
         if (!kiblnd_peer_active(peer) ||        /* peer has been deleted */
             conn->ibc_comms_error != 0) {       /* error has happened already */
+                lnet_ni_t *ni = peer->ibp_ni;
 
                 /* start to shut down connection */
                 kiblnd_close_conn_locked(conn, -ECONNABORTED);
                 write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
+
+                kiblnd_txlist_done(ni, &txs, -ECONNABORTED);
+
                 return;
         }
 
-        /* grab pending txs while I have the lock */
-        list_add(&txs, &peer->ibp_tx_queue);
-        list_del_init(&peer->ibp_tx_queue);
-
         write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
         /* Schedule blocked txs */
@@ -2245,8 +2269,8 @@ kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob)
         if (reqmsg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) {
                 CERROR("Can't accept %s: incompatible max_frags %d (%d wanted)\n",
                        libcfs_nid2str(nid),
-                       reqmsg->ibm_u.connparams.ibcp_queue_depth,
-                       IBLND_MSG_QUEUE_SIZE);
+                       reqmsg->ibm_u.connparams.ibcp_max_frags,
+                       IBLND_MAX_RDMA_FRAGS);
                 goto failed;
         }
 
@@ -2325,8 +2349,8 @@ kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob)
         /* conn now "owns" cmid, so I return success from here on to ensure the
          * CM callback doesn't destroy cmid. */
 
-        conn->ibc_incarnation = reqmsg->ibm_srcstamp;
-        conn->ibc_credits = IBLND_MSG_QUEUE_SIZE;
+        conn->ibc_incarnation      = reqmsg->ibm_srcstamp;
+        conn->ibc_credits          = IBLND_MSG_QUEUE_SIZE;
         conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE;
         LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
                  <= IBLND_RX_MSGS);
@@ -2506,8 +2530,8 @@ kiblnd_check_connreply (kib_conn_t *conn, void *priv, int priv_nob)
         if (msg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) {
                 CERROR("%s has incompatible max_frags %d (%d wanted)\n",
                        libcfs_nid2str(peer->ibp_nid),
-                       msg->ibm_u.connparams.ibcp_queue_depth,
-                       IBLND_MSG_QUEUE_SIZE);
+                       msg->ibm_u.connparams.ibcp_max_frags,
+                       IBLND_MAX_RDMA_FRAGS);
                 rc = -EPROTO;
                 goto failed;
         }
@@ -2535,8 +2559,8 @@ kiblnd_check_connreply (kib_conn_t *conn, void *priv, int priv_nob)
                 goto failed;
         }
 
-        conn->ibc_incarnation = msg->ibm_srcstamp;
-        conn->ibc_credits = IBLND_MSG_QUEUE_SIZE;
+        conn->ibc_incarnation      = msg->ibm_srcstamp;
+        conn->ibc_credits          = IBLND_MSG_QUEUE_SIZE;
         conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE;
         LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
                  <= IBLND_RX_MSGS);
@@ -2799,6 +2823,7 @@ int
 kiblnd_conn_timed_out (kib_conn_t *conn)
 {
         return  kiblnd_check_txs(conn, &conn->ibc_tx_queue) ||
+                kiblnd_check_txs(conn, &conn->ibc_tx_noops) ||
                 kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
                 kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
                 kiblnd_check_txs(conn, &conn->ibc_active_txs);