Whamcloud - gitweb
b=23575 O2iblnd credit deadlock regression
authorIsaac Huang <he.h.huang@oracle.com>
Mon, 29 Nov 2010 07:08:24 +0000 (00:08 -0700)
committerVitaly Fertman <vitaly.fertman@oracle.com>
Wed, 1 Dec 2010 01:45:38 +0000 (04:45 +0300)
This fixed a regression of bug 14425.

i=liang

lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c

index 216dbff..4959aba 100644 (file)
@@ -623,6 +623,10 @@ kiblnd_debug_conn (kib_conn_t *conn)
         cfs_list_for_each(tmp, &conn->ibc_early_rxs)
                 kiblnd_debug_rx(cfs_list_entry(tmp, kib_rx_t, rx_list));
 
+        CDEBUG(D_CONSOLE, "   tx_noops:\n");
+        cfs_list_for_each(tmp, &conn->ibc_tx_noops)
+                kiblnd_debug_tx(cfs_list_entry(tmp, kib_tx_t, tx_list));
+
         CDEBUG(D_CONSOLE, "   tx_queue_nocred:\n");
         cfs_list_for_each(tmp, &conn->ibc_tx_queue_nocred)
                 kiblnd_debug_tx(cfs_list_entry(tmp, kib_tx_t, tx_list));
@@ -729,6 +733,7 @@ kiblnd_create_conn(kib_peer_t *peer, struct rdma_cm_id *cmid,
         conn->ibc_cmid = cmid;
 
         CFS_INIT_LIST_HEAD(&conn->ibc_early_rxs);
+        CFS_INIT_LIST_HEAD(&conn->ibc_tx_noops);
         CFS_INIT_LIST_HEAD(&conn->ibc_tx_queue);
         CFS_INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd);
         CFS_INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred);
@@ -892,6 +897,7 @@ kiblnd_destroy_conn (kib_conn_t *conn)
         LASSERT (!cfs_in_interrupt());
         LASSERT (cfs_atomic_read(&conn->ibc_refcount) == 0);
         LASSERT (cfs_list_empty(&conn->ibc_early_rxs));
+        LASSERT (cfs_list_empty(&conn->ibc_tx_noops));
         LASSERT (cfs_list_empty(&conn->ibc_tx_queue));
         LASSERT (cfs_list_empty(&conn->ibc_tx_queue_rsrvd));
         LASSERT (cfs_list_empty(&conn->ibc_tx_queue_nocred));
index 491cd70..db98289 100644 (file)
@@ -572,6 +572,7 @@ typedef struct kib_conn
         int                  ibc_ready:1;       /* CQ callback fired */
         unsigned long        ibc_last_send;     /* time of last send */
         cfs_list_t           ibc_early_rxs;     /* rxs completed before ESTABLISHED */
+        cfs_list_t          ibc_tx_noops;       /* IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */
         cfs_list_t           ibc_tx_queue;       /* sends that need a credit */
         cfs_list_t           ibc_tx_queue_nocred;/* sends that don't need a credit */
         cfs_list_t           ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */
@@ -727,15 +728,25 @@ kiblnd_send_noop(kib_conn_t *conn)
             !kiblnd_send_keepalive(conn))
                 return 0; /* No need to send NOOP */
 
-        if (!cfs_list_empty(&conn->ibc_tx_queue_nocred))
-                return 0; /* NOOP can be piggybacked */
+        if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
+                if (!cfs_list_empty(&conn->ibc_tx_queue_nocred))
+                        return 0; /* NOOP can be piggybacked */
 
-        if (!IBLND_OOB_CAPABLE(conn->ibc_version))
-                /* can't piggyback? */
-                return cfs_list_empty(&conn->ibc_tx_queue);
+                /* No tx to piggyback NOOP onto or no credit to send a tx */
+                return (cfs_list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 0);
+        }
+
+        if (!cfs_list_empty(&conn->ibc_tx_noops) ||       /* NOOP already queued */
+            !cfs_list_empty(&conn->ibc_tx_queue_nocred) || /* can be piggybacked */
+            conn->ibc_credits == 0)                    /* no credit */
+                return 0;
+
+        if (conn->ibc_credits == 1 &&      /* last credit reserved for */
+            conn->ibc_outstanding_credits == 0) /* giving back credits */
+                return 0;
 
         /* No tx to piggyback NOOP onto or no credit to send a tx */
-        return (cfs_list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 0);
+        return (cfs_list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
 }
 
 static inline void
index 577db3e..431aa90 100644 (file)
@@ -328,6 +328,11 @@ kiblnd_handle_rx (kib_rx_t *rx)
 
                 conn->ibc_credits += credits;
 
+                /* This ensures the credit taken by NOOP can be returned */
+                if (msg->ibm_type == IBLND_MSG_NOOP &&
+                    !IBLND_OOB_CAPABLE(conn->ibc_version)) /* v1 only */
+                        conn->ibc_outstanding_credits++;
+
                 cfs_spin_unlock(&conn->ibc_lock);
                 kiblnd_check_sends(conn);
         }
@@ -341,9 +346,14 @@ kiblnd_handle_rx (kib_rx_t *rx)
                 break;
 
         case IBLND_MSG_NOOP:
-                if (IBLND_OOB_CAPABLE(conn->ibc_version))
+                if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
                         post_credit = IBLND_POSTRX_NO_CREDIT;
-                else
+                        break;
+                }
+
+                if (credits != 0) /* credit already posted */
+                        post_credit = IBLND_POSTRX_NO_CREDIT;
+                else              /* a keepalive NOOP */
                         post_credit = IBLND_POSTRX_PEER_CREDIT;
                 break;
 
@@ -791,8 +801,8 @@ kiblnd_post_tx_locked (kib_conn_t *conn, kib_tx_t *tx, int credit)
         }
 
         if (credit != 0 && !IBLND_OOB_CAPABLE(ver) &&
-            conn->ibc_credits == 1 &&   /* last credit reserved for */
-            conn->ibc_outstanding_credits == 0) { /* giving back credits */
+            conn->ibc_credits == 1 &&   /* last credit reserved */
+            msg->ibm_type != IBLND_MSG_NOOP) {      /* for NOOP */
                 CDEBUG(D_NET, "%s: not using last credit\n",
                        libcfs_nid2str(peer->ibp_nid));
                 return -EAGAIN;
@@ -939,6 +949,11 @@ kiblnd_check_sends (kib_conn_t *conn)
                         credit = 0;
                         tx = cfs_list_entry(conn->ibc_tx_queue_nocred.next,
                                             kib_tx_t, tx_list);
+                } else if (!cfs_list_empty(&conn->ibc_tx_noops)) {
+                        LASSERT (!IBLND_OOB_CAPABLE(ver));
+                        credit = 1;
+                        tx = cfs_list_entry(conn->ibc_tx_noops.next,
+                                        kib_tx_t, tx_list);
                 } else if (!cfs_list_empty(&conn->ibc_tx_queue)) {
                         credit = 1;
                         tx = cfs_list_entry(conn->ibc_tx_queue.next,
@@ -1171,7 +1186,7 @@ kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
                 if (IBLND_OOB_CAPABLE(conn->ibc_version))
                         q = &conn->ibc_tx_queue_nocred;
                 else
-                        q = &conn->ibc_tx_queue;
+                        q = &conn->ibc_tx_noops;
                 break;
 
         case IBLND_MSG_IMMEDIATE:
@@ -1788,6 +1803,7 @@ kiblnd_close_conn_locked (kib_conn_t *conn, int error)
                 return; /* already being handled  */
 
         if (error == 0 &&
+            cfs_list_empty(&conn->ibc_tx_noops) &&
             cfs_list_empty(&conn->ibc_tx_queue) &&
             cfs_list_empty(&conn->ibc_tx_queue_rsrvd) &&
             cfs_list_empty(&conn->ibc_tx_queue_nocred) &&
@@ -1795,9 +1811,10 @@ kiblnd_close_conn_locked (kib_conn_t *conn, int error)
                 CDEBUG(D_NET, "closing conn to %s\n", 
                        libcfs_nid2str(peer->ibp_nid));
         } else {
-                CNETERR("Closing conn to %s: error %d%s%s%s%s\n",
+                CNETERR("Closing conn to %s: error %d%s%s%s%s%s\n",
                        libcfs_nid2str(peer->ibp_nid), error,
                        cfs_list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
+                       cfs_list_empty(&conn->ibc_tx_noops) ? "" : "(sending_noops)",
                        cfs_list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
                        cfs_list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
                        cfs_list_empty(&conn->ibc_active_txs) ? "" : "(waiting)");
@@ -1921,6 +1938,7 @@ kiblnd_finalise_conn (kib_conn_t *conn)
         /* Complete all tx descs not waiting for sends to complete.
          * NB we should be safe from RDMA now that the QP has changed state */
 
+        kiblnd_abort_txs(conn, &conn->ibc_tx_noops);
         kiblnd_abort_txs(conn, &conn->ibc_tx_queue);
         kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
         kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred);
@@ -2926,6 +2944,7 @@ int
 kiblnd_conn_timed_out (kib_conn_t *conn)
 {
         return  kiblnd_check_txs(conn, &conn->ibc_tx_queue) ||
+                kiblnd_check_txs(conn, &conn->ibc_tx_noops) ||
                 kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
                 kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
                 kiblnd_check_txs(conn, &conn->ibc_active_txs);