Whamcloud - gitweb
LU-7210 o2iblnd: take extra refcount in kiblnd_connreq_done 27/17527/4
author Liang Zhen <liang.zhen@intel.com>
Wed, 9 Dec 2015 14:27:05 +0000 (22:27 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 5 Jan 2016 00:49:12 +0000 (00:49 +0000)
The refcount taken by cmid is not reliable after kiblnd_connreq_done()
releases the glock, because the connection is then visible to other
threads. Another thread can find and close this connection right
after kiblnd_connreq_done() releases the glock; if kiblnd_cm_callback()
is then called for RDMA_CM_EVENT_DISCONNECTED, it can release the
connection refcount taken by cmid. This means the connection could be
destroyed before kiblnd_connreq_done() finishes its operations on it.

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Change-Id: Ic49b63551c13abc8c874732de5fd4ea5cef4c6b7
Reviewed-on: http://review.whamcloud.com/17527
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/klnds/o2iblnd/o2iblnd_cb.c

index 8196b1b..606396d 100644 (file)
@@ -934,8 +934,6 @@ kiblnd_check_sends (kib_conn_t *conn)
                         kiblnd_queue_tx_locked(tx, conn);
         }
 
-        kiblnd_conn_addref(conn); /* 1 ref for me.... (see b21911) */
-
         for (;;) {
                 int credit;
 
@@ -960,8 +958,6 @@ kiblnd_check_sends (kib_conn_t *conn)
         }
 
        spin_unlock(&conn->ibc_lock);
-
-       kiblnd_conn_decref(conn); /* ...until here */
 }
 
 static void
@@ -2120,6 +2116,15 @@ kiblnd_connreq_done(kib_conn_t *conn, int status)
                return;
        }
 
+       /* refcount taken by cmid is not reliable after I released the glock
+        * because this connection is visible to other threads now, another
+        * thread can find and close this connection right after I released
+        * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is
+        * called, it can release the connection refcount taken by cmid.
+        * It means the connection could be destroyed before I finish my
+        * operations on it.
+        */
+       kiblnd_conn_addref(conn);
        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
        /* Schedule blocked txs */
@@ -2136,6 +2141,8 @@ kiblnd_connreq_done(kib_conn_t *conn, int status)
 
        /* schedule blocked rxs */
        kiblnd_handle_early_rxs(conn);
+
+       kiblnd_conn_decref(conn);
 }
 
 static void