From: Liang Zhen Date: Wed, 9 Dec 2015 14:27:05 +0000 (+0800) Subject: LU-7210 o2iblnd: take extra refcount in kiblnd_connreq_done X-Git-Tag: 2.7.65~45 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=2f033b088745bb98eeeed33c58518fddd16397db;p=fs%2Flustre-release.git LU-7210 o2iblnd: take extra refcount in kiblnd_connreq_done The refcount taken by cmid is not reliable after kiblnd_connreq_done() releases the glock, because the connection is then visible to other threads. Another thread can find and close this connection right after kiblnd_connreq_done() releases the glock; if kiblnd_cm_callback is then called for RDMA_CM_EVENT_DISCONNECTED, it can release the connection refcount taken by cmid. This means the connection could be destroyed before kiblnd_connreq_done() finishes its operations on it. Signed-off-by: Liang Zhen Change-Id: Ic49b63551c13abc8c874732de5fd4ea5cef4c6b7 Reviewed-on: http://review.whamcloud.com/17527 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Doug Oucharek Reviewed-by: James Simmons Tested-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c index 8196b1b..606396d 100644 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -934,8 +934,6 @@ kiblnd_check_sends (kib_conn_t *conn) kiblnd_queue_tx_locked(tx, conn); } - kiblnd_conn_addref(conn); /* 1 ref for me.... 
(see b21911) */ - for (;;) { int credit; @@ -960,8 +958,6 @@ kiblnd_check_sends (kib_conn_t *conn) } spin_unlock(&conn->ibc_lock); - - kiblnd_conn_decref(conn); /* ...until here */ } static void @@ -2120,6 +2116,15 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) return; } + /* refcount taken by cmid is not reliable after I released the glock + * because this connection is visible to other threads now, another + * thread can find and close this connection right after I released + * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is + * called, it can release the connection refcount taken by cmid. + * It means the connection could be destroyed before I finish my + * operations on it. + */ + kiblnd_conn_addref(conn); write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); /* Schedule blocked txs */ @@ -2136,6 +2141,8 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) /* schedule blocked rxs */ kiblnd_handle_early_rxs(conn); + + kiblnd_conn_decref(conn); } static void