Whamcloud - gitweb
LU-5718 o2iblnd: avoid intensive reconnecting 00/14600/6
author: Liang Zhen <liang.zhen@intel.com>
Tue, 25 Aug 2015 16:25:34 +0000 (12:25 -0400)
committer: Oleg Drokin <oleg.drokin@intel.com>
Fri, 4 Sep 2015 05:15:30 +0000 (05:15 +0000)
When there is a connection race between two nodes and one side of
the connection is rejected by the remote side, o2iblnd reconnects
immediately. This can generate a lot of memory pressure and even
cause OOM if the remote side is slow and cannot complete the
connection request in a short time.

This patch resolves the issue by reconnecting only after the rejected
connection has been destroyed by connd, so there is no more than
one zombie connection for each peer.

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Change-Id: I78d3b00be70231d576572832b9b0fba2df3d3c12
Reviewed-on: http://review.whamcloud.com/14600
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Doug Oucharek <doug.s.oucharek@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lnet/klnds/o2iblnd/o2iblnd.c
lnet/klnds/o2iblnd/o2iblnd.h
lnet/klnds/o2iblnd/o2iblnd_cb.c

index 3adff1f..2a21e65 100644 (file)
@@ -976,9 +976,22 @@ kiblnd_destroy_conn (kib_conn_t *conn)
        if (conn->ibc_state != IBLND_CONN_INIT) {
                kib_net_t *net = peer->ibp_ni->ni_data;
 
-               kiblnd_peer_decref(peer);
                rdma_destroy_id(cmid);
                atomic_dec(&net->ibn_nconns);
+               if (conn->ibc_conn_race) {
+                       if (peer->ibp_accepting == 0 &&
+                           !list_empty(&peer->ibp_tx_queue)) {
+                               kiblnd_connect_peer(peer);
+                       } else  {
+                               rwlock_t *glock = &kiblnd_data.kib_global_lock;
+                               unsigned long flags;
+
+                               write_lock_irqsave(glock, flags);
+                               peer->ibp_connecting--;
+                               write_unlock_irqrestore(glock, flags);
+                       }
+               }
+               kiblnd_peer_decref(peer);
        }
 
        LIBCFS_FREE(conn, sizeof(*conn));
index c30aa21..4e8b15b 100644 (file)
@@ -686,11 +686,13 @@ typedef struct kib_conn
        /* set on comms error */
        int                     ibc_comms_error;
        /* receive buffers owned */
-       unsigned int            ibc_nrx:16;
+       unsigned short          ibc_nrx;
+       /** rejected by connection race */
+       unsigned short          ibc_conn_race:1;
        /* scheduled for attention */
-       unsigned int            ibc_scheduled:1;
+       unsigned short          ibc_scheduled:1;
        /* CQ callback fired */
-       unsigned int            ibc_ready:1;
+       unsigned short          ibc_ready:1;
        /* time of last send */
        unsigned long           ibc_last_send;
        /** link chain for kiblnd_check_conns only */
@@ -1113,6 +1115,7 @@ int  kiblnd_translate_mtu(int value);
 int  kiblnd_dev_failover(kib_dev_t *dev);
 int  kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid);
 void kiblnd_destroy_peer (kib_peer_t *peer);
+void kiblnd_connect_peer(kib_peer_t *peer);
 void kiblnd_destroy_dev (kib_dev_t *dev);
 void kiblnd_unlink_peer_locked (kib_peer_t *peer);
 kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid);
index ce7c601..0dd9cdf 100644 (file)
@@ -1284,7 +1284,7 @@ static int kiblnd_resolve_addr(struct rdma_cm_id *cmid,
         return rc;
 }
 
-static void
+void
 kiblnd_connect_peer (kib_peer_t *peer)
 {
         struct rdma_cm_id *cmid;
@@ -2473,10 +2473,10 @@ static void
 kiblnd_reconnect (kib_conn_t *conn, int version,
                   __u64 incarnation, int why, kib_connparams_t *cp)
 {
-        kib_peer_t    *peer = conn->ibc_peer;
-        char          *reason;
-        int            retry = 0;
-        unsigned long  flags;
+       kib_peer_t      *peer = conn->ibc_peer;
+       char            *reason;
+       int              retry_now = 0;
+       unsigned long    flags;
 
         LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT);
         LASSERT (peer->ibp_connecting > 0);     /* 'conn' at least */
@@ -2492,7 +2492,15 @@ kiblnd_reconnect (kib_conn_t *conn, int version,
              peer->ibp_version != version) &&
             peer->ibp_connecting == 1 &&
             peer->ibp_accepting == 0) {
-                retry = 1;
+               if (why == IBLND_REJECT_CONN_RACE) {
+                       /* don't reconnect immediately, intensive reconnecting
+                        * may consume a lot of memory. kiblnd_destroy_conn
+                        * will reconnect after releasing all resources of
+                        * this connection */
+                       conn->ibc_conn_race = 1;
+               } else {
+                       retry_now = 1;
+               }
                 peer->ibp_connecting++;
 
                 peer->ibp_version     = version;
@@ -2501,7 +2509,7 @@ kiblnd_reconnect (kib_conn_t *conn, int version,
 
        write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags);
 
-        if (!retry)
+       if (!retry_now)
                 return;
 
         switch (why) {
@@ -2513,10 +2521,6 @@ kiblnd_reconnect (kib_conn_t *conn, int version,
                 reason = "stale";
                 break;
 
-        case IBLND_REJECT_CONN_RACE:
-                reason = "conn race";
-                break;
-
         case IBLND_REJECT_CONN_UNCOMPAT:
                 reason = "version negotiation";
                 break;