Whamcloud - gitweb
LU-17021 socklnd: fix late ksnr_max_conns set
author Cyril Bordage <cbordage@whamcloud.com>
Tue, 8 Aug 2023 12:34:17 +0000 (14:34 +0200)
committer Andreas Dilger <adilger@whamcloud.com>
Mon, 14 Aug 2023 21:34:30 +0000 (21:34 +0000)
ksnr_max_conns was set to the correct value only after it had already been used. Set it before calling ksocknal_add_conn_cb_locked().

Lustre-change: https://review.whamcloud.com/51890
Lustre-commit: TBD (from cc894288304fb8e6caa44543e6b44b8ec18deb9b)

Test-Parameters: trivial
Signed-off-by: Cyril Bordage <cbordage@whamcloud.com>
Change-Id: I9f2454d915ee1ab27db96f5247028db94965a11f
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51891
Tested-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
lnet/klnds/socklnd/socklnd.c
lnet/klnds/socklnd/socklnd.h
lnet/klnds/socklnd/socklnd_cb.c

index b0bf7b1..bfa943e 100644 (file)
@@ -115,6 +115,8 @@ ksocknal_create_conn_cb(__u32 ipaddr, int port)
        conn_cb->ksnr_blko_conn_count = 0;
        conn_cb->ksnr_max_conns = 0;
        conn_cb->ksnr_busy_retry_count = 0;
+       conn_cb->ksnr_t_last_conn = 0;
+       conn_cb->ksnr_max_retries = 0;
 
        return conn_cb;
 }
@@ -161,6 +163,7 @@ ksocknal_create_peer(struct lnet_ni *ni, struct lnet_process_id id)
        peer_ni->ksnp_last_alive = 0;
        peer_ni->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;
        peer_ni->ksnp_conn_cb = NULL;
+       peer_ni->ksnp_t_created = ktime_get_seconds();
 
        INIT_LIST_HEAD(&peer_ni->ksnp_conns);
        INIT_LIST_HEAD(&peer_ni->ksnp_tx_queue);
@@ -581,13 +584,13 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
        if (peer_ni->ksnp_conn_cb) {
                ksocknal_conn_cb_decref(conn_cb);
        } else {
-               ksocknal_add_conn_cb_locked(peer_ni, conn_cb);
                /* Remember conns_per_peer setting at the time
                 * of connection initiation. It will define the
                 * max number of conns per type for this conn_cb
                 * while it's in use.
                 */
                conn_cb->ksnr_max_conns = ksocknal_get_conns_per_peer(peer_ni);
+               ksocknal_add_conn_cb_locked(peer_ni, conn_cb);
        }
 
        write_unlock_bh(&ksocknal_data.ksnd_global_lock);
index c7fbfb4..cf209fd 100644 (file)
@@ -391,6 +391,8 @@ struct ksock_conn_cb {
        unsigned int            ksnr_busy_retry_count;/* counts retry attempts
                                                       * due to EALREADY rc
                                                       */
+       time64_t                ksnr_t_last_conn;
+       unsigned int            ksnr_max_retries:4;/* max_retries by type */
 };
 
 #define SOCKNAL_KEEPALIVE_PING          1       /* cookie for keepalive ping */
@@ -414,6 +416,7 @@ struct ksock_peer_ni {
        struct list_head        ksnp_zc_req_list;
        time64_t                ksnp_send_keepalive; /* time to send keepalive */
        struct lnet_ni          *ksnp_ni;       /* which network */
+       time64_t                ksnp_t_created;
 };
 
 struct ksock_connreq {
index 4180f89..c50f25b 100644 (file)
@@ -1946,6 +1946,25 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
                           conn_cb->ksnr_blki_conn_count <= conn_cb->ksnr_blko_conn_count) {
                        type = SOCKLND_CONN_BULK_IN;
                } else {
+                       if ((wanted & BIT(SOCKLND_CONN_BULK_OUT)) == 0) {
+                               CERROR("Time %lld Peer %s (c:a:la:ct)(%d:%d:%lld:%lld)\n",
+                                      ktime_get_seconds(),
+                                      libcfs_nid2str(peer_ni->ksnp_id.nid),
+                                      peer_ni->ksnp_closing,
+                                      peer_ni->ksnp_accepting,
+                                      peer_ni->ksnp_last_alive,
+                                      peer_ni->ksnp_t_created);
+                               CERROR("Conn_cb %p IP %pI4h (sh:cg:cd:de:cc:ct:mr)(%u,%u,%u:%u:%d:%lld:%u)\n",
+                                      conn_cb,
+                                      &conn_cb->ksnr_ipaddr,
+                                      conn_cb->ksnr_scheduled,
+                                      conn_cb->ksnr_connecting,
+                                      conn_cb->ksnr_connected,
+                                      conn_cb->ksnr_deleted,
+                                      conn_cb->ksnr_conn_count,
+                                      conn_cb->ksnr_t_last_conn,
+                                      conn_cb->ksnr_max_retries);
+                       }
                        LASSERT ((wanted & BIT(SOCKLND_CONN_BULK_OUT)) != 0);
                        type = SOCKLND_CONN_BULK_OUT;
                }
@@ -1991,6 +2010,7 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
                if (retry_later)
                        CDEBUG(D_NET, "peer_ni %s: conn race, retry later. rc %d\n",
                               libcfs_nid2str(peer_ni->ksnp_id.nid), rc);
+               conn_cb->ksnr_t_last_conn = ktime_get_seconds();
 
                write_lock_bh(&ksocknal_data.ksnd_global_lock);
        }
@@ -2004,6 +2024,7 @@ ksocknal_connect(struct ksock_conn_cb *conn_cb)
                 * the peer doesn't support as many connections as we want
                 */
                conn_cb->ksnr_connected |= BIT(type);
+               conn_cb->ksnr_max_retries |= BIT(type);
                retry_later = false;
        }