Whamcloud - gitweb
LU-16548 lnet: report actual timeout used by lnd 20/50620/18
authorFrank Sehr <fsehr@whamcloud.com>
Wed, 12 Apr 2023 19:31:33 +0000 (12:31 -0700)
committerOleg Drokin <green@whamcloud.com>
Fri, 9 Jun 2023 05:25:39 +0000 (05:25 +0000)
The lnd_timeout value reported by lnetctl may differ
from the value that is actually used.
There's an lnd_timeout calculated as a function of transaction
timeout and retry_count. This is the value displayed by
"lnetctl global show". However, each LND may define its own
timeout by setting its timeout module parameter to a positive value,
which overrides the higher-level lnd_timeout defined by LNet.
With this change, "lnetctl net show -v" shows the timeout value
actually used by the LND in the lnd_tunables section.
Timeout reporting is implemented for socklnd, o2iblnd and gnilnd.
A test covering sock, ib and gni is included.

Test-Parameters: trivial
Signed-off-by: Frank Sehr <fsehr@whamcloud.com>
Change-Id: I85a107ba6f1259c577f74945b89fd695f191d514
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50620
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/include/uapi/linux/lnet/lnet-dlc.h
lnet/klnds/gnilnd/gnilnd.c
lnet/klnds/gnilnd/gnilnd.h
lnet/klnds/gnilnd/gnilnd_modparams.c
lnet/klnds/o2iblnd/o2iblnd_modparams.c
lnet/klnds/socklnd/socklnd_modparams.c
lnet/utils/lnetconfig/liblnetconfig_lnd.c
lustre/tests/sanity-lnet.sh

index 969f0df..47ac566 100644 (file)
@@ -105,6 +105,7 @@ struct lnet_ioctl_config_o2iblnd_tunables {
        __u32 lnd_fmr_cache;
        __u16 lnd_conns_per_peer;
        __u16 lnd_ntx;
+       __u32 lnd_timeout;
 };
 
 struct lnet_ioctl_config_kfilnd_tunables {
@@ -120,6 +121,12 @@ struct lnet_ioctl_config_socklnd_tunables {
        __u32 lnd_version;
        __u16 lnd_conns_per_peer;
        __u16 lnd_pad;
+       __u32 lnd_timeout;
+};
+
+struct lnet_ioctl_config_gnilnd_tunables {     /* gnilnd LND tunables */
+       __u32 lnd_version;      /* set to CURRENT_LND_VERSION by gnilnd */
+       __u32 lnd_timeout;      /* set from kgnilnd_timeout() by gnilnd */
 };
 
 struct lnet_lnd_tunables {
@@ -127,6 +134,7 @@ struct lnet_lnd_tunables {
                struct lnet_ioctl_config_o2iblnd_tunables lnd_o2ib;
                struct lnet_ioctl_config_socklnd_tunables lnd_sock;
                struct lnet_ioctl_config_kfilnd_tunables lnd_kfi;
+               struct lnet_ioctl_config_gnilnd_tunables lnd_gni;
        } lnd_tun_u;
 };
 
index b785b13..62d578a 100644 (file)
@@ -2609,12 +2609,8 @@ kgnilnd_startup(struct lnet_ni *ni)
        INIT_LIST_HEAD(&net->gnn_list);
        ni->ni_data = net;
        net->gnn_ni = ni;
-       if (!ni->ni_net->net_tunables_set) {
-               ni->ni_net->net_tunables.lct_max_tx_credits =
-                       *kgnilnd_tunables.kgn_credits;
-               ni->ni_net->net_tunables.lct_peer_tx_credits =
-                       *kgnilnd_tunables.kgn_peer_credits;
-       }
+
+       kgnilnd_tunables_setup(ni);
 
        if (!ni->ni_interface) {
                rc = lnet_ni_add_interface(ni, "ipogif0");
index 7c335e2..bb1ccd7 100644 (file)
@@ -898,6 +898,13 @@ extern void kgnilnd_destroy_conn(kgn_conn_t *conn);
 extern int _kgnilnd_schedule_conn(kgn_conn_t *conn, const char *caller, int line, int refheld, int lock_held);
 extern int _kgnilnd_schedule_delay_conn(kgn_conn_t *conn);
 
+/*
+ * Timeout used by gnilnd: the kgn_timeout tunable when it is non-zero,
+ * otherwise whatever lnet_get_lnd_timeout() returns.
+ */
+static inline int kgnilnd_timeout(void)
+{
+       int lnd_timeout = *kgnilnd_tunables.kgn_timeout;
+
+       if (lnd_timeout)
+               return lnd_timeout;
+
+       return lnet_get_lnd_timeout();
+}
+
 /* Macro wrapper for _kgnilnd_schedule_conn. This will store the function
  * and the line of the calling function to allow us to debug problematic
  * schedule calls in the future without the programmer having to mark
index 13db614..46a2b25 100644 (file)
@@ -310,3 +310,22 @@ kgnilnd_tunables_init(void)
 out:
        return rc;
 }
+
+/**
+ * Fill in the gnilnd tunables of @ni.
+ *
+ * When the net's common tunables have not been set already, the tx credit
+ * limits are taken from kgnilnd_tunables. The gnilnd-specific tunables are
+ * then stamped with the LND version and with the timeout currently in
+ * effect (see kgnilnd_timeout()).
+ *
+ * NOTE(review): kgnilnd_startup() calls this as kgnilnd_tunables_setup(),
+ * so the definition must carry that name; a prototype is presumably needed
+ * in gnilnd.h as well -- confirm it is declared there.
+ */
+void
+kgnilnd_tunables_setup(struct lnet_ni *ni)
+{
+       struct lnet_ioctl_config_gnilnd_tunables *tunables;
+
+       /* only apply the defaults when nothing was configured explicitly */
+       if (!ni->ni_net->net_tunables_set) {
+               ni->ni_net->net_tunables.lct_max_tx_credits =
+                       *kgnilnd_tunables.kgn_credits;
+               ni->ni_net->net_tunables.lct_peer_tx_credits =
+                       *kgnilnd_tunables.kgn_peer_credits;
+       }
+
+       tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_gni;
+
+       tunables->lnd_version = CURRENT_LND_VERSION;
+
+       /* record the timeout actually in use for this LND */
+       tunables->lnd_timeout = kgnilnd_timeout();
+}
index 9643fcb..cb6b7cc 100644 (file)
@@ -316,6 +316,8 @@ kiblnd_tunables_setup(struct lnet_ni *ni)
                        conns_per_peer : 1;
        }
 
+       tunables->lnd_timeout = kiblnd_timeout();
+
        return 0;
 }
 
index 68e2bea..bd97f1a 100644 (file)
@@ -377,4 +377,6 @@ void ksocknal_tunables_setup(struct lnet_ni *ni)
        if (!tunables->lnd_conns_per_peer)
                tunables->lnd_conns_per_peer =
                        ksocklnd_lookup_conns_per_peer(ni);
+
+       tunables->lnd_timeout = ksocknal_timeout();
 }
index 251e7da..80673d9 100644 (file)
@@ -70,6 +70,10 @@ lustre_o2iblnd_show_tun(struct cYAML *lndparams,
                                lnd_cfg->lnd_conns_per_peer) == NULL)
                return LUSTRE_CFG_RC_OUT_OF_MEM;
 
+       if (cYAML_create_number(lndparams, "timeout",
+                               lnd_cfg->lnd_timeout) == NULL)
+               return LUSTRE_CFG_RC_OUT_OF_MEM;
+
        return LUSTRE_CFG_RC_NO_ERR;
 }
 
@@ -82,6 +86,10 @@ lustre_socklnd_show_tun(struct cYAML *lndparams,
                                lnd_cfg->lnd_conns_per_peer) == NULL)
                return LUSTRE_CFG_RC_OUT_OF_MEM;
 
+       if (cYAML_create_number(lndparams, "timeout",
+                               lnd_cfg->lnd_timeout) == NULL)
+               return LUSTRE_CFG_RC_OUT_OF_MEM;
+
        return LUSTRE_CFG_RC_NO_ERR;
 }
 
@@ -116,6 +124,17 @@ lustre_kfilnd_show_tun(struct cYAML *lndparams,
 }
 #endif
 
+/*
+ * Add the gnilnd "timeout" tunable from @lnd_cfg to the @lndparams YAML node.
+ * Returns LUSTRE_CFG_RC_NO_ERR on success, LUSTRE_CFG_RC_OUT_OF_MEM when the
+ * YAML entry could not be created.
+ */
+static int
+lustre_gnilnd_show_tun(struct cYAML *lndparams,
+                       struct lnet_ioctl_config_gnilnd_tunables *lnd_cfg)
+{
+       /* NULL from cYAML_create_number() is reported as out of memory */
+       return cYAML_create_number(lndparams, "timeout",
+                                  lnd_cfg->lnd_timeout) != NULL ?
+              LUSTRE_CFG_RC_NO_ERR : LUSTRE_CFG_RC_OUT_OF_MEM;
+}
+
 int
 lustre_net_show_tunables(struct cYAML *tunables,
                         struct lnet_ioctl_config_lnd_cmn_tunables *cmn)
@@ -167,7 +186,9 @@ lustre_ni_show_tunables(struct cYAML *lnd_tunables,
                                            &lnd->lnd_tun_u.lnd_kfi,
                                            backup);
 #endif
-
+       else if (net_type == GNILND)
+               rc = lustre_gnilnd_show_tun(lnd_tunables,
+                                           &lnd->lnd_tun_u.lnd_gni);
        return rc;
 }
 
index d7baa3d..e3659f9 100755 (executable)
@@ -3494,6 +3494,62 @@ test_303() {
 }
 run_test 303 "Check peer NI health after link down"
 
+
+check_parameter() {
+       local para=$1
+       local value=$2
+
+       echo "check parameter ${para} value ${value}"
+
+       return $(( $(do_lnetctl net show -v | \
+                    tee /dev/stderr | \
+                    grep -c "^ \+${para}: ${value}$") != 1 ))
+}
+
+static_config() {
+       local module=$1
+       local setting=$2
+
+       cleanup_lnet || error "Failed to cleanup LNet"
+
+       load_module ../libcfs/libcfs/libcfs ||
+               error "Failed to load module libcfs rc = $?"
+
+       load_module ../lnet/lnet/lnet ||
+               error "Failed to load module lnet rc = $?"
+
+       echo "loading ${module} ${setting} type ${NETTYPE}"
+       load_module "${module}" "${setting}" ||
+               error "Failed to load module ${module} rc = $?"
+
+       do_lnetctl lnet configure --all || error "lnet configure failed rc = $?"
+
+       return 0
+}
+
+test_310() {
+       local value=65
+
+       if [[ ${NETTYPE} == tcp* ]];then
+               static_config "../lnet/klnds/socklnd/ksocklnd" \
+                             "sock_timeout=${value}"
+       elif [[ ${NETTYPE} == o2ib* ]]; then
+               static_config "../lnet/klnds/o2iblnd/ko2iblnd" \
+                             "timeout=${value}"
+       elif [[ ${NETTYPE} == gni* ]]; then
+               static_config "../lnet/klnds/gnilnd/kgnilnd" \
+                             "timeout=${value}"
+       else
+               skip "NETTYPE ${NETTYPE} not supported"
+       fi
+
+       check_parameter "timeout" $value
+
+       return $?
+}
+run_test 310 "Set timeout and verify"
+
+
 check_udsp_prio() {
        local target_net="${1}"
        local target_nid="${2}"