Whamcloud - gitweb
LU-16145 lnet: Honor peer timeout of zero 89/48489/4
author: Chris Horn <chris.horn@hpe.com>
Fri, 2 Sep 2022 16:47:02 +0000 (11:47 -0500)
committer: Oleg Drokin <green@whamcloud.com>
Sat, 17 Sep 2022 06:24:18 +0000 (06:24 +0000)
Zero is a valid value for the peer_timeout parameter (it is supposed
to disable the LNet Peer Health feature used on routers), but DLC
treats zero as uninitialized and assigns the default peer timeout
instead.

Test-Parameters: trivial testlist=sanity-lnet
HPE-bug-id: LUS-11233
Signed-off-by: Chris Horn <chris.horn@hpe.com>
Change-Id: I66f45ddf282757f46c0169ae0e725e56234d3d89
Reviewed-on: https://review.whamcloud.com/48489
Reviewed-by: Frank Sehr <fsehr@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lnet/lnet/api-ni.c
lnet/utils/lnetconfig/liblnetconfig.c
lnet/utils/lnetctl.c
lustre/tests/sanity-lnet.sh

index d4adae2..0fc1583 100644 (file)
@@ -3555,7 +3555,7 @@ static void
 lnet_set_tune_defaults(struct lnet_ioctl_config_lnd_tunables *tun)
 {
        if (tun) {
-               if (!tun->lt_cmn.lct_peer_timeout)
+               if (tun->lt_cmn.lct_peer_timeout < 0)
                        tun->lt_cmn.lct_peer_timeout = DEFAULT_PEER_TIMEOUT;
                if (!tun->lt_cmn.lct_peer_tx_credits)
                        tun->lt_cmn.lct_peer_tx_credits = DEFAULT_PEER_CREDITS;
index ad11da2..9b44a76 100644 (file)
@@ -4422,6 +4422,8 @@ yaml_extract_cmn_tunables(struct cYAML *tree,
                item = cYAML_get_object_item(tun, "peer_timeout");
                if (item != NULL)
                        tunables->lct_peer_timeout = item->cy_valueint;
+               else
+                       tunables->lct_peer_timeout = -1;
                item = cYAML_get_object_item(tun, "peer_credits");
                if (item != NULL)
                        tunables->lct_peer_tx_credits = item->cy_valueint;
index a0f9de7..94ce53a 100644 (file)
@@ -1178,7 +1178,7 @@ static int jt_add_ni(int argc, char **argv)
                found = true;
        }
 
-       if (pto > 0 || pc > 0 || pbc > 0 || cre > 0 || cpp > -1) {
+       if (pto >= 0 || pc > 0 || pbc > 0 || cre > 0 || cpp > -1) {
                tunables.lt_cmn.lct_peer_timeout = pto;
                tunables.lt_cmn.lct_peer_tx_credits = pc;
                tunables.lt_cmn.lct_peer_rtr_credits = pbc;
index 5ef45ef..95329d6 100755 (executable)
@@ -3032,6 +3032,52 @@ test_230() {
 }
 run_test 230 "Test setting conns-per-peer"
 
+test_231() {
+       reinit_dlc || return $?
+
+       do_lnetctl net add --net tcp --if ${INTERFACES[0]} ||
+               error "Failed to add net"
+
+       $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-expected.yaml
+       sed -i 's/peer_timeout: .*$/peer_timeout: 0/' \
+               $TMP/sanity-lnet-$testnum-expected.yaml
+
+       reinit_dlc || return $?
+
+       do_lnetctl import $TMP/sanity-lnet-$testnum-expected.yaml ||
+               error "Failed to import configuration"
+
+       $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-actual.yaml
+
+       compare_yaml_files || error "Wrong config after import"
+
+       do_lnetctl net del --net tcp --if ${INTERFACES[0]} ||
+               error "Failed to delete net tcp"
+
+       do_lnetctl net add --net tcp --if ${INTERFACES[0]} --peer-timeout=0 ||
+               error "Failed to add net with peer-timeout=0"
+
+       $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-actual.yaml
+
+       compare_yaml_files || error "Wrong config after lnetctl net add"
+
+       reinit_dlc || return $?
+
+       # lnet/include/lnet/lib-lnet.h defines DEFAULT_PEER_TIMEOUT 180
+       sed -i 's/peer_timeout: .*$/peer_timeout: 180/' \
+               $TMP/sanity-lnet-$testnum-expected.yaml
+
+       sed -i '/^.*peer_timeout:.*$/d' $TMP/sanity-lnet-$testnum-actual.yaml
+
+       do_lnetctl import $TMP/sanity-lnet-$testnum-actual.yaml ||
+               error "Failed to import config without peer_timeout"
+
+       $LNETCTL export --backup > $TMP/sanity-lnet-$testnum-actual.yaml
+
+       compare_yaml_files
+}
+run_test 231 "Check DLC handling of peer_timeout parameter"
+
 ### Test that linux route is added for each ni
 test_250() {
        reinit_dlc || return $?