Whamcloud - gitweb
LU-16959 lnet: auto-tune ARP-related sysctl setting 10/53310/24
authorFrank Sehr <fsehr@whamcloud.com>
Fri, 1 Dec 2023 23:00:51 +0000 (15:00 -0800)
committerOleg Drokin <green@whamcloud.com>
Wed, 17 Jul 2024 15:20:56 +0000 (15:20 +0000)
Default linux settings for net.ipv4.neigh.default.gc_thresh* may be
too low. The configuration file contains recommended threshold values
for the arp table configuration for larger systems. These values are
not set by default and can be enabled by setting the
enable_sysctl_setup parameter to 1 in the configuration file.
To activate the changes immediately please execute
sysctl -p /etc/lnet-sysctl.conf as root.
New ticket for documentation:
LUDOC-528 - Adding documentation for enable_sysctl_setup

Test-Parameters: trivial testlist=sanity-lnet env=ONLY=260
Signed-off-by: Frank Sehr <fsehr@whamcloud.com>
Change-Id: I34af4b402b59341ee7e9cfb45fef7c67eb5e78e9
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53310
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Serguei Smirnov <ssmirnov@whamcloud.com>
Reviewed-by: Cyril Bordage <cbordage@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
12 files changed:
lnet/lnet/module.c
lnet/utils/lnetconfig/liblnetconfig.c
lnet/utils/lnetconfig/liblnetconfig.h
lnet/utils/lnetctl.c
lustre.spec.in
lustre/conf/99-lustre.rules
lustre/conf/Makefile.am
lustre/conf/lnet-sysctl.conf [new file with mode: 0644]
lustre/scripts/Makefile.am
lustre/scripts/lnet-sysctl-config [new file with mode: 0755]
lustre/tests/sanity-lnet.sh
lustre/tests/test-framework.sh

index 80452b2..946abc6 100644 (file)
@@ -20,6 +20,11 @@ static int config_on_load = 0;
 module_param(config_on_load, int, 0444);
 MODULE_PARM_DESC(config_on_load, "configure network at module load");
 
+/* when 1, lnet-sysctl-config applies /etc/lnet-sysctl.conf (large systems) */
+static unsigned int enable_sysctl_setup;
+module_param(enable_sysctl_setup, uint, 0644);
+MODULE_PARM_DESC(enable_sysctl_setup, "enable sysctl parameters for large systems");
+
 static DEFINE_MUTEX(lnet_config_mutex);
 
 int lnet_configure(void *arg)
index 09cb45b..47b4f7f 100644 (file)
@@ -4610,6 +4610,35 @@ out:
        return rc;
 }
 
+/* Run lnet-sysctl-config; return 0 or -errno, status tree goes to err_rc. */
+int lustre_lnet_setup_sysctl(struct cYAML **err_rc)
+{
+       int rc;
+       char err_str[LNET_MAX_STR_LEN] = "\"success\"";
+       char *env_ptr, *tmp_ptr, *syscmd = "/usr/sbin/lnet-sysctl-config";
+
+       /* LNET_SYSCTL_CONFIG may relocate the script but must not rename it */
+       env_ptr = getenv("LNET_SYSCTL_CONFIG");
+       if (env_ptr) {
+               tmp_ptr = strrchr(env_ptr, '/');
+               if (tmp_ptr && !strcmp(tmp_ptr, "/lnet-sysctl-config"))
+                       syscmd = env_ptr;
+       }
+
+       rc = system(syscmd);
+       if (rc != 0) {
+               /* errno is only meaningful when system() itself failed (-1) */
+               rc = (rc == -1) ? -errno : -EIO;
+               snprintf(err_str, sizeof(err_str),
+                        "\"failed to execute lnet-sysctl-config : %s\"",
+                        strerror(-rc));
+       }
+
+       cYAML_build_error(rc, -1, MANAGE_CMD, "setup-sysctl", err_str,
+                         err_rc);
+       return rc;
+}
+
 static int show_recovery_queue(enum lnet_health_type type, char *name,
                               int seq_no, struct cYAML **show_rc,
                               struct cYAML **err_rc)
index 613e1de..fad575e 100644 (file)
@@ -531,6 +531,15 @@ int lustre_lnet_calc_service_id(__u64 *service_id);
 int lustre_lnet_setup_mrrouting(struct cYAML **err_rc);
 
 /*
+ * lustre_lnet_setup_sysctl
+ *   Apply the Linux sysctl settings configured in /etc/lnet-sysctl.conf
+ *
+ *   err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ *   caller
+ */
+int lustre_lnet_setup_sysctl(struct cYAML **err_rc);
+
+/*
  * lustre_lnet_config_discovery
  *   Enable or disable peer discovery. Peer discovery is enabled by default.
  *
index bc600df..753ec51 100644 (file)
@@ -87,6 +87,7 @@ static int jt_set_recovery_limit(int argc, char **argv);
 static int jt_udsp(int argc, char **argv);
 static int jt_fault(int argc, char **argv);
 static int jt_setup_mrrouting(int argc, char **argv);
+static int jt_setup_sysctl(int argc, char **argv);
 static int jt_calc_cpt_of_nid(int argc, char **argv);
 static int jt_show_peer_debug_info(int argc, char **argv);
 
@@ -114,6 +115,8 @@ command_t cmd_list[] = {
        {"fault", jt_fault, 0, "udsp {show | help}"},
        {"setup-mrrouting", jt_setup_mrrouting, 0,
         "setup linux routing tables\n"},
+       {"setup-sysctl", jt_setup_sysctl, 0,
+        "setup linux sysctl parameter for large systems\n"},
        {"cpt-of-nid", jt_calc_cpt_of_nid, 0,
         "Calculate the CPTs associated with NIDs\n"
         " usage:\n\tlnetctl cpt-of-nid nid[ nid ...]\n"},
@@ -352,6 +355,21 @@ static int jt_setup_mrrouting(int argc, char **argv)
        return rc;
 }
 
+static int jt_setup_sysctl(int argc, char **argv)
+{
+       /* lnetctl setup-sysctl: takes no arguments, argc/argv are unused */
+       struct cYAML *yaml_err = NULL;
+       int status = lustre_lnet_setup_sysctl(&yaml_err);
+
+       /* the status tree is only shown when the library reports a failure */
+       if (status != LUSTRE_CFG_RC_NO_ERR)
+               cYAML_print_tree2file(stderr, yaml_err);
+
+       /* the tree is owned by this caller and must be released here */
+       cYAML_free_tree(yaml_err);
+       return status;
+}
+
 static inline void print_help(const command_t cmds[], const char *cmd_type,
                              const char *pc_name)
 {
index 61f1993..44aba16 100644 (file)
@@ -927,6 +927,7 @@ echo '%{_libdir}/lustre/tests/lutf/*' >>lustre-tests.files
 %if %{with zfs}
 %config(noreplace) %{_sysconfdir}/ldev.conf
 %endif
+%config(noreplace) %{_sysconfdir}/lnet-sysctl.conf
 %config(noreplace) %{_sysconfdir}/lnet.conf
 %config(noreplace) %{_sysconfdir}/modprobe.d/ko2iblnd.conf
 %if %{with lustre_utils}
index 22616e9..5cb6a87 100644 (file)
@@ -4,3 +4,5 @@ KERNEL=="obd", MODE="0666"
 SUBSYSTEM=="lustre", ACTION=="change", ENV{PARAM}=="?*", RUN+="/usr/sbin/lctl set_param '$env{PARAM}=$env{SETTING}'"
 # setup linux routes for mr on lustre load
 SUBSYSTEM=="module", ACTION=="add", DEVPATH=="/module/lustre", RUN+="/usr/sbin/lnetctl setup-mrrouting"
+# setup linux sysctl setting on lustre load
+SUBSYSTEM=="module", ACTION=="add", DEVPATH=="/module/lustre", RUN+="/usr/sbin/lnetctl setup-sysctl"
index 3b80868..6cec5b5 100644 (file)
@@ -36,7 +36,7 @@ DIST_SUBDIRS = resource
 
 EXTRA_DIST = lustre.xml 99-lustre.rules lustre ldev.conf ko2iblnd.conf \
              lsvcgss lnet_routes.conf lnet.conf lgssc.conf \
-             99-lustre-server.rules
+             lnet-sysctl.conf 99-lustre-server.rules
 
 sysconf_DATA = lnet.conf
 
@@ -69,3 +69,6 @@ endif
 
 modprobedir = $(sysconfdir)/modprobe.d
 modprobe_DATA = ko2iblnd.conf
+
+sysctldir = $(sysconfdir)
+sysctl_DATA = lnet-sysctl.conf
diff --git a/lustre/conf/lnet-sysctl.conf b/lustre/conf/lnet-sysctl.conf
new file mode 100644 (file)
index 0000000..b42e739
--- /dev/null
@@ -0,0 +1,30 @@
+# lnet-sysctl.conf
+# The following file contains possible sysctl settings to optimize
+# the performance for lustre systems from an LNet perspective.
+# The changes take effect after a reboot, but can be activated
+# immediately by running the following command as root:
+#   sysctl -p /etc/lnet-sysctl.conf
+
+# ARP related changes for larger systems
+# gc_thresh1: the minimum number of stored ARP records; below it nothing is cleared
+# gc_thresh2: the number above which records older than 5 seconds begin to be cleared
+# gc_thresh3: the number above which records begin to be cleared immediately
+#
+# linux default arp thresholds
+# net.ipv4.neigh.default.gc_thresh1 = 128
+# net.ipv4.neigh.default.gc_thresh2 = 512
+# net.ipv4.neigh.default.gc_thresh3 = 1024
+# net.ipv6.neigh.default.gc_thresh1 = 128
+# net.ipv6.neigh.default.gc_thresh2 = 512
+# net.ipv6.neigh.default.gc_thresh3 = 1024
+
+# recommended settings for large systems
+# IPV4
+net.ipv4.neigh.default.gc_thresh1 = 1024
+net.ipv4.neigh.default.gc_thresh2 = 2048
+net.ipv4.neigh.default.gc_thresh3 = 4096
+
+# IPV6
+net.ipv6.neigh.default.gc_thresh1 = 1024
+net.ipv6.neigh.default.gc_thresh2 = 2048
+net.ipv6.neigh.default.gc_thresh3 = 4096
index 5f33af4..ce32c8b 100644 (file)
@@ -36,7 +36,7 @@ genscripts = lc_modprobe lc_net lc_hb lc_cluman lc_md lc_lvm lustre_start lnet
 
 SUBDIRS = systemd
 
-sbin_SCRIPTS = lustre_rmmod ko2iblnd-probe ksocklnd-config
+sbin_SCRIPTS = lustre_rmmod ko2iblnd-probe ksocklnd-config lnet-sysctl-config
 
 if RHEL
 initdir = $(sysconfdir)/init.d
@@ -96,7 +96,7 @@ EXTRA_DIST = lustre_rmmod ldev lc_mon lhbadm \
             zfsobj2fid ko2iblnd-probe ksocklnd-config statechange-lustre.sh \
             vdev_attach-lustre.sh vdev_remove-lustre.sh vdev_clear-lustre.sh \
             bash-completion/lustre bash-completion/lctl bash-completion/lfs \
-            remove_changelog remove_updatelog lsvcgss_sysd
+            remove_changelog remove_updatelog lsvcgss_sysd lnet-sysctl-config
 
 CLEANFILES = $(genscripts)
 
diff --git a/lustre/scripts/lnet-sysctl-config b/lustre/scripts/lnet-sysctl-config
new file mode 100755 (executable)
index 0000000..49eea9f
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+sysctl_conf_file="/etc/lnet-sysctl.conf"
+sysctl_conf_bak="/etc/lnet-sysctl.bak"
+
+# Check if the user wants to enable sysctl settings
+sysctlstat=$(cat /sys/module/lnet/parameters/enable_sysctl_setup 2>&-)
+
+if [[ "${sysctlstat}" != "1" ]]; then
+       if [ -f "${sysctl_conf_bak}" ]; then
+               /usr/sbin/sysctl -p "${sysctl_conf_bak}" 2>&-
+               rm -f "${sysctl_conf_bak}"
+       fi
+       exit 0
+fi
+
+# create backup file for reset
+echo "# Generated backup for original lnet-sysctl.conf parameters" \
+     > "${sysctl_conf_bak}" 2>&-
+while IFS= read -r line; do
+       # Couldn't break this line
+       if [[ "$line" =~ \
+             ^[[:space:]]*([a-zA-Z0-9_.-]+)[[:space:]]*=[[:space:]]*([a-zA-Z0-9_.-]+)[[:space:]]*$ \
+          ]]; then
+               value=$(/usr/sbin/sysctl -n "${BASH_REMATCH[1]}")
+               echo "${BASH_REMATCH[1]}=${value}" >> "${sysctl_conf_bak}" 2>&-
+       fi
+done < "${sysctl_conf_file}"
+
+# set configuration values
+/usr/sbin/sysctl -p "${sysctl_conf_file}" 2>&-
+exit $?
index 2e5036b..bbbaebe 100755 (executable)
@@ -3984,6 +3984,81 @@ test_256() {
 }
 run_test 256 "Router should not drop messages that are past the deadline"
 
+check_sysctl() {
+       while IFS= read -r line; do
+               # Couldn't find a way to break this line
+               if [[ "$line" =~ \
+                     ^[[:space:]]*([a-zA-Z0-9_.-]+)[[:space:]]*=[[:space:]]*([a-zA-Z0-9_.-]+)[[:space:]]*$ \
+                  ]]; then
+                       value=$(sysctl -n "${BASH_REMATCH[1]}" 2>/dev/null)
+                       if [ -z "${value}" ]; then
+                               error "Parameter ${BASH_REMATCH[1]} not set"
+                       fi
+                       echo "found: ${BASH_REMATCH[1]} ${value}"
+                       if [ "${value}" != "${BASH_REMATCH[2]}" ]; then
+                               error "Parameter ${BASH_REMATCH[1]} \
+                                       wrong value: ${value} \
+                                       expected: ${BASH_REMATCH[2]}"
+                       fi
+               fi
+       done < "$1"
+}
+
+### Test that linux sysctl parameters are set and reset by setup-sysctl
+
+test_260() {
+       local sysctl_file="/etc/lnet-sysctl.conf"
+       local sysctl_conf_bak="/etc/lnet-sysctl.bak"
+       local sysctl_bak=$TMP/lnet-sysctl.bak
+
+       echo "Setting default values and create backup for check"
+
+       sysctl -w net.ipv4.neigh.default.gc_thresh1=128 > "$sysctl_bak"
+       sysctl -w net.ipv4.neigh.default.gc_thresh2=512 >> "$sysctl_bak"
+       sysctl -w net.ipv4.neigh.default.gc_thresh3=1024 >> "$sysctl_bak"
+       sysctl -w net.ipv6.neigh.default.gc_thresh1=128 >> "$sysctl_bak"
+       sysctl -w net.ipv6.neigh.default.gc_thresh2=512 >> "$sysctl_bak"
+       sysctl -w net.ipv6.neigh.default.gc_thresh3=1024 >> "$sysctl_bak"
+
+       echo "Check default configuration"
+       check_sysctl "${sysctl_bak}"
+
+       load_modules || error "Failed to load Modules"
+
+       sysctlstat=$(cat /sys/module/lnet/parameters/enable_sysctl_setup 2>&-)
+       echo "enable_sysctl_setup set to ${sysctlstat}"
+
+       echo "New configuration"
+
+       echo 1 > /sys/module/lnet/parameters/enable_sysctl_setup 2>&1
+
+       sysctlstat=$(cat /sys/module/lnet/parameters/enable_sysctl_setup 2>&-)
+       echo "enable_sysctl_setup set to ${sysctlstat}"
+
+       $LNETCTL setup-sysctl ||
+               error "setup-sysctl failed"
+
+       echo "Check new configuration"
+       check_sysctl "${sysctl_file}"
+
+       echo "Reset to original values"
+       echo 0 > /sys/module/lnet/parameters/enable_sysctl_setup 2>&1
+
+       sysctlstat=$(cat /sys/module/lnet/parameters/enable_sysctl_setup 2>&-)
+       echo "enable_sysctl_setup set to ${sysctlstat}"
+
+       $LNETCTL setup-sysctl ||
+               error "setup-sysctl failed"
+
+       echo "Check original configuration"
+       check_sysctl "${sysctl_bak}"
+
+       rm -f "${sysctl_bak}"
+
+       unload_modules || error "Failed to cleanup Modules"
+}
+run_test 260 "test that linux sysctl parameter are set correctly"
+
 test_300() {
        # LU-13274
        local header
index 101f7d3..ad66b43 100755 (executable)
@@ -552,6 +552,9 @@ init_test_env() {
        export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"}
        [ ! -f "$KSOCKLND_CONFIG" ] &&
                export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null)
+       export LNET_SYSCTL_CONFIG=${LNET_SYSCTL_CONFIG:-"$LUSTRE/scripts/lnet-sysctl-config"}
+       [ ! -f "$LNET_SYSCTL_CONFIG" ] &&
+               export LNET_SYSCTL_CONFIG=$(which lnet-sysctl-config 2> /dev/null)
 
        export PERM_CMD=$(echo ${PERM_CMD:-"$LCTL conf_param"})