module_param(conns_per_peer, uint, 0644);
MODULE_PARM_DESC(conns_per_peer, "number of connections per peer");
+/* skip_mr_route_setup defaults to 0: linux routes for MR are set up automatically */
+static unsigned int skip_mr_route_setup;
+module_param(skip_mr_route_setup, uint, 0444);
+MODULE_PARM_DESC(skip_mr_route_setup, "skip automatic setup of linux routes for MR");
+
#ifdef SOCKNAL_BACKOFF
static int backoff_init = 3;
module_param(backoff_init, int, 0644);
return LUSTRE_CFG_RC_NO_ERR;
}
+/*
+ * lustre_lnet_setup_mrrouting
+ *   Walk the local NIs, collect the interface names of the socklnd (tcp)
+ *   ones into a comma-separated list and pass that list to the
+ *   ksocklnd-config helper script.
+ *
+ *   The helper is /usr/sbin/ksocklnd-config unless the KSOCKLND_CONFIG
+ *   environment variable holds a path whose last component is
+ *   "ksocklnd-config", in which case that path is used instead.
+ *
+ *   err_rc - [OUT] struct cYAML tree describing the error. Freed by caller
+ *
+ *   Returns LUSTRE_CFG_RC_NO_ERR on success, LUSTRE_CFG_RC_OUT_OF_MEM if the
+ *   ioctl buffer cannot be allocated, and a negated errno value otherwise
+ *   (-E2BIG when the command line does not fit, -EIO when the helper ran
+ *   but did not exit cleanly).
+ */
+int lustre_lnet_setup_mrrouting(struct cYAML **err_rc)
+{
+	char *buf;
+	int rc = LUSTRE_CFG_RC_OUT_OF_MEM, i;
+	int l_errno = 0;
+	char err_str[LNET_MAX_STR_LEN] = "\"out of memory\"";
+	struct lnet_ioctl_config_ni *ni_data;
+	struct lnet_ioctl_config_lnd_tunables *lnd;
+	struct lnet_ioctl_element_stats *stats;
+	size_t buf_size = sizeof(*ni_data) + sizeof(*lnd) + sizeof(*stats);
+	char ifstr_buf[LNET_INTERFACES_NUM*LNET_MAX_STR_LEN];
+	size_t ifstr_len = 0;
+	int if_cnt = 0, prc;
+	char syscmdbuf[LNET_MAX_STR_LEN];
+	char cmdpath[LNET_MAX_STR_LEN];
+	char *env_path, *base;
+	bool use_custom = false;
+
+	buf = calloc(1, buf_size);
+	if (buf == NULL)
+		goto out;
+
+	ni_data = (struct lnet_ioctl_config_ni *)buf;
+
+	ifstr_buf[0] = 0;
+
+	/* the ioctl reports failure once the index runs past the last NI */
+	for (i = 0;; i++) {
+		__u32 rc_net;
+
+		memset(buf, 0, buf_size);
+
+		LIBCFS_IOC_INIT_V2(*ni_data, lic_cfg_hdr);
+		/* set the ioc_len to the proper value since INIT assumes
+		 * size of data
+		 */
+		ni_data->lic_cfg_hdr.ioc_len = buf_size;
+		ni_data->lic_idx = i;
+
+		rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_LOCAL_NI, ni_data);
+		if (rc != 0) {
+			l_errno = errno;
+			break;
+		}
+
+		rc_net = LNET_NIDNET(ni_data->lic_nid);
+
+		/* only need to setup routing for tcp */
+		if (LNET_NETTYP(rc_net) != SOCKLND)
+			continue;
+
+		/* NIs without an interface name contribute nothing */
+		if (strlen(ni_data->lic_ni_intf) == 0)
+			continue;
+
+		/* bounded append: the number of NIs is not limited by the
+		 * loop, so never write past the end of ifstr_buf
+		 */
+		prc = snprintf(ifstr_buf + ifstr_len,
+			       sizeof(ifstr_buf) - ifstr_len, "%s%s",
+			       if_cnt > 0 ? "," : "", ni_data->lic_ni_intf);
+		if (prc < 0 || (size_t)prc >= sizeof(ifstr_buf) - ifstr_len) {
+			snprintf(err_str, sizeof(err_str),
+				 "\"ksocklnd-config: argument too long\"");
+			rc = -E2BIG;
+			goto out;
+		}
+		ifstr_len += prc;
+		if_cnt++;
+	}
+
+	/* ENOENT is the expected end-of-list indication */
+	if (l_errno != ENOENT) {
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"cannot get networks: %s\"",
+			 strerror(l_errno));
+		rc = -l_errno;
+		goto out;
+	}
+
+	rc = LUSTRE_CFG_RC_NO_ERR;
+	snprintf(err_str, sizeof(err_str), "\"success\"");
+
+	if (if_cnt == 0)
+		goto out;
+
+	/* honour KSOCKLND_CONFIG only if it names a "ksocklnd-config" file */
+	env_path = getenv("KSOCKLND_CONFIG");
+	if (env_path) {
+		base = strrchr(env_path, '/');
+		if (base && !strcmp(base, "/ksocklnd-config")) {
+			snprintf(cmdpath, sizeof(cmdpath), "%s", env_path);
+			use_custom = true;
+		}
+	}
+
+	if (!use_custom)
+		snprintf(cmdpath, sizeof(cmdpath),
+			 "/usr/sbin/ksocklnd-config");
+
+	prc = snprintf(syscmdbuf, sizeof(syscmdbuf), "%s %s",
+		       cmdpath, ifstr_buf);
+	if (prc < 0) {
+		l_errno = errno;
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"snprintf failed : %s\"",
+			 strerror(l_errno));
+		rc = -l_errno;
+		goto out;
+	}
+	if ((size_t)prc >= sizeof(syscmdbuf)) {
+		/* a truncated command must not be run, and must not be
+		 * reported as success
+		 */
+		snprintf(err_str, sizeof(err_str),
+			 "\"ksocklnd-config: argument too long\"");
+		rc = -E2BIG;
+		goto out;
+	}
+
+	/* NOTE(review): the command line is interpreted by the shell and is
+	 * built from an environment variable and NI interface names; confirm
+	 * that neither can carry shell metacharacters in practice.
+	 */
+	rc = system(syscmdbuf);
+	if (rc == -1) {
+		/* the child could not be created: errno is meaningful */
+		l_errno = errno;
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"failed to execute ksocklnd-config : %s\"",
+			 strerror(l_errno));
+		rc = -l_errno;
+	} else if (rc != 0) {
+		/* the helper ran but failed; errno is stale here, so report
+		 * the raw wait status instead of strerror(errno)
+		 */
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"ksocklnd-config failed: wait status %d\"",
+			 rc);
+		rc = -EIO;
+	}
+
+out:
+	free(buf);
+
+	cYAML_build_error(rc, -1, MANAGE_CMD, "setup-mrrouting", err_str,
+			  err_rc);
+
+	return rc;
+}
+
int show_recovery_queue(enum lnet_health_type type, char *name, int seq_no,
struct cYAML **show_rc, struct cYAML **err_rc)
{
int lustre_lnet_calc_service_id(__u64 *service_id);
/*
+ * lustre_lnet_setup_mrrouting
+ * Configure linux routing tables for the interfaces of the local tcp NIs.
+ *
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ *
+ * Returns LUSTRE_CFG_RC_NO_ERR on success.
+ */
+int lustre_lnet_setup_mrrouting(struct cYAML **err_rc);
+
+/*
* lustre_lnet_config_discovery
* Enable or disable peer discovery. Peer discovery is enabled by default.
*
static int jt_set_response_tracking(int argc, char **argv);
static int jt_set_recovery_limit(int argc, char **argv);
static int jt_udsp(int argc, char **argv);
+static int jt_setup_mrrouting(int argc, char **argv);
command_t cmd_list[] = {
{"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"},
{"discover", jt_discover, 0, "discover nid[,nid,...]"},
{"service-id", jt_calc_service_id, 0, "Calculate IB Lustre service ID\n"},
{"udsp", jt_udsp, 0, "udsp {add | del | help}"},
+ {"setup-mrrouting", jt_setup_mrrouting, 0,
+ "setup linux routing tables\n"},
{"help", Parser_help, 0, "help"},
{"exit", Parser_quit, 0, "quit"},
{"quit", Parser_quit, 0, "quit"},
"\t--peer-buffer-credits: the number of buffer credits per peer\n"
"\t--credits: Network Interface credits\n"
"\t--cpt: CPU Partitions configured net uses (e.g. [0,1]\n"
- "\t--conns-per-peer: number of connections per peer\n"},
+ "\t--conns-per-peer: number of connections per peer\n"
+ "\t--skip-mr-route-setup: do not add linux route for the ni\n"},
{"del", jt_del_ni, 0, "delete a network\n"
"\t--net: net name (e.g. tcp0)\n"
"\t--if: physical interface (e.g. eth0)\n"},
return rc;
}
+/*
+ * "setup-mrrouting" command handler: run the MR route setup and, if it
+ * fails, print the returned error tree on stderr. argc/argv are unused.
+ */
+static int jt_setup_mrrouting(int argc, char **argv)
+{
+	struct cYAML *err_tree = NULL;
+	int result = lustre_lnet_setup_mrrouting(&err_tree);
+
+	if (result != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_tree);
+
+	/* the error tree is owned by this caller */
+	cYAML_free_tree(err_tree);
+
+	return result;
+}
+
static inline void print_help(const command_t cmds[], const char *cmd_type,
const char *pc_name)
{
struct cfs_expr_list *global_cpts = NULL;
struct lnet_ioctl_config_lnd_tunables tunables;
bool found = false;
+ bool skip_mr_route_setup = false;
memset(&tunables, 0, sizeof(tunables));
lustre_lnet_init_nw_descr(&nw_descr);
- const char *const short_options = "b:c:i:m:n:p:r:s:t:";
+	/* 'k' (--skip-mr-route-setup) is a flag: no ':' so it takes no argument */
+	const char *const short_options = "b:c:i:km:n:p:r:s:t:";
static const struct option long_options[] = {
{ .name = "peer-buffer-credits",
.has_arg = required_argument, .val = 'b' },
{ .name = "peer-credits", .has_arg = required_argument, .val = 'c' },
{ .name = "if", .has_arg = required_argument, .val = 'i' },
+ { .name = "skip-mr-route-setup",
+ .has_arg = no_argument, .val = 'k' },
{ .name = "conns-per-peer",
.has_arg = required_argument, .val = 'm' },
{ .name = "net", .has_arg = required_argument, .val = 'n' },
goto failed;
}
break;
+ case 'k':
+ skip_mr_route_setup = true;
+ break;
case 'm':
rc = parse_long(optarg, &cpp);
if (rc != 0) {
cYAML_free_tree(err_rc);
+ if (rc == LUSTRE_CFG_RC_NO_ERR && !skip_mr_route_setup) {
+ err_rc = NULL;
+ rc = lustre_lnet_setup_mrrouting(&err_rc);
+
+ if (rc != LUSTRE_CFG_RC_NO_ERR)
+ cYAML_print_tree2file(stderr, err_rc);
+
+ cYAML_free_tree(err_rc);
+ }
+
return rc;
}
# set sysfs values on client
SUBSYSTEM=="lustre", ACTION=="change", ENV{PARAM}=="?*", RUN+="/usr/sbin/lctl set_param '$env{PARAM}=$env{SETTING}'"
+# set up linux routes for MR when the lustre module is added
+SUBSYSTEM=="module", ACTION=="add", DEVPATH=="/module/lustre", RUN+="/usr/sbin/lnetctl setup-mrrouting"
SUBDIRS = systemd
-sbin_SCRIPTS = lustre_rmmod ko2iblnd-probe
+sbin_SCRIPTS = lustre_rmmod ko2iblnd-probe ksocklnd-config
if RHEL
initdir = $(sysconfdir)/init.d
lc_servip lustre_routes_config lustre_routes_conversion \
$(addsuffix .in,$(genscripts)) lfs_migrate lustre_req_history \
lustre lsvcgss lc_common haconfig Lustre.ha_v2 dkms.mkconf \
- zfsobj2fid ko2iblnd-probe statechange-lustre.sh \
+ zfsobj2fid ko2iblnd-probe ksocklnd-config statechange-lustre.sh \
vdev_attach-lustre.sh vdev_remove-lustre.sh vdev_clear-lustre.sh \
bash-completion/lustre bash-completion/lctl bash-completion/lfs
--- /dev/null
+#!/bin/bash
+
+# bash is required: the script relies on arrays, [[ ]], local and +=
+# name of this script without its directory, used to prefix log messages
+me="${0##*/}"
+
+# Print the dotted-quad netmask (e.g. 255.255.255.0) that corresponds to
+# the prefix length given as first argument (e.g. 24)
+cidr2mask() {
+	local idx octet dotted=""
+	local whole=$(($1/8))
+	local leftover=$(($1%8))
+
+	for idx in 0 1 2 3; do
+		if [ $idx -lt $whole ]; then
+			# octet entirely covered by the prefix
+			octet=255
+		elif [ $idx -eq $whole ]; then
+			# octet only partially covered by the prefix
+			octet=$((256 - 2**(8-$leftover)))
+		else
+			octet=0
+		fi
+		dotted+=$octet
+		[ $idx -lt 3 ] && dotted+=.
+	done
+
+	echo $dotted
+}
+
+# apply netmask (second argument) to ip address (first argument)
+netcalc() {
+ local ipa=$(echo ${1} | awk -F. '{ print $1 }')
+ local ipb=$(echo ${1} | awk -F. '{ print $2 }')
+ local ipc=$(echo ${1} | awk -F. '{ print $3 }')
+ local ipd=$(echo ${1} | awk -F. '{ print $4 }')
+ local mka=$(echo ${2} | awk -F. '{ print $1 }')
+ local mkb=$(echo ${2} | awk -F. '{ print $2 }')
+ local mkc=$(echo ${2} | awk -F. '{ print $3 }')
+ local mkd=$(echo ${2} | awk -F. '{ print $4 }')
+ local nta="$(( $ipa & $mka ))"
+ local ntb="$(( $ipb & $mkb ))"
+ local ntc="$(( $ipc & $mkc ))"
+ local ntd="$(( $ipd & $mkd ))"
+ echo "$nta.$ntb.$ntc.$ntd"
+}
+
+# Check if the user wants to skip setting the routes
+checkskipcmd=$(cat /sys/module/ksocklnd/parameters/skip_mr_route_setup 2>&-)
+if [ "$checkskipcmd" == "1" ]; then
+ exit 0
+fi
+
+# Extract comma-separated interfaces from the argument
+j=0
+declare -a interfaces
+for i in $(echo $1 | sed "s/,/ /g")
+do
+ # verify that the interface exists
+ #echo "$i"
+ addr=$(/sbin/ip -o -4 addr list $i 2>&- | awk '{print $4}' | cut -d/ -f1)
+ linelen=$(echo -n $addr | wc -m)
+ if [[ $linelen -eq 0 ]]; then
+ # there's a problem with this interface, skip it
+ #echo 'bad!'
+ continue
+ fi
+ # check if route is already set up for this interface
+ intfroute=$(/sbin/ip route show table $i 2>&-)
+ if [[ ! -z $intfroute ]]; then
+ # route exists so skip this interface
+ logcmd=(logger "${me}: skip setting up route for ${i}: don\'t overwrite existing route")
+ eval "${logcmd[@]}"
+ continue
+ fi
+ interfaces[$j]=$i
+ j=$((j+1))
+done
+
+# this array will contain the interfaces
+# already listed in rt_tables
+interfaces_listed=()
+
+# flush the routing table named after each selected interface
+for i in "${interfaces[@]}"
+do
+	# run the command directly (no eval); errors are discarded
+	/sbin/ip route flush table "$i" 2>/dev/null
+	logger "${me}: /sbin/ip route flush table ${i}"
+done
+
+filename='/etc/iproute2/rt_tables'
+n=1
+max_table_num=0
+while read line; do
+ # reading each line
+ # trim leading and trailing spaces
+ line=`echo $line | sed -e 's/^[[:space:]]*//'`
+ linelen=$(echo -n $line | wc -m)
+ # don't check empty lines
+ if [ $linelen -lt 1 ]; then
+ continue
+ fi
+ # don't check comments
+ if [[ ${line:0:1} == "#" ]]; then
+ continue
+ fi
+ # split using space as separator
+ splitline=( $line )
+ # check the table number and update the max
+ if [ $max_table_num -lt ${splitline[0]} ]; then
+ max_table_num=${splitline[0]}
+ fi
+ # check if any of the interfaces are listed
+ for i in "${interfaces[@]}"
+ do
+ if [[ " ${splitline[@]} " =~ " ${i} " ]]; then
+ if [[ " ${interfaces[@]} " =~ " ${i} " ]]; then
+ interfaces_listed+=($i)
+ fi
+ fi
+ done
+ #echo "Line No. $n : $line: $max_table_num"
+ n=$((n+1))
+done < $filename
+
+# give every interface that is not yet in rt_tables the next free table
+# number and append the "<number> <interface>" entry to the file
+for i in "${interfaces[@]}"
+do
+	[[ " ${interfaces_listed[@]} " =~ " ${i} " ]] && continue
+	max_table_num=$((max_table_num+1))
+	echo "$max_table_num $i" >> $filename
+done
+
+# add the routing entries and rules
+for i in "${interfaces[@]}"
+do
+	# "address/prefixlen" entries of the interface; only the first is used
+	addrinfo=($(/sbin/ip -o -4 addr list "$i" 2>/dev/null | awk '{print $4}'))
+	# the address may have gone away since the interface was selected
+	if [[ -z ${addrinfo[0]} ]]; then
+		continue
+	fi
+	addr=${addrinfo[0]%/*}
+	cidrmask=${addrinfo[0]#*/}
+	# convert cidr mask to mask in dot format
+	dotmask=$(cidr2mask "$cidrmask")
+	# apply mask to ip addr
+	net=$(netcalc "$addr" "$dotmask")
+	# build the argument lists once; they are run directly (no eval)
+	route_args=(route add "${net}/${cidrmask}" dev "$i" proto kernel scope link src "$addr" table "$i")
+	rule_args=(from "$addr" table "$i")
+	/sbin/ip "${route_args[@]}"
+	# drop a stale rule first so the add below does not duplicate it
+	/sbin/ip rule del "${rule_args[@]}" &>/dev/null
+	/sbin/ip rule add "${rule_args[@]}"
+	logger "${me}: /sbin/ip ${route_args[*]}"
+	logger "${me}: /sbin/ip rule del ${rule_args[*]}"
+	logger "${me}: /sbin/ip rule add ${rule_args[*]}"
+done
+
+# flush the neighbour (arp) entries of every selected interface
+for i in "${interfaces[@]}"
+do
+	/sbin/ip neigh flush dev "$i"
+done
+
lnid="$(lctl list_nids | head -n 1)"
do_lnetctl ping "$lnid" ||
error "failed to ping myself"
+
# "lctl --net tcp conn_list" prints the list of active
# connections. Since we're pinging ourselves, there should be
# 2 Control connections plus 2*conns_per_peer connections
}
run_test 230 "Test setting conns-per-peer"
+### Check that adding a tcp ni on eth0 leaves a route in table eth0
+test_250() {
+	local intf="eth0"
+
+	have_interface "$intf" || skip "Need eth0 interface with ipv4 configured"
+	reinit_dlc || return $?
+	add_net "tcp" "$intf" || return $?
+	# the grep status is the result of the test
+	ip route show table "$intf" | grep -q "eth0"
+}
+run_test 250 "test that linux routes are added"
+
test_300() {
# LU-13274
local header
[ ! -f "$LCTL" ] && export LCTL=$(which lctl)
export LFS=${LFS:-"$LUSTRE/utils/lfs"}
[ ! -f "$LFS" ] && export LFS=$(which lfs)
+ export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"}
+ [ ! -f "$KSOCKLND_CONFIG" ] &&
+ export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null)
export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"}