[ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey)
export MCREATE=${MCREATE:-mcreate}
export MULTIOP=${MULTIOP:-multiop}
+ export MMAP_CAT=${MMAP_CAT:-mmap_cat}
+ export STATX=${STATX:-statx}
# Ubuntu, at least, has a truncate command in /usr/bin
# so fully path our truncate command.
export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate}
fi
export RSYNC_RSH=${RSYNC_RSH:-rsh}
+ export LNETCTL=${LNETCTL:-"$LUSTRE/../lnet/utils/lnetctl"}
+ [ ! -f "$LNETCTL" ] && export LNETCTL=$(which lnetctl 2> /dev/null)
export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
[ ! -f "$LCTL" ] && export LCTL=$(which lctl)
export LFS=${LFS:-"$LUSTRE/utils/lfs"}
[ ! -f "$LFS" ] && export LFS=$(which lfs)
- SETSTRIPE=${SETSTRIPE:-"$LFS setstripe"}
- GETSTRIPE=${GETSTRIPE:-"$LFS getstripe"}
export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"}
# Constants used in more than one test script
export LOV_MAX_STRIPE_COUNT=2000
+ export DELETE_OLD_POOLS=${DELETE_OLD_POOLS:-false}
+ export KEEP_POOLS=${KEEP_POOLS:-false}
export MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
get_lustre_env
+ # use localrecov to enable recovery for local clients, LU-12722
+ [[ $MDS1_VERSION -lt $(version_code 2.13.52) ]] || {
+ export MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"-o localrecov"}
+ export MGS_MOUNT_OPTS=${MGS_MOUNT_OPTS:-"-o localrecov"}
+ }
+
+ [[ $OST1_VERSION -lt $(version_code 2.13.52) ]] ||
+ export OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"-o localrecov"}
}
check_cpt_number() {
# Return a numeric version code based on a version string. The version
# code is useful for comparing two version strings to see which is newer.
version_code() {
	# split arguments like "1.8.6-wc3" into "1", "8", "6", "3"
	# (punctuation AND lowercase letters both act as separators, so a
	# "wc3"-style suffix contributes only its trailing digits)
	eval set -- $(tr "[:punct:][a-z]" " " <<< $*)

	# pack each (possibly absent, default 0) component into one byte
	echo -n $(((${1:-0} << 24) | (${2:-0} << 16) | (${3:-0} << 8) | (${4:-0})))
}
export LINUX_VERSION=$(uname -r | sed -e "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/")
# output: prints version string to stdout in (up to 4) dotted-decimal values
lustre_build_version() {
	local facet=${1:-client}
	local facet_version=${facet}_VERSION

	# if the global variable (e.g. $client_VERSION) is already set, use it
	[ -n "${!facet_version}" ] && echo ${!facet_version} && return

	# this is the currently-running version of the kernel modules
	local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null")
	# we mostly test 2.10+ systems, only try others if the above fails
	if [ -z "$ver" ]; then
		ver=$(do_facet $facet "$LCTL lustre_build_version 2>/dev/null")
	fi
	if [ -z "$ver" ]; then
		ver=$(do_facet $facet "$LCTL --version 2>/dev/null" |
		      cut -d' ' -f2)
	fi

	# prefer the line that actually names the version, if present
	local lver=$(egrep -i "lustre: |version: " <<<"$ver" | head -n 1)
	[ -n "$lver" ] && ver="$lver"

	# strip "label: " prefix, leading "v", trailing "-..." build info,
	# normalize "_" to "." and keep at most 4 dotted components
	lver=$(sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' <<<$ver |
	       tr _ . | cut -d. -f1-4)

	# save in global variable for future calls
	export $facet_version=$lver

	echo $lver
}
# Report the Lustre numeric build version code for the supplied facet.
# that obviously has nothing to do with this Lustre run
# Disable automatic memory scanning to avoid perf hit.
if [ -f /sys/kernel/debug/kmemleak ] ; then
- echo scan=off > /sys/kernel/debug/kmemleak
- echo scan > /sys/kernel/debug/kmemleak
- echo clear > /sys/kernel/debug/kmemleak
+ echo scan=off > /sys/kernel/debug/kmemleak || true
+ echo scan > /sys/kernel/debug/kmemleak || true
+ echo clear > /sys/kernel/debug/kmemleak || true
fi
echo Loading modules from $LUSTRE
# if there is more than 4 CPU cores, libcfs should create multiple CPU
# partitions. So we just force libcfs to create 2 partitions for
# system with 2 or 4 cores
+ local saved_opts="$MODOPTS_LIBCFS"
if [ $ncpus -le 4 ] && [ $ncpus -gt 1 ]; then
# force to enable multiple CPU partitions
echo "Force libcfs to create 2 CPU partitions"
load_module ../libcfs/libcfs/libcfs
# Prevent local MODOPTS_LIBCFS being passed as part of environment
# variable to remote nodes
- unset MODOPTS_LIBCFS
+ MODOPTS_LIBCFS=$saved_opts
set_default_debug
load_module ../lnet/lnet/lnet
load_module fid/fid
load_module lmv/lmv
load_module osc/osc
- load_module mdc/mdc
load_module lov/lov
+ load_module mdc/mdc
load_module mgc/mgc
load_module obdecho/obdecho
if ! client_only; then
}
# Scan the kernel log for Lustre/LNet memory-leak reports left by module
# unload.  Returns 0 when clean (or when $IGNORE_LEAK is set), 1 when a
# leak was reported; preserves $TMP/debug for later analysis.
check_mem_leak () {
	LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true)
	LEAK_PORTALS=$(dmesg | tail -n 20 |
		       egrep -i "libcfs.*memory leaked" || true)
	if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
		echo "$LEAK_LUSTRE" 1>&2
		echo "$LEAK_PORTALS" 1>&2
		# keep the debug log from the leaky run for later analysis
		mv $TMP/debug $TMP/debug-leak.`date +%s` || true
		echo "Memory leaks detected"
		[ -n "$IGNORE_LEAK" ] &&
			{ echo "ignoring leaks" && return 0; } || true
		return 1
	fi
}
-unload_modules() {
- wait_exit_ST client # bug 12845
-
# Unload all Lustre modules on the local node, drop the test-specific
# udev rule, and verify nothing leaked.  Returns 2 if rmmod failed,
# 254 on a detected memory leak, 0 otherwise.
unload_modules_local() {
	$LUSTRE_RMMOD ldiskfs || return 2

	[ -f /etc/udev/rules.d/99-lustre-test.rules ] &&
		udevadm control --reload-rules
	udevadm trigger

	check_mem_leak || return 254

	return 0
}
+
+unload_modules() {
+ local rc=0
+
+ wait_exit_ST client # bug 12845
+
+ unload_modules_local || rc=$?
+
if $LOAD_MODULES_REMOTE; then
local list=$(comma_list $(remote_nodes_list))
if [ -n "$list" ]; then
echo "unloading modules on: '$list'"
- do_rpc_nodes "$list" $LUSTRE_RMMOD ldiskfs
- do_rpc_nodes "$list" check_mem_leak
- do_rpc_nodes "$list" "rm -f /etc/udev/rules.d/99-lustre-test.rules"
- do_rpc_nodes "$list" "udevadm control --reload-rules"
- do_rpc_nodes "$list" "udevadm trigger"
+ do_rpc_nodes "$list" unload_modules_local
fi
fi
rm -f $sbin_mount
fi
- check_mem_leak || return 254
+ [[ $rc -eq 0 ]] && echo "modules unloaded."
- echo "modules unloaded."
- return 0
+ return $rc
}
fs_log_size() {
echo -n $mt_opts
}
# Succeed (return 0) when $LUSTRE points into a build tree rather than
# an installed location under /usr/lib{,64}/lustre.
from_build_tree() {
	local from_tree

	case $LUSTRE in
	/usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \
	/usr/lib64/lustre )
		from_tree=false
		;;
	*)
		from_tree=true
		;;
	esac

	[ $from_tree = true ]
}
+
init_gss() {
if $SHARED_KEY; then
GSS=true
return
fi
- case $LUSTRE in
- /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \
- /usr/lib64/lustre )
- from_build_tree=false
- ;;
- *)
- from_build_tree=true
- ;;
- esac
-
if ! module_loaded ptlrpc_gss; then
load_module ptlrpc/gss/ptlrpc_gss
module_loaded ptlrpc_gss ||
start_gss_daemons || error_exit "start gss daemon failed! rc=$?"
fi
+ if $GSS_SK && ! $SK_NO_KEY; then
+ echo "Loading basic SSK keys on all servers"
+ do_nodes $(comma_list $(all_server_nodes)) \
+ "lgss_sk -t server -l $SK_PATH/$FSNAME.key || true"
+ do_nodes $(comma_list $(all_server_nodes)) \
+ "keyctl show | grep lustre | cut -c1-11 |
+ sed -e 's/ //g;' |
+ xargs -IX keyctl setperm X 0x3f3f3f3f"
+ fi
+
if $GSS_SK && $SK_NO_KEY; then
local numclients=${1:-$CLIENTCOUNT}
local clients=${CLIENTS:-$HOSTNAME}
SK_NO_KEY=false
local lgssc_conf_file="/etc/request-key.d/lgssc.conf"
- if $from_build_tree; then
+ if from_build_tree; then
mkdir -p $SK_OM_PATH
if grep -q request-key /proc/mounts > /dev/null; then
echo "SSK: Request key already mounted."
cat $lgssc_conf_file
if ! local_mode; then
- if $from_build_tree; then
+ if from_build_tree; then
do_nodes $(comma_list $(all_nodes)) "mkdir -p \
$SK_OM_PATH"
do_nodes $(comma_list $(all_nodes)) "mount \
OST_MOUNT_OPTS=$(add_sk_mntflag $OST_MOUNT_OPTS)
MOUNT_OPTS=$(add_sk_mntflag $MOUNT_OPTS)
SEC=$SK_FLAVOR
+ if [ -z "$LGSS_KEYRING_DEBUG" ]; then
+ LGSS_KEYRING_DEBUG=4
+ fi
fi
- if [ -n "$LGSS_KEYRING_DEBUG" ]; then
+ if [ -n "$LGSS_KEYRING_DEBUG" ] && \
+ ( local_mode || from_build_tree ); then
lctl set_param -n \
- sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG
+ sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG
+ elif [ -n "$LGSS_KEYRING_DEBUG" ]; then
+ do_nodes $(comma_list $(all_nodes)) "modprobe ptlrpc_gss && \
+ lctl set_param -n \
+ sptlrpc.gss.lgss_keyring.debug_level=$LGSS_KEYRING_DEBUG"
fi
}
cleanup_sk() {
if $GSS_SK; then
- case $LUSTRE in
- /usr/lib/lustre/* | /usr/lib64/lustre/* | /usr/lib/lustre | \
- /usr/lib64/lustre )
- from_build_tree=false
- ;;
- *)
- from_build_tree=true
- ;;
- esac
-
if $SK_S2S; then
do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNM"
do_node $(mgs_node) "$LCTL nodemap_del $SK_S2SNMCLI"
$SK_PATH/$FSNAME*.key $SK_PATH/nodemap/$FSNAME*.key"
do_nodes $(comma_list $(all_nodes)) "keyctl show | \
awk '/lustre/ { print \\\$1 }' | xargs -IX keyctl unlink X"
- if $from_build_tree; then
+ if from_build_tree; then
# Remove the mount and clean up the files we added to
# SK_PATH
do_nodes $(comma_list $(all_nodes)) "while grep -q \
echo -n $label
}
-mdsdevlabel() {
- local num=$1
- local device=$(mdsdevname $num)
- local label=$(devicelabel mds$num ${device} | grep -v "CMD: ")
- echo -n $label
-}
-
-ostdevlabel() {
- local num=$1
- local device=$(ostdevname $num)
- local label=$(devicelabel ost$num ${device} | grep -v "CMD: ")
- echo -n $label
-}
-
#
# Get the device of a facet.
#
}
# Apply the debug mask, subsystem mask and debug buffer size on the
# local node, defaulting to $PTLDEBUG / $SUBSYSTEM / $DEBUG_SIZE.
# Empty values leave the corresponding setting untouched.
set_default_debug () {
	local debug=${1:-"$PTLDEBUG"}
	local subsys=${2:-"$SUBSYSTEM"}
	local debug_size=${3:-$DEBUG_SIZE}

	[ -n "$debug" ] && lctl set_param debug="$debug" >/dev/null
	[ -n "$subsys" ] && lctl set_param subsystem_debug="${subsys# }" >/dev/null

	[ -n "$debug_size" ] && set_debug_size $debug_size > /dev/null
}
# Apply debug settings on a comma-separated list of nodes; the local
# host is handled directly, the rest via do_rpc_nodes().
set_default_debug_nodes () {
	local nodes="$1"
	local debug="${2:-"$PTLDEBUG"}"
	local subsys="${3:-"$SUBSYSTEM"}"
	local debug_size="${4:-$DEBUG_SIZE}"

	if [[ ,$nodes, = *,$HOSTNAME,* ]]; then
		nodes=$(exclude_items_from_list "$nodes" "$HOSTNAME")
		set_default_debug
	fi

	# only RPC to the remote nodes that remain after local exclusion
	[[ -z "$nodes" ]] ||
		do_rpc_nodes "$nodes" set_default_debug \
			\\\"$debug\\\" \\\"$subsys\\\" $debug_size || true
}
# Apply debug settings on the node currently hosting the given facet.
set_default_debug_facet () {
	local facet=$1
	local debug="${2:-"$PTLDEBUG"}"
	local subsys="${3:-"$SUBSYSTEM"}"
	local debug_size="${4:-$DEBUG_SIZE}"
	local node=$(facet_active_host $facet)

	[ -n "$node" ] || error "No host defined for facet $facet"

	set_default_debug_nodes $node "$debug" "$subsys" $debug_size
}
+
# Run "lctl set_param <params...>" on the given (comma-separated) nodes.
# Silently a no-op when no parameters are supplied.
set_params_nodes () {
	[[ $# -ge 2 ]] || return 0

	local nodes=$1
	shift
	do_nodes $nodes $LCTL set_param $@
}
+
# Apply $CLIENT_LCTL_SETPARAM_PARAM (or explicit $2) on the client
# nodes (default $CLIENTS).  No-op when no parameters are configured.
set_params_clients () {
	local clients=${1:-$CLIENTS}
	local params=${2:-$CLIENT_LCTL_SETPARAM_PARAM}

	[[ -n $params ]] || return 0
	set_params_nodes $clients $params
}
set_hostid () {
# Print the status column (field 7 of "lfs df") for the OST with the
# given index on mountpoint $2 (default $MOUNT); $3 passes extra
# options (e.g. -l) through to lfs_df.
ost_dev_status() {
	local ost_idx=$1
	local mnt_pnt=${2:-$MOUNT}
	local opts=$3
	local ost_uuid

	ost_uuid=$(ostuuid_from_index $ost_idx $mnt_pnt)

	lfs_df $opts $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }'
}
setup_quota(){
exit 1
fi
+ if $GSS_SK; then
+ # update mount option with skpath
+ opts=$(add_sk_mntflag $opts)
+ fi
+
echo "Starting client: $client: $flags $opts $device $mnt"
do_node $client mkdir -p $mnt
if [ -n "$FILESET" -a -z "$SKIP_FILESET" ];then
fi
set_default_debug_nodes $client
+ set_params_clients $client
return 0
}
fi
}
# Mount the file system on the MDS
mount_mds_client() {
	# Resolve "<facet>_HOST" indirectly so this works for whatever
	# facet $SINGLEMDS names (e.g. mds2), not just a hard-coded mds1.
	local mds_host_var=${SINGLEMDS}_HOST
	local mds_host=${!mds_host_var}

	zconf_mount $mds_host $MOUNT2 $MOUNT_OPTS ||
		error "unable to mount $MOUNT2 on MDS"
}
+
# Unmount the file system on the MDS
umount_mds_client() {
	# resolve "<facet>_HOST" indirectly for whatever $SINGLEMDS names
	local mds_host_var=${SINGLEMDS}_HOST

	zconf_umount ${!mds_host_var} $MOUNT2
	do_facet $SINGLEMDS "rmdir $MOUNT2"
}
+
# nodes is comma list
sanity_mount_check_nodes () {
local nodes=$1
do_nodes $clients "mount | grep $mnt' '"
set_default_debug_nodes $clients
+ set_params_clients $clients
return 0
}
# "Reboot" the node hosting a facet: a real power-cycle in HARD failure
# mode, otherwise just a settling delay.
reboot_facet() {
	local facet=$1
	local node=$(facet_active_host $facet)

	if [ "$FAILURE_MODE" = HARD ]; then
		boot_node $node
	else
		sleep 10
	fi
}
# Power-cycle a node (HARD failure mode only), wait for it to come back
# and reload Lustre modules on it when testing with remote nodes.
# A no-op in soft failure mode.
boot_node() {
	local node=$1

	if [ "$FAILURE_MODE" = HARD ]; then
		reboot_node $node
		wait_for_host $node
		if $LOAD_MODULES_REMOTE; then
			# NOTE(review): $facet is not set in this function;
			# the message relies on the caller's variable — confirm
			echo "loading modules on $node: $facet"
			do_rpc_nodes $node load_modules_local
		fi
	fi
}
facets_hosts () {
}
# Start one background client load per client node, assigning the
# entries of $CLIENT_LOADS round-robin across the node list in $1
# (comma-separated).
start_client_loads () {
	local -a clients=(${1//,/ })
	local numloads=${#CLIENT_LOADS[@]}
	local nodenum

	for ((nodenum=0; nodenum < ${#clients[@]}; nodenum++ )); do
		local load=$((nodenum % numloads))
		start_client_load ${clients[nodenum]} ${CLIENT_LOADS[load]}
	done
	# bug 22169: wait the background threads to start
	sleep 2
}
# only for remote client
}
# End recovery-scale functions
-# verify that lustre actually cleaned up properly
-cleanup_check() {
- VAR=$(lctl get_param -n catastrophe 2>&1)
- if [ $? = 0 ] ; then
- if [ $VAR != 0 ]; then
- error "LBUG/LASSERT detected"
- fi
- fi
- BUSY=$(dmesg | grep -i destruct || true)
- if [ -n "$BUSY" ]; then
- echo "$BUSY" 1>&2
- [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.$(date +%s)
- exit 205
- fi
-
- check_mem_leak || exit 204
-
- [[ $($LCTL dl 2>/dev/null | wc -l) -gt 0 ]] && $LCTL dl &&
- echo "$TESTSUITE: lustre didn't clean up..." 1>&2 &&
- return 202 || true
-
- if module_loaded lnet || module_loaded libcfs; then
- echo "$TESTSUITE: modules still loaded..." 1>&2
- /sbin/lsmod 1>&2
- return 203
- fi
- return 0
-}
##
# wait for a command to return the expected result
#
# This will run @check on @node repeatedly until the output matches @expect
# based on the supplied condition, or until @max_wait seconds have elapsed,
# whichever comes first.  @cond may be one of the normal bash operators,
# "-gt", "-ge", "-eq", "-le", "-lt", "==", "!=", or "=~", and must be quoted
# in the caller to avoid unintentional evaluation by the shell in the caller.
#
# If @max_wait is not specified, the condition will be checked for up to 90s.
#
# If --verbose is passed as the first argument, the result is printed on each
# value change, otherwise it is only printed after every 10s interval.
#
# If --quiet is passed as the first/second argument, the do_node() command
# will not print the remote command before executing it each time.
#
# Using wait_update_cond() or related helper function is preferable to adding
# a "long enough" wait for some state to change in the background, since
# "long enough" may be too short due to tunables, system config, or running in
# a VM, and must by necessity wait too long for most cases or risk failure.
#
# usage: wait_update_cond [--verbose] [--quiet] node check cond expect [max_wait]
wait_update_cond() {
	local verbose
	local quiet

	[[ "$1" == "--verbose" ]] && verbose="$1" && shift
	[[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift

	local node=$1
	local check="$2"
	local cond="$3"
	local expect="$4"
	local max_wait=${5:-90}
	local result
	local prev_result
	local waited=0
	local begin=$SECONDS
	local sleep=1
	local print=10

	while (( $waited <= $max_wait )); do
		result=$(do_node $quiet $node "$check")

		# evaluate "<result> <cond> <expect>" inside [[ ]]; the
		# single quotes survive into the eval so arbitrary values
		# are compared as single words
		eval [[ "'$result'" $cond "'$expect'" ]]
		if [[ $? == 0 ]]; then
			[[ -z "$result" || $waited -le $sleep ]] ||
				echo "Updated after ${waited}s: want '$expect' got '$result'"
			return 0
		fi
		if [[ -n "$verbose" && "$result" != "$prev_result" ]]; then
			[[ -n "$prev_result" ]] &&
				echo "Changed after ${waited}s: from '$prev_result' to '$result'"

			prev_result="$result"
		fi
		(( $waited % $print == 0 )) &&
			echo "Waiting $((max_wait - waited))s for '$expect'"

		sleep $sleep
		# track elapsed wall-clock time, not iteration count
		waited=$((SECONDS - begin))
	done
	echo "Update not seen after ${max_wait}s: want '$expect' got '$result'"
	return 3
}
+# usage: wait_update [--verbose] [--quiet] node check expect [max_wait]
+wait_update() {
+ local verbose
+ local quiet
+
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
+ local node="$1"
+ local check="$2"
+ local expect="$3"
+ local max_wait=$4
+
+ wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait
+}
+
# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait]
#
# Like wait_update_cond(), but resolves the facet to its currently
# active host first.
wait_update_facet_cond() {
	local verbose
	local quiet

	[[ "$1" == "--verbose" ]] && verbose="$1" && shift
	[[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift

	local node=$(facet_active_host $1)
	local check="$2"
	local cond="$3"
	local expect="$4"
	local max_wait=$5

	wait_update_cond $verbose $quiet $node "$check" "$cond" "$expect" $max_wait
}
+
# usage: wait_update_facet [--verbose] facet check expect [max_wait]
#
# Like wait_update(), but resolves the facet to its currently active
# host first; uses the "==" comparison.
wait_update_facet() {
	local verbose
	local quiet

	[[ "$1" == "--verbose" ]] && verbose="$1" && shift
	[[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift

	local node=$(facet_active_host $1)
	local check="$2"
	local expect="$3"
	local max_wait=$4

	wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait
}
sync_all_data() {
}
# Block until every host in the comma-separated list responds on the
# network and can execute a trivial remote command.
wait_for_host() {
	local hostlist=$1

	# we can use "for" here because we are waiting the slowest
	for host in ${hostlist//,/ }; do
		check_network "$host" 900
	done
	while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done
}
wait_for_facet() {
}
wait_destroy_complete () {
- echo "Waiting for local destroys to complete"
+ echo "Waiting for MDT destroys to complete"
# MAX value shouldn't be big as this mean server responsiveness
# never increase this just to make test pass but investigate
# why it takes so long time
- local MAX=5
+ local MAX=${1:-5}
local WAIT=0
+ local list=$(comma_list $(mdts_nodes))
while [ $WAIT -lt $MAX ]; do
- local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight))
+ local -a RPCs=($(do_nodes $list $LCTL get_param -n osp.*.destroys_in_flight))
local con=1
local i
echo "Waiting ${WAIT}s for local destroys to complete"
WAIT=$((WAIT + 1))
done
- echo "Local destroys weren't done in $MAX sec."
+ echo "MDT destroys weren't done in $MAX sec."
return 1
}
done
}
-obd_name() {
- local facet=$1
-}
-
replay_barrier() {
local facet=$1
do_facet $facet "sync; sync; sync"
}
# Fail over a facet without running df on the mountpoint afterwards.
fail_nodf() {
	local facet=$1

	facet_failover $facet
}
# Restart a facet aborting recovery (or the recovery-abort mode given
# as $2, e.g. "abort_recov_mdt"), then verify clients can stat the fs.
fail_abort() {
	local facet=$1
	local abort_type=${2:-"abort_recovery"}

	stop $facet
	change_active $facet
	wait_for_facet $facet
	mount_facet $facet -o $abort_type
	clients_up || echo "first stat failed: $?"
	clients_up || error "post-failover stat: $?"
}
local nodes=$1
local net=${2:-"."}
- do_nodes $nodes "$LCTL list_nids | grep $net | cut -f 1 -d @"
+ do_nodes $nodes "$LCTL list_nids | grep -w $net | cut -f 1 -d @"
}
h2name_or_ip() {
}
do_node() {
- local verbose=false
- # do not stripe off hostname if verbose, bug 19215
- if [ x$1 = x--verbose ]; then
- shift
- verbose=true
- fi
+ local verbose
+ local quiet
- local HOST=$1
- shift
- local myPDSH=$PDSH
- if [ "$HOST" = "$HOSTNAME" ]; then
- myPDSH="no_dsh"
- elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then
- echo "cannot run remote command on $HOST with $myPDSH"
- return 128
- fi
- if $VERBOSE; then
- echo "CMD: $HOST $@" >&2
- $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
- fi
+ # do not strip off hostname if verbose, b=19215
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
+ local HOST=$1
+ shift
+ local myPDSH=$PDSH
+
+ if [ "$HOST" = "$HOSTNAME" ]; then
+ myPDSH="no_dsh"
+ elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then
+ echo "cannot run remote command on $HOST with $myPDSH"
+ return 128
+ fi
+ if $VERBOSE && [[ -z "$quiet" ]]; then
+ echo "CMD: $HOST $@" >&2
+ $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
+ fi
if [[ "$myPDSH" == "rsh" ]] ||
[[ "$myPDSH" == *pdsh* && "$myPDSH" != *-S* ]]; then
return 0
fi
- if $verbose ; then
+ if [[ -n "$verbose" ]]; then
# print HOSTNAME for myPDSH="no_dsh"
if [[ $myPDSH = no_dsh ]]; then
$myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed -e "s/^/${HOSTNAME}: /"
return ${PIPESTATUS[0]}
}
-do_nodev() {
- do_node --verbose "$@"
-}
-
# True when the node list consists solely of this host.
single_local_node () {
	[ "$1" = "$HOSTNAME" ]
}
# Outputs environment variable assignments that should be passed to remote nodes
}
# Run a command on a comma-separated list of nodes via pdsh, falling
# back to do_node() when the list is just the local host.
# --verbose keeps the "hostname:" prefix on output; --quiet suppresses
# the CMD echo/lctl mark.  Returns the command's exit status.
do_nodes() {
	local verbose
	local quiet

	# do not strip off hostname if verbose, b=19215
	[[ "$1" == "--verbose" ]] && verbose="$1" && shift
	[[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift

	local rnodes=$1
	shift

	if single_local_node $rnodes; then
		do_node $verbose $quiet $rnodes "$@"
		return $?
	fi

	# This is part from do_node
	local myPDSH=$PDSH

	[ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] &&
		echo "cannot run remote command on $rnodes with $myPDSH" &&
		return 128

	export FANOUT=$(get_node_count "${rnodes//,/ }")
	if $VERBOSE && [[ -z "$quiet" ]]; then
		echo "CMD: $rnodes $@" >&2
		$myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
	fi

	# do not replace anything from pdsh output if -N is used
	# -N Disable hostname: prefix on lines of output.
	if [[ -n "$verbose" || $myPDSH = *-N* ]]; then
		$myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")"
	else
		$myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" | sed -re "s/^[^:]*: //g"
	fi
	return ${PIPESTATUS[0]}
}
##
#
# usage: do_facet $facet command [arg ...]
# Run a command on the node currently hosting the given facet.
# Accepts the same --verbose/--quiet flags as do_node() and forwards
# them.  Exits the script if the facet has no host defined.
do_facet() {
	local verbose
	local quiet

	[[ "$1" == "--verbose" ]] && verbose="$1" && shift
	[[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift

	local facet=$1
	shift
	local host=$(facet_active_host $facet)

	[ -z "$host" ] && echo "No host defined for facet ${facet}" && exit 1

	do_node $verbose $quiet $host "$@"
}
# Function: do_facet_random_file $FACET $FILE $SIZE
}
# do_nodes() with output hostname prefixes preserved (verbose).
do_nodesv() {
	do_nodes --verbose "$@"
}
add() {
stop ${facet} -f
rm -f $TMP/${facet}active
[[ $facet = mds1 ]] && combined_mgs_mds && rm -f $TMP/mgsactive
+
+ # make sure in-tree ldiskfs is loaded before mkfs
+ if local_mode && [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then
+ load_module ../ldiskfs/ldiskfs
+ fi
+
do_facet ${facet} $MKFS $* || return ${PIPESTATUS[0]}
if [[ $(facet_fstype $facet) == zfs ]]; then
var=${type}_FS_MKFS_OPTS
fs_mkfs_opts+=${!var:+" ${!var}"}
+ [[ "$QUOTA_TYPE" =~ "p" ]] && fs_mkfs_opts+=" -O project"
+
[ $fstype == ldiskfs ] && fs_mkfs_opts=$(squash_opt $fs_mkfs_opts)
if [ -n "${fs_mkfs_opts## }" ]; then
TIMEOUT=$(do_facet $SINGLEMDS "lctl get_param -n timeout")
log "Using TIMEOUT=$TIMEOUT"
+ # tune down to speed up testing on (usually) small setups
+ local mgc_timeout=/sys/module/mgc/parameters/mgc_requeue_timeout_min
+ do_nodes $(comma_list $(nodes_list)) \
+ "[ -f $mgc_timeout ] && echo 1 > $mgc_timeout; exit 0"
+
osc_ensure_active $SINGLEMDS $TIMEOUT
osc_ensure_active client $TIMEOUT
$LCTL set_param osc.*.idle_timeout=debug
# $LFS quotaoff -ug $MOUNT > /dev/null 2>&1
fi
fi
+
+ (( MDS1_VERSION <= $(version_code 2.13.52) )) ||
+ do_nodes $(comma_list $(mdts_nodes)) \
+ "$LCTL set_param lod.*.mdt_hash=crush"
return 0
}
}
# True when the given mountpoint appears in the list of mounted Lustre
# filesystems.  An empty argument returns failure.
is_mounted () {
	local mntpt=$1

	[ -z "$mntpt" ] && return 1

	local mounted=$(mounted_lustre_filesystems)

	# trailing space forces a whole-path match
	echo $mounted' ' | grep -w -q $mntpt' '
}
-is_empty_dir() {
- [ $(find $1 -maxdepth 1 -print | wc -l) = 1 ] && return 0
- return 1
# Create $FS_NPOOLS (default OSTCOUNT/$2) OST pools named ${1}0..N-1,
# each holding $2 OSTs (default all), distributing OSTs round-robin and
# wrapping past the last OST index.  Honours $DELETE_OLD_POOLS (reuse
# existing pools when false) and passes $KEEP_POOLS to create_pool().
create_pools () {
	local pool=$1
	local ostsn=${2:-$OSTCOUNT}
	local npools=${FS_NPOOLS:-$((OSTCOUNT / ostsn))}
	local n
	local p

	echo ostsn=$ostsn npools=$npools
	if [[ $ostsn -gt $OSTCOUNT ]]; then
		echo "request to use $ostsn OSTs in the pool, \
			using max available OSTCOUNT=$OSTCOUNT"
		ostsn=$OSTCOUNT
	fi
	for (( n=0; n < $npools; n++ )); do
		p=${pool}$n
		if ! $DELETE_OLD_POOLS; then
			log "request to not delete old pools: $FSNAME.$p exist?"
			if ! check_pool_not_exist $FSNAME.$p; then
				echo "Using existing $FSNAME.$p"
				$LCTL pool_list $FSNAME.$p
				continue
			fi
		fi
		create_pool $FSNAME.$p $KEEP_POOLS ||
			error "create_pool $FSNAME.$p failed"

		local first=$(( (n * ostsn) % OSTCOUNT ))
		local last=$(( (first + ostsn - 1) % OSTCOUNT ))
		if [[ $first -le $last ]]; then
			pool_add_targets $p $first $last ||
				error "pool_add_targets $p $first $last failed"
		else
			# pool wraps past the highest OST index
			pool_add_targets $p $first $(( OSTCOUNT - 1 )) ||
				error "pool_add_targets $p $first \
					$(( OSTCOUNT - 1 )) failed"
			pool_add_targets $p 0 $last ||
				error "pool_add_targets $p 0 $last failed"
		fi
	done
}
-# empty lustre filesystem may have empty directories lost+found and .lustre
-is_empty_fs() {
- # exclude .lustre & lost+found
- [ $(find $1 -maxdepth 1 -name lost+found -o -name .lustre -prune -o \
- -print | wc -l) = 1 ] || return 1
- [ ! -d $1/lost+found ] || is_empty_dir $1/lost+found || return 1
- if [ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.4.0) ]; then
- # exclude .lustre/fid (LU-2780)
- [ $(find $1/.lustre -maxdepth 1 -name fid -prune -o \
- -print | wc -l) = 1 ] || return 1
- else
- [ ! -d $1/.lustre ] || is_empty_dir $1/.lustre || return 1
- fi
- return 0
# Apply per-pool user quotas described by $POOLS_QUOTA_USERS_SET.
# Each entry is "user[:limit[:pool]]"; a missing pool applies the limit
# to every existing pool, a bare user gets only the (maxed) global
# limit.  Requires $ENABLE_QUOTA.
set_pools_quota () {
	local u
	local o
	local p
	local i
	local j

	[[ $ENABLE_QUOTA ]] || error "Required Pool Quotas: \
		$POOLS_QUOTA_USERS_SET, but ENABLE_QUOTA not set!"

	# POOLS_QUOTA_USERS_SET=
	#              "quota15_1:20M          -- for all of the found pools
	#               quota15_2:1G:gpool0
	#               quota15_3              -- for global limit only
	#               quota15_4:200M:gpool0
	#               quota15_4:200M:gpool1"

	declare -a pq_userset=(${POOLS_QUOTA_USERS_SET="mpiuser"})
	declare -a pq_users
	declare -A pq_limits

	for ((i=0; i<${#pq_userset[@]}; i++)); do
		u=${pq_userset[i]%%:*}
		o=""
		# user gets no pool limits if
		# POOLS_QUOTA_USERS_SET does not specify it
		[[ ${pq_userset[i]} =~ : ]] && o=${pq_userset[i]##$u:}
		pq_limits[$u]+=" $o"
	done
	pq_users=(${!pq_limits[@]})

	declare -a opts
	local pool

	for ((i=0; i<${#pq_users[@]}; i++)); do
		u=${pq_users[i]}
		# set the global block limit to max (_u64) first
		$LFS setquota -u $u -B $((2**24 - 1))T $DIR
		opts=(${pq_limits[$u]})
		for ((j=0; j<${#opts[@]}; j++)); do
			p=${opts[j]##*:}
			o=${opts[j]%%:*}
			# set limit for all existing pools if
			# no pool specified
			if [ $p == $o ]; then
				p=$(list_pool $FSNAME | sed "s/$FSNAME.//")
				echo "No pool specified for $u,
					set limit $o for all existing pools"
			fi
			for pool in $p; do
				$LFS setquota -u $u -B $o --pool $pool $DIR ||
					error "setquota -u $u -B $o \
						--pool $pool failed"
			done
		done
		# NOTE(review): passes $DIR where a pool name might be
		# expected — confirm intended "lfs quota" invocation
		$LFS quota -uv $u --pool $DIR
	done
}
check_and_setup_lustre() {
export I_MOUNTED2=yes
fi
- if $do_check; then
- # FIXME: what to do if check_config failed?
- # i.e. if:
- # 1) remote client has mounted other Lustre fs?
- # 2) lustre is mounted on remote_clients atall ?
- check_config_clients $MOUNT
- init_facets_vars
- init_param_vars
+ if $do_check; then
+ # FIXME: what to do if check_config failed?
+ # i.e. if:
+ # 1) remote client has mounted other Lustre fs?
+ # 2) lustre is mounted on remote_clients at all?
+ check_config_clients $MOUNT
+ init_facets_vars
+ init_param_vars
- set_default_debug_nodes $(comma_list $(nodes_list))
- fi
+ set_default_debug_nodes $(comma_list $(nodes_list))
+ set_params_clients
+ fi
if [ -z "$CLIENTONLY" -a $(lower $OSD_TRACK_DECLARES_LBUG) == 'yes' ]; then
local facets=""
fi
fi
+ if [ -n "$fs_STRIPEPARAMS" ]; then
+ setstripe_getstripe $MOUNT $fs_STRIPEPARAMS
+ fi
if $GSS_SK; then
set_flavor_all null
elif $GSS; then
set_flavor_all $SEC
fi
+ if $DELETE_OLD_POOLS; then
+ destroy_all_pools
+ fi
+ if [[ -n "$FS_POOL" ]]; then
+ create_pools $FS_POOL $FS_POOL_NOSTS
+ fi
+
+ if [[ -n "$POOLS_QUOTA_USERS_SET" ]]; then
+ set_pools_quota
+ fi
if [ "$ONLY" == "setup" ]; then
exit 0
fi
check_and_setup_lustre
}
-# Get all of the server target devices from a given server node and type.
-get_mnt_devs() {
- local node=$1
- local type=$2
- local devs
- local dev
-
- if [ "$type" == ost ]; then
- devs=$(get_osd_param $node "" mntdev)
- else
- devs=$(do_node $node $LCTL get_param -n osd-*.$FSNAME-M*.mntdev)
- fi
- for dev in $devs; do
- case $dev in
- *loop*) do_node $node "losetup $dev" | \
- sed -e "s/.*(//" -e "s/).*//" ;;
- *) echo $dev ;;
- esac
- done
-}
-
-# Get all of the server target devices.
-get_svr_devs() {
- local node
- local i
-
- # Master MDS parameters used by lfsck
- MDTNODE=$(facet_active_host $SINGLEMDS)
- MDTDEV=$(echo $(get_mnt_devs $MDTNODE mdt) | awk '{print $1}')
-
- # MDT devices
- i=0
- for node in $(mdts_nodes); do
- MDTDEVS[i]=$(get_mnt_devs $node mdt)
- i=$((i + 1))
- done
-
- # OST devices
- i=0
- for node in $(osts_nodes); do
- OSTDEVS[i]=$(get_mnt_devs $node ost)
- i=$((i + 1))
- done
-}
-
# Run e2fsck on MDT or OST device.
run_e2fsck() {
local node=$1
local log=$TMP/e2fsck.log
local rc=0
+ # turn on pfsck if it is supported
+ do_node $node $E2FSCK -h 2>&1 | grep -qw -- -m && cmd+=" -m8"
echo $cmd
do_node $node $cmd 2>&1 | tee $log
rc=${PIPESTATUS[0]}
[ "$host" = "$HOSTNAME" ] && return 0
- echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..."
- if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then
- echo "Network not available!"
+ if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep; then
+ echo "$(date +'%H:%M:%S (%s)') waited for $host network ${max}s"
exit 1
fi
-
- echo "$(date +'%H:%M:%S (%s)') network interface is UP"
}
no_dsh() {
at_get $1 at_max
}
-at_min_get() {
- at_get $1 at_min
-}
-
at_max_set() {
local at_max=$1
shift
default_lru_size()
{
- NR_CPU=$(grep -c "processor" /proc/cpuinfo)
- DEFAULT_LRU_SIZE=$((100 * NR_CPU))
- echo "$DEFAULT_LRU_SIZE"
+ local nr_cpu=$(grep -c "processor" /proc/cpuinfo)
+
+ echo $((100 * nr_cpu))
}
lru_resize_enable()
lru_resize_disable()
{
- lctl set_param ldlm.namespaces.*$1*.lru_size $(default_lru_size)
+ local dev=${1}
+ local lru_size=${2:-$(default_lru_size)}
+
+ $LCTL set_param ldlm.namespaces.*$dev*.lru_size=$lru_size
}
flock_is_enabled()
}
start_full_debug_logging() {
- debugsave
- debug_size_save
+ debugsave
+ debug_size_save
- local FULLDEBUG=-1
- local DEBUG_SIZE=150
+ local fulldebug=-1
+ local debug_size=150
+ local nodes=$(comma_list $(nodes_list))
- do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug_mb=$DEBUG_SIZE"
- do_nodes $(comma_list $(nodes_list)) "$LCTL set_param debug=$FULLDEBUG;"
+ do_nodes $nodes "$LCTL set_param debug=$fulldebug debug_mb=$debug_size"
}
stop_full_debug_logging() {
- debug_size_restore
- debugrestore
+ debug_size_restore
+ debugrestore
}
# prints bash call stack
exit_status () {
local status=0
- local log=$TESTSUITELOG
+ local logs="$TESTSUITELOG $1"
- [ -f "$log" ] && grep -qw FAIL $log && status=1
- exit $status
-}
+ for log in $logs; do
+ if [ -f "$log" ]; then
+ grep -qw FAIL $log && status=1
+ fi
+ done
+
+ exit $status
+}
error() {
report_error "$@"
[[ -n "$TESTSUITELOG" ]] &&
echo "$TESTSUITE: SKIP: $TESTNAME $@" >> $TESTSUITELOG || true
+ unset TESTNAME
}
skip() {
fi
done
- [ "$EXCEPT$ALWAYS_EXCEPT" ] && \
- log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`"
- [ "$EXCEPT_SLOW" ] && \
- log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`"
- for E in $EXCEPT; do
- eval EXCEPT_${E}=true
- done
- for E in $ALWAYS_EXCEPT; do
- eval EXCEPT_ALWAYS_${E}=true
- done
- for E in $EXCEPT_SLOW; do
- eval EXCEPT_SLOW_${E}=true
- done
- for G in $GRANT_CHECK_LIST; do
- eval GCHECK_ONLY_${G}=true
- done
+ [ "$EXCEPT$ALWAYS_EXCEPT" ] &&
+ log "excepting tests: `echo $EXCEPT $ALWAYS_EXCEPT`"
+ [ "$EXCEPT_SLOW" ] &&
+ log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`"
+ for E in $EXCEPT; do
+ eval EXCEPT_${E}=true
+ done
+ for E in $ALWAYS_EXCEPT; do
+ eval EXCEPT_ALWAYS_${E}=true
+ done
+ for E in $EXCEPT_SLOW; do
+ eval EXCEPT_SLOW_${E}=true
+ done
+ for G in $GRANT_CHECK_LIST; do
+ eval GCHECK_ONLY_${G}=true
+ done
}
basetest() {
export LAST_SKIPPED=
export ALWAYS_SKIPPED=
#
-# Main entry into test-framework. This is called with the name and
-# description of a test. The name is used to find the function to run
+# Main entry into test-framework. This is called with the number and
+# description of a test. The number is used to find the function to run
# the test using "test_$name".
#
# This supports a variety of methods of specifying specific test to
-# run or not run. These need to be documented...
+# run or not run:
+# - ONLY= env variable with space-separated list of test numbers to run
+# - EXCEPT= env variable with space-separated list of test numbers to exclude
#
run_test() {
assert_DIR
- export base=$(basetest $1)
- TESTNAME=test_$1
+ local testnum=$1
+ local testmsg=$2
+ export base=$(basetest $testnum)
+ export TESTNAME=test_$testnum
LAST_SKIPPED=
ALWAYS_SKIPPED=
# Check the EXCEPT, ALWAYS_EXCEPT and SLOW lists to see if we
# need to skip the current test. If so, set the ALWAYS_SKIPPED flag.
- local testname=EXCEPT_$1
- local testname_base=EXCEPT_$base
- if [ ${!testname}x != x ]; then
+ local isexcept=EXCEPT_$testnum
+ local isexcept_base=EXCEPT_$base
+ if [ ${!isexcept}x != x ]; then
ALWAYS_SKIPPED="y"
- skip_message="skipping excluded test $1"
- elif [ ${!testname_base}x != x ]; then
+ skip_message="skipping excluded test $testnum"
+ elif [ ${!isexcept_base}x != x ]; then
ALWAYS_SKIPPED="y"
- skip_message="skipping excluded test $1 (base $base)"
+ skip_message="skipping excluded test $testnum (base $base)"
fi
- testname=EXCEPT_ALWAYS_$1
- testname_base=EXCEPT_ALWAYS_$base
- if [ ${!testname}x != x ]; then
+ isexcept=EXCEPT_ALWAYS_$testnum
+ isexcept_base=EXCEPT_ALWAYS_$base
+ if [ ${!isexcept}x != x ]; then
ALWAYS_SKIPPED="y"
- skip_message="skipping ALWAYS excluded test $1"
- elif [ ${!testname_base}x != x ]; then
+ skip_message="skipping ALWAYS excluded test $testnum"
+ elif [ ${!isexcept_base}x != x ]; then
ALWAYS_SKIPPED="y"
- skip_message="skipping ALWAYS excluded test $1 (base $base)"
+ skip_message="skipping ALWAYS excluded test $testnum (base $base)"
fi
- testname=EXCEPT_SLOW_$1
- testname_base=EXCEPT_SLOW_$base
- if [ ${!testname}x != x ]; then
+ isexcept=EXCEPT_SLOW_$testnum
+ isexcept_base=EXCEPT_SLOW_$base
+ if [ ${!isexcept}x != x ]; then
ALWAYS_SKIPPED="y"
- skip_message="skipping SLOW test $1"
- elif [ ${!testname_base}x != x ]; then
+ skip_message="skipping SLOW test $testnum"
+ elif [ ${!isexcept_base}x != x ]; then
ALWAYS_SKIPPED="y"
- skip_message="skipping SLOW test $1 (base $base)"
+ skip_message="skipping SLOW test $testnum (base $base)"
fi
# If there are tests on the ONLY list, check if the current test
# is on that list and, if so, check if the test is to be skipped
# and if we are supposed to honor the skip lists.
if [ -n "$ONLY" ]; then
- testname=ONLY_$1
- testname_base=ONLY_$base
- if [[ ${!testname}x != x || ${!testname_base}x != x ]]; then
+ local isonly=ONLY_$testnum
+ local isonly_base=ONLY_$base
+ if [[ ${!isonly}x != x || ${!isonly_base}x != x ]]; then
if [[ -n "$ALWAYS_SKIPPED" && -n "$HONOR_EXCEPT" ]]; then
LAST_SKIPPED="y"
[ -n "$LAST_SKIPPED" ] &&
echo "" && LAST_SKIPPED=
ALWAYS_SKIPPED=
- run_one_logged $1 "$2"
+ run_one_logged $testnum "$testmsg"
return $?
fi
skip_noexit "$skip_message"
return 0
else
- run_one_logged $1 "$2"
+ run_one_logged $testnum "$testmsg"
return $?
fi
-
}
log() {
}
reset_fail_loc () {
- echo -n "Resetting fail_loc on all nodes..."
- do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \
- fail_val=0 2>/dev/null" || true
- echo done.
+ #echo -n "Resetting fail_loc on all nodes..."
+ do_nodes --quiet $(comma_list $(nodes_list)) \
+ "lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true
+ #echo done.
}
# Also appends a timestamp and prepends the testsuite name.
#
-EQUALS="===================================================================================================="
+# ======================================================== 15:06:12 (1624050372)
+EQUALS="========================================================"
banner() {
msg="== ${TESTSUITE} $*"
last=${msg: -1:1}
ldiskfs_check_descriptors: Checksum for group 0 failed\|\
group descriptors corrupted"
- res=$(do_nodes $(comma_list $(nodes_list)) "dmesg" | grep "$errors")
+ res=$(do_nodes -q $(comma_list $(nodes_list)) "dmesg" | grep "$errors")
[ -z "$res" ] && return 0
echo "Kernel error detected: $res"
return 1
#
run_one() {
local testnum=$1
- local message=$2
- export tfile=f${testnum}.${TESTSUITE}
- export tdir=d${testnum}.${TESTSUITE}
- export TESTNAME=test_$testnum
+ local testmsg="$2"
local SAVE_UMASK=`umask`
umask 0022
$SETUP
fi
- banner "test $testnum: $message"
+ banner "test $testnum: $testmsg"
test_${testnum} || error "test_$testnum failed with $?"
cd $SAVE_PWD
reset_fail_loc
check_node_health
check_dmesg_for_errors || error "Error in dmesg detected"
if [ "$PARALLEL" != "yes" ]; then
- ps auxww | grep -v grep | grep -q multiop &&
+ ps auxww | grep -v grep | grep -q "multiop " &&
error "multiop still running"
fi
- unset TESTNAME
- unset tdir
- unset tfile
umask $SAVE_UMASK
$CLEANUP
return 0
# - test result is saved to data file
#
run_one_logged() {
- local BEFORE=$(date +%s)
- local TEST_ERROR
- local name=${TESTSUITE}.test_${1}.test_log.$(hostname -s).log
- local test_log=$LOGDIR/$name
- local zfs_log_name=${TESTSUITE}.test_${1}.zfs_log
- local zfs_debug_log=$LOGDIR/$zfs_log_name
- rm -rf $LOGDIR/err
- rm -rf $LOGDIR/ignore
- rm -rf $LOGDIR/skip
+ local before=$SECONDS
+ local testnum=$1
+ local testmsg=$2
+ export tfile=f${testnum}.${TESTSUITE}
+ export tdir=d${testnum}.${TESTSUITE}
+ local test_log=$TESTLOG_PREFIX.$TESTNAME.test_log.$(hostname -s).log
+ local zfs_debug_log=$TESTLOG_PREFIX.$TESTNAME.zfs_log
local SAVE_UMASK=$(umask)
+ local rc=0
umask 0022
+ rm -f $LOGDIR/err $LOGDIR/ignore $LOGDIR/skip
echo
- log_sub_test_begin test_${1}
- (run_one $1 "$2") 2>&1 | tee -i $test_log
- local RC=${PIPESTATUS[0]}
-
- [ $RC -ne 0 ] && [ ! -f $LOGDIR/err ] &&
- echo "test_$1 returned $RC" | tee $LOGDIR/err
-
- duration=$(($(date +%s) - $BEFORE))
- pass "$1" "(${duration}s)"
+ # if ${ONLY_$testnum} set, repeat $ONLY_REPEAT times, otherwise once
+ local isonly=ONLY_$testnum
+ local repeat=${!isonly:+$ONLY_REPEAT}
+
+ for testiter in $(seq ${repeat:-1}); do
+ local before_sub=$SECONDS
+ log_sub_test_begin $TESTNAME
+
+ # remove temp files between repetitions to avoid test failures
+ [ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] &&
+ rm -rvf $DIR/$tdir* $DIR/$tfile*
+ # loop around subshell so stack_trap EXIT triggers each time
+ (run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log
+ rc=${PIPESTATUS[0]}
+ local append=-a
+ local duration_sub=$((SECONDS - before_sub))
+ local test_error
+
+ [[ $rc != 0 && ! -f $LOGDIR/err ]] &&
+ echo "$TESTNAME returned $rc" | tee $LOGDIR/err
+
+ if [[ -f $LOGDIR/err ]]; then
+ test_error=$(cat $LOGDIR/err)
+ TEST_STATUS="FAIL"
+ elif [[ -f $LOGDIR/ignore ]]; then
+ test_error=$(cat $LOGDIR/ignore)
+ elif [[ -f $LOGDIR/skip ]]; then
+ test_error=$(cat $LOGDIR/skip)
+ TEST_STATUS="SKIP"
+ else
+ TEST_STATUS="PASS"
+ fi
- if [[ -f $LOGDIR/err ]]; then
- TEST_ERROR=$(cat $LOGDIR/err)
- elif [[ -f $LOGDIR/ignore ]]; then
- TEST_ERROR=$(cat $LOGDIR/ignore)
- elif [[ -f $LOGDIR/skip ]]; then
- TEST_ERROR=$(cat $LOGDIR/skip)
- fi
- log_sub_test_end $TEST_STATUS $duration "$RC" "$TEST_ERROR"
+ pass "$testnum" "(${duration_sub}s)"
+ log_sub_test_end $TEST_STATUS $duration_sub "$rc" "$test_error"
+ [[ $rc != 0 ]] && break
+ done
- if [[ "$TEST_STATUS" != "SKIP" ]] && [[ -f $TF_SKIP ]]; then
+ if [[ "$TEST_STATUS" != "SKIP" && -f $TF_SKIP ]]; then
rm -f $TF_SKIP
fi
if [ -f $LOGDIR/err ]; then
log_zfs_info "$zfs_debug_log"
- $FAIL_ON_ERROR && exit $RC
+ $FAIL_ON_ERROR && exit $rc
fi
umask $SAVE_UMASK
+ unset TESTNAME
+ unset tdir
+ unset tfile
+
return 0
}
log_sub_test_end "SKIP" "0" "0" "$@"
}
-canonical_path() {
- (cd $(dirname $1); echo $PWD/$(basename $1))
+grant_from_clients() {
+ local nodes="$1"
+
+ # get client grant
+ do_nodes $nodes "$LCTL get_param -n osc.${FSNAME}-*.cur_*grant_bytes" |
+ calc_sum
}
+grant_from_servers() {
+ local nodes="$1"
+
+ # get server grant
+ # which is tot_granted less grant_precreate
+ do_nodes $nodes "$LCTL get_param obdfilter.${FSNAME}-OST*.tot_granted" \
+ " obdfilter.${FSNAME}-OST*.tot_pending" \
+ " obdfilter.${FSNAME}-OST*.grant_precreate" |
+ tr '=' ' ' | awk '/tot_granted/{ total += $2 };
+ /tot_pending/{ total -= $2 };
+ /grant_precreate/{ total -= $2 };
+ END { printf("%0.0f", total) }'
+}
check_grant() {
export base=$(basetest $1)
[ "$CHECK_GRANT" == "no" ] && return 0
- testnamebase=GCHECK_ONLY_${base}
- testname=GCHECK_ONLY_$1
- [ ${!testnamebase}x == x -a ${!testname}x == x ] && return 0
+ local isonly_base=GCHECK_ONLY_${base}
+ local isonly=GCHECK_ONLY_$1
+ [ ${!isonly_base}x == x -a ${!isonly}x == x ] && return 0
echo -n "checking grant......"
+ local osts=$(comma_list $(osts_nodes))
local clients=$CLIENTS
[ -z "$clients" ] && clients=$(hostname)
clients_up # initiate all idling connections
# get client grant
- client_grant=$(do_nodes $clients \
- "$LCTL get_param -n osc.${FSNAME}-*.cur_*grant_bytes" |
- awk '{ total += $1 } END { printf("%0.0f", total) }')
+ cli_grant=$(grant_from_clients $clients)
# get server grant
# which is tot_granted less grant_precreate
- server_grant=$(do_nodes $(comma_list $(osts_nodes)) \
- "$LCTL get_param "\
- "obdfilter.${FSNAME}-OST*.{tot_granted,tot_pending,grant_precreate}" |
- sed 's/=/ /'| awk '/tot_granted/{ total += $2 };
- /tot_pending/{ total -= $2 };
- /grant_precreate/{ total -= $2 };
- END { printf("%0.0f", total) }')
+ srv_grant=$(grant_from_servers $osts)
+ count=0
# check whether client grant == server grant
- if [[ $client_grant -ne $server_grant ]]; then
+ while [[ $cli_grant != $srv_grant && count++ -lt 30 ]]; do
+ echo "wait for client:$cli_grant == server:$srv_grant"
+ sleep 1
+ cli_grant=$(grant_from_clients $clients)
+ srv_grant=$(grant_from_servers $osts)
+ done
+ if [[ $cli_grant -ne $srv_grant ]]; then
do_nodes $(comma_list $(osts_nodes)) \
"$LCTL get_param obdfilter.${FSNAME}-OST*.tot*" \
- "obdfilter.${FSNAME}-OST*.grant_*"
+ "obdfilter.${FSNAME}-OST*.grant_*"
do_nodes $clients "$LCTL get_param osc.${FSNAME}-*.cur_*_bytes"
- error "failed: client:${client_grant} server: ${server_grant}."
+ error "failed grant check: client:$cli_grant server:$srv_grant"
else
- echo "pass: client:${client_grant} server: ${server_grant}"
+ echo "pass grant check: client:$cli_grant server:$srv_grant"
fi
}
echo ${uuid/_UUID/}
}
+mdtname_from_index() {
+ local uuid=$(mdtuuid_from_index $1)
+ echo ${uuid/_UUID/}
+}
+
+mdssize_from_index () {
+ local mdt=$(mdtname_from_index $2)
+ $LFS df $1 | grep $mdt | awk '{ print $2 }'
+}
+
index_from_ostuuid()
{
$LFS osts $2 | sed -ne "/${1}/s/\(.*\): .* .*$/\1/p"
# Description:
# Returns list of ip addresses for each interface
local_addr_list() {
- ip addr | awk '/inet\ / {print $2}' | awk -F\/ '{print $1}'
+ ip addr | awk '/inet / {print $2}' | awk -F/ '{print $1}'
}
is_local_addr() {
echo -n $(facets_nodes $(get_facets OST))
}
-# Get all of the active AGT (HSM agent) nodes.
-agts_nodes () {
- echo -n $(facets_nodes $(get_facets AGT))
-}
-
# Get all of the client nodes and active server nodes.
nodes_list () {
local nodes=$HOSTNAME
echo $nodes | wc -w || true
}
-mixed_ost_devs () {
- local nodes=$(osts_nodes)
- local osscount=$(get_node_count "$nodes")
- [ ! "$OSTCOUNT" = "$osscount" ]
-}
-
mixed_mdt_devs () {
local nodes=$(mdts_nodes)
local mdtcount=$(get_node_count "$nodes")
rm -f $file
}
-setstripe_nfsserver () {
- local dir=$1
- local nfsexportdir=$2
- shift
- shift
-
- local -a nfsexport=($(awk '"'$dir'" ~ $2 && $3 ~ "nfs" && $2 != "/" \
- { print $1 }' /proc/mounts | cut -f 1 -d :))
-
- # check that only one nfs mounted
- [[ -z $nfsexport ]] && echo "$dir is not nfs mounted" && return 1
- (( ${#nfsexport[@]} == 1 )) ||
- error "several nfs mounts found for $dir: ${nfsexport[@]} !"
-
- do_nodev ${nfsexport[0]} lfs setstripe $nfsexportdir "$@"
-}
-
# Check and add a test group.
add_group() {
local group_id=$1
}
check_runas_id_ret() {
- local myRC=0
- local myRUNAS_UID=$1
- local myRUNAS_GID=$2
- shift 2
- local myRUNAS=$@
- if [ -z "$myRUNAS" ]; then
- error_exit "myRUNAS command must be specified for check_runas_id"
- fi
- if $GSS_KRB5; then
- $myRUNAS krb5_login.sh || \
- error "Failed to refresh Kerberos V5 TGT for UID $myRUNAS_ID."
- fi
- mkdir $DIR/d0_runas_test
- chmod 0755 $DIR
- chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test
- $myRUNAS touch $DIR/d0_runas_test/f$$ || myRC=$?
- rm -rf $DIR/d0_runas_test
- return $myRC
+ local myRC=0
+ local myRUNAS_UID=$1
+ local myRUNAS_GID=$2
+ shift 2
+ local myRUNAS=$@
+
+ if [ -z "$myRUNAS" ]; then
+ error_exit "check_runas_id_ret requires myRUNAS argument"
+ fi
+
+ $myRUNAS true ||
+ error "Unable to execute $myRUNAS"
+
+ id $myRUNAS_UID > /dev/null ||
+ error "Invalid RUNAS_ID $myRUNAS_UID. Please set RUNAS_ID to " \
+ "some UID which exists on MDS and client or add user " \
+ "$myRUNAS_UID:$myRUNAS_GID on these nodes."
+
+ if $GSS_KRB5; then
+ $myRUNAS krb5_login.sh ||
+ error "Failed to refresh krb5 TGT for UID $myRUNAS_UID."
+ fi
+ mkdir $DIR/d0_runas_test
+ chmod 0755 $DIR
+ chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test
+ $myRUNAS -u $myRUNAS_UID -g $myRUNAS_GID touch $DIR/d0_runas_test/f$$ ||
+ myRC=$?
+ rm -rf $DIR/d0_runas_test
+ return $myRC
}
check_runas_id() {
- local myRUNAS_UID=$1
- local myRUNAS_GID=$2
- shift 2
- local myRUNAS=$@
- check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \
- error "unable to write to $DIR/d0_runas_test as UID $myRUNAS_UID.
- Please set RUNAS_ID to some UID which exists on MDS and client or
- add user $myRUNAS_UID:$myRUNAS_GID on these nodes."
+ local myRUNAS_UID=$1
+ local myRUNAS_GID=$2
+ shift 2
+ local myRUNAS=$@
+
+ check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \
+ error "unable to write to $DIR/d0_runas_test as " \
+ "UID $myRUNAS_UID."
}
# obtain the UID/GID for MPI_USER
}
do_and_time () {
- local cmd=$1
- local rc
-
- SECONDS=0
- eval '$cmd'
+ local cmd="$1"
+ local start
+ local rc
- [ ${PIPESTATUS[0]} -eq 0 ] || rc=1
+ start=$SECONDS
+ eval '$cmd'
+ [ ${PIPESTATUS[0]} -eq 0 ] || rc=1
- echo $SECONDS
- return $rc
+ echo $((SECONDS - start))
+ return $rc
}
inodes_available () {
check_node_health() {
local nodes=${1:-$(comma_list $(nodes_list))}
-
- for node in ${nodes//,/ }; do
- check_network "$node" 5
- if [ $? -eq 0 ]; then
- do_node $node "rc=0;
- val=\\\$($LCTL get_param -n catastrophe 2>&1);
- if [[ \\\$? -eq 0 && \\\$val -ne 0 ]]; then
- echo \\\$(hostname -s): \\\$val;
- rc=\\\$val;
- fi;
- exit \\\$rc" || error "$node:LBUG/LASSERT detected"
- fi
- done
+ local health=$TMP/node_health.$$
+
+ do_nodes -q $nodes "$LCTL get_param catastrophe 2>&1" | tee $health |
+ grep "catastrophe=1" && error "LBUG/LASSERT detected"
+ # Only check/report network health if get_param didn't respond from
+ # every node, since *clearly* the network is working for any node
+ # that did return output.
+ if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then
+ for node in ${nodes//,/ }; do
+ check_network $node 5
+ done
+ fi
+ rm -f $health
}
mdsrate_cleanup () {
fi
}
-delayed_recovery_enabled () {
- local var=${SINGLEMDS}_svc
- do_facet $SINGLEMDS lctl get_param -n mdd.${!var}.stale_export_age > /dev/null 2>&1
-}
-
########################
convert_facet2label() {
echo "${1}-osc-[-0-9a-f]*"
}
-# If the 2.0 MDS was mounted on 1.8 device, then the OSC and LOV names
-# used by MDT would not be changed.
-# mdt lov: fsname-mdtlov
-# mdt osc: fsname-OSTXXXX-osc
-mds_on_old_device() {
- local mds=${1:-"$SINGLEMDS"}
-
- if [ $(lustre_version_code $mds) -gt $(version_code 1.9.0) ]; then
- do_facet $mds "lctl list_param osc.$FSNAME-OST*-osc \
- > /dev/null 2>&1" && return 0
- fi
- return 1
-}
-
get_mdtosc_proc_path() {
local mds_facet=$1
local ost_label=${2:-"*OST*"}
params=$param
fi
+ local plist=$(comma_list $params)
if ! do_rpc_nodes "$(facet_active_host $facet)" \
- wait_import_state $expected "$params" $maxtime; then
+ wait_import_state $expected $plist $maxtime; then
error "$facet: import is not in $expected state after $maxtime"
return 1
fi
params=$($LCTL list_param $param 2>/dev/null || true)
done
fi
+ local plist=$(comma_list $params)
if ! do_rpc_nodes "$(facet_active_host $facet)" \
- wait_import_state $expected "$params" $maxtime \
+ wait_import_state $expected $plist $maxtime \
$error_on_failure; then
if [ $error_on_failure -ne 0 ]; then
error "import is not in ${expected} state"
create_pool() {
local fsname=${1%%.*}
local poolname=${1##$fsname.}
+ local keep_pools=${2:-false}
stack_trap "destroy_test_pools $fsname" EXIT
do_facet mgs lctl pool_new $1
wait_update $HOSTNAME "lctl get_param -n lov.$fsname-*.pools.$poolname \
2>/dev/null || echo foo" "" || error "pool_new failed $1"
- add_pool_to_list $1
+ $keep_pools || add_pool_to_list $1
return $RC
}
local poolname=${1##$fsname.}
local listvar=${fsname}_CREATED_POOLS
- local temp=${listvar}=$(exclude_items_from_list ${!listvar} $poolname)
+ local temp=${listvar}=$(exclude_items_from_list "${!listvar}" $poolname)
eval export $temp
}
+# cleanup all pools exist on $FSNAME
+destroy_all_pools () {
+ local i
+ for i in $(list_pool $FSNAME); do
+ destroy_pool $i
+ done
+}
+
destroy_pool_int() {
local ost
local OSTS=$(list_pool $1)
local RC
- check_pool_not_exist $fsname.$poolname
- [[ $? -eq 0 ]] && return 0
+ check_pool_not_exist $fsname.$poolname && return 0 || true
destroy_pool_int $fsname.$poolname
RC=$?
}
gather_logs () {
- local list=$1
+ local list=$1
- local ts=$(date +%s)
- local docp=true
+ local ts=$(date +%s)
+ local docp=true
- if [[ ! -f "$YAML_LOG" ]]; then
- # init_logging is not performed before gather_logs,
- # so the $LOGDIR needs to be checked here
- check_shared_dir $LOGDIR && touch $LOGDIR/shared
- fi
+ if [[ ! -f "$YAML_LOG" ]]; then
+ # init_logging is not performed before gather_logs,
+ # so the $LOGDIR needs to be checked here
+ check_shared_dir $LOGDIR && touch $LOGDIR/shared
+ fi
- [ -f $LOGDIR/shared ] && docp=false
+ [ -f $LOGDIR/shared ] && docp=false
- # dump lustre logs, dmesg
+ # dump lustre logs, dmesg, and journal if GSS_SK=true
- prefix="$TESTLOG_PREFIX.$TESTNAME"
- suffix="$ts.log"
- echo "Dumping lctl log to ${prefix}.*.${suffix}"
+ prefix="$TESTLOG_PREFIX.$TESTNAME"
+ suffix="$ts.log"
+ echo "Dumping lctl log to ${prefix}.*.${suffix}"
- if [ -n "$CLIENTONLY" -o "$PDSH" == "no_dsh" ]; then
- echo "Dumping logs only on local client."
- $LCTL dk > ${prefix}.debug_log.$(hostname -s).${suffix}
- dmesg > ${prefix}.dmesg.$(hostname -s).${suffix}
- return
- fi
+ if [ -n "$CLIENTONLY" -o "$PDSH" == "no_dsh" ]; then
+ echo "Dumping logs only on local client."
+ $LCTL dk > ${prefix}.debug_log.$(hostname -s).${suffix}
+ dmesg > ${prefix}.dmesg.$(hostname -s).${suffix}
+ [ "$SHARED_KEY" = true ] && find $SK_PATH -name '*.key' -exec \
+ lgss_sk -r {} \; &> \
+ ${prefix}.ssk_keys.$(hostname -s).${suffix}
+ [ "$SHARED_KEY" = true ] && lctl get_param 'nodemap.*.*' > \
+ ${prefix}.nodemaps.$(hostname -s).${suffix}
+ [ "$GSS_SK" = true ] && keyctl show > \
+ ${prefix}.keyring.$(hostname -s).${suffix}
+ [ "$GSS_SK" = true ] && journalctl -a > \
+ ${prefix}.journal.$(hostname -s).${suffix}
+ return
+ fi
- do_nodesv $list \
- "$LCTL dk > ${prefix}.debug_log.\\\$(hostname -s).${suffix};
- dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}"
+ do_nodesv $list \
+ "$LCTL dk > ${prefix}.debug_log.\\\$(hostname -s).${suffix};
+ dmesg > ${prefix}.dmesg.\\\$(hostname -s).${suffix}"
+ if [ "$SHARED_KEY" = true ]; then
+ do_nodesv $list "find $SK_PATH -name '*.key' -exec \
+ lgss_sk -r {} \; &> \
+ ${prefix}.ssk_keys.\\\$(hostname -s).${suffix}"
+ do_facet mds1 "lctl get_param 'nodemap.*.*' > \
+ ${prefix}.nodemaps.\\\$(hostname -s).${suffix}"
+ fi
+ if [ "$GSS_SK" = true ]; then
+ do_nodesv $list "keyctl show > \
+ ${prefix}.keyring.\\\$(hostname -s).${suffix}"
+ do_nodesv $list "journalctl -a > \
+ ${prefix}.journal.\\\$(hostname -s).${suffix}"
+ fi
- if [ ! -f $LOGDIR/shared ]; then
- do_nodes $list rsync -az "${prefix}.*.${suffix}" $HOSTNAME:$LOGDIR
- fi
+ if [ ! -f $LOGDIR/shared ]; then
+ local remote_nodes=$(exclude_items_from_list $list $HOSTNAME)
+
+ for node in ${remote_nodes//,/ }; do
+ rsync -az -e ssh $node:${prefix}.'*'.${suffix} $LOGDIR &
+ done
+ fi
}
do_ls () {
umask $save_umask
- # If modules are not yet loaded then older "lctl lustre_build_version"
- # will fail. Use lctl build version instead.
- log "Client: $($LCTL lustre_build_version)"
- log "MDS: $(do_facet $SINGLEMDS $LCTL lustre_build_version 2>/dev/null||
- do_facet $SINGLEMDS $LCTL --version)"
- log "OSS: $(do_facet ost1 $LCTL lustre_build_version 2> /dev/null ||
- do_facet ost1 $LCTL --version)"
+ # log actual client and server versions if needed for debugging
+ log "Client: $(lustre_build_version client)"
+ log "MDS: $(lustre_build_version mds1)"
+ log "OSS: $(lustre_build_version ost1)"
}
log_test() {
llverfs $partial_arg $llverfs_opts $dir
}
-#Remove objects from OST
-remove_ost_objects() {
- local facet=$1
- local ostdev=$2
- local group=$3
- shift 3
- local objids="$@"
- local mntpt=$(facet_mntpt $facet)
- local opts=$OST_MOUNT_OPTS
- local i
- local rc
-
- echo "removing objects from $ostdev on $facet: $objids"
- if ! test -b $ostdev; then
- opts=$(csa_add "$opts" -o loop)
- fi
- mount -t $(facet_fstype $facet) $opts $ostdev $mntpt ||
- return $?
- rc=0
- for i in $objids; do
- rm $mntpt/O/$group/d$((i % 32))/$i || { rc=$?; break; }
- done
- umount -f $mntpt || return $?
- return $rc
-}
-
-#Remove files from MDT
-remove_mdt_files() {
- local facet=$1
- local mdtdev=$2
- shift 2
- local files="$@"
- local mntpt=$(facet_mntpt $facet)
- local opts=$MDS_MOUNT_OPTS
-
- echo "removing files from $mdtdev on $facet: $files"
- if [ $(facet_fstype $facet) == ldiskfs ] &&
- ! do_facet $facet test -b $mdtdev; then
- opts=$(csa_add "$opts" -o loop)
- fi
- mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt ||
- return $?
- rc=0
- for f in $files; do
- rm $mntpt/ROOT/$f || { rc=$?; break; }
- done
- umount -f $mntpt || return $?
- return $rc
-}
-
-duplicate_mdt_files() {
- local facet=$1
- local mdtdev=$2
- shift 2
- local files="$@"
- local mntpt=$(facet_mntpt $facet)
- local opts=$MDS_MOUNT_OPTS
-
- echo "duplicating files on $mdtdev on $facet: $files"
- mkdir -p $mntpt || return $?
- if [ $(facet_fstype $facet) == ldiskfs ] &&
- ! do_facet $facet test -b $mdtdev; then
- opts=$(csa_add "$opts" -o loop)
- fi
- mount -t $(facet_fstype $facet) $opts $mdtdev $mntpt ||
- return $?
-
- do_umount() {
- trap 0
- popd > /dev/null
- rm $tmp
- umount -f $mntpt
- }
- trap do_umount EXIT
-
- tmp=$(mktemp $TMP/setfattr.XXXXXXXXXX)
- pushd $mntpt/ROOT > /dev/null || return $?
- rc=0
- for f in $files; do
- touch $f.bad || return $?
- getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $tmp
- rc=${PIPESTATUS[0]}
- [ $rc -eq 0 ] || return $rc
- setfattr --restore $tmp || return $?
- done
- do_umount
-}
-
run_sgpdd () {
local devs=${1//,/ }
shift
echo -n ${count:-0}
}
-# Get the block size of the filesystem.
-get_block_size() {
- local facet=$1
- local device=$2
- local size
-
- [ -z "$CLIENTONLY" ] && size=$(do_facet $facet "$DUMPE2FS -h $device 2>&1" |
- awk '/^Block size:/ {print $3}')
- echo -n ${size:-0}
-}
-
# Check whether the "ea_inode" feature is enabled or not, to allow
# ldiskfs xattrs over one block in size. Allow both the historical
# Lustre feature name (large_xattr) and the upstream name (ea_inode).
local rcmd="do_facet $facet"
local metaea=${TMP}/backup_restore.ea
local metadata=${TMP}/backup_restore.tgz
- local opts=${MDS_MOUNT_OPTS}
+ local opts=${MDS_MOUNT_FS_OPTS}
local svc=${facet}_svc
if ! ${rcmd} test -b ${devname}; then
local devname=$(mdsdevname $(facet_number $facet))
local mntpt=$(facet_mntpt brpt)
local rcmd="do_facet $facet"
- local opts=${MDS_MOUNT_OPTS}
+ local opts=${MDS_MOUNT_FS_OPTS}
if ! ${rcmd} test -b ${devname}; then
opts=$(csa_add "$opts" -o loop)
test_mkdir() {
local path
local p_option
+ local hash_type
+ local hash_name=("all_char" "fnv_1a_64" "crush")
local dirstripe_count=${DIRSTRIPE_COUNT:-"2"}
local dirstripe_index=${DIRSTRIPE_INDEX:-$((base % $MDSCOUNT))}
local OPTIND=1
- while getopts "c:i:p" opt; do
+ while getopts "c:H:i:p" opt; do
case $opt in
c) dirstripe_count=$OPTARG;;
+ H) hash_type=$OPTARG;;
i) dirstripe_index=$OPTARG;;
p) p_option="-p";;
- \?) error "only support -i -c -p";;
+ \?) error "only support -c -H -i -p";;
esac
done
[ $# -eq 1 ] || error "Only creating single directory is supported"
path="$*"
+ local parent=$(dirname $path)
if [ "$p_option" == "-p" ]; then
- local parent=$(dirname $path)
-
[ -d $path ] && return 0
if [ ! -d ${parent} ]; then
mkdir -p ${parent} ||
fi
fi
- if [ $MDSCOUNT -le 1 ]; then
+ if [ $MDSCOUNT -le 1 ] || ! is_lustre ${parent}; then
mkdir $path || error "mkdir '$path' failed"
else
local mdt_index
mdt_index=$dirstripe_index
fi
+ # randomly choose hash type
+ [ -z "$hash_type" ] &&
+ hash_type=${hash_name[$((RANDOM % ${#hash_name[@]}))]}
+
if (($MDS1_VERSION >= $(version_code 2.8.0))); then
if [ $dirstripe_count -eq -1 ]; then
dirstripe_count=$((RANDOM % MDSCOUNT + 1))
dirstripe_count=1
fi
- echo "striped dir -i$mdt_index -c$dirstripe_count $path"
- $LFS mkdir -i$mdt_index -c$dirstripe_count $path ||
- error "mkdir -i $mdt_index -c$dirstripe_count $path failed"
+ echo "striped dir -i$mdt_index -c$dirstripe_count -H $hash_type $path"
+ $LFS mkdir -i$mdt_index -c$dirstripe_count -H $hash_type $path ||
+ error "mkdir -i $mdt_index -c$dirstripe_count -H $hash_type $path failed"
fi
}
is_mounted $MOUNT || setupall
rm -rf $DIR/[df][0-9]* || error "Fail to cleanup the env!"
- mkdir $DIR/$tdir || error "Fail to mkdir $DIR/$tdir."
+ mkdir_on_mdt0 $DIR/$tdir || error "Fail to mkdir $DIR/$tdir."
for idx in $(seq $MDSCOUNT); do
local name="MDT$(printf '%04x' $((idx - 1)))"
rm -rf $MOUNT/.lustre/lost+found/$name/*
local file=$1
local pool=$2
local tlist="$3"
- local res=$($GETSTRIPE $file | grep 0x | cut -f2)
+ local res=$($LFS getstripe $file | grep 0x | cut -f2)
for i in $res
do
for t in $tlist ; do
fi
local t=$(for i in $list; do printf "$FSNAME-OST%04x_UUID " $i; done)
+ local tg=$(for i in $list;
+ do printf -- "-e $FSNAME-OST%04x_UUID " $i; done)
+ local firstx=$(printf "%04x" $first)
+ local lastx=$(printf "%04x" $last)
+
do_facet mgs $LCTL pool_add \
- $FSNAME.$pool $FSNAME-OST[$first-$last/$step]
+ $FSNAME.$pool $FSNAME-OST[$firstx-$lastx/$step]
+ # ignore EEXIST(17)
+ if (( $? != 0 && $? != 17 )); then
+ error_noexit "pool_add $FSNAME-OST[$firstx-$lastx/$step] failed"
+ return 3
+ fi
# wait for OSTs to be added to the pool
for mds_id in $(seq $MDSCOUNT); do
local lodname=$FSNAME-MDT$(printf "%04x" $mdt_id)-mdtlov
wait_update_facet mds$mds_id \
"lctl get_param -n lod.$lodname.pools.$pool |
- sort -u | tr '\n' ' ' " "$t" || {
+ grep $tg | sort -u | tr '\n' ' '" "$t" || {
error_noexit "mds$mds_id: Add to pool failed"
- return 3
+ return 2
}
done
- wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool \
- | sort -u | tr '\n' ' ' " "$t" || {
+ wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool |
+ grep $tg | sort -u | tr '\n' ' ' " "$t" || {
error_noexit "Add to pool failed"
return 1
}
- local lfscount=$($LFS pool_list $FSNAME.$pool | grep -c "\-OST")
- local addcount=$(((last - first) / step + 1))
- [ $lfscount -eq $addcount ] || {
- error_noexit "lfs pool_list bad ost count" \
- "$lfscount != $addcount"
- return 2
- }
}
pool_set_dir() {
local tdir=$2
echo "Setting pool on directory $tdir"
- $SETSTRIPE -c 2 -p $pool $tdir && return 0
+ $LFS setstripe -c 2 -p $pool $tdir && return 0
error_noexit "Cannot set pool $pool to $tdir"
return 1
local tdir=$2
echo "Checking pool on directory $tdir"
- local res=$($GETSTRIPE --pool $tdir | sed "s/\s*$//")
+ local res=$($LFS getstripe --pool $tdir | sed "s/\s*$//")
[ "$res" = "$pool" ] && return 0
error_noexit "Pool on '$tdir' is '$res', not '$pool'"
for i in $(seq -w 1 $count)
do
local file=$tdir/spoo-$i
- $SETSTRIPE -p $pool $file
+ $LFS setstripe -p $pool $file
check_file_in_pool $file $pool "$tlist" || \
failed=$((failed + 1))
done
mkdir -p $tdir ||
{ error_noexit "unable to create $tdir"; return 1 ; }
local file="/..$tdir/$tfile-1"
- $SETSTRIPE -p $pool $file ||
+ $LFS setstripe -p $pool $file ||
{ error_noexit "unable to create $file" ; return 2 ; }
cd $tdir
- $SETSTRIPE -p $pool $tfile-2 || {
+ $LFS setstripe -p $pool $tfile-2 || {
error_noexit "unable to create $tfile-2 in $tdir"
return 3
}
pool_remove_first_target() {
+ # Convenience wrapper: index -1 tells pool_remove_target to drop
+ # whichever pool member "lctl get_param" lists first.
echo "Removing first target from a pool"
+ pool_remove_target $1 -1
+}
+
+pool_remove_target() {
local pool=$1
+ local index=$2
local pname="lov.$FSNAME-*.pools.$pool"
- local t=$($LCTL get_param -n $pname | head -1)
+ if [ $index -eq -1 ]; then
+ local t=$($LCTL get_param -n $pname | head -1)
+ else
+ local t=$(printf "$FSNAME-OST%04x_UUID" $index)
+ fi
+
+ echo "Removing $t from $pool"
do_facet mgs $LCTL pool_remove $FSNAME.$pool $t
for mds_id in $(seq $MDSCOUNT); do
local mdt_id=$((mds_id-1))
return 2
}
# setstripe on an empty pool should fail
- $SETSTRIPE -p $pool $file 2>/dev/null && {
+ $LFS setstripe -p $pool $file 2>/dev/null && {
error_noexit "expected failure when creating file" \
"with empty pool"
return 3
return 1
}
# setstripe on an empty pool should fail
- $SETSTRIPE -p $pool $file 2>/dev/null && {
+ $LFS setstripe -p $pool $file 2>/dev/null && {
error_noexit "expected failure when creating file" \
"with missing pool"
return 2
[[ -z "$file" || -z "$expected" ]] &&
error "check_obdidx: invalid argument!"
- obdidx=$(comma_list $($GETSTRIPE $file | grep -A $OSTCOUNT obdidx |
+ obdidx=$(comma_list $($LFS getstripe $file | grep -A $OSTCOUNT obdidx |
grep -v obdidx | awk '{print $1}' | xargs))
[[ $obdidx = $expected ]] ||
[[ -z "$file" || -z "$expected" ]] &&
error "check_start_ost_idx: invalid argument!"
- start_ost_idx=$($GETSTRIPE $file | grep -A 1 obdidx | grep -v obdidx |
- awk '{print $1}')
+ start_ost_idx=$($LFS getstripe $file | grep -A 1 obdidx |
+ grep -v obdidx | awk '{print $1}')
[[ $start_ost_idx = $expected ]] ||
error "OST index of the first stripe on $file is" \
local rc=0
for osc in $oscs; do
- ((rc++))
echo "Check state for $osc"
local evicted=$(do_facet client $LCTL get_param osc.$osc.state |
- tail -n 3 | awk -F"[ [,]" \
- '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+ tail -n 5 | awk -F"[ ,]" \
+ '/EVICTED/ { if (mx<$4) { mx=$4; } } END { print mx }')
if (($? == 0)) && (($evicted > $before)); then
echo "$osc is evicted at $evicted"
- ((rc--))
+ else
+ ((rc++))
+ echo "$osc was not evicted after $before:"
+ do_facet client $LCTL get_param osc.$osc.state |
+ tail -n 8
fi
done
error "$mdt: changelog_mask=+hsm failed: $?"
local cl_user
- cl_user=$(do_facet $facet \
- $LCTL --device $mdt changelog_register -n) ||
+ cl_user=$(do_facet $facet $LCTL --device $mdt \
+ changelog_register -n $@) ||
error "$mdt: register changelog user failed: $?"
stack_trap "__changelog_deregister $facet $cl_user" EXIT
# so reorder to get same order than in changelog_register()
local cl_facets=$(echo "${!CL_USERS[@]}" | tr " " "\n" | sort |
tr "\n" " ")
+ local cl_user
for facet in $cl_facets; do
for cl_user in ${CL_USERS[$facet]}; do
}
changelog_dump() {
+ # Print changelog records from every MDT, prefixing each record with
+ # the MDT name.  Returns the first non-zero "lfs changelog" status
+ # seen, or 0 when all MDTs were dumped cleanly.
+ local rc
+
for M in $(seq $MDSCOUNT); do
local facet=mds$M
local mdt="$(facet_svc $facet)"
-
- $LFS changelog $mdt | sed -e 's/^/'$mdt'./'
+ local output
+ local ret
+
+ output=$($LFS changelog $mdt)
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ # remember the first failure but keep dumping other MDTs
+ rc=${rc:-$ret}
+ elif [ -n "$output" ]; then
+ echo "$output" | sed -e 's/^/'$mdt'./'
+ fi
done
+
+ return ${rc:-0}
}
changelog_extract_field() {
is_project_quota_supported() {
+ # Project quota support requires both a new-enough server and backend
+ # support: ldiskfs needs Lustre > 2.9.55 plus an lfs that knows
+ # "project"; zfs needs Lustre > 2.10.53 plus the project_quota
+ # zpool feature.
$ENABLE_PROJECT_QUOTAS || return 1
- [ "$(facet_fstype $SINGLEMDS)" == "ldiskfs" ] &&
- [ $(lustre_version_code $SINGLEMDS) -gt \
- $(version_code 2.9.55) ] &&
- lfs --help | grep project >&/dev/null &&
- egrep -q "7." /etc/redhat-release && return 0
- if [ "$(facet_fstype $SINGLEMDS)" == "zfs" ]; then
- [ $(lustre_version_code $SINGLEMDS) -le \
- $(version_code 2.10.53) ] && return 1
+ [[ "$(facet_fstype $SINGLEMDS)" == "ldiskfs" &&
+ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.9.55) ]] &&
+ do_facet mds1 lfs --help |& grep -q project && return 0
- do_facet mds1 $ZPOOL upgrade -v |
- grep project_quota && return 0
- fi
+ [[ "$(facet_fstype $SINGLEMDS)" == "zfs" &&
+ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.10.53) ]] &&
+ do_facet mds1 $ZPOOL get all | grep -q project_quota && return 0
return 1
}
+# ZFS project quota enable/disable:
+# This feature will become active as soon as it is enabled and will never
+# return to being disabled. Each filesystem will be upgraded automatically
+ # when remounted or when a new file is created under that filesystem. The
+# upgrade can also be triggered on filesystems via `zfs set version=current
+# <pool/fs>`. The upgrade process runs in the background and may take a
+# while to complete for the filesystems containing a large number of files.
enable_project_quota() {
+ # Enable the project-quota feature on every MDT and OST backing
+ # device (tune2fs -O project for ldiskfs, a zpool feature flag for
+ # zfs), stopping and restarting the filesystem around the change.
is_project_quota_supported || return 0
+ # keep zpools imported across stopall so "zpool set" can reach them;
+ # the stack_trap restores the caller's KEEP_ZPOOL on exit
+ local zkeeper=${KEEP_ZPOOL}
+ stack_trap "KEEP_ZPOOL=$zkeeper" EXIT
+ KEEP_ZPOOL="true"
stopall || error "failed to stopall (1)"
- for num in $(seq $MDSCOUNT); do
- do_facet mds$num $TUNE2FS -O project $(mdsdevname $num) ||
- error "tune2fs $(mdsdevname $num) failed"
- done
+ local zfeat_en="feature@project_quota=enabled"
+ for facet in $(seq -f mds%g $MDSCOUNT) $(seq -f ost%g $OSTCOUNT); do
+ # indirect variable name, e.g. mds1_FSTYPE / ost1_FSTYPE
+ local facet_fstype=${facet:0:3}1_FSTYPE
+ local devname
- for num in $(seq $OSTCOUNT); do
- do_facet ost$num $TUNE2FS -O project $(ostdevname $num) ||
- error "tune2fs $(ostdevname $num) failed"
+ if [ "${!facet_fstype}" = "zfs" ]; then
+ devname=$(zpool_name ${facet})
+ do_facet ${facet} $ZPOOL set "$zfeat_en" $devname ||
+ error "$ZPOOL set $zfeat_en $devname"
+ else
+ [ ${facet:0:3} == "mds" ] &&
+ devname=$(mdsdevname ${facet:3}) ||
+ devname=$(ostdevname ${facet:3})
+ do_facet ${facet} $TUNE2FS -O project $devname ||
+ error "tune2fs $devname failed"
+ fi
done
+ KEEP_ZPOOL="${zkeeper}"
mount
setupall
}
disable_project_quota() {
is_project_quota_supported || return 0
- [ "$(facet_fstype $SINGLEMDS)" != "ldiskfs" ] && return 0
+ [ "$mds1_FSTYPE" != "ldiskfs" ] && return 0
stopall || error "failed to stopall (1)"
for num in $(seq $MDSCOUNT); do
export SINGLEAGT=${SINGLEAGT:-agt1}
export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
+ export HSMTOOL_PID_FILE=${HSMTOOL_PID_FILE:-"/var/run/lhsmtool_posix.pid"}
export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""}
export HSMTOOL_TESTDIR
- export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
+ export HSMTOOL_ARCHIVE_FORMAT=${HSMTOOL_ARCHIVE_FORMAT:-v2}
+
+ if ! [[ $HSMTOOL =~ hsmtool ]]; then
+ echo "HSMTOOL = '$HSMTOOL' does not contain 'hsmtool', GLWT" >&2
+ fi
HSM_ARCHIVE_NUMBER=2
done
}
-search_copytools() {
- local hosts=${1:-$(facet_active_host $SINGLEAGT)}
- do_nodesv $hosts "pgrep -x $HSMTOOL_BASE"
+pkill_copytools() {
+ # Send $signal to the copytool processes on $hosts, matching them
+ # via the pid file the daemon was started with.
+ local hosts="$1"
+ local signal="$2"
+
+ do_nodes "$hosts" "pkill --pidfile=$HSMTOOL_PID_FILE --signal=$signal hsmtool"
}
-kill_copytools() {
- local hosts=${1:-$(facet_active_host $SINGLEAGT)}
+copytool_continue() {
+ # Resume (SIGCONT) any stopped copytool on the agent hosts; quietly
+ # succeeds when no copytool was found to signal.
+ local agents=${1:-$(facet_active_host $SINGLEAGT)}
- echo "Killing existing copytools on $hosts"
- do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true
+ pkill_copytools "$agents" CONT || return 0
+ echo "Copytool is continued on $agents"
}
-wait_copytools() {
+kill_copytools() {
+ # TERM the copytools on $hosts, then send CONT so a copytool that a
+ # test left SIGSTOPped can act on the pending TERM.
local hosts=${1:-$(facet_active_host $SINGLEAGT)}
- local wait_timeout=200
- local wait_start=$SECONDS
- local wait_end=$((wait_start + wait_timeout))
- local sleep_time=100000 # 0.1 second
-
- while ((SECONDS < wait_end)); do
- if ! search_copytools $hosts; then
- echo "copytools stopped in $((SECONDS - wait_start))s"
- return 0
- fi
-
- echo "copytools still running on $hosts"
- usleep $sleep_time
- [ $sleep_time -lt 32000000 ] && # 3.2 seconds
- sleep_time=$(bc <<< "$sleep_time * 2")
- done
- # try to dump Copytool's stack
- do_nodesv $hosts "echo 1 >/proc/sys/kernel/sysrq ; " \
- "echo t >/proc/sysrq-trigger"
-
- echo "copytools failed to stop in ${wait_timeout}s"
-
- return 1
+ echo "Killing existing copytools on $hosts"
+ pkill_copytools "$hosts" TERM || return 0
+ copytool_continue "$hosts"
}
copytool_monitor_cleanup() {
__lhsmtool_rebind()
{
- do_facet $facet $HSMTOOL -p "$hsm_root" --rebind "$@" "$mountpoint"
+ do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --rebind "$@" "$mountpoint"
}
__lhsmtool_import()
{
mkdir -p "$(dirname "$2")" ||
error "cannot create directory '$(dirname "$2")'"
- do_facet $facet $HSMTOOL -p "$hsm_root" --import "$@" "$mountpoint"
+ do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --import "$@" "$mountpoint"
}
__lhsmtool_setup()
{
- local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root \"$hsm_root\""
+ local host="$(facet_host "$facet")"
+ local cmd="$HSMTOOL ${hsmtool_options[@]} --daemon --pid-file=$HSMTOOL_PID_FILE"
[ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth"
[ -n "$archive_id" ] && cmd+=" --archive $archive_id"
- [ ${#misc_options[@]} -gt 0 ] &&
- cmd+=" $(IFS=" " echo "$@")"
- cmd+=" \"$mountpoint\""
+# [ ${#misc_options[@]} -gt 0 ] &&
+# cmd+=" $(IFS=" " echo "$@")"
+ cmd+=" $@ \"$mountpoint\""
- echo "Starting copytool $facet on $(facet_host $facet)"
- stack_trap "do_facet $facet libtool execute pkill -x '$HSMTOOL' || true" EXIT
- do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1"
+ echo "Starting copytool '$facet' on '$host' with cmdline '$cmd'"
+ stack_trap "pkill_copytools $host TERM || true" EXIT
+ do_node "$host" "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1"
}
hsm_root() {
local action=$1
shift
+ # Use default values
+ local facet=$SINGLEAGT
+ local mountpoint="${MOUNT2:-$MOUNT}"
+ local hsm_root="${hsm_root:-$(hsm_root "$facet")}"
+
# Parse arguments
local fail_on_error=true
- local -a misc_options
+ local -a hsmtool_options=("--hsm-root=$hsm_root")
+ local -a action_options=()
+
+ if [[ -n "$HSMTOOL_ARCHIVE_FORMAT" ]]; then
+ hsmtool_options+=("--archive-format=$HSMTOOL_ARCHIVE_FORMAT")
+ fi
+
+ if [[ -n "$HSMTOOL_VERBOSE" ]]; then
+ hsmtool_options+=("$HSMTOOL_VERBOSE")
+ fi
+
while [ $# -gt 0 ]; do
case "$1" in
-f|--facet)
shift
- local facet="$1"
+ facet="$1"
;;
-m|--mountpoint)
shift
- local mountpoint="$1"
+ mountpoint="$1"
;;
-a|--archive-id)
shift
;;
-h|--hsm-root)
shift
- local hsm_root="$1"
+ hsm_root="$1"
;;
-b|--bwlimit)
shift
;;
*)
# Uncommon(/copytool dependent) option
- misc_options+=("$1")
+ action_options+=("$1")
;;
esac
shift
done
- # Use default values if needed
- local facet=${facet:-$SINGLEAGT}
- local mountpoint="${mountpoint:-${MOUNT2:-$MOUNT}}"
- local hsm_root="${hsm_root:-$(hsm_root "$facet")}"
-
stack_trap "do_facet $facet rm -rf '$hsm_root'" EXIT
do_facet $facet mkdir -p "$hsm_root" ||
error "mkdir '$hsm_root' failed"
;;
esac
- __${copytool}_${action} "${misc_options[@]}"
+ __${copytool}_${action} "${action_options[@]}"
if [ $? -ne 0 ]; then
local error_msg
error_msg="Failed to start copytool $facet on '$host'"
;;
import)
- local src="${misc_options[0]}"
- local dest="${misc_options[1]}"
+ local src="${action_options[0]}"
+ local dest="${action_options[1]}"
error_msg="Failed to import '$src' to '$dest'"
;;
rebind)
return $rc
}
-wait_result() {
- local facet=$1
- shift
- wait_update --verbose $(facet_active_host $facet) "$@"
-}
-
mdts_check_param() {
local key="$1"
local target="$2"
local timeout="$3"
local mdtno
+
for mdtno in $(seq 1 $MDSCOUNT); do
local idx=$(($mdtno - 1))
- wait_result mds${mdtno} \
+ wait_update_facet --verbose mds${mdtno} \
"$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \
$timeout ||
error "$key state is not '$target' on mds${mdtno}"
local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions"
cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
- wait_result $mds "$cmd" "$state" 200 ||
+ wait_update_facet --verbose $mds "$cmd" "$state" 200 ||
error "request on $fid is not $state on $mds"
}
check_component_count $file $comp_cnt
}
+statx_supported() {
+ # Probe whether the statx helper binary is usable; the probe's exit
+ # status is the function's return value.
+ $STATX --quiet --version
+}
+
+#
+# wrappers for createmany and unlinkmany
+# to set debug=0 if number of creates is high enough
+# this is to speedup testing
+#
+function createmany() {
+ # Wrapper for the createmany binary: for large runs (count > 100)
+ # temporarily disable Lustre debug logging on all nodes to speed up
+ # the test, restoring the previous mask afterwards.  The file count
+ # is always the last argument passed to createmany.
+ local count=${!#}
+ local rc
+
+ (( count > 100 )) && {
+ local saved_debug=$($LCTL get_param -n debug)
+ local list=$(comma_list $(all_nodes))
+
+ do_nodes $list $LCTL set_param -n debug=0
+ }
+ # quote "$@" so pathname templates with spaces survive word splitting
+ $LUSTRE/tests/createmany "$@"
+ rc=$?
+ (( count > 100 )) &&
+ do_nodes $list "$LCTL set_param -n debug=\\\"$saved_debug\\\""
+ return $rc
+}
+
+function unlinkmany() {
+ # Wrapper for the unlinkmany binary: for large runs (count > 100)
+ # temporarily disable Lustre debug logging on all nodes to speed up
+ # the test, restoring the previous mask afterwards.  The file count
+ # is always the last argument passed to unlinkmany.
+ local count=${!#}
+ local rc
+
+ (( count > 100 )) && {
+ local saved_debug=$($LCTL get_param -n debug)
+ local list=$(comma_list $(all_nodes))
+
+ do_nodes $list $LCTL set_param -n debug=0
+ }
+ # quote "$@" so pathname templates with spaces survive word splitting
+ $LUSTRE/tests/unlinkmany "$@"
+ rc=$?
+ (( count > 100 )) &&
+ do_nodes $list "$LCTL set_param -n debug=\\\"$saved_debug\\\""
+ return $rc
+}
+
+# Check if fallocate supported on OSTs, enable if unset, default mode=0
+# Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents)
+function check_set_fallocate()
+{
+ # $1 (optional): desired OST fallocate mode.  With no argument, mode
+ # 0 is set only when the parameter is still unset (-1); an explicit
+ # mode is skipped when it already matches the current one.
+ # Returns 1 when the OSTs do not expose fallocate_zero_blocks.
+ local new_mode="$1"
+ local osts=$(comma_list $(osts_nodes))
+ local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks"
+ local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null|
+ head -n 1)
+
+ [[ -n "$old_mode" ]] || { echo "fallocate not supported"; return 1; }
+ [[ -z "$new_mode" && "$old_mode" != "-1" ]] &&
+ { echo "keep default fallocate mode: $old_mode"; return 0; }
+ [[ "$new_mode" && "$old_mode" == "$new_mode" ]] &&
+ { echo "keep current fallocate mode: $old_mode"; return 0; }
+
+ # restore the original mode when the test exits
+ stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode"
+ do_nodes $osts $LCTL set_param $fa_mode=${new_mode:-0} ||
+ error "set $fa_mode=$new_mode"
+}
+
+# Check if fallocate supported on OSTs, enable if unset, skip if unavailable
+function check_set_fallocate_or_skip()
+{
+ # Skip the current test unless fallocate can be enabled on ldiskfs
+ # OSTs (non-ldiskfs backends are skipped outright).
+ [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
+ check_set_fallocate || skip "need at least 2.13.57 for fallocate"
+}
+
+function disable_opencache()
+{
+ # Turn the llite open cache off, saving the current threshold (only
+ # the first time) in saved_OPENCACHE_value so restore_opencache()
+ # can put it back.
+ local state=$($LCTL get_param -n "llite.*.opencache_threshold_count" | head -1)
+
+ test -z "${saved_OPENCACHE_value}" &&
+ export saved_OPENCACHE_value="$state"
+
+ [[ "$state" = "off" ]] && return
+
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=off
+}
+
+function set_opencache()
+{
+ # Set llite.*.opencache_threshold_count to $newvalue, remembering the
+ # previous value (only the first time) in saved_OPENCACHE_value so
+ # restore_opencache() can put it back.  No-op when $newvalue is empty.
+ local newvalue="$1"
+ # head -1 keeps a single value even with multiple llite mounts,
+ # matching disable_opencache(); a multi-line saved value would break
+ # the restore_opencache() set_param.
+ local state=$($LCTL get_param -n "llite.*.opencache_threshold_count" | head -1)
+
+ [[ -n "$newvalue" ]] || return
+
+ [[ -n "${saved_OPENCACHE_value}" ]] ||
+ export saved_OPENCACHE_value="$state"
+
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=$newvalue
+}
+
+
+
+function restore_opencache()
+{
+ # Undo disable_opencache()/set_opencache() by restoring the value
+ # saved in saved_OPENCACHE_value, if any.
+ [[ -z "${saved_OPENCACHE_value}" ]] ||
+ $LCTL set_param -n "llite.*.opencache_threshold_count"=${saved_OPENCACHE_value}
+}
+
+# LU-13417: many tests assume directories are created on MDT0; use this
+# function to create a directory on a specific MDT explicitly, and set the
+# default LMV so subdirectories are created on the same MDT too.
+mkdir_on_mdt() {
+ # Create a directory on a specific MDT (-i index) and, on DNE
+ # systems, set a default LMV so subdirectories are created on the
+ # same MDT as well (see LU-13417).
+ local mdt
+ local OPTIND=1
+
+ while getopts "i:" opt "$@"; do
+ case $opt in
+ i) mdt=$OPTARG;;
+ esac
+ done
+
+ shift $((OPTIND - 1))
+
+ # quote "$@" so directory names with spaces survive word splitting
+ $LFS mkdir -i $mdt -c 1 "$@"
+ # setting default LMV in non-DNE system will cause sanity-quota 41 fail
+ ((MDSCOUNT < 2)) || $LFS setdirstripe -D -i $mdt -c 1 "$@"
+}
+
+mkdir_on_mdt0() {
+ # Create the given directory (and its default subdir layout) on MDT0.
+ # quote "$@" so directory names with spaces survive word splitting
+ mkdir_on_mdt -i0 "$@"
+}
+
+# Wait for nodemap synchronization
+wait_nm_sync() {
+ # Wait until a nodemap parameter is synchronized from the MGS to all
+ # server nodes.
+ # Arguments:
+ #   $1 - nodemap name, or "active" to wait on the activation flag
+ #   $2 - parameter key under the nodemap (may be empty)
+ #   $3 - expected value; if empty, the MGS's current value is used
+ #   $4 - extra lctl get_param option, or "inactive" to check sync
+ #        even when nodemaps are not activated
+ # NOTE(review): max_retries is declared but unused - the wait loop
+ # below is hard-coded to 10 one-second iterations.
+ local nodemap_name=$1
+ local key=$2
+ local value=$3
+ local opt=$4
+ local proc_param
+ local is_active=$(do_facet mgs $LCTL get_param -n nodemap.active)
+ local max_retries=20
+ local is_sync
+ local out1=""
+ local out2
+ local mgs_ip=$(host_nids_address $mgs_HOST $NETTYPE | cut -d' ' -f1)
+ local i
+
+ if [ "$nodemap_name" == "active" ]; then
+ proc_param="active"
+ elif [ -z "$key" ]; then
+ proc_param=${nodemap_name}
+ else
+ proc_param="${nodemap_name}.${key}"
+ fi
+ if [ "$opt" == "inactive" ]; then
+ # check nm sync even if nodemap is not activated
+ is_active=1
+ opt=""
+ fi
+ (( is_active == 0 )) && [ "$proc_param" != "active" ] && return
+
+ if [ -z "$value" ]; then
+ out1=$(do_facet mgs $LCTL get_param $opt \
+ nodemap.${proc_param} 2>/dev/null)
+ echo "On MGS ${mgs_ip}, ${proc_param} = $out1"
+ else
+ out1=$value;
+ fi
+
+ # if servers run on the same node, it is impossible to tell if they get
+ # synced with the mgs, so just wait an arbitrary 10 seconds
+ # NOTE(review): facet "mds" here (not "mds1") - confirm facet name
+ if [ $(facet_active_host mgs) == $(facet_active_host mds) ] &&
+ [ $(facet_active_host mgs) == $(facet_active_host ost1) ]; then
+ echo "waiting 10 secs for sync"
+ sleep 10
+ return
+ fi
+
+ # wait up to 10 seconds for other servers to sync with mgs
+ for i in $(seq 1 10); do
+ for node in $(all_server_nodes); do
+ local node_ip=$(host_nids_address $node $NETTYPE |
+ cut -d' ' -f1)
+
+ is_sync=true
+ if [ -z "$value" ]; then
+ # the MGS is the reference copy; skip comparing it
+ [ $node_ip == $mgs_ip ] && continue
+ fi
+
+ out2=$(do_node $node_ip $LCTL get_param $opt \
+ nodemap.$proc_param 2>/dev/null)
+ echo "On $node ${node_ip}, ${proc_param} = $out2"
+ [ "$out1" != "$out2" ] && is_sync=false && break
+ done
+ $is_sync && break
+ sleep 1
+ done
+ if ! $is_sync; then
+ echo MGS
+ echo $out1
+ echo OTHER - IP: $node_ip
+ echo $out2
+ error "mgs and $nodemap_name ${key} mismatch, $i attempts"
+ fi
+ echo "waited $((i - 1)) seconds for sync"
+}