[ ! -f "$SGPDDSURVEY" ] && export SGPDDSURVEY=$(which sgpdd-survey)
export MCREATE=${MCREATE:-mcreate}
export MULTIOP=${MULTIOP:-multiop}
+ export MMAP_CAT=${MMAP_CAT:-mmap_cat}
+ export STATX=${STATX:-statx}
# Ubuntu, at least, has a truncate command in /usr/bin
# so fully path our truncate command.
export TRUNCATE=${TRUNCATE:-$LUSTRE/tests/truncate}
fi
export RSYNC_RSH=${RSYNC_RSH:-rsh}
# Prefer the in-tree lnetctl/lctl binaries; fall back to whatever is in PATH
# when the build-tree binary does not exist.
export LNETCTL=${LNETCTL:-"$LUSTRE/../lnet/utils/lnetctl"}
[ ! -f "$LNETCTL" ] && export LNETCTL=$(which lnetctl 2> /dev/null)
export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
[ ! -f "$LCTL" ] && export LCTL=$(which lctl)
export LFS=${LFS:-"$LUSTRE/utils/lfs"}
# Return a numeric version code based on a version string. The version
# code is useful for comparison two version strings to see which is newer.
# Pack a version string such as "2.10.3" or "1.8.6-wc3" into a single
# integer with 8 bits per component, so two versions can be compared
# numerically.  Punctuation and letters are converted to field separators,
# so "1.8.6-wc3" yields the fields 1, 8, 6, 3.  Missing fields default to 0.
version_code() {
	# split arguments like "1.8.6-wc3" into "1", "8", "6", "3"
	eval set -- $(tr "[:punct:][a-z]" " " <<< $*)

	echo -n $(((${1:-0} << 24) | (${2:-0} << 16) | (${3:-0} << 8) | (${4:-0})))
}
export LINUX_VERSION=$(uname -r | sed -e "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/")
# output: prints version string to stdout in (up to 4) dotted-decimal values
# Print the Lustre version string (up to 4 dotted-decimal components) for
# the given facet (default "client").  The result is cached in the global
# ${facet}_VERSION so the remote query is only performed once per facet.
lustre_build_version() {
	local facet=${1:-client}
	local facet_version=${facet}_VERSION

	# if the global variable is already set, then use that
	[ -n "${!facet_version}" ] && echo ${!facet_version} && return

	# this is the currently-running version of the kernel modules
	local ver=$(do_facet $facet "$LCTL get_param -n version 2>/dev/null")
	# we mostly test 2.10+ systems, only try others if the above fails
	if [ -z "$ver" ]; then
		ver=$(do_facet $facet "$LCTL lustre_build_version 2>/dev/null")
	fi
	if [ -z "$ver" ]; then
		ver=$(do_facet $facet "$LCTL --version 2>/dev/null" |
		      cut -d' ' -f2)
	fi

	# keep only the line that actually carries the version
	local lver=$(egrep -i "lustre: |version: " <<<"$ver" | head -n 1)
	[ -n "$lver" ] && ver="$lver"

	# strip "lustre: "/"version: " prefix, leading "v", trailing build
	# tags, and normalize "_" to "." before truncating to 4 components
	lver=$(sed -e 's/[^:]*: //' -e 's/^v//' -e 's/[ -].*//' <<<$ver |
	       tr _ . | cut -d. -f1-4)

	# save in global variable for the future
	export $facet_version=$lver

	echo $lver
}
# Report the Lustre numeric build version code for the supplied facet.
# if there is more than 4 CPU cores, libcfs should create multiple CPU
# partitions. So we just force libcfs to create 2 partitions for
# system with 2 or 4 cores
+ local saved_opts="$MODOPTS_LIBCFS"
if [ $ncpus -le 4 ] && [ $ncpus -gt 1 ]; then
# force to enable multiple CPU partitions
echo "Force libcfs to create 2 CPU partitions"
load_module ../libcfs/libcfs/libcfs
# Prevent local MODOPTS_LIBCFS being passed as part of environment
# variable to remote nodes
- unset MODOPTS_LIBCFS
+ MODOPTS_LIBCFS=$saved_opts
set_default_debug
load_module ../lnet/lnet/lnet
load_module fid/fid
load_module lmv/lmv
load_module osc/osc
- load_module mdc/mdc
load_module lov/lov
+ load_module mdc/mdc
load_module mgc/mgc
load_module obdecho/obdecho
if ! client_only; then
}
# Scan the tail of dmesg for Lustre/libcfs memory-leak messages.
# Returns 1 (after saving $TMP/debug aside) when a leak is reported,
# 0 otherwise or when IGNORE_LEAK is set.
check_mem_leak () {
	LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true)
	LEAK_PORTALS=$(dmesg | tail -n 20 | egrep -i "libcfs.*memory leaked" || true)
	if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
		echo "$LEAK_LUSTRE" 1>&2
		echo "$LEAK_PORTALS" 1>&2
		# preserve the debug log for post-mortem analysis
		mv $TMP/debug $TMP/debug-leak.`date +%s` || true
		echo "Memory leaks detected"
		[ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true
		return 1
	fi
}
# Unload the Lustre kernel modules on the local node, reload the udev rules
# that the test rules file may have shadowed, and verify no memory leaked.
# Returns 2 on rmmod failure, 254 on a detected leak, 0 on success.
unload_modules_local() {
	$LUSTRE_RMMOD ldiskfs || return 2

	[ -f /etc/udev/rules.d/99-lustre-test.rules ] &&
		udevadm control --reload-rules
	udevadm trigger

	check_mem_leak || return 254

	return 0
}
+
+unload_modules() {
+ local rc=0
+
+ wait_exit_ST client # bug 12845
+
+ unload_modules_local || rc=$?
+
if $LOAD_MODULES_REMOTE; then
local list=$(comma_list $(remote_nodes_list))
if [ -n "$list" ]; then
echo "unloading modules on: '$list'"
- do_rpc_nodes "$list" $LUSTRE_RMMOD ldiskfs
- do_rpc_nodes "$list" check_mem_leak
- do_rpc_nodes "$list" "rm -f /etc/udev/rules.d/99-lustre-test.rules"
- do_rpc_nodes "$list" "udevadm control --reload-rules"
- do_rpc_nodes "$list" "udevadm trigger"
+ do_rpc_nodes "$list" unload_modules_local
fi
fi
rm -f $sbin_mount
fi
- check_mem_leak || return 254
+ [[ $rc -eq 0 ]] && echo "modules unloaded."
- echo "modules unloaded."
- return 0
+ return $rc
}
fs_log_size() {
start_gss_daemons || error_exit "start gss daemon failed! rc=$?"
fi
+ if $GSS_SK && ! $SK_NO_KEY; then
+ echo "Loading basic SSK keys on all servers"
+ do_nodes $(comma_list $(all_server_nodes)) \
+ "lgss_sk -t server -l $SK_PATH/$FSNAME.key || true"
+ do_nodes $(comma_list $(all_server_nodes)) \
+ "keyctl show | grep lustre | cut -c1-11 |
+ sed -e 's/ //g;' |
+ xargs -IX keyctl setperm X 0x3f3f3f3f"
+ fi
+
if $GSS_SK && $SK_NO_KEY; then
local numclients=${1:-$CLIENTCOUNT}
local clients=${CLIENTS:-$HOSTNAME}
set_default_debug_nodes $node "$debug" "$subsys" $debug_size
}
# Run "lctl set_param <params...>" on a comma-separated list of nodes.
# A no-op (returns 0) when no parameters are supplied.
set_params_nodes () {
	[[ $# -ge 2 ]] || return 0

	local nodes=$1
	shift

	do_nodes $nodes $LCTL set_param $@
}
+
# Apply CLIENT_LCTL_SETPARAM_PARAM (or explicit $2) on the client nodes
# (default $CLIENTS).  A no-op when there are no parameters to set.
set_params_clients () {
	local clients=${1:-$CLIENTS}
	local params=${2:-$CLIENT_LCTL_SETPARAM_PARAM}

	[[ -n $params ]] || return 0
	set_params_nodes $clients $params
}
+
set_hostid () {
local hostid=${1:-$(hostid)}
# Print the status column from "lfs df" for the OST with the given index.
# $3 may carry extra lfs_df options (e.g. -l for lazy statfs).
ost_dev_status() {
	local ost_idx=$1
	local mnt_pnt=${2:-$MOUNT}
	local opts=$3
	local ost_uuid

	ost_uuid=$(ostuuid_from_index $ost_idx $mnt_pnt)

	lfs_df $opts $mnt_pnt | awk '/'$ost_uuid'/ { print $7 }'
}
setup_quota(){
fi
set_default_debug_nodes $client
+ set_params_clients $client
return 0
}
# Mount the filesystem at $MOUNT2 on the first MDS node.
# NOTE(review): the mount point is expected to already exist on the MDS;
# the matching umount_mds_client only rmdir's it.
mount_mds_client() {
	local mds_HOST=${SINGLEMDS}_HOST

	echo $mds_HOST
	zconf_mount $mds1_HOST $MOUNT2 $MOUNT_OPTS ||
		error "unable to mount $MOUNT2 on MDS"
}
# Unmount $MOUNT2 from the first MDS node and remove the (now empty)
# mount point.  rmdir rather than "rm -rf" so a failed unmount cannot
# delete live filesystem data.
umount_mds_client() {
	local mds_HOST=${SINGLEMDS}_HOST

	zconf_umount $mds1_HOST $MOUNT2
	do_facet $SINGLEMDS "rmdir $MOUNT2"
}
# nodes is comma list
do_nodes $clients "mount | grep $mnt' '"
set_default_debug_nodes $clients
+ set_params_clients $clients
return 0
}
# "Reboot" a facet: under FAILURE_MODE=HARD actually reboot its active host
# (and wait for it via boot_node); otherwise just pause to simulate it.
reboot_facet() {
	local facet=$1
	local node=$(facet_active_host $facet)

	if [ "$FAILURE_MODE" = HARD ]; then
		boot_node $node
	else
		sleep 10
	fi
}
# Reboot a node (FAILURE_MODE=HARD only), wait for it to come back, and
# reload the Lustre modules on it when testing with remote module loading.
boot_node() {
	local node=$1

	if [ "$FAILURE_MODE" = HARD ]; then
		reboot_node $node
		wait_for_host $node
		if $LOAD_MODULES_REMOTE; then
			# NOTE(review): $facet here comes from the caller's
			# dynamic scope (e.g. reboot_facet) — confirm all
			# callers define it, or the message prints empty
			echo "loading modules on $node: $facet"
			do_rpc_nodes $node load_modules_local
		fi
	fi
}
facets_hosts () {
return 0
}
##
# wait for a command to return the expected result
#
# This will run @check on @node repeatedly until the output matches @expect
# based on the supplied condition, or until @max_wait seconds have elapsed,
# whichever comes first. @cond may be one of the normal bash operators,
# "-gt", "-ge", "-eq", "-le", "-lt", "==", "!=", or "=~", and must be quoted
# in the caller to avoid unintentional evaluation by the shell in the caller.
#
# If @max_wait is not specified, the condition will be checked for up to 90s.
#
# If --verbose is passed as the first argument, the result is printed on each
# value change, otherwise it is only printed after every 10s interval.
#
# Using wait_update_cond() or related helper function is preferable to adding
# a "long enough" wait for some state to change in the background, since
# "long enough" may be too short due to tunables, system config, or running in
# a VM, and must by necessity wait too long for most cases or risk failure.
#
# usage: wait_update_cond [--verbose] node check cond expect [max_wait]
wait_update_cond() {
	local verbose=false
	[[ "$1" == "--verbose" ]] && verbose=true && shift

	local node=$1
	local check="$2"
	local cond="$3"
	local expect="$4"
	local max_wait=${5:-90}
	local result
	local prev_result
	local waited=0
	local begin=$SECONDS
	local sleep=1
	local print=10

	while (( $waited <= $max_wait )); do
		result=$(do_node $node "$check")

		# NOTE: the single quotes protect whitespace in the values,
		# but embedded quotes in $result/$expect would still break
		# this eval — callers are expected to avoid them
		eval [[ "'$result'" $cond "'$expect'" ]]
		if [[ $? == 0 ]]; then
			[[ -z "$result" || $waited -le $sleep ]] ||
				echo "Updated after ${waited}s: want '$expect' got '$result'"
			return 0
		fi
		if $verbose && [[ "$result" != "$prev_result" ]]; then
			[[ -n "$prev_result" ]] &&
				echo "Changed after ${waited}s: from '$prev_result' to '$result'"
			prev_result="$result"
		fi
		(( $waited % $print == 0 )) &&
			echo "Waiting $((max_wait - waited))s for '$expect'"
		sleep $sleep
		# use wall-clock time in case do_node/sleep took longer
		waited=$((SECONDS - begin))
	done

	echo "Update not seen after ${max_wait}s: want '$expect' got '$result'"
	return 3
}
# Convenience wrapper around wait_update_cond using the "==" comparison.
# usage: wait_update [--verbose] node check expect [max_wait]
wait_update() {
	local verbose=
	[ "$1" = "--verbose" ] && verbose="$1" && shift

	local node="$1"
	local check="$2"
	local expect="$3"
	local max_wait=$4

	wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait
}
+
# Like wait_update_cond, but takes a facet name and resolves it to the
# facet's currently-active host.
# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait]
wait_update_facet_cond() {
	local verbose=
	[ "$1" = "--verbose" ] && verbose="$1" && shift

	local node=$(facet_active_host $1)
	local check="$2"
	local cond="$3"
	local expect="$4"
	local max_wait=$5

	wait_update_cond $verbose $node "$check" "$cond" "$expect" $max_wait
}
+
# Like wait_update, but takes a facet name and resolves it to the facet's
# currently-active host, comparing with "==".
# usage: wait_update_facet [--verbose] facet check expect [max_wait]
wait_update_facet() {
	local verbose=
	[ "$1" = "--verbose" ] && verbose="$1" && shift

	local node=$(facet_active_host $1)
	local check="$2"
	local expect="$3"
	local max_wait=$4

	wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait
}
sync_all_data() {
}
# Block until every host in the comma-separated list is reachable (network
# up within 900s each) and answering remote commands.
wait_for_host() {
	local hostlist=$1

	# we can use "for" here because we are waiting the slowest
	for host in ${hostlist//,/ }; do
		check_network "$host" 900
	done
	while ! do_nodes $hostlist hostname > /dev/null; do sleep 5; done
}
wait_for_facet() {
}
# Fail over a facet without the "df" sanity check that fail() performs.
fail_nodf() {
	local facet=$1

	facet_failover $facet
}
# Restart a facet with recovery aborted.  $2 selects the abort flavor
# (default "abort_recovery"; e.g. "abort_recov_mdt" for MDT-MDT recovery).
fail_abort() {
	local facet=$1
	local abort_type=${2:-"abort_recovery"}

	stop $facet
	change_active $facet
	wait_for_facet $facet
	mount_facet $facet -o $abort_type
	# the first client stat may fail while recovery is aborted
	clients_up || echo "first stat failed: $?"
	clients_up || error "post-failover stat: $?"
}
local nodes=$1
local net=${2:-"."}
- do_nodes $nodes "$LCTL list_nids | grep $net | cut -f 1 -d @"
+ do_nodes $nodes "$LCTL list_nids | grep -w $net | cut -f 1 -d @"
}
h2name_or_ip() {
stop ${facet} -f
rm -f $TMP/${facet}active
[[ $facet = mds1 ]] && combined_mgs_mds && rm -f $TMP/mgsactive
+
+ # make sure in-tree ldiskfs is loaded before mkfs
+ if local_mode && [[ $(node_fstypes $HOSTNAME) == *ldiskfs* ]]; then
+ load_module ../ldiskfs/ldiskfs
+ fi
+
do_facet ${facet} $MKFS $* || return ${PIPESTATUS[0]}
if [[ $(facet_fstype $facet) == zfs ]]; then
var=${type}_FS_MKFS_OPTS
fs_mkfs_opts+=${!var:+" ${!var}"}
+ [[ "$QUOTA_TYPE" =~ "p" ]] && fs_mkfs_opts+=" -O project"
+
[ $fstype == ldiskfs ] && fs_mkfs_opts=$(squash_opt $fs_mkfs_opts)
if [ -n "${fs_mkfs_opts## }" ]; then
export I_MOUNTED2=yes
fi
- if $do_check; then
- # FIXME: what to do if check_config failed?
- # i.e. if:
- # 1) remote client has mounted other Lustre fs?
- # 2) lustre is mounted on remote_clients atall ?
- check_config_clients $MOUNT
- init_facets_vars
- init_param_vars
+ if $do_check; then
+ # FIXME: what to do if check_config failed?
+ # i.e. if:
+ # 1) remote client has mounted other Lustre fs?
+ # 2) lustre is mounted on remote_clients atall ?
+ check_config_clients $MOUNT
+ init_facets_vars
+ init_param_vars
- set_default_debug_nodes $(comma_list $(nodes_list))
- fi
+ set_default_debug_nodes $(comma_list $(nodes_list))
+ set_params_clients
+ fi
if [ -z "$CLIENTONLY" -a $(lower $OSD_TRACK_DECLARES_LBUG) == 'yes' ]; then
local facets=""
local log=$TMP/e2fsck.log
local rc=0
+ # turn on pfsck if it is supported
+ do_node $node $E2FSCK -h 2>&1 | grep -qw -- -m && cmd+=" -m8"
echo $cmd
do_node $node $cmd 2>&1 | tee $log
rc=${PIPESTATUS[0]}
[ "$host" = "$HOSTNAME" ] && return 0
- echo "$(date +'%H:%M:%S (%s)') waiting for $host network $max secs ..."
- if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep ; then
- echo "Network not available!"
+ if ! wait_for_function --quiet "ping -c 1 -w 3 $host" $max $sleep; then
+ echo "$(date +'%H:%M:%S (%s)') waited for $host network ${max}s"
exit 1
fi
-
- echo "$(date +'%H:%M:%S (%s)') network interface is UP"
}
no_dsh() {
# Print the default LDLM LRU size: 100 locks per CPU on this node.
default_lru_size()
{
	local nr_cpu=$(grep -c "processor" /proc/cpuinfo)

	echo $((100 * nr_cpu))
}
lru_resize_enable()
# Disable dynamic LRU resizing for the namespaces matching $1 by pinning
# lru_size to a fixed value ($2, default 100-per-CPU from default_lru_size).
lru_resize_disable()
{
	local dev=${1}
	local lru_size=${2:-$(default_lru_size)}

	$LCTL set_param ldlm.namespaces.*$dev*.lru_size=$lru_size
}
flock_is_enabled()
# Exit the script with status 1 if $TESTSUITELOG or any extra log passed
# in $1 contains the word FAIL, otherwise exit 0.
exit_status () {
	local status=0
	local logs="$TESTSUITELOG $1"

	for log in $logs; do
		if [ -f "$log" ]; then
			grep -qw FAIL $log && status=1
		fi
	done

	exit $status
}
local testmsg=$2
export tfile=f${testnum}.${TESTSUITE}
export tdir=d${testnum}.${TESTSUITE}
- local name=$TESTSUITE.$TESTNAME.test_log.$(hostname -s).log
- local test_log=$LOGDIR/$name
- local zfs_log_name=$TESTSUITE.$TESTNAME.zfs_log
- local zfs_debug_log=$LOGDIR/$zfs_log_name
+ local test_log=$TESTLOG_PREFIX.$TESTNAME.test_log.$(hostname -s).log
+ local zfs_debug_log=$TESTLOG_PREFIX.$TESTNAME.zfs_log
local SAVE_UMASK=$(umask)
local rc=0
umask 0022
# remove temp files between repetitions to avoid test failures
[ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] &&
- rm -rf $DIR/$tdir* $DIR/$tfile*
+ rm -rvf $DIR/$tdir* $DIR/$tfile*
# loop around subshell so stack_trap EXIT triggers each time
(run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log
rc=${PIPESTATUS[0]}
TEST_STATUS="PASS"
fi
- pass "$testnum" "($((SECONDS - before))s)"
+ pass "$testnum" "(${duration_sub}s)"
log_sub_test_end $TEST_STATUS $duration_sub "$rc" "$test_error"
[[ $rc != 0 ]] && break
done
# Description:
# Returns list of ip addresses for each interface
# Print one IP address per line for every configured local interface.
local_addr_list() {
	ip addr | awk '/inet / {print $2}' | awk -F/ '{print $1}'
}
is_local_addr() {
}
# Verify that the supplied RUNAS command can run as UID $1 / GID $2 and
# write a file under $DIR.  Returns non-zero when the write test fails;
# hard-errors when the runas command itself is unusable or the UID does
# not exist locally.
check_runas_id_ret() {
	local myRC=0
	local myRUNAS_UID=$1
	local myRUNAS_GID=$2
	shift 2
	local myRUNAS=$@

	if [ -z "$myRUNAS" ]; then
		error_exit "check_runas_id_ret requires myRUNAS argument"
	fi

	$myRUNAS true ||
		error "Unable to execute $myRUNAS"

	id $myRUNAS_UID > /dev/null ||
		error "Invalid RUNAS_ID $myRUNAS_UID. Please set RUNAS_ID to " \
			"some UID which exists on MDS and client or add user " \
			"$myRUNAS_UID:$myRUNAS_GID on these nodes."

	if $GSS_KRB5; then
		$myRUNAS krb5_login.sh ||
			error "Failed to refresh krb5 TGT for UID $myRUNAS_ID."
	fi

	# create a scratch directory owned by the test UID and try to
	# write into it as that user
	mkdir $DIR/d0_runas_test
	chmod 0755 $DIR
	chown $myRUNAS_UID:$myRUNAS_GID $DIR/d0_runas_test
	$myRUNAS -u $myRUNAS_UID -g $myRUNAS_GID touch $DIR/d0_runas_test/f$$ ||
		myRC=$?
	rm -rf $DIR/d0_runas_test
	return $myRC
}
# Like check_runas_id_ret, but turn a failure into a test error.
check_runas_id() {
	local myRUNAS_UID=$1
	local myRUNAS_GID=$2
	shift 2
	local myRUNAS=$@

	check_runas_id_ret $myRUNAS_UID $myRUNAS_GID $myRUNAS || \
		error "unable to write to $DIR/d0_runas_test as " \
			"UID $myRUNAS_UID."
}
# obtain the UID/GID for MPI_USER
}
# Run a command, print the elapsed wall-clock seconds on stdout, and
# return 1 if the command (or the first stage of its pipeline) failed.
# Uses a delta of $SECONDS rather than resetting it, so callers relying
# on $SECONDS elsewhere are unaffected.
do_and_time () {
	local cmd="$1"
	local start
	local rc=0

	start=$SECONDS
	eval '$cmd'
	[ ${PIPESTATUS[0]} -eq 0 ] || rc=1

	echo $((SECONDS - start))
	return $rc
}
inodes_available () {
for node in ${nodes//,/ }; do
check_network "$node" 5
if [ $? -eq 0 ]; then
- do_node $node "rc=0;
- val=\\\$($LCTL get_param -n catastrophe 2>&1);
- if [[ \\\$? -eq 0 && \\\$val -ne 0 ]]; then
- echo \\\$(hostname -s): \\\$val;
- rc=\\\$val;
- fi;
- exit \\\$rc" || error "$node:LBUG/LASSERT detected"
+ do_node $node "$LCTL get_param catastrophe 2>&1" |
+ grep -q "catastrophe=1" &&
+ error "$node:LBUG/LASSERT detected" || true
fi
done
}
params=$param
fi
+ local plist=$(comma_list $params)
if ! do_rpc_nodes "$(facet_active_host $facet)" \
- wait_import_state $expected "$params" $maxtime; then
+ wait_import_state $expected $plist $maxtime; then
error "$facet: import is not in $expected state after $maxtime"
return 1
fi
params=$($LCTL list_param $param 2>/dev/null || true)
done
fi
+ local plist=$(comma_list $params)
if ! do_rpc_nodes "$(facet_active_host $facet)" \
- wait_import_state $expected "$params" $maxtime \
+ wait_import_state $expected $plist $maxtime \
$error_on_failure; then
if [ $error_on_failure -ne 0 ]; then
error "import is not in ${expected} state"
fi
if [ ! -f $LOGDIR/shared ]; then
- do_nodes $list rsync -az "${prefix}.*.${suffix}" \
- $HOSTNAME:$LOGDIR
+ local remote_nodes=$(exclude_items_from_list $list $HOSTNAME)
+
+ for node in ${remote_nodes//,/ }; do
+ rsync -az -e ssh $node:${prefix}.'*'.${suffix} $LOGDIR &
+ done
fi
}
umask $save_umask
- # If modules are not yet loaded then older "lctl lustre_build_version"
- # will fail. Use lctl build version instead.
- log "Client: $($LCTL lustre_build_version)"
- log "MDS: $(do_facet $SINGLEMDS $LCTL lustre_build_version 2>/dev/null||
- do_facet $SINGLEMDS $LCTL --version)"
- log "OSS: $(do_facet ost1 $LCTL lustre_build_version 2> /dev/null ||
- do_facet ost1 $LCTL --version)"
+ # log actual client and server versions if needed for debugging
+ log "Client: $(lustre_build_version client)"
+ log "MDS: $(lustre_build_version mds1)"
+ log "OSS: $(lustre_build_version ost1)"
}
log_test() {
[ $# -eq 1 ] || error "Only creating single directory is supported"
path="$*"
+ local parent=$(dirname $path)
if [ "$p_option" == "-p" ]; then
- local parent=$(dirname $path)
-
[ -d $path ] && return 0
if [ ! -d ${parent} ]; then
mkdir -p ${parent} ||
fi
fi
- if [ $MDSCOUNT -le 1 ]; then
+ if [ $MDSCOUNT -le 1 ] || ! is_lustre ${parent}; then
mkdir $path || error "mkdir '$path' failed"
else
local mdt_index
fi
local t=$(for i in $list; do printf "$FSNAME-OST%04x_UUID " $i; done)
+ local tg=$(for i in $list;
+ do printf -- "-e $FSNAME-OST%04x_UUID " $i; done)
+ local firstx=$(printf "%04x" $first)
+ local lastx=$(printf "%04x" $last)
+
do_facet mgs $LCTL pool_add \
- $FSNAME.$pool $FSNAME-OST[$first-$last/$step]
+ $FSNAME.$pool $FSNAME-OST[$firstx-$lastx/$step]
+ # ignore EEXIST(17)
+ if (( $? != 0 && $? != 17 )); then
+ error_noexit "pool_add $FSNAME-OST[$firstx-$lastx/$step] failed"
+ return 3
+ fi
# wait for OSTs to be added to the pool
for mds_id in $(seq $MDSCOUNT); do
local lodname=$FSNAME-MDT$(printf "%04x" $mdt_id)-mdtlov
wait_update_facet mds$mds_id \
"lctl get_param -n lod.$lodname.pools.$pool |
- sort -u | tr '\n' ' ' " "$t" || {
+ grep $tg | sort -u | tr '\n' ' '" "$t" || {
error_noexit "mds$mds_id: Add to pool failed"
- return 3
+ return 2
}
done
- wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool \
- | sort -u | tr '\n' ' ' " "$t" || {
+ wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$pool |
+ grep $tg | sort -u | tr '\n' ' ' " "$t" || {
error_noexit "Add to pool failed"
return 1
}
- local lfscount=$($LFS pool_list $FSNAME.$pool | grep -c "\-OST")
- local addcount=$(((last - first) / step + 1))
- [ $lfscount -eq $addcount ] || {
- error_noexit "lfs pool_list bad ost count" \
- "$lfscount != $addcount"
- return 2
- }
}
pool_set_dir() {
pool_remove_first_target() {
echo "Removing first target from a pool"
+ pool_remove_target $1 -1
+}
+
+pool_remove_target() {
local pool=$1
+ local index=$2
local pname="lov.$FSNAME-*.pools.$pool"
- local t=$($LCTL get_param -n $pname | head -1)
+ if [ $index -eq -1 ]; then
+ local t=$($LCTL get_param -n $pname | head -1)
+ else
+ local t=$(printf "$FSNAME-OST%04x_UUID" $index)
+ fi
+
+ echo "Removing $t from $pool"
do_facet mgs $LCTL pool_remove $FSNAME.$pool $t
for mds_id in $(seq $MDSCOUNT); do
local mdt_id=$((mds_id-1))
local rc=0
for osc in $oscs; do
- ((rc++))
echo "Check state for $osc"
local evicted=$(do_facet client $LCTL get_param osc.$osc.state |
- tail -n 3 | awk -F"[ [,]" \
- '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+ tail -n 5 | awk -F"[ ,]" \
+ '/EVICTED/ { if (mx<$4) { mx=$4; } } END { print mx }')
if (($? == 0)) && (($evicted > $before)); then
echo "$osc is evicted at $evicted"
- ((rc--))
+ else
+ ((rc++))
+ echo "$osc was not evicted after $before:"
+ do_facet client $LCTL get_param osc.$osc.state |
+ tail -n 8
fi
done
}
# Dump the changelog of every MDT, prefixing each line with the MDT name.
# Returns the first non-zero "lfs changelog" status seen, 0 otherwise.
changelog_dump() {
	local rc

	for M in $(seq $MDSCOUNT); do
		local facet=mds$M
		local mdt="$(facet_svc $facet)"
		local output
		local ret

		output=$($LFS changelog $mdt)
		ret=$?
		if [ $ret -ne 0 ]; then
			# remember only the first failure
			rc=${rc:-$ret}
		elif [ -n "$output" ]; then
			echo "$output" | sed -e 's/^/'$mdt'./'
		fi
	done

	return ${rc:-0}
}
changelog_extract_field() {
export SINGLEAGT=${SINGLEAGT:-agt1}
export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
+ export HSMTOOL_PID_FILE=${HSMTOOL_PID_FILE:-"/var/run/lhsmtool_posix.pid"}
export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""}
export HSMTOOL_TESTDIR
- export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
+ export HSMTOOL_ARCHIVE_FORMAT=${HSMTOOL_ARCHIVE_FORMAT:-v2}
+
+ if ! [[ $HSMTOOL =~ hsmtool ]]; then
+ echo "HSMTOOL = '$HSMTOOL' does not contain 'hsmtool', GLWT" >&2
+ fi
HSM_ARCHIVE_NUMBER=2
done
}
# Send a signal to the copytool processes on the given hosts, located via
# the pidfile written at copytool startup.
pkill_copytools() {
	local hosts="$1"
	local signal="$2"

	do_nodes "$hosts" "pkill --pidfile=$HSMTOOL_PID_FILE --signal=$signal hsmtool"
}
# Resume (SIGCONT) any stopped copytool on the agent hosts; quietly
# succeeds when no copytool is running.
copytool_continue() {
	local agents=${1:-$(facet_active_host $SINGLEAGT)}

	pkill_copytools "$agents" CONT || return 0
	echo "Copytool is continued on $agents"
}
# Terminate the copytools on the given hosts.  SIGTERM first, then SIGCONT
# in case a copytool was stopped and needs to wake up to handle the TERM.
kill_copytools() {
	local hosts=${1:-$(facet_active_host $SINGLEAGT)}

	echo "Killing existing copytools on $hosts"
	pkill_copytools "$hosts" TERM || return 0
	copytool_continue "$hosts"
}
copytool_monitor_cleanup() {
# copytool() action: rebind archived file(s) to new FIDs.
# $facet, hsmtool_options[] and $mountpoint come from the caller's scope.
__lhsmtool_rebind()
{
	do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --rebind "$@" "$mountpoint"
}
# copytool() action: import an archived file ($1) into the filesystem at
# path $2, creating the destination's parent directory first.
# $facet, hsmtool_options[] and $mountpoint come from the caller's scope.
__lhsmtool_import()
{
	mkdir -p "$(dirname "$2")" ||
		error "cannot create directory '$(dirname "$2")'"

	do_facet $facet $HSMTOOL "${hsmtool_options[@]}" --import "$@" "$mountpoint"
}
# copytool() action: start the posix copytool daemon on the facet's host,
# writing its pid to $HSMTOOL_PID_FILE so pkill_copytools can find it.
# $facet, hsmtool_options[], $bandwidth, $archive_id and $mountpoint come
# from the caller's (copytool_setup's) scope.
__lhsmtool_setup()
{
	local host="$(facet_host "$facet")"
	local cmd="$HSMTOOL ${hsmtool_options[@]} --daemon --pid-file=$HSMTOOL_PID_FILE"

	[ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth"
	[ -n "$archive_id" ] && cmd+=" --archive $archive_id"
	# remaining action options are passed straight through to the tool
	cmd+=" $@ \"$mountpoint\""

	echo "Starting copytool '$facet' on '$host' with cmdline '$cmd'"
	stack_trap "pkill_copytools $host TERM || true" EXIT
	do_node "$host" "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1"
}
hsm_root() {
# Parse arguments
local fail_on_error=true
- local -a misc_options
+ local -a hsmtool_options=("--hsm-root=$hsm_root")
+ local -a action_options=()
+
+ if [[ -n "$HSMTOOL_ARCHIVE_FORMAT" ]]; then
+ hsmtool_options+=("--archive-format=$HSMTOOL_ARCHIVE_FORMAT")
+ fi
+
+ if [[ -n "$HSMTOOL_VERBOSE" ]]; then
+ hsmtool_options+=("$HSMTOOL_VERBOSE")
+ fi
+
while [ $# -gt 0 ]; do
case "$1" in
-f|--facet)
;;
*)
# Uncommon(/copytool dependent) option
- misc_options+=("$1")
+ action_options+=("$1")
;;
esac
shift
;;
esac
- __${copytool}_${action} "${misc_options[@]}"
+ __${copytool}_${action} "${action_options[@]}"
if [ $? -ne 0 ]; then
local error_msg
error_msg="Failed to start copytool $facet on '$host'"
;;
import)
- local src="${misc_options[0]}"
- local dest="${misc_options[1]}"
+ local src="${action_options[0]}"
+ local dest="${action_options[1]}"
error_msg="Failed to import '$src' to '$dest'"
;;
rebind)
return $rc
}
-wait_result() {
- local facet=$1
- shift
- wait_update --verbose $(facet_active_host $facet) "$@"
-}
-
mdts_check_param() {
local key="$1"
local target="$2"
local timeout="$3"
local mdtno
+
for mdtno in $(seq 1 $MDSCOUNT); do
local idx=$(($mdtno - 1))
- wait_result mds${mdtno} \
+ wait_update_facet --verbose mds${mdtno} \
"$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \
$timeout ||
error "$key state is not '$target' on mds${mdtno}"
local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions"
cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
- wait_result $mds "$cmd" "$state" 200 ||
+ wait_update_facet --verbose $mds "$cmd" "$state" 200 ||
error "request on $fid is not $state on $mds"
}
check_component_count $file $comp_cnt
}
# Return 0 when the statx test binary is available and functional.
statx_supported() {
	$STATX --quiet --version
}
+
+#
+# wrappers for createmany and unlinkmany
+# to set debug=0 if number of creates is high enough
+# this is to speedup testing
+#
#
# wrapper for createmany: when the file count (last argument) is large,
# temporarily set debug=0 on all nodes to speed up the run, restoring
# the saved debug mask afterwards
#
function createmany() {
	local count=${!#}

	(( count > 100 )) && {
		local saved_debug=$($LCTL get_param -n debug)
		local list=$(comma_list $(all_nodes))

		do_nodes $list $LCTL set_param debug=0
	}

	$LUSTRE/tests/createmany $*
	local rc=$?

	(( count > 100 )) &&
		do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\""

	return $rc
}
+
# wrapper for unlinkmany: when the file count (last argument) is large,
# temporarily set debug=0 on all nodes to speed up the run, restoring
# the saved debug mask afterwards
function unlinkmany() {
	local count=${!#}

	(( count > 100 )) && {
		local saved_debug=$($LCTL get_param -n debug)
		local list=$(comma_list $(all_nodes))

		do_nodes $list $LCTL set_param debug=0
	}

	$LUSTRE/tests/unlinkmany $*
	local rc=$?

	(( count > 100 )) &&
		do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\""

	return $rc
}
+
+# Check if fallocate supported on OSTs, enable if unset, default mode=0
+# Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents)
# Check if fallocate supported on OSTs, enable if unset, default mode=0
# Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents)
function check_set_fallocate()
{
	local new_mode="$1"
	local osts=$(comma_list $(osts_nodes))
	local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks"
	local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null|
			 head -n 1)

	[[ -n "$old_mode" ]] || { echo "fallocate not supported"; return 1; }
	[[ -z "$new_mode" && "$old_mode" != "-1" ]] &&
		{ echo "keep default fallocate mode: $old_mode"; return 0; }
	[[ "$new_mode" && "$old_mode" == "$new_mode" ]] &&
		{ echo "keep current fallocate mode: $old_mode"; return 0; }

	# restore the original mode when the test finishes
	stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode"
	do_nodes $osts $LCTL set_param $fa_mode=${new_mode:-0} ||
		error "set $fa_mode=$new_mode"
}
+
+# Check if fallocate supported on OSTs, enable if unset, skip if unavailable
# Check if fallocate supported on OSTs, enable if unset, skip if unavailable
function check_set_fallocate_or_skip()
{
	[ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
	check_set_fallocate || skip "need at least 2.13.57 for fallocate"
}
+