[ ! -f "$LCTL" ] && export LCTL=$(which lctl)
export LFS=${LFS:-"$LUSTRE/utils/lfs"}
[ ! -f "$LFS" ] && export LFS=$(which lfs)
+ export KSOCKLND_CONFIG=${KSOCKLND_CONFIG:-"$LUSTRE/scripts/ksocklnd-config"}
+ [ ! -f "$KSOCKLND_CONFIG" ] &&
+ export KSOCKLND_CONFIG=$(which ksocklnd-config 2> /dev/null)
- export PERM_CMD=${PERM_CMD:-"$LCTL conf_param"}
+ export PERM_CMD=$(echo ${PERM_CMD:-"$LCTL conf_param"})
export L_GETIDENTITY=${L_GETIDENTITY:-"$LUSTRE/utils/l_getidentity"}
if [ ! -f "$L_GETIDENTITY" ]; then
export LOV_MAX_STRIPE_COUNT=2000
export DELETE_OLD_POOLS=${DELETE_OLD_POOLS:-false}
export KEEP_POOLS=${KEEP_POOLS:-false}
+ export PARALLEL=${PARALLEL:-"no"}
export MACHINEFILE=${MACHINEFILE:-$TMP/$(basename $0 .sh).machines}
. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
/sbin/lsmod | grep -q "^\<$1\>"
}
+check_lfs_df_ret_val() {
+	# Ignore only EOPNOTSUPP (which is 95; Operation not supported)
+	# returned by 'lfs df' for a valid dentry that is not on a Lustre
+	# filesystem.
+	#
+	# 'lfs df' historically always returned success(0) instead of
+	# EOPNOTSUPP.  For compatibility reasons this function masquerades
+	# EOPNOTSUPP as success; any other status is returned unchanged.
+	[[ $1 -eq 95 ]] && return 0
+	return $1
+}
+
PRLFS=false
lustre_insmod() {
local module=$1
fi
}
-load_modules_local() {
- if [ -n "$MODPROBE" ]; then
- # use modprobe
- echo "Using modprobe to load modules"
- return 0
- fi
-
- # Create special udev test rules on every node
- if [ -f $LUSTRE/lustre/conf/99-lustre.rules ]; then {
- sed -e 's|/usr/sbin/lctl|$LCTL|g' $LUSTRE/lustre/conf/99-lustre.rules > /etc/udev/rules.d/99-lustre-test.rules
- } else {
- echo "SUBSYSTEM==\"lustre\", ACTION==\"change\", ENV{PARAM}==\"?*\", RUN+=\"$LCTL set_param '\$env{PARAM}=\$env{SETTING}'\"" > /etc/udev/rules.d/99-lustre-test.rules
- } fi
- udevadm control --reload-rules
- udevadm trigger
+do_lnetctl() {
+	# Record the lnetctl command in the Lustre kernel debug log, echo
+	# it to stdout, then run it with all arguments passed through
+	# verbatim.
+	$LCTL mark "$LNETCTL $*"
+	echo "$LNETCTL $*"
+	$LNETCTL "$@"
+}
+load_lnet() {
# For kmemleak-enabled kernels we need clear all past state
# that obviously has nothing to do with this Lustre run
# Disable automatic memory scanning to avoid perf hit.
load_module ../libcfs/libcfs/libcfs
# Prevent local MODOPTS_LIBCFS being passed as part of environment
# variable to remote nodes
- MODOPTS_LIBCFS=$saved_opts
+ unset MODOPTS_LIBCFS
- set_default_debug
- load_module ../lnet/lnet/lnet
+ set_default_debug "neterror net nettrace malloc"
+ if [ "$1" = "config_on_load=1" ]; then
+ load_module ../lnet/lnet/lnet
+ else
+ load_module ../lnet/lnet/lnet "$@"
+ fi
LNDPATH=${LNDPATH:-"../lnet/klnds"}
if [ -z "$LNETLND" ]; then
case $NETTYPE in
- o2ib*) LNETLND="o2iblnd/ko2iblnd" ;;
- tcp*) LNETLND="socklnd/ksocklnd" ;;
- *) local lnd="${NETTYPE%%[0-9]}lnd"
+ o2ib*) LNETLND="o2iblnd/ko2iblnd" ;;
+ tcp*) LNETLND="socklnd/ksocklnd" ;;
+ *) local lnd="${NETTYPE%%[0-9]}lnd"
[ -f "$LNDPATH/$lnd/k$lnd.ko" ] &&
LNETLND="$lnd/k$lnd" ||
LNETLND="socklnd/ksocklnd"
esac
fi
load_module ../lnet/klnds/$LNETLND
+
+ if [ "$1" = "config_on_load=1" ]; then
+ do_lnetctl lnet configure --all ||
+ return $?
+ fi
+}
+
+load_modules_local() {
+ if [ -n "$MODPROBE" ]; then
+ # use modprobe
+ echo "Using modprobe to load modules"
+ return 0
+ fi
+
+ # Create special udev test rules on every node
+ if [ -f $LUSTRE/lustre/conf/99-lustre.rules ]; then {
+ sed -e 's|/usr/sbin/lctl|$LCTL|g' $LUSTRE/lustre/conf/99-lustre.rules > /etc/udev/rules.d/99-lustre-test.rules
+ } else {
+ echo "SUBSYSTEM==\"lustre\", ACTION==\"change\", ENV{PARAM}==\"?*\", RUN+=\"$LCTL set_param '\$env{PARAM}=\$env{SETTING}'\"" > /etc/udev/rules.d/99-lustre-test.rules
+ } fi
+ udevadm control --reload-rules
+ udevadm trigger
+
+ load_lnet
+
load_module obdclass/obdclass
+ if ! client_only; then
+ MODOPTS_PTLRPC=${MODOPTS_PTLRPC:-"lbug_on_grant_miscount=1"}
+ fi
load_module ptlrpc/ptlrpc
load_module ptlrpc/gss/ptlrpc_gss
load_module fld/fld
}
load_modules () {
+ local facets
+ local facet
+ local failover
load_modules_local
# bug 19124
# load modules on remote nodes optionally
# lustre-tests have to be installed on these nodes
if $LOAD_MODULES_REMOTE; then
local list=$(comma_list $(remote_nodes_list))
+
+ # include failover nodes in case they are not in the list yet
+ facets=$(get_facets)
+ for facet in ${facets//,/ }; do
+ failover=$(facet_failover_host $facet)
+ [ -n "$list" ] && [[ ! "$list" =~ "$failover" ]] &&
+ list="$list,$failover"
+ done
+
if [ -n "$list" ]; then
echo "loading modules on: '$list'"
do_rpc_nodes "$list" load_modules_local
local size=0
case $(facet_fstype $facet) in
- ldiskfs) size=50;; # largest seen is 44, leave some headroom
+ ldiskfs) size=72;; # largest seen is 64, leave some headroom
# grant_block_size is in bytes, allow at least 2x max blocksize
zfs) size=$(lctl get_param osc.$FSNAME*.import |
awk '/grant_block_size:/ {print $2/512; exit;}')
local device=${2:-$FSNAME-OST*}
local name=$3
- do_nodes $nodes "$LCTL get_param -n obdfilter.$device.$name \
- osd-*.$device.$name 2>&1" | grep -v 'error:'
+ do_nodes $nodes "$LCTL get_param -n osd-*.$device.$name"
}
set_osd_param() {
local name=$3
local value=$4
- do_nodes $nodes "$LCTL set_param -n obdfilter.$device.$name=$value \
- osd-*.$device.$name=$value 2>&1" | grep -v 'error:'
+ do_nodes $nodes "$LCTL set_param -n osd-*.$device.$name=$value"
}
set_debug_size () {
mount_facets () {
local facets=${1:-$(get_facets)}
local facet
+ local -a mountpids
+ local total=0
+ local ret=0
for facet in ${facets//,/ }; do
- mount_facet $facet
+ mount_facet $facet &
+ mountpids[total]=$!
+ total=$((total+1))
+ done
+ for ((index=0; index<$total; index++)); do
+ wait ${mountpids[index]}
local RC=$?
[ $RC -eq 0 ] && continue
else
error "Restart of $facet failed!"
fi
- return $RC
+ ret=$RC
done
+ return $ret
}
#
local devicelabel
local dm_dev=${!dev}
+ [[ $dev == "mgsfailover_dev" ]] && combined_mgs_mds &&
+ dev=mds1failover_dev
+
module_loaded lustre || load_modules
case $fstype in
eval export ${dev_alias}_dev=${device}
eval export ${facet}_opt=\"$@\"
+ combined_mgs_mds && [[ ${dev_alias} == mds1 ]] &&
+ eval export mgs_dev=${device}
+
local varname=${dev_alias}failover_dev
if [ -n "${!varname}" ] ; then
eval export ${dev_alias}failover_dev=${!varname}
else
eval export ${dev_alias}failover_dev=$device
+ combined_mgs_mds && [[ ${dev_alias} == mds1 ]] &&
+ eval export mgsfailover_dev=${device}
+
fi
local mntpt=$(facet_mntpt $facet)
if [ "$old_MDT_QUOTA_TYPE" ]; then
if [[ $PERM_CMD == *"set_param -P"* ]]; then
do_facet mgs $PERM_CMD \
- osd-*.$FSNAME-MDT*.quota_slave.enable = \
+ osd-*.$FSNAME-MDT*.quota_slave.enabled = \
$old_MDT_QUOTA_TYPE
else
do_facet mgs $PERM_CMD \
if [ "$old_OST_QUOTA_TYPE" ]; then
if [[ $PERM_CMD == *"set_param -P"* ]]; then
do_facet mgs $PERM_CMD \
- osd-*.$FSNAME-OST*.quota_slave.enable = \
+ osd-*.$FSNAME-OST*.quota_slave.enabled = \
$old_OST_QUOTA_TYPE
else
do_facet mgs $LCTL conf_param \
# This will allow fixing the "lfs df" summary line in the future.
lfs_df() {
	$LFS df $* | sed -e 's/filesystem /filesystem_/'
+	# NOTE(review): $? here is the exit status of the sed stage, not
+	# of 'lfs df' itself; PIPESTATUS[0] would capture 'lfs df'
+	# directly -- confirm this is intended.
+	check_lfs_df_ret_val $?
}
# Get free inodes on the MDT specified by mdt index, free indoes on
if [[ $PERM_CMD == *"set_param -P"* ]]; then
do_facet mgs $PERM_CMD \
- osd-*.$FSNAME-MDT*.quota_slave.enable=$QUOTA_TYPE
+ osd-*.$FSNAME-MDT*.quota_slave.enabled=$QUOTA_TYPE
do_facet mgs $PERM_CMD \
- osd-*.$FSNAME-OST*.quota_slave.enable=$QUOTA_TYPE
+ osd-*.$FSNAME-OST*.quota_slave.enabled=$QUOTA_TYPE
else
do_facet mgs $PERM_CMD $FSNAME.quota.mdt=$QUOTA_TYPE ||
error "set mdt quota type failed"
# Mount the file system on the MDS
mount_mds_client() {
-	local mds_HOST=${SINGLEMDS}_HOST
-	echo $mds_HOST
-	zconf_mount $mds1_HOST $MOUNT2 $MOUNT_OPTS ||
-		error "unable to mount $MOUNT2 on MDS"
+	# Mount $MOUNT2 on the currently active (possibly failover) host
+	# of $SINGLEMDS, rather than always on the primary $mds1_HOST.
+	local host=$(facet_active_host $SINGLEMDS)
+	echo $host
+	zconf_mount $host $MOUNT2 $MOUNT_OPTS ||
+		error "unable to mount $MOUNT2 on $host"
}
# Unmount the file system on the MDS
umount_mds_client() {
-	local mds_HOST=${SINGLEMDS}_HOST
-	zconf_umount $mds1_HOST $MOUNT2
+	# Unmount $MOUNT2 from the currently active (possibly failover)
+	# host of $SINGLEMDS.
+	local host=$(facet_active_host $SINGLEMDS)
+	zconf_umount $host $MOUNT2
	do_facet $SINGLEMDS "rmdir $MOUNT2"
}
reboot_facet() {
	local facet=$1
	local node=$(facet_active_host $facet)
+	# Optional 2nd argument: seconds to wait in SOFT failure mode
+	# (defaults to the previously hard-coded 10).
+	local sleep_time=${2:-10}
	if [ "$FAILURE_MODE" = HARD ]; then
		boot_node $node
	else
-		sleep 10
+		sleep $sleep_time
	fi
}
# If --verbose is passed as the first argument, the result is printed on each
# value change, otherwise it is only printed after every 10s interval.
#
+# If --quiet is passed as the first/second argument, the do_node() command
+# will not print the remote command before executing it each time.
+#
# Using wait_update_cond() or related helper function is preferable to adding
# a "long enough" wait for some state to change in the background, since
# "long enough" may be too short due to tunables, system config, or running in
# a VM, and must by necessity wait too long for most cases or risk failure.
#
-# usage: wait_update_cond [--verbose] node check cond expect [max_wait]
+# usage: wait_update_cond [--verbose] [--quiet] node check cond expect [max_wait]
wait_update_cond() {
- local verbose=false
- [[ "$1" == "--verbose" ]] && verbose=true && shift
+ local verbose
+ local quiet
+
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
local node=$1
local check="$2"
local print=10
while (( $waited <= $max_wait )); do
- result=$(do_node $node "$check")
+ result=$(do_node $quiet $node "$check")
eval [[ "'$result'" $cond "'$expect'" ]]
if [[ $? == 0 ]]; then
echo "Updated after ${waited}s: want '$expect' got '$result'"
return 0
fi
- if $verbose && [[ "$result" != "$prev_result" ]]; then
+ if [[ -n "$verbose" && "$result" != "$prev_result" ]]; then
[[ -n "$prev_result" ]] &&
echo "Changed after ${waited}s: from '$prev_result' to '$result'"
prev_result="$result"
return 3
}
-# usage: wait_update [--verbose] node check expect [max_wait]
+# usage: wait_update [--verbose] [--quiet] node check expect [max_wait]
wait_update() {
-	local verbose=
-	[ "$1" = "--verbose" ] && verbose="$1" && shift
+	# Accept optional --verbose and --quiet/-q flags (in that order)
+	# and forward them to wait_update_cond().
+	local verbose
+	local quiet
+
+	[[ "$1" == "--verbose" ]] && verbose="$1" && shift
+	[[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
	local node="$1"
	local check="$2"
	local expect="$3"
	local max_wait=$4
-	wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait
+	wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait
}
# usage: wait_update_facet_cond [--verbose] facet check cond expect [max_wait]
wait_update_facet_cond() {
- local verbose=
- [ "$1" = "--verbose" ] && verbose="$1" && shift
+ local verbose
+ local quiet
+
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
local node=$(facet_active_host $1)
local check="$2"
local expect="$4"
local max_wait=$5
- wait_update_cond $verbose $node "$check" "$cond" "$expect" $max_wait
+ wait_update_cond $verbose $quiet $node "$check" "$cond" "$expect" $max_wait
}
# usage: wait_update_facet [--verbose] facet check expect [max_wait]
wait_update_facet() {
-	local verbose=
-	[ "$1" = "--verbose" ] && verbose="$1" && shift
+	# Accept optional --verbose and --quiet/-q flags (in that order),
+	# resolve the facet to its active host, then delegate to
+	# wait_update_cond() with an equality condition.
+	local verbose
+	local quiet
+
+	[[ "$1" == "--verbose" ]] && verbose="$1" && shift
+	[[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
	local node=$(facet_active_host $1)
	local check="$2"
	local expect="$3"
	local max_wait=$4
-	wait_update_cond $verbose $node "$check" "==" "$expect" $max_wait
+	wait_update_cond $verbose $quiet $node "$check" "==" "$expect" $max_wait
}
sync_all_data() {
fi
echo affected facets: $facets
- # we can use "for" here because we are waiting the slowest
- for facet in ${facets//,/ }; do
+ facets=${facets//,/ }
+ # We can use "for" here because we are waiting the slowest.
+ # The mgs not having the recovery_status proc entry, exclude it
+ # from the facet list.
+ for facet in ${facets//mgs/ }; do
local var_svc=${facet}_svc
local param="*.${!var_svc}.recovery_status"
# MAX value shouldn't be big as this mean server responsiveness
# never increase this just to make test pass but investigate
# why it takes so long time
- local MAX=5
+ local MAX=${1:-5}
local WAIT=0
local list=$(comma_list $(mdts_nodes))
while [ $WAIT -lt $MAX ]; do
lfs_df_check() {
	local clients=${1:-$CLIENTS}
+	local rc=0
	if [ -z "$clients" ]; then
-		$LFS df $MOUNT
+		$LFS df $MOUNT > /dev/null || rc=$?
	else
-		$PDSH $clients "$LFS df $MOUNT" > /dev/null
+		$PDSH $clients "$LFS df $MOUNT" > /dev/null || rc=$?
	fi
+
+	# Treat EOPNOTSUPP (95) as success for compatibility; any other
+	# non-zero status is propagated.
+	check_lfs_df_ret_val $rc
}
clients_up() {
lfs_df_check
}
+all_mds_up() {
+	# Refresh cross-MDT statfs state: wait for cached statfs data on
+	# the MDTs to expire, then trigger new statfs RPCs between all
+	# MDTs.  No-op on single-MDT filesystems.
+	(( MDSCOUNT == 1 )) && return
+
+	# wait so that statfs data on MDT expire
+	local delay=$(do_facet mds1 $LCTL \
+		get_param -n osp.*MDT*MDT0000.maxage | sort -n | tail -1)
+
+	[ -n "$delay" ] || error "fail to get maxage"
+	sleep $delay
+	local nodes=$(comma_list $(mdts_nodes))
+	# initiate statfs RPC, all to all MDTs
+	# NOTE(review): issued twice, presumably so the second call sees
+	# the results of the RPCs started by the first -- confirm.
+	do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null
+	do_nodes $nodes $LCTL get_param -N osp.*MDT*MDT*.filesfree >&/dev/null
+}
+
client_up() {
# usually checked on particular client or locally
sleep 1
skip=0
#check whether facet has been included in other affected facets
for ((index=0; index<$total; index++)); do
- [[ *,$facet,* == ,${affecteds[index]}, ]] && skip=1
+ [[ ,${affecteds[index]}, == *,$facet,* ]] && skip=1
done
if [ $skip -eq 0 ]; then
shutdown_facet $facet
done
- $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \
- $(mdsdevname 1) "-n" || error "Running e2fsck")
+ echo "$(date +'%H:%M:%S (%s)') shut down"
- for ((index=0; index<$total; index++)); do
- facet=$(echo ${affecteds[index]} | tr -s " " | cut -d"," -f 1)
- echo reboot facets: ${affecteds[index]}
+ local hostlist
+ local waithostlist
+
+ for facet in ${facets//,/ }; do
+ local host=$(facet_active_host $facet)
+
+ hostlist=$(expand_list $hostlist $host)
+ if [ $(facet_host $facet) = \
+ $(facet_failover_host $facet) ]; then
+ waithostlist=$(expand_list $waithostlist $host)
+ fi
+ done
+
+ if [ "$FAILURE_MODE" = HARD ]; then
+ for host in ${hostlist//,/ }; do
+ reboot_node $host
+ done
+ echo "$(date +'%H:%M:%S (%s)') $hostlist rebooted"
+		# We need to wait for the rebooted hosts in case
+		# facet_HOST == facetfailover_HOST
+ if ! [ -z "$waithostlist" ]; then
+ wait_for_host $waithostlist
+ if $LOAD_MODULES_REMOTE; then
+ echo "loading modules on $waithostlist"
+ do_rpc_nodes $waithostlist load_modules_local
+ fi
+ fi
+ else
+ sleep 10
+ fi
+
+ if [[ " ${affecteds[@]} " =~ " $SINGLEMDS " ]]; then
+ change_active $SINGLEMDS
+ fi
- reboot_facet $facet
+ $E2FSCK_ON_MDT0 && (run_e2fsck $(facet_active_host $SINGLEMDS) \
+ $(facet_device $SINGLEMDS) "-n" || error "Running e2fsck")
- change_active ${affecteds[index]}
+ local -a mountpids
- wait_for_facet ${affecteds[index]}
+ for ((index=0; index<$total; index++)); do
+ if [[ ${affecteds[index]} != $SINGLEMDS ]]; then
+ change_active ${affecteds[index]}
+ fi
if $GSS_SK; then
init_gss
init_facets_vars_simple
if ! combined_mgs_mds &&
list_member ${affecteds[index]} mgs; then
mount_facet mgs || error "Restart of mgs failed"
+ affecteds[index]=$(exclude_items_from_list \
+ ${affecteds[index]} mgs)
+ fi
+ if [ -n "${affecteds[index]}" ]; then
+ echo mount facets: ${affecteds[index]}
+ mount_facets ${affecteds[index]} &
+ mountpids[index]=$!
+ fi
+ done
+ for ((index=0; index<$total; index++)); do
+ if [ -n "${affecteds[index]}" ]; then
+ wait ${mountpids[index]}
fi
- # FIXME; has to be changed to mount all facets concurrently
- affected=$(exclude_items_from_list ${affecteds[index]} mgs)
- echo mount facets: ${affecteds[index]}
- mount_facets ${affecteds[index]}
+
if $GSS_SK; then
do_nodes $(comma_list $(all_nodes)) \
"keyctl show | grep lustre | cut -c1-11 |
xargs -IX keyctl setperm X 0x3f3f3f3f"
fi
done
+ echo "$(date +'%H:%M:%S (%s)') targets are mounted"
+
+ if [ "$FAILURE_MODE" = HARD ]; then
+ hostlist=$(exclude_items_from_list $hostlist $waithostlist)
+ if ! [ -z "$hostlist" ]; then
+ wait_for_host $hostlist
+ if $LOAD_MODULES_REMOTE; then
+ echo "loading modules on $hostlist"
+ do_rpc_nodes $hostlist load_modules_local
+ fi
+ fi
+ fi
+
+ echo "$(date +'%H:%M:%S (%s)') facet_failover done"
}
replay_barrier() {
mount_facet $facet -o $abort_type
clients_up || echo "first stat failed: $?"
clients_up || error "post-failover stat: $?"
+ all_mds_up
}
host_nids_address() {
elif [ "${facet:0:3}" == "mdt" -o \
"${facet:0:3}" == "mds" -o \
"${facet:0:3}" == "mgs" ]; then
- eval export ${facet}_HOST=${mds_HOST}
+ local temp
+ if [ "${facet}" == "mgsfailover" ] &&
+ [ -n "$mds1failover_HOST" ]; then
+ temp=$mds1failover_HOST
+ else
+ temp=${mds_HOST}
+ fi
+ eval export ${facet}_HOST=$temp
fi
fi
echo -n ${!varname}
return
fi
+ if combined_mgs_mds && [ $facet == "mgs" ] &&
+ [ -z $mds1failover_HOST ]; then
+ temp=mds1failover_HOST
+ echo ${!temp}
+ return
+ fi
+
if [ "${facet:0:3}" == "mdt" -o "${facet:0:3}" == "mds" -o \
"${facet:0:3}" == "mgs" ]; then
local facetlist=$1
local facet
- facetlist=$(exclude_items_from_list $facetlist mgs)
-
for facet in ${facetlist//,/ }; do
local failover=${facet}failover
local host=`facet_host $failover`
}
do_node() {
- local verbose=false
- # do not stripe off hostname if verbose, bug 19215
- if [ x$1 = x--verbose ]; then
- shift
- verbose=true
- fi
+ local verbose
+ local quiet
- local HOST=$1
- shift
- local myPDSH=$PDSH
- if [ "$HOST" = "$HOSTNAME" ]; then
- myPDSH="no_dsh"
- elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then
- echo "cannot run remote command on $HOST with $myPDSH"
- return 128
- fi
- if $VERBOSE; then
- echo "CMD: $HOST $@" >&2
- $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
- fi
+ # do not strip off hostname if verbose, b=19215
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
+ local HOST=$1
+ shift
+ local myPDSH=$PDSH
+
+ if [ "$HOST" = "$HOSTNAME" ]; then
+ myPDSH="no_dsh"
+ elif [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" ]; then
+ echo "cannot run remote command on $HOST with $myPDSH"
+ return 128
+ fi
+ if $VERBOSE && [[ -z "$quiet" ]]; then
+ echo "CMD: $HOST $@" >&2
+ $myPDSH $HOST "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
+ fi
if [[ "$myPDSH" == "rsh" ]] ||
[[ "$myPDSH" == *pdsh* && "$myPDSH" != *-S* ]]; then
eval $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests;
PATH=\$PATH:/sbin:/usr/sbin;
cd $RPWD;
- LUSTRE=\"$RLUSTRE\" sh -c \"$@\") ||
+ LUSTRE=\"$RLUSTRE\" bash -c \"$@\") ||
echo command failed >$command_status"
[[ -n "$($myPDSH $HOST cat $command_status)" ]] && return 1 ||
return 0
fi
- if $verbose ; then
- # print HOSTNAME for myPDSH="no_dsh"
- if [[ $myPDSH = no_dsh ]]; then
- $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed -e "s/^/${HOSTNAME}: /"
- else
- $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")"
- fi
- else
- $myPDSH $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" sh -c \"$@\")" | sed "s/^${HOST}: //"
- fi
- return ${PIPESTATUS[0]}
+ if [[ -n "$verbose" ]]; then
+ # print HOSTNAME for myPDSH="no_dsh"
+ if [[ $myPDSH = no_dsh ]]; then
+ $myPDSH $HOST \
+ "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\
+ cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")" |
+ sed -e "s/^/${HOSTNAME}: /"
+ else
+ $myPDSH $HOST \
+ "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\
+ cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")"
+ fi
+ else
+ $myPDSH $HOST \
+ "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin;\
+ cd $RPWD; LUSTRE=\"$RLUSTRE\" bash -c \"$@\")" |
+ sed "s/^${HOST}: //"
+ fi
+ return ${PIPESTATUS[0]}
}
-do_nodev() {
- do_node --verbose "$@"
+##
+# Execute exact command line on host
+#
+# The \a host may be on a local or remote node, which is determined at
+# the time the command is run. Does careful argument quotation to
+# ensure that the exact command line is executed without any globbing,
+# substitution, or shell interpretation on the remote side. Does not
+# support --verbose or --quiet. Does not include "$host: " prefixes on
+# output. See also do_facet_vp().
+#
+# usage: do_node_vp "$host" "$command" "$arg"...
+do_node_vp() {
+	local host="$1"
+	shift
+
+	# Local host: run directly.  printf %q re-quotes each argument so
+	# the exact command line survives the 'sh -c' round trip with no
+	# globbing or word splitting.
+	if [[ "$host" == "$HOSTNAME" ]]; then
+		sh -c "$(printf -- ' %q' "$@")"
+		return $?
+	fi
+
+	# Remote execution requires pdsh with -S (propagate the remote
+	# exit status); anything else cannot report failure reliably.
+	if [[ "${PDSH}" != *pdsh* || "${PDSH}" != *-S* ]]; then
+		echo "cannot run '$*' on host '${host}' with PDSH='${PDSH}'" >&2
+		return 128
+	fi
+
+	# -N Disable hostname: prefix on lines of output.
+
+	$PDSH "${host}" -N "cd $RPWD; PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; export LUSTRE=$RLUSTRE; $(printf -- ' %q' "$@")"
+}
single_local_node () {
-	[ "$1" = "$HOSTNAME" ]
+	# True when the node list in $1 is exactly the local hostname.
+	[ "$1" = "$HOSTNAME" ]
}
# Outputs environment variable assignments that should be passed to remote nodes
}
do_nodes() {
- local verbose=false
- # do not stripe off hostname if verbose, bug 19215
- if [ x$1 = x--verbose ]; then
- shift
- verbose=true
- fi
+ local verbose
+ local quiet
- local rnodes=$1
- shift
+ # do not strip off hostname if verbose, b=19215
+ [[ "$1" == "--verbose" ]] && verbose="$1" && shift
+ [[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
- if single_local_node $rnodes; then
- if $verbose; then
- do_nodev $rnodes "$@"
- else
- do_node $rnodes "$@"
- fi
- return $?
- fi
+ local rnodes=$1
+ shift
- # This is part from do_node
- local myPDSH=$PDSH
+ if single_local_node $rnodes; then
+ do_node $verbose $quiet $rnodes "$@"
+ return $?
+ fi
- [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] && \
- echo "cannot run remote command on $rnodes with $myPDSH" && return 128
+ # This is part from do_node
+ local myPDSH=$PDSH
- export FANOUT=$(get_node_count "${rnodes//,/ }")
- if $VERBOSE; then
- echo "CMD: $rnodes $@" >&2
- $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
- fi
+ [ -z "$myPDSH" -o "$myPDSH" = "no_dsh" -o "$myPDSH" = "rsh" ] &&
+ echo "cannot run remote command on $rnodes with $myPDSH" &&
+ return 128
- # do not replace anything from pdsh output if -N is used
- # -N Disable hostname: prefix on lines of output.
- if $verbose || [[ $myPDSH = *-N* ]]; then
- $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")"
- else
- $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) sh -c \"$@\")" | sed -re "s/^[^:]*: //g"
- fi
- return ${PIPESTATUS[0]}
+ export FANOUT=$(get_node_count "${rnodes//,/ }")
+ if $VERBOSE && [[ -z "$quiet" ]]; then
+ echo "CMD: $rnodes $@" >&2
+ $myPDSH $rnodes "$LCTL mark \"$@\"" > /dev/null 2>&1 || :
+ fi
+
+ # do not replace anything from pdsh output if -N is used
+ # -N Disable hostname: prefix on lines of output.
+ if [[ -n "$verbose" || $myPDSH = *-N* ]]; then
+ $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$@\")"
+ else
+ $myPDSH $rnodes "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; cd $RPWD; LUSTRE=\"$RLUSTRE\" $(get_env_vars) bash -c \"$@\")" | sed -re "s/^[^:]*: //g"
+ fi
+ return ${PIPESTATUS[0]}
}
##
#
# usage: do_facet $facet command [arg ...]
do_facet() {
+	local verbose
+	local quiet
+
+	# Optional --verbose / --quiet (-q) flags, forwarded to do_node().
+	[[ "$1" == "--verbose" ]] && verbose="$1" && shift
+	[[ "$1" == "--quiet" || "$1" == "-q" ]] && quiet="$1" && shift
+
	local facet=$1
	shift
-	local HOST=$(facet_active_host $facet)
-	[ -z $HOST ] && echo "No host defined for facet ${facet}" && exit 1
-	do_node $HOST "$@"
+	local host=$(facet_active_host $facet)
+
+	[ -z "$host" ] && echo "No host defined for facet ${facet}" && exit 1
+	do_node $verbose $quiet $host "$@"
+}
+
+##
+# Execute exact command line on the host of a facet
+#
+# The \a facet (service) may be on a local or remote node, which is
+# determined at the time the command is run. Does careful argument
+# quotation to ensure that the exact command line is executed without
+# any globbing, substitution, or shell interpretation on the remote
+# side. Does not support --verbose or --quiet. Does not include
+# "$host: " prefixes on output.
+#
+# usage: do_facet_vp "$facet" "$command" "$arg"...
+do_facet_vp() {
+	local facet="$1"
+	local host=$(facet_active_host "$facet")
+	shift
+
+	# A facet without an active host is a configuration error; abort
+	# the whole script rather than run on the wrong node.
+	if [[ -z "$host" ]]; then
+		echo "no host defined for facet ${facet}" >&2
+		exit 1
+	fi
+
+	do_node_vp "$host" "$@"
+}
# Function: do_facet_random_file $FACET $FILE $SIZE
}
do_nodesv() {
-	do_nodes --verbose "$@"
+	# do_nodes with per-host prefixed output (--verbose), see b=19215.
+	do_nodes --verbose "$@"
}
add() {
opts+=${LDLM_TIMEOUT:+" --param=sys.ldlm_timeout=$LDLM_TIMEOUT"}
if [ $type == MDS ]; then
- opts+=${MDSCAPA:+" --param-mdt.capa=$MDSCAPA"}
opts+=${DEF_STRIPE_SIZE:+" --param=lov.stripesize=$DEF_STRIPE_SIZE"}
opts+=${DEF_STRIPE_COUNT:+" --param=lov.stripecount=$DEF_STRIPE_COUNT"}
opts+=${L_GETIDENTITY:+" --param=mdt.identity_upcall=$L_GETIDENTITY"}
if [ $fstype == ldiskfs ]; then
- fs_mkfs_opts+="-O ea_inode,large_dir"
-
var=${facet}_JRN
if [ -n "${!var}" ]; then
fs_mkfs_opts+=" -J device=${!var}"
fi
if [ $type == OST ]; then
- opts+=${OSSCAPA:+" --param=ost.capa=$OSSCAPA"}
-
if [ $fstype == ldiskfs ]; then
var=${facet}_JRN
if [ -n "${!var}" ]; then
local varname=${facet}failover_HOST
if [ -z "${!varname}" ]; then
- eval export $varname=$(facet_host $facet)
+ local temp
+ if combined_mgs_mds && [ $facet == "mgs" ] &&
+ [ -n "$mds1failover_HOST" ]; then
+ temp=$mds1failover_HOST
+ else
+ temp=$(facet_host $facet)
+ fi
+ eval export $varname=$temp
fi
varname=${facet}_HOST
if ! remote_mds_nodsh; then
for num in $(seq $MDSCOUNT); do
- DEVNAME=`mdsdevname $num`
+ DEVNAME=$(mdsdevname $num)
init_facet_vars mds$num $DEVNAME $MDS_MOUNT_OPTS
done
fi
- combined_mgs_mds || init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS
+ init_facet_vars mgs $(mgsdevname) $MGS_MOUNT_OPTS
if ! remote_ost_nodsh; then
for num in $(seq $OSTCOUNT); do
fi
(( MDS1_VERSION <= $(version_code 2.13.52) )) ||
- do_nodes $(comma_list $(mdts_nodes)) \
- "$LCTL set_param lod.*.mdt_hash=crush"
+ do_facet mgs "$LCTL set_param -P lod.*.mdt_hash=crush"
return 0
}
done
}
+set_pools_quota () {
+	# Set per-user block quotas, optionally per OST pool, as described
+	# by POOLS_QUOTA_USERS_SET (whitespace-separated
+	# "user[:limit[:pool]]" entries).  Requires ENABLE_QUOTA.
+	local u
+	local o
+	local p
+	local i
+	local j
+
+	[[ $ENABLE_QUOTA ]] || error "Required Pool Quotas: \
+		$POOLS_QUOTA_USERS_SET, but ENABLE_QUOTA not set!"
+
+	# POOLS_QUOTA_USERS_SET=
+	#              "quota15_1:20M          -- for all of the found pools
+	#               quota15_2:1G:gpool0
+	#               quota15_3              -- for global limit only
+	#               quota15_4:200M:gpool0
+	#               quota15_4:200M:gpool1"
+
+	declare -a pq_userset=(${POOLS_QUOTA_USERS_SET="mpiuser"})
+	declare -a pq_users
+	declare -A pq_limits
+
+	for ((i=0; i<${#pq_userset[@]}; i++)); do
+		u=${pq_userset[i]%%:*}
+		o=""
+		# user gets no pool limits if
+		# POOLS_QUOTA_USERS_SET does not specify it
+		[[ ${pq_userset[i]} =~ : ]] && o=${pq_userset[i]##$u:}
+		pq_limits[$u]+=" $o"
+	done
+	pq_users=(${!pq_limits[@]})
+
+	declare -a opts
+	local pool
+
+	for ((i=0; i<${#pq_users[@]}; i++)); do
+		u=${pq_users[i]}
+		# set to max limit (_u64)
+		$LFS setquota -u $u -B $((2**24 - 1))T $DIR
+		opts=(${pq_limits[$u]})
+		for ((j=0; j<${#opts[@]}; j++)); do
+			p=${opts[j]##*:}
+			o=${opts[j]%%:*}
+			# Set limit for all existing pools if
+			# no pool specified
+			if [ $p == $o ]; then
+				p=$(list_pool $FSNAME | sed "s/$FSNAME.//")
+				echo "No pool specified for $u,
+					set limit $o for all existing pools"
+			fi
+			for pool in $p; do
+				$LFS setquota -u $u -B $o --pool $pool $DIR ||
+					error "setquota -u $u -B $o \
+						--pool $pool failed"
+			done
+		done
+		$LFS quota -uv $u --pool $DIR
+	done
+}
+
check_and_setup_lustre() {
sanitize_parameters
nfs_client_mode && return
create_pools $FS_POOL $FS_POOL_NOSTS
fi
+ if [[ -n "$POOLS_QUOTA_USERS_SET" ]]; then
+ set_pools_quota
+ fi
if [ "$ONLY" == "setup" ]; then
exit 0
fi
# General functions
wait_for_function () {
-	local quiet=""
+	local quiet=""
-	# suppress fn both stderr and stdout
-	if [ "$1" = "--quiet" ]; then
-		shift
-		quiet=" > /dev/null 2>&1"
-
-	fi
+	# suppress fn both stderr and stdout
+	if [ "$1" = "--quiet" ]; then
+		shift
+		quiet=" > /dev/null 2>&1"
+	fi
-	local fn=$1
-	local max=${2:-900}
-	local sleep=${3:-5}
+	local fn=$1
+	local max=${2:-900}
+	local sleep=${3:-5}
-	local wait=0
+	local wait=0
-	while true; do
+	while true; do
-		eval $fn $quiet && return 0
+		eval $fn $quiet && return 0
-		wait=$((wait + sleep))
-		[ $wait -lt $max ] || return 1
-		echo waiting $fn, $((max - wait)) secs left ...
-		sleep $sleep
-		done
+		[ $wait -lt $max ] || return 1
+		echo waiting $fn, $((max - wait)) secs left ...
+		wait=$((wait + sleep))
+		# Shorten the final sleep so the total never exceeds $max.
+		[ $wait -gt $max ] && ((sleep -= wait - max))
+		sleep $sleep
+	done
}
check_network() {
exit 0
}
+#
+# For interop testing, treat EOPNOTSUPP as success
+# and skip the test
+#
+skip_eopnotsupp() {
+	local retstr=$@
+
+	# Pass when the message (the part before the first '|') mentions
+	# "unsupported" or "Operation not supported"; otherwise fail hard.
+	echo $retstr | awk -F'|' '{print $1}' |
+		grep -E unsupported\|"(Operation not supported)"
+	(( $? == 0 )) || error "$retstr"
+	skip $retstr
+}
+
+# Add a list of tests to ALWAYS_EXCEPT due to an issue.
+# Usage: always_except LU-4815 23 42q ...
+#
+function always_except() {
+	local issue="${1:-}" # single jira style issue ("LU-4815")
+	local test_num
+
+	shift
+
+	# Issue IDs must look like "ABC-123"; anything else is misuse.
+	if ! [[ "$issue" =~ ^[[:upper:]]+-[[:digit:]]+$ ]]; then
+		error "always_except: invalid issue '$issue' for tests '$*'"
+	fi
+
+	for test_num in "$@"; do
+		ALWAYS_EXCEPT+=" $test_num"
+	done
+}
+
build_test_filter() {
EXCEPT="$EXCEPT $(testslist_filter)"
}
reset_fail_loc () {
-	echo -n "Resetting fail_loc on all nodes..."
-	do_nodes $(comma_list $(nodes_list)) "lctl set_param -n fail_loc=0 \
-		fail_val=0 2>/dev/null" || true
-	echo done.
+	# Clear fail_loc/fail_val on every node quietly; errors are
+	# ignored since some nodes may not have the modules loaded.
+	#echo -n "Resetting fail_loc on all nodes..."
+	do_nodes --quiet $(comma_list $(nodes_list)) \
+		"lctl set_param -n fail_loc=0 fail_val=0 2>/dev/null" || true
+	#echo done.
}
# Also appends a timestamp and prepends the testsuite name.
#
-EQUALS="===================================================================================================="
+# ======================================================== 15:06:12 (1624050372)
+EQUALS="========================================================"
banner() {
msg="== ${TESTSUITE} $*"
last=${msg: -1:1}
ldiskfs_check_descriptors: Checksum for group 0 failed\|\
group descriptors corrupted"
- res=$(do_nodes $(comma_list $(nodes_list)) "dmesg" | grep "$errors")
+ res=$(do_nodes -q $(comma_list $(nodes_list)) "dmesg" | grep "$errors")
[ -z "$res" ] && return 0
echo "Kernel error detected: $res"
return 1
# sync all the data and make sure no pending data on server
do_nodes $clients sync
- clients_up # initiate all idling connections
+ do_nodes $clients $LFS df # initiate all idling connections
# get client grant
cli_grant=$(grant_from_clients $clients)
}
ostname_from_index() {
- local uuid=$(ostuuid_from_index $1)
+ local uuid=$(ostuuid_from_index $1 $2)
echo ${uuid/_UUID/}
}
ip addr | awk '/inet / {print $2}' | awk -F/ '{print $1}'
}
+# Description:
+# Returns list of interfaces configured for LNet
+lnet_if_list() {
+	local nids=( $($LCTL list_nids | xargs echo) )
+
+	# No NIDs configured: nothing to report.
+	[[ -z ${nids[@]} ]] &&
+		return 0
+
+	declare -a INTERFACES
+
+	for ((i = 0; i < ${#nids[@]}; i++)); do
+		# Strip the "@net" suffix to get the NID's IP address.
+		ip=$(sed 's/^\(.*\)@.*$/\1/'<<<${nids[i]})
+		INTERFACES[i]=$(ip -o a s |
+				awk '$4 ~ /^'$ip'\//{print $2}')
+		# De-duplicate interface names collected so far.
+		INTERFACES=($(echo "${INTERFACES[@]}" | tr ' ' '\n' | uniq | tr '\n' ' '))
+		if [[ -z ${INTERFACES[i]} ]]; then
+			error "Can't determine interface name for NID ${nids[i]}"
+		elif [[ 1 -ne $(wc -w <<<${INTERFACES[i]}) ]]; then
+			error "Found $(wc -w <<<${INTERFACES[i]}) interfaces for NID ${nids[i]}. Expect 1"
+		fi
+	done
+
+	echo "${INTERFACES[@]}"
+
+	return 0
+}
+
is_local_addr() {
local addr=$1
# Cache address list to avoid mutiple execution of local_addr_list
# Sanity check: exclude the dup entries
RCLIENTS=$(for i in ${rclients//,/ }; do echo $i; done | sort -u)
- clients="$SINGLECLIENT $HOSTNAME $RCLIENTS"
+ export CLIENT1=${CLIENT1:-$HOSTNAME}
+ export SINGLECLIENT=$CLIENT1
+
+ clients="$SINGLECLIENT $HOSTNAME $RCLIENTS"
# Sanity check: exclude the dup entries from CLIENTS
# for those configs which has SINGLCLIENT set to local client
clients=$(for i in $clients; do echo $i; done | sort -u)
- CLIENTS=$(comma_list $clients)
+ export CLIENTS=$(comma_list $clients)
local -a remoteclients=($RCLIENTS)
for ((i=0; $i<${#remoteclients[@]}; i++)); do
varname=CLIENT$((i + 2))
- eval $varname=${remoteclients[i]}
+ eval export $varname=${remoteclients[i]}
done
- CLIENTCOUNT=$((${#remoteclients[@]} + 1))
+ export CLIENTCOUNT=$((${#remoteclients[@]} + 1))
}
get_random_entry () {
$LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum
}
free_min_max () {
	# Report per-OST available space (KB) and record the extremes in
	# the global variables MINV/MINI (smallest value and its OST index)
	# and MAXV/MAXI (largest value and its OST index).
	wait_delete_completed
	# Use $LCTL like the rest of the framework rather than relying on
	# a bare "lctl" being found in PATH.
	AVAIL=($($LCTL get_param -n osc.*[oO][sS][cC]-[^M]*.kbytesavail))
	echo "OST kbytes available: ${AVAIL[*]}"
	MAXV=${AVAIL[0]}
	MAXI=0
	MINV=${AVAIL[0]}
	MINI=0
	for ((i = 0; i < ${#AVAIL[@]}; i++)); do
		#echo OST $i: ${AVAIL[i]}kb
		if [[ ${AVAIL[i]} -gt $MAXV ]]; then
			MAXV=${AVAIL[i]}
			MAXI=$i
		fi
		if [[ ${AVAIL[i]} -lt $MINV ]]; then
			MINV=${AVAIL[i]}
			MINI=$i
		fi
	done
	echo "Min free space: OST $MINI: $MINV"
	echo "Max free space: OST $MAXI: $MAXV"
}
+
# save_lustre_params(comma separated facet list, parameter_mask)
# generate a stream of formatted strings (<facet> <param name>=<param value>)
save_lustre_params() {
check_node_health() {
	# Check all (or the given comma-separated list of) nodes for the
	# kernel "catastrophe" flag, i.e. an LBUG/LASSERT having fired,
	# and fall back to a network check for nodes that did not answer.
	local nodes=${1:-$(comma_list $(nodes_list))}
	local health=$TMP/node_health.$$

	# tee the per-node output so we can count responders afterwards;
	# any "catastrophe=1" line means a node hit an LBUG/LASSERT.
	do_nodes -q $nodes "$LCTL get_param catastrophe 2>&1" | tee $health |
		grep "catastrophe=1" && error "LBUG/LASSERT detected"
	# Only check/report network health if get_param isn't reported, since
	# *clearly* the network is working if get_param returned something.
	if (( $(grep -c catastro $health) != $(wc -w <<< ${nodes//,/ }) )); then
		for node in ${nodes//,/ }; do
			check_network $node 5
		done
	fi
	rm -f $health
}
mdsrate_cleanup () {
}
_wait_import_state () {
	# Poll an import state until it matches the expected value.
	# Arguments:
	#   $1 - expected state (extended regex, matched anchored)
	#   $2 - lctl parameter name holding the import state
	#   $3 - timeout in seconds (default: max_recovery_time)
	#   $4 - non-zero (default) to call error() on timeout
	# Returns 0 when the state is reached, 1 on timeout.
	local expected="$1"
	local CONN_PROC="$2"
	local maxtime=${3:-$(max_recovery_time)}
	local err_on_fail=${4:-1}
	local CONN_STATE
	local i=0

	CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null | cut -f2 | uniq)
	# "egrep" is deprecated - "grep -E" has identical semantics
	while ! echo "${CONN_STATE}" | grep -E -q "^${expected}\$" ; do
		if [[ "${expected}" == "DISCONN" ]]; then
			# for disconn we can check after proc entry is removed
			[[ -z "${CONN_STATE}" ]] && return 0
			# with AT, we can have connect request timeout near
			# reconnect timeout and test can't see real disconnect
			[[ "${CONN_STATE}" == "CONNECTING" ]] && return 0
		fi
		if (( $i >= $maxtime )); then
			(( $err_on_fail != 0 )) &&
				error "can't put import for $CONN_PROC into ${expected} state after $i sec, have ${CONN_STATE}"
			return 1
		fi
		sleep 1
		# Add uniq for multi-mount case
		CONN_STATE=$($LCTL get_param -n $CONN_PROC 2>/dev/null |
			     cut -f2 | uniq)
		i=$((i + 1))
	done

	log "$CONN_PROC in ${CONN_STATE} state after $i sec"
	return 0
}
wait_import_state() {
	# Wait for every import named in $2 (comma-separated list of lctl
	# parameter names) to reach state $1, allowing each up to $3
	# seconds (default: max_recovery_time).  $4 selects whether a
	# timeout raises error() (non-zero, the default) or just returns.
	local state="$1"
	local param_list="$2"
	local timeout=${3:-$(max_recovery_time)}
	local fail_is_error=${4:-1}
	local p

	for p in ${param_list//,/ }; do
		_wait_import_state "$state" "$p" $timeout $fail_is_error ||
			return
	done
}
wait_import_state_mount() {
return 0
fi
- wait_import_state $*
+ wait_import_state "$@"
}
# One client request could be timed out because server was not ready
}
do_rpc_nodes () {
	# Run an rpc.sh function on every node in a comma-separated list.
	# An optional leading --quiet/-q flag suppresses per-node command
	# echoing; the default is verbose.
	local quiet

	case "$1" in
	-q|--quiet) quiet="$1"; shift ;;
	esac

	local nodelist=$1
	shift
	local libpath="/usr/lib/lustre/tests:/usr/lib64/lustre/tests:"
	local testpath="$RLUSTRE/tests:"
	local rpath="PATH=${testpath}${libpath}${PATH}:/sbin:/bin:/usr/sbin:"

	do_nodes ${quiet:-"--verbose"} $nodelist "${rpath} NAME=${NAME} bash rpc.sh $@ "
}
wait_clients_import_state () {
- local list=$1
- local facet=$2
- local expected=$3
-
- local facets=$facet
+ local list="$1"
+ local facet="$2"
+ local expected="$3"
+ local facets="$facet"
if [ "$FAILURE_MODE" = HARD ]; then
- facets=$(facets_on_host $(facet_active_host $facet))
+ facets=$(for f in ${facet//,/ }; do
+ facets_on_host $(facet_active_host $f) | tr "," "\n"
+ done | sort -u | paste -sd , )
fi
for facet in ${facets//,/ }; do
local proc_path
case $facet in
ost* ) proc_path="osc.$(get_clientosc_proc_path \
- $label).ost_server_uuid" ;;
+ $label).ost_server_uuid" ;;
mds* ) proc_path="mdc.$(get_clientmdc_proc_path \
- $label).mds_server_uuid" ;;
+ $label).mds_server_uuid" ;;
mgs* ) proc_path="mgc.$(get_clientmgc_proc_path \
- $label).mgs_server_uuid" ;;
+ $label).mgs_server_uuid" ;;
*) error "unknown facet!" ;;
esac
run_llverdev()
{
	# Run llverdev (full device write/read verification) on device $1;
	# any remaining arguments are passed through to llverdev.  Partial
	# (fast) mode is used when the device is larger than 1GB, or when
	# its size cannot be determined at all.
	local dev=$1; shift
	local llverdev_opts="$*"
	local devname=$(basename $dev)
	local size=$(awk "/$devname$/ {print \$3}" /proc/partitions)

	# loop devices aren't in /proc/partitions; fall back to the file
	# size, converted from bytes to KB so the units match the
	# 1K-block sizes reported by /proc/partitions
	[[ -z "$size" ]] && size=$(($(stat -c %s $dev) / 1024))

	local size_gb=$((size / 1024 / 1024)) # Gb

	local partial_arg=""
	# Run in partial (fast) mode if the size of a partition > 1 GB
	(( $size == 0 || $size_gb > 1 )) && partial_arg="-p"

	llverdev --force $partial_arg $llverdev_opts $dev
}
run_llverfs()
{
	# Verify the filesystem mounted at $1 with llverfs.  $2 carries
	# extra llverfs options; pass "no" as $3 to force a full (rather
	# than partial) verification run.
	local dir=$1
	local llverfs_opts=$2
	local use_partial_arg=$3
	local partial_arg=""
	local size

	# Filesystem size in GB, taken from the last line of df output.
	size=$(df -B G $dir | awk 'END { sub("G", "", $2); print $2 }') #GB

	# Run in partial (fast) mode if the size of a partition > 1 GB
	if [[ "$use_partial_arg" != "no" ]] && [ $size -gt 1 ]; then
		partial_arg="-p"
	fi

	llverfs $partial_arg $llverfs_opts $dir
}
run_sgpdd () {
# ldiskfs xattrs over one block in size. Allow both the historical
# Lustre feature name (large_xattr) and the upstream name (ea_inode).
large_xattr_enabled() {
- [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 1
+ [[ $(facet_fstype $SINGLEMDS) == zfs ]] && return 0
local mds_dev=$(mdsdevname ${SINGLEMDS//mds/})
local devname=$(mdsdevname $(facet_number $facet))
local mntpt=$(facet_mntpt brpt)
local rcmd="do_facet $facet"
- local metaea=${TMP}/backup_restore.ea
local metadata=${TMP}/backup_restore.tgz
local opts=${MDS_MOUNT_FS_OPTS}
local svc=${facet}_svc
# step 1: build mount point
${rcmd} mkdir -p $mntpt
# step 2: cleanup old backup
- ${rcmd} rm -f $metaea $metadata
+ ${rcmd} rm -f $metadata
# step 3: mount dev
- ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 1
+ ${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 3
if [ ! -z $igif ]; then
# step 3.5: rm .lustre
- ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 1
+ ${rcmd} rm -rf $mntpt/ROOT/.lustre || return 3
fi
- # step 4: backup metaea
- echo "backup EA"
- ${rcmd} "cd $mntpt && getfattr -R -d -m '.*' -P . > $metaea && cd -" ||
- return 2
- # step 5: backup metadata
+ # step 4: backup metadata
echo "backup data"
- ${rcmd} tar zcf $metadata -C $mntpt/ . > /dev/null 2>&1 || return 3
- # step 6: umount
- ${rcmd} $UMOUNT $mntpt || return 4
- # step 8: reformat dev
+ ${rcmd} tar zcf $metadata --xattrs --xattrs-include="trusted.*" \
+ --sparse -C $mntpt/ . > /dev/null 2>&1 || return 4
+ # step 5: umount
+ ${rcmd} $UMOUNT $mntpt || return 5
+ # step 6: reformat dev
echo "reformat new device"
format_mdt $(facet_number $facet)
- # step 9: mount dev
+ # step 7: mount dev
${rcmd} mount -t ldiskfs $opts $devname $mntpt || return 7
- # step 10: restore metadata
+ # step 8: restore metadata
echo "restore data"
- ${rcmd} tar zxfp $metadata -C $mntpt > /dev/null 2>&1 || return 8
- # step 11: restore metaea
- echo "restore EA"
- ${rcmd} "cd $mntpt && setfattr --restore=$metaea && cd - " || return 9
- # step 12: remove recovery logs
+ ${rcmd} tar zxfp $metadata --xattrs --xattrs-include="trusted.*" \
+ --sparse -C $mntpt > /dev/null 2>&1 || return 8
+ # step 9: remove recovery logs
echo "remove recovery logs"
${rcmd} rm -fv $mntpt/OBJECTS/* $mntpt/CATALOGS
- # step 13: umount dev
+ # step 10: umount dev
${rcmd} $UMOUNT $mntpt || return 10
- # step 14: cleanup tmp backup
+ # step 11: cleanup tmp backup
${rcmd} rm -f $metaea $metadata
- # step 15: reset device label - it's not virgin on
+ # step 12: reset device label - it's not virgin on
${rcmd} e2label $devname ${!svc}
}
#
function createmany() {
	# Wrapper for tests/createmany; the last argument is the file
	# count.  For large counts, debug logging is disabled on all nodes
	# to speed up creation and restored (via debugsave/debugrestore)
	# afterwards.  Returns createmany's exit status.
	local count=${!#}
	local rc

	if (( count > 100 )); then
		debugsave
		do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0
	fi
	# "$@" preserves each argument as one word; "$*" would re-split
	$LUSTRE/tests/createmany "$@"
	rc=$?
	debugrestore

	return $rc
}
function unlinkmany() {
	# Wrapper for tests/unlinkmany; the last argument is the file
	# count.  For large counts, debug logging is disabled on all nodes
	# to speed up unlinking and restored (via debugsave/debugrestore)
	# afterwards.  Returns unlinkmany's exit status.
	local count=${!#}
	local rc

	if (( count > 100 )); then
		debugsave
		do_nodes $(comma_list $(all_nodes)) $LCTL set_param -n debug=0
	fi
	# "$@" preserves each argument as one word; "$*" would re-split
	$LUSTRE/tests/unlinkmany "$@"
	rc=$?
	debugrestore

	return $rc
}
shift $((OPTIND - 1))
$LFS mkdir -i $mdt -c 1 $*
- # setting default LMV in non-DNE system will cause sanity-quota 41 fail
- ((MDSCOUNT < 2)) || $LFS setdirstripe -D -i $mdt -c 1 $*
}
mkdir_on_mdt0() {
fi
echo "waited $((i - 1)) seconds for sync"
}
+
consume_precreations() {
	# Create enough objects under $1 (striped to OST index $3, via MDT
	# facet $2) to consume every precreated object on that OST, plus
	# $4 extra (default 2) so the OST must precreate a new batch.
	local dir=$1
	local mfacet=$2
	local ostidx=$3
	local extra=${4:-2}
	local ostname=$(ostname_from_index $ostidx $dir)

	test_mkdir -p $dir/${ostname}
	$LFS setstripe -i $ostidx -c 1 ${dir}/${ostname}

	# Query the MDT's OSP device for the precreation window boundaries.
	local osp_proc=$(get_mdtosc_proc_path $mfacet $ostname)
	local last_id=$(do_facet $mfacet $LCTL get_param -n \
			osp.$osp_proc.prealloc_last_id)
	local next_id=$(do_facet $mfacet $LCTL get_param -n \
			osp.$osp_proc.prealloc_next_id)

	echo "Creating to objid $last_id on ost $ostname..."
	createmany -o $dir/${ostname}/f $next_id $((last_id - next_id + extra))
}
+
__exhaust_precreations() {
	# Exhaust object precreation on OST index $1: consume all
	# precreated objects while OBD_FAIL_OST_ENOSPC is armed, then
	# leave fail_loc $2 (with fail_val $3, default $1) set on the OST.
	# Callers are expected to clear/override the fail_loc later.
	local OSTIDX=$1
	local FAILLOC=$2
	local FAILIDX=${3:-$OSTIDX}
	local ofacet=ost$((OSTIDX + 1))

	mkdir_on_mdt0 $DIR/$tdir
	# The MDT holding $DIR/$tdir is the one whose OSP precreates here.
	local mdtidx=$($LFS getstripe -m $DIR/$tdir)
	local mfacet=mds$((mdtidx + 1))
	echo OSTIDX=$OSTIDX MDTIDX=$mdtidx

	local mdtosc_proc=$(get_mdtosc_proc_path $mfacet)
	# Show the precreation window before and after for debugging.
	do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc*

#define OBD_FAIL_OST_ENOSPC 0x215
	do_facet $ofacet $LCTL set_param fail_val=$FAILIDX fail_loc=0x215

	consume_precreations $DIR/$tdir $mfacet $OSTIDX

	do_facet $mfacet $LCTL get_param osp.$mdtosc_proc.prealloc*
	do_facet $ofacet $LCTL set_param fail_loc=$FAILLOC
}
+
exhaust_precreations() {
	# Exhaust precreations on OST index $1 (fail_loc $2, optional
	# fail_val $3), then wait one maxage interval so the MDS notices
	# the exhausted OST before the caller proceeds.
	local ostidx=$1
	local failloc=$2
	local failidx=$3

	__exhaust_precreations $ostidx $failloc $failidx
	sleep_maxage
}
+
exhaust_all_precreations() {
	# Exhaust object precreation on every OST (fail_loc $1, fail_val
	# -1), then wait a single maxage interval at the end so the MDS
	# sees all OSTs as full.
	local idx

	for (( idx = 0; idx < OSTCOUNT; idx++ )); do
		__exhaust_precreations $idx $1 -1
	done
	sleep_maxage
}