set -e
set +o monitor
-SRCDIR=$(dirname $0)
-export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin
-
ONLY=${ONLY:-"$*"}
-# bug number for skipped test:
-# LU-4684
-ALWAYS_EXCEPT="406 $SANITY_HSM_EXCEPT"
-# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-
-LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+LUSTRE=${LUSTRE:-$(dirname $0)/..}
. $LUSTRE/tests/test-framework.sh
init_test_env $@
-. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
init_logging
-MULTIOP=${MULTIOP:-multiop}
+ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT "
+if $SHARED_KEY; then
+# bug number for skipped tests: LU-9795
+ ALWAYS_EXCEPT+=" 402b "
+# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+fi
+
+# Skip tests for PPC that fail frequently
+if [[ $(uname -m) = ppc64 ]]; then
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 1a 1b 1d 1e 12c 12f "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 12g 12h 12m 12n 12o 12p "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 12q 21 22 23 24a 24b "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 24d 24e 24f 25b 30c 37 "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 57 58 90 110b 111b 113 "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 222b 222d 228 260a 260b 260c "
+ # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252
+ ALWAYS_EXCEPT+=" 220A 220a 221 222a 222c 223a "
+ # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252
+ ALWAYS_EXCEPT+=" 223b 224A 224a 226 227 600"
+ # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252
+ ALWAYS_EXCEPT+=" 601 602 603 604 605 "
+fi
+
+build_test_filter
+
+[ -n "$FILESET" ] && skip "Not functional for FILESET set"
+
OPENFILE=${OPENFILE:-openfile}
-MMAP_CAT=${MMAP_CAT:-mmap_cat}
MOUNT_2=${MOUNT_2:-"yes"}
FAIL_ON_ERROR=false
# script only handles up to 10 MDTs (because of MDT_PREFIX)
[ $MDSCOUNT -gt 9 ] &&
- error "script cannot handle more than 9 MDTs, please fix" && exit
+ error "script cannot handle more than 9 MDTs, please fix"
check_and_setup_lustre
-if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then
- skip_env "Need MDS version at least 2.4.53" && exit
+if [[ $MDS1_VERSION -lt $(version_code 2.4.53) ]]; then
+ skip_env "Need MDS version at least 2.4.53"
fi
# $RUNAS_ID may get set incorrectly somewhere else
if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then
- skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" && exit
+ skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!"
fi
check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS
if getent group nobody; then
error "No generic nobody group"
fi
-build_test_filter
-
# if there is no CLIENT1 defined, some tests can be ran on localhost
CLIENT1=${CLIENT1:-$HOSTNAME}
# if CLIENT2 doesn't exist then use CLIENT1 instead
# Exception is the test which need two separate nodes
CLIENT2=${CLIENT2:-$CLIENT1}
-#
-# In order to test multiple remote HSM agents, a new facet type named "AGT" and
-# the following associated variables are added:
-#
-# AGTCOUNT: number of agents
-# AGTDEV{N}: target HSM mount point (root path of the backend)
-# agt{N}_HOST: hostname of the agent agt{N}
-# SINGLEAGT: facet of the single agent
-#
-# The number of agents is initialized as the number of remote client nodes.
-# By default, only single copytool is started on a remote client/agent. If there
-# was no remote client, then the copytool will be started on the local client.
-#
-init_agt_vars() {
- local n
- local agent
-
- export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))}
- [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1
-
- export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP}
- if [[ $CLIENTCOUNT -gt 1 ]] &&
- ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then
- skip_env "SHARED_DIRECTORY should be accessible"\
- "on all client nodes"
- exit 0
- fi
-
- # We used to put the HSM archive in $SHARED_DIRECTORY but that
- # meant NFS issues could hose sanity-hsm sessions. So now we
- # use $TMP instead.
- for n in $(seq $AGTCOUNT); do
- eval export AGTDEV$n=\$\{AGTDEV$n:-"$TMP/arc$n"\}
- agent=CLIENT$((n + 1))
- if [[ -z "${!agent}" ]]; then
- [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 ||
- agent=CLIENT2
- fi
- eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\}
- done
-
- export SINGLEAGT=${SINGLEAGT:-agt1}
-
- export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
- export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
- export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
- export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""}
- export HSMTOOL_TESTDIR
- export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
-
- HSM_ARCHIVE_NUMBER=2
-
- # The test only support up to 10 MDTs
- MDT_PREFIX="mdt.$FSNAME-MDT000"
- HSM_PARAM="${MDT_PREFIX}0.hsm"
-
- # archive is purged at copytool setup
- HSM_ARCHIVE_PURGE=true
-
- # Don't allow copytool error upon start/setup
- HSMTOOL_NOERROR=false
-}
-
-# Get the backend root path for the given agent facet.
-copytool_device() {
- local facet=$1
- local dev=AGTDEV$(facet_number $facet)
-
- echo -n ${!dev}
-}
-
-get_mdt_devices() {
- local mdtno
- # get MDT device for each mdc
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- MDT[$idx]=$($LCTL get_param -n \
- mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid |
- awk '{gsub(/_UUID/,""); print $1}' | head -n1)
- done
-}
-
search_copytools() {
local hosts=${1:-$(facet_active_host $SINGLEAGT)}
- do_nodesv $hosts "pgrep -x $HSMTOOL_BASE"
-}
-
-kill_copytools() {
- local hosts=${1:-$(facet_active_host $SINGLEAGT)}
-
- echo "Killing existing copytools on $hosts"
- do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true
+ do_nodesv $hosts "libtool execute pgrep -x $HSMTOOL"
}
wait_copytools() {
local wait_timeout=200
local wait_start=$SECONDS
local wait_end=$((wait_start + wait_timeout))
- local sleep_time=100000 # 0.1 second
+ local sleep_time=1
while ((SECONDS < wait_end)); do
if ! search_copytools $hosts; then
fi
echo "copytools still running on $hosts"
- usleep $sleep_time
- [ $sleep_time -lt 32000000 ] && # 3.2 seconds
- sleep_time=$(bc <<< "$sleep_time * 2")
+ sleep $sleep_time
+ [ $sleep_time -lt 5 ] && sleep_time=$((sleep_time + 1))
done
# try to dump Copytool's stack
cmd="cat $test_dir/fifo > $test_dir/events &"
cmd+=" echo \\\$! > $test_dir/monitor_pid"
- if [[ $PDSH == *Rmrsh* ]]; then
- # This is required for pdsh -Rmrsh and its handling of remote
- # shells.
- # Regular ssh and pdsh -Rssh work fine without this
- # backgrounded subshell nonsense.
- (do_node $agent "$cmd") &
- export HSMTOOL_MONITOR_PDSH=$!
-
- # Slightly racy, but just making a best-effort to catch obvious
- # problems.
- sleep 1
- ps -p $HSMTOOL_MONITOR_PDSH > /dev/null ||
- error "Failed to start copytool monitor on $agent"
- else
- do_node $agent "$cmd"
- if [ $? != 0 ]; then
- error "Failed to start copytool monitor on $agent"
- fi
- fi
-}
-
-copytool_monitor_cleanup() {
- local facet=${1:-$SINGLEAGT}
- local agent=$(facet_active_host $facet)
-
- if [ -n "$HSMTOOL_MONITOR_DIR" ]; then
- # Should die when the copytool dies, but just in case.
- local cmd="kill \\\$(cat $HSMTOOL_MONITOR_DIR/monitor_pid)"
- cmd+=" 2>/dev/null || true"
- do_node $agent "$cmd"
- do_node $agent "rm -fr $HSMTOOL_MONITOR_DIR"
- export HSMTOOL_MONITOR_DIR=
- fi
+ # This background subshell nonsense is required when pdsh/ssh decides
+ # to wait for the cat process to exit on the remote client
+ (do_node $agent "$cmd") &
+ export HSMTOOL_MONITOR_PDSH=$!
- # The pdsh should die on its own when the monitor dies. Just
- # in case, though, try to clean up to avoid any cruft.
- if [ -n "$HSMTOOL_MONITOR_PDSH" ]; then
- kill $HSMTOOL_MONITOR_PDSH 2>/dev/null || true
- export HSMTOOL_MONITOR_PDSH=
+ # Slightly racy, but just making a best-effort to catch obvious
+ # problems.
+ sleep 1
+ do_node $agent "stat $HSMTOOL_MONITOR_DIR/monitor_pid 2>&1 > /dev/null"
+ if [ $? != 0 ]; then
+ error "Failed to start copytool monitor on $agent"
fi
}
esac
}
-copytool_logfile()
-{
- local host="$(facet_host "$1")"
- local prefix=$TESTLOG_PREFIX
- [ -n "$TESTNAME" ] && prefix+=.$TESTNAME
-
- printf "${prefix}.copytool${archive_id}_log.${host}.log"
-}
-
-__lhsmtool_rebind()
-{
- do_facet $facet $HSMTOOL -p "$hsm_root" --rebind "$@" "$mountpoint"
-}
-
-__lhsmtool_import()
-{
- mkdir -p "$(dirname "$2")" ||
- error "cannot create directory '$(dirname "$2")'"
- do_facet $facet $HSMTOOL -p "$hsm_root" --import "$@" "$mountpoint"
-}
-
-__lhsmtool_setup()
-{
- local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root \"$hsm_root\""
- [ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth"
- [ -n "$archive_id" ] && cmd+=" --archive $archive_id"
- [ ${#misc_options[@]} -gt 0 ] &&
- cmd+=" $(IFS=" " echo "$@")"
- cmd+=" \"$mountpoint\""
-
- echo "Starting copytool $facet on $(facet_host $facet)"
- stack_trap "do_facet $facet \"pkill -x $HSMTOOL_BASE\" || true" EXIT
- do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1"
-}
-
-hsm_root() {
- local facet="${1:-$SINGLEAGT}"
-
- printf "$(copytool_device "$facet")/${TESTSUITE}.${TESTNAME}/"
-}
-
-# Main entry point to perform copytool related operations
-#
-# Sub-commands:
-#
-# setup setup a copytool to run in the background, that copytool will be
-# killed on EXIT
-# import import a file from an HSM backend
-# rebind rebind an archived file to a new fid
-#
-# Although the semantics might suggest otherwise, one does not need to 'setup'
-# a copytool before a call to 'copytool import' or 'copytool rebind'.
-#
-copytool()
-{
- local action=$1
- shift
-
- # Parse arguments
- local fail_on_error=true
- local -a misc_options
- while [ $# -gt 0 ]; do
- case "$1" in
- -f|--facet)
- shift
- local facet="$1"
- ;;
- -m|--mountpoint)
- shift
- local mountpoint="$1"
- ;;
- -a|--archive-id)
- shift
- local archive_id="$1"
- ;;
- -b|--bwlimit)
- shift
- local bandwidth="$1" # in MB/s
- ;;
- -n|--no-fail)
- local fail_on_error=false
- ;;
- *)
- # Uncommon(/copytool dependent) option
- misc_options+=("$1")
- ;;
- esac
- shift
- done
-
- # Use default values if needed
- local facet=${facet:-$SINGLEAGT}
- local mountpoint="${mountpoint:-${MOUNT2:-$MOUNT}}"
- local hsm_root="$(hsm_root "$facet")"
-
- stack_trap "do_facet $facet rm -rf '$hsm_root'" EXIT
- do_facet $facet mkdir -p "$hsm_root" ||
- error "mkdir '$hsm_root' failed"
-
- case "$HSMTOOL" in
- lhsmtool_posix)
- local copytool=lhsmtool
- ;;
- esac
-
- __${copytool}_${action} "${misc_options[@]}"
- if [ $? -ne 0 ]; then
- local error_msg
-
- case $action in
- setup)
- local host="$(facet_host $facet)"
- error_msg="Failed to start copytool $facet on '$host'"
- ;;
- import)
- local src="${misc_options[0]}"
- local dest="${misc_options[1]}"
- error_msg="Failed to import '$src' to '$dest'"
- ;;
- rebind)
- error_msg="could not rebind file"
- ;;
- esac
-
- $fail_on_error && error "$error_msg" || echo "$error_msg"
- fi
-}
-
get_copytool_event_log() {
local facet=${1:-$SINGLEAGT}
local agent=$(facet_active_host $facet)
copytool_suspend() {
local agents=${1:-$(facet_active_host $SINGLEAGT)}
- do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0
+ stack_trap \
+ "do_nodesv $agents libtool execute pkill -CONT -x '$HSMTOOL' || true" EXIT
+ do_nodesv $agents "libtool execute pkill -STOP -x $HSMTOOL" || return 0
echo "Copytool is suspended on $agents"
}
-copytool_continue() {
- local agents=${1:-$(facet_active_host $SINGLEAGT)}
-
- do_nodesv $agents "pkill -CONT -x $HSMTOOL_BASE" || return 0
- echo "Copytool is continued on $agents"
-}
-
copytool_remove_backend() {
local fid=$1
local be=$(do_facet $SINGLEAGT find "$(hsm_root)" -name $fid)
error "cannot copy '$1' to '$file'"
}
-mdts_set_param() {
- local arg=$1
- local key=$2
- local value=$3
- local mdtno
- local rc=0
- if [[ "$value" != "" ]]; then
- value="=$value"
- fi
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- local facet=mds${mdtno}
- # if $arg include -P option, run 1 set_param per MDT on the MGS
- # else, run set_param on each MDT
- [[ $arg = *"-P"* ]] && facet=mgs
- do_facet $facet $LCTL set_param $arg mdt.${MDT[$idx]}.$key$value
- [[ $? != 0 ]] && rc=1
- done
- return $rc
-}
-
-mdts_check_param() {
- local key="$1"
- local target="$2"
- local timeout="$3"
- local mdtno
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- wait_result mds${mdtno} \
- "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \
- $timeout ||
- error "$key state is not '$target' on mds${mdtno}"
- done
-}
-
get_hsm_param() {
local param=$1
local val=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.$param)
echo $val
}
-set_hsm_param() {
- local param=$1
- local value=$2
- local opt=$3
- mdts_set_param "$opt -n" "hsm.$param" "$value"
- return $?
-}
-
set_test_state() {
local cmd=$1
local target=$2
mdts_check_param hsm_control "$target" 10
}
-cdt_set_sanity_policy() {
- if [[ "$CDT_POLICY_HAD_CHANGED" ]]
- then
- # clear all
- mdts_set_param "" hsm.policy "+NRA"
- mdts_set_param "" hsm.policy "-NBR"
- CDT_POLICY_HAD_CHANGED=
- fi
-}
cdt_set_no_retry() {
mdts_set_param "" hsm.policy "+NRA"
mdts_set_param "-P -d" hsm_control ""
}
-cdt_set_mount_state() {
- mdts_set_param "-P" hsm_control "$1"
- # set_param -P is asynchronous operation and could race with set_param.
- # In such case configs could be retrieved and applied at mgc after
- # set_param -P completion. Sleep here to avoid race with set_param.
- # We need at least 20 seconds. 10 for mgc_requeue_thread to wake up
- # MGC_TIMEOUT_MIN_SECONDS + MGC_TIMEOUT_RAND_CENTISEC(5 + 5)
- # and 10 seconds to retrieve config from server.
- sleep 20
-}
-
-cdt_check_state() {
- mdts_check_param hsm_control "$1" 20
-}
-
cdt_disable() {
set_test_state disabled disabled
}
cdt_set_sanity_policy
}
-needclients() {
- local client_count=$1
- if [[ $CLIENTCOUNT -lt $client_count ]]; then
- skip "Need $client_count or more clients, have $CLIENTCOUNT"
- return 1
- fi
- return 0
-}
-
-path2fid() {
- $LFS path2fid $1 | tr -d '[]'
- return ${PIPESTATUS[0]}
-}
-
-get_hsm_flags() {
- local f=$1
- local u=$2
- local st
-
- if [[ $u == "user" ]]; then
- st=$($RUNAS $LFS hsm_state $f)
- else
- u=root
- st=$($LFS hsm_state $f)
- fi
-
- [[ $? == 0 ]] || error "$LFS hsm_state $f failed (run as $u)"
-
- st=$(echo $st | cut -f 2 -d" " | tr -d "()," )
- echo $st
-}
get_hsm_archive_id() {
local f=$1
echo $ar
}
-check_hsm_flags() {
- local f=$1
- local fl=$2
-
- local st=$(get_hsm_flags $f)
- [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl"
-}
-
check_hsm_flags_user() {
local f=$1
local fl=$2
wait_delete_completed
}
-make_custom_file_for_progress() {
- local count=${2:-"39"}
- local bs=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1)
- bs=${3:-$bs}
-
- [[ $count -gt 0 ]] || error "Invalid file size"
- [[ $bs -gt 0 ]] || error "Invalid stripe size"
-
- if ! create_file "${1/$DIR/$DIR2}" $bs $count fsync; then
- echo "The creation of '${1/$DIR/$DIR2}' failed" >&2
- echo "It might be due to a lack of space in the filesystem" >&2
- delete_large_files >&2
- create_file "${1/$DIR/$DIR2}" $bs $count fsync ||
- file_creation_failure dd "${1/$DIR/$DIR2}" $?
- fi
-}
-
-wait_result() {
- local facet=$1
- shift
- wait_update --verbose $(facet_active_host $facet) "$@"
-}
-
-wait_request_state() {
- local fid=$1
- local request=$2
- local state=$3
- # 4th arg (mdt index) is optional
- local mdtidx=${4:-0}
- local mds=mds$(($mdtidx + 1))
-
- local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions"
- cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
-
- wait_result $mds "$cmd" $state 200 ||
- error "request on $fid is not $state on $mds"
-}
-
get_request_state() {
local fid=$1
local request=$2
[[ -n $fid ]] && cmd+=" | grep '$fid'"
cmd+=" | egrep 'WAITING|STARTED'"
- wait_result $SINGLEMDS "$cmd" "" $timeout ||
+ wait_update_facet --verbose mds1 "$cmd" "" $timeout ||
error "requests did not complete"
}
# Lustre mount-point is mandatory and last parameter on
# copytool cmd-line.
- local mntpnt=$(do_rpc_nodes $agent ps -C $HSMTOOL_BASE -o args= |
+ local mntpnt=$(do_rpc_nodes $agent libtool execute ps -C $HSMTOOL -o args= |
awk '{print $NF}')
[ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\
"parameter"
}
run_test 1a "mmap & cat a HSM released file"
-test_1b() {
- mkdir -p $DIR/$tdir
- $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir ||
- error "failed to set default stripe"
- local f=$DIR/$tdir/$tfile
+test_1bde_base() {
+ local f=$1
rm -f $f
dd if=/dev/urandom of=$f bs=1M count=1 conv=sync ||
echo "verify restored state: "
check_hsm_flags $f "0x00000009" && echo "pass"
}
+
+test_1b() {
+ mkdir -p $DIR/$tdir
+ $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir ||
+ error "failed to set default stripe"
+ local f=$DIR/$tdir/$tfile
+
+ test_1bde_base $f
+}
run_test 1b "Archive, Release and Restore composite file"
test_1c() {
[[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] ||
error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER"
- # Test whether setting archive number > 32 results in error.
- $LFS hsm_set --exists --archive-id 33 $f &&
- error "archive number is larger than 32"
- check_hsm_flags_user $f "0x00000001"
+ LOCAL_HSM_ARCHIVE_NUMBER=33
+ if [ $(lustre_version_code client) -ge $(version_code 2.11.56) ] &&
+ [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.11.56) ]; then
+ # lustre in the new version supports unlimited archiveID.
+ # Test whether setting archive number > 32 is supported
+ $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f ||
+ error "archive ID $LOCAL_HSM_ARCHIVE_NUMBER too large?"
+ check_hsm_flags_user $f "0x00000001"
+
+ echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER"
+ st=$(get_hsm_archive_id $f)
+ [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] ||
+ error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER"
+ else
+ # old client or old mds can only support at most 32 archiveID
+ # test whether setting archive number > 32 results in error.
+ $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f &&
+ error "bitmap archive number is larger than 32"
+ check_hsm_flags_user $f "0x00000001"
+ fi
# Test whether setting archive number 16 and archived flag.
LOCAL_HSM_ARCHIVE_NUMBER=16
run_test 1c "Check setting archive-id in lfs hsm_set"
test_1d() {
+ [ $MDS1_VERSION -lt $(version_code 2.10.59) ] &&
+ skip "need MDS version at least 2.10.59"
+
mkdir -p $DIR/$tdir
$LFS setstripe -E 1M -L mdt -E -1 -c 2 $DIR/$tdir ||
error "failed to set default stripe"
local f=$DIR/$tdir/$tfile
- rm -f $f
- dd if=/dev/urandom of=$f bs=1M count=1 conv=sync ||
- error "failed to create file"
- local fid=$(path2fid $f)
+ test_1bde_base $f
+}
+run_test 1d "Archive, Release and Restore DoM file"
- copytool setup
+# Archive, release and restore a SEL file, then verify that the six
+# expected layout components are still present on it.
+test_1e() {
+	# Use the cached MDS version like the other version checks in this
+	# script instead of querying $SINGLEMDS again.
+	[ $MDS1_VERSION -lt $(version_code $SEL_VER) ] &&
+		skip "skipped for lustre < $SEL_VER"
- echo "archive $f"
- $LFS hsm_archive $f || error "could not archive file"
- wait_request_state $fid ARCHIVE SUCCEED
+	mkdir -p $DIR/$tdir
+	$LFS setstripe -E 1G -z 64M -E 10G -z 512M -E -1 -z 1G $DIR/$tdir ||
+		error "failed to set default stripe"
+	local comp_file=$DIR/$tdir/$tfile
- echo "release $f"
- $LFS hsm_release $f || error "could not release file"
- echo "verify released state: "
- check_hsm_flags $f "0x0000000d" && echo "pass"
+	# Shared archive/release/restore sequence (see test_1bde_base)
+	test_1bde_base $comp_file
- echo "restore $f"
- $LFS hsm_restore $f || error "could not restore file"
- wait_request_state $fid RESTORE SUCCEED
- echo "verify restored state: "
- check_hsm_flags $f "0x00000009" && echo "pass"
+	# Each check below asks "lfs find" for one specific component
+	# (start, end, flags); exactly one matching line means the file has it.
+	local flg_opts="--comp-start 0 -E 64M --comp-flags init"
+	local found=$($LFS find $flg_opts $comp_file | wc -l)
+	[ $found -eq 1 ] || error "1st component not found"
+
+	flg_opts="--comp-start 64M -E 1G --comp-flags extension"
+	found=$($LFS find $flg_opts $comp_file | wc -l)
+	[ $found -eq 1 ] || error "2nd component not found"
+
+	flg_opts="--comp-start 1G -E 1G --comp-flags ^init"
+	found=$($LFS find $flg_opts $comp_file | wc -l)
+	[ $found -eq 1 ] || error "3rd component not found"
+
+	flg_opts="--comp-start 1G -E 10G --comp-flags extension"
+	found=$($LFS find $flg_opts $comp_file | wc -l)
+	[ $found -eq 1 ] || error "4th component not found"
+
+	flg_opts="--comp-start 10G -E 10G --comp-flags ^init"
+	found=$($LFS find $flg_opts $comp_file | wc -l)
+	[ $found -eq 1 ] || error "5th component not found"
+
+	flg_opts="--comp-start 10G -E EOF --comp-flags extension"
+	found=$($LFS find $flg_opts $comp_file | wc -l)
+	[ $found -eq 1 ] || error "6th component not found"
+
+	sel_layout_sanity $comp_file 6
}
-run_test 1d "Archive, Release and Restore DoM file"
+run_test 1e "Archive, Release and Restore SEL file"
test_2() {
local f=$DIR/$tdir/$tfile
echo "Verifying imported size $LSZ=$ASZ"
[[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ"
echo -n "Verifying released pattern: "
- local PTRN=$($GETSTRIPE -L $f)
+ local PTRN=$($LFS getstripe -L $f)
echo $PTRN
[[ $PTRN == released ]] || error "Is not released"
local fid=$(path2fid $f)
local f=$DIR/$tdir/$tfile
mkdir -p $DIR/$tdir
$LFS setstripe -c 2 "$f"
- local fid
- fid=$(make_custom_file_for_progress $f 5)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_file "$f" 1M 5)
local FILE_CRC=$(md5sum $f)
run_test 12p "implicit restore of a file on copytool mount point"
test_12q() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.58) ] &&
- skip "need MDS version at least 2.7.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.7.58) ] &&
+ skip "need MDS version at least 2.7.58"
stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT
zconf_mount $(facet_host $SINGLEAGT) $MOUNT3 ||
}
run_test 12q "file attributes are refreshed after restore"
+# Check that running lseek_test on a released file triggers a restore and
+# reports the expected offset.
+test_12r() {
+	# test needs a running copytool
+	copytool setup
+
+	mkdir -p $DIR/$tdir
+	local f=$DIR/$tdir/$tfile
+	local fid=$(copy_file /etc/hosts $f)
+	# Keep the result local so it does not leak into later tests.
+	local offset
+
+	$LFS hsm_archive $f || error "archive of $f failed"
+	wait_request_state $fid ARCHIVE SUCCEED
+	$LFS hsm_release $f || error "release of $f failed"
+
+	# NOTE(review): lseek_test is an external helper; presumably "-d 7"
+	# seeks for data starting at offset 7 and prints the result - confirm.
+	offset=$(lseek_test -d 7 $f)
+
+	# we check we had a restore done
+	wait_request_state $fid RESTORE SUCCEED
+	[[ $offset == 7 ]] || error "offset $offset != 7"
+}
+run_test 12r "lseek restores released file"
+
test_13() {
local -i i j k=0
for i in {1..10}; do
}
run_test 24f "root can archive, release, and restore tar files"
+test_24g() {
+ [ $MDS1_VERSION -lt $(version_code 2.11.56) ] &&
+ skip "need MDS version 2.11.56 or later"
+
+ local file=$DIR/$tdir/$tfile
+ local fid
+
+ echo "RUNAS = '$RUNAS'"
+
+ copytool setup
+
+ mkdir -p $DIR/$tdir
+ chmod ugo+rwx $DIR/$tdir
+
+ echo "Please listen carefully as our options have changed." | tee $file
+ fid=$(path2fid $file)
+ chmod ugo+rw $file
+
+ $LFS hsm_archive $file
+ wait_request_state $fid ARCHIVE SUCCEED
+ check_hsm_flags $file 0x00000009 # exists archived
+
+ echo "To be electrocuted by your telephone, press #." | $RUNAS tee $file
+ check_hsm_flags $file 0x0000000b # exists dirty archived
+}
+run_test 24g "write by non-owner still sets dirty" # LU-11369
+
test_25a() {
# test needs a running copytool
copytool setup
copytool setup
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
copytool setup
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
copytool setup
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
copytool setup
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_file "$f" 1MB 39)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
copytool setup
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 33 1048576)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_file "$f" 1M 39)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
run_test 31c "Restore a large aligned file and check size during restore"
test_33() {
- # test needs a running copytool
- copytool setup -b 1
-
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
+
+ copytool setup
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
- # to be sure wait_all_done will not be mislead by previous tests
- # and ops.
- cdt_purge
- wait_for_grace_delay
- # Also raise grace_delay significantly so the Canceled
- # Restore action will stay enough long avail.
- local old_grace=$(get_hsm_param grace_delay)
- stack_trap "set_hsm_param grace_delay $old_grace" EXIT
- set_hsm_param grace_delay 100
+ # Prevent restore from completing
+ copytool_suspend
+ # Implicit restore
md5sum $f >/dev/null &
local pid=$!
- wait_request_state $fid RESTORE STARTED
+ wait_request_state $fid RESTORE STARTED
kill -15 $pid
- sleep 1
-
- # Check restore trigger process was killed
- local killed=$(ps -o pid,comm hp $pid >/dev/null)
-
- $LFS hsm_cancel $f
- # instead of waiting+checking both Restore and Cancel ops
- # sequentially, wait for both to be finished and then check
- # each results.
- wait_all_done 100 $fid
- local rstate=$(get_request_state $fid RESTORE)
- local cstate=$(get_request_state $fid CANCEL)
-
- if [[ "$rstate" == "CANCELED" ]] ; then
- [[ "$cstate" == "SUCCEED" ]] ||
- error "Restore state is CANCELED and Cancel state " \
- "is not SUCCEED but $cstate"
- echo "Restore state is CANCELED, Cancel state is SUCCEED"
- elif [[ "$rstate" == "SUCCEED" ]] ; then
- [[ "$cstate" == "FAILED" ]] ||
- error "Restore state is SUCCEED and Cancel state " \
- "is not FAILED but $cstate"
- echo "Restore state is SUCCEED, Cancel state is FAILED"
- else
- error "Restore state is $rstate and Cancel state is $cstate"
- fi
+ copytool_continue
- [ -z $killed ] ||
- error "Cannot kill process waiting for restore ($killed)"
+ # Check restore trigger process was killed
+ wait $pid
+ [ $? -eq 143 ] || error "md5sum was not 'Terminated'"
}
run_test 33 "Kill a restore waiting process"
copytool setup -b 1
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
+ # Prevent restore from completing
+ copytool_suspend
+
md5sum $f >/dev/null &
local pid=$!
+
wait_request_state $fid RESTORE STARTED
- rm $f || error "rm $f failed"
# rm must not block during restore
- wait_request_state $fid RESTORE STARTED
+ timeout --signal=KILL 1 rm "$f" || error "rm $f failed"
+ copytool_continue
wait_request_state $fid RESTORE SUCCEED
- # check md5sum pgm finished
- local there=$(ps -o pid,comm hp $pid >/dev/null)
- [[ -z $there ]] || error "Restore initiator does not exit"
+ # Check md5sum pgm finished
+ kill -0 $pid && error "Restore initiatior still running"
wait $pid || error "Restore initiator failed with $?"
+
+ # Check the file was actually deleted
+ [ ! -f "$f" ] || error "$f was not deleted"
}
run_test 34 "Remove file during restore"
local f=$DIR/$tdir/$tfile
local f1=$DIR/$tdir/$tfile-1
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
-
+ local fid=$(create_empty_file "$f")
local fid1=$(copy_file /etc/passwd $f1)
+
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
+ # Prevent restore from completing
+ copytool_suspend
+
md5sum $f >/dev/null &
local pid=$!
+
wait_request_state $fid RESTORE STARTED
- mv $f1 $f || error "mv $f1 $f failed"
# mv must not block during restore
- wait_request_state $fid RESTORE STARTED
+ timeout --signal=KILL 1 mv "$f1" "$f" || error "mv $f1 $f failed"
+ copytool_continue
wait_request_state $fid RESTORE SUCCEED
- # check md5sum pgm finished
- local there=$(ps -o pid,comm hp $pid >/dev/null)
- [[ -z $there ]] || error "Restore initiator does not exit"
+ # Check md5sum pgm finished
+ kill -0 $pid && error "Restore initiatior still running"
wait $pid || error "Restore initiator failed with $?"
- fid2=$(path2fid $f)
+ local fid2=$(path2fid $f)
[[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1"
}
run_test 35 "Overwrite file during restore"
copytool setup -b 1
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
+ # Prevent restore from completing
+ copytool_suspend
+
md5sum $f >/dev/null &
local pid=$!
- wait_request_state $fid RESTORE STARTED
- mv $f $f.new
- # rm must not block during restore
wait_request_state $fid RESTORE STARTED
+ # mv must not block during restore
+ timeout --signal=KILL 10 mv "$f" "$f.new" ||
+ error "mv '$f' '$f.new' failed with rc=$?"
+
+ copytool_continue
wait_request_state $fid RESTORE SUCCEED
- # check md5sum pgm finished
- local there=$(ps -o pid,comm hp $pid >/dev/null)
- [[ -z $there ]] ||
- error "Restore initiator does not exit"
+ # Check md5sum pgm finished
+ kill -0 $pid && error "Restore initiator is still running"
wait $pid || error "Restore initiator failed with $?"
}
run_test 36 "Move file during restore"
run_test 53 "Opened for read file on an evicted client should not be set dirty"
test_54() {
- # test needs a running copytool
- copytool setup -b 1
-
local f=$DIR/$tdir/$tfile
- local fid=$(make_custom_file_for_progress $f 39 1000000)
+ local fid=$(create_file "$f" 1MB 39)
+
+ copytool setup -b 1
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
run_test 54 "Write during an archive cancels it"
test_55() {
- # test needs a running copytool
- copytool setup -b 1
-
local f=$DIR/$tdir/$tfile
- local fid=$(make_custom_file_for_progress $f 39 1000000)
+ local fid=$(create_file "$f" 1MB 39)
+
+ copytool setup -b 1
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
run_test 55 "Truncate during an archive cancels it"
test_56() {
- # test needs a running copytool
- copytool setup -b 1
-
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_file "$f" 1MB 39)
+
+ copytool setup -b 1
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
test_59() {
local fid
- local server_version=$(lustre_version_code $SINGLEMDS)
- [[ $server_version -lt $(version_code 2.7.63) ]] &&
- skip "Need MDS version at least 2.7.63" && return
+ [[ $MDS1_VERSION -lt $(version_code 2.7.63) ]] &&
+ skip "Need MDS version at least 2.7.63"
copytool setup
$MCREATE $DIR/$tfile || error "mcreate failed"
# This test validates the fix for LU-4512. Ensure that the -u
# option changes the progress reporting interval from the
# default (30 seconds) to the user-specified interval.
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_file "$f" 1M 10)
+
local interval=5
local progress_timeout=$((interval * 4))
-
copytool setup -b 1 --update-interval $interval
- local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 10)
- [ $? != 0 ] && skip "not enough free space" && return
-
local mdtidx=0
local mdt=${MDT_PREFIX}${mdtidx}
local mds=mds$((mdtidx + 1))
cdt_clear_no_retry
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_small_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
local expected_fields="event_time data_fid source_fid"
expected_fields+=" total_bytes current_bytes"
- local START_EVENT
- local FINISH_EVENT
+ local -A events=(
+ [ARCHIVE_START]=false
+ [ARCHIVE_FINISH]=false
+ [ARCHIVE_RUNNING]=false
+ )
while read event; do
# Make sure we're not getting anything from previous events.
for field in $expected_fields; do
fi
eval $parsed
- if [ $event_type == "ARCHIVE_START" ]; then
- START_EVENT=$event
- continue
- elif [ $event_type == "ARCHIVE_FINISH" ]; then
- FINISH_EVENT=$event
- continue
- elif [ $event_type != "ARCHIVE_RUNNING" ]; then
- continue
- fi
+ events["$event_type"]=true
+
+ [ "$event_type" != ARCHIVE_RUNNING ] && continue
# Do some simple checking of the progress update events.
for expected_field in $expected_fields; do
fi
done
- if [ $total_bytes -eq 0 ]; then
- error "Expected total_bytes to be > 0"
- fi
+ [ $total_bytes -gt 0 ] || error "Expected total_bytes to be > 0"
- # These should be identical throughout an archive
- # operation.
- if [ $source_fid != $data_fid ]; then
+ # These should be identical throughout an archive operation
+ [ $source_fid == $data_fid ] ||
error "Expected source_fid to equal data_fid"
- fi
done < <(echo $"$(get_copytool_event_log)")
- if [ -z "$START_EVENT" ]; then
- error "Copytool failed to send archive start event to FIFO"
- fi
-
- if [ -z "$FINISH_EVENT" ]; then
- error "Copytool failed to send archive finish event to FIFO"
- fi
+ # Check we received every type of event we were expecting
+ for event in "${!events[@]}"; do
+ ${events["$event"]} ||
+ error "Copytool failed to send '$event' event to FIFO"
+ done
echo "Archive events look OK."
}
DATAHEX='[434541]'
test_104() {
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f
local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\
[[ "$data1" == "$DATAHEX" ]] ||
error "Data field in records is ($data1) and not ($DATAHEX)"
- # archive the file
- copytool setup
-
- wait_request_state $fid ARCHIVE SUCCEED
+ cdt_purge
}
run_test 104 "Copy tool data field"
}
run_test 112 "State of recorded request"
+# Archive and release a file, then append to it through $DIR while a stat of
+# the same file runs concurrently through $DIR2, and check that both paths
+# report the same size once everything has settled.
+test_113() {
+ local file1=$DIR/$tdir/$tfile
+ local file2=$DIR2/$tdir/$tfile
+
+ local fid=$(create_small_sync_file $file1)
+
+ # The unmount is registered before mounting so that it runs on exit
+ # whatever happens afterwards
+ stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT
+ zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" ||
+ error "cannot mount '$MOUNT3' on '$SINGLEAGT'"
+
+ # The copytool works through the dedicated $MOUNT3 mount point
+ copytool setup -m "$MOUNT3"
+
+ do_nodes $(comma_list $(nodes_list)) $LCTL clear
+
+ $LFS hsm_archive $file1 || error "Fail to archive $file1"
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ $LFS hsm_release $file1
+ echo "Verifying released state: "
+ check_hsm_flags $file1 "0x0000000d"
+
+ # Open the released file for append and pause; SIGUSR1 below resumes
+ # multiop while the stat of the second path is in flight
+ multiop_bg_pause $file1 oO_WRONLY:O_APPEND:_w4c || error "multiop failed"
+ # NOTE(review): MULTIPID is not declared local, so it is set in the
+ # caller's scope as well
+ MULTIPID=$!
+ stat $file2 &
+ kill -USR1 $MULTIPID
+
+ # Wait for every background job (multiop and stat) before comparing
+ wait
+ sync
+
+ local size1=$(stat -c "%s" $file1)
+ local size2=$(stat -c "%s" $file2)
+
+ [ $size1 -eq $size2 ] || error "sizes are different $size1 $size2"
+}
+run_test 113 "wrong stat after restore"
+
test_200() {
- # test needs a running copytool
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_empty_file "$f")
+
copytool setup
- local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 103 1048576)
- [ $? != 0 ] && skip "not enough free space" && return
+ # Prevent archive from completing
+ copytool_suspend
- # test with cdt on is made in test_221
- cdt_disable
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
# wait archive to register at CDT
- wait_request_state $fid ARCHIVE WAITING
- $LFS hsm_cancel $f
- cdt_enable
+ wait_request_state $fid ARCHIVE STARTED
+
+ # Cancel the archive
+ $LFS hsm_cancel "$f"
+
wait_request_state $fid ARCHIVE CANCELED
+
+ copytool_continue
wait_request_state $fid CANCEL SUCCEED
}
run_test 200 "Register/Cancel archive"
run_test 201 "Register/Cancel restore"
test_202() {
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_empty_file "$f")
+
# test needs a running copytool
copytool setup
- local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
-
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
- cdt_disable
+ copytool_suspend
$LFS hsm_remove $f
# wait remove to register at CDT
- wait_request_state $fid REMOVE WAITING
+ wait_request_state $fid REMOVE STARTED
$LFS hsm_cancel $f
- cdt_enable
+
wait_request_state $fid REMOVE CANCELED
}
run_test 202 "Register/Cancel remove"
run_test 220a "Changelog for failed archive"
test_221() {
- # test needs a running copytool
- copytool setup -b 1
-
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 103 1048576)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
+ copytool setup -b 1
changelog_register
+ # Prevent archive from completing
+ copytool_suspend
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE STARTED
+
$LFS hsm_cancel $f
wait_request_state $fid ARCHIVE CANCELED
+
+ copytool_continue
wait_request_state $fid CANCEL SUCCEED
changelog_find -type HSM -target-fid $fid -flags 0x7d ||
run_test 223a "Changelog for restore canceled (import case)"
test_223b() {
- # test needs a running copytool
- copytool setup -b 1
-
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
+ copytool setup -b 1
changelog_register
+
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
+
+ # Prevent restore from completing
+ copytool_suspend
$LFS hsm_restore $f
wait_request_state $fid RESTORE STARTED
+
$LFS hsm_cancel $f
wait_request_state $fid RESTORE CANCELED
+
+ copytool_continue
wait_request_state $fid CANCEL SUCCEED
changelog_find -type HSM -target-fid $fid -flags 0xfd ||
copytool setup
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
changelog_register
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
- # if cdt is on, it can serve too quickly the request
- cdt_disable
+ # Prevent remove from completing
+ copytool_suspend
$LFS hsm_remove $f
+
$LFS hsm_cancel $f
- cdt_enable
wait_request_state $fid REMOVE CANCELED
+
+ copytool_continue
wait_request_state $fid CANCEL SUCCEED
changelog_find -type HSM -target-fid $fid -flags 0x27d
run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse"
test_250() {
- # test needs a running copytool
- copytool setup
+ # Check that the coordinator does not start more than max_requests
+ # requests at once: with max_requests=3 and a suspended copytool, send
+ # two batches of archive/restore/remove requests and verify that only
+ # 3 requests are reported as STARTED.
+ local file="$DIR/$tdir/$tfile"
- mkdir -p $DIR/$tdir
- local maxrequest=$(get_hsm_param max_requests)
- local rqcnt=$(($maxrequest * 3))
- local i=""
+ # set max_requests to allow one request of each type to be started (3)
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests 3
+ # speed up test
+ stack_trap \
+ "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1
- cdt_disable
- for i in $(seq -w 1 $rqcnt); do
- rm -f $DIR/$tdir/$i
- dd if=/dev/urandom of=$DIR/$tdir/$i bs=1M count=10 conv=fsync
+ # one request of each kind will be sent twice (two batches)
+ copytool setup
+ # set up the files: two per action ("<file>-to-<action>" and its ".bis"
+ # twin), archived first when the action is restore or remove, and
+ # released as well when the action is restore
+ for action in archive restore remove; do
+ local filepath="$file"-to-$action
+ local fid=$(create_empty_file "$filepath")
+ local fid2=$(create_empty_file "$filepath".bis)
+
+ if [ "$action" != archive ]; then
+ "$LFS" hsm_archive "$filepath"
+ wait_request_state $fid ARCHIVE SUCCEED
+ "$LFS" hsm_archive "$filepath".bis
+ wait_request_state $fid2 ARCHIVE SUCCEED
+ fi
+ if [ "$action" == restore ]; then
+ "$LFS" hsm_release "$filepath"
+ "$LFS" hsm_release "$filepath".bis
+ fi
done
- # we do it in 2 steps, so all requests arrive at the same time
- for i in $(seq -w 1 $rqcnt); do
- $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$i
+
+ # suspend the copytool to prevent requests from completing
+ stack_trap "copytool_continue" EXIT
+ copytool_suspend
+
+ # send `max_requests' requests (one of each kind); "${action^^}" is the
+ # upper-cased action name passed to wait_request_state
+ for action in archive restore remove; do
+ filepath="$file"-to-$action
+ "$LFS" hsm_${action} "$filepath"
+ wait_request_state $(path2fid "$filepath") "${action^^}" STARTED
done
- cdt_enable
- local cnt=$rqcnt
- local wt=$rqcnt
- while [[ $cnt != 0 || $wt != 0 ]]; do
- sleep 1
- cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\
- $HSM_PARAM.actions |\
- grep STARTED | grep -v CANCEL | wc -l")
- [[ $cnt -le $maxrequest ]] ||
- error "$cnt > $maxrequest too many started requests"
- wt=$(do_facet $SINGLEMDS "$LCTL get_param\
- $HSM_PARAM.actions |\
- grep WAITING | wc -l")
- echo "max=$maxrequest started=$cnt waiting=$wt"
+
+ # send another batch of requests (on the ".bis" files)
+ for action in archive restore remove; do
+ "$LFS" hsm_${action} "$file-to-$action".bis
done
+ # wait for `loop_period' seconds (set to 1 above) to make sure the
+ # coordinator has had time to start those, even though it should not
+ sleep 1
+
+ # only the first batch of requests should be started
+ local -i count
+ count=$(do_facet $SINGLEMDS "$LCTL" get_param -n $HSM_PARAM.actions |
+ grep -c STARTED)
+
+ ((count == 3)) ||
+ error "expected 3 STARTED requests, found $count"
}
run_test 250 "Coordinator max request"
test_251() {
- # test needs a running copytool
- copytool setup -b 1
-
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 103 1048576)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
cdt_disable
# to have a short test
local old_to=$(get_hsm_param active_request_timeout)
- set_hsm_param active_request_timeout 4
+ set_hsm_param active_request_timeout 1
# to be sure the cdt will wake up frequently so
# it will be able to cancel the "old" request
local old_loop=$(get_hsm_param loop_period)
- set_hsm_param loop_period 2
+ set_hsm_param loop_period 1
cdt_enable
- # clear locks to avoid extra delay caused by flush/cancel
- # and thus prevent early copytool death to timeout.
- cancel_lru_locks osc
+ copytool setup
+ # Prevent archive from completing
+ copytool_suspend
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE STARTED
- sleep 5
+
+ # Let the request timeout
wait_request_state $fid ARCHIVE CANCELED
set_hsm_param active_request_timeout $old_to
run_test 251 "Coordinator request timeout"
test_252() {
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(create_empty_file "$f")
set_hsm_param loop_period 1
copytool setup
- copytool_suspend
+ # Prevent archive from completing
+ copytool_suspend
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE STARTED
- rm -f $f
+ rm -f "$f"
stack_trap "set_hsm_param active_request_timeout \
$(get_hsm_param active_request_timeout)" EXIT
test_254a()
{
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.56) ] &&
- skip "need MDS version at least 2.10.56" && return
+ [ $MDS1_VERSION -lt $(version_code 2.10.56) ] &&
+ skip "need MDS version at least 2.10.56"
# Check that the counters are initialized to 0
local count
test_254b()
{
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.56) ] &&
- skip "need MDS version at least 2.10.56" && return
+ [ $MDS1_VERSION -lt $(version_code 2.10.56) ] &&
+ skip "need MDS version at least 2.10.56"
# The number of request to launch (at least 32)
local request_count=$((RANDOM % 32 + 32))
}
run_test 254b "Request counters are correctly incremented and decremented"
+# Check that a copytool registering with the coordinator wakes it up: a new
+# archive request is queued while the coordinator is set up to sleep for a
+# long `loop_period', and it must still succeed once a copytool is started.
+test_255()
+{
+ [ $MDS1_VERSION -lt $(version_code 2.12.0) ] &&
+ skip "Need MDS version at least 2.12.0"
+
+ local file="$DIR/$tdir/$tfile"
+ local fid=$(create_empty_file "$file")
+
+ # How do you make sure the coordinator has consumed any outstanding
+ # event, without triggering an event yourself?
+ #
+ # You wait for a request to disappear from the coordinator's llog.
+
+ # Warning: the setup represents 90% of this test
+
+ # Create and process an HSM request
+ copytool setup
+ "$LFS" hsm_archive "$file"
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ kill_copytools
+ wait_copytools || error "failed to stop copytools"
+
+ # Launch a new HSM request
+ # ($fid keeps referring to the first, now removed, file)
+ rm "$file"
+ create_empty_file "$file"
+ "$LFS" hsm_archive "$file"
+
+ cdt_shutdown
+
+ # Have the completed request be removed as soon as the cdt wakes up
+ stack_trap "set_hsm_param grace_delay $(get_hsm_param grace_delay)" EXIT
+ set_hsm_param grace_delay 1
+ # (Hopefully, time on the MDS will behave nicely)
+ # The sleep runs in the background and is reaped by `wait $!' below
+ do_facet $SINGLEMDS sleep 2 &
+
+ # Increase `loop_period' as a means to prevent the coordinator from
+ # waking itself up to do some housekeeping.
+ stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1000
+
+ wait $! || error "waiting failed"
+ cdt_enable
+ # Wait until the first request (for $fid) is no longer listed
+ wait_request_state $fid ARCHIVE ""
+ # The coordinator will not wake up on its own for ~`loop_period' secs...
+
+ # ... Unless a copytool registers. Now the real test begins
+ copytool setup
+ wait_request_state $(path2fid "$file") ARCHIVE SUCCEED
+}
+run_test 255 "Copytool registration wakes the coordinator up"
+
+# tests 260[a-c] rely on the parsing of the copytool's log file, they might
+# break in the future because of that.
+# Queue 15 archive requests and one restore request while no copytool is
+# running, then start a copytool and check, from its log file, that the
+# RESTORE shows up among the first 3 actions it processed.
+test_260a()
+{
+ [ $MDS1_VERSION -lt $(version_code 2.11.56) ] &&
+ skip "need MDS version 2.11.56 or later"
+
+ # 16 files: "$tfile.0" (to be restored) and "$tfile.1".."$tfile.15"
+ # (to be archived)
+ local -a files=("$DIR/$tdir/$tfile".{0..15})
+ local file
+
+ for file in "${files[@]}"; do
+ create_small_file "$file"
+ done
+
+ # Set a few hsm parameters
+ stack_trap \
+ "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests 3
+
+ # Release one file
+ copytool setup
+ "$LFS" hsm_archive "${files[0]}"
+ wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+ "$LFS" hsm_release "${files[0]}"
+
+ # Stop the copytool
+ kill_copytools
+ wait_copytools || error "copytools failed to stop"
+
+ # Send several archive requests
+ for file in "${files[@]:1}"; do
+ "$LFS" hsm_archive "$file"
+ done
+
+ # Send one restore request
+ "$LFS" hsm_restore "${files[0]}"
+
+ # Launch a copytool
+ copytool setup
+
+ # Wait for all the requests to complete
+ wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+ for file in "${files[@]:1}"; do
+ wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+ done
+
+ # Collect the actions in the order in which the copytool processed them
+ # NOTE(review): the grep pattern looks escaped for one extra level of
+ # shell evaluation, presumably done by do_facet -- confirm
+ local -a actions=(
+ $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+ "$(copytool_logfile "$SINGLEAGT")")
+ )
+
+ printf '%s\n' "${actions[@]}"
+
+ # Only the first 3 actions are inspected (max_requests was set to 3)
+ local action
+ for action in "${actions[@]:0:3}"; do
+ [ "$action" == RESTORE ] && return
+ done
+
+ error "Too many ARCHIVE requests were run before the RESTORE request"
+}
+run_test 260a "Restore request have priority over other requests"
+
+# This test is very much tied to the implementation of the current
+# prioritisation mechanism in the coordinator. It might not make sense to keep
+# it in the future.
+# Same scenario as a single-copytool priority check, except that the file to
+# restore lives in archive 2: 15 archive requests (no --archive option) and
+# one restore request are queued while no copytool runs, then two copytools
+# are started (one without --archive-id, one with --archive-id 2) and the
+# RESTORE must show up among the first 3 actions found in the copytool log.
+test_260b()
+{
+ [ $MDS1_VERSION -lt $(version_code 2.11.56) ] &&
+ skip "need MDS version 2.11.56 or later"
+
+ # 16 files: "$tfile.0" (to be restored) and "$tfile.1".."$tfile.15"
+ # (to be archived)
+ local -a files=("$DIR/$tdir/$tfile".{0..15})
+ local file
+
+ for file in "${files[@]}"; do
+ create_small_file "$file"
+ done
+
+ # Set a few hsm parameters
+ stack_trap \
+ "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests 3
+
+ # Release one file (archived into archive 2)
+ copytool setup --archive-id 2
+ "$LFS" hsm_archive --archive 2 "${files[0]}"
+ wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+ "$LFS" hsm_release "${files[0]}"
+
+ # Stop the copytool
+ kill_copytools
+ wait_copytools || error "copytools failed to stop"
+
+ # Send several archive requests
+ for file in "${files[@]:1}"; do
+ "$LFS" hsm_archive "$file"
+ done
+
+ # Send one restore request
+ "$LFS" hsm_restore "${files[0]}"
+
+ # Launch two copytools: one without --archive-id, one for archive 2
+ copytool setup
+ copytool setup --archive-id 2
+
+ # Wait for all the requests to complete
+ wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+ for file in "${files[@]:1}"; do
+ wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+ done
+
+ # Collect the actions in the order in which the copytool processed them
+ # NOTE(review): the grep pattern looks escaped for one extra level of
+ # shell evaluation, presumably done by do_facet -- confirm
+ local -a actions=(
+ $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+ "$(copytool_logfile "$SINGLEAGT")")
+ )
+
+ printf '%s\n' "${actions[@]}"
+
+ # Only the first 3 actions are inspected (max_requests was set to 3)
+ local action
+ for action in "${actions[@]:0:3}"; do
+ [ "$action" == RESTORE ] && return
+ done
+
+ error "Too many ARCHIVE requests were run before the RESTORE request"
+}
+run_test 260b "Restore request have priority over other requests"
+
+# This test is very much tied to the implementation of the current
+# prioritisation mechanism in the coordinator. It might not make sense to keep
+# it in the future.
+# Counterpart of the restore-priority checks: with a very long `loop_period'
+# and the coordinator having just been restarted, a restore request sent
+# after a batch of archive requests must NOT appear among the first 3
+# actions found in the copytool log.
+test_260c()
+{
+ [ $MDS1_VERSION -lt $(version_code 2.12.0) ] &&
+ skip "Need MDS version at least 2.12.0"
+
+ # 16 files: "$tfile.0" is restored, "$tfile.1" is archived on its own
+ # first, "$tfile.2".."$tfile.15" are archived as a batch
+ local -a files=("$DIR/$tdir/$tfile".{0..15})
+ local file
+
+ for file in "${files[@]}"; do
+ create_small_file "$file"
+ done
+
+ # Set a few hsm parameters
+ stack_trap \
+ "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1000
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests 3
+
+ # Release one file (archived into archive 2)
+ copytool setup --archive-id 2
+ "$LFS" hsm_archive --archive 2 "${files[0]}"
+ wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+ "$LFS" hsm_release "${files[0]}"
+
+ # Stop the copytool
+ kill_copytools
+ wait_copytools || error "copytools failed to stop"
+
+ # Force the next coordinator run to do housekeeping
+ cdt_shutdown
+ cdt_enable
+
+ "$LFS" hsm_archive "${files[1]}"
+
+ # Launch two copytools: one without --archive-id, one for archive 2
+ copytool setup
+ copytool setup --archive-id 2
+
+ wait_request_state "$(path2fid "${files[1]}")" ARCHIVE SUCCEED
+ # The coordinator just did a housekeeping run it won't do another one
+ # for around `loop_period' seconds => requests will not be reordered
+ # if it costs too much (ie. when the coordinator has to discard a whole
+ # hal)
+
+ # Send several archive requests
+ for file in "${files[@]:2}"; do
+ "$LFS" hsm_archive "$file"
+ done
+
+ # Send one restore request
+ "$LFS" hsm_restore "${files[0]}"
+
+ # Wait for all the requests to complete
+ wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+ for file in "${files[@]:2}"; do
+ wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+ done
+
+ # Collect the actions in the order in which the copytool processed them
+ # NOTE(review): the grep pattern looks escaped for one extra level of
+ # shell evaluation, presumably done by do_facet -- confirm
+ local -a actions=(
+ $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+ "$(copytool_logfile "$SINGLEAGT")")
+ )
+
+ printf '%s\n' "${actions[@]}"
+
+ # Fail if the RESTORE is among the first 3 logged actions
+ local action
+ for action in "${actions[@]:0:3}"; do
+ [ "$action" == RESTORE ] &&
+ error "Restore requests should not be prioritised" \
+ "unless the coordinator is doing housekeeping"
+ done
+ # Explicit success: the last test in the loop above returns non-zero
+ # when no RESTORE was found
+ return 0
+}
+run_test 260c "Requests are not reordered on the 'hot' path of the coordinator"
+
test_300() {
[ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return
test_406() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] &&
- skip "need MDS version at least 2.7.64" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.7.64) ] &&
+ skip "need MDS version at least 2.7.64"
local fid
local mdt_index
run_test 406 "attempting to migrate HSM archived files is safe"
test_407() {
- needclients 2 || return 0
- # test needs a running copytool
- copytool setup
-
- mkdir -p $DIR/$tdir
-
local f=$DIR/$tdir/$tfile
local f2=$DIR2/$tdir/$tfile
- local fid
- fid=$(make_custom_file_for_progress $f 39 1000000)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
+
+ copytool setup
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
#define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164
do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x164
+ # Prevent restore from completing
+ copytool_suspend
+
md5sum $f &
# 1st request holds layout lock while appropriate
# RESTORE record is still not added to llog
md5sum $f2 &
sleep 2
+ do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"
# after umount hsm_actions->O/x/x log shouldn't have
# double RESTORE records like below
#[0x200000401:0x1:0x0]...0x58d03a0d/0x58d03a0c action=RESTORE...WAITING
sleep 30 &&
do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"&
fail $SINGLEMDS
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
- wait_request_state $fid RESTORE SUCCEED
+ do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"
+
+ copytool_continue
+ wait_all_done 100 $fid
}
run_test 407 "Check for double RESTORE records in llog"
test_500()
{
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.92) ] &&
- skip "HSM migrate is not supported" && return
+ # Run the llapi_hsm_test program against $DIR/$tdir; the test is
+ # skipped when the MDS is older than 2.6.92.
+ [ $MDS1_VERSION -lt $(version_code 2.6.92) ] &&
+ skip "HSM migrate is not supported"
test_mkdir -p $DIR/$tdir
- llapi_hsm_test -d $DIR/$tdir || error "One llapi HSM test failed"
+
+ # Pass -b when either the client or the MDS predates 2.11.56.
+ # NOTE(review): -b presumably selects a mode compatible with older
+ # releases -- confirm against llapi_hsm_test's usage.
+ # The MDS check uses $MDS1_VERSION, like the other version checks of
+ # this file, instead of querying the MDS again.
+ local compat_opt=""
+ if [ $(lustre_version_code client) -lt $(version_code 2.11.56) ] ||
+ [ $MDS1_VERSION -lt $(version_code 2.11.56) ]; then
+ compat_opt="-b"
+ fi
+
+ # $compat_opt is deliberately unquoted: when empty it must expand to
+ # no argument at all
+ llapi_hsm_test -d $DIR/$tdir $compat_opt ||
+ error "One llapi HSM test failed"
}
run_test 500 "various LLAPI HSM tests"
test_600() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] &&
- skip "need MDS version at least 2.10.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+ skip "need MDS version at least 2.10.58"
mkdir -p $DIR/$tdir
run_test 600 "Changelog fields 'u=' and 'nid='"
test_601() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] &&
- skip "need MDS version at least 2.10.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+ skip "need MDS version at least 2.10.58"
mkdir -p $DIR/$tdir
run_test 601 "OPEN Changelog entry"
test_602() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] &&
- skip "need MDS version at least 2.10.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+ skip "need MDS version at least 2.10.58"
mkdir -p $DIR/$tdir
run_test 602 "Changelog record CLOSE only if open+write or OPEN recorded"
test_603() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] &&
- skip "need MDS version at least 2.10.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+ skip "need MDS version at least 2.10.58"
mkdir -p $DIR/$tdir
run_test 603 "GETXATTR Changelog entry"
test_604() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] &&
- skip "need MDS version at least 2.10.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+ skip "need MDS version at least 2.10.58"
mkdir -p $DIR/$tdir
run_test 604 "NOPEN Changelog entry"
test_605() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] &&
- skip "need MDS version at least 2.10.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+ skip "need MDS version at least 2.10.58"
mkdir -p $DIR/$tdir
run_test 605 "Test OPEN and CLOSE rate limit in Changelogs"
test_606() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.58) ] &&
- skip "need MDS version at least 2.10.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+ skip "need MDS version at least 2.10.58"
local llog_reader=$(do_facet mgs "which llog_reader 2> /dev/null")
llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader}