set -e
set +o monitor
-SRCDIR=$(dirname $0)
-export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin
-
ONLY=${ONLY:-"$*"}
-# bug number for skipped test: LU-3815
-ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36"
-# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
-
-LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+LUSTRE=${LUSTRE:-$(dirname $0)/..}
. $LUSTRE/tests/test-framework.sh
init_test_env $@
-. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
init_logging
-MULTIOP=${MULTIOP:-multiop}
+ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT "
+# $SHARED_KEY is executed as a command. An unset or empty value would expand
+# to an empty command, which succeeds and would silently enable the skips
+# below, so default it to "false".
+if ${SHARED_KEY:-false}; then
+	# bug number for skipped tests: LU-9795 LU-9795
+	ALWAYS_EXCEPT+=" 13 402b "
+	# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
+fi
+
+# Skip tests that fail frequently on PPC (only when uname -m reports ppc64)
+if [[ $(uname -m) = ppc64 ]]; then
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 1a 1b 1d 1e 12c 12f "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 12g 12h 12m 12n 12o 12p "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 12q 21 22 23 24a 24b "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 24d 24e 24f 25b 30c 37 "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 57 58 90 110b 111b 113 "
+ # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251
+ ALWAYS_EXCEPT+=" 222b 222d 228 260a 260b 260c "
+ # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252
+ ALWAYS_EXCEPT+=" 220A 220a 221 222a 222c 223a "
+ # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252
+ ALWAYS_EXCEPT+=" 223b 224A 224a 226 227 600"
+ # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252
+ ALWAYS_EXCEPT+=" 601 602 603 604 605 "
+fi
+
+build_test_filter
+
+[ -n "$FILESET" ] && skip "Not functional for FILESET set"
+
OPENFILE=${OPENFILE:-openfile}
-MMAP_CAT=${MMAP_CAT:-mmap_cat}
MOUNT_2=${MOUNT_2:-"yes"}
FAIL_ON_ERROR=false
# script only handles up to 10 MDTs (because of MDT_PREFIX)
[ $MDSCOUNT -gt 9 ] &&
- error "script cannot handle more than 9 MDTs, please fix" && exit
+ error "script cannot handle more than 9 MDTs, please fix"
check_and_setup_lustre
-if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then
- skip_env "Need MDS version at least 2.4.53" && exit
+if [[ $MDS1_VERSION -lt $(version_code 2.4.53) ]]; then
+ skip_env "Need MDS version at least 2.4.53"
fi
# $RUNAS_ID may get set incorrectly somewhere else
if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then
- skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" && exit
+ skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!"
fi
check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS
-
-build_test_filter
+# Remember in GROUP the first generic group known to getent: "nobody" is
+# preferred, "nogroup" is the fallback; report an error if neither exists.
+if ! getent group nobody; then
+	if getent group nogroup; then
+		GROUP=nogroup
+	else
+		error "No generic nobody group"
+	fi
+else
+	GROUP=nobody
+fi
# if there is no CLIENT1 defined, some tests can be ran on localhost
CLIENT1=${CLIENT1:-$HOSTNAME}
# Exception is the test which need two separate nodes
CLIENT2=${CLIENT2:-$CLIENT1}
-#
-# In order to test multiple remote HSM agents, a new facet type named "AGT" and
-# the following associated variables are added:
-#
-# AGTCOUNT: number of agents
-# AGTDEV{N}: target HSM mount point (root path of the backend)
-# agt{N}_HOST: hostname of the agent agt{N}
-# SINGLEAGT: facet of the single agent
-#
-# The number of agents is initialized as the number of remote client nodes.
-# By default, only single copytool is started on a remote client/agent. If there
-# was no remote client, then the copytool will be started on the local client.
-#
-init_agt_vars() {
- local n
- local agent
-
- export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))}
- [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1
-
- export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP}
- if [[ $CLIENTCOUNT -gt 1 ]] &&
- ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then
- skip_env "SHARED_DIRECTORY should be accessible"\
- "on all client nodes"
- exit 0
- fi
-
- # We used to put the HSM archive in $SHARED_DIRECTORY but that
- # meant NFS issues could hose sanity-hsm sessions. So now we
- # use $TMP instead.
- for n in $(seq $AGTCOUNT); do
- eval export AGTDEV$n=\$\{AGTDEV$n:-"$TMP/arc$n"\}
- agent=CLIENT$((n + 1))
- if [[ -z "${!agent}" ]]; then
- [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 ||
- agent=CLIENT2
- fi
- eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\}
- done
-
- export SINGLEAGT=${SINGLEAGT:-agt1}
-
- export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
- export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
- export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
- export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""}
- export HSMTOOL_TESTDIR
- export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
- HSM_ARCHIVE=$(copytool_device $SINGLEAGT)
- HSM_ARCHIVE_NUMBER=2
-
- # The test only support up to 10 MDTs
- MDT_PREFIX="mdt.$FSNAME-MDT000"
- HSM_PARAM="${MDT_PREFIX}0.hsm"
-
- # archive is purged at copytool setup
- HSM_ARCHIVE_PURGE=true
-
- # Don't allow copytool error upon start/setup
- HSMTOOL_NOERROR=false
-}
-
-# Get the backend root path for the given agent facet.
-copytool_device() {
- local facet=$1
- local dev=AGTDEV$(facet_number $facet)
-
- echo -n ${!dev}
-}
-
-# Stop copytool and unregister an existing changelog user.
-cleanup() {
- copytool_monitor_cleanup
- copytool_cleanup
- changelog_cleanup
- cdt_set_sanity_policy
-}
-
-get_mdt_devices() {
- local mdtno
- # get MDT device for each mdc
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- MDT[$idx]=$($LCTL get_param -n \
- mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid |
- awk '{gsub(/_UUID/,""); print $1}' | head -n1)
- done
-}
-
search_copytools() {
local hosts=${1:-$(facet_active_host $SINGLEAGT)}
- do_nodesv $hosts "pgrep -x $HSMTOOL_BASE"
-}
-
-kill_copytools() {
- local hosts=${1:-$(facet_active_host $SINGLEAGT)}
-
- echo "Killing existing copytools on $hosts"
- do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true
+ do_nodesv $hosts "libtool execute pgrep -x $HSMTOOL"
}
wait_copytools() {
local wait_timeout=200
local wait_start=$SECONDS
local wait_end=$((wait_start + wait_timeout))
+ local sleep_time=1
while ((SECONDS < wait_end)); do
- sleep 2
if ! search_copytools $hosts; then
echo "copytools stopped in $((SECONDS - wait_start))s"
return 0
fi
echo "copytools still running on $hosts"
+ sleep $sleep_time
+ [ $sleep_time -lt 5 ] && sleep_time=$((sleep_time + 1))
done
+ # try to dump Copytool's stack
+ do_nodesv $hosts "echo 1 >/proc/sys/kernel/sysrq ; " \
+ "echo t >/proc/sysrq-trigger"
+
echo "copytools failed to stop in ${wait_timeout}s"
return 1
cmd="cat $test_dir/fifo > $test_dir/events &"
cmd+=" echo \\\$! > $test_dir/monitor_pid"
- if [[ $PDSH == *Rmrsh* ]]; then
- # This is required for pdsh -Rmrsh and its handling of remote
- # shells.
- # Regular ssh and pdsh -Rssh work fine without this
- # backgrounded subshell nonsense.
- (do_node $agent "$cmd") &
- export HSMTOOL_MONITOR_PDSH=$!
-
- # Slightly racy, but just making a best-effort to catch obvious
- # problems.
- sleep 1
- ps -p $HSMTOOL_MONITOR_PDSH > /dev/null ||
- error "Failed to start copytool monitor on $agent"
- else
- do_node $agent "$cmd"
- if [ $? != 0 ]; then
- error "Failed to start copytool monitor on $agent"
- fi
- fi
-}
-
-copytool_monitor_cleanup() {
- local facet=${1:-$SINGLEAGT}
- local agent=$(facet_active_host $facet)
-
- if [ -n "$HSMTOOL_MONITOR_DIR" ]; then
- # Should die when the copytool dies, but just in case.
- local cmd="kill \\\$(cat $HSMTOOL_MONITOR_DIR/monitor_pid)"
- cmd+=" 2>/dev/null || true"
- do_node $agent "$cmd"
- do_node $agent "rm -fr $HSMTOOL_MONITOR_DIR"
- export HSMTOOL_MONITOR_DIR=
- fi
+ # This background subshell nonsense is required when pdsh/ssh decides
+ # to wait for the cat process to exit on the remote client
+ (do_node $agent "$cmd") &
+ export HSMTOOL_MONITOR_PDSH=$!
- # The pdsh should die on its own when the monitor dies. Just
- # in case, though, try to clean up to avoid any cruft.
- if [ -n "$HSMTOOL_MONITOR_PDSH" ]; then
- kill $HSMTOOL_MONITOR_PDSH 2>/dev/null
- export HSMTOOL_MONITOR_PDSH=
+ # Slightly racy, but just making a best-effort to catch obvious
+ # problems.
+ sleep 1
+ do_node $agent "stat $HSMTOOL_MONITOR_DIR/monitor_pid 2>&1 > /dev/null"
+ if [ $? != 0 ]; then
+ error "Failed to start copytool monitor on $agent"
fi
}
-copytool_setup() {
- local facet=${1:-$SINGLEAGT}
- # Use MOUNT2 by default if defined
- local lustre_mntpnt=${2:-${MOUNT2:-$MOUNT}}
- local arc_id=$3
- local hsm_root=${4:-$(copytool_device $facet)}
- local agent=$(facet_active_host $facet)
-
- if [[ -z "$arc_id" ]] &&
- do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then
- echo "Only wakeup running copytool $facet on $agent"
- return 0
- fi
-
- if $HSM_ARCHIVE_PURGE; then
- echo "Purging archive on $agent"
- do_facet $facet "rm -rf $hsm_root/*"
- fi
-
- echo "Starting copytool $facet on $agent"
- do_facet $facet "mkdir -p $hsm_root" || error "mkdir '$hsm_root' failed"
- # bandwidth is limited to 1MB/s so the copy time is known and
- # independent of hardware
- local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root $hsm_root"
- [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id"
- [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] ||
- cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL"
- [[ -z "$HSMTOOL_EVENT_FIFO" ]] ||
- cmd+=" --event-fifo $HSMTOOL_EVENT_FIFO"
- cmd+=" --bandwidth 1 $lustre_mntpnt"
-
- # Redirect the standard output and error to a log file which
- # can be uploaded to Maloo.
- local prefix=$TESTLOG_PREFIX
- [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME
- local copytool_log=$prefix.copytool${arc_id}_log.$agent.log
-
- do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1"
- if [[ $? != 0 ]]; then
- [[ $HSMTOOL_NOERROR == true ]] ||
- error "start copytool $facet on $agent failed"
- echo "start copytool $facet on $agent failed"
- fi
+fid2archive()
+{
+ local fid="$1"
- trap cleanup EXIT
+ case "$HSMTOOL" in
+ lhsmtool_posix)
+ printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid"
+ ;;
+ esac
}
get_copytool_event_log() {
error "Could not collect event log from $agent"
}
-copytool_cleanup() {
- trap - EXIT
- local agt_facet=$SINGLEAGT
- local agt_hosts=${1:-$(facet_active_host $agt_facet)}
- local hsm_root=$(copytool_device $agt_facet)
- local i
- local facet
- local param
- local -a state
-
- kill_copytools $agt_hosts
- wait_copytools $agt_hosts || error "copytools failed to stop"
-
- # Clean all CDTs orphans requests from previous tests that
- # would otherwise need to timeout to clear.
- for ((i = 0; i < MDSCOUNT; i++)); do
- facet=mds$((i + 1))
- param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i)
- state[$i]=$(do_facet $facet "$LCTL get_param -n $param")
-
- # Skip already stopping or stopped CDTs.
- [[ "${state[$i]}" =~ ^stop ]] && continue
-
- do_facet $facet "$LCTL set_param $param=shutdown"
- done
-
- for ((i = 0; i < MDSCOUNT; i++)); do
- # Only check and restore CDTs that we stopped in the first loop.
- [[ "${state[$i]}" =~ ^stop ]] && continue
-
- facet=mds$((i + 1))
- param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i)
-
- wait_result $facet "$LCTL get_param -n $param" stopped 20 ||
- error "$facet CDT state is not stopped"
-
- # Restore old CDT state.
- do_facet $facet "$LCTL set_param $param=${state[$i]}"
- done
-
- for ((i = 0; i < MDSCOUNT; i++)); do
- # Only check CDTs that we stopped in the first loop.
- [[ "${state[$i]}" =~ ^stop ]] && continue
-
- facet=mds$((i + 1))
- param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i)
-
- # Check that the old CDT state was restored.
- wait_result $facet "$LCTL get_param -n $param" "${state[$i]}" \
- 20 || error "$facet CDT state is not '${state[$i]}'"
- done
-
- if do_facet $agt_facet "df $hsm_root" >/dev/null 2>&1 ; then
- do_facet $agt_facet "rm -rf $hsm_root/*"
- fi
-}
-
copytool_suspend() {
local agents=${1:-$(facet_active_host $SINGLEAGT)}
- do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0
+ stack_trap \
+ "do_nodesv $agents libtool execute pkill -CONT -x '$HSMTOOL' || true" EXIT
+ do_nodesv $agents "libtool execute pkill -STOP -x $HSMTOOL" || return 0
echo "Copytool is suspended on $agents"
}
copytool_remove_backend() {
local fid=$1
- local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid)
+ local be=$(do_facet $SINGLEAGT find "$(hsm_root)" -name $fid)
echo "Remove from backend: $fid = $be"
do_facet $SINGLEAGT rm -f $be
}
-import_file() {
- do_facet $SINGLEAGT \
- "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\
- --import $1 $2 $MOUNT" ||
- error "import of $1 to $2 failed"
-}
+file_creation_failure() {
+ local cmd=$1
+ local file=$2
+ local err=$3
-make_archive() {
- local file=$HSM_ARCHIVE/$1
- do_facet $SINGLEAGT mkdir -p $(dirname $file)
- do_facet $SINGLEAGT dd if=/dev/urandom of=$file count=32 bs=1000000 ||
- file_creation_failure dd $file $?
-}
+ case $err in
+ 28)
+ df $MOUNT $MOUNT2 >&2
+ error "Not enough space to create $file with $cmd"
+ ;;
+ *)
+ error "cannot create $file with $cmd, status=$err"
+ ;;
+ esac
+}
+
+# Creates a file using dd
+create_file() {
+ local file=$1
+ local bs=$2
+ local count=$3
+ local conv=$4
+ local source=${5:-/dev/zero}
+ local args=""
+ local err
+
+ if [ -n "$conv" ]; then
+ args+=" conv=$conv"
+ fi
-copy2archive() {
- local file=$HSM_ARCHIVE/$2
- do_facet $SINGLEAGT mkdir -p $(dirname $file)
- do_facet $SINGLEAGT cp -p $1 $file || error "cannot copy $1 to $file"
-}
+ # Create the directory in case it does not exist
+ mkdir -p "$(dirname "$file")"
+ # Delete the file in case it already exists
+ rm -f "$file"
-mdts_set_param() {
- local arg=$1
- local key=$2
- local value=$3
- local mdtno
- local rc=0
- if [[ "$value" != "" ]]; then
- value="=$value"
+ if dd if="$source" of="$file" count="$count" bs="$bs" $args; then
+ path2fid "$file" || error "cannot get FID of '$file'"
+ else
+ err=$?
+ echo "cannot create file '$file'" >&2;
+ # Let the caller decide what to do on error
+ return $err;
fi
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- local facet=mds${mdtno}
- # if $arg include -P option, run 1 set_param per MDT on the MGS
- # else, run set_param on each MDT
- [[ $arg = *"-P"* ]] && facet=mgs
- do_facet $facet $LCTL set_param $arg mdt.${MDT[$idx]}.$key$value
- [[ $? != 0 ]] && rc=1
- done
- return $rc
}
-mdts_check_param() {
- local key="$1"
- local target="$2"
- local timeout="$3"
- local mdtno
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- wait_result mds${mdtno} \
- "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \
- $timeout ||
- error "$key state is not '$target' on mds${mdtno}"
- done
+# Create a file with create_file using a block size of 1M and a count of 0.
+#   $1 - path of the file; the first match of $DIR in it is replaced by $DIR2
+# A create_file failure is passed on to file_creation_failure with its status.
+create_empty_file() {
+	local target="${1/$DIR/$DIR2}"
+	local rc=0
+
+	create_file "$target" 1M 0 || rc=$?
+	[ $rc -eq 0 ] || file_creation_failure dd "$target" $rc
}
-changelog_setup() {
- CL_USERS=()
- local mdtno
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- local cl_user=$(do_facet mds${mdtno} $LCTL \
- --device ${MDT[$idx]} \
- changelog_register -n)
- CL_USERS+=($cl_user)
- do_facet mds${mdtno} lctl set_param \
- mdd.${MDT[$idx]}.changelog_mask="+hsm"
- $LFS changelog_clear ${MDT[$idx]} $cl_user 0
- done
+# Create a file of one 1M block read from /dev/urandom, using create_file.
+#   $1 - path of the file; the first match of $DIR in it is replaced by $DIR2
+#   $2 - value for dd's conv= option (default: fsync)
+# A create_file failure is passed on to file_creation_failure with its status.
+create_small_file() {
+	local target="${1/$DIR/$DIR2}"
+	local conv_mode=${2:-fsync}
+	local rc=0
+
+	create_file "$target" 1M 1 $conv_mode /dev/urandom || rc=$?
+	[ $rc -eq 0 ] || file_creation_failure dd "$target" $rc
}
-changelog_cleanup() {
- local mdtno
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- [[ -z ${CL_USERS[$idx]} ]] && continue
- $LFS changelog_clear ${MDT[$idx]} ${CL_USERS[$idx]} 0
- do_facet mds${mdtno} lctl --device ${MDT[$idx]} \
- changelog_deregister ${CL_USERS[$idx]}
- done
- CL_USERS=()
+# Same as create_small_file, but with "sync" passed as the conv value.
+#   $1 - path of the file to create
+create_small_sync_file() {
+	local path=$1
+
+	create_small_file "$path" sync
+}
+
+# Fill a file below the directory printed by hsm_root with random data.
+#   $1 - path of the file, relative to the hsm_root output
+#   $2 - number of 1M blocks to write (default: 39)
+# Both steps run through do_facet on $SINGLEAGT; a failure calls error.
+create_archive_file() {
+	local archive_path="$(hsm_root)/$1"
+	local nblocks=${2:-39}
+	local parent_dir="$(dirname "$archive_path")"
+
+	# The parent directory must exist before dd can write the file
+	if ! do_facet "$SINGLEAGT" mkdir -p "$parent_dir"; then
+		error "cannot create archive directory '$parent_dir'"
+	fi
+
+	if ! do_facet "$SINGLEAGT" dd if=/dev/urandom of="$archive_path" \
+			bs=1M count=$nblocks; then
+		error "cannot create archive file '$archive_path'"
+	fi
}
-changelog_get_flags() {
- local mdt=$1
- local cltype=$2
- local fid=$3
+copy2archive() {
+ local hsm_root="$(hsm_root)"
+ local file="$hsm_root/$2"
- $LFS changelog $mdt | awk "/$cltype/ && /t=\[$fid\]/ {print \$5}"
+ stack_trap "do_facet $SINGLEAGT rm -rf '$hsm_root'" EXIT
+ do_facet $SINGLEAGT mkdir -p "$(dirname "$file")" ||
+ error "mkdir '$(dirname "$file")' failed"
+ do_facet $SINGLEAGT cp -p "$1" "$file" ||
+ error "cannot copy '$1' to '$file'"
}
get_hsm_param() {
echo $val
}
-set_hsm_param() {
- local param=$1
- local value=$2
- local opt=$3
- mdts_set_param "$opt -n" "hsm.$param" "$value"
- return $?
-}
-
set_test_state() {
local cmd=$1
local target=$2
mdts_check_param hsm_control "$target" 10
}
-cdt_set_sanity_policy() {
- if [[ "$CDT_POLICY_HAD_CHANGED" ]]
- then
- # clear all
- mdts_set_param "" hsm.policy "+NRA"
- mdts_set_param "" hsm.policy "-NBR"
- CDT_POLICY_HAD_CHANGED=
- fi
-}
cdt_set_no_retry() {
mdts_set_param "" hsm.policy "+NRA"
mdts_set_param "-P -d" hsm_control ""
}
-cdt_set_mount_state() {
- mdts_set_param "-P" hsm_control "$1"
- # set_param -P is asynchronous operation and could race with set_param.
- # In such case configs could be retrieved and applied at mgc after
- # set_param -P completion. Sleep here to avoid race with set_param.
- # We need at least 20 seconds. 10 for mgc_requeue_thread to wake up
- # MGC_TIMEOUT_MIN_SECONDS + MGC_TIMEOUT_RAND_CENTISEC(5 + 5)
- # and 10 seconds to retrieve config from server.
- sleep 20
-}
-
-cdt_check_state() {
- mdts_check_param hsm_control "$1" 20
-}
-
cdt_disable() {
set_test_state disabled disabled
}
cdt_set_sanity_policy
}
-needclients() {
- local client_count=$1
- if [[ $CLIENTCOUNT -lt $client_count ]]; then
- skip "Need $client_count or more clients, have $CLIENTCOUNT"
- return 1
- fi
- return 0
-}
-
-path2fid() {
- $LFS path2fid $1 | tr -d '[]'
- return ${PIPESTATUS[0]}
-}
-
-get_hsm_flags() {
- local f=$1
- local u=$2
- local st
-
- if [[ $u == "user" ]]; then
- st=$($RUNAS $LFS hsm_state $f)
- else
- u=root
- st=$($LFS hsm_state $f)
- fi
-
- [[ $? == 0 ]] || error "$LFS hsm_state $f failed (run as $u)"
-
- st=$(echo $st | cut -f 2 -d" " | tr -d "()," )
- echo $st
-}
get_hsm_archive_id() {
local f=$1
st=$($LFS hsm_state $f)
[[ $? == 0 ]] || error "$LFS hsm_state $f failed"
- local ar=$(echo $st | grep "archive_id" | cut -f5 -d" " |
- cut -f2 -d:)
+ local ar=$(echo $st | grep -oP '(?<=archive_id:).*')
echo $ar
}
-check_hsm_flags() {
- local f=$1
- local fl=$2
-
- local st=$(get_hsm_flags $f)
- [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl"
-}
-
check_hsm_flags_user() {
local f=$1
local fl=$2
[[ $st == $fl ]] || error "hsm flags on $f are $st != $fl"
}
-file_creation_failure() {
- local cmd=$1
- local f=$2
- local err=$3
-
- df $MOUNT $MOUNT2 >&2
- error "cannot create $f with $cmd, status=$err"
-}
-
copy_file() {
local f=
path2fid $f || error "cannot get fid on $f"
}
-make_small() {
- local file2=${1/$DIR/$DIR2}
- dd if=/dev/urandom of=$file2 count=2 bs=1M conv=fsync ||
- file_creation_failure dd $file2 $?
-
- path2fid $1 || error "cannot get fid on $1"
-}
-
-make_small_sync() {
- dd if=/dev/urandom of=$1 count=1 bs=1M conv=sync ||
- file_creation_failure dd $1 $?
- path2fid $1 || error "cannot get fid on $1"
-}
-
-cleanup_large_files() {
- local ratio=$(df -P $MOUNT | tail -1 | awk '{print $5}' |
- sed 's/%//g')
- [ $ratio -gt 50 ] && find $MOUNT -size +10M -exec rm -f {} \;
-}
-
-check_enough_free_space() {
- local nb=$1
- local unit=$2
- local need=$((nb * unit /1024))
- local free=$(df -kP $MOUNT | tail -1 | awk '{print $4}')
- (( $need >= $free )) && return 1
- return 0
-}
-
-make_large_for_striping() {
- local file2=${1/$DIR/$DIR2}
- local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1)
-
- cleanup_large_files
-
- check_enough_free_space 5 $sz
- [ $? != 0 ] && return $?
-
- dd if=/dev/urandom of=$file2 count=5 bs=$sz conv=fsync ||
- file_creation_failure dd $file2 $?
-
- path2fid $1 || error "cannot get fid on $1"
-}
-
-make_large_for_progress() {
- local file2=${1/$DIR/$DIR2}
-
- cleanup_large_files
-
- check_enough_free_space 39 1000000
- [ $? != 0 ] && return $?
-
- # big file is large enough, so copy time is > 30s
- # so copytool make 1 progress
- # size is not a multiple of 1M to avoid stripe
- # aligment
- dd if=/dev/urandom of=$file2 count=39 bs=1000000 conv=fsync ||
- file_creation_failure dd $file2 $?
-
- path2fid $1 || error "cannot get fid on $1"
-}
-
-make_large_for_progress_aligned() {
- local file2=${1/$DIR/$DIR2}
-
- cleanup_large_files
-
- check_enough_free_space 33 1048576
- [ $? != 0 ] && return $?
-
- # big file is large enough, so copy time is > 30s
- # so copytool make 1 progress
- # size is a multiple of 1M to have stripe
- # aligment
- dd if=/dev/urandom of=$file2 count=33 bs=1M conv=fsync ||
- file_creation_failure dd $file2 $?
- path2fid $1 || error "cannot get fid on $1"
-}
-
-make_large_for_cancel() {
- local file2=${1/$DIR/$DIR2}
-
- cleanup_large_files
-
- check_enough_free_space 103 1048576
- [ $? != 0 ] && return $?
-
- # Copy timeout is 100s. 105MB => 105s
- dd if=/dev/urandom of=$file2 count=103 bs=1M conv=fsync ||
- file_creation_failure dd $file2 $?
- path2fid $1 || error "cannot get fid on $1"
-}
-
-wait_result() {
- local facet=$1
- shift
- wait_update --verbose $(facet_active_host $facet) "$@"
-}
-
-wait_request_state() {
- local fid=$1
- local request=$2
- local state=$3
- # 4th arg (mdt index) is optional
- local mdtidx=${4:-0}
- local mds=mds$(($mdtidx + 1))
-
- local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions"
- cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
-
- wait_result $mds "$cmd" $state 200 ||
- error "request on $fid is not $state on $mds"
+# Delete any file bigger than 10M under $MOUNT and wait for deletes to complete
+#
+# Note that this might lead to surprising behaviours such as deleting an
+# important file for the currently running test
+delete_large_files() {
+ printf "Deleting large files...\n" >&2
+ find $MOUNT -size +10M -delete
+ wait_delete_completed
}
get_request_state() {
"awk -vn=0 '/'$fid'.*action='$request'/ {n++}; END {print n}'"
}
+# Ensure the number of HSM requests for a given FID is correct
+# usage: assert_request_count FID REQUEST_TYPE COUNT [ERROR_MSG]
+#   FID          - passed to get_request_count as its first argument
+#   REQUEST_TYPE - passed to get_request_count as its second argument
+#   COUNT        - number of requests get_request_count is expected to print
+#   ERROR_MSG    - optional message for error(); a default one is built when
+#                  it is missing or empty
+assert_request_count() {
+	local request_count=$(get_request_count "$1" "$2")
+
+	# Keep the default message in one string so that error() receives a
+	# single argument without a stray double space
+	local default_error_msg="expected $3 '$2' request(s) for '$1', found"
+	default_error_msg+=" '$request_count'"
+
+	# Operands are quoted so that an empty value is still one operand
+	[ "$request_count" -eq "$3" ] || error "${4:-$default_error_msg}"
+}
+
wait_all_done() {
local timeout=$1
local fid=$2
[[ -n $fid ]] && cmd+=" | grep '$fid'"
cmd+=" | egrep 'WAITING|STARTED'"
- wait_result $SINGLEMDS "$cmd" "" $timeout ||
+ wait_update_facet --verbose mds1 "$cmd" "" $timeout ||
error "requests did not complete"
}
sleep $val
}
+# Sleep for the duration printed by "get_hsm_param loop_period".
+wait_for_loop_period() {
+	sleep $(get_hsm_param loop_period)
+}
+
parse_json_event() {
local raw_event=$1
echo $raw_event | python -c "$json_parser"
}
-# populate MDT device array
-get_mdt_devices
-
-# initiate variables
-init_agt_vars
-
-# cleanup from previous bad setup
-kill_copytools
+# Print the lines of an MDT's hsm.agents parameter that match a UUID.
+#   $1 - UUID, used as the grep pattern
+#   $2 - MDT index; the command runs via do_facet on facet mds<index + 1>
+get_agent_by_uuid_mdt() {
+	local uuid=$1
+	local mdtidx=$2
+	local mds=mds$((mdtidx + 1))
+	local agents_cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.agents"
+
+	agents_cmd+=" | grep $uuid"
+	do_facet $mds "$agents_cmd"
+}
-# for recovery tests, coordinator needs to be started at mount
-# so force it
-# the lustre conf must be without hsm on (like for sanity.sh)
-echo "Set HSM on and start"
-cdt_set_mount_state enabled
-cdt_check_state enabled
+# Call error unless get_agent_by_uuid_mdt prints something for the given
+# UUID on the given MDT.
+#   $1 - UUID to look for
+#   $2 - MDT index (facet name used in messages is mds<index + 1>)
+check_agent_registered_by_mdt() {
+	local uuid=$1
+	local mdtidx=$2
+	local mds=mds$((mdtidx + 1))
+	local entry=$(get_agent_by_uuid_mdt $uuid $mdtidx)
+
+	if [[ -z "$entry" ]]; then
+		error "uuid $uuid not found in agent list on $mds"
+	else
+		echo "found agent $entry on $mds"
+	fi
+}
-echo "Start copytool"
-copytool_setup
+# Call error if get_agent_by_uuid_mdt prints anything for the given UUID on
+# the given MDT.
+#   $1 - UUID to look for
+#   $2 - MDT index (facet name used in messages is mds<index + 1>)
+check_agent_unregistered_by_mdt() {
+	local uuid=$1
+	local mdtidx=$2
+	local mds=mds$((mdtidx + 1))
+	local entry=$(get_agent_by_uuid_mdt $uuid $mdtidx)
+
+	if [[ -n "$entry" ]]; then
+		error "uuid found in agent list on $mds: $entry"
+	else
+		echo "uuid not found in agent list on $mds"
+	fi
+}
-echo "Set sanity-hsm HSM policy"
-cdt_set_sanity_policy
+# Run check_agent_registered_by_mdt for MDT indexes 0 .. MDSCOUNT-1.
+#   $1 - UUID to look for
+check_agent_registered() {
+	local uuid=$1
+	local mdtidx
+
+	for ((mdtidx = 0; mdtidx < MDSCOUNT; mdtidx++)); do
+		check_agent_registered_by_mdt $uuid $mdtidx
+	done
+}
+
+# Run check_agent_unregistered_by_mdt for MDT indexes 0 .. MDSCOUNT-1.
+#   $1 - UUID to look for
+check_agent_unregistered() {
+	local uuid=$1
+	local mdtidx
+
+	for ((mdtidx = 0; mdtidx < MDSCOUNT; mdtidx++)); do
+		check_agent_unregistered_by_mdt $uuid $mdtidx
+	done
+}
+
+# Print the second space-separated field of get_client_uuid's output for the
+# mount point found on the copytool command line of an agent.
+#   $1 - agent node (default: active host of $SINGLEAGT)
+# Calls error when no mount point can be read from the process list.
+get_agent_uuid() {
+	local agent=${1:-$(facet_active_host $SINGLEAGT)}
+
+	# Lustre mount-point is mandatory and last parameter on
+	# copytool cmd-line.
+	local mntpnt=$(do_rpc_nodes $agent libtool execute ps -C $HSMTOOL \
+			-o args= | awk '{print $NF}')
+
+	# Keep the message in one quoted string so error() gets one argument
+	[ -n "$mntpnt" ] ||
+		error "Found no Agent or with no mount-point parameter"
+
+	do_rpc_nodes $agent get_client_uuid $mntpnt | cut -d' ' -f2
+}
+
+# initiate variables
+init_agt_vars
+
+# populate MDT device array
+get_mdt_devices
+
+# cleanup from previous bad setup
+kill_copytools
+
+# for recovery tests, coordinator needs to be started at mount
+# so force it
+# the lustre conf must be without hsm on (like for sanity.sh)
+echo "Set HSM on and start"
+cdt_set_mount_state enabled
+cdt_check_state enabled
+
+echo "Set sanity-hsm HSM policy"
+cdt_set_sanity_policy
# finished requests are quickly removed from list
set_hsm_param grace_delay 10
-test_1() {
+CLIENT_NIDS=( $($LCTL list_nids all) )
+
+test_1A() { # was test_1
mkdir -p $DIR/$tdir
chmod 777 $DIR/$tdir
check_hsm_flags_user $f "0x00000000"
}
-run_test 1 "lfs hsm flags root/non-root access"
+run_test 1A "lfs hsm flags root/non-root access"
test_1a() {
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid=$(make_small $f)
+ local fid=$(create_small_file $f)
+
+ copytool setup
$LFS hsm_archive $f || error "could not archive file"
wait_request_state $fid ARCHIVE SUCCEED
}
run_test 1a "mmap & cat a HSM released file"
-test_2() {
+# Common body of test_1b, test_1d and test_1e: create a 1M file from
+# /dev/urandom, then archive, release and restore it, checking the HSM flags
+# after the release and after the restore.
+#   $1 - path of the file to create and exercise
+test_1bde_base() {
+	local f=$1
+
+	rm -f $f
+	if ! dd if=/dev/urandom of=$f bs=1M count=1 conv=sync; then
+		error "failed to create file"
+	fi
+	local fid=$(path2fid $f)
+
+	copytool setup
+
+	echo "archive $f"
+	if ! $LFS hsm_archive $f; then
+		error "could not archive file"
+	fi
+	wait_request_state $fid ARCHIVE SUCCEED
+
+	echo "release $f"
+	if ! $LFS hsm_release $f; then
+		error "could not release file"
+	fi
+	echo "verify released state: "
+	check_hsm_flags $f "0x0000000d" && echo "pass"
+
+	echo "restore $f"
+	if ! $LFS hsm_restore $f; then
+		error "could not restore file"
+	fi
+	wait_request_state $fid RESTORE SUCCEED
+	echo "verify restored state: "
+	check_hsm_flags $f "0x00000009" && echo "pass"
+}
+
+# Run test_1bde_base on a file created in a directory whose default layout
+# is set with three "-E" components.
+test_1b() {
+	local workdir=$DIR/$tdir
+
+	mkdir -p $workdir
+	if ! $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $workdir; then
+		error "failed to set default stripe"
+	fi
+
+	test_1bde_base $workdir/$tfile
+}
+run_test 1b "Archive, Release and Restore composite file"
+
+# Exercise "lfs hsm_set --archive-id" on a file created with $RUNAS touch:
+#  - ID 32 can be set;
+#  - ID 0 leaves the previously set ID in place;
+#  - ID 33 must be accepted when client and MDS are both at least 2.11.56,
+#    and must be rejected otherwise;
+#  - ID 16 can be set together with the archived flag.
+test_1c() {
+	mkdir -p $DIR/$tdir
+	chmod 777 $DIR/$tdir
+
+	local f=$DIR/$tdir/$tfile
+	$RUNAS touch $f
+
+	# Test whether we can set the maximum archive number.
+	local LOCAL_HSM_ARCHIVE_NUMBER=32
+	$LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f ||
+		error "root could not change hsm flags"
+	check_hsm_flags_user $f "0x00000001"
+	echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER"
+	local st=$(get_hsm_archive_id $f)
+	[[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] ||
+		error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER"
+
+	# Test whether setting archive number 0 results in no change.
+	$LFS hsm_set --exists --archive-id 0 $f ||
+		error "root could not change hsm flags"
+	check_hsm_flags_user $f "0x00000001"
+	echo "verifying archive number is still $LOCAL_HSM_ARCHIVE_NUMBER"
+	st=$(get_hsm_archive_id $f)
+	[[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] ||
+		error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER"
+
+	LOCAL_HSM_ARCHIVE_NUMBER=33
+	# The MDS side uses $MDS1_VERSION, like the other MDS version checks in
+	# this script
+	if [ $(lustre_version_code client) -ge $(version_code 2.11.56) ] &&
+	   [ $MDS1_VERSION -ge $(version_code 2.11.56) ]; then
+		# lustre in the new version supports unlimited archiveID.
+		# Test whether setting archive number > 32 is supported
+		$LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f ||
+			error "archive ID $LOCAL_HSM_ARCHIVE_NUMBER too large?"
+		check_hsm_flags_user $f "0x00000001"
+
+		echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER"
+		st=$(get_hsm_archive_id $f)
+		[[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] ||
+			error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER"
+	else
+		# old client or old mds can only support at most 32 archiveID
+		# test whether setting archive number > 32 results in error.
+		$LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f &&
+			error "bitmap archive number is larger than 32"
+		check_hsm_flags_user $f "0x00000001"
+	fi
+
+	# Test setting archive number 16 together with the archived flag.
+	LOCAL_HSM_ARCHIVE_NUMBER=16
+	$LFS hsm_set --exists --archived \
+		--archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f ||
+		error "root could not change hsm flags"
+	check_hsm_flags_user $f "0x00000009"
+	echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER"
+	st=$(get_hsm_archive_id $f)
+	[[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] ||
+		error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER"
+}
+run_test 1c "Check setting archive-id in lfs hsm_set"
+
+# Run test_1bde_base on a file created in a directory whose default layout
+# starts with a "-L mdt" component; skipped when $MDS1_VERSION is below
+# 2.10.59.
+test_1d() {
+	if [ $MDS1_VERSION -lt $(version_code 2.10.59) ]; then
+		skip "need MDS version at least 2.10.59"
+	fi
+
+	local workdir=$DIR/$tdir
+
+	mkdir -p $workdir
+	if ! $LFS setstripe -E 1M -L mdt -E -1 -c 2 $workdir; then
+		error "failed to set default stripe"
+	fi
+
+	test_1bde_base $workdir/$tfile
+}
+run_test 1d "Archive, Release and Restore DoM file"
+
+test_1e() {
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code $SEL_VER) ] &&
+ skip "skipped for lustre < $SEL_VER"
+
mkdir -p $DIR/$tdir
+ $LFS setstripe -E 1G -z 64M -E 10G -z 512M -E -1 -z 1G $DIR/$tdir ||
+ error "failed to set default stripe"
+ local comp_file=$DIR/$tdir/$tfile
+
+ test_1bde_base $comp_file
+
+ local flg_opts="--comp-start 0 -E 64M --comp-flags init"
+ local found=$($LFS find $flg_opts $comp_file | wc -l)
+ [ $found -eq 1 ] || error "1st component not found"
+
+ flg_opts="--comp-start 64M -E 1G --comp-flags extension"
+ found=$($LFS find $flg_opts $comp_file | wc -l)
+ [ $found -eq 1 ] || error "2nd component not found"
+
+ flg_opts="--comp-start 1G -E 1G --comp-flags ^init"
+ found=$($LFS find $flg_opts $comp_file | wc -l)
+ [ $found -eq 1 ] || error "3rd component not found"
+
+ flg_opts="--comp-start 1G -E 10G --comp-flags extension"
+ found=$($LFS find $flg_opts $comp_file | wc -l)
+ [ $found -eq 1 ] || error "4th component not found"
+
+ flg_opts="--comp-start 10G -E 10G --comp-flags ^init"
+ found=$($LFS find $flg_opts $comp_file | wc -l)
+ [ $found -eq 1 ] || error "5th component not found"
+
+ flg_opts="--comp-start 10G -E EOF --comp-flags extension"
+ found=$($LFS find $flg_opts $comp_file | wc -l)
+ [ $found -eq 1 ] || error "6th component not found"
+
+ sel_layout_sanity $comp_file 6
+}
+run_test 1e "Archive, Release and Restore SEL file"
+
+test_2() {
local f=$DIR/$tdir/$tfile
- touch $f
+
+ create_empty_file "$f"
# New files are not dirty
check_hsm_flags $f "0x00000000"
run_test 3 "Check file dirtyness when opening for write"
test_4() {
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid=$(make_small $f)
+ local fid=$(create_small_file $f)
$LFS hsm_cancel $f
local st=$(get_request_state $fid CANCEL)
test_8() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
wait_request_state $fid ARCHIVE SUCCEED
check_hsm_flags $f "0x00000009"
-
- copytool_cleanup
}
run_test 8 "Test default archive number"
-test_9() {
+test_9A() { # was test_9
+ # we do not use the default one to be sure
+ local archive_id=$((HSM_ARCHIVE_NUMBER + 1))
+ copytool setup --archive-id $archive_id
+
+ # give time for CT to register with MDTs
+ sleep $(($MDSCOUNT*2))
+ local uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT))
+ check_agent_registered $uuid
+
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/passwd $f)
- # we do not use the default one to be sure
- local new_an=$((HSM_ARCHIVE_NUMBER + 1))
- copytool_cleanup
- copytool_setup $SINGLEAGT $MOUNT $new_an
- $LFS hsm_archive --archive $new_an $f
+ $LFS hsm_archive --archive $archive_id $f
wait_request_state $fid ARCHIVE SUCCEED
check_hsm_flags $f "0x00000009"
-
- copytool_cleanup
}
-run_test 9 "Use of explicit archive number, with dedicated copytool"
+run_test 9A "Use of explicit archive number, with dedicated copytool"
test_9a() {
needclients 3 || return 0
local file
local fid
- copytool_cleanup $(comma_list $(agts_nodes))
-
# start all of the copytools
for n in $(seq $AGTCOUNT); do
- copytool_setup agt$n
+ copytool setup --facet agt$n
done
- trap "copytool_cleanup $(comma_list $(agts_nodes))" EXIT
# archive files
- mkdir -p $DIR/$tdir
for n in $(seq $AGTCOUNT); do
file=$DIR/$tdir/$tfile.$n
- fid=$(make_small $file)
+ fid=$(create_small_file $file)
$LFS hsm_archive $file || error "could not archive file $file"
wait_request_state $fid ARCHIVE SUCCEED
check_hsm_flags $file "0x00000009"
done
-
- trap - EXIT
- copytool_cleanup $(comma_list $(agts_nodes))
}
run_test 9a "Multiple remote agents"
test_10a() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir/d1
local f=$DIR/$tdir/$tfile
error "hsm_archive failed"
wait_request_state $fid ARCHIVE SUCCEED
- local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) ||
- error "fid $fid not in archive $HSM_ARCHIVE"
+ local hsm_root="$(copytool_device $SINGLEAGT)"
+ local archive="$(do_facet $SINGLEAGT \
+ find "$hsm_root" -name "$fid" -print0)"
+ [ -n "$archive" ] || error "fid '$fid' not in archive '$hsm_root'"
+
echo "Verifying content"
- do_facet $SINGLEAGT diff $f $AFILE || error "archived file differs"
+ do_facet $SINGLEAGT diff $f $archive || error "archived file differs"
echo "Verifying hsm state "
check_hsm_flags $f "0x00000009"
local st=$(get_hsm_archive_id $f)
[[ $st == $HSM_ARCHIVE_NUMBER ]] ||
error "Wrong archive number, $st != $HSM_ARCHIVE_NUMBER"
-
- copytool_cleanup
-
}
run_test 10a "Archive a file"
test_10b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local cnt=$(get_request_count $fid ARCHIVE)
[[ "$cnt" == "1" ]] ||
error "archive of non dirty file must not make a request"
-
- copytool_cleanup
}
run_test 10b "Archive of non dirty file must work without doing request"
test_10c() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/hosts $f)
$LFS hsm_set --noarchive $f
$LFS hsm_archive $f && error "archive a noarchive file must fail"
-
- copytool_cleanup
+ return 0
}
run_test 10c "Check forbidden archive"
test_10d() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local dflt=$(get_hsm_param default_archive_id)
[[ $ar == $dflt ]] ||
error "archived file is not on default archive: $ar != $dflt"
-
- copytool_cleanup
}
run_test 10d "Archive a file on the default archive id"
copy2archive /etc/hosts $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
echo -n "Verifying released state: "
check_hsm_flags $f "0x0000000d"
local LSZ=$(stat -c "%s" $f)
- local ASZ=$(do_facet $SINGLEAGT stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile)
+ local ASZ=$(do_facet $SINGLEAGT stat -c "%s" "$(hsm_root)/$tdir/$tfile")
echo "Verifying imported size $LSZ=$ASZ"
[[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ"
echo -n "Verifying released pattern: "
- local PTRN=$($GETSTRIPE -L $f)
+ local PTRN=$($LFS getstripe -L $f)
echo $PTRN
- [[ $PTRN == 80000001 ]] || error "Is not released"
+ [[ $PTRN == released ]] || error "Is not released"
local fid=$(path2fid $f)
echo "Verifying new fid $fid in archive"
- local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) ||
- error "fid $fid not in archive $HSM_ARCHIVE"
+ do_facet $SINGLEAGT "[ -f \"$(fid2archive "$fid")\" ]" ||
+ error "No archive for fid $fid"
}
run_test 11a "Import a file"
test_11b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local FILE_HASH=$(md5sum $f)
rm -f $f
- import_file $fid $f
+ copytool import $fid $f
echo "$FILE_HASH" | md5sum -c
[[ $? -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 11b "Import a deleted file using its FID"
test_12a() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
copy2archive /etc/hosts $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
local f2=$DIR2/$tdir/$tfile
echo "Verifying released state: "
check_hsm_flags $f2 "0x0000000d"
echo "Verifying file state: "
check_hsm_flags $f2 "0x00000009"
- do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f
+ do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f
[[ $? -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 12a "Restore an imported file explicitly"
test_12b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
copy2archive /etc/hosts $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
echo "Verifying released state: "
check_hsm_flags $f "0x0000000d"
echo "Verifying file state after restore: "
check_hsm_flags $f "0x00000009"
- do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f
+ do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f
[[ $? -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 12b "Restore an imported file implicitly"
test_12c() {
- [ "$OSTCOUNT" -lt "2" ] && skip_env "skipping 2-stripe test" && return
+ [ "$OSTCOUNT" -lt "2" ] && skip_env "needs >= 2 OSTs" && return
# test needs a running copytool
- copytool_setup
+ copytool setup
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- $LFS setstripe -c 2 $f
- local fid
- fid=$(make_large_for_striping $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ mkdir -p $DIR/$tdir
+ $LFS setstripe -c 2 "$f"
+ local fid=$(create_file "$f" 1M 5)
local FILE_CRC=$(md5sum $f)
echo "$FILE_CRC" | md5sum -c
[[ $? -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 12c "Restore a file with stripe of 2"
test_12d() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local cnt=$(get_request_count $fid RESTORE)
[[ "$cnt" == "0" ]] ||
error "restore a non dirty file must not make a request"
-
- copytool_cleanup
}
run_test 12d "Restore of a non archived, non released file must work"\
" without doing request"
test_12e() {
# test needs a running copytool
- copytool_setup
+ copytool setup
- mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir
+ mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/hosts $f)
$LFS hsm_archive $f || error "archive request failed"
$LFS hsm_state $f
$LFS hsm_restore $f && error "restore a dirty file must fail"
-
- copytool_cleanup
+ return 0
}
run_test 12e "Check forbidden restore"
test_12f() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
diff -q /etc/hosts $f
[[ $? -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 12f "Restore a released file explicitly"
test_12g() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
wait_request_state $fid RESTORE SUCCEED
[[ $st -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 12g "Restore a released file implicitly"
needclients 2 || return 0
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
wait_request_state $fid RESTORE SUCCEED
[[ $st -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 12h "Restore a released file implicitly from a second node"
test_12m() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
cmp /etc/passwd $f
[[ $? -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 12m "Archive/release/implicit restore"
test_12n() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
copy2archive /etc/hosts $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
do_facet $SINGLEAGT cmp /etc/hosts $f ||
error "Restored file differs"
$LFS hsm_release $f || error "release of $f failed"
-
- copytool_cleanup
}
run_test 12n "Import/implicit restore/release"
test_12o() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
wait_request_state $fid RESTORE SUCCEED
[[ $st -eq 0 ]] || error "Restored file differs"
-
- copytool_cleanup
}
run_test 12o "Layout-swap failure during Restore leaves file released"
test_12p() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir $DIR/$tdir
local f=$DIR/$tdir/$tfile
do_facet $SINGLEAGT cat $f > /dev/null || error "cannot cat $f"
$LFS hsm_release $f || error "cannot release $f"
do_facet $SINGLEAGT cat $f > /dev/null || error "cannot cat $f"
-
- copytool_cleanup
}
run_test 12p "implicit restore of a file on copytool mount point"
-cleanup_test_12q() {
- trap 0
- zconf_umount $(facet_host $SINGLEAGT) $MOUNT3 ||
- error "cannot umount $MOUNT3 on $SINGLEAGT"
-}
-
test_12q() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.58) ] &&
- skip "need MDS version at least 2.7.58" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.7.58) ] &&
+ skip "need MDS version at least 2.7.58"
+ stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT
zconf_mount $(facet_host $SINGLEAGT) $MOUNT3 ||
error "cannot mount $MOUNT3 on $SINGLEAGT"
- trap cleanup_test_12q EXIT
-
# test needs a running copytool
- copytool_setup $SINGLEAGT $MOUNT3
+ copytool setup -m "$MOUNT3"
- mkdir $DIR/$tdir
local f=$DIR/$tdir/$tfile
local f2=$DIR2/$tdir/$tfile
- local fid=$(make_small $f)
+ local fid=$(create_small_file $f)
local orig_size=$(stat -c "%s" $f)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
[ $size -eq $orig_size ] ||
error "$f2: wrong size after archive: $size != $orig_size"
- HSM_ARCHIVE_PURGE=false copytool_setup $SINGLEAGT /mnt/lustre3
+ copytool setup -m "$MOUNT3"
wait
size=$(stat -c "%s" $f2)
[ $size -eq 0 ] ||
error "$f2: wrong size after overwrite: $size != 0"
-
- copytool_cleanup
- zconf_umount $(facet_host $SINGLEAGT) $MOUNT3 ||
- error "cannot umount $MOUNT3 on $SINGLEAGT"
}
run_test 12q "file attributes are refreshed after restore"
test_13() {
- # test needs a running copytool
- copytool_setup
-
- local ARC_SUBDIR="import.orig"
- local d=""
- local f=""
-
- # populate directory to be imported
- for d in $(seq 1 10); do
- local CURR_DIR="$HSM_ARCHIVE/$ARC_SUBDIR/dir.$d"
- do_facet $SINGLEAGT mkdir -p "$CURR_DIR"
- for f in $(seq 1 10); do
- CURR_FILE="$CURR_DIR/$tfile.$f"
- # write file-specific data
- do_facet $SINGLEAGT \
- "echo d=$d, f=$f, dir=$CURR_DIR, "\
- "file=$CURR_FILE > $CURR_FILE"
+ local -i i j k=0
+ for i in {1..10}; do
+ local archive_dir="$(hsm_root)"/subdir/dir.$i
+
+ do_facet $SINGLEAGT mkdir -p "$archive_dir"
+ for j in {1..10}; do
+ local archive_file="$archive_dir"/file.$j
+
+ do_facet $SINGLEAGT "echo $k > \"$archive_dir\"/file.$j"
+ k+=1
done
done
+
# import to Lustre
- import_file "$ARC_SUBDIR" $DIR/$tdir
- # diff lustre content and origin (triggers file restoration)
- # there must be 10x10 identical files, and no difference
- local cnt_ok=$(do_facet $SINGLEAGT diff -rs $HSM_ARCHIVE/$ARC_SUBDIR \
- $DIR/$tdir/$ARC_SUBDIR | grep identical | wc -l)
- local cnt_diff=$(do_facet $SINGLEAGT diff -r $HSM_ARCHIVE/$ARC_SUBDIR \
- $DIR/$tdir/$ARC_SUBDIR | wc -l)
+ copytool import "subdir" "$DIR/$tdir"
- [ $cnt_diff -eq 0 ] ||
- error "$cnt_diff imported files differ from read data"
- [ $cnt_ok -eq 100 ] ||
- error "not enough identical files ($cnt_ok != 100)"
+ # To check the import, the test uses diff with the -r flag
+ # This is nice, but diff only checks files one by one, and triggering
+ # an implicit restore for one file at a time will consume as many
+ # seconds as there are files to compare. To speed this up, a restore
+ # operation is triggered manually first.
+ copytool setup
+ find "$DIR/$tdir"/subdir -type f -exec $LFS hsm_restore {} \;
- copytool_cleanup
+ # Compare the imported data
+ do_facet $SINGLEAGT \
+ diff -r "$(hsm_root)"/subdir "$DIR/$tdir"/subdir ||
+ error "imported files differ from archived data"
}
run_test 13 "Recursively import and restore a directory"
test_14() {
# test needs a running copytool
- copytool_setup
+ copytool setup
# archive a file
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid=$(make_small $f)
+ local fid=$(create_small_file $f)
local sum=$(md5sum $f | awk '{print $1}')
$LFS hsm_archive $f || error "could not archive file"
wait_request_state $fid ARCHIVE SUCCEED
- # delete the file
- rm -f $f
# create released file (simulate llapi_hsm_import call)
- touch $f
- local fid2=$(path2fid $f)
+ local fid2=$(create_empty_file "$f")
$LFS hsm_set --archived --exists $f || error "could not force hsm flags"
$LFS hsm_release $f || error "could not release file"
# rebind the archive to the newly created file
echo "rebind $fid to $fid2"
- do_facet $SINGLEAGT \
- "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\
- --rebind $fid $fid2 $DIR" || error "could not rebind file"
+ copytool rebind $fid $fid2
# restore file and compare md5sum
local sum2=$(md5sum $f | awk '{print $1}')
[[ $sum == $sum2 ]] || error "md5sum mismatch after restore"
-
- copytool_cleanup
}
run_test 14 "Rebind archived file to a new fid"
test_15() {
# test needs a running copytool
- copytool_setup
+ copytool setup
# archive files
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local count=5
local tmpfile=$SHARED_DIRECTORY/tmp.$$
local fids=()
local sums=()
for i in $(seq 1 $count); do
- fids[$i]=$(make_small $f.$i)
+ fids[$i]=$(create_small_file $f.$i)
sums[$i]=$(md5sum $f.$i | awk '{print $1}')
$LFS hsm_archive $f.$i || error "could not archive file"
done
wait_all_done $(($count*60))
+ stack_trap "rm -f $tmpfile" EXIT
:>$tmpfile
# delete the files
for i in $(seq 1 $count); do
- rm -f $f.$i
- touch $f.$i
- local fid2=$(path2fid $f.$i)
+ local fid2=$(create_empty_file "${f}.${i}")
# add the rebind operation to the list
echo ${fids[$i]} $fid2 >> $tmpfile
[[ $nl == $count ]] || error "$nl files in list, $count expected"
echo "rebind list of files"
- do_facet $SINGLEAGT \
- "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\
- --rebind $tmpfile $DIR" || error "could not rebind file list"
+ copytool rebind "$tmpfile"
# restore files and compare md5sum
for i in $(seq 1 $count); do
[[ $sum2 == ${sums[$i]} ]] ||
error "md5sum mismatch after restore ($sum2 != ${sums[$i]})"
done
-
- rm -f $tmpfile
- copytool_cleanup
}
run_test 15 "Rebind a list of files"
test_16() {
# test needs a running copytool
- copytool_setup
+ copytool setup -b 1
local ref=/tmp/ref
# create a known size file so we can verify transfer speed
$LFS hsm_archive $f
wait_request_state $fid ARCHIVE SUCCEED
local end=$(date +%s)
- local duration=$((end - start))
+ # Add 1 to account for rounding errors between start and end (LU-8155)
+ local duration=$((end - start + 1))
- [[ $duration -ge $goal ]] ||
+ [[ $duration -ge $((goal - 1)) ]] ||
error "Transfer is too fast $duration < $goal"
-
- copytool_cleanup
}
run_test 16 "Test CT bandwith control option"
test_20() {
- mkdir -p $DIR/$tdir
-
local f=$DIR/$tdir/$tfile
- touch $f || error "touch $f failed"
+ create_empty_file "$f"
# Could not release a non-archived file
$LFS hsm_release $f && error "release should not succeed"
test_21() {
# test needs a running copytool
- copytool_setup
+ copytool setup
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/test_release
# Create a file and check its states
- local fid=$(make_small $f)
+ local fid=$(create_small_file $f)
check_hsm_flags $f "0x00000000"
# LU-4388/LU-4389 - ZFS does not report full number of blocks
check_hsm_flags $f "0x0000000d"
stop_full_debug_logging
-
- copytool_cleanup
}
run_test 21 "Simple release tests"
test_22() {
# test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup
local f=$DIR/$tdir/test_release
local swap=$DIR/$tdir/test_swap
# Create a file and check its states
- local fid=$(make_small $f)
+ local fid=$(create_small_file $f)
check_hsm_flags $f "0x00000000"
$LFS hsm_archive $f || error "could not archive file"
$LFS hsm_release $f || error "could not release file"
check_hsm_flags $f "0x0000000d"
- make_small $swap
+ create_small_file $swap
$LFS swap_layouts $swap $f && error "swap_layouts should failed"
- true
- copytool_cleanup
+ return 0
}
run_test 22 "Could not swap a release file"
test_23() {
# test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup
local f=$DIR/$tdir/test_mtime
# Create a file and check its states
- local fid=$(make_small $f)
+ local fid=$(create_small_file $f)
check_hsm_flags $f "0x00000000"
$LFS hsm_archive $f || error "could not archive file"
local ATIME=$(stat -c "%X" $f)
[ $MTIME -eq "978261179" ] || fail "bad mtime: $MTIME"
[ $ATIME -eq "978261179" ] || fail "bad atime: $ATIME"
-
- copytool_cleanup
}
run_test 23 "Release does not change a/mtime (utime)"
local ctime1
# test needs a running copytool
- copytool_setup
+ copytool setup
- mkdir -p $DIR/$tdir
- rm -f $file
- fid=$(make_small $file)
+ fid=$(create_small_file $file)
# Create a file and check its states
check_hsm_flags $file "0x00000000"
[ $ctime0 -eq $ctime1 ] ||
error "release changed ctime from $ctime0 to $ctime1"
- # Restore should not change atime or mtime and should not
- # decrease ctime.
+ # Restore should not change any timestamps.
$LFS hsm_restore $file
wait_request_state $fid RESTORE SUCCEED
[ $ctime0 -eq $ctime1 ] ||
error "restore changed ctime from $ctime0 to $ctime1"
- copytool_cleanup
+ kill_copytools
+ wait_copytools || error "Copytools failed to stop"
# Once more, after unmount and mount.
umount_client $MOUNT || error "cannot unmount '$MOUNT'"
# LU-3811
# Test needs a running copytool.
- copytool_setup
- mkdir -p $DIR/$tdir
+ copytool setup
# Check that root can do HSM actions on a regular user's file.
- rm -f $file
- fid=$(make_small $file)
+ fid=$(create_small_file $file)
sum0=$(md5sum $file)
chown $RUNAS_ID:$RUNAS_GID $file ||
[ "$sum0" == "$sum1" ] ||
error "md5sum mismatch for '$file'"
-
- copytool_cleanup
}
run_test 24b "root can archive, release, and restore user files"
-cleanup_test_24c() {
- trap 0
- set_hsm_param user_request_mask RESTORE
- set_hsm_param group_request_mask RESTORE
- set_hsm_param other_request_mask RESTORE
-}
-
test_24c() {
local file=$DIR/$tdir/$tfile
local action=archive
local other_save
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
# Save the default masks and check that cleanup_24c will
# restore the request masks correctly.
user_save=$(get_hsm_param user_request_mask)
+ # Each saved mask is restored into the parameter it was read from.
+ stack_trap "set_hsm_param user_request_mask $user_save" EXIT
group_save=$(get_hsm_param group_request_mask)
+ stack_trap "set_hsm_param group_request_mask $group_save" EXIT
other_save=$(get_hsm_param other_request_mask)
+ stack_trap "set_hsm_param other_request_mask $other_save" EXIT
[ "$user_save" == RESTORE ] ||
error "user_request_mask is '$user_save' expected 'RESTORE'"
[ "$other_save" == RESTORE ] ||
error "other_request_mask is '$other_save' expected 'RESTORE'"
- trap cleanup_test_24c EXIT
-
# User.
- rm -f $file
- make_small $file
- chown $RUNAS_ID:nobody $file ||
- error "cannot chown '$file' to '$RUNAS_ID:nobody'"
+ create_small_file $file
+ chown $RUNAS_ID:$GROUP $file ||
+ error "cannot chown '$file' to '$RUNAS_ID:$GROUP'"
- set_hsm_param user_request_mask ""
$RUNAS $LFS hsm_$action $file &&
error "$action by user should fail"
error "$action by user should succeed"
# Group.
- rm -f $file
- make_small $file
+ create_small_file $file
chown nobody:$RUNAS_GID $file ||
error "cannot chown '$file' to 'nobody:$RUNAS_GID'"
- set_hsm_param group_request_mask ""
$RUNAS $LFS hsm_$action $file &&
error "$action by group should fail"
error "$action by group should succeed"
# Other.
- rm -f $file
- make_small $file
- chown nobody:nobody $file ||
- error "cannot chown '$file' to 'nobody:nobody'"
+ create_small_file $file
+ chown nobody:$GROUP $file ||
+ error "cannot chown '$file' to 'nobody:$GROUP'"
- set_hsm_param other_request_mask ""
$RUNAS $LFS hsm_$action $file &&
error "$action by other should fail"
set_hsm_param other_request_mask $action
$RUNAS $LFS hsm_$action $file ||
error "$action by other should succeed"
-
- copytool_cleanup
- cleanup_test_24c
}
run_test 24c "check that user,group,other request masks work"
-cleanup_test_24d() {
- trap 0
- mount -o remount,rw $MOUNT2
-}
-
test_24d() {
local file1=$DIR/$tdir/$tfile
local file2=$DIR2/$tdir/$tfile
local fid1
local fid2
- copytool_setup
+ fid1=$(create_small_file $file1)
- mkdir -p $DIR/$tdir
- rm -f $file1
- fid1=$(make_small $file1)
+ echo $fid1
+ $LFS getstripe $file1
+
+ stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT
+ zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" ||
+ error "cannot mount '$MOUNT3' on '$SINGLEAGT'"
- trap cleanup_test_24d EXIT
+ copytool setup -m "$MOUNT3"
+ stack_trap "mount -o remount,rw \"$MOUNT2\"" EXIT
mount -o remount,ro $MOUNT2
+ do_nodes $(comma_list $(nodes_list)) $LCTL clear
+
fid2=$(path2fid $file2)
[ "$fid1" == "$fid2" ] ||
error "FID mismatch '$fid1' != '$fid2'"
error "archive should fail on read-only mount"
check_hsm_flags $file1 "0x00000000"
- $LFS hsm_archive $file1
+ $LFS hsm_archive $file1 || error "Fail to archive $file1"
wait_request_state $fid1 ARCHIVE SUCCEED
$LFS hsm_release $file1
wait_request_state $fid1 RESTORE SUCCEED
$LFS hsm_release $file1 || error "cannot release '$file1'"
- dd if=$file2 of=/dev/null bs=1M || "cannot read '$file2'"
+ dd if=$file2 of=/dev/null bs=1M || error "cannot read '$file2'"
$LFS hsm_release $file2 &&
error "release should fail on read-only mount"
- copytool_cleanup
- cleanup_test_24d
+ return 0
}
run_test 24d "check that read-only mounts are respected"
test_24e() {
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup
local f=$DIR/$tdir/$tfile
local fid
- fid=$(make_small $f) || error "cannot create $f"
+ fid=$(create_small_file $f) || error "cannot create $f"
$LFS hsm_archive $f || error "cannot archive $f"
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f || error "cannot release $f"
done
tar -cf $TMP/$tfile.tar $DIR/$tdir || error "cannot tar $DIR/$tdir"
-
- copytool_cleanup
}
run_test 24e "tar succeeds on HSM released files" # LU-6213
test_24f() {
-
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir/d1
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/hosts $f)
sum0=$(md5sum $f)
echo $sum0
- $LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $f ||
+ $LFS hsm_archive $f ||
error "hsm_archive failed"
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f || error "cannot release $f"
sum1=$(md5sum $f)
echo "Sum0 = $sum0, sum1 = $sum1"
[ "$sum0" == "$sum1" ] || error "md5sum mismatch for '$tfile'"
-
- copytool_cleanup
}
run_test 24f "root can archive, release, and restore tar files"
+test_24g() {
+ # Archive a file, then overwrite it through $RUNAS and check that the
+ # HSM flags go from 0x00000009 to 0x0000000b (dirty bit added).
+ # Skipped when the MDS version is older than 2.11.56.
+ [ $MDS1_VERSION -lt $(version_code 2.11.56) ] &&
+ skip "need MDS version 2.11.56 or later"
+
+ local file=$DIR/$tdir/$tfile
+ local fid
+
+ echo "RUNAS = '$RUNAS'"
+
+ copytool setup
+
+ # Directory is made rwx for everyone - presumably so the $RUNAS
+ # identity can reach the file; confirm against the framework setup.
+ mkdir -p $DIR/$tdir
+ chmod ugo+rwx $DIR/$tdir
+
+ # Create the file with initial content (tee also echoes it to stdout),
+ # record its FID, and make it readable/writable by everyone.
+ echo "Please listen carefully as our options have changed." | tee $file
+ fid=$(path2fid $file)
+ chmod ugo+rw $file
+
+ # Archive and wait for the request to succeed before checking flags.
+ $LFS hsm_archive $file
+ wait_request_state $fid ARCHIVE SUCCEED
+ check_hsm_flags $file 0x00000009 # exists archived
+
+ # Second write goes through $RUNAS; the flags must now include dirty.
+ echo "To be electrocuted by your telephone, press #." | $RUNAS tee $file
+ check_hsm_flags $file 0x0000000b # exists dirty archived
+}
+run_test 24g "write by non-owner still sets dirty" # LU-11369
+
test_25a() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
copy2archive /etc/hosts $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
$LFS hsm_set --lost $f
local st=$?
[[ $st == 1 ]] || error "lost file access should failed (returns $st)"
-
- copytool_cleanup
}
run_test 25a "Restore lost file (HS_LOST flag) from import"\
" (Operation not permitted)"
test_25b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
st=$?
[[ $st == 1 ]] || error "lost file access should failed (returns $st)"
-
- copytool_cleanup
}
run_test 25b "Restore lost file (HS_LOST flag) after release"\
" (Operation not permitted)"
-test_26() {
+test_26A() { # was test_26
# test needs a running copytool
- copytool_setup
+ copytool setup
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
wait_request_state $fid REMOVE SUCCEED
check_hsm_flags $f "0x00000000"
-
- copytool_cleanup
}
-run_test 26 "Remove the archive of a valid file"
+run_test 26A "Remove the archive of a valid file"
+
+test_26a() {
+ local raolu=$(get_hsm_param remove_archive_on_last_unlink)
+ [[ $raolu -eq 0 ]] || error "RAoLU policy should be off"
-test_27a() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
- make_archive $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
- local fid=$(path2fid $f)
+ local fid=$(copy_file /etc/passwd $f)
- $LFS hsm_remove $f
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE SUCCEED
- [[ $? != 0 ]] || error "Remove of a released file should fail"
+ local f2=$DIR/$tdir/${tfile}_2
+ local fid2=$(copy_file /etc/passwd $f2)
- copytool_cleanup
-}
-run_test 27a "Remove the archive of an imported file (Operation not permitted)"
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2
+ wait_request_state $fid2 ARCHIVE SUCCEED
-test_27b() {
- # test needs a running copytool
- copytool_setup
+ local f3=$DIR/$tdir/${tfile}_3
+ local fid3=$(copy_file /etc/passwd $f3)
- mkdir -p $DIR/$tdir
- local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f3
+ wait_request_state $fid3 ARCHIVE SUCCEED
- $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
- wait_request_state $fid ARCHIVE SUCCEED
- $LFS hsm_release $f
+ # set a long grace_delay vs short loop_period
+ local orig_loop_period=$(get_hsm_param loop_period)
+ local orig_grace_delay=$(get_hsm_param grace_delay)
+ stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT
+ set_hsm_param loop_period 10
+ stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT
+ set_hsm_param grace_delay 100
- $LFS hsm_remove $f
+ rm -f $f
- [[ $? != 0 ]] || error "Remove of a released file should fail"
+ stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT
+ set_hsm_param remove_archive_on_last_unlink 1
+
+ ln "$f3" "$f3"_bis || error "Unable to create hard-link"
+ rm -f $f3
+
+ rm -f $f2
- copytool_cleanup
+ wait_request_state $fid2 REMOVE SUCCEED
+
+ assert_request_count $fid REMOVE 0 \
+ "Unexpected archived data remove request for $f"
+ assert_request_count $fid3 REMOVE 0 \
+ "Unexpected archived data remove request for $f3"
}
-run_test 27b "Remove the archive of a relased file (Operation not permitted)"
+run_test 26a "Remove Archive On Last Unlink (RAoLU) policy"
-test_28() {
+test_26b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(copy_file /etc/passwd $f)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
- cdt_disable
- $LFS hsm_remove $f
+ stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT
+ set_hsm_param remove_archive_on_last_unlink 1
+
+ cdt_shutdown
+ cdt_check_state stopped
rm -f $f
+ wait_request_state $fid REMOVE WAITING
+
cdt_enable
- wait_request_state $fid REMOVE SUCCEED
+ # copytool must re-register
+ kill_copytools
+ wait_copytools || error "copytool failed to stop"
+ copytool setup
- copytool_cleanup
+ wait_request_state $fid REMOVE SUCCEED
}
-run_test 28 "Concurrent archive/file remove"
+run_test 26b "RAoLU policy when CDT off"
-test_29a() {
- # Tests --mntpath and --archive options
+test_26c() {
+ # RAoLU (remove_archive_on_last_unlink) with an open file: two archived
+ # files are unlinked while the first one is still held open by a paused
+ # multiop. A REMOVE request must succeed for the second file at once,
+ # none may exist for the first until multiop has exited.
+ # test needs a running copytool
+ copytool setup
+
+ mkdir -p $DIR/$tdir
+ local f=$DIR/$tdir/$tfile
+ local fid=$(copy_file /etc/passwd $f)
+
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ local f2=$DIR/$tdir/${tfile}_2
+ local fid2=$(copy_file /etc/passwd $f2)
+
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2
+ wait_request_state $fid2 ARCHIVE SUCCEED
+
+ # set a long grace_delay vs short loop_period
+ # (each restore trap is registered before the value is changed)
+ local orig_loop_period=$(get_hsm_param loop_period)
+ local orig_grace_delay=$(get_hsm_param grace_delay)
+ stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT
+ set_hsm_param loop_period 10
+ stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT
+ set_hsm_param grace_delay 100
+
+ stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT
+ set_hsm_param remove_archive_on_last_unlink 1
+
+ # keep $f open in the background while both files are unlinked
+ multiop_bg_pause $f O_c || error "open $f failed"
+ local pid=$!
+
+ rm -f $f
+ rm -f $f2
+
+ wait_request_state $fid2 REMOVE SUCCEED
+ assert_request_count $fid REMOVE 0 \
+ "Unexpected archived data remove request for $f"
+
+ kill -USR1 $pid || error "multiop early exit"
+ # should reach autotest timeout if multiop fails to trap
+ # signal, close file, and exit ...
+ # report the local $pid here: $PID is never set in this test
+ wait $pid || error "wait PID $pid failed"
+
+ wait_request_state $fid REMOVE SUCCEED
+}
+run_test 26c "RAoLU effective when file closed"
+
+test_26d() {
+ # RAoLU (remove_archive_on_last_unlink) with client eviction: an
+ # archived file is unlinked while a paused multiop still has it open,
+ # the client is evicted, and the REMOVE request must then succeed.
+ # test needs a running copytool
+ copytool setup
+
+ mkdir -p $DIR/$tdir
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_small_file $f)
+
+ $LFS hsm_archive $f || error "could not archive file"
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ # set a long grace_delay vs short loop_period
+ # (each restore trap is registered before the value is changed)
+ local orig_loop_period=$(get_hsm_param loop_period)
+ local orig_grace_delay=$(get_hsm_param grace_delay)
+ stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT
+ set_hsm_param loop_period 10
+ stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT
+ set_hsm_param grace_delay 100
+
+ # enable the policy for this test only; the trap switches it back to 0
+ stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT
+ set_hsm_param remove_archive_on_last_unlink 1
+
+ # hold $f open in the background and remember the multiop PID
+ multiop_bg_pause $f O_c || error "multiop failed"
+ local MULTIPID=$!
+
+ rm -f $f
+
+ mds_evict_client
+
+ wait_request_state $fid REMOVE SUCCEED
+
+ # client_up is tried twice and its result is deliberately ignored
+ client_up || client_up || true
+
+ # signal the paused multiop and require that it exits successfully
+ kill -USR1 $MULTIPID
+ wait $MULTIPID || error "multiop close failed"
+}
+run_test 26d "RAoLU when Client eviction"
+
+test_27a() {
+ # test needs a running copytool
+ copytool setup
+
+ create_archive_file $tdir/$tfile
+ local f=$DIR/$tdir/$tfile
+ copytool import $tdir/$tfile $f
+ local fid=$(path2fid $f)
+
+ $LFS hsm_remove $f
+
+ [[ $? != 0 ]] || error "Remove of a released file should fail"
+}
+run_test 27a "Remove the archive of an imported file (Operation not permitted)"
+
+# test_27b: `lfs hsm_remove` must be refused for a released file.
+#
+# An empty file is archived (waiting for the ARCHIVE request to reach
+# SUCCEED), released, and hsm_remove on it is then required to exit
+# non-zero.
+test_27b() {
+ # test needs a running copytool
+ copytool setup
+
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_empty_file "$f")
+
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE SUCCEED
+ $LFS hsm_release $f
+
+ # Test the exit status directly instead of through $?, so nothing can
+ # slip in between the command and its check.
+ if $LFS hsm_remove $f; then
+ error "Remove of a released file should fail"
+ fi
+}
+run_test 27b "Remove the archive of a released file (Operation not permitted)"
+
+# test_28: an archive-remove request and the unlink of the same file are
+# both issued while cdt_disable is in effect.
+#
+# After the file has been archived, hsm_remove and rm are run between
+# cdt_disable and cdt_enable; the test then waits for the REMOVE request on
+# the file's FID to reach SUCCEED.
+test_28() {
+ # test needs a running copytool
+ copytool setup
+
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_empty_file "$f")
+
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ # Presumably cdt_disable keeps the coordinator from serving the remove
+ # request until cdt_enable below, so the unlink happens while the
+ # request is still pending -- TODO confirm against the helper.
+ cdt_disable
+ $LFS hsm_remove $f
+
+ rm -f $f
+
+ cdt_enable
+
+ wait_request_state $fid REMOVE SUCCEED
+}
+run_test 28 "Concurrent archive/file remove"
+
+test_29a() {
+ # Tests --mntpath and --archive options
local archive_id=7
- copytool_setup $SINGLEAGT $MOUNT $archive_id
+ copytool setup -m "$MOUNT" -a $archive_id
# Bad archive number
- $LFS hsm_remove -m $MOUNT -a 33 0x857765760:0x8:0x2 2>&1 |
+ $LFS hsm_remove -m "$MOUNT" -a 33 0x857765760:0x8:0x2 2>&1 |
grep "Invalid argument" ||
error "unexpected hsm_remove failure (1)"
# mntpath is present but file is given
- $LFS hsm_remove --mntpath $MOUNT --archive 30 /qwerty/uyt 2>&1 |
+ $LFS hsm_remove --mntpath "$MOUNT" --archive 30 /qwerty/uyt 2>&1 |
grep "hsm: '/qwerty/uyt' is not a valid FID" ||
error "unexpected hsm_remove failure (2)"
-
- copytool_cleanup
}
run_test 29a "Tests --mntpath and --archive options"
test_29b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid=$(make_small $f)
+ local fid=$(create_small_file $f)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_remove -m $MOUNT -a $HSM_ARCHIVE_NUMBER $fid
wait_request_state $fid REMOVE SUCCEED
-
- copytool_cleanup
}
run_test 29b "Archive/delete/remove by FID from the archive."
test_29c() {
# test needs a running copytool
- copytool_setup
+ copytool setup
- mkdir -p $DIR/$tdir
- local fid1=$(make_small $DIR/$tdir/$tfile-1)
- local fid2=$(make_small $DIR/$tdir/$tfile-2)
- local fid3=$(make_small $DIR/$tdir/$tfile-3)
+ local fid1=$(create_small_file $DIR/$tdir/$tfile-1)
+ local fid2=$(create_small_file $DIR/$tdir/$tfile-2)
+ local fid3=$(create_small_file $DIR/$tdir/$tfile-3)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$tfile-[1-3]
wait_request_state $fid1 ARCHIVE SUCCEED
wait_request_state $fid1 REMOVE SUCCEED
wait_request_state $fid2 REMOVE SUCCEED
wait_request_state $fid3 REMOVE SUCCEED
-
- copytool_cleanup
}
run_test 29c "Archive/delete/remove by FID, using a file list."
+# test_29d: hsm_remove by FID with archive id 0 on an unlinked file.
+#
+# One copytool is started per agent (agt1..agt$AGTCOUNT), each with its own
+# archive id. A file is archived, unlinked, and then removed from the
+# archive by FID with "-a 0". The test expects AGTCOUNT + 1 REMOVE requests
+# for that FID, exactly one of which ends in SUCCEED while every agent's
+# request is accounted for as either SUCCEED or FAILED.
+test_29d() {
+ # test needs more than one CT
+ needclients 3 || return 0
+
+ local n
+ local file
+ local fid
+
+ # start all of the copytools
+ for n in $(seq $AGTCOUNT); do
+ copytool setup -f agt$n -a $n
+ done
+
+ # archive files
+ file=$DIR/$tdir/$tfile
+ fid=$(create_small_file $file)
+
+ $LFS hsm_archive $file
+ wait_request_state $fid ARCHIVE SUCCEED
+ check_hsm_flags $file "0x00000009"
+
+ rm -f $file
+
+ $LFS hsm_remove --mntpath "$MOUNT" -a 0 $fid ||
+ error "cannot hsm_remove '$fid'"
+
+ # give the CDT time to handle the remove request and to create the
+ # broadcast requests
+ sleep 2
+
+ # has the remove request been broadcast?
+ local cnt=$(get_request_count $fid REMOVE)
+ # broadcast requests + the original one
+ [[ $cnt -eq $((AGTCOUNT + 1)) ]] ||
+ error "remove not broadcasted to all CTs"
+
+ # give the CDT and the CTs time to handle the broadcast requests
+ wait_for_loop_period
+
+ # each agent serves one different archive_id, so broadcasted
+ # hsm_remove request should only succeed once and fail at all others
+ local res
+ local scnt=0
+ local fcnt=0
+ for n in $(seq $AGTCOUNT); do
+ # On the MDS, select the actions line for this FID whose
+ # action is REMOVE with archive#=$n, print its 13th field
+ # and keep what follows the '='; that value is compared
+ # with SUCCEED / FAILED below.
+ res=$(do_facet $SINGLEMDS "$LCTL get_param -n \
+ $HSM_PARAM.actions | awk \
+ '/'$fid'.*action=REMOVE archive#='$n'/ \
+ {print \\\$13}' | cut -f2 -d=")
+ if [[ "$res" == "SUCCEED" ]]; then
+ scnt=$((scnt + 1))
+ elif [[ "$res" == "FAILED" ]]; then
+ fcnt=$((fcnt + 1))
+ fi
+ done
+
+ [[ $scnt -eq 1 ]] ||
+ error "one and only CT should have removed successfully"
+
+ # every agent must have reported either SUCCEED or FAILED
+ [[ $AGTCOUNT -eq $((scnt + fcnt)) ]] ||
+ error "all but one CT should have failed to remove"
+}
+run_test 29d "hsm_remove by FID with archive_id 0 for unlinked file cause "\
+ "request to be sent once for each registered archive_id"
+
test_30a() {
# restore at exec cannot work on agent node (because of Linux kernel
# protection of executables)
needclients 2 || return 0
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
copy2archive /bin/true $tdir/$tfile
local f=$DIR/$tdir/true
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
local fid=$(path2fid $f)
+ stack_trap "cdt_clear_no_retry" EXIT
# set no retry action mode
cdt_set_no_retry
do_node $CLIENT2 $f
local st=$?
- # cleanup
- # remove no try action mode
- cdt_clear_no_retry
$LFS hsm_state $f
[[ $st == 0 ]] || error "Failed to exec a released file"
-
- copytool_cleanup
}
run_test 30a "Restore at exec (import case)"
needclients 2 || return 0
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/true
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
$LFS hsm_state $f
+
+ stack_trap cdt_clear_no_retry EXIT
# set no retry action mode
cdt_set_no_retry
+
do_node $CLIENT2 $f
local st=$?
- # cleanup
- # remove no try action mode
- cdt_clear_no_retry
$LFS hsm_state $f
[[ $st == 0 ]] || error "Failed to exec a released file"
-
- copytool_cleanup
}
run_test 30b "Restore at exec (release case)"
needclients 2 || return 0
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/SLEEP
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
check_hsm_flags $f "0x0000000d"
+
+ stack_trap cdt_clear_no_retry EXIT
# set no retry action mode
cdt_set_no_retry
+
do_node $CLIENT2 "$f 10" &
local pid=$!
sleep 3
error "Binary overwritten during exec"
fi
- # cleanup
- # remove no try action mode
- cdt_clear_no_retry
check_hsm_flags $f "0x00000009"
-
- copytool_cleanup
}
run_test 30c "Update during exec of released file must fail"
while [[ "$st" != "0x00000009" && $cpt -le 10 ]]
do
n=$(stat -c "%s" $f)
- # we echo in both cases to show stat is not
- # hang
+ # we echo in both cases to show stat does not hang
if [[ $n != $s ]]; then
echo "size seen is $n != $s"
err=1
else
echo "size seen is right: $n == $s"
fi
- st=$(get_hsm_flags $f)
sleep 10
cpt=$((cpt + 1))
+ st=$(get_hsm_flags $f)
done
- if [[ $cpt -lt 10 ]]; then
+ if [[ "$st" = "0x00000009" ]]; then
echo " "done
else
echo " restore is too long"
test_31a() {
# test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup
- make_archive $tdir/$tfile
+ create_archive_file $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
local fid=$($LFS path2fid $f)
- HSM_ARCHIVE_PURGE=false copytool_setup
+ copytool setup
restore_and_check_size $f $fid
local err=$?
[[ $err -eq 0 ]] || error "File size changed during restore"
-
- copytool_cleanup
}
run_test 31a "Import a large file and check size during restore"
test_31b() {
# test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_file "$f" 1MB 39)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
local err=$?
[[ $err -eq 0 ]] || error "File size changed during restore"
-
- copytool_cleanup
}
run_test 31b "Restore a large unaligned file and check size during restore"
test_31c() {
# test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress_aligned $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_file "$f" 1M 39)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
local err=$?
[[ $err -eq 0 ]] || error "File size changed during restore"
-
- copytool_cleanup
}
run_test 31c "Restore a large aligned file and check size during restore"
test_33() {
- # test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
-
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
+
+ copytool setup
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
- # to be sure wait_all_done will not be mislead by previous tests
- # and ops.
- cdt_purge
- wait_for_grace_delay
- # Also raise grace_delay significantly so the Canceled
- # Restore action will stay enough long avail.
- local old_grace=$(get_hsm_param grace_delay)
- set_hsm_param grace_delay 100
+ # Prevent restore from completing
+ copytool_suspend
+ # Implicit restore
md5sum $f >/dev/null &
local pid=$!
- wait_request_state $fid RESTORE STARTED
+ wait_request_state $fid RESTORE STARTED
kill -15 $pid
- sleep 1
-
- # Check restore trigger process was killed
- local killed=$(ps -o pid,comm hp $pid >/dev/null)
-
- $LFS hsm_cancel $f
-
- # instead of waiting+checking both Restore and Cancel ops
- # sequentially, wait for both to be finished and then check
- # each results.
- wait_all_done 100 $fid
- local rstate=$(get_request_state $fid RESTORE)
- local cstate=$(get_request_state $fid CANCEL)
-
- # restore orig grace_delay.
- set_hsm_param grace_delay $old_grace
-
- if [[ "$rstate" == "CANCELED" ]] ; then
- [[ "$cstate" == "SUCCEED" ]] ||
- error "Restore state is CANCELED and Cancel state " \
- "is not SUCCEED but $cstate"
- echo "Restore state is CANCELED, Cancel state is SUCCEED"
- elif [[ "$rstate" == "SUCCEED" ]] ; then
- [[ "$cstate" == "FAILED" ]] ||
- error "Restore state is SUCCEED and Cancel state " \
- "is not FAILED but $cstate"
- echo "Restore state is SUCCEED, Cancel state is FAILED"
- else
- error "Restore state is $rstate and Cancel state is $cstate"
- fi
- [ -z $killed ] ||
- error "Cannot kill process waiting for restore ($killed)"
+ copytool_continue
- copytool_cleanup
+ # Check restore trigger process was killed
+ wait $pid
+ [ $? -eq 143 ] || error "md5sum was not 'Terminated'"
}
run_test 33 "Kill a restore waiting process"
+# test_34: a released file can be unlinked while its restore is in progress.
+#
+# The copytool is suspended so the restore triggered by the background
+# md5sum stays in the STARTED state; rm must then finish within the
+# 1-second timeout. Once the copytool continues, the RESTORE request must
+# reach SUCCEED, md5sum must have exited with status 0, and the file must
+# be gone.
test_34() {
 # test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup -b 1
 local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
 wait_request_state $fid ARCHIVE SUCCEED
 $LFS hsm_release $f
+ # Prevent restore from completing
+ copytool_suspend
+
 md5sum $f >/dev/null &
 local pid=$!
+
 wait_request_state $fid RESTORE STARTED
- rm $f || error "rm $f failed"
 # rm must not block during restore
- wait_request_state $fid RESTORE STARTED
+ timeout --signal=KILL 1 rm "$f" || error "rm $f failed"
+ copytool_continue
 wait_request_state $fid RESTORE SUCCEED
- # check md5sum pgm finished
- local there=$(ps -o pid,comm hp $pid >/dev/null)
- [[ -z $there ]] || error "Restore initiator does not exit"
- local rc=$(wait $pid)
- [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc"
+ # Check md5sum pgm finished
+ kill -0 $pid && error "Restore initiator still running"
+ wait $pid || error "Restore initiator failed with $?"
- copytool_cleanup
+ # Check the file was actually deleted
+ [ ! -f "$f" ] || error "$f was not deleted"
}
run_test 34 "Remove file during restore"
+# test_35: a released file can be overwritten by mv while its restore is in
+# progress.
+#
+# The copytool is suspended so the restore triggered by the background
+# md5sum stays in the STARTED state; renaming $f1 over $f must then finish
+# within the 1-second timeout. Once the copytool continues, the RESTORE
+# request must reach SUCCEED, md5sum must have exited with status 0, and
+# the FID found at $f must be the one $f1 had.
test_35() {
 # test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup -b 1
 local f=$DIR/$tdir/$tfile
 local f1=$DIR/$tdir/$tfile-1
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
-
+ local fid=$(create_empty_file "$f")
 local fid1=$(copy_file /etc/passwd $f1)
+
 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
 wait_request_state $fid ARCHIVE SUCCEED
 $LFS hsm_release $f
+ # Prevent restore from completing
+ copytool_suspend
+
 md5sum $f >/dev/null &
 local pid=$!
+
 wait_request_state $fid RESTORE STARTED
- mv $f1 $f || error "mv $f1 $f failed"
 # mv must not block during restore
- wait_request_state $fid RESTORE STARTED
+ timeout --signal=KILL 1 mv "$f1" "$f" || error "mv $f1 $f failed"
+ copytool_continue
 wait_request_state $fid RESTORE SUCCEED
- # check md5sum pgm finished
- local there=$(ps -o pid,comm hp $pid >/dev/null)
- [[ -z $there ]] || error "Restore initiator does not exit"
- local rc=$(wait $pid)
- [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc"
+ # Check md5sum pgm finished
+ kill -0 $pid && error "Restore initiator still running"
+ wait $pid || error "Restore initiator failed with $?"
- fid2=$(path2fid $f)
+ local fid2=$(path2fid $f)
 [[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1"
-
- copytool_cleanup
}
run_test 35 "Overwrite file during restore"
test_36() {
# test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
+ copytool setup -b 1
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
+ # Prevent restore from completing
+ copytool_suspend
+
md5sum $f >/dev/null &
local pid=$!
- wait_request_state $fid RESTORE STARTED
- mv $f $f.new
- # rm must not block during restore
wait_request_state $fid RESTORE STARTED
- wait_request_state $fid RESTORE SUCCEED
- # check md5sum pgm finished
- local there=$(ps -o pid,comm hp $pid >/dev/null)
- [[ -z $there ]] ||
- error "Restore initiator does not exit"
+ # mv must not block during restore
+ timeout --signal=KILL 10 mv "$f" "$f.new" ||
+ error "mv '$f' '$f.new' failed with rc=$?"
- local rc=$(wait $pid)
- [[ $rc -eq 0 ]] ||
- error "Restore initiator failed with $rc"
+ copytool_continue
+ wait_request_state $fid RESTORE SUCCEED
- copytool_cleanup
+ # Check md5sum pgm finished
+ kill -0 $pid && error "Restore initiator is still running"
+ wait $pid || error "Restore initiator failed with $?"
}
run_test 36 "Move file during restore"
test_37() {
# LU-5683: check that an archived dirty file can be rearchived.
- copytool_cleanup
- copytool_setup $SINGLEAGT $MOUNT2
+ copytool setup
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid
- fid=$(make_small $f) || error "cannot create small file"
+ fid=$(create_small_file $f) || error "cannot create small file"
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f || error "cannot release $f"
+ # Allow previous archive request to expire from the actions log.
+ wait_for_grace_delay
+
# Dirty file.
dd if=/dev/urandom of=$f bs=1M count=1 || error "cannot dirty file"
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
-
- copytool_cleanup
}
run_test 37 "re-archive a dirty file"
local i=""
local p=""
local fid=""
+ local max_requests=$(get_hsm_param max_requests)
+
+ stack_trap "set_hsm_param max_requests $max_requests" EXIT
+ # Increase the number of HSM requests that can be performed in
+ # parallel. With the coordinator running once per second, this
+ # also limits the number of requests per second that can be
+ # performed, so we pick a decent number. But we also need to keep
+ # that number low because the copytool has no rate limit and will
+ # fail some requests if it gets too many at once.
+ set_hsm_param max_requests 300
for i in $(seq 1 $file_count); do
for p in $(seq 1 $stream_count); do
fid=$(copy_file /etc/hosts $f.$p.$i)
done
done
- # force copytool to use a local/temp archive dir to ensure best
- # performance vs remote/NFS mounts used in auto-tests
- if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then
- copytool_setup
- else
- copytool_setup $SINGLEAGT $MOUNT $HSM_ARCHIVE_NUMBER $TMP/$tdir
- fi
+
+ copytool setup
+
# to be sure wait_all_done will not be mislead by previous tests
cdt_purge
wait_for_grace_delay
wait ${pids[*]}
echo OK
wait_all_done 100
- copytool_cleanup
}
run_test 40 "Parallel archive requests"
test_52() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid=$(copy_file /etc/motd $f 1)
+ local fid=$(create_small_file $f)
$LFS hsm_archive $f || error "could not archive file"
wait_request_state $fid ARCHIVE SUCCEED
wait $MULTIPID || error "multiop close failed"
check_hsm_flags $f "0x0000000b"
-
- copytool_cleanup
}
run_test 52 "Opened for write file on an evicted client should be set dirty"
test_53() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid=$(copy_file /etc/motd $f 1)
+ local fid=$(create_small_file $f)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
wait $MULTIPID || error "multiop close failed"
check_hsm_flags $f "0x00000009"
-
- copytool_cleanup
}
run_test 53 "Opened for read file on an evicted client should not be set dirty"
test_54() {
- # test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid=$(make_large_for_progress $f)
+ local fid=$(create_file "$f" 1MB 39)
+
+ copytool setup -b 1
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
check_hsm_flags $f "0x00000001"
+ stack_trap "cdt_clear_no_retry" EXIT
# Avoid coordinator resending this request as soon it has failed.
cdt_set_no_retry
wait_request_state $fid ARCHIVE FAILED
check_hsm_flags $f "0x00000003"
-
- cdt_clear_no_retry
- copytool_cleanup
}
run_test 54 "Write during an archive cancels it"
test_55() {
- # test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid=$(make_large_for_progress $f)
+ local fid=$(create_file "$f" 1MB 39)
+
+ copytool setup -b 1
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
check_hsm_flags $f "0x00000001"
+ stack_trap "cdt_clear_no_retry" EXIT
# Avoid coordinator resending this request as soon it has failed.
cdt_set_no_retry
wait_request_state $fid ARCHIVE FAILED
check_hsm_flags $f "0x00000003"
-
- cdt_clear_no_retry
- copytool_cleanup
}
run_test 55 "Truncate during an archive cancels it"
test_56() {
- # test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_file "$f" 1MB 39)
+
+ copytool setup -b 1
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
wait_request_state $fid ARCHIVE SUCCEED
check_hsm_flags $f "0x00000009"
-
- copytool_cleanup
}
run_test 56 "Setattr during an archive is ok"
needclients 2 || return 0
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/test_archive_remote
error "hsm_restore failed"
wait_request_state $fid RESTORE SUCCEED
-
- copytool_cleanup
}
run_test 57 "Archive a file with dirty cache on another node"
test_58() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
echo "truncate to 0"
truncate_released_file /etc/passwd 0
-
- copytool_cleanup
}
run_test 58 "Truncate a released file will trigger restore"
test_59() {
local fid
- local server_version=$(lustre_version_code $SINGLEMDS)
- [[ $server_version -lt $(version_code 2.7.63) ]] &&
- skip "Need MDS version at least 2.7.63" && return
+ [[ $MDS1_VERSION -lt $(version_code 2.7.63) ]] &&
+ skip "Need MDS version at least 2.7.63"
- copytool_setup
+ copytool setup
$MCREATE $DIR/$tfile || error "mcreate failed"
$TRUNCATE $DIR/$tfile 42 || error "truncate failed"
$LFS hsm_archive $DIR/$tfile || error "archive request failed"
fid=$(path2fid $DIR/$tfile)
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $DIR/$tfile || error "release failed"
- copytool_cleanup
}
run_test 59 "Release stripeless file with non-zero size"
# This test validates the fix for LU-4512. Ensure that the -u
# option changes the progress reporting interval from the
# default (30 seconds) to the user-specified interval.
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_file "$f" 1M 10)
+
local interval=5
local progress_timeout=$((interval * 4))
-
- # test needs a new running copytool
- copytool_cleanup
- HSMTOOL_UPDATE_INTERVAL=$interval copytool_setup
-
- mkdir -p $DIR/$tdir
- local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ copytool setup -b 1 --update-interval $interval
local mdtidx=0
local mdt=${MDT_PREFIX}${mdtidx}
error "could not archive file"
local agent=$(facet_active_host $SINGLEAGT)
- local prefix=$TESTLOG_PREFIX
- [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME
- local copytool_log=$prefix.copytool_log.$agent.log
-
+ local logfile=$(copytool_logfile $SINGLEAGT)
wait_update $agent \
- "grep -o start.copy $copytool_log" "start copy" 100 ||
+ "grep -o start.copy \"$logfile\"" "start copy" 100 ||
error "copytool failed to start"
local cmd="$LCTL get_param -n ${mdt}.hsm.active_requests"
echo -n "Expecting a progress update within $progress_timeout seconds... "
while [ true ]; do
RESULT=$(do_node $(facet_active_host $mds) "$cmd")
- if [ $RESULT -gt 0 ]; then
+ if [ -n "$RESULT" ] && [ "$RESULT" -gt 0 ]; then
echo "$RESULT bytes copied in $WAIT seconds."
break
elif [ $WAIT -ge $progress_timeout ]; then
local elapsed=$((finish_at - start_at))
# Ensure that the progress update occurred within the expected window.
- if [ $elapsed -lt $interval ]; then
+ if [ $elapsed -lt $((interval - 1)) ]; then
error "Expected progress update after at least $interval seconds"
fi
- cdt_clear_no_retry
- copytool_cleanup
+ echo "Wait for on going archive hsm action to complete"
+ wait_update $agent "grep -o copied \"$logfile\"" "copied" 10 ||
+ echo "File archiving not completed even after 10 secs"
}
run_test 60 "Changing progress update interval from default"
-test_70() {
- # test needs a new running copytool
- copytool_cleanup
- copytool_monitor_setup
- HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup
-
- # Just start and stop the copytool to generate events.
- cdt_clear_no_retry
+# test_61: an archive request that is still pending when its file is
+# removed must end up FAILED.
+#
+# The archive request is submitted and the file is removed between
+# cdt_disable and cdt_enable; the test then waits for the ARCHIVE request
+# on the file's FID to reach the FAILED state.
+test_61() {
+ # test needs a running copytool
+ copytool setup
+
+ mkdir -p $DIR/$tdir
+ local f=$DIR/$tdir/$tfile
+ local fid=$(copy_file /etc/passwd $f)
+ # Presumably cdt_disable keeps the coordinator from serving the request
+ # until cdt_enable, so the file is gone before the archive is attempted
+ # -- TODO confirm against the helper.
+ cdt_disable
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ rm -f $f
+ cdt_enable
+ wait_request_state $fid ARCHIVE FAILED
+}
+run_test 61 "Waiting archive of a removed file should fail"
+
+test_70() {
+ # test needs a new running copytool
+ stack_trap copytool_monitor_cleanup EXIT
+ copytool_monitor_setup
+ copytool setup --event-fifo "$HSMTOOL_MONITOR_DIR/fifo"
# Wait for the copytool to register.
wait_update --verbose $(facet_active_host mds1) \
uuid 100 ||
error "copytool failed to register with MDT0000"
- copytool_cleanup
+ kill_copytools
+ wait_copytools || error "Copytools failed to stop"
local REGISTER_EVENT
local UNREGISTER_EVENT
error "Copytool failed to send unregister event to FIFO"
fi
- copytool_monitor_cleanup
echo "Register/Unregister events look OK."
}
run_test 70 "Copytool logs JSON register/unregister events to FIFO"
local interval=5
# test needs a new running copytool
- copytool_cleanup
+ stack_trap copytool_monitor_cleanup EXIT
copytool_monitor_setup
- HSMTOOL_UPDATE_INTERVAL=$interval \
- HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup
+ copytool setup --update-interval $interval --event-fifo \
+ "$HSMTOOL_MONITOR_DIR/fifo"
+
+ stack_trap "cdt_clear_no_retry" EXIT
+ # Just start and stop the copytool to generate events.
+ cdt_clear_no_retry
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_small_file "$f")
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
error "could not archive file"
local expected_fields="event_time data_fid source_fid"
expected_fields+=" total_bytes current_bytes"
- local START_EVENT
- local FINISH_EVENT
+ local -A events=(
+ [ARCHIVE_START]=false
+ [ARCHIVE_FINISH]=false
+ [ARCHIVE_RUNNING]=false
+ )
while read event; do
# Make sure we're not getting anything from previous events.
for field in $expected_fields; do
fi
eval $parsed
- if [ $event_type == "ARCHIVE_START" ]; then
- START_EVENT=$event
- continue
- elif [ $event_type == "ARCHIVE_FINISH" ]; then
- FINISH_EVENT=$event
- continue
- elif [ $event_type != "ARCHIVE_RUNNING" ]; then
- continue
- fi
+ events["$event_type"]=true
+
+ [ "$event_type" != ARCHIVE_RUNNING ] && continue
# Do some simple checking of the progress update events.
for expected_field in $expected_fields; do
fi
done
- if [ $total_bytes -eq 0 ]; then
- error "Expected total_bytes to be > 0"
- fi
+ [ $total_bytes -gt 0 ] || error "Expected total_bytes to be > 0"
- # These should be identical throughout an archive
- # operation.
- if [ $source_fid != $data_fid ]; then
+ # These should be identical throughout an archive operation
+ [ $source_fid == $data_fid ] ||
error "Expected source_fid to equal data_fid"
- fi
done < <(echo $"$(get_copytool_event_log)")
- if [ -z "$START_EVENT" ]; then
- error "Copytool failed to send archive start event to FIFO"
- fi
-
- if [ -z "$FINISH_EVENT" ]; then
- error "Copytool failed to send archive finish event to FIFO"
- fi
+ # Check we received every type of event we were expecting
+ for event in "${!events[@]}"; do
+ ${events["$event"]} ||
+ error "Copytool failed to send '$event' event to FIFO"
+ done
echo "Archive events look OK."
-
- cdt_clear_no_retry
- copytool_cleanup
- copytool_monitor_cleanup
}
run_test 71 "Copytool logs JSON archive events to FIFO"
local interval=5
# test needs a new running copytool
- copytool_cleanup
+ stack_trap copytool_monitor_cleanup EXIT
copytool_monitor_setup
- HSMTOOL_UPDATE_INTERVAL=$interval \
- HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup
+ copytool setup --update-interval $interval --event-fifo \
+ "$HSMTOOL_MONITOR_DIR/fifo"
local test_file=$HSMTOOL_MONITOR_DIR/file
local cmd="dd if=/dev/urandom of=$test_file count=16 bs=1000000 "
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
f=$DIR2/$tdir/$tfile
echo "Verifying released state: "
check_hsm_flags $f "0x0000000d"
fi
echo "Restore events look OK."
-
- cdt_clear_no_retry
- copytool_cleanup
- copytool_monitor_cleanup
-
- rm -rf $test_dir
}
run_test 72 "Copytool logs JSON restore events to FIFO"
fid=$(copy_file /etc/hosts $f.$i)
echo $f.$i >> $FILELIST
done
- # force copytool to use a local/temp archive dir to ensure best
- # performance vs remote/NFS mounts used in auto-tests
- if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then
- copytool_setup
- else
- local dai=$(get_hsm_param default_archive_id)
- copytool_setup $SINGLEAGT $MOUNT $dai $TMP/$tdir
- fi
+
+ copytool setup
# to be sure wait_all_done will not be mislead by previous tests
cdt_purge
wait_for_grace_delay
$LFS hsm_restore --filelist $FILELIST ||
error "cannot restore a file list"
wait_all_done 100
- copytool_cleanup
}
run_test 90 "Archive/restore a file list"
test_103() {
# test needs a running copytool
- copytool_setup
+ copytool setup
local i=""
local fid=""
grep -v CANCELED | grep -v SUCCEED | grep -v FAILED")
[[ -z "$res" ]] || error "Some request have not been canceled"
-
- copytool_cleanup
}
run_test 103 "Purge all requests"
DATA=CEA
DATAHEX='[434541]'
test_104() {
- # test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
- # if cdt is on, it can serve too quickly the request
- cdt_disable
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f
local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\
$HSM_PARAM.actions |\
grep $fid | cut -f16 -d=")
- cdt_enable
[[ "$data1" == "$DATAHEX" ]] ||
error "Data field in records is ($data1) and not ($DATAHEX)"
- copytool_cleanup
+ cdt_purge
}
run_test 104 "Copy tool data field"
test_105() {
+ local max_requests=$(get_hsm_param max_requests)
mkdir -p $DIR/$tdir
local i=""
+ stack_trap "set_hsm_param max_requests $max_requests" EXIT
+ set_hsm_param max_requests 300
+
cdt_disable
for i in $(seq -w 1 10); do
cp /etc/passwd $DIR/$tdir/$i
$HSM_PARAM.actions |\
grep WAITING | wc -l")
cdt_restart
+
cdt_disable
local reqcnt2=$(do_facet $SINGLEMDS "$LCTL get_param -n\
$HSM_PARAM.actions |\
}
run_test 105 "Restart of coordinator"
-get_agent_by_uuid_mdt() {
- local uuid=$1
- local mdtidx=$2
- local mds=mds$(($mdtidx + 1))
- do_facet $mds "$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.agents |\
- grep $uuid"
-}
-
-check_agent_registered_by_mdt() {
- local uuid=$1
- local mdtidx=$2
- local mds=mds$(($mdtidx + 1))
- local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx)
- if [[ ! -z "$agent" ]]; then
- echo "found agent $agent on $mds"
- else
- error "uuid $uuid not found in agent list on $mds"
- fi
-}
-
-check_agent_unregistered_by_mdt() {
- local uuid=$1
- local mdtidx=$2
- local mds=mds$(($mdtidx + 1))
- local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx)
- if [[ -z "$agent" ]]; then
- echo "uuid not found in agent list on $mds"
- else
- error "uuid found in agent list on $mds: $agent"
- fi
-}
-
-check_agent_registered() {
- local uuid=$1
- local mdsno
- for mdsno in $(seq 1 $MDSCOUNT); do
- check_agent_registered_by_mdt $uuid $((mdsno - 1))
- done
-}
-
-check_agent_unregistered() {
- local uuid=$1
- local mdsno
- for mdsno in $(seq 1 $MDSCOUNT); do
- check_agent_unregistered_by_mdt $uuid $((mdsno - 1))
- done
-}
-
-get_agent_uuid() {
- local agent=${1:-$(facet_active_host $SINGLEAGT)}
-
- # Lustre mount-point is mandatory and last parameter on
- # copytool cmd-line.
- local mntpnt=$(do_rpc_nodes $agent pgrep -fl $HSMTOOL_BASE |
- grep -v pgrep | awk '{print $NF}')
- [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\
- "parameter"
- do_rpc_nodes $agent get_client_uuid $mntpnt | cut -d' ' -f2
-}
-
test_106() {
# test needs a running copytool
- copytool_setup
+ copytool setup
local uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT))
search_copytools || error "No copytool found"
- copytool_cleanup
+ kill_copytools
+ wait_copytools || error "Copytool failed to stop"
+
check_agent_unregistered $uuid
- copytool_setup
+ copytool setup
uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT))
check_agent_registered $uuid
-
- copytool_cleanup
}
run_test 106 "Copytool register/unregister"
test_107() {
+ [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return
+
# test needs a running copytool
- copytool_setup
+ copytool setup
# create and archive file
mkdir -p $DIR/$tdir
local f1=$DIR/$tdir/$tfile
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2
# main check of this sanity: this request MUST succeed
wait_request_state $fid ARCHIVE SUCCEED
- copytool_cleanup
}
run_test 107 "Copytool re-register after MDS restart"
test_110a() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
copy2archive /etc/passwd $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
local fid=$(path2fid $f)
cdt_set_non_blocking_restore
[[ $st == 1 ]] ||
error "md5sum returns $st != 1, "\
"should also perror ENODATA (No data available)"
-
- copytool_cleanup
}
run_test 110a "Non blocking restore policy (import case)"
test_110b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
[[ $st == 1 ]] ||
error "md5sum returns $st != 1, "\
"should also perror ENODATA (No data available)"
-
- copytool_cleanup
}
run_test 110b "Non blocking restore policy (release case)"
test_111a() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
copy2archive /etc/passwd $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
local fid=$(path2fid $f)
cdt_set_no_retry
# Test result
[[ $st == 0 ]] || error "Restore does not failed"
-
- copytool_cleanup
}
run_test 111a "No retry policy (import case), restore will error"\
" (No such file or directory)"
test_111b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/passwd $f)
+ stack_trap cdt_clear_no_retry EXIT
cdt_set_no_retry
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
wait_request_state $fid RESTORE FAILED
local st=$?
- # cleanup
- cdt_clear_no_retry
-
# Test result
[[ $st == 0 ]] || error "Restore does not failed"
-
- copytool_cleanup
}
run_test 111b "No retry policy (release case), restore will error"\
" (No such file or directory)"
test_112() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
echo $l
local res=$(echo $l | cut -f 2- -d" " | grep ARCHIVE)
- # cleanup
cdt_enable
wait_request_state $fid ARCHIVE SUCCEED
# Test result
[[ ! -z "$res" ]] || error "action is $l which is not an ARCHIVE"
-
- copytool_cleanup
}
run_test 112 "State of recorded request"
-test_200() {
- # test needs a running copytool
- copytool_setup
+test_113() {
+ local file1=$DIR/$tdir/$tfile
+ local file2=$DIR2/$tdir/$tfile
- mkdir -p $DIR/$tdir
+ local fid=$(create_small_sync_file $file1)
+
+ stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT
+ zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" ||
+ error "cannot mount '$MOUNT3' on '$SINGLEAGT'"
+
+ copytool setup -m "$MOUNT3"
+
+ do_nodes $(comma_list $(nodes_list)) $LCTL clear
+
+ $LFS hsm_archive $file1 || error "Fail to archive $file1"
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ $LFS hsm_release $file1
+ echo "Verifying released state: "
+ check_hsm_flags $file1 "0x0000000d"
+
+ multiop_bg_pause $file1 oO_WRONLY:O_APPEND:_w4c || error "multiop failed"
+ MULTIPID=$!
+ stat $file2 &
+ kill -USR1 $MULTIPID
+
+ wait
+ sync
+
+ local size1=$(stat -c "%s" $file1)
+ local size2=$(stat -c "%s" $file2)
+
+ [ $size1 -eq $size2 ] || error "sizes are different $size1 $size2"
+}
+run_test 113 "wrong stat after restore"
+
+test_200() {
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_cancel $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
+
+ copytool setup
+
+ # Prevent archive from completing
+ copytool_suspend
- # test with cdt on is made in test_221
- cdt_disable
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
# wait archive to register at CDT
- wait_request_state $fid ARCHIVE WAITING
- $LFS hsm_cancel $f
- cdt_enable
+ wait_request_state $fid ARCHIVE STARTED
+
+ # Cancel the archive
+ $LFS hsm_cancel "$f"
+
wait_request_state $fid ARCHIVE CANCELED
- wait_request_state $fid CANCEL SUCCEED
- copytool_cleanup
+ copytool_continue
+ wait_request_state $fid CANCEL SUCCEED
}
run_test 200 "Register/Cancel archive"
test_201() {
# test needs a running copytool
- copytool_setup
+ copytool setup
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- make_archive $tdir/$tfile
- import_file $tdir/$tfile $f
+ create_archive_file $tdir/$tfile
+ copytool import $tdir/$tfile $f
local fid=$(path2fid $f)
# test with cdt on is made in test_222
cdt_enable
wait_request_state $fid RESTORE CANCELED
wait_request_state $fid CANCEL SUCCEED
-
- copytool_cleanup
}
run_test 201 "Register/Cancel restore"
test_202() {
- # test needs a running copytool
- copytool_setup
-
- mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
+
+ # test needs a running copytool
+ copytool setup
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
- cdt_disable
+ copytool_suspend
$LFS hsm_remove $f
# wait remove to register at CDT
- wait_request_state $fid REMOVE WAITING
+ wait_request_state $fid REMOVE STARTED
$LFS hsm_cancel $f
- cdt_enable
- wait_request_state $fid REMOVE CANCELED
- copytool_cleanup
+ wait_request_state $fid REMOVE CANCELED
}
run_test 202 "Register/Cancel remove"
-test_220() {
+test_220A() { # was test_220
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/passwd $f)
- changelog_setup
+ changelog_register
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
- local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
- changelog_cleanup
-
- local target=0x0
- [[ $flags == $target ]] || error "Changelog flag is $flags not $target"
-
- copytool_cleanup
+ changelog_find -type HSM -target-fid $fid -flags 0x0 ||
+ error "The expected changelog was not emitted"
}
-run_test 220 "Changelog for archive"
+run_test 220A "Changelog for archive"
-test_221() {
+test_220a() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_cancel $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(copy_file /etc/passwd $f)
+
+ changelog_register
+
+ # block copytool operations to allow for HSM request to be
+ # submitted and file be unlinked (CDT will find object removed)
+ copytool_suspend
+
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+
+ # wait request to reach CT
+ wait_request_state $fid ARCHIVE STARTED
+
+ rm -f $f
+
+ copytool_continue
+
+ wait_request_state $fid ARCHIVE FAILED
+
+ # HE_ARCHIVE|ENOENT
+ changelog_find -type HSM -target-fid $fid -flags 0x2 ||
+ error "The expected changelog was not emitted"
+}
+run_test 220a "Changelog for failed archive"
+
+test_221() {
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_empty_file "$f")
- changelog_setup
+ copytool setup -b 1
+ changelog_register
+ # Prevent archive from completing
+ copytool_suspend
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE STARTED
+
$LFS hsm_cancel $f
wait_request_state $fid ARCHIVE CANCELED
- wait_request_state $fid CANCEL SUCCEED
- local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
-
- local target=0x7d
- [[ $flags == $target ]] || error "Changelog flag is $flags not $target"
+ copytool_continue
+ wait_request_state $fid CANCEL SUCCEED
- cleanup
+ changelog_find -type HSM -target-fid $fid -flags 0x7d ||
+ error "The expected changelog was not emitted"
}
run_test 221 "Changelog for archive canceled"
test_222a() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
copy2archive /etc/passwd $tdir/$tfile
local f=$DIR/$tdir/$tfile
- import_file $tdir/$tfile $f
+ copytool import $tdir/$tfile $f
local fid=$(path2fid $f)
- changelog_setup
+ changelog_register
$LFS hsm_restore $f
wait_request_state $fid RESTORE SUCCEED
- local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
-
- local target=0x80
- [[ $flags == $target ]] || error "Changelog flag is $flags not $target"
-
- cleanup
+ changelog_find -type HSM -target-fid $fid -flags 0x80 ||
+ error "The expected changelog was not emitted"
}
run_test 222a "Changelog for explicit restore"
test_222b() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/passwd $f)
- changelog_setup
+ changelog_register
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
wait_request_state $fid RESTORE SUCCEED
- local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
-
- local target=0x80
- [[ $flags == $target ]] || error "Changelog flag is $flags not $target"
-
- cleanup
+ changelog_find -type HSM -target-fid $fid -flags 0x80 ||
+ error "The expected changelog was not emitted"
}
run_test 222b "Changelog for implicit restore"
-test_223a() {
+test_222c() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
+ copy2archive /etc/passwd $tdir/$tfile
local f=$DIR/$tdir/$tfile
- make_archive $tdir/$tfile
+ copytool import $tdir/$tfile $f
+ local fid=$(path2fid $f)
- changelog_setup
+ changelog_register
- import_file $tdir/$tfile $f
- local fid=$(path2fid $f)
+ # block copytool operations to allow for HSM request to be
+ # submitted and file be unlinked (CDT will find object removed)
+ copytool_suspend
$LFS hsm_restore $f
+
+ # wait request to reach CT
wait_request_state $fid RESTORE STARTED
- $LFS hsm_cancel $f
- wait_request_state $fid RESTORE CANCELED
- wait_request_state $fid CANCEL SUCCEED
- local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
+ rm -f $f
- local target=0xfd
- [[ $flags == $target ]] ||
- error "Changelog flag is $flags not $target"
+ copytool_continue
- cleanup
+ wait_request_state $fid RESTORE FAILED
+
+ # HE_RESTORE|ENOENT
+ changelog_find -type HSM -target-fid $fid -flags 0x82 ||
+ error "The expected changelog was not emitted"
}
-run_test 223a "Changelog for restore canceled (import case)"
+run_test 222c "Changelog for failed explicit restore"
-test_223b() {
+test_222d() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
-
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(copy_file /etc/passwd $f)
- changelog_setup
+ changelog_register
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
+
+ copytool_remove_backend $fid
+ md5sum $f
+
+ wait_request_state $fid RESTORE FAILED
+
+ # HE_RESTORE|ENOENT
+ changelog_find -type HSM -target-fid $fid -flags 0x82 ||
+ error "The expected changelog was not emitted"
+}
+run_test 222d "Changelog for failed implicit restore"
+
+test_223a() {
+ # test needs a running copytool
+ copytool setup -b 1
+
+ local f=$DIR/$tdir/$tfile
+ create_archive_file $tdir/$tfile
+
+ changelog_register
+
+ copytool import $tdir/$tfile $f
+ local fid=$(path2fid $f)
+
$LFS hsm_restore $f
wait_request_state $fid RESTORE STARTED
$LFS hsm_cancel $f
wait_request_state $fid RESTORE CANCELED
wait_request_state $fid CANCEL SUCCEED
- local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1)
+ changelog_find -type HSM -target-fid $fid -flags 0xfd ||
+ error "The expected changelog was not emitted"
+}
+run_test 223a "Changelog for restore canceled (import case)"
- local target=0xfd
- [[ $flags == $target ]] ||
- error "Changelog flag is $flags not $target"
+test_223b() {
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_empty_file "$f")
+
+ copytool setup -b 1
+ changelog_register
+
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE SUCCEED
+ $LFS hsm_release $f
+
+ # Prevent restore from completing
+ copytool_suspend
+ $LFS hsm_restore $f
+ wait_request_state $fid RESTORE STARTED
+
+ $LFS hsm_cancel $f
+ wait_request_state $fid RESTORE CANCELED
+
+ copytool_continue
+ wait_request_state $fid CANCEL SUCCEED
- cleanup
+ changelog_find -type HSM -target-fid $fid -flags 0xfd ||
+ error "The expected changelog was not emitted"
}
run_test 223b "Changelog for restore canceled (release case)"
-test_224() {
+test_224A() { # was test_224
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/passwd $f)
- changelog_setup
+ changelog_register
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_remove $f
wait_request_state $fid REMOVE SUCCEED
- local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1)
+ changelog_find -type HSM -target-fid $fid -flags 0x200 ||
+ error "The expected changelog was not emitted"
+}
+run_test 224A "Changelog for remove"
+
+test_224a() {
+ # test needs a running copytool
+ copytool setup
+
+ mkdir -p $DIR/$tdir
+
+ local f=$DIR/$tdir/$tfile
+ local fid=$(copy_file /etc/passwd $f)
+
+ changelog_register
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ copytool_remove_backend $fid
+
+ # block copytool operations to allow for HSM request to be
+ # submitted and file be unlinked (CDT will find object removed)
+ copytool_suspend
- local target=0x200
- [[ $flags == $target ]] ||
- error "Changelog flag is $flags not $target"
+ $LFS hsm_remove $f
+
+ # wait for request to reach CT
+ wait_request_state $fid REMOVE STARTED
+
+ rm -f $f
- cleanup
+ copytool_continue
+
+ wait_request_state $fid REMOVE FAILED
+
+ # HE_REMOVE|ENOENT=0x202
+ changelog_find -type HSM -target-fid $fid -flags 0x202 ||
+ error "The expected changelog was not emitted"
}
-run_test 224 "Changelog for remove"
+run_test 224a "Changelog for failed remove"
test_225() {
- # test needs a running copytool
- copytool_setup
-
# test is not usable because remove request is too fast
# so it is always finished before cancel can be done ...
echo "Test disabled"
- copytool_cleanup
return 0
- mkdir -p $DIR/$tdir
+ # Everything below is unreachable until the early return above is
+ # dropped; it is kept in sync with the other changelog tests so it
+ # can be re-enabled as is.
+
+ # test needs a running copytool
+ copytool setup
+
local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_progress $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ local fid=$(create_empty_file "$f")
- changelog_setup
+ changelog_register
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
wait_request_state $fid ARCHIVE SUCCEED
- # if cdt is on, it can serve too quickly the request
- cdt_disable
+ # Prevent remove from completing
+ copytool_suspend
$LFS hsm_remove $f
+
$LFS hsm_cancel $f
- cdt_enable
wait_request_state $fid REMOVE CANCELED
- wait_request_state $fid CANCEL SUCCEED
- flags=$(changelog_get_flags ${MDT[0]} RENME $fid2)
- local flags=$($LFS changelog ${MDT[0]} | grep HSM | grep $fid |
- tail -n 1 | awk '{print $5}')
-
- local target=0x27d
- [[ $flags == $target ]] ||
- error "Changelog flag is $flags not $target"
+ copytool_continue
+ wait_request_state $fid CANCEL SUCCEED
- cleanup
+ # Fail only when the record is missing (the "||" was absent, which
+ # made error run unconditionally)
+ changelog_find -type HSM -target-fid $fid -flags 0x27d ||
+ error "The expected changelog was not emitted"
}
run_test 225 "Changelog for remove canceled"
test_226() {
# test needs a running copytool
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local fid2=$(copy_file /etc/passwd $f2)
copy_file /etc/passwd $f3
- changelog_setup
+ changelog_register
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1
wait_request_state $fid1 ARCHIVE SUCCEED
rm $f1 || error "rm $f1 failed"
- local flags=$(changelog_get_flags ${MDT[0]} UNLNK $fid1)
-
- local target=0x3
- [[ $flags == $target ]] ||
- error "Changelog flag is $flags not $target"
+ changelog_dump
+ changelog_find -type UNLNK -target-fid $fid1 -flags 0x3 ||
+ error "The expected changelog was not emitted"
mv $f3 $f2 || error "mv $f3 $f2 failed"
- flags=$(changelog_get_flags ${MDT[0]} RENME $fid2)
-
- target=0x3
- [[ $flags == $target ]] ||
- error "Changelog flag is $flags not $target"
-
- cleanup
+ changelog_find -type RENME -target-fid $fid2 -flags 0x3 ||
+ error "The expected changelog was not emitted"
}
run_test 226 "changelog for last rm/mv with exiting archive"
-check_flags_changes() {
- local f=$1
- local fid=$2
- local hsm_flag=$3
- local fst=$4
- local cnt=$5
-
+# Utility for test_227: runs one lfs hsm_set/hsm_clear and checks the HSM changelog. Reads action, flag, file, fid and count from the caller scope.
+__test_227()
+{
local target=0x280 # flags value every checked changelog record must carry
- $LFS hsm_set --$hsm_flag $f ||
- error "Cannot set $hsm_flag on $f"
- local flags=($(changelog_get_flags ${MDT[0]} HSM $fid))
- local seen=${#flags[*]}
- cnt=$((fst + cnt))
- [[ $seen == $cnt ]] ||
- error "set $hsm_flag: Changelog events $seen != $cnt"
- [[ ${flags[$((cnt - 1))]} == $target ]] ||
- error "set $hsm_flag: Changelog flags are "\
- "${flags[$((cnt - 1))]} not $target"
-
- $LFS hsm_clear --$hsm_flag $f ||
- error "Cannot clear $hsm_flag on $f"
- flags=($(changelog_get_flags ${MDT[0]} HSM $fid))
- seen=${#flags[*]}
- cnt=$(($cnt + 1))
- [[ $cnt == $seen ]] ||
- error "clear $hsm_flag: Changelog events $seen != $cnt"
-
- [[ ${flags[$((cnt - 1))]} == $target ]] ||
- error "clear $hsm_flag: Changelog flag is "\
- "${flags[$((cnt - 1))]} not $target"
+
+ "$LFS" "$action" --$flag "$file" ||
+ error "Cannot ${action#hsm_} $flag on '$file'"
+
+ # Only one changelog should be produced: the number of HSM records for fid must equal count + 1 (count is incremented by the comparison itself)
+ local entries="$(changelog_find -type HSM -target-fid $fid)"
+ [ $(wc -l <<< "$entries") -eq $((++count)) ] ||
+ error "lfs $action --$flag '$file' produced more than one" \
+ "changelog record"
+
+ # Parse the last changelog record (presumably into a key/value description that eval turns into the associative array changelog - TODO confirm against changelog2array)
+ local entry="$(tail -n 1 <<< "$entries")"
+ eval local -A changelog=$(changelog2array $entry)
+
+ # Also check the flags match what is expected
+ [[ ${changelog[flags]} == $target ]] ||
+ error "Changelog flag is '${changelog[flags]}', not $target"
}
test_227() { # sets then clears each HSM flag on one file; __test_227 checks the changelog after every step
+ local file="$DIR/$tdir/$tfile"
+ local fid=$(create_empty_file "$file")
+ local count=0 # HSM changelog records expected so far; incremented inside __test_227
+
+ changelog_register
+
+ for flag in norelease noarchive exists archived lost; do
+ if [ "$flag" == lost ]; then
+ # The flag "lost" only works on an archived file
+ "$LFS" hsm_set --archived "$file"
+ ((count++)) # account for the extra hsm_set issued just above
+ fi
+
+ action="hsm_set" __test_227 # action is handed over as a per-call variable assignment
+ action="hsm_clear" __test_227
+ done
+}
+run_test 227 "changelog when explicit setting of HSM flags"
+
+test_228() {
# test needs a running copytool
- copytool_setup
- changelog_setup
+ copytool setup
+
+ local fid=$(create_small_sync_file $DIR/$tfile)
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ $LFS hsm_release $DIR/$tfile
+ check_hsm_flags $DIR/$tfile "0x0000000d"
+
+ filefrag $DIR/$tfile | grep " 1 extent found" ||
+ error "filefrag on released file must return only one extent"
+
+ # only newer versions of cp detect sparse files by stat/FIEMAP
+ # (LU-2580)
+ cp --sparse=auto $DIR/$tfile $DIR/$tfile.2 ||
+ error "copying $DIR/$tfile"
+ cmp $DIR/$tfile $DIR/$tfile.2 || error "comparing copied $DIR/$tfile"
+
+ $LFS hsm_release $DIR/$tfile
+ check_hsm_flags $DIR/$tfile "0x0000000d"
+
+ mkdir -p $DIR/$tdir || error "mkdir $tdir failed"
+
+ tar cf - --sparse $DIR/$tfile | tar xvf - -C $DIR/$tdir ||
+ error "tar failed"
+ cmp $DIR/$tfile $DIR/$tdir/$DIR/$tfile ||
+ error "comparing untarred $DIR/$tfile"
+
+ rm -f $DIR/$tfile $DIR/$tfile.2 ||
+ error "rm $DIR/$tfile or $DIR/$tfile.2 failed"
+}
+run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse"
+
+test_250() { # sends six requests with max_requests set to 3 and expects exactly 3 of them STARTED
+ local file="$DIR/$tdir/$tfile"
+
+ # set max_requests to allow one request of each type to be started (3)
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests 3
+ # speed up test
+ stack_trap \
+ "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1
+
+ # start a copytool, then prepare two files per request kind (one request of each kind is sent twice below)
+ copytool setup
+ # setup the files: restore and remove targets are archived first, restore targets are also released
+ for action in archive restore remove; do
+ local filepath="$file"-to-$action
+ local fid=$(create_empty_file "$filepath")
+ local fid2=$(create_empty_file "$filepath".bis)
+
+ if [ "$action" != archive ]; then
+ "$LFS" hsm_archive "$filepath"
+ wait_request_state $fid ARCHIVE SUCCEED
+ "$LFS" hsm_archive "$filepath".bis
+ wait_request_state $fid2 ARCHIVE SUCCEED
+ fi
+ if [ "$action" == restore ]; then
+ "$LFS" hsm_release "$filepath"
+ "$LFS" hsm_release "$filepath".bis
+ fi
+ done
+
+ # suspend the copytool to prevent requests from completing
+ stack_trap "copytool_continue" EXIT
+ copytool_suspend
+
+ # send max_requests requests (one of each kind) and wait until each one is STARTED
+ for action in archive restore remove; do
+ filepath="$file"-to-$action
+ "$LFS" hsm_${action} "$filepath"
+ wait_request_state $(path2fid "$filepath") "${action^^}" STARTED
+ done
+
+ # send another batch of requests (the .bis files)
+ for action in archive restore remove; do
+ "$LFS" hsm_${action} "$file-to-$action".bis
+ done
+ # wait for loop_period seconds to make sure the coordinator has time
+ # to register those, even though it should not
+ sleep 1 # same value as the loop_period set above
+
+ # only the first batch of requests should be started
+ local -i count
+ count=$(do_facet $SINGLEMDS "$LCTL" get_param -n $HSM_PARAM.actions |
+ grep -c STARTED)
+
+ ((count == 3)) ||
+ error "expected 3 STARTED requests, found $count"
+}
+run_test 250 "Coordinator max request"
+
+# Check that a request left in the STARTED state longer than
+# active_request_timeout ends up CANCELED.
+test_251() {
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_empty_file "$f")
+
+ cdt_disable
+ # to have a short test
+ # (the previous value is restored through stack_trap so that it is
+ # put back even when one of the waits below fails)
+ stack_trap "set_hsm_param active_request_timeout \
+ $(get_hsm_param active_request_timeout)" EXIT
+ set_hsm_param active_request_timeout 1
+ # to be sure the cdt will wake up frequently so
+ # it will be able to cancel the "old" request
+ stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1
+ cdt_enable
+
+ copytool setup
+
+ # Prevent archive from completing; make sure the copytool is resumed
+ # on exit, whatever the outcome
+ stack_trap "copytool_continue" EXIT
+ copytool_suspend
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE STARTED
+
+ # Let the request timeout
+ wait_request_state $fid ARCHIVE CANCELED
+}
+run_test 251 "Coordinator request timeout"
+
+test_252() { # removes a file while its archive request is STARTED, then expects the request to become CANCELED
+ local f=$DIR/$tdir/$tfile
+ local fid=$(create_empty_file "$f")
+
+ # to have a short test
+ stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1
+
+ copytool setup
+
+ # Prevent archive from completing
+ copytool_suspend
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+ wait_request_state $fid ARCHIVE STARTED
+ rm -f "$f" # the file goes away while its request is still STARTED
+
+ stack_trap "set_hsm_param active_request_timeout \
+ $(get_hsm_param active_request_timeout)" EXIT
+ set_hsm_param active_request_timeout 1 # shortened only now, after the request reached STARTED
+
+ wait_request_state $fid ARCHIVE CANCELED
+ copytool_continue # resume the copytool suspended above
+}
+run_test 252 "Timeout'ed running archive of a removed file should be canceled"
+
+# Archive a 10 MiB file, then try to release it with the
+# OBD_FAIL_MDC_MERGE fail_loc set: the release may fail, but if it
+# succeeds the file size must be unchanged.
+test_253() {
+ local rc
+ local file_size
+ # 10 blocks of 1 MiB, see dd below
+ local expected_size=10485760
+ # test needs a running copytool
+ copytool setup
mkdir -p $DIR/$tdir
- typeset -a flags
+ local f=$DIR/$tdir/$tfile
+
+ # bs=1M is 1024*1024 bytes; dd's "MB" suffix means 1000*1000 and
+ # would not produce the size checked below
+ dd if=/dev/zero of=$f bs=1M count=10
+ local fid=$(path2fid $f)
+
+ $LFS hsm_archive $f || error "could not archive file"
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ # clear locks to discard inode data
+ cancel_lru_locks osc
+
+ #define OBD_FAIL_MDC_MERGE 0x807
+ $LCTL set_param fail_loc=0x807
+
+ #expect error here, instead of release with wrong size
+ $LFS hsm_release $f
+ rc=$?
+ if ((rc == 0)); then
+ file_size=$(stat -c '%s' $f)
+ if ((file_size != expected_size)); then
+ error "Wrong file size after hsm_release"
+ fi
+ else
+ echo "could not release file"
+ fi
+}
+run_test 253 "Check for wrong file size after release"
+
+test_254a()
+{ # reads the archive, restore and remove request counters and expects each of them to be 0
+ [ $MDS1_VERSION -lt $(version_code 2.10.56) ] &&
+ skip "need MDS version at least 2.10.56"
+
+ # Check that the counters are initialized to 0 (a failure to read a counter is reported as an error too)
+ local count
+ for request_type in archive restore remove; do
+ count="$(get_hsm_param ${request_type}_count)" ||
+ error "Reading ${request_type}_count failed with $?"
+
+ [ "$count" -eq 0 ] ||
+ error "Expected ${request_type}_count to be " \
+ "0 != '$count'"
+ done
+}
+run_test 254a "Request counters are initialized to zero"
+
+# For each request type, queue a batch of requests while the copytool is
+# suspended, check that the matching <type>_count parameter equals the
+# batch size, then resume the copytool and check the counter is back to 0.
+test_254b()
+{
+ [ $MDS1_VERSION -lt $(version_code 2.10.56) ] &&
+ skip "need MDS version at least 2.10.56"
+
+ # The number of request to launch (at least 32)
+ local request_count=$((RANDOM % 32 + 32))
+ printf "Will launch %i requests of each type\n" "$request_count"
+
+ # Launch a copytool to process requests
+ copytool setup
+
+ # Set hsm.max_requests to allow starting all requests at the same time
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests "$request_count"
+
+ local timeout
+ local count
+ # keep the loop variables out of the global scope
+ local request_type
+ local i
+ for request_type in archive restore remove; do
+ printf "Checking %s requests\n" "${request_type}"
+ # Suspend the copytool to give us time to read the proc files
+ copytool_suspend
+
+ for ((i = 0; i < $request_count; i++)); do
+ case $request_type in
+ archive)
+ create_empty_file "$DIR/$tdir/$tfile-$i" \
+ >/dev/null 2>&1
+ ;;
+ restore)
+ # use the same lfs binary as every other call
+ $LFS hsm_release "$DIR/$tdir/$tfile-$i"
+ ;;
+ esac
+ $LFS hsm_${request_type} "$DIR/$tdir/$tfile-$i"
+ done
+
+ # Give the coordinator 10 seconds to start every request
+ timeout=10
+ while get_hsm_param actions | grep -q WAITING; do
+ sleep 1
+ let timeout-=1
+ [ $timeout -gt 0 ] ||
+ error "${request_type^} requests took too " \
+ "long to start"
+ done
+
+ count="$(get_hsm_param ${request_type}_count)"
+ [ "$count" -eq "$request_count" ] ||
+ error "Expected '$request_count' (!= '$count') " \
+ "active $request_type requests"
+
+ # Let the copytool process the requests
+ copytool_continue
+ # Give it 10 seconds maximum
+ timeout=10
+ while get_hsm_param actions | grep -q STARTED; do
+ sleep 1
+ let timeout-=1
+ [ $timeout -gt 0 ] ||
+ error "${request_type^} requests took too " \
+ "long to complete"
+ done
+
+ count="$(get_hsm_param ${request_type}_count)"
+ [ "$count" -eq 0 ] ||
+ error "Expected 0 (!= '$count') " \
+ "active $request_type requests"
+ done
+}
+run_test 254b "Request counters are correctly incremented and decremented"
+
+test_255()
+{ # queues an archive request with no copytool running, then checks that starting a copytool gets it processed despite a very long loop_period
+ [ $MDS1_VERSION -lt $(version_code 2.12.0) ] &&
+ skip "Need MDS version at least 2.12.0"
+
+ local file="$DIR/$tdir/$tfile"
+ local fid=$(create_empty_file "$file")
+
+ # How do you make sure the coordinator has consumed any outstanding
+ # event, without triggering an event yourself?
+ #
+ # You wait for a request to disappear from the coordinator's llog.
+
+ # Warning: the setup represents 90% of this test
+
+ # Create and process an HSM request
+ copytool setup
+ "$LFS" hsm_archive "$file"
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ kill_copytools
+ wait_copytools || error "failed to stop copytools"
+
+ # Launch a new HSM request (the file is recreated, so fid still names the first, completed request)
+ rm "$file"
+ create_empty_file "$file"
+ "$LFS" hsm_archive "$file"
+
+ cdt_shutdown
+
+ # Have the completed request be removed as soon as the cdt wakes up
+ stack_trap "set_hsm_param grace_delay $(get_hsm_param grace_delay)" EXIT
+ set_hsm_param grace_delay 1
+ # (Hopefully, time on the MDS will behave nicely)
+ do_facet $SINGLEMDS sleep 2 & # background job, collected by the wait further down
+
+ # Increase loop_period as a means to prevent the coordinator from
+ # waking itself up to do some housekeeping.
+ stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1000
+
+ wait $! || error "waiting failed"
+ cdt_enable
+ wait_request_state $fid ARCHIVE "" # presumably satisfied once the first request is no longer listed - TODO confirm
+ # The coordinator will not wake up on its own for about loop_period secs...
+
+ # ... Unless a copytool registers. Now the real test begins
+ copytool setup
+ wait_request_state $(path2fid "$file") ARCHIVE SUCCEED
+}
+run_test 255 "Copytool registration wakes the coordinator up"
+
+# tests 260[a-c] rely on the parsing of the copytool's log file, they might
+# break in the future because of that.
+test_260a()
+{
+ [ $MDS1_VERSION -lt $(version_code 2.11.56) ] &&
+ skip "need MDS version 2.11.56 or later"
+
+ local -a files=("$DIR/$tdir/$tfile".{0..15})
+ local file
+
+ for file in "${files[@]}"; do
+ create_small_file "$file"
+ done
+
+ # Set a few hsm parameters
+ stack_trap \
+ "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests 3
+
+ # Release one file
+ copytool setup
+ "$LFS" hsm_archive "${files[0]}"
+ wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+ "$LFS" hsm_release "${files[0]}"
+
+ # Stop the copytool
+ kill_copytools
+ wait_copytools || error "copytools failed to stop"
+
+ # Send several archive requests
+ for file in "${files[@]:1}"; do
+ "$LFS" hsm_archive "$file"
+ done
+
+ # Send one restore request
+ "$LFS" hsm_restore "${files[0]}"
+
+ # Launch a copytool
+ copytool setup
+
+ # Wait for all the requests to complete
+ wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+ for file in "${files[@]:1}"; do
+ wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+ done
+
+ # Collect the actions in the order in which the copytool processed them
+ local -a actions=(
+ $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+ "$(copytool_logfile "$SINGLEAGT")")
+ )
+
+ printf '%s\n' "${actions[@]}"
- for i in norelease noarchive exists archived
- do
- local f=$DIR/$tdir/$tfile-$i
- local fid=$(copy_file /etc/passwd $f)
- check_flags_changes $f $fid $i 0 1
+ local action
+ for action in "${actions[@]:0:3}"; do
+ [ "$action" == RESTORE ] && return
done
- f=$DIR/$tdir/$tfile---lost
- fid=$(copy_file /etc/passwd $f)
- $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
- wait_request_state $fid ARCHIVE SUCCEED
- check_flags_changes $f $fid lost 3 1
-
- cleanup
+ error "Too many ARCHIVE requests were run before the RESTORE request"
}
-run_test 227 "changelog when explicit setting of HSM flags"
+run_test 260a "Restore request have priority over other requests"
-test_228() {
- # test needs a running copytool
- copytool_setup
+# This test is very much tied to the implementation of the current priorisation
+# mechanism in the coordinator. It might not make sense to keep it in the future
+test_260b()
+{
+ [ $MDS1_VERSION -lt $(version_code 2.11.56) ] &&
+ skip "need MDS version 2.11.56 or later"
- local fid=$(make_small_sync $DIR/$tfile)
- $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile
- wait_request_state $fid ARCHIVE SUCCEED
+ local -a files=("$DIR/$tdir/$tfile".{0..15})
+ local file
- $LFS hsm_release $DIR/$tfile
- check_hsm_flags $DIR/$tfile "0x0000000d"
+ for file in "${files[@]}"; do
+ create_small_file "$file"
+ done
- filefrag $DIR/$tfile | grep " 1 extent found" ||
- error "filefrag on released file must return only one extent"
+ # Set a few hsm parameters
+ stack_trap \
+ "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests 3
+
+ # Release one file
+ copytool setup --archive-id 2
+ "$LFS" hsm_archive --archive 2 "${files[0]}"
+ wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+ "$LFS" hsm_release "${files[0]}"
+
+ # Stop the copytool
+ kill_copytools
+ wait_copytools || error "copytools failed to stop"
- # only newer versions of cp detect sparse files by stat/FIEMAP
- # (LU-2580)
- cp --sparse=auto $DIR/$tfile $DIR/$tfile.2 ||
- error "copying $DIR/$tfile"
- cmp $DIR/$tfile $DIR/$tfile.2 || error "comparing copied $DIR/$tfile"
+ # Send several archive requests
+ for file in "${files[@]:1}"; do
+ "$LFS" hsm_archive "$file"
+ done
- $LFS hsm_release $DIR/$tfile
- check_hsm_flags $DIR/$tfile "0x0000000d"
+ # Send one restore request
+ "$LFS" hsm_restore "${files[0]}"
- mkdir -p $DIR/$tdir || error "mkdir $tdir failed"
+ # Launch a copytool
+ copytool setup
+ copytool setup --archive-id 2
- tar cf - --sparse $DIR/$tfile | tar xvf - -C $DIR/$tdir ||
- error "tar failed"
- cmp $DIR/$tfile $DIR/$tdir/$DIR/$tfile ||
- error "comparing untarred $DIR/$tfile"
+ # Wait for all the requests to complete
+ wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+ for file in "${files[@]:1}"; do
+ wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+ done
- rm -f $DIR/$tfile $DIR/$tfile.2 ||
- error "rm $DIR/$tfile or $DIR/$tfile.2 failed"
- copytool_cleanup
+ # Collect the actions in the order in which the copytool processed them
+ local -a actions=(
+ $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+ "$(copytool_logfile "$SINGLEAGT")")
+ )
+
+ printf '%s\n' "${actions[@]}"
+
+ local action
+ for action in "${actions[@]:0:3}"; do
+ [ "$action" == RESTORE ] && return
+ done
+
+ error "Too many ARCHIVE requests were run before the RESTORE request"
}
-run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse"
+run_test 260b "Restore request have priority over other requests"
-test_250() {
- # test needs a running copytool
- copytool_setup
+# This test is very much tied to the implementation of the current priorisation
+# mechanism in the coordinator. It might not make sense to keep it in the future
+test_260c()
+{
+ [ $MDS1_VERSION -lt $(version_code 2.12.0) ] &&
+ skip "Need MDS version at least 2.12.0"
- mkdir -p $DIR/$tdir
- local maxrequest=$(get_hsm_param max_requests)
- local rqcnt=$(($maxrequest * 3))
- local i=""
+ local -a files=("$DIR/$tdir/$tfile".{0..15})
+ local file
- cdt_disable
- for i in $(seq -w 1 $rqcnt); do
- rm -f $DIR/$tdir/$i
- dd if=/dev/urandom of=$DIR/$tdir/$i bs=1M count=10 conv=fsync
- done
- # we do it in 2 steps, so all requests arrive at the same time
- for i in $(seq -w 1 $rqcnt); do
- $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$i
+ for file in "${files[@]}"; do
+ create_small_file "$file"
done
+
+ # Set a few hsm parameters
+ stack_trap \
+ "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT
+ set_hsm_param loop_period 1000
+ stack_trap \
+ "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT
+ set_hsm_param max_requests 3
+
+ # Release one file
+ copytool setup --archive-id 2
+ "$LFS" hsm_archive --archive 2 "${files[0]}"
+ wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED
+ "$LFS" hsm_release "${files[0]}"
+
+ # Stop the copytool
+ kill_copytools
+ wait_copytools || error "copytools failed to stop"
+
+ # Force the next coordinator run to do housekeeping
+ cdt_shutdown
cdt_enable
- local cnt=$rqcnt
- local wt=$rqcnt
- while [[ $cnt != 0 || $wt != 0 ]]; do
- sleep 1
- cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\
- $HSM_PARAM.actions |\
- grep STARTED | grep -v CANCEL | wc -l")
- [[ $cnt -le $maxrequest ]] ||
- error "$cnt > $maxrequest too many started requests"
- wt=$(do_facet $SINGLEMDS "$LCTL get_param\
- $HSM_PARAM.actions |\
- grep WAITING | wc -l")
- echo "max=$maxrequest started=$cnt waiting=$wt"
- done
- copytool_cleanup
-}
-run_test 250 "Coordinator max request"
+ "$LFS" hsm_archive "${files[1]}"
-test_251() {
- # test needs a running copytool
- copytool_setup
+ # Launch a copytool
+ copytool setup
+ copytool setup --archive-id 2
- mkdir -p $DIR/$tdir
- local f=$DIR/$tdir/$tfile
- local fid
- fid=$(make_large_for_cancel $f)
- [ $? != 0 ] && skip "not enough free space" && return
+ wait_request_state "$(path2fid "${files[1]}")" ARCHIVE SUCCEED
+ # The coordinator just did a housekeeping run; it won't do another one
+ # for around `loop_period' seconds => requests will not be reordered
+ # if it costs too much (i.e. when the coordinator has to discard a whole
+ # hal)
- cdt_disable
- # to have a short test
- local old_to=$(get_hsm_param active_request_timeout)
- set_hsm_param active_request_timeout 4
- # to be sure the cdt will wake up frequently so
- # it will be able to cancel the "old" request
- local old_loop=$(get_hsm_param loop_period)
- set_hsm_param loop_period 2
- cdt_enable
+ # Send several archive requests
+ for file in "${files[@]:2}"; do
+ "$LFS" hsm_archive "$file"
+ done
- # clear locks to avoid extra delay caused by flush/cancel
- # and thus prevent early copytool death to timeout.
- cancel_lru_locks osc
+ # Send one restore request
+ "$LFS" hsm_restore "${files[0]}"
- $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
- wait_request_state $fid ARCHIVE STARTED
- sleep 5
- wait_request_state $fid ARCHIVE CANCELED
+ # Wait for all the requests to complete
+ wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED
+ for file in "${files[@]:2}"; do
+ wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED
+ done
- set_hsm_param active_request_timeout $old_to
- set_hsm_param loop_period $old_loop
+ # Collect the actions in the order in which the copytool processed them
+ local -a actions=(
+ $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
+ "$(copytool_logfile "$SINGLEAGT")")
+ )
- copytool_cleanup
+ printf '%s\n' "${actions[@]}"
+
+ local action
+ for action in "${actions[@]:0:3}"; do
+ [ "$action" == RESTORE ] &&
+ error "Restore requests should not be prioritised" \
+ "unless the coordinator is doing housekeeping"
+ done
+ return 0
}
-run_test 251 "Coordinator request timeout"
+run_test 260c "Requests are not reordered on the 'hot' path of the coordinator"
test_300() {
+ [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return
+
# the only way to test ondisk conf is to restart MDS ...
echo "Stop coordinator and remove coordinator state at mount"
# stop coordinator
run_test 300 "On disk coordinator state kept between MDT umount/mount"
test_301() {
+ [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return
+
local ai=$(get_hsm_param default_archive_id)
local new=$((ai + 1))
run_test 301 "HSM tunnable are persistent"
test_302() {
+ [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return
+
local ai=$(get_hsm_param default_archive_id)
local new=$((ai + 1))
test_400() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local dir_mdt1=$DIR/$tdir/mdt1
# create 1 dir per MDT
+ stack_trap "rm -rf $dir_mdt0" EXIT
$LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir"
+ stack_trap "rm -rf $dir_mdt1" EXIT
$LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir"
# create 1 file in each MDT
- local fid1=$(make_small $dir_mdt0/$tfile)
- local fid2=$(make_small $dir_mdt1/$tfile)
+ local fid1=$(create_small_file $dir_mdt0/$tfile)
+ local fid2=$(create_small_file $dir_mdt1/$tfile)
# check that hsm request on mdt0 is sent to the right MDS
$LFS hsm_archive $dir_mdt0/$tfile || error "lfs hsm_archive"
$LFS hsm_archive $dir_mdt1/$tfile || error "lfs hsm_archive"
wait_request_state $fid2 ARCHIVE SUCCEED 1 &&
echo "archive successful on mdt1"
-
- copytool_cleanup
- # clean test files and directories
- rm -rf $dir_mdt0 $dir_mdt1
}
run_test 400 "Single request is sent to the right MDT"
test_401() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
local dir_mdt1=$DIR/$tdir/mdt1
# create 1 dir per MDT
+ stack_trap "rm -rf $dir_mdt0" EXIT
$LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir"
+ stack_trap "rm -rf $dir_mdt1" EXIT
$LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir"
# create 1 file in each MDT
- local fid1=$(make_small $dir_mdt0/$tfile)
- local fid2=$(make_small $dir_mdt1/$tfile)
+ local fid1=$(create_small_file $dir_mdt0/$tfile)
+ local fid2=$(create_small_file $dir_mdt1/$tfile)
# check that compound requests are shunt to the rights MDTs
$LFS hsm_archive $dir_mdt0/$tfile $dir_mdt1/$tfile ||
echo "archive successful on mdt0"
wait_request_state $fid2 ARCHIVE SUCCEED 1 &&
echo "archive successful on mdt1"
-
- copytool_cleanup
- # clean test files and directories
- rm -rf $dir_mdt0 $dir_mdt1
}
run_test 401 "Compound requests split and sent to their respective MDTs"
done
}
-test_402() {
- # make sure there is no running copytool
- copytool_cleanup
-
+test_402a() {
# deactivate all mdc on agent1
mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "deactivate"
- HSMTOOL_NOERROR=true copytool_setup $SINGLEAGT
+ copytool setup --no-fail
check_agent_unregistered "uuid" # match any agent
# reactivate MDCs
mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "activate"
}
-run_test 402 "Copytool start fails if all MDTs are inactive"
+run_test 402a "Copytool start fails if all MDTs are inactive"
+
+# Send an ARCHIVE request while fail_loc 0x14d is set on the agent, wait for
+# the request to be reported as WAITING, then clear the fail_loc and wait for
+# the same request to be reported as SUCCEED.
+test_402b() {
+	local f=$DIR/$tdir/$tfile
+	# OBD_FAIL_MDS_HSM_CT_REGISTER_NET
+	local ct_register_net=0x14d
+
+	copytool setup
+
+	mkdir -p $DIR/$tdir
+	touch $f || error "touch $f failed"
+	local fid=$(path2fid $f)
+
+	# Arm the failure on the agent before submitting the request
+	do_facet $SINGLEAGT lctl set_param fail_loc=$ct_register_net
+	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f
+
+	# Leave the CDT enough time to send the request and keep it for retry
+	wait_for_loop_period
+	wait_request_state $fid ARCHIVE WAITING
+
+	# Disarm the failure: the pending request should go through now
+	do_facet $SINGLEAGT lctl set_param fail_loc=0
+	wait_request_state $fid ARCHIVE SUCCEED
+}
+run_test 402b "CDT must retry request upon slow start of CT"
test_403() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
- # make sure there is no running copytool
- copytool_cleanup
-
local agent=$(facet_active_host $SINGLEAGT)
# deactivate all mdc for MDT0001
mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate"
- copytool_setup
+ copytool setup
local uuid=$(get_agent_uuid $agent)
# check the agent is registered on MDT0000, and not on MDT0001
check_agent_registered_by_mdt $uuid 0
# make sure the copytool is now registered to all MDTs
check_agent_registered $uuid
-
- copytool_cleanup
}
run_test 403 "Copytool starts with inactive MDT and register on reconnect"
test_404() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
- copytool_setup
+ copytool setup
# create files on both MDT0000 and MDT0001
mkdir -p $DIR/$tdir
local dir_mdt0=$DIR/$tdir/mdt0
+ stack_trap "rm -rf $dir_mdt0" EXIT
$LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir"
# create 1 file on mdt0
- local fid1=$(make_small $dir_mdt0/$tfile)
+ local fid1=$(create_small_file $dir_mdt0/$tfile)
# deactivate all mdc for MDT0001
mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate"
# reactivate all mdc for MDT0001
mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "activate"
-
- copytool_cleanup
- # clean test files and directories
- rm -rf $dir_mdt0
}
run_test 404 "Inactive MDT does not block requests for active MDTs"
test_405() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
- copytool_setup
+ copytool setup
mkdir -p $DIR/$tdir
# create striped dir on all of MDTs
$LFS mkdir -i 0 -c $MDSCOUNT $striped_dir || error "lfs mkdir"
- local fid1=$(make_small_sync $striped_dir/${tfile}_0)
- local fid2=$(make_small_sync $striped_dir/${tfile}_1)
- local fid3=$(make_small_sync $striped_dir/${tfile}_2)
- local fid4=$(make_small_sync $striped_dir/${tfile}_3)
+ local fid1=$(create_small_sync_file $striped_dir/${tfile}_0)
+ local fid2=$(create_small_sync_file $striped_dir/${tfile}_1)
+ local fid3=$(create_small_sync_file $striped_dir/${tfile}_2)
+ local fid4=$(create_small_sync_file $striped_dir/${tfile}_3)
- local idx1=$($LFS getstripe -M $striped_dir/${tfile}_0)
- local idx2=$($LFS getstripe -M $striped_dir/${tfile}_1)
- local idx3=$($LFS getstripe -M $striped_dir/${tfile}_2)
- local idx4=$($LFS getstripe -M $striped_dir/${tfile}_3)
+ local idx1=$($LFS getstripe -m $striped_dir/${tfile}_0)
+ local idx2=$($LFS getstripe -m $striped_dir/${tfile}_1)
+ local idx3=$($LFS getstripe -m $striped_dir/${tfile}_2)
+ local idx4=$($LFS getstripe -m $striped_dir/${tfile}_3)
# check that compound requests are shunt to the rights MDTs
$LFS hsm_archive $striped_dir/${tfile}_0 $striped_dir/${tfile}_1 \
cat $striped_dir/${tfile}_1 > /dev/null || error "cat ${tfile}_1 failed"
cat $striped_dir/${tfile}_2 > /dev/null || error "cat ${tfile}_2 failed"
cat $striped_dir/${tfile}_3 > /dev/null || error "cat ${tfile}_3 failed"
-
- copytool_cleanup
}
run_test 405 "archive and release under striped directory"
test_406() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] &&
- skip "need MDS version at least 2.7.64" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.7.64) ] &&
+ skip "need MDS version at least 2.7.64"
local fid
local mdt_index
- copytool_setup
- mkdir -p $DIR/$tdir
- fid=$(make_small $DIR/$tdir/$tfile)
+ fid=$(create_small_file $DIR/$tdir/$tfile)
echo "old fid $fid"
+ copytool setup
+
$LFS hsm_archive $DIR/$tdir/$tfile
wait_request_state "$fid" ARCHIVE SUCCEED
$LFS hsm_release $DIR/$tdir/$tfile
# Should migrate $tdir but not $tfile.
- $LFS mv -M1 $DIR/$tdir &&
+ $LFS migrate -m1 $DIR/$tdir &&
error "migrating HSM an archived file should fail"
$LFS hsm_restore $DIR/$tdir/$tfile
cat $DIR/$tdir/$tfile > /dev/null ||
error "cannot read $DIR/$tdir/$tfile"
- $LFS mv -M1 $DIR/$tdir ||
+ $LFS migrate -m1 $DIR/$tdir ||
error "cannot complete migration after HSM remove"
- mdt_index=$($LFS getstripe -M $DIR/$tdir)
+ mdt_index=$($LFS getstripe -m $DIR/$tdir)
if ((mdt_index != 1)); then
error "expected MDT index 1, got $mdt_index"
fi
cat $DIR/$tdir/$tfile > /dev/null ||
error "cannot read $DIR/$tdir/$tfile"
-
- copytool_cleanup
}
run_test 406 "attempting to migrate HSM archived files is safe"
+# With fail_loc 0x164 (OBD_FAIL_MDS_HSM_CDT_DELAY) set on the MDS and the
+# copytool suspended, read a released file through both mount points, call
+# `fail' on the MDS, then resume the copytool and wait for RESTORE to succeed.
+test_407() {
+	local fpath=$DIR/$tdir/$tfile
+	local fpath2=$DIR2/$tdir/$tfile
+	local fid=$(create_empty_file "$fpath")
+
+	copytool setup
+
+	# Archive then release the file so that reading it needs a restore
+	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $fpath
+	wait_request_state $fid ARCHIVE SUCCEED
+	$LFS hsm_release $fpath
+
+#define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164
+	do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x164
+
+	# Keep the restore from completing
+	copytool_suspend
+
+	# The 1st reader holds the layout lock while the matching RESTORE
+	# record is still not added to the llog
+	md5sum $fpath &
+	md5sum $fpath2 &
+	sleep 2
+
+	# After umount, the hsm_actions->O/x/x log should not contain
+	# double RESTORE records like below
+	#[0x200000401:0x1:0x0]...0x58d03a0d/0x58d03a0c action=RESTORE...WAITING
+	#[0x200000401:0x1:0x0]...0x58d03a0c/0x58d03a0d action=RESTORE...WAITING
+	{
+		sleep 30 &&
+			do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"
+	} &
+	fail $SINGLEMDS
+
+	copytool_continue
+	wait_request_state $fid RESTORE SUCCEED
+}
+run_test 407 "Check for double RESTORE records in llog"
+
test_500()
{
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.92) ] &&
- skip "HSM migrate is not supported" && return
-
- # Stop the existing copytool
- copytool_cleanup
+ [ $MDS1_VERSION -lt $(version_code 2.6.92) ] &&
+ skip "HSM migrate is not supported"
test_mkdir -p $DIR/$tdir
- llapi_hsm_test -d $DIR/$tdir || error "One llapi HSM test failed"
+
+ if [ $(lustre_version_code client) -lt $(version_code 2.11.56) ] ||
+ [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.11.56) ];
+ then
+ llapi_hsm_test -d $DIR/$tdir -b ||
+ error "One llapi HSM test failed"
+ else
+ llapi_hsm_test -d $DIR/$tdir ||
+ error "One llapi HSM test failed"
+ fi
}
run_test 500 "various LLAPI HSM tests"
-copytool_cleanup
+# Create a file as $RUNAS, look up its CREAT changelog record filtered on
+# RUNAS_ID/RUNAS_GID, and check that the NID stored in that record matches
+# one of CLIENT_NIDS.
+test_600() {
+	if [ $MDS1_VERSION -lt $(version_code 2.10.58) ]; then
+		skip "need MDS version at least 2.10.58"
+	fi
+
+	local f=$DIR/$tdir/$tfile
+
+	mkdir -p $DIR/$tdir
+
+	changelog_register
+	# record every changelog type
+	changelog_chmask "ALL"
+
+	chmod 777 $DIR/$tdir
+	$RUNAS touch $f || error "touch $f failed as $RUNAS_ID"
+	local fid=$(path2fid $f)
+
+	local entry
+	entry=$(changelog_find -type CREAT -target-fid $fid -uid "$RUNAS_ID" \
+		-gid "$RUNAS_GID") ||
+		error "No matching CREAT entry"
+
+	# Turn the record into an associative array to read its fields
+	eval local -A changelog=$(changelog2array $entry)
+	local nid="${changelog[nid]}"
+
+	# The NID must be set and be one of the client NIDs
+	echo "Got NID '$nid'"
+	if [ -z "$nid" ] || ! [[ "${CLIENT_NIDS[*]}" =~ $nid ]]; then
+		error "nid '$nid' does not match any client NID:" \
+			"${CLIENT_NIDS[@]}"
+	fi
+}
+run_test 600 "Changelog fields 'u=' and 'nid='"
+
+# Read a file with `cat' and check that an OPEN changelog record with mode
+# "r--" exists for its FID.
+test_601() {
+	if [ $MDS1_VERSION -lt $(version_code 2.10.58) ]; then
+		skip "need MDS version at least 2.10.58"
+	fi
+
+	local f=$DIR/$tdir/$tfile
+
+	mkdir -p $DIR/$tdir
+
+	changelog_register
+	# record every changelog type
+	changelog_chmask "ALL"
+
+	touch $f || error "touch $f failed"
+	local fid=$(path2fid $f)
+
+	# Drop the records produced so far, then read the file
+	changelog_clear
+	cat $f || error "cat $f failed"
+
+	if ! changelog_find -type OPEN -target-fid $fid -mode "r--"; then
+		error "No matching OPEN entry"
+	fi
+}
+run_test 601 "OPEN Changelog entry"
+
+# With the "ALL" changelog mask, a read and a write of the file must each
+# leave a CLOSE record. Once OPEN is removed from the mask, a read must not
+# leave a CLOSE record, while a write still must.
+test_602() {
+	if [ $MDS1_VERSION -lt $(version_code 2.10.58) ]; then
+		skip "need MDS version at least 2.10.58"
+	fi
+
+	local f=$DIR/$tdir/$tfile
+
+	mkdir -p $DIR/$tdir
+
+	changelog_register
+	# record every changelog type
+	changelog_chmask "ALL"
+
+	touch $f || error "touch $f failed"
+	local fid=$(path2fid $f)
+
+	# Mask ALL, read access
+	changelog_clear
+	cat $f || error "cat $f failed"
+
+	if ! changelog_find -type CLOSE -target-fid $fid; then
+		error "No CLOSE entry"
+	fi
+
+	# Mask ALL, write access
+	changelog_clear
+	changelog_dump
+	echo f > $f || error "write $f failed"
+	changelog_dump
+
+	if ! changelog_find -type CLOSE -target-fid $fid; then
+		error "No CLOSE entry"
+	fi
+
+	# take OPEN out of changelog_mask
+	changelog_chmask "-OPEN"
+
+	# Mask without OPEN, read access
+	changelog_clear
+	changelog_dump
+	cat $f || error "cat $f failed"
+	changelog_dump
+
+	if changelog_find -type CLOSE -target-fid $fid; then
+		error "There should be no CLOSE entry"
+	fi
+
+	# Mask without OPEN, write access
+	changelog_clear
+	changelog_dump
+	echo f > $f || error "write $f failed"
+	changelog_dump
+
+	if ! changelog_find -type CLOSE -target-fid $fid; then
+		error "No CLOSE entry"
+	fi
+}
+run_test 602 "Changelog record CLOSE only if open+write or OPEN recorded"
+
+# Set an extended attribute on a file, read it back with getfattr, and check
+# that a GXATR changelog record naming that attribute exists for the file.
+test_603() {
+	if [ $MDS1_VERSION -lt $(version_code 2.10.58) ]; then
+		skip "need MDS version at least 2.10.58"
+	fi
+
+	local f=$DIR/$tdir/$tfile
+
+	mkdir -p $DIR/$tdir
+
+	changelog_register
+	# record every changelog type
+	changelog_chmask "ALL"
+
+	touch $f || error "touch $f failed"
+	local fid=$(path2fid $f)
+
+	setfattr -n user.xattr1 -v "value1" $f || error "setfattr $f failed"
+
+	# Only the getfattr below should be left in the changelog
+	changelog_clear
+	getfattr -n user.xattr1 $f || error "getfattr $f failed"
+
+	if ! changelog_find -type GXATR -target-fid $fid -xattr "user.xattr1"
+	then
+		error "No matching GXATR entry"
+	fi
+}
+run_test 603 "GETXATTR Changelog entry"
+
+# Check NOPEN (denied open) changelog records and their rate limiting:
+#  1. a denied read by $RUNAS through the second mount point must leave a
+#     NOPEN record carrying the user's UID/GID and a client NID;
+#  2. an immediate second denied read must leave no NOPEN record;
+#  3. after sleeping for the changelog_deniednext interval set by this test,
+#     a denied read must leave a NOPEN record again.
+# The previous changelog_deniednext value is restored on exit.
+test_604() {
+	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+		skip "need MDS version at least 2.10.58"
+
+	mkdir -p $DIR/$tdir
+
+	local f=$DIR/$tdir/$tfile
+	local f2=$DIR2/$tdir/$tfile
+	local procname="mdd.$FSNAME-MDT0000.changelog_deniednext"
+	# Value (in seconds) written to changelog_deniednext for this test;
+	# the sleep further down must use the very same value
+	local deniednext=20
+	local timeout
+	timeout="$(do_facet mds1 "$LCTL" get_param -n "$procname")"
+	stack_trap "do_facet mds1 '$LCTL' set_param '$procname=$timeout'" EXIT
+	do_facet mds1 "$LCTL" set_param "$procname=$deniednext"
+
+	changelog_register
+	# set changelog_mask to ALL
+	changelog_chmask "ALL"
+
+	touch $f || error "touch $f failed"
+	local fid=$(path2fid $f)
+
+	# Owner-only permissions, so that reads by $RUNAS are denied
+	chmod 600 $f
+
+	changelog_clear
+	changelog_dump
+	$RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed"
+	changelog_dump
+
+	local entry
+	entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \
+		-gid "$RUNAS_GID" -mode "r--") ||
+		error "No matching NOPEN entry"
+
+	# Parse the changelog
+	eval local -A changelog=$(changelog2array $entry)
+	local nid="${changelog[nid]}"
+
+	# Check its NID
+	echo "Got NID '$nid'"
+	[ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] ||
+		error "nid '$nid' does not match any client NID:" \
+			"${CLIENT_NIDS[@]}"
+
+	# A second denied open right away must not be recorded
+	changelog_clear
+	changelog_dump
+	$RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed"
+	changelog_dump
+
+	changelog_find -type NOPEN -target-fid $fid &&
+		error "There should be no NOPEN entry"
+
+	# Sleep for `changelog_deniednext` seconds
+	sleep $deniednext
+
+	changelog_clear
+	changelog_dump
+	$RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed"
+	changelog_dump
+
+	entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \
+		-gid "$RUNAS_GID" -mode "r--") ||
+		error "No matching NOPEN entry"
+
+	# Parse the changelog
+	eval local -A changelog=$(changelog2array $entry)
+	local nid="${changelog[nid]}"
+
+	# Check the NID
+	echo "Got NID '$nid'"
+	[ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] ||
+		error "nid '$nid' does not match any client NID:" \
+			"${CLIENT_NIDS[@]}"
+}
+run_test 604 "NOPEN Changelog entry"
+
+# Check which opens and closes of one file leave OPEN/CLOSE changelog records
+# while the file is already held open:
+#  - the first read-write open (fd 3) leaves an OPEN record;
+#  - another read-write open/close (fd 4) leaves no OPEN or CLOSE record;
+#  - a read-only access (cat) leaves both OPEN and CLOSE records;
+#  - a read-write open/close again (fd 4) still leaves nothing;
+#  - a read by another user ($RUNAS) leaves both OPEN and CLOSE records;
+#  - closing fd 3 leaves a CLOSE record.
+test_605() {
+	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+		skip "need MDS version at least 2.10.58"
+
+	mkdir -p $DIR/$tdir
+
+	local f=$DIR/$tdir/$tfile
+	local f2=$DIR2/$tdir/$tfile
+
+	changelog_register
+	# set changelog_mask to ALL
+	changelog_chmask "ALL"
+
+	touch $f || error "touch $f failed"
+	local fid=$(path2fid $f)
+
+	# first read-write open, kept until the end of the test
+	changelog_clear
+	changelog_dump
+	exec 3<> $f || error "open $f failed"
+	changelog_dump
+
+	changelog_find -type OPEN -target-fid $fid || error "No OPEN entry"
+
+	changelog_clear
+	changelog_dump
+	exec 4<> $f || error "open $f failed"
+	changelog_dump
+
+	changelog_find -type OPEN -target-fid $fid &&
+		error "There should be no OPEN entry"
+
+	exec 4>&- || error "close $f failed"
+	changelog_dump
+
+	changelog_find -type CLOSE -target-fid $fid &&
+		error "There should be no CLOSE entry"
+
+	changelog_clear
+	changelog_dump
+	# read-only access while fds 3 and 4 were opened read-write: a
+	# different access mode should generate entries
+	cat $f || error "cat $f failed"
+	changelog_dump
+
+	changelog_find -type OPEN -target-fid $fid || error "No OPEN entry"
+
+	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"
+
+	changelog_clear
+	changelog_dump
+	# same access as first one, should not generate new entries
+	exec 4<> $f || error "open $f failed"
+	changelog_dump
+
+	changelog_find -type OPEN -target-fid $fid &&
+		error "There should be no OPEN entry"
+
+	exec 4>&- || error "close $f failed"
+	changelog_dump
+
+	changelog_find -type CLOSE -target-fid $fid &&
+		error "There should be no CLOSE entry"
+
+	changelog_clear
+	changelog_dump
+	# access by different user should generate new entries
+	$RUNAS cat $f || error "cat $f by user $RUNAS_ID failed"
+	changelog_dump
+
+	changelog_find -type OPEN -target-fid $fid || error "No OPEN entry"
+
+	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"
+
+	# closing the first descriptor must be recorded
+	changelog_clear
+	changelog_dump
+	exec 3>&- || error "close $f failed"
+	changelog_dump
+
+	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"
+}
+run_test 605 "Test OPEN and CLOSE rate limit in Changelogs"
+
+# Create then remove a file as $RUNAS, stop mds1 and mount its device with
+# mount_fstype, then use llog_reader on the changelog catalog to find the
+# CREAT record of that file and check its "user:" (UID:GID) and "nid:" fields.
+# mds1 is stopped and started again on exit.
+test_606() {
+	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+		skip "need MDS version at least 2.10.58"
+
+	# Use llog_reader from PATH on the mgs, else the one in the build tree
+	local llog_reader=$(do_facet mgs "which llog_reader 2> /dev/null")
+	llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader}
+	[ -z "$(do_facet mgs ls -d $llog_reader 2> /dev/null)" ] &&
+		skip_env "missing llog_reader" && return
+
+	mkdir -p $DIR/$tdir
+
+	local f=$DIR/$tdir/$tfile
+
+	changelog_register
+	# set changelog_mask to ALL
+	changelog_chmask "ALL"
+
+	chmod 777 $DIR/$tdir
+	$RUNAS touch $f || error "touch $f failed as $RUNAS_ID"
+	local fid=$(path2fid $f)
+	rm $f || error "rm $f failed"
+
+	local mntpt=$(facet_mntpt mds1)
+	local entry
+
+	#remount mds1 as ldiskfs or zfs type
+	stack_trap "stop mds1; start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS" EXIT
+	stop mds1 || error "stop mds1 failed"
+	mount_fstype mds1 || error "remount mds1 failed"
+
+	# Dump the catalog for the test log, then take the path= value of the
+	# last catalog line that has one
+	do_facet mds1 $llog_reader $mntpt/changelog_catalog
+	local cat_file=$(do_facet mds1 $llog_reader \
+		$mntpt/changelog_catalog | awk \
+		'{match($0,"path=([^ ]+)",a)}END{print a[1]}')
+	[ -n "$cat_file" ] || error "no catalog file"
+
+	entry=$(do_facet mds1 $llog_reader $mntpt/$cat_file |
+		awk "/CREAT/ && /target:\[$fid\]/ {print}")
+	[ -n "$entry" ] || error "no CREAT entry"
+
+	local uidgid=$(echo $entry |
+		sed 's+.*\ user:\([0-9][0-9]*:[0-9][0-9]*\)\ .*+\1+')
+	[ -n "$uidgid" ] || error "uidgid is empty"
+	echo "Got UID/GID $uidgid"
+	[ "$uidgid" = "$RUNAS_ID:$RUNAS_GID" ] ||
+		error "uidgid '$uidgid' != '$RUNAS_ID:$RUNAS_GID'"
+	local nid=$(echo $entry |
+		sed 's+.*\ nid:\(\S\S*@\S\S*\)\ .*+\1+')
+	[ -n "$nid" ] || error "nid is empty"
+	echo "Got NID $nid"
+	# $nid is known to be non-empty at this point
+	[[ "${CLIENT_NIDS[*]}" =~ $nid ]] ||
+		error "nid '$nid' does not match any NID ${CLIENT_NIDS[@]}"
+}
+run_test 606 "llog_reader groks changelog fields"
complete $SECONDS
check_and_cleanup_lustre