X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=8bbdc50d0a9de18b2b0f7aca8c5e0697afaa63f2;hb=cda353e6efae5013a26aedbe49d8aa6fb8fe456e;hp=778b871dcf0baac1fc971ac741e02f39b851c4c7;hpb=11aae875be5fb30a88281ab47e491b75447b299b;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 778b871..8bbdc50 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -7,44 +7,72 @@ set -e set +o monitor -SRCDIR=$(dirname $0) -export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin - ONLY=${ONLY:-"$*"} -# bug number for skipped test: -ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT" -# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! - -LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +LUSTRE=${LUSTRE:-$(dirname $0)/..} . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -MULTIOP=${MULTIOP:-multiop} +ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT " +if $SHARED_KEY; then +# bug number for skipped tests: LU-9795 + ALWAYS_EXCEPT+=" 402b " +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
+fi + +# Skip tests for PPC that fail frequently +if [[ $(uname -m) = ppc64 ]]; then + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 1a 1b 1d 1e 12c 12f " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 12g 12h 12m 12n 12o 12p " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 12q 21 22 23 24a 24b " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 24d 24e 24f 25b 30c 37 " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 57 58 90 110b 111b 113 " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 222b 222d 228 260a 260b 260c " + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 220A 220a 221 222a 222c 223a " + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 223b 224A 224a 226 227 600" + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 601 602 603 604 605 " +fi + +build_test_filter + +[ -n "$FILESET" ] && skip "Not functional for FILESET set" + OPENFILE=${OPENFILE:-openfile} -MMAP_CAT=${MMAP_CAT:-mmap_cat} MOUNT_2=${MOUNT_2:-"yes"} FAIL_ON_ERROR=false # script only handles up to 10 MDTs (because of MDT_PREFIX) [ $MDSCOUNT -gt 9 ] && - error "script cannot handle more than 9 MDTs, please fix" && exit + error "script cannot handle more than 9 MDTs, please fix" check_and_setup_lustre -if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then - skip_env "Need MDS version at least 2.4.53" && exit +if [[ $MDS1_VERSION -lt $(version_code 2.4.53) ]]; then + skip_env "Need MDS version at least 2.4.53" fi # $RUNAS_ID may get set incorrectly somewhere else if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then - skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" 
&& exit + skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" fi check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS - -build_test_filter +if getent group nobody; then + GROUP=nobody +elif getent group nogroup; then + GROUP=nogroup +else + error "No generic nobody group" +fi # if there is no CLIENT1 defined, some tests can be ran on localhost CLIENT1=${CLIENT1:-$HOSTNAME} @@ -54,115 +82,9 @@ CLIENT1=${CLIENT1:-$HOSTNAME} # Exception is the test which need two separate nodes CLIENT2=${CLIENT2:-$CLIENT1} -# -# In order to test multiple remote HSM agents, a new facet type named "AGT" and -# the following associated variables are added: -# -# AGTCOUNT: number of agents -# AGTDEV{N}: target HSM mount point (root path of the backend) -# agt{N}_HOST: hostname of the agent agt{N} -# SINGLEAGT: facet of the single agent -# -# The number of agents is initialized as the number of remote client nodes. -# By default, only single copytool is started on a remote client/agent. If there -# was no remote client, then the copytool will be started on the local client. -# -init_agt_vars() { - local n - local agent - - export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))} - [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1 - - export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP} - if [[ $CLIENTCOUNT -gt 1 ]] && - ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then - skip_env "SHARED_DIRECTORY should be accessible"\ - "on all client nodes" - exit 0 - fi - - # We used to put the HSM archive in $SHARED_DIRECTORY but that - # meant NFS issues could hose sanity-hsm sessions. So now we - # use $TMP instead. 
- for n in $(seq $AGTCOUNT); do - eval export AGTDEV$n=\$\{AGTDEV$n:-"$TMP/arc$n"\} - agent=CLIENT$((n + 1)) - if [[ -z "${!agent}" ]]; then - [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 || - agent=CLIENT2 - fi - eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\} - done - - export SINGLEAGT=${SINGLEAGT:-agt1} - - export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} - export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} - export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} - export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} - export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") - # $hsm_root/$HSMTMP Makes $hsm_root dir path less generic to ensure - # rm -rf $hsm_root/* is safe even if $hsm_root becomes unset to avoid - # deleting everything in filesystem, independent of any copytool. - export HSMTMP=${HSMTMP:-"shsm"} - - HSM_ARCHIVE=$(copytool_device $SINGLEAGT) - - [ -z "${HSM_ARCHIVE// /}" ] && error "HSM_ARCHIVE is empty!" - HSM_ARCHIVE=$HSM_ARCHIVE/$HSMTMP - - HSM_ARCHIVE_NUMBER=2 - - # The test only support up to 10 MDTs - MDT_PREFIX="mdt.$FSNAME-MDT000" - HSM_PARAM="${MDT_PREFIX}0.hsm" - - # archive is purged at copytool setup - HSM_ARCHIVE_PURGE=true - - # Don't allow copytool error upon start/setup - HSMTOOL_NOERROR=false -} - -# Get the backend root path for the given agent facet. -copytool_device() { - local facet=$1 - local dev=AGTDEV$(facet_number $facet) - - echo -n ${!dev} -} - -# Stop copytool and unregister an existing changelog user. 
-cleanup() { - copytool_monitor_cleanup - copytool_cleanup - changelog_cleanup - cdt_set_sanity_policy -} - -get_mdt_devices() { - local mdtno - # get MDT device for each mdc - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - MDT[$idx]=$($LCTL get_param -n \ - mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid | - awk '{gsub(/_UUID/,""); print $1}' | head -n1) - done -} - search_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" -} - -kill_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - - echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true + do_nodesv $hosts "libtool execute pgrep -x $HSMTOOL" } wait_copytools() { @@ -170,7 +92,7 @@ wait_copytools() { local wait_timeout=200 local wait_start=$SECONDS local wait_end=$((wait_start + wait_timeout)) - local sleep_time=100000 # 0.1 second + local sleep_time=1 while ((SECONDS < wait_end)); do if ! search_copytools $hosts; then @@ -179,9 +101,8 @@ wait_copytools() { fi echo "copytools still running on $hosts" - usleep $sleep_time - [ $sleep_time -lt 32000000 ] && # 3.2 seconds - sleep_time=$(bc <<< "$sleep_time * 2") + sleep $sleep_time + [ $sleep_time -lt 5 ] && sleep_time=$((sleep_time + 1)) done # try to dump Copytool's stack @@ -208,91 +129,29 @@ copytool_monitor_setup() { cmd="cat $test_dir/fifo > $test_dir/events &" cmd+=" echo \\\$! > $test_dir/monitor_pid" - if [[ $PDSH == *Rmrsh* ]]; then - # This is required for pdsh -Rmrsh and its handling of remote - # shells. - # Regular ssh and pdsh -Rssh work fine without this - # backgrounded subshell nonsense. - (do_node $agent "$cmd") & - export HSMTOOL_MONITOR_PDSH=$! - - # Slightly racy, but just making a best-effort to catch obvious - # problems. - sleep 1 - ps -p $HSMTOOL_MONITOR_PDSH > /dev/null || - error "Failed to start copytool monitor on $agent" - else - do_node $agent "$cmd" - if [ $? 
!= 0 ]; then - error "Failed to start copytool monitor on $agent" - fi - fi -} - -copytool_monitor_cleanup() { - local facet=${1:-$SINGLEAGT} - local agent=$(facet_active_host $facet) - - if [ -n "$HSMTOOL_MONITOR_DIR" ]; then - # Should die when the copytool dies, but just in case. - local cmd="kill \\\$(cat $HSMTOOL_MONITOR_DIR/monitor_pid)" - cmd+=" 2>/dev/null || true" - do_node $agent "$cmd" - do_node $agent "rm -fr $HSMTOOL_MONITOR_DIR" - export HSMTOOL_MONITOR_DIR= - fi + # This background subshell nonsense is required when pdsh/ssh decides + # to wait for the cat process to exit on the remote client + (do_node $agent "$cmd") & + export HSMTOOL_MONITOR_PDSH=$! - # The pdsh should die on its own when the monitor dies. Just - # in case, though, try to clean up to avoid any cruft. - if [ -n "$HSMTOOL_MONITOR_PDSH" ]; then - kill $HSMTOOL_MONITOR_PDSH 2>/dev/null - export HSMTOOL_MONITOR_PDSH= + # Slightly racy, but just making a best-effort to catch obvious + # problems. + sleep 1 + do_node $agent "stat $HSMTOOL_MONITOR_DIR/monitor_pid 2>&1 > /dev/null" + if [ $? != 0 ]; then + error "Failed to start copytool monitor on $agent" fi } -copytool_setup() { - local facet=${1:-$SINGLEAGT} - # Use MOUNT2 by default if defined - local lustre_mntpnt=${2:-${MOUNT2:-$MOUNT}} - local arc_id=$3 - local hsm_root=${4:-$(copytool_device $facet)} - - [ -z "${hsm_root// /}" ] && error "copytool_setup: hsm_root empty!" 
- - local agent=$(facet_active_host $facet) - - if $HSM_ARCHIVE_PURGE; then - echo "Purging archive on $agent" - do_facet $facet "rm -rf $hsm_root/$HSMTMP/*" - fi +fid2archive() +{ + local fid="$1" - echo "Starting copytool $facet on $agent" - do_facet $facet "mkdir -p $hsm_root/$HSMTMP/" || - error "mkdir '$hsm_root/$HSMTMP' failed" - # bandwidth is limited to 1MB/s so the copy time is known and - # independent of hardware - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon" - cmd+=" --hsm-root $hsm_root/$HSMTMP" - [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id" - [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] || - cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL" - [[ -z "$HSMTOOL_EVENT_FIFO" ]] || - cmd+=" --event-fifo $HSMTOOL_EVENT_FIFO" - cmd+=" --bandwidth 1 $lustre_mntpnt" - - # Redirect the standard output and error to a log file which - # can be uploaded to Maloo. - local prefix=$TESTLOG_PREFIX - [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME - local copytool_log=$prefix.copytool${arc_id}_log.$agent.log - - stack_trap cleanup EXIT - do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1" - if [[ $? != 0 ]]; then - [[ $HSMTOOL_NOERROR == true ]] || - error "start copytool $facet on $agent failed" - echo "start copytool $facet on $agent failed" - fi + case "$HSMTOOL" in + lhsmtool_posix) + printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid" + ;; + esac } get_copytool_event_log() { @@ -306,97 +165,22 @@ get_copytool_event_log() { error "Could not collect event log from $agent" } -copytool_cleanup() { - trap - EXIT - local agt_facet=$SINGLEAGT - local agt_hosts=${1:-$(facet_active_host $agt_facet)} - local hsm_root=$(copytool_device $agt_facet) - - [ -z "${hsm_root// /}" ] && error "copytool_cleanup: hsm_root empty!" - - local i - local facet - local param - local -a state - - kill_copytools $agt_hosts - wait_copytools $agt_hosts || error "copytools failed to stop" - - # Clean all CDTs orphans requests from previous tests that - # would otherwise need to timeout to clear. 
- for ((i = 0; i < MDSCOUNT; i++)); do - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - state[$i]=$(do_facet $facet "$LCTL get_param -n $param") - - # Skip already stopping or stopped CDTs. - [[ "${state[$i]}" =~ ^stop ]] && continue - - do_facet $facet "$LCTL set_param $param=shutdown" - done - - for ((i = 0; i < MDSCOUNT; i++)); do - # Only check and restore CDTs that we stopped in the first loop. - [[ "${state[$i]}" =~ ^stop ]] && continue - - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - - wait_result $facet "$LCTL get_param -n $param" stopped 20 || - error "$facet CDT state is not stopped" - - # Restore old CDT state. - do_facet $facet "$LCTL set_param $param=${state[$i]}" - done - - for ((i = 0; i < MDSCOUNT; i++)); do - # Only check CDTs that we stopped in the first loop. - [[ "${state[$i]}" =~ ^stop ]] && continue - - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - - # Check that the old CDT state was restored. 
- wait_result $facet "$LCTL get_param -n $param" "${state[$i]}" \ - 20 || error "$facet CDT state is not '${state[$i]}'" - done - - if do_facet $agt_facet "df $hsm_root" >/dev/null 2>&1 ; then - do_facet $agt_facet "rm -rf $hsm_root/$HSMTMP/*" - fi -} - copytool_suspend() { local agents=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0 + stack_trap \ + "do_nodesv $agents libtool execute pkill -CONT -x '$HSMTOOL' || true" EXIT + do_nodesv $agents "libtool execute pkill -STOP -x $HSMTOOL" || return 0 echo "Copytool is suspended on $agents" } -copytool_continue() { - local agents=${1:-$(facet_active_host $SINGLEAGT)} - - do_nodesv $agents "pkill -CONT -x $HSMTOOL_BASE" || return 0 - echo "Copytool is continued on $agents" -} - copytool_remove_backend() { local fid=$1 - local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid) + local be=$(do_facet $SINGLEAGT find "$(hsm_root)" -name $fid) echo "Remove from backend: $fid = $be" do_facet $SINGLEAGT rm -f $be } -import_file() { - mkdir -p "$(dirname "$2")" || - error "cannot create directory '$(dirname "$2")'" - - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --import $1 $2 $MOUNT" || - error "import of $1 to $2 failed" -} - file_creation_failure() { local cmd=$1 local file=$2 @@ -462,7 +246,7 @@ create_small_sync_file() { } create_archive_file() { - local file="$HSM_ARCHIVE/$1" + local file="$(hsm_root)/$1" local count=${2:-39} local source=/dev/urandom @@ -475,79 +259,14 @@ create_archive_file() { } copy2archive() { - local file=$HSM_ARCHIVE/$2 - do_facet $SINGLEAGT mkdir -p $(dirname $file) - do_facet $SINGLEAGT cp -p $1 $file || error "cannot copy $1 to $file" -} - -mdts_set_param() { - local arg=$1 - local key=$2 - local value=$3 - local mdtno - local rc=0 - if [[ "$value" != "" ]]; then - value="=$value" - fi - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local facet=mds${mdtno} - # if $arg 
include -P option, run 1 set_param per MDT on the MGS - # else, run set_param on each MDT - [[ $arg = *"-P"* ]] && facet=mgs - do_facet $facet $LCTL set_param $arg mdt.${MDT[$idx]}.$key$value - [[ $? != 0 ]] && rc=1 - done - return $rc -} - -mdts_check_param() { - local key="$1" - local target="$2" - local timeout="$3" - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ - "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ - $timeout || - error "$key state is not '$target' on mds${mdtno}" - done -} - -changelog_setup() { - CL_USERS=() - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local cl_user=$(do_facet mds${mdtno} $LCTL \ - --device ${MDT[$idx]} \ - changelog_register -n) - CL_USERS+=($cl_user) - do_facet mds${mdtno} lctl set_param \ - mdd.${MDT[$idx]}.changelog_mask="+hsm" - $LFS changelog_clear ${MDT[$idx]} $cl_user 0 - done -} + local hsm_root="$(hsm_root)" + local file="$hsm_root/$2" -changelog_cleanup() { - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - [[ -z ${CL_USERS[$idx]} ]] && continue - $LFS changelog_clear ${MDT[$idx]} ${CL_USERS[$idx]} 0 - do_facet mds${mdtno} lctl --device ${MDT[$idx]} \ - changelog_deregister ${CL_USERS[$idx]} - done - CL_USERS=() -} - -changelog_get_flags() { - local mdt=$1 - local cltype=$2 - local fid=$3 - - $LFS changelog $mdt | awk "/$cltype/ && /t=\[$fid\]/ {print \$5}" + stack_trap "do_facet $SINGLEAGT rm -rf '$hsm_root'" EXIT + do_facet $SINGLEAGT mkdir -p "$(dirname "$file")" || + error "mkdir '$(dirname "$file")' failed" + do_facet $SINGLEAGT cp -p "$1" "$file" || + error "cannot copy '$1' to '$file'" } get_hsm_param() { @@ -556,14 +275,6 @@ get_hsm_param() { echo $val } -set_hsm_param() { - local param=$1 - local value=$2 - local opt=$3 - mdts_set_param "$opt -n" "hsm.$param" "$value" - return $? 
-} - set_test_state() { local cmd=$1 local target=$2 @@ -571,15 +282,6 @@ set_test_state() { mdts_check_param hsm_control "$target" 10 } -cdt_set_sanity_policy() { - if [[ "$CDT_POLICY_HAD_CHANGED" ]] - then - # clear all - mdts_set_param "" hsm.policy "+NRA" - mdts_set_param "" hsm.policy "-NBR" - CDT_POLICY_HAD_CHANGED= - fi -} cdt_set_no_retry() { mdts_set_param "" hsm.policy "+NRA" @@ -605,21 +307,6 @@ cdt_clear_mount_state() { mdts_set_param "-P -d" hsm_control "" } -cdt_set_mount_state() { - mdts_set_param "-P" hsm_control "$1" - # set_param -P is asynchronous operation and could race with set_param. - # In such case configs could be retrieved and applied at mgc after - # set_param -P completion. Sleep here to avoid race with set_param. - # We need at least 20 seconds. 10 for mgc_requeue_thread to wake up - # MGC_TIMEOUT_MIN_SECONDS + MGC_TIMEOUT_RAND_CENTISEC(5 + 5) - # and 10 seconds to retrieve config from server. - sleep 20 -} - -cdt_check_state() { - mdts_check_param hsm_control "$1" 20 -} - cdt_disable() { set_test_state disabled disabled } @@ -642,37 +329,6 @@ cdt_restart() { cdt_set_sanity_policy } -needclients() { - local client_count=$1 - if [[ $CLIENTCOUNT -lt $client_count ]]; then - skip "Need $client_count or more clients, have $CLIENTCOUNT" - return 1 - fi - return 0 -} - -path2fid() { - $LFS path2fid $1 | tr -d '[]' - return ${PIPESTATUS[0]} -} - -get_hsm_flags() { - local f=$1 - local u=$2 - local st - - if [[ $u == "user" ]]; then - st=$($RUNAS $LFS hsm_state $f) - else - u=root - st=$($LFS hsm_state $f) - fi - - [[ $? 
== 0 ]] || error "$LFS hsm_state $f failed (run as $u)" - - st=$(echo $st | cut -f 2 -d" " | tr -d "()," ) - echo $st -} get_hsm_archive_id() { local f=$1 @@ -684,14 +340,6 @@ get_hsm_archive_id() { echo $ar } -check_hsm_flags() { - local f=$1 - local fl=$2 - - local st=$(get_hsm_flags $f) - [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl" -} - check_hsm_flags_user() { local f=$1 local fl=$2 @@ -728,44 +376,6 @@ delete_large_files() { wait_delete_completed } -make_custom_file_for_progress() { - local count=${2:-"39"} - local bs=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1) - bs=${3:-$bs} - - [[ $count -gt 0 ]] || error "Invalid file size" - [[ $bs -gt 0 ]] || error "Invalid stripe size" - - if ! create_file "${1/$DIR/$DIR2}" $bs $count fsync; then - echo "The creation of '${1/$DIR/$DIR2}' failed" >&2 - echo "It might be due to a lack of space in the filesystem" >&2 - delete_large_files >&2 - create_file "${1/$DIR/$DIR2}" $bs $count fsync || - file_creation_failure dd "${1/$DIR/$DIR2}" $? - fi -} - -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - -wait_request_state() { - local fid=$1 - local request=$2 - local state=$3 - # 4th arg (mdt index) is optional - local mdtidx=${4:-0} - local mds=mds$(($mdtidx + 1)) - - local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" - cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - - wait_result $mds "$cmd" $state 200 || - error "request on $fid is not $state on $mds" -} - get_request_state() { local fid=$1 local request=$2 @@ -799,7 +409,7 @@ wait_all_done() { [[ -n $fid ]] && cmd+=" | grep '$fid'" cmd+=" | egrep 'WAITING|STARTED'" - wait_result $SINGLEMDS "$cmd" "" $timeout || + wait_update_facet --verbose mds1 "$cmd" "" $timeout || error "requests did not complete" } @@ -878,7 +488,7 @@ get_agent_uuid() { # Lustre mount-point is mandatory and last parameter on # copytool cmd-line. 
- local mntpnt=$(do_rpc_nodes $agent ps -C $HSMTOOL_BASE -o args= | + local mntpnt=$(do_rpc_nodes $agent libtool execute ps -C $HSMTOOL -o args= | awk '{print $NF}') [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ "parameter" @@ -907,7 +517,9 @@ cdt_set_sanity_policy # finished requests are quickly removed from list set_hsm_param grace_delay 10 -test_1() { +CLIENT_NIDS=( $($LCTL list_nids all) ) + +test_1A() { # was test_1 mkdir -p $DIR/$tdir chmod 777 $DIR/$tdir @@ -940,13 +552,13 @@ test_1() { check_hsm_flags_user $f "0x00000000" } -run_test 1 "lfs hsm flags root/non-root access" +run_test 1A "lfs hsm flags root/non-root access" test_1a() { local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) - copytool_setup + copytool setup $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED @@ -957,23 +569,18 @@ test_1a() { check_hsm_flags $f "0x0000000d" $MMAP_CAT $f > /dev/null || error "failed mmap & cat release file" - - copytool_cleanup } run_test 1a "mmap & cat a HSM released file" -test_1b() { - mkdir -p $DIR/$tdir - $LFS setstripe -E 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || - error "failed to set default stripe" - local f=$DIR/$tdir/$tfile +test_1bde_base() { + local f=$1 rm -f $f - dd if=/dev/random of=$f bs=1M count=1 conv=sync || + dd if=/dev/urandom of=$f bs=1M count=1 conv=sync || error "failed to create file" local fid=$(path2fid $f) - copytool_setup + copytool setup echo "archive $f" $LFS hsm_archive $f || error "could not archive file" @@ -989,10 +596,17 @@ test_1b() { wait_request_state $fid RESTORE SUCCEED echo "verify restored state: " check_hsm_flags $f "0x00000009" && echo "pass" +} + +test_1b() { + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || + error "failed to set default stripe" + local f=$DIR/$tdir/$tfile - copytool_cleanup + test_1bde_base $f } -run_test 1b "Archive, Release & Restore composite file" +run_test 1b "Archive, Release and Restore 
composite file" test_1c() { mkdir -p $DIR/$tdir @@ -1020,10 +634,26 @@ test_1c() { [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" - # Test whether setting archive number > 32 results in error. - $LFS hsm_set --exists --archive-id 33 $f && - error "archive number is larger than 32" - check_hsm_flags_user $f "0x00000001" + LOCAL_HSM_ARCHIVE_NUMBER=33 + if [ $(lustre_version_code client) -ge $(version_code 2.11.56) ] && + [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.11.56) ]; then + # lustre in the new version supports unlimited archiveID. + # Test whether setting archive number > 32 is supported + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f || + error "archive ID $LOCAL_HSM_ARCHIVE_NUMBER too large?" + check_hsm_flags_user $f "0x00000001" + + echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER" + st=$(get_hsm_archive_id $f) + [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || + error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" + else + # old client or old mds can only support at most 32 archiveID + # test whether setting archive number > 32 results in error. + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f && + error "bitmap archive number is larger than 32" + check_hsm_flags_user $f "0x00000001" + fi # Test whether setting archive number 16 and archived flag. 
LOCAL_HSM_ARCHIVE_NUMBER=16 @@ -1038,6 +668,58 @@ test_1c() { } run_test 1c "Check setting archive-id in lfs hsm_set" +test_1d() { + [ $MDS1_VERSION -lt $(version_code 2.10.59) ] && + skip "need MDS version at least 2.10.59" + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -L mdt -E -1 -c 2 $DIR/$tdir || + error "failed to set default stripe" + local f=$DIR/$tdir/$tfile + + test_1bde_base $f +} +run_test 1d "Archive, Release and Restore DoM file" + +test_1e() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code $SEL_VER) ] && + skip "skipped for lustre < $SEL_VER" + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1G -z 64M -E 10G -z 512M -E -1 -z 1G $DIR/$tdir || + error "failed to set default stripe" + local comp_file=$DIR/$tdir/$tfile + + test_1bde_base $comp_file + + local flg_opts="--comp-start 0 -E 64M --comp-flags init" + local found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "1st component not found" + + flg_opts="--comp-start 64M -E 1G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "2nd component not found" + + flg_opts="--comp-start 1G -E 1G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "3rd component not found" + + flg_opts="--comp-start 1G -E 10G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "4th component not found" + + flg_opts="--comp-start 10G -E 10G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "5th component not found" + + flg_opts="--comp-start 10G -E EOF --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "6th component not found" + + sel_layout_sanity $comp_file 6 +} +run_test 1e "Archive, Release and Restore SEL file" + test_2() { local f=$DIR/$tdir/$tfile @@ -1133,7 +815,7 @@ run_test 4 "Useless cancel must not be registered" test_8() { # test needs a running 
copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1142,16 +824,13 @@ test_8() { wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 8 "Test default archive number" -test_9() { +test_9A() { # was test_9 # we do not use the default one to be sure - local new_an=$((HSM_ARCHIVE_NUMBER + 1)) - copytool_cleanup - copytool_setup $SINGLEAGT $MOUNT $new_an + local archive_id=$((HSM_ARCHIVE_NUMBER + 1)) + copytool setup --archive-id $archive_id # give time for CT to register with MDTs sleep $(($MDSCOUNT*2)) @@ -1161,14 +840,12 @@ test_9() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - $LFS hsm_archive --archive $new_an $f + $LFS hsm_archive --archive $archive_id $f wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } -run_test 9 "Use of explicit archive number, with dedicated copytool" +run_test 9A "Use of explicit archive number, with dedicated copytool" test_9a() { needclients 3 || return 0 @@ -1177,14 +854,11 @@ test_9a() { local file local fid - copytool_cleanup $(comma_list $(agts_nodes)) - # start all of the copytools for n in $(seq $AGTCOUNT); do - copytool_setup agt$n + copytool setup --facet agt$n done - trap "copytool_cleanup $(comma_list $(agts_nodes))" EXIT # archive files for n in $(seq $AGTCOUNT); do file=$DIR/$tdir/$tfile.$n @@ -1194,15 +868,12 @@ test_9a() { wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $file "0x00000009" done - - trap - EXIT - copytool_cleanup $(comma_list $(agts_nodes)) } run_test 9a "Multiple remote agents" test_10a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile @@ -1211,10 +882,13 @@ test_10a() { error "hsm_archive failed" wait_request_state $fid ARCHIVE SUCCEED - local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || - error "fid $fid not in archive $HSM_ARCHIVE" + 
local hsm_root="$(copytool_device $SINGLEAGT)" + local archive="$(do_facet $SINGLEAGT \ + find "$hsm_root" -name "$fid" -print0)" + [ -n "$archive" ] || error "fid '$fid' not in archive '$hsm_root'" + echo "Verifying content" - do_facet $SINGLEAGT diff $f $AFILE || error "archived file differs" + do_facet $SINGLEAGT diff $f $archive || error "archived file differs" echo "Verifying hsm state " check_hsm_flags $f "0x00000009" @@ -1222,15 +896,12 @@ test_10a() { local st=$(get_hsm_archive_id $f) [[ $st == $HSM_ARCHIVE_NUMBER ]] || error "Wrong archive number, $st != $HSM_ARCHIVE_NUMBER" - - copytool_cleanup - } run_test 10a "Archive a file" test_10b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1242,28 +913,25 @@ test_10b() { local cnt=$(get_request_count $fid ARCHIVE) [[ "$cnt" == "1" ]] || error "archive of non dirty file must not make a request" - - copytool_cleanup } run_test 10b "Archive of non dirty file must work without doing request" test_10c() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_set --noarchive $f $LFS hsm_archive $f && error "archive a noarchive file must fail" - - copytool_cleanup + return 0 } run_test 10c "Check forbidden archive" test_10d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1275,8 +943,6 @@ test_10d() { local dflt=$(get_hsm_param default_archive_id) [[ $ar == $dflt ]] || error "archived file is not on default archive: $ar != $dflt" - - copytool_cleanup } run_test 10d "Archive a file on the default archive id" @@ -1285,30 +951,30 @@ test_11a() { copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f echo -n "Verifying released state: " check_hsm_flags $f "0x0000000d" local LSZ=$(stat -c "%s" $f) - local 
ASZ=$(do_facet $SINGLEAGT stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile) + local ASZ=$(do_facet $SINGLEAGT stat -c "%s" "$(hsm_root)/$tdir/$tfile") echo "Verifying imported size $LSZ=$ASZ" [[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ" echo -n "Verifying released pattern: " - local PTRN=$($GETSTRIPE -L $f) + local PTRN=$($LFS getstripe -L $f) echo $PTRN [[ $PTRN == released ]] || error "Is not released" local fid=$(path2fid $f) echo "Verifying new fid $fid in archive" - local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || - error "fid $fid not in archive $HSM_ARCHIVE" + do_facet $SINGLEAGT "[ -f \"$(fid2archive "$fid")\" ]" || + error "No archive for fid $fid" } run_test 11a "Import a file" test_11b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1320,25 +986,23 @@ test_11b() { local FILE_HASH=$(md5sum $f) rm -f $f - import_file $fid $f + copytool import $fid $f echo "$FILE_HASH" | md5sum -c [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 11b "Import a deleted file using its FID" test_12a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local f2=$DIR2/$tdir/$tfile echo "Verifying released state: " check_hsm_flags $f2 "0x0000000d" @@ -1350,23 +1014,21 @@ test_12a() { echo "Verifying file state: " check_hsm_flags $f2 "0x00000009" - do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f [[ $? 
-eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12a "Restore an imported file explicitly" test_12b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f echo "Verifying released state: " check_hsm_flags $f "0x0000000d" @@ -1375,11 +1037,9 @@ test_12b() { echo "Verifying file state after restore: " check_hsm_flags $f "0x00000009" - do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12b "Restore an imported file implicitly" @@ -1387,14 +1047,12 @@ test_12c() { [ "$OSTCOUNT" -lt "2" ] && skip_env "needs >= 2 OSTs" && return # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile mkdir -p $DIR/$tdir $LFS setstripe -c 2 "$f" - local fid - fid=$(make_custom_file_for_progress $f 5) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 5) local FILE_CRC=$(md5sum $f) @@ -1405,14 +1063,12 @@ test_12c() { echo "$FILE_CRC" | md5sum -c [[ $? 
-eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12c "Restore a file with stripe of 2" test_12d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -1430,17 +1086,15 @@ test_12d() { local cnt=$(get_request_count $fid RESTORE) [[ "$cnt" == "0" ]] || error "restore a non dirty file must not make a request" - - copytool_cleanup } run_test 12d "Restore of a non archived, non released file must work"\ " without doing request" test_12e() { # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive $f || error "archive request failed" @@ -1452,14 +1106,13 @@ test_12e() { $LFS hsm_state $f $LFS hsm_restore $f && error "restore a dirty file must fail" - - copytool_cleanup + return 0 } run_test 12e "Check forbidden restore" test_12f() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1477,14 +1130,12 @@ test_12f() { diff -q /etc/hosts $f [[ $? 
-eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12f "Restore a released file explicitly" test_12g() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1501,8 +1152,6 @@ test_12g() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12g "Restore a released file implicitly" @@ -1510,7 +1159,7 @@ test_12h() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1527,14 +1176,12 @@ test_12h() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12h "Restore a released file implicitly from a second node" test_12m() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1547,33 +1194,29 @@ test_12m() { cmp /etc/passwd $f [[ $? 
-eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12m "Archive/release/implicit restore" test_12n() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f do_facet $SINGLEAGT cmp /etc/hosts $f || error "Restored file differs" $LFS hsm_release $f || error "release of $f failed" - - copytool_cleanup } run_test 12n "Import/implicit restore/release" test_12o() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1617,14 +1260,12 @@ test_12o() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12o "Layout-swap failure during Restore leaves file released" test_12p() { # test needs a running copytool - copytool_setup + copytool setup mkdir $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1637,28 +1278,19 @@ test_12p() { do_facet $SINGLEAGT cat $f > /dev/null || error "cannot cat $f" $LFS hsm_release $f || error "cannot release $f" do_facet $SINGLEAGT cat $f > /dev/null || error "cannot cat $f" - - copytool_cleanup } run_test 12p "implicit restore of a file on copytool mount point" -cleanup_test_12q() { - trap 0 - zconf_umount $(facet_host $SINGLEAGT) $MOUNT3 || - error "cannot umount $MOUNT3 on $SINGLEAGT" -} - test_12q() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.58) ] && - skip "need MDS version at least 2.7.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.58) ] && + skip "need MDS version at least 2.7.58" + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT zconf_mount $(facet_host $SINGLEAGT) $MOUNT3 || error "cannot mount $MOUNT3 on $SINGLEAGT" - trap cleanup_test_12q EXIT - # test needs a running copytool - copytool_setup $SINGLEAGT $MOUNT3 + copytool setup -m "$MOUNT3" local f=$DIR/$tdir/$tfile local 
f2=$DIR2/$tdir/$tfile @@ -1684,7 +1316,7 @@ test_12q() { [ $size -eq $orig_size ] || error "$f2: wrong size after archive: $size != $orig_size" - HSM_ARCHIVE_PURGE=false copytool_setup $SINGLEAGT /mnt/lustre3 + copytool setup -m "$MOUNT3" wait @@ -1705,56 +1337,66 @@ test_12q() { size=$(stat -c "%s" $f2) [ $size -eq 0 ] || error "$f2: wrong size after overwrite: $size != 0" - - copytool_cleanup - zconf_umount $(facet_host $SINGLEAGT) $MOUNT3 || - error "cannot umount $MOUNT3 on $SINGLEAGT" } run_test 12q "file attributes are refreshed after restore" -test_13() { +test_12r() { # test needs a running copytool - copytool_setup - - local ARC_SUBDIR="import.orig" - local d="" - local f="" - - # populate directory to be imported - for d in $(seq 1 10); do - local CURR_DIR="$HSM_ARCHIVE/$ARC_SUBDIR/dir.$d" - do_facet $SINGLEAGT mkdir -p "$CURR_DIR" - for f in $(seq 1 10); do - CURR_FILE="$CURR_DIR/$tfile.$f" - # write file-specific data - do_facet $SINGLEAGT \ - "echo d=$d, f=$f, dir=$CURR_DIR, "\ - "file=$CURR_FILE > $CURR_FILE" - done - done - # import to Lustre - import_file "$ARC_SUBDIR" $DIR/$tdir - # diff lustre content and origin (triggers file restoration) - # there must be 10x10 identical files, and no difference - local cnt_ok=$(do_facet $SINGLEAGT diff -rs $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | grep identical | wc -l) - local cnt_diff=$(do_facet $SINGLEAGT diff -r $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | wc -l) + copytool setup - [ $cnt_diff -eq 0 ] || - error "$cnt_diff imported files differ from read data" - [ $cnt_ok -eq 100 ] || - error "not enough identical files ($cnt_ok != 100)" + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/hosts $f) - copytool_cleanup -} -run_test 13 "Recursively import and restore a directory" + $LFS hsm_archive $f || error "archive of $f failed" + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f || error "release of $f failed" -test_14() { - # test needs a 
running copytool - copytool_setup + offset=$(lseek_test -d 7 $f) - # archive a file + # we check we had a restore done + wait_request_state $fid RESTORE SUCCEED + [[ $offset == 7 ]] || error "offset $offset != 7" +} +run_test 12r "lseek restores released file" + +test_13() { + local -i i j k=0 + for i in {1..10}; do + local archive_dir="$(hsm_root)"/subdir/dir.$i + + do_facet $SINGLEAGT mkdir -p "$archive_dir" + for j in {1..10}; do + local archive_file="$archive_dir"/file.$j + + do_facet $SINGLEAGT "echo $k > \"$archive_dir\"/file.$j" + k+=1 + done + done + + # import to Lustre + copytool import "subdir" "$DIR/$tdir" + + # To check the import, the test uses diff with the -r flag + # This is nice, but diff only checks files one by one, and triggering + # an implicit restore for one file at a time will consume as many + # seconds as there are files to compare. To speed this up, a restore + # operation is triggered manually first. + copytool setup + find "$DIR/$tdir"/subdir -type f -exec $LFS hsm_restore {} \; + + # Compare the imported data + do_facet $SINGLEAGT \ + diff -r "$(hsm_root)"/subdir "$DIR/$tdir"/subdir || + error "imported files differ from archived data" +} +run_test 13 "Recursively import and restore a directory" + +test_14() { + # test needs a running copytool + copytool setup + + # archive a file local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) local sum=$(md5sum $f | awk '{print $1}') @@ -1769,22 +1411,18 @@ test_14() { # rebind the archive to the newly created file echo "rebind $fid to $fid2" - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --rebind $fid $fid2 $DIR" || error "could not rebind file" + copytool rebind $fid $fid2 # restore file and compare md5sum local sum2=$(md5sum $f | awk '{print $1}') [[ $sum == $sum2 ]] || error "md5sum mismatch after restore" - - copytool_cleanup } run_test 14 "Rebind archived file to a new fid" test_15() { # test needs a running copytool - copytool_setup + 
copytool setup # archive files local f=$DIR/$tdir/$tfile @@ -1800,6 +1438,7 @@ test_15() { done wait_all_done $(($count*60)) + stack_trap "rm -f $tmpfile" EXIT :>$tmpfile # delete the files for i in $(seq 1 $count); do @@ -1816,9 +1455,7 @@ test_15() { [[ $nl == $count ]] || error "$nl files in list, $count expected" echo "rebind list of files" - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --rebind $tmpfile $DIR" || error "could not rebind file list" + copytool rebind "$tmpfile" # restore files and compare md5sum for i in $(seq 1 $count); do @@ -1826,15 +1463,12 @@ test_15() { [[ $sum2 == ${sums[$i]} ]] || error "md5sum mismatch after restore ($sum2 != ${sums[$i]})" done - - rm -f $tmpfile - copytool_cleanup } run_test 15 "Rebind a list of files" test_16() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local ref=/tmp/ref # create a known size file so we can verify transfer speed @@ -1855,8 +1489,6 @@ test_16() { [[ $duration -ge $((goal - 1)) ]] || error "Transfer is too fast $duration < $goal" - - copytool_cleanup } run_test 16 "Test CT bandwith control option" @@ -1889,7 +1521,7 @@ run_test 20 "Release is not permitted" test_21() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/test_release @@ -1950,14 +1582,12 @@ test_21() { check_hsm_flags $f "0x0000000d" stop_full_debug_logging - - copytool_cleanup } run_test 21 "Simple release tests" test_22() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/test_release local swap=$DIR/$tdir/test_swap @@ -1976,14 +1606,13 @@ test_22() { create_small_file $swap $LFS swap_layouts $swap $f && error "swap_layouts should failed" - true - copytool_cleanup + return 0 } run_test 22 "Could not swap a release file" test_23() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/test_mtime @@ -2005,8 +1634,6 @@ test_23() { local ATIME=$(stat -c "%X" $f) [ 
$MTIME -eq "978261179" ] || fail "bad mtime: $MTIME" [ $ATIME -eq "978261179" ] || fail "bad atime: $ATIME" - - copytool_cleanup } run_test 23 "Release does not change a/mtime (utime)" @@ -2021,7 +1648,7 @@ test_24a() { local ctime1 # test needs a running copytool - copytool_setup + copytool setup fid=$(create_small_file $file) @@ -2093,7 +1720,8 @@ test_24a() { [ $ctime0 -eq $ctime1 ] || error "restore changed ctime from $ctime0 to $ctime1" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytools failed to stop" # Once more, after unmount and mount. umount_client $MOUNT || error "cannot unmount '$MOUNT'" @@ -2122,7 +1750,7 @@ test_24b() { # LU-3811 # Test needs a running copytool. - copytool_setup + copytool setup # Check that root can do HSM actions on a regular user's file. fid=$(create_small_file $file) @@ -2156,18 +1784,9 @@ test_24b() { [ "$sum0" == "$sum1" ] || error "md5sum mismatch for '$file'" - - copytool_cleanup } run_test 24b "root can archive, release, and restore user files" -cleanup_test_24c() { - trap 0 - set_hsm_param user_request_mask RESTORE - set_hsm_param group_request_mask RESTORE - set_hsm_param other_request_mask RESTORE -} - test_24c() { local file=$DIR/$tdir/$tfile local action=archive @@ -2176,15 +1795,18 @@ test_24c() { local other_save # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir # Save the default masks and check that cleanup_24c will # restore the request masks correctly. 
user_save=$(get_hsm_param user_request_mask) + stack_trap "set_hsm_param user_request_mask $user_save" EXIT group_save=$(get_hsm_param group_request_mask) + stack_trap "set_hsm_param group_request_mask $group_save" EXIT other_save=$(get_hsm_param other_request_mask) + stack_trap "set_hsm_param other_request_mask $other_save" EXIT [ "$user_save" == RESTORE ] || error "user_request_mask is '$user_save' expected 'RESTORE'" @@ -2193,12 +1815,10 @@ test_24c() { [ "$other_save" == RESTORE ] || error "other_request_mask is '$other_save' expected 'RESTORE'" - trap cleanup_test_24c EXIT - # User. create_small_file $file - chown $RUNAS_ID:nobody $file || - error "cannot chown '$file' to '$RUNAS_ID:nobody'" + chown $RUNAS_ID:$GROUP $file || + error "cannot chown '$file' to '$RUNAS_ID:$GROUP'" $RUNAS $LFS hsm_$action $file && error "$action by user should fail" @@ -2221,8 +1841,8 @@ test_24c() { # Other. create_small_file $file - chown nobody:nobody $file || - error "cannot chown '$file' to 'nobody:nobody'" + chown nobody:$GROUP $file || + error "cannot chown '$file' to 'nobody:$GROUP'" $RUNAS $LFS hsm_$action $file && error "$action by other should fail" @@ -2230,17 +1850,9 @@ test_24c() { set_hsm_param other_request_mask $action $RUNAS $LFS hsm_$action $file || error "$action by other should succeed" - - copytool_cleanup - cleanup_test_24c } run_test 24c "check that user,group,other request masks work" -cleanup_test_24d() { - mount -o remount,rw $MOUNT2 - zconf_umount $(facet_host $SINGLEAGT) "$MOUNT3" -} - test_24d() { local file1=$DIR/$tdir/$tfile local file2=$DIR2/$tdir/$tfile @@ -2252,12 +1864,13 @@ test_24d() { echo $fid1 $LFS getstripe $file1 - trap cleanup_test_24d EXIT - zconf_mount $(facet_host $SINGLEAGT) "$MOUNT3" || + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT + zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" || error "cannot mount '$MOUNT3' on '$SINGLEAGT'" - copytool_setup $SINGLEAGT "$MOUNT3" || - error "unable to setup a copytool
for the test" + copytool setup -m "$MOUNT3" + + stack_trap "mount -o remount,rw \"$MOUNT2\"" EXIT mount -o remount,ro $MOUNT2 do_nodes $(comma_list $(nodes_list)) $LCTL clear @@ -2288,7 +1901,7 @@ test_24d() { run_test 24d "check that read-only mounts are respected" test_24e() { - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile local fid @@ -2302,22 +1915,19 @@ test_24e() { done tar -cf $TMP/$tfile.tar $DIR/$tdir || error "cannot tar $DIR/$tdir" - - copytool_cleanup } run_test 24e "tar succeeds on HSM released files" # LU-6213 test_24f() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) sum0=$(md5sum $f) echo $sum0 - $LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $f || + $LFS hsm_archive $f || error "hsm_archive failed" wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f || error "cannot release $f" @@ -2329,21 +1939,46 @@ test_24f() { sum1=$(md5sum $f) echo "Sum0 = $sum0, sum1 = $sum1" [ "$sum0" == "$sum1" ] || error "md5sum mismatch for '$tfile'" - - copytool_cleanup } run_test 24f "root can archive, release, and restore tar files" +test_24g() { + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local file=$DIR/$tdir/$tfile + local fid + + echo "RUNAS = '$RUNAS'" + + copytool setup + + mkdir -p $DIR/$tdir + chmod ugo+rwx $DIR/$tdir + + echo "Please listen carefully as our options have changed." | tee $file + fid=$(path2fid $file) + chmod ugo+rw $file + + $LFS hsm_archive $file + wait_request_state $fid ARCHIVE SUCCEED + check_hsm_flags $file 0x00000009 # exists archived + + echo "To be electrocuted by your telephone, press #." 
| $RUNAS tee $file + check_hsm_flags $file 0x0000000b # exists dirty archived +} +run_test 24g "write by non-owner still sets dirty" # LU-11369 + test_25a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f $LFS hsm_set --lost $f @@ -2351,15 +1986,13 @@ test_25a() { local st=$? [[ $st == 1 ]] || error "lost file access should failed (returns $st)" - - copytool_cleanup } run_test 25a "Restore lost file (HS_LOST flag) from import"\ " (Operation not permitted)" test_25b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -2375,20 +2008,16 @@ test_25b() { st=$? [[ $st == 1 ]] || error "lost file access should failed (returns $st)" - - copytool_cleanup } run_test 25b "Restore lost file (HS_LOST flag) after release"\ " (Operation not permitted)" -test_26() { +test_26A() { # was test_26 # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2397,25 +2026,15 @@ test_26() { wait_request_state $fid REMOVE SUCCEED check_hsm_flags $f "0x00000000" - - copytool_cleanup -} -run_test 26 "Remove the archive of a valid file" - -cleanup_test_26a() { - trap 0 - set_hsm_param remove_archive_on_last_unlink 0 - set_hsm_param loop_period $orig_loop_period - set_hsm_param grace_delay $orig_grace_delay - copytool_cleanup } +run_test 26A "Remove the archive of a valid file" test_26a() { local raolu=$(get_hsm_param remove_archive_on_last_unlink) [[ $raolu -eq 0 ]] || error "RAoLU policy should be off" # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -2436,16 +2055,17 @@ test_26a() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f3 wait_request_state $fid3 ARCHIVE SUCCEED - trap cleanup_test_26a EXIT - # set a long grace_delay vs short loop_period local orig_loop_period=$(get_hsm_param loop_period) local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT set_hsm_param grace_delay 100 rm -f $f + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT set_hsm_param remove_archive_on_last_unlink 1 ln "$f3" "$f3"_bis || error "Unable to create hard-link" @@ -2453,29 +2073,18 @@ test_26a() { rm -f $f2 - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid2 REMOVE SUCCEED assert_request_count $fid REMOVE 0 \ "Unexpected archived data remove request for $f" assert_request_count $fid3 REMOVE 0 \ "Unexpected archived data remove request for $f3" - - cleanup_test_26a } run_test 26a "Remove Archive On Last Unlink (RAoLU) policy" -cleanup_test_26b() { - trap 0 - set_hsm_param remove_archive_on_last_unlink 0 - 
copytool_cleanup -} - test_26b() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -2484,8 +2093,7 @@ test_26b() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - trap cleanup_test_26b EXIT - + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT set_hsm_param remove_archive_on_last_unlink 1 cdt_shutdown @@ -2493,34 +2101,22 @@ test_26b() { rm -f $f - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid REMOVE WAITING cdt_enable + # copytool must re-register kill_copytools wait_copytools || error "copytool failed to stop" - HSM_ARCHIVE_PURGE=false copytool_setup + copytool setup wait_request_state $fid REMOVE SUCCEED - - cleanup_test_26b } run_test 26b "RAoLU policy when CDT off" -cleanup_test_26c() { - trap 0 - set_hsm_param remove_archive_on_last_unlink 0 - set_hsm_param loop_period $orig_loop_period - set_hsm_param grace_delay $orig_grace_delay - copytool_cleanup -} - test_26c() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -2535,14 +2131,15 @@ test_26c() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2 wait_request_state $fid2 ARCHIVE SUCCEED - trap cleanup_test_26c EXIT - # set a long grace_delay vs short loop_period local orig_loop_period=$(get_hsm_param loop_period) local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT set_hsm_param grace_delay 100 + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT set_hsm_param remove_archive_on_last_unlink 1 multiop_bg_pause $f O_c || error "open $f failed" @@ -2558,44 +2155,32 @@ test_26c() { kill -USR1 $pid || error "multiop early exit" # should reach autotest timeout if multiop fails to trap # signal, close file, and exit ... 
- wait $pid || error - - set_hsm_param remove_archive_on_last_unlink 0 + wait $pid || error "wait PID $pid failed" wait_request_state $fid REMOVE SUCCEED - - cleanup_test_26c } run_test 26c "RAoLU effective when file closed" -cleanup_test_26d() { - trap 0 - set_hsm_param remove_archive_on_last_unlink 0 - set_hsm_param loop_period $orig_loop_period - set_hsm_param grace_delay $orig_grace_delay - copytool_cleanup -} - test_26d() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED - trap cleanup_test_26d EXIT - # set a long grace_delay vs short loop_period local orig_loop_period=$(get_hsm_param loop_period) local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT set_hsm_param grace_delay 100 + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT set_hsm_param remove_archive_on_last_unlink 1 multiop_bg_pause $f O_c || error "multiop failed" @@ -2605,44 +2190,36 @@ test_26d() { mds_evict_client - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid REMOVE SUCCEED client_up || client_up || true kill -USR1 $MULTIPID wait $MULTIPID || error "multiop close failed" - - cleanup_test_26d } run_test 26d "RAoLU when Client eviction" test_27a() { # test needs a running copytool - copytool_setup + copytool setup create_archive_file $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) $LFS hsm_remove $f [[ $?
!= 0 ]] || error "Remove of a released file should fail" - - copytool_cleanup } run_test 27a "Remove the archive of an imported file (Operation not permitted)" test_27b() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2651,19 +2228,15 @@ test_27b() { $LFS hsm_remove $f [[ $? != 0 ]] || error "Remove of a released file should fail" - - copytool_cleanup } run_test 27b "Remove the archive of a relased file (Operation not permitted)" test_28() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2676,8 +2249,6 @@ test_28() { cdt_enable wait_request_state $fid REMOVE SUCCEED - - copytool_cleanup } run_test 28 "Concurrent archive/file remove" @@ -2685,25 +2256,23 @@ test_29a() { # Tests --mntpath and --archive options local archive_id=7 - copytool_setup $SINGLEAGT $MOUNT $archive_id + copytool setup -m "$MOUNT" -a $archive_id # Bad archive number - $LFS hsm_remove -m $MOUNT -a 33 0x857765760:0x8:0x2 2>&1 | + $LFS hsm_remove -m "$MOUNT" -a 33 0x857765760:0x8:0x2 2>&1 | grep "Invalid argument" || error "unexpected hsm_remove failure (1)" # mntpath is present but file is given - $LFS hsm_remove --mntpath $MOUNT --archive 30 /qwerty/uyt 2>&1 | + $LFS hsm_remove --mntpath "$MOUNT" --archive 30 /qwerty/uyt 2>&1 | grep "hsm: '/qwerty/uyt' is not a valid FID" || error "unexpected hsm_remove failure (2)" - - copytool_cleanup } run_test 29a "Tests --mntpath and --archive options" test_29b() { # 
test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) @@ -2715,14 +2284,12 @@ test_29b() { $LFS hsm_remove -m $MOUNT -a $HSM_ARCHIVE_NUMBER $fid wait_request_state $fid REMOVE SUCCEED - - copytool_cleanup } run_test 29b "Archive/delete/remove by FID from the archive." test_29c() { # test needs a running copytool - copytool_setup + copytool setup local fid1=$(create_small_file $DIR/$tdir/$tfile-1) local fid2=$(create_small_file $DIR/$tdir/$tfile-2) @@ -2744,8 +2311,6 @@ test_29c() { wait_request_state $fid1 REMOVE SUCCEED wait_request_state $fid2 REMOVE SUCCEED wait_request_state $fid3 REMOVE SUCCEED - - copytool_cleanup } run_test 29c "Archive/delete/remove by FID, using a file list." @@ -2757,14 +2322,11 @@ test_29d() { local file local fid - copytool_cleanup $(comma_list $(agts_nodes)) - # start all of the copytools for n in $(seq $AGTCOUNT); do - copytool_setup agt$n $MOUNT2 $n + copytool setup -f agt$n -a $n done - trap "copytool_cleanup $(comma_list $(agts_nodes))" EXIT # archive files file=$DIR/$tdir/$tfile fid=$(create_small_file $file) @@ -2807,15 +2369,11 @@ test_29d() { fi done - [[ $scnt -ne 1 ]] && + [[ $scnt -eq 1 ]] || error "one and only CT should have removed successfully" - [[ $AGTCOUNT -ne $((scnt + fcnt)) ]] && + [[ $AGTCOUNT -eq $((scnt + fcnt)) ]] || error "all but one CT should have failed to remove" - - trap - EXIT - copytool_cleanup $(comma_list $(agts_nodes)) - } run_test 29d "hsm_remove by FID with archive_id 0 for unlinked file cause "\ "request to be sent once for each registered archive_id" @@ -2826,29 +2384,25 @@ test_30a() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /bin/true $tdir/$tfile local f=$DIR/$tdir/true - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) + stack_trap "cdt_clear_no_retry" EXIT # set no retry action mode cdt_set_no_retry 
do_node $CLIENT2 $f local st=$? - # cleanup - # remove no try action mode - cdt_clear_no_retry $LFS hsm_state $f [[ $st == 0 ]] || error "Failed to exec a released file" - - copytool_cleanup } run_test 30a "Restore at exec (import case)" @@ -2858,7 +2412,7 @@ test_30b() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/true @@ -2868,19 +2422,17 @@ test_30b() { wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f $LFS hsm_state $f + + stack_trap cdt_clear_no_retry EXIT # set no retry action mode cdt_set_no_retry + do_node $CLIENT2 $f local st=$? - # cleanup - # remove no try action mode - cdt_clear_no_retry $LFS hsm_state $f [[ $st == 0 ]] || error "Failed to exec a released file" - - copytool_cleanup } run_test 30b "Restore at exec (release case)" @@ -2888,7 +2440,7 @@ test_30c() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/SLEEP @@ -2899,8 +2451,11 @@ test_30c() { wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f check_hsm_flags $f "0x0000000d" + + stack_trap cdt_clear_no_retry EXIT # set no retry action mode cdt_set_no_retry + do_node $CLIENT2 "$f 10" & local pid=$! sleep 3 @@ -2916,12 +2471,7 @@ test_30c() { error "Binary overwritten during exec" fi - # cleanup - # remove no try action mode - cdt_clear_no_retry check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 30c "Update during exec of released file must fail" @@ -2959,32 +2509,28 @@ restore_and_check_size() { test_31a() { # test needs a running copytool - copytool_setup + copytool setup create_archive_file $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$($LFS path2fid $f) - HSM_ARCHIVE_PURGE=false copytool_setup + copytool setup restore_and_check_size $f $fid local err=$? 
[[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31a "Import a large file and check size during restore" test_31b() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2994,19 +2540,15 @@ test_31b() { local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31b "Restore a large unaligned file and check size during restore" test_31c() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 33 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -3016,182 +2558,142 @@ test_31c() { local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31c "Restore a large aligned file and check size during restore" test_33() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f - # to be sure wait_all_done will not be mislead by previous tests - # and ops. - cdt_purge - wait_for_grace_delay - # Also raise grace_delay significantly so the Canceled - # Restore action will stay enough long avail. 
- local old_grace=$(get_hsm_param grace_delay) - set_hsm_param grace_delay 100 + # Prevent restore from completing + copytool_suspend + # Implicit restore md5sum $f >/dev/null & local pid=$! - wait_request_state $fid RESTORE STARTED + wait_request_state $fid RESTORE STARTED kill -15 $pid - sleep 1 - - # Check restore trigger process was killed - local killed=$(ps -o pid,comm hp $pid >/dev/null) - - $LFS hsm_cancel $f - - # instead of waiting+checking both Restore and Cancel ops - # sequentially, wait for both to be finished and then check - # each results. - wait_all_done 100 $fid - local rstate=$(get_request_state $fid RESTORE) - local cstate=$(get_request_state $fid CANCEL) - - # restore orig grace_delay. - set_hsm_param grace_delay $old_grace - - if [[ "$rstate" == "CANCELED" ]] ; then - [[ "$cstate" == "SUCCEED" ]] || - error "Restore state is CANCELED and Cancel state " \ - "is not SUCCEED but $cstate" - echo "Restore state is CANCELED, Cancel state is SUCCEED" - elif [[ "$rstate" == "SUCCEED" ]] ; then - [[ "$cstate" == "FAILED" ]] || - error "Restore state is SUCCEED and Cancel state " \ - "is not FAILED but $cstate" - echo "Restore state is SUCCEED, Cancel state is FAILED" - else - error "Restore state is $rstate and Cancel state is $cstate" - fi - [ -z $killed ] || - error "Cannot kill process waiting for restore ($killed)" + copytool_continue - copytool_cleanup + # Check restore trigger process was killed + wait $pid + [ $? -eq 143 ] || error "md5sum was not 'Terminated'" } run_test 33 "Kill a restore waiting process" test_34() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - rm $f || error "rm $f failed" # rm must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 rm "$f" || error "rm $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiatior still running" wait $pid || error "Restore initiator failed with $?" - copytool_cleanup + # Check the file was actually deleted + [ ! -f "$f" ] || error "$f was not deleted" } run_test 34 "Remove file during restore" test_35() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile local f1=$DIR/$tdir/$tfile-1 - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return - + local fid=$(create_empty_file "$f") local fid1=$(copy_file /etc/passwd $f1) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! 
+ wait_request_state $fid RESTORE STARTED - mv $f1 $f || error "mv $f1 $f failed" # mv must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 mv "$f1" "$f" || error "mv $f1 $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiatior still running" wait $pid || error "Restore initiator failed with $?" - fid2=$(path2fid $f) + local fid2=$(path2fid $f) [[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1" - - copytool_cleanup } run_test 35 "Overwrite file during restore" test_36() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! - wait_request_state $fid RESTORE STARTED - mv $f $f.new - # rm must not block during restore wait_request_state $fid RESTORE STARTED + # mv must not block during restore + timeout --signal=KILL 10 mv "$f" "$f.new" || + error "mv '$f' '$f.new' failed with rc=$?" + + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || - error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiator is still running" wait $pid || error "Restore initiator failed with $?" - - copytool_cleanup } run_test 36 "Move file during restore" test_37() { # LU-5683: check that an archived dirty file can be rearchived. 
- copytool_cleanup - copytool_setup $SINGLEAGT $MOUNT2 + copytool setup local f=$DIR/$tdir/$tfile local fid @@ -3210,8 +2712,6 @@ test_37() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - - copytool_cleanup } run_test 37 "re-archive a dirty file" @@ -3226,12 +2726,6 @@ multi_archive() { echo "$count archive requests submitted" } -cleanup_test_40() { - trap 0 - set_hsm_param max_requests $max_requests - copytool_cleanup -} - test_40() { local stream_count=4 local file_count=100 @@ -3242,6 +2736,7 @@ test_40() { local fid="" local max_requests=$(get_hsm_param max_requests) + stack_trap "set_hsm_param max_requests $max_requests" EXIT # Increase the number of HSM request that can be performed in # parallel. With the coordinator running once per second, this # also limits the number of requests per seconds that can be @@ -3250,20 +2745,14 @@ test_40() { # fail some requests if if gets too many at once. set_hsm_param max_requests 300 - trap cleanup_test_40 EXIT - for i in $(seq 1 $file_count); do for p in $(seq 1 $stream_count); do fid=$(copy_file /etc/hosts $f.$p.$i) done done - # force copytool to use a local/temp archive dir to ensure best - # performance vs remote/NFS mounts used in auto-tests - if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then - copytool_setup - else - copytool_setup $SINGLEAGT $MOUNT $HSM_ARCHIVE_NUMBER $TMP/$tdir - fi + + copytool setup + # to be sure wait_all_done will not be mislead by previous tests cdt_purge wait_for_grace_delay @@ -3277,18 +2766,16 @@ test_40() { wait ${pids[*]} echo OK wait_all_done 100 - - cleanup_test_40 } run_test 40 "Parallel archive requests" test_52() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED @@ -3304,18 +2791,16 
@@ test_52() { wait $MULTIPID || error "multiop close failed" check_hsm_flags $f "0x0000000b" - - copytool_cleanup } run_test 52 "Opened for write file on an evicted client should be set dirty" test_53() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3332,17 +2817,14 @@ test_53() { wait $MULTIPID || error "multiop close failed" check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 53 "Opened for read file on an evicted client should not be set dirty" test_54() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 39 1000000) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3350,6 +2832,7 @@ test_54() { check_hsm_flags $f "0x00000001" + stack_trap "cdt_clear_no_retry" EXIT # Avoid coordinator resending this request as soon it has failed. cdt_set_no_retry @@ -3358,18 +2841,14 @@ test_54() { wait_request_state $fid ARCHIVE FAILED check_hsm_flags $f "0x00000003" - - cdt_clear_no_retry - copytool_cleanup } run_test 54 "Write during an archive cancels it" test_55() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 39 1000000) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3377,6 +2856,7 @@ test_55() { check_hsm_flags $f "0x00000001" + stack_trap "cdt_clear_no_retry" EXIT # Avoid coordinator resending this request as soon it has failed. 
cdt_set_no_retry @@ -3385,20 +2865,14 @@ test_55() { wait_request_state $fid ARCHIVE FAILED check_hsm_flags $f "0x00000003" - - cdt_clear_no_retry - copytool_cleanup } run_test 55 "Truncate during an archive cancels it" test_56() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3414,8 +2888,6 @@ test_56() { wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 56 "Setattr during an archive is ok" @@ -3424,7 +2896,7 @@ test_57() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/test_archive_remote @@ -3445,8 +2917,6 @@ test_57() { error "hsm_restore failed" wait_request_state $fid RESTORE SUCCEED - - copytool_cleanup } run_test 57 "Archive a file with dirty cache on another node" @@ -3488,7 +2958,7 @@ truncate_released_file() { test_58() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -3502,25 +2972,21 @@ test_58() { echo "truncate to 0" truncate_released_file /etc/passwd 0 - - copytool_cleanup } run_test 58 "Truncate a released file will trigger restore" test_59() { local fid - local server_version=$(lustre_version_code $SINGLEMDS) - [[ $server_version -lt $(version_code 2.7.63) ]] && - skip "Need MDS version at least 2.7.63" && return + [[ $MDS1_VERSION -lt $(version_code 2.7.63) ]] && + skip "Need MDS version at least 2.7.63" - copytool_setup + copytool setup $MCREATE $DIR/$tfile || error "mcreate failed" $TRUNCATE $DIR/$tfile 42 || error "truncate failed" $LFS hsm_archive $DIR/$tfile || error "archive request failed" fid=$(path2fid $DIR/$tfile) wait_request_state $fid ARCHIVE SUCCEED $LFS 
hsm_release $DIR/$tfile || error "release failed" - copytool_cleanup } run_test 59 "Release stripeless file with non-zero size" @@ -3528,17 +2994,12 @@ test_60() { # This test validates the fix for LU-4512. Ensure that the -u # option changes the progress reporting interval from the # default (30 seconds) to the user-specified interval. + local f=$DIR/$tdir/$tfile + local fid=$(create_file "$f" 1M 10) + local interval=5 local progress_timeout=$((interval * 4)) - - # test needs a new running copytool - copytool_cleanup - HSMTOOL_UPDATE_INTERVAL=$interval copytool_setup - - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 10) - [ $? != 0 ] && skip "not enough free space" && return + copytool setup -b 1 --update-interval $interval local mdtidx=0 local mdt=${MDT_PREFIX}${mdtidx} @@ -3554,13 +3015,10 @@ test_60() { error "could not archive file" local agent=$(facet_active_host $SINGLEAGT) - local prefix=$TESTLOG_PREFIX - [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME - local copytool_log=$prefix.copytool_log.$agent.log - + local logfile=$(copytool_logfile $SINGLEAGT) wait_update $agent \ - "grep -o start.copy $copytool_log" "start copy" 100 || + "grep -o start.copy \"$logfile\"" "start copy" 100 || error "copytool failed to start" local cmd="$LCTL get_param -n ${mdt}.hsm.active_requests" @@ -3573,7 +3031,7 @@ test_60() { echo -n "Expecting a progress update within $progress_timeout seconds... " while [ true ]; do RESULT=$(do_node $(facet_active_host $mds) "$cmd") - if [ $RESULT -gt 0 ]; then + if [ -n "$RESULT" ] && [ "$RESULT" -gt 0 ]; then echo "$RESULT bytes copied in $WAIT seconds." 
break elif [ $WAIT -ge $progress_timeout ]; then @@ -3593,17 +3051,14 @@ test_60() { fi echo "Wait for on going archive hsm action to complete" - wait_update $agent "grep -o copied $copytool_log" "copied" 10 || + wait_update $agent "grep -o copied \"$logfile\"" "copied" 10 || echo "File archiving not completed even after 10 secs" - - cdt_clear_no_retry - copytool_cleanup } run_test 60 "Changing progress update interval from default" test_61() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -3613,19 +3068,14 @@ test_61() { rm -f $f cdt_enable wait_request_state $fid ARCHIVE FAILED - - copytool_cleanup } run_test 61 "Waiting archive of a removed file should fail" test_70() { # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup - - # Just start and stop the copytool to generate events. - cdt_clear_no_retry + copytool setup --event-fifo "$HSMTOOL_MONITOR_DIR/fifo" # Wait for the copytool to register. wait_update --verbose $(facet_active_host mds1) \ @@ -3633,7 +3083,8 @@ test_70() { uuid 100 || error "copytool failed to register with MDT0000" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytools failed to stop" local REGISTER_EVENT local UNREGISTER_EVENT @@ -3659,7 +3110,6 @@ test_70() { error "Copytool failed to send unregister event to FIFO" fi - copytool_monitor_cleanup echo "Register/Unregister events look OK." 
} run_test 70 "Copytool logs JSON register/unregister events to FIFO" @@ -3669,15 +3119,17 @@ test_71() { local interval=5 # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_UPDATE_INTERVAL=$interval \ - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup + copytool setup --update-interval $interval --event-fifo \ + "$HSMTOOL_MONITOR_DIR/fifo" + + stack_trap "cdt_clear_no_retry" EXIT + # Just start and stop the copytool to generate events. + cdt_clear_no_retry local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_small_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3686,8 +3138,11 @@ test_71() { local expected_fields="event_time data_fid source_fid" expected_fields+=" total_bytes current_bytes" - local START_EVENT - local FINISH_EVENT + local -A events=( + [ARCHIVE_START]=false + [ARCHIVE_FINISH]=false + [ARCHIVE_RUNNING]=false + ) while read event; do # Make sure we're not getting anything from previous events. for field in $expected_fields; do @@ -3700,15 +3155,9 @@ test_71() { fi eval $parsed - if [ $event_type == "ARCHIVE_START" ]; then - START_EVENT=$event - continue - elif [ $event_type == "ARCHIVE_FINISH" ]; then - FINISH_EVENT=$event - continue - elif [ $event_type != "ARCHIVE_RUNNING" ]; then - continue - fi + events["$event_type"]=true + + [ "$event_type" != ARCHIVE_RUNNING ] && continue # Do some simple checking of the progress update events. for expected_field in $expected_fields; do @@ -3717,30 +3166,20 @@ test_71() { fi done - if [ $total_bytes -eq 0 ]; then - error "Expected total_bytes to be > 0" - fi + [ $total_bytes -gt 0 ] || error "Expected total_bytes to be > 0" - # These should be identical throughout an archive - # operation. 
- if [ $source_fid != $data_fid ]; then + # These should be identical throughout an archive operation + [ $source_fid == $data_fid ] || error "Expected source_fid to equal data_fid" - fi done < <(echo $"$(get_copytool_event_log)") - if [ -z "$START_EVENT" ]; then - error "Copytool failed to send archive start event to FIFO" - fi - - if [ -z "$FINISH_EVENT" ]; then - error "Copytool failed to send archive finish event to FIFO" - fi + # Check we received every type of events we were expecting + for event in "${!events[@]}"; do + ${events["$event"]} || + error "Copytool failed to send '$event' event to FIFO" + done echo "Archive events look OK." - - cdt_clear_no_retry - copytool_cleanup - copytool_monitor_cleanup } run_test 71 "Copytool logs JSON archive events to FIFO" @@ -3749,10 +3188,10 @@ test_72() { local interval=5 # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_UPDATE_INTERVAL=$interval \ - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup + copytool setup --update-interval $interval --event-fifo \ + "$HSMTOOL_MONITOR_DIR/fifo" local test_file=$HSMTOOL_MONITOR_DIR/file local cmd="dd if=/dev/urandom of=$test_file count=16 bs=1000000 " @@ -3763,7 +3202,7 @@ test_72() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f f=$DIR2/$tdir/$tfile echo "Verifying released state: " check_hsm_flags $f "0x0000000d" @@ -3838,12 +3277,6 @@ test_72() { fi echo "Restore events look OK." 
- - cdt_clear_no_retry - copytool_cleanup - copytool_monitor_cleanup - - rm -rf $test_dir } run_test 72 "Copytool logs JSON restore events to FIFO" @@ -3859,14 +3292,8 @@ test_90() { fid=$(copy_file /etc/hosts $f.$i) echo $f.$i >> $FILELIST done - # force copytool to use a local/temp archive dir to ensure best - # performance vs remote/NFS mounts used in auto-tests - if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then - copytool_setup - else - local dai=$(get_hsm_param default_archive_id) - copytool_setup $SINGLEAGT $MOUNT $dai $TMP/$tdir - fi + + copytool setup # to be sure wait_all_done will not be mislead by previous tests cdt_purge wait_for_grace_delay @@ -3878,7 +3305,6 @@ test_90() { $LFS hsm_restore --filelist $FILELIST || error "cannot restore a file list" wait_all_done 100 - copytool_cleanup } run_test 90 "Archive/restore a file list" @@ -3921,7 +3347,7 @@ run_test 102 "Verify coordinator control" test_103() { # test needs a running copytool - copytool_setup + copytool setup local i="" local fid="" @@ -3940,19 +3366,14 @@ test_103() { grep -v CANCELED | grep -v SUCCEED | grep -v FAILED") [[ -z "$res" ]] || error "Some request have not been canceled" - - copytool_cleanup } run_test 103 "Purge all requests" DATA=CEA DATAHEX='[434541]' test_104() { - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\ @@ -3962,30 +3383,18 @@ test_104() { [[ "$data1" == "$DATAHEX" ]] || error "Data field in records is ($data1) and not ($DATAHEX)" - # archive the file - copytool_setup - - wait_request_state $fid ARCHIVE SUCCEED - - copytool_cleanup + cdt_purge } run_test 104 "Copy tool data field" -cleanup_test_105() { - trap 0 - set_hsm_param max_requests $max_requests - copytool_cleanup -} - test_105() { local max_requests=$(get_hsm_param max_requests) mkdir -p $DIR/$tdir local i="" + stack_trap "set_hsm_param max_requests $max_requests" EXIT set_hsm_param max_requests 300 - trap cleanup_test_105 EXIT - cdt_disable for i in $(seq -w 1 10); do cp /etc/passwd $DIR/$tdir/$i @@ -4005,14 +3414,12 @@ test_105() { [[ "$reqcnt1" == "$reqcnt2" ]] || error "Requests count after shutdown $reqcnt2 != "\ "before shutdown $reqcnt1" - - cleanup_test_105 } run_test 105 "Restart of coordinator" test_106() { # test needs a running copytool - copytool_setup + copytool setup local uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) @@ -4020,20 +3427,22 @@ test_106() { search_copytools || error "No copytool found" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytool failed to stop" + check_agent_unregistered $uuid - copytool_setup + copytool setup uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) check_agent_registered $uuid - - copytool_cleanup } run_test 106 "Copytool register/unregister" test_107() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + # test needs a running copytool - copytool_setup + copytool setup # create and archive file mkdir -p $DIR/$tdir local f1=$DIR/$tdir/$tfile @@ -4048,7 +3457,6 @@ test_107() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2 # main check of this sanity: this request MUST succeed wait_request_state $fid ARCHIVE 
SUCCEED - copytool_cleanup } run_test 107 "Copytool re-register after MDS restart" @@ -4089,14 +3497,14 @@ run_test 109 "Policy display/change" test_110a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) cdt_set_non_blocking_restore @@ -4111,14 +3519,12 @@ test_110a() { [[ $st == 1 ]] || error "md5sum returns $st != 1, "\ "should also perror ENODATA (No data available)" - - copytool_cleanup } run_test 110a "Non blocking restore policy (import case)" test_110b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -4139,21 +3545,19 @@ test_110b() { [[ $st == 1 ]] || error "md5sum returns $st != 1, "\ "should also perror ENODATA (No data available)" - - copytool_cleanup } run_test 110b "Non blocking restore policy (release case)" test_111a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) cdt_set_no_retry @@ -4169,19 +3573,18 @@ test_111a() { # Test result [[ $st == 0 ]] || error "Restore does not failed" - - copytool_cleanup } run_test 111a "No retry policy (import case), restore will error"\ " (No such file or directory)" test_111b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) + stack_trap cdt_clear_no_retry EXIT cdt_set_no_retry $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -4193,20 +3596,15 @@ test_111b() { wait_request_state $fid RESTORE FAILED local st=$? 
- # cleanup - cdt_clear_no_retry - # Test result [[ $st == 0 ]] || error "Restore does not failed" - - copytool_cleanup } run_test 111b "No retry policy (release case), restore will error"\ " (No such file or directory)" test_112() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -4217,47 +3615,80 @@ test_112() { echo $l local res=$(echo $l | cut -f 2- -d" " | grep ARCHIVE) - # cleanup cdt_enable wait_request_state $fid ARCHIVE SUCCEED # Test result [[ ! -z "$res" ]] || error "action is $l which is not an ARCHIVE" - - copytool_cleanup } run_test 112 "State of recorded request" -test_200() { - # test needs a running copytool - copytool_setup +test_113() { + local file1=$DIR/$tdir/$tfile + local file2=$DIR2/$tdir/$tfile + + local fid=$(create_small_sync_file $file1) + + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT + zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" || + error "cannot mount '$MOUNT3' on '$SINGLEAGT'" + + copytool setup -m "$MOUNT3" + + do_nodes $(comma_list $(nodes_list)) $LCTL clear + + $LFS hsm_archive $file1 || error "Fail to archive $file1" + wait_request_state $fid ARCHIVE SUCCEED + + $LFS hsm_release $file1 + echo "Verifying released state: " + check_hsm_flags $file1 "0x0000000d" + + multiop_bg_pause $file1 oO_WRONLY:O_APPEND:_w4c || error "multiop failed" + MULTIPID=$! + stat $file2 & + kill -USR1 $MULTIPID + + wait + sync + local size1=$(stat -c "%s" $file1) + local size2=$(stat -c "%s" $file2) + + [ $size1 -eq $size2 ] || error "sizes are different $size1 $size2" +} +run_test 113 "wrong stat after restore" + +test_200() { local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup + + # Prevent archive from completing + copytool_suspend - # test with cdt on is made in test_221 - cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f # wait archive to register at CDT - wait_request_state $fid ARCHIVE WAITING - $LFS hsm_cancel $f - cdt_enable + wait_request_state $fid ARCHIVE STARTED + + # Cancel the archive + $LFS hsm_cancel "$f" + wait_request_state $fid ARCHIVE CANCELED - wait_request_state $fid CANCEL SUCCEED - copytool_cleanup + copytool_continue + wait_request_state $fid CANCEL SUCCEED } run_test 200 "Register/Cancel archive" test_201() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile create_archive_file $tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) # test with cdt on is made in test_222 @@ -4269,69 +3700,58 @@ test_201() { cdt_enable wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - - copytool_cleanup } run_test 201 "Register/Cancel restore" test_202() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + # test needs a running copytool + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - cdt_disable + copytool_suspend $LFS hsm_remove $f # wait remove to register at CDT - wait_request_state $fid REMOVE WAITING + wait_request_state $fid REMOVE STARTED $LFS hsm_cancel $f - cdt_enable - wait_request_state $fid REMOVE CANCELED - copytool_cleanup + wait_request_state $fid REMOVE CANCELED } run_test 202 "Register/Cancel remove" -test_220() { +test_220A() { # was test_220 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - changelog_cleanup - - local target=0x0 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0x0 || + error "The expected changelog was not emitted" } -run_test 220 "Changelog for archive" +run_test 220A "Changelog for archive" test_220a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register # block copytool operations to allow for HSM request to be # submitted and file be unlinked (CDT will find object removed) @@ -4348,77 +3768,65 @@ test_220a() { wait_request_state $fid ARCHIVE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - changelog_cleanup - # HE_ARCHIVE|ENOENT - local target=0x2 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0x2 || + error "The expected changelog 
was not emitted" } run_test 220a "Changelog for failed archive" test_221() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") - changelog_setup + copytool setup -b 1 + changelog_register + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED + $LFS hsm_cancel $f wait_request_state $fid ARCHIVE CANCELED - wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x7d - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x7d || + error "The expected changelog was not emitted" } run_test 221 "Changelog for archive canceled" test_222a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) - changelog_setup + changelog_register $LFS hsm_restore $f wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x80 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x80 || + error "The expected changelog was not emitted" } run_test 222a "Changelog for explicit restore" test_222b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS 
hsm_release $f @@ -4427,27 +3835,23 @@ test_222b() { wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x80 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x80 || + error "The expected changelog was not emitted" } run_test 222b "Changelog for implicit restore" test_222c() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) - changelog_setup + changelog_register # block copytool operations to allow for HSM request to be # submitted and file be unlinked (CDT will find object removed) @@ -4464,25 +3868,21 @@ test_222c() { wait_request_state $fid RESTORE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - # HE_RESTORE|ENOENT - local target=0x82 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x82 || + error "The expected changelog was not emitted" } run_test 222c "Changelog for failed explicit restore" test_222d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -4492,26 +3892,22 @@ test_222d() { wait_request_state $fid RESTORE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - # HE_RESTORE|ENOENT - local target=0x82 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x82 || + error "The expected changelog was not emitted" } run_test 222d "Changelog for 
failed implicit restore" test_223a() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile create_archive_file $tdir/$tfile - changelog_setup + changelog_register - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) $LFS hsm_restore $f @@ -4520,81 +3916,69 @@ test_223a() { wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0xfd - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0xfd || + error "The expected changelog was not emitted" } run_test 223a "Changelog for restore canceled (import case)" test_223b() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup -b 1 + changelog_register - changelog_setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + + # Prevent restore from completing + copytool_suspend $LFS hsm_restore $f wait_request_state $fid RESTORE STARTED + $LFS hsm_cancel $f wait_request_state $fid RESTORE CANCELED - wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0xfd - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - cleanup + changelog_find -type HSM -target-fid $fid -flags 0xfd || + error "The expected changelog was not emitted" } run_test 223b "Changelog for restore canceled (release case)" -test_224() { +test_224A() { # was test_224 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local 
fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_remove $f wait_request_state $fid REMOVE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1) - - local target=0x200 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x200 || + error "The expected changelog was not emitted" } -run_test 224 "Changelog for remove" +run_test 224A "Changelog for remove" test_224a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -4615,59 +3999,46 @@ test_224a() { wait_request_state $fid REMOVE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1) - - # HE_REMOVE|ENOENT - local target=0x202 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + # HE_REMOVE|ENOENT=0x202 + changelog_find -type HSM -target-fid $fid -flags 0x202 || + error "The expected changelog was not emitted" } run_test 224a "Changelog for failed remove" test_225() { - # test needs a running copytool - copytool_setup - # test is not usable because remove request is too fast # so it is always finished before cancel can be done ... echo "Test disabled" - copytool_cleanup return 0 + # test needs a running copytool + copytool setup + local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - # if cdt is on, it can serve too quickly the request - cdt_disable + # Prevent restore from completing + copytool_suspend $LFS hsm_remove $f + $LFS hsm_cancel $f - cdt_enable wait_request_state $fid REMOVE CANCELED - wait_request_state $fid CANCEL SUCCEED - - flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) - local flags=$($LFS changelog ${MDT[0]} | grep HSM | grep $fid | - tail -n 1 | awk '{print $5}') - local target=0x27d - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x27d + error "The expected changelog was not emitted" } run_test 225 "Changelog for remove canceled" test_226() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -4678,7 +4049,7 @@ test_226() { local fid2=$(copy_file /etc/passwd $f2) copy_file /etc/passwd $f3 - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1 wait_request_state $fid1 ARCHIVE SUCCEED @@ -4687,84 +4058,63 @@ test_226() { rm $f1 || error "rm $f1 failed" - local flags=$(changelog_get_flags ${MDT[0]} UNLNK $fid1) - - local target=0x3 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + changelog_dump + changelog_find -type UNLNK -target-fid $fid1 -flags 0x3 || + error "The expected changelog was not emitted" mv $f3 $f2 || error "mv $f3 $f2 failed" - flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) - - target=0x3 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type RENME -target-fid $fid2 -flags 0x3 || + error "The expected changelog was not emitted" } run_test 226 "changelog for last rm/mv with exiting archive" 
-check_flags_changes() { - local f=$1 - local fid=$2 - local hsm_flag=$3 - local fst=$4 - local cnt=$5 - +# This is just a utility function to clarify what test_227 does +__test_227() +{ local target=0x280 - $LFS hsm_set --$hsm_flag $f || - error "Cannot set $hsm_flag on $f" - local flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) - local seen=${#flags[*]} - cnt=$((fst + cnt)) - [[ $seen == $cnt ]] || - error "set $hsm_flag: Changelog events $seen != $cnt" - [[ ${flags[$((cnt - 1))]} == $target ]] || - error "set $hsm_flag: Changelog flags are "\ - "${flags[$((cnt - 1))]} not $target" - - $LFS hsm_clear --$hsm_flag $f || - error "Cannot clear $hsm_flag on $f" - flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) - seen=${#flags[*]} - cnt=$(($cnt + 1)) - [[ $cnt == $seen ]] || - error "clear $hsm_flag: Changelog events $seen != $cnt" - - [[ ${flags[$((cnt - 1))]} == $target ]] || - error "clear $hsm_flag: Changelog flag is "\ - "${flags[$((cnt - 1))]} not $target" + + "$LFS" "$action" --$flag "$file" || + error "Cannot ${action#hsm_} $flag on '$file'" + + # Only one changelog should be produced + local entries="$(changelog_find -type HSM -target-fid $fid)" + [ $(wc -l <<< "$entries") -eq $((++count)) ] || + error "lfs $action --$flag '$file' produced more than one" \ + "changelog record" + + # Parse the last changelog record + local entry="$(tail -n 1 <<< "$entries")" + eval local -A changelog=$(changelog2array $entry) + + # Also check the flags match what is expected + [[ ${changelog[flags]} == $target ]] || + error "Changelog flag is '${changelog[flags]}', not $target" } test_227() { - # test needs a running copytool - copytool_setup - changelog_setup + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + local count=0 - mkdir -p $DIR/$tdir - typeset -a flags + changelog_register - for i in norelease noarchive exists archived - do - local f=$DIR/$tdir/$tfile-$i - local fid=$(copy_file /etc/passwd $f) - check_flags_changes $f $fid $i 0 1 - done 
- - f=$DIR/$tdir/$tfile---lost - fid=$(copy_file /etc/passwd $f) - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f - wait_request_state $fid ARCHIVE SUCCEED - check_flags_changes $f $fid lost 3 1 + for flag in norelease noarchive exists archived lost; do + if [ "$flag" == lost ]; then + # The flag "lost" only works on an archived file + "$LFS" hsm_set --archived "$file" + ((count++)) + fi - cleanup + action="hsm_set" __test_227 + action="hsm_clear" __test_227 + done } run_test 227 "changelog when explicit setting of HSM flags" test_228() { # test needs a running copytool - copytool_setup + copytool setup local fid=$(create_small_sync_file $DIR/$tfile) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile @@ -4794,124 +4144,128 @@ test_228() { rm -f $DIR/$tfile $DIR/$tfile.2 || error "rm $DIR/$tfile or $DIR/$tfile.2 failed" - copytool_cleanup } run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse" test_250() { - # test needs a running copytool - copytool_setup + local file="$DIR/$tdir/$tfile" + + # set max_requests to allow one request of each type to be started (3) + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + # speed up test + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + + # send 1 requests of each kind twice + copytool setup + # setup the files + for action in archive restore remove; do + local filepath="$file"-to-$action + local fid=$(create_empty_file "$filepath") + local fid2=$(create_empty_file "$filepath".bis) + + if [ "$action" != archive ]; then + "$LFS" hsm_archive "$filepath" + wait_request_state $fid ARCHIVE SUCCEED + "$LFS" hsm_archive "$filepath".bis + wait_request_state $fid2 ARCHIVE SUCCEED + fi + if [ "$action" == restore ]; then + "$LFS" hsm_release "$filepath" + "$LFS" hsm_release "$filepath".bis + fi + done - mkdir -p $DIR/$tdir - local maxrequest=$(get_hsm_param max_requests) - 
local rqcnt=$(($maxrequest * 3)) - local i="" + # suspend the copytool to prevent requests from completing + stack_trap "copytool_continue" EXIT + copytool_suspend - cdt_disable - for i in $(seq -w 1 $rqcnt); do - rm -f $DIR/$tdir/$i - dd if=/dev/urandom of=$DIR/$tdir/$i bs=1M count=10 conv=fsync - done - # we do it in 2 steps, so all requests arrive at the same time - for i in $(seq -w 1 $rqcnt); do - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$i + # send `max_requests' requests (one of each kind) + for action in archive restore remove; do + filepath="$file"-to-$action + "$LFS" hsm_${action} "$filepath" + wait_request_state $(path2fid "$filepath") "${action^^}" STARTED done - cdt_enable - local cnt=$rqcnt - local wt=$rqcnt - while [[ $cnt != 0 || $wt != 0 ]]; do - sleep 1 - cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.actions |\ - grep STARTED | grep -v CANCEL | wc -l") - [[ $cnt -le $maxrequest ]] || - error "$cnt > $maxrequest too many started requests" - wt=$(do_facet $SINGLEMDS "$LCTL get_param\ - $HSM_PARAM.actions |\ - grep WAITING | wc -l") - echo "max=$maxrequest started=$cnt waiting=$wt" + + # send another batch of requests + for action in archive restore remove; do + "$LFS" hsm_${action} "$file-to-$action".bis done + # wait for `loop_period' seconds to make sure the coordinator has time + # to register those, even though it should not + sleep 1 - copytool_cleanup + # only the first batch of request should be started + local -i count + count=$(do_facet $SINGLEMDS "$LCTL" get_param -n $HSM_PARAM.actions | + grep -c STARTED) + + ((count == 3)) || + error "expected 3 STARTED requests, found $count" } run_test 250 "Coordinator max request" test_251() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") cdt_disable # to have a short test local old_to=$(get_hsm_param active_request_timeout) - set_hsm_param active_request_timeout 4 + set_hsm_param active_request_timeout 1 # to be sure the cdt will wake up frequently so # it will be able to cancel the "old" request local old_loop=$(get_hsm_param loop_period) - set_hsm_param loop_period 2 + set_hsm_param loop_period 1 cdt_enable - # clear locks to avoid extra delay caused by flush/cancel - # and thus prevent early copytool death to timeout. - cancel_lru_locks osc + copytool setup + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED - sleep 5 + + # Let the request timeout wait_request_state $fid ARCHIVE CANCELED set_hsm_param active_request_timeout $old_to set_hsm_param loop_period $old_loop - - copytool_cleanup } run_test 251 "Coordinator request timeout" test_252() { - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 103 1048576) + local fid=$(create_empty_file "$f") - cdt_disable # to have a short test - local old_to=$(get_hsm_param active_request_timeout) - set_hsm_param active_request_timeout 20 - # to be sure the cdt will wake up frequently so - # it will be able to cancel the "old" request - local old_loop=$(get_hsm_param loop_period) - set_hsm_param loop_period 2 - cdt_enable + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 - # clear locks to avoid extra delay caused by flush/cancel - # and thus prevent early copytool death to timeout. 
- cancel_lru_locks osc + copytool setup + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED - rm -f $f + rm -f "$f" - # wait but less than active_request_timeout+grace_delay - sleep 25 - wait_request_state $fid ARCHIVE CANCELED + stack_trap "set_hsm_param active_request_timeout \ + $(get_hsm_param active_request_timeout)" EXIT + set_hsm_param active_request_timeout 1 - set_hsm_param active_request_timeout $old_to - set_hsm_param loop_period $old_loop - - copytool_cleanup + wait_request_state $fid ARCHIVE CANCELED + copytool_continue } run_test 252 "Timeout'ed running archive of a removed file should be canceled" test_253() { local rc # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -4939,11 +4293,367 @@ test_253() { else echo "could not release file" fi - copytool_cleanup } run_test 253 "Check for wrong file size after release" +test_254a() +{ + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" + + # Check that the counters are initialized to 0 + local count + for request_type in archive restore remove; do + count="$(get_hsm_param ${request_type}_count)" || + error "Reading ${request_type}_count failed with $?" 
+ + [ "$count" -eq 0 ] || + error "Expected ${request_type}_count to be " \ + "0 != '$count'" + done +} +run_test 254a "Request counters are initialized to zero" + +test_254b() +{ + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" + + # The number of request to launch (at least 32) + local request_count=$((RANDOM % 32 + 32)) + printf "Will launch %i requests of each type\n" "$request_count" + + # Launch a copytool to process requests + copytool setup + + # Set hsm.max_requests to allow starting all requests at the same time + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests "$request_count" + + local timeout + local count + for request_type in archive restore remove; do + printf "Checking %s requests\n" "${request_type}" + # Suspend the copytool to give us time to read the proc files + copytool_suspend + + for ((i = 0; i < $request_count; i++)); do + case $request_type in + archive) + create_empty_file "$DIR/$tdir/$tfile-$i" \ + >/dev/null 2>&1 + ;; + restore) + lfs hsm_release "$DIR/$tdir/$tfile-$i" + ;; + esac + $LFS hsm_${request_type} "$DIR/$tdir/$tfile-$i" + done + + # Give the coordinator 10 seconds to start every request + timeout=10 + while get_hsm_param actions | grep -q WAITING; do + sleep 1 + let timeout-=1 + [ $timeout -gt 0 ] || + error "${request_type^} requests took too " \ + "long to start" + done + + count="$(get_hsm_param ${request_type}_count)" + [ "$count" -eq "$request_count" ] || + error "Expected '$request_count' (!= '$count') " \ + "active $request_type requests" + + # Let the copytool process the requests + copytool_continue + # Give it 10 seconds maximum + timeout=10 + while get_hsm_param actions | grep -q STARTED; do + sleep 1 + let timeout-=1 + [ $timeout -gt 0 ] || + error "${request_type^} requests took too " \ + "long to complete" + done + + count="$(get_hsm_param ${request_type}_count)" + [ "$count" -eq 0 ] || + error "Expected 0 (!= 
'$count') " \ + "active $request_type requests" + done +} +run_test 254b "Request counters are correctly incremented and decremented" + +test_255() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + + # How do you make sure the coordinator has consumed any outstanding + # event, without triggering an event yourself? + # + # You wait for a request to disappear from the coordinator's llog. + + # Warning: the setup represents 90% of this test + + # Create and process an HSM request + copytool setup + "$LFS" hsm_archive "$file" + wait_request_state $fid ARCHIVE SUCCEED + + kill_copytools + wait_copytools || error "failed to stop copytools" + + # Launch a new HSM request + rm "$file" + create_empty_file "$file" + "$LFS" hsm_archive "$file" + + cdt_shutdown + + # Have the completed request be removed as soon as the cdt wakes up + stack_trap "set_hsm_param grace_delay $(get_hsm_param grace_delay)" EXIT + set_hsm_param grace_delay 1 + # (Hopefully, time on the MDS will behave nicely) + do_facet $SINGLEMDS sleep 2 & + + # Increase `loop_period' as a mean to prevent the coordinator from + # waking itself up to do some housekeeping. + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + + wait $! || error "waiting failed" + cdt_enable + wait_request_state $fid ARCHIVE "" + # The coordinator will not wake up on its own for ~`loop_period' secs... + + # ... Unless a copytool registers. Now the real test begins + copytool setup + wait_request_state $(path2fid "$file") ARCHIVE SUCCEED +} +run_test 255 "Copytool registration wakes the coordinator up" + +# tests 260[a-c] rely on the parsing of the copytool's log file, they might +# break in the future because of that. 
+test_260a() +{ + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup + "$LFS" hsm_archive "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Send several archive requests + for file in "${files[@]:1}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Launch a copytool + copytool setup + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:1}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && return + done + + error "Too many ARCHIVE requests were run before the RESTORE request" +} +run_test 260a "Restore request have priority over other requests" + +# This test is very much tied to the implementation of the current priorisation +# mechanism in the coordinator. 
It might not make sense to keep it in the future +test_260b() +{ + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Send several archive requests + for file in "${files[@]:1}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Launch a copytool + copytool setup + copytool setup --archive-id 2 + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:1}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && return + done + + error "Too many ARCHIVE requests were run before the RESTORE request" +} +run_test 260b "Restore request have priority over other requests" + +# This test is very much tied to the implementation of the current priorisation +# mechanism in the coordinator. 
It might not make sense to keep it in the future +test_260c() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Force the next coordinator run to do housekeeping + cdt_shutdown + cdt_enable + + "$LFS" hsm_archive "${files[1]}" + + # Launch a copytool + copytool setup + copytool setup --archive-id 2 + + wait_request_state "$(path2fid "${files[1]}")" ARCHIVE SUCCEED + # The coordinator just did a housekeeping run it won't do another one + # for around `loop_period' seconds => requests will not be reordered + # if it costs too much (ie. 
when the coordinator has to discard a whole + # hal) + + # Send several archive requests + for file in "${files[@]:2}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:2}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && + error "Restore requests should not be prioritised" \ + "unless the coordinator is doing housekeeping" + done + return 0 +} +run_test 260c "Requests are not reordered on the 'hot' path of the coordinator" + test_300() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + # the only way to test ondisk conf is to restart MDS ... 
echo "Stop coordinator and remove coordinator state at mount" # stop coordinator @@ -4971,6 +4681,8 @@ test_300() { run_test 300 "On disk coordinator state kept between MDT umount/mount" test_301() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + local ai=$(get_hsm_param default_archive_id) local new=$((ai + 1)) @@ -4986,6 +4698,8 @@ test_301() { run_test 301 "HSM tunnable are persistent" test_302() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + local ai=$(get_hsm_param default_archive_id) local new=$((ai + 1)) @@ -5014,7 +4728,7 @@ run_test 302 "HSM tunnable are persistent when CDT is off" test_400() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -5022,7 +4736,9 @@ test_400() { local dir_mdt1=$DIR/$tdir/mdt1 # create 1 dir per MDT + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + stack_trap "rm -rf $dir_mdt1" EXIT $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" # create 1 file in each MDT @@ -5038,17 +4754,13 @@ test_400() { $LFS hsm_archive $dir_mdt1/$tfile || error "lfs hsm_archive" wait_request_state $fid2 ARCHIVE SUCCEED 1 && echo "archive successful on mdt1" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 $dir_mdt1 } run_test 400 "Single request is sent to the right MDT" test_401() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -5056,7 +4768,9 @@ test_401() { local dir_mdt1=$DIR/$tdir/mdt1 # create 1 dir per MDT + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + stack_trap "rm -rf $dir_mdt1" EXIT $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" # create 1 file in each MDT @@ -5070,10 +4784,6 @@ test_401() { echo "archive successful on mdt0" wait_request_state $fid2 ARCHIVE SUCCEED 1 && echo "archive successful on mdt1" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 $dir_mdt1 } 
run_test 401 "Compound requests split and sent to their respective MDTs" @@ -5092,13 +4802,10 @@ mdc_change_state() # facet, MDT_pattern, activate|deactivate } test_402a() { - # make sure there is no running copytool - copytool_cleanup - # deactivate all mdc on agent1 mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "deactivate" - HSMTOOL_NOERROR=true copytool_setup $SINGLEAGT + copytool setup --no-fail check_agent_unregistered "uuid" # match any agent @@ -5111,7 +4818,7 @@ test_402a() { run_test 402a "Copytool start fails if all MDTs are inactive" test_402b() { - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -5132,23 +4839,18 @@ test_402b() { # request should succeed now wait_request_state $fid ARCHIVE SUCCEED - - copytool_cleanup } run_test 402b "CDT must retry request upon slow start of CT" test_403() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - # make sure there is no running copytool - copytool_cleanup - local agent=$(facet_active_host $SINGLEAGT) # deactivate all mdc for MDT0001 mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate" - copytool_setup + copytool setup local uuid=$(get_agent_uuid $agent) # check the agent is registered on MDT0000, and not on MDT0001 check_agent_registered_by_mdt $uuid 0 @@ -5162,20 +4864,19 @@ test_403() { # make sure the copytool is now registered to all MDTs check_agent_registered $uuid - - copytool_cleanup } run_test 403 "Copytool starts with inactive MDT and register on reconnect" test_404() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup # create files on both MDT0000 and MDT0001 mkdir -p $DIR/$tdir local dir_mdt0=$DIR/$tdir/mdt0 + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" # create 1 file on mdt0 @@ -5193,17 +4894,13 @@ test_404() { # reactivate all mdc for MDT0001 mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "activate" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 } run_test 404 
"Inactive MDT does not block requests for active MDTs" test_405() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -5217,10 +4914,10 @@ test_405() { local fid3=$(create_small_sync_file $striped_dir/${tfile}_2) local fid4=$(create_small_sync_file $striped_dir/${tfile}_3) - local idx1=$($LFS getstripe -M $striped_dir/${tfile}_0) - local idx2=$($LFS getstripe -M $striped_dir/${tfile}_1) - local idx3=$($LFS getstripe -M $striped_dir/${tfile}_2) - local idx4=$($LFS getstripe -M $striped_dir/${tfile}_3) + local idx1=$($LFS getstripe -m $striped_dir/${tfile}_0) + local idx2=$($LFS getstripe -m $striped_dir/${tfile}_1) + local idx3=$($LFS getstripe -m $striped_dir/${tfile}_2) + local idx4=$($LFS getstripe -m $striped_dir/${tfile}_3) # check that compound requests are shunt to the rights MDTs $LFS hsm_archive $striped_dir/${tfile}_0 $striped_dir/${tfile}_1 \ @@ -5245,30 +4942,29 @@ test_405() { cat $striped_dir/${tfile}_1 > /dev/null || error "cat ${tfile}_1 failed" cat $striped_dir/${tfile}_2 > /dev/null || error "cat ${tfile}_2 failed" cat $striped_dir/${tfile}_3 > /dev/null || error "cat ${tfile}_3 failed" - - copytool_cleanup } run_test 405 "archive and release under striped directory" test_406() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] && - skip "need MDS version at least 2.7.64" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.64) ] && + skip "need MDS version at least 2.7.64" local fid local mdt_index - copytool_setup fid=$(create_small_file $DIR/$tdir/$tfile) echo "old fid $fid" + copytool setup + $LFS hsm_archive $DIR/$tdir/$tfile wait_request_state "$fid" ARCHIVE SUCCEED $LFS hsm_release $DIR/$tdir/$tfile # Should migrate $tdir but not $tfile. 
- $LFS mv -M1 $DIR/$tdir && + $LFS migrate -m1 $DIR/$tdir && error "migrating HSM an archived file should fail" $LFS hsm_restore $DIR/$tdir/$tfile @@ -5280,10 +4976,10 @@ test_406() { cat $DIR/$tdir/$tfile > /dev/null || error "cannot read $DIR/$tdir/$tfile" - $LFS mv -M1 $DIR/$tdir || + $LFS migrate -m1 $DIR/$tdir || error "cannot complete migration after HSM remove" - mdt_index=$($LFS getstripe -M $DIR/$tdir) + mdt_index=$($LFS getstripe -m $DIR/$tdir) if ((mdt_index != 1)); then error "expected MDT index 1, got $mdt_index" fi @@ -5304,23 +5000,15 @@ test_406() { cat $DIR/$tdir/$tfile > /dev/null || error "cannot read $DIR/$tdir/$tfile" - - copytool_cleanup } run_test 406 "attempting to migrate HSM archived files is safe" test_407() { - needclients 2 || return 0 - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile local f2=$DIR2/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -5329,12 +5017,16 @@ test_407() { #define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x164 + # Prevent restore from completing + copytool_suspend + md5sum $f & # 1st request holds layout lock while appropriate # RESTORE record is still not added to llog md5sum $f2 & sleep 2 + do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions" # after umount hsm_actions->O/x/x log shouldn't have # double RESTORE records like below #[0x200000401:0x1:0x0]...0x58d03a0d/0x58d03a0c action=RESTORE...WAITING @@ -5342,26 +5034,378 @@ test_407() { sleep 30 && do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"& fail $SINGLEMDS + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - wait_request_state $fid RESTORE SUCCEED - copytool_cleanup + do_facet $SINGLEMDS "$LCTL get_param 
$HSM_PARAM.actions"

	copytool_continue
	wait_all_done 100 $fid
}
run_test 407 "Check for double RESTORE records in llog"

# test_500: run the llapi_hsm_test binary against a fresh test directory.
# Skipped when the MDS is older than 2.6.92.  When either the client or
# the MDS is older than 2.11.56 the binary is invoked with the extra "-b"
# flag (NOTE(review): presumably a backward-compatibility mode of
# llapi_hsm_test - confirm against the tool's usage text).
test_500() {
	[ $MDS1_VERSION -lt $(version_code 2.6.92) ] &&
		skip "HSM migrate is not supported"

	test_mkdir -p $DIR/$tdir

	# Only the flag differs between old and new versions, so compute it
	# once instead of duplicating the whole invocation.
	local compat_opt=""

	if [ $(lustre_version_code client) -lt $(version_code 2.11.56) ] ||
	   [ $MDS1_VERSION -lt $(version_code 2.11.56) ]; then
		compat_opt="-b"
	fi

	llapi_hsm_test -d $DIR/$tdir $compat_opt ||
		error "One llapi HSM test failed"
}
run_test 500 "various LLAPI HSM tests"

# test_600: create a file as $RUNAS and check that a CREAT changelog
# record exists for its FID with the expected uid/gid, and that the "nid"
# field of that record matches one of CLIENT_NIDS.
test_600() {
	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
		skip "need MDS version at least 2.10.58"

	mkdir -p $DIR/$tdir

	local f=$DIR/$tdir/$tfile

	changelog_register
	# set changelog_mask to ALL
	changelog_chmask "ALL"

	# the directory must be writable by the unprivileged $RUNAS user
	chmod 777 $DIR/$tdir
	$RUNAS touch $f || error "touch $f failed as $RUNAS_ID"
	local fid=$(path2fid $f)

	# declared separately so that the exit status of changelog_find is
	# not masked by "local"
	local entry
	entry=$(changelog_find -type CREAT -target-fid $fid -uid "$RUNAS_ID" \
			       -gid "$RUNAS_GID") ||
		error "No matching CREAT entry"

	# Parse the changelog
	eval local -A changelog=$(changelog2array $entry)
	local nid="${changelog[nid]}"

	# Check its NID
	echo "Got NID '$nid'"
	[ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] ||
		error "nid '$nid' does not match any client NID:" \
		      "${CLIENT_NIDS[@]}"
}
run_test 600 "Changelog fields 'u=' and 'nid='"

# test_601: after clearing the changelog, read a file with cat and expect
# an OPEN record for its FID with mode "r--".
test_601() {
	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
		skip "need MDS version at least 2.10.58"

	mkdir -p $DIR/$tdir

	local f=$DIR/$tdir/$tfile

	changelog_register
	# set changelog_mask to ALL
	changelog_chmask "ALL"

	touch $f || error "touch $f failed"
	local fid=$(path2fid $f)

	changelog_clear
	cat $f || error "cat $f failed"

	changelog_find -type OPEN -target-fid $fid -mode "r--" ||
		error "No matching OPEN entry"
}
run_test 601 "OPEN Changelog entry"

# test_602: check when CLOSE records are emitted.
#  - mask ALL:   read  -> CLOSE expected, write -> CLOSE expected
#  - mask -OPEN: read  -> no CLOSE,       write -> CLOSE expected
test_602() {
	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
		skip "need MDS version at least 2.10.58"

	mkdir -p $DIR/$tdir

	local f=$DIR/$tdir/$tfile

	changelog_register
	# set changelog_mask to ALL
	changelog_chmask "ALL"

	touch $f || error "touch $f failed"
	local fid=$(path2fid $f)

	# read with OPEN recorded
	changelog_clear
	cat $f || error "cat $f failed"

	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"

	# write with OPEN recorded
	changelog_clear
	changelog_dump
	echo f > $f || error "write $f failed"
	changelog_dump

	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"

	# remove OPEN from changelog_mask
	changelog_chmask "-OPEN"

	# read without OPEN recorded: no CLOSE record is expected
	changelog_clear
	changelog_dump
	cat $f || error "cat $f failed"
	changelog_dump

	changelog_find -type CLOSE -target-fid $fid &&
		error "There should be no CLOSE entry"

	# write without OPEN recorded: CLOSE is still expected
	changelog_clear
	changelog_dump
	echo f > $f || error "write $f failed"
	changelog_dump

	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"
}
run_test 602 "Changelog record CLOSE only if open+write or OPEN recorded"

# test_603: set a user xattr, clear the changelog, read the xattr back and
# expect a GXATR record for the file's FID naming "user.xattr1".
test_603() {
	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
		skip "need MDS version at least 2.10.58"

	mkdir -p $DIR/$tdir

	local f=$DIR/$tdir/$tfile

	changelog_register
	# set changelog_mask to ALL
	changelog_chmask "ALL"

	touch $f || error "touch $f failed"
	local fid=$(path2fid $f)

	setfattr -n user.xattr1 -v "value1" $f || error "setfattr $f failed"

	# clear after setfattr so only the getfattr below is in the log
	changelog_clear
	getfattr -n user.xattr1 $f || error "getfattr $f failed"

	changelog_find -type GXATR -target-fid $fid -xattr "user.xattr1" ||
		error "No matching GXATR entry"
}
run_test 603 "GETXATTR Changelog entry"

# test_604: denied opens (NOPEN records) and their rate limit.
# The file is made mode 600 and then read as $RUNAS through the second
# mount point ($DIR2), which must fail.  Expectations checked below:
#  1. first denied open   -> NOPEN record with uid/gid/mode and a client NID
#  2. immediate retry     -> no NOPEN record
#  3. retry after waiting changelog_deniednext seconds -> NOPEN record again
# The original changelog_deniednext value is restored on exit.
test_604() {
	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
		skip "need MDS version at least 2.10.58"

	mkdir -p $DIR/$tdir

	local f=$DIR/$tdir/$tfile
	local f2=$DIR2/$tdir/$tfile
	local procname="mdd.$FSNAME-MDT0000.changelog_deniednext"
	# interval (seconds) written to changelog_deniednext and slept below;
	# kept in one place so the two cannot drift apart
	local deniednext=20
	# value of the parameter before the test, restored by the trap
	local timeout
	timeout="$(do_facet mds1 "$LCTL" get_param -n "$procname")"
	stack_trap "do_facet mds1 '$LCTL' set_param '$procname=$timeout'" EXIT
	do_facet mds1 "$LCTL" set_param "$procname=$deniednext"

	changelog_register
	# set changelog_mask to ALL
	changelog_chmask "ALL"

	touch $f || error "touch $f failed"
	local fid=$(path2fid $f)

	# owner-only permissions so that $RUNAS is denied
	chmod 600 $f

	changelog_clear
	changelog_dump
	$RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed"
	changelog_dump

	local entry
	entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \
			       -gid "$RUNAS_GID" -mode "r--") ||
		error "No matching NOPEN entry"

	# Parse the changelog
	eval local -A changelog=$(changelog2array $entry)
	local nid="${changelog[nid]}"

	# Check its NID
	echo "Got NID '$nid'"
	[ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] ||
		error "nid '$nid' does not match any client NID:" \
		      "${CLIENT_NIDS[@]}"

	# second denied open right away: must not be recorded
	changelog_clear
	changelog_dump
	$RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed"
	changelog_dump

	changelog_find -type NOPEN -target-fid $fid &&
		error "There should be no NOPEN entry"

	# Sleep for `changelog_deniednext` seconds
	sleep $deniednext

	changelog_clear
	changelog_dump
	$RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed"
	changelog_dump

	entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \
			       -gid "$RUNAS_GID" -mode "r--") ||
		error "No matching NOPEN entry"

	# Parse the changelog
	eval local -A changelog=$(changelog2array $entry)
	nid="${changelog[nid]}"

	# Check the NID
	echo "Got NID '$nid'"
	[ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] ||
		error "nid '$nid' does not match any client NID:" \
		      "${CLIENT_NIDS[@]}"
}
run_test 604 "NOPEN Changelog entry"

# test_605: OPEN/CLOSE records are not repeated for identical accesses.
# fd 3 holds the file open read-write for most of the test; fd 4 is used
# for additional read-write opens.  The sequence of expectations is:
#  1. first rw open (fd 3)            -> OPEN recorded
#  2. second rw open/close (fd 4)     -> no OPEN, no CLOSE
#  3. read-only access (cat)          -> OPEN and CLOSE recorded
#  4. rw open/close again (fd 4)      -> no OPEN, no CLOSE
#  5. read by another user ($RUNAS)   -> OPEN and CLOSE recorded
#  6. final close of fd 3             -> CLOSE recorded
test_605() {
	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
		skip "need MDS version at least 2.10.58"

	mkdir -p $DIR/$tdir

	local f=$DIR/$tdir/$tfile

	changelog_register
	# set changelog_mask to ALL
	changelog_chmask "ALL"

	touch $f || error "touch $f failed"
	local fid=$(path2fid $f)

	changelog_clear
	changelog_dump
	exec 3<> $f || error "open $f failed"
	changelog_dump

	changelog_find -type OPEN -target-fid $fid || error "No OPEN entry"

	changelog_clear
	changelog_dump
	exec 4<> $f || error "open $f failed"
	changelog_dump

	changelog_find -type OPEN -target-fid $fid &&
		error "There should be no OPEN entry"

	exec 4>&- || error "close $f failed"
	changelog_dump

	changelog_find -type CLOSE -target-fid $fid &&
		error "There should be no CLOSE entry"

	changelog_clear
	changelog_dump
	# previous opens were read-write; cat opens read-only, so this
	# different access mode should generate entries
	cat $f || error "cat $f failed"
	changelog_dump

	changelog_find -type OPEN -target-fid $fid || error "No OPEN entry"

	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"

	changelog_clear
	changelog_dump
	# same access as first one, should not generate new entries
	exec 4<> $f || error "open $f failed"
	changelog_dump

	changelog_find -type OPEN -target-fid $fid &&
		error "There should be no OPEN entry"

	exec 4>&- || error "close $f failed"
	changelog_dump

	changelog_find -type CLOSE -target-fid $fid &&
		error "There should be no CLOSE entry"

	changelog_clear
	changelog_dump
	# access by different user should generate new entries
	$RUNAS cat $f || error "cat $f by user $RUNAS_ID failed"
	changelog_dump

	changelog_find -type OPEN -target-fid $fid || error "No OPEN entry"

	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"

	# closing the long-lived rw descriptor must be recorded
	changelog_clear
	changelog_dump
	exec 3>&- || error "close $f failed"
	changelog_dump

	changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry"
}
run_test 605 "Test OPEN and CLOSE rate limit in Changelogs"

# test_606: check that llog_reader prints the uid:gid and NID fields of a
# changelog record.
# A file is created (and removed) as $RUNAS, then mds1 is stopped and its
# backing device is mounted with mount_fstype so that the on-disk
# changelog_catalog can be read directly with llog_reader.  The CREAT
# record for the file's FID must carry "$RUNAS_ID:$RUNAS_GID" in its
# "user:" field and a NID from CLIENT_NIDS in its "nid:" field.
# mds1 is restarted by the EXIT trap.
test_606() {
	[ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
		skip "need MDS version at least 2.10.58"

	# llog_reader is executed on mds1 below, so look it up there
	local llog_reader=$(do_facet mds1 "which llog_reader 2> /dev/null")
	llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader}
	[[ -n "$(do_facet mds1 ls -d $llog_reader 2> /dev/null)" ]] ||
		skip_env "missing llog_reader"

	mkdir -p $DIR/$tdir

	local f=$DIR/$tdir/$tfile

	changelog_register
	# set changelog_mask to ALL
	changelog_chmask "ALL"

	# the directory must be writable by the unprivileged $RUNAS user
	chmod 777 $DIR/$tdir
	$RUNAS touch $f || error "touch $f failed as $RUNAS_ID"
	local fid=$(path2fid $f)
	rm $f || error "rm $f failed"

	local mntpt=$(facet_mntpt mds1)
	local entry

	#remount mds1 as ldiskfs or zfs type
	stack_trap "stop mds1; start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS" EXIT
	stop mds1 || error "stop mds1 failed"
	mount_fstype mds1 || error "remount mds1 failed"

	# dump the catalog into the test log for debugging
	do_facet mds1 $llog_reader $mntpt/changelog_catalog
	# pull the "path=..." value out of the catalog dump; the awk END
	# block prints the capture left by the last match() call
	local cat_file=$(do_facet mds1 $llog_reader \
				$mntpt/changelog_catalog | awk \
				'{match($0,"path=([^ ]+)",a)}END{print a[1]}')
	[ -n "$cat_file" ] || error "no catalog file"

	entry=$(do_facet mds1 $llog_reader $mntpt/$cat_file |
		awk "/CREAT/ && /target:\[$fid\]/ {print}")
	[ -n "$entry" ] || error "no CREAT entry"

	# $entry is left unquoted on purpose: echo then emits it as a
	# single space-separated line, which the sed patterns rely on
	local uidgid=$(echo $entry |
		sed 's+.*\ user:\([0-9][0-9]*:[0-9][0-9]*\)\ .*+\1+')
	[ -n "$uidgid" ] || error "uidgid is empty"
	echo "Got UID/GID $uidgid"
	[ "$uidgid" = "$RUNAS_ID:$RUNAS_GID" ] ||
		error "uidgid '$uidgid' != '$RUNAS_ID:$RUNAS_GID'"

	local nid=$(echo $entry |
		sed 's+.*\ nid:\(\S\S*@\S\S*\)\ .*+\1+')
	[ -n "$nid" ] || error "nid is empty"
	echo "Got NID $nid"
	[[ "${CLIENT_NIDS[*]}" =~ $nid ]] ||
		error "nid '$nid' does not match any NID ${CLIENT_NIDS[*]}"
}
run_test 606 "llog_reader groks changelog fields"

complete $SECONDS
check_and_cleanup_lustre