X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=82b7c5a55916eb62d717416a7f7fcb58342ce13c;hp=f73a50264ac0cd95062b3dced4641b083949270c;hb=1f7795fdd85a2aa39e41b26ab9b95bd3df740af7;hpb=d9f95aa201341d972eeb610471e3c45f1ba12202 diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index f73a502..82b7c5a 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -7,44 +7,72 @@ set -e set +o monitor -SRCDIR=$(dirname $0) -export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin - ONLY=${ONLY:-"$*"} -# bug number for skipped test: LU-3815 -ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36" -# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! - -LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +LUSTRE=${LUSTRE:-$(dirname $0)/..} . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -MULTIOP=${MULTIOP:-multiop} +ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT " +if $SHARED_KEY; then +# bug number for skipped tests: LU-9795 + ALWAYS_EXCEPT+=" 402b " +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! +fi + +# Skip tests for PPC that fail frequently +if [[ $(uname -m) = ppc64 ]]; then + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 1a 1b 1d 1e 12c 12f " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 12g 12h 12m 12n 12o 12p " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 12q 21 22 23 24a 24b " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 24d 24e 24f 25b 30c 37 " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 57 58 90 110b 111b 113 " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 222b 222d 228 260a 260b 260c " + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 220A 220a 221 222a 222c 223a " + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 223b 224A 224a 226 227 600" + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 601 602 603 604 605 " +fi + +build_test_filter + +[ -n "$FILESET" ] && skip "Not functional for FILESET set" + OPENFILE=${OPENFILE:-openfile} -MMAP_CAT=${MMAP_CAT:-mmap_cat} MOUNT_2=${MOUNT_2:-"yes"} FAIL_ON_ERROR=false # script only handles up to 10 MDTs (because of MDT_PREFIX) [ $MDSCOUNT -gt 9 ] && - error "script cannot handle more than 9 MDTs, please fix" && exit + error "script cannot handle more than 9 MDTs, please fix" check_and_setup_lustre -if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then - skip_env "Need MDS version at least 2.4.53" && exit +if [[ $MDS1_VERSION -lt $(version_code 2.4.53) ]]; then + skip_env "Need MDS version at least 2.4.53" fi # $RUNAS_ID may get set incorrectly somewhere else if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then - skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" && exit + skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" fi check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS - -build_test_filter +if getent group nobody; then + GROUP=nobody +elif getent group nogroup; then + GROUP=nogroup +else + error "No generic nobody group" +fi # if there is no CLIENT1 defined, some tests can be ran on localhost CLIENT1=${CLIENT1:-$HOSTNAME} @@ -54,106 +82,9 @@ CLIENT1=${CLIENT1:-$HOSTNAME} # Exception is the test which need two separate nodes CLIENT2=${CLIENT2:-$CLIENT1} -# -# In order to test multiple remote HSM agents, a new facet type named "AGT" and -# the following associated variables are added: -# -# AGTCOUNT: number of agents -# AGTDEV{N}: target HSM mount point (root path of the backend) -# agt{N}_HOST: hostname of the agent agt{N} -# SINGLEAGT: facet of the single agent -# -# The number of agents is initialized as the number of remote client nodes. -# By default, only single copytool is started on a remote client/agent. If there -# was no remote client, then the copytool will be started on the local client. -# -init_agt_vars() { - local n - local agent - - export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))} - [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1 - - export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP} - if [[ $CLIENTCOUNT -gt 1 ]] && - ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then - skip_env "SHARED_DIRECTORY should be accessible"\ - "on all client nodes" - exit 0 - fi - - # We used to put the HSM archive in $SHARED_DIRECTORY but that - # meant NFS issues could hose sanity-hsm sessions. So now we - # use $TMP instead. - for n in $(seq $AGTCOUNT); do - eval export AGTDEV$n=\$\{AGTDEV$n:-"$TMP/arc$n"\} - agent=CLIENT$((n + 1)) - if [[ -z "${!agent}" ]]; then - [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 || - agent=CLIENT2 - fi - eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\} - done - - export SINGLEAGT=${SINGLEAGT:-agt1} - - export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} - export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} - export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} - export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} - export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") - HSM_ARCHIVE=$(copytool_device $SINGLEAGT) - HSM_ARCHIVE_NUMBER=2 - - # The test only support up to 10 MDTs - MDT_PREFIX="mdt.$FSNAME-MDT000" - HSM_PARAM="${MDT_PREFIX}0.hsm" - - # archive is purged at copytool setup - HSM_ARCHIVE_PURGE=true - - # Don't allow copytool error upon start/setup - HSMTOOL_NOERROR=false -} - -# Get the backend root path for the given agent facet. -copytool_device() { - local facet=$1 - local dev=AGTDEV$(facet_number $facet) - - echo -n ${!dev} -} - -# Stop copytool and unregister an existing changelog user. -cleanup() { - copytool_monitor_cleanup - copytool_cleanup - changelog_cleanup - cdt_set_sanity_policy -} - -get_mdt_devices() { - local mdtno - # get MDT device for each mdc - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - MDT[$idx]=$($LCTL get_param -n \ - mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid | - awk '{gsub(/_UUID/,""); print $1}' | head -n1) - done -} - search_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" -} - -kill_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - - echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true + do_nodesv $hosts "pgrep --pidfile=$HSMTOOL_PID_FILE hsmtool" } wait_copytools() { @@ -161,15 +92,17 @@ wait_copytools() { local wait_timeout=200 local wait_start=$SECONDS local wait_end=$((wait_start + wait_timeout)) + local sleep_time=1 while ((SECONDS < wait_end)); do - sleep 2 if ! search_copytools $hosts; then echo "copytools stopped in $((SECONDS - wait_start))s" return 0 fi echo "copytools still running on $hosts" + sleep $sleep_time + [ $sleep_time -lt 5 ] && sleep_time=$((sleep_time + 1)) done # try to dump Copytool's stack @@ -196,93 +129,32 @@ copytool_monitor_setup() { cmd="cat $test_dir/fifo > $test_dir/events &" cmd+=" echo \\\$! > $test_dir/monitor_pid" - if [[ $PDSH == *Rmrsh* ]]; then - # This is required for pdsh -Rmrsh and its handling of remote - # shells. - # Regular ssh and pdsh -Rssh work fine without this - # backgrounded subshell nonsense. - (do_node $agent "$cmd") & - export HSMTOOL_MONITOR_PDSH=$! - - # Slightly racy, but just making a best-effort to catch obvious - # problems. - sleep 1 - ps -p $HSMTOOL_MONITOR_PDSH > /dev/null || - error "Failed to start copytool monitor on $agent" - else - do_node $agent "$cmd" - if [ $? != 0 ]; then - error "Failed to start copytool monitor on $agent" - fi - fi -} - -copytool_monitor_cleanup() { - local facet=${1:-$SINGLEAGT} - local agent=$(facet_active_host $facet) - - if [ -n "$HSMTOOL_MONITOR_DIR" ]; then - # Should die when the copytool dies, but just in case. - local cmd="kill \\\$(cat $HSMTOOL_MONITOR_DIR/monitor_pid)" - cmd+=" 2>/dev/null || true" - do_node $agent "$cmd" - do_node $agent "rm -fr $HSMTOOL_MONITOR_DIR" - export HSMTOOL_MONITOR_DIR= - fi + # This background subshell nonsense is required when pdsh/ssh decides + # to wait for the cat process to exit on the remote client + (do_node $agent "$cmd") & + export HSMTOOL_MONITOR_PDSH=$! - # The pdsh should die on its own when the monitor dies. Just - # in case, though, try to clean up to avoid any cruft. - if [ -n "$HSMTOOL_MONITOR_PDSH" ]; then - kill $HSMTOOL_MONITOR_PDSH 2>/dev/null - export HSMTOOL_MONITOR_PDSH= + # Slightly racy, but just making a best-effort to catch obvious + # problems. + sleep 1 + do_node $agent "stat $HSMTOOL_MONITOR_DIR/monitor_pid 2>&1 > /dev/null" + if [ $? != 0 ]; then + error "Failed to start copytool monitor on $agent" fi } -copytool_setup() { - local facet=${1:-$SINGLEAGT} - # Use MOUNT2 by default if defined - local lustre_mntpnt=${2:-${MOUNT2:-$MOUNT}} - local arc_id=$3 - local hsm_root=${4:-$(copytool_device $facet)} - local agent=$(facet_active_host $facet) - - if [[ -z "$arc_id" ]] && - do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then - echo "Only wakeup running copytool $facet on $agent" - return 0 - fi - - if $HSM_ARCHIVE_PURGE; then - echo "Purging archive on $agent" - do_facet $facet "rm -rf $hsm_root/*" - fi - - echo "Starting copytool $facet on $agent" - do_facet $facet "mkdir -p $hsm_root" || error "mkdir '$hsm_root' failed" - # bandwidth is limited to 1MB/s so the copy time is known and - # independent of hardware - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root $hsm_root" - [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id" - [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] || - cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL" - [[ -z "$HSMTOOL_EVENT_FIFO" ]] || - cmd+=" --event-fifo $HSMTOOL_EVENT_FIFO" - cmd+=" --bandwidth 1 $lustre_mntpnt" - - # Redirect the standard output and error to a log file which - # can be uploaded to Maloo. - local prefix=$TESTLOG_PREFIX - [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME - local copytool_log=$prefix.copytool${arc_id}_log.$agent.log - - do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1" - if [[ $? != 0 ]]; then - [[ $HSMTOOL_NOERROR == true ]] || - error "start copytool $facet on $agent failed" - echo "start copytool $facet on $agent failed" - fi +fid2archive() +{ + local fid="$1" - trap cleanup EXIT + case "$HSMTOOL_ARCHIVE_FORMAT" in + v1) + printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid" + ;; + v2) + printf "%s" "$(hsm_root)/*/$fid" + ;; + esac } get_copytool_event_log() { @@ -296,165 +168,107 @@ get_copytool_event_log() { error "Could not collect event log from $agent" } -copytool_cleanup() { - trap - EXIT - local agt_facet=$SINGLEAGT - local agt_hosts=${1:-$(facet_active_host $agt_facet)} - local hsm_root=$(copytool_device $agt_facet) - local i - local facet - local param - local -a state - - kill_copytools $agt_hosts - wait_copytools $agt_hosts || error "copytools failed to stop" - - # Clean all CDTs orphans requests from previous tests that - # would otherwise need to timeout to clear. - for ((i = 0; i < MDSCOUNT; i++)); do - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - state[$i]=$(do_facet $facet "$LCTL get_param -n $param") - - # Skip already stopping or stopped CDTs. - [[ "${state[$i]}" =~ ^stop ]] && continue - - do_facet $facet "$LCTL set_param $param=shutdown" - done - - for ((i = 0; i < MDSCOUNT; i++)); do - # Only check and restore CDTs that we stopped in the first loop. - [[ "${state[$i]}" =~ ^stop ]] && continue - - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - - wait_result $facet "$LCTL get_param -n $param" stopped 20 || - error "$facet CDT state is not stopped" - - # Restore old CDT state. - do_facet $facet "$LCTL set_param $param=${state[$i]}" - done - - for ((i = 0; i < MDSCOUNT; i++)); do - # Only check CDTs that we stopped in the first loop. - [[ "${state[$i]}" =~ ^stop ]] && continue - - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - - # Check that the old CDT state was restored. - wait_result $facet "$LCTL get_param -n $param" "${state[$i]}" \ - 20 || error "$facet CDT state is not '${state[$i]}'" - done - - if do_facet $agt_facet "df $hsm_root" >/dev/null 2>&1 ; then - do_facet $agt_facet "rm -rf $hsm_root/*" - fi -} - copytool_suspend() { local agents=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0 + stack_trap "pkill_copytools $agents CONT || true" EXIT + pkill_copytools $agents STOP || return 0 echo "Copytool is suspended on $agents" } copytool_remove_backend() { local fid=$1 - local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid) + local be=$(do_facet $SINGLEAGT find "$(hsm_root)" -name $fid) echo "Remove from backend: $fid = $be" do_facet $SINGLEAGT rm -f $be } -import_file() { - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --import $1 $2 $MOUNT" || - error "import of $1 to $2 failed" -} +file_creation_failure() { + local cmd=$1 + local file=$2 + local err=$3 -make_archive() { - local file=$HSM_ARCHIVE/$1 - do_facet $SINGLEAGT mkdir -p $(dirname $file) - do_facet $SINGLEAGT dd if=/dev/urandom of=$file count=32 bs=1000000 || - file_creation_failure dd $file $? -} + case $err in + 28) + df $MOUNT $MOUNT2 >&2 + error "Not enough space to create $file with $cmd" + ;; + *) + error "cannot create $file with $cmd, status=$err" + ;; + esac +} + +# Creates a file using dd +create_file() { + local file=$1 + local bs=$2 + local count=$3 + local conv=$4 + local source=${5:-/dev/zero} + local args="" + local err + + if [ -n "$conv" ]; then + args+=" conv=$conv" + fi -copy2archive() { - local file=$HSM_ARCHIVE/$2 - do_facet $SINGLEAGT mkdir -p $(dirname $file) - do_facet $SINGLEAGT cp -p $1 $file || error "cannot copy $1 to $file" -} + # Create the directory in case it does not exist + mkdir -p "$(dirname "$file")" + # Delete the file in case it already exist + rm -f "$file" -mdts_set_param() { - local arg=$1 - local key=$2 - local value=$3 - local mdtno - local rc=0 - if [[ "$value" != "" ]]; then - value="=$value" + if dd if="$source" of="$file" count="$count" bs="$bs" $args; then + path2fid "$file" || error "cannot get FID of '$file'" + else + err=$? + echo "cannot create file '$file'" >&2; + # Let the caller decide what to do on error + return $err; fi - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local facet=mds${mdtno} - # if $arg include -P option, run 1 set_param per MDT on the MGS - # else, run set_param on each MDT - [[ $arg = *"-P"* ]] && facet=mgs - do_facet $facet $LCTL set_param $arg mdt.${MDT[$idx]}.$key$value - [[ $? != 0 ]] && rc=1 - done - return $rc } -mdts_check_param() { - local key="$1" - local target="$2" - local timeout="$3" - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ - "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ - $timeout || - error "$key state is not '$target' on mds${mdtno}" - done +create_empty_file() { + create_file "${1/$DIR/$DIR2}" 1M 0 || + file_creation_failure dd "${1/$DIR/$DIR2}" $? } -changelog_setup() { - CL_USERS=() - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local cl_user=$(do_facet mds${mdtno} $LCTL \ - --device ${MDT[$idx]} \ - changelog_register -n) - CL_USERS+=($cl_user) - do_facet mds${mdtno} lctl set_param \ - mdd.${MDT[$idx]}.changelog_mask="+hsm" - $LFS changelog_clear ${MDT[$idx]} $cl_user 0 - done +create_small_file() { + local source_file=/dev/urandom + local count=1 + local bs=1M + local conv=${2:-fsync} + + create_file "${1/$DIR/$DIR2}" $bs $count $conv $source_file || + file_creation_failure dd "${1/$DIR/$DIR2}" $? } -changelog_cleanup() { - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - [[ -z ${CL_USERS[$idx]} ]] && continue - $LFS changelog_clear ${MDT[$idx]} ${CL_USERS[$idx]} 0 - do_facet mds${mdtno} lctl --device ${MDT[$idx]} \ - changelog_deregister ${CL_USERS[$idx]} - done - CL_USERS=() +create_small_sync_file() { + create_small_file "$1" sync +} + +create_archive_file() { + local file="$(hsm_root)/$1" + local count=${2:-39} + local source=/dev/urandom + + # Create the counterpart directory of the archive + do_facet "$SINGLEAGT" mkdir -p "$(dirname "$file")" || + error "cannot create archive directory '$(dirname "$file")'" + + do_facet "$SINGLEAGT" dd if=$source of="$file" bs=1M count=$count || + error "cannot create archive file '$file'" } -changelog_get_flags() { - local mdt=$1 - local cltype=$2 - local fid=$3 +copy2archive() { + local hsm_root="$(hsm_root)" + local file="$hsm_root/$2" - $LFS changelog $mdt | awk "/$cltype/ && /t=\[$fid\]/ {print \$5}" + stack_trap "do_facet $SINGLEAGT rm -rf '$hsm_root'" EXIT + do_facet $SINGLEAGT mkdir -p "$(dirname "$file")" || + error "mkdir '$(dirname "$file")' failed" + do_facet $SINGLEAGT cp -p "$1" "$file" || + error "cannot copy '$1' to '$file'" } get_hsm_param() { @@ -463,14 +277,6 @@ get_hsm_param() { echo $val } -set_hsm_param() { - local param=$1 - local value=$2 - local opt=$3 - mdts_set_param "$opt -n" "hsm.$param" "$value" - return $? -} - set_test_state() { local cmd=$1 local target=$2 @@ -478,15 +284,6 @@ set_test_state() { mdts_check_param hsm_control "$target" 10 } -cdt_set_sanity_policy() { - if [[ "$CDT_POLICY_HAD_CHANGED" ]] - then - # clear all - mdts_set_param "" hsm.policy "+NRA" - mdts_set_param "" hsm.policy "-NBR" - CDT_POLICY_HAD_CHANGED= - fi -} cdt_set_no_retry() { mdts_set_param "" hsm.policy "+NRA" @@ -512,21 +309,6 @@ cdt_clear_mount_state() { mdts_set_param "-P -d" hsm_control "" } -cdt_set_mount_state() { - mdts_set_param "-P" hsm_control "$1" - # set_param -P is asynchronous operation and could race with set_param. - # In such case configs could be retrieved and applied at mgc after - # set_param -P completion. Sleep here to avoid race with set_param. - # We need at least 20 seconds. 10 for mgc_requeue_thread to wake up - # MGC_TIMEOUT_MIN_SECONDS + MGC_TIMEOUT_RAND_CENTISEC(5 + 5) - # and 10 seconds to retrieve config from server. - sleep 20 -} - -cdt_check_state() { - mdts_check_param hsm_control "$1" 20 -} - cdt_disable() { set_test_state disabled disabled } @@ -549,37 +331,6 @@ cdt_restart() { cdt_set_sanity_policy } -needclients() { - local client_count=$1 - if [[ $CLIENTCOUNT -lt $client_count ]]; then - skip "Need $client_count or more clients, have $CLIENTCOUNT" - return 1 - fi - return 0 -} - -path2fid() { - $LFS path2fid $1 | tr -d '[]' - return ${PIPESTATUS[0]} -} - -get_hsm_flags() { - local f=$1 - local u=$2 - local st - - if [[ $u == "user" ]]; then - st=$($RUNAS $LFS hsm_state $f) - else - u=root - st=$($LFS hsm_state $f) - fi - - [[ $? == 0 ]] || error "$LFS hsm_state $f failed (run as $u)" - - st=$(echo $st | cut -f 2 -d" " | tr -d "()," ) - echo $st -} get_hsm_archive_id() { local f=$1 @@ -587,19 +338,10 @@ get_hsm_archive_id() { st=$($LFS hsm_state $f) [[ $? == 0 ]] || error "$LFS hsm_state $f failed" - local ar=$(echo $st | grep "archive_id" | cut -f5 -d" " | - cut -f2 -d:) + local ar=$(echo $st | grep -oP '(?<=archive_id:).*') echo $ar } -check_hsm_flags() { - local f=$1 - local fl=$2 - - local st=$(get_hsm_flags $f) - [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl" -} - check_hsm_flags_user() { local f=$1 local fl=$2 @@ -608,15 +350,6 @@ check_hsm_flags_user() { [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl" } -file_creation_failure() { - local cmd=$1 - local f=$2 - local err=$3 - - df $MOUNT $MOUNT2 >&2 - error "cannot create $f with $cmd, status=$err" -} - copy_file() { local f= @@ -635,118 +368,14 @@ copy_file() { path2fid $f || error "cannot get fid on $f" } -make_small() { - local file2=${1/$DIR/$DIR2} - dd if=/dev/urandom of=$file2 count=2 bs=1M conv=fsync || - file_creation_failure dd $file2 $? - - path2fid $1 || error "cannot get fid on $1" -} - -make_small_sync() { - dd if=/dev/urandom of=$1 count=1 bs=1M conv=sync || - file_creation_failure dd $1 $? - path2fid $1 || error "cannot get fid on $1" -} - -cleanup_large_files() { - local ratio=$(df -P $MOUNT | tail -1 | awk '{print $5}' | - sed 's/%//g') - [ $ratio -gt 50 ] && find $MOUNT -size +10M -exec rm -f {} \; -} - -check_enough_free_space() { - local nb=$1 - local unit=$2 - local need=$((nb * unit /1024)) - local free=$(df -kP $MOUNT | tail -1 | awk '{print $4}') - (( $need >= $free )) && return 1 - return 0 -} - -make_large_for_striping() { - local file2=${1/$DIR/$DIR2} - local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1) - - cleanup_large_files - - check_enough_free_space 5 $sz - [ $? != 0 ] && return $? - - dd if=/dev/urandom of=$file2 count=5 bs=$sz conv=fsync || - file_creation_failure dd $file2 $? - - path2fid $1 || error "cannot get fid on $1" -} - -make_large_for_progress() { - local file2=${1/$DIR/$DIR2} - - cleanup_large_files - - check_enough_free_space 39 1000000 - [ $? != 0 ] && return $? - - # big file is large enough, so copy time is > 30s - # so copytool make 1 progress - # size is not a multiple of 1M to avoid stripe - # aligment - dd if=/dev/urandom of=$file2 count=39 bs=1000000 conv=fsync || - file_creation_failure dd $file2 $? - - path2fid $1 || error "cannot get fid on $1" -} - -make_large_for_progress_aligned() { - local file2=${1/$DIR/$DIR2} - - cleanup_large_files - - check_enough_free_space 33 1048576 - [ $? != 0 ] && return $? - - # big file is large enough, so copy time is > 30s - # so copytool make 1 progress - # size is a multiple of 1M to have stripe - # aligment - dd if=/dev/urandom of=$file2 count=33 bs=1M conv=fsync || - file_creation_failure dd $file2 $? - path2fid $1 || error "cannot get fid on $1" -} - -make_large_for_cancel() { - local file2=${1/$DIR/$DIR2} - - cleanup_large_files - - check_enough_free_space 103 1048576 - [ $? != 0 ] && return $? - - # Copy timeout is 100s. 105MB => 105s - dd if=/dev/urandom of=$file2 count=103 bs=1M conv=fsync || - file_creation_failure dd $file2 $? - path2fid $1 || error "cannot get fid on $1" -} - -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - -wait_request_state() { - local fid=$1 - local request=$2 - local state=$3 - # 4th arg (mdt index) is optional - local mdtidx=${4:-0} - local mds=mds$(($mdtidx + 1)) - - local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" - cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - - wait_result $mds "$cmd" $state 200 || - error "request on $fid is not $state on $mds" +# Delete any file bigger than 10M under $MOUNT and wait for deletes to complete +# +# Note that this might lead to surprising behaviours such as deleting an +# important file for the currently running test +delete_large_files() { + printf "Deleting large files...\n" >&2 + find $MOUNT -size +10M -delete + wait_delete_completed } get_request_state() { @@ -765,6 +394,15 @@ get_request_count() { "awk -vn=0 '/'$fid'.*action='$request'/ {n++}; END {print n}'" } +# Ensure the number of HSM request for a given FID is correct +# assert_request_count FID REQUEST_TYPE COUNT [ERROR_MSG] +assert_request_count() { + local request_count=$(get_request_count $1 $2) + local default_error_msg=("expected $3 '$2' request(s) for '$1', found " + "'$request_count'") + [ $request_count -eq $3 ] || error "${4:-"${default_error_msg[@]}"}" +} + wait_all_done() { local timeout=$1 local fid=$2 @@ -773,7 +411,7 @@ wait_all_done() { [[ -n $fid ]] && cmd+=" | grep '$fid'" cmd+=" | egrep 'WAITING|STARTED'" - wait_result $SINGLEMDS "$cmd" "" $timeout || + wait_update_facet --verbose mds1 "$cmd" "" $timeout || error "requests did not complete" } @@ -782,6 +420,11 @@ wait_for_grace_delay() { sleep $val } +wait_for_loop_period() { + local val=$(get_hsm_param loop_period) + sleep $val +} + parse_json_event() { local raw_event=$1 @@ -794,32 +437,92 @@ parse_json_event() { echo $raw_event | python -c "$json_parser" } -# populate MDT device array -get_mdt_devices - -# initiate variables -init_agt_vars - -# cleanup from previous bad setup -kill_copytools +get_agent_by_uuid_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + do_facet $mds "$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.agents |\ + grep $uuid" +} -# for recovery tests, coordinator needs to be started at mount -# so force it -# the lustre conf must be without hsm on (like for sanity.sh) -echo "Set HSM on and start" -cdt_set_mount_state enabled -cdt_check_state enabled +check_agent_registered_by_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) + if [[ ! -z "$agent" ]]; then + echo "found agent $agent on $mds" + else + error "uuid $uuid not found in agent list on $mds" + fi +} -echo "Start copytool" -copytool_setup +check_agent_unregistered_by_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) + if [[ -z "$agent" ]]; then + echo "uuid not found in agent list on $mds" + else + error "uuid found in agent list on $mds: $agent" + fi +} -echo "Set sanity-hsm HSM policy" -cdt_set_sanity_policy +check_agent_registered() { + local uuid=$1 + local mdsno + for mdsno in $(seq 1 $MDSCOUNT); do + check_agent_registered_by_mdt $uuid $((mdsno - 1)) + done +} -# finished requests are quickly removed from list +check_agent_unregistered() { + local uuid=$1 + local mdsno + for mdsno in $(seq 1 $MDSCOUNT); do + check_agent_unregistered_by_mdt $uuid $((mdsno - 1)) + done +} + +get_agent_uuid() { + local agent=${1:-$(facet_active_host $SINGLEAGT)} + + # Lustre mount-point is mandatory and last parameter on + # copytool cmd-line. + local mntpnt=$(do_rpc_nodes $agent \ + pgrep --pidfile=$HSMTOOL_PID_FILE --list-full hsmtool | + awk '{print $NF}') + [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ + "parameter" + do_rpc_nodes $agent get_client_uuid $mntpnt | cut -d' ' -f2 +} + +# initiate variables +init_agt_vars + +# populate MDT device array +get_mdt_devices + +# cleanup from previous bad setup +kill_copytools + +# for recovery tests, coordinator needs to be started at mount +# so force it +# the lustre conf must be without hsm on (like for sanity.sh) +echo "Set HSM on and start" +cdt_set_mount_state enabled +cdt_check_state enabled + +echo "Set sanity-hsm HSM policy" +cdt_set_sanity_policy + +# finished requests are quickly removed from list set_hsm_param grace_delay 10 -test_1() { +CLIENT_NIDS=( $($LCTL list_nids all) ) + +test_1A() { # was test_1 mkdir -p $DIR/$tdir chmod 777 $DIR/$tdir @@ -852,12 +555,13 @@ test_1() { check_hsm_flags_user $f "0x00000000" } -run_test 1 "lfs hsm flags root/non-root access" +run_test 1A "lfs hsm flags root/non-root access" test_1a() { - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_small $f) + local fid=$(create_small_file $f) + + copytool setup $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED @@ -871,10 +575,158 @@ test_1a() { } run_test 1a "mmap & cat a HSM released file" -test_2() { +test_1bde_base() { + local f=$1 + rm -f $f + + dd if=/dev/urandom of=$f bs=1M count=1 conv=sync || + error "failed to create file" + local fid=$(path2fid $f) + + copytool setup + + echo "archive $f" + $LFS hsm_archive $f || error "could not archive file" + wait_request_state $fid ARCHIVE SUCCEED + + echo "release $f" + $LFS hsm_release $f || error "could not release file" + echo "verify released state: " + check_hsm_flags $f "0x0000000d" && echo "pass" + + echo "restore $f" + $LFS hsm_restore $f || error "could not restore file" + wait_request_state $fid RESTORE SUCCEED + echo "verify restored state: " + check_hsm_flags $f "0x00000009" && echo "pass" +} + +test_1b() { + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || + error "failed to set default stripe" + local f=$DIR/$tdir/$tfile + + test_1bde_base $f +} +run_test 1b "Archive, Release and Restore composite file" + +test_1c() { + mkdir -p $DIR/$tdir + chmod 777 $DIR/$tdir + + local f=$DIR/$tdir/$tfile + $RUNAS touch $f + + # Test whether we can set the maximum archive number. + local LOCAL_HSM_ARCHIVE_NUMBER=32 + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f || + error "root could not change hsm flags" + check_hsm_flags_user $f "0x00000001" + echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER" + local st=$(get_hsm_archive_id $f) + [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || + error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" + + # Test whether setting archive number 0 results in no change. + $LFS hsm_set --exists --archive-id 0 $f || + error "root could not change hsm flags" + check_hsm_flags_user $f "0x00000001" + echo "verifying archive number is still $LOCAL_HSM_ARCHIVE_NUMBER" + st=$(get_hsm_archive_id $f) + [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || + error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" + + LOCAL_HSM_ARCHIVE_NUMBER=33 + if [ "$CLIENT_VERSION" -ge $(version_code 2.11.56) ] && + [ "$MDS1_VERSION" -ge $(version_code 2.11.56) ]; then + # lustre in the new version supports unlimited archiveID. + # Test whether setting archive number > 32 is supported + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f || + error "archive ID $LOCAL_HSM_ARCHIVE_NUMBER too large?" + check_hsm_flags_user $f "0x00000001" + + echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER" + st=$(get_hsm_archive_id $f) + [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || + error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" + else + # old client or old mds can only support at most 32 archiveID + # test whether setting archive number > 32 results in error. + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f && + error "bitmap archive number is larger than 32" + check_hsm_flags_user $f "0x00000001" + fi + + # Test whether setting archive number 16 and archived flag. + LOCAL_HSM_ARCHIVE_NUMBER=16 + $LFS hsm_set --exists --archived \ + --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f || + error "root could not change hsm flags" + check_hsm_flags_user $f "0x00000009" + echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER" + st=$(get_hsm_archive_id $f) + [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || + error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" +} +run_test 1c "Check setting archive-id in lfs hsm_set" + +test_1d() { + [ $MDS1_VERSION -lt $(version_code 2.10.59) ] && + skip "need MDS version at least 2.10.59" + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -L mdt -E -1 -c 2 $DIR/$tdir || + error "failed to set default stripe" + local f=$DIR/$tdir/$tfile + + test_1bde_base $f +} +run_test 1d "Archive, Release and Restore DoM file" + +test_1e() { + [ "$MDS1_VERSION" -lt $(version_code $SEL_VER) ] && + skip "skipped for lustre < $SEL_VER" + mkdir -p $DIR/$tdir + $LFS setstripe -E 1G -z 64M -E 10G -z 512M -E -1 -z 1G $DIR/$tdir || + error "failed to set default stripe" + local comp_file=$DIR/$tdir/$tfile + + test_1bde_base $comp_file + + local flg_opts="--comp-start 0 -E 64M --comp-flags init" + local found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "1st component not found" + + flg_opts="--comp-start 64M -E 1G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "2nd component not found" + + flg_opts="--comp-start 1G -E 1G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "3rd component not found" + + flg_opts="--comp-start 1G -E 10G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "4th component not found" + + flg_opts="--comp-start 10G -E 10G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "5th component not found" + + flg_opts="--comp-start 10G -E EOF --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "6th component not found" + + sel_layout_sanity $comp_file 6 +} +run_test 1e "Archive, Release and Restore SEL file" + +test_2() { local f=$DIR/$tdir/$tfile - touch $f + + create_empty_file "$f" # New files are not dirty check_hsm_flags $f "0x00000000" @@ -955,9 +807,8 @@ test_3() { run_test 3 "Check file dirtyness when opening for write" test_4() { - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_small $f) + local fid=$(create_small_file $f) $LFS hsm_cancel $f local st=$(get_request_state $fid CANCEL) @@ -967,7 +818,7 @@ run_test 4 "Useless cancel must not be registered" test_8() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -976,27 +827,28 @@ test_8() { wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 8 "Test default archive number" -test_9() { +test_9A() { # was test_9 + # we do not use the default one to be sure + local archive_id=$((HSM_ARCHIVE_NUMBER + 1)) + copytool setup --archive-id $archive_id + + # give time for CT to register with MDTs + sleep $(($MDSCOUNT*2)) + local uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) + check_agent_registered $uuid + mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - # we do not use the default one to be sure - local new_an=$((HSM_ARCHIVE_NUMBER + 1)) - copytool_cleanup - copytool_setup $SINGLEAGT $MOUNT $new_an - $LFS hsm_archive --archive $new_an $f + $LFS hsm_archive --archive $archive_id $f wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } -run_test 9 "Use of explicit archive number, with dedicated copytool" +run_test 9A "Use of explicit archive number, with dedicated copytool" test_9a() { needclients 3 || return 0 @@ -1005,33 +857,26 @@ test_9a() { local file local fid - copytool_cleanup $(comma_list $(agts_nodes)) - # start all of the copytools for n in $(seq $AGTCOUNT); do - copytool_setup agt$n + copytool setup --facet agt$n done - trap "copytool_cleanup $(comma_list $(agts_nodes))" EXIT # archive files - mkdir -p $DIR/$tdir for n in $(seq $AGTCOUNT); do file=$DIR/$tdir/$tfile.$n - fid=$(make_small $file) + fid=$(create_small_file $file) $LFS hsm_archive $file || error "could not archive file $file" wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $file "0x00000009" done - - trap - EXIT - copytool_cleanup $(comma_list $(agts_nodes)) } run_test 9a "Multiple remote agents" test_10a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile @@ -1040,10 +885,13 @@ test_10a() { error "hsm_archive failed" wait_request_state $fid ARCHIVE SUCCEED - local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || - error "fid $fid not in archive $HSM_ARCHIVE" + local hsm_root="$(copytool_device $SINGLEAGT)" + local archive="$(do_facet $SINGLEAGT \ + find "$hsm_root" -name "$fid" -print0)" + [ -n "$archive" ] || error "fid '$fid' not in archive '$hsm_root'" + echo "Verifying content" - do_facet $SINGLEAGT diff $f $AFILE || error "archived file differs" + do_facet $SINGLEAGT diff $f $archive || error "archived file differs" echo "Verifying hsm state " check_hsm_flags $f "0x00000009" @@ -1051,15 +899,12 @@ test_10a() { local st=$(get_hsm_archive_id $f) [[ $st == $HSM_ARCHIVE_NUMBER ]] || error "Wrong archive number, $st != $HSM_ARCHIVE_NUMBER" - - copytool_cleanup - } run_test 10a "Archive a file" test_10b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1071,28 +916,25 @@ test_10b() { local cnt=$(get_request_count $fid ARCHIVE) [[ "$cnt" == "1" ]] || error "archive of non dirty file must not make a request" - - copytool_cleanup } run_test 10b "Archive of non dirty file must work without doing request" test_10c() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_set --noarchive $f $LFS hsm_archive $f && error "archive a noarchive file must fail" - - copytool_cleanup + return 0 } run_test 10c "Check forbidden archive" test_10d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1104,8 +946,6 @@ test_10d() { local dflt=$(get_hsm_param default_archive_id) [[ $ar == $dflt ]] || error "archived file is not on default archive: $ar != $dflt" - - copytool_cleanup } run_test 10d "Archive a file on the default archive id" @@ -1114,30 +954,30 @@ test_11a() { copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f echo -n "Verifying released state: " check_hsm_flags $f "0x0000000d" local LSZ=$(stat -c "%s" $f) - local ASZ=$(do_facet $SINGLEAGT stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile) + local ASZ=$(do_facet $SINGLEAGT stat -c "%s" "$(hsm_root)/$tdir/$tfile") echo "Verifying imported size $LSZ=$ASZ" [[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ" echo -n "Verifying released pattern: " - local PTRN=$($GETSTRIPE -L $f) + local PTRN=$($LFS getstripe -L $f) echo $PTRN - [[ $PTRN == 80000001 ]] || error "Is not released" + [[ $PTRN == released ]] || error "Is not released" local fid=$(path2fid $f) echo "Verifying new fid $fid in archive" - local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || - error "fid $fid not in archive $HSM_ARCHIVE" + do_facet $SINGLEAGT "[ -f \"$(fid2archive "$fid")\" ]" || + error "No archive for fid $fid" } run_test 11a "Import a file" test_11b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1149,25 +989,35 @@ test_11b() { local FILE_HASH=$(md5sum $f) rm -f $f - import_file $fid $f + copytool import $fid $f echo "$FILE_HASH" | md5sum -c [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 11b "Import a deleted file using its FID" +test_11c() { + pool_add $TESTNAME || error "Pool creation failed" + pool_add_targets $TESTNAME 1 1 || error "pool_add_targets failed" + + mkdir -p $DIR/$tdir + $LFS setstripe -p "$TESTNAME" $DIR/$tdir + + copy2archive /etc/hosts $tdir/$tfile + copytool import $tdir/$tfile $DIR/$tdir/$tfile +} +run_test 11c "Import a file to a directory with a pool" + test_12a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local f2=$DIR2/$tdir/$tfile echo "Verifying released state: " check_hsm_flags $f2 "0x0000000d" @@ -1179,23 +1029,21 @@ test_12a() { echo "Verifying file state: " check_hsm_flags $f2 "0x00000009" - do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12a "Restore an imported file explicitly" test_12b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f echo "Verifying released state: " check_hsm_flags $f "0x0000000d" @@ -1204,26 +1052,22 @@ test_12b() { echo "Verifying file state after restore: " check_hsm_flags $f "0x00000009" - do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12b "Restore an imported file implicitly" test_12c() { - [ "$OSTCOUNT" -lt "2" ] && skip_env "skipping 2-stripe test" && return + [ "$OSTCOUNT" -lt "2" ] && skip_env "needs >= 2 OSTs" && return # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - $LFS setstripe -c 2 $f - local fid - fid=$(make_large_for_striping $f) - [ $? != 0 ] && skip "not enough free space" && return + mkdir -p $DIR/$tdir + $LFS setstripe -c 2 "$f" + local fid=$(create_file "$f" 1M 5) local FILE_CRC=$(md5sum $f) @@ -1234,14 +1078,12 @@ test_12c() { echo "$FILE_CRC" | md5sum -c [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12c "Restore a file with stripe of 2" test_12d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -1259,17 +1101,15 @@ test_12d() { local cnt=$(get_request_count $fid RESTORE) [[ "$cnt" == "0" ]] || error "restore a non dirty file must not make a request" - - copytool_cleanup } run_test 12d "Restore of a non archived, non released file must work"\ " without doing request" test_12e() { # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive $f || error "archive request failed" @@ -1281,14 +1121,13 @@ test_12e() { $LFS hsm_state $f $LFS hsm_restore $f && error "restore a dirty file must fail" - - copytool_cleanup + return 0 } run_test 12e "Check forbidden restore" test_12f() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1306,14 +1145,12 @@ test_12f() { diff -q /etc/hosts $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12f "Restore a released file explicitly" test_12g() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1330,8 +1167,6 @@ test_12g() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12g "Restore a released file implicitly" @@ -1339,7 +1174,7 @@ test_12h() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1356,14 +1191,12 @@ test_12h() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12h "Restore a released file implicitly from a second node" test_12m() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1376,33 +1209,29 @@ test_12m() { cmp /etc/passwd $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12m "Archive/release/implicit restore" test_12n() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f do_facet $SINGLEAGT cmp /etc/hosts $f || error "Restored file differs" $LFS hsm_release $f || error "release of $f failed" - - copytool_cleanup } run_test 12n "Import/implicit restore/release" test_12o() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1446,14 +1275,12 @@ test_12o() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12o "Layout-swap failure during Restore leaves file released" test_12p() { # test needs a running copytool - copytool_setup + copytool setup mkdir $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1466,33 +1293,23 @@ test_12p() { do_facet $SINGLEAGT cat $f > /dev/null || error "cannot cat $f" $LFS hsm_release $f || error "cannot release $f" do_facet $SINGLEAGT cat $f > /dev/null || error "cannot cat $f" - - copytool_cleanup } run_test 12p "implicit restore of a file on copytool mount point" -cleanup_test_12q() { - trap 0 - zconf_umount $(facet_host $SINGLEAGT) $MOUNT3 || - error "cannot umount $MOUNT3 on $SINGLEAGT" -} - test_12q() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.58) ] && - skip "need MDS version at least 2.7.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.58) ] && + skip "need MDS version at least 2.7.58" + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT zconf_mount $(facet_host $SINGLEAGT) $MOUNT3 || error "cannot mount $MOUNT3 on $SINGLEAGT" - trap cleanup_test_12q EXIT - # test needs a running copytool - copytool_setup $SINGLEAGT $MOUNT3 + copytool setup -m "$MOUNT3" - mkdir $DIR/$tdir local f=$DIR/$tdir/$tfile local f2=$DIR2/$tdir/$tfile - local fid=$(make_small $f) + local fid=$(create_small_file $f) local orig_size=$(stat -c "%s" $f) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f @@ -1514,7 +1331,7 @@ test_12q() { [ $size -eq $orig_size ] || error "$f2: wrong size after archive: $size != $orig_size" - HSM_ARCHIVE_PURGE=false copytool_setup $SINGLEAGT /mnt/lustre3 + copytool setup -m "$MOUNT3" wait @@ -1535,93 +1352,94 @@ test_12q() { size=$(stat -c "%s" $f2) [ $size -eq 0 ] || error "$f2: wrong size after overwrite: $size != 0" - - copytool_cleanup - zconf_umount $(facet_host $SINGLEAGT) $MOUNT3 || - error "cannot umount $MOUNT3 on $SINGLEAGT" } run_test 12q "file attributes are refreshed after restore" -test_13() { +test_12r() { # test needs a running copytool - copytool_setup - - local ARC_SUBDIR="import.orig" - local d="" - local f="" - - # populate directory to be imported - for d in $(seq 1 10); do - local CURR_DIR="$HSM_ARCHIVE/$ARC_SUBDIR/dir.$d" - do_facet $SINGLEAGT mkdir -p "$CURR_DIR" - for f in $(seq 1 10); do - CURR_FILE="$CURR_DIR/$tfile.$f" - # write file-specific data - do_facet $SINGLEAGT \ - "echo d=$d, f=$f, dir=$CURR_DIR, "\ - "file=$CURR_FILE > $CURR_FILE" + copytool setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/hosts $f) + + $LFS hsm_archive $f || error "archive of $f failed" + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f || error "release of $f failed" + + offset=$(lseek_test -d 7 $f) + + # we check we had a restore done + wait_request_state $fid RESTORE SUCCEED + [[ $offset == 7 ]] || error "offset $offset != 7" +} +run_test 12r "lseek restores released file" + +test_13() { + local -i i j k=0 + for i in {1..10}; do + local archive_dir="$(hsm_root)"/subdir/dir.$i + + do_facet $SINGLEAGT mkdir -p "$archive_dir" + for j in {1..10}; do + local archive_file="$archive_dir"/file.$j + + do_facet $SINGLEAGT "echo $k > \"$archive_dir\"/file.$j" + k+=1 done done + # import to Lustre - import_file "$ARC_SUBDIR" $DIR/$tdir - # diff lustre content and origin (triggers file restoration) - # there must be 10x10 identical files, and no difference - local cnt_ok=$(do_facet $SINGLEAGT diff -rs $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | grep identical | wc -l) - local cnt_diff=$(do_facet $SINGLEAGT diff -r $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | wc -l) + copytool import "subdir" "$DIR/$tdir" - [ $cnt_diff -eq 0 ] || - error "$cnt_diff imported files differ from read data" - [ $cnt_ok -eq 100 ] || - error "not enough identical files ($cnt_ok != 100)" + # To check the import, the test uses diff with the -r flag + # This is nice, but diff only checks files one by one, and triggering + # an implicit restore for one file at a time will consume as many + # seconds as there are files to compare. To speed this up, a restore + # operation is triggered manually first. + copytool setup + find "$DIR/$tdir"/subdir -type f -exec $LFS hsm_restore {} \; - copytool_cleanup + # Compare the imported data + do_facet $SINGLEAGT \ + diff -r "$(hsm_root)"/subdir "$DIR/$tdir"/subdir || + error "imported files differ from archived data" } run_test 13 "Recursively import and restore a directory" test_14() { # test needs a running copytool - copytool_setup + copytool setup # archive a file - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_small $f) + local fid=$(create_small_file $f) local sum=$(md5sum $f | awk '{print $1}') $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED - # delete the file - rm -f $f # create released file (simulate llapi_hsm_import call) - touch $f - local fid2=$(path2fid $f) + local fid2=$(create_empty_file "$f") $LFS hsm_set --archived --exists $f || error "could not force hsm flags" $LFS hsm_release $f || error "could not release file" # rebind the archive to the newly created file echo "rebind $fid to $fid2" - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --rebind $fid $fid2 $DIR" || error "could not rebind file" + copytool rebind $fid $fid2 # restore file and compare md5sum local sum2=$(md5sum $f | awk '{print $1}') [[ $sum == $sum2 ]] || error "md5sum mismatch after restore" - - copytool_cleanup } run_test 14 "Rebind archived file to a new fid" test_15() { # test needs a running copytool - copytool_setup + copytool setup # archive files - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local count=5 local tmpfile=$SHARED_DIRECTORY/tmp.$$ @@ -1629,18 +1447,17 @@ test_15() { local fids=() local sums=() for i in $(seq 1 $count); do - fids[$i]=$(make_small $f.$i) + fids[$i]=$(create_small_file $f.$i) sums[$i]=$(md5sum $f.$i | awk '{print $1}') $LFS hsm_archive $f.$i || error "could not archive file" done wait_all_done $(($count*60)) + stack_trap "rm -f $tmpfile" EXIT :>$tmpfile # delete the files for i in $(seq 1 $count); do - rm -f $f.$i - touch $f.$i - local fid2=$(path2fid $f.$i) + local fid2=$(create_empty_file "${f}.${i}") # add the rebind operation to the list echo ${fids[$i]} $fid2 >> $tmpfile @@ -1653,9 +1470,7 @@ test_15() { [[ $nl == $count ]] || error "$nl files in list, $count expected" echo "rebind list of files" - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --rebind $tmpfile $DIR" || error "could not rebind file list" + copytool rebind "$tmpfile" # restore files and compare md5sum for i in $(seq 1 $count); do @@ -1663,15 +1478,12 @@ test_15() { [[ $sum2 == ${sums[$i]} ]] || error "md5sum mismatch after restore ($sum2 != ${sums[$i]})" done - - rm -f $tmpfile - copytool_cleanup } run_test 15 "Rebind a list of files" test_16() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local ref=/tmp/ref # create a known size file so we can verify transfer speed @@ -1687,20 +1499,17 @@ test_16() { $LFS hsm_archive $f wait_request_state $fid ARCHIVE SUCCEED local end=$(date +%s) - local duration=$((end - start)) + # Add 1 to account for rounding errors between start and end (LU-8155) + local duration=$((end - start + 1)) - [[ $duration -ge $goal ]] || + [[ $duration -ge $((goal - 1)) ]] || error "Transfer is too fast $duration < $goal" - - copytool_cleanup } run_test 16 "Test CT bandwith control option" test_20() { - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - touch $f || error "touch $f failed" + create_empty_file "$f" # Could not release a non-archived file $LFS hsm_release $f && error "release should not succeed" @@ -1727,18 +1536,17 @@ run_test 20 "Release is not permitted" test_21() { # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir local f=$DIR/$tdir/test_release # Create a file and check its states - local fid=$(make_small $f) + local fid=$(create_small_file $f) check_hsm_flags $f "0x00000000" # LU-4388/LU-4389 - ZFS does not report full number of blocks # used until file is flushed to disk - if [ $(facet_fstype ost1) == "zfs" ]; then + if [ "$ost1_FSTYPE" == "zfs" ]; then # this causes an OST_SYNC rpc to be sent dd if=/dev/zero of=$f bs=512 count=1 oflag=sync conv=notrunc,fsync # clear locks to reread file data @@ -1789,22 +1597,18 @@ test_21() { check_hsm_flags $f "0x0000000d" stop_full_debug_logging - - copytool_cleanup } run_test 21 "Simple release tests" test_22() { # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup local f=$DIR/$tdir/test_release local swap=$DIR/$tdir/test_swap # Create a file and check its states - local fid=$(make_small $f) + local fid=$(create_small_file $f) check_hsm_flags $f "0x00000000" $LFS hsm_archive $f || error "could not archive file" @@ -1814,24 +1618,21 @@ test_22() { $LFS hsm_release $f || error "could not release file" check_hsm_flags $f "0x0000000d" - make_small $swap + create_small_file $swap $LFS swap_layouts $swap $f && error "swap_layouts should failed" - true - copytool_cleanup + return 0 } run_test 22 "Could not swap a release file" test_23() { # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup local f=$DIR/$tdir/test_mtime # Create a file and check its states - local fid=$(make_small $f) + local fid=$(create_small_file $f) check_hsm_flags $f "0x00000000" $LFS hsm_archive $f || error "could not archive file" @@ -1848,8 +1649,6 @@ test_23() { local ATIME=$(stat -c "%X" $f) [ $MTIME -eq "978261179" ] || fail "bad mtime: $MTIME" [ $ATIME -eq "978261179" ] || fail "bad atime: $ATIME" - - copytool_cleanup } run_test 23 "Release does not change a/mtime (utime)" @@ -1864,11 +1663,9 @@ test_24a() { local ctime1 # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir - rm -f $file - fid=$(make_small $file) + fid=$(create_small_file $file) # Create a file and check its states check_hsm_flags $file "0x00000000" @@ -1921,8 +1718,7 @@ test_24a() { [ $ctime0 -eq $ctime1 ] || error "release changed ctime from $ctime0 to $ctime1" - # Restore should not change atime or mtime and should not - # decrease ctime. + # Restore should not change any timestamps. $LFS hsm_restore $file wait_request_state $fid RESTORE SUCCEED @@ -1939,7 +1735,8 @@ test_24a() { [ $ctime0 -eq $ctime1 ] || error "restore changed ctime from $ctime0 to $ctime1" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytools failed to stop" # Once more, after unmount and mount. umount_client $MOUNT || error "cannot unmount '$MOUNT'" @@ -1968,12 +1765,10 @@ test_24b() { # LU-3811 # Test needs a running copytool. - copytool_setup - mkdir -p $DIR/$tdir + copytool setup # Check that root can do HSM actions on a regular user's file. - rm -f $file - fid=$(make_small $file) + fid=$(create_small_file $file) sum0=$(md5sum $file) chown $RUNAS_ID:$RUNAS_GID $file || @@ -2004,18 +1799,9 @@ test_24b() { [ "$sum0" == "$sum1" ] || error "md5sum mismatch for '$file'" - - copytool_cleanup } run_test 24b "root can archive, release, and restore user files" -cleanup_test_24c() { - trap 0 - set_hsm_param user_request_mask RESTORE - set_hsm_param group_request_mask RESTORE - set_hsm_param other_request_mask RESTORE -} - test_24c() { local file=$DIR/$tdir/$tfile local action=archive @@ -2024,15 +1810,18 @@ test_24c() { local other_save # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir # Save the default masks and check that cleanup_24c will # restore the request masks correctly. user_save=$(get_hsm_param user_request_mask) + stack_trap "set_hsm_param user_request_mask $user_save" EXIT group_save=$(get_hsm_param group_request_mask) + stack_trap "set_hsm_param user_request_mask $group_save" EXIT other_save=$(get_hsm_param other_request_mask) + stack_trap "set_hsm_param user_request_mask $other_save" EXIT [ "$user_save" == RESTORE ] || error "user_request_mask is '$user_save' expected 'RESTORE'" @@ -2041,15 +1830,11 @@ test_24c() { [ "$other_save" == RESTORE ] || error "other_request_mask is '$other_save' expected 'RESTORE'" - trap cleanup_test_24c EXIT - # User. - rm -f $file - make_small $file - chown $RUNAS_ID:nobody $file || - error "cannot chown '$file' to '$RUNAS_ID:nobody'" + create_small_file $file + chown $RUNAS_ID:$GROUP $file || + error "cannot chown '$file' to '$RUNAS_ID:$GROUP'" - set_hsm_param user_request_mask "" $RUNAS $LFS hsm_$action $file && error "$action by user should fail" @@ -2058,12 +1843,10 @@ test_24c() { error "$action by user should succeed" # Group. - rm -f $file - make_small $file + create_small_file $file chown nobody:$RUNAS_GID $file || error "cannot chown '$file' to 'nobody:$RUNAS_GID'" - set_hsm_param group_request_mask "" $RUNAS $LFS hsm_$action $file && error "$action by group should fail" @@ -2072,45 +1855,41 @@ test_24c() { error "$action by group should succeed" # Other. - rm -f $file - make_small $file - chown nobody:nobody $file || - error "cannot chown '$file' to 'nobody:nobody'" + create_small_file $file + chown nobody:$GROUP $file || + error "cannot chown '$file' to 'nobody:$GROUP'" - set_hsm_param other_request_mask "" $RUNAS $LFS hsm_$action $file && error "$action by other should fail" set_hsm_param other_request_mask $action $RUNAS $LFS hsm_$action $file || error "$action by other should succeed" - - copytool_cleanup - cleanup_test_24c } run_test 24c "check that user,group,other request masks work" -cleanup_test_24d() { - trap 0 - mount -o remount,rw $MOUNT2 -} - test_24d() { local file1=$DIR/$tdir/$tfile local file2=$DIR2/$tdir/$tfile local fid1 local fid2 - copytool_setup + fid1=$(create_small_file $file1) - mkdir -p $DIR/$tdir - rm -f $file1 - fid1=$(make_small $file1) + echo $fid1 + $LFS getstripe $file1 + + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT + zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" || + error "cannot mount '$MOUNT3' on '$SINGLEAGT'" - trap cleanup_test_24d EXIT + copytool setup -m "$MOUNT3" + stack_trap "mount -o remount,rw \"$MOUNT2\"" EXIT mount -o remount,ro $MOUNT2 + do_nodes $(comma_list $(nodes_list)) $LCTL clear + fid2=$(path2fid $file2) [ "$fid1" == "$fid2" ] || error "FID mismatch '$fid1' != '$fid2'" @@ -2119,7 +1898,7 @@ test_24d() { error "archive should fail on read-only mount" check_hsm_flags $file1 "0x00000000" - $LFS hsm_archive $file1 + $LFS hsm_archive $file1 || error "Fail to archive $file1" wait_request_state $fid1 ARCHIVE SUCCEED $LFS hsm_release $file1 @@ -2127,25 +1906,22 @@ test_24d() { wait_request_state $fid1 RESTORE SUCCEED $LFS hsm_release $file1 || error "cannot release '$file1'" - dd if=$file2 of=/dev/null bs=1M || "cannot read '$file2'" + dd if=$file2 of=/dev/null bs=1M || error "cannot read '$file2'" $LFS hsm_release $file2 && error "release should fail on read-only mount" - copytool_cleanup - cleanup_test_24d + return 0 } run_test 24d "check that read-only mounts are respected" test_24e() { - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup local f=$DIR/$tdir/$tfile local fid - fid=$(make_small $f) || error "cannot create $f" + fid=$(create_small_file $f) || error "cannot create $f" $LFS hsm_archive $f || error "cannot archive $f" wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f || error "cannot release $f" @@ -2154,22 +1930,19 @@ test_24e() { done tar -cf $TMP/$tfile.tar $DIR/$tdir || error "cannot tar $DIR/$tdir" - - copytool_cleanup } run_test 24e "tar succeeds on HSM released files" # LU-6213 test_24f() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) sum0=$(md5sum $f) echo $sum0 - $LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $f || + $LFS hsm_archive $f || error "hsm_archive failed" wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f || error "cannot release $f" @@ -2181,21 +1954,46 @@ test_24f() { sum1=$(md5sum $f) echo "Sum0 = $sum0, sum1 = $sum1" [ "$sum0" == "$sum1" ] || error "md5sum mismatch for '$tfile'" - - copytool_cleanup } run_test 24f "root can archive, release, and restore tar files" +test_24g() { + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local file=$DIR/$tdir/$tfile + local fid + + echo "RUNAS = '$RUNAS'" + + copytool setup + + mkdir -p $DIR/$tdir + chmod ugo+rwx $DIR/$tdir + + echo "Please listen carefully as our options have changed." | tee $file + fid=$(path2fid $file) + chmod ugo+rw $file + + $LFS hsm_archive $file + wait_request_state $fid ARCHIVE SUCCEED + check_hsm_flags $file 0x00000009 # exists archived + + echo "To be electrocuted by your telephone, press #." | $RUNAS tee $file + check_hsm_flags $file 0x0000000b # exists dirty archived +} +run_test 24g "write by non-owner still sets dirty" # LU-11369 + test_25a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f $LFS hsm_set --lost $f @@ -2203,15 +2001,13 @@ test_25a() { local st=$? [[ $st == 1 ]] || error "lost file access should failed (returns $st)" - - copytool_cleanup } run_test 25a "Restore lost file (HS_LOST flag) from import"\ " (Operation not permitted)" test_25b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -2227,21 +2023,16 @@ test_25b() { st=$? [[ $st == 1 ]] || error "lost file access should failed (returns $st)" - - copytool_cleanup } run_test 25b "Restore lost file (HS_LOST flag) after release"\ " (Operation not permitted)" -test_26() { +test_26A() { # was test_26 # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2250,60 +2041,217 @@ test_26() { wait_request_state $fid REMOVE SUCCEED check_hsm_flags $f "0x00000000" - - copytool_cleanup } -run_test 26 "Remove the archive of a valid file" +run_test 26A "Remove the archive of a valid file" + +test_26a() { + local raolu=$(get_hsm_param remove_archive_on_last_unlink) + [[ $raolu -eq 0 ]] || error "RAoLU policy should be off" -test_27a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir - make_archive $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f - local fid=$(path2fid $f) + local fid=$(copy_file /etc/passwd $f) - $LFS hsm_remove $f + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED - [[ $? != 0 ]] || error "Remove of a released file should fail" + local f2=$DIR/$tdir/${tfile}_2 + local fid2=$(copy_file /etc/passwd $f2) - copytool_cleanup -} -run_test 27a "Remove the archive of an imported file (Operation not permitted)" + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2 + wait_request_state $fid2 ARCHIVE SUCCEED -test_27b() { - # test needs a running copytool - copytool_setup + local f3=$DIR/$tdir/${tfile}_3 + local fid3=$(copy_file /etc/passwd $f3) - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f3 + wait_request_state $fid3 ARCHIVE SUCCEED - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f - wait_request_state $fid ARCHIVE SUCCEED - $LFS hsm_release $f + # set a long grace_delay vs short loop_period + local orig_loop_period=$(get_hsm_param loop_period) + local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT + set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT + set_hsm_param grace_delay 100 - $LFS hsm_remove $f + rm -f $f - [[ $? != 0 ]] || error "Remove of a released file should fail" + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT + set_hsm_param remove_archive_on_last_unlink 1 - copytool_cleanup + ln "$f3" "$f3"_bis || error "Unable to create hard-link" + rm -f $f3 + + rm -f $f2 + + wait_request_state $fid2 REMOVE SUCCEED + + assert_request_count $fid REMOVE 0 \ + "Unexpected archived data remove request for $f" + assert_request_count $fid3 REMOVE 0 \ + "Unexpected archived data remove request for $f3" +} +run_test 26a "Remove Archive On Last Unlink (RAoLU) policy" + +test_26b() { + # test needs a running copytool + copytool setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/passwd $f) + + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED + + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT + set_hsm_param remove_archive_on_last_unlink 1 + + cdt_shutdown + cdt_check_state stopped + + rm -f $f + + wait_request_state $fid REMOVE WAITING + + cdt_enable + + # copytool must re-register + kill_copytools + wait_copytools || error "copytool failed to stop" + copytool setup + + wait_request_state $fid REMOVE SUCCEED +} +run_test 26b "RAoLU policy when CDT off" + +test_26c() { + # test needs a running copytool + copytool setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/passwd $f) + + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED + + local f2=$DIR/$tdir/${tfile}_2 + local fid2=$(copy_file /etc/passwd $f2) + + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2 + wait_request_state $fid2 ARCHIVE SUCCEED + + # set a long grace_delay vs short loop_period + local orig_loop_period=$(get_hsm_param loop_period) + local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT + set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT + set_hsm_param grace_delay 100 + + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT + set_hsm_param remove_archive_on_last_unlink 1 + + multiop_bg_pause $f O_c || error "open $f failed" + local pid=$! + + rm -f $f + rm -f $f2 + + wait_request_state $fid2 REMOVE SUCCEED + assert_request_count $fid REMOVE 0 \ + "Unexpected archived data remove request for $f" + + kill -USR1 $pid || error "multiop early exit" + # should reach autotest timeout if multiop fails to trap + # signal, close file, and exit ... + wait $pid || error "wait PID $PID failed" + + wait_request_state $fid REMOVE SUCCEED +} +run_test 26c "RAoLU effective when file closed" + +test_26d() { + # test needs a running copytool + copytool setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(create_small_file $f) + + $LFS hsm_archive $f || error "could not archive file" + wait_request_state $fid ARCHIVE SUCCEED + + # set a long grace_delay vs short loop_period + local orig_loop_period=$(get_hsm_param loop_period) + local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT + set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT + set_hsm_param grace_delay 100 + + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT + set_hsm_param remove_archive_on_last_unlink 1 + + multiop_bg_pause $f O_c || error "multiop failed" + local MULTIPID=$! + + rm -f $f + + mds_evict_client + + wait_request_state $fid REMOVE SUCCEED + + client_up || client_up || true + + kill -USR1 $MULTIPID + wait $MULTIPID || error "multiop close failed" +} +run_test 26d "RAoLU when Client eviction" + +test_27a() { + # test needs a running copytool + copytool setup + + create_archive_file $tdir/$tfile + local f=$DIR/$tdir/$tfile + copytool import $tdir/$tfile $f + local fid=$(path2fid $f) + + $LFS hsm_remove $f + + [[ $? != 0 ]] || error "Remove of a released file should fail" +} +run_test 27a "Remove the archive of an imported file (Operation not permitted)" + +test_27b() { + # test needs a running copytool + copytool setup + + local f=$DIR/$tdir/$tfile + local fid=$(create_empty_file "$f") + + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f + + $LFS hsm_remove $f + + [[ $? != 0 ]] || error "Remove of a released file should fail" } run_test 27b "Remove the archive of a relased file (Operation not permitted)" test_28() { # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2316,8 +2264,6 @@ test_28() { cdt_enable wait_request_state $fid REMOVE SUCCEED - - copytool_cleanup } run_test 28 "Concurrent archive/file remove" @@ -2325,29 +2271,26 @@ test_29a() { # Tests --mntpath and --archive options local archive_id=7 - copytool_setup $SINGLEAGT $MOUNT $archive_id + copytool setup -m "$MOUNT" -a $archive_id # Bad archive number - $LFS hsm_remove -m $MOUNT -a 33 0x857765760:0x8:0x2 2>&1 | + $LFS hsm_remove -m "$MOUNT" -a 33 0x857765760:0x8:0x2 2>&1 | grep "Invalid argument" || error "unexpected hsm_remove failure (1)" # mntpath is present but file is given - $LFS hsm_remove --mntpath $MOUNT --archive 30 /qwerty/uyt 2>&1 | + $LFS hsm_remove --mntpath "$MOUNT" --archive 30 /qwerty/uyt 2>&1 | grep "hsm: '/qwerty/uyt' is not a valid FID" || error "unexpected hsm_remove failure (2)" - - copytool_cleanup } run_test 29a "Tests --mntpath and --archive options" test_29b() { # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_small $f) + local fid=$(create_small_file $f) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2356,19 +2299,16 @@ test_29b() { $LFS hsm_remove -m $MOUNT -a $HSM_ARCHIVE_NUMBER $fid wait_request_state $fid REMOVE SUCCEED - - copytool_cleanup } run_test 29b "Archive/delete/remove by FID from the archive." test_29c() { # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir - local fid1=$(make_small $DIR/$tdir/$tfile-1) - local fid2=$(make_small $DIR/$tdir/$tfile-2) - local fid3=$(make_small $DIR/$tdir/$tfile-3) + local fid1=$(create_small_file $DIR/$tdir/$tfile-1) + local fid2=$(create_small_file $DIR/$tdir/$tfile-2) + local fid3=$(create_small_file $DIR/$tdir/$tfile-3) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$tfile-[1-3] wait_request_state $fid1 ARCHIVE SUCCEED @@ -2386,40 +2326,98 @@ test_29c() { wait_request_state $fid1 REMOVE SUCCEED wait_request_state $fid2 REMOVE SUCCEED wait_request_state $fid3 REMOVE SUCCEED - - copytool_cleanup } run_test 29c "Archive/delete/remove by FID, using a file list." +test_29d() { + # test needs more than one CT + needclients 3 || return 0 + + local n + local file + local fid + + # start all of the copytools + for n in $(seq $AGTCOUNT); do + copytool setup -f agt$n -a $n + done + + # archive files + file=$DIR/$tdir/$tfile + fid=$(create_small_file $file) + + $LFS hsm_archive $file + wait_request_state $fid ARCHIVE SUCCEED + check_hsm_flags $file "0x00000009" + + rm -f $file + + $LFS hsm_remove --mntpath "$MOUNT" -a 0 $fid || + error "cannot hsm_remove '$fid'" + + # give time for CDT to handle remove request and create broadcasted + sleep 2 + + # remove request has been broadcasted ? + local cnt=$(get_request_count $fid REMOVE) + # broadcasted requests + original + [[ $cnt -eq $((AGTCOUNT + 1)) ]] || + error "remove not broadcasted to all CTs" + + # give time for CDT and CTs to handle broadcasted + wait_for_loop_period + + # each agent serves one different archive_id, so broadcasted + # hsm_remove request should only succeed once and fail at all others + local res + local scnt=0 + local fcnt=0 + for n in $(seq $AGTCOUNT); do + res=$(do_facet $SINGLEMDS "$LCTL get_param -n \ + $HSM_PARAM.actions | awk \ + '/'$fid'.*action=REMOVE archive#='$n'/ \ + {print \\\$13}' | cut -f2 -d=") + if [[ "$res" == "SUCCEED" ]]; then + scnt=$((scnt + 1)) + elif [[ "$res" == "FAILED" ]]; then + fcnt=$((fcnt + 1)) + fi + done + + [[ $scnt -eq 1 ]] || + error "one and only CT should have removed successfully" + + [[ $AGTCOUNT -eq $((scnt + fcnt)) ]] || + error "all but one CT should have failed to remove" +} +run_test 29d "hsm_remove by FID with archive_id 0 for unlinked file cause "\ + "request to be sent once for each registered archive_id" + test_30a() { # restore at exec cannot work on agent node (because of Linux kernel # protection of executables) needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /bin/true $tdir/$tfile local f=$DIR/$tdir/true - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) + stack_trap "cdt_clear_no_retry" EXIT # set no retry action mode cdt_set_no_retry do_node $CLIENT2 $f local st=$? - # cleanup - # remove no try action mode - cdt_clear_no_retry $LFS hsm_state $f [[ $st == 0 ]] || error "Failed to exec a released file" - - copytool_cleanup } run_test 30a "Restore at exec (import case)" @@ -2429,7 +2427,7 @@ test_30b() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/true @@ -2439,19 +2437,17 @@ test_30b() { wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f $LFS hsm_state $f + + stack_trap cdt_clear_no_retry EXIT # set no retry action mode cdt_set_no_retry + do_node $CLIENT2 $f local st=$? - # cleanup - # remove no try action mode - cdt_clear_no_retry $LFS hsm_state $f [[ $st == 0 ]] || error "Failed to exec a released file" - - copytool_cleanup } run_test 30b "Restore at exec (release case)" @@ -2459,7 +2455,7 @@ test_30c() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/SLEEP @@ -2470,8 +2466,11 @@ test_30c() { wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f check_hsm_flags $f "0x0000000d" + + stack_trap cdt_clear_no_retry EXIT # set no retry action mode cdt_set_no_retry + do_node $CLIENT2 "$f 10" & local pid=$! sleep 3 @@ -2487,12 +2486,7 @@ test_30c() { error "Binary overwritten during exec" fi - # cleanup - # remove no try action mode - cdt_clear_no_retry check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 30c "Update during exec of released file must fail" @@ -2508,19 +2502,18 @@ restore_and_check_size() { while [[ "$st" != "0x00000009" && $cpt -le 10 ]] do n=$(stat -c "%s" $f) - # we echo in both cases to show stat is not - # hang + # we echo in both cases to show stat is not hang if [[ $n != $s ]]; then echo "size seen is $n != $s" err=1 else echo "size seen is right: $n == $s" fi - st=$(get_hsm_flags $f) sleep 10 cpt=$((cpt + 1)) + st=$(get_hsm_flags $f) done - if [[ $cpt -lt 10 ]]; then + if [[ "$st" = "0x00000009" ]]; then echo " "done else echo " restore is too long" @@ -2531,36 +2524,28 @@ restore_and_check_size() { test_31a() { # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup - make_archive $tdir/$tfile + create_archive_file $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$($LFS path2fid $f) - HSM_ARCHIVE_PURGE=false copytool_setup + copytool setup restore_and_check_size $f $fid local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31a "Import a large file and check size during restore" test_31b() { # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2570,21 +2555,15 @@ test_31b() { local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31b "Restore a large unaligned file and check size during restore" test_31c() { # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress_aligned $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2594,212 +2573,160 @@ test_31c() { local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31c "Restore a large aligned file and check size during restore" test_33() { - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f - # to be sure wait_all_done will not be mislead by previous tests - # and ops. - cdt_purge - wait_for_grace_delay - # Also raise grace_delay significantly so the Canceled - # Restore action will stay enough long avail. - local old_grace=$(get_hsm_param grace_delay) - set_hsm_param grace_delay 100 + # Prevent restore from completing + copytool_suspend + # Implicit restore md5sum $f >/dev/null & local pid=$! - wait_request_state $fid RESTORE STARTED + wait_request_state $fid RESTORE STARTED kill -15 $pid - sleep 1 - - # Check restore trigger process was killed - local killed=$(ps -o pid,comm hp $pid >/dev/null) - - $LFS hsm_cancel $f - - # instead of waiting+checking both Restore and Cancel ops - # sequentially, wait for both to be finished and then check - # each results. - wait_all_done 100 $fid - local rstate=$(get_request_state $fid RESTORE) - local cstate=$(get_request_state $fid CANCEL) - - # restore orig grace_delay. - set_hsm_param grace_delay $old_grace - - if [[ "$rstate" == "CANCELED" ]] ; then - [[ "$cstate" == "SUCCEED" ]] || - error "Restore state is CANCELED and Cancel state " \ - "is not SUCCEED but $cstate" - echo "Restore state is CANCELED, Cancel state is SUCCEED" - elif [[ "$rstate" == "SUCCEED" ]] ; then - [[ "$cstate" == "FAILED" ]] || - error "Restore state is SUCCEED and Cancel state " \ - "is not FAILED but $cstate" - echo "Restore state is SUCCEED, Cancel state is FAILED" - else - error "Restore state is $rstate and Cancel state is $cstate" - fi - [ -z $killed ] || - error "Cannot kill process waiting for restore ($killed)" + copytool_continue - copytool_cleanup + # Check restore trigger process was killed + wait $pid + [ $? -eq 143 ] || error "md5sum was not 'Terminated'" } run_test 33 "Kill a restore waiting process" test_34() { # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup -b 1 local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - rm $f || error "rm $f failed" # rm must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 rm "$f" || error "rm $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" - local rc=$(wait $pid) - [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiatior still running" + wait $pid || error "Restore initiator failed with $?" - copytool_cleanup + # Check the file was actually deleted + [ ! -f "$f" ] || error "$f was not deleted" } run_test 34 "Remove file during restore" test_35() { # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup -b 1 local f=$DIR/$tdir/$tfile local f1=$DIR/$tdir/$tfile-1 - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return - + local fid=$(create_empty_file "$f") local fid1=$(copy_file /etc/passwd $f1) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - mv $f1 $f || error "mv $f1 $f failed" # mv must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 mv "$f1" "$f" || error "mv $f1 $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" - local rc=$(wait $pid) - [[ $rc -eq 0 ]] || error "Restore initiator failed with $rc" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiatior still running" + wait $pid || error "Restore initiator failed with $?" - fid2=$(path2fid $f) + local fid2=$(path2fid $f) [[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1" - - copytool_cleanup } run_test 35 "Overwrite file during restore" test_36() { # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir + copytool setup -b 1 local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! - wait_request_state $fid RESTORE STARTED - mv $f $f.new - # rm must not block during restore wait_request_state $fid RESTORE STARTED - wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || - error "Restore initiator does not exit" + # mv must not block during restore + timeout --signal=KILL 10 mv "$f" "$f.new" || + error "mv '$f' '$f.new' failed with rc=$?" - local rc=$(wait $pid) - [[ $rc -eq 0 ]] || - error "Restore initiator failed with $rc" + copytool_continue + wait_request_state $fid RESTORE SUCCEED - copytool_cleanup + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiator is still running" + wait $pid || error "Restore initiator failed with $?" } run_test 36 "Move file during restore" test_37() { # LU-5683: check that an archived dirty file can be rearchived. - copytool_cleanup - copytool_setup $SINGLEAGT $MOUNT2 + copytool setup - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid - fid=$(make_small $f) || error "cannot create small file" + fid=$(create_small_file $f) || error "cannot create small file" $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f || error "cannot release $f" + # Allow previous archive request to expire from the actions log. + wait_for_grace_delay + # Dirty file. dd if=/dev/urandom of=$f bs=1M count=1 || error "cannot dirty file" $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - - copytool_cleanup } run_test 37 "re-archive a dirty file" @@ -2822,19 +2749,25 @@ test_40() { local i="" local p="" local fid="" + local max_requests=$(get_hsm_param max_requests) + + stack_trap "set_hsm_param max_requests $max_requests" EXIT + # Increase the number of HSM request that can be performed in + # parallel. With the coordinator running once per second, this + # also limits the number of requests per seconds that can be + # performed, so we pick a decent number. But we also need to keep + # that number low because the copytool has no rate limit and will + # fail some requests if if gets too many at once. + set_hsm_param max_requests 300 for i in $(seq 1 $file_count); do for p in $(seq 1 $stream_count); do fid=$(copy_file /etc/hosts $f.$p.$i) done done - # force copytool to use a local/temp archive dir to ensure best - # performance vs remote/NFS mounts used in auto-tests - if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then - copytool_setup - else - copytool_setup $SINGLEAGT $MOUNT $HSM_ARCHIVE_NUMBER $TMP/$tdir - fi + + copytool setup + # to be sure wait_all_done will not be mislead by previous tests cdt_purge wait_for_grace_delay @@ -2848,17 +2781,16 @@ test_40() { wait ${pids[*]} echo OK wait_all_done 100 - copytool_cleanup } run_test 40 "Parallel archive requests" test_52() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED @@ -2874,18 +2806,16 @@ test_52() { wait $MULTIPID || error "multiop close failed" check_hsm_flags $f "0x0000000b" - - copytool_cleanup } run_test 52 "Opened for write file on an evicted client should be set dirty" test_53() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2902,18 +2832,14 @@ test_53() { wait $MULTIPID || error "multiop close failed" check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 53 "Opened for read file on an evicted client should not be set dirty" test_54() { - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_large_for_progress $f) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2921,6 +2847,7 @@ test_54() { check_hsm_flags $f "0x00000001" + stack_trap "cdt_clear_no_retry" EXIT # Avoid coordinator resending this request as soon it has failed. cdt_set_no_retry @@ -2929,19 +2856,14 @@ test_54() { wait_request_state $fid ARCHIVE FAILED check_hsm_flags $f "0x00000003" - - cdt_clear_no_retry - copytool_cleanup } run_test 54 "Write during an archive cancels it" test_55() { - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_large_for_progress $f) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2949,6 +2871,7 @@ test_55() { check_hsm_flags $f "0x00000001" + stack_trap "cdt_clear_no_retry" EXIT # Avoid coordinator resending this request as soon it has failed. cdt_set_no_retry @@ -2957,21 +2880,14 @@ test_55() { wait_request_state $fid ARCHIVE FAILED check_hsm_flags $f "0x00000003" - - cdt_clear_no_retry - copytool_cleanup } run_test 55 "Truncate during an archive cancels it" test_56() { - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -2987,8 +2903,6 @@ test_56() { wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 56 "Setattr during an archive is ok" @@ -2997,7 +2911,7 @@ test_57() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/test_archive_remote @@ -3018,8 +2932,6 @@ test_57() { error "hsm_restore failed" wait_request_state $fid RESTORE SUCCEED - - copytool_cleanup } run_test 57 "Archive a file with dirty cache on another node" @@ -3061,7 +2973,7 @@ truncate_released_file() { test_58() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -3075,25 +2987,21 @@ test_58() { echo "truncate to 0" truncate_released_file /etc/passwd 0 - - copytool_cleanup } run_test 58 "Truncate a released file will trigger restore" test_59() { local fid - local server_version=$(lustre_version_code $SINGLEMDS) - [[ $server_version -lt $(version_code 2.7.63) ]] && - skip "Need MDS version at least 2.7.63" && return + [[ $MDS1_VERSION -lt $(version_code 2.7.63) ]] && + skip "Need MDS version at least 2.7.63" - copytool_setup + copytool setup $MCREATE $DIR/$tfile || error "mcreate failed" $TRUNCATE $DIR/$tfile 42 || error "truncate failed" $LFS hsm_archive $DIR/$tfile || error "archive request failed" fid=$(path2fid $DIR/$tfile) wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $DIR/$tfile || error "release failed" - copytool_cleanup } run_test 59 "Release stripeless file with non-zero size" @@ -3101,22 +3009,16 @@ test_60() { # This test validates the fix for LU-4512. Ensure that the -u # option changes the progress reporting interval from the # default (30 seconds) to the user-specified interval. + local f=$DIR/$tdir/$tfile + local fid=$(create_file "$f" 1M 10) + local interval=5 local progress_timeout=$((interval * 4)) + copytool setup -b 1 --update-interval $interval - # test needs a new running copytool - copytool_cleanup - HSMTOOL_UPDATE_INTERVAL=$interval copytool_setup - - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return - - local mdtidx=0 - local mdt=${MDT_PREFIX}${mdtidx} - local mds=mds$((mdtidx + 1)) + local mdtidx=0 + local mdt=${MDT_PREFIX}${mdtidx} + local mds=mds$((mdtidx + 1)) # Wait for copytool to register wait_update_facet $mds \ @@ -3128,13 +3030,10 @@ test_60() { error "could not archive file" local agent=$(facet_active_host $SINGLEAGT) - local prefix=$TESTLOG_PREFIX - [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME - local copytool_log=$prefix.copytool_log.$agent.log - + local logfile=$(copytool_logfile $SINGLEAGT) wait_update $agent \ - "grep -o start.copy $copytool_log" "start copy" 100 || + "grep -o start.copy \"$logfile\"" "start copy" 100 || error "copytool failed to start" local cmd="$LCTL get_param -n ${mdt}.hsm.active_requests" @@ -3147,7 +3046,7 @@ test_60() { echo -n "Expecting a progress update within $progress_timeout seconds... " while [ true ]; do RESULT=$(do_node $(facet_active_host $mds) "$cmd") - if [ $RESULT -gt 0 ]; then + if [ -n "$RESULT" ] && [ "$RESULT" -gt 0 ]; then echo "$RESULT bytes copied in $WAIT seconds." break elif [ $WAIT -ge $progress_timeout ]; then @@ -3166,19 +3065,32 @@ test_60() { error "Expected progress update after at least $interval seconds" fi - cdt_clear_no_retry - copytool_cleanup + echo "Wait for on going archive hsm action to complete" + wait_update $agent "grep -o copied \"$logfile\"" "copied" 10 || + echo "File archiving not completed even after 10 secs" } run_test 60 "Changing progress update interval from default" +test_61() { + # test needs a running copytool + copytool setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/passwd $f) + cdt_disable + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + rm -f $f + cdt_enable + wait_request_state $fid ARCHIVE FAILED +} +run_test 61 "Waiting archive of a removed file should fail" + test_70() { # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup - - # Just start and stop the copytool to generate events. - cdt_clear_no_retry + copytool setup --event-fifo "$HSMTOOL_MONITOR_DIR/fifo" # Wait for the copytool to register. wait_update --verbose $(facet_active_host mds1) \ @@ -3186,7 +3098,8 @@ test_70() { uuid 100 || error "copytool failed to register with MDT0000" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytools failed to stop" local REGISTER_EVENT local UNREGISTER_EVENT @@ -3212,7 +3125,6 @@ test_70() { error "Copytool failed to send unregister event to FIFO" fi - copytool_monitor_cleanup echo "Register/Unregister events look OK." } run_test 70 "Copytool logs JSON register/unregister events to FIFO" @@ -3222,16 +3134,17 @@ test_71() { local interval=5 # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_UPDATE_INTERVAL=$interval \ - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup + copytool setup --update-interval $interval --event-fifo \ + "$HSMTOOL_MONITOR_DIR/fifo" + + stack_trap "cdt_clear_no_retry" EXIT + # Just start and stop the copytool to generate events. + cdt_clear_no_retry - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_small_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3240,8 +3153,11 @@ test_71() { local expected_fields="event_time data_fid source_fid" expected_fields+=" total_bytes current_bytes" - local START_EVENT - local FINISH_EVENT + local -A events=( + [ARCHIVE_START]=false + [ARCHIVE_FINISH]=false + [ARCHIVE_RUNNING]=false + ) while read event; do # Make sure we're not getting anything from previous events. for field in $expected_fields; do @@ -3254,15 +3170,9 @@ test_71() { fi eval $parsed - if [ $event_type == "ARCHIVE_START" ]; then - START_EVENT=$event - continue - elif [ $event_type == "ARCHIVE_FINISH" ]; then - FINISH_EVENT=$event - continue - elif [ $event_type != "ARCHIVE_RUNNING" ]; then - continue - fi + events["$event_type"]=true + + [ "$event_type" != ARCHIVE_RUNNING ] && continue # Do some simple checking of the progress update events. for expected_field in $expected_fields; do @@ -3271,30 +3181,20 @@ test_71() { fi done - if [ $total_bytes -eq 0 ]; then - error "Expected total_bytes to be > 0" - fi + [ $total_bytes -gt 0 ] || error "Expected total_bytes to be > 0" - # These should be identical throughout an archive - # operation. - if [ $source_fid != $data_fid ]; then + # These should be identical throughout an archive operation + [ $source_fid == $data_fid ] || error "Expected source_fid to equal data_fid" - fi done < <(echo $"$(get_copytool_event_log)") - if [ -z "$START_EVENT" ]; then - error "Copytool failed to send archive start event to FIFO" - fi - - if [ -z "$FINISH_EVENT" ]; then - error "Copytool failed to send archive finish event to FIFO" - fi + # Check we received every type of events we were expecting + for event in "${!events[@]}"; do + ${events["$event"]} || + error "Copytool failed to send '$event' event to FIFO" + done echo "Archive events look OK." - - cdt_clear_no_retry - copytool_cleanup - copytool_monitor_cleanup } run_test 71 "Copytool logs JSON archive events to FIFO" @@ -3303,10 +3203,10 @@ test_72() { local interval=5 # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_UPDATE_INTERVAL=$interval \ - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup + copytool setup --update-interval $interval --event-fifo \ + "$HSMTOOL_MONITOR_DIR/fifo" local test_file=$HSMTOOL_MONITOR_DIR/file local cmd="dd if=/dev/urandom of=$test_file count=16 bs=1000000 " @@ -3317,7 +3217,7 @@ test_72() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f f=$DIR2/$tdir/$tfile echo "Verifying released state: " check_hsm_flags $f "0x0000000d" @@ -3392,12 +3292,6 @@ test_72() { fi echo "Restore events look OK." - - cdt_clear_no_retry - copytool_cleanup - copytool_monitor_cleanup - - rm -rf $test_dir } run_test 72 "Copytool logs JSON restore events to FIFO" @@ -3413,26 +3307,19 @@ test_90() { fid=$(copy_file /etc/hosts $f.$i) echo $f.$i >> $FILELIST done - # force copytool to use a local/temp archive dir to ensure best - # performance vs remote/NFS mounts used in auto-tests - if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then - copytool_setup - else - local dai=$(get_hsm_param default_archive_id) - copytool_setup $SINGLEAGT $MOUNT $dai $TMP/$tdir - fi + + copytool setup # to be sure wait_all_done will not be mislead by previous tests cdt_purge wait_for_grace_delay $LFS hsm_archive --filelist $FILELIST || error "cannot archive a file list" - wait_all_done 100 + wait_all_done 200 $LFS hsm_release --filelist $FILELIST || error "cannot release a file list" $LFS hsm_restore --filelist $FILELIST || error "cannot restore a file list" - wait_all_done 100 - copytool_cleanup + wait_all_done 200 } run_test 90 "Archive/restore a file list" @@ -3475,7 +3362,7 @@ run_test 102 "Verify coordinator control" test_103() { # test needs a running copytool - copytool_setup + copytool setup local i="" local fid="" @@ -3494,42 +3381,35 @@ test_103() { grep -v CANCELED | grep -v SUCCEED | grep -v FAILED") [[ -z "$res" ]] || error "Some request have not been canceled" - - copytool_cleanup } run_test 103 "Purge all requests" DATA=CEA DATAHEX='[434541]' test_104() { - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") - # if cdt is on, it can serve too quickly the request - cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\ $HSM_PARAM.actions |\ grep $fid | cut -f16 -d=") - cdt_enable [[ "$data1" == "$DATAHEX" ]] || error "Data field in records is ($data1) and not ($DATAHEX)" - copytool_cleanup + cdt_purge } run_test 104 "Copy tool data field" test_105() { + local max_requests=$(get_hsm_param max_requests) mkdir -p $DIR/$tdir local i="" + stack_trap "set_hsm_param max_requests $max_requests" EXIT + set_hsm_param max_requests 300 + cdt_disable for i in $(seq -w 1 10); do cp /etc/passwd $DIR/$tdir/$i @@ -3539,6 +3419,7 @@ test_105() { $HSM_PARAM.actions |\ grep WAITING | wc -l") cdt_restart + cdt_disable local reqcnt2=$(do_facet $SINGLEMDS "$LCTL get_param -n\ $HSM_PARAM.actions |\ @@ -3551,69 +3432,9 @@ test_105() { } run_test 105 "Restart of coordinator" -get_agent_by_uuid_mdt() { - local uuid=$1 - local mdtidx=$2 - local mds=mds$(($mdtidx + 1)) - do_facet $mds "$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.agents |\ - grep $uuid" -} - -check_agent_registered_by_mdt() { - local uuid=$1 - local mdtidx=$2 - local mds=mds$(($mdtidx + 1)) - local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) - if [[ ! -z "$agent" ]]; then - echo "found agent $agent on $mds" - else - error "uuid $uuid not found in agent list on $mds" - fi -} - -check_agent_unregistered_by_mdt() { - local uuid=$1 - local mdtidx=$2 - local mds=mds$(($mdtidx + 1)) - local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) - if [[ -z "$agent" ]]; then - echo "uuid not found in agent list on $mds" - else - error "uuid found in agent list on $mds: $agent" - fi -} - -check_agent_registered() { - local uuid=$1 - local mdsno - for mdsno in $(seq 1 $MDSCOUNT); do - check_agent_registered_by_mdt $uuid $((mdsno - 1)) - done -} - -check_agent_unregistered() { - local uuid=$1 - local mdsno - for mdsno in $(seq 1 $MDSCOUNT); do - check_agent_unregistered_by_mdt $uuid $((mdsno - 1)) - done -} - -get_agent_uuid() { - local agent=${1:-$(facet_active_host $SINGLEAGT)} - - # Lustre mount-point is mandatory and last parameter on - # copytool cmd-line. - local mntpnt=$(do_rpc_nodes $agent pgrep -fl $HSMTOOL_BASE | - grep -v pgrep | awk '{print $NF}') - [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ - "parameter" - do_rpc_nodes $agent get_client_uuid $mntpnt | cut -d' ' -f2 -} - test_106() { # test needs a running copytool - copytool_setup + copytool setup local uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) @@ -3621,20 +3442,22 @@ test_106() { search_copytools || error "No copytool found" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytool failed to stop" + check_agent_unregistered $uuid - copytool_setup + copytool setup uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) check_agent_registered $uuid - - copytool_cleanup } run_test 106 "Copytool register/unregister" test_107() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + # test needs a running copytool - copytool_setup + copytool setup # create and archive file mkdir -p $DIR/$tdir local f1=$DIR/$tdir/$tfile @@ -3649,7 +3472,6 @@ test_107() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2 # main check of this sanity: this request MUST succeed wait_request_state $fid ARCHIVE SUCCEED - copytool_cleanup } run_test 107 "Copytool re-register after MDS restart" @@ -3690,14 +3512,14 @@ run_test 109 "Policy display/change" test_110a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) cdt_set_non_blocking_restore @@ -3712,14 +3534,12 @@ test_110a() { [[ $st == 1 ]] || error "md5sum returns $st != 1, "\ "should also perror ENODATA (No data available)" - - copytool_cleanup } run_test 110a "Non blocking restore policy (import case)" test_110b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -3740,21 +3560,19 @@ test_110b() { [[ $st == 1 ]] || error "md5sum returns $st != 1, "\ "should also perror ENODATA (No data available)" - - copytool_cleanup } run_test 110b "Non blocking restore policy (release case)" test_111a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) cdt_set_no_retry @@ -3770,19 +3588,18 @@ test_111a() { # Test result [[ $st == 0 ]] || error "Restore does not failed" - - copytool_cleanup } run_test 111a "No retry policy (import case), restore will error"\ " (No such file or directory)" test_111b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) + stack_trap cdt_clear_no_retry EXIT cdt_set_no_retry $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -3794,20 +3611,15 @@ test_111b() { wait_request_state $fid RESTORE FAILED local st=$? - # cleanup - cdt_clear_no_retry - # Test result [[ $st == 0 ]] || error "Restore does not failed" - - copytool_cleanup } run_test 111b "No retry policy (release case), restore will error"\ " (No such file or directory)" test_112() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -3818,49 +3630,80 @@ test_112() { echo $l local res=$(echo $l | cut -f 2- -d" " | grep ARCHIVE) - # cleanup cdt_enable wait_request_state $fid ARCHIVE SUCCEED # Test result [[ ! -z "$res" ]] || error "action is $l which is not an ARCHIVE" - - copytool_cleanup } run_test 112 "State of recorded request" -test_200() { - # test needs a running copytool - copytool_setup +test_113() { + local file1=$DIR/$tdir/$tfile + local file2=$DIR2/$tdir/$tfile - mkdir -p $DIR/$tdir + local fid=$(create_small_sync_file $file1) + + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT + zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" || + error "cannot mount '$MOUNT3' on '$SINGLEAGT'" + + copytool setup -m "$MOUNT3" + + do_nodes $(comma_list $(nodes_list)) $LCTL clear + + $LFS hsm_archive $file1 || error "Fail to archive $file1" + wait_request_state $fid ARCHIVE SUCCEED + + $LFS hsm_release $file1 + echo "Verifying released state: " + check_hsm_flags $file1 "0x0000000d" + + multiop_bg_pause $file1 oO_WRONLY:O_APPEND:_w4c || error "multiop failed" + MULTIPID=$! + stat $file2 & + kill -USR1 $MULTIPID + + wait + sync + + local size1=$(stat -c "%s" $file1) + local size2=$(stat -c "%s" $file2) + + [ $size1 -eq $size2 ] || error "sizes are different $size1 $size2" +} +run_test 113 "wrong stat after restore" + +test_200() { local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_cancel $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup + + # Prevent archive from completing + copytool_suspend - # test with cdt on is made in test_221 - cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f # wait archive to register at CDT - wait_request_state $fid ARCHIVE WAITING - $LFS hsm_cancel $f - cdt_enable + wait_request_state $fid ARCHIVE STARTED + + # Cancel the archive + $LFS hsm_cancel "$f" + wait_request_state $fid ARCHIVE CANCELED - wait_request_state $fid CANCEL SUCCEED - copytool_cleanup + copytool_continue + wait_request_state $fid CANCEL SUCCEED } run_test 200 "Register/Cancel archive" test_201() { # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - make_archive $tdir/$tfile - import_file $tdir/$tfile $f + create_archive_file $tdir/$tfile + copytool import $tdir/$tfile $f local fid=$(path2fid $f) # test with cdt on is made in test_222 @@ -3872,122 +3715,133 @@ test_201() { cdt_enable wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - - copytool_cleanup } run_test 201 "Register/Cancel restore" test_202() { - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + # test needs a running copytool + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - cdt_disable + copytool_suspend $LFS hsm_remove $f # wait remove to register at CDT - wait_request_state $fid REMOVE WAITING + wait_request_state $fid REMOVE STARTED $LFS hsm_cancel $f - cdt_enable - wait_request_state $fid REMOVE CANCELED - copytool_cleanup + wait_request_state $fid REMOVE CANCELED } run_test 202 "Register/Cancel remove" -test_220() { +test_220A() { # was test_220 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - changelog_cleanup - - local target=0x0 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0x0 || + error "The expected changelog was not emitted" } -run_test 220 "Changelog for archive" +run_test 220A "Changelog for archive" -test_221() { +test_220a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_cancel $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(copy_file /etc/passwd $f) + + changelog_register + + # block copytool operations to allow for HSM request to be + # submitted and file be unlinked (CDT will find object removed) + copytool_suspend + + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + + # wait request to reach CT + wait_request_state $fid ARCHIVE STARTED + + rm -f $f + + copytool_continue + + wait_request_state $fid ARCHIVE FAILED - changelog_setup + # HE_ARCHIVE|ENOENT + changelog_find -type HSM -target-fid $fid -flags 0x2 || + error "The expected changelog was not emitted" +} +run_test 220a "Changelog for failed archive" + +test_221() { + local f=$DIR/$tdir/$tfile + local fid=$(create_empty_file "$f") + copytool setup -b 1 + changelog_register + + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED + $LFS hsm_cancel $f wait_request_state $fid ARCHIVE CANCELED - wait_request_state $fid CANCEL SUCCEED - - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - local target=0x7d - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x7d || + error "The expected changelog was not emitted" } run_test 221 "Changelog for archive canceled" test_222a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) - changelog_setup + changelog_register $LFS hsm_restore $f wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x80 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x80 || + error "The expected changelog was not emitted" } run_test 222a "Changelog for explicit restore" test_222b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -3996,145 +3850,210 @@ test_222b() { wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x80 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x80 || + error "The expected changelog was not emitted" } run_test 222b "Changelog for implicit restore" -test_223a() { +test_222c() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir + copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - make_archive $tdir/$tfile + copytool import $tdir/$tfile $f + local fid=$(path2fid $f) - changelog_setup + changelog_register - import_file $tdir/$tfile $f - local fid=$(path2fid $f) + # block copytool operations to allow for HSM request to be + # submitted and file be unlinked (CDT will find object removed) + copytool_suspend $LFS hsm_restore $f + + # wait request to reach CT wait_request_state $fid RESTORE STARTED - $LFS hsm_cancel $f - wait_request_state $fid RESTORE CANCELED - wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) + rm -f $f + + copytool_continue - local target=0xfd - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + wait_request_state $fid RESTORE FAILED - cleanup + # HE_RESTORE|ENOENT + changelog_find -type HSM -target-fid $fid -flags 0x82 || + error "The expected changelog was not emitted" } -run_test 223a "Changelog for restore canceled (import case)" +run_test 222c "Changelog for failed explicit restore" -test_223b() { +test_222d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + + copytool_remove_backend $fid + md5sum $f + + wait_request_state $fid RESTORE FAILED + + # HE_RESTORE|ENOENT + changelog_find -type HSM -target-fid $fid -flags 0x82 || + error "The expected changelog was not emitted" +} +run_test 222d "Changelog for failed implicit restore" + +test_223a() { + # test needs a running copytool + copytool setup -b 1 + + local f=$DIR/$tdir/$tfile + create_archive_file $tdir/$tfile + + changelog_register + + copytool import $tdir/$tfile $f + local fid=$(path2fid $f) + $LFS hsm_restore $f wait_request_state $fid RESTORE STARTED $LFS hsm_cancel $f wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) + changelog_find -type HSM -target-fid $fid -flags 0xfd || + error "The expected changelog was not emitted" +} +run_test 223a "Changelog for restore canceled (import case)" + +test_223b() { + local f=$DIR/$tdir/$tfile + local fid=$(create_empty_file "$f") - local target=0xfd - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + copytool setup -b 1 + changelog_register - cleanup + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f + + # Prevent restore from completing + copytool_suspend + $LFS hsm_restore $f + wait_request_state $fid RESTORE STARTED + + $LFS hsm_cancel $f + wait_request_state $fid RESTORE CANCELED + + copytool_continue + wait_request_state $fid CANCEL SUCCEED + + changelog_find -type HSM -target-fid $fid -flags 0xfd || + error "The expected changelog was not emitted" } run_test 223b "Changelog for restore canceled (release case)" -test_224() { +test_224A() { # was test_224 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_remove $f wait_request_state $fid REMOVE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1) + changelog_find -type HSM -target-fid $fid -flags 0x200 || + error "The expected changelog was not emitted" +} +run_test 224A "Changelog for remove" + +test_224a() { + # test needs a running copytool + copytool setup + + mkdir -p $DIR/$tdir - local target=0x200 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/passwd $f) + + changelog_register + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED + + copytool_remove_backend $fid - cleanup + # block copytool operations to allow for HSM request to be + # submitted and file be unlinked (CDT will find object removed) + copytool_suspend + + $LFS hsm_remove $f + + # wait for request to reach CT + wait_request_state $fid REMOVE STARTED + + rm -f $f + + copytool_continue + + wait_request_state $fid REMOVE FAILED + + # HE_REMOVE|ENOENT=0x202 + changelog_find -type HSM -target-fid $fid -flags 0x202 || + error "The expected changelog was not emitted" } -run_test 224 "Changelog for remove" +run_test 224a "Changelog for failed remove" test_225() { - # test needs a running copytool - copytool_setup - # test is not usable because remove request is too fast # so it is always finished before cancel can be done ... echo "Test disabled" - copytool_cleanup return 0 - mkdir -p $DIR/$tdir + # test needs a running copytool + copytool setup + local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_progress $f) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - # if cdt is on, it can serve too quickly the request - cdt_disable + # Prevent restore from completing + copytool_suspend $LFS hsm_remove $f + $LFS hsm_cancel $f - cdt_enable wait_request_state $fid REMOVE CANCELED - wait_request_state $fid CANCEL SUCCEED - flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) - local flags=$($LFS changelog ${MDT[0]} | grep HSM | grep $fid | - tail -n 1 | awk '{print $5}') - - local target=0x27d - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x27d + error "The expected changelog was not emitted" } run_test 225 "Changelog for remove canceled" test_226() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -4145,7 +4064,7 @@ test_226() { local fid2=$(copy_file /etc/passwd $f2) copy_file /etc/passwd $f3 - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1 wait_request_state $fid1 ARCHIVE SUCCEED @@ -4154,192 +4073,602 @@ test_226() { rm $f1 || error "rm $f1 failed" - local flags=$(changelog_get_flags ${MDT[0]} UNLNK $fid1) - - local target=0x3 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + changelog_dump + changelog_find -type UNLNK -target-fid $fid1 -flags 0x3 || + error "The expected changelog was not emitted" mv $f3 $f2 || error "mv $f3 $f2 failed" - flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) + changelog_find -type RENME -target-fid $fid2 -flags 0x3 || + error "The expected changelog was not emitted" +} +run_test 226 "changelog for last rm/mv with exiting archive" + +# This is just a utility function to clarify what test_227 does +__test_227() +{ + local target=0x280 - target=0x3 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + "$LFS" "$action" --$flag "$file" || + error "Cannot ${action#hsm_} $flag on '$file'" - cleanup + # Only one changelog should be produced + local entries="$(changelog_find -type HSM -target-fid $fid)" + [ $(wc -l <<< "$entries") -eq $((++count)) ] || + error "lfs $action --$flag '$file' produced more than one" \ + "changelog record" + + # Parse the last changelog record + local entry="$(tail -n 1 <<< "$entries")" + eval local -A changelog=$(changelog2array $entry) + + # Also check the flags match what is expected + [[ ${changelog[flags]} == $target ]] || + error "Changelog flag is '${changelog[flags]}', not $target" } -run_test 226 "changelog for last rm/mv with exiting archive" -check_flags_changes() { - local f=$1 - local fid=$2 - local hsm_flag=$3 - local fst=$4 - local cnt=$5 +test_227() { + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + local count=0 + + changelog_register + + for flag in norelease noarchive exists archived lost; do + if [ "$flag" == lost ]; then + # The flag "lost" only works on an archived file + "$LFS" hsm_set --archived "$file" + ((count++)) + fi + + action="hsm_set" __test_227 + action="hsm_clear" __test_227 + done +} +run_test 227 "changelog when explicit setting of HSM flags" + +test_228() { + # test needs a running copytool + copytool setup + + local fid=$(create_small_sync_file $DIR/$tfile) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile + wait_request_state $fid ARCHIVE SUCCEED + + $LFS hsm_release $DIR/$tfile + check_hsm_flags $DIR/$tfile "0x0000000d" + + filefrag $DIR/$tfile | grep " 1 extent found" || + error "filefrag on released file must return only one extent" + + # only newer versions of cp detect sparse files by stat/FIEMAP + # (LU-2580) + cp --sparse=auto $DIR/$tfile $DIR/$tfile.2 || + error "copying $DIR/$tfile" + cmp $DIR/$tfile $DIR/$tfile.2 || error "comparing copied $DIR/$tfile" + + $LFS hsm_release $DIR/$tfile + check_hsm_flags $DIR/$tfile "0x0000000d" + + mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + + tar cf - --sparse $DIR/$tfile | tar xvf - -C $DIR/$tdir || + error "tar failed" + cmp $DIR/$tfile $DIR/$tdir/$DIR/$tfile || + error "comparing untarred $DIR/$tfile" + + rm -f $DIR/$tfile $DIR/$tfile.2 || + error "rm $DIR/$tfile or $DIR/$tfile.2 failed" +} +run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse" + +test_250() { + local file="$DIR/$tdir/$tfile" + + # set max_requests to allow one request of each type to be started (3) + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + # speed up test + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + + # send 1 requests of each kind twice + copytool setup + # setup the files + for action in archive restore remove; do + local filepath="$file"-to-$action + local fid=$(create_empty_file "$filepath") + local fid2=$(create_empty_file "$filepath".bis) + + if [ "$action" != archive ]; then + "$LFS" hsm_archive "$filepath" + wait_request_state $fid ARCHIVE SUCCEED + "$LFS" hsm_archive "$filepath".bis + wait_request_state $fid2 ARCHIVE SUCCEED + fi + if [ "$action" == restore ]; then + "$LFS" hsm_release "$filepath" + "$LFS" hsm_release "$filepath".bis + fi + done + + # suspend the copytool to prevent requests from completing + stack_trap "copytool_continue" EXIT + copytool_suspend + + # send `max_requests' requests (one of each kind) + for action in archive restore remove; do + filepath="$file"-to-$action + "$LFS" hsm_${action} "$filepath" + wait_request_state $(path2fid "$filepath") "${action^^}" STARTED + done + + # send another batch of requests + for action in archive restore remove; do + "$LFS" hsm_${action} "$file-to-$action".bis + done + # wait for `loop_period' seconds to make sure the coordinator has time + # to register those, even though it should not + sleep 1 + + # only the first batch of request should be started + local -i count + count=$(do_facet $SINGLEMDS "$LCTL" get_param -n $HSM_PARAM.actions | + grep -c STARTED) + + ((count == 3)) || + error "expected 3 STARTED requests, found $count" +} +run_test 250 "Coordinator max request" + +test_251() { + local f=$DIR/$tdir/$tfile + local fid=$(create_empty_file "$f") + + cdt_disable + # to have a short test + local old_to=$(get_hsm_param active_request_timeout) + set_hsm_param active_request_timeout 1 + # to be sure the cdt will wake up frequently so + # it will be able to cancel the "old" request + local old_loop=$(get_hsm_param loop_period) + set_hsm_param loop_period 1 + cdt_enable + + copytool setup + + # Prevent archive from completing + copytool_suspend + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE STARTED + + # Let the request timeout + wait_request_state $fid ARCHIVE CANCELED + + set_hsm_param active_request_timeout $old_to + set_hsm_param loop_period $old_loop +} +run_test 251 "Coordinator request timeout" + +test_252() { + local f=$DIR/$tdir/$tfile + local fid=$(create_empty_file "$f") + + # to have a short test + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + + copytool setup + + # Prevent archive from completing + copytool_suspend + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE STARTED + rm -f "$f" + + stack_trap "set_hsm_param active_request_timeout \ + $(get_hsm_param active_request_timeout)" EXIT + set_hsm_param active_request_timeout 1 + + wait_request_state $fid ARCHIVE CANCELED + copytool_continue +} +run_test 252 "Timeout'ed running archive of a removed file should be canceled" + +test_253() { + local rc + # test needs a running copytool + copytool setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + + dd if=/dev/zero of=$f bs=1MB count=10 + local fid=$(path2fid $f) + + $LFS hsm_archive $f || error "could not archive file" + wait_request_state $fid ARCHIVE SUCCEED + + # clear locks to discard inode data + cancel_lru_locks osc + + #define OBD_FAIL_MDC_MERGE 0x807 + $LCTL set_param fail_loc=0x807 + + #expect error here, instead of release with wrong size + $LFS hsm_release $f + rc=$? + if ((rc == 0)); then + file_size=$(stat -c '%s' $f) + if ((file_size != 10485760)); then + error "Wrong file size after hsm_release" + fi + else + echo "could not release file" + fi +} +run_test 253 "Check for wrong file size after release" + +test_254a() +{ + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" + + # Check that the counters are initialized to 0 + local count + for request_type in archive restore remove; do + count="$(get_hsm_param ${request_type}_count)" || + error "Reading ${request_type}_count failed with $?" + + [ "$count" -eq 0 ] || + error "Expected ${request_type}_count to be " \ + "0 != '$count'" + done +} +run_test 254a "Request counters are initialized to zero" + +test_254b() +{ + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" + + # The number of request to launch (at least 32) + local request_count=$((RANDOM % 32 + 32)) + printf "Will launch %i requests of each type\n" "$request_count" + + # Launch a copytool to process requests + copytool setup + + # Set hsm.max_requests to allow starting all requests at the same time + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests "$request_count" + + local timeout + local count + for request_type in archive restore remove; do + printf "Checking %s requests\n" "${request_type}" + # Suspend the copytool to give us time to read the proc files + copytool_suspend + + for ((i = 0; i < $request_count; i++)); do + case $request_type in + archive) + create_empty_file "$DIR/$tdir/$tfile-$i" \ + >/dev/null 2>&1 + ;; + restore) + lfs hsm_release "$DIR/$tdir/$tfile-$i" + ;; + esac + $LFS hsm_${request_type} "$DIR/$tdir/$tfile-$i" + done + + # Give the coordinator 10 seconds to start every request + timeout=10 + while get_hsm_param actions | grep -q WAITING; do + sleep 1 + let timeout-=1 + [ $timeout -gt 0 ] || + error "${request_type^} requests took too " \ + "long to start" + done + + count="$(get_hsm_param ${request_type}_count)" + [ "$count" -eq "$request_count" ] || + error "Expected '$request_count' (!= '$count') " \ + "active $request_type requests" + + # Let the copytool process the requests + copytool_continue + # Give it 10 seconds maximum + timeout=10 + while get_hsm_param actions | grep -q STARTED; do + sleep 1 + let timeout-=1 + [ $timeout -gt 0 ] || + error "${request_type^} requests took too " \ + "long to complete" + done + + count="$(get_hsm_param ${request_type}_count)" + [ "$count" -eq 0 ] || + error "Expected 0 (!= '$count') " \ + "active $request_type requests" + done +} +run_test 254b "Request counters are correctly incremented and decremented" + +test_255() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + + # How do you make sure the coordinator has consumed any outstanding + # event, without triggering an event yourself? + # + # You wait for a request to disappear from the coordinator's llog. + + # Warning: the setup represents 90% of this test + + # Create and process an HSM request + copytool setup + "$LFS" hsm_archive "$file" + wait_request_state $fid ARCHIVE SUCCEED + + kill_copytools + wait_copytools || error "failed to stop copytools" + + # Launch a new HSM request + rm "$file" + create_empty_file "$file" + "$LFS" hsm_archive "$file" + + cdt_shutdown + + # Have the completed request be removed as soon as the cdt wakes up + stack_trap "set_hsm_param grace_delay $(get_hsm_param grace_delay)" EXIT + set_hsm_param grace_delay 1 + # (Hopefully, time on the MDS will behave nicely) + do_facet $SINGLEMDS sleep 2 & + + # Increase `loop_period' as a mean to prevent the coordinator from + # waking itself up to do some housekeeping. + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + + wait $! || error "waiting failed" + cdt_enable + wait_request_state $fid ARCHIVE "" + # The coordinator will not wake up on its own for ~`loop_period' secs... + + # ... Unless a copytool registers. Now the real test begins + copytool setup + wait_request_state $(path2fid "$file") ARCHIVE SUCCEED +} +run_test 255 "Copytool registration wakes the coordinator up" + +# tests 260[a-c] rely on the parsing of the copytool's log file, they might +# break in the future because of that. +test_260a() +{ + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup + "$LFS" hsm_archive "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Send several archive requests + for file in "${files[@]:1}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Launch a copytool + copytool setup + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:1}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && return + done - local target=0x280 - $LFS hsm_set --$hsm_flag $f || - error "Cannot set $hsm_flag on $f" - local flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) - local seen=${#flags[*]} - cnt=$((fst + cnt)) - [[ $seen == $cnt ]] || - error "set $hsm_flag: Changelog events $seen != $cnt" - [[ ${flags[$((cnt - 1))]} == $target ]] || - error "set $hsm_flag: Changelog flags are "\ - "${flags[$((cnt - 1))]} not $target" - - $LFS hsm_clear --$hsm_flag $f || - error "Cannot clear $hsm_flag on $f" - flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) - seen=${#flags[*]} - cnt=$(($cnt + 1)) - [[ $cnt == $seen ]] || - error "clear $hsm_flag: Changelog events $seen != $cnt" - - [[ ${flags[$((cnt - 1))]} == $target ]] || - error "clear $hsm_flag: Changelog flag is "\ - "${flags[$((cnt - 1))]} not $target" + error "Too many ARCHIVE requests were run before the RESTORE request" } +run_test 260a "Restore request have priority over other requests" -test_227() { - # test needs a running copytool - copytool_setup - changelog_setup +# This test is very much tied to the implementation of the current priorisation +# mechanism in the coordinator. It might not make sense to keep it in the future +test_260b() +{ + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" - mkdir -p $DIR/$tdir - typeset -a flags + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file - for i in norelease noarchive exists archived - do - local f=$DIR/$tdir/$tfile-$i - local fid=$(copy_file /etc/passwd $f) - check_flags_changes $f $fid $i 0 1 + for file in "${files[@]}"; do + create_small_file "$file" done - f=$DIR/$tdir/$tfile---lost - fid=$(copy_file /etc/passwd $f) - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f - wait_request_state $fid ARCHIVE SUCCEED - check_flags_changes $f $fid lost 3 1 - - cleanup -} -run_test 227 "changelog when explicit setting of HSM flags" - -test_228() { - # test needs a running copytool - copytool_setup + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" - local fid=$(make_small_sync $DIR/$tfile) - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile - wait_request_state $fid ARCHIVE SUCCEED + # Send several archive requests + for file in "${files[@]:1}"; do + "$LFS" hsm_archive "$file" + done - $LFS hsm_release $DIR/$tfile - check_hsm_flags $DIR/$tfile "0x0000000d" + # Send one restore request + "$LFS" hsm_restore "${files[0]}" - filefrag $DIR/$tfile | grep " 1 extent found" || - error "filefrag on released file must return only one extent" + # Launch a copytool + copytool setup + copytool setup --archive-id 2 - # only newer versions of cp detect sparse files by stat/FIEMAP - # (LU-2580) - cp --sparse=auto $DIR/$tfile $DIR/$tfile.2 || - error "copying $DIR/$tfile" - cmp $DIR/$tfile $DIR/$tfile.2 || error "comparing copied $DIR/$tfile" + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:1}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done - $LFS hsm_release $DIR/$tfile - check_hsm_flags $DIR/$tfile "0x0000000d" + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) - mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + printf '%s\n' "${actions[@]}" - tar cf - --sparse $DIR/$tfile | tar xvf - -C $DIR/$tdir || - error "tar failed" - cmp $DIR/$tfile $DIR/$tdir/$DIR/$tfile || - error "comparing untarred $DIR/$tfile" + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && return + done - rm -f $DIR/$tfile $DIR/$tfile.2 || - error "rm $DIR/$tfile or $DIR/$tfile.2 failed" - copytool_cleanup + error "Too many ARCHIVE requests were run before the RESTORE request" } -run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse" +run_test 260b "Restore request have priority over other requests" -test_250() { - # test needs a running copytool - copytool_setup +# This test is very much tied to the implementation of the current priorisation +# mechanism in the coordinator. It might not make sense to keep it in the future +test_260c() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" - mkdir -p $DIR/$tdir - local maxrequest=$(get_hsm_param max_requests) - local rqcnt=$(($maxrequest * 3)) - local i="" + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file - cdt_disable - for i in $(seq -w 1 $rqcnt); do - rm -f $DIR/$tdir/$i - dd if=/dev/urandom of=$DIR/$tdir/$i bs=1M count=10 conv=fsync - done - # we do it in 2 steps, so all requests arrive at the same time - for i in $(seq -w 1 $rqcnt); do - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$i + for file in "${files[@]}"; do + create_small_file "$file" done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Force the next coordinator run to do housekeeping + cdt_shutdown cdt_enable - local cnt=$rqcnt - local wt=$rqcnt - while [[ $cnt != 0 || $wt != 0 ]]; do - sleep 1 - cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.actions |\ - grep STARTED | grep -v CANCEL | wc -l") - [[ $cnt -le $maxrequest ]] || - error "$cnt > $maxrequest too many started requests" - wt=$(do_facet $SINGLEMDS "$LCTL get_param\ - $HSM_PARAM.actions |\ - grep WAITING | wc -l") - echo "max=$maxrequest started=$cnt waiting=$wt" - done - copytool_cleanup -} -run_test 250 "Coordinator max request" + "$LFS" hsm_archive "${files[1]}" -test_251() { - # test needs a running copytool - copytool_setup + # Launch a copytool + copytool setup + copytool setup --archive-id 2 - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_large_for_cancel $f) - [ $? != 0 ] && skip "not enough free space" && return + wait_request_state "$(path2fid "${files[1]}")" ARCHIVE SUCCEED + # The coordinator just did a housekeeping run it won't do another one + # for around `loop_period' seconds => requests will not be reordered + # if it costs too much (ie. when the coordinator has to discard a whole + # hal) - cdt_disable - # to have a short test - local old_to=$(get_hsm_param active_request_timeout) - set_hsm_param active_request_timeout 4 - # to be sure the cdt will wake up frequently so - # it will be able to cancel the "old" request - local old_loop=$(get_hsm_param loop_period) - set_hsm_param loop_period 2 - cdt_enable + # Send several archive requests + for file in "${files[@]:2}"; do + "$LFS" hsm_archive "$file" + done - # clear locks to avoid extra delay caused by flush/cancel - # and thus prevent early copytool death to timeout. - cancel_lru_locks osc + # Send one restore request + "$LFS" hsm_restore "${files[0]}" - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f - wait_request_state $fid ARCHIVE STARTED - sleep 5 - wait_request_state $fid ARCHIVE CANCELED + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:2}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done - set_hsm_param active_request_timeout $old_to - set_hsm_param loop_period $old_loop + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) - copytool_cleanup + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && + error "Restore requests should not be prioritised" \ + "unless the coordinator is doing housekeeping" + done + return 0 } -run_test 251 "Coordinator request timeout" +run_test 260c "Requests are not reordered on the 'hot' path of the coordinator" test_300() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + # the only way to test ondisk conf is to restart MDS ... echo "Stop coordinator and remove coordinator state at mount" # stop coordinator @@ -4367,6 +4696,8 @@ test_300() { run_test 300 "On disk coordinator state kept between MDT umount/mount" test_301() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + local ai=$(get_hsm_param default_archive_id) local new=$((ai + 1)) @@ -4382,6 +4713,8 @@ test_301() { run_test 301 "HSM tunnable are persistent" test_302() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + local ai=$(get_hsm_param default_archive_id) local new=$((ai + 1)) @@ -4410,7 +4743,7 @@ run_test 302 "HSM tunnable are persistent when CDT is off" test_400() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -4418,12 +4751,14 @@ test_400() { local dir_mdt1=$DIR/$tdir/mdt1 # create 1 dir per MDT + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + stack_trap "rm -rf $dir_mdt1" EXIT $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" # create 1 file in each MDT - local fid1=$(make_small $dir_mdt0/$tfile) - local fid2=$(make_small $dir_mdt1/$tfile) + local fid1=$(create_small_file $dir_mdt0/$tfile) + local fid2=$(create_small_file $dir_mdt1/$tfile) # check that hsm request on mdt0 is sent to the right MDS $LFS hsm_archive $dir_mdt0/$tfile || error "lfs hsm_archive" @@ -4434,17 +4769,13 @@ test_400() { $LFS hsm_archive $dir_mdt1/$tfile || error "lfs hsm_archive" wait_request_state $fid2 ARCHIVE SUCCEED 1 && echo "archive successful on mdt1" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 $dir_mdt1 } run_test 400 "Single request is sent to the right MDT" test_401() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -4452,12 +4783,14 @@ test_401() { local dir_mdt1=$DIR/$tdir/mdt1 # create 1 dir per MDT + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + stack_trap "rm -rf $dir_mdt1" EXIT $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" # create 1 file in each MDT - local fid1=$(make_small $dir_mdt0/$tfile) - local fid2=$(make_small $dir_mdt1/$tfile) + local fid1=$(create_small_file $dir_mdt0/$tfile) + local fid2=$(create_small_file $dir_mdt1/$tfile) # check that compound requests are shunt to the rights MDTs $LFS hsm_archive $dir_mdt0/$tfile $dir_mdt1/$tfile || @@ -4466,10 +4799,6 @@ test_401() { echo "archive successful on mdt0" wait_request_state $fid2 ARCHIVE SUCCEED 1 && echo "archive successful on mdt1" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 $dir_mdt1 } run_test 401 "Compound requests split and sent to their respective MDTs" @@ -4487,14 +4816,11 @@ mdc_change_state() # facet, MDT_pattern, activate|deactivate done } -test_402() { - # make sure there is no running copytool - copytool_cleanup - +test_402a() { # deactivate all mdc on agent1 mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "deactivate" - HSMTOOL_NOERROR=true copytool_setup $SINGLEAGT + copytool setup --no-fail check_agent_unregistered "uuid" # match any agent @@ -4504,20 +4830,42 @@ test_402() { # reactivate MDCs mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "activate" } -run_test 402 "Copytool start fails if all MDTs are inactive" +run_test 402a "Copytool start fails if all MDTs are inactive" + +test_402b() { + copytool setup + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + +#define OBD_FAIL_MDS_HSM_CT_REGISTER_NET 0x14d + do_facet $SINGLEAGT lctl set_param fail_loc=0x14d + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + + # give time for CDT to send request and to keep it for retry + wait_for_loop_period + + wait_request_state $fid ARCHIVE WAITING + + do_facet $SINGLEAGT lctl set_param fail_loc=0 + + # request should succeed now + wait_request_state $fid ARCHIVE SUCCEED +} +run_test 402b "CDT must retry request upon slow start of CT" test_403() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - # make sure there is no running copytool - copytool_cleanup - local agent=$(facet_active_host $SINGLEAGT) # deactivate all mdc for MDT0001 mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate" - copytool_setup + copytool setup local uuid=$(get_agent_uuid $agent) # check the agent is registered on MDT0000, and not on MDT0001 check_agent_registered_by_mdt $uuid 0 @@ -4531,24 +4879,23 @@ test_403() { # make sure the copytool is now registered to all MDTs check_agent_registered $uuid - - copytool_cleanup } run_test 403 "Copytool starts with inactive MDT and register on reconnect" test_404() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup # create files on both MDT0000 and MDT0001 mkdir -p $DIR/$tdir local dir_mdt0=$DIR/$tdir/mdt0 + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" # create 1 file on mdt0 - local fid1=$(make_small $dir_mdt0/$tfile) + local fid1=$(create_small_file $dir_mdt0/$tfile) # deactivate all mdc for MDT0001 mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate" @@ -4562,17 +4909,13 @@ test_404() { # reactivate all mdc for MDT0001 mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "activate" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 } run_test 404 "Inactive MDT does not block requests for active MDTs" test_405() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -4581,15 +4924,15 @@ test_405() { # create striped dir on all of MDTs $LFS mkdir -i 0 -c $MDSCOUNT $striped_dir || error "lfs mkdir" - local fid1=$(make_small_sync $striped_dir/${tfile}_0) - local fid2=$(make_small_sync $striped_dir/${tfile}_1) - local fid3=$(make_small_sync $striped_dir/${tfile}_2) - local fid4=$(make_small_sync $striped_dir/${tfile}_3) + local fid1=$(create_small_sync_file $striped_dir/${tfile}_0) + local fid2=$(create_small_sync_file $striped_dir/${tfile}_1) + local fid3=$(create_small_sync_file $striped_dir/${tfile}_2) + local fid4=$(create_small_sync_file $striped_dir/${tfile}_3) - local idx1=$($LFS getstripe -M $striped_dir/${tfile}_0) - local idx2=$($LFS getstripe -M $striped_dir/${tfile}_1) - local idx3=$($LFS getstripe -M $striped_dir/${tfile}_2) - local idx4=$($LFS getstripe -M $striped_dir/${tfile}_3) + local idx1=$($LFS getstripe -m $striped_dir/${tfile}_0) + local idx2=$($LFS getstripe -m $striped_dir/${tfile}_1) + local idx3=$($LFS getstripe -m $striped_dir/${tfile}_2) + local idx4=$($LFS getstripe -m $striped_dir/${tfile}_3) # check that compound requests are shunt to the rights MDTs $LFS hsm_archive $striped_dir/${tfile}_0 $striped_dir/${tfile}_1 \ @@ -4614,31 +4957,29 @@ test_405() { cat $striped_dir/${tfile}_1 > /dev/null || error "cat ${tfile}_1 failed" cat $striped_dir/${tfile}_2 > /dev/null || error "cat ${tfile}_2 failed" cat $striped_dir/${tfile}_3 > /dev/null || error "cat ${tfile}_3 failed" - - copytool_cleanup } run_test 405 "archive and release under striped directory" test_406() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] && - skip "need MDS version at least 2.7.64" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.64) ] && + skip "need MDS version at least 2.7.64" local fid local mdt_index - copytool_setup - mkdir -p $DIR/$tdir - fid=$(make_small $DIR/$tdir/$tfile) + fid=$(create_small_file $DIR/$tdir/$tfile) echo "old fid $fid" + copytool setup + $LFS hsm_archive $DIR/$tdir/$tfile wait_request_state "$fid" ARCHIVE SUCCEED $LFS hsm_release $DIR/$tdir/$tfile # Should migrate $tdir but not $tfile. - $LFS mv -M1 $DIR/$tdir && + $LFS migrate -m1 $DIR/$tdir && error "migrating HSM an archived file should fail" $LFS hsm_restore $DIR/$tdir/$tfile @@ -4650,10 +4991,10 @@ test_406() { cat $DIR/$tdir/$tfile > /dev/null || error "cannot read $DIR/$tdir/$tfile" - $LFS mv -M1 $DIR/$tdir || + $LFS migrate -m1 $DIR/$tdir || error "cannot complete migration after HSM remove" - mdt_index=$($LFS getstripe -M $DIR/$tdir) + mdt_index=$($LFS getstripe -m $DIR/$tdir) if ((mdt_index != 1)); then error "expected MDT index 1, got $mdt_index" fi @@ -4674,25 +5015,412 @@ test_406() { cat $DIR/$tdir/$tfile > /dev/null || error "cannot read $DIR/$tdir/$tfile" - - copytool_cleanup } run_test 406 "attempting to migrate HSM archived files is safe" +test_407() { + local f=$DIR/$tdir/$tfile + local f2=$DIR2/$tdir/$tfile + local fid=$(create_empty_file "$f") + + copytool setup + + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f + +#define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164 + do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x164 + + # Prevent restore from completing + copytool_suspend + + md5sum $f & + # 1st request holds layout lock while appropriate + # RESTORE record is still not added to llog + md5sum $f2 & + sleep 2 + + do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions" + # after umount hsm_actions->O/x/x log shouldn't have + # double RESTORE records like below + #[0x200000401:0x1:0x0]...0x58d03a0d/0x58d03a0c action=RESTORE...WAITING + #[0x200000401:0x1:0x0]...0x58d03a0c/0x58d03a0d action=RESTORE...WAITING + sleep 30 && + do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"& + fail $SINGLEMDS + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + + do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions" + + copytool_continue + wait_all_done 100 $fid +} +run_test 407 "Check for double RESTORE records in llog" + test_500() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.92) ] && - skip "HSM migrate is not supported" && return - - # Stop the existing copytool - copytool_cleanup + [ "$MDS1_VERSION" -lt $(version_code 2.6.92) ] && + skip "HSM migrate is not supported" test_mkdir -p $DIR/$tdir - llapi_hsm_test -d $DIR/$tdir || error "One llapi HSM test failed" + + if [ "$CLIENT_VERSION" -lt $(version_code 2.11.56) ] || + [ "$MDS1_VERSION" -lt $(version_code 2.11.56) ]; + then + llapi_hsm_test -d $DIR/$tdir -b || + error "One llapi HSM test failed" + else + llapi_hsm_test -d $DIR/$tdir || + error "One llapi HSM test failed" + fi } run_test 500 "various LLAPI HSM tests" -copytool_cleanup +test_600() { + [ "$MDS1_VERSION" -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + chmod 777 $DIR/$tdir + $RUNAS touch $f || error "touch $f failed as $RUNAS_ID" + local fid=$(path2fid $f) + + local entry + entry=$(changelog_find -type CREAT -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID") || + error "No matching CREAT entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local nid="${changelog[nid]}" + + # Check its NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" +} +run_test 600 "Changelog fields 'u=' and 'nid='" + +test_601() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + cat $f || error "cat $f failed" + + changelog_find -type OPEN -target-fid $fid -mode "r--" || + error "No matching OPEN entry" +} +run_test 601 "OPEN Changelog entry" + +test_602() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + cat $f || error "cat $f failed" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + echo f > $f || error "write $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + # remove OPEN from changelog_mask + changelog_chmask "-OPEN" + + changelog_clear + changelog_dump + cat $f || error "cat $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + echo f > $f || error "write $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" +} +run_test 602 "Changelog record CLOSE only if open+write or OPEN recorded" + +test_603() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + setfattr -n user.xattr1 -v "value1" $f || error "setfattr $f failed" + + changelog_clear + getfattr -n user.xattr1 $f || error "getfattr $f failed" + + changelog_find -type GXATR -target-fid $fid -xattr "user.xattr1" || + error "No matching GXATR entry" +} +run_test 603 "GETXATTR Changelog entry" + +test_604() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + local f2=$DIR2/$tdir/$tfile + local procname="mdd.$FSNAME-MDT0000.changelog_deniednext" + local timeout + timeout="$(do_facet mds1 "$LCTL" get_param -n "$procname")" + stack_trap "do_facet mds1 '$LCTL' set_param '$procname=$timeout'" EXIT + do_facet mds1 lctl set_param "$procname=20" + + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + chmod 600 $f + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed" + changelog_dump + + local entry + entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID" -mode "r--") || + error "No matching NOPEN entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local nid="${changelog[nid]}" + + # Check its NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed" + changelog_dump + + changelog_find -type NOPEN -target-fid $fid && + error "There should be no NOPEN entry" + + # Sleep for `changelog_deniednext` seconds + sleep 20 + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat $f by user $RUNAS_ID should have failed" + changelog_dump + + entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID" -mode "r--") || + error "No matching NOPEN entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local nid="${changelog[nid]}" + + # Check the NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" +} +run_test 604 "NOPEN Changelog entry" + +test_605() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + local f2=$DIR2/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + changelog_dump + exec 3<> $f || error "open $f failed" + changelog_dump + + local entry + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_clear + changelog_dump + exec 4<> $f || error "open $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid && + error "There should be no OPEN entry" + + exec 4>&- || error "close $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + # access in rw, so different access mode should generate entries + cat $f || error "cat $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + # same access as first one, should not generate new entries + exec 4<> $f || error "open $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid && + error "There should be no OPEN entry" + + exec 4>&- || error "close $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + # access by different user should generate new entries + $RUNAS cat $f || error "cat $f by user $RUNAS_ID failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + exec 3>&- || error "close $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" +} +run_test 605 "Test OPEN and CLOSE rate limit in Changelogs" + +test_606() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + local llog_reader=$(do_facet mgs "which llog_reader 2> /dev/null") + llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader} + [ -z $(do_facet mgs ls -d $llog_reader 2> /dev/null) ] && + skip_env "missing llog_reader" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + chmod 777 $DIR/$tdir + $RUNAS touch $f || error "touch $f failed as $RUNAS_ID" + local fid=$(path2fid $f) + rm $f || error "rm $f failed" + + local mntpt=$(facet_mntpt mds1) + local pass=true + local entry + + #remount mds1 as ldiskfs or zfs type + stop mds1 || error "stop mds1 failed" + stack_trap "unmount_fstype mds1; start mds1 $(mdsdevname 1)\ + $MDS_MOUNT_OPTS" EXIT + mount_fstype mds1 || error "remount mds1 failed" + + for ((i = 0; i < 1; i++)); do + do_facet mds1 $llog_reader $mntpt/changelog_catalog + local cat_file=$(do_facet mds1 $llog_reader \ + $mntpt/changelog_catalog | awk \ + '{match($0,"path=([^ ]+)",a)}END{print a[1]}') + [ -n "$cat_file" ] || error "no catalog file" + + entry=$(do_facet mds1 $llog_reader $mntpt/$cat_file | + awk "/CREAT/ && /target:\[$fid\]/ {print}") + [ -n "$entry" ] || error "no CREAT entry" + done + + local uidgid=$(echo $entry | + sed 's+.*\ user:\([0-9][0-9]*:[0-9][0-9]*\)\ .*+\1+') + [ -n "$uidgid" ] || error "uidgid is empty" + echo "Got UID/GID $uidgid" + [ "$uidgid" = "$RUNAS_ID:$RUNAS_GID" ] || + error "uidgid '$uidgid' != '$RUNAS_ID:$RUNAS_GID'" + local nid=$(echo $entry | + sed 's+.*\ nid:\(\S\S*@\S\S*\)\ .*+\1+') + [ -n "$nid" ] || error "nid is empty" + echo "Got NID $nid" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any NID ${CLIENT_NIDS[@]}" +} +run_test 606 "llog_reader groks changelog fields" complete $SECONDS check_and_cleanup_lustre