X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=ae49009f66791eaaa8c730033eff9e19e02de520;hp=fcca8fe6a24fc77a93d55d27ebb0b6a9bd979087;hb=c8790ae52393d96fc71aa4edea0e5051c54431e9;hpb=d1855f8e22a929066a69470c7e3d082c70478575 diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index fcca8fe..ae49009 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -7,22 +7,24 @@ set -e set +o monitor -SRCDIR=$(dirname $0) -export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin - ONLY=${ONLY:-"$*"} -# bug number for skipped test: -ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT" -# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! - -LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +LUSTRE=${LUSTRE:-$(dirname $0)/..} . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -MULTIOP=${MULTIOP:-multiop} +ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT " +if $SHARED_KEY; then +# bug number for skipped tests: LU-9795 LU-9795 + ALWAYS_EXCEPT+=" 13 402b" +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! +fi + +build_test_filter + +[ -n "$FILESET" ] && skip "Not functional for FILESET set" + OPENFILE=${OPENFILE:-openfile} MMAP_CAT=${MMAP_CAT:-mmap_cat} MOUNT_2=${MOUNT_2:-"yes"} @@ -30,21 +32,26 @@ FAIL_ON_ERROR=false # script only handles up to 10 MDTs (because of MDT_PREFIX) [ $MDSCOUNT -gt 9 ] && - error "script cannot handle more than 9 MDTs, please fix" && exit + error "script cannot handle more than 9 MDTs, please fix" check_and_setup_lustre -if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then - skip_env "Need MDS version at least 2.4.53" && exit +if [[ $MDS1_VERSION -lt $(version_code 2.4.53) ]]; then + skip_env "Need MDS version at least 2.4.53" fi # $RUNAS_ID may get set incorrectly somewhere else if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then - skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" && exit + skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" fi check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS - -build_test_filter +if getent group nobody; then + GROUP=nobody +elif getent group nogroup; then + GROUP=nogroup +else + error "No generic nobody group" +fi # if there is no CLIENT1 defined, some tests can be ran on localhost CLIENT1=${CLIENT1:-$HOSTNAME} @@ -102,16 +109,6 @@ init_agt_vars() { export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") - # $hsm_root/$HSMTMP Makes $hsm_root dir path less generic to ensure - # rm -rf $hsm_root/* is safe even if $hsm_root becomes unset to avoid - # deleting everything in filesystem, independent of any copytool. - export HSMTMP=${HSMTMP:-"shsm"} - - HSM_ARCHIVE=$(copytool_device $SINGLEAGT) - - [ -z "${HSM_ARCHIVE// /}" ] && error "HSM_ARCHIVE is empty!" - HSM_ARCHIVE=$HSM_ARCHIVE/$HSMTMP HSM_ARCHIVE_NUMBER=2 @@ -134,14 +131,6 @@ copytool_device() { echo -n ${!dev} } -# Stop copytool and unregister an existing changelog user. -cleanup() { - copytool_monitor_cleanup - copytool_cleanup - changelog_cleanup - cdt_set_sanity_policy -} - get_mdt_devices() { local mdtno # get MDT device for each mdc @@ -155,14 +144,15 @@ get_mdt_devices() { search_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" + do_nodesv $hosts "libtool execute pgrep -x $HSMTOOL" } kill_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true + do_nodesv $hosts "libtool execute killall -q $HSMTOOL" || true + copytool_continue "$hosts" } wait_copytools() { @@ -229,70 +219,15 @@ copytool_monitor_setup() { fi } -copytool_monitor_cleanup() { - local facet=${1:-$SINGLEAGT} - local agent=$(facet_active_host $facet) - - if [ -n "$HSMTOOL_MONITOR_DIR" ]; then - # Should die when the copytool dies, but just in case. - local cmd="kill \\\$(cat $HSMTOOL_MONITOR_DIR/monitor_pid)" - cmd+=" 2>/dev/null || true" - do_node $agent "$cmd" - do_node $agent "rm -fr $HSMTOOL_MONITOR_DIR" - export HSMTOOL_MONITOR_DIR= - fi - - # The pdsh should die on its own when the monitor dies. Just - # in case, though, try to clean up to avoid any cruft. - if [ -n "$HSMTOOL_MONITOR_PDSH" ]; then - kill $HSMTOOL_MONITOR_PDSH 2>/dev/null - export HSMTOOL_MONITOR_PDSH= - fi -} - -copytool_setup() { - local facet=${1:-$SINGLEAGT} - # Use MOUNT2 by default if defined - local lustre_mntpnt=${2:-${MOUNT2:-$MOUNT}} - local arc_id=$3 - local hsm_root=${4:-$(copytool_device $facet)} - - [ -z "${hsm_root// /}" ] && error "copytool_setup: hsm_root empty!" - - local agent=$(facet_active_host $facet) - - if $HSM_ARCHIVE_PURGE; then - echo "Purging archive on $agent" - do_facet $facet "rm -rf $hsm_root/$HSMTMP/*" - fi +fid2archive() +{ + local fid="$1" - echo "Starting copytool $facet on $agent" - do_facet $facet "mkdir -p $hsm_root/$HSMTMP/" || - error "mkdir '$hsm_root/$HSMTMP' failed" - # bandwidth is limited to 1MB/s so the copy time is known and - # independent of hardware - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon" - cmd+=" --hsm-root $hsm_root/$HSMTMP" - [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id" - [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] || - cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL" - [[ -z "$HSMTOOL_EVENT_FIFO" ]] || - cmd+=" --event-fifo $HSMTOOL_EVENT_FIFO" - cmd+=" --bandwidth 1 $lustre_mntpnt" - - # Redirect the standard output and error to a log file which - # can be uploaded to Maloo. - local prefix=$TESTLOG_PREFIX - [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME - local copytool_log=$prefix.copytool${arc_id}_log.$agent.log - - stack_trap cleanup EXIT - do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1" - if [[ $? != 0 ]]; then - [[ $HSMTOOL_NOERROR == true ]] || - error "start copytool $facet on $agent failed" - echo "start copytool $facet on $agent failed" - fi + case "$HSMTOOL" in + lhsmtool_posix) + printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid" + ;; + esac } get_copytool_event_log() { @@ -306,94 +241,29 @@ get_copytool_event_log() { error "Could not collect event log from $agent" } -copytool_cleanup() { - trap - EXIT - local agt_facet=$SINGLEAGT - local agt_hosts=${1:-$(facet_active_host $agt_facet)} - local hsm_root=$(copytool_device $agt_facet) - - [ -z "${hsm_root// /}" ] && error "copytool_cleanup: hsm_root empty!" - - local i - local facet - local param - local -a state - - kill_copytools $agt_hosts - wait_copytools $agt_hosts || error "copytools failed to stop" - - # Clean all CDTs orphans requests from previous tests that - # would otherwise need to timeout to clear. - for ((i = 0; i < MDSCOUNT; i++)); do - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - state[$i]=$(do_facet $facet "$LCTL get_param -n $param") - - # Skip already stopping or stopped CDTs. - [[ "${state[$i]}" =~ ^stop ]] && continue - - do_facet $facet "$LCTL set_param $param=shutdown" - done - - for ((i = 0; i < MDSCOUNT; i++)); do - # Only check and restore CDTs that we stopped in the first loop. - [[ "${state[$i]}" =~ ^stop ]] && continue - - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - - wait_result $facet "$LCTL get_param -n $param" stopped 20 || - error "$facet CDT state is not stopped" - - # Restore old CDT state. - do_facet $facet "$LCTL set_param $param=${state[$i]}" - done - - for ((i = 0; i < MDSCOUNT; i++)); do - # Only check CDTs that we stopped in the first loop. - [[ "${state[$i]}" =~ ^stop ]] && continue - - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - - # Check that the old CDT state was restored. - wait_result $facet "$LCTL get_param -n $param" "${state[$i]}" \ - 20 || error "$facet CDT state is not '${state[$i]}'" - done - - if do_facet $agt_facet "df $hsm_root" >/dev/null 2>&1 ; then - do_facet $agt_facet "rm -rf $hsm_root/$HSMTMP/*" - fi -} - copytool_suspend() { local agents=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0 + stack_trap \ + "do_nodesv $agents libtool execute pkill -CONT -x '$HSMTOOL' || true" EXIT + do_nodesv $agents "libtool execute pkill -STOP -x $HSMTOOL" || return 0 echo "Copytool is suspended on $agents" } copytool_continue() { local agents=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $agents "pkill -CONT -x $HSMTOOL_BASE" || return 0 + do_nodesv $agents "libtool execute pkill -CONT -x $HSMTOOL" || return 0 echo "Copytool is continued on $agents" } copytool_remove_backend() { local fid=$1 - local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid) + local be=$(do_facet $SINGLEAGT find "$(hsm_root)" -name $fid) echo "Remove from backend: $fid = $be" do_facet $SINGLEAGT rm -f $be } -import_file() { - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --import $1 $2 $MOUNT" || - error "import of $1 to $2 failed" -} - file_creation_failure() { local cmd=$1 local file=$2 @@ -416,26 +286,24 @@ create_file() { local bs=$2 local count=$3 local conv=$4 - local if=${5:-/dev/zero} - local facet=$SINGLEAGT + local source=${5:-/dev/zero} + local args="" + local err - local cmd - printf -v cmd 'do_facet "%s" dd if="%s" of="%s" count=%s bs=%s' \ - "$facet" "$if" "$file" "$count" "$bs" - [ -n "$conv" ] && cmd+=" conv=$conv" + if [ -n "$conv" ]; then + args+=" conv=$conv" + fi # Create the directory in case it does not exist - do_facet "$facet" mkdir -p "$(dirname "$file")" + mkdir -p "$(dirname "$file")" # Delete the file in case it already exist - do_facet "$facet" rm -f "$file" + rm -f "$file" - if eval "$cmd"; then - # print the FID if the file is not an archive - [[ "$file" =~ ^$HSM_ARCHIVE ]] || path2fid "$file" || - error "cannot get fid on '$file'" + if dd if="$source" of="$file" count="$count" bs="$bs" $args; then + path2fid "$file" || error "cannot get FID of '$file'" else - local err=$? - printf "$cmd failed with $err\n" >&2; + err=$? + echo "cannot create file '$file'" >&2; # Let the caller decide what to do on error return $err; fi @@ -461,91 +329,27 @@ create_small_sync_file() { } create_archive_file() { - local if=/dev/urandom + local file="$(hsm_root)/$1" local count=${2:-39} - local bs=1M - local facet=$SINGLEAGT + local source=/dev/urandom # Create the counterpart directory of the archive - do_facet "$facet" mkdir -p "$DIR2/$(dirname "$1")" - create_file "${HSM_ARCHIVE}/$1" $bs $count "" $if || - file_creation_failure dd "${HSM_ARCHIVE}/$1" $? -} - -copy2archive() { - local file=$HSM_ARCHIVE/$2 - do_facet $SINGLEAGT mkdir -p $(dirname $file) - do_facet $SINGLEAGT cp -p $1 $file || error "cannot copy $1 to $file" -} - -mdts_set_param() { - local arg=$1 - local key=$2 - local value=$3 - local mdtno - local rc=0 - if [[ "$value" != "" ]]; then - value="=$value" - fi - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local facet=mds${mdtno} - # if $arg include -P option, run 1 set_param per MDT on the MGS - # else, run set_param on each MDT - [[ $arg = *"-P"* ]] && facet=mgs - do_facet $facet $LCTL set_param $arg mdt.${MDT[$idx]}.$key$value - [[ $? != 0 ]] && rc=1 - done - return $rc -} - -mdts_check_param() { - local key="$1" - local target="$2" - local timeout="$3" - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ - "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ - $timeout || - error "$key state is not '$target' on mds${mdtno}" - done -} - -changelog_setup() { - CL_USERS=() - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local cl_user=$(do_facet mds${mdtno} $LCTL \ - --device ${MDT[$idx]} \ - changelog_register -n) - CL_USERS+=($cl_user) - do_facet mds${mdtno} lctl set_param \ - mdd.${MDT[$idx]}.changelog_mask="+hsm" - $LFS changelog_clear ${MDT[$idx]} $cl_user 0 - done -} + do_facet "$SINGLEAGT" mkdir -p "$(dirname "$file")" || + error "cannot create archive directory '$(dirname "$file")'" -changelog_cleanup() { - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - [[ -z ${CL_USERS[$idx]} ]] && continue - $LFS changelog_clear ${MDT[$idx]} ${CL_USERS[$idx]} 0 - do_facet mds${mdtno} lctl --device ${MDT[$idx]} \ - changelog_deregister ${CL_USERS[$idx]} - done - CL_USERS=() + do_facet "$SINGLEAGT" dd if=$source of="$file" bs=1M count=$count || + error "cannot create archive file '$file'" } -changelog_get_flags() { - local mdt=$1 - local cltype=$2 - local fid=$3 +copy2archive() { + local hsm_root="$(hsm_root)" + local file="$hsm_root/$2" - $LFS changelog $mdt | awk "/$cltype/ && /t=\[$fid\]/ {print \$5}" + stack_trap "do_facet $SINGLEAGT rm -rf '$hsm_root'" EXIT + do_facet $SINGLEAGT mkdir -p "$(dirname "$file")" || + error "mkdir '$(dirname "$file")' failed" + do_facet $SINGLEAGT cp -p "$1" "$file" || + error "cannot copy '$1' to '$file'" } get_hsm_param() { @@ -554,14 +358,6 @@ get_hsm_param() { echo $val } -set_hsm_param() { - local param=$1 - local value=$2 - local opt=$3 - mdts_set_param "$opt -n" "hsm.$param" "$value" - return $? -} - set_test_state() { local cmd=$1 local target=$2 @@ -569,15 +365,6 @@ set_test_state() { mdts_check_param hsm_control "$target" 10 } -cdt_set_sanity_policy() { - if [[ "$CDT_POLICY_HAD_CHANGED" ]] - then - # clear all - mdts_set_param "" hsm.policy "+NRA" - mdts_set_param "" hsm.policy "-NBR" - CDT_POLICY_HAD_CHANGED= - fi -} cdt_set_no_retry() { mdts_set_param "" hsm.policy "+NRA" @@ -603,21 +390,6 @@ cdt_clear_mount_state() { mdts_set_param "-P -d" hsm_control "" } -cdt_set_mount_state() { - mdts_set_param "-P" hsm_control "$1" - # set_param -P is asynchronous operation and could race with set_param. - # In such case configs could be retrieved and applied at mgc after - # set_param -P completion. Sleep here to avoid race with set_param. - # We need at least 20 seconds. 10 for mgc_requeue_thread to wake up - # MGC_TIMEOUT_MIN_SECONDS + MGC_TIMEOUT_RAND_CENTISEC(5 + 5) - # and 10 seconds to retrieve config from server. - sleep 20 -} - -cdt_check_state() { - mdts_check_param hsm_control "$1" 20 -} - cdt_disable() { set_test_state disabled disabled } @@ -640,37 +412,6 @@ cdt_restart() { cdt_set_sanity_policy } -needclients() { - local client_count=$1 - if [[ $CLIENTCOUNT -lt $client_count ]]; then - skip "Need $client_count or more clients, have $CLIENTCOUNT" - return 1 - fi - return 0 -} - -path2fid() { - $LFS path2fid $1 | tr -d '[]' - return ${PIPESTATUS[0]} -} - -get_hsm_flags() { - local f=$1 - local u=$2 - local st - - if [[ $u == "user" ]]; then - st=$($RUNAS $LFS hsm_state $f) - else - u=root - st=$($LFS hsm_state $f) - fi - - [[ $? == 0 ]] || error "$LFS hsm_state $f failed (run as $u)" - - st=$(echo $st | cut -f 2 -d" " | tr -d "()," ) - echo $st -} get_hsm_archive_id() { local f=$1 @@ -682,14 +423,6 @@ get_hsm_archive_id() { echo $ar } -check_hsm_flags() { - local f=$1 - local fl=$2 - - local st=$(get_hsm_flags $f) - [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl" -} - check_hsm_flags_user() { local f=$1 local fl=$2 @@ -726,44 +459,6 @@ delete_large_files() { wait_delete_completed } -make_custom_file_for_progress() { - local count=${2:-"39"} - local bs=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1) - bs=${3:-$bs} - - [[ $count -gt 0 ]] || error "Invalid file size" - [[ $bs -gt 0 ]] || error "Invalid stripe size" - - if ! create_file "${1/$DIR/$DIR2}" $bs $count fsync; then - echo "The creation of '${1/$DIR/$DIR2}' failed" >&2 - echo "It might be due to a lack of space in the filesystem" >&2 - delete_large_files >&2 - create_file "${1/$DIR/$DIR2}" $bs $count fsync || - file_creation_failure dd "${1/$DIR/$DIR2}" $? - fi -} - -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - -wait_request_state() { - local fid=$1 - local request=$2 - local state=$3 - # 4th arg (mdt index) is optional - local mdtidx=${4:-0} - local mds=mds$(($mdtidx + 1)) - - local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" - cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - - wait_result $mds "$cmd" $state 200 || - error "request on $fid is not $state on $mds" -} - get_request_state() { local fid=$1 local request=$2 @@ -876,7 +571,7 @@ get_agent_uuid() { # Lustre mount-point is mandatory and last parameter on # copytool cmd-line. - local mntpnt=$(do_rpc_nodes $agent ps -C $HSMTOOL_BASE -o args= | + local mntpnt=$(do_rpc_nodes $agent libtool execute ps -C $HSMTOOL -o args= | awk '{print $NF}') [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ "parameter" @@ -905,7 +600,9 @@ cdt_set_sanity_policy # finished requests are quickly removed from list set_hsm_param grace_delay 10 -test_1() { +CLIENT_NIDS=( $($LCTL list_nids all) ) + +test_1A() { # was test_1 mkdir -p $DIR/$tdir chmod 777 $DIR/$tdir @@ -938,13 +635,13 @@ test_1() { check_hsm_flags_user $f "0x00000000" } -run_test 1 "lfs hsm flags root/non-root access" +run_test 1A "lfs hsm flags root/non-root access" test_1a() { local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) - copytool_setup + copytool setup $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED @@ -955,23 +652,18 @@ test_1a() { check_hsm_flags $f "0x0000000d" $MMAP_CAT $f > /dev/null || error "failed mmap & cat release file" - - copytool_cleanup } run_test 1a "mmap & cat a HSM released file" -test_1b() { - mkdir -p $DIR/$tdir - $LFS setstripe -E 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || - error "failed to set default stripe" - local f=$DIR/$tdir/$tfile +test_1bde_base() { + local f=$1 rm -f $f - dd if=/dev/random of=$f bs=1M count=1 conv=sync || + dd if=/dev/urandom of=$f bs=1M count=1 conv=sync || error "failed to create file" local fid=$(path2fid $f) - copytool_setup + copytool setup echo "archive $f" $LFS hsm_archive $f || error "could not archive file" @@ -987,10 +679,17 @@ test_1b() { wait_request_state $fid RESTORE SUCCEED echo "verify restored state: " check_hsm_flags $f "0x00000009" && echo "pass" +} + +test_1b() { + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || + error "failed to set default stripe" + local f=$DIR/$tdir/$tfile - copytool_cleanup + test_1bde_base $f } -run_test 1b "Archive, Release & Restore composite file" +run_test 1b "Archive, Release and Restore composite file" test_1c() { mkdir -p $DIR/$tdir @@ -1018,10 +717,26 @@ test_1c() { [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" - # Test whether setting archive number > 32 results in error. - $LFS hsm_set --exists --archive-id 33 $f && - error "archive number is larger than 32" - check_hsm_flags_user $f "0x00000001" + LOCAL_HSM_ARCHIVE_NUMBER=33 + if [ $(lustre_version_code client) -ge $(version_code 2.11.56) ] && + [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.11.56) ]; then + # lustre in the new version supports unlimited archiveID. + # Test whether setting archive number > 32 is supported + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f || + error "archive ID $LOCAL_HSM_ARCHIVE_NUMBER too large?" + check_hsm_flags_user $f "0x00000001" + + echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER" + st=$(get_hsm_archive_id $f) + [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || + error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" + else + # old client or old mds can only support at most 32 archiveID + # test whether setting archive number > 32 results in error. + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f && + error "bitmap archive number is larger than 32" + check_hsm_flags_user $f "0x00000001" + fi # Test whether setting archive number 16 and archived flag. LOCAL_HSM_ARCHIVE_NUMBER=16 @@ -1036,6 +751,58 @@ test_1c() { } run_test 1c "Check setting archive-id in lfs hsm_set" +test_1d() { + [ $MDS1_VERSION -lt $(version_code 2.10.59) ] && + skip "need MDS version at least 2.10.59" + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -L mdt -E -1 -c 2 $DIR/$tdir || + error "failed to set default stripe" + local f=$DIR/$tdir/$tfile + + test_1bde_base $f +} +run_test 1d "Archive, Release and Restore DoM file" + +test_1e() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code $SEL_VER) ] && + skip "skipped for lustre < $SEL_VER" + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1G -z 64M -E 10G -z 512M -E -1 -z 1G $DIR/$tdir || + error "failed to set default stripe" + local comp_file=$DIR/$tdir/$tfile + + test_1bde_base $comp_file + + local flg_opts="--comp-start 0 -E 64M --comp-flags init" + local found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "1st component not found" + + flg_opts="--comp-start 64M -E 1G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "2nd component not found" + + flg_opts="--comp-start 1G -E 1G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "3rd component not found" + + flg_opts="--comp-start 1G -E 10G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "4th component not found" + + flg_opts="--comp-start 10G -E 10G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "5th component not found" + + flg_opts="--comp-start 10G -E EOF --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "6th component not found" + + sel_layout_sanity $comp_file 6 +} +run_test 1e "Archive, Release and Restore SEL file" + test_2() { local f=$DIR/$tdir/$tfile @@ -1131,7 +898,7 @@ run_test 4 "Useless cancel must not be registered" test_8() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1140,16 +907,13 @@ test_8() { wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 8 "Test default archive number" -test_9() { +test_9A() { # was test_9 # we do not use the default one to be sure - local new_an=$((HSM_ARCHIVE_NUMBER + 1)) - copytool_cleanup - copytool_setup $SINGLEAGT $MOUNT $new_an + local archive_id=$((HSM_ARCHIVE_NUMBER + 1)) + copytool setup --archive-id $archive_id # give time for CT to register with MDTs sleep $(($MDSCOUNT*2)) @@ -1159,14 +923,12 @@ test_9() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - $LFS hsm_archive --archive $new_an $f + $LFS hsm_archive --archive $archive_id $f wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } -run_test 9 "Use of explicit archive number, with dedicated copytool" +run_test 9A "Use of explicit archive number, with dedicated copytool" test_9a() { needclients 3 || return 0 @@ -1175,14 +937,11 @@ test_9a() { local file local fid - copytool_cleanup $(comma_list $(agts_nodes)) - # start all of the copytools for n in $(seq $AGTCOUNT); do - copytool_setup agt$n + copytool setup --facet agt$n done - trap "copytool_cleanup $(comma_list $(agts_nodes))" EXIT # archive files for n in $(seq $AGTCOUNT); do file=$DIR/$tdir/$tfile.$n @@ -1192,15 +951,12 @@ test_9a() { wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $file "0x00000009" done - - trap - EXIT - copytool_cleanup $(comma_list $(agts_nodes)) } run_test 9a "Multiple remote agents" test_10a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile @@ -1209,10 +965,13 @@ test_10a() { error "hsm_archive failed" wait_request_state $fid ARCHIVE SUCCEED - local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || - error "fid $fid not in archive $HSM_ARCHIVE" + local hsm_root="$(copytool_device $SINGLEAGT)" + local archive="$(do_facet $SINGLEAGT \ + find "$hsm_root" -name "$fid" -print0)" + [ -n "$archive" ] || error "fid '$fid' not in archive '$hsm_root'" + echo "Verifying content" - do_facet $SINGLEAGT diff $f $AFILE || error "archived file differs" + do_facet $SINGLEAGT diff $f $archive || error "archived file differs" echo "Verifying hsm state " check_hsm_flags $f "0x00000009" @@ -1220,15 +979,12 @@ test_10a() { local st=$(get_hsm_archive_id $f) [[ $st == $HSM_ARCHIVE_NUMBER ]] || error "Wrong archive number, $st != $HSM_ARCHIVE_NUMBER" - - copytool_cleanup - } run_test 10a "Archive a file" test_10b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1240,28 +996,25 @@ test_10b() { local cnt=$(get_request_count $fid ARCHIVE) [[ "$cnt" == "1" ]] || error "archive of non dirty file must not make a request" - - copytool_cleanup } run_test 10b "Archive of non dirty file must work without doing request" test_10c() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_set --noarchive $f $LFS hsm_archive $f && error "archive a noarchive file must fail" - - copytool_cleanup + return 0 } run_test 10c "Check forbidden archive" test_10d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1273,8 +1026,6 @@ test_10d() { local dflt=$(get_hsm_param default_archive_id) [[ $ar == $dflt ]] || error "archived file is not on default archive: $ar != $dflt" - - copytool_cleanup } run_test 10d "Archive a file on the default archive id" @@ -1283,30 +1034,30 @@ test_11a() { copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f echo -n "Verifying released state: " check_hsm_flags $f "0x0000000d" local LSZ=$(stat -c "%s" $f) - local ASZ=$(do_facet $SINGLEAGT stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile) + local ASZ=$(do_facet $SINGLEAGT stat -c "%s" "$(hsm_root)/$tdir/$tfile") echo "Verifying imported size $LSZ=$ASZ" [[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ" echo -n "Verifying released pattern: " - local PTRN=$($GETSTRIPE -L $f) + local PTRN=$($LFS getstripe -L $f) echo $PTRN - [[ $PTRN == 80000001 ]] || error "Is not released" + [[ $PTRN == released ]] || error "Is not released" local fid=$(path2fid $f) echo "Verifying new fid $fid in archive" - local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || - error "fid $fid not in archive $HSM_ARCHIVE" + do_facet $SINGLEAGT "[ -f \"$(fid2archive "$fid")\" ]" || + error "No archive for fid $fid" } run_test 11a "Import a file" test_11b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1318,25 +1069,23 @@ test_11b() { local FILE_HASH=$(md5sum $f) rm -f $f - import_file $fid $f + copytool import $fid $f echo "$FILE_HASH" | md5sum -c [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 11b "Import a deleted file using its FID" test_12a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local f2=$DIR2/$tdir/$tfile echo "Verifying released state: " check_hsm_flags $f2 "0x0000000d" @@ -1348,23 +1097,21 @@ test_12a() { echo "Verifying file state: " check_hsm_flags $f2 "0x00000009" - do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12a "Restore an imported file explicitly" test_12b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f echo "Verifying released state: " check_hsm_flags $f "0x0000000d" @@ -1373,11 +1120,9 @@ test_12b() { echo "Verifying file state after restore: " check_hsm_flags $f "0x00000009" - do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12b "Restore an imported file implicitly" @@ -1385,14 +1130,12 @@ test_12c() { [ "$OSTCOUNT" -lt "2" ] && skip_env "needs >= 2 OSTs" && return # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile mkdir -p $DIR/$tdir $LFS setstripe -c 2 "$f" - local fid - fid=$(make_custom_file_for_progress $f 5) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 5) local FILE_CRC=$(md5sum $f) @@ -1403,14 +1146,12 @@ test_12c() { echo "$FILE_CRC" | md5sum -c [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12c "Restore a file with stripe of 2" test_12d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -1428,17 +1169,15 @@ test_12d() { local cnt=$(get_request_count $fid RESTORE) [[ "$cnt" == "0" ]] || error "restore a non dirty file must not make a request" - - copytool_cleanup } run_test 12d "Restore of a non archived, non released file must work"\ " without doing request" test_12e() { # test needs a running copytool - copytool_setup + copytool setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive $f || error "archive request failed" @@ -1450,14 +1189,13 @@ test_12e() { $LFS hsm_state $f $LFS hsm_restore $f && error "restore a dirty file must fail" - - copytool_cleanup + return 0 } run_test 12e "Check forbidden restore" test_12f() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1475,14 +1213,12 @@ test_12f() { diff -q /etc/hosts $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12f "Restore a released file explicitly" test_12g() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1499,8 +1235,6 @@ test_12g() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12g "Restore a released file implicitly" @@ -1508,7 +1242,7 @@ test_12h() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1525,14 +1259,12 @@ test_12h() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12h "Restore a released file implicitly from a second node" test_12m() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1545,33 +1277,29 @@ test_12m() { cmp /etc/passwd $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12m "Archive/release/implicit restore" test_12n() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f do_facet $SINGLEAGT cmp /etc/hosts $f || error "Restored file differs" $LFS hsm_release $f || error "release of $f failed" - - copytool_cleanup } run_test 12n "Import/implicit restore/release" test_12o() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1615,14 +1343,12 @@ test_12o() { wait_request_state $fid RESTORE SUCCEED [[ $st -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12o "Layout-swap failure during Restore leaves file released" test_12p() { # test needs a running copytool - copytool_setup + copytool setup mkdir $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1635,28 +1361,19 @@ test_12p() { do_facet $SINGLEAGT cat $f > /dev/null || error "cannot cat $f" $LFS hsm_release $f || error "cannot release $f" do_facet $SINGLEAGT cat $f > /dev/null || error "cannot cat $f" - - copytool_cleanup } run_test 12p "implicit restore of a file on copytool mount point" -cleanup_test_12q() { - trap 0 - zconf_umount $(facet_host $SINGLEAGT) $MOUNT3 || - error "cannot umount $MOUNT3 on $SINGLEAGT" -} - test_12q() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.58) ] && - skip "need MDS version at least 2.7.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.58) ] && + skip "need MDS version at least 2.7.58" + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT zconf_mount $(facet_host $SINGLEAGT) $MOUNT3 || error "cannot mount $MOUNT3 on $SINGLEAGT" - trap cleanup_test_12q EXIT - # test needs a running copytool - copytool_setup $SINGLEAGT $MOUNT3 + copytool setup -m "$MOUNT3" local f=$DIR/$tdir/$tfile local f2=$DIR2/$tdir/$tfile @@ -1682,7 +1399,7 @@ test_12q() { [ $size -eq $orig_size ] || error "$f2: wrong size after archive: $size != $orig_size" - HSM_ARCHIVE_PURGE=false copytool_setup $SINGLEAGT /mnt/lustre3 + copytool setup -m "$MOUNT3" wait @@ -1703,54 +1420,44 @@ test_12q() { size=$(stat -c "%s" $f2) [ $size -eq 0 ] || error "$f2: wrong size after overwrite: $size != 0" - - copytool_cleanup - zconf_umount $(facet_host $SINGLEAGT) $MOUNT3 || - error "cannot umount $MOUNT3 on $SINGLEAGT" } run_test 12q "file attributes are refreshed after restore" test_13() { - # test needs a running copytool - copytool_setup - - local ARC_SUBDIR="import.orig" - local d="" - local f="" - - # populate directory to be imported - for d in $(seq 1 10); do - local CURR_DIR="$HSM_ARCHIVE/$ARC_SUBDIR/dir.$d" - do_facet $SINGLEAGT mkdir -p "$CURR_DIR" - for f in $(seq 1 10); do - CURR_FILE="$CURR_DIR/$tfile.$f" - # write file-specific data - do_facet $SINGLEAGT \ - "echo d=$d, f=$f, dir=$CURR_DIR, "\ - "file=$CURR_FILE > $CURR_FILE" + local -i i j k=0 + for i in {1..10}; do + local archive_dir="$(hsm_root)"/subdir/dir.$i + + do_facet $SINGLEAGT mkdir -p "$archive_dir" + for j in {1..10}; do + local archive_file="$archive_dir"/file.$j + + do_facet $SINGLEAGT "echo $k > \"$archive_dir\"/file.$j" + k+=1 done done + # import to Lustre - import_file "$ARC_SUBDIR" $DIR/$tdir - # diff lustre content and origin (triggers file restoration) - # there must be 10x10 identical files, and no difference - local cnt_ok=$(do_facet $SINGLEAGT diff -rs $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | grep identical | wc -l) - local cnt_diff=$(do_facet $SINGLEAGT diff -r $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | wc -l) + copytool import "subdir" "$DIR/$tdir" - [ $cnt_diff -eq 0 ] || - error "$cnt_diff imported files differ from read data" - [ $cnt_ok -eq 100 ] || - error "not enough identical files ($cnt_ok != 100)" + # To check the import, the test uses diff with the -r flag + # This is nice, but diff only checks files one by one, and triggering + # an implicit restore for one file at a time will consume as many + # seconds as there are files to compare. To speed this up, a restore + # operation is triggered manually first. + copytool setup + find "$DIR/$tdir"/subdir -type f -exec $LFS hsm_restore {} \; - copytool_cleanup + # Compare the imported data + do_facet $SINGLEAGT \ + diff -r "$(hsm_root)"/subdir "$DIR/$tdir"/subdir || + error "imported files differ from archived data" } run_test 13 "Recursively import and restore a directory" test_14() { # test needs a running copytool - copytool_setup + copytool setup # archive a file local f=$DIR/$tdir/$tfile @@ -1767,22 +1474,18 @@ test_14() { # rebind the archive to the newly created file echo "rebind $fid to $fid2" - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --rebind $fid $fid2 $DIR" || error "could not rebind file" + copytool rebind $fid $fid2 # restore file and compare md5sum local sum2=$(md5sum $f | awk '{print $1}') [[ $sum == $sum2 ]] || error "md5sum mismatch after restore" - - copytool_cleanup } run_test 14 "Rebind archived file to a new fid" test_15() { # test needs a running copytool - copytool_setup + copytool setup # archive files local f=$DIR/$tdir/$tfile @@ -1798,6 +1501,7 @@ test_15() { done wait_all_done $(($count*60)) + stack_trap "rm -f $tmpfile" EXIT :>$tmpfile # delete the files for i in $(seq 1 $count); do @@ -1814,9 +1518,7 @@ test_15() { [[ $nl == $count ]] || error "$nl files in list, $count expected" echo "rebind list of files" - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --rebind $tmpfile $DIR" || error "could not rebind file list" + copytool rebind "$tmpfile" # restore files and compare md5sum for i in $(seq 1 $count); do @@ -1824,15 +1526,12 @@ test_15() { [[ $sum2 == ${sums[$i]} ]] || error "md5sum mismatch after restore ($sum2 != ${sums[$i]})" done - - rm -f $tmpfile - copytool_cleanup } run_test 15 "Rebind a list of files" test_16() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local ref=/tmp/ref # create a known size file so we can verify transfer speed @@ -1853,8 +1552,6 @@ test_16() { [[ $duration -ge $((goal - 1)) ]] || error "Transfer is too fast $duration < $goal" - - copytool_cleanup } run_test 16 "Test CT bandwith control option" @@ -1887,7 +1584,7 @@ run_test 20 "Release is not permitted" test_21() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/test_release @@ -1948,14 +1645,12 @@ test_21() { check_hsm_flags $f "0x0000000d" stop_full_debug_logging - - copytool_cleanup } run_test 21 "Simple release tests" test_22() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/test_release local swap=$DIR/$tdir/test_swap @@ -1974,14 +1669,13 @@ test_22() { create_small_file $swap $LFS swap_layouts $swap $f && error "swap_layouts should failed" - true - copytool_cleanup + return 0 } run_test 22 "Could not swap a release file" test_23() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/test_mtime @@ -2003,8 +1697,6 @@ test_23() { local ATIME=$(stat -c "%X" $f) [ $MTIME -eq "978261179" ] || fail "bad mtime: $MTIME" [ $ATIME -eq "978261179" ] || fail "bad atime: $ATIME" - - copytool_cleanup } run_test 23 "Release does not change a/mtime (utime)" @@ -2019,7 +1711,7 @@ test_24a() { local ctime1 # test needs a running copytool - copytool_setup + copytool setup fid=$(create_small_file $file) @@ -2091,7 +1783,8 @@ test_24a() { [ $ctime0 -eq $ctime1 ] || error "restore changed ctime from $ctime0 to $ctime1" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytools failed to stop" # Once more, after unmount and mount. umount_client $MOUNT || error "cannot unmount '$MOUNT'" @@ -2120,7 +1813,7 @@ test_24b() { # LU-3811 # Test needs a running copytool. - copytool_setup + copytool setup # Check that root can do HSM actions on a regular user's file. fid=$(create_small_file $file) @@ -2154,18 +1847,9 @@ test_24b() { [ "$sum0" == "$sum1" ] || error "md5sum mismatch for '$file'" - - copytool_cleanup } run_test 24b "root can archive, release, and restore user files" -cleanup_test_24c() { - trap 0 - set_hsm_param user_request_mask RESTORE - set_hsm_param group_request_mask RESTORE - set_hsm_param other_request_mask RESTORE -} - test_24c() { local file=$DIR/$tdir/$tfile local action=archive @@ -2174,15 +1858,18 @@ test_24c() { local other_save # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir # Save the default masks and check that cleanup_24c will # restore the request masks correctly. user_save=$(get_hsm_param user_request_mask) + stack_trap "set_hsm_param user_request_mask $user_save" EXIT group_save=$(get_hsm_param group_request_mask) + stack_trap "set_hsm_param user_request_mask $group_save" EXIT other_save=$(get_hsm_param other_request_mask) + stack_trap "set_hsm_param user_request_mask $other_save" EXIT [ "$user_save" == RESTORE ] || error "user_request_mask is '$user_save' expected 'RESTORE'" @@ -2191,12 +1878,10 @@ test_24c() { [ "$other_save" == RESTORE ] || error "other_request_mask is '$other_save' expected 'RESTORE'" - trap cleanup_test_24c EXIT - # User. create_small_file $file - chown $RUNAS_ID:nobody $file || - error "cannot chown '$file' to '$RUNAS_ID:nobody'" + chown $RUNAS_ID:$GROUP $file || + error "cannot chown '$file' to '$RUNAS_ID:$GROUP'" $RUNAS $LFS hsm_$action $file && error "$action by user should fail" @@ -2219,8 +1904,8 @@ test_24c() { # Other. create_small_file $file - chown nobody:nobody $file || - error "cannot chown '$file' to 'nobody:nobody'" + chown nobody:$GROUP $file || + error "cannot chown '$file' to 'nobody:$GROUP'" $RUNAS $LFS hsm_$action $file && error "$action by other should fail" @@ -2228,17 +1913,9 @@ test_24c() { set_hsm_param other_request_mask $action $RUNAS $LFS hsm_$action $file || error "$action by other should succeed" - - copytool_cleanup - cleanup_test_24c } run_test 24c "check that user,group,other request masks work" -cleanup_test_24d() { - mount -o remount,rw $MOUNT2 - zconf_umount $(facet_host $SINGLEAGT) "$MOUNT3" -} - test_24d() { local file1=$DIR/$tdir/$tfile local file2=$DIR2/$tdir/$tfile @@ -2250,12 +1927,13 @@ test_24d() { echo $fid1 $LFS getstripe $file1 - trap cleanup_test_24d EXIT - zconf_mount $(facet_host $SINGLEAGT) "$MOUNT3" || + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT + zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" || error "cannot mount '$MOUNT3' on '$SINGLEAGT'" - copytool_setup $SINGLEAGT "$MOUNT3" || - error "unable to setup a copytool for the test" + copytool setup -m "$MOUNT3" + + stack_trap "mount -o remount,rw \"$MOUNT2\"" EXIT mount -o remount,ro $MOUNT2 do_nodes $(comma_list $(nodes_list)) $LCTL clear @@ -2286,7 +1964,7 @@ test_24d() { run_test 24d "check that read-only mounts are respected" test_24e() { - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile local fid @@ -2300,22 +1978,19 @@ test_24e() { done tar -cf $TMP/$tfile.tar $DIR/$tdir || error "cannot tar $DIR/$tdir" - - copytool_cleanup } run_test 24e "tar succeeds on HSM released files" # LU-6213 test_24f() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) sum0=$(md5sum $f) echo $sum0 - $LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $f || + $LFS hsm_archive $f || error "hsm_archive failed" wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f || error "cannot release $f" @@ -2327,21 +2002,46 @@ test_24f() { sum1=$(md5sum $f) echo "Sum0 = $sum0, sum1 = $sum1" [ "$sum0" == "$sum1" ] || error "md5sum mismatch for '$tfile'" - - copytool_cleanup } run_test 24f "root can archive, release, and restore tar files" +test_24g() { + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local file=$DIR/$tdir/$tfile + local fid + + echo "RUNAS = '$RUNAS'" + + copytool setup + + mkdir -p $DIR/$tdir + chmod ugo+rwx $DIR/$tdir + + echo "Please listen carefully as our options have changed." | tee $file + fid=$(path2fid $file) + chmod ugo+rw $file + + $LFS hsm_archive $file + wait_request_state $fid ARCHIVE SUCCEED + check_hsm_flags $file 0x00000009 # exists archived + + echo "To be electrocuted by your telephone, press #." | $RUNAS tee $file + check_hsm_flags $file 0x0000000b # exists dirty archived +} +run_test 24g "write by non-owner still sets dirty" # LU-11369 + test_25a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f $LFS hsm_set --lost $f @@ -2349,15 +2049,13 @@ test_25a() { local st=$? [[ $st == 1 ]] || error "lost file access should failed (returns $st)" - - copytool_cleanup } run_test 25a "Restore lost file (HS_LOST flag) from import"\ " (Operation not permitted)" test_25b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -2373,20 +2071,16 @@ test_25b() { st=$? [[ $st == 1 ]] || error "lost file access should failed (returns $st)" - - copytool_cleanup } run_test 25b "Restore lost file (HS_LOST flag) after release"\ " (Operation not permitted)" -test_26() { +test_26A() { # was test_26 # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2395,25 +2089,15 @@ test_26() { wait_request_state $fid REMOVE SUCCEED check_hsm_flags $f "0x00000000" - - copytool_cleanup -} -run_test 26 "Remove the archive of a valid file" - -cleanup_test_26a() { - trap 0 - set_hsm_param remove_archive_on_last_unlink 0 - set_hsm_param loop_period $orig_loop_period - set_hsm_param grace_delay $orig_grace_delay - copytool_cleanup } +run_test 26A "Remove the archive of a valid file" test_26a() { local raolu=$(get_hsm_param remove_archive_on_last_unlink) [[ $raolu -eq 0 ]] || error "RAoLU policy should be off" # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -2434,16 +2118,17 @@ test_26a() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f3 wait_request_state $fid3 ARCHIVE SUCCEED - trap cleanup_test_26a EXIT - # set a long grace_delay vs short loop_period local orig_loop_period=$(get_hsm_param loop_period) local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT set_hsm_param grace_delay 100 rm -f $f + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT set_hsm_param remove_archive_on_last_unlink 1 ln "$f3" "$f3"_bis || error "Unable to create hard-link" @@ -2451,29 +2136,18 @@ test_26a() { rm -f $f2 - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid2 REMOVE SUCCEED assert_request_count $fid REMOVE 0 \ "Unexpected archived data remove request for $f" assert_request_count $fid3 REMOVE 0 \ "Unexpected archived data remove request for $f3" - - cleanup_test_26a } run_test 26a "Remove Archive On Last Unlink (RAoLU) policy" -cleanup_test_26b() { - trap 0 - set_hsm_param remove_archive_on_last_unlink 0 - copytool_cleanup -} - test_26b() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -2482,8 +2156,7 @@ test_26b() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - trap cleanup_test_26b EXIT - + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT set_hsm_param remove_archive_on_last_unlink 1 cdt_shutdown @@ -2491,34 +2164,22 @@ test_26b() { rm -f $f - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid REMOVE WAITING cdt_enable + # copytool must re-register kill_copytools wait_copytools || error "copytool failed to stop" - HSM_ARCHIVE_PURGE=false copytool_setup + copytool setup wait_request_state $fid REMOVE SUCCEED - - cleanup_test_26b } run_test 26b "RAoLU policy when CDT off" -cleanup_test_26c() { - trap 0 - set_hsm_param remove_archive_on_last_unlink 0 - set_hsm_param loop_period $orig_loop_period - set_hsm_param grace_delay $orig_grace_delay - copytool_cleanup -} - test_26c() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -2533,14 +2194,15 @@ test_26c() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2 wait_request_state $fid2 ARCHIVE SUCCEED - trap cleanup_test_26c EXIT - # set a long grace_delay vs short loop_period local orig_loop_period=$(get_hsm_param loop_period) local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT set_hsm_param grace_delay 100 + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT set_hsm_param remove_archive_on_last_unlink 1 multiop_bg_pause $f O_c || error "open $f failed" @@ -2556,44 +2218,32 @@ test_26c() { kill -USR1 $pid || error "multiop early exit" # should reach autotest timeout if multiop fails to trap # signal, close file, and exit ... - wait $pid || error - - set_hsm_param remove_archive_on_last_unlink 0 + wait $pid || error "wait PID $PID failed" wait_request_state $fid REMOVE SUCCEED - - cleanup_test_26c } run_test 26c "RAoLU effective when file closed" -cleanup_test_26d() { - trap 0 - set_hsm_param remove_archive_on_last_unlink 0 - set_hsm_param loop_period $orig_loop_period - set_hsm_param grace_delay $orig_grace_delay - copytool_cleanup -} - test_26d() { - # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED - trap cleanup_test_26d EXIT - # set a long grace_delay vs short loop_period local orig_loop_period=$(get_hsm_param loop_period) local orig_grace_delay=$(get_hsm_param grace_delay) + stack_trap "set_hsm_param loop_period $orig_loop_period" EXIT set_hsm_param loop_period 10 + stack_trap "set_hsm_param grace_delay $orig_grace_delay" EXIT set_hsm_param grace_delay 100 + stack_trap "set_hsm_param remove_archive_on_last_unlink 0" EXIT set_hsm_param remove_archive_on_last_unlink 1 multiop_bg_pause $f O_c || error "multiop failed" @@ -2603,44 +2253,36 @@ test_26d() { mds_evict_client - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid REMOVE SUCCEED client_up || client_up || true kill -USR1 $MULTIPID wait $MULTIPID || error "multiop close failed" - - cleanup_test_26d } run_test 26d "RAoLU when Client eviction" test_27a() { # test needs a running copytool - copytool_setup + copytool setup create_archive_file $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) $LFS hsm_remove $f [[ $? != 0 ]] || error "Remove of a released file should fail" - - copytool_cleanup } run_test 27a "Remove the archive of an imported file (Operation not permitted)" test_27b() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2649,19 +2291,15 @@ test_27b() { $LFS hsm_remove $f [[ $? != 0 ]] || error "Remove of a released file should fail" - - copytool_cleanup } run_test 27b "Remove the archive of a relased file (Operation not permitted)" test_28() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2674,8 +2312,6 @@ test_28() { cdt_enable wait_request_state $fid REMOVE SUCCEED - - copytool_cleanup } run_test 28 "Concurrent archive/file remove" @@ -2683,25 +2319,23 @@ test_29a() { # Tests --mntpath and --archive options local archive_id=7 - copytool_setup $SINGLEAGT $MOUNT $archive_id + copytool setup -m "$MOUNT" -a $archive_id # Bad archive number - $LFS hsm_remove -m $MOUNT -a 33 0x857765760:0x8:0x2 2>&1 | + $LFS hsm_remove -m "$MOUNT" -a 33 0x857765760:0x8:0x2 2>&1 | grep "Invalid argument" || error "unexpected hsm_remove failure (1)" # mntpath is present but file is given - $LFS hsm_remove --mntpath $MOUNT --archive 30 /qwerty/uyt 2>&1 | + $LFS hsm_remove --mntpath "$MOUNT" --archive 30 /qwerty/uyt 2>&1 | grep "hsm: '/qwerty/uyt' is not a valid FID" || error "unexpected hsm_remove failure (2)" - - copytool_cleanup } run_test 29a "Tests --mntpath and --archive options" test_29b() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile local fid=$(create_small_file $f) @@ -2713,14 +2347,12 @@ test_29b() { $LFS hsm_remove -m $MOUNT -a $HSM_ARCHIVE_NUMBER $fid wait_request_state $fid REMOVE SUCCEED - - copytool_cleanup } run_test 29b "Archive/delete/remove by FID from the archive." test_29c() { # test needs a running copytool - copytool_setup + copytool setup local fid1=$(create_small_file $DIR/$tdir/$tfile-1) local fid2=$(create_small_file $DIR/$tdir/$tfile-2) @@ -2742,8 +2374,6 @@ test_29c() { wait_request_state $fid1 REMOVE SUCCEED wait_request_state $fid2 REMOVE SUCCEED wait_request_state $fid3 REMOVE SUCCEED - - copytool_cleanup } run_test 29c "Archive/delete/remove by FID, using a file list." @@ -2755,14 +2385,11 @@ test_29d() { local file local fid - copytool_cleanup $(comma_list $(agts_nodes)) - # start all of the copytools for n in $(seq $AGTCOUNT); do - copytool_setup agt$n $MOUNT2 $n + copytool setup -f agt$n -a $n done - trap "copytool_cleanup $(comma_list $(agts_nodes))" EXIT # archive files file=$DIR/$tdir/$tfile fid=$(create_small_file $file) @@ -2805,15 +2432,11 @@ test_29d() { fi done - [[ $scnt -ne 1 ]] && + [[ $scnt -eq 1 ]] || error "one and only CT should have removed successfully" - [[ $AGTCOUNT -ne $((scnt + fcnt)) ]] && + [[ $AGTCOUNT -eq $((scnt + fcnt)) ]] || error "all but one CT should have failed to remove" - - trap - EXIT - copytool_cleanup $(comma_list $(agts_nodes)) - } run_test 29d "hsm_remove by FID with archive_id 0 for unlinked file cause "\ "request to be sent once for each registered archive_id" @@ -2824,29 +2447,25 @@ test_30a() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /bin/true $tdir/$tfile local f=$DIR/$tdir/true - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) + stack_trap "cdt_clear_no_retry" EXIT # set no retry action mode cdt_set_no_retry do_node $CLIENT2 $f local st=$? - # cleanup - # remove no try action mode - cdt_clear_no_retry $LFS hsm_state $f [[ $st == 0 ]] || error "Failed to exec a released file" - - copytool_cleanup } run_test 30a "Restore at exec (import case)" @@ -2856,7 +2475,7 @@ test_30b() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/true @@ -2866,19 +2485,17 @@ test_30b() { wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f $LFS hsm_state $f + + stack_trap cdt_clear_no_retry EXIT # set no retry action mode cdt_set_no_retry + do_node $CLIENT2 $f local st=$? - # cleanup - # remove no try action mode - cdt_clear_no_retry $LFS hsm_state $f [[ $st == 0 ]] || error "Failed to exec a released file" - - copytool_cleanup } run_test 30b "Restore at exec (release case)" @@ -2886,7 +2503,7 @@ test_30c() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/SLEEP @@ -2897,8 +2514,11 @@ test_30c() { wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f check_hsm_flags $f "0x0000000d" + + stack_trap cdt_clear_no_retry EXIT # set no retry action mode cdt_set_no_retry + do_node $CLIENT2 "$f 10" & local pid=$! sleep 3 @@ -2914,12 +2534,7 @@ test_30c() { error "Binary overwritten during exec" fi - # cleanup - # remove no try action mode - cdt_clear_no_retry check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 30c "Update during exec of released file must fail" @@ -2957,32 +2572,28 @@ restore_and_check_size() { test_31a() { # test needs a running copytool - copytool_setup + copytool setup create_archive_file $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$($LFS path2fid $f) - HSM_ARCHIVE_PURGE=false copytool_setup + copytool setup restore_and_check_size $f $fid local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31a "Import a large file and check size during restore" test_31b() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2992,19 +2603,15 @@ test_31b() { local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31b "Restore a large unaligned file and check size during restore" test_31c() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 33 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -3014,182 +2621,142 @@ test_31c() { local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31c "Restore a large aligned file and check size during restore" test_33() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f - # to be sure wait_all_done will not be mislead by previous tests - # and ops. - cdt_purge - wait_for_grace_delay - # Also raise grace_delay significantly so the Canceled - # Restore action will stay enough long avail. - local old_grace=$(get_hsm_param grace_delay) - set_hsm_param grace_delay 100 + # Prevent restore from completing + copytool_suspend + # Implicit restore md5sum $f >/dev/null & local pid=$! - wait_request_state $fid RESTORE STARTED + wait_request_state $fid RESTORE STARTED kill -15 $pid - sleep 1 - - # Check restore trigger process was killed - local killed=$(ps -o pid,comm hp $pid >/dev/null) - - $LFS hsm_cancel $f - # instead of waiting+checking both Restore and Cancel ops - # sequentially, wait for both to be finished and then check - # each results. - wait_all_done 100 $fid - local rstate=$(get_request_state $fid RESTORE) - local cstate=$(get_request_state $fid CANCEL) - - # restore orig grace_delay. - set_hsm_param grace_delay $old_grace - - if [[ "$rstate" == "CANCELED" ]] ; then - [[ "$cstate" == "SUCCEED" ]] || - error "Restore state is CANCELED and Cancel state " \ - "is not SUCCEED but $cstate" - echo "Restore state is CANCELED, Cancel state is SUCCEED" - elif [[ "$rstate" == "SUCCEED" ]] ; then - [[ "$cstate" == "FAILED" ]] || - error "Restore state is SUCCEED and Cancel state " \ - "is not FAILED but $cstate" - echo "Restore state is SUCCEED, Cancel state is FAILED" - else - error "Restore state is $rstate and Cancel state is $cstate" - fi - - [ -z $killed ] || - error "Cannot kill process waiting for restore ($killed)" + copytool_continue - copytool_cleanup + # Check restore trigger process was killed + wait $pid + [ $? -eq 143 ] || error "md5sum was not 'Terminated'" } run_test 33 "Kill a restore waiting process" test_34() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - rm $f || error "rm $f failed" # rm must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 rm "$f" || error "rm $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiatior still running" wait $pid || error "Restore initiator failed with $?" - copytool_cleanup + # Check the file was actually deleted + [ ! -f "$f" ] || error "$f was not deleted" } run_test 34 "Remove file during restore" test_35() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile local f1=$DIR/$tdir/$tfile-1 - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return - + local fid=$(create_empty_file "$f") local fid1=$(copy_file /etc/passwd $f1) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - mv $f1 $f || error "mv $f1 $f failed" # mv must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 mv "$f1" "$f" || error "mv $f1 $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiatior still running" wait $pid || error "Restore initiator failed with $?" - fid2=$(path2fid $f) + local fid2=$(path2fid $f) [[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1" - - copytool_cleanup } run_test 35 "Overwrite file during restore" test_36() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! - wait_request_state $fid RESTORE STARTED - mv $f $f.new - # rm must not block during restore wait_request_state $fid RESTORE STARTED + # mv must not block during restore + timeout --signal=KILL 10 mv "$f" "$f.new" || + error "mv '$f' '$f.new' failed with rc=$?" + + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || - error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiator is still running" wait $pid || error "Restore initiator failed with $?" - - copytool_cleanup } run_test 36 "Move file during restore" test_37() { # LU-5683: check that an archived dirty file can be rearchived. - copytool_cleanup - copytool_setup $SINGLEAGT $MOUNT2 + copytool setup local f=$DIR/$tdir/$tfile local fid @@ -3208,8 +2775,6 @@ test_37() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - - copytool_cleanup } run_test 37 "re-archive a dirty file" @@ -3224,12 +2789,6 @@ multi_archive() { echo "$count archive requests submitted" } -cleanup_test_40() { - trap 0 - set_hsm_param max_requests $max_requests - copytool_cleanup -} - test_40() { local stream_count=4 local file_count=100 @@ -3240,6 +2799,7 @@ test_40() { local fid="" local max_requests=$(get_hsm_param max_requests) + stack_trap "set_hsm_param max_requests $max_requests" EXIT # Increase the number of HSM request that can be performed in # parallel. With the coordinator running once per second, this # also limits the number of requests per seconds that can be @@ -3248,20 +2808,14 @@ test_40() { # fail some requests if if gets too many at once. set_hsm_param max_requests 300 - trap cleanup_test_40 EXIT - for i in $(seq 1 $file_count); do for p in $(seq 1 $stream_count); do fid=$(copy_file /etc/hosts $f.$p.$i) done done - # force copytool to use a local/temp archive dir to ensure best - # performance vs remote/NFS mounts used in auto-tests - if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then - copytool_setup - else - copytool_setup $SINGLEAGT $MOUNT $HSM_ARCHIVE_NUMBER $TMP/$tdir - fi + + copytool setup + # to be sure wait_all_done will not be mislead by previous tests cdt_purge wait_for_grace_delay @@ -3275,18 +2829,16 @@ test_40() { wait ${pids[*]} echo OK wait_all_done 100 - - cleanup_test_40 } run_test 40 "Parallel archive requests" test_52() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED @@ -3302,18 +2854,16 @@ test_52() { wait $MULTIPID || error "multiop close failed" check_hsm_flags $f "0x0000000b" - - copytool_cleanup } run_test 52 "Opened for write file on an evicted client should be set dirty" test_53() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3330,17 +2880,14 @@ test_53() { wait $MULTIPID || error "multiop close failed" check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 53 "Opened for read file on an evicted client should not be set dirty" test_54() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 39 1000000) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3348,6 +2895,7 @@ test_54() { check_hsm_flags $f "0x00000001" + stack_trap "cdt_clear_no_retry" EXIT # Avoid coordinator resending this request as soon it has failed. cdt_set_no_retry @@ -3356,18 +2904,14 @@ test_54() { wait_request_state $fid ARCHIVE FAILED check_hsm_flags $f "0x00000003" - - cdt_clear_no_retry - copytool_cleanup } run_test 54 "Write during an archive cancels it" test_55() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 39 1000000) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3375,6 +2919,7 @@ test_55() { check_hsm_flags $f "0x00000001" + stack_trap "cdt_clear_no_retry" EXIT # Avoid coordinator resending this request as soon it has failed. cdt_set_no_retry @@ -3383,20 +2928,14 @@ test_55() { wait_request_state $fid ARCHIVE FAILED check_hsm_flags $f "0x00000003" - - cdt_clear_no_retry - copytool_cleanup } run_test 55 "Truncate during an archive cancels it" test_56() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3412,8 +2951,6 @@ test_56() { wait_request_state $fid ARCHIVE SUCCEED check_hsm_flags $f "0x00000009" - - copytool_cleanup } run_test 56 "Setattr during an archive is ok" @@ -3422,7 +2959,7 @@ test_57() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/test_archive_remote @@ -3443,8 +2980,6 @@ test_57() { error "hsm_restore failed" wait_request_state $fid RESTORE SUCCEED - - copytool_cleanup } run_test 57 "Archive a file with dirty cache on another node" @@ -3486,7 +3021,7 @@ truncate_released_file() { test_58() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -3500,25 +3035,21 @@ test_58() { echo "truncate to 0" truncate_released_file /etc/passwd 0 - - copytool_cleanup } run_test 58 "Truncate a released file will trigger restore" test_59() { local fid - local server_version=$(lustre_version_code $SINGLEMDS) - [[ $server_version -lt $(version_code 2.7.63) ]] && - skip "Need MDS version at least 2.7.63" && return + [[ $MDS1_VERSION -lt $(version_code 2.7.63) ]] && + skip "Need MDS version at least 2.7.63" - copytool_setup + copytool setup $MCREATE $DIR/$tfile || error "mcreate failed" $TRUNCATE $DIR/$tfile 42 || error "truncate failed" $LFS hsm_archive $DIR/$tfile || error "archive request failed" fid=$(path2fid $DIR/$tfile) wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $DIR/$tfile || error "release failed" - copytool_cleanup } run_test 59 "Release stripeless file with non-zero size" @@ -3526,17 +3057,12 @@ test_60() { # This test validates the fix for LU-4512. Ensure that the -u # option changes the progress reporting interval from the # default (30 seconds) to the user-specified interval. + local f=$DIR/$tdir/$tfile + local fid=$(create_file "$f" 1M 10) + local interval=5 local progress_timeout=$((interval * 4)) - - # test needs a new running copytool - copytool_cleanup - HSMTOOL_UPDATE_INTERVAL=$interval copytool_setup - - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 10) - [ $? != 0 ] && skip "not enough free space" && return + copytool setup -b 1 --update-interval $interval local mdtidx=0 local mdt=${MDT_PREFIX}${mdtidx} @@ -3552,13 +3078,10 @@ test_60() { error "could not archive file" local agent=$(facet_active_host $SINGLEAGT) - local prefix=$TESTLOG_PREFIX - [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME - local copytool_log=$prefix.copytool_log.$agent.log - + local logfile=$(copytool_logfile $SINGLEAGT) wait_update $agent \ - "grep -o start.copy $copytool_log" "start copy" 100 || + "grep -o start.copy \"$logfile\"" "start copy" 100 || error "copytool failed to start" local cmd="$LCTL get_param -n ${mdt}.hsm.active_requests" @@ -3571,7 +3094,7 @@ test_60() { echo -n "Expecting a progress update within $progress_timeout seconds... " while [ true ]; do RESULT=$(do_node $(facet_active_host $mds) "$cmd") - if [ $RESULT -gt 0 ]; then + if [ -n "$RESULT" ] && [ "$RESULT" -gt 0 ]; then echo "$RESULT bytes copied in $WAIT seconds." break elif [ $WAIT -ge $progress_timeout ]; then @@ -3591,17 +3114,14 @@ test_60() { fi echo "Wait for on going archive hsm action to complete" - wait_update $agent "grep -o copied $copytool_log" "copied" 10 || + wait_update $agent "grep -o copied \"$logfile\"" "copied" 10 || echo "File archiving not completed even after 10 secs" - - cdt_clear_no_retry - copytool_cleanup } run_test 60 "Changing progress update interval from default" test_61() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -3611,19 +3131,14 @@ test_61() { rm -f $f cdt_enable wait_request_state $fid ARCHIVE FAILED - - copytool_cleanup } run_test 61 "Waiting archive of a removed file should fail" test_70() { # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup - - # Just start and stop the copytool to generate events. - cdt_clear_no_retry + copytool setup --event-fifo "$HSMTOOL_MONITOR_DIR/fifo" # Wait for the copytool to register. wait_update --verbose $(facet_active_host mds1) \ @@ -3631,7 +3146,8 @@ test_70() { uuid 100 || error "copytool failed to register with MDT0000" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytools failed to stop" local REGISTER_EVENT local UNREGISTER_EVENT @@ -3657,7 +3173,6 @@ test_70() { error "Copytool failed to send unregister event to FIFO" fi - copytool_monitor_cleanup echo "Register/Unregister events look OK." } run_test 70 "Copytool logs JSON register/unregister events to FIFO" @@ -3667,15 +3182,17 @@ test_71() { local interval=5 # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_UPDATE_INTERVAL=$interval \ - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup + copytool setup --update-interval $interval --event-fifo \ + "$HSMTOOL_MONITOR_DIR/fifo" + + stack_trap "cdt_clear_no_retry" EXIT + # Just start and stop the copytool to generate events. + cdt_clear_no_retry local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_small_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3684,8 +3201,11 @@ test_71() { local expected_fields="event_time data_fid source_fid" expected_fields+=" total_bytes current_bytes" - local START_EVENT - local FINISH_EVENT + local -A events=( + [ARCHIVE_START]=false + [ARCHIVE_FINISH]=false + [ARCHIVE_RUNNING]=false + ) while read event; do # Make sure we're not getting anything from previous events. for field in $expected_fields; do @@ -3698,15 +3218,9 @@ test_71() { fi eval $parsed - if [ $event_type == "ARCHIVE_START" ]; then - START_EVENT=$event - continue - elif [ $event_type == "ARCHIVE_FINISH" ]; then - FINISH_EVENT=$event - continue - elif [ $event_type != "ARCHIVE_RUNNING" ]; then - continue - fi + events["$event_type"]=true + + [ "$event_type" != ARCHIVE_RUNNING ] && continue # Do some simple checking of the progress update events. for expected_field in $expected_fields; do @@ -3715,30 +3229,20 @@ test_71() { fi done - if [ $total_bytes -eq 0 ]; then - error "Expected total_bytes to be > 0" - fi + [ $total_bytes -gt 0 ] || error "Expected total_bytes to be > 0" - # These should be identical throughout an archive - # operation. - if [ $source_fid != $data_fid ]; then + # These should be identical throughout an archive operation + [ $source_fid == $data_fid ] || error "Expected source_fid to equal data_fid" - fi done < <(echo $"$(get_copytool_event_log)") - if [ -z "$START_EVENT" ]; then - error "Copytool failed to send archive start event to FIFO" - fi - - if [ -z "$FINISH_EVENT" ]; then - error "Copytool failed to send archive finish event to FIFO" - fi + # Check we received every type of events we were expecting + for event in "${!events[@]}"; do + ${events["$event"]} || + error "Copytool failed to send '$event' event to FIFO" + done echo "Archive events look OK." - - cdt_clear_no_retry - copytool_cleanup - copytool_monitor_cleanup } run_test 71 "Copytool logs JSON archive events to FIFO" @@ -3747,10 +3251,10 @@ test_72() { local interval=5 # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_UPDATE_INTERVAL=$interval \ - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup + copytool setup --update-interval $interval --event-fifo \ + "$HSMTOOL_MONITOR_DIR/fifo" local test_file=$HSMTOOL_MONITOR_DIR/file local cmd="dd if=/dev/urandom of=$test_file count=16 bs=1000000 " @@ -3761,7 +3265,7 @@ test_72() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f f=$DIR2/$tdir/$tfile echo "Verifying released state: " check_hsm_flags $f "0x0000000d" @@ -3836,12 +3340,6 @@ test_72() { fi echo "Restore events look OK." - - cdt_clear_no_retry - copytool_cleanup - copytool_monitor_cleanup - - rm -rf $test_dir } run_test 72 "Copytool logs JSON restore events to FIFO" @@ -3857,14 +3355,8 @@ test_90() { fid=$(copy_file /etc/hosts $f.$i) echo $f.$i >> $FILELIST done - # force copytool to use a local/temp archive dir to ensure best - # performance vs remote/NFS mounts used in auto-tests - if do_facet $SINGLEAGT "df --local $HSM_ARCHIVE" >/dev/null 2>&1 ; then - copytool_setup - else - local dai=$(get_hsm_param default_archive_id) - copytool_setup $SINGLEAGT $MOUNT $dai $TMP/$tdir - fi + + copytool setup # to be sure wait_all_done will not be mislead by previous tests cdt_purge wait_for_grace_delay @@ -3876,7 +3368,6 @@ test_90() { $LFS hsm_restore --filelist $FILELIST || error "cannot restore a file list" wait_all_done 100 - copytool_cleanup } run_test 90 "Archive/restore a file list" @@ -3919,7 +3410,7 @@ run_test 102 "Verify coordinator control" test_103() { # test needs a running copytool - copytool_setup + copytool setup local i="" local fid="" @@ -3938,19 +3429,14 @@ test_103() { grep -v CANCELED | grep -v SUCCEED | grep -v FAILED") [[ -z "$res" ]] || error "Some request have not been canceled" - - copytool_cleanup } run_test 103 "Purge all requests" DATA=CEA DATAHEX='[434541]' test_104() { - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\ @@ -3960,30 +3446,18 @@ test_104() { [[ "$data1" == "$DATAHEX" ]] || error "Data field in records is ($data1) and not ($DATAHEX)" - # archive the file - copytool_setup - - wait_request_state $fid ARCHIVE SUCCEED - - copytool_cleanup + cdt_purge } run_test 104 "Copy tool data field" -cleanup_test_105() { - trap 0 - set_hsm_param max_requests $max_requests - copytool_cleanup -} - test_105() { local max_requests=$(get_hsm_param max_requests) mkdir -p $DIR/$tdir local i="" + stack_trap "set_hsm_param max_requests $max_requests" EXIT set_hsm_param max_requests 300 - trap cleanup_test_105 EXIT - cdt_disable for i in $(seq -w 1 10); do cp /etc/passwd $DIR/$tdir/$i @@ -4003,14 +3477,12 @@ test_105() { [[ "$reqcnt1" == "$reqcnt2" ]] || error "Requests count after shutdown $reqcnt2 != "\ "before shutdown $reqcnt1" - - cleanup_test_105 } run_test 105 "Restart of coordinator" test_106() { # test needs a running copytool - copytool_setup + copytool setup local uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) @@ -4018,20 +3490,22 @@ test_106() { search_copytools || error "No copytool found" - copytool_cleanup + kill_copytools + wait_copytools || error "Copytool failed to stop" + check_agent_unregistered $uuid - copytool_setup + copytool setup uuid=$(get_agent_uuid $(facet_active_host $SINGLEAGT)) check_agent_registered $uuid - - copytool_cleanup } run_test 106 "Copytool register/unregister" test_107() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + # test needs a running copytool - copytool_setup + copytool setup # create and archive file mkdir -p $DIR/$tdir local f1=$DIR/$tdir/$tfile @@ -4046,7 +3520,6 @@ test_107() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f2 # main check of this sanity: this request MUST succeed wait_request_state $fid ARCHIVE SUCCEED - copytool_cleanup } run_test 107 "Copytool re-register after MDS restart" @@ -4087,14 +3560,14 @@ run_test 109 "Policy display/change" test_110a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) cdt_set_non_blocking_restore @@ -4109,14 +3582,12 @@ test_110a() { [[ $st == 1 ]] || error "md5sum returns $st != 1, "\ "should also perror ENODATA (No data available)" - - copytool_cleanup } run_test 110a "Non blocking restore policy (import case)" test_110b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -4137,21 +3608,19 @@ test_110b() { [[ $st == 1 ]] || error "md5sum returns $st != 1, "\ "should also perror ENODATA (No data available)" - - copytool_cleanup } run_test 110b "Non blocking restore policy (release case)" test_111a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) cdt_set_no_retry @@ -4167,19 +3636,18 @@ test_111a() { # Test result [[ $st == 0 ]] || error "Restore does not failed" - - copytool_cleanup } run_test 111a "No retry policy (import case), restore will error"\ " (No such file or directory)" test_111b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) + stack_trap cdt_clear_no_retry EXIT cdt_set_no_retry $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -4191,20 +3659,15 @@ test_111b() { wait_request_state $fid RESTORE FAILED local st=$? - # cleanup - cdt_clear_no_retry - # Test result [[ $st == 0 ]] || error "Restore does not failed" - - copytool_cleanup } run_test 111b "No retry policy (release case), restore will error"\ " (No such file or directory)" test_112() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -4215,47 +3678,80 @@ test_112() { echo $l local res=$(echo $l | cut -f 2- -d" " | grep ARCHIVE) - # cleanup cdt_enable wait_request_state $fid ARCHIVE SUCCEED # Test result [[ ! -z "$res" ]] || error "action is $l which is not an ARCHIVE" - - copytool_cleanup } run_test 112 "State of recorded request" -test_200() { - # test needs a running copytool - copytool_setup +test_113() { + local file1=$DIR/$tdir/$tfile + local file2=$DIR2/$tdir/$tfile + + local fid=$(create_small_sync_file $file1) + + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT + zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" || + error "cannot mount '$MOUNT3' on '$SINGLEAGT'" + + copytool setup -m "$MOUNT3" + + do_nodes $(comma_list $(nodes_list)) $LCTL clear + + $LFS hsm_archive $file1 || error "Fail to archive $file1" + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $file1 + echo "Verifying released state: " + check_hsm_flags $file1 "0x0000000d" + + multiop_bg_pause $file1 oO_WRONLY:O_APPEND:_w4c || error "multiop failed" + MULTIPID=$! + stat $file2 & + kill -USR1 $MULTIPID + + wait + sync + + local size1=$(stat -c "%s" $file1) + local size2=$(stat -c "%s" $file2) + + [ $size1 -eq $size2 ] || error "sizes are different $size1 $size2" +} +run_test 113 "wrong stat after restore" + +test_200() { local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup + + # Prevent archive from completing + copytool_suspend - # test with cdt on is made in test_221 - cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f # wait archive to register at CDT - wait_request_state $fid ARCHIVE WAITING - $LFS hsm_cancel $f - cdt_enable + wait_request_state $fid ARCHIVE STARTED + + # Cancel the archive + $LFS hsm_cancel "$f" + wait_request_state $fid ARCHIVE CANCELED - wait_request_state $fid CANCEL SUCCEED - copytool_cleanup + copytool_continue + wait_request_state $fid CANCEL SUCCEED } run_test 200 "Register/Cancel archive" test_201() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile create_archive_file $tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) # test with cdt on is made in test_222 @@ -4267,69 +3763,58 @@ test_201() { cdt_enable wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - - copytool_cleanup } run_test 201 "Register/Cancel restore" test_202() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + # test needs a running copytool + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - cdt_disable + copytool_suspend $LFS hsm_remove $f # wait remove to register at CDT - wait_request_state $fid REMOVE WAITING + wait_request_state $fid REMOVE STARTED $LFS hsm_cancel $f - cdt_enable - wait_request_state $fid REMOVE CANCELED - copytool_cleanup + wait_request_state $fid REMOVE CANCELED } run_test 202 "Register/Cancel remove" -test_220() { +test_220A() { # was test_220 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - changelog_cleanup - - local target=0x0 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0x0 || + error "The expected changelog was not emitted" } -run_test 220 "Changelog for archive" +run_test 220A "Changelog for archive" test_220a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register # block copytool operations to allow for HSM request to be # submitted and file be unlinked (CDT will find object removed) @@ -4346,77 +3831,65 @@ test_220a() { wait_request_state $fid ARCHIVE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - changelog_cleanup - # HE_ARCHIVE|ENOENT - local target=0x2 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0x2 || + error "The expected changelog was not emitted" } run_test 220a "Changelog for failed archive" test_221() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") - changelog_setup + copytool setup -b 1 + changelog_register + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED + $LFS hsm_cancel $f wait_request_state $fid ARCHIVE CANCELED - wait_request_state $fid CANCEL SUCCEED - - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - local target=0x7d - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x7d || + error "The expected changelog was not emitted" } run_test 221 "Changelog for archive canceled" test_222a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) - changelog_setup + changelog_register $LFS hsm_restore $f wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x80 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x80 || + error "The expected changelog was not emitted" } run_test 222a "Changelog for explicit restore" test_222b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -4425,27 +3898,23 @@ test_222b() { wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x80 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x80 || + error "The expected changelog was not emitted" } run_test 222b "Changelog for implicit restore" test_222c() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) - changelog_setup + changelog_register # block copytool operations to allow for HSM request to be # submitted and file be unlinked (CDT will find object removed) @@ -4462,25 +3931,21 @@ test_222c() { wait_request_state $fid RESTORE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - # HE_RESTORE|ENOENT - local target=0x82 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x82 || + error "The expected changelog was not emitted" } run_test 222c "Changelog for failed explicit restore" test_222d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -4490,26 +3955,22 @@ test_222d() { wait_request_state $fid RESTORE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - # HE_RESTORE|ENOENT - local target=0x82 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x82 || + error "The expected changelog was not emitted" } run_test 222d "Changelog for failed implicit restore" test_223a() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile create_archive_file $tdir/$tfile - changelog_setup + changelog_register - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) $LFS hsm_restore $f @@ -4518,81 +3979,69 @@ test_223a() { wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0xfd - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0xfd || + error "The expected changelog was not emitted" } run_test 223a "Changelog for restore canceled (import case)" test_223b() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup -b 1 + changelog_register - changelog_setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + + # Prevent restore from completing + copytool_suspend $LFS hsm_restore $f wait_request_state $fid RESTORE STARTED + $LFS hsm_cancel $f wait_request_state $fid RESTORE CANCELED - wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0xfd - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - cleanup + changelog_find -type HSM -target-fid $fid -flags 0xfd || + error "The expected changelog was not emitted" } run_test 223b "Changelog for restore canceled (release case)" -test_224() { +test_224A() { # was test_224 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_remove $f wait_request_state $fid REMOVE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1) - - local target=0x200 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x200 || + error "The expected changelog was not emitted" } -run_test 224 "Changelog for remove" +run_test 224A "Changelog for remove" test_224a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -4613,59 +4062,46 @@ test_224a() { wait_request_state $fid REMOVE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1) - - # HE_REMOVE|ENOENT - local target=0x202 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + # HE_REMOVE|ENOENT=0x202 + changelog_find -type HSM -target-fid $fid -flags 0x202 || + error "The expected changelog was not emitted" } run_test 224a "Changelog for failed remove" test_225() { - # test needs a running copytool - copytool_setup - # test is not usable because remove request is too fast # so it is always finished before cancel can be done ... echo "Test disabled" - copytool_cleanup return 0 + # test needs a running copytool + copytool setup + local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - # if cdt is on, it can serve too quickly the request - cdt_disable + # Prevent restore from completing + copytool_suspend $LFS hsm_remove $f + $LFS hsm_cancel $f - cdt_enable wait_request_state $fid REMOVE CANCELED - wait_request_state $fid CANCEL SUCCEED - flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) - local flags=$($LFS changelog ${MDT[0]} | grep HSM | grep $fid | - tail -n 1 | awk '{print $5}') - - local target=0x27d - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - cleanup + changelog_find -type HSM -target-fid $fid -flags 0x27d + error "The expected changelog was not emitted" } run_test 225 "Changelog for remove canceled" test_226() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -4676,7 +4112,7 @@ test_226() { local fid2=$(copy_file /etc/passwd $f2) copy_file /etc/passwd $f3 - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1 wait_request_state $fid1 ARCHIVE SUCCEED @@ -4685,84 +4121,63 @@ test_226() { rm $f1 || error "rm $f1 failed" - local flags=$(changelog_get_flags ${MDT[0]} UNLNK $fid1) - - local target=0x3 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + changelog_dump + changelog_find -type UNLNK -target-fid $fid1 -flags 0x3 || + error "The expected changelog was not emitted" mv $f3 $f2 || error "mv $f3 $f2 failed" - flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) - - target=0x3 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type RENME -target-fid $fid2 -flags 0x3 || + error "The expected changelog was not emitted" } run_test 226 "changelog for last rm/mv with exiting archive" -check_flags_changes() { - local f=$1 - local fid=$2 - local hsm_flag=$3 - local fst=$4 - local cnt=$5 - +# This is just a utility function to clarify what test_227 does +__test_227() +{ local target=0x280 - $LFS hsm_set --$hsm_flag $f || - error "Cannot set $hsm_flag on $f" - local flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) - local seen=${#flags[*]} - cnt=$((fst + cnt)) - [[ $seen == $cnt ]] || - error "set $hsm_flag: Changelog events $seen != $cnt" - [[ ${flags[$((cnt - 1))]} == $target ]] || - error "set $hsm_flag: Changelog flags are "\ - "${flags[$((cnt - 1))]} not $target" - - $LFS hsm_clear --$hsm_flag $f || - error "Cannot clear $hsm_flag on $f" - flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) - seen=${#flags[*]} - cnt=$(($cnt + 1)) - [[ $cnt == $seen ]] || - error "clear $hsm_flag: Changelog events $seen != $cnt" - - [[ ${flags[$((cnt - 1))]} == $target ]] || - error "clear $hsm_flag: Changelog flag is "\ - "${flags[$((cnt - 1))]} not $target" + + "$LFS" "$action" --$flag "$file" || + error "Cannot ${action#hsm_} $flag on '$file'" + + # Only one changelog should be produced + local entries="$(changelog_find -type HSM -target-fid $fid)" + [ $(wc -l <<< "$entries") -eq $((++count)) ] || + error "lfs $action --$flag '$file' produced more than one" \ + "changelog record" + + # Parse the last changelog record + local entry="$(tail -n 1 <<< "$entries")" + eval local -A changelog=$(changelog2array $entry) + + # Also check the flags match what is expected + [[ ${changelog[flags]} == $target ]] || + error "Changelog flag is '${changelog[flags]}', not $target" } test_227() { - # test needs a running copytool - copytool_setup - changelog_setup - - mkdir -p $DIR/$tdir - typeset -a flags + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + local count=0 - for i in norelease noarchive exists archived - do - local f=$DIR/$tdir/$tfile-$i - local fid=$(copy_file /etc/passwd $f) - check_flags_changes $f $fid $i 0 1 - done + changelog_register - f=$DIR/$tdir/$tfile---lost - fid=$(copy_file /etc/passwd $f) - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f - wait_request_state $fid ARCHIVE SUCCEED - check_flags_changes $f $fid lost 3 1 + for flag in norelease noarchive exists archived lost; do + if [ "$flag" == lost ]; then + # The flag "lost" only works on an archived file + "$LFS" hsm_set --archived "$file" + ((count++)) + fi - cleanup + action="hsm_set" __test_227 + action="hsm_clear" __test_227 + done } run_test 227 "changelog when explicit setting of HSM flags" test_228() { # test needs a running copytool - copytool_setup + copytool setup local fid=$(create_small_sync_file $DIR/$tfile) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile @@ -4792,124 +4207,128 @@ test_228() { rm -f $DIR/$tfile $DIR/$tfile.2 || error "rm $DIR/$tfile or $DIR/$tfile.2 failed" - copytool_cleanup } run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse" test_250() { - # test needs a running copytool - copytool_setup + local file="$DIR/$tdir/$tfile" + + # set max_requests to allow one request of each type to be started (3) + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + # speed up test + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + + # send 1 requests of each kind twice + copytool setup + # setup the files + for action in archive restore remove; do + local filepath="$file"-to-$action + local fid=$(create_empty_file "$filepath") + local fid2=$(create_empty_file "$filepath".bis) + + if [ "$action" != archive ]; then + "$LFS" hsm_archive "$filepath" + wait_request_state $fid ARCHIVE SUCCEED + "$LFS" hsm_archive "$filepath".bis + wait_request_state $fid2 ARCHIVE SUCCEED + fi + if [ "$action" == restore ]; then + "$LFS" hsm_release "$filepath" + "$LFS" hsm_release "$filepath".bis + fi + done - mkdir -p $DIR/$tdir - local maxrequest=$(get_hsm_param max_requests) - local rqcnt=$(($maxrequest * 3)) - local i="" + # suspend the copytool to prevent requests from completing + stack_trap "copytool_continue" EXIT + copytool_suspend - cdt_disable - for i in $(seq -w 1 $rqcnt); do - rm -f $DIR/$tdir/$i - dd if=/dev/urandom of=$DIR/$tdir/$i bs=1M count=10 conv=fsync - done - # we do it in 2 steps, so all requests arrive at the same time - for i in $(seq -w 1 $rqcnt); do - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$i + # send `max_requests' requests (one of each kind) + for action in archive restore remove; do + filepath="$file"-to-$action + "$LFS" hsm_${action} "$filepath" + wait_request_state $(path2fid "$filepath") "${action^^}" STARTED done - cdt_enable - local cnt=$rqcnt - local wt=$rqcnt - while [[ $cnt != 0 || $wt != 0 ]]; do - sleep 1 - cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.actions |\ - grep STARTED | grep -v CANCEL | wc -l") - [[ $cnt -le $maxrequest ]] || - error "$cnt > $maxrequest too many started requests" - wt=$(do_facet $SINGLEMDS "$LCTL get_param\ - $HSM_PARAM.actions |\ - grep WAITING | wc -l") - echo "max=$maxrequest started=$cnt waiting=$wt" + + # send another batch of requests + for action in archive restore remove; do + "$LFS" hsm_${action} "$file-to-$action".bis done + # wait for `loop_period' seconds to make sure the coordinator has time + # to register those, even though it should not + sleep 1 + + # only the first batch of request should be started + local -i count + count=$(do_facet $SINGLEMDS "$LCTL" get_param -n $HSM_PARAM.actions | + grep -c STARTED) - copytool_cleanup + ((count == 3)) || + error "expected 3 STARTED requests, found $count" } run_test 250 "Coordinator max request" test_251() { - # test needs a running copytool - copytool_setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") cdt_disable # to have a short test local old_to=$(get_hsm_param active_request_timeout) - set_hsm_param active_request_timeout 4 + set_hsm_param active_request_timeout 1 # to be sure the cdt will wake up frequently so # it will be able to cancel the "old" request local old_loop=$(get_hsm_param loop_period) - set_hsm_param loop_period 2 + set_hsm_param loop_period 1 cdt_enable - # clear locks to avoid extra delay caused by flush/cancel - # and thus prevent early copytool death to timeout. - cancel_lru_locks osc + copytool setup + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED - sleep 5 + + # Let the request timeout wait_request_state $fid ARCHIVE CANCELED set_hsm_param active_request_timeout $old_to set_hsm_param loop_period $old_loop - - copytool_cleanup } run_test 251 "Coordinator request timeout" test_252() { - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 103 1048576) + local fid=$(create_empty_file "$f") - cdt_disable # to have a short test - local old_to=$(get_hsm_param active_request_timeout) - set_hsm_param active_request_timeout 20 - # to be sure the cdt will wake up frequently so - # it will be able to cancel the "old" request - local old_loop=$(get_hsm_param loop_period) - set_hsm_param loop_period 2 - cdt_enable + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 - # clear locks to avoid extra delay caused by flush/cancel - # and thus prevent early copytool death to timeout. - cancel_lru_locks osc + copytool setup + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED - rm -f $f + rm -f "$f" - # wait but less than active_request_timeout+grace_delay - sleep 25 - wait_request_state $fid ARCHIVE CANCELED + stack_trap "set_hsm_param active_request_timeout \ + $(get_hsm_param active_request_timeout)" EXIT + set_hsm_param active_request_timeout 1 - set_hsm_param active_request_timeout $old_to - set_hsm_param loop_period $old_loop - - copytool_cleanup + wait_request_state $fid ARCHIVE CANCELED + copytool_continue } run_test 252 "Timeout'ed running archive of a removed file should be canceled" test_253() { local rc # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -4937,11 +4356,367 @@ test_253() { else echo "could not release file" fi - copytool_cleanup } run_test 253 "Check for wrong file size after release" +test_254a() +{ + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" + + # Check that the counters are initialized to 0 + local count + for request_type in archive restore remove; do + count="$(get_hsm_param ${request_type}_count)" || + error "Reading ${request_type}_count failed with $?" + + [ "$count" -eq 0 ] || + error "Expected ${request_type}_count to be " \ + "0 != '$count'" + done +} +run_test 254a "Request counters are initialized to zero" + +test_254b() +{ + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" + + # The number of request to launch (at least 32) + local request_count=$((RANDOM % 32 + 32)) + printf "Will launch %i requests of each type\n" "$request_count" + + # Launch a copytool to process requests + copytool setup + + # Set hsm.max_requests to allow starting all requests at the same time + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests "$request_count" + + local timeout + local count + for request_type in archive restore remove; do + printf "Checking %s requests\n" "${request_type}" + # Suspend the copytool to give us time to read the proc files + copytool_suspend + + for ((i = 0; i < $request_count; i++)); do + case $request_type in + archive) + create_empty_file "$DIR/$tdir/$tfile-$i" \ + >/dev/null 2>&1 + ;; + restore) + lfs hsm_release "$DIR/$tdir/$tfile-$i" + ;; + esac + $LFS hsm_${request_type} "$DIR/$tdir/$tfile-$i" + done + + # Give the coordinator 10 seconds to start every request + timeout=10 + while get_hsm_param actions | grep -q WAITING; do + sleep 1 + let timeout-=1 + [ $timeout -gt 0 ] || + error "${request_type^} requests took too " \ + "long to start" + done + + count="$(get_hsm_param ${request_type}_count)" + [ "$count" -eq "$request_count" ] || + error "Expected '$request_count' (!= '$count') " \ + "active $request_type requests" + + # Let the copytool process the requests + copytool_continue + # Give it 10 seconds maximum + timeout=10 + while get_hsm_param actions | grep -q STARTED; do + sleep 1 + let timeout-=1 + [ $timeout -gt 0 ] || + error "${request_type^} requests took too " \ + "long to complete" + done + + count="$(get_hsm_param ${request_type}_count)" + [ "$count" -eq 0 ] || + error "Expected 0 (!= '$count') " \ + "active $request_type requests" + done +} +run_test 254b "Request counters are correctly incremented and decremented" + +test_255() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + + # How do you make sure the coordinator has consumed any outstanding + # event, without triggering an event yourself? + # + # You wait for a request to disappear from the coordinator's llog. + + # Warning: the setup represents 90% of this test + + # Create and process an HSM request + copytool setup + "$LFS" hsm_archive "$file" + wait_request_state $fid ARCHIVE SUCCEED + + kill_copytools + wait_copytools || error "failed to stop copytools" + + # Launch a new HSM request + rm "$file" + create_empty_file "$file" + "$LFS" hsm_archive "$file" + + cdt_shutdown + + # Have the completed request be removed as soon as the cdt wakes up + stack_trap "set_hsm_param grace_delay $(get_hsm_param grace_delay)" EXIT + set_hsm_param grace_delay 1 + # (Hopefully, time on the MDS will behave nicely) + do_facet $SINGLEMDS sleep 2 & + + # Increase `loop_period' as a mean to prevent the coordinator from + # waking itself up to do some housekeeping. + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + + wait $! || error "waiting failed" + cdt_enable + wait_request_state $fid ARCHIVE "" + # The coordinator will not wake up on its own for ~`loop_period' secs... + + # ... Unless a copytool registers. Now the real test begins + copytool setup + wait_request_state $(path2fid "$file") ARCHIVE SUCCEED +} +run_test 255 "Copytool registration wakes the coordinator up" + +# tests 260[a-c] rely on the parsing of the copytool's log file, they might +# break in the future because of that. +test_260a() +{ + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup + "$LFS" hsm_archive "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Send several archive requests + for file in "${files[@]:1}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Launch a copytool + copytool setup + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:1}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && return + done + + error "Too many ARCHIVE requests were run before the RESTORE request" +} +run_test 260a "Restore request have priority over other requests" + +# This test is very much tied to the implementation of the current priorisation +# mechanism in the coordinator. It might not make sense to keep it in the future +test_260b() +{ + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Send several archive requests + for file in "${files[@]:1}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Launch a copytool + copytool setup + copytool setup --archive-id 2 + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:1}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && return + done + + error "Too many ARCHIVE requests were run before the RESTORE request" +} +run_test 260b "Restore request have priority over other requests" + +# This test is very much tied to the implementation of the current priorisation +# mechanism in the coordinator. It might not make sense to keep it in the future +test_260c() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Force the next coordinator run to do housekeeping + cdt_shutdown + cdt_enable + + "$LFS" hsm_archive "${files[1]}" + + # Launch a copytool + copytool setup + copytool setup --archive-id 2 + + wait_request_state "$(path2fid "${files[1]}")" ARCHIVE SUCCEED + # The coordinator just did a housekeeping run it won't do another one + # for around `loop_period' seconds => requests will not be reordered + # if it costs too much (ie. when the coordinator has to discard a whole + # hal) + + # Send several archive requests + for file in "${files[@]:2}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:2}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && + error "Restore requests should not be prioritised" \ + "unless the coordinator is doing housekeeping" + done + return 0 +} +run_test 260c "Requests are not reordered on the 'hot' path of the coordinator" + test_300() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + # the only way to test ondisk conf is to restart MDS ... echo "Stop coordinator and remove coordinator state at mount" # stop coordinator @@ -4969,6 +4744,8 @@ test_300() { run_test 300 "On disk coordinator state kept between MDT umount/mount" test_301() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + local ai=$(get_hsm_param default_archive_id) local new=$((ai + 1)) @@ -4984,6 +4761,8 @@ test_301() { run_test 301 "HSM tunnable are persistent" test_302() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + local ai=$(get_hsm_param default_archive_id) local new=$((ai + 1)) @@ -5012,7 +4791,7 @@ run_test 302 "HSM tunnable are persistent when CDT is off" test_400() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -5020,7 +4799,9 @@ test_400() { local dir_mdt1=$DIR/$tdir/mdt1 # create 1 dir per MDT + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + stack_trap "rm -rf $dir_mdt1" EXIT $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" # create 1 file in each MDT @@ -5036,17 +4817,13 @@ test_400() { $LFS hsm_archive $dir_mdt1/$tfile || error "lfs hsm_archive" wait_request_state $fid2 ARCHIVE SUCCEED 1 && echo "archive successful on mdt1" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 $dir_mdt1 } run_test 400 "Single request is sent to the right MDT" test_401() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -5054,7 +4831,9 @@ test_401() { local dir_mdt1=$DIR/$tdir/mdt1 # create 1 dir per MDT + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + stack_trap "rm -rf $dir_mdt1" EXIT $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" # create 1 file in each MDT @@ -5068,10 +4847,6 @@ test_401() { echo "archive successful on mdt0" wait_request_state $fid2 ARCHIVE SUCCEED 1 && echo "archive successful on mdt1" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 $dir_mdt1 } run_test 401 "Compound requests split and sent to their respective MDTs" @@ -5090,13 +4865,10 @@ mdc_change_state() # facet, MDT_pattern, activate|deactivate } test_402a() { - # make sure there is no running copytool - copytool_cleanup - # deactivate all mdc on agent1 mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "deactivate" - HSMTOOL_NOERROR=true copytool_setup $SINGLEAGT + copytool setup --no-fail check_agent_unregistered "uuid" # match any agent @@ -5109,7 +4881,7 @@ test_402a() { run_test 402a "Copytool start fails if all MDTs are inactive" test_402b() { - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -5130,23 +4902,18 @@ test_402b() { # request should succeed now wait_request_state $fid ARCHIVE SUCCEED - - copytool_cleanup } run_test 402b "CDT must retry request upon slow start of CT" test_403() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - # make sure there is no running copytool - copytool_cleanup - local agent=$(facet_active_host $SINGLEAGT) # deactivate all mdc for MDT0001 mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate" - copytool_setup + copytool setup local uuid=$(get_agent_uuid $agent) # check the agent is registered on MDT0000, and not on MDT0001 check_agent_registered_by_mdt $uuid 0 @@ -5160,20 +4927,19 @@ test_403() { # make sure the copytool is now registered to all MDTs check_agent_registered $uuid - - copytool_cleanup } run_test 403 "Copytool starts with inactive MDT and register on reconnect" test_404() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup # create files on both MDT0000 and MDT0001 mkdir -p $DIR/$tdir local dir_mdt0=$DIR/$tdir/mdt0 + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" # create 1 file on mdt0 @@ -5191,17 +4957,13 @@ test_404() { # reactivate all mdc for MDT0001 mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "activate" - - copytool_cleanup - # clean test files and directories - rm -rf $dir_mdt0 } run_test 404 "Inactive MDT does not block requests for active MDTs" test_405() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return - copytool_setup + copytool setup mkdir -p $DIR/$tdir @@ -5215,10 +4977,10 @@ test_405() { local fid3=$(create_small_sync_file $striped_dir/${tfile}_2) local fid4=$(create_small_sync_file $striped_dir/${tfile}_3) - local idx1=$($LFS getstripe -M $striped_dir/${tfile}_0) - local idx2=$($LFS getstripe -M $striped_dir/${tfile}_1) - local idx3=$($LFS getstripe -M $striped_dir/${tfile}_2) - local idx4=$($LFS getstripe -M $striped_dir/${tfile}_3) + local idx1=$($LFS getstripe -m $striped_dir/${tfile}_0) + local idx2=$($LFS getstripe -m $striped_dir/${tfile}_1) + local idx3=$($LFS getstripe -m $striped_dir/${tfile}_2) + local idx4=$($LFS getstripe -m $striped_dir/${tfile}_3) # check that compound requests are shunt to the rights MDTs $LFS hsm_archive $striped_dir/${tfile}_0 $striped_dir/${tfile}_1 \ @@ -5243,30 +5005,29 @@ test_405() { cat $striped_dir/${tfile}_1 > /dev/null || error "cat ${tfile}_1 failed" cat $striped_dir/${tfile}_2 > /dev/null || error "cat ${tfile}_2 failed" cat $striped_dir/${tfile}_3 > /dev/null || error "cat ${tfile}_3 failed" - - copytool_cleanup } run_test 405 "archive and release under striped directory" test_406() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] && - skip "need MDS version at least 2.7.64" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.64) ] && + skip "need MDS version at least 2.7.64" local fid local mdt_index - copytool_setup fid=$(create_small_file $DIR/$tdir/$tfile) echo "old fid $fid" + copytool setup + $LFS hsm_archive $DIR/$tdir/$tfile wait_request_state "$fid" ARCHIVE SUCCEED $LFS hsm_release $DIR/$tdir/$tfile # Should migrate $tdir but not $tfile. - $LFS mv -M1 $DIR/$tdir && + $LFS migrate -m1 $DIR/$tdir && error "migrating HSM an archived file should fail" $LFS hsm_restore $DIR/$tdir/$tfile @@ -5278,10 +5039,10 @@ test_406() { cat $DIR/$tdir/$tfile > /dev/null || error "cannot read $DIR/$tdir/$tfile" - $LFS mv -M1 $DIR/$tdir || + $LFS migrate -m1 $DIR/$tdir || error "cannot complete migration after HSM remove" - mdt_index=$($LFS getstripe -M $DIR/$tdir) + mdt_index=$($LFS getstripe -m $DIR/$tdir) if ((mdt_index != 1)); then error "expected MDT index 1, got $mdt_index" fi @@ -5302,23 +5063,15 @@ test_406() { cat $DIR/$tdir/$tfile > /dev/null || error "cannot read $DIR/$tdir/$tfile" - - copytool_cleanup } run_test 406 "attempting to migrate HSM archived files is safe" test_407() { - needclients 2 || return 0 - # test needs a running copytool - copytool_setup - - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile local f2=$DIR2/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -5327,6 +5080,9 @@ test_407() { #define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x164 + # Prevent restore from completing + copytool_suspend + md5sum $f & # 1st request holds layout lock while appropriate # RESTORE record is still not added to llog @@ -5341,25 +5097,374 @@ test_407() { do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"& fail $SINGLEMDS + copytool_continue wait_request_state $fid RESTORE SUCCEED - copytool_cleanup } run_test 407 "Check for double RESTORE records in llog" test_500() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.92) ] && - skip "HSM migrate is not supported" && return - - # Stop the existing copytool - copytool_cleanup + [ $MDS1_VERSION -lt $(version_code 2.6.92) ] && + skip "HSM migrate is not supported" test_mkdir -p $DIR/$tdir - llapi_hsm_test -d $DIR/$tdir || error "One llapi HSM test failed" + + if [ $(lustre_version_code client) -lt $(version_code 2.11.56) ] || + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.11.56) ]; + then + llapi_hsm_test -d $DIR/$tdir -b || + error "One llapi HSM test failed" + else + llapi_hsm_test -d $DIR/$tdir || + error "One llapi HSM test failed" + fi } run_test 500 "various LLAPI HSM tests" -copytool_cleanup +test_600() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + chmod 777 $DIR/$tdir + $RUNAS touch $f || error "touch $f failed as $RUNAS_ID" + local fid=$(path2fid $f) + + local entry + entry=$(changelog_find -type CREAT -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID") || + error "No matching CREAT entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local nid="${changelog[nid]}" + + # Check its NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" +} +run_test 600 "Changelog fields 'u=' and 'nid='" + +test_601() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + cat $f || error "cat $f failed" + + changelog_find -type OPEN -target-fid $fid -mode "r--" || + error "No matching OPEN entry" +} +run_test 601 "OPEN Changelog entry" + +test_602() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + cat $f || error "cat $f failed" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + echo f > $f || error "write $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + # remove OPEN from changelog_mask + changelog_chmask "-OPEN" + + changelog_clear + changelog_dump + cat $f || error "cat $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + echo f > $f || error "write $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" +} +run_test 602 "Changelog record CLOSE only if open+write or OPEN recorded" + +test_603() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + setfattr -n user.xattr1 -v "value1" $f || error "setfattr $f failed" + + changelog_clear + getfattr -n user.xattr1 $f || error "getfattr $f failed" + + changelog_find -type GXATR -target-fid $fid -xattr "user.xattr1" || + error "No matching GXATR entry" +} +run_test 603 "GETXATTR Changelog entry" + +test_604() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + local f2=$DIR2/$tdir/$tfile + local procname="mdd.$FSNAME-MDT0000.changelog_deniednext" + local timeout + timeout="$(do_facet mds1 "$LCTL" get_param -n "$procname")" + stack_trap "do_facet mds1 '$LCTL' set_param '$procname=$timeout'" EXIT + do_facet mds1 lctl set_param "$procname=20" + + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + chmod 600 $f + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed" + changelog_dump + + local entry + entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID" -mode "r--") || + error "No matching NOPEN entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local nid="${changelog[nid]}" + + # Check its NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed" + changelog_dump + + changelog_find -type NOPEN -target-fid $fid && + error "There should be no NOPEN entry" + + # Sleep for `changelog_deniednext` seconds + sleep 20 + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat $f by user $RUNAS_ID should have failed" + changelog_dump + + entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID" -mode "r--") || + error "No matching NOPEN entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local nid="${changelog[nid]}" + + # Check the NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" +} +run_test 604 "NOPEN Changelog entry" + +test_605() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + local f2=$DIR2/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + changelog_dump + exec 3<> $f || error "open $f failed" + changelog_dump + + local entry + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_clear + changelog_dump + exec 4<> $f || error "open $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid && + error "There should be no OPEN entry" + + exec 4>&- || error "close $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + # access in rw, so different access mode should generate entries + cat $f || error "cat $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + # same access as first one, should not generate new entries + exec 4<> $f || error "open $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid && + error "There should be no OPEN entry" + + exec 4>&- || error "close $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + # access by different user should generate new entries + $RUNAS cat $f || error "cat $f by user $RUNAS_ID failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + exec 3>&- || error "close $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" +} +run_test 605 "Test OPEN and CLOSE rate limit in Changelogs" + +test_606() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + local llog_reader=$(do_facet mgs "which llog_reader 2> /dev/null") + llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader} + [ -z $(do_facet mgs ls -d $llog_reader 2> /dev/null) ] && + skip_env "missing llog_reader" && return + local fstype=$(facet_fstype mds1) + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + chmod 777 $DIR/$tdir + $RUNAS touch $f || error "touch $f failed as $RUNAS_ID" + local fid=$(path2fid $f) + rm $f || error "rm $f failed" + + local mntpt=$(facet_mntpt mds1) + local pass=true + local entry + + #remount mds1 as ldiskfs or zfs type + stack_trap "stop mds1; start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS" EXIT + stop mds1 || error "stop mds1 failed" + mount_fstype mds1 || error "remount mds1 failed" + + for ((i = 0; i < 1; i++)); do + do_facet mds1 $llog_reader $mntpt/changelog_catalog + local cat_file=$(do_facet mds1 $llog_reader \ + $mntpt/changelog_catalog | awk \ + '{match($0,"path=([^ ]+)",a)}END{print a[1]}') + [ -n "$cat_file" ] || error "no catalog file" + + entry=$(do_facet mds1 $llog_reader $mntpt/$cat_file | + awk "/CREAT/ && /target:\[$fid\]/ {print}") + [ -n "$entry" ] || error "no CREAT entry" + done + + local uidgid=$(echo $entry | + sed 's+.*\ user:\([0-9][0-9]*:[0-9][0-9]*\)\ .*+\1+') + [ -n "$uidgid" ] || error "uidgid is empty" + echo "Got UID/GID $uidgid" + [ "$uidgid" = "$RUNAS_ID:$RUNAS_GID" ] || + error "uidgid '$uidgid' != '$RUNAS_ID:$RUNAS_GID'" + local nid=$(echo $entry | + sed 's+.*\ nid:\(\S\S*@\S\S*\)\ .*+\1+') + [ -n "$nid" ] || error "nid is empty" + echo "Got NID $nid" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any NID ${CLIENT_NIDS[@]}" +} +run_test 606 "llog_reader groks changelog fields" complete $SECONDS check_and_cleanup_lustre