X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=9f314598d74bf2d19708f0d015ce1fc19f0d5295;hp=bd7eeddef803179dbfc77d99aeb22d96b97a2e20;hb=ca754ec8b43416d41bbd401bad7d9f93746fb867;hpb=54cf0962e286b8f1bcfca490cf828fd126f1fa60 diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index bd7eedd..9f31459 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -7,44 +7,72 @@ set -e set +o monitor -SRCDIR=$(dirname $0) -export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin - ONLY=${ONLY:-"$*"} -# bug number for skipped test: -ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT" -# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! - -LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +LUSTRE=${LUSTRE:-$(dirname $0)/..} . $LUSTRE/tests/test-framework.sh init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -MULTIOP=${MULTIOP:-multiop} +ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT " +if $SHARED_KEY; then +# bug number for skipped tests: LU-9795 LU-9795 + ALWAYS_EXCEPT+=" 13 402b " +# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
+fi + +# Skip tests for PPC that fail frequently +if [[ $(uname -m) = ppc64 ]]; then + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 1a 1b 1d 1e 12c 12f " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 12g 12h 12m 12n 12o 12p " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 12q 21 22 23 24a 24b " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 24d 24e 24f 25b 30c 37 " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 57 58 90 110b 111b 113 " + # bug number: LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 LU-12251 + ALWAYS_EXCEPT+=" 222b 222d 228 260a 260b 260c " + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 220A 220a 221 222a 222c 223a " + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 223b 224A 224a 226 227 600" + # bug number: LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 LU-12252 + ALWAYS_EXCEPT+=" 601 602 603 604 605 " +fi + +build_test_filter + +[ -n "$FILESET" ] && skip "Not functional for FILESET set" + OPENFILE=${OPENFILE:-openfile} -MMAP_CAT=${MMAP_CAT:-mmap_cat} MOUNT_2=${MOUNT_2:-"yes"} FAIL_ON_ERROR=false # script only handles up to 10 MDTs (because of MDT_PREFIX) [ $MDSCOUNT -gt 9 ] && - error "script cannot handle more than 9 MDTs, please fix" && exit + error "script cannot handle more than 9 MDTs, please fix" check_and_setup_lustre -if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then - skip_env "Need MDS version at least 2.4.53" && exit +if [[ $MDS1_VERSION -lt $(version_code 2.4.53) ]]; then + skip_env "Need MDS version at least 2.4.53" fi # $RUNAS_ID may get set incorrectly somewhere else if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then - skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" 
&& exit + skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" fi check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS - -build_test_filter +if getent group nobody; then + GROUP=nobody +elif getent group nogroup; then + GROUP=nogroup +else + error "No generic nobody group" +fi # if there is no CLIENT1 defined, some tests can be ran on localhost CLIENT1=${CLIENT1:-$HOSTNAME} @@ -54,115 +82,9 @@ CLIENT1=${CLIENT1:-$HOSTNAME} # Exception is the test which need two separate nodes CLIENT2=${CLIENT2:-$CLIENT1} -# -# In order to test multiple remote HSM agents, a new facet type named "AGT" and -# the following associated variables are added: -# -# AGTCOUNT: number of agents -# AGTDEV{N}: target HSM mount point (root path of the backend) -# agt{N}_HOST: hostname of the agent agt{N} -# SINGLEAGT: facet of the single agent -# -# The number of agents is initialized as the number of remote client nodes. -# By default, only single copytool is started on a remote client/agent. If there -# was no remote client, then the copytool will be started on the local client. -# -init_agt_vars() { - local n - local agent - - export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))} - [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1 - - export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP} - if [[ $CLIENTCOUNT -gt 1 ]] && - ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then - skip_env "SHARED_DIRECTORY should be accessible"\ - "on all client nodes" - exit 0 - fi - - # We used to put the HSM archive in $SHARED_DIRECTORY but that - # meant NFS issues could hose sanity-hsm sessions. So now we - # use $TMP instead. 
- for n in $(seq $AGTCOUNT); do - eval export AGTDEV$n=\$\{AGTDEV$n:-"$TMP/arc$n"\} - agent=CLIENT$((n + 1)) - if [[ -z "${!agent}" ]]; then - [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 || - agent=CLIENT2 - fi - eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\} - done - - export SINGLEAGT=${SINGLEAGT:-agt1} - - export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} - export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} - export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""} - export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} - export HSMTOOL_TESTDIR - export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") - # $hsm_root/$HSMTMP Makes $hsm_root dir path less generic to ensure - # rm -rf $hsm_root/* is safe even if $hsm_root becomes unset to avoid - # deleting everything in filesystem, independent of any copytool. - export HSMTMP=${HSMTMP:-"shsm"} - - HSM_ARCHIVE=$(copytool_device $SINGLEAGT) - - [ -z "${HSM_ARCHIVE// /}" ] && error "HSM_ARCHIVE is empty!" - HSM_ARCHIVE=$HSM_ARCHIVE/$HSMTMP - - HSM_ARCHIVE_NUMBER=2 - - # The test only support up to 10 MDTs - MDT_PREFIX="mdt.$FSNAME-MDT000" - HSM_PARAM="${MDT_PREFIX}0.hsm" - - # archive is purged at copytool setup - HSM_ARCHIVE_PURGE=true - - # Don't allow copytool error upon start/setup - HSMTOOL_NOERROR=false -} - -# Get the backend root path for the given agent facet. -copytool_device() { - local facet=$1 - local dev=AGTDEV$(facet_number $facet) - - echo -n ${!dev} -} - -# Stop copytool and unregister an existing changelog user. 
-cleanup() { - copytool_monitor_cleanup - copytool_cleanup - changelog_cleanup - cdt_set_sanity_policy -} - -get_mdt_devices() { - local mdtno - # get MDT device for each mdc - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - MDT[$idx]=$($LCTL get_param -n \ - mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid | - awk '{gsub(/_UUID/,""); print $1}' | head -n1) - done -} - search_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "pgrep -x $HSMTOOL_BASE" -} - -kill_copytools() { - local hosts=${1:-$(facet_active_host $SINGLEAGT)} - - echo "Killing existing copytools on $hosts" - do_nodesv $hosts "killall -q $HSMTOOL_BASE" || true + do_nodesv $hosts "libtool execute pgrep -x $HSMTOOL" } wait_copytools() { @@ -170,7 +92,7 @@ wait_copytools() { local wait_timeout=200 local wait_start=$SECONDS local wait_end=$((wait_start + wait_timeout)) - local sleep_time=100000 # 0.1 second + local sleep_time=1 while ((SECONDS < wait_end)); do if ! search_copytools $hosts; then @@ -179,9 +101,8 @@ wait_copytools() { fi echo "copytools still running on $hosts" - usleep $sleep_time - [ $sleep_time -lt 32000000 ] && # 3.2 seconds - sleep_time=$(bc <<< "$sleep_time * 2") + sleep $sleep_time + [ $sleep_time -lt 5 ] && sleep_time=$((sleep_time + 1)) done # try to dump Copytool's stack @@ -208,187 +129,29 @@ copytool_monitor_setup() { cmd="cat $test_dir/fifo > $test_dir/events &" cmd+=" echo \\\$! > $test_dir/monitor_pid" - if [[ $PDSH == *Rmrsh* ]]; then - # This is required for pdsh -Rmrsh and its handling of remote - # shells. - # Regular ssh and pdsh -Rssh work fine without this - # backgrounded subshell nonsense. - (do_node $agent "$cmd") & - export HSMTOOL_MONITOR_PDSH=$! - - # Slightly racy, but just making a best-effort to catch obvious - # problems. - sleep 1 - ps -p $HSMTOOL_MONITOR_PDSH > /dev/null || - error "Failed to start copytool monitor on $agent" - else - do_node $agent "$cmd" - if [ $? 
!= 0 ]; then - error "Failed to start copytool monitor on $agent" - fi - fi -} - -copytool_monitor_cleanup() { - local facet=${1:-$SINGLEAGT} - local agent=$(facet_active_host $facet) - - if [ -n "$HSMTOOL_MONITOR_DIR" ]; then - # Should die when the copytool dies, but just in case. - local cmd="kill \\\$(cat $HSMTOOL_MONITOR_DIR/monitor_pid)" - cmd+=" 2>/dev/null || true" - do_node $agent "$cmd" - do_node $agent "rm -fr $HSMTOOL_MONITOR_DIR" - export HSMTOOL_MONITOR_DIR= - fi + # This background subshell nonsense is required when pdsh/ssh decides + # to wait for the cat process to exit on the remote client + (do_node $agent "$cmd") & + export HSMTOOL_MONITOR_PDSH=$! - # The pdsh should die on its own when the monitor dies. Just - # in case, though, try to clean up to avoid any cruft. - if [ -n "$HSMTOOL_MONITOR_PDSH" ]; then - kill $HSMTOOL_MONITOR_PDSH 2>/dev/null || true - export HSMTOOL_MONITOR_PDSH= + # Slightly racy, but just making a best-effort to catch obvious + # problems. + sleep 1 + do_node $agent "stat $HSMTOOL_MONITOR_DIR/monitor_pid 2>&1 > /dev/null" + if [ $? 
!= 0 ]; then + error "Failed to start copytool monitor on $agent" fi } -copytool_logfile() +fid2archive() { - local host="$(facet_host "$1")" - local prefix=$TESTLOG_PREFIX - [ -n "$TESTNAME" ] && prefix+=.$TESTNAME - - printf "${prefix}.copytool${archive_id}_log.${host}.log" -} - -__lhsmtool_setup() -{ - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root \"$hsm_root\"" - [ -n "$bandwidth" ] && cmd+=" --bandwidth $bandwidth" - [ -n "$archive_id" ] && cmd+=" --archive $archive_id" - [ ${#misc_options[@]} -gt 0 ] && - cmd+=" $(IFS=" " echo "$@")" - cmd+=" \"$mountpoint\"" - - echo "Starting copytool $facet on $(facet_host $facet)" - stack_trap "do_facet $facet \"pkill -x $HSMTOOL_BASE\" || true" EXIT - do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" -} - -hsm_root() { - local facet="${1:-$SINGLEAGT}" - - printf "$(copytool_device "$facet")/${TESTSUITE}.${TESTNAME}/" -} - -copytool() -{ - local action=$1 - shift - - # Parse arguments - local fail_on_error=true - local -a misc_options - while [ $# -gt 0 ]; do - case "$1" in - -f|--facet) - shift - local facet="$1" - ;; - -m|--mountpoint) - shift - local mountpoint="$1" - ;; - -a|--archive-id) - shift - local archive_id="$1" - ;; - -b|--bwlimit) - shift - local bandwidth="$1" # in MB/s - ;; - -n|--no-fail) - local fail_on_error=false - ;; - *) - # Uncommon(/copytool dependent) option - misc_options+=("$1") - ;; - esac - shift - done - - # Use default values if needed - local facet=${facet:-$SINGLEAGT} - local mountpoint="${mountpoint:-${MOUNT2:-$MOUNT}}" - local hsm_root="$(hsm_root "$facet")" - - stack_trap "do_facet $facet \"rm -rf \\\"$hsm_root\\\"\"" EXIT - do_facet $facet "mkdir -p \"$hsm_root\"" || - error "mkdir \"$hsm_root\" failed" + local fid="$1" case "$HSMTOOL" in lhsmtool_posix) - local copytool=lhsmtool + printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid" ;; esac - - __${copytool}_${action} "${misc_options[@]}" - if [ $? 
-ne 0 ]; then - local error_msg - - case $action in - setup) - local host="$(facet_host $facet)" - error_msg="Failed to start copytool $facet on '$host'" - ;; - esac - - $fail_on_error && error "$error_msg" || echo "$error_msg" - fi -} - -copytool_setup() { - local facet=${1:-$SINGLEAGT} - # Use MOUNT2 by default if defined - local lustre_mntpnt=${2:-${MOUNT2:-$MOUNT}} - local arc_id=$3 - local hsm_root=${4:-$(copytool_device $facet)} - - [ -z "${hsm_root// /}" ] && error "copytool_setup: hsm_root empty!" - - local agent=$(facet_active_host $facet) - - if $HSM_ARCHIVE_PURGE; then - echo "Purging archive on $agent" - do_facet $facet "rm -rf $hsm_root/$HSMTMP/*" - fi - - echo "Starting copytool $facet on $agent" - do_facet $facet "mkdir -p $hsm_root/$HSMTMP/" || - error "mkdir '$hsm_root/$HSMTMP' failed" - # bandwidth is limited to 1MB/s so the copy time is known and - # independent of hardware - local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon" - cmd+=" --hsm-root $hsm_root/$HSMTMP" - [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id" - [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] || - cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL" - [[ -z "$HSMTOOL_EVENT_FIFO" ]] || - cmd+=" --event-fifo $HSMTOOL_EVENT_FIFO" - cmd+=" --bandwidth 1 $lustre_mntpnt" - - # Redirect the standard output and error to a log file which - # can be uploaded to Maloo. - local prefix=$TESTLOG_PREFIX - [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME - local copytool_log=$prefix.copytool${arc_id}_log.$agent.log - - stack_trap cleanup EXIT - do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1" - if [[ $? 
!= 0 ]]; then - [[ $HSMTOOL_NOERROR == true ]] || - error "start copytool $facet on $agent failed" - echo "start copytool $facet on $agent failed" - fi } get_copytool_event_log() { @@ -402,97 +165,22 @@ get_copytool_event_log() { error "Could not collect event log from $agent" } -copytool_cleanup() { - trap - EXIT - local agt_facet=$SINGLEAGT - local agt_hosts=${1:-$(facet_active_host $agt_facet)} - local hsm_root=$(copytool_device $agt_facet) - - [ -z "${hsm_root// /}" ] && error "copytool_cleanup: hsm_root empty!" - - local i - local facet - local param - local -a state - - kill_copytools $agt_hosts - wait_copytools $agt_hosts || error "copytools failed to stop" - - # Clean all CDTs orphans requests from previous tests that - # would otherwise need to timeout to clear. - for ((i = 0; i < MDSCOUNT; i++)); do - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - state[$i]=$(do_facet $facet "$LCTL get_param -n $param") - - # Skip already stopping or stopped CDTs. - [[ "${state[$i]}" =~ ^stop ]] && continue - - do_facet $facet "$LCTL set_param $param=shutdown" - done - - for ((i = 0; i < MDSCOUNT; i++)); do - # Only check and restore CDTs that we stopped in the first loop. - [[ "${state[$i]}" =~ ^stop ]] && continue - - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - - wait_result $facet "$LCTL get_param -n $param" stopped 20 || - error "$facet CDT state is not stopped" - - # Restore old CDT state. - do_facet $facet "$LCTL set_param $param=${state[$i]}" - done - - for ((i = 0; i < MDSCOUNT; i++)); do - # Only check CDTs that we stopped in the first loop. - [[ "${state[$i]}" =~ ^stop ]] && continue - - facet=mds$((i + 1)) - param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) - - # Check that the old CDT state was restored. 
- wait_result $facet "$LCTL get_param -n $param" "${state[$i]}" \ - 20 || error "$facet CDT state is not '${state[$i]}'" - done - - if do_facet $agt_facet "df $hsm_root" >/dev/null 2>&1 ; then - do_facet $agt_facet "rm -rf $hsm_root/$HSMTMP/*" - fi -} - copytool_suspend() { local agents=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0 + stack_trap \ + "do_nodesv $agents libtool execute pkill -CONT -x '$HSMTOOL' || true" EXIT + do_nodesv $agents "libtool execute pkill -STOP -x $HSMTOOL" || return 0 echo "Copytool is suspended on $agents" } -copytool_continue() { - local agents=${1:-$(facet_active_host $SINGLEAGT)} - - do_nodesv $agents "pkill -CONT -x $HSMTOOL_BASE" || return 0 - echo "Copytool is continued on $agents" -} - copytool_remove_backend() { local fid=$1 - local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid) + local be=$(do_facet $SINGLEAGT find "$(hsm_root)" -name $fid) echo "Remove from backend: $fid = $be" do_facet $SINGLEAGT rm -f $be } -import_file() { - mkdir -p "$(dirname "$2")" || - error "cannot create directory '$(dirname "$2")'" - - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --import $1 $2 $MOUNT" || - error "import of $1 to $2 failed" -} - file_creation_failure() { local cmd=$1 local file=$2 @@ -558,7 +246,7 @@ create_small_sync_file() { } create_archive_file() { - local file="$HSM_ARCHIVE/$1" + local file="$(hsm_root)/$1" local count=${2:-39} local source=/dev/urandom @@ -571,79 +259,14 @@ create_archive_file() { } copy2archive() { - local file=$HSM_ARCHIVE/$2 - do_facet $SINGLEAGT mkdir -p $(dirname $file) - do_facet $SINGLEAGT cp -p $1 $file || error "cannot copy $1 to $file" -} - -mdts_set_param() { - local arg=$1 - local key=$2 - local value=$3 - local mdtno - local rc=0 - if [[ "$value" != "" ]]; then - value="=$value" - fi - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local facet=mds${mdtno} - # if $arg 
include -P option, run 1 set_param per MDT on the MGS - # else, run set_param on each MDT - [[ $arg = *"-P"* ]] && facet=mgs - do_facet $facet $LCTL set_param $arg mdt.${MDT[$idx]}.$key$value - [[ $? != 0 ]] && rc=1 - done - return $rc -} + local hsm_root="$(hsm_root)" + local file="$hsm_root/$2" -mdts_check_param() { - local key="$1" - local target="$2" - local timeout="$3" - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - wait_result mds${mdtno} \ - "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ - $timeout || - error "$key state is not '$target' on mds${mdtno}" - done -} - -changelog_setup() { - CL_USERS=() - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - local cl_user=$(do_facet mds${mdtno} $LCTL \ - --device ${MDT[$idx]} \ - changelog_register -n) - CL_USERS+=($cl_user) - do_facet mds${mdtno} lctl set_param \ - mdd.${MDT[$idx]}.changelog_mask="+hsm" - $LFS changelog_clear ${MDT[$idx]} $cl_user 0 - done -} - -changelog_cleanup() { - local mdtno - for mdtno in $(seq 1 $MDSCOUNT); do - local idx=$(($mdtno - 1)) - [[ -z ${CL_USERS[$idx]} ]] && continue - $LFS changelog_clear ${MDT[$idx]} ${CL_USERS[$idx]} 0 - do_facet mds${mdtno} lctl --device ${MDT[$idx]} \ - changelog_deregister ${CL_USERS[$idx]} - done - CL_USERS=() -} - -changelog_get_flags() { - local mdt=$1 - local cltype=$2 - local fid=$3 - - $LFS changelog $mdt | awk "/$cltype/ && /t=\[$fid\]/ {print \$5}" + stack_trap "do_facet $SINGLEAGT rm -rf '$hsm_root'" EXIT + do_facet $SINGLEAGT mkdir -p "$(dirname "$file")" || + error "mkdir '$(dirname "$file")' failed" + do_facet $SINGLEAGT cp -p "$1" "$file" || + error "cannot copy '$1' to '$file'" } get_hsm_param() { @@ -652,14 +275,6 @@ get_hsm_param() { echo $val } -set_hsm_param() { - local param=$1 - local value=$2 - local opt=$3 - mdts_set_param "$opt -n" "hsm.$param" "$value" - return $? 
-} - set_test_state() { local cmd=$1 local target=$2 @@ -667,15 +282,6 @@ set_test_state() { mdts_check_param hsm_control "$target" 10 } -cdt_set_sanity_policy() { - if [[ "$CDT_POLICY_HAD_CHANGED" ]] - then - # clear all - mdts_set_param "" hsm.policy "+NRA" - mdts_set_param "" hsm.policy "-NBR" - CDT_POLICY_HAD_CHANGED= - fi -} cdt_set_no_retry() { mdts_set_param "" hsm.policy "+NRA" @@ -701,21 +307,6 @@ cdt_clear_mount_state() { mdts_set_param "-P -d" hsm_control "" } -cdt_set_mount_state() { - mdts_set_param "-P" hsm_control "$1" - # set_param -P is asynchronous operation and could race with set_param. - # In such case configs could be retrieved and applied at mgc after - # set_param -P completion. Sleep here to avoid race with set_param. - # We need at least 20 seconds. 10 for mgc_requeue_thread to wake up - # MGC_TIMEOUT_MIN_SECONDS + MGC_TIMEOUT_RAND_CENTISEC(5 + 5) - # and 10 seconds to retrieve config from server. - sleep 20 -} - -cdt_check_state() { - mdts_check_param hsm_control "$1" 20 -} - cdt_disable() { set_test_state disabled disabled } @@ -738,37 +329,6 @@ cdt_restart() { cdt_set_sanity_policy } -needclients() { - local client_count=$1 - if [[ $CLIENTCOUNT -lt $client_count ]]; then - skip "Need $client_count or more clients, have $CLIENTCOUNT" - return 1 - fi - return 0 -} - -path2fid() { - $LFS path2fid $1 | tr -d '[]' - return ${PIPESTATUS[0]} -} - -get_hsm_flags() { - local f=$1 - local u=$2 - local st - - if [[ $u == "user" ]]; then - st=$($RUNAS $LFS hsm_state $f) - else - u=root - st=$($LFS hsm_state $f) - fi - - [[ $? 
== 0 ]] || error "$LFS hsm_state $f failed (run as $u)" - - st=$(echo $st | cut -f 2 -d" " | tr -d "()," ) - echo $st -} get_hsm_archive_id() { local f=$1 @@ -780,14 +340,6 @@ get_hsm_archive_id() { echo $ar } -check_hsm_flags() { - local f=$1 - local fl=$2 - - local st=$(get_hsm_flags $f) - [[ $st == $fl ]] || error "hsm flags on $f are $st != $fl" -} - check_hsm_flags_user() { local f=$1 local fl=$2 @@ -824,44 +376,6 @@ delete_large_files() { wait_delete_completed } -make_custom_file_for_progress() { - local count=${2:-"39"} - local bs=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1) - bs=${3:-$bs} - - [[ $count -gt 0 ]] || error "Invalid file size" - [[ $bs -gt 0 ]] || error "Invalid stripe size" - - if ! create_file "${1/$DIR/$DIR2}" $bs $count fsync; then - echo "The creation of '${1/$DIR/$DIR2}' failed" >&2 - echo "It might be due to a lack of space in the filesystem" >&2 - delete_large_files >&2 - create_file "${1/$DIR/$DIR2}" $bs $count fsync || - file_creation_failure dd "${1/$DIR/$DIR2}" $? - fi -} - -wait_result() { - local facet=$1 - shift - wait_update --verbose $(facet_active_host $facet) "$@" -} - -wait_request_state() { - local fid=$1 - local request=$2 - local state=$3 - # 4th arg (mdt index) is optional - local mdtidx=${4:-0} - local mds=mds$(($mdtidx + 1)) - - local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" - cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" - - wait_result $mds "$cmd" $state 200 || - error "request on $fid is not $state on $mds" -} - get_request_state() { local fid=$1 local request=$2 @@ -895,7 +409,7 @@ wait_all_done() { [[ -n $fid ]] && cmd+=" | grep '$fid'" cmd+=" | egrep 'WAITING|STARTED'" - wait_result $SINGLEMDS "$cmd" "" $timeout || + wait_update_facet --verbose mds1 "$cmd" "" $timeout || error "requests did not complete" } @@ -974,7 +488,7 @@ get_agent_uuid() { # Lustre mount-point is mandatory and last parameter on # copytool cmd-line. 
- local mntpnt=$(do_rpc_nodes $agent ps -C $HSMTOOL_BASE -o args= | + local mntpnt=$(do_rpc_nodes $agent libtool execute ps -C $HSMTOOL -o args= | awk '{print $NF}') [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ "parameter" @@ -1003,7 +517,9 @@ cdt_set_sanity_policy # finished requests are quickly removed from list set_hsm_param grace_delay 10 -test_1() { +CLIENT_NIDS=( $($LCTL list_nids all) ) + +test_1A() { # was test_1 mkdir -p $DIR/$tdir chmod 777 $DIR/$tdir @@ -1036,7 +552,7 @@ test_1() { check_hsm_flags_user $f "0x00000000" } -run_test 1 "lfs hsm flags root/non-root access" +run_test 1A "lfs hsm flags root/non-root access" test_1a() { local f=$DIR/$tdir/$tfile @@ -1056,11 +572,8 @@ test_1a() { } run_test 1a "mmap & cat a HSM released file" -test_1b() { - mkdir -p $DIR/$tdir - $LFS setstripe -E 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || - error "failed to set default stripe" - local f=$DIR/$tdir/$tfile +test_1bde_base() { + local f=$1 rm -f $f dd if=/dev/urandom of=$f bs=1M count=1 conv=sync || @@ -1084,7 +597,16 @@ test_1b() { echo "verify restored state: " check_hsm_flags $f "0x00000009" && echo "pass" } -run_test 1b "Archive, Release & Restore composite file" + +test_1b() { + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -S 1M -E 64M -c 2 -E -1 -c 4 $DIR/$tdir || + error "failed to set default stripe" + local f=$DIR/$tdir/$tfile + + test_1bde_base $f +} +run_test 1b "Archive, Release and Restore composite file" test_1c() { mkdir -p $DIR/$tdir @@ -1112,10 +634,26 @@ test_1c() { [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" - # Test whether setting archive number > 32 results in error. 
- $LFS hsm_set --exists --archive-id 33 $f && - error "archive number is larger than 32" - check_hsm_flags_user $f "0x00000001" + LOCAL_HSM_ARCHIVE_NUMBER=33 + if [ $(lustre_version_code client) -ge $(version_code 2.11.56) ] && + [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.11.56) ]; then + # lustre in the new version supports unlimited archiveID. + # Test whether setting archive number > 32 is supported + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f || + error "archive ID $LOCAL_HSM_ARCHIVE_NUMBER too large?" + check_hsm_flags_user $f "0x00000001" + + echo "verifying archive number is $LOCAL_HSM_ARCHIVE_NUMBER" + st=$(get_hsm_archive_id $f) + [[ $st == $LOCAL_HSM_ARCHIVE_NUMBER ]] || + error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER" + else + # old client or old mds can only support at most 32 archiveID + # test whether setting archive number > 32 results in error. + $LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f && + error "bitmap archive number is larger than 32" + check_hsm_flags_user $f "0x00000001" + fi # Test whether setting archive number 16 and archived flag. 
LOCAL_HSM_ARCHIVE_NUMBER=16 @@ -1130,6 +668,58 @@ test_1c() { } run_test 1c "Check setting archive-id in lfs hsm_set" +test_1d() { + [ $MDS1_VERSION -lt $(version_code 2.10.59) ] && + skip "need MDS version at least 2.10.59" + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1M -L mdt -E -1 -c 2 $DIR/$tdir || + error "failed to set default stripe" + local f=$DIR/$tdir/$tfile + + test_1bde_base $f +} +run_test 1d "Archive, Release and Restore DoM file" + +test_1e() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code $SEL_VER) ] && + skip "skipped for lustre < $SEL_VER" + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1G -z 64M -E 10G -z 512M -E -1 -z 1G $DIR/$tdir || + error "failed to set default stripe" + local comp_file=$DIR/$tdir/$tfile + + test_1bde_base $comp_file + + local flg_opts="--comp-start 0 -E 64M --comp-flags init" + local found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "1st component not found" + + flg_opts="--comp-start 64M -E 1G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "2nd component not found" + + flg_opts="--comp-start 1G -E 1G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "3rd component not found" + + flg_opts="--comp-start 1G -E 10G --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "4th component not found" + + flg_opts="--comp-start 10G -E 10G --comp-flags ^init" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "5th component not found" + + flg_opts="--comp-start 10G -E EOF --comp-flags extension" + found=$($LFS find $flg_opts $comp_file | wc -l) + [ $found -eq 1 ] || error "6th component not found" + + sel_layout_sanity $comp_file 6 +} +run_test 1e "Archive, Release and Restore SEL file" + test_2() { local f=$DIR/$tdir/$tfile @@ -1237,7 +827,7 @@ test_8() { } run_test 8 "Test default archive number" -test_9() { +test_9A() { # was 
test_9 # we do not use the default one to be sure local archive_id=$((HSM_ARCHIVE_NUMBER + 1)) copytool setup --archive-id $archive_id @@ -1255,7 +845,7 @@ test_9() { check_hsm_flags $f "0x00000009" } -run_test 9 "Use of explicit archive number, with dedicated copytool" +run_test 9A "Use of explicit archive number, with dedicated copytool" test_9a() { needclients 3 || return 0 @@ -1283,7 +873,7 @@ run_test 9a "Multiple remote agents" test_10a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir/d1 local f=$DIR/$tdir/$tfile @@ -1292,10 +882,13 @@ test_10a() { error "hsm_archive failed" wait_request_state $fid ARCHIVE SUCCEED - local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || - error "fid $fid not in archive $HSM_ARCHIVE" + local hsm_root="$(copytool_device $SINGLEAGT)" + local archive="$(do_facet $SINGLEAGT \ + find "$hsm_root" -name "$fid" -print0)" + [ -n "$archive" ] || error "fid '$fid' not in archive '$hsm_root'" + echo "Verifying content" - do_facet $SINGLEAGT diff $f $AFILE || error "archived file differs" + do_facet $SINGLEAGT diff $f $archive || error "archived file differs" echo "Verifying hsm state " check_hsm_flags $f "0x00000009" @@ -1303,8 +896,6 @@ test_10a() { local st=$(get_hsm_archive_id $f) [[ $st == $HSM_ARCHIVE_NUMBER ]] || error "Wrong archive number, $st != $HSM_ARCHIVE_NUMBER" - - copytool_cleanup } run_test 10a "Archive a file" @@ -1360,30 +951,30 @@ test_11a() { copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f echo -n "Verifying released state: " check_hsm_flags $f "0x0000000d" local LSZ=$(stat -c "%s" $f) - local ASZ=$(do_facet $SINGLEAGT stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile) + local ASZ=$(do_facet $SINGLEAGT stat -c "%s" "$(hsm_root)/$tdir/$tfile") echo "Verifying imported size $LSZ=$ASZ" [[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ" echo -n "Verifying released pattern: " - local 
PTRN=$($GETSTRIPE -L $f) + local PTRN=$($LFS getstripe -L $f) echo $PTRN [[ $PTRN == released ]] || error "Is not released" local fid=$(path2fid $f) echo "Verifying new fid $fid in archive" - local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || - error "fid $fid not in archive $HSM_ARCHIVE" + do_facet $SINGLEAGT "[ -f \"$(fid2archive "$fid")\" ]" || + error "No archive for fid $fid" } run_test 11a "Import a file" test_11b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile @@ -1395,25 +986,23 @@ test_11b() { local FILE_HASH=$(md5sum $f) rm -f $f - import_file $fid $f + copytool import $fid $f echo "$FILE_HASH" | md5sum -c [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 11b "Import a deleted file using its FID" test_12a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local f2=$DIR2/$tdir/$tfile echo "Verifying released state: " check_hsm_flags $f2 "0x0000000d" @@ -1425,23 +1014,21 @@ test_12a() { echo "Verifying file state: " check_hsm_flags $f2 "0x00000009" - do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f [[ $? 
-eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12a "Restore an imported file explicitly" test_12b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f echo "Verifying released state: " check_hsm_flags $f "0x0000000d" @@ -1450,11 +1037,9 @@ test_12b() { echo "Verifying file state after restore: " check_hsm_flags $f "0x00000009" - do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $(hsm_root)/$tdir/$tfile $f [[ $? -eq 0 ]] || error "Restored file differs" - - copytool_cleanup } run_test 12b "Restore an imported file implicitly" @@ -1467,9 +1052,7 @@ test_12c() { local f=$DIR/$tdir/$tfile mkdir -p $DIR/$tdir $LFS setstripe -c 2 "$f" - local fid - fid=$(make_custom_file_for_progress $f 5) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 5) local FILE_CRC=$(md5sum $f) @@ -1511,7 +1094,7 @@ test_12e() { # test needs a running copytool copytool setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive $f || error "archive request failed" @@ -1616,20 +1199,18 @@ run_test 12m "Archive/release/implicit restore" test_12n() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f do_facet $SINGLEAGT cmp /etc/hosts $f || error "Restored file differs" $LFS hsm_release $f || error "release of $f failed" - - copytool_cleanup } run_test 12n "Import/implicit restore/release" @@ -1700,13 +1281,9 @@ test_12p() { } run_test 12p "implicit restore of a file on copytool mount point" -cleanup_test_12q() { - error "cannot umount $MOUNT3 on $SINGLEAGT" -} - test_12q() { - [ 
$(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.58) ] && - skip "need MDS version at least 2.7.58" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.58) ] && + skip "need MDS version at least 2.7.58" stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT zconf_mount $(facet_host $SINGLEAGT) $MOUNT3 || @@ -1764,46 +1341,40 @@ test_12q() { run_test 12q "file attributes are refreshed after restore" test_13() { - # test needs a running copytool - copytool_setup - - local ARC_SUBDIR="import.orig" - local d="" - local f="" - - # populate directory to be imported - for d in $(seq 1 10); do - local CURR_DIR="$HSM_ARCHIVE/$ARC_SUBDIR/dir.$d" - do_facet $SINGLEAGT mkdir -p "$CURR_DIR" - for f in $(seq 1 10); do - CURR_FILE="$CURR_DIR/$tfile.$f" - # write file-specific data - do_facet $SINGLEAGT \ - "echo d=$d, f=$f, dir=$CURR_DIR, "\ - "file=$CURR_FILE > $CURR_FILE" + local -i i j k=0 + for i in {1..10}; do + local archive_dir="$(hsm_root)"/subdir/dir.$i + + do_facet $SINGLEAGT mkdir -p "$archive_dir" + for j in {1..10}; do + local archive_file="$archive_dir"/file.$j + + do_facet $SINGLEAGT "echo $k > \"$archive_dir\"/file.$j" + k+=1 done done + # import to Lustre - import_file "$ARC_SUBDIR" $DIR/$tdir - # diff lustre content and origin (triggers file restoration) - # there must be 10x10 identical files, and no difference - local cnt_ok=$(do_facet $SINGLEAGT diff -rs $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | grep identical | wc -l) - local cnt_diff=$(do_facet $SINGLEAGT diff -r $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | wc -l) + copytool import "subdir" "$DIR/$tdir" - [ $cnt_diff -eq 0 ] || - error "$cnt_diff imported files differ from read data" - [ $cnt_ok -eq 100 ] || - error "not enough identical files ($cnt_ok != 100)" + # To check the import, the test uses diff with the -r flag + # This is nice, but diff only checks files one by one, and triggering + # an implicit restore for one file at a time will consume as many + 
# seconds as there are files to compare. To speed this up, a restore + # operation is triggered manually first. + copytool setup + find "$DIR/$tdir"/subdir -type f -exec $LFS hsm_restore {} \; - copytool_cleanup + # Compare the imported data + do_facet $SINGLEAGT \ + diff -r "$(hsm_root)"/subdir "$DIR/$tdir"/subdir || + error "imported files differ from archived data" } run_test 13 "Recursively import and restore a directory" test_14() { # test needs a running copytool - copytool_setup + copytool setup # archive a file local f=$DIR/$tdir/$tfile @@ -1820,22 +1391,18 @@ test_14() { # rebind the archive to the newly created file echo "rebind $fid to $fid2" - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --rebind $fid $fid2 $DIR" || error "could not rebind file" + copytool rebind $fid $fid2 # restore file and compare md5sum local sum2=$(md5sum $f | awk '{print $1}') [[ $sum == $sum2 ]] || error "md5sum mismatch after restore" - - copytool_cleanup } run_test 14 "Rebind archived file to a new fid" test_15() { # test needs a running copytool - copytool_setup + copytool setup # archive files local f=$DIR/$tdir/$tfile @@ -1851,6 +1418,7 @@ test_15() { done wait_all_done $(($count*60)) + stack_trap "rm -f $tmpfile" EXIT :>$tmpfile # delete the files for i in $(seq 1 $count); do @@ -1867,9 +1435,7 @@ test_15() { [[ $nl == $count ]] || error "$nl files in list, $count expected" echo "rebind list of files" - do_facet $SINGLEAGT \ - "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ - --rebind $tmpfile $DIR" || error "could not rebind file list" + copytool rebind "$tmpfile" # restore files and compare md5sum for i in $(seq 1 $count); do @@ -1877,9 +1443,6 @@ test_15() { [[ $sum2 == ${sums[$i]} ]] || error "md5sum mismatch after restore ($sum2 != ${sums[$i]})" done - - rm -f $tmpfile - copytool_cleanup } run_test 15 "Rebind a list of files" @@ -2234,8 +1797,8 @@ test_24c() { # User. 
create_small_file $file - chown $RUNAS_ID:nobody $file || - error "cannot chown '$file' to '$RUNAS_ID:nobody'" + chown $RUNAS_ID:$GROUP $file || + error "cannot chown '$file' to '$RUNAS_ID:$GROUP'" $RUNAS $LFS hsm_$action $file && error "$action by user should fail" @@ -2258,8 +1821,8 @@ test_24c() { # Other. create_small_file $file - chown nobody:nobody $file || - error "cannot chown '$file' to 'nobody:nobody'" + chown nobody:$GROUP $file || + error "cannot chown '$file' to 'nobody:$GROUP'" $RUNAS $LFS hsm_$action $file && error "$action by other should fail" @@ -2359,16 +1922,43 @@ test_24f() { } run_test 24f "root can archive, release, and restore tar files" +test_24g() { + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "need MDS version 2.11.56 or later" + + local file=$DIR/$tdir/$tfile + local fid + + echo "RUNAS = '$RUNAS'" + + copytool setup + + mkdir -p $DIR/$tdir + chmod ugo+rwx $DIR/$tdir + + echo "Please listen carefully as our options have changed." | tee $file + fid=$(path2fid $file) + chmod ugo+rw $file + + $LFS hsm_archive $file + wait_request_state $fid ARCHIVE SUCCEED + check_hsm_flags $file 0x00000009 # exists archived + + echo "To be electrocuted by your telephone, press #." | $RUNAS tee $file + check_hsm_flags $file 0x0000000b # exists dirty archived +} +run_test 24g "write by non-owner still sets dirty" # LU-11369 + test_25a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f $LFS hsm_set --lost $f @@ -2376,8 +1966,6 @@ test_25a() { local st=$? 
[[ $st == 1 ]] || error "lost file access should failed (returns $st)" - - copytool_cleanup } run_test 25a "Restore lost file (HS_LOST flag) from import"\ " (Operation not permitted)" @@ -2404,14 +1992,12 @@ test_25b() { run_test 25b "Restore lost file (HS_LOST flag) after release"\ " (Operation not permitted)" -test_26() { +test_26A() { # was test_26 # test needs a running copytool copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2421,7 +2007,7 @@ test_26() { check_hsm_flags $f "0x00000000" } -run_test 26 "Remove the archive of a valid file" +run_test 26A "Remove the archive of a valid file" test_26a() { local raolu=$(get_hsm_param remove_archive_on_last_unlink) @@ -2467,8 +2053,6 @@ test_26a() { rm -f $f2 - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid2 REMOVE SUCCEED assert_request_count $fid REMOVE 0 \ @@ -2497,8 +2081,6 @@ test_26b() { rm -f $f - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid REMOVE WAITING cdt_enable @@ -2553,9 +2135,7 @@ test_26c() { kill -USR1 $pid || error "multiop early exit" # should reach autotest timeout if multiop fails to trap # signal, close file, and exit ... 
- wait $pid || error - - set_hsm_param remove_archive_on_last_unlink 0 + wait $pid || error "wait PID $PID failed" wait_request_state $fid REMOVE SUCCEED } @@ -2567,7 +2147,7 @@ test_26d() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED @@ -2590,8 +2170,6 @@ test_26d() { mds_evict_client - set_hsm_param remove_archive_on_last_unlink 0 - wait_request_state $fid REMOVE SUCCEED client_up || client_up || true @@ -2603,18 +2181,16 @@ run_test 26d "RAoLU when Client eviction" test_27a() { # test needs a running copytool - copytool_setup + copytool setup create_archive_file $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) $LFS hsm_remove $f [[ $? != 0 ]] || error "Remove of a released file should fail" - - copytool_cleanup } run_test 27a "Remove the archive of an imported file (Operation not permitted)" @@ -2623,9 +2199,7 @@ test_27b() { copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2642,9 +2216,7 @@ test_28() { copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2792,29 +2364,25 @@ test_30a() { needclients 2 || return 0 # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /bin/true $tdir/$tfile local f=$DIR/$tdir/true - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) + stack_trap "cdt_clear_no_retry" EXIT # set no retry action mode cdt_set_no_retry do_node $CLIENT2 $f local st=$? - # cleanup - # remove no try action mode - cdt_clear_no_retry $LFS hsm_state $f [[ $st == 0 ]] || error "Failed to exec a released file" - - copytool_cleanup } run_test 30a "Restore at exec (import case)" @@ -2921,20 +2489,18 @@ restore_and_check_size() { test_31a() { # test needs a running copytool - copytool_setup + copytool setup create_archive_file $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$($LFS path2fid $f) - HSM_ARCHIVE_PURGE=false copytool_setup + copytool setup restore_and_check_size $f $fid local err=$? [[ $err -eq 0 ]] || error "File size changed during restore" - - copytool_cleanup } run_test 31a "Import a large file and check size during restore" @@ -2944,9 +2510,7 @@ test_31b() { copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2964,9 +2528,7 @@ test_31c() { copytool setup local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 33 1048576) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1M 39) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -2980,63 +2542,30 @@ test_31c() { run_test 31c "Restore a large aligned file and check size during restore" test_33() { - # test needs a running copytool - copytool setup -b 1 - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f - # to be sure wait_all_done will not be mislead by previous tests - # and ops. - cdt_purge - wait_for_grace_delay - # Also raise grace_delay significantly so the Canceled - # Restore action will stay enough long avail. - local old_grace=$(get_hsm_param grace_delay) - stack_trap "set_hsm_param grace_delay $old_grace" EXIT - set_hsm_param grace_delay 100 + # Prevent restore from completing + copytool_suspend + # Implicit restore md5sum $f >/dev/null & local pid=$! - wait_request_state $fid RESTORE STARTED + wait_request_state $fid RESTORE STARTED kill -15 $pid - sleep 1 - - # Check restore trigger process was killed - local killed=$(ps -o pid,comm hp $pid >/dev/null) - - $LFS hsm_cancel $f - # instead of waiting+checking both Restore and Cancel ops - # sequentially, wait for both to be finished and then check - # each results. 
- wait_all_done 100 $fid - local rstate=$(get_request_state $fid RESTORE) - local cstate=$(get_request_state $fid CANCEL) - - if [[ "$rstate" == "CANCELED" ]] ; then - [[ "$cstate" == "SUCCEED" ]] || - error "Restore state is CANCELED and Cancel state " \ - "is not SUCCEED but $cstate" - echo "Restore state is CANCELED, Cancel state is SUCCEED" - elif [[ "$rstate" == "SUCCEED" ]] ; then - [[ "$cstate" == "FAILED" ]] || - error "Restore state is SUCCEED and Cancel state " \ - "is not FAILED but $cstate" - echo "Restore state is SUCCEED, Cancel state is FAILED" - else - error "Restore state is $rstate and Cancel state is $cstate" - fi + copytool_continue - [ -z $killed ] || - error "Cannot kill process waiting for restore ($killed)" + # Check restore trigger process was killed + wait $pid + [ $? -eq 143 ] || error "md5sum was not 'Terminated'" } run_test 33 "Kill a restore waiting process" @@ -3045,28 +2574,32 @@ test_34() { copytool setup -b 1 local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - rm $f || error "rm $f failed" # rm must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 rm "$f" || error "rm $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiatior still running" wait $pid || error "Restore initiator failed with $?" + + # Check the file was actually deleted + [ ! 
-f "$f" ] || error "$f was not deleted" } run_test 34 "Remove file during restore" @@ -3076,31 +2609,32 @@ test_35() { local f=$DIR/$tdir/$tfile local f1=$DIR/$tdir/$tfile-1 - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return - + local fid=$(create_empty_file "$f") local fid1=$(copy_file /etc/passwd $f1) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! + wait_request_state $fid RESTORE STARTED - mv $f1 $f || error "mv $f1 $f failed" # mv must not block during restore - wait_request_state $fid RESTORE STARTED + timeout --signal=KILL 1 mv "$f1" "$f" || error "mv $f1 $f failed" + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiatior still running" wait $pid || error "Restore initiator failed with $?" - fid2=$(path2fid $f) + local fid2=$(path2fid $f) [[ $fid2 == $fid1 ]] || error "Wrong fid after mv $fid2 != $fid1" } run_test 35 "Overwrite file during restore" @@ -3110,28 +2644,29 @@ test_36() { copytool setup -b 1 local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + # Prevent restore from completing + copytool_suspend + md5sum $f >/dev/null & local pid=$! 
- wait_request_state $fid RESTORE STARTED - mv $f $f.new - # rm must not block during restore wait_request_state $fid RESTORE STARTED + # mv must not block during restore + timeout --signal=KILL 10 mv "$f" "$f.new" || + error "mv '$f' '$f.new' failed with rc=$?" + + copytool_continue wait_request_state $fid RESTORE SUCCEED - # check md5sum pgm finished - local there=$(ps -o pid,comm hp $pid >/dev/null) - [[ -z $there ]] || - error "Restore initiator does not exit" + # Check md5sum pgm finished + kill -0 $pid && error "Restore initiator is still running" wait $pid || error "Restore initiator failed with $?" } run_test 36 "Move file during restore" @@ -3220,7 +2755,7 @@ test_52() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED @@ -3245,7 +2780,7 @@ test_53() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/motd $f 1) + local fid=$(create_small_file $f) $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3266,11 +2801,10 @@ test_53() { run_test 53 "Opened for read file on an evicted client should not be set dirty" test_54() { - # test needs a running copytool - copytool setup -b 1 - local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 39 1000000) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3291,11 +2825,10 @@ test_54() { run_test 54 "Write during an archive cancels it" test_55() { - # test needs a running copytool - copytool setup -b 1 - local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 39 1000000) + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3316,13 +2849,10 @@ test_55() { run_test 55 
"Truncate during an archive cancels it" test_56() { - # test needs a running copytool - copytool setup -b 1 - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_file "$f" 1MB 39) + + copytool setup -b 1 $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3427,9 +2957,8 @@ run_test 58 "Truncate a released file will trigger restore" test_59() { local fid - local server_version=$(lustre_version_code $SINGLEMDS) - [[ $server_version -lt $(version_code 2.7.63) ]] && - skip "Need MDS version at least 2.7.63" && return + [[ $MDS1_VERSION -lt $(version_code 2.7.63) ]] && + skip "Need MDS version at least 2.7.63" copytool setup $MCREATE $DIR/$tfile || error "mcreate failed" @@ -3445,16 +2974,13 @@ test_60() { # This test validates the fix for LU-4512. Ensure that the -u # option changes the progress reporting interval from the # default (30 seconds) to the user-specified interval. + local f=$DIR/$tdir/$tfile + local fid=$(create_file "$f" 1M 10) + local interval=5 local progress_timeout=$((interval * 4)) - copytool setup -b 1 --update-interval $interval - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 10) - [ $? != 0 ] && skip "not enough free space" && return - local mdtidx=0 local mdt=${MDT_PREFIX}${mdtidx} local mds=mds$((mdtidx + 1)) @@ -3583,9 +3109,7 @@ test_71() { cdt_clear_no_retry local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_small_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -3594,8 +3118,11 @@ test_71() { local expected_fields="event_time data_fid source_fid" expected_fields+=" total_bytes current_bytes" - local START_EVENT - local FINISH_EVENT + local -A events=( + [ARCHIVE_START]=false + [ARCHIVE_FINISH]=false + [ARCHIVE_RUNNING]=false + ) while read event; do # Make sure we're not getting anything from previous events. for field in $expected_fields; do @@ -3608,15 +3135,9 @@ test_71() { fi eval $parsed - if [ $event_type == "ARCHIVE_START" ]; then - START_EVENT=$event - continue - elif [ $event_type == "ARCHIVE_FINISH" ]; then - FINISH_EVENT=$event - continue - elif [ $event_type != "ARCHIVE_RUNNING" ]; then - continue - fi + events["$event_type"]=true + + [ "$event_type" != ARCHIVE_RUNNING ] && continue # Do some simple checking of the progress update events. for expected_field in $expected_fields; do @@ -3625,24 +3146,18 @@ test_71() { fi done - if [ $total_bytes -eq 0 ]; then - error "Expected total_bytes to be > 0" - fi + [ $total_bytes -gt 0 ] || error "Expected total_bytes to be > 0" - # These should be identical throughout an archive - # operation. - if [ $source_fid != $data_fid ]; then + # These should be identical throughout an archive operation + [ $source_fid == $data_fid ] || error "Expected source_fid to equal data_fid" - fi done < <(echo $"$(get_copytool_event_log)") - if [ -z "$START_EVENT" ]; then - error "Copytool failed to send archive start event to FIFO" - fi - - if [ -z "$FINISH_EVENT" ]; then - error "Copytool failed to send archive finish event to FIFO" - fi + # Check we received every type of events we were expecting + for event in "${!events[@]}"; do + ${events["$event"]} || + error "Copytool failed to send '$event' event to FIFO" + done echo "Archive events look OK." 
} @@ -3653,10 +3168,10 @@ test_72() { local interval=5 # test needs a new running copytool - copytool_cleanup + stack_trap copytool_monitor_cleanup EXIT copytool_monitor_setup - HSMTOOL_UPDATE_INTERVAL=$interval \ - HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup + copytool setup --update-interval $interval --event-fifo \ + "$HSMTOOL_MONITOR_DIR/fifo" local test_file=$HSMTOOL_MONITOR_DIR/file local cmd="dd if=/dev/urandom of=$test_file count=16 bs=1000000 " @@ -3667,7 +3182,7 @@ test_72() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f f=$DIR2/$tdir/$tfile echo "Verifying released state: " check_hsm_flags $f "0x0000000d" @@ -3742,12 +3257,6 @@ test_72() { fi echo "Restore events look OK." - - cdt_clear_no_retry - copytool_cleanup - copytool_monitor_cleanup - - rm -rf $test_dir } run_test 72 "Copytool logs JSON restore events to FIFO" @@ -3844,9 +3353,7 @@ DATA=CEA DATAHEX='[434541]' test_104() { local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\ @@ -3856,10 +3363,7 @@ test_104() { [[ "$data1" == "$DATAHEX" ]] || error "Data field in records is ($data1) and not ($DATAHEX)" - # archive the file - copytool setup - - wait_request_state $fid ARCHIVE SUCCEED + cdt_purge } run_test 104 "Copy tool data field" @@ -3915,6 +3419,8 @@ test_106() { run_test 106 "Copytool register/unregister" test_107() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + # test needs a running copytool copytool setup # create and archive file @@ -3971,14 +3477,14 @@ run_test 109 "Policy display/change" test_110a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) cdt_set_non_blocking_restore @@ -4024,14 +3530,14 @@ run_test 110b "Non blocking restore policy (release case)" test_111a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) cdt_set_no_retry @@ -4047,19 +3553,18 @@ test_111a() { # Test result [[ $st == 0 ]] || error "Restore does not failed" - - copytool_cleanup } run_test 111a "No retry policy (import case), restore will error"\ " (No such file or directory)" test_111b() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) + stack_trap cdt_clear_no_retry EXIT cdt_set_no_retry $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -4071,13 +3576,8 @@ test_111b() { wait_request_state $fid RESTORE FAILED local 
st=$? - # cleanup - cdt_clear_no_retry - # Test result [[ $st == 0 ]] || error "Restore does not failed" - - copytool_cleanup } run_test 111b "No retry policy (release case), restore will error"\ " (No such file or directory)" @@ -4103,34 +3603,72 @@ test_112() { } run_test 112 "State of recorded request" +test_113() { + local file1=$DIR/$tdir/$tfile + local file2=$DIR2/$tdir/$tfile + + local fid=$(create_small_sync_file $file1) + + stack_trap "zconf_umount \"$(facet_host $SINGLEAGT)\" \"$MOUNT3\"" EXIT + zconf_mount "$(facet_host $SINGLEAGT)" "$MOUNT3" || + error "cannot mount '$MOUNT3' on '$SINGLEAGT'" + + copytool setup -m "$MOUNT3" + + do_nodes $(comma_list $(nodes_list)) $LCTL clear + + $LFS hsm_archive $file1 || error "Fail to archive $file1" + wait_request_state $fid ARCHIVE SUCCEED + + $LFS hsm_release $file1 + echo "Verifying released state: " + check_hsm_flags $file1 "0x0000000d" + + multiop_bg_pause $file1 oO_WRONLY:O_APPEND:_w4c || error "multiop failed" + MULTIPID=$! + stat $file2 & + kill -USR1 $MULTIPID + + wait + sync + + local size1=$(stat -c "%s" $file1) + local size2=$(stat -c "%s" $file2) + + [ $size1 -eq $size2 ] || error "sizes are different $size1 $size2" +} +run_test 113 "wrong stat after restore" + test_200() { - # test needs a running copytool + local f=$DIR/$tdir/$tfile + local fid=$(create_empty_file "$f") + copytool setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? 
!= 0 ] && skip "not enough free space" && return + # Prevent archive from completing + copytool_suspend - # test with cdt on is made in test_221 - cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f # wait archive to register at CDT - wait_request_state $fid ARCHIVE WAITING - $LFS hsm_cancel $f - cdt_enable + wait_request_state $fid ARCHIVE STARTED + + # Cancel the archive + $LFS hsm_cancel "$f" + wait_request_state $fid ARCHIVE CANCELED + + copytool_continue wait_request_state $fid CANCEL SUCCEED } run_test 200 "Register/Cancel archive" test_201() { # test needs a running copytool - copytool_setup + copytool setup local f=$DIR/$tdir/$tfile create_archive_file $tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) # test with cdt on is made in test_222 @@ -4142,34 +3680,30 @@ test_201() { cdt_enable wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - - copytool_cleanup } run_test 201 "Register/Cancel restore" test_202() { + local f=$DIR/$tdir/$tfile + local fid=$(create_empty_file "$f") + # test needs a running copytool copytool setup - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - cdt_disable + copytool_suspend $LFS hsm_remove $f # wait remove to register at CDT - wait_request_state $fid REMOVE WAITING + wait_request_state $fid REMOVE STARTED $LFS hsm_cancel $f - cdt_enable + wait_request_state $fid REMOVE CANCELED } run_test 202 "Register/Cancel remove" -test_220() { +test_220A() { # was test_220 # test needs a running copytool copytool setup @@ -4178,18 +3712,15 @@ test_220() { local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - changelog_cleanup - - local target=0x0 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" + changelog_find -type HSM -target-fid $fid -flags 0x0 || + error "The expected changelog was not emitted" } -run_test 220 "Changelog for archive" +run_test 220A "Changelog for archive" test_220a() { # test needs a running copytool @@ -4200,7 +3731,7 @@ test_220a() { local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register # block copytool operations to allow for HSM request to be # submitted and file be unlinked (CDT will find object removed) @@ -4217,61 +3748,53 @@ test_220a() { wait_request_state $fid ARCHIVE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - changelog_cleanup - # HE_ARCHIVE|ENOENT - local target=0x2 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" + changelog_find -type HSM -target-fid $fid -flags 0x2 || + error "The expected changelog was not emitted" } run_test 220a "Changelog for failed archive" test_221() { - # test needs a running copytool - copytool setup -b 1 - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 
103 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") - changelog_setup + copytool setup -b 1 + changelog_register + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED + $LFS hsm_cancel $f wait_request_state $fid ARCHIVE CANCELED - wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) + copytool_continue + wait_request_state $fid CANCEL SUCCEED - local target=0x7d - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" + changelog_find -type HSM -target-fid $fid -flags 0x7d || + error "The expected changelog was not emitted" } run_test 221 "Changelog for archive canceled" test_222a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) - changelog_setup + changelog_register $LFS hsm_restore $f wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x80 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0x80 || + error "The expected changelog was not emitted" } run_test 222a "Changelog for explicit restore" @@ -4283,7 +3806,7 @@ test_222b() { local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -4292,25 +3815,23 @@ test_222b() { wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0x80 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" + changelog_find -type 
HSM -target-fid $fid -flags 0x80 || + error "The expected changelog was not emitted" } run_test 222b "Changelog for implicit restore" test_222c() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir copy2archive /etc/passwd $tdir/$tfile local f=$DIR/$tdir/$tfile - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f local fid=$(path2fid $f) - changelog_setup + changelog_register # block copytool operations to allow for HSM request to be # submitted and file be unlinked (CDT will find object removed) @@ -4327,25 +3848,21 @@ test_222c() { wait_request_state $fid RESTORE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - # HE_RESTORE|ENOENT - local target=0x82 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0x82 || + error "The expected changelog was not emitted" } run_test 222c "Changelog for failed explicit restore" test_222d() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f @@ -4355,26 +3872,22 @@ test_222d() { wait_request_state $fid RESTORE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - # HE_RESTORE|ENOENT - local target=0x82 - [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0x82 || + error "The expected changelog was not emitted" } run_test 222d "Changelog for failed implicit restore" test_223a() { # test needs a running copytool - copytool_setup + copytool setup -b 1 local f=$DIR/$tdir/$tfile create_archive_file $tdir/$tfile - changelog_setup + changelog_register - import_file $tdir/$tfile $f + copytool import $tdir/$tfile $f 
local fid=$(path2fid $f) $LFS hsm_restore $f @@ -4383,46 +3896,39 @@ test_223a() { wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - - local target=0xfd - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + changelog_find -type HSM -target-fid $fid -flags 0xfd || + error "The expected changelog was not emitted" } run_test 223a "Changelog for restore canceled (import case)" test_223b() { - # test needs a running copytool - copytool setup -b 1 - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup -b 1 + changelog_register - changelog_setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f + + # Prevent restore from completing + copytool_suspend $LFS hsm_restore $f wait_request_state $fid RESTORE STARTED + $LFS hsm_cancel $f wait_request_state $fid RESTORE CANCELED - wait_request_state $fid CANCEL SUCCEED - - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) - local target=0xfd - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + copytool_continue + wait_request_state $fid CANCEL SUCCEED - copytool_cleanup + changelog_find -type HSM -target-fid $fid -flags 0xfd || + error "The expected changelog was not emitted" } run_test 223b "Changelog for restore canceled (release case)" -test_224() { +test_224A() { # was test_224 # test needs a running copytool copytool setup @@ -4431,31 +3937,28 @@ test_224() { local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_remove $f wait_request_state $fid REMOVE SUCCEED - local flags=$(changelog_get_flags 
${MDT[0]} HSM $fid | tail -n 1) - - local target=0x200 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + changelog_find -type HSM -target-fid $fid -flags 0x200 || + error "The expected changelog was not emitted" } -run_test 224 "Changelog for remove" +run_test 224A "Changelog for remove" test_224a() { # test needs a running copytool - copytool_setup + copytool setup mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -4476,51 +3979,40 @@ test_224a() { wait_request_state $fid REMOVE FAILED - local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1) - - # HE_REMOVE|ENOENT - local target=0x202 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" - - cleanup + # HE_REMOVE|ENOENT=0x202 + changelog_find -type HSM -target-fid $fid -flags 0x202 || + error "The expected changelog was not emitted" } run_test 224a "Changelog for failed remove" test_225() { - # test needs a running copytool - copytool setup - # test is not usable because remove request is too fast # so it is always finished before cancel can be done ... echo "Test disabled" - copytool_cleanup return 0 + # test needs a running copytool + copytool setup + local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - # if cdt is on, it can serve too quickly the request - cdt_disable + # Prevent restore from completing + copytool_suspend $LFS hsm_remove $f + $LFS hsm_cancel $f - cdt_enable wait_request_state $fid REMOVE CANCELED - wait_request_state $fid CANCEL SUCCEED - flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) - local flags=$($LFS changelog ${MDT[0]} | grep HSM | grep $fid | - tail -n 1 | awk '{print $5}') + copytool_continue + wait_request_state $fid CANCEL SUCCEED - local target=0x27d - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + changelog_find -type HSM -target-fid $fid -flags 0x27d + error "The expected changelog was not emitted" } run_test 225 "Changelog for remove canceled" @@ -4537,7 +4029,7 @@ test_226() { local fid2=$(copy_file /etc/passwd $f2) copy_file /etc/passwd $f3 - changelog_setup + changelog_register $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f1 wait_request_state $fid1 ARCHIVE SUCCEED @@ -4546,74 +4038,57 @@ test_226() { rm $f1 || error "rm $f1 failed" - local flags=$(changelog_get_flags ${MDT[0]} UNLNK $fid1) - - local target=0x3 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + changelog_dump + changelog_find -type UNLNK -target-fid $fid1 -flags 0x3 || + error "The expected changelog was not emitted" mv $f3 $f2 || error "mv $f3 $f2 failed" - flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) - - target=0x3 - [[ $flags == $target ]] || - error "Changelog flag is $flags not $target" + changelog_find -type RENME -target-fid $fid2 -flags 0x3 || + error "The expected changelog was not emitted" } run_test 226 "changelog for last rm/mv with exiting archive" -check_flags_changes() { - local f=$1 - local fid=$2 - local hsm_flag=$3 - local fst=$4 - local cnt=$5 - +# This is just a 
utility function to clarify what test_227 does +__test_227() +{ local target=0x280 - $LFS hsm_set --$hsm_flag $f || - error "Cannot set $hsm_flag on $f" - local flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) - local seen=${#flags[*]} - cnt=$((fst + cnt)) - [[ $seen == $cnt ]] || - error "set $hsm_flag: Changelog events $seen != $cnt" - [[ ${flags[$((cnt - 1))]} == $target ]] || - error "set $hsm_flag: Changelog flags are "\ - "${flags[$((cnt - 1))]} not $target" - - $LFS hsm_clear --$hsm_flag $f || - error "Cannot clear $hsm_flag on $f" - flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) - seen=${#flags[*]} - cnt=$(($cnt + 1)) - [[ $cnt == $seen ]] || - error "clear $hsm_flag: Changelog events $seen != $cnt" - - [[ ${flags[$((cnt - 1))]} == $target ]] || - error "clear $hsm_flag: Changelog flag is "\ - "${flags[$((cnt - 1))]} not $target" + + "$LFS" "$action" --$flag "$file" || + error "Cannot ${action#hsm_} $flag on '$file'" + + # Only one changelog should be produced + local entries="$(changelog_find -type HSM -target-fid $fid)" + [ $(wc -l <<< "$entries") -eq $((++count)) ] || + error "lfs $action --$flag '$file' produced more than one" \ + "changelog record" + + # Parse the last changelog record + local entry="$(tail -n 1 <<< "$entries")" + eval local -A changelog=$(changelog2array $entry) + + # Also check the flags match what is expected + [[ ${changelog[flags]} == $target ]] || + error "Changelog flag is '${changelog[flags]}', not $target" } test_227() { - # test needs a running copytool - copytool setup - changelog_setup + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + local count=0 - mkdir -p $DIR/$tdir - typeset -a flags + changelog_register - for i in norelease noarchive exists archived - do - local f=$DIR/$tdir/$tfile-$i - local fid=$(copy_file /etc/passwd $f) - check_flags_changes $f $fid $i 0 1 - done + for flag in norelease noarchive exists archived lost; do + if [ "$flag" == lost ]; then + # The flag "lost" only works on 
an archived file + "$LFS" hsm_set --archived "$file" + ((count++)) + fi - f=$DIR/$tdir/$tfile---lost - fid=$(copy_file /etc/passwd $f) - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f - wait_request_state $fid ARCHIVE SUCCEED - check_flags_changes $f $fid lost 3 1 + action="hsm_set" __test_227 + action="hsm_clear" __test_227 + done } run_test 227 "changelog when explicit setting of HSM flags" @@ -4653,67 +4128,88 @@ test_228() { run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse" test_250() { - # test needs a running copytool - copytool setup + local file="$DIR/$tdir/$tfile" - mkdir -p $DIR/$tdir - local maxrequest=$(get_hsm_param max_requests) - local rqcnt=$(($maxrequest * 3)) - local i="" + # set max_requests to allow one request of each type to be started (3) + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + # speed up test + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 - cdt_disable - for i in $(seq -w 1 $rqcnt); do - rm -f $DIR/$tdir/$i - dd if=/dev/urandom of=$DIR/$tdir/$i bs=1M count=10 conv=fsync + # send 1 requests of each kind twice + copytool setup + # setup the files + for action in archive restore remove; do + local filepath="$file"-to-$action + local fid=$(create_empty_file "$filepath") + local fid2=$(create_empty_file "$filepath".bis) + + if [ "$action" != archive ]; then + "$LFS" hsm_archive "$filepath" + wait_request_state $fid ARCHIVE SUCCEED + "$LFS" hsm_archive "$filepath".bis + wait_request_state $fid2 ARCHIVE SUCCEED + fi + if [ "$action" == restore ]; then + "$LFS" hsm_release "$filepath" + "$LFS" hsm_release "$filepath".bis + fi done - # we do it in 2 steps, so all requests arrive at the same time - for i in $(seq -w 1 $rqcnt); do - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tdir/$i + + # suspend the copytool to prevent requests from completing + stack_trap "copytool_continue" 
EXIT + copytool_suspend + + # send `max_requests' requests (one of each kind) + for action in archive restore remove; do + filepath="$file"-to-$action + "$LFS" hsm_${action} "$filepath" + wait_request_state $(path2fid "$filepath") "${action^^}" STARTED done - cdt_enable - local cnt=$rqcnt - local wt=$rqcnt - while [[ $cnt != 0 || $wt != 0 ]]; do - sleep 1 - cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.actions |\ - grep STARTED | grep -v CANCEL | wc -l") - [[ $cnt -le $maxrequest ]] || - error "$cnt > $maxrequest too many started requests" - wt=$(do_facet $SINGLEMDS "$LCTL get_param\ - $HSM_PARAM.actions |\ - grep WAITING | wc -l") - echo "max=$maxrequest started=$cnt waiting=$wt" + + # send another batch of requests + for action in archive restore remove; do + "$LFS" hsm_${action} "$file-to-$action".bis done + # wait for `loop_period' seconds to make sure the coordinator has time + # to register those, even though it should not + sleep 1 + + # only the first batch of request should be started + local -i count + count=$(do_facet $SINGLEMDS "$LCTL" get_param -n $HSM_PARAM.actions | + grep -c STARTED) + + ((count == 3)) || + error "expected 3 STARTED requests, found $count" } run_test 250 "Coordinator max request" test_251() { - # test needs a running copytool - copytool setup -b 1 - local f=$DIR/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 103 1048576) - [ $? != 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") cdt_disable # to have a short test local old_to=$(get_hsm_param active_request_timeout) - set_hsm_param active_request_timeout 4 + set_hsm_param active_request_timeout 1 # to be sure the cdt will wake up frequently so # it will be able to cancel the "old" request local old_loop=$(get_hsm_param loop_period) - set_hsm_param loop_period 2 + set_hsm_param loop_period 1 cdt_enable - # clear locks to avoid extra delay caused by flush/cancel - # and thus prevent early copytool death to timeout. 
- cancel_lru_locks osc + copytool setup + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED - sleep 5 + + # Let the request timeout wait_request_state $fid ARCHIVE CANCELED set_hsm_param active_request_timeout $old_to @@ -4722,36 +4218,27 @@ test_251() { run_test 251 "Coordinator request timeout" test_252() { - # test needs a running copytool - copytool setup -b 1 - - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile - local fid=$(make_custom_file_for_progress $f 103 1048576) + local fid=$(create_empty_file "$f") - cdt_disable # to have a short test - local old_timeout=$(get_hsm_param active_request_timeout) - stack_trap "set_hsm_param active_request_timeout $old_timeout" EXIT - set_hsm_param active_request_timeout 20 - # to be sure the cdt will wake up frequently so - # it will be able to cancel the "old" request - local old_loop_period=$(get_hsm_param loop_period) - stack_trap "set_hsm_param loop_period $old_loop_period" EXIT - set_hsm_param loop_period 2 - cdt_enable + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1 - # clear locks to avoid extra delay caused by flush/cancel - # and thus prevent early copytool death to timeout. 
- cancel_lru_locks osc + copytool setup + # Prevent archive from completing + copytool_suspend $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE STARTED - rm -f $f + rm -f "$f" + + stack_trap "set_hsm_param active_request_timeout \ + $(get_hsm_param active_request_timeout)" EXIT + set_hsm_param active_request_timeout 1 - # wait but less than active_request_timeout+grace_delay - sleep 25 wait_request_state $fid ARCHIVE CANCELED + copytool_continue } run_test 252 "Timeout'ed running archive of a removed file should be canceled" @@ -4791,8 +4278,8 @@ run_test 253 "Check for wrong file size after release" test_254a() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.56) ] && - skip "need MDS version at least 2.10.56" && return + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" # Check that the counters are initialized to 0 local count @@ -4809,15 +4296,15 @@ run_test 254a "Request counters are initialized to zero" test_254b() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.56) ] && - skip "need MDS version at least 2.10.56" && return + [ $MDS1_VERSION -lt $(version_code 2.10.56) ] && + skip "need MDS version at least 2.10.56" # The number of request to launch (at least 32) local request_count=$((RANDOM % 32 + 32)) printf "Will launch %i requests of each type\n" "$request_count" # Launch a copytool to process requests - copytool_setup + copytool setup # Set hsm.max_requests to allow starting all requests at the same time stack_trap \ @@ -4879,7 +4366,274 @@ test_254b() } run_test 254b "Request counters are correctly incremented and decremented" +test_255() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + local file="$DIR/$tdir/$tfile" + local fid=$(create_empty_file "$file") + + # How do you make sure the coordinator has consumed any outstanding + # event, without triggering an event yourself? 
+ # + # You wait for a request to disappear from the coordinator's llog. + + # Warning: the setup represents 90% of this test + + # Create and process an HSM request + copytool setup + "$LFS" hsm_archive "$file" + wait_request_state $fid ARCHIVE SUCCEED + + kill_copytools + wait_copytools || error "failed to stop copytools" + + # Launch a new HSM request + rm "$file" + create_empty_file "$file" + "$LFS" hsm_archive "$file" + + cdt_shutdown + + # Have the completed request be removed as soon as the cdt wakes up + stack_trap "set_hsm_param grace_delay $(get_hsm_param grace_delay)" EXIT + set_hsm_param grace_delay 1 + # (Hopefully, time on the MDS will behave nicely) + do_facet $SINGLEMDS sleep 2 & + + # Increase `loop_period' as a mean to prevent the coordinator from + # waking itself up to do some housekeeping. + stack_trap "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + + wait $! || error "waiting failed" + cdt_enable + wait_request_state $fid ARCHIVE "" + # The coordinator will not wake up on its own for ~`loop_period' secs... + + # ... Unless a copytool registers. Now the real test begins + copytool setup + wait_request_state $(path2fid "$file") ARCHIVE SUCCEED +} +run_test 255 "Copytool registration wakes the coordinator up" + +# tests 260[a-c] rely on the parsing of the copytool's log file, they might +# break in the future because of that. 
# Check that the coordinator hands a RESTORE request to the copytool ahead of
# a backlog of ARCHIVE requests.  The verdict is read from the copytool's log
# file, so it depends on that log's format.
test_260a()
{
	if [ $MDS1_VERSION -lt $(version_code 2.11.56) ]; then
		skip "need MDS version 2.11.56 or later"
	fi

	# $tfile.0 is the file that gets released and restored; $tfile.1 to
	# $tfile.15 are the files that only get archived
	local -a paths=()
	local idx

	for ((idx = 0; idx <= 15; idx++)); do
		paths+=("$DIR/$tdir/$tfile.$idx")
	done

	local path
	for path in "${paths[@]}"; do
		create_small_file "$path"
	done

	local released="${paths[0]}"
	local -a queued=("${paths[@]:1}")

	# Tune the coordinator; stack_trap registers the commands that put the
	# previous values back
	local saved_loop_period=$(get_hsm_param loop_period)
	stack_trap "set_hsm_param loop_period $saved_loop_period" EXIT
	set_hsm_param loop_period 1

	local saved_max_requests=$(get_hsm_param max_requests)
	stack_trap "set_hsm_param max_requests $saved_max_requests" EXIT
	set_hsm_param max_requests 3

	# Archive then release the first file, so that it can be restored later
	copytool setup
	"$LFS" hsm_archive "$released"
	wait_request_state "$(path2fid "$released")" ARCHIVE SUCCEED
	"$LFS" hsm_release "$released"

	# With no copytool running, the requests below can only pile up
	kill_copytools
	wait_copytools || error "copytools failed to stop"

	# Queue the ARCHIVE requests first...
	for path in "${queued[@]}"; do
		"$LFS" hsm_archive "$path"
	done

	# ... and the single RESTORE request last
	"$LFS" hsm_restore "$released"

	# A fresh copytool now gets to process the whole backlog
	copytool setup

	wait_request_state "$(path2fid "$released")" RESTORE SUCCEED
	for path in "${queued[@]}"; do
		wait_request_state "$(path2fid "$path")" ARCHIVE SUCCEED
	done

	# Actions, in the order in which they show up in the copytool's log
	local -a processed=(
		$(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
			"$(copytool_logfile "$SINGLEAGT")")
	)

	printf '%s\n' "${processed[@]}"

	# The RESTORE has to be one of the first three actions logged
	local seen
	for seen in "${processed[@]:0:3}"; do
		if [ "$seen" == RESTORE ]; then
			return
		fi
	done

	error "Too many ARCHIVE requests were run before the RESTORE request"
}
run_test 260a "Restore request have priority over other requests"

# This test is very much tied to the implementation of the current
# prioritisation mechanism in the coordinator.
# It might not make sense to keep it in the future
#
# Same scenario as test_260a, except that the released file lives in archive 2
# and two copytools (default archive and archive 2) process the backlog.
test_260b()
{
	if [ $MDS1_VERSION -lt $(version_code 2.11.56) ]; then
		skip "need MDS version 2.11.56 or later"
	fi

	# $tfile.0 is the file that gets released and restored; $tfile.1 to
	# $tfile.15 are the files that only get archived
	local -a paths=()
	local idx

	for ((idx = 0; idx <= 15; idx++)); do
		paths+=("$DIR/$tdir/$tfile.$idx")
	done

	local path
	for path in "${paths[@]}"; do
		create_small_file "$path"
	done

	local released="${paths[0]}"
	local -a queued=("${paths[@]:1}")

	# Tune the coordinator; stack_trap registers the commands that put the
	# previous values back
	local saved_loop_period=$(get_hsm_param loop_period)
	stack_trap "set_hsm_param loop_period $saved_loop_period" EXIT
	set_hsm_param loop_period 1

	local saved_max_requests=$(get_hsm_param max_requests)
	stack_trap "set_hsm_param max_requests $saved_max_requests" EXIT
	set_hsm_param max_requests 3

	# Archive the first file into archive 2, then release it
	copytool setup --archive-id 2
	"$LFS" hsm_archive --archive 2 "$released"
	wait_request_state "$(path2fid "$released")" ARCHIVE SUCCEED
	"$LFS" hsm_release "$released"

	# With no copytool running, the requests below can only pile up
	kill_copytools
	wait_copytools || error "copytools failed to stop"

	# Queue the ARCHIVE requests first (no explicit archive id)...
	for path in "${queued[@]}"; do
		"$LFS" hsm_archive "$path"
	done

	# ... and the single RESTORE request last
	"$LFS" hsm_restore "$released"

	# One copytool without an explicit archive id, one for archive 2
	copytool setup
	copytool setup --archive-id 2

	wait_request_state "$(path2fid "$released")" RESTORE SUCCEED
	for path in "${queued[@]}"; do
		wait_request_state "$(path2fid "$path")" ARCHIVE SUCCEED
	done

	# Actions, in the order in which they show up in the copytool's log
	local -a processed=(
		$(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \
			"$(copytool_logfile "$SINGLEAGT")")
	)

	printf '%s\n' "${processed[@]}"

	# The RESTORE has to be one of the first three actions logged
	local seen
	for seen in "${processed[@]:0:3}"; do
		if [ "$seen" == RESTORE ]; then
			return
		fi
	done

	error "Too many ARCHIVE requests were run before the RESTORE request"
}
run_test 260b "Restore request have priority over other requests"

# This test is very much tied to the implementation of the current
# prioritisation mechanism in the coordinator.
It might not make sense to keep it in the future +test_260c() +{ + [ $MDS1_VERSION -lt $(version_code 2.12.0) ] && + skip "Need MDS version at least 2.12.0" + + local -a files=("$DIR/$tdir/$tfile".{0..15}) + local file + + for file in "${files[@]}"; do + create_small_file "$file" + done + + # Set a few hsm parameters + stack_trap \ + "set_hsm_param loop_period $(get_hsm_param loop_period)" EXIT + set_hsm_param loop_period 1000 + stack_trap \ + "set_hsm_param max_requests $(get_hsm_param max_requests)" EXIT + set_hsm_param max_requests 3 + + # Release one file + copytool setup --archive-id 2 + "$LFS" hsm_archive --archive 2 "${files[0]}" + wait_request_state "$(path2fid "${files[0]}")" ARCHIVE SUCCEED + "$LFS" hsm_release "${files[0]}" + + # Stop the copytool + kill_copytools + wait_copytools || error "copytools failed to stop" + + # Force the next coordinator run to do housekeeping + cdt_shutdown + cdt_enable + + "$LFS" hsm_archive "${files[1]}" + + # Launch a copytool + copytool setup + copytool setup --archive-id 2 + + wait_request_state "$(path2fid "${files[1]}")" ARCHIVE SUCCEED + # The coordinator just did a housekeeping run it won't do another one + # for around `loop_period' seconds => requests will not be reordered + # if it costs too much (ie. 
when the coordinator has to discard a whole + # hal) + + # Send several archive requests + for file in "${files[@]:2}"; do + "$LFS" hsm_archive "$file" + done + + # Send one restore request + "$LFS" hsm_restore "${files[0]}" + + # Wait for all the requests to complete + wait_request_state "$(path2fid "${files[0]}")" RESTORE SUCCEED + for file in "${files[@]:2}"; do + wait_request_state "$(path2fid "$file")" ARCHIVE SUCCEED + done + + # Collect the actions in the order in which the copytool processed them + local -a actions=( + $(do_facet "$SINGLEAGT" grep -o '\"RESTORE\\|ARCHIVE\"' \ + "$(copytool_logfile "$SINGLEAGT")") + ) + + printf '%s\n' "${actions[@]}" + + local action + for action in "${actions[@]:0:3}"; do + [ "$action" == RESTORE ] && + error "Restore requests should not be prioritised" \ + "unless the coordinator is doing housekeeping" + done + return 0 +} +run_test 260c "Requests are not reordered on the 'hot' path of the coordinator" + test_300() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + # the only way to test ondisk conf is to restart MDS ... 
echo "Stop coordinator and remove coordinator state at mount" # stop coordinator @@ -4907,6 +4661,8 @@ test_300() { run_test 300 "On disk coordinator state kept between MDT umount/mount" test_301() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + local ai=$(get_hsm_param default_archive_id) local new=$((ai + 1)) @@ -4922,6 +4678,8 @@ test_301() { run_test 301 "HSM tunnable are persistent" test_302() { + [ "$CLIENTONLY" ] && skip "CLIENTONLY mode" && return + local ai=$(get_hsm_param default_archive_id) local new=$((ai + 1)) @@ -4958,9 +4716,9 @@ test_400() { local dir_mdt1=$DIR/$tdir/mdt1 # create 1 dir per MDT - stack_trap "rm -rf $dir_mdt0" + stack_trap "rm -rf $dir_mdt0" EXIT $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" - stack_trap "rm -rf $dir_mdt1" + stack_trap "rm -rf $dir_mdt1" EXIT $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" # create 1 file in each MDT @@ -5136,10 +4894,10 @@ test_405() { local fid3=$(create_small_sync_file $striped_dir/${tfile}_2) local fid4=$(create_small_sync_file $striped_dir/${tfile}_3) - local idx1=$($LFS getstripe -M $striped_dir/${tfile}_0) - local idx2=$($LFS getstripe -M $striped_dir/${tfile}_1) - local idx3=$($LFS getstripe -M $striped_dir/${tfile}_2) - local idx4=$($LFS getstripe -M $striped_dir/${tfile}_3) + local idx1=$($LFS getstripe -m $striped_dir/${tfile}_0) + local idx2=$($LFS getstripe -m $striped_dir/${tfile}_1) + local idx3=$($LFS getstripe -m $striped_dir/${tfile}_2) + local idx4=$($LFS getstripe -m $striped_dir/${tfile}_3) # check that compound requests are shunt to the rights MDTs $LFS hsm_archive $striped_dir/${tfile}_0 $striped_dir/${tfile}_1 \ @@ -5170,8 +4928,8 @@ run_test 405 "archive and release under striped directory" test_406() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0 - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] && - skip "need MDS version at least 2.7.64" && return 0 + [ $MDS1_VERSION -lt $(version_code 2.7.64) ] && + skip "need MDS version at least 
2.7.64" local fid local mdt_index @@ -5186,7 +4944,7 @@ test_406() { $LFS hsm_release $DIR/$tdir/$tfile # Should migrate $tdir but not $tfile. - $LFS mv -M1 $DIR/$tdir && + $LFS migrate -m1 $DIR/$tdir && error "migrating HSM an archived file should fail" $LFS hsm_restore $DIR/$tdir/$tfile @@ -5198,10 +4956,10 @@ test_406() { cat $DIR/$tdir/$tfile > /dev/null || error "cannot read $DIR/$tdir/$tfile" - $LFS mv -M1 $DIR/$tdir || + $LFS migrate -m1 $DIR/$tdir || error "cannot complete migration after HSM remove" - mdt_index=$($LFS getstripe -M $DIR/$tdir) + mdt_index=$($LFS getstripe -m $DIR/$tdir) if ((mdt_index != 1)); then error "expected MDT index 1, got $mdt_index" fi @@ -5226,17 +4984,11 @@ test_406() { run_test 406 "attempting to migrate HSM archived files is safe" test_407() { - needclients 2 || return 0 - # test needs a running copytool - copytool setup - - mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile local f2=$DIR2/$tdir/$tfile - local fid - fid=$(make_custom_file_for_progress $f 39 1000000) - [ $? 
!= 0 ] && skip "not enough free space" && return + local fid=$(create_empty_file "$f") + + copytool setup $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED @@ -5245,6 +4997,9 @@ test_407() { #define OBD_FAIL_MDS_HSM_CDT_DELAY 0x164 do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x164 + # Prevent restore from completing + copytool_suspend + md5sum $f & # 1st request holds layout lock while appropriate # RESTORE record is still not added to llog @@ -5259,21 +5014,374 @@ test_407() { do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"& fail $SINGLEMDS + copytool_continue wait_request_state $fid RESTORE SUCCEED } run_test 407 "Check for double RESTORE records in llog" test_500() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6.92) ] && - skip "HSM migrate is not supported" && return + [ $MDS1_VERSION -lt $(version_code 2.6.92) ] && + skip "HSM migrate is not supported" test_mkdir -p $DIR/$tdir - llapi_hsm_test -d $DIR/$tdir || error "One llapi HSM test failed" + + if [ $(lustre_version_code client) -lt $(version_code 2.11.56) ] || + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.11.56) ]; + then + llapi_hsm_test -d $DIR/$tdir -b || + error "One llapi HSM test failed" + else + llapi_hsm_test -d $DIR/$tdir || + error "One llapi HSM test failed" + fi } run_test 500 "various LLAPI HSM tests" -copytool_cleanup +test_600() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + chmod 777 $DIR/$tdir + $RUNAS touch $f || error "touch $f failed as $RUNAS_ID" + local fid=$(path2fid $f) + + local entry + entry=$(changelog_find -type CREAT -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID") || + error "No matching CREAT entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local 
nid="${changelog[nid]}" + + # Check its NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" +} +run_test 600 "Changelog fields 'u=' and 'nid='" + +test_601() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + cat $f || error "cat $f failed" + + changelog_find -type OPEN -target-fid $fid -mode "r--" || + error "No matching OPEN entry" +} +run_test 601 "OPEN Changelog entry" + +test_602() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + cat $f || error "cat $f failed" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + echo f > $f || error "write $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + # remove OPEN from changelog_mask + changelog_chmask "-OPEN" + + changelog_clear + changelog_dump + cat $f || error "cat $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + echo f > $f || error "write $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" +} +run_test 602 "Changelog record CLOSE only if open+write or OPEN recorded" + +test_603() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local 
f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + setfattr -n user.xattr1 -v "value1" $f || error "setfattr $f failed" + + changelog_clear + getfattr -n user.xattr1 $f || error "getfattr $f failed" + + changelog_find -type GXATR -target-fid $fid -xattr "user.xattr1" || + error "No matching GXATR entry" +} +run_test 603 "GETXATTR Changelog entry" + +test_604() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + local f2=$DIR2/$tdir/$tfile + local procname="mdd.$FSNAME-MDT0000.changelog_deniednext" + local timeout + timeout="$(do_facet mds1 "$LCTL" get_param -n "$procname")" + stack_trap "do_facet mds1 '$LCTL' set_param '$procname=$timeout'" EXIT + do_facet mds1 lctl set_param "$procname=20" + + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + chmod 600 $f + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed" + changelog_dump + + local entry + entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID" -mode "r--") || + error "No matching NOPEN entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local nid="${changelog[nid]}" + + # Check its NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat $f2 by user $RUNAS_ID should have failed" + changelog_dump + + changelog_find -type NOPEN -target-fid $fid && + error "There should be no NOPEN entry" + + # Sleep for `changelog_deniednext` seconds + sleep 20 + + changelog_clear + changelog_dump + $RUNAS cat $f2 && error "cat 
$f by user $RUNAS_ID should have failed" + changelog_dump + + entry=$(changelog_find -type NOPEN -target-fid $fid -uid "$RUNAS_ID" \ + -gid "$RUNAS_GID" -mode "r--") || + error "No matching NOPEN entry" + + # Parse the changelog + eval local -A changelog=$(changelog2array $entry) + local nid="${changelog[nid]}" + + # Check the NID + echo "Got NID '$nid'" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any client NID:" \ + "${CLIENT_NIDS[@]}" +} +run_test 604 "NOPEN Changelog entry" + +test_605() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + local f2=$DIR2/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + touch $f || error "touch $f failed" + local fid=$(path2fid $f) + + changelog_clear + changelog_dump + exec 3<> $f || error "open $f failed" + changelog_dump + + local entry + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_clear + changelog_dump + exec 4<> $f || error "open $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid && + error "There should be no OPEN entry" + + exec 4>&- || error "close $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + # access in rw, so different access mode should generate entries + cat $f || error "cat $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + # same access as first one, should not generate new entries + exec 4<> $f || error "open $f failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid && + error "There should be no OPEN entry" + + exec 4>&- || error "close $f failed" + changelog_dump + + 
changelog_find -type CLOSE -target-fid $fid && + error "There should be no CLOSE entry" + + changelog_clear + changelog_dump + # access by different user should generate new entries + $RUNAS cat $f || error "cat $f by user $RUNAS_ID failed" + changelog_dump + + changelog_find -type OPEN -target-fid $fid || error "No OPEN entry" + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" + + changelog_clear + changelog_dump + exec 3>&- || error "close $f failed" + changelog_dump + + changelog_find -type CLOSE -target-fid $fid || error "No CLOSE entry" +} +run_test 605 "Test OPEN and CLOSE rate limit in Changelogs" + +test_606() { + [ $MDS1_VERSION -lt $(version_code 2.10.58) ] && + skip "need MDS version at least 2.10.58" + + local llog_reader=$(do_facet mgs "which llog_reader 2> /dev/null") + llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader} + [ -z $(do_facet mgs ls -d $llog_reader 2> /dev/null) ] && + skip_env "missing llog_reader" && return + local fstype=$(facet_fstype mds1) + + mkdir -p $DIR/$tdir + + local f=$DIR/$tdir/$tfile + + changelog_register + # set changelog_mask to ALL + changelog_chmask "ALL" + + chmod 777 $DIR/$tdir + $RUNAS touch $f || error "touch $f failed as $RUNAS_ID" + local fid=$(path2fid $f) + rm $f || error "rm $f failed" + + local mntpt=$(facet_mntpt mds1) + local pass=true + local entry + + #remount mds1 as ldiskfs or zfs type + stack_trap "stop mds1; start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS" EXIT + stop mds1 || error "stop mds1 failed" + mount_fstype mds1 || error "remount mds1 failed" + + for ((i = 0; i < 1; i++)); do + do_facet mds1 $llog_reader $mntpt/changelog_catalog + local cat_file=$(do_facet mds1 $llog_reader \ + $mntpt/changelog_catalog | awk \ + '{match($0,"path=([^ ]+)",a)}END{print a[1]}') + [ -n "$cat_file" ] || error "no catalog file" + + entry=$(do_facet mds1 $llog_reader $mntpt/$cat_file | + awk "/CREAT/ && /target:\[$fid\]/ {print}") + [ -n "$entry" ] || error "no CREAT entry" + done + + local 
uidgid=$(echo $entry | + sed 's+.*\ user:\([0-9][0-9]*:[0-9][0-9]*\)\ .*+\1+') + [ -n "$uidgid" ] || error "uidgid is empty" + echo "Got UID/GID $uidgid" + [ "$uidgid" = "$RUNAS_ID:$RUNAS_GID" ] || + error "uidgid '$uidgid' != '$RUNAS_ID:$RUNAS_GID'" + local nid=$(echo $entry | + sed 's+.*\ nid:\(\S\S*@\S\S*\)\ .*+\1+') + [ -n "$nid" ] || error "nid is empty" + echo "Got NID $nid" + [ -n "$nid" ] && [[ "${CLIENT_NIDS[*]}" =~ $nid ]] || + error "nid '$nid' does not match any NID ${CLIENT_NIDS[@]}" +} +run_test 606 "llog_reader groks changelog fields" complete $SECONDS check_and_cleanup_lustre