X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity-hsm.sh;h=4ef21eb60461cb05e31635d8acf7aee9361510cd;hb=3f5fc6412d3ae2d3a57f8fdb8a457f35e9d9576e;hp=62164fb636d3f201aa27ba256bb97b68e5888541;hpb=3ca3500252765e7afbfe99cbbb7a0e7eb9df01bc;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 62164fb..4ef21eb 100644 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -11,11 +11,11 @@ SRCDIR=$(dirname $0) export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin ONLY=${ONLY:-"$*"} -# bug number for skipped test: +# bug number for skipped test: 3815 +ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36" +# bug number for skipped test:4178 4176 +ALWAYS_EXCEPT="$ALWAYS_EXCEPT 200 221 223b 31a" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -# skip test cases failed before landing - Jinshan -ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 12a 12b 12n 13 24 30a 31a 34 35 36 58 59" -ALWAYS_EXCEPT="$ALWAYS_EXCEPT 110a 200 201 221 222a 223a 223b 225" LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} @@ -30,9 +30,9 @@ MCREATE=${MCREATE:-mcreate} MOUNT_2=${MOUNT_2:-"yes"} FAIL_ON_ERROR=false -if [ $MDSCOUNT -ge 2 ]; then - skip_env "Only run with single MDT for now" && exit -fi +# script only handles up to 10 MDTs (because of MDT_PREFIX) +[ $MDSCOUNT -gt 9 ] && + error "script cannot handle more than 9 MDTs, please fix" && exit check_and_setup_lustre @@ -41,112 +41,269 @@ if [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.4.53) ]]; then fi # $RUNAS_ID may get set incorrectly somewhere else -[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && - error "\$RUNAS_ID set to 0, but \$UID is also 0!" - +if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then + skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" && exit +fi check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS build_test_filter -# the standard state when starting a test is -# - no copytool -# - MOUNT2 done -# as some test changes the default, we need to re-make it +# +# In order to test multiple remote HSM agents, a new facet type named "AGT" and +# the following associated variables are added: +# +# AGTCOUNT: number of agents +# AGTDEV{N}: target HSM mount point (root path of the backend) +# agt{N}_HOST: hostname of the agent agt{N} +# SINGLEAGT: facet of the single agent +# +# The number of agents is initialized as the number of remote client nodes. +# By default, only single copytool is started on a remote client/agent. If there +# was no remote client, then the copytool will be started on the local client. +# +init_agt_vars() { + local n + local agent + + export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))} + [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1 + + export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP} + if [[ $CLIENTCOUNT -gt 1 ]] && + ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then + skip_env "SHARED_DIRECTORY should be accessible"\ + "on all client nodes" + exit 0 + fi + + for n in $(seq $AGTCOUNT); do + eval export AGTDEV$n=\$\{AGTDEV$n:-"$SHARED_DIRECTORY/arc$n"\} + agent=CLIENT$((n + 1)) + if [[ -z "${!agent}" ]]; then + [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 || + agent=CLIENT2 + fi + eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\} + done + + export SINGLEAGT=${SINGLEAGT:-agt1} + + export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} + export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} + export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") + HSM_ARCHIVE=$(copytool_device $SINGLEAGT) + HSM_ARCHIVE_NUMBER=2 + + # The test only support up to 10 MDTs + MDT_PREFIX="mdt.$FSNAME-MDT000" + HSM_PARAM="${MDT_PREFIX}0.hsm" + + # archive is purged at copytool setup + HSM_ARCHIVE_PURGE=true +} + +# Get the backend root path for the given agent facet. +copytool_device() { + local facet=$1 + local dev=AGTDEV$(facet_number $facet) + + echo -n ${!dev} +} + +# Stop copytool and unregister an existing changelog user. cleanup() { copytool_cleanup - if ! is_mounted $MOUNT2 - then - mount_client $MOUNT2 - fi changelog_cleanup + cdt_set_sanity_policy } -export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"} -export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""} -export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ") -HSM_ARCHIVE=${HSM_ARCHIVE:-$TMP/arc} -HSM_ARCHIVE_NUMBER=2 - -MDT_PARAM="mdt.$FSNAME-MDT0000" -HSM_PARAM="$MDT_PARAM.hsm" +get_mdt_devices() { + local mdtno + # get MDT device for each mdc + for mdtno in $(seq 1 $MDSCOUNT); do + local idx=$(($mdtno - 1)) + MDT[$idx]=$($LCTL get_param -n \ + mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid | + awk '{gsub(/_UUID/,""); print $1}' | head -1) + done +} -# archive is purged at copytool setup -HSM_ARCHIVE_PURGE=true +search_copytools() { + local agents=${1:-$(facet_active_host $SINGLEAGT)} + do_nodesv $agents "pgrep -x $HSMTOOL_BASE" +} search_and_kill_copytool() { - echo "Killing existing copy tools" - killall -q $HSMTOOL_BASE || true + local agents=${1:-$(facet_active_host $SINGLEAGT)} + + echo "Killing existing copytools on $agents" + do_nodesv $agents "killall -q $HSMTOOL_BASE" || true } copytool_setup() { - if pkill -CONT -x $HSMTOOL_BASE; then - echo "Wakeup copytool" - return + local facet=${1:-$SINGLEAGT} + local lustre_mntpnt=${2:-$MOUNT} + local arc_id=$3 + local hsm_root=${4:-$(copytool_device $facet)} + local agent=$(facet_active_host $facet) + + if [[ -z "$arc_id" ]] && + do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then + echo "Wakeup copytool $facet on $agent" + return 0 fi if $HSM_ARCHIVE_PURGE; then - echo "Purging archive" - rm -rf $HSM_ARCHIVE/* + echo "Purging archive on $agent" + do_facet $facet "rm -rf $hsm_root/*" fi - echo "Starting copytool" - mkdir -p $HSM_ARCHIVE + echo "Starting copytool $facet on $agent" + do_facet $facet "mkdir -p $hsm_root" || error "mkdir '$hsm_root' failed" # bandwidth is limited to 1MB/s so the copy time is known and # independent of hardware - local CMD="$HSMTOOL $HSMTOOL_VERBOSE --hsm-root $HSM_ARCHIVE" - CMD=$CMD" --daemon --bandwidth 1 $MOUNT" - [[ -z "$1" ]] || CMD+=" --archive $1" - - echo "$CMD" - $CMD & + local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root $hsm_root" + [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id" + cmd+=" --bandwidth 1 $lustre_mntpnt" + + # Redirect the standard output and error to a log file which + # can be uploaded to Maloo. + local prefix=$TESTLOG_PREFIX + [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME + local copytool_log=$prefix.copytool${arc_id}_log.$agent.log + + do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1" || + error "start copytool $facet on $agent failed" trap cleanup EXIT } copytool_cleanup() { trap - EXIT - pkill -INT -x $HSMTOOL_BASE || return 0 + local agents=${1:-$(facet_active_host $SINGLEAGT)} + local mdtno + local idx + local oldstate + local mdt_hsmctrl + + do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0 sleep 1 - echo "Copytool is stopped" + echo "Copytool is stopped on $agents" + + # clean all CDTs orphans requests from previous tests + # that would otherwise need to timeout to clear. + for mdtno in $(seq 1 $MDSCOUNT); do + idx=$(($mdtno - 1)) + mdt_hsmctrl="mdt.$FSNAME-MDT000${idx}.hsm_control" + oldstate=$(do_facet mds${mdtno} "$LCTL get_param -n " \ + "$mdt_hsmctrl") + # skip already stop[ed,ing] CDTs + echo $oldstate | grep stop && continue + + do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=shutdown" + wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \ + "stopped" 20 || + error "mds${mdtno} cdt state is not stopped" + do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=$oldstate" + wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \ + "$oldstate" 20 || + error "mds${mdtno} cdt state is not $oldstate" + done } copytool_suspend() { - pkill -STOP -x $HSMTOOL_BASE || return 0 - echo "Copytool is suspended" + local agents=${1:-$(facet_active_host $SINGLEAGT)} + + do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0 + echo "Copytool is suspended on $agents" } copytool_remove_backend() { local fid=$1 - local be=$(find $HSM_ARCHIVE -name $fid) + local be=$(do_facet $SINGLEAGT find $HSM_ARCHIVE -name $fid) echo "Remove from backend: $fid = $be" - rm -f $be + do_facet $SINGLEAGT rm -f $be } import_file() { - $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE \ - --import $1 $2 $MOUNT || error "import of $1 to $2 failed" + do_facet $SINGLEAGT \ + "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ + --import $1 $2 $MOUNT" || + error "import of $1 to $2 failed" } make_archive() { local file=$HSM_ARCHIVE/$1 - mkdir -p $(dirname $file) - dd if=/dev/urandom of=$file count=32 bs=1000000 || + do_facet $SINGLEAGT mkdir -p $(dirname $file) + do_facet $SINGLEAGT dd if=/dev/urandom of=$file count=32 bs=1000000 || error "cannot create $file" } +copy2archive() { + local file=$HSM_ARCHIVE/$2 + do_facet $SINGLEAGT mkdir -p $(dirname $file) + do_facet $SINGLEAGT cp -p $1 $file || error "cannot copy $1 to $file" +} + +mdts_set_param() { + local arg=$1 + local key=$2 + local value=$3 + local mdtno + local rc=0 + if [[ "$value" != "" ]]; then + value="=$value" + fi + for mdtno in $(seq 1 $MDSCOUNT); do + local idx=$(($mdtno - 1)) + local facet=mds${mdtno} + # if $arg include -P option, run 1 set_param per MDT on the MGS + # else, run set_param on each MDT + [[ $arg = *"-P"* ]] && facet=mgs + do_facet $facet $LCTL set_param $arg mdt.${MDT[$idx]}.$key$value + [[ $? != 0 ]] && rc=1 + done + return $rc +} + +mdts_check_param() { + local key="$1" + local target="$2" + local timeout="$3" + local mdtno + for mdtno in $(seq 1 $MDSCOUNT); do + local idx=$(($mdtno - 1)) + wait_result mds${mdtno} \ + "$LCTL get_param -n $MDT_PREFIX${idx}.$key" "$target" \ + $timeout || + error "$key state is not '$target' on mds${mdtno}" + done +} + changelog_setup() { - CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0\ - changelog_register -n) - do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog_mask="+hsm" - $LFS changelog_clear $MDT0 $CL_USER 0 + CL_USERS=() + local mdtno + for mdtno in $(seq 1 $MDSCOUNT); do + local idx=$(($mdtno - 1)) + local cl_user=$(do_facet mds${mdtno} $LCTL \ + --device ${MDT[$idx]} \ + changelog_register -n) + CL_USERS+=($cl_user) + do_facet mds${mdtno} lctl set_param \ + mdd.${MDT[$idx]}.changelog_mask="+hsm" + $LFS changelog_clear ${MDT[$idx]} $cl_user 0 + done } changelog_cleanup() { -# $LFS changelog $MDT0 - [[ -n "$CL_USER" ]] || return 0 - - $LFS changelog_clear $MDT0 $CL_USER 0 - do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER - CL_USER= + local mdtno + for mdtno in $(seq 1 $MDSCOUNT); do + local idx=$(($mdtno - 1)) + [[ -z ${CL_USERS[$idx]} ]] && continue + $LFS changelog_clear ${MDT[$idx]} ${CL_USERS[$idx]} 0 + do_facet mds${mdtno} lctl --device ${MDT[$idx]} \ + changelog_deregister ${CL_USERS[$idx]} + done + CL_USERS=() } changelog_get_flags() { @@ -166,58 +323,58 @@ get_hsm_param() { set_hsm_param() { local param=$1 local value=$2 - do_facet $SINGLEMDS $LCTL set_param -n $HSM_PARAM.$param=$value + local opt=$3 + mdts_set_param "$opt -n" "hsm.$param" "$value" return $? } set_test_state() { local cmd=$1 local target=$2 - do_facet $SINGLEMDS $LCTL set_param $MDT_PARAM.hsm_control=$cmd - wait_result $SINGLEMDS "$LCTL get_param -n $MDT_PARAM.hsm_control"\ - $target 10 || error "cdt state is not $target" + mdts_set_param "" hsm_control "$cmd" + mdts_check_param hsm_control "$target" 10 } cdt_set_sanity_policy() { - # clear all - do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-nra - do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-nbr - do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-gc + if [[ "$CDT_POLICY_HAD_CHANGED" ]] + then + # clear all + mdts_set_param "" hsm.policy "+NRA" + mdts_set_param "" hsm.policy "-NBR" + CDT_POLICY_HAD_CHANGED= + fi } cdt_set_no_retry() { - do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=+nra + mdts_set_param "" hsm.policy "+NRA" + CDT_POLICY_HAD_CHANGED=true } cdt_clear_no_retry() { - do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-nra + mdts_set_param "" hsm.policy "-NRA" + CDT_POLICY_HAD_CHANGED=true } -cdt_set_no_blocking_restore() { - do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=+nbr +cdt_set_non_blocking_restore() { + mdts_set_param "" hsm.policy "+NBR" + CDT_POLICY_HAD_CHANGED=true } -cdt_clear_no_blocking_restore() { - do_facet $SINGLEMDS $LCTL set_param $HSM_PARAM.policy=-nbr +cdt_clear_non_blocking_restore() { + mdts_set_param "" hsm.policy "-NBR" + CDT_POLICY_HAD_CHANGED=true } cdt_clear_mount_state() { - # /!\ conf_param and set_param syntax differ +> we cannot use - # $MDT_PARAM - do_facet $SINGLEMDS $LCTL conf_param -d $FSNAME-MDT0000.mdt.hsm_control + mdts_set_param "-P -d" hsm_control "" } cdt_set_mount_state() { - # /!\ conf_param and set_param syntax differ +> we cannot use - # $MDT_PARAM - do_facet $SINGLEMDS $LCTL conf_param $FSNAME-MDT0000.mdt.hsm_control=$1 + mdts_set_param "-P" hsm_control "$1" } cdt_check_state() { - local target=$1 - wait_result $SINGLEMDS\ - "$LCTL get_param -n $MDT_PARAM.hsm_control" "$target" 20 || - error "cdt state is not $target" + mdts_check_param hsm_control "$1" 20 } cdt_disable() { @@ -242,9 +399,10 @@ cdt_restart() { cdt_set_sanity_policy } -need2clients() { - if [[ $CLIENTCOUNT -lt 2 ]]; then - skip "Need two or more clients, have $CLIENTCOUNT" +needclients() { + local client_count=$1 + if [[ $CLIENTCOUNT -lt $client_count ]]; then + skip "Need $client_count or more clients, have $CLIENTCOUNT" return 1 fi return 0 @@ -258,8 +416,7 @@ get_hsm_flags() { local f=$1 local u=$2 - if [[ $u == "user" ]] - then + if [[ $u == "user" ]]; then local st=$($RUNAS $LFS hsm_state $f) else local st=$($LFS hsm_state $f) @@ -301,15 +458,13 @@ check_hsm_flags_user() { copy_file() { local f= - if [[ -d $2 ]] - then + if [[ -d $2 ]]; then f=$2/$(basename $1) else f=$2 fi - if [[ "$3" != 1 ]] - then + if [[ "$3" != 1 ]]; then f=${f/$DIR/$DIR2} fi rm -f $f @@ -324,9 +479,24 @@ make_small() { path2fid $1 || error "cannot get fid on $1" } +make_small_sync() { + dd if=/dev/urandom of=$1 count=1 bs=1M conv=sync || + error "cannot create $1" + path2fid $1 || error "cannot get fid on $1" +} + +cleanup_large_files() { + local ratio=$(df -P $MOUNT | tail -1 | awk '{print $5}' | + sed 's/%//g') + [ $ratio -gt 50 ] && find $MOUNT -size +10M -exec rm -f {} \; +} + make_large_for_striping() { local file2=${1/$DIR/$DIR2} local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -1) + + cleanup_large_files + dd if=/dev/urandom of=$file2 count=5 bs=$sz conv=fsync || error "cannot create $file2" path2fid $1 || error "cannot get fid on $1" @@ -334,6 +504,9 @@ make_large_for_striping() { make_large_for_progress() { local file2=${1/$DIR/$DIR2} + + cleanup_large_files + # big file is large enough, so copy time is > 30s # so copytool make 1 progress # size is not a multiple of 1M to avoid stripe @@ -345,6 +518,9 @@ make_large_for_progress() { make_large_for_progress_aligned() { local file2=${1/$DIR/$DIR2} + + cleanup_large_files + # big file is large enough, so copy time is > 30s # so copytool make 1 progress # size is a multiple of 1M to have stripe @@ -356,6 +532,9 @@ make_large_for_progress_aligned() { make_large_for_cancel() { local file2=${1/$DIR/$DIR2} + + cleanup_large_files + # Copy timeout is 100s. 105MB => 105s dd if=/dev/urandom of=$file2 count=103 bs=1M conv=fsync || error "cannot create $file2" @@ -368,54 +547,57 @@ wait_result() { wait_update --verbose $(facet_active_host $facet) "$@" } -wait_request_state() -{ +wait_request_state() { local fid=$1 local request=$2 local state=$3 - wait_result $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\ - grep $fid | grep action=$request |\ - cut -f 13 -d ' ' | cut -f 2 -d =" $state 100 || - error "request on $fid is not $state" + # 4th arg (mdt index) is optional + local mdtidx=${4:-0} + local mds=mds$(($mdtidx + 1)) + + local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions" + cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" + + wait_result $mds "$cmd" $state 100 || + error "request on $fid is not $state on $mds" } -get_request_state() -{ +get_request_state() { local fid=$1 local request=$2 - do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\ - grep $fid | grep action=$request |\ - cut -f 13 -d ' ' | cut -f 2 -d =" + + do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.actions |"\ + "awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d=" } -get_request_count() -{ +get_request_count() { local fid=$1 local request=$2 - do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\ - grep $fid | grep action=$request | wc -l" + + do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.actions |"\ + "awk -vn=0 '/'$fid'.*action='$request'/ {n++}; END {print n}'" } -wait_all_done() -{ +wait_all_done() { local timeout=$1 - wait_result $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\ - egrep 'WAITING|STARTED' " "" $timeout || - error "requests did not complete" + + local cmd="$LCTL get_param -n $HSM_PARAM.actions" + cmd+=" | egrep 'WAITING|STARTED'" + + wait_result $SINGLEMDS "$cmd" "" $timeout || + error "requests did not complete" } -wait_for_grace_delay() -{ +wait_for_grace_delay() { local val=$(get_hsm_param grace_delay) sleep $val } -my_uuid() { - $LCTL get_param -n llite.$FSNAME-*.uuid -} +# populate MDT device array +get_mdt_devices -MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid | - awk '{gsub(/_UUID/,""); print $1}' | head -1) +# initiate variables +init_agt_vars # cleanup from previous bad setup search_and_kill_copytool @@ -430,6 +612,9 @@ cdt_check_state enabled echo "Start copytool" copytool_setup +echo "Set sanity-hsm HSM policy" +cdt_set_sanity_policy + # finished requests are quickly removed from list set_hsm_param grace_delay 10 @@ -583,9 +768,9 @@ test_9() { local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/passwd $f) # we do not use the default one to be sure - local new_an=$((HSM_ARCHIVE_NUMBER+ 1)) + local new_an=$((HSM_ARCHIVE_NUMBER + 1)) copytool_cleanup - copytool_setup $new_an + copytool_setup $SINGLEAGT $MOUNT $new_an $LFS hsm_archive --archive $new_an $f wait_request_state $fid ARCHIVE SUCCEED @@ -595,6 +780,37 @@ test_9() { } run_test 9 "Use of explict archive number, with dedicated copytool" +test_9a() { + needclients 3 || return 0 + + local n + local file + local fid + + copytool_cleanup $(comma_list $(agts_nodes)) + + # start all of the copytools + for n in $(seq $AGTCOUNT); do + copytool_setup agt$n + done + + trap "copytool_cleanup $(comma_list $(agts_nodes))" EXIT + # archive files + mkdir -p $DIR/$tdir + for n in $(seq $AGTCOUNT); do + file=$DIR/$tdir/$tfile.$n + fid=$(make_small $file) + + $LFS hsm_archive $file || error "could not archive file $file" + wait_request_state $fid ARCHIVE SUCCEED + check_hsm_flags $file "0x00000009" + done + + trap - EXIT + copytool_cleanup $(comma_list $(agts_nodes)) +} +run_test 9a "Multiple remote agents" + test_10a() { # test needs a running copytool copytool_setup @@ -606,10 +822,10 @@ test_10a() { error "hsm_archive failed" wait_request_state $fid ARCHIVE SUCCEED - local AFILE=$(ls $HSM_ARCHIVE/*/*/*/*/*/*/$fid) || + local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || error "fid $fid not in archive $HSM_ARCHIVE" echo "Verifying content" - diff $f $AFILE || error "archived file differs" + do_facet $SINGLEAGT diff $f $AFILE || error "archived file differs" echo "Verifying hsm state " check_hsm_flags $f "0x00000009" @@ -627,7 +843,7 @@ test_10b() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_archive $f || error "archive request failed" @@ -646,7 +862,7 @@ test_10c() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_set --noarchive $f @@ -656,9 +872,28 @@ test_10c() { } run_test 10c "Check forbidden archive" +test_10d() { + # test needs a running copytool + copytool_setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/hosts $f) + $LFS hsm_archive $f || error "cannot archive $f" + wait_request_state $fid ARCHIVE SUCCEED + + local ar=$(get_hsm_archive_id $f) + local dflt=$(get_hsm_param default_archive_id) + [[ $ar == $dflt ]] || + error "archived file is not on default archive: $ar != $dflt" + + copytool_cleanup +} +run_test 10d "Archive a file on the default archive id" + test_11() { - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir - cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile + mkdir -p $DIR/$tdir + copy2archive /etc/hosts $tdir/$tfile local f=$DIR/$tdir/$tfile import_file $tdir/$tfile $f @@ -666,7 +901,7 @@ test_11() { check_hsm_flags $f "0x0000000d" local LSZ=$(stat -c "%s" $f) - local ASZ=$(stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile) + local ASZ=$(do_facet $SINGLEAGT stat -c "%s" $HSM_ARCHIVE/$tdir/$tfile) echo "Verifying imported size $LSZ=$ASZ" [[ $LSZ -eq $ASZ ]] || error "Incorrect size $LSZ != $ASZ" @@ -677,7 +912,7 @@ test_11() { local fid=$(path2fid $f) echo "Verifying new fid $fid in archive" - local AFILE=$(ls $HSM_ARCHIVE/*/*/*/*/*/*/$fid) || + local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) || error "fid $fid not in archive $HSM_ARCHIVE" } run_test 11 "Import a file" @@ -686,8 +921,9 @@ test_12a() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir - cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile + mkdir -p $DIR/$tdir + copy2archive /etc/hosts $tdir/$tfile + local f=$DIR/$tdir/$tfile import_file $tdir/$tfile $f local f=$DIR2/$tdir/$tfile @@ -701,7 +937,7 @@ test_12a() { echo "Verifying file state: " check_hsm_flags $f "0x00000009" - diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f [[ $? -eq 0 ]] || error "Restored file differs" @@ -713,8 +949,9 @@ test_12b() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir - cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile + mkdir -p $DIR/$tdir + copy2archive /etc/hosts $tdir/$tfile + local f=$DIR/$tdir/$tfile import_file $tdir/$tfile $f echo "Verifying released state: " @@ -725,7 +962,7 @@ test_12b() { echo "Verifying file state after restore: " check_hsm_flags $f "0x00000009" - diff -q $HSM_ARCHIVE/$tdir/$tfile $f + do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f [[ $? -eq 0 ]] || error "Restored file differs" @@ -761,7 +998,8 @@ test_12d() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/hosts $f) $LFS hsm_restore $f || error "restore of non archived file failed" @@ -853,7 +1091,7 @@ test_12g() { run_test 12g "Restore a released file implicitly" test_12h() { - need2clients || return 0 + needclients 2 || return 0 # test needs a running copytool copytool_setup @@ -902,12 +1140,14 @@ test_12n() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir - cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile + mkdir -p $DIR/$tdir + copy2archive /etc/hosts $tdir/$tfile + local f=$DIR/$tdir/$tfile import_file $tdir/$tfile $f - cmp /etc/hosts $f || error "Restored file differs" + do_facet $SINGLEAGT cmp /etc/hosts $f || + error "Restored file differs" $LFS hsm_release $f || error "release of $f failed" @@ -915,6 +1155,53 @@ test_12n() { } run_test 12n "Import/implicit restore/release" +test_12o() { + # test needs a running copytool + copytool_setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/$tfile + local fid=$(copy_file /etc/hosts $f) + + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f || error "release of $f failed" + +#define OBD_FAIL_MDS_HSM_SWAP_LAYOUTS 0x152 + do_facet $SINGLEMDS lctl set_param fail_loc=0x152 + + # set no retry action mode + cdt_set_no_retry + + diff -q /etc/hosts $f + local st=$? + + # we check we had a restore failure + wait_request_state $fid RESTORE FAILED + + [[ $st -eq 0 ]] && error "Restore must fail" + + # remove no retry action mode + cdt_clear_no_retry + + # check file is still released + check_hsm_flags $f "0x0000000d" + + # retry w/o failure injection + do_facet $SINGLEMDS lctl set_param fail_loc=0 + + diff -q /etc/hosts $f + st=$? + + # we check we had a restore done + wait_request_state $fid RESTORE SUCCEED + + [[ $st -eq 0 ]] || error "Restored file differs" + + copytool_cleanup +} +run_test 12o "Layout-swap failure during Restore leaves file released" + test_13() { # test needs a running copytool copytool_setup @@ -926,24 +1213,23 @@ test_13() { # populate directory to be imported for d in $(seq 1 10); do local CURR_DIR="$HSM_ARCHIVE/$ARC_SUBDIR/dir.$d" - mkdir -p "$CURR_DIR" + do_facet $SINGLEAGT mkdir -p "$CURR_DIR" for f in $(seq 1 10); do CURR_FILE="$CURR_DIR/$tfile.$f" # write file-specific data - echo "d=$d, f=$f, dir=$CURR_DIR, file=$CURR_FILE"\ - > $CURR_FILE + do_facet $SINGLEAGT \ + "echo d=$d, f=$f, dir=$CURR_DIR, "\ + "file=$CURR_FILE > $CURR_FILE" done done # import to Lustre import_file "$ARC_SUBDIR" $DIR/$tdir # diff lustre content and origin (triggers file restoration) # there must be 10x10 identical files, and no difference - local cnt_ok=$(diff -rs $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | - grep identical | wc -l) - local cnt_diff=$(diff -r $HSM_ARCHIVE/$ARC_SUBDIR \ - $DIR/$tdir/$ARC_SUBDIR | - wc -l) + local cnt_ok=$(do_facet $SINGLEAGT diff -rs $HSM_ARCHIVE/$ARC_SUBDIR \ + $DIR/$tdir/$ARC_SUBDIR | grep identical | wc -l) + local cnt_diff=$(do_facet $SINGLEAGT diff -r $HSM_ARCHIVE/$ARC_SUBDIR \ + $DIR/$tdir/$ARC_SUBDIR | wc -l) [ $cnt_diff -eq 0 ] || error "$cnt_diff imported files differ from read data" @@ -976,9 +1262,10 @@ test_14() { # rebind the archive to the newly created file echo "rebind $fid to $fid2" - $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root="$HSM_ARCHIVE"\ - --rebind $fid $fid2 $DIR || - error "could not rebind file" + + do_facet $SINGLEAGT \ + "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ + --rebind $fid $fid2 $DIR" || error "could not rebind file" # restore file and compare md5sum local sum2=$(md5sum $f | awk '{print $1}') @@ -997,7 +1284,7 @@ test_15() { mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local count=5 - local tmpfile=$TMP/tmp.$$ + local tmpfile=$SHARED_DIRECTORY/tmp.$$ local fids=() local sums=() @@ -1026,9 +1313,9 @@ test_15() { [[ $nl == $count ]] || error "$nl files in list, $count expected" echo "rebind list of files" - $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root="$HSM_ARCHIVE"\ - --rebind $tmpfile $DIR || - error "could not rebind file list" + do_facet $SINGLEAGT \ + "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\ + --rebind $tmpfile $DIR" || error "could not rebind file list" # restore files and compare md5sum for i in $(seq 1 $count); do @@ -1109,19 +1396,41 @@ test_21() { local fid=$(make_small $f) check_hsm_flags $f "0x00000000" + # LU-4388/LU-4389 - ZFS does not report full number of blocks + # used until file is flushed to disk + if [ $(facet_fstype ost1) == "zfs" ]; then + # this causes an OST_SYNC rpc to be sent + dd if=/dev/zero of=$f bs=512 count=1 oflag=sync conv=notrunc,fsync + # clear locks to reread file data + cancel_lru_locks osc + fi + + local orig_size=$(stat -c "%s" $f) + local orig_blocks=$(stat -c "%b" $f) + + start_full_debug_logging + $LFS hsm_archive $f || error "could not archive file" wait_request_state $fid ARCHIVE SUCCEED - [ $(stat -c "%b" $f) -ne "0" ] || error "wrong block number" - local sz=$(stat -c "%s" $f) - [ $sz -ne "0" ] || error "file size should not be zero" + local blocks=$(stat -c "%b" $f) + [ $blocks -eq $orig_blocks ] || + error "$f: wrong block number after archive: " \ + "$blocks != $orig_blocks" + local size=$(stat -c "%s" $f) + [ $size -eq $orig_size ] || + error "$f: wrong size after archive: $size != $orig_size" # Release and check states $LFS hsm_release $f || error "could not release file" check_hsm_flags $f "0x0000000d" - [ $(stat -c "%b" $f) -eq "0" ] || error "wrong block number" - [ $(stat -c "%s" $f) -eq $sz ] || error "wrong file size" + blocks=$(stat -c "%b" $f) + [ $blocks -gt 5 ] && + error "$f: too many blocks after release: $blocks > 5" + size=$(stat -c "%s" $f) + [ $size -ne $orig_size ] && + error "$f: wrong size after release: $size != $orig_size" # Check we can release an file without stripe info f=$f.nolov @@ -1139,6 +1448,8 @@ test_21() { $LFS hsm_release $f || fail "second release should succeed" check_hsm_flags $f "0x0000000d" + stop_full_debug_logging + copytool_cleanup } run_test 21 "Simple release tests" @@ -1202,47 +1513,297 @@ test_23() { } run_test 23 "Release does not change a/mtime (utime)" -test_24() { +test_24a() { + local file=$DIR/$tdir/$tfile + local fid + local atime0 + local atime1 + local mtime0 + local mtime1 + local ctime0 + local ctime1 + # test needs a running copytool copytool_setup mkdir -p $DIR/$tdir - - local f=$DIR/$tdir/test_mtime + rm -f $file + fid=$(make_small $file) # Create a file and check its states - local fid=$(make_small $f) - check_hsm_flags $f "0x00000000" + check_hsm_flags $file "0x00000000" - # make mtime is different + # Ensure atime is less than mtime and ctime. sleep 1 - echo "append" >> $f - local MTIME=$(stat -c "%Y" $f) - local ATIME=$(stat -c "%X" $f) + echo >> $file - $LFS hsm_archive $f || error "could not archive file" + atime0=$(stat -c "%X" $file) + mtime0=$(stat -c "%Y" $file) + ctime0=$(stat -c "%Z" $file) + + [ $atime0 -lt $mtime0 ] || + error "atime $atime0 is not less than mtime $mtime0" + + [ $atime0 -lt $ctime0 ] || + error "atime $atime0 is not less than ctime $ctime0" + + # Archive should not change any timestamps. + $LFS hsm_archive $file || error "cannot archive '$file'" wait_request_state $fid ARCHIVE SUCCEED - # Release and check states - $LFS hsm_release $f || error "could not release file" - check_hsm_flags $f "0x0000000d" + atime1=$(stat -c "%X" $file) + mtime1=$(stat -c "%Y" $file) + ctime1=$(stat -c "%Z" $file) + + [ $atime0 -eq $atime1 ] || + error "archive changed atime from $atime0 to $atime1" + + [ $mtime0 -eq $mtime1 ] || + error "archive changed mtime from $mtime0 to $mtime1" + + [ $ctime0 -eq $ctime1 ] || + error "archive changed ctime from $ctime0 to $ctime1" + + # Release should not change any timestamps. + $LFS hsm_release $file || error "cannot release '$file'" + check_hsm_flags $file "0x0000000d" + + atime1=$(stat -c "%X" $file) + mtime1=$(stat -c "%Y" $file) + ctime1=$(stat -c "%Z" $file) + + [ $atime0 -eq $atime1 ] || + error "release changed atime from $atime0 to $atime1" + + [ $mtime0 -eq $mtime1 ] || + error "release changed mtime from $mtime0 to $mtime1" + + [ $ctime0 -eq $ctime1 ] || + error "release changed ctime from $ctime0 to $ctime1" + + # Restore should not change atime or mtime and should not + # decrease ctime. + $LFS hsm_restore $file + wait_request_state $fid RESTORE SUCCEED + + atime1=$(stat -c "%X" $file) + mtime1=$(stat -c "%Y" $file) + ctime1=$(stat -c "%Z" $file) + + [ $atime0 -eq $atime1 ] || + error "restore changed atime from $atime0 to $atime1" + + [ $mtime0 -eq $mtime1 ] || + error "restore changed mtime from $mtime0 to $mtime1" + + [ $ctime0 -le $ctime1 ] || + error "restore changed ctime from $ctime0 to $ctime1" + + copytool_cleanup + + # Once more, after unmount and mount. + umount_client $MOUNT || error "cannot unmount '$MOUNT'" + mount_client $MOUNT || error "cannot mount '$MOUNT'" + + atime1=$(stat -c "%X" $file) + mtime1=$(stat -c "%Y" $file) + ctime1=$(stat -c "%Z" $file) + + [ $atime0 -eq $atime1 ] || + error "remount changed atime from $atime0 to $atime1" + + [ $mtime0 -eq $mtime1 ] || + error "remount changed mtime from $mtime0 to $mtime1" + + [ $ctime0 -le $ctime1 ] || + error "remount changed ctime from $ctime0 to $ctime1" +} +run_test 24a "Archive, release, and restore does not change a/mtime (i/o)" + +test_24b() { + local file=$DIR/$tdir/$tfile + local fid + local sum0 + local sum1 + # LU-3811 + + # Test needs a running copytool. + copytool_setup + mkdir -p $DIR/$tdir + + # Check that root can do HSM actions on a ordinary user's file. + rm -f $file + fid=$(make_small $file) + sum0=$(md5sum $file) + + chown $RUNAS_ID:$RUNAS_GID $file || + error "cannot chown '$file' to '$RUNAS_ID'" + + chmod ugo-w $DIR/$tdir || + error "cannot chmod '$DIR/$tdir'" + + $LFS hsm_archive $file + wait_request_state $fid ARCHIVE SUCCEED + + $LFS hsm_release $file + check_hsm_flags $file "0x0000000d" + + $LFS hsm_restore $file + wait_request_state $fid RESTORE SUCCEED + + # Check that ordinary user can get HSM state. + $RUNAS $LFS hsm_state $file || + error "user '$RUNAS_ID' cannot get HSM state of '$file'" + + $LFS hsm_release $file + check_hsm_flags $file "0x0000000d" + + # Check that ordinary user can accessed released file. + sum1=$($RUNAS md5sum $file) || + error "user '$RUNAS_ID' cannot read '$file'" + + [ "$sum0" == "$sum1" ] || + error "md5sum mismatch for '$file'" + + copytool_cleanup +} +run_test 24b "root can archive, release, and restore user files" + +cleanup_test_24c() { + trap 0 + set_hsm_param user_request_mask RESTORE + set_hsm_param group_request_mask RESTORE + set_hsm_param other_request_mask RESTORE +} + +test_24c() { + local file=$DIR/$tdir/$tfile + local action=archive + local user_save + local group_save + local other_save + + # test needs a running copytool + copytool_setup + + mkdir -p $DIR/$tdir + + # Save the default masks and check that cleanup_24c will + # restore the request masks correctly. + user_save=$(get_hsm_param user_request_mask) + group_save=$(get_hsm_param group_request_mask) + other_save=$(get_hsm_param other_request_mask) + + [ "$user_save" == RESTORE ] || + error "user_request_mask is '$user_save' expected 'RESTORE'" + [ "$group_save" == RESTORE ] || + error "group_request_mask is '$group_save' expected 'RESTORE'" + [ "$other_save" == RESTORE ] || + error "other_request_mask is '$other_save' expected 'RESTORE'" + + trap cleanup_test_24c EXIT + + # User. + rm -f $file + make_small $file + chown $RUNAS_ID:nobody $file || + error "cannot chown '$file' to '$RUNAS_ID:nobody'" + + set_hsm_param user_request_mask "" + $RUNAS $LFS hsm_$action $file && + error "$action by user should fail" + + set_hsm_param user_request_mask $action + $RUNAS $LFS hsm_$action $file || + error "$action by user should succeed" + + # Group. + rm -f $file + make_small $file + chown nobody:$RUNAS_GID $file || + error "cannot chown '$file' to 'nobody:$RUNAS_GID'" + + set_hsm_param group_request_mask "" + $RUNAS $LFS hsm_$action $file && + error "$action by group should fail" + + set_hsm_param group_request_mask $action + $RUNAS $LFS hsm_$action $file || + error "$action by group should succeed" + + # Other. + rm -f $file + make_small $file + chown nobody:nobody $file || + error "cannot chown '$file' to 'nobody:nobody'" + + set_hsm_param other_request_mask "" + $RUNAS $LFS hsm_$action $file && + error "$action by other should fail" + + set_hsm_param other_request_mask $action + $RUNAS $LFS hsm_$action $file || + error "$action by other should succeed" + + copytool_cleanup + cleanup_test_24c +} +run_test 24c "check that user,group,other request masks work" + +cleanup_test_24d() { + trap 0 + mount -o remount,rw $MOUNT2 +} + +test_24d() { + local file1=$DIR/$tdir/$tfile + local file2=$DIR2/$tdir/$tfile + local fid1 + local fid2 + + copytool_setup + + mkdir -p $DIR/$tdir + rm -f $file1 + fid1=$(make_small $file1) + + trap cleanup_test_24d EXIT + + mount -o remount,ro $MOUNT2 - [ "$(stat -c "%Y" $f)" -eq "$MTIME" ] || - error "mtime should be $MTIME" + fid2=$(path2fid $file2) + [ "$fid1" == "$fid2" ] || + error "FID mismatch '$fid1' != '$fid2'" - [ "$(stat -c "%X" $f)" -eq "$ATIME" ] || - error "atime should be $ATIME" + $LFS hsm_archive $file2 && + error "archive should fail on read-only mount" + check_hsm_flags $file1 "0x00000000" + + $LFS hsm_archive $file1 + wait_request_state $fid1 ARCHIVE SUCCEED + + $LFS hsm_release $file1 + $LFS hsm_restore $file2 + wait_request_state $fid1 RESTORE SUCCEED + + $LFS hsm_release $file1 || error "cannot release '$file1'" + dd if=$file2 of=/dev/null bs=1M || "cannot read '$file2'" + + $LFS hsm_release $file2 && + error "release should fail on read-only mount" copytool_cleanup + cleanup_test_24d } -run_test 24 "Release does not change a/mtime (i/o)" +run_test 24d "check that read-only mounts are respected" test_25a() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir - cp /etc/hosts $HSM_ARCHIVE/$tdir/$tfile + mkdir -p $DIR/$tdir + copy2archive /etc/hosts $tdir/$tfile + local f=$DIR/$tdir/$tfile import_file $tdir/$tfile $f @@ -1365,13 +1926,14 @@ run_test 28 "Concurrent archive/file remove" test_30a() { # restore at exec cannot work on agent node (because of Linux kernel # protection of executables) - need2clients || return 0 + needclients 2 || return 0 # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir - cp -p /bin/true $HSM_ARCHIVE/$tdir/$tfile + mkdir -p $DIR/$tdir + copy2archive /bin/true $tdir/$tfile + local f=$DIR/$tdir/true import_file $tdir/$tfile $f @@ -1396,7 +1958,7 @@ run_test 30a "Restore at exec (import case)" test_30b() { # restore at exec cannot work on agent node (because of Linux kernel # protection of executables) - need2clients || return 0 + needclients 2 || return 0 # test needs a running copytool copytool_setup @@ -1425,8 +1987,42 @@ test_30b() { } run_test 30b "Restore at exec (release case)" -restore_and_check_size() -{ +test_30c() { + needclients 2 || return 0 + + # test needs a running copytool + copytool_setup + + mkdir -p $DIR/$tdir + local f=$DIR/$tdir/SLEEP + local fid=$(copy_file /bin/sleep $f) + chmod 755 $f + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f + wait_request_state $fid ARCHIVE SUCCEED + $LFS hsm_release $f + check_hsm_flags $f "0x0000000d" + # set no retry action mode + cdt_set_no_retry + do_node $CLIENT2 "$f 10" & + local pid=$! + sleep 3 + echo 'Hi!' > $f + [[ $? == 0 ]] && error "Update during exec of released file must fail" + wait $pid + [[ $? == 0 ]] || error "Execution failed during run" + cmp /bin/sleep $f + [[ $? == 0 ]] || error "Binary overwritten during exec" + + # cleanup + # remove no try action mode + cdt_clear_no_retry + check_hsm_flags $f "0x00000009" + + copytool_cleanup +} +run_test 30c "Update during exec of released file must fail" + +restore_and_check_size() { local f=$1 local fid=$2 local s=$(stat -c "%s" $f) @@ -1440,8 +2036,7 @@ restore_and_check_size() n=$(stat -c "%s" $f) # we echo in both cases to show stat is not # hang - if [[ $n != $s ]] - then + if [[ $n != $s ]]; then echo "size seen is $n != $s" err=1 else @@ -1451,13 +2046,12 @@ restore_and_check_size() sleep 10 cpt=$((cpt + 1)) done - if [[ $cpt -lt 10 ]] - then - echo " restore is too long" - else + if [[ $cpt -lt 10 ]]; then echo " "done + else + echo " restore is too long" + wait_request_state $fid RESTORE SUCCEED fi - wait_request_state $fid RESTORE SUCCEED return $err } @@ -1687,7 +2281,13 @@ test_40() { fid=$(copy_file /etc/hosts $f.$p.$i) done done - copytool_setup + # force copytool to use a local/temp archive dir to ensure best + # performance vs remote/NFS mounts used in auto-tests + if df --local $HSM_ARCHIVE >/dev/null 2>&1 ; then + copytool_setup + else + copytool_setup $SINGLEAGT $MOUNT $HSM_ARCHIVE_NUMBER $TMP/$tdir + fi # to be sure wait_all_done will not be mislead by previous tests cdt_purge wait_for_grace_delay @@ -1709,9 +2309,6 @@ test_52() { # test needs a running copytool copytool_setup - # Test behave badly if 2 mount points are present - umount_client $MOUNT2 - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/motd $f 1) @@ -1731,9 +2328,6 @@ test_52() { check_hsm_flags $f "0x0000000b" - # Restore test environment - mount_client $MOUNT2 - copytool_cleanup } run_test 52 "Opened for write file on an evicted client should be set dirty" @@ -1742,9 +2336,6 @@ test_53() { # test needs a running copytool copytool_setup - # Checks are wrong with 2 mount points - umount_client $MOUNT2 - mkdir -p $DIR/$tdir local f=$DIR/$tdir/$tfile local fid=$(copy_file /etc/motd $f 1) @@ -1765,8 +2356,6 @@ test_53() { check_hsm_flags $f "0x00000009" - mount_client $MOUNT2 - copytool_cleanup } run_test 53 "Opened for read file on an evicted client should not be set dirty" @@ -1856,7 +2445,7 @@ run_test 56 "Setattr during an archive is ok" test_57() { # Need one client for I/O, one for request - need2clients || return 0 + needclients 2 || return 0 # test needs a running copytool copytool_setup @@ -1885,13 +2474,15 @@ test_57() { } run_test 57 "Archive a file with dirty cache on another node" -test_58() { - # test needs a running copytool - copytool_setup +truncate_released_file() { + local src_file=$1 + local trunc_to=$2 - mkdir -p $DIR/$tdir + local sz=$(stat -c %s $src_file) local f=$DIR/$tdir/$tfile - local fid=$(make_small $f) + local fid=$(copy_file $1 $f) + local ref=$f-ref + cp $f $f-ref $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || error "could not archive file" @@ -1899,63 +2490,46 @@ test_58() { $LFS hsm_release $f || error "could not release file" - $TRUNCATE $f 0 || error "truncate failed" + $TRUNCATE $f $trunc_to || error "truncate failed" sync - local sz=$(stat -c %s $f) - [[ $sz == 0 ]] || error "size after truncate is $sz != 0" + local sz1=$(stat -c %s $f) + [[ $sz1 == $trunc_to ]] || + error "size after trunc: $sz1 expect $trunc_to, original $sz" $LFS hsm_state $f - check_hsm_flags $f "0x0000000b" local state=$(get_request_state $fid RESTORE) - [[ "$state" == "" ]] || - error "truncate 0 trigs a restore, state = $state" + [[ "$state" == "SUCCEED" ]] || + error "truncate $sz does not trig restore, state = $state" - copytool_cleanup + $TRUNCATE $ref $trunc_to + cmp $ref $f || error "file data wrong after truncate" + + rm -f $f $f-ref } -run_test 58 "Truncate 0 on a released file must not trigger restore" -test_59() { +test_58() { # test needs a running copytool copytool_setup mkdir -p $DIR/$tdir - local f=$DIR/$tdir/$tfile - local fid=$(copy_file /etc/passwd $f) - local ref=$f-ref - cp $f $ref - local sz=$(stat -c %s $ref) - sz=$((sz / 2)) - $TRUNCATE $ref $sz - - $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f || - error "could not archive file" - wait_request_state $fid ARCHIVE SUCCEED - $LFS hsm_release $f || error "could not release file" - - $TRUNCATE $f $sz || error "truncate failed" - sync + local sz=$(stat -c %s /etc/passwd) - local sz1=$(stat -c %s $f) - [[ $sz1 == $sz ]] || error "size after truncate is $sz1 != $sz" + echo "truncate up from $sz to $((sz*2))" + truncate_released_file /etc/passwd $((sz*2)) - $LFS hsm_state $f + echo "truncate down from $sz to $((sz/2))" + truncate_released_file /etc/passwd $((sz/2)) - check_hsm_flags $f "0x0000000b" - - local state=$(get_request_state $fid RESTORE) - [[ "$state" == "SUCCEED" ]] || - error "truncate $sz does not trig a successfull restore,"\ - " state = $state" - - cmp $ref $f || error "file data wrong after truncate" + echo "truncate to 0" + truncate_released_file /etc/passwd 0 copytool_cleanup } -run_test 59 "Truncate != 0 on a released file" +run_test 58 "Truncate a released file will trigger restore" test_90() { file_count=57 @@ -1985,7 +2559,7 @@ test_90() { } run_test 90 "Archive/restore a file list" -double_verify_reset_ham_param() { +double_verify_reset_hsm_param() { local p=$1 echo "Testing $HSM_PARAM.$p" local val=$(get_hsm_param $p) @@ -2001,17 +2575,17 @@ double_verify_reset_ham_param() { # restore value set_hsm_param $p $save - if [[ $rc == 0 ]] - then + if [[ $rc == 0 ]]; then error "we must not be able to set $HSM_PARAM.$p to 0" fi } test_100() { - double_verify_reset_ham_param loop_period - double_verify_reset_ham_param grace_delay - double_verify_reset_ham_param request_timeout - double_verify_reset_ham_param max_requests + double_verify_reset_hsm_param loop_period + double_verify_reset_hsm_param grace_delay + double_verify_reset_hsm_param active_request_timeout + double_verify_reset_hsm_param max_requests + double_verify_reset_hsm_param default_archive_id } run_test 100 "Set coordinator /proc tunables" @@ -2039,7 +2613,7 @@ test_103() { echo "Current requests" local res=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.agent_actions |\ + $HSM_PARAM.actions |\ grep -v CANCELED | grep -v SUCCEED | grep -v FAILED") [[ -z "$res" ]] || error "Some request have not been canceled" @@ -2061,7 +2635,7 @@ test_104() { cdt_disable $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER --data $DATA $f local data1=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.agent_actions |\ + $HSM_PARAM.actions |\ grep $fid | cut -f16 -d=") cdt_enable @@ -2082,12 +2656,12 @@ test_105() { $LFS hsm_archive $DIR/$tdir/$i done local reqcnt1=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.agent_actions |\ + $HSM_PARAM.actions |\ grep WAITING | wc -l") cdt_restart cdt_disable local reqcnt2=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.agent_actions |\ + $HSM_PARAM.actions |\ grep WAITING | wc -l") cdt_enable cdt_purge @@ -2097,33 +2671,70 @@ test_105() { } run_test 105 "Restart of coordinator" +get_agent_by_uuid_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + do_facet $mds "$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.agents |\ + grep $uuid" +} + +check_agent_registered_by_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) + if [[ ! -z "$agent" ]]; then + echo "found agent $agent on $mds" + else + error "uuid $uuid not found in agent list on $mds" + fi +} + +check_agent_unregistered_by_mdt() { + local uuid=$1 + local mdtidx=$2 + local mds=mds$(($mdtidx + 1)) + local agent=$(get_agent_by_uuid_mdt $uuid $mdtidx) + if [[ -z "$agent" ]]; then + echo "uuid not found in agent list on $mds" + else + error "uuid found in agent list on $mds: $agent" + fi +} + +check_agent_registered() { + local uuid=$1 + local mdsno + for mdsno in $(seq 1 $MDSCOUNT); do + check_agent_registered_by_mdt $uuid $((mdsno - 1)) + done +} + +check_agent_unregistered() { + local uuid=$1 + local mdsno + for mdsno in $(seq 1 $MDSCOUNT); do + check_agent_unregistered_by_mdt $uuid $((mdsno - 1)) + done +} + test_106() { - # Test behave badly if 2 mount points are present - umount_client $MOUNT2 + local uuid=$(do_rpc_nodes $(facet_active_host $SINGLEAGT) \ + get_client_uuid $MOUNT | cut -d' ' -f2) - # test needs a running copytool copytool_setup + check_agent_registered $uuid + + search_copytools || error "No copytool found" - local uuid=$(my_uuid) - local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents | - grep $uuid) copytool_cleanup - [[ ! -z "$agent" ]] || error "My uuid $uuid not found in agent list" - local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents | - grep $uuid) - [[ -z "$agent" ]] || - error "My uuid $uuid still found in agent list,"\ - " after copytool shutdown" + check_agent_unregistered $uuid + copytool_setup - local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents | - grep $uuid) - copytool_cleanup - [[ ! -z "$agent" ]] || - error "My uuid $uuid not found in agent list after"\ - " copytool restart" + check_agent_registered $uuid - # Restore test environment - mount_client $MOUNT2 + copytool_cleanup } run_test 106 "Copytool register/unregister" @@ -2148,23 +2759,60 @@ test_107() { } run_test 107 "Copytool re-register after MDS restart" +policy_set_and_test() +{ + local change="$1" + local target="$2" + do_facet $SINGLEMDS $LCTL set_param "$HSM_PARAM.policy=\\\"$change\\\"" + local policy=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.policy) + [[ "$policy" == "$target" ]] || + error "Wrong policy after '$change': '$policy' != '$target'" +} + +test_109() { + # to force default policy setting if error + CDT_POLICY_HAD_CHANGED=true + + local policy=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.policy) + local default="NonBlockingRestore [NoRetryAction]" + [[ "$policy" == "$default" ]] || + error "default policy has changed,"\ + " '$policy' != '$default' update the test" + policy_set_and_test "+NBR" "[NonBlockingRestore] [NoRetryAction]" + policy_set_and_test "+NRA" "[NonBlockingRestore] [NoRetryAction]" + policy_set_and_test "-NBR" "NonBlockingRestore [NoRetryAction]" + policy_set_and_test "-NRA" "NonBlockingRestore NoRetryAction" + policy_set_and_test "NRA NBR" "[NonBlockingRestore] [NoRetryAction]" + # useless bacause we know but safer for futur changes to use real value + local policy=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.policy) + echo "Next set_param must failed" + policy_set_and_test "wrong" "$policy" + + # return to default + echo "Back to default policy" + cdt_set_sanity_policy +} +run_test 109 "Policy display/change" + test_110a() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir - cp /etc/passwd $HSM_ARCHIVE/$tdir/$tfile + mkdir -p $DIR/$tdir + + copy2archive /etc/passwd $tdir/$tfile + local f=$DIR/$tdir/$tfile import_file $tdir/$tfile $f local fid=$(path2fid $f) - cdt_set_no_blocking_restore + cdt_set_non_blocking_restore md5sum $f local st=$? # cleanup wait_request_state $fid RESTORE SUCCEED - cdt_clear_no_blocking_restore + cdt_clear_non_blocking_restore # Test result [[ $st == 1 ]] || @@ -2186,13 +2834,13 @@ test_110b() { wait_request_state $fid ARCHIVE SUCCEED $LFS hsm_release $f - cdt_set_no_blocking_restore + cdt_set_non_blocking_restore md5sum $f local st=$? # cleanup wait_request_state $fid RESTORE SUCCEED - cdt_clear_no_blocking_restore + cdt_clear_non_blocking_restore # Test result [[ $st == 1 ]] || @@ -2207,9 +2855,11 @@ test_111a() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir + copy2archive /etc/passwd $tdir/$tfile + local f=$DIR/$tdir/$tfile - cp /etc/passwd $HSM_ARCHIVE/$tdir/$tfile + import_file $tdir/$tfile $f local fid=$(path2fid $f) @@ -2360,7 +3010,7 @@ test_220() { $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f wait_request_state $fid ARCHIVE SUCCEED - local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1) + local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) changelog_cleanup local target=0x0 @@ -2387,13 +3037,12 @@ test_221() { wait_request_state $fid ARCHIVE CANCELED wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1) + local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) local target=0x7d [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - changelog_cleanup - copytool_cleanup + cleanup } run_test 221 "Changelog for archive canceled" @@ -2401,9 +3050,10 @@ test_222a() { # test needs a running copytool copytool_setup - mkdir -p $DIR/$tdir $HSM_ARCHIVE/$tdir + mkdir -p $DIR/$tdir + copy2archive /etc/passwd $tdir/$tfile + local f=$DIR/$tdir/$tfile - cp /etc/passwd $HSM_ARCHIVE/$tdir/$tfile import_file $tdir/$tfile $f local fid=$(path2fid $f) @@ -2412,13 +3062,12 @@ test_222a() { $LFS hsm_restore $f wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1) + local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) local target=0x80 [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - changelog_cleanup - copytool_cleanup + cleanup } run_test 222a "Changelog for explicit restore" @@ -2439,13 +3088,12 @@ test_222b() { wait_request_state $fid RESTORE SUCCEED - local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1) + local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) local target=0x80 [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - changelog_cleanup - copytool_cleanup + cleanup } run_test 222b "Changelog for implicit restore" @@ -2469,14 +3117,13 @@ test_223a() { wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1) + local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) local target=0xfd [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - changelog_cleanup - copytool_cleanup + cleanup } run_test 223a "Changelog for restore canceled (import case)" @@ -2499,14 +3146,13 @@ test_223b() { wait_request_state $fid RESTORE CANCELED wait_request_state $fid CANCEL SUCCEED - local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1) + local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -1) local target=0xfd [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - changelog_cleanup - copytool_cleanup + cleanup } run_test 223b "Changelog for restore canceled (release case)" @@ -2526,14 +3172,13 @@ test_224() { $LFS hsm_remove $f wait_request_state $fid REMOVE SUCCEED - local flags=$(changelog_get_flags $MDT0 HSM $fid | tail -1) + local flags=$(changelog_get_flags ${MDT[0]} HSM $fid | tail -n 1) local target=0x200 [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - changelog_cleanup - copytool_cleanup + cleanup } run_test 224 "Changelog for remove" @@ -2563,16 +3208,15 @@ test_225() { wait_request_state $fid REMOVE CANCELED wait_request_state $fid CANCEL SUCCEED - flags=$(changelog_get_flags $MDT0 RENME $fid2) - local flags=$($LFS changelog $MDT0 | grep HSM | grep $fid | tail -1 | - awk '{print $5}') + flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) + local flags=$($LFS changelog ${MDT[0]} | grep HSM | grep $fid | + tail -n 1 | awk '{print $5}') local target=0x27d [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - changelog_cleanup - copytool_cleanup + cleanup } run_test 225 "Changelog for remove canceled" @@ -2598,7 +3242,7 @@ test_226() { rm $f1 || error "rm $f1 failed" - local flags=$(changelog_get_flags $MDT0 UNLNK $fid1) + local flags=$(changelog_get_flags ${MDT[0]} UNLNK $fid1) local target=0x3 [[ $flags == $target ]] || @@ -2606,14 +3250,13 @@ test_226() { mv $f3 $f2 || error "mv $f3 $f2 failed" - flags=$(changelog_get_flags $MDT0 RENME $fid2) + flags=$(changelog_get_flags ${MDT[0]} RENME $fid2) target=0x3 [[ $flags == $target ]] || error "Changelog flag is $flags not $target" - changelog_cleanup - copytool_cleanup + cleanup } run_test 226 "changelog for last rm/mv with exiting archive" @@ -2627,7 +3270,7 @@ check_flags_changes() { local target=0x280 $LFS hsm_set --$hsm_flag $f || error "Cannot set $hsm_flag on $f" - local flags=($(changelog_get_flags $MDT0 HSM $fid)) + local flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) local seen=${#flags[*]} cnt=$((fst + cnt)) [[ $seen == $cnt ]] || @@ -2638,7 +3281,7 @@ check_flags_changes() { $LFS hsm_clear --$hsm_flag $f || error "Cannot clear $hsm_flag on $f" - flags=($(changelog_get_flags $MDT0 HSM $fid)) + flags=($(changelog_get_flags ${MDT[0]} HSM $fid)) seen=${#flags[*]} cnt=$(($cnt + 1)) [[ $cnt == $seen ]] || @@ -2670,11 +3313,46 @@ test_227() { wait_request_state $fid ARCHIVE SUCCEED check_flags_changes $f $fid lost 3 1 - changelog_cleanup - copytool_cleanup + cleanup } run_test 227 "changelog when explicit setting of HSM flags" +test_228() { + # test needs a running copytool + copytool_setup + + local fid=$(make_small_sync $DIR/$tfile) + $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile + wait_request_state $fid ARCHIVE SUCCEED + + $LFS hsm_release $DIR/$tfile + check_hsm_flags $DIR/$tfile "0x0000000d" + + filefrag $DIR/$tfile | grep " 1 extent found" || + error "filefrag on released file must return only one extent" + + # only newer versions of cp detect sparse files by stat/FIEMAP + # (LU-2580) + cp --sparse=auto $DIR/$tfile $DIR/$tfile.2 || + error "copying $DIR/$tfile" + cmp $DIR/$tfile $DIR/$tfile.2 || error "comparing copied $DIR/$tfile" + + $LFS hsm_release $DIR/$tfile + check_hsm_flags $DIR/$tfile "0x0000000d" + + mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + + tar cf - --sparse $DIR/$tfile | tar xvf - -C $DIR/$tdir || + error "tar failed" + cmp $DIR/$tfile $DIR/$tdir/$DIR/$tfile || + error "comparing untarred $DIR/$tfile" + + rm -f $DIR/$tfile $DIR/$tfile.2 || + error "rm $DIR/$tfile or $DIR/$tfile.2 failed" + copytool_cleanup +} +run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse" + test_250() { # test needs a running copytool copytool_setup @@ -2699,12 +3377,12 @@ test_250() { while [[ $cnt != 0 || $wt != 0 ]]; do sleep 1 cnt=$(do_facet $SINGLEMDS "$LCTL get_param -n\ - $HSM_PARAM.agent_actions |\ + $HSM_PARAM.actions |\ grep STARTED | grep -v CANCEL | wc -l") [[ $cnt -le $maxrequest ]] || error "$cnt > $maxrequest too many started requests" wt=$(do_facet $SINGLEMDS "$LCTL get_param\ - $HSM_PARAM.agent_actions |\ + $HSM_PARAM.actions |\ grep WAITING | wc -l") echo "max=$maxrequest started=$cnt waiting=$wt" done @@ -2723,8 +3401,8 @@ test_251() { cdt_disable # to have a short test - local old_to=$(get_hsm_param request_timeout) - set_hsm_param request_timeout 4 + local old_to=$(get_hsm_param active_request_timeout) + set_hsm_param active_request_timeout 4 # to be sure the cdt will wake up frequently so # it will be able to cancel the "old" request local old_loop=$(get_hsm_param loop_period) @@ -2736,7 +3414,7 @@ test_251() { sleep 5 wait_request_state $fid ARCHIVE CANCELED - set_hsm_param request_timeout $old_to + set_hsm_param active_request_timeout $old_to set_hsm_param loop_period $old_loop copytool_cleanup @@ -2770,6 +3448,209 @@ test_300() { } run_test 300 "On disk coordinator state kept between MDT umount/mount" +test_301() { + local ai=$(get_hsm_param default_archive_id) + local new=$((ai + 1)) + + set_hsm_param default_archive_id $new -P + fail $SINGLEMDS + local res=$(get_hsm_param default_archive_id) + + # clear value + set_hsm_param default_archive_id "" "-P -d" + + [[ $new == $res ]] || error "Value after MDS restart is $res != $new" +} +run_test 301 "HSM tunnable are persistent" + +test_302() { + local ai=$(get_hsm_param default_archive_id) + local new=$((ai + 1)) + + # stop coordinator + cdt_shutdown + + set_hsm_param default_archive_id $new -P + + local mdtno + for mdtno in $(seq 1 $MDSCOUNT); do + fail mds${mdtno} + done + + # check cdt is on + cdt_check_state enabled + + local res=$(get_hsm_param default_archive_id) + + # clear value + set_hsm_param default_archive_id "" "-P -d" + + [[ $new == $res ]] || error "Value after MDS restart is $res != $new" +} +run_test 302 "HSM tunnable are persistent when CDT is off" + +test_400() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + copytool_setup + + mkdir -p $DIR/$tdir + + local dir_mdt0=$DIR/$tdir/mdt0 + local dir_mdt1=$DIR/$tdir/mdt1 + + # create 1 dir per MDT + $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" + + # create 1 file in each MDT + local fid1=$(make_small $dir_mdt0/$tfile) + local fid2=$(make_small $dir_mdt1/$tfile) + + # check that hsm request on mdt0 is sent to the right MDS + $LFS hsm_archive $dir_mdt0/$tfile || error "lfs hsm_archive" + wait_request_state $fid1 ARCHIVE SUCCEED 0 && + echo "archive successful on mdt0" + + # check that hsm request on mdt1 is sent to the right MDS + $LFS hsm_archive $dir_mdt1/$tfile || error "lfs hsm_archive" + wait_request_state $fid2 ARCHIVE SUCCEED 1 && + echo "archive successful on mdt1" + + copytool_cleanup + # clean test files and directories + rm -rf $dir_mdt0 $dir_mdt1 +} +run_test 400 "Single request is sent to the right MDT" + +test_401() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + copytool_setup + + mkdir -p $DIR/$tdir + + local dir_mdt0=$DIR/$tdir/mdt0 + local dir_mdt1=$DIR/$tdir/mdt1 + + # create 1 dir per MDT + $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + $LFS mkdir -i 1 $dir_mdt1 || error "lfs mkdir" + + # create 1 file in each MDT + local fid1=$(make_small $dir_mdt0/$tfile) + local fid2=$(make_small $dir_mdt1/$tfile) + + # check that compound requests are shunt to the rights MDTs + $LFS hsm_archive $dir_mdt0/$tfile $dir_mdt1/$tfile || + error "lfs hsm_archive" + wait_request_state $fid1 ARCHIVE SUCCEED 0 && + echo "archive successful on mdt0" + wait_request_state $fid2 ARCHIVE SUCCEED 1 && + echo "archive successful on mdt1" + + copytool_cleanup + # clean test files and directories + rm -rf $dir_mdt0 $dir_mdt1 +} +run_test 401 "Compound requests split and sent to their respective MDTs" + +mdc_change_state() # facet, MDT_pattern, activate|deactivate +{ + local facet=$1 + local pattern="$2" + local state=$3 + local node=$(facet_active_host $facet) + local mdc + for mdc in $(do_facet $facet "$LCTL dl | grep -E ${pattern}-mdc" | + awk '{print $4}'); do + echo "$3 $mdc on $node" + do_facet $facet "$LCTL --device $mdc $state" || return 1 + done +} + +test_402() { + # make sure there is no running copytool + copytool_cleanup + + # deactivate all mdc on agent1 + mdc_change_state $SINGLEAGT "MDT000." "deactivate" + + copytool_setup $SINGLEAGT + + check_agent_unregistered "uuid" # match any agent + + # no expected running copytool + search_copytools $agent && error "Copytool start should have failed" + + # reactivate MDCs + mdc_change_state $SINGLEAGT "MDT000." "activate" +} +run_test 402 "Copytool start fails if all MDTs are inactive" + +test_403() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + # make sure there is no running copytool + copytool_cleanup + + local agent=$(facet_active_host $SINGLEAGT) + local uuid=$(do_rpc_nodes $agent get_client_uuid | cut -d' ' -f2) + + # deactivate all mdc for MDT0001 + mdc_change_state $SINGLEAGT "MDT0001" "deactivate" + + copytool_setup + # check the agent is registered on MDT0000, and not on MDT0001 + check_agent_registered_by_mdt $uuid 0 + check_agent_unregistered_by_mdt $uuid 1 + + # check running copytool process + search_copytools $agent || error "No running copytools on $agent" + + # reactivate all mdc for MDT0001 + mdc_change_state $SINGLEAGT "MDT0001" "activate" + + # make sure the copytool is now registered to all MDTs + check_agent_registered $uuid + + copytool_cleanup +} +run_test 403 "Copytool starts with inactive MDT and register on reconnect" + +test_404() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + copytool_setup + + # create files on both MDT0000 and MDT0001 + mkdir -p $DIR/$tdir + + local dir_mdt0=$DIR/$tdir/mdt0 + $LFS mkdir -i 0 $dir_mdt0 || error "lfs mkdir" + + # create 1 file on mdt0 + local fid1=$(make_small $dir_mdt0/$tfile) + + # deactivate all mdc for MDT0001 + mdc_change_state $SINGLEAGT "MDT0001" "deactivate" + + # send an HSM request for files in MDT0000 + $LFS hsm_archive $dir_mdt0/$tfile || error "lfs hsm_archive" + + # check for completion of files in MDT0000 + wait_request_state $fid1 ARCHIVE SUCCEED 0 && + echo "archive successful on mdt0" + + # reactivate all mdc for MDT0001 + mdc_change_state $SINGLEAGT "MDT0001" "activate" + + copytool_cleanup + # clean test files and directories + rm -rf $dir_mdt0 +} +run_test 404 "Inactive MDT does not block requests for active MDTs" + copytool_cleanup complete $SECONDS