export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin
ONLY=${ONLY:-"$*"}
-# bug number for skipped test: 3815 3939
-ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36 40"
+# bug number for skipped test: 3815
+ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36"
# bug number for skipped test:4178 4176
ALWAYS_EXCEPT="$ALWAYS_EXCEPT 200 221 223b 31a"
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
+ export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
HSM_ARCHIVE=$(copytool_device $SINGLEAGT)
HSM_ARCHIVE_NUMBER=2
done
}
+# Look for running copytool processes (pgrep -x on $HSMTOOL_BASE).
+# $1 (optional): node(s) to search; when empty, the active host of the
+#		 $SINGLEAGT facet is used.
+# The exit status is the one returned by do_nodesv.
+search_copytools() {
+	local hosts=$1
+
+	[[ -n "$hosts" ]] || hosts=$(facet_active_host $SINGLEAGT)
+	do_nodesv $hosts "pgrep -x $HSMTOOL_BASE"
+}
+
search_and_kill_copytool() {
local agents=${1:-$(facet_active_host $SINGLEAGT)}
local facet=${1:-$SINGLEAGT}
local lustre_mntpnt=${2:-$MOUNT}
local arc_id=$3
- local hsm_root=$(copytool_device $facet)
+ local hsm_root=${4:-$(copytool_device $facet)}
local agent=$(facet_active_host $facet)
if [[ -z "$arc_id" ]] &&
# independent of hardware
local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root $hsm_root"
[[ -z "$arc_id" ]] || cmd+=" --archive $arc_id"
+ [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] ||
+ cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL"
cmd+=" --bandwidth 1 $lustre_mntpnt"
# Redirect the standard output and error to a log file which
path2fid $1 || error "cannot get fid on $1"
}
+# Create file $1 from one 1M block of /dev/urandom (dd with conv=sync),
+# then run path2fid on it; callers capture that output as the file's FID.
+# error() is called if either step fails.
+make_small_sync() {
+	local file=$1
+
+	if ! dd if=/dev/urandom of=$file count=1 bs=1M conv=sync; then
+		error "cannot create $file"
+	fi
+	if ! path2fid $file; then
+		error "cannot get fid on $file"
+	fi
+}
+
cleanup_large_files() {
local ratio=$(df -P $MOUNT | tail -1 | awk '{print $5}' |
sed 's/%//g')
local fid=$1
local request=$2
local state=$3
+ # 4th arg (mdt index) is optional
+ local mdtidx=${4:-0}
+ local mds=mds$(($mdtidx + 1))
- local cmd="$LCTL get_param -n $HSM_PARAM.actions"
+ local cmd="$LCTL get_param -n ${MDT_PREFIX}${mdtidx}.hsm.actions"
cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
- wait_result $SINGLEMDS "$cmd" $state 100 ||
- error "request on $fid is not $state"
+ wait_result $mds "$cmd" $state 100 ||
+ error "request on $fid is not $state on $mds"
+}
+
+# Wait (through wait_result) until the value extracted from the MDT's
+# hsm.active_requests for the request matching FID $1 and action $2
+# equals $3; call error() otherwise.
+# $1: fid   $2: request (e.g. ARCHIVE)   $3: expected progress value
+# $4 (optional): MDT index, defaults to 0
+wait_request_progress() {
+	local fid=$1
+	local request=$2
+	local progress=$3
+	local idx=${4:-0}
+	local facet=mds$((idx + 1))
+	local param=${MDT_PREFIX}${idx}.hsm.active_requests
+
+	# 12th field of the matching line, keeping what follows the '='
+	local cmd="$LCTL get_param -n $param"
+	cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$12}' | cut -f2 -d="
+
+	if ! wait_result $facet "$cmd" $progress 100; then
+		error "request on $fid has not made progress $progress on $facet"
+	fi
}
get_request_state() {
}
run_test 12n "Import/implicit restore/release"
+# Archive and release a copy of /etc/hosts, then read it back twice:
+# first with fail_loc 0x152 set (the RESTORE request must end FAILED and
+# the file must keep its released flags), then with fail_loc cleared
+# (the RESTORE request must SUCCEED and the content must match).
+test_12o() {
+	# test needs a running copytool
+	copytool_setup
+
+	mkdir -p $DIR/$tdir
+	local file=$DIR/$tdir/$tfile
+	local fid=$(copy_file /etc/hosts $file)
+	local rc
+
+	$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $file
+	wait_request_state $fid ARCHIVE SUCCEED
+	if ! $LFS hsm_release $file; then
+		error "release of $file failed"
+	fi
+
+#define OBD_FAIL_MDS_HSM_SWAP_LAYOUTS 0x152
+	do_facet $SINGLEMDS lctl set_param fail_loc=0x152
+
+	# switch the coordinator to no-retry mode
+	cdt_set_no_retry
+
+	# first read attempt, with the failure injected
+	diff -q /etc/hosts $file
+	rc=$?
+
+	# the restore request must be reported as failed ...
+	wait_request_state $fid RESTORE FAILED
+
+	# ... and the comparison must not have succeeded
+	if [[ $rc -eq 0 ]]; then
+		error "Restore must fail"
+	fi
+
+	# leave no-retry mode
+	cdt_clear_no_retry
+
+	# the file must still carry the released flags
+	check_hsm_flags $file "0x0000000d"
+
+	# second attempt, without failure injection
+	do_facet $SINGLEMDS lctl set_param fail_loc=0
+
+	diff -q /etc/hosts $file
+	rc=$?
+
+	# this time the restore request must complete
+	wait_request_state $fid RESTORE SUCCEED
+
+	if [[ $rc -ne 0 ]]; then
+		error "Restored file differs"
+	fi
+
+	copytool_cleanup
+}
+run_test 12o "Layout-swap failure during Restore leaves file released"
+
test_13() {
# test needs a running copytool
copytool_setup
local fid=$(make_small $f)
check_hsm_flags $f "0x00000000"
+ # LU-4388/LU-4389 - ZFS does not report full number of blocks
+ # used until file is flushed to disk
+ if [ $(facet_fstype ost1) == "zfs" ]; then
+ # this causes an OST_SYNC rpc to be sent
+ dd if=/dev/zero of=$f bs=512 count=1 oflag=sync conv=notrunc,fsync
+ # clear locks to reread file data
+ cancel_lru_locks osc
+ fi
+
+ local orig_size=$(stat -c "%s" $f)
+ local orig_blocks=$(stat -c "%b" $f)
+
+ start_full_debug_logging
+
$LFS hsm_archive $f || error "could not archive file"
wait_request_state $fid ARCHIVE SUCCEED
- [ $(stat -c "%b" $f) -ne "1" ] || error "wrong block number"
- local sz=$(stat -c "%s" $f)
- [ $sz -ne "0" ] || error "file size should not be zero"
+ local blocks=$(stat -c "%b" $f)
+ [ $blocks -eq $orig_blocks ] ||
+ error "$f: wrong block number after archive: " \
+ "$blocks != $orig_blocks"
+ local size=$(stat -c "%s" $f)
+ [ $size -eq $orig_size ] ||
+ error "$f: wrong size after archive: $size != $orig_size"
# Release and check states
$LFS hsm_release $f || error "could not release file"
check_hsm_flags $f "0x0000000d"
- [ $(stat -c "%b" $f) -eq "1" ] || error "wrong block number"
- [ $(stat -c "%s" $f) -eq $sz ] || error "wrong file size"
+ blocks=$(stat -c "%b" $f)
+ [ $blocks -gt 5 ] &&
+ error "$f: too many blocks after release: $blocks > 5"
+ size=$(stat -c "%s" $f)
+ [ $size -ne $orig_size ] &&
+ error "$f: wrong size after release: $size != $orig_size"
# Check we can release a file without stripe info
f=$f.nolov
$LFS hsm_release $f || fail "second release should succeed"
check_hsm_flags $f "0x0000000d"
+ stop_full_debug_logging
+
copytool_cleanup
}
run_test 21 "Simple release tests"
fid=$(copy_file /etc/hosts $f.$p.$i)
done
done
- copytool_setup
+ # force copytool to use a local/temp archive dir to ensure best
+ # performance vs remote/NFS mounts used in auto-tests
+ if df --local $HSM_ARCHIVE >/dev/null 2>&1 ; then
+ copytool_setup
+ else
+ copytool_setup $SINGLEAGT $MOUNT $HSM_ARCHIVE_NUMBER $TMP/$tdir
+ fi
# to be sure wait_all_done will not be misled by previous tests
cdt_purge
wait_for_grace_delay
}
run_test 58 "Truncate a released file will trigger restore"
+# Restart the copytool with HSMTOOL_UPDATE_INTERVAL set to 5, archive a
+# file created by make_large_for_progress, and check that the time needed
+# for the ARCHIVE request to report progress 5242880 is neither shorter
+# than the interval nor longer than twice the interval.
+test_60() {
+	local period=5
+	local deadline=$((period * 2))
+
+	# test needs a new running copytool
+	copytool_cleanup
+	HSMTOOL_UPDATE_INTERVAL=$period copytool_setup
+
+	mkdir -p $DIR/$tdir
+	local file=$DIR/$tdir/$tfile
+	local fid=$(make_large_for_progress $file)
+
+	local began=$(date +%s)
+	if ! $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $file; then
+		error "could not archive file"
+	fi
+	wait_request_progress $fid ARCHIVE 5242880
+	local ended=$(date +%s)
+	local elapsed=$((ended - began))
+
+	# the first progress report must land inside [period, deadline]
+	if ((elapsed > deadline)); then
+		error "Expected progress update within $deadline seconds"
+	elif ((elapsed < period)); then
+		error "Expected progress update after at least $period seconds"
+	fi
+
+	cdt_clear_no_retry
+	copytool_cleanup
+}
+run_test 60 "Changing progress update interval from default"
+
test_90() {
file_count=57
mkdir -p $DIR/$tdir
}
run_test 105 "Restart of coordinator"
-test_106() {
- # test needs a running copytool
- copytool_setup
+# Print the lines of the hsm.agents parameter of MDT index $2 that match
+# $1 (grep pattern, normally an agent uuid). The query runs on facet
+# mds<index + 1>.
+get_agent_by_uuid_mdt() {
+	local uuid=$1
+	local idx=$2
+	local facet=mds$((idx + 1))
+
+	do_facet $facet "$LCTL get_param -n ${MDT_PREFIX}${idx}.hsm.agents |\
+		grep $uuid"
+}
+
+# Call error() unless get_agent_by_uuid_mdt finds an entry matching $1
+# in the agent list of MDT index $2.
+check_agent_registered_by_mdt() {
+	local uuid=$1
+	local idx=$2
+	local facet=mds$((idx + 1))
+	local entry=$(get_agent_by_uuid_mdt $uuid $idx)
+
+	if [[ -z "$entry" ]]; then
+		error "uuid $uuid not found in agent list on $facet"
+	else
+		echo "found agent $entry on $facet"
+	fi
+}
+
+# Call error() if get_agent_by_uuid_mdt finds an entry matching $1 in
+# the agent list of MDT index $2.
+check_agent_unregistered_by_mdt() {
+	local uuid=$1
+	local idx=$2
+	local facet=mds$((idx + 1))
+	local entry=$(get_agent_by_uuid_mdt $uuid $idx)
+
+	if [[ -n "$entry" ]]; then
+		error "uuid found in agent list on $facet: $entry"
+	else
+		echo "uuid not found in agent list on $facet"
+	fi
+}
+
+# Run check_agent_registered_by_mdt for uuid $1 on every MDT index from
+# 0 to MDSCOUNT - 1.
+check_agent_registered() {
+	local uuid=$1
+	local mdsno
+	local mdtidx
+
+	for mdsno in $(seq 1 $MDSCOUNT); do
+		mdtidx=$((mdsno - 1))
+		check_agent_registered_by_mdt $uuid $mdtidx
+	done
+}
+
+# Run check_agent_unregistered_by_mdt for uuid $1 on every MDT index from
+# 0 to MDSCOUNT - 1.
+check_agent_unregistered() {
+	local uuid=$1
+	local mdsno
+	local mdtidx
+
+	for mdsno in $(seq 1 $MDSCOUNT); do
+		mdtidx=$((mdsno - 1))
+		check_agent_unregistered_by_mdt $uuid $mdtidx
+	done
+}
+# Start and stop the copytool twice: after each copytool_setup the agent
+# uuid must be present in the agent list of every MDT, and after the
+# first copytool_cleanup it must be absent from all of them.
+test_106() {
local uuid=$(do_rpc_nodes $(facet_active_host $SINGLEAGT) \
get_client_uuid $MOUNT | cut -d' ' -f2)
- local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents |
- grep $uuid)
+
+ copytool_setup
+ check_agent_registered $uuid
+
+ search_copytools || error "No copytool found"
+
copytool_cleanup
- [[ ! -z "$agent" ]] || error "My uuid $uuid not found in agent list"
- local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents |
- grep $uuid)
- [[ -z "$agent" ]] ||
- error "My uuid $uuid still found in agent list,"\
- " after copytool shutdown"
+ check_agent_unregistered $uuid
+
copytool_setup
- local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents |
- grep $uuid)
+ check_agent_registered $uuid
+
copytool_cleanup
- [[ ! -z "$agent" ]] ||
- error "My uuid $uuid not found in agent list after"\
- " copytool restart"
}
run_test 106 "Copytool register/unregister"
# test needs a running copytool
copytool_setup
- dd if=/dev/urandom of=$DIR/$tfile bs=1M count=1 conv=sync ||
- error "creating $DIR/$tfile"
+ local fid=$(make_small_sync $DIR/$tfile)
$LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $DIR/$tfile
- wait_request_state $(path2fid $DIR/$tfile) ARCHIVE SUCCEED
+ wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $DIR/$tfile
check_hsm_flags $DIR/$tfile "0x0000000d"
$LFS hsm_release $DIR/$tfile
check_hsm_flags $DIR/$tfile "0x0000000d"
- mkdir $DIR/$tdir
+ mkdir -p $DIR/$tdir || error "mkdir $tdir failed"
tar cf - --sparse $DIR/$tfile | tar xvf - -C $DIR/$tdir ||
error "tar failed"
cmp $DIR/$tfile $DIR/$tdir/$DIR/$tfile ||
error "comparing untarred $DIR/$tfile"
+ rm -f $DIR/$tfile $DIR/$tfile.2 ||
+ error "rm $DIR/$tfile or $DIR/$tfile.2 failed"
copytool_cleanup
}
run_test 228 "On released file, return extend to FIEMAP. For [cp,tar] --sparse"
cdt_shutdown
set_hsm_param default_archive_id $new -P
- fail $SINGLEMDS
+
+ local mdtno
+ for mdtno in $(seq 1 $MDSCOUNT); do
+ fail mds${mdtno}
+ done
# check cdt is on
cdt_check_state enabled
}
run_test 302 "HSM tunnable are persistent when CDT is off"
+# Needs at least 2 MDTs. Create one directory on MDT index 0 and one on
+# MDT index 1, put a small file in each, archive each file with its own
+# request, and wait for ARCHIVE SUCCEED on the matching MDT index.
+test_400() {
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return
+	fi
+
+	copytool_setup
+
+	mkdir -p $DIR/$tdir
+
+	local mdt0_dir=$DIR/$tdir/mdt0
+	local mdt1_dir=$DIR/$tdir/mdt1
+
+	# one directory per MDT
+	$LFS mkdir -i 0 $mdt0_dir || error "lfs mkdir"
+	$LFS mkdir -i 1 $mdt1_dir || error "lfs mkdir"
+
+	# one file in each directory
+	local mdt0_fid=$(make_small $mdt0_dir/$tfile)
+	local mdt1_fid=$(make_small $mdt1_dir/$tfile)
+
+	# the request for the mdt0 file must be handled on MDT index 0
+	if ! $LFS hsm_archive $mdt0_dir/$tfile; then
+		error "lfs hsm_archive"
+	fi
+	if wait_request_state $mdt0_fid ARCHIVE SUCCEED 0; then
+		echo "archive successful on mdt0"
+	fi
+
+	# the request for the mdt1 file must be handled on MDT index 1
+	if ! $LFS hsm_archive $mdt1_dir/$tfile; then
+		error "lfs hsm_archive"
+	fi
+	if wait_request_state $mdt1_fid ARCHIVE SUCCEED 1; then
+		echo "archive successful on mdt1"
+	fi
+
+	copytool_cleanup
+	# remove the test files and directories
+	rm -rf $mdt0_dir $mdt1_dir
+}
+run_test 400 "Single request is sent to the right MDT"
+
+# Needs at least 2 MDTs. Create one small file on MDT index 0 and one on
+# MDT index 1, archive both with a single hsm_archive command, and wait
+# for ARCHIVE SUCCEED for each file on its own MDT index.
+test_401() {
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return
+	fi
+
+	copytool_setup
+
+	mkdir -p $DIR/$tdir
+
+	local mdt0_dir=$DIR/$tdir/mdt0
+	local mdt1_dir=$DIR/$tdir/mdt1
+
+	# one directory per MDT
+	$LFS mkdir -i 0 $mdt0_dir || error "lfs mkdir"
+	$LFS mkdir -i 1 $mdt1_dir || error "lfs mkdir"
+
+	# one file in each directory
+	local mdt0_fid=$(make_small $mdt0_dir/$tfile)
+	local mdt1_fid=$(make_small $mdt1_dir/$tfile)
+
+	# a compound request must be dispatched to the right MDTs
+	if ! $LFS hsm_archive $mdt0_dir/$tfile $mdt1_dir/$tfile; then
+		error "lfs hsm_archive"
+	fi
+	if wait_request_state $mdt0_fid ARCHIVE SUCCEED 0; then
+		echo "archive successful on mdt0"
+	fi
+	if wait_request_state $mdt1_fid ARCHIVE SUCCEED 1; then
+		echo "archive successful on mdt1"
+	fi
+
+	copytool_cleanup
+	# remove the test files and directories
+	rm -rf $mdt0_dir $mdt1_dir
+}
+run_test 401 "Compound requests split and sent to their respective MDTs"
+
+# Apply "lctl --device <dev> $3" on facet $1 to every device whose
+# "lctl dl" line matches the extended regex "$2-mdc".
+# $1: facet   $2: MDT name pattern   $3: activate|deactivate
+# Returns 1 as soon as one lctl call fails.
+mdc_change_state() # facet, MDT_pattern, activate|deactivate
+{
+	local facet=$1
+	local pattern="$2"
+	local action=$3
+	local host=$(facet_active_host $facet)
+	# 4th column of the matching "lctl dl" lines
+	local devices=$(do_facet $facet "$LCTL dl | grep -E ${pattern}-mdc" |
+			awk '{print $4}')
+	local dev
+
+	for dev in $devices; do
+		echo "$action $dev on $host"
+		if ! do_facet $facet "$LCTL --device $dev $action"; then
+			return 1
+		fi
+	done
+}
+
+# Deactivate every MDC matching "MDT000." on the $SINGLEAGT facet, try to
+# start a copytool there, and check that no agent is registered on any
+# MDT and that no copytool process is running. The MDCs are reactivated
+# at the end.
+test_402() {
+	# host searched for copytool processes; declared here so the check
+	# below does not depend on a variable leaked by another function
+	local agent=$(facet_active_host $SINGLEAGT)
+
+	# make sure there is no running copytool
+	copytool_cleanup
+
+	# deactivate all mdc on agent1
+	mdc_change_state $SINGLEAGT "MDT000." "deactivate"
+
+	copytool_setup $SINGLEAGT
+
+	check_agent_unregistered "uuid" # match any agent
+
+	# no expected running copytool
+	search_copytools $agent && error "Copytool start should have failed"
+
+	# reactivate MDCs
+	mdc_change_state $SINGLEAGT "MDT000." "activate"
+}
+run_test 402 "Copytool start fails if all MDTs are inactive"
+
+# Needs at least 2 MDTs. With the MDCs for MDT0001 deactivated on the
+# $SINGLEAGT facet, the copytool must still start and register on MDT
+# index 0 only; once the MDCs are reactivated it must be registered on
+# every MDT.
+test_403() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+
+	# make sure there is no running copytool
+	copytool_cleanup
+
+	local agent=$(facet_active_host $SINGLEAGT)
+	# pass the mount point explicitly, as test_106 does
+	local uuid=$(do_rpc_nodes $agent get_client_uuid $MOUNT |
+		     cut -d' ' -f2)
+
+	# deactivate all mdc for MDT0001
+	mdc_change_state $SINGLEAGT "MDT0001" "deactivate"
+
+	copytool_setup
+	# check the agent is registered on MDT0000, and not on MDT0001
+	check_agent_registered_by_mdt $uuid 0
+	check_agent_unregistered_by_mdt $uuid 1
+
+	# check running copytool process
+	search_copytools $agent || error "No running copytools on $agent"
+
+	# reactivate all mdc for MDT0001
+	mdc_change_state $SINGLEAGT "MDT0001" "activate"
+
+	# make sure the copytool is now registered to all MDTs
+	check_agent_registered $uuid
+
+	copytool_cleanup
+}
+run_test 403 "Copytool starts with inactive MDT and register on reconnect"
+
+# Needs at least 2 MDTs. Create a small file on MDT index 0, deactivate
+# the MDCs for MDT0001 on the $SINGLEAGT facet, and check that an archive
+# request for the MDT index 0 file still reaches ARCHIVE SUCCEED.
+test_404() {
+	if [ $MDSCOUNT -lt 2 ]; then
+		skip "needs >= 2 MDTs" && return
+	fi
+
+	copytool_setup
+
+	# directory placed on MDT index 0
+	mkdir -p $DIR/$tdir
+
+	local mdt0_dir=$DIR/$tdir/mdt0
+	$LFS mkdir -i 0 $mdt0_dir || error "lfs mkdir"
+
+	# one file in that directory
+	local mdt0_fid=$(make_small $mdt0_dir/$tfile)
+
+	# deactivate all mdc for MDT0001
+	mdc_change_state $SINGLEAGT "MDT0001" "deactivate"
+
+	# send an HSM request for the file on MDT0000
+	if ! $LFS hsm_archive $mdt0_dir/$tfile; then
+		error "lfs hsm_archive"
+	fi
+
+	# the request must complete on MDT0000
+	if wait_request_state $mdt0_fid ARCHIVE SUCCEED 0; then
+		echo "archive successful on mdt0"
+	fi
+
+	# reactivate all mdc for MDT0001
+	mdc_change_state $SINGLEAGT "MDT0001" "activate"
+
+	copytool_cleanup
+	# remove the test file and directory
+	rm -rf $mdt0_dir
+}
+run_test 404 "Inactive MDT does not block requests for active MDTs"
+
copytool_cleanup
complete $SECONDS