# archive is purged at copytool setup
HSM_ARCHIVE_PURGE=true
+
+ # Don't allow copytool error upon start/setup
+ HSMTOOL_NOERROR=false
}
# Get the backend root path for the given agent facet.
local idx=$(($mdtno - 1))
MDT[$idx]=$($LCTL get_param -n \
mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid |
- awk '{gsub(/_UUID/,""); print $1}' | head -1)
+ awk '{gsub(/_UUID/,""); print $1}' | head -n1)
done
}
cmd="cat $test_dir/fifo > $test_dir/events &"
cmd+=" echo \\\$! > $test_dir/monitor_pid"
- # This is required for pdsh -Rmrsh and its handling of remote shells.
- # Regular ssh and pdsh -Rssh work fine without this backgrounded
- # subshell nonsense.
- (do_node $agent "$cmd") &
- export HSMTOOL_MONITOR_PDSH=$!
+ if [[ $PDSH == *Rmrsh* ]]; then
+ # This is required for pdsh -Rmrsh and its handling of remote
+ # shells.
+ # Regular ssh and pdsh -Rssh work fine without this
+ # backgrounded subshell nonsense.
+ (do_node $agent "$cmd") &
+ export HSMTOOL_MONITOR_PDSH=$!
- # Slightly racy, but just making a best-effort to catch obvious
- # problems. If we get rid of the ridiculous backgrounded subshell,
- # this check will need to be updated to just look at the returncode
- # of do_node.
- sleep 1
- ps -p $HSMTOOL_MONITOR_PDSH >&- ||
- error "Failed to start copytool monitor on $agent"
+ # Slightly racy, but just making a best-effort to catch obvious
+ # problems.
+ sleep 1
+ ps -p $HSMTOOL_MONITOR_PDSH >&- ||
+ error "Failed to start copytool monitor on $agent"
+ else
+ do_node $agent "$cmd"
+ if [ $? != 0 ]; then
+ error "Failed to start copytool monitor on $agent"
+ fi
+ fi
}
copytool_monitor_cleanup() {
[[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME
local copytool_log=$prefix.copytool${arc_id}_log.$agent.log
- do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1" ||
- error "start copytool $facet on $agent failed"
+ do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1"
+ if [[ $? != 0 ]]; then
+ [[ $HSMTOOL_NOERROR == true ]] ||
+ error "start copytool $facet on $agent failed"
+ echo "start copytool $facet on $agent failed"
+ fi
+
trap cleanup EXIT
}
copytool_cleanup() {
trap - EXIT
- local agents=${1:-$(facet_active_host $SINGLEAGT)}
+ local facet=$SINGLEAGT
+ local agents=${1:-$(facet_active_host $facet)}
local mdtno
local idx
local oldstate
local mdt_hsmctrl
+ local hsm_root=$(copytool_device $facet)
do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0
sleep 1
"$oldstate" 20 ||
error "mds${mdtno} cdt state is not $oldstate"
done
+ do_facet $facet "rm -rf $hsm_root"
}
copytool_suspend() {
make_large_for_striping() {
local file2=${1/$DIR/$DIR2}
- local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -1)
+ local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1)
cleanup_large_files
wait_all_done() {
local timeout=$1
+ local fid=$2
local cmd="$LCTL get_param -n $HSM_PARAM.actions"
+ [[ -n $fid ]] && cmd+=" | grep '$fid'"
cmd+=" | egrep 'WAITING|STARTED'"
wait_result $SINGLEMDS "$cmd" "" $timeout ||
}
run_test 10d "Archive a file on the default archive id"
-test_11() {
+test_11a() {
mkdir -p $DIR/$tdir
copy2archive /etc/hosts $tdir/$tfile
local f=$DIR/$tdir/$tfile
local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) ||
error "fid $fid not in archive $HSM_ARCHIVE"
}
-run_test 11 "Import a file"
+run_test 11a "Import a file"
+
+test_11b() {
+ # test needs a running copytool
+ copytool_setup
+
+ mkdir -p $DIR/$tdir
+ local f=$DIR/$tdir/$tfile
+ local fid=$(copy_file /etc/hosts $f)
+ $LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $f ||
+ error "hsm_archive failed"
+ wait_request_state $fid ARCHIVE SUCCEED
+
+ local FILE_HASH=$(md5sum $f)
+ rm -f $f
+
+ import_file $fid $f
+
+ echo "$FILE_HASH" | md5sum -c
+
+ [[ $? -eq 0 ]] || error "Restored file differs"
+
+ copytool_cleanup
+}
+run_test 11b "Import a deleted file using its FID"
test_12a() {
# test needs a running copytool
local f=$DIR/$tdir/$tfile
import_file $tdir/$tfile $f
- local f=$DIR2/$tdir/$tfile
+ local f2=$DIR2/$tdir/$tfile
echo "Verifying released state: "
- check_hsm_flags $f "0x0000000d"
+ check_hsm_flags $f2 "0x0000000d"
- local fid=$(path2fid $f)
- $LFS hsm_restore $f
+ local fid=$(path2fid $f2)
+ $LFS hsm_restore $f2
wait_request_state $fid RESTORE SUCCEED
echo "Verifying file state: "
- check_hsm_flags $f "0x00000009"
+ check_hsm_flags $f2 "0x00000009"
do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
+ # to be sure wait_all_done will not be misled by previous tests
+ # and operations.
+ cdt_purge
+ wait_for_grace_delay
+ # Also raise grace_delay significantly so the canceled
+ # Restore action stays available long enough.
+ local old_grace=$(get_hsm_param grace_delay)
+ set_hsm_param grace_delay 100
+
md5sum $f >/dev/null &
local pid=$!
wait_request_state $fid RESTORE STARTED
$LFS hsm_cancel $f
- wait_request_state $fid RESTORE CANCELED
- wait_request_state $fid CANCEL SUCCEED
+ # instead of waiting for and checking the Restore and Cancel ops
+ # sequentially, wait for both to be finished and then check
+ # each result.
+ wait_all_done 100 $fid
+ local rstate=$(get_request_state $fid RESTORE)
+ local cstate=$(get_request_state $fid CANCEL)
+
+ # restore orig grace_delay.
+ set_hsm_param grace_delay $old_grace
+
+ if [[ "$rstate" == "CANCELED" ]] ; then
+ [[ "$cstate" == "SUCCEED" ]] ||
+ error "Restore state is CANCELED and Cancel state " \
+ "is not SUCCEED but $cstate"
+ echo "Restore state is CANCELED, Cancel state is SUCCEED"
+ elif [[ "$rstate" == "SUCCEED" ]] ; then
+ [[ "$cstate" == "FAILED" ]] ||
+ error "Restore state is SUCCEED and Cancel state " \
+ "is not FAILED but $cstate"
+ echo "Restore state is SUCCEED, Cancel state is FAILED"
+ else
+ error "Restore state is $rstate and Cancel state is $cstate"
+ fi
[ -z $killed ] ||
error "Cannot kill process waiting for restore ($killed)"
test_60() {
# This test validates the fix for LU-4512. Ensure that the -u
- # option changes the progress reporting interval from the default
- # (30 seconds) to the user-specified interval.
+ # option changes the progress reporting interval from the
+ # default (30 seconds) to the user-specified interval.
local interval=5
- local progress_timeout=$((interval * 3))
+ local progress_timeout=$((interval * 4))
# test needs a new running copytool
copytool_cleanup
local f=$DIR/$tdir/$tfile
local fid=$(make_large_for_progress $f)
- local start_at=$(date +%s)
- $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
- error "could not archive file"
-
local mdtidx=0
local mdt=${MDT_PREFIX}${mdtidx}
local mds=mds$((mdtidx + 1))
+ # Wait for copytool to register
+ wait_update_facet $mds \
+ "$LCTL get_param -n ${mdt}.hsm.agents | grep -o ^uuid" \
+ uuid 100 || error "coyptool failed to register with $mdt"
+
+ local start_at=$(date +%s)
+ $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
+ error "could not archive file"
+
local cmd="$LCTL get_param -n ${mdt}.hsm.active_requests"
cmd+=" | awk '/'$fid'.*action=ARCHIVE/ {print \\\$12}' | cut -f2 -d="
copytool_cleanup
# deactivate all mdc on agent1
- mdc_change_state $SINGLEAGT "MDT000." "deactivate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "deactivate"
- copytool_setup $SINGLEAGT
+ HSMTOOL_NOERROR=true copytool_setup $SINGLEAGT
check_agent_unregistered "uuid" # match any agent
search_copytools $agent && error "Copytool start should have failed"
# reactivate MDCs
- mdc_change_state $SINGLEAGT "MDT000." "activate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "activate"
}
run_test 402 "Copytool start fails if all MDTs are inactive"
local uuid=$(do_rpc_nodes $agent get_client_uuid | cut -d' ' -f2)
# deactivate all mdc for MDT0001
- mdc_change_state $SINGLEAGT "MDT0001" "deactivate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate"
copytool_setup
# check the agent is registered on MDT0000, and not on MDT0001
search_copytools $agent || error "No running copytools on $agent"
# reactivate all mdc for MDT0001
- mdc_change_state $SINGLEAGT "MDT0001" "activate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "activate"
# make sure the copytool is now registered to all MDTs
check_agent_registered $uuid
local fid1=$(make_small $dir_mdt0/$tfile)
# deactivate all mdc for MDT0001
- mdc_change_state $SINGLEAGT "MDT0001" "deactivate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate"
# send an HSM request for files in MDT0000
$LFS hsm_archive $dir_mdt0/$tfile || error "lfs hsm_archive"
echo "archive successful on mdt0"
# reactivate all mdc for MDT0001
- mdc_change_state $SINGLEAGT "MDT0001" "activate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "activate"
copytool_cleanup
# clean test files and directories