export PATH=$PWD/$SRCDIR:$SRCDIR:$PWD/$SRCDIR/utils:$PATH:/sbin:/usr/sbin
ONLY=${ONLY:-"$*"}
-# bug number for skipped test: 3815
+# bug number for skipped test: LU-3815
ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT 34 35 36"
-# bug number for skipped test:4178 4176
+# bug number for skipped test:LU-5474
+ALWAYS_EXCEPT="$ALWAYS_EXCEPT 90"
+# bug number for skipped test:LU-4178 LU-4176
ALWAYS_EXCEPT="$ALWAYS_EXCEPT 200 221 223b 31a"
+# bug number for skipped test:LU-3852
+ALWAYS_EXCEPT="$ALWAYS_EXCEPT 251"
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
MULTIOP=${MULTIOP:-multiop}
OPENFILE=${OPENFILE:-openfile}
-MCREATE=${MCREATE:-mcreate}
MOUNT_2=${MOUNT_2:-"yes"}
FAIL_ON_ERROR=false
# archive is purged at copytool setup
HSM_ARCHIVE_PURGE=true
+
+ # Don't allow copytool error upon start/setup
+ HSMTOOL_NOERROR=false
}
# Get the backend root path for the given agent facet.
local idx=$(($mdtno - 1))
MDT[$idx]=$($LCTL get_param -n \
mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid |
- awk '{gsub(/_UUID/,""); print $1}' | head -1)
+ awk '{gsub(/_UUID/,""); print $1}' | head -n1)
done
}
cmd="cat $test_dir/fifo > $test_dir/events &"
cmd+=" echo \\\$! > $test_dir/monitor_pid"
- # This is required for pdsh -Rmrsh and its handling of remote shells.
- # Regular ssh and pdsh -Rssh work fine without this backgrounded
- # subshell nonsense.
- (do_node $agent "$cmd") &
- export HSMTOOL_MONITOR_PDSH=$!
+ if [[ $PDSH == *Rmrsh* ]]; then
+ # This is required for pdsh -Rmrsh and its handling of remote
+ # shells.
+ # Regular ssh and pdsh -Rssh work fine without this
+ # backgrounded subshell nonsense.
+ (do_node $agent "$cmd") &
+ export HSMTOOL_MONITOR_PDSH=$!
- # Slightly racy, but just making a best-effort to catch obvious
- # problems. If we get rid of the ridiculous backgrounded subshell,
- # this check will need to be updated to just look at the returncode
- # of do_node.
- sleep 1
- ps -p $HSMTOOL_MONITOR_PDSH >&- ||
- error "Failed to start copytool monitor on $agent"
+ # Slightly racy, but just making a best-effort to catch obvious
+ # problems.
+ sleep 1
+ ps -p $HSMTOOL_MONITOR_PDSH >&- ||
+ error "Failed to start copytool monitor on $agent"
+ else
+ do_node $agent "$cmd"
+ if [ $? != 0 ]; then
+ error "Failed to start copytool monitor on $agent"
+ fi
+ fi
}
copytool_monitor_cleanup() {
[[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME
local copytool_log=$prefix.copytool${arc_id}_log.$agent.log
- do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1" ||
- error "start copytool $facet on $agent failed"
+ do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1"
+ if [[ $? != 0 ]]; then
+ [[ $HSMTOOL_NOERROR == true ]] ||
+ error "start copytool $facet on $agent failed"
+ echo "start copytool $facet on $agent failed"
+ fi
+
trap cleanup EXIT
}
copytool_cleanup() {
trap - EXIT
- local agents=${1:-$(facet_active_host $SINGLEAGT)}
+ local facet=$SINGLEAGT
+ local agents=${1:-$(facet_active_host $facet)}
local mdtno
local idx
local oldstate
local mdt_hsmctrl
+ local hsm_root=$(copytool_device $facet)
do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0
sleep 1
"$oldstate" 20 ||
error "mds${mdtno} cdt state is not $oldstate"
done
+ do_facet $facet "rm -rf $hsm_root"
}
copytool_suspend() {
make_large_for_striping() {
local file2=${1/$DIR/$DIR2}
- local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -1)
+ local sz=$($LCTL get_param -n lov.*-clilov-*.stripesize | head -n1)
cleanup_large_files
wait_all_done() {
local timeout=$1
+ local fid=$2
local cmd="$LCTL get_param -n $HSM_PARAM.actions"
+ [[ -n $fid ]] && cmd+=" | grep '$fid'"
cmd+=" | egrep 'WAITING|STARTED'"
wait_result $SINGLEMDS "$cmd" "" $timeout ||
}
run_test 10d "Archive a file on the default archive id"
-test_11() {
+test_11a() {
mkdir -p $DIR/$tdir
copy2archive /etc/hosts $tdir/$tfile
local f=$DIR/$tdir/$tfile
local AFILE=$(do_facet $SINGLEAGT ls $HSM_ARCHIVE'/*/*/*/*/*/*/'$fid) ||
error "fid $fid not in archive $HSM_ARCHIVE"
}
-run_test 11 "Import a file"
+run_test 11a "Import a file"
+
+test_11b() {
+	# A copytool must be running for the archive request to complete.
+	copytool_setup
+
+	mkdir -p $DIR/$tdir
+	local target=$DIR/$tdir/$tfile
+	local fid=$(copy_file /etc/hosts $target)
+	if ! $LFS hsm_archive -a $HSM_ARCHIVE_NUMBER $target; then
+		error "hsm_archive failed"
+	fi
+	wait_request_state $fid ARCHIVE SUCCEED
+
+	# Save the md5sum output line (digest plus path) before removing
+	# the file from the filesystem.
+	local checksum=$(md5sum $target)
+	rm -f $target
+
+	# Import the file again at the same path, passing its FID as the
+	# source argument.
+	import_file $fid $target
+
+	# md5sum -c re-reads the path recorded in the saved line and
+	# compares the digests.
+	echo "$checksum" | md5sum -c || error "Restored file differs"
+
+	copytool_cleanup
+}
+run_test 11b "Import a deleted file using its FID"
test_12a() {
# test needs a running copytool
local f=$DIR/$tdir/$tfile
import_file $tdir/$tfile $f
- local f=$DIR2/$tdir/$tfile
+ local f2=$DIR2/$tdir/$tfile
echo "Verifying released state: "
- check_hsm_flags $f "0x0000000d"
+ check_hsm_flags $f2 "0x0000000d"
- local fid=$(path2fid $f)
- $LFS hsm_restore $f
+ local fid=$(path2fid $f2)
+ $LFS hsm_restore $f2
wait_request_state $fid RESTORE SUCCEED
echo "Verifying file state: "
- check_hsm_flags $f "0x00000009"
+ check_hsm_flags $f2 "0x00000009"
do_facet $SINGLEAGT diff -q $HSM_ARCHIVE/$tdir/$tfile $f
wait_request_state $fid ARCHIVE SUCCEED
$LFS hsm_release $f
+ # to be sure wait_all_done will not be misled by previous tests
+ # and ops.
+ cdt_purge
+ wait_for_grace_delay
+ # Also raise grace_delay significantly so the canceled
+ # Restore action remains available long enough.
+ local old_grace=$(get_hsm_param grace_delay)
+ set_hsm_param grace_delay 100
+
md5sum $f >/dev/null &
local pid=$!
wait_request_state $fid RESTORE STARTED
$LFS hsm_cancel $f
- wait_request_state $fid RESTORE CANCELED
- wait_request_state $fid CANCEL SUCCEED
+ # Instead of waiting for and checking the Restore and Cancel ops
+ # sequentially, wait for both to finish and then check each
+ # result.
+ wait_all_done 100 $fid
+ local rstate=$(get_request_state $fid RESTORE)
+ local cstate=$(get_request_state $fid CANCEL)
+
+ # restore orig grace_delay.
+ set_hsm_param grace_delay $old_grace
+
+ if [[ "$rstate" == "CANCELED" ]] ; then
+ [[ "$cstate" == "SUCCEED" ]] ||
+ error "Restore state is CANCELED and Cancel state " \
+ "is not SUCCEED but $cstate"
+ echo "Restore state is CANCELED, Cancel state is SUCCEED"
+ elif [[ "$rstate" == "SUCCEED" ]] ; then
+ [[ "$cstate" == "FAILED" ]] ||
+ error "Restore state is SUCCEED and Cancel state " \
+ "is not FAILED but $cstate"
+ echo "Restore state is SUCCEED, Cancel state is FAILED"
+ else
+ error "Restore state is $rstate and Cancel state is $cstate"
+ fi
[ -z $killed ] ||
error "Cannot kill process waiting for restore ($killed)"
# Just start and stop the copytool to generate events.
cdt_clear_no_retry
+
+ # Wait for the copytool to register.
+ wait_update --verbose $(facet_active_host mds1) \
+ "$LCTL get_param -n ${MDT_PREFIX}0.hsm.agents | grep -o ^uuid" \
+ uuid 100 ||
+ error "copytool failed to register with MDT0000"
+
copytool_cleanup
local REGISTER_EVENT
copytool_cleanup
# deactivate all mdc on agent1
- mdc_change_state $SINGLEAGT "MDT000." "deactivate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "deactivate"
- copytool_setup $SINGLEAGT
+ HSMTOOL_NOERROR=true copytool_setup $SINGLEAGT
check_agent_unregistered "uuid" # match any agent
search_copytools $agent && error "Copytool start should have failed"
# reactivate MDCs
- mdc_change_state $SINGLEAGT "MDT000." "activate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT000." "activate"
}
run_test 402 "Copytool start fails if all MDTs are inactive"
local uuid=$(do_rpc_nodes $agent get_client_uuid | cut -d' ' -f2)
# deactivate all mdc for MDT0001
- mdc_change_state $SINGLEAGT "MDT0001" "deactivate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate"
copytool_setup
# check the agent is registered on MDT0000, and not on MDT0001
search_copytools $agent || error "No running copytools on $agent"
# reactivate all mdc for MDT0001
- mdc_change_state $SINGLEAGT "MDT0001" "activate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "activate"
# make sure the copytool is now registered to all MDTs
check_agent_registered $uuid
local fid1=$(make_small $dir_mdt0/$tfile)
# deactivate all mdc for MDT0001
- mdc_change_state $SINGLEAGT "MDT0001" "deactivate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "deactivate"
# send an HSM request for files in MDT0000
$LFS hsm_archive $dir_mdt0/$tfile || error "lfs hsm_archive"
echo "archive successful on mdt0"
# reactivate all mdc for MDT0001
- mdc_change_state $SINGLEAGT "MDT0001" "activate"
+ mdc_change_state $SINGLEAGT "$FSNAME-MDT0001" "activate"
copytool_cleanup
# clean test files and directories
}
run_test 404 "Inactive MDT does not block requests for active MDTs"
+test_405() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+
+	copytool_setup
+
+	mkdir -p $DIR/$tdir
+
+	local striped_dir=$DIR/$tdir/striped_dir
+	local -a paths fids idxs
+	local i
+
+	# create striped dir on all of MDTs
+	$LFS mkdir -i 0 -c $MDSCOUNT $striped_dir || error "lfs mkdir"
+
+	# create the four test files first, remembering each file's FID
+	for i in 0 1 2 3; do
+		paths[i]=$striped_dir/${tfile}_$i
+		fids[i]=$(make_small_sync ${paths[i]})
+	done
+
+	# then record the value reported by "lfs getstripe -M" for each file
+	for i in 0 1 2 3; do
+		idxs[i]=$($LFS getstripe -M ${paths[i]})
+	done
+
+	# check that compound requests are dispatched to the right MDTs
+	$LFS hsm_archive ${paths[@]} || error "lfs hsm_archive"
+
+	for i in 0 1 2 3; do
+		wait_request_state ${fids[i]} ARCHIVE SUCCEED ${idxs[i]} &&
+			echo "archive successful on ${fids[i]}"
+	done
+
+	# release messages are numbered from 1, file suffixes from 0
+	for i in 0 1 2 3; do
+		$LFS hsm_release ${paths[i]} ||
+			error "lfs hsm_release $((i + 1))"
+	done
+
+	# read every released file back
+	for i in 0 1 2 3; do
+		cat ${paths[i]} > /dev/null || error "cat ${tfile}_$i failed"
+	done
+
+	copytool_cleanup
+}
+run_test 405 "archive and release under striped directory"
+
copytool_cleanup
complete $SECONDS