ALWAYS_EXCEPT="$SANITY_HSM_EXCEPT "
if $SHARED_KEY; then
-# bug number for skipped tests: LU-9795 LU-9795
- ALWAYS_EXCEPT+=" 13 402b "
+# bug number for skipped tests: LU-9795
+ ALWAYS_EXCEPT+=" 402b "
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
fi
[ -n "$FILESET" ] && skip "Not functional for FILESET set"
OPENFILE=${OPENFILE:-openfile}
-MMAP_CAT=${MMAP_CAT:-mmap_cat}
MOUNT_2=${MOUNT_2:-"yes"}
FAIL_ON_ERROR=false
# The exception is a test which needs two separate nodes
CLIENT2=${CLIENT2:-$CLIENT1}
-#
-# In order to test multiple remote HSM agents, a new facet type named "AGT" and
-# the following associated variables are added:
-#
-# AGTCOUNT: number of agents
-# AGTDEV{N}: target HSM mount point (root path of the backend)
-# agt{N}_HOST: hostname of the agent agt{N}
-# SINGLEAGT: facet of the single agent
-#
-# The number of agents is initialized as the number of remote client nodes.
-# By default, only single copytool is started on a remote client/agent. If there
-# was no remote client, then the copytool will be started on the local client.
-#
-init_agt_vars() {
- local n
- local agent
-
- export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))}
- [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1
-
- export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP}
- if [[ $CLIENTCOUNT -gt 1 ]] &&
- ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then
- skip_env "SHARED_DIRECTORY should be accessible"\
- "on all client nodes"
- exit 0
- fi
-
- # We used to put the HSM archive in $SHARED_DIRECTORY but that
- # meant NFS issues could hose sanity-hsm sessions. So now we
- # use $TMP instead.
- for n in $(seq $AGTCOUNT); do
- eval export AGTDEV$n=\$\{AGTDEV$n:-"$TMP/arc$n"\}
- agent=CLIENT$((n + 1))
- if [[ -z "${!agent}" ]]; then
- [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 ||
- agent=CLIENT2
- fi
- eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\}
- done
-
- export SINGLEAGT=${SINGLEAGT:-agt1}
-
- export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
- export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
- export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
- export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""}
- export HSMTOOL_TESTDIR
-
- HSM_ARCHIVE_NUMBER=2
-
- # The test only support up to 10 MDTs
- MDT_PREFIX="mdt.$FSNAME-MDT000"
- HSM_PARAM="${MDT_PREFIX}0.hsm"
-
- # archive is purged at copytool setup
- HSM_ARCHIVE_PURGE=true
-
- # Don't allow copytool error upon start/setup
- HSMTOOL_NOERROR=false
-}
-
-# Get the backend root path for the given agent facet.
-copytool_device() {
- local facet=$1
- local dev=AGTDEV$(facet_number $facet)
-
- echo -n ${!dev}
-}
-
-get_mdt_devices() {
- local mdtno
- # get MDT device for each mdc
- for mdtno in $(seq 1 $MDSCOUNT); do
- local idx=$(($mdtno - 1))
- MDT[$idx]=$($LCTL get_param -n \
- mdc.$FSNAME-MDT000${idx}-mdc-*.mds_server_uuid |
- awk '{gsub(/_UUID/,""); print $1}' | head -n1)
- done
-}
-
search_copytools() {
local hosts=${1:-$(facet_active_host $SINGLEAGT)}
- do_nodesv $hosts "libtool execute pgrep -x $HSMTOOL"
-}
-
-kill_copytools() {
- local hosts=${1:-$(facet_active_host $SINGLEAGT)}
-
- echo "Killing existing copytools on $hosts"
- do_nodesv $hosts "libtool execute killall -q $HSMTOOL" || true
- copytool_continue "$hosts"
+ do_nodesv $hosts "pgrep --pidfile=$HSMTOOL_PID_FILE hsmtool"
}
wait_copytools() {
local wait_timeout=200
local wait_start=$SECONDS
local wait_end=$((wait_start + wait_timeout))
- local sleep_time=100000 # 0.1 second
+ local sleep_time=1
while ((SECONDS < wait_end)); do
if ! search_copytools $hosts; then
fi
echo "copytools still running on $hosts"
- usleep $sleep_time
- [ $sleep_time -lt 32000000 ] && # 3.2 seconds
- sleep_time=$(bc <<< "$sleep_time * 2")
+ sleep $sleep_time
+ [ $sleep_time -lt 5 ] && sleep_time=$((sleep_time + 1))
done
# try to dump Copytool's stack
cmd="cat $test_dir/fifo > $test_dir/events &"
cmd+=" echo \\\$! > $test_dir/monitor_pid"
- if [[ $PDSH == *Rmrsh* ]]; then
- # This is required for pdsh -Rmrsh and its handling of remote
- # shells.
- # Regular ssh and pdsh -Rssh work fine without this
- # backgrounded subshell nonsense.
- (do_node $agent "$cmd") &
- export HSMTOOL_MONITOR_PDSH=$!
+ # This background subshell nonsense is required when pdsh/ssh decides
+ # to wait for the cat process to exit on the remote client
+ (do_node $agent "$cmd") &
+ export HSMTOOL_MONITOR_PDSH=$!
- # Slightly racy, but just making a best-effort to catch obvious
- # problems.
- sleep 1
- ps -p $HSMTOOL_MONITOR_PDSH > /dev/null ||
- error "Failed to start copytool monitor on $agent"
- else
- do_node $agent "$cmd"
- if [ $? != 0 ]; then
- error "Failed to start copytool monitor on $agent"
- fi
+ # Slightly racy, but just making a best-effort to catch obvious
+ # problems.
+ sleep 1
+ do_node $agent "stat $HSMTOOL_MONITOR_DIR/monitor_pid 2>&1 > /dev/null"
+ if [ $? != 0 ]; then
+ error "Failed to start copytool monitor on $agent"
fi
}
{
local fid="$1"
- case "$HSMTOOL" in
- lhsmtool_posix)
- printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid"
- ;;
+ case "$HSMTOOL_ARCHIVE_FORMAT" in
+ v1)
+ printf "%s" "$(hsm_root)/*/*/*/*/*/*/$fid"
+ ;;
+ v2)
+ printf "%s" "$(hsm_root)/*/$fid"
+ ;;
esac
}
copytool_suspend() {
local agents=${1:-$(facet_active_host $SINGLEAGT)}
- stack_trap \
- "do_nodesv $agents libtool execute pkill -CONT -x '$HSMTOOL' || true" EXIT
- do_nodesv $agents "libtool execute pkill -STOP -x $HSMTOOL" || return 0
+ stack_trap "pkill_copytools $agents CONT || true" EXIT
+ pkill_copytools $agents STOP || return 0
echo "Copytool is suspended on $agents"
}
-copytool_continue() {
- local agents=${1:-$(facet_active_host $SINGLEAGT)}
-
- do_nodesv $agents "libtool execute pkill -CONT -x $HSMTOOL" || return 0
- echo "Copytool is continued on $agents"
-}
-
copytool_remove_backend() {
local fid=$1
local be=$(do_facet $SINGLEAGT find "$(hsm_root)" -name $fid)
[[ -n $fid ]] && cmd+=" | grep '$fid'"
cmd+=" | egrep 'WAITING|STARTED'"
- wait_result $SINGLEMDS "$cmd" "" $timeout ||
+ wait_update_facet --verbose mds1 "$cmd" "" $timeout ||
error "requests did not complete"
}
# Lustre mount-point is mandatory and last parameter on
# copytool cmd-line.
- local mntpnt=$(do_rpc_nodes $agent libtool execute ps -C $HSMTOOL -o args= |
+ local mntpnt=$(do_rpc_nodes $agent \
+ pgrep --pidfile=$HSMTOOL_PID_FILE --list-full hsmtool |
awk '{print $NF}')
[ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\
"parameter"
error "wrong archive number, $st != $LOCAL_HSM_ARCHIVE_NUMBER"
LOCAL_HSM_ARCHIVE_NUMBER=33
- if [ $(lustre_version_code client) -ge $(version_code 2.11.56) ] &&
- [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.11.56) ]; then
+ if [ "$CLIENT_VERSION" -ge $(version_code 2.11.56) ] &&
+ [ "$MDS1_VERSION" -ge $(version_code 2.11.56) ]; then
		# Lustre 2.11.56 and later supports an unlimited number of
		# archive IDs.
# Test whether setting archive number > 32 is supported
$LFS hsm_set --exists --archive-id $LOCAL_HSM_ARCHIVE_NUMBER $f ||
run_test 1d "Archive, Release and Restore DoM file"
test_1e() {
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code $SEL_VER) ] &&
+ [ "$MDS1_VERSION" -lt $(version_code $SEL_VER) ] &&
skip "skipped for lustre < $SEL_VER"
mkdir -p $DIR/$tdir
}
run_test 11b "Import a deleted file using its FID"
+test_11c() {
+ pool_add $TESTNAME || error "Pool creation failed"
+ pool_add_targets $TESTNAME 1 1 || error "pool_add_targets failed"
+
+ mkdir -p $DIR/$tdir
+ $LFS setstripe -p "$TESTNAME" $DIR/$tdir
+
+ copy2archive /etc/hosts $tdir/$tfile
+ copytool import $tdir/$tfile $DIR/$tdir/$tfile
+}
+run_test 11c "Import a file to a directory with a pool"
+
test_12a() {
# test needs a running copytool
copytool setup
}
run_test 12q "file attributes are refreshed after restore"
+test_12r() {
+ # test needs a running copytool
+ copytool setup
+
+ mkdir -p $DIR/$tdir
+ local f=$DIR/$tdir/$tfile
+ local fid=$(copy_file /etc/hosts $f)
+
+ $LFS hsm_archive $f || error "archive of $f failed"
+ wait_request_state $fid ARCHIVE SUCCEED
+ $LFS hsm_release $f || error "release of $f failed"
+
+ offset=$(lseek_test -d 7 $f)
+
+ # we check we had a restore done
+ wait_request_state $fid RESTORE SUCCEED
+ [[ $offset == 7 ]] || error "offset $offset != 7"
+}
+run_test 12r "lseek restores released file"
+
test_13() {
local -i i j k=0
for i in {1..10}; do
# LU-4388/LU-4389 - ZFS does not report full number of blocks
# used until file is flushed to disk
- if [ $(facet_fstype ost1) == "zfs" ]; then
+ if [ "$ost1_FSTYPE" == "zfs" ]; then
# this causes an OST_SYNC rpc to be sent
dd if=/dev/zero of=$f bs=512 count=1 oflag=sync conv=notrunc,fsync
# clear locks to reread file data
wait_for_grace_delay
$LFS hsm_archive --filelist $FILELIST ||
error "cannot archive a file list"
- wait_all_done 100
+ wait_all_done 200
$LFS hsm_release --filelist $FILELIST ||
error "cannot release a file list"
$LFS hsm_restore --filelist $FILELIST ||
error "cannot restore a file list"
- wait_all_done 100
+ wait_all_done 200
}
run_test 90 "Archive/restore a file list"
md5sum $f2 &
sleep 2
+ do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"
# after umount hsm_actions->O/x/x log shouldn't have
# double RESTORE records like below
#[0x200000401:0x1:0x0]...0x58d03a0d/0x58d03a0c action=RESTORE...WAITING
sleep 30 &&
do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"&
fail $SINGLEMDS
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+
+ do_facet $SINGLEMDS "$LCTL get_param $HSM_PARAM.actions"
copytool_continue
- wait_request_state $fid RESTORE SUCCEED
+ wait_all_done 100 $fid
}
run_test 407 "Check for double RESTORE records in llog"
test_500()
{
- [ $MDS1_VERSION -lt $(version_code 2.6.92) ] &&
+ [ "$MDS1_VERSION" -lt $(version_code 2.6.92) ] &&
skip "HSM migrate is not supported"
test_mkdir -p $DIR/$tdir
- if [ $(lustre_version_code client) -lt $(version_code 2.11.56) ] ||
- [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.11.56) ];
+ if [ "$CLIENT_VERSION" -lt $(version_code 2.11.56) ] ||
+ [ "$MDS1_VERSION" -lt $(version_code 2.11.56) ];
then
llapi_hsm_test -d $DIR/$tdir -b ||
error "One llapi HSM test failed"
run_test 500 "various LLAPI HSM tests"
test_600() {
- [ $MDS1_VERSION -lt $(version_code 2.10.58) ] &&
+ [ "$MDS1_VERSION" -lt $(version_code 2.10.58) ] &&
skip "need MDS version at least 2.10.58"
mkdir -p $DIR/$tdir
local llog_reader=$(do_facet mgs "which llog_reader 2> /dev/null")
llog_reader=${llog_reader:-$LUSTRE/utils/llog_reader}
[ -z $(do_facet mgs ls -d $llog_reader 2> /dev/null) ] &&
- skip_env "missing llog_reader" && return
- local fstype=$(facet_fstype mds1)
+ skip_env "missing llog_reader"
mkdir -p $DIR/$tdir
local entry
#remount mds1 as ldiskfs or zfs type
- stack_trap "stop mds1; start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS" EXIT
stop mds1 || error "stop mds1 failed"
+ stack_trap "unmount_fstype mds1; start mds1 $(mdsdevname 1)\
+ $MDS_MOUNT_OPTS" EXIT
mount_fstype mds1 || error "remount mds1 failed"
for ((i = 0; i < 1; i++)); do