MOUNT_2=${MOUNT_2:-"yes"}
FAIL_ON_ERROR=false
-if [ $MDSCOUNT -ge 2 ]; then
+if [[ $MDSCOUNT -ge 2 ]]; then
skip_env "Only run with single MDT for now" && exit
fi
fi
# $RUNAS_ID may get set incorrectly somewhere else
-[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] &&
- error "\$RUNAS_ID set to 0, but \$UID is also 0!"
-
+if [[ $UID -eq 0 && $RUNAS_ID -eq 0 ]]; then
+ skip_env "\$RUNAS_ID set to 0, but \$UID is also 0!" && exit
+fi
check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS
build_test_filter
-# the standard state when starting a test is
-# - no copytool
-# - MOUNT2 done
-# as some test changes the default, we need to re-make it
-cleanup() {
- copytool_cleanup
- if ! is_mounted $MOUNT2
- then
- mount_client $MOUNT2
+#
+# In order to test multiple remote HSM agents, a new facet type named "AGT" and
+# the following associated variables are added:
+#
+# AGTCOUNT: number of agents
+# AGTDEV{N}: target HSM mount point (root path of the backend)
+# agt{N}_HOST: hostname of the agent agt{N}
+# SINGLEAGT: facet of the single agent
+#
+# The number of agents is initialized as the number of remote client nodes.
+# By default, only a single copytool is started on a remote client/agent. If
+# there is no remote client, then the copytool is started on the local client.
+#
+init_agt_vars() {
+ local n
+ local agent
+
+ export AGTCOUNT=${AGTCOUNT:-$((CLIENTCOUNT - 1))}
+ [[ $AGTCOUNT -gt 0 ]] || AGTCOUNT=1
+
+ export SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP}
+ if [[ $CLIENTCOUNT -gt 1 ]] &&
+ ! check_shared_dir $SHARED_DIRECTORY $CLIENTS; then
+ skip_env "SHARED_DIRECTORY should be accessible"\
+ "on all client nodes"
+ exit 0
fi
- changelog_cleanup
+
+ for n in $(seq $AGTCOUNT); do
+ eval export AGTDEV$n=\$\{AGTDEV$n:-"$SHARED_DIRECTORY/arc$n"\}
+ agent=CLIENT$((n + 1))
+ if [[ -z "${!agent}" ]]; then
+ [[ $CLIENTCOUNT -eq 1 ]] && agent=CLIENT1 ||
+ agent=CLIENT2
+ fi
+ eval export agt${n}_HOST=\$\{agt${n}_HOST:-${!agent}\}
+ done
+
+ export SINGLEAGT=${SINGLEAGT:-agt1}
+
+ export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
+ export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
+ export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
+ HSM_ARCHIVE=$(copytool_device $SINGLEAGT)
+ HSM_ARCHIVE_NUMBER=2
+
+ MDT_PARAM="mdt.$FSNAME-MDT0000"
+ HSM_PARAM="$MDT_PARAM.hsm"
+
+ # archive is purged at copytool setup
+ HSM_ARCHIVE_PURGE=true
}
-export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
-export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
-export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
-HSM_ARCHIVE=${HSM_ARCHIVE:-$TMP/arc}
-HSM_ARCHIVE_NUMBER=2
+# Get the backend root path for the given agent facet.
+copytool_device() {
+ local facet=$1
+ local dev=AGTDEV$(facet_number $facet)
-MDT_PARAM="mdt.$FSNAME-MDT0000"
-HSM_PARAM="$MDT_PARAM.hsm"
+ echo -n ${!dev}
+}
-# archive is purged at copytool setup
-HSM_ARCHIVE_PURGE=true
+# Stop copytool and unregister an existing changelog user.
+cleanup() {
+ copytool_cleanup
+ changelog_cleanup
+}
search_and_kill_copytool() {
- echo "Killing existing copy tools"
- killall -q $HSMTOOL_BASE || true
+ local agents=${1:-$(facet_active_host $SINGLEAGT)}
+
+ echo "Killing existing copytools on $agents"
+ do_nodesv $agents "killall -q $HSMTOOL_BASE" || true
}
+# Start a copytool on an agent facet, or wake up a stopped one.
+#
+# $1 - agent facet (default: $SINGLEAGT)
+# $2 - Lustre mount point handed to the copytool (default: $MOUNT)
+# $3 - optional archive id; when set, "--archive <id>" is passed and the
+#      wake-up shortcut is skipped so a new copytool is always started
+#
+# When a new copytool is started, "cleanup" is installed as the EXIT trap.
+# The wake-up path returns before that and leaves the trap untouched.
copytool_setup() {
- if pkill -CONT -x $HSMTOOL_BASE; then
- echo "Wakeup copytool"
- return
+ local facet=${1:-$SINGLEAGT}
+ local lustre_mntpnt=${2:-$MOUNT}
+ local arc_id=$3
+ local hsm_root=$(copytool_device $facet)
+ local agent=$(facet_active_host $facet)
+
+ if [[ -z "$arc_id" ]] &&
+ do_facet $facet "pkill -CONT -x $HSMTOOL_BASE"; then
+ echo "Wakeup copytool $facet on $agent"
+ return 0
fi
+
+ # An empty backend path would turn the purge below into "rm -rf /*"
+ # on the agent. Return explicitly so the purge is skipped even if
+ # error() comes back to the caller.
+ if [[ -z "$hsm_root" ]]; then
+ error "no HSM backend path (AGTDEV) defined for $facet"
+ return 1
+ fi
+
if $HSM_ARCHIVE_PURGE; then
- echo "Purging archive"
- rm -rf $HSM_ARCHIVE/*
+ echo "Purging archive on $agent"
+ do_facet $facet "rm -rf $hsm_root/*"
fi
- echo "Starting copytool"
- mkdir -p $HSM_ARCHIVE
+ echo "Starting copytool $facet on $agent"
+ do_facet $facet "mkdir -p $hsm_root" || error "mkdir '$hsm_root' failed"
# bandwidth is limited to 1MB/s so the copy time is known and
# independent of hardware
- local CMD="$HSMTOOL $HSMTOOL_VERBOSE --hsm-root $HSM_ARCHIVE"
- CMD=$CMD" --daemon --bandwidth 1 $MOUNT"
- [[ -z "$1" ]] || CMD+=" --archive $1"
-
- echo "$CMD"
- $CMD &
+ local cmd="$HSMTOOL $HSMTOOL_VERBOSE --daemon --hsm-root $hsm_root"
+ [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id"
+ cmd+=" --bandwidth 1 $lustre_mntpnt"
+
+ # Redirect the standard output and error to a log file which
+ # can be uploaded to Maloo.
+ local prefix=$TESTLOG_PREFIX
+ [[ -z "$TESTNAME" ]] || prefix=$prefix.$TESTNAME
+ local copytool_log=$prefix.copytool${arc_id}_log.$agent.log
+
+ do_facet $facet "$cmd < /dev/null > $copytool_log 2>&1" ||
+ error "start copytool $facet on $agent failed"
trap cleanup EXIT
}
copytool_cleanup() {
trap - EXIT
- pkill -INT -x $HSMTOOL_BASE || return 0
+ local agents=${1:-$(facet_active_host $SINGLEAGT)}
+
+ do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0
sleep 1
- echo "Copytool is stopped"
+ echo "Copytool is stopped on $agents"
}
copytool_suspend() {
- pkill -STOP -x $HSMTOOL_BASE || return 0
- echo "Copytool is suspended"
+ local agents=${1:-$(facet_active_host $SINGLEAGT)}
+
+ do_nodesv $agents "pkill -STOP -x $HSMTOOL_BASE" || return 0
+ echo "Copytool is suspended on $agents"
}
copytool_remove_backend() {
}
import_file() {
- $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE \
- --import $1 $2 $MOUNT || error "import of $1 to $2 failed"
+ do_facet $SINGLEAGT \
+ "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\
+ --import $1 $2 $MOUNT" ||
+ error "import of $1 to $2 failed"
}
make_archive() {
wait_update --verbose $(facet_active_host $facet) "$@"
}
-wait_request_state()
-{
+wait_request_state() {
local fid=$1
local request=$2
local state=$3
- wait_result $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\
- grep $fid | grep action=$request |\
- cut -f 13 -d ' ' | cut -f 2 -d =" $state 100 ||
+
+ local cmd="$LCTL get_param -n $HSM_PARAM.agent_actions"
+ cmd+=" | awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
+
+ wait_result $SINGLEMDS "$cmd" $state 100 ||
error "request on $fid is not $state"
}
-get_request_state()
-{
+get_request_state() {
local fid=$1
local request=$2
- do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\
- grep $fid | grep action=$request |\
- cut -f 13 -d ' ' | cut -f 2 -d ="
+
+ do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |"\
+ "awk '/'$fid'.*action='$request'/ {print \\\$13}' | cut -f2 -d="
}
-get_request_count()
-{
+get_request_count() {
local fid=$1
local request=$2
- do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\
- grep $fid | grep action=$request | wc -l"
+
+ do_facet $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |"\
+ "awk -vn=0 '/'$fid'.*action='$request'/ {n++}; END {print n}'"
}
-wait_all_done()
-{
+wait_all_done() {
local timeout=$1
- wait_result $SINGLEMDS "$LCTL get_param -n $HSM_PARAM.agent_actions |\
- egrep 'WAITING|STARTED' " "" $timeout ||
- error "requests did not complete"
+
+ local cmd="$LCTL get_param -n $HSM_PARAM.agent_actions"
+ cmd+=" | egrep 'WAITING|STARTED'"
+
+ wait_result $SINGLEMDS "$cmd" "" $timeout ||
+ error "requests did not complete"
}
-wait_for_grace_delay()
-{
+wait_for_grace_delay() {
local val=$(get_hsm_param grace_delay)
sleep $val
}
-my_uuid() {
- $LCTL get_param -n llite.$FSNAME-*.uuid
-}
-
MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid |
awk '{gsub(/_UUID/,""); print $1}' | head -1)
+# initialize the agent variables
+init_agt_vars
+
# cleanup from previous bad setup
search_and_kill_copytool
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/passwd $f)
# we do not use the default one to be sure
- local new_an=$((HSM_ARCHIVE_NUMBER+ 1))
+ local new_an=$((HSM_ARCHIVE_NUMBER + 1))
copytool_cleanup
- copytool_setup $new_an
+ copytool_setup $SINGLEAGT $MOUNT $new_an
$LFS hsm_archive --archive $new_an $f
wait_request_state $fid ARCHIVE SUCCEED
}
run_test 9 "Use of explict archive number, with dedicated copytool"
+test_9a() {
+ [[ $CLIENTCOUNT -ge 3 ]] ||
+ { skip "Need three or more clients"; return 0; }
+
+ local n
+ local file
+ local fid
+
+ copytool_cleanup $(comma_list $(agts_nodes))
+
+ # start all of the copytools
+ for n in $(seq $AGTCOUNT); do
+ copytool_setup agt$n
+ done
+
+ trap "copytool_cleanup $(comma_list $(agts_nodes))" EXIT
+ # archive files
+ mkdir -p $DIR/$tdir
+ for n in $(seq $AGTCOUNT); do
+ file=$DIR/$tdir/$tfile.$n
+ fid=$(make_small $file)
+
+ $LFS hsm_archive $file || error "could not archive file $file"
+ wait_request_state $fid ARCHIVE SUCCEED
+ check_hsm_flags $file "0x00000001"
+ done
+
+ trap - EXIT
+ copytool_cleanup $(comma_list $(agts_nodes))
+}
+run_test 9a "Multiple remote agents"
+
test_10a() {
# test needs a running copytool
copytool_setup
# rebind the archive to the newly created file
echo "rebind $fid to $fid2"
- $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root="$HSM_ARCHIVE"\
- --rebind $fid $fid2 $DIR ||
- error "could not rebind file"
+
+ do_facet $SINGLEAGT \
+ "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\
+ --rebind $fid $fid2 $DIR" || error "could not rebind file"
# restore file and compare md5sum
local sum2=$(md5sum $f | awk '{print $1}')
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local count=5
- local tmpfile=$TMP/tmp.$$
+ local tmpfile=$SHARED_DIRECTORY/tmp.$$
local fids=()
local sums=()
[[ $nl == $count ]] || error "$nl files in list, $count expected"
echo "rebind list of files"
- $HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root="$HSM_ARCHIVE"\
- --rebind $tmpfile $DIR ||
- error "could not rebind file list"
+ do_facet $SINGLEAGT \
+ "$HSMTOOL --archive $HSM_ARCHIVE_NUMBER --hsm-root $HSM_ARCHIVE\
+ --rebind $tmpfile $DIR" || error "could not rebind file list"
# restore files and compare md5sum
for i in $(seq 1 $count); do
}
run_test 30b "Restore at exec (release case)"
-restore_and_check_size()
-{
+restore_and_check_size() {
local f=$1
local fid=$2
local s=$(stat -c "%s" $f)
# test needs a running copytool
copytool_setup
- # Test behave badly if 2 mount points are present
- umount_client $MOUNT2
-
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/motd $f 1)
check_hsm_flags $f "0x0000000b"
- # Restore test environment
- mount_client $MOUNT2
-
copytool_cleanup
}
run_test 52 "Opened for write file on an evicted client should be set dirty"
# test needs a running copytool
copytool_setup
- # Checks are wrong with 2 mount points
- umount_client $MOUNT2
-
mkdir -p $DIR/$tdir
local f=$DIR/$tdir/$tfile
local fid=$(copy_file /etc/motd $f 1)
check_hsm_flags $f "0x00000009"
- mount_client $MOUNT2
-
copytool_cleanup
}
run_test 53 "Opened for read file on an evicted client should not be set dirty"
run_test 105 "Restart of coordinator"
test_106() {
- # Test behave badly if 2 mount points are present
- umount_client $MOUNT2
-
# test needs a running copytool
copytool_setup
- local uuid=$(my_uuid)
+ local uuid=$(do_rpc_nodes $(facet_active_host $SINGLEAGT) \
+ get_client_uuid | cut -d' ' -f2)
local agent=$(do_facet $SINGLEMDS $LCTL get_param -n $HSM_PARAM.agents |
grep $uuid)
copytool_cleanup
[[ ! -z "$agent" ]] ||
error "My uuid $uuid not found in agent list after"\
" copytool restart"
-
- # Restore test environment
- mount_client $MOUNT2
}
run_test 106 "Copytool register/unregister"
local target=0x7d
[[ $flags == $target ]] || error "Changelog flag is $flags not $target"
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 221 "Changelog for archive canceled"
local target=0x80
[[ $flags == $target ]] || error "Changelog flag is $flags not $target"
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 222a "Changelog for explicit restore"
local target=0x80
[[ $flags == $target ]] || error "Changelog flag is $flags not $target"
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 222b "Changelog for implicit restore"
[[ $flags == $target ]] ||
error "Changelog flag is $flags not $target"
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 223a "Changelog for restore canceled (import case)"
[[ $flags == $target ]] ||
error "Changelog flag is $flags not $target"
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 223b "Changelog for restore canceled (release case)"
[[ $flags == $target ]] ||
error "Changelog flag is $flags not $target"
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 224 "Changelog for remove"
[[ $flags == $target ]] ||
error "Changelog flag is $flags not $target"
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 225 "Changelog for remove canceled"
[[ $flags == $target ]] ||
error "Changelog flag is $flags not $target"
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 226 "changelog for last rm/mv with exiting archive"
wait_request_state $fid ARCHIVE SUCCEED
check_flags_changes $f $fid lost 3 1
- changelog_cleanup
- copytool_cleanup
+ cleanup
}
run_test 227 "changelog when explicit setting of HSM flags"