From 29bb063654c9a74d495e5d4cea17694a2b70f6a0 Mon Sep 17 00:00:00 2001 From: Nikitas Angelinas Date: Wed, 17 Jun 2020 04:17:07 -0700 Subject: [PATCH] LU-13688 hsm: handle in-tree executed copytools correctly The Lustre test suite and HSM copytools can be invoked from either within /usr/lib{,64}/ if they have been installed from source or from packages, or from within the Lustre source tree, usually for development purposes; in the latter case, the copytool process name is prefixed with "lt-", due to being invoked via a libtool wrapper script. The Lustre test framework relies on "libtool execute" to distinguish between these two cases, parse the command parameters and pass the correct process name as a parameter to utilities such as pgrep(1), pkill(1), ps(1) and killall(1). Unfortunately, this doesn't seem to work unless the libtool script for the copytool and the test framework test file are in the same directory; e.g. this doesn't work for lhsmtool_posix as its libtool script is in lustre/utils/, but the Lustre test suite is in lustre/tests/, which doesn't allow the "libtool execute" parsing and parameter replacement to succeed. Fix this by determining the process name of the executed copytool based on whether it was invoked from within the source tree or not and using it in commands that either search for copytool processes or send them signals by process name. 
Signed-off-by: Nikitas Angelinas Cray-bug-id: LUS-8931 Change-Id: Ief7b224b793401b1a24bf9780d1df6e029f5c0d7 Reviewed-on: https://review.whamcloud.com/38962 Tested-by: jenkins Reviewed-by: James Simmons Reviewed-by: Ben Evans Reviewed-by: nathan r Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/tests/sanity-hsm.sh | 8 ++++---- lustre/tests/test-framework.sh | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 15f9551..01f7e0a 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -84,7 +84,7 @@ CLIENT2=${CLIENT2:-$CLIENT1} search_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $hosts "libtool execute pgrep -x $HSMTOOL" + do_nodesv $hosts "pgrep -x $HSMTOOL_PKILL" } wait_copytools() { @@ -170,8 +170,8 @@ copytool_suspend() { local agents=${1:-$(facet_active_host $SINGLEAGT)} stack_trap \ - "do_nodesv $agents libtool execute pkill -CONT -x '$HSMTOOL' || true" EXIT - do_nodesv $agents "libtool execute pkill -STOP -x $HSMTOOL" || return 0 + "do_nodesv $agents pkill -CONT -x '$HSMTOOL_PKILL' || true" EXIT + do_nodesv $agents "pkill -STOP -x $HSMTOOL_PKILL" || return 0 echo "Copytool is suspended on $agents" } @@ -489,7 +489,7 @@ get_agent_uuid() { # Lustre mount-point is mandatory and last parameter on # copytool cmd-line. 
- local mntpnt=$(do_rpc_nodes $agent libtool execute ps -C $HSMTOOL -o args= | + local mntpnt=$(do_rpc_nodes $agent ps -C $HSMTOOL_KILL -o args= | awk '{print $NF}') [ -n "$mntpnt" ] || error "Found no Agent or with no mount-point "\ "parameter" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index fd84e2d..65aeb10 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -10034,6 +10034,15 @@ init_agt_vars() { export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""} export HSMTOOL_TESTDIR + # Copytools invoked from within the source tree are executed through a + # libtool script and have a different process name + from_build_tree && LIBTOOL_PREFIX=lt- + + export HSMTOOL_KILL=${LIBTOOL_PREFIX}${HSMTOOL} + + # pgrep(1) and pkill(1) limit process name matching to 15 characters + export HSMTOOL_PKILL=${HSMTOOL_KILL:0:15} + HSM_ARCHIVE_NUMBER=2 # The test only support up to 10 MDTs @@ -10069,7 +10078,7 @@ get_mdt_devices() { copytool_continue() { local agents=${1:-$(facet_active_host $SINGLEAGT)} - do_nodesv $agents "libtool execute pkill -CONT -x $HSMTOOL" || return 0 + do_nodesv $agents "pkill -CONT -x $HSMTOOL_PKILL" || return 0 echo "Copytool is continued on $agents" } @@ -10077,7 +10086,7 @@ kill_copytools() { local hosts=${1:-$(facet_active_host $SINGLEAGT)} echo "Killing existing copytools on $hosts" - do_nodesv $hosts "libtool execute killall -q $HSMTOOL" || true + do_nodesv $hosts "killall -q $HSMTOOL_KILL" || true copytool_continue "$hosts" } @@ -10133,7 +10142,7 @@ __lhsmtool_setup() cmd+=" \"$mountpoint\"" echo "Starting copytool $facet on $(facet_host $facet)" - stack_trap "do_facet $facet libtool execute pkill -x '$HSMTOOL' || true" EXIT + stack_trap "do_facet $facet pkill -x '$HSMTOOL_PKILL' || true" EXIT do_facet $facet "$cmd < /dev/null > \"$(copytool_logfile $facet)\" 2>&1" } -- 1.8.3.1