From 2d8c6eb381e5af6e5c4b08186a583453f4711cc3 Mon Sep 17 00:00:00 2001 From: grev Date: Fri, 14 Nov 2008 16:23:22 +0000 Subject: [PATCH] b=17229 i=Adilger remote multiop fix --- lustre/tests/replay-vbr.sh | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/lustre/tests/replay-vbr.sh b/lustre/tests/replay-vbr.sh index 64104365..119555b 100644 --- a/lustre/tests/replay-vbr.sh +++ b/lustre/tests/replay-vbr.sh @@ -447,17 +447,41 @@ run_test 7a "fail MDS, delayed recovery, fail MDS" rmultiop_start() { local client=$1 local file=$2 - do_node $client LUSTRE="" sh runmultiop_bg_pause $file O_tSc - eval export ${client}_pid=$(do_node $client cat /tmp/multiop_bg.pid) + + # We need to run do_node in bg, because pdsh does not exit + # if a child process of the run script exists. + # I.e. pdsh does not exit when runmultiop_bg_pause has exited, + # because of multiop_bg_pause -> $MULTIOP_PROG & + # For the same reason we need to sleep a bit after do_node starts + # to let runmultiop_bg_pause start multiop and + # update /tmp/multiop_bg.pid ; + # The rm /tmp/multiop_bg.pid guarantees here that + # we have the /tmp/multiop_bg.pid file + # as updated by runmultiop_bg_pause + + local pid_file=$TMP/multiop_bg.pid.$$ + do_node $client "rm -f $pid_file && MULTIOP_PID_FILE=$pid_file LUSTRE= runmultiop_bg_pause $file O_tSc" & + local pid=$! + sleep 3 + local multiop_pid + multiop_pid=$(do_node $client cat $pid_file) + [ -n "$multiop_pid" ] || error "$client : Can not get multiop_pid from $pid_file " + eval export ${client}_multiop_pid=$multiop_pid + eval export ${client}_do_node_pid=$pid + local var=${client}_multiop_pid + echo client $client multiop_bg started multiop_pid=${!var} return $? } rmultiop_stop() { local client=$1 - local pid=${client}_pid - echo "Stopping pid=${!pid}" - # how to wait for pid in that case? 
- do_node $client "kill -USR1 ${!pid}; sleep 3" + local multiop_pid=${client}_multiop_pid + local do_node_pid=${client}_do_node_pid + + echo "Stopping multiop_pid=${!multiop_pid} (kill ${!multiop_pid} on $client)" + do_node $client kill -USR1 ${!multiop_pid} + + wait ${!do_node_pid} || true } test_8a() { -- 1.8.3.1