From eaf3353fce3384e21b1310c2feb847a7c01eb131 Mon Sep 17 00:00:00 2001 From: "John L. Hammond" Date: Wed, 25 Feb 2015 16:12:49 -0600 Subject: [PATCH] LU-6298 hsm: shutdown HSM CDTs in parallel In sanity-hsm.sh rewrite copytool_cleanup() to shutdown and restart the MDT HSM coordinators in parallel. This saves about 8 * (MDSCOUNT - 1) seconds per call. Signed-off-by: John L. Hammond Change-Id: I75445ad126dc73251a3d056611133e3ab6b83362 Reviewed-on: http://review.whamcloud.com/13901 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Faccini Bruno Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin --- lustre/tests/sanity-hsm.sh | 92 ++++++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 35 deletions(-) diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index f361edc..183a4d4 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -272,54 +272,76 @@ get_copytool_event_log() { copytool_cleanup() { trap - EXIT - local facet=$SINGLEAGT - local agents=${1:-$(facet_active_host $facet)} - local mdtno - local idx - local oldstate - local mdt_hsmctrl - local hsm_root=$(copytool_device $facet) + local agt_facet=$SINGLEAGT + local agt_hosts=${1:-$(facet_active_host $agt_facet)} local end_wait=$(( SECONDS + TIMEOUT )) + local hsm_root=$(copytool_device $agt_facet) + local i + local facet + local param + local -a state - do_nodesv $agents "pkill -INT -x $HSMTOOL_BASE" || return 0 + do_nodesv $agt_hosts "pkill -INT -x $HSMTOOL_BASE" || return 0 while (( SECONDS < end_wait )); do sleep 2 - do_nodesv $agents "pgrep -x $HSMTOOL_BASE" + do_nodesv $agt_hosts "pgrep -x $HSMTOOL_BASE" if [ $? -ne 0 ]; then - echo "Copytool is stopped on $agents" + echo "copytool is stopped on $agt_hosts" break fi - echo "Copytool still running on $agents" + echo "copytool still running on $agt_hosts" done - if do_nodesv $agents "pgrep -x $HSMTOOL_BASE"; then - error "Copytool failed to stop in ${TIMEOUT}s ..." + + if do_nodesv $agt_hosts "pgrep -x $HSMTOOL_BASE"; then + error "copytool failed to stop in ${TIMEOUT}s" else - echo "Copytool has stopped in " \ - "$((TIMEOUT - (end_wait - SECONDS)))s." + echo "copytool has stopped in " \ + "$((TIMEOUT - (end_wait - SECONDS)))s" fi - # clean all CDTs orphans requests from previous tests - # that would otherwise need to timeout to clear. - for mdtno in $(seq 1 $MDSCOUNT); do - idx=$(($mdtno - 1)) - mdt_hsmctrl="mdt.$FSNAME-MDT000${idx}.hsm_control" - oldstate=$(do_facet mds${mdtno} "$LCTL get_param -n " \ - "$mdt_hsmctrl") - # skip already stop[ed,ing] CDTs - echo $oldstate | grep stop && continue - - do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=shutdown" - wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \ - "stopped" 20 || - error "mds${mdtno} cdt state is not stopped" - do_facet mds${mdtno} "$LCTL set_param $mdt_hsmctrl=$oldstate" - wait_result mds${mdtno} "$LCTL get_param -n $mdt_hsmctrl" \ - "$oldstate" 20 || - error "mds${mdtno} cdt state is not $oldstate" + # Clean all CDTs orphans requests from previous tests that + # would otherwise need to timeout to clear. + for ((i = 0; i < MDSCOUNT; i++)); do + facet=mds$((i + 1)) + param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) + state[$i]=$(do_facet $facet "$LCTL get_param -n $param") + + # Skip already stopping or stopped CDTs. + [[ "${state[$i]}" =~ ^stop ]] && continue + + do_facet $facet "$LCTL set_param $param=shutdown" done - if do_facet $facet "df $hsm_root" >/dev/null 2>&1 ; then - do_facet $facet "rm -rf $hsm_root/*" + + for ((i = 0; i < MDSCOUNT; i++)); do + # Only check and restore CDTs that we stopped in the first loop. + [[ "${state[$i]}" =~ ^stop ]] && continue + + facet=mds$((i + 1)) + param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) + + wait_result $facet "$LCTL get_param -n $param" stopped 20 || + error "$facet CDT state is not stopped" + + # Restore old CDT state. + do_facet $facet "$LCTL set_param $param=${state[$i]}" + done + + + for ((i = 0; i < MDSCOUNT; i++)); do + # Only check CDTs that we stopped in the first loop. + [[ "${state[$i]}" =~ ^stop ]] && continue + + facet=mds$((i + 1)) + param=$(printf 'mdt.%s-MDT%04x.hsm_control' $FSNAME $i) + + # Check that the old CDT state was restored. + wait_result $facet "$LCTL get_param -n $param" "${state[$i]}" \ + 20 || error "$facet CDT state is not '${state[$i]}'" + done + + if do_facet $agt_facet "df $hsm_root" >/dev/null 2>&1 ; then + do_facet $agt_facet "rm -rf $hsm_root/*" fi } -- 1.8.3.1