From d031e92fe730792e3a4dba2f1e8ae90a085c96c5 Mon Sep 17 00:00:00 2001
From: Elena Gryaznova
Date: Tue, 26 Jan 2010 17:10:59 +0300
Subject: [PATCH] b=19387 integrate LST into acc-sm

new acc-sm test suite: lnet-selftest

i=Maxim.Patlasov
i=He.Huang
---
 lustre/tests/Makefile.am         |   1 +
 lustre/tests/acceptance-small.sh |   2 +-
 lustre/tests/functions.sh        |  63 ++++++++++++
 lustre/tests/lnet-selftest.sh    | 176 +++++++++++++++++++++++++++++++++++++++
 lustre/tests/test-framework.sh   |  24 +++++-
 5 files changed, 261 insertions(+), 5 deletions(-)
 create mode 100755 lustre/tests/lnet-selftest.sh

diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am
index 73d01a0..35349df 100644
--- a/lustre/tests/Makefile.am
+++ b/lustre/tests/Makefile.am
@@ -24,6 +24,7 @@ noinst_SCRIPTS += recovery-mds-scale.sh run_dd.sh run_tar.sh run_iozone.sh
 noinst_SCRIPTS += run_dbench.sh run_IOR.sh recovery-double-scale.sh
 noinst_SCRIPTS += recovery-random-scale.sh parallel-scale.sh metadata-updates.sh
 noinst_SCRIPTS += lustre-rsync-test.sh ost-pools.sh rpc.sh yaml.sh liblustre.sh
+noinst_SCRIPTS += lnet-selftest.sh
 nobase_noinst_SCRIPTS = cfg/local.sh
 nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh
 nobase_noinst_SCRIPTS += racer/dir_create.sh racer/file_create.sh racer/file_list.sh
diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh
index ed7562f..2d9277a 100755
--- a/lustre/tests/acceptance-small.sh
+++ b/lustre/tests/acceptance-small.sh
@@ -13,7 +13,7 @@ DEFAULT_SUITES="runtests sanity sanity-benchmark sanityn lfsck liblustre
                 replay-ost-single replay-dual insanity sanity-quota sanity-sec sanity-gss
                 performance-sanity large-scale recovery-mds-scale recovery-double-scale
                 recovery-random-scale parallel-scale
-                lustre_rsync-test metadata-updates ost-pools"
+                lustre_rsync-test metadata-updates ost-pools lnet-selftest"
 
 if [[ -n $@ ]]; then
     ACC_SM_ONLY="${ACC_SM_ONLY} $@"
diff --git a/lustre/tests/functions.sh b/lustre/tests/functions.sh
index 83f109d..1dd9ac5 100644
--- a/lustre/tests/functions.sh
+++ b/lustre/tests/functions.sh
@@ -48,3 +48,66 @@ mpi_run () {
 
     return $rc
 }
+# Print the comma-separated host list $1 as a space-separated list of
+# NIDs, i.e. every host with "@$NETTYPE" appended.
+nids_list () {
+    local list
+    local host
+    for host in ${1//,/ }; do
+        list="$list $host@$NETTYPE"
+    done
+    # left unquoted so the leading space is dropped by word splitting
+    echo $list
+}
+
+# Stop batch "b" and end the lst session shown by "$LST show_session".
+# Returns early when the 5th field of the show_session output is empty.
+# With --verbose as $1, "$LST show_error c s" is run before stopping.
+# FIXME: all setup/cleanup can be done without rpc.sh
+lst_end_session () {
+    local verbose=false
+    [ "x$1" = x--verbose ] && verbose=true
+
+    export LST_SESSION=$($LST show_session 2>/dev/null | awk -F " " '{print $5}')
+    [ -z "$LST_SESSION" ] && return
+
+    if $verbose; then
+        $LST show_error c s
+    fi
+    $LST stop b
+    $LST end_session
+}
+
+# Run lst_end_session through do_rpc_nodes on the nodes from nodes_list.
+lst_session_cleanup_all () {
+    local list=$(comma_list $(nodes_list))
+    do_rpc_nodes $list lst_end_session
+}
+
+# Remove the lnet_selftest module if lsmod lists it; always succeeds.
+lst_cleanup () {
+    lsmod | grep -q lnet_selftest && rmmod lnet_selftest > /dev/null 2>&1 || true
+}
+
+# End the local lst session, then run lst_cleanup through do_rpc_nodes
+# on the nodes from nodes_list.
+lst_cleanup_all () {
+    local list=$(comma_list $(nodes_list))
+
+    # lst end_session needs to be executed only locally
+    # i.e. on node where lst new_session was called
+    lst_end_session --verbose
+    do_rpc_nodes $list lst_cleanup
+}
+
+# Load the lnet_selftest module via load_module.
+lst_setup () {
+    load_module lnet_selftest
+}
+
+# Run lst_setup through do_rpc_nodes on the nodes from nodes_list.
+lst_setup_all () {
+    local list=$(comma_list $(nodes_list))
+    do_rpc_nodes $list lst_setup
+}
+
diff --git a/lustre/tests/lnet-selftest.sh b/lustre/tests/lnet-selftest.sh
new file mode 100755
index 0000000..da254bd
--- /dev/null
+++ b/lustre/tests/lnet-selftest.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+#
+# acc-sm test suite lnet-selftest: generates and runs an lst (LNET
+# selftest) batch between the lst_CLIENTS and lst_SERVERS nodes.
+
+LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
+. $LUSTRE/tests/test-framework.sh
+init_test_env $@
+. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+
+#
+ALWAYS_EXCEPT="$ALWAYS_EXCEPT $LNET_SELFTEST_EXCEPT"
+
+[ -z "$LST" ] && { skip_env "$0 : lst not found LST=$LST" && exit 0; }
+
+# FIXME: what is the reasonable value here?
+lst_LOOP=${lst_LOOP:-100000}
+lst_CONCR=${lst_CONCR:-"1 2 4 8"}
+lst_SIZES=${lst_SIZES:-"4k 8k 256k 1M"}
+if [ "$SLOW" = no ]; then
+    lst_CONCR="1 8"
+    lst_SIZES="4k 1M"
+    lst_LOOP=1000
+fi
+
+smoke_DURATION=${smoke_DURATION:-1800}
+if [ "$SLOW" = no ]; then
+    [ $smoke_DURATION -le 300 ] || smoke_DURATION=300
+fi
+
+lst_SERVERS=${lst_SERVERS:-$(comma_list $(osts_nodes) $(mdts_nodes))}
+lst_CLIENTS=${lst_CLIENTS:-${CLIENTS:-`hostname`}}
+
+# Succeed if $1 is among the output of mounted_lustre_filesystems.
+is_mounted () {
+    local mntpt=$1
+    local mounted=$(mounted_lustre_filesystems)
+    echo $mounted' ' | grep -w -q $mntpt' '
+}
+
+if local_mode; then
+    lst_SERVERS=`hostname`
+    lst_CLIENTS=`hostname`
+fi
+
+# FIXME: do we really need to unload lustre modules on all nodes?
+# bug 19387, comment 9
+# unloading lustre modules is not strictly necessary but unmounting
+# /mnt/lustre before running lst would be useful:
+# 1) because lustre messages clutter logs - we needn't them for testing LNET
+# 2) it's theoretically possible that lst tests congest comm paths so tightly
+# that mounted lustre wouldn't be able to perform some of its background activities
+if is_mounted $MOUNT || is_mounted $MOUNT2; then
+    local_mode && CLIENTONLY=yes
+    stopall
+    RESTORE_MOUNT=yes
+fi
+
+build_test_filter
+
+# Run lst_cleanup_all followed by lst_setup_all before a test.
+lst_prepare () {
+    # Workaround for bug 15619
+    lst_cleanup_all
+    lst_setup_all
+}
+
+# make batch
+# Print to stdout a bash script that creates an lst session with
+# client group "c" ($2) and server group "s" ($1), adds brw read/write
+# and ping tests to batch "b", runs the batch and keeps "lst stat"
+# running in the background for $smoke_DURATION seconds.
+test_smoke_sub () {
+    local servers=$1
+    local clients=$2
+    local pre t s c d
+
+    local nc=$(echo ${clients//,/ } | wc -w)
+    local ns=$(echo ${servers//,/ } | wc -w)
+    echo '#!/bin/bash'
+    echo 'set -e'
+
+    echo 'cleanup () { trap 0; echo killing $1 ... ; kill -9 $1 || true; }'
+
+    echo "$LST new_session --timeo 100000 hh"
+    echo "$LST add_group c $(nids_list $clients)"
+    echo "$LST add_group s $(nids_list $servers)"
+    echo "$LST add_batch b"
+
+    pre="$LST add_test --batch b --loop $lst_LOOP "
+    for t in "brw read" "brw write" ; do
+        for s in $lst_SIZES; do
+            for c in $lst_CONCR; do
+                for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
+                    echo -n "$pre"
+                    echo " --concurrency $c --distribute $d $t check=full size=$s"
+                done
+            done
+        done
+    done
+
+    for c in $lst_CONCR; do
+        for d in "${nc}:${ns} --from c --to s" "${ns}:${nc} --from s --to c"; do
+            echo -n "$pre"
+            echo " --concurrency $c --distribute $d ping "
+        done
+    done
+
+    echo $LST run b
+    echo sleep 1
+    echo "$LST stat --delay 10 c s &"
+    echo 'pid=$!'
+    echo 'trap "cleanup $pid" INT TERM'
+    echo sleep $smoke_DURATION
+    echo 'cleanup $pid'
+
+}
+
+# Run the generated script $1 with LST_SESSION exported as this shell's PID.
+run_lst () {
+    local file=$1
+
+    export LST_SESSION=$$
+
+    # start lst
+    bash $file
+}
+
+# Print the lines of log $1 starting with "Total" and call error if any
+# "Total ... nodes" line has a 2nd field other than 0.
+check_lst_err () {
+    local log=$1
+
+    grep ^Total $log
+
+    if awk '/^Total.*nodes/ {print $2}' $log | grep -vq '^0$'; then
+        error 'lst Error found'
+    fi
+}
+
+# Smoke test: generate the batch script, run it, end the session and
+# check the "Total" lines collected in the log.
+test_smoke () {
+    lst_prepare
+
+    local servers=$lst_SERVERS
+    local clients=$lst_CLIENTS
+
+    local runlst=$TMP/smoke.sh
+
+    local log=$TMP/$tfile.log
+    local rc=0
+
+    # with this redirection order only stdout is written to $runlst
+    test_smoke_sub $servers $clients 2>&1 > $runlst
+
+    cat $runlst
+
+    run_lst $runlst | tee $log
+    rc=${PIPESTATUS[0]}
+    [ $rc = 0 ] || error "$runlst failed: $rc"
+
+    lst_end_session --verbose | tee -a $log
+
+    # error counters in "lst show_error" should be checked
+    check_lst_err $log
+
+}
+run_test smoke "lst regression test"
+
+equals_msg `basename $0`: test complete, cleaning up
+if [ "$RESTORE_MOUNT" = yes ]; then
+    setupall
+fi
+[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true
+
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index abb6531..4b8d7d0 100644
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -126,6 +126,8 @@ init_test_env() {
     if ! echo $PATH | grep -q $LUSTRE/tests; then
         export PATH=$PATH:$LUSTRE/tests
     fi
+    export LST=${LST:-"$LUSTRE/../lnet/utils/lst"}
+    [ ! -f "$LST" ] && export LST=$(which lst 2> /dev/null)
     export MDSRATE=${MDSRATE:-"$LUSTRE/tests/mpi/mdsrate"}
     [ ! -f "$MDSRATE" ] && export MDSRATE=$(which mdsrate 2> /dev/null)
     if ! echo $PATH | grep -q $LUSTRE/tests/racer; then
@@ -236,7 +238,11 @@ load_module() {
 
     module_loaded ${BASE} && return
 
-    if [ -f ${LUSTRE}/${module}${EXT} ]; then
+    if [ "$BASE" = "lnet_selftest" ] && \
+            [ -f ${LUSTRE}/../lnet/selftest/${module}${EXT} ]; then
+        insmod ${LUSTRE}/../lnet/selftest/${module}${EXT} $@
+
+    elif [ -f ${LUSTRE}/${module}${EXT} ]; then
         insmod ${LUSTRE}/${module}${EXT} $@
     else
         # must be testing a "make install" or "rpm" installation
@@ -345,9 +351,11 @@ unload_modules() {
 
     if $LOAD_MODULES_REMOTE ; then
         local list=$(comma_list $(remote_nodes_list))
-        echo unloading modules on $list
-        do_rpc_nodes $list $LUSTRE_RMMOD $FSTYPE
-        do_rpc_nodes $list check_mem_leak
+        if [ -n "$list" ]; then
+            echo unloading modules on $list
+            do_rpc_nodes $list $LUSTRE_RMMOD $FSTYPE
+            do_rpc_nodes $list check_mem_leak
+        fi
     fi
 
     HAVE_MODULES=false
@@ -2771,6 +2779,14 @@ remote_mgs_nodsh()
     remote_node $MGS && [ "$PDSH" = "no_dsh" -o -z "$PDSH" -o -z "$ost_HOST" ]
 }
 
+# Succeeds if remote_mds_nodsh or remote_ost_nodsh succeeds, or if the
+# single_local_node check passes for the full list from nodes_list.
+local_mode ()
+{
+    remote_mds_nodsh || remote_ost_nodsh || \
+        $(single_local_node $(comma_list $(nodes_list)))
+}
+
 mdts_nodes () {
     local MDSNODES
     local NODES_sort
-- 
1.8.3.1