X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=188dd51510cef10a5069d9dd3587426af5dfe3f9;hb=9743164e578405585be4eabc3699e5808c6afda7;hp=36be2e30781b312fe1d4b5cb36226a0922ce3829;hpb=fc214b0d115e14aeb5224df72ae58d475ca15bf2;p=fs%2Flustre-release.git diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 36be2e3..188dd51 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -1,7 +1,7 @@ #!/bin/bash # vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: -trap 'echo "test-framework exiting on error"' ERR +trap 'print_summary && echo "test-framework exiting on error"' ERR set -e #set -x @@ -10,6 +10,9 @@ export REFORMAT=${REFORMAT:-""} export VERBOSE=false export GMNALNID=${GMNALNID:-/usr/sbin/gmlndnid} export CATASTROPHE=${CATASTROPHE:-/proc/sys/lnet/catastrophe} +export GSS=false +export GSS_KRB5=false +export GSS_PIPEFS=false #export PDSH="pdsh -S -Rssh -w" # eg, assert_env LUSTRE MDSNODES OSTNODES CLIENTS @@ -31,6 +34,37 @@ usage() { exit } +print_summary () { + [ -n "$ONLY" ] && echo "WARNING: ONLY is set to ${ONLY}." + local form="%-13s %-17s %s\n" + printf "$form" "status" "script" "skipped tests E(xcluded) S(low)" + echo "------------------------------------------------------------------------------------" + for O in $TESTSUITE_LIST; do + local skipped="" + local slow="" + local o=$(echo $O | tr "[:upper:]" "[:lower:]") + o=${o//_/-} + o=${o//tyn/tyN} + local log=${TMP}/${o}.log + [ -f $log ] && skipped=$(grep excluded $log | awk '{ printf " %s", $3 }' | sed 's/test_//g') + [ -f $log ] && slow=$(grep SLOW $log | awk '{ printf " %s", $3 }' | sed 's/test_//g') + [ "${!O}" = "done" ] && \ + printf "$form" "Done" "$O" "E=$skipped" && \ + [ -n "$slow" ] && printf "$form" "-" "-" "S=$slow" + + done + + for O in $TESTSUITE_LIST; do + [ "${!O}" = "no" ] && \ + printf "$form" "Skipped" "$O" "" + done + + for O in $TESTSUITE_LIST; do + [ "${!O}" = "done" -o "${!O}" = "no" ] || \ + printf "$form" "UNFINISHED" "$O" "" + done +} + init_test_env() { export LUSTRE=`absolute_path $LUSTRE` export TESTSUITE=`basename $0 .sh` @@ -57,6 +91,7 @@ init_test_env() { export LGSSD=${LGSSD:-"$LUSTRE/utils/gss/lgssd"} export LSVCGSSD=${LSVCGSSD:-"$LUSTRE/utils/gss/lsvcgssd"} export KRB5DIR=${KRB5DIR:-"/usr/kerberos"} + export DIR2 if [ "$ACCEPTOR_PORT" ]; then export PORT_OPT="--port $ACCEPTOR_PORT" @@ -65,7 +100,8 @@ init_test_env() { case "x$SEC" in xkrb5*) echo "Using GSS/krb5 ptlrpc security flavor" - export USING_KRB5="y" + GSS=true + GSS_KRB5=true ;; esac @@ -104,7 +140,7 @@ load_module() { # must be testing a "make install" or "rpm" installation # note failed to load ptlrpc_gss is considered not fatal if [ "$BASE" == "ptlrpc_gss" ]; then - modprobe $BASE $@ || echo "gss/krb5 is not supported" + modprobe $BASE $@ 2>/dev/null || echo "gss/krb5 is not supported" else modprobe $BASE $@ fi @@ -124,17 +160,17 @@ load_modules() { echo Loading modules from $LUSTRE load_module ../lnet/libcfs/libcfs - [ -z "$LNETOPTS" ] && \ - LNETOPTS=$(awk '/^options lnet/ { print $0}' /etc/modprobe.conf | sed 's/^options lnet //g') + [ -f /etc/modprobe.conf ] && MODPROBECONF=/etc/modprobe.conf + [ -f /etc/modprobe.d/Lustre ] && MODPROBECONF=/etc/modprobe.d/Lustre + [ -z "$LNETOPTS" -a -n "$MODPROBECONF" ] && \ + LNETOPTS=$(awk '/^options lnet/ { print $0}' $MODPROBECONF | sed 's/^options lnet //g') echo "lnet options: '$LNETOPTS'" # note that insmod will ignore anything in modprobe.conf load_module ../lnet/lnet/lnet $LNETOPTS LNETLND=${LNETLND:-"socklnd/ksocklnd"} load_module ../lnet/klnds/$LNETLND - [ "$FSTYPE" = "ldiskfs" ] && load_module ../ldiskfs/ldiskfs/ldiskfs load_module lvfs/lvfs load_module obdclass/obdclass - load_module lvfs/fsfilt_$FSTYPE load_module ptlrpc/ptlrpc load_module ptlrpc/gss/ptlrpc_gss # Now, some modules depend on lquota without USE_QUOTA check, @@ -147,16 +183,22 @@ load_modules() { load_module mdc/mdc load_module osc/osc load_module lov/lov - load_module mds/mds - load_module mdd/mdd - load_module mdt/mdt - load_module cmm/cmm - load_module osd/osd - load_module ost/ost - load_module obdfilter/obdfilter - load_module llite/lustre load_module mgc/mgc - load_module mgs/mgs + if [ -z "$CLIENTONLY" ]; then + [ "$FSTYPE" = "ldiskfs" ] && load_module ../ldiskfs/ldiskfs/ldiskfs + load_module mgs/mgs + load_module mds/mds + load_module mdd/mdd + load_module mdt/mdt + load_module lvfs/fsfilt_$FSTYPE + load_module cmm/cmm + load_module osd/osd + load_module ost/ost + load_module obdfilter/obdfilter + fi + + load_module llite/lustre + load_module llite/llite_lloop rm -f $TMP/ogdb-`hostname` $LCTL modules > $TMP/ogdb-`hostname` # 'mount' doesn't look in $PATH, just sbin @@ -192,6 +234,8 @@ wait_for_lnet() { } unload_modules() { + wait_exit_ST client # bug 12845 + lsmod | grep lnet > /dev/null && $LCTL dl && $LCTL dk $TMP/debug local MODULES=$($LCTL modules | awk '{ print $2 }') $RMMOD $MODULES > /dev/null 2>&1 || true @@ -221,6 +265,7 @@ unload_modules() { echo "$LEAK_PORTALS" 1>&2 mv $TMP/debug $TMP/debug-leak.`date +%s` || true echo "Memory leaks detected" + [ -n "$IGNORE_LEAK" ] && echo "ignoring leaks" && return 0 return 254 fi echo "modules unloaded." @@ -249,7 +294,9 @@ start_gss_daemons() { # starting on MDT for num in `seq $MDSCOUNT`; do do_facet mds$num "$LSVCGSSD -v" - do_facet mds$num "$LGSSD -v" + if $GSS_PIPEFS; then + do_facet mds$num "$LGSSD -v" + fi done # starting on OSTs for num in `seq $OSTCOUNT`; do @@ -257,7 +304,9 @@ start_gss_daemons() { done # starting on client # FIXME: is "client" the right facet name? - do_facet client "$LGSSD -v" + if $GSS_PIPEFS; then + do_facet client "$LGSSD -v" + fi # wait daemons entering "stable" status sleep 5 @@ -267,12 +316,16 @@ start_gss_daemons() { # for num in `seq $MDSCOUNT`; do check_gss_daemon_facet mds$num lsvcgssd - check_gss_daemon_facet mds$num lgssd + if $GSS_PIPEFS; then + check_gss_daemon_facet mds$num lgssd + fi done for num in `seq $OSTCOUNT`; do check_gss_daemon_facet ost$num lsvcgssd done - check_gss_daemon_facet client lgssd + if $GSS_PIPEFS; then + check_gss_daemon_facet client lgssd + fi } stop_gss_daemons() { @@ -291,13 +344,13 @@ init_krb5_env() { OST_MOUNT_OPTS=$OST_MOUNT_OPTS,sec=$SEC fi - if [ ! -z $USING_KRB5 ]; then + if $GSS; then start_gss_daemons fi } cleanup_krb5_env() { - if [ ! -z $USING_KRB5 ]; then + if $GSS; then stop_gss_daemons # maybe cleanup credential cache? fi @@ -332,6 +385,10 @@ start() { echo mount -t lustre $@ ${device} ${MOUNT%/*}/${facet} echo Start of ${device} on ${facet} failed ${RC} else + do_facet ${facet} "sysctl -w lnet.debug=$PTLDEBUG; \ + sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }; \ + sysctl -w lnet.debug_mb=${DEBUG_SIZE}" + do_facet ${facet} sync label=$(do_facet ${facet} "e2label ${device}") [ -z "$label" ] && echo no label for ${device} && exit 1 @@ -358,24 +415,7 @@ stop() { # umount should block, but we should wait for unrelated obd's # like the MGS or MGC to also stop. - local WAIT=0 - local INTERVAL=1 - # conf-sanity 31 takes a long time cleanup - while [ $WAIT -lt 300 ]; do - running=$(do_facet ${facet} "[ -e $LPROC ] && grep ST' ' $LPROC/devices") || true - if [ -z "${running}" ]; then - return 0 - fi - echo "waited $WAIT for${running}" - if [ $INTERVAL -lt 64 ]; then - INTERVAL=$((INTERVAL + INTERVAL)) - fi - sleep $INTERVAL - WAIT=$((WAIT + INTERVAL)) - done - echo "service didn't stop after $WAIT seconds. Still running:" - echo ${running} - exit 1 + wait_exit_ST ${facet} } zconf_mount() { @@ -396,7 +436,9 @@ zconf_mount() { do_node $client mkdir -p $mnt do_node $client mount -t lustre $OPTIONS $device $mnt || return 1 - do_node $client "sysctl -w lnet.debug=$PTLDEBUG; sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }" + do_node $client "sysctl -w lnet.debug=$PTLDEBUG; + sysctl -w lnet.subsystem_debug=${SUBSYSTEM# }; + sysctl -w lnet.debug_mb=${DEBUG_SIZE}" [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname` return 0 } @@ -462,6 +504,24 @@ cleanup_check() { return 0 } +wait_delete_completed () { + local TOTALPREV=`awk 'BEGIN{total=0}; {total+=$1}; END{print total}' \ + $LPROC/osc/*/kbytesavail` + + local WAIT=0 + local MAX_WAIT=20 + while [ "$WAIT" -ne "$MAX_WAIT" ]; do + sleep 1 + TOTAL=`awk 'BEGIN{total=0}; {total+=$1}; END{print total}' \ + $LPROC/osc/*/kbytesavail` + [ "$TOTAL" -eq "$TOTALPREV" ] && break + echo "Waiting delete completed ... prev: $TOTALPREV current: $TOTAL " + TOTALPREV=$TOTAL + WAIT=$(( WAIT + 1)) + done + echo "Delete completed." +} + wait_for_host() { HOST=$1 check_network "$HOST" 900 @@ -474,6 +534,43 @@ wait_for() { wait_for_host $HOST } +wait_mds_recovery_done () { + local timeout=`do_facet mds cat /proc/sys/lustre/timeout` +#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) +# as we are in process of changing obd_timeout in different ways +# let's set MAX longer than that + MAX=$(( timeout * 4 )) + WAIT=0 + while [ $WAIT -lt $MAX ]; do + STATUS=`do_facet mds grep status /proc/fs/lustre/mdt/*-MDT*/recovery_status` + echo $STATUS | grep COMPLETE && return 0 + sleep 5 + WAIT=$((WAIT + 5)) + echo "Waiting $(($MAX - $WAIT)) secs for MDS recovery done" + done + echo "MDS recovery not done in $MAX sec" + return 1 +} + +wait_exit_ST () { + local facet=$1 + + local WAIT=0 + local INTERVAL=1 + # conf-sanity 31 takes a long time cleanup + while [ $WAIT -lt 300 ]; do + running=$(do_facet ${facet} "[ -e $LPROC ] && grep ST' ' $LPROC/devices") || true + [ -z "${running}" ] && return 0 + echo "waited $WAIT for${running}" + [ $INTERVAL -lt 64 ] && INTERVAL=$((INTERVAL + INTERVAL)) + sleep $INTERVAL + WAIT=$((WAIT + INTERVAL)) + done + echo "service didn't stop after $WAIT seconds. Still running:" + echo ${running} + return 1 +} + client_df() { # not every config has many clients if [ ! -z "$CLIENTS" ]; then @@ -736,6 +833,7 @@ stopall() { # assume client mount is local grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT $* grep " $MOUNT2 " /proc/mounts && zconf_umount `hostname` $MOUNT2 $* + [ "$CLIENTONLY" ] && return for num in `seq $MDSCOUNT`; do stop mds$num -f done @@ -843,7 +941,8 @@ cleanup_and_setup_lustre() { check_and_cleanup_lustre() { if [ "`mount | grep $MOUNT`" ]; then - rm -rf $DIR/[Rdfs][1-9]* + rm -rf $DIR/[Rdfs][0-9]* + rm -f $DIR/${TESTSUITE}/[Rdfs][1-9]* fi if [ "$I_MOUNTED" = "yes" ]; then cleanupall -f || error "cleanup failed" @@ -999,26 +1098,37 @@ debugrestore() { DEBUGSAVE="" } -FAIL_ON_ERROR=true ################################## # Test interface +################################## + error() { + local FAIL_ON_ERROR=${FAIL_ON_ERROR:-true} + local TYPE=${TYPE:-"FAIL"} local ERRLOG sysctl -w lustre.fail_loc=0 2> /dev/null || true - log "${TESTSUITE} ${TESTNAME}: **** FAIL:" $@ + log " ${TESTSUITE} ${TESTNAME}: @@@@@@ ${TYPE}: $@ " ERRLOG=$TMP/lustre_${TESTSUITE}_${TESTNAME}.$(date +%s) echo "Dumping lctl log to $ERRLOG" # We need to dump the logs on all nodes - $LCTL dk $ERRLOG - [ ! "$mds_HOST" = "$(hostname)" ] && do_node $mds_HOST $LCTL dk $ERRLOG - [ ! "$ost_HOST" = "$(hostname)" -a ! "$ost_HOST" = "$mds_HOST" ] && do_node $ost_HOST $LCTL dk $ERRLOG + local NODES=$(nodes_list) + for NODE in $NODES; do + do_node $NODE $LCTL dk $ERRLOG + done debugrestore - [ "$TESTSUITELOG" ] && echo "$0: FAIL: $TESTNAME $@" >> $TESTSUITELOG + [ "$TESTSUITELOG" ] && echo "$0: ${TYPE}: $TESTNAME $@" >> $TESTSUITELOG if $FAIL_ON_ERROR; then exit 1 fi } +# use only if we are ignoring failures for this test, bugno required. +# (like ALWAYS_EXCEPT, but run the test and ignore the results.) +# e.g. error_ignore 5494 "your message" +error_ignore() { + FAIL_ON_ERROR=false TYPE="IGNORE (bz$1)" error $2 +} + skip () { log " SKIP: ${TESTSUITE} ${TESTNAME} $@" [ "$TESTSUITELOG" ] && echo "${TESTSUITE}: SKIP: $TESTNAME $@" >> $TESTSUITELOG @@ -1031,9 +1141,14 @@ build_test_filter() { done [ "$EXCEPT$ALWAYS_EXCEPT" ] && \ log "skipping tests: `echo $EXCEPT $ALWAYS_EXCEPT`" + [ "$EXCEPT_SLOW" ] && \ + log "skipping tests SLOW=no: `echo $EXCEPT_SLOW`" for E in $EXCEPT $ALWAYS_EXCEPT; do eval EXCEPT_${E}=true done + for E in $EXCEPT_SLOW; do + eval EXCEPT_SLOW_${E}=true + done for G in $GRANT_CHECK_LIST; do eval GCHECK_ONLY_${G}=true done @@ -1073,6 +1188,17 @@ run_test() { TESTNAME=test_$1 skip "skipping excluded test $1 (base $base)" return 0 fi + testname=EXCEPT_SLOW_$1 + if [ ${!testname}x != x ]; then + TESTNAME=test_$1 skip "skipping SLOW test $1" + return 0 + fi + testname=EXCEPT_SLOW_$base + if [ ${!testname}x != x ]; then + TESTNAME=test_$1 skip "skipping SLOW test $1 (base $base)" + return 0 + fi + run_one $1 "$2" return $? @@ -1084,13 +1210,24 @@ equals_msg() { local suffixlen=$((${#EQUALS} - ${#msg})) [ $suffixlen -lt 5 ] && suffixlen=5 - printf '===== %s %.*s\n' "$msg" $suffixlen $EQUALS + log `echo $(printf '===== %s %.*s\n' "$msg" $suffixlen $EQUALS)` } log() { echo "$*" lsmod | grep lnet > /dev/null || load_modules - $LCTL mark "$*" 2> /dev/null || true + + local MSG="$*" + # Get rif of ' + MSG=${MSG//\'/\\\'} + MSG=${MSG//\(/\\\(} + MSG=${MSG//\)/\\\)} + MSG=${MSG//\;/\\\;} + MSG=${MSG//\|/\\\|} + local NODES=$(nodes_list) + for NODE in $NODES; do + do_node $NODE $LCTL mark "$MSG" 2> /dev/null || true + done } trace() { @@ -1115,7 +1252,10 @@ run_one() { testnum=$1 message=$2 tfile=f${testnum} - tdir=d${base} + export tdir=d${TESTSUITE}/d${base} + local SAVE_UMASK=`umask` + umask 0022 + mkdir -p $DIR/$tdir BEFORE=`date +%s` log "== test $testnum: $message ============ `date +%H:%M:%S` ($BEFORE)" @@ -1127,7 +1267,10 @@ run_one() { [ -f $CATASTROPHE ] && [ `cat $CATASTROPHE` -ne 0 ] && \ error "LBUG/LASSERT detected" pass "($((`date +%s` - $BEFORE))s)" + rmdir ${DIR}/$tdir >/dev/null 2>&1 || true unset TESTNAME + unset tdir + umask $SAVE_UMASK cd $SAVE_PWD $CLEANUP } @@ -1154,7 +1297,7 @@ check_grant() { # write some data to sync client lost_grant rm -f $DIR1/${tfile}_check_grant_* 2>&1 for i in `seq $OSTCOUNT`; do - $LFS setstripe $DIR1/${tfile}_check_grant_$i 0 $(($i -1)) 1 + $LFS setstripe $DIR1/${tfile}_check_grant_$i -i $(($i -1)) -c 1 dd if=/dev/zero of=$DIR1/${tfile}_check_grant_$i bs=4k \ count=1 > /dev/null 2>&1 done @@ -1208,7 +1351,66 @@ remote_ost () [ $(grep -c obdfilter $LPROC/devices) -eq 0 ] } +mdts_nodes () { + local MDSNODES=$(facet_host $SINGLEMDS) + + # FIXME: Currenly we use only $SINGLEMDS, + # should be fixed when we will start to test cmd. + echo $MDSNODES + return + + for num in `seq $MDSCOUNT`; do + local myMDS=$(facet_host mds$num) + [[ ! '\ '"$MDSNODES"'\ ' = *'\ '"$myMDS"'\ '* ]] && MDSNODES="$MDSNODES $myMDS" + done + + echo $MDSNODES +} + +osts_nodes () { + local OSTNODES=$(facet_host ost1) + + for num in `seq $OSTCOUNT`; do + local myOST=$(facet_host ost$num) + [[ ! '\ '"$OSTNODES"'\ ' = *'\ '"$myOST"'\ '* ]] && OSTNODES="$OSTNODES $myOST" + done + + echo $OSTNODES +} + +nodes_list () { + # FIXME. We need a list of clients + local myNODES=`hostname` + + local OSTNODES=$(osts_nodes) + local myOSTNODES=`echo ' '"$OSTNODES"' ' | sed -e s/[\ ]$(hostname)[\ ]/\ /` + [ -n "$myOSTNODES" ] && myNODES="$myNODES $myOSTNODES" + + local myNODES=${myNODES% } + # Add to list only not listed mds nodes + local MDSNODES=$(mdts_nodes) + for myMDS in $MDSNODES; do + [[ ! "'\ '$myNODES'\ '" = *'\ '"$myMDS"'\ '* ]] && myNODES="$myNODES $myMDS" + done + + echo $myNODES +} + is_patchless () { grep -q patchless $LPROC/version } + +check_runas_id() { + local myRUNAS_ID=$1 + shift + local myRUNAS=$@ + mkdir $DIR/d0_runas_test + chmod 0755 $DIR + chown $myRUNAS_ID:$myRUNAS_ID $DIR/d0_runas_test + $myRUNAS touch $DIR/d0_runas_test/f$$ || \ + error "unable to write to $DIR/d0_runas_test as UID $myRUNAS_ID. + Please set RUNAS_ID to some UID which exists on MDS and client or + add user $myRUNAS_ID:$myRUNAS_ID on these nodes." + rm -rf $DIR/d0_runas_test +}