X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Ftest-framework.sh;h=dd596d6f3ceb4520c4545a07797436afec3e80f2;hp=b692fe5518de518fbac5613a56db398d61673dc1;hb=b924164398e939986e20506ab5d004e64f0b004e;hpb=0ed02e6bc4fa915993131d3f6aeff765766eb235 diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index b692fe5..dd596d6 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -7,6 +7,7 @@ set -e export REFORMAT=${REFORMAT:-""} +export WRITECONF=${WRITECONF:-""} export VERBOSE=false export GMNALNID=${GMNALNID:-/usr/sbin/gmlndnid} export CATASTROPHE=${CATASTROPHE:-/proc/sys/lnet/catastrophe} @@ -14,6 +15,7 @@ export GSS=false export GSS_KRB5=false export GSS_PIPEFS=false export IDENTITY_UPCALL=default + #export PDSH="pdsh -S -Rssh -w" # eg, assert_env LUSTRE MDSNODES OSTNODES CLIENTS @@ -30,12 +32,12 @@ assert_env() { assert_DIR () { local failed="" - [ -z "`echo :$DIR: | grep :$MOUNT:`" ] && \ - failed=1 && echo "DIR not in $MOUNT. Aborting." - [ -z "`echo :$DIR1: | grep :$MOUNT1:`" ] && \ - failed=1 && echo "DIR1 not in $MOUNT1. Aborting." - [ -z "`echo :$DIR2: | grep :$MOUNT2:`" ] && \ - failed=1 && echo "DIR2 not in $MOUNT2. Aborting" + [[ $DIR/ = $MOUNT/* ]] || \ + { failed=1 && echo "DIR=$DIR not in $MOUNT. Aborting."; } + [[ $DIR1/ = $MOUNT1/* ]] || \ + { failed=1 && echo "DIR1=$DIR1 not in $MOUNT1. Aborting."; } + [[ $DIR2/ = $MOUNT2/* ]] || \ + { failed=1 && echo "DIR2=$DIR2 not in $MOUNT2. Aborting"; } [ -n "$failed" ] && exit 99 || true } @@ -83,7 +85,7 @@ init_test_env() { export LUSTRE=`absolute_path $LUSTRE` export TESTSUITE=`basename $0 .sh` - [ -d /r ] && export ROOT=${ROOT:-/r} + #[ -d /r ] && export ROOT=${ROOT:-/r} export TMP=${TMP:-$ROOT/tmp} export TESTSUITELOG=${TMP}/${TESTSUITE}.log export HOSTNAME=${HOSTNAME:-`hostname`} @@ -96,6 +98,11 @@ init_test_env() { if ! echo $PATH | grep -q $LUSTRE/tests; then export PATH=$PATH:$LUSTRE/tests fi + export MDSRATE=${MDSRATE:-"$LUSTRE/tests/mdsrate"} + [ ! -f "$MDSRATE" ] && export MDSRATE=$(which mdsrate 2> /dev/null) + if ! echo $PATH | grep -q $LUSTRE/test/racer; then + export PATH=$PATH:$LUSTRE/tests/racer + fi export LCTL=${LCTL:-"$LUSTRE/utils/lctl"} [ ! -f "$LCTL" ] && export LCTL=$(which lctl) export LFS=${LFS:-"$LUSTRE/utils/lfs"} @@ -115,7 +122,6 @@ init_test_env() { export CHECKSTAT="${CHECKSTAT:-"checkstat -v"} " export FSYTPE=${FSTYPE:-"ldiskfs"} export NAME=${NAME:-local} - export LPROC=/proc/fs/lustre export LGSSD=${LGSSD:-"$LUSTRE/utils/gss/lgssd"} [ "$GSS_PIPEFS" = "true" ] && [ ! -f "$LGSSD" ] && \ export LGSSD=$(which lgssd) @@ -156,11 +162,12 @@ init_test_env() { # command line - while getopts "rvf:" opt $*; do + while getopts "rvwf:" opt $*; do case $opt in f) CONFIG=$OPTARG;; r) REFORMAT=--reformat;; v) VERBOSE=true;; + w) WRITECONF=writeconf;; \?) usage;; esac done @@ -173,6 +180,11 @@ init_test_env() { } +case `uname -r` in +2.4.*) EXT=".o"; USE_QUOTA=no; [ ! "$CLIENTONLY" ] && FSTYPE=ext3;; + *) EXT=".ko"; USE_QUOTA=yes;; +esac + load_module() { EXT=".ko" module=$1 @@ -207,10 +219,12 @@ load_modules() { load_module ../libcfs/libcfs/libcfs [ "$PTLDEBUG" ] && lctl set_param debug=$PTLDEBUG [ "$SUBSYSTEM" ] && lctl set_param subsystem_debug=${SUBSYSTEM# } + local MODPROBECONF= [ -f /etc/modprobe.conf ] && MODPROBECONF=/etc/modprobe.conf - [ -f /etc/modprobe.d/Lustre ] && MODPROBECONF=/etc/modprobe.d/Lustre - [ -z "$LNETOPTS" -a -n "$MODPROBECONF" ] && \ + [ ! "$MODPROBECONF" -a -d /etc/modprobe.d ] && MODPROBECONF=/etc/modprobe.d/Lustre + [ -z "$LNETOPTS" -a "$MODPROBECONF" ] && \ LNETOPTS=$(awk '/^options lnet/ { print $0}' $MODPROBECONF | sed 's/^options lnet //g') + echo $LNETOPTS | grep -q "accept=all" || LNETOPTS="$LNETOPTS accept=all"; echo "lnet options: '$LNETOPTS'" # note that insmod will ignore anything in modprobe.conf load_module ../lnet/lnet/lnet $LNETOPTS @@ -220,10 +234,7 @@ load_modules() { load_module obdclass/obdclass load_module ptlrpc/ptlrpc load_module ptlrpc/gss/ptlrpc_gss - # Now, some modules depend on lquota without USE_QUOTA check, - # will fix later. Disable check "$USE_QUOTA" = "yes" temporary. - #[ "$USE_QUOTA" = "yes" ] && load_module quota/lquota - load_module quota/lquota + [ "$USE_QUOTA" = "yes" -a "$LQUOTA" != "no" ] && load_module quota/lquota load_module fid/fid load_module fld/fld load_module lmv/lmv @@ -247,8 +258,8 @@ load_modules() { load_module llite/lustre load_module llite/llite_lloop - rm -f $TMP/ogdb-$HOSTNAME - OGDB=$TMP + OGDB=${OGDB:-$TMP} + rm -f $OGDB/ogdb-$HOSTNAME [ -d /r ] && OGDB="/r/tmp" $LCTL modules > $OGDB/ogdb-$HOSTNAME @@ -296,6 +307,19 @@ unload_dep_module() { $RMMOD $MODULE || true } +check_mem_leak () { + LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd_memory.*leaked" || true) + LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true) + if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then + echo "$LEAK_LUSTRE" 1>&2 + echo "$LEAK_PORTALS" 1>&2 + mv $TMP/debug $TMP/debug-leak.`date +%s` || true + echo "Memory leaks detected" + [ -n "$IGNORE_LEAK" ] && { echo "ignoring leaks" && return 0; } || true + return 1 + fi +} + unload_modules() { wait_exit_ST client # bug 12845 @@ -306,7 +330,7 @@ unload_modules() { local MODULES=$($LCTL modules | awk '{ print $2 }') if [ -n "$MODULES" ]; then echo "Modules still loaded: " - echo $MODULES + echo $MODULES if [ "$(lctl dl)" ]; then echo "Lustre still loaded" lctl dl || true @@ -320,16 +344,8 @@ unload_modules() { fi HAVE_MODULES=false - LEAK_LUSTRE=$(dmesg | tail -n 30 | grep "obd mem.*leaked" || true) - LEAK_PORTALS=$(dmesg | tail -n 20 | grep "Portals memory leaked" || true) - if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then - echo "$LEAK_LUSTRE" 1>&2 - echo "$LEAK_PORTALS" 1>&2 - mv $TMP/debug $TMP/debug-leak.`date +%s` || true - echo "Memory leaks detected" - [ -n "$IGNORE_LEAK" ] && echo "ignoring leaks" && return 0 - return 254 - fi + check_mem_leak || return 254 + echo "modules unloaded." return 0 } @@ -434,7 +450,7 @@ mount_facet() { local dev=${facet}_dev local opt=${facet}_opt echo "Starting ${facet}: ${!opt} $@ ${!dev} ${MOUNT%/*}/${facet}" - do_facet ${facet} mount -t lustre ${!opt} $@ ${!dev} ${MOUNT%/*}/${facet} + do_facet ${facet} mount -t lustre ${!opt} $@ ${!dev} ${MOUNT%/*}/${facet} RC=${PIPESTATUS[0]} if [ $RC -ne 0 ]; then echo "mount -t lustre $@ ${!dev} ${MOUNT%/*}/${facet}" @@ -444,7 +460,7 @@ mount_facet() { lctl set_param subsystem_debug=${SUBSYSTEM# }; \ lctl set_param debug_mb=${DEBUG_SIZE}; \ sync" - + label=$(do_facet ${facet} "e2label ${!dev}") [ -z "$label" ] && echo no label for ${!dev} && exit 1 eval export ${facet}_svc=${label} @@ -455,9 +471,9 @@ mount_facet() { # start facet device options start() { - facet=$1 + local facet=$1 shift - device=$1 + local device=$1 shift eval export ${facet}_dev=${device} eval export ${facet}_opt=\"$@\" @@ -469,7 +485,7 @@ start() { stop() { local running - facet=$1 + local facet=$1 shift HOST=`facet_active_host $facet` [ -z $HOST ] && echo stop: no host for $facet && return 0 @@ -506,7 +522,7 @@ zconf_mount() { do_node $client "lctl set_param debug=$PTLDEBUG; lctl set_param subsystem_debug=${SUBSYSTEM# }; lctl set_param debug_mb=${DEBUG_SIZE}" - [ -d /r ] && $LCTL modules > /r/tmp/ogdb-$HOSTNAME + return 0 } @@ -581,6 +597,13 @@ reboot_facet() { fi } +boot_node() { + local node=$1 + if [ "$FAILURE_MODE" = HARD ]; then + $POWER_UP $node + fi +} + # verify that lustre actually cleaned up properly cleanup_check() { [ -f $CATASTROPHE ] && [ `cat $CATASTROPHE` -ne 0 ] && \ @@ -591,15 +614,8 @@ cleanup_check() { [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.`date +%s` exit 205 fi - LEAK_LUSTRE=`dmesg | tail -n 30 | grep "obd mem.*leaked" || true` - LEAK_PORTALS=`dmesg | tail -n 20 | grep "Portals memory leaked" || true` - if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then - echo "$0: $LEAK_LUSTRE" 1>&2 - echo "$0: $LEAK_PORTALS" 1>&2 - echo "$0: Memory leak(s) detected..." 1>&2 - mv $TMP/debug $TMP/debug-leak.`date +%s` - exit 204 - fi + + check_mem_leak || exit 204 [ "`lctl dl 2> /dev/null | wc -l`" -gt 0 ] && lctl dl && \ echo "$0: lustre didn't clean up..." 1>&2 && return 202 || true @@ -643,14 +659,14 @@ wait_for() { } wait_mds_recovery_done () { - local timeout=`do_facet mds lctl get_param -n timeout` + local timeout=`do_facet $SINGLEMDS lctl get_param -n timeout` #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) # as we are in process of changing obd_timeout in different ways # let's set MAX longer than that MAX=$(( timeout * 4 )) WAIT=0 while [ $WAIT -lt $MAX ]; do - STATUS=`do_facet $SINGLEMDS "lctl get_param -n mdt.*-MDT*.recovery_status | grep status"` + STATUS=`do_facet $SINGLEMDS "lctl get_param -n mdt.*-MDT0000.recovery_status | grep status"` echo $STATUS | grep COMPLETE && return 0 sleep 5 WAIT=$((WAIT + 5)) @@ -686,10 +702,10 @@ wait_remote_prog () { local rc=0 [ "$PDSH" = "no_dsh" ] && return 0 - + while [ $WAIT -lt $2 ]; do - running=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep) - [ -z "${running}" ] && return 0 + running=$(ps uax | grep "$PDSH.*$prog.*$MOUNT" | grep -v grep) || true + [ -z "${running}" ] && return 0 || true echo "waited $WAIT for: " echo "$running" [ $INTERVAL -lt 60 ] && INTERVAL=$((INTERVAL + INTERVAL)) @@ -705,7 +721,7 @@ wait_remote_prog () { echo "Killing $pid" kill -9 $pid || true sleep 1 - ps -P $pid && rc=1 + ps -P $pid && rc=1 done return $rc @@ -735,8 +751,10 @@ client_reconnect() { facet_failover() { facet=$1 + sleep_time=$2 echo "Failing $facet on node `facet_active_host $facet`" shutdown_facet $facet + [ -n "$sleep_time" ] && sleep $sleep_time reboot_facet $facet client_df & DFPID=$! @@ -774,6 +792,16 @@ replay_barrier_nodf() { $LCTL mark "local REPLAY BARRIER on ${!svc}" } +replay_barrier_nosync() { + local facet=$1 echo running=${running} + local svc=${facet}_svc + echo Replay barrier on ${!svc} + do_facet $facet $LCTL --device %${!svc} readonly + do_facet $facet $LCTL --device %${!svc} notransno + do_facet $facet $LCTL mark "$facet REPLAY BARRIER on ${!svc}" + $LCTL mark "local REPLAY BARRIER on ${!svc}" +} + mds_evict_client() { UUID=`lctl get_param -n mdc.${mds1_svc}-mdc-*.uuid` do_facet mds1 "lctl set_param -n mdt.${mds1_svc}.evict_client $UUID" @@ -934,7 +962,7 @@ do_node() { local command_status="$TMP/cs" rsh $HOST ":> $command_status" rsh $HOST "(PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests:/sbin:/usr/sbin; - cd $RPWD; sh -c \"$@\") || + cd $RPWD; sh -c \"$@\") || echo command failed >$command_status" [ -n "$($myPDSH $HOST cat $command_status)" ] && return 1 || true return 0 @@ -943,10 +971,19 @@ do_node() { return ${PIPESTATUS[0]} } +single_local_node () { + [ "$1" = "$HOSTNAME" ] +} + do_nodes() { local rnodes=$1 shift + if $(single_local_node $rnodes); then + do_node $rnodes $@ + return $? + fi + # This is part from do_node local myPDSH=$PDSH @@ -1045,10 +1082,16 @@ mdsmkfsopts() } formatall() { + if [ "$IAMDIR" == "yes" ]; then + MDS_MKFS_OPTS="$MDS_MKFS_OPTS --iam-dir" + MDSn_MKFS_OPTS="$MDSn_MKFS_OPTS --iam-dir" + fi + [ "$FSTYPE" ] && FSTYPE_OPT="--backfstype $FSTYPE" if [ ! -z $SEC ]; then MDS_MKFS_OPTS="$MDS_MKFS_OPTS --param srpc.flavor.default=$SEC" + MDSn_MKFS_OPTS="$MDSn_MKFS_OPTS --param srpc.flavor.default=$SEC" OST_MKFS_OPTS="$OST_MKFS_OPTS --param srpc.flavor.default=$SEC" fi @@ -1092,7 +1135,7 @@ switch_identity() { local num=$1 local switch=$2 local j=`expr $num - 1` - local MDT="`do_facet mds$num lctl get_param -N mdt.*MDT*$j | cut -d"." -f2 2>/dev/null || true`" + local MDT="`(do_facet mds$num lctl get_param -N mdt.*MDT*$j 2>/dev/null | cut -d"." -f2 2>/dev/null) || true`" if [ -z "$MDT" ]; then return 2 @@ -1131,15 +1174,34 @@ set_obd_timeout() { do_facet $facet "lctl set_param timeout=$timeout" } +writeconf_facet () { + local facet=$1 + local dev=$2 + + do_facet $facet "$TUNEFS --writeconf $dev" +} + +writeconf_all () { + for num in `seq $MDSCOUNT`; do + DEVNAME=$(mdsdevname $num) + writeconf_facet mds$num $DEVNAME + done + + for num in `seq $OSTCOUNT`; do + DEVNAME=$(ostdevname $num) + writeconf_facet ost$num $DEVNAME + done +} + setupall() { load_modules init_gss if [ -z "$CLIENTONLY" ]; then echo "Setup mdts, osts" + echo $WRITECONF | grep -q "writeconf" && \ + writeconf_all for num in `seq $MDSCOUNT`; do DEVNAME=$(mdsdevname $num) - echo $REFORMAT | grep -q "reformat" \ - || do_facet mds$num "$TUNEFS --writeconf $DEVNAME" set_obd_timeout mds$num $TIMEOUT start mds$num $DEVNAME $MDS_MOUNT_OPTS @@ -1168,6 +1230,10 @@ setupall() { done fi + # wait a while to allow sptlrpc configuration be propogated to targets, + # only needed when mounting new target devices. + $GSS && sleep 10 + [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE mount_client $MOUNT [ -n "$CLIENTS" ] && zconf_mount_clients $CLIENTS $MOUNT @@ -1193,14 +1259,66 @@ mounted_lustre_filesystems() { awk '($3 ~ "lustre" && $1 ~ ":") { print $2 }' /proc/mounts } +init_facet_vars () { + local facet=$1 + shift + local device=$1 + + shift + + eval export ${facet}_dev=${device} + eval export ${facet}_opt=\"$@\" + + local dev=${facet}_dev + local label=$(do_facet ${facet} "e2label ${!dev}") + [ -z "$label" ] && echo no label for ${!dev} && exit 1 + + eval export ${facet}_svc=${label} + + local varname=${facet}failover_HOST + if [ -z "${!varname}" ]; then + eval $varname=$(facet_host $facet) + fi +} + +init_facets_vars () { + local DEVNAME + + for num in `seq $MDSCOUNT`; do + DEVNAME=`mdsdevname $num` + init_facet_vars mds$num $DEVNAME $MDS_MOUNT_OPTS + done + + for num in `seq $OSTCOUNT`; do + DEVNAME=`ostdevname $num` + init_facet_vars ost$num $DEVNAME $OST_MOUNT_OPTS + done +} + +check_config () { + local mntpt=$1 + + echo Checking config lustre mounted on $mntpt + local mgshost=$(mount | grep " $mntpt " | awk -F@ '{print $1}') + mgshost=$(echo $mgshost | awk -F: '{print $1}') + if [ "$mgshost" != "$mgs_HOST" ]; then + FAIL_ON_ERROR=true \ + error "Bad config file: lustre is mounted with mgs $mgshost, but mgs_HOST=$mgs_HOST + Please use correct config or set mds_HOST correctly!" + fi +} + check_and_setup_lustre() { - MOUNTED="`mounted_lustre_filesystems`" - if [ -z "$MOUNTED" ]; then + local MOUNTED=$(mounted_lustre_filesystems) + if [ -z "$MOUNTED" ] || ! $(echo $MOUNTED | grep -w -q $MOUNT); then [ "$REFORMAT" ] && formatall setupall - MOUNTED="`mounted_lustre_filesystems`" + MOUNTED=$(mounted_lustre_filesystems | head -1) [ -z "$MOUNTED" ] && error "NAME=$NAME not mounted" export I_MOUNTED=yes + else + check_config $MOUNT + init_facets_vars fi if [ "$ONLY" == "setup" ]; then exit 0 @@ -1276,7 +1394,7 @@ absolute_path() { at_is_valid() { if [ -z "$AT_MAX_PATH" ]; then - AT_MAX_PATH=$(do_facet mds "find /sys/ -name at_max") + AT_MAX_PATH=$(do_facet $SINGLEMDS "find /sys/ -name at_max") [ -z "$AT_MAX_PATH" ] && echo "missing /sys/.../at_max " && return 1 fi return 0 @@ -1286,7 +1404,7 @@ at_is_enabled() { at_is_valid || error "invalid call" # only check mds, we assume at_max is the same on all nodes - local at_max=$(do_facet mds "cat $AT_MAX_PATH") + local at_max=$(do_facet $SINGLEMDS "cat $AT_MAX_PATH") if [ $at_max -eq 0 ]; then return 1 else @@ -1335,27 +1453,27 @@ at_max_set() { drop_request() { # OBD_FAIL_MDS_ALL_REQUEST_NET RC=0 - do_facet mds lctl set_param fail_loc=0x123 + do_facet $SINGLEMDS lctl set_param fail_loc=0x123 do_facet client "$1" || RC=$? - do_facet mds lctl set_param fail_loc=0 + do_facet $SINGLEMDS lctl set_param fail_loc=0 return $RC } drop_reply() { # OBD_FAIL_MDS_ALL_REPLY_NET RC=0 - do_facet mds lctl set_param fail_loc=0x122 + do_facet $SINGLEMDS lctl set_param fail_loc=0x122 do_facet client "$@" || RC=$? - do_facet mds lctl set_param fail_loc=0 + do_facet $SINGLEMDS lctl set_param fail_loc=0 return $RC } drop_reint_reply() { # OBD_FAIL_MDS_REINT_NET_REP RC=0 - do_facet mds lctl set_param fail_loc=0x119 + do_facet $SINGLEMDS lctl set_param fail_loc=0x119 do_facet client "$@" || RC=$? - do_facet mds lctl set_param fail_loc=0 + do_facet $SINGLEMDS lctl set_param fail_loc=0 return $RC } @@ -1390,9 +1508,9 @@ drop_bl_callback() { drop_ldlm_reply() { #define OBD_FAIL_LDLM_REPLY 0x30c RC=0 - do_facet mds lctl set_param fail_loc=0x30c + do_facet $SINGLEMDS lctl set_param fail_loc=0x30c do_facet client "$@" || RC=$? - do_facet mds lctl set_param fail_loc=0 + do_facet $SINGLEMDS lctl set_param fail_loc=0 return $RC } @@ -1413,15 +1531,6 @@ set_nodes_failloc () { done } -set_nodes_failloc () { - local nodes=$1 - local node - - for node in $nodes ; do - do_node $node lctl set_param fail_loc=$2 - done -} - cancel_lru_locks() { $LCTL mark "cancel_lru_locks $1 start" for d in `lctl get_param -N ldlm.namespaces.*.lru_size | egrep -i $1`; do @@ -1542,6 +1651,8 @@ basetest() { IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 } +# print a newline if the last test was skipped +export LAST_SKIPPED= run_test() { assert_DIR @@ -1549,38 +1660,46 @@ run_test() { if [ ! -z "$ONLY" ]; then testname=ONLY_$1 if [ ${!testname}x != x ]; then + [ "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED= run_one $1 "$2" return $? fi testname=ONLY_$base if [ ${!testname}x != x ]; then + [ "$LAST_SKIPPED" ] && echo "" && LAST_SKIPPED= run_one $1 "$2" return $? fi + LAST_SKIPPED="y" echo -n "." return 0 fi testname=EXCEPT_$1 if [ ${!testname}x != x ]; then + LAST_SKIPPED="y" TESTNAME=test_$1 skip "skipping excluded test $1" return 0 fi testname=EXCEPT_$base if [ ${!testname}x != x ]; then + LAST_SKIPPED="y" TESTNAME=test_$1 skip "skipping excluded test $1 (base $base)" return 0 fi testname=EXCEPT_SLOW_$1 if [ ${!testname}x != x ]; then + LAST_SKIPPED="y" TESTNAME=test_$1 skip "skipping SLOW test $1" return 0 fi testname=EXCEPT_SLOW_$base if [ ${!testname}x != x ]; then + LAST_SKIPPED="y" TESTNAME=test_$1 skip "skipping SLOW test $1 (base $base)" return 0 fi + LAST_SKIPPED= run_one $1 "$2" return $? @@ -1628,8 +1747,8 @@ pass() { } check_mds() { - FFREE=`lctl get_param -n osd.*MDT*.filesfree` - FTOTAL=`lctl get_param -n osd.*MDT*.filestotal` + FFREE=$(do_node $SINGLEMDS lctl get_param -n osd.*MDT*.filesfree | awk 'BEGIN{avail=0}; {avail+=$1}; END{print avail}') + FTOTAL=$(do_node $SINGLEMDS lctl get_param -n osd.*MDT*.filestotal | awk 'BEGIN{avail=0}; {avail+=$1}; END{print avail}') [ $FFREE -ge $FTOTAL ] && error "files free $FFREE > total $FTOTAL" || true } @@ -1665,7 +1784,6 @@ run_one() { unset TESTNAME unset tdir umask $SAVE_UMASK - $CLEANUP } canonical_path() { @@ -1734,9 +1852,18 @@ osc_to_ost() echo $ost } +remote_node () { + local node=$1 + [ "$node" != "$(hostname)" ] +} + remote_mds () { - [ -z "$(lctl dl | grep mdt)" ] + local node + for node in $(mdts_nodes); do + remote_node $node && return 0 + done + return 1 } remote_mds_nodsh() @@ -1746,7 +1873,11 @@ remote_mds_nodsh() remote_ost () { - [ -z "$(lctl dl | grep ost)" ] + local node + for node in $(osts_nodes) ; do + remote_node $node && return 0 + done + return 1 } remote_ost_nodsh() @@ -1755,23 +1886,20 @@ remote_ost_nodsh() } mdts_nodes () { - local MDSNODES=$(facet_host $SINGLEMDS) + local MDSNODES local NODES_sort - - # FIXME: Currenly we use only $SINGLEMDS, - # should be fixed when we will start to test cmd. - echo $MDSNODES - return - for num in `seq $MDSCOUNT`; do - local myMDS=$(facet_host mds$num) - MDSNODES="$MDSNODES $myMDS" + MDSNODES="$MDSNODES $(facet_host mds$num)" done NODES_sort=$(for i in $MDSNODES; do echo $i; done | sort -u) echo $NODES_sort } +remote_servers () { + remote_ost && remote_mds +} + osts_nodes () { local OSTNODES=$(facet_host ost1) local NODES_sort @@ -1805,7 +1933,7 @@ nodes_list () { remote_nodes_list () { local rnodes=$(nodes_list) rnodes=$(echo " $rnodes " | sed -re "s/\s+$HOSTNAME\s+/ /g") - echo $rnodes + echo $rnodes } init_clients_lists () { @@ -1818,7 +1946,7 @@ init_clients_lists () { local clients="$SINGLECLIENT $HOSTNAME $rclients" # Sanity check: exclude the dup entries from CLIENTS - # for those configs which has SINGLCLIENT set to local client + # for those configs which has SINGLCLIENT set to local client clients=$(for i in $clients; do echo $i; done | sort -u) CLIENTS=`comma_list $clients` @@ -1847,6 +1975,28 @@ mixed_ost_devs () { [ ! "$OSTCOUNT" = "$osscount" ] } +mixed_mdt_devs () { + local nodes=$(mdts_nodes) + local mdtcount=$(get_node_count "$nodes") + [ ! "$MDSCOUNT" = "$mdtcount" ] +} + +generate_machine_file() { + local nodes=${1//,/ } + local machinefile=$2 + rm -f $machinefile || error "can't rm $machinefile" + for node in $nodes; do + echo $node >>$machinefile + done +} + +get_stripe () { + local file=$1/stripe + touch $file + $LFS getstripe -v $file || error + rm -f $file +} + check_runas_id_ret() { local myRC=0 local myRUNAS_ID=$1 @@ -1872,8 +2022,8 @@ check_runas_id() { shift local myRUNAS=$@ check_runas_id_ret $myRUNAS_ID $myRUNAS || \ - error "unable to write to $DIR/d0_runas_test as UID $myRUNAS_ID. - Please set RUNAS_ID to some UID which exists on MDS and client or + error "unable to write to $DIR/d0_runas_test as UID $myRUNAS_ID. + Please set RUNAS_ID to some UID which exists on MDS and client or add user $myRUNAS_ID:$myRUNAS_ID on these nodes." } @@ -1906,6 +2056,27 @@ multiop_bg_pause() { return 0 } +check_rate() { + local OP=$1 + local TARGET_RATE=$2 + local NUM_CLIENTS=$3 + local LOG=$4 + + local RATE=$(awk '/^Rate: [0-9\.]+ '"${OP}"'s\/sec/ { print $2}' ${LOG}) + + # We need to use bc since the rate is a floating point number + local RES=$(echo "${RATE} < ${TARGET_RATE}" | bc -l ) + if [ ${RES} -eq 0 ]; then + echo "Success: ${RATE} ${OP}s/sec met target rate" \ + "${TARGET_RATE} ${OP}s/sec for ${NUM_CLIENTS} client(s)." + return 0 + else + echo "Failure: ${RATE} ${OP}s/sec did not meet target rate" \ + "${TARGET_RATE} ${OP}s/sec for ${NUM_CLIENTS} client(s)." + return 1 + fi +} + # reset llite stat counters clear_llite_stats(){ lctl set_param -n llite.*.stats 0 @@ -1918,6 +2089,26 @@ calc_llite_stats() { echo $res } +# reset osc stat counters +clear_osc_stats(){ + lctl set_param -n osc.*.osc_stats 0 +} + +# sum osc stat items +calc_osc_stats() { + local res=$(lctl get_param -n osc.*.osc_stats | + awk 'BEGIN {s = 0} END {print s} /^'"$1"'/ {s += $2}') + echo $res +} + +calc_sum () { + awk 'BEGIN {s = 0}; {s += $1}; END {print s}' +} + +calc_osc_kbytes () { + $LCTL get_param -n osc.*[oO][sS][cC][-_][0-9a-f]*.$1 | calc_sum +} + # save_lustre_params(node, parameter_mask) # generate a stream of formatted strings ( =) save_lustre_params() { @@ -1944,3 +2135,20 @@ check_catastrophe () { fi } +# $1 node +# $2 file +get_stripe_info() { + local tmp_file + + stripe_size=0 + stripe_count=0 + stripe_index=0 + tmp_file=$(mktemp) + + do_facet $1 lfs getstripe -v $2 > $tmp_file + + stripe_size=`awk '$1 ~ /size/ {print $2}' $tmp_file` + stripe_count=`awk '$1 ~ /count/ {print $2}' $tmp_file` + stripe_index=`awk '/obdidx/ {start = 1; getline; print $1; exit}' $tmp_file` + rm -f $tmp_file +}