X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Foos.sh;h=ba9bec3b885688d05cfd6384cb16c6d027c4abab;hb=3667acae1dc5af1973734c09f341dabe0e14ccd4;hp=910346ed3f84c857bd7f03f4a107ba69e4cc38f3;hpb=cd9c585e8c7bdd6cfd802be64ef277dfd466be17;p=fs%2Flustre-release.git diff --git a/lustre/tests/oos.sh b/lustre/tests/oos.sh index 910346e..ba9bec3 100755 --- a/lustre/tests/oos.sh +++ b/lustre/tests/oos.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -set -vx +#set -vx export PATH=`dirname $0`/../utils:$PATH LFS=${LFS:-lfs} @@ -13,74 +13,80 @@ LOG=$TMP/ooslog SUCCESS=1 -rm -f $OOS +rm -f $OOS $LOG -sleep 1 # to ensure we get up-to-date statfs info +sync; sleep 1; sync # to ensure we get up-to-date statfs info -#echo -1 > /proc/sys/portals/debug -#echo 0x40a8 > /proc/sys/portals/subsystem_debug +#echo -1 > /proc/sys/lnet/debug +#echo 0x40a8 > /proc/sys/lnet/subsystem_debug #lctl clear #lctl debug_daemon start /r/tmp/debug 1024 STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1` ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1` -MAXFREE=${MAXFREE:-$((200000 * $STRIPECOUNT))} +MAXFREE=${MAXFREE:-$((400000 * $STRIPECOUNT))} if [ $ORIGFREE -gt $MAXFREE ]; then echo "skipping out-of-space test on $OSC" - echo "reports ${ORIGFREE}kB free, more tham MAXFREE ${MAXFREE}kB" + echo "reports ${ORIGFREE}kB free, more than MAXFREE ${MAXFREE}kB" echo "increase $MAXFREE (or reduce test fs size) to proceed" exit 0 fi export LANG=C LC_LANG=C # for "No space left on device" message -# make sure, that log file will be removed. Somehow it was possible -# to me, that log file had +a and could not be rewritten, what led -# to test fail. -chattr -ai $LOG >/dev/null 2>&1 -rm -f $LOG >/dev/null 2>&1 +[ -f $LOG ] && echo "ERROR: log file wasn't removed?" && exit 1 # make sure we stripe over all OSTs to avoid OOS on only a subset of OSTs -$LFS setstripe $OOS 65536 0 $STRIPECOUNT +$LFS setstripe $OOS -c $STRIPECOUNT if dd if=/dev/zero of=$OOS count=$(($ORIGFREE + 100)) bs=1k 2> $LOG; then echo "ERROR: dd did not fail" SUCCESS=0 fi if [ "`grep -c 'No space left on device' $LOG`" -ne 1 ]; then - echo "ERROR: dd not return ENOSPC" + echo "ERROR: dd not return ENOSPC" + sed "s/^/LOG: /" $LOG SUCCESS=0 fi # flush cache to OST(s) so avail numbers are correct sync; sleep 1 ; sync -for OSC in /proc/fs/lustre/osc/OSC*MNT*; do +for OSC in /proc/fs/lustre/osc/*-osc-*; do AVAIL=`cat $OSC/kbytesavail` - GRANT=`cat $OSC/cur_grant_bytes` - [ $(($AVAIL - $GRANT / 1024)) -lt 400 ] && OSCFULL=full + GRANT=$((`cat $OSC/cur_grant_bytes` / 1024)) + echo -n "$(basename $OSC) avl=$AVAIL grnt=$GRANT diff=$(($AVAIL - $GRANT))" + [ $(($AVAIL - $GRANT)) -lt 400 ] && OSCFULL=full && echo -n " FULL" + echo " " done if [ -z "$OSCFULL" ]; then echo "no OSTs are close to full" - grep [0-9] /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*} + grep "[0-9]" /proc/fs/lustre/osc/*-osc-*/{kbytesavail,cur*} SUCCESS=0 fi RECORDSOUT=`grep "records out" $LOG | cut -d + -f1` - FILESIZE=`ls -l $OOS | awk '{ print $5 }'` -if [ $RECORDSOUT -ne $(($FILESIZE / 1024)) ]; then - echo "ERROR: blocks written by dd not equal to the size of file" - SUCCESS=0 +if [ -z "$RECORDSOUT" ]; then + echo "ERROR: no blocks written by dd?" + sed "s/^/LOG: /" $LOG + SUCCESS=0 +elif [ "$RECORDSOUT" -ne $((FILESIZE / 1024)) ]; then + echo "ERROR: blocks written by dd not equal to the size of file" + SUCCESS=0 fi #lctl debug_daemon stop rm -f $OOS +sync; sleep 1; sync + +sync; sleep 3; sync if [ $SUCCESS -eq 1 ]; then echo "Success!" + rm -f $LOG else exit 1 fi