From 5a6391bc274ccde53a217a27c9d6213cf72b1b73 Mon Sep 17 00:00:00 2001 From: ericm Date: Tue, 28 Feb 2006 00:51:08 +0000 Subject: [PATCH] branch: b1_5 the rest part of fixes of merging from b1_4. --- lustre/include/Makefile.am | 2 +- lustre/include/linux/Makefile.am | 3 +- lustre/llite/llite_internal.h | 1 + lustre/tests/sanity.sh | 366 ++++++++++++++++++++++++++------------- lustre/utils/lconf | 212 +++++++++++++++-------- 5 files changed, 395 insertions(+), 189 deletions(-) diff --git a/lustre/include/Makefile.am b/lustre/include/Makefile.am index ca7d1b4..6091bd2 100644 --- a/lustre/include/Makefile.am +++ b/lustre/include/Makefile.am @@ -12,5 +12,5 @@ EXTRA_DIST = ioctl.h liblustre.h lprocfs_status.h lustre_cfg.h \ lustre_lib.h lustre_lite.h lustre_log.h lustre_mds.h\ lustre_net.h lustre_quota.h lustre_ucache.h lvfs.h \ obd_cache.h obd_class.h obd_echo.h obd.h obd_lov.h \ - obd_ost.h obd_support.h + obd_ost.h obd_support.h lustre_ver.h diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am index 762c6b9..6d02881 100644 --- a/lustre/include/linux/Makefile.am +++ b/lustre/include/linux/Makefile.am @@ -12,4 +12,5 @@ endif EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_lib.h types.h\ lustre_dlm.h lustre_handles.h lustre_net.h obd_class.h obd_support.h \ lustre_log.h lustre_compat25.h lustre_fsfilt.h lustre_mds.h obd.h \ - lvfs.h lvfs_linux.h lustre_lite.h lustre_idl.h lustre_quota.h lustre_user.h + lvfs.h lvfs_linux.h lustre_lite.h lustre_idl.h lustre_quota.h \ + lustre_disk.h lustre_user.h diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 7df9aa9..9a47016 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -12,6 +12,7 @@ #include #include +#include /* struct lustre_intent_data { diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 4712495..9f9c63a 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -7,21 +7,18 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 2108 3637 3561 5188/5749 -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42c 45 68"} +# bug number for skipped test: 2108 9789 3637 9789 3561 5188/5749 +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42b 42c 42d 45 68"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 51b 51c 64b 71 75 101" +[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 63 64b 71 101" case `uname -r` in -2.4*) FSTYPE=${FSTYPE:-ext3} ;; -2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 60 69";; +2.4*) FSTYPE=${FSTYPE:-ext3}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;; +2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 60 69" ;; *) error "unsupported kernel" ;; esac -[ "$ALWAYS_EXCEPT$EXCEPT" ] && \ - echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT`" - SRCDIR=`dirname $0` export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin @@ -47,6 +44,8 @@ IOPENTEST1=${IOPENTEST1:-iopentest1} IOPENTEST2=${IOPENTEST2:-iopentest2} MEMHOG=${MEMHOG:-memhog} DIRECTIO=${DIRECTIO:-directio} +ACCEPTOR_PORT=${ACCEPTOR_PORT:-988} +UMOUNT=${UMOUNT:-"umount -d"} if [ $UID -ne 0 ]; then echo "Warning: running as non-root uid $UID" @@ -105,14 +104,26 @@ check_kernel_version() { return 1 } +_basetest() { + echo $* +} + +basetest() { + IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 +} + run_one() { if ! mount | grep -q $DIR; then $START fi + testnum=$1 + message=$2 BEFORE=`date +%s` - log "== test $1: $2= `date +%H:%M:%S` ($BEFORE)" - export TESTNAME=test_$1 - test_$1 || error "exit with rc=$?" + log "== test $testnum: $message= `date +%H:%M:%S` ($BEFORE)" + export TESTNAME=test_$testnum + export tfile=f${testnum} + export tdir=d${base} + test_${testnum} || error "exit with rc=$?" unset TESTNAME pass "($((`date +%s` - $BEFORE))s)" cd $SAVE_PWD @@ -120,10 +131,13 @@ run_one() { } build_test_filter() { + [ "$ALWAYS_EXCEPT$EXCEPT$SANITY_EXCEPT" ] && \ + echo "Skipping tests: `echo $ALWAYS_EXCEPT $EXCEPT $SANITY_EXCEPT`" + for O in $ONLY; do eval ONLY_${O}=true done - for E in $EXCEPT $ALWAYS_EXCEPT; do + for E in $EXCEPT $ALWAYS_EXCEPT $SANITY_EXCEPT; do eval EXCEPT_${E}=true done } @@ -137,7 +151,7 @@ basetest() { } run_test() { - base=`basetest $1` + export base=`basetest $1` if [ "$ONLY" ]; then testname=ONLY_$1 if [ ${!testname}x != x ]; then @@ -171,6 +185,7 @@ run_test() { error() { sysctl -w lustre.fail_loc=0 log "FAIL: $TESTNAME $@" + $LCTL dk $TMP/lustre-log-$TESTNAME.log if [ "$SANITYLOG" ]; then echo "FAIL: $TESTNAME $@" >> $SANITYLOG else @@ -212,7 +227,7 @@ rm -rf $DIR/[Rdfs][1-9]* build_test_filter -echo preparing for tests involving mounts +echo "preparing for tests involving mounts" EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP} touch $EXT2_DEV mke2fs -j -F $EXT2_DEV 8000 > /dev/null @@ -221,12 +236,12 @@ echo # add a newline after mke2fs. umask 077 test_0() { - touch $DIR/f - $CHECKSTAT -t file $DIR/f || error - rm $DIR/f - $CHECKSTAT -a $DIR/f || error + touch $DIR/$tfile + $CHECKSTAT -t file $DIR/$tfile || error + rm $DIR/$tfile + $CHECKSTAT -a $DIR/$tfile || error } -run_test 0 "touch .../f ; rm .../f =============================" +run_test 0 "touch .../$tfile ; rm .../$tfile =====================" test_0b() { chmod 0755 $DIR || error @@ -699,7 +714,11 @@ run_test 24l "Renaming a file to itself ========================" test_24m() { f="$DIR/f24m" - multiop $f OcLN ${f}2 ${f}2 || error + multiop $f OcLN ${f}2 ${f}2 || error "link ${f}2 ${f}2 failed" + # on ext3 this does not remove either the source or target files + # though the "expected" operation would be to remove the source + $CHECKSTAT -t file ${f} || error "${f} missing" + $CHECKSTAT -t file ${f}2 || error "${f}2 missing" } run_test 24m "Renaming a file to a hard link to itself =========" @@ -826,18 +845,18 @@ run_test 26e "unlink multiple component recursive symlink ======" # recursive symlinks (bug 7022) test_26f() { - mkdir $DIR/foo || error "mkdir $DIR/foo failed" - cd $DIR/foo || error "cd $DIR/foo failed" - mkdir -p bar/bar1 || error "mkdir bar/bar1 failed" - mkdir foo || error "mkdir foo failed" - cd foo || error "cd foo failed" - ln -s .. dotdot || error "ln dotdot failed" - ln -s dotdot/bar bar || error "ln bar failed" - cd ../.. || error "cd ../.. failed" - output=`ls foo/foo/bar/bar1` + mkdir $DIR/$tfile || error "mkdir $DIR/$tfile failed" + cd $DIR/$tfile || error "cd $DIR/$tfile failed" + mkdir -p $tdir/bar1 || error "mkdir $tdir/bar1 failed" + mkdir $tfile || error "mkdir $tfile failed" + cd $tfile || error "cd $tfile failed" + ln -s .. dotdot || error "ln dotdot failed" + ln -s dotdot/$tdir $tdir || error "ln $tdir failed" + cd ../.. || error "cd ../.. failed" + output=`ls $tfile/$tfile/$tdir/bar1` [ "$output" = bar1 ] && error "unexpected output" - rm -r foo || error "rm foo failed" - $CHECKSTAT -a $DIR/foo || error "foo not gone" + rm -r $tfile || error "rm $tfile failed" + $CHECKSTAT -a $DIR/$tfile || error "$tfile not gone" } run_test 26f "rm -r of a directory which has recursive symlink =" @@ -868,7 +887,7 @@ test_27d() { mkdir -p $DIR/d27 $LSTRIPE $DIR/d27/fdef 0 -1 0 || error "lstripe failed" $CHECKSTAT -t file $DIR/d27/fdef || error "checkstat failed" - #dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4 || error + dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4 || error } run_test 27d "create file with default settings ================" @@ -1009,6 +1028,7 @@ test_27o() { reset_enospc rm -f $DIR/d27/f27o exhaust_all_precreations 0x215 + sleep 5 touch $DIR/d27/f27o && error @@ -1192,7 +1212,7 @@ test_32a() { mkdir -p $DIR/d32a/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32a/ext2-mountpoint || error $CHECKSTAT -t dir $DIR/d32a/ext2-mountpoint/.. || error - umount $DIR/d32a/ext2-mountpoint || error + $UMOUNT $DIR/d32a/ext2-mountpoint || error } run_test 32a "stat d32a/ext2-mountpoint/.. =====================" @@ -1201,7 +1221,7 @@ test_32b() { mkdir -p $DIR/d32b/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32b/ext2-mountpoint || error ls -al $DIR/d32b/ext2-mountpoint/.. || error - umount $DIR/d32b/ext2-mountpoint || error + $UMOUNT $DIR/d32b/ext2-mountpoint || error } run_test 32b "open d32b/ext2-mountpoint/.. =====================" @@ -1211,7 +1231,7 @@ test_32c() { mount -t ext2 -o loop $EXT2_DEV $DIR/d32c/ext2-mountpoint || error mkdir -p $DIR/d32c/d2/test_dir $CHECKSTAT -t dir $DIR/d32c/ext2-mountpoint/../d2/test_dir || error - umount $DIR/d32c/ext2-mountpoint || error + $UMOUNT $DIR/d32c/ext2-mountpoint || error } run_test 32c "stat d32c/ext2-mountpoint/../d2/test_dir =========" @@ -1221,7 +1241,7 @@ test_32d() { mount -t ext2 -o loop $EXT2_DEV $DIR/d32d/ext2-mountpoint || error mkdir -p $DIR/d32d/d2/test_dir ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error - umount $DIR/d32d/ext2-mountpoint || error + $UMOUNT $DIR/d32d/ext2-mountpoint || error } run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir =========" @@ -1281,7 +1301,7 @@ test_32i() { mount -t ext2 -o loop $EXT2_DEV $DIR/d32i/ext2-mountpoint || error touch $DIR/d32i/test_file $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error - umount $DIR/d32i/ext2-mountpoint || error + $UMOUNT $DIR/d32i/ext2-mountpoint || error } run_test 32i "stat d32i/ext2-mountpoint/../test_file ===========" @@ -1291,7 +1311,7 @@ test_32j() { mount -t ext2 -o loop $EXT2_DEV $DIR/d32j/ext2-mountpoint || error touch $DIR/d32j/test_file cat $DIR/d32j/ext2-mountpoint/../test_file || error - umount $DIR/d32j/ext2-mountpoint || error + $UMOUNT $DIR/d32j/ext2-mountpoint || error } run_test 32j "open d32j/ext2-mountpoint/../test_file ===========" @@ -1302,7 +1322,7 @@ test_32k() { mkdir -p $DIR/d32k/d2 touch $DIR/d32k/d2/test_file || error $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error - umount $DIR/d32k/ext2-mountpoint || error + $UMOUNT $DIR/d32k/ext2-mountpoint || error } run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========" @@ -1313,7 +1333,7 @@ test_32l() { mkdir -p $DIR/d32l/d2 touch $DIR/d32l/d2/test_file cat $DIR/d32l/ext2-mountpoint/../d2/test_file || error - umount $DIR/d32l/ext2-mountpoint || error + $UMOUNT $DIR/d32l/ext2-mountpoint || error } run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========" @@ -1383,7 +1403,7 @@ test_32q() { touch $DIR/d32q/under_the_mount mount -t ext2 -o loop $EXT2_DEV $DIR/d32q ls $DIR/d32q/under_the_mount && error || true - umount $DIR/d32q || error + $UMOUNT $DIR/d32q || error } run_test 32q "stat follows mountpoints in Lustre (should return error)" @@ -1393,18 +1413,18 @@ test_32r() { touch $DIR/d32r/under_the_mount mount -t ext2 -o loop $EXT2_DEV $DIR/d32r ls $DIR/d32r | grep -q under_the_mount && error || true - umount $DIR/d32r || error + $UMOUNT $DIR/d32r || error } run_test 32r "opendir follows mountpoints in Lustre (should return error)" test_33() { - rm -f $DIR/test_33_file - touch $DIR/test_33_file - chmod 444 $DIR/test_33_file - chown $RUNAS_ID $DIR/test_33_file - log 33_1 - $RUNAS $OPENFILE -f O_RDWR $DIR/test_33_file && error || true - log 33_2 + rm -f $DIR/$tfile + touch $DIR/$tfile + chmod 444 $DIR/$tfile + chown $RUNAS_ID $DIR/$tfile + log 33_1 + $RUNAS $OPENFILE -f O_RDWR $DIR/$tfile && error || true + log 33_2 } run_test 33 "write file with mode 444 (should return error) ====" @@ -1412,8 +1432,9 @@ test_33a() { rm -fr $DIR/d33 mkdir -p $DIR/d33 chown $RUNAS_ID $DIR/d33 - $RUNAS $OPENFILE -f O_RDWR:O_CREAT -m 0444 $DIR/d33/f33 || error - $RUNAS $OPENFILE -f O_RDWR:O_CREAT -m 0444 $DIR/d33/f33 && error || true + $RUNAS $OPENFILE -f O_RDWR:O_CREAT -m 0444 $DIR/d33/f33|| error "create" + $RUNAS $OPENFILE -f O_RDWR:O_CREAT -m 0444 $DIR/d33/f33 && \ + error "open RDWR" || true } run_test 33a "test open file(mode=0444) with O_RDWR (should return error)" @@ -1527,7 +1548,7 @@ test_37() { echo f > $DIR/dextra/fbugfile mount -t ext2 -o loop $EXT2_DEV $DIR/dextra ls $DIR/dextra | grep "\" && error - umount $DIR/dextra || error + $UMOUNT $DIR/dextra || error rm -f $DIR/dextra/fbugfile || error } run_test 37 "ls a mounted file system to check old content =====" @@ -1538,17 +1559,22 @@ test_38() { run_test 38 "open a regular file with O_DIRECTORY ==============" test_39() { - touch $DIR/test_39_file - touch $DIR/test_39_file2 -# ls -l $DIR/test_39_file $DIR/test_39_file2 -# ls -lu $DIR/test_39_file $DIR/test_39_file2 -# ls -lc $DIR/test_39_file $DIR/test_39_file2 + touch $DIR/$tfile + touch $DIR/${tfile}2 +# ls -l $DIR/$tfile $DIR/${tfile}2 +# ls -lu $DIR/$tfile $DIR/${tfile}2 +# ls -lc $DIR/$tfile $DIR/${tfile}2 sleep 2 - $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/test_39_file2 -# ls -l $DIR/test_39_file $DIR/test_39_file2 -# ls -lu $DIR/test_39_file $DIR/test_39_file2 -# ls -lc $DIR/test_39_file $DIR/test_39_file2 - [ $DIR/test_39_file2 -nt $DIR/test_39_file ] || error + $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/${tfile}2 + if [ ! $DIR/${tfile}2 -nt $DIR/$tfile ]; then + echo "mtime" + ls -l $DIR/$tfile $DIR/${tfile}2 + echo "atime" + ls -lu $DIR/$tfile $DIR/${tfile}2 + echo "ctime" + ls -lc $DIR/$tfile $DIR/${tfile}2 + error "O_TRUNC didn't change timestamps" + fi } run_test 39 "mtime changed on create ===========================" @@ -1574,6 +1600,7 @@ count_ost_writes() { WRITEBACK_SAVE=500 start_writeback() { + trap 0 # in 2.6, restore /proc/sys/vm/dirty_writeback_centisecs if [ -f /proc/sys/vm/dirty_writeback_centisecs ]; then echo $WRITEBACK_SAVE > /proc/sys/vm/dirty_writeback_centisecs @@ -1696,10 +1723,10 @@ test_42d() { run_test 42d "test complete truncate of file with cached dirty data" test_43() { - mkdir $DIR/d43 - cp -p /bin/ls $DIR/d43/f - exec 100>> $DIR/d43/f - $DIR/d43/f && error || true + mkdir $DIR/$tdir + cp -p /bin/ls $DIR/$tdir/$tfile + exec 100>> $DIR/$tdir/$tfile + $DIR/$tdir/$tfile && error || true exec 100<&- } run_test 43 "execution of file opened for write should return -ETXTBSY" @@ -2002,17 +2029,17 @@ test_52a() { [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo mkdir -p $DIR/d52a touch $DIR/d52a/foo - chattr =a $DIR/d52a/foo || error - echo bar >> $DIR/d52a/foo || error - cp /etc/hosts $DIR/d52a/foo && error - rm -f $DIR/d52a/foo 2>/dev/null && error - link $DIR/d52a/foo $DIR/d52a/foo_link 2>/dev/null && error - echo foo >> $DIR/d52a/foo || error - mrename $DIR/d52a/foo $DIR/d52a/foo_ren && error - lsattr $DIR/d52a/foo | egrep -q "^-+a-+ $DIR/d52a/foo" || error - chattr -a $DIR/d52a/foo || error - - rm -fr $DIR/d52a || error + chattr =a $DIR/d52a/foo || error "chattr =a failed" + echo bar >> $DIR/d52a/foo || error "append bar failed" + cp /etc/hosts $DIR/d52a/foo && error "cp worked" + rm -f $DIR/d52a/foo 2>/dev/null && error "rm worked" + link $DIR/d52a/foo $DIR/d52a/foo_link 2>/dev/null && error "link worked" + echo foo >> $DIR/d52a/foo || error "append foo failed" + mrename $DIR/d52a/foo $DIR/d52a/foo_ren && error "rename worked" + lsattr $DIR/d52a/foo | egrep -q "^-+a-+ $DIR/d52a/foo" || error "lsattr" + chattr -a $DIR/d52a/foo || error "chattr -a failed" + + rm -fr $DIR/d52a || error "cleanup rm failed" } run_test 52a "append-only flag test (should return errors) =====" @@ -2037,9 +2064,9 @@ test_52b() { run_test 52b "immutable flag test (should return errors) =======" test_53() { - for i in `ls -d /proc/fs/lustre/osc/OSC*mds1 2> /dev/null` ; do + for i in `ls -d $LPROC/osc/OSC*mds1 2> /dev/null` ; do ostname=`echo $i | cut -d _ -f 3-4 | sed -e s/_mds1//` - ost_last=`cat /proc/fs/lustre/obdfilter/$ostname/last_id` + ost_last=`cat $LPROC/obdfilter/$ostname/last_id` mds_last=`cat $i/prealloc_last_id` echo "$ostname.last_id=$ost_last ; MDS.last_id=$mds_last" if [ $ost_last != $mds_last ]; then @@ -2054,7 +2081,7 @@ test_54a() { $SOCKETCLIENT $DIR/socket || error $MUNLINK $DIR/socket } -run_test 54a "unix damain socket test ==========================" +run_test 54a "unix domain socket test ==========================" test_54b() { f="$DIR/f54b" @@ -2081,11 +2108,11 @@ test_54c() { tfile="$DIR/f54c" tdir="$DIR/d54c" loopdev="$DIR/loop54c" - + find_loop_dev [ -z "$LOOPNUM" ] && echo "couldn't find empty loop device" && return mknod $loopdev b 7 $LOOPNUM - echo "make a loop file system with $tfile on $loopdev ($LOOPNUM)..." + echo "make a loop file system with $tfile on $loopdev ($LOOPNUM)..." dd if=/dev/zero of=$tfile bs=`page_size` seek=1024 count=1 > /dev/null losetup $loopdev $tfile || error "can't set up $loopdev for $tfile" mkfs.ext2 $loopdev || error "mke2fs on $loopdev" @@ -2094,7 +2121,7 @@ test_54c() { dd if=/dev/zero of=$tdir/tmp bs=`page_size` count=30 || error "dd write" df $tdir dd if=$tdir/tmp of=/dev/zero bs=`page_size` count=30 || error "dd read" - umount $tdir + $UMOUNT $tdir losetup -d $loopdev rm $loopdev } @@ -2138,7 +2165,7 @@ test_55() { $IOPENTEST2 $DIR/d55 || error "running $IOPENTEST2" echo "check for $EXT2_DEV. Please wait..." rm -rf $DIR/d55/* - umount $DIR/d55 || error "unmounting" + $UMOUNT $DIR/d55 || error "unmounting" } run_test 55 "check iopen_connect_dentry() ======================" @@ -2196,7 +2223,7 @@ run_test 56 "check lfs find ====================================" test_57a() { # note test will not do anything if MDS is not local - for DEV in `cat /proc/fs/lustre/mds/*/mntdev`; do + for DEV in `cat $LPROC/mds/*/mntdev`; do dumpe2fs -h $DEV > $TMP/t57a.dump || error "can't access $DEV" DEVISIZE=`awk '/Inode size:/ { print $3 }' $TMP/t57a.dump` [ "$DEVISIZE" -gt 128 ] || error "inode size $DEVISIZE" @@ -2219,8 +2246,8 @@ test_57b() { $LFIND $FILE1 2>&1 | grep -q "no stripe" || error "$FILE1 has an EA" $LFIND $FILEN 2>&1 | grep -q "no stripe" || error "$FILEN has an EA" - MDSFREE="`cat /proc/fs/lustre/mds/*/kbytesfree`" - MDCFREE="`cat /proc/fs/lustre/mdc/*/kbytesfree`" + MDSFREE="`cat $LPROC/mds/*/kbytesfree`" + MDCFREE="`cat $LPROC/mdc/*/kbytesfree`" echo "opening files to create objects/EAs" for FILE in `seq -f $DIR/d57b/f%g 1 $FILECOUNT`; do $OPENFILE -f O_RDWR $FILE > /dev/null || error "opening $FILE" @@ -2231,8 +2258,8 @@ test_57b() { $LFIND $FILEN | grep -q "obdidx" || error "$FILEN missing EA" sleep 1 # make sure we get new statfs data - MDSFREE2="`cat /proc/fs/lustre/mds/*/kbytesfree`" - MDCFREE2="`cat /proc/fs/lustre/mdc/*/kbytesfree`" + MDSFREE2="`cat $LPROC/mds/*/kbytesfree`" + MDCFREE2="`cat $LPROC/mdc/*/kbytesfree`" if [ "$MDCFREE2" -lt "$((MDCFREE - 8))" ]; then if [ "$MDSFREE" != "$MDSFREE2" ]; then error "MDC before $MDCFREE != after $MDCFREE2" @@ -2288,16 +2315,16 @@ test_62() { f="$DIR/f62" echo foo > $f cancel_lru_locks OSC - echo 0x405 > /proc/sys/lustre/fail_loc + sysctl -w lustre.fail_loc=0x405 cat $f && error "cat succeeded, expect -EIO" - echo 0 > /proc/sys/lustre/fail_loc + sysctl -w lustre.fail_loc=0 } run_test 62 "verify obd_match failure doesn't LBUG (should -EIO)" # bug 2319 - oig_wait() interrupted causes crash because of invalid waitq. test_63() { - MAX_DIRTY_MB=`cat /proc/fs/lustre/osc/*/max_dirty_mb | head -n 1` - for i in /proc/fs/lustre/osc/*/max_dirty_mb ; do + MAX_DIRTY_MB=`cat $LPROC/osc/*/max_dirty_mb | head -n 1` + for i in $LPROC/osc/*/max_dirty_mb ; do echo 0 > $i done for i in `seq 10` ; do @@ -2307,10 +2334,10 @@ test_63() { sleep 1 done - for i in /proc/fs/lustre/osc/*/max_dirty_mb ; do + for i in $LPROC/osc/*/max_dirty_mb ; do echo $MAX_DIRTY_MB > $i done - true + rm -f $DIR/f63 || true } run_test 63 "Verify oig_wait interruption does not crash =======" @@ -2321,16 +2348,16 @@ test_63b() { sysctl -w lnet.debug=-1 # ensure we have a grant to do async writes - dd if=/dev/zero of=/mnt/lustre/f63b bs=4k count=1 - rm /mnt/lustre/f63b + dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 + rm $DIR/$tfile #define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 sysctl -w lustre.fail_loc=0x80000406 - multiop /mnt/lustre/f63b Owy && \ + multiop $DIR/$tfile Owy && \ $LCTL dk /tmp/test63b.debug && \ sysctl -w lnet.debug=$DBG_SAVE && \ error "sync didn't return ENOMEM" - grep -q locked /proc/fs/lustre/llite/fs*/dump_page_cache && \ + grep -q locked $LPROC/llite/fs*/dump_page_cache && \ $LCTL dk /tmp/test63b.debug && \ sysctl -w lnet.debug=$DBG_SAVE && \ error "locked page left in cache after async error" || true @@ -2340,7 +2367,7 @@ run_test 63b "async write errors should be returned to fsync ===" test_64a () { df $DIR - grep "[0-9]" /proc/fs/lustre/osc/OSC*MNT*/cur* + grep "[0-9]" $LPROC/osc/OSC*MNT*/cur* } run_test 64a "verify filter grant calculations (in kernel) =====" @@ -2390,7 +2417,7 @@ test_65e() { mkdir -p $DIR/d65 $LSTRIPE $DIR/d65 0 -1 0 || error "setstripe" - $LFS find -v $DIR/d65 | grep "$DIR/d65/ has no stripe info" || error "no stripe info failed" + $LFS find -v $DIR/d65 | grep "has no stripe info" || error "no stripe info failed" touch $DIR/d65/f6 $LVERIFY $DIR/d65 $DIR/d65/f6 || error "lverify failed" } @@ -2406,8 +2433,8 @@ test_65g() { mkdir -p $DIR/d65 $LSTRIPE $DIR/d65 $(($STRIPESIZE * 2)) 0 1 || error "setstripe" $LSTRIPE -d $DIR/d65 || error "setstripe" - $LFS find -v $DIR/d65 | grep "$DIR/d65/ has no stripe info" || \ - error "delete default stripe failed" + $LFS find -v $DIR/d65 | grep "has no stripe info" || \ + error "delete default stripe failed" } run_test 65g "directory setstripe -d ===========================" @@ -2455,7 +2482,7 @@ test_67() { # bug 3285 - supplementary group fails on MDS, passes on client RC=$? if [ "$MDS" ]; then # can't tell which is correct otherwise - GROUP_UPCALL=`cat /proc/fs/lustre/mds/$MDS/group_upcall` + GROUP_UPCALL=`cat $LPROC/mds/$MDS/group_upcall` [ "$GROUP_UPCALL" = "NONE" -a $RC -eq 0 ] && \ error "no-upcall passed" || true [ "$GROUP_UPCALL" != "NONE" -a $RC -ne 0 ] && \ @@ -2465,6 +2492,7 @@ test_67() { # bug 3285 - supplementary group fails on MDS, passes on client run_test 67 "supplementary group failure (should return error) =" cleanup_68() { + trap 0 if [ "$LOOPDEV" ]; then swapoff $LOOPDEV || error "swapoff failed" losetup -d $LOOPDEV || error "losetup -d failed" @@ -2518,27 +2546,27 @@ test_69() { f="$DIR/f69" touch $f - echo 0x217 > /proc/sys/lustre/fail_loc + sysctl -w lustre.fail_loc=0x217 truncate $f 1 # vmtruncate() will ignore truncate() error. $DIRECTIO write $f 0 2 && error "write succeeded, expect -ENOENT" - echo 0 > /proc/sys/lustre/fail_loc + sysctl -w lustre.fail_loc=0 $DIRECTIO write $f 0 2 || error "write error" cancel_lru_locks OSC $DIRECTIO read $f 0 1 || error "read error" - echo 0x217 > /proc/sys/lustre/fail_loc + sysctl -w lustre.fail_loc=0x217 $DIRECTIO read $f 1 1 && error "read succeeded, expect -ENOENT" - echo 0 > /proc/sys/lustre/fail_loc + sysctl -w lustre.fail_loc=0 rm -f $f } run_test 69 "verify oa2dentry return -ENOENT doesn't LBUG ======" test_71() { DBENCH_LIB=${DBENCH_LIB:-/usr/lib/dbench} - PATH=${PATH}:$DBENCH_LIB + PATH=${DBENCH_LIB}:${PATH} cp `which dbench` $DIR [ ! -f $DIR/dbench ] && echo "dbench not installed, skip this test" && return 0 @@ -2551,17 +2579,15 @@ test_71() { echo "copying necessary lib to $DIR" [ -d /lib64 ] && LIB71=/lib64 || LIB71=/lib - mkdir $DIR$LIB71 || error "can't create $DIR$LIB71" + mkdir -p $DIR$LIB71 || error "can't create $DIR$LIB71" cp $LIB71/libc* $DIR$LIB71 || error "can't copy $LIB71/libc*" cp $LIB71/ld-* $DIR$LIB71 || error "can't create $LIB71/ld-*" - + echo "chroot $DIR /dbench -c client.txt 2" chroot $DIR /dbench -c client.txt 2 RC=$? - rm -f $DIR/dbench - rm -f $TGT - rm -fr $DIR$LIB71 + rm -rf $DIR/dbench $TGT $DIR$LIB71 return $RC } @@ -2598,10 +2624,88 @@ test_74() { # bug 6149, 6184 } run_test 74 "ldlm_enqueue freed-export error path (shouldn't LBUG)" +JOIN=${JOIN:-"lfs join"} test_75() { - sh qos.sh + F=$DIR/$tfile + F128k=${F}_128k + FHEAD=${F}_head + FTAIL=${F}_tail + echo "using F=$F, F128k=$F128k, FHEAD=$FHEAD, FTAIL=$FTAIL" + rm -f $F* + + dd if=/dev/urandom of=${F}_128k bs=1024 count=128 || error "dd failed" + chmod 777 ${F128k} + cp -p ${F128k} ${FHEAD} + cp -p ${F128k} ${FTAIL} + cat ${F128k} ${F128k} > ${F}_sim_sim + + $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" + cmp ${FHEAD} ${F}_sim_sim || error "${FHEAD} ${F}_sim_sim differ" + $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} still exist after join" + + cp -p ${F128k} ${FTAIL} + cat ${F}_sim_sim >> ${F}_join_sim + cat ${F128k} >> ${F}_join_sim + $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" + cmp ${FHEAD} ${F}_join_sim || \ + error "${FHEAD} ${F}_join_sim are different" + $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join" + + cp -p ${F128k} ${FTAIL} + cat ${F128k} >> ${F}_sim_join + cat ${F}_join_sim >> ${F}_sim_join + $JOIN ${FTAIL} ${FHEAD} || error "join error" + cmp ${FTAIL} ${F}_sim_join || \ + error "${FTAIL} ${F}_sim_join are different" + $CHECKSTAT -a ${FHEAD} || error "tail ${FHEAD} exist after join" + + cp -p ${F128k} ${FHEAD} + cp -p ${F128k} ${FHEAD}_tmp + cat ${F}_sim_sim >> ${F}_join_join + cat ${F}_sim_join >> ${F}_join_join + $JOIN ${FHEAD} ${FHEAD}_tmp || error "join ${FHEAD} ${FHEAD}_tmp error" + $JOIN ${FHEAD} ${FTAIL} || error "join ${FHEAD} ${FTAIL} error" + cmp ${FHEAD} ${F}_join_join || error "${FHEAD} ${F}_join_join differ" + $CHECKSTAT -a ${FHEAD}_tmp || error "${FHEAD}_tmp exist after join" + $CHECKSTAT -a ${FTAIL} || error "tail ${FTAIL} exist after join (2)" + + rm -rf ${FHEAD} || "delete join file error" + cp -p ${F128k} ${F}_join_10_compare + cp -p ${F128k} ${F}_join_10 + for ((i = 0; i < 10; i++)); do + cat ${F128k} >> ${F}_join_10_compare + cp -p ${F128k} ${FTAIL} + $JOIN ${F}_join_10 ${FTAIL} || \ + error "join ${F}_join_10 ${FTAIL} error" + $CHECKSTAT -a ${FTAIL} || error "tail file exist after join" + done + cmp ${F}_join_10 ${F}_join_10_compare || \ + error "files ${F}_join_10 ${F}_join_10_compare are different" + $LFS getstripe ${F}_join_10 + $OPENUNLINK ${F}_join_10 ${F}_join_10 || error "files unlink open" + + ls -l $F* +} +run_test 75 "TEST join file" + +num_inodes() { + awk '/lustre_inode_cache|^inode_cache/ {print $2; exit}' /proc/slabinfo +} + +test_76() { # bug 1443 + BEFORE_INODES=`num_inodes` + echo "before inodes: $BEFORE_INODES" + for i in `seq 1000`; do + touch $DIR/$tfile + rm -f $DIR/$tfile + done + AFTER_INODES=`num_inodes` + echo "after inodes: $AFTER_INODES" + [ $AFTER_INODES -gt $((BEFORE_INODES + 10)) ] && \ + error "inode slab grew from $BEFORE_INODES to $AFTER_INODES" + true } -run_test 75 "qos test ============================================" +run_test 76 "destroy duplicate inodes in client inode cache" # on the LLNL clusters, runas will still pick up root's $TMP settings, # which will not be writable for the runas user, and then you get a CVS @@ -2668,10 +2772,15 @@ test_100() { netstat -tna | while read PROT SND RCV LOCAL REMOTE STAT; do [ "$PROT" != "tcp" ] && continue RPORT=`echo $REMOTE | cut -d: -f2` - [ "$RPORT" != "988" ] && continue + [ "$RPORT" != "$ACCEPTOR_PORT" ] && continue LPORT=`echo $LOCAL | cut -d: -f2` - [ $LPORT -ge 1024 ] && error "local port: $LPORT > 1024" || true + if [ $LPORT -ge 1024 ]; then + echo "bad: $PROT $SND $RCV $LOCAL $REMOTE $STAT" + netstat -tna + error "local: $LPORT > 1024, remote: $RPORT" + fi done + true } run_test 100 "check local port using privileged port ===========" @@ -2719,6 +2828,7 @@ test_101() { cat $LPROC/llite/*/read_ahead_stats error "too many ($discard) discarded pages" fi + rm -f $DIR/f101 || true } run_test 101 "check read-ahead for random reads ===========" @@ -2729,7 +2839,7 @@ test_102() { touch $testfile [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return - [ -z "`mount | grep " $DIR .*\"`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return + [ -z "`grep \ $LPROC/mdc/MDC*MNT*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return echo "set/get xattr..." setfattr -n trusted.name1 -v value1 $testfile || error [ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \ @@ -2780,6 +2890,7 @@ test_103 () { [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return [ -z "`mount | grep " $DIR .*\"`" ] && echo "skipping $TESTNAME (must have acl)" && return + [ -z "`grep acl $LPROC/mdc/MDC*MNT*/connect_flags`" ] && echo "skipping $TESTNAME (must have acl)" && return echo "performing cp ..." run_acl_subtest cp || error @@ -2805,6 +2916,23 @@ test_103 () { } run_test 103 "==============acl test =============" +test_104() { + touch $DIR/$tfile + lfs df || error "lfs df failed" + lfs df -ih || error "lfs df -ih failed" + lfs df $DIR || error "lfs df $DIR failed" + lfs df -ih $DIR || error "lfs df -ih $DIR failed" + lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed" + lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed" + + OSC=`lctl dl | awk '/OSC.*MNT/ {print $4}' | head -n 1` + lctl --device %$OSC deactivate + lfs df || error "lfs df with deactivated OSC failed" + lctl --device %$OSC recover + lfs df || error "lfs df with reactivated OSC failed" +} +run_test 104 "lfs>df [-ih] [path] test ============" + TMPDIR=$OLDTMPDIR TMP=$OLDTMP HOME=$OLDHOME diff --git a/lustre/utils/lconf b/lustre/utils/lconf index cfea7da..c550731 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -270,7 +270,7 @@ class LCTLInterface: if config.noexec: return (0, []) child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command - child.tochild.write(cmds + "\n") + child.tochild.write(cmds + "\nq\n") child.tochild.close() # From "Python Cookbook" from O'Reilly @@ -599,6 +599,32 @@ def is_block(path): return 0 return stat.S_ISBLK(s[stat.ST_MODE]) +def my_realpath(path): + try: + if os.path.islink(path): + # get the realpath of the mount point path + if 'realpath' in dir(os.path): + real_path = os.path.realpath(path) + else: + real_path = path + link_count = 0 + while os.path.islink(real_path) and (link_count < 20): + link_count = link_count + 1 + path_link = os.readlink(real_path) + if os.path.isabs(path_link): + real_path = path_link + else: + real_path = os.path.join(os.path.dirname(real_path), path_link) + if link_count > 19: + panic("Encountered too many symbolic links resolving path:", path) + else: + real_path = path + + return real_path + except: + panic("Fatal error realpath()ing path:", path) + + # build fs according to type # fixme: dangerous def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): @@ -631,20 +657,7 @@ def mkfs(dev, devsize, fstype, jsize, isize, mkfsoptions, isblock=1): # get the realpath of the device # it may be the real device, such as /dev/hda7 # or the hardlink created via mknod for a device - if 'realpath' in dir(os.path): - real_dev = os.path.realpath(dev) - else: - real_dev = dev - link_count = 0 - while os.path.islink(real_dev) and (link_count < 20): - link_count = link_count + 1 - dev_link = os.readlink(real_dev) - if os.path.isabs(dev_link): - real_dev = dev_link - else: - real_dev = os.path.join(os.path.dirname(real_dev), dev_link) - if link_count > 19: - panic("Encountered too many symbolic links resolving block device:", dev) + real_dev = my_realpath(dev) # get the major and minor number of the realpath via ls # it seems python(os.stat) does not return @@ -787,7 +800,7 @@ def if2addr(iface): ip = string.split(addr, ':')[1] return ip -def def_mount_options(fstype, target): +def def_mount_options(fstype, target, blkdev): """returns deafult mount options for passed fstype and target (mds, ost)""" if fstype == 'ext3' or fstype == 'ldiskfs': mountfsoptions = "errors=remount-ro" @@ -801,6 +814,64 @@ def def_mount_options(fstype, target): mountfsoptions = "%s,user_xattr" % (mountfsoptions) if config.acl: mountfsoptions = "%s,acl" % (mountfsoptions) + + if blkdev: + # grab superblock info + dumpe2fs="dumpe2fs -f -h" + (ret, sb) = run(dumpe2fs, blkdev) + if ret: + panic("unable to get superblock for ", blkdev) + + # extract journal UUID + journal_UUID='' + journal_DEV='' + for line in sb: + lst = string.split(line, ":") + if lst[0] == 'Journal UUID': + if len(lst[1]) < 3: + panic("cannot retrieve journal UUID for ", blkdev) + if string.split(lst[1])[0] != '': + journal_UUID = string.split(lst[1])[0] + debug(blkdev, 'has journal UUID', journal_UUID) + if lst[0] == 'Journal device': + if len(lst[1]) < 3: + panic("cannot retrieve journal device for ", blkdev) + if string.split(lst[1])[0] != '0x0000': + journal_DEV = string.split(lst[1])[0] + debug(blkdev, 'has journal device', journal_DEV) + break + + if len(journal_UUID) == 0 or len(journal_DEV) == 0: + debug('no external journal found for', blkdev) + # use internal journal + return mountfsoptions + + # run blkid + blkid = "blkid -o device -t UUID='%s'" % (journal_UUID) + (ret, devname) = run(blkid) + if ret or len(devname) == 0: + panic("cannot find external journal for ", blkdev) + debug('found', blkdev, 'journal UUID', journal_UUID, 'on', + string.replace(devname[0], '\n', '')) + + try: # sigh, python 1.5 does not support os.stat().st_rdev + jdevpath = my_realpath(string.replace(devname[0], '\n', '')) + ret, out = runcmd("ls -l %s" %jdevpath) + debug('ls -l:', out) + major = int(string.split(string.split(out[0])[4], ',')[0]) + minor = int(string.split(out[0])[5]) + debug('major', major, 'minor', minor) + rdev = major << 8 | minor + except OSError: + panic("cannot stat ", devname[0]) + + debug('found', blkdev, 'journal UUID', journal_UUID, 'on', + jdevpath, 'rdev', rdev) + + # add mount option + if string.atoi(journal_DEV, 0) != rdev: + mountfsoptions = "%s,journal_dev=%#x" % (mountfsoptions,rdev) + return mountfsoptions return "" @@ -857,12 +928,14 @@ def is_network_prepared(): def fs_is_mounted(path): """Return true if path is a mounted lustre filesystem""" try: + real_path = my_realpath(path) + fp = open('/proc/mounts') lines = fp.readlines() fp.close() for l in lines: a = string.split(l) - if a[1] == path and a[2] == 'lustre_lite': + if a[1] == real_path and a[2] == 'lustre_lite': return 1 except IOError, e: log(e) @@ -890,15 +963,18 @@ class kmod: continue log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) if PLATFORM == 'LINUX': + options = '' if mod == 'lnet': #For LNET we really need modprobe to load defined LNDs run('/sbin/modprobe lnet') - #But if that fails, try insmod anyhow + #But if that fails, try insmod anyhow with dev option + #accept=all for dev liblustre testing + options = 'accept=all' if src_dir: module = find_module(src_dir, dev_dir, mod) if not module: panic('module not found:', mod) - (rc, out) = run('/sbin/insmod', module) + (rc, out) = run('/sbin/insmod', module, options) if rc and not mod_loaded(mod): if rc == 1: print("Bad module options? Check dmesg.") @@ -911,6 +987,7 @@ class kmod: raise CommandError('modprobe', out, rc) elif PLATFORM == 'DARWIN': run('/sbin/kextload', KEXTPATH + mod + '.kext'); + def cleanup_module(self): """Unload the modules in the list in reverse order.""" @@ -927,6 +1004,7 @@ class kmod: # remove any self-ref portals created lctl.unconfigure_network() if config.dump: + debug('dumping debug log to', config.dump) # debug hack lctl.dump(config.dump) log('unloading the network') @@ -1198,6 +1276,7 @@ class MDSDEV(Module): self.active = 0 self.inode_size = self.db.get_val_int('inodesize', 0) + debug('original inode_size ', self.inode_size) if self.inode_size == 0: # find the LOV for this MDS lovconfig_uuid = mds.get_first_ref('lovconfig') @@ -1224,8 +1303,7 @@ class MDSDEV(Module): # self.inode_size = 256 else: self.inode_size = 512 - debug('stripe_count %d, inode_size %d', - stripe_count, self.inode_size) + debug('stripe_count ', stripe_count,' inode_size ',self.inode_size) self.target_dev_uuid = self.uuid self.uuid = target_uuid @@ -1263,7 +1341,7 @@ class MDSDEV(Module): if not is_prepared('MDT'): lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") try: - mountfsoptions = def_mount_options(self.fstype, 'mds') + mountfsoptions = def_mount_options(self.fstype, 'mds', blkdev) if config.mountfsoptions: if mountfsoptions: @@ -1344,7 +1422,7 @@ class MDSDEV(Module): noexec_opt = ('', '-n') ret, out = run (sys.argv[0], noexec_opt[old_noexec == 1], - " -v --record --nomod", + " -v --record --nomod --old_conf", "--record_log", client_name, "--record_device", self.name, "--node", client_name, @@ -1369,18 +1447,22 @@ class MDSDEV(Module): #change the mtime of LLOG to match the XML creation time if toplustreDB.get_mtime(): - mtime = string.atof(toplustreDB.get_mtime()) - runcmd("mkdir /tmp/lustre-XXXX/") - if is_block(self.devpath): - ret, out = runcmd("mount %s /tmp/lustre-XXXX/" %self.devpath) - else: - ret, out = runcmd("mount -o loop %s /tmp/lustre-XXXX/" %self.devpath) + mtime = toplustreDB.get_mtime() + debug("changing mtime of LOGS to %s" %mtime) + ret, mktemp = runcmd("mktemp /tmp/lustre-cmd.XXXXXXXX") if ret: - print out[0] - os.utime("/tmp/lustre-XXXX/LOGS", (mtime, mtime)) - runcmd("umount -f /tmp/lustre-XXXX/") - else: - print "XML file does not contain mtime, skip mtime checking." + log(self.module_name, "create mtime LOGS cmdfile failed: ", self.name) + else: + mtimecmdfile = string.split(mktemp[0])[0] + #mtimecmdfile="/tmp/lustre-cmd.XXXXXXXX" + fd = os.open(mtimecmdfile, os.O_RDWR | os.O_CREAT) + os.write(fd, "\n\n\n\n\n%s\n\n" %mtime) + os.close(fd) + cmd = "debugfs -w -R \"mi /LOGS\" <%s %s" %(mtimecmdfile, self.devpath) + ret, outs = runcmd(cmd) + os.remove(mtimecmdfile) + if ret: + print "Can not change mtime of LOGS by debugfs." def mds_remaining(self): out = lctl.device_list() @@ -1499,7 +1581,7 @@ class OSD(Module): config.reformat, self.format, self.journal_size, self.inode_size, self.mkfsoptions) - mountfsoptions = def_mount_options(self.fstype, 'ost') + mountfsoptions = def_mount_options(self.fstype, 'ost', blkdev) if config.mountfsoptions: if mountfsoptions: @@ -1586,7 +1668,7 @@ class Client(Module): self.name = self_name self.uuid = uuid self.lookup_server(self.tgt_dev_uuid) - self.lookup_backup_targets() + self.lookup_backup_targets() self.fs_name = fs_name if not module_dir: module_dir = module @@ -2111,38 +2193,28 @@ def doCheckMtime(lustreDB, hosts): if s[1].get_class() == 'mdsdev': mdsdb = s[1] break - if mdsdb: - if lustreDB.get_mtime(): - if config.verbose: - print "Checking XML modification time" - devpath = mdsdb.get_val('devpath','') - xmtime = int(lustreDB.get_mtime()) - runcmd("mkdir /tmp/lustre-XXXX/") - # mounting ro causes confusing syslog errors - if is_block(devpath): - ret, out = runcmd("mount %s /tmp/lustre-XXXX/" %devpath) - else: - ret, out = runcmd("mount -o loop %s /tmp/lustre-XXXX/" %devpath) - if ret: - print out[0] - else: - try: - out = os.stat("/tmp/lustre-XXXX/LOGS") - except OSError: - runcmd("umount -f /tmp/lustre-XXXX") - panic("Warning: Can't read Lustre logs." - " Please run --write_conf to update.") - runcmd("umount -f /tmp/lustre-XXXX") - try: - kmtime = int(out[8]) - except ValueError: - kmtime = xmtime - if xmtime > kmtime : - debug('xmtime ', xmtime, '> kmtime', kmtime) - panic("Warning: the startup logs are older than the XML file." - " Please run --write_conf to update.") + + if mdsdb and lustreDB.get_mtime(): + debug("Checking XML modification time") + devpath = mdsdb.get_val('devpath','') + xmtime = string.atol(lustreDB.get_mtime()) + cmd = "debugfs -c -R 'stat /LOGS' %s 2>&1 | grep mtime" %devpath + ret, kmtimes = runcmd(cmd) + if ret: + log("Can not get mtime info of MDS LOGS directory") else: - print "XML file does not contain mtime, skip mtime checking." + kmtime = string.atoi(string.split(kmtimes[0])[1], 0) + if xmtime > kmtime: + debug('xmtime ', xmtime, '> kmtime', kmtime) + if config.old_conf: + log("Warning: MDS startup logs are older than config %s." + " Please run --write_conf on stopped MDS to update." + %CONFIG_FILE) + else: + panic("Error: MDS startup logs are older than config %s." + " Please run --write_conf on stopped MDS to update." + " Use '--old_conf' to start anyways." %CONFIG_FILE) + return # # Load profile for @@ -2623,6 +2695,7 @@ lconf_options = [ ('dump', "Dump the kernel debug log to file before portals is unloaded", PARAM), ('write_conf', "Save all the client config information on mds."), + ('old_conf', "Start up service even though config logs appear outdated."), ('record', "Write config information on mds."), ('record_log', "Name of config record log.", PARAM), ('record_device', "MDS device name that will record the config commands", @@ -2803,7 +2876,10 @@ if __name__ == "__main__": sys.exit(1) except CommandError, e: e.dump() - sys.exit(e.rc) + rc = e.rc + if rc == 0: + rc = 1 + sys.exit(rc) if first_cleanup_error: sys.exit(first_cleanup_error) -- 1.8.3.1