X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=c50519fa569945a67cf2a26088103f379373db8f;hp=8f51dc58559d8cb9dc769e418e25273c22bee222;hb=4d905db121a5d1c3bb4962b66fc2d27d70412233;hpb=7a79e2b47b7d9078757c73a13578a3f728fb4ad6 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 8f51dc5..c50519f 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -72,7 +72,7 @@ LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/${NAME}.sh} -[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 24v 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" SANITYLOG=${TESTSUITELOG:-$TMP/$(basename $0 .sh).log} FAIL_ON_ERROR=false @@ -90,11 +90,12 @@ setup() { } check_kernel_version() { - VERSION_FILE=version WANT_VER=$1 - GOT_VER=$(lctl get_param -n $VERSION_FILE | awk '/kernel:/ {print $2}') - [ $GOT_VER == "patchless" ] && return 0 - [ $GOT_VER -ge $WANT_VER ] && return 0 + GOT_VER=$(lctl get_param -n version | awk '/kernel:/ {print $2}') + case $GOT_VER in + patchless|patchless_client) return 0;; + *) [ $GOT_VER -ge $WANT_VER ] && return 0 ;; + esac log "test needs at least kernel version $WANT_VER, running $GOT_VER" return 1 } @@ -473,6 +474,15 @@ test_17g() { } run_test 17g "symlinks: really long symlink name ===============================" +test_17h() { #bug 17378 + mkdir -p $DIR/$tdir + $SETSTRIPE $DIR/$tdir -c -1 +#define OBD_FAIL_MDS_LOV_PREP_CREATE 0x141 + do_facet mds lctl set_param fail_loc=0x80000141 + touch $DIR/$tdir/$tfile || true +} +run_test 17h "create objects: lov_free_memmd() doesn't lbug" + test_18() { touch $DIR/f ls $DIR || error @@ -541,7 +551,8 @@ test_22() { } run_test 22 "unpack tar archive as non-root user ===============" -test_23() { +# was test_23 +test_23a() { mkdir -p $DIR/$tdir local file=$DIR/$tdir/$tfile @@ -549,7 +560,19 @@ test_23() { openfile -f O_CREAT:O_EXCL $file && error "$file recreate succeeded" || true } -run_test 23 "O_CREAT|O_EXCL in subdir ==========================" +run_test 23a "O_CREAT|O_EXCL in subdir ==========================" + +test_23b() { # bug 18988 + mkdir -p $DIR/$tdir + local file=$DIR/$tdir/$tfile + + rm -f $file + echo foo > $file || error "write filed" + echo bar >> $file || error "append filed" + $CHECKSTAT -s 8 $file || error "wrong size" + rm $file +} +run_test 23b "O_APPEND check ==========================" test_24a() { echo '== rename sanity ==============================================' @@ -633,7 +656,7 @@ test_24i() { mrename $DIR/R9/f $DIR/R9/a $CHECKSTAT -t file $DIR/R9/f || error $CHECKSTAT -t dir $DIR/R9/a || error - $CHECKSTAT -a file $DIR/R9/a/f || error + $CHECKSTAT -a $DIR/R9/a/f || error } run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a" @@ -746,6 +769,21 @@ test_24u() { # bug12192 } run_test 24u "create stripe file" +test_24v() { + local NRFILES=100000 + local FREE_INODES=`lfs df -i|grep "filesystem summary" | awk '{print $5}'` + [ $FREE_INODES -lt $NRFILES ] && \ + skip "not enough free inodes $FREE_INODES required $NRFILES" && \ + return + + mkdir -p $DIR/d24v + createmany -m $DIR/d24v/$tfile $NRFILES + ls $DIR/d24v >/dev/null || error "error in listing large dir" + + rm $DIR/d24v -rf +} +run_test 24v "list directory with large files (handle hash collision, bug: 17560)" + test_25a() { echo '== symlink sanity =============================================' @@ -819,6 +857,7 @@ run_test 26f "rm -r of a directory which has recursive symlink =" test_27a() { echo '== stripe sanity ==============================================' mkdir -p $DIR/d27 || error "mkdir failed" + $GETSTRIPE $DIR/d27 $SETSTRIPE $DIR/d27/f0 -c 1 || error "lstripe failed" $CHECKSTAT -t file $DIR/d27/f0 || error "checkstat failed" pass @@ -853,7 +892,7 @@ test_27e() { $SETSTRIPE $DIR/d27/f12 -c 2 && error "lstripe succeeded twice" $CHECKSTAT -t file $DIR/d27/f12 || error "checkstat failed" } -run_test 27e "lstripe existing file (should return error) ======" +run_test 27e "setstripe existing file (should return error) ======" test_27f() { mkdir -p $DIR/d27 @@ -861,7 +900,7 @@ test_27f() { dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 || error "dd failed" $GETSTRIPE $DIR/d27/fbad || error "lfs getstripe failed" } -run_test 27f "lstripe with bad stripe size (should return error)" +run_test 27f "setstripe with bad stripe size (should return error)" test_27g() { mkdir -p $DIR/d27 @@ -880,7 +919,7 @@ test_27j() { mkdir -p $DIR/d27 $SETSTRIPE $DIR/d27/f27j -i $OSTCOUNT && error "lstripe failed"||true } -run_test 27j "lstripe with bad stripe offset (should return error)" +run_test 27j "setstripe with bad stripe offset (should return error)" test_27k() { # bug 2844 mkdir -p $DIR/d27 @@ -934,10 +973,15 @@ run_test 27m "create file while OST0 was full ==================" # osc's keep a NOSPC stick flag that gets unset with rmdir reset_enospc() { - [ "$1" ] && FAIL_LOC=$1 || FAIL_LOC=0 + local FAIL_LOC=${1:-0} + local OSTIDX=${2:-""} + mkdir -p $DIR/d27/nospc rmdir $DIR/d27/nospc - do_nodes $(comma_list $(osts_nodes)) lctl set_param fail_loc=$FAIL_LOC + local list=$(comma_list $(osts_nodes)) + [ "$OSTIDX" ] && list=$(facet_host ost$((OSTIDX + 1))) + + do_nodes $list lctl set_param fail_loc=$FAIL_LOC } exhaust_precreations() { @@ -945,7 +989,8 @@ exhaust_precreations() { local MDSIDX=$(get_mds_dir "$DIR/d27") echo OSTIDX=$OSTIDX MDSIDX=$MDSIDX - local OST=$(lfs osts | grep ${OSTIDX}": " | awk '{print $2}' | sed -e 's/_UUID$//') + local OST=$(lfs osts | grep ${OSTIDX}": " | \ + awk '{print $2}' | sed -e 's/_UUID$//') local MDT_INDEX=$(lfs df | grep "\[MDT:$((MDSIDX - 1))\]" | awk '{print $1}' | \ sed -e 's/_UUID$//;s/^.*-//') @@ -963,7 +1008,7 @@ exhaust_precreations() { echo "Creating to objid $last_id on ost $OST..." createmany -o $DIR/d27/${OST}-f $next_id $((last_id - next_id + 2)) do_facet mds${MDSIDX} lctl get_param osc.*${OST}-osc-${MDT_INDEX}.prealloc* | grep '[0-9]' - reset_enospc $2 + reset_enospc $2 $OSTIDX } exhaust_all_precreations() { @@ -982,9 +1027,9 @@ test_27n() { reset_enospc rm -f $DIR/d27/f27n exhaust_precreations 0 0x80000215 - + $SETSTRIPE -c -1 $DIR/d27 touch $DIR/d27/f27n || error - + $GETSTRIPE $DIR/d27/f27n reset_enospc } run_test 27n "create file with some full OSTs ==================" @@ -1063,9 +1108,13 @@ test_27r() { run_test 27r "stripe file with some full OSTs (shouldn't LBUG) =" test_27s() { # bug 10725 - mkdir -p $DIR/$tdir - $LSTRIPE $DIR/$tdir $((2048 * 1024 * 1024)) -1 2 && \ - error "stripe width >= 2^32 succeeded" || true + mkdir -p $DIR/$tdir + local stripe_size=$((4096 * 1024 * 1024)) # 2^32 + local stripe_count=0 + [ $OSTCOUNT -eq 1 ] || stripe_count=2 + $SETSTRIPE $DIR/$tdir -s $stripe_size -c $stripe_count && \ + error "stripe width >= 2^32 succeeded" || true + } run_test 27s "lsm_xfersize overflow (should error) (bug 10725)" @@ -1110,15 +1159,17 @@ test_27v() { # bug 4900 touch $DIR/$tdir/$tfile #define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 - lctl set_param fail_loc=0x705 - START=`date +%s` - for F in `seq 1 32`; do - touch $DIR/$tdir/$tfile.$F + # all except ost1 + for (( i=0; i < OSTCOUNT; i++ )) ; do + do_facet ost$i lctl set_param fail_loc=0x705 done - lctl set_param fail_loc=0 + local START=`date +%s` + createmany -o $DIR/$tdir/$tfile 32 + + reset_enospc - FINISH=`date +%s` - TIMEOUT=`lctl get_param -n timeout` + local FINISH=`date +%s` + local TIMEOUT=`lctl get_param -n timeout` [ $((FINISH - START)) -ge $((TIMEOUT / 2)) ] && \ error "$FINISH - $START >= $TIMEOUT / 2" @@ -1144,7 +1195,7 @@ test_27w() { # bug 10997 } run_test 27w "check lfs setstripe -c -s -i options =============" -# createtest also checks that device nodes are created and +# createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091 mkdir $DIR/d28 @@ -2080,7 +2131,7 @@ test_46() { } run_test 46 "dirtying a previously written page ================" -# test_47 is removed "Device nodes check" is moved to test_28 +# test_47 is removed "Device nodes check" is moved to test_28 test_48a() { # bug 2399 check_kernel_version 34 || return 0 @@ -2276,7 +2327,7 @@ test_51bb() { echo "mds $i: inodes count OLD ${OLDUSED[$i]} NEW ${NEWUSED[$i]}" [ ${OLDUSED[$i]} -lt ${NEWUSED[$i]} ] || rc=$((rc + 1)) done - + lctl set_param -n lmv.*.placement=$savePOLICY [ $rc -ne $MDSCOUNT ] || \ @@ -2750,7 +2801,7 @@ test_57b() { mkdir -p $dir || error "creating $dir" local num=$(get_mds_dir $dir) local mymds=mds$num - + echo "mcreating $FILECOUNT files" createmany -m $dir/f 1 $FILECOUNT || \ error "creating files in $dir" @@ -3470,7 +3521,6 @@ test_77b() { # bug 10889 error "dd error: $?" lctl set_param fail_loc=0 set_checksums 0 - rm -f $DIR/f77b } run_test 77b "checksum error on client write ====================" @@ -3488,6 +3538,7 @@ test_77c() { # bug 10889 done set_checksums 0 set_checksum_type $ORIG_CSUM_TYPE + rm -f $DIR/f77b } run_test 77c "checksum error on client read ===================" @@ -3693,7 +3744,7 @@ test_99a() { chown $RUNAS_ID $DIR/d99cvsroot local oldPWD=$PWD # bug 13584, use $TMP as working dir cd $TMP - + $RUNAS cvs -d $DIR/d99cvsroot init || error cd $oldPWD } @@ -3768,10 +3819,10 @@ test_100() { if [ $LPORT -ge 1024 ]; then echo "bad: $PROT $SND $RCV $LOCAL $REMOTE $STAT" netstat -tna - error "local: $LPORT > 1024, remote: $RPORT" + error_exit "local: $LPORT > 1024, remote: $RPORT" fi done - [ "$rc" = 0 ] || error "privileged port not found" ) + [ "$rc" = 0 ] || error_exit "privileged port not found" ) } run_test 100 "check local port using privileged port ===========" @@ -3905,6 +3956,51 @@ test_101b() { } run_test 101b "check stride-io mode read-ahead =================" +set_read_ahead() { + lctl get_param -n llite.*.max_read_ahead_mb | head -n 1 + lctl set_param -n llite.*.max_read_ahead_mb $1 > /dev/null 2>&1 +} + +test_101d() { + local file=$DIR/$tfile + local size=${FILESIZE_101c:-500} + local ra_MB=${READAHEAD_MB:-40} + + local space=$(df -P $DIR | tail -n 1 | awk '{ print $4 }') + [ $space -gt $((size / 1024)) ] || + { skip "Need free space ${size}M, have $space" && return; } + + echo Creating ${size}M test file $file + dd if=/dev/zero of=$file bs=1M count=$size + echo Cancel LRU locks on lustre client to flush the client cache + cancel_lru_locks osc + + echo Disable read-ahead + local old_READAHEAD=$(set_read_ahead 0) + + echo Reading the test file $file with read-ahead disabled + time_ra_OFF=$(do_and_time "dd if=$file of=/dev/null bs=1M count=$size") + + echo Cancel LRU locks on lustre client to flush the client cache + cancel_lru_locks osc + echo Enable read-ahead with ${ra_MB}MB + set_read_ahead $ra_MB + + echo Reading the test file $file with read-ahead enabled + time_ra_ON=$(do_and_time "dd if=$file of=/dev/null bs=1M count=$size") + + echo read-ahead disabled time read $time_ra_OFF + echo read-ahead enabled time read $time_ra_ON + + set_read_ahead $old_READAHEAD + rm -f $file + + [ $time_ra_ON -lt $time_ra_OFF ] || + error "read-ahead enabled time read (${time_ra_ON}s) is more than + read-ahead disabled time read (${time_ra_OFF}s) filesize ${size}M" +} +run_test 101d "file read with and without read-ahead enabled =================" + export SETUP_TEST102=no setup_test102() { [ "$SETUP_TEST102" = "yes" ] && return @@ -3998,16 +4094,16 @@ test_102b() { echo "get/set/list trusted.lov xattr ..." [ "$OSTCOUNT" -lt "2" ] && skip "skipping 2-stripe test" && return local testfile=$DIR/$tfile - $SETSTRIPE $testfile -s 65536 -i 1 -c 2 + $SETSTRIPE -s 65536 -i 1 -c 2 $testfile || error "setstripe failed" getfattr -d -m "^trusted" $testfile 2> /dev/null | \ grep "trusted.lov" || error "can't get trusted.lov from $testfile" local testfile2=${testfile}2 local value=`getfattr -n trusted.lov $testfile 2> /dev/null | \ grep "trusted.lov" |sed -e 's/[^=]\+=//'` - + $MCREATE $testfile2 - setfattr -n trusted.lov -v $value $testfile2 + setfattr -n trusted.lov -v $value $testfile2 local tmp_file=${testfile}3 $GETSTRIPE -v $testfile2 > $tmp_file local stripe_size=`grep "size" $tmp_file| awk '{print $2}'` @@ -4025,16 +4121,16 @@ test_102c() { mkdir -p $DIR/$tdir chown $RUNAS_ID $DIR/$tdir local testfile=$DIR/$tdir/$tfile - $RUNAS $SETSTRIPE $testfile -s 65536 -i 1 -c 2 + $RUNAS $SETSTRIPE -s 65536 -i 1 -c 2 $testfile||error "setstripe failed" $RUNAS getfattr -d -m "^lustre" $testfile 2> /dev/null | \ grep "lustre.lov" || error "can't get lustre.lov from $testfile" local testfile2=${testfile}2 local value=`getfattr -n lustre.lov $testfile 2> /dev/null | \ grep "lustre.lov" |sed -e 's/[^=]\+=//' ` - + $RUNAS $MCREATE $testfile2 - $RUNAS setfattr -n lustre.lov -v $value $testfile2 + $RUNAS setfattr -n lustre.lov -v $value $testfile2 local tmp_file=${testfile}3 $RUNAS $GETSTRIPE -v $testfile2 > $tmp_file local stripe_size=`grep "size" $tmp_file| awk '{print $2}'` @@ -4045,6 +4141,8 @@ test_102c() { run_test 102c "non-root getfattr/setfattr for lustre.lov EAs ===========" compare_stripe_info1() { + local stripe_index_all_zero=1 + for num in 1 2 3 4 do for count in `seq 1 $STRIPE_COUNT` @@ -4061,11 +4159,13 @@ compare_stripe_info1() { error "$file: different stripe count $stripe_count, expected $count" && return fi if [ $stripe_index -ne 0 ]; then - error "$file: different stripe offset $stripe_index, expected 0" && return + stripe_index_all_zero=0 fi done done done + [ $stripe_index_all_zero -eq 1 ] && error "all files are being extracted starting from OST index 0" + return 0 } compare_stripe_info2() { @@ -4251,7 +4351,7 @@ test_104() { lfs df -i $DIR || error "lfs df -i $DIR failed" lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed" lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed" - + OSC=`lctl get_param -n devices | awk '/-osc-/ {print $4}' | head -n 1` lctl --device %$OSC deactivate lfs df || error "lfs df with deactivated OSC failed" @@ -4423,7 +4523,7 @@ test_116() { declare -i FILL FILL=$(($MINV / 4)) echo "Filling 25% remaining space in OST${MINI} with ${FILL}Kb" - $SETSTRIPE $DIR/$tdir/OST${MINI} -i $MINI -c 1 + $SETSTRIPE -i $MINI -c 1 $DIR/$tdir/OST${MINI}||error "setstripe failed" i=0 while [ $FILL -gt 0 ]; do i=$(($i + 1)) @@ -4529,7 +4629,7 @@ reset_async() { test_118a() #bug 11710 { reset_async - + multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c DIRTY=$(lctl get_param -n llite.*.dump_page_cache | grep -c dirty) WRITEBACK=$(lctl get_param -n llite.*.dump_page_cache | grep -c writeback) @@ -4573,7 +4673,7 @@ test_118b() # until a subsequent RPC completes successfully without error. multiop $DIR/$tfile Ow4096yc rm -f $DIR/$tfile - + return 0 } run_test 118b "Reclaim dirty pages on fatal error ==========" @@ -4615,7 +4715,7 @@ test_118c() if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" fi - + rm -f $DIR/$tfile echo "Dirty pages flushed via fsync on EROFS" return 0 @@ -4631,7 +4731,7 @@ test_118d() #define OBD_FAIL_OST_BRW_PAUSE_BULK set_nodes_failloc "$(osts_nodes)" 0x214 # multiop should block due to fsync until pages are written - multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & + multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & MULTIPID=$! sleep 1 @@ -4673,7 +4773,7 @@ test_118f() { if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" fi - + lctl set_param fail_loc=0x0 LOCKED=$(lctl get_param -n llite.*.dump_page_cache | grep -c locked) @@ -4705,7 +4805,7 @@ test_118g() { # simulate local -ENOMEM multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? - + lctl set_param fail_loc=0 if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" @@ -4718,7 +4818,7 @@ test_118g() { if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi - + if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" fi @@ -4741,7 +4841,7 @@ test_118h() { # Should simulate ENOMEM error which is recoverable and should be handled by timeout multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c RC=$? - + set_nodes_failloc "$(osts_nodes)" 0 if [[ $RC -eq 0 ]]; then error "Must return error due to dropped pages, rc=$RC" @@ -4754,7 +4854,7 @@ test_118h() { if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi - + if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" fi @@ -4773,13 +4873,13 @@ test_118i() { #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e set_nodes_failloc "$(osts_nodes)" 0x20e - + # Should simulate ENOMEM error which is recoverable and should be handled by timeout multiop $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c & PID=$! sleep 5 set_nodes_failloc "$(osts_nodes)" 0 - + wait $PID RC=$? if [[ $RC -ne 0 ]]; then @@ -4792,7 +4892,7 @@ test_118i() { if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi - + if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" fi @@ -4826,7 +4926,7 @@ test_118j() { if [[ $LOCKED -ne 0 ]]; then error "Locked pages remain in cache, locked=$LOCKED" fi - + # in recoverable error on OST we want resend and stay until it finished if [[ $DIRTY -ne 0 || $WRITEBACK -ne 0 ]]; then error "Dirty pages not flushed to disk, dirty=$DIRTY, writeback=$WRITEBACK" @@ -4882,7 +4982,7 @@ test_119b() # bug 11737 { [ "$OSTCOUNT" -lt "2" ] && skip "skipping 2-stripe test" && return - $SETSTRIPE $DIR/$tfile -c 2 + $SETSTRIPE -c 2 $DIR/$tfile || error "setstripe failed" dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 seek=1 || error "dd failed" sync multiop $DIR/$tfile oO_RDONLY:O_DIRECT:r$((2048 * 1024)) || \ @@ -5161,37 +5261,38 @@ test_123a() { # was test 123, statahead(bug 11401) log "statahead was stopped, maybe too many locks held!" fi - [ $delta -eq 0 ] && continue + [ $delta -eq 0 -o $delta_sa -eq 0 ] && continue - if [ $((delta_sa * 100)) -gt $((delta * 105)) ]; then + if [ $((delta_sa * 100)) -gt $((delta * 105)) -a $delta_sa -gt $((delta + 2)) ]; then if [ $SLOWOK -eq 0 ]; then error "ls $i files is slower with statahead!" + debugsave + lctl set_param debug=-1 max=`lctl get_param -n llite.*.statahead_max | head -n 1` lctl set_param -n llite.*.statahead_max 0 lctl get_param llite.*.statahead_max cancel_lru_locks mdc cancel_lru_locks osc - $LCTL dk > /dev/null + $LCTL clear stime=`date +%s` time ls -l $DIR/$tdir | wc -l etime=`date +%s` - $LCTL dk > $TMP/sanity_test_123a_${i}_disable_${etime}.log - delta=$((etime - stime)) - log "ls $i files without statahead: $delta sec, dump to $TMP/sanity_test_123a_${i}_disable_${etime}.log" - lctl set_param llite.*.statahead_max=$max + error "ls $i files (again) without statahead: $((etime - stime)) sec" + lctl set_param debug=-1 + lctl set_param llite.*.statahead_max=$max lctl get_param -n llite.*.statahead_max | grep '[0-9]' cancel_lru_locks mdc cancel_lru_locks osc - $LCTL dk > /dev/null + $LCTL clear stime=`date +%s` time ls -l $DIR/$tdir | wc -l etime=`date +%s` - $LCTL dk > $TMP/sanity_test_123a_${i}_enable_${etime}.log - delta_sa=$((etime - stime)) - log "ls $i files with statahead: $delta_sa sec, dump to $TMP/sanity_test_123a_${i}_enable_${etime}.log" + error "ls $i files (again) with statahead: $((etime - stime)) sec" lctl get_param -n llite.*.statahead_stats + + debugrestore else log "ls $i files is slower with statahead!" fi @@ -5200,7 +5301,7 @@ test_123a() { # was test 123, statahead(bug 11401) [ $delta -gt 20 ] && break [ $delta -gt 8 ] && MULT=$((50 / delta)) - [ "$SLOW" = "no" -a $delta -gt 3 ] && break + [ "$SLOW" = "no" -a $delta -gt 5 ] && break done log "ls done" @@ -5218,7 +5319,7 @@ run_test 123a "verify statahead work" test_123b () { # statahead(bug 15027) mkdir -p $DIR/$tdir createmany -o $DIR/$tdir/$tfile-%d 1000 - + cancel_lru_locks mdc cancel_lru_locks osc @@ -5286,7 +5387,7 @@ test_124a() { log "LVF=$LVF" local OLD_LVF=`lctl get_param -n $NSDIR.pool.lock_volume_factor` lctl set_param -n $NSDIR.pool.lock_volume_factor $LVF - + # Let's make sure that we really have some margin. Client checks # cached locks every 10 sec. SLEEP=$((SLEEP+20)) @@ -5419,7 +5520,7 @@ test_126() { # bug 12829/13455 run_test 126 "check that the fsgid provided by the client is taken into account" test_127() { # bug 15521 - $LSTRIPE -i 0 -c 1 $DIR/$tfile + $SETSTRIPE -i 0 -c 1 $DIR/$tfile || error "setstripe failed" $LCTL set_param osc.*.stats=0 FSIZE=$((2048 * 1024)) dd if=/dev/zero of=$DIR/$tfile bs=$FSIZE count=1 @@ -5431,7 +5532,7 @@ test_127() { # bug 15521 echo "got $COUNT $NAME" [ ! $MIN ] && error "Missing min value for $NAME proc entry" eval $NAME=$COUNT || error "Wrong proc format" - + case $NAME in read_bytes|write_bytes) [ $MIN -lt 4096 ] && error "min is too small: $MIN" @@ -5814,6 +5915,64 @@ test_131e() { } run_test 131e "test read hitting hole" +get_ost_param() { + local token=$1 + local gl_sum=0 + for node in $(osts_nodes); do + gl=$(do_node $node "$LCTL get_param -n ost.OSS.ost.stats" | awk '/'$token'/ {print $2}' | head -n 1) + [ x$gl = x"" ] && gl=0 + gl_sum=$((gl_sum + gl)) + done + echo $gl +} + +som_mode_switch() { + local som=$1 + local gl1=$2 + local gl2=$3 + + if [ x$som = x"enabled" ]; then + [ $((gl2 - gl1)) -gt 0 ] && error "no glimpse RPC is expected" + do_facet mgs "$LCTL conf_param $FSNAME.mdt.som=disabled" + else + [ $((gl2 - gl1)) -gt 0 ] || error "some glimpse RPC is expected" + do_facet mgs "$LCTL conf_param $FSNAME.mdt.som=enabled" + fi + + # do remount to make new mount-conf parameters actual + echo remounting... + sync + stopall + setupall +} + +test_132() { #1028, SOM + local num=$(get_mds_dir $DIR) + local mymds=mds${num} + + dd if=/dev/zero of=$DIR/$tfile count=1 2>/dev/null + cancel_lru_locks osc + + som1=$(do_facet $mymds "$LCTL get_param mdt.*.som" | awk -F= ' {print $2}' | head -n 1) + + gl1=$(get_ost_param "ldlm_glimpse_enqueue") + stat $DIR/$tfile >/dev/null + gl2=$(get_ost_param "ldlm_glimpse_enqueue") + echo "====> SOM is "$som1", "$((gl2 - gl1))" glimpse RPC occured" + cancel_lru_locks osc + som_mode_switch $som1 $gl1 $gl2 + + som2=$(do_facet $mymds "$LCTL get_param mdt.*.som" | awk -F= ' {print $2}' | head -n 1) + [ $som1 != $som2 ] || error "som is still "$som2 + + gl1=$(get_ost_param "ldlm_glimpse_enqueue") + stat $DIR/$tfile >/dev/null + gl2=$(get_ost_param "ldlm_glimpse_enqueue") + echo "SOM is "$som2", "$((gl2 - gl1))" glimpse RPC occured" + som_mode_switch $som2 $gl1 $gl2 +} +run_test 132 "som avoids glimpse rpc" + test_140() { #bug-17379 mkdir -p $DIR/$tdir || error "Creating dir $DIR/$tdir" cd $DIR/$tdir || error "Changing to $DIR/$tdir" @@ -5875,76 +6034,81 @@ test_150() { run_test 150 "truncate/append tests" function roc_access() { - ACCNUM=`$LCTL get_param -n obdfilter.*.stats | \ - grep 'cache_access'| awk '{print $2}' | \ - awk '{sum=sum+$3} END{print sum}'` - echo $ACCNUM + local list=$(comma_list $(osts_nodes)) + ACCNUM=`do_nodes $list $LCTL get_param -n obdfilter.*.stats | \ + grep 'cache_access'| awk '{print $2}' | \ + awk '{sum=sum+$3} END{print sum}'` + echo $ACCNUM } function roc_hit() { - ACCNUM=`$LCTL get_param -n obdfilter.*.stats | \ - grep 'cache_hit'|awk '{print $2}' | \ - awk '{sum=sum+$1} END{print sum}'` - echo $ACCNUM + local list=$(comma_list $(osts_nodes)) + ACCNUM=`do_nodes $list $LCTL get_param -n obdfilter.*.stats | \ + grep 'cache_hit'|awk '{print $2}' | \ + awk '{sum=sum+$1} END{print sum}'` + echo $ACCNUM } test_151() { - local CPAGES=3 + remote_ost_nodsh && skip "remote OST with nodsh" && return - # check whether obdfilter is cache capable at all - if ! $LCTL get_param -n obdfilter.*.read_cache_enable; then - echo "not cache-capable obdfilter" - return 0 - fi + local CPAGES=3 + local list=$(comma_list $(osts_nodes)) - # check cache is enabled on all obdfilters - if $LCTL get_param -n obdfilter.*.read_cache_enable | grep 0 >&/dev/null; then - echo "oss cache is disabled" - return 0 - fi + # check whether obdfilter is cache capable at all + if ! do_nodes $list $LCTL get_param -n obdfilter.*.read_cache_enable > /dev/null; then + echo "not cache-capable obdfilter" + return 0 + fi - $LCTL set_param -n obdfilter.*.writethrough_cache_enable 1 + # check cache is enabled on all obdfilters + if do_nodes $list $LCTL get_param -n obdfilter.*.read_cache_enable | grep 0 >&/dev/null; then + echo "oss cache is disabled" + return 0 + fi - # pages should be in the case right after write - dd if=/dev/urandom of=$DIR/$tfile bs=4k count=$CPAGES || error "dd failed" - local BEFORE=`roc_hit` - cancel_lru_locks osc - cat $DIR/$tfile >/dev/null - local AFTER=`roc_hit` - if ! let "AFTER - BEFORE == CPAGES"; then - error "NOT IN CACHE: before: $BEFORE, after: $AFTER" - fi + do_nodes $list $LCTL set_param -n obdfilter.*.writethrough_cache_enable 1 - # the following read invalidates the cache - cancel_lru_locks osc - $LCTL set_param -n obdfilter.*.read_cache_enable 0 - cat $DIR/$tfile >/dev/null + # pages should be in the case right after write + dd if=/dev/urandom of=$DIR/$tfile bs=4k count=$CPAGES || error "dd failed" + local BEFORE=`roc_hit` + cancel_lru_locks osc + cat $DIR/$tfile >/dev/null + local AFTER=`roc_hit` + if ! let "AFTER - BEFORE == CPAGES"; then + error "NOT IN CACHE: before: $BEFORE, after: $AFTER" + fi - # now data shouldn't be found in the cache - BEFORE=`roc_hit` - cancel_lru_locks osc - cat $DIR/$tfile >/dev/null - AFTER=`roc_hit` - if let "AFTER - BEFORE != 0"; then - error "IN CACHE: before: $BEFORE, after: $AFTER" - fi + # the following read invalidates the cache + cancel_lru_locks osc + do_nodes $list $LCTL set_param -n obdfilter.*.read_cache_enable 0 + cat $DIR/$tfile >/dev/null - $LCTL set_param -n obdfilter.*.read_cache_enable 1 - rm -f $DIR/$tfile + # now data shouldn't be found in the cache + BEFORE=`roc_hit` + cancel_lru_locks osc + cat $DIR/$tfile >/dev/null + AFTER=`roc_hit` + if let "AFTER - BEFORE != 0"; then + error "IN CACHE: before: $BEFORE, after: $AFTER" + fi + + do_nodes $list $LCTL set_param -n obdfilter.*.read_cache_enable 1 + rm -f $DIR/$tfile } run_test 151 "test cache on oss and controls ===============================" test_152() { local TF="$TMP/$tfile" - # simulate ENOMEM during write -#define OBD_FAIL_OST_NOMEM 0x226 + # simulate ENOMEM during write +#define OBD_FAIL_OST_NOMEM 0x226 lctl set_param fail_loc=0x80000226 dd if=/dev/urandom of=$TF bs=6096 count=1 || error "dd failed" cp $TF $DIR/$tfile sync || error "sync failed" lctl set_param fail_loc=0 - + # discard client's cache cancel_lru_locks osc @@ -5953,7 +6117,7 @@ test_152() { cmp $TF $DIR/$tfile || error "cmp failed" lctl set_param fail_loc=0 - rm -f $TF + rm -f $TF } run_test 152 "test read/write with enomem ============================" @@ -5985,7 +6149,6 @@ err17935 () { fi } test_160() { - do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog on USER=$(do_facet $SINGLEMDS lctl --device $MDT0 changelog_register -n) echo "Registered as changelog user $USER" do_facet $SINGLEMDS lctl get_param -n mdd.$MDT0.changelog_users | \ @@ -6000,8 +6163,17 @@ test_160() { ln -s $DIR/$tdir/pics/2008/portland.jpg $DIR/$tdir/pics/desktop.jpg rm $DIR/$tdir/pics/desktop.jpg - # verify contents $LFS changelog $MDT0 | tail -5 + + echo "verifying changelog mask" + do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog_mask="-mkdir" + mkdir -p $DIR/$tdir/pics/2009/sofia + do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog_mask="+mkdir" + mkdir $DIR/$tdir/pics/2009/zachary + DIRS=$($LFS changelog $MDT0 | tail -5 | grep -c MKDIR) + [ $DIRS -eq 1 ] || err17935 "changelog mask count $DIRS != 1" + + # verify contents echo "verifying target fid" fidc=$($LFS changelog $MDT0 | grep timestamp | grep "CREAT" | \ tail -1 | awk '{print $5}') @@ -6013,35 +6185,46 @@ test_160() { tail -1 | awk '{print $6}') fidf=$($LFS path2fid $DIR/$tdir/pics/zach) [ "$fidc" == "p=$fidf" ] || \ - err17935 "pfid in changelog $fidc != dir fid $fidf" + err17935 "pfid in changelog $fidc != dir fid $fidf" - echo "verifying user clear" - USERS=$(( $(do_facet $SINGLEMDS lctl get_param -n \ - mdd.$MDT0.changelog_users | wc -l) - 2 )) - FIRST_REC=$($LFS changelog $MDT0 | head -1 | awk '{print $1}') - $LFS changelog_clear $MDT0 $USER $(($FIRST_REC + 5)) - USER_REC=$(do_facet $SINGLEMDS lctl get_param -n \ + USER_REC1=$(do_facet $SINGLEMDS lctl get_param -n \ mdd.$MDT0.changelog_users | grep $USER | awk '{print $2}') - [ $USER_REC == $(($FIRST_REC + 5)) ] || \ - err17935 "user index should be $(($FIRST_REC + 5)); is $USER_REC" - CLEAR_REC=$($LFS changelog $MDT0 | head -1 | awk '{print $1}') - [ $CLEAR_REC == $(($FIRST_REC + 6)) -o $USERS -gt 1 ] || \ - err17935 "first index should be $(($FIRST_REC + 6)); is $PURGE_REC" + $LFS changelog_clear $MDT0 $USER $(($USER_REC1 + 5)) + USER_REC2=$(do_facet $SINGLEMDS lctl get_param -n \ + mdd.$MDT0.changelog_users | grep $USER | awk '{print $2}') + echo "verifying user clear: $(( $USER_REC1 + 5 )) == $USER_REC2" + [ $USER_REC2 == $(($USER_REC1 + 5)) ] || \ + err17935 "user index should be $(($USER_REC1 + 5)); is $USER_REC2" + + MIN_REC=$(do_facet $SINGLEMDS lctl get_param mdd.$MDT0.changelog_users | \ + awk 'min == "" || $2 < min {min = $2}; END {print min}') + FIRST_REC=$($LFS changelog $MDT0 | head -1 | awk '{print $1}') + echo "verifying min purge: $(( $MIN_REC + 1 )) == $FIRST_REC" + [ $FIRST_REC == $(($MIN_REC + 1)) ] || \ + err17935 "first index should be $(($MIN_REC + 1)); is $FIRST_REC" echo "verifying user deregister" do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $USER do_facet $SINGLEMDS lctl get_param -n mdd.$MDT0.changelog_users | \ grep -q $USER && error "User $USER still found in changelog_users" - [ $USERS -eq 1 ] && \ - do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog off || true + USERS=$(( $(do_facet $SINGLEMDS lctl get_param -n \ + mdd.$MDT0.changelog_users | wc -l) - 2 )) + if [ $USERS -eq 0 ]; then + LAST_REC1=$(do_facet $SINGLEMDS lctl get_param -n \ + mdd.$MDT0.changelog_users | head -1 | awk '{print $3}') + touch $DIR/$tdir/chloe + LAST_REC2=$(do_facet $SINGLEMDS lctl get_param -n \ + mdd.$MDT0.changelog_users | head -1 | awk '{print $3}') + echo "verify changelogs are off if we were the only user: $LAST_REC1 == $LAST_REC2" + [ $LAST_REC1 == $LAST_REC2 ] || error "changelogs not off" + else + echo "$USERS other changelog users; can't verify off" + fi } run_test 160 "changelog sanity" test_161() { - # need local MDT for fid2path - remote_mds && skip "remote MDS" && return - mkdir -p $DIR/$tdir cp /etc/hosts $DIR/$tdir/$tfile mkdir $DIR/$tdir/foo1 @@ -6051,8 +6234,8 @@ test_161() { ln $DIR/$tdir/$tfile $DIR/$tdir/foo1/luna ln $DIR/$tdir/$tfile $DIR/$tdir/foo2/thor local FID=$($LFS path2fid $DIR/$tdir/$tfile | tr -d '[') - if [ "$($LFS fid2path ${mds1_svc} $FID | wc -l)" != "5" ]; then - $LFS fid2path ${mds1_svc} $FID + if [ "$($LFS fid2path $DIR $FID | wc -l)" != "5" ]; then + $LFS fid2path $DIR $FID err17935 "bad link ea" fi # middle @@ -6063,9 +6246,9 @@ test_161() { rm $DIR/$tdir/$tfile # rename mv $DIR/$tdir/foo1/sofia $DIR/$tdir/foo2/maggie - if [ "$($LFS fid2path ${mds1_svc} --link 1 $FID)" != "/$tdir/foo2/maggie" ] + if [ "$($LFS fid2path $DIR --link 1 $FID)" != "/$tdir/foo2/maggie" ] then - $LFS fid2path ${mds1_svc} $FID + $LFS fid2path $DIR $FID err17935 "bad link rename" fi rm $DIR/$tdir/foo2/maggie @@ -6074,11 +6257,11 @@ test_161() { local longname=filename_avg_len_is_thirty_two_ createmany -l$DIR/$tdir/foo1/luna $DIR/$tdir/foo2/$longname 1000 || \ error "failed to hardlink many files" - links=$($LFS fid2path ${mds1_svc} $FID | wc -l) + links=$($LFS fid2path $DIR $FID | wc -l) echo -n "${links}/1000 links in link EA" [ ${links} -gt 60 ] || err17935 "expected at least 60 links in link EA" unlinkmany $DIR/$tdir/foo2/$longname 1000 || \ - error "failed to unlink many hardlinks" + error "failed to unlink many hardlinks" } run_test 161 "link ea sanity" @@ -6101,9 +6284,6 @@ check_path() { } test_162() { - # need local MDT for fid2path - remote_mds && skip "remote MDS" && return - # Make changes to filesystem mkdir -p $DIR/$tdir/d2 touch $DIR/$tdir/d2/$tfile @@ -6112,24 +6292,43 @@ test_162() { mkdir -p $DIR/$tdir/d2/a/b/c mkdir -p $DIR/$tdir/d2/p/q/r FID=$($LFS path2fid $DIR/$tdir/d2/$tfile | tr -d '[') - check_path "/$tdir/d2/$tfile" ${mds1_svc} $FID --link 0 + check_path "/$tdir/d2/$tfile" $DIR $FID --link 0 ln $DIR/$tdir/d2/$tfile $DIR/$tdir/d2/p/q/r/hlink mv $DIR/$tdir/d2/$tfile $DIR/$tdir/d2/a/b/c/new_file FID=$($LFS path2fid $DIR/$tdir/d2/a/b/c/new_file | tr -d '[') - check_path "/$tdir/d2/a/b/c/new_file" ${mds1_svc} $FID --link 1 - check_path "/$tdir/d2/p/q/r/hlink" ${mds1_svc} $FID --link 0 + # fid2path dir/fsname should both work + check_path "/$tdir/d2/a/b/c/new_file" $FSNAME $FID --link 1 + check_path "/$tdir/d2/p/q/r/hlink" $DIR $FID --link 0 # check that there are 2 links - ${LFS} fid2path ${mds1_svc} $FID | wc -l | grep -q 2 || \ - err17935 "expected 2 links" + ${LFS} fid2path $DIR $FID | wc -l | grep -q 2 || \ + err17935 "expected 2 links" rm $DIR/$tdir/d2/p/q/r/hlink - check_path "/$tdir/d2/a/b/c/new_file" ${mds1_svc} $FID --link 0 - # Doesnt work with CMD yet: 17935 + check_path "/$tdir/d2/a/b/c/new_file" $DIR $FID --link 0 + # Doesnt work with CMD yet: 17935 return 0 } run_test 162 "path lookup sanity" +test_169() { + # do directio so as not to populate the page cache + log "creating a 10 Mb file" + multiop $DIR/$tfile oO_CREAT:O_DIRECT:O_RDWR:w$((10*1048576))c || error "multiop failed while creating a file" + log "starting reads" + dd if=$DIR/$tfile of=/dev/null bs=4096 & + log "truncating the file" + multiop $DIR/$tfile oO_TRUNC:c || error "multiop failed while truncating the file" + log "killing dd" + kill %+ || true # reads might have finished + echo "wait until dd is finished" + wait + log "removing the temporary file" + rm -rf $DIR/$tfile || error "tmp file removal failed" +} +run_test 169 "parallel read and truncate should not deadlock ===" + test_170() { + $LCTL clear # bug 18514 $LCTL debug_daemon start $TMP/${tfile}_log_good touch $DIR/$tfile $LCTL debug_daemon stop @@ -6140,40 +6339,40 @@ test_170() { rm -rf $DIR/$tfile $LCTL debug_daemon stop - $LCTL df $TMP/${tfile}_log_bad 2&> $TMP/${tfile}_log_bad.out || + $LCTL df $TMP/${tfile}_log_bad > $TMP/${tfile}_log_bad.out 2>&1 || error "lctl df log_bad failed" local bad_line=$(tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $9}') local good_line1=$(tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $5}') - $LCTL df $TMP/${tfile}_log_good 2&>$TMP/${tfile}_log_good.out + $LCTL df $TMP/${tfile}_log_good > $TMP/${tfile}_log_good.out 2>&1 local good_line2=$(tail -n 1 $TMP/${tfile}_log_good.out | awk '{print $5}') - [ "$bad_line" ] && [ "$good_line1" ] && [ "$good_line2" ] || + [ "$bad_line" ] && [ "$good_line1" ] && [ "$good_line2" ] || error "bad_line good_line1 good_line2 are empty" - + + cat $TMP/${tfile}_log_good >> $TMP/${tfile}_logs_corrupt + cat $TMP/${tfile}_log_bad >> $TMP/${tfile}_logs_corrupt cat $TMP/${tfile}_log_good >> $TMP/${tfile}_logs_corrupt - cat $TMP/${tfile}_log_bad >> $TMP/${tfile}_logs_corrupt - cat $TMP/${tfile}_log_good >> $TMP/${tfile}_logs_corrupt - $LCTL df $TMP/${tfile}_logs_corrupt 2&> $TMP/${tfile}_log_bad.out + $LCTL df $TMP/${tfile}_logs_corrupt > $TMP/${tfile}_log_bad.out 2>&1 local bad_line_new=$(tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $9}') local good_line_new=$(tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $5}') - [ "$bad_line_new" ] && [ "$good_line_new" ] || + [ "$bad_line_new" ] && [ "$good_line_new" ] || error "bad_line_new good_line_new are empty" - + local expected_good=$((good_line1 + good_line2*2)) - rm -rf $TMP/${tfile}* + rm -f $TMP/${tfile}* if [ $bad_line -ne $bad_line_new ]; then error "expected $bad_line bad lines, but got $bad_line_new" - return 1 + return 1 fi if [ $expected_good -ne $good_line_new ]; then error "expected $expected_good good lines, but got $good_line_new" - return 2 + return 2 fi true } @@ -6314,6 +6513,17 @@ test_212() { } run_test 212 "Sendfile test ============================================" +test_213() { + dd if=/dev/zero of=$DIR/$tfile bs=4k count=4 + cancel_lru_locks osc + lctl set_param fail_loc=0x8000040f + # generate a read lock + cat $DIR/$tfile > /dev/null + # write to the file, it will try to cancel the above read lock. + cat /etc/hosts >> $DIR/$tfile +} +run_test 213 "OSC lock completion and cancel race don't crash - bug 18829" + # # tests that do cleanup/setup should be run at the end #