X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=57bbc172f294b22e28c370554ad04c094182e86e;hb=710793eab04e08c5ce671cb99ed1b2db4baa333f;hp=764e4133b7cd2f566ee659403686eb8d03a008ab;hpb=aa7b44da8412e203fd6727c0b534d9f9b24aad76;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 764e413..57bbc17 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13,7 +13,7 @@ ALWAYS_EXCEPT=" 27o 27q 42a 42b 42c 42d 45 74b 75 $SANI #ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27m 42a 42b 42c 42d 45 68 76"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 78 101 103 115 120g" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="24o 27m 36f 36g 51b 51c 60c 63 64b 68 71 73 77f 78 101 103 115 120g 124b" # Tests that fail on uml CPU=`awk '/model/ {print $4}' /proc/cpuinfo` @@ -63,21 +63,6 @@ STRIPES_PER_OBJ=-1 CHECK_GRANT=${CHECK_GRANT:-"yes"} GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} -if [ $UID -ne 0 ]; then - echo "Warning: running as non-root uid $UID" - RUNAS_ID="$UID" - RUNAS="" -else - RUNAS_ID=${RUNAS_ID:-500} - RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} - - # $RUNAS_ID may get set incorrectly somewhere else - if [ $RUNAS_ID -eq 0 ]; then - echo "Error: \$RUNAS_ID set to 0, but \$UID is also 0!" - exit 1 - fi -fi - export NAME=${NAME:-local} SAVE_PWD=$PWD @@ -141,7 +126,10 @@ MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo [ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo -rm -rf $DIR/[Rdfs][1-9]* +rm -rf $DIR/[Rdfs][0-9]* + +# $RUNAS_ID may get set incorrectly somewhere else +[ $UID -eq 0 -a $RUNAS_ID -eq 0 ] && error "\$RUNAS_ID set to 0, but \$UID is also 0!" check_runas_id $RUNAS_ID $RUNAS @@ -160,6 +148,8 @@ echo # add a newline after mke2fs. umask 077 +OLDDEBUG="`sysctl -n lnet.debug 2> /dev/null`" +sysctl -w lnet.debug=-1 2> /dev/null || true test_0() { touch $DIR/$tfile $CHECKSTAT -t file $DIR/$tfile || error @@ -830,7 +820,7 @@ test_27c() { [ `$GETSTRIPE $DIR/d27/f01 | grep -A 10 obdidx | wc -l` -eq 4 ] || error "two-stripe file doesn't have two stripes" pass - log "== test_27d: write to two stripe file file f01 ================" + log "== test_27c: write to two stripe file file f01 ================" dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4 || error "dd failed" } run_test 27c "create two stripe file f01 =======================" @@ -1551,6 +1541,14 @@ test_33a() { } run_test 33a "test open file(mode=0444) with O_RDWR (should return error)" +test_33b() { + rm -fr $DIR/d33 + mkdir -p $DIR/d33 + chown $RUNAS_ID $DIR/d33 + $RUNAS $OPENFILE -f 1286739555 $DIR/d33/f33 && error "create" || true +} +run_test 33b "test open file with malformed flags (No panic and return error)" + TEST_34_SIZE=${TEST_34_SIZE:-2000000000000} test_34a() { rm -f $DIR/f34 @@ -1681,9 +1679,9 @@ test_36f() { } run_test 36f "utime on file racing with OST BRW write ==========" -export FMD_MAX_AGE=`do_facet ost1 cat $LPROC/obdfilter/*/client_cache_seconds | head -n 1` test_36g() { - [ -z "$FMD_MAX_AGE" ] && skip "skip test for remote OST" && return + remote_ost && skip "remote OST" && return + export FMD_MAX_AGE=`do_facet ost1 cat $LPROC/obdfilter/*/client_cache_seconds 2> /dev/null | head -n 1` FMD_BEFORE="`awk '/ll_fmd_cache/ { print $2 }' /proc/slabinfo`" touch $DIR/d36/$tfile sleep $((FMD_MAX_AGE + 12)) @@ -1949,7 +1947,7 @@ test_44a() { [ "$nstripe" -gt "$OSTCOUNT" ] && skip "Wrong default_stripe_count: $nstripe (OSTCOUNT: $OSTCOUNT)" && return local stride=`$LCTL lov_getconfig $DIR | grep default_stripe_size: | \ awk '{print $2}'` - if [ $nstripe -eq 0 -o $nstripe -gt 1024 ] ; then + if [ $nstripe -eq 0 -o $nstripe -eq -1 ] ; then nstripe=`$LCTL lov_getconfig $DIR | grep obd_count: | awk '{print $2}'` fi @@ -2656,8 +2654,19 @@ run_test 60a "llog sanity tests run from kernel module ==========" test_60b() { # bug 6411 dmesg > $DIR/$tfile - LLOG_COUNT=`dmesg | grep -A 1000 "$TEST60_HEAD" | grep -c llog_test` - [ $LLOG_COUNT -gt 50 ] && error "CDEBUG_LIMIT not limiting messages"|| true + LLOG_COUNT=`dmesg | awk "/$TEST60_HEAD/{marker = 1; from_marker = 0;} + /llog.test/ { + if (marker) + from_marker++ + from_begin++ + } + END { + if (marker) + print from_marker + else + print from_begin + }"` + [ $LLOG_COUNT -gt 50 ] && error "CDEBUG_LIMIT not limiting messages ($LLOG_COUNT)"|| true } run_test 60b "limit repeated messages from CERROR/CWARN ========" @@ -2811,7 +2820,7 @@ test_65e() { mkdir -p $DIR/d65 $SETSTRIPE $DIR/d65 || error "setstripe" - $GETSTRIPE -v $DIR/d65 | grep "has no stripe info" || error "no stripe info failed" + $GETSTRIPE -v $DIR/d65 | grep "Default" || error "no stripe info failed" touch $DIR/d65/f6 $LVERIFY $DIR/d65 $DIR/d65/f6 || error "lverify failed" } @@ -2827,7 +2836,7 @@ test_65g() { mkdir -p $DIR/d65 $SETSTRIPE $DIR/d65 -s $(($STRIPESIZE * 2)) -i 0 -c 1 || error "setstripe" $SETSTRIPE -d $DIR/d65 || error "setstripe" - $GETSTRIPE -v $DIR/d65 | grep "has no stripe info" || \ + $GETSTRIPE -v $DIR/d65 | grep "Default" || \ error "delete default stripe failed" } run_test 65g "directory setstripe -d ===========================" @@ -2847,9 +2856,10 @@ test_65i() { # bug6367 run_test 65i "set non-default striping on root directory (bug 6367)=" test_65j() { # bug6367 + sync; sleep 1 # if we aren't already remounting for each test, do so for this test if [ "$CLEANUP" = ":" -a "$I_MOUNTED" = "yes" ]; then - cleanup -f || error "failed to unmount" + cleanup || error "failed to unmount" setup fi $SETSTRIPE -d $MOUNT || error "setstripe failed" @@ -2858,6 +2868,7 @@ run_test 65j "set default striping on root directory (bug 6367)=" test_65k() { # bug11679 [ "$OSTCOUNT" -lt 2 ] && skip "too few OSTs" && return + remote_mds_nodsh && skip "remote MDS" && return echo "Check OST status: " MDS_OSCS=`do_facet mds lctl dl | awk '/[oO][sS][cC].*md[ts]/ { print $4 }'` @@ -2902,12 +2913,6 @@ test_66() { } run_test 66 "update inode blocks count on client ===============" -test_67() { - [ ! -f sanity-sec.sh ] && skip "missing subtest sanity-sec.sh" && return - sh sanity-sec.sh -} -run_test 67 "security test =====================================" - LLOOP= cleanup_68() { trap 0 @@ -3255,6 +3260,13 @@ run_test 76 "destroy duplicate inodes in client inode cache ====" export ORIG_CSUM="" set_checksums() { + # Note: in sptlrpc modes which enable its own bulk checksum, the + # original crc32_le bulk checksum will be automatically disabled, + # and the OBD_FAIL_OSC_CHECKSUM_SEND/OBD_FAIL_OSC_CHECKSUM_RECEIVE + # will be checked by sptlrpc code against sptlrpc bulk checksum. + # In this case set_checksums() will not be no-op, because sptlrpc + # bulk checksum will be enabled all through the test. + [ "$ORIG_CSUM" ] || ORIG_CSUM=`cat $LPROC/osc/*/checksums | head -n1` for f in $LPROC/osc/*/checksums; do echo $1 >> $f @@ -3372,16 +3384,27 @@ unset F77_TMP test_78() { # bug 10901 NSEQ=5 F78SIZE=$(($(awk '/MemFree:/ { print $2 }' /proc/meminfo) / 1024)) + echo "MemFree: $F78SIZE, Max file size: $MAXFREE" + MEMTOTAL=$(($(awk '/MemTotal:/ { print $2 }' /proc/meminfo) / 2048)) + echo "MemTotal: $((MEMTOTAL * 2))" + [ $F78SIZE -gt $MEMTOTAL ] && F78SIZE=$MEMTOTAL [ $F78SIZE -gt 512 ] && F78SIZE=512 [ $F78SIZE -gt $((MAXFREE / 1024)) ] && F78SIZE=$((MAXFREE / 1024)) SMALLESTOST=`lfs df $DIR |grep OST | awk '{print $4}' |sort -n |head -1` - [ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024)) ] && \ - F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024)) + echo "Smallest OST: $SMALLESTOST" + [ $SMALLESTOST -lt 10240 ] && \ + skip "too small OSTSIZE, useless to run large O_DIRECT test" && return 0 + + [ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024 - 5)) ] && \ + F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024 - 5)) + [ "$SLOW" = "no" ] && NSEQ=1 && [ $F78SIZE -gt 32 ] && F78SIZE=32 + echo "File size: $F78SIZE" $SETSTRIPE $DIR/$tfile -c -1 || error "setstripe failed" for i in `seq 1 $NSEQ` do + FSIZE=$(($F78SIZE / ($NSEQ - $i + 1))) echo directIO rdwr round $i of $NSEQ - $DIRECTIO rdwr $DIR/$tfile 0 $F78SIZE 1048576 || error "rdwr failed" + $DIRECTIO rdwr $DIR/$tfile 0 $FSIZE 1048576||error "rdwr failed" done rm -f $DIR/$tfile @@ -3793,10 +3816,7 @@ run_test 102d "star restore stripe info from tarfile,not keep osts ===========" test_102e() { # b10930: star test for trusted.lov xattr star --xhelp 2>&1 | grep -q nolustre - if [ $? -ne 0 ] - then - skip "being skipped because a lustre-aware star is not installed." && return - fi + [ $? -ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 mkdir -p $DIR/d102e @@ -3809,10 +3829,7 @@ run_test 102e "star restore stripe info from tarfile, keep osts ===========" test_102f() { # b10930: star test for trusted.lov xattr star --xhelp 2>&1 | grep -q nolustre - if [ $? -ne 0 ] - then - skip "being skipped because a lustre-aware star is not installed." && return - fi + [ $? -ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 mkdir -p $DIR/d102f @@ -3826,10 +3843,7 @@ run_test 102f "star copy files, not keep osts ===========" test_102g() { # b10930: star test for trusted.lov xattr star --xhelp 2>&1 | grep -q nolustre - if [ $? -ne 0 ] - then - skip "being skipped because a lustre-aware star is not installed." && return - fi + [ $? -ne 0 ] && skip "lustre-aware star is not installed" && return [ "$OSTCOUNT" -lt "4" ] && skip "skipping 4-stripe test" && return setup_test102 mkdir -p $DIR/d102g @@ -3853,6 +3867,12 @@ test_103 () { [ -z "$(which setfacl 2>/dev/null)" ] && skip "could not find setfacl" && return $GSS && skip "could not run under gss" && return + declare -a identity_old + + for num in `seq $MDSCOUNT`; do + switch_identity $num true || identity_old[$num]=$? + done + SAVE_UMASK=`umask` umask 0022 cd $DIR @@ -3863,9 +3883,8 @@ test_103 () { run_acl_subtest getfacl-noacl || error echo "performing misc..." run_acl_subtest misc || error -# XXX add back permission test when we support supplementary groups. -# echo "performing permissions..." -# run_acl_subtest permissions || error + echo "performing permissions..." + run_acl_subtest permissions || error echo "performing setfacl..." run_acl_subtest setfacl || error @@ -3878,6 +3897,12 @@ test_103 () { cd $SAVE_PWD umask $SAVE_UMASK + + for num in `seq $MDSCOUNT`; do + if [ "${identity_old[$num]}" = 1 ]; then + switch_identity $num false || identity_old[$num]=$? + fi + done } run_test 103 "acl test =========================================" @@ -4144,6 +4169,8 @@ run_test 118a "verify O_SYNC works ==========" test_118b() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_ENOENT 0x217 @@ -4177,6 +4204,8 @@ run_test 118b "Reclaim dirty pages on fatal error ==========" test_118c() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_EROFS 0x216 @@ -4217,6 +4246,8 @@ run_test 118c "Fsync blocks on EROFS until dirty pages are flushed ==========" test_118d() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_BRW_PAUSE_BULK @@ -4319,6 +4350,8 @@ test_118g() { run_test 118g "Don't stay in wait if we got local -ENOMEM ==========" test_118h() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e @@ -4351,6 +4384,8 @@ test_118h() { run_test 118h "Verify timeout in handling recoverables errors ==========" test_118i() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e @@ -4387,6 +4422,8 @@ test_118i() { run_test 118i "Fix error before timeout in recoverable error ==========" test_118j() { + remote_ost_nodsh && skip "remote OST" && return + reset_async #define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220 @@ -4463,82 +4500,24 @@ test_119b() # bug 11737 sync multiop $DIR/$tfile oO_RDONLY:O_DIRECT:r$((2048 * 1024)) || \ error "direct read failed" + rm -f $DIR/$tfile } run_test 119b "Sparse directIO read must return actual read amount" -LDLM_POOL_CTL_RECALC=1 -LDLM_POOL_CTL_SHRINK=2 - -disable_pool_recalc() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL & ~LDLM_POOL_CTL_RECALC)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -enable_pool_recalc() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL | LDLM_POOL_CTL_RECALC)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -disable_pool_shrink() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL & ~LDLM_POOL_CTL_SHRINK)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -enable_pool_shrink() { - for NSD in $LPROC/ldlm/namespaces/*$1*; do - if test -f $NSD/pool/control; then - CONTROL=`cat $NSD/pool/control` - CONTROL=$((CONTROL | LDLM_POOL_CTL_SHRINK)) - echo "$CONTROL" > $NSD/pool/control - fi - done -} - -disable_pool() { - disable_pool_shrink $1 - disable_pool_recalc $1 -} - -enable_pool() { - enable_pool_shrink $1 - enable_pool_recalc $1 -} - -lru_resize_enable() +test_119c() # bug 13099 { - enable_pool osc - enable_pool "filter-$FSNAME" - enable_pool mdc - enable_pool "mds-$FSNAME" -} - -lru_resize_disable() -{ - disable_pool osc - disable_pool "filter-$FSNAME" - disable_pool mdc - disable_pool "mds-$FSNAME" + BSIZE=1048576 + directio write $DIR/$tfile 3 1 $BSIZE || error "direct write failed" + directio readhole $DIR/$tfile 0 2 $BSIZE || error "reading hole failed" + rm -f $DIR/$tfile } +run_test 119c "Testing for direct read hitting hole" test_120a() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc cancel_lru_locks mdc stat $DIR/$tdir > /dev/null can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` @@ -4548,14 +4527,16 @@ test_120a() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120a "Early Lock Cancel: mkdir test" test_120b() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc cancel_lru_locks mdc stat $DIR/$tdir > /dev/null can1=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` @@ -4565,14 +4546,16 @@ test_120b() { can2=`awk '/ldlm_cancel/ {print $2}' $LPROC/ldlm/services/ldlm_canceld/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120b "Early Lock Cancel: create test" test_120c() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc mkdir -p $DIR/$tdir/d1 $DIR/$tdir/d2 touch $DIR/$tdir/d1/f1 cancel_lru_locks mdc @@ -4584,14 +4567,16 @@ test_120c() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120c "Early Lock Cancel: link test" test_120d() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc touch $DIR/$tdir cancel_lru_locks mdc stat $DIR/$tdir > /dev/null @@ -4602,14 +4587,16 @@ test_120d() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120d "Early Lock Cancel: setattr test" test_120e() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc dd if=/dev/zero of=$DIR/$tdir/f1 count=1 cancel_lru_locks mdc cancel_lru_locks osc @@ -4622,14 +4609,16 @@ test_120e() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120e "Early Lock Cancel: unlink test" test_120f() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc mkdir -p $DIR/$tdir/d1 $DIR/$tdir/d2 dd if=/dev/zero of=$DIR/$tdir/d1/f1 count=1 dd if=/dev/zero of=$DIR/$tdir/d2/f2 count=1 @@ -4645,14 +4634,16 @@ test_120f() { blk2=`awk '/ldlm_bl_callback/ {print $2}' $LPROC/ldlm/services/ldlm_cbd/stats` [ $can1 -eq $can2 ] || error $((can2-can1)) "cancel RPC occured." [ $blk1 -eq $blk2 ] || error $((blk2-blk1)) "blocking RPC occured." - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120f "Early Lock Cancel: rename test" test_120g() { [ -z "`grep early_lock_cancel $LPROC/mdc/*/connect_flags`" ] && \ skip "no early lock cancel on server" && return 0 - lru_resize_disable + lru_resize_disable mdc + lru_resize_disable osc count=10000 echo create $count files mkdir -p $DIR/$tdir @@ -4678,69 +4669,49 @@ test_120g() { echo total: $((can2-can1)) cancels, $((blk2-blk1)) blockings sleep 2 # wait for commitment of removal - lru_resize_enable + lru_resize_enable mdc + lru_resize_enable osc } run_test 120g "Early Lock Cancel: performance test" test_121() { #bug #10589 rm -rf $DIR/$tfile - writes=`dd if=/dev/zero of=$DIR/$tfile count=1 2>&1 | awk 'BEGIN { FS="+" } /out/ {print $1}'` + writes=$(LANG=C dd if=/dev/zero of=$DIR/$tfile count=1 2>&1 | awk -F '+' '/out/ {print $1}') #define OBD_FAIL_LDLM_CANCEL_RACE 0x310 sysctl -w lustre.fail_loc=0x310 cancel_lru_locks osc > /dev/null - reads=`dd if=$DIR/$tfile of=/dev/null 2>&1 | awk 'BEGIN { FS="+" } /in/ {print $1}'` + reads=$(LANG=C dd if=$DIR/$tfile of=/dev/null 2>&1 | awk -F '+' '/in/ {print $1}') sysctl -w lustre.fail_loc=0 [ "$reads" -eq "$writes" ] || error "read" $reads "blocks, must be" $writes } run_test 121 "read cancel race =========" -cmd_cancel_lru_locks() { - NS=$1 - test "x$NS" = "x" && NS="mdc" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - cancel_lru_locks $d - fi - done -} - test_124a() { [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ skip "no lru resize on server" && return 0 - cmd_cancel_lru_locks "mdc" - lru_resize_enable - - # we want to test main pool functionality, that is cancel based on SLV - # this is why shrinkers are disabled - disable_pool_shrink "mds-$FSNAME" - disable_pool_shrink mdc - NR=2000 mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" # use touch to produce $NR new locks log "create $NR files at $DIR/$tdir" - for ((i=0;i<$NR;i++)); do touch $DIR/$tdir/f$i; done + createmany -o $DIR/$tdir/f $NR || + error "failed to create $NR files in $DIR/$tdir" + cancel_lru_locks mdc + ls -l $DIR/$tdir > /dev/null + NSDIR="" LRU_SIZE=0 - for d in `find $LPROC/ldlm/namespaces | grep mdc-`; do - if test -f $d/lru_size; then - LRU_SIZE=`cat $d/lru_size` - if test $LRU_SIZE -gt 0; then - log "using $d namespace" - NSDIR=$d - break - fi + for F in $LPROC/ldlm/namespaces/*mdc-*/lru_size; do + LRU_SIZE=$(cat $F) + if [ $LRU_SIZE -gt $(default_lru_size) ]; then + NSDIR=$(dirname $F) + log "using $(basename $NSDIR) namespace" + break fi done - if test -z $NSDIR; then - skip "No cached locks created!" - return 0 - fi - - if test $LRU_SIZE -lt 100; then + if [ -z "$NSDIR" -o $LRU_SIZE -lt $(default_lru_size) ]; then skip "Not enough cached locks created!" return 0 fi @@ -4763,7 +4734,7 @@ test_124a() { # in the case of CMD, LRU_SIZE_B != $NR in most of cases LVF=$(($MAX_HRS * 60 * 60 * $LIMIT / $SLEEP)) LRU_SIZE_B=$LRU_SIZE - log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE_B lock(s)" + log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE lock(s)" OLD_LVF=`cat $NSDIR/pool/lock_volume_factor` echo "$LVF" > $NSDIR/pool/lock_volume_factor log "sleep for $((SLEEP+SLEEP_ADD))s" @@ -4773,102 +4744,84 @@ test_124a() { [ $LRU_SIZE_B -gt $LRU_SIZE_A ] || { error "No locks dropped in "$((SLEEP+SLEEP_ADD))"s. LRU size: $LRU_SIZE_A" - lru_resize_enable unlinkmany $DIR/$tdir/f $NR return } log "Dropped "$((LRU_SIZE_B-LRU_SIZE_A))" locks in "$((SLEEP+SLEEP_ADD))"s" - lru_resize_enable log "unlink $NR files at $DIR/$tdir" unlinkmany $DIR/$tdir/f $NR } run_test 124a "lru resize =======================================" -set_lru_size() { - NS=$1 - SIZE=$2 - test "x$NS" = "x" && NS="mdc" - test "x$SIZE" = "x" && SIZE="0" - test $SIZE -lt 0 && SIZE="0" - test $SIZE -gt 0 && ACTION="disabled" || ACTION="enabled" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - log "$(basename $d):" - log " lru resize $ACTION" - log " lru_size=$SIZE" - echo $SIZE > $d/lru_size - fi - done -} - -get_lru_size() { - NS=$1 - test "x$NS" = "x" && NS="mdc" - for d in `find $LPROC/ldlm/namespaces | grep $NS`; do - if test -f $d/lru_size; then - log "$(basename $d):" - log " lru_size=$(cat $d/lru_size)" - fi - done -} - test_124b() { [ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \ skip "no lru resize on server" && return 0 + # even for cmd no matter what metadata namespace to use for getting + # the limit, we use appropriate. NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1` LIMIT=`cat $NSDIR/pool/limit` - NR_CPU=$(awk '/processor/' /proc/cpuinfo | wc -l) - # 100 locks here is default value for non-shrinkable lru as well - # as the order to switch to static lru managing policy - # define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus()) - LDLM_DEFAULT_LRU_SIZE=$((100 * NR_CPU)) - - NR=$((LIMIT-(LIMIT/3))) - log "starting lru resize disable cycle" - set_lru_size "mdc-" $LDLM_DEFAULT_LRU_SIZE - + NR=$(($(default_lru_size)*20)) + if [ $NR -gt $LIMIT ]; then + NR=$LIMIT + fi + lru_resize_disable mdc mkdir -p $DIR/$tdir/disable_lru_resize || - error "failed to create $DIR/$tdir/disable_lru_resize" + error "failed to create $DIR/$tdir/disable_lru_resize" createmany -o $DIR/$tdir/disable_lru_resize/f $NR log "doing ls -la $DIR/$tdir/disable_lru_resize 3 times" + cancel_lru_locks mdc stime=`date +%s` - ls -la $DIR/$tdir/disable_lru_resize > /dev/null - ls -la $DIR/$tdir/disable_lru_resize > /dev/null - ls -la $DIR/$tdir/disable_lru_resize > /dev/null + PID="" + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/disable_lru_resize > /dev/null & + PID="$PID $!" + wait $PID etime=`date +%s` nolruresize_delta=$((etime-stime)) log "ls -la time: $nolruresize_delta seconds" - get_lru_size "mdc-" - - log "starting lru resize enable cycle" - mkdir -p $DIR/$tdir/enable_lru_resize || - error "failed to create $DIR/$tdir/enable_lru_resize" + log "lru_size = $(cat $NSDIR/lru_size)" + unlinkmany $DIR/$tdir/disable_lru_resize/f $NR - # 0 locks means here flush lru and switch to lru resize policy - set_lru_size "mdc-" 0 + lru_resize_enable mdc + mkdir -p $DIR/$tdir/enable_lru_resize || + error "failed to create $DIR/$tdir/enable_lru_resize" createmany -o $DIR/$tdir/enable_lru_resize/f $NR log "doing ls -la $DIR/$tdir/enable_lru_resize 3 times" + cancel_lru_locks mdc stime=`date +%s` - ls -la $DIR/$tdir/enable_lru_resize > /dev/null - ls -la $DIR/$tdir/enable_lru_resize > /dev/null - ls -la $DIR/$tdir/enable_lru_resize > /dev/null + PID="" + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + sleep 2 + ls -la $DIR/$tdir/enable_lru_resize > /dev/null & + PID="$PID $!" + wait $PID etime=`date +%s` lruresize_delta=$((etime-stime)) log "ls -la time: $lruresize_delta seconds" - get_lru_size "mdc-" + log "lru_size = $(cat $NSDIR/lru_size)" - if test $lruresize_delta -gt $nolruresize_delta; then - log "ls -la is $((lruresize_delta - $nolruresize_delta))s slower with lru resize enabled" - elif test $nolruresize_delta -gt $lruresize_delta; then - log "ls -la is $((nolruresize_delta - $lruresize_delta))s faster with lru resize enabled" + if [ $lruresize_delta -gt $nolruresize_delta ]; then + log "ls -la is $(((lruresize_delta - $nolruresize_delta) * 100 / $nolruresize_delta))% slower with lru resize enabled" + elif [ $nolruresize_delta -gt $lruresize_delta ]; then + log "ls -la is $(((nolruresize_delta - $lruresize_delta) * 100 / $nolruresize_delta))% faster with lru resize enabled" else log "lru resize performs the same with no lru resize" fi + unlinkmany $DIR/$tdir/enable_lru_resize/f $NR } run_test 124b "lru resize (performance test) ======================="