X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=8516e09e1d7073a9b9700443b3ca435891df669c;hp=0ada97c04ab0a3bd2f234e8c8248a2dac42f27a6;hb=937656dca50d4b0ba8501fbea0a450c663cdabcc;hpb=b69882e89235d5c0eb392a4b0705b018474201b6 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh old mode 100644 new mode 100755 index 0ada97c..8516e09 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -79,8 +79,8 @@ init_logging [ "$SLOW" = "no" ] && EXCEPT_SLOW="24D 27m 64b 68 71 115 300o" if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then - # bug number for skipped test: LU-4536 LU-1957 LU-2805 - ALWAYS_EXCEPT="$ALWAYS_EXCEPT 65ic 180 184c" + # bug number for skipped test: LU-4536 LU-1957 + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 65ic 180" # 4 13 (min)" [ "$SLOW" = "no" ] && EXCEPT_SLOW="$EXCEPT_SLOW 51b 51ba" fi @@ -1017,7 +1017,9 @@ test_24u() { # bug12192 run_test 24u "create stripe file" page_size() { - getconf PAGE_SIZE + local size + size=$(getconf PAGE_SIZE 2>/dev/null) + echo -n ${size:-4096} } simple_cleanup_common() { @@ -2022,6 +2024,7 @@ run_test 27C "check full striping across all OSTs" test_27D() { [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return [ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return local POOL=${POOL:-testpool} local first_ost=0 local last_ost=$(($OSTCOUNT - 1)) @@ -2032,8 +2035,13 @@ test_27D() { test_mkdir -p $DIR/$tdir pool_add $POOL || error "pool_add failed" pool_add_targets $POOL $ost_range || error "pool_add_targets failed" - llapi_layout_test -d$DIR/$tdir -p$POOL -o$OSTCOUNT || + + local skip27D + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.64) ] && + skip27D = "-s 29" + llapi_layout_test -d$DIR/$tdir -p$POOL -o$OSTCOUNT $skip27D || error "llapi_layout_test failed" + cleanup_pools || error "cleanup_pools failed" } run_test 27D "validate llapi_layout API" @@ -2119,6 +2127,7 @@ run_test 28 
"create/mknod/mkdir with bad file types ============" test_29() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return 0 + sync; sleep 1; sync # flush out any dirty pages from previous tests cancel_lru_locks test_mkdir $DIR/d29 touch $DIR/d29/foo @@ -2400,8 +2409,13 @@ test_31p() { run_test 31p "remove of open striped directory" cleanup_test32_mount() { + local rc=0 trap 0 - $UMOUNT $DIR/$tdir/ext2-mountpoint + local loopdev=$(losetup -a | grep $EXT2_DEV | sed -ne 's/:.*$/p') + $UMOUNT $DIR/$tdir/ext2-mountpoint || rc=$? + losetup -d $loopdev || true + rm -rf $DIR/$tdir/ext2-mountpoint + return $rc } test_32a() { @@ -2607,8 +2621,13 @@ test_32p() { run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/$tfile" cleanup_testdir_mount() { + local rc=0 trap 0 - $UMOUNT $DIR/$tdir + local loopdev=$(losetup -a | grep $EXT2_DEV | sed -ne 's/:.*$/p') + $UMOUNT $DIR/$tdir || rc=$? + losetup -d $loopdev || true + rm -rf $DIR/$tdir + return $rc } test_32q() { @@ -2808,6 +2827,7 @@ cleanup_33f() { test_33f() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return mkdir $DIR/$tdir chmod go+rwx $DIR/$tdir @@ -3436,6 +3456,7 @@ test_39m() { run_test 39m "test atime and mtime before 1970" test_39n() { # LU-3832 + remote_mds_nodsh && skip "remote MDS with nodsh" && return local atime_diff=$(do_facet $SINGLEMDS \ lctl get_param -n mdd.*MDT0000*.atime_diff) local atime0 @@ -3510,7 +3531,7 @@ test_39p() { run_test 39p "remote directory cached attributes updated after create ========" -test_39p() { # LU-8041 +test_39q() { # LU-8041 local testdir=$DIR/$tdir mkdir -p $testdir multiop_bg_pause $testdir D_c || error "multiop failed" @@ -3520,7 +3541,7 @@ test_39p() { # LU-8041 local atime=$(stat -c %X $testdir) [ "$atime" -ne 0 ] || error "atime is zero" } -run_test 39p "close won't zero out atime" +run_test 39q "close won't zero out atime" test_40() { dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 @@ -4024,6 +4045,7 
@@ run_test 48e "Access to recreated parent subdir (should return errors)" test_49() { # LU-1030 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return # get ost1 size - lustre-OST0000 ost1_size=$(do_facet ost1 $LFS df | grep ${ost1_svc} | awk '{ print $4 }') @@ -4326,6 +4348,7 @@ find_loop_dev() { } cleanup_54c() { + local rc=0 loopdev="$DIR/loop54c" trap 0 @@ -4436,6 +4459,26 @@ test_56a() { # was test_56 } run_test 56a "check $GETSTRIPE" +test_56b() { + test_mkdir $DIR/$tdir + NUMDIRS=3 + for i in $(seq 1 $NUMDIRS); do + test_mkdir $DIR/$tdir/dir$i + done + + # test lfs getdirstripe default mode is non-recursion, which is + # different from lfs getstripe + dircnt=$($LFS getdirstripe $DIR/$tdir | grep -c lmv_stripe_count) + [[ $dircnt -eq 1 ]] || + error "$LFS getdirstripe: found $dircnt, not 1" + dircnt=$($LFS getdirstripe --recursive $DIR/$tdir | + grep -c lmv_stripe_count) + [[ $dircnt -eq $((NUMDIRS + 1)) ]] || + error "$LFS getdirstripe --recursive: found $dircnt, \ + not $((NUMDIRS + 1))" +} +run_test 56b "check $LFS getdirstripe" + NUMFILES=3 NUMDIRS=3 setup_56() { @@ -5304,14 +5347,14 @@ test_60d() { # verify "lctl mark" is even working" MESSAGE="test message ID $RANDOM $$" - $LCTL mark "$HOSTNAME $MESSAGE" || error "$LCTL mark failed" + $LCTL mark "$MESSAGE" || error "$LCTL mark failed" dmesg | grep -q "$MESSAGE" || error "didn't find debug marker in log" lctl set_param printk=0 || error "set lnet.printk failed" lctl get_param -n printk | grep emerg || error "lnet.printk dropped emerg" MESSAGE="new test message ID $RANDOM $$" # Assume here that libcfs_debug_mark_buffer() uses D_WARNING - $LCTL mark "$HOSTNAME $MESSAGE" || error "$LCTL mark failed" + $LCTL mark "$MESSAGE" || error "$LCTL mark failed" dmesg | grep -q "$MESSAGE" && error "D_WARNING wasn't masked" || true lctl set_param -n printk="$SAVEPRINTK" @@ -5320,6 +5363,7 @@ run_test 60d "test printk console message masking" test_60e() 
{ [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return touch $DIR/$tfile #define OBD_FAIL_MDS_LLOG_CREATE_FAILED2 0x15b do_facet mds1 lctl set_param fail_loc=0x15b @@ -5552,34 +5596,59 @@ test_65k() { # bug11679 [[ $OSTCOUNT -lt 2 ]] && skip_env "too few OSTs" && return remote_mds_nodsh && skip "remote MDS with nodsh" && return - echo "Check OST status: " - local MDS_OSCS=`do_facet $SINGLEMDS lctl dl | - awk '/[oO][sS][cC].*md[ts]/ { print $4 }'` + local disable_precreate=true + [ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.8.54) ] && + disable_precreate=false - for OSC in $MDS_OSCS; do - echo $OSC "is activate" - do_facet $SINGLEMDS lctl --device %$OSC activate - done + echo "Check OST status: " + local MDS_OSCS=$(do_facet $SINGLEMDS lctl dl | + awk '/[oO][sS][cC].*md[ts]/ { print $4 }') - mkdir -p $DIR/$tdir - for INACTIVE_OSC in $MDS_OSCS; do - echo "Deactivate: " $INACTIVE_OSC - do_facet $SINGLEMDS lctl --device %$INACTIVE_OSC deactivate - for STRIPE_OSC in $MDS_OSCS; do - OST=`osc_to_ost $STRIPE_OSC` - IDX=`do_facet $SINGLEMDS lctl get_param -n lov.*md*.target_obd | - awk -F: /$OST/'{ print $1 }' | head -n 1` - - [ -f $DIR/$tdir/$IDX ] && continue - echo "$SETSTRIPE -i $IDX -c 1 $DIR/$tdir/$IDX" - $SETSTRIPE -i $IDX -c 1 $DIR/$tdir/$IDX - RC=$? - [ $RC -ne 0 ] && error "setstripe should have succeeded" - done - rm -f $DIR/$tdir/* - echo $INACTIVE_OSC "is Activate." - do_facet $SINGLEMDS lctl --device %$INACTIVE_OSC activate - done + for OSC in $MDS_OSCS; do + echo $OSC "is active" + do_facet $SINGLEMDS lctl --device %$OSC activate + done + + for INACTIVE_OSC in $MDS_OSCS; do + local ost=$(osc_to_ost $INACTIVE_OSC) + local ostnum=$(do_facet $SINGLEMDS lctl get_param -n \ + lov.*md*.target_obd | + awk -F: /$ost/'{ print $1 }' | head -n 1) + + mkdir -p $DIR/$tdir + $SETSTRIPE -i $ostnum -c 1 $DIR/$tdir + createmany -o $DIR/$tdir/$tfile.$ostnum. 
1000 + + echo "Deactivate: " $INACTIVE_OSC + do_facet $SINGLEMDS lctl --device %$INACTIVE_OSC deactivate + + local count=$(do_facet $SINGLEMDS "lctl get_param -n \ + osp.$ost*MDT0000.create_count") + local max_count=$(do_facet $SINGLEMDS "lctl get_param -n \ + osp.$ost*MDT0000.max_create_count") + $disable_precreate && + do_facet $SINGLEMDS "lctl set_param -n \ + osp.$ost*MDT0000.max_create_count=0" + + for idx in $(seq 0 $((OSTCOUNT - 1))); do + [ -f $DIR/$tdir/$idx ] && continue + echo "$SETSTRIPE -i $idx -c 1 $DIR/$tdir/$idx" + $SETSTRIPE -i $idx -c 1 $DIR/$tdir/$idx || + error "setstripe $idx should succeed" + rm -f $DIR/$tdir/$idx || error "rm $idx failed" + done + unlinkmany $DIR/$tdir/$tfile.$ostnum. 1000 + rmdir $DIR/$tdir + + do_facet $SINGLEMDS "lctl set_param -n \ + osp.$ost*MDT0000.max_create_count=$max_count" + do_facet $SINGLEMDS "lctl set_param -n \ + osp.$ost*MDT0000.create_count=$count" + do_facet $SINGLEMDS lctl --device %$INACTIVE_OSC activate + echo $INACTIVE_OSC "is Activate" + + wait_osc_import_state mds ost$ostnum FULL + done } run_test 65k "validate manual striping works properly with deactivated OSCs" @@ -5591,6 +5660,12 @@ test_65l() { # bug 12836 } run_test 65l "lfs find on -1 stripe dir ========================" +test_65m() { + $RUNAS $SETSTRIPE -c 2 $MOUNT && error "setstripe should fail" + true +} +run_test 65m "normal user can't set filesystem default stripe" + # bug 2543 - update blocks count on client test_66() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return @@ -5876,8 +5951,8 @@ set_inode_slab_tunables() { test_76() { # Now for bug 20433, added originally in bug 1443 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return - local SLAB_SETTINGS=`get_inode_slab_tunables` - local CPUS=`getconf _NPROCESSORS_ONLN` + local SLAB_SETTINGS=$(get_inode_slab_tunables) + local CPUS=$(getconf _NPROCESSORS_ONLN 2>/dev/null) # we cannot set limit below 1 which means 1 inode in each # per-cpu cache is still allowed 
set_inode_slab_tunables "1 1 0" @@ -5894,7 +5969,7 @@ test_76() { # Now for bug 20433, added originally in bug 1443 AFTER_INODES=$(num_inodes) echo "after inodes: $AFTER_INODES" local wait=0 - while [[ $((AFTER_INODES-1*CPUS)) -gt $BEFORE_INODES ]]; do + while [[ $((AFTER_INODES-1*${CPUS:-1})) -gt $BEFORE_INODES ]]; do sleep 2 AFTER_INODES=$(num_inodes) wait=$((wait+2)) @@ -6080,6 +6155,11 @@ run_test 77j "client only supporting ADLER32" rm -f $F77_TMP unset F77_TMP +cleanup_test_78() { + trap 0 + rm -f $DIR/$tfile +} + test_78() { # bug 10901 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_ost || { skip_env "local OST" && return; } @@ -6109,6 +6189,8 @@ test_78() { # bug 10901 [[ $SMALLESTOST -lt 10240 ]] && skip "too small OSTSIZE, useless to run large O_DIRECT test" && return 0 + trap cleanup_test_78 EXIT + [[ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024 - 80)) ]] && F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024 - 80)) @@ -6121,7 +6203,7 @@ test_78() { # bug 10901 $DIRECTIO rdwr $DIR/$tfile 0 $FSIZE 1048576||error "rdwr failed" done - rm -f $DIR/$tfile + cleanup_test_78 } run_test 78 "handle large O_DIRECT writes correctly ============" @@ -7198,6 +7280,7 @@ test_103a() { [ -z "$(which setfacl 2>/dev/null)" ] && skip_env "could not find setfacl" && return $GSS && skip "could not run under gss" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return gpasswd -a daemon bin # LU-5641 do_facet $SINGLEMDS gpasswd -a daemon bin # LU-5641 @@ -7258,6 +7341,7 @@ test_103a() { run_test 103a "acl test =========================================" test_103b() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return local noacl=false local MDT_DEV=$(mdsdevname ${SINGLEMDS//mds/}) local mountopts=$MDS_MOUNT_OPTS @@ -8117,6 +8201,7 @@ run_test 119d "The DIO path should try to send a new rpc once one is completed" test_120a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && 
return test_mkdir -p $DIR/$tdir [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 @@ -8148,6 +8233,7 @@ run_test 120a "Early Lock Cancel: mkdir test" test_120b() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return test_mkdir $DIR/$tdir [ -z "$(lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel)" ] && \ skip "no early lock cancel on server" && return 0 @@ -8175,6 +8261,7 @@ run_test 120b "Early Lock Cancel: create test" test_120c() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return test_mkdir -c1 $DIR/$tdir [ -z "$(lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel)" ] && \ skip "no early lock cancel on server" && return 0 @@ -8205,6 +8292,7 @@ run_test 120c "Early Lock Cancel: link test" test_120d() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_mds_nodsh && skip "remote MDS with nodsh" && return test_mkdir -p -c1 $DIR/$tdir [ -z "$(lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel)" ] && \ skip "no early lock cancel on server" && return 0 @@ -8235,6 +8323,7 @@ test_120e() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return ! 
$($LCTL get_param -n mdc.*.connect_flags | grep -q early_lock_can) && skip "no early lock cancel on server" && return 0 + remote_mds_nodsh && skip "remote MDS with nodsh" && return local dlmtrace_set=false test_mkdir -p -c1 $DIR/$tdir @@ -8276,6 +8365,7 @@ test_120f() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 + remote_mds_nodsh && skip "remote MDS with nodsh" && return test_mkdir -p -c1 $DIR/$tdir lru_resize_disable mdc lru_resize_disable osc @@ -8314,6 +8404,7 @@ test_120g() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return [ -z "`lctl get_param -n mdc.*.connect_flags | grep early_lock_cancel`" ] && \ skip "no early lock cancel on server" && return 0 + remote_mds_nodsh && skip "remote MDS with nodsh" && return lru_resize_disable mdc lru_resize_disable osc count=10000 @@ -8830,9 +8921,9 @@ test_129() { return fi remote_mds_nodsh && skip "remote MDS with nodsh" && return - ENOSPC=28 - EFBIG=27 - has_warning=0 + local ENOSPC=28 + local EFBIG=27 + local has_warning=0 rm -rf $DIR/$tdir mkdir -p $DIR/$tdir @@ -8864,7 +8955,7 @@ test_129() { check_mds_dmesg '"has reached"' || error_exit "has reached message should be output" - [ $has_warning ] || + [ $has_warning -eq 0 ] && error_exit "warning message should be output" I=$(stat -c%s "$DIR/$tdir") @@ -9281,12 +9372,14 @@ test_133a() { # verify mdt stats first. 
mkdir ${testdir} || error "mkdir failed" check_stats $SINGLEMDS "mkdir" 1 - touch ${testdir}/${tfile} || "touch failed" + touch ${testdir}/${tfile} || error "touch failed" check_stats $SINGLEMDS "open" 1 check_stats $SINGLEMDS "close" 1 - mknod ${testdir}/${tfile}-pipe p || "mknod failed" - check_stats $SINGLEMDS "mknod" 1 - rm -f ${testdir}/${tfile}-pipe || "pipe remove failed" + [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.8.54) ] && { + mknod ${testdir}/${tfile}-pipe p || error "mknod failed" + check_stats $SINGLEMDS "mknod" 2 + } + rm -f ${testdir}/${tfile}-pipe || error "pipe remove failed" check_stats $SINGLEMDS "unlink" 1 rm -f ${testdir}/${tfile} || error "file remove failed" check_stats $SINGLEMDS "unlink" 2 @@ -9315,7 +9408,7 @@ test_133b() { remote_mds_nodsh && skip "remote MDS with nodsh" && return local testdir=$DIR/${tdir}/stats_testdir mkdir -p ${testdir} || error "mkdir failed" - touch ${testdir}/${tfile} || "touch failed" + touch ${testdir}/${tfile} || error "touch failed" cancel_lru_locks mdc # clear stats. @@ -9579,6 +9672,8 @@ test_133f() { run_test 133f "Check for LBUGs/Oopses/unreadable files in /proc" test_133g() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + remote_ost_nodsh && skip "remote OST with nodsh" && return # Second verifying writability. 
find $proc_dirs \ -type f \ @@ -9612,6 +9707,7 @@ test_133g() { run_test 133g "Check for Oopses on bad io area writes/reads in /proc" test_134a() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] && skip "Need MDS version at least 2.7.54" && return @@ -9620,7 +9716,7 @@ test_134a() { local nsdir="ldlm.namespaces.*-MDT0000-mdc-*" local unused=$($LCTL get_param -n $nsdir.lock_unused_count) - [ $unused -eq 0 ] || "$unused locks are not cleared" + [ $unused -eq 0 ] || error "$unused locks are not cleared" local nr=1000 createmany -o $DIR/$tdir/f $nr || @@ -9647,6 +9743,7 @@ test_134a() { run_test 134a "Server reclaims locks when reaching lock_reclaim_threshold" test_134b() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.54) ]] && skip "Need MDS version at least 2.7.54" && return @@ -9688,9 +9785,9 @@ run_test 134b "Server rejects lock request when reaching lock_limit_mb" test_140() { #bug-17379 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return - test_mkdir -p $DIR/$tdir || error "Creating dir $DIR/$tdir" - cd $DIR/$tdir || error "Changing to $DIR/$tdir" - cp /usr/bin/stat . || error "Copying stat to $DIR/$tdir" + test_mkdir -p $DIR/$tdir || error "Creating dir $DIR/$tdir" + cd $DIR/$tdir || error "Changing to $DIR/$tdir" + cp $(which stat) . 
|| error "Copying stat to $DIR/$tdir" # VFS limits max symlink depth to 5(4KSTACK) or 7(8KSTACK) or 8 # For kernel > 3.5, bellow only tests consecutive symlink (MAX 40) @@ -10137,6 +10234,7 @@ test_154c() { run_test 154c "lfs path2fid and fid2path multiple arguments" test_154d() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.53) ]] && skip "Need MDS version at least 2.5.53" && return @@ -10325,67 +10423,114 @@ test_155_big_load() { true } +save_writethrough() { + local facets=$(get_facets OST) + + save_lustre_params $facets "obdfilter.*.writethrough_cache_enable" > $1 + save_lustre_params $facets "osd-*.*.writethrough_cache_enable" >> $1 +} + test_155a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read on set_cache writethrough on test_155_small_load + restore_lustre_params < $p + rm -f $p } run_test 155a "Verify small file correctness: read cache:on write_cache:on" test_155b() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read on set_cache writethrough off test_155_small_load + restore_lustre_params < $p + rm -f $p } run_test 155b "Verify small file correctness: read cache:on write_cache:off" test_155c() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read off set_cache writethrough on test_155_small_load + restore_lustre_params < $p + rm -f $p } run_test 155c "Verify small file correctness: read cache:off write_cache:on" test_155d() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read off set_cache writethrough off test_155_small_load + restore_lustre_params < $p + rm -f $p } run_test 155d 
"Verify small file correctness: read cache:off write_cache:off" test_155e() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read on set_cache writethrough on test_155_big_load + restore_lustre_params < $p + rm -f $p } run_test 155e "Verify big file correctness: read cache:on write_cache:on" test_155f() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read on set_cache writethrough off test_155_big_load + restore_lustre_params < $p + rm -f $p } run_test 155f "Verify big file correctness: read cache:on write_cache:off" test_155g() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read off set_cache writethrough on test_155_big_load + restore_lustre_params < $p + rm -f $p } run_test 155g "Verify big file correctness: read cache:off write_cache:on" test_155h() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read off set_cache writethrough off test_155_big_load + restore_lustre_params < $p + rm -f $p } run_test 155h "Verify big file correctness: read cache:off write_cache:off" @@ -10396,12 +10541,14 @@ test_156() { local BEFORE local AFTER local file="$DIR/$tfile" + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" [ "$(facet_fstype ost1)" = "zfs" -a \ $(lustre_version_code ost1 -lt $(version_code 2.6.93)) ] && skip "LU-1956/LU-2261: stats not implemented on OSD ZFS" && return + save_writethrough $p roc_hit_init log "Turn on read and write cache" @@ -10530,10 +10677,18 @@ test_156() { fi rm -f $file + restore_lustre_params < $p + rm -f $p } run_test 156 "Verification of tunables" #Changelogs +cleanup_changelog () { + trap 0 + echo "Deregistering changelog client $CL_USER" + do_facet 
$SINGLEMDS $LCTL --device $MDT0 changelog_deregister $CL_USER +} + err17935 () { if [[ $MDSCOUNT -gt 1 ]]; then error_ignore bz17935 $* @@ -10574,10 +10729,12 @@ test_160a() { local CL_USERS="mdd.$MDT0.changelog_users" local GET_CL_USERS="do_facet $SINGLEMDS $LCTL get_param -n $CL_USERS" - USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_register -n) - echo "Registered as changelog user $USER" - $GET_CL_USERS | grep -q $USER || - error "User $USER not found in changelog_users" + CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + changelog_register -n) + echo "Registered as changelog user $CL_USER" + trap cleanup_changelog EXIT + $GET_CL_USERS | grep -q $CL_USER || + error "User $CL_USER not found in changelog_users" # change something test_mkdir -p $DIR/$tdir/pics/2008/zachy @@ -10621,9 +10778,9 @@ test_160a() { [ "$fidc" == "$fidf" ] || err17935 "pfid in changelog $fidc != dir fid $fidf" - USER_REC1=$($GET_CL_USERS | awk "\$1 == \"$USER\" {print \$2}") - $LFS changelog_clear $MDT0 $USER $(($USER_REC1 + 5)) - USER_REC2=$($GET_CL_USERS | awk "\$1 == \"$USER\" {print \$2}") + USER_REC1=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}") + $LFS changelog_clear $MDT0 $CL_USER $(($USER_REC1 + 5)) + USER_REC2=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}") echo "verifying user clear: $(( $USER_REC1 + 5 )) == $USER_REC2" [ $USER_REC2 == $(($USER_REC1 + 5)) ] || err17935 "user index expected $(($USER_REC1 + 5)) is $USER_REC2" @@ -10638,7 +10795,7 @@ test_160a() { # LU-3446 changelog index reset on MDT restart local MDT_DEV=$(mdsdevname ${SINGLEMDS//mds/}) CUR_REC1=$($GET_CL_USERS | head -n1 | cut -f3 -d' ') - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 stop $SINGLEMDS || error "Fail to stop MDT." start $SINGLEMDS $MDT_DEV $MDS_MOUNT_OPTS || error "Fail to start MDT." 
CUR_REC2=$($GET_CL_USERS | head -n1 | cut -f3 -d' ') @@ -10647,19 +10804,19 @@ test_160a() { err17935 "current index should be $CUR_REC1 is $CUR_REC2" echo "verifying user deregister" - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $USER - $GET_CL_USERS | grep -q $USER && - error "User $USER still in changelog_users" + cleanup_changelog + $GET_CL_USERS | grep -q $CL_USER && + error "User $CL_USER still in changelog_users" USERS=$(( $($GET_CL_USERS | wc -l) - 2 )) - if [ $USERS -eq 0 ]; then + if [ $CL_USER -eq 0 ]; then LAST_REC1=$($GET_CL_USERS | head -n1 | cut -f3 -d' ') touch $DIR/$tdir/chloe LAST_REC2=$($GET_CL_USERS | head -n1 | cut -f3 -d' ') echo "verify changelogs are off: $LAST_REC1 == $LAST_REC2" [ $LAST_REC1 == $LAST_REC2 ] || error "changelogs not off" else - echo "$USERS other changelog users; can't verify off" + echo "$CL_USER other changelog users; can't verify off" fi } run_test 160a "changelog sanity" @@ -10672,10 +10829,12 @@ test_160b() { # LU-3587 local CL_USERS="mdd.$MDT0.changelog_users" local GET_CL_USERS="do_facet $SINGLEMDS $LCTL get_param -n $CL_USERS" - USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_register -n) - echo "Registered as changelog user $USER" - $GET_CL_USERS | grep -q $USER || - error "User $USER not found in changelog_users" + CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + changelog_register -n) + echo "Registered as changelog user $CL_USER" + trap cleanup_changelog EXIT + $GET_CL_USERS | grep -q $CL_USER || + error "User $CL_USER not found in changelog_users" local LONGNAME1=$(str_repeat a 255) local LONGNAME2=$(str_repeat b 255) @@ -10687,11 +10846,8 @@ test_160b() { # LU-3587 mv $LONGNAME1 $LONGNAME2 $LFS changelog $MDT0 | grep RENME - - echo "deregistering $USER" - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $USER - rm -f $LONGNAME2 + cleanup_changelog } run_test 160b "Verify that very long rename doesn't crash in changelog" @@ -10708,8 +10864,9 @@ test_160c() { [ 
$PARALLEL == "yes" ] && skip "skip parallel run" && return # Registration step - local USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ changelog_register -n) + trap cleanup_changelog EXIT rm -rf $DIR/$tdir mkdir -p $DIR/$tdir @@ -10721,11 +10878,10 @@ test_160c() { $LFS changelog $MDT0 TRUNCS=$($LFS changelog $MDT0 | tail -5 | grep -c "TRUNC") [ $TRUNCS -eq 1 ] || err17935 "TRUNC changelog mask count $TRUNCS != 1" - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 # Deregistration step - echo "deregistering $USER" - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $USER + cleanup_changelog } run_test 160c "verify that changelog log catch the truncate event" @@ -10741,21 +10897,22 @@ test_160d() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return # Registration step - local USER=$(do_facet mds1 $LCTL --device $MDT0 \ + CL_USER=$(do_facet mds1 $LCTL --device $MDT0 \ changelog_register -n) + trap cleanup_changelog EXIT mkdir -p $DIR/$tdir/migrate_dir - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 $LFS migrate -m 1 $DIR/$tdir/migrate_dir || error "migrate fails" $LFS changelog $MDT0 MIGRATES=$($LFS changelog $MDT0 | tail -5 | grep -c "MIGRT") - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 [ $MIGRATES -eq 1 ] || error "MIGRATE changelog mask count $MIGRATES != 1" # Deregistration step - do_facet mds1 $LCTL --device $MDT0 changelog_deregister $USER + cleanup_changelog } run_test 160d "verify that changelog log catch the migrate event" @@ -10862,8 +11019,10 @@ test_161c() { # define CLF_RENAME_LAST 0x0001 # rename overwrite a target having nlink = 1 (changelog flag 0x1) - local USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ changelog_register -n) + + trap cleanup_changelog EXIT rm -rf $DIR/$tdir mkdir -p $DIR/$tdir touch $DIR/$tdir/foo_161c @@ 
-10872,10 +11031,8 @@ test_161c() { $LFS changelog $MDT0 | grep RENME local flags=$($LFS changelog $MDT0 | grep RENME | tail -1 | \ cut -f5 -d' ') - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 if [ x$flags != "x0x1" ]; then - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister \ - $USER error "flag $flags is not 0x1" fi echo "rename overwrite a target having nlink = 1," \ @@ -10888,10 +11045,8 @@ test_161c() { mv -f $DIR/$tdir/foo_161c $DIR/$tdir/bar_161c $LFS changelog $MDT0 | grep RENME flags=$($LFS changelog $MDT0 | grep RENME | tail -1 | cut -f5 -d' ') - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 if [ x$flags != "x0x0" ]; then - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister \ - $USER error "flag $flags is not 0x0" fi echo "rename overwrite a target having nlink > 1," \ @@ -10902,10 +11057,8 @@ test_161c() { mv -f $DIR/$tdir/foo_161c $DIR/$tdir/foo2_161c $LFS changelog $MDT0 | grep RENME flags=$($LFS changelog $MDT0 | grep RENME | tail -1 | cut -f5 -d' ') - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 if [ x$flags != "x0x0" ]; then - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister \ - $USER error "flag $flags is not 0x0" fi echo "rename doesn't overwrite a target," \ @@ -10916,10 +11069,8 @@ test_161c() { rm -f $DIR/$tdir/foo2_161c $LFS changelog $MDT0 | grep UNLNK flags=$($LFS changelog $MDT0 | grep UNLNK | tail -1 | cut -f5 -d' ') - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 if [ x$flags != "x0x1" ]; then - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister \ - $USER error "flag $flags is not 0x1" fi echo "unlink a file having nlink = 1," \ @@ -10930,15 +11081,13 @@ test_161c() { rm -f $DIR/$tdir/foobar_161c $LFS changelog $MDT0 | grep UNLNK flags=$($LFS changelog $MDT0 | grep UNLNK | tail -1 | cut -f5 -d' ') - $LFS changelog_clear $MDT0 $USER 0 + $LFS changelog_clear $MDT0 $CL_USER 0 if 
[ x$flags != "x0x0" ]; then - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister \ - $USER error "flag $flags is not 0x0" fi echo "unlink a file having nlink > 1," \ "changelog record has flags of $flags" - do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $USER + cleanup_changelog } run_test 161c "check CL_RENME[UNLINK] changelog record flags" @@ -11426,6 +11575,8 @@ test_184b() { run_test 184b "Forbidden layout swap (will generate errors)" test_184c() { + local cmpn_arg=$(cmp -n 2>&1 | grep "invalid option") + [ -n "$cmpn_arg" ] && skip_env "cmp does not support -n" && return check_swap_layouts_support && return 0 local dir0=$DIR/$tdir/$testnum @@ -11858,10 +12009,11 @@ jobstats_set() { } cleanup_205() { + trap 0 do_facet $SINGLEMDS \ $LCTL set_param mdt.*.job_cleanup_interval=$OLD_INTERVAL [ $OLD_JOBENV != $JOBENV ] && jobstats_set $OLD_JOBENV - do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $CL_USER + cleanup_changelog } test_205() { # Job stats @@ -12425,16 +12577,16 @@ run_test 222a "AGL for ls should not trigger CLIO lock failure ================" test_222b () { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return - rm -rf $DIR/$tdir - test_mkdir -p $DIR/$tdir - $SETSTRIPE -c 1 -i 0 $DIR/$tdir - createmany -o $DIR/$tdir/$tfile 10 - cancel_lru_locks mdc - cancel_lru_locks osc - #define OBD_FAIL_LDLM_AGL_DELAY 0x31a - $LCTL set_param fail_loc=0x31a - rm -r $DIR/$tdir || "AGL for rmdir failed" - $LCTL set_param fail_loc=0 + rm -rf $DIR/$tdir + test_mkdir -p $DIR/$tdir + $SETSTRIPE -c 1 -i 0 $DIR/$tdir + createmany -o $DIR/$tdir/$tfile 10 + cancel_lru_locks mdc + cancel_lru_locks osc + #define OBD_FAIL_LDLM_AGL_DELAY 0x31a + $LCTL set_param fail_loc=0x31a + rm -r $DIR/$tdir || error "AGL for rmdir failed" + $LCTL set_param fail_loc=0 } run_test 222b "AGL for rmdir should not trigger CLIO lock failure =============" @@ -12478,6 +12630,12 @@ run_test 224b "Don't panic on bulk IO failure" test_224c() { # LU-6441 [ 
$PARALLEL == "yes" ] && skip "skip parallel run" && return
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+
+	local p="$TMP/$TESTSUITE-$TESTNAME.parameters"
+	save_writethrough $p
+	set_cache writethrough on
+
 	local pages_per_rpc=$($LCTL get_param \
 				osc.*.max_pages_per_rpc)
 	local at_max=$($LCTL get_param -n at_max)
@@ -12506,6 +12664,8 @@ test_224c() { # LU-6441
 		$timeout || error "conf_param timeout=$timeout failed"
 
 	$LCTL set_param -n $pages_per_rpc
+	restore_lustre_params < $p
+	rm -f $p
 }
 run_test 224c "Don't hang if one of md lost during large bulk RPC"
 
@@ -13159,7 +13319,7 @@ test_230f() {
 
 	# a should be migrated to MDT1, since no other links on MDT0
 	$LFS migrate -m 1 $DIR/$tdir/migrate_dir ||
-		error "migrate dir fails"
+		error "#1 migrate dir fails"
 	mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir)
 	[ $mdt_index == 1 ] || error "migrate_dir is not on MDT1"
 	mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir/a)
@@ -13167,12 +13327,12 @@ test_230f() {
 
 	# a should stay on MDT1, because it is a mulitple link file
 	$LFS migrate -m 0 $DIR/$tdir/migrate_dir ||
-		error "migrate dir fails"
+		error "#2 migrate dir fails"
 	mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir/a)
 	[ $mdt_index == 1 ] || error "a is not on MDT1"
 
 	$LFS migrate -m 1 $DIR/$tdir/migrate_dir ||
-		error "migrate dir fails"
+		error "#3 migrate dir fails"
 
 	a_fid=$($LFS path2fid $DIR/$tdir/migrate_dir/a)
 	ln_fid=$($LFS path2fid $DIR/$tdir/other_dir/ln1)
@@ -13183,7 +13343,7 @@ test_230f() {
 
 	# a should be migrated to MDT0, since no other links on MDT1
 	$LFS migrate -m 0 $DIR/$tdir/migrate_dir ||
-		error "migrate dir fails"
+		error "#4 migrate dir fails"
 	mdt_index=$($LFS getstripe -M $DIR/$tdir/migrate_dir/a)
 	[ $mdt_index == 0 ] || error "a is not on MDT0"
 
@@ -13465,6 +13625,7 @@ test_239() {
 run_test 239 "osp_sync test"
 
 test_239a() { #LU-5297
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
 	touch $DIR/$tfile
 	#define OBD_FAIL_OSP_CHECK_INVALID_REC 0x2100
 	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x2100
@@ -13474,6 +13635,7 @@ test_239a() { #LU-5297
 run_test 239a "process invalid osp sync record correctly"
 
 test_239b() { #LU-5297
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
 	touch $DIR/$tfile1
 	#define OBD_FAIL_OSP_CHECK_ENOMEM 0x2101
 	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x2101
@@ -13488,6 +13650,7 @@ run_test 239b "process osp sync record with ENOMEM error correctly"
 
 test_240() {
 	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
 
 	mkdir -p $DIR/$tdir
 
@@ -13542,6 +13705,7 @@ test_241b() {
 run_test 241b "dio vs dio"
 
 test_242() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
 	mkdir -p $DIR/$tdir
 	touch $DIR/$tdir/$tfile
 
@@ -13597,6 +13761,7 @@ test_245() {
 run_test 245 "check mdc connection flag/data: multiple modify RPCs"
 
 test_246() { # LU-7371
+	remote_ost_nodsh && skip "remote OST with nodsh" && return
 	[ $(lustre_version_code ost1) -lt $(version_code 2.7.62) ] &&
 		skip "Need OST version >= 2.7.62" && return 0
 	do_facet ost1 $LCTL set_param fail_val=4095
@@ -13696,33 +13861,34 @@ run_test 247e "mount .. as fileset"
 
 test_248() {
 	local my_error=error
-	# This test case is time sensitive and maloo uses kvm to run auto test.
+	local fast_read_sav=$($LCTL get_param -n llite.*.fast_read 2>/dev/null)
+	[ -z "$fast_read_sav" ] && skip "no fast read support" && return
+
+	# This test case is time sensitive and Maloo uses KVM to run autotest.
 	# Therefore the complete time of I/O task is unreliable and depends on
-	# the work load on the host machine when the task is running.
-	which virt-what 2> /dev/null && [ "$(virt-what)" != "kvm" ] ||
-		{ echo "no virt-what installed or running in kvm; ignore error";
-		  my_error="error_ignore env=kvm"; }
+	# the workload on the host machine when the task is running.
+	local virt=$(running_in_vm)
+	[ -n "$virt" ] && echo "running in VM '$virt', ignore error" &&
+		my_error="error_ignore env=$virt"
 
 	# create a large file for fast read verification
-	dd if=/dev/zero of=$DIR/$tfile bs=128M count=1 > /dev/null 2>&1
+	dd if=/dev/zero of=$DIR/$tfile bs=1M count=128 > /dev/null 2>&1
 
 	# make sure the file is created correctly
 	$CHECKSTAT -s $((128*1024*1024)) $DIR/$tfile ||
 		{ rm -f $DIR/$tfile; skip "file creation error" && return; }
 
-	local saved_fast_read=$($LCTL get_param -n llite.*.fast_read)
-
 	echo "Test 1: verify that fast read is 4 times faster on cache read"
 
 	# small read with fast read enabled
 	$LCTL set_param -n llite.*.fast_read=1
-	local t_fast=$(eval time -p dd if=$DIR/$tfile of=/dev/null bs=4k 2>&1 |
-		awk '/real/ { print $2 }')
+	local t_fast=$(dd if=$DIR/$tfile of=/dev/null bs=4k 2>&1 |
+		awk '/copied/ { print $6 }')
 
 	# small read with fast read disabled
 	$LCTL set_param -n llite.*.fast_read=0
-	local t_slow=$(eval time -p dd if=$DIR/$tfile of=/dev/null bs=4k 2>&1 |
-		awk '/real/ { print $2 }')
+	local t_slow=$(dd if=$DIR/$tfile of=/dev/null bs=4k 2>&1 |
+		awk '/copied/ { print $6 }')
 
 	# verify that fast read is 4 times faster for cache read
 	[ $(bc <<< "4 * $t_fast < $t_slow") -eq 1 ] ||
@@ -13733,19 +13899,19 @@ test_248() {
 
 	# 1k non-cache read
 	cancel_lru_locks osc
-	local t_1k=$(eval time -p dd if=$DIR/$tfile of=/dev/null bs=1k 2>&1 |
-		awk '/real/ { print $2 }')
+	local t_1k=$(dd if=$DIR/$tfile of=/dev/null bs=1k 2>&1 |
+		awk '/copied/ { print $6 }')
 
 	# 1M non-cache read
 	cancel_lru_locks osc
-	local t_1m=$(eval time -p dd if=$DIR/$tfile of=/dev/null bs=1M 2>&1 |
-		awk '/real/ { print $2 }')
+	local t_1m=$(dd if=$DIR/$tfile of=/dev/null bs=1M 2>&1 |
+		awk '/copied/ { print $6 }')
 
 	# verify that big IO is not 4 times faster than small IO
 	[ $(bc <<< "4 * $t_1k >= $t_1m") -eq 1 ] ||
 		$my_error "bigger IO is way too fast: $t_1k vs $t_1m"
 
-	$LCTL set_param -n llite.*.fast_read=$saved_fast_read
+	$LCTL set_param -n llite.*.fast_read=$fast_read_sav
 	rm -f $DIR/$tfile
 }
 run_test 248 "fast read verification"
@@ -13802,6 +13968,8 @@ test_252() {
 	local num
 	local gen
 
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
+	remote_ost_nodsh && skip "remote OST with nodsh" && return
 	if [ "$(facet_fstype ost1)" != "ldiskfs" -o \
 	     "$(facet_fstype mds1)" != "ldiskfs" ]; then
 		skip "can only run lr_reader on ldiskfs target"
@@ -13886,6 +14054,175 @@ test_254() {
 }
 run_test 254 "Check changelog size"
 
+ladvise_no_type()
+{
+	local type=$1
+	local file=$2
+
+	lfs ladvise -a invalid $file 2>&1 | grep "Valid types" |
+		awk -F: '{print $2}' | grep $type > /dev/null
+	if [ $? -ne 0 ]; then
+		return 0
+	fi
+	return 1
+}
+
+ladvise_no_ioctl()
+{
+	local file=$1
+
+	lfs ladvise -a willread $file > /dev/null 2>&1
+	if [ $? -eq 0 ]; then
+		return 1
+	fi
+
+	lfs ladvise -a willread $file 2>&1 |
+		grep "Inappropriate ioctl for device" > /dev/null
+	if [ $? -eq 0 ]; then
+		return 0
+	fi
+	return 1
+}
+
+ladvise_willread_performance()
+{
+	local repeat=10
+	local average_cache=0
+	local average_ladvise=0
+	for ((i = 1; i <= $repeat; i++)); do
+		echo "Iter $i/$repeat: reading without willread hint"
+		cancel_lru_locks osc
+		do_nodes $(comma_list $(osts_nodes)) \
+			"echo 3 > /proc/sys/vm/drop_caches"
+		local speed_origin=$($READS -f $DIR/$tfile -s $size \
+			-b 4096 -n $((size / 4096)) -t 60 |
+			sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+
+		echo "Iter $i/$repeat: Reading again without willread hint"
+		cancel_lru_locks osc
+		local speed_cache=$($READS -f $DIR/$tfile -s $size \
+			-b 4096 -n $((size / 4096)) -t 60 |
+			sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+
+		echo "Iter $i/$repeat: reading with willread hint"
+		cancel_lru_locks osc
+		do_nodes $(comma_list $(osts_nodes)) \
+			"echo 3 > /proc/sys/vm/drop_caches"
+		lfs ladvise -a willread $DIR/$tfile ||
+			error "Ladvise failed"
+		local speed_ladvise=$($READS -f $DIR/$tfile -s $size \
+			-b 4096 -n $((size / 4096)) -t 60 |
+			sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+
+		local cache_speedup=$(echo "scale=2; \
+			($speed_cache-$speed_origin)/$speed_origin*100" | bc)
+		cache_speedup=$(echo ${cache_speedup%.*})
+		echo "Iter $i/$repeat: cache speedup: $cache_speedup%"
+		average_cache=$((average_cache + cache_speedup))
+
+		local ladvise_speedup=$(echo "scale=2; \
+			($speed_ladvise-$speed_origin)/$speed_origin*100" | bc)
+		ladvise_speedup=$(echo ${ladvise_speedup%.*})
+		echo "Iter $i/$repeat: ladvise speedup: $ladvise_speedup%"
+		average_ladvise=$((average_ladvise + ladvise_speedup))
+	done
+	average_cache=$((average_cache / repeat))
+	average_ladvise=$((average_ladvise / repeat))
+
+	if [ $average_cache -lt 20 ]; then
+		echo "Speedup with cache is less than 20% ($average_cache%),"\
+			"skipping check of speedup with willread:"\
+			"$average_ladvise%"
+		return 0
+	fi
+
+	local lowest_speedup=$((average_cache / 2))
+	[ $average_ladvise -gt $lowest_speedup ] ||
+		error "Speedup with willread is less than $lowest_speedup%,"\
+			"got $average_ladvise%"
+	echo "Speedup with willread ladvise: $average_ladvise%"
+	echo "Speedup with cache: $average_cache%"
+}
+
+test_255a() {
+	lfs setstripe -c -1 -i 0 $DIR/$tfile || error "$tfile failed"
+
+	ladvise_no_type willread $DIR/$tfile &&
+		skip "willread ladvise is not supported" && return
+
+	ladvise_no_ioctl $DIR/$tfile &&
+		skip "ladvise ioctl is not supported" && return
+
+	[ $(lustre_version_code ost1) -lt $(version_code 2.8.54) ] &&
+		skip "lustre < 2.8.54 does not support ladvise " && return
+
+	local size_mb=100
+	local size=$((size_mb * 1048576))
+	dd if=/dev/zero of=$DIR/$tfile bs=1048576 count=$size_mb ||
+		error "dd to $DIR/$tfile failed"
+
+	lfs ladvise -a willread $DIR/$tfile ||
+		error "Ladvise failed with no range argument"
+
+	lfs ladvise -a willread -s 0 $DIR/$tfile ||
+		error "Ladvise failed with no -l or -e argument"
+
+	lfs ladvise -a willread -e 1 $DIR/$tfile ||
+		error "Ladvise failed with only -e argument"
+
+	lfs ladvise -a willread -l 1 $DIR/$tfile ||
+		error "Ladvise failed with only -l argument"
+
+	lfs ladvise -a willread -s 2 -e 1 $DIR/$tfile &&
+		error "End offset should not be smaller than start offset"
+
+	lfs ladvise -a willread -s 2 -e 2 $DIR/$tfile &&
+		error "End offset should not be equal to start offset"
+
+	lfs ladvise -a willread -s $size -l 1 $DIR/$tfile ||
+		error "Ladvise failed with overflowing -s argument"
+
+	lfs ladvise -a willread -s 1 -e $((size + 1)) $DIR/$tfile ||
+		error "Ladvise failed with overflowing -e argument"
+
+	lfs ladvise -a willread -s 1 -l $size $DIR/$tfile ||
+		error "Ladvise failed with overflowing -l argument"
+
+	lfs ladvise -a willread -l 1 -e 2 $DIR/$tfile &&
+		error "Ladvise succeeded with conflicting -l and -e arguments"
+
+	echo "Synchronous ladvise should wait"
+	local delay=4
+#define OBD_FAIL_OST_LADVISE_PAUSE 0x237
+	do_nodes $(comma_list $(osts_nodes)) \
+		$LCTL set_param fail_val=$delay fail_loc=0x237
+
+	local start_ts=$SECONDS
+	lfs ladvise -a willread $DIR/$tfile ||
+		error "Ladvise failed with no range argument"
+	local end_ts=$SECONDS
+	local interval_ts=$((end_ts - start_ts))
+
+	if [ $interval_ts -lt $(($delay - 1)) ]; then
+		error "Synchronous advice didn't wait reply"
+	fi
+
+	echo "Asynchronous ladvise shouldn't wait"
+	local start_ts=$SECONDS
+	lfs ladvise -a willread -b $DIR/$tfile ||
+		error "Ladvise failed with no range argument"
+	local end_ts=$SECONDS
+	local interval_ts=$((end_ts - start_ts))
+
+	if [ $interval_ts -gt $(($delay / 2)) ]; then
+		error "Asynchronous advice blocked"
+	fi
+
+	do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0
+	ladvise_willread_performance
+}
+run_test 255a "check 'lfs ladvise -a willread'"
+
 test_256() {
 	local cl_user
 	local cat_sl
@@ -13950,6 +14287,14 @@ test_256() {
 }
 run_test 256 "Check llog delete for empty and not full state"
 
+test_260() {
+#define OBD_FAIL_MDC_CLOSE 0x806
+	$LCTL set_param fail_loc=0x80000806
+	touch $DIR/$tfile
+
+}
+run_test 260 "Check mdc_close fail"
+
 cleanup_test_300() {
 	trap 0
 	umask $SAVE_UMASK
@@ -14557,6 +14902,7 @@ run_test 300o "unlink big sub stripe(> 65000 subdirs)"
 test_300p() {
 	[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
 	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
 
 	mkdir -p $DIR/$tdir
 
@@ -14666,6 +15012,154 @@ test_310c() {
 }
 run_test 310c "open-unlink remote file with multiple links"
 
+#LU-4825
+test_311() {
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.54) ] &&
+		skip "lustre < 2.8.54 does not contain LU-4825 fix" && return
+	[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+
+	local old_iused=$($LFS df -i | grep OST0000 | awk '{ print $3 }')
+
+	mkdir -p $DIR/$tdir
+	$SETSTRIPE -i 0 -c 1 $DIR/$tdir
+	createmany -o $DIR/$tdir/$tfile. 1000
+
+	# statfs data is not real time, let's just calculate it
+	old_iused=$((old_iused + 1000))
+
+	local count=$(do_facet $SINGLEMDS "lctl get_param -n \
+			osp.*OST0000*MDT0000.create_count")
+	local max_count=$(do_facet $SINGLEMDS "lctl get_param -n \
+				osp.*OST0000*MDT0000.max_create_count")
+	for idx in $(seq $MDSCOUNT); do
+		do_facet mds$idx "lctl set_param -n \
+			osp.*OST0000*MDT000?.max_create_count=0"
+	done
+
+	$SETSTRIPE -i 0 $DIR/$tdir/$tfile || error "setstripe failed"
+	local index=$($GETSTRIPE -i $DIR/$tdir/$tfile)
+	[ $index -ne 0 ] || error "$tfile stripe index is 0"
+
+	unlinkmany $DIR/$tdir/$tfile. 1000
+
+	for idx in $(seq $MDSCOUNT); do
+		do_facet mds$idx "lctl set_param -n \
+			osp.*OST0000*MDT000?.max_create_count=$max_count"
+		do_facet mds$idx "lctl set_param -n \
+			osp.*OST0000*MDT000?.create_count=$count"
+	done
+
+	local new_iused
+	for i in $(seq 120); do
+		new_iused=$($LFS df -i | grep OST0000 | awk '{ print $3 }')
+		[ $((old_iused - new_iused)) -gt 800 ] && break
+		sleep 1
+	done
+
+	echo "waited $i sec, old Iused $old_iused, new Iused $new_iused"
+	[ $((old_iused - new_iused)) -gt 800 ] ||
+		error "objs not destroyed after unlink"
+}
+run_test 311 "disable OSP precreate, and unlink should destroy objs"
+
+zfs_oid_to_objid()
+{
+	local ost=$1
+	local objid=$2
+
+	local vdevdir=$(dirname $(facet_vdevice $ost))
+	local cmd="$ZDB -e -p $vdevdir -dddd $(facet_device $ost)"
+	local zfs_zapid=$(do_facet $ost $cmd |
+			  grep -w "/O/0/d$((objid%32))" -C 5 |
+			  awk '/Object/{getline; print $1}')
+	local zfs_objid=$(do_facet $ost $cmd $zfs_zapid |
+			  awk "/$objid = /"'{printf $3}')
+
+	echo $zfs_objid
+}
+
+zfs_object_blksz() {
+	local ost=$1
+	local objid=$2
+
+	local vdevdir=$(dirname $(facet_vdevice $ost))
+	local cmd="$ZDB -e -p $vdevdir -dddd $(facet_device $ost)"
+	local blksz=$(do_facet $ost $cmd $objid |
+		      awk '/dblk/{getline; printf $4}')
+
+	case "${blksz: -1}" in
+	k|K) blksz=$((${blksz:0:$((${#blksz} - 1))}*1024)) ;;
+	m|M) blksz=$((${blksz:0:$((${#blksz} - 1))}*1024*1024)) ;;
+	*) ;;
+	esac
+
+	echo $blksz
+}
+
+test_312() { # LU-4856
+	[ $(facet_fstype ost1) = "zfs" ] ||
+		{ skip "the test only applies to zfs" && return; }
+
+	local max_blksz=$(do_facet ost1 \
+			  $ZFS get -p recordsize $(facet_device ost1) |
+			  awk '!/VALUE/{print $3}')
+
+	# to make life a little bit easier
+	$LFS mkdir -c 1 -i 0 $DIR/$tdir
+	$LFS setstripe -c 1 -i 0 $DIR/$tdir
+
+	local tf=$DIR/$tdir/$tfile
+	touch $tf
+	local oid=$($LFS getstripe $tf | awk '/obdidx/{getline; print $2}')
+
+	# Get ZFS object id
+	local zfs_objid=$(zfs_oid_to_objid ost1 $oid)
+
+	# block size change by sequential overwrite
+	local blksz
+	for ((bs=4096; bs <= max_blksz; bs <<= 2)); do
+		dd if=/dev/zero of=$tf bs=$bs count=1 oflag=sync conv=notrunc
+
+		blksz=$(zfs_object_blksz ost1 $zfs_objid)
+		[ $blksz -eq $bs ] || error "blksz error: $blksz, expected: $bs"
+	done
+	rm -f $tf
+
+	# block size change by sequential append write
+	dd if=/dev/zero of=$tf bs=4K count=1 oflag=sync conv=notrunc
+	oid=$($LFS getstripe $tf | awk '/obdidx/{getline; print $2}')
+	zfs_objid=$(zfs_oid_to_objid ost1 $oid)
+
+	for ((count = 1; count < $((max_blksz / 4096)); count *= 2)); do
+		dd if=/dev/zero of=$tf bs=4K count=$count seek=$count \
+			oflag=sync conv=notrunc
+
+		blksz=$(zfs_object_blksz ost1 $zfs_objid)
+		blksz=$((blksz / 8192)) # in 2*4K unit
+		[ $blksz -eq $count ] ||
+			error "blksz error(in 8k): $blksz, expected: $count"
+	done
+	rm -f $tf
+
+	# random write
+	touch $tf
+	oid=$($LFS getstripe $tf | awk '/obdidx/{getline; print $2}')
+	zfs_objid=$(zfs_oid_to_objid ost1 $oid)
+
+	dd if=/dev/zero of=$tf bs=8K count=1 oflag=sync conv=notrunc
+	blksz=$(zfs_object_blksz ost1 $zfs_objid)
+	[ $blksz -eq 8192 ] || error "blksz error: $blksz, expected: 8k"
+
+	dd if=/dev/zero of=$tf bs=64K count=1 oflag=sync conv=notrunc seek=128
+	blksz=$(zfs_object_blksz ost1 $zfs_objid)
+	[ $blksz -eq 65536 ] || error "blksz error: $blksz, expected: 64k"
+
+	dd if=/dev/zero of=$tf bs=1M count=1 oflag=sync conv=notrunc
+	blksz=$(zfs_object_blksz ost1 $zfs_objid)
+	[ $blksz -eq 65536 ] || error "rewrite error: $blksz, expected: 64k"
+}
+run_test 312 "make sure ZFS adjusts its block size by write pattern"
+
 test_400a() { # LU-1606, was conf-sanity test_74
 	local extra_flags=''
 	local out=$TMP/$tfile
@@ -14726,6 +15220,9 @@ test_400b() { # LU-1606, LU-5011
 run_test 400b "packaged headers can be compiled"
 
 test_401a() { #LU-7437
+	local printf_arg=$(find -printf 2>&1 | grep "unrecognized:")
+	[ -n "$printf_arg" ] && skip_env "find does not support -printf" &&
+		return
 	#count the number of parameters by "list_param -R"
 	local params=$($LCTL list_param -R '*' 2>/dev/null | wc -l)
 	#count the number of parameters by listing proc files
@@ -14815,6 +15312,7 @@ test_401d() {
 run_test 401d "Verify 'lctl set_param' accepts values containing '='"
 
 test_402() {
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
 	$LFS setdirstripe -i 0 $DIR/$tdir || error "setdirstripe -i 0 failed"
 	#define OBD_FAIL_MDS_FLD_LOOKUP 0x15c
 	do_facet mds1 "lctl set_param fail_loc=0x8000015c"
@@ -14853,6 +15351,7 @@ test_403() {
 run_test 403 "i_nlink should not drop to zero due to aliasing"
 
 test_404() { # LU-6601
+	remote_mds_nodsh && skip "remote MDS with nodsh" && return
 	local mosps=$(do_facet $SINGLEMDS $LCTL dl |
 		awk '/osp .*-osc-MDT/ { print $4}')
 
@@ -14890,6 +15389,107 @@ test_405() {
 }
 run_test 405 "Various layout swap lock tests"
 
+test_406() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+	[ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return
+	[ -n "$FILESET" ] && skip "SKIP due to FILESET set" && return
+	[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.50) ] &&
+		skip "Need MDS version at least 2.8.50" && return
+
+	local def_stripenr=$($GETSTRIPE -c $MOUNT)
+	local def_stripe_size=$($GETSTRIPE -S $MOUNT)
+	local def_stripe_offset=$($GETSTRIPE -i $MOUNT)
+	local def_pool=$($GETSTRIPE -p $MOUNT)
+
+	local test_pool=$TESTNAME
+	pool_add $test_pool || error "pool_add failed"
+	pool_add_targets $test_pool 0 $(($OSTCOUNT - 1)) 1 ||
+		error "pool_add_targets failed"
+
+	# parent set default stripe count only, child will stripe from both
+	# parent and fs default
+	$SETSTRIPE -c 1 -i 1 -S $((def_stripe_size * 2)) -p $test_pool $MOUNT ||
+		error "setstripe $MOUNT failed"
+	$LFS mkdir -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed"
+	$SETSTRIPE -c $OSTCOUNT $DIR/$tdir || error "setstripe $tdir failed"
+	for i in $(seq 10); do
+		local f=$DIR/$tdir/$tfile.$i
+		touch $f || error "touch failed"
+		local count=$($GETSTRIPE -c $f)
+		[ $count -eq $OSTCOUNT ] ||
+			error "$f stripe count $count != $OSTCOUNT"
+		local offset=$($GETSTRIPE -i $f)
+		[ $offset -eq 1 ] || error "$f stripe offset $offset != 1"
+		local size=$($GETSTRIPE -S $f)
+		[ $size -eq $((def_stripe_size * 2)) ] ||
+			error "$f stripe size $size != $((def_stripe_size * 2))"
+		local pool=$($GETSTRIPE -p $f)
+		[ $pool == $test_pool ] || error "$f pool $pool != $test_pool"
+	done
+
+	# change fs default striping, delete parent default striping, now child
+	# will stripe from new fs default striping only
+	$SETSTRIPE -c 1 -S $def_stripe_size -i 0 $MOUNT ||
+		error "change $MOUNT default stripe failed"
+	$SETSTRIPE -c 0 $DIR/$tdir || error "delete $tdir default stripe failed"
+	for i in $(seq 11 20); do
+		local f=$DIR/$tdir/$tfile.$i
+		touch $f || error "touch $f failed"
+		local count=$($GETSTRIPE -c $f)
+		[ $count -eq 1 ] || error "$f stripe count $count != 1"
+		local offset=$($GETSTRIPE -i $f)
+		[ $offset -eq 0 ] || error "$f stripe offset $offset != 0"
+		local size=$($GETSTRIPE -S $f)
+		[ $size -eq $def_stripe_size ] ||
+			error "$f stripe size $size != $def_stripe_size"
+		local pool=$($GETSTRIPE -p $f)
+		[ "#$pool" == "#" ] || error "$f pool $pool is set"
+
+	done
+
+	unlinkmany $DIR/$tdir/$tfile. 1 20
+
+	# restore FS default striping
+	if [ -z "$def_pool" ]; then
+		$SETSTRIPE -c $def_stripenr -S $def_stripe_size \
+			-i $def_stripe_offset $MOUNT ||
+			error "restore default striping failed"
+	else
+		$SETSTRIPE -c $def_stripenr -S $def_stripe_size -p $def_pool \
+			-i $def_stripe_offset $MOUNT ||
+			error "restore default striping with $def_pool failed"
+	fi
+
+	local f=$DIR/$tdir/$tfile
+	pool_remove_all_targets $test_pool $f
+	pool_remove $test_pool $f
+}
+run_test 406 "DNE support fs default striping"
+
+test_407() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+
+	[[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.55) ]] &&
+		skip "Need MDS version at least 2.8.55" && return
+
+	$LFS mkdir -i 0 -c 1 $DIR/$tdir.0 ||
+		error "$LFS mkdir -i 0 -c 1 $tdir.0 failed"
+	$LFS mkdir -i 1 -c 1 $DIR/$tdir.1 ||
+		error "$LFS mkdir -i 1 -c 1 $tdir.1 failed"
+	touch $DIR/$tdir.0/$tfile.0 || error "touch $tdir.0/$tfile.0 failed"
+
+	#define OBD_FAIL_DT_TXN_STOP 0x2019
+	for idx in $(seq $MDSCOUNT); do
+		do_facet mds$idx "lctl set_param fail_loc=0x2019"
+	done
+	$LFS mkdir -c 2 $DIR/$tdir && error "$LFS mkdir -c 2 $tdir should fail"
+	mv $DIR/$tdir.0/$tfile.0 $DIR/$tdir.1/$tfile.1 &&
+		error "mv $tdir.0/$tfile.0 $tdir.1/$tfile.1 should fail"
+	true
+}
+run_test 407 "transaction fail should cause operation fail"
+
 #
 # tests that do cleanup/setup should be run at the end
 #