X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=0f3c56aa47acd40e2468d0bf97b47c47d5636c0d;hp=c4e305d6cac3ba707c65af6027040ea41f531a4e;hb=d775f9ae37975c853984b67f6d1a21e6ec8a8c3d;hpb=aa92caa21fa2a4473dce5889de7fcd17e171c1a0 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index c4e305d..0f3c56a 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -77,8 +77,8 @@ if (( $LINUX_VERSION_CODE >= $(version_code 4.18.0) && ALWAYS_EXCEPT+=" 411" fi -# 5 12 8 12 (min)" -[ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 64b 68 71 115 135 136 300o" +# 5 12 8 12 (min)" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 60i 64b 68 71 115 135 136 300o" if [ "$mds1_FSTYPE" = "zfs" ]; then # bug number for skipped test: @@ -162,7 +162,7 @@ check_and_setup_lustre DIR=${DIR:-$MOUNT} assert_DIR -MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} +MAXFREE=${MAXFREE:-$((300000 * $OSTCOUNT))} [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo [ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo @@ -1835,7 +1835,7 @@ __exhaust_precreations() { local FAILIDX=${3:-$OSTIDX} local ofacet=ost$((OSTIDX + 1)) - test_mkdir -p -c1 $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local mdtidx=$($LFS getstripe -m $DIR/$tdir) local mfacet=mds$((mdtidx + 1)) echo OSTIDX=$OSTIDX MDTIDX=$mdtidx @@ -1991,7 +1991,7 @@ test_27q() { reset_enospc rm -f $DIR/$tdir/$tfile - test_mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir $MCREATE $DIR/$tdir/$tfile || error "mcreate $DIR/$tdir/$tfile failed" $TRUNCATE $DIR/$tdir/$tfile 80000000 || error "truncate $DIR/$tdir/$tfile failed" @@ -2755,7 +2755,8 @@ test_27I() { save_layout_restore_at_exit $MOUNT $LFS setstripe -c 2 -i 0 $MOUNT pool_add $pool || error "pool_add failed" - pool_add_targets $pool $ostrange || "pool_add_targets failed" + pool_add_targets $pool $ostrange || + error "pool_add_targets failed" test_mkdir $DIR/$tdir $LFS setstripe -p $pool $DIR/$tdir $MULTIOP $DIR/$tdir/$tfile Oc || error "multiop failed" @@ -3319,6 
+3320,38 @@ test_27P() { } run_test 27P "basic ops on foreign dir of foreign_symlink type" +test_27Q() { + rm -f $TMP/$tfile $TMP/$tfile.loop $TMP/$tfile.none $TMP/$tfile.broken + stack_trap "rm -f $TMP/$tfile*" + + test_mkdir $DIR/$tdir-1 + test_mkdir $DIR/$tdir-2 + + echo 'It is what it is' > $DIR/$tdir-1/$tfile + lov_getstripe_old $DIR/$tdir-1/$tfile || error "$DIR/$tdir-1/$tfile: rc = $?" + + ln -s $DIR/$tdir-1/$tfile $DIR/$tdir-2/$tfile + lov_getstripe_old $DIR/$tdir-2/$tfile || error "$DIR/$tdir-2/$tfile: rc = $?" + + ln -s $DIR/$tdir-1/$tfile $TMP/$tfile + lov_getstripe_old $TMP/$tfile || error "$TMP/$tfile: rc = $?" + + # Create some bad symlinks and ensure that we don't loop + # forever or something. These should return ELOOP (40) and + # ENOENT (2) but I don't want to test for that because there's + # always some weirdo architecture that needs to ruin + # everything by defining these error numbers differently. + + ln -s $TMP/$tfile.loop $TMP/$tfile.loop + lov_getstripe_old $TMP/$tfile.loop && error "$TMP/$tfile.loop: rc = $?" + + ln -s $TMP/$tfile.none $TMP/$tfile.broken + lov_getstripe_old $TMP/$tfile.broken && error "$TMP/$tfile.broken: rc = $?" 
+ + return 0 +} +run_test 27Q "llapi_file_get_stripe() works on symlinks" + # createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091 @@ -4734,7 +4767,7 @@ test_39l() { local atime_diff=$(do_facet $SINGLEMDS \ lctl get_param -n mdd.*MDT0000*.atime_diff) rm -rf $DIR/$tdir - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir # test setting directory atime to future touch -a -d @$TEST_39_ATIME $DIR/$tdir @@ -5862,6 +5895,7 @@ run_test 54e "console/tty device works in lustre ======================" test_56a() { local numfiles=3 + local numdirs=2 local dir=$DIR/$tdir rm -rf $dir @@ -5901,9 +5935,10 @@ test_56a() { #test lfs getstripe with -v prints lmm_fid filenum=$($LFS getstripe -v $dir | grep -c lmm_fid) - [[ $filenum -eq $((numfiles * numcomp)) ]] || + local countfids=$((numdirs + numfiles * numcomp)) + [[ $filenum -eq $countfids ]] || error "$LFS getstripe -v $dir: "\ - "got $filenum want $((numfiles * numcomp)) lmm_fid" + "got $filenum want $countfids lmm_fid" [[ $($LFS getstripe $dir | grep -c lmm_fid) -eq 0 ]] || error "$LFS getstripe $dir: showed lmm_fid by default" echo "$LFS getstripe --verbose passed" @@ -6631,6 +6666,36 @@ test_56rb() { } run_test 56rb "check lfs find --size --ost/--mdt works" +test_56rc() { + (( MDSCOUNT >= 2 )) || skip "needs at least 2 MDTs" + local dir=$DIR/$tdir + local found + + test_mkdir -c 2 -H all_char $dir || error "failed to mkdir $dir" + $LFS mkdir -c 2 --mdt-hash all_char $dir/$tdir-all{1..10} + (( $MDSCOUNT > 2 )) && + $LFS mkdir -c 3 --mdt-hash fnv_1a_64 $dir/$tdir-fnv{1..10} + mkdir $dir/$tdir-{1..10} + touch $dir/$tfile-{1..10} + + found=$($LFS find $dir --mdt-count 2 | wc -l) + expect=11 + (( $found == $expect )) || error "found $found 2-stripe, expect $expect" + + found=$($LFS find $dir -T +1 | wc -l) + (( $MDSCOUNT > 2 )) && expect=$((expect + 10)) + (( $found == $expect )) || error "found $found 2+stripe, expect $expect" + + found=$($LFS find $dir --mdt-hash all_char 
| wc -l) + expect=11 + (( $found == $expect )) || error "found $found all_char, expect $expect" + + found=$($LFS find $dir --mdt-hash fnv_1a_64 | wc -l) + (( $MDSCOUNT > 2 )) && expect=10 || expect=0 + (( $found == $expect )) || error "found $found all_char, expect $expect" +} +run_test 56rc "check lfs find --mdt-count/--mdt-hash works" + test_56s() { # LU-611 #LU-9369 [[ $OSTCOUNT -lt 2 ]] && skip_env "need at least 2 OSTs" @@ -7611,6 +7676,94 @@ test_56ab() { # LU-10705 } run_test 56ab "lfs find --blocks" +# LU-11188 +test_56aca() { + local dir="$DIR/$tdir" + local perms=(001 002 003 004 005 006 007 + 010 020 030 040 050 060 070 + 100 200 300 400 500 600 700 + 111 222 333 444 555 666 777) + local perm_minus=(8 8 4 8 4 4 2 + 8 8 4 8 4 4 2 + 8 8 4 8 4 4 2 + 4 4 2 4 2 2 1) + local perm_slash=(8 8 12 8 12 12 14 + 8 8 12 8 12 12 14 + 8 8 12 8 12 12 14 + 16 16 24 16 24 24 28) + + test_mkdir "$dir" + for perm in ${perms[*]}; do + touch "$dir/$tfile.$perm" + chmod $perm "$dir/$tfile.$perm" + done + + for ((i = 0; i < ${#perms[*]}; i++)); do + local num=$($LFS find $dir -perm ${perms[i]} | wc -l) + (( $num == 1 )) || + error "lfs find -perm ${perms[i]}:"\ + "$num != 1" + + num=$($LFS find $dir -perm -${perms[i]} -type f| wc -l) + (( $num == ${perm_minus[i]} )) || + error "lfs find -perm -${perms[i]}:"\ + "$num != ${perm_minus[i]}" + + num=$($LFS find $dir -perm /${perms[i]} -type f| wc -l) + (( $num == ${perm_slash[i]} )) || + error "lfs find -perm /${perms[i]}:"\ + "$num != ${perm_slash[i]}" + done +} +run_test 56aca "check lfs find -perm with octal representation" + +test_56acb() { + local dir=$DIR/$tdir + # p is the permission of write and execute for user, group and other + # without the umask. It is used to test +wx. 
+ local p=$(printf "%o" "$((0333 & ~$(umask)))") + local perms=(1000 000 2000 4000 $p 644 111 110 100 004) + local symbolic=(+t a+t u+t g+t o+t + g+s u+s o+s +s o+sr + o=r,ug+o,u+w + u+ g+ o+ a+ ugo+ + u- g- o- a- ugo- + u= g= o= a= ugo= + o=r,ug+o,u+w u=r,a+u,u+w + g=r,ugo=g,u+w u+x,+X +X + u+x,u+X u+X u+x,g+X o+r,+X + u+x,go+X +wx +rwx) + + test_mkdir $dir + for perm in ${perms[*]}; do + touch "$dir/$tfile.$perm" + chmod $perm "$dir/$tfile.$perm" + done + + for (( i = 0; i < ${#symbolic[*]}; i++ )); do + local num=$($LFS find $dir -perm ${symbolic[i]} | wc -l) + + (( $num == 1 )) || + error "lfs find $dir -perm ${symbolic[i]}: $num != 1" + done +} +run_test 56acb "check lfs find -perm with symbolic representation" + +test_56acc() { + local dir=$DIR/$tdir + local tests="17777 787 789 abcd + ug=uu ug=a ug=gu uo=ou urw + u+xg+x a=r,u+x," + + test_mkdir $dir + for err in $tests; do + if $LFS find $dir -perm $err 2>/dev/null; then + error "lfs find -perm $err: parsing should have failed" + fi + done +} +run_test 56acc "check parsing error for lfs find -perm" + test_56ba() { [ $MDS1_VERSION -lt $(version_code 2.10.50) ] && skip "Need MDS version at least 2.10.50" @@ -8146,6 +8299,34 @@ test_60h() { } run_test 60h "striped directory with missing stripes can be accessed" +function t60i_load() { + mkdir $DIR/$tdir + #define OBD_FAIL_LLOG_PAUSE_AFTER_PAD 0x131c + $LCTL set_param fail_loc=0x131c fail_val=1 + for ((i=0; i<5000; i++)); do + touch $DIR/$tdir/f$i + done +} + +test_60i() { + changelog_register || error "changelog_register failed" + local cl_user="${CL_USERS[$SINGLEMDS]%% *}" + changelog_users $SINGLEMDS | grep -q $cl_user || + error "User $cl_user not found in changelog_users" + changelog_chmask "ALL" + t60i_load & + local PID=$! 
+ for((i=0; i<100; i++)); do + changelog_dump >/dev/null || + error "can't read changelog" + done + kill $PID + wait $PID + changelog_deregister || error "changelog_deregister failed" + $LCTL set_param fail_loc=0 +} +run_test 60i "llog: new record vs reader race" + test_61a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -8699,8 +8880,7 @@ test_65n() { which getfattr > /dev/null 2>&1 || skip_env "no getfattr command" which setfattr > /dev/null 2>&1 || skip_env "no setfattr command" - local root_layout=$(save_layout $MOUNT) - stack_trap "restore_layout $MOUNT $root_layout" EXIT + save_layout_restore_at_exit $MOUNT # new subdirectory under root directory should not inherit # the default layout from root @@ -9269,6 +9449,7 @@ test_77d() { # bug 10889 [ $PARALLEL == "yes" ] && skip "skip parallel run" $GSS && skip_env "could not run with gss" + stack_trap "rm -f $DIR/$tfile" #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 $LCTL set_param fail_loc=0x80000409 set_checksums 1 @@ -9293,6 +9474,7 @@ test_77f() { # bug 10889 $GSS && skip_env "could not run with gss" set_checksums 1 + stack_trap "rm -f $DIR/$tfile" for algo in $CKSUM_TYPES; do cancel_lru_locks osc set_checksum_type $algo @@ -9414,6 +9596,20 @@ run_test 77l "preferred checksum type is remembered after reconnected" rm -f $F77_TMP unset F77_TMP +test_77m() { + (( $CLIENT_VERSION >= $(version_code 2.14.52) )) || + skip "Need at least version 2.14.52" + local param=checksum_speed + + $LCTL get_param $param || error "reading $param failed" + + csum_speeds=$($LCTL get_param -n $param) + + [[ "$csum_speeds" =~ "adler32" && "$csum_speeds" =~ "crc32" ]] || + error "known checksum types are missing" +} +run_test 77m "Verify checksum_speed is correctly read" + cleanup_test_78() { trap 0 rm -f $DIR/$tfile @@ -9885,6 +10081,8 @@ test_101e() { dd if=$file.$i of=/dev/null bs=$bsize count=$size_KB 2>/dev/null done + $LCTL get_param llite.*.max_cached_mb + $LCTL get_param llite.*.read_ahead_stats local miss=$($LCTL 
get_param -n llite.*.read_ahead_stats | get_named_value 'misses' | calc_total) @@ -10862,6 +11060,25 @@ test_103e() { } run_test 103e "inheritance of big amount of default ACLs" +test_103f() { + (( $MDS1_VERSION >= $(version_code 2.14.51) )) || + skip "MDS needs to be at least 2.14.51" + + large_xattr_enabled || skip_env "ea_inode feature disabled" + + # enable changelog to consume more internal MDD buffers + changelog_register + + mkdir -p $DIR/$tdir + # add big LOV EA + $LFS setstripe -C 1000 $DIR/$tdir + setfacl -d -m user:$U:rwx $DIR/$tdir || error "Cannot add default ACLs" + mkdir $DIR/$tdir/inherited || error "failed to create subdirectory" + rmdir $DIR/$tdir/inherited || error "Cannot remove subdirectory" + rmdir $DIR/$tdir || error "Cannot remove directory" +} +run_test 103f "changelog doesn't interfere with default ACLs buffers" + test_104a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -10898,6 +11115,106 @@ test_104b() { } run_test 104b "$RUNAS lfs check servers test ====================" +# +# Verify $1 is within range of $2. +# Success when $1 is within range. That is, when $1 is >= 2% of $2 and +# $1 is <= 2% of $2. Else Fail. +# +value_in_range() { + # Strip all units (M, G, T) + actual=$(echo $1 | tr -d A-Z) + expect=$(echo $2 | tr -d A-Z) + + expect_lo=$(($expect * 98 / 100)) # 2% below + expect_hi=$(($expect * 102 / 100)) # 2% above + + # permit 2% drift above and below + (( $actual >= $expect_lo && $actual <= $expect_hi )) +} + +test_104c() { + [ $PARALLEL == "yes" ] && skip "skip parallel run" + [ "$ost1_FSTYPE" == "zfs" ] || skip "zfs only test" + + local ost_param="osd-zfs.$FSNAME-OST0000." + local mdt_param="osd-zfs.$FSNAME-MDT0000." + local ofacets=$(get_facets OST) + local mfacets=$(get_facets MDS) + local saved_ost_blocks= + local saved_mdt_blocks= + + echo "Before recordsize change" + lfs_df=($($LFS df -h | grep "filesystem_summary:")) + df=($(df -h | grep "/mnt/lustre"$)) + + # For checking. 
+ echo "lfs output : ${lfs_df[*]}" + echo "df output : ${df[*]}" + + for facet in ${ofacets//,/ }; do + if [ -z $saved_ost_blocks ]; then + saved_ost_blocks=$(do_facet $facet \ + lctl get_param -n $ost_param.blocksize) + echo "OST Blocksize: $saved_ost_blocks" + fi + ost=$(do_facet $facet lctl get_param -n $ost_param.mntdev) + do_facet $facet zfs set recordsize=32768 $ost + done + + # BS too small. Sufficient for functional testing. + for facet in ${mfacets//,/ }; do + if [ -z $saved_mdt_blocks ]; then + saved_mdt_blocks=$(do_facet $facet \ + lctl get_param -n $mdt_param.blocksize) + echo "MDT Blocksize: $saved_mdt_blocks" + fi + mdt=$(do_facet $facet lctl get_param -n $mdt_param.mntdev) + do_facet $facet zfs set recordsize=32768 $mdt + done + + # Give new values chance to reflect change + sleep 2 + + echo "After recordsize change" + lfs_df_after=($($LFS df -h | grep "filesystem_summary:")) + df_after=($(df -h | grep "/mnt/lustre"$)) + + # For checking. + echo "lfs output : ${lfs_df_after[*]}" + echo "df output : ${df_after[*]}" + + # Verify lfs df + value_in_range ${lfs_df_after[1]%.*} ${lfs_df[1]%.*} || + error "lfs_df bytes: ${lfs_df_after[1]%.*} != ${lfs_df[1]%.*}" + value_in_range ${lfs_df_after[2]%.*} ${lfs_df[2]%.*} || + error "lfs_df used: ${lfs_df_after[2]%.*} != ${lfs_df[2]%.*}" + value_in_range ${lfs_df_after[3]%.*} ${lfs_df[3]%.*} || + error "lfs_df avail: ${lfs_df_after[3]%.*} != ${lfs_df[3]%.*}" + + # Verify df + value_in_range ${df_after[1]%.*} ${df[1]%.*} || + error "df bytes: ${df_after[1]%.*} != ${df[1]%.*}" + value_in_range ${df_after[2]%.*} ${df[2]%.*} || + error "df used: ${df_after[2]%.*} != ${df[2]%.*}" + value_in_range ${df_after[3]%.*} ${df[3]%.*} || + error "df avail: ${df_after[3]%.*} != ${df[3]%.*}" + + # Restore MDT recordize back to original + for facet in ${mfacets//,/ }; do + mdt=$(do_facet $facet lctl get_param -n $mdt_param.mntdev) + do_facet $facet zfs set recordsize=$saved_mdt_blocks $mdt + done + + # Restore OST recordize back 
to original + for facet in ${ofacets//,/ }; do + ost=$(do_facet $facet lctl get_param -n $ost_param.mntdev) + do_facet $facet zfs set recordsize=$saved_ost_blocks $ost + done + + return 0 +} +run_test 104c "Verify df vs lfs_df stays same after recordsize change" + test_105a() { # doesn't work on 2.4 kernels touch $DIR/$tfile @@ -13584,7 +13901,7 @@ test_134a() { [[ $MDS1_VERSION -lt $(version_code 2.7.54) ]] && skip "Need MDS version at least 2.7.54" - mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + mkdir_on_mdt0 $DIR/$tdir || error "failed to create $DIR/$tdir" cancel_lru_locks mdc local nsdir="ldlm.namespaces.*-MDT0000-mdc-*" @@ -13620,7 +13937,7 @@ test_134b() { [[ $MDS1_VERSION -lt $(version_code 2.7.54) ]] && skip "Need MDS version at least 2.7.54" - mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + mkdir_on_mdt0 $DIR/$tdir || error "failed to create $DIR/$tdir" cancel_lru_locks mdc local low_wm=$(do_facet mds1 $LCTL get_param -n \ @@ -14981,6 +15298,8 @@ test_160a() { changelog_users $SINGLEMDS | grep -q $cl_user || error "User $cl_user not found in changelog_users" + mkdir_on_mdt0 $DIR/$tdir + # change something test_mkdir -p $DIR/$tdir/pics/2008/zachy changelog_clear 0 || error "changelog_clear failed" @@ -14991,8 +15310,6 @@ test_160a() { ln -s $DIR/$tdir/pics/2008/portland.jpg $DIR/$tdir/pics/desktop.jpg rm $DIR/$tdir/pics/desktop.jpg - changelog_dump | tail -10 - echo "verifying changelog mask" changelog_chmask "-MKDIR" changelog_chmask "-CLOSE" @@ -15006,7 +15323,6 @@ test_160a() { test_mkdir -p $DIR/$tdir/pics/2008/sofia # mkdir 1 echo "zzzzzz" > $DIR/$tdir/pics/zach/file # open 3 - changelog_dump | tail -10 MKDIRS=$(changelog_dump | grep -c "MKDIR") CLOSES=$(changelog_dump | grep -c "CLOSE") [ $MKDIRS -eq 1 ] || error "MKDIR changelog mask count $MKDIRS != 1" @@ -15069,10 +15385,10 @@ test_160a() { error "User '$cl_user' still in changelog_users" # lctl get_param -n mdd.*.changelog_users - # current index: 144 + # 
current_index: 144 # ID index (idle seconds) - # cl3 144 (2) - if ! changelog_users $SINGLEMDS | grep "^cl"; then + # cl3 144 (2) mask= + if [ -z "$(changelog_users $SINGLEMDS | grep -v current.index)" ]; then # this is the normal case where all users were deregistered # make sure no new records are added when no users are present local last_rec1=$(changelog_users $SINGLEMDS | @@ -15799,6 +16115,199 @@ test_160l() { } run_test 160l "Verify that MTIME changelog records contain the parent FID" +test_160m() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [[ $MDS1_VERSION -ge $(version_code 2.14.51) ]] || + skip "Need MDS version at least 2.14.51" + local cl_users + local cl_user1 + local cl_user2 + local pid1 + + # Create a user + changelog_register || error "first changelog_register failed" + changelog_register || error "second changelog_register failed" + + cl_users=(${CL_USERS[mds1]}) + cl_user1="${cl_users[0]}" + cl_user2="${cl_users[1]}" + # generate some changelog records to accumulate on MDT0 + test_mkdir -p -i0 -c1 $DIR/$tdir || error "test_mkdir $tdir failed" + createmany -m $DIR/$tdir/$tfile 50 || + error "create $DIR/$tdir/$tfile failed" + unlinkmany $DIR/$tdir/$tfile 50 || error "unlinkmany failed" + rm -f $DIR/$tdir + + # check changelogs have been generated + local nbcl=$(changelog_dump | wc -l) + [[ $nbcl -eq 0 ]] && error "no changelogs found" + +#define OBD_FAIL_MDS_CHANGELOG_RACE 0x15f + do_facet mds1 $LCTL set_param fail_loc=0x8000015f fail_val=0 + + __changelog_clear mds1 $cl_user1 +10 + __changelog_clear mds1 $cl_user2 0 & + pid1=$! + sleep 2 + __changelog_clear mds1 $cl_user1 0 || + error "fail to cancel record for $cl_user1" + wait $pid1 + [[ $? 
-eq 0 ]] || error "fail to cancel record for $cl_user2" +} +run_test 160m "Changelog clear race" + +test_160n() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [[ $MDS1_VERSION -ge $(version_code 2.14.51) ]] || + skip "Need MDS version at least 2.14.51" + local cl_users + local cl_user1 + local cl_user2 + local pid1 + local first_rec + local last_rec=0 + + # Create a user + changelog_register || error "first changelog_register failed" + + cl_users=(${CL_USERS[mds1]}) + cl_user1="${cl_users[0]}" + + # generate some changelog records to accumulate on MDT0 + test_mkdir -i0 -c1 $DIR/$tdir || error "test_mkdir $tdir failed" + first_rec=$(changelog_users $SINGLEMDS | + awk '/^current.index:/ { print $NF }') + while (( last_rec < (( first_rec + 65000)) )); do + createmany -m $DIR/$tdir/$tfile 10000 || + error "create $DIR/$tdir/$tfile failed" + + for i in $(seq 0 10000); do + mrename $DIR/$tdir/$tfile$i $DIR/$tdir/$tfile-new$i \ + > /dev/null + done + + unlinkmany $DIR/$tdir/$tfile-new 10000 || + error "unlinkmany failed unlink" + last_rec=$(changelog_users $SINGLEMDS | + awk '/^current.index:/ { print $NF }') + echo last record $last_rec + (( last_rec == 0 )) && error "no changelog found" + done + +#define OBD_FAIL_MDS_CHANGELOG_DEL 0x16c + do_facet mds1 $LCTL set_param fail_loc=0x8000016c fail_val=0 + + __changelog_clear mds1 $cl_user1 0 & + pid1=$! + sleep 2 + __changelog_clear mds1 $cl_user1 0 || + error "fail to cancel record for $cl_user1" + wait $pid1 + [[ $? 
-eq 0 ]] || error "fail to cancel record for $cl_user2" +} +run_test 160n "Changelog destroy race" + +test_160o() { + local mdt="$(facet_svc $SINGLEMDS)" + + [[ $PARALLEL != "yes" ]] || skip "skip parallel run" + remote_mds_nodsh && skip "remote MDS with nodsh" + [ $MDS1_VERSION -ge $(version_code 2.14.52) ] || + skip "Need MDS version at least 2.14.52" + + changelog_register --user test_160o -m unlnk+close+open || + error "changelog_register failed" + # drop server mask so it doesn't interfere + do_facet $SINGLEMDS $LCTL --device $mdt \ + changelog_register -u "Tt3_-#" && + error "bad symbols in name should fail" + + do_facet $SINGLEMDS $LCTL --device $mdt \ + changelog_register -u test_160o && + error "the same name registration should fail" + + do_facet $SINGLEMDS $LCTL --device $mdt \ + changelog_register -u test_160toolongname && + error "too long name registration should fail" + + changelog_chmask "MARK+HSM" + lctl get_param mdd.*.changelog*mask + local cl_user="${CL_USERS[$SINGLEMDS]%% *}" + changelog_users $SINGLEMDS | grep -q $cl_user || + error "User $cl_user not found in changelog_users" + #verify username + echo $cl_user | grep -q test_160o || + error "User $cl_user has no specific name 'test160o'" + + # change something + changelog_clear 0 || error "changelog_clear failed" + # generate some changelog records to accumulate on MDT0 + test_mkdir -p -i0 -c1 $DIR/$tdir || error "test_mkdir $tdir failed" + touch $DIR/$tdir/$tfile # open 1 + + OPENS=$(changelog_dump | grep -c "OPEN") + [[ $OPENS -eq 1 ]] || error "OPEN changelog mask count $OPENS != 1" + + # must be no MKDIR it wasn't set as user mask + MKDIR=$(changelog_dump | grep -c "MKDIR") + [[ $MKDIR -eq 0 ]] || error "MKDIR changelog mask found $MKDIR > 0" + + oldmask=$(do_facet $SINGLEMDS $LCTL get_param \ + mdd.$mdt.changelog_current_mask -n) + # register maskless user + changelog_register || error "changelog_register failed" + # effective mask should be not changed because it is not minimal + 
mask=$(do_facet $SINGLEMDS $LCTL get_param \ + mdd.$mdt.changelog_current_mask -n) + [[ $mask == $oldmask ]] || error "mask was changed: $mask vs $oldmask" + # set server mask to minimal value + changelog_chmask "MARK" + # check effective mask again, should be treated as DEFMASK now + mask=$(do_facet $SINGLEMDS $LCTL get_param \ + mdd.$mdt.changelog_current_mask -n) + [[ $mask == *"HLINK"* ]] || error "mask is not DEFMASK as expected" + + do_facet $SINGLEMDS $LCTL --device $mdt \ + changelog_deregister -u test_160o || + error "cannot deregister by name" +} +run_test 160o "changelog user name and mask" + +test_160p() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [[ $MDS1_VERSION -ge $(version_code 2.14.51) ]] || + skip "Need MDS version at least 2.14.51" + [[ "$mds1_FSTYPE" == "ldiskfs" ]] || skip "ldiskfs only test" + local cl_users + local cl_user1 + local entry_count + + # Create a user + changelog_register || error "first changelog_register failed" + + cl_users=(${CL_USERS[mds1]}) + cl_user1="${cl_users[0]}" + + test_mkdir -p -i0 -c1 $DIR/$tdir || error "test_mkdir $tdir failed" + createmany -m $DIR/$tdir/$tfile 50 || + error "create $DIR/$tdir/$tfile failed" + unlinkmany $DIR/$tdir/$tfile 50 || error "unlinkmany failed" + rm -rf $DIR/$tdir + + # check changelogs have been generated + entry_count=$(changelog_dump | wc -l) + ((entry_count != 0)) || error "no changelog entries found" + + # remove changelog_users and check that orphan entries are removed + stop mds1 + do_facet mds1 "$DEBUGFS -w -R 'rm changelog_users' $(mdsdevname 1)" + start mds1 || error "cannot start mdt" + entry_count=$(changelog_dump | wc -l) + ((entry_count == 0)) || + error "found $entry_count changelog entries, expected none" +} +run_test 160p "Changelog orphan cleanup with no users" + test_161a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -16760,7 +17269,7 @@ test_183() { # LU-2275 [[ $MDS1_VERSION -lt $(version_code 2.3.56) ]] && skip "Need MDS version at 
least 2.3.56" - mkdir -p $DIR/$tdir || error "creating dir $DIR/$tdir" + mkdir_on_mdt0 $DIR/$tdir || error "creating dir $DIR/$tdir" echo aaa > $DIR/$tdir/$tfile #define OBD_FAIL_MDS_NEGATIVE_POSITIVE 0x148 @@ -17352,7 +17861,7 @@ test_205a() { # Job stats local cmd # mkdir - cmd="mkdir $DIR/$tdir" + cmd="$LFS mkdir -i 0 -c 1 $DIR/$tdir" verify_jobstats "$cmd" "$SINGLEMDS" # rmdir cmd="rmdir $DIR/$tdir" @@ -17393,7 +17902,7 @@ test_205a() { # Job stats [ $left -ge 0 ] && wait_update_facet $SINGLEMDS \ "lctl get_param *.*.job_stats | grep -c 'job_id.*mkdir'" \ "0" $left - cmd="mkdir $DIR/$tdir.expire" + cmd="$LFS mkdir -i 0 -c 1 $DIR/$tdir.expire" verify_jobstats "$cmd" "$SINGLEMDS" [ $(do_facet $SINGLEMDS lctl get_param *.*.job_stats | grep -c "job_id.*mkdir") -gt 1 ] && error "old jobstats not expired" @@ -17446,6 +17955,9 @@ run_test 205a "Verify job stats" # LU-13117, LU-13597 test_205b() { + (( $MDS1_VERSION >= $(version_code 2.13.54.91) )) || + skip "Need MDS version at least 2.13.54.91" + job_stats="mdt.*.job_stats" $LCTL set_param $job_stats=clear # Setting jobid_var to USER might not be supported @@ -17884,7 +18396,7 @@ test_220() { #LU-325 local OSTIDX=0 # create on MDT0000 so the last_id and next_id are correct - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local OST=$($LFS df $DIR | awk '/OST:'$OSTIDX'/ { print $1 }') OST=${OST%_UUID} @@ -18764,7 +19276,7 @@ test_230e() { local a_fid local b_fid - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir mkdir $DIR/$tdir/migrate_dir mkdir $DIR/$tdir/other_dir touch $DIR/$tdir/migrate_dir/a @@ -19269,6 +19781,22 @@ test_230s() { } run_test 230s "lfs mkdir should return -EEXIST if target exists" +test_230t() +{ + [[ $MDSCOUNT -ge 2 ]] || skip_env "needs >= 2 MDTs" + [[ $MDS1_VERSION -ge $(version_code 2.14.50) ]] || + skip "Need MDS version at least 2.14.50" + + test_mkdir $DIR/$tdir || error "mkdir $tdir failed" + test_mkdir $DIR/$tdir/subdir || error "mkdir subdir failed" + $LFS project -p 1 -s $DIR/$tdir || + error 
"set $tdir project id failed" + $LFS project -p 2 -s $DIR/$tdir/subdir || + error "set subdir project id failed" + $LFS migrate -m 1 -c $MDSCOUNT $DIR/$tdir || error "migrate failed" +} +run_test 230t "migrate directory with project ID set" + test_231a() { # For simplicity this test assumes that max_pages_per_rpc @@ -19600,7 +20128,7 @@ run_test 241b "dio vs dio" test_242() { remote_mds_nodsh && skip "remote MDS with nodsh" - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir touch $DIR/$tdir/$tfile #define OBD_FAIL_MDS_READPAGE_PACK 0x105 @@ -19792,7 +20320,7 @@ test_247f() { grep -q subtree || skip "Fileset feature is not supported" - mkdir $DIR/$tdir || error "mkdir $tdir failed" + mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed" $LFS mkdir -i $((MDSCOUNT - 1)) $DIR/$tdir/remote || error "mkdir remote failed" mkdir $DIR/$tdir/remote/subdir || error "mkdir remote/subdir failed" @@ -20319,6 +20847,7 @@ test_255a() { skip "lustre < 2.8.54 does not support ladvise " remote_ost_nodsh && skip "remote OST with nodsh" + stack_trap "rm -f $DIR/$tfile" lfs setstripe -c -1 -i 0 $DIR/$tfile || error "$tfile failed" ladvise_no_type willread $DIR/$tfile && @@ -20406,6 +20935,7 @@ test_255b() { skip "lustre < 2.8.54 does not support ladvise " remote_ost_nodsh && skip "remote OST with nodsh" + stack_trap "rm -f $DIR/$tfile" lfs setstripe -c 1 -i 0 $DIR/$tfile ladvise_no_type dontneed $DIR/$tfile && @@ -20568,7 +21098,7 @@ test_256() { changelog_register || error "changelog_register failed" rm -rf $DIR/$tdir - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir changelog_clear 0 || error "changelog_clear failed" @@ -20719,7 +21249,7 @@ test_270a() { local dom=$DIR/$tdir/dom_file local tmp=$DIR/$tdir/tmp_file - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir # basic checks for DoM component creation $LFS setstripe -E 1024K -E 2048K -L mdt $dom 2>/dev/null && @@ -21858,7 +22388,7 @@ test_300c() { local file_count - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir $LFS setdirstripe -i 0 
-c 2 $DIR/$tdir/striped_dir || error "set striped dir error" @@ -22068,7 +22598,7 @@ test_300g() { local stripe_count local stripe_index - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir mkdir $DIR/$tdir/normal_dir #Checking when client cache stripe index @@ -22408,7 +22938,7 @@ test_300p() { [ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs" remote_mds_nodsh && skip "remote MDS with nodsh" - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir #define OBD_FAIL_OUT_ENOSPC 0x1704 do_facet mds2 lctl set_param fail_loc=0x80001704 @@ -23001,7 +23531,7 @@ test_398b() { # LU-4198 --filename=$DIR/$tfile || true wait $bg_pid - rm -rf $DIR/$tfile + rm -f $DIR/$tfile } run_test 398b "DIO and buffer IO race" @@ -23061,27 +23591,28 @@ test_398c() { # LU-4198 --filename=$DIR/$tfile [ $? -eq 0 ] || error "fio large block size failed" - rm -rf $DIR/$tfile + rm -f $DIR/$tfile $LCTL set_param debug="$saved_debug" } run_test 398c "run fio to test AIO" test_398d() { # LU-13846 - test -f aiocp || skip_env "no aiocp installed" - local aio_file=$DIR/aio_file + which aiocp || skip_env "no aiocp installed" + local aio_file=$DIR/$tfile.aio $LFS setstripe -c -1 -S 1M $DIR/$tfile $aio_file dd if=/dev/urandom of=$DIR/$tfile bs=1M count=64 aiocp -a $PAGE_SIZE -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file + stack_trap "rm -f $DIR/$tfile $aio_file" - diff $DIR/$tfile $aio_file || "file diff after aiocp" + diff $DIR/$tfile $aio_file || error "file diff after aiocp" # make sure we don't crash and fail properly aiocp -a 512 -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file && error "aio not aligned with PAGE SIZE should fail" - rm -rf $DIR/$tfile $aio_file + rm -f $DIR/$tfile $aio_file } run_test 398d "run aiocp to verify block size > stripe size" @@ -23092,6 +23623,297 @@ test_398e() { } run_test 398e "O_Direct open cleared by fcntl doesn't cause hang" +test_398f() { # LU-14687 + which aiocp || skip_env "no aiocp installed" + local aio_file=$DIR/$tfile.aio + + $LFS setstripe -c -1 -S 1M $DIR/$tfile $aio_file 
+ + dd if=/dev/zero of=$DIR/$tfile bs=1M count=64 + stack_trap "rm -f $DIR/$tfile $aio_file" + + #define OBD_FAIL_LLITE_PAGE_ALLOC 0x1418 + $LCTL set_param fail_loc=0x1418 + # make sure we don't crash and fail properly + aiocp -b 64M -s 64M -f O_DIRECT $DIR/$tfile $aio_file && + error "aio with page allocation failure succeeded" + $LCTL set_param fail_loc=0 + diff $DIR/$tfile $aio_file + [[ $? != 0 ]] || error "no diff after failed aiocp" +} +run_test 398f "verify aio handles ll_direct_rw_pages errors correctly" + +# NB: To get the parallel DIO behavior in LU-13798, there must be > 1 +# stripe and i/o size must be > stripe size +# Old style synchronous DIO waits after submitting each chunk, resulting in a +# single RPC in flight. This test shows async DIO submission is working by +# showing multiple RPCs in flight. +test_398g() { # LU-13798 + $LFS setstripe -o 0,0 -S 1M $DIR/$tfile + + # We need to do some i/o first to acquire enough grant to put our RPCs + # in flight; otherwise a new connection may not have enough grant + # available + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=1 oflag=direct || + error "parallel dio failed" + stack_trap "rm -f $DIR/$tfile" + + # Reduce RPC size to 1M to avoid combination in to larger RPCs + local pages_per_rpc=$($LCTL get_param osc.*-OST0000-*.max_pages_per_rpc) + $LCTL set_param osc.*-OST0000-*.max_pages_per_rpc=1M + stack_trap "$LCTL set_param -n $pages_per_rpc" + + # Recreate file so it's empty + rm -f $DIR/$tfile + $LFS setstripe -o 0,0 -S 1M $DIR/$tfile + #Pause rpc completion to guarantee we see multiple rpcs in flight + #define OBD_FAIL_OST_BRW_PAUSE_BULK + do_facet ost1 $LCTL set_param fail_loc=0x214 fail_val=2 + stack_trap "do_facet ost1 $LCTL set_param fail_loc=0" + + # Clear rpc stats + $LCTL set_param osc.*.rpc_stats=c + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=1 oflag=direct || + error "parallel dio failed" + stack_trap "rm -f $DIR/$tfile" + + $LCTL get_param osc.*-OST0000-*.rpc_stats + pct=$($LCTL 
get_param osc.*-OST0000-*.rpc_stats | + grep -A 8 'rpcs in flight' | grep -v 'rpcs in flight' | + grep "8:" | awk '{print $8}') + # We look at the "8 rpcs in flight" field, and verify A) it is present + # and B) it includes all RPCs. This proves we had 8 RPCs in flight, + # as expected for an 8M DIO to a file with 1M stripes. + [ $pct -eq 100 ] || error "we should see 8 RPCs in flight" + + # Verify turning off parallel dio works as expected + # Clear rpc stats + $LCTL set_param osc.*.rpc_stats=c + $LCTL set_param llite.*.parallel_dio=0 + stack_trap '$LCTL set_param llite.*.parallel_dio=1' + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=1 oflag=direct || + error "dio with parallel dio disabled failed" + + # Ideally, we would see only one RPC in flight here, but there is an + # unavoidable race between i/o completion and RPC in flight counting, + # so while only 1 i/o is in flight at a time, the RPC in flight counter + # will sometimes exceed 1 (3 or 4 is not rare on VM testing). + # So instead we just verify it's always < 8. + $LCTL get_param osc.*-OST0000-*.rpc_stats + ret=$($LCTL get_param osc.*-OST0000-*.rpc_stats | + grep -A 8 'rpcs in flight' | grep -v 'rpcs in flight' | + grep '^$' -B1 | grep . | awk '{print $1}') + [ $ret != "8:" ] || + error "we should see fewer than 8 RPCs in flight (saw $ret)" +} +run_test 398g "verify parallel dio async RPC submission" + +test_398h() { # LU-13798 + local dio_file=$DIR/$tfile.dio + + $LFS setstripe -C 2 -S 1M $DIR/$tfile $dio_file + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct + stack_trap "rm -f $DIR/$tfile $dio_file" + + dd if=$DIR/$tfile of=$dio_file bs=8M count=8 iflag=direct oflag=direct || + error "parallel dio failed" + diff $DIR/$tfile $dio_file + [[ $? 
== 0 ]] || error "file diff after aiocp" +} +run_test 398h "verify correctness of read & write with i/o size >> stripe size" + +test_398i() { # LU-13798 + local dio_file=$DIR/$tfile.dio + + $LFS setstripe -C 2 -S 1M $DIR/$tfile $dio_file + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct + stack_trap "rm -f $DIR/$tfile $dio_file" + + #define OBD_FAIL_LLITE_PAGE_ALLOC 0x1418 + $LCTL set_param fail_loc=0x1418 + # make sure we don't crash and fail properly + dd if=$DIR/$tfile of=$dio_file bs=8M count=8 iflag=direct oflag=direct && + error "parallel dio page allocation failure succeeded" + diff $DIR/$tfile $dio_file + [[ $? != 0 ]] || error "no diff after failed aiocp" +} +run_test 398i "verify parallel dio handles ll_direct_rw_pages errors correctly" + +test_398j() { # LU-13798 + # Stripe size > RPC size but less than i/o size tests split across + # stripes and RPCs for individual i/o op + $LFS setstripe -o 0,0 -S 4M $DIR/$tfile $DIR/$tfile.2 + + # Reduce RPC size to 1M to guarantee split to multiple RPCs per stripe + local pages_per_rpc=$($LCTL get_param osc.*-OST0000-*.max_pages_per_rpc) + $LCTL set_param osc.*-OST0000-*.max_pages_per_rpc=1M + stack_trap "$LCTL set_param -n $pages_per_rpc" + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct || + error "parallel dio write failed" + stack_trap "rm -f $DIR/$tfile $DIR/$tfile.2" + + dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 iflag=direct || + error "parallel dio read failed" + diff $DIR/$tfile $DIR/$tfile.2 + [[ $? 
== 0 ]] || error "file diff after parallel dio read" +} +run_test 398j "test parallel dio where stripe size > rpc_size" + +test_398k() { # LU-13798 + wait_delete_completed + wait_mds_ost_sync + + # 4 stripe file; we will cause out of space on OST0 + $LFS setstripe -o 0,1,0,1 -S 1M $DIR/$tfile + + # Fill OST0 (if it's not too large) + ORIGFREE=$($LCTL get_param -n lov.$FSNAME-clilov-*.kbytesavail | + head -n1) + if [[ $ORIGFREE -gt $MAXFREE ]]; then + skip "$ORIGFREE > $MAXFREE skipping out-of-space test on OST0" + fi + $LFS setstripe -i 0 -c 1 $DIR/$tfile.1 + dd if=/dev/zero of=$DIR/$tfile.1 bs=1024 count=$MAXFREE && + error "dd should fill OST0" + stack_trap "rm -f $DIR/$tfile.1" + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct + err=$? + + ls -la $DIR/$tfile + $CHECKSTAT -t file -s 0 $DIR/$tfile || + error "file is not 0 bytes in size" + + # dd above should not succeed, but don't error until here so we can + # get debug info above + [[ $err != 0 ]] || + error "parallel dio write with enospc succeeded" + stack_trap "rm -f $DIR/$tfile" +} +run_test 398k "test enospc on first stripe" + +test_398l() { # LU-13798 + wait_delete_completed + wait_mds_ost_sync + + # 4 stripe file; we will cause out of space on OST0 + # Note the 1M stripe size and the > 1M i/o size mean this ENOSPC + # happens on the second i/o chunk we issue + $LFS setstripe -o 1,0,1,0 -S 1M $DIR/$tfile $DIR/$tfile.2 + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=2 oflag=direct + stack_trap "rm -f $DIR/$tfile" + + # Fill OST0 (if it's not too large) + ORIGFREE=$($LCTL get_param -n lov.$FSNAME-clilov-*.kbytesavail | + head -n1) + if [[ $ORIGFREE -gt $MAXFREE ]]; then + skip "$ORIGFREE > $MAXFREE skipping out-of-space test on OST0" + fi + $LFS setstripe -i 0 -c 1 $DIR/$tfile.1 + dd if=/dev/zero of=$DIR/$tfile.1 bs=1024 count=$MAXFREE && + error "dd should fill OST0" + stack_trap "rm -f $DIR/$tfile.1" + + dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 oflag=direct + err=$? 
+ stack_trap "rm -f $DIR/$tfile.2" + + # Check that short write completed as expected + ls -la $DIR/$tfile.2 + $CHECKSTAT -t file -s 1048576 $DIR/$tfile.2 || + error "file is not 1M in size" + + # dd above should not succeed, but don't error until here so we can + # get debug info above + [[ $err != 0 ]] || + error "parallel dio write with enospc succeeded" + + # Truncate source file to same length as output file and diff them + $TRUNCATE $DIR/$tfile 1048576 + diff $DIR/$tfile $DIR/$tfile.2 + [[ $? == 0 ]] || error "data incorrect after short write" +} +run_test 398l "test enospc on intermediate stripe/RPC" + +test_398m() { # LU-13798 + $LFS setstripe -o 0,1,0,1 -S 1M $DIR/$tfile + + lctl set_param *debug=-1 debug_mb=10000 + + # Set up failure on OST0, the first stripe: + #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e + #NB: Fail val is ost # + 1, because we cannot use cfs_fail_val = 0 + # So this fail_val specifies OST0 + do_facet ost1 $LCTL set_param fail_loc=0x20e fail_val=1 + stack_trap "do_facet ost1 $LCTL set_param fail_loc=0" + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct && + error "parallel dio write with failure on first stripe succeeded" + stack_trap "rm -f $DIR/$tfile" + do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0 + + # Place data in file for read + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct || + error "parallel dio write failed" + + # Fail read on OST0, first stripe + #define OBD_FAIL_OST_BRW_READ_BULK 0x20f + do_facet ost1 $LCTL set_param fail_loc=0x20f fail_val=1 + dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 iflag=direct && + error "parallel dio read with error on first stripe succeeded" + rm -f $DIR/$tfile.2 + do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0 + + # Switch to testing on OST1, second stripe + # Clear file contents, maintain striping + echo > $DIR/$tfile + # Set up failure on OST1, second stripe: + do_facet ost2 $LCTL set_param fail_loc=0x20e fail_val=2 + stack_trap "do_facet ost2 $LCTL
set_param fail_loc=0" + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct && + error "parallel dio write with failure on second stripe succeeded" + stack_trap "rm -f $DIR/$tfile" + do_facet ost2 $LCTL set_param fail_loc=0 fail_val=0 + + # Place data in file for read + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct || + error "parallel dio write failed" + + # Fail read on OST1, second stripe + #define OBD_FAIL_OST_BRW_READ_BULK 0x20f + do_facet ost2 $LCTL set_param fail_loc=0x20f fail_val=2 + dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 iflag=direct && + error "parallel dio read with error on second stripe succeeded" + rm -f $DIR/$tfile.2 + do_facet ost2 $LCTL set_param fail_loc=0 fail_val=0 +} +run_test 398m "test RPC failures with parallel dio" + +# Parallel submission of DIO should not cause problems for append, but it's +# important to verify. +test_398n() { # LU-13798 + $LFS setstripe -C 2 -S 1M $DIR/$tfile + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 || + error "dd to create source file failed" + stack_trap "rm -f $DIR/$tfile" + + dd if=$DIR/$tfile of=$DIR/$tfile.1 bs=8M count=8 oflag=direct oflag=append || + error "parallel dio append write failed" + stack_trap "rm -f $DIR/$tfile $DIR/$tfile.1" + diff $DIR/$tfile $DIR/$tfile.1 + [[ $?
== 0 ]] || error "data incorrect after append" + +} +run_test 398n "test append with parallel DIO" + test_fake_rw() { local read_write=$1 if [ "$read_write" = "write" ]; then @@ -23695,14 +24517,26 @@ test_qos_mkdir() { stack_trap "do_nodes $mdts $LCTL set_param \ lod.*.mdt_qos_maxage=$lod_qos_maxage > /dev/null" EXIT - echo - echo "Mkdir (stripe_count $stripe_count) roundrobin:" - $LCTL set_param lmv.*.qos_threshold_rr=100 > /dev/null do_nodes $mdts $LCTL set_param lod.*.mdt_qos_threshold_rr=100 > /dev/null testdir=$DIR/$tdir-s$stripe_count/rr + local stripe_index=$($LFS getstripe -m $testdir) + local test_mkdir_rr=true + + getfattr -d -m dmv $testdir | grep dmv + if [ $? -eq 0 ] && [ $MDS1_VERSION -ge $(version_code 2.14.51) ]; then + local inherit_rr=$($LFS getdirstripe -D --max-inherit-rr $testdir) + + (( $inherit_rr == 0 )) && test_mkdir_rr=false + fi + + echo + $test_mkdir_rr && + echo "Mkdir (stripe_count $stripe_count) roundrobin:" || + echo "Mkdir (stripe_count $stripe_count) on stripe $stripe_index" + for i in $(seq $((100 * MDSCOUNT))); do eval $mkdir_cmd $testdir/subdir$i || error "$mkdir_cmd subdir$i failed" @@ -23712,15 +24546,24 @@ test_qos_mkdir() { count=$($LFS getdirstripe -i $testdir/* | grep ^$((i - 1))$ | wc -l) echo "$count directories created on MDT$((i - 1))" - [ $count -eq 100 ] || error "subdirs are not evenly distributed" + if $test_mkdir_rr; then + (( $count == 100 )) || + error "subdirs are not evenly distributed" + elif [ $((i - 1)) -eq $stripe_index ]; then + (( $count == 100 * MDSCOUNT )) || + error "$count subdirs created on MDT$((i - 1))" + else + (( $count == 0 )) || + error "$count subdirs created on MDT$((i - 1))" + fi - if [ $stripe_count -gt 1 ]; then + if $test_mkdir_rr && [ $stripe_count -gt 1 ]; then count=$($LFS getdirstripe $testdir/* | grep -P "^\s+$((i - 1))\t" | wc -l) echo "$count stripes created on MDT$((i - 1))" # deviation should < 5% of average - [ $count -lt $((95 * stripe_count)) ] || - [ $count -gt $((105 * 
stripe_count)) ] && + (( $count < 95 * stripe_count )) || + (( $count > 105 * stripe_count)) && error "stripes are not evenly distributed" fi done @@ -23760,10 +24603,10 @@ test_qos_mkdir() { fi done - [ ${ffree[min_index]} -eq 0 ] && + (( ${ffree[min_index]} == 0 )) && skip "no free files in MDT$min_index" - [ ${ffree[min_index]} -gt 100000000 ] && - skip "too much free files in MDT$min_index" + (( ${ffree[min_index]} > 100000000 )) && + skip "too many free files in MDT$min_index" # Check if we need to generate uneven MDTs local threshold=50 @@ -23775,20 +24618,14 @@ test_qos_mkdir() { echo -n "weight diff=$diff% must be > $threshold% ..." count=$((${ffree[min_index]} / 10)) # 50 sec per 10000 files in vm - [ $count -gt 40000 ] && [ "$SLOW" = "no" ] && + (( $count < 100000 )) || [ "$SLOW" != "no" ] || skip "$count files to create" echo "Fill MDT$min_index with $count files" [ -d $DIR/$tdir-MDT$min_index ] || $LFS mkdir -i $min_index $DIR/$tdir-MDT$min_index || error "mkdir $tdir-MDT$min_index failed" - for i in $(seq $count); do - $OPENFILE -f O_CREAT:O_LOV_DELAY_CREATE \ - $DIR/$tdir-MDT$min_index/f$j_$i > /dev/null || - error "create f$j_$i failed" - setfattr -n user.413b -v $value \ - $DIR/$tdir-MDT$min_index/f$j_$i || - error "setfattr f$j_$i failed" - done + createmany -d $DIR/$tdir-MDT$min_index/d $count || + error "create d$count failed" ffree=($(lctl get_param -n mdc.*[mM][dD][cC]-*.filesfree)) bavail=($(lctl get_param -n mdc.*[mM][dD][cC]-*.kbytesavail)) @@ -23837,7 +24674,7 @@ test_qos_mkdir() { min=$($LFS getdirstripe -i $testdir/* | grep ^$min_index$ | wc -l) # D-value should > 10% of averge - [ $((max - min)) -lt 10 ] && + (( $max - $min < 10 )) && error "subdirs shouldn't be evenly distributed" # ditto @@ -23846,7 +24683,7 @@ test_qos_mkdir() { grep -P "^\s+$max_index\t" | wc -l) min=$($LFS getdirstripe $testdir/* | grep -P "^\s+$min_index\t" | wc -l) - [ $((max - min)) -le $((10 * stripe_count)) ] && + (( $max - $min < 10 * $stripe_count )) && error 
"stripes shouldn't be evenly distributed"|| true fi } @@ -23876,23 +24713,64 @@ test_413b() { [ $MDS1_VERSION -lt $(version_code 2.12.52) ] && skip "Need server version at least 2.12.52" + local testdir local stripe_count for stripe_count in $(seq 1 $((MDSCOUNT - 1))); do - mkdir $DIR/$tdir-s$stripe_count || error "mkdir failed" - mkdir $DIR/$tdir-s$stripe_count/rr || error "mkdir failed" - mkdir $DIR/$tdir-s$stripe_count/qos || error "mkdir failed" - $LFS setdirstripe -D -c $stripe_count \ - $DIR/$tdir-s$stripe_count/rr || - error "setdirstripe failed" - $LFS setdirstripe -D -c $stripe_count \ - $DIR/$tdir-s$stripe_count/qos || + testdir=$DIR/$tdir-s$stripe_count + mkdir $testdir || error "mkdir $testdir failed" + mkdir $testdir/rr || error "mkdir rr failed" + mkdir $testdir/qos || error "mkdir qos failed" + $LFS setdirstripe -D -c $stripe_count --max-inherit-rr 2 \ + $testdir/rr || error "setdirstripe rr failed" + $LFS setdirstripe -D -c $stripe_count $testdir/qos || error "setdirstripe failed" test_qos_mkdir "mkdir" $stripe_count done } run_test 413b "QoS mkdir under dir whose default LMV starting MDT offset is -1" +test_413c() { + [ $MDSCOUNT -ge 2 ] || + skip "We need at least 2 MDTs for this test" + + [ $MDS1_VERSION -ge $(version_code 2.14.51) ] || + skip "Need server version at least 2.14.50" + + local testdir + local inherit + local inherit_rr + + testdir=$DIR/${tdir}-s1 + mkdir $testdir || error "mkdir $testdir failed" + mkdir $testdir/rr || error "mkdir rr failed" + mkdir $testdir/qos || error "mkdir qos failed" + # default max_inherit is -1, default max_inherit_rr is 0 + $LFS setdirstripe -D -c 1 $testdir/rr || + error "setdirstripe rr failed" + $LFS setdirstripe -D -c 1 -X 2 --max-inherit-rr 1 $testdir/qos || + error "setdirstripe qos failed" + test_qos_mkdir "mkdir" 1 + + mkdir $testdir/rr/level1 || error "mkdir rr/level1 failed" + inherit=$($LFS getdirstripe -D -X $testdir/rr/level1) + (( $inherit == -1 )) || error "rr/level1 inherit $inherit != -1" 
+ inherit_rr=$($LFS getdirstripe -D --max-inherit-rr $testdir/rr/level1) + (( $inherit_rr == 0 )) || + error "rr/level1 inherit-rr $inherit_rr != 0" + + mkdir $testdir/qos/level1 || error "mkdir qos/level1 failed" + inherit=$($LFS getdirstripe -D -X $testdir/qos/level1) + (( $inherit == 1 )) || error "qos/level1 inherit $inherit != 1" + inherit_rr=$($LFS getdirstripe -D --max-inherit-rr $testdir/qos/level1) + (( $inherit_rr == 0 )) || + error "qos/level1 inherit-rr $inherit_rr !=0" + mkdir $testdir/qos/level1/level2 || error "mkdir level2 failed" + getfattr -d -m dmv $testdir/qos/level1/level2 | grep dmv && + error "level2 shouldn't have default LMV" || true +} +run_test 413c "mkdir with default LMV max inherit rr" + test_414() { #define OBD_FAIL_PTLRPC_BULK_ATTACH 0x521 $LCTL set_param fail_loc=0x80000521 @@ -24527,6 +25405,52 @@ test_428() { } run_test 428 "large block size IO should not hang" +test_429() { # LU-7915 / LU-10948 + local ll_opencache_threshold_count="llite.*.opencache_threshold_count" + local testfile=$DIR/$tfile + local mdc_rpcstats="mdc.$FSNAME-MDT0000-*.stats" + local new_flag=1 + local first_rpc + local second_rpc + local third_rpc + + $LCTL get_param $ll_opencache_threshold_count || + skip "client does not have opencache parameter" + + set_opencache $new_flag + stack_trap "restore_opencache" + [ $($LCTL get_param -n $ll_opencache_threshold_count) == $new_flag ] || + error "enable opencache failed" + touch $testfile + # drop MDC DLM locks + cancel_lru_locks mdc + # clear MDC RPC stats counters + $LCTL set_param $mdc_rpcstats=clear + + # According to the current implementation, we need to run 3 times + # open & close file to verify if opencache is enabled correctly. + # 1st, RPCs are sent for lookup/open and open handle is released on + # close finally. + # 2nd, RPC is sent for open, MDS_OPEN_LOCK is fetched automatically, + # so open handle won't be released thereafter. + # 3rd, No RPC is sent out. 
+ $MULTIOP $testfile oc || error "multiop failed" + first_rpc=$(calc_stats $mdc_rpcstats ldlm_ibits_enqueue) + echo "1st: $first_rpc RPCs in flight" + + $MULTIOP $testfile oc || error "multiop failed" + second_rpc=$(calc_stats $mdc_rpcstats ldlm_ibits_enqueue) + echo "2nd: $second_rpc RPCs in flight" + + $MULTIOP $testfile oc || error "multiop failed" + third_rpc=$(calc_stats $mdc_rpcstats ldlm_ibits_enqueue) + echo "3rd: $third_rpc RPCs in flight" + + #verify no MDC RPC is sent + [[ $second_rpc == $third_rpc ]] || error "MDC RPC is still sent" +} +run_test 429 "verify if opencache flag on client side does work" + lseek_test_430() { local offset local file=$1 @@ -25087,7 +26011,7 @@ test_803a() { [ $MDS1_VERSION -lt $(version_code 2.10.54) ] && skip "MDS needs to be newer than 2.10.54" - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir # Create some objects on all MDTs to trigger related logs objects for idx in $(seq $MDSCOUNT); do $LFS mkdir -c $MDSCOUNT -i $((idx % $MDSCOUNT)) \ @@ -25282,7 +26206,7 @@ test_805() { fi do_facet $SINGLEMDS zfs set quota=$(((usedkb+freekb)*1024)) $fsset trap cleanup_805 EXIT - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir $LFS setstripe -E 1M -c2 -E 4M -c2 -E -1 -c2 $DIR/$tdir || error "Can't set PFL layout" createmany -m $DIR/$tdir/f- 1000000 && error "ENOSPC wasn't met" @@ -25296,11 +26220,9 @@ run_test 805 "ZFS can remove from full fs" check_lsom_data() { local file=$1 - local size=$($LFS getsom -s $file) local expect=$(stat -c %s $file) - [[ $size == $expect ]] || - error "$file expected size: $expect, got: $size" + check_lsom_size $1 $expect local blocks=$($LFS getsom -b $file) expect=$(stat -c %b $file) @@ -25310,9 +26232,12 @@ check_lsom_data() check_lsom_size() { - local size=$($LFS getsom -s $1) + local size local expect=$2 + cancel_lru_locks mdc + + size=$($LFS getsom -s $1) [[ $size == $expect ]] || error "$file expected size: $expect, got: $size" } @@ -25429,7 +26354,7 @@ test_807() { stack_trap "restore_lustre_params < 
$save; rm -f $save" EXIT rm -rf $DIR/$tdir || error "rm $tdir failed" - mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed" touch $DIR/$tdir/trunc || error "touch $tdir/trunc failed" $TRUNCATE $DIR/$tdir/trunc 1024 || error "truncate $tdir/trunc failed" $TRUNCATE $DIR/$tdir/trunc 1048576 || @@ -25601,6 +26526,28 @@ test_812b() { # LU-12378 } run_test 812b "do not drop no resend request for idle connect" +test_812c() { + local old + + old=$($LCTL get_param -n osc.*.idle_timeout | head -n 1) + + $LFS setstripe -c 1 -o 0 $DIR/$tfile + $LFS getstripe $DIR/$tfile + $LCTL set_param osc.*.idle_timeout=10 + stack_trap "$LCTL set_param osc.*.idle_timeout=$old" EXIT + # ensure ost1 is connected + stat $DIR/$tfile >/dev/null || error "can't stat" + wait_osc_import_state client ost1 FULL + # no locks, no reqs to let the connection idle + cancel_lru_locks osc + +#define OBD_FAIL_PTLRPC_IDLE_RACE 0x533 + $LCTL set_param fail_loc=0x80000533 + sleep 15 + dd if=/dev/zero of=$DIR/$tfile count=1 conv=sync || error "dd failed" +} +run_test 812c "idle import vs lock enqueue race" + test_813() { local file_heat_sav=$($LCTL get_param -n llite.*.file_heat 2>/dev/null) [ -z "$file_heat_sav" ] && skip "no file heat support"