X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=0f3c56aa47acd40e2468d0bf97b47c47d5636c0d;hb=d775f9ae37975c853984b67f6d1a21e6ec8a8c3d;hp=cfe0193d516f338adfeaaabdb8e4ecf067150aab;hpb=c4be7bab2f06e0db045f940c51a1ecd632ad4fe8;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index cfe0193..0f3c56a 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -77,8 +77,8 @@ if (( $LINUX_VERSION_CODE >= $(version_code 4.18.0) && ALWAYS_EXCEPT+=" 411" fi -# 5 12 8 12 (min)" -[ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 64b 68 71 115 135 136 300o" +# 5 12 8 12 (min)" +[ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 60i 64b 68 71 115 135 136 300o" if [ "$mds1_FSTYPE" = "zfs" ]; then # bug number for skipped test: @@ -162,7 +162,7 @@ check_and_setup_lustre DIR=${DIR:-$MOUNT} assert_DIR -MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} +MAXFREE=${MAXFREE:-$((300000 * $OSTCOUNT))} [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo [ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo @@ -1835,7 +1835,7 @@ __exhaust_precreations() { local FAILIDX=${3:-$OSTIDX} local ofacet=ost$((OSTIDX + 1)) - test_mkdir -p -c1 $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local mdtidx=$($LFS getstripe -m $DIR/$tdir) local mfacet=mds$((mdtidx + 1)) echo OSTIDX=$OSTIDX MDTIDX=$mdtidx @@ -1991,7 +1991,7 @@ test_27q() { reset_enospc rm -f $DIR/$tdir/$tfile - test_mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir $MCREATE $DIR/$tdir/$tfile || error "mcreate $DIR/$tdir/$tfile failed" $TRUNCATE $DIR/$tdir/$tfile 80000000 || error "truncate $DIR/$tdir/$tfile failed" @@ -2755,7 +2755,8 @@ test_27I() { save_layout_restore_at_exit $MOUNT $LFS setstripe -c 2 -i 0 $MOUNT pool_add $pool || error "pool_add failed" - pool_add_targets $pool $ostrange || "pool_add_targets failed" + pool_add_targets $pool $ostrange || + error "pool_add_targets failed" test_mkdir $DIR/$tdir $LFS setstripe -p $pool $DIR/$tdir $MULTIOP $DIR/$tdir/$tfile Oc || error "multiop failed" @@ -4766,7 +4767,7 @@ test_39l() { local atime_diff=$(do_facet $SINGLEMDS \ lctl get_param -n mdd.*MDT0000*.atime_diff) rm -rf $DIR/$tdir - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir # test setting directory atime to future touch -a -d @$TEST_39_ATIME $DIR/$tdir @@ -6665,6 +6666,36 @@ test_56rb() { } run_test 56rb "check lfs find --size --ost/--mdt works" +test_56rc() { + (( MDSCOUNT >= 2 )) || skip "needs at least 2 MDTs" + local dir=$DIR/$tdir + local found + + test_mkdir -c 2 -H all_char $dir || error "failed to mkdir $dir" + $LFS mkdir -c 2 --mdt-hash all_char $dir/$tdir-all{1..10} + (( $MDSCOUNT > 2 )) && + $LFS mkdir -c 3 --mdt-hash fnv_1a_64 $dir/$tdir-fnv{1..10} + mkdir $dir/$tdir-{1..10} + touch $dir/$tfile-{1..10} + + found=$($LFS find $dir --mdt-count 2 | wc -l) + expect=11 + (( $found == $expect )) || error "found $found 2-stripe, expect $expect" + + found=$($LFS find $dir -T +1 | wc -l) + (( $MDSCOUNT > 2 )) && expect=$((expect + 10)) + (( $found == $expect )) || error "found $found 2+stripe, expect $expect" + + found=$($LFS find $dir --mdt-hash all_char | wc -l) + expect=11 + (( $found == $expect )) || error "found $found all_char, expect $expect" + + found=$($LFS find $dir --mdt-hash fnv_1a_64 | wc -l) + (( $MDSCOUNT > 2 )) && expect=10 || expect=0 + (( $found == $expect )) || error "found $found all_char, expect $expect" +} +run_test 56rc "check lfs find --mdt-count/--mdt-hash works" + test_56s() { # LU-611 #LU-9369 [[ $OSTCOUNT -lt 2 ]] && skip_env "need at least 2 OSTs" @@ -8268,6 +8299,34 @@ test_60h() { } run_test 60h "striped directory with missing stripes can be accessed" +function t60i_load() { + mkdir $DIR/$tdir + #define OBD_FAIL_LLOG_PAUSE_AFTER_PAD 0x131c + $LCTL set_param fail_loc=0x131c fail_val=1 + for ((i=0; i<5000; i++)); do + touch $DIR/$tdir/f$i + done +} + +test_60i() { + changelog_register || error "changelog_register failed" + local cl_user="${CL_USERS[$SINGLEMDS]%% *}" + changelog_users $SINGLEMDS | grep -q $cl_user || + error "User $cl_user not found in changelog_users" + changelog_chmask "ALL" + t60i_load & + local PID=$! + for((i=0; i<100; i++)); do + changelog_dump >/dev/null || + error "can't read changelog" + done + kill $PID + wait $PID + changelog_deregister || error "changelog_deregister failed" + $LCTL set_param fail_loc=0 +} +run_test 60i "llog: new record vs reader race" + test_61a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -8821,8 +8880,7 @@ test_65n() { which getfattr > /dev/null 2>&1 || skip_env "no getfattr command" which setfattr > /dev/null 2>&1 || skip_env "no setfattr command" - local root_layout=$(save_layout $MOUNT) - stack_trap "restore_layout $MOUNT $root_layout" EXIT + save_layout_restore_at_exit $MOUNT # new subdirectory under root directory should not inherit # the default layout from root @@ -9391,6 +9449,7 @@ test_77d() { # bug 10889 [ $PARALLEL == "yes" ] && skip "skip parallel run" $GSS && skip_env "could not run with gss" + stack_trap "rm -f $DIR/$tfile" #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 $LCTL set_param fail_loc=0x80000409 set_checksums 1 @@ -9415,6 +9474,7 @@ test_77f() { # bug 10889 $GSS && skip_env "could not run with gss" set_checksums 1 + stack_trap "rm -f $DIR/$tfile" for algo in $CKSUM_TYPES; do cancel_lru_locks osc set_checksum_type $algo @@ -9536,6 +9596,20 @@ run_test 77l "preferred checksum type is remembered after reconnected" rm -f $F77_TMP unset F77_TMP +test_77m() { + (( $CLIENT_VERSION >= $(version_code 2.14.52) )) || + skip "Need at least version 2.14.52" + local param=checksum_speed + + $LCTL get_param $param || error "reading $param failed" + + csum_speeds=$($LCTL get_param -n $param) + + [[ "$csum_speeds" =~ "adler32" && "$csum_speeds" =~ "crc32" ]] || + error "known checksum types are missing" +} +run_test 77m "Verify checksum_speed is correctly read" + cleanup_test_78() { trap 0 rm -f $DIR/$tfile @@ -13827,7 +13901,7 @@ test_134a() { [[ $MDS1_VERSION -lt $(version_code 2.7.54) ]] && skip "Need MDS version at least 2.7.54" - mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + mkdir_on_mdt0 $DIR/$tdir || error "failed to create $DIR/$tdir" cancel_lru_locks mdc local nsdir="ldlm.namespaces.*-MDT0000-mdc-*" @@ -13863,7 +13937,7 @@ test_134b() { [[ $MDS1_VERSION -lt $(version_code 2.7.54) ]] && skip "Need MDS version at least 2.7.54" - mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" + mkdir_on_mdt0 $DIR/$tdir || error "failed to create $DIR/$tdir" cancel_lru_locks mdc local low_wm=$(do_facet mds1 $LCTL get_param -n \ @@ -15224,6 +15298,8 @@ test_160a() { changelog_users $SINGLEMDS | grep -q $cl_user || error "User $cl_user not found in changelog_users" + mkdir_on_mdt0 $DIR/$tdir + # change something test_mkdir -p $DIR/$tdir/pics/2008/zachy changelog_clear 0 || error "changelog_clear failed" @@ -15234,8 +15310,6 @@ test_160a() { ln -s $DIR/$tdir/pics/2008/portland.jpg $DIR/$tdir/pics/desktop.jpg rm $DIR/$tdir/pics/desktop.jpg - changelog_dump | tail -10 - echo "verifying changelog mask" changelog_chmask "-MKDIR" changelog_chmask "-CLOSE" @@ -15249,7 +15323,6 @@ test_160a() { test_mkdir -p $DIR/$tdir/pics/2008/sofia # mkdir 1 echo "zzzzzz" > $DIR/$tdir/pics/zach/file # open 3 - changelog_dump | tail -10 MKDIRS=$(changelog_dump | grep -c "MKDIR") CLOSES=$(changelog_dump | grep -c "CLOSE") [ $MKDIRS -eq 1 ] || error "MKDIR changelog mask count $MKDIRS != 1" @@ -15312,10 +15385,10 @@ test_160a() { error "User '$cl_user' still in changelog_users" # lctl get_param -n mdd.*.changelog_users - # current index: 144 + # current_index: 144 # ID index (idle seconds) - # cl3 144 (2) - if ! changelog_users $SINGLEMDS | grep "^cl"; then + # cl3 144 (2) mask= + if [ -z "$(changelog_users $SINGLEMDS | grep -v current.index)" ]; then # this is the normal case where all users were deregistered # make sure no new records are added when no users are present local last_rec1=$(changelog_users $SINGLEMDS | @@ -16134,6 +16207,107 @@ test_160n() { } run_test 160n "Changelog destroy race" +test_160o() { + local mdt="$(facet_svc $SINGLEMDS)" + + [[ $PARALLEL != "yes" ]] || skip "skip parallel run" + remote_mds_nodsh && skip "remote MDS with nodsh" + [ $MDS1_VERSION -ge $(version_code 2.14.52) ] || + skip "Need MDS version at least 2.14.52" + + changelog_register --user test_160o -m unlnk+close+open || + error "changelog_register failed" + # drop server mask so it doesn't interfere + do_facet $SINGLEMDS $LCTL --device $mdt \ + changelog_register -u "Tt3_-#" && + error "bad symbols in name should fail" + + do_facet $SINGLEMDS $LCTL --device $mdt \ + changelog_register -u test_160o && + error "the same name registration should fail" + + do_facet $SINGLEMDS $LCTL --device $mdt \ + changelog_register -u test_160toolongname && + error "too long name registration should fail" + + changelog_chmask "MARK+HSM" + lctl get_param mdd.*.changelog*mask + local cl_user="${CL_USERS[$SINGLEMDS]%% *}" + changelog_users $SINGLEMDS | grep -q $cl_user || + error "User $cl_user not found in changelog_users" + #verify username + echo $cl_user | grep -q test_160o || + error "User $cl_user has no specific name 'test160o'" + + # change something + changelog_clear 0 || error "changelog_clear failed" + # generate some changelog records to accumulate on MDT0 + test_mkdir -p -i0 -c1 $DIR/$tdir || error "test_mkdir $tdir failed" + touch $DIR/$tdir/$tfile # open 1 + + OPENS=$(changelog_dump | grep -c "OPEN") + [[ $OPENS -eq 1 ]] || error "OPEN changelog mask count $OPENS != 1" + + # must be no MKDIR it wasn't set as user mask + MKDIR=$(changelog_dump | grep -c "MKDIR") + [[ $MKDIR -eq 0 ]] || error "MKDIR changelog mask found $MKDIR > 0" + + oldmask=$(do_facet $SINGLEMDS $LCTL get_param \ + mdd.$mdt.changelog_current_mask -n) + # register maskless user + changelog_register || error "changelog_register failed" + # effective mask should be not changed because it is not minimal + mask=$(do_facet $SINGLEMDS $LCTL get_param \ + mdd.$mdt.changelog_current_mask -n) + [[ $mask == $oldmask ]] || error "mask was changed: $mask vs $oldmask" + # set server mask to minimal value + changelog_chmask "MARK" + # check effective mask again, should be treated as DEFMASK now + mask=$(do_facet $SINGLEMDS $LCTL get_param \ + mdd.$mdt.changelog_current_mask -n) + [[ $mask == *"HLINK"* ]] || error "mask is not DEFMASK as expected" + + do_facet $SINGLEMDS $LCTL --device $mdt \ + changelog_deregister -u test_160o || + error "cannot deregister by name" +} +run_test 160o "changelog user name and mask" + +test_160p() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [[ $MDS1_VERSION -ge $(version_code 2.14.51) ]] || + skip "Need MDS version at least 2.14.51" + [[ "$mds1_FSTYPE" == "ldiskfs" ]] || skip "ldiskfs only test" + local cl_users + local cl_user1 + local entry_count + + # Create a user + changelog_register || error "first changelog_register failed" + + cl_users=(${CL_USERS[mds1]}) + cl_user1="${cl_users[0]}" + + test_mkdir -p -i0 -c1 $DIR/$tdir || error "test_mkdir $tdir failed" + createmany -m $DIR/$tdir/$tfile 50 || + error "create $DIR/$tdir/$tfile failed" + unlinkmany $DIR/$tdir/$tfile 50 || error "unlinkmany failed" + rm -rf $DIR/$tdir + + # check changelogs have been generated + entry_count=$(changelog_dump | wc -l) + ((entry_count != 0)) || error "no changelog entries found" + + # remove changelog_users and check that orphan entries are removed + stop mds1 + do_facet mds1 "$DEBUGFS -w -R 'rm changelog_users' $(mdsdevname 1)" + start mds1 || error "cannot start mdt" + entry_count=$(changelog_dump | wc -l) + ((entry_count == 0)) || + error "found $entry_count changelog entries, expected none" +} +run_test 160p "Changelog orphan cleanup with no users" + test_161a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -17095,7 +17269,7 @@ test_183() { # LU-2275 [[ $MDS1_VERSION -lt $(version_code 2.3.56) ]] && skip "Need MDS version at least 2.3.56" - mkdir -p $DIR/$tdir || error "creating dir $DIR/$tdir" + mkdir_on_mdt0 $DIR/$tdir || error "creating dir $DIR/$tdir" echo aaa > $DIR/$tdir/$tfile #define OBD_FAIL_MDS_NEGATIVE_POSITIVE 0x148 @@ -17687,7 +17861,7 @@ test_205a() { # Job stats local cmd # mkdir - cmd="mkdir $DIR/$tdir" + cmd="$LFS mkdir -i 0 -c 1 $DIR/$tdir" verify_jobstats "$cmd" "$SINGLEMDS" # rmdir cmd="rmdir $DIR/$tdir" @@ -17728,7 +17902,7 @@ test_205a() { # Job stats [ $left -ge 0 ] && wait_update_facet $SINGLEMDS \ "lctl get_param *.*.job_stats | grep -c 'job_id.*mkdir'" \ "0" $left - cmd="mkdir $DIR/$tdir.expire" + cmd="$LFS mkdir -i 0 -c 1 $DIR/$tdir.expire" verify_jobstats "$cmd" "$SINGLEMDS" [ $(do_facet $SINGLEMDS lctl get_param *.*.job_stats | grep -c "job_id.*mkdir") -gt 1 ] && error "old jobstats not expired" @@ -17781,6 +17955,9 @@ run_test 205a "Verify job stats" # LU-13117, LU-13597 test_205b() { + (( $MDS1_VERSION >= $(version_code 2.13.54.91) )) || + skip "Need MDS version at least 2.13.54.91" + job_stats="mdt.*.job_stats" $LCTL set_param $job_stats=clear # Setting jobid_var to USER might not be supported @@ -18219,7 +18396,7 @@ test_220() { #LU-325 local OSTIDX=0 # create on MDT0000 so the last_id and next_id are correct - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir local OST=$($LFS df $DIR | awk '/OST:'$OSTIDX'/ { print $1 }') OST=${OST%_UUID} @@ -19099,7 +19276,7 @@ test_230e() { local a_fid local b_fid - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir mkdir $DIR/$tdir/migrate_dir mkdir $DIR/$tdir/other_dir touch $DIR/$tdir/migrate_dir/a @@ -19951,7 +20128,7 @@ run_test 241b "dio vs dio" test_242() { remote_mds_nodsh && skip "remote MDS with nodsh" - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir touch $DIR/$tdir/$tfile #define OBD_FAIL_MDS_READPAGE_PACK 0x105 @@ -20143,7 +20320,7 @@ test_247f() { grep -q subtree || skip "Fileset feature is not supported" - mkdir $DIR/$tdir || error "mkdir $tdir failed" + mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed" $LFS mkdir -i $((MDSCOUNT - 1)) $DIR/$tdir/remote || error "mkdir remote failed" mkdir $DIR/$tdir/remote/subdir || error "mkdir remote/subdir failed" @@ -20670,6 +20847,7 @@ test_255a() { skip "lustre < 2.8.54 does not support ladvise " remote_ost_nodsh && skip "remote OST with nodsh" + stack_trap "rm -f $DIR/$tfile" lfs setstripe -c -1 -i 0 $DIR/$tfile || error "$tfile failed" ladvise_no_type willread $DIR/$tfile && @@ -20757,6 +20935,7 @@ test_255b() { skip "lustre < 2.8.54 does not support ladvise " remote_ost_nodsh && skip "remote OST with nodsh" + stack_trap "rm -f $DIR/$tfile" lfs setstripe -c 1 -i 0 $DIR/$tfile ladvise_no_type dontneed $DIR/$tfile && @@ -20919,7 +21098,7 @@ test_256() { changelog_register || error "changelog_register failed" rm -rf $DIR/$tdir - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir changelog_clear 0 || error "changelog_clear failed" @@ -21070,7 +21249,7 @@ test_270a() { local dom=$DIR/$tdir/dom_file local tmp=$DIR/$tdir/tmp_file - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir # basic checks for DoM component creation $LFS setstripe -E 1024K -E 2048K -L mdt $dom 2>/dev/null && @@ -22209,7 +22388,7 @@ test_300c() { local file_count - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir $LFS setdirstripe -i 0 -c 2 $DIR/$tdir/striped_dir || error "set striped dir error" @@ -22419,7 +22598,7 @@ test_300g() { local stripe_count local stripe_index - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir mkdir $DIR/$tdir/normal_dir #Checking when client cache stripe index @@ -22759,7 +22938,7 @@ test_300p() { [ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs" remote_mds_nodsh && skip "remote MDS with nodsh" - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir #define OBD_FAIL_OUT_ENOSPC 0x1704 do_facet mds2 lctl set_param fail_loc=0x80001704 @@ -23464,6 +23643,277 @@ test_398f() { # LU-14687 } run_test 398f "verify aio handles ll_direct_rw_pages errors correctly" +# NB: To get the parallel DIO behavior in LU-13798, there must be > 1 +# stripe and i/o size must be > stripe size +# Old style synchronous DIO waits after submitting each chunk, resulting in a +# single RPC in flight. This test shows async DIO submission is working by +# showing multiple RPCs in flight. +test_398g() { # LU-13798 + $LFS setstripe -o 0,0 -S 1M $DIR/$tfile + + # We need to do some i/o first to acquire enough grant to put our RPCs + # in flight; otherwise a new connection may not have enough grant + # available + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=1 oflag=direct || + error "parallel dio failed" + stack_trap "rm -f $DIR/$tfile" + + # Reduce RPC size to 1M to avoid combination in to larger RPCs + local pages_per_rpc=$($LCTL get_param osc.*-OST0000-*.max_pages_per_rpc) + $LCTL set_param osc.*-OST0000-*.max_pages_per_rpc=1M + stack_trap "$LCTL set_param -n $pages_per_rpc" + + # Recreate file so it's empty + rm -f $DIR/$tfile + $LFS setstripe -o 0,0 -S 1M $DIR/$tfile + #Pause rpc completion to guarantee we see multiple rpcs in flight + #define OBD_FAIL_OST_BRW_PAUSE_BULK + do_facet ost1 $LCTL set_param fail_loc=0x214 fail_val=2 + stack_trap "do_facet ost1 $LCTL set_param fail_loc=0" + + # Clear rpc stats + $LCTL set_param osc.*.rpc_stats=c + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=1 oflag=direct || + error "parallel dio failed" + stack_trap "rm -f $DIR/$tfile" + + $LCTL get_param osc.*-OST0000-*.rpc_stats + pct=$($LCTL get_param osc.*-OST0000-*.rpc_stats | + grep -A 8 'rpcs in flight' | grep -v 'rpcs in flight' | + grep "8:" | awk '{print $8}') + # We look at the "8 rpcs in flight" field, and verify A) it is present + # and B) it includes all RPCs. This proves we had 8 RPCs in flight, + # as expected for an 8M DIO to a file with 1M stripes. + [ $pct -eq 100 ] || error "we should see 8 RPCs in flight" + + # Verify turning off parallel dio works as expected + # Clear rpc stats + $LCTL set_param osc.*.rpc_stats=c + $LCTL set_param llite.*.parallel_dio=0 + stack_trap '$LCTL set_param llite.*.parallel_dio=1' + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=1 oflag=direct || + error "dio with parallel dio disabled failed" + + # Ideally, we would see only one RPC in flight here, but there is an + # unavoidable race between i/o completion and RPC in flight counting, + # so while only 1 i/o is in flight at a time, the RPC in flight counter + # will sometimes exceed 1 (3 or 4 is not rare on VM testing). + # So instead we just verify it's always < 8. + $LCTL get_param osc.*-OST0000-*.rpc_stats + ret=$($LCTL get_param osc.*-OST0000-*.rpc_stats | + grep -A 8 'rpcs in flight' | grep -v 'rpcs in flight' | + grep '^$' -B1 | grep . | awk '{print $1}') + [ $ret != "8:" ] || + error "we should see fewer than 8 RPCs in flight (saw $ret)" +} +run_test 398g "verify parallel dio async RPC submission" + +test_398h() { # LU-13798 + local dio_file=$DIR/$tfile.dio + + $LFS setstripe -C 2 -S 1M $DIR/$tfile $dio_file + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct + stack_trap "rm -f $DIR/$tfile $dio_file" + + dd if=$DIR/$tfile of=$dio_file bs=8M count=8 iflag=direct oflag=direct || + error "parallel dio failed" + diff $DIR/$tfile $dio_file + [[ $? == 0 ]] || error "file diff after aiocp" +} +run_test 398h "verify correctness of read & write with i/o size >> stripe size" + +test_398i() { # LU-13798 + local dio_file=$DIR/$tfile.dio + + $LFS setstripe -C 2 -S 1M $DIR/$tfile $dio_file + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct + stack_trap "rm -f $DIR/$tfile $dio_file" + + #define OBD_FAIL_LLITE_PAGE_ALLOC 0x1418 + $LCTL set_param fail_loc=0x1418 + # make sure we don't crash and fail properly + dd if=$DIR/$tfile of=$dio_file bs=8M count=8 iflag=direct oflag=direct && + error "parallel dio page allocation failure succeeded" + diff $DIR/$tfile $dio_file + [[ $? != 0 ]] || error "no diff after failed aiocp" +} +run_test 398i "verify parallel dio handles ll_direct_rw_pages errors correctly" + +test_398j() { # LU-13798 + # Stripe size > RPC size but less than i/o size tests split across + # stripes and RPCs for individual i/o op + $LFS setstripe -o 0,0 -S 4M $DIR/$tfile $DIR/$tfile.2 + + # Reduce RPC size to 1M to guarantee split to multiple RPCs per stripe + local pages_per_rpc=$($LCTL get_param osc.*-OST0000-*.max_pages_per_rpc) + $LCTL set_param osc.*-OST0000-*.max_pages_per_rpc=1M + stack_trap "$LCTL set_param -n $pages_per_rpc" + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct || + error "parallel dio write failed" + stack_trap "rm -f $DIR/$tfile $DIR/$tfile.2" + + dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 iflag=direct || + error "parallel dio read failed" + diff $DIR/$tfile $DIR/$tfile.2 + [[ $? == 0 ]] || error "file diff after parallel dio read" +} +run_test 398j "test parallel dio where stripe size > rpc_size" + +test_398k() { # LU-13798 + wait_delete_completed + wait_mds_ost_sync + + # 4 stripe file; we will cause out of space on OST0 + $LFS setstripe -o 0,1,0,1 -S 1M $DIR/$tfile + + # Fill OST0 (if it's not too large) + ORIGFREE=$($LCTL get_param -n lov.$FSNAME-clilov-*.kbytesavail | + head -n1) + if [[ $ORIGFREE -gt $MAXFREE ]]; then + skip "$ORIGFREE > $MAXFREE skipping out-of-space test on OST0" + fi + $LFS setstripe -i 0 -c 1 $DIR/$tfile.1 + dd if=/dev/zero of=$DIR/$tfile.1 bs=1024 count=$MAXFREE && + error "dd should fill OST0" + stack_trap "rm -f $DIR/$tfile.1" + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct + err=$? + + ls -la $DIR/$tfile + $CHECKSTAT -t file -s 0 $DIR/$tfile || + error "file is not 0 bytes in size" + + # dd above should not succeed, but don't error until here so we can + # get debug info above + [[ $err != 0 ]] || + error "parallel dio write with enospc succeeded" + stack_trap "rm -f $DIR/$tfile" +} +run_test 398k "test enospc on first stripe" + +test_398l() { # LU-13798 + wait_delete_completed + wait_mds_ost_sync + + # 4 stripe file; we will cause out of space on OST0 + # Note the 1M stripe size and the > 1M i/o size mean this ENOSPC + # happens on the second i/o chunk we issue + $LFS setstripe -o 1,0,1,0 -S 1M $DIR/$tfile $DIR/$tfile.2 + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=2 oflag=direct + stack_trap "rm -f $DIR/$tfile" + + # Fill OST0 (if it's not too large) + ORIGFREE=$($LCTL get_param -n lov.$FSNAME-clilov-*.kbytesavail | + head -n1) + if [[ $ORIGFREE -gt $MAXFREE ]]; then + skip "$ORIGFREE > $MAXFREE skipping out-of-space test on OST0" + fi + $LFS setstripe -i 0 -c 1 $DIR/$tfile.1 + dd if=/dev/zero of=$DIR/$tfile.1 bs=1024 count=$MAXFREE && + error "dd should fill OST0" + stack_trap "rm -f $DIR/$tfile.1" + + dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 oflag=direct + err=$? + stack_trap "rm -f $DIR/$tfile.2" + + # Check that short write completed as expected + ls -la $DIR/$tfile.2 + $CHECKSTAT -t file -s 1048576 $DIR/$tfile.2 || + error "file is not 1M in size" + + # dd above should not succeed, but don't error until here so we can + # get debug info above + [[ $err != 0 ]] || + error "parallel dio write with enospc succeeded" + + # Truncate source file to same length as output file and diff them + $TRUNCATE $DIR/$tfile 1048576 + diff $DIR/$tfile $DIR/$tfile.2 + [[ $? == 0 ]] || error "data incorrect after short write" +} +run_test 398l "test enospc on intermediate stripe/RPC" + +test_398m() { # LU-13798 + $LFS setstripe -o 0,1,0,1 -S 1M $DIR/$tfile + + lctl set_param *debug=-1 debug_mb=10000 + + # Set up failure on OST0, the first stripe: + #define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e + #NB: Fail val is ost # + 1, because we cannot use cfs_fail_val = 0 + # So this fail_val specifies OST0 + do_facet ost1 $LCTL set_param fail_loc=0x20e fail_val=1 + stack_trap "do_facet ost1 $LCTL set_param fail_loc=0" + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct && + error "parallel dio write with failure on first stripe succeeded" + stack_trap "rm -f $DIR/$tfile" + do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0 + + # Place data in file for read + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct || + error "parallel dio write failed" + + # Fail read on OST0, first stripe + #define OBD_FAIL_OST_BRW_READ_BULK 0x20f + do_facet ost1 $LCTL set_param fail_loc=0x20f fail_val=1 + dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 iflag=direct && + error "parallel dio read with error on first stripe succeeded" + rm -f $DIR/$tfile.2 + do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0 + + # Switch to testing on OST1, second stripe + # Clear file contents, maintain striping + echo > $DIR/$tfile + # Set up failure on OST1, second stripe: + do_facet ost1 $LCTL set_param fail_loc=0x20e fail_val=2 + stack_trap "do_facet ost1 $LCTL set_param fail_loc=0" + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct && + error "parallel dio write with failure on first stripe succeeded" + stack_trap "rm -f $DIR/$tfile" + do_facet ost1 $LCTL set_param fail_loc=0 fail_val=0 + + # Place data in file for read + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 oflag=direct || + error "parallel dio write failed" + + # Fail read on OST1, second stripe + #define OBD_FAIL_OST_BRW_READ_BULK 0x20f + do_facet ost2 $LCTL set_param fail_loc=0x20f fail_val=2 + dd if=$DIR/$tfile of=$DIR/$tfile.2 bs=8M count=8 iflag=direct && + error "parallel dio read with error on first stripe succeeded" + rm -f $DIR/$tfile.2 + do_facet ost2 $LCTL set_param fail_loc=0 fail_val=0 +} +run_test 398m "test RPC failures with parallel dio" + +# Parallel submission of DIO should not cause problems for append, but it's +# important to verify. +test_398n() { # LU-13798 + $LFS setstripe -C 2 -S 1M $DIR/$tfile + + dd if=/dev/urandom of=$DIR/$tfile bs=8M count=8 || + error "dd to create source file failed" + stack_trap "rm -f $DIR/$tfile" + + dd if=$DIR/$tfile of=$DIR/$tfile.1 bs=8M count=8 oflag=direct oflag=append || + error "parallel dio write with failure on second stripe succeeded" + stack_trap "rm -f $DIR/$tfile $DIR/$tfile.1" + diff $DIR/$tfile $DIR/$tfile.1 + [[ $? == 0 ]] || error "data incorrect after append" + +} +run_test 398n "test append with parallel DIO" + test_fake_rw() { local read_write=$1 if [ "$read_write" = "write" ]; then @@ -25561,7 +26011,7 @@ test_803a() { [ $MDS1_VERSION -lt $(version_code 2.10.54) ] && skip "MDS needs to be newer than 2.10.54" - mkdir -p $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir # Create some objects on all MDTs to trigger related logs objects for idx in $(seq $MDSCOUNT); do $LFS mkdir -c $MDSCOUNT -i $((idx % $MDSCOUNT)) \ @@ -25756,7 +26206,7 @@ test_805() { fi do_facet $SINGLEMDS zfs set quota=$(((usedkb+freekb)*1024)) $fsset trap cleanup_805 EXIT - mkdir $DIR/$tdir + mkdir_on_mdt0 $DIR/$tdir $LFS setstripe -E 1M -c2 -E 4M -c2 -E -1 -c2 $DIR/$tdir || error "Can't set PFL layout" createmany -m $DIR/$tdir/f- 1000000 && error "ENOSPC wasn't met" @@ -25904,7 +26354,7 @@ test_807() { stack_trap "restore_lustre_params < $save; rm -f $save" EXIT rm -rf $DIR/$tdir || error "rm $tdir failed" - mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed" touch $DIR/$tdir/trunc || error "touch $tdir/trunc failed" $TRUNCATE $DIR/$tdir/trunc 1024 || error "truncate $tdir/trunc failed" $TRUNCATE $DIR/$tdir/trunc 1048576 ||