X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=8cdf3fe2a26a3a9f7b8ad50c38e47392c61a1088;hp=85d65df77672791fb288332e14e63b588b7d3e20;hb=82e494a36e9ea4f51ec163ab15beb9fdda7fa8d6;hpb=80a2ff7137d3504e5672c6a68561d4ae8d5a28e3 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 85d65df..8cdf3fe 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -12,8 +12,8 @@ ONLY=${ONLY:-"$*"} ALWAYS_EXCEPT=" 42a 42b 42c 45 68b $SANITY_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -# skipped tests: LU-8411 LU-9096 LU-9054 -ALWAYS_EXCEPT=" 407 253 312 $ALWAYS_EXCEPT" +# skipped tests: LU-8411 LU-9096 LU-9054 LU-10199 +ALWAYS_EXCEPT=" 407 253 312 56xb $ALWAYS_EXCEPT" # Check Grants after these tests GRANT_CHECK_LIST="$GRANT_CHECK_LIST 42a 42b 42c 42d 42e 63a 63b 64a 64b 64c" @@ -40,6 +40,7 @@ SRCDIR=$(cd $(dirname $0); echo $PWD) export PATH=$PATH:/sbin TMP=${TMP:-/tmp} +OSC=${OSC:-"osc"} CC=${CC:-cc} CHECKSTAT=${CHECKSTAT:-"checkstat -v"} @@ -1673,7 +1674,8 @@ test_27u() { # bug 4900 unlinkmany $DIR/$tdir/t- 1000 trap 0 [[ $OBJS -gt 0 ]] && - error "$OBJS objects created on OST-0. See $TLOG" || pass + error "$OBJS objects created on OST-0. See $TLOG" || + rm -f $TLOG } run_test 27u "skip object creation on OSC w/o objects" @@ -2058,6 +2060,10 @@ test_27D() { local ost_list=$(seq $first_ost $ost_step $last_ost) local ost_range="$first_ost $last_ost $ost_step" + if ! combined_mgs_mds ; then + mount_mgs_client + fi + test_mkdir $DIR/$tdir pool_add $POOL || error "pool_add failed" pool_add_targets $POOL $ost_range || error "pool_add_targets failed" @@ -2072,6 +2078,10 @@ test_27D() { error "llapi_layout_test failed" destroy_test_pools || error "destroy test pools failed" + + if ! combined_mgs_mds ; then + umount_mgs_client + fi } run_test 27D "validate llapi_layout API" @@ -3587,7 +3597,7 @@ test_41() { run_test 41 "test small file write + fstat =====================" count_ost_writes() { - lctl get_param -n osc.*.stats | + lctl get_param -n ${OSC}.*.stats | awk -vwrites=0 '/ost_write/ { writes += $2 } \ END { printf("%0.0f", writes) }' } @@ -3647,7 +3657,7 @@ setup_test42() { test_42a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return setup_test42 - cancel_lru_locks osc + cancel_lru_locks $OSC stop_writeback sync; sleep 1; sync # just to be safe BEFOREWRITES=`count_ost_writes` @@ -3663,7 +3673,7 @@ run_test 42a "ensure that we don't flush on close" test_42b() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return setup_test42 - cancel_lru_locks osc + cancel_lru_locks $OSC stop_writeback sync dd if=/dev/zero of=$DIR/f42b bs=1024 count=100 @@ -3699,21 +3709,21 @@ run_test 42b "test destroy of file with cached dirty data ======" # start the file with a full-file pw lock to match against # until the truncate. trunc_test() { - test=$1 - file=$DIR/$test - offset=$2 - cancel_lru_locks osc + test=$1 + file=$DIR/$test + offset=$2 + cancel_lru_locks $OSC stop_writeback # prime the file with 0,EOF PW to match touch $file $TRUNCATE $file 0 sync; sync # now the real test.. - dd if=/dev/zero of=$file bs=1024 count=100 - BEFOREWRITES=`count_ost_writes` - $TRUNCATE $file $offset - cancel_lru_locks osc - AFTERWRITES=`count_ost_writes` + dd if=/dev/zero of=$file bs=1024 count=100 + BEFOREWRITES=`count_ost_writes` + $TRUNCATE $file $offset + cancel_lru_locks $OSC + AFTERWRITES=`count_ost_writes` start_writeback } @@ -3830,6 +3840,7 @@ test_43a() { cp -p multiop $DIR/$tdir/multiop MULTIOP_PROG=$DIR/$tdir/multiop multiop_bg_pause $TMP/$tfile.junk O_c || error "multiop open $TMP/$tfile.junk failed" + rm $TMP/$tfile.junk # delete junk file on close (not part of test) MULTIOP_PID=$! $MULTIOP $DIR/$tdir/multiop Oc && error "expected error, got success" kill -USR1 $MULTIOP_PID || error "kill -USR1 PID $MULTIOP_PID failed" @@ -3844,6 +3855,7 @@ test_43b() { cp -p multiop $DIR/$tdir/multiop MULTIOP_PROG=$DIR/$tdir/multiop multiop_bg_pause $TMP/$tfile.junk O_c || error "multiop open $TMP/$tfile.junk failed" + rm $TMP/$tfile.junk # delete junk file on close (not part of test) MULTIOP_PID=$! $TRUNCATE $DIR/$tdir/multiop 0 && error "expected error, got success" kill -USR1 $MULTIOP_PID || error "kill -USR1 PID $MULTIOP_PID failed" @@ -3912,7 +3924,7 @@ run_test 44a "test sparse pwrite ===============================" dirty_osc_total() { tot=0 - for d in `lctl get_param -n osc.*.cur_dirty_bytes`; do + for d in `lctl get_param -n ${OSC}.*.cur_dirty_bytes`; do tot=$(($tot + $d)) done echo $tot @@ -4153,11 +4165,13 @@ test_51b() { [[ $numfree -lt $nrdirs ]] && skip "not enough blocks ($numfree)" && return - trap cleanup_print_lfsdf EXIT + trap cleanup_print_lfs_df EXIT # create files - createmany -d $dir/d $nrdirs || + createmany -d $dir/d $nrdirs || { + unlinkmany $dir/d $nrdirs error "failed to create $nrdirs subdirs in MDT$mdtidx:$dir" + } # really created : nrdirs=$(ls -U $dir | wc -l) @@ -4266,8 +4280,10 @@ test_51f() { echo "left ulimit at $ulimit_old" fi - createmany -o -k -t 120 $DIR/$tdir/f $numfree || + createmany -o -k -t 120 $DIR/$tdir/f $numfree || { + unlinkmany $DIR/$tdir/f $numfree error "create+open $numfree files in $DIR/$tdir failed" + } ulimit -n $ulimit_old # if createmany exits at 120s there will be fewer than $numfree files @@ -5361,6 +5377,68 @@ test_56aa() { # LU-5937 } run_test 56aa "lfs find --size under striped dir" +test_56ba() { + # Create composite files with one component + TDIR=$DIR/$tdir/1Mfiles + setup_56 5 1 "--component-end 1M" + # Create composite files with three components + TDIR=$DIR/$tdir/2Mfiles + setup_56 5 2 "-E 2M -E 4M -E 6M" + TDIR=$DIR/$tdir + # Create non-composite files + createmany -o $TDIR/${tfile}- 10 + + local nfiles=$($LFIND --component-end 1M --type f $TDIR | wc -l) + [[ $nfiles == 10 ]] || + error "lfs find -E 1M found $nfiles != 10 files" + + nfiles=$($LFIND ! -E 1M --type f $TDIR | wc -l) + [[ $nfiles == 25 ]] || + error "lfs find ! -E 1M found $nfiles != 25 files" + + # All files have a component that starts at 0 + local nfiles=$($LFIND --component-start 0 --type f $TDIR | wc -l) + [[ $nfiles == 35 ]] || + error "lfs find --component-start 0 found $nfiles != 35 files" + + nfiles=$($LFIND --component-start 2M --type f $TDIR | wc -l) + [[ $nfiles == 15 ]] || + error "$LFIND --component-start 2M found $nfiles != 15 files" + + # All files created here have a componenet that does not starts at 2M + nfiles=$($LFIND ! --component-start 2M --type f $TDIR | wc -l) + [[ $nfiles == 35 ]] || + error "$LFIND ! --component-start 2M found $nfiles != 35 files" + + # Find files with a specified number of components + local nfiles=$($LFIND --component-count 3 --type f $TDIR | wc -l) + [[ $nfiles == 15 ]] || + error "lfs find --component-count 3 found $nfiles != 15 files" + + # Remember non-composite files have a component count of zero + local nfiles=$($LFIND --component-count 0 --type f $TDIR | wc -l) + [[ $nfiles == 10 ]] || + error "lfs find --component-count 0 found $nfiles != 10 files" + + nfiles=$($LFIND ! --component-count 3 --type f $TDIR | wc -l) + [[ $nfiles == 20 ]] || + error "$LFIND ! --component-count 3 found $nfiles != 20 files" + + # All files have a flag called "init" + local nfiles=$($LFIND --component-flags init --type f $TDIR | wc -l) + [[ $nfiles == 35 ]] || + error "$LFIND --component-flags init found $nfiles != 35 files" + + # Multi-component files will have a component not initialized + local nfiles=$($LFIND ! --component-flags init --type f $TDIR | wc -l) + [[ $nfiles == 15 ]] || + error "$LFIND !--component-flags init found $nfiles != 15 files" + + rm -rf $TDIR + +} +run_test 56ba "test lfs find --component-end, -start, -count, and -flags" + test_57a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return # note test will not do anything if MDS is not local @@ -5713,6 +5791,92 @@ test_64c() { } run_test 64c "verify grant shrink" +# this does exactly what osc_request.c:osc_announce_cached() does in +# order to calculate max amount of grants to ask from server +want_grant() { + local tgt=$1 + + local page_size=$(get_page_size client) + + local nrpages=$($LCTL get_param -n osc.${tgt}.max_pages_per_rpc) + local rpc_in_flight=$($LCTL get_param -n osc.${tgt}.max_rpcs_in_flight) + + ((rpc_in_flight ++)); + nrpages=$((nrpages * rpc_in_flight)) + + local dirty_max_pages=$($LCTL get_param -n osc.${tgt}.max_dirty_mb) + + dirty_max_pages=$((dirty_max_pages * 1024 * 1024 / page_size)) + + [[ $dirty_max_pages -gt $nrpages ]] && nrpages=$dirty_max_pages + local undirty=$((nrpages * page_size)) + + local max_extent_pages + max_extent_pages=$($LCTL get_param osc.${tgt}.import | + grep grant_max_extent_size | awk '{print $2}') + max_extent_pages=$((max_extent_pages / page_size)) + local nrextents=$(((nrpages + max_extent_pages - 1) / max_extent_pages)) + local grant_extent_tax + grant_extent_tax=$($LCTL get_param osc.${tgt}.import | + grep grant_extent_tax | awk '{print $2}') + + undirty=$((undirty + nrextents * grant_extent_tax)) + + echo $undirty +} + +# this is size of unit for grant allocation. It should be equal to +# what tgt_grant.c:tgt_grant_chunk() calculates +grant_chunk() { + local tgt=$1 + local max_brw_size + local grant_extent_tax + + max_brw_size=$($LCTL get_param osc.${tgt}.import | + grep max_brw_size | awk '{print $2}') + + grant_extent_tax=$($LCTL get_param osc.${tgt}.import | + grep grant_extent_tax | awk '{print $2}') + + echo $(((max_brw_size + grant_extent_tax) * 2)) +} + +test_64d() { + [ $(lustre_version_code ost1) -lt $(version_code 2.10.56) ] && + skip "OST < 2.10.55 doesn't limit grants enough" && return 0 + + local tgt=$($LCTL dl | grep "0000-osc-[^mM]" | awk '{print $4}') + + [[ $($LCTL get_param osc.${tgt}.import | + grep "connect_flags:.*grant_param") ]] || \ + { skip "no grant_param connect flag"; return; } + + local olddebug=$($LCTL get_param -n debug 2> /dev/null) + + $LCTL set_param debug="$OLDDEBUG" 2> /dev/null || true + + local max_cur_granted=$(($(want_grant $tgt) + $(grant_chunk $tgt))) + + $SETSTRIPE $DIR/$tfile -i 0 -c 1 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1000 & + ddpid=$! + + while true + do + local cur_grant=$($LCTL get_param -n osc.${tgt}.cur_grant_bytes) + if [[ $cur_grant -gt $max_cur_granted ]] + then + kill $ddpid + error "cur_grant $cur_grant > $max_cur_granted" + fi + kill -0 $ddpid + [[ $? -ne 0 ]] && break; + sleep 2 + done + $LCTL set_param debug="$olddebug" 2> /dev/null || true +} +run_test 64d "check grant limit exceed" + # bug 1414 - set/get directories' stripe info test_65a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return @@ -6874,7 +7038,7 @@ test_101e() { done echo "Cancel LRU locks on lustre client to flush the client cache" - cancel_lru_locks osc + cancel_lru_locks $OSC echo "Reset readahead stats" $LCTL set_param -n llite.*.read_ahead_stats 0 @@ -6966,14 +7130,19 @@ test_101g() { local list=$(comma_list $(osts_nodes)) local p="$TMP/$TESTSUITE-$TESTNAME.parameters" local brw_size="obdfilter.*.brw_size" + local ostver=$(lustre_version_code ost1) + local cliver=$(lustre_version_code client) $LFS setstripe -i 0 -c 1 $DIR/$tfile local orig_mb=$(do_facet ost1 $LCTL get_param -n $brw_size | head -n 1) - if [ $(lustre_version_code ost1) -ge $(version_code 2.8.52) -a \ - $(lustre_version_code client) -ge $(version_code 2.8.52) ]; then - [ $(lustre_version_code ost1) -ge $(version_code 2.9.52) ] && - suffix="M" + if [ $ostver -ge $(version_code 2.8.52) -o \ + \( $ostver -ge $(version_code 2.7.17) -a \ + $ostver -lt $(version_code 2.7.50) \) ] && + [ $cliver -ge $(version_code 2.8.52) -o \ + \( $cliver -ge $(version_code 2.7.17) -a \ + $cliver -lt $(version_code 2.7.50) \) ]; then + [ $ostver -ge $(version_code 2.9.52) ] && suffix="M" if [[ $orig_mb -lt 16 ]]; then save_lustre_params $osts "$brw_size" > $p do_nodes $list $LCTL set_param -n $brw_size=16$suffix || @@ -8429,6 +8598,34 @@ run_test 118m "fdatasync dir =========" [ "$SLOW" = "no" ] && [ -n "$OLD_RESENDCOUNT" ] && set_resend_count $OLD_RESENDCOUNT +test_118n() +{ + local begin + local end + + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + remote_ost_nodsh && skip "remote OSTs with nodsh" && return + + # Sleep to avoid a cached response. + #define OBD_STATFS_CACHE_SECONDS 1 + sleep 2 + + # Inject a 10 second delay in the OST_STATFS handler. + #define OBD_FAIL_OST_STATFS_DELAY 0x242 + set_nodes_failloc "$(osts_nodes)" 0x242 + + begin=$SECONDS + stat --file-system $MOUNT > /dev/null + end=$SECONDS + + set_nodes_failloc "$(osts_nodes)" 0 + + if ((end - begin > 20)); then + error "statfs took $((end - begin)) seconds, expected 10" + fi +} +run_test 118n "statfs() sends OST_STATFS requests in parallel" + test_119a() # bug 11737 { BSIZE=$((512 * 1024)) @@ -9257,7 +9454,7 @@ test_129() { check_mds_dmesg '"has reached"' || error_exit "reached message should be output" - [ $has_warning -eq 0 ] && + [ $has_warning = "false" ] && error_exit "warning message should be output" dirsize=$(stat -c%s "$DIR/$tdir") @@ -9304,7 +9501,7 @@ test_130a() { skip "ORI-366/LU-1941: FIEMAP unimplemented on ZFS" && return [ $RC != 0 ] && error "filefrag $fm_file failed" - filefrag_op=$(filefrag -ve $fm_file | + filefrag_op=$(filefrag -ve -k $fm_file | sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') lun=$($GETSTRIPE -i $fm_file) @@ -9354,7 +9551,7 @@ test_130b() { error "dd failed on $fm_file" filefrag -ves $fm_file || error "filefrag $fm_file failed" - filefrag_op=$(filefrag -ve $fm_file | + filefrag_op=$(filefrag -ve -k $fm_file | sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') last_lun=$(echo $filefrag_op | cut -d: -f5 | @@ -9413,7 +9610,7 @@ test_130c() { error "dd failed on $fm_file" filefrag -ves $fm_file || error "filefrag $fm_file failed" - filefrag_op=$(filefrag -ve $fm_file | + filefrag_op=$(filefrag -ve -k $fm_file | sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') last_lun=$(echo $filefrag_op | cut -d: -f5 | @@ -9481,7 +9678,7 @@ test_130d() { error "dd failed on $fm_file" filefrag -ves $fm_file || error "filefrag $fm_file failed" - filefrag_op=$(filefrag -ve $fm_file | + filefrag_op=$(filefrag -ve -k $fm_file | sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') last_lun=$(echo $filefrag_op | cut -d: -f5 | @@ -9543,7 +9740,7 @@ test_130e() { done filefrag -ves $fm_file || error "filefrag $fm_file failed" - filefrag_op=$(filefrag -ve $fm_file | + filefrag_op=$(filefrag -ve -k $fm_file | sed -n '/ext:/,/found/{/ext:/d; /found/d; p}') last_lun=$(echo $filefrag_op | cut -d: -f5 | @@ -9985,10 +10182,11 @@ test_133g() { echo "proc_dirs='$proc_dirs'" [ -n "$proc_dirs" ] || error "no proc_dirs on $HOSTNAME" find $proc_dirs \ + -ignore_readdir_race \ -type f \ -not -name force_lbug \ -not -name changelog_mask \ - -exec badarea_io '{}' \; &> /dev/null || + -exec badarea_io '{}' \; || error "find $proc_dirs failed" local facet @@ -10000,10 +10198,11 @@ test_133g() { echo "${facet}_proc_dirs='$facet_proc_dirs'" [ -z "$facet_proc_dirs" ] && error "no proc_dirs on $facet" do_facet $facet find $facet_proc_dirs \ + -ignore_readdir_race \ -type f \ -not -name force_lbug \ -not -name changelog_mask \ - -exec badarea_io '{}' \\\; &> /dev/null || + -exec badarea_io '{}' \\\; || error "$facet find $facet_proc_dirs failed" done @@ -10162,31 +10361,31 @@ test_150() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return local TF="$TMP/$tfile" - dd if=/dev/urandom of=$TF bs=6096 count=1 || error "dd failed" - cp $TF $DIR/$tfile - cancel_lru_locks osc - cmp $TF $DIR/$tfile || error "$TMP/$tfile $DIR/$tfile differ" - remount_client $MOUNT - df -P $MOUNT - cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (remount)" + dd if=/dev/urandom of=$TF bs=6096 count=1 || error "dd failed" + cp $TF $DIR/$tfile + cancel_lru_locks $OSC + cmp $TF $DIR/$tfile || error "$TMP/$tfile $DIR/$tfile differ" + remount_client $MOUNT + df -P $MOUNT + cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (remount)" - $TRUNCATE $TF 6000 - $TRUNCATE $DIR/$tfile 6000 - cancel_lru_locks osc - cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (truncate1)" + $TRUNCATE $TF 6000 + $TRUNCATE $DIR/$tfile 6000 + cancel_lru_locks $OSC + cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (truncate1)" - echo "12345" >>$TF - echo "12345" >>$DIR/$tfile - cancel_lru_locks osc - cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (append1)" + echo "12345" >>$TF + echo "12345" >>$DIR/$tfile + cancel_lru_locks $OSC + cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (append1)" - echo "12345" >>$TF - echo "12345" >>$DIR/$tfile - cancel_lru_locks osc - cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (append2)" + echo "12345" >>$TF + echo "12345" >>$DIR/$tfile + cancel_lru_locks $OSC + cmp $TF $DIR/$tfile || error "$TF $DIR/$tfile differ (append2)" - rm -f $TF - true + rm -f $TF + true } run_test 150 "truncate/append tests" @@ -10734,7 +10933,7 @@ test_155_small_load() { dd if=/dev/urandom of=$temp bs=6096 count=1 || \ error "dd of=$temp bs=6096 count=1 failed" cp $temp $file - cancel_lru_locks osc + cancel_lru_locks $OSC cmp $temp $file || error "$temp $file differ" $TRUNCATE $temp 6000 @@ -11036,9 +11235,8 @@ test_156() { log "cache hits:: before: $BEFORE, after: $AFTER" fi - rm -f $file restore_lustre_params < $p - rm -f $p + rm -f $p $file } run_test 156 "Verification of tunables" @@ -11308,6 +11506,186 @@ test_160e() { } run_test 160e "changelog negative testing" +cleanup_160f() { + trap 0 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + echo "Deregistering changelog client $CL_USER" + do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $CL_USER + echo "Deregistering changelog client $CL_USER2" + do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $CL_USER2 + restore_lustre_params < $save_params + rm -f $save_params +} + +test_160f() { + # do_facet $SINGLEMDS $LCTL set_param mdd.$MDT0.changelog_gc=1 + # should be set by default + + local CL_USERS="mdd.$MDT0.changelog_users" + local GET_CL_USERS="do_facet $SINGLEMDS $LCTL get_param -n $CL_USERS" + local save_params="$TMP/sanity-$TESTNAME.parameters" + + save_lustre_params $SINGLEMDS \ + "mdd.$MDT0.changelog_max_idle_time" > $save_params + save_lustre_params $SINGLEMDS \ + "mdd.$MDT0.changelog_min_gc_interval" >> $save_params + save_lustre_params $SINGLEMDS \ + "mdd.$MDT0.changelog_min_free_cat_entries" >> $save_params + + trap cleanup_160f EXIT + + # Create a user + CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + changelog_register -n) + echo "Registered as changelog user $CL_USER" + CL_USER2=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + changelog_register -n) + echo "Registered as changelog user $CL_USER2" + $GET_CL_USERS | grep -q $CL_USER || + error "User $CL_USER not found in changelog_users" + $GET_CL_USERS | grep -q $CL_USER2 || + error "User $CL_USER2 not found in changelog_users" + + # generate some changelogs to accumulate + mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + touch $DIR/$tdir/$tfile || error "touch $DIR/$tdir/$tfile failed" + touch $DIR/$tdir/${tfile}2 || error "touch $DIR/$tdir/${tfile}2 failed" + rm -f $DIR/$tdir/$tfile || error "rm -f $tfile failed" + + # check changelogs have been generated + nbcl=$($LFS changelog $MDT0 | wc -l) + [[ $nbcl -eq 0 ]] && error "no changelogs found" + + do_facet $SINGLEMDS $LCTL set_param \ + mdd.$MDT0.changelog_max_idle_time=10 + do_facet $SINGLEMDS $LCTL set_param \ + mdd.$MDT0.changelog_min_gc_interval=2 + do_facet $SINGLEMDS $LCTL set_param \ + mdd.$MDT0.changelog_min_free_cat_entries=3 + + # simulate changelog catalog almost full +#define OBD_FAIL_CAT_FREE_RECORDS 0x1313 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1313 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + + sleep 6 + USER_REC1=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}") + $LFS changelog_clear $MDT0 $CL_USER $(($USER_REC1 + 2)) + USER_REC2=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}") + echo "verifying user clear: $(( $USER_REC1 + 2 )) == $USER_REC2" + [ $USER_REC2 == $(($USER_REC1 + 2)) ] || + error "user index expected $(($USER_REC1 + 2)) is $USER_REC2" + sleep 5 + + # generate one more changelog to trigger fail_loc + rm -rf $DIR/$tdir || error "rm -rf $tdir failed" + + # ensure gc thread is done + wait_update_facet $SINGLEMDS \ + "ps -e -o comm= | grep chlg_gc_thread" "" 20 + + # check user still registered + $GET_CL_USERS | grep -q $CL_USER || + error "User $CL_USER not found in changelog_users" + # check user2 unregistered + $GET_CL_USERS | grep -q $CL_USER2 && + error "User $CL_USER2 still found in changelog_users" + + # check changelogs are present and starting at $USER_REC2 + 1 + FIRST_REC=$($LFS changelog $MDT0 | head -n1 | awk '{print $1}') + echo "verifying min purge: $(( $USER_REC2 + 1 )) == $FIRST_REC" + [ $FIRST_REC == $(($USER_REC2 + 1)) ] || + error "first index should be $(($USER_REC2 + 1)) is $FIRST_REC" + + cleanup_160f +} +run_test 160f "changelog garbage collect (timestamped users)" + +test_160g() { + # do_facet $SINGLEMDS $LCTL set_param mdd.$MDT0.changelog_gc=1 + # should be set by default + + local CL_USERS="mdd.$MDT0.changelog_users" + local GET_CL_USERS="do_facet $SINGLEMDS $LCTL get_param -n $CL_USERS" + local save_params="$TMP/sanity-$TESTNAME.parameters" + + save_lustre_params $SINGLEMDS \ + "mdd.$MDT0.changelog_max_idle_indexes" > $save_params + save_lustre_params $SINGLEMDS \ + "mdd.$MDT0.changelog_min_gc_interval" >> $save_params + save_lustre_params $SINGLEMDS \ + "mdd.$MDT0.changelog_min_free_cat_entries" >> $save_params + + trap cleanup_160f EXIT + +#define OBD_FAIL_TIME_IN_CHLOG_USER 0x1314 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1314 + + # Create a user + CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + changelog_register -n) + echo "Registered as changelog user $CL_USER" + CL_USER2=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \ + changelog_register -n) + echo "Registered as changelog user $CL_USER2" + $GET_CL_USERS | grep -q $CL_USER || + error "User $CL_USER not found in changelog_users" + $GET_CL_USERS | grep -q $CL_USER2 || + error "User $CL_USER2 not found in changelog_users" + + # generate some changelogs to accumulate + mkdir -p $DIR/$tdir || error "mkdir $tdir failed" + touch $DIR/$tdir/$tfile || error "touch $DIR/$tdir/$tfile failed" + touch $DIR/$tdir/${tfile}2 || error "touch $DIR/$tdir/${tfile}2 failed" + rm -f $DIR/$tdir/$tfile || error "rm -f $tfile failed" + + # check changelogs have been generated + nbcl=$($LFS changelog $MDT0 | wc -l) + [[ $nbcl -eq 0 ]] && error "no changelogs found" + + do_facet $SINGLEMDS $LCTL set_param \ + mdd.$MDT0.changelog_max_idle_indexes=$((nbcl - 1)) + do_facet $SINGLEMDS $LCTL set_param \ + mdd.$MDT0.changelog_min_gc_interval=2 + do_facet $SINGLEMDS $LCTL set_param \ + mdd.$MDT0.changelog_min_free_cat_entries=3 + + # simulate changelog catalog almost full +#define OBD_FAIL_CAT_FREE_RECORDS 0x1313 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1313 + do_facet $SINGLEMDS $LCTL set_param fail_val=3 + + USER_REC1=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}") + $LFS changelog_clear $MDT0 $CL_USER $(($USER_REC1 + 3)) + USER_REC2=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}") + echo "verifying user clear: $(( $USER_REC1 + 3 )) == $USER_REC2" + [ $USER_REC2 == $(($USER_REC1 + 3)) ] || + error "user index expected $(($USER_REC1 + 3)) is $USER_REC2" + + # generate one more changelog to trigger fail_loc + rm -rf $DIR/$tdir || error "rm -rf $tdir failed" + + # ensure gc thread is done + wait_update_facet $SINGLEMDS \ + "ps -e -o comm= | grep chlg_gc_thread" "" 20 + + # check user still registered + $GET_CL_USERS | grep -q $CL_USER || + error "User $CL_USER not found in changelog_users" + # check user2 unregistered + $GET_CL_USERS | grep -q $CL_USER2 && + error "User $CL_USER2 still found in changelog_users" + + # check changelogs are present and starting at $USER_REC2 + 1 + FIRST_REC=$($LFS changelog $MDT0 | head -n1 | awk '{print $1}') + echo "verifying min purge: $(( $USER_REC2 + 1 )) == $FIRST_REC" + [ $FIRST_REC == $(($USER_REC2 + 1)) ] || + error "first index should be $(($USER_REC2 + 1)) is $FIRST_REC" + + cleanup_160f +} +run_test 160g "changelog garbage collect (old users)" + test_161a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return test_mkdir -c1 $DIR/$tdir @@ -11981,6 +12359,8 @@ test_184a() { cmp $ref1 $file2 || error "content compare failed ($ref1 != $file2)" cmp $ref2 $file1 || error "content compare failed ($ref2 != $file1)" + + lfsck_verify_pfid $file1 $file2 || error "PFID are not transferred" } run_test 184a "Basic layout swap" @@ -12226,8 +12606,12 @@ test_200() { local test_path=$POOL_ROOT/$POOL_DIR_NAME local file_dir=$POOL_ROOT/file_tst local subdir=$test_path/subdir - local rc=0 + + if ! combined_mgs_mds ; then + mount_mgs_client + fi + while : ; do # former test_200a test_200b pool_add $POOL || { rc=$? ; break; } @@ -12247,7 +12631,7 @@ test_200() { pool_create_files $POOL $file_dir $files "$ost_list" \ || { rc=$? ; break; } # former test_200g test_200h - pool_lfs_df $POOL || { rc=$? ; break; } + pool_lfs_df $POOL || { rc=$? ; break; } pool_file_rel_path $POOL $test_path || { rc=$? ; break; } # former test_201a test_201b test_201c @@ -12255,11 +12639,15 @@ test_200() { local f=$test_path/$tfile pool_remove_all_targets $POOL $f || { rc=$? ; break; } - pool_remove $POOL $f || { rc=$? ; break; } + pool_remove $POOL $f || { rc=$? ; break; } break done destroy_test_pools + + if ! combined_mgs_mds ; then + umount_mgs_client + fi return $rc } run_test 200 "OST pools" @@ -12937,6 +13325,10 @@ test_220() { #LU-325 $LFS df -i + if ! combined_mgs_mds ; then + mount_mgs_client + fi + do_facet ost$((OSTIDX + 1)) lctl set_param fail_val=-1 #define OBD_FAIL_OST_ENOINO 0x229 do_facet ost$((OSTIDX + 1)) lctl set_param fail_loc=0x229 @@ -12967,10 +13359,16 @@ test_220() { #LU-325 do_facet ost$((OSTIDX + 1)) lctl set_param fail_val=0 do_facet ost$((OSTIDX + 1)) lctl set_param fail_loc=0 - do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $OST || return 4 - do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME || return 5 + do_facet mgs $LCTL pool_remove $FSNAME.$TESTNAME $OST || + error "$LCTL pool_remove $FSNAME.$TESTNAME $OST failed" + do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME || + error "$LCTL pool_destroy $FSNAME.$TESTNAME failed" echo "unlink $MDSOBJS files @$next_id..." - unlinkmany $DIR/$tdir/f $MDSOBJS || return 6 + unlinkmany $DIR/$tdir/f $MDSOBJS || error "unlinkmany failed" + + if ! combined_mgs_mds ; then + umount_mgs_client + fi } run_test 220 "preallocated MDS objects still used if ENOSPC from OST" @@ -13142,8 +13540,6 @@ test_225b () { skip_env "Need to mount OST to test" && return fi - [ $MDSCOUNT -ge 2 ] && - skip "skipping now for more than one MDT" && return local mds=$(facet_host $SINGLEMDS) local target=$(do_nodes $mds 'lctl dl' | \ awk "{if (\$2 == \"UP\" && \$3 == \"mdt\") {print \$4}}") @@ -13395,7 +13791,7 @@ test_229() { # LU-2482, LU-3448 $GETSTRIPE -v $DIR/$tfile local pattern=$($GETSTRIPE -L $DIR/$tfile) - [ X"$pattern" = X"80000001" ] || error "pattern error ($pattern)" + [ X"$pattern" = X"released" ] || error "pattern error ($pattern)" local stripe_count=$($GETSTRIPE -c $DIR/$tfile) || error "getstripe" [ $stripe_count -eq 2 ] || error "stripe count not 2 ($stripe_count)" @@ -13892,19 +14288,44 @@ test_231b() { } run_test 231b "must not assert on fully utilized OST request buffer" -test_232() { +test_232a() { mkdir -p $DIR/$tdir + $LFS setstripe -c1 -i0 $DIR/$tdir/$tfile + #define OBD_FAIL_LDLM_OST_LVB 0x31c - $LCTL set_param fail_loc=0x31c + do_facet ost1 $LCTL set_param fail_loc=0x31c # ignore dd failure dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=1 || true - $LCTL set_param fail_loc=0 + do_facet ost1 $LCTL set_param fail_loc=0 + umount_client $MOUNT || error "umount failed" + mount_client $MOUNT || error "mount failed" + stop ost1 || error "cannot stop ost1" + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || error "cannot start ost1" +} +run_test 232a "failed lock should not block umount" + +test_232b() { + mkdir -p $DIR/$tdir + $LFS setstripe -c1 -i0 $DIR/$tdir/$tfile + dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=1 + sync + cancel_lru_locks osc + + #define OBD_FAIL_LDLM_OST_LVB 0x31c + do_facet ost1 $LCTL set_param fail_loc=0x31c + + # ignore failure + $LFS data_version $DIR/$tdir/$tfile || true + + do_facet ost1 $LCTL set_param fail_loc=0 umount_client $MOUNT || error "umount failed" mount_client $MOUNT || error "mount failed" + stop ost1 || error "cannot stop ost1" + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || error "cannot start ost1" } -run_test 232 "failed lock should not block umount" +run_test 232b "failed data version lock should not block umount" test_233a() { [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.64) ] || @@ -14106,7 +14527,7 @@ run_test 240 "race between ldlm enqueue and the connection RPC (no ASSERT)" test_241_bio() { for LOOP in $(seq $1); do dd if=$DIR/$tfile of=/dev/null bs=40960 count=1 2>/dev/null - cancel_lru_locks osc || true + cancel_lru_locks $OSC || true done } @@ -14120,7 +14541,7 @@ test_241_dio() { test_241a() { # was test_241 dd if=/dev/zero of=$DIR/$tfile count=1 bs=40960 ls -la $DIR/$tfile - cancel_lru_locks osc + cancel_lru_locks $OSC test_241_bio 1000 & PID=$! test_241_dio 1000 @@ -14208,6 +14629,14 @@ test_246() { # LU-7371 } run_test 246 "Read file of size 4095 should return right length" +cleanup_247() { + local submount=$1 + + trap 0 + umount_client $submount + rmdir $submount +} + test_247a() { lctl get_param -n mdc.$FSNAME-MDT0000*.import | grep -q subtree || @@ -14219,11 +14648,11 @@ test_247a() { mkdir -p $submount || error "mkdir $submount failed" FILESET="$FILESET/$tdir" mount_client $submount || error "mount $submount failed" + trap "cleanup_247 $submount" EXIT echo foo > $submount/$tfile || error "write $submount/$tfile failed" [ $(cat $MOUNT/$tdir/$tfile) = "foo" ] || error "read $MOUNT/$tdir/$tfile failed" - umount_client $submount || error "umount $submount failed" - rmdir $submount + cleanup_247 $submount } run_test 247a "mount subdir as fileset" @@ -14250,12 +14679,12 @@ test_247c() { mkdir -p $MOUNT/$tdir/dir1 mkdir -p $submount || error "mkdir $submount failed" + trap "cleanup_247 $submount" EXIT FILESET="$FILESET/$tdir" mount_client $submount || error "mount $submount failed" local fid=$($LFS path2fid $MOUNT/) $LFS fid2path $submount $fid && error "fid2path should fail" - umount_client $submount || error "umount $submount failed" - rmdir $submount + cleanup_247 $submount } run_test 247c "running fid2path outside root" @@ -14269,10 +14698,10 @@ test_247d() { mkdir -p $submount || error "mkdir $submount failed" FILESET="$FILESET/$tdir" mount_client $submount || error "mount $submount failed" + trap "cleanup_247 $submount" EXIT local fid=$($LFS path2fid $submount/dir1) $LFS fid2path $submount $fid || error "fid2path should succeed" - umount_client $submount || error "umount $submount failed" - rmdir $submount + cleanup_247 $submount } run_test 247d "running fid2path inside root" @@ -14308,12 +14737,13 @@ test_248() { # small read with fast read enabled $LCTL set_param -n llite.*.fast_read=1 local t_fast=$(dd if=$DIR/$tfile of=/dev/null bs=4k 2>&1 | - awk '/copied/ { print $6 }') - + egrep -o '([[:digit:]\.\,e-]+) s' | cut -d's' -f1 | + sed -e 's/,/./' -e 's/[eE]+*/\*10\^/') # small read with fast read disabled $LCTL set_param -n llite.*.fast_read=0 local t_slow=$(dd if=$DIR/$tfile of=/dev/null bs=4k 2>&1 | - awk '/copied/ { print $6 }') + egrep -o '([[:digit:]\.\,e-]+) s' | cut -d's' -f1 | + sed -e 's/,/./' -e 's/[eE]+*/\*10\^/') # verify that fast read is 4 times faster for cache read [ $(bc <<< "4 * $t_fast < $t_slow") -eq 1 ] || @@ -14326,12 +14756,14 @@ test_248() { # 1k non-cache read cancel_lru_locks osc local t_1k=$(dd if=$DIR/$tfile of=/dev/null bs=1k 2>&1 | - awk '/copied/ { print $6 }') + egrep -o '([[:digit:]\.\,e-]+) s' | cut -d's' -f1 | + sed -e 's/,/./' -e 's/[eE]+*/\*10\^/') # 1M non-cache read cancel_lru_locks osc local t_1m=$(dd if=$DIR/$tfile of=/dev/null bs=1k 2>&1 | - awk '/copied/ { print $6 }') + egrep -o '([[:digit:]\.\,e-]+) s' | cut -d's' -f1 | + sed -e 's/,/./' -e 's/[eE]+*/\*10\^/') # verify that big IO is not 4 times faster than small IO [ $(bc <<< "4 * $t_1k >= $t_1m") -eq 1 ] || @@ -14494,6 +14926,9 @@ test_253() { osp.$mdtosc_proc1.reserved_mb_low) echo "prev high watermark $last_wm_h, prev low watermark $last_wm_l" + if ! combined_mgs_mds ; then + mount_mgs_client + fi create_pool $FSNAME.$TESTNAME || error "Pool creation failed" do_facet mgs $LCTL pool_add $FSNAME.$TESTNAME $ost_name || error "Adding $ost_name to pool failed" @@ -14558,6 +14993,10 @@ test_253() { error "Remove $ost_name from pool failed" do_facet mgs $LCTL pool_destroy $FSNAME.$TESTNAME || error "Pool destroy fialed" + + if ! combined_mgs_mds ; then + umount_mgs_client + fi } run_test 253 "Check object allocation limit" @@ -14697,7 +15136,7 @@ ladvise_willread_performance() local lowest_speedup=20 if [ ${average_cache%.*} -lt $lowest_speedup ]; then - echo "Speedup with OSS cached read less than $lowest_speedup%, " + echo "Speedup with OSS cached read less than $lowest_speedup%," \ "got $average_cache%. Skipping ladvise willread check." return 0 fi @@ -14869,6 +15308,10 @@ test_255c() { local difference local i local rc + + [ $(lustre_version_code ost1) -lt $(version_code 2.10.50) ] && + skip "lustre < 2.10.53 does not support lockahead" && return + test_mkdir -p $DIR/$tdir $SETSTRIPE -i 0 $DIR/$tdir @@ -14922,9 +15365,9 @@ test_255c() { ldlm.namespaces.$FSNAME-OST0000*osc-f*.lock_unused_count) difference="$((new_count - count))" - # Test 15 output is divided by 1000 to map down to valid return + # Test 15 output is divided by 100 to map down to valid return if [ $i -eq 15 ]; then - rc="$((rc * 1000))" + rc="$((rc * 100))" fi if [ $difference -ne $rc ]; then @@ -15078,6 +15521,401 @@ test_260() { } run_test 260 "Check mdc_close fail" +### Data-on-MDT sanity tests ### +test_270a() { + + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + # create DoM file + local dom=$DIR/$tdir/dom_file + local tmp=$DIR/$tdir/tmp_file + + mkdir -p $DIR/$tdir + + # basic checks for DoM component creation + $LFS setstripe -E 1024K -E 1024K -L mdt $dom 2>/dev/null && + error "Can set MDT layout to non-first entry" + + $LFS setstripe -E 1024K -L mdt -E 1024K -L mdt $dom 2>/dev/null && + error "Can define multiple entries as MDT layout" + + $LFS setstripe -E 1M -L mdt $dom || + error "Can't create DoM layout" + + [ $($LFS getstripe -L $dom) == "mdt" ] || error "bad pattern" + [ $($LFS getstripe -c $dom) == 0 ] || error "bad stripe count" + [ $($LFS getstripe -S $dom) == 1048576 ] || error "bad stripe size" + + local mdtidx=$($GETSTRIPE -M $dom) + local mdtname=MDT$(printf %04x $mdtidx) + local facet=mds$((mdtidx + 1)) + local space_check=1 + + # Skip free space checks with ZFS + if [ "$(facet_fstype $facet)" == "zfs" ]; then + space_check=0 + fi + + # write + sync + local mdtfree1=$(do_facet $facet \ + lctl get_param -n osd*.*$mdtname.kbytesfree) + dd if=/dev/urandom of=$tmp bs=1024 count=100 + # check also direct IO along write + dd if=$tmp of=$dom bs=102400 count=1 oflag=direct + sync + cmp $tmp $dom || error "file data is different" + [ $(stat -c%s $dom) == 102400 ] || error "bad size after write" + if [ $space_check == 1 ]; then + local mdtfree2=$(do_facet $facet \ + lctl get_param -n osd*.*$mdtname.kbytesfree) + [ $(($mdtfree1 - $mdtfree2)) -ge 102 ] || + error "MDT free space is wrong after write" + fi + + # truncate + $TRUNCATE $dom 10000 + [ $(stat -c%s $dom) == 10000 ] || error "bad size after truncate" + if [ $space_check == 1 ]; then + mdtfree1=$(do_facet $facet \ + lctl get_param -n osd*.*$mdtname.kbytesfree) + [ $(($mdtfree1 - $mdtfree2)) -ge 92 ] || + error "MDT free space is wrong after truncate" + fi + + # append + cat $tmp >> $dom + sync + [ $(stat -c%s $dom) == 112400 ] || error "bad size after append" + if [ $space_check == 1 ]; then + mdtfree2=$(do_facet $facet \ + lctl get_param -n osd*.*$mdtname.kbytesfree) + [ $(($mdtfree1 - $mdtfree2)) -ge 102 ] || + error "MDT free space is wrong after append" + fi + + # delete + rm $dom + if [ $space_check == 1 ]; then + mdtfree1=$(do_facet $facet \ + lctl get_param -n osd*.*$mdtname.kbytesfree) + [ $(($mdtfree1 - $mdtfree2)) -ge 112 ] || + error "MDT free space is wrong after removal" + fi + + # combined striping + $LFS setstripe -E 1024K -L mdt -E EOF $dom || + error "Can't create DoM + OST striping" + + dd if=/dev/urandom of=$tmp bs=1024 count=2000 + # check also direct IO along write + dd if=$tmp of=$dom bs=102400 count=20 oflag=direct + sync + cmp $tmp $dom || error "file data is different" + [ $(stat -c%s $dom) == 2048000 ] || error "bad size after write" + rm $dom + rm $tmp + + return 0 +} +run_test 270a "DoM: basic functionality tests" + +test_270b() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + local dom=$DIR/$tdir/dom_file + local max_size=1048576 + + mkdir -p $DIR/$tdir + $LFS setstripe -E $max_size -L mdt $dom + + # truncate over the limit + $TRUNCATE $dom $(($max_size + 1)) && + error "successful truncate over the maximum size" + # write over the limit + dd if=/dev/zero of=$dom bs=$max_size seek=1 count=1 && + error "successful write over the maximum size" + # append over the limit + dd if=/dev/zero of=$dom bs=$(($max_size - 3)) count=1 + echo "12345" >> $dom && error "successful append over the maximum size" + rm $dom + + return 0 +} +run_test 270b "DoM: maximum size overflow checks for DoM-only file" + +test_270c() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1024K -L mdt $DIR/$tdir + + # check files inherit DoM EA + touch $DIR/$tdir/first + [ $($GETSTRIPE -L $DIR/$tdir/first) == "mdt" ] || + error "bad pattern" + [ $($LFS getstripe -c $DIR/$tdir/first) == 0 ] || + error "bad stripe count" + [ $($LFS getstripe -S $DIR/$tdir/first) == 1048576 ] || + error "bad stripe size" + + # check directory inherits DoM EA and uses it as default + mkdir $DIR/$tdir/subdir + touch $DIR/$tdir/subdir/second + [ $($LFS getstripe -L $DIR/$tdir/subdir/second) == "mdt" ] || + error "bad pattern in sub-directory" + [ $($LFS getstripe -c $DIR/$tdir/subdir/second) == 0 ] || + error "bad stripe count in sub-directory" + [ $($LFS getstripe -S $DIR/$tdir/subdir/second) == 1048576 ] || + error "bad stripe size in sub-directory" + return 0 +} +run_test 270c "DoM: DoM EA inheritance tests" + +test_270d() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + mkdir -p $DIR/$tdir + $LFS setstripe -E 1024K -L mdt $DIR/$tdir + + # inherit default DoM striping + mkdir $DIR/$tdir/subdir + touch $DIR/$tdir/subdir/f1 + + # change default directory striping + $LFS setstripe -c 1 $DIR/$tdir/subdir + touch $DIR/$tdir/subdir/f2 + [ $($LFS getstripe -c $DIR/$tdir/subdir/f2) == 1 ] || + error "wrong default striping in file 2" + [ $($LFS getstripe -L $DIR/$tdir/subdir/f2) == "raid0" ] || + error "bad pattern in file 2" + return 0 +} +run_test 270d "DoM: change striping from DoM to RAID0" + +test_270e() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + mkdir -p $DIR/$tdir/dom + mkdir -p $DIR/$tdir/norm + DOMFILES=20 + NORMFILES=10 + $LFS setstripe -E 1M -L mdt $DIR/$tdir/dom + $LFS setstripe -i 0 -S 2M $DIR/$tdir/norm + + createmany -o $DIR/$tdir/dom/dom- $DOMFILES + createmany -o $DIR/$tdir/norm/norm- $NORMFILES + + # find DoM files by layout + NUM=$($LFIND -L mdt -type f $DIR/$tdir 2>/dev/null | wc -l) + [ $NUM -eq $DOMFILES ] || + error "lfs find -L: found $NUM, expected $DOMFILES" + echo "Test 1: lfs find 20 DOM files by layout: OK" + + # there should be 1 dir with default DOM striping + NUM=$($LFIND -L mdt -type d $DIR/$tdir 2>/dev/null | wc -l) + [ $NUM -eq 1 ] || + error "lfs find -L: found $NUM, expected 1 dir" + echo "Test 2: lfs find 1 DOM dir by layout: OK" + + # find DoM files by stripe size + NUM=$($LFIND -S -1200K -type f $DIR/$tdir 2>/dev/null | wc -l) + [ $NUM -eq $DOMFILES ] || + error "lfs find -S: found $NUM, expected $DOMFILES" + echo "Test 4: lfs find 20 DOM files by stripe size: OK" + + # find files by stripe offset except DoM files + NUM=$($LFIND -i 0 -type f $DIR/$tdir 2>/dev/null | wc -l) + [ $NUM -eq $NORMFILES ] || + error "lfs find -i: found $NUM, expected $NORMFILES" + echo "Test 5: lfs find no DOM files by stripe index: OK" + return 0 +} +run_test 270e "DoM: lfs find with DoM files test" + +test_270f() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + local mdtname=${FSNAME}-MDT0000-mdtlov + local dom=$DIR/$tdir/dom_file + local dom_limit_saved=$(do_facet mds1 $LCTL get_param -n \ + lod.$mdtname.dom_stripesize) + local dom_limit=131072 + + do_facet mds1 $LCTL set_param -n lod.$mdtname.dom_stripesize=$dom_limit + local dom_current=$(do_facet mds1 $LCTL get_param -n \ + lod.$mdtname.dom_stripesize) + [ ${dom_limit} -eq ${dom_current} ] || + error "Cannot change per-MDT DoM stripe limit to $dom_limit" + + $LFS mkdir -i 0 -c 1 $DIR/$tdir + $LFS setstripe -d $DIR/$tdir + $LFS setstripe -E $dom_limit -L mdt $DIR/$tdir || + error "Can't set directory default striping" + + # exceed maximum stripe size + $LFS setstripe -E $(($dom_limit * 2)) -L mdt $dom && + error "Able to create DoM component size more than LOD limit" + + do_facet mds1 $LCTL set_param -n lod.$mdtname.dom_stripesize=0 + dom_current=$(do_facet mds1 $LCTL get_param -n \ + lod.$mdtname.dom_stripesize) + [ 0 -eq ${dom_current} ] || + error "Can't set zero DoM stripe limit" + + # too low values to be aligned with smallest stripe size 64K + do_facet mds1 $LCTL set_param -n lod.$mdtname.dom_stripesize=30000 + dom_current=$(do_facet mds1 $LCTL get_param -n \ + lod.$mdtname.dom_stripesize) + [ 30000 -eq ${dom_current} ] && + error "Can set too small DoM stripe limit" + + do_facet mds1 $LCTL set_param -n lod.$mdtname.dom_stripesize=2147483648 + dom_current=$(do_facet mds1 $LCTL get_param -n \ + lod.$mdtname.dom_stripesize) + echo $dom_current + [ 2147483648 -eq ${dom_current} ] && + error "Can set too large DoM stripe limit" + + do_facet mds1 $LCTL set_param -n \ + lod.$mdtname.dom_stripesize=$((dom_limit * 2)) + $LFS setstripe -E $((dom_limit * 2)) -L mdt $dom || + error "Can't create DoM component size after limit change" + do_facet mds1 $LCTL set_param -n \ + lod.$mdtname.dom_stripesize=$((dom_limit / 2)) + $LFS setstripe -E $dom_limit -L mdt ${dom}_big && + error "Can create big DoM component after limit decrease" + touch ${dom}_def || + error "Can't create file with old default layout" + + do_facet mds1 $LCTL set_param -n lod.*.dom_stripesize=$dom_limit_saved + return 0 +} +run_test 270f "DoM: maximum DoM stripe size checks" + +test_271a() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + local dom=$DIR/$tdir/dom + + mkdir -p $DIR/$tdir + + $LFS setstripe -E 1024K -L mdt $dom + + lctl set_param -n mdc.*.stats=clear + dd if=/dev/zero of=$dom bs=4096 count=1 || return 1 + cat $dom > /dev/null + local reads=$(lctl get_param -n mdc.*.stats | + awk '/ost_read/ {print $2}') + [ -z $reads ] || error "Unexpected $reads READ RPCs" + ls $dom + rm -f $dom +} +run_test 271a "DoM: data is cached for read after write" + +test_271b() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + local dom=$DIR/$tdir/dom + + mkdir -p $DIR/$tdir + + $LFS setstripe -E 1024K -L mdt -E EOF $dom + + lctl set_param -n mdc.*.stats=clear + dd if=/dev/zero of=$dom bs=4096 count=1 || return 1 + cancel_lru_locks mdc + $CHECKSTAT -t file -s 4096 $dom || error "stat #1 fails" + # second stat to check size is cached on client + $CHECKSTAT -t file -s 4096 $dom || error "stat #2 fails" + local gls=$(lctl get_param -n mdc.*.stats | + awk '/ldlm_glimpse/ {print $2}') + [ -z $gls ] || error "Unexpected $gls glimpse RPCs" + rm -f $dom +} +run_test 271b "DoM: no glimpse RPC for stat (DoM only file)" + +test_271ba() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + local dom=$DIR/$tdir/dom + + mkdir -p $DIR/$tdir + + $LFS setstripe -E 1024K -L mdt -E EOF $dom + + lctl set_param -n mdc.*.stats=clear + lctl set_param -n osc.*.stats=clear + dd if=/dev/zero of=$dom bs=2048K count=1 || return 1 + cancel_lru_locks mdc + $CHECKSTAT -t file -s 2097152 $dom || error "stat" + # second stat to check size is cached on client + $CHECKSTAT -t file -s 2097152 $dom || error "stat" + local gls=$(lctl get_param -n mdc.*.stats | + awk '/ldlm_glimpse/ {print $2}') + [ -z $gls ] || error "Unexpected $gls glimpse RPCs" + local gls=$(lctl get_param -n osc.*.stats | + awk '/ldlm_glimpse/ {print $2}') + [ -z $gls ] || error "Unexpected $gls OSC glimpse RPCs" + rm -f $dom +} +run_test 271ba "DoM: no glimpse RPC for stat (combined file)" + +test_271c() { + # test to be enabled with lock_convert + skip "skipped until lock convert will be implemented" && return + + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.55) ] && + skip "Need MDS version at least 2.10.55" && return + + local dom=$DIR/$tdir/dom + + mkdir -p $DIR/$tdir + + $LFS setstripe -E 1024K -L mdt $DIR/$tdir + + local mdtidx=$($LFS getstripe -M $DIR/$tdir) + local facet=mds$((mdtidx + 1)) + + cancel_lru_locks mdc + do_facet $facet lctl set_param -n mdt.*.dom_lock=0 + createmany -o $dom 1000 + lctl set_param -n mdc.*.stats=clear + smalliomany -w $dom 1000 200 + lctl get_param -n mdc.*.stats + local enq=$(lctl get_param -n mdc.*.stats | + awk '/ldlm_ibits_enqueue/ {print $2}') + # Each file has 1 open, 1 IO enqueues, total 2000 + # but now we have also +1 getxattr for security.capability, total 3000 + [ $enq -ge 2000 ] || error "Too few enqueues $enq, expected > 2000" + unlinkmany $dom 1000 + + cancel_lru_locks mdc + do_facet $facet lctl set_param -n mdt.*.dom_lock=1 + createmany -o $dom 1000 + lctl set_param -n mdc.*.stats=clear + smalliomany -w $dom 1000 200 + lctl get_param -n mdc.*.stats + local enq_2=$(lctl get_param -n mdc.*.stats | + awk '/ldlm_ibits_enqueue/ {print $2}') + # Expect to see reduced amount of RPCs by 1000 due to single enqueue + # for OPEN and IO lock. + [ $((enq - enq_2)) -ge 1000 ] || + error "Too many enqueues $enq_2, expected about $((enq - 1000))" + unlinkmany $dom 1000 + return 0 +} +run_test 271c "DoM: IO lock at open saves enqueue RPCs" + cleanup_test_300() { trap 0 umask $SAVE_UMASK @@ -16002,6 +16840,33 @@ test_313() { } run_test 313 "io should fail after last_rcvd update fail" +test_314() { + $SETSTRIPE -c 2 -i 0 $DIR/$tfile || error "setstripe failed" + do_facet ost1 "$LCTL set_param fail_loc=0x720" + rm -f $DIR/$tfile + wait_delete_completed + do_facet ost1 "$LCTL set_param fail_loc=0" +} +run_test 314 "OSP shouldn't fail after last_rcvd update failure" + +test_315() { # LU-618 + local file=$DIR/$tfile + rm -f $file + + $MULTIOP $file oO_CREAT:O_DIRECT:O_RDWR:w4096000c + $MULTIOP $file oO_RDONLY:r4096000_c & + PID=$! + + sleep 2 + + local rbytes=$(awk '/read_bytes/ { print $2 }' /proc/$PID/io) + kill -USR1 $PID + + [ $rbytes -gt 4000000 ] || error "read is not accounted ($rbytes)" + rm -f $file +} +run_test 315 "read should be accounted" + test_fake_rw() { local read_write=$1 if [ "$read_write" = "write" ]; then @@ -16271,7 +17136,7 @@ test_403() { wait - [ `cat $tfile` -gt 0 ] || error "wrong nlink count: `cat $tfile`" + [ $(cat $tfile) -gt 0 ] || error "wrong nlink count: $(cat $tfile)" rm -f $tfile $file1 $file2 } @@ -16333,8 +17198,11 @@ test_406() { local def_stripe_size=$($GETSTRIPE -S $MOUNT) local def_stripe_offset=$($GETSTRIPE -i $MOUNT) local def_pool=$($GETSTRIPE -p $MOUNT) - local test_pool=$TESTNAME + + if ! combined_mgs_mds ; then + mount_mgs_client + fi pool_add $test_pool || error "pool_add failed" pool_add_targets $test_pool 0 $(($OSTCOUNT - 1)) 1 || error "pool_add_targets failed" @@ -16396,6 +17264,10 @@ test_406() { local f=$DIR/$tdir/$tfile pool_remove_all_targets $test_pool $f pool_remove $test_pool $f + + if ! combined_mgs_mds ; then + umount_mgs_client + fi } run_test 406 "DNE support fs default striping" @@ -16798,6 +17670,137 @@ test_802() { } run_test 802 "simulate readonly device" +test_803() { + [[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs" && return + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.54) ] && + skip "MDS needs to be newer than 2.10.54" && return + + mkdir -p $DIR/$tdir + # Create some objects on all MDTs to trigger related logs objects + for idx in $(seq $MDSCOUNT); do + $LFS mkdir -c $MDSCOUNT -i $((idx % $MDSCOUNT)) \ + $DIR/$tdir/dir${idx} || + error "Fail to create $DIR/$tdir/dir${idx}" + done + + sync; sleep 5 + echo "before create:" + $LFS df -i $MOUNT + local before_used=$($LFS df -i | grep MDT0000_UUID | awk '{print $3}') + + for ((i=0; i<10; i++)); do + $LFS mkdir -c 1 -i 1 $DIR/$tdir/foo$i || + error "Fail to create $DIR/$tdir/foo$i" + done + + sync; sleep 5 + echo "after create:" + $LFS df -i $MOUNT + local after_used=$($LFS df -i | grep MDT0000_UUID | awk '{print $3}') + + [ $after_used -ge $((before_used + 10)) ] || + error "before ($before_used) + 10 > after ($after_used)" + + for ((i=0; i<10; i++)); do + rm -rf $DIR/$tdir/foo$i || + error "Fail to remove $DIR/$tdir/foo$i" + done + + wait_delete_completed + echo "after unlink:" + $LFS df -i $MOUNT + before_used=$after_used + after_used=$($LFS df -i | grep MDT0000_UUID | awk '{print $3}') + + [ $after_used -le $((before_used - 8)) ] || + error "before ($before_used) - 8 < after ($after_used)" +} +run_test 803 "verify agent object for remote object" + +test_804() { + [[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs" && return + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.54) ] && + skip "MDS needs to be newer than 2.10.54" && return + + [ "$(facet_fstype $SINGLEMDS)" != "ldiskfs" ] && + skip "ldiskfs only test" && return 0 + + mkdir -p $DIR/$tdir + $LFS mkdir -c 1 -i 1 $DIR/$tdir/dir0 || + error "Fail to create $DIR/$tdir/dir0" + + local fid=$($LFS path2fid $DIR/$tdir/dir0) + local dev=$(mdsdevname 2) + + do_facet mds2 "$DEBUGFS -c -R 'ls /REMOTE_PARENT_DIR' $dev" | + grep ${fid} || error "NOT found agent entry for dir0" + + $LFS mkdir -c $MDSCOUNT -i 0 $DIR/$tdir/dir1 || + error "Fail to create $DIR/$tdir/dir1" + + touch $DIR/$tdir/dir1/foo0 || + error "Fail to create $DIR/$tdir/dir1/foo0" + fid=$($LFS path2fid $DIR/$tdir/dir1/foo0) + local rc=0 + + for idx in $(seq $MDSCOUNT); do + dev=$(mdsdevname $idx) + do_facet mds${idx} \ + "$DEBUGFS -c -R 'ls /REMOTE_PARENT_DIR' $dev" | + grep ${fid} && rc=$idx + done + + mv $DIR/$tdir/dir1/foo0 $DIR/$tdir/dir1/foo1 || + error "Fail to rename foo0 to foo1" + if [ $rc -eq 0 ]; then + for idx in $(seq $MDSCOUNT); do + dev=$(mdsdevname $idx) + do_facet mds${idx} \ + "$DEBUGFS -c -R 'ls /REMOTE_PARENT_DIR' $dev" | + grep ${fid} && rc=$idx + done + fi + + mv $DIR/$tdir/dir1/foo1 $DIR/$tdir/dir1/foo2 || + error "Fail to rename foo1 to foo2" + if [ $rc -eq 0 ]; then + for idx in $(seq $MDSCOUNT); do + dev=$(mdsdevname $idx) + do_facet mds${idx} \ + "$DEBUGFS -c -R 'ls /REMOTE_PARENT_DIR' $dev" | + grep ${fid} && rc=$idx + done + fi + + [ $rc -ne 0 ] || error "NOT found agent entry for foo" + + ln $DIR/$tdir/dir1/foo2 $DIR/$tdir/dir0/guard || + error "Fail to link to $DIR/$tdir/dir1/foo2" + mv $DIR/$tdir/dir1/foo2 $DIR/$tdir/dir1/foo0 || + error "Fail to rename foo2 to foo0" + unlink $DIR/$tdir/dir1/foo0 || + error "Fail to unlink $DIR/$tdir/dir1/foo0" + rm -rf $DIR/$tdir/dir0 || + error "Fail to rm $DIR/$tdir/dir0" + + for idx in $(seq $MDSCOUNT); do + dev=$(mdsdevname $idx) + rc=0 + + stop mds${idx} + run_e2fsck $(facet_active_host mds$idx) $dev -n || + rc=$? + start mds${idx} $dev $MDS_MOUNT_OPTS || + error "mount mds$idx failed" + df $MOUNT > /dev/null 2>&1 + + # e2fsck should not return error + [ $rc -eq 0 ] || + error "e2fsck detected error on MDT${idx}: rc=$rc" + done +} +run_test 804 "verify agent entry for remote entry" + # # tests that do cleanup/setup should be run at the end #