X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=4d50dcfab8877826132194e9dcfb059c9f72282c;hp=c1cb61642eef1a8776a8bb21595a78129fcdc024;hb=777b04a093;hpb=cc10922919325c212ae98a69d63328c0efbd4f83 diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh old mode 100644 new mode 100755 index c1cb616..4d50dcf --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -31,8 +31,8 @@ GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} export PARALLEL=${PARALLEL:-"no"} TRACE=${TRACE:-""} -LUSTRE_TESTS_API_DIR=${LUSTRE_TESTS_API_DIR:-${LUSTRE}/tests/clientapi} LUSTRE=${LUSTRE:-$(dirname $0)/..} +LUSTRE_TESTS_API_DIR=${LUSTRE_TESTS_API_DIR:-${LUSTRE}/tests/clientapi} . $LUSTRE/tests/test-framework.sh init_test_env $@ @@ -41,8 +41,8 @@ init_logging ALWAYS_EXCEPT="$SANITY_EXCEPT " # bug number for skipped test: LU-9693 LU-6493 LU-9693 ALWAYS_EXCEPT+=" 42a 42b 42c " -# bug number: LU-8411 LU-9054 -ALWAYS_EXCEPT+=" 407 312 " +# bug number: LU-8411 LU-9054 LU-13314 +ALWAYS_EXCEPT+=" 407 312 56ob" if $SHARED_KEY; then # bug number: LU-9795 LU-9795 LU-9795 LU-9795 @@ -64,12 +64,18 @@ if [[ $(uname -m) = aarch64 ]]; then fi # skip nfs tests on kernels >= 4.14.0 until they are fixed -if [ $LINUX_VERSION_CODE -ge $(version_code 4.14.0) ];then +if [ $LINUX_VERSION_CODE -ge $(version_code 4.14.0) ]; then # bug number: LU-12661 ALWAYS_EXCEPT+=" 817" fi +# skip cgroup tests on RHEL8.1 kernels until they are fixed +if (( $LINUX_VERSION_CODE >= $(version_code 4.18.0) && + $LINUX_VERSION_CODE < $(version_code 5.4.0) )); then + # bug number: LU-13063 + ALWAYS_EXCEPT+=" 411" +fi -# 5 12 (min)" +# 5 12 8 12 (min)" [ "$SLOW" = "no" ] && EXCEPT_SLOW="27m 64b 68 71 115 135 136 300o" if [ "$mds1_FSTYPE" = "zfs" ]; then @@ -550,11 +556,6 @@ test_17g() { [ $MDS1_VERSION -le $(version_code 2.3.55) ] && TESTS="4094 4095" - # skip long symlink name for rhel6.5. - # rhel6.5 has a limit (PATH_MAX - sizeof(struct filename)) - grep -q '6.5' /etc/redhat-release &>/dev/null && - TESTS="59 60 61 4062 4063" - for i in $TESTS; do local SYMNAME=$(str_repeat 'x' $i) ln -s $SYMNAME $DIR/$tdir/f$i || error "failed $i-char symlink" @@ -2641,7 +2642,7 @@ test_27I() { run_test 27I "check that root dir striping does not break parent dir one" test_27J() { - [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.12.51) ]] && + [[ $MDS1_VERSION -le $(version_code 2.12.51) ]] && skip "Need MDS version newer than 2.12.51" test_mkdir $DIR/$tdir @@ -2736,7 +2737,7 @@ test_27J() { run_test 27J "basic ops on file with foreign LOV" test_27K() { - [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.12.49) ]] && + [[ $MDS1_VERSION -le $(version_code 2.12.49) ]] && skip "Need MDS version newer than 2.12.49" test_mkdir $DIR/$tdir @@ -3748,6 +3749,46 @@ test_33g() { } run_test 33g "nonroot user create already existing root created file" +test_33h() { + [ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs" + [ $MDS1_VERSION -lt $(version_code 2.13.50) ] && + skip "Need MDS version at least 2.13.50" + + test_mkdir -c $MDSCOUNT -H crush $DIR/$tdir || + error "mkdir $tdir failed" + touch $DIR/$tdir/$tfile || error "touch $tfile failed" + + local index=$($LFS getstripe -m $DIR/$tdir/$tfile) + local index2 + + for fname in $DIR/$tdir/$tfile.bak \ + $DIR/$tdir/$tfile.SAV \ + $DIR/$tdir/$tfile.orig \ + $DIR/$tdir/$tfile~; do + touch $fname || error "touch $fname failed" + index2=$($LFS getstripe -m $fname) + [ $index -eq $index2 ] || + error "$fname MDT index mismatch $index != $index2" + done + + local failed=0 + for i in {1..50}; do + for fname in $(mktemp -u $DIR/$tdir/.$tfile.XXXXXX) \ + $(mktemp $DIR/$tdir/$tfile.XXXXXXXX); do + touch $fname || error "touch $fname failed" + index2=$($LFS getstripe -m $fname) + if [[ $index != $index2 ]]; then + failed=$((failed + 1)) + echo "$fname MDT index mismatch $index != $index2" + fi + done + done + echo "$failed MDT index mismatches" + (( failed < 4 )) || error "MDT index mismatch $failed times" + +} +run_test 33h "temp file is located on the same MDT as target" + TEST_34_SIZE=${TEST_34_SIZE:-2000000000000} test_34a() { rm -f $DIR/f34 @@ -6437,7 +6478,9 @@ test_56wb() { echo "done." echo -n "Removing test file from pool '$pool'..." - $LFS migrate $file1 &> /dev/null || + # "lfs migrate $file" won't remove the file from the pool + # until some striping information is changed. + $LFS migrate -c 1 $file1 &> /dev/null || error "cannot remove from pool" [ "$($LFS getstripe -p $file1)" ] && error "pool still set" @@ -6459,19 +6502,23 @@ run_test 56wb "check lfs_migrate pool support" test_56wc() { local file1="$DIR/$tdir/file1" + local parent_ssize + local parent_scount + local cur_ssize + local cur_scount + local orig_ssize echo -n "Creating test dir..." test_mkdir $DIR/$tdir &> /dev/null || error "cannot create dir" - local def_stripe_size=$($LFS getstripe -S $DIR/$tdir 2>/dev/null) $LFS setstripe -S 1M -c 1 "$DIR/$tdir" &> /dev/null || - error "cannot set stripe" + error "cannot set stripe by '-S 1M -c 1'" echo "done" echo -n "Setting initial stripe for test file..." $LFS setstripe -S 512K -c 1 "$file1" &> /dev/null || error "cannot set stripe" - [ $($LFS getstripe -S "$file1") -eq 524288 ] || - error "stripe size not set" + cur_ssize=$($LFS getstripe -S "$file1") + [ $cur_ssize -eq 524288 ] || error "setstripe -S $cur_ssize != 524288" echo "done." # File currently set to -S 512K -c 1 @@ -6488,8 +6535,8 @@ test_56wc() { echo -n "Verifying -S option is passed through to lfs migrate..." $LFS_MIGRATE -y -S 1M "$file1" &> /dev/null || error "migration failed" - [ $($LFS getstripe -S "$file1") -eq 1048576 ] || - error "file was not restriped" + cur_ssize=$($LFS getstripe -S "$file1") + [ $cur_ssize -eq 1048576 ] || error "migrate -S $cur_ssize != 1048576" echo "done." # File currently set to -S 1M -c 1 @@ -6500,8 +6547,9 @@ test_56wc() { error "long option without argument not supported" $LFS_MIGRATE -y --stripe-size 512K "$file1" &> /dev/null || error "long option with argument not supported" - [ $($LFS getstripe -S "$file1") -eq 524288 ] || - error "file not restriped with --stripe-size option" + cur_ssize=$($LFS getstripe -S "$file1") + [ $cur_ssize -eq 524288 ] || + error "migrate --stripe-size $cur_ssize != 524288" echo "done." # File currently set to -S 512K -c 1 @@ -6510,8 +6558,8 @@ test_56wc() { echo -n "Verifying explicit stripe count can be set..." $LFS_MIGRATE -y -c 2 "$file1" &> /dev/null || error "migrate failed" - [ $($LFS getstripe -c "$file1") -eq 2 ] || - error "file not restriped to explicit count" + cur_scount=$($LFS getstripe -c "$file1") + [ $cur_scount -eq 2 ] || error "migrate -c $cur_scount != 2" echo "done." fi @@ -6520,17 +6568,21 @@ test_56wc() { # Ensure parent striping is used if -R is set, and no stripe # count or size is specified echo -n "Setting stripe for parent directory..." - $LFS setstripe -S 1M -c 1 "$DIR/$tdir" &> /dev/null || - error "cannot set stripe" + $LFS setstripe -S 2M -c 1 "$DIR/$tdir" &> /dev/null || + error "cannot set stripe '-S 2M -c 1'" echo "done." echo -n "Verifying restripe option uses parent stripe settings..." + parent_ssize=$($LFS getstripe -S $DIR/$tdir 2>/dev/null) + parent_scount=$($LFS getstripe -c $DIR/$tdir 2>/dev/null) $LFS_MIGRATE -y -R "$file1" &> /dev/null || error "migrate failed" - [ $($LFS getstripe -S "$file1") -eq $def_stripe_size ] || - error "file not restriped to parent settings" - [ $($LFS getstripe -c "$file1") -eq 1 ] || - error "file not restriped to parent settings" + cur_ssize=$($LFS getstripe -S "$file1") + [ $cur_ssize -eq $parent_ssize ] || + error "migrate -R stripe_size $cur_ssize != $parent_ssize" + cur_scount=$($LFS getstripe -c "$file1") + [ $cur_scount -eq $parent_scount ] || + error "migrate -R stripe_count $cur_scount != $parent_scount" echo "done." # File currently set to -S 1M -c 1 @@ -6538,13 +6590,14 @@ test_56wc() { # Ensure striping is preserved if -R is not set, and no stripe # count or size is specified echo -n "Verifying striping size preserved when not specified..." - local orig_stripe_size=$($LFS getstripe -S "$file1" 2>/dev/null) + orig_ssize=$($LFS getstripe -S "$file1" 2>/dev/null) $LFS setstripe -S 2M -c 1 "$DIR/$tdir" &> /dev/null || error "cannot set stripe on parent directory" $LFS_MIGRATE -y "$file1" &> /dev/null || error "migrate failed" - [ $($LFS getstripe -S "$file1") -eq $orig_stripe_size ] || - error "file was restriped" + cur_ssize=$($LFS getstripe -S "$file1") + [ $cur_ssize -eq $orig_ssize ] || + error "migrate by default $cur_ssize != $orig_ssize" echo "done." # Ensure file name properly detected when final option has no argument @@ -6832,6 +6885,75 @@ test_56xc() { } run_test 56xc "lfs migration autostripe" +test_56xd() { + [[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs" + + local dir=$DIR/$tdir + local f_mgrt=$dir/$tfile.mgrt + local f_yaml=$dir/$tfile.yaml + local f_copy=$dir/$tfile.copy + local layout_yaml="-E 1M -S 512K -c 1 -E -1 -S 1M -c 2 -i 0" + local layout_copy="-c 2 -S 2M -i 1" + local yamlfile=$dir/yamlfile + local layout_before; + local layout_after; + + test_mkdir "$dir" || error "cannot create dir $dir" + $LFS setstripe $layout_yaml $f_yaml || + error "cannot setstripe $f_yaml with layout $layout_yaml" + $LFS getstripe --yaml $f_yaml > $yamlfile + $LFS setstripe $layout_copy $f_copy || + error "cannot setstripe $f_copy with layout $layout_copy" + touch $f_mgrt + dd if=/dev/zero of=$f_mgrt bs=1M count=4 + + # 1. test option --yaml + $LFS_MIGRATE -y --yaml $yamlfile $f_mgrt || + error "cannot migrate $f_mgrt with --yaml $yamlfile" + layout_before=$(get_layout_param $f_yaml) + layout_after=$(get_layout_param $f_mgrt) + [ "$layout_after" == "$layout_before" ] || + error "lfs_migrate --yaml: $layout_after != $layout_before" + + # 2. test option --copy + $LFS_MIGRATE -y --copy $f_copy $f_mgrt || + error "cannot migrate $f_mgrt with --copy $f_copy" + layout_before=$(get_layout_param $f_copy) + layout_after=$(get_layout_param $f_mgrt) + [ "$layout_after" == "$layout_before" ] || + error "lfs_migrate --copy: $layout_after != $layout_before" +} +run_test 56xd "check lfs_migrate --yaml and --copy support" + +test_56xe() { + [[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs" + + local dir=$DIR/$tdir + local f_comp=$dir/$tfile + local layout="-E 1M -S 512K -c 1 -E -1 -S 1M -c 2 -i 0" + local layout_before="" + local layout_after="" + + test_mkdir "$dir" || error "cannot create dir $dir" + $LFS setstripe $layout $f_comp || + error "cannot setstripe $f_comp with layout $layout" + layout_before=$(get_layout_param $f_comp) + dd if=/dev/zero of=$f_comp bs=1M count=4 + + # 1. migrate a comp layout file by lfs_migrate + $LFS_MIGRATE -y $f_comp || error "cannot migrate $f_comp by lfs_migrate" + layout_after=$(get_layout_param $f_comp) + [ "$layout_before" == "$layout_after" ] || + error "lfs_migrate: $layout_before != $layout_after" + + # 2. migrate a comp layout file by lfs migrate + $LFS migrate $f_comp || error "cannot migrate $f_comp by lfs migrate" + layout_after=$(get_layout_param $f_comp) + [ "$layout_before" == "$layout_after" ] || + error "lfs migrate: $layout_before != $layout_after" +} +run_test 56xe "migrate a composite layout file" + test_56y() { [ $MDS1_VERSION -lt $(version_code 2.4.53) ] && skip "No HSM $(lustre_build_version $SINGLEMDS) MDS < 2.4.53" @@ -7491,8 +7613,8 @@ run_test 63b "async write errors should be returned to fsync ===" test_64a () { [ $PARALLEL == "yes" ] && skip "skip parallel run" - df $DIR - lctl get_param -n osc.*[oO][sS][cC][_-]*.cur* | grep "[0-9]" + lfs df $DIR + lctl get_param osc.*[oO][sS][cC][_-]*.cur* | grep "=[1-9]" } run_test 64a "verify filter grant calculations (in kernel) =====" @@ -7838,7 +7960,7 @@ run_test 65m "normal user can't set filesystem default stripe" test_65n() { [ -n "$FILESET" ] && skip "Not functional for FILESET set" - [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.12.50) ]] || + [[ $MDS1_VERSION -ge $(version_code 2.12.50) ]] || skip "Need MDS version at least 2.12.50" [[ $PARALLEL != "yes" ]] || skip "skip parallel run" @@ -8155,28 +8277,27 @@ num_inodes() { test_76() { # Now for bug 20433, added originally in bug 1443 [ $PARALLEL == "yes" ] && skip "skip parallel run" - local CPUS=$(getconf _NPROCESSORS_ONLN 2>/dev/null) - cancel_lru_locks osc - BEFORE_INODES=$(num_inodes) - echo "before inodes: $BEFORE_INODES" - local COUNT=1000 - [ "$SLOW" = "no" ] && COUNT=100 - for i in $(seq $COUNT); do + local cpus=$(getconf _NPROCESSORS_ONLN 2>/dev/null) + local before=$(num_inodes) + local count=$((512 * cpus)) + [ "$SLOW" = "no" ] && count=$((64 * cpus)) + + echo "before inodes: $before" + for i in $(seq $count); do touch $DIR/$tfile rm -f $DIR/$tfile done cancel_lru_locks osc - AFTER_INODES=$(num_inodes) - echo "after inodes: $AFTER_INODES" - local wait=0 - while [[ $((AFTER_INODES-1*${CPUS:-1})) -gt $BEFORE_INODES ]]; do - sleep 2 - AFTER_INODES=$(num_inodes) - wait=$((wait+2)) - echo "wait $wait seconds inodes: $AFTER_INODES" - if [ $wait -gt 30 ]; then - error "inode slab grew from $BEFORE_INODES to $AFTER_INODES" + local after=$(num_inodes) + echo "after inodes: $after" + while (( after > before + 8 * ${cpus:-1} )); do + sleep 1 + after=$(num_inodes) + wait=$((wait + 1)) + (( wait % 5 == 0 )) && echo "wait $wait seconds inodes: $after" + if (( wait > 30 )); then + error "inode slab grew from $before to $after" fi done } @@ -8931,11 +9052,6 @@ test_101c() { } run_test 101c "check stripe_size aligned read-ahead =================" -set_read_ahead() { - $LCTL get_param -n llite.*.max_read_ahead_mb | head -n 1 - $LCTL set_param -n llite.*.max_read_ahead_mb $1 > /dev/null 2>&1 -} - test_101d() { [ $PARALLEL == "yes" ] && skip "skip parallel run" @@ -8955,7 +9071,10 @@ test_101d() { cancel_lru_locks osc echo Disable read-ahead - local old_READAHEAD=$(set_read_ahead 0) + local old_RA=$($LCTL get_param -n llite.*.max_read_ahead_mb | head -n 1) + $LCTL set_param -n llite.*.max_read_ahead_mb=0 + stack_trap "$LCTL set_param -n llite.*.max_read_ahead_mb $old_RA" EXIT + $LCTL get_param -n llite.*.max_read_ahead_mb echo Reading the test file $file with read-ahead disabled local raOFF=$(do_and_time "dd if=$file of=/dev/null bs=1M count=$sz_MB") @@ -8963,7 +9082,7 @@ test_101d() { echo Cancel LRU locks on lustre client to flush the client cache cancel_lru_locks osc echo Enable read-ahead with ${ra_MB}MB - set_read_ahead $ra_MB + $LCTL set_param -n llite.*.max_read_ahead_mb=$ra_MB echo Reading the test file $file with read-ahead enabled local raON=$(do_and_time "dd if=$file of=/dev/null bs=1M count=$sz_MB") @@ -8971,7 +9090,6 @@ test_101d() { echo "read-ahead disabled time read $raOFF" echo "read-ahead enabled time read $raON" - set_read_ahead $old_READAHEAD rm -f $file wait_delete_completed @@ -9155,6 +9273,30 @@ test_101h() { } run_test 101h "Readahead should cover current read window" +test_101i() { + dd if=/dev/zero of=$DIR/$tfile bs=1M count=10 || + error "dd 10M file failed" + + local max_per_file_mb=$($LCTL get_param -n \ + llite.*.max_read_ahead_per_file_mb 2>/dev/null) + cancel_lru_locks osc + stack_trap "$LCTL set_param llite.*.max_read_ahead_per_file_mb=$max_per_file_mb" + $LCTL set_param llite.*.max_read_ahead_per_file_mb=1 || + error "set max_read_ahead_per_file_mb to 1 failed" + + echo "Reset readahead stats" + $LCTL set_param llite.*.read_ahead_stats=0 + + dd if=$DIR/$tfile of=/dev/null bs=2M + + $LCTL get_param llite.*.read_ahead_stats + local miss=$($LCTL get_param -n llite.*.read_ahead_stats | + awk '/misses/ { print $2 }') + [ $miss -eq 5 ] || error "expected misses 5 but got $miss" + rm -f $DIR/$tfile +} +run_test 101i "allow current readahead to exceed reservation" + setup_test102() { test_mkdir $DIR/$tdir chown $RUNAS_ID $DIR/$tdir @@ -11162,6 +11304,27 @@ test_123b () { # statahead(bug 15027) } run_test 123b "not panic with network error in statahead enqueue (bug 15027)" +test_123c() { + [[ $MDSCOUNT -lt 2 ]] && skip_env "needs >= 2 MDTs" + + test_mkdir -i 0 -c 1 $DIR/$tdir.0 + test_mkdir -i 1 -c 1 $DIR/$tdir.1 + touch $DIR/$tdir.1/{1..3} + mv $DIR/$tdir.1/{1..3} $DIR/$tdir.0 + + remount_client $MOUNT + + $MULTIOP $DIR/$tdir.0 Q + + # let statahead to complete + ls -l $DIR/$tdir.0 > /dev/null + + testid=$(echo $TESTNAME | tr '_' ' ') + dmesg | tac | sed "/$testid/,$ d" | grep "Can not initialize inode" && + error "statahead warning" || true +} +run_test 123c "Can not initialize inode warning on DNE statahead" + test_124a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" $LCTL get_param -n mdc.*.connect_flags | grep -q lru_resize || @@ -12472,33 +12635,22 @@ test_133g() { remote_mds_nodsh && skip "remote MDS with nodsh" remote_ost_nodsh && skip "remote OST with nodsh" - # eventually, this can also be replaced with "lctl get_param -R", - # but not until that option is always available on the server local facet for facet in mds1 ost1; do - [ $(lustre_version_code $facet) -le $(version_code 2.5.54) ] && - skip_noexit "Too old lustre on $facet" - local facet_proc_dirs=$(do_facet $facet \ - \\\ls -d $proc_regexp 2>/dev/null) - echo "${facet}_proc_dirs='$facet_proc_dirs'" - [ -z "$facet_proc_dirs" ] && error "no proc_dirs on $facet" - do_facet $facet find $facet_proc_dirs \ - ! -name req_history \ - -exec cat '{}' \\\; &> /dev/null - - do_facet $facet find $facet_proc_dirs \ - ! -name req_history \ - -type f \ - -exec cat '{}' \\\; &> /dev/null || - error "proc file read failed" - - do_facet $facet find $facet_proc_dirs \ - -ignore_readdir_race \ - -type f \ - -not -name force_lbug \ - -not -name changelog_mask \ - -exec badarea_io '{}' \\\; || - error_133 "$facet find $facet_proc_dirs failed" + local facet_ver=$(lustre_version_code $facet) + if [ $facet_ver -ge $(version_code 2.7.65) ]; then + do_facet $facet "$LCTL get_param -R '*'" &> /dev/null + else + log "$facet: too old lustre for get_param -R" + fi + if [ $facet_ver -ge $(version_code 2.5.54) ]; then + do_facet $facet "$LCTL list_param -R '*' | grep '=' | + tr -d= | egrep -v 'force_lbug|changelog_mask' | + xargs badarea_io" || + error_133 "$facet badarea_io failed" + else + skip_noexit "$facet: too old lustre for get_param -R" + fi done # remount the FS in case writes/reads /proc break the FS @@ -12651,7 +12803,7 @@ test_136() { local fname mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir" - $SETSTRIPE -c 1 -i 0 $DIR/$tdir || error "failed to set striping" + $LFS setstripe -c 1 -i 0 $DIR/$tdir || error "failed to set striping" #set only one record at plain llog #define OBD_FAIL_CATALOG_FULL_CHECK 0x131a do_facet $SINGLEMDS $LCTL set_param fail_loc=0x131a fail_val=1 @@ -13864,15 +14016,20 @@ test_160f() { local i # generate some changelog records to accumulate on each MDT - test_mkdir -c $MDSCOUNT $DIR/$tdir || error "test_mkdir $tdir failed" + # use fnv1a because created files should be evenly distributed + test_mkdir -c $MDSCOUNT -H fnv_1a_64 $DIR/$tdir || + error "test_mkdir $tdir failed" + log "$(date +%s): creating first files" createmany -m $DIR/$tdir/$tfile $((MDSCOUNT * 2)) || error "create $DIR/$tdir/$tfile failed" # check changelogs have been generated + local start=$SECONDS + local idle_time=$((MDSCOUNT * 5 + 5)) local nbcl=$(changelog_dump | wc -l) [[ $nbcl -eq 0 ]] && error "no changelogs found" - for param in "changelog_max_idle_time=10" \ + for param in "changelog_max_idle_time=$idle_time" \ "changelog_gc=1" \ "changelog_min_gc_interval=2" \ "changelog_min_free_cat_entries=3"; do @@ -13884,8 +14041,11 @@ test_160f() { do_nodes $mdts $LCTL set_param mdd.*.$param done - # force cl_user2 to be idle (1st part) - sleep 9 + # force cl_user2 to be idle (1st part), but also cancel the + # cl_user1 records so that it is not evicted later in the test. + local sleep1=$((idle_time / 2)) + echo "$(date +%s): sleep1 $sleep1/${idle_time}s" + sleep $sleep1 # simulate changelog catalog almost full #define OBD_FAIL_CAT_FREE_RECORDS 0x1313 @@ -13921,13 +14081,16 @@ test_160f() { "$user_rec1, but is $user_rec2" done - # force cl_user2 to be idle (2nd part) and to reach - # changelog_max_idle_time - sleep 2 + # force cl_user2 idle (2nd part) to just exceed changelog_max_idle_time + local sleep2=$((idle_time - (SECONDS - start) + 1)) + echo "$(date +%s): sleep2 $sleep2/${idle_time}s" + sleep $sleep2 - # generate one more changelog to trigger fail_loc - createmany -m $DIR/$tdir/${tfile}bis $((MDSCOUNT * 2)) || - error "create $DIR/$tdir/${tfile}bis failed" + # Generate one more changelog to trigger GC at fail_loc for cl_user2. + # cl_user1 should be OK because it recently processed records. + echo "$(date +%s): creating $((MDSCOUNT * 2)) files" + createmany -m $DIR/$tdir/${tfile}b $((MDSCOUNT * 2)) || + error "create $DIR/$tdir/${tfile}b failed" # ensure gc thread is done for i in $(mdts_nodes); do @@ -13981,7 +14144,9 @@ test_160g() { local i # generate some changelog records to accumulate on each MDT - test_mkdir -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed" + # use fnv1a because created files should be evenly distributed + test_mkdir -c $MDSCOUNT -H fnv_1a_64 $DIR/$tdir || + error "mkdir $tdir failed" createmany -m $DIR/$tdir/$tfile $((MDSCOUNT * 2)) || error "create $DIR/$tdir/$tfile failed" @@ -14095,7 +14260,9 @@ test_160h() { local i # generate some changelog records to accumulate on each MDT - test_mkdir -c $MDSCOUNT $DIR/$tdir || error "test_mkdir $tdir failed" + # use fnv1a because created files should be evenly distributed + test_mkdir -c $MDSCOUNT -H fnv_1a_64 $DIR/$tdir || + error "test_mkdir $tdir failed" createmany -m $DIR/$tdir/$tfile $((MDSCOUNT * 2)) || error "create $DIR/$tdir/$tfile failed" @@ -14244,7 +14411,9 @@ test_160i() { changelog_register || error "first changelog_register failed" # generate some changelog records to accumulate on each MDT - test_mkdir -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed" + # use fnv1a because created files should be evenly distributed + test_mkdir -c $MDSCOUNT -H fnv_1a_64 $DIR/$tdir || + error "mkdir $tdir failed" createmany -m $DIR/$tdir/$tfile $((MDSCOUNT * 2)) || error "create $DIR/$tdir/$tfile failed" @@ -14303,20 +14472,27 @@ test_160j() { skip "Need MDS version at least 2.12.56" mount_client $MOUNT2 || error "mount_client on $MOUNT2 failed" + stack_trap "umount $MOUNT2" EXIT changelog_register || error "first changelog_register failed" + stack_trap "changelog_deregister" EXIT # generate some changelog - test_mkdir -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed" + # use fnv1a because created files should be evenly distributed + test_mkdir -c $MDSCOUNT -H fnv_1a_64 $DIR/$tdir || + error "mkdir $tdir failed" createmany -m $DIR/$tdir/${tfile}bis $((MDSCOUNT * 2)) || error "create $DIR/$tdir/${tfile}bis failed" # open the changelog device exec 3>/dev/changelog-$FSNAME-MDT0000 + stack_trap "exec 3>&-" EXIT exec 4/dev/null || error "read changelog failed" @@ -14327,16 +14503,6 @@ test_160j() { error "User $cl_user not found in changelog_users" printf 'clear:'$cl_user':0' >&3 - - # close - exec 3>&- - exec 4<&- - - # cleanup - changelog_deregister || error "changelog_deregister failed" - - umount $MOUNT2 - mount_client $MOUNT || error "mount_client on $MOUNT failed" } run_test 160j "client can be umounted while its chanangelog is being used" @@ -15530,7 +15696,7 @@ jobstats_set() { "$FSNAME.sys.jobid_var" $new_jobenv } -test_205() { # Job stats +test_205a() { # Job stats [ $PARALLEL == "yes" ] && skip "skip parallel run" [[ $MDS1_VERSION -ge $(version_code 2.7.1) ]] || skip "Need MDS version with at least 2.7.1" @@ -15632,7 +15798,18 @@ test_205() { # Job stats verify_jobstats "touch $DIR/$tfile" $SINGLEMDS } -run_test 205 "Verify job stats" +run_test 205a "Verify job stats" + +# LU-13117 +test_205b() { + $LCTL set_param jobid_var=USER jobid_name="%e.%u" + env -i USERTESTJOBSTATS=foolish touch $DIR/$tfile.1 + do_facet $SINGLEMDS $LCTL get_param mdt.*.job_stats | + grep job_id: | grep foolish && + error "Unexpected jobid found" + true +} +run_test 205b "Verify job stats jobid parsing" # LU-1480, LU-1773 and LU-1657 test_206() { @@ -16169,8 +16346,7 @@ test_224c() { # LU-6441 save_writethrough $p set_cache writethrough on - local pages_per_rpc=$($LCTL get_param \ - osc.*.max_pages_per_rpc) + local pages_per_rpc=$($LCTL get_param osc.*.max_pages_per_rpc) local at_max=$($LCTL get_param -n at_max) local timeout=$($LCTL get_param -n timeout) local test_at="at_max" @@ -16579,6 +16755,27 @@ test_230b() { ln -s $migrate_dir/$tfile $migrate_dir/${tfile}_ln ln -s $other_dir/$tfile $migrate_dir/${tfile}_ln_other + local len + local lnktgt + + # inline symlink + for len in 58 59 60; do + lnktgt=$(str_repeat 'l' $len) + touch $migrate_dir/$lnktgt + ln -s $lnktgt $migrate_dir/${len}char_ln + done + + # PATH_MAX + for len in 4094 4095; do + lnktgt=$(str_repeat 'l' $len) + ln -s $lnktgt $migrate_dir/${len}char_ln + done + + # NAME_MAX + for len in 254 255; do + touch $migrate_dir/$(str_repeat 'l' $len) + done + $LFS migrate -m $MDTIDX $migrate_dir || error "fails on migrating remote dir to MDT1" @@ -16586,7 +16783,8 @@ test_230b() { for ((i = 0; i < 10; i++)); do for file in $(find $migrate_dir/dir_${i}); do mdt_index=$($LFS getstripe -m $file) - [ $mdt_index == $MDTIDX ] || + # broken symlink getstripe will fail + [ $mdt_index -ne $MDTIDX ] && stat -L $file && error "$file is not on MDT${MDTIDX}" done done @@ -16650,7 +16848,7 @@ test_230b() { echo "migrate back to MDT0, checking.." for file in $(find $migrate_dir); do mdt_index=$($LFS getstripe -m $file) - [ $mdt_index == $MDTIDX ] || + [ $mdt_index -ne $MDTIDX ] && stat -L $file && error "$file is not on MDT${MDTIDX}" done @@ -19363,6 +19561,25 @@ test_278() { } run_test 278 "Race starting MDS between MDTs stop/start" +test_280() { + [ $MGS_VERSION -lt $(version_code 2.13.52) ] && + skip "Need MGS version at least 2.13.52" + [ $PARALLEL == "yes" ] && skip "skip parallel run" + combined_mgs_mds || skip "needs combined MGS/MDT" + + umount_client $MOUNT +#define OBD_FAIL_MDS_LLOG_UMOUNT_RACE 0x15e + do_facet mgs $LCTL set_param fail_loc=0x8000015e fail_val=0 + + mount_client $MOUNT & + sleep 1 + stop mgs || error "stop mgs failed" + #for a race mgs would crash + start mgs $(mgsdevname) $MGS_MOUNT_OPTS || error "start mgs failed" + mount_client $MOUNT || error "mount client failed" +} +run_test 280 "Race between MGS umount and client llog processing" + cleanup_test_300() { trap 0 umask $SAVE_UMASK @@ -19516,12 +19733,22 @@ test_300d() { #local striped directory $LFS setdirstripe -i 0 -c 2 -H all_char $DIR/$tdir/striped_dir || error "set striped dir error" + #look at the directories for debug purposes + ls -l $DIR/$tdir + $LFS getdirstripe $DIR/$tdir + ls -l $DIR/$tdir/striped_dir + $LFS getdirstripe $DIR/$tdir/striped_dir createmany -o $DIR/$tdir/striped_dir/f 10 || error "create 10 files failed" #remote striped directory $LFS setdirstripe -i 1 -c 2 $DIR/$tdir/remote_striped_dir || error "set striped dir error" + #look at the directories for debug purposes + ls -l $DIR/$tdir + $LFS getdirstripe $DIR/$tdir + ls -l $DIR/$tdir/remote_striped_dir + $LFS getdirstripe $DIR/$tdir/remote_striped_dir createmany -o $DIR/$tdir/remote_striped_dir/f 10 || error "create 10 files failed" @@ -20055,7 +20282,7 @@ test_300q() { run_test 300q "create remote directory under orphan directory" test_300r() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.7.55) ] && + [ $MDS1_VERSION -lt $(version_code 2.7.55) ] && skip "Need MDS version at least 2.7.55" && return [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return @@ -20370,7 +20597,7 @@ test_316() { chown nobody $DIR/$tdir/d touch $DIR/$tdir/d/file - $LFS mv -M1 $DIR/$tdir/d || error "lfs mv failed" + $LFS mv -m1 $DIR/$tdir/d || error "lfs mv failed" } run_test 316 "lfs mv" @@ -20452,9 +20679,8 @@ test_318() { llite.*.max_read_ahead_async_active 2>/dev/null) [ $max_active -ne 256 ] && error "expected 256 but got $max_active" - # currently reset to 0 is unsupported, leave it 512 for now. - $LCTL set_param llite.*.max_read_ahead_async_active=0 && - error "set max_read_ahead_async_active should fail" + $LCTL set_param llite.*.max_read_ahead_async_active=0 || + error "set max_read_ahead_async_active should succeed" $LCTL set_param llite.*.max_read_ahead_async_active=512 max_active=$($LCTL get_param -n \ @@ -20513,6 +20739,105 @@ test_319() { } run_test 319 "lost lease lock on migrate error" +test_398a() { # LU-4198 + $LFS setstripe -c 1 -i 0 $DIR/$tfile + $LCTL set_param ldlm.namespaces.*.lru_size=clear + + # request a new lock on client + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 + + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct conv=notrunc + local lock_count=$($LCTL get_param -n \ + ldlm.namespaces.*-OST0000-osc-ffff*.lru_size) + [[ $lock_count -eq 0 ]] || error "lock should be cancelled by direct IO" + + $LCTL set_param ldlm.namespaces.*-OST0000-osc-ffff*.lru_size=clear + + # no lock cached, should use lockless IO and not enqueue new lock + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct conv=notrunc + lock_count=$($LCTL get_param -n \ + ldlm.namespaces.*-OST0000-osc-ffff*.lru_size) + [[ $lock_count -eq 0 ]] || error "no lock should be held by direct IO" +} +run_test 398a "direct IO should cancel lock otherwise lockless" + +test_398b() { # LU-4198 + which fio || skip_env "no fio installed" + $LFS setstripe -c -1 $DIR/$tfile + + local size=12 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=$size + + local njobs=4 + echo "mix direct rw ${size}M to OST0 by fio with $njobs jobs..." + fio --name=rand-rw --rw=randrw --bs=$PAGE_SIZE --direct=1 \ + --numjobs=$njobs --fallocate=none \ + --iodepth=16 --allow_file_create=0 --size=$((size/njobs))M \ + --filename=$DIR/$tfile & + bg_pid=$! + + echo "mix buffer rw ${size}M to OST0 by fio with $njobs jobs..." + fio --name=rand-rw --rw=randrw --bs=$PAGE_SIZE \ + --numjobs=$njobs --fallocate=none \ + --iodepth=16 --allow_file_create=0 --size=$((size/njobs))M \ + --filename=$DIR/$tfile || true + wait $bg_pid + + rm -rf $DIR/$tfile +} +run_test 398b "DIO and buffer IO race" + +test_398c() { # LU-4198 + which fio || skip_env "no fio installed" + + saved_debug=$($LCTL get_param -n debug) + $LCTL set_param debug=0 + + local size=$(lctl get_param -n osc.$FSNAME-OST0000*.kbytesavail | head -1) + ((size /= 1024)) # by megabytes + ((size /= 2)) # write half of the OST at most + [ $size -gt 40 ] && size=40 #reduce test time anyway + + $LFS setstripe -c 1 $DIR/$tfile + + # it seems like ldiskfs reserves more space than necessary if the + # writing blocks are not mapped, so it extends the file firstly + dd if=/dev/zero of=$DIR/$tfile bs=1M count=$size && sync + cancel_lru_locks osc + + # clear and verify rpc_stats later + $LCTL set_param osc.${FSNAME}-OST0000-osc-ffff*.rpc_stats=clear + + local njobs=4 + echo "writing ${size}M to OST0 by fio with $njobs jobs..." + fio --name=rand-write --rw=randwrite --bs=$PAGE_SIZE --direct=1 \ + --numjobs=$njobs --fallocate=none --ioengine=libaio \ + --iodepth=16 --allow_file_create=0 --size=$((size/njobs))M \ + --filename=$DIR/$tfile + [ $? -eq 0 ] || error "fio write error" + + [ $($LCTL get_param -n \ + ldlm.namespaces.${FSNAME}-OST0000-osc-ffff*.lock_count) -eq 0 ] || + error "Locks were requested while doing AIO" + + # get the percentage of 1-page I/O + pct=$($LCTL get_param osc.${FSNAME}-OST0000-osc-ffff*.rpc_stats | + grep -A 1 'pages per rpc' | grep -v 'pages per rpc' | + awk '{print $7}') + [ $pct -le 50 ] || error "$pct% of I/O are 1-page" + + echo "mix rw ${size}M to OST0 by fio with $njobs jobs..." + fio --name=rand-rw --rw=randrw --bs=$PAGE_SIZE --direct=1 \ + --numjobs=$njobs --fallocate=none --ioengine=libaio \ + --iodepth=16 --allow_file_create=0 --size=$((size/njobs))M \ + --filename=$DIR/$tfile + [ $? -eq 0 ] || error "fio mixed read write error" + + rm -rf $DIR/$tfile + $LCTL set_param debug="$saved_debug" +} +run_test 398c "run fio to test AIO" + test_fake_rw() { local read_write=$1 if [ "$read_write" = "write" ]; then @@ -20598,6 +20923,10 @@ test_400a() { # LU-1606, was conf-sanity test_74 local prefix=/usr/include/lustre local prog + # Oleg removes c files in his test rig so test if any c files exist + [ -z "$(ls -A $LUSTRE_TESTS_API_DIR)" ] && \ + skip_env "Needed c test files are missing" + if ! [[ -d $prefix ]]; then # Assume we're running in tree and fixup the include path. extra_flags+=" -I$LUSTRE/../lnet/include/uapi -I$LUSTRE/include/uapi -I$LUSTRE/include" @@ -20605,7 +20934,7 @@ test_400a() { # LU-1606, was conf-sanity test_74 fi for prog in $LUSTRE_TESTS_API_DIR/*.c; do - $CC -Wall -Werror $extra_flags -o $out $prog -llustreapi || + $CC -Wall -Werror -std=c99 $extra_flags -o $out $prog -llustreapi || error "client api broken" done rm -f $out @@ -20637,7 +20966,7 @@ test_400b() { # LU-1606, LU-5011 continue # lustre_ioctl.h is internal header fi - $CC -Wall -Werror -include $header -c -x c /dev/null -o $out || + $CC -Wall -Werror -std=c99 -include $header -c -x c /dev/null -o $out || error "cannot compile '$header'" done rm -f $out @@ -21026,7 +21355,7 @@ run_test 411 "Slab allocation error with cgroup does not LBUG" test_412() { [ $MDSCOUNT -lt 2 ] && skip_env "needs >= 2 MDTs" - if [ $(lustre_version_code mds1) -lt $(version_code 2.10.55) ]; then + if [ $MDS1_VERSION -lt $(version_code 2.10.55) ]; then skip "Need server version at least 2.10.55" fi @@ -21294,7 +21623,7 @@ run_test 414 "simulate ENOMEM in ptlrpc_register_bulk()" test_415() { [ $PARALLEL == "yes" ] && skip "skip parallel run" - [ $(lustre_version_code mds1) -lt $(version_code 2.11.52) ] && + [ $MDS1_VERSION -lt $(version_code 2.11.52) ] && skip "Need server version at least 2.11.52" # LU-11102 @@ -21336,7 +21665,7 @@ test_415() { run_test 415 "lock revoke is not missing" test_416() { - [ $(lustre_version_code mds1) -lt $(version_code 2.11.55) ] && + [ $MDS1_VERSION -lt $(version_code 2.11.55) ] && skip "Need server version at least 2.11.55" # define OBD_FAIL_OSD_TXN_START 0x19a @@ -21790,8 +22119,29 @@ test_422() { } run_test 422 "kill a process with RPC in progress" +stat_test() { + df -h $MOUNT & + df -h $MOUNT & + df -h $MOUNT & + df -h $MOUNT & + df -h $MOUNT & + df -h $MOUNT & +} + +test_423() { + local _stats + # ensure statfs cache is expired + sleep 2; + + _stats=$(stat_test | grep $MOUNT | sort -u | wc -l) + [[ ${_stats} -ne 1 ]] && error "statfs wrong" + + return 0 +} +run_test 423 "statfs should return a right data" + prep_801() { - [[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] || + [[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] || [[ $OST1_VERSION -lt $(version_code 2.9.55) ]] && skip "Need server version at least 2.9.55" @@ -22022,7 +22372,7 @@ cleanup_802a() { test_802a() { [[ $mds1_FSTYPE = zfs ]] || skip "ZFS specific test" - [[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] || + [[ $MDS1_VERSION -lt $(version_code 2.9.55) ]] || [[ $OST1_VERSION -lt $(version_code 2.9.55) ]] && skip "Need server version at least 2.9.55" @@ -22541,7 +22891,7 @@ test_810() { run_test 810 "partial page writes on ZFS (LU-11663)" test_811() { - [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.11.56) ] && + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && skip "Need MDS version at least 2.11.56" #define OBD_FAIL_MDS_ORPHAN_DELETE 0x165 @@ -22551,8 +22901,7 @@ test_811() { stop mds1 start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS - sleep 5 - [[ $(do_facet mds1 pgrep orph_.*-MDD | wc -l) -eq 0 ]] || + wait_update_facet mds1 "pgrep orph_.*-MDD | wc -l" "0" || error "MDD orphan cleanup thread not quit" } run_test 811 "orphan name stub can be cleaned up in startup" @@ -22872,6 +23221,42 @@ test_819b() { } run_test 819b "too big niobuf in write" + +function test_820_start_ost() { + sleep 5 + + for num in $(seq $OSTCOUNT); do + start ost$num $(ostdevname $num) $OST_MOUNT_OPTS + done +} + +test_820() { + [[ $MDSCOUNT -lt 2 ]] && skip_env "needs >= 2 MDTs" + + mkdir $DIR/$tdir + umount_client $MOUNT || error "umount failed" + for num in $(seq $OSTCOUNT); do + stop ost$num + done + + # mount client with no active OSTs + # so that the client can't initialize max LOV EA size + # from OSC notifications + mount_client $MOUNT || error "mount failed" + # delay OST starting to keep this 0 max EA size for a while + test_820_start_ost & + + # create a directory on MDS2 + test_mkdir -i 1 -c1 $DIR/$tdir/mds2 || + error "Failed to create directory" + # open intent should update default EA size + # see mdc_update_max_ea_from_body() + # notice this is the very first RPC to MDS2 + cp /etc/services $DIR/$tdir/mds2 || + error "Failed to copy files to mds$n" +} +run_test 820 "update max EA from open intent" + # # tests that do cleanup/setup should be run at the end # @@ -22915,6 +23300,18 @@ test_901() { } run_test 901 "don't leak a mgc lock on client umount" +# LU-13377 +test_902() { + [ $CLIENT_VERSION -lt $(version_code 2.13.52) ] && + skip "client does not have LU-13377 fix" + #define OBD_FAIL_LLITE_SHORT_COMMIT 0x1415 + $LCTL set_param fail_loc=0x1415 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 + cancel_lru_locks osc + rm -f $DIR/$tfile +} +run_test 902 "test short write doesn't hang lustre" + complete $SECONDS [ -f $EXT2_DEV ] && rm $EXT2_DEV || true check_and_cleanup_lustre