X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Ftests%2Fsanity.sh;h=5a26c7782c87c9f341c719701c4d47eef0aa4f65;hb=15dd813536ad06a119dfb2358f00281eed22a98b;hp=085a3f4afc31fb5fcec4715a928988e91f47618e;hpb=b738c4850935f3a9c483b3141cb37d6539557615;p=fs%2Flustre-release.git diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 085a3f4..5a26c77 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -81,8 +81,8 @@ init_logging if [ $(facet_fstype $SINGLEMDS) = "zfs" ]; then # bug number for skipped test: LU-4536 LU-1957 ALWAYS_EXCEPT="$ALWAYS_EXCEPT 65ic 180" - # 4 13 (min)" - [ "$SLOW" = "no" ] && EXCEPT_SLOW="$EXCEPT_SLOW 51b 51ba" + # 13 (min)" + [ "$SLOW" = "no" ] && EXCEPT_SLOW="$EXCEPT_SLOW 51b" fi FAIL_ON_ERROR=false @@ -4097,6 +4097,12 @@ test_51a() { # was test_51 } run_test 51a "special situations: split htree with empty entry ==" +cleanup_print_lfs_df () { + trap 0 + $LFS df + $LFS df -i +} + test_51b() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return local dir=$DIR/$tdir @@ -4118,10 +4124,12 @@ test_51b() { # need to check free space for the directories as well local blkfree=$(lctl get_param -n mdc.$FSNAME-MDT$mdtidx*.kbytesavail) - numfree=$((blkfree / 4)) + numfree=$(( blkfree / $(fs_inode_ksize) )) [[ $numfree -lt $nrdirs ]] && skip "not enough blocks ($numfree)" && return + trap cleanup_print_lfs_df EXIT + # create files createmany -d $dir/d $nrdirs || error "failed to create $nrdirs subdirs in MDT$mdtidx:$dir" @@ -4133,6 +4141,9 @@ test_51b() { local left=100 local delete=$((nrdirs - left)) + $LFS df + $LFS df -i + # for ldiskfs the nlink count should be 1, but this is OSD specific # and so this is listed for informational purposes only echo "nlink before: $(stat -c %h $dir), created before: $nrdirs" @@ -4151,6 +4162,8 @@ test_51b() { local after=$(stat -c %h $dir) [[ $after -gt 2 ]] && error "nlink after: $after > 2" || echo "nlink after: $after" + + cleanup_print_lfs_df } run_test 51b "exceed 64k 
subdirectory nlink limit on create, verify unlink" @@ -6155,6 +6168,11 @@ run_test 77j "client only supporting ADLER32" rm -f $F77_TMP unset F77_TMP +cleanup_test_78() { + trap 0 + rm -f $DIR/$tfile +} + test_78() { # bug 10901 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_ost || { skip_env "local OST" && return; } @@ -6184,6 +6202,8 @@ test_78() { # bug 10901 [[ $SMALLESTOST -lt 10240 ]] && skip "too small OSTSIZE, useless to run large O_DIRECT test" && return 0 + trap cleanup_test_78 EXIT + [[ $F78SIZE -gt $((SMALLESTOST * $OSTCOUNT / 1024 - 80)) ]] && F78SIZE=$((SMALLESTOST * $OSTCOUNT / 1024 - 80)) @@ -6196,7 +6216,7 @@ test_78() { # bug 10901 $DIRECTIO rdwr $DIR/$tfile 0 $FSIZE 1048576||error "rdwr failed" done - rm -f $DIR/$tfile + cleanup_test_78 } run_test 78 "handle large O_DIRECT writes correctly ============" @@ -7296,6 +7316,15 @@ test_103a() { run_acl_subtest misc || error "misc test failed" echo "performing permissions..." run_acl_subtest permissions || error "permissions failed" + # LU-1482 mdd: Setting xattr are properly checked with and without ACLs + if [ $(lustre_version_code $SINGLEMDS) -gt $(version_code 2.8.55) -o \ + \( $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.6) -a \ + $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.29) \) ] + then + echo "performing permissions xattr..." + run_acl_subtest permissions_xattr || + error "permissions_xattr failed" + fi echo "performing setfacl..." 
run_acl_subtest setfacl || error "setfacl test failed" @@ -10416,67 +10445,114 @@ test_155_big_load() { true } +save_writethrough() { + local facets=$(get_facets OST) + + save_lustre_params $facets "obdfilter.*.writethrough_cache_enable" > $1 + save_lustre_params $facets "osd-*.*.writethrough_cache_enable" >> $1 +} + test_155a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read on set_cache writethrough on test_155_small_load + restore_lustre_params < $p + rm -f $p } run_test 155a "Verify small file correctness: read cache:on write_cache:on" test_155b() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read on set_cache writethrough off test_155_small_load + restore_lustre_params < $p + rm -f $p } run_test 155b "Verify small file correctness: read cache:on write_cache:off" test_155c() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read off set_cache writethrough on test_155_small_load + restore_lustre_params < $p + rm -f $p } run_test 155c "Verify small file correctness: read cache:off write_cache:on" test_155d() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read off set_cache writethrough off test_155_small_load + restore_lustre_params < $p + rm -f $p } run_test 155d "Verify small file correctness: read cache:off write_cache:off" test_155e() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read on set_cache writethrough on test_155_big_load + restore_lustre_params < $p + rm -f $p } run_test 155e "Verify big file correctness: read cache:on write_cache:on" test_155f() { [ 
$PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read on set_cache writethrough off test_155_big_load + restore_lustre_params < $p + rm -f $p } run_test 155f "Verify big file correctness: read cache:on write_cache:off" test_155g() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read off set_cache writethrough on test_155_big_load + restore_lustre_params < $p + rm -f $p } run_test 155g "Verify big file correctness: read cache:off write_cache:on" test_155h() { [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache read off set_cache writethrough off test_155_big_load + restore_lustre_params < $p + rm -f $p } run_test 155h "Verify big file correctness: read cache:off write_cache:off" @@ -10487,12 +10563,14 @@ test_156() { local BEFORE local AFTER local file="$DIR/$tfile" + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" [ "$(facet_fstype ost1)" = "zfs" -a \ $(lustre_version_code ost1 -lt $(version_code 2.6.93)) ] && skip "LU-1956/LU-2261: stats not implemented on OSD ZFS" && return + save_writethrough $p roc_hit_init log "Turn on read and write cache" @@ -10621,6 +10699,8 @@ test_156() { fi rm -f $file + restore_lustre_params < $p + rm -f $p } run_test 156 "Verification of tunables" @@ -12573,6 +12653,11 @@ run_test 224b "Don't panic on bulk IO failure" test_224c() { # LU-6441 [ $PARALLEL == "yes" ] && skip "skip parallel run" && return remote_mds_nodsh && skip "remote MDS with nodsh" && return + + local p="$TMP/$TESTSUITE-$TESTNAME.parameters" + save_writethrough $p + set_cache writethrough on + local pages_per_rpc=$($LCTL get_param \ osc.*.max_pages_per_rpc) local at_max=$($LCTL get_param -n at_max) @@ -12590,10 +12675,11 @@ test_224c() { # LU-6441 error "conf_param timeout=5 failed" 
#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520 - $LCTL set_param fail_loc=0x520 + do_facet ost1 $LCTL set_param fail_loc=0x520 + $LFS setstripe -c 1 -i 0 $DIR/$tfile dd if=/dev/zero of=$DIR/$tfile bs=8MB count=1 sync - $LCTL set_param fail_loc=0 + do_facet ost1 $LCTL set_param fail_loc=0 set_conf_param_and_check client "$test_at" "$param_at" $at_max || error "conf_param at_max=$at_max failed" @@ -12601,6 +12687,8 @@ test_224c() { # LU-6441 $timeout || error "conf_param timeout=$timeout failed" $LCTL set_param -n $pages_per_rpc + restore_lustre_params < $p + rm -f $p } run_test 224c "Don't hang if one of md lost during large bulk RPC" @@ -13794,18 +13882,9 @@ test_247e() { run_test 247e "mount .. as fileset" test_248() { - local my_error=error - local fast_read_sav=$($LCTL get_param -n llite.*.fast_read 2>/dev/null) [ -z "$fast_read_sav" ] && skip "no fast read support" && return - # This test case is time sensitive and Maloo uses KVM to run autotest. - # Therefore the complete time of I/O task is unreliable and depends on - # the workload on the host machine when the task is running. 
- local virt=$(running_in_vm) - [ -n "$virt" ] && echo "running in VM '$virt', ignore error" && - my_error="error_ignore env=$virt" - # create a large file for fast read verification dd if=/dev/zero of=$DIR/$tfile bs=1M count=128 > /dev/null 2>&1 @@ -13827,7 +13906,8 @@ test_248() { # verify that fast read is 4 times faster for cache read [ $(bc <<< "4 * $t_fast < $t_slow") -eq 1 ] || - $my_error "fast read was not 4 times faster: $t_fast vs $t_slow" + error_not_in_vm "fast read was not 4 times faster: " \ + "$t_fast vs $t_slow" echo "Test 2: verify the performance between big and small read" $LCTL set_param -n llite.*.fast_read=1 @@ -13844,7 +13924,7 @@ test_248() { # verify that big IO is not 4 times faster than small IO [ $(bc <<< "4 * $t_1k >= $t_1m") -eq 1 ] || - $my_error "bigger IO is way too fast: $t_1k vs $t_1m" + error_not_in_vm "bigger IO is way too fast: $t_1k vs $t_1m" $LCTL set_param -n llite.*.fast_read=$fast_read_sav rm -f $DIR/$tfile @@ -14024,6 +14104,7 @@ ladvise_willread_performance() local repeat=10 local average_cache=0 local average_ladvise=0 + for ((i = 1; i <= $repeat; i++)); do echo "Iter $i/$repeat: reading without willread hint" cancel_lru_locks osc @@ -14073,8 +14154,8 @@ ladvise_willread_performance() local lowest_speedup=$((average_cache / 2)) [ $average_ladvise -gt $lowest_speedup ] || - error "Speedup with willread is less than $lowest_speedup%,"\ - "got $average_ladvise%" + error_not_in_vm "Speedup with willread is less than " \ + "$lowest_speedup%, got $average_ladvise%" echo "Speedup with willread ladvise: $average_ladvise%" echo "Speedup with cache: $average_cache%" } @@ -14158,6 +14239,71 @@ test_255a() { } run_test 255a "check 'lfs ladvise -a willread'" +facet_meminfo() { + local facet=$1 + local info=$2 + + do_facet $facet "cat /proc/meminfo | grep ^${info}:" | awk '{print $2}' +} + +test_255b() { + lfs setstripe -c -1 -i 0 $DIR/$tfile + + ladvise_no_type dontneed $DIR/$tfile && + skip "dontneed ladvise is not supported" && 
return + + ladvise_no_ioctl $DIR/$tfile && + skip "ladvise ioctl is not supported" && return + + [ $(lustre_version_code ost1) -lt $(version_code 2.8.54) ] && + skip "lustre < 2.8.54 does not support ladvise" && return + + [ "$(facet_fstype ost1)" = "zfs" ] && + skip "zfs-osd does not support dontneed advice" && return + + local size_mb=100 + local size=$((size_mb * 1048576)) + # In order to prevent disturbance of other processes, only check 3/4 + # of the memory usage + local kibibytes=$((size_mb * 1024 * 3 / 4)) + + dd if=/dev/zero of=$DIR/$tfile bs=1048576 count=$size_mb || + error "dd to $DIR/$tfile failed" + + local total=$(facet_meminfo ost1 MemTotal) + echo "Total memory: $total KiB" + + do_facet ost1 "sync && echo 3 > /proc/sys/vm/drop_caches" + local before_read=$(facet_meminfo ost1 Cached) + echo "Cache used before read: $before_read KiB" + + lfs ladvise -a willread $DIR/$tfile || + error "Ladvise willread failed" + local after_read=$(facet_meminfo ost1 Cached) + echo "Cache used after read: $after_read KiB" + + lfs ladvise -a dontneed $DIR/$tfile || + error "Ladvise dontneed again failed" + local no_read=$(facet_meminfo ost1 Cached) + echo "Cache used after dontneed ladvise: $no_read KiB" + + if [ $total -lt $((before_read + kibibytes)) ]; then + echo "Memory is too small, abort checking" + return 0 + fi + + if [ $((before_read + kibibytes)) -gt $after_read ]; then + error "Ladvise willread should use more memory" \ + "than $kibibytes KiB" + fi + + if [ $((no_read + kibibytes)) -gt $after_read ]; then + error "Ladvise dontneed should release more memory" \ + "than $kibibytes KiB" + fi +} +run_test 255b "check 'lfs ladvise -a dontneed'" + test_256() { local cl_user local cat_sl @@ -14222,6 +14368,29 @@ test_256() { } run_test 256 "Check llog delete for empty and not full state" +test_257() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.55) ]] && + skip "Need MDS version at least 
2.8.55" && return + + test_mkdir -p $DIR/$tdir + + setfattr -n trusted.name1 -v value1 $DIR/$tdir || + error "setfattr -n trusted.name1=value1 $DIR/$tdir failed" + stat $DIR/$tdir + +#define OBD_FAIL_MDS_XATTR_REP 0x161 + local mdtidx=$($LFS getstripe -M $DIR/$tdir) + local facet=mds$((mdtidx + 1)) + set_nodes_failloc $(facet_active_host $facet) 0x80000161 + getfattr -n trusted.name1 $DIR/$tdir 2> /dev/null + + stop $facet || error "stop MDS failed" + start $facet $(mdsdevname $((mdtidx + 1))) $MDS_MOUNT_OPTS || + error "start MDS fail" +} +run_test 257 "xattr locks are not lost" + test_260() { #define OBD_FAIL_MDC_CLOSE 0x806 $LCTL set_param fail_loc=0x80000806 @@ -14987,12 +15156,12 @@ test_311() { local new_iused for i in $(seq 120); do new_iused=$($LFS df -i | grep OST0000 | awk '{ print $3 }') - [ $new_iused -lt $((old_iused - 900)) ] && break + [ $((old_iused - new_iused)) -gt 800 ] && break sleep 1 done echo "waited $i sec, old Iused $old_iused, new Iused $new_iused" - [ $new_iused -lt $((old_iused - 900)) ] || + [ $((old_iused - new_iused)) -gt 800 ] || error "objs not destroyed after unlink" } run_test 311 "disable OSP precreate, and unlink should destroy objs" @@ -15095,6 +15264,48 @@ test_312() { # LU-4856 } run_test 312 "make sure ZFS adjusts its block size by write pattern" +test_399() { # LU-7655 for OST fake write + # turn off debug for performance testing + local saved_debug=$($LCTL get_param -n debug) + $LCTL set_param debug=0 + + $SETSTRIPE -c 1 -i 0 $DIR/$tfile + + # get ost1 size - lustre-OST0000 + local ost1_avail_size=$($LFS df | awk /${ost1_svc}/'{ print $4 }') + local blocks=$((ost1_avail_size/2/1024)) # half avail space by megabytes + [ $blocks -gt 1000 ] && blocks=1000 # 1G in maximum + + local start_time=$(date +%s.%N) + dd if=/dev/zero of=$DIR/$tfile bs=1M count=$blocks oflag=sync || + error "real dd writing error" + local duration=$(bc <<< "$(date +%s.%N) - $start_time") + rm -f $DIR/$tfile + + # define OBD_FAIL_OST_FAKE_WRITE 0x238 + 
do_facet ost1 $LCTL set_param fail_loc=0x238 + + local start_time=$(date +%s.%N) + dd if=/dev/zero of=$DIR/$tfile bs=1M count=$blocks oflag=sync || + error "fake dd writing error" + local duration_fake=$(bc <<< "$(date +%s.%N) - $start_time") + + # verify file size + cancel_lru_locks osc + $CHECKSTAT -t file -s $((blocks * 1024 * 1024)) $DIR/$tfile || + error "$tfile size not $blocks MB" + + do_facet ost1 $LCTL set_param fail_loc=0 + + echo "fake write $duration_fake vs. normal write $duration in seconds" + [ $(bc <<< "$duration_fake < $duration") -eq 1 ] || + error_not_in_vm "fake write is slower" + + $LCTL set_param -n debug="$saved_debug" + rm -f $DIR/$tfile +} +run_test 399 "fake write should not be slower than normal write" + test_400a() { # LU-1606, was conf-sanity test_74 local extra_flags='' local out=$TMP/$tfile @@ -15402,6 +15613,29 @@ test_406() { } run_test 406 "DNE support fs default striping" +test_407() { + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return + + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.55) ]] && + skip "Need MDS version at least 2.8.55" && return + + $LFS mkdir -i 0 -c 1 $DIR/$tdir.0 || + error "$LFS mkdir -i 0 -c 1 $tdir.0 failed" + $LFS mkdir -i 1 -c 1 $DIR/$tdir.1 || + error "$LFS mkdir -i 1 -c 1 $tdir.1 failed" + touch $DIR/$tdir.0/$tfile.0 || error "touch $tdir.0/$tfile.0 failed" + + #define OBD_FAIL_DT_TXN_STOP 0x2019 + for idx in $(seq $MDSCOUNT); do + do_facet mds$idx "lctl set_param fail_loc=0x2019" + done + $LFS mkdir -c 2 $DIR/$tdir && error "$LFS mkdir -c 2 $tdir should fail" + mv $DIR/$tdir.0/$tfile.0 $DIR/$tdir.1/$tfile.1 && + error "mv $tdir.0/$tfile.0 $tdir.1/$tfile.1 should fail" + true +} +run_test 407 "transaction fail should cause operation fail" + # # tests that do cleanup/setup should be run at the end #