From bdd470ff9765cb81d94b888ee1067e1c420303bc Mon Sep 17 00:00:00 2001
From: Andreas Dilger
Date: Mon, 16 Jan 2017 14:24:10 -0700
Subject: [PATCH] LU-9069 tests: improve output of sanity test_255a

Improve output of sanity.sh test_255a to contain more information.
Clean up the performance measurements and calculations to make the
test easier to read. The random_read_iops() helper routine might
be useful for other tests in the future as well.

The test does not (yet) work for ZFS, so it will skip the checks on
ZFS OSTs until "ladvise -a dontneed" is implemented for osd-zfs.

Test-Parameters: trivial
Signed-off-by: Andreas Dilger
Change-Id: Ia177d87e41266b058a6863bbf36108ad71ef9a00
Reviewed-on: https://review.whamcloud.com/24907
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Jian Yu
Reviewed-by: Saurabh Tandan
Reviewed-by: Oleg Drokin
---
 lustre/doc/lfs-ladvise.1 |  34 ++++++++------
 lustre/tests/sanity.sh   | 113 ++++++++++++++++++++++++++++-------------------
 lustre/utils/lfs.c       |   2 +-
 3 files changed, 89 insertions(+), 60 deletions(-)

diff --git a/lustre/doc/lfs-ladvise.1 b/lustre/doc/lfs-ladvise.1
index 4658b6c..b676480 100644
--- a/lustre/doc/lfs-ladvise.1
+++ b/lustre/doc/lfs-ladvise.1
@@ -10,8 +10,11 @@ lfs ladvise \- give file access advices or hints to server.
 .br
 .SH DESCRIPTION
 Give file access advices or hints to Lustre server side, usually OSS. This lfs
-utility is simlar to Linux fadvise() system call, except it can forward the
-advices from Lustre clients to servers.
+command is similar to the Linux
+.BR fadvise64 (2)
+system call and
+.BR posix_fadvise (2),
+except it can forward the hints from Lustre clients to remote servers.
 .SH OPTIONS
 .TP
 \fB\-a\fR, \fB\-\-advice\fR=\fIADVICE\fR
@@ -38,18 +41,20 @@ File range has length of \fILENGTH\fR. This option may not be specified at
 the same time as the -e option.
 .SH NOTE
 .PP
-Typically, the "lfs ladvise" forwards the advice to Lustre servers without
-guaranteeing what and when servers will react to the advice. Actions may or
-may not triggered when the advices are recieved, depending on the type of the
-advice as well as the real-time decision of the affected server-side
-components.
+Typically,
+.B lfs ladvise
+forwards the advice to Lustre servers without
+guaranteeing how and when servers will react to the advice. Actions may or
+may not be triggered when the advices are received, depending on the type of
+the advice, whether the backing filesystem type supports that advice, as well
+as the real-time decision of the affected server-side components.
 
 A typical usage of ladvise is to enable applications and users with external
 knowledge to intervene in server-side cache management. For example, if a
-bunch of different clients are doing small random reads of a file, prefetching
+group of different clients are doing small random reads of a file, prefetching
 pages into OSS cache with big linear reads before the random IO is a net
 benefit. Fetching that data into each client cache with fadvise() may not
-be, due to much more data being sent to the client.
+be a benefit if any individual client only reads a subset of the file.
 
 The main difference between Linux fadvise() system call and ladvise is that
 fadvise() is only a client side mechanism that does not pass the advice to the
@@ -57,18 +62,19 @@ filesystem, while ladvise can send advices or hints to Lustre server sides.
 
 .SH EXAMPLES
 .TP
-.B $ lfs ladvise -a willread -s 0 -e 1048576000 /mnt/lustre/file1
+.B $ lfs ladvise -a willread -s 0 -e 1024M /mnt/lustre/file1
 This gives the OST(s) holding the first 1GB of \fB/mnt/lustre/file1\fR a hint
-that the first 1GB of the file will be read soon.
+that the first 1GB of that file will be read soon.
 .TP
-.B $ lfs ladvise -a dontneed -s 0 -e 1048576000 /mnt/lustre/file1
+.B $ lfs ladvise -a dontneed -s 0 -e 1G /mnt/lustre/file1
 This gives the OST(s) holding the first 1GB of \fB/mnt/lustre/file1\fR a hint
 that the first 1GB of file will not be read in the near future, thus the OST(s)
-could clear the cache of the file in the memory.
+could clear the cache of that file in the memory.
 .SH AVAILABILITY
 The lfs ladvise command is part of the Lustre filesystem.
 .SH SEE ALSO
 .BR lfs (1),
-.BR fadvise (2),
+.BR fadvise64 (2),
+.BR posix_fadvise (2),
 .BR llapi_ladvise (3),
 .BR lustre (7)
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index c9fa183..37cd0ba 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -14433,65 +14433,87 @@ ladvise_no_ioctl()
 	return 1
 }
 
+percent() {
+	bc <<<"scale=2; ($1 - $2) * 100 / $2"
+}
+
+# run a random read IO workload, print the rate with "MB/s" stripped
+# usage: random_read_iops <filename> <filesize> [iosize]
+random_read_iops() {
+	local file=$1
+	local fsize=$2
+	local iosize=${3:-4096}
+
+	$READS -f $file -s $fsize -b $iosize -n $((fsize / iosize)) -t 60 |
+		sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##'
+}
+
+drop_file_oss_cache() {
+	local file="$1"
+	local nodes="$2"
+
+	$LFS ladvise -a dontneed $file 2>/dev/null ||
+		do_nodes $nodes "echo 3 > /proc/sys/vm/drop_caches"
+}
+
 ladvise_willread_performance()
 {
 	local repeat=10
+	local average_origin=0
 	local average_cache=0
 	local average_ladvise=0
 
 	for ((i = 1; i <= $repeat; i++)); do
 		echo "Iter $i/$repeat: reading without willread hint"
 		cancel_lru_locks osc
-		do_nodes $(comma_list $(osts_nodes)) \
-			"echo 3 > /proc/sys/vm/drop_caches"
-		local speed_origin=$($READS -f $DIR/$tfile -s $size \
-			-b 4096 -n $((size / 4096)) -t 60 |
-			sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+		drop_file_oss_cache $DIR/$tfile $(comma_list $(osts_nodes))
+		local speed_origin=$(random_read_iops $DIR/$tfile $size)
+		echo "Iter $i/$repeat: uncached speed: $speed_origin"
+		average_origin=$(bc <<<"$average_origin + $speed_origin")
 
-		echo "Iter $i/$repeat: Reading again without willread hint"
 		cancel_lru_locks osc
-		local speed_cache=$($READS -f $DIR/$tfile -s $size \
-			-b 4096 -n $((size / 4096)) -t 60 |
-			sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
+		local speed_cache=$(random_read_iops $DIR/$tfile $size)
+		echo "Iter $i/$repeat: OSS cache speed: $speed_cache"
+		average_cache=$(bc <<<"$average_cache + $speed_cache")
 
-		echo "Iter $i/$repeat: reading with willread hint"
 		cancel_lru_locks osc
-		do_nodes $(comma_list $(osts_nodes)) \
-			"echo 3 > /proc/sys/vm/drop_caches"
-		lfs ladvise -a willread $DIR/$tfile ||
-			error "Ladvise failed"
-		local speed_ladvise=$($READS -f $DIR/$tfile -s $size \
-			-b 4096 -n $((size / 4096)) -t 60 |
-			sed -e '/^$/d' -e 's#.*s, ##' -e 's#MB/s##')
-
-		local cache_speedup=$(echo "scale=2; \
-			($speed_cache-$speed_origin)/$speed_origin*100" | bc)
-		cache_speedup=$(echo ${cache_speedup%.*})
-		echo "Iter $i/$repeat: cache speedup: $cache_speedup%"
-		average_cache=$((average_cache + cache_speedup))
-
-		local ladvise_speedup=$(echo "scale=2; \
-			($speed_ladvise-$speed_origin)/$speed_origin*100" | bc)
-		ladvise_speedup=$(echo ${ladvise_speedup%.*})
-		echo "Iter $i/$repeat: ladvise speedup: $ladvise_speedup%"
-		average_ladvise=$((average_ladvise + ladvise_speedup))
+		drop_file_oss_cache $DIR/$tfile $(comma_list $(osts_nodes))
+		$LFS ladvise -a willread $DIR/$tfile || error "ladvise failed"
+		local speed_ladvise=$(random_read_iops $DIR/$tfile $size)
+		echo "Iter $i/$repeat: ladvise speed: $speed_ladvise"
+		average_ladvise=$(bc <<<"$average_ladvise + $speed_ladvise")
 	done
-	average_cache=$((average_cache / repeat))
-	average_ladvise=$((average_ladvise / repeat))
-
-	if [ $average_cache -lt 20 ]; then
-		echo "Speedup with cache is less than 20% ($average_cache%),"\
-			"skipping check of speedup with willread:"\
-			"$average_ladvise%"
+	average_origin=$(bc <<<"scale=2; $average_origin / $repeat")
+	average_cache=$(bc <<<"scale=2; $average_cache / $repeat")
+	average_ladvise=$(bc <<<"scale=2; $average_ladvise / $repeat")
+
+	local speedup_cache=$(percent $average_cache $average_origin)
+	local speedup_ladvise=$(percent $average_ladvise $average_origin)
+
+	echo "Average uncached read: $average_origin"
+	echo "Average speedup with OSS cached read: " \
+		"$average_cache = +$speedup_cache%"
+	echo "Average speedup with ladvise willread: " \
+		"$average_ladvise = +$speedup_ladvise%"
+
+	local lowest_speedup=20
+	if [ $(bc <<<"$speedup_cache < $lowest_speedup") -eq 1 ]; then
+		echo "Speedup with OSS cached read less than $lowest_speedup%,"\
+			"got $speedup_cache%. Skipping ladvise willread check."
 		return 0
 	fi
 
-	local lowest_speedup=$((average_cache / 2))
-	[ $average_ladvise -gt $lowest_speedup ] ||
+	# the test won't work on ZFS until it supports 'ladvise dontneed', but
+	# it is still good to run until then to exercise 'ladvise willread'
+	! $LFS ladvise -a dontneed $DIR/$tfile &&
+		[ "$(facet_fstype ost1)" = "zfs" ] &&
+		echo "osd-zfs does not support dontneed or drop_caches" &&
+		return 0
+
+	lowest_speedup=$(bc <<<"scale=2; $speedup_cache / 2")
+	[ $(bc <<<"$speedup_ladvise > $lowest_speedup") -eq 1 ] ||
 		error_not_in_vm "Speedup with willread is less than " \
-			"$lowest_speedup%, got $average_ladvise%"
-	echo "Speedup with willread ladvise: $average_ladvise%"
-	echo "Speedup with cache: $average_cache%"
+			"$lowest_speedup%, got $speedup_ladvise%"
 }
 
 test_255a() {
@@ -14581,6 +14603,8 @@ facet_meminfo() {
 }
 
 test_255b() {
+	$LFS setstripe -c 1 -i 0 $DIR/$tfile
+
 	ladvise_no_type dontneed $DIR/$tfile &&
 		skip "dontneed ladvise is not supported" && return
 
@@ -14590,10 +14614,9 @@ test_255b() {
 	[ $(lustre_version_code ost1) -lt $(version_code 2.8.54) ] &&
 		skip "lustre < 2.8.54 does not support ladvise" && return
 
-	[ "$(facet_fstype ost1)" = "zfs" ] &&
-		skip "zfs-osd does not support dontneed advice" && return
-
-	lfs setstripe -c 1 -i 0 $DIR/$tfile
+	! $LFS ladvise -a dontneed $DIR/$tfile &&
+		[ "$(facet_fstype ost1)" = "zfs" ] &&
+		skip "zfs-osd does not support 'ladvise dontneed'" && return
 
 	local size_mb=100
 	local size=$((size_mb * 1048576))
diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c
index 63194f3..ac5f44b 100644
--- a/lustre/utils/lfs.c
+++ b/lustre/utils/lfs.c
@@ -399,7 +399,7 @@ command_t cmdlist[] = {
 	 "usage: ladvise [--advice|-a ADVICE] [--start|-s START[kMGT]]\n"
 	 " [--background|-b]\n"
 	 " {[--end|-e END[kMGT]] | [--length|-l LENGTH[kMGT]]}\n"
-	 " ..."},
+	 " ...\n"},
 	{"help", Parser_help, 0, "help"},
 	{"exit", Parser_quit, 0, "quit"},
 	{"quit", Parser_quit, 0, "quit"},
-- 
1.8.3.1