From a5a7890093ea2509db15f8aa8a8c9d9c86133209 Mon Sep 17 00:00:00 2001 From: Li Xi Date: Tue, 14 Jun 2016 13:15:04 +0800 Subject: [PATCH] LU-4931 ladvise: Add dontneed advice support for ladvise This patch adds DONTNEED advice to the ladvise framework. OSS will clean up the page cache of the file when this hint is provided. Signed-off-by: Li Xi Signed-off-by: Gu Zheng Change-Id: If5cf7f3193924ca7cccb96d8d841c0d889469393 Reviewed-on: http://review.whamcloud.com/20203 Reviewed-by: Wang Shilong Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/doc/lfs-ladvise.1 | 7 ++++ lustre/include/lustre/lustre_user.h | 2 ++ lustre/ofd/ofd_dev.c | 6 ++++ lustre/osd-ldiskfs/osd_io.c | 10 +++++- lustre/ptlrpc/wiretest.c | 2 ++ lustre/tests/sanity.sh | 65 +++++++++++++++++++++++++++++++++++++ lustre/utils/wirecheck.c | 1 + lustre/utils/wiretest.c | 2 ++ 8 files changed, 94 insertions(+), 1 deletion(-) diff --git a/lustre/doc/lfs-ladvise.1 b/lustre/doc/lfs-ladvise.1 index 295658b..6810a14 100644 --- a/lustre/doc/lfs-ladvise.1 +++ b/lustre/doc/lfs-ladvise.1 @@ -19,6 +19,8 @@ Give advice or hint of type \fIADVICE\fR. Advice types are: .RS 1.2i .TP \fBwillread\fR to prefetch data into server cache +.TP +\fBdontneed\fR to cleanup data cache on server .RE .TP \fB\-b\fR, \fB\-\-background @@ -58,6 +60,11 @@ filesystem, while ladvise can send advices or hints to Lustre server sides. .B $ lfs ladvise -a willread -s 0 -e 1048576000 /mnt/lustre/file1 This gives the OST(s) holding the first 1GB of \fB/mnt/lustre/file1\fR a hint that the first 1GB of the file will be read soon. +.TP +.B $ lfs ladvise -a dontneed -s 0 -e 1048576000 /mnt/lustre/file1 +This gives the OST(s) holding the first 1GB of \fB/mnt/lustre/file1\fR a hint +that the first 1GB of file will not be read in the near future, thus the OST(s) +could clear the cache of the file in the memory. .SH AVAILABILITY The lfs ladvise command is part of the Lustre filesystem. 
.SH SEE ALSO diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 6fe9ae4..a27b876 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -1379,10 +1379,12 @@ struct llapi_json_item_list { enum lu_ladvise_type { LU_LADVISE_INVALID = 0, LU_LADVISE_WILLREAD = 1, + LU_LADVISE_DONTNEED = 2, }; #define LU_LADVISE_NAMES { \ [LU_LADVISE_WILLREAD] = "willread", \ + [LU_LADVISE_DONTNEED] = "dontneed", \ } /* This is the userspace argument for ladvise. It is currently the same as diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index cfa2baa..40cbf71 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -2191,6 +2191,7 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi) struct lustre_handle lockh = { 0 }; __u64 flags = 0; int i; + struct dt_object *dob; ENTRY; CFS_FAIL_TIMEOUT(OBD_FAIL_OST_LADVISE_PAUSE, cfs_fail_val); @@ -2237,6 +2238,7 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi) RETURN(rc); } LASSERT(fo != NULL); + dob = ofd_object_child(fo); for (i = 0; i < num_advise; i++, ladvise++) { if (ladvise->lla_end <= ladvise->lla_start) { @@ -2266,6 +2268,10 @@ static int ofd_ladvise_hdl(struct tgt_session_info *tsi) ladvise->lla_end); tgt_extent_unlock(&lockh, LCK_PR); break; + case LU_LADVISE_DONTNEED: + rc = dt_ladvise(env, dob, ladvise->lla_start, + ladvise->lla_end, LU_LADVISE_DONTNEED); + break; } if (rc != 0) break; diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 91034d2..9bfe5aab4 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1933,10 +1933,18 @@ static int osd_fiemap_get(const struct lu_env *env, struct dt_object *dt, static int osd_ladvise(const struct lu_env *env, struct dt_object *dt, __u64 start, __u64 end, enum lu_ladvise_type advice) { - int rc; + int rc = 0; + struct inode *inode = osd_dt_obj(dt)->oo_inode; ENTRY; switch (advice) { + case LU_LADVISE_DONTNEED: + if (end == 0) + break; 
+ invalidate_mapping_pages(inode->i_mapping, + start >> PAGE_CACHE_SHIFT, + (end - 1) >> PAGE_CACHE_SHIFT); + break; default: rc = -ENOTSUPP; break; diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 6f383dd..5af7e02 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -5015,6 +5015,8 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((struct lu_ladvise *)0)->lla_value4)); LASSERTF(LU_LADVISE_WILLREAD == 1, "found %lld\n", (long long)LU_LADVISE_WILLREAD); + LASSERTF(LU_LADVISE_DONTNEED == 2, "found %lld\n", + (long long)LU_LADVISE_DONTNEED); /* Checks for struct ladvise_hdr */ LASSERTF(LADVISE_MAGIC == 0x1ADF1CE0, "found 0x%.8x\n", diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 507812d..0c617fe 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -14226,6 +14226,71 @@ test_255a() { } run_test 255a "check 'lfs ladvise -a willread'" +facet_meminfo() { + local facet=$1 + local info=$2 + + do_facet $facet "cat /proc/meminfo | grep ^${info}:" | awk '{print $2}' +} + +test_255b() { + lfs setstripe -c -1 -i 0 $DIR/$tfile + + ladvise_no_type dontneed $DIR/$tfile && + skip "dontneed ladvise is not supported" && return + + ladvise_no_ioctl $DIR/$tfile && + skip "ladvise ioctl is not supported" && return + + [ $(lustre_version_code ost1) -lt $(version_code 2.8.54) ] && + skip "lustre < 2.8.54 does not support ladvise" && return + + [ "$(facet_fstype ost1)" = "zfs" ] && + skip "zfs-osd does not support dontneed advice" && return + + local size_mb=100 + local size=$((size_mb * 1048576)) + # In order to prevent disturbance of other processes, only check 3/4 + # of the memory usage + local kibibytes=$((size_mb * 1024 * 3 / 4)) + + dd if=/dev/zero of=$DIR/$tfile bs=1048576 count=$size_mb || + error "dd to $DIR/$tfile failed" + + local total=$(facet_meminfo ost1 MemTotal) + echo "Total memory: $total KiB" + + do_facet ost1 "sync && echo 3 > /proc/sys/vm/drop_caches" + local 
before_read=$(facet_meminfo ost1 Cached) + echo "Cache used before read: $before_read KiB" + + lfs ladvise -a willread $DIR/$tfile || + error "Ladvise willread failed" + local after_read=$(facet_meminfo ost1 Cached) + echo "Cache used after read: $after_read KiB" + + lfs ladvise -a dontneed $DIR/$tfile || + error "Ladvise dontneed again failed" + local no_read=$(facet_meminfo ost1 Cached) + echo "Cache used after dontneed ladvise: $no_read KiB" + + if [ $total -lt $((before_read + kibibytes)) ]; then + echo "Memory is too small, abort checking" + return 0 + fi + + if [ $((before_read + kibibytes)) -gt $after_read ]; then + error "Ladvise willread should use more memory" \ + "than $kibibytes KiB" + fi + + if [ $((no_read + kibibytes)) -gt $after_read ]; then + error "Ladvise dontneed should release more memory" \ + "than $kibibytes KiB" + fi +} +run_test 255b "check 'lfs ladvise -a dontneed'" + test_256() { local cl_user local cat_sl diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index f2e63d6..fcaa88a 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -324,6 +324,7 @@ check_lu_ladvise(void) CHECK_MEMBER(lu_ladvise, lla_value3); CHECK_MEMBER(lu_ladvise, lla_value4); CHECK_VALUE(LU_LADVISE_WILLREAD); + CHECK_VALUE(LU_LADVISE_DONTNEED); CHECK_VALUE(LF_ASYNC); CHECK_VALUE(LADVISE_MAGIC); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 2dce67d..d28f60f 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -5030,6 +5030,8 @@ void lustre_assert_wire_constants(void) (long long)(int)sizeof(((struct lu_ladvise *)0)->lla_value4)); LASSERTF(LU_LADVISE_WILLREAD == 1, "found %lld\n", (long long)LU_LADVISE_WILLREAD); + LASSERTF(LU_LADVISE_DONTNEED == 2, "found %lld\n", + (long long)LU_LADVISE_DONTNEED); /* Checks for struct ladvise_hdr */ LASSERTF(LADVISE_MAGIC == 0x1ADF1CE0, "found 0x%.8x\n", -- 1.8.3.1