Whamcloud - gitweb
LU-17030 llite: allow setting max_cached_mb to a % 52/51952/7
author Patrick Farrell <pfarrell@whamcloud.com>
Tue, 15 Aug 2023 23:08:12 +0000 (19:08 -0400)
committer Oleg Drokin <green@whamcloud.com>
Thu, 31 Aug 2023 06:39:20 +0000 (06:39 +0000)
Lustre's max_cached_mb parameter is hard to use because it
must be set to a specific numeric value, so in effect it
cannot be set on the server side unless all clients are
guaranteed to have identical amounts of memory.

Add the ability to set it as a percentage of total memory
(e.g. max_cached_mb=50%) to make it more useful.

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I1f9f5a8a5d671ab00b7ab6133bb9b1d1214ca59e
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51952
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/llite/lproc_llite.c
lustre/tests/conf-sanity.sh
lustre/tests/sanity.sh

index 67b47ac..bd4708b 100644 (file)
@@ -496,9 +496,11 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
        long diff = 0;
        long nrpages = 0;
        __u16 refcheck;
+       u64 value;
        u64 pages_number;
        int rc;
        char kernbuf[128], *ptr;
+       bool percent = false;
 
        ENTRY;
        if (count >= sizeof(kernbuf))
@@ -506,14 +508,28 @@ static ssize_t ll_max_cached_mb_seq_write(struct file *file,
 
        if (copy_from_user(kernbuf, buffer, count))
                RETURN(-EFAULT);
-       kernbuf[count] = '\0';
+
+       if (count > 0 && kernbuf[count - 1] == '%') {
+               percent = true;
+               /* strip off the % */
+               kernbuf[count - 1] = '\0';
+       } else {
+               kernbuf[count] = '\0';
+       }
 
        ptr = lprocfs_find_named_value(kernbuf, "max_cached_mb:", &count);
-       rc = sysfs_memparse(ptr, count, &pages_number, "MiB");
+       if (percent)
+               rc = sysfs_memparse(ptr, count, &value, "B");
+       else
+               rc = sysfs_memparse(ptr, count, &value, "MiB");
        if (rc)
                RETURN(rc);
 
-       pages_number >>= PAGE_SHIFT;
+       if (percent) {
+               pages_number = cfs_totalram_pages() * value / 100;
+       } else {
+               pages_number = value >> PAGE_SHIFT;
+       }
 
        if (pages_number < 0 || pages_number > cfs_totalram_pages()) {
                CERROR("%s: can't set max cache more than %lu MB\n",
index 773348a..eaeae51 100644 (file)
@@ -10601,6 +10601,45 @@ test_140() {
 }
 run_test 140 "remove_updatelog script actions"
 
+test_150() {
+       setup
+
+       local max_cached_mb=$($LCTL get_param llite.*.max_cached_mb |
+                             awk '/^max_cached_mb/ { print $2 }')
+       stack_trap "$LCTL set_param -n llite.*.max_cached_mb=$max_cached_mb"
+
+       $LCTL set_param llite.*.max_cached_mb='100%'
+
+       local new_max_cached_mb=$($LCTL get_param llite.*.max_cached_mb |
+                                 awk '/^max_cached_mb/ { print $2 }')
+       local total_ram_mb=$(free -m | grep 'Mem:' | awk '{print $2}')
+
+       $LCTL get_param llite.*.max_cached_mb
+       echo "total ram mb: $total_ram_mb"
+       (( new_max_cached_mb == total_ram_mb )) ||
+               error "setting cache to 100% not equal to total RAM"
+
+       $LCTL set_param llite.*.max_cached_mb='50%'
+       new_max_cached_mb=$($LCTL get_param llite.*.max_cached_mb |
+                           awk '/^max_cached_mb/ { print $2 }')
+
+       $LCTL get_param llite.*.max_cached_mb
+       (( new_max_cached_mb == $((total_ram_mb / 2)) )) ||
+               error "setting cache to 50% not equal to 50% of RAM"
+
+       $LCTL set_param llite.*.max_cached_mb='105%' &&
+               error "should not be able to set insane value"
+
+       $LCTL set_param llite.*.max_cached_mb='0%'
+       new_max_cached_mb=$($LCTL get_param llite.*.max_cached_mb |
+                           awk '/^max_cached_mb/ { print $2 }')
+       # Minimum cache size is 64 MiB
+       $LCTL get_param llite.*.max_cached_mb
+       (( new_max_cached_mb == 64 )) ||
+               error "setting cache to 0% != minimum cache size"
+}
+run_test 150 "test setting max_cached_mb to a %"
+
 #
 # (This was sanity/802a)
 #
index a486121..8892d80 100755 (executable)
@@ -11160,14 +11160,6 @@ function get_named_value()
     grep -w "$tag" | sed "s/^$tag  *\([0-9]*\)  *.*/\1/"
 }
 
-export CACHE_MAX=$($LCTL get_param -n llite.*.max_cached_mb |
-                  awk '/^max_cached_mb/ { print $2 }')
-
-cleanup_101a() {
-       $LCTL set_param -n llite.*.max_cached_mb $CACHE_MAX
-       trap 0
-}
-
 test_101a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
 
@@ -11177,8 +11169,10 @@ test_101a() {
        local cache_limit=32
 
        $LCTL set_param -n osc.*-osc*.rpc_stats=0
-       trap cleanup_101a EXIT
        $LCTL set_param -n llite.*.read_ahead_stats=0
+       local max_cached_mb=$($LCTL get_param llite.*.max_cached_mb |
+                             awk '/^max_cached_mb/ { print $2 }')
+       stack_trap "$LCTL set_param -n llite.*.max_cached_mb=$max_cached_mb"
        $LCTL set_param -n llite.*.max_cached_mb=$cache_limit
 
        #
@@ -11192,7 +11186,6 @@ test_101a() {
                   get_named_value 'read.but.discarded'); do
                        discard=$(($discard + $s))
        done
-       cleanup_101a
 
        $LCTL get_param osc.*-osc*.rpc_stats
        $LCTL get_param llite.*.read_ahead_stats
@@ -24962,12 +24955,12 @@ test_277() {
        $LCTL set_param ldlm.namespaces.*.lru_size=0
        dd if=/dev/zero of=$DIR/$tfile bs=1M count=1
        local cached_mb=$($LCTL get_param llite.*.max_cached_mb |
-                       grep ^used_mb | awk '{print $2}')
+                         awk '/^used_mb/ { print $2 }')
        [ $cached_mb -eq 1 ] || error "expected mb 1 got $cached_mb"
        dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 \
                oflag=direct conv=notrunc
        cached_mb=$($LCTL get_param llite.*.max_cached_mb |
-                       grep ^used_mb | awk '{print $2}')
+                   awk '/^used_mb/ { print $2 }')
        [ $cached_mb -eq 0 ] || error "expected mb 0 got $cached_mb"
 }
 run_test 277 "Direct IO shall drop page cache"
@@ -28863,9 +28856,10 @@ run_test 427 "Failed DNE2 update request shouldn't corrupt updatelog"
 
 test_428() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
-       local cache_limit=$CACHE_MAX
+       local max_cached_mb=$($LCTL get_param llite.*.max_cached_mb |
+                             awk '/^max_cached_mb/ { print $2 }')
+       stack_trap "$LCTL set_param -n llite.*.max_cached_mb=$max_cached_mb"
 
-       stack_trap "$LCTL set_param -n llite.*.max_cached_mb=$cache_limit"
        $LCTL set_param -n llite.*.max_cached_mb=64
 
        mkdir $DIR/$tdir