Whamcloud - gitweb
LU-17151 tests: increase sanity/411b memory limit 10/52610/7
author: Timothy Day <timday@amazon.com>
Tue, 10 Oct 2023 00:07:24 +0000 (00:07 +0000)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 25 Oct 2023 18:09:10 +0000 (18:09 +0000)
This test fails most of the time when run on arm
clients. The cgroup memory limit appears to have been
increased in a past revision for a similar issue.
Increase it further for aarch64, and by a smaller
amount for x86.

Also, report the exit code in the write-failure error
messages to make other failures easier to debug.

There's likely a better fix for this, but hopefully
this will let the test pass and provide some value
without having to do a full revert.

Fixes: 8aa231a99 ("LU-16713 llite: writeback/commit pages under memory pressure")
Test-Parameters: trivial
Test-Parameters: testgroup=review-ldiskfs-arm testlist=sanity env=ONLY=411b,ONLY_REPEAT=50
Test-Parameters: clientdistro=el8.7 testlist=sanity env=ONLY=411b,ONLY_REPEAT=50
Test-Parameters: clientdistro=el9.1 testlist=sanity env=ONLY=411b,ONLY_REPEAT=50
Signed-off-by: Timothy Day <timday@amazon.com>
Change-Id: If850077c0d7f6466082433776d370d24eee9736c
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52610
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/tests/sanity.sh

index 7671899..bbe83c9 100755 (executable)
@@ -27639,9 +27639,14 @@ test_411b() {
        [ -e "$cg_basedir/memory.kmem.limit_in_bytes" ] ||
                skip "no setup for cgroup"
        $LFS setstripe -c 2 $DIR/$tfile || error "unable to setstripe"
-       # testing suggests we can't reliably avoid OOM with a 64M limit, but it
-       # seems reasonable to ask that we have at least 128M in the cgroup
-       local memlimit_mb=256
+       # (x86) testing suggests we can't reliably avoid OOM with a 64M-256M
+       # limit, so we have 384M in cgroup
+       # (arm) this seems to hit OOM more often than x86, so 1024M
+       if [[ $(uname -m) = aarch64 ]]; then
+               local memlimit_mb=1024
+       else
+               local memlimit_mb=384
+       fi
 
        # Create a cgroup and set memory limit
        # (tfile is used as an easy way to get a recognizable cgroup name)
@@ -27694,16 +27699,16 @@ test_411b() {
        wait $pid4
        local rc4=$?
        if (( rc1 != 0)); then
-               error "error writing to file from $pid1"
+               error "error $rc1 writing to file from $pid1"
        fi
        if (( rc2 != 0)); then
-               error "error writing to file from $pid2"
+               error "error $rc2 writing to file from $pid2"
        fi
        if (( rc3 != 0)); then
-               error "error writing to file from $pid3"
+               error "error $rc3 writing to file from $pid3"
        fi
        if (( rc4 != 0)); then
-               error "error writing to file from $pid4"
+               error "error $rc4 writing to file from $pid4"
        fi
 
        sync