From a07a96b53670d33e0adc384f34ba6e478be073a5 Mon Sep 17 00:00:00 2001
From: Andreas Dilger
Date: Fri, 4 Oct 2024 01:15:03 -0600
Subject: [PATCH] LU-18310 tests: add debugging to test_metabench

Both parallel-scale and parallel-scale-nfs are intermittently failing
test_metabench with "No space left on device" (ENOSPC) or "Disk quota
exceeded" (EDQUOT), even though this test is creating only about
10-20k files.

Add some debugging to see where all of the space has gone, and what
quota limits are being set. It may be that some earlier test (e.g.
compilebench) is leaving too much junk behind.

The failure rate is very low (only 2/637 runs in the past 4 weeks),
so it likely needs to be landed to catch a failure.

Test-Parameters: trivial testlist=parallel-scale
Test-Parameters: testlist=parallel-scale-nfsv4
Test-Parameters: testgroup=full-part-1
Signed-off-by: Andreas Dilger
Change-Id: Ie35ae677032ccc8113cbad5dc5a7b0504149717f
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/56582
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Alex Deiter
Reviewed-by: Elena
Reviewed-by: Oleg Drokin
---
Note: find_space_usage() was tightened up after the review recorded
above (quoted expansions, MPI_USER honoured, no "du /*" when a level
comes back empty), so the post-image blob id on the index line below
predates that change.

 lustre/tests/functions.sh | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/lustre/tests/functions.sh b/lustre/tests/functions.sh
index 538cb8c..1993b7b 100644
--- a/lustre/tests/functions.sh
+++ b/lustre/tests/functions.sh
@@ -353,6 +353,38 @@ run_compilebench() {
 	rm -rf $testdir
 }
 
+# try to understand why a test is running out of space/quota
+#
+# Print space/inode usage and the user quotas for the filesystem holding
+# $1, then the disk usage (in KiB, largest first) of every entry next to
+# $1, descending two more levels into the largest entry at each level.
+# usage: find_space_usage <dir>
+find_space_usage() {
+	local dir=$1
+	local topdir=$dir/..
+	local tmpfile
+	local level
+
+	$LFS df "$dir" || df "$dir"
+	$LFS df -i "$dir" || df -i "$dir"
+	$LFS quota -u "${MPI_USER:-mpiuser}" "$dir"
+	$LFS quota -u root "$dir"
+
+	# debugging only, so never fail the caller if mktemp does not work
+	tmpfile=$(mktemp) || return 0
+
+	for level in 1 2 3; do
+		du -skx "$topdir"/* | sort -nr | tee "$tmpfile"
+		# du puts a tab between size and name, so split on tabs to
+		# keep names that contain spaces intact
+		topdir=$(awk -F'\t' '{ print $2; exit; }' "$tmpfile")
+		# an empty result would make the next pass run "du /*", and
+		# only a directory has anything below it worth listing
+		[[ -d "$topdir" ]] || break
+	done
+	rm -f "$tmpfile"
+}
+
 run_metabench() {
 	local dir=${1:-$DIR}
 	local mntpt=${2:-$MOUNT}
@@ -374,6 +406,9 @@ run_metabench() {
 	# mpi_run uses mpiuser
 	chmod 0777 $testdir
 
+	# try to understand why this test is running out of space/quota
+	find_space_usage "$dir"
+
 	# -C             Run the file creation tests. Creates zero byte files.
 	# -S             Run the file stat tests.
 	# -c nfile       Number of files to be used in each test.
@@ -393,6 +428,7 @@ run_metabench() {
 
 	local rc=$?
 	if [ $rc != 0 ] ; then
+		find_space_usage "$dir"
 		error "metabench failed! $rc"
 	fi
 
-- 
1.8.3.1