From 68dd4f2ba984d42356e0bfb9948223509d8407ca Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 27 Nov 2024 19:49:03 -0700 Subject: [PATCH] LU-17251 tests: increase parallel-scale OST seq_width Increase seq_width earlier in parallel-scale so that OST SEQ rollover does not happen during later testing (in particular test_rr_alloc). Otherwise, the OST SEQ rollover can stall OST object preallocation during the testing and provide skewed creation rates across OSTs. Fix parallel-scale.sh so only tests which use MPI check MPI_USER. Test-Parameters: trivial Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc Fixes: 11d55259ab ("LU-17251 tests: try to fix test_rr_alloc again") Fixes: 66d93ce3e4 ("LU-17251 test: improve parallel-scale rr_alloc test") Fixes: 0ecb2a167c ("LU-11912 ofd: reduce LUSTRE_DATA_SEQ_MAX_WIDTH") Signed-off-by: Andreas Dilger Change-Id: I79b146fff96dcc3c607e73bbb575c526d02540e5 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57181 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Jian Yu Reviewed-by: Li Dongyang Reviewed-by: Oleg Drokin --- lustre/tests/parallel-scale.sh | 49 +++++++++++++++++++++++++++--------------- lustre/tests/test-framework.sh | 18 ++++++++++++---- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh index 8ba305c..7e60fee 100644 --- a/lustre/tests/parallel-scale.sh +++ b/lustre/tests/parallel-scale.sh @@ -71,94 +71,108 @@ fi check_and_setup_lustre -get_mpiuser_id $MPI_USER +ost_set_temp_seq_width_all $DATA_SEQ_MAX_WIDTH + MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"} $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS test_compilebench() { - run_compilebench + run_compilebench } run_test compilebench "compilebench" test_metabench() { - run_metabench + run_metabench } run_test metabench "metabench" test_simul() { - run_simul + get_mpiuser_id $MPI_USER + run_simul } run_test simul "simul" test_mdtestssf() { - run_mdtest "ssf" + get_mpiuser_id $MPI_USER + run_mdtest "ssf" } run_test mdtestssf "mdtestssf" test_mdtestfpp() { - run_mdtest "fpp" + get_mpiuser_id $MPI_USER + run_mdtest "fpp" } run_test mdtestfpp "mdtestfpp" test_connectathon() { - run_connectathon + run_connectathon } run_test connectathon "connectathon" test_iorssf() { - run_ior "ssf" + get_mpiuser_id $MPI_USER + run_ior "ssf" } run_test iorssf "iorssf" test_iorfpp() { - run_ior "fpp" + get_mpiuser_id $MPI_USER + run_ior "fpp" } run_test iorfpp "iorfpp" test_ior_mdtest_parallel_ssf() { + get_mpiuser_id $MPI_USER ior_mdtest_parallel "ssf" } run_test ior_mdtest_parallel_ssf "iormdtestssf" test_ior_mdtest_parallel_fpp() { + get_mpiuser_id $MPI_USER ior_mdtest_parallel "fpp" } run_test ior_mdtest_parallel_fpp "iormdtestfpp" test_mib() { - run_mib + get_mpiuser_id $MPI_USER + run_mib } run_test mib "mib" test_cascading_rw() { - run_cascading_rw + get_mpiuser_id $MPI_USER + run_cascading_rw } run_test cascading_rw "cascading_rw" test_write_append_truncate() { - run_write_append_truncate + get_mpiuser_id $MPI_USER + run_write_append_truncate } run_test write_append_truncate "write_append_truncate" # Argument is chunk size limit, the upper bound on write size test_write_disjoint() { - run_write_disjoint 123456 + get_mpiuser_id $MPI_USER + run_write_disjoint 123456 } run_test write_disjoint "write_disjoint" # Make sure to exercise the tiny write code test_write_disjoint_tiny() { + get_mpiuser_id $MPI_USER run_write_disjoint 16384 } run_test write_disjoint_tiny "write_disjoint_tiny" test_parallel_grouplock() { - run_parallel_grouplock + get_mpiuser_id $MPI_USER + run_parallel_grouplock } run_test parallel_grouplock "parallel_grouplock" test_statahead () { - run_statahead + run_statahead } run_test statahead "statahead test, multiple clients" @@ -168,6 +182,7 @@ test_rr_alloc () { run_test rr_alloc "Checking even file distribution over OSTs in RR policy" test_fs_test () { + get_mpiuser_id $MPI_USER run_fs_test } run_test fs_test "fs_test" @@ -178,13 +193,13 @@ test_fio () { run_test fio "fio" test_xdd () { + get_mpiuser_id $MPI_USER run_xdd } run_test xdd "xdd" # If necessary, return SLOW to its original value -[ "$mds1_FSTYPE" = zfs -o "$ost1_FSTYPE" = zfs ] && - SLOW=$ZFSSLOW +[[ "$mds1_FSTYPE" == zfs || "$ost1_FSTYPE" == zfs ]] && SLOW=$ZFSSLOW complete_test $SECONDS check_and_cleanup_lustre diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 7798ce3..66156d1 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -8320,11 +8320,19 @@ check_runas_id() { get_mpiuser_id() { local mpi_user=$1 - MPI_USER_UID=$(do_facet client "getent passwd $mpi_user | cut -d: -f3; -exit \\\${PIPESTATUS[0]}") || error_exit "failed to get the UID for $mpi_user" + if [[ -z "$MPI_USER_UID" ]]; then + MPI_USER_UID=$(do_facet client "getent passwd $mpi_user | + cut -d: -f3; exit \\\${PIPESTATUS[0]}") || + skip_env "failed to get the UID for $mpi_user" + echo "mpi_user=$1 MPI_USER_UID=$MPI_USER_UID" + fi - MPI_USER_GID=$(do_facet client "getent passwd $mpi_user | cut -d: -f4; -exit \\\${PIPESTATUS[0]}") || error_exit "failed to get the GID for $mpi_user" + if [[ -z "$MPI_USER_GID" ]]; then + MPI_USER_GID=$(do_facet client "getent passwd $mpi_user | + cut -d: -f4; exit \\\${PIPESTATUS[0]}") || + skip_env "failed to get the GID for $mpi_user" + echo "mpi_user=$1 MPI_USER_GID=$MPI_USER_GID" + fi } # Run multiop in the background, but wait for it to print @@ -12435,6 +12443,8 @@ ost_set_temp_seq_width_all() { local osts=$(comma_list $(osts_nodes)) local width=$(do_facet ost1 $LCTL get_param -n seq.*OST0000-super.width) + (( $width != $1 )) || return 0 + do_nodes $osts $LCTL set_param seq.*OST*-super.width=$1 stack_trap "do_nodes $osts $LCTL set_param seq.*OST*-super.width=$width" } -- 1.8.3.1