From 68dd4f2ba984d42356e0bfb9948223509d8407ca Mon Sep 17 00:00:00 2001
From: Andreas Dilger <adilger@whamcloud.com>
Date: Wed, 27 Nov 2024 19:49:03 -0700
Subject: [PATCH] LU-17251 tests: increase parallel-scale OST seq_width

Increase seq_width earlier in parallel-scale so that OST SEQ rollover
does not happen during later testing (in particular test_rr_alloc).

Otherwise, an OST SEQ rollover can stall OST object preallocation
during testing and produce skewed creation rates across OSTs.

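Fix parallel-scale.sh so that only the tests that use MPI look up the
MPI_USER UID/GID via get_mpiuser_id(), which now caches the result and
calls skip_env instead of error_exit if the lookup fails.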

Test-Parameters: trivial
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Test-Parameters: testlist=parallel-scale env=ONLY=statahead+rr_alloc
Fixes: 11d55259ab ("LU-17251 tests: try to fix test_rr_alloc again")
Fixes: 66d93ce3e4 ("LU-17251 test: improve parallel-scale rr_alloc test")
Fixes: 0ecb2a167c ("LU-11912 ofd: reduce LUSTRE_DATA_SEQ_MAX_WIDTH")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I79b146fff96dcc3c607e73bbb575c526d02540e5
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57181
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Li Dongyang <dongyangli@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
---
 lustre/tests/parallel-scale.sh | 49 +++++++++++++++++++++++++++---------------
 lustre/tests/test-framework.sh | 18 ++++++++++++----
 2 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh
index 8ba305c..7e60fee 100644
--- a/lustre/tests/parallel-scale.sh
+++ b/lustre/tests/parallel-scale.sh
@@ -71,94 +71,108 @@ fi
 
 check_and_setup_lustre
 
-get_mpiuser_id $MPI_USER
+ost_set_temp_seq_width_all $DATA_SEQ_MAX_WIDTH
+
 MPI_RUNAS=${MPI_RUNAS:-"runas -u $MPI_USER_UID -g $MPI_USER_GID"}
 $GSS_KRB5 && refresh_krb5_tgt $MPI_USER_UID $MPI_USER_GID $MPI_RUNAS
 
 test_compilebench() {
-    run_compilebench
+	run_compilebench
 }
 run_test compilebench "compilebench"
 
 test_metabench() {
-    run_metabench
+	run_metabench
 }
 run_test metabench "metabench"
 
 test_simul() {
-    run_simul
+	get_mpiuser_id $MPI_USER
+	run_simul
 }
 run_test simul "simul"
 
 test_mdtestssf() {
-    run_mdtest "ssf"
+	get_mpiuser_id $MPI_USER
+	run_mdtest "ssf"
 }
 run_test mdtestssf "mdtestssf"
 
 test_mdtestfpp() {
-    run_mdtest "fpp"
+	get_mpiuser_id $MPI_USER
+	run_mdtest "fpp"
 }
 run_test mdtestfpp "mdtestfpp"
 
 test_connectathon() {
-    run_connectathon
+	run_connectathon
 }
 run_test connectathon "connectathon"
 
 test_iorssf() {
-    run_ior "ssf"
+	get_mpiuser_id $MPI_USER
+	run_ior "ssf"
 }
 run_test iorssf "iorssf"
 
 test_iorfpp() {
-    run_ior "fpp"
+	get_mpiuser_id $MPI_USER
+	run_ior "fpp"
 }
 run_test iorfpp "iorfpp"
 
 test_ior_mdtest_parallel_ssf() {
+	get_mpiuser_id $MPI_USER
 	ior_mdtest_parallel "ssf"
 }
 run_test ior_mdtest_parallel_ssf "iormdtestssf"
 
 test_ior_mdtest_parallel_fpp() {
+	get_mpiuser_id $MPI_USER
 	ior_mdtest_parallel "fpp"
 }
 run_test ior_mdtest_parallel_fpp "iormdtestfpp"
 
 test_mib() {
-    run_mib
+	get_mpiuser_id $MPI_USER
+	run_mib
 }
 run_test mib "mib"
 
 test_cascading_rw() {
-    run_cascading_rw
+	get_mpiuser_id $MPI_USER
+	run_cascading_rw
 }
 run_test cascading_rw "cascading_rw"
 
 test_write_append_truncate() {
-    run_write_append_truncate
+	get_mpiuser_id $MPI_USER
+	run_write_append_truncate
 }
 run_test write_append_truncate "write_append_truncate"
 
 # Argument is chunk size limit, the upper bound on write size
 test_write_disjoint() {
-    run_write_disjoint 123456
+	get_mpiuser_id $MPI_USER
+	run_write_disjoint 123456
 }
 run_test write_disjoint "write_disjoint"
 
 # Make sure to exercise the tiny write code
 test_write_disjoint_tiny() {
+	get_mpiuser_id $MPI_USER
 	run_write_disjoint 16384
 }
 run_test write_disjoint_tiny "write_disjoint_tiny"
 
 test_parallel_grouplock() {
-    run_parallel_grouplock
+	get_mpiuser_id $MPI_USER
+	run_parallel_grouplock
 }
 run_test parallel_grouplock "parallel_grouplock"
 
 test_statahead () {
-    run_statahead
+	run_statahead
 }
 run_test statahead "statahead test, multiple clients"
 
@@ -168,6 +182,7 @@ test_rr_alloc () {
 run_test rr_alloc "Checking even file distribution over OSTs in RR policy"
 
 test_fs_test () {
+	get_mpiuser_id $MPI_USER
 	run_fs_test
 }
 run_test fs_test "fs_test"
@@ -178,13 +193,13 @@ test_fio () {
 run_test fio "fio"
 
 test_xdd () {
+	get_mpiuser_id $MPI_USER
 	run_xdd
 }
 run_test xdd "xdd"
 
 # If necessary, return SLOW to its original value
-[ "$mds1_FSTYPE" = zfs -o "$ost1_FSTYPE" = zfs ] &&
-	SLOW=$ZFSSLOW
+[[ "$mds1_FSTYPE" == zfs || "$ost1_FSTYPE" == zfs ]] && SLOW=$ZFSSLOW
 
 complete_test $SECONDS
 check_and_cleanup_lustre
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 7798ce3..66156d1 100755
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -8320,11 +8320,19 @@ check_runas_id() {
 get_mpiuser_id() {
 	local mpi_user=$1
 
-	MPI_USER_UID=$(do_facet client "getent passwd $mpi_user | cut -d: -f3;
-exit \\\${PIPESTATUS[0]}") || error_exit "failed to get the UID for $mpi_user"
+	if [[ -z "$MPI_USER_UID" ]]; then
+		MPI_USER_UID=$(do_facet client "getent passwd $mpi_user |
+			       cut -d: -f3; exit \\\${PIPESTATUS[0]}") ||
+			skip_env "failed to get the UID for $mpi_user"
+		echo "mpi_user=$1 MPI_USER_UID=$MPI_USER_UID"
+	fi
 
-	MPI_USER_GID=$(do_facet client "getent passwd $mpi_user | cut -d: -f4;
-exit \\\${PIPESTATUS[0]}") || error_exit "failed to get the GID for $mpi_user"
+	if [[ -z "$MPI_USER_GID" ]]; then
+		MPI_USER_GID=$(do_facet client "getent passwd $mpi_user |
+			       cut -d: -f4; exit \\\${PIPESTATUS[0]}") ||
+			skip_env "failed to get the GID for $mpi_user"
+		echo "mpi_user=$1 MPI_USER_GID=$MPI_USER_GID"
+	fi
 }
 
 # Run multiop in the background, but wait for it to print
@@ -12435,6 +12443,8 @@ ost_set_temp_seq_width_all() {
 	local osts=$(comma_list $(osts_nodes))
 	local width=$(do_facet ost1 $LCTL get_param -n seq.*OST0000-super.width)
 
+	(( $width != $1 )) || return 0
+
 	do_nodes $osts $LCTL set_param seq.*OST*-super.width=$1
 	stack_trap "do_nodes $osts $LCTL set_param seq.*OST*-super.width=$width"
 }
-- 
1.8.3.1