From 66d93ce3e4fc2579bafa1274dd0b6471454a9422 Mon Sep 17 00:00:00 2001 From: Alex Deiter Date: Fri, 27 Sep 2024 11:23:31 -0700 Subject: [PATCH] LU-17251 test: improve parallel-scale rr_alloc test Added checking for pre-created OST objects and waiting (maximum 60 seconds) before executing the rr_alloc test. Test-Parameters: trivial Test-Parameters: mdtcount=4 mdscount=2 testlist=parallel-scale env=ONLY=rr_alloc,ONLY_REPEAT=8 Test-Parameters: mdtcount=4 mdscount=2 testlist=parallel-scale env=ONLY=rr_alloc,ONLY_REPEAT=8 Test-Parameters: mdtcount=4 mdscount=2 testlist=parallel-scale env=ONLY=rr_alloc,ONLY_REPEAT=8 Test-Parameters: testlist=parallel-scale env=ONLY=rr_alloc,ONLY_REPEAT=8 Test-Parameters: testlist=parallel-scale env=ONLY=rr_alloc,ONLY_REPEAT=8 Test-Parameters: testlist=parallel-scale env=ONLY=rr_alloc,ONLY_REPEAT=8 Signed-off-by: Alex Deiter Change-Id: Ib604b99138ceccf384476ad2876d9df7cd7d524b Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/52940 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin --- lustre/tests/functions.sh | 55 ++++++++++++++++++++++++------------------ lustre/tests/test-framework.sh | 9 ++++++- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/lustre/tests/functions.sh b/lustre/tests/functions.sh index e741a87..538cb8c 100644 --- a/lustre/tests/functions.sh +++ b/lustre/tests/functions.sh @@ -1058,42 +1058,49 @@ run_rr_alloc() { # Make sure that every osp has enough precreated objects for the file # creation app - # create_count is always set to the power of 2 only, so if the files - # per OST are not multiple of that then it will be set to nearest - # lower power of 2. So set 'create_count' to the upper power of 2. + # The MDS does not precreate objects if there are at least + # create_count / 2 precreated objects available for the OST. + # Set 'create_count' to 2x required number to force creation. # foeo = file on each ost. calc = calculated. local foeo_calc=$((rr_alloc_NFILES * total_MNTPTS / OSTCOUNT)) local create_count=$((2 * foeo_calc)) + local max_create_count=$(do_facet $SINGLEMDS "$LCTL get_param -n \ + osp.*OST0000*MDT0000.max_create_count") # create_count accepted values: # [OST_MIN_PRECREATE=32, OST_MAX_PRECREATE=20000] - # values exceeding OST_MAX_PRECREATE are lowered to the maximum. - [[ $create_count -lt 32 ]] && create_count=32 - local i - for i in $(seq $MDSCOUNT); do - do_facet mds$i "$LCTL set_param -n \ - lod.$FSNAME-MDT*.qos_threshold_rr=100 \ - osp.$FSNAME-OST*-osc-MDT*.create_count=$create_count" - done + # values exceeding OST_MAX_PRECREATE are lowered to half of the maximum. + (( create_count >= 32 )) || create_count=32 + (( create_count <= max_create_count )) || + create_count=$((max_create_count / 2)) + + local mdts=$(comma_list $(mdts_nodes)) - # Create few temporary files in order to increase the precreated objects - # to a desired value, before starting 'rr_alloc' app. Due to default - # value 32 of precreation count (OST_MIN_PRECREATE=32), precreated - # objects available are 32 initially, these gets exhausted very soon, - # which causes skip of some osps when very large number of files - # is created per OSTs. - createmany -o $DIR/$tdir/foo- $(((old_create_count + 1) * OSTCOUNT)) \ - > /dev/null - unlinkmany $DIR/$tdir/foo- $(((old_create_count + 1) * OSTCOUNT)) + do_nodes $mdts "$LCTL set_param lod.*.qos_threshold_rr=100 \ + osp.*.create_count=$create_count" # Check for enough precreated objects... We should not # fail here because code(osp_precreate.c) also takes care of it. # So we have good chances of passing test even if this check fails. - local mdt_idx=0 - for ((ost_idx = 0; ost_idx < $OSTCOUNT; ost_idx++ )); do - (($(precreated_ost_obj_count $mdt_idx $ost_idx) >= foeo_calc))|| - echo "Warning: test may fail from too few objs on OST$ost_idx" + local stop=$((SECONDS + 60)) + + while ((SECONDS < stop)); do + local sleep=0 + + for ((mdt_idx = 0; mdt_idx < $MDSCOUNT; mdt_idx++)); do + for ((ost_idx = 0; ost_idx < $OSTCOUNT; ost_idx++)); do + local count=$(precreated_ost_obj_count \ + $mdt_idx $ost_idx) + if ((count < foeo_calc)); then + sleep=1 + fi + done + done + + (( sleep > 0 )) || break + + sleep $sleep done local cmd="$RR_ALLOC $mntpt_root/$tdir/f $rr_alloc_NFILES $num_clients" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 3439af0..a6f926c 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -10203,7 +10203,14 @@ precreated_ost_obj_count() osp.$proc_path.prealloc_last_id) local next_id=$(do_facet mds$((mdt_idx + 1)) lctl get_param -n \ osp.$proc_path.prealloc_next_id) - echo $((last_id - next_id + 1)) + local ost_obj_count=$((last_id - next_id + 1)) + + echo " - precreated_ost_obj_count $proc_path" \ + "prealloc_last_id: $last_id" \ + "prealloc_next_id: $next_id" \ + "count: $ost_obj_count" 1>&2 + + echo $ost_obj_count } check_file_in_pool() -- 1.8.3.1