From b104c0a27713899a4d047f56fed57c30c39b8195 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Tue, 18 Oct 2022 18:37:58 -0600 Subject: [PATCH] LU-14377 tests: make parallel-scale/rr_alloc less strict test_rr_alloc() sometimes fails with a difference of 3-4 objects per OST, after creating 1500+ objects on each OST. This should not be considered fatal. Make the test more lenient, and allow a difference of up to 0.3% of objects between the OSTs. Fix some code style issues in the test. Test-Parameters: trivial testlist=parallel-scale env=ONLY=rr_alloc Signed-off-by: Andreas Dilger Change-Id: Ib6ba8c5d8e9d3245833448a52f8ed25308698a33 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48914 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Elena Gryaznova Reviewed-by: Oleg Drokin --- lustre/tests/functions.sh | 52 +++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/lustre/tests/functions.sh b/lustre/tests/functions.sh index 68f63aa..f25817d 100644 --- a/lustre/tests/functions.sh +++ b/lustre/tests/functions.sh @@ -1019,16 +1019,15 @@ cleanup_rr_alloc () { run_rr_alloc() { remote_mds_nodsh && skip "remote MDS with nodsh" - echo "===Test gives more reproduction percentage if number of "\ - "client and ost are more. Test with 44 or more clients "\ - "and 73 or more OSTs gives 100% reproduction rate==" + + echo "===Test gives more reproduction percentage if number of " + echo " client and ost are more. Test with 44 or more clients " + echo " and 73 or more OSTs gives 100% reproduction rate==" RR_ALLOC=${RR_ALLOC:-$(which rr_alloc 2> /dev/null || true)} [ x$RR_ALLOC = x ] && skip_env "rr_alloc not found" declare -a diff_max_min_arr - # foeo = file on each ost. calc = calculated. 
local ost_idx - local foeo_calc local qos_prec_objs="${TMP}/qos_and_precreated_objects" local rr_alloc_NFILES=${rr_alloc_NFILES:-555} local rr_alloc_MNTPTS=${rr_alloc_MNTPTS:-11} @@ -1045,9 +1044,6 @@ run_rr_alloc() { error_exit "Failed to mount lustre on ${mntpt_root}$i $clients" done - local cmd="$RR_ALLOC $mntpt_root/$tdir/ash $rr_alloc_NFILES \ - $num_clients" - # Save mdt values, set threshold to 100% i.e always Round Robin, # restore the saved values again after creating files... save_lustre_params mds1 \ @@ -1065,7 +1061,8 @@ run_rr_alloc() { # per OST are not multiple of that then it will be set to nearest # lower power of 2. So set 'create_count' to the upper power of 2. - foeo_calc=$((rr_alloc_NFILES * total_MNTPTS / OSTCOUNT)) + # foeo = file on each ost. calc = calculated. + local foeo_calc=$((rr_alloc_NFILES * total_MNTPTS / OSTCOUNT)) local create_count=$((2 * foeo_calc)) # create_count accepted values: @@ -1087,18 +1084,19 @@ run_rr_alloc() { # is created per OSTs. createmany -o $DIR/$tdir/foo- $(((old_create_count + 1) * OSTCOUNT)) \ > /dev/null - rm -f /$DIR/$tdir/foo* + unlinkmany $DIR/$tdir/foo- $(((old_create_count + 1) * OSTCOUNT)) # Check for enough precreated objects... We should not # fail here because code(osp_precreate.c) also takes care of it. # So we have good chances of passing test even if this check fails. local mdt_idx=0 - for ost_idx in $(seq 0 $((OSTCOUNT - 1))); do - [[ $(precreated_ost_obj_count $mdt_idx $ost_idx) -ge \ - $foeo_calc ]] || echo "Warning: test may fail because" \ - "of lack of precreated objects on OST${ost_idx}" + for ((ost_idx = 0; ost_idx < $OSTCOUNT; ost_idx++ )); do + (($(precreated_ost_obj_count $mdt_idx $ost_idx) >= foeo_calc))|| + echo "Warning: test may fail from too few objs on OST$ost_idx" done + local cmd="$RR_ALLOC $mntpt_root/$tdir/f $rr_alloc_NFILES $num_clients" + if [[ $total_MNTPTS -ne 0 ]]; then # Now start the actual file creation app. 
mpi_run "-np $total_MNTPTS" $cmd || return @@ -1110,12 +1108,14 @@ run_rr_alloc() { rm -f $qos_prec_objs diff_max_min_arr=($($LFS getstripe -r $DIR/$tdir/ | - grep "lmm_stripe_offset:" | awk '{print $2}' | sort -n | - uniq -c | awk 'NR==1 {min=max=$1} \ - { $1<min ? min=$1 : min; $1>max ? max=$1 : max} \ - END {print max-min, max, min}')) + awk '/lmm_stripe_offset:/ {print $2}' | + sort | uniq -c | + awk 'NR==1 {min=max=$1} \ + { $1<min ? min=$1:min; $1>max ? max=$1:max} \ + END {print max-min, max, min}')) + + $LFS find $DIR/$tdir -type f | xargs -n1 -P8 unlink - rm -rf $DIR/$tdir # In-case of fairly large number of file creation using RR (round-robin) # there can be two cases in which deviation will occur than the regular @@ -1123,11 +1123,15 @@ run_rr_alloc() { # 1- When rr_alloc does not start right with 'lqr_start_count' reseeded, # 2- When rr_alloc does not finish with 'lqr_start_count == 0'. # So the difference of files b/w any 2 OST should not be more than 2. - [[ ${diff_max_min_arr[0]} -le 2 ]] || - error "Uneven distribution detected: difference between" \ - "maximum files per OST (${diff_max_min_arr[1]}) and" \ - "minimum files per OST (${diff_max_min_arr[2]}) must not be" \ - "greater than 2" + # In some cases it may be more, but shouldn't be > 0.3% of the files. + local max_diff=$((create_count > 600 ? create_count / 300 : 2)) + + (( ${diff_max_min_arr[0]} <= $max_diff )) || { + $LFS getstripe -r $DIR/$tdir | + awk '/lmm_stripe_offset:/ {print $2}' | sort | uniq -c + + error "max/min OST objects (${diff_max_min_arr[1]} : ${diff_max_min_arr[2]}) too different" + } } run_fs_test() { -- 1.8.3.1