LU-14377 tests: make parallel-scale/rr_alloc less strict

author Andreas Dilger <adilger@whamcloud.com>

Wed, 19 Oct 2022 00:37:58 +0000 (18:37 -0600)

committer Oleg Drokin <green@whamcloud.com>

Tue, 8 Nov 2022 08:52:59 +0000 (08:52 +0000)
author Andreas Dilger <adilger@whamcloud.com>
Wed, 19 Oct 2022 00:37:58 +0000 (18:37 -0600)
committer Oleg Drokin <green@whamcloud.com>
Tue, 8 Nov 2022 08:52:59 +0000 (08:52 +0000)
diff --git a/lustre/tests/functions.sh b/lustre/tests/functions.sh

index 68f63aa..f25817d 100644 (file)
--- a/lustre/tests/functions.sh
+++ b/lustre/tests/functions.sh
@@ -1019,16 +1019,15 @@ cleanup_rr_alloc () {
  
  run_rr_alloc() {
         remote_mds_nodsh && skip "remote MDS with nodsh"
-       echo "===Test gives more reproduction percentage if number of "\
-               "client and ost are more. Test with 44 or more clients "\
-               "and 73 or more OSTs gives 100% reproduction rate=="
+
+       echo "===Test gives more reproduction percentage if number of "
+       echo "   client and ost are more. Test with 44 or more clients "
+       echo "   and 73 or more OSTs gives 100% reproduction rate=="
  
         RR_ALLOC=${RR_ALLOC:-$(which rr_alloc 2> /dev/null || true)}
         [ x$RR_ALLOC = x ] && skip_env "rr_alloc not found"
         declare -a diff_max_min_arr
-       # foeo = file on each ost. calc = calculated.
         local ost_idx
-       local foeo_calc
         local qos_prec_objs="${TMP}/qos_and_precreated_objects"
         local rr_alloc_NFILES=${rr_alloc_NFILES:-555}
         local rr_alloc_MNTPTS=${rr_alloc_MNTPTS:-11}
@@ -1045,9 +1044,6 @@ run_rr_alloc() {
                 error_exit "Failed to mount lustre on ${mntpt_root}$i $clients"
         done
  
-       local cmd="$RR_ALLOC $mntpt_root/$tdir/ash $rr_alloc_NFILES \
-               $num_clients"
-
         # Save mdt values, set threshold to 100% i.e always Round Robin,
         # restore the saved values again after creating files...
         save_lustre_params mds1 \
@@ -1065,7 +1061,8 @@ run_rr_alloc() {
         # per OST are not multiple of that then it will be set to nearest
         # lower power of 2. So set 'create_count' to the upper power of 2.
  
-       foeo_calc=$((rr_alloc_NFILES * total_MNTPTS / OSTCOUNT))
+       # foeo = file on each ost. calc = calculated.
+       local foeo_calc=$((rr_alloc_NFILES * total_MNTPTS / OSTCOUNT))
         local create_count=$((2 * foeo_calc))
  
         # create_count accepted values:
@@ -1087,18 +1084,19 @@ run_rr_alloc() {
         # is created per OSTs.
         createmany -o $DIR/$tdir/foo- $(((old_create_count + 1) * OSTCOUNT)) \
                 > /dev/null
-       rm -f /$DIR/$tdir/foo*
+       unlinkmany $DIR/$tdir/foo- $(((old_create_count + 1)  * OSTCOUNT))
  
         # Check for enough precreated objects... We should not
         # fail here because code(osp_precreate.c) also takes care of it.
         # So we have good chances of passing test even if this check fails.
         local mdt_idx=0
-       for ost_idx in $(seq 0 $((OSTCOUNT - 1))); do
-               [[ $(precreated_ost_obj_count $mdt_idx $ost_idx) -ge \
-                       $foeo_calc ]] || echo "Warning: test may fail because" \
-                       "of lack of precreated objects on OST${ost_idx}"
+       for ((ost_idx = 0; ost_idx < $OSTCOUNT; ost_idx++ )); do
+               (($(precreated_ost_obj_count $mdt_idx $ost_idx) >= foeo_calc))||
+               echo "Warning: test may fail from too few objs on OST$ost_idx"
         done
  
+       local cmd="$RR_ALLOC $mntpt_root/$tdir/f $rr_alloc_NFILES $num_clients"
+
         if [[ $total_MNTPTS -ne 0 ]]; then
                 # Now start the actual file creation app.
                 mpi_run "-np $total_MNTPTS" $cmd || return
@@ -1110,12 +1108,14 @@ run_rr_alloc() {
         rm -f $qos_prec_objs
  
         diff_max_min_arr=($($LFS getstripe -r $DIR/$tdir/ |
-               grep "lmm_stripe_offset:" | awk '{print $2}' | sort -n |
-               uniq -c | awk 'NR==1 {min=max=$1} \
-               { $1<min ? min=$1 : min; $1>max ? max=$1 : max} \
-               END {print max-min, max, min}'))
+                           awk '/lmm_stripe_offset:/ {print $2}' |
+                           sort | uniq -c |
+                           awk 'NR==1 {min=max=$1} \
+                                { $1<min ? min=$1:min; $1>max ? max=$1:max} \
+                                END {print max-min, max, min}'))
+
+       $LFS find $DIR/$tdir -type f | xargs -n1 -P8 unlink
  
-       rm -rf $DIR/$tdir
  
         # In-case of fairly large number of file creation using RR (round-robin)
         # there can be two cases in which deviation will occur than the regular
@@ -1123,11 +1123,15 @@ run_rr_alloc() {
         # 1- When rr_alloc does not start right with 'lqr_start_count' reseeded,
         # 2- When rr_alloc does not finish with 'lqr_start_count == 0'.
         # So the difference of files b/w any 2 OST should not be more than 2.
-       [[ ${diff_max_min_arr[0]} -le 2 ]] ||
-               error "Uneven distribution detected: difference between" \
-               "maximum files per OST (${diff_max_min_arr[1]}) and" \
-               "minimum files per OST (${diff_max_min_arr[2]}) must not be" \
-               "greater than 2"
+       # In some cases it may be more, but shouldn't be > 0.3% of the files.
+       local max_diff=$((create_count > 600 ? create_count / 300 : 2))
+
+       (( ${diff_max_min_arr[0]} <= $max_diff )) || {
+               $LFS getstripe -r $DIR/$tdir |
+                       awk '/lmm_stripe_offset:/ {print $2}' | sort | uniq -c
+
+               error "max/min OST objects (${diff_max_min_arr[1]} : ${diff_max_min_arr[2]}) too different"
+       }
  }
  
  run_fs_test() {
author	Andreas Dilger <adilger@whamcloud.com>
	Wed, 19 Oct 2022 00:37:58 +0000 (18:37 -0600)
committer	Oleg Drokin <green@whamcloud.com>
	Tue, 8 Nov 2022 08:52:59 +0000 (08:52 +0000)