From: Andreas Dilger <adilger@whamcloud.com>
Date: Mon, 17 Jan 2022 23:24:48 +0000 (-0700)
Subject: LU-15282 tests: improve sanity test_51d coverage
X-Git-Tag: 2.15.51~75
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=fd5c915eff577bd7ff2c01133c7f8cf4d76b7c55;hp=1ac4b9598ad6e2f94c4c672b4733186364255c6a

LU-15282 tests: improve sanity test_51d coverage

Improve sanity test_51d to check object distribution for each stripe
count from 3 up to OSTCOUNT, rather than only for files striped over
all OSTs.  The default test configuration has 7 OSTs, which on its own
does not cover some stripecount/OSTCOUNT combinations.

Test-Parameters: trivial testlist=sanity env=ONLY=51d ostcount=3
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=4
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=5
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=6
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=7
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=8
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=9
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=10
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=11
Test-Parameters: testlist=sanity env=ONLY=51d ostcount=12
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Icc694d037e53f7bf966aff8ca1070d42ac3ebbe5
Reviewed-on: https://review.whamcloud.com/46154
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Jian Yu <yujian@whamcloud.com>
Reviewed-by: Wei Liu <sarah@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
---

diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index 5422fb5..40e3d82 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -5808,22 +5808,13 @@ test_51b() {
 }
 run_test 51b "exceed 64k subdirectory nlink limit on create, verify unlink"
 
-test_51d() {
-	[ $PARALLEL == "yes" ] && skip "skip parallel run"
-	[[ $OSTCOUNT -lt 3 ]] && skip_env "needs >= 3 OSTs"
-	local qos_old
-
-	test_mkdir $DIR/$tdir
-	$LFS setstripe -c $OSTCOUNT $DIR/$tdir
+test_51d_sub() {
+	local stripecount=$1
+	local nfiles=$((200 * $OSTCOUNT))
 
-	qos_old=$(do_facet mds1 \
-		"$LCTL get_param -n lod.$FSNAME-*.qos_threshold_rr" | head -n 1)
-	do_nodes $(comma_list $(mdts_nodes)) \
-		"$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=100"
-	stack_trap "do_nodes $(comma_list $(mdts_nodes)) \
-		'$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=${qos_old%%%}'"
-
-	createmany -o $DIR/$tdir/t- 1000
+	log "create files with stripecount=$stripecount"
+	$LFS setstripe -C $stripecount $DIR/$tdir
+	createmany -o $DIR/$tdir/t- $nfiles
 	$LFS getstripe $DIR/$tdir > $TMP/$tfile
 	for ((n = 0; n < $OSTCOUNT; n++)); do
 		objs[$n]=$(awk -vobjs=0 '($1 == '$n') { objs += 1 } \
@@ -5833,28 +5824,58 @@ test_51d() {
 			    END { printf("%0.0f", objs) }')
 		log "OST$n has ${objs[$n]} objects, ${objs0[$n]} are index 0"
 	done
-	unlinkmany $DIR/$tdir/t- 1000
-
-	nlast=0
-	for ((n = 0; n < $OSTCOUNT; n++)); do
+	unlinkmany $DIR/$tdir/t- $nfiles
+	rm  -f $TMP/$tfile
+
+	local nlast
+	local min=4
+	local max=5 # allow variance of (1 - $min/$max) = 20% by default
+
+	# For some combinations of stripecount and OSTCOUNT the current code
+	# is not ideal, and allocates 50% fewer *first* objects to some OSTs
+	# than others. Rather than skipping this test entirely, relax the limit
+	# for them and keep testing so the imbalance cannot get worse. LU-15282
+	(( (OSTCOUNT == 6 && stripecount == 4) ||
+	   (OSTCOUNT == 10 && (stripecount == 4 || stripecount == 8)) ||
+	   (OSTCOUNT == 12 && (stripecount == 8 || stripecount == 9)))) && max=9
+	for ((nlast=0, n = 1; n < $OSTCOUNT; nlast=n,n++)); do
 		(( ${objs[$n]} > ${objs[$nlast]} * 4 / 5 )) ||
 			{ $LFS df && $LFS df -i &&
-			error "OST $n has fewer objects vs. OST $nlast" \
-			      " (${objs[$n]} < ${objs[$nlast]}"; }
+			error "OST $n has fewer objects vs. OST $nlast " \
+			      "(${objs[$n]} < ${objs[$nlast]} x 4/5)"; }
 		(( ${objs[$n]} < ${objs[$nlast]} * 5 / 4 )) ||
 			{ $LFS df && $LFS df -i &&
-			error "OST $n has fewer objects vs. OST $nlast" \
-			      " (${objs[$n]} < ${objs[$nlast]}"; }
+			error "OST $n has more objects vs. OST $nlast " \
+			      "(${objs[$n]} > ${objs[$nlast]} x 5/4)"; }
 
-		(( ${objs0[$n]} > ${objs0[$nlast]} * 4 / 5 )) ||
+		(( ${objs0[$n]} > ${objs0[$nlast]} * $min / $max )) ||
 			{ $LFS df && $LFS df -i &&
-			error "OST $n has fewer #0 objects vs. OST $nlast" \
-			      " (${objs0[$n]} < ${objs0[$nlast]}"; }
-		(( ${objs0[$n]} < ${objs0[$nlast]} * 5 / 4 )) ||
+			error "OST $n has fewer #0 objects vs. OST $nlast " \
+			      "(${objs0[$n]} < ${objs0[$nlast]} x $min/$max)"; }
+		(( ${objs0[$n]} < ${objs0[$nlast]} * $max / $min )) ||
 			{ $LFS df && $LFS df -i &&
-			error "OST $n has fewer #0 objects vs. OST $nlast" \
-			      " (${objs0[$n]} < ${objs0[$nlast]}"; }
-		nlast=$n
+			error "OST $n has more #0 objects vs. OST $nlast " \
+			      "(${objs0[$n]} > ${objs0[$nlast]} x $max/$min)"; }
+	done
+}
+
+test_51d() {
+	[ $PARALLEL == "yes" ] && skip "skip parallel run"
+	[[ $OSTCOUNT -lt 3 ]] && skip_env "needs >= 3 OSTs"
+
+	local stripecount
+	local qos_old=$(do_facet mds1 \
+		"$LCTL get_param -n lod.$FSNAME-*.qos_threshold_rr" | head -n 1)
+
+	do_nodes $(comma_list $(mdts_nodes)) \
+		"$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=100"
+	stack_trap "do_nodes $(comma_list $(mdts_nodes)) \
+		'$LCTL set_param lod.$FSNAME-*.qos_threshold_rr=${qos_old%%%}'"
+
+	test_mkdir $DIR/$tdir
+
+	for ((stripecount = 3; stripecount <= $OSTCOUNT; stripecount++)); do
+		test_51d_sub $stripecount
 	done
 }
 run_test 51d "check object distribution"