From: Andreas Dilger
Date: Thu, 17 Oct 2019 07:19:26 +0000 (+0900)
Subject: LU-12865 tests: fix sanity 160f to be more robust
X-Git-Tag: 2.13.52~107
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=4b0f0164c6ed761897409186376e9edc989323c9

LU-12865 tests: fix sanity 160f to be more robust

sanity test_160f was failing intermittently because the first Changelog user ("cl6") was sometimes being unregistered. The test set changelog_max_idle_time=10, then slept for 9s and did some operations that could be slow. In rare cases the test ran too long and the MDS evicted the "good" user along with the bad user:

    MDD0000: Force deregister of ChangeLog user cl7 idle more than 35s
    MDD0000: Force deregister of ChangeLog user cl6 idle more than 11s

Change the test's first sleep interval to be half of the max_idle limit so that there is no risk of the "good" Changelog user being evicted.

Add some logging to the test so that it is easier to correlate test script actions with events in the MDS debug log.
Fixes: 31fef6845e8b ("LU-10680 mdd: create gc thread when no current transaction") Test-Parameters: trivial envdefinitions=ONLY=160 testlist=sanity,sanity Test-Parameters: envdefinitions=ONLY=160 mdscount=2 testlist=sanity,sanity Signed-off-by: Andreas Dilger Change-Id: I0e4c9c271d98a2716f848e75676780b0383ebbe5 Reviewed-on: https://review.whamcloud.com/36468 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Faccini Bruno Reviewed-by: James Nunez Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index c1cb616..c3aa385 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13865,14 +13865,17 @@ test_160f() { # generate some changelog records to accumulate on each MDT test_mkdir -c $MDSCOUNT $DIR/$tdir || error "test_mkdir $tdir failed" + log "$(date +%s): creating first files" createmany -m $DIR/$tdir/$tfile $((MDSCOUNT * 2)) || error "create $DIR/$tdir/$tfile failed" # check changelogs have been generated + local start=$SECONDS + local idle_time=$((MDSCOUNT * 5 + 5)) local nbcl=$(changelog_dump | wc -l) [[ $nbcl -eq 0 ]] && error "no changelogs found" - for param in "changelog_max_idle_time=10" \ + for param in "changelog_max_idle_time=$idle_time" \ "changelog_gc=1" \ "changelog_min_gc_interval=2" \ "changelog_min_free_cat_entries=3"; do @@ -13884,8 +13887,11 @@ test_160f() { do_nodes $mdts $LCTL set_param mdd.*.$param done - # force cl_user2 to be idle (1st part) - sleep 9 + # force cl_user2 to be idle (1st part), but also cancel the + # cl_user1 records so that it is not evicted later in the test. 
+ local sleep1=$((idle_time / 2)) + echo "$(date +%s): sleep1 $sleep1/${idle_time}s" + sleep $sleep1 # simulate changelog catalog almost full #define OBD_FAIL_CAT_FREE_RECORDS 0x1313 @@ -13921,13 +13927,16 @@ test_160f() { "$user_rec1, but is $user_rec2" done - # force cl_user2 to be idle (2nd part) and to reach - # changelog_max_idle_time - sleep 2 + # force cl_user2 idle (2nd part) to just exceed changelog_max_idle_time + local sleep2=$((idle_time - (SECONDS - start) + 1)) + echo "$(date +%s): sleep2 $sleep2/${idle_time}s" + sleep $sleep2 - # generate one more changelog to trigger fail_loc - createmany -m $DIR/$tdir/${tfile}bis $((MDSCOUNT * 2)) || - error "create $DIR/$tdir/${tfile}bis failed" + # Generate one more changelog to trigger GC at fail_loc for cl_user2. + # cl_user1 should be OK because it recently processed records. + echo "$(date +%s): creating $((MDSCOUNT * 2)) files" + createmany -m $DIR/$tdir/${tfile}b $((MDSCOUNT * 2)) || + error "create $DIR/$tdir/${tfile}b failed" # ensure gc thread is done for i in $(mdts_nodes); do