From 37c1ddc34d3a1e61c5533f48cb29fe2258ca2907 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Thu, 9 Jun 2022 07:44:41 -0400 Subject: [PATCH] LU-15850 lmv: always space-balance r-r directories If the MDT free space is imbalanced, use QOS space balancing for round-robin subdirectory creation, regardless of the depth of the directory tree. Otherwise, new subdirectories created in parents with round-robin default layout may suddenly become "sticky" on the parent MDT and upset the space balancing and load distribution. Add sanity/test_413h to check that round-robin dirs always balance. Test-Parameters: testlist=sanity env=ONLY=413h,ONLY_REPEAT=100 Fixes: 38c4c538f5 ("LU-15216 lmv: improve MDT QOS space balance") Signed-off-by: Lai Siyao Change-Id: Ia1d0b5b1a027cf14236f93ae34b5cf4929e76d23 Reviewed-on: https://review.whamcloud.com/47578 Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/lmv/lmv_obd.c | 38 +++++++++++++++------------ lustre/tests/sanity.sh | 59 +++++++++++++++++++++++++++++++++++++----- lustre/tests/test-framework.sh | 9 ++++--- 3 files changed, 80 insertions(+), 26 deletions(-) diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 4fded87..da6e9e0 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -56,6 +56,7 @@ #include "lmv_internal.h" static int lmv_check_connect(struct obd_device *obd); +static inline bool lmv_op_default_rr_mkdir(const struct md_op_data *op_data); void lmv_activate_target(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt, int activate) @@ -1479,8 +1480,8 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } -static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 mdt, - unsigned short dir_depth) +static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, + struct md_op_data *op_data) { struct lu_tgt_desc *tgt, *cur = NULL; __u64 total_avail = 0; @@ -1512,23 +1513,31 @@ static 
struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 mdt, tgt->ltd_qos.ltq_usable = 1; lu_tgt_qos_weight_calc(tgt); - if (tgt->ltd_index == mdt) + if (tgt->ltd_index == op_data->op_mds) cur = tgt; total_avail += tgt->ltd_qos.ltq_avail; total_weight += tgt->ltd_qos.ltq_weight; total_usable++; } - /* if current MDT has above-average space, within range of the QOS - * threshold, stay on the same MDT to avoid creating needless remote - * MDT directories. It's more likely for low level directories - * "16 / (dir_depth + 10)" is the factor to make it more unlikely for - * top level directories, while more likely for low levels. + /* If current MDT has above-average space and dir is not already using + * round-robin to spread across more MDTs, stay on the parent MDT + * to avoid creating needless remote MDT directories. Remote dirs + * close to the root balance space more effectively than bottom dirs, + * so prefer to create remote dirs at top level of directory tree. + * "16 / (dir_depth + 10)" is the factor to make it less likely + * for top-level directories to stay local unless they have more than + * average free space, while deep dirs prefer local until more full. 
+ * depth=0 -> 160%, depth=3 -> 123%, depth=6 -> 100%, + * depth=9 -> 84%, depth=12 -> 73%, depth=15 -> 64% */ - rand = total_avail * 16 / (total_usable * (dir_depth + 10)); - if (cur && cur->ltd_qos.ltq_avail >= rand) { - tgt = cur; - GOTO(unlock, tgt); + if (!lmv_op_default_rr_mkdir(op_data)) { + rand = total_avail * 16 / + (total_usable * (op_data->op_dir_depth + 10)); + if (cur && cur->ltd_qos.ltq_avail >= rand) { + tgt = cur; + GOTO(unlock, tgt); + } } rand = lu_prandom_u64_max(total_weight); @@ -1863,9 +1872,6 @@ static inline bool lmv_op_default_rr_mkdir(const struct md_op_data *op_data) { const struct lmv_stripe_md *lsm = op_data->op_default_mea1; - if (!lmv_op_default_qos_mkdir(op_data)) - return false; - return (op_data->op_flags & MF_RR_MKDIR) || (lsm && lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE) || fid_is_root(&op_data->op_fid1); @@ -1900,7 +1906,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_by_space(struct lmv_obd *lmv, { struct lmv_tgt_desc *tmp = tgt; - tgt = lmv_locate_tgt_qos(lmv, op_data->op_mds, op_data->op_dir_depth); + tgt = lmv_locate_tgt_qos(lmv, op_data); if (tgt == ERR_PTR(-EAGAIN)) { if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) && !lmv_op_default_rr_mkdir(op_data) && diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index dccc614..2518129 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -25598,6 +25598,7 @@ test_412() { } run_test 412 "mkdir on specific MDTs" +TEST413_COUNT=${TEST413_COUNT:-200} generate_uneven_mdts() { local threshold=$1 local lmv_qos_maxage @@ -25655,23 +25656,24 @@ generate_uneven_mdts() { local testdir=$DIR/$tdir-fillmdt local start - mkdir -p $testdir - i=0 while (( diff < threshold )); do + mkdir -p $testdir # generate uneven MDTs, create till $threshold% diff echo -n "weight diff=$diff% must be > $threshold% ..." 
- echo "Fill MDT$min_index with 1000 files: loop $i" + echo "Fill MDT$min_index with $TEST413_COUNT files: loop $i" testdir=$DIR/$tdir-fillmdt/$i - [ -d $testdir ] || $LFS mkdir -i $min_index $testdir || + [ -d $testdir ] && continue + $LFS mkdir -i $min_index $testdir || error "mkdir $testdir failed" $LFS setstripe -E 1M -L mdt $testdir || error "setstripe $testdir failed" start=$SECONDS - for F in f.{0..999}; do - dd if=/dev/zero of=$testdir/$F bs=64K count=1 > \ + for ((F=0; F < TEST413_COUNT; F++)); do + dd if=/dev/zero of=$testdir/f.$F bs=128K count=1 > \ /dev/null 2>&1 || error "dd $F failed" done + sync; sleep 1; sync # wait for QOS to update (( SECONDS < start + 1 )) && sleep $((start + 1 - SECONDS)) @@ -26136,13 +26138,56 @@ test_413g() { } run_test 413g "enforce ROOT default LMV on subdir mount" +test_413h() { + (( MDSCOUNT >= 2 )) || + skip "We need at least 2 MDTs for this test" + + (( MDS1_VERSION >= $(version_code 2.15.50.6) )) || + skip "Need server version at least 2.15.50.6" + + local lmv_qos_maxage=$($LCTL get_param -n lmv.*.qos_maxage) + + stack_trap "$LCTL set_param \ + lmv.*.qos_maxage=$lmv_qos_maxage > /dev/null" + $LCTL set_param lmv.*.qos_maxage=1 + + local depth=5 + local rr_depth=4 + local dir=$DIR/$tdir/l1/l2/l3/l4/l5 + local count=$((MDSCOUNT * 20)) + + generate_uneven_mdts 50 + + mkdir -p $dir || error "mkdir $dir failed" + stack_trap "rm -rf $dir" + $LFS setdirstripe -D -c 1 -i -1 --max-inherit=$depth \ + --max-inherit-rr=$rr_depth $dir + + for ((d=0; d < depth + 2; d++)); do + log "dir=$dir:" + for ((sub=0; sub < count; sub++)); do + mkdir $dir/d$sub + done + $LFS getdirstripe -i $dir/d* | sort | uniq -c | sort -nr + local num=($($LFS getdirstripe -i $dir/d* | sort | uniq -c)) + # subdirs within $rr_depth should be created round-robin + if (( d < rr_depth )); then + (( ${num[0]} != count )) || + error "all objects created on MDT ${num[1]}" + fi + + dir=$dir/d0 + done +} +run_test 413h "don't stick to parent for round-robin dirs" + 
test_413z() { local pids="" local subdir local pid for subdir in $(\ls -1 -d $DIR/d413*-fillmdt/*); do - unlinkmany $subdir/f. 1000 & + unlinkmany $subdir/f. $TEST413_COUNT & pids="$pids $!" done diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 7c25003..b893ed6 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -6894,13 +6894,16 @@ run_one_logged() { local isonly=ONLY_$testnum local repeat=${!isonly:+$ONLY_REPEAT} - for testiter in $(seq ${repeat:-1}); do + for ((testiter=0; testiter < ${repeat:-1}; testiter++)); do local before_sub=$SECONDS log_sub_test_begin $TESTNAME # remove temp files between repetitions to avoid test failures - [ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] && - rm -rvf $DIR/$tdir* $DIR/$tfile* + if [[ -n "$append" ]]; then + [[ -n "$DIR/$tdir" ]] && rm -rvf $DIR/$tdir* + [[ -n "$DIR/$tfile" ]] && rm -vf $DIR/$tfile* + echo "subtest iteration $testiter/$repeat" + fi # loop around subshell so stack_trap EXIT triggers each time (run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log rc=${PIPESTATUS[0]} -- 1.8.3.1