If the MDT free space is imbalanced, use QOS space balancing for
round-robin subdirectory creation, regardless of the depth
of the directory tree. Otherwise, new subdirectories created
in parents with round-robin default layout may suddenly become
"sticky" on the parent MDT and upset the space balancing and
load distribution.
Add sanity/test_413h to check that round-robin dirs always balance.
Test-Parameters: testlist=sanity env=ONLY=413h,ONLY_REPEAT=100
Fixes: 38c4c538f5 ("LU-15216 lmv: improve MDT QOS space balance")
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: Ia1d0b5b1a027cf14236f93ae34b5cf4929e76d23
Reviewed-on: https://review.whamcloud.com/47578
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
#include "lmv_internal.h"
static int lmv_check_connect(struct obd_device *obd);
+static inline bool lmv_op_default_rr_mkdir(const struct md_op_data *op_data);
void lmv_activate_target(struct lmv_obd *lmv, struct lmv_tgt_desc *tgt,
int activate)
RETURN(rc);
}
-static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 mdt,
- unsigned short dir_depth)
+static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv,
+ struct md_op_data *op_data)
{
struct lu_tgt_desc *tgt, *cur = NULL;
__u64 total_avail = 0;
tgt->ltd_qos.ltq_usable = 1;
lu_tgt_qos_weight_calc(tgt);
- if (tgt->ltd_index == mdt)
+ if (tgt->ltd_index == op_data->op_mds)
cur = tgt;
total_avail += tgt->ltd_qos.ltq_avail;
total_weight += tgt->ltd_qos.ltq_weight;
total_usable++;
}
- /* if current MDT has above-average space, within range of the QOS
- * threshold, stay on the same MDT to avoid creating needless remote
- * MDT directories. It's more likely for low level directories
- * "16 / (dir_depth + 10)" is the factor to make it more unlikely for
- * top level directories, while more likely for low levels.
+ /* If current MDT has above-average space and dir is not already using
+ * round-robin to spread across more MDTs, stay on the parent MDT
+ * to avoid creating needless remote MDT directories. Remote dirs
+ * close to the root balance space more effectively than bottom dirs,
+ * so prefer to create remote dirs at top level of directory tree.
+ * "16 / (dir_depth + 10)" is the factor to make it less likely
+ * for top-level directories to stay local unless they have more than
+ * average free space, while deep dirs prefer local until more full.
+ * depth=0 -> 160%, depth=3 -> 123%, depth=6 -> 100%,
+ * depth=9 -> 84%, depth=12 -> 73%, depth=15 -> 64%
*/
- rand = total_avail * 16 / (total_usable * (dir_depth + 10));
- if (cur && cur->ltd_qos.ltq_avail >= rand) {
- tgt = cur;
- GOTO(unlock, tgt);
+ if (!lmv_op_default_rr_mkdir(op_data)) {
+ rand = total_avail * 16 /
+ (total_usable * (op_data->op_dir_depth + 10));
+ if (cur && cur->ltd_qos.ltq_avail >= rand) {
+ tgt = cur;
+ GOTO(unlock, tgt);
+ }
}
rand = lu_prandom_u64_max(total_weight);
{
const struct lmv_stripe_md *lsm = op_data->op_default_mea1;
- if (!lmv_op_default_qos_mkdir(op_data))
- return false;
-
return (op_data->op_flags & MF_RR_MKDIR) ||
(lsm && lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE) ||
fid_is_root(&op_data->op_fid1);
{
struct lmv_tgt_desc *tmp = tgt;
- tgt = lmv_locate_tgt_qos(lmv, op_data->op_mds, op_data->op_dir_depth);
+ tgt = lmv_locate_tgt_qos(lmv, op_data);
if (tgt == ERR_PTR(-EAGAIN)) {
if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) &&
!lmv_op_default_rr_mkdir(op_data) &&
}
run_test 412 "mkdir on specific MDTs"
+TEST413_COUNT=${TEST413_COUNT:-200}
generate_uneven_mdts() {
local threshold=$1
local lmv_qos_maxage
local testdir=$DIR/$tdir-fillmdt
local start
- mkdir -p $testdir
-
i=0
while (( diff < threshold )); do
+ mkdir -p $testdir
# generate uneven MDTs, create till $threshold% diff
echo -n "weight diff=$diff% must be > $threshold% ..."
- echo "Fill MDT$min_index with 1000 files: loop $i"
+ echo "Fill MDT$min_index with $TEST413_COUNT files: loop $i"
testdir=$DIR/$tdir-fillmdt/$i
- [ -d $testdir ] || $LFS mkdir -i $min_index $testdir ||
+ [ -d $testdir ] && continue
+ $LFS mkdir -i $min_index $testdir ||
error "mkdir $testdir failed"
$LFS setstripe -E 1M -L mdt $testdir ||
error "setstripe $testdir failed"
start=$SECONDS
- for F in f.{0..999}; do
- dd if=/dev/zero of=$testdir/$F bs=64K count=1 > \
+ for ((F=0; F < TEST413_COUNT; F++)); do
+ dd if=/dev/zero of=$testdir/f.$F bs=128K count=1 > \
/dev/null 2>&1 || error "dd $F failed"
done
+ sync; sleep 1; sync
# wait for QOS to update
(( SECONDS < start + 1 )) && sleep $((start + 1 - SECONDS))
}
run_test 413g "enforce ROOT default LMV on subdir mount"
+test_413h() {
+ (( MDSCOUNT >= 2 )) ||
+ skip "We need at least 2 MDTs for this test"
+
+ (( MDS1_VERSION >= $(version_code 2.15.50.6) )) ||
+ skip "Need server version at least 2.15.50.6"
+
+ local lmv_qos_maxage=$($LCTL get_param -n lmv.*.qos_maxage)
+
+ stack_trap "$LCTL set_param \
+ lmv.*.qos_maxage=$lmv_qos_maxage > /dev/null"
+ $LCTL set_param lmv.*.qos_maxage=1
+
+ local depth=5
+ local rr_depth=4
+ local dir=$DIR/$tdir/l1/l2/l3/l4/l5
+ local count=$((MDSCOUNT * 20))
+
+ generate_uneven_mdts 50
+
+ mkdir -p $dir || error "mkdir $dir failed"
+ stack_trap "rm -rf $dir"
+ $LFS setdirstripe -D -c 1 -i -1 --max-inherit=$depth \
+ --max-inherit-rr=$rr_depth $dir
+
+ for ((d=0; d < depth + 2; d++)); do
+ log "dir=$dir:"
+ for ((sub=0; sub < count; sub++)); do
+ mkdir $dir/d$sub
+ done
+ $LFS getdirstripe -i $dir/d* | sort | uniq -c | sort -nr
+ local num=($($LFS getdirstripe -i $dir/d* | sort | uniq -c))
+ # subdirs within $rr_depth should be created round-robin
+ if (( d < rr_depth )); then
+ (( ${num[0]} != count )) ||
+ error "all objects created on MDT ${num[1]}"
+ fi
+
+ dir=$dir/d0
+ done
+}
+run_test 413h "don't stick to parent for round-robin dirs"
+
test_413z() {
local pids=""
local subdir
local pid
for subdir in $(\ls -1 -d $DIR/d413*-fillmdt/*); do
- unlinkmany $subdir/f. 1000 &
+ unlinkmany $subdir/f. $TEST413_COUNT &
pids="$pids $!"
done
local isonly=ONLY_$testnum
local repeat=${!isonly:+$ONLY_REPEAT}
- for testiter in $(seq ${repeat:-1}); do
+ for ((testiter=0; testiter < ${repeat:-1}; testiter++)); do
local before_sub=$SECONDS
log_sub_test_begin $TESTNAME
# remove temp files between repetitions to avoid test failures
- [ -n "$append" -a -n "$DIR" -a -n "$tdir" -a -n "$tfile" ] &&
- rm -rvf $DIR/$tdir* $DIR/$tfile*
+ if [[ -n "$append" ]]; then
+ [[ -n "$DIR/$tdir" ]] && rm -rvf $DIR/$tdir*
+ [[ -n "$DIR/$tfile" ]] && rm -vf $DIR/$tfile*
+ echo "subtest iteration $testiter/$repeat"
+ fi
# loop around subshell so stack_trap EXIT triggers each time
(run_one $testnum "$testmsg") 2>&1 | tee -i $append $test_log
rc=${PIPESTATUS[0]}