From 3e04b0fd6c3dd36372f33c54ea5f401c27485d60 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Wed, 28 Apr 2021 23:02:23 +0800 Subject: [PATCH] LU-13417 mdd: set default LMV on ROOT To balance MDT usage, set default LMV on ROOT if it's not set. The default stripe offset is "-1", and default stripe count is "1". Directories created by "mkdir" under ROOT will then be distributed across all MDTs according to their usage. Add sanity 0e. Signed-off-by: Lai Siyao Signed-off-by: Andreas Dilger Signed-off-by: Hongchao Zhang Change-Id: I7a6c752256225b8d065b2c304c4725268df28045 Reviewed-on: https://review.whamcloud.com/38553 Tested-by: jenkins Tested-by: Maloo --- lustre/doc/lfs-setdirstripe.1 | 21 ++++++++++------ lustre/mdc/mdc_request.c | 8 ++++++ lustre/mdd/mdd_device.c | 58 +++++++++++++++++++++++++++++++++++++++++++ lustre/tests/sanity.sh | 29 ++++++++++++++++++++++ 4 files changed, 109 insertions(+), 7 deletions(-) diff --git a/lustre/doc/lfs-setdirstripe.1 b/lustre/doc/lfs-setdirstripe.1 index c7a0acc..4fff19e 100644 --- a/lustre/doc/lfs-setdirstripe.1 +++ b/lustre/doc/lfs-setdirstripe.1 @@ -34,9 +34,14 @@ Use the MDT whose index is as the master/starting MDT for the directory. If multiple .I MDT_INDEX values are given, then the stripes will be allocated on the specified -MDT indices. If index -1 (default) is used, it will prefer to select +MDT indices. If +.B -1 +(default) is used, the client will round-robin subdirectory creation +across all MDTs if their free space is within +.B lod.*.mdt_qos_threshold_rr +percent of each other, otherwise the client will prefer to select .B COUNT -MDTs proportional to the free space and inodes on each. +MDT(s) proportional to the free space and inodes on each. .TP .BR \-H ", " \-\-mdt-hash =\fIHASH_TYPE\fR Use @@ -71,16 +76,18 @@ Set the default striping pattern of subdirectories. Newly created sub-directories will use the new default striping pattern, but existing sub-directories will not be affected. 
The newly created sub-directories will also inherit the specified default -striping pattern. Only default stripe count is supported for now. +striping pattern. Note that striping all directories across all MDTs by default is .B not recommended at this time, as the clients will have to do more RPCs to create and access each directory, hurting performance rather than -improving it. Default striped directories are preferred for parent -directories -where large subdirectories will be created (e.g. file-per-process -job output directories). +improving it. Default striped directories are preferred only for +parent directories where large subdirectories will be created +(e.g. file-per-process job output directories). Instead, using +.B -c 1 -i -1 +on top-level directories balances mkdir therein over MDTs automatically +without causing all subdirectories to be remote by default. .TP .BR \-o ", " \-\-mode =\fIMODE\fR Set the file access permissions of the new directory to the specified diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 8942e68..3df0523 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -550,6 +550,13 @@ static int mdc_get_lustre_md(struct obd_export *exp, struct req_capsule *pill, GOTO(out, rc = -EPROTO); } + if (md_exp->exp_obd->obd_type->typ_lu == &mdc_device_type) { + CERROR("%s: no LMV, upgrading from old version?\n", + md_exp->exp_obd->obd_name); + + GOTO(out_acl, rc = 0); + } + if (md->body->mbo_valid & OBD_MD_MEA) { lmv_size = md->body->mbo_eadatasize; if (lmv_size == 0) { @@ -606,6 +613,7 @@ static int mdc_get_lustre_md(struct obd_export *exp, struct req_capsule *pill, } rc = 0; +out_acl: if (md->body->mbo_valid & OBD_MD_FLACL) { /* for ACL, it's possible that FLACL is set but aclsize is zero. 
* only when aclsize != 0 there's an actual segment for ACL diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index 8ea6adc..8876830 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -1270,6 +1270,8 @@ static int mdd_prepare(const struct lu_env *env, struct lu_device *next = &mdd->mdd_child->dd_lu_dev; struct nm_config_file *nodemap_config; struct obd_device_target *obt = &mdd2obd_dev(mdd)->u.obt; + struct dt_object *root = NULL; + struct thandle *th = NULL; struct lu_fid fid; int rc; @@ -1303,6 +1305,56 @@ static int mdd_prepare(const struct lu_env *env, GOTO(out_los, rc); } + /* store a default directory layout on the root directory if + * it doesn't already exist to improve MDT space balance. + */ + root = dt_locate(env, mdd->mdd_bottom, &fid); + if (unlikely(IS_ERR(root))) + GOTO(out_los, rc = PTR_ERR(root)); + + rc = dt_xattr_get(env, root, &LU_BUF_NULL, + XATTR_NAME_DEFAULT_LMV); + if (rc == -ENODATA) { + struct lu_buf buf; + struct lmv_user_md lmv_default = { + .lum_magic = LMV_USER_MAGIC, + .lum_stripe_count = 1, + .lum_stripe_offset = LMV_OFFSET_DEFAULT, + }; + + th = dt_trans_create(env, mdd->mdd_bottom); + if (IS_ERR(th)) + GOTO(out_root_put, rc = PTR_ERR(th)); + + buf.lb_buf = &lmv_default; + buf.lb_len = sizeof(lmv_default); + rc = dt_declare_xattr_set(env, root, &buf, + XATTR_NAME_DEFAULT_LMV, 0, + th); + if (rc) + GOTO(out_trans_stop, rc); + + rc = dt_trans_start_local(env, mdd->mdd_bottom, th); + if (rc) + GOTO(out_trans_stop, rc); + + rc = dt_xattr_set(env, root, &buf, + XATTR_NAME_DEFAULT_LMV, 0, th); + if (rc) + GOTO(out_trans_stop, rc); + + dt_trans_stop(env, mdd->mdd_bottom, th); + th = NULL; + } else if (rc < 0 && rc != -ERANGE) { + CERROR("%s: get default LMV of root failed: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, rc); + + GOTO(out_root_put, rc); + } + + dt_object_put(env, root); + root = NULL; + mdd->mdd_root_fid = fid; rc = mdd_dot_lustre_setup(env, mdd); if (rc != 0) { @@ -1373,6 +1425,12 @@ out_orph: 
out_dot: if (mdd_seq_site(mdd)->ss_node_id == 0) mdd_dot_lustre_cleanup(env, mdd); +out_trans_stop: + if (th != NULL) + dt_trans_stop(env, mdd->mdd_bottom, th); +out_root_put: + if (root != NULL) + dt_object_put(env, root); out_los: local_oid_storage_fini(env, mdd->mdd_los); mdd->mdd_los = NULL; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 744e6d6..cd08cde 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -253,6 +253,35 @@ test_0d() { # LU-3397 } run_test 0d "check export proc =============================" +test_0e() { # LU-13417 + (( $MDSCOUNT > 1 )) || + skip "We need at least 2 MDTs for this test" + + (( $MDS1_VERSION >= $(version_code 2.14.51) )) || + skip "Need server version at least 2.14.51" + + local default_lmv_count=$($LFS getdirstripe -D -c $MOUNT) + local default_lmv_index=$($LFS getdirstripe -D -i $MOUNT) + + [ $default_lmv_count -eq 1 ] || + error "$MOUNT default stripe count $default_lmv_count" + + [ $default_lmv_index -eq -1 ] || + error "$MOUNT default stripe index $default_lmv_index" + + mkdir $MOUNT/$tdir.1 || error "mkdir $MOUNT/$tdir.1 failed" + mkdir $MOUNT/$tdir.2 || error "mkdir $MOUNT/$tdir.2 failed" + + local mdt_index1=$($LFS getdirstripe -i $MOUNT/$tdir.1) + local mdt_index2=$($LFS getdirstripe -i $MOUNT/$tdir.2) + + [ $mdt_index1 -eq $mdt_index2 ] && + error "directories are on the same MDT $mdt_index1=$mdt_index2" + + rmdir $MOUNT/$tdir.1 $MOUNT/$tdir.2 +} +run_test 0e "Enable DNE MDT balancing for mkdir in the ROOT" + test_1() { test_mkdir $DIR/$tdir test_mkdir $DIR/$tdir/d2 -- 1.8.3.1