MF_MDC_CANCEL_FID4 = BIT(3),
MF_GET_MDT_IDX = BIT(4),
MF_GETATTR_BY_FID = BIT(5),
+ MF_QOS_MKDIR = BIT(6),
+ MF_RR_MKDIR = BIT(7),
};
enum md_cli_flags {
__u32 op_projid;
- /* Used by readdir */
- unsigned int op_max_pages;
+ union {
+ /* Used by readdir */
+ unsigned int op_max_pages;
+ /* mkdir */
+ unsigned short op_dir_depth;
+ };
__u16 op_mirror_id;
LMV_INHERIT_RR_DEFAULT = 0,
/* not inherit any more */
LMV_INHERIT_RR_END = 1,
+ /* default inherit_rr of ROOT */
+ LMV_INHERIT_RR_ROOT = 3,
/* max inherit depth */
LMV_INHERIT_RR_MAX = 250,
/* [251, 254] are reserved */
if (IS_ERR(op_data))
RETURN(PTR_ERR(op_data));
+ op_data->op_dir_depth = ll_i2info(parent)->lli_depth;
+
if (ll_sbi_has_encrypt(sbi) &&
(IS_ENCRYPTED(parent) ||
unlikely(llcrypt_dummy_context_enabled(parent)))) {
* of kernel will deal with that later.
*/
ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, &bits);
- if (bits & MDS_INODELOCK_LOOKUP)
+ if (bits & MDS_INODELOCK_LOOKUP) {
d_lustre_revalidate(de);
+ ll_update_dir_depth(parent->d_inode, de->d_inode);
+ }
+
/* if DoM bit returned along with LAYOUT bit then there
* can be read-on-open data returned.
*/
/* "opendir_pid" is the token when lookup/revalid
* -- I am the owner of dir statahead. */
pid_t lli_opendir_pid;
+ /* directory depth to ROOT */
+ unsigned short lli_depth;
/* stat will try to access statahead entries or start
* statahead if this flag is set, and this flag will be
* set upon dir open, and cleared when dir is closed,
* statahead hit ratio is too low, or start statahead
* thread failed. */
- unsigned int lli_sa_enabled:1;
+ unsigned short lli_sa_enabled:1;
/* generation for statahead */
unsigned int lli_sa_generation;
/* rw lock protects lli_lsm_md */
u32 flags);
int ll_update_inode(struct inode *inode, struct lustre_md *md);
void ll_update_inode_flags(struct inode *inode, unsigned int ext_flags);
+void ll_update_dir_depth(struct inode *dir, struct inode *inode);
int ll_read_inode2(struct inode *inode, void *opaque);
void ll_truncate_inode_pages_final(struct inode *inode);
void ll_delete_inode(struct inode *inode);
return 0;
}
+/* Refresh the cached distance-to-ROOT of directory @inode from its parent
+ * @dir; meant to be called once the LOOKUP lock on @inode has been fetched.
+ * Nothing is done for non-directories or when @inode is @dir itself.
+ */
+void ll_update_dir_depth(struct inode *dir, struct inode *inode)
+{
+ struct ll_inode_info *child;
+
+ if (!S_ISDIR(inode->i_mode) || inode == dir)
+ return;
+
+ child = ll_i2info(inode);
+ /* a child directory sits exactly one level below its parent */
+ child->lli_depth = ll_i2info(dir)->lli_depth + 1;
+ CDEBUG(D_INODE, DFID" depth %hu\n", PFID(&child->lli_fid),
+ child->lli_depth);
+}
+
void ll_truncate_inode_pages_final(struct inode *inode)
{
struct address_space *mapping = &inode->i_data;
if (!it_disposition(it, DISP_LOOKUP_NEG)) {
/* we have lookup look - unhide dentry */
- if (bits & MDS_INODELOCK_LOOKUP)
+ if (bits & MDS_INODELOCK_LOOKUP) {
d_lustre_revalidate(*de);
+ ll_update_dir_depth(parent, (*de)->d_inode);
+ }
if (encrypt) {
rc = llcrypt_get_encryption_info(inode);
}
ll_set_lock_data(ll_i2sbi(dir)->ll_md_exp, inode, it, &bits);
- if (bits & MDS_INODELOCK_LOOKUP)
+ if (bits & MDS_INODELOCK_LOOKUP) {
d_lustre_revalidate(dentry);
+ ll_update_dir_depth(dir, inode);
+ }
RETURN(0);
}
inode->i_ctime.tv_sec = body->mbo_ctime;
}
+/* once default LMV (space balanced) is set on ROOT, it should take effect if
+ * default LMV is not set on parent directory.
+ *
+ * Prepare @op_data for a mkdir under @dir. The depth of @dir is always
+ * recorded in op_dir_depth. Then, unless @dir is striped, carries its own
+ * default LMV, or is ROOT itself, the default LMV of ROOT is consulted under
+ * lli_lsm_sem: if its master_mdt_index is LMV_OFFSET_DEFAULT and its
+ * max_inherit covers the depth of @dir, MF_QOS_MKDIR is set in op_flags, and
+ * MF_RR_MKDIR is added when max_inherit_rr covers that depth as well.
+ *
+ * \param[in,out] op_data mkdir operation data to fill in
+ * \param[in] dir parent directory of the directory being created
+ */
+static void ll_qos_mkdir_prep(struct md_op_data *op_data, struct inode *dir)
+{
+ struct inode *root = dir->i_sb->s_root->d_inode;
+ struct ll_inode_info *rlli = ll_i2info(root);
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct lmv_stripe_md *lsm;
+
+ op_data->op_dir_depth = lli->lli_depth;
+
+ /* parent directory is striped */
+ if (unlikely(lli->lli_lsm_md))
+ return;
+
+ /* default LMV set on parent directory */
+ if (unlikely(lli->lli_default_lsm_md))
+ return;
+
+ /* parent is ROOT */
+ if (unlikely(dir == root))
+ return;
+
+ /* default LMV not set on ROOT (unlocked peek, the pointer is read again
+ * under lli_lsm_sem below before it is dereferenced)
+ */
+ if (!rlli->lli_default_lsm_md)
+ return;
+
+ down_read(&rlli->lli_lsm_sem);
+ lsm = rlli->lli_default_lsm_md;
+ if (!lsm)
+ goto unlock;
+
+ /* not space balanced */
+ if (lsm->lsm_md_master_mdt_index != LMV_OFFSET_DEFAULT)
+ goto unlock;
+
+ if (lsm->lsm_md_max_inherit != LMV_INHERIT_NONE &&
+ (lsm->lsm_md_max_inherit == LMV_INHERIT_UNLIMITED ||
+ lsm->lsm_md_max_inherit >= lli->lli_depth)) {
+ op_data->op_flags |= MF_QOS_MKDIR; /* max_inherit reaches @dir */
+ if (lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE &&
+ (lsm->lsm_md_max_inherit_rr == LMV_INHERIT_RR_UNLIMITED ||
+ lsm->lsm_md_max_inherit_rr >= lli->lli_depth))
+ op_data->op_flags |= MF_RR_MKDIR; /* so does max_inherit_rr */
+ CDEBUG(D_INODE, DFID" requests qos mkdir %#x\n",
+ PFID(&lli->lli_fid), op_data->op_flags);
+ }
+unlock:
+ up_read(&rlli->lli_lsm_sem);
+}
+
static int ll_new_node(struct inode *dir, struct dentry *dchild,
const char *tgt, umode_t mode, int rdev, __u32 opc)
{
if (IS_ERR(op_data))
GOTO(err_exit, err = PTR_ERR(op_data));
+ if (S_ISDIR(mode))
+ ll_qos_mkdir_prep(op_data, dir);
+
if (sbi->ll_flags & LL_SBI_FILE_SECCTX) {
err = ll_dentry_init_security(dchild, mode, &dchild->d_name,
&op_data->op_file_secctx_name,
}
if ((bits & MDS_INODELOCK_LOOKUP) &&
- d_lustre_invalid(*dentryp))
+ d_lustre_invalid(*dentryp)) {
d_lustre_revalidate(*dentryp);
+ ll_update_dir_depth(dir, (*dentryp)->d_inode);
+ }
+
ll_intent_release(&it);
}
}
RETURN(rc);
}
-static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt)
+static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt,
+ unsigned short dir_depth)
{
struct lu_tgt_desc *tgt, *cur = NULL;
__u64 total_avail = 0;
/* if current MDT has above-average space, within range of the QOS
* threshold, stay on the same MDT to avoid creating needless remote
- * MDT directories.
+ * MDT directories. The bar is lowered as directory depth grows, so deeper
+ * directories are more likely to stay on the current MDT.
*/
rand = total_avail * (256 - lmv->lmv_qos.lq_threshold_rr) /
- (total_usable * 256);
+ (total_usable * 256 * (1 + dir_depth / 4));
if (cur && cur->ltd_qos.ltq_avail >= rand) {
tgt = cur;
GOTO(unlock, rc = 0);
{
const struct lmv_stripe_md *lsm = op_data->op_default_mea1;
- return lsm && lsm->lsm_md_master_mdt_index == LMV_OFFSET_DEFAULT;
+ return (op_data->op_flags & MF_QOS_MKDIR) ||
+ (lsm && lsm->lsm_md_master_mdt_index == LMV_OFFSET_DEFAULT);
}
-/* mkdir by QoS in two cases:
- * 1. 'lfs mkdir -i -1'
- * 2. parent default LMV master_mdt_index is -1
+/* mkdir by QoS in three cases:
+ * 1. ROOT default LMV is space balanced.
+ * 2. 'lfs mkdir -i -1'
+ * 3. parent default LMV master_mdt_index is -1
*
* NB, mkdir by QoS only if parent is not striped, this is to avoid remote
* directories under striped directory.
return false;
}
-/* if default LMV is set, and its index is LMV_OFFSET_DEFAULT, and
- * 1. max_inherit_rr is set and is not LMV_INHERIT_RR_NONE
+/* if parent default LMV is space balanced, and
+ * 1. max_inherit_rr is set
* 2. or parent is ROOT
- * mkdir roundrobin.
- * NB, this also needs to check server is balanced, which is checked by caller.
+ * mkdir roundrobin. Or if parent doesn't have default LMV, while ROOT default
+ * LMV requests roundrobin mkdir, do the same.
+ * NB, this needs to check server is balanced, which is done by caller.
*/
static inline bool lmv_op_default_rr_mkdir(const struct md_op_data *op_data)
{
if (!lmv_op_default_qos_mkdir(op_data))
return false;
- return lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE ||
+ return (op_data->op_flags & MF_RR_MKDIR) ||
+ (lsm && lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE) ||
fid_is_root(&op_data->op_fid1);
}
} else if (lmv_op_qos_mkdir(op_data)) {
struct lmv_tgt_desc *tmp = tgt;
- tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds);
+ tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds,
+ op_data->op_dir_depth);
if (tgt == ERR_PTR(-EAGAIN)) {
if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) &&
!lmv_op_default_rr_mkdir(op_data) &&
struct obd_device *obd = container_of(kobj, struct obd_device,
obd_kset.kobj);
struct lmv_obd *lmv = &obd->u.lmv;
+ char buf[6], *tmp;
unsigned int val;
int rc;
- rc = kstrtouint(buffer, 0, &val);
+ /* "100%\n\0" is the longest string accepted */
+ if (count >= sizeof(buf))
+ return -ERANGE;
+
+ strncpy(buf, buffer, sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+ tmp = strchr(buf, '%');
+ if (tmp)
+ *tmp = '\0';
+
+ rc = kstrtouint(buf, 0, &val);
if (rc)
return rc;
struct obd_device *obd = container_of(kobj, struct obd_device,
obd_kset.kobj);
struct lmv_obd *lmv = &obd->u.lmv;
+ char buf[6], *tmp;
unsigned int val;
int rc;
- rc = kstrtouint(buffer, 0, &val);
+ /* "100%\n\0" is the longest string accepted */
+ if (count >= sizeof(buf))
+ return -ERANGE;
+
+ strncpy(buf, buffer, sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+ tmp = strchr(buf, '%');
+ if (tmp)
+ *tmp = '\0';
+
+ rc = kstrtouint(buf, 0, &val);
if (rc)
return rc;
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_STALE_DIR_LAYOUT))
GOTO(out, rc = -EREMOTE);
- if (lo->ldo_dir_stripe_offset == LMV_OFFSET_DEFAULT) {
- struct lod_default_striping *lds;
-
- lds = lo->ldo_def_striping;
- /*
- * child and parent should be on the same MDT,
- * but if parent has default LMV, and the start
- * MDT offset is -1, it's allowed. This check
- * is not necessary after 2.12.22 because client
- * follows this already, but old client may not.
- */
- if (hint->dah_parent &&
- dt_object_remote(hint->dah_parent) && lds &&
- lds->lds_dir_def_stripe_offset !=
- LMV_OFFSET_DEFAULT)
- GOTO(out, rc = -EREMOTE);
- } else if (lo->ldo_dir_stripe_offset !=
- ss->ss_node_id) {
+ if (lo->ldo_dir_stripe_offset != LMV_OFFSET_DEFAULT &&
+ lo->ldo_dir_stripe_offset != ss->ss_node_id) {
struct lod_device *lod;
struct lu_tgt_desc *mdt = NULL;
bool found_mdt = false;
struct lod_device *lod = dt2lod_dev(dt);
struct lu_tgt_descs *ltd = is_mdt ? &lod->lod_mdt_descs :
&lod->lod_ost_descs;
+ char buf[6], *tmp;
unsigned int val;
int rc;
- rc = kstrtouint(buffer, 0, &val);
+ /* "100%\n\0" is the longest string accepted */
+ if (count >= sizeof(buf))
+ return -ERANGE;
+
+ strncpy(buf, buffer, sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+ tmp = strchr(buf, '%');
+ if (tmp)
+ *tmp = '\0';
+
+ rc = kstrtouint(buf, 0, &val);
if (rc)
return rc;
if (val > 100)
return -EINVAL;
+
ltd->ltd_qos.lq_prio_free = (val << 8) / 100;
set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
set_bit(LQ_RESET, &ltd->ltd_qos.lq_flags);
.lum_magic = LMV_USER_MAGIC,
.lum_stripe_count = 1,
.lum_stripe_offset = LMV_OFFSET_DEFAULT,
+ .lum_max_inherit = LMV_INHERIT_UNLIMITED,
+ .lum_max_inherit_rr = LMV_INHERIT_RR_ROOT,
};
th = dt_trans_create(env, mdd->mdd_bottom);
run_pjdfstest $EXT4_MNTPT $pjdfstest $EXT4_LOG
log "Run $pjdfstest against lustre filesystem"
- run_pjdfstest $MOUNT $pjdfstest $LUSTRE_LOG
-
+ mkdir_on_mdt0 $MOUNT/pjdfstest
+ run_pjdfstest $MOUNT/pjdfstest $pjdfstest $LUSTRE_LOG
}
setup_ext4() {
}
run_test 413c "mkdir with default LMV max inherit rr"
+test_413d() { # mkdir under a dir without its own default LMV (see 413d title)
+ (( MDSCOUNT >= 2 )) ||
+ skip "We need at least 2 MDTs for this test"
+
+ (( MDS1_VERSION >= $(version_code 2.14.51) )) ||
+ skip "Need server version at least 2.14.51"
+
+ local lmv_qos_threshold_rr # saved value, restored on exit below
+
+ lmv_qos_threshold_rr=$($LCTL get_param -n lmv.*.qos_threshold_rr |
+ head -n1)
+ stack_trap "$LCTL set_param \
+ lmv.*.qos_threshold_rr=$lmv_qos_threshold_rr > /dev/null" EXIT
+
+ $LCTL set_param lmv.*.qos_threshold_rr=100 > /dev/null # presumably forces round-robin placement - TODO confirm
+ mkdir -p $DIR/$tdir || error "mkdir $tdir failed"
+ getfattr -d -m dmv -e hex $DIR/$tdir | grep dmv &&
+ error "$tdir shouldn't have default LMV" # any "dmv" xattr on $tdir fails the test
+ createmany -d $DIR/$tdir/sub $((100 * MDSCOUNT)) ||
+ error "mkdir sub failed" # 100 subdirs per MDT are requested
+
+ local count=$($LFS getstripe -m $DIR/$tdir/* | grep -c ^0) # output lines starting with 0
+
+ (( count == 100 )) || error "$count subdirs on MDT0" # expect an even 1/MDSCOUNT share
+}
+run_test 413d "inherit ROOT default LMV"
+
test_413z() {
local pids=""
local subdir
shift $((OPTIND - 1))
$LFS mkdir -i $mdt -c 1 $*
+ # setting default LMV on a non-DNE system causes sanity-quota test 41 to fail
+ ((MDSCOUNT < 2)) || $LFS setdirstripe -D -i $mdt -c 1 $*
}
mkdir_on_mdt0() {