X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flmv%2Flmv_obd.c;h=717e410d426a025e98b637cf21b2b5ff0eb3944c;hp=a8ee8034732910d61ed21a5c98c424da5591eee3;hb=cbc62b0b829afdceaa01820996e567b5bdeb281c;hpb=de47c7671f29b2a3a79f6a126b7e01f0b2c5991a;ds=sidebyside diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index a8ee803..717e410 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. */ #define DEBUG_SUBSYSTEM S_LMV @@ -1311,7 +1310,7 @@ static int lmv_statfs_update(void *cookie, int rc) tgt->ltd_statfs = *osfs; tgt->ltd_statfs_age = ktime_get_seconds(); spin_unlock(&lmv->lmv_lock); - lmv->lmv_qos.lq_dirty = 1; + set_bit(LQ_DIRTY, &lmv->lmv_qos.lq_flags); } return rc; @@ -1462,9 +1461,11 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data, static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt) { - struct lu_tgt_desc *tgt; + struct lu_tgt_desc *tgt, *cur = NULL; + __u64 total_avail = 0; __u64 total_weight = 0; __u64 cur_weight = 0; + int total_usable = 0; __u64 rand; int rc; @@ -1483,13 +1484,29 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, __u32 *mdt) GOTO(unlock, tgt = ERR_PTR(rc)); lmv_foreach_tgt(lmv, tgt) { - tgt->ltd_qos.ltq_usable = 0; - if (!tgt->ltd_exp || !tgt->ltd_active) + if (!tgt->ltd_exp || !tgt->ltd_active) { + tgt->ltd_qos.ltq_usable = 0; continue; + } tgt->ltd_qos.ltq_usable = 1; lu_tgt_qos_weight_calc(tgt); + if (tgt->ltd_index == *mdt) + cur = tgt; + total_avail += tgt->ltd_qos.ltq_avail; total_weight += tgt->ltd_qos.ltq_weight; + total_usable++; + } + + /* if current MDT has above-average space, within range of the QOS + * threshold, stay on the same MDT to avoid creating needless remote + * MDT directories. + */ + rand = total_avail * (256 - lmv->lmv_qos.lq_threshold_rr) / + (total_usable * 256); + if (cur && cur->ltd_qos.ltq_avail >= rand) { + tgt = cur; + GOTO(unlock, rc = 0); } rand = lu_prandom_u64_max(total_weight); @@ -1523,7 +1540,7 @@ static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv, __u32 *mdt) ENTRY; - spin_lock(&lmv->lmv_qos.lq_rr.lqr_alloc); + spin_lock(&lmv->lmv_lock); for (i = 0; i < lmv->lmv_mdt_descs.ltd_tgts_size; i++) { index = (i + lmv->lmv_qos_rr_index) % lmv->lmv_mdt_descs.ltd_tgts_size; @@ -1534,11 +1551,11 @@ static struct lu_tgt_desc *lmv_locate_tgt_rr(struct lmv_obd *lmv, __u32 *mdt) *mdt = tgt->ltd_index; lmv->lmv_qos_rr_index = (*mdt + 1) % lmv->lmv_mdt_descs.ltd_tgts_size; - spin_unlock(&lmv->lmv_qos.lq_rr.lqr_alloc); + spin_unlock(&lmv->lmv_lock); RETURN(tgt); } - spin_unlock(&lmv->lmv_qos.lq_rr.lqr_alloc); + spin_unlock(&lmv->lmv_lock); RETURN(ERR_PTR(-ENODEV)); } @@ -1721,6 +1738,22 @@ int lmv_old_layout_lookup(struct lmv_obd *lmv, struct md_op_data *op_data) return rc; } +static inline bool lmv_op_user_qos_mkdir(const struct md_op_data *op_data) +{ + const struct lmv_user_md *lum = op_data->op_data; + + return (op_data->op_cli_flags & CLI_SET_MEA) && lum && + le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC && + le32_to_cpu(lum->lum_stripe_offset) == LMV_OFFSET_DEFAULT; +} + +static inline bool lmv_op_default_qos_mkdir(const struct md_op_data *op_data) +{ + const struct lmv_stripe_md *lsm = op_data->op_default_mea1; + + return lsm && lsm->lsm_md_master_mdt_index == LMV_OFFSET_DEFAULT; +} + /* mkdir by QoS in two cases: * 1. 'lfs mkdir -i -1' * 2. parent default LMV master_mdt_index is -1 @@ -1730,27 +1763,38 @@ int lmv_old_layout_lookup(struct lmv_obd *lmv, struct md_op_data *op_data) */ static inline bool lmv_op_qos_mkdir(const struct md_op_data *op_data) { - const struct lmv_stripe_md *lsm = op_data->op_default_mea1; - const struct lmv_user_md *lum = op_data->op_data; - if (op_data->op_code != LUSTRE_OPC_MKDIR) return false; if (lmv_dir_striped(op_data->op_mea1)) return false; - if (op_data->op_cli_flags & CLI_SET_MEA && lum && - (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC || - le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) && - le32_to_cpu(lum->lum_stripe_offset) == LMV_OFFSET_DEFAULT) + if (lmv_op_user_qos_mkdir(op_data)) return true; - if (lsm && lsm->lsm_md_master_mdt_index == LMV_OFFSET_DEFAULT) + if (lmv_op_default_qos_mkdir(op_data)) return true; return false; } +/* if default LMV is set, and its index is LMV_OFFSET_DEFAULT, and + * 1. max_inherit_rr is set and is not LMV_INHERIT_RR_NONE + * 2. or parent is ROOT + * mkdir roundrobin. + * NB, this also needs to check server is balanced, which is checked by caller. + */ +static inline bool lmv_op_default_rr_mkdir(const struct md_op_data *op_data) +{ + const struct lmv_stripe_md *lsm = op_data->op_default_mea1; + + if (!lmv_op_default_qos_mkdir(op_data)) + return false; + + return lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE || + fid_is_root(&op_data->op_fid1); +} + /* 'lfs mkdir -i ' */ static inline bool lmv_op_user_specific_mkdir(const struct md_op_data *op_data) { @@ -1772,6 +1816,7 @@ lmv_op_default_specific_mkdir(const struct md_op_data *op_data) op_data->op_default_mea1->lsm_md_master_mdt_index != LMV_OFFSET_DEFAULT; } + int lmv_create(struct obd_export *exp, struct md_op_data *op_data, const void *data, size_t datalen, umode_t mode, uid_t uid, gid_t gid, cfs_cap_t cap_effective, __u64 rdev, @@ -1821,11 +1866,23 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, if (!tgt) RETURN(-ENODEV); } else if (lmv_op_qos_mkdir(op_data)) { + struct lmv_tgt_desc *tmp = tgt; + tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds); - if (tgt == ERR_PTR(-EAGAIN)) - tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds); + if (tgt == ERR_PTR(-EAGAIN)) { + if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) && + !lmv_op_default_rr_mkdir(op_data) && + !lmv_op_user_qos_mkdir(op_data)) + /* if it's not necessary, don't create remote + * directory. + */ + tgt = tmp; + else + tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds); + } if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); + /* * only update statfs after QoS mkdir, this means the cached * statfs may be stale, and current mkdir may not follow QoS @@ -1919,7 +1976,11 @@ lmv_getattr_name(struct obd_export *exp,struct md_op_data *op_data, ENTRY; retry: - tgt = lmv_locate_tgt(lmv, op_data); + if (op_data->op_namelen == 2 && + op_data->op_name[0] == '.' && op_data->op_name[1] == '.') + tgt = lmv_fid2tgt(lmv, &op_data->op_fid1); + else + tgt = lmv_locate_tgt(lmv, op_data); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); @@ -3162,6 +3223,8 @@ static inline int lmv_unpack_user_md(struct obd_export *exp, lsm->lsm_md_stripe_count = le32_to_cpu(lmu->lum_stripe_count); lsm->lsm_md_master_mdt_index = le32_to_cpu(lmu->lum_stripe_offset); lsm->lsm_md_hash_type = le32_to_cpu(lmu->lum_hash_type); + lsm->lsm_md_max_inherit = lmu->lum_max_inherit; + lsm->lsm_md_max_inherit_rr = lmu->lum_max_inherit_rr; lsm->lsm_md_pool_name[LOV_MAXPOOLNAME] = 0; return 0; @@ -3194,10 +3257,8 @@ static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, } if (lmv_dir_striped(lsm)) { - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - if (lsm->lsm_md_oinfo[i].lmo_root) - iput(lsm->lsm_md_oinfo[i].lmo_root); - } + for (i = 0; i < lsm->lsm_md_stripe_count; i++) + iput(lsm->lsm_md_oinfo[i].lmo_root); lsm_size = lmv_stripe_md_size(lsm->lsm_md_stripe_count); } else { lsm_size = lmv_stripe_md_size(0); @@ -3371,7 +3432,7 @@ lmv_lock_match(struct obd_export *exp, __u64 flags, } static int -lmv_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, +lmv_get_lustre_md(struct obd_export *exp, struct req_capsule *pill, struct obd_export *dt_exp, struct obd_export *md_exp, struct lustre_md *md) { @@ -3381,7 +3442,7 @@ lmv_get_lustre_md(struct obd_export *exp, struct ptlrpc_request *req, if (!tgt || !tgt->ltd_exp) return -EINVAL; - return md_get_lustre_md(tgt->ltd_exp, req, dt_exp, md_exp, md); + return md_get_lustre_md(tgt->ltd_exp, pill, dt_exp, md_exp, md); } static int lmv_free_lustre_md(struct obd_export *exp, struct lustre_md *md) @@ -3439,9 +3500,9 @@ static int lmv_clear_open_replay_data(struct obd_export *exp, } static int lmv_intent_getattr_async(struct obd_export *exp, - struct md_enqueue_info *minfo) + struct md_op_item *item) { - struct md_op_data *op_data = &minfo->mi_data; + struct md_op_data *op_data = &item->mop_data; struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; struct lmv_tgt_desc *ptgt; @@ -3468,7 +3529,7 @@ static int lmv_intent_getattr_async(struct obd_export *exp, if (ctgt != ptgt) RETURN(-EREMOTE); - rc = md_intent_getattr_async(ptgt->ltd_exp, minfo); + rc = md_intent_getattr_async(ptgt->ltd_exp, item); RETURN(rc); } @@ -3662,7 +3723,7 @@ static const struct md_ops lmv_md_ops = { static int __init lmv_init(void) { - return class_register_type(&lmv_obd_ops, &lmv_md_ops, true, NULL, + return class_register_type(&lmv_obd_ops, &lmv_md_ops, true, LUSTRE_LMV_NAME, NULL); }