X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flmv%2Flmv_obd.c;h=5a7945b93953f6b3985ec9ad9850d7f1b9e9a4bc;hp=2a6fbfee1582d918c2accc8ac878d7465e557d80;hb=65a8ff5fbe8ca014bd01150ab102d8aa43f78cff;hpb=d76e96c1794f82296a772065e3d36469e3a07836 diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 2a6fbfe..5a7945b 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -265,10 +265,12 @@ static int lmv_init_ea_size(struct obd_export *exp, __u32 easize, for (i = 0; i < lmv->desc.ld_tgt_count; i++) { struct lmv_tgt_desc *tgt = lmv->tgts[i]; - if (tgt == NULL || tgt->ltd_exp == NULL || !tgt->ltd_active) { + if (tgt == NULL || tgt->ltd_exp == NULL) { CWARN("%s: NULL export for %d\n", obd->obd_name, i); continue; } + if (!tgt->ltd_active) + continue; rc = md_init_ea_size(tgt->ltd_exp, easize, def_easize); if (rc) { @@ -798,23 +800,42 @@ static int lmv_hsm_ct_register(struct obd_device *obd, unsigned int cmd, void __user *uarg) { struct lmv_obd *lmv = &obd->u.lmv; - struct file *filp; - __u32 i, j; - int err; - bool any_set = false; - struct kkuc_ct_data kcd = { - .kcd_magic = KKUC_CT_DATA_MAGIC, - .kcd_archive = lk->lk_data, - }; - int rc = 0; + struct file *filp; + __u32 i, j; + int err; + bool any_set = false; + struct kkuc_ct_data *kcd; + size_t kcd_size; + int rc = 0; ENTRY; filp = fget(lk->lk_wfd); if (!filp) RETURN(-EBADF); + if (lk->lk_flags & LK_FLG_DATANR) + kcd_size = offsetof(struct kkuc_ct_data, + kcd_archives[lk->lk_data_count]); + else + kcd_size = sizeof(*kcd); + + OBD_ALLOC(kcd, kcd_size); + if (kcd == NULL) + GOTO(err_fput, rc = -ENOMEM); + + kcd->kcd_nr_archives = lk->lk_data_count; + if (lk->lk_flags & LK_FLG_DATANR) { + kcd->kcd_magic = KKUC_CT_DATA_ARRAY_MAGIC; + if (lk->lk_data_count > 0) + memcpy(kcd->kcd_archives, lk->lk_data, + sizeof(*kcd->kcd_archives) * lk->lk_data_count); + } else { + kcd->kcd_magic = KKUC_CT_DATA_BITMAP_MAGIC; + } + rc = libcfs_kkuc_group_add(filp, &obd->obd_uuid, lk->lk_uid, - lk->lk_group, &kcd, sizeof(kcd)); + lk->lk_group, kcd, kcd_size); + OBD_FREE(kcd, kcd_size); if (rc) GOTO(err_fput, rc); @@ -1154,7 +1175,7 @@ static int lmv_placement_policy(struct obd_device *obd, * 1. See if the stripe offset is specified by lum. * 2. Then check if there is default stripe offset. * 3. Finally choose MDS by name hash if the parent - * is striped directory. (see lmv_locate_mds()). */ + * is striped directory. (see lmv_locate_tgt()). */ if (op_data->op_cli_flags & CLI_SET_MEA && lum != NULL && le32_to_cpu(lum->lum_stripe_offset) != (__u32)-1) { *mds = le32_to_cpu(lum->lum_stripe_offset); @@ -1335,6 +1356,32 @@ out: RETURN(rc); } +static int lmv_select_statfs_mdt(struct lmv_obd *lmv, __u32 flags) +{ + int i; + + if (flags & OBD_STATFS_FOR_MDT0) + return 0; + + if (lmv->lmv_statfs_start || lmv->desc.ld_tgt_count == 1) + return lmv->lmv_statfs_start; + + /* choose initial MDT for this client */ + for (i = 0;; i++) { + struct lnet_process_id lnet_id; + if (LNetGetId(i, &lnet_id) == -ENOENT) + break; + + if (LNET_NETTYP(LNET_NIDNET(lnet_id.nid)) != LOLND) { + lmv->lmv_statfs_start = + lnet_id.nid % lmv->desc.ld_tgt_count; + break; + } + } + + return lmv->lmv_statfs_start; +} + static int lmv_statfs(const struct lu_env *env, struct obd_export *exp, struct obd_statfs *osfs, time64_t max_age, __u32 flags) { @@ -1342,42 +1389,52 @@ static int lmv_statfs(const struct lu_env *env, struct obd_export *exp, struct lmv_obd *lmv = &obd->u.lmv; struct obd_statfs *temp; int rc = 0; - __u32 i; + __u32 i, idx; ENTRY; OBD_ALLOC(temp, sizeof(*temp)); if (temp == NULL) RETURN(-ENOMEM); - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { - if (lmv->tgts[i] == NULL || lmv->tgts[i]->ltd_exp == NULL) + /* distribute statfs among MDTs */ + idx = lmv_select_statfs_mdt(lmv, flags); + + for (i = 0; i < lmv->desc.ld_tgt_count; i++, idx++) { + idx = idx % lmv->desc.ld_tgt_count; + if (lmv->tgts[idx] == NULL || lmv->tgts[idx]->ltd_exp == NULL) continue; - rc = obd_statfs(env, lmv->tgts[i]->ltd_exp, temp, + rc = obd_statfs(env, lmv->tgts[idx]->ltd_exp, temp, max_age, flags); if (rc) { CERROR("can't stat MDS #%d (%s), error %d\n", i, - lmv->tgts[i]->ltd_exp->exp_obd->obd_name, + lmv->tgts[idx]->ltd_exp->exp_obd->obd_name, rc); GOTO(out_free_temp, rc); } + if (temp->os_state & OS_STATE_SUM || + flags == OBD_STATFS_FOR_MDT0) { + /* reset to the last aggregated values + * and don't sum with non-aggrated data */ + /* If the statfs is from mount, it needs to retrieve + * necessary information from MDT0. i.e. mount does + * not need the merged osfs from all of MDT. Also + * clients can be mounted as long as MDT0 is in + * service */ + *osfs = *temp; + break; + } + if (i == 0) { *osfs = *temp; - /* If the statfs is from mount, it will needs - * retrieve necessary information from MDT0. - * i.e. mount does not need the merged osfs - * from all of MDT. - * And also clients can be mounted as long as - * MDT0 is in service*/ - if (flags & OBD_STATFS_FOR_MDT0) - GOTO(out_free_temp, rc); - } else { - osfs->os_bavail += temp->os_bavail; - osfs->os_blocks += temp->os_blocks; - osfs->os_ffree += temp->os_ffree; - osfs->os_files += temp->os_files; - } + } else { + osfs->os_bavail += temp->os_bavail; + osfs->os_blocks += temp->os_blocks; + osfs->os_ffree += temp->os_ffree; + osfs->os_files += temp->os_files; + osfs->os_granted += temp->os_granted; + } } EXIT; @@ -1503,28 +1560,33 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } -/** - * Choosing the MDT by name or FID in @op_data. - * For non-striped directory, it will locate MDT by fid. - * For striped-directory, it will locate MDT by name. And also - * it will reset op_fid1 with the FID of the choosen stripe. - **/ -struct lmv_tgt_desc * -lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm, - const char *name, int namelen, struct lu_fid *fid, - u32 *mds) +struct lmv_tgt_desc* +__lmv_locate_tgt(struct lmv_obd *lmv, struct lmv_stripe_md *lsm, + const char *name, int namelen, struct lu_fid *fid, u32 *mds, + bool post_migrate) { - struct lmv_tgt_desc *tgt; - const struct lmv_oinfo *oinfo; + struct lmv_tgt_desc *tgt; + const struct lmv_oinfo *oinfo; + + if (lsm == NULL || namelen == 0) { + tgt = lmv_find_target(lmv, fid); + if (IS_ERR(tgt)) + return tgt; + + LASSERT(mds); + *mds = tgt->ltd_idx; + return tgt; + } if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_NAME_HASH)) { if (cfs_fail_val >= lsm->lsm_md_stripe_count) - RETURN(ERR_PTR(-EBADF)); + return ERR_PTR(-EBADF); oinfo = &lsm->lsm_md_oinfo[cfs_fail_val]; } else { - oinfo = lsm_name_to_stripe_info(lsm, name, namelen); + oinfo = lsm_name_to_stripe_info(lsm, name, namelen, + post_migrate); if (IS_ERR(oinfo)) - RETURN(ERR_CAST(oinfo)); + return ERR_CAST(oinfo); } if (fid != NULL) @@ -1536,18 +1598,21 @@ lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm, CDEBUG(D_INFO, "locate on mds %u "DFID"\n", oinfo->lmo_mds, PFID(&oinfo->lmo_fid)); + return tgt; } + /** - * Locate mds by fid or name + * Locate mdt by fid or name * - * For striped directory (lsm != NULL), it will locate the stripe - * by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type - * is unknown, it will return -EBADFD, and lmv_intent_lookup might need - * walk through all of stripes to locate the entry. + * For striped directory, it will locate the stripe by name hash, if hash_type + * is unknown, it will return the stripe specified by 'op_data->op_stripe_index' + * which is set outside, and if dir is migrating, 'op_data->op_post_migrate' + * indicates whether old or new layout is used to locate. * * For normal direcotry, it will locate MDS by FID directly. + * * \param[in] lmv LMV device * \param[in] op_data client MD stack parameters, name, namelen * mds_num etc. @@ -1557,27 +1622,27 @@ lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm, * ERR_PTR(errno) if failed. */ struct lmv_tgt_desc* -lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, +lmv_locate_tgt(struct lmv_obd *lmv, struct md_op_data *op_data, struct lu_fid *fid) { - struct lmv_stripe_md *lsm = op_data->op_mea1; - struct lmv_tgt_desc *tgt; + struct lmv_stripe_md *lsm = op_data->op_mea1; + struct lmv_oinfo *oinfo; + struct lmv_tgt_desc *tgt; /* During creating VOLATILE file, it should honor the mdt * index if the file under striped dir is being restored, see * ct_restore(). */ if (op_data->op_bias & MDS_CREATE_VOLATILE && (int)op_data->op_mds != -1) { - int i; tgt = lmv_get_target(lmv, op_data->op_mds, NULL); if (IS_ERR(tgt)) return tgt; - if (lsm != NULL) { + if (lsm) { + int i; + /* refill the right parent fid */ for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - struct lmv_oinfo *oinfo; - oinfo = &lsm->lsm_md_oinfo[i]; if (oinfo->lmo_mds == op_data->op_mds) { *fid = oinfo->lmo_fid; @@ -1588,22 +1653,21 @@ lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, if (i == lsm->lsm_md_stripe_count) *fid = lsm->lsm_md_oinfo[0].lmo_fid; } + } else if (lmv_is_dir_bad_hash(lsm)) { + LASSERT(op_data->op_stripe_index < lsm->lsm_md_stripe_count); + oinfo = &lsm->lsm_md_oinfo[op_data->op_stripe_index]; - return tgt; - } - - if (lsm == NULL || op_data->op_namelen == 0) { - tgt = lmv_find_target(lmv, fid); - if (IS_ERR(tgt)) - return tgt; - - op_data->op_mds = tgt->ltd_idx; - return tgt; + *fid = oinfo->lmo_fid; + op_data->op_mds = oinfo->lmo_mds; + tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL); + } else { + tgt = __lmv_locate_tgt(lmv, lsm, op_data->op_name, + op_data->op_namelen, fid, + &op_data->op_mds, + op_data->op_post_migrate); } - return lmv_locate_target_for_name(lmv, lsm, op_data->op_name, - op_data->op_namelen, fid, - &op_data->op_mds); + return tgt; } int lmv_create(struct obd_export *exp, struct md_op_data *op_data, @@ -1620,7 +1684,33 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, if (!lmv->desc.ld_active_tgt_count) RETURN(-EIO); - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); + if (lmv_is_dir_bad_hash(op_data->op_mea1)) + RETURN(-EBADF); + + if (lmv_is_dir_migrating(op_data->op_mea1)) { + /* + * if parent is migrating, create() needs to lookup existing + * name, to avoid creating new file under old layout of + * migrating directory, check old layout here. + */ + tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + + rc = md_getattr_name(tgt->ltd_exp, op_data, request); + if (!rc) { + ptlrpc_req_finished(*request); + *request = NULL; + RETURN(-EEXIST); + } + + if (rc != -ENOENT) + RETURN(rc); + + op_data->op_post_migrate = true; + } + + tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); @@ -1631,6 +1721,7 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data, rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); if (rc) RETURN(rc); + if (exp_connect_flags(exp) & OBD_CONNECT_DIR_STRIPE) { /* Send the create request to the MDT where the object * will be located */ @@ -1670,7 +1761,7 @@ lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, CDEBUG(D_INODE, "ENQUEUE on "DFID"\n", PFID(&op_data->op_fid1)); - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); + tgt = lmv_find_target(lmv, &op_data->op_fid1); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); @@ -1683,19 +1774,20 @@ lmv_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, RETURN(rc); } -static int +int lmv_getattr_name(struct obd_export *exp,struct md_op_data *op_data, struct ptlrpc_request **preq) { - struct ptlrpc_request *req = NULL; - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt; - struct mdt_body *body; - int rc; + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt; + struct mdt_body *body; + int rc; + ENTRY; - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); +retry: + tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); @@ -1704,31 +1796,28 @@ lmv_getattr_name(struct obd_export *exp,struct md_op_data *op_data, PFID(&op_data->op_fid1), tgt->ltd_idx); rc = md_getattr_name(tgt->ltd_exp, op_data, preq); - if (rc != 0) + if (rc == -ENOENT && lmv_dir_retry_check_update(op_data)) { + ptlrpc_req_finished(*preq); + *preq = NULL; + goto retry; + } + + if (rc) RETURN(rc); body = req_capsule_server_get(&(*preq)->rq_pill, &RMF_MDT_BODY); LASSERT(body != NULL); if (body->mbo_valid & OBD_MD_MDS) { - struct lu_fid rid = body->mbo_fid1; - CDEBUG(D_INODE, "Request attrs for "DFID"\n", - PFID(&rid)); - - tgt = lmv_find_target(lmv, &rid); - if (IS_ERR(tgt)) { - ptlrpc_req_finished(*preq); - preq = NULL; - RETURN(PTR_ERR(tgt)); - } - - op_data->op_fid1 = rid; + op_data->op_fid1 = body->mbo_fid1; op_data->op_valid |= OBD_MD_FLCROSSREF; op_data->op_namelen = 0; op_data->op_name = NULL; - rc = md_getattr_name(tgt->ltd_exp, op_data, &req); + ptlrpc_req_finished(*preq); - *preq = req; + *preq = NULL; + + goto retry; } RETURN(rc); @@ -1798,19 +1887,40 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data, op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); op_data->op_cap = cfs_curproc_cap_pack(); - if (op_data->op_mea2 != NULL) { - struct lmv_stripe_md *lsm = op_data->op_mea2; - const struct lmv_oinfo *oinfo; - oinfo = lsm_name_to_stripe_info(lsm, op_data->op_name, - op_data->op_namelen); - if (IS_ERR(oinfo)) - RETURN(PTR_ERR(oinfo)); + if (lmv_is_dir_migrating(op_data->op_mea2)) { + struct lu_fid fid1 = op_data->op_fid1; + struct lmv_stripe_md *lsm1 = op_data->op_mea1; - op_data->op_fid2 = oinfo->lmo_fid; + /* + * avoid creating new file under old layout of migrating + * directory, check it here. + */ + tgt = __lmv_locate_tgt(lmv, op_data->op_mea2, op_data->op_name, + op_data->op_namelen, &op_data->op_fid2, + &op_data->op_mds, false); + tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + + op_data->op_fid1 = op_data->op_fid2; + op_data->op_mea1 = op_data->op_mea2; + rc = md_getattr_name(tgt->ltd_exp, op_data, request); + op_data->op_fid1 = fid1; + op_data->op_mea1 = lsm1; + if (!rc) { + ptlrpc_req_finished(*request); + *request = NULL; + RETURN(-EEXIST); + } + + if (rc != -ENOENT) + RETURN(rc); } - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid2); + tgt = __lmv_locate_tgt(lmv, op_data->op_mea2, op_data->op_name, + op_data->op_namelen, &op_data->op_fid2, + &op_data->op_mds, true); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); @@ -1828,158 +1938,323 @@ static int lmv_link(struct obd_export *exp, struct md_op_data *op_data, RETURN(rc); } -static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, - const char *old, size_t oldlen, - const char *new, size_t newlen, - struct ptlrpc_request **request) +static int lmv_migrate(struct obd_export *exp, struct md_op_data *op_data, + const char *name, size_t namelen, + struct ptlrpc_request **request) { - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *src_tgt; - struct lmv_tgt_desc *tgt_tgt; - struct obd_export *target_exp; - struct mdt_body *body; - int rc; + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_stripe_md *lsm = op_data->op_mea1; + struct lmv_tgt_desc *parent_tgt; + struct lmv_tgt_desc *sp_tgt; + struct lmv_tgt_desc *tp_tgt = NULL; + struct lmv_tgt_desc *child_tgt; + struct lmv_tgt_desc *tgt; + struct lu_fid target_fid; + int rc; + ENTRY; - LASSERT(oldlen != 0); + LASSERT(op_data->op_cli_flags & CLI_MIGRATE); - CDEBUG(D_INODE, "RENAME %.*s in "DFID":%d to %.*s in "DFID":%d\n", - (int)oldlen, old, PFID(&op_data->op_fid1), - op_data->op_mea1 ? op_data->op_mea1->lsm_md_stripe_count : 0, - (int)newlen, new, PFID(&op_data->op_fid2), - op_data->op_mea2 ? op_data->op_mea2->lsm_md_stripe_count : 0); + CDEBUG(D_INODE, "MIGRATE "DFID"/%.*s\n", + PFID(&op_data->op_fid1), (int)namelen, name); op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); op_data->op_cap = cfs_curproc_cap_pack(); - if (op_data->op_cli_flags & CLI_MIGRATE) { - LASSERTF(fid_is_sane(&op_data->op_fid3), "invalid FID "DFID"\n", - PFID(&op_data->op_fid3)); - - if (op_data->op_mea1 != NULL) { - struct lmv_stripe_md *lsm = op_data->op_mea1; - struct lmv_tgt_desc *tmp; - - /* Fix the parent fid for striped dir */ - tmp = lmv_locate_target_for_name(lmv, lsm, old, - oldlen, - &op_data->op_fid1, - NULL); - if (IS_ERR(tmp)) - RETURN(PTR_ERR(tmp)); - } - - rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); - if (rc != 0) - RETURN(rc); - src_tgt = lmv_find_target(lmv, &op_data->op_fid3); - if (IS_ERR(src_tgt)) - RETURN(PTR_ERR(src_tgt)); + parent_tgt = lmv_find_target(lmv, &op_data->op_fid1); + if (IS_ERR(parent_tgt)) + RETURN(PTR_ERR(parent_tgt)); - target_exp = src_tgt->ltd_exp; - } else { - if (op_data->op_mea1 != NULL) { - struct lmv_stripe_md *lsm = op_data->op_mea1; + if (lsm) { + __u32 hash_type = lsm->lsm_md_hash_type; + __u32 stripe_count = lsm->lsm_md_stripe_count; - src_tgt = lmv_locate_target_for_name(lmv, lsm, old, - oldlen, - &op_data->op_fid1, - &op_data->op_mds); - } else { - src_tgt = lmv_find_target(lmv, &op_data->op_fid1); + /* + * old stripes are appended after new stripes for migrating + * directory. + */ + if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION) { + hash_type = lsm->lsm_md_migrate_hash; + stripe_count -= lsm->lsm_md_migrate_offset; } - if (IS_ERR(src_tgt)) - RETURN(PTR_ERR(src_tgt)); + rc = lmv_name_to_stripe_index(hash_type, stripe_count, name, + namelen); + if (rc < 0) + RETURN(rc); - if (op_data->op_mea2 != NULL) { - struct lmv_stripe_md *lsm = op_data->op_mea2; + if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION) + rc += lsm->lsm_md_migrate_offset; - tgt_tgt = lmv_locate_target_for_name(lmv, lsm, new, - newlen, - &op_data->op_fid2, - &op_data->op_mds); - } else { - tgt_tgt = lmv_find_target(lmv, &op_data->op_fid2); + /* save it in fid4 temporarily for early cancel */ + op_data->op_fid4 = lsm->lsm_md_oinfo[rc].lmo_fid; + sp_tgt = lmv_get_target(lmv, lsm->lsm_md_oinfo[rc].lmo_mds, + NULL); + if (IS_ERR(sp_tgt)) + RETURN(PTR_ERR(sp_tgt)); + /* + * if parent is being migrated too, fill op_fid2 with target + * stripe fid, otherwise the target stripe is not created yet. + */ + if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION) { + hash_type = lsm->lsm_md_hash_type & + ~LMV_HASH_FLAG_MIGRATION; + stripe_count = lsm->lsm_md_migrate_offset; + + rc = lmv_name_to_stripe_index(hash_type, stripe_count, + name, namelen); + if (rc < 0) + RETURN(rc); + + op_data->op_fid2 = lsm->lsm_md_oinfo[rc].lmo_fid; + tp_tgt = lmv_get_target(lmv, + lsm->lsm_md_oinfo[rc].lmo_mds, + NULL); + if (IS_ERR(tp_tgt)) + RETURN(PTR_ERR(tp_tgt)); } - if (IS_ERR(tgt_tgt)) - RETURN(PTR_ERR(tgt_tgt)); - - target_exp = tgt_tgt->ltd_exp; + } else { + sp_tgt = parent_tgt; } - /* - * LOOKUP lock on src child (fid3) should also be cancelled for - * src_tgt in mdc_rename. - */ - op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3; - - /* - * Cancel UPDATE locks on tgt parent (fid2), tgt_tgt is its - * own target. - */ - rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx, - LCK_EX, MDS_INODELOCK_UPDATE, - MF_MDC_CANCEL_FID2); + child_tgt = lmv_find_target(lmv, &op_data->op_fid3); + if (IS_ERR(child_tgt)) + RETURN(PTR_ERR(child_tgt)); - if (rc != 0) + if (!S_ISDIR(op_data->op_mode) && tp_tgt) + rc = __lmv_fid_alloc(lmv, &target_fid, tp_tgt->ltd_idx); + else + rc = lmv_fid_alloc(NULL, exp, &target_fid, op_data); + if (rc) RETURN(rc); + /* - * Cancel LOOKUP locks on source child (fid3) for parent tgt_tgt. + * for directory, send migrate request to the MDT where the object will + * be migrated to, because we can't create a striped directory remotely. + * + * otherwise, send to the MDT where source is located because regular + * file may open lease. + * + * NB. if MDT doesn't support DIR_MIGRATE, send to source MDT too for + * backward compatibility. */ - if (fid_is_sane(&op_data->op_fid3)) { - struct lmv_tgt_desc *tgt; - - tgt = lmv_find_target(lmv, &op_data->op_fid1); + if (S_ISDIR(op_data->op_mode) && + (exp_connect_flags2(exp) & OBD_CONNECT2_DIR_MIGRATE)) { + tgt = lmv_find_target(lmv, &target_fid); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); + } else { + tgt = child_tgt; + } + + /* cancel UPDATE lock of parent master object */ + rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_idx, LCK_EX, + MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1); + if (rc) + RETURN(rc); - /* Cancel LOOKUP lock on its parent */ - rc = lmv_early_cancel(exp, tgt, op_data, src_tgt->ltd_idx, + /* cancel UPDATE lock of source parent */ + if (sp_tgt != parent_tgt) { + /* + * migrate RPC packs master object FID, because we can only pack + * two FIDs in reint RPC, but MDS needs to know both source + * parent and target parent, and it will obtain them from master + * FID and LMV, the other FID in RPC is kept for target. + * + * since this FID is not passed to MDC, cancel it anyway. + */ + rc = lmv_early_cancel(exp, sp_tgt, op_data, -1, LCK_EX, + MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID4); + if (rc) + RETURN(rc); + + op_data->op_flags &= ~MF_MDC_CANCEL_FID4; + } + op_data->op_fid4 = target_fid; + + /* cancel UPDATE locks of target parent */ + rc = lmv_early_cancel(exp, tp_tgt, op_data, tgt->ltd_idx, LCK_EX, + MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID2); + if (rc) + RETURN(rc); + + /* cancel LOOKUP lock of source if source is remote object */ + if (child_tgt != sp_tgt) { + rc = lmv_early_cancel(exp, sp_tgt, op_data, tgt->ltd_idx, LCK_EX, MDS_INODELOCK_LOOKUP, MF_MDC_CANCEL_FID3); - if (rc != 0) + if (rc) RETURN(rc); + } - rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx, - LCK_EX, MDS_INODELOCK_ELC, - MF_MDC_CANCEL_FID3); - if (rc != 0) + /* cancel ELC locks of source */ + rc = lmv_early_cancel(exp, child_tgt, op_data, tgt->ltd_idx, LCK_EX, + MDS_INODELOCK_ELC, MF_MDC_CANCEL_FID3); + if (rc) + RETURN(rc); + + rc = md_rename(tgt->ltd_exp, op_data, name, namelen, NULL, 0, request); + + RETURN(rc); +} + +static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data, + const char *old, size_t oldlen, + const char *new, size_t newlen, + struct ptlrpc_request **request) +{ + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *sp_tgt; + struct lmv_tgt_desc *tp_tgt = NULL; + struct lmv_tgt_desc *src_tgt = NULL; + struct lmv_tgt_desc *tgt; + struct mdt_body *body; + int rc; + + ENTRY; + + LASSERT(oldlen != 0); + + if (op_data->op_cli_flags & CLI_MIGRATE) { + rc = lmv_migrate(exp, op_data, old, oldlen, request); + RETURN(rc); + } + + op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); + op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); + op_data->op_cap = cfs_curproc_cap_pack(); + + if (lmv_is_dir_migrating(op_data->op_mea2)) { + struct lu_fid fid1 = op_data->op_fid1; + struct lmv_stripe_md *lsm1 = op_data->op_mea1; + + /* + * we avoid creating new file under old layout of migrating + * directory, if there is an existing file with new name under + * old layout, we can't unlink file in old layout and rename to + * new layout in one transaction, so return -EBUSY here.` + */ + tgt = __lmv_locate_tgt(lmv, op_data->op_mea2, new, newlen, + &op_data->op_fid2, &op_data->op_mds, + false); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + + op_data->op_fid1 = op_data->op_fid2; + op_data->op_mea1 = op_data->op_mea2; + op_data->op_name = new; + op_data->op_namelen = newlen; + rc = md_getattr_name(tgt->ltd_exp, op_data, request); + op_data->op_fid1 = fid1; + op_data->op_mea1 = lsm1; + op_data->op_name = NULL; + op_data->op_namelen = 0; + if (!rc) { + ptlrpc_req_finished(*request); + *request = NULL; + RETURN(-EBUSY); + } + + if (rc != -ENOENT) RETURN(rc); } -retry_rename: - /* - * Cancel all the locks on tgt child (fid4). - */ + /* rename to new layout for migrating directory */ + tp_tgt = __lmv_locate_tgt(lmv, op_data->op_mea2, new, newlen, + &op_data->op_fid2, &op_data->op_mds, true); + if (IS_ERR(tp_tgt)) + RETURN(PTR_ERR(tp_tgt)); + + /* Since the target child might be destroyed, and it might become + * orphan, and we can only check orphan on the local MDT right now, so + * we send rename request to the MDT where target child is located. If + * target child does not exist, then it will send the request to the + * target parent */ + if (fid_is_sane(&op_data->op_fid4)) { + tgt = lmv_find_target(lmv, &op_data->op_fid4); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + } else { + tgt = tp_tgt; + } + + op_data->op_flags |= MF_MDC_CANCEL_FID4; + + /* cancel UPDATE locks of target parent */ + rc = lmv_early_cancel(exp, tp_tgt, op_data, tgt->ltd_idx, LCK_EX, + MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID2); + if (rc != 0) + RETURN(rc); + if (fid_is_sane(&op_data->op_fid4)) { - struct lmv_tgt_desc *tgt; + /* cancel LOOKUP lock of target on target parent */ + if (tgt != tp_tgt) { + rc = lmv_early_cancel(exp, tp_tgt, op_data, + tgt->ltd_idx, LCK_EX, + MDS_INODELOCK_LOOKUP, + MF_MDC_CANCEL_FID4); + if (rc != 0) + RETURN(rc); + } + } - rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx, + if (fid_is_sane(&op_data->op_fid3)) { + src_tgt = lmv_find_target(lmv, &op_data->op_fid3); + if (IS_ERR(src_tgt)) + RETURN(PTR_ERR(src_tgt)); + + /* cancel ELC locks of source */ + rc = lmv_early_cancel(exp, src_tgt, op_data, tgt->ltd_idx, LCK_EX, MDS_INODELOCK_ELC, - MF_MDC_CANCEL_FID4); + MF_MDC_CANCEL_FID3); if (rc != 0) RETURN(rc); + } - tgt = lmv_find_target(lmv, &op_data->op_fid4); - if (IS_ERR(tgt)) - RETURN(PTR_ERR(tgt)); +retry: + sp_tgt = __lmv_locate_tgt(lmv, op_data->op_mea1, old, oldlen, + &op_data->op_fid1, &op_data->op_mds, + op_data->op_post_migrate); + if (IS_ERR(sp_tgt)) + RETURN(PTR_ERR(sp_tgt)); + + /* cancel UPDATE locks of source parent */ + rc = lmv_early_cancel(exp, sp_tgt, op_data, tgt->ltd_idx, LCK_EX, + MDS_INODELOCK_UPDATE, MF_MDC_CANCEL_FID1); + if (rc != 0) + RETURN(rc); - /* Since the target child might be destroyed, and it might - * become orphan, and we can only check orphan on the local - * MDT right now, so we send rename request to the MDT where - * target child is located. If target child does not exist, - * then it will send the request to the target parent */ - target_exp = tgt->ltd_exp; + if (fid_is_sane(&op_data->op_fid3)) { + /* cancel LOOKUP lock of source on source parent */ + if (src_tgt != sp_tgt) { + rc = lmv_early_cancel(exp, sp_tgt, op_data, + tgt->ltd_idx, LCK_EX, + MDS_INODELOCK_LOOKUP, + MF_MDC_CANCEL_FID3); + if (rc != 0) + RETURN(rc); + } } - rc = md_rename(target_exp, op_data, old, oldlen, new, newlen, - request); +rename: + CDEBUG(D_INODE, "RENAME "DFID"/%.*s to "DFID"/%.*s\n", + PFID(&op_data->op_fid1), (int)oldlen, old, + PFID(&op_data->op_fid2), (int)newlen, new); + + rc = md_rename(tgt->ltd_exp, op_data, old, oldlen, new, newlen, + request); + if (rc == -ENOENT && lmv_dir_retry_check_update(op_data)) { + ptlrpc_req_finished(*request); + *request = NULL; + goto retry; + } - if (rc != 0 && rc != -EXDEV) + if (rc && rc != -EXDEV) RETURN(rc); body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); @@ -1990,13 +2265,28 @@ retry_rename: if (likely(!(body->mbo_valid & OBD_MD_MDS))) RETURN(rc); - CDEBUG(D_INODE, "%s: try rename to another MDT for "DFID"\n", - exp->exp_obd->obd_name, PFID(&body->mbo_fid1)); - op_data->op_fid4 = body->mbo_fid1; + ptlrpc_req_finished(*request); *request = NULL; - goto retry_rename; + + tgt = lmv_find_target(lmv, &op_data->op_fid4); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + + if (fid_is_sane(&op_data->op_fid4)) { + /* cancel LOOKUP lock of target on target parent */ + if (tgt != tp_tgt) { + rc = lmv_early_cancel(exp, tp_tgt, op_data, + tgt->ltd_idx, LCK_EX, + MDS_INODELOCK_LOOKUP, + MF_MDC_CANCEL_FID4); + if (rc != 0) + RETURN(rc); + } + } + + goto rename; } static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, @@ -2008,8 +2298,9 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, int rc = 0; ENTRY; - CDEBUG(D_INODE, "SETATTR for "DFID", valid 0x%x\n", - PFID(&op_data->op_fid1), op_data->op_attr.ia_valid); + CDEBUG(D_INODE, "SETATTR for "DFID", valid 0x%x/0x%x\n", + PFID(&op_data->op_fid1), op_data->op_attr.ia_valid, + op_data->op_xvalid); op_data->op_flags |= MF_MDC_CANCEL_FID1; tgt = lmv_find_target(lmv, &op_data->op_fid1); @@ -2054,7 +2345,7 @@ struct lmv_dir_ctxt { struct stripe_dirent ldc_stripes[0]; }; -static inline void put_stripe_dirent(struct stripe_dirent *stripe) +static inline void stripe_dirent_unload(struct stripe_dirent *stripe) { if (stripe->sd_page) { kunmap(stripe->sd_page); @@ -2069,58 +2360,78 @@ static inline void put_lmv_dir_ctxt(struct lmv_dir_ctxt *ctxt) int i; for (i = 0; i < ctxt->ldc_count; i++) - put_stripe_dirent(&ctxt->ldc_stripes[i]); + stripe_dirent_unload(&ctxt->ldc_stripes[i]); +} + +/* if @ent is dummy, or . .., get next */ +static struct lu_dirent *stripe_dirent_get(struct lmv_dir_ctxt *ctxt, + struct lu_dirent *ent, + int stripe_index) +{ + for (; ent; ent = lu_dirent_next(ent)) { + /* Skip dummy entry */ + if (le16_to_cpu(ent->lde_namelen) == 0) + continue; + + /* skip . and .. for other stripes */ + if (stripe_index && + (strncmp(ent->lde_name, ".", + le16_to_cpu(ent->lde_namelen)) == 0 || + strncmp(ent->lde_name, "..", + le16_to_cpu(ent->lde_namelen)) == 0)) + continue; + + if (le64_to_cpu(ent->lde_hash) >= ctxt->ldc_hash) + break; + } + + return ent; } -static struct lu_dirent *stripe_dirent_next(struct lmv_dir_ctxt *ctxt, +static struct lu_dirent *stripe_dirent_load(struct lmv_dir_ctxt *ctxt, struct stripe_dirent *stripe, int stripe_index) { + struct md_op_data *op_data = ctxt->ldc_op_data; + struct lmv_oinfo *oinfo; + struct lu_fid fid = op_data->op_fid1; + struct inode *inode = op_data->op_data; + struct lmv_tgt_desc *tgt; struct lu_dirent *ent = stripe->sd_ent; __u64 hash = ctxt->ldc_hash; - __u64 end; int rc = 0; + ENTRY; LASSERT(stripe == &ctxt->ldc_stripes[stripe_index]); - - if (stripe->sd_eof) - RETURN(NULL); - - if (ent) { - ent = lu_dirent_next(ent); - if (!ent) { -check_eof: - end = le64_to_cpu(stripe->sd_dp->ldp_hash_end); - LASSERTF(hash <= end, "hash %llx end %llx\n", - hash, end); + LASSERT(!ent); + + do { + if (stripe->sd_page) { + __u64 end = le64_to_cpu(stripe->sd_dp->ldp_hash_end); + + /* @hash should be the last dirent hash */ + LASSERTF(hash <= end, + "ctxt@%p stripe@%p hash %llx end %llx\n", + ctxt, stripe, hash, end); + /* unload last page */ + stripe_dirent_unload(stripe); + /* eof */ if (end == MDS_DIR_END_OFF) { - stripe->sd_ent = NULL; stripe->sd_eof = true; - RETURN(NULL); + break; } - - put_stripe_dirent(stripe); hash = end; } - } - - if (!ent) { - struct md_op_data *op_data = ctxt->ldc_op_data; - struct lmv_oinfo *oinfo; - struct lu_fid fid = op_data->op_fid1; - struct inode *inode = op_data->op_data; - struct lmv_tgt_desc *tgt; - - LASSERT(!stripe->sd_page); oinfo = &op_data->op_mea1->lsm_md_oinfo[stripe_index]; tgt = lmv_get_target(ctxt->ldc_lmv, oinfo->lmo_mds, NULL); - if (IS_ERR(tgt)) - GOTO(out, rc = PTR_ERR(tgt)); + if (IS_ERR(tgt)) { + rc = PTR_ERR(tgt); + break; + } - /* op_data will be shared by each stripe, so we need - * reset these value for each stripe */ + /* op_data is shared by stripes, reset after use */ op_data->op_fid1 = oinfo->lmo_fid; op_data->op_fid2 = oinfo->lmo_fid; op_data->op_data = oinfo->lmo_root; @@ -2133,45 +2444,26 @@ check_eof: op_data->op_data = inode; if (rc) - GOTO(out, rc); - - stripe->sd_dp = page_address(stripe->sd_page); - ent = lu_dirent_start(stripe->sd_dp); - } - - for (; ent; ent = lu_dirent_next(ent)) { - /* Skip dummy entry */ - if (le16_to_cpu(ent->lde_namelen) == 0) - continue; - - /* skip . and .. for other stripes */ - if (stripe_index && - (strncmp(ent->lde_name, ".", - le16_to_cpu(ent->lde_namelen)) == 0 || - strncmp(ent->lde_name, "..", - le16_to_cpu(ent->lde_namelen)) == 0)) - continue; - - if (le64_to_cpu(ent->lde_hash) >= hash) break; - } - if (!ent) - goto check_eof; - EXIT; + stripe->sd_dp = page_address(stripe->sd_page); + ent = stripe_dirent_get(ctxt, lu_dirent_start(stripe->sd_dp), + stripe_index); + /* in case a page filled with ., .. and dummy, read next */ + } while (!ent); -out: stripe->sd_ent = ent; - /* treat error as eof, so dir can be partially accessed */ if (rc) { - put_stripe_dirent(stripe); + LASSERT(!ent); + /* treat error as eof, so dir can be partially accessed */ stripe->sd_eof = true; LCONSOLE_WARN("dir "DFID" stripe %d readdir failed: %d, " "directory is partially accessed!\n", PFID(&ctxt->ldc_op_data->op_fid1), stripe_index, rc); } - return ent; + + RETURN(ent); } static int lmv_file_resync(struct obd_export *exp, struct md_op_data *data) @@ -2224,8 +2516,7 @@ static struct lu_dirent *lmv_dirent_next(struct lmv_dir_ctxt *ctxt) continue; if (!stripe->sd_ent) { - /* locate starting entry */ - stripe_dirent_next(ctxt, stripe, i); + stripe_dirent_load(ctxt, stripe, i); if (!stripe->sd_ent) { LASSERT(stripe->sd_eof); continue; @@ -2246,7 +2537,8 @@ static struct lu_dirent *lmv_dirent_next(struct lmv_dir_ctxt *ctxt) stripe = &ctxt->ldc_stripes[min]; ent = stripe->sd_ent; /* pop found dirent */ - stripe_dirent_next(ctxt, stripe, min); + stripe->sd_ent = stripe_dirent_get(ctxt, lu_dirent_next(ent), + min); } return ent; @@ -2438,68 +2730,34 @@ int lmv_read_page(struct obd_export *exp, struct md_op_data *op_data, * negative errno if failed. */ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) + struct ptlrpc_request **request) { - struct obd_device *obd = exp->exp_obd; - struct lmv_obd *lmv = &obd->u.lmv; - struct lmv_tgt_desc *tgt = NULL; - struct lmv_tgt_desc *parent_tgt = NULL; - struct mdt_body *body; - int rc; - int stripe_index = 0; - struct lmv_stripe_md *lsm = op_data->op_mea1; - ENTRY; - -retry_unlink: - /* For striped dir, we need to locate the parent as well */ - if (lsm != NULL) { - struct lmv_tgt_desc *tmp; - - LASSERT(op_data->op_name != NULL && - op_data->op_namelen != 0); - - tmp = lmv_locate_target_for_name(lmv, lsm, - op_data->op_name, - op_data->op_namelen, - &op_data->op_fid1, - &op_data->op_mds); - - /* return -EBADFD means unknown hash type, might - * need try all sub-stripe here */ - if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD) - RETURN(PTR_ERR(tmp)); - - /* Note: both migrating dir and unknown hash dir need to - * try all of sub-stripes, so we need start search the - * name from stripe 0, but migrating dir is already handled - * inside lmv_locate_target_for_name(), so we only check - * unknown hash type directory here */ - if (!lmv_is_known_hash_type(lsm->lsm_md_hash_type)) { - struct lmv_oinfo *oinfo; - - oinfo = &lsm->lsm_md_oinfo[stripe_index]; - - op_data->op_fid1 = oinfo->lmo_fid; - op_data->op_mds = oinfo->lmo_mds; - } - } - -try_next_stripe: - /* Send unlink requests to the MDT where the child is located */ - if (likely(!fid_is_zero(&op_data->op_fid2))) - tgt = lmv_find_target(lmv, &op_data->op_fid2); - else if (lsm != NULL) - tgt = lmv_get_target(lmv, op_data->op_mds, NULL); - else - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); + struct obd_device *obd = exp->exp_obd; + struct lmv_obd *lmv = &obd->u.lmv; + struct lmv_tgt_desc *tgt; + struct lmv_tgt_desc *parent_tgt; + struct mdt_body *body; + int rc; - if (IS_ERR(tgt)) - RETURN(PTR_ERR(tgt)); + ENTRY; op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); op_data->op_cap = cfs_curproc_cap_pack(); +retry: + parent_tgt = lmv_locate_tgt(lmv, op_data, &op_data->op_fid1); + if (IS_ERR(parent_tgt)) + RETURN(PTR_ERR(parent_tgt)); + + if (likely(!fid_is_zero(&op_data->op_fid2))) { + tgt = lmv_find_target(lmv, &op_data->op_fid2); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + } else { + tgt = parent_tgt; + } + /* * If child's fid is given, cancel unused locks for it if it is from * another export than parent. @@ -2509,50 +2767,29 @@ try_next_stripe: */ op_data->op_flags |= MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID3; - /* - * Cancel FULL locks on child (fid3). - */ - parent_tgt = lmv_find_target(lmv, &op_data->op_fid1); - if (IS_ERR(parent_tgt)) - RETURN(PTR_ERR(parent_tgt)); - - if (parent_tgt != tgt) { + if (parent_tgt != tgt) rc = lmv_early_cancel(exp, parent_tgt, op_data, tgt->ltd_idx, LCK_EX, MDS_INODELOCK_LOOKUP, MF_MDC_CANCEL_FID3); - } rc = lmv_early_cancel(exp, NULL, op_data, tgt->ltd_idx, LCK_EX, MDS_INODELOCK_ELC, MF_MDC_CANCEL_FID3); - if (rc != 0) + if (rc) RETURN(rc); CDEBUG(D_INODE, "unlink with fid="DFID"/"DFID" -> mds #%u\n", PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx); rc = md_unlink(tgt->ltd_exp, op_data, request); - if (rc != 0 && rc != -EREMOTE && rc != -ENOENT) - RETURN(rc); - - /* Try next stripe if it is needed. */ - if (rc == -ENOENT && lsm != NULL && lmv_need_try_all_stripes(lsm)) { - struct lmv_oinfo *oinfo; - - stripe_index++; - if (stripe_index >= lsm->lsm_md_stripe_count) - RETURN(rc); - - oinfo = &lsm->lsm_md_oinfo[stripe_index]; - - op_data->op_fid1 = oinfo->lmo_fid; - op_data->op_mds = oinfo->lmo_mds; - + if (rc == -ENOENT && lmv_dir_retry_check_update(op_data)) { ptlrpc_req_finished(*request); *request = NULL; - - goto try_next_stripe; + goto retry; } + if (rc != -EREMOTE) + RETURN(rc); + body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); if (body == NULL) RETURN(-EPROTO); @@ -2561,40 +2798,23 @@ try_next_stripe: if (likely(!(body->mbo_valid & OBD_MD_MDS))) RETURN(rc); - CDEBUG(D_INODE, "%s: try unlink to another MDT for "DFID"\n", - exp->exp_obd->obd_name, PFID(&body->mbo_fid1)); - - /* This is a remote object, try remote MDT, Note: it may - * try more than 1 time here, Considering following case - * /mnt/lustre is root on MDT0, remote1 is on MDT1 - * 1. Initially A does not know where remote1 is, it send - * unlink RPC to MDT0, MDT0 return -EREMOTE, it will - * resend unlink RPC to MDT1 (retry 1st time). - * - * 2. During the unlink RPC in flight, - * client B mv /mnt/lustre/remote1 /mnt/lustre/remote2 - * and create new remote1, but on MDT0 - * - * 3. MDT1 get unlink RPC(from A), then do remote lock on - * /mnt/lustre, then lookup get fid of remote1, and find - * it is remote dir again, and replay -EREMOTE again. - * - * 4. Then A will resend unlink RPC to MDT0. (retry 2nd times). - * - * In theory, it might try unlimited time here, but it should - * be very rare case. */ + /* This is a remote object, try remote MDT. */ op_data->op_fid2 = body->mbo_fid1; ptlrpc_req_finished(*request); *request = NULL; - goto retry_unlink; + tgt = lmv_find_target(lmv, &op_data->op_fid2); + if (IS_ERR(tgt)) + RETURN(PTR_ERR(tgt)); + + goto retry; } static int lmv_precleanup(struct obd_device *obd) { ENTRY; libcfs_kkuc_group_rem(&obd->obd_uuid, 0, KUC_GRP_HSM); - fld_client_proc_fini(&obd->u.lmv.lmv_fld); + fld_client_debugfs_fini(&obd->u.lmv.lmv_fld); lprocfs_obd_cleanup(obd); lprocfs_free_md_stats(obd); RETURN(0); @@ -2744,13 +2964,15 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm, else lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type); lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version); + lsm->lsm_md_migrate_offset = le32_to_cpu(lmm1->lmv_migrate_offset); + lsm->lsm_md_migrate_hash = le32_to_cpu(lmm1->lmv_migrate_hash); cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name, sizeof(lsm->lsm_md_pool_name)); if (cplen >= sizeof(lsm->lsm_md_pool_name)) RETURN(-E2BIG); - CDEBUG(D_INFO, "unpack lsm count %d, master %d hash_type %d" + CDEBUG(D_INFO, "unpack lsm count %d, master %d hash_type %#x " "layout_version %d\n", lsm->lsm_md_stripe_count, lsm->lsm_md_master_mdt_index, lsm->lsm_md_hash_type, lsm->lsm_md_layout_version); @@ -2785,14 +3007,9 @@ static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, /* Free memmd */ if (lsm != NULL && lmm == NULL) { int i; - for (i = 0; i < lsm->lsm_md_stripe_count; i++) { - /* For migrating inode, the master stripe and master - * object will be the same, so do not need iput, see - * ll_update_lsm_md */ - if (!(lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && - i == 0) && lsm->lsm_md_oinfo[i].lmo_root != NULL) - iput(lsm->lsm_md_oinfo[i].lmo_root); - } + + for (i = 0; i < lsm->lsm_md_stripe_count; i++) + iput(lsm->lsm_md_oinfo[i].lmo_root); lsm_size = lmv_stripe_md_size(lsm->lsm_md_stripe_count); OBD_FREE(lsm, lsm_size); *lsmp = NULL; @@ -3012,7 +3229,7 @@ int lmv_intent_getattr_async(struct obd_export *exp, if (!fid_is_sane(&op_data->op_fid2)) RETURN(-EINVAL); - tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); + tgt = lmv_find_target(lmv, &op_data->op_fid1); if (IS_ERR(tgt)) RETURN(PTR_ERR(tgt)); @@ -3053,7 +3270,7 @@ int lmv_get_fid_from_lsm(struct obd_export *exp, const struct lmv_oinfo *oinfo; LASSERT(lsm != NULL); - oinfo = lsm_name_to_stripe_info(lsm, name, namelen); + oinfo = lsm_name_to_stripe_info(lsm, name, namelen, false); if (IS_ERR(oinfo)) return PTR_ERR(oinfo); @@ -3130,12 +3347,12 @@ static int lmv_merge_attr(struct obd_export *exp, for (i = 0; i < lsm->lsm_md_stripe_count; i++) { struct inode *inode = lsm->lsm_md_oinfo[i].lmo_root; - CDEBUG(D_INFO, ""DFID" size %llu, blocks %llu nlink %u," - " atime %lu ctime %lu, mtime %lu.\n", + CDEBUG(D_INFO, + "" DFID " size %llu, blocks %llu nlink %u, atime %lld ctime %lld, mtime %lld.\n", PFID(&lsm->lsm_md_oinfo[i].lmo_fid), i_size_read(inode), (unsigned long long)inode->i_blocks, - inode->i_nlink, LTIME_S(inode->i_atime), - LTIME_S(inode->i_ctime), LTIME_S(inode->i_mtime)); + inode->i_nlink, (s64)inode->i_atime.tv_sec, + (s64)inode->i_ctime.tv_sec, (s64)inode->i_mtime.tv_sec); /* for slave stripe, it needs to subtract nlink for . and .. */ if (i != 0) @@ -3146,14 +3363,14 @@ static int lmv_merge_attr(struct obd_export *exp, attr->cat_size += i_size_read(inode); attr->cat_blocks += inode->i_blocks; - if (attr->cat_atime < LTIME_S(inode->i_atime)) - attr->cat_atime = LTIME_S(inode->i_atime); + if (attr->cat_atime < inode->i_atime.tv_sec) + attr->cat_atime = inode->i_atime.tv_sec; - if (attr->cat_ctime < LTIME_S(inode->i_ctime)) - attr->cat_ctime = LTIME_S(inode->i_ctime); + if (attr->cat_ctime < inode->i_ctime.tv_sec) + attr->cat_ctime = inode->i_ctime.tv_sec; - if (attr->cat_mtime < LTIME_S(inode->i_mtime)) - attr->cat_mtime = LTIME_S(inode->i_mtime); + if (attr->cat_mtime < inode->i_mtime.tv_sec) + attr->cat_mtime = inode->i_mtime.tv_sec; } return 0; }