X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdt%2Fmdt_reint.c;h=45cdfb0bd7eaac19c146ee50d725b5e8fd3d6f88;hp=670beef7deee88fce12413decafa115e62feaf40;hb=05e6ccd344e7eba44e43230fa2fa0a1b3b6115c4;hpb=65e3e4050ec5bb371c1c343fca49a605286a086e diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 670beef..45cdfb0 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lustre/mdt/mdt_reint.c * @@ -45,6 +44,7 @@ #include #include "mdt_internal.h" #include +#include static inline void mdt_reint_init_ma(struct mdt_thread_info *info, struct md_attr *ma) @@ -353,6 +353,7 @@ static int mdt_restripe(struct mdt_thread_info *info, struct mdt_device *mdt = info->mti_mdt; struct lu_fid *fid = &info->mti_tmp_fid2; struct ldlm_enqueue_info *einfo = &info->mti_einfo[0]; + struct lmv_user_md *lum = spec->u.sp_ea.eadata; struct lmv_mds_md_v1 *lmv; struct mdt_object *child; struct mdt_lock_handle *lhp; @@ -364,6 +365,9 @@ static int mdt_restripe(struct mdt_thread_info *info, if (!mdt->mdt_enable_dir_restripe) RETURN(-EPERM); + LASSERT(lum); + lum->lum_hash_type |= cpu_to_le32(LMV_HASH_FLAG_FIXED); + rc = mdt_version_get_check_save(info, parent, 0); if (rc) RETURN(rc); @@ -382,7 +386,7 @@ static int mdt_restripe(struct mdt_thread_info *info, if (ma->ma_valid & MA_LMV) { /* don't allow restripe if parent dir layout is changing */ lmv = &ma->ma_lmv->lmv_md_v1; - if (!lmv_is_sane(lmv)) + if (!lmv_is_sane2(lmv)) GOTO(unlock_parent, rc = -EBADF); if (lmv_is_layout_changing(lmv)) @@ -413,6 +417,18 @@ static int mdt_restripe(struct mdt_thread_info *info, GOTO(out_child, rc = -EREMOTE); } + if (!S_ISDIR(lu_object_attr(&child->mot_obj))) + GOTO(out_child, rc = -ENOTDIR); + + rc = mdt_stripe_get(info, child, ma, XATTR_NAME_LMV); + if (rc) + GOTO(out_child, rc); + + /* race with migrate? 
*/ + if ((ma->ma_valid & MA_LMV) && + lmv_is_migrating(&ma->ma_lmv->lmv_md_v1)) + GOTO(out_child, rc = -EBUSY); + /* lock object */ lhc = &info->mti_lh[MDT_LH_CHILD]; mdt_lock_reg_init(lhc, LCK_EX); @@ -526,7 +542,7 @@ static int mdt_create(struct mdt_thread_info *info) LMV_HASH_TYPE_CRUSH) RETURN(-EPROTO); - if (!md_capable(uc, CAP_SYS_ADMIN) && + if (!cap_raised(uc->uc_cap, CAP_SYS_ADMIN) && uc->uc_gid != mdt->mdt_enable_remote_dir_gid && mdt->mdt_enable_remote_dir_gid != -1) RETURN(-EPERM); @@ -600,9 +616,6 @@ static int mdt_create(struct mdt_thread_info *info) if (rc) GOTO(put_child, rc); - /* Let lower layer know current lock mode. */ - info->mti_spec.sp_cr_mode = mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode); - /* * Do not perform lookup sanity check. We know that name does * not exist. @@ -734,7 +747,7 @@ static int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, if (rc != 0) GOTO(out_unlock, rc); - mdt_dom_obj_lvb_update(info->mti_env, mo, false); + mdt_dom_obj_lvb_update(info->mti_env, mo, NULL, false); EXIT; out_unlock: mdt_reint_striped_unlock(info, mo, lh, einfo, rc); @@ -751,7 +764,7 @@ int mdt_add_dirty_flag(struct mdt_thread_info *info, struct mdt_object *mo, struct md_attr *ma) { struct lu_ucred *uc = mdt_ucred(info); - cfs_cap_t cap_saved; + kernel_cap_t cap_saved; int rc; ENTRY; @@ -773,7 +786,7 @@ int mdt_add_dirty_flag(struct mdt_thread_info *info, struct mdt_object *mo, * set the HSM state to dirty. 
*/ cap_saved = uc->uc_cap; - uc->uc_cap |= MD_CAP_TO_MASK(CAP_FOWNER); + cap_raise(uc->uc_cap, CAP_FOWNER); rc = mdt_hsm_attr_set(info, mo, &ma->ma_hsm); uc->uc_cap = cap_saved; if (rc) @@ -910,7 +923,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, !mdt->mdt_enable_striped_dir) GOTO(out_put, rc = -EPERM); - if (!md_capable(uc, CAP_SYS_ADMIN) && + if (!cap_raised(uc->uc_cap, CAP_SYS_ADMIN) && uc->uc_gid != mdt->mdt_enable_remote_dir_gid && mdt->mdt_enable_remote_dir_gid != -1) GOTO(out_put, rc = -EPERM); @@ -922,22 +935,56 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, if (ma->ma_attr.la_valid != 0) GOTO(out_put, rc = -EPROTO); + lh = &info->mti_lh[MDT_LH_PARENT]; + mdt_lock_reg_init(lh, LCK_PW); + if (ma->ma_valid & MA_LOV) { buf->lb_buf = ma->ma_lmm; buf->lb_len = ma->ma_lmm_size; name = XATTR_NAME_LOV; } else { struct lmv_user_md *lmu = &ma->ma_lmv->lmv_user_md; + struct lu_fid *pfid = &info->mti_tmp_fid1; + struct lu_name *pname = &info->mti_name; + const char dotdot[] = ".."; + struct mdt_object *pobj; buf->lb_buf = lmu; buf->lb_len = ma->ma_lmv_size; name = XATTR_NAME_DEFAULT_LMV; - /* force client to update dir default layout */ - lockpart |= MDS_INODELOCK_LOOKUP; - } - lh = &info->mti_lh[MDT_LH_PARENT]; - mdt_lock_reg_init(lh, LCK_PW); + if (fid_is_root(rr->rr_fid1)) { + lockpart |= MDS_INODELOCK_LOOKUP; + } else { + /* force client to update dir default layout */ + fid_zero(pfid); + pname->ln_name = dotdot; + pname->ln_namelen = sizeof(dotdot); + rc = mdo_lookup(info->mti_env, + mdt_object_child(mo), pname, + pfid, NULL); + if (rc) + GOTO(out_put, rc); + + pobj = mdt_object_find(info->mti_env, mdt, + pfid); + if (IS_ERR(pobj)) + GOTO(out_put, rc = PTR_ERR(pobj)); + + if (mdt_object_remote(pobj)) + rc = mdt_remote_object_lock(info, pobj, + mdt_object_fid(mo), + &lh->mlh_rreg_lh, LCK_EX, + MDS_INODELOCK_LOOKUP, false); + else + lockpart |= MDS_INODELOCK_LOOKUP; + + mdt_object_put(info->mti_env, pobj); + + if (rc) + GOTO(out_put, 
rc); + } + } rc = mdt_object_lock(info, mo, lh, lockpart); if (rc != 0) @@ -1085,31 +1132,36 @@ relock: if (rc != 0) GOTO(put_parent, rc); - /* lookup child object along with version checking */ - fid_zero(child_fid); - rc = mdt_lookup_version_check(info, mp, &rr->rr_name, child_fid, 1); - if (rc != 0) { - /* Name might not be able to find during resend of - * remote unlink, considering following case. - * dir_A is a remote directory, the name entry of - * dir_A is on MDT0, the directory is on MDT1, - * - * 1. client sends unlink req to MDT1. - * 2. MDT1 sends name delete update to MDT0. - * 3. name entry is being deleted in MDT0 synchronously. - * 4. MDT1 is restarted. - * 5. client resends unlink req to MDT1. So it can not - * find the name entry on MDT0 anymore. - * In this case, MDT1 only needs to destory the local - * directory. - */ - if (mdt_object_remote(mp) && rc == -ENOENT && - !fid_is_zero(rr->rr_fid2) && - lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { - no_name = 1; - *child_fid = *rr->rr_fid2; - } else { - GOTO(unlock_parent, rc); + if (info->mti_spec.sp_cr_flags & MDS_OP_WITH_FID) { + *child_fid = *rr->rr_fid2; + } else { + /* lookup child object along with version checking */ + fid_zero(child_fid); + rc = mdt_lookup_version_check(info, mp, &rr->rr_name, child_fid, + 1); + if (rc != 0) { + /* Name might not be able to find during resend of + * remote unlink, considering following case. + * dir_A is a remote directory, the name entry of + * dir_A is on MDT0, the directory is on MDT1, + * + * 1. client sends unlink req to MDT1. + * 2. MDT1 sends name delete update to MDT0. + * 3. name entry is being deleted in MDT0 synchronously. + * 4. MDT1 is restarted. + * 5. client resends unlink req to MDT1. So it can not + * find the name entry on MDT0 anymore. + * In this case, MDT1 only needs to destroy the local + * directory. 
+ */ + if (mdt_object_remote(mp) && rc == -ENOENT && + !fid_is_zero(rr->rr_fid2) && + lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + no_name = 1; + *child_fid = *rr->rr_fid2; + } else { + GOTO(unlock_parent, rc); + } } } @@ -1121,6 +1173,16 @@ relock: if (IS_ERR(mc)) GOTO(unlock_parent, rc = PTR_ERR(mc)); + if (info->mti_spec.sp_cr_flags & MDS_OP_WITH_FID) { + /* In this case, child fid is embedded in the request, and we do + * not have a proper name as rr_name contains an encoded + * hash. So find name that matches provided hash. + */ + if (!find_name_matching_hash(info, &rr->rr_name, + NULL, mc, false)) + GOTO(put_child, rc = -ENOENT); + } + if (!cos_incompat) { rc = mdt_object_striped(info, mc); if (rc < 0) @@ -1143,7 +1205,7 @@ relock: /* Return -ENOTSUPP for old client */ GOTO(put_child, rc = -ENOTSUPP); - if (!md_capable(uc, CAP_SYS_ADMIN)) + if (!cap_raised(uc->uc_cap, CAP_SYS_ADMIN)) GOTO(put_child, rc = -EPERM); ma->ma_need = MA_INODE; @@ -1262,6 +1324,9 @@ out_stat: unlock_child: mdt_reint_striped_unlock(info, mc, child_lh, einfo, rc); put_child: + if (info->mti_spec.sp_cr_flags & MDS_OP_WITH_FID && + info->mti_big_buf.lb_buf) + lu_buf_free(&info->mti_big_buf); mdt_object_put(info->mti_env, mc); unlock_parent: mdt_object_unlock(info, mp, parent_lh, rc); @@ -1296,7 +1361,8 @@ static int mdt_reint_link(struct mdt_thread_info *info, if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK)) RETURN(err_serious(-ENOENT)); - if (OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_RESEND_RACE)) { + if (OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_RESEND_RACE) || + OBD_FAIL_PRECHECK(OBD_FAIL_PTLRPC_ENQ_RESEND)) { req->rq_no_reply = 1; RETURN(err_serious(-ENOENT)); } @@ -1336,6 +1402,8 @@ static int mdt_reint_link(struct mdt_thread_info *info, cos_incompat = (mdt_object_remote(mp) || mdt_object_remote(ms)); + OBD_RACE(OBD_FAIL_MDS_LINK_RENAME_RACE); + lhp = &info->mti_lh[MDT_LH_PARENT]; mdt_lock_pdo_init(lhp, LCK_PW, &rr->rr_name); rc = mdt_reint_object_lock(info, mp, lhp, MDS_INODELOCK_UPDATE, @@ 
-2111,7 +2179,8 @@ static int mdd_migrate_close(struct mdt_thread_info *info, * cancelled, it's okay to cancel it now as we've held mot_open_sem. */ ldlm_lock_cancel(lease); - ldlm_reprocess_all(lease->l_resource, lease); + ldlm_reprocess_all(lease->l_resource, + lease->l_policy_data.l_inodebits.bits); LDLM_LOCK_PUT(lease); close: @@ -2180,7 +2249,7 @@ int mdt_reint_migrate(struct mdt_thread_info *info, if (!mdt->mdt_enable_remote_dir || !mdt->mdt_enable_dir_migration) RETURN(-EPERM); - if (uc && !md_capable(uc, CAP_SYS_ADMIN) && + if (uc && !cap_raised(uc->uc_cap, CAP_SYS_ADMIN) && uc->uc_gid != mdt->mdt_enable_remote_dir_gid && mdt->mdt_enable_remote_dir_gid != -1) RETURN(-EPERM); @@ -2268,6 +2337,15 @@ lock_parent: if (ma->ma_valid & MA_LOV && mdt_lmm_dom_stripesize(ma->ma_lmm)) info->mti_spec.sp_migrate_nsonly = 1; + } else if (S_ISDIR(lu_object_attr(&sobj->mot_obj))) { + rc = mdt_stripe_get(info, sobj, ma, XATTR_NAME_LMV); + if (rc) + GOTO(unlock_links, rc); + + /* race with restripe/auto-split? */ + if ((ma->ma_valid & MA_LMV) && + lmv_is_restriping(&ma->ma_lmv->lmv_md_v1)) + GOTO(unlock_links, rc = -EBUSY); } /* if migration HSM is allowed */ @@ -2608,12 +2686,12 @@ static int mdt_reint_rename(struct mdt_thread_info *info, mdt_object_remote(msrcdir)) GOTO(out_put_tgtdir, rc = -EXDEV); - /* This may be further relaxed in the future for different - * source and target parents. Start with only same-directory - * renames of non-directory files for simplicity and because - * this is by far the most common use case. + /* This might be further relaxed in the future for regular file + * renames in different source and target parents. Start with + * only same-directory renames for simplicity and because this + * is by far the most common use case. 
*/ - if (msrcdir != mtgtdir || S_ISDIR(ma->ma_attr.la_mode)) { + if (msrcdir != mtgtdir) { rc = mdt_rename_lock(info, &rename_lh); if (rc != 0) { CERROR("%s: cannot lock for rename: rc = %d\n", @@ -2925,6 +3003,7 @@ out_put_srcdir: mdt_dom_discard_data(info, mnew); mdt_object_put(info->mti_env, mnew); } + OBD_RACE(OBD_FAIL_MDS_LINK_RENAME_RACE); return rc; }