X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Fmdt%2Fmdt_reint.c;h=5b5a1b0d3835045faefdcdf5d884c082ac34e416;hb=ee9fa55a5dd8e2c5efc09a1d864f5eb3c127053c;hp=1545724ca20dcad7f55e70e789dfa033c0cec292;hpb=a35113b690e39dcd39e126efc9085b3bc160b4ff;p=fs%2Flustre-release.git diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 1545724..5b5a1b0 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -66,7 +66,7 @@ static int mdt_create_pack_capa(struct mdt_thread_info *info, int rc, if (repbody->valid & OBD_MD_FLMDSCAPA) RETURN(rc); - if (rc == 0 && info->mti_mdt->mdt_opts.mo_mds_capa && + if (rc == 0 && info->mti_mdt->mdt_lut.lut_mds_capa && exp_connect_flags(info->mti_exp) & OBD_CONNECT_MDS_CAPA) { struct lustre_capa *capa; @@ -349,7 +349,7 @@ static int mdt_md_create(struct mdt_thread_info *info) OBD_FAIL_MDS_REINT_CREATE_WRITE); /* Version of child will be updated on disk. */ - info->mti_mos = child; + tgt_vbr_obj_set(info->mti_env, mdt_obj2dt(child)); rc = mdt_version_get_check_save(info, child, 2); if (rc) GOTO(out_put_child, rc); @@ -428,7 +428,7 @@ int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, /* VBR: update version if attr changed are important for recovery */ if (do_vbr) { /* update on-disk version of changed object */ - info->mti_mos = mo; + tgt_vbr_obj_set(info->mti_env, mdt_obj2dt(mo)); rc = mdt_version_get_check_save(info, mo, 0); if (rc) GOTO(out_unlock, rc); @@ -462,21 +462,28 @@ int mdt_add_dirty_flag(struct mdt_thread_info *info, struct mdt_object *mo, rc = mdt_attr_get_complex(info, mo, ma); if (rc) { CERROR("file attribute read error for "DFID": %d.\n", - PFID(lu_object_fid(&mo->mot_obj.mo_lu)), rc); + PFID(mdt_object_fid(mo)), rc); RETURN(rc); } /* If an up2date copy exists in the backend, add dirty flag */ if ((ma->ma_valid & MA_HSM) && (ma->ma_hsm.mh_flags & HS_EXISTS) && !(ma->ma_hsm.mh_flags & (HS_DIRTY|HS_RELEASED))) { + struct mdt_lock_handle *lh = &info->mti_lh[MDT_LH_CHILD]; ma->ma_hsm.mh_flags |= HS_DIRTY; + + mdt_lock_reg_init(lh, LCK_PW); + rc = mdt_object_lock(info, mo, lh, MDS_INODELOCK_XATTR, + MDT_LOCAL_LOCK); + if (rc != 0) + RETURN(rc); + rc = mdt_hsm_attr_set(info, mo, &ma->ma_hsm); - if (rc) { + if (rc) CERROR("file attribute change error for "DFID": %d\n", - PFID(lu_object_fid(&mo->mot_obj.mo_lu)), rc); - RETURN(rc); - } + PFID(mdt_object_fid(mo)), rc); + mdt_object_unlock(info, mo, lh, rc); } RETURN(rc); @@ -521,7 +528,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, if (rc) GOTO(out_put, rc); - mfd = mdt_mfd_new(); + mfd = mdt_mfd_new(med); if (mfd == NULL) { mdt_write_put(mo); GOTO(out_put, rc = -ENOMEM); @@ -549,7 +556,8 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, LASSERT(info->mti_ioepoch); spin_lock(&med->med_open_lock); - mfd = mdt_handle2mfd(info, &info->mti_ioepoch->handle); + mfd = mdt_handle2mfd(med, &info->mti_ioepoch->handle, + req_is_replay(req)); if (mfd == NULL) { spin_unlock(&med->med_open_lock); CDEBUG(D_INODE, "no handle for file close: " @@ -590,14 +598,14 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, ma->ma_need = MA_INODE; ma->ma_valid = 0; rc = mdt_attr_get_complex(info, mo, ma); - if (rc != 0) - GOTO(out_put, rc); + if (rc != 0) + GOTO(out_put, rc); - mdt_pack_attr2body(info, repbody, &ma->ma_attr, mdt_object_fid(mo)); + mdt_pack_attr2body(info, repbody, &ma->ma_attr, mdt_object_fid(mo)); - if (info->mti_mdt->mdt_opts.mo_oss_capa && + if (info->mti_mdt->mdt_lut.lut_oss_capa && exp_connect_flags(info->mti_exp) & OBD_CONNECT_OSS_CAPA && - S_ISREG(lu_object_attr(&mo->mot_obj.mo_lu)) && + S_ISREG(lu_object_attr(&mo->mot_obj)) && (ma->ma_attr.la_valid & LA_SIZE) && !som_au) { struct lustre_capa *capa; @@ -820,26 +828,34 @@ static int mdt_reint_unlink(struct mdt_thread_info *info, GOTO(put_child, rc); } - rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL, - MDT_CROSS_LOCK); - if (rc != 0) { + /* We used to acquire MDS_INODELOCK_FULL here but we can't do + * this now because a running HSM restore on the child (unlink + * victim) will hold the layout lock. See LU-4002. */ + rc = mdt_object_lock(info, mc, child_lh, + MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE, + MDT_CROSS_LOCK); + if (rc != 0) GOTO(put_child, rc); - } mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_UNLINK_WRITE); /* save version when object is locked */ mdt_version_get_save(info, mc, 1); - /* - * Now we can only make sure we need MA_INODE, in mdd layer, will check - * whether need MA_LOV and MA_COOKIE. - */ - ma->ma_need = MA_INODE; - ma->ma_valid = 0; - mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); + /* + * Now we can only make sure we need MA_INODE, in mdd layer, will check + * whether need MA_LOV and MA_COOKIE. + */ + ma->ma_need = MA_INODE; + ma->ma_valid = 0; + mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); + + mutex_lock(&mc->mot_lov_mutex); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), mdt_object_child(mc), lname, ma, no_name); + + mutex_unlock(&mc->mot_lov_mutex); + if (rc == 0 && !lu_object_is_dying(&mc->mot_header)) rc = mdt_attr_get_complex(info, mc, ma); if (rc == 0) @@ -942,8 +958,8 @@ static int mdt_reint_link(struct mdt_thread_info *info, GOTO(out_unlock_parent, rc = -EXDEV); } - rc = mdt_object_lock(info, ms, lhs, MDS_INODELOCK_UPDATE, - MDT_CROSS_LOCK); + rc = mdt_object_lock(info, ms, lhs, MDS_INODELOCK_UPDATE | + MDS_INODELOCK_XATTR, MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, ms); GOTO(out_unlock_parent, rc); @@ -953,7 +969,7 @@ static int mdt_reint_link(struct mdt_thread_info *info, mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_LINK_WRITE); - info->mti_mos = ms; + tgt_vbr_obj_set(info->mti_env, mdt_obj2dt(ms)); rc = mdt_version_get_check_save(info, ms, 1); if (rc) GOTO(out_unlock_child, rc); @@ -1033,7 +1049,7 @@ static int mdt_rename_lock(struct mdt_thread_info *info, memset(policy, 0, sizeof *policy); policy->l_inodebits.bits = MDS_INODELOCK_UPDATE; -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 4, 53, 0) +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 5, 53, 0) /* In phase I, we will not do cross-rename, so local BFL lock would * be enough */ @@ -1234,14 +1250,14 @@ static int mdt_reint_rename(struct mdt_thread_info *info, lh_oldp = &info->mti_lh[MDT_LH_OLD]; mdt_lock_reg_init(lh_oldp, LCK_EX); - rc = mdt_object_lock(info, mold, lh_oldp, MDS_INODELOCK_LOOKUP, - MDT_CROSS_LOCK); + rc = mdt_object_lock(info, mold, lh_oldp, MDS_INODELOCK_LOOKUP | + MDS_INODELOCK_XATTR, MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, mold); GOTO(out_unlock_target, rc); } - info->mti_mos = mold; + tgt_vbr_obj_set(info->mti_env, mdt_obj2dt(mold)); /* save version after locking */ mdt_version_get_save(info, mold, 2); mdt_set_capainfo(info, 2, old_fid, BYPASS_CAPA); @@ -1282,8 +1298,14 @@ static int mdt_reint_rename(struct mdt_thread_info *info, GOTO(out_unlock_old, rc = -EXDEV); } - rc = mdt_object_lock(info, mnew, lh_newp, - MDS_INODELOCK_FULL, MDT_CROSS_LOCK); + /* We used to acquire MDS_INODELOCK_FULL here but we + * can't do this now because a running HSM restore on + * the rename onto victim will hold the layout + * lock. See LU-4002. */ + rc = mdt_object_lock(info, mnew, lh_newp, + MDS_INODELOCK_LOOKUP | + MDS_INODELOCK_UPDATE, + MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, mnew); GOTO(out_unlock_old, rc); @@ -1294,7 +1316,14 @@ static int mdt_reint_rename(struct mdt_thread_info *info, } else if (rc != -EREMOTE && rc != -ENOENT) { GOTO(out_unlock_old, rc); } else { - mdt_enoent_version_save(info, 3); + /* If mnew does not exist and mold are remote directory, + * it only allows rename if they are under same directory */ + if (mtgtdir != msrcdir && mdt_object_remote(mold)) { + CDEBUG(D_INFO, "Src child "DFID" is on another MDT\n", + PFID(old_fid)); + GOTO(out_unlock_old, rc = -EXDEV); + } + mdt_enoent_version_save(info, 3); } /* step 5: rename it */ @@ -1303,22 +1332,27 @@ static int mdt_reint_rename(struct mdt_thread_info *info, mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_RENAME_WRITE); + /* Check if @dst is subdir of @src. */ + rc = mdt_rename_sanity(info, old_fid); + if (rc) + GOTO(out_unlock_new, rc); - /* Check if @dst is subdir of @src. */ - rc = mdt_rename_sanity(info, old_fid); - if (rc) - GOTO(out_unlock_new, rc); + if (mnew != NULL) + mutex_lock(&mnew->mot_lov_mutex); - rc = mdo_rename(info->mti_env, mdt_object_child(msrcdir), - mdt_object_child(mtgtdir), old_fid, &slname, - (mnew ? mdt_object_child(mnew) : NULL), - lname, ma); + rc = mdo_rename(info->mti_env, mdt_object_child(msrcdir), + mdt_object_child(mtgtdir), old_fid, &slname, + (mnew ? mdt_object_child(mnew) : NULL), + lname, ma); - /* handle last link of tgt object */ - if (rc == 0) { + if (mnew != NULL) + mutex_unlock(&mnew->mot_lov_mutex); + + /* handle last link of tgt object */ + if (rc == 0) { mdt_counter_incr(req, LPROC_MDT_RENAME); - if (mnew) - mdt_handle_last_unlink(info, mnew, ma); + if (mnew) + mdt_handle_last_unlink(info, mnew, ma); mdt_rename_counter_tally(info, info->mti_mdt, req, msrcdir, mtgtdir);