/* local transation, no need to inform other layers */
th_local:1,
/* Whether we need wait the transaction to be submitted */
- th_wait_submit:1;
+ th_wait_submit:1,
+ /* complex transaction which will track updates on all targets */
+ th_complex:1;
};
/**
extern struct req_format RQF_MDS_QUOTACTL;
extern struct req_format RQF_QUOTA_DQACQ;
extern struct req_format RQF_MDS_SWAP_LAYOUTS;
+extern struct req_format RQF_MDS_REINT_MIGRATE;
/* MDS hsm formats */
extern struct req_format RQF_MDS_HSM_STATE_GET;
extern struct req_format RQF_MDS_HSM_STATE_SET;
unsigned int no_create:1,
sp_cr_lookup:1, /* do lookup sanity check or not. */
sp_rm_entry:1, /* only remove name entry */
- sp_permitted:1; /* do not check permission */
-
+ sp_permitted:1, /* do not check permission */
+ sp_migrate_close:1; /* close the file during migrate */
/** Current lock mode for parent dir where create is performing. */
mdl_mode_t sp_cr_mode;
GOTO(out_free, rc = -EINVAL);
}
- rc = ll_get_fid_by_name(inode, filename, namelen, NULL);
+ rc = ll_get_fid_by_name(inode, filename, namelen, NULL, NULL);
if (rc < 0) {
CERROR("%s: lookup %.*s failed: rc = %d\n",
ll_get_fsname(inode->i_sb, NULL, 0), namelen,
}
int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid)
+ int namelen, struct lu_fid *fid,
+ struct inode **inode)
{
struct md_op_data *op_data = NULL;
struct mdt_body *body;
if (IS_ERR(op_data))
RETURN(PTR_ERR(op_data));
- op_data->op_valid = OBD_MD_FLID;
+ op_data->op_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
ll_finish_md_op_data(op_data);
if (rc < 0)
GOTO(out_req, rc = -EFAULT);
if (fid != NULL)
*fid = body->mbo_fid1;
+
+ if (inode != NULL)
+ rc = ll_prep_inode(inode, req, parent->i_sb, NULL);
out_req:
ptlrpc_req_finished(req);
RETURN(rc);
struct inode *child_inode = NULL;
struct md_op_data *op_data;
struct ptlrpc_request *request = NULL;
+ struct obd_client_handle *och = NULL;
struct qstr qstr;
+ struct mdt_body *body;
int rc;
+ __u64 data_version = 0;
ENTRY;
CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%04x\n",
qstr.len = namelen;
dchild = d_lookup(file->f_path.dentry, &qstr);
if (dchild != NULL) {
- if (dchild->d_inode != NULL) {
+ if (dchild->d_inode != NULL)
child_inode = igrab(dchild->d_inode);
- if (child_inode != NULL) {
- mutex_lock(&child_inode->i_mutex);
- op_data->op_fid3 = *ll_inode2fid(child_inode);
- ll_invalidate_aliases(child_inode);
- }
- }
dput(dchild);
- } else {
+ }
+
+ if (child_inode == NULL) {
rc = ll_get_fid_by_name(parent, name, namelen,
- &op_data->op_fid3);
+ &op_data->op_fid3, &child_inode);
if (rc != 0)
GOTO(out_free, rc);
}
+ if (child_inode == NULL)
+ GOTO(out_free, rc = -EINVAL);
+
+ mutex_lock(&child_inode->i_mutex);
+ op_data->op_fid3 = *ll_inode2fid(child_inode);
if (!fid_is_sane(&op_data->op_fid3)) {
CERROR("%s: migrate %s , but fid "DFID" is insane\n",
ll_get_fsname(parent->i_sb, NULL, 0), name,
PFID(&op_data->op_fid3), mdtidx);
GOTO(out_free, rc = 0);
}
+again:
+ if (S_ISREG(child_inode->i_mode)) {
+ och = ll_lease_open(child_inode, NULL, FMODE_WRITE, 0);
+ if (IS_ERR(och)) {
+ rc = PTR_ERR(och);
+ och = NULL;
+ GOTO(out_free, rc);
+ }
+
+ rc = ll_data_version(child_inode, &data_version,
+ LL_DV_WR_FLUSH);
+ if (rc != 0)
+ GOTO(out_free, rc);
+
+ op_data->op_handle = och->och_fh;
+ op_data->op_data = och->och_mod;
+ op_data->op_data_version = data_version;
+ op_data->op_lease_handle = och->och_lease_handle;
+ op_data->op_bias |= MDS_RENAME_MIGRATE;
+ }
op_data->op_mds = mdtidx;
op_data->op_cli_flags = CLI_MIGRATE;
if (rc == 0)
ll_update_times(request, parent);
- ptlrpc_req_finished(request);
- if (rc != 0)
- GOTO(out_free, rc);
+ body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
+ if (body == NULL)
+ GOTO(out_free, rc = -EPROTO);
+
+ /* If the server does release layout lock, then we cleanup
+ * the client och here, otherwise release it in out_free: */
+ if (och != NULL && body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED) {
+ obd_mod_put(och->och_mod);
+ md_clear_open_replay_data(ll_i2sbi(parent)->ll_md_exp, och);
+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+ OBD_FREE_PTR(och);
+ och = NULL;
+ }
+ ptlrpc_req_finished(request);
+ /* Try again if the file layout has changed. */
+ if (rc == -EAGAIN && S_ISREG(child_inode->i_mode))
+ goto again;
out_free:
if (child_inode != NULL) {
+ if (och != NULL) /* close the file */
+ ll_lease_close(och, child_inode, NULL);
clear_nlink(child_inode);
mutex_unlock(&child_inode->i_mutex);
iput(child_inode);
int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
const char *name, int namelen);
int ll_get_fid_by_name(struct inode *parent, const char *name,
- int namelen, struct lu_fid *fid);
+ int namelen, struct lu_fid *fid, struct inode **inode);
#ifdef HAVE_GENERIC_PERMISSION_4ARGS
int ll_inode_permission(struct inode *inode, int mask, unsigned int flags);
#else
if (rc != 0)
RETURN(rc);
+ /* Note: Do not set LinkEA on sub-stripes, otherwise
+ * it will confuse the fid2path process(see mdt_path_current()).
+ * The linkEA between master and sub-stripes is set in
+ * lod_xattr_set_lmv(). */
+ if (strcmp(name, XATTR_NAME_LINK) == 0)
+ RETURN(0);
+
/* set xattr to each stripes, if needed */
rc = lod_load_striping(env, lo);
if (rc != 0)
RETURN(rc);
- /* Note: Do not set LinkEA on sub-stripes, otherwise
- * it will confuse the fid2path process(see mdt_path_current()).
- * The linkEA between master and sub-stripes is set in
- * lod_xattr_set_lmv(). */
- if (lo->ldo_stripenr == 0 || strcmp(name, XATTR_NAME_LINK) == 0)
+ if (lo->ldo_stripenr == 0)
RETURN(0);
for (i = 0; i < lo->ldo_stripenr; i++) {
}
/**
+ * Reset parent FID on OST object
+ *
+ * Replace the parent FID stored in the XATTR_NAME_FID xattr of every stripe
+ * with the FID of @dt. This is only called during migration to reset the
+ * parent FID after the MDT object is migrated to the new MDT, i.e. the FID
+ * is changed.
+ *
+ * Only f_seq and f_oid of the stored parent FID are overwritten; the other
+ * bytes of the buffer are left as they were read. Stripes that are NULL, or
+ * whose XATTR_NAME_FID xattr cannot be read, are skipped without error.
+ *
+ * \param[in] env execution environment
+ * \param[in] dt dt_object whose stripes' parent FID will be reset
+ * \param[in] th thandle
+ * \param[in] declare true to call lod_sub_object_declare_xattr_set() on
+ *			each stripe, false to call lod_sub_object_xattr_set()
+ *
+ * \retval 0 if reset succeeds, or if the object has no stripes
+ * \retval negative errno if reset fails
+ */
+static int lod_object_replace_parent_fid(const struct lu_env *env,
+ struct dt_object *dt,
+ struct thandle *th, bool declare)
+{
+ struct lod_object *lo = lod_dt_obj(dt);
+ struct lod_thread_info *info = lod_env_info(env);
+ struct lu_buf *buf = &info->lti_buf;
+ struct filter_fid *ff;
+ int i, rc;
+ ENTRY;
+
+ LASSERT(S_ISREG(dt->do_lu.lo_header->loh_attr)); /* regular files only */
+
+ /* make sure the striping is loaded before walking ldo_stripe[] */
+ rc = lod_load_striping(env, lo);
+ if (rc != 0)
+ RETURN(rc);
+
+ if (lo->ldo_stripenr == 0)
+ RETURN(0);
+
+ if (info->lti_ea_store_size < sizeof(*ff)) { /* room for one filter_fid */
+ rc = lod_ea_store_resize(info, sizeof(*ff));
+ if (rc != 0)
+ RETURN(rc);
+ }
+
+ buf->lb_buf = info->lti_ea_store;
+ buf->lb_len = info->lti_ea_store_size; /* NOTE(review): lb_len stays
+ * at the full store size and is also the length handed to the
+ * xattr set calls below, not the size returned by
+ * dt_xattr_get() - confirm this is intended */
+
+ for (i = 0; i < lo->ldo_stripenr; i++) {
+ if (lo->ldo_stripe[i] == NULL)
+ continue;
+
+ rc = dt_xattr_get(env, lo->ldo_stripe[i], buf,
+ XATTR_NAME_FID);
+ if (rc < 0) { /* xattr not readable on this stripe: skip it */
+ rc = 0;
+ continue;
+ }
+
+ ff = buf->lb_buf;
+ fid_le_to_cpu(&ff->ff_parent, &ff->ff_parent); /* patch in CPU order */
+ ff->ff_parent.f_seq = lu_object_fid(&dt->do_lu)->f_seq;
+ ff->ff_parent.f_oid = lu_object_fid(&dt->do_lu)->f_oid;
+ fid_cpu_to_le(&ff->ff_parent, &ff->ff_parent); /* and convert back */
+
+ if (declare) {
+ rc = lod_sub_object_declare_xattr_set(env,
+ lo->ldo_stripe[i], buf,
+ XATTR_NAME_FID,
+ LU_XATTR_REPLACE, th);
+ } else {
+ rc = lod_sub_object_xattr_set(env, lo->ldo_stripe[i],
+ buf, XATTR_NAME_FID,
+ LU_XATTR_REPLACE, th);
+ }
+ if (rc < 0) /* stop at the first stripe that fails */
+ break;
+ }
+
+ RETURN(rc);
+}
+
+/**
* Implementation of dt_object_operations::do_declare_xattr_set.
*
* \see dt_object_operations::do_declare_xattr_set() in the API description
rc = lod_declare_striped_object(env, dt, attr, buf, th);
} else if (S_ISDIR(mode)) {
rc = lod_dir_declare_xattr_set(env, dt, buf, name, fl, th);
+ } else if (strcmp(name, XATTR_NAME_FID) == 0) {
+ rc = lod_object_replace_parent_fid(env, dt, th, true);
} else {
rc = lod_sub_object_declare_xattr_set(env, next, buf, name,
fl, th);
rc = lod_striping_create(env, dt, NULL, NULL, th);
}
RETURN(rc);
+ } else if (strcmp(name, XATTR_NAME_FID) == 0) {
+ rc = lod_object_replace_parent_fid(env, dt, th, false);
+
+ RETURN(rc);
}
/* then all other xattr */
if (rc < 0)
RETURN(ERR_PTR(rc));
- if (type == LU_SEQ_RANGE_OST)
+ /* th_complex means we need to track all of the updates for this
+ * transaction, including changes on OSTs */
+ if (type == LU_SEQ_RANGE_OST && !th->th_complex)
RETURN(tth->tt_master_sub_thandle);
sub_th = thandle_get_sub(env, th, sub_obj);
mdc_pack_name(req, &RMF_NAME, op_data->op_name, op_data->op_namelen);
}
+/**
+ * Pack the close intent data (RMF_CLOSE_DATA) of a request.
+ *
+ * Nothing is packed unless op_data->op_bias carries at least one of
+ * MDS_HSM_RELEASE, MDS_CLOSE_LAYOUT_SWAP or MDS_RENAME_MIGRATE.
+ *
+ * If op_data->op_lease_handle resolves to a lock, its remote handle is
+ * copied into cd_handle. The lease is then cancelled locally (LCF_LOCAL),
+ * and the data version and op_fid2 are stored in the close data.
+ *
+ * \param[in] req	request whose RMF_CLOSE_DATA field is filled
+ * \param[in] op_data	operation data providing bias, lease handle,
+ *			data version and FID
+ */
+static void mdc_intent_close_pack(struct ptlrpc_request *req,
+				  struct md_op_data *op_data)
+{
+	struct close_data *cd;
+	struct ldlm_lock *lease;
+
+	/* not a close-with-intent request: nothing to pack */
+	if ((op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |
+				 MDS_RENAME_MIGRATE)) == 0)
+		return;
+
+	cd = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
+	LASSERT(cd != NULL);
+
+	lease = ldlm_handle2lock(&op_data->op_lease_handle);
+	if (lease != NULL) {
+		cd->cd_handle = lease->l_remote_handle;
+		LDLM_LOCK_PUT(lease);
+	}
+	ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
+
+	cd->cd_data_version = op_data->op_data_version;
+	cd->cd_fid = op_data->op_fid2;
+}
+
void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
const char *old, size_t oldlen,
const char *new, size_t newlen)
if (new != NULL)
mdc_pack_name(req, &RMF_SYMTGT, new, newlen);
+
+ if (op_data->op_cli_flags & CLI_MIGRATE &&
+ op_data->op_bias & MDS_RENAME_MIGRATE) {
+ struct mdt_ioepoch *epoch;
+
+ mdc_intent_close_pack(req, op_data);
+ epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
+ mdc_ioepoch_pack(epoch, op_data);
+ }
}
void mdc_getattr_pack(struct ptlrpc_request *req, __u64 valid, __u32 flags,
op_data->op_namelen);
}
-static void mdc_intent_close_pack(struct ptlrpc_request *req,
- struct md_op_data *op_data)
-{
- struct close_data *data;
- struct ldlm_lock *lock;
- enum mds_op_bias bias = op_data->op_bias;
-
- if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP)))
- return;
-
- data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
- LASSERT(data != NULL);
-
- lock = ldlm_handle2lock(&op_data->op_lease_handle);
- if (lock != NULL) {
- data->cd_handle = lock->l_remote_handle;
- LDLM_LOCK_PUT(lock);
- }
- ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
-
- data->cd_data_version = op_data->op_data_version;
- data->cd_fid = op_data->op_fid2;
-}
-
void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
{
struct mdt_ioepoch *epoch;
&cancels, LCK_EX,
MDS_INODELOCK_FULL);
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_MDS_REINT_RENAME);
- if (req == NULL) {
- ldlm_lock_list_put(&cancels, l_bl_ast, count);
- RETURN(-ENOMEM);
- }
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ op_data->op_cli_flags & CLI_MIGRATE ?
+ &RQF_MDS_REINT_MIGRATE : &RQF_MDS_REINT_RENAME);
+ if (req == NULL) {
+ ldlm_lock_list_put(&cancels, l_bl_ast, count);
+ RETURN(-ENOMEM);
+ }
req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT, oldlen + 1);
req_capsule_set_size(&req->rq_pill, &RMF_SYMTGT, RCL_CLIENT, newlen+1);
RETURN(rc);
}
+ if (op_data->op_cli_flags & CLI_MIGRATE && op_data->op_data != NULL) {
+ struct md_open_data *mod = op_data->op_data;
+
+ LASSERTF(mod->mod_open_req != NULL &&
+ mod->mod_open_req->rq_type != LI_POISON,
+ "POISONED open %p!\n", mod->mod_open_req);
+
+ DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
+ /* We no longer want to preserve this open for replay even
+ * though the open was committed. b=3632, b=3633 */
+ spin_lock(&mod->mod_open_req->rq_lock);
+ mod->mod_open_req->rq_replay = 0;
+ spin_unlock(&mod->mod_open_req->rq_lock);
+ }
+
if (exp_connect_cancelset(exp) && req)
ldlm_cli_cancel_list(&cancels, count, req, 0);
int list_xsize;
struct lu_buf list_xbuf;
int rc;
+ int rc1;
/* retrieve xattr list from the old object */
list_xsize = mdo_xattr_list(env, mdd_sobj, &LU_BUF_NULL);
if (rc != 0)
GOTO(stop_trans, rc);
stop_trans:
- mdd_trans_stop(env, mdd, rc, handle);
+ rc1 = mdd_trans_stop(env, mdd, rc, handle);
+ if (rc == 0)
+ rc = rc1;
if (rc != 0)
GOTO(out, rc);
next:
RETURN(rc);
}
spec->u.sp_symname = link_buf.lb_buf;
- } else if S_ISREG(la->la_mode) {
+ } else if (S_ISREG(la->la_mode)) {
/* retrieve lov of the old object */
rc = mdd_get_lov_ea(env, mdd_sobj, &lmm_buf);
if (rc != 0 && rc != -ENODATA)
la_flag->la_flags = la->la_flags | LUSTRE_IMMUTABLE_FL;
rc = mdo_attr_set(env, mdd_sobj, la_flag, handle);
stop_trans:
- if (handle != NULL)
- mdd_trans_stop(env, mdd, rc, handle);
+ if (handle != NULL) {
+ int rc1;
+
+ rc1 = mdd_trans_stop(env, mdd, rc, handle);
+ if (rc == 0)
+ rc = rc1;
+ }
out_free:
if (lmm_buf.lb_buf != NULL)
OBD_FREE(lmm_buf.lb_buf, lmm_buf.lb_len);
int recsize;
int is_dir;
bool target_exist = false;
+ int rc1;
len = iops->key_size(env, it);
if (len == 0)
if (IS_ERR(child))
GOTO(out, rc = PTR_ERR(child));
+ mdd_write_lock(env, child, MOR_SRC_CHILD);
is_dir = S_ISDIR(mdd_object_type(child));
snprintf(name, ent->lde_namelen + 1, "%s", ent->lde_name);
strlen(name), handle);
out_put:
+ mdd_write_unlock(env, child);
mdd_object_put(env, child);
- mdd_trans_stop(env, mdd, rc, handle);
+ rc1 = mdd_trans_stop(env, mdd, rc, handle);
+ if (rc == 0)
+ rc = rc1;
+
if (rc != 0)
GOTO(out, rc);
next:
handle);
if (rc != 0)
return rc;
+
+ handle->th_complex = 1;
+ rc = mdo_declare_xattr_set(env, mdd_tobj, NULL,
+ XATTR_NAME_FID,
+ LU_XATTR_REPLACE, handle);
+ if (rc < 0)
+ return rc;
}
if (S_ISDIR(mdd_object_type(mdd_sobj))) {
handle);
if (rc != 0 && rc != -ENODATA)
GOTO(stop_trans, rc);
+
+ rc = mdo_xattr_set(env, mdd_tobj, NULL,
+ XATTR_NAME_FID,
+ LU_XATTR_REPLACE, handle);
+ if (rc < 0)
+ GOTO(stop_trans, rc);
}
}
if (rc != 0)
GOTO(stop_trans, rc);
- mdd_write_lock(env, mdd_sobj, MOR_SRC_CHILD);
+ mdd_write_lock(env, mdd_sobj, MOR_TGT_CHILD);
+
+ /* Increase mod_count to add the source object to the orphan list,
+ * so if other clients still send RPC to the old object, then these
+ * objects can help the request to find the new object, see
+ * mdt_reint_open() */
+ mdd_sobj->mod_count++;
+ rc = mdd_finish_unlink(env, mdd_sobj, ma, mdd_pobj, lname, handle);
+ mdd_sobj->mod_count--;
+ if (rc != 0)
+ GOTO(out_unlock, rc);
+
mdo_ref_del(env, mdd_sobj, handle);
if (is_dir)
mdo_ref_del(env, mdd_sobj, handle);
ma->ma_attr = *so_attr;
ma->ma_valid |= MA_INODE;
- rc = mdd_finish_unlink(env, mdd_sobj, ma, mdd_pobj, lname, handle);
- if (rc != 0)
- GOTO(out_unlock, rc);
rc = mdd_attr_set_internal(env, mdd_pobj, p_la, handle, 0);
if (rc != 0)
if (rc != 0) {
/* For multiple links files, if there are no linkEA data at all,
* means the file might be created before linkEA is enabled, and
- * all all of its links should not be migrated yet, otherwise
- * it should have some linkEA there */
+ * all of its links should not be migrated yet, otherwise it
+ * should have some linkEA there */
if (rc == -ENOENT || rc == -ENODATA)
RETURN(1);
RETURN(rc);
}
- /* If it is mulitple links file, we need update the name entry for
- * all parent */
+ /* If there are still links locally, then the file will not be
+ * migrated. */
LASSERT(ldata->ld_leh != NULL);
ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);
for (count = 0; count < ldata->ld_leh->leh_reccount; count++) {
/* If the file will being migrated, it will check whether
* the file is being opened by someone else right now */
mdd_read_lock(env, mdd_sobj, MOR_SRC_CHILD);
- if (mdd_sobj->mod_count >= 1) {
+ if (mdd_sobj->mod_count > 0) {
CERROR("%s: "DFID"%s is already opened count %d: rc = %d\n",
mdd2obd_dev(mdd)->obd_name,
PFID(mdd_object_fid(mdd_sobj)), lname->ln_name,
GOTO(put, rc);
}
+ LASSERT(mdd_object_exists(mdd_tobj));
/* step 2: migrate xattr */
rc = mdd_migrate_xattrs(env, mdd_sobj, mdd_tobj);
if (rc != 0)
OBD_FAIL_TIMEOUT(OBD_FAIL_MIGRATE_DELAY, cfs_fail_val);
}
+ LASSERT(mdd_object_exists(mdd_tobj));
/* step 4: update name entry to the new object */
rc = mdd_migrate_update_name(env, mdd_pobj, mdd_sobj, mdd_tobj, lname,
ma);
info->mti_spec.no_create = 0;
info->mti_spec.sp_rm_entry = 0;
info->mti_spec.sp_permitted = 0;
+ info->mti_spec.sp_migrate_close = 0;
info->mti_spec.u.sp_ea.eadata = NULL;
info->mti_spec.u.sp_ea.eadatalen = 0;
int mdt_links_read(struct mdt_thread_info *info,
struct mdt_object *mdt_obj,
struct linkea_data *ldata);
+int mdt_close_internal(struct mdt_thread_info *info, struct ptlrpc_request *req,
+ struct mdt_body *repbody);
/* mdt_idmap.c */
int mdt_init_idmap(struct tgt_session_info *tsi);
void mdt_cleanup_idmap(struct mdt_export_data *);
struct hsm_progress_kernel *pgs,
const int update_record);
+int mdt_close_swap_layouts(struct mdt_thread_info *info,
+ struct mdt_object *o, struct md_attr *ma);
+
extern struct lu_context_key mdt_thread_key;
/* debug issues helper starts here*/
else
ma->ma_attr_flags &= ~MDS_VTX_BYPASS;
+ if (rec->rn_bias & MDS_RENAME_MIGRATE) {
+ req_capsule_extend(info->mti_pill, &RQF_MDS_REINT_MIGRATE);
+ rc = mdt_close_handle_unpack(info);
+ if (rc < 0)
+ RETURN(rc);
+ info->mti_spec.sp_migrate_close = 1;
+ }
+
info->mti_spec.no_create = !!req_is_replay(mdt_info_req(info));
if (!lu_name_is_valid(&rr->rr_name))
GOTO(out, result = -EPROTO);
+again:
lh = &info->mti_lh[MDT_LH_PARENT];
mdt_lock_pdo_init(lh,
(create_flags & MDS_OPEN_CREAT) ? LCK_PW : LCK_PR,
if (result != 0 && result != -ENOENT && result != -ESTALE)
GOTO(out_parent, result);
- if (result == -ENOENT || result == -ESTALE) {
- mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG);
- if (result == -ESTALE) {
- /*
- * -ESTALE means the parent is a dead(unlinked) dir, so
- * it should return -ENOENT to in accordance with the
- * original mds implementaion.
- */
- GOTO(out_parent, result = -ENOENT);
- }
+ if (result == -ENOENT || result == -ESTALE) {
+ /* If the object is dead, let's check if the object
+ * is being migrated to a new object */
+ if (result == -ESTALE) {
+ struct lu_buf lmv_buf;
+
+ lmv_buf.lb_buf = info->mti_xattr_buf;
+ lmv_buf.lb_len = sizeof(info->mti_xattr_buf);
+ rc = mo_xattr_get(info->mti_env,
+ mdt_object_child(parent),
+ &lmv_buf, XATTR_NAME_LMV);
+ if (rc > 0) {
+ struct lmv_mds_md_v1 *lmv;
+
+ lmv = lmv_buf.lb_buf;
+ if (le32_to_cpu(lmv->lmv_hash_type) &
+ LMV_HASH_FLAG_MIGRATION) {
+ /* Get the new parent FID and retry */
+ mdt_object_unlock_put(info, parent,
+ lh, 1);
+ mdt_lock_handle_init(lh);
+ fid_le_to_cpu(
+ (struct lu_fid *)rr->rr_fid1,
+ &lmv->lmv_stripe_fids[1]);
+ goto again;
+ }
+ }
+ }
+
+ mdt_set_disposition(info, ldlm_rep, DISP_LOOKUP_NEG);
+ if (result == -ESTALE) {
+ /*
+ * -ESTALE means the parent is a dead(unlinked) dir, so
+ * it should return -ENOENT to in accordance with the
+ * original mds implementaion.
+ */
+ GOTO(out_parent, result = -ENOENT);
+ }
+
if (!(create_flags & MDS_OPEN_CREAT))
GOTO(out_parent, result);
if (exp_connect_flags(req->rq_export) & OBD_CONNECT_RDONLY)
return rc;
}
-static int mdt_close_swap_layouts(struct mdt_thread_info *info,
- struct mdt_object *o, struct md_attr *ma)
+int mdt_close_swap_layouts(struct mdt_thread_info *info,
+ struct mdt_object *o, struct md_attr *ma)
{
struct mdt_lock_handle *lh1 = &info->mti_lh[MDT_LH_NEW];
struct mdt_lock_handle *lh2 = &info->mti_lh[MDT_LH_OLD];
out_lease:
LDLM_LOCK_PUT(lease);
- ma->ma_valid = 0;
- ma->ma_need = 0;
+ if (ma != NULL) {
+ ma->ma_valid = 0;
+ ma->ma_need = 0;
+ }
return rc;
}
RETURN(rc);
}
+/**
+ * Close the open file identified by info->mti_close_handle.
+ *
+ * The handle is looked up in the export's open file list under
+ * med_open_lock. If a matching open handle is found, it is unhashed and
+ * unlinked from the list before the lock is dropped, and then closed with
+ * mdt_mfd_close(). mdt_handle_last_unlink() is called only when \a repbody
+ * is not NULL.
+ *
+ * \param[in] info	thread info carrying the close handle
+ * \param[in] req	request the close belongs to
+ * \param[in] repbody	reply body, or NULL to skip last-unlink handling
+ *
+ * \retval -ESTALE	no open handle matches the close handle
+ * \retval other	result of mdt_handle_last_unlink() if it is non-zero,
+ *			otherwise the result of mdt_mfd_close()
+ */
+int mdt_close_internal(struct mdt_thread_info *info, struct ptlrpc_request *req,
+		       struct mdt_body *repbody)
+{
+	struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
+	struct mdt_file_data *mfd;
+	struct mdt_object *obj;
+	int close_rc;
+	int unlink_rc = 0;
+	ENTRY;
+
+	spin_lock(&med->med_open_lock);
+	mfd = mdt_handle2mfd(med, &info->mti_close_handle, req_is_replay(req));
+	if (mdt_mfd_closed(mfd)) {
+		spin_unlock(&med->med_open_lock);
+		CDEBUG(D_INODE, "no handle for file close: fid = "DFID
+		       ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1),
+		       info->mti_close_handle.cookie);
+		/** not serious error since bug 3633 */
+		RETURN(-ESTALE);
+	}
+
+	class_handle_unhash(&mfd->mfd_handle);
+	list_del_init(&mfd->mfd_list);
+	spin_unlock(&med->med_open_lock);
+
+	/* Do not lose object before last unlink. */
+	obj = mfd->mfd_object;
+	mdt_object_get(info->mti_env, obj);
+	close_rc = mdt_mfd_close(info, mfd);
+	if (repbody != NULL)
+		unlink_rc = mdt_handle_last_unlink(info, obj,
+						   &info->mti_attr);
+	mdt_object_put(info->mti_env, obj);
+
+	RETURN(unlink_rc != 0 ? unlink_rc : close_rc);
+}
+
int mdt_close(struct tgt_session_info *tsi)
{
struct mdt_thread_info *info = tsi2mdt_info(tsi);
struct ptlrpc_request *req = tgt_ses_req(tsi);
- struct mdt_export_data *med;
- struct mdt_file_data *mfd;
- struct mdt_object *o;
struct md_attr *ma = &info->mti_attr;
struct mdt_body *repbody = NULL;
int rc, ret = 0;
rc = err_serious(rc);
}
- med = &req->rq_export->exp_mdt_data;
- spin_lock(&med->med_open_lock);
- mfd = mdt_handle2mfd(med, &info->mti_close_handle, req_is_replay(req));
- if (mdt_mfd_closed(mfd)) {
- spin_unlock(&med->med_open_lock);
- CDEBUG(D_INODE, "no handle for file close: fid = "DFID
- ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1),
- info->mti_close_handle.cookie);
- /** not serious error since bug 3633 */
- rc = -ESTALE;
- } else {
- class_handle_unhash(&mfd->mfd_handle);
- list_del_init(&mfd->mfd_list);
- spin_unlock(&med->med_open_lock);
+ rc = mdt_close_internal(info, req, repbody);
+ if (rc != -ESTALE)
+ mdt_empty_transno(info, rc);
- /* Do not lose object before last unlink. */
- o = mfd->mfd_object;
- mdt_object_get(info->mti_env, o);
- ret = mdt_mfd_close(info, mfd);
- if (repbody != NULL)
- rc = mdt_handle_last_unlink(info, o, ma);
- mdt_empty_transno(info, rc);
- mdt_object_put(info->mti_env, o);
- }
if (repbody != NULL) {
mdt_client_compatibility(info);
rc = mdt_fix_reply(info);
struct lu_fid *old_fid = &info->mti_tmp_fid1;
struct list_head lock_list;
__u64 lock_ibits;
+ struct ldlm_lock *lease = NULL;
+ bool lock_open_sem = false;
int rc;
ENTRY;
if (rc != 0)
GOTO(out_put_child, rc);
+ if (info->mti_spec.sp_migrate_close) {
+ struct close_data *data;
+ struct mdt_body *repbody;
+ bool lease_broken = false;
+
+ if (!req_capsule_field_present(info->mti_pill, &RMF_MDT_EPOCH,
+ RCL_CLIENT) ||
+ !req_capsule_field_present(info->mti_pill, &RMF_CLOSE_DATA,
+ RCL_CLIENT))
+ GOTO(out_lease, rc = -EPROTO);
+
+ data = req_capsule_client_get(info->mti_pill, &RMF_CLOSE_DATA);
+ if (data == NULL)
+ GOTO(out_lease, rc = -EPROTO);
+
+ lease = ldlm_handle2lock(&data->cd_handle);
+ if (lease == NULL)
+ GOTO(out_lease, rc = -ESTALE);
+
+ /* try to hold open_sem so that nobody else can open the file */
+ if (!down_write_trylock(&mold->mot_open_sem)) {
+ ldlm_lock_cancel(lease);
+ GOTO(out_lease, rc = -EBUSY);
+ }
+
+ lock_open_sem = true;
+ /* Check whether the open lease has already been canceled */
+ lock_res_and_lock(lease);
+ lease_broken = ldlm_is_cancel(lease);
+ unlock_res_and_lock(lease);
+
+ LDLM_DEBUG(lease, DFID " lease broken? %d\n",
+ PFID(mdt_object_fid(mold)), lease_broken);
+
+ /* Cancel server side lease. Client side counterpart should
+ * have been cancelled. It's okay to cancel it now as we've
+ * held mot_open_sem. */
+ ldlm_lock_cancel(lease);
+
+ if (lease_broken)
+ GOTO(out_lease, rc = -EAGAIN);
+out_lease:
+ rc = mdt_close_internal(info, mdt_info_req(info), NULL);
+ repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
+ repbody->mbo_valid |= OBD_MD_CLOSE_INTENT_EXECED;
+ if (rc != 0)
+ GOTO(out_unlock_list, rc);
+ }
+
/* 4: lock of the object migrated object */
lh_childp = &info->mti_lh[MDT_LH_OLD];
mdt_lock_reg_init(lh_childp, LCK_EX);
mdt_object_child(mnew), ma);
if (rc != 0)
GOTO(out_unlock_new, rc);
+
out_unlock_new:
if (lh_tgtp != NULL)
mdt_object_unlock(info, mnew, lh_tgtp, rc);
mdt_object_unlock(info, mold, lh_childp, rc);
out_unlock_list:
mdt_unlock_list(info, &lock_list, rc);
+ if (lease != NULL) {
+ ldlm_reprocess_all(lease->l_resource);
+ LDLM_LOCK_PUT(lease);
+ }
+
+ if (lock_open_sem)
+ up_write(&mold->mot_open_sem);
out_put_child:
mdt_object_put(info->mti_env, mold);
out_unlock_parent:
if (top_device->ld_obd->obd_recovering)
req->rq_allow_replay = 1;
- osp_get_rpc_lock(osp);
+ if (osp->opd_connect_mdt)
+ osp_get_rpc_lock(osp);
rc = ptlrpc_queue_wait(req);
- osp_put_rpc_lock(osp);
+ if (osp->opd_connect_mdt)
+ osp_put_rpc_lock(osp);
if ((rc == -ENOMEM && req->rq_set == NULL) ||
(req->rq_transno == 0 && !req->rq_committed)) {
if (args->oaua_update != NULL) {
}
if (!osp->opd_connect_mdt) {
+ osp_trans_callback(env, oth, th->th_result);
rc = osp_send_update_req(env, osp, oth->ot_our);
GOTO(out, rc);
}
&RMF_DLM_REQ
};
+static const struct req_msg_field *mds_reint_migrate_client[] = { /* client
+ * side (request) fields of RQF_MDS_REINT_MIGRATE */
+ &RMF_PTLRPC_BODY,
+ &RMF_REC_REINT,
+ &RMF_CAPA1,
+ &RMF_CAPA2,
+ &RMF_NAME,
+ &RMF_SYMTGT,
+ &RMF_DLM_REQ,
+ &RMF_MDT_EPOCH, /* packed by mdc_rename_pack() for MDS_RENAME_MIGRATE */
+ &RMF_CLOSE_DATA /* lease handle, data version and FID for the close */
+};
+
static const struct req_msg_field *mds_last_unlink_server[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_BODY,
&RQF_MDS_REINT_UNLINK,
&RQF_MDS_REINT_LINK,
&RQF_MDS_REINT_RENAME,
+ &RQF_MDS_REINT_MIGRATE,
&RQF_MDS_REINT_SETATTR,
&RQF_MDS_REINT_SETXATTR,
&RQF_MDS_QUOTACTL,
mds_last_unlink_server);
EXPORT_SYMBOL(RQF_MDS_REINT_RENAME);
+struct req_format RQF_MDS_REINT_MIGRATE =
+ DEFINE_REQ_FMT0("MDS_REINT_MIGRATE", mds_reint_migrate_client,
+ mds_last_unlink_server); /* same reply layout as MDS_REINT_RENAME */
+EXPORT_SYMBOL(RQF_MDS_REINT_MIGRATE);
+
struct req_format RQF_MDS_REINT_SETATTR =
DEFINE_REQ_FMT0("MDS_REINT_SETATTR",
mds_reint_setattr_client, mds_setattr_server);
* for example if the the OSP is used to connect to OST */
ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
LLOG_UPDATELOG_ORIG_CTXT);
- LASSERT(ctxt != NULL);
/* Not ready to record updates yet. */
- if (ctxt->loc_handle == NULL)
- GOTO(out_put, rc = 0);
+ if (ctxt == NULL || ctxt->loc_handle == NULL) {
+ llog_ctxt_put(ctxt);
+ return 0;
+ }
rc = llog_declare_add(env, ctxt->loc_handle,
&record->lur_hdr, sub_th);
ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
LLOG_UPDATELOG_ORIG_CTXT);
- LASSERT(ctxt != NULL);
-
- /* Not ready to record updates yet, usually happens
- * in error handler path */
- if (ctxt->loc_handle == NULL)
- GOTO(llog_put, rc = 0);
+ /* If ctxt == NULL, then it means updates on OST (only happens
+ * during migration), and we do not track those updates for now */
+ /* If ctxt->loc_handle == NULL, then it does not need to record
+ * update, usually happens in error handler path */
+ if (ctxt == NULL || ctxt->loc_handle == NULL) {
+ llog_ctxt_put(ctxt);
+ RETURN(0);
+ }
/* Since the cross-MDT updates will includes both local
* and remote updates, the update ops count must > 1 */
obd = st->st_dt->dd_lu_dev.ld_obd;
ctxt = llog_get_context(obd, LLOG_UPDATELOG_ORIG_CTXT);
- LASSERT(ctxt);
+ if (ctxt == NULL)
+ continue;
list_for_each_entry(stc, &st->st_cookie_list, stc_list) {
cookie = &stc->stc_cookie;
if (fid_is_zero(&cookie->lgc_lgl.lgl_oi.oi_fid))
[ $MDSCOUNT -lt 2 ] &&
skip "We need at least 2 MDSes for this test" && return
+ [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.7.55) ] &&
+ skip "Skip the test after 2.7.55 see LU-6437" && return
+
echo "#####"
echo "According to current metadata migration implementation,"
echo "before the old MDT-object is removed, both the new MDT-object"
ln -s $other_dir/$tfile $migrate_dir/${tfile}_ln_other
$LFS migrate -m $MDTIDX $migrate_dir ||
- error "migrate remote dir error"
+ error "fails on migrating remote dir to MDT1"
echo "migratate to MDT1, then checking.."
for ((i = 0; i < 10; i++)); do
#migrate back to MDT0
MDTIDX=0
+
$LFS migrate -m $MDTIDX $migrate_dir ||
- error "migrate remote dir error"
+ error "fails on migrating remote dir to MDT0"
echo "migrate back to MDT0, checking.."
for file in $(find $migrate_dir); do
local t=$(ls $migrate_dir | wc -l)
$LFS migrate --mdt-index $MDTIDX $migrate_dir &&
error "migrate should fail after 5 entries"
+
+ mkdir $migrate_dir/dir &&
+ error "mkdir succeeds under migrating directory"
+ touch $migrate_dir/file &&
+ error "touch file succeeds under migrating directory"
+
local u=$(ls $migrate_dir | wc -l)
[ "$u" == "$t" ] || error "$u != $t during migration"
}
run_test 77g "Change TBF type directly"
-test_80() {
+test_80a() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
local MDTIDX=1
local mdt_index
rm -rf $DIR1/$tdir || error "rm dir failed after migration"
}
-run_test 80 "migrate directory when some children is being opened"
+run_test 80a "migrate directory when some children is being opened"
+
+cleanup_80b() {
+ trap 0
+ kill -9 $migrate_pid
+}
+
+# Keep migrating a directory between random MDTs in a background loop while
+# the same directory is read, written, linked, renamed and unlinked through
+# the second mount point for five minutes. The test fails if the migration
+# loop is no longer running at the end.
+test_80b() {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+	local migrate_dir1=$DIR1/$tdir/migrate_dir
+	local migrate_dir2=$DIR2/$tdir/migrate_dir
+	local start_time
+	local end_time
+	local duration
+	local show_time=1
+	local mdt_idx
+	local rc=0
+	local rc1=0
+
+	trap cleanup_80b EXIT
+	#prepare migrate directory
+	mkdir -p $migrate_dir1
+	for F in {1,2,3,4,5}; do
+		echo "$F$F$F$F$F" > $migrate_dir1/file$F
+		echo "$F$F$F$F$F" > $DIR/$tdir/file$F
+	done
+
+	#migrate the directories among MDTs
+	(
+		while true; do
+			mdt_idx=$((RANDOM % MDSCOUNT))
+			rc=0
+			# "&>" silences stdout and stderr; "2&>" would pass a
+			# literal "2" to lfs as one more path to migrate.
+			$LFS migrate -m $mdt_idx $migrate_dir1 &>/dev/null ||
+				rc=$?
+			# keep going only on success or EBUSY (16)
+			[ $rc -eq 0 -o $rc -eq 16 ] || break
+		done
+	) &
+	migrate_pid=$!
+
+	echo "start migration thread $migrate_pid"
+	#Access the files at the same time
+	start_time=$(date +%s)
+	echo "accessing the migrating directory for 5 minutes..."
+	while true; do
+		rc1=0
+
+		ls $migrate_dir2 > /dev/null || {
+			echo "read dir fails"
+			break
+		}
+		diff -u $DIR2/$tdir/file1 $migrate_dir2/file1 || {
+			echo "access file1 fails"
+			break
+		}
+
+		cat $migrate_dir2/file2 > $migrate_dir2/file3 || {
+			echo "access file2/3 fails"
+			break
+		}
+
+		# A second "> /dev/null" here would send the data to
+		# /dev/null and merely truncate file4.
+		echo "aaaaa" > $migrate_dir2/file4 || {
+			echo "access file4 fails"
+			break
+		}
+
+		stat $migrate_dir2/file5 > /dev/null || {
+			echo "stat file5 fails"
+			break
+		}
+
+		# for the namespace operations only 0 and 1 are acceptable
+		touch $migrate_dir2/source_file > /dev/null || rc1=$?
+		[ $rc1 -eq 0 -o $rc1 -eq 1 ] || {
+			echo "touch file failed with $rc1"
+			break;
+		}
+
+		if [ -e $migrate_dir2/source_file ]; then
+			ln $migrate_dir2/source_file $migrate_dir2/link_file \
+					&>/dev/null || rc1=$?
+			if [ -e $migrate_dir2/link_file ]; then
+				rm -rf $migrate_dir2/link_file
+			fi
+
+			mrename $migrate_dir2/source_file \
+				$migrate_dir2/target_file &>/dev/null || rc1=$?
+			[ $rc1 -eq 0 -o $rc1 -eq 1 ] || {
+				echo "rename failed with $rc1"
+				break
+			}
+
+			if [ -e $migrate_dir2/target_file ]; then
+				rm -rf $migrate_dir2/target_file &>/dev/null ||
+								rc1=$?
+			else
+				rm -rf $migrate_dir2/source_file &>/dev/null ||
+								rc1=$?
+			fi
+			[ $rc1 -eq 0 -o $rc1 -eq 1 ] || {
+				echo "unlink failed with $rc1"
+				break
+			}
+		fi
+
+		end_time=$(date +%s)
+		duration=$((end_time - start_time))
+		# print the elapsed time once every 10 seconds
+		if [ $((duration % 10)) -eq 0 ]; then
+			if [ $show_time -eq 1 ]; then
+				echo "...$duration seconds"
+				show_time=0
+			fi
+		else
+			show_time=1
+		fi
+
+		kill -0 $migrate_pid || {
+			echo "migration stopped 1"
+			break
+		}
+
+		[ $duration -ge 300 ] && break
+	done
+
+	#check that the migration loop is still running
+	kill -0 $migrate_pid || error "migration stopped 2"
+	cleanup_80b
+}
+run_test 80b "Accessing directory during migration"
test_81() {
[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return