* Right now, only two opertaions need to verify layout: glimpse
* and setattr.
*/
- ci_verify_layout:1;
+ ci_verify_layout:1,
+ /**
+ * file is released, restore has to to be triggered by vvp layer
+ */
+ ci_restore_needed:1;
/**
* Number of pages owned by this IO. For invariant checking.
*/
#define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */
#define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */
#define OBD_MD_MEA (0x0000000400000000ULL) /* CMD split EA */
-
-/* OBD_MD_MDTIDX is used to get MDT index, but it is never been used overwire,
- * and it is already obsolete since 2.3 */
-/* #define OBD_MD_MDTIDX (0x0000000800000000ULL) */
+#define OBD_MD_TSTATE (0x0000000800000000ULL) /* transient state field */
#define OBD_MD_FLXATTR (0x0000001000000000ULL) /* xattr */
#define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */
}
#endif
+/* 64 possible states */
+enum md_transient_state {
+ MS_RESTORE = (1 << 0), /* restore is running */
+};
+
struct mdt_body {
struct lu_fid fid1;
struct lu_fid fid2;
obd_time ctime;
__u64 blocks; /* XID, in the case of MDS_READPAGE */
__u64 ioepoch;
- __u64 unused1; /* was "ino" until 2.4.0 */
+ __u64 t_state; /* transient file state defined in
+ * enum md_transient_state
+ * was "ino" until 2.4.0 */
__u32 fsuid;
__u32 fsgid;
__u32 capability;
cl_io_fini(env, io);
if (unlikely(io->ci_need_restart))
goto again;
+ /* HSM import case: file is released, cannot be restored
+ * no need to fail except if restore registration failed
+ * with -ENODATA */
+ if (result == -ENODATA && io->ci_restore_needed &&
+ io->ci_result != -ENODATA)
+ result = 0;
cl_env_put(env, &refcheck);
RETURN(result);
}
cl_io_fini(env, io);
/* If any bit been read/written (result != 0), we just return
* short read/write instead of restart io. */
- if (result == 0 && io->ci_need_restart) {
+ if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
CDEBUG(D_VFSTRACE, "Restart %s on %s from %lld, count:%zd\n",
iot == CIT_READ ? "read" : "write",
file->f_dentry->d_name.name, *ppos, count);
}
int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
- __u64 ibits)
+ __u64 ibits)
{
- struct inode *inode = dentry->d_inode;
- int rc;
- ENTRY;
+ struct inode *inode = dentry->d_inode;
+ int rc;
+ ENTRY;
- rc = __ll_inode_revalidate_it(dentry, it, ibits);
+ rc = __ll_inode_revalidate_it(dentry, it, ibits);
if (rc != 0)
RETURN(rc);
LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
} else {
- rc = ll_glimpse_size(inode);
+ /* In case of restore, the MDT has the right size and has
+ * already send it back without granting the layout lock,
+ * inode is up-to-date so glimpse is useless.
+ * Also to glimpse we need the layout, in case of a running
+ * restore the MDT holds the layout lock so the glimpse will
+ * block up to the end of restore (getattr will block)
+ */
+ if (!(ll_i2info(inode)->lli_flags & LLIF_FILE_RESTORING))
+ rc = ll_glimpse_size(inode);
}
- RETURN(rc);
+ RETURN(rc);
}
int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
unlock_res_and_lock(lock);
/* checking lvb_ready is racy but this is okay. The worst case is
* that multi processes may configure the file on the same time. */
+
if (lvb_ready || !reconf) {
rc = -ENODATA;
if (lvb_ready) {
RETURN(rc);
}
+
+/**
+ * This function send a restore request to the MDT
+ */
+int ll_layout_restore(struct inode *inode)
+{
+ struct hsm_user_request *hur;
+ int len, rc;
+ ENTRY;
+
+ len = sizeof(struct hsm_user_request) +
+ sizeof(struct hsm_user_item);
+ OBD_ALLOC(hur, len);
+ if (hur == NULL)
+ RETURN(-ENOMEM);
+
+ hur->hur_request.hr_action = HUA_RESTORE;
+ hur->hur_request.hr_archive_id = 0;
+ hur->hur_request.hr_flags = 0;
+ memcpy(&hur->hur_user_item[0].hui_fid, &ll_i2info(inode)->lli_fid,
+ sizeof(hur->hur_user_item[0].hui_fid));
+ hur->hur_user_item[0].hui_extent.length = -1;
+ hur->hur_request.hr_itemcount = 1;
+ rc = obd_iocontrol(LL_IOC_HSM_REQUEST, cl_i2sbi(inode)->ll_md_exp,
+ len, hur, NULL);
+ OBD_FREE(hur, len);
+ RETURN(rc);
+}
+
LLIF_SRVLOCK = (1 << 5),
/* File data is modified. */
LLIF_DATA_MODIFIED = (1 << 6),
+ /* File is being restored */
+ LLIF_FILE_RESTORING = (1 << 7),
};
struct ll_inode_info {
int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
int ll_layout_refresh(struct inode *inode, __u32 *gen);
+int ll_layout_restore(struct inode *inode);
#endif /* LLITE_INTERNAL_H */
struct ll_inode_info *lli = ll_i2info(inode);
struct md_op_data *op_data = NULL;
struct md_open_data *mod = NULL;
+ bool file_is_released = false;
int rc = 0, rc1 = 0;
ENTRY;
(attr->ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET)))
op_data->op_flags = MF_EPOCH_OPEN;
+ /* truncate on a released file must failed with -ENODATA,
+ * so size must not be set on MDS for released file
+ * but other attributes must be set
+ */
+ if (S_ISREG(inode->i_mode)) {
+ struct lov_stripe_md *lsm;
+ __u32 gen;
+
+ ll_layout_refresh(inode, &gen);
+ lsm = ccc_inode_lsm_get(inode);
+ if (lsm && lsm->lsm_pattern & LOV_PATTERN_F_RELEASED)
+ file_is_released = true;
+ ccc_inode_lsm_put(inode, lsm);
+ }
+
+ /* clear size attr for released file
+ * we clear the attribute send to MDT in op_data, not the original
+ * received from caller in attr which is used later to
+ * decide return code */
+ if (file_is_released && (attr->ia_valid & ATTR_SIZE))
+ op_data->op_attr.ia_valid &= ~ATTR_SIZE;
+
rc = ll_md_setattr(dentry, op_data, &mod);
if (rc)
GOTO(out, rc);
+ /* truncate failed, others succeed */
+ if (file_is_released) {
+ if (attr->ia_valid & ATTR_SIZE)
+ GOTO(out, rc = -ENODATA);
+ else
+ GOTO(out, rc = 0);
+ }
+
/* RPC to MDT is sent, cancel data modification flag */
if (rc == 0 && (op_data->op_bias & MDS_DATA_MODIFIED)) {
spin_lock(&lli->lli_lock);
if (attr->ia_valid & (ATTR_SIZE |
ATTR_ATIME | ATTR_ATIME_SET |
- ATTR_MTIME | ATTR_MTIME_SET))
+ ATTR_MTIME | ATTR_MTIME_SET)) {
/* For truncate and utimes sending attributes to OSTs, setting
* mtime/atime to the past will be performed under PW [0:EOF]
* extent lock (new_size:EOF for truncate). It may seem
* setting times to past, but it is necessary due to possible
* time de-synchronization between MDT inode and OST objects */
rc = ll_setattr_ost(inode, attr);
+ }
EXIT;
out:
if (op_data) {
LASSERT(md->oss_capa);
ll_add_capa(inode, md->oss_capa);
}
+
+ if (body->valid & OBD_MD_TSTATE) {
+ if (body->t_state & MS_RESTORE)
+ lli->lli_flags |= LLIF_FILE_RESTORING;
+ }
}
void ll_read_inode2(struct inode *inode, void *opaque)
CLOBINVRNT(env, obj, ccc_object_invariant(obj));
- CDEBUG(D_VFSTRACE, "ignore/verify layout %d/%d, layout version %d.\n",
- io->ci_ignore_layout, io->ci_verify_layout, cio->cui_layout_gen);
+ CDEBUG(D_VFSTRACE, DFID" ignore/verify layout %d/%d, layout version %d "
+ "restore needed %d\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ io->ci_ignore_layout, io->ci_verify_layout,
+ cio->cui_layout_gen, io->ci_restore_needed);
+
+ if (io->ci_restore_needed == 1) {
+ int rc;
+
+ /* file was detected release, we need to restore it
+ * before finishing the io
+ */
+ rc = ll_layout_restore(ccc_object_inode(obj));
+ /* if restore registration failed, no restart,
+ * we will return -ENODATA */
+ /* The layout will change after restore, so we need to
+ * block on layout lock hold by the MDT
+ * as MDT will not send new layout in lvb (see LU-3124)
+ * we have to explicitly fetch it, all this will be done
+ * by ll_layout_refresh()
+ */
+ if (rc == 0) {
+ io->ci_restore_needed = 0;
+ io->ci_need_restart = 1;
+ io->ci_verify_layout = 1;
+ } else {
+ io->ci_restore_needed = 1;
+ io->ci_need_restart = 0;
+ io->ci_verify_layout = 0;
+ io->ci_result = rc;
+ }
+ }
if (!io->ci_ignore_layout && io->ci_verify_layout) {
__u32 gen = 0;
/* check layout version */
ll_layout_refresh(ccc_object_inode(obj), &gen);
io->ci_need_restart = cio->cui_layout_gen != gen;
- if (io->ci_need_restart)
- CDEBUG(D_VFSTRACE, "layout changed from %d to %d.\n",
- cio->cui_layout_gen, gen);
+ if (io->ci_need_restart) {
+ CDEBUG(D_VFSTRACE,
+ DFID" layout changed from %d to %d.\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ cio->cui_layout_gen, gen);
+ /* today successful restore is the only possible
+ * case */
+ /* restore was done, clear restoring state */
+ ll_i2info(ccc_object_inode(obj))->lli_flags &=
+ ~LLIF_FILE_RESTORING;
+ }
}
}
CLOBINVRNT(env, obj, ccc_object_invariant(obj));
ENTRY;
+ CDEBUG(D_VFSTRACE, DFID" ignore/verify layout %d/%d, layout version %d "
+ "restore needed %d\n",
+ PFID(lu_object_fid(&obj->co_lu)),
+ io->ci_ignore_layout, io->ci_verify_layout,
+ cio->cui_layout_gen, io->ci_restore_needed);
+
CL_IO_SLICE_CLEAN(cio, cui_cl);
cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops);
vio->cui_ra_window_set = 0;
cl2ccc_io(env, slice);
return vvp_env_io(env);
}
-
LASSERTF(0, "invalid type %d\n", io->ci_type);
case CIT_MISC:
case CIT_FSYNC:
- result = +1;
+ result = 1;
break;
case CIT_SETATTR:
+ /* the truncate to 0 is managed by MDT:
+ * - in open, for open O_TRUNC
+ * - in setattr, for truncate
+ */
+ /* the truncate is for size > 0 so triggers a restore */
+ if (cl_io_is_trunc(io))
+ io->ci_restore_needed = 1;
+ result = -ENODATA;
+ break;
case CIT_READ:
case CIT_WRITE:
case CIT_FAULT:
- /* TODO: need to restore the file. */
- result = -EBADF;
+ io->ci_restore_needed = 1;
+ result = -ENODATA;
break;
}
if (result == 0) {
b->blocks = 0;
/* if no object is allocated on osts, the size on mds is valid. b=22272 */
b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
- }
+ } else if ((ma->ma_valid & MA_LOV) && ma->ma_lmm &&
+ (ma->ma_lmm->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
+ /* A released file stores its size on MDS. */
+ b->blocks = 0;
+ b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+ }
if (fid) {
b->fid1 = *fid;
RETURN(rc);
}
+ /* if file is released, check if a restore is running */
+ if ((ma->ma_valid & MA_HSM) && (ma->ma_hsm.mh_flags & HS_RELEASED) &&
+ mdt_hsm_restore_is_running(info, mdt_object_fid(o))) {
+ repbody->t_state = MS_RESTORE;
+ repbody->valid |= OBD_MD_TSTATE;
+ }
+
is_root = lu_fid_eq(mdt_object_fid(o), &info->mti_mdt->mdt_md_root_fid);
/* the Lustre protocol supposes to return default striping
#define MDT_HSM_FREE(ptr, size) OBD_FREE_LARGE((ptr), (size))
/*
- * fake functions, will be replace by real one with HSM Coordinator patch
+ * fake functions, will be replaced by real one with HSM Coordinator patch
*/
int mdt_hsm_copytool_send(struct obd_export *exp)
__u64 *compound_id,
int mti_attr_is_valid)
{
- return 0;
+ return -ENODATA;
}
/**
rc = 0;
MDT_HSM_FREE(hal, hal_size);
- EXIT;
+
+ GOTO(out_ucred, rc);
+
out_ucred:
mdt_exit_ucred(info);
return rc;
int mdt_hsm_ct_register(struct mdt_thread_info *info);
int mdt_hsm_ct_unregister(struct mdt_thread_info *info);
int mdt_hsm_request(struct mdt_thread_info *info);
+/* HSM restore cannot be active yet, until the coordinator
+ * patch is landed, so this function always returns false for now, but
+ * allows the other parts of the code to start checking for this.
+ */
+static inline bool mdt_hsm_restore_is_running(struct mdt_thread_info *mti,
+ const struct lu_fid *fid)
+{
+ return false;
+}
/* mdt/mdt_hsm_cdt_actions.c */
extern const struct file_operations mdt_agent_actions_fops;
void lustre_swab_mdt_body (struct mdt_body *b)
{
- lustre_swab_lu_fid (&b->fid1);
- lustre_swab_lu_fid (&b->fid2);
- /* handle is opaque */
- __swab64s (&b->valid);
- __swab64s (&b->size);
- __swab64s (&b->mtime);
- __swab64s (&b->atime);
- __swab64s (&b->ctime);
- __swab64s (&b->blocks);
- __swab64s (&b->ioepoch);
- CLASSERT(offsetof(typeof(*b), unused1) != 0);
- __swab32s (&b->fsuid);
- __swab32s (&b->fsgid);
- __swab32s (&b->capability);
- __swab32s (&b->mode);
- __swab32s (&b->uid);
- __swab32s (&b->gid);
- __swab32s (&b->flags);
- __swab32s (&b->rdev);
- __swab32s (&b->nlink);
+ lustre_swab_lu_fid(&b->fid1);
+ lustre_swab_lu_fid(&b->fid2);
+ /* handle is opaque */
+ __swab64s(&b->valid);
+ __swab64s(&b->size);
+ __swab64s(&b->mtime);
+ __swab64s(&b->atime);
+ __swab64s(&b->ctime);
+ __swab64s(&b->blocks);
+ __swab64s(&b->ioepoch);
+ __swab64s(&b->t_state);
+ __swab32s(&b->fsuid);
+ __swab32s(&b->fsgid);
+ __swab32s(&b->capability);
+ __swab32s(&b->mode);
+ __swab32s(&b->uid);
+ __swab32s(&b->gid);
+ __swab32s(&b->flags);
+ __swab32s(&b->rdev);
+ __swab32s(&b->nlink);
CLASSERT(offsetof(typeof(*b), unused2) != 0);
- __swab32s (&b->suppgid);
- __swab32s (&b->eadatasize);
- __swab32s (&b->aclsize);
- __swab32s (&b->max_mdsize);
- __swab32s (&b->max_cookiesize);
- __swab32s (&b->uid_h);
- __swab32s (&b->gid_h);
- CLASSERT(offsetof(typeof(*b), padding_5) != 0);
+ __swab32s(&b->suppgid);
+ __swab32s(&b->eadatasize);
+ __swab32s(&b->aclsize);
+ __swab32s(&b->max_mdsize);
+ __swab32s(&b->max_cookiesize);
+ __swab32s(&b->uid_h);
+ __swab32s(&b->gid_h);
+ CLASSERT(offsetof(typeof(*b), padding_5) != 0);
}
EXPORT_SYMBOL(lustre_swab_mdt_body);
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
{
/* Wire protocol assertions generated by 'wirecheck'
* (make -C lustre/utils newwiretest)
- * running on Linux deva 2.6.32.279.lustre #5 SMP Tue Apr 9 22:52:17 CST 2013 x86_64 x86_64 x
- * with gcc version 4.4.4 20100726 (Red Hat 4.4.4-13) (GCC) */
+ * running on Linux centos6-bis 2.6.32-358.0.1.el6-head #3 SMP Wed Apr 17 17:37:43 CEST 2013
+ * with gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC) */
/* Constants... */
OBD_MD_REINT);
LASSERTF(OBD_MD_MEA == (0x0000000400000000ULL), "found 0x%.16llxULL\n",
OBD_MD_MEA);
+ LASSERTF(OBD_MD_TSTATE == (0x0000000800000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_TSTATE);
LASSERTF(OBD_MD_FLXATTR == (0x0000001000000000ULL), "found 0x%.16llxULL\n",
OBD_MD_FLXATTR);
LASSERTF(OBD_MD_FLXATTRLS == (0x0000002000000000ULL), "found 0x%.16llxULL\n",
(long long)(int)offsetof(struct mdt_body, blocks));
LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n",
(long long)(int)sizeof(((struct mdt_body *)0)->blocks));
- LASSERTF((int)offsetof(struct mdt_body, unused1) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, unused1));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->unused1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->unused1));
+ LASSERTF((int)offsetof(struct mdt_body, t_state) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, t_state));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->t_state) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->t_state));
LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n",
(long long)(int)offsetof(struct mdt_body, fsuid));
LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n",
[ $stripe_count -eq 2 ] || error "stripe count not 2 ($stripe_count)"
stat $DIR/$tfile || error "failed to stat released file"
- # Truncate should fail.
$TRUNCATE $DIR/$tfile 200000 &&
error "truncate of released file should fail"
stripe_count=$($GETSTRIPE -c $DIR/$tfile) || error "getstripe failed"
[ $stripe_count -eq 2 ] || error "after trunc: ($stripe_count)"
+ chown $RUNAS_ID $DIR/$tfile ||
+ error "chown $RUNAS_ID $DIR/$tfile failed"
+
+ chgrp $RUNAS_ID $DIR/$tfile ||
+ error "chgrp $RUNAS_ID $DIR/$tfile failed"
+
+ touch $DIR/$tfile ||
+ error "touch $DIR/$tfile failed"
+
rm $DIR/$tfile || error "failed to remove released file"
}
-run_test 229 "getstripe/stat/rm work on released files (stripe count = 2)"
+run_test 229 "getstripe/stat/rm/attr changes work on released files"
test_230a() {
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
CHECK_DEFINE_64X(OBD_MD_MDS);
CHECK_DEFINE_64X(OBD_MD_REINT);
CHECK_DEFINE_64X(OBD_MD_MEA);
+ CHECK_DEFINE_64X(OBD_MD_TSTATE);
CHECK_DEFINE_64X(OBD_MD_FLXATTR);
CHECK_DEFINE_64X(OBD_MD_FLXATTRLS);
CHECK_DEFINE_64X(OBD_MD_FLXATTRRM);
CHECK_MEMBER(mdt_body, atime);
CHECK_MEMBER(mdt_body, ctime);
CHECK_MEMBER(mdt_body, blocks);
- CHECK_MEMBER(mdt_body, unused1);
+ CHECK_MEMBER(mdt_body, t_state);
CHECK_MEMBER(mdt_body, fsuid);
CHECK_MEMBER(mdt_body, fsgid);
CHECK_MEMBER(mdt_body, capability);
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2013, Intel Corporation.
+ * Copyright (c) 2011, 2012, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
{
/* Wire protocol assertions generated by 'wirecheck'
* (make -C lustre/utils newwiretest)
- * running on Linux deva 2.6.32.279.lustre #5 SMP Tue Apr 9 22:52:17 CST 2013 x86_64 x86_64 x
- * with gcc version 4.4.4 20100726 (Red Hat 4.4.4-13) (GCC) */
+ * running on Linux centos6-bis 2.6.32-358.0.1.el6-head #3 SMP Wed Apr 17 17:37:43 CEST 2013
+ * with gcc version 4.4.6 20110731 (Red Hat 4.4.6-3) (GCC) */
/* Constants... */
OBD_MD_REINT);
LASSERTF(OBD_MD_MEA == (0x0000000400000000ULL), "found 0x%.16llxULL\n",
OBD_MD_MEA);
+ LASSERTF(OBD_MD_TSTATE == (0x0000000800000000ULL), "found 0x%.16llxULL\n",
+ OBD_MD_TSTATE);
LASSERTF(OBD_MD_FLXATTR == (0x0000001000000000ULL), "found 0x%.16llxULL\n",
OBD_MD_FLXATTR);
LASSERTF(OBD_MD_FLXATTRLS == (0x0000002000000000ULL), "found 0x%.16llxULL\n",
(long long)(int)offsetof(struct mdt_body, blocks));
LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n",
(long long)(int)sizeof(((struct mdt_body *)0)->blocks));
- LASSERTF((int)offsetof(struct mdt_body, unused1) == 96, "found %lld\n",
- (long long)(int)offsetof(struct mdt_body, unused1));
- LASSERTF((int)sizeof(((struct mdt_body *)0)->unused1) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct mdt_body *)0)->unused1));
+ LASSERTF((int)offsetof(struct mdt_body, t_state) == 96, "found %lld\n",
+ (long long)(int)offsetof(struct mdt_body, t_state));
+ LASSERTF((int)sizeof(((struct mdt_body *)0)->t_state) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct mdt_body *)0)->t_state));
LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n",
(long long)(int)offsetof(struct mdt_body, fsuid));
LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n",