#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
#define ATTR_CTIME_SET 0x2000
+#define ATTR_BLOCKS 0x4000
struct iattr {
unsigned int ia_valid;
time_t ia_ctime;
unsigned int ia_attr_flags;
};
-#define ll_iattr_struct iattr
+
+#define ll_iattr iattr
#define IT_OPEN 0x0001
#define IT_CREAT 0x0002
#include <linux/lustre_patchless_compat.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
-struct ll_iattr_struct {
+struct ll_iattr {
struct iattr iattr;
unsigned int ia_attr_flags;
};
#else
-#define ll_iattr_struct iattr
+#define ll_iattr iattr
#endif
#ifndef HAVE_SET_FS_PWD
#define ll_set_fs_pwd set_fs_pwd
#endif
+#define ATTR_BLOCKS 0x4000
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15)
#define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0)
#define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0)
/* obdo.c */
#ifdef __KERNEL__
-void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid);
-void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid);
void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
void obdo_from_la(struct obdo *dst, struct lu_attr *la, obd_flag valid);
void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
struct md_op_data {
struct lu_fid fid1;
struct lu_fid fid2;
+ struct lustre_handle handle;
__u64 mod_time;
const char *name;
int namelen;
__u32 suppgids[2];
__u32 fsuid;
__u32 fsgid;
- /* part of obdo fields for md stack */
- obd_valid valid;
- obd_size size;
- obd_blocks blocks;
- obd_flag flags;
- obd_time mtime;
- obd_time atime;
- obd_time ctime;
+
+ /* iattr fields and blocks. */
+ struct iattr attr;
+#ifdef __KERNEL__
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
+ unsigned int attr_flags;
+#endif
+#endif
+ loff_t attr_blocks;
+
+ /* Size-on-MDS epoch and flags. */
+ __u64 ioepoch;
+ __u32 flags;
};
#define MDS_MODE_DONT_LOCK (1 << 30)
#define OBD_MD_FLGROUP (0x01000000ULL) /* group */
#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */
#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write easize is epoch */
+ /* ->mds if epoch opens or closes */
#define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */
#define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */
#define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */
extern void lustre_swab_mds_status_req (struct mds_status_req *r);
-#define MDS_BFLAG_UNCOMMITTED_WRITES 0x1
+/* Flag bits stored in mdt_thread_info.mti_flags. */
+enum mdt_ioepoch_flags {
+        /* Size-on-MDS attributes have been changed. */
+        MF_SOM_CHANGE  = 0x1,
+        /* An epoch is being opened. */
+        MF_EPOCH_OPEN  = 0x2,
+        /* An epoch is being closed. */
+        MF_EPOCH_CLOSE = 0x4,
+};
+
#define MDS_BFLAG_EXT_FLAGS 0x80000000 /* == EXT3_RESERVED_FL */
/* these should be identical to their EXT3_*_FL counterparts, and are
__u64 atime;
__u64 ctime;
__u64 blocks; /* XID, in the case of MDS_READPAGE */
- __u64 io_epoch;
+ __u64 ioepoch;
__u32 fsuid;
__u32 fsgid;
__u32 capability;
extern void lustre_swab_mds_body (struct mds_body *b);
extern void lustre_swab_mdt_body (struct mdt_body *b);
+struct mdt_epoch { /* same handle/ioepoch/flags triple that struct md_op_data carries */
+ struct lustre_handle handle; /* NOTE(review): presumably the open file handle the epoch belongs to - confirm */
+ __u64 ioepoch; /* IO epoch number */
+ __u32 flags; /* NOTE(review): presumably enum mdt_ioepoch_flags (MF_*) bits - confirm */
+};
+
+extern void lustre_swab_mdt_epoch (struct mdt_body *b);
+
struct lustre_md {
struct mdt_body *body;
struct lov_stripe_md *lsm;
struct lu_fid sa_fid;
__u64 sa_valid;
__u64 sa_size;
+ __u64 sa_blocks;
__u64 sa_mtime;
__u64 sa_atime;
__u64 sa_ctime;
#define FMODE_READ 00000001
#define FMODE_WRITE 00000002
#endif
+
+#define FMODE_EPOCH 01000000
+#define FMODE_EPOCHLCK 02000000
+#define FMODE_SOM 04000000
+#define FMODE_CLOSED 0
+
#define MDS_FMODE_EXEC 00000004
#define MDS_OPEN_CREAT 00000100
#define MDS_OPEN_EXCL 00000200
struct llog_size_change_rec {
struct llog_rec_hdr lsc_hdr;
struct ll_fid lsc_fid;
- __u32 lsc_io_epoch;
+ __u32 lsc_ioepoch;
__u32 padding;
struct llog_rec_tail lsc_tail;
} __attribute__((packed));
/* Add a handle to the hash table */
void class_handle_hash(struct portals_handle *, portals_handle_addref_cb);
void class_handle_unhash(struct portals_handle *);
+void class_handle_hash_back(struct portals_handle *);
void *class_handle2object(__u64 cookie);
int class_handle_init(void);
void class_handle_cleanup(void);
extern const struct req_msg_field RMF_PTLRPC_BODY;
extern const struct req_msg_field RMF_MDT_BODY;
+extern const struct req_msg_field RMF_MDT_EPOCH;
extern const struct req_msg_field RMF_OBD_STATFS;
extern const struct req_msg_field RMF_NAME;
extern const struct req_msg_field RMF_SYMTGT;
int (*m_create)(struct obd_export *, struct md_op_data *,
const void *, int, int, __u32, __u32, __u32,
__u64, struct ptlrpc_request **);
- int (*m_done_writing)(struct obd_export *, struct md_op_data *);
+ int (*m_done_writing)(struct obd_export *, struct md_op_data *,
+ struct obd_client_handle *);
int (*m_enqueue)(struct obd_export *, int, struct lookup_intent *,
int, struct md_op_data *, struct lustre_handle *,
void *, int, ldlm_completion_callback,
int (*m_rename)(struct obd_export *, struct md_op_data *,
const char *, int, const char *, int,
struct ptlrpc_request **);
- int (*m_setattr)(struct obd_export *, struct md_op_data *,
- struct iattr *, void *, int , void *, int,
- struct ptlrpc_request **);
+ int (*m_setattr)(struct obd_export *, struct md_op_data *, void *,
+ int , void *, int, struct ptlrpc_request **);
int (*m_sync)(struct obd_export *, const struct lu_fid *,
struct ptlrpc_request **);
int (*m_readpage)(struct obd_export *, const struct lu_fid *,
void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare);
void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
-
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr,
+ unsigned int ia_valid);
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid);
+void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, obd_flag valid);
+void obdo_from_md(struct obdo *oa, struct md_op_data *op_data,
+ unsigned int valid);
#define OBT(dev) (dev)->obd_type
#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->o_ ## op
}
static inline int md_done_writing(struct obd_export *exp,
- struct md_op_data *op_data)
+ struct md_op_data *op_data,
+ struct obd_client_handle *och)
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, done_writing);
MD_COUNTER_INCREMENT(exp->exp_obd, done_writing);
- rc = MDP(exp->exp_obd, done_writing)(exp, op_data);
+ rc = MDP(exp->exp_obd, done_writing)(exp, op_data, och);
RETURN(rc);
}
}
static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
- struct iattr *iattr, void *ea, int ealen,
- void *ea2, int ea2len, struct ptlrpc_request **request)
+ void *ea, int ealen, void *ea2, int ea2len,
+ struct ptlrpc_request **request)
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, setattr);
MD_COUNTER_INCREMENT(exp->exp_obd, setattr);
- rc = MDP(exp->exp_obd, setattr)(exp, op_data, iattr, ea, ealen,
+ rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen,
ea2, ea2len, request);
RETURN(rc);
}
return rc;
}
+/*
+ * Send a Size-on-MDS attribute update for @inode on behalf of the open
+ * handle @fh.
+ *
+ * The attributes are fetched with llu_inode_getattr(), converted into an
+ * md_op_data by md_from_obdo(), tagged with MF_SOM_CHANGE and sent through
+ * llu_md_setattr().
+ *
+ * LLIF_MDS_SIZE_LOCK must not be set on the inode (asserted).
+ *
+ * Returns the llu_inode_getattr() error if that step fails, otherwise the
+ * result of llu_md_setattr().
+ */
+int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        /* Both structures start zeroed: op_data.flags is OR-ed into below,
+         * and llu_inode_getattr() assigns only selected obdo fields. */
+        struct md_op_data op_data = { { 0 } };
+        struct obdo oa = { 0 };
+        int rc;
+        ENTRY;
+
+        LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK));
+
+        rc = llu_inode_getattr(inode, &oa);
+        if (rc) {
+                CERROR("inode_getattr failed (%d): unable to send a "
+                       "Size-on-MDS attribute update for inode %llu/%lu\n",
+                       rc, (long long)llu_i2stat(inode)->st_ino,
+                       lli->lli_st_generation);
+                RETURN(rc);
+        }
+
+        md_from_obdo(&op_data, &oa, oa.o_valid);
+        memcpy(&op_data.handle, fh, sizeof(*fh));
+        op_data.flags |= MF_SOM_CHANGE;
+
+        rc = llu_md_setattr(inode, &op_data);
+        RETURN(rc);
+}
+
int llu_md_close(struct obd_export *md_exp, struct inode *inode)
{
struct llu_inode_info *lli = llu_i2info(inode);
&fd->fd_cwlockh);
}
- memset(&op_data, 0, sizeof(op_data));
- op_data.fid1 = lli->lli_fid;
- op_data.valid = OBD_MD_FLTYPE | OBD_MD_FLMODE |
- OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
- OBD_MD_FLATIME | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME;
-
- op_data.atime = LTIME_S(st->st_atime);
- op_data.mtime = LTIME_S(st->st_mtime);
- op_data.ctime = LTIME_S(st->st_ctime);
- op_data.size = st->st_size;
- op_data.blocks = st->st_blocks;
- op_data.flags = lli->lli_st_flags;
-
- if (test_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags))
- op_data.valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-
- if (0 /* ll_is_inode_dirty(inode) */) {
- op_data.flags = MDS_BFLAG_UNCOMMITTED_WRITES;
- op_data.valid |= OBD_MD_FLFLAGS;
+ op_data.attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
+ ATTR_MTIME_SET | ATTR_CTIME_SET;
+
+ if (!S_ISREG(llu_i2stat(inode)->st_mode)) {
+ op_data.attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+ } else {
+ /* Inode cannot be dirty. Close the epoch. */
+ op_data.flags |= MF_EPOCH_CLOSE;
+ /* XXX: Send CHANGE flag only if Size-on-MDS inode attributes
+ * are really changed. */
+ op_data.flags |= MF_SOM_CHANGE;
+
+ /* Pack Size-on-MDS inode attributes if valid. */
+ if ((lli->lli_flags & LLIF_MDS_SIZE_LOCK) ||
+ !llu_local_size(inode))
+ op_data.attr.ia_valid |=
+ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
}
+
+ op_data.fid1 = lli->lli_fid;
+ op_data.attr.ia_atime = st->st_atime;
+ op_data.attr.ia_mtime = st->st_mtime;
+ op_data.attr.ia_ctime = st->st_ctime;
+ op_data.attr.ia_size = st->st_size;
+ op_data.attr_blocks = st->st_blocks;
+ op_data.attr.ia_attr_flags = lli->lli_st_flags;
+ op_data.ioepoch = lli->lli_ioepoch;
+ memcpy(&op_data.handle, &och->och_fh, sizeof(op_data.handle));
+
rc = md_close(md_exp, &op_data, och, &req);
if (rc == EAGAIN) {
/* We are the last writer, so the MDS has instructed us to get
* the file size and any write cookies, then close again. */
- //ll_queue_done_writing(inode);
- rc = 0;
+ rc = llu_sizeonmds_update(inode, &och->och_fh);
+ if (rc) {
+ CERROR("inode %llu mdc Size-on-MDS update failed: "
+ "rc = %d\n", (long long)st->st_ino, rc);
+ rc = 0;
+ }
} else if (rc) {
CERROR("inode %llu close failed: rc %d\n",
(long long)st->st_ino, rc);
#define LL_SBI_NOLCK 0x1
-#define LLI_F_HAVE_OST_SIZE_LOCK 0
-#define LLI_F_HAVE_MDS_SIZE_LOCK 1
+enum lli_flags {
+        /* The MDS is authoritative for the Size-on-MDS attributes. */
+        LLIF_MDS_SIZE_LOCK = 0x1,
+};
struct llu_inode_info {
struct llu_sb_info *lli_sbi;
struct semaphore lli_open_sem;
__u64 lli_maxbytes;
unsigned long lli_flags;
+ __u64 lli_ioepoch;
/* for libsysio */
struct file_identifier lli_sysio_fid;
void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
int ll_it_open_error(int phase, struct lookup_intent *it);
struct inode *llu_iget(struct filesys *fs, struct lustre_md *md);
-int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm);
+int llu_inode_getattr(struct inode *inode, struct obdo *obdo);
+int llu_md_setattr(struct inode *inode, struct md_op_data *op_data);
int llu_setattr_raw(struct inode *inode, struct iattr *attr);
extern struct fssw_ops llu_fssw_ops;
int llu_iop_open(struct pnode *pnode, int flags, mode_t mode);
int llu_md_close(struct obd_export *md_exp, struct inode *inode);
int llu_file_release(struct inode *inode);
+int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh);
int llu_iop_close(struct inode *inode);
_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off);
int llu_vmtruncate(struct inode * inode, loff_t offset, obd_flag obd_flags);
int llu_iop_read(struct inode *ino, struct ioctx *ioctxp);
int llu_iop_write(struct inode *ino, struct ioctx *ioctxp);
int llu_iop_iodone(struct ioctx *ioctxp);
+int llu_local_size(struct inode *inode);
int llu_glimpse_size(struct inode *inode);
int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
struct lov_stripe_md *lsm, int mode,
struct llu_inode_info *lli;
struct intnl_stat *st;
__u64 bits = lock->l_policy_data.l_inodebits.bits;
+ struct lu_fid *fid;
/* Invalidate all dentries associated with this inode */
if (inode == NULL)
st = llu_i2stat(inode);
if (bits & MDS_INODELOCK_UPDATE)
- clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
-
- if (lock->l_resource->lr_name.name[0] != fid_seq(&lli->lli_fid) ||
- lock->l_resource->lr_name.name[1] != fid_oid(&lli->lli_fid) ||
- lock->l_resource->lr_name.name[2] != fid_ver(&lli->lli_fid)) {
- LDLM_ERROR(lock, "data mismatch with ino %llu/%llu/%llu",
- (long long)fid_seq(&lli->lli_fid),
- (long long)fid_oid(&lli->lli_fid),
- (long long)fid_ver(&lli->lli_fid));
+ lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
+
+ fid = &lli->lli_fid;
+ if (lock->l_resource->lr_name.name[0] != fid_seq(fid) ||
+ lock->l_resource->lr_name.name[1] != fid_oid(fid) ||
+ lock->l_resource->lr_name.name[2] != fid_ver(fid)) {
+ LDLM_ERROR(lock,"data mismatch with ino %llu/%llu/%llu",
+ (long long)fid_seq(fid),
+ (long long)fid_oid(fid),
+ (long long)fid_ver(fid));
}
if (S_ISDIR(st->st_mode) &&
(bits & MDS_INODELOCK_UPDATE)) {
return rc;
}
+/*
+ * Refresh the cached stat of @inode (size, blocks and the three timestamps)
+ * from the lock value block produced by obd_merge_lvb() for its stripes.
+ */
+static void llu_merge_lvb(struct inode *inode)
+{
+        struct llu_inode_info *info = llu_i2info(inode);
+        struct llu_sb_info *sb_info = llu_i2sbi(inode);
+        struct intnl_stat *stat = llu_i2stat(inode);
+        struct ost_lvb merged;
+        ENTRY;
+
+        /* Seed the LVB from the inode, then merge in the per-stripe data. */
+        inode_init_lvb(inode, &merged);
+        obd_merge_lvb(sb_info->ll_dt_exp, info->lli_smd, &merged, 0);
+
+        stat->st_size   = merged.lvb_size;
+        stat->st_blocks = merged.lvb_blocks;
+        stat->st_mtime  = merged.lvb_mtime;
+        stat->st_atime  = merged.lvb_atime;
+        stat->st_ctime  = merged.lvb_ctime;
+        EXIT;
+}
+
+/*
+ * Refresh the cached size of @inode from lock value blocks, but only when an
+ * extent lock over [0, OBD_OBJECT_EOF] can be matched through obd_match().
+ *
+ * Returns 0 when the file has no stripes (nothing is touched) or after the
+ * attributes were merged, the negative obd_match() error, or -ENODATA when
+ * no lock matched.
+ */
+int llu_local_size(struct inode *inode)
+{
+        ldlm_policy_data_t whole_file = { .l_extent = { 0, OBD_OBJECT_EOF } };
+        struct llu_inode_info *info = llu_i2info(inode);
+        struct llu_sb_info *sb_info = llu_i2sbi(inode);
+        struct lustre_handle matched = { 0 };
+        int match_flags = 0;
+        int rc;
+        ENTRY;
+
+        if (info->lli_smd->lsm_stripe_count == 0)
+                RETURN(0);
+
+        rc = obd_match(sb_info->ll_dt_exp, info->lli_smd, LDLM_EXTENT,
+                       &whole_file, LCK_PR | LCK_PW, &match_flags, inode,
+                       &matched);
+        if (rc < 0)
+                RETURN(rc);
+        if (rc == 0)
+                RETURN(-ENODATA);
+
+        llu_merge_lvb(inode);
+        /* NOTE(review): the match above asks for LCK_PR | LCK_PW but only
+         * LCK_PR is cancelled here, whereas ll_local_size() cancels
+         * LCK_PR | LCK_PW - confirm which mode is intended. */
+        obd_cancel(sb_info->ll_dt_exp, info->lli_smd, LCK_PR, &matched);
+        RETURN(0);
+}
+
/* NB: lov_merge_size will prefer locally cached writes if they extend the
* file (because it prefers KMS over RSS when larger) */
int llu_glimpse_size(struct inode *inode)
struct lustre_handle lockh = { 0 };
struct obd_enqueue_info einfo = { 0 };
struct obd_info oinfo = { { { 0 } } };
- struct ost_lvb lvb;
int rc;
ENTRY;
+ /* If size is cached on the mds, skip glimpse. */
+ if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
+ RETURN(0);
+
CDEBUG(D_DLMTRACE, "Glimpsing inode %llu\n", (long long)st->st_ino);
if (!lli->lli_smd) {
RETURN(rc > 0 ? -EIO : rc);
}
- inode_init_lvb(inode, &lvb);
- obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
- st->st_size = lvb.lvb_size;
- st->st_blocks = lvb.lvb_blocks;
- st->st_mtime = lvb.lvb_mtime;
- st->st_atime = lvb.lvb_atime;
- st->st_ctime = lvb.lvb_ctime;
-
+ llu_merge_lvb(inode);
CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n",
(long long)st->st_size, (long long)st->st_blocks);
/*
* really does the getattr on the inode and updates its fields
*/
-int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
+int llu_inode_getattr(struct inode *inode, struct obdo *obdo)
{
struct llu_inode_info *lli = llu_i2info(inode);
- struct obd_export *exp = llu_i2obdexp(inode);
struct ptlrpc_request_set *set;
+ struct lov_stripe_md *lsm = lli->lli_smd;
struct obd_info oinfo = { { { 0 } } };
- struct obdo oa = { 0 };
- obd_flag refresh_valid;
int rc;
ENTRY;
LASSERT(lsm);
- LASSERT(lli);
oinfo.oi_md = lsm;
- oinfo.oi_oa = &oa;
- oa.o_id = lsm->lsm_object_id;
- oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
- OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME;
+ oinfo.oi_oa = obdo;
+ oinfo.oi_oa->o_id = lsm->lsm_object_id;
+ oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
+ oinfo.oi_oa->o_mode = S_IFREG;
+ oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
+ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
+ OBD_MD_FLCTIME;
set = ptlrpc_prep_set();
if (set == NULL) {
CERROR ("ENOMEM allocing request set\n");
rc = -ENOMEM;
} else {
- rc = obd_getattr_async(exp, &oinfo, set);
+ rc = obd_getattr_async(llu_i2obdexp(inode), &oinfo, set);
if (rc == 0)
rc = ptlrpc_set_wait(set);
ptlrpc_set_destroy(set);
if (rc)
RETURN(rc);
- refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME | OBD_MD_FLSIZE;
-
- obdo_refresh_inode(inode, &oa, refresh_valid);
+ oinfo.oi_oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLSIZE;
+ obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid);
+ CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %Lu, "
+ "blksize %Lu\n", lli->lli_smd->lsm_object_id,
+ (long long unsigned)llu_i2stat(inode)->st_size,
+ (long long unsigned)llu_i2stat(inode)->st_blocks,
+ (long long unsigned)llu_i2stat(inode)->st_blksize);
RETURN(0);
}
llu_update_inode(inode, md.body, md.lsm);
if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm)
obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
-
if (md.body->valid & OBD_MD_FLSIZE)
- set_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
- &llu_i2info(inode)->lli_flags);
+ llu_i2info(inode)->lli_flags |= LLIF_MDS_SIZE_LOCK;
ptlrpc_req_finished(req);
}
(long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
inode);
- clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags));
+ lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
null_if_equal, inode);
return error;
}
+/*
+ * Send the attributes described by @op_data to the MDS and apply the reply
+ * to @inode.
+ *
+ * @op_data stays owned by the caller and is NOT freed here: the callers in
+ * this file pass the address of a stack variable, and op_data->attr is still
+ * needed after the RPC for inode_setattr().
+ *
+ * Returns 0 on success, or the error from md_setattr() or
+ * md_get_lustre_md(). The request is released on every path.
+ */
+int llu_md_setattr(struct inode *inode, struct md_op_data *op_data)
+{
+        struct lustre_md md;
+        struct llu_sb_info *sbi = llu_i2sbi(inode);
+        struct ptlrpc_request *request = NULL;
+        int rc;
+        ENTRY;
+
+        llu_prepare_md_op_data(op_data, inode, NULL, NULL, 0, 0);
+        rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &request);
+
+        if (rc) {
+                ptlrpc_req_finished(request);
+                /* Permission failures are expected; do not log them. */
+                if (rc != -EPERM && rc != -EACCES)
+                        CERROR("md_setattr fails: rc = %d\n", rc);
+                RETURN(rc);
+        }
+
+        rc = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF,
+                              sbi->ll_dt_exp, sbi->ll_md_exp, &md);
+        if (rc) {
+                ptlrpc_req_finished(request);
+                RETURN(rc);
+        }
+
+        /* We call inode_setattr to adjust timestamps.
+         * If there is at least some data in the file, the caller is expected
+         * to have cleared ATTR_SIZE to avoid invoking vmtruncate; otherwise
+         * it is important to call vmtruncate in inode_setattr to update
+         * inode->i_size (bug 6196) */
+        inode_setattr(inode, &op_data->attr);
+        llu_update_inode(inode, md.body, md.lsm);
+        ptlrpc_req_finished(request);
+
+        RETURN(rc);
+}
+
+/*
+ * Close the IO epoch and send a Size-on-MDS attribute update.
+ *
+ * Does nothing and returns 0 for anything but a regular file. Otherwise
+ * @op_data->flags is overwritten with MF_EPOCH_CLOSE | MF_SOM_CHANGE and
+ * md_done_writing() is called. If that returns EAGAIN (positive, as it is
+ * also tested in llu_md_close()), the attributes are fetched and sent via
+ * llu_sizeonmds_update(), whose result is returned. Any other non-zero
+ * result is logged and returned.
+ */
+static int llu_setattr_done_writing(struct inode *inode,
+                                    struct md_op_data *op_data)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(op_data != NULL);
+        if (!S_ISREG(st->st_mode))
+                RETURN(0);
+
+        /* XXX: pass och here for the recovery purpose. */
+        CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n",
+               op_data->ioepoch, PFID(&lli->lli_fid));
+
+        op_data->flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
+        rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, NULL);
+        if (rc == EAGAIN) {
+                /* MDS has instructed us to obtain Size-on-MDS attributes
+                 * from the OSTs and send a setattr back to the MDS. */
+                rc = llu_sizeonmds_update(inode, &op_data->handle);
+        } else if (rc) {
+                /* Cast st_ino so the argument matches %llu, as the other
+                 * CERROR calls on st_ino in this file do. */
+                CERROR("inode %llu mdc truncate failed: rc = %d\n",
+                       (long long)st->st_ino, rc);
+        }
+        RETURN(rc);
+}
+
/* If this inode has objects allocated to it (lsm != NULL), then the OST
* object(s) determine the file size and mtime. Otherwise, the MDS will
* keep these values until such a time that objects are allocated for it.
struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
struct llu_sb_info *sbi = llu_i2sbi(inode);
struct intnl_stat *st = llu_i2stat(inode);
- struct ptlrpc_request *request = NULL;
int ia_valid = attr->ia_valid;
- struct md_op_data op_data;
+ struct md_op_data op_data = { { 0 } };
int rc = 0;
ENTRY;
CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
LTIME_S(CURRENT_TIME));
+
+ /* NB: ATTR_SIZE will only be set after this point if the size
+ * resides on the MDS, ie, this file has no objects. */
if (lsm)
attr->ia_valid &= ~ATTR_SIZE;
/* If only OST attributes being set on objects, don't do MDS RPC.
* In that case, we need to check permissions and update the local
* inode ourselves so we can call obdo_from_inode() always. */
- if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
- struct lustre_md md;
-
- llu_prepare_md_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
- rc = md_setattr(sbi->ll_md_exp, &op_data,
- attr, NULL, 0, NULL, 0, &request);
+ if (ia_valid & (lsm ? ~(ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
+ memcpy(&op_data.attr, attr, sizeof(*attr));
- if (rc) {
- ptlrpc_req_finished(request);
- if (rc != -EPERM && rc != -EACCES)
- CERROR("md_setattr fails: rc = %d\n", rc);
- RETURN(rc);
- }
-
- rc = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF,
- sbi->ll_dt_exp, sbi->ll_md_exp, &md);
- if (rc) {
- ptlrpc_req_finished(request);
+ /* Open epoch for truncate. */
+ if (ia_valid & ATTR_SIZE)
+ op_data.flags = MF_EPOCH_OPEN;
+ rc = llu_md_setattr(inode, &op_data);
+ if (rc)
RETURN(rc);
- }
-
- /* We call inode_setattr to adjust timestamps.
- * If there is at least some data in file, we cleared ATTR_SIZE
- * above to avoid invoking vmtruncate, otherwise it is important
- * to call vmtruncate in inode_setattr to update inode->i_size
- * (bug 6196) */
- inode_setattr(inode, attr);
- llu_update_inode(inode, md.body, md.lsm);
- ptlrpc_req_finished(request);
if (!lsm || !S_ISREG(st->st_mode)) {
CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
- RETURN(0);
+ if (op_data.ioepoch)
+ rc = llu_setattr_done_writing(inode, &op_data);
+ RETURN(rc);
}
} else {
/* The OST doesn't check permissions, but the alternative is
}
}
+
/* Won't invoke llu_vmtruncate(), as we already cleared
* ATTR_SIZE */
inode_setattr(inode, attr);
if (!rc)
rc = err;
}
+
+ if (op_data.ioepoch)
+ rc = llu_setattr_done_writing(inode, &op_data);
} else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
struct obd_info oinfo = { { { 0 } } };
struct obdo oa;
struct llu_sb_info *sbi = llu_i2sbi(ino);
struct ptlrpc_request *request = NULL;
struct md_op_data op_data;
- struct iattr attr = { 0 };
struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
int rc = 0;
lustre_swab_lov_user_md(&lum);
/* swabbing is done in lov_setstripe() on server side */
- rc = md_setattr(sbi->ll_md_exp, &op_data,
- &attr, &lum, sizeof(lum), NULL, 0, &request);
+ rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
+ sizeof(lum), NULL, 0, &request);
if (rc) {
ptlrpc_req_finished(request);
if (rc != -EPERM && rc != -EACCES)
struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
struct ptlrpc_request *request = NULL;
struct md_op_data *op_data;
- struct iattr attr = { 0 };
int rc = 0;
OBD_ALLOC_PTR(op_data);
lustre_swab_lov_user_md(&lum);
/* swabbing is done in lov_setstripe() on server side */
- rc = md_setattr(sbi->ll_md_exp, op_data,
- &attr, &lum, sizeof(lum), NULL, 0, &request);
+ rc = md_setattr(sbi->ll_md_exp, op_data, &lum,
+ sizeof(lum), NULL, 0, &request);
if (rc) {
if (rc != -EPERM && rc != -EACCES)
CERROR("md_setattr fails: rc = %d\n", rc);
OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof *fd);
}
+/*
+ * Copy the fid, timestamps, size, blocks, flags and IO epoch of @inode, plus
+ * the open handle @fh, into @op_data.
+ */
+void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
+                          struct lustre_handle *fh)
+{
+        struct ll_inode_info *info = ll_i2info(inode);
+        struct iattr *attr = &op_data->attr;
+
+        op_data->fid1 = info->lli_fid;
+
+        attr->ia_atime = inode->i_atime;
+        attr->ia_mtime = inode->i_mtime;
+        attr->ia_ctime = inode->i_ctime;
+        attr->ia_size = inode->i_size;
+        op_data->attr_blocks = inode->i_blocks;
+        /* ia_attr_flags is reached through struct ll_iattr rather than
+         * directly through struct iattr. */
+        ((struct ll_iattr *)attr)->ia_attr_flags = inode->i_flags;
+
+        op_data->ioepoch = info->lli_ioepoch;
+        memcpy(&op_data->handle, fh, sizeof(op_data->handle));
+}
+
+/*
+ * Fill @op_data for closing the open handle @och on @inode.
+ *
+ * Mode and the three timestamps are always marked valid. For non-regular
+ * files size and blocks are marked valid as well. For regular files, under
+ * lli_lock, the epoch is either closed right away via ll_epoch_close() or,
+ * when pending-write pages exist and no epoch close is pending yet, @och is
+ * recorded in lli_pending_och and LLIF_EPOCH_PENDING is set.
+ * The inode attributes and och->och_fh are packed in all cases.
+ */
+static void ll_prepare_close(struct inode *inode, struct md_op_data *op_data,
+                             struct obd_client_handle *och)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        ENTRY;
+
+        op_data->attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
+                                 ATTR_MTIME_SET | ATTR_CTIME_SET;
+
+        if (S_ISREG(inode->i_mode)) {
+                spin_lock(&lli->lli_lock);
+                if (list_empty(&lli->lli_pending_write_llaps) ||
+                    (lli->lli_flags & LLIF_EPOCH_PENDING)) {
+                        ll_epoch_close(inode, op_data);
+                } else {
+                        LASSERT(lli->lli_pending_och == NULL);
+                        /* Inode is dirty and there is no pending write done
+                         * request yet, DONE_WRITE is to be sent later. */
+                        lli->lli_flags |= LLIF_EPOCH_PENDING;
+                        lli->lli_pending_och = och;
+                }
+                spin_unlock(&lli->lli_lock);
+        } else {
+                op_data->attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+        }
+
+        ll_pack_inode2opdata(inode, op_data, &och->och_fh);
+        EXIT;
+}
+
static int ll_close_inode_openhandle(struct obd_export *md_exp,
struct inode *inode,
struct obd_client_handle *och)
struct md_op_data *op_data;
struct ptlrpc_request *req = NULL;
struct obd_device *obd;
+ int epoch_close = 1;
int rc;
ENTRY;
if (op_data == NULL)
RETURN(-ENOMEM);
- op_data->fid1 = ll_i2info(inode)->lli_fid;
- op_data->valid = OBD_MD_FLTYPE | OBD_MD_FLMODE |
- OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
- OBD_MD_FLATIME | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME;
-
- op_data->atime = LTIME_S(inode->i_atime);
- op_data->mtime = LTIME_S(inode->i_mtime);
- op_data->ctime = LTIME_S(inode->i_ctime);
- op_data->size = inode->i_size;
- op_data->blocks = inode->i_blocks;
- op_data->flags = inode->i_flags;
-
- if (0 /* ll_is_inode_dirty(inode) */) {
- op_data->flags = MDS_BFLAG_UNCOMMITTED_WRITES;
- op_data->valid |= OBD_MD_FLFLAGS;
- }
-
+ ll_prepare_close(inode, op_data, och);
+ epoch_close = (op_data->flags & MF_EPOCH_CLOSE);
rc = md_close(md_exp, op_data, och, &req);
- OBD_FREE_PTR(op_data);
if (rc == EAGAIN) {
- /* We are the last writer, so the MDS has instructed us to get
- * the file size and any write cookies, then close again. */
- //ll_queue_done_writing(inode);
- rc = 0;
+ /* This close must have closed the epoch. */
+ LASSERT(epoch_close);
+ /* MDS has instructed us to obtain Size-on-MDS attribute from
+ * OSTs and send setattr to back to MDS. */
+ rc = ll_sizeonmds_update(inode, &och->och_fh);
+ if (rc) {
+ CERROR("inode %lu mdc Size-on-MDS update failed: "
+ "rc = %d\n", inode->i_ino, rc);
+ rc = 0;
+ }
} else if (rc) {
CERROR("inode %lu mdc close failed: rc = %d\n",
inode->i_ino, rc);
+ } else if (!epoch_close) {
+ ll_queue_done_writing(inode);
}
+ OBD_FREE_PTR(op_data);
if (rc == 0) {
rc = ll_objects_destroy(req, inode);
EXIT;
out:
md_clear_open_replay_data(md_exp, och);
+ if (epoch_close)
+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
return rc;
}
already */
rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
inode, och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
- OBD_FREE(och, sizeof *och);
+ /* Do not free @och if it is waiting for DONE_WRITING. */
+ if (och->och_fh.cookie == DEAD_HANDLE_MAGIC)
+ OBD_FREE(och, sizeof *och);
}
RETURN(rc);
memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
och->och_fid = &lli->lli_fid;
- lli->lli_io_epoch = body->io_epoch;
+ lli->lli_ioepoch = body->ioepoch;
md_set_open_replay_data(md_exp, och, req);
}
struct ll_file_data *fd, struct obd_client_handle *och)
{
struct inode *inode = file->f_dentry->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
ENTRY;
LASSERT(!LUSTRE_FPRIVATE(file));
LASSERT(fd != NULL);
- if (och)
- ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
- ll_i2info(inode), it, och);
+ if (och) {
+ struct ptlrpc_request *req = it->d.lustre.it_data;
+ struct mdt_body *body;
+ ll_och_fill(ll_i2sbi(inode)->ll_md_exp, lli, it, och);
+
+ body = lustre_msg_buf(req->rq_repmsg,
+ DLM_REPLY_REC_OFF, sizeof(*body));
+
+ if ((it->it_flags & FMODE_WRITE) &&
+ (body->valid & OBD_MD_FLSIZE))
+ {
+ CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID"\n",
+ lli->lli_ioepoch, PFID(&lli->lli_fid));
+ }
+ }
+
LUSTRE_FPRIVATE(file) = fd;
ll_readahead_init(inode, &fd->fd_ras);
fd->fd_omode = it->it_flags;
}
file->f_flags &= ~O_LOV_DELAY_CREATE;
GOTO(out, rc);
- out:
+out:
ptlrpc_req_finished(req);
if (req)
it_clear_disposition(it, DISP_ENQ_OPEN_REF);
- if (rc == 0) {
- ll_open_complete(inode);
- } else {
out_och_free:
+ if (rc) {
if (*och_p) {
OBD_FREE(*och_p, sizeof (struct obd_client_handle));
*och_p = NULL; /* OBD_FREE writes some magic there */
}
/* Fills the obdo with the attributes for the inode defined by lsm */
-int ll_lsm_getattr(struct obd_export *exp, struct lov_stripe_md *lsm,
- struct obdo *oa)
+int ll_inode_getattr(struct inode *inode, struct obdo *obdo)
{
struct ptlrpc_request_set *set;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lov_stripe_md *lsm = lli->lli_smd;
+
struct obd_info oinfo = { { { 0 } } };
int rc;
ENTRY;
LASSERT(lsm != NULL);
- memset(oa, 0, sizeof *oa);
oinfo.oi_md = lsm;
- oinfo.oi_oa = oa;
- oa->o_id = lsm->lsm_object_id;
- oa->o_mode = S_IFREG;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
- OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME | OBD_MD_FLGROUP;
+ oinfo.oi_oa = obdo;
+ oinfo.oi_oa->o_id = lsm->lsm_object_id;
+ oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
+ oinfo.oi_oa->o_mode = S_IFREG;
+ oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
+ OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
+ OBD_MD_FLCTIME | OBD_MD_FLGROUP;
set = ptlrpc_prep_set();
if (set == NULL) {
+ CERROR("can't allocate ptlrpc set\n");
rc = -ENOMEM;
} else {
- rc = obd_getattr_async(exp, &oinfo, set);
+ rc = obd_getattr_async(ll_i2dtexp(inode), &oinfo, set);
if (rc == 0)
rc = ptlrpc_set_wait(set);
ptlrpc_set_destroy(set);
if (rc)
RETURN(rc);
- oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME | OBD_MD_FLSIZE);
+ oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLSIZE);
+
+ obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid);
+ CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %lu, blksize %lu\n",
+ lli->lli_smd->lsm_object_id, inode->i_size, inode->i_blocks,
+ inode->i_blksize);
RETURN(0);
}
lsm->lsm_oinfo[stripe].loi_kms = kms;
unlock_res_and_lock(lock);
lov_stripe_unlock(lsm);
- //ll_try_done_writing(inode);
iput:
iput(inode);
break;
return rc;
}
+/*
+ * Refresh size, blocks and the three timestamps of @inode from the lock
+ * value block produced by obd_merge_lvb(). The whole update runs between
+ * ll_inode_size_lock() and ll_inode_size_unlock().
+ */
+static void ll_merge_lvb(struct inode *inode)
+{
+        struct ll_inode_info *info = ll_i2info(inode);
+        struct ll_sb_info *sb_info = ll_i2sbi(inode);
+        struct ost_lvb merged;
+        ENTRY;
+
+        ll_inode_size_lock(inode, 1);
+
+        /* Seed the LVB from the inode, then merge in the per-stripe data. */
+        inode_init_lvb(inode, &merged);
+        obd_merge_lvb(sb_info->ll_dt_exp, info->lli_smd, &merged, 0);
+
+        inode->i_size = merged.lvb_size;
+        inode->i_blocks = merged.lvb_blocks;
+        LTIME_S(inode->i_mtime) = merged.lvb_mtime;
+        LTIME_S(inode->i_atime) = merged.lvb_atime;
+        LTIME_S(inode->i_ctime) = merged.lvb_ctime;
+
+        ll_inode_size_unlock(inode, 1);
+        EXIT;
+}
+
+/*
+ * Refresh the cached size of @inode from lock value blocks, but only when an
+ * extent lock over [0, OBD_OBJECT_EOF] can be matched through obd_match().
+ *
+ * Returns 0 when the file has no stripes (nothing is touched) or after the
+ * attributes were merged, the negative obd_match() error, or -ENODATA when
+ * no lock matched.
+ */
+int ll_local_size(struct inode *inode)
+{
+        ldlm_policy_data_t whole_file = { .l_extent = { 0, OBD_OBJECT_EOF } };
+        struct ll_inode_info *info = ll_i2info(inode);
+        struct ll_sb_info *sb_info = ll_i2sbi(inode);
+        struct lustre_handle matched = { 0 };
+        int match_flags = 0;
+        int rc;
+        ENTRY;
+
+        if (info->lli_smd->lsm_stripe_count == 0)
+                RETURN(0);
+
+        rc = obd_match(sb_info->ll_dt_exp, info->lli_smd, LDLM_EXTENT,
+                       &whole_file, LCK_PR | LCK_PW, &match_flags, inode,
+                       &matched);
+        if (rc < 0)
+                RETURN(rc);
+        if (rc == 0)
+                RETURN(-ENODATA);
+
+        ll_merge_lvb(inode);
+        /* Drop the reference taken by the successful match. */
+        obd_cancel(sb_info->ll_dt_exp, info->lli_smd, LCK_PR | LCK_PW,
+                   &matched);
+        RETURN(0);
+}
+
int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
lstat_t *st)
{
struct lustre_handle lockh = { 0 };
struct obd_enqueue_info einfo = { 0 };
struct obd_info oinfo = { { { 0 } } };
- struct ost_lvb lvb;
int rc;
ENTRY;
+ if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
+ RETURN(0);
+
CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", inode->i_ino);
if (!lli->lli_smd) {
RETURN(rc > 0 ? -EIO : rc);
}
- ll_inode_size_lock(inode, 1);
- inode_init_lvb(inode, &lvb);
- obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
- inode->i_size = lvb.lvb_size;
- inode->i_blocks = lvb.lvb_blocks;
- LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
- LTIME_S(inode->i_atime) = lvb.lvb_atime;
- LTIME_S(inode->i_ctime) = lvb.lvb_ctime;
- ll_inode_size_unlock(inode, 1);
-
+ ll_merge_lvb(inode);
+
CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n",
inode->i_size, inode->i_blocks);
rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
inode, och);
- OBD_FREE(och, sizeof(*och));
+ /* Do not free @och if it is waiting for DONE_WRITING. */
+ if (och->och_fh.cookie == DEAD_HANDLE_MAGIC)
+ OBD_FREE(och, sizeof(*och));
out:
/* this one is in place of ll_file_open */
ptlrpc_req_finished(it->d.lustre.it_data);
{
struct inode *inode = dentry->d_inode;
struct ptlrpc_request *req = NULL;
- struct ll_inode_info *lli;
struct ll_sb_info *sbi;
struct obd_export *exp;
int rc;
RETURN(0);
}
sbi = ll_i2sbi(inode);
- lli = ll_i2info(inode);
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n",
inode->i_ino, inode->i_generation, inode, dentry->d_name.name);
GOTO(out, rc = 0);
/* ll_glimpse_size will prefer locally cached writes if they extend
- * the file */
+ the file */
rc = ll_glimpse_size(inode, 0);
-
+ EXIT;
out:
ptlrpc_req_finished(req);
- RETURN(rc);
+ return rc;
}
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
void llap_write_pending(struct inode *inode, struct ll_async_page *llap)
{
struct ll_inode_info *lli = ll_i2info(inode);
+
+ ENTRY;
spin_lock(&lli->lli_lock);
- list_add(&llap->llap_pending_write, &lli->lli_pending_write_llaps);
+ lli->lli_flags |= LLIF_SOM_DIRTY;
+ if (llap && list_empty(&llap->llap_pending_write))
+ list_add(&llap->llap_pending_write,
+ &lli->lli_pending_write_llaps);
spin_unlock(&lli->lli_lock);
+ EXIT;
}
/* record that a write has completed */
-void llap_write_complete(struct inode *inode, struct ll_async_page *llap)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- spin_lock(&lli->lli_lock);
- list_del_init(&llap->llap_pending_write);
- spin_unlock(&lli->lli_lock);
-}
-
-void ll_open_complete(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- spin_lock(&lli->lli_lock);
- lli->lli_send_done_writing = 0;
- spin_unlock(&lli->lli_lock);
-}
-
-/* if we close with writes in flight then we want the completion or cancelation
- * of those writes to send a DONE_WRITING rpc to the MDS */
-int ll_is_inode_dirty(struct inode *inode)
+int llap_write_complete(struct inode *inode, struct ll_async_page *llap)
{
struct ll_inode_info *lli = ll_i2info(inode);
int rc = 0;
+
ENTRY;
-
spin_lock(&lli->lli_lock);
- if (!list_empty(&lli->lli_pending_write_llaps))
+ if (llap && !list_empty(&llap->llap_pending_write)) {
+ list_del_init(&llap->llap_pending_write);
rc = 1;
+ }
spin_unlock(&lli->lli_lock);
RETURN(rc);
}
-void ll_try_done_writing(struct inode *inode)
+/* DONE_WRITING should be queued only if:
+ * - CLOSE has been called already and that CLOSE has not closed epoch;
+ * - inode has no dirty pages; */
+void ll_queue_done_writing(struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;
spin_lock(&lli->lli_lock);
+
+ /* Close happened. If it has not closed the epoch, allow DONE_WRITING
+ * to happen. */
+ if ((lli->lli_flags & LLIF_EPOCH_PENDING))
+ lli->lli_flags |= LLIF_DONE_WRITING;
- if (lli->lli_send_done_writing &&
+ if ((lli->lli_flags & LLIF_DONE_WRITING) &&
list_empty(&lli->lli_pending_write_llaps)) {
-
+ /* DONE_WRITING is allowed and inode has no dirty page. */
spin_lock(&lcq->lcq_lock);
- if (list_empty(&lli->lli_close_item)) {
- CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
- inode->i_ino, inode->i_generation);
- igrab(inode);
- list_add_tail(&lli->lli_close_item, &lcq->lcq_list);
- wake_up(&lcq->lcq_waitq);
- }
+
+ LASSERT(list_empty(&lli->lli_close_list));
+ CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
+ inode->i_ino, inode->i_generation);
+
+ igrab(inode);
+ list_add_tail(&lli->lli_close_list, &lcq->lcq_head);
+ wake_up(&lcq->lcq_waitq);
spin_unlock(&lcq->lcq_lock);
}
-
spin_unlock(&lli->lli_lock);
}
-/* The MDS needs us to get the real file attributes, then send a DONE_WRITING */
-void ll_queue_done_writing(struct inode *inode)
+/* Close epoch and send Size-on-MDS attribute update if possible.
+ * Call this under @lli->lli_lock spinlock. */
+void ll_epoch_close(struct inode *inode, struct md_op_data *op_data)
{
struct ll_inode_info *lli = ll_i2info(inode);
ENTRY;
- spin_lock(&lli->lli_lock);
- lli->lli_send_done_writing = 1;
- spin_unlock(&lli->lli_lock);
+ CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID"\n",
+ op_data->ioepoch, PFID(&lli->lli_fid));
+ op_data->flags |= MF_EPOCH_CLOSE;
+
+ /* Pack Size-on-MDS inode attributes only if they have changed */
+ if (!(lli->lli_flags & LLIF_SOM_DIRTY))
+ goto out;
+
+ /* There is already 1 pending DONE_WRITE, do not create another one --
+ * close epoch with no attribute change. */
+ if (lli->lli_flags & LLIF_EPOCH_PENDING)
+ goto out;
+
+ op_data->flags |= MF_SOM_CHANGE;
- ll_try_done_writing(inode);
+ /* Check if Size-on-MDS attributes are valid. */
+ if ((lli->lli_flags & LLIF_MDS_SIZE_LOCK) || !ll_local_size(inode)) {
+ /* Send Size-on-MDS Attributes if valid. */
+ op_data->attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET |
+ ATTR_SIZE | ATTR_BLOCKS;
+ }
+out:
EXIT;
}
-#if 0
-/* If we know the file size and have the cookies:
- * - send a DONE_WRITING rpc
- *
- * Otherwise:
- * - get a whole-file lock
- * - get the authoritative size and all cookies with GETATTRs
- * - send a DONE_WRITING rpc
- */
-static void ll_close_done_writing(struct inode *inode)
+/* Send a Size-on-MDS attribute update for @inode to the MDS.
+ *
+ * Attributes are fetched into an obdo with ll_inode_getattr(), converted
+ * into the md_op_data by md_from_obdo(), tagged with the open handle @fh,
+ * the inode's current ioepoch and MF_SOM_CHANGE, and sent with
+ * ll_md_setattr().  Must not be called while LLIF_MDS_SIZE_LOCK is set.
+ *
+ * Returns -ENOMEM if either buffer cannot be allocated, the error from
+ * ll_inode_getattr(), or otherwise the result of ll_md_setattr().  Both
+ * buffers are released on every exit path. */
+int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh)
{
struct ll_inode_info *lli = ll_i2info(inode);
- ldlm_policy_data_t policy = { .l_extent = {0, OBD_OBJECT_EOF } };
- struct lustre_handle lockh = { 0 };
struct md_op_data *op_data;
- struct obdo obdo;
- obd_flag valid;
- int rc, ast_flags = 0;
+ struct obdo *oa;
+ int rc;
ENTRY;
-
- memset(&obdo, 0, sizeof(obdo));
- if (test_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags))
- goto rpc;
-
- rc = ll_extent_lock(NULL, inode, lli->lli_smd, LCK_PW, &policy, &lockh,
- ast_flags);
- if (rc != 0) {
- CERROR("lock acquisition failed (%d): unable to send "
- "DONE_WRITING for inode %lu/%u\n", rc, inode->i_ino,
- inode->i_generation);
- GOTO(out, rc);
+
+ LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK));
+
+ oa = obdo_alloc();
+ OBD_ALLOC_PTR(op_data);
+ if (!oa || !op_data) {
+ CERROR("can't allocate memory for Size-on-MDS update.\n");
+ /* Only one of the two allocations may have failed; leave
+ * through the cleanup label so the other one is not leaked. */
+ GOTO(out, rc = -ENOMEM);
}
-
- rc = ll_lsm_getattr(ll_i2dtexp(inode), lli->lli_smd, &obdo);
+ rc = ll_inode_getattr(inode, oa);
if (rc) {
- CERROR("inode_getattr failed (%d): unable to send DONE_WRITING "
- "for inode %lu/%u\n", rc, inode->i_ino,
- inode->i_generation);
- ll_extent_unlock(NULL, inode, lli->lli_smd, LCK_PW, &lockh);
+ CERROR("inode_getattr failed (%d): unable to send a "
+ "Size-on-MDS attribute update for inode %lu/%u\n",
+ rc, inode->i_ino, inode->i_generation);
GOTO(out, rc);
}
+ CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", PFID(&lli->lli_fid));
+
+ md_from_obdo(op_data, oa, oa->o_valid);
+ memcpy(&op_data->handle, fh, sizeof(*fh));
+
+ op_data->ioepoch = lli->lli_ioepoch;
+ op_data->flags |= MF_SOM_CHANGE;
+
+ rc = ll_md_setattr(inode, op_data);
+ EXIT;
+out:
+ if (oa)
+ obdo_free(oa);
+ if (op_data)
+ OBD_FREE_PTR(op_data);
+ return rc;
+}
- obdo_refresh_inode(inode, &obdo, valid);
-
- CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %lu, blksize %lu\n",
- lli->lli_smd->lsm_object_id, inode->i_size, inode->i_blocks,
- inode->i_blksize);
-
- set_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags);
-
- rc = ll_extent_unlock(NULL, inode, lli->lli_smd, LCK_PW, &lockh);
- if (rc != ELDLM_OK)
- CERROR("unlock failed (%d)? proceeding anyways...\n", rc);
+/* Send a DONE_WRITING rpc, pack Size-on-MDS attributes into it, if possible */
+static void ll_done_writing(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct md_op_data *op_data;
+ struct obd_client_handle *och;
+ int rc;
+ ENTRY;
- rpc:
OBD_ALLOC_PTR(op_data);
if (op_data == NULL) {
CERROR("can't allocate op_data\n");
EXIT;
return;
}
+
+ spin_lock(&lli->lli_lock);
+ LASSERT(lli->lli_flags & LLIF_SOM_DIRTY);
- op_data->fid1 = lli->lli_fid;
- op_data->size = inode->i_size;
- op_data->blocks = inode->i_blocks;
- op_data->valid = OBD_MD_FLID | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+ och = lli->lli_pending_och;
+ lli->lli_pending_och = NULL;
+ lli->lli_flags &= ~(LLIF_DONE_WRITING | LLIF_EPOCH_PENDING);
+ ll_epoch_close(inode, op_data);
+ lli->lli_flags &= ~LLIF_SOM_DIRTY;
+ spin_unlock(&lli->lli_lock);
+
+ ll_pack_inode2opdata(inode, op_data, &och->och_fh);
- rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data);
+ rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och);
OBD_FREE_PTR(op_data);
- out:
+ if (rc == EAGAIN) {
+ /* MDS has instructed us to obtain Size-on-MDS attributes from
+ * OSTs and send a setattr back to MDS. */
+ rc = ll_sizeonmds_update(inode, &och->och_fh);
+ } else if (rc) {
+ CERROR("inode %lu mdc done_writing failed: rc = %d\n",
+ inode->i_ino, rc);
+ }
+ OBD_FREE_PTR(och);
+ EXIT;
}
-#endif
static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq)
{
spin_lock(&lcq->lcq_lock);
- if (lcq->lcq_list.next == NULL)
+ if (lcq->lcq_head.next == NULL)
lli = ERR_PTR(-1);
- else if (!list_empty(&lcq->lcq_list)) {
- lli = list_entry(lcq->lcq_list.next, struct ll_inode_info,
- lli_close_item);
- list_del(&lli->lli_close_item);
+ else if (!list_empty(&lcq->lcq_head)) {
+ lli = list_entry(lcq->lcq_head.next, struct ll_inode_info,
+ lli_close_list);
+ list_del_init(&lli->lli_close_list);
}
spin_unlock(&lcq->lcq_lock);
while (1) {
struct l_wait_info lwi = { 0 };
struct ll_inode_info *lli;
- //struct inode *inode;
+ struct inode *inode;
l_wait_event_exclusive(lcq->lcq_waitq,
(lli = ll_close_next_lli(lcq)) != NULL,
if (IS_ERR(lli))
break;
- //inode = ll_info2i(lli);
- //ll_close_done_writing(inode);
- //iput(inode);
+ inode = ll_info2i(lli);
+ ll_done_writing(inode);
+ iput(inode);
}
complete(&lcq->lcq_comp);
return -ENOMEM;
spin_lock_init(&lcq->lcq_lock);
- INIT_LIST_HEAD(&lcq->lcq_list);
+ INIT_LIST_HEAD(&lcq->lcq_head);
init_waitqueue_head(&lcq->lcq_waitq);
init_completion(&lcq->lcq_comp);
void ll_close_thread_shutdown(struct ll_close_queue *lcq)
{
init_completion(&lcq->lcq_comp);
- lcq->lcq_list.next = NULL;
+ lcq->lcq_head.next = NULL;
wake_up(&lcq->lcq_waitq);
wait_for_completion(&lcq->lcq_comp);
OBD_FREE(lcq, sizeof(*lcq));
#define LLI_INODE_MAGIC 0x111d0de5
#define LLI_INODE_DEAD 0xdeadd00d
-#define LLI_F_HAVE_OST_SIZE_LOCK 0
-#define LLI_F_HAVE_MDS_SIZE_LOCK 1
+
+/* Bit flags kept in ll_inode_info::lli_flags. */
+enum lli_flags {
+ /* MDS has an authority for the Size-on-MDS attributes. */
+ LLIF_MDS_SIZE_LOCK = (1 << 0),
+ /* Epoch close is postponed. */
+ LLIF_EPOCH_PENDING = (1 << 1),
+ /* DONE WRITING is allowed. */
+ LLIF_DONE_WRITING = (1 << 2),
+ /* Size-on-MDS attributes are changed. An attribute update needs to
+ * be sent to MDS. */
+ LLIF_SOM_DIRTY = (1 << 3),
+};
struct ll_inode_info {
int lli_inode_magic;
struct semaphore lli_write_sem;
char *lli_symlink_name;
__u64 lli_maxbytes;
- __u64 lli_io_epoch;
+ __u64 lli_ioepoch;
unsigned long lli_flags;
- /* this lock protects s_d_w and p_w_ll and mmap_cnt */
+ /* this lock protects posix_acl, pending_write_llaps, mmap_cnt */
spinlock_t lli_lock;
struct list_head lli_pending_write_llaps;
- int lli_send_done_writing;
+ struct list_head lli_close_list;
+ /* handle is to be sent to MDS later on done_writing and setattr.
+ * Open handle data are needed for the recovery to reconstruct
+ * the inode state on the MDS. XXX: recovery is not ready yet. */
+ struct obd_client_handle *lli_pending_och;
+
atomic_t lli_mmap_cnt;
- struct list_head lli_close_item;
-
/* for writepage() only to communicate to fsync */
int lli_async_rc;
void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
int ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction);
+struct ll_async_page *llap_from_page(struct page *page, unsigned origin);
extern struct cache_definition ll_cache_definition;
void ll_removepage(struct page *page);
int ll_readpage(struct file *file, struct page *page);
int ll_file_open(struct inode *inode, struct file *file);
int ll_file_release(struct inode *inode, struct file *file);
int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
+int ll_local_size(struct inode *inode);
int ll_glimpse_ioctl(struct ll_sb_info *sbi,
struct lov_stripe_md *lsm, lstat_t *st);
int ll_glimpse_size(struct inode *inode, int ast_flags);
int ll_md_close(struct obd_export *md_exp, struct inode *inode,
struct file *file);
int ll_md_real_close(struct inode *inode, int flags);
+void ll_epoch_close(struct inode *inode, struct md_op_data *op_data);
+int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh);
+int ll_inode_getattr(struct inode *inode, struct obdo *obdo);
+int ll_md_setattr(struct inode *inode, struct md_op_data *op_data);
+void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
+ struct lustre_handle *fh);
extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
*file, size_t count, int rw);
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* llite/llite_close.c */
struct ll_close_queue {
spinlock_t lcq_lock;
- struct list_head lcq_list;
+ struct list_head lcq_head;
wait_queue_head_t lcq_waitq;
struct completion lcq_comp;
};
void llap_write_pending(struct inode *inode, struct ll_async_page *llap);
-void llap_write_complete(struct inode *inode, struct ll_async_page *llap);
-void ll_open_complete(struct inode *inode);
-int ll_is_inode_dirty(struct inode *inode);
-void ll_try_done_writing(struct inode *inode);
+int llap_write_complete(struct inode *inode, struct ll_async_page *llap);
+int ll_inode_dirty(struct inode *inode);
void ll_queue_done_writing(struct inode *inode);
+void ll_init_done_writing(struct inode *inode);
void ll_close_thread_shutdown(struct ll_close_queue *lcq);
int ll_close_thread_start(struct ll_close_queue **lcq_ret);
lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
spin_lock_init(&lli->lli_lock);
INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
+ INIT_LIST_HEAD(&lli->lli_close_list);
lli->lli_inode_magic = LLI_INODE_MAGIC;
sema_init(&lli->lli_och_sem, 1);
lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
inode->i_generation, inode);
- clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags));
+ ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
null_if_equal, inode);
EXIT;
}
+/* Send the attributes packed in @op_data->attr to the MDS and apply the
+ * result to @inode.
+ *
+ * On an md_setattr() error the request is released and the error returned;
+ * for -ENOENT the inode's link count is zeroed and, for anything that is
+ * neither a regular file nor a directory, the attributes are still applied
+ * locally with inode_setattr().  Errors other than -EPERM/-EACCES are logged.
+ *
+ * On success the reply is unpacked with md_get_lustre_md(), the attributes
+ * are applied with inode_setattr(), the handle and ioepoch from the reply
+ * body are copied back into @op_data, and the inode is refreshed with
+ * ll_update_inode().  The return value is then that of inode_setattr(). */
+int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
+{
+ struct lustre_md md;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ptlrpc_request *request = NULL;
+ int rc;
+ ENTRY;
+
+ ll_prepare_md_op_data(op_data, inode, NULL, NULL, 0, 0);
+ rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &request);
+ if (rc) {
+ ptlrpc_req_finished(request);
+ if (rc == -ENOENT) {
+ inode->i_nlink = 0;
+ /* Unlinked special device node? Or just a race?
+ * Pretend we done everything. */
+ if (!S_ISREG(inode->i_mode) &&
+ !S_ISDIR(inode->i_mode))
+ rc = inode_setattr(inode, &op_data->attr);
+ } else if (rc != -EPERM && rc != -EACCES) {
+ CERROR("md_setattr fails: rc = %d\n", rc);
+ }
+ RETURN(rc);
+ }
+
+ rc = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF,
+ sbi->ll_dt_exp, sbi->ll_md_exp, &md);
+ if (rc) {
+ ptlrpc_req_finished(request);
+ RETURN(rc);
+ }
+
+ /* We call inode_setattr to adjust timestamps.
+ * If there is at least some data in file, ATTR_SIZE is expected to
+ * have been cleared to avoid invoking vmtruncate, otherwise it is
+ * important to call vmtruncate in inode_setattr to update
+ * inode->i_size (bug 6196).
+ * NOTE(review): nothing in this function clears ATTR_SIZE -- confirm
+ * the caller still does so before calling in. */
+ rc = inode_setattr(inode, &op_data->attr);
+
+ /* Extract epoch data if obtained. */
+ memcpy(&op_data->handle, &md.body->handle, sizeof(op_data->handle));
+ op_data->ioepoch = md.body->ioepoch;
+
+ ll_update_inode(inode, &md);
+ ptlrpc_req_finished(request);
+
+ RETURN(rc);
+}
+
+/* Close IO epoch and send Size-on-MDS attribute update.
+ *
+ * Does nothing (returns 0) for anything but a regular file.  Otherwise marks
+ * @op_data with MF_EPOCH_CLOSE | MF_SOM_CHANGE and sends it through
+ * md_done_writing(); when that returns EAGAIN the attributes are fetched and
+ * sent separately by ll_sizeonmds_update() using @op_data->handle.
+ * Returns the result of the last call made.
+ *
+ * NOTE(review): rc is compared against positive EAGAIN while other errors
+ * in this file are negative -- confirm md_done_writing() really reports
+ * this condition as a positive value. */
+static int ll_setattr_done_writing(struct inode *inode,
+ struct md_op_data *op_data)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(op_data != NULL);
+ if (!S_ISREG(inode->i_mode))
+ RETURN(0);
+
+ /* XXX: pass och here for the recovery purpose. */
+ CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n",
+ op_data->ioepoch, PFID(&lli->lli_fid));
+
+ op_data->flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
+ rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
+ if (rc == EAGAIN) {
+ /* MDS has instructed us to obtain Size-on-MDS attributes
+ * from OSTs and send a setattr back to MDS. */
+ rc = ll_sizeonmds_update(inode, &op_data->handle);
+ } else if (rc) {
+ CERROR("inode %lu mdc truncate failed: rc = %d\n",
+ inode->i_ino, rc);
+ }
+ RETURN(rc);
+}
+
/* If this inode has objects allocated to it (lsm != NULL), then the OST
* object(s) determine the file size and mtime. Otherwise, the MDS will
* keep these values until such a time that objects are allocated for it.
struct ll_inode_info *lli = ll_i2info(inode);
struct lov_stripe_md *lsm = lli->lli_smd;
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *request = NULL;
+ struct md_op_data *op_data = NULL;
int ia_valid = attr->ia_valid;
- struct md_op_data *op_data;
int rc = 0;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu valid %x\n", inode->i_ino,
attr->ia_valid);
- lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_SETATTR);
+ lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_SETATTR);
if (ia_valid & ATTR_SIZE) {
if (attr->ia_size > ll_file_maxbytes(inode)) {
/* If only OST attributes being set on objects, don't do MDS RPC.
* In that case, we need to check permissions and update the local
* inode ourselves so we can call obdo_from_inode() always. */
- if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
- struct lustre_md md;
-
+ if (ia_valid & (lsm ? ~(ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
OBD_ALLOC_PTR(op_data);
if (op_data == NULL)
RETURN(-ENOMEM);
-
- ll_prepare_md_op_data(op_data, inode, NULL, NULL, 0, 0);
-
- rc = md_setattr(sbi->ll_md_exp, op_data,
- attr, NULL, 0, NULL, 0, &request);
- OBD_FREE_PTR(op_data);
- if (rc) {
- ptlrpc_req_finished(request);
- if (rc == -ENOENT) {
- inode->i_nlink = 0;
- /* Unlinked special device node? Or just a race?
- * Pretend we done everything. */
- if (!S_ISREG(inode->i_mode) &&
- !S_ISDIR(inode->i_mode) &&
- !S_ISDIR(inode->i_mode))
- rc = inode_setattr(inode, attr);
- } else if (rc != -EPERM && rc != -EACCES) {
- CERROR("mdcsetattr fails: rc = %d\n", rc);
- }
- RETURN(rc);
- }
-
- rc = md_get_lustre_md(sbi->ll_md_exp, request,
- REPLY_REC_OFF, sbi->ll_dt_exp,
- sbi->ll_md_exp, &md);
- if (rc) {
- ptlrpc_req_finished(request);
- RETURN(rc);
- }
+ memcpy(&op_data->attr, attr, sizeof(*attr));
- /* We call inode_setattr to adjust timestamps.
- * If there is at least some data in file, we cleared ATTR_SIZE
- * above to avoid invoking vmtruncate, otherwise it is important
- * to call vmtruncate in inode_setattr to update inode->i_size
- * (bug 6196) */
- rc = inode_setattr(inode, attr);
+ /* Open epoch for truncate. */
+ if (ia_valid & ATTR_SIZE)
+ op_data->flags = MF_EPOCH_OPEN;
+ rc = ll_md_setattr(inode, op_data);
+ if (rc)
+ GOTO(out, rc);
- ll_update_inode(inode, &md);
- ptlrpc_req_finished(request);
+ CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for truncate\n",
+ op_data->ioepoch, PFID(&lli->lli_fid));
if (!lsm || !S_ISREG(inode->i_mode)) {
CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
- RETURN(rc);
+ GOTO(out, rc = 0);
}
} else {
/* The OST doesn't check permissions, but the alternative is
DOWN_WRITE_I_ALLOC_SEM(inode);
#endif
if (rc != 0)
- RETURN(rc);
+ GOTO(out, rc);
/* Only ll_inode_size_lock is taken at this level.
* lov_stripe_lock() is grabbed by ll_truncate() only over
oinfo.oi_oa = oa;
oinfo.oi_md = lsm;
+ /* XXX: this looks unnecessary now. */
rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL);
if (rc)
CERROR("obd_setattr_async fails: rc=%d\n", rc);
rc = -ENOMEM;
}
}
- RETURN(rc);
+ EXIT;
+out:
+ if (op_data && op_data->ioepoch) {
+ rc = ll_setattr_done_writing(inode, op_data);
+ OBD_FREE_PTR(op_data);
+ }
+ return rc;
}
int ll_setattr(struct dentry *de, struct iattr *attr)
#else
inode->i_rdev = old_decode_dev(body->rdev);
#endif
- if (body->valid & OBD_MD_FLSIZE)
+ if (body->valid & OBD_MD_FLSIZE) {
inode->i_size = body->size;
- if (body->valid & OBD_MD_FLBLOCKS)
- inode->i_blocks = body->blocks;
- if (body->valid & OBD_MD_FLSIZE)
- set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
+ if (body->valid & OBD_MD_FLBLOCKS)
+ inode->i_blocks = body->blocks;
+
+ lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
+ }
if (body->valid & OBD_MD_FLID) {
/* FID shouldn't be changed! */
}
case EXT3_IOC_SETFLAGS: {
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct ll_iattr_struct attr = { 0 };
struct obd_info oinfo = { { { 0 } } };
struct md_op_data *op_data;
RETURN(-ENOMEM);
ll_prepare_md_op_data(op_data, inode, NULL, NULL, 0, 0);
-
- attr.ia_attr_flags = flags;
- ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG;
-
+
+ ((struct ll_iattr *)&op_data->attr)->ia_attr_flags = flags;
+ op_data->attr.ia_valid |= ATTR_ATTR_FLAG;
rc = md_setattr(sbi->ll_md_exp, op_data,
- (struct iattr *)&attr, NULL, 0, NULL, 0, &req);
+ NULL, 0, NULL, 0, &req);
OBD_FREE_PTR(op_data);
ptlrpc_req_finished(req);
if (rc || lsm == NULL) {
case LDLM_CB_CANCELING: {
struct inode *inode = ll_inode_from_lock(lock);
__u64 bits = lock->l_policy_data.l_inodebits.bits;
+ struct lu_fid *fid;
/* Invalidate all dentries associated with this inode */
if (inode == NULL)
break;
- if (lock->l_resource->lr_name.name[0] != fid_seq(ll_inode2fid(inode)) ||
- lock->l_resource->lr_name.name[1] != fid_oid(ll_inode2fid(inode)) ||
- lock->l_resource->lr_name.name[2] != fid_ver(ll_inode2fid(inode))) {
- LDLM_ERROR(lock, "data mismatch with object "DFID" (%p)",
- PFID(ll_inode2fid(inode)), inode);
+ fid = ll_inode2fid(inode);
+ if (lock->l_resource->lr_name.name[0] != fid_seq(fid) ||
+ lock->l_resource->lr_name.name[1] != fid_oid(fid) ||
+ lock->l_resource->lr_name.name[2] != fid_ver(fid)) {
+ LDLM_ERROR(lock, "data mismatch with object "
+ DFID" (%p)", PFID(fid), inode);
}
if (bits & MDS_INODELOCK_OPEN) {
}
if (bits & MDS_INODELOCK_UPDATE)
- clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
- &(ll_i2info(inode)->lli_flags));
+ ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
if (S_ISDIR(inode->i_mode) &&
(bits & MDS_INODELOCK_UPDATE)) {
}
}
-/*
- * this function prepares md_op_data hint for passing ot down to MD stack.
- *
- * Note: it zeroes @op_data out before doing anything else, so all additional
- * initializations of @op_data should be done after it.
- */
+/* this function prepares md_op_data hint for passing it down to MD stack. */
void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
struct inode *i2, const char *name, int namelen,
int mode)
LASSERT(i1 != NULL);
LASSERT(op_data != NULL);
- memset(op_data, 0, sizeof(*op_data));
ll_i2gids(op_data->suppgids, i1, i2);
op_data->fid1 = ll_i2info(i1)->lli_fid;
valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
if (cmd & OBD_BRW_WRITE) {
oa->o_valid |= OBD_MD_FLEPOCH;
- oa->o_easize = ll_i2info(inode)->lli_io_epoch;
+ oa->o_easize = ll_i2info(inode)->lli_ioepoch;
valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
OBD_MD_FLUID | OBD_MD_FLGID |
return count;
}
-static struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
+struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
{
struct ll_async_page *llap;
struct obd_export *exp;
/* also zeroing the PRIVBITS low order bitflags */
__set_page_ll_data(page, llap);
llap->llap_page = page;
-
spin_lock(&sbi->ll_lock);
sbi->ll_pglist_gen++;
sbi->ll_async_page_count++;
list_add_tail(&llap->llap_pglist_item, &sbi->ll_pglist);
+ INIT_LIST_HEAD(&llap->llap_pending_write);
spin_unlock(&sbi->ll_lock);
out:
0, 0, 0, async_flags);
if (rc == 0) {
LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n");
- //llap_write_pending(inode, llap);
GOTO(out, 0);
}
unlock_page(page);
- if (0 && cmd & OBD_BRW_WRITE) {
- llap_write_complete(page->mapping->host, llap);
- ll_try_done_writing(page->mapping->host);
+ if (cmd & OBD_BRW_WRITE) {
+ if (llap_write_complete(page->mapping->host, llap))
+ ll_queue_done_writing(page->mapping->host);
}
if (PageWriteback(page)) {
return;
}
- //llap_write_complete(inode, llap);
+ if (llap_write_complete(inode, llap))
+ ll_queue_done_writing(inode);
+
rc = obd_teardown_async_page(exp, ll_i2info(inode)->lli_smd, NULL,
llap->llap_cookie);
if (rc != 0)
return 1;
}
+/* set_page_dirty hook: look up the llap for @page with llap_from_page(),
+ * record it with llap_write_pending() on the page's host inode, then hand
+ * the page to __set_page_dirty_nobuffers() and return its result.
+ *
+ * NOTE(review): the error path uses RETURN() although the function has no
+ * ENTRY, and it returns a negative PTR_ERR() value -- confirm callers of
+ * ->set_page_dirty tolerate a negative return. */
+static int ll_set_page_dirty(struct page *page)
+{
+ struct ll_async_page *llap;
+
+ llap = llap_from_page(page, LLAP_ORIGIN_UNKNOWN);
+ if (IS_ERR(llap))
+ RETURN(PTR_ERR(llap));
+
+ llap_write_pending(page->mapping->host, llap);
+ return(__set_page_dirty_nobuffers(page));
+}
+
#define MAX_DIRECTIO_SIZE 2*1024*1024*1024UL
static inline int ll_get_user_pages(int rw, unsigned long user_addr,
ll_inode_fill_obdo(inode, rw, &oa);
- if (rw == WRITE)
+ if (rw == WRITE) {
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRECT_WRITE, size);
- else
+ llap_write_pending(inode, NULL);
+ } else {
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRECT_READ, size);
+ }
rc = obd_brw_rqset(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
ll_i2dtexp(inode), &oa, lsm, page_count, pga, NULL);
if (rc == 0) {
.direct_IO = ll_direct_IO_26,
.writepage = ll_writepage_26,
.writepages = generic_writepages,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = ll_set_page_dirty,
.sync_page = NULL,
.prepare_write = ll_prepare_write,
.commit_write = ll_commit_write,
}
static int lmv_done_writing(struct obd_export *exp,
- struct md_op_data *op_data)
+ struct md_op_data *op_data,
+ struct obd_client_handle *och)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_done_writing(tgt_exp, op_data);
+ rc = md_done_writing(tgt_exp, op_data, och);
RETURN(rc);
}
}
static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
- struct iattr *iattr, void *ea, int ealen, void *ea2,
- int ea2len, struct ptlrpc_request **request)
+ void *ea, int ealen, void *ea2, int ea2len,
+ struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
obj = lmv_obj_grab(obd, &op_data->fid1);
CDEBUG(D_OTHER, "SETATTR for "DFID", valid 0x%x%s\n",
- PFID(&op_data->fid1), iattr->ia_valid, obj ? ", split" : "");
+ PFID(&op_data->fid1), op_data->attr.ia_valid,
+ obj ? ", split" : "");
if (obj) {
for (i = 0; i < obj->lo_objcount; i++) {
break;
}
- rc = md_setattr(tgt_exp, op_data, iattr, ea, ealen,
+ rc = md_setattr(tgt_exp, op_data, ea, ealen,
ea2, ea2len, &req);
if (lu_fid_eq(&obj->lo_fid, &obj->lo_inodes[i].li_fid)) {
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_setattr(tgt_exp, op_data, iattr, ea, ealen, ea2,
+ rc = md_setattr(tgt_exp, op_data, ea, ealen, ea2,
ea2len, request);
}
RETURN(rc);
sizeof(*req->rq_oi.oi_oa));
req->rq_oi.oi_oa->o_id = loi->loi_id;
req->rq_oi.oi_cb_up = cb_getattr_update;
+ req->rq_rqset = set;
lov_set_add_req(req, set);
}
int flags, struct md_op_data *data);
void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
struct md_op_data *op_data,
- struct iattr *iattr, void *ea, int ealen,
- void *ea2, int ea2len);
+ void *ea, int ealen, void *ea2, int ea2len);
void mdc_create_pack(struct ptlrpc_request *req, int offset,
struct md_op_data *op_data, const void *data, int datalen,
__u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
void mdc_rename_pack(struct ptlrpc_request *req, int offset,
struct md_op_data *op_data,
const char *old, int oldlen, const char *new, int newlen);
-void mdc_close_pack(struct ptlrpc_request *req, int offset, struct md_op_data *op_data,
- __u64 valid, struct obd_client_handle *och);
+void mdc_close_pack(struct ptlrpc_request *req, int offset,
+ struct md_op_data *op_data);
void mdc_exit_request(struct client_obd *cli);
void mdc_enter_request(struct client_obd *cli);
const char *filename, int namelen, obd_valid valid,
int ea_size, struct ptlrpc_request **request);
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
+ void *ea, int ealen, void *ea2, int ea2len,
struct ptlrpc_request **request);
int mdc_setxattr(struct obd_export *exp, const struct lu_fid *fid,
obd_valid valid, const char *xattr_name,
struct obd_client_handle *och);
int mdc_close(struct obd_export *, struct md_op_data *,
- struct obd_client_handle *, struct ptlrpc_request **);
+ struct obd_client_handle *och, struct ptlrpc_request **);
int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
__u64 offset, struct page *, struct ptlrpc_request **);
int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
int flags, void *opaque);
-int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data);
+int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
+ struct obd_client_handle *och);
#endif
}
}
-void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
- struct md_op_data *op_data, struct iattr *iattr,
- void *ea, int ealen, void *ea2, int ea2len)
+static void mdc_setattr_pack_rec(struct mdt_rec_setattr *rec,
+ struct md_op_data *op_data)
{
- struct mdt_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, offset,
- sizeof (*rec));
rec->sa_opcode = REINT_SETATTR;
rec->sa_fsuid = current->fsuid;
rec->sa_fsgid = current->fsgid;
rec->sa_cap = current->cap_effective;
- rec->sa_fid = op_data->fid1;
rec->sa_suppgid = -1;
- if (iattr) {
- rec->sa_valid = iattr->ia_valid;
- rec->sa_mode = iattr->ia_mode;
- rec->sa_uid = iattr->ia_uid;
- rec->sa_gid = iattr->ia_gid;
- rec->sa_size = iattr->ia_size;
- rec->sa_atime = LTIME_S(iattr->ia_atime);
- rec->sa_mtime = LTIME_S(iattr->ia_mtime);
- rec->sa_ctime = LTIME_S(iattr->ia_ctime);
- rec->sa_attr_flags =
- ((struct ll_iattr_struct *)iattr)->ia_attr_flags;
- if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid))
- rec->sa_suppgid = iattr->ia_gid;
- else
- rec->sa_suppgid = op_data->suppgids[0];
- }
+ rec->sa_fid = op_data->fid1;
+ rec->sa_valid = op_data->attr.ia_valid;
+ rec->sa_mode = op_data->attr.ia_mode;
+ rec->sa_uid = op_data->attr.ia_uid;
+ rec->sa_gid = op_data->attr.ia_gid;
+ rec->sa_size = op_data->attr.ia_size;
+ rec->sa_blocks = op_data->attr_blocks;
+ rec->sa_atime = LTIME_S(op_data->attr.ia_atime);
+ rec->sa_mtime = LTIME_S(op_data->attr.ia_mtime);
+ rec->sa_ctime = LTIME_S(op_data->attr.ia_ctime);
+ rec->sa_attr_flags = ((struct ll_iattr *)&op_data->attr)->ia_attr_flags;
+ if ((op_data->attr.ia_valid & ATTR_GID) &&
+ in_group_p(op_data->attr.ia_gid))
+ rec->sa_suppgid = op_data->attr.ia_gid;
+ else
+ rec->sa_suppgid = op_data->suppgids[0];
+}
+
+/* Copy the open handle, IO epoch and epoch flags from @op_data into the
+ * request's @epoch buffer.  The handle copy is sized by the destination
+ * field (sizeof(epoch->handle)). */
+static void mdc_epoch_pack(struct mdt_epoch *epoch, struct md_op_data *op_data)
+{
+ memcpy(&epoch->handle, &op_data->handle, sizeof(epoch->handle));
+ epoch->ioepoch = op_data->ioepoch;
+ epoch->flags = op_data->flags;
+}
+
+void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
+ struct md_op_data *op_data, void *ea,
+ int ealen, void *ea2, int ea2len)
+{
+ struct mdt_rec_setattr *rec;
+ struct mdt_epoch *epoch;
+
+ rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
+ epoch = lustre_msg_buf(req->rq_reqmsg, offset + 1, sizeof(*epoch));
+ mdc_setattr_pack_rec(rec, op_data);
+ if (epoch)
+ mdc_epoch_pack(epoch, op_data);
if (ealen == 0)
return;
- memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 1, ealen), ea, ealen);
+ memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ealen), ea, ealen);
if (ea2len == 0)
return;
- memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ea2len), ea2, ea2len);
+ memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 3, ea2len), ea2, ea2len);
}
void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
}
void mdc_close_pack(struct ptlrpc_request *req, int offset,
- struct md_op_data *op_data, __u64 valid,
- struct obd_client_handle *och)
+ struct md_op_data *op_data)
{
- struct mdt_body *body;
+ struct mdt_epoch *epoch;
+ struct mdt_rec_setattr *rec;
- body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body));
+ epoch = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*epoch));
+ rec = lustre_msg_buf(req->rq_reqmsg, offset + 1, sizeof(*rec));
- body->fid1 = op_data->fid1;
- memcpy(&body->handle, &och->och_fh, sizeof(body->handle));
- if (op_data->valid & OBD_MD_FLATIME) {
- body->atime = op_data->atime;
- body->valid |= OBD_MD_FLATIME;
- }
- if (op_data->valid & OBD_MD_FLMTIME) {
- body->mtime = op_data->mtime;
- body->valid |= OBD_MD_FLMTIME;
- }
- if (op_data->valid & OBD_MD_FLCTIME) {
- body->ctime = op_data->ctime;
- body->valid |= OBD_MD_FLCTIME;
- }
- if (op_data->valid & OBD_MD_FLSIZE) {
- body->size = op_data->size;
- body->valid |= OBD_MD_FLSIZE;
- }
- if (op_data->valid & OBD_MD_FLBLOCKS) {
- body->blocks = op_data->blocks;
- body->valid |= OBD_MD_FLBLOCKS;
- }
- if (op_data->valid & OBD_MD_FLFLAGS) {
- body->flags = op_data->flags;
- body->valid |= OBD_MD_FLFLAGS;
- }
+ mdc_setattr_pack_rec(rec, op_data);
+ mdc_epoch_pack(epoch, op_data);
}
struct mdc_cache_waiter {
* magic open-path setattr that should take the setattr semaphore and
* go to the setattr portal. */
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
- struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
+ void *ea, int ealen, void *ea2, int ea2len,
struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
struct mdt_rec_setattr *rec;
struct mdc_rpc_lock *rpc_lock;
struct obd_device *obd = exp->exp_obd;
- int size[4] = { sizeof(struct ptlrpc_body),
- sizeof(*rec), ealen, ea2len };
- int bufcount = 2, rc;
+ int size[5] = { sizeof(struct ptlrpc_body),
+ sizeof(*rec), 0, ealen, ea2len };
+ int bufcount = 3, rc;
ENTRY;
- LASSERT(iattr != NULL);
+ LASSERT(op_data != NULL);
+
+ if (op_data->flags & (MF_SOM_CHANGE | MF_EPOCH_OPEN))
+ size[2] = sizeof(struct mdt_epoch);
if (ealen > 0) {
bufcount++;
if (req == NULL)
RETURN(-ENOMEM);
- if (iattr->ia_valid & ATTR_FROM_OPEN) {
+ if (op_data->attr.ia_valid & ATTR_FROM_OPEN) {
req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
rpc_lock = obd->u.cli.cl_setattr_lock;
} else {
rpc_lock = obd->u.cli.cl_rpc_lock;
}
- if (iattr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+ if (op_data->attr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
- LTIME_S(iattr->ia_mtime), LTIME_S(iattr->ia_ctime));
- mdc_setattr_pack(req, REQ_REC_OFF, op_data, iattr, ea, ealen, ea2, ea2len);
+ LTIME_S(op_data->attr.ia_mtime),
+ LTIME_S(op_data->attr.ia_ctime));
+ mdc_setattr_pack(req, REQ_REC_OFF, op_data, ea, ealen, ea2, ea2len);
size[REPLY_REC_OFF] = sizeof(struct mdt_body);
ptlrpc_req_set_repsize(req, 2, size);
struct obd_client_handle *och, struct ptlrpc_request **request)
{
struct obd_device *obd = class_exp2obd(exp);
- int reqsize[2] = { sizeof(struct ptlrpc_body),
- sizeof(struct mdt_body) };
+ int reqsize[3] = { sizeof(struct ptlrpc_body),
+ sizeof(struct mdt_epoch),
+ sizeof(struct mdt_rec_setattr)};
int rc, repsize[4] = { sizeof(struct ptlrpc_body),
sizeof(struct mdt_body),
obd->u.cli.cl_max_mds_easize,
ENTRY;
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_CLOSE, 2, reqsize, NULL);
+ MDS_CLOSE, 3, reqsize, NULL);
if (req == NULL)
GOTO(out, rc = -ENOMEM);
CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
}
- mdc_close_pack(req, REQ_REC_OFF, op_data, op_data->valid, och);
-
+ mdc_close_pack(req, REQ_REC_OFF, op_data);
ptlrpc_req_set_repsize(req, 4, repsize);
req->rq_commit_cb = mdc_commit_close;
LASSERT(req->rq_cb_data == NULL);
return rc;
}
-int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data)
+int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
+ struct obd_client_handle *och)
{
struct ptlrpc_request *req;
- struct mdt_body *body;
- int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+ int rc, size[3] = { sizeof(struct ptlrpc_body),
+ sizeof(struct mdt_epoch),
+ sizeof(struct mdt_rec_setattr)};
+ int repsize[2] = { sizeof(struct ptlrpc_body),
+ sizeof(struct mdt_body)};
+
ENTRY;
-
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
- MDS_DONE_WRITING, 2, size, NULL);
+ MDS_DONE_WRITING, 3, size, NULL);
if (req == NULL)
RETURN(-ENOMEM);
- body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
- body->fid1 = op_data->fid1;
- body->size = op_data->size;
- body->blocks = op_data->blocks;
- body->flags = op_data->flags;
- body->valid = op_data->valid;
-
- ptlrpc_req_set_repsize(req, 2, size);
-
+ /* XXX: add DONE_WRITING request to och -- once Size-on-MDS
+ * recovery is ready. */
+ mdc_close_pack(req, REQ_REC_OFF, op_data);
+
+ ptlrpc_req_set_repsize(req, 2, repsize);
rc = ptlrpc_queue_wait(req);
ptlrpc_req_finished(req);
RETURN(rc);
la->la_mode =
(mode & S_IALLUGO) | (tmp_la->la_mode & ~S_IALLUGO);
}
+
+ /* For the "Size-on-MDS" setattr update, merge the incoming attributes
+ * with those already set in the inode. */
+ if (la->la_valid & LA_SIZE) {
+ if ((la->la_valid & LA_ATIME) &&
+ (la->la_atime < tmp_la->la_atime))
+ la->la_valid &= ~LA_ATIME;
+
+ if ((la->la_valid & LA_CTIME) &&
+ (la->la_ctime < tmp_la->la_ctime))
+ la->la_valid &= ~(LA_MTIME | LA_CTIME);
+ }
+
RETURN(rc);
}
RETURN(-EFAULT);
}
- if (body->flags & MDS_BFLAG_UNCOMMITTED_WRITES)
- /* do some stuff */ ;
-
spin_lock(&med->med_open_lock);
mfd = mds_handle2mfd(&body->handle);
if (mfd == NULL) {
RETURN(rc);
}
+/* Copy size and blocks from @attr into the reply body @b and mark them valid.
+ * Nothing is packed unless @attr describes a regular file and Size-on-MDS is
+ * currently enabled on @o. */
+void mdt_pack_size2body(struct mdt_body *b, const struct lu_attr *attr,
+                        struct mdt_object *o)
+{
+        if (!S_ISREG(attr->la_mode) || !mdt_sizeonmds_enabled(o))
+                return;
+
+        b->size = attr->la_size;
+        b->blocks = attr->la_blocks;
+        b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+}
+
void mdt_pack_attr2body(struct mdt_body *b, const struct lu_attr *attr,
const struct lu_fid *fid)
{
if (!S_ISREG(attr->la_mode))
b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLRDEV;
-
+
b->atime = attr->la_atime;
b->mtime = attr->la_mtime;
b->ctime = attr->la_ctime;
repbody->fid1 = *mdt_object_fid(o);
repbody->valid = OBD_MD_FLID | OBD_MD_MDS;
RETURN(0);
- } else if (rc){
+ } else if (rc) {
CERROR("getattr error for "DFID": %d\n",
PFID(mdt_object_fid(o)), rc);
RETURN(rc);
if (rc != 0)
mdt_object_unlock(info, child, lhc, 1);
else {
- /* This is pure debugging code. */
- struct ldlm_lock *lock;
- struct ldlm_res_id *res_id;
- lock = ldlm_handle2lock(&lhc->mlh_lh);
+ struct ldlm_lock *lock = ldlm_handle2lock(&lhc->mlh_lh);
if (lock) {
+ struct ldlm_res_id *res_id;
+ struct mdt_body *repbody;
+ struct lu_attr *ma;
+
+ /* Debugging code. */
res_id = &lock->l_resource->lr_name;
LDLM_DEBUG(lock, "we will return this lock client\n");
LASSERTF(fid_res_name_eq(mdt_object_fid(child),
(unsigned long)res_id->name[1],
(unsigned long)res_id->name[2],
PFID(mdt_object_fid(child)));
+
+ /* Pack Size-on-MDS inode attributes to the body if
+ * update lock is given. */
+ repbody = req_capsule_server_get(&info->mti_pill,
+ &RMF_MDT_BODY);
+ ma = &info->mti_attr.ma_attr;
+ if (lock->l_policy_data.l_inodebits.bits &
+ MDS_INODELOCK_UPDATE)
+ mdt_pack_size2body(repbody, ma, child);
LDLM_LOCK_PUT(lock);
}
+
+
}
mdt_object_put(info->mti_ctxt, child);
if (rc == 0) {
rc = mdt_object_sync(info);
if (rc == 0) {
- struct md_object *next;
+ struct md_object *next;
const struct lu_fid *fid;
- struct lu_attr *la;
-
+ struct lu_attr *la = &info->mti_attr.ma_attr;
+
next = mdt_object_child(info->mti_object);
- fid = mdt_object_fid(info->mti_object);
info->mti_attr.ma_need = MA_INODE;
rc = mo_attr_get(info->mti_ctxt, next,
&info->mti_attr);
- la = &info->mti_attr.ma_attr;
if (rc == 0) {
body = req_capsule_server_get(pill,
&RMF_MDT_BODY);
+ fid = mdt_object_fid(info->mti_object);
mdt_pack_attr2body(body, la, fid);
}
}
m->mdt_max_mdsize = MAX_MD_SIZE;
m->mdt_max_cookiesize = sizeof(struct llog_cookie);
- spin_lock_init(&m->mdt_epoch_lock);
+ spin_lock_init(&m->mdt_ioepoch_lock);
/* Temporary. should parse mount option. */
m->mdt_opts.mo_user_xattr = 0;
m->mdt_opts.mo_acl = 0;
info = lu_context_key_get(&ctxt, &mdt_thread_key);
LASSERT(info != NULL);
memset(info, 0, sizeof *info);
+ info->mti_ctxt = &ctxt;
+ info->mti_mdt = mdt;
ma = &info->mti_attr;
ma->ma_lmm_size = mdt->mdt_max_mdsize;
struct list_head *tmp = med->med_open_head.next;
struct mdt_file_data *mfd =
list_entry(tmp, struct mdt_file_data, mfd_list);
- struct mdt_object *o = mfd->mfd_object;
+ struct md_attr *ma = &info->mti_attr;
/* Remove mfd handle so it can't be found again.
* We are consuming the mfd_list reference here. */
class_handle_unhash(&mfd->mfd_handle);
list_del_init(&mfd->mfd_list);
spin_unlock(&med->med_open_lock);
- mdt_mfd_close(&ctxt, mdt, mfd, ma);
+ mdt_mfd_close(info, mfd);
/* TODO: if we close the unlinked file,
* we need to remove it's objects from OST */
- mdt_object_put(&ctxt, o);
+ memset(&ma->ma_attr, 0, sizeof(ma->ma_attr));
spin_lock(&med->med_open_lock);
}
spin_unlock(&med->med_open_lock);
+ info->mti_mdt = NULL;
mdt_client_del(&ctxt, mdt, med);
out:
DEF_MDT_HNDL_F(0 |MUTABOR,
REINT, mdt_reint),
DEF_MDT_HNDL_F(HABEO_CORPUS , CLOSE, mdt_close),
-DEF_MDT_HNDL_0(0, DONE_WRITING, mdt_done_writing),
+DEF_MDT_HNDL_F(HABEO_CORPUS , DONE_WRITING, mdt_done_writing),
DEF_MDT_HNDL_F(0 |HABEO_REFERO, PIN, mdt_pin),
DEF_MDT_HNDL_0(0, SYNC, mdt_sync),
DEF_MDT_HNDL_0(0, QUOTACHECK, mdt_quotacheck_handle),
* detailed comments. --umka
*/
DEF_MDT_HNDL_F(HABEO_CORPUS, CLOSE, mdt_close),
+ DEF_MDT_HNDL_F(HABEO_CORPUS, DONE_WRITING, mdt_done_writing),
};
static struct mdt_opc_slice mdt_readpage_handlers[] = {
signed int mo_compat_resname:1;
} mdt_opts;
- /* lock to pretect epoch and write count
- */
- spinlock_t mdt_epoch_lock;
- __u64 mdt_io_epoch;
+ /* lock to protect epoch and write count */
+ spinlock_t mdt_ioepoch_lock;
+ __u64 mdt_ioepoch;
/* Transaction related stuff here */
spinlock_t mdt_transno_lock;
struct mdt_object {
struct lu_object_header mot_header;
struct md_object mot_obj;
- __u64 mot_io_epoch;
+ __u64 mot_ioepoch;
+ __u64 mot_flags;
+ int mot_epochcount;
int mot_writecount;
};
} rdpg;
} mti_u;
+ /* IO epoch related stuff. */
+ struct mdt_epoch *mti_epoch;
+
/* server and client data buffers */
struct mdt_server_data mti_msd;
struct mdt_client_data mti_mcd;
struct mdt_lock_handle *,
int decref);
+int mdt_close_unpack(struct mdt_thread_info *info);
int mdt_reint_unpack(struct mdt_thread_info *info, __u32 op);
int mdt_reint_rec(struct mdt_thread_info *);
+void mdt_pack_size2body(struct mdt_body *b, const struct lu_attr *attr,
+ struct mdt_object *o);
void mdt_pack_attr2body(struct mdt_body *b, const struct lu_attr *attr,
const struct lu_fid *fid);
int mdt_open(struct mdt_thread_info *info);
-void mdt_mfd_close(const struct lu_context *ctxt, struct mdt_device *mdt,
- struct mdt_file_data *mfd, struct md_attr *ma);
-
+struct mdt_file_data *mdt_handle2mfd(const struct lustre_handle *handle);
+int mdt_epoch_open(struct mdt_thread_info *info, struct mdt_object *o,
+ __u64 epoch);
+void mdt_sizeonmds_enable(struct mdt_thread_info *info, struct mdt_object *mo);
+int mdt_sizeonmds_enabled(struct mdt_object *mo);
+int mdt_write_get(struct mdt_device *mdt, struct mdt_object *o);
+struct mdt_file_data *mdt_mfd_new(void);
+int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd);
+void mdt_mfd_free(struct mdt_file_data *mfd);
int mdt_close(struct mdt_thread_info *info);
-
+int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo,
+ int flags);
int mdt_done_writing(struct mdt_thread_info *info);
void mdt_shrink_reply(struct mdt_thread_info *info, int offset);
int mdt_handle_last_unlink(struct mdt_thread_info *, struct mdt_object *,
out |= LA_GID;
if (in & ATTR_SIZE)
out |= LA_SIZE;
+ if (in & ATTR_BLOCKS)
+ out |= LA_BLOCKS;
if (in & ATTR_FROM_OPEN)
rr->rr_flags |= MRF_SETATTR_LOCKED;
out |= LA_FLAGS;
/*XXX need ATTR_RAW?*/
- in &= ~(ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|
+ in &= ~(ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_BLOCKS|
ATTR_ATIME|ATTR_MTIME|ATTR_CTIME|ATTR_FROM_OPEN|
ATTR_ATIME_SET|ATTR_CTIME_SET|ATTR_MTIME_SET|
ATTR_ATTR_FLAG|ATTR_RAW);
return out;
}
/* unpacking */
-static int mdt_setattr_unpack(struct mdt_thread_info *info)
+
+static int mdt_setattr_unpack_rec(struct mdt_thread_info *info)
{
- struct mdt_rec_setattr *rec;
struct md_attr *ma = &info->mti_attr;
struct lu_attr *la = &ma->ma_attr;
- struct mdt_reint_record *rr = &info->mti_rr;
struct req_capsule *pill = &info->mti_pill;
+ struct mdt_reint_record *rr = &info->mti_rr;
+ struct mdt_rec_setattr *rec;
ENTRY;
-
+
rec = req_capsule_client_get(pill, &RMF_REC_SETATTR);
-
if (rec == NULL)
RETURN(-EFAULT);
la->la_uid = rec->sa_uid;
la->la_gid = rec->sa_gid;
la->la_size = rec->sa_size;
+ la->la_blocks = rec->sa_blocks;
la->la_ctime = rec->sa_ctime;
la->la_atime = rec->sa_atime;
la->la_mtime = rec->sa_mtime;
ma->ma_valid = MA_INODE;
+ RETURN(0);
+}
+
+/* Fetch the epoch record from the request and remember it in
+ * info->mti_epoch.  Returns -EFAULT when the record is absent, 0 otherwise;
+ * mti_epoch is left NULL in the failure case. */
+static int mdt_epoch_unpack(struct mdt_thread_info *info)
+{
+        ENTRY;
+
+        info->mti_epoch = req_capsule_client_get(&info->mti_pill,
+                                                 &RMF_MDT_EPOCH);
+        if (info->mti_epoch == NULL)
+                RETURN(-EFAULT);
+        RETURN(0);
+}
+
+static int mdt_setattr_unpack(struct mdt_thread_info *info)
+{
+ struct md_attr *ma = &info->mti_attr;
+ struct req_capsule *pill = &info->mti_pill;
+ int rc;
+ ENTRY;
+
+ rc = mdt_setattr_unpack_rec(info);
+ if (rc)
+ RETURN(rc);
+
+ /* Epoch may be absent, skip errors. */
+ mdt_epoch_unpack(info);
if (req_capsule_field_present(pill, &RMF_EADATA, RCL_CLIENT)) {
ma->ma_lmm = req_capsule_client_get(pill, &RMF_EADATA);
RETURN(0);
}
+/* Unpack a close-style request: the epoch record first, then the setattr
+ * record.  Unlike the setattr path, the epoch record is mandatory here, so
+ * its absence fails the whole unpack.  Returns 0 or the first error. */
+int mdt_close_unpack(struct mdt_thread_info *info)
+{
+        int rc;
+        ENTRY;
+
+        rc = mdt_epoch_unpack(info);
+        if (rc == 0)
+                rc = mdt_setattr_unpack_rec(info);
+        RETURN(rc);
+}
+
static int mdt_create_unpack(struct mdt_thread_info *info)
{
struct mdt_rec_create *rec;
/* Create a new mdt_file_data struct, initialize it,
* and insert it to global hash table */
-static struct mdt_file_data *mdt_mfd_new(void)
+struct mdt_file_data *mdt_mfd_new(void)
{
struct mdt_file_data *mfd;
ENTRY;
}
/* Find the mfd pointed to by handle in global hash table. */
-static struct mdt_file_data *mdt_handle2mfd(const struct lustre_handle *handle)
+struct mdt_file_data *mdt_handle2mfd(const struct lustre_handle *handle)
{
ENTRY;
LASSERT(handle != NULL);
}
/* free mfd */
-static void mdt_mfd_free(struct mdt_file_data *mfd)
+void mdt_mfd_free(struct mdt_file_data *mfd)
{
LASSERT(list_empty(&mfd->mfd_handle.h_link));
LASSERT(list_empty(&mfd->mfd_list));
RETURN(rc);
}
+/* Returns the epoch count of @mo; callers treat a non-zero value as "the
+ * epoch is open".  The field is read as is, no lock is taken here. */
+static int mdt_epoch_opened(struct mdt_object *mo)
+{
+        return mo->mot_epochcount;
+}
-/*The following four functions are copied from MDS */
+/* Size-on-MDS counts as enabled for @mo while no IO epoch is recorded on it,
+ * i.e. while mot_ioepoch is zero.  Returns 1 in that case and 0 otherwise. */
+int mdt_sizeonmds_enabled(struct mdt_object *mo)
+{
+        return mo->mot_ioepoch == 0;
+}
-/* Write access to a file: executors cause a negative count,
- * writers a positive count. The semaphore is needed to perform
- * a check for the sign and then increment or decrement atomically.
- *
- * This code is closely tied to the allocation of the d_fsdata and the
- * MDS epoch, so we use the same semaphore for the whole lot.
+/* Re-enable Size-on-MDS on @mo.
+ * The object's epoch and flags are reset only if the epoch carried in
+ * info->mti_epoch still matches the one recorded on the object; both the
+ * comparison and the reset happen under mdt_ioepoch_lock. */
+void mdt_sizeonmds_enable(struct mdt_thread_info *info,
+                          struct mdt_object *mo)
+{
+        struct mdt_device *mdt = info->mti_mdt;
+
+        spin_lock(&mdt->mdt_ioepoch_lock);
+        if (mo->mot_ioepoch == info->mti_epoch->ioepoch) {
+                mo->mot_flags = 0;
+                mo->mot_ioepoch = 0;
+        }
+        spin_unlock(&mdt->mdt_ioepoch_lock);
+}
+
+/* Open the epoch. Epoch open is allowed if @writecount is not negative.
+ * The epoch and writecount handling is performed under the mdt_ioepoch_lock.
*
- * FIXME and TODO : handle the epoch!
- * epoch argument is nonzero during recovery */
-static int mdt_get_write_access(struct mdt_device *mdt, struct mdt_object *o,
- __u64 epoch)
+ * Only regular files take part; for anything else 0 is returned at once.
+ * If the object already has an open epoch it is continued, otherwise a new
+ * one is started and assigned to the object. In both cases the epoch count
+ * of the object is incremented.
+ *
+ * When a new epoch has been started and info->mti_rr.rr_fid1 is set, an EX
+ * UPDATE lock is taken on the object and dropped right away, and the result
+ * of taking it is returned; otherwise 0 is returned.
+ *
+ * @epoch is nonzero during recovery XXX not ready. */
+int mdt_epoch_open(struct mdt_thread_info *info, struct mdt_object *o,
+ __u64 epoch)
{
- int rc = 0;
+ struct mdt_device *mdt = info->mti_mdt;
+ int cancel = 0;
+ int rc;
ENTRY;
- spin_lock(&mdt->mdt_epoch_lock);
-
- if (o->mot_writecount < 0) {
- rc = -ETXTBSY;
+ if (!S_ISREG(lu_object_attr(&o->mot_obj.mo_lu)))
+ RETURN(0);
+
+ spin_lock(&mdt->mdt_ioepoch_lock);
+ if (mdt_epoch_opened(o)) {
+ /* Epoch continues even if there are no writers yet. */
+ CDEBUG(D_INODE, "continue epoch "LPU64" for "DFID"\n",
+ o->mot_ioepoch, PFID(mdt_object_fid(o)));
} else {
- if (o->mot_io_epoch != 0) {
- CDEBUG(D_INODE, "continue epoch "LPU64" for "DFID"\n",
- o->mot_io_epoch, PFID(mdt_object_fid(o)));
- } else {
- if (epoch > mdt->mdt_io_epoch)
- mdt->mdt_io_epoch = epoch;
- else
- mdt->mdt_io_epoch++;
- o->mot_io_epoch = mdt->mdt_io_epoch;
- CDEBUG(D_INODE, "starting epoch "LPU64" for "DFID"\n",
- mdt->mdt_io_epoch, PFID(mdt_object_fid(o)));
- }
- o->mot_writecount ++;
+ if (epoch > mdt->mdt_ioepoch)
+ mdt->mdt_ioepoch = epoch;
+ else
+ mdt->mdt_ioepoch++;
+ o->mot_ioepoch = epoch ? epoch : mdt->mdt_ioepoch;
+ /* Log the epoch actually assigned to the object: a nonzero
+ * @epoch that is not above the device counter differs from
+ * mdt->mdt_ioepoch. */
+ CDEBUG(D_INODE, "starting epoch "LPU64" for "DFID"\n",
+ o->mot_ioepoch, PFID(mdt_object_fid(o)));
+ cancel = 1;
}
- spin_unlock(&mdt->mdt_epoch_lock);
- RETURN(rc);
+ o->mot_epochcount++;
+ spin_unlock(&mdt->mdt_ioepoch_lock);
+
+ /* Cancel Size-on-MDS attributes on clients if not truncate.
+ * In the latter case, mdt_reint_setattr will do it. */
+ if (cancel && (info->mti_rr.rr_fid1 != NULL)) {
+ struct mdt_lock_handle *lh = &info->mti_lh[MDT_LH_CHILD];
+ lh->mlh_mode = LCK_EX;
+ rc = mdt_object_lock(info, o, lh, MDS_INODELOCK_UPDATE);
+ mdt_object_unlock(info, o, lh, 1);
+ RETURN(rc);
+ }
+ RETURN(0);
}
-static void mdt_put_write_access(struct mdt_device *mdt, struct mdt_object *o)
+/* Update the on-disk attributes if needed and re-enable Size-on-MDS caching. */
+static int mdt_sizeonmds_update(struct mdt_thread_info *info,
+ struct mdt_object *o)
{
ENTRY;
- spin_lock(&mdt->mdt_epoch_lock);
- o->mot_writecount --;
- if (o->mot_writecount == 0)
- o->mot_io_epoch = 0;
- spin_unlock(&mdt->mdt_epoch_lock);
- EXIT;
+ CDEBUG(D_INODE, "Closing epoch "LPU64" on "DFID". Count %d\n",
+ o->mot_ioepoch, PFID(mdt_object_fid(o)), o->mot_epochcount);
+
+ if (info->mti_attr.ma_attr.la_valid & LA_SIZE)
+ /* Do Size-on-MDS attribute update.
+ * Size-on-MDS is re-enabled inside. */
+ RETURN(mdt_attr_set(info, o, 0));
+ else
+ mdt_sizeonmds_enable(info, o);
+ RETURN(0);
}
-static int mdt_deny_write_access(struct mdt_device *mdt, struct mdt_object *o)
+/* Close the IO epoch held on @o on behalf of the current request.
+ *
+ * The epoch count is dropped only when the client asked for it
+ * (MF_EPOCH_CLOSE) or when there is no request at all, which is treated as
+ * an eviction.
+ *
+ * Returns 0 at once for non-regular objects.
+ * Returns 1 if the epoch count was not dropped.
+ * Returns (positive) EAGAIN if the epoch ended but a Size-on-MDS update is
+ * still needed from the client.
+ * Otherwise returns 0, or, when the epoch ended on a non-eviction path, the
+ * result of mdt_sizeonmds_update(). */
+static int mdt_epoch_close(struct mdt_thread_info *info, struct mdt_object *o)
+{
+ int eviction = (mdt_info_req(info) == NULL ? 1 : 0);
+ struct lu_attr *la = &info->mti_attr.ma_attr;
+ int achange = 0;
+ int opened;
+ int rc = 1;
+ ENTRY;
+
+ if (!S_ISREG(lu_object_attr(&o->mot_obj.mo_lu)))
+ RETURN(0);
+
+ spin_lock(&info->mti_mdt->mdt_ioepoch_lock);
+
+ /* Epoch closes only if the client asks for it or an eviction occurs. */
+ if (eviction || (info->mti_epoch->flags & MF_EPOCH_CLOSE)) {
+ LASSERT(o->mot_epochcount);
+ o->mot_epochcount--;
+
+ CDEBUG(D_INODE, "Closing epoch "LPU64" on "DFID". Count %d\n",
+ o->mot_ioepoch, PFID(mdt_object_fid(o)),
+ o->mot_epochcount);
+
+ if (!eviction)
+ achange = (info->mti_epoch->flags & MF_SOM_CHANGE);
+
+ rc = 0;
+ if (!eviction && !mdt_epoch_opened(o)) {
+ /* Epoch ends. Is a Size-on-MDS update needed? */
+ if (o->mot_flags & MF_SOM_CHANGE) {
+ /* Some previous writer changed the attribute.
+ * Do not trust the current Size-on-MDS
+ * update, re-ask the client. */
+ rc = EAGAIN;
+ } else if (!(la->la_valid & LA_SIZE) && achange) {
+ /* Attributes were changed by the last writer
+ * only but no Size-on-MDS update was received. */
+ rc = EAGAIN;
+ }
+ }
+
+ if (achange || eviction)
+ o->mot_flags |= MF_SOM_CHANGE;
+ }
+
+ opened = mdt_epoch_opened(o);
+ spin_unlock(&info->mti_mdt->mdt_ioepoch_lock);
+
+ /* XXX: if eviction occurred, do nothing yet. */
+ if ((rc == 0) && !opened && !eviction) {
+ /* Epoch ends and wanted Size-on-MDS update is obtained. */
+ rc = mdt_sizeonmds_update(info, o);
+ }
+ RETURN(rc);
+}
+
+int mdt_write_get(struct mdt_device *mdt, struct mdt_object *o)
{
int rc = 0;
ENTRY;
- spin_lock(&mdt->mdt_epoch_lock);
- if (o->mot_writecount > 0) {
+ spin_lock(&mdt->mdt_ioepoch_lock);
+ if (o->mot_writecount < 0)
rc = -ETXTBSY;
- } else
- o->mot_writecount --;
- spin_unlock(&mdt->mdt_epoch_lock);
+ else
+ o->mot_writecount++;
+ spin_unlock(&mdt->mdt_ioepoch_lock);
RETURN(rc);
}
-static void mdt_allow_write_access(struct mdt_device *mdt,
- struct mdt_object *o)
+static void mdt_write_put(struct mdt_device *mdt, struct mdt_object *o)
{
ENTRY;
- spin_lock(&mdt->mdt_epoch_lock);
- o->mot_writecount ++;
- spin_unlock(&mdt->mdt_epoch_lock);
+ spin_lock(&mdt->mdt_ioepoch_lock);
+ o->mot_writecount--;
+ spin_unlock(&mdt->mdt_ioepoch_lock);
EXIT;
}
-int mdt_query_write_access(struct mdt_device *mdt, struct mdt_object *o)
+static int mdt_write_deny(struct mdt_device *mdt, struct mdt_object *o)
{
- int wc;
+ int rc = 0;
ENTRY;
+ spin_lock(&mdt->mdt_ioepoch_lock);
+ if (o->mot_writecount > 0)
+ rc = -ETXTBSY;
+ else
+ o->mot_writecount--;
+ spin_unlock(&mdt->mdt_ioepoch_lock);
+ RETURN(rc);
+}
- spin_lock(&mdt->mdt_epoch_lock);
- wc = o->mot_writecount;
- spin_unlock(&mdt->mdt_epoch_lock);
-
- RETURN(wc);
+static void mdt_write_allow(struct mdt_device *mdt, struct mdt_object *o)
+{
+ ENTRY;
+ spin_lock(&mdt->mdt_ioepoch_lock);
+ o->mot_writecount++;
+ spin_unlock(&mdt->mdt_ioepoch_lock);
+ EXIT;
}
/* there can be no real transaction so prepare the fake one */
int created,
struct ldlm_reply *rep)
{
- struct mdt_export_data *med;
struct mdt_file_data *mfd;
- struct mdt_device *mdt = info->mti_mdt;
struct mdt_body *repbody;
struct md_attr *ma = &info->mti_attr;
struct lu_attr *la = &ma->ma_attr;
if (flags & FMODE_WRITE) {
- /* FIXME: in recovery, need to pass old epoch here */
- rc = mdt_get_write_access(mdt, o, 0);
- if (rc == 0)
- repbody->io_epoch = o->mot_io_epoch;
+ rc = mdt_write_get(info->mti_mdt, o);
+ if (rc == 0) {
+ /* FIXME: in recovery, need to pass old epoch here */
+ mdt_epoch_open(info, o, 0);
+ repbody->ioepoch = o->mot_ioepoch;
+ }
} else if (flags & MDS_FMODE_EXEC) {
- rc = mdt_deny_write_access(mdt, o);
+ rc = mdt_write_deny(info->mti_mdt, o);
}
if (rc)
RETURN(rc);
mfd = mdt_mfd_new();
if (mfd != NULL) {
+ struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
+
/* keep a reference on this object for this open,
* and is released by mdt_mfd_close() */
mdt_object_get(info->mti_ctxt, o);
mfd->mfd_mode = flags;
mfd->mfd_object = o;
- mfd->mfd_xid = mdt_info_req(info)->rq_xid;
+ mfd->mfd_xid = req->rq_xid;
- med = &req->rq_export->exp_mdt_data;
spin_lock(&med->med_open_lock);
list_add(&mfd->mfd_list, &med->med_open_head);
spin_unlock(&med->med_open_lock);
const struct lu_context *ctxt = info->mti_ctxt;
struct mdt_object *parent;
struct mdt_object *child;
- struct ldlm_reply *ldlm_rep;
+ struct ldlm_reply *ldlm_rep = NULL;
struct mdt_body *repbody;
struct mdt_file_data *mfd;
struct list_head *h;
if (flags & FMODE_WRITE) {
/* FIXME: in recovery, need to pass old epoch here */
- result = mdt_get_write_access(mdt, child, 0);
- if (result == 0)
- repbody->io_epoch = child->mot_io_epoch;
+ result = mdt_write_get(info->mti_mdt, child);
+ if (result == 0) {
+ /* FIXME: in recovery, need to pass old epoch here */
+ mdt_epoch_open(info, child, 0);
+ repbody->ioepoch = child->mot_ioepoch;
+ }
} else if (flags & MDS_FMODE_EXEC)
- result = mdt_deny_write_access(mdt, child);
+ result = mdt_write_deny(mdt, child);
if (result)
GOTO(out_child, result);
return result;
}
-void mdt_mfd_close(const struct lu_context *ctxt,
- struct mdt_device *mdt, struct mdt_file_data *mfd,
- struct md_attr *ma)
+#define MFD_CLOSED(mode) (((mode) & ~(FMODE_EPOCH | FMODE_SOM | \
+ FMODE_EPOCHLCK)) == FMODE_CLOSED)
+
+/* Tell whether @mfd no longer represents an open file: either there is no
+ * mfd at all, or its mode satisfies MFD_CLOSED(). */
+static int mdt_mfd_closed(struct mdt_file_data *mfd)
+{
+        if (mfd == NULL)
+                return 1;
+        return MFD_CLOSED(mfd->mfd_mode);
+}
+
+int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd)
{
struct mdt_object *o = mfd->mfd_object;
+ struct md_object *next = mdt_object_child(o);
+ struct md_attr *ma = &info->mti_attr;
+ int rc = 0, ret = 0;
+ int mode;
ENTRY;
- if (mfd->mfd_mode & FMODE_WRITE) {
- mdt_put_write_access(mdt, o);
- } else if (mfd->mfd_mode & MDS_FMODE_EXEC) {
- mdt_allow_write_access(mdt, o);
+ mode = mfd->mfd_mode;
+ mfd->mfd_mode = FMODE_CLOSED;
+
+ if ((mode & FMODE_WRITE) || (mode & FMODE_EPOCHLCK)) {
+ mdt_write_put(info->mti_mdt, o);
+ ret = mdt_epoch_close(info, o);
+ } else if (mode & MDS_FMODE_EXEC) {
+ mdt_write_allow(info->mti_mdt, o);
+ } else if (mode & FMODE_EPOCH) {
+ ret = mdt_epoch_close(info, o);
}
- mdt_mfd_free(mfd);
+ ma->ma_need |= MA_INODE;
+
+ if (!MFD_CLOSED(mode))
+ rc = mo_close(info->mti_ctxt, next, ma);
+ else if (ret == EAGAIN)
+ rc = mo_attr_get(info->mti_ctxt, next, ma);
+
+ /* If the object is unlinked, do not try to re-enable SIZEONMDS */
+ if ((ret == EAGAIN) && (ma->ma_valid & MA_INODE) &&
+ (ma->ma_attr.la_nlink == 0))
+ {
+ ret = 0;
+ }
- mo_close(ctxt, mdt_object_child(o), ma);
- EXIT;
+ if ((ret == EAGAIN) || (ret == 1)) {
+ struct mdt_export_data *med;
+ /* The epoch has not closed or Size-on-MDS update is needed.
+ * Put mfd back into the list. */
+ mfd->mfd_mode = (ret == 1 ? FMODE_EPOCH : FMODE_SOM);
+
+ LASSERT(mdt_info_req(info));
+ med = &mdt_info_req(info)->rq_export->exp_mdt_data;
+ spin_lock(&med->med_open_lock);
+ list_add(&mfd->mfd_list, &med->med_open_head);
+ class_handle_hash_back(&mfd->mfd_handle);
+ spin_unlock(&med->med_open_lock);
+ if (ret == 1)
+ ret = 0;
+ else {
+ CDEBUG(D_INODE, "Size-on-MDS attribute update is "
+ "needed on "DFID"\n", PFID(mdt_object_fid(o)));
+ }
+ } else {
+ mdt_mfd_free(mfd);
+ mdt_object_put(info->mti_ctxt, o);
+ }
+
+ RETURN(rc ? rc : ret);
}
int mdt_close(struct mdt_thread_info *info)
struct mdt_object *o;
struct md_attr *ma = &info->mti_attr;
struct mdt_body *repbody = NULL;
- int rc;
+ int rc, ret = 0;
ENTRY;
+ /* Close may come with the Size-on-MDS update. Unpack it. */
+ rc = mdt_close_unpack(info);
+ if (rc)
+ RETURN(rc);
+
+ LASSERT(info->mti_epoch);
+
req_capsule_set_size(&info->mti_pill, &RMF_MDT_MD, RCL_SERVER,
info->mti_mdt->mdt_max_mdsize);
req_capsule_set_size(&info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER,
med = &mdt_info_req(info)->rq_export->exp_mdt_data;
spin_lock(&med->med_open_lock);
- mfd = mdt_handle2mfd(&(info->mti_body->handle));
- if (mfd == NULL) {
+ mfd = mdt_handle2mfd(&(info->mti_epoch->handle));
+ if (mdt_mfd_closed(mfd)) {
spin_unlock(&med->med_open_lock);
CDEBUG(D_INODE, "no handle for file close: fid = "DFID
- ": cookie = "LPX64"\n", PFID(&info->mti_body->fid1),
- info->mti_body->handle.cookie);
+ ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1),
+ info->mti_epoch->handle.cookie);
rc = -ESTALE;
} else {
class_handle_unhash(&mfd->mfd_handle);
RCL_SERVER);
ma->ma_need = MA_INODE;
}
+
+ /* Do not lose object before last unlink. */
o = mfd->mfd_object;
- mdt_mfd_close(info->mti_ctxt, info->mti_mdt, mfd, ma);
+ mdt_object_get(info->mti_ctxt, o);
+ ret = mdt_mfd_close(info, mfd);
if (repbody != NULL)
rc = mdt_handle_last_unlink(info, o, ma);
-
- /* release reference on this object. */
mdt_object_put(info->mti_ctxt, o);
}
if (repbody != NULL)
if (MDT_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK))
RETURN(-ENOMEM);
- RETURN(rc);
+ RETURN(rc ? rc : ret);
}
+/* Handle a DONE_WRITING request.
+ * Packs the reply, unpacks the epoch and setattr records from the request,
+ * looks up the open file data by the handle carried in the epoch record and
+ * closes it with MF_EPOCH_CLOSE forced on.
+ * Returns -ESTALE if the handle is unknown, otherwise the first error from
+ * packing or unpacking, or the result of mdt_mfd_close(). */
int mdt_done_writing(struct mdt_thread_info *info)
{
+ struct mdt_body *repbody = NULL;
+ struct mdt_export_data *med;
+ struct mdt_file_data *mfd;
int rc;
ENTRY;
- req_capsule_set(&info->mti_pill, &RQF_MDS_DONE_WRITING);
rc = req_capsule_pack(&info->mti_pill);
+ if (rc)
+ RETURN(rc);
+
+ repbody = req_capsule_server_get(&info->mti_pill,
+ &RMF_MDT_BODY);
+ repbody->eadatasize = 0;
+ repbody->aclsize = 0;
- RETURN(0);
+ /* Done Writing may come with the Size-on-MDS update. Unpack it. */
+ rc = mdt_close_unpack(info);
+ if (rc)
+ RETURN(rc);
+
+ med = &mdt_info_req(info)->rq_export->exp_mdt_data;
+ spin_lock(&med->med_open_lock);
+ mfd = mdt_handle2mfd(&(info->mti_epoch->handle));
+ if (mfd == NULL) {
+ spin_unlock(&med->med_open_lock);
+ /* Name this handler in the message so the log can be told
+ * apart from the identical lookup failure in mdt_close(). */
+ CDEBUG(D_INODE, "no handle for done writing: fid = "DFID
+ ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1),
+ info->mti_epoch->handle.cookie);
+ rc = -ESTALE;
+ } else {
+ LASSERT((mfd->mfd_mode == FMODE_EPOCH) ||
+ (mfd->mfd_mode == FMODE_EPOCHLCK));
+ class_handle_unhash(&mfd->mfd_handle);
+ list_del_init(&mfd->mfd_list);
+ spin_unlock(&med->med_open_lock);
+
+ /* Set EPOCH CLOSE flag if not set by client. */
+ info->mti_epoch->flags |= MF_EPOCH_CLOSE;
+ rc = mdt_mfd_close(info, mfd);
+ }
+ RETURN(rc);
}
/* transno in two contexts - for commit_cb and for thread */
txi = lu_context_key_get(&txn->th_ctx, &mdt_txn_key);
mti = lu_context_key_get(ctx, &mdt_thread_key);
+ req = mdt_info_req(mti);
/* FIXME: don't handle requests from SEQ/FLD,
* should be fixed
*/
- if (mti->mti_mdt == NULL) {
+ if (mti->mti_mdt == NULL || req == NULL) {
txi->txi_transno = 0;
return 0;
}
- req = mdt_info_req(mti);
LASSERT(req != NULL);
/*TODO: checks for recovery cases, see mds_finish_transno */
spin_lock(&mdt->mdt_transno_lock);
req->rq_status = rc;
body->valid |= OBD_MD_MDS;
}
- mdt_pack_attr2body(body, &mti->mti_attr.ma_attr,
- mti->mti_rr.rr_fid2);
+ mdt_pack_attr2body(body, &mti->mti_attr.ma_attr, mdt_object_fid(child));
mdt_object_put(mti->mti_ctxt, child);
}
obj = mdt_object_find(mti->mti_ctxt, mdt, mti->mti_rr.rr_fid1);
LASSERT(!IS_ERR(obj));
mo_attr_get(mti->mti_ctxt, mdt_object_child(obj), &mti->mti_attr);
- mdt_pack_attr2body(body, &mti->mti_attr.ma_attr,
- mti->mti_rr.rr_fid1);
+ mdt_pack_attr2body(body, &mti->mti_attr.ma_attr, mdt_object_fid(obj));
/* Don't return OST-specific attributes if we didn't just set them */
/*
if (rc == 0) {
/* return fid & attr to client. */
if (ma->ma_valid & MA_INODE)
- mdt_pack_attr2body(repbody, &ma->ma_attr,
+ mdt_pack_attr2body(repbody, &ma->ma_attr,
mdt_object_fid(child));
}
mdt_object_put(info->mti_ctxt, child);
RETURN(rc);
}
-
/* In the raw-setattr case, we lock the child inode.
* In the write-back case or if being called from open,
* the client holds a lock already.
* We use the ATTR_FROM_OPEN (translated into MRF_SETATTR_LOCKED by
* mdt_setattr_unpack()) flag to tell these cases apart. */
-static int mdt_reint_setattr(struct mdt_thread_info *info)
+int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo, int flags)
{
- struct lu_attr *attr = &info->mti_attr.ma_attr;
- struct mdt_reint_record *rr = &info->mti_rr;
- struct ptlrpc_request *req = mdt_info_req(info);
- struct mdt_object *mo;
+ struct md_attr *ma = &info->mti_attr;
struct md_object *next;
struct mdt_lock_handle *lh;
- struct mdt_body *repbody;
- int rc;
-
+ int som_update = 0;
+ int rc;
ENTRY;
- DEBUG_REQ(D_INODE, req, "setattr "DFID" %x", PFID(rr->rr_fid1),
- (unsigned int)attr->la_valid);
+ if (info->mti_epoch)
+ som_update = (info->mti_epoch->flags & MF_SOM_CHANGE);
+ /* Try to avoid object_lock if another epoch has been started
+ * already. */
+ if (som_update && (info->mti_epoch->ioepoch != mo->mot_ioepoch))
+ RETURN(0);
+
lh = &info->mti_lh[MDT_LH_PARENT];
lh->mlh_mode = LCK_EX;
- if (rr->rr_flags & MRF_SETATTR_LOCKED) {
- mo = mdt_object_find(info->mti_ctxt, info->mti_mdt,
- rr->rr_fid1);
- } else {
+ if (!(flags & MRF_SETATTR_LOCKED)) {
__u64 lockpart = MDS_INODELOCK_UPDATE;
- if (attr->la_valid & (LA_MODE|LA_UID|LA_GID))
+ if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
lockpart |= MDS_INODELOCK_LOOKUP;
- mo = mdt_object_find_lock(info, rr->rr_fid1, lh, lockpart);
+ rc = mdt_object_lock(info, mo, lh, lockpart);
+ if (rc != 0)
+ GOTO(out, rc);
}
- if (IS_ERR(mo))
- RETURN(rc = PTR_ERR(mo));
+ /* Setattrs are synchronized through the dlm lock taken above. If
+ * another epoch has started, its attributes may already be flushed to
+ * disk, so skip the setattr. */
+ next = mdt_object_child(mo);
+ if (som_update && (info->mti_epoch->ioepoch != mo->mot_ioepoch))
+ GOTO(out, rc = 0);
+
next = mdt_object_child(mo);
if (lu_object_assert_not_exists(&mo->mot_obj.mo_lu))
- GOTO(out_unlock, rc = -ENOENT);
+ GOTO(out, rc = -ENOENT);
/* all attrs are packed into mti_attr in unpack_setattr */
mdt_fail_write(info->mti_ctxt, info->mti_mdt->mdt_bottom,
OBD_FAIL_MDS_REINT_SETATTR_WRITE);
- rc = mo_attr_set(info->mti_ctxt, next, &info->mti_attr);
+ /* all attrs are packed into mti_attr in unpack_setattr */
+ rc = mo_attr_set(info->mti_ctxt, next, ma);
if (rc != 0)
- GOTO(out_unlock, rc);
+ GOTO(out, rc);
- info->mti_attr.ma_need = MA_INODE;
- rc = mo_attr_get(info->mti_ctxt, next, &info->mti_attr);
- if (rc != 0)
- GOTO(out_unlock, rc);
+ /* Re-enable SIZEONMDS. */
+ if (som_update) {
+ CDEBUG(D_INODE, "Closing epoch "LPU64" on "DFID". Count %d\n",
+ mo->mot_ioepoch, PFID(mdt_object_fid(mo)),
+ mo->mot_epochcount);
+
+ mdt_sizeonmds_enable(info, mo);
+ }
+
+ EXIT;
+out:
+ mdt_object_unlock(info, mo, lh, rc);
+ return(rc);
+}
+
+static int mdt_reint_setattr(struct mdt_thread_info *info)
+{
+ struct md_attr *ma = &info->mti_attr;
+ struct mdt_reint_record *rr = &info->mti_rr;
+ struct ptlrpc_request *req = mdt_info_req(info);
+ struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
+ struct mdt_file_data *mfd;
+ struct mdt_object *mo;
+ struct md_object *next;
+ struct mdt_body *repbody;
+ int rc;
+
+ ENTRY;
+
+ DEBUG_REQ(D_INODE, req, "setattr "DFID" %x", PFID(rr->rr_fid1),
+ (unsigned int)ma->ma_attr.la_valid);
repbody = req_capsule_server_get(&info->mti_pill, &RMF_MDT_BODY);
- mdt_pack_attr2body(repbody, attr, mdt_object_fid(mo));
-
- /* don't return OST-specific attributes if we didn't just set them.
- if (valid & ATTR_SIZE)
- repbody->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
- if (valid & (ATTR_MTIME | ATTR_MTIME_SET))
- repbody->valid |= OBD_MD_FLMTIME;
- if (valid & (ATTR_ATIME | ATTR_ATIME_SET))
- repbody->valid |= OBD_MD_FLATIME;
- */
- GOTO(out_unlock, rc);
-out_unlock:
- mdt_object_unlock_put(info, mo, lh, rc);
+ mo = mdt_object_find(info->mti_ctxt, info->mti_mdt, rr->rr_fid1);
+ if (IS_ERR(mo))
+ RETURN(rc = PTR_ERR(mo));
+
+ if (info->mti_epoch && (info->mti_epoch->flags & MF_EPOCH_OPEN)) {
+ /* Truncate case. */
+ rc = mdt_write_get(info->mti_mdt, mo);
+ if (rc)
+ GOTO(out, rc);
+
+ mfd = mdt_mfd_new();
+ if (mfd == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ /* FIXME: in recovery, need to pass old epoch here */
+ mdt_epoch_open(info, mo, 0);
+ repbody->ioepoch = mo->mot_ioepoch;
+
+ mdt_object_get(info->mti_ctxt, mo);
+ mfd->mfd_mode = FMODE_EPOCHLCK;
+ mfd->mfd_object = mo;
+ mfd->mfd_xid = req->rq_xid;
+
+ spin_lock(&med->med_open_lock);
+ list_add(&mfd->mfd_list, &med->med_open_head);
+ spin_unlock(&med->med_open_lock);
+ repbody->handle.cookie = mfd->mfd_handle.h_cookie;
+ }
+
+ rc = mdt_attr_set(info, mo, rr->rr_flags);
+ if (rc)
+ GOTO(out, rc);
+
+ if (info->mti_epoch && (info->mti_epoch->flags & MF_SOM_CHANGE)) {
+ LASSERT(info->mti_epoch);
+
+ /* Size-on-MDS Update. Find and free mfd. */
+ spin_lock(&med->med_open_lock);
+ mfd = mdt_handle2mfd(&(info->mti_epoch->handle));
+ if (mfd == NULL) {
+ spin_unlock(&med->med_open_lock);
+ CDEBUG(D_INODE, "no handle for file close: "
+ "fid = "DFID": cookie = "LPX64"\n",
+ PFID(info->mti_rr.rr_fid1),
+ info->mti_epoch->handle.cookie);
+ GOTO(out, rc = -ESTALE);
+ }
+
+ LASSERT(mfd->mfd_mode == FMODE_SOM);
+ LASSERT(ma->ma_attr.la_valid & LA_SIZE);
+ LASSERT(!(info->mti_epoch->flags & MF_EPOCH_CLOSE));
+
+ class_handle_unhash(&mfd->mfd_handle);
+ list_del_init(&mfd->mfd_list);
+ spin_unlock(&med->med_open_lock);
+ mdt_mfd_close(info, mfd);
+ }
+
+ ma->ma_need = MA_INODE;
+ next = mdt_object_child(mo);
+ rc = mo_attr_get(info->mti_ctxt, next, ma);
+ if (rc != 0)
+ GOTO(out, rc);
+
+ mdt_pack_attr2body(repbody, &ma->ma_attr, mdt_object_fid(mo));
+ EXIT;
+out:
+ mdt_object_put(info->mti_ctxt, mo);
return rc;
}
-
static int mdt_reint_create(struct mdt_thread_info *info)
{
int rc;
EXPORT_SYMBOL(class_handle_hash);
EXPORT_SYMBOL(class_handle_unhash);
+EXPORT_SYMBOL(class_handle_hash_back);
EXPORT_SYMBOL(class_handle2object);
/* obd_config.c */
#include <linux/fs.h>
#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
-void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid)
-{
- if (ia_valid & ATTR_ATIME) {
- oa->o_atime = LTIME_S(attr->ia_atime);
- oa->o_valid |= OBD_MD_FLATIME;
- }
- if (ia_valid & ATTR_MTIME) {
- oa->o_mtime = LTIME_S(attr->ia_mtime);
- oa->o_valid |= OBD_MD_FLMTIME;
- }
- if (ia_valid & ATTR_CTIME) {
- oa->o_ctime = LTIME_S(attr->ia_ctime);
- oa->o_valid |= OBD_MD_FLCTIME;
- }
- if (ia_valid & ATTR_SIZE) {
- oa->o_size = attr->ia_size;
- oa->o_valid |= OBD_MD_FLSIZE;
- }
- if (ia_valid & ATTR_MODE) {
- oa->o_mode = attr->ia_mode;
- oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
- if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
- oa->o_mode &= ~S_ISGID;
- }
- if (ia_valid & ATTR_UID) {
- oa->o_uid = attr->ia_uid;
- oa->o_valid |= OBD_MD_FLUID;
- }
- if (ia_valid & ATTR_GID) {
- oa->o_gid = attr->ia_gid;
- oa->o_valid |= OBD_MD_FLGID;
- }
-}
-EXPORT_SYMBOL(obdo_from_iattr);
-
-void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid)
-{
- valid &= oa->o_valid;
-
- if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
- CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n",
- oa->o_valid, oa->o_mtime, oa->o_ctime);
-
- attr->ia_valid = 0;
- if (valid & OBD_MD_FLATIME) {
- LTIME_S(attr->ia_atime) = oa->o_atime;
- attr->ia_valid |= ATTR_ATIME;
- }
- if (valid & OBD_MD_FLMTIME) {
- LTIME_S(attr->ia_mtime) = oa->o_mtime;
- attr->ia_valid |= ATTR_MTIME;
- }
- if (valid & OBD_MD_FLCTIME) {
- LTIME_S(attr->ia_ctime) = oa->o_ctime;
- attr->ia_valid |= ATTR_CTIME;
- }
- if (valid & OBD_MD_FLSIZE) {
- attr->ia_size = oa->o_size;
- attr->ia_valid |= ATTR_SIZE;
- }
-#if 0 /* you shouldn't be able to change a file's type with setattr */
- if (valid & OBD_MD_FLTYPE) {
- attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
- attr->ia_valid |= ATTR_MODE;
- }
-#endif
- if (valid & OBD_MD_FLMODE) {
- attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
- attr->ia_valid |= ATTR_MODE;
- if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
- attr->ia_mode &= ~S_ISGID;
- }
- if (valid & OBD_MD_FLUID) {
- attr->ia_uid = oa->o_uid;
- attr->ia_valid |= ATTR_UID;
- }
- if (valid & OBD_MD_FLGID) {
- attr->ia_gid = oa->o_gid;
- attr->ia_valid |= ATTR_GID;
- }
-}
-EXPORT_SYMBOL(iattr_from_obdo);
-
/* WARNING: the file systems must take care not to tinker with
attributes they don't manage (such as blocks). */
void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
}
EXPORT_SYMBOL(obdo_to_inode);
#endif
-
(struct llog_size_change_rec *)rec;
lustre_swab_ll_fid(&lsc->lsc_fid);
- __swab32s(&lsc->lsc_io_epoch);
+ __swab32s(&lsc->lsc_ioepoch);
break;
}
spin_unlock(&handle_lock);
}
+/* Put handle @h back into the handle hash table using the cookie it
+ * already carries: the bucket is selected from h->h_cookie, h->h_link is
+ * added to that bucket and handle_count is incremented, all under
+ * handle_lock. No new cookie is generated here. */
+void class_handle_hash_back(struct portals_handle *h)
+{
+ struct list_head *bucket;
+ ENTRY;
+
+ spin_lock(&handle_lock);
+ bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK);
+ list_add(&h->h_link, bucket);
+ handle_count++;
+ spin_unlock(&handle_lock);
+
+ EXIT;
+}
+
void *class_handle2object(__u64 cookie)
{
struct list_head *bucket, *tmp;
ioobj->ioo_type = oa->o_mode;
}
EXPORT_SYMBOL(obdo_to_ioobj);
+
+/* Copy the attributes selected by @ia_valid (ATTR_* bits) from @attr into
+ * @oa and OR the matching OBD_MD_FL* bits into oa->o_valid. Bits already
+ * set in oa->o_valid are left untouched.
+ * NOTE(review): the ATTR_MODE branch tests in_group_p(oa->o_gid) before the
+ * ATTR_GID branch below assigns oa->o_gid, so the check uses whatever gid
+ * @oa held on entry -- confirm this is intended. */
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid)
+{
+ if (ia_valid & ATTR_ATIME) {
+ oa->o_atime = LTIME_S(attr->ia_atime);
+ oa->o_valid |= OBD_MD_FLATIME;
+ }
+ if (ia_valid & ATTR_MTIME) {
+ oa->o_mtime = LTIME_S(attr->ia_mtime);
+ oa->o_valid |= OBD_MD_FLMTIME;
+ }
+ if (ia_valid & ATTR_CTIME) {
+ oa->o_ctime = LTIME_S(attr->ia_ctime);
+ oa->o_valid |= OBD_MD_FLCTIME;
+ }
+ if (ia_valid & ATTR_SIZE) {
+ oa->o_size = attr->ia_size;
+ oa->o_valid |= OBD_MD_FLSIZE;
+ }
+ if (ia_valid & ATTR_MODE) {
+ oa->o_mode = attr->ia_mode;
+ oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
+ /* Drop the setgid bit unless the caller is in the group or
+ * holds CAP_FSETID. */
+ if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
+ oa->o_mode &= ~S_ISGID;
+ }
+ if (ia_valid & ATTR_UID) {
+ oa->o_uid = attr->ia_uid;
+ oa->o_valid |= OBD_MD_FLUID;
+ }
+ if (ia_valid & ATTR_GID) {
+ oa->o_gid = attr->ia_gid;
+ oa->o_valid |= OBD_MD_FLGID;
+ }
+}
+EXPORT_SYMBOL(obdo_from_iattr);
+
+/* Inverse of obdo_from_iattr(): copy the attributes selected by @valid
+ * (OBD_MD_FL* bits) from @oa into @attr. Only bits that are also set in
+ * oa->o_valid are honoured; attr->ia_valid is rebuilt from zero and ends up
+ * holding the ATTR_* bits of the fields actually copied.
+ * NOTE(review): the OBD_MD_FLMODE branch reads attr->ia_mode to preserve
+ * its S_IFMT bits, so @attr presumably has to arrive with ia_mode
+ * initialised -- confirm against callers. */
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid)
+{
+ valid &= oa->o_valid;
+
+ if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
+ CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n",
+ oa->o_valid, oa->o_mtime, oa->o_ctime);
+
+ attr->ia_valid = 0;
+ if (valid & OBD_MD_FLATIME) {
+ LTIME_S(attr->ia_atime) = oa->o_atime;
+ attr->ia_valid |= ATTR_ATIME;
+ }
+ if (valid & OBD_MD_FLMTIME) {
+ LTIME_S(attr->ia_mtime) = oa->o_mtime;
+ attr->ia_valid |= ATTR_MTIME;
+ }
+ if (valid & OBD_MD_FLCTIME) {
+ LTIME_S(attr->ia_ctime) = oa->o_ctime;
+ attr->ia_valid |= ATTR_CTIME;
+ }
+ if (valid & OBD_MD_FLSIZE) {
+ attr->ia_size = oa->o_size;
+ attr->ia_valid |= ATTR_SIZE;
+ }
+#if 0 /* you shouldn't be able to change a file's type with setattr */
+ if (valid & OBD_MD_FLTYPE) {
+ attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
+ attr->ia_valid |= ATTR_MODE;
+ }
+#endif
+ if (valid & OBD_MD_FLMODE) {
+ /* Keep the file-type bits already in @attr; take only the
+ * non-type bits from @oa. */
+ attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
+ attr->ia_valid |= ATTR_MODE;
+ /* Drop the setgid bit unless the caller is in the group or
+ * holds CAP_FSETID. */
+ if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
+ attr->ia_mode &= ~S_ISGID;
+ }
+ if (valid & OBD_MD_FLUID) {
+ attr->ia_uid = oa->o_uid;
+ attr->ia_valid |= ATTR_UID;
+ }
+ if (valid & OBD_MD_FLGID) {
+ attr->ia_gid = oa->o_gid;
+ attr->ia_valid |= ATTR_GID;
+ }
+}
+EXPORT_SYMBOL(iattr_from_obdo);
+
+/* Fill the iattr part of @op_data from @oa via iattr_from_obdo(), then
+ * carry over the two obdo fields handled outside struct iattr proper:
+ * o_blocks (stored in attr_blocks, flagged ATTR_BLOCKS) and o_flags
+ * (stored through ia_attr_flags, flagged ATTR_ATTR_FLAG).
+ * NOTE(review): iattr_from_obdo() masks its own copy of @valid with
+ * oa->o_valid, but the two tests below use the unmasked @valid -- confirm
+ * that callers only pass bits that are set in oa->o_valid. */
+void md_from_obdo(struct md_op_data *op_data, struct obdo *oa, obd_flag valid)
+{
+ iattr_from_obdo(&op_data->attr, oa, valid);
+ if (valid & OBD_MD_FLBLOCKS) {
+ op_data->attr_blocks = oa->o_blocks;
+ op_data->attr.ia_valid |= ATTR_BLOCKS;
+ }
+ if (valid & OBD_MD_FLFLAGS) {
+ /* NOTE(review): ia_attr_flags is reached by viewing
+ * op_data->attr as a struct ll_iattr; where ll_iattr wraps
+ * struct iattr this presumably lands on the member that
+ * follows attr in struct md_op_data -- confirm the layout. */
+ ((struct ll_iattr *)&op_data->attr)->ia_attr_flags =
+ oa->o_flags;
+ op_data->attr.ia_valid |= ATTR_ATTR_FLAG;
+ }
+}
+EXPORT_SYMBOL(md_from_obdo);
+
+/* Inverse of md_from_obdo(): copy the iattr fields selected by @valid
+ * (ATTR_* bits) from @op_data into @oa via obdo_from_iattr(), then add
+ * attr_blocks (ATTR_BLOCKS -> OBD_MD_FLBLOCKS) and the attribute flags
+ * (ATTR_ATTR_FLAG -> OBD_MD_FLFLAGS). */
+void obdo_from_md(struct obdo *oa, struct md_op_data *op_data,
+ unsigned int valid)
+{
+ obdo_from_iattr(oa, &op_data->attr, valid);
+ if (valid & ATTR_BLOCKS) {
+ oa->o_blocks = op_data->attr_blocks;
+ oa->o_valid |= OBD_MD_FLBLOCKS;
+ }
+ if (valid & ATTR_ATTR_FLAG) {
+ /* NOTE(review): ia_attr_flags is read by viewing
+ * op_data->attr as a struct ll_iattr; where ll_iattr wraps
+ * struct iattr this presumably reads the member that follows
+ * attr in struct md_op_data -- confirm the layout. */
+ oa->o_flags =
+ ((struct ll_iattr *)&op_data->attr)->ia_attr_flags;
+ oa->o_valid |= OBD_MD_FLFLAGS;
+ }
+}
+EXPORT_SYMBOL(obdo_from_md);
};
int filter_log_sz_change(struct llog_handle *cathandle,
struct ll_fid *mds_fid,
- __u32 io_epoch,
+ __u32 ioepoch,
struct llog_cookie *logcookie,
struct inode *inode);
//int filter_get_catalog(struct obd_device *);
int filter_log_sz_change(struct llog_handle *cathandle,
struct ll_fid *mds_fid,
- __u32 io_epoch,
+ __u32 ioepoch,
struct llog_cookie *logcookie,
struct inode *inode)
{
LOCK_INODE_MUTEX(inode);
ofd = inode->i_filterdata;
- if (ofd && ofd->ofd_epoch >= io_epoch) {
- if (ofd->ofd_epoch > io_epoch)
+ if (ofd && ofd->ofd_epoch >= ioepoch) {
+ if (ofd->ofd_epoch > ioepoch)
CERROR("client sent old epoch %d for obj ino %ld\n",
- io_epoch, inode->i_ino);
+ ioepoch, inode->i_ino);
UNLOCK_INODE_MUTEX(inode);
RETURN(0);
}
- if (ofd && ofd->ofd_epoch < io_epoch) {
- ofd->ofd_epoch = io_epoch;
+ if (ofd && ofd->ofd_epoch < ioepoch) {
+ ofd->ofd_epoch = ioepoch;
} else if (!ofd) {
OBD_ALLOC(ofd, sizeof(*ofd));
if (!ofd)
GOTO(out, rc = -ENOMEM);
igrab(inode);
inode->i_filterdata = ofd;
- ofd->ofd_epoch = io_epoch;
+ ofd->ofd_epoch = ioepoch;
}
/* the decision to write a record is now made, unlock */
UNLOCK_INODE_MUTEX(inode);
lsc->lsc_hdr.lrh_len = lsc->lsc_tail.lrt_len = sizeof(*lsc);
lsc->lsc_hdr.lrh_type = OST_SZ_REC;
lsc->lsc_fid = *mds_fid;
- lsc->lsc_io_epoch = io_epoch;
+ lsc->lsc_ioepoch = ioepoch;
rc = llog_cat_add_rec(cathandle, &lsc->lsc_hdr, logcookie, NULL);
OBD_FREE(lsc, sizeof(*lsc));
&RMF_MDT_BODY
};
+/* Message field list: ptlrpc body, then the mdt_epoch field, then a
+ * setattr record. */
+static const struct req_msg_field *mdt_close_msg[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_MDT_EPOCH,
+ &RMF_REC_SETATTR
+};
+
static const struct req_msg_field *mds_statfs_server[] = {
&RMF_PTLRPC_BODY,
&RMF_OBD_STATFS
static const struct req_msg_field *mds_reint_setattr_client[] = {
&RMF_PTLRPC_BODY,
&RMF_REC_SETATTR,
+ &RMF_MDT_EPOCH,
&RMF_EADATA,
&RMF_LOGCOOKIES
};
sizeof(struct mdt_body), lustre_swab_mdt_body);
EXPORT_SYMBOL(RMF_MDT_BODY);
+const struct req_msg_field RMF_MDT_EPOCH =
+ DEFINE_MSGF("mdt_epoch", 0,
+ sizeof(struct mdt_epoch), lustre_swab_mdt_epoch);
+EXPORT_SYMBOL(RMF_MDT_EPOCH);
+
const struct req_msg_field RMF_PTLRPC_BODY =
DEFINE_MSGF("ptlrpc_body", 0,
sizeof(struct ptlrpc_body), lustre_swab_ptlrpc_body);
const struct req_format RQF_MDS_CLOSE =
DEFINE_REQ_FMT0("MDS_CLOSE",
- mdt_body_only, mds_last_unlink_server);
+ mdt_close_msg, mds_last_unlink_server);
EXPORT_SYMBOL(RQF_MDS_CLOSE);
const struct req_format RQF_MDS_PIN =
const struct req_format RQF_MDS_DONE_WRITING =
DEFINE_REQ_FMT0("MDS_DONE_WRITING",
- mdt_body_only, mdt_body_only);
+ mdt_close_msg, mdt_body_only);
EXPORT_SYMBOL(RQF_MDS_DONE_WRITING);
const struct req_format RQF_MDS_READPAGE =
__swab64s (&b->atime);
__swab64s (&b->ctime);
__swab64s (&b->blocks);
- __swab64s (&b->io_epoch);
+ __swab64s (&b->ioepoch);
__swab32s (&b->fsuid);
__swab32s (&b->fsgid);
__swab32s (&b->capability);
__swab32s (&b->max_cookiesize);
}
+/* Byte-swap the ioepoch (64-bit) and flags (32-bit) members of @b in
+ * place; the handle is left untouched.
+ * NOTE(review): @b is declared as struct mdt_body *, yet this function is
+ * named for mdt_epoch. If it is meant to swab a struct mdt_epoch buffer,
+ * the member offsets used here are those of mdt_body and would presumably
+ * be wrong for that buffer -- confirm the parameter type against the
+ * header prototype and struct mdt_epoch. */
+void lustre_swab_mdt_epoch (struct mdt_body *b)
+{
+ /* handle is opaque */
+ __swab64s (&b->ioepoch);
+ __swab32s (&b->flags);
+}
+
void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
{
int i;
lustre_swab_lu_fid (&sa->sa_fid);
__swab64s (&sa->sa_valid);
__swab64s (&sa->sa_size);
+ __swab64s (&sa->sa_blocks);
__swab64s (&sa->sa_mtime);
__swab64s (&sa->sa_atime);
__swab64s (&sa->sa_ctime);
EXPORT_SYMBOL(lustre_swab_mds_status_req);
EXPORT_SYMBOL(lustre_swab_mds_body);
EXPORT_SYMBOL(lustre_swab_mdt_body);
+EXPORT_SYMBOL(lustre_swab_mdt_epoch);
EXPORT_SYMBOL(lustre_swab_obd_quotactl);
EXPORT_SYMBOL(lustre_swab_mds_rec_setattr);
EXPORT_SYMBOL(lustre_swab_mdt_rec_setattr);
rm -f $OOS
+sync; sleep 3; sync
+
if [ $SUCCESS -eq 1 ]; then
echo "Success!"
rm -f $LOG
#include <stdlib.h>
#include <unistd.h>
+#include <liblustre.h>
#include <lustre/lustre_idl.h>
#define __REQ_LAYOUT_USER__ (1)
-#define ARRAY_SIZE(a) ((sizeof (a))/(sizeof (a)[0]))
#define lustre_swab_generic_32s NULL
#define lustre_swab_lu_range NULL
#define lustre_swab_md_fld NULL
#define lustre_swab_mdt_body NULL
+#define lustre_swab_mdt_epoch NULL
#define lustre_swab_ptlrpc_body NULL
#define lustre_swab_obd_statfs NULL
#define lustre_swab_connect NULL
#define lustre_swab_mdt_rec_create NULL
#define lustre_swab_mdt_rec_setattr NULL
-#define EXPORT_SYMBOL(name)
-
/*
* Yes, include .c file.
*/
CHECK_STRUCT(llog_size_change_rec);
CHECK_MEMBER(llog_size_change_rec, lsc_hdr);
CHECK_MEMBER(llog_size_change_rec, lsc_fid);
- CHECK_MEMBER(llog_size_change_rec, lsc_io_epoch);
+ CHECK_MEMBER(llog_size_change_rec, lsc_ioepoch);
CHECK_MEMBER(llog_size_change_rec, padding);
CHECK_MEMBER(llog_size_change_rec, lsc_tail);
}