}
rc = md_setattr(mc->mc_desc.cl_exp, &mci->mci_opdata,
- NULL, 0, NULL, 0, &mci->mci_req);
+ NULL, 0, NULL, 0, &mci->mci_req, NULL);
ptlrpc_req_finished(mci->mci_req);
int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg);
struct client_obd *client_conn2cli(struct lustre_handle *conn);
-struct mdc_open_data;
+struct md_open_data;
struct obd_client_handle {
struct lustre_handle och_fh;
struct lu_fid och_fid;
- struct llog_cookie och_cookie;
- struct mdc_open_data *och_mod;
+ struct md_open_data *och_mod; /* open replay data; reset to NULL by mdc_commit_delayed() when it frees the md_open_data */
__u32 och_magic;
int och_flags;
};
* after server commits corresponding transaction. This is
* used for operations that require sequence of multiple
* requests to be replayed. The only example currently is file
- * open/close. When last request in such a sequence is
- * committed, ->rq_replay is cleared on all requests in the
+ * open/close/dw/setattr. When last request in such a sequence
+ * is committed, ->rq_replay is cleared on all requests in the
* sequence.
*/
rq_replay:1,
+ /* this is the last request in the sequence. */
+ rq_sequence:1,
rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1;
enum rq_phase rq_phase; /* one of RQ_PHASE_* */
__u64 rq_transno;
__u64 rq_xid;
struct list_head rq_replay_list;
+ struct list_head rq_mod_list;
struct ptlrpc_cli_ctx *rq_cli_ctx; /* client's half ctx */
struct ptlrpc_svc_ctx *rq_svc_ctx; /* server's half ctx */
struct lu_context rq_session;
};
+static inline void ptlrpc_close_replay_seq(struct ptlrpc_request *req)
+{ /* Flag @req as the last request of its replay sequence: clears
+   * rq_replay and sets rq_sequence, both under req->rq_lock. */
+ spin_lock(&req->rq_lock);
+ req->rq_replay = 0;
+ req->rq_sequence = 1;
+ spin_unlock(&req->rq_lock);
+}
+
static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
{
LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
struct obd_capa *oss_capa;
};
+struct md_open_data { /* per-open bookkeeping shared by the requests of one open sequence */
+ struct obd_client_handle *mod_och; /* client handle whose och_fh mdc_replay_open() rewrites; may be NULL */
+ struct list_head mod_replay_list; /* requests of the sequence, linked through rq_mod_list */
+};
+
struct md_ops {
int (*m_getstatus)(struct obd_export *, struct lu_fid *,
struct obd_capa **);
int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *,
ldlm_iterator_t, void *);
int (*m_close)(struct obd_export *, struct md_op_data *,
- struct obd_client_handle *, struct ptlrpc_request **);
+ struct md_open_data *, struct ptlrpc_request **);
int (*m_create)(struct obd_export *, struct md_op_data *,
const void *, int, int, __u32, __u32, __u32,
__u64, struct ptlrpc_request **);
int (*m_done_writing)(struct obd_export *, struct md_op_data *,
- struct obd_client_handle *);
+ struct md_open_data *);
int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
struct lookup_intent *, struct md_op_data *,
struct lustre_handle *, void *, int, int);
const struct lu_fid *,
struct ptlrpc_request **);
int (*m_setattr)(struct obd_export *, struct md_op_data *, void *,
- int , void *, int, struct ptlrpc_request **);
+ int , void *, int, struct ptlrpc_request **,
+ struct md_open_data **mod);
int (*m_sync)(struct obd_export *, const struct lu_fid *,
struct obd_capa *, struct ptlrpc_request **);
int (*m_readpage)(struct obd_export *, const struct lu_fid *,
}
static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
- struct obd_client_handle *och,
+ struct md_open_data *mod, /* open replay data of the file being closed; mdc_close() tolerates NULL */
struct ptlrpc_request **request)
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, close);
EXP_MD_COUNTER_INCREMENT(exp, close);
- rc = MDP(exp->exp_obd, close)(exp, op_data, och, request);
+ rc = MDP(exp->exp_obd, close)(exp, op_data, mod, request); /* @mod is forwarded unchanged */
RETURN(rc);
}
static inline int md_done_writing(struct obd_export *exp,
struct md_op_data *op_data,
- struct obd_client_handle *och)
+ struct md_open_data *mod) /* open replay data; mdc_done_writing() tolerates NULL */
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, done_writing);
EXP_MD_COUNTER_INCREMENT(exp, done_writing);
- rc = MDP(exp->exp_obd, done_writing)(exp, op_data, och);
+ rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod); /* @mod is forwarded unchanged */
RETURN(rc);
}
static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
void *ea, int ealen, void *ea2, int ea2len,
- struct ptlrpc_request **request)
+ struct ptlrpc_request **request,
+ struct md_open_data **mod) /* in/out open replay data; callers with nothing to replay pass NULL */
{
int rc;
ENTRY;
EXP_CHECK_MD_OP(exp, setattr);
EXP_MD_COUNTER_INCREMENT(exp, setattr);
rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen,
- ea2, ea2len, request);
+ ea2, ea2len, request, mod); /* @mod is forwarded unchanged */
RETURN(rc);
}
return rc;
}
-int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
- __u64 ioepoch)
+int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
+ struct lustre_handle *fh, __u64 ioepoch)
{
struct llu_inode_info *lli = llu_i2info(inode);
struct llu_sb_info *sbi = llu_i2sbi(inode);
LASSERT(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM);
rc = llu_inode_getattr(inode, &oa);
- if (rc) {
+ if (rc == -ENOENT) {
+ oa.o_valid = 0;
+ CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n",
+ lli->lli_smd->lsm_object_id);
+ } else if (rc) {
CERROR("inode_getattr failed (%d): unable to send a "
"Size-on-MDS attribute update for inode %llu/%lu\n",
rc, (long long)llu_i2stat(inode)->st_ino,
op_data.op_ioepoch = ioepoch;
op_data.op_flags |= MF_SOM_CHANGE;
- rc = llu_md_setattr(inode, &op_data);
+ rc = llu_md_setattr(inode, &op_data, &mod);
RETURN(rc);
}
struct obd_client_handle *och = &fd->fd_mds_och;
struct intnl_stat *st = llu_i2stat(inode);
struct md_op_data op_data = { { 0 } };
- int rc;
+ int seq_end = 0, rc;
ENTRY;
/* clear group lock, if present */
op_data.op_ioepoch = lli->lli_ioepoch;
memcpy(&op_data.op_handle, &och->och_fh, sizeof(op_data.op_handle));
- rc = md_close(md_exp, &op_data, och, &req);
+ rc = md_close(md_exp, &op_data, och->och_mod, &req);
+ if (rc != -EAGAIN)
+ seq_end = 1;
+
if (rc == -EAGAIN) {
/* We are the last writer, so the MDS has instructed us to get
* the file size and any write cookies, then close again. */
LASSERT(fd->fd_flags & FMODE_WRITE);
- rc = llu_sizeonmds_update(inode, &och->och_fh,
+ rc = llu_sizeonmds_update(inode, och->och_mod, &och->och_fh,
op_data.op_ioepoch);
if (rc) {
CERROR("inode %llu mdc Size-on-MDS update failed: "
(long long)st->st_ino, rc);
}
+ if (seq_end)
+ ptlrpc_close_replay_seq(req);
md_clear_open_replay_data(md_exp, och);
ptlrpc_req_finished(req);
och->och_fh.cookie = DEAD_HANDLE_MAGIC;
int ll_it_open_error(int phase, struct lookup_intent *it);
struct inode *llu_iget(struct filesys *fs, struct lustre_md *md);
int llu_inode_getattr(struct inode *inode, struct obdo *obdo);
-int llu_md_setattr(struct inode *inode, struct md_op_data *op_data);
+int llu_md_setattr(struct inode *inode, struct md_op_data *op_data,
+ struct md_open_data **mod);
int llu_setattr_raw(struct inode *inode, struct iattr *attr);
extern struct fssw_ops llu_fssw_ops;
int llu_iop_open(struct pnode *pnode, int flags, mode_t mode);
int llu_md_close(struct obd_export *md_exp, struct inode *inode);
int llu_file_release(struct inode *inode);
-int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
- __u64 ioepoch);
+int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
+ struct lustre_handle *fh, __u64 ioepoch);
int llu_iop_close(struct inode *inode);
_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off);
int llu_vmtruncate(struct inode * inode, loff_t offset, obd_flag obd_flags);
return error;
}
-int llu_md_setattr(struct inode *inode, struct md_op_data *op_data)
+int llu_md_setattr(struct inode *inode, struct md_op_data *op_data,
+ struct md_open_data **mod)
{
struct lustre_md md;
struct llu_sb_info *sbi = llu_i2sbi(inode);
ENTRY;
llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY);
- rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &request);
+ rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL,
+ 0, &request, mod);
if (rc) {
ptlrpc_req_finished(request);
/* Close IO epoch and send Size-on-MDS attribute update. */
static int llu_setattr_done_writing(struct inode *inode,
- struct md_op_data *op_data)
+ struct md_op_data *op_data,
+ struct md_open_data *mod)
{
struct llu_inode_info *lli = llu_i2info(inode);
struct intnl_stat *st = llu_i2stat(inode);
op_data->op_ioepoch, PFID(&lli->lli_fid));
op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
- rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, NULL);
+ rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, mod);
if (rc == -EAGAIN) {
/* MDS has instructed us to obtain Size-on-MDS attribute
* from OSTs and send setattr to back to MDS. */
- rc = llu_sizeonmds_update(inode, &op_data->op_handle,
+ rc = llu_sizeonmds_update(inode, mod, &op_data->op_handle,
op_data->op_ioepoch);
} else if (rc) {
CERROR("inode %llu mdc truncate failed: rc = %d\n",
struct intnl_stat *st = llu_i2stat(inode);
int ia_valid = attr->ia_valid;
struct md_op_data op_data = { { 0 } };
- int rc = 0;
+ struct md_open_data *mod = NULL;
+ int rc = 0, rc1 = 0;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
/* Open epoch for truncate. */
if (ia_valid & ATTR_SIZE)
op_data.op_flags = MF_EPOCH_OPEN;
- rc = llu_md_setattr(inode, &op_data);
+ rc = llu_md_setattr(inode, &op_data, &mod);
if (rc)
RETURN(rc);
+ if (op_data.op_ioepoch)
+ CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for "
+ "truncate\n", op_data.op_ioepoch,
+ PFID(&llu_i2info(inode)->lli_fid));
+
if (!lsm || !S_ISREG(st->st_mode)) {
CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
- if (op_data.op_ioepoch)
- rc = llu_setattr_done_writing(inode, &op_data);
- RETURN(rc);
+ GOTO(out, rc);
}
} else {
/* The OST doesn't check permissions, but the alternative is
&lockh, flags);
if (rc != ELDLM_OK) {
if (rc > 0)
- RETURN(-ENOLCK);
- RETURN(rc);
+ GOTO(out, rc = -ENOLCK);
+ GOTO(out, rc);
}
-
rc = llu_vmtruncate(inode, attr->ia_size, obd_flags);
/* unlock now as we don't mind others file lockers racing with
if (!rc)
rc = err;
}
-
- if (op_data.op_ioepoch)
- rc = llu_setattr_done_writing(inode, &op_data);
} else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
struct obd_info oinfo = { { { 0 } } };
struct obdo oa;
if (rc)
CERROR("obd_setattr_async fails: rc=%d\n", rc);
}
- RETURN(rc);
+ EXIT;
+out:
+ if (op_data.op_ioepoch)
+ rc1 = llu_setattr_done_writing(inode, &op_data, mod);
+ return rc ? rc : rc1;
}
/* here we simply act as a thin layer to glue it with
/* swabbing is done in lov_setstripe() on server side */
rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
- sizeof(lum), NULL, 0, &request);
+ sizeof(lum), NULL, 0, &request, NULL);
if (rc) {
ptlrpc_req_finished(request);
if (rc != -EPERM && rc != -EACCES)
/* swabbing is done in lov_setstripe() on server side */
rc = md_setattr(sbi->ll_md_exp, op_data, lump, sizeof(*lump),
- NULL, 0, &req);
+ NULL, 0, &req, NULL);
ll_finish_md_op_data(op_data);
ptlrpc_req_finished(req);
if (rc) {
struct ptlrpc_request *req = NULL;
struct obd_device *obd = class_exp2obd(exp);
int epoch_close = 1;
- int rc;
+ int seq_end = 0, rc;
ENTRY;
if (obd == NULL) {
ll_prepare_close(inode, op_data, och);
epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
- rc = md_close(md_exp, op_data, och, &req);
+ rc = md_close(md_exp, op_data, och->och_mod, &req);
+ if (rc != -EAGAIN)
+ seq_end = 1;
if (rc == -EAGAIN) {
/* This close must have the epoch closed. */
LASSERT(epoch_close);
/* MDS has instructed us to obtain Size-on-MDS attribute from
* OSTs and send setattr to back to MDS. */
- rc = ll_sizeonmds_update(inode, &och->och_fh,
- op_data->op_ioepoch);
+ rc = ll_sizeonmds_update(inode, och->och_mod,
+ &och->och_fh, op_data->op_ioepoch);
if (rc) {
CERROR("inode %lu mdc Size-on-MDS update failed: "
"rc = %d\n", inode->i_ino, rc);
inode->i_ino, rc);
}
- ptlrpc_req_finished(req); /* This is close request */
EXIT;
out:
S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
ll_queue_done_writing(inode, LLIF_DONE_WRITING);
} else {
+ if (seq_end)
+ ptlrpc_close_replay_seq(req);
md_clear_open_replay_data(md_exp, och);
/* Free @och if it is not waiting for DONE_WRITING. */
och->och_fh.cookie = DEAD_HANDLE_MAGIC;
OBD_FREE_PTR(och);
}
-
+ if (req) /* This is close request */
+ ptlrpc_req_finished(req);
return rc;
}
return;
}
-int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
- __u64 ioepoch)
+int ll_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
+ struct lustre_handle *fh, __u64 ioepoch)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct md_op_data *op_data;
RETURN(-ENOMEM);
}
rc = ll_inode_getattr(inode, oa);
- if (rc) {
+ if (rc == -ENOENT) {
+ oa->o_valid = 0;
+ CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n",
+ lli->lli_smd->lsm_object_id);
+ } else if (rc) {
CERROR("inode_getattr failed (%d): unable to send a "
"Size-on-MDS attribute update for inode %lu/%u\n",
rc, inode->i_ino, inode->i_generation);
op_data->op_ioepoch = ioepoch;
op_data->op_flags |= MF_SOM_CHANGE;
- rc = ll_md_setattr(inode, op_data);
+ rc = ll_md_setattr(inode, op_data, &mod);
EXIT;
out:
if (oa)
ll_pack_inode2opdata(inode, op_data, &och->och_fh);
- rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och);
+ rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och->och_mod);
if (rc == -EAGAIN) {
/* MDS has instructed us to obtain Size-on-MDS attribute from
* OSTs and send setattr to back to MDS. */
- rc = ll_sizeonmds_update(inode, &och->och_fh,
- op_data->op_ioepoch);
+ rc = ll_sizeonmds_update(inode, och->och_mod,
+ &och->och_fh, op_data->op_ioepoch);
} else if (rc) {
CERROR("inode %lu mdc done_writing failed: rc = %d\n",
inode->i_ino, rc);
int ll_md_real_close(struct inode *inode, int flags);
void ll_epoch_close(struct inode *inode, struct md_op_data *op_data,
struct obd_client_handle **och, unsigned long flags);
-int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
- __u64 ioepoch);
+int ll_sizeonmds_update(struct inode *inode, struct md_open_data *data,
+ struct lustre_handle *fh, __u64 ioepoch);
int ll_inode_getattr(struct inode *inode, struct obdo *obdo);
-int ll_md_setattr(struct inode *inode, struct md_op_data *op_data);
+int ll_md_setattr(struct inode *inode, struct md_op_data *op_data,
+ struct md_open_data **mod);
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
struct lustre_handle *fh);
extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
EXIT;
}
-int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
+int ll_md_setattr(struct inode *inode, struct md_op_data *op_data,
+ struct md_open_data **mod)
{
struct lustre_md md;
struct ll_sb_info *sbi = ll_i2sbi(inode);
RETURN(PTR_ERR(op_data));
rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0,
- &request);
+ &request, mod);
if (rc) {
ptlrpc_req_finished(request);
if (rc == -ENOENT) {
rc = inode_setattr(inode, &op_data->op_attr);
/* Extract epoch data if obtained. */
- memcpy(&op_data->op_handle, &md.body->handle, sizeof(op_data->op_handle));
+ op_data->op_handle = md.body->handle;
op_data->op_ioepoch = md.body->ioepoch;
ll_update_inode(inode, &md);
/* Close IO epoch and send Size-on-MDS attribute update. */
static int ll_setattr_done_writing(struct inode *inode,
- struct md_op_data *op_data)
+ struct md_op_data *op_data,
+ struct md_open_data *mod)
{
struct ll_inode_info *lli = ll_i2info(inode);
int rc = 0;
op_data->op_ioepoch, PFID(&lli->lli_fid));
op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
- /* XXX: pass och here for the recovery purpose. */
- rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
+ rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod);
if (rc == -EAGAIN) {
/* MDS has instructed us to obtain Size-on-MDS attribute
* from OSTs and send setattr to back to MDS. */
- rc = ll_sizeonmds_update(inode, &op_data->op_handle,
+ rc = ll_sizeonmds_update(inode, mod, &op_data->op_handle,
op_data->op_ioepoch);
} else if (rc) {
CERROR("inode %lu mdc truncate failed: rc = %d\n",
struct lov_stripe_md *lsm = lli->lli_smd;
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct md_op_data *op_data = NULL;
+ struct md_open_data *mod = NULL;
int ia_valid = attr->ia_valid;
int rc = 0, rc1 = 0;
ENTRY;
/* Open epoch for truncate. */
if (ia_valid & ATTR_SIZE)
op_data->op_flags = MF_EPOCH_OPEN;
-
- rc = ll_md_setattr(inode, op_data);
+
+ rc = ll_md_setattr(inode, op_data, &mod);
if (rc)
GOTO(out, rc);
"truncate\n", op_data->op_ioepoch, PFID(&lli->lli_fid));
if (!lsm || !S_ISREG(inode->i_mode)) {
- CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
- GOTO(out, rc = 0);
+ CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
+ GOTO(out, rc = 0);
}
/* We really need to get our PW lock before we change inode->i_size.
EXIT;
out:
if (op_data) {
- if (op_data->op_ioepoch) {
- rc1 = ll_setattr_done_writing(inode, op_data);
- }
+ if (op_data->op_ioepoch)
+ rc1 = ll_setattr_done_writing(inode, op_data, mod);
ll_finish_md_op_data(op_data);
}
return rc ? rc : rc1;
((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
rc = md_setattr(sbi->ll_md_exp, op_data,
- NULL, 0, NULL, 0, &req);
+ NULL, 0, NULL, 0, &req, NULL);
ll_finish_md_op_data(op_data);
ptlrpc_req_finished(req);
if (rc || lsm == NULL) {
static int lmv_close(struct obd_export *exp,
struct md_op_data *op_data,
- struct obd_client_handle *och,
+ struct md_open_data *mod,
struct ptlrpc_request **request)
{
struct obd_device *obd = exp->exp_obd;
RETURN(PTR_ERR(tgt_exp));
CDEBUG(D_OTHER, "CLOSE "DFID"\n", PFID(&op_data->op_fid1));
- rc = md_close(tgt_exp, op_data, och, request);
+ rc = md_close(tgt_exp, op_data, mod, request);
RETURN(rc);
}
static int lmv_done_writing(struct obd_export *exp,
struct md_op_data *op_data,
- struct obd_client_handle *och)
+ struct md_open_data *mod)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
if (IS_ERR(tgt_exp))
RETURN(PTR_ERR(tgt_exp));
- rc = md_done_writing(tgt_exp, op_data, och);
+ rc = md_done_writing(tgt_exp, op_data, mod);
RETURN(rc);
}
static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
void *ea, int ealen, void *ea2, int ea2len,
- struct ptlrpc_request **request)
+ struct ptlrpc_request **request,
+ struct md_open_data **mod)
{
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
}
rc = md_setattr(tgt_exp, op_data, ea, ealen,
- ea2, ea2len, &req);
+ ea2, ea2len, &req, mod);
if (lu_fid_eq(&obj->lo_fid, &obj->lo_inodes[i].li_fid)) {
/*
RETURN(PTR_ERR(tgt_exp));
rc = md_setattr(tgt_exp, op_data, ea, ealen, ea2,
- ea2len, request);
+ ea2len, request, mod);
}
RETURN(rc);
}
void mdc_exit_request(struct client_obd *cli);
void mdc_enter_request(struct client_obd *cli);
-struct mdc_open_data {
- struct obd_client_handle *mod_och;
- struct ptlrpc_request *mod_open_req;
- struct ptlrpc_request *mod_close_req;
-};
-
static inline int client_is_remote(struct obd_export *exp)
{
struct obd_import *imp = class_exp2cliimp(exp);
int mdc_clear_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och);
+void mdc_commit_delayed(struct ptlrpc_request *req);
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
const void *data, int datalen, int mode, __u32 uid, __u32 gid,
struct ptlrpc_request **request);
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
void *ea, int ealen, void *ea2, int ea2len,
- struct ptlrpc_request **request);
+ struct ptlrpc_request **request, struct md_open_data **mod);
int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request);
int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
* go to the setattr portal. */
int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
void *ea, int ealen, void *ea2, int ea2len,
- struct ptlrpc_request **request)
+ struct ptlrpc_request **request, struct md_open_data **mod)
{
CFS_LIST_HEAD(cancels);
struct ptlrpc_request *req;
size[REPLY_REC_OFF] = sizeof(struct mdt_body);
size[REPLY_REC_OFF + 1] = sizeof(struct lustre_capa);
ptlrpc_req_set_repsize(req, 3, size);
+ if (mod && (op_data->op_flags & MF_EPOCH_OPEN) &&
+ req->rq_import->imp_replayable)
+ {
+ LASSERT(*mod == NULL);
+
+ OBD_ALLOC_PTR(*mod);
+ if (*mod == NULL) {
+ DEBUG_REQ(D_ERROR, req, "Can't allocate "
+ "md_open_data");
+ } else {
+ CFS_INIT_LIST_HEAD(&(*mod)->mod_replay_list);
+ }
+ }
+ if (mod && *mod) {
+ req->rq_cb_data = *mod;
+ req->rq_commit_cb = mdc_commit_delayed;
+ list_add_tail(&req->rq_mod_list, &(*mod)->mod_replay_list);
+ /* This is not the last request in sequence for truncate. */
+ if (op_data->op_flags & MF_EPOCH_OPEN)
+ req->rq_replay = 1;
+ else
+ req->rq_sequence = 1;
+ }
rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);
*request = req;
if (rc == -ERESTARTSYS)
rc = 0;
-
+ if (rc && req->rq_commit_cb)
+ req->rq_commit_cb(req);
RETURN(rc);
}
RETURN(0);
}
-static void mdc_commit_open(struct ptlrpc_request *req)
-{
- struct mdc_open_data *mod = req->rq_cb_data;
- if (mod == NULL)
- return;
-
- if (mod->mod_close_req != NULL)
- mod->mod_close_req->rq_cb_data = NULL;
-
- if (mod->mod_och != NULL)
- mod->mod_och->och_mod = NULL;
-
- OBD_FREE(mod, sizeof(*mod));
- req->rq_cb_data = NULL;
-}
-
+/*
+ * Fix up file handles after the open request @req has been replayed.
+ *
+ * The handle found in the open reply body is stored into the client
+ * handle attached to the md_open_data (if there is one) and into the
+ * epoch record of every CLOSE, DONE_WRITING and REINT_SETATTR request
+ * linked on the md_open_data replay list.
+ */
static void mdc_replay_open(struct ptlrpc_request *req)
{
- struct mdc_open_data *mod = req->rq_cb_data;
- struct ptlrpc_request *close_req;
+ struct md_open_data *mod = req->rq_cb_data;
+ struct ptlrpc_request *cur, *tmp;
struct obd_client_handle *och;
struct lustre_handle old;
struct mdt_body *body;
body = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, sizeof(*body),
lustre_swab_mdt_body);
+ LASSERT(body != NULL);
och = mod->mod_och;
if (och != NULL) {
struct lustre_handle *file_fh;
LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
- LASSERT(body != NULL);
file_fh = &och->och_fh;
CDEBUG(D_HA, "updating handle from "LPX64" to "LPX64"\n",
file_fh->cookie, body->handle.cookie);
- memcpy(&old, file_fh, sizeof(old));
- memcpy(file_fh, &body->handle, sizeof(*file_fh));
+ old = *file_fh;
+ *file_fh = body->handle;
}
- close_req = mod->mod_close_req;
- if (close_req != NULL) {
- struct mdt_epoch *epoch;
- LASSERT(lustre_msg_get_opc(close_req->rq_reqmsg) == MDS_CLOSE);
- LASSERT(body != NULL);
-
- epoch = lustre_msg_buf(close_req->rq_reqmsg, REQ_REC_OFF,
- sizeof(*epoch));
- LASSERT(epoch);
- if (och != NULL)
- LASSERT(!memcmp(&old, &epoch->handle, sizeof(old)));
- DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
- memcpy(&epoch->handle, &body->handle, sizeof(epoch->handle));
+ list_for_each_entry_safe(cur, tmp, &mod->mod_replay_list, rq_mod_list) {
+ int opc = lustre_msg_get_opc(cur->rq_reqmsg);
+ struct mdt_epoch *epoch = NULL;
+
+ if (opc == MDS_CLOSE || opc == MDS_DONE_WRITING) {
+ epoch = lustre_msg_buf(cur->rq_reqmsg,
+ REQ_REC_OFF, sizeof(*epoch));
+ LASSERT(epoch);
+ DEBUG_REQ(D_HA, cur, "updating %s body with new fh",
+ opc == MDS_CLOSE ? "CLOSE" : "DONE_WRITING");
+ } else if (opc == MDS_REINT) {
+ struct mdt_rec_setattr *rec;
+
+ /* Check this is REINT_SETATTR. The record must be read
+ * from @cur, the request being patched, not from @req,
+ * the open request this function was called for. */
+ rec = lustre_msg_buf(cur->rq_reqmsg, REQ_REC_OFF,
+ sizeof (*rec));
+ LASSERT(rec && rec->sa_opcode == REINT_SETATTR);
+
+ epoch = lustre_msg_buf(cur->rq_reqmsg,
+ REQ_REC_OFF + 2, sizeof(*epoch));
+ LASSERT(epoch);
+ DEBUG_REQ(D_HA, cur, "updating REINT_SETATTR body "
+ "with new fh");
+ }
+ /* Requests with any other opcode carry no epoch record
+ * and are left untouched. */
+ if (epoch) {
+ /* @old is only valid when the client handle was updated
+ * above, hence the och check. */
+ if (och != NULL)
+ LASSERT(!memcmp(&old, &epoch->handle,
+ sizeof(old)));
+ epoch->handle = body->handle;
+ }
}
EXIT;
}
+/*
+ * Commit callback shared by the requests attached to one md_open_data.
+ *
+ * Detaches @req from its md_open_data. If @req is flagged rq_sequence,
+ * every request still on the replay list is detached too and its
+ * rq_replay flag is cleared. The md_open_data is freed once its replay
+ * list is empty.
+ *
+ * Does nothing beyond the debug message when @req has no md_open_data.
+ */
+void mdc_commit_delayed(struct ptlrpc_request *req)
+{
+ struct md_open_data *mod = req->rq_cb_data;
+ struct ptlrpc_request *cur, *tmp;
+
+ DEBUG_REQ(D_HA, req, "req committed");
+
+ if (mod == NULL)
+ return;
+
+ req->rq_cb_data = NULL;
+ req->rq_commit_cb = NULL;
+ list_del_init(&req->rq_mod_list);
+ if (req->rq_sequence) {
+ list_for_each_entry_safe(cur, tmp, &mod->mod_replay_list,
+ rq_mod_list)
+ {
+ LASSERT(cur != LP_POISON);
+ LASSERT(cur->rq_type != LI_POISON);
+ DEBUG_REQ(D_HA, cur, "req balanced");
+ LASSERT(cur->rq_transno != 0);
+ LASSERT(cur->rq_import == req->rq_import);
+
+ list_del_init(&cur->rq_mod_list);
+ /* We no longer want to preserve this for transno-
+ * unconditional replay. Unlinking every request
+ * empties the list, so @mod is freed below: drop the
+ * back pointer and the callback as well, otherwise a
+ * later commit of @cur would dereference (and could
+ * free again) the released md_open_data. */
+ spin_lock(&cur->rq_lock);
+ cur->rq_replay = 0;
+ cur->rq_cb_data = NULL;
+ cur->rq_commit_cb = NULL;
+ spin_unlock(&cur->rq_lock);
+ }
+ }
+
+ if (list_empty(&mod->mod_replay_list)) {
+ /* Make sure the client handle does not keep pointing at
+ * the md_open_data that is about to be freed. */
+ if (mod->mod_och != NULL)
+ mod->mod_och->och_mod = NULL;
+
+ OBD_FREE_PTR(mod);
+ }
+}
+
int mdc_set_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och,
struct ptlrpc_request *open_req)
{
- struct mdc_open_data *mod;
+ struct md_open_data *mod;
struct mdt_rec_create *rec = lustre_msg_buf(open_req->rq_reqmsg,
DLM_INTENT_REC_OFF,
sizeof(*rec));
OBD_ALLOC(mod, sizeof(*mod));
if (mod == NULL) {
DEBUG_REQ(D_ERROR, open_req,
- "Can't allocate mdc_open_data");
+ "Can't allocate md_open_data");
RETURN(0);
}
+ CFS_INIT_LIST_HEAD(&mod->mod_replay_list);
spin_lock(&open_req->rq_lock);
if (!open_req->rq_replay) {
och->och_mod = mod;
mod->mod_och = och;
open_req->rq_cb_data = mod;
- mod->mod_open_req = open_req;
- open_req->rq_commit_cb = mdc_commit_open;
+ list_add_tail(&open_req->rq_mod_list, &mod->mod_replay_list);
+ open_req->rq_commit_cb = mdc_commit_delayed;
spin_unlock(&open_req->rq_lock);
}
int mdc_clear_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och)
{
- struct mdc_open_data *mod = och->och_mod;
+ struct md_open_data *mod = och->och_mod;
ENTRY;
/*
- * Don't free the structure now (it happens in mdc_commit_open(), after
- * we're sure we won't need to fix up the close request in the future),
+ * Don't free the structure now (it happens in mdc_commit_delayed(),
+ * after the last request is removed from its replay list),
* but make sure that replay doesn't poke at the och, which is about to
* be freed.
*/
RETURN(0);
}
-static void mdc_commit_close(struct ptlrpc_request *req)
-{
- struct mdc_open_data *mod = req->rq_cb_data;
- struct ptlrpc_request *open_req;
- struct obd_import *imp = req->rq_import;
-
- DEBUG_REQ(D_HA, req, "close req committed");
- if (mod == NULL)
- return;
-
- mod->mod_close_req = NULL;
- req->rq_cb_data = NULL;
- req->rq_commit_cb = NULL;
-
- open_req = mod->mod_open_req;
- LASSERT(open_req != NULL);
- LASSERT(open_req != LP_POISON);
- LASSERT(open_req->rq_type != LI_POISON);
-
- DEBUG_REQ(D_HA, open_req, "open req balanced");
- LASSERT(open_req->rq_transno != 0);
- LASSERT(open_req->rq_import == imp);
-
- /*
- * We no longer want to preserve this for transno-unconditional
- * replay. Decref open req here as well.
- */
- spin_lock(&open_req->rq_lock);
- open_req->rq_replay = 0;
- spin_unlock(&open_req->rq_lock);
-}
-
int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
- struct obd_client_handle *och, struct ptlrpc_request **request)
+ struct md_open_data *mod, struct ptlrpc_request **request)
{
struct obd_device *obd = class_exp2obd(exp);
int reqsize[4] = { sizeof(struct ptlrpc_body),
obd->u.cli.cl_max_mds_easize,
obd->u.cli.cl_max_mds_cookiesize };
struct ptlrpc_request *req;
- struct mdc_open_data *mod;
int rc;
ENTRY;
req->rq_request_portal = MDS_READPAGE_PORTAL;
/* Ensure that this close's handle is fixed up during replay. */
- LASSERT(och != NULL);
- LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
- mod = och->och_mod;
- if (likely(mod != NULL)) {
- if (mod->mod_open_req->rq_type == LI_POISON) {
- CERROR("LBUG POISONED open %p!\n", mod->mod_open_req);
- LBUG();
- ptlrpc_req_finished(req);
- req = NULL;
- GOTO(out, rc = -EIO);
- }
- mod->mod_close_req = req;
- DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
- } else {
+ if (likely(mod != NULL))
+ list_add_tail(&req->rq_mod_list, &mod->mod_replay_list);
+ else
CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
- }
mdc_close_pack(req, REQ_REC_OFF, op_data);
ptlrpc_req_set_repsize(req, 4, repsize);
- req->rq_commit_cb = mdc_commit_close;
+ req->rq_commit_cb = mdc_commit_delayed;
+ req->rq_replay = 1;
LASSERT(req->rq_cb_data == NULL);
req->rq_cb_data = mod;
rc = -rc;
} else if (mod == NULL) {
if (req->rq_import->imp_replayable)
- CERROR("Unexpected: can't find mdc_open_data,"
+ CERROR("Unexpected: can't find md_open_data,"
"but close succeeded with replayable imp"
"Please tell CFS.\n");
}
}
int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
- struct obd_client_handle *och)
+ struct md_open_data *mod)
{
struct obd_device *obd = class_exp2obd(exp);
struct ptlrpc_request *req;
if (req == NULL)
RETURN(-ENOMEM);
- /* XXX: add DONE_WRITING request to och -- when Size-on-MDS
- * recovery will be ready. */
mdc_close_pack(req, REQ_REC_OFF, op_data);
+
+ req->rq_replay = 1;
+ req->rq_cb_data = mod;
+ req->rq_commit_cb = mdc_commit_delayed;
+ if (likely(mod != NULL))
+ list_add_tail(&req->rq_mod_list, &mod->mod_replay_list);
+ else
+ CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
ptlrpc_req_set_repsize(req, 2, repsize);
mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
rc = ptlrpc_queue_wait(req);
mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
+
+ /* Close the open replay sequence if an error occurred or no SOM
+ * attribute update is needed. */
+ if (rc != -EAGAIN)
+ ptlrpc_close_replay_seq(req);
+
+ if (rc && rc != -EAGAIN && req->rq_commit_cb)
+ req->rq_commit_cb(req);
+
ptlrpc_req_finished(req);
RETURN(rc);
}
OBD_ALLOC(handle->och_mod, sizeof(*handle->och_mod));
if (handle->och_mod == NULL) {
- DEBUG_REQ(D_ERROR, req, "can't allocate mdc_open_data");
+ DEBUG_REQ(D_ERROR, req, "can't allocate md_open_data");
RETURN(-ENOMEM);
}
- handle->och_mod->mod_open_req = req; /* will be dropped by unpin */
+
+ /* will be dropped by unpin */
+ CFS_INIT_LIST_HEAD(&handle->och_mod->mod_replay_list);
+ list_add_tail(&req->rq_mod_list, &handle->och_mod->mod_replay_list);
RETURN(rc);
}
CERROR("unpin failed: %d\n", rc);
ptlrpc_req_finished(req);
- ptlrpc_req_finished(handle->och_mod->mod_open_req);
+
+ LASSERT(!list_empty(&handle->och_mod->mod_replay_list));
+ req = list_entry(handle->och_mod->mod_replay_list.next,
+ typeof(*req), rq_mod_list);
+ list_del_init(&req->rq_mod_list);
+ ptlrpc_req_finished(req);
+ LASSERT(list_empty(&handle->och_mod->mod_replay_list));
+
OBD_FREE(handle->och_mod, sizeof(*handle->och_mod));
RETURN(rc);
}
{
struct req_capsule *pill = &info->mti_pill;
struct mdt_device *mdt = info->mti_mdt;
- struct ptlrpc_request *req = mdt_info_req(info);
struct mdt_body *repbody;
int need_shrink = 0;
int rc;
if (rc != 0)
GOTO(out_ucred, rc = err_serious(rc));
- if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
- struct mdt_client_data *mcd;
-
- mcd = req->rq_export->exp_mdt_data.med_mcd;
- if (req_xid_is_last(req)) {
- need_shrink = 0;
- mdt_reconstruct(info, lhc);
- rc = lustre_msg_get_status(req->rq_repmsg);
- GOTO(out_ucred, rc);
- }
- DEBUG_REQ(D_HA, req, "no reply for RESENT (xid "LPD64")",
- mcd->mcd_last_xid);
- }
-
need_shrink = 0;
+ if (mdt_check_resent(info, mdt_reconstruct, lhc)) {
+ rc = lustre_msg_get_status(mdt_info_req(info)->rq_repmsg);
+ GOTO(out_ucred, rc);
+ }
+
rc = mdt_reint_rec(info, lhc);
EXIT;
out_ucred:
void mdt_lock_handle_fini(struct mdt_lock_handle *lh);
void mdt_reconstruct(struct mdt_thread_info *, struct mdt_lock_handle *);
+void mdt_reconstruct_generic(struct mdt_thread_info *mti,
+ struct mdt_lock_handle *lhc);
extern void target_recovery_fini(struct obd_device *obd);
extern void target_recovery_init(struct obd_device *obd,
return &req->rq_export->exp_mdt_data;
}
+/* Signature of a routine that rebuilds the reply of an already handled
+ * request. */
+typedef void (*mdt_reconstruct_t)(struct mdt_thread_info *mti,
+ struct mdt_lock_handle *lhc);
+/*
+ * Handle the MSG_RESENT case of the request carried by @info.
+ *
+ * Returns 1 after calling @reconstruct(@info, @lhc) when the request is
+ * flagged MSG_RESENT and req_xid_is_last() accepts it; returns 0
+ * otherwise, in which case the caller processes the request normally.
+ */
+static inline int mdt_check_resent(struct mdt_thread_info *info,
+ mdt_reconstruct_t reconstruct,
+ struct mdt_lock_handle *lhc)
+{
+ struct ptlrpc_request *req = mdt_info_req(info);
+ ENTRY;
+
+ /* Not a resend: nothing to reconstruct. */
+ if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT))
+ RETURN(0);
+
+ /* Resent, but req_xid_is_last() rejects it: log and fall back to
+ * normal processing. */
+ if (!req_xid_is_last(req)) {
+ DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",
+ req->rq_export->exp_mdt_data.med_mcd->mcd_last_xid);
+ RETURN(0);
+ }
+
+ reconstruct(info, lhc);
+ RETURN(1);
+}
+
#define MDT_FAIL_CHECK(id) \
({ \
if (unlikely(OBD_FAIL_CHECK(id))) \
struct mdt_object *o;
struct md_attr *ma = &info->mti_attr;
struct mdt_body *repbody = NULL;
+ struct ptlrpc_request *req = mdt_info_req(info);
int rc, ret = 0;
ENTRY;
req_capsule_set_size(&info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER,
info->mti_mdt->mdt_max_cookiesize);
rc = req_capsule_pack(&info->mti_pill);
+ if (mdt_check_resent(info, mdt_reconstruct_generic, NULL))
+ RETURN(lustre_msg_get_status(req->rq_repmsg));
+
/* Continue to close handle even if we can not pack reply */
if (rc == 0) {
repbody = req_capsule_server_get(&info->mti_pill,
} else
rc = err_serious(rc);
- med = &mdt_info_req(info)->rq_export->exp_mdt_data;
+ med = &req->rq_export->exp_mdt_data;
spin_lock(&med->med_open_lock);
mfd = mdt_handle2mfd(info, &info->mti_epoch->handle);
if (mdt_mfd_closed(mfd)) {
int mdt_done_writing(struct mdt_thread_info *info)
{
struct mdt_body *repbody = NULL;
- struct ptlrpc_request *req = mdt_info_req(info);
struct mdt_export_data *med;
struct mdt_file_data *mfd;
int rc;
if (rc)
RETURN(err_serious(rc));
+ if (mdt_check_resent(info, mdt_reconstruct_generic, NULL))
+ RETURN(lustre_msg_get_status(mdt_info_req(info)->rq_repmsg));
+
med = &info->mti_exp->exp_mdt_data;
spin_lock(&med->med_open_lock);
mfd = mdt_handle2mfd(info, &info->mti_epoch->handle);
RETURN(-ESTALE);
}
- if (!(mfd->mfd_mode == FMODE_EPOCH ||
- mfd->mfd_mode == FMODE_EPOCHLCK)) {
- spin_unlock(&med->med_open_lock);
- DEBUG_REQ(D_WARNING, req, "req should be resent req");
- LASSERT(mfd->mfd_mode == FMODE_SOM);
- LASSERT(lustre_msg_get_flags(req->rq_reqmsg) &
- (MSG_RESENT | MSG_REPLAY));
- /*Since we did not bond this req with open/close,
- *Why we should keep this req as replay req XXX*/
- GOTO(empty_transno, rc);
- }
+ LASSERT(mfd->mfd_mode == FMODE_EPOCH ||
+ mfd->mfd_mode == FMODE_EPOCHLCK);
class_handle_unhash(&mfd->mfd_handle);
list_del_init(&mfd->mfd_list);
spin_unlock(&med->med_open_lock);
info->mti_epoch->flags |= MF_EPOCH_CLOSE;
info->mti_attr.ma_valid = 0;
rc = mdt_mfd_close(info, mfd);
-empty_transno:
mdt_empty_transno(info);
RETURN(rc);
}
mds_steal_ack_locks(req);
}
-static void mdt_reconstruct_generic(struct mdt_thread_info *mti,
- struct mdt_lock_handle *lhc)
+void mdt_reconstruct_generic(struct mdt_thread_info *mti,
+ struct mdt_lock_handle *lhc)
{
struct ptlrpc_request *req = mdt_info_req(mti);
struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
obj = mdt_object_find(mti->mti_env, mdt, mti->mti_rr.rr_fid1);
LASSERT(!IS_ERR(obj));
mo_attr_get(mti->mti_env, mdt_object_child(obj), &mti->mti_attr);
- mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr, mdt_object_fid(obj));
+ mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr,
+ mdt_object_fid(obj));
+ if (mti->mti_epoch && (mti->mti_epoch->flags & MF_EPOCH_OPEN)) {
+ struct mdt_file_data *mfd;
+ struct mdt_body *repbody;
+
+ repbody = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY);
+ repbody->ioepoch = obj->mot_ioepoch;
+ spin_lock(&med->med_open_lock);
+ list_for_each_entry(mfd, &med->med_open_head, mfd_list) {
+ if (mfd->mfd_xid == req->rq_xid)
+ break;
+ }
+ LASSERT(&mfd->mfd_list != &med->med_open_head);
+ spin_unlock(&med->med_open_lock);
+ repbody->handle.cookie = mfd->mfd_handle.h_cookie;
+ }
- /* Don't return OST-specific attributes if we didn't just set them */
-/*
- if (rec->ur_iattr.ia_valid & ATTR_SIZE)
- body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
- if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
- body->valid |= OBD_MD_FLMTIME;
- if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
- body->valid |= OBD_MD_FLATIME;
-*/
mdt_object_put(mti->mti_env, obj);
}
reconstructors[mti->mti_rr.rr_opcode](mti, lhc);
EXIT;
}
-
info->mti_epoch->handle.cookie);
GOTO(out_put, rc = -ESTALE);
}
- if (mfd->mfd_mode != FMODE_SOM) {
- CWARN("mfd_mode %d not right should be in replay\n",
- mfd->mfd_mode);
- LASSERT(lustre_msg_get_flags(req->rq_reqmsg) &
- MSG_REPLAY);
- LASSERT(mfd->mfd_mode == FMODE_EPOCH);
- mfd->mfd_mode = FMODE_SOM;
- }
-
- LASSERT(ma->ma_attr.la_valid & LA_SIZE);
+ LASSERT(mfd->mfd_mode == FMODE_SOM);
LASSERT(!(info->mti_epoch->flags & MF_EPOCH_CLOSE));
class_handle_unhash(&mfd->mfd_handle);
spin_lock_init(&request->rq_lock);
CFS_INIT_LIST_HEAD(&request->rq_list);
CFS_INIT_LIST_HEAD(&request->rq_replay_list);
+ CFS_INIT_LIST_HEAD(&request->rq_mod_list);
CFS_INIT_LIST_HEAD(&request->rq_ctx_chain);
CFS_INIT_LIST_HEAD(&request->rq_set_chain);
cfs_waitq_init(&request->rq_reply_waitq);
if (request->rq_import != NULL) {
if (!locked)
spin_lock(&request->rq_import->imp_lock);
+ list_del_init(&request->rq_mod_list);
list_del_init(&request->rq_replay_list);
if (!locked)
spin_unlock(&request->rq_import->imp_lock);