struct obd_client_handle {
struct lustre_handle och_fh;
struct lu_fid och_fid;
- struct md_open_data *och_mod;
+ struct md_open_data *och_mod;
__u32 och_magic;
int och_flags;
};
* after server commits corresponding transaction. This is
* used for operations that require sequence of multiple
* requests to be replayed. The only example currently is file
- * open/close/dw/setattr. When last request in such a sequence
- * is committed, ->rq_replay is cleared on all requests in the
+ * open/close. When the last request in such a sequence is
+ * committed, ->rq_replay is cleared on all requests in the
* sequence.
*/
rq_replay:1,
- /* this is the last request in the sequence. */
- rq_sequence:1,
rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
rq_early:1, rq_must_unlink:1,
__u64 rq_transno;
__u64 rq_xid;
struct list_head rq_replay_list;
- struct list_head rq_mod_list;
struct ptlrpc_cli_ctx *rq_cli_ctx; /* client's half ctx */
struct ptlrpc_svc_ctx *rq_svc_ctx; /* server's half ctx */
struct req_capsule rq_pill;
};
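For reference, the open/close replay rule in the rq_replay comment above reduces to a small standalone model. This is an illustrative sketch only; the toy_* names below are invented for the example and are not the real ptlrpc structures.

/* Toy model: a committed request is droppable unless it is still part of
 * an uncommitted open/close sequence (replay flag set). */
#include <stdbool.h>
#include <stdio.h>

struct toy_req {
        unsigned long long transno;     /* transno assigned by the server */
        bool replay;                    /* keep past commit for replay */
};

static bool toy_can_drop(const struct toy_req *req,
                         unsigned long long last_committed)
{
        return req->transno != 0 &&
               req->transno <= last_committed &&
               !req->replay;
}

int main(void)
{
        struct toy_req open_req = { .transno = 10, .replay = true };

        /* The open has committed (10 <= 20) but the close has not, so the
         * open is still kept around for replay. */
        printf("drop open at commit 20? %d\n", toy_can_drop(&open_req, 20));

        /* mdc_close() later clears rq_replay on the open (see below in
         * this patch), after which the committed open may be dropped. */
        open_req.replay = false;
        printf("drop open at commit 42? %d\n", toy_can_drop(&open_req, 42));
        return 0;
}

The point is that commit alone no longer retires an open request; rq_replay is cleared explicitly once the close that ends the sequence has been issued.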
-static inline void ptlrpc_close_replay_seq(struct ptlrpc_request *req)
-{
- spin_lock(&req->rq_lock);
- req->rq_replay = 0;
- req->rq_sequence = 1;
- spin_unlock(&req->rq_lock);
-}
-
static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
{
LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
struct md_open_data {
struct obd_client_handle *mod_och;
- struct list_head mod_replay_list;
+ struct ptlrpc_request *mod_open_req;
+ struct ptlrpc_request *mod_close_req;
};
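The two request pointers above replace the old mod_replay_list. Below is a minimal, self-contained sketch of the intended linkage, using invented toy_* types; the real logic lives in mdc_set_open_replay_data(), mdc_close() and mdc_commit_open() later in this patch, and locking (rq_lock) and error handling are omitted here.

/* Toy model of the md_open_data linkage; real locking and RPC handling
 * are omitted. */
#include <stdlib.h>

struct toy_req {
        int   replay;           /* stands in for rq_replay */
        void *cb_data;          /* stands in for rq_cb_data */
};

struct toy_open_data {
        struct toy_req *open_req;    /* kept until the open commits */
        struct toy_req *close_req;   /* fixed up if the open is replayed */
};

/* Roughly mdc_set_open_replay_data(): remember the open request. */
static struct toy_open_data *toy_record_open(struct toy_req *open_req)
{
        struct toy_open_data *mod = calloc(1, sizeof(*mod));

        if (mod == NULL)
                return NULL;
        mod->open_req = open_req;
        open_req->cb_data = mod;
        open_req->replay = 1;
        return mod;
}

/* Roughly the close path: link the close and stop replaying the open. */
static void toy_record_close(struct toy_open_data *mod,
                             struct toy_req *close_req)
{
        mod->close_req = close_req;
        mod->open_req->replay = 0;
}

/* Roughly mdc_commit_open(): drop the bookkeeping once it is safe. */
static void toy_commit_open(struct toy_open_data *mod)
{
        if (mod->close_req != NULL)
                mod->close_req->cb_data = NULL;
        mod->open_req->cb_data = NULL;
        free(mod);
}

int main(void)
{
        struct toy_req open_req = { 0 }, close_req = { 0 };
        struct toy_open_data *mod = toy_record_open(&open_req);

        if (mod != NULL) {
                toy_record_close(mod, &close_req);
                toy_commit_open(mod);
        }
        return 0;
}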
struct lookup_intent;
return rc;
}
-int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
- struct lustre_handle *fh, __u64 ioepoch)
+int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
+ __u64 ioepoch)
{
struct llu_inode_info *lli = llu_i2info(inode);
struct llu_sb_info *sbi = llu_i2sbi(inode);
op_data.op_ioepoch = ioepoch;
op_data.op_flags |= MF_SOM_CHANGE;
- rc = llu_md_setattr(inode, &op_data, &mod);
+ rc = llu_md_setattr(inode, &op_data, NULL);
RETURN(rc);
}
struct obd_client_handle *och = &fd->fd_mds_och;
struct intnl_stat *st = llu_i2stat(inode);
struct md_op_data op_data = { { 0 } };
- int seq_end = 0, rc;
+ int rc;
ENTRY;
/* clear group lock, if present */
memcpy(&op_data.op_handle, &och->och_fh, sizeof(op_data.op_handle));
rc = md_close(md_exp, &op_data, och->och_mod, &req);
- if (rc != -EAGAIN)
- seq_end = 1;
-
if (rc == -EAGAIN) {
/* We are the last writer, so the MDS has instructed us to get
* the file size and any write cookies, then close again. */
LASSERT(fd->fd_flags & FMODE_WRITE);
- rc = llu_sizeonmds_update(inode, och->och_mod, &och->och_fh,
+ rc = llu_sizeonmds_update(inode, &och->och_fh,
op_data.op_ioepoch);
if (rc) {
CERROR("inode %llu mdc Size-on-MDS update failed: "
(long long)st->st_ino, rc);
}
- if (seq_end)
- ptlrpc_close_replay_seq(req);
md_clear_open_replay_data(md_exp, och);
ptlrpc_req_finished(req);
och->och_fh.cookie = DEAD_HANDLE_MAGIC;
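The -EAGAIN branch above is the Size-on-MDS handshake: the MDS asks the last writer to report the authoritative file size before the close completes. A standalone sketch of just that control flow follows; toy_md_close() and toy_sizeonmds_update() are hypothetical stand-ins for md_close() and llu_sizeonmds_update().

/* Toy model of the close / -EAGAIN / Size-on-MDS update control flow. */
#include <errno.h>
#include <stdio.h>

/* Stand-in for md_close(): -EAGAIN means "you are the last writer,
 * report the file size first". */
static int toy_md_close(int last_writer)
{
        return last_writer ? -EAGAIN : 0;
}

/* Stand-in for llu_sizeonmds_update(): gather the size from the OSTs and
 * send a setattr carrying MF_SOM_CHANGE back to the MDS. */
static int toy_sizeonmds_update(void)
{
        return 0;
}

static int toy_close(int last_writer)
{
        int rc = toy_md_close(last_writer);

        if (rc == -EAGAIN)
                rc = toy_sizeonmds_update();
        return rc;
}

int main(void)
{
        printf("not last writer: rc = %d\n", toy_close(0));
        printf("last writer:     rc = %d\n", toy_close(1));
        return 0;
}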
int llu_iop_open(struct pnode *pnode, int flags, mode_t mode);
int llu_md_close(struct obd_export *md_exp, struct inode *inode);
int llu_file_release(struct inode *inode);
-int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
- struct lustre_handle *fh, __u64 ioepoch);
+int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
+ __u64 ioepoch);
int llu_iop_close(struct inode *inode);
_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off);
int llu_vmtruncate(struct inode * inode, loff_t offset, obd_flag obd_flags);
if (rc == -EAGAIN) {
/* MDS has instructed us to obtain Size-on-MDS attribute
* from OSTs and send a setattr back to the MDS. */
- rc = llu_sizeonmds_update(inode, mod, &op_data->op_handle,
+ rc = llu_sizeonmds_update(inode, &op_data->op_handle,
op_data->op_ioepoch);
} else if (rc) {
CERROR("inode %llu mdc truncate failed: rc = %d\n",
memcpy(&op_data.op_attr, attr, sizeof(*attr));
/* Open epoch for truncate. */
- if (ia_valid & ATTR_SIZE)
+ if ((llu_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) &&
+ (ia_valid & ATTR_SIZE))
op_data.op_flags = MF_EPOCH_OPEN;
rc = llu_md_setattr(inode, &op_data, &mod);
if (rc)
struct ptlrpc_request *req = NULL;
struct obd_device *obd = class_exp2obd(exp);
int epoch_close = 1;
- int seq_end = 0, rc;
+ int rc;
ENTRY;
if (obd == NULL) {
ll_prepare_close(inode, op_data, och);
epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
rc = md_close(md_exp, op_data, och->och_mod, &req);
- if (rc != -EAGAIN)
- seq_end = 1;
-
if (rc == -EAGAIN) {
/* This close must have the epoch closed. */
LASSERT(exp->exp_connect_flags & OBD_CONNECT_SOM);
LASSERT(epoch_close);
/* MDS has instructed us to obtain Size-on-MDS attribute from
* OSTs and send a setattr back to the MDS. */
- rc = ll_sizeonmds_update(inode, och->och_mod,
- &och->och_fh, op_data->op_ioepoch);
+ rc = ll_sizeonmds_update(inode, &och->och_fh,
+ op_data->op_ioepoch);
if (rc) {
CERROR("inode %lu mdc Size-on-MDS update failed: "
"rc = %d\n", inode->i_ino, rc);
S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
ll_queue_done_writing(inode, LLIF_DONE_WRITING);
} else {
- if (seq_end)
- ptlrpc_close_replay_seq(req);
md_clear_open_replay_data(md_exp, och);
/* Free @och if it is not waiting for DONE_WRITING. */
och->och_fh.cookie = DEAD_HANDLE_MAGIC;
return;
}
-int ll_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
- struct lustre_handle *fh, __u64 ioepoch)
+int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
+ __u64 ioepoch)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct md_op_data *op_data;
op_data->op_ioepoch = ioepoch;
op_data->op_flags |= MF_SOM_CHANGE;
- rc = ll_md_setattr(inode, op_data, &mod);
+ rc = ll_md_setattr(inode, op_data, NULL);
EXIT;
out:
if (oa)
ll_pack_inode2opdata(inode, op_data, &och->och_fh);
- rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och->och_mod);
+ rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
if (rc == -EAGAIN) {
/* MDS has instructed us to obtain Size-on-MDS attribute from
* OSTs and send a setattr back to the MDS. */
- rc = ll_sizeonmds_update(inode, och->och_mod,
- &och->och_fh, op_data->op_ioepoch);
+ rc = ll_sizeonmds_update(inode, &och->och_fh,
+ op_data->op_ioepoch);
} else if (rc) {
CERROR("inode %lu mdc done_writing failed: rc = %d\n",
inode->i_ino, rc);
int ll_md_real_close(struct inode *inode, int flags);
void ll_epoch_close(struct inode *inode, struct md_op_data *op_data,
struct obd_client_handle **och, unsigned long flags);
-int ll_sizeonmds_update(struct inode *inode, struct md_open_data *data,
- struct lustre_handle *fh, __u64 ioepoch);
+int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
+ __u64 ioepoch);
int ll_inode_getattr(struct inode *inode, struct obdo *obdo);
int ll_md_setattr(struct inode *inode, struct md_op_data *op_data,
struct md_open_data **mod);
if (rc == -EAGAIN) {
/* MDS has instructed us to obtain Size-on-MDS attribute
* from OSTs and send a setattr back to the MDS. */
- rc = ll_sizeonmds_update(inode, mod, &op_data->op_handle,
+ rc = ll_sizeonmds_update(inode, &op_data->op_handle,
op_data->op_ioepoch);
} else if (rc) {
CERROR("inode %lu mdc truncate failed: rc = %d\n",
int mdc_clear_open_replay_data(struct obd_export *exp,
struct obd_client_handle *och);
-void mdc_commit_delayed(struct ptlrpc_request *req);
+void mdc_commit_open(struct ptlrpc_request *req);
+void mdc_replay_open(struct ptlrpc_request *req);
int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
const void *data, int datalen, int mode, __u32 uid, __u32 gid,
DEBUG_REQ(D_ERROR, req, "Can't allocate "
"md_open_data");
} else {
- CFS_INIT_LIST_HEAD(&(*mod)->mod_replay_list);
- }
- }
- if (mod && *mod) {
- req->rq_cb_data = *mod;
- req->rq_commit_cb = mdc_commit_delayed;
- list_add_tail(&req->rq_mod_list, &(*mod)->mod_replay_list);
- /* This is not the last request in sequence for truncate. */
- if (op_data->op_flags & MF_EPOCH_OPEN)
req->rq_replay = 1;
- else
- req->rq_sequence = 1;
+ req->rq_cb_data = *mod;
+ (*mod)->mod_open_req = req;
+ req->rq_commit_cb = mdc_commit_open;
+ }
}
rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);
+
+ /* Save the obtained info in the original RPC for the replay case. */
+ if (rc == 0 && (op_data->op_flags & MF_EPOCH_OPEN)) {
+ struct mdt_epoch *epoch;
+ struct mdt_body *body;
+
+ epoch = req_capsule_client_get(&req->rq_pill, &RMF_MDT_EPOCH);
+ body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+ LASSERT(epoch != NULL);
+ LASSERT(body != NULL);
+ epoch->handle = body->handle;
+ epoch->ioepoch = body->ioepoch;
+ req->rq_replay_cb = mdc_replay_open;
+ }
*request = req;
if (rc == -ERESTARTSYS)
rc = 0;
RETURN(0);
}
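The MF_EPOCH_OPEN block added above copies the server-assigned handle and ioepoch from the reply into the request's own epoch buffer, because replay resends the original request body. A small compile-only fragment illustrating the idea, with a hypothetical toy_epoch standing in for struct mdt_epoch / struct mdt_body:

/* Toy fragment: stash server-assigned identifiers into the request body
 * so that a replayed (resent) request still names them. */
struct toy_epoch {
        unsigned long long handle;      /* open handle cookie */
        unsigned long long ioepoch;     /* IO epoch number */
};

void toy_save_for_replay(struct toy_epoch *request_epoch,
                         const struct toy_epoch *reply_body)
{
        request_epoch->handle  = reply_body->handle;
        request_epoch->ioepoch = reply_body->ioepoch;
}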
-static void mdc_replay_open(struct ptlrpc_request *req)
+/**
+ * Handles both OPEN and SETATTR RPCs for OPEN-CLOSE and SETATTR-DONE_WRITING
+ * RPC chains.
+ */
+void mdc_replay_open(struct ptlrpc_request *req)
{
struct md_open_data *mod = req->rq_cb_data;
- struct ptlrpc_request *cur, *tmp;
+ struct ptlrpc_request *close_req;
struct obd_client_handle *och;
struct lustre_handle old;
struct mdt_body *body;
old = *file_fh;
*file_fh = body->handle;
}
- list_for_each_entry_safe(cur, tmp, &mod->mod_replay_list, rq_mod_list) {
- int opc = lustre_msg_get_opc(cur->rq_reqmsg);
- struct mdt_epoch *epoch = NULL;
+ close_req = mod->mod_close_req;
+ if (close_req != NULL) {
+ __u32 opc = lustre_msg_get_opc(close_req->rq_reqmsg);
+ struct mdt_epoch *epoch;
- if (opc == MDS_CLOSE || opc == MDS_DONE_WRITING) {
- epoch = req_capsule_client_get(&cur->rq_pill,
- &RMF_MDT_EPOCH);
- LASSERT(epoch);
- DEBUG_REQ(D_HA, cur, "updating %s body with new fh",
- opc == MDS_CLOSE ? "CLOSE" : "DONE_WRITING");
- } else if (opc == MDS_REINT) {
- struct mdt_rec_setattr *rec;
-
- /* Check this is REINT_SETATTR. */
- rec = req_capsule_client_get(&cur->rq_pill,
- &RMF_REC_REINT);
- LASSERT(rec && rec->sa_opcode == REINT_SETATTR);
-
- epoch = req_capsule_client_get(&cur->rq_pill,
+ LASSERT(opc == MDS_CLOSE || opc == MDS_DONE_WRITING);
+ epoch = req_capsule_client_get(&close_req->rq_pill,
&RMF_MDT_EPOCH);
- LASSERT(epoch);
- DEBUG_REQ(D_HA, cur, "updating REINT_SETATTR body "
- "with new fh");
- }
- if (epoch) {
- if (och != NULL)
- LASSERT(!memcmp(&old, &epoch->handle,
- sizeof(old)));
- epoch->handle = body->handle;
- }
+ LASSERT(epoch);
+
+ if (och != NULL)
+ LASSERT(!memcmp(&old, &epoch->handle, sizeof(old)));
+ DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
+ epoch->handle = body->handle;
}
EXIT;
}
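With a single mod_close_req pointer, the replay fix-up in mdc_replay_open() collapses to: take the new handle returned for the replayed open and rewrite both the cached open handle and the pending close, if one exists. A compile-only fragment with invented toy_* types:

/* Toy fragment: after an open replay returns a new handle, both the
 * cached handle and any pending close must be rewritten to use it. */
struct toy_handle { unsigned long long cookie; };

struct toy_close_req {
        struct toy_handle epoch_handle;  /* handle carried by the close */
};

struct toy_open_state {
        struct toy_handle     fh;         /* cached open handle (och) */
        struct toy_close_req *close_req;  /* pending close, if any */
};

void toy_replay_open(struct toy_open_state *state, struct toy_handle new_fh)
{
        state->fh = new_fh;
        if (state->close_req != NULL)
                state->close_req->epoch_handle = new_fh;
}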
-void mdc_commit_delayed(struct ptlrpc_request *req)
+void mdc_commit_open(struct ptlrpc_request *req)
{
struct md_open_data *mod = req->rq_cb_data;
- struct ptlrpc_request *cur, *tmp;
-
- DEBUG_REQ(D_HA, req, "req committed");
-
if (mod == NULL)
return;
- req->rq_cb_data = NULL;
- req->rq_commit_cb = NULL;
- list_del_init(&req->rq_mod_list);
- if (req->rq_sequence) {
- list_for_each_entry_safe(cur, tmp, &mod->mod_replay_list,
- rq_mod_list) {
- LASSERT(cur != LP_POISON);
- LASSERT(cur->rq_type != LI_POISON);
- DEBUG_REQ(D_HA, cur, "req balanced");
- LASSERT(cur->rq_transno != 0);
- LASSERT(cur->rq_import == req->rq_import);
-
- /* We no longer want to preserve this for transno-
- * unconditional replay. */
- spin_lock(&cur->rq_lock);
- cur->rq_replay = 0;
- spin_unlock(&cur->rq_lock);
- }
- }
+ if (mod->mod_close_req != NULL)
+ mod->mod_close_req->rq_cb_data = NULL;
- if (list_empty(&mod->mod_replay_list)) {
- if (mod->mod_och != NULL)
- mod->mod_och->och_mod = NULL;
+ if (mod->mod_och != NULL)
+ mod->mod_och->och_mod = NULL;
- OBD_FREE_PTR(mod);
- }
+ OBD_FREE(mod, sizeof(*mod));
+ req->rq_cb_data = NULL;
}
int mdc_set_open_replay_data(struct obd_export *exp,
"Can't allocate md_open_data");
RETURN(0);
}
- CFS_INIT_LIST_HEAD(&mod->mod_replay_list);
spin_lock(&open_req->rq_lock);
och->och_mod = mod;
mod->mod_och = och;
+ mod->mod_open_req = open_req;
open_req->rq_cb_data = mod;
- list_add_tail(&open_req->rq_mod_list, &mod->mod_replay_list);
- open_req->rq_commit_cb = mdc_commit_delayed;
+ open_req->rq_commit_cb = mdc_commit_open;
spin_unlock(&open_req->rq_lock);
}
ENTRY;
/*
- * Don't free the structure now (it happens in mdc_commit_delayed(),
- * after the last request is removed from its replay list),
+ * Don't free the structure now (it happens in mdc_commit_open(), after
+ * we're sure we won't need to fix up the close request in the future),
* but make sure that replay doesn't poke at the och, which is about to
* be freed.
*/
ptlrpc_at_set_req_timeout(req);
/* Ensure that this close's handle is fixed up during replay. */
- if (likely(mod != NULL))
- list_add_tail(&req->rq_mod_list, &mod->mod_replay_list);
- else
- CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
+ if (likely(mod != NULL)) {
+ LASSERTF(mod->mod_open_req->rq_type != LI_POISON,
+ "POISONED open %p!\n", mod->mod_open_req);
+
+ mod->mod_close_req = req;
+ DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
+ /* We no longer want to preserve this open for replay even
+ * though the open was committed. b=3632, b=3633 */
+ spin_lock(&mod->mod_open_req->rq_lock);
+ mod->mod_open_req->rq_replay = 0;
+ spin_unlock(&mod->mod_open_req->rq_lock);
+ } else {
+ CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
+ }
mdc_close_pack(req, op_data);
ptlrpc_request_set_replen(req);
- req->rq_commit_cb = mdc_commit_delayed;
- req->rq_replay = 1;
- LASSERT(req->rq_cb_data == NULL);
- req->rq_cb_data = mod;
-
mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
rc = ptlrpc_queue_wait(req);
mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
rc = -EPROTO;
}
- EXIT;
- if (rc != 0 && rc != -EAGAIN && req && req->rq_commit_cb)
- req->rq_commit_cb(req);
-
*request = req;
- return rc;
+ RETURN(rc);
}
int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
RETURN(rc);
}
- /* XXX: add DONE_WRITING request to och -- when Size-on-MDS
- * recovery will be ready. */
+ if (mod != NULL) {
+ LASSERTF(mod->mod_open_req->rq_type != LI_POISON,
+ "POISONED setattr %p!\n", mod->mod_open_req);
+
+ mod->mod_close_req = req;
+ DEBUG_REQ(D_HA, mod->mod_open_req, "matched setattr");
+ /* We no longer want to preserve this open for replay even
+ * though the open was committed. b=3632, b=3633 */
+ spin_lock(&mod->mod_open_req->rq_lock);
+ mod->mod_open_req->rq_replay = 0;
+ spin_unlock(&mod->mod_open_req->rq_lock);
+ }
+
mdc_close_pack(req, op_data);
ptlrpc_request_set_replen(req);
- req->rq_replay = 1;
- req->rq_cb_data = mod;
- req->rq_commit_cb = mdc_commit_delayed;
- if (likely(mod != NULL))
- list_add_tail(&req->rq_mod_list, &mod->mod_replay_list);
- else
- CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
rc = ptlrpc_queue_wait(req);
mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
- /* Close the open replay sequence if an error occured or no SOM
- * attribute update is needed. */
- if (rc != -EAGAIN)
- ptlrpc_close_replay_seq(req);
-
- if (rc && rc != -EAGAIN && req->rq_commit_cb)
- req->rq_commit_cb(req);
-
ptlrpc_req_finished(req);
RETURN(rc);
}
OBD_ALLOC_PTR(handle->och_mod);
if (handle->och_mod == NULL) {
- DEBUG_REQ(D_ERROR, req, "can't allocate mdc_open_data");
+ DEBUG_REQ(D_ERROR, req, "can't allocate md_open_data");
GOTO(err_out, rc = -ENOMEM);
}
- /* will be dropped by unpin */
- CFS_INIT_LIST_HEAD(&handle->och_mod->mod_replay_list);
- list_add_tail(&req->rq_mod_list, &handle->och_mod->mod_replay_list);
+ handle->och_mod->mod_open_req = req; /* will be dropped by unpin */
RETURN(0);
CERROR("Unpin failed: %d\n", rc);
ptlrpc_req_finished(req);
-
- LASSERT(!list_empty(&handle->och_mod->mod_replay_list));
- req = list_entry(handle->och_mod->mod_replay_list.next,
- typeof(*req), rq_mod_list);
- list_del_init(&req->rq_mod_list);
- ptlrpc_req_finished(req);
- LASSERT(list_empty(&handle->och_mod->mod_replay_list));
+ ptlrpc_req_finished(handle->och_mod->mod_open_req);
OBD_FREE(handle->och_mod, sizeof(*handle->och_mod));
RETURN(rc);
if (mfd == NULL) {
spin_unlock(&med->med_open_lock);
CDEBUG(D_INODE, "no handle for done write: fid = "DFID
- ": cookie = "LPX64"\n", PFID(info->mti_rr.rr_fid1),
- info->mti_epoch->handle.cookie);
+ ": cookie = "LPX64" ioepoch = "LPU64"\n",
+ PFID(info->mti_rr.rr_fid1),
+ info->mti_epoch->handle.cookie,
+ info->mti_epoch->ioepoch);
RETURN(-ESTALE);
}
CFS_INIT_LIST_HEAD(&request->rq_list);
CFS_INIT_LIST_HEAD(&request->rq_timed_list);
CFS_INIT_LIST_HEAD(&request->rq_replay_list);
- CFS_INIT_LIST_HEAD(&request->rq_mod_list);
CFS_INIT_LIST_HEAD(&request->rq_ctx_chain);
CFS_INIT_LIST_HEAD(&request->rq_set_chain);
CFS_INIT_LIST_HEAD(&request->rq_history_list);
if (request->rq_import != NULL) {
if (!locked)
spin_lock(&request->rq_import->imp_lock);
- list_del_init(&request->rq_mod_list);
list_del_init(&request->rq_replay_list);
if (!locked)
spin_unlock(&request->rq_import->imp_lock);