From 995c1fbbf92a8776f3a516fea86d0acbef6887e5 Mon Sep 17 00:00:00 2001 From: vitaly Date: Wed, 19 Sep 2007 19:36:26 +0000 Subject: [PATCH] Branch HEAD b=13581 i=tappro i=huanghua Replay Size-on-MDS related RPCs properly. --- lustre/cmm/mdc_object.c | 2 +- lustre/include/lustre_lib.h | 5 +- lustre/include/lustre_net.h | 15 ++- lustre/include/obd.h | 12 ++- lustre/include/obd_class.h | 13 +-- lustre/liblustre/file.c | 23 +++-- lustre/liblustre/llite_lib.h | 7 +- lustre/liblustre/super.c | 43 +++++---- lustre/llite/dir.c | 2 +- lustre/llite/file.c | 16 +-- lustre/llite/llite_close.c | 18 ++-- lustre/llite/llite_internal.h | 7 +- lustre/llite/llite_lib.c | 31 +++--- lustre/lmv/lmv_obd.c | 15 +-- lustre/mdc/mdc_internal.h | 9 +- lustre/mdc/mdc_reint.c | 28 +++++- lustre/mdc/mdc_request.c | 219 +++++++++++++++++++++++------------------- lustre/mdt/mdt_handler.c | 20 +--- lustre/mdt/mdt_internal.h | 22 +++++ lustre/mdt/mdt_open.c | 24 ++--- lustre/mdt/mdt_recovery.c | 32 +++--- lustre/mdt/mdt_reint.c | 11 +-- lustre/ptlrpc/client.c | 2 + 23 files changed, 335 insertions(+), 241 deletions(-) diff --git a/lustre/cmm/mdc_object.c b/lustre/cmm/mdc_object.c index 1ac12c9..fde11da 100644 --- a/lustre/cmm/mdc_object.c +++ b/lustre/cmm/mdc_object.c @@ -290,7 +290,7 @@ static int mdc_attr_set(const struct lu_env *env, struct md_object *mo, } rc = md_setattr(mc->mc_desc.cl_exp, &mci->mci_opdata, - NULL, 0, NULL, 0, &mci->mci_req); + NULL, 0, NULL, 0, &mci->mci_req, NULL); ptlrpc_req_finished(mci->mci_req); diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 206a1bf..85cd2cf 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -90,12 +90,11 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg); struct client_obd *client_conn2cli(struct lustre_handle *conn); -struct mdc_open_data; +struct md_open_data; struct obd_client_handle { struct 
lustre_handle och_fh; struct lu_fid och_fid; - struct llog_cookie och_cookie; - struct mdc_open_data *och_mod; + struct md_open_data *och_mod; __u32 och_magic; int och_flags; }; diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index d5c5553..b519770 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -297,11 +297,13 @@ struct ptlrpc_request { * after server commits corresponding transaction. This is * used for operations that require sequence of multiple * requests to be replayed. The only example currently is file - * open/close. When last request in such a sequence is - * committed, ->rq_replay is cleared on all requests in the + * open/close/dw/setattr. When last request in such a sequence + * is committed, ->rq_replay is cleared on all requests in the * sequence. */ rq_replay:1, + /* this is the last request in the sequence. */ + rq_sequence:1, rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1; enum rq_phase rq_phase; /* one of RQ_PHASE_* */ @@ -323,6 +325,7 @@ struct ptlrpc_request { __u64 rq_transno; __u64 rq_xid; struct list_head rq_replay_list; + struct list_head rq_mod_list; struct ptlrpc_cli_ctx *rq_cli_ctx; /* client's half ctx */ struct ptlrpc_svc_ctx *rq_svc_ctx; /* server's half ctx */ @@ -402,6 +405,14 @@ struct ptlrpc_request { struct lu_context rq_session; }; +static inline void ptlrpc_close_replay_seq(struct ptlrpc_request *req) +{ + spin_lock(&req->rq_lock); + req->rq_replay = 0; + req->rq_sequence = 1; + spin_unlock(&req->rq_lock); +} + static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index) { LASSERT(index < sizeof(req->rq_req_swab_mask) * 8); diff --git a/lustre/include/obd.h b/lustre/include/obd.h index d16b2ea..8c1489f 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1214,18 +1214,23 @@ struct lustre_md { struct obd_capa *oss_capa; }; +struct md_open_data { + struct obd_client_handle *mod_och; + struct 
list_head mod_replay_list; +}; + struct md_ops { int (*m_getstatus)(struct obd_export *, struct lu_fid *, struct obd_capa **); int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *, ldlm_iterator_t, void *); int (*m_close)(struct obd_export *, struct md_op_data *, - struct obd_client_handle *, struct ptlrpc_request **); + struct md_open_data *, struct ptlrpc_request **); int (*m_create)(struct obd_export *, struct md_op_data *, const void *, int, int, __u32, __u32, __u32, __u64, struct ptlrpc_request **); int (*m_done_writing)(struct obd_export *, struct md_op_data *, - struct obd_client_handle *); + struct md_open_data *); int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *, struct lookup_intent *, struct md_op_data *, struct lustre_handle *, void *, int, int); @@ -1248,7 +1253,8 @@ struct md_ops { const struct lu_fid *, struct ptlrpc_request **); int (*m_setattr)(struct obd_export *, struct md_op_data *, void *, - int , void *, int, struct ptlrpc_request **); + int , void *, int, struct ptlrpc_request **, + struct md_open_data **mod); int (*m_sync)(struct obd_export *, const struct lu_fid *, struct obd_capa *, struct ptlrpc_request **); int (*m_readpage)(struct obd_export *, const struct lu_fid *, diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 385df8f..64df844 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -1677,14 +1677,14 @@ static inline int md_change_cbdata(struct obd_export *exp, } static inline int md_close(struct obd_export *exp, struct md_op_data *op_data, - struct obd_client_handle *och, + struct md_open_data *mod, struct ptlrpc_request **request) { int rc; ENTRY; EXP_CHECK_MD_OP(exp, close); EXP_MD_COUNTER_INCREMENT(exp, close); - rc = MDP(exp->exp_obd, close)(exp, op_data, och, request); + rc = MDP(exp->exp_obd, close)(exp, op_data, mod, request); RETURN(rc); } @@ -1704,13 +1704,13 @@ static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, static 
inline int md_done_writing(struct obd_export *exp, struct md_op_data *op_data, - struct obd_client_handle *och) + struct md_open_data *mod) { int rc; ENTRY; EXP_CHECK_MD_OP(exp, done_writing); EXP_MD_COUNTER_INCREMENT(exp, done_writing); - rc = MDP(exp->exp_obd, done_writing)(exp, op_data, och); + rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod); RETURN(rc); } @@ -1802,14 +1802,15 @@ static inline int md_is_subdir(struct obd_export *exp, static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, void *ea, int ealen, void *ea2, int ea2len, - struct ptlrpc_request **request) + struct ptlrpc_request **request, + struct md_open_data **mod) { int rc; ENTRY; EXP_CHECK_MD_OP(exp, setattr); EXP_MD_COUNTER_INCREMENT(exp, setattr); rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen, - ea2, ea2len, request); + ea2, ea2len, request, mod); RETURN(rc); } diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 2ee4933..153c64c 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -318,8 +318,8 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) return rc; } -int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, - __u64 ioepoch) +int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod, + struct lustre_handle *fh, __u64 ioepoch) { struct llu_inode_info *lli = llu_i2info(inode); struct llu_sb_info *sbi = llu_i2sbi(inode); @@ -332,7 +332,11 @@ int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, LASSERT(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM); rc = llu_inode_getattr(inode, &oa); - if (rc) { + if (rc == -ENOENT) { + oa.o_valid = 0; + CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n", + lli->lli_smd->lsm_object_id); + } else if (rc) { CERROR("inode_getattr failed (%d): unable to send a " "Size-on-MDS attribute update for inode %llu/%lu\n", rc, (long long)llu_i2stat(inode)->st_ino, @@ -345,7 +349,7 @@ int llu_sizeonmds_update(struct inode 
*inode, struct lustre_handle *fh, op_data.op_ioepoch = ioepoch; op_data.op_flags |= MF_SOM_CHANGE; - rc = llu_md_setattr(inode, &op_data); + rc = llu_md_setattr(inode, &op_data, &mod); RETURN(rc); } @@ -357,7 +361,7 @@ int llu_md_close(struct obd_export *md_exp, struct inode *inode) struct obd_client_handle *och = &fd->fd_mds_och; struct intnl_stat *st = llu_i2stat(inode); struct md_op_data op_data = { { 0 } }; - int rc; + int seq_end = 0, rc; ENTRY; /* clear group lock, if present */ @@ -401,12 +405,15 @@ int llu_md_close(struct obd_export *md_exp, struct inode *inode) op_data.op_ioepoch = lli->lli_ioepoch; memcpy(&op_data.op_handle, &och->och_fh, sizeof(op_data.op_handle)); - rc = md_close(md_exp, &op_data, och, &req); + rc = md_close(md_exp, &op_data, och->och_mod, &req); + if (rc != -EAGAIN) + seq_end = 1; + if (rc == -EAGAIN) { /* We are the last writer, so the MDS has instructed us to get * the file size and any write cookies, then close again. */ LASSERT(fd->fd_flags & FMODE_WRITE); - rc = llu_sizeonmds_update(inode, &och->och_fh, + rc = llu_sizeonmds_update(inode, och->och_mod, &och->och_fh, op_data.op_ioepoch); if (rc) { CERROR("inode %llu mdc Size-on-MDS update failed: " @@ -423,6 +430,8 @@ int llu_md_close(struct obd_export *md_exp, struct inode *inode) (long long)st->st_ino, rc); } + if (seq_end) + ptlrpc_close_replay_seq(req); md_clear_open_replay_data(md_exp, och); ptlrpc_req_finished(req); och->och_fh.cookie = DEAD_HANDLE_MAGIC; diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index f242b45..3243fc2 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -193,7 +193,8 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); int ll_it_open_error(int phase, struct lookup_intent *it); struct inode *llu_iget(struct filesys *fs, struct lustre_md *md); int llu_inode_getattr(struct inode *inode, struct obdo *obdo); -int llu_md_setattr(struct inode *inode, struct md_op_data *op_data); +int 
llu_md_setattr(struct inode *inode, struct md_op_data *op_data, + struct md_open_data **mod); int llu_setattr_raw(struct inode *inode, struct iattr *attr); extern struct fssw_ops llu_fssw_ops; @@ -208,8 +209,8 @@ int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it); int llu_iop_open(struct pnode *pnode, int flags, mode_t mode); int llu_md_close(struct obd_export *md_exp, struct inode *inode); int llu_file_release(struct inode *inode); -int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, - __u64 ioepoch); +int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod, + struct lustre_handle *fh, __u64 ioepoch); int llu_iop_close(struct inode *inode); _SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off); int llu_vmtruncate(struct inode * inode, loff_t offset, obd_flag obd_flags); diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 6c9dc66..0173a1d 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -594,7 +594,8 @@ static int inode_setattr(struct inode * inode, struct iattr * attr) return error; } -int llu_md_setattr(struct inode *inode, struct md_op_data *op_data) +int llu_md_setattr(struct inode *inode, struct md_op_data *op_data, + struct md_open_data **mod) { struct lustre_md md; struct llu_sb_info *sbi = llu_i2sbi(inode); @@ -603,7 +604,8 @@ int llu_md_setattr(struct inode *inode, struct md_op_data *op_data) ENTRY; llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY); - rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &request); + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, + 0, &request, mod); if (rc) { ptlrpc_req_finished(request); @@ -633,7 +635,8 @@ int llu_md_setattr(struct inode *inode, struct md_op_data *op_data) /* Close IO epoch and send Size-on-MDS attribute update. 
*/ static int llu_setattr_done_writing(struct inode *inode, - struct md_op_data *op_data) + struct md_op_data *op_data, + struct md_open_data *mod) { struct llu_inode_info *lli = llu_i2info(inode); struct intnl_stat *st = llu_i2stat(inode); @@ -649,11 +652,11 @@ static int llu_setattr_done_writing(struct inode *inode, op_data->op_ioepoch, PFID(&lli->lli_fid)); op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE; - rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, NULL); + rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, mod); if (rc == -EAGAIN) { /* MDS has instructed us to obtain Size-on-MDS attribute * from OSTs and send setattr to back to MDS. */ - rc = llu_sizeonmds_update(inode, &op_data->op_handle, + rc = llu_sizeonmds_update(inode, mod, &op_data->op_handle, op_data->op_ioepoch); } else if (rc) { CERROR("inode %llu mdc truncate failed: rc = %d\n", @@ -682,7 +685,8 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) struct intnl_stat *st = llu_i2stat(inode); int ia_valid = attr->ia_valid; struct md_op_data op_data = { { 0 } }; - int rc = 0; + struct md_open_data *mod = NULL; + int rc = 0, rc1 = 0; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino); @@ -741,15 +745,18 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) /* Open epoch for truncate. 
*/ if (ia_valid & ATTR_SIZE) op_data.op_flags = MF_EPOCH_OPEN; - rc = llu_md_setattr(inode, &op_data); + rc = llu_md_setattr(inode, &op_data, &mod); if (rc) RETURN(rc); + if (op_data.op_ioepoch) + CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for " + "truncate\n", op_data.op_ioepoch, + PFID(&llu_i2info(inode)->lli_fid)); + if (!lsm || !S_ISREG(st->st_mode)) { CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); - if (op_data.op_ioepoch) - rc = llu_setattr_done_writing(inode, &op_data); - RETURN(rc); + GOTO(out, rc); } } else { /* The OST doesn't check permissions, but the alternative is @@ -810,10 +817,9 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) &lockh, flags); if (rc != ELDLM_OK) { if (rc > 0) - RETURN(-ENOLCK); - RETURN(rc); + GOTO(out, rc = -ENOLCK); + GOTO(out, rc); } - rc = llu_vmtruncate(inode, attr->ia_size, obd_flags); /* unlock now as we don't mind others file lockers racing with @@ -824,9 +830,6 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) if (!rc) rc = err; } - - if (op_data.op_ioepoch) - rc = llu_setattr_done_writing(inode, &op_data); } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) { struct obd_info oinfo = { { { 0 } } }; struct obdo oa; @@ -846,7 +849,11 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) if (rc) CERROR("obd_setattr_async fails: rc=%d\n", rc); } - RETURN(rc); + EXIT; +out: + if (op_data.op_ioepoch) + rc1 = llu_setattr_done_writing(inode, &op_data, mod); + return rc ? 
rc : rc1; } /* here we simply act as a thin layer to glue it with @@ -1671,7 +1678,7 @@ static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg) /* swabbing is done in lov_setstripe() on server side */ rc = md_setattr(sbi->ll_md_exp, &op_data, &lum, - sizeof(lum), NULL, 0, &request); + sizeof(lum), NULL, 0, &request, NULL); if (rc) { ptlrpc_req_finished(request); if (rc != -EPERM && rc != -EACCES) diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 4c463d1..7a2cc4b 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -553,7 +553,7 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump) /* swabbing is done in lov_setstripe() on server side */ rc = md_setattr(sbi->ll_md_exp, op_data, lump, sizeof(*lump), - NULL, 0, &req); + NULL, 0, &req, NULL); ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); if (rc) { diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 62b0bd0..cf82da1 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -95,7 +95,7 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, struct ptlrpc_request *req = NULL; struct obd_device *obd = class_exp2obd(exp); int epoch_close = 1; - int rc; + int seq_end = 0, rc; ENTRY; if (obd == NULL) { @@ -122,7 +122,9 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, ll_prepare_close(inode, op_data, och); epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE); - rc = md_close(md_exp, op_data, och, &req); + rc = md_close(md_exp, op_data, och->och_mod, &req); + if (rc != -EAGAIN) + seq_end = 1; if (rc == -EAGAIN) { /* This close must have the epoch closed. */ @@ -130,8 +132,8 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, LASSERT(epoch_close); /* MDS has instructed us to obtain Size-on-MDS attribute from * OSTs and send setattr to back to MDS. 
*/ - rc = ll_sizeonmds_update(inode, &och->och_fh, - op_data->op_ioepoch); + rc = ll_sizeonmds_update(inode, och->och_mod, + &och->och_fh, op_data->op_ioepoch); if (rc) { CERROR("inode %lu mdc Size-on-MDS update failed: " "rc = %d\n", inode->i_ino, rc); @@ -150,7 +152,6 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp, inode->i_ino, rc); } - ptlrpc_req_finished(req); /* This is close request */ EXIT; out: @@ -158,12 +159,15 @@ out: S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) { ll_queue_done_writing(inode, LLIF_DONE_WRITING); } else { + if (seq_end) + ptlrpc_close_replay_seq(req); md_clear_open_replay_data(md_exp, och); /* Free @och if it is not waiting for DONE_WRITING. */ och->och_fh.cookie = DEAD_HANDLE_MAGIC; OBD_FREE_PTR(och); } - + if (req) /* This is close request */ + ptlrpc_req_finished(req); return rc; } diff --git a/lustre/llite/llite_close.c b/lustre/llite/llite_close.c index 326adf4..76e4b4a 100644 --- a/lustre/llite/llite_close.c +++ b/lustre/llite/llite_close.c @@ -184,8 +184,8 @@ out: return; } -int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, - __u64 ioepoch) +int ll_sizeonmds_update(struct inode *inode, struct md_open_data *mod, + struct lustre_handle *fh, __u64 ioepoch) { struct ll_inode_info *lli = ll_i2info(inode); struct md_op_data *op_data; @@ -207,7 +207,11 @@ int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, RETURN(-ENOMEM); } rc = ll_inode_getattr(inode, oa); - if (rc) { + if (rc == -ENOENT) { + oa->o_valid = 0; + CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n", + lli->lli_smd->lsm_object_id); + } else if (rc) { CERROR("inode_getattr failed (%d): unable to send a " "Size-on-MDS attribute update for inode %lu/%u\n", rc, inode->i_ino, inode->i_generation); @@ -221,7 +225,7 @@ int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, op_data->op_ioepoch = ioepoch; op_data->op_flags |= MF_SOM_CHANGE; - rc = ll_md_setattr(inode, op_data); + rc = 
ll_md_setattr(inode, op_data, &mod); EXIT; out: if (oa) @@ -255,12 +259,12 @@ static void ll_done_writing(struct inode *inode) ll_pack_inode2opdata(inode, op_data, &och->och_fh); - rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och); + rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och->och_mod); if (rc == -EAGAIN) { /* MDS has instructed us to obtain Size-on-MDS attribute from * OSTs and send setattr to back to MDS. */ - rc = ll_sizeonmds_update(inode, &och->och_fh, - op_data->op_ioepoch); + rc = ll_sizeonmds_update(inode, och->och_mod, + &och->och_fh, op_data->op_ioepoch); } else if (rc) { CERROR("inode %lu mdc done_writing failed: rc = %d\n", inode->i_ino, rc); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 2357fed..b61b3db 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -525,10 +525,11 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode, int ll_md_real_close(struct inode *inode, int flags); void ll_epoch_close(struct inode *inode, struct md_op_data *op_data, struct obd_client_handle **och, unsigned long flags); -int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, - __u64 ioepoch); +int ll_sizeonmds_update(struct inode *inode, struct md_open_data *data, + struct lustre_handle *fh, __u64 ioepoch); int ll_inode_getattr(struct inode *inode, struct obdo *obdo); -int ll_md_setattr(struct inode *inode, struct md_op_data *op_data); +int ll_md_setattr(struct inode *inode, struct md_op_data *op_data, + struct md_open_data **mod); void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, struct lustre_handle *fh); extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index efdb244..8107969 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1127,7 +1127,8 @@ void ll_clear_inode(struct inode *inode) EXIT; } -int 
ll_md_setattr(struct inode *inode, struct md_op_data *op_data) +int ll_md_setattr(struct inode *inode, struct md_op_data *op_data, + struct md_open_data **mod) { struct lustre_md md; struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -1141,7 +1142,7 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data) RETURN(PTR_ERR(op_data)); rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, - &request); + &request, mod); if (rc) { ptlrpc_req_finished(request); if (rc == -ENOENT) { @@ -1172,7 +1173,7 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data) rc = inode_setattr(inode, &op_data->op_attr); /* Extract epoch data if obtained. */ - memcpy(&op_data->op_handle, &md.body->handle, sizeof(op_data->op_handle)); + op_data->op_handle = md.body->handle; op_data->op_ioepoch = md.body->ioepoch; ll_update_inode(inode, &md); @@ -1183,7 +1184,8 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data) /* Close IO epoch and send Size-on-MDS attribute update. */ static int ll_setattr_done_writing(struct inode *inode, - struct md_op_data *op_data) + struct md_op_data *op_data, + struct md_open_data *mod) { struct ll_inode_info *lli = ll_i2info(inode); int rc = 0; @@ -1197,12 +1199,11 @@ static int ll_setattr_done_writing(struct inode *inode, op_data->op_ioepoch, PFID(&lli->lli_fid)); op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE; - /* XXX: pass och here for the recovery purpose. */ - rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL); + rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod); if (rc == -EAGAIN) { /* MDS has instructed us to obtain Size-on-MDS attribute * from OSTs and send setattr to back to MDS. 
*/ - rc = ll_sizeonmds_update(inode, &op_data->op_handle, + rc = ll_sizeonmds_update(inode, mod, &op_data->op_handle, op_data->op_ioepoch); } else if (rc) { CERROR("inode %lu mdc truncate failed: rc = %d\n", @@ -1230,6 +1231,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) struct lov_stripe_md *lsm = lli->lli_smd; struct ll_sb_info *sbi = ll_i2sbi(inode); struct md_op_data *op_data = NULL; + struct md_open_data *mod = NULL; int ia_valid = attr->ia_valid; int rc = 0, rc1 = 0; ENTRY; @@ -1300,8 +1302,8 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) /* Open epoch for truncate. */ if (ia_valid & ATTR_SIZE) op_data->op_flags = MF_EPOCH_OPEN; - - rc = ll_md_setattr(inode, op_data); + + rc = ll_md_setattr(inode, op_data, &mod); if (rc) GOTO(out, rc); @@ -1310,8 +1312,8 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) "truncate\n", op_data->op_ioepoch, PFID(&lli->lli_fid)); if (!lsm || !S_ISREG(inode->i_mode)) { - CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); - GOTO(out, rc = 0); + CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); + GOTO(out, rc = 0); } /* We really need to get our PW lock before we change inode->i_size. @@ -1400,9 +1402,8 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) EXIT; out: if (op_data) { - if (op_data->op_ioepoch) { - rc1 = ll_setattr_done_writing(inode, op_data); - } + if (op_data->op_ioepoch) + rc1 = ll_setattr_done_writing(inode, op_data, mod); ll_finish_md_op_data(op_data); } return rc ? 
rc : rc1; @@ -1838,7 +1839,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; rc = md_setattr(sbi->ll_md_exp, op_data, - NULL, 0, NULL, 0, &req); + NULL, 0, NULL, 0, &req, NULL); ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); if (rc || lsm == NULL) { diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index dbebe38..7a7da61 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -1248,7 +1248,7 @@ static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid, static int lmv_close(struct obd_export *exp, struct md_op_data *op_data, - struct obd_client_handle *och, + struct md_open_data *mod, struct ptlrpc_request **request) { struct obd_device *obd = exp->exp_obd; @@ -1266,7 +1266,7 @@ static int lmv_close(struct obd_export *exp, RETURN(PTR_ERR(tgt_exp)); CDEBUG(D_OTHER, "CLOSE "DFID"\n", PFID(&op_data->op_fid1)); - rc = md_close(tgt_exp, op_data, och, request); + rc = md_close(tgt_exp, op_data, mod, request); RETURN(rc); } @@ -1411,7 +1411,7 @@ repeat: static int lmv_done_writing(struct obd_export *exp, struct md_op_data *op_data, - struct obd_client_handle *och) + struct md_open_data *mod) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -1427,7 +1427,7 @@ static int lmv_done_writing(struct obd_export *exp, if (IS_ERR(tgt_exp)) RETURN(PTR_ERR(tgt_exp)); - rc = md_done_writing(tgt_exp, op_data, och); + rc = md_done_writing(tgt_exp, op_data, mod); RETURN(rc); } @@ -2025,7 +2025,8 @@ request: static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, void *ea, int ealen, void *ea2, int ea2len, - struct ptlrpc_request **request) + struct ptlrpc_request **request, + struct md_open_data **mod) { struct obd_device *obd = exp->exp_obd; struct lmv_obd *lmv = &obd->u.lmv; @@ -2057,7 +2058,7 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, } rc = 
md_setattr(tgt_exp, op_data, ea, ealen, - ea2, ea2len, &req); + ea2, ea2len, &req, mod); if (lu_fid_eq(&obj->lo_fid, &obj->lo_inodes[i].li_fid)) { /* @@ -2079,7 +2080,7 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data, RETURN(PTR_ERR(tgt_exp)); rc = md_setattr(tgt_exp, op_data, ea, ealen, ea2, - ea2len, request); + ea2len, request, mod); } RETURN(rc); } diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index a23511d..723a37e 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -64,12 +64,6 @@ void mdc_close_pack(struct ptlrpc_request *req, int offset, void mdc_exit_request(struct client_obd *cli); void mdc_enter_request(struct client_obd *cli); -struct mdc_open_data { - struct obd_client_handle *mod_och; - struct ptlrpc_request *mod_open_req; - struct ptlrpc_request *mod_close_req; -}; - static inline int client_is_remote(struct obd_export *exp) { struct obd_import *imp = class_exp2cliimp(exp); @@ -139,6 +133,7 @@ int mdc_set_open_replay_data(struct obd_export *exp, int mdc_clear_open_replay_data(struct obd_export *exp, struct obd_client_handle *och); +void mdc_commit_delayed(struct ptlrpc_request *req); int mdc_create(struct obd_export *exp, struct md_op_data *op_data, const void *data, int datalen, int mode, __u32 uid, __u32 gid, @@ -150,7 +145,7 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request); int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, void *ea, int ealen, void *ea2, int ea2len, - struct ptlrpc_request **request); + struct ptlrpc_request **request, struct md_open_data **mod); int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, struct ptlrpc_request **request); int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid, diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 712a70b..0021b22 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -98,7 +98,7 @@ int 
mdc_resource_get_unused(struct obd_export *exp, struct lu_fid *fid, * go to the setattr portal. */ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, void *ea, int ealen, void *ea2, int ea2len, - struct ptlrpc_request **request) + struct ptlrpc_request **request, struct md_open_data **mod) { CFS_LIST_HEAD(cancels); struct ptlrpc_request *req; @@ -162,12 +162,36 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, size[REPLY_REC_OFF] = sizeof(struct mdt_body); size[REPLY_REC_OFF + 1] = sizeof(struct lustre_capa); ptlrpc_req_set_repsize(req, 3, size); + if (mod && (op_data->op_flags & MF_EPOCH_OPEN) && + req->rq_import->imp_replayable) + { + LASSERT(*mod == NULL); + + OBD_ALLOC_PTR(*mod); + if (*mod == NULL) { + DEBUG_REQ(D_ERROR, req, "Can't allocate " + "md_open_data"); + } else { + CFS_INIT_LIST_HEAD(&(*mod)->mod_replay_list); + } + } + if (mod && *mod) { + req->rq_cb_data = *mod; + req->rq_commit_cb = mdc_commit_delayed; + list_add_tail(&req->rq_mod_list, &(*mod)->mod_replay_list); + /* This is not the last request in sequence for truncate. 
*/ + if (op_data->op_flags & MF_EPOCH_OPEN) + req->rq_replay = 1; + else + req->rq_sequence = 1; + } rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL); *request = req; if (rc == -ERESTARTSYS) rc = 0; - + if (rc && req->rq_commit_cb) + req->rq_commit_cb(req); RETURN(rc); } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index d70954a..e294986 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -610,26 +610,10 @@ int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md) RETURN(0); } -static void mdc_commit_open(struct ptlrpc_request *req) -{ - struct mdc_open_data *mod = req->rq_cb_data; - if (mod == NULL) - return; - - if (mod->mod_close_req != NULL) - mod->mod_close_req->rq_cb_data = NULL; - - if (mod->mod_och != NULL) - mod->mod_och->och_mod = NULL; - - OBD_FREE(mod, sizeof(*mod)); - req->rq_cb_data = NULL; -} - static void mdc_replay_open(struct ptlrpc_request *req) { - struct mdc_open_data *mod = req->rq_cb_data; - struct ptlrpc_request *close_req; + struct md_open_data *mod = req->rq_cb_data; + struct ptlrpc_request *cur, *tmp; struct obd_client_handle *och; struct lustre_handle old; struct mdt_body *body; @@ -644,43 +628,100 @@ static void mdc_replay_open(struct ptlrpc_request *req) body = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, sizeof(*body), lustre_swab_mdt_body); + LASSERT(body != NULL); och = mod->mod_och; if (och != NULL) { struct lustre_handle *file_fh; LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); - LASSERT(body != NULL); file_fh = &och->och_fh; CDEBUG(D_HA, "updating handle from "LPX64" to "LPX64"\n", file_fh->cookie, body->handle.cookie); - memcpy(&old, file_fh, sizeof(old)); - memcpy(file_fh, &body->handle, sizeof(*file_fh)); + old = *file_fh; + *file_fh = body->handle; } - close_req = mod->mod_close_req; - if (close_req != NULL) { - struct mdt_epoch *epoch; - LASSERT(lustre_msg_get_opc(close_req->rq_reqmsg) == MDS_CLOSE); - LASSERT(body != NULL); - - epoch = lustre_msg_buf(close_req->rq_reqmsg, 
REQ_REC_OFF, - sizeof(*epoch)); - LASSERT(epoch); - if (och != NULL) - LASSERT(!memcmp(&old, &epoch->handle, sizeof(old))); - DEBUG_REQ(D_HA, close_req, "updating close body with new fh"); - memcpy(&epoch->handle, &body->handle, sizeof(epoch->handle)); + list_for_each_entry_safe(cur, tmp, &mod->mod_replay_list, rq_mod_list) { + int opc = lustre_msg_get_opc(cur->rq_reqmsg); + struct mdt_epoch *epoch = NULL; + + if (opc == MDS_CLOSE || opc == MDS_DONE_WRITING) { + epoch = lustre_msg_buf(cur->rq_reqmsg, + REQ_REC_OFF, sizeof(*epoch)); + LASSERT(epoch); + DEBUG_REQ(D_HA, cur, "updating %s body with new fh", + opc == MDS_CLOSE ? "CLOSE" : "DONE_WRITING"); + } else if (opc == MDS_REINT) { + struct mdt_rec_setattr *rec; + + /* Check this is REINT_SETATTR. */ + rec = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, + sizeof (*rec)); + LASSERT(rec && rec->sa_opcode == REINT_SETATTR); + + epoch = lustre_msg_buf(cur->rq_reqmsg, + REQ_REC_OFF + 2, sizeof(*epoch)); + LASSERT(epoch); + DEBUG_REQ(D_HA, cur, "updating REINT_SETATTR body " + "with new fh"); + } + if (epoch) { + if (och != NULL) + LASSERT(!memcmp(&old, &epoch->handle, + sizeof(old))); + epoch->handle = body->handle; + } } EXIT; } +void mdc_commit_delayed(struct ptlrpc_request *req) +{ + struct md_open_data *mod = req->rq_cb_data; + struct ptlrpc_request *cur, *tmp; + + DEBUG_REQ(D_HA, req, "req committed"); + + if (mod == NULL) + return; + + req->rq_cb_data = NULL; + req->rq_commit_cb = NULL; + list_del_init(&req->rq_mod_list); + if (req->rq_sequence) { + list_for_each_entry_safe(cur, tmp, &mod->mod_replay_list, + rq_mod_list) + { + LASSERT(cur != LP_POISON); + LASSERT(cur->rq_type != LI_POISON); + DEBUG_REQ(D_HA, cur, "req balanced"); + LASSERT(cur->rq_transno != 0); + LASSERT(cur->rq_import == req->rq_import); + + list_del_init(&cur->rq_mod_list); + /* We no longer want to preserve this for transno- + * unconditional replay. 
*/ + spin_lock(&cur->rq_lock); + cur->rq_replay = 0; + spin_unlock(&cur->rq_lock); + } + } + + if (list_empty(&mod->mod_replay_list)) { + if (mod->mod_och != NULL) + mod->mod_och->och_mod = NULL; + + OBD_FREE_PTR(mod); + } +} + int mdc_set_open_replay_data(struct obd_export *exp, struct obd_client_handle *och, struct ptlrpc_request *open_req) { - struct mdc_open_data *mod; + struct md_open_data *mod; struct mdt_rec_create *rec = lustre_msg_buf(open_req->rq_reqmsg, DLM_INTENT_REC_OFF, sizeof(*rec)); @@ -703,9 +744,10 @@ int mdc_set_open_replay_data(struct obd_export *exp, OBD_ALLOC(mod, sizeof(*mod)); if (mod == NULL) { DEBUG_REQ(D_ERROR, open_req, - "Can't allocate mdc_open_data"); + "Can't allocate md_open_data"); RETURN(0); } + CFS_INIT_LIST_HEAD(&mod->mod_replay_list); spin_lock(&open_req->rq_lock); if (!open_req->rq_replay) { @@ -717,8 +759,8 @@ int mdc_set_open_replay_data(struct obd_export *exp, och->och_mod = mod; mod->mod_och = och; open_req->rq_cb_data = mod; - mod->mod_open_req = open_req; - open_req->rq_commit_cb = mdc_commit_open; + list_add_tail(&open_req->rq_mod_list, &mod->mod_replay_list); + open_req->rq_commit_cb = mdc_commit_delayed; spin_unlock(&open_req->rq_lock); } @@ -739,12 +781,12 @@ int mdc_set_open_replay_data(struct obd_export *exp, int mdc_clear_open_replay_data(struct obd_export *exp, struct obd_client_handle *och) { - struct mdc_open_data *mod = och->och_mod; + struct md_open_data *mod = och->och_mod; ENTRY; /* - * Don't free the structure now (it happens in mdc_commit_open(), after - * we're sure we won't need to fix up the close request in the future), + * Don't free the structure now (it happens in mdc_commit_delayed(), + * after the last request is removed from its replay list), * but make sure that replay doesn't poke at the och, which is about to * be freed. 
*/ @@ -756,40 +798,8 @@ int mdc_clear_open_replay_data(struct obd_export *exp, RETURN(0); } -static void mdc_commit_close(struct ptlrpc_request *req) -{ - struct mdc_open_data *mod = req->rq_cb_data; - struct ptlrpc_request *open_req; - struct obd_import *imp = req->rq_import; - - DEBUG_REQ(D_HA, req, "close req committed"); - if (mod == NULL) - return; - - mod->mod_close_req = NULL; - req->rq_cb_data = NULL; - req->rq_commit_cb = NULL; - - open_req = mod->mod_open_req; - LASSERT(open_req != NULL); - LASSERT(open_req != LP_POISON); - LASSERT(open_req->rq_type != LI_POISON); - - DEBUG_REQ(D_HA, open_req, "open req balanced"); - LASSERT(open_req->rq_transno != 0); - LASSERT(open_req->rq_import == imp); - - /* - * We no longer want to preserve this for transno-unconditional - * replay. Decref open req here as well. - */ - spin_lock(&open_req->rq_lock); - open_req->rq_replay = 0; - spin_unlock(&open_req->rq_lock); -} - int mdc_close(struct obd_export *exp, struct md_op_data *op_data, - struct obd_client_handle *och, struct ptlrpc_request **request) + struct md_open_data *mod, struct ptlrpc_request **request) { struct obd_device *obd = class_exp2obd(exp); int reqsize[4] = { sizeof(struct ptlrpc_body), @@ -800,7 +810,6 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, obd->u.cli.cl_max_mds_easize, obd->u.cli.cl_max_mds_cookiesize }; struct ptlrpc_request *req; - struct mdc_open_data *mod; int rc; ENTRY; @@ -818,26 +827,15 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, req->rq_request_portal = MDS_READPAGE_PORTAL; /* Ensure that this close's handle is fixed up during replay. 
*/ - LASSERT(och != NULL); - LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); - mod = och->och_mod; - if (likely(mod != NULL)) { - if (mod->mod_open_req->rq_type == LI_POISON) { - CERROR("LBUG POISONED open %p!\n", mod->mod_open_req); - LBUG(); - ptlrpc_req_finished(req); - req = NULL; - GOTO(out, rc = -EIO); - } - mod->mod_close_req = req; - DEBUG_REQ(D_HA, mod->mod_open_req, "matched open"); - } else { + if (likely(mod != NULL)) + list_add_tail(&req->rq_mod_list, &mod->mod_replay_list); + else CDEBUG(D_HA, "couldn't find open req; expecting close error\n"); - } mdc_close_pack(req, REQ_REC_OFF, op_data); ptlrpc_req_set_repsize(req, 4, repsize); - req->rq_commit_cb = mdc_commit_close; + req->rq_commit_cb = mdc_commit_delayed; + req->rq_replay = 1; LASSERT(req->rq_cb_data == NULL); req->rq_cb_data = mod; @@ -859,7 +857,7 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, rc = -rc; } else if (mod == NULL) { if (req->rq_import->imp_replayable) - CERROR("Unexpected: can't find mdc_open_data," + CERROR("Unexpected: can't find md_open_data," "but close succeeded with replayable imp" "Please tell CFS.\n"); } @@ -881,7 +879,7 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, } int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data, - struct obd_client_handle *och) + struct md_open_data *mod) { struct obd_device *obd = class_exp2obd(exp); struct ptlrpc_request *req; @@ -900,14 +898,29 @@ int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data, if (req == NULL) RETURN(-ENOMEM); - /* XXX: add DONE_WRITING request to och -- when Size-on-MDS - * recovery will be ready. 
*/ mdc_close_pack(req, REQ_REC_OFF, op_data); + + req->rq_replay = 1; + req->rq_cb_data = mod; + req->rq_commit_cb = mdc_commit_delayed; + if (likely(mod != NULL)) + list_add_tail(&req->rq_mod_list, &mod->mod_replay_list); + else + CDEBUG(D_HA, "couldn't find open req; expecting close error\n"); ptlrpc_req_set_repsize(req, 2, repsize); mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL); rc = ptlrpc_queue_wait(req); mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL); + + /* Close the open replay sequence if an error occurred or no SOM + * attribute update is needed. */ + if (rc != -EAGAIN) + ptlrpc_close_replay_seq(req); + + if (rc && rc != -EAGAIN && req->rq_commit_cb) + req->rq_commit_cb(req); + ptlrpc_req_finished(req); RETURN(rc); } @@ -1271,10 +1284,13 @@ static int mdc_pin(struct obd_export *exp, const struct lu_fid *fid, OBD_ALLOC(handle->och_mod, sizeof(*handle->och_mod)); if (handle->och_mod == NULL) { - DEBUG_REQ(D_ERROR, req, "can't allocate mdc_open_data"); + DEBUG_REQ(D_ERROR, req, "can't allocate md_open_data"); RETURN(-ENOMEM); } - handle->och_mod->mod_open_req = req; /* will be dropped by unpin */ + + /* will be dropped by unpin */ + CFS_INIT_LIST_HEAD(&handle->och_mod->mod_replay_list); + list_add_tail(&req->rq_mod_list, &handle->och_mod->mod_replay_list); RETURN(rc); } @@ -1308,7 +1324,14 @@ static int mdc_unpin(struct obd_export *exp, CERROR("unpin failed: %d\n", rc); ptlrpc_req_finished(req); - ptlrpc_req_finished(handle->och_mod->mod_open_req); + + LASSERT(!list_empty(&handle->och_mod->mod_replay_list)); + req = list_entry(handle->och_mod->mod_replay_list.next, + typeof(*req), rq_mod_list); + list_del_init(&req->rq_mod_list); + ptlrpc_req_finished(req); + LASSERT(list_empty(&handle->och_mod->mod_replay_list)); + OBD_FREE(handle->och_mod, sizeof(*handle->och_mod)); RETURN(rc); } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 6d6f6dc..0497cfd 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1396,7 +1396,6
@@ static int mdt_reint_internal(struct mdt_thread_info *info, { struct req_capsule *pill = &info->mti_pill; struct mdt_device *mdt = info->mti_mdt; - struct ptlrpc_request *req = mdt_info_req(info); struct mdt_body *repbody; int need_shrink = 0; int rc; @@ -1443,21 +1442,12 @@ static int mdt_reint_internal(struct mdt_thread_info *info, if (rc != 0) GOTO(out_ucred, rc = err_serious(rc)); - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { - struct mdt_client_data *mcd; - - mcd = req->rq_export->exp_mdt_data.med_mcd; - if (req_xid_is_last(req)) { - need_shrink = 0; - mdt_reconstruct(info, lhc); - rc = lustre_msg_get_status(req->rq_repmsg); - GOTO(out_ucred, rc); - } - DEBUG_REQ(D_HA, req, "no reply for RESENT (xid "LPD64")", - mcd->mcd_last_xid); - } - need_shrink = 0; + if (mdt_check_resent(info, mdt_reconstruct, lhc)) { + rc = lustre_msg_get_status(mdt_info_req(info)->rq_repmsg); + GOTO(out_ucred, rc); + } + rc = mdt_reint_rec(info, lhc); EXIT; out_ucred: diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 2434c62..084e6d1 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -532,6 +532,8 @@ void mdt_lock_handle_init(struct mdt_lock_handle *lh); void mdt_lock_handle_fini(struct mdt_lock_handle *lh); void mdt_reconstruct(struct mdt_thread_info *, struct mdt_lock_handle *); +void mdt_reconstruct_generic(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc); extern void target_recovery_fini(struct obd_device *obd); extern void target_recovery_init(struct obd_device *obd, @@ -671,6 +673,26 @@ static inline struct mdt_export_data *mdt_req2med(struct ptlrpc_request *req) return &req->rq_export->exp_mdt_data; } +typedef void (*mdt_reconstruct_t)(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc); +static inline int mdt_check_resent(struct mdt_thread_info *info, + mdt_reconstruct_t reconstruct, + struct mdt_lock_handle *lhc) +{ + struct ptlrpc_request *req = mdt_info_req(info); + ENTRY; + + if 
(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + if (req_xid_is_last(req)) { + reconstruct(info, lhc); + RETURN(1); + } + DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")", + req->rq_export->exp_mdt_data.med_mcd->mcd_last_xid); + } + RETURN(0); +} + #define MDT_FAIL_CHECK(id) \ ({ \ if (unlikely(OBD_FAIL_CHECK(id))) \ diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index fe4b6f3..e83eede 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -1150,6 +1150,7 @@ int mdt_close(struct mdt_thread_info *info) struct mdt_object *o; struct md_attr *ma = &info->mti_attr; struct mdt_body *repbody = NULL; + struct ptlrpc_request *req = mdt_info_req(info); int rc, ret = 0; ENTRY; @@ -1165,6 +1166,9 @@ int mdt_close(struct mdt_thread_info *info) req_capsule_set_size(&info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER, info->mti_mdt->mdt_max_cookiesize); rc = req_capsule_pack(&info->mti_pill); + if (mdt_check_resent(info, mdt_reconstruct_generic, NULL)) + RETURN(lustre_msg_get_status(req->rq_repmsg)); + /* Continue to close handle even if we can not pack reply */ if (rc == 0) { repbody = req_capsule_server_get(&info->mti_pill, @@ -1185,7 +1189,7 @@ int mdt_close(struct mdt_thread_info *info) } else rc = err_serious(rc); - med = &mdt_info_req(info)->rq_export->exp_mdt_data; + med = &req->rq_export->exp_mdt_data; spin_lock(&med->med_open_lock); mfd = mdt_handle2mfd(info, &info->mti_epoch->handle); if (mdt_mfd_closed(mfd)) { @@ -1220,7 +1224,6 @@ int mdt_close(struct mdt_thread_info *info) int mdt_done_writing(struct mdt_thread_info *info) { struct mdt_body *repbody = NULL; - struct ptlrpc_request *req = mdt_info_req(info); struct mdt_export_data *med; struct mdt_file_data *mfd; int rc; @@ -1240,6 +1243,9 @@ int mdt_done_writing(struct mdt_thread_info *info) if (rc) RETURN(err_serious(rc)); + if (mdt_check_resent(info, mdt_reconstruct_generic, NULL)) + RETURN(lustre_msg_get_status(mdt_info_req(info)->rq_repmsg)); + med = 
&info->mti_exp->exp_mdt_data; spin_lock(&med->med_open_lock); mfd = mdt_handle2mfd(info, &info->mti_epoch->handle); @@ -1251,17 +1257,8 @@ int mdt_done_writing(struct mdt_thread_info *info) RETURN(-ESTALE); } - if (!(mfd->mfd_mode == FMODE_EPOCH || - mfd->mfd_mode == FMODE_EPOCHLCK)) { - spin_unlock(&med->med_open_lock); - DEBUG_REQ(D_WARNING, req, "req should be resent req"); - LASSERT(mfd->mfd_mode == FMODE_SOM); - LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_RESENT | MSG_REPLAY)); - /*Since we did not bond this req with open/close, - *Why we should keep this req as replay req XXX*/ - GOTO(empty_transno, rc); - } + LASSERT(mfd->mfd_mode == FMODE_EPOCH || + mfd->mfd_mode == FMODE_EPOCHLCK); class_handle_unhash(&mfd->mfd_handle); list_del_init(&mfd->mfd_list); spin_unlock(&med->med_open_lock); @@ -1270,7 +1267,6 @@ int mdt_done_writing(struct mdt_thread_info *info) info->mti_epoch->flags |= MF_EPOCH_CLOSE; info->mti_attr.ma_valid = 0; rc = mdt_mfd_close(info, mfd); -empty_transno: mdt_empty_transno(info); RETURN(rc); } diff --git a/lustre/mdt/mdt_recovery.c b/lustre/mdt/mdt_recovery.c index 7780a7b..38f8ecb 100644 --- a/lustre/mdt/mdt_recovery.c +++ b/lustre/mdt/mdt_recovery.c @@ -1021,8 +1021,8 @@ void mdt_req_from_mcd(struct ptlrpc_request *req, mds_steal_ack_locks(req); } -static void mdt_reconstruct_generic(struct mdt_thread_info *mti, - struct mdt_lock_handle *lhc) +void mdt_reconstruct_generic(struct mdt_thread_info *mti, + struct mdt_lock_handle *lhc) { struct ptlrpc_request *req = mdt_info_req(mti); struct mdt_export_data *med = &req->rq_export->exp_mdt_data; @@ -1076,17 +1076,24 @@ static void mdt_reconstruct_setattr(struct mdt_thread_info *mti, obj = mdt_object_find(mti->mti_env, mdt, mti->mti_rr.rr_fid1); LASSERT(!IS_ERR(obj)); mo_attr_get(mti->mti_env, mdt_object_child(obj), &mti->mti_attr); - mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr, mdt_object_fid(obj)); + mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr, + 
mdt_object_fid(obj)); + if (mti->mti_epoch && (mti->mti_epoch->flags & MF_EPOCH_OPEN)) { + struct mdt_file_data *mfd; + struct mdt_body *repbody; + + repbody = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY); + repbody->ioepoch = obj->mot_ioepoch; + spin_lock(&med->med_open_lock); + list_for_each_entry(mfd, &med->med_open_head, mfd_list) { + if (mfd->mfd_xid == req->rq_xid) + break; + } + LASSERT(&mfd->mfd_list != &med->med_open_head); + spin_unlock(&med->med_open_lock); + repbody->handle.cookie = mfd->mfd_handle.h_cookie; + } - /* Don't return OST-specific attributes if we didn't just set them */ -/* - if (rec->ur_iattr.ia_valid & ATTR_SIZE) - body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) - body->valid |= OBD_MD_FLMTIME; - if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET)) - body->valid |= OBD_MD_FLATIME; -*/ mdt_object_put(mti->mti_env, obj); } @@ -1116,4 +1123,3 @@ void mdt_reconstruct(struct mdt_thread_info *mti, reconstructors[mti->mti_rr.rr_opcode](mti, lhc); EXIT; } - diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 676df34..eba73a4 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -353,16 +353,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info, info->mti_epoch->handle.cookie); GOTO(out_put, rc = -ESTALE); } - if (mfd->mfd_mode != FMODE_SOM) { - CWARN("mfd_mode %d not right should be in replay\n", - mfd->mfd_mode); - LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & - MSG_REPLAY); - LASSERT(mfd->mfd_mode == FMODE_EPOCH); - mfd->mfd_mode = FMODE_SOM; - } - - LASSERT(ma->ma_attr.la_valid & LA_SIZE); + LASSERT(mfd->mfd_mode == FMODE_SOM); LASSERT(!(info->mti_epoch->flags & MF_EPOCH_CLOSE)); class_handle_unhash(&mfd->mfd_handle); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 6779362..8f79036 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -393,6 +393,7 @@ ptlrpc_prep_req_pool(struct obd_import *imp, 
__u32 version, int opcode, spin_lock_init(&request->rq_lock); CFS_INIT_LIST_HEAD(&request->rq_list); CFS_INIT_LIST_HEAD(&request->rq_replay_list); + CFS_INIT_LIST_HEAD(&request->rq_mod_list); CFS_INIT_LIST_HEAD(&request->rq_ctx_chain); CFS_INIT_LIST_HEAD(&request->rq_set_chain); cfs_waitq_init(&request->rq_reply_waitq); @@ -1264,6 +1265,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) if (request->rq_import != NULL) { if (!locked) spin_lock(&request->rq_import->imp_lock); + list_del_init(&request->rq_mod_list); list_del_init(&request->rq_replay_list); if (!locked) spin_unlock(&request->rq_import->imp_lock); -- 1.8.3.1