Whamcloud - gitweb
Branch HEAD
author vitaly <vitaly>
Wed, 19 Sep 2007 19:36:26 +0000 (19:36 +0000)
committer vitaly <vitaly>
Wed, 19 Sep 2007 19:36:26 +0000 (19:36 +0000)
b=13581
i=tappro
i=huanghua

Replay Size-on-MDS related RPCs properly.

23 files changed:
lustre/cmm/mdc_object.c
lustre/include/lustre_lib.h
lustre/include/lustre_net.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/liblustre/file.c
lustre/liblustre/llite_lib.h
lustre/liblustre/super.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_close.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/lmv/lmv_obd.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_reint.c
lustre/mdc/mdc_request.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_open.c
lustre/mdt/mdt_recovery.c
lustre/mdt/mdt_reint.c
lustre/ptlrpc/client.c

index 1ac12c9..fde11da 100644 (file)
@@ -290,7 +290,7 @@ static int mdc_attr_set(const struct lu_env *env, struct md_object *mo,
         }
 
         rc = md_setattr(mc->mc_desc.cl_exp, &mci->mci_opdata,
-                        NULL, 0, NULL, 0, &mci->mci_req);
+                        NULL, 0, NULL, 0, &mci->mci_req, NULL);
 
         ptlrpc_req_finished(mci->mci_req);
 
index 206a1bf..85cd2cf 100644 (file)
@@ -90,12 +90,11 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
 int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg);
 struct client_obd *client_conn2cli(struct lustre_handle *conn);
 
-struct mdc_open_data;
+struct md_open_data;
 struct obd_client_handle {
         struct lustre_handle  och_fh;
         struct lu_fid         och_fid;
-        struct llog_cookie    och_cookie;
-        struct mdc_open_data *och_mod;
+        struct md_open_data *och_mod;
         __u32 och_magic;
         int och_flags;
 };
index d5c5553..b519770 100644 (file)
@@ -297,11 +297,13 @@ struct ptlrpc_request {
                  * after server commits corresponding transaction. This is
                  * used for operations that require sequence of multiple
                  * requests to be replayed. The only example currently is file
-                 * open/close. When last request in such a sequence is
-                 * committed, ->rq_replay is cleared on all requests in the
+                 * open/close/dw/setattr. When last request in such a sequence
+                 * is committed, ->rq_replay is cleared on all requests in the
                  * sequence.
                  */
                 rq_replay:1,
+                /* this is the last request in the sequence. */
+                rq_sequence:1,
                 rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
                 rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1;
         enum rq_phase rq_phase; /* one of RQ_PHASE_* */
@@ -323,6 +325,7 @@ struct ptlrpc_request {
         __u64 rq_transno;
         __u64 rq_xid;
         struct list_head rq_replay_list;
+        struct list_head rq_mod_list;
 
         struct ptlrpc_cli_ctx   *rq_cli_ctx;     /* client's half ctx */
         struct ptlrpc_svc_ctx   *rq_svc_ctx;     /* server's half ctx */
@@ -402,6 +405,14 @@ struct ptlrpc_request {
         struct lu_context           rq_session;
 };
 
+static inline void ptlrpc_close_replay_seq(struct ptlrpc_request *req)
+{
+        spin_lock(&req->rq_lock);
+        req->rq_replay = 0;
+        req->rq_sequence = 1;
+        spin_unlock(&req->rq_lock);
+}
+
 static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
 {
         LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
index d16b2ea..8c1489f 100644 (file)
@@ -1214,18 +1214,23 @@ struct lustre_md {
         struct obd_capa         *oss_capa;
 };
 
+struct md_open_data {
+        struct obd_client_handle *mod_och;
+        struct list_head          mod_replay_list;
+};
+
 struct md_ops {
         int (*m_getstatus)(struct obd_export *, struct lu_fid *,
                            struct obd_capa **);
         int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *,
                                ldlm_iterator_t, void *);
         int (*m_close)(struct obd_export *, struct md_op_data *,
-                       struct obd_client_handle *, struct ptlrpc_request **);
+                       struct md_open_data *, struct ptlrpc_request **);
         int (*m_create)(struct obd_export *, struct md_op_data *,
                         const void *, int, int, __u32, __u32, __u32,
                         __u64, struct ptlrpc_request **);
         int (*m_done_writing)(struct obd_export *, struct md_op_data  *,
-                              struct obd_client_handle *);
+                              struct md_open_data *);
         int (*m_enqueue)(struct obd_export *, struct ldlm_enqueue_info *,
                          struct lookup_intent *, struct md_op_data *,
                          struct lustre_handle *, void *, int, int);
@@ -1248,7 +1253,8 @@ struct md_ops {
                            const struct lu_fid *,
                            struct ptlrpc_request **);
         int (*m_setattr)(struct obd_export *, struct md_op_data *, void *,
-                         int , void *, int, struct ptlrpc_request **);
+                         int , void *, int, struct ptlrpc_request **,
+                         struct md_open_data **mod);
         int (*m_sync)(struct obd_export *, const struct lu_fid *,
                       struct obd_capa *, struct ptlrpc_request **);
         int (*m_readpage)(struct obd_export *, const struct lu_fid *,
index 385df8f..64df844 100644 (file)
@@ -1677,14 +1677,14 @@ static inline int md_change_cbdata(struct obd_export *exp,
 }
 
 static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
-                           struct obd_client_handle *och,
+                           struct md_open_data *mod,
                            struct ptlrpc_request **request)
 {
         int rc;
         ENTRY;
         EXP_CHECK_MD_OP(exp, close);
         EXP_MD_COUNTER_INCREMENT(exp, close);
-        rc = MDP(exp->exp_obd, close)(exp, op_data, och, request);
+        rc = MDP(exp->exp_obd, close)(exp, op_data, mod, request);
         RETURN(rc);
 }
 
@@ -1704,13 +1704,13 @@ static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
 
 static inline int md_done_writing(struct obd_export *exp,
                                   struct md_op_data *op_data,
-                                  struct obd_client_handle *och)
+                                  struct md_open_data *mod)
 {
         int rc;
         ENTRY;
         EXP_CHECK_MD_OP(exp, done_writing);
         EXP_MD_COUNTER_INCREMENT(exp, done_writing);
-        rc = MDP(exp->exp_obd, done_writing)(exp, op_data, och);
+        rc = MDP(exp->exp_obd, done_writing)(exp, op_data, mod);
         RETURN(rc);
 }
 
@@ -1802,14 +1802,15 @@ static inline int md_is_subdir(struct obd_export *exp,
 
 static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
                              void *ea, int ealen, void *ea2, int ea2len,
-                             struct ptlrpc_request **request)
+                             struct ptlrpc_request **request,
+                             struct md_open_data **mod)
 {
         int rc;
         ENTRY;
         EXP_CHECK_MD_OP(exp, setattr);
         EXP_MD_COUNTER_INCREMENT(exp, setattr);
         rc = MDP(exp->exp_obd, setattr)(exp, op_data, ea, ealen,
-                                        ea2, ea2len, request);
+                                        ea2, ea2len, request, mod);
         RETURN(rc);
 }
 
index 2ee4933..153c64c 100644 (file)
@@ -318,8 +318,8 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
         return rc;
 }
 
-int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
-                         __u64 ioepoch)
+int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
+                         struct lustre_handle *fh, __u64 ioepoch)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
         struct llu_sb_info *sbi = llu_i2sbi(inode);
@@ -332,7 +332,11 @@ int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
         LASSERT(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM);
         
         rc = llu_inode_getattr(inode, &oa);
-        if (rc) {
+        if (rc == -ENOENT) {
+                oa.o_valid = 0;
+                CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n",
+                       lli->lli_smd->lsm_object_id);
+        } else if (rc) {
                 CERROR("inode_getattr failed (%d): unable to send a "
                        "Size-on-MDS attribute update for inode %llu/%lu\n",
                        rc, (long long)llu_i2stat(inode)->st_ino,
@@ -345,7 +349,7 @@ int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
         op_data.op_ioepoch = ioepoch;
         op_data.op_flags |= MF_SOM_CHANGE;
 
-        rc = llu_md_setattr(inode, &op_data);
+        rc = llu_md_setattr(inode, &op_data, &mod);
         RETURN(rc);
 }
 
@@ -357,7 +361,7 @@ int llu_md_close(struct obd_export *md_exp, struct inode *inode)
         struct obd_client_handle *och = &fd->fd_mds_och;
         struct intnl_stat *st = llu_i2stat(inode);
         struct md_op_data op_data = { { 0 } };
-        int rc;
+        int seq_end = 0, rc;
         ENTRY;
 
         /* clear group lock, if present */
@@ -401,12 +405,15 @@ int llu_md_close(struct obd_export *md_exp, struct inode *inode)
         op_data.op_ioepoch = lli->lli_ioepoch;
         memcpy(&op_data.op_handle, &och->och_fh, sizeof(op_data.op_handle));
 
-        rc = md_close(md_exp, &op_data, och, &req);
+        rc = md_close(md_exp, &op_data, och->och_mod, &req);
+        if (rc != -EAGAIN)
+                seq_end = 1;
+
         if (rc == -EAGAIN) {
                 /* We are the last writer, so the MDS has instructed us to get
                  * the file size and any write cookies, then close again. */
                 LASSERT(fd->fd_flags & FMODE_WRITE);
-                rc = llu_sizeonmds_update(inode, &och->och_fh,
+                rc = llu_sizeonmds_update(inode, och->och_mod, &och->och_fh,
                                           op_data.op_ioepoch);
                 if (rc) {
                         CERROR("inode %llu mdc Size-on-MDS update failed: "
@@ -423,6 +430,8 @@ int llu_md_close(struct obd_export *md_exp, struct inode *inode)
                                (long long)st->st_ino, rc);
         }
 
+        if (seq_end)
+                ptlrpc_close_replay_seq(req);
         md_clear_open_replay_data(md_exp, och);
         ptlrpc_req_finished(req);
         och->och_fh.cookie = DEAD_HANDLE_MAGIC;
index f242b45..3243fc2 100644 (file)
@@ -193,7 +193,8 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
 int ll_it_open_error(int phase, struct lookup_intent *it);
 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md);
 int llu_inode_getattr(struct inode *inode, struct obdo *obdo);
-int llu_md_setattr(struct inode *inode, struct md_op_data *op_data);
+int llu_md_setattr(struct inode *inode, struct md_op_data *op_data,
+                   struct md_open_data **mod);
 int llu_setattr_raw(struct inode *inode, struct iattr *attr);
 
 extern struct fssw_ops llu_fssw_ops;
@@ -208,8 +209,8 @@ int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it);
 int llu_iop_open(struct pnode *pnode, int flags, mode_t mode);
 int llu_md_close(struct obd_export *md_exp, struct inode *inode);
 int llu_file_release(struct inode *inode);
-int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
-                         __u64 ioepoch);
+int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
+                         struct lustre_handle *fh, __u64 ioepoch);
 int llu_iop_close(struct inode *inode);
 _SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off);
 int llu_vmtruncate(struct inode * inode, loff_t offset, obd_flag obd_flags);
index 6c9dc66..0173a1d 100644 (file)
@@ -594,7 +594,8 @@ static int inode_setattr(struct inode * inode, struct iattr * attr)
         return error;
 }
 
-int llu_md_setattr(struct inode *inode, struct md_op_data *op_data)
+int llu_md_setattr(struct inode *inode, struct md_op_data *op_data,
+                   struct md_open_data **mod)
 {
         struct lustre_md md;
         struct llu_sb_info *sbi = llu_i2sbi(inode);
@@ -603,7 +604,8 @@ int llu_md_setattr(struct inode *inode, struct md_op_data *op_data)
         ENTRY;
 
         llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY);
-        rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &request);
+        rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL,
+                        0, &request, mod);
 
         if (rc) {
                 ptlrpc_req_finished(request);
@@ -633,7 +635,8 @@ int llu_md_setattr(struct inode *inode, struct md_op_data *op_data)
 
 /* Close IO epoch and send Size-on-MDS attribute update. */
 static int llu_setattr_done_writing(struct inode *inode,
-                                    struct md_op_data *op_data)
+                                    struct md_op_data *op_data,
+                                    struct md_open_data *mod)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
         struct intnl_stat *st = llu_i2stat(inode);
@@ -649,11 +652,11 @@ static int llu_setattr_done_writing(struct inode *inode,
                op_data->op_ioepoch, PFID(&lli->lli_fid));
 
         op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
-        rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, NULL);
+        rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, mod);
         if (rc == -EAGAIN) {
                 /* MDS has instructed us to obtain Size-on-MDS attribute
                  * from OSTs and send setattr to back to MDS. */
-                rc = llu_sizeonmds_update(inode, &op_data->op_handle,
+                rc = llu_sizeonmds_update(inode, mod, &op_data->op_handle,
                                           op_data->op_ioepoch);
         } else if (rc) {
                 CERROR("inode %llu mdc truncate failed: rc = %d\n",
@@ -682,7 +685,8 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
         struct intnl_stat *st = llu_i2stat(inode);
         int ia_valid = attr->ia_valid;
         struct md_op_data op_data = { { 0 } };
-        int rc = 0;
+        struct md_open_data *mod = NULL;
+        int rc = 0, rc1 = 0;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
@@ -741,15 +745,18 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
                 /* Open epoch for truncate. */
                 if (ia_valid & ATTR_SIZE)
                         op_data.op_flags = MF_EPOCH_OPEN;
-                rc = llu_md_setattr(inode, &op_data);
+                rc = llu_md_setattr(inode, &op_data, &mod);
                 if (rc)
                         RETURN(rc);
 
+                if (op_data.op_ioepoch)
+                        CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for "
+                               "truncate\n", op_data.op_ioepoch,
+                               PFID(&llu_i2info(inode)->lli_fid));
+
                 if (!lsm || !S_ISREG(st->st_mode)) {
                         CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
-                        if (op_data.op_ioepoch)
-                                rc = llu_setattr_done_writing(inode, &op_data);
-                        RETURN(rc);
+                        GOTO(out, rc);
                 }
         } else {
                 /* The OST doesn't check permissions, but the alternative is
@@ -810,10 +817,9 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
                                      &lockh, flags);
                 if (rc != ELDLM_OK) {
                         if (rc > 0)
-                                RETURN(-ENOLCK);
-                        RETURN(rc);
+                                GOTO(out, rc = -ENOLCK);
+                        GOTO(out, rc);
                 }
-
                 rc = llu_vmtruncate(inode, attr->ia_size, obd_flags);
 
                 /* unlock now as we don't mind others file lockers racing with
@@ -824,9 +830,6 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
                         if (!rc)
                                 rc = err;
                 }
-
-                if (op_data.op_ioepoch)
-                        rc = llu_setattr_done_writing(inode, &op_data);
         } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
                 struct obd_info oinfo = { { { 0 } } };
                 struct obdo oa;
@@ -846,7 +849,11 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
                 if (rc)
                         CERROR("obd_setattr_async fails: rc=%d\n", rc);
         }
-        RETURN(rc);
+        EXIT;
+out:
+        if (op_data.op_ioepoch)
+                rc1 = llu_setattr_done_writing(inode, &op_data, mod);
+        return rc ? rc : rc1;
 }
 
 /* here we simply act as a thin layer to glue it with
@@ -1671,7 +1678,7 @@ static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
 
         /* swabbing is done in lov_setstripe() on server side */
         rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
-                        sizeof(lum), NULL, 0, &request);
+                        sizeof(lum), NULL, 0, &request, NULL);
         if (rc) {
                 ptlrpc_req_finished(request);
                 if (rc != -EPERM && rc != -EACCES)
index 4c463d1..7a2cc4b 100644 (file)
@@ -553,7 +553,7 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump)
 
         /* swabbing is done in lov_setstripe() on server side */
         rc = md_setattr(sbi->ll_md_exp, op_data, lump, sizeof(*lump),
-                        NULL, 0, &req);
+                        NULL, 0, &req, NULL);
         ll_finish_md_op_data(op_data);
         ptlrpc_req_finished(req);
         if (rc) {
index 62b0bd0..cf82da1 100644 (file)
@@ -95,7 +95,7 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
         struct ptlrpc_request *req = NULL;
         struct obd_device *obd = class_exp2obd(exp);
         int epoch_close = 1;
-        int rc;
+        int seq_end = 0, rc;
         ENTRY;
 
         if (obd == NULL) {
@@ -122,7 +122,9 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
 
         ll_prepare_close(inode, op_data, och);
         epoch_close = (op_data->op_flags & MF_EPOCH_CLOSE);
-        rc = md_close(md_exp, op_data, och, &req);
+        rc = md_close(md_exp, op_data, och->och_mod, &req);
+        if (rc != -EAGAIN)
+                seq_end = 1;
 
         if (rc == -EAGAIN) {
                 /* This close must have the epoch closed. */
@@ -130,8 +132,8 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
                 LASSERT(epoch_close);
                 /* MDS has instructed us to obtain Size-on-MDS attribute from
                  * OSTs and send setattr to back to MDS. */
-                rc = ll_sizeonmds_update(inode, &och->och_fh,
-                                         op_data->op_ioepoch);
+                rc = ll_sizeonmds_update(inode, och->och_mod,
+                                         &och->och_fh, op_data->op_ioepoch);
                 if (rc) {
                         CERROR("inode %lu mdc Size-on-MDS update failed: "
                                "rc = %d\n", inode->i_ino, rc);
@@ -150,7 +152,6 @@ static int ll_close_inode_openhandle(struct obd_export *md_exp,
                                inode->i_ino, rc);
         }
 
-        ptlrpc_req_finished(req); /* This is close request */
         EXIT;
 out:
       
@@ -158,12 +159,15 @@ out:
             S_ISREG(inode->i_mode) && (och->och_flags & FMODE_WRITE)) {
                 ll_queue_done_writing(inode, LLIF_DONE_WRITING);
         } else {
+                if (seq_end)
+                        ptlrpc_close_replay_seq(req);
                 md_clear_open_replay_data(md_exp, och);
                 /* Free @och if it is not waiting for DONE_WRITING. */
                 och->och_fh.cookie = DEAD_HANDLE_MAGIC;
                 OBD_FREE_PTR(och);
         }
-        
+        if (req) /* This is close request */
+                ptlrpc_req_finished(req);
         return rc;
 }
 
index 326adf4..76e4b4a 100644 (file)
@@ -184,8 +184,8 @@ out:
         return;
 }
 
-int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
-                        __u64 ioepoch)
+int ll_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
+                        struct lustre_handle *fh, __u64 ioepoch)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
         struct md_op_data *op_data;
@@ -207,7 +207,11 @@ int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
                 RETURN(-ENOMEM);
         }
         rc = ll_inode_getattr(inode, oa);
-        if (rc) {
+        if (rc == -ENOENT) {
+                oa->o_valid = 0;
+                CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n",
+                       lli->lli_smd->lsm_object_id);
+        } else if (rc) {
                 CERROR("inode_getattr failed (%d): unable to send a "
                        "Size-on-MDS attribute update for inode %lu/%u\n",
                        rc, inode->i_ino, inode->i_generation);
@@ -221,7 +225,7 @@ int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
         op_data->op_ioepoch = ioepoch;
         op_data->op_flags |= MF_SOM_CHANGE;
 
-        rc = ll_md_setattr(inode, op_data);
+        rc = ll_md_setattr(inode, op_data, &mod);
         EXIT;
 out:
         if (oa)
@@ -255,12 +259,12 @@ static void ll_done_writing(struct inode *inode)
         
         ll_pack_inode2opdata(inode, op_data, &och->och_fh);
 
-        rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och);
+        rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, och->och_mod);
         if (rc == -EAGAIN) {
                 /* MDS has instructed us to obtain Size-on-MDS attribute from 
                  * OSTs and send setattr to back to MDS. */
-                rc = ll_sizeonmds_update(inode, &och->och_fh,
-                                         op_data->op_ioepoch);
+                rc = ll_sizeonmds_update(inode, och->och_mod,
+                                         &och->och_fh, op_data->op_ioepoch);
         } else if (rc) {
                 CERROR("inode %lu mdc done_writing failed: rc = %d\n",
                        inode->i_ino, rc);
index 2357fed..b61b3db 100644 (file)
@@ -525,10 +525,11 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode,
 int ll_md_real_close(struct inode *inode, int flags);
 void ll_epoch_close(struct inode *inode, struct md_op_data *op_data,
                     struct obd_client_handle **och, unsigned long flags);
-int ll_sizeonmds_update(struct inode *inode, struct lustre_handle *fh,
-                        __u64 ioepoch);
+int ll_sizeonmds_update(struct inode *inode, struct md_open_data *data,
+                        struct lustre_handle *fh, __u64 ioepoch);
 int ll_inode_getattr(struct inode *inode, struct obdo *obdo);
-int ll_md_setattr(struct inode *inode, struct md_op_data *op_data);
+int ll_md_setattr(struct inode *inode, struct md_op_data *op_data,
+                  struct md_open_data **mod);
 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
                           struct lustre_handle *fh);
 extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, struct file
index efdb244..8107969 100644 (file)
@@ -1127,7 +1127,8 @@ void ll_clear_inode(struct inode *inode)
         EXIT;
 }
 
-int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
+int ll_md_setattr(struct inode *inode, struct md_op_data *op_data,
+                  struct md_open_data **mod)
 {
         struct lustre_md md;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
@@ -1141,7 +1142,7 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
                 RETURN(PTR_ERR(op_data));
 
         rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, 
-                        &request);
+                        &request, mod);
         if (rc) {
                 ptlrpc_req_finished(request);
                 if (rc == -ENOENT) {
@@ -1172,7 +1173,7 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
         rc = inode_setattr(inode, &op_data->op_attr);
 
         /* Extract epoch data if obtained. */
-        memcpy(&op_data->op_handle, &md.body->handle, sizeof(op_data->op_handle));
+        op_data->op_handle = md.body->handle;
         op_data->op_ioepoch = md.body->ioepoch;
 
         ll_update_inode(inode, &md);
@@ -1183,7 +1184,8 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data)
 
 /* Close IO epoch and send Size-on-MDS attribute update. */
 static int ll_setattr_done_writing(struct inode *inode,
-                                   struct md_op_data *op_data)
+                                   struct md_op_data *op_data,
+                                   struct md_open_data *mod)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
         int rc = 0;
@@ -1197,12 +1199,11 @@ static int ll_setattr_done_writing(struct inode *inode,
                op_data->op_ioepoch, PFID(&lli->lli_fid));
 
         op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
-        /* XXX: pass och here for the recovery purpose. */
-        rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
+        rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod);
         if (rc == -EAGAIN) {
                 /* MDS has instructed us to obtain Size-on-MDS attribute
                  * from OSTs and send setattr to back to MDS. */
-                rc = ll_sizeonmds_update(inode, &op_data->op_handle,
+                rc = ll_sizeonmds_update(inode, mod, &op_data->op_handle,
                                          op_data->op_ioepoch);
         } else if (rc) {
                 CERROR("inode %lu mdc truncate failed: rc = %d\n",
@@ -1230,6 +1231,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct md_op_data *op_data = NULL;
+        struct md_open_data *mod = NULL;
         int ia_valid = attr->ia_valid;
         int rc = 0, rc1 = 0;
         ENTRY;
@@ -1300,8 +1302,8 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
         /* Open epoch for truncate. */
         if (ia_valid & ATTR_SIZE)
                 op_data->op_flags = MF_EPOCH_OPEN;
-
-        rc = ll_md_setattr(inode, op_data);
+        
+        rc = ll_md_setattr(inode, op_data, &mod);
         if (rc)
                 GOTO(out, rc);
 
@@ -1310,8 +1312,8 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                        "truncate\n", op_data->op_ioepoch, PFID(&lli->lli_fid));
 
         if (!lsm || !S_ISREG(inode->i_mode)) {
-                        CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
-                        GOTO(out, rc = 0);
+                CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
+                GOTO(out, rc = 0);
         }
 
         /* We really need to get our PW lock before we change inode->i_size.
@@ -1400,9 +1402,8 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
         EXIT;
 out:
         if (op_data) {
-                if (op_data->op_ioepoch) {
-                        rc1 = ll_setattr_done_writing(inode, op_data);
-                }
+                if (op_data->op_ioepoch)
+                        rc1 = ll_setattr_done_writing(inode, op_data, mod);
                 ll_finish_md_op_data(op_data);
         }
         return rc ? rc : rc1;
@@ -1838,7 +1839,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                 ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags;
                 op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG;
                 rc = md_setattr(sbi->ll_md_exp, op_data,
-                                NULL, 0, NULL, 0, &req);
+                                NULL, 0, NULL, 0, &req, NULL);
                 ll_finish_md_op_data(op_data);
                 ptlrpc_req_finished(req);
                 if (rc || lsm == NULL) {
index dbebe38..7a7da61 100644 (file)
@@ -1248,7 +1248,7 @@ static int lmv_change_cbdata(struct obd_export *exp, const struct lu_fid *fid,
 
 static int lmv_close(struct obd_export *exp,
                      struct md_op_data *op_data,
-                     struct obd_client_handle *och,
+                     struct md_open_data *mod,
                      struct ptlrpc_request **request)
 {
         struct obd_device *obd = exp->exp_obd;
@@ -1266,7 +1266,7 @@ static int lmv_close(struct obd_export *exp,
                 RETURN(PTR_ERR(tgt_exp));
 
         CDEBUG(D_OTHER, "CLOSE "DFID"\n", PFID(&op_data->op_fid1));
-        rc = md_close(tgt_exp, op_data, och, request);
+        rc = md_close(tgt_exp, op_data, mod, request);
         RETURN(rc);
 }
 
@@ -1411,7 +1411,7 @@ repeat:
 
 static int lmv_done_writing(struct obd_export *exp,
                             struct md_op_data *op_data,
-                            struct obd_client_handle *och)
+                            struct md_open_data *mod)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -1427,7 +1427,7 @@ static int lmv_done_writing(struct obd_export *exp,
         if (IS_ERR(tgt_exp))
                 RETURN(PTR_ERR(tgt_exp));
 
-        rc = md_done_writing(tgt_exp, op_data, och);
+        rc = md_done_writing(tgt_exp, op_data, mod);
         RETURN(rc);
 }
 
@@ -2025,7 +2025,8 @@ request:
 
 static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
                        void *ea, int ealen, void *ea2, int ea2len,
-                       struct ptlrpc_request **request)
+                       struct ptlrpc_request **request,
+                       struct md_open_data **mod)
 {
         struct obd_device *obd = exp->exp_obd;
         struct lmv_obd *lmv = &obd->u.lmv;
@@ -2057,7 +2058,7 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
                         }
 
                         rc = md_setattr(tgt_exp, op_data, ea, ealen,
-                                        ea2, ea2len, &req);
+                                        ea2, ea2len, &req, mod);
 
                         if (lu_fid_eq(&obj->lo_fid, &obj->lo_inodes[i].li_fid)) {
                                 /*
@@ -2079,7 +2080,7 @@ static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
                         RETURN(PTR_ERR(tgt_exp));
 
                 rc = md_setattr(tgt_exp, op_data, ea, ealen, ea2,
-                                ea2len, request);
+                                ea2len, request, mod);
         }
         RETURN(rc);
 }
index a23511d..723a37e 100644 (file)
@@ -64,12 +64,6 @@ void mdc_close_pack(struct ptlrpc_request *req, int offset,
 void mdc_exit_request(struct client_obd *cli);
 void mdc_enter_request(struct client_obd *cli);
 
-struct mdc_open_data {
-        struct obd_client_handle *mod_och;
-        struct ptlrpc_request    *mod_open_req;
-        struct ptlrpc_request    *mod_close_req;
-};
-
 static inline int client_is_remote(struct obd_export *exp)
 {
         struct obd_import *imp = class_exp2cliimp(exp);
@@ -139,6 +133,7 @@ int mdc_set_open_replay_data(struct obd_export *exp,
 
 int mdc_clear_open_replay_data(struct obd_export *exp,
                                struct obd_client_handle *och);
+void mdc_commit_delayed(struct ptlrpc_request *req);
 
 int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
                const void *data, int datalen, int mode, __u32 uid, __u32 gid,
@@ -150,7 +145,7 @@ int mdc_rename(struct obd_export *exp, struct md_op_data *op_data,
                struct ptlrpc_request **request);
 int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
                 void *ea, int ealen, void *ea2, int ea2len,
-                struct ptlrpc_request **request);
+                struct ptlrpc_request **request, struct md_open_data **mod);
 int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
                struct ptlrpc_request **request);
 int mdc_cancel_unused(struct obd_export *exp, const struct lu_fid *fid,
index 712a70b..0021b22 100644 (file)
@@ -98,7 +98,7 @@ int mdc_resource_get_unused(struct obd_export *exp, struct lu_fid *fid,
  * go to the setattr portal. */
 int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
                 void *ea, int ealen, void *ea2, int ea2len,
-                struct ptlrpc_request **request)
+                struct ptlrpc_request **request, struct md_open_data **mod)
 {
         CFS_LIST_HEAD(cancels);
         struct ptlrpc_request *req;
@@ -162,12 +162,36 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
         size[REPLY_REC_OFF] = sizeof(struct mdt_body);
         size[REPLY_REC_OFF + 1] = sizeof(struct lustre_capa);
         ptlrpc_req_set_repsize(req, 3, size);
+        if (mod && (op_data->op_flags & MF_EPOCH_OPEN) &&
+            req->rq_import->imp_replayable)
+        {
+                LASSERT(*mod == NULL);
+
+                OBD_ALLOC_PTR(*mod);
+                if (*mod == NULL) {
+                        DEBUG_REQ(D_ERROR, req, "Can't allocate "
+                                  "md_open_data");
+                } else {
+                        CFS_INIT_LIST_HEAD(&(*mod)->mod_replay_list);
+                }
+        }
+        if (mod && *mod) {
+                req->rq_cb_data = *mod;
+                req->rq_commit_cb = mdc_commit_delayed;
+                list_add_tail(&req->rq_mod_list, &(*mod)->mod_replay_list);
+                /* This is not the last request in sequence for truncate. */
+                if (op_data->op_flags & MF_EPOCH_OPEN)
+                        req->rq_replay = 1;
+                else
+                        req->rq_sequence = 1;
+        }
 
         rc = mdc_reint(req, rpc_lock, LUSTRE_IMP_FULL);
         *request = req;
         if (rc == -ERESTARTSYS)
                 rc = 0;
-
+        if (rc && req->rq_commit_cb)
+                req->rq_commit_cb(req);
         RETURN(rc);
 }
 
index d70954a..e294986 100644 (file)
@@ -610,26 +610,10 @@ int mdc_free_lustre_md(struct obd_export *exp, struct lustre_md *md)
         RETURN(0);
 }
 
-static void mdc_commit_open(struct ptlrpc_request *req)
-{
-        struct mdc_open_data *mod = req->rq_cb_data;
-        if (mod == NULL)
-                return;
-
-        if (mod->mod_close_req != NULL)
-                mod->mod_close_req->rq_cb_data = NULL;
-
-        if (mod->mod_och != NULL)
-                mod->mod_och->och_mod = NULL;
-
-        OBD_FREE(mod, sizeof(*mod));
-        req->rq_cb_data = NULL;
-}
-
 static void mdc_replay_open(struct ptlrpc_request *req)
 {
-        struct mdc_open_data *mod = req->rq_cb_data;
-        struct ptlrpc_request *close_req;
+        struct md_open_data *mod = req->rq_cb_data;
+        struct ptlrpc_request *cur, *tmp;
         struct obd_client_handle *och;
         struct lustre_handle old;
         struct mdt_body *body;
@@ -644,43 +628,100 @@ static void mdc_replay_open(struct ptlrpc_request *req)
 
         body = lustre_swab_repbuf(req, DLM_REPLY_REC_OFF, sizeof(*body),
                                   lustre_swab_mdt_body);
+        LASSERT(body != NULL);
 
         och = mod->mod_och;
         if (och != NULL) {
                 struct lustre_handle *file_fh;
 
                 LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
-                LASSERT(body != NULL);
 
                 file_fh = &och->och_fh;
                 CDEBUG(D_HA, "updating handle from "LPX64" to "LPX64"\n",
                        file_fh->cookie, body->handle.cookie);
-                memcpy(&old, file_fh, sizeof(old));
-                memcpy(file_fh, &body->handle, sizeof(*file_fh));
+                old = *file_fh;
+                *file_fh = body->handle;
         }
-        close_req = mod->mod_close_req;
-        if (close_req != NULL) {
-                struct mdt_epoch *epoch;
 
-                LASSERT(lustre_msg_get_opc(close_req->rq_reqmsg) == MDS_CLOSE);
-                LASSERT(body != NULL);
-
-                epoch = lustre_msg_buf(close_req->rq_reqmsg, REQ_REC_OFF,
-                                       sizeof(*epoch));
-                LASSERT(epoch);
-                if (och != NULL)
-                        LASSERT(!memcmp(&old, &epoch->handle, sizeof(old)));
-                DEBUG_REQ(D_HA, close_req, "updating close body with new fh");
-                memcpy(&epoch->handle, &body->handle, sizeof(epoch->handle));
+        list_for_each_entry_safe(cur, tmp, &mod->mod_replay_list, rq_mod_list) {
+                int opc = lustre_msg_get_opc(cur->rq_reqmsg);
+                struct mdt_epoch *epoch = NULL;
+
+                if (opc == MDS_CLOSE || opc == MDS_DONE_WRITING) {
+                        epoch = lustre_msg_buf(cur->rq_reqmsg,
+                                               REQ_REC_OFF, sizeof(*epoch));
+                        LASSERT(epoch);
+                        DEBUG_REQ(D_HA, cur, "updating %s body with new fh",
+                                  opc == MDS_CLOSE ? "CLOSE" : "DONE_WRITING");
+                } else if (opc == MDS_REINT) {
+                        struct mdt_rec_setattr *rec;
+                        
+                        /* Check this is REINT_SETATTR. */
+                        rec = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF,
+                                             sizeof (*rec));
+                        LASSERT(rec && rec->sa_opcode == REINT_SETATTR);
+
+                        epoch = lustre_msg_buf(cur->rq_reqmsg,
+                                               REQ_REC_OFF + 2, sizeof(*epoch));
+                        LASSERT(epoch);
+                        DEBUG_REQ(D_HA, cur, "updating REINT_SETATTR body "
+                                  "with new fh");
+                }
+                if (epoch) {
+                        if (och != NULL)
+                                LASSERT(!memcmp(&old, &epoch->handle,
+                                                sizeof(old)));
+                        epoch->handle = body->handle;
+                }
         }
         EXIT;
 }
 
+/* Commit callback installed on requests linked to an md_open_data.
+ * Unlinks @req; @mod is freed only once no request is linked to it. */
+void mdc_commit_delayed(struct ptlrpc_request *req)
+{
+        struct md_open_data *mod = req->rq_cb_data;
+        struct ptlrpc_request *cur, *tmp;
+
+        DEBUG_REQ(D_HA, req, "req committed");
+        if (mod == NULL)
+                return;
+
+        req->rq_cb_data = NULL;
+        req->rq_commit_cb = NULL;
+        list_del_init(&req->rq_mod_list);
+        if (req->rq_sequence) {
+                list_for_each_entry_safe(cur, tmp, &mod->mod_replay_list,
+                                         rq_mod_list) {
+                        LASSERT(cur != LP_POISON);
+                        LASSERT(cur->rq_type != LI_POISON);
+                        DEBUG_REQ(D_HA, cur, "req balanced");
+                        LASSERT(cur->rq_transno != 0);
+                        LASSERT(cur->rq_import == req->rq_import);
+
+                        /* Keep @cur linked: its rq_cb_data still points at
+                         * @mod, so @mod must outlive @cur's own commit
+                         * callback.  Only stop preserving it for transno-
+                         * unconditional replay. */
+                        spin_lock(&cur->rq_lock);
+                        cur->rq_replay = 0;
+                        spin_unlock(&cur->rq_lock);
+                }
+        }
+
+        if (list_empty(&mod->mod_replay_list)) {
+                if (mod->mod_och != NULL)
+                        mod->mod_och->och_mod = NULL;
+                OBD_FREE_PTR(mod);
+        }
+}
+
 int mdc_set_open_replay_data(struct obd_export *exp,
                              struct obd_client_handle *och,
                              struct ptlrpc_request *open_req)
 {
-        struct mdc_open_data *mod;
+        struct md_open_data *mod;
         struct mdt_rec_create *rec = lustre_msg_buf(open_req->rq_reqmsg,
                                                     DLM_INTENT_REC_OFF,
                                                     sizeof(*rec));
@@ -703,9 +744,10 @@ int mdc_set_open_replay_data(struct obd_export *exp,
                 OBD_ALLOC(mod, sizeof(*mod));
                 if (mod == NULL) {
                         DEBUG_REQ(D_ERROR, open_req,
-                                  "Can't allocate mdc_open_data");
+                                  "Can't allocate md_open_data");
                         RETURN(0);
                 }
+                CFS_INIT_LIST_HEAD(&mod->mod_replay_list);
 
                 spin_lock(&open_req->rq_lock);
                 if (!open_req->rq_replay) {
@@ -717,8 +759,8 @@ int mdc_set_open_replay_data(struct obd_export *exp,
                 och->och_mod = mod;
                 mod->mod_och = och;
                 open_req->rq_cb_data = mod;
-                mod->mod_open_req = open_req;
-                open_req->rq_commit_cb = mdc_commit_open;
+                list_add_tail(&open_req->rq_mod_list, &mod->mod_replay_list);
+                open_req->rq_commit_cb = mdc_commit_delayed;
                 spin_unlock(&open_req->rq_lock);
         }
 
@@ -739,12 +781,12 @@ int mdc_set_open_replay_data(struct obd_export *exp,
 int mdc_clear_open_replay_data(struct obd_export *exp,
                                struct obd_client_handle *och)
 {
-        struct mdc_open_data *mod = och->och_mod;
+        struct md_open_data *mod = och->och_mod;
         ENTRY;
 
         /*
-         * Don't free the structure now (it happens in mdc_commit_open(), after
-         * we're sure we won't need to fix up the close request in the future),
+         * Don't free the structure now (it happens in mdc_commit_delayed(),
+         * after the last request is removed from its replay list),
          * but make sure that replay doesn't poke at the och, which is about to
          * be freed.
          */
@@ -756,40 +798,8 @@ int mdc_clear_open_replay_data(struct obd_export *exp,
         RETURN(0);
 }
 
-static void mdc_commit_close(struct ptlrpc_request *req)
-{
-        struct mdc_open_data *mod = req->rq_cb_data;
-        struct ptlrpc_request *open_req;
-        struct obd_import *imp = req->rq_import;
-
-        DEBUG_REQ(D_HA, req, "close req committed");
-        if (mod == NULL)
-                return;
-
-        mod->mod_close_req = NULL;
-        req->rq_cb_data = NULL;
-        req->rq_commit_cb = NULL;
-
-        open_req = mod->mod_open_req;
-        LASSERT(open_req != NULL);
-        LASSERT(open_req != LP_POISON);
-        LASSERT(open_req->rq_type != LI_POISON);
-
-        DEBUG_REQ(D_HA, open_req, "open req balanced");
-        LASSERT(open_req->rq_transno != 0);
-        LASSERT(open_req->rq_import == imp);
-
-        /*
-         * We no longer want to preserve this for transno-unconditional
-         * replay. Decref open req here as well.
-         */
-        spin_lock(&open_req->rq_lock);
-        open_req->rq_replay = 0;
-        spin_unlock(&open_req->rq_lock);
-}
-
 int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
-              struct obd_client_handle *och, struct ptlrpc_request **request)
+              struct md_open_data *mod, struct ptlrpc_request **request)
 {
         struct obd_device *obd = class_exp2obd(exp);
         int reqsize[4] = { sizeof(struct ptlrpc_body),
@@ -800,7 +810,6 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
                            obd->u.cli.cl_max_mds_easize,
                            obd->u.cli.cl_max_mds_cookiesize };
         struct ptlrpc_request *req;
-        struct mdc_open_data *mod;
         int rc;
         ENTRY;
 
@@ -818,26 +827,15 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
         req->rq_request_portal = MDS_READPAGE_PORTAL;
 
         /* Ensure that this close's handle is fixed up during replay. */
-        LASSERT(och != NULL);
-        LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
-        mod = och->och_mod;
-        if (likely(mod != NULL)) {
-                if (mod->mod_open_req->rq_type == LI_POISON) {
-                        CERROR("LBUG POISONED open %p!\n", mod->mod_open_req);
-                        LBUG();
-                        ptlrpc_req_finished(req);
-                        req = NULL;
-                        GOTO(out, rc = -EIO);
-                }
-                mod->mod_close_req = req;
-                DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
-        } else {
+        if (likely(mod != NULL))
+                list_add_tail(&req->rq_mod_list, &mod->mod_replay_list);
+        else
                 CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
-        }
 
         mdc_close_pack(req, REQ_REC_OFF, op_data);
         ptlrpc_req_set_repsize(req, 4, repsize);
-        req->rq_commit_cb = mdc_commit_close;
+        req->rq_commit_cb = mdc_commit_delayed;
+        req->rq_replay = 1;
         LASSERT(req->rq_cb_data == NULL);
         req->rq_cb_data = mod;
 
@@ -859,7 +857,7 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
                                 rc = -rc;
                 } else if (mod == NULL) {
                         if (req->rq_import->imp_replayable) 
-                                CERROR("Unexpected: can't find mdc_open_data," 
+                                CERROR("Unexpected: can't find md_open_data," 
                                        "but close succeeded with replayable imp"
                                        "Please tell CFS.\n");
                 }
@@ -881,7 +879,7 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
 }
 
 int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
-                     struct obd_client_handle *och)
+                     struct md_open_data *mod)
 {
         struct obd_device *obd = class_exp2obd(exp);
         struct ptlrpc_request *req;
@@ -900,14 +898,29 @@ int mdc_done_writing(struct obd_export *exp, struct md_op_data *op_data,
         if (req == NULL)
                 RETURN(-ENOMEM);
 
-        /* XXX: add DONE_WRITING request to och -- when Size-on-MDS
-         * recovery will be ready. */
         mdc_close_pack(req, REQ_REC_OFF, op_data);
+        
+        req->rq_replay = 1;
+        req->rq_cb_data = mod;
+        req->rq_commit_cb = mdc_commit_delayed;
+        if (likely(mod != NULL))
+                list_add_tail(&req->rq_mod_list, &mod->mod_replay_list);
+        else
+                CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
 
         ptlrpc_req_set_repsize(req, 2, repsize);
         mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
         rc = ptlrpc_queue_wait(req);
         mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
+
+        /* Close the open replay sequence if an error occurred or no SOM
+         * attribute update is needed. */
+        if (rc != -EAGAIN)
+                ptlrpc_close_replay_seq(req);
+                
+        if (rc && rc != -EAGAIN && req->rq_commit_cb)
+                req->rq_commit_cb(req);
+
         ptlrpc_req_finished(req);
         RETURN(rc);
 }
@@ -1271,10 +1284,13 @@ static int mdc_pin(struct obd_export *exp, const struct lu_fid *fid,
 
         OBD_ALLOC(handle->och_mod, sizeof(*handle->och_mod));
         if (handle->och_mod == NULL) {
-                DEBUG_REQ(D_ERROR, req, "can't allocate mdc_open_data");
+                DEBUG_REQ(D_ERROR, req, "can't allocate md_open_data");
                 RETURN(-ENOMEM);
         }
-        handle->och_mod->mod_open_req = req; /* will be dropped by unpin */
+
+        /* will be dropped by unpin */
+        CFS_INIT_LIST_HEAD(&handle->och_mod->mod_replay_list);
+        list_add_tail(&req->rq_mod_list, &handle->och_mod->mod_replay_list);
 
         RETURN(rc);
 }
@@ -1308,7 +1324,14 @@ static int mdc_unpin(struct obd_export *exp,
                 CERROR("unpin failed: %d\n", rc);
 
         ptlrpc_req_finished(req);
-        ptlrpc_req_finished(handle->och_mod->mod_open_req);
+
+        LASSERT(!list_empty(&handle->och_mod->mod_replay_list));
+        req = list_entry(handle->och_mod->mod_replay_list.next,
+                         typeof(*req), rq_mod_list);
+        list_del_init(&req->rq_mod_list);
+        ptlrpc_req_finished(req);
+        LASSERT(list_empty(&handle->och_mod->mod_replay_list));
+
         OBD_FREE(handle->och_mod, sizeof(*handle->och_mod));
         RETURN(rc);
 }
index 6d6f6dc..0497cfd 100644 (file)
@@ -1396,7 +1396,6 @@ static int mdt_reint_internal(struct mdt_thread_info *info,
 {
         struct req_capsule      *pill = &info->mti_pill;
         struct mdt_device       *mdt = info->mti_mdt;
-        struct ptlrpc_request   *req = mdt_info_req(info);
         struct mdt_body         *repbody;
         int                      need_shrink = 0;
         int                      rc;
@@ -1443,21 +1442,12 @@ static int mdt_reint_internal(struct mdt_thread_info *info,
         if (rc != 0)
                 GOTO(out_ucred, rc = err_serious(rc));
 
-        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
-                struct mdt_client_data *mcd;
-
-                mcd = req->rq_export->exp_mdt_data.med_mcd;
-                if (req_xid_is_last(req)) {
-                        need_shrink = 0;
-                        mdt_reconstruct(info, lhc);
-                        rc = lustre_msg_get_status(req->rq_repmsg);
-                        GOTO(out_ucred, rc);
-                }
-                DEBUG_REQ(D_HA, req, "no reply for RESENT (xid "LPD64")",
-                          mcd->mcd_last_xid);
-        }
-
         need_shrink = 0;
+        if (mdt_check_resent(info, mdt_reconstruct, lhc)) {
+                rc = lustre_msg_get_status(mdt_info_req(info)->rq_repmsg);
+                GOTO(out_ucred, rc);
+        }
+        
         rc = mdt_reint_rec(info, lhc);
         EXIT;
 out_ucred:
index 2434c62..084e6d1 100644 (file)
@@ -532,6 +532,8 @@ void mdt_lock_handle_init(struct mdt_lock_handle *lh);
 void mdt_lock_handle_fini(struct mdt_lock_handle *lh);
 
 void mdt_reconstruct(struct mdt_thread_info *, struct mdt_lock_handle *);
+void mdt_reconstruct_generic(struct mdt_thread_info *mti,
+                             struct mdt_lock_handle *lhc);
 
 extern void target_recovery_fini(struct obd_device *obd);
 extern void target_recovery_init(struct obd_device *obd,
@@ -671,6 +673,26 @@ static inline struct mdt_export_data *mdt_req2med(struct ptlrpc_request *req)
         return &req->rq_export->exp_mdt_data;
 }
 
+typedef void (*mdt_reconstruct_t)(struct mdt_thread_info *mti,
+                                  struct mdt_lock_handle *lhc);
+static inline int mdt_check_resent(struct mdt_thread_info *info,
+                                   mdt_reconstruct_t reconstruct,
+                                   struct mdt_lock_handle *lhc)
+{
+        struct ptlrpc_request *req = mdt_info_req(info);
+        ENTRY;
+
+        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+                if (req_xid_is_last(req)) {
+                        reconstruct(info, lhc);
+                        RETURN(1);
+                }
+                DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",
+                          req->rq_export->exp_mdt_data.med_mcd->mcd_last_xid);
+        }
+        RETURN(0);
+}
+
 #define MDT_FAIL_CHECK(id)                                              \
 ({                                                                      \
         if (unlikely(OBD_FAIL_CHECK(id)))                               \
index fe4b6f3..e83eede 100644 (file)
@@ -1150,6 +1150,7 @@ int mdt_close(struct mdt_thread_info *info)
         struct mdt_object      *o;
         struct md_attr         *ma = &info->mti_attr;
         struct mdt_body        *repbody = NULL;
+        struct ptlrpc_request  *req = mdt_info_req(info);
         int rc, ret = 0;
         ENTRY;
 
@@ -1165,6 +1166,9 @@ int mdt_close(struct mdt_thread_info *info)
         req_capsule_set_size(&info->mti_pill, &RMF_LOGCOOKIES, RCL_SERVER,
                              info->mti_mdt->mdt_max_cookiesize);
         rc = req_capsule_pack(&info->mti_pill);
+        if (mdt_check_resent(info, mdt_reconstruct_generic, NULL))
+                RETURN(lustre_msg_get_status(req->rq_repmsg));
+
         /* Continue to close handle even if we can not pack reply */
         if (rc == 0) {
                 repbody = req_capsule_server_get(&info->mti_pill,
@@ -1185,7 +1189,7 @@ int mdt_close(struct mdt_thread_info *info)
         } else
                 rc = err_serious(rc);
 
-        med = &mdt_info_req(info)->rq_export->exp_mdt_data;
+        med = &req->rq_export->exp_mdt_data;
         spin_lock(&med->med_open_lock);
         mfd = mdt_handle2mfd(info, &info->mti_epoch->handle);
         if (mdt_mfd_closed(mfd)) {
@@ -1220,7 +1224,6 @@ int mdt_close(struct mdt_thread_info *info)
 int mdt_done_writing(struct mdt_thread_info *info)
 {
         struct mdt_body         *repbody = NULL;
-        struct ptlrpc_request   *req = mdt_info_req(info);
         struct mdt_export_data  *med;
         struct mdt_file_data    *mfd;
         int rc;
@@ -1240,6 +1243,9 @@ int mdt_done_writing(struct mdt_thread_info *info)
         if (rc)
                 RETURN(err_serious(rc));
 
+        if (mdt_check_resent(info, mdt_reconstruct_generic, NULL))
+                RETURN(lustre_msg_get_status(mdt_info_req(info)->rq_repmsg));
+
         med = &info->mti_exp->exp_mdt_data;
         spin_lock(&med->med_open_lock);
         mfd = mdt_handle2mfd(info, &info->mti_epoch->handle);
@@ -1251,17 +1257,8 @@ int mdt_done_writing(struct mdt_thread_info *info)
                 RETURN(-ESTALE);
         } 
  
-        if (!(mfd->mfd_mode == FMODE_EPOCH ||
-             mfd->mfd_mode == FMODE_EPOCHLCK)) {
-                spin_unlock(&med->med_open_lock);
-                DEBUG_REQ(D_WARNING, req, "req should be resent req");
-                LASSERT(mfd->mfd_mode == FMODE_SOM);
-                LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & 
-                        (MSG_RESENT | MSG_REPLAY));
-                /*Since we did not bond this req with open/close,
-                 *Why we should keep this req as replay req XXX*/
-                GOTO(empty_transno, rc);
-        }
+        LASSERT(mfd->mfd_mode == FMODE_EPOCH ||
+                mfd->mfd_mode == FMODE_EPOCHLCK);
         class_handle_unhash(&mfd->mfd_handle);
         list_del_init(&mfd->mfd_list);
         spin_unlock(&med->med_open_lock);
@@ -1270,7 +1267,6 @@ int mdt_done_writing(struct mdt_thread_info *info)
         info->mti_epoch->flags |= MF_EPOCH_CLOSE;
         info->mti_attr.ma_valid = 0;
         rc = mdt_mfd_close(info, mfd);
-empty_transno:
         mdt_empty_transno(info);
         RETURN(rc);
 }
index 7780a7b..38f8ecb 100644 (file)
@@ -1021,8 +1021,8 @@ void mdt_req_from_mcd(struct ptlrpc_request *req,
         mds_steal_ack_locks(req);
 }
 
-static void mdt_reconstruct_generic(struct mdt_thread_info *mti,
-                                    struct mdt_lock_handle *lhc)
+void mdt_reconstruct_generic(struct mdt_thread_info *mti,
+                             struct mdt_lock_handle *lhc)
 {
         struct ptlrpc_request *req = mdt_info_req(mti);
         struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
@@ -1076,17 +1076,24 @@ static void mdt_reconstruct_setattr(struct mdt_thread_info *mti,
         obj = mdt_object_find(mti->mti_env, mdt, mti->mti_rr.rr_fid1);
         LASSERT(!IS_ERR(obj));
         mo_attr_get(mti->mti_env, mdt_object_child(obj), &mti->mti_attr);
-        mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr, mdt_object_fid(obj));
+        mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr,
+                           mdt_object_fid(obj));
+        if (mti->mti_epoch && (mti->mti_epoch->flags & MF_EPOCH_OPEN)) {
+                struct mdt_file_data *mfd;
+                struct mdt_body *repbody;
+
+                repbody = req_capsule_server_get(&mti->mti_pill, &RMF_MDT_BODY);
+                repbody->ioepoch = obj->mot_ioepoch;
+                spin_lock(&med->med_open_lock);
+                list_for_each_entry(mfd, &med->med_open_head, mfd_list) {
+                        if (mfd->mfd_xid == req->rq_xid)
+                                break;
+                }
+                LASSERT(&mfd->mfd_list != &med->med_open_head);
+                spin_unlock(&med->med_open_lock);
+                repbody->handle.cookie = mfd->mfd_handle.h_cookie;
+        }
 
-        /* Don't return OST-specific attributes if we didn't just set them */
-/*
-        if (rec->ur_iattr.ia_valid & ATTR_SIZE)
-                body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
-        if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
-                body->valid |= OBD_MD_FLMTIME;
-        if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
-                body->valid |= OBD_MD_FLATIME;
-*/
         mdt_object_put(mti->mti_env, obj);
 }
 
@@ -1116,4 +1123,3 @@ void mdt_reconstruct(struct mdt_thread_info *mti,
         reconstructors[mti->mti_rr.rr_opcode](mti, lhc);
         EXIT;
 }
-
index 676df34..eba73a4 100644 (file)
@@ -353,16 +353,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
                                         info->mti_epoch->handle.cookie);
                         GOTO(out_put, rc = -ESTALE);
                 }
-                if (mfd->mfd_mode != FMODE_SOM) {
-                        CWARN("mfd_mode %d not right should be in replay\n", 
-                               mfd->mfd_mode);
-                        LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & 
-                                MSG_REPLAY);
-                        LASSERT(mfd->mfd_mode == FMODE_EPOCH);
-                        mfd->mfd_mode = FMODE_SOM;
-                }
-                
-                LASSERT(ma->ma_attr.la_valid & LA_SIZE);
+                LASSERT(mfd->mfd_mode == FMODE_SOM);
                 LASSERT(!(info->mti_epoch->flags & MF_EPOCH_CLOSE));
 
                 class_handle_unhash(&mfd->mfd_handle);
index 6779362..8f79036 100644 (file)
@@ -393,6 +393,7 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
         spin_lock_init(&request->rq_lock);
         CFS_INIT_LIST_HEAD(&request->rq_list);
         CFS_INIT_LIST_HEAD(&request->rq_replay_list);
+        CFS_INIT_LIST_HEAD(&request->rq_mod_list);
         CFS_INIT_LIST_HEAD(&request->rq_ctx_chain);
         CFS_INIT_LIST_HEAD(&request->rq_set_chain);
         cfs_waitq_init(&request->rq_reply_waitq);
@@ -1264,6 +1265,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
         if (request->rq_import != NULL) {
                 if (!locked)
                         spin_lock(&request->rq_import->imp_lock);
+                list_del_init(&request->rq_mod_list);
                 list_del_init(&request->rq_replay_list);
                 if (!locked)
                         spin_unlock(&request->rq_import->imp_lock);