Whamcloud - gitweb
- Replace client-side generation fixup with bug 304's righteous assertion of
author shaver <shaver>
Sat, 2 Nov 2002 09:08:58 +0000 (09:08 +0000)
committer shaver <shaver>
Sat, 2 Nov 2002 09:08:58 +0000 (09:08 +0000)
  generation numbers at recreate-time.  Single-client recovery should now be
  _that_much_ more robust. (Bug 299.)

lustre/include/linux/lustre_idl.h
lustre/include/linux/lustre_mds.h
lustre/lib/mds_updates.c
lustre/llite/namei.c
lustre/mdc/mdc_reint.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/mds_reint.c

index 8a99cd1..a8f73c1 100644 (file)
@@ -420,6 +420,7 @@ struct mds_rec_create {
         __u32           cr_fsgid;
         __u32           cr_mode;
         struct ll_fid   cr_fid;
+        struct ll_fid   cr_replayfid;
         __u32           cr_uid;
         __u32           cr_gid;
         __u64           cr_time;
index b6b4551..401682b 100644 (file)
@@ -187,8 +187,9 @@ int mdc_rename(struct lustre_handle *conn,
                struct ptlrpc_request **);
 int mdc_create_client(obd_uuid_t uuid, struct ptlrpc_client *cl);
 
-void mdc_store_create_replay_data(struct ptlrpc_request *req,
-                                  struct super_block *sb);
+/* Store the generation of a newly-created inode in |req| for replay. */
+void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
+                                int repoff);
 
 extern int mds_client_add(struct mds_export_data *med, int cl_off);
 extern int mds_client_free(struct obd_export *exp);
index b436d05..3e6194a 100644 (file)
@@ -145,6 +145,7 @@ void mds_create_pack(struct ptlrpc_request *req, int offset,
         rec->cr_fsuid = HTON__u32(current->fsuid);
         rec->cr_fsgid = HTON__u32(current->fsgid);
         ll_inode2fid(&rec->cr_fid, dir);
+        memset(&rec->cr_replayfid, 0, sizeof rec->cr_replayfid);
         rec->cr_mode = HTON__u32(mode);
         rec->cr_rdev = HTON__u64(rdev);
         rec->cr_uid = HTON__u32(uid);
@@ -331,6 +332,7 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset,
         r->ur_fsuid = NTOH__u32(rec->cr_fsuid);
         r->ur_fsgid = NTOH__u32(rec->cr_fsgid);
         r->ur_fid1 = &rec->cr_fid;
+        r->ur_fid2 = &rec->cr_replayfid;
         r->ur_mode = NTOH__u32(rec->cr_mode);
         r->ur_rdev = NTOH__u64(rec->cr_rdev);
         r->ur_uid = NTOH__u32(rec->cr_uid);
index 4f42bf9..bf9576c 100644 (file)
@@ -236,7 +236,7 @@ int ll_intent_lock(struct inode *parent, struct dentry **de,
                 ino = mds_body->fid1.id;
                 mode = mds_body->mode;
                 if (it->it_op & (IT_CREAT | IT_MKDIR | IT_SYMLINK | IT_MKNOD)) {
-                        mdc_store_create_replay_data(request, parent->i_sb);
+                        mdc_store_inode_generation(request, 2, 1);
                         /* For create ops, we want the lookup to be negative,
                          * unless the create failed in a way that indicates
                          * that the file is already there */
index 348fc21..51cacf7 100644 (file)
@@ -116,6 +116,8 @@ int mdc_create(struct lustre_handle *conn,
                 goto resend;
         }
 
+        mdc_store_inode_generation(req, 0, 0);
+
         *request = req;
         RETURN(rc);
 }
index 27c0808..22180dc 100644 (file)
@@ -223,177 +223,14 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
         RETURN(0);
 }
 
-struct create_replay_data {
-        struct super_block *sb;
-        u32                 generation;
-};
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int create_replay_find_inode(struct inode *inode, unsigned long ino,
-                                    void *opaque)
-#else
-static int create_replay_find_inode(struct inode *inode, void *opaque)
-#endif
-{
-        struct ptlrpc_request *req = opaque;
-        struct create_replay_data *saved;
-        struct mds_body *body;
-        
-        saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
-        
-        if (saved->generation != inode->i_generation) {
-                CDEBUG(D_HA,
-                       "generation mismatch for ino %u: saved %u != inode %u\n",
-                       inode->i_ino, saved->generation, inode->i_generation);
-                return 0;
-        }
-
-        body = lustre_msg_buf(req->rq_repmsg, 1);
-
-        /* XXX do I need more out of ll_update_inode? */
-        CDEBUG(D_HA, "updating inode %u generation %u to %u\n",
-               inode->i_ino, inode->i_generation, body->generation);
-
-        inode->i_generation = body->generation;
-
-        return 1;
-}
-
-static void fixup_req_for_recreate(struct ptlrpc_request *fixreq,
-                                   struct ptlrpc_request *req,
-                                   struct inode *inode)
-{
-        struct ldlm_request *lockreq; 
-        struct mds_rec_link *rec; /* representative, two-fid op structure */
-        int opc;
-
-        if (fixreq->rq_import != req->rq_import) {
-                DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping");
-                return;
-        }
-
-        DEBUG_REQ(D_HA, fixreq, "fixing");
-        
-        /* XXX check replay_state to see if we'll actually replay. */
-
-        /* We only care about LDLM_ENQUEUE and MDS_REINT requests. */
-        if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) {
-                lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0);
-
-                if (lockreq->lock_desc.l_resource.lr_type != LDLM_PLAIN &&
-                    !(lockreq->lock_flags & LDLM_FL_HAS_INTENT)) {
-                        DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping");
-                        return;
-                }
-
-                if (fixreq->rq_reqmsg->bufcount < 2) {
-                        DEBUG_REQ(D_HA, fixreq,
-                                  "short intent (probably readdir), skipping");
-                        return;
-                }
-
-                /* XXX endianness is probably very very wrong here. Very. */
-                rec = lustre_msg_buf(fixreq->rq_reqmsg, 2);
-        } else if (fixreq->rq_reqmsg->opc == MDS_REINT) {
-                rec = lustre_msg_buf(fixreq->rq_reqmsg, 0);
-        } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) {
-                struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0);
-                DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
-                          body->fid1.generation, inode->i_generation);
-                body->fid1.generation = inode->i_generation;
-                return;
-        } else {
-                DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping");
-                return;
-        }
-        
-        if (rec->lk_fid1.id == inode->i_ino) {
-                DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
-                          rec->lk_fid1.generation, inode->i_generation);
-                rec->lk_fid1.generation = inode->i_generation;
-        }
-        
-        /* Some ops have two FIDs. ZZZ We rely on the identical
-         * placement of that second FID in all such ops' messages.
-         */
-        opc = rec->lk_opcode & REINT_OPCODE_MASK;
-        if ((opc == REINT_LINK || opc == REINT_UNLINK ||
-             opc == REINT_RENAME) &&
-            rec->lk_fid2.id == inode->i_ino) {
-                DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u",
-                          rec->lk_fid2.generation, inode->i_generation);
-                rec->lk_fid2.generation = inode->i_generation;
-        }
-}
-
-static void mdc_replay_create(struct ptlrpc_request *req)
+/* This should be called with both the request and the reply still packed. */
+void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
+                                int repoff)
 {
-        struct create_replay_data *saved;
-        struct mds_body *body;
-        struct inode *inode;
-        struct list_head *tmp;
-
-        if (req->rq_reqmsg->opc == MDS_REINT)
-                LBUG(); /* XXX don't handle the non-intent case yet */
-
-        body = lustre_msg_buf(req->rq_repmsg, 1);
-        saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
-
-        CDEBUG(D_HA, "create of inode %d replayed; gen %u -> %u\n",
-               body->fid1.id, saved->generation, body->generation);
-        /* XXX cargo-culted right out of ll_iget */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req);
-#endif
-#if 0
-        {
-                extern int ll_read_inode2(struct inode *inode, void *opaque);
-                inode = iget5_locked(saved->sb, body->fid1.id,
-                                     create_replay_find_inode, 
-                                     ll_read_inode2, req);
-
-                if (!inode)
-                        LBUG(); /* XXX ick */
-                
-                if (inode->i_state & I_NEW)
-                        unlock_new_inode(inode);
-        }
-#endif
-
-        /* Now that we've updated the generation, we need to go and find all
-         * the other requests that refer to this file and will be replayed,
-         * and teach them about our new generation.
-         */
-        list_for_each(tmp, &req->rq_connection->c_sending_head) {
-                struct ptlrpc_request *fixreq =
-                        list_entry(tmp, struct ptlrpc_request, rq_list);
-
-                fixup_req_for_recreate(fixreq, req, inode);
-        }
-
-        list_for_each(tmp, &req->rq_connection->c_delayed_head) {
-                struct ptlrpc_request *fixreq =
-                        list_entry(tmp, struct ptlrpc_request, rq_list);
+        struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
+        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
 
-                fixup_req_for_recreate(fixreq, req, inode);
-        }
-}
-
-void mdc_store_create_replay_data(struct ptlrpc_request *req,
-                                  struct super_block *sb)
-{
-        struct create_replay_data *saved = 
-                lustre_msg_buf(req->rq_reqmsg, 5);
-        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
-
-
-        if (req->rq_reqmsg->opc == MDS_REINT)
-                LBUG(); /* XXX don't handle the non-intent case yet */
-
-        saved->generation = body->generation;
-        saved->sb = sb; /* XXX is this safe? */
-
-        req->rq_replay_cb = mdc_replay_create;
+        memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
 }
 
 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
@@ -435,8 +272,7 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                 size[2] = sizeof(struct mds_rec_create);
                 size[3] = de->d_name.len + 1;
                 size[4] = tgtlen + 1;
-                size[5] = sizeof(struct create_replay_data);
-                req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6,
+                req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
                                       size, NULL);
                 if (!req)
                         RETURN(-ENOMEM);
@@ -542,6 +378,13 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type,
                               lock_type, NULL, 0, lock_mode, &flags,
                               ldlm_completion_ast, mdc_blocking_ast, data,
                               datalen, lockh);
+
+        if (it->it_op != IT_READDIR) {
+                /* XXX This should become a lustre_msg flag, but for now... */
+                __u32 *opp = lustre_msg_buf(req->rq_reqmsg, 2);
+                *opp |= REINT_REPLAYING;
+        }
+
         if (rc == -ENOENT) {
                 /* This can go when we're sure that this can never happen */
                 LBUG();
@@ -834,7 +677,7 @@ EXPORT_SYMBOL(mdc_setattr);
 EXPORT_SYMBOL(mdc_close);
 EXPORT_SYMBOL(mdc_open);
 
-EXPORT_SYMBOL(mdc_store_create_replay_data);
+EXPORT_SYMBOL(mdc_store_inode_generation);
 
 module_init(ptlrpc_request_init);
 module_exit(ptlrpc_request_exit);
index 7b6da6b..80b850a 100644 (file)
@@ -1394,7 +1394,7 @@ static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie,
                         LBUG();
                         break;
                 default:
-                        CERROR("Unhandled intent\n");
+                        CERROR("Unhandled intent "LPD64"\n", it->opc);
                         LBUG();
                 }
 
index 97a1900..8c0a226 100644 (file)
@@ -300,8 +300,6 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 struct inode *inode = dchild->d_inode;
                 struct mds_body *body;
 
-                CDEBUG(D_INODE, "created ino %ld\n", dchild->d_inode->i_ino);
-
                 iattr.ia_atime = rec->ur_time;
                 iattr.ia_ctime = rec->ur_time;
                 iattr.ia_mtime = rec->ur_time;
@@ -310,6 +308,16 @@ static int mds_reint_create(struct mds_update_record *rec, int offset,
                 iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME |
                         ATTR_MTIME | ATTR_CTIME;
 
+                if (rec->ur_fid2->id) {
+                        LASSERT(rec->ur_opcode & REINT_REPLAYING);
+                        inode->i_generation = rec->ur_fid2->generation;
+                        /* Dirtied and committed by this setattr: */
+                        CDEBUG(D_INODE, "recreated ino %ld with gen %ld\n",
+                               inode->i_ino, inode->i_generation);
+                } else {
+                        CDEBUG(D_INODE, "created ino %ld\n", inode->i_ino);
+                }
+
                 rc = mds_fs_setattr(mds, dchild, handle, &iattr);
                 if (rc) {
                         CERROR("error on setattr: rc = %d\n", rc);