From bac62a00ccb263233b343a74cb2ffcdf705c631e Mon Sep 17 00:00:00 2001 From: shaver Date: Sat, 2 Nov 2002 09:08:58 +0000 Subject: [PATCH] - Replace client-side generation fixup with bug 304's righteous assertion of generation numbers at recreate-time. Single-client recovery should now be _that_much_ more robust. (Bug 299.) --- lustre/include/linux/lustre_idl.h | 1 + lustre/include/linux/lustre_mds.h | 5 +- lustre/lib/mds_updates.c | 2 + lustre/llite/namei.c | 2 +- lustre/mdc/mdc_reint.c | 2 + lustre/mdc/mdc_request.c | 187 +++----------------------------------- lustre/mds/handler.c | 2 +- lustre/mds/mds_reint.c | 12 ++- 8 files changed, 35 insertions(+), 178 deletions(-) diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 8a99cd1..a8f73c1 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -420,6 +420,7 @@ struct mds_rec_create { __u32 cr_fsgid; __u32 cr_mode; struct ll_fid cr_fid; + struct ll_fid cr_replayfid; __u32 cr_uid; __u32 cr_gid; __u64 cr_time; diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index b6b4551..401682b 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -187,8 +187,9 @@ int mdc_rename(struct lustre_handle *conn, struct ptlrpc_request **); int mdc_create_client(obd_uuid_t uuid, struct ptlrpc_client *cl); -void mdc_store_create_replay_data(struct ptlrpc_request *req, - struct super_block *sb); +/* Store the generation of a newly-created inode in |req| for replay. 
*/ +void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, + int repoff); extern int mds_client_add(struct mds_export_data *med, int cl_off); extern int mds_client_free(struct obd_export *exp); diff --git a/lustre/lib/mds_updates.c b/lustre/lib/mds_updates.c index b436d05..3e6194a 100644 --- a/lustre/lib/mds_updates.c +++ b/lustre/lib/mds_updates.c @@ -145,6 +145,7 @@ void mds_create_pack(struct ptlrpc_request *req, int offset, rec->cr_fsuid = HTON__u32(current->fsuid); rec->cr_fsgid = HTON__u32(current->fsgid); ll_inode2fid(&rec->cr_fid, dir); + memset(&rec->cr_replayfid, 0, sizeof rec->cr_replayfid); rec->cr_mode = HTON__u32(mode); rec->cr_rdev = HTON__u64(rdev); rec->cr_uid = HTON__u32(uid); @@ -331,6 +332,7 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset, r->ur_fsuid = NTOH__u32(rec->cr_fsuid); r->ur_fsgid = NTOH__u32(rec->cr_fsgid); r->ur_fid1 = &rec->cr_fid; + r->ur_fid2 = &rec->cr_replayfid; r->ur_mode = NTOH__u32(rec->cr_mode); r->ur_rdev = NTOH__u64(rec->cr_rdev); r->ur_uid = NTOH__u32(rec->cr_uid); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 4f42bf9..bf9576c 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -236,7 +236,7 @@ int ll_intent_lock(struct inode *parent, struct dentry **de, ino = mds_body->fid1.id; mode = mds_body->mode; if (it->it_op & (IT_CREAT | IT_MKDIR | IT_SYMLINK | IT_MKNOD)) { - mdc_store_create_replay_data(request, parent->i_sb); + mdc_store_inode_generation(request, 2, 1); /* For create ops, we want the lookup to be negative, * unless the create failed in a way that indicates * that the file is already there */ diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 348fc21..51cacf7 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -116,6 +116,8 @@ int mdc_create(struct lustre_handle *conn, goto resend; } + mdc_store_inode_generation(req, 0, 0); + *request = req; RETURN(rc); } diff --git a/lustre/mdc/mdc_request.c 
b/lustre/mdc/mdc_request.c index 27c0808..22180dc 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -223,177 +223,14 @@ static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, RETURN(0); } -struct create_replay_data { - struct super_block *sb; - u32 generation; -}; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int create_replay_find_inode(struct inode *inode, unsigned long ino, - void *opaque) -#else -static int create_replay_find_inode(struct inode *inode, void *opaque) -#endif -{ - struct ptlrpc_request *req = opaque; - struct create_replay_data *saved; - struct mds_body *body; - - saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */ - - if (saved->generation != inode->i_generation) { - CDEBUG(D_HA, - "generation mismatch for ino %u: saved %u != inode %u\n", - inode->i_ino, saved->generation, inode->i_generation); - return 0; - } - - body = lustre_msg_buf(req->rq_repmsg, 1); - - /* XXX do I need more out of ll_update_inode? */ - CDEBUG(D_HA, "updating inode %u generation %u to %u\n", - inode->i_ino, inode->i_generation, body->generation); - - inode->i_generation = body->generation; - - return 1; -} - -static void fixup_req_for_recreate(struct ptlrpc_request *fixreq, - struct ptlrpc_request *req, - struct inode *inode) -{ - struct ldlm_request *lockreq; - struct mds_rec_link *rec; /* representative, two-fid op structure */ - int opc; - - if (fixreq->rq_import != req->rq_import) { - DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping"); - return; - } - - DEBUG_REQ(D_HA, fixreq, "fixing"); - - /* XXX check replay_state to see if we'll actually replay. */ - - /* We only care about LDLM_ENQUEUE and MDS_REINT requests. 
*/ - if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) { - lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0); - - if (lockreq->lock_desc.l_resource.lr_type != LDLM_PLAIN && - !(lockreq->lock_flags & LDLM_FL_HAS_INTENT)) { - DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping"); - return; - } - - if (fixreq->rq_reqmsg->bufcount < 2) { - DEBUG_REQ(D_HA, fixreq, - "short intent (probably readdir), skipping"); - return; - } - - /* XXX endianness is probably very very wrong here. Very. */ - rec = lustre_msg_buf(fixreq->rq_reqmsg, 2); - } else if (fixreq->rq_reqmsg->opc == MDS_REINT) { - rec = lustre_msg_buf(fixreq->rq_reqmsg, 0); - } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) { - struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0); - DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u", - body->fid1.generation, inode->i_generation); - body->fid1.generation = inode->i_generation; - return; - } else { - DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping"); - return; - } - - if (rec->lk_fid1.id == inode->i_ino) { - DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u", - rec->lk_fid1.generation, inode->i_generation); - rec->lk_fid1.generation = inode->i_generation; - } - - /* Some ops have two FIDs. ZZZ We rely on the identical - * placement of that second FID in all such ops' messages. - */ - opc = rec->lk_opcode & REINT_OPCODE_MASK; - if ((opc == REINT_LINK || opc == REINT_UNLINK || - opc == REINT_RENAME) && - rec->lk_fid2.id == inode->i_ino) { - DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u", - rec->lk_fid2.generation, inode->i_generation); - rec->lk_fid2.generation = inode->i_generation; - } -} - -static void mdc_replay_create(struct ptlrpc_request *req) +/* This should be called with both the request and the reply still packed. 
*/ +void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, + int repoff) { - struct create_replay_data *saved; - struct mds_body *body; - struct inode *inode; - struct list_head *tmp; - - if (req->rq_reqmsg->opc == MDS_REINT) - LBUG(); /* XXX don't handle the non-intent case yet */ - - body = lustre_msg_buf(req->rq_repmsg, 1); - saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */ - - CDEBUG(D_HA, "create of inode %d replayed; gen %u -> %u\n", - body->fid1.id, saved->generation, body->generation); - /* XXX cargo-culted right out of ll_iget */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req); -#endif -#if 0 - { - extern int ll_read_inode2(struct inode *inode, void *opaque); - inode = iget5_locked(saved->sb, body->fid1.id, - create_replay_find_inode, - ll_read_inode2, req); - - if (!inode) - LBUG(); /* XXX ick */ - - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - } -#endif - - /* Now that we've updated the generation, we need to go and find all - * the other requests that refer to this file and will be replayed, - * and teach them about our new generation. 
- */ - list_for_each(tmp, &req->rq_connection->c_sending_head) { - struct ptlrpc_request *fixreq = - list_entry(tmp, struct ptlrpc_request, rq_list); - - fixup_req_for_recreate(fixreq, req, inode); - } - - list_for_each(tmp, &req->rq_connection->c_delayed_head) { - struct ptlrpc_request *fixreq = - list_entry(tmp, struct ptlrpc_request, rq_list); + struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff); + struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff); - fixup_req_for_recreate(fixreq, req, inode); - } -} - -void mdc_store_create_replay_data(struct ptlrpc_request *req, - struct super_block *sb) -{ - struct create_replay_data *saved = - lustre_msg_buf(req->rq_reqmsg, 5); - struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1); - - - if (req->rq_reqmsg->opc == MDS_REINT) - LBUG(); /* XXX don't handle the non-intent case yet */ - - saved->generation = body->generation; - saved->sb = sb; /* XXX is this safe? */ - - req->rq_replay_cb = mdc_replay_create; + memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid); } int mdc_enqueue(struct lustre_handle *conn, int lock_type, @@ -435,8 +272,7 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type, size[2] = sizeof(struct mds_rec_create); size[3] = de->d_name.len + 1; size[4] = tgtlen + 1; - size[5] = sizeof(struct create_replay_data); - req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6, + req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5, size, NULL); if (!req) RETURN(-ENOMEM); @@ -542,6 +378,13 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type, lock_type, NULL, 0, lock_mode, &flags, ldlm_completion_ast, mdc_blocking_ast, data, datalen, lockh); + + if (it->it_op != IT_READDIR) { + /* XXX This should become a lustre_msg flag, but for now... 
*/ + __u32 *opp = lustre_msg_buf(req->rq_reqmsg, 2); + *opp |= REINT_REPLAYING; + } + if (rc == -ENOENT) { /* This can go when we're sure that this can never happen */ LBUG(); @@ -834,7 +677,7 @@ EXPORT_SYMBOL(mdc_setattr); EXPORT_SYMBOL(mdc_close); EXPORT_SYMBOL(mdc_open); -EXPORT_SYMBOL(mdc_store_create_replay_data); +EXPORT_SYMBOL(mdc_store_inode_generation); module_init(ptlrpc_request_init); module_exit(ptlrpc_request_exit); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 7b6da6b..80b850a 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1394,7 +1394,7 @@ static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie, LBUG(); break; default: - CERROR("Unhandled intent\n"); + CERROR("Unhandled intent "LPD64"\n", it->opc); LBUG(); } diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 97a1900..8c0a226 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -300,8 +300,6 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, struct inode *inode = dchild->d_inode; struct mds_body *body; - CDEBUG(D_INODE, "created ino %ld\n", dchild->d_inode->i_ino); - iattr.ia_atime = rec->ur_time; iattr.ia_ctime = rec->ur_time; iattr.ia_mtime = rec->ur_time; @@ -310,6 +308,16 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME | ATTR_MTIME | ATTR_CTIME; + if (rec->ur_fid2->id) { + LASSERT(rec->ur_opcode & REINT_REPLAYING); + inode->i_generation = rec->ur_fid2->generation; + /* Dirtied and committed by this setattr: */ + CDEBUG(D_INODE, "recreated ino %ld with gen %ld\n", + inode->i_ino, inode->i_generation); + } else { + CDEBUG(D_INODE, "created ino %ld\n", inode->i_ino); + } + rc = mds_fs_setattr(mds, dchild, handle, &iattr); if (rc) { CERROR("error on setattr: rc = %d\n", rc); -- 1.8.3.1