From: tappro Date: Wed, 22 Apr 2009 09:52:00 +0000 (+0000) Subject: - update versions for partially committed open request X-Git-Tag: v1_8_0_110~84 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=a6687b41436ba32c79f7a1ce450b6887213740ff;p=fs%2Flustre-release.git - update versions for partially committed open request b:19224 i:adilger,rread --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index c6159a7..343fb3a 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -185,6 +185,7 @@ extern unsigned int obd_alloc_fail_rate; #define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING 0x13f #define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 #define OBD_FAIL_MDS_LOV_PREP_CREATE 0x141 +#define OBD_FAIL_MDS_SPLIT_OPEN 0x142 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index f7e98e8..43c1793 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -814,6 +814,8 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, } if (!(body->valid & OBD_MD_FLEASIZE) && !(body->valid & OBD_MD_FLMODEASIZE)) { + /* split open transactions here */ + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_SPLIT_OPEN, 10); /* no EA: create objects */ rc = mds_create_objects(req, DLM_REPLY_REC_OFF + 1, rec, mds, obd, dchild, handle, &lmm); @@ -874,10 +876,11 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, { struct obd_device *obd = req->rq_export->exp_obd; struct mds_obd *mds = mds_req2mds(req); - struct dentry *dchild; + struct dentry *dchild, *dparent = NULL; char fidname[LL_FID_NAMELEN]; int fidlen = 0, rc; void *handle = NULL; + struct inode *inodes[PTLRPC_NUM_VERSIONS] = { NULL }; ENTRY; fidlen = ll_fid2str(fidname, fid->id, fid->generation); @@ -893,6 +896,7 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, CWARN("Orphan %s found and opened in PENDING directory\n", fidname); } else { + __u64 *pre_versions = lustre_msg_get_versions(req->rq_reqmsg); l_dput(dchild); /* We didn't find it in PENDING so it isn't an orphan. See @@ -900,6 +904,33 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, dchild = mds_fid2dentry(mds, fid, NULL); if (IS_ERR(dchild)) RETURN(PTR_ERR(dchild)); + /** + * bug19224 + * this can be replay of partially committed open|create, + * the create itself was committed while LOV EA weren't + * We need to set versions again if conditions are: + * - this is replay + * - the transaction is greater than last_committed + * - this was open|create + * - there was real create so parent pre_version was saved + */ + if ((lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && + (lustre_msg_get_transno(req->rq_reqmsg) > + req->rq_export->exp_last_committed) && + (rec->ur_flags & MDS_OPEN_CREAT) && + pre_versions[0] != 0) { + /* need parent to set version */ + dparent = mds_fid2dentry(mds, rec->ur_fid1, NULL); + if (IS_ERR(dparent)) { + CERROR("Can't find parent for open replay\n"); + l_dput(dchild); + RETURN(PTR_ERR(dparent)); + } + /* though file was created, the versions were not + * changed yet, need to replay that too */ + inodes[0] = dparent->d_inode; + inodes[1] = dchild->d_inode; + } } mds_pack_inode2body(body, dchild->d_inode); @@ -907,10 +938,11 @@ static int mds_open_by_fid(struct ptlrpc_request *req, struct ll_fid *fid, ldlm_reply_set_disposition(rep, DISP_LOOKUP_POS); rc = mds_finish_open(req, dchild, body, flags, &handle, rec, rep, NULL); - rc = mds_finish_transno(mds, NULL, handle, + rc = mds_finish_transno(mds, inodes, handle, req, rc, rep ? rep->lock_policy_res1 : 0, 0); /* XXX what do we do here if mds_finish_transno itself failed? */ + l_dput(dparent); l_dput(dchild); RETURN(rc); } diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index ed9a913..75a3623 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -799,7 +799,6 @@ test_40(){ } run_test 40 "cause recovery in ptlrpc, ensure IO continues" - #b=2814 # make sure that a read to one osc doesn't try to double-unlock its page just # because another osc is invalid. trigger_group_io used to mistakenly return @@ -2046,6 +2045,25 @@ test_82() { #bug 18927 } run_test 82 "second open|creat in replay with open orphan" +test_83() { #bug 19224 +#define OBD_FAIL_MDS_SPLIT_OPEN 0x142 + do_facet mds "lctl set_param fail_loc=0x80000142" + # open will sleep after first transaction + touch $DIR/$tfile & + PID=$! + sleep 2 + # set barrier between open transactions + replay_barrier_nodf mds + createmany -o $DIR/$tfile- 10 + # open should finish now + wait $PID || return 1 + fail mds + rm $DIR/$tfile || return 2 + unlinkmany $DIR/$tfile- 10 || return 3 + return 0 +} +run_test 83 "open replay with barrier between transactions" + equals_msg `basename $0`: test complete, cleaning up check_and_cleanup_lustre [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true