From 3402e6c6517cc15f159b19bf4462279285cd83f2 Mon Sep 17 00:00:00 2001 From: Wang Di Date: Sat, 15 Sep 2012 07:34:15 -0700 Subject: [PATCH] LU-1571 mdt: Do not update xid for open replay req Do not update last_xid for open replay req, otherwise the following resend (after replay) cannot be matched with the correct xid. Remove unnecessary mti_transno zero check in mdt_empty_transno. Signed-off-by: wang di Change-Id: I2a05f3ac05b301ae31641a1dc51f8c4eed96427d Reviewed-on: http://review.whamcloud.com/3195 Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Alex Zhuravlev --- lustre/mdt/mdt_open.c | 39 ++++++++++++++++++++++++++++++++------- lustre/mdt/mdt_recovery.c | 5 +++-- lustre/tests/recovery-small.sh | 23 +++++++++++++++++++++++ 3 files changed, 58 insertions(+), 9 deletions(-) diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index bdee514..d1fe097 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -577,13 +577,24 @@ static void mdt_empty_transno(struct mdt_thread_info *info, int rc) RETURN_EXIT; cfs_spin_lock(&mdt->mdt_lut.lut_translock); - if (info->mti_transno == 0) { + if (rc != 0) { + if (info->mti_transno != 0) { + struct obd_export *exp = req->rq_export; + + CERROR("%s: replay trans "LPU64" NID %s: rc = %d\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name, + info->mti_transno, + libcfs_nid2str(exp->exp_connection->c_peer.nid), + rc); + RETURN_EXIT; + } + } else if (info->mti_transno == 0) { info->mti_transno = ++ mdt->mdt_lut.lut_last_transno; } else { /* should be replay */ if (info->mti_transno > mdt->mdt_lut.lut_last_transno) mdt->mdt_lut.lut_last_transno = info->mti_transno; - } + } cfs_spin_unlock(&mdt->mdt_lut.lut_translock); CDEBUG(D_INODE, "transno = "LPU64", last_committed = "LPU64"\n", @@ -598,10 +609,23 @@ static void mdt_empty_transno(struct mdt_thread_info *info, int rc) LASSERT(ted); cfs_mutex_lock(&ted->ted_lcd_lock); lcd = ted->ted_lcd; + if (info->mti_transno < lcd->lcd_last_transno && 
info->mti_transno != 0) { + /* This should happen during replay. Do not update + * last rcvd info if replay req transno < last transno, + * otherwise the following resend(after replay) can not + * be checked correctly by xid */ + cfs_mutex_unlock(&ted->ted_lcd_lock); + CDEBUG(D_HA, "%s: transno = "LPU64" < last_transno = "LPU64"\n", + mdt->mdt_md_dev.md_lu_dev.ld_obd->obd_name, + info->mti_transno, lcd->lcd_last_transno); + RETURN_EXIT; + } + if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE || lustre_msg_get_opc(req->rq_reqmsg) == MDS_DONE_WRITING) { - if (info->mti_transno != 0) - lcd->lcd_last_close_transno = info->mti_transno; + if (info->mti_transno != 0) + lcd->lcd_last_close_transno = info->mti_transno; lcd->lcd_last_close_xid = req->rq_xid; lcd->lcd_last_close_result = rc; } else { @@ -613,9 +637,10 @@ static void mdt_empty_transno(struct mdt_thread_info *info, int rc) lcd->lcd_pre_versions[2] = pre_versions[2]; lcd->lcd_pre_versions[3] = pre_versions[3]; } - if (info->mti_transno != 0) - lcd->lcd_last_transno = info->mti_transno; - lcd->lcd_last_xid = req->rq_xid; + if (info->mti_transno != 0) + lcd->lcd_last_transno = info->mti_transno; + + lcd->lcd_last_xid = req->rq_xid; lcd->lcd_last_result = rc; lcd->lcd_last_data = info->mti_opdata; } diff --git a/lustre/mdt/mdt_recovery.c b/lustre/mdt/mdt_recovery.c index 98bae6f..30e8ece 100644 --- a/lustre/mdt/mdt_recovery.c +++ b/lustre/mdt/mdt_recovery.c @@ -515,8 +515,9 @@ static int mdt_txn_stop_cb(const struct lu_env *env, cfs_spin_lock(&mdt->mdt_lut.lut_translock); if (txn->th_result != 0) { if (mti->mti_transno != 0) { - CERROR("Replay transno "LPU64" failed: rc %d\n", - mti->mti_transno, txn->th_result); + CERROR("Replay transno "LPU64" failed: rc %d\n", + mti->mti_transno, txn->th_result); + return 0; } } else if (mti->mti_transno == 0) { mti->mti_transno = ++ mdt->mdt_lut.lut_last_transno; diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index aba5353..5c19142 100755 --- 
a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1542,6 +1542,29 @@ test_106() { # LU-1789 } run_test 106 "lightweight connection support" +test_107 () { + local CLIENT_PID + local close_pid + + mkdir -p $DIR/$tdir + # OBD_FAIL_MDS_REINT_NET_REP 0x119 + do_facet $SINGLEMDS lctl set_param fail_loc=0x119 + multiop $DIR/$tdir D_c & + close_pid=$! + mkdir $DIR/$tdir/dir_106 & + CLIENT_PID=$! + do_facet $SINGLEMDS lctl set_param fail_loc=0 + fail $SINGLEMDS + + wait $CLIENT_PID || rc=$? + checkstat -t dir $DIR/$tdir/dir_106 || return 1 + + kill -USR1 $close_pid + wait $close_pid || return 2 + + return $rc +} +run_test 107 "drop reint reply, then restart MDT" complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1