From db2b3c41e09cd20a31e80a1f8fb5c4cec2590f99 Mon Sep 17 00:00:00 2001
From: johann
Date: Fri, 6 Feb 2009 22:40:06 +0000
Subject: [PATCH] Branch b_release_1_6_7 b=18374 i=johann (patch from Tappro) i=shadow

Return only valid cookies, empty slot for failed ones.
This fixes a memory corruption causing random oops.
---
 lustre/include/obd_support.h  |  1 +
 lustre/lov/lov_log.c          | 21 +++++++++++++++++----
 lustre/tests/replay-single.sh | 21 +++++++++++++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
index 3362d31..e446d23 100644
--- a/lustre/include/obd_support.h
+++ b/lustre/include/obd_support.h
@@ -168,6 +168,7 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b
 #define OBD_FAIL_MDS_BLOCK_QUOTA_REQ 0x13c
 #define OBD_FAIL_MDS_DROP_QUOTA_REQ 0x13d
+#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
 
 #define OBD_FAIL_OST 0x200
 #define OBD_FAIL_OST_CONNECT_NET 0x201
diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c
index 59d2e9d..d4964b0 100644
--- a/lustre/lov/lov_log.c
+++ b/lustre/lov/lov_log.c
@@ -74,7 +74,7 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
 {
         struct obd_device *obd = ctxt->loc_obd;
         struct lov_obd *lov = &obd->u.lov;
-        int i, rc = 0;
+        int i, rc = 0, cookies = 0;
         ENTRY;
 
         LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count,
@@ -105,12 +105,25 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
                         break;
                 }
 
-                rc += llog_add(cctxt, rec, NULL, logcookies + rc,
-                               numcookies - rc);
+                /* inject error in llog_add() below */
+                if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FAIL_LOV_LOG_ADD)) {
+                        llog_ctxt_put(cctxt);
+                        cctxt = NULL;
+                }
+                rc = llog_add(cctxt, rec, NULL, logcookies + cookies,
+                              numcookies - cookies);
                 llog_ctxt_put(cctxt);
+                if (rc < 0) {
+                        CERROR("Can't add llog (rc = %d) for stripe %i\n",
+                               rc, cookies);
+                        memset(logcookies + cookies, 0, sizeof(struct llog_cookie));
+                        rc = 1; /* skip this cookie */
+                }
+                /* Note that rc is always 1 if llog_add was successful */
+                cookies += rc;
         }
 
-        RETURN(rc);
+        RETURN(cookies);
 }
 
 static int lov_llog_origin_connect(struct llog_ctxt *ctxt,
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index f0e6a60..bd7ea8f 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -1733,6 +1733,27 @@ test_70b () {
 run_test 70b "mds recovery; $CLIENTCOUNT clients"
 # end multi-client tests
 
+test_81a() {
+        mkdir -p $DIR/$tdir
+        createmany -o $DIR/$tdir/$tfile- 10 || return 1
+#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
+        do_facet mds "lctl set_param fail_loc=0x80000140"
+        unlinkmany $DIR/$tdir/$tfile- 10 || return 2
+}
+run_test 81a "fail log_add during unlink recovery"
+
+test_81b() {
+        mkdir -p $DIR/$tdir
+        createmany -o $DIR/$tdir/$tfile- 10 || return 1
+        replay_barrier mds
+        unlinkmany $DIR/$tdir/$tfile- 10 || return 2
+#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140
+        do_facet mds "lctl set_param fail_loc=0x80000140"
+        fail mds
+}
+run_test 81b "fail log_add during unlink recovery"
+
+
 equals_msg `basename $0`: test complete, cleaning up
 check_and_cleanup_lustre
 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true
-- 
1.8.3.1