From f77b8242413f1a2afa7ab816e7087e97efe75567 Mon Sep 17 00:00:00 2001 From: johann Date: Mon, 9 Feb 2009 14:59:19 +0000 Subject: [PATCH] Branch b_release_1_8_0 i=johann (patch from Tappro) i=shadow Return only valid cookies, empty slot for failed ones. This fixes a memory corruption causing random oops. --- lustre/include/obd_support.h | 1 + lustre/lov/lov_log.c | 21 +++++++++++++++++---- lustre/tests/replay-single.sh | 21 +++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 67a0607..449a943 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -177,6 +177,7 @@ extern unsigned int obd_alloc_fail_rate; #define OBD_FAIL_MDS_DROP_QUOTA_REQ 0x13d #define OBD_FAIL_MDS_REMOVE_COMMON_EA 0x13e #define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING 0x13f +#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 8a5c39e..2271253 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -74,7 +74,7 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, { struct obd_device *obd = ctxt->loc_obd; struct lov_obd *lov = &obd->u.lov; - int i, rc = 0; + int i, rc = 0, cookies = 0; ENTRY; LASSERTF(logcookies && numcookies >= lsm->lsm_stripe_count, @@ -111,12 +111,25 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, break; } - rc += llog_add(cctxt, rec, NULL, logcookies + rc, - numcookies - rc); + /* inject error in llog_add() below */ + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FAIL_LOV_LOG_ADD)) { + llog_ctxt_put(cctxt); + cctxt = NULL; + } + rc = llog_add(cctxt, rec, NULL, logcookies + cookies, + numcookies - cookies); llog_ctxt_put(cctxt); + if (rc < 0) { + CERROR("Can't add llog (rc = %d) for stripe %i\n", + rc, cookies); + memset(logcookies + cookies, 0, sizeof(struct llog_cookie)); + rc = 1; /* skip this cookie */ + } + /* Note that rc is always 1 if llog_add was successful */ + cookies += rc; } - RETURN(rc); + RETURN(cookies); } static int lov_llog_origin_connect(struct llog_ctxt *ctxt, diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 0925803..b6d9d8b 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -1948,6 +1948,27 @@ test_80b() { } run_test 80b "write replay with changed data (checksum resend)" +test_81a() { + mkdir -p $DIR/$tdir + createmany -o $DIR/$tdir/$tfile- 10 || return 1 +#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 + do_facet mds "lctl set_param fail_loc=0x80000140" + unlinkmany $DIR/$tdir/$tfile- 10 || return 2 +} +run_test 81a "fail log_add during unlink recovery" + +test_81b() { + mkdir -p $DIR/$tdir + createmany -o $DIR/$tdir/$tfile- 10 || return 1 + replay_barrier mds + unlinkmany $DIR/$tdir/$tfile- 10 || return 2 +#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 + do_facet mds "lctl set_param fail_loc=0x80000140" + fail mds +} +run_test 81b "fail log_add during unlink recovery" + + equals_msg `basename $0`: test complete, cleaning up check_and_cleanup_lustre [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true -- 1.8.3.1