From a5b9cdae6f45ed12f6560a641f2eab207c36a69f Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Fri, 14 Feb 2014 23:07:58 +0400 Subject: [PATCH] LU-4290 llog: discard unavailable records and keep going if llog can't process some records due to I/O errors or corruption, just discard them from the header and keep going. a new test added to verity this behavior. Signed-off-by: Alex Zhuravlev Change-Id: Id0dc83ae6239cd55a43eec128b3c750bb9f0894a Reviewed-on: http://review.whamcloud.com/9281 Tested-by: Jenkins Reviewed-by: Andreas Dilger Reviewed-by: Mike Pershin Tested-by: Maloo --- lustre/obdclass/llog.c | 14 +++ lustre/obdclass/llog_test.c | 204 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+) diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index e77eb63..7ff0dd7 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -400,6 +400,20 @@ out: if (cd != NULL) cd->lpcd_last_idx = last_called_index; + if (unlikely(rc == -EIO)) { + /* something bad happened to the processing, probably I/O + * error or the log got corrupted.. + * to be able to finally release the log we discard any + * remaining bits in the header */ + CERROR("llog found corrupted\n"); + while (index <= last_index) { + if (ext2_test_bit(index, llh->llh_bitmap) != 0) + llog_cancel_rec(lpi->lpi_env, loghandle, index); + index++; + } + rc = 0; + } + OBD_FREE(buf, LLOG_CHUNK_SIZE); lpi->lpi_rc = rc; return 0; diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c index e0362ab..3a91918 100644 --- a/lustre/obdclass/llog_test.c +++ b/lustre/obdclass/llog_test.c @@ -898,6 +898,206 @@ out: RETURN(rc); } +static int llog_truncate(const struct lu_env *env, struct dt_object *o) +{ + struct lu_attr la; + struct thandle *th; + struct dt_device *d; + int rc; + ENTRY; + + LASSERT(o); + d = lu2dt_dev(o->do_lu.lo_dev); + LASSERT(d); + + rc = dt_attr_get(env, o, &la, NULL); + if (rc) + RETURN(rc); + + CDEBUG(D_OTHER, "original size %Lu\n", la.la_size); + rc = sizeof(struct llog_log_hdr) + sizeof(struct llog_mini_rec); + if (la.la_size < rc) { + CERROR("too small llog: %Lu\n", la.la_size); + RETURN(0); + } + + /* drop 2 records */ + la.la_size = la.la_size - (sizeof(struct llog_mini_rec) * 2); + la.la_valid = LA_SIZE; + + th = dt_trans_create(env, d); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + rc = dt_declare_attr_set(env, o, &la, th); + if (rc) + GOTO(stop, rc); + + rc = dt_declare_punch(env, o, la.la_size, OBD_OBJECT_EOF, th); + + rc = dt_trans_start_local(env, d, th); + if (rc) + GOTO(stop, rc); + + rc = dt_punch(env, o, la.la_size, OBD_OBJECT_EOF, th, BYPASS_CAPA); + if (rc) + GOTO(stop, rc); + + rc = dt_attr_set(env, o, &la, th, BYPASS_CAPA); + if (rc) + GOTO(stop, rc); + +stop: + dt_trans_stop(env, d, th); + + RETURN(rc); +} + +static int test_8_cb(const struct lu_env *env, struct llog_handle *llh, + struct llog_rec_hdr *rec, void *data) +{ + plain_counter++; + return 0; +} + +static int llog_test_8(const struct lu_env *env, struct obd_device *obd) +{ + struct llog_handle *llh = NULL; + char name[10]; + int rc, rc2, i; + int orig_counter; + struct llog_mini_rec lmr; + struct llog_ctxt *ctxt; + struct dt_object *obj = NULL; + + ENTRY; + + ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT); + LASSERT(ctxt); + + lmr.lmr_hdr.lrh_len = lmr.lmr_tail.lrt_len = LLOG_MIN_REC_SIZE; + lmr.lmr_hdr.lrh_type = 0xf00f00; + + CWARN("8a: fill the first plain llog\n"); + rc = llog_open(env, ctxt, &llh, &cat_logid, NULL, LLOG_OPEN_EXISTS); + if (rc) { + CERROR("8a: llog_create with logid failed: %d\n", rc); + GOTO(out_put, rc); + } + + rc = llog_init_handle(env, llh, LLOG_F_IS_CAT, &uuid); + if (rc) { + CERROR("8a: can't init llog handle: %d\n", rc); + GOTO(out, rc); + } + + plain_counter = 0; + rc = llog_cat_process(env, llh, test_8_cb, "foobar", 0, 0); + if (rc != 0) { + CERROR("5a: process with cat_cancel_cb failed: %d\n", rc); + GOTO(out, rc); + } + orig_counter = plain_counter; + + for (i = 0; i < 100; i++) { + rc = llog_cat_add(env, llh, &lmr.lmr_hdr, NULL, NULL); + if (rc) { + CERROR("5a: add record failed\n"); + GOTO(out, rc); + } + } + + /* grab the current plain llog, we'll corrupt it later */ + obj = llh->u.chd.chd_current_log->lgh_obj; + LASSERT(obj); + lu_object_get(&obj->do_lu); + CWARN("8a: pin llog "DFID"\n", PFID(lu_object_fid(&obj->do_lu))); + + rc2 = llog_cat_close(env, llh); + if (rc2) { + CERROR("8a: close log %s failed: %d\n", name, rc2); + if (rc == 0) + rc = rc2; + GOTO(out_put, rc); + } + + CWARN("8b: fill the second plain llog\n"); + rc = llog_open(env, ctxt, &llh, &cat_logid, NULL, LLOG_OPEN_EXISTS); + if (rc) { + CERROR("8b: llog_create with logid failed: %d\n", rc); + GOTO(out_put, rc); + } + + rc = llog_init_handle(env, llh, LLOG_F_IS_CAT, &uuid); + if (rc) { + CERROR("8b: can't init llog handle: %d\n", rc); + GOTO(out, rc); + } + + for (i = 0; i < 100; i++) { + rc = llog_cat_add(env, llh, &lmr.lmr_hdr, NULL, NULL); + if (rc) { + CERROR("8b: add record failed\n"); + GOTO(out, rc); + } + } + CWARN("8b: second llog "DFID"\n", + PFID(lu_object_fid(&llh->u.chd.chd_current_log->lgh_obj->do_lu))); + + rc2 = llog_cat_close(env, llh); + if (rc2) { + CERROR("8b: close log %s failed: %d\n", name, rc2); + if (rc == 0) + rc = rc2; + GOTO(out_put, rc); + } + + CWARN("8c: drop two records from the first plain llog\n"); + llog_truncate(env, obj); + + CWARN("8d: count survived records\n"); + rc = llog_open(env, ctxt, &llh, &cat_logid, NULL, LLOG_OPEN_EXISTS); + if (rc) { + CERROR("8d: llog_create with logid failed: %d\n", rc); + GOTO(out_put, rc); + } + + rc = llog_init_handle(env, llh, LLOG_F_IS_CAT, &uuid); + if (rc) { + CERROR("8d: can't init llog handle: %d\n", rc); + GOTO(out, rc); + } + + plain_counter = 0; + rc = llog_cat_process(env, llh, test_8_cb, "foobar", 0, 0); + if (rc != 0) { + CERROR("8d: process with cat_cancel_cb failed: %d\n", rc); + GOTO(out, rc); + } + + if (orig_counter + 200 - 2 != plain_counter) { + CERROR("found %d records (expected %d)\n", plain_counter, + orig_counter + 200 - 2); + rc = -EIO; + } + +out: + CWARN("8d: close re-opened catalog\n"); + rc2 = llog_cat_close(env, llh); + if (rc2) { + CERROR("8d: close log %s failed: %d\n", name, rc2); + if (rc == 0) + rc = rc2; + } +out_put: + llog_ctxt_put(ctxt); + + if (obj != NULL) + lu_object_put(env, &obj->do_lu); + + RETURN(rc); +} + /* ------------------------------------------------------------------------- * Tests above, boring obd functions below * ------------------------------------------------------------------------- */ @@ -942,6 +1142,10 @@ static int llog_run_tests(const struct lu_env *env, struct obd_device *obd) if (rc) GOTO(cleanup, rc); + rc = llog_test_8(env, obd); + if (rc) + GOTO(cleanup, rc); + cleanup: err = llog_destroy(env, llh); if (err) -- 1.8.3.1