X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fllog.c;h=12e179eff74f575f1896be70a7558fc6a0def300;hb=d813c75df6798efbf3228347628c0d671ca7269c;hp=2f8c0463c7e4e884bd70e56f49ec7d3aa997bff6;hpb=bc7f8cfe0fc6a5977d452c4637e340cd63081bdc;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 2f8c046..12e179e 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lustre/obdclass/llog.c * @@ -50,6 +49,7 @@ #include #include #include "llog_internal.h" + /* * Allocate a new log or catalog handle * Used inside llog_open(). @@ -103,7 +103,7 @@ int llog_handle_put(const struct lu_env *env, struct llog_handle *loghandle) int rc = 0; if (refcount_dec_and_test(&loghandle->lgh_refcount)) { - struct llog_operations *lop; + const struct llog_operations *lop; rc = llog_handle2ops(loghandle, &lop); if (!rc) { @@ -121,7 +121,7 @@ static int llog_declare_destroy(const struct lu_env *env, struct llog_handle *handle, struct thandle *th) { - struct llog_operations *lop; + const struct llog_operations *lop; int rc; ENTRY; @@ -140,7 +140,7 @@ static int llog_declare_destroy(const struct lu_env *env, int llog_trans_destroy(const struct lu_env *env, struct llog_handle *handle, struct thandle *th) { - struct llog_operations *lop; + const struct llog_operations *lop; int rc; ENTRY; @@ -161,9 +161,9 @@ int llog_trans_destroy(const struct lu_env *env, struct llog_handle *handle, int llog_destroy(const struct lu_env *env, struct llog_handle *handle) { - struct llog_operations *lop; - struct dt_device *dt; - struct thandle *th; + const struct llog_operations *lop; + struct dt_device *dt; + struct thandle *th; int rc; ENTRY; @@ -304,7 +304,7 @@ int llog_cancel_arr_rec(const struct lu_env *env, struct llog_handle *loghandle, * be accessed anymore, let's return 0 for now, and * the orphan will be handled by LFSCK. */ CERROR("%s: can't destroy empty llog "DFID": rc = %d\n", - loghandle->lgh_ctxt->loc_obd->obd_name, + loghandle2name(loghandle), PFID(&loghandle->lgh_id.lgl_oi.oi_fid), rc); GOTO(out_unlock, rc = 0); } @@ -312,13 +312,22 @@ int llog_cancel_arr_rec(const struct lu_env *env, struct llog_handle *loghandle, } out_unlock: + if (rc < 0) { + /* restore bitmap while holding a mutex */ + if (subtract_count) { + loghandle->lgh_hdr->llh_count += num; + subtract_count = false; + } + for (i = i - 1; i >= 0; i--) + set_bit_le(index[i], LLOG_HDR_BITMAP(llh)); + } mutex_unlock(&loghandle->lgh_hdr_mutex); up_write(&loghandle->lgh_lock); out_trans: rc1 = dt_trans_stop(env, dt, th); if (rc == 0) rc = rc1; - if (rc < 0) { + if (rc1 < 0) { mutex_lock(&loghandle->lgh_hdr_mutex); if (subtract_count) loghandle->lgh_hdr->llh_count += num; @@ -338,7 +347,7 @@ int llog_cancel_rec(const struct lu_env *env, struct llog_handle *loghandle, int llog_read_header(const struct lu_env *env, struct llog_handle *handle, const struct obd_uuid *uuid) { - struct llog_operations *lop; + const struct llog_operations *lop; int rc; ENTRY; @@ -407,7 +416,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle, (llh->llh_flags & LLOG_F_IS_CAT && flags & LLOG_F_IS_PLAIN))) { CERROR("%s: llog type is %s but initializing %s\n", - handle->lgh_ctxt->loc_obd->obd_name, + loghandle2name(handle), llh->llh_flags & LLOG_F_IS_CAT ? "catalog" : "plain", flags & LLOG_F_IS_CAT ? "catalog" : "plain"); @@ -427,7 +436,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle, if (unlikely(uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid))) { CERROR("%s: llog uuid mismatch: %s/%s\n", - handle->lgh_ctxt->loc_obd->obd_name, + loghandle2name(handle), (char *)uuid->uuid, (char *)llh->llh_tgtuuid.uuid); GOTO(out, rc = -EEXIST); @@ -440,8 +449,8 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle, llh->llh_flags |= LLOG_F_IS_FIXSIZE; } else if (!(flags & LLOG_F_IS_PLAIN)) { CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n", - handle->lgh_ctxt->loc_obd->obd_name, - flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN); + loghandle2name(handle), flags, LLOG_F_IS_CAT, + LLOG_F_IS_PLAIN); rc = -EINVAL; } llh->llh_flags |= fmt; @@ -454,6 +463,30 @@ out: } EXPORT_SYMBOL(llog_init_handle); +int llog_verify_record(const struct llog_handle *llh, struct llog_rec_hdr *rec) +{ + int chunk_size = llh->lgh_hdr->llh_hdr.lrh_len; + + if (rec->lrh_len == 0 || rec->lrh_len > chunk_size) { + CERROR("%s: record is too large: %d > %d\n", + loghandle2name(llh), rec->lrh_len, chunk_size); + return -EINVAL; + } + if (rec->lrh_index >= LLOG_HDR_BITMAP_SIZE(llh->lgh_hdr)) { + CERROR("%s: index is too high: %d\n", + loghandle2name(llh), rec->lrh_index); + return -EINVAL; + } + if ((rec->lrh_type & LLOG_OP_MASK) != LLOG_OP_MAGIC) { + CERROR("%s: magic %x is bad\n", + loghandle2name(llh), rec->lrh_type); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(llog_verify_record); + static int llog_process_thread(void *arg) { struct llog_process_info *lpi = arg; @@ -468,6 +501,7 @@ static int llog_process_thread(void *arg) int saved_index = 0; int last_called_index = 0; bool repeated = false; + bool refresh_idx = false; ENTRY; @@ -526,6 +560,8 @@ repeat: CDEBUG(D_OTHER, "cur_offset %llu, chunk_offset %llu," " buf_offset %u, rc = %d\n", cur_offset, (__u64)chunk_offset, buf_offset, rc); + if (rc == -ESTALE) + GOTO(out, rc = 0); /* we`ve tried to reread the chunk, but there is no * new records */ if (rc == -EIO && repeated && (chunk_offset + buf_offset) == @@ -611,15 +647,21 @@ repeat: repeated = false; - if (rec->lrh_len == 0 || rec->lrh_len > chunk_size) { - CWARN("%s: invalid length %d in llog "DFID - "record for index %d/%d\n", - loghandle->lgh_ctxt->loc_obd->obd_name, - rec->lrh_len, + rc = llog_verify_record(loghandle, rec); + if (rc) { + CERROR("%s: invalid record in llog "DFID + " record for index %d/%d: rc = %d\n", + loghandle2name(loghandle), PFID(&loghandle->lgh_id.lgl_oi.oi_fid), - rec->lrh_index, index); - - GOTO(out, rc = -EINVAL); + rec->lrh_index, index, rc); + /* + * the block seem to be corrupted, let's try + * with the next one. reset rc to go to the + * next chunk. + */ + refresh_idx = true; + index = 0; + GOTO(repeat, rc = 0); } if (rec->lrh_index < index) { @@ -629,12 +671,22 @@ repeat: } if (rec->lrh_index != index) { - CERROR("%s: "DFID" Invalid record: index %u" - " but expected %u\n", - loghandle->lgh_ctxt->loc_obd->obd_name, - PFID(&loghandle->lgh_id.lgl_oi.oi_fid), - rec->lrh_index, index); - GOTO(out, rc = -ERANGE); + /* + * the last time we couldn't parse the block due + * to corruption, thus has no idea about the + * next index, take it from the block, once. + */ + if (refresh_idx) { + refresh_idx = false; + index = rec->lrh_index; + } else { + CERROR("%s: "DFID" Invalid record: index" + " %u but expected %u\n", + loghandle2name(loghandle), + PFID(&loghandle->lgh_id.lgl_oi.oi_fid), + rec->lrh_index, index); + GOTO(out, rc = -ERANGE); + } } CDEBUG(D_OTHER, @@ -686,6 +738,12 @@ repeat: rc = llog_cancel_rec(lpi->lpi_env, loghandle, rec->lrh_index); + /* Allow parallel cancelling, ENOENT + * means record was canceled at another + * processing thread or callback + */ + if (rc == -ENOENT) + rc = 0; } if (rc) GOTO(out, rc); @@ -720,7 +778,7 @@ out: * retry until the umount or abort recovery, see * lod_sub_recovery_thread() */ CERROR("%s retry remote llog process\n", - loghandle->lgh_ctxt->loc_obd->obd_name); + loghandle2name(loghandle)); rc = -EAGAIN; } else { /* something bad happened to the processing of a local @@ -729,7 +787,7 @@ out: * discard any remaining bits in the header */ CERROR("%s: Local llog found corrupted #"DOSTID":%x" " %s index %d count %d\n", - loghandle->lgh_ctxt->loc_obd->obd_name, + loghandle2name(loghandle), POSTID(&loghandle->lgh_id.lgl_oi), loghandle->lgh_id.lgl_ogen, ((llh->llh_flags & LLOG_F_IS_CAT) ? "catalog" : @@ -774,8 +832,6 @@ static int llog_process_thread_daemonize(void *arg) } task_unlock(lpi->lpi_reftask); - unshare_fs_struct(); - /* client env has no keys, tags is just 0 */ rc = lu_env_init(&env, LCT_LOCAL | LCT_MG_THREAD); if (rc) @@ -833,7 +889,7 @@ int llog_process_or_fork(const struct lu_env *env, if (IS_ERR(task)) { rc = PTR_ERR(task); CERROR("%s: cannot start thread: rc = %d\n", - loghandle->lgh_ctxt->loc_obd->obd_name, rc); + loghandle2name(loghandle), rc); GOTO(out_lpi, rc); } wait_for_completion(&lpi->lpi_completion); @@ -984,8 +1040,8 @@ EXPORT_SYMBOL(llog_reverse_process); */ int llog_exist(struct llog_handle *loghandle) { - struct llog_operations *lop; - int rc; + const struct llog_operations *lop; + int rc; ENTRY; @@ -1004,7 +1060,7 @@ int llog_declare_create(const struct lu_env *env, struct llog_handle *loghandle, struct thandle *th) { const struct cred *old_cred; - struct llog_operations *lop; + const struct llog_operations *lop; int rc; ENTRY; @@ -1025,7 +1081,7 @@ int llog_create(const struct lu_env *env, struct llog_handle *handle, struct thandle *th) { const struct cred *old_cred; - struct llog_operations *lop; + const struct llog_operations *lop; int rc; ENTRY; @@ -1048,7 +1104,7 @@ int llog_declare_write_rec(const struct lu_env *env, struct thandle *th) { const struct cred *old_cred; - struct llog_operations *lop; + const struct llog_operations *lop; int rc; ENTRY; @@ -1071,7 +1127,7 @@ int llog_write_rec(const struct lu_env *env, struct llog_handle *handle, int idx, struct thandle *th) { const struct cred *old_cred; - struct llog_operations *lop; + const struct llog_operations *lop; int rc, buflen; ENTRY; @@ -1086,12 +1142,11 @@ int llog_write_rec(const struct lu_env *env, struct llog_handle *handle, RETURN(-EPROTO); } else if (th == NULL) { CERROR("%s: missed transaction handle\n", - handle->lgh_obj->do_lu.lo_dev->ld_obd->obd_name); + loghandle2name(handle)); RETURN(-EPROTO); } else if (handle->lgh_hdr == NULL) { CERROR("%s: loghandle %p with no header\n", - handle->lgh_obj->do_lu.lo_dev->ld_obd->obd_name, - handle); + loghandle2name(handle), handle); RETURN(-EPROTO); } @@ -1453,8 +1508,8 @@ __u64 llog_size(const struct lu_env *env, struct llog_handle *llh) rc = llh->lgh_obj->do_ops->do_attr_get(env, llh->lgh_obj, &la); if (rc) { CERROR("%s: attr_get failed for "DFID": rc = %d\n", - llh->lgh_ctxt->loc_obd->obd_name, - PFID(&llh->lgh_id.lgl_oi.oi_fid), rc); + loghandle2name(llh), PFID(&llh->lgh_id.lgl_oi.oi_fid), + rc); return 0; }