/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Copyright (C) 2001-2003 Cluster File Systems, Inc.
- * Author: Andreas Dilger <adilger@clusterfs.com>
+ * GPL HEADER START
*
- * This file is part of Lustre, http://www.lustre.org.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * OST<->MDS recovery logging infrastructure.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
*
+ * lustre/obdclass/llog.c
+ *
+ * OST<->MDS recovery logging infrastructure.
* Invariants in implementation:
* - we do not share logs among different OST<->MDS connections, so that
* if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
*/
#define DEBUG_SUBSYSTEM S_LOG
#define EXPORT_SYMTAB
#endif
-#ifdef __KERNEL__
-#include <linux/fs.h>
-#else
+#ifndef __KERNEL__
#include <liblustre.h>
#endif
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
-#include <portals/list.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <libcfs/list.h>
+#include "llog_internal.h"
/* Allocate a new log or catalog handle */
struct llog_handle *llog_alloc_handle(void)
int rc = 0;
ENTRY;
- CDEBUG(D_HA, "canceling %d in log "LPX64"\n",
+ CDEBUG(D_RPCTRACE, "Canceling %d in log "LPX64"\n",
index, loghandle->lgh_id.lgl_oid);
if (index == 0) {
- CERROR("cannot cancel index 0 (which is header)\n");
+ CERROR("Can't cancel index 0 which is header\n");
RETURN(-EINVAL);
}
if (!ext2_clear_bit(index, llh->llh_bitmap)) {
- CERROR("catalog index %u already clear?\n", index);
- RETURN(-EINVAL);
+ CDEBUG(D_RPCTRACE, "Catalog index %u already clear?\n", index);
+ RETURN(-ENOENT);
}
llh->llh_count--;
(llh->llh_count == 1) &&
(loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
rc = llog_destroy(loghandle);
- if (rc)
- CERROR("failure destroying log after last cancel: %d\n",
+ if (rc) {
+ CERROR("Failure destroying log after last cancel: %d\n",
rc);
- LASSERT(rc == 0);
- RETURN(1);
+ ext2_set_bit(index, llh->llh_bitmap);
+ llh->llh_count++;
+ } else {
+ rc = 1;
+ }
+ RETURN(rc);
}
rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
- if (rc)
- CERROR("failure re-writing header %d\n", rc);
- LASSERT(rc == 0);
+ if (rc) {
+ CERROR("Failure re-writing header %d\n", rc);
+ ext2_set_bit(index, llh->llh_bitmap);
+ llh->llh_count++;
+ }
RETURN(rc);
}
EXPORT_SYMBOL(llog_cancel_rec);
rc = llog_read_header(handle);
if (rc == 0) {
flags = llh->llh_flags;
- if (uuid)
- LASSERT(obd_uuid_equals(uuid, &llh->llh_tgtuuid));
+ if (uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid)) {
+ CERROR("uuid mismatch: %s/%s\n", (char *)uuid->uuid,
+ (char *)llh->llh_tgtuuid.uuid);
+ rc = -EEXIST;
+ }
GOTO(out, rc);
} else if (rc != LLOG_EEMPTY || !flags) {
/* set a pesudo flag for initialization */
llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
- llh->llh_timestamp = LTIME_S(CURRENT_TIME);
+ llh->llh_timestamp = cfs_time_current_sec();
if (uuid)
memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap);
out:
if (flags & LLOG_F_IS_CAT) {
- INIT_LIST_HEAD(&handle->u.chd.chd_head);
+ CFS_INIT_LIST_HEAD(&handle->u.chd.chd_head);
llh->llh_size = sizeof(struct llog_logid_rec);
- }
- else if (flags & LLOG_F_IS_PLAIN)
- INIT_LIST_HEAD(&handle->u.phd.phd_entry);
- else
+ } else if (flags & LLOG_F_IS_PLAIN) {
+ CFS_INIT_LIST_HEAD(&handle->u.phd.phd_entry);
+ } else {
+ CERROR("Unknown flags: %#x (Expected %#x or %#x\n",
+ flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
LBUG();
+ }
if (rc) {
OBD_FREE(llh, sizeof(*llh));
}
EXPORT_SYMBOL(llog_close);
+static int llog_process_thread(void *arg)
+{
+ struct llog_process_info *lpi = (struct llog_process_info *)arg;
+ struct llog_handle *loghandle = lpi->lpi_loghandle;
+ struct llog_log_hdr *llh = loghandle->lgh_hdr;
+ struct llog_process_cat_data *cd = lpi->lpi_catdata;
+ char *buf;
+ __u64 cur_offset = LLOG_CHUNK_SIZE;
+ __u64 last_offset;
+ int rc = 0, index = 1, last_index;
+ int saved_index = 0, last_called_index = 0;
+
+ LASSERT(llh);
+
+ OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
+ if (!buf) {
+ lpi->lpi_rc = -ENOMEM;
+#ifdef __KERNEL__
+ complete(&lpi->lpi_completion);
+#endif
+ return 0;
+ }
+
+ cfs_daemonize_ctxt("llog_process_thread");
+
+ if (cd != NULL) {
+ last_called_index = cd->lpcd_first_idx;
+ index = cd->lpcd_first_idx + 1;
+ }
+ if (cd != NULL && cd->lpcd_last_idx)
+ last_index = cd->lpcd_last_idx;
+ else
+ last_index = LLOG_BITMAP_BYTES * 8 - 1;
+
+ while (rc == 0) {
+ struct llog_rec_hdr *rec;
+
+ /* skip records not set in bitmap */
+ while (index <= last_index &&
+ !ext2_test_bit(index, llh->llh_bitmap))
+ ++index;
+
+ LASSERT(index <= last_index + 1);
+ if (index == last_index + 1)
+ break;
+
+ CDEBUG(D_OTHER, "index: %d last_index %d\n",
+ index, last_index);
+
+ /* get the buf with our target record; avoid old garbage */
+ memset(buf, 0, LLOG_CHUNK_SIZE);
+ last_offset = cur_offset;
+ rc = llog_next_block(loghandle, &saved_index, index,
+ &cur_offset, buf, LLOG_CHUNK_SIZE);
+ if (rc)
+ GOTO(out, rc);
+
+ /* NB: when rec->lrh_len is accessed it is already swabbed
+ * since it is used at the "end" of the loop and the rec
+ * swabbing is done at the beginning of the loop. */
+ for (rec = (struct llog_rec_hdr *)buf;
+ (char *)rec < buf + LLOG_CHUNK_SIZE;
+ rec = (struct llog_rec_hdr *)((char *)rec + rec->lrh_len)){
+
+ CDEBUG(D_OTHER, "processing rec 0x%p type %#x\n",
+ rec, rec->lrh_type);
+
+ if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
+ lustre_swab_llog_rec(rec, NULL);
+
+ CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
+ rec->lrh_type, rec->lrh_index);
+
+ if (rec->lrh_index == 0)
+ GOTO(out, 0); /* no more records */
+
+ if (rec->lrh_len == 0 || rec->lrh_len >LLOG_CHUNK_SIZE){
+ CWARN("invalid length %d in llog record for "
+ "index %d/%d\n", rec->lrh_len,
+ rec->lrh_index, index);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ if (rec->lrh_index < index) {
+ CDEBUG(D_OTHER, "skipping lrh_index %d\n",
+ rec->lrh_index);
+ continue;
+ }
+
+ CDEBUG(D_OTHER,
+ "lrh_index: %d lrh_len: %d (%d remains)\n",
+ rec->lrh_index, rec->lrh_len,
+ (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));
+
+ loghandle->lgh_cur_idx = rec->lrh_index;
+ loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
+ last_offset;
+
+ /* if set, process the callback on this record */
+ if (ext2_test_bit(index, llh->llh_bitmap)) {
+ rc = lpi->lpi_cb(loghandle, rec,
+ lpi->lpi_cbdata);
+ last_called_index = index;
+ if (rc == LLOG_PROC_BREAK) {
+ GOTO(out, rc);
+ } else if (rc == LLOG_DEL_RECORD) {
+ llog_cancel_rec(loghandle,
+ rec->lrh_index);
+ rc = 0;
+ }
+ if (rc)
+ GOTO(out, rc);
+ } else {
+ CDEBUG(D_OTHER, "Skipped index %d\n", index);
+ }
+
+ /* next record, still in buffer? */
+ ++index;
+ if (index > last_index)
+ GOTO(out, rc = 0);
+ }
+ }
+
+ out:
+ if (cd != NULL)
+ cd->lpcd_last_idx = last_called_index;
+ if (buf)
+ OBD_FREE(buf, LLOG_CHUNK_SIZE);
+ lpi->lpi_rc = rc;
+#ifdef __KERNEL__
+ complete(&lpi->lpi_completion);
+#endif
+ return 0;
+}
+
int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
void *data, void *catdata)
{
+ struct llog_process_info *lpi;
+ int rc;
+ ENTRY;
+
+ OBD_ALLOC_PTR(lpi);
+ if (lpi == NULL) {
+ CERROR("cannot alloc pointer\n");
+ RETURN(-ENOMEM);
+ }
+ lpi->lpi_loghandle = loghandle;
+ lpi->lpi_cb = cb;
+ lpi->lpi_cbdata = data;
+ lpi->lpi_catdata = catdata;
+
+#ifdef __KERNEL__
+ init_completion(&lpi->lpi_completion);
+ rc = cfs_kernel_thread(llog_process_thread, lpi, CLONE_VM | CLONE_FILES);
+ if (rc < 0) {
+ CERROR("cannot start thread: %d\n", rc);
+ OBD_FREE_PTR(lpi);
+ RETURN(rc);
+ }
+ wait_for_completion(&lpi->lpi_completion);
+#else
+ llog_process_thread(lpi);
+#endif
+ rc = lpi->lpi_rc;
+ OBD_FREE_PTR(lpi);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(llog_process);
+
+inline int llog_get_size(struct llog_handle *loghandle)
+{
+ if (loghandle && loghandle->lgh_hdr)
+ return loghandle->lgh_hdr->llh_count;
+ return 0;
+}
+EXPORT_SYMBOL(llog_get_size);
+
+int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
+ void *data, void *catdata)
+{
struct llog_log_hdr *llh = loghandle->lgh_hdr;
struct llog_process_cat_data *cd = catdata;
void *buf;
- __u64 cur_offset = LLOG_CHUNK_SIZE;
- int rc = 0, index = 1, last_index, idx;
- int saved_index = 0;
+ int rc = 0, first_index = 1, index, idx;
ENTRY;
OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
RETURN(-ENOMEM);
if (cd != NULL)
- index = cd->first_idx + 1;
- if (cd != NULL && cd->last_idx)
- last_index = cd->last_idx;
+ first_index = cd->lpcd_first_idx + 1;
+ if (cd != NULL && cd->lpcd_last_idx)
+ index = cd->lpcd_last_idx;
else
- last_index = LLOG_BITMAP_BYTES * 8 - 1;
+ index = LLOG_BITMAP_BYTES * 8 - 1;
while (rc == 0) {
struct llog_rec_hdr *rec;
+ struct llog_rec_tail *tail;
/* skip records not set in bitmap */
- while (index <= last_index &&
+ while (index >= first_index &&
!ext2_test_bit(index, llh->llh_bitmap))
- ++index;
+ --index;
- LASSERT(index <= last_index + 1);
- if (index == last_index + 1)
+ LASSERT(index >= first_index - 1);
+ if (index == first_index - 1)
break;
/* get the buf with our target record; avoid old garbage */
memset(buf, 0, LLOG_CHUNK_SIZE);
- rc = llog_next_block(loghandle, &saved_index, index,
- &cur_offset, buf, LLOG_CHUNK_SIZE);
+ rc = llog_prev_block(loghandle, index, buf, LLOG_CHUNK_SIZE);
if (rc)
GOTO(out, rc);
rec = buf;
- idx = rec->lrh_index;
+ idx = le32_to_cpu(rec->lrh_index);
if (idx < index)
- CDEBUG(D_HA, "index %u : idx %u\n", index, idx);
+ CDEBUG(D_RPCTRACE, "index %u : idx %u\n", index, idx);
while (idx < index) {
- rec = (struct llog_rec_hdr *)
- ((char *)rec + rec->lrh_len);
+ rec = ((void *)rec + le32_to_cpu(rec->lrh_len));
idx ++;
}
+ tail = (void *)rec + le32_to_cpu(rec->lrh_len) - sizeof(*tail);
/* process records in buffer, starting where we found one */
- while ((void *)rec < buf + LLOG_CHUNK_SIZE) {
+ while ((void *)tail > buf) {
+ rec = (void *)tail - le32_to_cpu(tail->lrt_len) +
+ sizeof(*tail);
+
if (rec->lrh_index == 0)
GOTO(out, 0); /* no more records */
if (ext2_test_bit(index, llh->llh_bitmap)) {
rc = cb(loghandle, rec, data);
if (rc == LLOG_PROC_BREAK) {
- CWARN("recovery from log: "LPX64":%x"
- " stopped\n",
- loghandle->lgh_id.lgl_oid,
- loghandle->lgh_id.lgl_ogen);
GOTO(out, rc);
}
if (rc)
GOTO(out, rc);
}
- /* next record, still in buffer? */
- ++index;
- if (index > last_index)
+ /* previous record, still in buffer? */
+ --index;
+ if (index < first_index)
GOTO(out, rc = 0);
- rec = (struct llog_rec_hdr *)
- ((char *)rec + rec->lrh_len);
+ tail = (void *)rec - sizeof(*tail);
}
}
- out:
+out:
if (buf)
OBD_FREE(buf, LLOG_CHUNK_SIZE);
RETURN(rc);
}
-EXPORT_SYMBOL(llog_process);
+EXPORT_SYMBOL(llog_reverse_process);