Whamcloud - gitweb
Land b1_8_gate onto b1_8 (20081218_1708)
[fs/lustre-release.git] / lustre / obdclass / llog.c
index 85cec7b..2a5ba2d 100644 (file)
@@ -1,32 +1,46 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
- *   Author: Andreas Dilger <adilger@clusterfs.com>
+ * GPL HEADER START
  *
- *   This file is part of the Lustre file system, http://www.lustre.org
- *   Lustre is a trademark of Cluster File Systems, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *   You may have signed or agreed to another license before downloading
- *   this software.  If so, you are bound by the terms and conditions
- *   of that agreement, and the following does not apply to you.  See the
- *   LICENSE file included with this distribution for more information.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   If you did not agree to a different license, then this copy of Lustre
- *   is open source software; you can redistribute it and/or modify it
- *   under the terms of version 2 of the GNU General Public License as
- *   published by the Free Software Foundation.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   In either case, Lustre is distributed in the hope that it will be
- *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   license text for more details.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- * OST<->MDS recovery logging infrastructure.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
  *
+ * lustre/obdclass/llog.c
+ *
+ * OST<->MDS recovery logging infrastructure.
  * Invariants in implementation:
  * - we do not share logs among different OST<->MDS connections, so that
  *   if an OST or MDS fails it need only look at log(s) relevant to itself
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
  */
 
 #define DEBUG_SUBSYSTEM S_LOG
 #define EXPORT_SYMTAB
 #endif
 
-#ifdef __KERNEL__
-#include <linux/fs.h>
-#else
+#ifndef __KERNEL__
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <lustre_log.h>
 #include <libcfs/list.h>
+#include "llog_internal.h"
 
 /* Allocate a new log or catalog handle */
 struct llog_handle *llog_alloc_handle(void)
@@ -87,17 +100,17 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index)
         int rc = 0;
         ENTRY;
 
-        CDEBUG(D_HA, "canceling %d in log "LPX64"\n",
+        CDEBUG(D_RPCTRACE, "Canceling %d in log "LPX64"\n",
                index, loghandle->lgh_id.lgl_oid);
 
         if (index == 0) {
-                CERROR("cannot cancel index 0 (which is header)\n");
+                CERROR("Can't cancel index 0 which is header\n");
                 RETURN(-EINVAL);
         }
 
         if (!ext2_clear_bit(index, llh->llh_bitmap)) {
-                CDEBUG(D_HA, "catalog index %u already clear?\n", index);
-                RETURN(-EINVAL);
+                CDEBUG(D_RPCTRACE, "Catalog index %u already clear?\n", index);
+                RETURN(-ENOENT);
         }
 
         llh->llh_count--;
@@ -107,7 +120,7 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index)
             (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
                 rc = llog_destroy(loghandle);
                 if (rc) {
-                        CERROR("failure destroying log after last cancel: %d\n",
+                        CERROR("Failure destroying log after last cancel: %d\n",
                                rc);
                         ext2_set_bit(index, llh->llh_bitmap);
                         llh->llh_count++;
@@ -119,7 +132,7 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index)
 
         rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
         if (rc) {
-                CERROR("failure re-writing header %d\n", rc);
+                CERROR("Failure re-writing header %d\n", rc);
                 ext2_set_bit(index, llh->llh_bitmap);
                 llh->llh_count++;
         }
@@ -142,11 +155,13 @@ int llog_init_handle(struct llog_handle *handle, int flags,
         /* first assign flags to use llog_client_ops */
         llh->llh_flags = flags;
         rc = llog_read_header(handle);
-        CDEBUG(D_ERROR, "read header rc=%d fl=%d\n", rc, flags);
         if (rc == 0) {
                 flags = llh->llh_flags;
-                if (uuid)
-                        LASSERT(obd_uuid_equals(uuid, &llh->llh_tgtuuid));
+                if (uuid && !obd_uuid_equals(uuid, &llh->llh_tgtuuid)) {
+                        CERROR("uuid mismatch: %s/%s\n", (char *)uuid->uuid,
+                               (char *)llh->llh_tgtuuid.uuid);
+                        rc = -EEXIST;
+                }
                 GOTO(out, rc);
         } else if (rc != LLOG_EEMPTY || !flags) {
                 /* set a pseudo flag for initialization */
@@ -168,10 +183,10 @@ int llog_init_handle(struct llog_handle *handle, int flags,
 
 out:
         if (flags & LLOG_F_IS_CAT) {
-                INIT_LIST_HEAD(&handle->u.chd.chd_head);
+                CFS_INIT_LIST_HEAD(&handle->u.chd.chd_head);
                 llh->llh_size = sizeof(struct llog_logid_rec);
         } else if (flags & LLOG_F_IS_PLAIN) {
-                INIT_LIST_HEAD(&handle->u.phd.phd_entry);
+                CFS_INIT_LIST_HEAD(&handle->u.phd.phd_entry);
         } else {
                 CERROR("Unknown flags: %#x (Expected %#x or %#x\n",
                        flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
@@ -204,27 +219,37 @@ int llog_close(struct llog_handle *loghandle)
 }
 EXPORT_SYMBOL(llog_close);
 
-int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
-                 void *data, void *catdata)
+static int llog_process_thread(void *arg)
 {
-        struct llog_log_hdr *llh = loghandle->lgh_hdr;
-        struct llog_process_cat_data *cd = catdata;
-        char *buf;
-        __u64 cur_offset = LLOG_CHUNK_SIZE;
-        int rc = 0, index = 1, last_index;
-        int saved_index = 0;
-        ENTRY;
+        struct llog_process_info     *lpi = (struct llog_process_info *)arg;
+        struct llog_handle           *loghandle = lpi->lpi_loghandle;
+        struct llog_log_hdr          *llh = loghandle->lgh_hdr;
+        struct llog_process_cat_data *cd  = lpi->lpi_catdata;
+        char                         *buf;
+        __u64                         cur_offset = LLOG_CHUNK_SIZE;
+        __u64                         last_offset;
+        int                           rc = 0, index = 1, last_index;
+        int                           saved_index = 0, last_called_index = 0;
 
         LASSERT(llh);
 
         OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
-        if (!buf)
-                RETURN(-ENOMEM);
+        if (!buf) {
+                lpi->lpi_rc = -ENOMEM;
+#ifdef __KERNEL__
+                complete(&lpi->lpi_completion);
+#endif
+                return 0;
+        }
 
-        if (cd != NULL)
-                index = cd->first_idx + 1;
-        if (cd != NULL && cd->last_idx)
-                last_index = cd->last_idx;
+        cfs_daemonize_ctxt("llog_process_thread");
+
+        if (cd != NULL) {
+                last_called_index = cd->lpcd_first_idx;
+                index = cd->lpcd_first_idx + 1;
+        }
+        if (cd != NULL && cd->lpcd_last_idx)
+                last_index = cd->lpcd_last_idx;
         else
                 last_index = LLOG_BITMAP_BYTES * 8 - 1;
 
@@ -244,7 +269,7 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
                        index, last_index);
 
                 /* get the buf with our target record; avoid old garbage */
-                memset(buf, 0, LLOG_CHUNK_SIZE);
+                last_offset = cur_offset;
                 rc = llog_next_block(loghandle, &saved_index, index,
                                      &cur_offset, buf, LLOG_CHUNK_SIZE);
                 if (rc)
@@ -263,17 +288,17 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
                         if (LLOG_REC_HDR_NEEDS_SWABBING(rec))
                                 lustre_swab_llog_rec(rec, NULL);
 
-                        CDEBUG(D_OTHER, "after swabbing, type: %#x\n",
-                               rec->lrh_type);
+                        CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
+                               rec->lrh_type, rec->lrh_index);
 
                         if (rec->lrh_index == 0)
                                 GOTO(out, 0); /* no more records */
 
                         if (rec->lrh_len == 0 || rec->lrh_len >LLOG_CHUNK_SIZE){
                                 CWARN("invalid length %d in llog record for "
-                                      "index %d\n", rec->lrh_len,
-                                rec->lrh_index);
-                                GOTO(out, 0);
+                                      "index %d/%d\n", rec->lrh_len,
+                                      rec->lrh_index, index);
+                                GOTO(out, rc = -EINVAL);
                         }
 
                         if (rec->lrh_index < index) {
@@ -287,15 +312,25 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
                                rec->lrh_index, rec->lrh_len,
                                (int)(buf + LLOG_CHUNK_SIZE - (char *)rec));
 
+                        loghandle->lgh_cur_idx    = rec->lrh_index;
+                        loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
+                                                    last_offset;
+
                         /* if set, process the callback on this record */
                         if (ext2_test_bit(index, llh->llh_bitmap)) {
-                                rc = cb(loghandle, rec, data);
+                                rc = lpi->lpi_cb(loghandle, rec,
+                                                 lpi->lpi_cbdata);
+                                last_called_index = index;
                                 if (rc == LLOG_PROC_BREAK) {
-                                        CWARN("recovery from log: "LPX64":%x"
-                                              " stopped\n",
-                                              loghandle->lgh_id.lgl_oid,
-                                              loghandle->lgh_id.lgl_ogen);
+                                        CDEBUG(D_HA, "recovery from log: "LPX64
+                                               ":%x stopped\n",
+                                               loghandle->lgh_id.lgl_oid,
+                                               loghandle->lgh_id.lgl_ogen);
                                         GOTO(out, rc);
+                                } else if (rc == LLOG_DEL_RECORD) {
+                                        llog_cancel_rec(loghandle,
+                                                        rec->lrh_index);
+                                        rc = 0;
                                 }
                                 if (rc)
                                         GOTO(out, rc);
@@ -311,8 +346,142 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
         }
 
  out:
+        if (cd != NULL)
+                cd->lpcd_last_idx = last_called_index;
         if (buf)
                 OBD_FREE(buf, LLOG_CHUNK_SIZE);
+        lpi->lpi_rc = rc;
+#ifdef __KERNEL__
+        complete(&lpi->lpi_completion);
+#endif
+        return 0;
+}
+
+/*
+ * Run llog_process_thread() over \a loghandle and hand back its result.
+ *
+ * The handle, callback, callback data and catalog data are packed into a
+ * struct llog_process_info.  In kernel builds the worker is started with
+ * cfs_kernel_thread() and this function blocks until the worker signals
+ * lpi_completion; otherwise the worker is simply called in place.
+ *
+ * Returns -ENOMEM if the info block cannot be allocated, the negative
+ * value from cfs_kernel_thread() if the thread cannot be started, and
+ * otherwise the lpi_rc value stored by the worker.
+ */
+int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
+                 void *data, void *catdata)
+{
+        struct llog_process_info *info;
+        int                       result;
+        ENTRY;
+
+        OBD_ALLOC_PTR(info);
+        if (!info) {
+                CERROR("cannot alloc pointer\n");
+                RETURN(-ENOMEM);
+        }
+
+        /* everything the worker needs travels in this one argument block */
+        info->lpi_catdata   = catdata;
+        info->lpi_cbdata    = data;
+        info->lpi_cb        = cb;
+        info->lpi_loghandle = loghandle;
+
+#ifndef __KERNEL__
+        llog_process_thread(info);
+        result = info->lpi_rc;
+#else
+        init_completion(&info->lpi_completion);
+        result = cfs_kernel_thread(llog_process_thread, info,
+                                   CLONE_VM | CLONE_FILES);
+        if (result >= 0) {
+                /* the worker completes lpi_completion after setting lpi_rc */
+                wait_for_completion(&info->lpi_completion);
+                result = info->lpi_rc;
+        } else {
+                CERROR("cannot start thread: %d\n", result);
+        }
+#endif
+        OBD_FREE_PTR(info);
+        RETURN(result);
+}
+EXPORT_SYMBOL(llog_process);
+
+/*
+ * Return the llh_count field of the header attached to \a loghandle.
+ * A NULL handle, or a handle without a header, yields 0.
+ */
+inline int llog_get_size(struct llog_handle *loghandle)
+{
+        if (loghandle == NULL || loghandle->lgh_hdr == NULL)
+                return 0;
+
+        return loghandle->lgh_hdr->llh_count;
+}
+EXPORT_SYMBOL(llog_get_size);
+
+/*
+ * Walk the records of \a loghandle from the highest index towards the
+ * lowest, calling \a cb on every record whose bit is set in the header
+ * bitmap.
+ *
+ * \a catdata may be NULL or point to a struct llog_process_cat_data:
+ * lpcd_first_idx is the index just below the last one visited, and a
+ * non-zero lpcd_last_idx is the index to start from (otherwise the walk
+ * starts at LLOG_BITMAP_BYTES * 8 - 1).
+ *
+ * Returns 0 when the walk finishes, -ENOMEM if the chunk buffer cannot
+ * be allocated, -EINVAL if a record length read from the block does not
+ * fit inside the chunk (same error llog_process_thread() uses for a bad
+ * length), the error from llog_prev_block(), or the first non-zero value
+ * returned by \a cb (including LLOG_PROC_BREAK).
+ */
+int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
+                         void *data, void *catdata)
+{
+        struct llog_log_hdr *llh = loghandle->lgh_hdr;
+        struct llog_process_cat_data *cd = catdata;
+        /* every record carries a header and a tail, so none is smaller */
+        const __u32 min_len = sizeof(struct llog_rec_hdr) +
+                              sizeof(struct llog_rec_tail);
+        char *buf;
+        int rc = 0, first_index = 1, index, idx;
+        ENTRY;
+
+        LASSERT(llh);
+
+        OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
+        if (!buf)
+                RETURN(-ENOMEM);
+
+        if (cd != NULL)
+                first_index = cd->lpcd_first_idx + 1;
+        if (cd != NULL && cd->lpcd_last_idx)
+                index = cd->lpcd_last_idx;
+        else
+                index = LLOG_BITMAP_BYTES * 8 - 1;
+
+        while (rc == 0) {
+                struct llog_rec_hdr *rec;
+                struct llog_rec_tail *tail;
+                __u32 len, room;
+
+                /* skip records not set in bitmap */
+                while (index >= first_index &&
+                       !ext2_test_bit(index, llh->llh_bitmap))
+                        --index;
+
+                LASSERT(index >= first_index - 1);
+                if (index == first_index - 1)
+                        break;
+
+                /* get the buf with our target record; avoid old garbage */
+                memset(buf, 0, LLOG_CHUNK_SIZE);
+                rc = llog_prev_block(loghandle, index, buf, LLOG_CHUNK_SIZE);
+                if (rc)
+                        GOTO(out, rc);
+
+                rec = (struct llog_rec_hdr *)buf;
+                idx = le32_to_cpu(rec->lrh_index);
+                if (idx < index)
+                        CDEBUG(D_RPCTRACE, "index %d : idx %d\n", index, idx);
+
+                /* step forward to the record for 'index', checking that
+                 * every length keeps us inside the chunk; a zero or
+                 * oversized length would otherwise send the walk outside
+                 * buf or make the outer loop spin on the same block */
+                for (;;) {
+                        room = buf + LLOG_CHUNK_SIZE - (char *)rec;
+                        len = room >= min_len ?
+                              le32_to_cpu(rec->lrh_len) : 0;
+                        if (len < min_len || len > room) {
+                                CWARN("invalid length %u in llog record for "
+                                      "index %d/%d\n", len, idx, index);
+                                GOTO(out, rc = -EINVAL);
+                        }
+                        if (idx >= index)
+                                break;
+                        rec = (struct llog_rec_hdr *)((char *)rec + len);
+                        idx++;
+                }
+                tail = (struct llog_rec_tail *)((char *)rec + len -
+                                                sizeof(*tail));
+
+                /* process records in buffer, starting where we found one */
+                for (;;) {
+                        /* bytes from the start of buf through this tail */
+                        room = (char *)tail + sizeof(*tail) - buf;
+                        len = le32_to_cpu(tail->lrt_len);
+                        if (len < min_len || len > room) {
+                                CWARN("invalid tail length %u in llog record "
+                                      "near index %d\n", len, index);
+                                GOTO(out, rc = -EINVAL);
+                        }
+                        rec = (struct llog_rec_hdr *)((char *)tail +
+                                                      sizeof(*tail) - len);
+
+                        if (rec->lrh_index == 0)
+                                GOTO(out, 0); /* no more records */
+
+                        /* if set, process the callback on this record */
+                        if (ext2_test_bit(index, llh->llh_bitmap)) {
+                                rc = cb(loghandle, rec, data);
+                                if (rc == LLOG_PROC_BREAK) {
+                                        CWARN("recovery from log: "LPX64":%x"
+                                              " stopped\n",
+                                              loghandle->lgh_id.lgl_oid,
+                                              loghandle->lgh_id.lgl_ogen);
+                                        GOTO(out, rc);
+                                }
+                                if (rc)
+                                        GOTO(out, rc);
+                        }
+
+                        --index;
+                        if (index < first_index)
+                                GOTO(out, rc = 0);
+
+                        /* previous record, still in buffer? */
+                        if ((char *)rec - buf <= (long)sizeof(*tail))
+                                break;
+                        tail = (struct llog_rec_tail *)((char *)rec -
+                                                        sizeof(*tail));
+                }
+        }
+
+out:
+        OBD_FREE(buf, LLOG_CHUNK_SIZE);
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_reverse_process);