Whamcloud - gitweb
re-add files that were erroneously deleted on HEAD
author nic <nic>
Thu, 29 Apr 2004 14:33:01 +0000 (14:33 +0000)
committer nic <nic>
Thu, 29 Apr 2004 14:33:01 +0000 (14:33 +0000)
lustre/obdclass/llog.c [new file with mode: 0644]
lustre/obdclass/llog_cat.c [new file with mode: 0644]
lustre/obdclass/llog_internal.h [new file with mode: 0644]
lustre/obdclass/llog_lvfs.c [new file with mode: 0644]

diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c
new file mode 100644 (file)
index 0000000..0ad595f
--- /dev/null
@@ -0,0 +1,279 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * OST<->MDS recovery logging infrastructure.
+ *
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ *   if an OST or MDS fails it need only look at log(s) relevant to itself
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+
+#ifdef __KERNEL__
+#include <linux/fs.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/obd_class.h>
+#include <linux/lustre_log.h>
+#include <portals/list.h>
+
+/*
+ * Allocate a new log or catalog handle.
+ *
+ * Only the handle's lgh_lock rw-semaphore is initialized here.
+ * Returns the new handle, or ERR_PTR(-ENOMEM) if the allocation failed.
+ */
+struct llog_handle *llog_alloc_handle(void)
+{
+        struct llog_handle *loghandle;
+        ENTRY;
+
+        OBD_ALLOC(loghandle, sizeof(*loghandle));
+        if (loghandle != NULL) {
+                init_rwsem(&loghandle->lgh_lock);
+                RETURN(loghandle);
+        }
+
+        RETURN(ERR_PTR(-ENOMEM));
+}
+EXPORT_SYMBOL(llog_alloc_handle);
+
+
+/*
+ * Release a log handle together with the log header attached to it, if any.
+ * Passing NULL is allowed and does nothing.
+ */
+void llog_free_handle(struct llog_handle *loghandle)
+{
+        if (loghandle == NULL)
+                return;
+
+        if (loghandle->lgh_hdr != NULL) {
+                /* a plain log is unlinked from the list it is queued on */
+                if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)
+                        list_del_init(&loghandle->u.phd.phd_entry);
+                /* a catalog must not have any log still linked to it */
+                if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)
+                        LASSERT(list_empty(&loghandle->u.chd.chd_head));
+                OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE);
+        }
+
+        OBD_FREE(loghandle, sizeof(*loghandle));
+}
+EXPORT_SYMBOL(llog_free_handle);
+
+/*
+ * Cancel the record at @index in the log behind @loghandle by clearing its
+ * bit in the header bitmap and decrementing the header's record count.
+ *
+ * If the log is flagged LLOG_F_ZAP_WHEN_EMPTY, only the header record is
+ * left and the log's last index is the final bitmap slot, the log is
+ * destroyed; otherwise the updated header is written back.
+ *
+ * If destroying the log or rewriting the header fails, the bitmap bit and
+ * the record count are put back the way they were before the call and the
+ * error is handed to the caller instead of asserting.
+ *
+ * returns negative on error; 0 if success; 1 if success & log destroyed
+ */
+int llog_cancel_rec(struct llog_handle *loghandle, int index)
+{
+        struct llog_log_hdr *llh = loghandle->lgh_hdr;
+        int rc = 0;
+        ENTRY;
+
+        CDEBUG(D_HA, "canceling %d in log "LPX64"\n",
+               index, loghandle->lgh_id.lgl_oid);
+
+        if (index == 0) {
+                CERROR("cannot cancel index 0 (which is header)\n");
+                RETURN(-EINVAL);
+        }
+
+        /* ext2_clear_bit() reports the previous value of the bit */
+        if (!ext2_clear_bit(index, llh->llh_bitmap)) {
+                CERROR("catalog index %u already clear?\n", index);
+                RETURN(-EINVAL);
+        }
+
+        llh->llh_count--;
+
+        if ((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) &&
+            (llh->llh_count == 1) &&
+            (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
+                rc = llog_destroy(loghandle);
+                if (rc) {
+                        CERROR("failure destroying log after last cancel: %d\n",
+                               rc);
+                        /* the record was not cancelled after all: undo */
+                        ext2_set_bit(index, llh->llh_bitmap);
+                        llh->llh_count++;
+                        RETURN(rc);
+                }
+                RETURN(1);
+        }
+
+        rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
+        if (rc) {
+                CERROR("failure re-writing header %d\n", rc);
+                /* the header update did not go through: undo */
+                ext2_set_bit(index, llh->llh_bitmap);
+                llh->llh_count++;
+        }
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cancel_rec);
+
+/*
+ * Attach a log header to @handle, either by reading an existing one or by
+ * building a fresh one.
+ *
+ * @handle must not have a header yet (asserted).
+ * @flags  is stored in the new header before llog_read_header() is called;
+ *         if the read succeeds it is replaced by the flags that were read.
+ * @uuid   optional; on a successful read it is asserted to equal the
+ *         header's llh_tgtuuid, on a fresh header it is copied into it.
+ *
+ * A fresh header is built only when llog_read_header() returns LLOG_EEMPTY
+ * and @flags is non-zero.  Any other non-zero result is returned as is.
+ *
+ * Returns 0 on success, -ENOMEM if the header cannot be allocated, or the
+ * non-zero result of llog_read_header().  On a non-zero return the header
+ * is freed again and handle->lgh_hdr is reset to NULL.
+ */
+int llog_init_handle(struct llog_handle *handle, int flags,
+                     struct obd_uuid *uuid)
+{
+        int rc;
+        struct llog_log_hdr *llh;
+        ENTRY;
+        LASSERT(handle->lgh_hdr == NULL);
+
+        OBD_ALLOC(llh, sizeof(*llh));
+        if (llh == NULL)
+                RETURN(-ENOMEM);
+        handle->lgh_hdr = llh;
+        /* first assign flags to use llog_client_ops */
+        llh->llh_flags = flags;
+        rc = llog_read_header(handle);
+        if (rc == 0) {
+                /* existing header: from here on use the flags that were read */
+                flags = llh->llh_flags;
+                if (uuid)
+                        LASSERT(obd_uuid_equals(uuid, &llh->llh_tgtuuid));
+                GOTO(out, rc);
+        } else if (rc != LLOG_EEMPTY || !flags) {
+                /* set a pseudo flag for initialization: it only keeps the
+                 * type dispatch at "out" from reaching LBUG() before the
+                 * header is freed on this error path */
+                flags = LLOG_F_IS_CAT;
+                GOTO(out, rc);
+        }
+        /* rc == LLOG_EEMPTY with non-zero flags: build a new header */
+        rc = 0;
+
+        handle->lgh_last_idx = 0; /* header is record with index 0 */
+        llh->llh_count = 1;         /* for the header record */
+        llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
+        llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
+        llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
+        llh->llh_timestamp = LTIME_S(CURRENT_TIME);
+        if (uuid)
+                memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
+        llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap);
+        /* bit 0 of the bitmap stands for the header record itself */
+        ext2_set_bit(0, llh->llh_bitmap);
+
+out:
+        /* set up the list head that matches the log type; a header with
+         * neither the catalog nor the plain flag is a fatal bug */
+        if (flags & LLOG_F_IS_CAT) {
+                INIT_LIST_HEAD(&handle->u.chd.chd_head);
+                llh->llh_size = sizeof(struct llog_logid_rec);
+        }
+        else if (flags & LLOG_F_IS_PLAIN)
+                INIT_LIST_HEAD(&handle->u.phd.phd_entry);
+        else
+                LBUG();
+
+        if (rc) {
+                /* failure: leave the handle without a header */
+                OBD_FREE(llh, sizeof(*llh));
+                handle->lgh_hdr = NULL;
+        }
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_init_handle);
+
+/*
+ * Close a log handle through the lop_close method of its operations table
+ * and release the handle.
+ *
+ * The handle is freed on every path, including failures, so the caller must
+ * not touch @loghandle after this call.
+ *
+ * Returns the result of lop_close(), the error from llog_handle2ops(), or
+ * -EOPNOTSUPP when the operations table has no lop_close method.
+ */
+int llog_close(struct llog_handle *loghandle)
+{
+        struct llog_operations *lop;
+        int rc;
+        ENTRY;
+
+        rc = llog_handle2ops(loghandle, &lop);
+        if (rc)
+                GOTO(out, rc);
+        if (lop->lop_close == NULL)
+                /* assign rc inside the argument, like the other
+                 * GOTO(out, rc = ...) call sites in this file, so that
+                 * the error really is what gets returned */
+                GOTO(out, rc = -EOPNOTSUPP);
+        rc = lop->lop_close(loghandle);
+ out:
+        llog_free_handle(loghandle);
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_close);
+
+/*
+ * Walk the records of a log and call @cb on every record whose bit is set
+ * in the header bitmap.
+ *
+ * @loghandle log to process; its header must already be attached.
+ * @cb        callback invoked as cb(loghandle, rec, data).
+ * @data      opaque pointer handed to @cb.
+ * @catdata   optional struct llog_process_cat_data limiting the range:
+ *            processing starts at first_idx + 1 and, when last_idx is
+ *            non-zero, stops after last_idx.  With NULL the whole bitmap
+ *            range starting at index 1 is processed.
+ *
+ * Returns 0 when the range was processed or no more records were found,
+ * -ENOMEM if the chunk buffer cannot be allocated, -EINVAL if a record with
+ * a zero length is met, the error from llog_next_block(), or the first
+ * non-zero value returned by @cb (including LLOG_PROC_BREAK).
+ */
+int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
+                 void *data, void *catdata)
+{
+        struct llog_log_hdr *llh = loghandle->lgh_hdr;
+        struct llog_process_cat_data *cd = catdata;
+        void *buf;
+        char *buf_end;
+        __u64 cur_offset = LLOG_CHUNK_SIZE;
+        int rc = 0, index = 1, last_index, idx;
+        int saved_index = 0;
+        ENTRY;
+
+        OBD_ALLOC(buf, LLOG_CHUNK_SIZE);
+        if (!buf)
+                RETURN(-ENOMEM);
+        buf_end = (char *)buf + LLOG_CHUNK_SIZE;
+
+        if (cd != NULL)
+                index = cd->first_idx + 1;
+        if (cd != NULL && cd->last_idx)
+                last_index = cd->last_idx;
+        else
+                last_index = LLOG_BITMAP_BYTES * 8 - 1;
+
+        while (rc == 0) {
+                struct llog_rec_hdr *rec;
+
+                /* skip records not set in bitmap */
+                while (index <= last_index &&
+                       !ext2_test_bit(index, llh->llh_bitmap))
+                        ++index;
+
+                LASSERT(index <= last_index + 1);
+                if (index == last_index + 1)
+                        break;
+
+                /* get the buf with our target record; avoid old garbage */
+                memset(buf, 0, LLOG_CHUNK_SIZE);
+                rc = llog_next_block(loghandle, &saved_index, index,
+                                     &cur_offset, buf, LLOG_CHUNK_SIZE);
+                if (rc)
+                        GOTO(out, rc);
+
+                rec = buf;
+                idx = rec->lrh_index;
+                if (idx < index)
+                        CDEBUG(D_HA, "index %u : idx %u\n", index, idx);
+                /* step over the records in front of the wanted index, but
+                 * never read a record header beyond the end of the chunk */
+                while (idx < index && (char *)rec < buf_end) {
+                        rec = (struct llog_rec_hdr *)
+                                ((char *)rec + rec->lrh_len);
+                        idx ++;
+                }
+
+                /* process records in buffer, starting where we found one */
+                while ((char *)rec < buf_end) {
+                        if (rec->lrh_index == 0)
+                                GOTO(out, 0); /* no more records */
+
+                        /* a zero length would pin us on this record and
+                         * feed it to the callback again for every later
+                         * index, so treat it as a corrupt log */
+                        if (rec->lrh_len == 0) {
+                                CERROR("zero-length record at index %d in log "
+                                       LPX64"\n", index,
+                                       loghandle->lgh_id.lgl_oid);
+                                GOTO(out, rc = -EINVAL);
+                        }
+
+                        /* if set, process the callback on this record */
+                        if (ext2_test_bit(index, llh->llh_bitmap)) {
+                                rc = cb(loghandle, rec, data);
+                                if (rc == LLOG_PROC_BREAK) {
+                                        CWARN("recovery from log: "LPX64":%x"
+                                              " stopped\n",
+                                              loghandle->lgh_id.lgl_oid,
+                                              loghandle->lgh_id.lgl_ogen);
+                                        GOTO(out, rc);
+                                }
+                                if (rc)
+                                        GOTO(out, rc);
+                        }
+
+                        /* next record, still in buffer? */
+                        ++index;
+                        if (index > last_index)
+                                GOTO(out, rc = 0);
+                        rec = (struct llog_rec_hdr *)
+                                ((char *)rec + rec->lrh_len);
+                }
+        }
+
+ out:
+        /* buf is never NULL here: the allocation failure returned early */
+        OBD_FREE(buf, LLOG_CHUNK_SIZE);
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_process);
diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c
new file mode 100644 (file)
index 0000000..d4fa370
--- /dev/null
@@ -0,0 +1,467 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * OST<->MDS recovery logging infrastructure.
+ *
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ *   if an OST or MDS fails it need only look at log(s) relevant to itself
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+
+#ifdef __KERNEL__
+#include <linux/fs.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/obd_class.h>
+#include <linux/lustre_log.h>
+#include <portals/list.h>
+
+/* Create a new log handle and add it to the open list.
+ * This log handle will be closed when all of the records in it are removed.
+ *
+ * The next catalog slot after lgh_last_idx is claimed in the catalog's
+ * bitmap (slot 0 is skipped), a plain log is created and initialized, and a
+ * llog_logid_rec describing it is written into the catalog at that slot.
+ * On success the new log becomes the catalog's chd_current_log.
+ *
+ * Returns the new handle, ERR_PTR(-ENOSPC) when the catalog has no free
+ * slot, or ERR_PTR(rc) when creating, initializing or recording the log
+ * fails.  In the last two cases the half-built log is destroyed and its
+ * handle freed, so the caller never sees a handle to a destroyed log.
+ *
+ * NOTE(review): a slot claimed in the catalog bitmap is not given back when
+ * a later step fails - confirm whether callers rely on that.
+ * NOTE(review): the function is static yet also passed to EXPORT_SYMBOL().
+ *
+ * Assumes caller has already pushed us into the kernel context and is locking.
+ */
+static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle)
+{
+        struct llog_handle *loghandle;
+        struct llog_log_hdr *llh;
+        struct llog_logid_rec rec = { { 0 }, };
+        int rc, index, bitmap_size;
+        ENTRY;
+
+        llh = cathandle->lgh_hdr;
+        bitmap_size = sizeof(llh->llh_bitmap) * 8;
+
+        index = (cathandle->lgh_last_idx + 1) % bitmap_size;
+
+        /* maximum number of available slots in catlog is bitmap_size - 2 */
+        if (llh->llh_cat_idx == index) {
+                CERROR("no free catalog slots for log...\n");
+                RETURN(ERR_PTR(-ENOSPC));
+        }
+
+        /* index 0 belongs to the catalog header */
+        if (index == 0)
+                index = 1;
+        if (ext2_set_bit(index, llh->llh_bitmap)) {
+                CERROR("argh, index %u already set in log bitmap?\n",
+                       index);
+                LBUG(); /* should never happen */
+        }
+        cathandle->lgh_last_idx = index;
+        llh->llh_count++;
+        llh->llh_tail.lrt_index = index;
+
+        rc = llog_create(cathandle->lgh_ctxt, &loghandle, NULL, NULL);
+        if (rc)
+                RETURN(ERR_PTR(rc));
+
+        rc = llog_init_handle(loghandle,
+                              LLOG_F_IS_PLAIN | LLOG_F_ZAP_WHEN_EMPTY,
+                              &cathandle->lgh_hdr->llh_tgtuuid);
+        if (rc)
+                GOTO(out_destroy, rc);
+
+        CDEBUG(D_HA, "new recovery log "LPX64":%x for index %u of catalog "
+               LPX64"\n", loghandle->lgh_id.lgl_oid, loghandle->lgh_id.lgl_ogen,
+               index, cathandle->lgh_id.lgl_oid);
+        /* build the record for this log in the catalog */
+        rec.lid_hdr.lrh_len = sizeof(rec);
+        rec.lid_hdr.lrh_index = index;
+        rec.lid_hdr.lrh_type = LLOG_LOGID_MAGIC;
+        rec.lid_id = loghandle->lgh_id;
+        rec.lid_tail.lrt_len = sizeof(rec);
+        rec.lid_tail.lrt_index = index;
+
+        /* update the catalog: header and record */
+        rc = llog_write_rec(cathandle, &rec.lid_hdr,
+                            &loghandle->u.phd.phd_cookie, 1, NULL, index);
+        if (rc < 0) {
+                GOTO(out_destroy, rc);
+        }
+
+        loghandle->lgh_hdr->llh_cat_idx = index;
+        cathandle->u.chd.chd_current_log = loghandle;
+        LASSERT(list_empty(&loghandle->u.phd.phd_entry));
+        list_add_tail(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
+
+ out_destroy:
+        if (rc < 0) {
+                /* destroy the log, then release the handle the same way
+                 * llog_cat_cancel_records() does after a log is destroyed,
+                 * and report the failure instead of the dead handle */
+                llog_destroy(loghandle);
+                llog_free_handle(loghandle);
+                RETURN(ERR_PTR(rc));
+        }
+
+        RETURN(loghandle);
+}
+EXPORT_SYMBOL(llog_cat_new_log);
+
+/* Open an existing log handle and add it to the open list.
+ * This log handle will be closed when all of the records in it are removed.
+ *
+ * The catalog's list of open logs is searched for @logid first (object id
+ * and generation must both match).  If it is not there, the log is opened
+ * with llog_create(), initialized as a plain log and added to the list.
+ *
+ * @cathandle catalog the log belongs to; NULL yields -EBADF.
+ * @res       receives the handle.  It is set to NULL when the log cannot be
+ *            opened, so the caller never gets the exhausted list cursor.
+ * @logid     identifier of the wanted log.
+ *
+ * Returns 0 on success, -EBADF, or the error from llog_create() or
+ * llog_init_handle().
+ *
+ * NOTE(review): when llog_init_handle() fails, the handle obtained from
+ * llog_create() is stored in *res but not released here - confirm who owns
+ * it in that case.
+ *
+ * Assumes caller has already pushed us into the kernel context and is locking.
+ * No lock is taken on the returned handle by this function.
+ */
+int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res,
+                       struct llog_logid *logid)
+{
+        struct llog_handle *loghandle;
+        int rc = 0;
+        ENTRY;
+
+        if (cathandle == NULL)
+                RETURN(-EBADF);
+
+        list_for_each_entry(loghandle, &cathandle->u.chd.chd_head,
+                            u.phd.phd_entry) {
+                struct llog_logid *cgl = &loghandle->lgh_id;
+                if (cgl->lgl_oid == logid->lgl_oid) {
+                        if (cgl->lgl_ogen != logid->lgl_ogen) {
+                                CERROR("log "LPX64" generation %x != %x\n",
+                                       logid->lgl_oid, cgl->lgl_ogen,
+                                       logid->lgl_ogen);
+                                continue;
+                        }
+                        loghandle->u.phd.phd_cat_handle = cathandle;
+                        GOTO(out, rc = 0);
+                }
+        }
+
+        rc = llog_create(cathandle->lgh_ctxt, &loghandle, logid, NULL);
+        if (rc) {
+                CERROR("error opening log id "LPX64":%x: rc %d\n",
+                       logid->lgl_oid, logid->lgl_ogen, rc);
+                /* whatever is left in loghandle (the list cursor, unless
+                 * llog_create() touched it) is not a usable handle */
+                loghandle = NULL;
+                GOTO(out, rc);
+        }
+
+        rc = llog_init_handle(loghandle, LLOG_F_IS_PLAIN, NULL);
+        if (rc)
+                GOTO(out, rc);
+
+        list_add(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head);
+        loghandle->u.phd.phd_cat_handle = cathandle;
+        loghandle->u.phd.phd_cookie.lgc_lgl = cathandle->lgh_id;
+        loghandle->u.phd.phd_cookie.lgc_index =
+                loghandle->lgh_hdr->llh_cat_idx;
+
+out:
+        *res = loghandle;
+        RETURN(rc);
+}
+
+/*
+ * Close every log on the catalog's open list, then close the catalog.
+ *
+ * A failure to close one of the listed logs is only logged; the value
+ * returned is the result of closing the catalog itself.
+ */
+int llog_cat_put(struct llog_handle *cathandle)
+{
+        struct llog_handle *plain, *next;
+        int rc;
+        ENTRY;
+
+        /* the _safe iterator is used because each entry is closed while
+         * the list is being walked */
+        list_for_each_entry_safe(plain, next, &cathandle->u.chd.chd_head,
+                                 u.phd.phd_entry) {
+                if (llog_close(plain) != 0)
+                        CERROR("error closing loghandle\n");
+        }
+
+        rc = llog_close(cathandle);
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_put);
+
+/* Return the currently active log handle.  If the current log handle has
+ * used up its last bitmap index, start a new one.
+ *
+ * If create is 0, we only want to know what the currently active log is:
+ * whatever chd_current_log holds is returned (possibly NULL, possibly a
+ * full log) and no new log is created.  If create is non-zero and there is
+ * no current log or it is full, a new log is created with
+ * llog_cat_new_log(), whose ERR_PTR() is passed through on failure.
+ *
+ * Assumes caller has already pushed us into the kernel context and is locking.
+ *
+ * NOTE: loghandle is write-locked upon successful return (when it is
+ * neither NULL nor an ERR_PTR)
+ */
+static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle,
+                                                int create)
+{
+        struct llog_handle *loghandle = NULL;
+        ENTRY;
+
+        /* fast path: look at the current log under the catalog read lock */
+        down_read(&cathandle->lgh_lock);
+        loghandle = cathandle->u.chd.chd_current_log;
+        if (loghandle) {
+                struct llog_log_hdr *llh = loghandle->lgh_hdr;
+                if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) {
+                        /* take the log's lock before dropping the catalog's */
+                        down_write(&loghandle->lgh_lock);
+                        up_read(&cathandle->lgh_lock);
+                        RETURN(loghandle);
+                }
+        }
+        if (!create) {
+                /* no creation wanted: hand back what there is, even if it
+                 * is NULL or a log with no index left */
+                if (loghandle)
+                        down_write(&loghandle->lgh_lock);
+                up_read(&cathandle->lgh_lock);
+                RETURN(loghandle);
+        }
+        up_read(&cathandle->lgh_lock);
+
+        /* time to create new log */
+
+        /* first, we have to make sure the state hasn't changed */
+        down_write(&cathandle->lgh_lock);
+        loghandle = cathandle->u.chd.chd_current_log;
+        if (loghandle) {
+                struct llog_log_hdr *llh = loghandle->lgh_hdr;
+                if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) {
+                        down_write(&loghandle->lgh_lock);
+                        up_write(&cathandle->lgh_lock);
+                        RETURN(loghandle);
+                }
+        }
+
+        CDEBUG(D_INODE, "creating new log\n");
+        loghandle = llog_cat_new_log(cathandle);
+        /* only a real handle can be locked; an ERR_PTR is returned as is */
+        if (!IS_ERR(loghandle))
+                down_write(&loghandle->lgh_lock);
+        up_write(&cathandle->lgh_lock);
+        RETURN(loghandle);
+}
+
+/* Add a single record to the recovery log(s) using a catalog.
+ * Returns as llog_write_record, or the error from llog_cat_current_log()
+ * when no log can be obtained.
+ *
+ * The write is tried at most twice: if the first attempt reports -ENOSPC,
+ * the current log is asked for again (which may create a new plain log)
+ * and the write is repeated once.
+ *
+ * Assumes caller has already pushed us into the kernel context.
+ */
+int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec,
+                     struct llog_cookie *reccookie, void *buf)
+{
+        struct llog_handle *loghandle;
+        int attempt, rc = 0;
+        ENTRY;
+
+        LASSERT(rec->lrh_len <= LLOG_CHUNK_SIZE);
+
+        for (attempt = 0; attempt < 2; attempt++) {
+                loghandle = llog_cat_current_log(cathandle, 1);
+                if (IS_ERR(loghandle))
+                        RETURN(PTR_ERR(loghandle));
+
+                /* loghandle is already locked by llog_cat_current_log() */
+                rc = llog_write_rec(loghandle, rec, reccookie, 1, buf, -1);
+                up_write(&loghandle->lgh_lock);
+
+                /* only a full log is worth a second attempt */
+                if (rc != -ENOSPC)
+                        break;
+        }
+
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_add_rec);
+
+/* Cancel the records named by an array of cookies.  For each cookie the
+ * in-use bit of its record is cleared in the owning log and then either:
+ * - the log became empty and was destroyed, so its handle is freed and its
+ *   slot in the catalog is cancelled as well
+ * - the log is not empty, in which case llog_cancel_rec() has just written
+ *   out the log header
+ *
+ * The cookies may be in different log files, so the log handle is looked
+ * up again for every cookie.
+ *
+ * The walk stops at the first cookie whose log cannot be found; the value
+ * returned is the result of the last operation performed.
+ *
+ * Assumes caller has already pushed us into the kernel context.
+ */
+int llog_cat_cancel_records(struct llog_handle *cathandle, int count,
+                        struct llog_cookie *cookies)
+{
+        int i, rc = 0;
+        ENTRY;
+
+        down_write(&cathandle->lgh_lock);
+        for (i = 0; i < count; i++) {
+                struct llog_cookie *cookie = &cookies[i];
+                struct llog_logid *lgl = &cookie->lgc_lgl;
+                struct llog_handle *loghandle;
+                int index;
+
+                rc = llog_cat_id2handle(cathandle, &loghandle, lgl);
+                if (rc) {
+                        CERROR("Cannot find log "LPX64"\n", lgl->lgl_oid);
+                        break;
+                }
+
+                down_write(&loghandle->lgh_lock);
+                rc = llog_cancel_rec(loghandle, cookie->lgc_index);
+                up_write(&loghandle->lgh_lock);
+
+                /* only rc == 1 reports that the log has been destroyed */
+                if (rc != 1)
+                        continue;
+
+                /* remember the catalog slot before the handle goes away */
+                index = loghandle->u.phd.phd_cookie.lgc_index;
+                if (cathandle->u.chd.chd_current_log == loghandle)
+                        cathandle->u.chd.chd_current_log = NULL;
+                llog_free_handle(loghandle);
+
+                LASSERT(index);
+                llog_cat_set_first_idx(cathandle, index);
+                rc = llog_cancel_rec(cathandle, index);
+                if (rc == 0)
+                        CDEBUG(D_HA, "cancel plain log at index %u "
+                               "of catalog "LPX64"\n",
+                               index, cathandle->lgh_id.lgl_oid);
+        }
+        up_write(&cathandle->lgh_lock);
+
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_cancel_records);
+
+/*
+ * llog_process() callback used on a catalog: @rec is expected to be a
+ * llog_logid_rec naming a plain log.  That log is looked up (or opened)
+ * through llog_cat_id2handle() and processed with the callback and data
+ * carried in @data, which points to a struct llog_process_data.
+ *
+ * Returns -EINVAL if @rec is not of type LLOG_LOGID_MAGIC, the error from
+ * llog_cat_id2handle(), or the result of processing the plain log.
+ */
+int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec,
+                        void *data)
+{
+        struct llog_process_data *d = data;
+        struct llog_logid_rec *lir = (struct llog_logid_rec *)rec;
+        struct llog_handle *llh;
+        int rc;
+        /* pair the RETURN() exits below with an ENTRY, as every other
+         * function using RETURN() in this file does */
+        ENTRY;
+
+        if (rec->lrh_type != LLOG_LOGID_MAGIC) {
+                CERROR("invalid record in catalog\n");
+                RETURN(-EINVAL);
+        }
+        CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n",
+               lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen,
+               rec->lrh_index, cat_llh->lgh_id.lgl_oid);
+
+        rc = llog_cat_id2handle(cat_llh, &llh, &lir->lid_id);
+        if (rc) {
+                CERROR("Cannot find handle for log "LPX64"\n",
+                       lir->lid_id.lgl_oid);
+                RETURN(rc);
+        }
+
+        rc = llog_process(llh, d->lpd_cb, d->lpd_data, NULL);
+        RETURN(rc);
+}
+
+/*
+ * Process every plain log referenced by a catalog, calling @cb with @data
+ * on the records of each of them (through llog_cat_process_cb()).
+ *
+ * When the catalog's first index lies beyond its last index, the catalog
+ * is processed in two passes: from the first index on, then from index
+ * zero up to the last index.  Otherwise a single pass is made.
+ *
+ * Returns the result of llog_process(); a failing first pass ends the
+ * processing.
+ */
+int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data)
+{
+        struct llog_log_hdr *llh = cat_llh->lgh_hdr;
+        struct llog_process_cat_data cd;
+        struct llog_process_data d;
+        int rc;
+        ENTRY;
+
+        LASSERT(llh->llh_flags & LLOG_F_IS_CAT);
+        d.lpd_cb = cb;
+        d.lpd_data = data;
+
+        /* common case: the used slots do not wrap, one pass is enough */
+        if (llh->llh_cat_idx <= cat_llh->lgh_last_idx) {
+                rc = llog_process(cat_llh, llog_cat_process_cb, &d, NULL);
+                RETURN(rc);
+        }
+
+        CWARN("catlog "LPX64" crosses index zero\n",
+              cat_llh->lgh_id.lgl_oid);
+
+        /* first pass: from the first index on */
+        cd.first_idx = llh->llh_cat_idx;
+        cd.last_idx = 0;
+        rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd);
+        if (rc != 0)
+                RETURN(rc);
+
+        /* second pass: from index zero up to the last index */
+        cd.first_idx = 0;
+        cd.last_idx = cat_llh->lgh_last_idx;
+        rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd);
+
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_process);
+
+/*
+ * Advance the catalog header's llh_cat_idx after the catalog slot @index
+ * has been given up.
+ *
+ * Nothing is changed unless llh_cat_idx is exactly @index - 1.  In that
+ * case llh_cat_idx is moved up to @index and then further, one step for
+ * every following slot whose bitmap bit is clear, until a set bit or
+ * lgh_last_idx is reached.
+ *
+ * NOTE(review): the increments of llh_cat_idx are not reduced modulo the
+ * bitmap size; wrapping appears to rely on the i == 0 branch below -
+ * confirm.
+ *
+ * Always returns 0.
+ */
+int llog_cat_set_first_idx(struct llog_handle *cathandle, int index)
+{
+        struct llog_log_hdr *llh = cathandle->lgh_hdr;
+        int i, bitmap_size, idx;
+        ENTRY;
+
+        bitmap_size = sizeof(llh->llh_bitmap) * 8;
+        if (llh->llh_cat_idx == (index - 1)) {
+                idx = llh->llh_cat_idx + 1;
+                llh->llh_cat_idx = idx;
+                /* caught up with the last used slot: nothing left to scan */
+                if (idx == cathandle->lgh_last_idx)
+                        goto out;
+                /* scan the slots after @index, wrapping at the bitmap size */
+                for (i = (index + 1) % bitmap_size;
+                     i != cathandle->lgh_last_idx;
+                     i = (i + 1) % bitmap_size) {
+                        if (!ext2_test_bit(i, llh->llh_bitmap)) {
+                                /* unused slot: move the first index past it */
+                                idx = llh->llh_cat_idx + 1;
+                                llh->llh_cat_idx = idx;
+                        } else if (i == 0) {
+                                /* slot 0 with its bit set: restart the
+                                 * first index from zero */
+                                llh->llh_cat_idx = 0;
+                        } else {
+                                /* first slot still in use: stop here */
+                                break;
+                        }
+                }
+out:
+                CDEBUG(D_HA, "set catlog "LPX64" first idx %u\n",
+                       cathandle->lgh_id.lgl_oid, llh->llh_cat_idx);
+        }
+
+        RETURN(0);
+}
+
+#if 0
+/* NOTE: everything up to the matching #endif is disabled by the #if 0
+ * above and is not compiled.
+ *
+ * As written, it writes the catalog header out when the backing file is
+ * empty and reads it in otherwise, falling back to writing the header when
+ * the read comes up short.
+ *
+ * NOTE(review): it uses down()/up() on lgh_lock while the live code in
+ * this file uses the rw-semaphore calls on that field, and it refers to
+ * lgh_file and lgh_tgtuuid, which the live code here never touches - it
+ * looks stale and would presumably need rework before being re-enabled.
+ */
+/* Assumes caller has already pushed us into the kernel context. */
+int llog_cat_init(struct llog_handle *cathandle, struct obd_uuid *tgtuuid)
+{
+        struct llog_log_hdr *llh;
+        loff_t offset = 0;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(sizeof(*llh) == LLOG_CHUNK_SIZE);
+
+        down(&cathandle->lgh_lock);
+        llh = cathandle->lgh_hdr;
+
+        if (cathandle->lgh_file->f_dentry->d_inode->i_size == 0) {
+                llog_write_rec(cathandle, &llh->llh_hdr, NULL, 0, NULL, 0);
+
+write_hdr:
+                rc = lustre_fwrite(cathandle->lgh_file, llh, LLOG_CHUNK_SIZE,
+                                   &offset);
+                if (rc != LLOG_CHUNK_SIZE) {
+                        CERROR("error writing catalog header: rc %d\n", rc);
+                        OBD_FREE(llh, sizeof(*llh));
+                        /* a short write is reported as -ENOSPC */
+                        if (rc >= 0)
+                                rc = -ENOSPC;
+                } else
+                        rc = 0;
+        } else {
+                rc = lustre_fread(cathandle->lgh_file, llh, LLOG_CHUNK_SIZE,
+                                  &offset);
+                if (rc != LLOG_CHUNK_SIZE) {
+                        CERROR("error reading catalog header: rc %d\n", rc);
+                        /* Can we do much else if the header is bad? */
+                        goto write_hdr;
+                } else
+                        rc = 0;
+        }
+
+        cathandle->lgh_tgtuuid = &llh->llh_tgtuuid;
+        up(&cathandle->lgh_lock);
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_cat_init);
+
+#endif
diff --git a/lustre/obdclass/llog_internal.h b/lustre/obdclass/llog_internal.h
new file mode 100644 (file)
index 0000000..0066087
--- /dev/null
@@ -0,0 +1,8 @@
+/* Declarations shared between the llog source files; llog_lvfs.c includes
+ * this header. */
+#ifndef __LLOG_INTERNAL_H__
+#define __LLOG_INTERNAL_H__
+
+/* NOTE(review): the definition is not in the files shown alongside this
+ * header; judging by the name and arguments it presumably stores "count"
+ * catalog ids from "idarray" under "name" - confirm against the
+ * implementation. */
+int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
+                      char *name, int count, struct llog_catid *idarray);
+/* Find the handle of the log "logid" on the catalog's open list, or open
+ * and initialize it as a plain log; defined in llog_cat.c.  The handle is
+ * stored through "res"; 0 is returned on success. */
+int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res,
+                       struct llog_logid *logid);
+#endif
diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c
new file mode 100644 (file)
index 0000000..9d99e9a
--- /dev/null
@@ -0,0 +1,765 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * OST<->MDS recovery logging infrastructure.
+ *
+ * Invariants in implementation:
+ * - we do not share logs among different OST<->MDS connections, so that
+ *   if an OST or MDS fails it need only look at log(s) relevant to itself
+ */
+
+#define DEBUG_SUBSYSTEM S_LOG
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+
+#ifdef __KERNEL__
+#include <linux/fs.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/obd.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_log.h>
+#include <linux/obd_ost.h>
+#include <portals/list.h>
+#include <linux/lvfs.h>
+#include <linux/lustre_fsfilt.h>
+#include "llog_internal.h"
+
+#ifdef __KERNEL__
+
+/* Write a padding record of 'len' bytes at the current file position.
+ *
+ * Only the record header and the record tail are written; the bytes in
+ * between are skipped by moving f_pos forward.  The padding record carries
+ * type 0 and the given index.  'len' must be at least LLOG_MIN_REC_SIZE and
+ * a multiple of 8.
+ *
+ * Returns 0 on success or the error from fsfilt_write_record(). */
+static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file,
+                                int len, int index)
+{
+        struct llog_rec_hdr pad_hdr = { 0 };
+        struct llog_rec_tail pad_tail;
+        int rc;
+        ENTRY;
+
+        LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0);
+
+        pad_hdr.lrh_len = len;
+        pad_hdr.lrh_index = index;
+        pad_hdr.lrh_type = 0;
+        /* the tail mirrors the length and index stored in the header */
+        pad_tail.lrt_len = pad_hdr.lrh_len;
+        pad_tail.lrt_index = pad_hdr.lrh_index;
+
+        rc = fsfilt_write_record(obd, file, &pad_hdr, sizeof(pad_hdr),
+                                 &file->f_pos, 0);
+        if (rc == 0) {
+                /* jump over the unused body so the tail ends the record
+                 * (assumes the header write advanced f_pos -- confirm) */
+                file->f_pos += len - sizeof(pad_hdr) - sizeof(pad_tail);
+                rc = fsfilt_write_record(obd, file, &pad_tail,
+                                         sizeof(pad_tail), &file->f_pos, 0);
+        }
+        if (rc != 0)
+                CERROR("error writing padding record: rc %d\n", rc);
+
+        RETURN(rc);
+}
+
+/* Write one log record at file offset 'off'.
+ *
+ * buf == NULL: 'rec' is a complete record of rec->lrh_len bytes and is
+ *              written as-is.
+ * buf != NULL: rec->lrh_len is the payload length; it is rewritten to the
+ *              full record length and the record is emitted as header,
+ *              payload (buf) and a freshly built tail.
+ *
+ * On return f_pos is never left below the value it had on entry.
+ * Returns 0 on success or the error from fsfilt_write_record(). */
+static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file,
+                                struct llog_rec_hdr *rec, void *buf, loff_t off)
+{
+        int rc;
+        struct llog_rec_tail tail;
+        loff_t prev_pos = file->f_pos;
+        int payload_len = rec->lrh_len;
+
+        ENTRY;
+        file->f_pos = off;
+
+        if (buf == NULL) {
+                rc = fsfilt_write_record(obd, file, rec, payload_len,
+                                         &file->f_pos, 0);
+                if (rc == 0)
+                        GOTO(out, rc = 0);
+                CERROR("error writing log record: rc %d\n", rc);
+                goto out;
+        }
+
+        /* separate payload: header, then buf, then tail */
+        rec->lrh_len = sizeof(*rec) + payload_len + sizeof(tail);
+        rc = fsfilt_write_record(obd, file, rec, sizeof(*rec), &file->f_pos, 0);
+        if (rc != 0) {
+                CERROR("error writing log hdr: rc %d\n", rc);
+                goto out;
+        }
+
+        rc = fsfilt_write_record(obd, file, buf, payload_len, &file->f_pos, 0);
+        if (rc != 0) {
+                CERROR("error writing log buffer: rc %d\n", rc);
+                goto out;
+        }
+
+        tail.lrt_len = rec->lrh_len;
+        tail.lrt_index = rec->lrh_index;
+        rc = fsfilt_write_record(obd, file, &tail, sizeof(tail),
+                                 &file->f_pos, 0);
+        if (rc != 0)
+                CERROR("error writing log tail: rc %d\n", rc);
+
+ out:
+        /* an overwrite in the middle of the file must not move the
+         * position backwards from where it was on entry */
+        if (file->f_pos < prev_pos)
+                file->f_pos = prev_pos;
+        LASSERT(rc <= 0);
+        RETURN(rc);
+}
+
+/* Read 'size' bytes from 'file' at offset 'off' into 'buf'.
+ *
+ * The read uses a private copy of the offset.
+ * Returns 0 on success or the error from fsfilt_read_record(), which is
+ * also logged. */
+static int llog_lvfs_read_blob(struct obd_device *obd, struct l_file *file,
+                                void *buf, int size, loff_t off)
+{
+        loff_t pos = off;
+        int rc;
+        ENTRY;
+
+        rc = fsfilt_read_record(obd, file, buf, size, &pos);
+        if (rc != 0)
+                CERROR("error reading log record: rc %d\n", rc);
+
+        RETURN(rc);
+}
+
+/* Read and validate the log header of 'handle' from the start of its file.
+ *
+ * Returns LLOG_EEMPTY without reading if the file is 0 bytes long, the
+ * error from llog_lvfs_read_blob() if the read fails, -EIO if the header
+ * magic or the header length is wrong, and 0 otherwise.
+ *
+ * Unless the log is empty, lgh_last_idx is loaded from the header tail and
+ * f_pos is moved to the end of the file, even when an error is returned.
+ *
+ * The dentry name is printed with "%.*s" so that d_name.len bounds the
+ * output; "%*s" would only set a minimum field width. */
+static int llog_lvfs_read_header(struct llog_handle *handle)
+{
+        struct obd_device *obd;
+        int rc;
+        ENTRY;
+
+        LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE);
+
+        obd = handle->lgh_ctxt->loc_exp->exp_obd;
+
+        if (handle->lgh_file->f_dentry->d_inode->i_size == 0) {
+                CDEBUG(D_HA, "not reading header from 0-byte log\n");
+                RETURN(LLOG_EEMPTY);
+        }
+
+        rc = llog_lvfs_read_blob(obd, handle->lgh_file, handle->lgh_hdr,
+                                 LLOG_CHUNK_SIZE, 0);
+        if (rc) {
+                CERROR("error reading log header from %.*s\n",
+                       (int)handle->lgh_file->f_dentry->d_name.len,
+                       handle->lgh_file->f_dentry->d_name.name);
+        } else {
+                struct llog_rec_hdr *llh_hdr = &handle->lgh_hdr->llh_hdr;
+                /*
+                 * These need to be fixed for bug 1987
+                 */
+                if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) {
+                        CERROR("bad log %.*s header magic: %#x (expected %#x)\n",
+                               (int)handle->lgh_file->f_dentry->d_name.len,
+                               handle->lgh_file->f_dentry->d_name.name,
+                               llh_hdr->lrh_type, LLOG_HDR_MAGIC);
+                        rc = -EIO;
+                } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) {
+                        CERROR("incorrectly sized log %.*s header: %#x "
+                               "(expected %#x)\n",
+                               (int)handle->lgh_file->f_dentry->d_name.len,
+                               handle->lgh_file->f_dentry->d_name.name,
+                               llh_hdr->lrh_len, LLOG_CHUNK_SIZE);
+                        CERROR("you may need to re-run lconf --write_conf.\n");
+                        rc = -EIO;
+                }
+        }
+
+        handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index;
+        handle->lgh_file->f_pos = handle->lgh_file->f_dentry->d_inode->i_size;
+
+        RETURN(rc);
+}
+
+/* Write one record to the log behind 'loghandle'.
+ *
+ * returns negative on error; 0 if success && reccookie == 0; 1 otherwise
+ * (when appending, a successful LLOG_GEN_REC write also returns 1 even
+ * without a cookie).
+ * appends if idx == -1, otherwise overwrites record idx.
+ *
+ * If buf != NULL, rec->lrh_len is the length of the payload in buf and the
+ * header and tail are added around it; otherwise rec is a complete record.
+ * cookiecount is not referenced in this function. */
+static int llog_lvfs_write_rec(struct llog_handle *loghandle,
+                               struct llog_rec_hdr *rec,
+                               struct llog_cookie *reccookie, int cookiecount,
+                               void *buf, int idx)
+{
+        struct llog_log_hdr *llh;
+        int reclen = rec->lrh_len, index, rc;
+        struct llog_rec_tail *lrt;
+        struct obd_device *obd;
+        struct file *file;
+        size_t left;
+        ENTRY;
+
+        llh = loghandle->lgh_hdr;
+        file = loghandle->lgh_file;
+        obd = loghandle->lgh_ctxt->loc_exp->exp_obd;
+
+        /* record length should not be bigger than LLOG_CHUNK_SIZE; with a
+         * separate buf, room for the added header and tail is reserved */
+        if (buf)
+                rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) -
+                      sizeof(struct llog_rec_tail)) ? -E2BIG : 0;
+        else
+                rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0;
+        if (rc)
+                RETURN(rc);
+
+        /* overwrite mode: rewrite the header, then (unless idx == 0)
+         * record idx in place */
+        if (idx != -1) {
+                loff_t saved_offset;
+
+                /* no header: only allowed to insert record 1 */
+                if (idx != 1 && !file->f_dentry->d_inode->i_size) {
+                        CERROR("idx != -1 in empty log\n");
+                        LBUG();
+                }
+
+                /* a log with a fixed record size only accepts that size */
+                if (idx && llh->llh_size && llh->llh_size != reclen)
+                        RETURN(-EINVAL);
+
+                rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
+                /* we are done if we only write the header or on error */
+                if (rc || idx == 0)
+                        RETURN(rc);
+
+                /* record idx is located as if every record before it had
+                 * the same length as this one */
+                saved_offset = sizeof(*llh) + (idx-1)*rec->lrh_len;
+                rc = llog_lvfs_write_blob(obd, file, rec, buf, saved_offset);
+                if (rc == 0 && reccookie) {
+                        reccookie->lgc_lgl = loghandle->lgh_id;
+                        reccookie->lgc_index = idx;
+                        rc = 1;
+                }
+                RETURN(rc);
+        }
+
+        /* Make sure that records don't cross a chunk boundary, so we can
+         * process them page-at-a-time if needed.  If it will cross a chunk
+         * boundary, write in a fake (but referenced) entry to pad the chunk.
+         *
+         * We know that llog_current_log() will return a loghandle that is
+         * big enough to hold reclen, so all we care about is padding here.
+         */
+        left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1));
+        if (buf)
+                reclen = sizeof(*rec) + rec->lrh_len + 
+                        sizeof(struct llog_rec_tail);
+
+        /* NOTE: padding is a record, but no bit is set */
+        /* NOTE(review): as computed above 'left' is never 0 (assuming
+         * LLOG_CHUNK_SIZE is a power of two), so the 'left != 0' test looks
+         * redundant -- confirm. */
+        if (left != 0 && left != reclen &&
+            left < (reclen + LLOG_MIN_REC_SIZE)) {
+                int bitmap_size = sizeof(llh->llh_bitmap) * 8;
+                loghandle->lgh_last_idx++;
+                rc = llog_lvfs_pad(obd, file, left, loghandle->lgh_last_idx);
+                if (rc)
+                        RETURN(rc);
+                /* if it's the last idx in log file, then return -ENOSPC */
+                if (loghandle->lgh_last_idx == bitmap_size - 1)
+                        RETURN(-ENOSPC);
+        }
+
+        /* take the next index and account for it in the in-memory header */
+        loghandle->lgh_last_idx++;
+        index = loghandle->lgh_last_idx;
+        rec->lrh_index = index;
+        if (buf == NULL) {
+                /* complete record: fill in the tail at its end */
+                lrt = (struct llog_rec_tail *)
+                        ((char *)rec + rec->lrh_len - sizeof(*lrt));
+                lrt->lrt_len = rec->lrh_len;
+                lrt->lrt_index = rec->lrh_index;
+        }
+        if (ext2_set_bit(index, llh->llh_bitmap)) {
+                CERROR("argh, index %u already set in log bitmap?\n", index);
+                LBUG(); /* should never happen */
+        }
+        llh->llh_count++;
+        llh->llh_tail.lrt_index = index;
+
+        /* the updated header goes to offset 0 first, then the record is
+         * written at the current file position */
+        rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0);
+        if (rc)
+                RETURN(rc);
+
+        rc = llog_lvfs_write_blob(obd, file, rec, buf, file->f_pos);
+        if (rc)
+                RETURN(rc);
+
+        CDEBUG(D_HA, "added record "LPX64": idx: %u, %u bytes\n",
+               loghandle->lgh_id.lgl_oid, index, rec->lrh_len);
+        /* NOTE(review): rc is always 0 here because non-zero values were
+         * returned above, so the 'rc == 0' tests below are redundant. */
+        if (rc == 0 && reccookie) {
+                reccookie->lgc_lgl = loghandle->lgh_id;
+                reccookie->lgc_index = index;
+                /* the cookie subsystem is derived from the record type */
+                if (rec->lrh_type == MDS_UNLINK_REC)
+                        reccookie->lgc_subsys = LLOG_UNLINK_ORIG_CTXT;
+                else if (rec->lrh_type == OST_SZ_REC)
+                        reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT;
+                else if (rec->lrh_type == OST_RAID1_REC)
+                        reccookie->lgc_subsys = LLOG_RD1_ORIG_CTXT;
+                else
+                        reccookie->lgc_subsys = -1;
+                rc = 1;
+        }
+        if (rc == 0 && rec->lrh_type == LLOG_GEN_REC)
+                rc = 1;
+
+        RETURN(rc);
+}
+
+/* Advance *off towards the block that may hold record 'goal'.
+ *
+ * Going from index 'curr' to index 'goal' passes over at least
+ * (goal - curr - 1) records of LLOG_MIN_REC_SIZE bytes each, so that many
+ * bytes can be skipped without reading; the result is rounded down to a
+ * LLOG_CHUNK_SIZE boundary.  If the real records are larger and we have not
+ * gone far enough, the caller just skips some more records.
+ * Nothing is changed when goal <= curr. */
+
+static void llog_skip_over(__u64 *off, int curr, int goal)
+{
+        if (goal > curr) {
+                __u64 target = *off + (goal - curr - 1) * LLOG_MIN_REC_SIZE;
+
+                *off = target & ~(LLOG_CHUNK_SIZE - 1);
+        }
+}
+
+
+/* Read the next block of the log that may contain record next_idx into buf.
+ *
+ * sets:
+ *  - cur_offset to the furthest point read in the log file
+ *  - cur_idx to the log index preceding cur_offset
+ * returns 0 on success (also when a read returns no data),
+ * -EINVAL if len is 0 or not a multiple of LLOG_CHUNK_SIZE or if the block
+ * read is invalid, -ENOENT if the block starts past next_idx, -EIO if the
+ * end of the file is reached without finding next_idx, or the error from
+ * fsfilt_read_record().
+ */
+static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx,
+                                int next_idx, __u64 *cur_offset, void *buf,
+                                int len)
+{
+        int rc;
+        ENTRY;
+
+        if (len == 0 || len & (LLOG_CHUNK_SIZE - 1))
+                RETURN(-EINVAL);
+
+        CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n",
+               next_idx, *cur_idx, *cur_offset);
+
+        while (*cur_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) {
+                struct llog_rec_hdr *rec;
+                struct llog_rec_tail *tail;
+                loff_t ppos;
+
+                /* jump ahead by the minimum space the skipped records
+                 * must occupy */
+                llog_skip_over(cur_offset, *cur_idx, next_idx);
+
+                ppos = *cur_offset;
+                rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd,
+                                        loghandle->lgh_file, buf, len,
+                                        &ppos);
+
+                if (rc) {
+                        CERROR("Cant read llog block at log id "LPU64
+                               "/%u offset "LPU64"\n",
+                               loghandle->lgh_id.lgl_oid,
+                               loghandle->lgh_id.lgl_ogen,
+                               *cur_offset);
+                        RETURN(rc);
+                }
+
+                /* put number of bytes read into rc to make code simpler */
+                rc = ppos - *cur_offset;
+                *cur_offset = ppos;
+
+                if (rc == 0) /* end of file, nothing to do */
+                        RETURN(0);
+
+                /* NOTE(review): rc is int and sizeof() is unsigned, so this
+                 * comparison is done unsigned; it relies on rc never being
+                 * negative here -- confirm. */
+                if (rc < sizeof(*tail)) {
+                        CERROR("Invalid llog block at log id "LPU64"/%u offset "
+                               LPU64"\n", loghandle->lgh_id.lgl_oid,
+                               loghandle->lgh_id.lgl_ogen, *cur_offset);
+                        RETURN(-EINVAL);
+                }
+
+                /* the last record tail in the data just read tells which
+                 * index the block ends on (arithmetic on void * is a GNU C
+                 * extension) */
+                tail = buf + rc - sizeof(struct llog_rec_tail);
+                *cur_idx = tail->lrt_index;
+
+                /* this shouldn't happen */
+                if (tail->lrt_index == 0) {
+                        CERROR("Invalid llog tail at log id "LPU64"/%u offset "
+                               LPU64"\n", loghandle->lgh_id.lgl_oid,
+                               loghandle->lgh_id.lgl_ogen, *cur_offset);
+                        RETURN(-EINVAL);
+                }
+                /* block ends before the wanted record: read further */
+                if (tail->lrt_index < next_idx)
+                        continue;
+
+                /* sanity check that the start of the new buffer is no farther
+                 * than the record that we wanted.  This shouldn't happen. */
+                rec = buf;
+                if (rec->lrh_index > next_idx) {
+                        CERROR("missed desired record? %u > %u\n",
+                               rec->lrh_index, next_idx);
+                        RETURN(-ENOENT);
+                }
+                RETURN(0);
+        }
+        RETURN(-EIO);
+}
+
+/* Open the log file "LOGS/<name>" with the given flags and mode.
+ *
+ * The path is built in a temporary PATH_MAX buffer that is released before
+ * returning.  Returns the opened file, or an ERR_PTR(): -ENOMEM if the
+ * buffer cannot be allocated, -ENAMETOOLONG if the path is too long, or
+ * whatever l_filp_open() reports (that failure is also logged). */
+static struct file *llog_filp_open(char *name, int flags, int mode)
+{
+        struct file *filp = ERR_PTR(-ENAMETOOLONG);
+        char *path;
+        int used;
+
+        OBD_ALLOC(path, PATH_MAX);
+        if (path == NULL)
+                return ERR_PTR(-ENOMEM);
+
+        used = snprintf(path, PATH_MAX, "LOGS/%s", name);
+        if (used < PATH_MAX - 1) {
+                filp = l_filp_open(path, flags, mode);
+                if (IS_ERR(filp))
+                        CERROR("logfile creation %s: %ld\n", path,
+                               PTR_ERR(filp));
+        }
+
+        OBD_FREE(path, PATH_MAX);
+        return filp;
+}
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * Allocates a log handle and attaches an open log file to it.  The file is
+ * chosen in one of three ways:
+ *  - logid != NULL: open the existing object named by logid (-ENOENT if
+ *    its dentry has no inode);
+ *  - name != NULL:  open or create "LOGS/<name>" via llog_filp_open();
+ *  - neither:       create a new object with obd_create() and open it.
+ *
+ * Returns 0 with *res set to the new handle on success.  On failure the
+ * error code is returned, the handle is released and *res is reset to NULL
+ * so the caller is not left with a pointer to a released handle.  If the
+ * handle cannot be allocated at all, -ENOMEM is returned and *res is not
+ * touched. */
+static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res,
+                            struct llog_logid *logid, char *name)
+{
+        struct llog_handle *handle;
+        struct obd_device *obd;
+        struct l_dentry *dchild = NULL;
+        struct obdo *oa = NULL;
+        int rc = 0, cleanup_phase = 1;
+        int open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
+        ENTRY;
+
+        handle = llog_alloc_handle();
+        if (handle == NULL)
+                RETURN(-ENOMEM);
+        *res = handle;
+
+        LASSERT(ctxt);
+        LASSERT(ctxt->loc_exp);
+        obd = ctxt->loc_exp->exp_obd;
+
+        if (logid != NULL) {
+                /* open an existing log by its id */
+                dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, logid->lgl_oid,
+                                             logid->lgl_ogen, logid->lgl_ogr);
+
+                if (IS_ERR(dchild)) {
+                        rc = PTR_ERR(dchild);
+                        CERROR("error looking up logfile "LPX64":0x%x: rc %d\n",
+                               logid->lgl_oid, logid->lgl_ogen, rc);
+                        GOTO(cleanup, rc);
+                }
+
+                /* from here on the dentry reference must be dropped too */
+                cleanup_phase = 2;
+                if (dchild->d_inode == NULL) {
+                        rc = -ENOENT;
+                        CERROR("nonexistent log file "LPX64":"LPX64": rc %d\n",
+                               logid->lgl_oid, logid->lgl_ogr, rc);
+                        GOTO(cleanup, rc);
+                }
+
+                handle->lgh_file = l_dentry_open(&obd->obd_ctxt, dchild,
+                                                    O_RDWR | O_LARGEFILE);
+                if (IS_ERR(handle->lgh_file)) {
+                        rc = PTR_ERR(handle->lgh_file);
+                        CERROR("error opening logfile "LPX64":0x%x: rc %d\n",
+                               logid->lgl_oid, logid->lgl_ogen, rc);
+                        GOTO(cleanup, rc);
+                }
+
+                /* assign the value of lgh_id for handle directly */
+                handle->lgh_id = *logid;
+
+        } else if (name) {
+                /* named log: lives under LOGS/, id taken from its inode */
+                handle->lgh_file = llog_filp_open(name, open_flags, 0644);
+                if (IS_ERR(handle->lgh_file))
+                        GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
+
+                handle->lgh_id.lgl_ogr = 1;
+                handle->lgh_id.lgl_oid =
+                        handle->lgh_file->f_dentry->d_inode->i_ino;
+                handle->lgh_id.lgl_ogen =
+                        handle->lgh_file->f_dentry->d_inode->i_generation;
+        } else {
+                /* anonymous log: create a new object and use its id */
+                oa = obdo_alloc();
+                if (oa == NULL)
+                        GOTO(cleanup, rc = -ENOMEM);
+                /* XXX get some filter group constants */
+                oa->o_gr = 1;
+                oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP;
+                rc = obd_create(ctxt->loc_exp, oa, NULL, NULL);
+                if (rc)
+                        GOTO(cleanup, rc);
+
+                dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, oa->o_id,
+                                             oa->o_generation, oa->o_gr);
+
+                if (IS_ERR(dchild))
+                        GOTO(cleanup, rc = PTR_ERR(dchild));
+                cleanup_phase = 2;
+                handle->lgh_file = l_dentry_open(&obd->obd_ctxt, dchild,
+                                                 open_flags);
+                if (IS_ERR(handle->lgh_file))
+                        GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
+
+                handle->lgh_id.lgl_ogr = oa->o_gr;
+                handle->lgh_id.lgl_oid = oa->o_id;
+                handle->lgh_id.lgl_ogen = oa->o_generation;
+        }
+
+        handle->lgh_ctxt = ctxt;
+ finish:
+        if (oa)
+                obdo_free(oa);
+        RETURN(rc);
+cleanup:
+        switch (cleanup_phase) {
+        case 2:
+                l_dput(dchild);
+                /* fallthrough */
+        case 1:
+                llog_free_handle(handle);
+                /* do not hand a released handle back to the caller */
+                *res = NULL;
+        }
+        goto finish;
+}
+
+/* Close the file attached to 'handle'.
+ *
+ * Returns the result of filp_close(); a non-zero result is logged.  The
+ * handle itself is not released here. */
+static int llog_lvfs_close(struct llog_handle *handle)
+{
+        int rc;
+        ENTRY;
+
+        rc = filp_close(handle->lgh_file, 0);
+        if (rc != 0)
+                CERROR("error closing log: rc %d\n", rc);
+
+        RETURN(rc);
+}
+
+/* Close the log behind 'handle' and remove its backing file or object.
+ *
+ * A log whose parent directory is named "LOGS" is closed and then unlinked
+ * from that directory.  Any other log is closed and then destroyed with
+ * obd_destroy() using the id stored in the handle.
+ *
+ * Returns 0 on success, -ENOMEM if the obdo cannot be allocated, or the
+ * error from the close, unlink or destroy step.  Nothing is removed if the
+ * close fails. */
+static int llog_lvfs_destroy(struct llog_handle *handle)
+{
+        struct dentry *log_dentry;
+        struct obdo *oa;
+        int rc;
+        ENTRY;
+
+        log_dentry = handle->lgh_file->f_dentry;
+        if (strcmp(log_dentry->d_parent->d_name.name, "LOGS") == 0) {
+                struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd;
+                struct inode *dir = log_dentry->d_parent->d_inode;
+                struct obd_run_ctxt saved;
+
+                push_ctxt(&saved, &obd->obd_ctxt, NULL);
+                /* extra reference, presumably so the dentry is still valid
+                 * for the unlink once the file has been closed */
+                dget(log_dentry);
+                rc = llog_lvfs_close(handle);
+
+                if (rc == 0) {
+                        /* unlink under the parent directory's semaphore */
+                        down(&dir->i_sem);
+                        rc = vfs_unlink(dir, log_dentry);
+                        up(&dir->i_sem);
+                }
+
+                dput(log_dentry);
+                pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+                RETURN(rc);
+        }
+
+        oa = obdo_alloc();
+        if (oa == NULL)
+                RETURN(-ENOMEM);
+
+        /* identify the object to destroy by the id kept in the handle */
+        oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER;
+        oa->o_id = handle->lgh_id.lgl_oid;
+        oa->o_gr = handle->lgh_id.lgl_ogr;
+        oa->o_generation = handle->lgh_id.lgl_ogen;
+
+        rc = llog_lvfs_close(handle);
+        if (rc != 0)
+                GOTO(out, rc);
+
+        rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL);
+ out:
+        obdo_free(oa);
+        RETURN(rc);
+}
+
+/* reads the catalog list
+ *
+ * Opens (creating it if needed) the file 'name' in the context of 'obd' and
+ * reads 'count' llog_catid entries from its start into idarray through
+ * disk_obd.  'count' must be non-zero.
+ *
+ * Returns 0 on success.  On failure returns the first error met: the open
+ * error, -ENOENT if 'name' is not a regular file, the error from
+ * fsfilt_read_record(), or, if everything else succeeded, the error from
+ * closing the file.  The close result no longer overwrites an earlier
+ * error. */
+int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
+                      char *name, int count, struct llog_catid *idarray)
+{
+        struct obd_run_ctxt saved;
+        struct l_file *file;
+        int rc, rc2;
+        int size = sizeof(*idarray) * count;
+        loff_t off = 0;
+        ENTRY;
+
+        LASSERT(count);
+
+        push_ctxt(&saved, &obd->obd_ctxt, NULL);
+        file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
+        if (!file || IS_ERR(file)) {
+                rc = PTR_ERR(file);
+                CERROR("OBD filter: cannot open/create %s: rc = %d\n",
+                       name, rc);
+                GOTO(out, rc);
+        }
+
+        if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
+                CERROR("%s is not a regular file!: mode = %o\n", name,
+                       file->f_dentry->d_inode->i_mode);
+                GOTO(out, rc = -ENOENT);
+        }
+
+        rc = fsfilt_read_record(disk_obd, file, idarray, size, &off);
+        if (rc) {
+                CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n",
+                       name, rc);
+                GOTO(out, rc);
+        }
+
+ out:
+        pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+        if (file && !IS_ERR(file)) {
+                /* always close, but keep the first error for the caller */
+                rc2 = filp_close(file, 0);
+                if (rc == 0)
+                        rc = rc2;
+        }
+        RETURN(rc);
+}
+EXPORT_SYMBOL(llog_get_cat_list);
+
+/* writes the cat list
+ *
+ * Opens (creating it if needed) the file 'name' in the context of 'obd' and
+ * writes 'count' llog_catid entries from idarray at its start through
+ * disk_obd.  'count' must be non-zero.
+ *
+ * Returns 0 on success.  On failure returns the first error met: the open
+ * error, -ENOENT if 'name' is not a regular file, the error from
+ * fsfilt_write_record(), or, if everything else succeeded, the error from
+ * closing the file.  The close result no longer overwrites an earlier
+ * error, so a failed write is reported to the caller. */
+int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
+                      char *name, int count, struct llog_catid *idarray)
+{
+        struct obd_run_ctxt saved;
+        struct l_file *file;
+        int rc, rc2;
+        int size = sizeof(*idarray) * count;
+        loff_t off = 0;
+        ENTRY;
+
+        LASSERT(count);
+
+        push_ctxt(&saved, &obd->obd_ctxt, NULL);
+        file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
+        if (!file || IS_ERR(file)) {
+                rc = PTR_ERR(file);
+                CERROR("OBD filter: cannot open/create %s: rc = %d\n",
+                       name, rc);
+                GOTO(out, rc);
+        }
+
+        if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
+                CERROR("%s is not a regular file!: mode = %o\n", name,
+                       file->f_dentry->d_inode->i_mode);
+                GOTO(out, rc = -ENOENT);
+        }
+
+        rc = fsfilt_write_record(disk_obd, file, idarray, size, &off, 1);
+        if (rc) {
+                CDEBUG(D_INODE,"OBD filter: error writing %s: rc %d\n",
+                       name, rc);
+                GOTO(out, rc);
+        }
+
+ out:
+        pop_ctxt(&saved, &obd->obd_ctxt, NULL);
+        if (file && !IS_ERR(file)) {
+                /* always close, but keep the first error for the caller */
+                rc2 = filp_close(file, 0);
+                if (rc == 0)
+                        rc = rc2;
+        }
+        RETURN(rc);
+}
+
+/* Method table of the lvfs llog backend (kernel build): wires the llog
+ * operations to the llog_lvfs_* functions of this file. */
+struct llog_operations llog_lvfs_ops = {
+        .lop_write_rec   = llog_lvfs_write_rec,
+        .lop_next_block  = llog_lvfs_next_block,
+        .lop_read_header = llog_lvfs_read_header,
+        .lop_create      = llog_lvfs_create,
+        .lop_destroy     = llog_lvfs_destroy,
+        .lop_close       = llog_lvfs_close,
+        /* lop_cancel is left unset: this file defines no llog_lvfs_cancel */
+};
+
+EXPORT_SYMBOL(llog_lvfs_ops);
+
+#else /* !__KERNEL__ */
+
+/* Non-kernel (!__KERNEL__) placeholder: there is no implementation in this
+ * build, so a call trips LBUG().  The return only satisfies the signature. */
+static int llog_lvfs_read_header(struct llog_handle *handle)
+{
+        LBUG();
+        return 0;
+}
+
+/* Non-kernel (!__KERNEL__) placeholder: there is no implementation in this
+ * build, so a call trips LBUG().  The return only satisfies the signature. */
+static int llog_lvfs_write_rec(struct llog_handle *loghandle,
+                               struct llog_rec_hdr *rec,
+                               struct llog_cookie *reccookie, int cookiecount,
+                               void *buf, int idx)
+{
+        LBUG();
+        return 0;
+}
+
+/* Non-kernel (!__KERNEL__) placeholder: there is no implementation in this
+ * build, so a call trips LBUG().  The return only satisfies the signature. */
+static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx,
+                                int next_idx, __u64 *cur_offset, void *buf,
+                                int len)
+{
+        LBUG();
+        return 0;
+}
+
+/* Non-kernel (!__KERNEL__) placeholder: there is no implementation in this
+ * build, so a call trips LBUG().  *res is not set. */
+static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res,
+                            struct llog_logid *logid, char *name)
+{
+        LBUG();
+        return 0;
+}
+
+/* Non-kernel (!__KERNEL__) placeholder: there is no implementation in this
+ * build, so a call trips LBUG().  The return only satisfies the signature. */
+static int llog_lvfs_close(struct llog_handle *handle)
+{
+        LBUG();
+        return 0;
+}
+
+/* Non-kernel (!__KERNEL__) placeholder: there is no implementation in this
+ * build, so a call trips LBUG().  The return only satisfies the signature. */
+static int llog_lvfs_destroy(struct llog_handle *handle)
+{
+        LBUG();
+        return 0;
+}
+
+/* Non-kernel (!__KERNEL__) placeholder for reading the catalog list: there
+ * is no implementation in this build, so a call trips LBUG().  idarray is
+ * not filled in. */
+int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
+                      char *name, int count, struct llog_catid *idarray)
+{
+        LBUG();
+        return 0;
+}
+
+/* Non-kernel (!__KERNEL__) placeholder for writing the catalog list: there
+ * is no implementation in this build, so a call trips LBUG().  Nothing is
+ * written. */
+int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
+                      char *name, int count, struct llog_catid *idarray)
+{
+        LBUG();
+        return 0;
+}
+
+/* Method table of the lvfs llog backend (non-kernel build): every entry
+ * points at a placeholder in this file that only calls LBUG(). */
+struct llog_operations llog_lvfs_ops = {
+        .lop_write_rec   = llog_lvfs_write_rec,
+        .lop_next_block  = llog_lvfs_next_block,
+        .lop_read_header = llog_lvfs_read_header,
+        .lop_create      = llog_lvfs_create,
+        .lop_destroy     = llog_lvfs_destroy,
+        .lop_close       = llog_lvfs_close,
+        /* lop_cancel is left unset: this file defines no llog_lvfs_cancel */
+};
+#endif