Whamcloud - gitweb
LU-8066 obd_type: discard obd_types linked list.
[fs/lustre-release.git] / lustre / mdd / mdd_lproc.c
index d675478..829afc7 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
  * Author: Wang Di <wangdi@clusterfs.com>
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #define DEBUG_SUBSYSTEM S_MDS
 
-#include <linux/module.h>
 #include <obd.h>
 #include <obd_class.h>
-#include <lustre_ver.h>
 #include <obd_support.h>
 #include <lprocfs_status.h>
-#include <lu_time.h>
-#include <lustre_log.h>
-#include <lustre/lustre_idl.h>
 #include <libcfs/libcfs_string.h>
-
 #include "mdd_internal.h"
 
-#ifndef SEEK_CUR /* SLES10 needs this */
-#define SEEK_CUR        1
-#define SEEK_END        2
-#endif
-
-static const char *mdd_counter_names[LPROC_MDD_NR] = {
-};
+/*
+ * Show handler for the "uuid" attribute.
+ *
+ * Recovers the mdd_device that embeds kobj (through its mdd_kobj member),
+ * maps it to its obd_device with mdd2obd_dev() and prints that device's
+ * obd_uuid string, followed by a newline, into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ * NOTE(review): LUSTRE_RO_ATTR is defined outside this file; presumably it
+ * declares the read-only attribute served by this handler -- confirm.
+ */
+static ssize_t uuid_show(struct kobject *kobj, struct attribute *attr,
+                        char *buf)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       struct obd_device *obd = mdd2obd_dev(mdd);
 
-/* from LPROC_SEQ_FOPS(mdd_changelog) below */
-extern struct file_operations mdd_changelog_fops;
+       return sprintf(buf, "%s\n", obd->obd_uuid.uuid);
+}
+LUSTRE_RO_ATTR(uuid);
 
-int mdd_procfs_init(struct mdd_device *mdd, const char *name)
+/*
+ * Show handler for the "atime_diff" attribute.
+ *
+ * Prints mdd_atime_diff of the mdd_device embedding kobj as a signed
+ * decimal ("%lld") followed by a newline into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t atime_diff_show(struct kobject *kobj, struct attribute *attr,
+                              char *buf)
 {
-        struct lprocfs_static_vars lvars;
-        struct lu_device    *ld = &mdd->mdd_md_dev.md_lu_dev;
-        struct obd_type     *type;
-        int                  rc;
-        ENTRY;
-
-        type = ld->ld_type->ldt_obd_type;
-
-        LASSERT(name != NULL);
-        LASSERT(type != NULL);
-
-        /* Find the type procroot and add the proc entry for this device */
-        lprocfs_mdd_init_vars(&lvars);
-        mdd->mdd_proc_entry = lprocfs_register(name, type->typ_procroot,
-                                               lvars.obd_vars, mdd);
-        if (IS_ERR(mdd->mdd_proc_entry)) {
-                rc = PTR_ERR(mdd->mdd_proc_entry);
-                CERROR("Error %d setting up lprocfs for %s\n",
-                       rc, name);
-                mdd->mdd_proc_entry = NULL;
-                GOTO(out, rc);
-        }
-
-        rc = lu_time_init(&mdd->mdd_stats,
-                          mdd->mdd_proc_entry,
-                          mdd_counter_names, ARRAY_SIZE(mdd_counter_names));
-
-        EXIT;
-out:
-        if (rc)
-               mdd_procfs_fini(mdd);
-        return rc;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+
+       return sprintf(buf, "%lld\n", mdd->mdd_atime_diff);
 }
 
-int mdd_procfs_fini(struct mdd_device *mdd)
+/*
+ * Store handler for the "atime_diff" attribute.
+ *
+ * Parses buffer as a base-10 signed 64-bit integer with kstrtoll() and
+ * stores the result in mdd_atime_diff of the mdd_device embedding kobj.
+ * No range check is applied to the parsed value.
+ *
+ * Returns count on success, or the error returned by kstrtoll().
+ */
+static ssize_t atime_diff_store(struct kobject *kobj,
+                               struct attribute *attr,
+                               const char *buffer, size_t count)
 {
-        if (mdd->mdd_stats)
-                lu_time_fini(&mdd->mdd_stats);
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       time64_t diff = 0;
+       int rc;
+
+       rc = kstrtoll(buffer, 10, &diff);
+       if (rc)
+               return rc;
 
-        if (mdd->mdd_proc_entry) {
-                 lprocfs_remove(&mdd->mdd_proc_entry);
-                 mdd->mdd_proc_entry = NULL;
-        }
-        RETURN(0);
+        mdd->mdd_atime_diff = diff;
+        return count;
 }
+LUSTRE_RW_ATTR(atime_diff);
 
-void mdd_lprocfs_time_start(const struct lu_env *env)
+/**** changelogs ****/
+/*
+ * seq_file show routine for the changelog mask.
+ *
+ * m->private is the mdd_device.  For every bit index below CL_LAST that is
+ * set in mdd_cl.mc_mask, emits the name returned by changelog_type2str()
+ * followed by a space, then terminates the line with a newline.
+ *
+ * Always returns 0.
+ */
+static int mdd_changelog_mask_seq_show(struct seq_file *m, void *data)
 {
-        lu_lprocfs_time_start(env);
+       struct mdd_device *mdd = m->private;
+       int i = 0;
+
+       while (i < CL_LAST) {
+               if (mdd->mdd_cl.mc_mask & (1 << i))
+                       seq_printf(m, "%s ", changelog_type2str(i));
+               i++;
+       }
+       seq_putc(m, '\n');
+       return 0;
 }
 
-void mdd_lprocfs_time_end(const struct lu_env *env, struct mdd_device *mdd,
-                          int idx)
+/*
+ * Write routine for the changelog mask.
+ *
+ * Copies at most PAGE_SIZE - 1 bytes of user input into a temporary
+ * PAGE_SIZE kernel buffer, NUL-terminates it and hands it to
+ * cfs_str2mask(), which updates mdd_cl.mc_mask using changelog_type2str
+ * with CHANGELOG_MINMASK and CHANGELOG_ALLMASK as its last two arguments.
+ *
+ * Returns count on success, -EINVAL if count >= PAGE_SIZE, -ENOMEM if the
+ * buffer cannot be allocated, -EFAULT if the user copy fails, or the
+ * non-zero result of cfs_str2mask().
+ */
+static ssize_t
+mdd_changelog_mask_seq_write(struct file *file, const char __user *buffer,
+                            size_t count, loff_t *off)
 {
-        lu_lprocfs_time_end(env, mdd->mdd_stats, idx);
+       struct seq_file *m = file->private_data;
+       struct mdd_device *mdd = m->private;
+       char *kernbuf;
+       int rc;
+       ENTRY;
+
+       /* strictly less than PAGE_SIZE so kernbuf[count] stays in bounds */
+       if (count >= PAGE_SIZE)
+               RETURN(-EINVAL);
+       OBD_ALLOC(kernbuf, PAGE_SIZE);
+       if (kernbuf == NULL)
+               RETURN(-ENOMEM);
+       if (copy_from_user(kernbuf, buffer, count))
+               GOTO(out, rc = -EFAULT);
+       kernbuf[count] = 0;
+
+       rc = cfs_str2mask(kernbuf, changelog_type2str, &mdd->mdd_cl.mc_mask,
+                         CHANGELOG_MINMASK, CHANGELOG_ALLMASK);
+       if (rc == 0)
+               rc = count;
+out:
+       OBD_FREE(kernbuf, PAGE_SIZE);
+       return rc;
 }
+LDEBUGFS_SEQ_FOPS(mdd_changelog_mask);
 
-static int lprocfs_wr_atime_diff(struct file *file, const char *buffer,
-                                 unsigned long count, void *data)
+/*
+ * Per-record callback passed to llog_cat_process() by
+ * mdd_changelog_users_seq_show().
+ *
+ * data is the seq_file being filled.  Asserts that llh is a plain log
+ * (LLOG_F_IS_PLAIN), reinterprets hdr as a llog_changelog_user_rec and
+ * prints one line: CHANGELOG_USER_PREFIX plus cur_id, then cur_endrec,
+ * then in parentheses the current get_seconds() value (truncated to
+ * __u32) minus cur_time.
+ *
+ * Always returns 0.
+ */
+static int lprocfs_changelog_users_cb(const struct lu_env *env,
+                                     struct llog_handle *llh,
+                                     struct llog_rec_hdr *hdr, void *data)
 {
-        struct mdd_device *mdd = data;
-        char kernbuf[20], *end;
-        unsigned long diff = 0;
+       struct llog_changelog_user_rec *rec;
+       struct seq_file *m = data;
 
-        if (count > (sizeof(kernbuf) - 1))
-                return -EINVAL;
+       LASSERT(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN);
 
-        if (copy_from_user(kernbuf, buffer, count))
-                return -EFAULT;
+       rec = (struct llog_changelog_user_rec *)hdr;
 
-        kernbuf[count] = '\0';
+       seq_printf(m, CHANGELOG_USER_PREFIX"%-3d %llu (%u)\n",
+                  rec->cur_id, rec->cur_endrec, (__u32)get_seconds() -
+                                                rec->cur_time);
+       return 0;
+}
 
-        diff = simple_strtoul(kernbuf, &end, 0);
-        if (kernbuf == end)
-                return -EINVAL;
+/*
+ * seq_file show routine listing the changelog users.
+ *
+ * m->private is the mdd_device.  Looks up the
+ * LLOG_CHANGELOG_USER_ORIG_CTXT llog context, asserts that its handle is a
+ * catalog (LLOG_F_IS_CAT), prints the current changelog index (read under
+ * mdd_cl.mc_lock) and a column header, then walks the catalog with
+ * llog_cat_process() so that lprocfs_changelog_users_cb() prints one line
+ * per record.
+ *
+ * Returns 0 on success, -ENXIO if the context is missing, or the error
+ * from lu_env_init().
+ * NOTE(review): the return value of llog_cat_process() is discarded, so a
+ * failure while walking the catalog is not reported to the reader.
+ */
+static int mdd_changelog_users_seq_show(struct seq_file *m, void *data)
+{
+       struct lu_env            env;
+       struct mdd_device       *mdd = m->private;
+       struct llog_ctxt        *ctxt;
+       __u64                    cur;
+       int                      rc;
+
+        ctxt = llog_get_context(mdd2obd_dev(mdd),
+                               LLOG_CHANGELOG_USER_ORIG_CTXT);
+        if (ctxt == NULL)
+                return -ENXIO;
+        LASSERT(ctxt->loc_handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT);
 
-        mdd->mdd_atime_diff = diff;
-        return count;
+       rc = lu_env_init(&env, LCT_LOCAL);
+       if (rc) {
+               llog_ctxt_put(ctxt);
+               return rc;
+       }
+
+       spin_lock(&mdd->mdd_cl.mc_lock);
+       cur = mdd->mdd_cl.mc_index;
+       spin_unlock(&mdd->mdd_cl.mc_lock);
+
+       seq_printf(m, "current index: %llu\n", cur);
+       seq_printf(m, "%-5s %s %s\n", "ID", "index", "(idle seconds)");
+
+       llog_cat_process(&env, ctxt->loc_handle, lprocfs_changelog_users_cb,
+                        m, 0, 0);
+
+       lu_env_fini(&env);
+       llog_ctxt_put(ctxt);
+       return 0;
 }
+LDEBUGFS_SEQ_FOPS_RO(mdd_changelog_users);
 
-static int lprocfs_rd_atime_diff(char *page, char **start, off_t off,
-                                 int count, int *eof, void *data)
+/*
+ * Add the size of one llog catalog to a running total.
+ *
+ * Looks up llog context number index on the obd_device of mdd, checks
+ * that its handle is a catalog (LLOG_F_IS_CAT) and adds the value of
+ * llog_cat_size() for that handle to *val.  *val is accumulated, not
+ * overwritten, so the caller must initialise it.
+ *
+ * Returns 0 on success, -ENXIO if the context does not exist, or -EINVAL
+ * (after logging with CERROR) if the handle is not a catalog.  The context
+ * reference is dropped on every path that obtained it.
+ */
+static int mdd_changelog_size_ctxt(const struct lu_env *env,
+                                  struct mdd_device *mdd,
+                                  int index, __u64 *val)
 {
-        struct mdd_device *mdd = data;
+       struct llog_ctxt        *ctxt;
 
-        *eof = 1;
-        return snprintf(page, count, "%lu\n", mdd->mdd_atime_diff);
-}
+       ctxt = llog_get_context(mdd2obd_dev(mdd),
+                               index);
+       if (ctxt == NULL)
+               return -ENXIO;
+
+       if (!(ctxt->loc_handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT)) {
+               CERROR("%s: ChangeLog has wrong flags: rc = %d\n",
+                      ctxt->loc_obd->obd_name, -EINVAL);
+               llog_ctxt_put(ctxt);
+               return -EINVAL;
+       }
 
-/* match enum changelog_rec_type */
-static const char *changelog_str[] = {"MARK","CREAT","MKDIR","HLINK","SLINK",
-        "MKNOD","UNLNK","RMDIR","RNMFM","RNMTO","OPEN","CLOSE","IOCTL",
-        "TRUNC","SATTR","XATTR"};
+       *val += llog_cat_size(env, ctxt->loc_handle);
 
-const char *changelog_bit2str(int bit)
+       llog_ctxt_put(ctxt);
+
+       return 0;
+}
+
+/*
+ * Show handler for the "changelog_size" attribute.
+ *
+ * Sums llog_cat_size() over the LLOG_CHANGELOG_ORIG_CTXT and
+ * LLOG_CHANGELOG_USER_ORIG_CTXT catalogs through mdd_changelog_size_ctxt()
+ * and prints the total as an unsigned decimal followed by a newline.
+ *
+ * Returns the value of sprintf() on success, or the negative error from
+ * lu_env_init() or from either mdd_changelog_size_ctxt() call.  Both
+ * lookups are checked so that a partial total is never reported as if it
+ * were complete.
+ */
+static ssize_t changelog_size_show(struct kobject *kobj,
+                                  struct attribute *attr,
+                                  char *buf)
 {
-        if (bit < CL_LAST)
-                return changelog_str[bit];
-        return NULL;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       struct lu_env env;
+       u64 tmp = 0;
+       int rc;
+
+       rc = lu_env_init(&env, LCT_LOCAL);
+       if (rc)
+               return rc;
+
+       rc = mdd_changelog_size_ctxt(&env, mdd, LLOG_CHANGELOG_ORIG_CTXT, &tmp);
+       if (rc)
+               goto out;
+
+       rc = mdd_changelog_size_ctxt(&env, mdd, LLOG_CHANGELOG_USER_ORIG_CTXT,
+                                    &tmp);
+       if (rc)
+               goto out;
+
+       rc = sprintf(buf, "%llu\n", tmp);
+out:
+       /* every path that initialised env must release it */
+       lu_env_fini(&env);
+       return rc;
 }
+LUSTRE_RO_ATTR(changelog_size);
 
-static int lprocfs_rd_cl_mask(char *page, char **start, off_t off,
-                              int count, int *eof, void *data)
+/*
+ * Show handler for the "changelog_gc" attribute.
+ *
+ * Prints mdd_changelog_gc of the mdd_device embedding kobj as an unsigned
+ * decimal ("%u") followed by a newline into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t changelog_gc_show(struct kobject *kobj,
+                                struct attribute *attr,
+                                char *buf)
 {
-        struct mdd_device *mdd = data;
-        int i = 0, rc = 0;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
 
-        *eof = 1;
-        while (i < CL_LAST) {
-                if (mdd->mdd_cl.mc_mask & (1 << i))
-                        rc += snprintf(page + rc, count - rc, "%s ",
-                                       changelog_str[i]);
-                i++;
-        }
-        return rc;
+       return sprintf(buf, "%u\n", mdd->mdd_changelog_gc);
 }
 
-static int lprocfs_wr_cl_mask(struct file *file, const char *buffer,
-                              unsigned long count, void *data)
+/*
+ * Store handler for the "changelog_gc" attribute.
+ *
+ * Parses buffer as a boolean with kstrtobool() and stores the result in
+ * mdd_changelog_gc of the mdd_device embedding kobj.
+ *
+ * Returns count on success, or the error returned by kstrtobool().
+ */
+static ssize_t changelog_gc_store(struct kobject *kobj,
+                                 struct attribute *attr,
+                                 const char *buffer, size_t count)
 {
-        struct mdd_device *mdd = data;
-        char *kernbuf;
-        int rc;
-        ENTRY;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       bool val;
+       int rc;
 
-        if (count >= CFS_PAGE_SIZE)
-                RETURN(-EINVAL);
-        OBD_ALLOC(kernbuf, CFS_PAGE_SIZE);
-        if (kernbuf == NULL)
-                RETURN(-ENOMEM);
-        if (copy_from_user(kernbuf, buffer, count))
-                GOTO(out, rc = -EFAULT);
-        kernbuf[count] = 0;
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
 
-        rc = libcfs_str2mask(kernbuf, changelog_bit2str,
-                             &mdd->mdd_cl.mc_mask, CL_MINMASK, CL_ALLMASK);
-        if (rc == 0)
-                rc = count;
-out:
-        OBD_FREE(kernbuf, CFS_PAGE_SIZE);
-        return rc;
-}
-
-/** struct for holding changelog data for seq_file processing */
-struct cl_seq_iter {
-        struct mdd_device *csi_mdd;
-        __u64 csi_startrec;
-        __u64 csi_endrec;
-        loff_t csi_pos;
-        int csi_wrote;
-        int csi_startcat;
-        int csi_startidx;
-        int csi_fill:1;
-};
+       mdd->mdd_changelog_gc = val;
 
-/* non-seq version for direct calling by class_process_proc_param */
-static int lprocfs_wr_cl(struct file *file, const char *buffer,
-                         unsigned long count, void *data)
-{
-        struct mdd_device *mdd = (struct mdd_device *)data;
-        char kernbuf[32];
-        char *end;
-        int rc;
-
-        if (count > (sizeof(kernbuf) - 1))
-                goto out_usage;
-
-        count = min_t(unsigned long, count, sizeof(kernbuf));
-        if (copy_from_user(kernbuf, buffer, count))
-                return -EFAULT;
-
-        kernbuf[count] = '\0';
-        /* strip trailing newline from "echo blah" */
-        if (kernbuf[count - 1] == '\n')
-                kernbuf[count - 1] = '\0';
-
-        if (strcmp(kernbuf, "on") == 0) {
-                LCONSOLE_INFO("changelog on\n");
-                if (mdd->mdd_cl.mc_flags & CLM_ERR) {
-                        CERROR("Changelogs cannot be enabled due to error "
-                               "condition.\n");
-                } else {
-                        spin_lock(&mdd->mdd_cl.mc_lock);
-                        mdd->mdd_cl.mc_flags |= CLM_ON;
-                        spin_unlock(&mdd->mdd_cl.mc_lock);
-                        rc = mdd_changelog_write_header(mdd, CLM_START);
-                        if (rc)
-                              return rc;
-                }
-        } else if (strcmp(kernbuf, "off") == 0) {
-                LCONSOLE_INFO("changelog off\n");
-                rc = mdd_changelog_write_header(mdd, CLM_FINI);
-                if (rc)
-                      return rc;
-                spin_lock(&mdd->mdd_cl.mc_lock);
-                mdd->mdd_cl.mc_flags &= ~CLM_ON;
-                spin_unlock(&mdd->mdd_cl.mc_lock);
-        } else {
-                /* purge to an index */
-                long long unsigned endrec, cur;
-
-                spin_lock(&mdd->mdd_cl.mc_lock);
-                cur = (long long)mdd->mdd_cl.mc_index;
-                spin_unlock(&mdd->mdd_cl.mc_lock);
-
-                if (strcmp(kernbuf, "0") == 0)
-                        /* purge to "0" is shorthand for everything */
-                        endrec = cur;
-                else
-                        endrec = (long long)simple_strtoull(kernbuf, &end, 0);
-                if ((kernbuf == end) || (endrec == 0))
-                        goto out_usage;
-                if (endrec > cur)
-                        endrec = cur;
-
-                /* If purging all records, write a header entry so we
-                   don't have an empty catalog and
-                   we're sure to have a valid starting index next time.  In
-                   case of crash, we just restart with old log so we're
-                   allright. */
-                if (endrec == cur) {
-                        rc = mdd_changelog_write_header(mdd, CLM_PURGE);
-                        if (rc)
-                              return rc;
-                }
-
-                LCONSOLE_INFO("changelog purge to %llu\n", endrec);
-
-                rc = mdd_changelog_llog_cancel(mdd, endrec);
-                if (rc < 0)
-                        return rc;
-        }
+       return count;
+}
+LUSTRE_RW_ATTR(changelog_gc);
 
-        return count;
+/*
+ * Show handler for the "changelog_max_idle_time" attribute.
+ *
+ * Prints mdd_changelog_max_idle_time of the mdd_device embedding kobj as
+ * a signed decimal ("%lld") followed by a newline into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t changelog_max_idle_time_show(struct kobject *kobj,
+                                           struct attribute *attr,
+                                           char *buf)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
 
-out_usage:
-        CWARN("changelog write usage: [on|off] | <purge_idx (0=all)>\n");
-        return -EINVAL;
-}
-
-static ssize_t mdd_cl_seq_write(struct file *file, const char *buffer,
-                                size_t count, loff_t *off)
-{
-        struct seq_file *seq = file->private_data;
-        struct cl_seq_iter *csi = seq->private;
-        struct mdd_device *mdd = csi->csi_mdd;
-
-        return lprocfs_wr_cl(file, buffer, count, mdd);
-}
-
-#define D_CL 0
-
-/* How many records per seq_show.  Too small, we spawn llog_process threads
-   too often; too large, we run out of buffer space */
-#define CL_CHUNK_SIZE 100
-
-static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr,
-                             void *data)
-{
-        struct seq_file *seq = (struct seq_file *)data;
-        struct cl_seq_iter *csi = seq->private;
-        struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr;
-        int rc;
-        ENTRY;
-
-        if ((rec->cr_hdr.lrh_type != CHANGELOG_REC) ||
-            (rec->cr_type >= CL_LAST)) {
-                CERROR("Not a changelog rec? %d/%d\n", rec->cr_hdr.lrh_type,
-                       rec->cr_type);
-                RETURN(-EINVAL);
-        }
-
-        CDEBUG(D_CL, "rec="LPU64" start="LPU64" cat=%d:%d start=%d:%d\n",
-               rec->cr_index, csi->csi_startrec,
-               llh->lgh_hdr->llh_cat_idx, llh->lgh_cur_idx,
-               csi->csi_startcat, csi->csi_startidx);
-
-        if (rec->cr_index < csi->csi_startrec)
-                RETURN(0);
-        if (rec->cr_index == csi->csi_startrec) {
-                /* Remember where we started, since seq_read will re-read
-                 * the data when it reallocs space.  Sigh, if only there was
-                 * a way to tell seq_file how big the buf should be in the
-                 * first place... */
-                csi->csi_startcat = llh->lgh_hdr->llh_cat_idx;
-                csi->csi_startidx = rec->cr_hdr.lrh_index - 1;
-        }
-        if (csi->csi_wrote > CL_CHUNK_SIZE) {
-                /* Stop at some point with a reasonable seq_file buffer size.
-                 * Start from here the next time.
-                 */
-                csi->csi_endrec = rec->cr_index - 1;
-                csi->csi_startcat = llh->lgh_hdr->llh_cat_idx;
-                csi->csi_startidx = rec->cr_hdr.lrh_index - 1;
-                csi->csi_wrote = 0;
-                RETURN(LLOG_PROC_BREAK);
-        }
-
-        rc = seq_printf(seq, LPU64" %02d%-5s "LPU64" 0x%x t="DFID,
-                        rec->cr_index, rec->cr_type,
-                        changelog_str[rec->cr_type], rec->cr_time,
-                        rec->cr_flags & CLF_FLAGMASK, PFID(&rec->cr_tfid));
-
-        if (rec->cr_namelen)
-                /* namespace rec includes parent and filename */
-                rc += seq_printf(seq, " p="DFID" %.*s\n", PFID(&rec->cr_pfid),
-                                 rec->cr_namelen, rec->cr_name);
-        else
-                rc += seq_puts(seq, "\n");
-
-        if (rc < 0) {
-                /* seq_read will dump the whole buffer and re-seq_start with a
-                   larger one; no point in continuing the llog_process */
-                CDEBUG(D_CL, "rec="LPU64" overflow "LPU64"<-"LPU64"\n",
-                       rec->cr_index, csi->csi_startrec, csi->csi_endrec);
-                csi->csi_endrec = csi->csi_startrec - 1;
-                csi->csi_wrote = 0;
-                RETURN(LLOG_PROC_BREAK);
-        }
-
-        csi->csi_wrote++;
-        csi->csi_endrec = rec->cr_index;
-
-        RETURN(0);
-}
-
-static int mdd_cl_seq_show(struct seq_file *seq, void *v)
-{
-        struct cl_seq_iter *csi = seq->private;
-        struct obd_device *obd = mdd2obd_dev(csi->csi_mdd);
-        struct llog_ctxt *ctxt;
-        int rc;
-
-        if (csi->csi_fill) {
-                /* seq_read wants more data to fill his buffer. But we already
-                   filled the buf as much as we cared to; force seq_read to
-                   accept that. */
-                while ((rc = seq_putc(seq, 0)) == 0);
-                return 0;
-        }
-
-        ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT);
-        if (ctxt == NULL)
-                return -ENOENT;
+       return sprintf(buf, "%lld\n", mdd->mdd_changelog_max_idle_time);
+}
 
-        /* Since we have to restart the llog_cat_process for each chunk of the
-           seq_ functions, start from where we left off. */
-        rc = llog_cat_process(ctxt->loc_handle, changelog_show_cb, seq,
-                              csi->csi_startcat, csi->csi_startidx);
+/*
+ * Store handler for the "changelog_max_idle_time" attribute.
+ *
+ * Parses buffer as a base-10 signed 64-bit integer with kstrtoll(),
+ * accepts only values in [1, INT_MAX] and stores the result in
+ * mdd_changelog_max_idle_time of the mdd_device embedding kobj.
+ *
+ * Returns count on success, the error from kstrtoll() on a parse failure,
+ * or -ERANGE if the value is outside the accepted range.
+ */
+static ssize_t changelog_max_idle_time_store(struct kobject *kobj,
+                                            struct attribute *attr,
+                                            const char *buffer, size_t count)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       time64_t val;
+       int rc;
 
-        CDEBUG(D_CL, "seq_show "LPU64"-"LPU64" cat=%d:%d wrote=%d rc=%d\n",
-               csi->csi_startrec, csi->csi_endrec, csi->csi_startcat,
-               csi->csi_startidx, csi->csi_wrote, rc);
+       rc = kstrtoll(buffer, 10, &val);
+       if (rc)
+               return rc;
 
-        llog_ctxt_put(ctxt);
+       /* Cap the idle time at INT_MAX seconds (about 68 years): that is a
+        * reasonable upper bound for a user's idle time, and it allows
+        * 32-bit timestamps to be used for the comparison.
+        */
+       if (val < 1 || val > INT_MAX)
+               return -ERANGE;
 
-        if (rc == LLOG_PROC_BREAK)
-                rc = 0;
+       mdd->mdd_changelog_max_idle_time = val;
 
-        return rc;
+       return count;
 }
+LUSTRE_RW_ATTR(changelog_max_idle_time);
 
-static int mdd_cl_done(struct cl_seq_iter *csi)
+/*
+ * Show handler for the "changelog_max_idle_indexes" attribute.
+ *
+ * Prints mdd_changelog_max_idle_indexes of the mdd_device embedding kobj
+ * as an unsigned long decimal ("%lu") followed by a newline into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t changelog_max_idle_indexes_show(struct kobject *kobj,
+                                              struct attribute *attr,
+                                              char *buf)
 {
-        int done = 0;
-        spin_lock(&csi->csi_mdd->mdd_cl.mc_lock);
-        done = (csi->csi_endrec >= csi->csi_mdd->mdd_cl.mc_index);
-        spin_unlock(&csi->csi_mdd->mdd_cl.mc_lock);
-        return done;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+
+       return sprintf(buf, "%lu\n", mdd->mdd_changelog_max_idle_indexes);
 }
 
+/*
+ * Store handler for the "changelog_max_idle_indexes" attribute.
+ *
+ * Parses buffer as an unsigned long with kstrtoul() using base 0 (the
+ * radix is taken from the input's prefix) and stores the result, without
+ * any range check, in mdd_changelog_max_idle_indexes of the mdd_device
+ * embedding kobj.
+ *
+ * Returns count on success, or the error returned by kstrtoul().
+ */
+static ssize_t changelog_max_idle_indexes_store(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               const char *buffer,
+                                               size_t count)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       unsigned long val;
+       int rc;
+
+       LASSERT(mdd != NULL);
+       rc = kstrtoul(buffer, 0, &val);
+       if (rc)
+               return rc;
+
+       /* XXX may need to limit/check with reasonable elapsed/idle indexes */
+       /* XXX may better allow to specify a % of full ChangeLogs */
 
-static void *mdd_cl_seq_start(struct seq_file *seq, loff_t *pos)
+       mdd->mdd_changelog_max_idle_indexes = val;
+
+       return count;
+}
+LUSTRE_RW_ATTR(changelog_max_idle_indexes);
+
+/*
+ * Show handler for the "changelog_min_gc_interval" attribute.
+ *
+ * Prints mdd_changelog_min_gc_interval of the mdd_device embedding kobj
+ * as a signed decimal ("%lld") followed by a newline into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t changelog_min_gc_interval_show(struct kobject *kobj,
+                                             struct attribute *attr,
+                                             char *buf)
 {
-        struct cl_seq_iter *csi = seq->private;
-        LASSERT(csi);
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
 
-        CDEBUG(D_CL, "start "LPU64"-"LPU64" pos="LPU64"\n",
-               csi->csi_startrec, csi->csi_endrec, *pos);
+       return sprintf(buf, "%lld\n", mdd->mdd_changelog_min_gc_interval);
+}
 
-        csi->csi_fill = 0;
+/*
+ * Store handler for the "changelog_min_gc_interval" attribute.
+ *
+ * Parses buffer as a base-10 signed 64-bit integer with kstrtoll(),
+ * rejects values below 1 and stores the result in
+ * mdd_changelog_min_gc_interval of the mdd_device embedding kobj.  There
+ * is no upper bound.
+ *
+ * Returns count on success, the error from kstrtoll() on a parse failure,
+ * or -ERANGE if the value is below 1.
+ */
+static ssize_t changelog_min_gc_interval_store(struct kobject *kobj,
+                                              struct attribute *attr,
+                                              const char *buffer,
+                                              size_t count)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       time64_t val;
+       int rc;
 
-        if (mdd_cl_done(csi))
-                /* no more records, seq_read should return 0 if buffer
-                   is empty */
-                return NULL;
+       rc = kstrtoll(buffer, 10, &val);
+       if (rc)
+               return rc;
 
-        if (*pos > csi->csi_pos) {
-                /* The seq_read implementation sucks.  It may call start
-                   multiple times, using pos to indicate advances, if any,
-                   by arbitrarily increasing it by 1. So ignore the actual
-                   value of pos, and just register any increase as
-                   "seq_read wants the next values". */
-                csi->csi_startrec = csi->csi_endrec + 1;
-                csi->csi_pos = *pos;
-        }
-        /* else use old startrec/startidx */
+       /* XXX may need to limit with reasonable elapsed/interval times */
+       if (val < 1)
+               return -ERANGE;
 
-        return csi;
+       mdd->mdd_changelog_min_gc_interval = val;
+
+       return count;
 }
+LUSTRE_RW_ATTR(changelog_min_gc_interval);
 
-static void mdd_cl_seq_stop(struct seq_file *seq, void *v)
+/*
+ * Show handler for the "changelog_min_free_cat_entries" attribute.
+ *
+ * Prints mdd_changelog_min_free_cat_entries of the mdd_device embedding
+ * kobj as an unsigned decimal ("%u") followed by a newline into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t changelog_min_free_cat_entries_show(struct kobject *kobj,
+                                                  struct attribute *attr,
+                                                  char *buf)
 {
-        struct cl_seq_iter *csi = seq->private;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
 
-        CDEBUG(D_CL, "stop "LPU64"-"LPU64"\n",
-               csi->csi_startrec, csi->csi_endrec);
+       return sprintf(buf, "%u\n", mdd->mdd_changelog_min_free_cat_entries);
 }
 
-static void *mdd_cl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+/*
+ * Store handler for the "changelog_min_free_cat_entries" attribute.
+ *
+ * Parses buffer as a base-10 unsigned int with kstrtouint(), rejects 0
+ * and stores the result in mdd_changelog_min_free_cat_entries of the
+ * mdd_device embedding kobj.
+ *
+ * Returns count on success, the error from kstrtouint() on a parse
+ * failure, or -ERANGE if the value is 0.
+ */
+static ssize_t changelog_min_free_cat_entries_store(struct kobject *kobj,
+                                                   struct attribute *attr,
+                                                   const char *buffer,
+                                                   size_t count)
 {
-        struct cl_seq_iter *csi = seq->private;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       unsigned int val;
+       int rc;
+
+       rc = kstrtouint(buffer, 10, &val);
+       if (rc)
+               return rc;
 
-        CDEBUG(D_CL, "next "LPU64"-"LPU64" pos="LPU64"\n",
-               csi->csi_startrec, csi->csi_endrec, *pos);
+       /* XXX may need to limit with more reasonable number of free entries */
+       if (val < 1)
+               return -ERANGE;
 
-        csi->csi_fill = 1;
+       mdd->mdd_changelog_min_free_cat_entries = val;
 
-        return csi;
+       return count;
 }
+LUSTRE_RW_ATTR(changelog_min_free_cat_entries);
 
-struct seq_operations mdd_cl_sops = {
-        .start = mdd_cl_seq_start,
-        .stop = mdd_cl_seq_stop,
-        .next = mdd_cl_seq_next,
-        .show = mdd_cl_seq_show,
-};
+/*
+ * Show handler for the "changelog_deniednext" attribute.
+ *
+ * Prints mdd_cl.mc_deniednext of the mdd_device embedding kobj as an
+ * unsigned decimal ("%u") followed by a newline into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t changelog_deniednext_show(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        char *buf)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+
+       return sprintf(buf, "%u\n", mdd->mdd_cl.mc_deniednext);
+}
 
-static int mdd_cl_seq_open(struct inode *inode, struct file *file)
+/*
+ * Store handler for the "changelog_deniednext" attribute.
+ *
+ * Parses buffer as an unsigned int with kstrtouint() using base 0 (the
+ * radix is taken from the input's prefix) and stores the result, without
+ * any range check, in mdd_cl.mc_deniednext of the mdd_device embedding
+ * kobj.
+ *
+ * Returns count on success, or the error returned by kstrtouint().
+ */
+static ssize_t changelog_deniednext_store(struct kobject *kobj,
+                                         struct attribute *attr,
+                                         const char *buffer,
+                                         size_t count)
 {
-        struct cl_seq_iter *csi;
-        struct proc_dir_entry *dp = PDE(inode);
-        struct seq_file *seq;
-        int rc;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       unsigned int time = 0;
+       int rc;
 
-        LPROCFS_ENTRY_AND_CHECK(dp);
+       rc = kstrtouint(buffer, 0, &time);
+       if (rc)
+               return rc;
 
-        rc = seq_open(file, &mdd_cl_sops);
-        if (rc)
-                goto out;
+       mdd->mdd_cl.mc_deniednext = time;
+       return count;
+}
+LUSTRE_RW_ATTR(changelog_deniednext);
 
-        OBD_ALLOC_PTR(csi);
-        if (csi == NULL) {
-                rc = -ENOMEM;
-                goto out;
-        }
-        csi->csi_mdd = dp->data;
-        seq = file->private_data;
-        seq->private = csi;
+/*
+ * Show handler for the "sync_permission" attribute.
+ *
+ * Prints mdd_sync_permission of the mdd_device embedding kobj as a
+ * decimal ("%d") followed by a newline into buf.
+ *
+ * Returns the value of sprintf(), i.e. the number of characters written.
+ */
+static ssize_t sync_permission_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
 
-out:
-        if (rc)
-                LPROCFS_EXIT();
-        return rc;
+       return sprintf(buf, "%d\n", mdd->mdd_sync_permission);
 }
 
-static int mdd_cl_seq_release(struct inode *inode, struct file *file)
+/*
+ * Store handler for the "sync_permission" attribute.
+ *
+ * Parses buffer as a boolean with kstrtobool() and stores the result in
+ * mdd_sync_permission of the mdd_device embedding kobj.
+ *
+ * Returns count on success, or the error returned by kstrtobool().
+ */
+static ssize_t sync_permission_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
 {
-        struct seq_file *seq = file->private_data;
-        struct cl_seq_iter *csi = seq->private;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
 
-        OBD_FREE_PTR(csi);
+       mdd->mdd_sync_permission = val;
 
-        return lprocfs_seq_release(inode, file);
+       return count;
 }
+LUSTRE_RW_ATTR(sync_permission);
 
-static loff_t mdd_cl_seq_lseek(struct file *file, loff_t offset, int origin)
+/*
+ * Show handler for the "lfsck_speed_limit" attribute.
+ *
+ * Delegates to lfsck_get_speed(), passing buf and the mdd_bottom member of
+ * the mdd_device embedding kobj, and returns its result unchanged.
+ * NOTE(review): lfsck_get_speed() is defined elsewhere; presumably it
+ * formats the current speed limit into buf -- confirm.
+ */
+static ssize_t lfsck_speed_limit_show(struct kobject *kobj,
+                                     struct attribute *attr, char *buf)
 {
-        struct seq_file *seq = (struct seq_file *)file->private_data;
-        struct cl_seq_iter *csi = seq->private;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
 
-        CDEBUG(D_CL, "seek "LPU64"-"LPU64" off="LPU64":%d fpos="LPU64"\n",
-               csi->csi_startrec, csi->csi_endrec, offset, origin, file->f_pos);
+       return lfsck_get_speed(buf, mdd->mdd_bottom);
+}
 
-        LL_SEQ_LOCK(seq);
+/*
+ * Store handler for the "lfsck_speed_limit" attribute.
+ *
+ * Parses buffer as a base-10 unsigned int with kstrtouint() and passes the
+ * value, together with the mdd_bottom member of the mdd_device embedding
+ * kobj, to lfsck_set_speed().
+ *
+ * Returns count on success, the error from kstrtouint() on a parse
+ * failure, or the non-zero result of lfsck_set_speed().
+ */
+static ssize_t lfsck_speed_limit_store(struct kobject *kobj,
+                                      struct attribute *attr,
+                                      const char *buffer, size_t count)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       unsigned int val;
+       int rc;
 
-        switch (origin) {
-                case SEEK_CUR:
-                        offset += csi->csi_endrec;
-                        break;
-                case SEEK_END:
-                        spin_lock(&csi->csi_mdd->mdd_cl.mc_lock);
-                        offset += csi->csi_mdd->mdd_cl.mc_index;
-                        spin_unlock(&csi->csi_mdd->mdd_cl.mc_lock);
-                        break;
-        }
+       rc = kstrtouint(buffer, 10, &val);
+       if (rc != 0)
+               return rc;
 
-        /* SEEK_SET */
+       rc = lfsck_set_speed(mdd->mdd_bottom, val);
+       return rc != 0 ? rc : count;
+}
+LUSTRE_RW_ATTR(lfsck_speed_limit);
+
+/**
+ * sysfs show handler for lfsck_async_windows.
+ *
+ * Delegates to lfsck_get_windows() for the bottom device of the MDD.
+ *
+ * \param[in] kobj     kobject embedded in the mdd_device (mdd_kobj)
+ * \param[in] attr     unused
+ * \param[in] buf      output buffer handed to lfsck_get_windows()
+ *
+ * \retval             value returned by lfsck_get_windows()
+ */
+static ssize_t lfsck_async_windows_show(struct kobject *kobj,
+                                       struct attribute *attr, char *buf)
+{
+       struct mdd_device *dev;
+
+       dev = container_of(kobj, struct mdd_device, mdd_kobj);
+
+       return lfsck_get_windows(buf, dev->mdd_bottom);
+}
+
+/**
+ * sysfs store handler for lfsck_async_windows.
+ *
+ * Parses \a buffer as a base-10 unsigned integer and passes it to
+ * lfsck_set_windows() for the bottom device of the MDD.
+ *
+ * \param[in] kobj     kobject embedded in the mdd_device (mdd_kobj)
+ * \param[in] attr     unused
+ * \param[in] buffer   decimal string holding the new value
+ * \param[in] count    \a buffer length
+ *
+ * \retval \a count    on success
+ * \retval negative    error code from kstrtouint() or lfsck_set_windows()
+ */
+static ssize_t lfsck_async_windows_store(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        const char *buffer, size_t count)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       unsigned int val;
+       int rc;
 
-        if (offset < 0) {
-                LL_SEQ_UNLOCK(seq);
-                return -EINVAL;
-        }
+       rc = kstrtouint(buffer, 10, &val);
+       if (rc)
+               return rc;
 
-        csi->csi_startrec = offset;
-        csi->csi_endrec = offset ? offset - 1 : 0;
+       rc = lfsck_set_windows(mdd->mdd_bottom, val);
 
-        /* drop whatever is left in sucky seq_read's buffer */
-        seq->count = 0;
-        seq->from = 0;
-        seq->index++;
-        LL_SEQ_UNLOCK(seq);
-        file->f_pos = csi->csi_startrec;
-        return csi->csi_startrec;
+       return rc != 0 ? rc : count;
 }
+/* presumably defines lustre_attr_lfsck_async_windows, listed in mdd_attrs */
+LUSTRE_RW_ATTR(lfsck_async_windows);
 
-static ssize_t mdd_cl_seq_read(struct file *file, char __user *buf,
-                               size_t count, loff_t *ppos)
+/**
+ * seq_file show handler dumping the namespace LFSCK state.
+ *
+ * \param[in] m        seq file; m->private must be the mdd_device
+ *                     (presumably the private data handed to
+ *                     ldebugfs_register() in mdd_procfs_init())
+ * \param[in] data     unused
+ *
+ * \retval             value returned by lfsck_dump() for
+ *                     LFSCK_TYPE_NAMESPACE
+ */
+static int mdd_lfsck_namespace_seq_show(struct seq_file *m, void *data)
 {
-        struct seq_file *seq = (struct seq_file *)file->private_data;
-        struct cl_seq_iter *csi = seq->private;
+       struct mdd_device *mdd = m->private;
 
-        if ((file->f_flags & O_NONBLOCK) && mdd_cl_done(csi))
-                return -EAGAIN;
-        return seq_read(file, buf, count, ppos);
+       LASSERT(mdd != NULL);
+
+       return lfsck_dump(m, mdd->mdd_bottom, LFSCK_TYPE_NAMESPACE);
 }
+/* presumably defines mdd_lfsck_namespace_fops used by lprocfs_mdd_obd_vars */
+LDEBUGFS_SEQ_FOPS_RO(mdd_lfsck_namespace);
 
-static unsigned int mdd_cl_seq_poll(struct file *file, poll_table *wait)
-{   /* based on kmsg_poll */
-        struct seq_file *seq = (struct seq_file *)file->private_data;
-        struct cl_seq_iter *csi = seq->private;
+/**
+ * seq_file show handler dumping the layout LFSCK state.
+ *
+ * \param[in] m        seq file; m->private must be the mdd_device
+ *                     (presumably the private data handed to
+ *                     ldebugfs_register() in mdd_procfs_init())
+ * \param[in] data     unused
+ *
+ * \retval             value returned by lfsck_dump() for LFSCK_TYPE_LAYOUT
+ */
+static int mdd_lfsck_layout_seq_show(struct seq_file *m, void *data)
+{
+       struct mdd_device *mdd = m->private;
 
-        poll_wait(file, &csi->csi_mdd->mdd_cl.mc_waitq, wait);
-        if (!mdd_cl_done(csi))
-                return POLLIN | POLLRDNORM;
+       LASSERT(mdd != NULL);
 
-        return 0;
+       return lfsck_dump(m, mdd->mdd_bottom, LFSCK_TYPE_LAYOUT);
 }
+/* presumably defines mdd_lfsck_layout_fops used by lprocfs_mdd_obd_vars */
+LDEBUGFS_SEQ_FOPS_RO(mdd_lfsck_layout);
 
-struct file_operations mdd_changelog_fops = {
-        .owner   = THIS_MODULE,
-        .open    = mdd_cl_seq_open,
-        .read    = mdd_cl_seq_read,
-        .write   = mdd_cl_seq_write,
-        .llseek  = mdd_cl_seq_lseek,
-        .poll    = mdd_cl_seq_poll,
-        .release = mdd_cl_seq_release,
-};
+/**
+ * Show default number of stripes for O_APPEND files.
+ *
+ * \param[in] kobj     kobject embedded in the mdd_device (mdd_kobj)
+ * \param[in] attr     unused
+ * \param[in] buf      output buffer, written with a PAGE_SIZE bound
+ *
+ * \retval             value returned by snprintf(), i.e. the length of
+ *                     the formatted "<count>\n" string
+ */
+static ssize_t append_stripe_count_show(struct kobject *kobj,
+                                       struct attribute *attr, char *buf)
+{
+       struct mdd_device *dev;
+
+       dev = container_of(kobj, struct mdd_device, mdd_kobj);
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", dev->mdd_append_stripe_count);
+}
+
+/**
+ * Set default number of stripes for O_APPEND files.
+ *
+ * The value is parsed with kstrtoint() using base 0 (auto-detected) and
+ * must not be smaller than -1.
+ *
+ * \param[in] kobj     kobject embedded in the mdd_device (mdd_kobj)
+ * \param[in] attr     unused
+ * \param[in] buffer   string containing the default number of stripes
+ *                     for new files
+ * \param[in] count    @buffer length
+ *
+ * \retval @count      on success
+ * \retval -ERANGE     if the value is smaller than -1
+ * \retval negative    error code from kstrtoint() otherwise
+ */
+static ssize_t append_stripe_count_store(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        const char *buffer, size_t count)
+{
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       int stripe_count;
+       int rc;
+
+       rc = kstrtoint(buffer, 0, &stripe_count);
+       if (rc)
+               return rc;
+
+       if (stripe_count < -1)
+               return -ERANGE;
+
+       mdd->mdd_append_stripe_count = stripe_count;
 
-#ifdef HAVE_QUOTA_SUPPORT
-static int mdd_lprocfs_quota_rd_type(char *page, char **start, off_t off,
-                                     int count, int *eof, void *data)
+       return count;
+}
+LUSTRE_RW_ATTR(append_stripe_count);
+
+/**
+ * Show default OST pool for O_APPEND files.
+ *
+ * \param[in] kobj     kobject embedded in the mdd_device (mdd_kobj)
+ * \param[in] attr     unused
+ * \param[in] buf      output buffer
+ *
+ * \retval             number of bytes written to \a buf, i.e. the length
+ *                     of the pool name plus the trailing newline
+ */
+static ssize_t append_pool_show(struct kobject *kobj,
+                               struct attribute *attr, char *buf)
 {
-        struct mdd_device *mdd = data;
-        return lprocfs_quota_rd_type(page, start, off, count, eof,
-                                     mdd->mdd_obd_dev);
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+
+       /* Bound the output by the sysfs page rather than by the pool name
+        * size, otherwise a maximum-length name loses its trailing '\n'
+        * and the returned length exceeds what was written.  The precision
+        * caps the read at LOV_MAXPOOLNAME bytes so that an unterminated
+        * name is never read past the array.
+        */
+       return snprintf(buf, PAGE_SIZE, "%.*s\n", LOV_MAXPOOLNAME,
+                       mdd->mdd_append_pool);
 }
 
-static int mdd_lprocfs_quota_wr_type(struct file *file, const char *buffer,
-                                     unsigned long count, void *data)
+/**
+ * Set default OST pool for O_APPEND files.
+ *
+ * One trailing newline is ignored.  Input starting with "none" clears the
+ * pool.  The input is fully validated before the stored value is touched,
+ * so a rejected write leaves the previous pool name intact.
+ *
+ * \param[in] kobj     kobject embedded in the mdd_device (mdd_kobj)
+ * \param[in] attr     unused
+ * \param[in] buffer   user inputted pool name
+ * \param[in] count    @buffer length
+ *
+ * \retval @count      on success
+ * \retval -EINVAL     if the input is empty, is only a newline, or the
+ *                     name does not fit in LOV_MAXPOOLNAME characters
+ */
+static ssize_t append_pool_store(struct kobject *kobj, struct attribute *attr,
+                                const char *buffer, size_t count)
 {
-        struct mdd_device *mdd = data;
-        return lprocfs_quota_wr_type(file, buffer, count, mdd->mdd_obd_dev);
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       size_t len = count;
+
+       if (!count || count > LOV_MAXPOOLNAME + 1)
+               return -EINVAL;
+
+       /* Trim the trailing '\n' if any */
+       if (buffer[len - 1] == '\n')
+               len--;
+
+       /* Don't echo just a newline */
+       if (len == 0)
+               return -EINVAL;
+
+       /* entering "none" clears the pool */
+       if (strncmp("none", buffer, 4) == 0) {
+               memset(mdd->mdd_append_pool, 0, LOV_MAXPOOLNAME + 1);
+               return count;
+       }
+
+       /* the name must leave room for the terminating NUL; the buffer is
+        * treated as LOV_MAXPOOLNAME + 1 bytes, as the memset below assumes
+        */
+       if (len > LOV_MAXPOOLNAME)
+               return -EINVAL;
+
+       /* clear previous value, then copy the new pool */
+       memset(mdd->mdd_append_pool, 0, LOV_MAXPOOLNAME + 1);
+       memcpy(mdd->mdd_append_pool, buffer, len);
+
+       return count;
 }
-#endif
+LUSTRE_RW_ATTR(append_pool);
 
+/* debugfs files of an MDD device; mdd_procfs_init() stores this table in
+ * obd->obd_vars and hands it to ldebugfs_register().  The table ends with
+ * an entry whose name is NULL.
+ */
 static struct lprocfs_vars lprocfs_mdd_obd_vars[] = {
-        { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 },
-        { "changelog_mask", lprocfs_rd_cl_mask, lprocfs_wr_cl_mask, 0 },
-        { "changelog", 0, lprocfs_wr_cl, 0, &mdd_changelog_fops, 0600 },
-#ifdef HAVE_QUOTA_SUPPORT
-        { "quota_type",      mdd_lprocfs_quota_rd_type,
-                             mdd_lprocfs_quota_wr_type, 0 },
-#endif
-        { 0 }
+       { .name =       "changelog_mask",
+         .fops =       &mdd_changelog_mask_fops        },
+       { .name =       "changelog_users",
+         .fops =       &mdd_changelog_users_fops       },
+       { .name =       "lfsck_namespace",
+         .fops =       &mdd_lfsck_namespace_fops       },
+       { .name =       "lfsck_layout",
+         .fops =       &mdd_lfsck_layout_fops          },
+       { NULL }
 };
 
-static struct lprocfs_vars lprocfs_mdd_module_vars[] = {
-        { "num_refs",   lprocfs_rd_numrefs, 0, 0 },
-        { 0 }
+/* sysfs attributes of an MDD device; installed as default_attrs of
+ * mdd->mdd_ktype by mdd_procfs_init().  NULL-terminated.
+ */
+static struct attribute *mdd_attrs[] = {
+       &lustre_attr_uuid.attr,
+       &lustre_attr_atime_diff.attr,
+       &lustre_attr_changelog_size.attr,
+       &lustre_attr_changelog_gc.attr,
+       &lustre_attr_changelog_max_idle_time.attr,
+       &lustre_attr_changelog_max_idle_indexes.attr,
+       &lustre_attr_changelog_min_gc_interval.attr,
+       &lustre_attr_changelog_min_free_cat_entries.attr,
+       &lustre_attr_changelog_deniednext.attr,
+       &lustre_attr_lfsck_async_windows.attr,
+       &lustre_attr_lfsck_speed_limit.attr,
+       &lustre_attr_sync_permission.attr,
+       &lustre_attr_append_stripe_count.attr,
+       &lustre_attr_append_pool.attr,
+       NULL,
 };
 
-void lprocfs_mdd_init_vars(struct lprocfs_static_vars *lvars)
+/**
+ * Release callback of the MDD kobject (installed as mdd_ktype.release).
+ *
+ * Removes the debugfs directory of the device, clears the stored dentry
+ * and completes mdd_kobj_unregister, which mdd_procfs_fini() waits on.
+ *
+ * \param[in] kobj     kobject embedded in the mdd_device (mdd_kobj)
+ */
+static void mdd_sysfs_release(struct kobject *kobj)
 {
-        lvars->module_vars  = lprocfs_mdd_module_vars;
-        lvars->obd_vars     = lprocfs_mdd_obd_vars;
+       struct mdd_device *mdd = container_of(kobj, struct mdd_device,
+                                             mdd_kobj);
+       struct obd_device *obd = mdd2obd_dev(mdd);
+
+       debugfs_remove_recursive(obd->obd_debugfs_entry);
+       obd->obd_debugfs_entry = NULL;
+
+       complete(&mdd->mdd_kobj_unregister);
 }
 
+/**
+ * Create the sysfs kobject and the debugfs directory of an MDD device.
+ *
+ * The kobject is added below the kobject of the LUSTRE_MDD_NAME obd_type
+ * with mdd_attrs as its attributes.  The debugfs directory is registered
+ * below the type's debugfs entry from lprocfs_mdd_obd_vars, with \a mdd
+ * as private data.
+ *
+ * \param[in] mdd      device to expose
+ * \param[in] name     name of both the kobject and the debugfs directory
+ *
+ * \retval 0           on success
+ * \retval negative    error code on failure; the reference on the
+ *                     device kobject has already been dropped
+ */
+int mdd_procfs_init(struct mdd_device *mdd, const char *name)
+{
+       struct obd_device *obd = mdd2obd_dev(mdd);
+       struct obd_type *type;
+       int rc;
+
+       ENTRY;
+       /* at the moment there is no linkage between lu_type
+        * and obd_type, so we lookup obd_type this way
+        */
+       type = class_search_type(LUSTRE_MDD_NAME);
+
+       LASSERT(name != NULL);
+       LASSERT(type != NULL);
+       LASSERT(obd  != NULL);
+
+       /* put reference taken by class_search_type */
+       kobject_put(&type->typ_kobj);
+
+       mdd->mdd_ktype.default_attrs = mdd_attrs;
+       mdd->mdd_ktype.release = mdd_sysfs_release;
+       mdd->mdd_ktype.sysfs_ops = &lustre_sysfs_ops;
+
+       init_completion(&mdd->mdd_kobj_unregister);
+       rc = kobject_init_and_add(&mdd->mdd_kobj, &mdd->mdd_ktype,
+                                 &type->typ_kobj, "%s", name);
+       if (rc) {
+               /* kobject_init_and_add() leaves the kobject initialized on
+                * failure; it must be released with kobject_put(), the
+                * same way the debugfs failure path below does.
+                * NOTE(review): this runs mdd_sysfs_release(), which
+                * assumes obd->obd_debugfs_entry is still NULL here.
+                */
+               kobject_put(&mdd->mdd_kobj);
+               RETURN(rc);
+       }
+
+       /* Find the type procroot and add the proc entry for this device */
+       obd->obd_vars = lprocfs_mdd_obd_vars;
+       obd->obd_debugfs_entry = ldebugfs_register(name,
+                                                  type->typ_debugfs_entry,
+                                                  obd->obd_vars, mdd);
+       if (IS_ERR_OR_NULL(obd->obd_debugfs_entry)) {
+               rc = obd->obd_debugfs_entry ? PTR_ERR(obd->obd_debugfs_entry)
+                                           : -ENOMEM;
+               CERROR("Error %d setting up debugfs for %s\n",
+                      rc, name);
+               /* nothing to remove in mdd_sysfs_release() */
+               obd->obd_debugfs_entry = NULL;
+
+               kobject_put(&mdd->mdd_kobj);
+       }
+
+       RETURN(rc);
+}
+
+/**
+ * Tear down the sysfs/debugfs presence of an MDD device.
+ *
+ * Drops the reference on the device kobject and blocks until
+ * mdd_kobj_unregister is completed, which mdd_sysfs_release() does.
+ *
+ * \param[in] mdd      device set up by mdd_procfs_init()
+ */
+void mdd_procfs_fini(struct mdd_device *mdd)
+{
+       struct kobject *kobj = &mdd->mdd_kobj;
+
+       kobject_put(kobj);
+
+       /* do not return before the release callback has run */
+       wait_for_completion(&mdd->mdd_kobj_unregister);
+}