Whamcloud - gitweb
LU-957 lfsck: LFSCK main engine
authorFan Yong <yong.fan@whamcloud.com>
Sun, 1 Jul 2012 02:16:10 +0000 (10:16 +0800)
committerAndreas Dilger <adilger@whamcloud.com>
Tue, 3 Jul 2012 01:43:22 +0000 (21:43 -0400)
Implement the main engine for Lustre online fsck. The kernel
thread "lfsck" scans the system object table through the low layer DT
iteration APIs, and drives the registered scrub component(s)
to check/repair the Lustre system.

It is the controller for the whole LFSCK, and controls the
speed, including the low layer OI scrub (for osd-ldiskfs).

For urgent mode, e.g. when the MDT is restored from a file-level backup
against ldiskfs, the OI files are invalidated. In such a case,
we need to rebuild the OI files ASAP, so the low layer OI scrub
inside osd-ldiskfs ignores the main engine speed control and
runs at full speed.

Signed-off-by: Fan Yong <yong.fan@whamcloud.com>
Change-Id: If1bd3cfac1f299e964c029e5e9c4cce6432edfa5
Reviewed-on: http://review.whamcloud.com/3169
Tested-by: Hudson
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/lustre/Makefile.am
lustre/include/lustre/lustre_lfsck_user.h [new file with mode: 0644]
lustre/include/lustre_fid.h
lustre/mdd/Makefile.in
lustre/mdd/mdd_device.c
lustre/mdd/mdd_internal.h
lustre/mdd/mdd_lfsck.c [new file with mode: 0644]
lustre/mdd/mdd_lproc.c
lustre/osd-ldiskfs/osd_compat.c
lustre/osd-ldiskfs/osd_handler.c

index 23d7ebe..e58a6fe 100644 (file)
@@ -39,4 +39,5 @@ pkginclude_HEADERS = lustre_idl.h lustre_user.h liblustreapi.h libiam.h \
         ll_fiemap.h
 endif
 
-EXTRA_DIST = lustre_idl.h lustre_user.h liblustreapi.h libiam.h ll_fiemap.h
+EXTRA_DIST = lustre_idl.h lustre_user.h liblustreapi.h libiam.h ll_fiemap.h \
+            lustre_lfsck_user.h
diff --git a/lustre/include/lustre/lustre_lfsck_user.h b/lustre/include/lustre/lustre_lfsck_user.h
new file mode 100644 (file)
index 0000000..cfc2b50
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012 Whamcloud, Inc.
+ */
+/*
+ * lustre/include/lustre/lustre_lfsck_user.h
+ *
+ * Lustre LFSCK userspace interfaces.
+ *
+ * Author: Fan Yong <yong.fan@whamcloud.com>
+ */
+
+#ifndef _LUSTRE_LFSCK_USER_H
+# define _LUSTRE_LFSCK_USER_H
+
+/* Flags for an LFSCK run; carried in lfsck_start::ls_flags. */
+enum lfsck_param_flags {
+       /* Reset the LFSCK iterator position to the beginning of the device. */
+       LPF_RESET       = 0x0001,
+
+       /* Exit when a failure occurs. */
+       LPF_FAILOUT     = 0x0002,
+
+       /* Dry-run mode: only check, without modification. */
+       LPF_DRYRUN      = 0x0004,
+};
+
+/* Object iteration methods; carried in lfsck_start::ls_method. */
+enum lfsck_method {
+       /* Object table based iteration, depends on the backend filesystem.
+        * For ldiskfs, it is inode table based iteration. */
+       LM_OTABLE       = 1,
+
+       /* Namespace based scanning. NOT supported yet. */
+       LM_NAMESPACE    = 2,
+};
+
+/* LFSCK component types.
+ * NOTE(review): presumably these are the bits used in
+ * lfsck_start::ls_active (see LFSCK_TYPES_ALL) -- confirm. */
+enum lfsck_type {
+       /* For MDT-OST consistency check/repair. */
+       LT_LAYOUT       = 0x0001,
+
+       /* For MDT-MDT consistency check/repair. */
+       LT_DNE          = 0x0002,
+};
+
+#define LFSCK_VERSION_V1       10
+
+#define LFSCK_TYPES_ALL        ((__u16)(~0))
+#define LFSCK_TYPES_DEF        ((__u16)0)
+
+#define LFSCK_SPEED_NO_LIMIT   0
+#define LFSCK_SPEED_LIMIT_DEF  LFSCK_SPEED_NO_LIMIT
+
+/* Bits for lfsck_start::ls_valid, telling which arguments of the
+ * start request are meaningful. */
+enum lfsck_start_valid {
+       /* lfsck_start::ls_speed_limit is to be applied. */
+       LSV_SPEED_LIMIT         = 0x00000001,
+       /* lfsck_start::ls_method is to be checked. */
+       LSV_METHOD              = 0x00000002,
+       /* The LPF_FAILOUT bit of lfsck_start::ls_flags is to be honoured. */
+       LSV_ERROR_HANDLE        = 0x00000004,
+       /* NOTE(review): presumably pairs with LPF_DRYRUN; not consumed by
+        * the code visible in this change -- confirm. */
+       LSV_DRYRUN              = 0x00000008,
+};
+
+/* Arguments for starting LFSCK. */
+struct lfsck_start {
+       /* Which arguments are valid, see 'enum lfsck_start_valid'. */
+       __u32   ls_valid;
+
+       /* For compatibility between user space tools and kernel service. */
+       __u16   ls_version;
+
+       /* Which LFSCK components are to be (or have been) started. */
+       __u16   ls_active;
+
+       /* Flags for the LFSCK, see 'enum lfsck_param_flags'. */
+       __u16   ls_flags;
+
+       /* Object iteration method, see 'enum lfsck_method'. */
+       __u16   ls_method;
+
+       /* How many items can be scanned at most per second;
+        * LFSCK_SPEED_NO_LIMIT (0) means no limit. */
+       __u32   ls_speed_limit;
+};
+
+#endif /* _LUSTRE_LFSCK_USER_H */
index e4c8324..bb664b2 100644 (file)
@@ -112,6 +112,8 @@ enum local_oid {
         OSD_FS_ROOT_OID         = 13UL,
         ACCT_USER_OID           = 15UL,
         ACCT_GROUP_OID          = 16UL,
+       LFSCK_BOOKMARK_OID      = 17UL,
+       OTABLE_IT_OID           = 18UL,
         OFD_LAST_RECV_OID       = 19UL,
         OFD_GROUP0_LAST_OID     = 20UL,
         OFD_GROUP4K_LAST_OID    = 20UL+4096,
@@ -128,6 +130,12 @@ static inline void lu_local_obj_fid(struct lu_fid *fid, __u32 oid)
         fid->f_ver = 0;
 }
 
+/* Return non-zero if \a fid is the local-file FID reserved for the object
+ * table iterator (FID_SEQ_LOCAL_FILE sequence, OTABLE_IT_OID object id).
+ * The match is hinted as the unlikely case. */
+static inline int fid_is_otable_it(const struct lu_fid *fid)
+{
+       return unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
+                       fid_oid(fid) == OTABLE_IT_OID);
+}
+
 static inline int fid_is_acct(const struct lu_fid *fid)
 {
         return fid_seq(fid) == FID_SEQ_LOCAL_FILE &&
index bfecc0c..b2fa867 100644 (file)
@@ -1,6 +1,7 @@
 MODULES := mdd
 mdd-objs := mdd_object.o mdd_lov.o mdd_orphans.o mdd_lproc.o mdd_dir.o
 mdd-objs += mdd_device.o mdd_trans.o mdd_permission.o mdd_lock.o mdd_quota.o
+mdd-objs += mdd_lfsck.o
 
 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs
 
index 725677b..0241d9e 100644 (file)
@@ -50,7 +50,6 @@
 #include <obd_support.h>
 #include <lprocfs_status.h>
 
-#include <lustre_disk.h>
 #include <lustre_fid.h>
 #include <ldiskfs/ldiskfs.h>
 #include <lustre_mds.h>
@@ -113,6 +112,7 @@ static void mdd_device_shutdown(const struct lu_env *env,
                                 struct mdd_device *m, struct lustre_cfg *cfg)
 {
         ENTRY;
+       mdd_lfsck_cleanup(env, m);
         mdd_changelog_fini(env, m);
         dt_txn_callback_del(m->mdd_child, &m->mdd_txn_cb);
         if (m->mdd_dot_lustre_objs.mdd_obf)
@@ -430,7 +430,7 @@ static int create_dot_lustre_dir(const struct lu_env *env, struct mdd_device *m)
                 rc = PTR_ERR(mdo);
                 CERROR("creating obj [%s] fid = "DFID" rc = %d\n",
                         dot_lustre_name, PFID(fid), rc);
-                RETURN(rc);
+               return rc;
         }
 
         if (!IS_ERR(mdo))
@@ -1094,13 +1094,16 @@ static int mdd_prepare(const struct lu_env *env,
         /* we use capa file to declare llog changes,
          * will be fixed with new llog in 2.3 */
         root = dt_store_open(env, mdd->mdd_child, "", CAPA_KEYS, &fid);
-        if (!IS_ERR(root))
-                mdd->mdd_capa = root;
-        else
-                rc = PTR_ERR(root);
+       if (IS_ERR(root))
+               GOTO(out, rc = PTR_ERR(root));
+
+       mdd->mdd_capa = root;
+       rc = mdd_lfsck_setup(env, mdd);
+
+       GOTO(out, rc);
 
 out:
-        RETURN(rc);
+       return rc;
 }
 
 const struct lu_device_operations mdd_lu_ops = {
@@ -1428,7 +1431,7 @@ static int mdd_changelog_user_purge_cb(struct llog_handle *llh,
                         RETURN(-ENOMEM);
                 }
 
-                rc = mdd_declare_llog_cancel(mcud->mcud_env, mdd, th); 
+               rc = mdd_declare_llog_cancel(mcud->mcud_env, mdd, th);
                 if (rc)
                         GOTO(stop, rc);
 
@@ -1669,26 +1672,34 @@ static struct lu_local_obj_desc llod_mdd_root = {
         .llod_feat      = &dt_directory_features,
 };
 
+/* Local object descriptor for the LFSCK bookmark file: named by
+ * lfsck_bookmark_name, object id LFSCK_BOOKMARK_OID, not an index.
+ * Registered in mdd_mod_init() and unregistered in mdd_mod_exit(). */
+static struct lu_local_obj_desc llod_lfsck_bookmark_key = {
+       .llod_name      = lfsck_bookmark_name,
+       .llod_oid       = LFSCK_BOOKMARK_OID,
+       .llod_is_index  = 0,
+};
+
 static int __init mdd_mod_init(void)
 {
-        struct lprocfs_static_vars lvars;
-        lprocfs_mdd_init_vars(&lvars);
+       struct lprocfs_static_vars lvars;
+       lprocfs_mdd_init_vars(&lvars);
 
-        llo_local_obj_register(&llod_capa_key);
-        llo_local_obj_register(&llod_mdd_orphan);
-        llo_local_obj_register(&llod_mdd_root);
+       llo_local_obj_register(&llod_capa_key);
+       llo_local_obj_register(&llod_mdd_orphan);
+       llo_local_obj_register(&llod_mdd_root);
+       llo_local_obj_register(&llod_lfsck_bookmark_key);
 
-        return class_register_type(&mdd_obd_device_ops, NULL, lvars.module_vars,
-                                   LUSTRE_MDD_NAME, &mdd_device_type);
+       return class_register_type(&mdd_obd_device_ops, NULL, lvars.module_vars,
+                                  LUSTRE_MDD_NAME, &mdd_device_type);
 }
 
 static void __exit mdd_mod_exit(void)
 {
-        llo_local_obj_unregister(&llod_capa_key);
-        llo_local_obj_unregister(&llod_mdd_orphan);
-        llo_local_obj_unregister(&llod_mdd_root);
+       llo_local_obj_unregister(&llod_capa_key);
+       llo_local_obj_unregister(&llod_mdd_orphan);
+       llo_local_obj_unregister(&llod_mdd_root);
+       llo_local_obj_unregister(&llod_lfsck_bookmark_key);
 
-        class_unregister_type(LUSTRE_MDD_NAME);
+       class_unregister_type(LUSTRE_MDD_NAME);
 }
 
 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
index 161a1c3..192404f 100644 (file)
@@ -52,6 +52,7 @@
 # include <lustre_quota.h>
 #endif
 #include <lustre_fsfilt.h>
+#include <lustre/lustre_lfsck_user.h>
 
 #ifdef HAVE_QUOTA_SUPPORT
 /* quota stuff */
@@ -96,6 +97,29 @@ struct mdd_dot_lustre_objs {
         struct mdd_object *mdd_obf;
 };
 
+extern const char lfsck_bookmark_name[];
+
+/* Per-MDD state of the LFSCK main engine. */
+struct md_lfsck {
+       /* Held by mdd_lfsck_start() and mdd_lfsck_stop() for their whole
+        * duration, so starting and stopping are serialized. */
+       cfs_mutex_t           ml_mutex;
+
+       /* Taken around changes of the thread flags and of the speed
+        * control values below. */
+       cfs_spinlock_t        ml_lock;
+
+       /* State and control wait queue of the "lfsck" kernel thread. */
+       struct ptlrpc_thread  ml_thread;
+
+       /* Bookmark file, opened by name in mdd_lfsck_setup(). */
+       struct dt_object     *ml_bookmark_obj;
+
+       /* Object table iterator object, located in mdd_lfsck_setup(). */
+       struct dt_object     *ml_it_obj;
+
+       /* Objects scanned since the last rate-control sleep. */
+       __u32                 ml_new_scanned;
+       /* Arguments for low layer iteration. */
+       __u32                 ml_args;
+
+       /* Raw value for LFSCK speed limit. */
+       __u32                 ml_speed_limit;
+
+       /* Schedule for every N objects. */
+       __u32                 ml_sleep_rate;
+
+       /* Sleep N jiffies for each schedule. */
+       __u32                 ml_sleep_jif;
+
+       /* Set to LFSCK_VERSION_V1 by mdd_lfsck_setup(). */
+       __u16                 ml_version;
+};
+
 struct mdd_device {
         struct md_device                 mdd_md_dev;
         struct dt_device                *mdd_child;
@@ -111,7 +135,8 @@ struct mdd_device {
         unsigned long                    mdd_atime_diff;
         struct mdd_object               *mdd_dot_lustre;
         struct mdd_dot_lustre_objs       mdd_dot_lustre_objs;
-        unsigned int                     mdd_sync_permission;
+       struct md_lfsck                  mdd_lfsck;
+       unsigned int                     mdd_sync_permission;
 };
 
 enum mod_flags {
@@ -436,6 +461,14 @@ int mdd_txn_stop_cb(const struct lu_env *env, struct thandle *txn,
 int mdd_txn_start_cb(const struct lu_env *env, struct thandle *,
                      void *cookie);
 
+/* mdd_lfsck.c */
+void mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit);
+int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
+                   struct lfsck_start *start);
+int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck);
+int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd);
+void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd);
+
 /* mdd_device.c */
 struct lu_object *mdd_object_alloc(const struct lu_env *env,
                                    const struct lu_object_header *hdr,
diff --git a/lustre/mdd/mdd_lfsck.c b/lustre/mdd/mdd_lfsck.c
new file mode 100644 (file)
index 0000000..c0c6400
--- /dev/null
@@ -0,0 +1,323 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2012 Whamcloud, Inc.
+ */
+/*
+ * lustre/mdd/mdd_lfsck.c
+ *
+ * Top-level entry points into mdd module
+ *
+ * LFSCK controller, which scans the whole device through low layer
+ * iteration APIs, drives all lfsck components, and controls the speed.
+ *
+ * Author: Fan Yong <yong.fan@whamcloud.com>
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <lustre/lustre_idl.h>
+#include <lustre_fid.h>
+
+#include "mdd_internal.h"
+
+/* Map an md_lfsck back to the mdd_device embedding it (as 'mdd_lfsck')
+ * and return the name of that device's obd. */
+static inline char *mdd_lfsck2name(struct md_lfsck *lfsck)
+{
+       return container_of0(lfsck, struct mdd_device,
+                            mdd_lfsck)->mdd_obd_dev->obd_name;
+}
+
+/*
+ * Set the LFSCK speed limit and derive the rate-control parameters.
+ *
+ * \a limit is the maximum number of objects per second;
+ * LFSCK_SPEED_NO_LIMIT disables rate control (both derived values are 0).
+ * Above CFS_HZ the scanner pauses one jiffy every limit / CFS_HZ objects;
+ * otherwise it pauses CFS_HZ / limit jiffies after every object.
+ * All three values are stored while holding ml_lock.
+ */
+void mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit)
+{
+       __u32 rate = 0;
+       __u32 jif  = 0;
+
+       if (limit != LFSCK_SPEED_NO_LIMIT) {
+               const int fast = limit > CFS_HZ;
+
+               rate = fast ? limit / CFS_HZ : 1;
+               jif  = fast ? 1 : CFS_HZ / limit;
+       }
+
+       cfs_spin_lock(&lfsck->ml_lock);
+       lfsck->ml_speed_limit = limit;
+       lfsck->ml_sleep_rate  = rate;
+       lfsck->ml_sleep_jif   = jif;
+       cfs_spin_unlock(&lfsck->ml_lock);
+}
+
+/*
+ * Rate control for the scanning thread.
+ *
+ * When a speed limit is in effect (ml_sleep_jif > 0) and at least
+ * ml_sleep_rate objects have been scanned since the last pause, wait on
+ * the thread's control wait queue with a timeout of ml_sleep_jif, or until
+ * the thread is no longer running, then reset the scanned counter.
+ */
+static void mdd_lfsck_control_speed(struct md_lfsck *lfsck)
+{
+       struct ptlrpc_thread *thread = &lfsck->ml_thread;
+       struct l_wait_info    lwi;
+
+       /* Check without the lock first; most calls need no pause. */
+       if (lfsck->ml_sleep_jif > 0 &&
+           lfsck->ml_new_scanned >= lfsck->ml_sleep_rate) {
+               cfs_spin_lock(&lfsck->ml_lock);
+               /* Re-check under ml_lock: mdd_lfsck_set_speed() updates the
+                * limits while holding it. */
+               if (likely(lfsck->ml_sleep_jif > 0 &&
+                          lfsck->ml_new_scanned >= lfsck->ml_sleep_rate)) {
+                       lwi = LWI_TIMEOUT_INTR(lfsck->ml_sleep_jif, NULL,
+                                              LWI_ON_SIGNAL_NOOP, NULL);
+                       /* The timeout has been captured; release the
+                        * spinlock before waiting. */
+                       cfs_spin_unlock(&lfsck->ml_lock);
+
+                       l_wait_event(thread->t_ctl_waitq,
+                                    !thread_is_running(thread),
+                                    &lwi);
+                       lfsck->ml_new_scanned = 0;
+               } else {
+                       cfs_spin_unlock(&lfsck->ml_lock);
+               }
+       }
+}
+
+/*
+ * Body of the "lfsck" kernel thread.
+ *
+ * \a args is the struct md_lfsck to work on. The thread initializes its
+ * own lu_env, initializes an iterator on ml_it_obj with ml_args, marks
+ * itself SVC_RUNNING and wakes up waiters on the control wait queue, then
+ * walks the iterator: for each position it fetches the record into a FID
+ * buffer, counts it, applies rate control, and advances with iops->next().
+ * The loop ends when the thread is no longer running or next() returns
+ * non-zero.
+ *
+ * On every exit path the thread is marked SVC_STOPPED and waiters are
+ * woken up.
+ *
+ * \retval the error from lu_env_init(), iops->init() or iops->load(),
+ *         0 when the thread was asked to stop, otherwise the non-zero
+ *         value of iops->next() that ended the loop.
+ */
+static int mdd_lfsck_main(void *args)
+{
+       struct lu_env            env;
+       struct md_lfsck         *lfsck  = (struct md_lfsck *)args;
+       struct ptlrpc_thread    *thread = &lfsck->ml_thread;
+       struct dt_object        *obj    = lfsck->ml_it_obj;
+       const struct dt_it_ops  *iops   = &obj->do_index_ops->dio_it;
+       struct dt_it            *di;
+       struct lu_fid           *fid;
+       int                      rc;
+       ENTRY;
+
+       cfs_daemonize("lfsck");
+       rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
+       if (rc != 0) {
+               CERROR("%s: LFSCK, fail to init env, rc = %d\n",
+                      mdd_lfsck2name(lfsck), rc);
+               GOTO(noenv, rc);
+       }
+
+       di = iops->init(&env, obj, lfsck->ml_args, BYPASS_CAPA);
+       if (IS_ERR(di)) {
+               rc = PTR_ERR(di);
+               CERROR("%s: LFSCK, fail to init iteration, rc = %d\n",
+                      mdd_lfsck2name(lfsck), rc);
+               GOTO(fini_env, rc);
+       }
+
+       CDEBUG(D_LFSCK, "LFSCK: flags = 0x%x, pid = %d\n",
+              lfsck->ml_args, cfs_curproc_pid());
+
+       /* XXX: Prepare before waking up the sponsor.
+        *      Each lfsck component should call the iops->get() API with
+        *      every bookmark, then the low layer module can decide the
+        *      start point for the current iteration. */
+
+       /* Report SVC_RUNNING; mdd_lfsck_start() waits for this (or for
+        * SVC_STOPPED) on t_ctl_waitq. */
+       cfs_spin_lock(&lfsck->ml_lock);
+       thread_set_flags(thread, SVC_RUNNING);
+       cfs_spin_unlock(&lfsck->ml_lock);
+       cfs_waitq_broadcast(&thread->t_ctl_waitq);
+
+       /* Call iops->load() to finish choosing the start point. */
+       rc = iops->load(&env, di, 0);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       CDEBUG(D_LFSCK, "LFSCK: iteration start: pos = %s\n",
+              (char *)iops->key(&env, di));
+
+       lfsck->ml_new_scanned = 0;
+       fid = &mdd_env_info(&env)->mti_fid;
+       while (rc == 0) {
+               /* Fetch the record at the current position into 'fid';
+                * nothing consumes it yet (see the XXX notes below). */
+               iops->rec(&env, di, (struct dt_rec *)fid, 0);
+
+               /* XXX: here, perform LFSCK when some LFSCK component(s)
+                *      introduced in the future. */
+               lfsck->ml_new_scanned++;
+
+               /* XXX: here, make checkpoint when some LFSCK component(s)
+                *      introduced in the future. */
+
+               /* Rate control. */
+               mdd_lfsck_control_speed(lfsck);
+               /* mdd_lfsck_stop() moves the thread out of the running
+                * state; leave the loop with rc = 0 in that case. */
+               if (unlikely(!thread_is_running(thread)))
+                       GOTO(out, rc = 0);
+
+               rc = iops->next(&env, di);
+       }
+
+       GOTO(out, rc);
+
+out:
+       CDEBUG(D_LFSCK, "LFSCK: iteration stop: pos = %s, rc = %d\n",
+              (char *)iops->key(&env, di), rc);
+       iops->fini(&env, di);
+
+fini_env:
+       lu_env_fini(&env);
+
+noenv:
+       /* Always report SVC_STOPPED, so that both mdd_lfsck_start() and
+        * mdd_lfsck_stop() waiters are released. */
+       cfs_spin_lock(&lfsck->ml_lock);
+       thread_set_flags(thread, SVC_STOPPED);
+       cfs_waitq_broadcast(&thread->t_ctl_waitq);
+       cfs_spin_unlock(&lfsck->ml_lock);
+       return rc;
+}
+
+/*
+ * Start the LFSCK thread according to \a start.
+ *
+ * Runs under ml_mutex. Translates the request into the low layer iteration
+ * arguments (ml_args), creates the "lfsck" thread and waits until that
+ * thread reports either running or stopped.
+ *
+ * \retval 0            the thread was created; note that 0 is also
+ *                      returned when the thread ended up stopped rather
+ *                      than running
+ * \retval -EALREADY    the thread is already running
+ * \retval -EOPNOTSUPP  LSV_METHOD is set and ls_method is not LM_OTABLE
+ * \retval negative     error from cfs_create_thread()
+ */
+int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
+                   struct lfsck_start *start)
+{
+       struct ptlrpc_thread *thread  = &lfsck->ml_thread;
+       struct l_wait_info    lwi     = { 0 };
+       int                   rc      = 0;
+       __u16                 valid   = 0;
+       __u16                 flags   = 0;
+       ENTRY;
+
+       cfs_mutex_lock(&lfsck->ml_mutex);
+       cfs_spin_lock(&lfsck->ml_lock);
+       if (thread_is_running(thread)) {
+               cfs_spin_unlock(&lfsck->ml_lock);
+               cfs_mutex_unlock(&lfsck->ml_mutex);
+               RETURN(-EALREADY);
+       }
+
+       cfs_spin_unlock(&lfsck->ml_lock);
+       /* NOTE: the speed limit is applied before the method is validated,
+        * so it stays in effect even if -EOPNOTSUPP is returned below. */
+       if (start->ls_valid & LSV_SPEED_LIMIT)
+               mdd_lfsck_set_speed(lfsck, start->ls_speed_limit);
+
+       if (start->ls_valid & LSV_METHOD && start->ls_method != LM_OTABLE) {
+               cfs_mutex_unlock(&lfsck->ml_mutex);
+               RETURN(-EOPNOTSUPP);
+       }
+
+       if (start->ls_valid & LSV_ERROR_HANDLE) {
+               valid |= DOIV_ERROR_HANDLE;
+               if (start->ls_flags & LPF_FAILOUT)
+                       flags |= DOIF_FAILOUT;
+       }
+
+       /* XXX: 1. low layer does not care 'dryrun'.
+        *      2. will process 'ls_active' when introduces LFSCK for layout
+        *         consistency, DNE consistency, and so on in the future. */
+       start->ls_active = 0;
+
+       if (start->ls_flags & LPF_RESET)
+               flags |= DOIF_RESET;
+
+       /* ls_active was just cleared above, so DOIF_OUTUSED is never set
+        * for now. */
+       if (start->ls_active != 0)
+               flags |= DOIF_OUTUSED;
+
+       lfsck->ml_args = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
+       thread_set_flags(thread, 0);
+       rc = cfs_create_thread(mdd_lfsck_main, lfsck, 0);
+       if (rc < 0)
+               CERROR("%s: cannot start LFSCK thread, rc = %d\n",
+                      mdd_lfsck2name(lfsck), rc);
+       else
+               /* Wait for the new thread to report its state. */
+               l_wait_event(thread->t_ctl_waitq,
+                            thread_is_running(thread) ||
+                            thread_is_stopped(thread),
+                            &lwi);
+       cfs_mutex_unlock(&lfsck->ml_mutex);
+
+       RETURN(rc < 0 ? rc : 0);
+}
+
+/*
+ * Stop the LFSCK thread and wait until it has stopped.
+ *
+ * Runs under ml_mutex. Marks the thread SVC_STOPPING, wakes it up if it is
+ * waiting on the control wait queue, and waits for SVC_STOPPED.
+ *
+ * \retval 0          the thread has stopped
+ * \retval -EALREADY  the thread was never started or is already stopped
+ */
+int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck)
+{
+       struct ptlrpc_thread *thread = &lfsck->ml_thread;
+       struct l_wait_info    lwi    = { 0 };
+       ENTRY;
+
+       cfs_mutex_lock(&lfsck->ml_mutex);
+       cfs_spin_lock(&lfsck->ml_lock);
+       if (thread_is_init(thread) || thread_is_stopped(thread)) {
+               cfs_spin_unlock(&lfsck->ml_lock);
+               cfs_mutex_unlock(&lfsck->ml_mutex);
+               RETURN(-EALREADY);
+       }
+
+       thread_set_flags(thread, SVC_STOPPING);
+       cfs_spin_unlock(&lfsck->ml_lock);
+
+       /* Wake the thread in case it is in a rate-control wait. */
+       cfs_waitq_broadcast(&thread->t_ctl_waitq);
+       l_wait_event(thread->t_ctl_waitq,
+                    thread_is_stopped(thread),
+                    &lwi);
+       cfs_mutex_unlock(&lfsck->ml_mutex);
+
+       RETURN(0);
+}
+
+/* Name of the LFSCK bookmark file. */
+const char lfsck_bookmark_name[] = "lfsck_bookmark";
+
+/* FID of the object table iterator object in the local-file sequence;
+ * looked up by mdd_lfsck_setup(). */
+static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE,
+                                           .f_oid = OTABLE_IT_OID,
+                                           .f_ver = 0 };
+
+/*
+ * Initialize the LFSCK state of \a mdd: the version, wait queue, mutex and
+ * spinlock, then open the bookmark file and locate the object table
+ * iterator object, which must accept the dt_otable_features index.
+ *
+ * \retval 0         success; ml_bookmark_obj and ml_it_obj are set
+ * \retval negative  error from dt_store_open(), dt_locate() or
+ *                   do_index_try()
+ *
+ * NOTE(review): when a step after opening the bookmark fails,
+ * ml_bookmark_obj stays set; mdd_lfsck_cleanup() releases it, but whether
+ * the caller runs cleanup on that path is not visible here -- confirm.
+ */
+int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
+{
+       struct md_lfsck  *lfsck = &mdd->mdd_lfsck;
+       struct dt_object *obj;
+       int               rc;
+
+       lfsck->ml_version = LFSCK_VERSION_V1;
+       cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq);
+       cfs_mutex_init(&lfsck->ml_mutex);
+       cfs_spin_lock_init(&lfsck->ml_lock);
+
+       obj = dt_store_open(env, mdd->mdd_child, "", lfsck_bookmark_name,
+                           &mdd_env_info(env)->mti_fid);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       lfsck->ml_bookmark_obj = obj;
+
+       obj = dt_locate(env, mdd->mdd_child, &lfsck_it_fid);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
+       if (rc != 0) {
+               /* Drop the reference obtained by dt_locate() above. */
+               lu_object_put(env, &obj->do_lu);
+               return rc;
+       }
+
+       lfsck->ml_it_obj = obj;
+
+       return 0;
+}
+
+/*
+ * Undo mdd_lfsck_setup(): if the iterator object was set up, stop the
+ * LFSCK thread and release that object; then release the bookmark object
+ * if it is held. Both pointers are reset to NULL.
+ */
+void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)
+{
+       struct md_lfsck  *ml = &mdd->mdd_lfsck;
+       struct dt_object *obj;
+
+       obj = ml->ml_it_obj;
+       if (obj != NULL) {
+               /* The thread iterates over this object, so stop it first. */
+               mdd_lfsck_stop(env, ml);
+               lu_object_put(env, &obj->do_lu);
+               ml->ml_it_obj = NULL;
+       }
+
+       obj = ml->ml_bookmark_obj;
+       if (obj != NULL) {
+               lu_object_put(env, &obj->do_lu);
+               ml->ml_bookmark_obj = NULL;
+       }
+}
index e3cd4ee..7321d8e 100644 (file)
@@ -294,6 +294,35 @@ static int lprocfs_wr_sync_perm(struct file *file, const char *buffer,
         return count;
 }
 
+/* procfs read handler for "lfsck_speed_limit": print the raw speed limit
+ * of the mdd_device passed in \a data as an unsigned decimal line. */
+static int lprocfs_rd_lfsck_speed_limit(char *page, char **start, off_t off,
+                                       int count, int *eof, void *data)
+{
+       struct mdd_device *mdd = data;
+
+       LASSERT(mdd != NULL);
+       *eof = 1;
+       return snprintf(page, count, "%u\n", mdd->mdd_lfsck.ml_speed_limit);
+}
+
+/* procfs write handler for "lfsck_speed_limit": parse the value written
+ * by the user and, if it differs from the current limit, apply it through
+ * mdd_lfsck_set_speed(). Returns \a count on success, or the error from
+ * lprocfs_write_helper(). */
+static int lprocfs_wr_lfsck_speed_limit(struct file *file, const char *buffer,
+                                       unsigned long count, void *data)
+{
+       struct mdd_device *mdd = data;
+       struct md_lfsck *lfsck;
+       __u32 val;
+       int rc;
+
+       LASSERT(mdd != NULL);
+       /* NOTE(review): 'val' is unsigned; verify that this matches the
+        * pointer type lprocfs_write_helper() expects and how negative
+        * input is treated. */
+       rc = lprocfs_write_helper(buffer, count, &val);
+       if (rc)
+               return rc;
+
+       lfsck = &mdd->mdd_lfsck;
+       if (val != lfsck->ml_speed_limit)
+               mdd_lfsck_set_speed(lfsck, val);
+       return count;
+}
+
 static struct lprocfs_vars lprocfs_mdd_obd_vars[] = {
         { "atime_diff",      lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 },
         { "changelog_mask",  lprocfs_rd_changelog_mask,
@@ -304,7 +333,9 @@ static struct lprocfs_vars lprocfs_mdd_obd_vars[] = {
                              mdd_lprocfs_quota_wr_type, 0 },
 #endif
         { "sync_permission", lprocfs_rd_sync_perm, lprocfs_wr_sync_perm, 0 },
-        { 0 }
+       { "lfsck_speed_limit", lprocfs_rd_lfsck_speed_limit,
+                              lprocfs_wr_lfsck_speed_limit, 0 },
+       { 0 }
 };
 
 static struct lprocfs_vars lprocfs_mdd_module_vars[] = {
index 4c0c3b6..408ae93 100644 (file)
@@ -483,20 +483,22 @@ struct named_oid {
 };
 
 static const struct named_oid oids[] = {
-        { FLD_INDEX_OID,        "" /* "fld" */ },
-        { FID_SEQ_CTL_OID,      "" /* "seq_ctl" */ },
-        { FID_SEQ_SRV_OID,      "" /* "seq_srv" */ },
-        { MDD_ROOT_INDEX_OID,   "" /* "ROOT" */ },
-        { MDD_ORPHAN_OID,       "" /* "PENDING" */ },
-        { MDD_LOV_OBJ_OID,      "" /* LOV_OBJID */ },
-        { MDD_CAPA_KEYS_OID,    "" /* CAPA_KEYS */ },
+       { FLD_INDEX_OID,        "" /* "fld" */ },
+       { FID_SEQ_CTL_OID,      "" /* "seq_ctl" */ },
+       { FID_SEQ_SRV_OID,      "" /* "seq_srv" */ },
+       { MDD_ROOT_INDEX_OID,   "" /* "ROOT" */ },
+       { MDD_ORPHAN_OID,       "" /* "PENDING" */ },
+       { MDD_LOV_OBJ_OID,      "" /* LOV_OBJID */ },
+       { MDD_CAPA_KEYS_OID,    "" /* CAPA_KEYS */ },
        { MDT_LAST_RECV_OID,    LAST_RCVD },
-        { OFD_LAST_RECV_OID,    "" /* LAST_RCVD */ },
+       { LFSCK_BOOKMARK_OID,   "" /* "lfsck_bookmark" */ },
+       { OTABLE_IT_OID,        "" /* "otable iterator" */},
+       { OFD_LAST_RECV_OID,    "" /* LAST_RCVD */ },
        { OFD_LAST_GROUP_OID,   "LAST_GROUP" },
-        { LLOG_CATALOGS_OID,    "" /* "CATALOGS" */ },
-        { MGS_CONFIGS_OID,      "" /* MOUNT_CONFIGS_DIR */ },
+       { LLOG_CATALOGS_OID,    "" /* "CATALOGS" */ },
+       { MGS_CONFIGS_OID,      "" /* MOUNT_CONFIGS_DIR */ },
        { OFD_HEALTH_CHECK_OID, HEALTH_CHECK },
-        { 0,                    NULL }
+       { 0,                    NULL }
 };
 
 static char *oid2name(const unsigned long oid)
index 0a258ec..8e4c862 100644 (file)
@@ -90,6 +90,7 @@ static const char remote_obj_dir[] = "REM_OBJ_DIR";
 static const struct lu_object_operations      osd_lu_obj_ops;
 static const struct dt_object_operations      osd_obj_ops;
 static const struct dt_object_operations      osd_obj_ea_ops;
+static const struct dt_object_operations      osd_obj_otable_it_ops;
 static const struct dt_index_operations       osd_index_iam_ops;
 static const struct dt_index_operations       osd_index_ea_ops;
 
@@ -524,13 +525,18 @@ static int osd_object_init(const struct lu_env *env, struct lu_object *l,
        LINVRNT(osd_invariant(obj));
 
        result = osd_fid_lookup(env, obj, lu_object_fid(l), conf);
-        obj->oo_dt.do_body_ops = &osd_body_ops_new;
-        if (result == 0) {
-                if (obj->oo_inode != NULL)
-                        osd_object_init0(obj);
-        }
-        LINVRNT(osd_invariant(obj));
-        return result;
+       obj->oo_dt.do_body_ops = &osd_body_ops_new;
+       if (result == 0) {
+               if (obj->oo_inode != NULL) {
+                       osd_object_init0(obj);
+               } else if (fid_is_otable_it(&l->lo_header->loh_fid)) {
+                       obj->oo_dt.do_ops = &osd_obj_otable_it_ops;
+                       /* LFSCK iterator object is special without inode */
+                       l->lo_header->loh_attr |= LOHA_EXISTS;
+               }
+       }
+       LINVRNT(osd_invariant(obj));
+       return result;
 }
 
 /*
@@ -2758,6 +2764,15 @@ static int osd_index_try(const struct lu_env *env, struct dt_object *dt,
         return result;
 }
 
+/* do_attr_get method for the object table iterator object: reports no
+ * valid attributes (la_valid = 0) and always succeeds. */
+static int osd_otable_it_attr_get(const struct lu_env *env,
+                                struct dt_object *dt,
+                                struct lu_attr *attr,
+                                struct lustre_capa *capa)
+{
+       attr->la_valid = 0;
+       return 0;
+}
+
 static const struct dt_object_operations osd_obj_ops = {
         .do_read_lock         = osd_object_read_lock,
         .do_write_lock        = osd_object_write_lock,
@@ -2822,6 +2837,11 @@ static const struct dt_object_operations osd_obj_ea_ops = {
         .do_data_get          = osd_data_get,
 };
 
+/* Object operations for the object table iterator object; only attribute
+ * query and index negotiation are provided. */
+static const struct dt_object_operations osd_obj_otable_it_ops = {
+       .do_attr_get    = osd_otable_it_attr_get,
+       .do_index_try   = osd_index_try,
+};
+
 static int osd_index_declare_iam_delete(const struct lu_env *env,
                                         struct dt_object *dt,
                                         const struct dt_key *key,