Whamcloud - gitweb
LU-2915 lfsck: NO scrub for agent inode and remote parent
[fs/lustre-release.git] / lustre / mdd / mdd_lfsck.c
index fc2c469..3adfda1 100644 (file)
@@ -20,7 +20,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2012 Whamcloud, Inc.
+ * Copyright (c) 2012, 2013, Intel Corporation.
  */
 /*
  * lustre/mdd/mdd_lfsck.c
 
 #include <lustre/lustre_idl.h>
 #include <lustre_fid.h>
+#include <obd_support.h>
 
 #include "mdd_internal.h"
+#include "mdd_lfsck.h"
+
+/* Half of CFS_HZ; NOTE(review): presumably half a second in jiffies --
+ * confirm against the CFS_HZ definition. */
+#define HALF_SEC                       (CFS_HZ >> 1)
+/* NOTE(review): presumably the checkpoint period in seconds; the user of
+ * this value is outside this excerpt -- confirm there. */
+#define LFSCK_CHECKPOINT_INTERVAL      60
+
+/* Name entry states; the code consuming them is outside this excerpt. */
+#define LFSCK_NAMEENTRY_DEAD           1 /* The object has been unlinked. */
+#define LFSCK_NAMEENTRY_REMOVED        2 /* The entry has been removed. */
+#define LFSCK_NAMEENTRY_RECREATED      3 /* The entry has been recreated. */
+
+/* NOTE(review): presumably the names of the LFSCK bookmark and namespace
+ * objects; non-static, so they may be referenced from other files. */
+const char lfsck_bookmark_name[] = "lfsck_bookmark";
+const char lfsck_namespace_name[] = "lfsck_namespace";
+
+/* Printable LFSCK status names, NULL terminated.
+ * NOTE(review): presumably indexed by the ln_status value (LS_INIT, ...);
+ * confirm the order against the status enum. */
+static const char *lfsck_status_names[] = {
+       "init",
+       "scanning-phase1",
+       "scanning-phase2",
+       "completed",
+       "failed",
+       "stopped",
+       "paused",
+       "crashed",
+       NULL
+};
+
+/* Printable names for LFSCK flag bits, NULL terminated.
+ * NOTE(review): presumably entry i names bit i, as lfsck_bits_dump()
+ * indexes its name table by bit position -- confirm at the call site. */
+static const char *lfsck_flags_names[] = {
+       "scanned-once",
+       "inconsistent",
+       "upgrade",
+       NULL
+};
+
+/* Printable names for LFSCK parameter bits, NULL terminated.
+ * NOTE(review): presumably entry i names bit i of lb_param (e.g. "dryrun"
+ * for LPF_DRYRUN) -- confirm against the LPF_* definitions. */
+static const char *lfsck_param_names[] = {
+       "failout",
+       "dryrun",
+       NULL
+};
+
+/* misc functions */
+
+/* Map an md_lfsck back to the mdd_device that embeds it as mdd_lfsck. */
+static inline struct mdd_device *mdd_lfsck2mdd(struct md_lfsck *lfsck)
+{
+       struct mdd_device *owner;
+
+       owner = container_of0(lfsck, struct mdd_device, mdd_lfsck);
+       return owner;
+}
 
+/* Return the obd_name of the OBD device behind the mdd_device that owns
+ * \a lfsck; used as the prefix of log messages in this file. */
 static inline char *mdd_lfsck2name(struct md_lfsck *lfsck)
 {
-       struct mdd_device *mdd;
+       struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
+
+       return mdd2obd_dev(mdd)->obd_name;
+}
+
+/* Take one reference on \a com (lc_ref); dropped again with
+ * mdd_lfsck_component_put(). */
+static inline void mdd_lfsck_component_get(struct lfsck_component *com)
+{
+       atomic_inc(&com->lc_ref);
+}
+
+/* Drop one reference on \a com. When the last reference goes away, put the
+ * backing object (if any), free both file buffers (if allocated) and free
+ * the component itself. */
+static inline void mdd_lfsck_component_put(const struct lu_env *env,
+                                          struct lfsck_component *com)
+{
+       /* Somebody else still holds a reference: nothing to release. */
+       if (!atomic_dec_and_test(&com->lc_ref))
+               return;
+
+       if (com->lc_obj != NULL)
+               lu_object_put(env, &com->lc_obj->do_lu);
+
+       if (com->lc_file_ram != NULL)
+               OBD_FREE(com->lc_file_ram, com->lc_file_size);
+
+       if (com->lc_file_disk != NULL)
+               OBD_FREE(com->lc_file_disk, com->lc_file_size);
+
+       OBD_FREE_PTR(com);
+}
+
+/* Walk \a list (linked through lc_link) and return the first component
+ * whose lc_type equals \a type, or NULL when there is none. No reference
+ * is taken here and \a lfsck is not used. */
+static inline struct lfsck_component *
+__mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type, cfs_list_t *list)
+{
+       struct lfsck_component *found = NULL;
+       struct lfsck_component *pos;
+
+       cfs_list_for_each_entry(pos, list, lc_link) {
+               if (pos->lc_type != type)
+                       continue;
+
+               found = pos;
+               break;
+       }
+       return found;
+}
+
+/* Look for the component of \a type on the scan, double-scan and idle lists
+ * (in that order) while holding ml_lock. A component that is found is
+ * returned with one extra reference; NULL is returned otherwise. */
+static struct lfsck_component *
+mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type)
+{
+       struct lfsck_component *hit;
+
+       spin_lock(&lfsck->ml_lock);
+       hit = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_scan);
+       if (hit == NULL)
+               hit = __mdd_lfsck_component_find(lfsck, type,
+                                                &lfsck->ml_list_double_scan);
+       if (hit == NULL)
+               hit = __mdd_lfsck_component_find(lfsck, type,
+                                                &lfsck->ml_list_idle);
+
+       /* Pin the result before the lock that protects the lists is
+        * released. */
+       if (hit != NULL)
+               mdd_lfsck_component_get(hit);
+       spin_unlock(&lfsck->ml_lock);
+       return hit;
+}
+
+/* Unlink \a com from the lists it is on via lc_link and lc_link_dir, then
+ * drop one reference on it. No lock is taken here.
+ * NOTE(review): presumably the caller serializes list access -- confirm. */
+static void mdd_lfsck_component_cleanup(const struct lu_env *env,
+                                       struct lfsck_component *com)
+{
+       if (!cfs_list_empty(&com->lc_link))
+               cfs_list_del_init(&com->lc_link);
+       if (!cfs_list_empty(&com->lc_link_dir))
+               cfs_list_del_init(&com->lc_link_dir);
+
+       mdd_lfsck_component_put(env, com);
+}
+
+/**
+ * Print "<prefix>: <name>,<name>,...\n" for the bits set in \a bits, or
+ * "<prefix>:\n" when no bit is set. names[i] is the label of bit i.
+ *
+ * On success \a *buf is advanced past the text written and \a *len is
+ * reduced by the same amount.
+ *
+ * \retval +ve: number of bytes written.
+ * \retval -ENOSPC: the output does not fit into the \a *len bytes left at
+ *                 \a *buf (or snprintf() failed).
+ */
+static int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
+                          const char *prefix)
+{
+       int save = *len;
+       int flag;
+       int rc;
+       int i;
+
+       rc = snprintf(*buf, *len, "%s:%c", prefix, bits != 0 ? ' ' : '\n');
+       /* snprintf() returns the length the full output would need, so
+        * rc >= *len means it was truncated; advancing by rc would then
+        * move *buf past the buffer and make *len negative. */
+       if (rc <= 0 || rc >= *len)
+               return -ENOSPC;
+
+       *buf += rc;
+       *len -= rc;
+       for (i = 0, flag = 1; bits != 0; i++, flag = 1 << i) {
+               if (flag & bits) {
+                       bits &= ~flag;
+                       /* ',' between names, '\n' after the last one. */
+                       rc = snprintf(*buf, *len, "%s%c", names[i],
+                                     bits != 0 ? ',' : '\n');
+                       if (rc <= 0 || rc >= *len)
+                               return -ENOSPC;
+
+                       *buf += rc;
+                       *len -= rc;
+               }
+       }
+       return save - *len;
+}
+
+/**
+ * Print "<prefix>: <N> seconds\n", where N is the current time in seconds
+ * minus \a time, or "<prefix>: N/A\n" when \a time is 0.
+ *
+ * On success \a *buf is advanced past the text written and \a *len is
+ * reduced by the same amount.
+ *
+ * \retval +ve: number of bytes written.
+ * \retval -ENOSPC: the output does not fit into the \a *len bytes left at
+ *                 \a *buf (or snprintf() failed).
+ */
+static int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix)
+{
+       int rc;
+
+       if (time != 0)
+               rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix,
+                             cfs_time_current_sec() - time);
+       else
+               rc = snprintf(*buf, *len, "%s: N/A\n", prefix);
+       /* snprintf() returns the untruncated length: rc >= *len means the
+        * text was cut short and must not be accounted as written. */
+       if (rc <= 0 || rc >= *len)
+               return -ENOSPC;
+
+       *buf += rc;
+       *len -= rc;
+       return rc;
+}
+
+/**
+ * Print "<prefix>: <oit cookie>, <dir parent FID>, <dir cookie>\n" for
+ * \a pos. When lp_dir_parent is the zero FID the last two fields are
+ * printed as "N/A"; the first one is printed as "N/A" too if lp_oit_cookie
+ * is 0 in that case.
+ *
+ * On success \a *buf is advanced past the text written and \a *len is
+ * reduced by the same amount.
+ *
+ * \retval +ve: number of bytes written.
+ * \retval -ENOSPC: the output does not fit into the \a *len bytes left at
+ *                 \a *buf (or snprintf() failed).
+ */
+static int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
+                         const char *prefix)
+{
+       int rc;
+
+       if (fid_is_zero(&pos->lp_dir_parent)) {
+               if (pos->lp_oit_cookie == 0)
+                       rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n",
+                                     prefix);
+               else
+                       rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n",
+                                     prefix, pos->lp_oit_cookie);
+       } else {
+               rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n",
+                             prefix, pos->lp_oit_cookie,
+                             PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
+       }
+       /* snprintf() returns the untruncated length: rc >= *len means the
+        * text was cut short and must not be accounted as written. */
+       if (rc <= 0 || rc >= *len)
+               return -ENOSPC;
+
+       *buf += rc;
+       *len -= rc;
+       return rc;
+}
+
+/**
+ * Fill \a pos with the current iteration position of \a lfsck.
+ *
+ * If there is no object-table iterator (ml_di_oit == NULL) \a pos is
+ * zeroed. Otherwise lp_oit_cookie is taken from the OIT iterator, and the
+ * directory part (lp_dir_parent, lp_dir_cookie) is taken from the directory
+ * iterator when one is open and has not reached MDS_DIR_END_OFF; in all
+ * other cases the directory part is zeroed.
+ *
+ * \param init when false and the current OIT entry has not been processed
+ *             yet, the stored OIT cookie is decremented by one
+ */
+static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck,
+                              struct lfsck_position *pos, bool init)
+{
+       const struct dt_it_ops *iops = &lfsck->ml_obj_oit->do_index_ops->dio_it;
+
+       /* The iterator pointers are sampled under ml_lock. */
+       spin_lock(&lfsck->ml_lock);
+       if (unlikely(lfsck->ml_di_oit == NULL)) {
+               spin_unlock(&lfsck->ml_lock);
+               memset(pos, 0, sizeof(*pos));
+               return;
+       }
+
+       pos->lp_oit_cookie = iops->store(env, lfsck->ml_di_oit);
+       /* NOTE(review): presumably steps back so that a resume revisits the
+        * entry that was not finished -- confirm with the scan loop. */
+       if (!lfsck->ml_current_oit_processed && !init)
+               pos->lp_oit_cookie--;
+
+       LASSERT(pos->lp_oit_cookie > 0);
+
+       if (lfsck->ml_di_dir != NULL) {
+               struct dt_object *dto = lfsck->ml_obj_dir;
+
+               pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
+                                                       lfsck->ml_di_dir);
+
+               /* A cookie at or past the end offset is recorded as "no
+                * directory position". */
+               if (pos->lp_dir_cookie >= MDS_DIR_END_OFF) {
+                       fid_zero(&pos->lp_dir_parent);
+                       pos->lp_dir_cookie = 0;
+               } else {
+                       pos->lp_dir_parent = *lu_object_fid(&dto->do_lu);
+               }
+       } else {
+               fid_zero(&pos->lp_dir_parent);
+               pos->lp_dir_cookie = 0;
+       }
+       spin_unlock(&lfsck->ml_lock);
+}
+
+/* Reset every field of \a pos to zero. */
+static inline void mdd_lfsck_pos_set_zero(struct lfsck_position *pos)
+{
+       memset(pos, 0, sizeof(*pos));
+}
+
+/* Return 1 when \a pos has a zero OIT cookie and a zero directory parent
+ * FID, 0 otherwise. lp_dir_cookie is not examined. */
+static inline int mdd_lfsck_pos_is_zero(const struct lfsck_position *pos)
+{
+       if (pos->lp_oit_cookie != 0)
+               return 0;
+
+       return fid_is_zero(&pos->lp_dir_parent) ? 1 : 0;
+}
+
+/**
+ * Three-way comparison of two positions.
+ *
+ * The OIT cookies are compared first. With equal OIT cookies, a position
+ * without a directory parent (zero FID) sorts before one that has a parent;
+ * when both have a parent the parents are asserted to be the same FID and
+ * the directory cookies decide.
+ *
+ * \retval -1: \a pos1 is before \a pos2.
+ * \retval 0: both positions are equal.
+ * \retval 1: \a pos1 is after \a pos2.
+ */
+static inline int mdd_lfsck_pos_is_eq(const struct lfsck_position *pos1,
+                                     const struct lfsck_position *pos2)
+{
+       int nodir1;
+       int nodir2;
+
+       if (pos1->lp_oit_cookie != pos2->lp_oit_cookie)
+               return pos1->lp_oit_cookie < pos2->lp_oit_cookie ? -1 : 1;
+
+       nodir1 = fid_is_zero(&pos1->lp_dir_parent);
+       nodir2 = fid_is_zero(&pos2->lp_dir_parent);
+       if (nodir1 && nodir2)
+               return 0;
+
+       if (nodir1)
+               return -1;
+
+       if (nodir2)
+               return 1;
+
+       LASSERT(lu_fid_eq(&pos1->lp_dir_parent, &pos2->lp_dir_parent));
+
+       if (pos1->lp_dir_cookie != pos2->lp_dir_cookie)
+               return pos1->lp_dir_cookie < pos2->lp_dir_cookie ? -1 : 1;
+
+       return 0;
+}
+
+/* Close the directory currently being iterated: clear ml_di_dir, release
+ * and finalize the directory iterator, clear ml_obj_dir and drop the
+ * reference on the directory object. ml_obj_dir and ml_di_dir are
+ * dereferenced unconditionally, so a directory must be open on entry. */
+static void mdd_lfsck_close_dir(const struct lu_env *env,
+                               struct md_lfsck *lfsck)
+{
+       struct dt_object        *dir_obj  = lfsck->ml_obj_dir;
+       const struct dt_it_ops  *dir_iops = &dir_obj->do_index_ops->dio_it;
+       struct dt_it            *dir_di   = lfsck->ml_di_dir;
+
+       /* ml_di_dir is cleared under ml_lock, the same lock under which
+        * mdd_lfsck_pos_fill() reads it. */
+       spin_lock(&lfsck->ml_lock);
+       lfsck->ml_di_dir = NULL;
+       spin_unlock(&lfsck->ml_lock);
+
+       dir_iops->put(env, dir_di);
+       dir_iops->fini(env, dir_di);
+       lfsck->ml_obj_dir = NULL;
+       lu_object_put(env, &dir_obj->do_lu);
+}
+
+/* Record \a limit in the in-memory bookmark and derive the throttle pair
+ * used by mdd_lfsck_control_speed(): after ml_sleep_rate newly scanned
+ * items the scanner sleeps for ml_sleep_jif jiffies. Both are 0 when
+ * \a limit is LFSCK_SPEED_NO_LIMIT. */
+static void __mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit)
+{
+       lfsck->ml_bookmark_ram.lb_speed_limit = limit;
+
+       if (limit == LFSCK_SPEED_NO_LIMIT) {
+               lfsck->ml_sleep_jif = 0;
+               lfsck->ml_sleep_rate = 0;
+               return;
+       }
+
+       if (limit > CFS_HZ) {
+               /* More than CFS_HZ items: one jiffy per batch. */
+               lfsck->ml_sleep_rate = limit / CFS_HZ;
+               lfsck->ml_sleep_jif = 1;
+       } else {
+               /* At most CFS_HZ items: sleep after every item. */
+               lfsck->ml_sleep_rate = 1;
+               lfsck->ml_sleep_jif = CFS_HZ / limit;
+       }
+}
+
+/* Throttle the scan: once ml_new_scanned has reached ml_sleep_rate (and a
+ * sleep interval is configured), wait on the thread's control waitqueue
+ * with a timeout of ml_sleep_jif, or until the thread is no longer running,
+ * then reset ml_new_scanned. */
+static void mdd_lfsck_control_speed(struct md_lfsck *lfsck)
+{
+       struct ptlrpc_thread *thread = &lfsck->ml_thread;
+       struct l_wait_info    lwi;
+
+       /* Unlocked check first; it is repeated under ml_lock before the
+        * timeout value is read. */
+       if (lfsck->ml_sleep_jif > 0 &&
+           lfsck->ml_new_scanned >= lfsck->ml_sleep_rate) {
+               spin_lock(&lfsck->ml_lock);
+               if (likely(lfsck->ml_sleep_jif > 0 &&
+                          lfsck->ml_new_scanned >= lfsck->ml_sleep_rate)) {
+                       lwi = LWI_TIMEOUT_INTR(lfsck->ml_sleep_jif, NULL,
+                                              LWI_ON_SIGNAL_NOOP, NULL);
+                       /* The lock is dropped before waiting. */
+                       spin_unlock(&lfsck->ml_lock);
+
+                       l_wait_event(thread->t_ctl_waitq,
+                                    !thread_is_running(thread),
+                                    &lwi);
+                       lfsck->ml_new_scanned = 0;
+               } else {
+                       spin_unlock(&lfsck->ml_lock);
+               }
+       }
+}
+
+/* lfsck_bookmark file ops */
+
+/* Convert the bookmark fields magic, version, param and speed limit from
+ * little-endian \a src into CPU byte order in \a des. */
+static inline void mdd_lfsck_bookmark_to_cpu(struct lfsck_bookmark *des,
+                                            struct lfsck_bookmark *src)
+{
+       /* 32-bit fields */
+       des->lb_magic = le32_to_cpu(src->lb_magic);
+
+       /* 16-bit fields */
+       des->lb_version = le16_to_cpu(src->lb_version);
+       des->lb_param = le16_to_cpu(src->lb_param);
+
+       /* 32-bit fields */
+       des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit);
+}
+
+/* Convert the bookmark fields magic, version, param and speed limit from
+ * CPU byte order in \a src into little-endian in \a des. */
+static inline void mdd_lfsck_bookmark_to_le(struct lfsck_bookmark *des,
+                                           struct lfsck_bookmark *src)
+{
+       /* 32-bit fields */
+       des->lb_magic = cpu_to_le32(src->lb_magic);
+
+       /* 16-bit fields */
+       des->lb_version = cpu_to_le16(src->lb_version);
+       des->lb_param = cpu_to_le16(src->lb_param);
+
+       /* 32-bit fields */
+       des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit);
+}
+
+/**
+ * Read the bookmark record from offset 0 of ml_bookmark_obj into
+ * ml_bookmark_disk and convert it into ml_bookmark_ram.
+ *
+ * \retval 0: loaded, the magic matches.
+ * \retval -ENODATA: the bookmark is empty (-EFAULT from the read with the
+ *                  offset still at 0) or carries a wrong magic; it is to
+ *                  be processed as a new bookmark.
+ * \retval -ve: other read failures.
+ */
+static int mdd_lfsck_bookmark_load(const struct lu_env *env,
+                                  struct md_lfsck *lfsck)
+{
+       struct lfsck_bookmark *bm;
+       loff_t                 off  = 0;
+       int                    size = sizeof(struct lfsck_bookmark);
+       int                    rc;
+
+       rc = dt_record_read(env, lfsck->ml_bookmark_obj,
+                           mdd_buf_get(env, &lfsck->ml_bookmark_disk, size),
+                           &off);
+       if (rc != 0) {
+               /* return -ENODATA for empty lfsck_bookmark. */
+               if (rc == -EFAULT && off == 0)
+                       return -ENODATA;
+
+               CERROR("%.16s: fail to load lfsck_bookmark, "
+                      "expected = %d, rc = %d\n",
+                      mdd_lfsck2name(lfsck), size, rc);
+               return rc;
+       }
+
+       bm = &lfsck->ml_bookmark_ram;
+       mdd_lfsck_bookmark_to_cpu(bm, &lfsck->ml_bookmark_disk);
+       if (bm->lb_magic == LFSCK_BOOKMARK_MAGIC)
+               return 0;
+
+       CWARN("%.16s: invalid lfsck_bookmark magic "
+             "0x%x != 0x%x\n", mdd_lfsck2name(lfsck),
+             bm->lb_magic, LFSCK_BOOKMARK_MAGIC);
+       /* Process it as new lfsck_bookmark. */
+       return -ENODATA;
+}
+
+/**
+ * Convert ml_bookmark_ram to little-endian in ml_bookmark_disk and write
+ * it at offset 0 of ml_bookmark_obj inside a local transaction on the
+ * bottom device.
+ *
+ * No lock is taken here; mdd_lfsck_bookmark_init() calls this with
+ * ml_mutex held.
+ *
+ * \retval 0: stored.
+ * \retval -ve: failure to create, declare or start the transaction, or to
+ *             write the record.
+ */
+static int mdd_lfsck_bookmark_store(const struct lu_env *env,
+                                   struct md_lfsck *lfsck)
+{
+       struct mdd_device *mdd    = mdd_lfsck2mdd(lfsck);
+       struct thandle    *handle;
+       struct dt_object  *obj    = lfsck->ml_bookmark_obj;
+       loff_t             pos    = 0;
+       int                len    = sizeof(struct lfsck_bookmark);
+       int                rc;
+       ENTRY;
+
+       mdd_lfsck_bookmark_to_le(&lfsck->ml_bookmark_disk,
+                                &lfsck->ml_bookmark_ram);
+       handle = dt_trans_create(env, mdd->mdd_bottom);
+       if (IS_ERR(handle)) {
+               rc = PTR_ERR(handle);
+               CERROR("%.16s: fail to create trans for storing "
+                      "lfsck_bookmark: %d\n", mdd_lfsck2name(lfsck), rc);
+               RETURN(rc);
+       }
+
+       rc = dt_declare_record_write(env, obj, len, 0, handle);
+       if (rc != 0) {
+               CERROR("%.16s: fail to declare trans for storing "
+                      "lfsck_bookmark: %d\n", mdd_lfsck2name(lfsck), rc);
+               GOTO(out, rc);
+       }
+
+       rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
+       if (rc != 0) {
+               CERROR("%.16s: fail to start trans for storing "
+                      "lfsck_bookmark: %d\n", mdd_lfsck2name(lfsck), rc);
+               GOTO(out, rc);
+       }
+
+       rc = dt_record_write(env, obj,
+                            mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
+                            &pos, handle);
+       if (rc != 0)
+               CERROR("%.16s: fail to store lfsck_bookmark, expected = %d, "
+                      "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
+
+       GOTO(out, rc);
+
+out:
+       /* The transaction is stopped on every path that created it. */
+       dt_trans_stop(env, mdd->mdd_bottom, handle);
+       return rc;
+}
+
+/* Reset the in-memory bookmark to a fresh one (magic and version set, all
+ * other fields zero) and store it while holding ml_mutex. Returns the
+ * result of mdd_lfsck_bookmark_store(). */
+static int mdd_lfsck_bookmark_init(const struct lu_env *env,
+                                  struct md_lfsck *lfsck)
+{
+       struct lfsck_bookmark *fresh = &lfsck->ml_bookmark_ram;
+       int                    result;
+
+       memset(fresh, 0, sizeof(*fresh));
+       fresh->lb_magic = LFSCK_BOOKMARK_MAGIC;
+       fresh->lb_version = LFSCK_VERSION_V2;
+
+       mutex_lock(&lfsck->ml_mutex);
+       result = mdd_lfsck_bookmark_store(env, lfsck);
+       mutex_unlock(&lfsck->ml_mutex);
+
+       return result;
+}
+
+/* lfsck_namespace file ops */
+
+/* Convert a position (OIT cookie, directory parent FID, directory cookie)
+ * from little-endian \a src into CPU byte order in \a des. */
+static inline void mdd_lfsck_position_to_cpu(struct lfsck_position *des,
+                                            struct lfsck_position *src)
+{
+       /* object table iteration part */
+       des->lp_oit_cookie = le64_to_cpu(src->lp_oit_cookie);
+
+       /* directory traversal part */
+       fid_le_to_cpu(&des->lp_dir_parent, &src->lp_dir_parent);
+       des->lp_dir_cookie = le64_to_cpu(src->lp_dir_cookie);
+}
+
+/* Convert a position (OIT cookie, directory parent FID, directory cookie)
+ * from CPU byte order in \a src into little-endian in \a des. */
+static inline void mdd_lfsck_position_to_le(struct lfsck_position *des,
+                                           struct lfsck_position *src)
+{
+       /* object table iteration part */
+       des->lp_oit_cookie = cpu_to_le64(src->lp_oit_cookie);
+
+       /* directory traversal part */
+       fid_cpu_to_le(&des->lp_dir_parent, &src->lp_dir_parent);
+       des->lp_dir_cookie = cpu_to_le64(src->lp_dir_cookie);
+}
+
+/* Convert every field of the lfsck_namespace record handled below from
+ * little-endian \a src into CPU byte order in \a des. */
+static inline void mdd_lfsck_namespace_to_cpu(struct lfsck_namespace *des,
+                                             struct lfsck_namespace *src)
+{
+       /* header, status and 32-bit counters */
+       des->ln_magic = le32_to_cpu(src->ln_magic);
+       des->ln_status = le32_to_cpu(src->ln_status);
+       des->ln_flags = le32_to_cpu(src->ln_flags);
+       des->ln_success_count = le32_to_cpu(src->ln_success_count);
+       des->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1);
+       des->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2);
+
+       /* time stamps */
+       des->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
+       des->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
+       des->ln_time_last_checkpoint =
+                               le64_to_cpu(src->ln_time_last_checkpoint);
+
+       /* positions */
+       mdd_lfsck_position_to_cpu(&des->ln_pos_latest_start,
+                                 &src->ln_pos_latest_start);
+       mdd_lfsck_position_to_cpu(&des->ln_pos_last_checkpoint,
+                                 &src->ln_pos_last_checkpoint);
+       mdd_lfsck_position_to_cpu(&des->ln_pos_first_inconsistent,
+                                 &src->ln_pos_first_inconsistent);
+
+       /* item counters */
+       des->ln_items_checked = le64_to_cpu(src->ln_items_checked);
+       des->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
+       des->ln_items_failed = le64_to_cpu(src->ln_items_failed);
+       des->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
+       des->ln_mlinked_checked = le64_to_cpu(src->ln_mlinked_checked);
+
+       /* phase2 counters and latest scanned FID */
+       des->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
+       des->ln_objs_repaired_phase2 =
+                               le64_to_cpu(src->ln_objs_repaired_phase2);
+       des->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
+       des->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
+       des->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
+       fid_le_to_cpu(&des->ln_fid_latest_scanned_phase2,
+                     &src->ln_fid_latest_scanned_phase2);
+}
+
+/* Convert every field of the lfsck_namespace record handled below from CPU
+ * byte order in \a src into little-endian in \a des. */
+static inline void mdd_lfsck_namespace_to_le(struct lfsck_namespace *des,
+                                            struct lfsck_namespace *src)
+{
+       /* header, status and 32-bit counters */
+       des->ln_magic = cpu_to_le32(src->ln_magic);
+       des->ln_status = cpu_to_le32(src->ln_status);
+       des->ln_flags = cpu_to_le32(src->ln_flags);
+       des->ln_success_count = cpu_to_le32(src->ln_success_count);
+       des->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1);
+       des->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2);
+
+       /* time stamps */
+       des->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
+       des->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
+       des->ln_time_last_checkpoint =
+                               cpu_to_le64(src->ln_time_last_checkpoint);
+
+       /* positions */
+       mdd_lfsck_position_to_le(&des->ln_pos_latest_start,
+                                &src->ln_pos_latest_start);
+       mdd_lfsck_position_to_le(&des->ln_pos_last_checkpoint,
+                                &src->ln_pos_last_checkpoint);
+       mdd_lfsck_position_to_le(&des->ln_pos_first_inconsistent,
+                                &src->ln_pos_first_inconsistent);
+
+       /* item counters */
+       des->ln_items_checked = cpu_to_le64(src->ln_items_checked);
+       des->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
+       des->ln_items_failed = cpu_to_le64(src->ln_items_failed);
+       des->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
+       des->ln_mlinked_checked = cpu_to_le64(src->ln_mlinked_checked);
+
+       /* phase2 counters and latest scanned FID */
+       des->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
+       des->ln_objs_repaired_phase2 =
+                               cpu_to_le64(src->ln_objs_repaired_phase2);
+       des->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
+       des->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
+       des->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
+       fid_cpu_to_le(&des->ln_fid_latest_scanned_phase2,
+                     &src->ln_fid_latest_scanned_phase2);
+}
+
+/**
+ * Read the lfsck_namespace xattr of the component object into lc_file_disk
+ * and convert it into CPU byte order in lc_file_ram.
+ *
+ * \retval +ve: the stored lfsck_namespace is broken (unexpected length or
+ *             wrong magic), the caller should reset it.
+ * \retval 0: succeed.
+ * \retval -ve: failed cases, including -ENODATA which is returned without
+ *             being logged.
+ */
+static int mdd_lfsck_namespace_load(const struct lu_env *env,
+                                   struct lfsck_component *com)
+{
+       struct lfsck_namespace *ns;
+       int                     size = com->lc_file_size;
+       int                     rc;
+
+       rc = dt_xattr_get(env, com->lc_obj,
+                         mdd_buf_get(env, com->lc_file_disk, size),
+                         XATTR_NAME_LFSCK_NAMESPACE, BYPASS_CAPA);
+       if (rc != size) {
+               if (rc == -ENODATA)
+                       return rc;
+
+               CERROR("%.16s: fail to load lfsck_namespace, expected = %d, "
+                      "rc = %d\n", mdd_lfsck2name(com->lc_lfsck), size, rc);
+               /* A non-negative rc here is a length mismatch. */
+               return rc >= 0 ? 1 : rc;
+       }
+
+       ns = com->lc_file_ram;
+       mdd_lfsck_namespace_to_cpu(ns,
+                       (struct lfsck_namespace *)com->lc_file_disk);
+       if (ns->ln_magic == LFSCK_NAMESPACE_MAGIC)
+               return 0;
+
+       CWARN("%.16s: invalid lfsck_namespace magic "
+             "0x%x != 0x%x\n",
+             mdd_lfsck2name(com->lc_lfsck),
+             ns->ln_magic, LFSCK_NAMESPACE_MAGIC);
+       return 1;
+}
+
+/**
+ * Convert lc_file_ram to little-endian in lc_file_disk and store it as the
+ * XATTR_NAME_LFSCK_NAMESPACE xattr of the component object inside a local
+ * transaction on the bottom device.
+ *
+ * No lock is taken here; mdd_lfsck_namespace_init() calls this with lc_sem
+ * held for write.
+ *
+ * \param init true to create the xattr (LU_XATTR_CREATE), false to
+ *             replace an existing one (LU_XATTR_REPLACE)
+ *
+ * \retval 0: stored.
+ * \retval -ve: failure to create, declare or start the transaction, or to
+ *             set the xattr.
+ */
+static int mdd_lfsck_namespace_store(const struct lu_env *env,
+                                    struct lfsck_component *com, bool init)
+{
+       struct dt_object  *obj    = com->lc_obj;
+       struct md_lfsck   *lfsck  = com->lc_lfsck;
+       struct mdd_device *mdd    = mdd_lfsck2mdd(lfsck);
+       struct thandle    *handle;
+       int                len    = com->lc_file_size;
+       int                rc;
+       ENTRY;
+
+       mdd_lfsck_namespace_to_le((struct lfsck_namespace *)com->lc_file_disk,
+                                 (struct lfsck_namespace *)com->lc_file_ram);
+       handle = dt_trans_create(env, mdd->mdd_bottom);
+       if (IS_ERR(handle)) {
+               rc = PTR_ERR(handle);
+               CERROR("%.16s: fail to create trans for storing "
+                      "lfsck_namespace: %d\n", mdd_lfsck2name(lfsck), rc);
+               RETURN(rc);
+       }
+
+       rc = dt_declare_xattr_set(env, obj,
+                                 mdd_buf_get(env, com->lc_file_disk, len),
+                                 XATTR_NAME_LFSCK_NAMESPACE, 0, handle);
+       if (rc != 0) {
+               CERROR("%.16s: fail to declare trans for storing "
+                      "lfsck_namespace: %d\n", mdd_lfsck2name(lfsck), rc);
+               GOTO(out, rc);
+       }
+
+       rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
+       if (rc != 0) {
+               CERROR("%.16s: fail to start trans for storing "
+                      "lfsck_namespace: %d\n", mdd_lfsck2name(lfsck), rc);
+               GOTO(out, rc);
+       }
+
+       rc = dt_xattr_set(env, obj,
+                         mdd_buf_get(env, com->lc_file_disk, len),
+                         XATTR_NAME_LFSCK_NAMESPACE,
+                         init ? LU_XATTR_CREATE : LU_XATTR_REPLACE,
+                         handle, BYPASS_CAPA);
+       if (rc != 0)
+               CERROR("%.16s: fail to store lfsck_namespace, len = %d, "
+                      "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
+
+       GOTO(out, rc);
+
+out:
+       /* The transaction is stopped on every path that created it. */
+       dt_trans_stop(env, mdd->mdd_bottom, handle);
+       return rc;
+}
+
+/* Reset the in-memory lfsck_namespace to a fresh one (magic set, status
+ * LS_INIT, everything else zero) and create it on disk while holding
+ * lc_sem for write. Returns the result of mdd_lfsck_namespace_store(). */
+static int mdd_lfsck_namespace_init(const struct lu_env *env,
+                                   struct lfsck_component *com)
+{
+       struct lfsck_namespace *fresh;
+       int                     result;
+
+       fresh = (struct lfsck_namespace *)com->lc_file_ram;
+       memset(fresh, 0, sizeof(*fresh));
+       fresh->ln_magic = LFSCK_NAMESPACE_MAGIC;
+       fresh->ln_status = LS_INIT;
+
+       down_write(&com->lc_sem);
+       result = mdd_lfsck_namespace_store(env, com, true);
+       up_write(&com->lc_sem);
+
+       return result;
+}
+
+/* Look up \a fid in the component's index object and store the record
+ * found in \a *flags. The key is the big-endian form of \a fid, built in
+ * the per-thread mti_fid scratch buffer. Returns the dt_lookup() result. */
+static int mdd_lfsck_namespace_lookup(const struct lu_env *env,
+                                     struct lfsck_component *com,
+                                     const struct lu_fid *fid,
+                                     __u8 *flags)
+{
+       struct lu_fid *be_fid = &mdd_env_info(env)->mti_fid;
+
+       fid_cpu_to_be(be_fid, fid);
+       return dt_lookup(env, com->lc_obj, (struct dt_rec *)flags,
+                        (const struct dt_key *)be_fid, BYPASS_CAPA);
+}
+
+/**
+ * Delete the record keyed by \a fid from the component's index object
+ * inside a local transaction on the bottom device.
+ *
+ * \retval 0: deleted.
+ * \retval -ve: failure to create, declare or start the transaction, or the
+ *             dt_delete() error.
+ */
+static int mdd_lfsck_namespace_delete(const struct lu_env *env,
+                                     struct lfsck_component *com,
+                                     const struct lu_fid *fid)
+{
+       struct mdd_device *mdd    = mdd_lfsck2mdd(com->lc_lfsck);
+       struct lu_fid     *key    = &mdd_env_info(env)->mti_fid;
+       struct thandle    *handle;
+       struct dt_object *obj     = com->lc_obj;
+       int               rc;
+       ENTRY;
+
+       handle = dt_trans_create(env, mdd->mdd_bottom);
+       if (IS_ERR(handle))
+               RETURN(PTR_ERR(handle));
+
+       /* NOTE(review): the declaration is given the CPU-order fid while
+        * the delete below uses the big-endian key -- confirm that the
+        * declare step does not depend on the key contents. */
+       rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       /* The index is keyed by the big-endian form of the FID. */
+       fid_cpu_to_be(key, fid);
+       rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
+                      BYPASS_CAPA);
+
+       GOTO(out, rc);
+
+out:
+       dt_trans_stop(env, mdd->mdd_bottom, handle);
+       return rc;
+}
+
+/**
+ * Record \a flags for \a fid in the component's index object.
+ *
+ * If no record exists for \a fid one is inserted. If a record exists it is
+ * left alone unless \a force is set and the stored flags differ from
+ * \a flags, in which case it is deleted and re-inserted with \a flags in
+ * one transaction.
+ *
+ * \retval 0: the record is up to date (inserted, replaced or left as is).
+ * \retval -ve: lookup, transaction, delete or insert failure.
+ */
+static int mdd_lfsck_namespace_update(const struct lu_env *env,
+                                     struct lfsck_component *com,
+                                     const struct lu_fid *fid,
+                                     __u8 flags, bool force)
+{
+       struct mdd_device *mdd    = mdd_lfsck2mdd(com->lc_lfsck);
+       struct lu_fid     *key    = &mdd_env_info(env)->mti_fid;
+       struct thandle    *handle;
+       struct dt_object *obj     = com->lc_obj;
+       int               rc;
+       bool              exist   = false;
+       __u8              tf;
+       ENTRY;
+
+       rc = mdd_lfsck_namespace_lookup(env, com, fid, &tf);
+       if (rc != 0 && rc != -ENOENT)
+               RETURN(rc);
+
+       if (rc == 0) {
+               /* Keep the existing record unless the caller forces a
+                * change and the flags really differ. */
+               if (!force || flags == tf)
+                       RETURN(0);
+
+               exist = true;
+       }
+
+       handle = dt_trans_create(env, mdd->mdd_bottom);
+       if (IS_ERR(handle))
+               RETURN(PTR_ERR(handle));
+
+       if (exist) {
+               rc = dt_declare_delete(env, obj, (const struct dt_key *)fid,
+                                      handle);
+               if (rc != 0)
+                       GOTO(out, rc);
+       }
+
+       rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
+                              (const struct dt_key *)fid, handle);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       /* The index is keyed by the big-endian form of the FID. */
+       fid_cpu_to_be(key, fid);
+       if (exist) {
+               rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
+                              BYPASS_CAPA);
+               if (rc != 0) {
+                       CERROR("%s: fail to delete "DFID", rc = %d\n",
+                              mdd_lfsck2name(com->lc_lfsck), PFID(fid), rc);
+                       GOTO(out, rc);
+               }
+       }
+
+       rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
+                      (const struct dt_key *)key, handle, BYPASS_CAPA, 1);
+
+       GOTO(out, rc);
+
+out:
+       dt_trans_stop(env, mdd->mdd_bottom, handle);
+       return rc;
+}
+
+/**
+ * \retval +ve repaired
+ * \retval 0   no need to repair
+ * \retval -ve error cases
+ */
+static int mdd_lfsck_namespace_double_scan_one(const struct lu_env *env,
+                                              struct lfsck_component *com,
+                                              struct mdd_object *child,
+                                              __u8 flags)
+{
+       struct mdd_thread_info  *info     = mdd_env_info(env);
+       struct lu_attr          *la       = &info->mti_la;
+       struct lu_name          *cname    = &info->mti_name;
+       struct lu_fid           *pfid     = &info->mti_fid;
+       struct lu_fid           *cfid     = &info->mti_fid2;
+       struct md_lfsck         *lfsck    = com->lc_lfsck;
+       struct mdd_device       *mdd      = mdd_lfsck2mdd(lfsck);
+       struct lfsck_bookmark   *bk       = &lfsck->ml_bookmark_ram;
+       struct lfsck_namespace  *ns       =
+                               (struct lfsck_namespace *)com->lc_file_ram;
+       struct linkea_data       ldata    = { 0 };
+       struct thandle          *handle   = NULL;
+       bool                     locked   = false;
+       bool                     update   = false;
+       int                      count;
+       int                      rc;
+       ENTRY;
+
+       if (com->lc_journal) {
+
+again:
+               LASSERT(!locked);
+
+               com->lc_journal = 1;
+               handle = mdd_trans_create(env, mdd);
+               if (IS_ERR(handle))
+                       RETURN(rc = PTR_ERR(handle));
+
+               rc = mdd_declare_links_add(env, child, handle, NULL);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = mdd_trans_start(env, mdd, handle);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               mdd_write_lock(env, child, MOR_TGT_CHILD);
+               locked = true;
+       }
+
+       if (unlikely(mdd_is_dead_obj(child)))
+               GOTO(stop, rc = 0);
+
+       rc = mdd_links_read(env, child, &ldata);
+       if (rc != 0) {
+               if ((bk->lb_param & LPF_DRYRUN) &&
+                   (rc == -EINVAL || rc == -ENODATA))
+                       rc = 1;
+
+               GOTO(stop, rc);
+       }
+
+       rc = mdd_la_get(env, child, la, BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       ldata.ld_lee = LINKEA_FIRST_ENTRY(ldata);
+       count = ldata.ld_leh->leh_reccount;
+       while (count-- > 0) {
+               struct mdd_object *parent = NULL;
+               struct dt_object *dir;
+
+               linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname,
+                                   pfid);
+               if (!fid_is_sane(pfid))
+                       goto shrink;
+
+               parent = mdd_object_find(env, mdd, pfid);
+               if (parent == NULL)
+                       goto shrink;
+               else if (IS_ERR(parent))
+                       GOTO(stop, rc = PTR_ERR(parent));
+
+               if (!mdd_object_exists(parent))
+                       goto shrink;
+
+               /* XXX: Currently, skip remote object, the consistency for
+                *      remote object will be processed in LFSCK phase III. */
+               if (mdd_object_remote(parent)) {
+                       mdd_object_put(env, parent);
+                       ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
+                       continue;
+               }
+
+               dir = mdd_object_child(parent);
+               if (unlikely(!dt_try_as_dir(env, dir)))
+                       goto shrink;
+
+               /* To guarantee the 'name' is terminated with '0'. */
+               memcpy(info->mti_key, cname->ln_name, cname->ln_namelen);
+               info->mti_key[cname->ln_namelen] = 0;
+               cname->ln_name = info->mti_key;
+               rc = dt_lookup(env, dir, (struct dt_rec *)cfid,
+                              (const struct dt_key *)cname->ln_name,
+                              BYPASS_CAPA);
+               if (rc != 0 && rc != -ENOENT) {
+                       mdd_object_put(env, parent);
+                       GOTO(stop, rc);
+               }
+
+               if (rc == 0) {
+                       if (lu_fid_eq(cfid, mdo2fid(child))) {
+                               mdd_object_put(env, parent);
+                               ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
+                               continue;
+                       }
+
+                       goto shrink;
+               }
+
+               if (ldata.ld_leh->leh_reccount > la->la_nlink)
+                       goto shrink;
+
+               /* XXX: For the case of there is linkea entry, but without name
+                *      entry pointing to the object, and the object link count
+                *      isn't less than the count of name entries, then add the
+                *      name entry back to namespace.
+                *
+                *      It is out of LFSCK 1.5 scope, will implement it in the
+                *      future. Keep the linkEA entry. */
+               mdd_object_put(env, parent);
+               ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
+               continue;
+
+shrink:
+               if (parent != NULL)
+                       mdd_object_put(env, parent);
+               if (bk->lb_param & LPF_DRYRUN)
+                       RETURN(1);
+
+               CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n",
+                      PFID(mdo2fid(child)), cname->ln_namelen, cname->ln_name,
+                      PFID(pfid));
+               linkea_del_buf(&ldata, cname);
+               update = true;
+       }
+
+       if (update) {
+               if (!com->lc_journal) {
+                       com->lc_journal = 1;
+                       goto again;
+               }
+
+               rc = mdd_links_write(env, child, &ldata, handle);
+       }
+
+       GOTO(stop, rc);
+
+stop:
+       if (locked)
+               mdd_write_unlock(env, child);
+
+       if (handle != NULL)
+               mdd_trans_stop(env, mdd, rc, handle);
+
+       if (rc == 0 && update) {
+               ns->ln_objs_nlink_repaired++;
+               rc = 1;
+       }
+       return rc;
+}
+
+/* namespace APIs */
+
+/*
+ * Reset the namespace LFSCK component: wipe the in-RAM lfsck_namespace
+ * record, drop the on-disk "lfsck_namespace" index under the local root and
+ * create a fresh one, then store the new record.
+ *
+ * env:  execution environment handed to the dt/local-storage calls.
+ * com:  the namespace component; com->lc_sem is held for write throughout.
+ * init: true wipes the whole record; false keeps ln_success_count and
+ *       ln_time_last_complete across the wipe.
+ *
+ * Returns the result of mdd_lfsck_namespace_store() when every earlier step
+ * succeeded, otherwise the error code of the first step that failed.
+ */
+static int mdd_lfsck_namespace_reset(const struct lu_env *env,
+                                    struct lfsck_component *com, bool init)
+{
+       struct lfsck_namespace  *ns   = (struct lfsck_namespace *)com->lc_file_ram;
+       struct mdd_device       *mdd  = mdd_lfsck2mdd(com->lc_lfsck);
+       struct dt_object        *dto, *root;
+       int                      rc;
+       ENTRY;
+
+       down_write(&com->lc_sem);
+       if (init) {
+               memset(ns, 0, sizeof(*ns));
+       } else {
+               __u32 count = ns->ln_success_count;
+               __u64 last_time = ns->ln_time_last_complete;
+
+               memset(ns, 0, sizeof(*ns));
+               ns->ln_success_count = count;
+               ns->ln_time_last_complete = last_time;
+       }
+       ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
+       ns->ln_status = LS_INIT;
+
+       root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid);
+       /* 'root' is an error pointer here: it must not reach lu_object_put(),
+        * so bypass the 'out' label and only drop the semaphore. */
+       if (unlikely(IS_ERR(root)))
+               GOTO(unlock, rc = PTR_ERR(root));
+
+       rc = local_object_unlink(env, mdd->mdd_bottom, root,
+                                lfsck_namespace_name);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       dto = local_index_find_or_create(env, mdd->mdd_los, root,
+                                        lfsck_namespace_name,
+                                        S_IFREG | S_IRUGO | S_IWUSR,
+                                        &dt_lfsck_features);
+       if (IS_ERR(dto))
+               GOTO(out, rc = PTR_ERR(dto));
+
+       rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features);
+       if (rc != 0) {
+               /* The new object is not published anywhere yet; release it
+                * instead of leaking the reference. */
+               lu_object_put(env, &dto->do_lu);
+               GOTO(out, rc);
+       }
+
+       /* NOTE(review): the previous com->lc_obj is overwritten without being
+        * released here -- confirm the caller owns that reference. */
+       com->lc_obj = dto;
+
+       rc = mdd_lfsck_namespace_store(env, com, true);
+
+       GOTO(out, rc);
+out:
+       lu_object_put(env, &root->do_lu);
+unlock:
+       up_write(&com->lc_sem);
+       return rc;
+}
+
+/*
+ * Account for one failed item in the namespace component.
+ *
+ * Under com->lc_sem (write): optionally counts the item as newly checked,
+ * bumps ln_items_failed, and, if no inconsistent position has been recorded
+ * yet, fills ln_pos_first_inconsistent via mdd_lfsck_pos_fill().
+ */
+static void
+mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
+                        bool new_checked)
+{
+       struct lfsck_namespace *nsp = (struct lfsck_namespace *)com->lc_file_ram;
+       struct lfsck_position  *first_bad = &nsp->ln_pos_first_inconsistent;
+
+       down_write(&com->lc_sem);
+
+       if (new_checked)
+               com->lc_new_checked += 1;
+       nsp->ln_items_failed += 1;
+
+       /* Only the first failure position is remembered. */
+       if (mdd_lfsck_pos_is_zero(first_bad))
+               mdd_lfsck_pos_fill(env, com->lc_lfsck, first_bad, false);
+
+       up_write(&com->lc_sem);
+}
+
+/*
+ * Persist a checkpoint of the namespace component.
+ *
+ * With init set, only ln_pos_latest_start is refreshed from the current
+ * position.  Otherwise the last-checkpoint position, phase-1 run time,
+ * checkpoint time and checked-items counter are updated and
+ * com->lc_new_checked is folded in and cleared.  A non-init call with no
+ * newly checked items returns 0 without touching anything.
+ *
+ * Returns the result of mdd_lfsck_namespace_store().
+ */
+static int mdd_lfsck_namespace_checkpoint(const struct lu_env *env,
+                                         struct lfsck_component *com,
+                                         bool init)
+{
+       struct lfsck_namespace  *nsp;
+       struct md_lfsck         *lfsck;
+       int                      rc;
+
+       /* Nothing new to record and not the initial checkpoint. */
+       if (!init && com->lc_new_checked == 0)
+               return 0;
+
+       lfsck = com->lc_lfsck;
+       nsp = (struct lfsck_namespace *)com->lc_file_ram;
+
+       down_write(&com->lc_sem);
+
+       if (!init) {
+               nsp->ln_pos_last_checkpoint = lfsck->ml_pos_current;
+               /* HALF_SEC is added before the conversion to seconds. */
+               nsp->ln_run_time_phase1 +=
+                       cfs_duration_sec(cfs_time_current() + HALF_SEC -
+                                        lfsck->ml_time_last_checkpoint);
+               nsp->ln_time_last_checkpoint = cfs_time_current_sec();
+               nsp->ln_items_checked += com->lc_new_checked;
+               com->lc_new_checked = 0;
+       } else {
+               nsp->ln_pos_latest_start = lfsck->ml_pos_current;
+       }
+
+       rc = mdd_lfsck_namespace_store(env, com, false);
+       up_write(&com->lc_sem);
+
+       return rc;
+}
+
+/*
+ * Prepare the namespace component for a new run: pick the scanning phase
+ * and the start position (com->lc_pos_start), and put the component on the
+ * matching md_lfsck lists.
+ *
+ * A component whose last status is LS_COMPLETED is reset first; a failure
+ * of that reset is returned as is.  Otherwise returns 0.
+ */
+static int mdd_lfsck_namespace_prep(const struct lu_env *env,
+                                   struct lfsck_component *com)
+{
+       struct lfsck_namespace  *nsp    =
+                               (struct lfsck_namespace *)com->lc_file_ram;
+       struct md_lfsck         *lfsck  = com->lc_lfsck;
+       struct lfsck_position   *start  = &com->lc_pos_start;
+       bool                     resume;
+
+       if (nsp->ln_status == LS_COMPLETED) {
+               int rc = mdd_lfsck_namespace_reset(env, com, false);
+
+               if (rc != 0)
+                       return rc;
+       }
+
+       down_write(&com->lc_sem);
+       nsp->ln_time_latest_start = cfs_time_current_sec();
+
+       spin_lock(&lfsck->ml_lock);
+
+       /* Continue from where the previous run stopped unless dryrun has
+        * been dropped and an inconsistent position is on record. */
+       resume = !lfsck->ml_drop_dryrun ||
+                mdd_lfsck_pos_is_zero(&nsp->ln_pos_first_inconsistent);
+
+       if (!(nsp->ln_flags & LF_SCANNED_ONCE)) {
+               /* First-stage scanning has not finished yet. */
+               nsp->ln_status = LS_SCANNING_PHASE1;
+               if (cfs_list_empty(&com->lc_link_dir))
+                       cfs_list_add_tail(&com->lc_link_dir,
+                                         &lfsck->ml_list_dir);
+               if (resume) {
+                       *start = nsp->ln_pos_last_checkpoint;
+                       start->lp_oit_cookie++;
+               } else {
+                       *start = nsp->ln_pos_first_inconsistent;
+               }
+       } else if (resume) {
+               /* First stage already done: go straight to phase 2. */
+               nsp->ln_status = LS_SCANNING_PHASE2;
+               cfs_list_del_init(&com->lc_link);
+               cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_double_scan);
+               if (!cfs_list_empty(&com->lc_link_dir))
+                       cfs_list_del_init(&com->lc_link_dir);
+               mdd_lfsck_pos_set_zero(start);
+       } else {
+               /* Redo phase 1 from the first inconsistent position with
+                * all per-run statistics cleared. */
+               nsp->ln_status = LS_SCANNING_PHASE1;
+               nsp->ln_run_time_phase1 = 0;
+               nsp->ln_run_time_phase2 = 0;
+               nsp->ln_items_checked = 0;
+               nsp->ln_items_repaired = 0;
+               nsp->ln_items_failed = 0;
+               nsp->ln_dirs_checked = 0;
+               nsp->ln_mlinked_checked = 0;
+               nsp->ln_objs_checked_phase2 = 0;
+               nsp->ln_objs_repaired_phase2 = 0;
+               nsp->ln_objs_failed_phase2 = 0;
+               nsp->ln_objs_nlink_repaired = 0;
+               nsp->ln_objs_lost_found = 0;
+               fid_zero(&nsp->ln_fid_latest_scanned_phase2);
+               if (cfs_list_empty(&com->lc_link_dir))
+                       cfs_list_add_tail(&com->lc_link_dir,
+                                         &lfsck->ml_list_dir);
+               *start = nsp->ln_pos_first_inconsistent;
+       }
+
+       spin_unlock(&lfsck->ml_lock);
+       up_write(&com->lc_sem);
+
+       return 0;
+}
+
+/*
+ * Per-object hook: under com->lc_sem (write), count the object as newly
+ * checked and, when it is a directory, bump ln_dirs_checked.
+ * Always returns 0.
+ */
+static int mdd_lfsck_namespace_exec_oit(const struct lu_env *env,
+                                       struct lfsck_component *com,
+                                       struct mdd_object *obj)
+{
+       down_write(&com->lc_sem);
+
+       com->lc_new_checked += 1;
+       if (S_ISDIR(mdd_object_type(obj))) {
+               struct lfsck_namespace *nsp =
+                               (struct lfsck_namespace *)com->lc_file_ram;
+
+               nsp->ln_dirs_checked += 1;
+       }
+
+       up_write(&com->lc_sem);
+
+       return 0;
+}
+
+/*
+ * Declare, on the given transaction handle, the updates that
+ * mdd_lfsck_namespace_exec_dir() may perform on obj: deleting the linkEA
+ * xattr and adding a linkEA entry.
+ *
+ * Returns 0 or the first non-zero declaration result.
+ */
+static int mdd_declare_lfsck_namespace_exec_dir(const struct lu_env *env,
+                                               struct mdd_object *obj,
+                                               struct thandle *handle)
+{
+       /* Room for destroying all invalid linkEA entries ... */
+       int rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle);
+
+       /* ... and for inserting the new linkEA entry. */
+       if (rc == 0)
+               rc = mdd_declare_links_add(env, obj, handle, NULL);
+
+       return rc;
+}
+
+/*
+ * Verify that the name entry being scanned still refers to obj.
+ *
+ * Looks name up in the directory currently being scanned
+ * (lfsck->ml_obj_dir) and compares the FID found with the FID of obj.
+ *
+ * Returns 0 if the entry still names obj, LFSCK_NAMEENTRY_DEAD if obj is
+ * dead, LFSCK_NAMEENTRY_REMOVED if the lookup reports -ENOENT,
+ * LFSCK_NAMEENTRY_RECREATED if the name now resolves to another FID, or
+ * the negative lookup error otherwise.
+ */
+static int mdd_lfsck_namespace_check_exist(const struct lu_env *env,
+                                          struct md_lfsck *lfsck,
+                                          struct mdd_object *obj,
+                                          const char *name)
+{
+       struct lu_fid    *found  = &mdd_env_info(env)->mti_fid;
+       struct dt_object *parent = lfsck->ml_obj_dir;
+       int               rc;
+       ENTRY;
+
+       if (unlikely(mdd_is_dead_obj(obj)))
+               RETURN(LFSCK_NAMEENTRY_DEAD);
+
+       rc = dt_lookup(env, parent, (struct dt_rec *)found,
+                      (const struct dt_key *)name, BYPASS_CAPA);
+       if (rc < 0)
+               RETURN(rc == -ENOENT ? LFSCK_NAMEENTRY_REMOVED : rc);
+
+       RETURN(lu_fid_eq(found, mdo2fid(obj)) ? 0 : LFSCK_NAMEENTRY_RECREATED);
+}
+/*
+ * Check one directory entry against the linkEA of the object it names, and
+ * repair the linkEA unless LPF_DRYRUN is set.
+ *
+ * env: execution environment
+ * com: namespace LFSCK component; com->lc_sem is held for write for the
+ *      whole call
+ * obj: the object named by the entry
+ * ent: the directory entry being checked (lde_name, lde_namelen, lde_fid
+ *      and lde_attrs are read)
+ *
+ * The parent FID is taken from lfsck->ml_obj_dir.  If the linkEA is
+ * missing (-ENODATA), is rejected with -EINVAL, or lacks the <name,
+ * parent> pair, a matching record is written.  Objects whose linkEA holds
+ * a record count other than one, or non-directories whose nlink is not 1,
+ * are passed on to mdd_lfsck_namespace_update().
+ *
+ * Returns 0, or a negative error code only when LPF_FAILOUT is set;
+ * otherwise failures are counted in ln_items_failed and swallowed.
+ */
+static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env,
+                                       struct lfsck_component *com,
+                                       struct mdd_object *obj,
+                                       struct lu_dirent *ent)
+{
+       struct mdd_thread_info     *info     = mdd_env_info(env);
+       struct lu_attr             *la       = &info->mti_la;
+       struct md_lfsck            *lfsck    = com->lc_lfsck;
+       struct lfsck_bookmark      *bk       = &lfsck->ml_bookmark_ram;
+       struct lfsck_namespace     *ns       =
+                               (struct lfsck_namespace *)com->lc_file_ram;
+       struct mdd_device          *mdd      = mdd_lfsck2mdd(lfsck);
+       struct linkea_data          ldata    = { 0 };
+       const struct lu_fid        *pfid     =
+                               lu_object_fid(&lfsck->ml_obj_dir->do_lu);
+       const struct lu_fid        *cfid     = mdo2fid(obj);
+       const struct lu_name       *cname;
+       struct thandle             *handle   = NULL;
+       bool                        repaired = false;
+       bool                        locked   = false;
+       int                         count    = 0;
+       int                         rc;
+       ENTRY;
+
+       cname = mdd_name_get_const(env, ent->lde_name, ent->lde_namelen);
+       down_write(&com->lc_sem);
+       com->lc_new_checked++;
+       /* An entry flagged LUDA_UPGRADE or LUDA_REPAIR counts as repaired and
+        * sets the matching namespace flag. */
+       if (ent->lde_attrs & LUDA_UPGRADE) {
+               ns->ln_flags |= LF_UPGRADE;
+               repaired = true;
+       } else if (ent->lde_attrs & LUDA_REPAIR) {
+               ns->ln_flags |= LF_INCONSISTENT;
+               repaired = true;
+       }
+       /* No linkEA check for ".", "..", or a dot-name whose FID is the
+        * .lustre FID. */
+       if (ent->lde_name[0] == '.' &&
+           (ent->lde_namelen == 1 ||
+            (ent->lde_namelen == 2 && ent->lde_name[1] == '.') ||
+            fid_is_dot_lustre(&ent->lde_fid)))
+               GOTO(out, rc = 0);
+       /* Open a transaction up front only when not in dryrun mode and either
+        * lc_journal is still set from an earlier entry or this entry is
+        * already flagged as repaired.  Later code jumps back to 'again' when
+        * it finds it needs a transaction after all. */
+       if (!(bk->lb_param & LPF_DRYRUN) &&
+           (com->lc_journal || repaired)) {
+
+again:
+               LASSERT(!locked);
+
+               com->lc_journal = 1;
+               handle = mdd_trans_create(env, mdd);
+               if (IS_ERR(handle))
+                       GOTO(out, rc = PTR_ERR(handle));
+
+               rc = mdd_declare_lfsck_namespace_exec_dir(env, obj, handle);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = mdd_trans_start(env, mdd, handle);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               mdd_write_lock(env, obj, MOR_TGT_CHILD);
+               locked = true;
+       }
+       /* Any non-zero result (dead object, entry removed or recreated, or a
+        * lookup error) ends the processing of this entry. */
+       rc = mdd_lfsck_namespace_check_exist(env, lfsck, obj, ent->lde_name);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = mdd_links_read(env, obj, &ldata);
+       if (rc == 0) {
+               count = ldata.ld_leh->leh_reccount;
+               rc = linkea_links_find(&ldata, cname, pfid);
+               if (rc == 0) {
+                       /* For dir, if there are more than one linkea entries,
+                        * then remove all the other redundant linkea entries.*/
+                       if (unlikely(count > 1 &&
+                                    S_ISDIR(mdd_object_type(obj))))
+                               goto unmatch;
+
+                       goto record;
+               } else {
+
+unmatch:
+                       ns->ln_flags |= LF_INCONSISTENT;
+                       if (bk->lb_param & LPF_DRYRUN) {
+                               repaired = true;
+                               goto record;
+                       }
+
+                       /*For dir, remove the unmatched linkea entry directly.*/
+                       if (S_ISDIR(mdd_object_type(obj))) {
+                               if (!com->lc_journal)
+                                       goto again;
+
+                               rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK,
+                                                  handle, BYPASS_CAPA);
+                               if (rc != 0)
+                                       GOTO(stop, rc);
+
+                               goto nodata;
+                       } else {
+                               goto add;
+                       }
+               }
+       } else if (unlikely(rc == -EINVAL)) {
+               ns->ln_flags |= LF_INCONSISTENT;
+               if (bk->lb_param & LPF_DRYRUN) {
+                       count = 1;
+                       repaired = true;
+                       goto record;
+               }
+
+               if (!com->lc_journal)
+                       goto again;
+
+               /* The magic crashed, we are not sure whether there are more
+                * corrupt data in the linkea, so remove all linkea entries. */
+               rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK, handle,
+                                  BYPASS_CAPA);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               goto nodata;
+       } else if (rc == -ENODATA) {
+               ns->ln_flags |= LF_UPGRADE;
+               if (bk->lb_param & LPF_DRYRUN) {
+                       count = 1;
+                       repaired = true;
+                       goto record;
+               }
+
+nodata:
+               rc = linkea_data_new(&ldata, &mdd_env_info(env)->mti_link_buf);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+add:
+               if (!com->lc_journal)
+                       goto again;
+
+               rc = linkea_add_buf(&ldata, cname, pfid);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = mdd_links_write(env, obj, &ldata, handle);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               count = ldata.ld_leh->leh_reccount;
+               repaired = true;
+       } else {
+               GOTO(stop, rc);
+       }
+
+record:
+       LASSERT(count > 0);
+
+       rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       if ((count == 1) &&
+           (la->la_nlink == 1 || S_ISDIR(mdd_object_type(obj))))
+               /* Usually, it is for single linked object or dir, do nothing.*/
+               GOTO(stop, rc);
+
+       /* Following modification will be in another transaction.  */
+       if (handle != NULL) {
+               LASSERT(mdd_write_locked(env, obj));
+
+               mdd_write_unlock(env, obj);
+               locked = false;
+
+               mdd_trans_stop(env, mdd, 0, handle);
+               handle = NULL;
+       }
+       /* NOTE(review): presumably mdd_lfsck_namespace_update() records the
+        * object for the second-stage scan -- confirm against its definition.
+        * LLF_UNMATCH_NLINKS is passed when the linkEA record count differs
+        * from la_nlink. */
+       ns->ln_mlinked_checked++;
+       rc = mdd_lfsck_namespace_update(env, com, cfid,
+                       count != la->la_nlink ? LLF_UNMATCH_NLINKS : 0, false);
+
+       GOTO(out, rc);
+
+stop:
+       if (locked)
+               mdd_write_unlock(env, obj);
+
+       if (handle != NULL)
+               mdd_trans_stop(env, mdd, rc, handle);
+       /* Accounting: a negative rc counts as a failed item and is reported
+        * to the caller only with LPF_FAILOUT; any other rc becomes 0, and
+        * lc_journal is cleared when nothing was repaired. */
+out:
+       if (rc < 0) {
+               ns->ln_items_failed++;
+               if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
+                       mdd_lfsck_pos_fill(env, lfsck,
+                                          &ns->ln_pos_first_inconsistent,
+                                          false);
+               if (!(bk->lb_param & LPF_FAILOUT))
+                       rc = 0;
+       } else {
+               if (repaired)
+                       ns->ln_items_repaired++;
+               else
+                       com->lc_journal = 0;
+               rc = 0;
+       }
+       up_write(&com->lc_sem);
+       return rc;
+}
+
+/*
+ * Finish the first-stage scan of the namespace component.
+ *
+ * result > 0 moves the component to the double-scan list in
+ * LS_SCANNING_PHASE2; result == 0 marks it paused (left on its lists) or
+ * stopped (moved to the idle list); result < 0 marks it failed and moves
+ * it to the idle list.  Unless init is set, the checkpoint position, run
+ * time and counters are refreshed too.
+ *
+ * Returns the result of mdd_lfsck_namespace_store().
+ */
+static int mdd_lfsck_namespace_post(const struct lu_env *env,
+                                   struct lfsck_component *com,
+                                   int result, bool init)
+{
+       struct lfsck_namespace  *nsp   =
+                               (struct lfsck_namespace *)com->lc_file_ram;
+       struct md_lfsck         *lfsck = com->lc_lfsck;
+       int                      rc;
+
+       down_write(&com->lc_sem);
+       spin_lock(&lfsck->ml_lock);
+
+       if (!init)
+               nsp->ln_pos_last_checkpoint = lfsck->ml_pos_current;
+
+       if (result < 0) {
+               nsp->ln_status = LS_FAILED;
+       } else if (result == 0) {
+               nsp->ln_status = lfsck->ml_paused ? LS_PAUSED : LS_STOPPED;
+       } else {
+               nsp->ln_status = LS_SCANNING_PHASE2;
+               nsp->ln_flags |= LF_SCANNED_ONCE;
+               nsp->ln_flags &= ~LF_UPGRADE;
+       }
+
+       /* A paused component stays on its lists; any other outcome takes it
+        * off both and requeues it for phase 2 or as idle. */
+       if (nsp->ln_status != LS_PAUSED) {
+               cfs_list_del_init(&com->lc_link);
+               cfs_list_del_init(&com->lc_link_dir);
+               if (result > 0)
+                       cfs_list_add_tail(&com->lc_link,
+                                         &lfsck->ml_list_double_scan);
+               else
+                       cfs_list_add_tail(&com->lc_link,
+                                         &lfsck->ml_list_idle);
+       }
+
+       spin_unlock(&lfsck->ml_lock);
+
+       if (!init) {
+               nsp->ln_run_time_phase1 +=
+                       cfs_duration_sec(cfs_time_current() + HALF_SEC -
+                                        lfsck->ml_time_last_checkpoint);
+               nsp->ln_time_last_checkpoint = cfs_time_current_sec();
+               nsp->ln_items_checked += com->lc_new_checked;
+               com->lc_new_checked = 0;
+       }
+
+       rc = mdd_lfsck_namespace_store(env, com, false);
+       up_write(&com->lc_sem);
+
+       return rc;
+}
+
+/*
+ * Format the state of the namespace component as text into buf.
+ *
+ * env: execution environment (used to fill the current position)
+ * com: namespace LFSCK component; com->lc_sem is held for read
+ * buf: output buffer
+ * len: size of buf in bytes
+ *
+ * The header, flags, params, times and positions are always printed; the
+ * statistics block depends on whether the component is in phase 1, in
+ * phase 2, or not scanning.
+ *
+ * Returns the number of bytes produced, or -ENOSPC if any piece failed to
+ * format or did not fit.  snprintf() reports the length it would have
+ * written, so a result >= the remaining space means truncation and is
+ * treated as -ENOSPC instead of advancing past the end of buf.
+ */
+static int
+mdd_lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
+                        char *buf, int len)
+{
+       struct md_lfsck         *lfsck = com->lc_lfsck;
+       struct lfsck_bookmark   *bk    = &lfsck->ml_bookmark_ram;
+       struct lfsck_namespace  *ns    =
+                               (struct lfsck_namespace *)com->lc_file_ram;
+       int                      save  = len;
+       int                      ret   = -ENOSPC;
+       int                      rc;
+
+       down_read(&com->lc_sem);
+       rc = snprintf(buf, len,
+                     "name: lfsck_namespace\n"
+                     "magic: 0x%x\n"
+                     "version: %d\n"
+                     "status: %s\n",
+                     ns->ln_magic,
+                     bk->lb_version,
+                     lfsck_status_names[ns->ln_status]);
+       if (rc <= 0 || rc >= len)
+               goto out;
+
+       buf += rc;
+       len -= rc;
+       rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names,
+                            "flags");
+       if (rc < 0)
+               goto out;
+
+       rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
+                            "param");
+       if (rc < 0)
+               goto out;
+
+       rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete,
+                            "time_since_last_completed");
+       if (rc < 0)
+               goto out;
+
+       rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start,
+                            "time_since_latest_start");
+       if (rc < 0)
+               goto out;
+
+       rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint,
+                            "time_since_last_checkpoint");
+       if (rc < 0)
+               goto out;
+
+       rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start,
+                           "latest_start_position");
+       if (rc < 0)
+               goto out;
+
+       rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint,
+                           "last_checkpoint_position");
+       if (rc < 0)
+               goto out;
+
+       rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent,
+                           "first_failure_position");
+       if (rc < 0)
+               goto out;
+
+       if (ns->ln_status == LS_SCANNING_PHASE1) {
+               /* Phase 1 is running: include the items checked since the
+                * last checkpoint and the time elapsed since then. */
+               struct lfsck_position pos;
+               cfs_duration_t duration = cfs_time_current() -
+                                         lfsck->ml_time_last_checkpoint;
+               __u64 checked = ns->ln_items_checked + com->lc_new_checked;
+               __u64 speed = checked;
+               __u64 new_checked = com->lc_new_checked * CFS_HZ;
+               __u32 rtime = ns->ln_run_time_phase1 +
+                             cfs_duration_sec(duration + HALF_SEC);
+
+               /* Guard the divisions against a zero divisor. */
+               if (duration != 0)
+                       do_div(new_checked, duration);
+               if (rtime != 0)
+                       do_div(speed, rtime);
+               rc = snprintf(buf, len,
+                             "checked_phase1: "LPU64"\n"
+                             "checked_phase2: "LPU64"\n"
+                             "updated_phase1: "LPU64"\n"
+                             "updated_phase2: "LPU64"\n"
+                             "failed_phase1: "LPU64"\n"
+                             "failed_phase2: "LPU64"\n"
+                             "dirs: "LPU64"\n"
+                             "M-linked: "LPU64"\n"
+                             "nlinks_repaired: "LPU64"\n"
+                             "lost_found: "LPU64"\n"
+                             "success_count: %u\n"
+                             "run_time_phase1: %u seconds\n"
+                             "run_time_phase2: %u seconds\n"
+                             "average_speed_phase1: "LPU64" items/sec\n"
+                             "average_speed_phase2: N/A\n"
+                             "real-time_speed_phase1: "LPU64" items/sec\n"
+                             "real-time_speed_phase2: N/A\n",
+                             checked,
+                             ns->ln_objs_checked_phase2,
+                             ns->ln_items_repaired,
+                             ns->ln_objs_repaired_phase2,
+                             ns->ln_items_failed,
+                             ns->ln_objs_failed_phase2,
+                             ns->ln_dirs_checked,
+                             ns->ln_mlinked_checked,
+                             ns->ln_objs_nlink_repaired,
+                             ns->ln_objs_lost_found,
+                             ns->ln_success_count,
+                             rtime,
+                             ns->ln_run_time_phase2,
+                             speed,
+                             new_checked);
+               if (rc <= 0 || rc >= len)
+                       goto out;
+
+               buf += rc;
+               len -= rc;
+               mdd_lfsck_pos_fill(env, lfsck, &pos, false);
+               rc = lfsck_pos_dump(&buf, &len, &pos, "current_position");
+               if (rc <= 0)
+                       goto out;
+       } else if (ns->ln_status == LS_SCANNING_PHASE2) {
+               /* Phase 2 is running: phase-1 figures come from the stored
+                * record, phase-2 figures include the in-flight counters. */
+               cfs_duration_t duration = cfs_time_current() -
+                                         lfsck->ml_time_last_checkpoint;
+               __u64 checked = ns->ln_objs_checked_phase2 +
+                               com->lc_new_checked;
+               __u64 speed1 = ns->ln_items_checked;
+               __u64 speed2 = checked;
+               __u64 new_checked = com->lc_new_checked * CFS_HZ;
+               __u32 rtime = ns->ln_run_time_phase2 +
+                             cfs_duration_sec(duration + HALF_SEC);
+
+               if (duration != 0)
+                       do_div(new_checked, duration);
+               if (ns->ln_run_time_phase1 != 0)
+                       do_div(speed1, ns->ln_run_time_phase1);
+               if (rtime != 0)
+                       do_div(speed2, rtime);
+               rc = snprintf(buf, len,
+                             "checked_phase1: "LPU64"\n"
+                             "checked_phase2: "LPU64"\n"
+                             "updated_phase1: "LPU64"\n"
+                             "updated_phase2: "LPU64"\n"
+                             "failed_phase1: "LPU64"\n"
+                             "failed_phase2: "LPU64"\n"
+                             "dirs: "LPU64"\n"
+                             "M-linked: "LPU64"\n"
+                             "nlinks_repaired: "LPU64"\n"
+                             "lost_found: "LPU64"\n"
+                             "success_count: %u\n"
+                             "run_time_phase1: %u seconds\n"
+                             "run_time_phase2: %u seconds\n"
+                             "average_speed_phase1: "LPU64" items/sec\n"
+                             "average_speed_phase2: "LPU64" objs/sec\n"
+                             "real-time_speed_phase1: N/A\n"
+                             "real-time_speed_phase2: "LPU64" objs/sec\n"
+                             "current_position: "DFID"\n",
+                             ns->ln_items_checked,
+                             checked,
+                             ns->ln_items_repaired,
+                             ns->ln_objs_repaired_phase2,
+                             ns->ln_items_failed,
+                             ns->ln_objs_failed_phase2,
+                             ns->ln_dirs_checked,
+                             ns->ln_mlinked_checked,
+                             ns->ln_objs_nlink_repaired,
+                             ns->ln_objs_lost_found,
+                             ns->ln_success_count,
+                             ns->ln_run_time_phase1,
+                             rtime,
+                             speed1,
+                             speed2,
+                             new_checked,
+                             PFID(&ns->ln_fid_latest_scanned_phase2));
+               if (rc <= 0 || rc >= len)
+                       goto out;
+
+               buf += rc;
+               len -= rc;
+       } else {
+               /* Not scanning: report the stored figures only. */
+               __u64 speed1 = ns->ln_items_checked;
+               __u64 speed2 = ns->ln_objs_checked_phase2;
+
+               if (ns->ln_run_time_phase1 != 0)
+                       do_div(speed1, ns->ln_run_time_phase1);
+               if (ns->ln_run_time_phase2 != 0)
+                       do_div(speed2, ns->ln_run_time_phase2);
+               rc = snprintf(buf, len,
+                             "checked_phase1: "LPU64"\n"
+                             "checked_phase2: "LPU64"\n"
+                             "updated_phase1: "LPU64"\n"
+                             "updated_phase2: "LPU64"\n"
+                             "failed_phase1: "LPU64"\n"
+                             "failed_phase2: "LPU64"\n"
+                             "dirs: "LPU64"\n"
+                             "M-linked: "LPU64"\n"
+                             "nlinks_repaired: "LPU64"\n"
+                             "lost_found: "LPU64"\n"
+                             "success_count: %u\n"
+                             "run_time_phase1: %u seconds\n"
+                             "run_time_phase2: %u seconds\n"
+                             "average_speed_phase1: "LPU64" items/sec\n"
+                             "average_speed_phase2: "LPU64" objs/sec\n"
+                             "real-time_speed_phase1: N/A\n"
+                             "real-time_speed_phase2: N/A\n"
+                             "current_position: N/A\n",
+                             ns->ln_items_checked,
+                             ns->ln_objs_checked_phase2,
+                             ns->ln_items_repaired,
+                             ns->ln_objs_repaired_phase2,
+                             ns->ln_items_failed,
+                             ns->ln_objs_failed_phase2,
+                             ns->ln_dirs_checked,
+                             ns->ln_mlinked_checked,
+                             ns->ln_objs_nlink_repaired,
+                             ns->ln_objs_lost_found,
+                             ns->ln_success_count,
+                             ns->ln_run_time_phase1,
+                             ns->ln_run_time_phase2,
+                             speed1,
+                             speed2);
+               if (rc <= 0 || rc >= len)
+                       goto out;
+
+               buf += rc;
+               len -= rc;
+       }
+       ret = save - len;
+
+out:
+       up_read(&com->lc_sem);
+       return ret;
+}
+
+/*
+ * Second-phase ("double scan") pass of the namespace component.
+ *
+ * Iterates the component's own index object (com->lc_obj), resuming just
+ * after ns->ln_fid_latest_scanned_phase2.  Each key is a big-endian FID;
+ * the object it names is looked up and, when it exists and is not remote,
+ * handed to mdd_lfsck_namespace_double_scan_one() together with the flags
+ * stored in the index record.
+ *
+ * Per entry:
+ *  - rc > 0 is counted as repaired, rc < 0 as failed;
+ *  - the index entry is deleted when rc == 0, or when rc > 0 outside of
+ *    dry-run mode; on rc < 0 it is rewritten with LLF_REPAIR_FAILED set;
+ *  - with LPF_FAILOUT a negative rc aborts the scan.
+ * A checkpoint is stored once ml_time_next_checkpoint has passed.
+ *
+ * On the way out the run time and counters are folded into *ns, the final
+ * status is chosen from the last rc (> 0: completed, 0: paused/stopped,
+ * < 0: failed), the component is moved to the idle list unless paused, and
+ * the record is stored.  The return value is that of the final store.
+ */
+static int mdd_lfsck_namespace_double_scan(const struct lu_env *env,
+                                          struct lfsck_component *com)
+{
+       struct md_lfsck         *lfsck  = com->lc_lfsck;
+       struct ptlrpc_thread    *thread = &lfsck->ml_thread;
+       struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
+       struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
+       struct lfsck_namespace  *ns     =
+                               (struct lfsck_namespace *)com->lc_file_ram;
+       struct dt_object        *obj    = com->lc_obj;
+       const struct dt_it_ops  *iops   = &obj->do_index_ops->dio_it;
+       struct mdd_object       *target;
+       struct dt_it            *di;
+       struct dt_key           *key;
+       struct lu_fid            fid;
+       int                      rc;
+       __u8                     flags;
+       ENTRY;
+
+       lfsck->ml_new_scanned = 0;
+       lfsck->ml_time_last_checkpoint = cfs_time_current();
+       lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+                               cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+
+       di = iops->init(env, obj, 0, BYPASS_CAPA);
+       if (IS_ERR(di))
+               RETURN(PTR_ERR(di));
+
+       /* Index keys are stored big-endian; position on the last FID that
+        * phase 2 recorded as scanned. */
+       fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
+       rc = iops->get(env, di, (const struct dt_key *)&fid);
+       if (rc < 0)
+               GOTO(fini, rc);
+
+       /* Skip the start one, which either has been processed or non-exist. */
+       rc = iops->next(env, di);
+       if (rc != 0)
+               GOTO(put, rc);
+
+       if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN))
+               GOTO(put, rc = 0);
+
+       do {
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) &&
+                   cfs_fail_val > 0) {
+                       struct l_wait_info lwi;
+
+                       lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
+                                         NULL, NULL);
+                       l_wait_event(thread->t_ctl_waitq,
+                                    !thread_is_running(thread),
+                                    &lwi);
+               }
+
+               /* Start every entry with clean flags: the failure paths
+                * below can reach the LLF_REPAIR_FAILED update without
+                * iops->rec() ever having filled them in, which used to
+                * read an uninitialised (or stale) value. */
+               flags = 0;
+
+               key = iops->key(env, di);
+               fid_be_to_cpu(&fid, (const struct lu_fid *)key);
+               target = mdd_object_find(env, mdd, &fid);
+               /* lc_sem is taken before the result is examined so that
+                * every path reaching "checkpoint" holds it. */
+               down_write(&com->lc_sem);
+               if (target == NULL) {
+                       rc = 0;
+                       goto checkpoint;
+               } else if (IS_ERR(target)) {
+                       rc = PTR_ERR(target);
+                       goto checkpoint;
+               }
+
+               /* XXX: Currently, skip remote object, the consistency for
+                *      remote object will be processed in LFSCK phase III. */
+               if (!mdd_object_exists(target) || mdd_object_remote(target))
+                       goto obj_put;
+
+               rc = iops->rec(env, di, (struct dt_rec *)&flags, 0);
+               if (rc == 0)
+                       rc = mdd_lfsck_namespace_double_scan_one(env, com,
+                                                                target, flags);
+
+obj_put:
+               mdd_object_put(env, target);
+
+checkpoint:
+               lfsck->ml_new_scanned++;
+               com->lc_new_checked++;
+               ns->ln_fid_latest_scanned_phase2 = fid;
+               if (rc > 0)
+                       ns->ln_objs_repaired_phase2++;
+               else if (rc < 0)
+                       ns->ln_objs_failed_phase2++;
+               up_write(&com->lc_sem);
+
+               /* Drop the entry once nothing is left to do for it; keep it
+                * (marked as failed) when the repair did not succeed.  In
+                * dry-run mode a "repaired" entry is left in place. */
+               if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) {
+                       mdd_lfsck_namespace_delete(env, com, &fid);
+               } else if (rc < 0) {
+                       flags |= LLF_REPAIR_FAILED;
+                       mdd_lfsck_namespace_update(env, com, &fid, flags, true);
+               }
+
+               if (rc < 0 && bk->lb_param & LPF_FAILOUT)
+                       GOTO(put, rc);
+
+               /* No checkpoint yet if the interval has not elapsed or
+                * nothing new has been checked since the last one. */
+               if (likely(cfs_time_beforeq(cfs_time_current(),
+                                           lfsck->ml_time_next_checkpoint)) ||
+                   com->lc_new_checked == 0)
+                       goto speed;
+
+               down_write(&com->lc_sem);
+               /* HALF_SEC rounds the elapsed time to the nearest second. */
+               ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
+                               HALF_SEC - lfsck->ml_time_last_checkpoint);
+               ns->ln_time_last_checkpoint = cfs_time_current_sec();
+               ns->ln_objs_checked_phase2 += com->lc_new_checked;
+               com->lc_new_checked = 0;
+               rc = mdd_lfsck_namespace_store(env, com, false);
+               up_write(&com->lc_sem);
+               if (rc != 0)
+                       GOTO(put, rc);
+
+               lfsck->ml_time_last_checkpoint = cfs_time_current();
+               lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+                               cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+
+speed:
+               mdd_lfsck_control_speed(lfsck);
+               if (unlikely(!thread_is_running(thread)))
+                       GOTO(put, rc = 0);
+
+               rc = iops->next(env, di);
+       } while (rc == 0);
+
+       GOTO(put, rc);
+
+put:
+       iops->put(env, di);
+
+fini:
+       iops->fini(env, di);
+       down_write(&com->lc_sem);
+
+       ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
+                               HALF_SEC - lfsck->ml_time_last_checkpoint);
+       ns->ln_time_last_checkpoint = cfs_time_current_sec();
+       ns->ln_objs_checked_phase2 += com->lc_new_checked;
+       com->lc_new_checked = 0;
+
+       /* Map the last rc onto the final component status. */
+       if (rc > 0) {
+               com->lc_journal = 0;
+               ns->ln_status = LS_COMPLETED;
+               if (!(bk->lb_param & LPF_DRYRUN))
+                       ns->ln_flags &=
+                       ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
+               ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
+               ns->ln_success_count++;
+       } else if (rc == 0) {
+               if (lfsck->ml_paused)
+                       ns->ln_status = LS_PAUSED;
+               else
+                       ns->ln_status = LS_STOPPED;
+       } else {
+               ns->ln_status = LS_FAILED;
+       }
+
+       /* A paused component stays on its current list; any other outcome
+        * moves it to the idle list. */
+       if (ns->ln_status != LS_PAUSED) {
+               spin_lock(&lfsck->ml_lock);
+               cfs_list_del_init(&com->lc_link);
+               cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
+               spin_unlock(&lfsck->ml_lock);
+       }
+
+       rc = mdd_lfsck_namespace_store(env, com, false);
+
+       up_write(&com->lc_sem);
+       return rc;
+}
+
+/* Method table of the namespace LFSCK component.  It is installed as
+ * com->lc_ops by mdd_lfsck_namespace_setup(); the framework wrappers in
+ * this file dispatch through it for every component on their lists. */
+static struct lfsck_operations mdd_lfsck_namespace_ops = {
+       .lfsck_reset            = mdd_lfsck_namespace_reset,
+       .lfsck_fail             = mdd_lfsck_namespace_fail,
+       .lfsck_checkpoint       = mdd_lfsck_namespace_checkpoint,
+       .lfsck_prep             = mdd_lfsck_namespace_prep,
+       .lfsck_exec_oit         = mdd_lfsck_namespace_exec_oit,
+       .lfsck_exec_dir         = mdd_lfsck_namespace_exec_dir,
+       .lfsck_post             = mdd_lfsck_namespace_post,
+       .lfsck_dump             = mdd_lfsck_namespace_dump,
+       .lfsck_double_scan      = mdd_lfsck_namespace_double_scan,
+};
+
+/* LFSCK component setup/cleanup functions */
+
+/*
+ * Create the namespace LFSCK component for lfsck and put it on the
+ * appropriate framework list.
+ *
+ * Allocates the component and its in-RAM / on-disk record buffers, finds or
+ * creates the lfsck_namespace_name index under the local root, loads the
+ * stored record (resetting or initialising it when needed), and links the
+ * component on the idle list or on the scan + dir lists depending on the
+ * stored status.
+ *
+ * Returns 0 on success.  On any failure the component is released through
+ * mdd_lfsck_component_cleanup() and the error code is returned.
+ */
+static int mdd_lfsck_namespace_setup(const struct lu_env *env,
+                                    struct md_lfsck *lfsck)
+{
+       struct mdd_device       *mdd = mdd_lfsck2mdd(lfsck);
+       struct lfsck_component  *com;
+       struct lfsck_namespace  *ns;
+       struct dt_object        *obj, *root;
+       int                      rc;
+       ENTRY;
+
+       OBD_ALLOC_PTR(com);
+       if (com == NULL)
+               RETURN(-ENOMEM);
+
+       CFS_INIT_LIST_HEAD(&com->lc_link);
+       CFS_INIT_LIST_HEAD(&com->lc_link_dir);
+       init_rwsem(&com->lc_sem);
+       atomic_set(&com->lc_ref, 1);
+       com->lc_lfsck = lfsck;
+       com->lc_type = LT_NAMESPACE;
+       com->lc_ops = &mdd_lfsck_namespace_ops;
+       com->lc_file_size = sizeof(struct lfsck_namespace);
+       OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
+       if (com->lc_file_ram == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
+       if (com->lc_file_disk == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid);
+       if (unlikely(IS_ERR(root)))
+               GOTO(out, rc = PTR_ERR(root));
+
+       obj = local_index_find_or_create(env, mdd->mdd_los, root,
+                                        lfsck_namespace_name,
+                                        S_IFREG | S_IRUGO | S_IWUSR,
+                                        &dt_lfsck_features);
+       /* The root is only needed for the lookup above. */
+       lu_object_put(env, &root->do_lu);
+       if (IS_ERR(obj))
+               GOTO(out, rc = PTR_ERR(obj));
+
+       com->lc_obj = obj;
+       rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_features);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       /* NOTE(review): a positive return from load triggers a reset and
+        * -ENODATA a fresh init; presumably these mean "record unusable" and
+        * "no record yet" - confirm against mdd_lfsck_namespace_load(). */
+       rc = mdd_lfsck_namespace_load(env, com);
+       if (rc > 0)
+               rc = mdd_lfsck_namespace_reset(env, com, true);
+       else if (rc == -ENODATA)
+               rc = mdd_lfsck_namespace_init(env, com);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       ns = (struct lfsck_namespace *)com->lc_file_ram;
+       /* Note the ordering: "default" sits in the middle so that an unknown
+        * status, like a status that says a scan was in progress, is turned
+        * into LS_CRASHED and then queued for scanning. */
+       switch (ns->ln_status) {
+       case LS_INIT:
+       case LS_COMPLETED:
+       case LS_FAILED:
+       case LS_STOPPED:
+               cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
+               break;
+       default:
+               CERROR("%s: unknown status: %u\n",
+                      mdd_lfsck2name(lfsck), ns->ln_status);
+               /* fall through */
+       case LS_SCANNING_PHASE1:
+       case LS_SCANNING_PHASE2:
+               /* No need to store the status to disk right now.
+                * If the system crashed before the status stored,
+                * it will be loaded back when next time. */
+               ns->ln_status = LS_CRASHED;
+               /* fall through */
+       case LS_PAUSED:
+       case LS_CRASHED:
+               cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_scan);
+               cfs_list_add_tail(&com->lc_link_dir, &lfsck->ml_list_dir);
+               break;
+       }
+
+       GOTO(out, rc = 0);
+
+out:
+       if (rc != 0)
+               mdd_lfsck_component_cleanup(env, com);
+       return rc;
+}
+
+/* helper functions for framework */
+
+/*
+ * Decide whether the directory obj should be traversed by LFSCK.
+ *
+ * Starting at obj, walk up the parent chain until a verdict is reached:
+ *  - 1 if the walk reaches the root, finds an object that has a
+ *    XATTR_NAME_LINK xattr, or arrives at a remote parent;
+ *  - 0 if it reaches ".lustre", a dead object, the local root, or a parent
+ *    that cannot be found or does not exist;
+ *  - a negative errno if the xattr read (other than -ENODATA), the parent
+ *    FID lookup or the object lookup fails.
+ *
+ * The caller's reference on the initial obj is never dropped here; only
+ * the parents looked up during the walk are put.
+ */
+static int object_needs_lfsck(const struct lu_env *env, struct mdd_device *mdd,
+                             struct mdd_object *obj)
+{
+       struct lu_fid *pfid = &mdd_env_info(env)->mti_fid;
+       bool           held = false; /* true once obj is one we looked up */
+       int            rc;
+
+       LASSERT(S_ISDIR(mdd_object_type(obj)));
+
+       for (;;) {
+               if (mdd_is_root(mdd, mdo2fid(obj))) {
+                       rc = 1;
+                       goto out;
+               }
+
+               /* .lustre doesn't contain "real" user objects, no need lfsck */
+               if (fid_is_dot_lustre(mdo2fid(obj))) {
+                       rc = 0;
+                       goto out;
+               }
+
+               mdd_read_lock(env, obj, MOR_TGT_CHILD);
+               if (unlikely(mdd_is_dead_obj(obj))) {
+                       mdd_read_unlock(env, obj);
+                       rc = 0;
+                       goto out;
+               }
+
+               /* Zero-length buffer: only the presence of the xattr
+                * matters, not its content. */
+               rc = dt_xattr_get(env, mdd_object_child(obj),
+                                 mdd_buf_get(env, NULL, 0), XATTR_NAME_LINK,
+                                 BYPASS_CAPA);
+               mdd_read_unlock(env, obj);
+               if (rc >= 0) {
+                       rc = 1;
+                       goto out;
+               }
+
+               /* Any failure other than "no such xattr" is reported. */
+               if (rc != -ENODATA)
+                       goto out;
+
+               /* No link xattr here: step up to the parent. */
+               rc = mdd_parent_fid(env, obj, pfid);
+               if (held) {
+                       mdd_object_put(env, obj);
+                       held = false;
+               }
+               if (rc != 0)
+                       return rc;
+
+               if (unlikely(lu_fid_eq(pfid, &mdd->mdd_local_root_fid)))
+                       return 0;
+
+               obj = mdd_object_find(env, mdd, pfid);
+               if (obj == NULL)
+                       return 0;
+               if (IS_ERR(obj))
+                       return PTR_ERR(obj);
+
+               held = true;
+               if (!mdd_object_exists(obj)) {
+                       rc = 0;
+                       goto out;
+               }
+
+               /* Currently, only client visible directory can be remote. */
+               if (mdd_object_remote(obj)) {
+                       rc = 1;
+                       goto out;
+               }
+       }
+
+out:
+       if (held)
+               mdd_object_put(env, obj);
+       return rc;
+}
+
+/* Convert a little-endian directory entry to CPU byte order in place and
+ * NUL-terminate its name. */
+static void mdd_lfsck_unpack_ent(struct lu_dirent *ent)
+{
+       struct lu_fid *fid = &ent->lde_fid;
+
+       fid_le_to_cpu(fid, fid);
+       ent->lde_attrs = le32_to_cpu(ent->lde_attrs);
+       ent->lde_hash = le64_to_cpu(ent->lde_hash);
+       ent->lde_namelen = le16_to_cpu(ent->lde_namelen);
+       ent->lde_reclen = le16_to_cpu(ent->lde_reclen);
+
+       /* Terminate the name with NUL.  This may clobber the data (type)
+        * stored right after ent::lde_name, which is of no interest here. */
+       ent->lde_name[ent->lde_namelen] = '\0';
+}
+
+/* LFSCK wrap functions */
+
+/* Report a failure to every component on the scan list through its
+ * lfsck_fail() method; new_checked is handed through unchanged. */
+static void mdd_lfsck_fail(const struct lu_env *env, struct md_lfsck *lfsck,
+                          bool new_checked)
+{
+       struct lfsck_component *cur;
+
+       cfs_list_for_each_entry(cur, &lfsck->ml_list_scan, lc_link)
+               cur->lc_ops->lfsck_fail(env, cur, new_checked);
+}
+
+/*
+ * Periodic checkpoint for all scanning components.
+ *
+ * Does nothing until lfsck->ml_time_next_checkpoint has passed.  Then the
+ * current position is recorded in ml_pos_current, every component on the
+ * scan list is asked to checkpoint, and the next checkpoint is scheduled
+ * LFSCK_CHECKPOINT_INTERVAL seconds ahead.
+ *
+ * Returns 0 when no checkpoint is due or all components succeeded;
+ * otherwise the first non-zero component result, in which case the
+ * checkpoint timers are left untouched.
+ */
+static int mdd_lfsck_checkpoint(const struct lu_env *env,
+                               struct md_lfsck *lfsck)
+{
+       struct lfsck_component *com;
+       int                     rc;
+
+       if (likely(cfs_time_beforeq(cfs_time_current(),
+                                   lfsck->ml_time_next_checkpoint)))
+               return 0;
+
+       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false);
+       cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
+               rc = com->lc_ops->lfsck_checkpoint(env, com, false);
+               if (rc != 0)
+                       return rc;
+       }
+
+       lfsck->ml_time_last_checkpoint = cfs_time_current();
+       lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+                               cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+       return 0;
+}
+
+/*
+ * Prepare a scan: let every component on the scan list prepare itself,
+ * choose the start position, position the object-table iterator
+ * (ml_di_oit) there and, when the position names a parent directory,
+ * reopen that directory's iterator at the recorded cookie.
+ *
+ * On success an initial checkpoint is taken for every component and the
+ * checkpoint timers are armed.  On a negative rc reaching "out", every
+ * component's lfsck_post() is called with that rc.
+ */
+static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck)
+{
+       struct mdd_device      *mdd     = mdd_lfsck2mdd(lfsck);
+       struct mdd_object      *obj     = NULL;
+       struct dt_object       *dt_obj;
+       struct lfsck_component *com;
+       struct lfsck_component *next;
+       struct lfsck_position  *pos     = NULL;
+       const struct dt_it_ops *iops    =
+                               &lfsck->ml_obj_oit->do_index_ops->dio_it;
+       struct dt_it           *di;
+       int                     rc;
+       ENTRY;
+
+       LASSERT(lfsck->ml_obj_dir == NULL);
+       LASSERT(lfsck->ml_di_dir == NULL);
+
+       lfsck->ml_current_oit_processed = 0;
+       cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
+               com->lc_new_checked = 0;
+               if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
+                       com->lc_journal = 0;
+
+               /* NOTE(review): this early RETURN bypasses "out", so
+                * lfsck_post() is not called for any component on this
+                * path - confirm that is intended. */
+               rc = com->lc_ops->lfsck_prep(env, com);
+               if (rc != 0)
+                       RETURN(rc);
+
+               /* Track one start position across all components;
+                * presumably the earliest non-zero one wins - confirm
+                * against mdd_lfsck_pos_is_eq(). */
+               if ((pos == NULL) ||
+                   (!mdd_lfsck_pos_is_zero(&com->lc_pos_start) &&
+                    mdd_lfsck_pos_is_eq(pos, &com->lc_pos_start) > 0))
+                       pos = &com->lc_pos_start;
+       }
+
+       /* Init otable-based iterator. */
+       if (pos == NULL) {
+               rc = iops->load(env, lfsck->ml_di_oit, 0);
+               if (rc > 0) {
+                       lfsck->ml_oit_over = 1;
+                       rc = 0;
+               }
+
+               GOTO(out, rc);
+       }
+
+       rc = iops->load(env, lfsck->ml_di_oit, pos->lp_oit_cookie);
+       if (rc < 0)
+               GOTO(out, rc);
+       else if (rc > 0)
+               lfsck->ml_oit_over = 1;
+
+       /* No parent directory recorded: nothing to reopen. */
+       if (fid_is_zero(&pos->lp_dir_parent))
+               GOTO(out, rc = 0);
+
+       /* Find the directory for namespace-based traverse. */
+       obj = mdd_object_find(env, mdd, &pos->lp_dir_parent);
+       /* NOTE(review): the IS_ERR() case returns directly (obj must not be
+        * put), which also skips the lfsck_post() calls under "out". */
+       if (obj == NULL)
+               GOTO(out, rc = 0);
+       else if (IS_ERR(obj))
+               RETURN(PTR_ERR(obj));
+
+       /* XXX: Currently, skip remote object, the consistency for
+        *      remote object will be processed in LFSCK phase III. */
+       if (!mdd_object_exists(obj) || mdd_object_remote(obj) ||
+           unlikely(!S_ISDIR(mdd_object_type(obj))))
+               GOTO(out, rc = 0);
+
+       if (unlikely(mdd_is_dead_obj(obj)))
+               GOTO(out, rc = 0);
+
+       dt_obj = mdd_object_child(obj);
+       if (unlikely(!dt_try_as_dir(env, dt_obj)))
+               GOTO(out, rc = -ENOTDIR);
+
+       /* Init the namespace-based directory traverse. */
+       iops = &dt_obj->do_index_ops->dio_it;
+       di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
+       if (IS_ERR(di))
+               GOTO(out, rc = PTR_ERR(di));
+
+       LASSERT(pos->lp_dir_cookie < MDS_DIR_END_OFF);
+
+       rc = iops->load(env, di, pos->lp_dir_cookie);
+       if ((rc == 0) || (rc > 0 && pos->lp_dir_cookie > 0))
+               rc = iops->next(env, di);
+       else if (rc > 0)
+               rc = 0;
+
+       if (rc != 0) {
+               iops->put(env, di);
+               iops->fini(env, di);
+               GOTO(out, rc);
+       }
+
+       lfsck->ml_obj_dir = dt_obj;
+       spin_lock(&lfsck->ml_lock);
+       lfsck->ml_di_dir = di;
+       spin_unlock(&lfsck->ml_lock);
+       /* Clear obj so that the put under "out" is skipped; presumably the
+        * reference now backs ml_obj_dir - confirm against
+        * mdd_lfsck_close_dir(). */
+       obj = NULL;
+
+       GOTO(out, rc = 0);
+
+out:
+       if (obj != NULL)
+               mdd_object_put(env, obj);
+
+       if (rc < 0) {
+               cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan,
+                                            lc_link)
+                       com->lc_ops->lfsck_post(env, com, rc, true);
 
-       mdd = container_of0(lfsck, struct mdd_device, mdd_lfsck);
-       return mdd->mdd_obd_dev->obd_name;
+               return rc;
+       }
+
+       rc = 0;
+       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true);
+       cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
+               rc = com->lc_ops->lfsck_checkpoint(env, com, true);
+               if (rc != 0)
+                       break;
+       }
+
+       lfsck->ml_time_last_checkpoint = cfs_time_current();
+       lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+                               cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+       return rc;
 }
 
-void mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit)
+/*
+ * Process one object delivered by the object-table iteration.
+ *
+ * Every component on the scan list gets its lfsck_exec_oit() called for
+ * obj.  If obj is a directory, some component is on ml_list_dir and
+ * object_needs_lfsck() says so, a directory iterator is opened on it and
+ * published through lfsck->ml_obj_dir / ml_di_dir (an extra reference on
+ * obj is taken for that).
+ *
+ * Returns 0 or a negative errno; positive intermediate results are mapped
+ * to 0.  A negative rc reaching "out" is also reported through
+ * mdd_lfsck_fail().
+ */
+static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck,
+                             struct mdd_object *obj)
 {
-       cfs_spin_lock(&lfsck->ml_lock);
-       lfsck->ml_speed_limit = limit;
-       if (limit != LFSCK_SPEED_NO_LIMIT) {
-               if (limit > CFS_HZ) {
-                       lfsck->ml_sleep_rate = limit / CFS_HZ;
-                       lfsck->ml_sleep_jif = 1;
-               } else {
-                       lfsck->ml_sleep_rate = 1;
-                       lfsck->ml_sleep_jif = CFS_HZ / limit;
-               }
-       } else {
-               lfsck->ml_sleep_jif = 0;
-               lfsck->ml_sleep_rate = 0;
+       struct lfsck_component *com;
+       struct dt_object       *dt_obj;
+       const struct dt_it_ops *iops;
+       struct dt_it           *di;
+       int                     rc;
+       ENTRY;
+
+       LASSERT(lfsck->ml_obj_dir == NULL);
+
+       cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
+               rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
+               if (rc != 0)
+                       RETURN(rc);
+       }
+
+       /* Only directories are traversed, and only when some component is
+        * registered for directory traversal. */
+       if (!S_ISDIR(mdd_object_type(obj)) ||
+           cfs_list_empty(&lfsck->ml_list_dir))
+              RETURN(0);
+
+       rc = object_needs_lfsck(env, mdd_lfsck2mdd(lfsck), obj);
+       if (rc <= 0)
+               GOTO(out, rc);
+
+       if (unlikely(mdd_is_dead_obj(obj)))
+               GOTO(out, rc = 0);
+
+       dt_obj = mdd_object_child(obj);
+       if (unlikely(!dt_try_as_dir(env, dt_obj)))
+               GOTO(out, rc = -ENOTDIR);
+
+       iops = &dt_obj->do_index_ops->dio_it;
+       di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
+       if (IS_ERR(di))
+               GOTO(out, rc = PTR_ERR(di));
+
+       rc = iops->load(env, di, 0);
+       if (rc == 0)
+               rc = iops->next(env, di);
+       else if (rc > 0)
+               rc = 0;
+
+       /* Nothing to iterate (or an error): release the iterator again. */
+       if (rc != 0) {
+               iops->put(env, di);
+               iops->fini(env, di);
+               GOTO(out, rc);
+       }
+
+       /* Extra reference on obj for as long as ml_obj_dir refers to it. */
+       mdd_object_get(obj);
+       lfsck->ml_obj_dir = dt_obj;
+       spin_lock(&lfsck->ml_lock);
+       lfsck->ml_di_dir = di;
+       spin_unlock(&lfsck->ml_lock);
+
+       GOTO(out, rc = 0);
+
+out:
+       if (rc < 0)
+               mdd_lfsck_fail(env, lfsck, false);
+       return (rc > 0 ? 0 : rc);
+}
+
+/* Hand one directory entry (ent, naming object obj) to every component on
+ * the scan list.  Stops at, and returns, the first non-zero result;
+ * returns 0 when all components succeed. */
+static int mdd_lfsck_exec_dir(const struct lu_env *env, struct md_lfsck *lfsck,
+                             struct mdd_object *obj, struct lu_dirent *ent)
+{
+       struct lfsck_component *com;
+       int                     rc;
+
+       cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
+               rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
+               if (rc != 0)
+                       return rc;
        }
-       cfs_spin_unlock(&lfsck->ml_lock);
+       return 0;
 }
 
-static void mdd_lfsck_control_speed(struct md_lfsck *lfsck)
+/*
+ * Finish the first-phase scan: record the current position and call every
+ * scanning component's lfsck_post() with the scan result.
+ *
+ * Returns the first non-zero lfsck_post() result if any component fails
+ * (the checkpoint timers are then not updated); otherwise returns result
+ * unchanged.
+ */
+static int mdd_lfsck_post(const struct lu_env *env, struct md_lfsck *lfsck,
+                         int result)
 {
-       struct ptlrpc_thread *thread = &lfsck->ml_thread;
-       struct l_wait_info    lwi;
+       struct lfsck_component *com;
+       struct lfsck_component *next;
+       int                     rc;
 
-       if (lfsck->ml_sleep_jif > 0 &&
-           lfsck->ml_new_scanned >= lfsck->ml_sleep_rate) {
-               cfs_spin_lock(&lfsck->ml_lock);
-               if (likely(lfsck->ml_sleep_jif > 0 &&
-                          lfsck->ml_new_scanned >= lfsck->ml_sleep_rate)) {
-                       lwi = LWI_TIMEOUT_INTR(lfsck->ml_sleep_jif, NULL,
-                                              LWI_ON_SIGNAL_NOOP, NULL);
-                       cfs_spin_unlock(&lfsck->ml_lock);
+       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false);
+       cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
+               rc = com->lc_ops->lfsck_post(env, com, result, false);
+               if (rc != 0)
+                       return rc;
+       }
+
+       lfsck->ml_time_last_checkpoint = cfs_time_current();
+       lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+                               cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+       return result;
+}
+
+/*
+ * Run the second-phase scan of every component queued on
+ * ml_list_double_scan.  In dry-run mode each component's lc_journal is
+ * cleared first.  Stops at the first component whose lfsck_double_scan()
+ * returns non-zero and returns that value; returns 0 otherwise.
+ */
+static int mdd_lfsck_double_scan(const struct lu_env *env,
+                                struct md_lfsck *lfsck)
+{
+       struct lfsck_bookmark  *bk = &lfsck->ml_bookmark_ram;
+       struct lfsck_component *com;
+       struct lfsck_component *tmp;
+       int                     rc = 0;
+
+       /* The _safe variant is required: a component's double scan may
+        * unlink it from this list. */
+       cfs_list_for_each_entry_safe(com, tmp, &lfsck->ml_list_double_scan,
+                                    lc_link) {
+               if (bk->lb_param & LPF_DRYRUN)
+                       com->lc_journal = 0;
+
+               rc = com->lc_ops->lfsck_double_scan(env, com);
+               if (rc != 0)
+                       break;
+       }
+       return rc;
+}
+
+/* LFSCK engines */
+
+/*
+ * Namespace-based directory traversal engine.
+ *
+ * Walks the currently open directory iterator (lfsck->ml_di_dir) entry by
+ * entry: reads and unpacks the dirent, looks the child up by FID and hands
+ * existing, non-remote children to mdd_lfsck_exec_dir().  After every entry
+ * the periodic checkpoint and the speed control are run.
+ *
+ * Per-entry failures abort the walk only when LPF_FAILOUT is set.  The
+ * function returns 0 when the thread is no longer running, and otherwise
+ * the value that ended the loop; a positive iops->next() result
+ * (presumably "end of directory") also closes the directory unless the
+ * object-table scan is already over.
+ */
+static int mdd_lfsck_dir_engine(const struct lu_env *env,
+                               struct md_lfsck *lfsck)
+{
+       struct mdd_thread_info  *info   = mdd_env_info(env);
+       struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
+       const struct dt_it_ops  *iops   =
+                       &lfsck->ml_obj_dir->do_index_ops->dio_it;
+       struct dt_it            *di     = lfsck->ml_di_dir;
+       struct lu_dirent        *ent    = &info->mti_ent;
+       struct lu_fid           *fid    = &info->mti_fid;
+       struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
+       struct ptlrpc_thread    *thread = &lfsck->ml_thread;
+       int                      rc;
+       ENTRY;
+
+       do {
+               struct mdd_object *child;
 
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY2) &&
+                   cfs_fail_val > 0) {
+                       struct l_wait_info lwi;
+
+                       lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
+                                         NULL, NULL);
                        l_wait_event(thread->t_ctl_waitq,
                                     !thread_is_running(thread),
                                     &lwi);
-                       lfsck->ml_new_scanned = 0;
-               } else {
-                       cfs_spin_unlock(&lfsck->ml_lock);
                }
-       }
+
+               lfsck->ml_new_scanned++;
+               rc = iops->rec(env, di, (struct dt_rec *)ent,
+                              lfsck->ml_args_dir);
+               if (rc != 0) {
+                       mdd_lfsck_fail(env, lfsck, true);
+                       if (bk->lb_param & LPF_FAILOUT)
+                               RETURN(rc);
+                       else
+                               goto checkpoint;
+               }
+
+               mdd_lfsck_unpack_ent(ent);
+               if (ent->lde_attrs & LUDA_IGNORE)
+                       goto checkpoint;
+
+               *fid = ent->lde_fid;
+               child = mdd_object_find(env, mdd, fid);
+               if (child == NULL) {
+                       goto checkpoint;
+               } else if (IS_ERR(child)) {
+                       mdd_lfsck_fail(env, lfsck, true);
+                       if (bk->lb_param & LPF_FAILOUT)
+                               RETURN(PTR_ERR(child));
+                       else
+                               goto checkpoint;
+               }
+
+               /* XXX: Currently, skip remote object, the consistency for
+                *      remote object will be processed in LFSCK phase III. */
+               if (mdd_object_exists(child) && !mdd_object_remote(child))
+                       rc = mdd_lfsck_exec_dir(env, lfsck, child, ent);
+               mdd_object_put(env, child);
+               if (rc != 0 && bk->lb_param & LPF_FAILOUT)
+                       RETURN(rc);
+
+checkpoint:
+               /* rc is overwritten here, so without LPF_FAILOUT a per-entry
+                * error from above is not propagated any further. */
+               rc = mdd_lfsck_checkpoint(env, lfsck);
+               if (rc != 0 && bk->lb_param & LPF_FAILOUT)
+                       RETURN(rc);
+
+               /* Rate control. */
+               mdd_lfsck_control_speed(lfsck);
+               if (unlikely(!thread_is_running(thread)))
+                       RETURN(0);
+
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL2)) {
+                       spin_lock(&lfsck->ml_lock);
+                       thread_set_flags(thread, SVC_STOPPING);
+                       spin_unlock(&lfsck->ml_lock);
+                       RETURN(-EINVAL);
+               }
+
+               rc = iops->next(env, di);
+       } while (rc == 0);
+
+       if (rc > 0 && !lfsck->ml_oit_over)
+               mdd_lfsck_close_dir(env, lfsck);
+
+       RETURN(rc);
+}
+
+/*
+ * Object-table based iteration engine (first-phase main loop).
+ *
+ * Each round first drains any open directory traversal through
+ * mdd_lfsck_dir_engine(), then reads the next FID from the object-table
+ * iterator (ml_di_oit), looks the object up and hands existing, non-remote
+ * objects to mdd_lfsck_exec_oit().  The periodic checkpoint and the speed
+ * control run after every object.
+ *
+ * Returns 1 once the object table is exhausted (ml_oit_over) and no
+ * directory is left to traverse, 0 when the thread is no longer running
+ * (or the crash fail-point fires), and otherwise the value that ended the
+ * loop.  Per-object failures abort the scan only when LPF_FAILOUT is set.
+ */
+static int mdd_lfsck_oit_engine(const struct lu_env *env,
+                               struct md_lfsck *lfsck)
+{
+       struct mdd_thread_info  *info   = mdd_env_info(env);
+       struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
+       const struct dt_it_ops  *iops   =
+                               &lfsck->ml_obj_oit->do_index_ops->dio_it;
+       struct dt_it            *di     = lfsck->ml_di_oit;
+       struct lu_fid           *fid    = &info->mti_fid;
+       struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
+       struct ptlrpc_thread    *thread = &lfsck->ml_thread;
+       int                      rc;
+       ENTRY;
+
+       do {
+               struct mdd_object *target;
+
+               /* A directory opened by mdd_lfsck_exec_oit() (or by prep) is
+                * traversed completely before the next table entry. */
+               if (lfsck->ml_di_dir != NULL) {
+                       rc = mdd_lfsck_dir_engine(env, lfsck);
+                       if (rc <= 0)
+                               RETURN(rc);
+               }
+
+               if (unlikely(lfsck->ml_oit_over))
+                       RETURN(1);
+
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY1) &&
+                   cfs_fail_val > 0) {
+                       struct l_wait_info lwi;
+
+                       lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
+                                         NULL, NULL);
+                       l_wait_event(thread->t_ctl_waitq,
+                                    !thread_is_running(thread),
+                                    &lwi);
+               }
+
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
+                       RETURN(0);
+
+               lfsck->ml_current_oit_processed = 1;
+               lfsck->ml_new_scanned++;
+               rc = iops->rec(env, di, (struct dt_rec *)fid, 0);
+               if (rc != 0) {
+                       mdd_lfsck_fail(env, lfsck, true);
+                       if (bk->lb_param & LPF_FAILOUT)
+                               RETURN(rc);
+                       else
+                               goto checkpoint;
+               }
+
+               target = mdd_object_find(env, mdd, fid);
+               if (target == NULL) {
+                       goto checkpoint;
+               } else if (IS_ERR(target)) {
+                       mdd_lfsck_fail(env, lfsck, true);
+                       if (bk->lb_param & LPF_FAILOUT)
+                               RETURN(PTR_ERR(target));
+                       else
+                               goto checkpoint;
+               }
+
+               /* XXX: Currently, skip remote object, the consistency for
+                *      remote object will be processed in LFSCK phase III. */
+               if (mdd_object_exists(target) && !mdd_object_remote(target))
+                       rc = mdd_lfsck_exec_oit(env, lfsck, target);
+               mdd_object_put(env, target);
+               if (rc != 0 && bk->lb_param & LPF_FAILOUT)
+                       RETURN(rc);
+
+checkpoint:
+               rc = mdd_lfsck_checkpoint(env, lfsck);
+               if (rc != 0 && bk->lb_param & LPF_FAILOUT)
+                       RETURN(rc);
+
+               /* Rate control. */
+               mdd_lfsck_control_speed(lfsck);
+
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL1)) {
+                       spin_lock(&lfsck->ml_lock);
+                       thread_set_flags(thread, SVC_STOPPING);
+                       spin_unlock(&lfsck->ml_lock);
+                       RETURN(-EINVAL);
+               }
+
+               rc = iops->next(env, di);
+               if (unlikely(rc > 0))
+                       lfsck->ml_oit_over = 1;
+               else if (likely(rc == 0))
+                       lfsck->ml_current_oit_processed = 0;
+
+               if (unlikely(!thread_is_running(thread)))
+                       RETURN(0);
+       /* Keep looping after the table ends while a directory is still open,
+        * so that it is drained at the top of the next round. */
+       } while (rc == 0 || lfsck->ml_di_dir != NULL);
+
+       RETURN(rc);
 }
 
+/*
+ * Body of the LFSCK thread; args is the struct md_lfsck it works for.
+ *
+ * Visible flow of the new version: create the object-table (OIT)
+ * iterator, publish it in lfsck->ml_di_oit, run mdd_lfsck_prep(), flag
+ * the thread SVC_RUNNING, drive mdd_lfsck_oit_engine(), call
+ * mdd_lfsck_post(), release the iterator, optionally run
+ * mdd_lfsck_double_scan(), then flag the thread SVC_STOPPED and wake the
+ * waiters on t_ctl_waitq.
+ *
+ * Returns the rc left by the last step that was executed.
+ *
+ * NOTE(review): the lu_env setup sits in lines that the hunk boundary
+ * below does not show.
+ */
 static int mdd_lfsck_main(void *args)
 {
        struct lu_env            env;
-       struct md_lfsck         *lfsck  = (struct md_lfsck *)args;
-       struct ptlrpc_thread    *thread = &lfsck->ml_thread;
-       struct dt_object        *obj    = lfsck->ml_it_obj;
-       const struct dt_it_ops  *iops   = &obj->do_index_ops->dio_it;
-       struct dt_it            *di;
-       struct lu_fid           *fid;
+       struct md_lfsck         *lfsck    = (struct md_lfsck *)args;
+       struct ptlrpc_thread    *thread   = &lfsck->ml_thread;
+       struct dt_object        *oit_obj  = lfsck->ml_obj_oit;
+       const struct dt_it_ops  *oit_iops = &oit_obj->do_index_ops->dio_it;
+       struct dt_it            *oit_di;
        int                      rc;
        ENTRY;
 
@@ -114,134 +2544,262 @@ static int mdd_lfsck_main(void *args)
                GOTO(noenv, rc);
        }
 
-       di = iops->init(&env, obj, lfsck->ml_args, BYPASS_CAPA);
-       if (IS_ERR(di)) {
-               rc = PTR_ERR(di);
+       oit_di = oit_iops->init(&env, oit_obj, lfsck->ml_args_oit, BYPASS_CAPA);
+       if (IS_ERR(oit_di)) {
+               rc = PTR_ERR(oit_di);
                CERROR("%s: LFSCK, fail to init iteration, rc = %d\n",
                       mdd_lfsck2name(lfsck), rc);
                GOTO(fini_env, rc);
        }
 
-       CDEBUG(D_LFSCK, "LFSCK: flags = 0x%x, pid = %d\n",
-              lfsck->ml_args, cfs_curproc_pid());
+       /* Publish the iterator under ml_lock; mdd_lfsck_stop() reads
+        * ml_di_oit while holding the same lock. */
+       spin_lock(&lfsck->ml_lock);
+       lfsck->ml_di_oit = oit_di;
+       spin_unlock(&lfsck->ml_lock);
+       rc = mdd_lfsck_prep(&env, lfsck);
+       if (rc != 0)
+               GOTO(fini_oit, rc);
 
-       /* XXX: Prepare before wakeup the sponsor.
-        *      Each lfsck component should call iops->get() API with
-        *      every bookmark, then low layer module can decide the
-        *      start point for current iteration. */
+       CDEBUG(D_LFSCK, "LFSCK entry: oit_flags = 0x%x, dir_flags = 0x%x, "
+              "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
+              ", pid = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
+              lfsck->ml_pos_current.lp_oit_cookie,
+              lfsck->ml_pos_current.lp_dir_cookie,
+              PFID(&lfsck->ml_pos_current.lp_dir_parent),
+              cfs_curproc_pid());
 
-       cfs_spin_lock(&lfsck->ml_lock);
+       /* Flag the thread as running, then wake whoever sleeps on
+        * t_ctl_waitq (mdd_lfsck_start() waits there). */
+       spin_lock(&lfsck->ml_lock);
        thread_set_flags(thread, SVC_RUNNING);
-       cfs_spin_unlock(&lfsck->ml_lock);
+       spin_unlock(&lfsck->ml_lock);
        cfs_waitq_broadcast(&thread->t_ctl_waitq);
 
-       /* Call iops->load() to finish the choosing start point. */
-       rc = iops->load(&env, di, 0);
-       if (rc != 0)
-               GOTO(out, rc);
-
-       CDEBUG(D_LFSCK, "LFSCK: iteration start: pos = %s\n",
-              (char *)iops->key(&env, di));
+       /* The OIT engine is skipped only when ml_list_scan is empty while
+        * ml_list_double_scan is not; rc = 1 then sends the flow to the
+        * double scan handled after the iterator is finalized. */
+       if (!cfs_list_empty(&lfsck->ml_list_scan) ||
+           cfs_list_empty(&lfsck->ml_list_double_scan))
+               rc = mdd_lfsck_oit_engine(&env, lfsck);
+       else
+               rc = 1;
 
-       lfsck->ml_new_scanned = 0;
-       fid = &mdd_env_info(&env)->mti_fid;
-       while (rc == 0) {
-               iops->rec(&env, di, (struct dt_rec *)fid, 0);
+       CDEBUG(D_LFSCK, "LFSCK exit: oit_flags = 0x%x, dir_flags = 0x%x, "
+              "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
+              ", pid = %d, rc = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
+              lfsck->ml_pos_current.lp_oit_cookie,
+              lfsck->ml_pos_current.lp_dir_cookie,
+              PFID(&lfsck->ml_pos_current.lp_dir_parent),
+              cfs_curproc_pid(), rc);
 
-               /* XXX: here, perform LFSCK when some LFSCK component(s)
-                *      introduced in the future. */
-               lfsck->ml_new_scanned++;
+       /* NOTE(review): presumably put() tells the low layer iterator about
+        * the pause, as the removed comment below described - confirm. */
+       if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan))
+               oit_iops->put(&env, oit_di);
 
-               /* XXX: here, make checkpoint when some LFSCK component(s)
-                *      introduced in the future. */
+       /* mdd_lfsck_post() is skipped when the OBD_FAIL_LFSCK_CRASH fail
+        * check fires. */
+       if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
+               rc = mdd_lfsck_post(&env, lfsck, rc);
+       if (lfsck->ml_di_dir != NULL)
+               mdd_lfsck_close_dir(&env, lfsck);
 
-               /* Rate control. */
-               mdd_lfsck_control_speed(lfsck);
-               if (unlikely(!thread_is_running(thread)))
-                       GOTO(out, rc = 0);
+fini_oit:
+       /* Withdraw the iterator under ml_lock before it is finalized. */
+       spin_lock(&lfsck->ml_lock);
+       lfsck->ml_di_oit = NULL;
+       spin_unlock(&lfsck->ml_lock);
 
-               rc = iops->next(&env, di);
+       oit_iops->fini(&env, oit_di);
+       /* rc == 1: run the double scan if any component is queued for it,
+        * otherwise report success. */
+       if (rc == 1) {
+               if (!cfs_list_empty(&lfsck->ml_list_double_scan))
+                       rc = mdd_lfsck_double_scan(&env, lfsck);
+               else
+                       rc = 0;
        }
 
-       GOTO(out, rc);
-
-out:
-       if (lfsck->ml_paused) {
-               /* XXX: It is hack here: if the lfsck is still running when MDS
-                *      umounts, it should be restarted automatically after MDS
-                *      remounts up.
-                *
-                *      To support that, we need to record the lfsck status in
-                *      the lfsck on-disk bookmark file. But now, there is not
-                *      lfsck component under the lfsck framework. To avoid to
-                *      introduce nunecessary bookmark incompatibility issues,
-                *      we write nothing to the lfsck bookmark file now.
-                *
-                *      Instead, we will reuse dt_it_ops::put() method to notify
-                *      low layer iterator to process such case.
-                *
-                *      It is just temporary solution, and will be replaced when
-                *      some lfsck component is introduced in the future. */
-               iops->put(&env, di);
-               CDEBUG(D_LFSCK, "LFSCK: iteration pasued: pos = %s, rc = %d\n",
-                      (char *)iops->key(&env, di), rc);
-       } else {
-               CDEBUG(D_LFSCK, "LFSCK: iteration stop: pos = %s, rc = %d\n",
-                      (char *)iops->key(&env, di), rc);
-       }
-       iops->fini(&env, di);
+       /* XXX: Purge the pinned objects in the future. */
 
 fini_env:
        lu_env_fini(&env);
 
 noenv:
-       cfs_spin_lock(&lfsck->ml_lock);
+       /* Every exit path ends here: flag the thread as stopped and wake
+        * the waiters while still holding ml_lock. */
+       spin_lock(&lfsck->ml_lock);
        thread_set_flags(thread, SVC_STOPPED);
        cfs_waitq_broadcast(&thread->t_ctl_waitq);
-       cfs_spin_unlock(&lfsck->ml_lock);
+       spin_unlock(&lfsck->ml_lock);
+       return rc;
+}
+
+/* external interfaces */
+
+/*
+ * Change the LFSCK speed limit and store the bookmark afterwards.
+ *
+ * Both steps run with lfsck->ml_mutex held.
+ *
+ * Returns the result of mdd_lfsck_bookmark_store().
+ */
+int mdd_lfsck_set_speed(const struct lu_env *env, struct md_lfsck *lfsck,
+                       __u32 limit)
+{
+       int rc;
+
+       mutex_lock(&lfsck->ml_mutex);
+       __mdd_lfsck_set_speed(lfsck, limit);
+       rc = mdd_lfsck_bookmark_store(env, lfsck);
+       mutex_unlock(&lfsck->ml_mutex);
+       return rc;
+}
+
+/*
+ * Ask the LFSCK component of the given type to dump itself into buf.
+ *
+ * Returns -ENODEV when the LFSCK instance has not been initialized,
+ * -ENOTSUPP when no component of that type is found, otherwise whatever
+ * the component's lfsck_dump() method returns.
+ */
+int mdd_lfsck_dump(const struct lu_env *env, struct md_lfsck *lfsck,
+                  __u16 type, char *buf, int len)
+{
+       struct lfsck_component *com;
+       int                     rc;
+
+       if (!lfsck->ml_initialized)
+               return -ENODEV;
+
+       com = mdd_lfsck_component_find(lfsck, type);
+       if (com == NULL)
+               return -ENOTSUPP;
+
+       rc = com->lc_ops->lfsck_dump(env, com, buf, len);
+       /* NOTE(review): presumably drops a reference taken by the find
+        * above - confirm against mdd_lfsck_component_find(). */
+       mdd_lfsck_component_put(env, com);
        return rc;
 }
 
+/*
+ * Start the LFSCK thread, or auto-resume it when start is NULL.
+ *
+ * With a non-NULL start the requested speed limit, failout and dryrun
+ * parameters are merged into the in-RAM bookmark (which is stored when
+ * anything changed) and the component lists are adjusted according to
+ * start->ls_active. On that path start->ls_version and start->ls_active
+ * are written back for the caller.
+ *
+ * Returns 0 without doing anything when start is NULL and ml_list_scan is
+ * empty (or the OBD_FAIL_LFSCK_NO_AUTO fail check fires), -ENOTSUPP when
+ * ml_obj_oit is NULL or an unsupported type is requested, -EALREADY when
+ * the thread is neither in init nor in stopped state, otherwise the
+ * negative rc of a failed step, or 0.
+ */
 int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
                    struct lfsck_start *start)
 {
-       struct ptlrpc_thread *thread  = &lfsck->ml_thread;
-       struct l_wait_info    lwi     = { 0 };
-       int                   rc      = 0;
-       __u16                 valid   = 0;
-       __u16                 flags   = 0;
+       struct lfsck_bookmark  *bk     = &lfsck->ml_bookmark_ram;
+       struct ptlrpc_thread   *thread = &lfsck->ml_thread;
+       struct lfsck_component *com;
+       struct l_wait_info      lwi    = { 0 };
+       bool                    dirty  = false;
+       int                     rc     = 0;
+       __u16                   valid  = 0;
+       __u16                   flags  = 0;
        ENTRY;
 
-       cfs_mutex_lock(&lfsck->ml_mutex);
-       cfs_spin_lock(&lfsck->ml_lock);
-       if (thread_is_running(thread)) {
-               cfs_spin_unlock(&lfsck->ml_lock);
-               cfs_mutex_unlock(&lfsck->ml_mutex);
+       /* Without the object-table iteration object nothing can be scanned. */
+       if (lfsck->ml_obj_oit == NULL)
+               RETURN(-ENOTSUPP);
+
+       /* start == NULL means auto trigger paused LFSCK. */
+       if ((start == NULL) &&
+           (cfs_list_empty(&lfsck->ml_list_scan) ||
+            OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
+               RETURN(0);
+
+       mutex_lock(&lfsck->ml_mutex);
+       spin_lock(&lfsck->ml_lock);
+       /* Only a thread in init or stopped state may be (re)started. */
+       if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
+               spin_unlock(&lfsck->ml_lock);
+               mutex_unlock(&lfsck->ml_mutex);
                RETURN(-EALREADY);
        }
 
-       cfs_spin_unlock(&lfsck->ml_lock);
-       if (start->ls_valid & LSV_SPEED_LIMIT)
-               mdd_lfsck_set_speed(lfsck, start->ls_speed_limit);
+       spin_unlock(&lfsck->ml_lock);
+
+       /* Reset the per-run state; ml_mutex is still held. */
+       lfsck->ml_paused = 0;
+       lfsck->ml_oit_over = 0;
+       lfsck->ml_drop_dryrun = 0;
+       lfsck->ml_new_scanned = 0;
+
+       /* For auto trigger. */
+       if (start == NULL)
+               goto trigger;
+
+       /* Hand the bookmark version back to the caller. */
+       start->ls_version = bk->lb_version;
+       if (start->ls_valid & LSV_SPEED_LIMIT) {
+               __mdd_lfsck_set_speed(lfsck, start->ls_speed_limit);
+               dirty = true;
+       }
 
        if (start->ls_valid & LSV_ERROR_HANDLE) {
                valid |= DOIV_ERROR_HANDLE;
                if (start->ls_flags & LPF_FAILOUT)
                        flags |= DOIF_FAILOUT;
+
+               /* Mirror the requested failout setting in the bookmark and
+                * remember whether it has to be stored. */
+               if ((start->ls_flags & LPF_FAILOUT) &&
+                   !(bk->lb_param & LPF_FAILOUT)) {
+                       bk->lb_param |= LPF_FAILOUT;
+                       dirty = true;
+               } else if (!(start->ls_flags & LPF_FAILOUT) &&
+                          (bk->lb_param & LPF_FAILOUT)) {
+                       bk->lb_param &= ~LPF_FAILOUT;
+                       dirty = true;
+               }
        }
 
-       /* XXX: 1. low layer does not care 'dryrun'.
-        *      2. will process 'ls_active' when introduces LFSCK for layout
-        *         consistency, DNE consistency, and so on in the future. */
-       start->ls_active = 0;
+       /* Same for dryrun; ml_drop_dryrun records that dryrun has just
+        * been switched off. */
+       if (start->ls_valid & LSV_DRYRUN) {
+               if ((start->ls_flags & LPF_DRYRUN) &&
+                   !(bk->lb_param & LPF_DRYRUN)) {
+                       bk->lb_param |= LPF_DRYRUN;
+                       dirty = true;
+               } else if (!(start->ls_flags & LPF_DRYRUN) &&
+                          (bk->lb_param & LPF_DRYRUN)) {
+                       bk->lb_param &= ~LPF_DRYRUN;
+                       lfsck->ml_drop_dryrun = 1;
+                       dirty = true;
+               }
+       }
+
+       /* Store the bookmark only when a parameter really changed. */
+       if (dirty) {
+               rc = mdd_lfsck_bookmark_store(env, lfsck);
+               if (rc != 0)
+                       GOTO(out, rc);
+       }
 
        if (start->ls_flags & LPF_RESET)
                flags |= DOIF_RESET;
 
-       if (start->ls_active != 0)
+       if (start->ls_active != 0) {
+               struct lfsck_component *next;
+               __u16 type = 1;
+
+               if (start->ls_active == LFSCK_TYPES_ALL)
+                       start->ls_active = LFSCK_TYPES_SUPPORTED;
+
+               /* On an unsupported request only the unsupported bits are
+                * left in ls_active before failing. */
+               if (start->ls_active & ~LFSCK_TYPES_SUPPORTED) {
+                       start->ls_active &= ~LFSCK_TYPES_SUPPORTED;
+                       GOTO(out, rc = -ENOTSUPP);
+               }
+
+               /* Call lfsck_post() on every component that is on the scan
+                * list but was not requested this time. */
+               cfs_list_for_each_entry_safe(com, next,
+                                            &lfsck->ml_list_scan, lc_link) {
+                       if (!(com->lc_type & start->ls_active)) {
+                               rc = com->lc_ops->lfsck_post(env, com, 0,
+                                                            false);
+                               if (rc != 0)
+                                       GOTO(out, rc);
+                       }
+               }
+
+               /* Walk the requested type bits one by one and move each
+                * matching idle component to the scan list. */
+               while (start->ls_active != 0) {
+                       if (type & start->ls_active) {
+                               com = __mdd_lfsck_component_find(lfsck, type,
+                                                       &lfsck->ml_list_idle);
+                               if (com != NULL) {
+                                       /* The component status will be updated
+                                        * when its prep() is called later by
+                                        * the LFSCK main engine. */
+                                       cfs_list_del_init(&com->lc_link);
+                                       cfs_list_add_tail(&com->lc_link,
+                                                         &lfsck->ml_list_scan);
+                               }
+                               start->ls_active &= ~type;
+                       }
+                       type <<= 1;
+               }
+       }
+
+       /* Report the types on the scan list back through ls_active and
+        * reset each of them when LPF_RESET was requested. */
+       cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
+               start->ls_active |= com->lc_type;
+               if (flags & DOIF_RESET) {
+                       rc = com->lc_ops->lfsck_reset(env, com, false);
+                       if (rc != 0)
+                               GOTO(out, rc);
+               }
+       }
+
+trigger:
+       /* Build the directory and OIT iterator arguments from the bookmark
+        * parameters; this part also runs for the auto trigger case. */
+       lfsck->ml_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
+       if (bk->lb_param & LPF_DRYRUN)
+               lfsck->ml_args_dir |= LUDA_VERIFY_DRYRUN;
+
+       if (bk->lb_param & LPF_FAILOUT) {
+               valid |= DOIV_ERROR_HANDLE;
+               flags |= DOIF_FAILOUT;
+       }
+
+       if (!cfs_list_empty(&lfsck->ml_list_scan))
                flags |= DOIF_OUTUSED;
 
-       lfsck->ml_args = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
+       lfsck->ml_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
        thread_set_flags(thread, 0);
        rc = cfs_create_thread(mdd_lfsck_main, lfsck, 0);
        if (rc < 0)
@@ -252,90 +2810,162 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
                             thread_is_running(thread) ||
                             thread_is_stopped(thread),
                             &lwi);
-       cfs_mutex_unlock(&lfsck->ml_mutex);
 
-       RETURN(rc < 0 ? rc : 0);
+       GOTO(out, rc = 0);
+
+out:
+       /* Every path taken after ml_mutex was locked leaves through here;
+        * a positive rc is reported as 0. */
+       mutex_unlock(&lfsck->ml_mutex);
+       return (rc < 0 ? rc : 0);
 }
 
-int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck)
+/*
+ * Stop the LFSCK thread and wait until it has reached the stopped state.
+ *
+ * When pause is true, ml_paused is set before the thread is flagged
+ * SVC_STOPPING. The wait happens with ml_mutex held.
+ *
+ * Returns 0 when LFSCK is not initialized or once the thread has stopped,
+ * -EALREADY when the thread is in init or stopped state already.
+ */
+int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck,
+                  bool pause)
 {
        struct ptlrpc_thread *thread = &lfsck->ml_thread;
        struct l_wait_info    lwi    = { 0 };
        ENTRY;
 
-       cfs_mutex_lock(&lfsck->ml_mutex);
-       cfs_spin_lock(&lfsck->ml_lock);
+       if (!lfsck->ml_initialized)
+               RETURN(0);
+
+       mutex_lock(&lfsck->ml_mutex);
+       spin_lock(&lfsck->ml_lock);
        if (thread_is_init(thread) || thread_is_stopped(thread)) {
-               cfs_spin_unlock(&lfsck->ml_lock);
-               cfs_mutex_unlock(&lfsck->ml_mutex);
+               spin_unlock(&lfsck->ml_lock);
+               mutex_unlock(&lfsck->ml_mutex);
                RETURN(-EALREADY);
        }
 
+       if (pause)
+               lfsck->ml_paused = 1;
        thread_set_flags(thread, SVC_STOPPING);
-       cfs_spin_unlock(&lfsck->ml_lock);
+       /* The LFSCK thread may be sleeping on low layer wait queue,
+        * wake it up. */
+       /* NOTE(review): put() runs with the ml_lock spinlock held; confirm
+        * that the low layer implementation never sleeps. */
+       if (likely(lfsck->ml_di_oit != NULL))
+               lfsck->ml_obj_oit->do_index_ops->dio_it.put(env,
+                                                           lfsck->ml_di_oit);
+       spin_unlock(&lfsck->ml_lock);
 
        cfs_waitq_broadcast(&thread->t_ctl_waitq);
        l_wait_event(thread->t_ctl_waitq,
                     thread_is_stopped(thread),
                     &lwi);
-       cfs_mutex_unlock(&lfsck->ml_mutex);
+       mutex_unlock(&lfsck->ml_mutex);
 
        RETURN(0);
 }
 
-const char lfsck_bookmark_name[] = "lfsck_bookmark";
-
+/* FID (sequence FID_SEQ_LOCAL_FILE, object id OTABLE_IT_OID, version 0)
+ * that mdd_lfsck_setup() passes to dt_locate() to obtain the object kept
+ * in ml_obj_oit. */
 static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE,
                                            .f_oid = OTABLE_IT_OID,
                                            .f_ver = 0 };
 
+/*
+ * Set up the LFSCK instance embedded in mdd.
+ *
+ * New version: initialize the mutex, spinlock, component lists and wait
+ * queue, locate the object-table iteration object, create and load (or
+ * initialize) the bookmark file, then set up the namespace component.
+ *
+ * Returns PTR_ERR() when the first dt_locate() fails, otherwise 0.
+ *
+ * NOTE(review): the "out" error path also returns 0, so rc of a failed
+ * step is not propagated; ml_obj_oit is reset to NULL there, which makes
+ * mdd_lfsck_start() return -ENOTSUPP later. Presumably intended so that
+ * an LFSCK setup failure is not fatal - confirm.
+ */
 int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
 {
-       struct md_lfsck  *lfsck = &mdd->mdd_lfsck;
-       struct dt_object *obj;
-       int               rc;
+       struct md_lfsck         *lfsck = &mdd->mdd_lfsck;
+       struct dt_object        *obj;
+       struct lu_fid            fid;
+       int                      rc;
 
-       memset(lfsck, 0, sizeof(*lfsck));
-       lfsck->ml_version = LFSCK_VERSION_V1;
-       cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq);
-       cfs_mutex_init(&lfsck->ml_mutex);
-       cfs_spin_lock_init(&lfsck->ml_lock);
+       ENTRY;
 
-       obj = dt_store_open(env, mdd->mdd_child, "", lfsck_bookmark_name,
-                           &mdd_env_info(env)->mti_fid);
-       if (IS_ERR(obj))
-               return PTR_ERR(obj);
+       LASSERT(!lfsck->ml_initialized);
 
-       lfsck->ml_bookmark_obj = obj;
+       /* The instance counts as initialized from here on, even if a later
+        * step fails. */
+       lfsck->ml_initialized = 1;
+       mutex_init(&lfsck->ml_mutex);
+       spin_lock_init(&lfsck->ml_lock);
+       CFS_INIT_LIST_HEAD(&lfsck->ml_list_scan);
+       CFS_INIT_LIST_HEAD(&lfsck->ml_list_dir);
+       CFS_INIT_LIST_HEAD(&lfsck->ml_list_double_scan);
+       CFS_INIT_LIST_HEAD(&lfsck->ml_list_idle);
+       cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq);
 
-       obj = dt_locate(env, mdd->mdd_child, &lfsck_it_fid);
+       obj = dt_locate(env, mdd->mdd_bottom, &lfsck_it_fid);
        if (IS_ERR(obj))
-               return PTR_ERR(obj);
+               RETURN(PTR_ERR(obj));
 
+       lfsck->ml_obj_oit = obj;
        rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
        if (rc != 0) {
-               lu_object_put(env, &obj->do_lu);
-               return rc;
+               /* NOTE(review): on -ENOTSUPP the function succeeds and
+                * ml_obj_oit stays set - confirm this is intended. */
+               if (rc == -ENOTSUPP)
+                       RETURN(0);
+               GOTO(out, rc);
        }
 
-       lfsck->ml_it_obj = obj;
+       /* LFSCK bookmark */
+       fid_zero(&fid);
+       rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid,
+                                  lfsck_bookmark_name,
+                                  S_IFREG | S_IRUGO | S_IWUSR, &fid);
+       if (rc < 0)
+               GOTO(out, rc);
+
+       obj = dt_locate(env, mdd->mdd_bottom, &fid);
+       if (IS_ERR(obj))
+               GOTO(out, rc = PTR_ERR(obj));
+
+       LASSERT(lu_object_exists(&obj->do_lu));
+       lfsck->ml_bookmark_obj = obj;
+
+       /* -ENODATA from the load leads to initializing the bookmark. */
+       rc = mdd_lfsck_bookmark_load(env, lfsck);
+       if (rc == -ENODATA)
+               rc = mdd_lfsck_bookmark_init(env, lfsck);
+       if (rc != 0)
+               GOTO(out, rc);
 
+       rc = mdd_lfsck_namespace_setup(env, lfsck);
+       if (rc < 0)
+               GOTO(out, rc);
+       /* XXX: LFSCK components initialization to be added here. */
+       RETURN(0);
+out:
+       /* Only the OIT object is released here; ml_bookmark_obj, when set,
+        * is released by mdd_lfsck_cleanup(). */
+       lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
+       lfsck->ml_obj_oit = NULL;
        return 0;
 }
 
+/*
+ * Release what mdd_lfsck_setup() acquired for the LFSCK instance in mdd.
+ *
+ * Does nothing when the instance was never initialized. The new version
+ * no longer stops the thread itself; it asserts that the thread is in
+ * init or stopped state already.
+ */
 void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)
 {
-       struct md_lfsck *lfsck = &mdd->mdd_lfsck;
+       struct md_lfsck         *lfsck  = &mdd->mdd_lfsck;
+       struct ptlrpc_thread    *thread = &lfsck->ml_thread;
+       struct lfsck_component  *com;
 
-       if (lfsck->ml_it_obj != NULL) {
-               lfsck->ml_paused = 1;
-               mdd_lfsck_stop(env, lfsck);
-               lu_object_put(env, &lfsck->ml_it_obj->do_lu);
-               lfsck->ml_it_obj = NULL;
+       if (!lfsck->ml_initialized)
+               return;
+
+       LASSERT(thread_is_init(thread) || thread_is_stopped(thread));
+
+       if (lfsck->ml_obj_oit != NULL) {
+               lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
+               lfsck->ml_obj_oit = NULL;
        }
 
+       LASSERT(lfsck->ml_obj_dir == NULL);
+
        if (lfsck->ml_bookmark_obj != NULL) {
                lu_object_put(env, &lfsck->ml_bookmark_obj->do_lu);
                lfsck->ml_bookmark_obj = NULL;
        }
+
+       /* Drain the scan, double-scan and idle lists one entry at a time.
+        * NOTE(review): the loops terminate only if
+        * mdd_lfsck_component_cleanup() unlinks com from its list -
+        * confirm. */
+       while (!cfs_list_empty(&lfsck->ml_list_scan)) {
+               com = cfs_list_entry(lfsck->ml_list_scan.next,
+                                    struct lfsck_component,
+                                    lc_link);
+               mdd_lfsck_component_cleanup(env, com);
+       }
+
+       LASSERT(cfs_list_empty(&lfsck->ml_list_dir));
+
+       while (!cfs_list_empty(&lfsck->ml_list_double_scan)) {
+               com = cfs_list_entry(lfsck->ml_list_double_scan.next,
+                                    struct lfsck_component,
+                                    lc_link);
+               mdd_lfsck_component_cleanup(env, com);
+       }
+
+       while (!cfs_list_empty(&lfsck->ml_list_idle)) {
+               com = cfs_list_entry(lfsck->ml_list_idle.next,
+                                    struct lfsck_component,
+                                    lc_link);
+               mdd_lfsck_component_cleanup(env, com);
+       }
 }