Whamcloud - gitweb
LU-2752 build: Enhance build for cross compilation for MIC
[fs/lustre-release.git] / lustre / mdd / mdd_lfsck.c
index 349058b..7ab15f9 100644 (file)
@@ -20,7 +20,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2012, Intel Corporation.
+ * Copyright (c) 2012, 2013, Intel Corporation.
  */
 /*
  * lustre/mdd/mdd_lfsck.c
 
 #define HALF_SEC                       (CFS_HZ >> 1)
 #define LFSCK_CHECKPOINT_INTERVAL      60
-#define MDS_DIR_DUMMY_START            0xffffffffffffffffULL
+
+#define LFSCK_NAMEENTRY_DEAD           1 /* The object has been unlinked. */
+#define LFSCK_NAMEENTRY_REMOVED        2 /* The entry has been removed. */
+#define LFSCK_NAMEENTRY_RECREATED      3 /* The entry has been recreated. */
 
 const char lfsck_bookmark_name[] = "lfsck_bookmark";
 const char lfsck_namespace_name[] = "lfsck_namespace";
@@ -229,8 +232,7 @@ static int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
 }
 
 static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck,
-                              struct lfsck_position *pos, bool oit_processed,
-                              bool dir_processed)
+                              struct lfsck_position *pos, bool init)
 {
        const struct dt_it_ops *iops = &lfsck->ml_obj_oit->do_index_ops->dio_it;
 
@@ -242,29 +244,23 @@ static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck,
        }
 
        pos->lp_oit_cookie = iops->store(env, lfsck->ml_di_oit);
+       if (!lfsck->ml_current_oit_processed && !init)
+               pos->lp_oit_cookie--;
 
        LASSERT(pos->lp_oit_cookie > 0);
 
-       if (!oit_processed)
-               pos->lp_oit_cookie--;
-
        if (lfsck->ml_di_dir != NULL) {
                struct dt_object *dto = lfsck->ml_obj_dir;
 
-               pos->lp_dir_parent = *lu_object_fid(&dto->do_lu);
                pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
                                                        lfsck->ml_di_dir);
 
-               LASSERT(pos->lp_dir_cookie != MDS_DIR_DUMMY_START);
-
-               if (pos->lp_dir_cookie == MDS_DIR_END_OFF)
-                       LASSERT(dir_processed);
-
-               /* For the dir which just to be processed,
-                * lp_dir_cookie will become MDS_DIR_DUMMY_START,
-                * which can be correctly handled by mdd_lfsck_prep. */
-               if (!dir_processed)
-                       pos->lp_dir_cookie--;
+               if (pos->lp_dir_cookie >= MDS_DIR_END_OFF) {
+                       fid_zero(&pos->lp_dir_parent);
+                       pos->lp_dir_cookie = 0;
+               } else {
+                       pos->lp_dir_parent = *lu_object_fid(&dto->do_lu);
+               }
        } else {
                fid_zero(&pos->lp_dir_parent);
                pos->lp_dir_cookie = 0;
@@ -480,7 +476,7 @@ static int mdd_lfsck_bookmark_init(const struct lu_env *env,
        struct lfsck_bookmark *mb = &lfsck->ml_bookmark_ram;
        int rc;
 
-       memset(mb, 0, sizeof(mb));
+       memset(mb, 0, sizeof(*mb));
        mb->lb_magic = LFSCK_BOOKMARK_MAGIC;
        mb->lb_version = LFSCK_VERSION_V2;
        mutex_lock(&lfsck->ml_mutex);
@@ -680,89 +676,289 @@ static int mdd_lfsck_namespace_init(const struct lu_env *env,
        return rc;
 }
 
-static int mdd_declare_lfsck_namespace_unlink(const struct lu_env *env,
-                                             struct mdd_device *mdd,
-                                             struct dt_object *p,
-                                             struct dt_object *c,
-                                             const char *name,
-                                             struct thandle *handle)
+static int mdd_lfsck_namespace_lookup(const struct lu_env *env,
+                                     struct lfsck_component *com,
+                                     const struct lu_fid *fid,
+                                     __u8 *flags)
 {
-       int rc;
+       struct lu_fid *key = &mdd_env_info(env)->mti_fid; /* scratch key */
+       int            rc;
+
+       fid_cpu_to_be(key, fid); /* look up by the big-endian form of fid */
+       rc = dt_lookup(env, com->lc_obj, (struct dt_rec *)flags,
+                      (const struct dt_key *)key, BYPASS_CAPA);
+       return rc; /* dt_lookup() result, passed through unchanged */
+}
 
-       rc = dt_declare_delete(env, p, (const struct dt_key *)name, handle);
+static int mdd_lfsck_namespace_delete(const struct lu_env *env,
+                                     struct lfsck_component *com,
+                                     const struct lu_fid *fid)
+{
+       struct mdd_device *mdd    = mdd_lfsck2mdd(com->lc_lfsck);
+       struct lu_fid     *key    = &mdd_env_info(env)->mti_fid; /* scratch key */
+       struct thandle    *handle;
+       struct dt_object *obj     = com->lc_obj; /* index the record lives in */
+       int               rc;
+       ENTRY;
+
+       handle = dt_trans_create(env, mdd->mdd_bottom);
+       if (IS_ERR(handle))
+               RETURN(PTR_ERR(handle));
+
+       rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle);
         if (rc != 0)
-               return rc;
+               GOTO(out, rc);
 
-       rc = dt_declare_ref_del(env, c, handle);
+       rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
         if (rc != 0)
-               return rc;
+               GOTO(out, rc);
+
+       fid_cpu_to_be(key, fid); /* delete by the big-endian form of fid */
+       rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
+                      BYPASS_CAPA);
 
-       rc = dt_declare_destroy(env, c, handle);
+       GOTO(out, rc);
+
+out:
+       dt_trans_stop(env, mdd->mdd_bottom, handle); /* reached on every path */
         return rc;
 }
 
-static int mdd_lfsck_namespace_unlink(const struct lu_env *env,
-                                     struct mdd_device *mdd,
-                                     struct lfsck_component *com)
+static int mdd_lfsck_namespace_update(const struct lu_env *env,
+                                     struct lfsck_component *com,
+                                     const struct lu_fid *fid,
+                                     __u8 flags, bool force)
 {
-       struct mdd_thread_info  *info   = mdd_env_info(env);
-       struct lu_fid           *fid    = &info->mti_fid;
-       struct dt_object        *child  = com->lc_obj;
-       struct dt_object        *parent;
-       struct thandle          *handle;
-       int                      rc;
+       struct mdd_device *mdd    = mdd_lfsck2mdd(com->lc_lfsck);
+       struct lu_fid     *key    = &mdd_env_info(env)->mti_fid;
+       struct thandle    *handle;
+       struct dt_object *obj     = com->lc_obj;
+       int               rc;
+       bool              exist   = false; /* a record for fid is present */
+       __u8              tf; /* flags found by the lookup */
         ENTRY;
 
-       parent = dt_store_resolve(env, mdd->mdd_bottom, "", fid);
-       if (IS_ERR(parent))
-               RETURN(rc = PTR_ERR(parent));
+       rc = mdd_lfsck_namespace_lookup(env, com, fid, &tf);
+       if (rc != 0 && rc != -ENOENT)
+               RETURN(rc);
 
-       if (dt_try_as_dir(env, parent))
-               GOTO(out, rc = -ENOTDIR);
+       if (rc == 0) {
+               if (!force || flags == tf) /* keep the existing record */
+                       RETURN(0);
 
-       handle = dt_trans_create(env, mdd->mdd_bottom);
-       if (IS_ERR(handle))
-               GOTO(out, rc = PTR_ERR(handle));
+               exist = true;
+               handle = dt_trans_create(env, mdd->mdd_bottom);
+               if (IS_ERR(handle))
+                       RETURN(PTR_ERR(handle));
 
-       rc = mdd_declare_lfsck_namespace_unlink(env, mdd, parent, child,
-                                               lfsck_namespace_name, handle);
+               rc = dt_declare_delete(env, obj, (const struct dt_key *)fid,
+                                      handle);
+               if (rc != 0)
+                       GOTO(out, rc);
+       } else {
+               handle = dt_trans_create(env, mdd->mdd_bottom);
+               if (IS_ERR(handle))
+                       RETURN(PTR_ERR(handle));
+       }
+
+       rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
+                              (const struct dt_key *)fid, handle);
         if (rc != 0)
-               GOTO(stop, rc);
+               GOTO(out, rc);
 
         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
         if (rc != 0)
+               GOTO(out, rc);
+
+       fid_cpu_to_be(key, fid);
+       if (exist) { /* replace: drop the old record before re-inserting */
+               rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
+                              BYPASS_CAPA);
+               if (rc != 0) {
+                       CERROR("%s: fail to delete "DFID", rc = %d\n",
+                              mdd_lfsck2name(com->lc_lfsck), PFID(fid), rc);
+                       GOTO(out, rc);
+               }
+       }
+
+       rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
+                      (const struct dt_key *)key, handle, BYPASS_CAPA, 1);
+
+       GOTO(out, rc);
+
+out:
+       dt_trans_stop(env, mdd->mdd_bottom, handle);
+       return rc;
+}
+
+/**
+ * \retval +ve repaired (in dry-run mode: a repair would be needed)
+ * \retval 0   no need to repair
+ * \retval -ve error cases
+ */
+static int mdd_lfsck_namespace_double_scan_one(const struct lu_env *env,
+                                              struct lfsck_component *com,
+                                              struct mdd_object *child,
+                                              __u8 flags)
+{
+       struct mdd_thread_info  *info     = mdd_env_info(env);
+       struct lu_attr          *la       = &info->mti_la;
+       struct lu_name          *cname    = &info->mti_name;
+       struct lu_fid           *pfid     = &info->mti_fid;
+       struct lu_fid           *cfid     = &info->mti_fid2;
+       struct md_lfsck         *lfsck    = com->lc_lfsck;
+       struct mdd_device       *mdd      = mdd_lfsck2mdd(lfsck);
+       struct lfsck_bookmark   *bk       = &lfsck->ml_bookmark_ram;
+       struct lfsck_namespace  *ns       =
+                               (struct lfsck_namespace *)com->lc_file_ram;
+       struct linkea_data       ldata    = { 0 };
+       struct thandle          *handle   = NULL;
+       bool                     locked   = false;
+       bool                     update   = false;
+       int                      count;
+       int                      rc;
+       ENTRY;
+
+       if (com->lc_journal) {
+
+again:
+               LASSERT(!locked);
+
+               com->lc_journal = 1;
+               handle = mdd_trans_create(env, mdd);
+               if (IS_ERR(handle))
+                       RETURN(rc = PTR_ERR(handle));
+
+               rc = mdd_declare_links_add(env, child, handle, NULL);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = mdd_trans_start(env, mdd, handle);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               mdd_write_lock(env, child, MOR_TGT_CHILD);
+               locked = true;
+       }
+
+       if (unlikely(mdd_is_dead_obj(child)))
+               GOTO(stop, rc = 0);
+
+       rc = mdd_links_read(env, child, &ldata);
+       if (rc != 0) {
+               if ((bk->lb_param & LPF_DRYRUN) &&
+                   (rc == -EINVAL || rc == -ENODATA))
+                       rc = 1;
+
                 GOTO(stop, rc);
+       }
 
-       rc = dt_delete(env, parent, (struct dt_key *)lfsck_namespace_name,
-                      handle, BYPASS_CAPA);
+       rc = mdd_la_get(env, child, la, BYPASS_CAPA);
         if (rc != 0)
                 GOTO(stop, rc);
 
-       rc = child->do_ops->do_ref_del(env, child, handle);
-       if (rc != 0) {
-               lu_local_obj_fid(fid, LFSCK_NAMESPACE_OID);
-               rc = dt_insert(env, parent,
-                              (const struct dt_rec*)fid,
-                              (const struct dt_key *)lfsck_namespace_name,
-                              handle, BYPASS_CAPA, 1);
+       ldata.ld_lee = LINKEA_FIRST_ENTRY(ldata);
+       count = ldata.ld_leh->leh_reccount;
+       while (count-- > 0) {
+               struct mdd_object *parent = NULL;
+               struct dt_object *dir;
 
-               GOTO(stop, rc);
+               linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname,
+                                   pfid);
+               if (!fid_is_sane(pfid))
+                       goto shrink;
+
+               parent = mdd_object_find(env, mdd, pfid);
+               if (parent == NULL)
+                       goto shrink;
+               else if (IS_ERR(parent))
+                       GOTO(stop, rc = PTR_ERR(parent));
+
+               if (!mdd_object_exists(parent))
+                       goto shrink;
+
+               /* XXX: need more processing for remote object in the future. */
+               if (mdd_object_remote(parent)) {
+                       mdd_object_put(env, parent);
+                       ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
+                       continue;
+               }
+
+               dir = mdd_object_child(parent);
+               if (unlikely(!dt_try_as_dir(env, dir)))
+                       goto shrink;
+
+               /* To guarantee the 'name' is terminated with '0'. */
+               memcpy(info->mti_key, cname->ln_name, cname->ln_namelen);
+               info->mti_key[cname->ln_namelen] = 0;
+               cname->ln_name = info->mti_key;
+               rc = dt_lookup(env, dir, (struct dt_rec *)cfid,
+                              (const struct dt_key *)cname->ln_name,
+                              BYPASS_CAPA);
+               if (rc != 0 && rc != -ENOENT) {
+                       mdd_object_put(env, parent);
+                       GOTO(stop, rc);
+               }
+
+               if (rc == 0) {
+                       if (lu_fid_eq(cfid, mdo2fid(child))) {
+                               mdd_object_put(env, parent);
+                               ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
+                               continue;
+                       }
+
+                       goto shrink;
+               }
+
+               if (ldata.ld_leh->leh_reccount > la->la_nlink)
+                       goto shrink;
+
+               /* XXX: For the case of there is linkea entry, but without name
+                *      entry pointing to the object, and the object link count
+                *      isn't less than the count of name entries, then add the
+                *      name entry back to namespace.
+                *
+                *      It is out of LFSCK 1.5 scope, will implement it in the
+                *      future. Keep the linkEA entry. */
+               mdd_object_put(env, parent);
+               ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
+               continue;
+
+shrink:
+               if (parent != NULL)
+                       mdd_object_put(env, parent);
+               if (bk->lb_param & LPF_DRYRUN)
+                       GOTO(stop, rc = 1); /* release lock/handle, return 1 */
+
+               CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n",
+                      PFID(mdo2fid(child)), cname->ln_namelen, cname->ln_name,
+                      PFID(pfid));
+               linkea_del_buf(&ldata, cname);
+               update = true;
         }
 
+       if (update) {
+               if (!com->lc_journal) {
+                       com->lc_journal = 1;
+                       goto again;
+               }
 
-       rc = dt_destroy(env, child, handle);
-       if (rc == 0) {
-               lu_object_put(env, &child->do_lu);
-               com->lc_obj = NULL;
+               rc = mdd_links_write(env, child, &ldata, handle);
         }
 
         GOTO(stop, rc);
 
 stop:
-       dt_trans_stop(env, mdd->mdd_bottom, handle);
+       if (locked)
+               mdd_write_unlock(env, child);
 
-out:
-       lu_object_put(env, &parent->do_lu);
+       if (handle != NULL)
+               mdd_trans_stop(env, mdd, rc, handle);
+
+       if (rc == 0 && update) {
+               ns->ln_objs_nlink_repaired++;
+               rc = 1;
+       }
         return rc;
 }
 
@@ -771,12 +967,9 @@ out:
 static int mdd_lfsck_namespace_reset(const struct lu_env *env,
                                     struct lfsck_component *com, bool init)
 {
-       struct mdd_thread_info  *info = mdd_env_info(env);
-       struct lu_fid           *fid  = &info->mti_fid;
        struct lfsck_namespace  *ns   = (struct lfsck_namespace *)com->lc_file_ram;
        struct mdd_device       *mdd  = mdd_lfsck2mdd(com->lc_lfsck);
-       struct md_object        *mdo;
-       struct dt_object        *dto;
+       struct dt_object        *dto, *root;
        int                      rc;
        ENTRY;
 
@@ -794,39 +987,39 @@ static int mdd_lfsck_namespace_reset(const struct lu_env *env,
        ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
        ns->ln_status = LS_INIT;
 
-       rc = mdd_lfsck_namespace_unlink(env, mdd, com);
+       root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid);
+       if (unlikely(IS_ERR(root)))
+               GOTO(out, rc = PTR_ERR(root));
+
+       rc = local_object_unlink(env, mdd->mdd_bottom, root,
+                                lfsck_namespace_name);
        if (rc != 0)
                GOTO(out, rc);
 
-       lu_local_obj_fid(fid, LFSCK_NAMESPACE_OID);
-       mdo = llo_store_create_index(env, &mdd->mdd_md_dev, mdd->mdd_bottom, "",
-                                    lfsck_namespace_name, fid,
-                                    &dt_lfsck_features);
-       if (IS_ERR(mdo))
-               GOTO(out, rc = PTR_ERR(mdo));
-
-       lu_object_put(env, &mdo->mo_lu);
-       dto = dt_store_open(env, mdd->mdd_bottom, "", lfsck_namespace_name, fid);
+       dto = local_index_find_or_create(env, mdd->mdd_los, root,
+                                        lfsck_namespace_name,
+                                        S_IFREG | S_IRUGO | S_IWUSR,
+                                        &dt_lfsck_features);
        if (IS_ERR(dto))
                GOTO(out, rc = PTR_ERR(dto));
 
-       com->lc_obj = dto;
        rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features);
        if (rc != 0)
                GOTO(out, rc);
+       com->lc_obj = dto;
 
        rc = mdd_lfsck_namespace_store(env, com, true);
 
        GOTO(out, rc);
-
 out:
+       lu_object_put(env, &root->do_lu);
        up_write(&com->lc_sem);
        return rc;
 }
 
 static void
 mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
-                        bool oit, bool new_checked)
+                        bool new_checked)
 {
        struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
 
@@ -836,7 +1029,7 @@ mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
        ns->ln_items_failed++;
        if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
                mdd_lfsck_pos_fill(env, com->lc_lfsck,
-                                  &ns->ln_pos_first_inconsistent, oit, !oit);
+                                  &ns->ln_pos_first_inconsistent, false);
        up_write(&com->lc_sem);
 }
 
@@ -854,11 +1047,10 @@ static int mdd_lfsck_namespace_checkpoint(const struct lu_env *env,
 
        down_write(&com->lc_sem);
 
-       ns->ln_pos_last_checkpoint = lfsck->ml_pos_current;
        if (init) {
-               ns->ln_time_last_checkpoint = ns->ln_time_latest_start;
                ns->ln_pos_latest_start = lfsck->ml_pos_current;
        } else {
+               ns->ln_pos_last_checkpoint = lfsck->ml_pos_current;
                ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
                                HALF_SEC - lfsck->ml_time_last_checkpoint);
                ns->ln_time_last_checkpoint = cfs_time_current_sec();
@@ -932,13 +1124,6 @@ static int mdd_lfsck_namespace_prep(const struct lu_env *env,
                    mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
                        *pos = ns->ln_pos_last_checkpoint;
                        pos->lp_oit_cookie++;
-                       if (!fid_is_zero(&pos->lp_dir_parent)) {
-                               if (pos->lp_dir_cookie == MDS_DIR_END_OFF) {
-                                       fid_zero(&pos->lp_dir_parent);
-                               } else {
-                                       pos->lp_dir_cookie++;
-                               }
-                       }
                } else {
                        *pos = ns->ln_pos_first_inconsistent;
                }
@@ -961,16 +1146,72 @@ static int mdd_lfsck_namespace_exec_oit(const struct lu_env *env,
        return 0;
 }
 
-/* XXX: to be implemented in other patch.  */
+static int mdd_declare_lfsck_namespace_exec_dir(const struct lu_env *env,
+                                               struct mdd_object *obj,
+                                               struct thandle *handle)
+{
+       int rc;
+
+       /* Declare removal of the linkEA xattr (drops all invalid entries). */
+       rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle);
+       if (rc != 0)
+               return rc;
+
+       /* Declare insertion of the new linkEA entry. */
+       rc = mdd_declare_links_add(env, obj, handle, NULL);
+       return rc;
+}
+
+static int mdd_lfsck_namespace_check_exist(const struct lu_env *env,
+                                          struct md_lfsck *lfsck,
+                                          struct mdd_object *obj,
+                                          const char *name)
+{
+       struct dt_object *dir = lfsck->ml_obj_dir; /* dir being scanned */
+       struct lu_fid    *fid = &mdd_env_info(env)->mti_fid; /* lookup result */
+       int               rc;
+       ENTRY;
+
+       if (unlikely(mdd_is_dead_obj(obj)))
+               RETURN(LFSCK_NAMEENTRY_DEAD);
+
+       rc = dt_lookup(env, dir, (struct dt_rec *)fid,
+                      (const struct dt_key *)name, BYPASS_CAPA);
+       if (rc == -ENOENT) /* name is no longer present in dir */
+               RETURN(LFSCK_NAMEENTRY_REMOVED);
+
+       if (rc < 0) /* any other lookup failure is passed to the caller */
+               RETURN(rc);
+
+       if (!lu_fid_eq(fid, mdo2fid(obj))) /* name now maps to another FID */
+               RETURN(LFSCK_NAMEENTRY_RECREATED);
+
+       RETURN(0); /* name still refers to obj */
+}
+
 static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env,
                                        struct lfsck_component *com,
                                        struct mdd_object *obj,
                                        struct lu_dirent *ent)
 {
+       struct mdd_thread_info     *info     = mdd_env_info(env);
+       struct lu_attr             *la       = &info->mti_la;
+       struct md_lfsck            *lfsck    = com->lc_lfsck;
+       struct lfsck_bookmark      *bk       = &lfsck->ml_bookmark_ram;
        struct lfsck_namespace     *ns       =
                                (struct lfsck_namespace *)com->lc_file_ram;
+       struct mdd_device          *mdd      = mdd_lfsck2mdd(lfsck);
+       struct linkea_data          ldata    = { 0 };
+       const struct lu_fid        *pfid     =
+                               lu_object_fid(&lfsck->ml_obj_dir->do_lu);
+       const struct lu_fid        *cfid     = mdo2fid(obj);
        const struct lu_name       *cname;
-       int                         repaired;
+       struct thandle             *handle   = NULL;
+       bool                        repaired = false;
+       bool                        locked   = false;
+       int                         count    = 0;
+       int                         rc;
+       ENTRY;
 
        cname = mdd_name_get_const(env, ent->lde_name, ent->lde_namelen);
        down_write(&com->lc_sem);
@@ -978,22 +1219,190 @@ static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env,
 
        if (ent->lde_attrs & LUDA_UPGRADE) {
                ns->ln_flags |= LF_UPGRADE;
-               repaired = 1;
+               repaired = true;
        } else if (ent->lde_attrs & LUDA_REPAIR) {
                ns->ln_flags |= LF_INCONSISTENT;
-               repaired = 1;
+               repaired = true;
+       }
+
+       if (ent->lde_name[0] == '.' &&
+           (ent->lde_namelen == 1 ||
+            (ent->lde_namelen == 2 && ent->lde_name[1] == '.') ||
+            fid_is_dot_lustre(&ent->lde_fid)))
+               GOTO(out, rc = 0);
+
+       if (!(bk->lb_param & LPF_DRYRUN) &&
+           (com->lc_journal || repaired)) {
+
+again:
+               LASSERT(!locked);
+
+               com->lc_journal = 1;
+               handle = mdd_trans_create(env, mdd);
+               if (IS_ERR(handle))
+                       GOTO(out, rc = PTR_ERR(handle));
+
+               rc = mdd_declare_lfsck_namespace_exec_dir(env, obj, handle);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = mdd_trans_start(env, mdd, handle);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               mdd_write_lock(env, obj, MOR_TGT_CHILD);
+               locked = true;
+       }
+
+       rc = mdd_lfsck_namespace_check_exist(env, lfsck, obj, ent->lde_name);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = mdd_links_read(env, obj, &ldata);
+       if (rc == 0) {
+               count = ldata.ld_leh->leh_reccount;
+               rc = linkea_links_find(&ldata, cname, pfid);
+               if (rc == 0) {
+                       /* For dir, if there are more than one linkea entries,
+                        * then remove all the other redundant linkea entries.*/
+                       if (unlikely(count > 1 &&
+                                    S_ISDIR(mdd_object_type(obj))))
+                               goto unmatch;
+
+                       goto record;
+               } else {
+
+unmatch:
+                       ns->ln_flags |= LF_INCONSISTENT;
+                       if (bk->lb_param & LPF_DRYRUN) {
+                               repaired = true;
+                               goto record;
+                       }
+
+                       /*For dir, remove the unmatched linkea entry directly.*/
+                       if (S_ISDIR(mdd_object_type(obj))) {
+                               if (!com->lc_journal)
+                                       goto again;
+
+                               rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK,
+                                                  handle, BYPASS_CAPA);
+                               if (rc != 0)
+                                       GOTO(stop, rc);
+
+                               goto nodata;
+                       } else {
+                               goto add;
+                       }
+               }
+       } else if (unlikely(rc == -EINVAL)) {
+               ns->ln_flags |= LF_INCONSISTENT;
+               if (bk->lb_param & LPF_DRYRUN) {
+                       count = 1;
+                       repaired = true;
+                       goto record;
+               }
+
+               if (!com->lc_journal)
+                       goto again;
+
+               /* The magic crashed, we are not sure whether there are more
+                * corrupt data in the linkea, so remove all linkea entries. */
+               rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK, handle,
+                                  BYPASS_CAPA);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               goto nodata;
+       } else if (rc == -ENODATA) {
+               ns->ln_flags |= LF_UPGRADE;
+               if (bk->lb_param & LPF_DRYRUN) {
+                       count = 1;
+                       repaired = true;
+                       goto record;
+               }
+
+nodata:
+               rc = linkea_data_new(&ldata, &mdd_env_info(env)->mti_link_buf);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+add:
+               if (!com->lc_journal)
+                       goto again;
+
+               rc = linkea_add_buf(&ldata, cname, pfid);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               rc = mdd_links_write(env, obj, &ldata, handle);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               count = ldata.ld_leh->leh_reccount;
+               repaired = true;
        } else {
-               repaired = 0;
+               GOTO(stop, rc);
+       }
+
+record:
+       LASSERT(count > 0);
+
+       rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       if ((count == 1) &&
+           (la->la_nlink == 1 || S_ISDIR(mdd_object_type(obj))))
+               /* Usually, it is for single linked object or dir, do nothing.*/
+               GOTO(stop, rc);
+
+       /* Following modification will be in another transaction.  */
+       if (handle != NULL) {
+               LASSERT(mdd_write_locked(env, obj));
+
+               mdd_write_unlock(env, obj);
+               locked = false;
+
+               mdd_trans_stop(env, mdd, 0, handle);
+               handle = NULL;
        }
 
-       ns->ln_items_repaired += repaired;
+       ns->ln_mlinked_checked++;
+       rc = mdd_lfsck_namespace_update(env, com, cfid,
+                       count != la->la_nlink ? LLF_UNMATCH_NLINKS : 0, false);
+
+       GOTO(out, rc);
+
+stop:
+       if (locked)
+               mdd_write_unlock(env, obj);
+
+       if (handle != NULL)
+               mdd_trans_stop(env, mdd, rc, handle);
+
+out:
+       if (rc < 0) {
+               ns->ln_items_failed++;
+               if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
+                       mdd_lfsck_pos_fill(env, lfsck,
+                                          &ns->ln_pos_first_inconsistent,
+                                          false);
+               if (!(bk->lb_param & LPF_FAILOUT))
+                       rc = 0;
+       } else {
+               if (repaired)
+                       ns->ln_items_repaired++;
+               else
+                       com->lc_journal = 0;
+               rc = 0;
+       }
        up_write(&com->lc_sem);
-       return 0;
+       return rc;
 }
 
 static int mdd_lfsck_namespace_post(const struct lu_env *env,
                                    struct lfsck_component *com,
-                                   int result)
+                                   int result, bool init)
 {
        struct md_lfsck         *lfsck = com->lc_lfsck;
        struct lfsck_namespace  *ns    =
@@ -1003,6 +1412,8 @@ static int mdd_lfsck_namespace_post(const struct lu_env *env,
        down_write(&com->lc_sem);
 
        spin_lock(&lfsck->ml_lock);
+       if (!init)
+               ns->ln_pos_last_checkpoint = lfsck->ml_pos_current;
        if (result > 0) {
                ns->ln_status = LS_SCANNING_PHASE2;
                ns->ln_flags |= LF_SCANNED_ONCE;
@@ -1027,11 +1438,13 @@ static int mdd_lfsck_namespace_post(const struct lu_env *env,
        }
        spin_unlock(&lfsck->ml_lock);
 
-       ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
+       if (!init) {
+               ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
                                HALF_SEC - lfsck->ml_time_last_checkpoint);
-       ns->ln_time_last_checkpoint = cfs_time_current_sec();
-       ns->ln_items_checked += com->lc_new_checked;
-       com->lc_new_checked = 0;
+               ns->ln_time_last_checkpoint = cfs_time_current_sec();
+               ns->ln_items_checked += com->lc_new_checked;
+               com->lc_new_checked = 0;
+       }
 
        rc = mdd_lfsck_namespace_store(env, com, false);
 
@@ -1157,7 +1570,7 @@ mdd_lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
 
                buf += rc;
                len -= rc;
-               mdd_lfsck_pos_fill(env, lfsck, &pos, true, true);
+               mdd_lfsck_pos_fill(env, lfsck, &pos, false);
                rc = lfsck_pos_dump(&buf, &len, &pos, "current_position");
                if (rc <= 0)
                        goto out;
@@ -1274,33 +1687,169 @@ out:
        return ret;
 }
 
-/* XXX: to be implemented in other patch.  */
 static int mdd_lfsck_namespace_double_scan(const struct lu_env *env,
                                           struct lfsck_component *com)
 {
        struct md_lfsck         *lfsck  = com->lc_lfsck;
+       struct ptlrpc_thread    *thread = &lfsck->ml_thread;
+       struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
        struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
        struct lfsck_namespace  *ns     =
                                (struct lfsck_namespace *)com->lc_file_ram;
+       struct dt_object        *obj    = com->lc_obj;
+       const struct dt_it_ops  *iops   = &obj->do_index_ops->dio_it;
+       struct mdd_object       *target;
+       struct dt_it            *di;
+       struct dt_key           *key;
+       struct lu_fid            fid;
        int                      rc;
+       __u8                     flags;
+       ENTRY;
 
+       lfsck->ml_new_scanned = 0;
+       lfsck->ml_time_last_checkpoint = cfs_time_current();
+       lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+                               cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+
+       di = iops->init(env, obj, 0, BYPASS_CAPA);
+       if (IS_ERR(di))
+               RETURN(PTR_ERR(di));
+
+       fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
+       rc = iops->get(env, di, (const struct dt_key *)&fid);
+       if (rc < 0)
+               GOTO(fini, rc);
+
+       /* Skip the start entry: it was already processed or does not exist. */
+       rc = iops->next(env, di);
+       if (rc != 0)
+               GOTO(put, rc);
+
+       if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN))
+               GOTO(put, rc = 0);
+
+       do {
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) &&
+                   cfs_fail_val > 0) {
+                       struct l_wait_info lwi;
+
+                       lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
+                                         NULL, NULL);
+                       l_wait_event(thread->t_ctl_waitq,
+                                    !thread_is_running(thread),
+                                    &lwi);
+               }
+
+               key = iops->key(env, di);
+               fid_be_to_cpu(&fid, (const struct lu_fid *)key);
+               target = mdd_object_find(env, mdd, &fid);
+               down_write(&com->lc_sem);
+               if (target == NULL) {
+                       rc = 0;
+                       goto checkpoint;
+               } else if (IS_ERR(target)) {
+                       rc = PTR_ERR(target);
+                       goto checkpoint;
+               }
+
+               /* XXX: need more processing for remote object in the future. */
+               if (!mdd_object_exists(target) || mdd_object_remote(target))
+                       goto obj_put;
+
+               rc = iops->rec(env, di, (struct dt_rec *)&flags, 0);
+               if (rc == 0)
+                       rc = mdd_lfsck_namespace_double_scan_one(env, com,
+                                                                target, flags);
+
+obj_put:
+               mdd_object_put(env, target);
+
+checkpoint:
+               lfsck->ml_new_scanned++;
+               com->lc_new_checked++;
+               ns->ln_fid_latest_scanned_phase2 = fid;
+               if (rc > 0)
+                       ns->ln_objs_repaired_phase2++;
+               else if (rc < 0)
+                       ns->ln_objs_failed_phase2++;
+               up_write(&com->lc_sem);
+
+               if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) {
+                       mdd_lfsck_namespace_delete(env, com, &fid);
+               } else if (rc < 0) {
+                       flags |= LLF_REPAIR_FAILED;
+                       mdd_lfsck_namespace_update(env, com, &fid, flags, true);
+               }
+
+               if (rc < 0 && bk->lb_param & LPF_FAILOUT)
+                       GOTO(put, rc);
+
+               if (likely(cfs_time_beforeq(cfs_time_current(),
+                                           lfsck->ml_time_next_checkpoint)) ||
+                   com->lc_new_checked == 0)
+                       goto speed;
+
+               down_write(&com->lc_sem);
+               ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
+                               HALF_SEC - lfsck->ml_time_last_checkpoint);
+               ns->ln_time_last_checkpoint = cfs_time_current_sec();
+               ns->ln_objs_checked_phase2 += com->lc_new_checked;
+               com->lc_new_checked = 0;
+               rc = mdd_lfsck_namespace_store(env, com, false);
+               up_write(&com->lc_sem);
+               if (rc != 0)
+                       GOTO(put, rc);
+
+               lfsck->ml_time_last_checkpoint = cfs_time_current();
+               lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
+                               cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+
+speed:
+               mdd_lfsck_control_speed(lfsck);
+               if (unlikely(!thread_is_running(thread)))
+                       GOTO(put, rc = 0);
+
+               rc = iops->next(env, di);
+       } while (rc == 0);
+
+       GOTO(put, rc);
+
+put:
+       iops->put(env, di);
+
+fini:
+       iops->fini(env, di);
        down_write(&com->lc_sem);
 
+       ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
+                               HALF_SEC - lfsck->ml_time_last_checkpoint);
        ns->ln_time_last_checkpoint = cfs_time_current_sec();
+       ns->ln_objs_checked_phase2 += com->lc_new_checked;
        com->lc_new_checked = 0;
-       com->lc_journal = 0;
 
-       ns->ln_status = LS_COMPLETED;
-       if (!(bk->lb_param & LPF_DRYRUN))
-               ns->ln_flags &=
-               ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
-       ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
-       ns->ln_success_count++;
+       if (rc > 0) {
+               com->lc_journal = 0;
+               ns->ln_status = LS_COMPLETED;
+               if (!(bk->lb_param & LPF_DRYRUN))
+                       ns->ln_flags &=
+                       ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
+               ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
+               ns->ln_success_count++;
+       } else if (rc == 0) {
+               if (lfsck->ml_paused)
+                       ns->ln_status = LS_PAUSED;
+               else
+                       ns->ln_status = LS_STOPPED;
+       } else {
+               ns->ln_status = LS_FAILED;
+       }
 
-       spin_lock(&lfsck->ml_lock);
-       cfs_list_del_init(&com->lc_link);
-       cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
-       spin_unlock(&lfsck->ml_lock);
+       if (ns->ln_status != LS_PAUSED) {
+               spin_lock(&lfsck->ml_lock);
+               cfs_list_del_init(&com->lc_link);
+               cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
+               spin_unlock(&lfsck->ml_lock);
+       }
 
        rc = mdd_lfsck_namespace_store(env, com, false);
 
@@ -1325,11 +1874,11 @@ static struct lfsck_operations mdd_lfsck_namespace_ops = {
 static int mdd_lfsck_namespace_setup(const struct lu_env *env,
                                     struct md_lfsck *lfsck)
 {
-       struct mdd_device      *mdd = mdd_lfsck2mdd(lfsck);
-       struct lfsck_component *com;
-       struct lfsck_namespace *ns;
-       struct dt_object       *obj;
-       int                     rc;
+       struct mdd_device       *mdd = mdd_lfsck2mdd(lfsck);
+       struct lfsck_component  *com;
+       struct lfsck_namespace  *ns;
+       struct dt_object        *obj, *root;
+       int                      rc;
        ENTRY;
 
        OBD_ALLOC_PTR(com);
@@ -1352,8 +1901,15 @@ static int mdd_lfsck_namespace_setup(const struct lu_env *env,
        if (com->lc_file_disk == NULL)
                GOTO(out, rc = -ENOMEM);
 
-       obj = dt_store_open(env, mdd->mdd_bottom, "", lfsck_namespace_name,
-                           &mdd_env_info(env)->mti_fid);
+       root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid);
+       if (unlikely(IS_ERR(root)))
+               GOTO(out, rc = PTR_ERR(root));
+
+       obj = local_index_find_or_create(env, mdd->mdd_los, root,
+                                        lfsck_namespace_name,
+                                        S_IFREG | S_IRUGO | S_IWUSR,
+                                        &dt_lfsck_features);
+       lu_object_put(env, &root->do_lu);
        if (IS_ERR(obj))
                GOTO(out, rc = PTR_ERR(obj));
 
@@ -1406,9 +1962,8 @@ out:
 
 /* helper functions for framework */
 
-static int object_is_client_visible(const struct lu_env *env,
-                                   struct mdd_device *mdd,
-                                   struct mdd_object *obj)
+static int object_needs_lfsck(const struct lu_env *env, struct mdd_device *mdd,
+                             struct mdd_object *obj)
 {
        struct lu_fid *fid   = &mdd_env_info(env)->mti_fid;
        int            depth = 0;
@@ -1423,6 +1978,13 @@ static int object_is_client_visible(const struct lu_env *env,
                        return 1;
                }
 
+               /* .lustre has no "real" user objects, so it needs no LFSCK. */
+               if (fid_is_dot_lustre(mdo2fid(obj))) {
+                       if (depth > 0)
+                               mdd_object_put(env, obj);
+                       return 0;
+               }
+
                mdd_read_lock(env, obj, MOR_TGT_CHILD);
                if (unlikely(mdd_is_dead_obj(obj))) {
                        mdd_read_unlock(env, obj);
@@ -1462,12 +2024,17 @@ static int object_is_client_visible(const struct lu_env *env,
                else if (IS_ERR(obj))
                        return PTR_ERR(obj);
 
-               /* XXX: need more processing for remote object in the future. */
-               if (!mdd_object_exists(obj) || mdd_object_remote(obj)) {
+               if (!mdd_object_exists(obj)) {
                        mdd_object_put(env, obj);
                        return 0;
                }
 
+               /* Currently, only client-visible directories can be remote. */
+               if (mdd_object_remote(obj)) {
+                       mdd_object_put(env, obj);
+                       return 1;
+               }
+
                depth++;
        }
        return 0;
@@ -1490,17 +2057,17 @@ static void mdd_lfsck_unpack_ent(struct lu_dirent *ent)
 /* LFSCK wrap functions */
 
 static void mdd_lfsck_fail(const struct lu_env *env, struct md_lfsck *lfsck,
-                          bool oit, bool new_checked)
+                          bool new_checked)
 {
        struct lfsck_component *com;
 
        cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
-               com->lc_ops->lfsck_fail(env, com, oit, new_checked);
+               com->lc_ops->lfsck_fail(env, com, new_checked);
        }
 }
 
 static int mdd_lfsck_checkpoint(const struct lu_env *env,
-                               struct md_lfsck *lfsck, bool oit)
+                               struct md_lfsck *lfsck)
 {
        struct lfsck_component *com;
        int                     rc;
@@ -1509,7 +2076,7 @@ static int mdd_lfsck_checkpoint(const struct lu_env *env,
                                    lfsck->ml_time_next_checkpoint)))
                return 0;
 
-       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, oit, !oit);
+       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false);
        cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
                rc = com->lc_ops->lfsck_checkpoint(env, com, false);
                if (rc != 0)
@@ -1539,6 +2106,7 @@ static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck)
        LASSERT(lfsck->ml_obj_dir == NULL);
        LASSERT(lfsck->ml_di_dir == NULL);
 
+       lfsck->ml_current_oit_processed = 0;
        cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
                com->lc_new_checked = 0;
                if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
@@ -1557,12 +2125,19 @@ static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck)
        /* Init otable-based iterator. */
        if (pos == NULL) {
                rc = iops->load(env, lfsck->ml_di_oit, 0);
-               GOTO(out, rc = (rc >= 0 ? 0 : rc));
+               if (rc > 0) {
+                       lfsck->ml_oit_over = 1;
+                       rc = 0;
+               }
+
+               GOTO(out, rc);
        }
 
        rc = iops->load(env, lfsck->ml_di_oit, pos->lp_oit_cookie);
        if (rc < 0)
                GOTO(out, rc);
+       else if (rc > 0)
+               lfsck->ml_oit_over = 1;
 
        if (fid_is_zero(&pos->lp_dir_parent))
                GOTO(out, rc = 0);
@@ -1592,8 +2167,10 @@ static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck)
        if (IS_ERR(di))
                GOTO(out, rc = PTR_ERR(di));
 
+       LASSERT(pos->lp_dir_cookie < MDS_DIR_END_OFF);
+
        rc = iops->load(env, di, pos->lp_dir_cookie);
-       if (rc == 0)
+       if ((rc == 0) || (rc > 0 && pos->lp_dir_cookie > 0))
                rc = iops->next(env, di);
        else if (rc > 0)
                rc = 0;
@@ -1616,10 +2193,16 @@ out:
        if (obj != NULL)
                mdd_object_put(env, obj);
 
-       if (rc != 0)
-               return (rc > 0 ? 0 : rc);
+       if (rc < 0) {
+               cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan,
+                                            lc_link)
+                       com->lc_ops->lfsck_post(env, com, rc, true);
 
-       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false, false);
+               return rc;
+       }
+
+       rc = 0;
+       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true);
        cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
                rc = com->lc_ops->lfsck_checkpoint(env, com, true);
                if (rc != 0)
@@ -1654,7 +2237,7 @@ static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck,
            cfs_list_empty(&lfsck->ml_list_dir))
               RETURN(0);
 
-       rc = object_is_client_visible(env, mdd_lfsck2mdd(lfsck), obj);
+       rc = object_needs_lfsck(env, mdd_lfsck2mdd(lfsck), obj);
        if (rc <= 0)
                GOTO(out, rc);
 
@@ -1692,7 +2275,7 @@ static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck,
 
 out:
        if (rc < 0)
-               mdd_lfsck_fail(env, lfsck, false, false);
+               mdd_lfsck_fail(env, lfsck, false);
        return (rc > 0 ? 0 : rc);
 }
 
@@ -1717,9 +2300,9 @@ static int mdd_lfsck_post(const struct lu_env *env, struct md_lfsck *lfsck,
        struct lfsck_component *next;
        int                     rc;
 
-       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true, true);
+       mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false);
        cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
-               rc = com->lc_ops->lfsck_post(env, com, result);
+               rc = com->lc_ops->lfsck_post(env, com, result, false);
                if (rc != 0)
                        return rc;
        }
@@ -1784,7 +2367,7 @@ static int mdd_lfsck_dir_engine(const struct lu_env *env,
                rc = iops->rec(env, di, (struct dt_rec *)ent,
                               lfsck->ml_args_dir);
                if (rc != 0) {
-                       mdd_lfsck_fail(env, lfsck, false, true);
+                       mdd_lfsck_fail(env, lfsck, true);
                        if (bk->lb_param & LPF_FAILOUT)
                                RETURN(rc);
                        else
@@ -1800,7 +2383,7 @@ static int mdd_lfsck_dir_engine(const struct lu_env *env,
                if (child == NULL) {
                        goto checkpoint;
                } else if (IS_ERR(child)) {
-                       mdd_lfsck_fail(env, lfsck, false, true);
+                       mdd_lfsck_fail(env, lfsck, true);
                        if (bk->lb_param & LPF_FAILOUT)
                                RETURN(PTR_ERR(child));
                        else
@@ -1815,7 +2398,7 @@ static int mdd_lfsck_dir_engine(const struct lu_env *env,
                        RETURN(rc);
 
 checkpoint:
-               rc = mdd_lfsck_checkpoint(env, lfsck, false);
+               rc = mdd_lfsck_checkpoint(env, lfsck);
                if (rc != 0 && bk->lb_param & LPF_FAILOUT)
                        RETURN(rc);
 
@@ -1877,10 +2460,14 @@ static int mdd_lfsck_oit_engine(const struct lu_env *env,
                                     &lwi);
                }
 
+               if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
+                       RETURN(0);
+
+               lfsck->ml_current_oit_processed = 1;
                lfsck->ml_new_scanned++;
                rc = iops->rec(env, di, (struct dt_rec *)fid, 0);
                if (rc != 0) {
-                       mdd_lfsck_fail(env, lfsck, true, true);
+                       mdd_lfsck_fail(env, lfsck, true);
                        if (bk->lb_param & LPF_FAILOUT)
                                RETURN(rc);
                        else
@@ -1891,7 +2478,7 @@ static int mdd_lfsck_oit_engine(const struct lu_env *env,
                if (target == NULL) {
                        goto checkpoint;
                } else if (IS_ERR(target)) {
-                       mdd_lfsck_fail(env, lfsck, true, true);
+                       mdd_lfsck_fail(env, lfsck, true);
                        if (bk->lb_param & LPF_FAILOUT)
                                RETURN(PTR_ERR(target));
                        else
@@ -1908,7 +2495,7 @@ static int mdd_lfsck_oit_engine(const struct lu_env *env,
                        RETURN(rc);
 
 checkpoint:
-               rc = mdd_lfsck_checkpoint(env, lfsck, true);
+               rc = mdd_lfsck_checkpoint(env, lfsck);
                if (rc != 0 && bk->lb_param & LPF_FAILOUT)
                        RETURN(rc);
 
@@ -1923,8 +2510,10 @@ checkpoint:
                }
 
                rc = iops->next(env, di);
-               if (rc > 0)
+               if (unlikely(rc > 0))
                        lfsck->ml_oit_over = 1;
+               else if (likely(rc == 0))
+                       lfsck->ml_current_oit_processed = 0;
 
                if (unlikely(!thread_is_running(thread)))
                        RETURN(0);
@@ -1997,7 +2586,8 @@ static int mdd_lfsck_main(void *args)
        if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan))
                oit_iops->put(&env, oit_di);
 
-       rc = mdd_lfsck_post(&env, lfsck, rc);
+       if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
+               rc = mdd_lfsck_post(&env, lfsck, rc);
        if (lfsck->ml_di_dir != NULL)
                mdd_lfsck_close_dir(&env, lfsck);
 
@@ -2076,7 +2666,9 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
                RETURN(-ENOTSUPP);
 
        /* start == NULL means auto trigger paused LFSCK. */
-       if (start == NULL && cfs_list_empty(&lfsck->ml_list_scan))
+       if ((start == NULL) &&
+           (cfs_list_empty(&lfsck->ml_list_scan) ||
+            OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
                RETURN(0);
 
        mutex_lock(&lfsck->ml_mutex);
@@ -2157,7 +2749,8 @@ int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
                cfs_list_for_each_entry_safe(com, next,
                                             &lfsck->ml_list_scan, lc_link) {
                        if (!(com->lc_type & start->ls_active)) {
-                               rc = com->lc_ops->lfsck_post(env, com, 0);
+                               rc = com->lc_ops->lfsck_post(env, com, 0,
+                                                            false);
                                if (rc != 0)
                                        GOTO(out, rc);
                        }
@@ -2229,6 +2822,9 @@ int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck,
        struct l_wait_info    lwi    = { 0 };
        ENTRY;
 
+       if (!lfsck->ml_initialized)
+               RETURN(0);
+
        mutex_lock(&lfsck->ml_mutex);
        spin_lock(&lfsck->ml_lock);
        if (thread_is_init(thread) || thread_is_stopped(thread)) {
@@ -2262,9 +2858,11 @@ static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE,
 
 int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
 {
-       struct md_lfsck  *lfsck = &mdd->mdd_lfsck;
-       struct dt_object *obj;
-       int               rc;
+       struct md_lfsck         *lfsck = &mdd->mdd_lfsck;
+       struct dt_object        *obj;
+       struct lu_fid            fid;
+       int                      rc;
+
        ENTRY;
 
        LASSERT(!lfsck->ml_initialized);
@@ -2286,27 +2884,40 @@ int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
        rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
        if (rc != 0) {
                if (rc == -ENOTSUPP)
-                       rc = 0;
-
-               RETURN(rc);
+                       RETURN(0);
+               GOTO(out, rc);
        }
 
-       obj = dt_store_open(env, mdd->mdd_bottom, "", lfsck_bookmark_name,
-                           &mdd_env_info(env)->mti_fid);
+       /* LFSCK bookmark */
+       fid_zero(&fid);
+       rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid,
+                                  lfsck_bookmark_name,
+                                  S_IFREG | S_IRUGO | S_IWUSR, &fid);
+       if (rc < 0)
+               GOTO(out, rc);
+
+       obj = dt_locate(env, mdd->mdd_bottom, &fid);
        if (IS_ERR(obj))
-               RETURN(PTR_ERR(obj));
+               GOTO(out, rc = PTR_ERR(obj));
 
+       LASSERT(lu_object_exists(&obj->do_lu));
        lfsck->ml_bookmark_obj = obj;
+
        rc = mdd_lfsck_bookmark_load(env, lfsck);
        if (rc == -ENODATA)
                rc = mdd_lfsck_bookmark_init(env, lfsck);
        if (rc != 0)
-               RETURN(rc);
+               GOTO(out, rc);
 
        rc = mdd_lfsck_namespace_setup(env, lfsck);
+       if (rc < 0)
+               GOTO(out, rc);
        /* XXX: LFSCK components initialization to be added here. */
-
-       RETURN(rc);
+       RETURN(0);
+out:
+       lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
+       lfsck->ml_obj_oit = NULL;
+       return 0;
 }
 
 void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)