Whamcloud - gitweb
LU-4788 lfsck: replace cfs_list_t with list_head
[fs/lustre-release.git] / lustre / lfsck / lfsck_lib.c
index de7b849..566a99e 100644 (file)
@@ -58,7 +58,7 @@ static void lfsck_key_fini(const struct lu_context *ctx,
 LU_CONTEXT_KEY_DEFINE(lfsck, LCT_MD_THREAD | LCT_DT_THREAD);
 LU_KEY_INIT_GENERIC(lfsck);
 
-static CFS_LIST_HEAD(lfsck_instance_list);
+/* NOTE(review): this declaration has no static initializer, so the list head
+ * presumably must be set up with INIT_LIST_HEAD() before its first use, like
+ * the two orphan lists declared below -- confirm the module init path does
+ * so. */
+static struct list_head lfsck_instance_list;
 static struct list_head lfsck_ost_orphan_list;
 static struct list_head lfsck_mdt_orphan_list;
 static DEFINE_SPINLOCK(lfsck_instance_lock);
@@ -92,6 +92,9 @@ const char *lfsck_param_names[] = {
        "failout",
        "dryrun",
        "all_targets",
+       "broadcast",
+       "orphan",
+       "create_ostobj",
        NULL
 };
 
@@ -241,11 +244,9 @@ static int lfsck_add_target_from_orphan(const struct lu_env *env,
 again:
        spin_lock(&lfsck_instance_lock);
        list_for_each_entry_safe(ltd, next, head, ltd_orphan_list) {
-               if (ltd->ltd_key == lfsck->li_bottom) {
-                       list_del_init(&ltd->ltd_orphan_list);
-                       list_add_tail(&ltd->ltd_orphan_list,
-                                     &ltds->ltd_orphan);
-               }
+               if (ltd->ltd_key == lfsck->li_bottom)
+                       list_move_tail(&ltd->ltd_orphan_list,
+                                      &ltds->ltd_orphan);
        }
        spin_unlock(&lfsck_instance_lock);
 
@@ -276,11 +277,12 @@ again:
 }
 
 static inline struct lfsck_component *
-__lfsck_component_find(struct lfsck_instance *lfsck, __u16 type, cfs_list_t *list)
+__lfsck_component_find(struct lfsck_instance *lfsck, __u16 type,
+                      struct list_head *list)
 {
        struct lfsck_component *com;
 
-       cfs_list_for_each_entry(com, list, lc_link) {
+       list_for_each_entry(com, list, lc_link) {
                if (com->lc_type == type)
                        return com;
        }
@@ -314,19 +316,537 @@ unlock:
+/* Unlink \a com from whichever of its lc_link and lc_link_dir lists it is
+ * still on, then hand it to lfsck_component_put() (presumably dropping a
+ * reference -- confirm against that helper). */
 void lfsck_component_cleanup(const struct lu_env *env,
                             struct lfsck_component *com)
 {
-       if (!cfs_list_empty(&com->lc_link))
-               cfs_list_del_init(&com->lc_link);
-       if (!cfs_list_empty(&com->lc_link_dir))
-               cfs_list_del_init(&com->lc_link_dir);
+       if (!list_empty(&com->lc_link))
+               list_del_init(&com->lc_link);
+       if (!list_empty(&com->lc_link_dir))
+               list_del_init(&com->lc_link_dir);
 
        lfsck_component_put(env, com);
 }
 
+/* Allocate a new FID from the LFSCK sequence client into \a fid and record
+ * it as the last allocated FID in the bookmark.
+ *
+ * Pass \a locked as true when the caller already holds lfsck->li_mutex;
+ * otherwise the mutex is taken here for the duration of the call.
+ *
+ * Returns the seq_client_alloc_fid() result if it is negative, otherwise the
+ * result of lfsck_bookmark_store(). */
+int lfsck_fid_alloc(const struct lu_env *env, struct lfsck_instance *lfsck,
+                   struct lu_fid *fid, bool locked)
+{
+       const bool               take_mutex = !locked;
+       int                      rc;
+       ENTRY;
+
+       if (take_mutex)
+               mutex_lock(&lfsck->li_mutex);
+
+       rc = seq_client_alloc_fid(env, lfsck->li_seq, fid);
+       if (rc >= 0) {
+               /* Remember the FID in the in-memory bookmark, then flush the
+                * bookmark. If a later sub-operation fails, the worst case
+                * is that this one FID is lost, which is tolerable because
+                * LFSCK creates objects only rarely. */
+               lfsck->li_bookmark_ram.lb_last_fid = *fid;
+               rc = lfsck_bookmark_store(env, lfsck);
+       }
+
+       if (take_mutex)
+               mutex_unlock(&lfsck->li_mutex);
+
+       RETURN(rc);
+}
+
+/* Fixed directory entry names. "." and ".." serve as the insert keys when a
+ * newly created directory is populated. */
+static const char dot[] = ".";
+static const char dotdot[] = "..";
+static const char dotlustre[] = ".lustre";
+static const char lostfound[] = "lost+found";
+
+/* Create the directory \a child and link it as \a name under \a parent,
+ * all inside a single local transaction on lfsck->li_bottom.
+ *
+ * Every modification is declared first (the "Na" steps) and executed after
+ * the transaction has started (the "Nb" steps): create the child, insert "."
+ * and ".." into it, add a child nlink, set the child linkEA, insert \a name
+ * into \a parent, add a parent nlink, and write the bookmark after recording
+ * the child FID in lb_lpf_fid.
+ *
+ * \a la and \a dof are passed through to dt_declare_create()/dt_create().
+ *
+ * Returns 0 on success; otherwise the error from the failing step, or
+ * -ENOTDIR if the created child cannot be used as a directory. Once the
+ * transaction has been created it is stopped on every path. */
+static int lfsck_create_lpf_local(const struct lu_env *env,
+                                 struct lfsck_instance *lfsck,
+                                 struct dt_object *parent,
+                                 struct dt_object *child,
+                                 struct lu_attr *la,
+                                 struct dt_object_format *dof,
+                                 const char *name)
+{
+       struct dt_insert_rec    *rec    = &lfsck_env_info(env)->lti_dt_rec;
+       struct dt_device        *dev    = lfsck->li_bottom;
+       struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
+       struct dt_object        *bk_obj = lfsck->li_bookmark_obj;
+       const struct lu_fid     *cfid   = lfsck_dto2fid(child);
+       struct thandle          *th     = NULL;
+       struct linkea_data       ldata  = { 0 };
+       struct lu_buf            linkea_buf;
+       const struct lu_name    *cname;
+       loff_t                   pos    = 0;
+       int                      len    = sizeof(struct lfsck_bookmark);
+       int                      rc;
+       ENTRY;
+
+       /* Build the linkEA (name + parent FID) before opening the
+        * transaction, so a failure here needs no transaction cleanup. */
+       rc = linkea_data_new(&ldata,
+                            &lfsck_env_info(env)->lti_linkea_buf);
+       if (rc != 0)
+               RETURN(rc);
+
+       cname = lfsck_name_get_const(env, name, strlen(name));
+       rc = linkea_add_buf(&ldata, cname, lfsck_dto2fid(parent));
+       if (rc != 0)
+               RETURN(rc);
+
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       /* 1a. create child */
+       rc = dt_declare_create(env, child, la, NULL, dof, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 2a. increase child nlink */
+       rc = dt_declare_ref_add(env, child, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 3a. insert linkEA for child */
+       linkea_buf.lb_buf = ldata.ld_buf->lb_buf;
+       linkea_buf.lb_len = ldata.ld_leh->leh_len;
+       rc = dt_declare_xattr_set(env, child, &linkea_buf,
+                                 XATTR_NAME_LINK, 0, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 4a. insert name into parent dir */
+       rec->rec_type = S_IFDIR;
+       rec->rec_fid = cfid;
+       rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
+                              (const struct dt_key *)name, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 5a. increase parent nlink */
+       rc = dt_declare_ref_add(env, parent, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 6a. update bookmark */
+       rc = dt_declare_record_write(env, bk_obj,
+                                    lfsck_buf_get(env, bk, len), 0, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, child, 0);
+       /* 1b.1. create child */
+       rc = dt_create(env, child, la, NULL, dof, th);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       if (unlikely(!dt_try_as_dir(env, child)))
+               GOTO(unlock, rc = -ENOTDIR);
+
+       /* 1b.2. insert dot into child dir */
+       rec->rec_fid = cfid;
+       rc = dt_insert(env, child, (const struct dt_rec *)rec,
+                      (const struct dt_key *)dot, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 1b.3. insert dotdot into child dir */
+       rec->rec_fid = &LU_LPF_FID;
+       rc = dt_insert(env, child, (const struct dt_rec *)rec,
+                      (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 2b. increase child nlink */
+       rc = dt_ref_add(env, child, th);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 3b. insert linkEA for child. */
+       rc = dt_xattr_set(env, child, &linkea_buf,
+                         XATTR_NAME_LINK, 0, th, BYPASS_CAPA);
+       /* The child lock is dropped here, so every failure from this point
+        * on must jump to "stop" rather than "unlock". */
+       dt_write_unlock(env, child);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 4b. insert name into parent dir */
+       rec->rec_fid = cfid;
+       rc = dt_insert(env, parent, (const struct dt_rec *)rec,
+                      (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, parent, 0);
+       /* 5b. increase parent nlink */
+       rc = dt_ref_add(env, parent, th);
+       dt_write_unlock(env, parent);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* Record the new directory's FID in the in-memory bookmark and
+        * refresh the little-endian on-disk copy before writing it out. */
+       bk->lb_lpf_fid = *cfid;
+       lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk);
+
+       /* 6b. update bookmark */
+       rc = dt_record_write(env, bk_obj,
+                            lfsck_buf_get(env, bk, len), &pos, th);
+
+       GOTO(stop, rc);
+
+unlock:
+       dt_write_unlock(env, child);
+
+stop:
+       dt_trans_stop(env, dev, th);
+
+       return rc;
+}
+
+/* Create the directory \a child locally and link it as \a name under a
+ * \a parent that is reached through lfsck->li_next, using two separate
+ * transactions (the XXX comment in the body explains why).
+ *
+ * Transaction I (on lfsck->li_bottom) creates the child with its "." and
+ * ".." entries, nlink and linkEA, and writes the bookmark with lb_lpf_fid
+ * set to the child FID. Transaction II (on lfsck->li_next) inserts \a name
+ * into \a parent and adds a parent nlink.
+ *
+ * \a la and \a dof are passed through to dt_declare_create()/dt_create().
+ *
+ * Returns 0 on success, otherwise the error from the failing step (-ENOTDIR
+ * if the created child cannot be used as a directory). A failure during
+ * transaction II leaves the child created but not named in the parent; that
+ * case is only logged. */
+static int lfsck_create_lpf_remote(const struct lu_env *env,
+                                  struct lfsck_instance *lfsck,
+                                  struct dt_object *parent,
+                                  struct dt_object *child,
+                                  struct lu_attr *la,
+                                  struct dt_object_format *dof,
+                                  const char *name)
+{
+       struct dt_insert_rec    *rec    = &lfsck_env_info(env)->lti_dt_rec;
+       struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
+       struct dt_object        *bk_obj = lfsck->li_bookmark_obj;
+       const struct lu_fid     *cfid   = lfsck_dto2fid(child);
+       struct thandle          *th     = NULL;
+       struct linkea_data       ldata  = { 0 };
+       struct lu_buf            linkea_buf;
+       const struct lu_name    *cname;
+       struct dt_device        *dev;
+       loff_t                   pos    = 0;
+       int                      len    = sizeof(struct lfsck_bookmark);
+       int                      rc;
+       ENTRY;
+
+       /* Build the linkEA (name + parent FID) before opening any
+        * transaction, so a failure here needs no transaction cleanup. */
+       rc = linkea_data_new(&ldata,
+                            &lfsck_env_info(env)->lti_linkea_buf);
+       if (rc != 0)
+               RETURN(rc);
+
+       cname = lfsck_name_get_const(env, name, strlen(name));
+       rc = linkea_add_buf(&ldata, cname, lfsck_dto2fid(parent));
+       if (rc != 0)
+               RETURN(rc);
+
+       /* Create .lustre/lost+found/MDTxxxx. */
+
+       /* XXX: Currently, cross-MDT create operation needs to create the child
+        *      object firstly, then insert name into the parent directory. For
+        *      this case, the child object resides on current MDT (local), but
+        *      the parent ".lustre/lost+found" may be on remote MDT. It is not
+        *      easy to contain all the sub-modifications orderly within single
+        *      transaction.
+        *
+        *      To avoid more inconsistency, we split the create operation into
+        *      two transactions:
+        *
+        *      1) create the child and update the lfsck_bookmark::lb_lpf_fid
+        *         locally.
+        *      2) insert the name "MDTXXXX" in the parent ".lustre/lost+found"
+        *         remotely.
+        *
+        *      If 1) done, but 2) failed, then go ahead, the LFSCK will try to
+        *      repair such inconsistency when LFSCK run next time. */
+
+       /* Transaction I: locally */
+
+       dev = lfsck->li_bottom;
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       /* 1a. create child */
+       rc = dt_declare_create(env, child, la, NULL, dof, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 2a. increase child nlink */
+       rc = dt_declare_ref_add(env, child, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 3a. insert linkEA for child */
+       linkea_buf.lb_buf = ldata.ld_buf->lb_buf;
+       linkea_buf.lb_len = ldata.ld_leh->leh_len;
+       rc = dt_declare_xattr_set(env, child, &linkea_buf,
+                                 XATTR_NAME_LINK, 0, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 4a. update bookmark */
+       rc = dt_declare_record_write(env, bk_obj,
+                                    lfsck_buf_get(env, bk, len), 0, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, child, 0);
+       /* 1b.1. create child */
+       rc = dt_create(env, child, la, NULL, dof, th);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       if (unlikely(!dt_try_as_dir(env, child)))
+               GOTO(unlock, rc = -ENOTDIR);
+
+       /* 1b.2. insert dot into child dir */
+       rec->rec_type = S_IFDIR;
+       rec->rec_fid = cfid;
+       rc = dt_insert(env, child, (const struct dt_rec *)rec,
+                      (const struct dt_key *)dot, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 1b.3. insert dotdot into child dir */
+       rec->rec_fid = &LU_LPF_FID;
+       rc = dt_insert(env, child, (const struct dt_rec *)rec,
+                      (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 2b. increase child nlink */
+       rc = dt_ref_add(env, child, th);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 3b. insert linkEA for child */
+       rc = dt_xattr_set(env, child, &linkea_buf,
+                         XATTR_NAME_LINK, 0, th, BYPASS_CAPA);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* Record the new directory's FID in the in-memory bookmark and
+        * refresh the little-endian on-disk copy before writing it out. */
+       bk->lb_lpf_fid = *cfid;
+       lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk);
+
+       /* 4b. update bookmark */
+       rc = dt_record_write(env, bk_obj,
+                            lfsck_buf_get(env, bk, len), &pos, th);
+
+       /* End of transaction I: release the child and stop it inline, since
+        * dev and th are about to be reused for transaction II. */
+       dt_write_unlock(env, child);
+       dt_trans_stop(env, dev, th);
+       if (rc != 0)
+               RETURN(rc);
+
+       /* Transaction II: remotely */
+
+       dev = lfsck->li_next;
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       /* 5a. insert name into parent dir */
+       rec->rec_fid = cfid;
+       rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
+                              (const struct dt_key *)name, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 6a. increase parent nlink */
+       rc = dt_declare_ref_add(env, parent, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 5b. insert name into parent dir */
+       rc = dt_insert(env, parent, (const struct dt_rec *)rec,
+                      (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, parent, 0);
+       /* 6b. increase parent nlink */
+       rc = dt_ref_add(env, parent, th);
+       dt_write_unlock(env, parent);
+
+       GOTO(stop, rc);
+
+unlock:
+       dt_write_unlock(env, child);
+stop:
+       /* Stops whichever transaction dev/th currently refer to. */
+       dt_trans_stop(env, dev, th);
+
+       /* dev equals li_next only after transaction II was set up, i.e. the
+        * child already exists locally but its name was not inserted. */
+       if (rc != 0 && dev == lfsck->li_next)
+               CDEBUG(D_LFSCK, "%s: partially created the object "DFID
+                      "for orphans, but failed to insert the name %s "
+                      "to the .lustre/lost+found/. Such inconsistency "
+                      "will be repaired when LFSCK run next time: rc = %d\n",
+                      lfsck_lfsck2name(lfsck), PFID(cfid), name, rc);
+
+       return rc;
+}
+
+/* Find or create .lustre/lost+found/MDTxxxx for this MDT and cache the
+ * object in lfsck->li_lpf_obj.
+ *
+ * Do NOT create .lustre/lost+found/MDTxxxx when registering the lfsck
+ * instance, because MDT0 may not be ready for sequence allocation yet. We do
+ * that only when it is required, such as for orphan OST-objects repairing.
+ *
+ * Must be called on a master instance (asserted). lfsck->li_mutex is taken
+ * internally around the lookup/create sequence.
+ *
+ * Returns 0 on success (including when li_lpf_obj is already set), -ENXIO
+ * if the MDT0 target descriptor cannot be found, -ENOTDIR if the parent or
+ * an existing child cannot be used as a directory, or the error from a
+ * failing lookup, FID allocation, bookmark store or create step. */
+int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck)
+{
+       struct lfsck_bookmark    *bk    = &lfsck->li_bookmark_ram;
+       struct lfsck_thread_info *info  = lfsck_env_info(env);
+       struct lu_fid            *cfid  = &info->lti_fid2;
+       struct lu_attr           *la    = &info->lti_la;
+       struct dt_object_format  *dof   = &info->lti_dof;
+       struct dt_object         *parent = NULL;
+       struct dt_object         *child = NULL;
+       char                      name[8];
+       int                       node  = lfsck_dev_idx(lfsck->li_bottom);
+       int                       rc    = 0;
+       ENTRY;
+
+       LASSERT(lfsck->li_master);
+
+       /* Bound the write: name[] holds exactly "MDT" + 4 hex digits + NUL,
+        * and "%04x" is only a minimum field width, so an unbounded
+        * sprintf() would overrun the buffer for an index above 0xffff. */
+       snprintf(name, sizeof(name), "MDT%04x", node);
+       if (node == 0) {
+               parent = lfsck_object_find_by_dev(env, lfsck->li_bottom,
+                                                 &LU_LPF_FID);
+       } else {
+               struct lfsck_tgt_desc *ltd;
+
+               /* The parent is looked up through the MDT0 target. */
+               ltd = lfsck_tgt_get(&lfsck->li_mdt_descs, 0);
+               if (unlikely(ltd == NULL))
+                       RETURN(-ENXIO);
+
+               parent = lfsck_object_find_by_dev(env, ltd->ltd_tgt,
+                                                 &LU_LPF_FID);
+               lfsck_tgt_put(ltd);
+       }
+       if (IS_ERR(parent))
+               RETURN(PTR_ERR(parent));
+
+       if (unlikely(!dt_try_as_dir(env, parent)))
+               GOTO(out, rc = -ENOTDIR);
+
+       mutex_lock(&lfsck->li_mutex);
+       /* Nothing to do if the object has already been cached. */
+       if (lfsck->li_lpf_obj != NULL)
+               GOTO(unlock, rc = 0);
+
+       if (fid_is_zero(&bk->lb_lpf_fid)) {
+               /* There is corner case that: in former LFSCK scanning we have
+                * created the .lustre/lost+found/MDTxxxx but failed to update
+                * the lfsck_bookmark::lb_lpf_fid successfully. So need lookup
+                * it from MDT0 firstly. */
+               rc = dt_lookup(env, parent, (struct dt_rec *)cfid,
+                              (const struct dt_key *)name, BYPASS_CAPA);
+               if (rc != 0 && rc != -ENOENT)
+                       GOTO(unlock, rc);
+
+               if (rc == 0) {
+                       bk->lb_lpf_fid = *cfid;
+                       rc = lfsck_bookmark_store(env, lfsck);
+               } else {
+                       /* li_mutex is already held, hence locked == true. */
+                       rc = lfsck_fid_alloc(env, lfsck, cfid, true);
+               }
+               if (rc != 0)
+                       GOTO(unlock, rc);
+       } else {
+               *cfid = bk->lb_lpf_fid;
+       }
+
+       child = lfsck_object_find_by_dev(env, lfsck->li_bottom, cfid);
+       if (IS_ERR(child))
+               GOTO(unlock, rc = PTR_ERR(child));
+
+       if (dt_object_exists(child) != 0) {
+               if (unlikely(!dt_try_as_dir(env, child)))
+                       rc = -ENOTDIR;
+               else
+                       lfsck->li_lpf_obj = child;
+
+               GOTO(unlock, rc);
+       }
+
+       memset(la, 0, sizeof(*la));
+       la->la_atime = la->la_mtime = la->la_ctime = cfs_time_current_sec();
+       la->la_mode = S_IFDIR | S_IRWXU;
+       la->la_valid = LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE |
+                      LA_UID | LA_GID;
+       memset(dof, 0, sizeof(*dof));
+       dof->dof_type = dt_mode_to_dft(S_IFDIR);
+
+       if (node == 0)
+               rc = lfsck_create_lpf_local(env, lfsck, parent, child, la,
+                                           dof, name);
+       else
+               rc = lfsck_create_lpf_remote(env, lfsck, parent, child, la,
+                                            dof, name);
+       if (rc == 0)
+               lfsck->li_lpf_obj = child;
+
+       GOTO(unlock, rc);
+
+unlock:
+       mutex_unlock(&lfsck->li_mutex);
+       /* On success the child reference is kept in li_lpf_obj; it is only
+        * dropped here when the call failed. */
+       if (rc != 0 && child != NULL && !IS_ERR(child))
+               lu_object_put(env, &child->do_lu);
+out:
+       if (parent != NULL && !IS_ERR(parent))
+               lu_object_put(env, &parent->do_lu);
+
+       return rc;
+}
+
+/* Set up lfsck->li_seq, the sequence client that LFSCK allocates FIDs from.
+ *
+ * The client is named "lfsck-<device name>" and attached to the site's
+ * server sequence. If the bookmark holds a sane last-allocated FID, the
+ * client's current FID is seeded from it.
+ *
+ * Returns 0 on success, -ENXIO if the site has no sequence-server info,
+ * -ENOMEM on allocation failure, or the seq_client_init() error. On failure
+ * after li_seq was allocated, li_seq is freed and reset to NULL. */
+static int lfsck_fid_init(struct lfsck_instance *lfsck)
+{
+       struct seq_server_site  *site;
+       char                    *prefix;
+       int                      rc;
+       ENTRY;
+
+       site = lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site);
+       if (unlikely(site == NULL))
+               RETURN(-ENXIO);
+
+       OBD_ALLOC_PTR(lfsck->li_seq);
+       if (lfsck->li_seq == NULL)
+               RETURN(-ENOMEM);
+
+       /* Sized for the "lfsck-" tag plus a device name of presumably at
+        * most MAX_OBD_NAME characters and the trailing NUL. */
+       OBD_ALLOC(prefix, MAX_OBD_NAME + 7);
+       if (prefix == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       snprintf(prefix, MAX_OBD_NAME + 7, "lfsck-%s", lfsck_lfsck2name(lfsck));
+       rc = seq_client_init(lfsck->li_seq, NULL, LUSTRE_SEQ_METADATA, prefix,
+                            site->ss_server_seq);
+       /* The prefix buffer is released right after the call, whatever its
+        * outcome. */
+       OBD_FREE(prefix, MAX_OBD_NAME + 7);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       /* Seed the client from the last FID recorded in the bookmark. */
+       if (fid_is_sane(&lfsck->li_bookmark_ram.lb_last_fid))
+               lfsck->li_seq->lcs_fid = lfsck->li_bookmark_ram.lb_last_fid;
+
+       RETURN(0);
+
+out:
+       OBD_FREE_PTR(lfsck->li_seq);
+       lfsck->li_seq = NULL;
+
+       return rc;
+}
+
+/* Tear down the FID sequence client if one was set up, and reset
+ * lfsck->li_seq to NULL so that a repeated call does nothing. */
+static void lfsck_fid_fini(struct lfsck_instance *lfsck)
+{
+       if (lfsck->li_seq == NULL)
+               return;
+
+       seq_client_fini(lfsck->li_seq);
+       OBD_FREE_PTR(lfsck->li_seq);
+       lfsck->li_seq = NULL;
+}
+
 void lfsck_instance_cleanup(const struct lu_env *env,
                            struct lfsck_instance *lfsck)
 {
        struct ptlrpc_thread    *thread = &lfsck->li_thread;
        struct lfsck_component  *com;
+       struct lfsck_component  *next;
        ENTRY;
 
        LASSERT(list_empty(&lfsck->li_link));
@@ -339,26 +859,18 @@ void lfsck_instance_cleanup(const struct lu_env *env,
 
        LASSERT(lfsck->li_obj_dir == NULL);
 
-       while (!cfs_list_empty(&lfsck->li_list_scan)) {
-               com = cfs_list_entry(lfsck->li_list_scan.next,
-                                    struct lfsck_component,
-                                    lc_link);
+       list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
                lfsck_component_cleanup(env, com);
        }
 
-       LASSERT(cfs_list_empty(&lfsck->li_list_dir));
+       LASSERT(list_empty(&lfsck->li_list_dir));
 
-       while (!cfs_list_empty(&lfsck->li_list_double_scan)) {
-               com = cfs_list_entry(lfsck->li_list_double_scan.next,
-                                    struct lfsck_component,
-                                    lc_link);
+       list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
+                                lc_link) {
                lfsck_component_cleanup(env, com);
        }
 
-       while (!cfs_list_empty(&lfsck->li_list_idle)) {
-               com = cfs_list_entry(lfsck->li_list_idle.next,
-                                    struct lfsck_component,
-                                    lc_link);
+       list_for_each_entry_safe(com, next, &lfsck->li_list_idle, lc_link) {
                lfsck_component_cleanup(env, com);
        }
 
@@ -370,11 +882,18 @@ void lfsck_instance_cleanup(const struct lu_env *env,
                lfsck->li_bookmark_obj = NULL;
        }
 
+       if (lfsck->li_lpf_obj != NULL) {
+               lu_object_put(env, &lfsck->li_lpf_obj->do_lu);
+               lfsck->li_lpf_obj = NULL;
+       }
+
        if (lfsck->li_los != NULL) {
                local_oid_storage_fini(env, lfsck->li_los);
                lfsck->li_los = NULL;
        }
 
+       lfsck_fid_fini(lfsck);
+
        OBD_FREE_PTR(lfsck);
 }
 
@@ -383,7 +902,7 @@ __lfsck_instance_find(struct dt_device *key, bool ref, bool unlink)
 {
        struct lfsck_instance *lfsck;
 
-       cfs_list_for_each_entry(lfsck, &lfsck_instance_list, li_link) {
+       list_for_each_entry(lfsck, &lfsck_instance_list, li_link) {
                if (lfsck->li_bottom == key) {
                        if (ref)
                                lfsck_instance_get(lfsck);
@@ -414,89 +933,71 @@ static inline int lfsck_instance_add(struct lfsck_instance *lfsck)
        struct lfsck_instance *tmp;
 
        spin_lock(&lfsck_instance_lock);
-       cfs_list_for_each_entry(tmp, &lfsck_instance_list, li_link) {
+       list_for_each_entry(tmp, &lfsck_instance_list, li_link) {
                if (lfsck->li_bottom == tmp->li_bottom) {
                        spin_unlock(&lfsck_instance_lock);
                        return -EEXIST;
                }
        }
 
-       cfs_list_add_tail(&lfsck->li_link, &lfsck_instance_list);
+       list_add_tail(&lfsck->li_link, &lfsck_instance_list);
        spin_unlock(&lfsck_instance_lock);
        return 0;
 }
 
-int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
+/* Print "<prefix>: name,name,..." to \a m, one name per bit set in \a bits,
+ * taking the name of bit i from names[i]; a set bit whose names[] slot is
+ * NULL prints nothing. With no bits set only "<prefix>:" is printed. The
+ * output always ends with a newline. Always returns 0.
+ *
+ * NOTE(review): names[i] is read for every set bit with no bound on i, so
+ * this presumably relies on callers never passing a bit beyond the entries
+ * of \a names -- confirm against the callers. */
+int lfsck_bits_dump(struct seq_file *m, int bits, const char *names[],
                     const char *prefix)
 {
-       int save = *len;
        int flag;
-       int rc;
        int i;
+       bool newline = (bits != 0 ? false : true);
 
-       rc = snprintf(*buf, *len, "%s:%c", prefix, bits != 0 ? ' ' : '\n');
-       if (rc <= 0)
-               return -ENOSPC;
+       seq_printf(m, "%s:%c", prefix, bits != 0 ? ' ' : '\n');
 
-       *buf += rc;
-       *len -= rc;
        for (i = 0, flag = 1; bits != 0; i++, flag = 1 << i) {
                if (flag & bits) {
                        bits &= ~flag;
                        if (names[i] != NULL) {
-                               rc = snprintf(*buf, *len, "%s%c", names[i],
-                                             bits != 0 ? ',' : '\n');
-                               if (rc <= 0)
-                                       return -ENOSPC;
+                               if (bits == 0)
+                                       newline = true;
 
-                               *buf += rc;
-                               *len -= rc;
+                               seq_printf(m, "%s%c", names[i],
+                                          newline ? '\n' : ',');
                        }
                }
        }
-       return save - *len;
+
+       if (!newline)
+               seq_printf(m, "\n");
+       return 0;
 }
 
-int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix)
+/* Print "<prefix>: <N> seconds" to \a m, where N is the current time in
+ * seconds minus \a time, or "<prefix>: N/A" when \a time is 0. Always
+ * returns 0. */
+int lfsck_time_dump(struct seq_file *m, __u64 time, const char *prefix)
 {
-       int rc;
-
        if (time != 0)
-               rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix,
-                             cfs_time_current_sec() - time);
+               seq_printf(m, "%s: "LPU64" seconds\n", prefix,
+                         cfs_time_current_sec() - time);
        else
-               rc = snprintf(*buf, *len, "%s: N/A\n", prefix);
-       if (rc <= 0)
-               return -ENOSPC;
-
-       *buf += rc;
-       *len -= rc;
-       return rc;
+               seq_printf(m, "%s: N/A\n", prefix);
+       return 0;
 }
 
-int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
+/* Print the position \a pos to \a m as
+ * "<prefix>: <OIT cookie>, <parent FID>, <dir cookie>".
+ *
+ * When lp_dir_parent is zero, the parent FID and directory cookie print as
+ * N/A, and the OIT cookie prints as N/A too if it is 0. The OIT cookie uses
+ * the LPU64 format and the directory cookie the LPX64 format. Always
+ * returns 0. */
+int lfsck_pos_dump(struct seq_file *m, struct lfsck_position *pos,
                    const char *prefix)
 {
-       int rc;
-
        if (fid_is_zero(&pos->lp_dir_parent)) {
                if (pos->lp_oit_cookie == 0)
-                       rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n",
-                                     prefix);
+                       seq_printf(m, "%s: N/A, N/A, N/A\n",
+                                  prefix);
                else
-                       rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n",
-                                     prefix, pos->lp_oit_cookie);
+                       seq_printf(m, "%s: "LPU64", N/A, N/A\n",
+                                  prefix, pos->lp_oit_cookie);
        } else {
-               rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n",
-                             prefix, pos->lp_oit_cookie,
-                             PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
+               seq_printf(m, "%s: "LPU64", "DFID", "LPX64"\n",
+                          prefix, pos->lp_oit_cookie,
+                          PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
        }
-       if (rc <= 0)
-               return -ENOSPC;
-
-       *buf += rc;
-       *len -= rc;
-       return rc;
+       return 0;
 }
 
 void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck,
@@ -533,9 +1034,10 @@ void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck,
        }
 }
 
-static void __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
+bool __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
 {
-       lfsck->li_bookmark_ram.lb_speed_limit = limit;
+       bool dirty = false;
+
        if (limit != LFSCK_SPEED_NO_LIMIT) {
                if (limit > HZ) {
                        lfsck->li_sleep_rate = limit / HZ;
@@ -548,6 +1050,13 @@ static void __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
                lfsck->li_sleep_jif = 0;
                lfsck->li_sleep_rate = 0;
        }
+
+       if (lfsck->li_bookmark_ram.lb_speed_limit != limit) {
+               lfsck->li_bookmark_ram.lb_speed_limit = limit;
+               dirty = true;
+       }
+
+       return dirty;
 }
 
 void lfsck_control_speed(struct lfsck_instance *lfsck)
@@ -605,7 +1114,7 @@ static int lfsck_needs_scan_dir(const struct lu_env *env,
        int            rc;
 
        if (!lfsck->li_master || !S_ISDIR(lfsck_object_type(obj)) ||
-           cfs_list_empty(&lfsck->li_list_dir))
+           list_empty(&lfsck->li_list_dir))
               RETURN(0);
 
        while (1) {
@@ -620,8 +1129,8 @@ static int lfsck_needs_scan_dir(const struct lu_env *env,
                        return 1;
                }
 
-               /* .lustre doesn't contain "real" user objects, no need lfsck */
-               if (fid_is_dot_lustre(lfsck_dto2fid(obj))) {
+               /* No need to check .lustre and its children. */
+               if (fid_seq_is_dot_lustre(fid_seq(lfsck_dto2fid(obj)))) {
                        if (depth > 0)
                                lfsck_object_put(env, obj);
                        return 0;
@@ -671,10 +1180,16 @@ static int lfsck_needs_scan_dir(const struct lu_env *env,
                        return 0;
                }
 
-               /* Currently, only client visible directory can be remote. */
                if (dt_object_remote(obj)) {
+                       /* .lustre/lost+found/MDTxxx can be remote directory. */
+                       if (fid_seq_is_dot_lustre(fid_seq(lfsck_dto2fid(obj))))
+                               rc = 0;
+                       else
+                               /* Other remote directory should be client
+                                * visible and need to be checked. */
+                               rc = 1;
                        lfsck_object_put(env, obj);
-                       return 1;
+                       return rc;
                }
 
                depth++;
@@ -724,7 +1239,7 @@ void lfsck_fail(const struct lu_env *env, struct lfsck_instance *lfsck,
 {
        struct lfsck_component *com;
 
-       cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+       list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
                com->lc_ops->lfsck_fail(env, com, new_checked);
        }
 }
@@ -740,7 +1255,7 @@ int lfsck_checkpoint(const struct lu_env *env, struct lfsck_instance *lfsck)
                return 0;
 
        lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, false);
-       cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+       list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
                rc = com->lc_ops->lfsck_checkpoint(env, com, false);
                if (rc != 0)
                        rc1 = rc;
@@ -769,7 +1284,7 @@ int lfsck_prep(const struct lu_env *env, struct lfsck_instance *lfsck,
        LASSERT(lfsck->li_di_dir == NULL);
 
        lfsck->li_current_oit_processed = 0;
-       cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
+       list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
                com->lc_new_checked = 0;
                if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
                        com->lc_journal = 0;
@@ -853,8 +1368,8 @@ out:
                lfsck_object_put(env, obj);
 
        if (rc < 0) {
-               cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan,
-                                            lc_link)
+               list_for_each_entry_safe(com, next, &lfsck->li_list_scan,
+                                        lc_link)
                        com->lc_ops->lfsck_post(env, com, rc, true);
 
                return rc;
@@ -862,7 +1377,7 @@ out:
 
        rc = 0;
        lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, true);
-       cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+       list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
                rc = com->lc_ops->lfsck_checkpoint(env, com, true);
                if (rc != 0)
                        break;
@@ -885,7 +1400,7 @@ int lfsck_exec_oit(const struct lu_env *env, struct lfsck_instance *lfsck,
 
        LASSERT(lfsck->li_obj_dir == NULL);
 
-       cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+       list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
                rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
                if (rc != 0)
                        RETURN(rc);
@@ -935,7 +1450,7 @@ int lfsck_exec_dir(const struct lu_env *env, struct lfsck_instance *lfsck,
        struct lfsck_component *com;
        int                     rc;
 
-       cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+       list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
                rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
                if (rc != 0)
                        return rc;
@@ -952,7 +1467,7 @@ int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck,
        int                     rc1 = 0;
 
        lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, false);
-       cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
+       list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
                rc = com->lc_ops->lfsck_post(env, com, result, false);
                if (rc != 0)
                        rc1 = rc;
@@ -973,6 +1488,7 @@ static void lfsck_interpret(const struct lu_env *env,
        struct lfsck_async_interpret_args *laia = args;
        struct lfsck_component            *com;
 
+       LASSERT(laia->laia_com == NULL);
        LASSERT(laia->laia_shared);
 
        spin_lock(&lfsck->li_lock);
@@ -1000,8 +1516,7 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
        int                     rc  = 0;
        int                     rc1 = 0;
 
-       cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
-                                    lc_link) {
+       list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) {
                if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
                        com->lc_journal = 0;
 
@@ -1014,6 +1529,16 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
                     atomic_read(&lfsck->li_double_scan_count) == 0,
                     &lwi);
 
+       if (lfsck->li_status != LS_PAUSED &&
+           lfsck->li_status != LS_CO_PAUSED) {
+               list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
+                                        lc_link) {
+                       spin_lock(&lfsck->li_lock);
+                       list_move_tail(&com->lc_link, &lfsck->li_list_idle);
+                       spin_unlock(&lfsck->li_lock);
+               }
+       }
+
        return rc1 != 0 ? rc1 : rc;
 }
 
@@ -1034,7 +1559,7 @@ static int lfsck_stop_notify(const struct lu_env *env,
                                             &lfsck->li_list_double_scan);
        if (com != NULL)
                lfsck_component_get(com);
-       spin_lock(&lfsck->li_lock);
+       spin_unlock(&lfsck->li_lock);
 
        if (com != NULL) {
                if (com->lc_ops->lfsck_stop_notify != NULL) {
@@ -1068,12 +1593,21 @@ void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck)
                                 lc_link) {
                if (com->lc_ops->lfsck_quit != NULL)
                        com->lc_ops->lfsck_quit(env, com);
+
+               spin_lock(&lfsck->li_lock);
+               list_del_init(&com->lc_link_dir);
+               list_move_tail(&com->lc_link, &lfsck->li_list_idle);
+               spin_unlock(&lfsck->li_lock);
        }
 
        list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
                                 lc_link) {
                if (com->lc_ops->lfsck_quit != NULL)
                        com->lc_ops->lfsck_quit(env, com);
+
+               spin_lock(&lfsck->li_lock);
+               list_move_tail(&com->lc_link, &lfsck->li_list_idle);
+               spin_unlock(&lfsck->li_lock);
        }
 }
 
@@ -1106,9 +1640,6 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
        struct req_format                 *format;
        int                                rc;
 
-       if (!(exp_connect_flags(exp) & OBD_CONNECT_LFSCK))
-               return -EOPNOTSUPP;
-
        switch (request) {
        case LFSCK_NOTIFY:
                format = &RQF_LFSCK_NOTIFY;
@@ -1117,8 +1648,8 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
                format = &RQF_LFSCK_QUERY;
                break;
        default:
-               CERROR("%s: unknown async request: opc = %d\n",
-                      exp->exp_obd->obd_name, request);
+               CDEBUG(D_LFSCK, "%s: unknown async request %d: rc = %d\n",
+                      exp->exp_obd->obd_name, request, -EINVAL);
                return -EINVAL;
        }
 
@@ -1149,7 +1680,7 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
 
 /* external interfaces */
 
-int lfsck_get_speed(struct dt_device *key, void *buf, int len)
+int lfsck_get_speed(struct seq_file *m, struct dt_device *key)
 {
        struct lu_env           env;
        struct lfsck_instance  *lfsck;
@@ -1162,11 +1693,10 @@ int lfsck_get_speed(struct dt_device *key, void *buf, int len)
 
        lfsck = lfsck_instance_find(key, true, false);
        if (likely(lfsck != NULL)) {
-               rc = snprintf(buf, len, "%u\n",
-                             lfsck->li_bookmark_ram.lb_speed_limit);
+               seq_printf(m, "%u\n", lfsck->li_bookmark_ram.lb_speed_limit);
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1189,12 +1719,12 @@ int lfsck_set_speed(struct dt_device *key, int val)
        lfsck = lfsck_instance_find(key, true, false);
        if (likely(lfsck != NULL)) {
                mutex_lock(&lfsck->li_mutex);
-               __lfsck_set_speed(lfsck, val);
-               rc = lfsck_bookmark_store(&env, lfsck);
+               if (__lfsck_set_speed(lfsck, val))
+                       rc = lfsck_bookmark_store(&env, lfsck);
                mutex_unlock(&lfsck->li_mutex);
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1203,7 +1733,7 @@ int lfsck_set_speed(struct dt_device *key, int val)
 }
 EXPORT_SYMBOL(lfsck_set_speed);
 
-int lfsck_get_windows(struct dt_device *key, void *buf, int len)
+int lfsck_get_windows(struct seq_file *m, struct dt_device *key)
 {
        struct lu_env           env;
        struct lfsck_instance  *lfsck;
@@ -1216,11 +1746,10 @@ int lfsck_get_windows(struct dt_device *key, void *buf, int len)
 
        lfsck = lfsck_instance_find(key, true, false);
        if (likely(lfsck != NULL)) {
-               rc = snprintf(buf, len, "%u\n",
-                             lfsck->li_bookmark_ram.lb_async_windows);
+               seq_printf(m, "%u\n", lfsck->li_bookmark_ram.lb_async_windows);
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1243,12 +1772,12 @@ int lfsck_set_windows(struct dt_device *key, int val)
        lfsck = lfsck_instance_find(key, true, false);
        if (likely(lfsck != NULL)) {
                if (val > LFSCK_ASYNC_WIN_MAX) {
-                       CERROR("%s: Too large async windows size, which "
-                              "may cause memory issues. The valid range "
-                              "is [0 - %u]. If you do not want to restrict "
-                              "the windows size for async requests pipeline, "
-                              "just set it as 0.\n",
-                              lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX);
+                       CWARN("%s: Too large async window size, which "
+                             "may cause memory issues. The valid range "
+                             "is [0 - %u]. If you do not want to restrict "
+                             "the window size for async requests pipeline, "
+                             "just set it as 0.\n",
+                             lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX);
                        rc = -EINVAL;
                } else if (lfsck->li_bookmark_ram.lb_async_windows != val) {
                        mutex_lock(&lfsck->li_mutex);
@@ -1258,7 +1787,7 @@ int lfsck_set_windows(struct dt_device *key, int val)
                }
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1267,7 +1796,7 @@ int lfsck_set_windows(struct dt_device *key, int val)
 }
 EXPORT_SYMBOL(lfsck_set_windows);
 
-int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type)
+int lfsck_dump(struct seq_file *m, struct dt_device *key, enum lfsck_type type)
 {
        struct lu_env           env;
        struct lfsck_instance  *lfsck;
@@ -1283,7 +1812,7 @@ int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type)
        if (likely(lfsck != NULL)) {
                com = lfsck_component_find(lfsck, type);
                if (likely(com != NULL)) {
-                       rc = com->lc_ops->lfsck_dump(&env, com, buf, len);
+                       rc = com->lc_ops->lfsck_dump(&env, com, m);
                        lfsck_component_put(&env, com);
                } else {
                        rc = -ENOTSUPP;
@@ -1291,7 +1820,7 @@ int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type)
 
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1319,12 +1848,8 @@ static int lfsck_stop_all(const struct lu_env *env,
        LASSERT(stop->ls_flags & LPF_BROADCAST);
 
        set = ptlrpc_prep_set();
-       if (unlikely(set == NULL)) {
-               CERROR("%s: cannot allocate memory for stop LFSCK on "
-                      "all targets\n", lfsck_lfsck2name(lfsck));
-
+       if (unlikely(set == NULL))
                RETURN(-ENOMEM);
-       }
 
        memset(lr, 0, sizeof(*lr));
        lr->lr_event = LE_STOP;
@@ -1352,8 +1877,8 @@ static int lfsck_stop_all(const struct lu_env *env,
                if (rc != 0) {
                        lfsck_interpret(env, lfsck, NULL, laia, rc);
                        lfsck_tgt_put(ltd);
-                       CWARN("%s: cannot notify MDT %x for LFSCK stop: "
-                             "rc = %d\n", lfsck_lfsck2name(lfsck), idx, rc);
+                       CERROR("%s: cannot notify MDT %x for LFSCK stop: "
+                              "rc = %d\n", lfsck_lfsck2name(lfsck), idx, rc);
                        rc1 = rc;
                }
        }
@@ -1369,8 +1894,8 @@ static int lfsck_stop_all(const struct lu_env *env,
                rc = 0;
 
        if (rc != 0)
-               CWARN("%s: fail to stop LFSCK on some MDTs: rc = %d\n",
-                     lfsck_lfsck2name(lfsck), rc);
+               CERROR("%s: fail to stop LFSCK on some MDTs: rc = %d\n",
+                      lfsck_lfsck2name(lfsck), rc);
 
        RETURN(rc != 0 ? rc : rc1);
 }
@@ -1393,21 +1918,8 @@ static int lfsck_start_all(const struct lu_env *env,
        LASSERT(start->ls_flags & LPF_BROADCAST);
 
        set = ptlrpc_prep_set();
-       if (unlikely(set == NULL)) {
-               if (bk->lb_param & LPF_FAILOUT) {
-                       CERROR("%s: cannot allocate memory for start LFSCK on "
-                              "all targets, failout.\n",
-                              lfsck_lfsck2name(lfsck));
-
-                       RETURN(-ENOMEM);
-               } else {
-                       CWARN("%s: cannot allocate memory for start LFSCK on "
-                             "all targets, partly scan.\n",
-                             lfsck_lfsck2name(lfsck));
-
-                       RETURN(0);
-               }
-       }
+       if (unlikely(set == NULL))
+               RETURN(-ENOMEM);
 
        memset(lr, 0, sizeof(*lr));
        lr->lr_event = LE_START;
@@ -1439,17 +1951,10 @@ static int lfsck_start_all(const struct lu_env *env,
                if (rc != 0) {
                        lfsck_interpret(env, lfsck, NULL, laia, rc);
                        lfsck_tgt_put(ltd);
-                       if (bk->lb_param & LPF_FAILOUT) {
-                               CERROR("%s: cannot notify MDT %x for LFSCK "
-                                      "start, failout: rc = %d\n",
-                                      lfsck_lfsck2name(lfsck), idx, rc);
-                               break;
-                       } else {
-                               CWARN("%s: cannot notify MDT %x for LFSCK "
-                                     "start, partly scan: rc = %d\n",
-                                     lfsck_lfsck2name(lfsck), idx, rc);
-                               rc = 0;
-                       }
+                       CERROR("%s: cannot notify MDT %x for LFSCK "
+                              "start, failout: rc = %d\n",
+                              lfsck_lfsck2name(lfsck), idx, rc);
+                       break;
                }
        }
        up_read(&ltds->ltd_rw_sem);
@@ -1467,22 +1972,15 @@ static int lfsck_start_all(const struct lu_env *env,
                rc = laia->laia_result;
 
        if (rc != 0) {
-               if (bk->lb_param & LPF_FAILOUT) {
-                       struct lfsck_stop *stop = &info->lti_stop;
-
-                       CERROR("%s: cannot start LFSCK on some MDTs, "
-                              "stop all: rc = %d\n",
-                              lfsck_lfsck2name(lfsck), rc);
-                       if (rc != -EALREADY) {
-                               stop->ls_status = LS_FAILED;
-                               stop->ls_flags = LPF_ALL_TGT | LPF_BROADCAST;
-                               lfsck_stop_all(env, lfsck, stop);
-                       }
-               } else {
-                       CWARN("%s: cannot start LFSCK on some MDTs, "
-                             "partly scan: rc = %d\n",
-                             lfsck_lfsck2name(lfsck), rc);
-                       rc = 0;
+               struct lfsck_stop *stop = &info->lti_stop;
+
+               CERROR("%s: cannot start LFSCK on some MDTs, "
+                      "stop all: rc = %d\n",
+                      lfsck_lfsck2name(lfsck), rc);
+               if (rc != -EALREADY) {
+                       stop->ls_status = LS_FAILED;
+                       stop->ls_flags = LPF_ALL_TGT | LPF_BROADCAST;
+                       lfsck_stop_all(env, lfsck, stop);
                }
        }
 
@@ -1499,8 +1997,8 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        struct lfsck_component          *com;
        struct l_wait_info               lwi    = { 0 };
        struct lfsck_thread_args        *lta;
-       bool                             dirty  = false;
-       long                             rc     = 0;
+       struct task_struct              *task;
+       int                              rc     = 0;
        __u16                            valid  = 0;
        __u16                            flags  = 0;
        __u16                            type   = 1;
@@ -1508,7 +2006,7 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
 
        lfsck = lfsck_instance_find(key, true, false);
        if (unlikely(lfsck == NULL))
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
 
        /* System is not ready, try again later. */
        if (unlikely(lfsck->li_namespace == NULL))
@@ -1516,7 +2014,7 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
 
        /* start == NULL means auto trigger paused LFSCK. */
        if ((start == NULL) &&
-           (cfs_list_empty(&lfsck->li_list_scan) ||
+           (list_empty(&lfsck->li_list_scan) ||
             OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
                GOTO(put, rc = 0);
 
@@ -1526,6 +2024,11 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        spin_lock(&lfsck->li_lock);
        if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
                rc = -EALREADY;
+               if (unlikely(start == NULL)) {
+                       spin_unlock(&lfsck->li_lock);
+                       GOTO(out, rc);
+               }
+
                while (start->ls_active != 0) {
                        if (!(type & start->ls_active)) {
                                type <<= 1;
@@ -1573,78 +2076,6 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        }
 
        start->ls_version = bk->lb_version;
-       if (start->ls_valid & LSV_SPEED_LIMIT) {
-               __lfsck_set_speed(lfsck, start->ls_speed_limit);
-               dirty = true;
-       }
-
-       if (start->ls_valid & LSV_ASYNC_WINDOWS &&
-           bk->lb_async_windows != start->ls_async_windows) {
-               bk->lb_async_windows = start->ls_async_windows;
-               dirty = true;
-       }
-
-       if (start->ls_valid & LSV_ERROR_HANDLE) {
-               valid |= DOIV_ERROR_HANDLE;
-               if (start->ls_flags & LPF_FAILOUT)
-                       flags |= DOIF_FAILOUT;
-
-               if ((start->ls_flags & LPF_FAILOUT) &&
-                   !(bk->lb_param & LPF_FAILOUT)) {
-                       bk->lb_param |= LPF_FAILOUT;
-                       dirty = true;
-               } else if (!(start->ls_flags & LPF_FAILOUT) &&
-                          (bk->lb_param & LPF_FAILOUT)) {
-                       bk->lb_param &= ~LPF_FAILOUT;
-                       dirty = true;
-               }
-       }
-
-       if (start->ls_valid & LSV_DRYRUN) {
-               valid |= DOIV_DRYRUN;
-               if (start->ls_flags & LPF_DRYRUN)
-                       flags |= DOIF_DRYRUN;
-
-               if ((start->ls_flags & LPF_DRYRUN) &&
-                   !(bk->lb_param & LPF_DRYRUN)) {
-                       bk->lb_param |= LPF_DRYRUN;
-                       dirty = true;
-               } else if (!(start->ls_flags & LPF_DRYRUN) &&
-                          (bk->lb_param & LPF_DRYRUN)) {
-                       bk->lb_param &= ~LPF_DRYRUN;
-                       lfsck->li_drop_dryrun = 1;
-                       dirty = true;
-               }
-       }
-
-       if (bk->lb_param & LPF_ALL_TGT &&
-           !(start->ls_flags & LPF_ALL_TGT)) {
-               bk->lb_param &= ~LPF_ALL_TGT;
-               dirty = true;
-       } else if (!(bk->lb_param & LPF_ALL_TGT) &&
-                  start->ls_flags & LPF_ALL_TGT) {
-               bk->lb_param |= LPF_ALL_TGT;
-               dirty = true;
-       }
-
-       if (bk->lb_param & LPF_ORPHAN &&
-           !(start->ls_flags & LPF_ORPHAN)) {
-               bk->lb_param &= ~LPF_ORPHAN;
-               dirty = true;
-       } else if (!(bk->lb_param & LPF_ORPHAN) &&
-                  start->ls_flags & LPF_ORPHAN) {
-               bk->lb_param |= LPF_ORPHAN;
-               dirty = true;
-       }
-
-       if (dirty) {
-               rc = lfsck_bookmark_store(env, lfsck);
-               if (rc != 0)
-                       GOTO(out, rc);
-       }
-
-       if (start->ls_flags & LPF_RESET)
-               flags |= DOIF_RESET;
 
        if (start->ls_active != 0) {
                struct lfsck_component *next;
@@ -1657,8 +2088,8 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
                        GOTO(out, rc = -ENOTSUPP);
                }
 
-               cfs_list_for_each_entry_safe(com, next,
-                                            &lfsck->li_list_scan, lc_link) {
+               list_for_each_entry_safe(com, next,
+                                        &lfsck->li_list_scan, lc_link) {
                        if (!(com->lc_type & start->ls_active)) {
                                rc = com->lc_ops->lfsck_post(env, com, 0,
                                                             false);
@@ -1671,21 +2102,40 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
                        if (type & start->ls_active) {
                                com = __lfsck_component_find(lfsck, type,
                                                        &lfsck->li_list_idle);
-                               if (com != NULL) {
+                               if (com != NULL)
                                        /* The component status will be updated
                                         * when its prep() is called later by
                                         * the LFSCK main engine. */
-                                       cfs_list_del_init(&com->lc_link);
-                                       cfs_list_add_tail(&com->lc_link,
-                                                         &lfsck->li_list_scan);
-                               }
+                                       list_move_tail(&com->lc_link,
+                                                      &lfsck->li_list_scan);
                                start->ls_active &= ~type;
                        }
                        type <<= 1;
                }
        }
 
-       cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+       if (list_empty(&lfsck->li_list_scan)) {
+               /* The speed limit controls both the LFSCK and the low layer
+                * scrub (if applied), so it needs to be handled first. */
+               if (start->ls_valid & LSV_SPEED_LIMIT) {
+                       if (__lfsck_set_speed(lfsck, start->ls_speed_limit)) {
+                               rc = lfsck_bookmark_store(env, lfsck);
+                               if (rc != 0)
+                                       GOTO(out, rc);
+                       }
+               }
+
+               goto trigger;
+       }
+
+       if (start->ls_flags & LPF_RESET)
+               flags |= DOIF_RESET;
+
+       rc = lfsck_set_param(env, lfsck, start, !!(flags & DOIF_RESET));
+       if (rc != 0)
+               GOTO(out, rc);
+
+       list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
                start->ls_active |= com->lc_type;
                if (flags & DOIF_RESET) {
                        rc = com->lc_ops->lfsck_reset(env, com, false);
@@ -1696,18 +2146,22 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
 
 trigger:
        lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
-       if (bk->lb_param & LPF_DRYRUN) {
+       if (bk->lb_param & LPF_DRYRUN)
                lfsck->li_args_dir |= LUDA_VERIFY_DRYRUN;
-               valid |= DOIV_DRYRUN;
-               flags |= DOIF_DRYRUN;
-       }
 
-       if (bk->lb_param & LPF_FAILOUT) {
+       if (start != NULL && start->ls_valid & LSV_ERROR_HANDLE) {
                valid |= DOIV_ERROR_HANDLE;
-               flags |= DOIF_FAILOUT;
+               if (start->ls_flags & LPF_FAILOUT)
+                       flags |= DOIF_FAILOUT;
        }
 
-       if (!cfs_list_empty(&lfsck->li_list_scan))
+       if (start != NULL && start->ls_valid & LSV_DRYRUN) {
+               valid |= DOIV_DRYRUN;
+               if (start->ls_flags & LPF_DRYRUN)
+                       flags |= DOIF_DRYRUN;
+       }
+
+       if (!list_empty(&lfsck->li_list_scan))
                flags |= DOIF_OUTUSED;
 
        lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
@@ -1716,9 +2170,11 @@ trigger:
        if (IS_ERR(lta))
                GOTO(out, rc = PTR_ERR(lta));
 
-       rc = PTR_ERR(kthread_run(lfsck_master_engine, lta, "lfsck"));
-       if (IS_ERR_VALUE(rc)) {
-               CERROR("%s: cannot start LFSCK thread: rc = %ld\n",
+       __lfsck_set_speed(lfsck, bk->lb_speed_limit);
+       task = kthread_run(lfsck_master_engine, lta, "lfsck");
+       if (IS_ERR(task)) {
+               rc = PTR_ERR(task);
+               CERROR("%s: cannot start LFSCK thread: rc = %d\n",
                       lfsck_lfsck2name(lfsck), rc);
                lfsck_thread_args_fini(lta);
 
@@ -1784,7 +2240,7 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key,
 
        lfsck = lfsck_instance_find(key, true, false);
        if (unlikely(lfsck == NULL))
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
 
        thread = &lfsck->li_thread;
        /* release lfsck::li_mutex to avoid deadlock. */
@@ -1801,9 +2257,10 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key,
 
        mutex_lock(&lfsck->li_mutex);
        spin_lock(&lfsck->li_lock);
+       /* no error if LFSCK is already stopped, or was never started */
        if (thread_is_init(thread) || thread_is_stopped(thread)) {
                spin_unlock(&lfsck->li_lock);
-               GOTO(out, rc = -EALREADY);
+               GOTO(out, rc = 0);
        }
 
        if (stop != NULL) {
@@ -1869,13 +2326,15 @@ int lfsck_in_notify(const struct lu_env *env, struct dt_device *key,
        case LE_PHASE1_DONE:
        case LE_PHASE2_DONE:
        case LE_FID_ACCESSED:
-       case LE_PEER_EXIT: {
+       case LE_PEER_EXIT:
+       case LE_CONDITIONAL_DESTROY:
+       case LE_PAIRS_VERIFY: {
                struct lfsck_instance  *lfsck;
                struct lfsck_component *com;
 
                lfsck = lfsck_instance_find(key, true, false);
                if (unlikely(lfsck == NULL))
-                       RETURN(-ENODEV);
+                       RETURN(-ENXIO);
 
                com = lfsck_component_find(lfsck, lr->lr_active);
                if (likely(com != NULL)) {
@@ -1904,7 +2363,7 @@ int lfsck_query(const struct lu_env *env, struct dt_device *key,
 
        lfsck = lfsck_instance_find(key, true, false);
        if (unlikely(lfsck == NULL))
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
 
        com = lfsck_component_find(lfsck, lr->lr_active);
        if (likely(com != NULL)) {
@@ -1924,7 +2383,7 @@ int lfsck_register_namespace(const struct lu_env *env, struct dt_device *key,
                             struct ldlm_namespace *ns)
 {
        struct lfsck_instance  *lfsck;
-       int                     rc      = -ENODEV;
+       int                     rc      = -ENXIO;
 
        lfsck = lfsck_instance_find(key, true, false);
        if (likely(lfsck != NULL)) {
@@ -1943,7 +2402,7 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key,
 {
        struct lfsck_instance   *lfsck;
        struct dt_object        *root  = NULL;
-       struct dt_object        *obj;
+       struct dt_object        *obj   = NULL;
        struct lu_fid           *fid   = &lfsck_env_info(env)->lti_fid;
        int                      rc;
        ENTRY;
@@ -1958,11 +2417,11 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key,
 
        mutex_init(&lfsck->li_mutex);
        spin_lock_init(&lfsck->li_lock);
-       CFS_INIT_LIST_HEAD(&lfsck->li_link);
-       CFS_INIT_LIST_HEAD(&lfsck->li_list_scan);
-       CFS_INIT_LIST_HEAD(&lfsck->li_list_dir);
-       CFS_INIT_LIST_HEAD(&lfsck->li_list_double_scan);
-       CFS_INIT_LIST_HEAD(&lfsck->li_list_idle);
+       INIT_LIST_HEAD(&lfsck->li_link);
+       INIT_LIST_HEAD(&lfsck->li_list_scan);
+       INIT_LIST_HEAD(&lfsck->li_list_dir);
+       INIT_LIST_HEAD(&lfsck->li_list_double_scan);
+       INIT_LIST_HEAD(&lfsck->li_list_idle);
        atomic_set(&lfsck->li_ref, 1);
        atomic_set(&lfsck->li_double_scan_count, 0);
        init_waitqueue_head(&lfsck->li_thread.t_ctl_waitq);
@@ -1983,7 +2442,7 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key,
        fid->f_seq = FID_SEQ_LOCAL_NAME;
        fid->f_oid = 1;
        fid->f_ver = 0;
-       rc = local_oid_storage_init(env, lfsck->li_bottom, fid, &lfsck->li_los);
+       rc = local_oid_storage_init(env, key, fid, &lfsck->li_los);
        if (rc != 0)
                GOTO(out, rc);
 
@@ -1991,7 +2450,7 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key,
        if (rc != 0)
                GOTO(out, rc);
 
-       root = dt_locate(env, lfsck->li_bottom, fid);
+       root = dt_locate(env, key, fid);
        if (IS_ERR(root))
                GOTO(out, rc = PTR_ERR(root));
 
@@ -2001,36 +2460,82 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key,
        lfsck->li_local_root_fid = *fid;
        if (master) {
                lfsck->li_master = 1;
-               if (lfsck_dev_idx(lfsck->li_bottom) == 0) {
+               if (lfsck_dev_idx(key) == 0) {
+                       struct lu_fid *pfid = &lfsck_env_info(env)->lti_fid2;
+                       const struct lu_name *cname;
+
                        rc = dt_lookup(env, root,
                                (struct dt_rec *)(&lfsck->li_global_root_fid),
                                (const struct dt_key *)"ROOT", BYPASS_CAPA);
                        if (rc != 0)
                                GOTO(out, rc);
+
+                       obj = dt_locate(env, key, &lfsck->li_global_root_fid);
+                       if (IS_ERR(obj))
+                               GOTO(out, rc = PTR_ERR(obj));
+
+                       rc = dt_lookup(env, obj, (struct dt_rec *)fid,
+                               (const struct dt_key *)dotlustre, BYPASS_CAPA);
+                       if (rc != 0)
+                               GOTO(out, rc);
+
+                       lu_object_put(env, &obj->do_lu);
+                       obj = dt_locate(env, key, fid);
+                       if (IS_ERR(obj))
+                               GOTO(out, rc = PTR_ERR(obj));
+
+                       cname = lfsck_name_get_const(env, dotlustre,
+                                                    strlen(dotlustre));
+                       rc = lfsck_verify_linkea(env, key, obj, cname,
+                                                &lfsck->li_global_root_fid);
+                       if (rc != 0)
+                               GOTO(out, rc);
+
+                       *pfid = *fid;
+                       rc = dt_lookup(env, obj, (struct dt_rec *)fid,
+                                      (const struct dt_key *)lostfound,
+                                      BYPASS_CAPA);
+                       if (rc != 0)
+                               GOTO(out, rc);
+
+                       lu_object_put(env, &obj->do_lu);
+                       obj = dt_locate(env, key, fid);
+                       if (IS_ERR(obj))
+                               GOTO(out, rc = PTR_ERR(obj));
+
+                       cname = lfsck_name_get_const(env, lostfound,
+                                                    strlen(lostfound));
+                       rc = lfsck_verify_linkea(env, key, obj, cname, pfid);
+                       if (rc != 0)
+                               GOTO(out, rc);
+
+                       lu_object_put(env, &obj->do_lu);
+                       obj = NULL;
                }
        }
 
        fid->f_seq = FID_SEQ_LOCAL_FILE;
        fid->f_oid = OTABLE_IT_OID;
        fid->f_ver = 0;
-       obj = dt_locate(env, lfsck->li_bottom, fid);
+       obj = dt_locate(env, key, fid);
        if (IS_ERR(obj))
                GOTO(out, rc = PTR_ERR(obj));
 
+       lu_object_get(&obj->do_lu);
        lfsck->li_obj_oit = obj;
        rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
-       if (rc != 0) {
-               if (rc == -ENOTSUPP)
-                       GOTO(add, rc = 0);
-
+       if (rc != 0)
                GOTO(out, rc);
-       }
 
        rc = lfsck_bookmark_setup(env, lfsck);
        if (rc != 0)
                GOTO(out, rc);
 
        if (master) {
+               rc = lfsck_fid_init(lfsck);
+               if (rc < 0)
+                       GOTO(out, rc);
+
                rc = lfsck_namespace_setup(env, lfsck);
                if (rc < 0)
                        GOTO(out, rc);
@@ -2042,11 +2547,12 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key,
 
        /* XXX: more LFSCK components initialization to be added here. */
 
-add:
        rc = lfsck_instance_add(lfsck);
        if (rc == 0)
                rc = lfsck_add_target_from_orphan(env, lfsck);
 out:
+       if (obj != NULL && !IS_ERR(obj))
+               lu_object_put(env, &obj->do_lu);
        if (root != NULL && !IS_ERR(root))
                lu_object_put(env, &root->do_lu);
        if (rc != 0)
@@ -2117,7 +2623,7 @@ void lfsck_del_target(const struct lu_env *env, struct dt_device *key,
 {
        struct lfsck_instance   *lfsck;
        struct lfsck_tgt_descs  *ltds;
-       struct lfsck_tgt_desc   *ltd    = NULL;
+       struct lfsck_tgt_desc   *ltd;
        struct list_head        *head;
 
        if (for_ost)
@@ -2136,6 +2642,7 @@ void lfsck_del_target(const struct lu_env *env, struct dt_device *key,
                }
        }
 
+       ltd = NULL;
        lfsck = __lfsck_instance_find(key, true, false);
        spin_unlock(&lfsck_instance_lock);
        if (unlikely(lfsck == NULL))
@@ -2167,7 +2674,7 @@ unlock:
                if (for_ost)
                        head = &lfsck->li_ost_descs.ltd_orphan;
                else
-                       head = &lfsck->li_ost_descs.ltd_orphan;
+                       head = &lfsck->li_mdt_descs.ltd_orphan;
 
                list_for_each_entry(ltd, head, ltd_orphan_list) {
                        if (ltd->ltd_tgt == tgt) {
@@ -2182,7 +2689,7 @@ unlock:
                spin_lock(&ltds->ltd_lock);
                ltd->ltd_dead = 1;
                spin_unlock(&ltds->ltd_lock);
-               lfsck_stop_notify(env, lfsck, ltds, ltd, LT_LAYOUT);
+               lfsck_stop_notify(env, lfsck, ltds, ltd, LFSCK_TYPE_LAYOUT);
                lfsck_tgt_put(ltd);
        }
 
@@ -2194,6 +2701,7 @@ static int __init lfsck_init(void)
 {
        int rc;
 
+       INIT_LIST_HEAD(&lfsck_instance_list);
        INIT_LIST_HEAD(&lfsck_ost_orphan_list);
        INIT_LIST_HEAD(&lfsck_mdt_orphan_list);
        lfsck_key_init_generic(&lfsck_thread_key, NULL);
@@ -2211,7 +2719,7 @@ static void __exit lfsck_exit(void)
        struct lfsck_tgt_desc *ltd;
        struct lfsck_tgt_desc *next;
 
-       LASSERT(cfs_list_empty(&lfsck_instance_list));
+       LASSERT(list_empty(&lfsck_instance_list));
 
        list_for_each_entry_safe(ltd, next, &lfsck_ost_orphan_list,
                                 ltd_orphan_list) {