Whamcloud - gitweb
LU-4972 lfsck: skip .lustre and children for namespace check
[fs/lustre-release.git] / lustre / lfsck / lfsck_lib.c
index c2f2b50..891becc 100644 (file)
@@ -92,6 +92,9 @@ const char *lfsck_param_names[] = {
        "failout",
        "dryrun",
        "all_targets",
+       "broadcast",
+       "orphan",
+       "create_ostobj",
        NULL
 };
 
@@ -287,7 +290,7 @@ __lfsck_component_find(struct lfsck_instance *lfsck, __u16 type, cfs_list_t *lis
        return NULL;
 }
 
-static struct lfsck_component *
+struct lfsck_component *
 lfsck_component_find(struct lfsck_instance *lfsck, __u16 type)
 {
        struct lfsck_component *com;
@@ -322,6 +325,470 @@ void lfsck_component_cleanup(const struct lu_env *env,
        lfsck_component_put(env, com);
 }
 
+/*
+ * Allocate a new FID from the LFSCK sequence client (lfsck->li_seq) and
+ * remember it in the bookmark.
+ *
+ * lfsck->li_mutex is taken around the whole operation unless the caller
+ * passes locked == true.
+ *
+ * Returns a negative value if the FID allocation fails; otherwise returns
+ * whatever lfsck_bookmark_store() returns.
+ */
+int lfsck_fid_alloc(const struct lu_env *env, struct lfsck_instance *lfsck,
+                   struct lu_fid *fid, bool locked)
+{
+       const bool               need_lock = !locked;
+       int                      rc;
+       ENTRY;
+
+       if (need_lock)
+               mutex_lock(&lfsck->li_mutex);
+
+       rc = seq_client_alloc_fid(env, lfsck->li_seq, fid);
+       if (rc < 0)
+               goto unlock;
+
+       /* Record the FID in the bookmark before it is used. If a later
+        * step of the caller fails, the worst case is that this one FID is
+        * lost, which is acceptable because LFSCK rarely creates objects. */
+       lfsck->li_bookmark_ram.lb_last_fid = *fid;
+       rc = lfsck_bookmark_store(env, lfsck);
+
+unlock:
+       if (need_lock)
+               mutex_unlock(&lfsck->li_mutex);
+
+       RETURN(rc);
+}
+
+/* Entry names used as insert keys for the self ("." ) and parent ("..")
+ * links when a new directory is populated below. */
+static const char dot[] = ".";
+static const char dotdot[] = "..";
+
+/*
+ * Create the directory "name" under "parent" when both live on the local
+ * device (lfsck->li_bottom), using a single local transaction.
+ *
+ * Steps, each declared first (Na) and executed afterwards (Nb):
+ *   1. create the child and insert its "." and ".." entries,
+ *   2. increase the child nlink,
+ *   3. insert "name" into the parent directory,
+ *   4. increase the parent nlink,
+ *   5. record the child FID as lfsck_bookmark::lb_lpf_fid and write the
+ *      bookmark.
+ *
+ * Returns 0 on success; otherwise the error of the first step that failed
+ * (or -ENOTDIR if the new child cannot be used as a directory). The result
+ * of dt_trans_stop() is not reported.
+ */
+static int lfsck_create_lpf_local(const struct lu_env *env,
+                                 struct lfsck_instance *lfsck,
+                                 struct dt_object *parent,
+                                 struct dt_object *child,
+                                 struct lu_attr *la,
+                                 struct dt_object_format *dof,
+                                 const char *name)
+{
+       struct dt_device        *dev    = lfsck->li_bottom;
+       struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
+       struct dt_object        *bk_obj = lfsck->li_bookmark_obj;
+       const struct lu_fid     *cfid   = lu_object_fid(&child->do_lu);
+       struct thandle          *th     = NULL;
+       loff_t                   pos    = 0;
+       int                      len    = sizeof(struct lfsck_bookmark);
+       int                      rc     = 0;
+       ENTRY;
+
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       /* 1a. create child */
+       rc = dt_declare_create(env, child, la, NULL, dof, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 2a. increase child nlink */
+       rc = dt_declare_ref_add(env, child, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 3a. insert name into parent dir */
+       rc = dt_declare_insert(env, parent, (const struct dt_rec *)cfid,
+                              (const struct dt_key *)name, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 4a. increase parent nlink */
+       rc = dt_declare_ref_add(env, parent, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 5a. update bookmark */
+       rc = dt_declare_record_write(env, bk_obj,
+                                    lfsck_buf_get(env, bk, len), 0, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* The child stays write-locked until step 2b is done; every failure
+        * in between leaves through the "unlock" label. */
+       dt_write_lock(env, child, 0);
+       /* 1b.1 create child */
+       rc = dt_create(env, child, la, NULL, dof, th);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       if (unlikely(!dt_try_as_dir(env, child)))
+               GOTO(unlock, rc = -ENOTDIR);
+
+       /* 1b.2 insert dot into child dir */
+       rc = dt_insert(env, child, (const struct dt_rec *)cfid,
+                      (const struct dt_key *)dot, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 1b.3 insert dotdot into child dir */
+       rc = dt_insert(env, child, (const struct dt_rec *)&LU_LPF_FID,
+                      (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 2b. increase child nlink */
+       rc = dt_ref_add(env, child, th);
+       dt_write_unlock(env, child);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 3b. insert name into parent dir */
+       rc = dt_insert(env, parent, (const struct dt_rec *)cfid,
+                      (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, parent, 0);
+       /* 4b. increase parent nlink */
+       rc = dt_ref_add(env, parent, th);
+       dt_write_unlock(env, parent);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       bk->lb_lpf_fid = *cfid;
+       lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk);
+
+       /* 5b. update bookmark: write the converted copy that was just
+        *     prepared in li_bookmark_disk, not the CPU-order "bk", so the
+        *     stored byte order does not depend on the host endianness. */
+       rc = dt_record_write(env, bk_obj,
+                            lfsck_buf_get(env, &lfsck->li_bookmark_disk, len),
+                            &pos, th);
+
+       GOTO(stop, rc);
+
+unlock:
+       dt_write_unlock(env, child);
+
+stop:
+       dt_trans_stop(env, dev, th);
+
+       return rc;
+}
+
+/*
+ * Create the directory "name" when the child object is local but the
+ * parent directory is reached through lfsck->li_next (the caller uses this
+ * path when the local device index is not 0).
+ *
+ * The work is split into two transactions, see the XXX comment below:
+ *   I.  on lfsck->li_bottom: create the child with its "." and ".." entries
+ *       and increase its nlink;
+ *   II. on lfsck->li_next: insert "name" into the parent, increase the
+ *       parent nlink and write the bookmark with lb_lpf_fid set. If the
+ *       parent update fails, the child created in transaction I is
+ *       unlinked and destroyed again (best effort, results not checked).
+ *
+ * Returns 0 on success; otherwise the error of the first step that failed
+ * (or -ENOTDIR if the new child cannot be used as a directory). The result
+ * of dt_trans_stop() is not reported.
+ */
+static int lfsck_create_lpf_remote(const struct lu_env *env,
+                                  struct lfsck_instance *lfsck,
+                                  struct dt_object *parent,
+                                  struct dt_object *child,
+                                  struct lu_attr *la,
+                                  struct dt_object_format *dof,
+                                  const char *name)
+{
+       struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
+       struct dt_object        *bk_obj = lfsck->li_bookmark_obj;
+       const struct lu_fid     *cfid   = lu_object_fid(&child->do_lu);
+       struct thandle          *th     = NULL;
+       struct dt_device        *dev;
+       loff_t                   pos    = 0;
+       int                      len    = sizeof(struct lfsck_bookmark);
+       int                      rc     = 0;
+       ENTRY;
+
+       /* Create .lustre/lost+found/MDTxxxx. */
+
+       /* XXX: Currently, cross-MDT create operation needs to create the child
+        *      object firstly, then insert name into the parent directory. For
+        *      this case, the child object resides on current MDT (local), but
+        *      the parent ".lustre/lost+found" may be on remote MDT. It is not
+        *      easy to contain all the sub-modifications orderly within single
+        *      transaction.
+        *
+        *      To avoid more inconsistency, we split the create operation into
+        *      two transactions:
+        *
+        *      1) create the child locally.
+        *      2) insert the name "MDTXXXX" in the parent ".lustre/lost+found"
+        *         remotely and update the lfsck_bookmark::lb_lpf_fid locally.
+        *
+        *      If 1) done but 2) failed, then the worst case is that we lose
+        *      one object locally, which is not a big issue. (can be repaired
+        *      by LFSCK phase III) */
+
+       /* Transaction I: */
+
+       dev = lfsck->li_bottom;
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       /* 1a. create child locally. */
+       rc = dt_declare_create(env, child, la, NULL, dof, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 2a. increase child nlink locally. */
+       rc = dt_declare_ref_add(env, child, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       dt_write_lock(env, child, 0);
+       /* 1b. create child locally. */
+       rc = dt_create(env, child, la, NULL, dof, th);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       if (unlikely(!dt_try_as_dir(env, child)))
+               GOTO(unlock, rc = -ENOTDIR);
+
+       /* 2b.1 insert dot into child dir locally. */
+       rc = dt_insert(env, child, (const struct dt_rec *)cfid,
+                      (const struct dt_key *)dot, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 2b.2 insert dotdot into child dir locally. */
+       rc = dt_insert(env, child, (const struct dt_rec *)&LU_LPF_FID,
+                      (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1);
+       if (rc != 0)
+               GOTO(unlock, rc);
+
+       /* 2b.3 increase child nlink locally. */
+       rc = dt_ref_add(env, child, th);
+       dt_write_unlock(env, child);
+       /* Transaction I ends here whether or not the nlink update worked. */
+       dt_trans_stop(env, dev, th);
+       if (rc != 0)
+               RETURN(rc);
+
+       /* Transaction II: */
+
+       dev = lfsck->li_next;
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       /* 3a. insert name into parent dir remotely. */
+       rc = dt_declare_insert(env, parent, (const struct dt_rec *)cfid,
+                              (const struct dt_key *)name, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 4a. increase parent nlink remotely. */
+       rc = dt_declare_ref_add(env, parent, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 5a. decrease child nlink for dotdot locally if former remote
+        *     update failed. */
+       rc = dt_declare_ref_del(env, child, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 6a. decrease child nlink for dot locally if former remote
+        *     update failed. */
+       rc = dt_declare_ref_del(env, child, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 7a. destroy child locally if former remote update failed. */
+       rc = dt_declare_destroy(env, child, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 8a. update bookmark locally. */
+       rc = dt_declare_record_write(env, bk_obj,
+                                    lfsck_buf_get(env, bk, len), 0, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       /* 3b. insert name into parent dir remotely. */
+       rc = dt_insert(env, parent, (const struct dt_rec *)cfid,
+                      (const struct dt_key *)name, th, BYPASS_CAPA, 1);
+       if (rc == 0) {
+               dt_write_lock(env, parent, 0);
+               /* 4b. increase parent nlink remotely. */
+               rc = dt_ref_add(env, parent, th);
+               dt_write_unlock(env, parent);
+       }
+       if (rc != 0) {
+               /* Undo transaction I on a best-effort basis; the original
+                * error in rc is what gets reported. */
+
+               /* 5b. decrease child nlink for dotdot locally. */
+               dt_ref_del(env, child, th);
+               /* 6b. decrease child nlink for dot locally. */
+               dt_ref_del(env, child, th);
+               /* 7b. destroy child locally. */
+               dt_destroy(env, child, th);
+               GOTO(stop, rc);
+       }
+
+       bk->lb_lpf_fid = *cfid;
+       lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk);
+
+       /* 8b. update bookmark locally: write the converted copy that was
+        *     just prepared in li_bookmark_disk, not the CPU-order "bk", so
+        *     the stored byte order does not depend on host endianness. */
+       rc = dt_record_write(env, bk_obj,
+                            lfsck_buf_get(env, &lfsck->li_bookmark_disk, len),
+                            &pos, th);
+
+       GOTO(stop, rc);
+
+unlock:
+       dt_write_unlock(env, child);
+stop:
+       dt_trans_stop(env, dev, th);
+
+       return rc;
+}
+
+/* Do NOT create .lustre/lost+found/MDTxxxx when register the lfsck instance,
+ * because the MDT0 maybe not ready for sequence allocation yet. We do that
+ * only when it is required, such as orphan OST-objects repairing.
+ *
+ * Find or create the per-MDT lost+found directory and cache it in
+ * lfsck->li_lpf_obj. The directory is named "MDT" followed by the local
+ * device index as four hex digits and lives under the object identified by
+ * LU_LPF_FID, which is looked up on the local device for index 0 and on
+ * MDT descriptor 0 otherwise.
+ *
+ * Returns 0 when li_lpf_obj is (or already was) set, -EINVAL if the device
+ * index does not fit the name format, -ENXIO if MDT descriptor 0 cannot be
+ * found, -ENOTDIR if the parent or an existing child is not a directory,
+ * or the error returned by the lookup/allocation/creation helpers. */
+int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck)
+{
+       struct lfsck_bookmark    *bk    = &lfsck->li_bookmark_ram;
+       struct lfsck_thread_info *info  = lfsck_env_info(env);
+       struct lu_fid            *cfid  = &info->lti_fid2;
+       struct lu_attr           *la    = &info->lti_la;
+       struct dt_object_format  *dof   = &info->lti_dof;
+       struct dt_object         *parent = NULL;
+       struct dt_object         *child = NULL;
+       char                      name[8];
+       int                       node  = lfsck_dev_idx(lfsck->li_bottom);
+       int                       namelen;
+       int                       rc    = 0;
+       ENTRY;
+
+       LASSERT(lfsck->li_master);
+
+       /* "MDT" + four hex digits + NUL exactly fills name[]. An index
+        * above 0xffff (or a negative one) needs more digits, so bound the
+        * write and refuse a truncated name instead of overrunning name[]. */
+       namelen = snprintf(name, sizeof(name), "MDT%04x", node);
+       if (namelen < 0 || namelen >= (int)sizeof(name))
+               RETURN(-EINVAL);
+
+       if (node == 0) {
+               parent = lfsck_object_find_by_dev(env, lfsck->li_bottom,
+                                                 &LU_LPF_FID);
+       } else {
+               struct lfsck_tgt_desc *ltd;
+
+               ltd = lfsck_tgt_get(&lfsck->li_mdt_descs, 0);
+               if (unlikely(ltd == NULL))
+                       RETURN(-ENXIO);
+
+               parent = lfsck_object_find_by_dev(env, ltd->ltd_tgt,
+                                                 &LU_LPF_FID);
+               lfsck_tgt_put(ltd);
+       }
+       if (IS_ERR(parent))
+               RETURN(PTR_ERR(parent));
+
+       if (unlikely(!dt_try_as_dir(env, parent)))
+               GOTO(out, rc = -ENOTDIR);
+
+       mutex_lock(&lfsck->li_mutex);
+       /* Already set up (possibly by a concurrent caller): nothing to do. */
+       if (lfsck->li_lpf_obj != NULL)
+               GOTO(unlock, rc = 0);
+
+       if (fid_is_zero(&bk->lb_lpf_fid)) {
+               /* There is corner case that: in former LFSCK scanning we have
+                * created the .lustre/lost+found/MDTxxxx but failed to update
+                * the lfsck_bookmark::lb_lpf_fid successfully. So need lookup
+                * it from MDT0 firstly. */
+               rc = dt_lookup(env, parent, (struct dt_rec *)cfid,
+                              (const struct dt_key *)name, BYPASS_CAPA);
+               if (rc != 0 && rc != -ENOENT)
+                       GOTO(unlock, rc);
+
+               if (rc == 0) {
+                       /* The name exists: just repair the bookmark. */
+                       bk->lb_lpf_fid = *cfid;
+                       rc = lfsck_bookmark_store(env, lfsck);
+               } else {
+                       /* li_mutex is already held, hence locked == true. */
+                       rc = lfsck_fid_alloc(env, lfsck, cfid, true);
+               }
+               if (rc != 0)
+                       GOTO(unlock, rc);
+       } else {
+               *cfid = bk->lb_lpf_fid;
+       }
+
+       child = lfsck_object_find_by_dev(env, lfsck->li_bottom, cfid);
+       if (IS_ERR(child))
+               GOTO(unlock, rc = PTR_ERR(child));
+
+       if (dt_object_exists(child) != 0) {
+               if (unlikely(!dt_try_as_dir(env, child)))
+                       GOTO(unlock, rc = -ENOTDIR);
+
+               /* The reference on child is kept in li_lpf_obj. */
+               lfsck->li_lpf_obj = child;
+               GOTO(unlock, rc = 0);
+       }
+
+       memset(la, 0, sizeof(*la));
+       la->la_atime = la->la_mtime = la->la_ctime = cfs_time_current_sec();
+       la->la_mode = S_IFDIR | S_IRWXU;
+       la->la_valid = LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE |
+                      LA_UID | LA_GID;
+       memset(dof, 0, sizeof(*dof));
+       dof->dof_type = dt_mode_to_dft(S_IFDIR);
+
+       if (node == 0)
+               rc = lfsck_create_lpf_local(env, lfsck, parent, child, la,
+                                           dof, name);
+       else
+               rc = lfsck_create_lpf_remote(env, lfsck, parent, child, la,
+                                            dof, name);
+       if (rc == 0)
+               lfsck->li_lpf_obj = child;
+
+       GOTO(unlock, rc);
+
+unlock:
+       mutex_unlock(&lfsck->li_mutex);
+       /* On failure the child reference is dropped; on success it is owned
+        * by li_lpf_obj. */
+       if (rc != 0 && child != NULL && !IS_ERR(child))
+               lu_object_put(env, &child->do_lu);
+out:
+       if (parent != NULL && !IS_ERR(parent))
+               lu_object_put(env, &parent->do_lu);
+
+       return rc;
+}
+
+/*
+ * Allocate and initialise lfsck->li_seq, the sequence client used for FID
+ * allocation, under the name "lfsck-<instance name>".
+ *
+ * If the bookmark holds a sane lb_last_fid, the client's current FID is
+ * seeded from it.
+ *
+ * Returns 0 on success, -ENXIO if the site has no sequence server site,
+ * -ENOMEM on allocation failure, or the error from seq_client_init(). On
+ * failure after the client was allocated, li_seq is freed and reset to
+ * NULL.
+ */
+static int lfsck_fid_init(struct lfsck_instance *lfsck)
+{
+       struct seq_server_site  *site;
+       struct lu_fid           *last_fid;
+       char                    *seq_name;
+       int                      rc;
+       ENTRY;
+
+       site = lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site);
+       if (unlikely(site == NULL))
+               RETURN(-ENXIO);
+
+       OBD_ALLOC_PTR(lfsck->li_seq);
+       if (lfsck->li_seq == NULL)
+               RETURN(-ENOMEM);
+
+       /* The name buffer is only needed while the client is initialised. */
+       OBD_ALLOC(seq_name, MAX_OBD_NAME + 7);
+       if (seq_name == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       snprintf(seq_name, MAX_OBD_NAME + 7, "lfsck-%s",
+                lfsck_lfsck2name(lfsck));
+       rc = seq_client_init(lfsck->li_seq, NULL, LUSTRE_SEQ_METADATA,
+                            seq_name, site->ss_server_seq);
+       OBD_FREE(seq_name, MAX_OBD_NAME + 7);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       last_fid = &lfsck->li_bookmark_ram.lb_last_fid;
+       if (fid_is_sane(last_fid))
+               lfsck->li_seq->lcs_fid = *last_fid;
+
+       RETURN(0);
+
+out:
+       OBD_FREE_PTR(lfsck->li_seq);
+       lfsck->li_seq = NULL;
+
+       return rc;
+}
+
+/*
+ * Tear down and free the sequence client in lfsck->li_seq, if there is
+ * one, and reset the pointer to NULL.
+ */
+static void lfsck_fid_fini(struct lfsck_instance *lfsck)
+{
+       if (lfsck->li_seq == NULL)
+               return;
+
+       seq_client_fini(lfsck->li_seq);
+       OBD_FREE_PTR(lfsck->li_seq);
+       lfsck->li_seq = NULL;
+}
+
 void lfsck_instance_cleanup(const struct lu_env *env,
                            struct lfsck_instance *lfsck)
 {
@@ -370,11 +837,18 @@ void lfsck_instance_cleanup(const struct lu_env *env,
                lfsck->li_bookmark_obj = NULL;
        }
 
+       if (lfsck->li_lpf_obj != NULL) {
+               lu_object_put(env, &lfsck->li_lpf_obj->do_lu);
+               lfsck->li_lpf_obj = NULL;
+       }
+
        if (lfsck->li_los != NULL) {
                local_oid_storage_fini(env, lfsck->li_los);
                lfsck->li_los = NULL;
        }
 
+       lfsck_fid_fini(lfsck);
+
        OBD_FREE_PTR(lfsck);
 }
 
@@ -397,8 +871,8 @@ __lfsck_instance_find(struct dt_device *key, bool ref, bool unlink)
        return NULL;
 }
 
-static inline struct lfsck_instance *lfsck_instance_find(struct dt_device *key,
-                                                        bool ref, bool unlink)
+struct lfsck_instance *lfsck_instance_find(struct dt_device *key, bool ref,
+                                          bool unlink)
 {
        struct lfsck_instance *lfsck;
 
@@ -433,8 +907,9 @@ int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
        int flag;
        int rc;
        int i;
+       bool newline = (bits != 0 ? false : true);
 
-       rc = snprintf(*buf, *len, "%s:%c", prefix, bits != 0 ? ' ' : '\n');
+       rc = snprintf(*buf, *len, "%s:%c", prefix, newline ? '\n' : ' ');
        if (rc <= 0)
                return -ENOSPC;
 
@@ -444,8 +919,11 @@ int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
                if (flag & bits) {
                        bits &= ~flag;
                        if (names[i] != NULL) {
+                               if (bits == 0)
+                                       newline = true;
+
                                rc = snprintf(*buf, *len, "%s%c", names[i],
-                                             bits != 0 ? ',' : '\n');
+                                             newline ? '\n' : ',');
                                if (rc <= 0)
                                        return -ENOSPC;
 
@@ -454,6 +932,16 @@ int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
                        }
                }
        }
+
+       if (!newline) {
+               rc = snprintf(*buf, *len, "\n");
+               if (rc <= 0)
+                       return -ENOSPC;
+
+               *buf += rc;
+               *len -= rc;
+       }
+
        return save - *len;
 }
 
@@ -533,9 +1021,10 @@ void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck,
        }
 }
 
-static void __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
+bool __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
 {
-       lfsck->li_bookmark_ram.lb_speed_limit = limit;
+       bool dirty = false;
+
        if (limit != LFSCK_SPEED_NO_LIMIT) {
                if (limit > HZ) {
                        lfsck->li_sleep_rate = limit / HZ;
@@ -548,6 +1037,13 @@ static void __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
                lfsck->li_sleep_jif = 0;
                lfsck->li_sleep_rate = 0;
        }
+
+       if (lfsck->li_bookmark_ram.lb_speed_limit != limit) {
+               lfsck->li_bookmark_ram.lb_speed_limit = limit;
+               dirty = true;
+       }
+
+       return dirty;
 }
 
 void lfsck_control_speed(struct lfsck_instance *lfsck)
@@ -620,8 +1116,8 @@ static int lfsck_needs_scan_dir(const struct lu_env *env,
                        return 1;
                }
 
-               /* .lustre doesn't contain "real" user objects, no need lfsck */
-               if (fid_is_dot_lustre(lfsck_dto2fid(obj))) {
+               /* No need to check .lustre and its children. */
+               if (fid_seq_is_dot_lustre(fid_seq(lfsck_dto2fid(obj)))) {
                        if (depth > 0)
                                lfsck_object_put(env, obj);
                        return 0;
@@ -671,10 +1167,16 @@ static int lfsck_needs_scan_dir(const struct lu_env *env,
                        return 0;
                }
 
-               /* Currently, only client visible directory can be remote. */
                if (dt_object_remote(obj)) {
+                       /* .lustre/lost+found/MDTxxx can be remote directory. */
+                       if (fid_seq_is_dot_lustre(fid_seq(lfsck_dto2fid(obj))))
+                               rc = 0;
+                       else
+                               /* Other remote directory should be client
+                                * visible and need to be checked. */
+                               rc = 1;
                        lfsck_object_put(env, obj);
-                       return 1;
+                       return rc;
                }
 
                depth++;
@@ -973,6 +1475,7 @@ static void lfsck_interpret(const struct lu_env *env,
        struct lfsck_async_interpret_args *laia = args;
        struct lfsck_component            *com;
 
+       LASSERT(laia->laia_com == NULL);
        LASSERT(laia->laia_shared);
 
        spin_lock(&lfsck->li_lock);
@@ -1000,8 +1503,7 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
        int                     rc  = 0;
        int                     rc1 = 0;
 
-       cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
-                                    lc_link) {
+       list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) {
                if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
                        com->lc_journal = 0;
 
@@ -1014,6 +1516,17 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
                     atomic_read(&lfsck->li_double_scan_count) == 0,
                     &lwi);
 
+       if (lfsck->li_status != LS_PAUSED &&
+           lfsck->li_status != LS_CO_PAUSED) {
+               list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
+                                        lc_link) {
+                       spin_lock(&lfsck->li_lock);
+                       list_del_init(&com->lc_link);
+                       list_add_tail(&com->lc_link, &lfsck->li_list_idle);
+                       spin_unlock(&lfsck->li_lock);
+               }
+       }
+
        return rc1 != 0 ? rc1 : rc;
 }
 
@@ -1068,12 +1581,23 @@ void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck)
                                 lc_link) {
                if (com->lc_ops->lfsck_quit != NULL)
                        com->lc_ops->lfsck_quit(env, com);
+
+               spin_lock(&lfsck->li_lock);
+               list_del_init(&com->lc_link);
+               list_del_init(&com->lc_link_dir);
+               list_add_tail(&com->lc_link, &lfsck->li_list_idle);
+               spin_unlock(&lfsck->li_lock);
        }
 
        list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
                                 lc_link) {
                if (com->lc_ops->lfsck_quit != NULL)
                        com->lc_ops->lfsck_quit(env, com);
+
+               spin_lock(&lfsck->li_lock);
+               list_del_init(&com->lc_link);
+               list_add_tail(&com->lc_link, &lfsck->li_list_idle);
+               spin_unlock(&lfsck->li_lock);
        }
 }
 
@@ -1106,9 +1630,6 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
        struct req_format                 *format;
        int                                rc;
 
-       if (!(exp_connect_flags(exp) & OBD_CONNECT_LFSCK))
-               return -EOPNOTSUPP;
-
        switch (request) {
        case LFSCK_NOTIFY:
                format = &RQF_LFSCK_NOTIFY;
@@ -1166,7 +1687,7 @@ int lfsck_get_speed(struct dt_device *key, void *buf, int len)
                              lfsck->li_bookmark_ram.lb_speed_limit);
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1189,12 +1710,12 @@ int lfsck_set_speed(struct dt_device *key, int val)
        lfsck = lfsck_instance_find(key, true, false);
        if (likely(lfsck != NULL)) {
                mutex_lock(&lfsck->li_mutex);
-               __lfsck_set_speed(lfsck, val);
-               rc = lfsck_bookmark_store(&env, lfsck);
+               if (__lfsck_set_speed(lfsck, val))
+                       rc = lfsck_bookmark_store(&env, lfsck);
                mutex_unlock(&lfsck->li_mutex);
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1220,7 +1741,7 @@ int lfsck_get_windows(struct dt_device *key, void *buf, int len)
                              lfsck->li_bookmark_ram.lb_async_windows);
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1243,12 +1764,12 @@ int lfsck_set_windows(struct dt_device *key, int val)
        lfsck = lfsck_instance_find(key, true, false);
        if (likely(lfsck != NULL)) {
                if (val > LFSCK_ASYNC_WIN_MAX) {
-                       CERROR("%s: Too large async windows size, which "
-                              "may cause memory issues. The valid range "
-                              "is [0 - %u]. If you do not want to restrict "
-                              "the windows size for async requests pipeline, "
-                              "just set it as 0.\n",
-                              lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX);
+                       CWARN("%s: Too large async window size, which "
+                             "may cause memory issues. The valid range "
+                             "is [0 - %u]. If you do not want to restrict "
+                             "the window size for async requests pipeline, "
+                             "just set it as 0.\n",
+                             lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX);
                        rc = -EINVAL;
                } else if (lfsck->li_bookmark_ram.lb_async_windows != val) {
                        mutex_lock(&lfsck->li_mutex);
@@ -1258,7 +1779,7 @@ int lfsck_set_windows(struct dt_device *key, int val)
                }
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1291,7 +1812,7 @@ int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type)
 
                lfsck_instance_put(&env, lfsck);
        } else {
-               rc = -ENODEV;
+               rc = -ENXIO;
        }
 
        lu_env_fini(&env);
@@ -1499,7 +2020,6 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        struct lfsck_component          *com;
        struct l_wait_info               lwi    = { 0 };
        struct lfsck_thread_args        *lta;
-       bool                             dirty  = false;
        long                             rc     = 0;
        __u16                            valid  = 0;
        __u16                            flags  = 0;
@@ -1508,7 +2028,7 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
 
        lfsck = lfsck_instance_find(key, true, false);
        if (unlikely(lfsck == NULL))
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
 
        /* System is not ready, try again later. */
        if (unlikely(lfsck->li_namespace == NULL))
@@ -1573,78 +2093,6 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        }
 
        start->ls_version = bk->lb_version;
-       if (start->ls_valid & LSV_SPEED_LIMIT) {
-               __lfsck_set_speed(lfsck, start->ls_speed_limit);
-               dirty = true;
-       }
-
-       if (start->ls_valid & LSV_ASYNC_WINDOWS &&
-           bk->lb_async_windows != start->ls_async_windows) {
-               bk->lb_async_windows = start->ls_async_windows;
-               dirty = true;
-       }
-
-       if (start->ls_valid & LSV_ERROR_HANDLE) {
-               valid |= DOIV_ERROR_HANDLE;
-               if (start->ls_flags & LPF_FAILOUT)
-                       flags |= DOIF_FAILOUT;
-
-               if ((start->ls_flags & LPF_FAILOUT) &&
-                   !(bk->lb_param & LPF_FAILOUT)) {
-                       bk->lb_param |= LPF_FAILOUT;
-                       dirty = true;
-               } else if (!(start->ls_flags & LPF_FAILOUT) &&
-                          (bk->lb_param & LPF_FAILOUT)) {
-                       bk->lb_param &= ~LPF_FAILOUT;
-                       dirty = true;
-               }
-       }
-
-       if (start->ls_valid & LSV_DRYRUN) {
-               valid |= DOIV_DRYRUN;
-               if (start->ls_flags & LPF_DRYRUN)
-                       flags |= DOIF_DRYRUN;
-
-               if ((start->ls_flags & LPF_DRYRUN) &&
-                   !(bk->lb_param & LPF_DRYRUN)) {
-                       bk->lb_param |= LPF_DRYRUN;
-                       dirty = true;
-               } else if (!(start->ls_flags & LPF_DRYRUN) &&
-                          (bk->lb_param & LPF_DRYRUN)) {
-                       bk->lb_param &= ~LPF_DRYRUN;
-                       lfsck->li_drop_dryrun = 1;
-                       dirty = true;
-               }
-       }
-
-       if (bk->lb_param & LPF_ALL_TGT &&
-           !(start->ls_flags & LPF_ALL_TGT)) {
-               bk->lb_param &= ~LPF_ALL_TGT;
-               dirty = true;
-       } else if (!(bk->lb_param & LPF_ALL_TGT) &&
-                  start->ls_flags & LPF_ALL_TGT) {
-               bk->lb_param |= LPF_ALL_TGT;
-               dirty = true;
-       }
-
-       if (bk->lb_param & LPF_ORPHAN &&
-           !(start->ls_flags & LPF_ORPHAN)) {
-               bk->lb_param &= ~LPF_ORPHAN;
-               dirty = true;
-       } else if (!(bk->lb_param & LPF_ORPHAN) &&
-                  start->ls_flags & LPF_ORPHAN) {
-               bk->lb_param |= LPF_ORPHAN;
-               dirty = true;
-       }
-
-       if (dirty) {
-               rc = lfsck_bookmark_store(env, lfsck);
-               if (rc != 0)
-                       GOTO(out, rc);
-       }
-
-       if (start->ls_flags & LPF_RESET)
-               flags |= DOIF_RESET;
 
        if (start->ls_active != 0) {
                struct lfsck_component *next;
@@ -1657,8 +2105,8 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
                        GOTO(out, rc = -ENOTSUPP);
                }
 
-               cfs_list_for_each_entry_safe(com, next,
-                                            &lfsck->li_list_scan, lc_link) {
+               list_for_each_entry_safe(com, next,
+                                        &lfsck->li_list_scan, lc_link) {
                        if (!(com->lc_type & start->ls_active)) {
                                rc = com->lc_ops->lfsck_post(env, com, 0,
                                                             false);
@@ -1675,9 +2123,9 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
                                        /* The component status will be updated
                                         * when its prep() is called later by
                                         * the LFSCK main engine. */
-                                       cfs_list_del_init(&com->lc_link);
-                                       cfs_list_add_tail(&com->lc_link,
-                                                         &lfsck->li_list_scan);
+                                       list_del_init(&com->lc_link);
+                                       list_add_tail(&com->lc_link,
+                                                     &lfsck->li_list_scan);
                                }
                                start->ls_active &= ~type;
                        }
@@ -1685,7 +2133,28 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
                }
        }
 
-       cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+       if (list_empty(&lfsck->li_list_scan)) {
+               /* The speed limit controls both the LFSCK and the low layer
+                * scrub (if applied), so it needs to be handled first. */
+               if (start->ls_valid & LSV_SPEED_LIMIT) {
+                       if (__lfsck_set_speed(lfsck, start->ls_speed_limit)) {
+                               rc = lfsck_bookmark_store(env, lfsck);
+                               if (rc != 0)
+                                       GOTO(out, rc);
+                       }
+               }
+
+               goto trigger;
+       }
+
+       if (start->ls_flags & LPF_RESET)
+               flags |= DOIF_RESET;
+
+       rc = lfsck_set_param(env, lfsck, start, !!(flags & DOIF_RESET));
+       if (rc != 0)
+               GOTO(out, rc);
+
+       list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
                start->ls_active |= com->lc_type;
                if (flags & DOIF_RESET) {
                        rc = com->lc_ops->lfsck_reset(env, com, false);
@@ -1696,18 +2165,22 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
 
 trigger:
        lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
-       if (bk->lb_param & LPF_DRYRUN) {
+       if (bk->lb_param & LPF_DRYRUN)
                lfsck->li_args_dir |= LUDA_VERIFY_DRYRUN;
-               valid |= DOIV_DRYRUN;
-               flags |= DOIF_DRYRUN;
-       }
 
-       if (bk->lb_param & LPF_FAILOUT) {
+       if (start != NULL && start->ls_valid & LSV_ERROR_HANDLE) {
                valid |= DOIV_ERROR_HANDLE;
-               flags |= DOIF_FAILOUT;
+               if (start->ls_flags & LPF_FAILOUT)
+                       flags |= DOIF_FAILOUT;
        }
 
-       if (!cfs_list_empty(&lfsck->li_list_scan))
+       if (start != NULL && start->ls_valid & LSV_DRYRUN) {
+               valid |= DOIV_DRYRUN;
+               if (start->ls_flags & LPF_DRYRUN)
+                       flags |= DOIF_DRYRUN;
+       }
+
+       if (!list_empty(&lfsck->li_list_scan))
                flags |= DOIF_OUTUSED;
 
        lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
@@ -1716,6 +2189,7 @@ trigger:
        if (IS_ERR(lta))
                GOTO(out, rc = PTR_ERR(lta));
 
+       __lfsck_set_speed(lfsck, bk->lb_speed_limit);
        rc = PTR_ERR(kthread_run(lfsck_master_engine, lta, "lfsck"));
        if (IS_ERR_VALUE(rc)) {
                CERROR("%s: cannot start LFSCK thread: rc = %ld\n",
@@ -1784,7 +2258,7 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key,
 
        lfsck = lfsck_instance_find(key, true, false);
        if (unlikely(lfsck == NULL))
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
 
        thread = &lfsck->li_thread;
        /* release lfsck::li_mutex to avoid deadlock. */
@@ -1869,13 +2343,15 @@ int lfsck_in_notify(const struct lu_env *env, struct dt_device *key,
        case LE_PHASE1_DONE:
        case LE_PHASE2_DONE:
        case LE_FID_ACCESSED:
-       case LE_PEER_EXIT: {
+       case LE_PEER_EXIT:
+       case LE_CONDITIONAL_DESTROY:
+       case LE_PAIRS_VERIFY: {
                struct lfsck_instance  *lfsck;
                struct lfsck_component *com;
 
                lfsck = lfsck_instance_find(key, true, false);
                if (unlikely(lfsck == NULL))
-                       RETURN(-ENODEV);
+                       RETURN(-ENXIO);
 
                com = lfsck_component_find(lfsck, lr->lr_active);
                if (likely(com != NULL)) {
@@ -1904,7 +2380,7 @@ int lfsck_query(const struct lu_env *env, struct dt_device *key,
 
        lfsck = lfsck_instance_find(key, true, false);
        if (unlikely(lfsck == NULL))
-               RETURN(-ENODEV);
+               RETURN(-ENXIO);
 
        com = lfsck_component_find(lfsck, lr->lr_active);
        if (likely(com != NULL)) {
@@ -1924,7 +2400,7 @@ int lfsck_register_namespace(const struct lu_env *env, struct dt_device *key,
                             struct ldlm_namespace *ns)
 {
        struct lfsck_instance  *lfsck;
-       int                     rc      = -ENODEV;
+       int                     rc      = -ENXIO;
 
        lfsck = lfsck_instance_find(key, true, false);
        if (likely(lfsck != NULL)) {
@@ -2031,6 +2507,10 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key,
                GOTO(out, rc);
 
        if (master) {
+               rc = lfsck_fid_init(lfsck);
+               if (rc < 0)
+                       GOTO(out, rc);
+
                rc = lfsck_namespace_setup(env, lfsck);
                if (rc < 0)
                        GOTO(out, rc);