Whamcloud - gitweb
LU-1267 lfsck: framework (3) for MDT-OST consistency 62/7062/37
author Fan Yong <fan.yong@intel.com>
Fri, 24 Jan 2014 19:42:07 +0000 (03:42 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 31 Jan 2014 04:19:51 +0000 (04:19 +0000)
Introduce an assistant kernel thread to help handle MDT-OST
consistency verification. The LFSCK main engine thread and the
assistant kernel thread form an asynchronous pipeline:

For a given MDT-object, the LFSCK main engine thread reads its
layout EA and, for each stripe, prefetches the OST-object's
attributes asynchronously. The LFSCK main engine thread does not
wait for the reply carrying the OST-object's attributes; instead,
it adds the request structure to the shared list.

The LFSCK assistant kernel thread scans the shared list and,
for each request that has been replied to, checks whether the
OST-object's attributes are consistent with its MDT-object's
attributes. If it finds any inconsistency, the LFSCK assistant
kernel thread will fix it.

To prevent the LFSCK main engine thread from running too far
ahead of the LFSCK assistant kernel thread (which would prefetch
too many objects and cause memory pressure), an async window
size limits how many objects the LFSCK main engine thread can be
ahead of the LFSCK assistant kernel thread. It also limits how
many objects the assistant kernel thread can be ahead of the
backend ptlrpcd threads. The window size can be specified via
the "-w" option of the "lctl lfsck_start" command and can be
adjusted dynamically via the proc interface
"lfsck_async_windows".

Test-Parameters: allwaysuploadlogs
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I41efd93bc614591a9aabe1099a13fbcc1275d2d9
Reviewed-on: http://review.whamcloud.com/7062
Tested-by: Jenkins
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
18 files changed:
lustre/include/lustre/Makefile.am
lustre/include/lustre/lustre_idl.h
lustre/include/lustre/lustre_lfsck_user.h
lustre/include/lustre_lfsck.h
lustre/lfsck/lfsck_bookmark.c
lustre/lfsck/lfsck_engine.c
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_layout.c
lustre/lfsck/lfsck_lib.c
lustre/lfsck/lfsck_namespace.c
lustre/mdd/mdd_lproc.c
lustre/osp/osp_object.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/wiretest.c
lustre/utils/lctl.c
lustre/utils/lustre_lfsck.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 6c93838..e0abff4 100644 (file)
@@ -38,7 +38,7 @@
 
 if UTILS
 pkginclude_HEADERS = lustreapi.h lustre_idl.h lustre_user.h liblustreapi.h \
 
 if UTILS
 pkginclude_HEADERS = lustreapi.h lustre_idl.h lustre_user.h liblustreapi.h \
-       libiam.h ll_fiemap.h
+       libiam.h ll_fiemap.h lustre_lfsck_user.h
 endif
 
 EXTRA_DIST = lustreapi.h lustre_idl.h lustre_user.h liblustreapi.h \
 endif
 
 EXTRA_DIST = lustreapi.h lustre_idl.h lustre_user.h liblustreapi.h \
index 4b1f06f..207fe8d 100644 (file)
@@ -3409,6 +3409,37 @@ struct obdo {
 #define o_cksum   o_nlink
 #define o_grant_used o_data_version
 
 #define o_cksum   o_nlink
 #define o_grant_used o_data_version
 
+struct lfsck_request {
+       __u32           lr_event;
+       __u32           lr_index;
+       __u32           lr_flags;
+       __u32           lr_valid;
+       union {
+               __u32   lr_speed;
+               __u32   lr_status;
+       };
+       __u16           lr_version;
+       __u16           lr_active;
+       __u16           lr_param;
+       __u16           lr_async_windows;
+       __u32           lr_padding_1;
+       /* lr_fid is used on server-side only, and can be
+        * reused as others by client in the future. */
+       struct lu_fid   lr_fid;
+       __u64           lr_padding_2;
+       __u64           lr_padding_3;
+};
+
+void lustre_swab_lfsck_request(struct lfsck_request *lr);
+
+struct lfsck_reply {
+       __u32           lr_status;
+       __u32           lr_padding_1;
+       __u64           lr_padding_2;
+};
+
+void lustre_swab_lfsck_reply(struct lfsck_reply *lr);
+
 static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd,
                                        struct obdo *wobdo,
                                        const struct obdo *lobdo)
 static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd,
                                        struct obdo *wobdo,
                                        const struct obdo *lobdo)
index aca5eb3..4d901dc 100644 (file)
@@ -27,7 +27,7 @@
  *
  * Lustre LFSCK userspace interfaces.
  *
  *
  * Lustre LFSCK userspace interfaces.
  *
- * Author: Fan Yong <yong.fan@whamcloud.com>
+ * Author: Fan, Yong <fan.yong@intel.com>
  */
 
 #ifndef _LUSTRE_LFSCK_USER_H
  */
 
 #ifndef _LUSTRE_LFSCK_USER_H
@@ -64,11 +64,14 @@ enum lfsck_type {
 
 #define LFSCK_SPEED_NO_LIMIT   0
 #define LFSCK_SPEED_LIMIT_DEF  LFSCK_SPEED_NO_LIMIT
 
 #define LFSCK_SPEED_NO_LIMIT   0
 #define LFSCK_SPEED_LIMIT_DEF  LFSCK_SPEED_NO_LIMIT
+#define LFSCK_ASYNC_WIN_DEFAULT 1024
+#define LFSCK_ASYNC_WIN_MAX    ((__u16)(~0))
 
 enum lfsck_start_valid {
        LSV_SPEED_LIMIT         = 0x00000001,
        LSV_ERROR_HANDLE        = 0x00000002,
        LSV_DRYRUN              = 0x00000004,
 
 enum lfsck_start_valid {
        LSV_SPEED_LIMIT         = 0x00000001,
        LSV_ERROR_HANDLE        = 0x00000002,
        LSV_DRYRUN              = 0x00000004,
+       LSV_ASYNC_WINDOWS       = 0x00000008,
 };
 
 /* Arguments for starting lfsck. */
 };
 
 /* Arguments for starting lfsck. */
@@ -88,8 +91,15 @@ struct lfsck_start {
        /* Flags for the LFSCK, see 'enum lfsck_param_flags'. */
        __u16   ls_flags;
 
        /* Flags for the LFSCK, see 'enum lfsck_param_flags'. */
        __u16   ls_flags;
 
-       /* For 64-bits aligned. */
-       __u16   ls_padding;
+       /* The windows size for async requests pipeline. */
+       __u16   ls_async_windows;
+};
+
+struct lfsck_stop {
+       __u32   ls_status;
+       __u16   ls_flags;
+       __u16   ls_padding_1; /* For 64-bits aligned. */
+       __u64   ls_padding_2;
 };
 
 #endif /* _LUSTRE_LFSCK_USER_H */
 };
 
 #endif /* _LUSTRE_LFSCK_USER_H */
index 1be54ec..ccf9fb5 100644 (file)
@@ -57,6 +57,7 @@
  *     |                   |:          |:         |:         |:        |:
  *     v                   v:          v:         v:         v:        v:
  * LS_SCANNING_PHASE2  LS_FAILED  LS_STOPPED  LS_PAUSED LS_CRASHED LS_PARTIAL
  *     |                   |:          |:         |:         |:        |:
  *     v                   v:          v:         v:         v:        v:
  * LS_SCANNING_PHASE2  LS_FAILED  LS_STOPPED  LS_PAUSED LS_CRASHED LS_PARTIAL
+ *                       (CO_)       (CO_)      (CO_)
  *     |       ^           ^:          ^:         ^:         ^:        ^:
  *     |       :           |:          |:         |:         |:        |:
  *     | (lfsck:restart)   |:          |:         |:         |:        |:
  *     |       ^           ^:          ^:         ^:         ^:        ^:
  *     |       :           |:          |:         |:         |:        |:
  *     | (lfsck:restart)   |:          |:         |:         |:        |:
@@ -97,6 +98,15 @@ enum lfsck_status {
        /* Some OST/MDT failed during the LFSCK, or not join the LFSCK. */
        LS_PARTIAL              = 8,
 
        /* Some OST/MDT failed during the LFSCK, or not join the LFSCK. */
        LS_PARTIAL              = 8,
 
+       /* The LFSCK is failed because its controller is failed. */
+       LS_CO_FAILED            = 9,
+
+       /* The LFSCK is stopped because its controller is stopped. */
+       LS_CO_STOPPED           = 10,
+
+       /* The LFSCK is paused because its controller is paused. */
+       LS_CO_PAUSED            = 11,
+
        LS_MAX
 };
 
        LS_MAX
 };
 
@@ -108,6 +118,10 @@ struct lfsck_start_param {
 enum lfsck_events {
        LE_LASTID_REBUILDING    = 1,
        LE_LASTID_REBUILT       = 2,
 enum lfsck_events {
        LE_LASTID_REBUILDING    = 1,
        LE_LASTID_REBUILT       = 2,
+       LE_PHASE1_DONE          = 3,
+       LE_PHASE2_DONE          = 4,
+       LE_START                = 5,
+       LE_STOP                 = 6,
 };
 
 typedef int (*lfsck_out_notify)(const struct lu_env *env, void *data,
 };
 
 typedef int (*lfsck_out_notify)(const struct lu_env *env, void *data,
@@ -131,6 +145,8 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key,
 
 int lfsck_get_speed(struct dt_device *key, void *buf, int len);
 int lfsck_set_speed(struct dt_device *key, int val);
 
 int lfsck_get_speed(struct dt_device *key, void *buf, int len);
 int lfsck_set_speed(struct dt_device *key, int val);
+int lfsck_get_windows(struct dt_device *key, void *buf, int len);
+int lfsck_set_windows(struct dt_device *key, int val);
 
 int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type);
 
 
 int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type);
 
index 8275bf1..82ee512 100644 (file)
@@ -48,6 +48,7 @@ static void lfsck_bookmark_le_to_cpu(struct lfsck_bookmark *des,
        des->lb_version = le16_to_cpu(src->lb_version);
        des->lb_param = le16_to_cpu(src->lb_param);
        des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit);
        des->lb_version = le16_to_cpu(src->lb_version);
        des->lb_param = le16_to_cpu(src->lb_param);
        des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit);
+       des->lb_async_windows = le16_to_cpu(src->lb_async_windows);
 }
 
 static void lfsck_bookmark_cpu_to_le(struct lfsck_bookmark *des,
 }
 
 static void lfsck_bookmark_cpu_to_le(struct lfsck_bookmark *des,
@@ -57,6 +58,7 @@ static void lfsck_bookmark_cpu_to_le(struct lfsck_bookmark *des,
        des->lb_version = cpu_to_le16(src->lb_version);
        des->lb_param = cpu_to_le16(src->lb_param);
        des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit);
        des->lb_version = cpu_to_le16(src->lb_version);
        des->lb_param = cpu_to_le16(src->lb_param);
        des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit);
+       des->lb_async_windows = cpu_to_le16(src->lb_async_windows);
 }
 
 static int lfsck_bookmark_load(const struct lu_env *env,
 }
 
 static int lfsck_bookmark_load(const struct lu_env *env,
@@ -148,6 +150,7 @@ static int lfsck_bookmark_init(const struct lu_env *env,
        memset(mb, 0, sizeof(*mb));
        mb->lb_magic = LFSCK_BOOKMARK_MAGIC;
        mb->lb_version = LFSCK_VERSION_V2;
        memset(mb, 0, sizeof(*mb));
        mb->lb_magic = LFSCK_BOOKMARK_MAGIC;
        mb->lb_version = LFSCK_VERSION_V2;
+       mb->lb_async_windows = LFSCK_ASYNC_WIN_DEFAULT;
        mutex_lock(&lfsck->li_mutex);
        rc = lfsck_bookmark_store(env, lfsck);
        mutex_unlock(&lfsck->li_mutex);
        mutex_lock(&lfsck->li_mutex);
        rc = lfsck_bookmark_store(env, lfsck);
        mutex_unlock(&lfsck->li_mutex);
index 7f8e3fc..8daeb1b 100644 (file)
@@ -428,6 +428,8 @@ fini_oit:
                        rc = lfsck_double_scan(env, lfsck);
                else
                        rc = 0;
                        rc = lfsck_double_scan(env, lfsck);
                else
                        rc = 0;
+       } else {
+               lfsck_quit(env, lfsck);
        }
 
        /* XXX: Purge the pinned objects in the future. */
        }
 
        /* XXX: Purge the pinned objects in the future. */
index e0366b6..038c350 100644 (file)
@@ -95,8 +95,11 @@ struct lfsck_bookmark {
        /* How many items can be scanned at most per second. */
        __u32   lb_speed_limit;
 
        /* How many items can be scanned at most per second. */
        __u32   lb_speed_limit;
 
+       /* The windows size for async requests pipeline. */
+       __u16   lb_async_windows;
+
        /* For 64-bits aligned. */
        /* For 64-bits aligned. */
-       __u32   lb_padding;
+       __u16   lb_padding;
 
        /* For future using. */
        __u64   lb_reserved[6];
 
        /* For future using. */
        __u64   lb_reserved[6];
@@ -290,6 +293,9 @@ struct lfsck_operations {
 
        void (*lfsck_data_release)(const struct lu_env *env,
                                   struct lfsck_component *com);
 
        void (*lfsck_data_release)(const struct lu_env *env,
                                   struct lfsck_component *com);
+
+       void (*lfsck_quit)(const struct lu_env *env,
+                          struct lfsck_component *com);
 };
 
 #define TGT_PTRS               256     /* number of pointers at 1st level */
 };
 
 #define TGT_PTRS               256     /* number of pointers at 1st level */
@@ -394,6 +400,7 @@ struct lfsck_instance {
        cfs_list_t                li_list_idle;
 
        atomic_t                  li_ref;
        cfs_list_t                li_list_idle;
 
        atomic_t                  li_ref;
+       atomic_t                  li_double_scan_count;
        struct ptlrpc_thread      li_thread;
 
        /* The time for last checkpoint, jiffies */
        struct ptlrpc_thread      li_thread;
 
        /* The time for last checkpoint, jiffies */
@@ -490,6 +497,7 @@ struct lfsck_thread_info {
         * then lti_ent::lde_name will be lti_key. */
        struct lu_dirent        lti_ent;
        char                    lti_key[NAME_MAX + 16];
         * then lti_ent::lde_name will be lti_key. */
        struct lu_dirent        lti_ent;
        char                    lti_key[NAME_MAX + 16];
+       struct lfsck_request    lti_lr;
 };
 
 /* lfsck_lib.c */
 };
 
 /* lfsck_lib.c */
@@ -523,6 +531,7 @@ int lfsck_exec_dir(const struct lu_env *env, struct lfsck_instance *lfsck,
 int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck,
               int result);
 int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck);
 int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck,
               int result);
 int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck);
+void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck);
 
 /* lfsck_engine.c */
 int lfsck_master_engine(void *args);
 
 /* lfsck_engine.c */
 int lfsck_master_engine(void *args);
index f04950a..39cffe3 100644 (file)
@@ -44,6 +44,7 @@
 #include <lustre_net.h>
 #include <lustre/lustre_user.h>
 #include <md_object.h>
 #include <lustre_net.h>
 #include <lustre/lustre_user.h>
 #include <md_object.h>
+#include <obd_class.h>
 
 #include "lfsck_internal.h"
 
 
 #include "lfsck_internal.h"
 
@@ -65,6 +66,64 @@ struct lfsck_layout_slave_data {
        struct list_head         llsd_seq_list;
 };
 
        struct list_head         llsd_seq_list;
 };
 
+struct lfsck_layout_object {
+       struct dt_object        *llo_obj;
+       struct lu_attr           llo_attr;
+       atomic_t                 llo_ref;
+       __u16                    llo_gen;
+};
+
+struct lfsck_layout_req {
+       struct list_head                 llr_list;
+       struct lfsck_layout_object      *llr_parent;
+       struct dt_object                *llr_child;
+       __u32                            llr_ost_idx;
+       __u32                            llr_lov_idx; /* offset in LOV EA */
+};
+
+struct lfsck_layout_master_data {
+       struct list_head        llmd_req_list;
+       spinlock_t              llmd_lock;
+       struct ptlrpc_thread    llmd_thread;
+       atomic_t                llmd_rpcs_in_flight;
+       int                     llmd_prefetched;
+       int                     llmd_assistant_status;
+       int                     llmd_post_result;
+       unsigned int            llmd_to_post:1,
+                               llmd_to_double_scan:1,
+                               llmd_in_double_scan:1,
+                               llmd_exit:1;
+};
+
+static inline void lfsck_layout_object_put(const struct lu_env *env,
+                                          struct lfsck_layout_object *llo)
+{
+       if (atomic_dec_and_test(&llo->llo_ref)) {
+               lfsck_object_put(env, llo->llo_obj);
+               OBD_FREE_PTR(llo);
+       }
+}
+
+static inline void lfsck_layout_req_fini(const struct lu_env *env,
+                                        struct lfsck_layout_req *llr)
+{
+       lu_object_put(env, &llr->llr_child->do_lu);
+       lfsck_layout_object_put(env, llr->llr_parent);
+       OBD_FREE_PTR(llr);
+}
+
+static inline bool lfsck_layout_req_empty(struct lfsck_layout_master_data *llmd)
+{
+       bool empty = false;
+
+       spin_lock(&llmd->llmd_lock);
+       if (list_empty(&llmd->llmd_req_list))
+               empty = true;
+       spin_unlock(&llmd->llmd_lock);
+
+       return empty;
+}
+
 static void lfsck_layout_le_to_cpu(struct lfsck_layout *des,
                                   const struct lfsck_layout *src)
 {
 static void lfsck_layout_le_to_cpu(struct lfsck_layout *des,
                                   const struct lfsck_layout *src)
 {
@@ -557,8 +616,306 @@ out:
        return rc;
 }
 
        return rc;
 }
 
+static int lfsck_layout_master_query_others(const struct lu_env *env,
+                                           struct lfsck_component *com)
+{
+       /* XXX: to be implemented. */
+
+       return 0;
+}
+
+static inline bool
+lfsck_layout_master_to_orphan(struct lfsck_layout_master_data *llmd)
+{
+       /* XXX: to be implemented. */
+
+       return 1;
+}
+
+static int lfsck_layout_master_notify_others(const struct lu_env *env,
+                                            struct lfsck_component *com,
+                                            struct lfsck_request *lr)
+{
+       /* XXX: to be implemented. */
+
+       return 0;
+}
+
+static int lfsck_layout_double_scan_result(const struct lu_env *env,
+                                          struct lfsck_component *com,
+                                          int rc)
+{
+       struct lfsck_instance   *lfsck = com->lc_lfsck;
+       struct lfsck_layout     *lo    = com->lc_file_ram;
+       struct lfsck_bookmark   *bk    = &lfsck->li_bookmark_ram;
+
+       down_write(&com->lc_sem);
+
+       lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
+                               HALF_SEC - lfsck->li_time_last_checkpoint);
+       lo->ll_time_last_checkpoint = cfs_time_current_sec();
+       lo->ll_objs_checked_phase2 += com->lc_new_checked;
+
+       if (rc > 0) {
+               com->lc_journal = 0;
+               if (lo->ll_flags & LF_INCOMPLETE)
+                       lo->ll_status = LS_PARTIAL;
+               else
+                       lo->ll_status = LS_COMPLETED;
+               if (!(bk->lb_param & LPF_DRYRUN))
+                       lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT);
+               lo->ll_time_last_complete = lo->ll_time_last_checkpoint;
+               lo->ll_success_count++;
+       } else if (rc == 0) {
+               if (lfsck->li_paused)
+                       lo->ll_status = LS_PAUSED;
+               else
+                       lo->ll_status = LS_STOPPED;
+       } else {
+               lo->ll_status = LS_FAILED;
+       }
+
+       if (lo->ll_status != LS_PAUSED) {
+               spin_lock(&lfsck->li_lock);
+               list_del_init(&com->lc_link);
+               list_add_tail(&com->lc_link, &lfsck->li_list_idle);
+               spin_unlock(&lfsck->li_lock);
+       }
+
+       rc = lfsck_layout_store(env, com);
+
+       up_write(&com->lc_sem);
+
+       return rc;
+}
+
+static int lfsck_layout_assistant(void *args)
+{
+       struct lfsck_thread_args        *lta     = args;
+       struct lu_env                   *env     = &lta->lta_env;
+       struct lfsck_component          *com     = lta->lta_com;
+       struct lfsck_instance           *lfsck   = lta->lta_lfsck;
+       struct lfsck_bookmark           *bk      = &lfsck->li_bookmark_ram;
+       struct lfsck_position           *pos     = &com->lc_pos_start;
+       struct lfsck_thread_info        *info    = lfsck_env_info(env);
+       struct lfsck_request            *lr      = &info->lti_lr;
+       struct lfsck_layout_master_data *llmd    = com->lc_data;
+       struct ptlrpc_thread            *mthread = &lfsck->li_thread;
+       struct ptlrpc_thread            *athread = &llmd->llmd_thread;
+       struct lfsck_layout_req         *llr;
+       struct l_wait_info               lwi     = { 0 };
+       int                              rc      = 0;
+       int                              rc1     = 0;
+       ENTRY;
+
+       memset(lr, 0, sizeof(*lr));
+       lr->lr_event = LE_START;
+       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
+       lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
+                      LSV_ASYNC_WINDOWS;
+       lr->lr_speed = bk->lb_speed_limit;
+       lr->lr_version = bk->lb_version;
+       lr->lr_active = LT_LAYOUT;
+       lr->lr_param = bk->lb_param;
+       lr->lr_async_windows = bk->lb_async_windows;
+       if (pos->lp_oit_cookie <= 1)
+               lr->lr_param |= LPF_RESET;
+
+       rc = lfsck_layout_master_notify_others(env, com, lr);
+       if (rc != 0) {
+               CERROR("%s: fail to notify others for layout start: rc = %d\n",
+                      lfsck_lfsck2name(lfsck), rc);
+               GOTO(fini, rc);
+       }
+
+       spin_lock(&llmd->llmd_lock);
+       thread_set_flags(athread, SVC_RUNNING);
+       spin_unlock(&llmd->llmd_lock);
+       wake_up_all(&mthread->t_ctl_waitq);
+
+       while (1) {
+               while (!list_empty(&llmd->llmd_req_list)) {
+                       bool wakeup = false;
+
+                       l_wait_event(athread->t_ctl_waitq,
+                                    bk->lb_async_windows == 0 ||
+                                    atomic_read(&llmd->llmd_rpcs_in_flight) <
+                                               bk->lb_async_windows ||
+                                    llmd->llmd_exit,
+                                    &lwi);
+
+                       if (unlikely(llmd->llmd_exit))
+                               GOTO(cleanup1, rc = llmd->llmd_post_result);
+
+                       /* XXX: To be extended in other patch.
+                        *
+                        * Compare the OST side attribute with local attribute,
+                        * and fix it if found inconsistency. */
+
+                       spin_lock(&llmd->llmd_lock);
+                       llr = list_entry(llmd->llmd_req_list.next,
+                                        struct lfsck_layout_req,
+                                        llr_list);
+                       list_del_init(&llr->llr_list);
+                       if (bk->lb_async_windows != 0 &&
+                           llmd->llmd_prefetched >= bk->lb_async_windows)
+                               wakeup = true;
+
+                       llmd->llmd_prefetched--;
+                       spin_unlock(&llmd->llmd_lock);
+                       if (wakeup)
+                               wake_up_all(&mthread->t_ctl_waitq);
+
+                       lfsck_layout_req_fini(env, llr);
+               }
+
+               /* Wakeup the master engine if it is waiting in checkpoint. */
+               if (atomic_read(&llmd->llmd_rpcs_in_flight) == 0)
+                       wake_up_all(&mthread->t_ctl_waitq);
+
+               l_wait_event(athread->t_ctl_waitq,
+                            !lfsck_layout_req_empty(llmd) ||
+                            llmd->llmd_exit ||
+                            llmd->llmd_to_post ||
+                            llmd->llmd_to_double_scan,
+                            &lwi);
+
+               if (unlikely(llmd->llmd_exit))
+                       GOTO(cleanup1, rc = llmd->llmd_post_result);
+
+               if (!list_empty(&llmd->llmd_req_list))
+                       continue;
+
+               if (llmd->llmd_to_post) {
+                       llmd->llmd_to_post = 0;
+                       LASSERT(llmd->llmd_post_result > 0);
+
+                       memset(lr, 0, sizeof(*lr));
+                       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
+                       lr->lr_active = LT_LAYOUT;
+                       lr->lr_event = LE_PHASE1_DONE;
+                       lr->lr_status = llmd->llmd_post_result;
+                       rc = lfsck_layout_master_notify_others(env, com, lr);
+                       if (rc != 0)
+                               CERROR("%s: failed to notify others "
+                                      "for layout post: rc = %d\n",
+                                      lfsck_lfsck2name(lfsck), rc);
+
+                       /* Wakeup the master engine to go ahead. */
+                       wake_up_all(&mthread->t_ctl_waitq);
+               }
+
+               if (llmd->llmd_to_double_scan) {
+                       llmd->llmd_to_double_scan = 0;
+                       atomic_inc(&lfsck->li_double_scan_count);
+                       llmd->llmd_in_double_scan = 1;
+                       wake_up_all(&mthread->t_ctl_waitq);
+
+                       while (llmd->llmd_in_double_scan) {
+                               rc = lfsck_layout_master_query_others(env, com);
+                               if (lfsck_layout_master_to_orphan(llmd))
+                                       goto orphan;
+
+                               if (rc < 0)
+                                       GOTO(cleanup2, rc);
+
+                               /* Pull LFSCK status on related targets once
+                                * per 30 seconds if we are not notified. */
+                               lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(30),
+                                                          cfs_time_seconds(1),
+                                                          NULL, NULL);
+                               rc = l_wait_event(athread->t_ctl_waitq,
+                                       lfsck_layout_master_to_orphan(llmd) ||
+                                       llmd->llmd_exit ||
+                                       !thread_is_running(mthread),
+                                       &lwi);
+
+                               if (unlikely(llmd->llmd_exit ||
+                                            !thread_is_running(mthread)))
+                                       GOTO(cleanup2, rc = 0);
+
+                               if (rc == -ETIMEDOUT)
+                                       continue;
+
+                               if (rc < 0)
+                                       GOTO(cleanup2, rc);
+
+orphan:
+                               /* XXX: real double scan for ost orphans. */
+
+                               GOTO(cleanup2, rc = 1);
+                       }
+               }
+       }
+
+cleanup1:
+       /* Cleanup the unfinished requests. */
+       spin_lock(&llmd->llmd_lock);
+       while (!list_empty(&llmd->llmd_req_list)) {
+               llr = list_entry(llmd->llmd_req_list.next,
+                                struct lfsck_layout_req,
+                                llr_list);
+               list_del_init(&llr->llr_list);
+               llmd->llmd_prefetched--;
+               spin_unlock(&llmd->llmd_lock);
+               lfsck_layout_req_fini(env, llr);
+               spin_lock(&llmd->llmd_lock);
+       }
+       spin_unlock(&llmd->llmd_lock);
+
+       LASSERTF(llmd->llmd_prefetched == 0, "unmatched prefeteched objs %d\n",
+                llmd->llmd_prefetched);
+
+       l_wait_event(athread->t_ctl_waitq,
+                    atomic_read(&llmd->llmd_rpcs_in_flight) == 0,
+                    &lwi);
+
+cleanup2:
+       memset(lr, 0, sizeof(*lr));
+       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
+       lr->lr_active = LT_LAYOUT;
+       if (rc > 0) {
+               lr->lr_event = LE_PHASE2_DONE;
+               lr->lr_status = rc;
+       } else if (rc == 0) {
+               lr->lr_event = LE_STOP;
+               if (lfsck->li_paused)
+                       lr->lr_status = LS_CO_PAUSED;
+               else
+                       lr->lr_status = LS_CO_STOPPED;
+       } else {
+               lr->lr_event = LE_STOP;
+               lr->lr_status = LS_CO_FAILED;
+       }
+
+       rc1 = lfsck_layout_master_notify_others(env, com, lr);
+       if (rc1 != 0) {
+               CERROR("%s: failed to notify others for layout quit: rc = %d\n",
+                      lfsck_lfsck2name(lfsck), rc1);
+               rc = rc1;
+       }
+
+       /* Under force exit case, some requests may be just freed without
+        * verification, those objects should be re-handled when next run.
+        * So not update the on-disk tracing file under such case. */
+       if (!llmd->llmd_exit)
+               rc1 = lfsck_layout_double_scan_result(env, com, rc);
+
+fini:
+       if (llmd->llmd_in_double_scan)
+               atomic_dec(&lfsck->li_double_scan_count);
+
+       spin_lock(&llmd->llmd_lock);
+       llmd->llmd_assistant_status = (rc1 != 0 ? rc1 : rc);
+       thread_set_flags(athread, SVC_STOPPED);
+       wake_up_all(&mthread->t_ctl_waitq);
+       spin_unlock(&llmd->llmd_lock);
+       lfsck_thread_args_fini(lta);
+
+       return rc;
+}
+
 /* layout APIs */
 /* layout APIs */
-/* XXX: Some to be implemented in other patch(es). */
 
 static int lfsck_layout_reset(const struct lu_env *env,
                              struct lfsck_component *com, bool init)
 
 static int lfsck_layout_reset(const struct lu_env *env,
                              struct lfsck_component *com, bool init)
@@ -606,8 +963,51 @@ static void lfsck_layout_fail(const struct lu_env *env,
        up_write(&com->lc_sem);
 }
 
        up_write(&com->lc_sem);
 }
 
-static int lfsck_layout_checkpoint(const struct lu_env *env,
-                                  struct lfsck_component *com, bool init)
+static int lfsck_layout_master_checkpoint(const struct lu_env *env,
+                                         struct lfsck_component *com, bool init)
+{
+       struct lfsck_instance           *lfsck   = com->lc_lfsck;
+       struct lfsck_layout             *lo      = com->lc_file_ram;
+       struct lfsck_layout_master_data *llmd    = com->lc_data;
+       struct ptlrpc_thread            *mthread = &lfsck->li_thread;
+       struct ptlrpc_thread            *athread = &llmd->llmd_thread;
+       struct l_wait_info               lwi     = { 0 };
+       int                              rc;
+
+       if (com->lc_new_checked == 0 && !init)
+               return 0;
+
+       l_wait_event(mthread->t_ctl_waitq,
+                    (list_empty(&llmd->llmd_req_list) &&
+                     atomic_read(&llmd->llmd_rpcs_in_flight) == 0) ||
+                    !thread_is_running(mthread) ||
+                    thread_is_stopped(athread),
+                    &lwi);
+
+       if (!thread_is_running(mthread) || thread_is_stopped(athread))
+               return 0;
+
+       down_write(&com->lc_sem);
+       if (init) {
+               lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
+       } else {
+               lo->ll_pos_last_checkpoint =
+                                       lfsck->li_pos_current.lp_oit_cookie;
+               lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
+                               HALF_SEC - lfsck->li_time_last_checkpoint);
+               lo->ll_time_last_checkpoint = cfs_time_current_sec();
+               lo->ll_objs_checked_phase1 += com->lc_new_checked;
+               com->lc_new_checked = 0;
+       }
+
+       rc = lfsck_layout_store(env, com);
+       up_write(&com->lc_sem);
+
+       return rc;
+}
+
+static int lfsck_layout_slave_checkpoint(const struct lu_env *env,
+                                        struct lfsck_component *com, bool init)
 {
        struct lfsck_instance   *lfsck = com->lc_lfsck;
        struct lfsck_layout     *lo    = com->lc_file_ram;
 {
        struct lfsck_instance   *lfsck = com->lc_lfsck;
        struct lfsck_layout     *lo    = com->lc_file_ram;
@@ -637,12 +1037,6 @@ static int lfsck_layout_checkpoint(const struct lu_env *env,
        return rc;
 }
 
        return rc;
 }
 
-static int lfsck_layout_master_prep(const struct lu_env *env,
-                                   struct lfsck_component *com)
-{
-       return 0;
-}
-
 static int lfsck_layout_slave_prep(const struct lu_env *env,
                                   struct lfsck_component *com)
 {
 static int lfsck_layout_slave_prep(const struct lu_env *env,
                                   struct lfsck_component *com)
 {
@@ -707,10 +1101,70 @@ static int lfsck_layout_slave_prep(const struct lu_env *env,
        return 0;
 }
 
        return 0;
 }
 
+/**
+ * Prepare the layout LFSCK on the master and start its assistant thread.
+ *
+ * Runs the common lfsck_layout_slave_prep() first, then resets the
+ * assistant related state in the master data, spawns a kernel thread
+ * running lfsck_layout_assistant() and waits on the main engine thread's
+ * wait queue until the assistant thread is reported as running or stopped.
+ *
+ * \param[in] env      execution environment
+ * \param[in] com      the layout LFSCK component
+ *
+ * \retval             0 if the assistant thread is running
+ * \retval             ::llmd_assistant_status if the assistant thread is
+ *                     not running after the wait
+ * \retval             negative error from lfsck_layout_slave_prep(),
+ *                     lfsck_thread_args_init() or kthread_run()
+ */
+static int lfsck_layout_master_prep(const struct lu_env *env,
+                                   struct lfsck_component *com)
+{
+       struct lfsck_instance           *lfsck   = com->lc_lfsck;
+       struct lfsck_layout_master_data *llmd    = com->lc_data;
+       struct ptlrpc_thread            *mthread = &lfsck->li_thread;
+       struct ptlrpc_thread            *athread = &llmd->llmd_thread;
+       struct l_wait_info               lwi     = { 0 };
+       struct lfsck_thread_args        *lta;
+       long                             rc;
+       ENTRY;
+
+       rc = lfsck_layout_slave_prep(env, com);
+       if (rc != 0)
+               RETURN(rc);
+
+       /* Start from a clean state before the assistant thread exists. */
+       llmd->llmd_assistant_status = 0;
+       llmd->llmd_post_result = 0;
+       llmd->llmd_to_post = 0;
+       llmd->llmd_to_double_scan = 0;
+       llmd->llmd_in_double_scan = 0;
+       llmd->llmd_exit = 0;
+       thread_set_flags(athread, 0);
+
+       lta = lfsck_thread_args_init(lfsck, com);
+       if (IS_ERR(lta))
+               RETURN(PTR_ERR(lta));
+
+       rc = PTR_ERR(kthread_run(lfsck_layout_assistant, lta, "lfsck_layout"));
+       if (IS_ERR_VALUE(rc)) {
+               /* The thread never started, so the arguments are released
+                * here. */
+               CERROR("%s: Cannot start LFSCK layout assistant thread: "
+                      "rc = %ld\n", lfsck_lfsck2name(lfsck), rc);
+               lfsck_thread_args_fini(lta);
+               RETURN(rc);
+       }
+
+       l_wait_event(mthread->t_ctl_waitq,
+                    thread_is_running(athread) ||
+                    thread_is_stopped(athread),
+                    &lwi);
+       if (likely(thread_is_running(athread)))
+               rc = 0;
+       else
+               rc = llmd->llmd_assistant_status;
+
+       RETURN(rc);
+}
+
 static int lfsck_layout_master_exec_oit(const struct lu_env *env,
                                        struct lfsck_component *com,
                                        struct dt_object *obj)
 {
 static int lfsck_layout_master_exec_oit(const struct lu_env *env,
                                        struct lfsck_component *com,
                                        struct dt_object *obj)
 {
+       /* XXX: To be implemented in other patches.
+        *
+        * For the given object, read its layout EA locally. For each stripe,
+        * pre-fetch the OST-object's attribute and add an lfsck_layout_req
+        * structure to the list ::llmd_req_list.
+        *
+        * For each request on the ::llmd_req_list, the lfsck_layout_assistant
+        * thread will compare the OST-side attribute with the local attribute
+        * and, if they are inconsistent, repair it.
+        *
+        * All of the above processing is asynchronous and pipelined. */
+
        return 0;
 }
 
        return 0;
 }
 
@@ -810,7 +1264,73 @@ static int lfsck_layout_master_post(const struct lu_env *env,
                                    struct lfsck_component *com,
                                    int result, bool init)
 {
                                    struct lfsck_component *com,
                                    int result, bool init)
 {
-       return 0;
+       struct lfsck_instance           *lfsck   = com->lc_lfsck;
+       struct lfsck_layout             *lo      = com->lc_file_ram;
+       struct lfsck_layout_master_data *llmd    = com->lc_data;
+       struct ptlrpc_thread            *mthread = &lfsck->li_thread;
+       struct ptlrpc_thread            *athread = &llmd->llmd_thread;
+       struct l_wait_info               lwi     = { 0 };
+       int                              rc;
+       ENTRY;
+
+
+       llmd->llmd_post_result = result;
+       llmd->llmd_to_post = 1;
+       if (llmd->llmd_post_result <= 0)
+               llmd->llmd_exit = 1;
+
+       wake_up_all(&athread->t_ctl_waitq);
+       l_wait_event(mthread->t_ctl_waitq,
+                    (result > 0 && list_empty(&llmd->llmd_req_list) &&
+                     atomic_read(&llmd->llmd_rpcs_in_flight) == 0) ||
+                    thread_is_stopped(athread),
+                    &lwi);
+
+       if (llmd->llmd_assistant_status < 0)
+               result = llmd->llmd_assistant_status;
+
+       down_write(&com->lc_sem);
+       spin_lock(&lfsck->li_lock);
+       /* When LFSCK failed, there may be some prefetched objects that have
+        * not been processed yet; we do not know the exact position, so
+        * just restart from the last check-point next time. */
+       if (!init && !llmd->llmd_exit)
+               lo->ll_pos_last_checkpoint =
+                                       lfsck->li_pos_current.lp_oit_cookie;
+
+       if (result > 0) {
+               lo->ll_status = LS_SCANNING_PHASE2;
+               lo->ll_flags |= LF_SCANNED_ONCE;
+               lo->ll_flags &= ~LF_UPGRADE;
+               list_del_init(&com->lc_link);
+               list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
+       } else if (result == 0) {
+               if (lfsck->li_paused) {
+                       lo->ll_status = LS_PAUSED;
+               } else {
+                       lo->ll_status = LS_STOPPED;
+                       list_del_init(&com->lc_link);
+                       list_add_tail(&com->lc_link, &lfsck->li_list_idle);
+               }
+       } else {
+               lo->ll_status = LS_FAILED;
+               list_del_init(&com->lc_link);
+               list_add_tail(&com->lc_link, &lfsck->li_list_idle);
+       }
+       spin_unlock(&lfsck->li_lock);
+
+       if (!init) {
+               lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
+                               HALF_SEC - lfsck->li_time_last_checkpoint);
+               lo->ll_time_last_checkpoint = cfs_time_current_sec();
+               lo->ll_objs_checked_phase1 += com->lc_new_checked;
+               com->lc_new_checked = 0;
+       }
+
+       rc = lfsck_layout_store(env, com);
+       up_write(&com->lc_sem);
+
+       RETURN(rc);
 }
 
 static int lfsck_layout_slave_post(const struct lu_env *env,
 }
 
 static int lfsck_layout_slave_post(const struct lu_env *env,
@@ -1062,6 +1582,24 @@ out:
 static int lfsck_layout_master_double_scan(const struct lu_env *env,
                                           struct lfsck_component *com)
 {
 static int lfsck_layout_master_double_scan(const struct lu_env *env,
                                           struct lfsck_component *com)
 {
+       struct lfsck_layout_master_data *llmd    = com->lc_data;
+       struct ptlrpc_thread            *mthread = &com->lc_lfsck->li_thread;
+       struct ptlrpc_thread            *athread = &llmd->llmd_thread;
+       struct lfsck_layout             *lo      = com->lc_file_ram;
+       struct l_wait_info               lwi     = { 0 };
+
+       if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
+               return 0;
+
+       llmd->llmd_to_double_scan = 1;
+       wake_up_all(&athread->t_ctl_waitq);
+       l_wait_event(mthread->t_ctl_waitq,
+                    llmd->llmd_in_double_scan ||
+                    thread_is_stopped(athread),
+                    &lwi);
+       if (llmd->llmd_assistant_status < 0)
+               return llmd->llmd_assistant_status;
+
        return 0;
 }
 
        return 0;
 }
 
@@ -1069,16 +1607,13 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env,
                                          struct lfsck_component *com)
 {
        struct lfsck_instance   *lfsck = com->lc_lfsck;
                                          struct lfsck_component *com)
 {
        struct lfsck_instance   *lfsck = com->lc_lfsck;
-       struct lfsck_bookmark   *bk    = &lfsck->li_bookmark_ram;
        struct lfsck_layout     *lo    = com->lc_file_ram;
        int                      rc    = 1;
 
        struct lfsck_layout     *lo    = com->lc_file_ram;
        int                      rc    = 1;
 
-       down_write(&com->lc_sem);
+       if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
+               return 0;
 
 
-       lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
-                               HALF_SEC - lfsck->li_time_last_checkpoint);
-       lo->ll_time_last_checkpoint = cfs_time_current_sec();
-       lo->ll_objs_checked_phase2 += com->lc_new_checked;
+       atomic_inc(&lfsck->li_double_scan_count);
 
        com->lc_new_checked = 0;
        com->lc_new_scanned = 0;
 
        com->lc_new_checked = 0;
        com->lc_new_scanned = 0;
@@ -1086,35 +1621,10 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env,
        com->lc_time_next_checkpoint = com->lc_time_last_checkpoint +
                                cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
 
        com->lc_time_next_checkpoint = com->lc_time_last_checkpoint +
                                cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
 
-       if (rc > 0) {
-               com->lc_journal = 0;
-               if (lo->ll_flags & LF_INCOMPLETE)
-                       lo->ll_status = LS_PARTIAL;
-               else
-                       lo->ll_status = LS_COMPLETED;
-               if (!(bk->lb_param & LPF_DRYRUN))
-                       lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT);
-               lo->ll_time_last_complete = lo->ll_time_last_checkpoint;
-               lo->ll_success_count++;
-       } else if (rc == 0) {
-               if (lfsck->li_paused)
-                       lo->ll_status = LS_PAUSED;
-               else
-                       lo->ll_status = LS_STOPPED;
-       } else {
-               lo->ll_status = LS_FAILED;
-       }
-
-       if (lo->ll_status != LS_PAUSED) {
-               spin_lock(&lfsck->li_lock);
-               list_del_init(&com->lc_link);
-               list_add_tail(&com->lc_link, &lfsck->li_list_idle);
-               spin_unlock(&lfsck->li_lock);
-       }
+       rc = lfsck_layout_double_scan_result(env, com, rc);
 
 
-       rc = lfsck_layout_store(env, com);
-
-       up_write(&com->lc_sem);
+       if (atomic_dec_and_test(&lfsck->li_double_scan_count))
+               wake_up_all(&lfsck->li_thread.t_ctl_waitq);
 
        return rc;
 }
 
        return rc;
 }
@@ -1122,6 +1632,16 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env,
 static void lfsck_layout_master_data_release(const struct lu_env *env,
                                             struct lfsck_component *com)
 {
 static void lfsck_layout_master_data_release(const struct lu_env *env,
                                             struct lfsck_component *com)
 {
+       struct lfsck_layout_master_data *llmd = com->lc_data;
+
+       LASSERT(llmd != NULL);
+       LASSERT(thread_is_init(&llmd->llmd_thread) ||
+               thread_is_stopped(&llmd->llmd_thread));
+       LASSERT(list_empty(&llmd->llmd_req_list));
+       LASSERT(atomic_read(&llmd->llmd_rpcs_in_flight) == 0);
+
+       com->lc_data = NULL;
+       OBD_FREE_PTR(llmd);
 }
 
 static void lfsck_layout_slave_data_release(const struct lu_env *env,
 }
 
 static void lfsck_layout_slave_data_release(const struct lu_env *env,
@@ -1145,10 +1665,26 @@ static void lfsck_layout_slave_data_release(const struct lu_env *env,
        OBD_FREE_PTR(llsd);
 }
 
        OBD_FREE_PTR(llsd);
 }
 
+/**
+ * Tell the layout LFSCK assistant thread to exit and wait for it.
+ *
+ * Sets ::llmd_exit, wakes up whoever sleeps on the assistant thread's wait
+ * queue, then sleeps on the main engine thread's wait queue until
+ * thread_is_init() or thread_is_stopped() holds for the assistant thread.
+ *
+ * \param[in] env      execution environment (not used in this function)
+ * \param[in] com      the layout LFSCK component
+ */
+static void lfsck_layout_master_quit(const struct lu_env *env,
+                                    struct lfsck_component *com)
+{
+       struct l_wait_info               lwi       = { 0 };
+       struct lfsck_instance           *lfsck     = com->lc_lfsck;
+       struct lfsck_layout_master_data *llmd      = com->lc_data;
+       struct ptlrpc_thread            *assistant = &llmd->llmd_thread;
+
+       llmd->llmd_exit = 1;
+       wake_up_all(&assistant->t_ctl_waitq);
+       l_wait_event(lfsck->li_thread.t_ctl_waitq,
+                    thread_is_init(assistant) ||
+                    thread_is_stopped(assistant),
+                    &lwi);
+}
+
 static struct lfsck_operations lfsck_layout_master_ops = {
        .lfsck_reset            = lfsck_layout_reset,
        .lfsck_fail             = lfsck_layout_fail,
 static struct lfsck_operations lfsck_layout_master_ops = {
        .lfsck_reset            = lfsck_layout_reset,
        .lfsck_fail             = lfsck_layout_fail,
-       .lfsck_checkpoint       = lfsck_layout_checkpoint,
+       .lfsck_checkpoint       = lfsck_layout_master_checkpoint,
        .lfsck_prep             = lfsck_layout_master_prep,
        .lfsck_exec_oit         = lfsck_layout_master_exec_oit,
        .lfsck_exec_dir         = lfsck_layout_exec_dir,
        .lfsck_prep             = lfsck_layout_master_prep,
        .lfsck_exec_oit         = lfsck_layout_master_exec_oit,
        .lfsck_exec_dir         = lfsck_layout_exec_dir,
@@ -1156,12 +1692,13 @@ static struct lfsck_operations lfsck_layout_master_ops = {
        .lfsck_dump             = lfsck_layout_dump,
        .lfsck_double_scan      = lfsck_layout_master_double_scan,
        .lfsck_data_release     = lfsck_layout_master_data_release,
        .lfsck_dump             = lfsck_layout_dump,
        .lfsck_double_scan      = lfsck_layout_master_double_scan,
        .lfsck_data_release     = lfsck_layout_master_data_release,
+       .lfsck_quit             = lfsck_layout_master_quit,
 };
 
 static struct lfsck_operations lfsck_layout_slave_ops = {
        .lfsck_reset            = lfsck_layout_reset,
        .lfsck_fail             = lfsck_layout_fail,
 };
 
 static struct lfsck_operations lfsck_layout_slave_ops = {
        .lfsck_reset            = lfsck_layout_reset,
        .lfsck_fail             = lfsck_layout_fail,
-       .lfsck_checkpoint       = lfsck_layout_checkpoint,
+       .lfsck_checkpoint       = lfsck_layout_slave_checkpoint,
        .lfsck_prep             = lfsck_layout_slave_prep,
        .lfsck_exec_oit         = lfsck_layout_slave_exec_oit,
        .lfsck_exec_dir         = lfsck_layout_exec_dir,
        .lfsck_prep             = lfsck_layout_slave_prep,
        .lfsck_exec_oit         = lfsck_layout_slave_exec_oit,
        .lfsck_exec_dir         = lfsck_layout_exec_dir,
@@ -1191,7 +1728,18 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck)
        com->lc_lfsck = lfsck;
        com->lc_type = LT_LAYOUT;
        if (lfsck->li_master) {
        com->lc_lfsck = lfsck;
        com->lc_type = LT_LAYOUT;
        if (lfsck->li_master) {
+               struct lfsck_layout_master_data *llmd;
+
                com->lc_ops = &lfsck_layout_master_ops;
                com->lc_ops = &lfsck_layout_master_ops;
+               OBD_ALLOC_PTR(llmd);
+               if (llmd == NULL)
+                       GOTO(out, rc = -ENOMEM);
+
+               INIT_LIST_HEAD(&llmd->llmd_req_list);
+               spin_lock_init(&llmd->llmd_lock);
+               init_waitqueue_head(&llmd->llmd_thread.t_ctl_waitq);
+               atomic_set(&llmd->llmd_rpcs_in_flight, 0);
+               com->lc_data = llmd;
        } else {
                struct lfsck_layout_slave_data *llsd;
 
        } else {
                struct lfsck_layout_slave_data *llsd;
 
@@ -1260,6 +1808,9 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck)
                /* fall through */
        case LS_PAUSED:
        case LS_CRASHED:
                /* fall through */
        case LS_PAUSED:
        case LS_CRASHED:
+       case LS_CO_FAILED:
+       case LS_CO_STOPPED:
+       case LS_CO_PAUSED:
                spin_lock(&lfsck->li_lock);
                list_add_tail(&com->lc_link, &lfsck->li_list_scan);
                spin_unlock(&lfsck->li_lock);
                spin_lock(&lfsck->li_lock);
                list_add_tail(&com->lc_link, &lfsck->li_list_scan);
                spin_unlock(&lfsck->li_lock);
index d4811bd..6939cde 100644 (file)
@@ -71,7 +71,10 @@ static const char *lfsck_status_names[] = {
        [LS_STOPPED]            = "stopped",
        [LS_PAUSED]             = "paused",
        [LS_CRASHED]            = "crashed",
        [LS_STOPPED]            = "stopped",
        [LS_PAUSED]             = "paused",
        [LS_CRASHED]            = "crashed",
-       [LS_PARTIAL]            = "partial"
+       [LS_PARTIAL]            = "partial",
+       [LS_CO_FAILED]          = "co-failed",
+       [LS_CO_STOPPED]         = "co-stopped",
+       [LS_CO_PAUSED]          = "co-paused"
 };
 
 const char *lfsck_flags_names[] = {
 };
 
 const char *lfsck_flags_names[] = {
@@ -960,7 +963,9 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
 {
        struct lfsck_component *com;
        struct lfsck_component *next;
 {
        struct lfsck_component *com;
        struct lfsck_component *next;
-       int                     rc;
+       struct l_wait_info      lwi = { 0 };
+       int                     rc  = 0;
+       int                     rc1 = 0;
 
        cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
                                     lc_link) {
 
        cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
                                     lc_link) {
@@ -969,9 +974,32 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
 
                rc = com->lc_ops->lfsck_double_scan(env, com);
                if (rc != 0)
 
                rc = com->lc_ops->lfsck_double_scan(env, com);
                if (rc != 0)
-                       return rc;
+                       rc1 = rc;
+       }
+
+       l_wait_event(lfsck->li_thread.t_ctl_waitq,
+                    atomic_read(&lfsck->li_double_scan_count) == 0,
+                    &lwi);
+
+       return (rc1 != 0 ? rc1 : rc);
+}
+
+void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck)
+{
+       struct lfsck_component *com;
+       struct lfsck_component *next;
+
+       list_for_each_entry_safe(com, next, &lfsck->li_list_scan,
+                                lc_link) {
+               if (com->lc_ops->lfsck_quit != NULL)
+                       com->lc_ops->lfsck_quit(env, com);
+       }
+
+       list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
+                                lc_link) {
+               if (com->lc_ops->lfsck_quit != NULL)
+                       com->lc_ops->lfsck_quit(env, com);
        }
        }
-       return 0;
 }
 
 /* external interfaces */
 }
 
 /* external interfaces */
@@ -1030,6 +1058,70 @@ int lfsck_set_speed(struct dt_device *key, int val)
 }
 EXPORT_SYMBOL(lfsck_set_speed);
 
 }
 EXPORT_SYMBOL(lfsck_set_speed);
 
+/**
+ * Print the current LFSCK async windows size into a caller buffer.
+ *
+ * Looks up the LFSCK instance for \a key and formats the in-memory
+ * bookmark's lb_async_windows as "%u\n" into \a buf.
+ *
+ * \param[in]  key     the device used to find the LFSCK instance
+ * \param[out] buf     buffer to receive the formatted value
+ * \param[in]  len     size of \a buf in bytes
+ *
+ * \retval             the value returned by snprintf() on success
+ * \retval             -ENODEV if no LFSCK instance is found for \a key
+ * \retval             non-zero error from lu_env_init()
+ */
+int lfsck_get_windows(struct dt_device *key, void *buf, int len)
+{
+       struct lfsck_instance  *lfsck;
+       struct lu_env           env;
+       int                     rc;
+       ENTRY;
+
+       rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
+       if (rc != 0)
+               RETURN(rc);
+
+       lfsck = lfsck_instance_find(key, true, false);
+       if (unlikely(lfsck == NULL)) {
+               rc = -ENODEV;
+       } else {
+               rc = snprintf(buf, len, "%u\n",
+                             lfsck->li_bookmark_ram.lb_async_windows);
+               lfsck_instance_put(&env, lfsck);
+       }
+
+       lu_env_fini(&env);
+
+       RETURN(rc);
+}
+EXPORT_SYMBOL(lfsck_get_windows);
+
+/**
+ * Set the LFSCK async windows size and store it via the bookmark.
+ *
+ * The new value is only written (under li_mutex) when it differs from the
+ * one currently held in the in-memory bookmark. As the error message below
+ * states, 0 means the async requests pipeline is not restricted.
+ *
+ * \param[in] key      the device used to find the LFSCK instance
+ * \param[in] val      new windows size, valid range is
+ *                     [0 - LFSCK_ASYNC_WIN_MAX]
+ *
+ * \retval             0 on success, or if the value is unchanged
+ * \retval             -EINVAL if \a val is outside the valid range
+ * \retval             -ENODEV if no LFSCK instance is found for \a key
+ * \retval             non-zero error from lu_env_init() or
+ *                     lfsck_bookmark_store()
+ */
+int lfsck_set_windows(struct dt_device *key, int val)
+{
+       struct lu_env           env;
+       struct lfsck_instance  *lfsck;
+       int                     rc;
+       ENTRY;
+
+       rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
+       if (rc != 0)
+               RETURN(rc);
+
+       lfsck = lfsck_instance_find(key, true, false);
+       if (likely(lfsck != NULL)) {
+               /* \a val is a signed int, so checking only the upper bound
+                * would let a negative value through, although it is outside
+                * the range documented in the message below. Reject it
+                * explicitly instead of letting it reach the bookmark. */
+               if (val < 0 || val > LFSCK_ASYNC_WIN_MAX) {
+                       CERROR("%s: Too large async windows size, which "
+                              "may cause memory issues. The valid range "
+                              "is [0 - %u]. If you do not want to restrict "
+                              "the windows size for async requests pipeline, "
+                              "just set it as 0.\n",
+                              lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX);
+                       rc = -EINVAL;
+               } else if (lfsck->li_bookmark_ram.lb_async_windows != val) {
+                       mutex_lock(&lfsck->li_mutex);
+                       lfsck->li_bookmark_ram.lb_async_windows = val;
+                       rc = lfsck_bookmark_store(&env, lfsck);
+                       mutex_unlock(&lfsck->li_mutex);
+               }
+               lfsck_instance_put(&env, lfsck);
+       } else {
+               rc = -ENODEV;
+       }
+
+       lu_env_fini(&env);
+
+       RETURN(rc);
+}
+EXPORT_SYMBOL(lfsck_set_windows);
+
 int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type)
 {
        struct lu_env           env;
 int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type)
 {
        struct lu_env           env;
@@ -1134,6 +1226,12 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
                dirty = true;
        }
 
                dirty = true;
        }
 
+       if (start->ls_valid & LSV_ASYNC_WINDOWS &&
+           bk->lb_async_windows != start->ls_async_windows) {
+               bk->lb_async_windows = start->ls_async_windows;
+               dirty = true;
+       }
+
        if (start->ls_valid & LSV_ERROR_HANDLE) {
                valid |= DOIV_ERROR_HANDLE;
                if (start->ls_flags & LPF_FAILOUT)
        if (start->ls_valid & LSV_ERROR_HANDLE) {
                valid |= DOIV_ERROR_HANDLE;
                if (start->ls_flags & LPF_FAILOUT)
@@ -1333,6 +1431,7 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key,
        CFS_INIT_LIST_HEAD(&lfsck->li_list_double_scan);
        CFS_INIT_LIST_HEAD(&lfsck->li_list_idle);
        atomic_set(&lfsck->li_ref, 1);
        CFS_INIT_LIST_HEAD(&lfsck->li_list_double_scan);
        CFS_INIT_LIST_HEAD(&lfsck->li_list_idle);
        atomic_set(&lfsck->li_ref, 1);
+       atomic_set(&lfsck->li_double_scan_count, 0);
        init_waitqueue_head(&lfsck->li_thread.t_ctl_waitq);
        lfsck->li_out_notify = notify;
        lfsck->li_out_notify_data = notify_data;
        init_waitqueue_head(&lfsck->li_thread.t_ctl_waitq);
        lfsck->li_out_notify = notify;
        lfsck->li_out_notify_data = notify_data;
index d1ba149..d030528 100644 (file)
@@ -1347,9 +1347,11 @@ out:
        return ret;
 }
 
        return ret;
 }
 
-static int lfsck_namespace_double_scan(const struct lu_env *env,
-                                      struct lfsck_component *com)
+static int lfsck_namespace_double_scan_main(void *args)
 {
 {
+       struct lfsck_thread_args *lta   = args;
+       const struct lu_env     *env    = &lta->lta_env;
+       struct lfsck_component  *com    = lta->lta_com;
        struct lfsck_instance   *lfsck  = com->lc_lfsck;
        struct ptlrpc_thread    *thread = &lfsck->li_thread;
        struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
        struct lfsck_instance   *lfsck  = com->lc_lfsck;
        struct ptlrpc_thread    *thread = &lfsck->li_thread;
        struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
@@ -1372,7 +1374,7 @@ static int lfsck_namespace_double_scan(const struct lu_env *env,
 
        di = iops->init(env, obj, 0, BYPASS_CAPA);
        if (IS_ERR(di))
 
        di = iops->init(env, obj, 0, BYPASS_CAPA);
        if (IS_ERR(di))
-               RETURN(PTR_ERR(di));
+               GOTO(out, rc = PTR_ERR(di));
 
        fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
        rc = iops->get(env, di, (const struct dt_key *)&fid);
 
        fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
        rc = iops->get(env, di, (const struct dt_key *)&fid);
@@ -1477,6 +1479,8 @@ put:
 
 fini:
        iops->fini(env, di);
 
 fini:
        iops->fini(env, di);
+
+out:
        down_write(&com->lc_sem);
 
        ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
        down_write(&com->lc_sem);
 
        ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
@@ -1511,9 +1515,45 @@ fini:
        rc = lfsck_namespace_store(env, com, false);
 
        up_write(&com->lc_sem);
        rc = lfsck_namespace_store(env, com, false);
 
        up_write(&com->lc_sem);
+       if (atomic_dec_and_test(&lfsck->li_double_scan_count))
+               wake_up_all(&thread->t_ctl_waitq);
+
+       lfsck_thread_args_fini(lta);
+
        return rc;
 }
 
        return rc;
 }
 
+/**
+ * Start the second-stage namespace scan in a separate kernel thread.
+ *
+ * Does nothing unless the namespace status is LS_SCANNING_PHASE2.
+ * Otherwise it takes a reference on li_double_scan_count and spawns a
+ * thread running lfsck_namespace_double_scan_main(). That function drops
+ * the count and releases the thread arguments when it finishes. If the
+ * thread cannot be started, both are undone here.
+ *
+ * \param[in] env      execution environment (not used in this function)
+ * \param[in] com      the namespace LFSCK component
+ *
+ * \retval             0 if nothing is to be done or the thread was started
+ * \retval             negative error from lfsck_thread_args_init() or
+ *                     kthread_run()
+ */
+static int lfsck_namespace_double_scan(const struct lu_env *env,
+                                      struct lfsck_component *com)
+{
+       struct lfsck_namespace          *ns    = com->lc_file_ram;
+       struct lfsck_instance           *lfsck = com->lc_lfsck;
+       struct lfsck_thread_args        *lta;
+       long                             rc;
+       ENTRY;
+
+       if (unlikely(ns->ln_status != LS_SCANNING_PHASE2))
+               RETURN(0);
+
+       lta = lfsck_thread_args_init(lfsck, com);
+       if (IS_ERR(lta))
+               RETURN(PTR_ERR(lta));
+
+       atomic_inc(&lfsck->li_double_scan_count);
+       rc = PTR_ERR(kthread_run(lfsck_namespace_double_scan_main, lta,
+                                "lfsck_namespace"));
+       if (!IS_ERR_VALUE(rc)) {
+               rc = 0;
+       } else {
+               CERROR("%s: cannot start LFSCK namespace thread: rc = %ld\n",
+                      lfsck_lfsck2name(lfsck), rc);
+               atomic_dec(&lfsck->li_double_scan_count);
+               lfsck_thread_args_fini(lta);
+       }
+
+       RETURN(rc);
+}
+
 static struct lfsck_operations lfsck_namespace_ops = {
        .lfsck_reset            = lfsck_namespace_reset,
        .lfsck_fail             = lfsck_namespace_fail,
 static struct lfsck_operations lfsck_namespace_ops = {
        .lfsck_reset            = lfsck_namespace_reset,
        .lfsck_fail             = lfsck_namespace_fail,
index bd23302..2573e33 100644 (file)
@@ -294,6 +294,35 @@ static int lprocfs_wr_lfsck_speed_limit(struct file *file, const char *buffer,
        return rc != 0 ? rc : count;
 }
 
        return rc != 0 ? rc : count;
 }
 
+/**
+ * Proc read handler for "lfsck_async_windows".
+ *
+ * Marks the read as complete via \a eof and lets lfsck_get_windows()
+ * format the value into \a page.
+ *
+ * \retval             the non-zero result of lfsck_get_windows(), or
+ *                     \a count if that result is 0
+ */
+static int lprocfs_rd_lfsck_async_windows(char *page, char **start, off_t off,
+                                         int count, int *eof, void *data)
+{
+       struct mdd_device *mdd = data;
+       int                rc;
+
+       LASSERT(mdd != NULL);
+       *eof = 1;
+
+       rc = lfsck_get_windows(mdd->mdd_bottom, page, count);
+       if (rc == 0)
+               return count;
+
+       return rc;
+}
+
+/**
+ * Proc write handler for "lfsck_async_windows".
+ *
+ * Parses the user buffer with lprocfs_write_helper() and hands the value
+ * to lfsck_set_windows().
+ *
+ * \retval             \a count if both steps return 0
+ * \retval             the first non-zero result of either step otherwise
+ */
+static int lprocfs_wr_lfsck_async_windows(struct file *file, const char *buffer,
+                                         unsigned long count, void *data)
+{
+       struct mdd_device *mdd = data;
+       __u32              val;
+       int                rc;
+
+       LASSERT(mdd != NULL);
+       rc = lprocfs_write_helper(buffer, count, &val);
+       if (rc != 0)
+               return rc;
+
+       rc = lfsck_set_windows(mdd->mdd_bottom, val);
+       if (rc != 0)
+               return rc;
+
+       return count;
+}
+
 static int lprocfs_rd_lfsck_namespace(char *page, char **start, off_t off,
                                      int count, int *eof, void *data)
 {
 static int lprocfs_rd_lfsck_namespace(char *page, char **start, off_t off,
                                      int count, int *eof, void *data)
 {
@@ -315,6 +344,8 @@ static struct lprocfs_vars lprocfs_mdd_obd_vars[] = {
         { "sync_permission", lprocfs_rd_sync_perm, lprocfs_wr_sync_perm, 0 },
        { "lfsck_speed_limit", lprocfs_rd_lfsck_speed_limit,
                               lprocfs_wr_lfsck_speed_limit, 0 },
         { "sync_permission", lprocfs_rd_sync_perm, lprocfs_wr_sync_perm, 0 },
        { "lfsck_speed_limit", lprocfs_rd_lfsck_speed_limit,
                               lprocfs_wr_lfsck_speed_limit, 0 },
+       { "lfsck_async_windows", lprocfs_rd_lfsck_async_windows,
+                                lprocfs_wr_lfsck_async_windows, 0 },
        { "lfsck_namespace", lprocfs_rd_lfsck_namespace, 0, 0 },
        { 0 }
 };
        { "lfsck_namespace", lprocfs_rd_lfsck_namespace, 0, 0 },
        { 0 }
 };
index 4642d51..f474f43 100644 (file)
@@ -413,6 +413,14 @@ static void osp_object_release(const struct lu_env *env, struct lu_object *o)
                /* not needed in cache any more */
                set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags);
        }
                /* not needed in cache any more */
                set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags);
        }
+
+       if (is_ost_obj(o))
+               /* XXX: Currently, do NOT cache OST-objects on the MDT because:
+                *      1. they are not often accessed on the MDT.
+                *      2. it avoids upper layers (such as LFSCK) loading too
+                *         many once-used OST-objects. */
+               set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags);
+
        EXIT;
 }
 
        EXIT;
 }
 
index c33a6da..0707ec3 100644 (file)
@@ -2580,3 +2580,29 @@ void lustre_swab_close_data(struct close_data *cd)
        __swab64s(&cd->cd_data_version);
 }
 EXPORT_SYMBOL(lustre_swab_close_data);
        __swab64s(&cd->cd_data_version);
 }
 EXPORT_SYMBOL(lustre_swab_close_data);
+
+/**
+ * Byte-swap a struct lfsck_request in place.
+ *
+ * The padding fields are not swabbed; the compile-time checks only make
+ * sure they are still members of the structure.
+ *
+ * \param[in,out] lr   the request to be swabbed
+ */
+void lustre_swab_lfsck_request(struct lfsck_request *lr)
+{
+       /* Compile-time checks: the padding fields must still exist. */
+       CLASSERT(offsetof(typeof(*lr), lr_padding_1) != 0);
+       CLASSERT(offsetof(typeof(*lr), lr_padding_2) != 0);
+       CLASSERT(offsetof(typeof(*lr), lr_padding_3) != 0);
+
+       /* 32-bit fields */
+       __swab32s(&lr->lr_event);
+       __swab32s(&lr->lr_index);
+       __swab32s(&lr->lr_flags);
+       __swab32s(&lr->lr_valid);
+       __swab32s(&lr->lr_speed);
+
+       /* 16-bit fields */
+       __swab16s(&lr->lr_version);
+       __swab16s(&lr->lr_active);
+       __swab16s(&lr->lr_param);
+       __swab16s(&lr->lr_async_windows);
+
+       lustre_swab_lu_fid(&lr->lr_fid);
+}
+EXPORT_SYMBOL(lustre_swab_lfsck_request);
+
+/**
+ * Byte-swap a struct lfsck_reply in place.
+ *
+ * Only lr_status is swabbed; the compile-time checks make sure the
+ * padding fields are still members of the structure.
+ *
+ * \param[in,out] lr   the reply to be swabbed
+ */
+void lustre_swab_lfsck_reply(struct lfsck_reply *lr)
+{
+       /* Compile-time checks: the padding fields must still exist. */
+       CLASSERT(offsetof(typeof(*lr), lr_padding_1) != 0);
+       CLASSERT(offsetof(typeof(*lr), lr_padding_2) != 0);
+
+       __swab32s(&lr->lr_status);
+}
+EXPORT_SYMBOL(lustre_swab_lfsck_reply);
index a85b05c..f6569b3 100644 (file)
@@ -1401,6 +1401,8 @@ void lustre_assert_wire_constants(void)
        CLASSERT(OBD_FL_MMAP == 0x00040000);
        CLASSERT(OBD_FL_RECOV_RESEND == 0x00080000);
        CLASSERT(OBD_FL_NOSPC_BLK == 0x00100000);
        CLASSERT(OBD_FL_MMAP == 0x00040000);
        CLASSERT(OBD_FL_RECOV_RESEND == 0x00080000);
        CLASSERT(OBD_FL_NOSPC_BLK == 0x00100000);
+       CLASSERT(OBD_FL_FLUSH == 0x00200000);
+       CLASSERT(OBD_FL_SHORT_IO == 0x00400000);
        CLASSERT(OBD_FL_LOCAL_MASK == 0xf0000000);
 
        /* Checks for struct lov_ost_data_v1 */
        CLASSERT(OBD_FL_LOCAL_MASK == 0xf0000000);
 
        /* Checks for struct lov_ost_data_v1 */
@@ -4506,5 +4508,77 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct update, u_bufs));
        LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n",
                 (long long)(int)sizeof(((struct update *)0)->u_bufs));
                 (long long)(int)offsetof(struct update, u_bufs));
        LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n",
                 (long long)(int)sizeof(((struct update *)0)->u_bufs));
+
+       /* Checks for struct lfsck_request */
+       LASSERTF((int)sizeof(struct lfsck_request) == 64, "found %lld\n",
+                (long long)(int)sizeof(struct lfsck_request));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_event) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_event));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_event) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_event));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_index) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_index));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_index) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_index));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_flags) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_flags));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_flags) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_flags));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_valid) == 12, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_valid));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_valid) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_valid));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_speed) == 16, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_speed));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_speed) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_speed));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_version) == 20, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_version));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_version) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_version));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_active) == 22, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_active));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_active) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_active));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_param) == 24, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_param));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_param) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_param));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_async_windows) == 26, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_async_windows));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_async_windows) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_async_windows));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_padding_1) == 28, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_padding_1));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_1));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_fid) == 32, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_fid));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_fid) == 16, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_fid));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_padding_2) == 48, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_padding_2));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_2) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_2));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_padding_3) == 56, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_padding_3));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_3) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_3));
+
+       /* Checks for struct lfsck_reply */
+       LASSERTF((int)sizeof(struct lfsck_reply) == 16, "found %lld\n",
+                (long long)(int)sizeof(struct lfsck_reply));
+       LASSERTF((int)offsetof(struct lfsck_reply, lr_status) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_reply, lr_status));
+       LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_status) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_status));
+       LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_1) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_reply, lr_padding_1));
+       LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_1));
+       LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_2) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_reply, lr_padding_2));
+       LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_2) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_2));
 }
 
 }
 
index 7d7d0a1..0145064 100644 (file)
@@ -370,7 +370,8 @@ command_t cmdlist[] = {
         "                   [-e | --error error_handle] [-h | --help]\n"
         "                   [-n | --dryrun switch] [-r | --reset]\n"
         "                   [-s | --speed speed_limit]\n"
         "                   [-e | --error error_handle] [-h | --help]\n"
         "                   [-n | --dryrun switch] [-r | --reset]\n"
         "                   [-s | --speed speed_limit]\n"
-        "                   [-t | --type lfsck_type[,lfsck_type...]]"},
+        "                   [-t | --type lfsck_type[,lfsck_type...]]\n"
+        "                   [-w | --windows win_size]"},
        {"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n"
         "usage: lfsck_stop <-M | --device [MDT,OST]_device> [-h | --help]"},
 
        {"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n"
         "usage: lfsck_stop <-M | --device [MDT,OST]_device> [-h | --help]"},
 
index 289c8fd..e04cc21 100644 (file)
@@ -53,6 +53,7 @@ static struct option long_opt_start[] = {
        {"reset",       no_argument,       0, 'r'},
        {"speed",       required_argument, 0, 's'},
        {"type",        required_argument, 0, 't'},
        {"reset",       no_argument,       0, 'r'},
        {"speed",       required_argument, 0, 's'},
        {"type",        required_argument, 0, 't'},
+       {"windows",     required_argument, 0, 'w'},
        {0,             0,                 0,   0}
 };
 
        {0,             0,                 0,   0}
 };
 
@@ -96,6 +97,7 @@ static void usage_start(void)
                "            [-n | --dryrun switch] [-r | --reset]\n"
                "            [-s | --speed speed_limit]\n"
                "            [-t | --type lfsck_type[,lfsck_type...]]\n"
                "            [-n | --dryrun switch] [-r | --reset]\n"
                "            [-s | --speed speed_limit]\n"
                "            [-t | --type lfsck_type[,lfsck_type...]]\n"
+               "            [-w | --windows win_size]\n"
                "OPTIONS:\n"
                "-M: The device to start LFSCK/scrub on.\n"
                "-e: Error handle, 'continue'(default) or 'abort'.\n"
                "OPTIONS:\n"
                "-M: The device to start LFSCK/scrub on.\n"
                "-e: Error handle, 'continue'(default) or 'abort'.\n"
@@ -104,7 +106,8 @@ static void usage_start(void)
                "-r: Reset scanning start position to the device beginning.\n"
                "-s: How many items can be scanned at most per second. "
                    "'%d' means no limit (default).\n"
                "-r: Reset scanning start position to the device beginning.\n"
                "-s: How many items can be scanned at most per second. "
                    "'%d' means no limit (default).\n"
-               "-t: The LFSCK type(s) to be started.\n",
+               "-t: The LFSCK type(s) to be started.\n"
+               "-w: The windows size for async requests pipeline.\n",
                LFSCK_SPEED_NO_LIMIT);
 }
 
                LFSCK_SPEED_NO_LIMIT);
 }
 
@@ -141,7 +144,7 @@ int jt_lfsck_start(int argc, char **argv)
        char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
        char device[MAX_OBD_NAME];
        struct lfsck_start start;
        char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
        char device[MAX_OBD_NAME];
        struct lfsck_start start;
-       char *optstring = "M:e:hn:rs:t:";
+       char *optstring = "M:e:hn:rs:t:w:";
        int opt, index, rc, val, i, type;
 
        memset(&data, 0, sizeof(data));
        int opt, index, rc, val, i, type;
 
        memset(&data, 0, sizeof(data));
@@ -234,6 +237,23 @@ int jt_lfsck_start(int argc, char **argv)
                        }
                        break;
                }
                        }
                        break;
                }
+               case 'w':
+                       val = atoi(optarg);
+                       if (val < 0 || val > LFSCK_ASYNC_WIN_MAX) {
+                               fprintf(stderr,
+                                       "Too large async windows size, "
+                                       "which may cause memory issues. "
+                                       "The valid range is [0 - %u]. "
+                                       "If you do not want to restrict "
+                                       "the windows size for async reqeusts "
+                                       "pipeline, just set it as 0.\n",
+                                       LFSCK_ASYNC_WIN_MAX);
+                               return -EINVAL;
+                       }
+
+                       start.ls_async_windows = val;
+                       start.ls_valid |= LSV_ASYNC_WINDOWS;
+                       break;
                default:
                        fprintf(stderr, "Invalid option, '-h' for help.\n");
                        return -EINVAL;
                default:
                        fprintf(stderr, "Invalid option, '-h' for help.\n");
                        return -EINVAL;
index 5ad0312..0536384 100644 (file)
@@ -2028,6 +2028,34 @@ static void check_update(void)
        CHECK_MEMBER(update, u_bufs);
 }
 
        CHECK_MEMBER(update, u_bufs);
 }
 
+static void check_lfsck_request(void)
+{      /*
+        * Wire-layout check list for struct lfsck_request: one
+        * CHECK_STRUCT for the structure, then one CHECK_MEMBER per member,
+        * padding members included.  The macros are defined outside this
+        * hunk; presumably they print the sizeof/offsetof assertions that
+        * end up in lustre_assert_wire_constants() -- TODO confirm.
+        */
+       BLANK_LINE();
+       CHECK_STRUCT(lfsck_request);
+       CHECK_MEMBER(lfsck_request, lr_event);
+       CHECK_MEMBER(lfsck_request, lr_index);
+       CHECK_MEMBER(lfsck_request, lr_flags);
+       CHECK_MEMBER(lfsck_request, lr_valid);
+       CHECK_MEMBER(lfsck_request, lr_speed);
+       CHECK_MEMBER(lfsck_request, lr_version);
+       CHECK_MEMBER(lfsck_request, lr_active);
+       CHECK_MEMBER(lfsck_request, lr_param);
+       CHECK_MEMBER(lfsck_request, lr_async_windows);
+       CHECK_MEMBER(lfsck_request, lr_padding_1);
+       CHECK_MEMBER(lfsck_request, lr_fid);
+       CHECK_MEMBER(lfsck_request, lr_padding_2);
+       CHECK_MEMBER(lfsck_request, lr_padding_3);
+}
+
+static void check_lfsck_reply(void)
+{      /*
+        * Wire-layout check list for struct lfsck_reply: CHECK_STRUCT for
+        * the structure, then CHECK_MEMBER for lr_status and both padding
+        * members.  The macros are defined outside this hunk; presumably
+        * they print the matching sizeof/offsetof assertions -- TODO confirm.
+        */
+       BLANK_LINE();
+       CHECK_STRUCT(lfsck_reply);
+       CHECK_MEMBER(lfsck_reply, lr_status);
+       CHECK_MEMBER(lfsck_reply, lr_padding_1);
+       CHECK_MEMBER(lfsck_reply, lr_padding_2);
+}
+
 static void system_string(char *cmdline, char *str, int len)
 {
        int   fds[2];
 static void system_string(char *cmdline, char *str, int len)
 {
        int   fds[2];
@@ -2422,6 +2450,9 @@ main(int argc, char **argv)
        check_update_reply();
        check_update();
 
        check_update_reply();
        check_update();
 
+       check_lfsck_request();
+       check_lfsck_reply();
+
        printf("}\n\n");
 
        return 0;
        printf("}\n\n");
 
        return 0;
index def2824..1dee792 100644 (file)
@@ -4517,5 +4517,77 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct update, u_bufs));
        LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n",
                 (long long)(int)sizeof(((struct update *)0)->u_bufs));
                 (long long)(int)offsetof(struct update, u_bufs));
        LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n",
                 (long long)(int)sizeof(((struct update *)0)->u_bufs));
+
+       /* Checks for struct lfsck_request */
+       LASSERTF((int)sizeof(struct lfsck_request) == 64, "found %lld\n",
+                (long long)(int)sizeof(struct lfsck_request));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_event) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_event));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_event) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_event));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_index) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_index));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_index) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_index));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_flags) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_flags));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_flags) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_flags));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_valid) == 12, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_valid));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_valid) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_valid));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_speed) == 16, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_speed));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_speed) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_speed));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_version) == 20, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_version));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_version) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_version));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_active) == 22, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_active));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_active) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_active));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_param) == 24, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_param));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_param) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_param));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_async_windows) == 26, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_async_windows));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_async_windows) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_async_windows));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_padding_1) == 28, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_padding_1));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_1));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_fid) == 32, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_fid));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_fid) == 16, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_fid));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_padding_2) == 48, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_padding_2));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_2) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_2));
+       LASSERTF((int)offsetof(struct lfsck_request, lr_padding_3) == 56, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_request, lr_padding_3));
+       LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_3) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_3));
+
+       /* Checks for struct lfsck_reply */
+       LASSERTF((int)sizeof(struct lfsck_reply) == 16, "found %lld\n",
+                (long long)(int)sizeof(struct lfsck_reply));
+       LASSERTF((int)offsetof(struct lfsck_reply, lr_status) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_reply, lr_status));
+       LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_status) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_status));
+       LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_1) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_reply, lr_padding_1));
+       LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_1) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_1));
+       LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_2) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct lfsck_reply, lr_padding_2));
+       LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_2) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_2));
 }
 
 }