Whamcloud - gitweb
LU-4604 lfsck: LFSCK async updates RPC flow control 94/8694/13
authorFan Yong <fan.yong@intel.com>
Wed, 12 Feb 2014 20:49:45 +0000 (04:49 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Thu, 27 Feb 2014 21:06:55 +0000 (21:06 +0000)
Control the maximum number of in-flight async update RPCs in the OSP
layer, so that too many in-flight RPCs do not cause memory issues and
OST overload.

Currently, a semaphore initialized to 512 is used for that.

It also contains the layout LFSCK performance tests:
1) lfsck_layout performance under single MDS mode with various
   stripe counts and various file sets, for the routine check case
   and the repairing case.

2) lfsck_layout performance under DNE mode with various file
   sets, for the routine check case and the repairing case.

3) lfsck_layout impact on create performance.

This patch also fixes some bugs related to lu_device_type
operations, to make the LFSCK performance tests run smoothly.

There was no protection when incrementing/decrementing
lu_device_type::ldt_device_nr, which might cause ldt_device_nr to be
wrong and trigger an assertion. This patch redefines
lu_device_type::ldt_device_nr as an atomic type.

There was no protection when adding/removing lu_device_type::ldt_linkage
to/from the global lu_device_types list, which might cause bad
address accesses. This patch uses the existing obd_types_lock
to protect the related operations.

We do NOT need lu_types_stop() any longer. That function scans
the global lu_device_types list and, for each type on it whose
lu_device_type::ldt_device_nr is zero, calls its stop() method.
In fact, lu_device_type::ldt_device_nr only becomes zero when the
last lu_device_fini() is called, and at that time lu_device_fini()
itself calls the stop() method. So it is unnecessary to call stop()
again via lu_types_stop().

Test-Parameters: mdtcount=2 ostcount=2 envdefinitions=SLOW=yes testlist=lfsck-performance
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Ib9bae6ad0446e8705fa2767d080150e82a495e2f
Reviewed-on: http://review.whamcloud.com/8694
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lu_object.h
lustre/lfsck/lfsck_layout.c
lustre/liblustre/llite_cl.c
lustre/llite/vvp_dev.c
lustre/obdclass/lu_object.c
lustre/obdclass/obd_mount.c
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/osp/osp_trans.c
lustre/tests/lfsck-performance.sh

index e169f34..bb43780 100644 (file)
@@ -333,13 +333,13 @@ struct lu_device_type {
         /**
          * Number of existing device type instances.
          */
-        unsigned                                ldt_device_nr;
-        /**
-         * Linkage into a global list of all device types.
-         *
-         * \see lu_device_types.
-         */
-        cfs_list_t                              ldt_linkage;
+       atomic_t                                ldt_device_nr;
+       /**
+        * Linkage into a global list of all device types.
+        *
+        * \see lu_device_types.
+        */
+       struct list_head                        ldt_linkage;
 };
 
 /**
@@ -681,7 +681,6 @@ void lu_dev_del_linkage(struct lu_site *s, struct lu_device *d);
 
 int  lu_device_type_init(struct lu_device_type *ldt);
 void lu_device_type_fini(struct lu_device_type *ldt);
-void lu_types_stop(void);
 
 /** @} ctors */
 
index 01e3151..b01122e 100644 (file)
@@ -3380,11 +3380,15 @@ static int lfsck_layout_assistant(void *args)
                        rc = lfsck_layout_assistant_handle_one(env, com, llr);
                        spin_lock(&llmd->llmd_lock);
                        list_del_init(&llr->llr_list);
-                       if (bk->lb_async_windows != 0 &&
-                           llmd->llmd_prefetched >= bk->lb_async_windows)
-                               wakeup = true;
-
                        llmd->llmd_prefetched--;
+                       /* Wake up the main engine thread only when the list
+                        * is empty or half of the prefetched items have been
+                        * handled to avoid too frequent thread schedule. */
+                       if (llmd->llmd_prefetched == 0 ||
+                           (bk->lb_async_windows != 0 &&
+                            (bk->lb_async_windows >> 1) ==
+                            llmd->llmd_prefetched))
+                               wakeup = true;
                        spin_unlock(&llmd->llmd_lock);
                        if (wakeup)
                                wake_up_all(&mthread->t_ctl_waitq);
@@ -3394,9 +3398,6 @@ static int lfsck_layout_assistant(void *args)
                                GOTO(cleanup1, rc);
                }
 
-               /* Wakeup the master engine if it is waiting in checkpoint. */
-               wake_up_all(&mthread->t_ctl_waitq);
-
                l_wait_event(athread->t_ctl_waitq,
                             !lfsck_layout_req_empty(llmd) ||
                             llmd->llmd_exit ||
index 4f45b6f..0d857fa 100644 (file)
@@ -784,12 +784,7 @@ int cl_sb_fini(struct llu_sb_info *sbi)
                 sbi->ll_site = NULL;
         }
         cl_env_put(env, &refcheck);
-        /*
-         * If mount failed (sbi->ll_cl == NULL), and this there are no other
-         * mounts, stop device types manually (this usually happens
-         * automatically when last device is destroyed).
-         */
-        lu_types_stop();
-        cl_env_cache_purge(~0);
-        RETURN(0);
+       cl_env_cache_purge(~0);
+
+       RETURN(0);
 }
index 0133be0..e4cda52 100644 (file)
@@ -245,13 +245,8 @@ int cl_sb_fini(struct super_block *sb)
                 CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
                 result = PTR_ERR(env);
         }
-        /*
-         * If mount failed (sbi->ll_cl == NULL), and this there are no other
-         * mounts, stop device types manually (this usually happens
-         * automatically when last device is destroyed).
-         */
-        lu_types_stop();
-        RETURN(result);
+
+       RETURN(result);
 }
 
 /****************************************************************************
index 489796a..1304e95 100644 (file)
@@ -60,6 +60,8 @@
 #include <lu_ref.h>
 #include <libcfs/list.h>
 
+extern spinlock_t obd_types_lock;
+
 static void lu_object_free(const struct lu_env *env, struct lu_object *o);
 
 /**
@@ -802,34 +804,31 @@ int lu_device_type_init(struct lu_device_type *ldt)
 {
        int result = 0;
 
-       CFS_INIT_LIST_HEAD(&ldt->ldt_linkage);
+       atomic_set(&ldt->ldt_device_nr, 0);
+       INIT_LIST_HEAD(&ldt->ldt_linkage);
        if (ldt->ldt_ops->ldto_init)
                result = ldt->ldt_ops->ldto_init(ldt);
-       if (result == 0)
-               cfs_list_add(&ldt->ldt_linkage, &lu_device_types);
+
+       if (result == 0) {
+               spin_lock(&obd_types_lock);
+               list_add(&ldt->ldt_linkage, &lu_device_types);
+               spin_unlock(&obd_types_lock);
+       }
+
        return result;
 }
 EXPORT_SYMBOL(lu_device_type_init);
 
 void lu_device_type_fini(struct lu_device_type *ldt)
 {
-       cfs_list_del_init(&ldt->ldt_linkage);
+       spin_lock(&obd_types_lock);
+       list_del_init(&ldt->ldt_linkage);
+       spin_unlock(&obd_types_lock);
        if (ldt->ldt_ops->ldto_fini)
                ldt->ldt_ops->ldto_fini(ldt);
 }
 EXPORT_SYMBOL(lu_device_type_fini);
 
-void lu_types_stop(void)
-{
-        struct lu_device_type *ldt;
-
-       cfs_list_for_each_entry(ldt, &lu_device_types, ldt_linkage) {
-               if (ldt->ldt_device_nr == 0 && ldt->ldt_ops->ldto_stop)
-                       ldt->ldt_ops->ldto_stop(ldt);
-       }
-}
-EXPORT_SYMBOL(lu_types_stop);
-
 /**
  * Global list of all sites on this node
  */
@@ -1169,14 +1168,16 @@ EXPORT_SYMBOL(lu_device_put);
  */
 int lu_device_init(struct lu_device *d, struct lu_device_type *t)
 {
-        if (t->ldt_device_nr++ == 0 && t->ldt_ops->ldto_start != NULL)
-                t->ldt_ops->ldto_start(t);
-        memset(d, 0, sizeof *d);
-       atomic_set(&d->ld_ref, 0);
-        d->ld_type = t;
-        lu_ref_init(&d->ld_reference);
-        CFS_INIT_LIST_HEAD(&d->ld_linkage);
-        return 0;
+       if (atomic_inc_return(&t->ldt_device_nr) == 1 &&
+           t->ldt_ops->ldto_start != NULL)
+               t->ldt_ops->ldto_start(t);
+
+       memset(d, 0, sizeof *d);
+       d->ld_type = t;
+       lu_ref_init(&d->ld_reference);
+       INIT_LIST_HEAD(&d->ld_linkage);
+
+       return 0;
 }
 EXPORT_SYMBOL(lu_device_init);
 
@@ -1185,20 +1186,21 @@ EXPORT_SYMBOL(lu_device_init);
  */
 void lu_device_fini(struct lu_device *d)
 {
-        struct lu_device_type *t;
+       struct lu_device_type *t = d->ld_type;
 
-        t = d->ld_type;
-        if (d->ld_obd != NULL) {
-                d->ld_obd->obd_lu_dev = NULL;
-                d->ld_obd = NULL;
-        }
+       if (d->ld_obd != NULL) {
+               d->ld_obd->obd_lu_dev = NULL;
+               d->ld_obd = NULL;
+       }
 
-        lu_ref_fini(&d->ld_reference);
+       lu_ref_fini(&d->ld_reference);
        LASSERTF(atomic_read(&d->ld_ref) == 0,
                 "Refcount is %u\n", atomic_read(&d->ld_ref));
-        LASSERT(t->ldt_device_nr > 0);
-        if (--t->ldt_device_nr == 0 && t->ldt_ops->ldto_stop != NULL)
-                t->ldt_ops->ldto_stop(t);
+       LASSERT(atomic_read(&t->ldt_device_nr) > 0);
+
+       if (atomic_dec_and_test(&t->ldt_device_nr) &&
+           t->ldt_ops->ldto_stop != NULL)
+               t->ldt_ops->ldto_stop(t);
 }
 EXPORT_SYMBOL(lu_device_fini);
 
index 0f70f44..f6e4690 100644 (file)
@@ -834,8 +834,8 @@ int lustre_common_put_super(struct super_block *sb)
         }
         /* Drop a ref to the mounted disk */
         lustre_put_lsi(sb);
-        lu_types_stop();
-        RETURN(rc);
+
+       RETURN(rc);
 }
 EXPORT_SYMBOL(lustre_common_put_super);
 
index a8d75ea..d27146d 100644 (file)
@@ -531,6 +531,8 @@ out:
        RETURN(rc);
 }
 
+#define OSP_MAX_AUIF_MAX       512
+
 static int osp_init0(const struct lu_env *env, struct osp_device *m,
                     struct lu_device_type *ldt, struct lustre_cfg *cfg)
 {
@@ -544,6 +546,9 @@ static int osp_init0(const struct lu_env *env, struct osp_device *m,
        ENTRY;
 
        mutex_init(&m->opd_async_requests_mutex);
+       /* We allow OSP_MAX_AUIF_MAX async updates in flight at most. */
+       sema_init(&m->opd_async_fc_sem, OSP_MAX_AUIF_MAX);
+
        obd = class_name2obd(lustre_cfg_string(cfg, 0));
        if (obd == NULL) {
                CERROR("Cannot find obd with name %s\n",
index 2af16c0..c040efa 100644 (file)
@@ -122,11 +122,11 @@ struct osp_device {
        cfs_proc_dir_entry_t            *opd_proc_entry;
        struct lprocfs_stats            *opd_stats;
        /* connection status. */
-       int                              opd_new_connection;
-       int                              opd_got_disconnected;
-       int                              opd_imp_connected;
-       int                              opd_imp_active;
-       unsigned int                     opd_imp_seen_connected:1,
+       unsigned int                     opd_new_connection:1,
+                                        opd_got_disconnected:1,
+                                        opd_imp_connected:1,
+                                        opd_imp_active:1,
+                                        opd_imp_seen_connected:1,
                                         opd_connect_mdt:1;
 
        /* whether local recovery is completed:
@@ -195,6 +195,7 @@ struct osp_device {
        struct update_request           *opd_async_requests;
        /* Protect current operations on opd_async_requests. */
        struct mutex                     opd_async_requests_mutex;
+       struct semaphore                 opd_async_fc_sem;
 };
 
 #define opd_pre_lock                   opd_pre->osp_pre_lock
index dc3481c..3b560f8 100644 (file)
@@ -35,6 +35,7 @@
 
 struct osp_async_update_args {
        struct update_request   *oaua_update;
+       unsigned int             oaua_fc:1;
 };
 
 struct osp_async_update_item {
@@ -81,10 +82,14 @@ static int osp_async_update_interpret(const struct lu_env *env,
        struct update_request           *update = oaua->oaua_update;
        struct osp_async_update_item    *oaui;
        struct osp_async_update_item    *next;
+       struct osp_device               *osp    = dt2osp_dev(update->ur_dt);
        int                              count  = 0;
        int                              index  = 0;
        int                              rc1    = 0;
 
+       if (oaua->oaua_fc)
+               up(&osp->opd_async_fc_sem);
+
        if (rc == 0 || req->rq_repmsg != NULL) {
                reply = req_capsule_server_sized_get(&req->rq_pill,
                                                     &RMF_UPDATE_REPLY,
@@ -260,7 +265,8 @@ out:
 }
 
 static int osp_trans_trigger(const struct lu_env *env, struct osp_device *osp,
-                            struct update_request *update, struct thandle *th)
+                            struct update_request *update, struct thandle *th,
+                            bool fc)
 {
        struct thandle_update   *tu = th->th_update;
        int                     rc = 0;
@@ -280,6 +286,7 @@ static int osp_trans_trigger(const struct lu_env *env, struct osp_device *osp,
                if (rc == 0) {
                        args = ptlrpc_req_async_args(req);
                        args->oaua_update = update;
+                       args->oaua_fc = !!fc;
                        req->rq_interpret_reply =
                                osp_async_update_interpret;
                        ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
@@ -326,7 +333,7 @@ int osp_trans_start(const struct lu_env *env, struct dt_device *dt,
         * the local transaction, i.e. delete the name entry remote
         * first, then destroy the local object. */
        if (!is_only_remote_trans(th) && !tu->tu_sent_after_local_trans)
-               rc = osp_trans_trigger(env, dt2osp_dev(dt), update, th);
+               rc = osp_trans_trigger(env, dt2osp_dev(dt), update, th, false);
 
        return rc;
 }
@@ -354,8 +361,25 @@ int osp_trans_stop(const struct lu_env *env, struct dt_device *dt,
 
        if (is_only_remote_trans(th)) {
                if (th->th_result == 0) {
+                       struct osp_device *osp = dt2osp_dev(th->th_dev);
+
+                       do {
+                               if (!osp->opd_imp_active ||
+                                   osp->opd_got_disconnected) {
+                                       out_destroy_update_req(update);
+                                       GOTO(put, rc = -ENOTCONN);
+                               }
+
+                               /* Get the semaphore to guarantee it has
+                                * free slot, which will be released via
+                                * osp_async_update_interpret(). */
+                               rc = down_timeout(&osp->opd_async_fc_sem, HZ);
+                       } while (rc != 0);
+
                        rc = osp_trans_trigger(env, dt2osp_dev(dt),
-                                              update, th);
+                                              update, th, true);
+                       if (rc != 0)
+                               up(&osp->opd_async_fc_sem);
                } else {
                        rc = th->th_result;
                        out_destroy_update_req(update);
@@ -363,7 +387,7 @@ int osp_trans_stop(const struct lu_env *env, struct dt_device *dt,
        } else {
                if (tu->tu_sent_after_local_trans)
                        rc = osp_trans_trigger(env, dt2osp_dev(dt),
-                                              update, th);
+                                              update, th, false);
                rc = update->ur_rc;
                out_destroy_update_req(update);
        }
index 9b602de..c30ab30 100644 (file)
@@ -13,23 +13,29 @@ init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 init_logging
 
+#remove it when zfs-based backend iteration is enabled
 [ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
        skip "lfsck performance only for ldiskfs" && exit 0
 
 require_dsh_mds || exit 0
+require_dsh_ost || exit 0
 
 [ "$SLOW" = "no" ] &&
        skip "skip lfsck performance test under non-SLOW mode" && exit 0
 
 NTHREADS=${NTHREADS:-0}
-UNIT=${UNIT:-1048576}
-MINCOUNT=${MINCOUNT:-8192}
-MAXCOUNT=${MAXCOUNT:-32768}
-MINCOUNT_REPAIR=${MINCOUNT_REPAIR:-8192}
-MAXCOUNT_REPAIR=${MAXCOUNT_REPAIR:-32768}
-BASE_COUNT=${BASE_COUNT:-1048576}
+UNIT=${UNIT:-8192}
+MINCOUNT=${MINCOUNT:-4096}
+MAXCOUNT=${MAXCOUNT:-8192}
+MINCOUNT_REPAIR=${MINCOUNT_REPAIR:-4096}
+MAXCOUNT_REPAIR=${MAXCOUNT_REPAIR:-8192}
+BASE_COUNT=${BASE_COUNT:-8192}
 FACTOR=${FACTOR:-2}
 INCFACTOR=${INCFACTOR:-25} #percent
+MINSUBDIR=${MINSUBDIR:-1}
+MAXSUBDIR=${MAXSUBDIR:-2}
+TOTSUBDIR=${TOTSUBDIR:-2}
+WTIME=${WTIME:-86400}
 
 RCMD="do_facet ${SINGLEMDS}"
 RLCTL="${RCMD} ${LCTL}"
@@ -41,6 +47,8 @@ SHOW_NAMESPACE="${RLCTL} get_param -n mdd.${MDT_DEV}.lfsck_namespace"
 MNTOPTS_NOSCRUB="-o user_xattr,noscrub"
 remote_mds && ECHOCMD=${RCMD} || ECHOCMD="eval"
 
+LFSCKDIR="$MOUNT/lfsck/"
+
 if [ ${NTHREADS} -eq 0 ]; then
        CPUCORE=$(${RCMD} cat /proc/cpuinfo | grep "processor.*:" | wc -l)
        NTHREADS=$((CPUCORE * 2))
@@ -167,7 +175,7 @@ test_0() {
                stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!"
        done
 }
-run_test 0 "lfsck performance test (routine case) without load"
+run_test 0 "lfsck namespace performance (routine case) without load"
 
 test_1() {
        local BCOUNT=0
@@ -219,7 +227,7 @@ test_1() {
                stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!"
        done
 }
-run_test 1 "lfsck performance test (backup/restore) without load"
+run_test 1 "lfsck namespace performance (backup/restore) without load"
 
 test_2() {
        local i
@@ -258,13 +266,9 @@ test_2() {
                stop ${SINGLEMDS} > /dev/null || error "Fail to stop MDS!"
        done
 }
-run_test 2 "lfsck performance test (simulate upgrade from 1.8) without load"
+run_test 2 "lfsck namespace performance (upgrade from 1.8) without load"
 
 test_3() {
-       [ $MDSSIZE -lt 4000000 ] &&
-               skip "MDT device is too small, expect at last 4GB" && exit 0
-
-       [ $BASE_COUNT -lt 1048576 ] && BASE_COUNT=1048576
        [ $INCFACTOR -gt 25 ] && INCFACTOR=25
 
        local inc_count=$((BASE_COUNT * INCFACTOR / 100))
@@ -347,7 +351,363 @@ test_3() {
                error "Fail to create files!"
        echo "+++ end to create for ${i} files set at: $(date) +++"
 }
-run_test 3 "lfsck performance test (routine case) without load"
+run_test 3 "lfsck namespace impact on create performance"
+
+show_layout() {
+       local idx=$1
+
+       $RLCTL get_param -n mdd.$(facet_svc mds${idx}).lfsck_layout
+}
+
+layout_test_one()
+{
+       echo "***** Start layout LFSCK on all devices at: $(date) *****"
+       $RLCTL lfsck_start -M ${MDT_DEV} -t layout -A -r || return 21
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_layout |
+               awk '/^status/ { print \\\$2 }'" "completed" $WTIME || {
+               show_layout 1
+               return 22
+       }
+       echo "***** End layout LFSCK on all devices at: $(date) *****"
+
+       for n in $(seq $MDSCOUNT); do
+               show_layout ${n}
+
+               local SPEED=$(show_layout ${n} |
+                             awk '/^average_speed_phase1/ { print $2 }')
+               echo
+               echo "lfsck_layout speed on MDS_${n} is $SPEED objs/sec"
+               echo
+       done
+}
+
+layout_gen_one()
+{
+       local idx1=$1
+       local idx2=$2
+       local mntpt="/mnt/lustre_lfsck_${idx1}_${idx2}"
+       local basedir="$mntpt/lfsck/$idx1/$idx2"
+
+       mkdir -p $mntpt || {
+               error_noexit "(11) Fail to mkdir $mntpt"
+               return 11
+       }
+
+       mount_client $mntpt || {
+               error_noexit "(12) Fail to mount $mntpt"
+               return 12
+       }
+
+       mkdir $basedir || {
+               umount_client $mntpt
+               error_noexit "(13) Fail to mkdir $basedir"
+               return 13
+       }
+
+       echo "&&&&& Start create $UNIT files under $basedir at: $(date) &&&&&"
+       createmany -o ${basedir}/f $UNIT || {
+               umount_client $mntpt
+               error_noexit "(14) Fail to gen $UNIT files under $basedir"
+               return 14
+       }
+       echo "&&&&& End create $UNIT files under $basedir at: $(date) &&&&&"
+
+       umount_client $mntpt
+}
+
+layout_gen_set()
+{
+       local cnt=$1
+
+       echo "##### Start generate test set for subdirs=$cnt at: $(date) #####"
+       for ((k=0; k<$MDSCOUNT; k++)); do
+               $LFS mkdir -i ${k} $LFSCKDIR/${k} || return 10
+
+               for ((l=1; l<=$cnt; l++)); do
+                       layout_gen_one ${k} ${l} &
+               done
+       done
+
+       wait
+       echo "##### End generate test set for subdirs=$cnt at: $(date) #####"
+}
+
+t4_test()
+{
+       local saved_mdscount=$MDSCOUNT
+       local saved_ostcount=$OSTCOUNT
+
+       echo "stopall"
+       stopall > /dev/null || error "(1) Fail to stopall"
+
+       MDSCOUNT=1
+       for ((i=1; i<=$saved_ostcount; i=$((i * 2)))); do
+               OSTCOUNT=${i}
+
+               echo "+++++ Start cycle ostcount=$OSTCOUNT at: $(date) +++++"
+               echo
+
+               for ((j=$MINSUBDIR; j<=$MAXSUBDIR; j=$((j * FACTOR)))); do
+                       echo "formatall"
+                       formatall > /dev/null ||
+                               error "(2) Fail to formatall, subdirs=${j}"
+
+                       echo "setupall"
+                       setupall > /dev/null ||
+                               error "(3) Fail to setupall, subdirs=${j}"
+
+                       mkdir $LFSCKDIR ||
+                       error "(4) Fail to mkdir $LFSCKDIR, subdirs=${j}"
+
+                       $LFS setstripe -c ${OSTCOUNT} -i 0 $LFSCKDIR ||
+                       error "(5) Fail to setstripe on $LFSCKDIR, subdirs=${j}"
+
+                       local RC=0
+                       layout_gen_set ${j} || RC=$?
+                       [ $RC -eq 0 ] ||
+                       error "(6) Fail to generate set $RC, subdirs=${j}"
+
+                       RC=0
+                       layout_test_one || RC=$?
+                       [ $RC -eq 0 ] ||
+                               error "(7) LFSCK failed with $RC, subdirs=${j}"
+               done
+
+               echo "stopall"
+               stopall > /dev/null || error "(8) Fail to stopall, subdirs=${j}"
+
+               echo
+               echo "----- Stop cycle ostcount=$OSTCOUNT at: $(date) -----"
+       done
+
+       MDSCOUNT=$saved_mdscount
+       OSTCOUNT=$saved_ostcount
+
+       echo "formatall"
+       formatall > /dev/null || error "(9) Fail to stopall"
+}
+
+test_4a() {
+       t4_test
+}
+run_test 4a "Single MDS lfsck layout performance (routine case) without load"
+
+test_4b() {
+       echo "Inject failure stub to simulate dangling reference"
+       #define OBD_FAIL_LFSCK_DANGLING 0x1610
+       for i in $(seq $OSTCOUNT); do
+               do_facet ost${i} $LCTL set_param fail_loc=0x1610
+       done
+
+       t4_test
+
+       for i in $(seq $OSTCOUNT); do
+               do_facet ost${i} $LCTL set_param fail_loc=0
+       done
+}
+run_test 4b "Single MDS lfsck layout performance (repairing case) without load"
+
+t5_test()
+{
+       local saved_mdscount=$MDSCOUNT
+
+       echo "stopall"
+       stopall > /dev/null || error "(1) Fail to stopall"
+
+       for ((i=1; i<=$saved_mdscount; i++)); do
+               MDSCOUNT=${i}
+
+               echo "+++++ Start cycle mdscount=$MDSCOUNT at: $(date) +++++"
+               echo
+
+               for ((j=$MINSUBDIR; j<=$MAXSUBDIR; j=$((j * FACTOR)))); do
+                       echo "formatall"
+                       formatall > /dev/null ||
+                               error "(2) Fail to formatall, subdirs=${j}"
+
+                       echo "setupall"
+                       setupall > /dev/null ||
+                               error "(3) Fail to setupall, subdirs=${j}"
+
+                       mkdir $LFSCKDIR ||
+                       error "(4) Fail to mkdir $LFSCKDIR, subdirs=${j}"
+
+                       $LFS setstripe -c ${OSTCOUNT} -i 0 $LFSCKDIR ||
+                       error "(5) Fail to setstripe on $LFSCKDIR, subdirs=${j}"
+
+                       local RC=0
+                       layout_gen_set ${j} || RC=$?
+                       [ $RC -eq 0 ] ||
+                       error "(6) Fail to generate set $RC, subdirs=${j}"
+
+                       RC=0
+                       layout_test_one || RC=$?
+                       [ $RC -eq 0 ] ||
+                               error "(7) LFSCK failed with $RC, subdirs=${j}"
+               done
+
+               echo "stopall"
+               stopall > /dev/null || error "(8) Fail to stopall"
+
+               echo
+               echo "----- Stop cycle mdscount=$MDSCOUNT at: $(date) -----"
+       done
+
+       MDSCOUNT=$saved_mdscount
+
+       echo "formatall"
+       formatall > /dev/null || error "(9) Fail to stopall"
+}
+
+test_5a() {
+       t5_test
+}
+run_test 5a "lfsck layout performance (routine case) without load for DNE"
+
+test_5b() {
+       echo "Inject failure stub to simulate dangling reference"
+       #define OBD_FAIL_LFSCK_DANGLING 0x1610
+       for i in $(seq $OSTCOUNT); do
+               do_facet ost${i} $LCTL set_param fail_loc=0x1610
+       done
+
+       t5_test
+
+       for i in $(seq $OSTCOUNT); do
+               do_facet ost${i} $LCTL set_param fail_loc=0
+       done
+}
+run_test 5b "lfsck layout performance (repairing case) without load for DNE"
+
+layout_fast_create() {
+       local total=$1
+       local lbase=$2
+       local threads=$3
+       local ldir="/test-${lbase}"
+       local cycle=0
+       local count=$UNIT
+
+       while true; do
+               [ $count -eq 0 -o  $count -gt ${total} ] && count=$total
+               local usize=$((count / NTHREADS))
+               [ ${usize} -eq 0 ] && break
+               local tdir=${ldir}-${cycle}-
+
+               echo "[cycle: $cycle] [threads: $threads]"\
+                    "[files: $count] [basedir: $tdir]"
+
+               lfsck_create
+
+               total=$((total - usize * NTHREADS))
+               [ $total -eq 0 ] && break
+               lbase=$((lbase + usize))
+               cycle=$((cycle + 1))
+       done
+}
+
+lfsck_detach_error()
+{
+       lfsck_detach
+       error "$@"
+}
+
+test_6() {
+       [ $INCFACTOR -gt 25 ] && INCFACTOR=25
+
+       echo "stopall"
+       stopall > /dev/null || error "(1) Fail to stopall"
+
+       local saved_mdscount=$MDSCOUNT
+
+       MDSCOUNT=1
+       echo "formatall"
+       formatall > /dev/null || error "(2) Fail to formatall"
+
+       echo "setupall"
+       setupall > /dev/null || error "(3) Fail to setupall"
+
+       mkdir $LFSCKDIR || error "(4) Fail to mkdir $LFSCKDIR"
+
+       $LFS setstripe -c ${OSTCOUNT} -i 0 $LFSCKDIR ||
+               error "(5) Fail to setstripe on $LFSCKDIR"
+
+       local RC=0
+       layout_gen_set $TOTSUBDIR || RC=$?
+       [ $RC -eq 0 ] ||
+               error "(6) Fail to generate set $RC, subdirs=$TOTSUBDIR"
+
+       echo
+       echo "***** Start layout LFSCK on single MDS at: $(date) *****"
+       $RLCTL lfsck_start -M ${MDT_DEV} -t layout -r ||
+               error "(7) Fail to start layout LFSCK"
+
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_layout |
+               awk '/^status/ { print \\\$2 }'" "completed" $WTIME || {
+               show_layout 1
+               error "(8) layout LFSCK cannot finished in time"
+       }
+       echo "***** End layout LFSCK on single MDS at: $(date) *****"
+
+       local SPEED=$(show_layout 1 |
+                     awk '/^average_speed_phase1/ { print $2 }')
+       echo "lfsck_layout full_speed is $SPEED objs/sec"
+
+       local inc_count=$((BASE_COUNT * INCFACTOR / 100))
+       local nfiles=$((inc_count / 2))
+
+       lfsck_attach
+       for ((m=0, n=$INCFACTOR; n<100;
+             m=$((m + inc_count)), n=$((n + INCFACTOR)))); do
+               local sl=$((SPEED * n / 100))
+
+               $STOP_LFSCK > /dev/null 2>&1
+               echo
+               echo "start lfsck_layout with speed ${sl} at: $(date)"
+               $RLCTL lfsck_start -M ${MDT_DEV} -t layout -r -s ${sl} ||
+                       lfsck_detach_error \
+                       "(9) Fail to start lfsck_layout with speed ${sl}"
+
+               echo "&&&&& Start create files set from ${m} at: $(date) &&&&&"
+               layout_fast_create $nfiles ${m} $NTHREADS ||
+                       lfsck_detach_error "(10) Fail to create files"
+               echo "&&&&& End create files set from ${m} at: $(date) &&&&&"
+       done
+
+       $STOP_LFSCK > /dev/null 2>&1
+       echo
+       echo "start lfsck_layout with full speed at: $(date)"
+       $RLCTL lfsck_start -M ${MDT_DEV} -t layout -r -s 0 ||
+               lfsck_detach_error \
+               "(11) Fail to start lfsck_layout with full speed"
+
+       echo "&&&&& start to create files set from ${m} at: $(date) &&&&&"
+       layout_fast_create $nfiles ${m} $NTHREADS ||
+               lfsck_detach_error "(12) Fail to create files"
+       echo "&&&&& end to create files set from ${m} at: $(date) &&&&&"
+
+       m=$((m + inc_count))
+       $STOP_LFSCK > /dev/null 2>&1
+       echo
+       echo "create without lfsck_layout run back-ground"
+       echo "&&&&& start to create files set from ${m} at: $(date) &&&&&"
+       layout_fast_create $nfiles ${m} $NTHREADS ||
+               lfsck_detach_error "(13) Fail to create files"
+       echo "&&&&& end to create files set from ${m} at: $(date) &&&&&"
+
+       lfsck_detach
+       echo
+       echo "stopall"
+       stopall > /dev/null || error "(14) Fail to stopall"
+
+       MDSCOUNT=$saved_mdscount
+
+       echo "formatall"
+       formatall > /dev/null || error "(15) Fail to stopall"
+}
+run_test 6 "lfsck layout impact on create performance"
 
 # cleanup the system at last
 lfsck_cleanup