Whamcloud - gitweb
LU-5241 tests: speed up sanity-lfsck and sanity-scrub tests 18/10818/6
author: Emoly Liu <emoly.liu@intel.com>
Sat, 20 Sep 2014 06:10:30 +0000 (23:10 -0700)
committer: Oleg Drokin <oleg.drokin@intel.com>
Thu, 4 Dec 2014 20:30:48 +0000 (20:30 +0000)
1) drop unnecessary device reformatting.
2) drop unnecessary system stop/restart.
3) replace 'sleep' with wait_update_facet to avoid idle waiting.
4) drop the unnecessary "-p" option in some "mkdir" cases.
5) replace "touch" with "createmany -m".
6) other code style changes and cleanup.

This patch is back-ported from the following one:
Lustre-commit: 1dbba329174e6c7f7712f01fc4e44c44400fbc92
Lustre-change: http://review.whamcloud.com/9704

LU-4803 ofd: skip orphan cleanup when inject OI error

When injecting an OI error to simulate some failure cases, the object
may be mapped to an invalid local target (inode for ldiskfs), so skip
the orphan cleanup in OSP to avoid unexpected warnings or object destruction.

Lustre-commit: 0bafbd7d8f652997d83b3cc2419894f48833f424
Lustre-change: http://review.whamcloud.com/9759

Test-Parameters: alwaysuploadlogs envdefinitions=SLOW=yes \
testlist=lfsck-performance,sanity-scrub,sanity-lfsck

Signed-off-by: Fan Yong <fan.yong@intel.com>
Signed-off-by: Emoly Liu <emoly.liu@intel.com>
Change-Id: Iacae0047a502e3baa778b86536220d70c717f6f5
Reviewed-on: http://review.whamcloud.com/10818
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/ofd/ofd_obd.c
lustre/osd-ldiskfs/osd_handler.c
lustre/tests/lfsck-performance.sh
lustre/tests/sanity-lfsck.sh
lustre/tests/sanity-scrub.sh
lustre/tests/test-framework.sh

index eaed5a2..2bcff3b 100644 (file)
@@ -317,6 +317,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_OST_ENOINO              0x229
 #define OBD_FAIL_OST_DQACQ_NET           0x230
 #define OBD_FAIL_OST_STATFS_EINPROGRESS  0x231
+#define OBD_FAIL_OST_NODESTROY          0x233
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
index aa39083..f4844b7 100644 (file)
@@ -1126,6 +1126,7 @@ static int ofd_orphans_destroy(const struct lu_env *env,
                               struct obdo *oa)
 {
        struct ofd_thread_info  *info = ofd_info(env);
+       struct lu_fid           *fid    = &info->fti_fid;
        obd_id                   last;
        int                      skip_orphan;
        int                      rc = 0;
@@ -1141,24 +1142,30 @@ static int ofd_orphans_destroy(const struct lu_env *env,
                       ofd_name(ofd), POSTID(&oa->o_oi));
                RETURN(-EINVAL);
        }
+       last = ofd_seq_last_oid(oseq);
 
        LASSERT(exp != NULL);
        skip_orphan = !!(exp_connect_flags(exp) & OBD_CONNECT_SKIP_ORPHAN);
 
-       last = ofd_seq_last_oid(oseq);
+       if (OBD_FAIL_CHECK(OBD_FAIL_OST_NODESTROY))
+               goto done;
+
        LCONSOLE(D_INFO, "%s: deleting orphan objects from "DOSTID
                 " to "DOSTID"\n", ofd_name(ofd), ostid_seq(&oa->o_oi),
                 end_id + 1, ostid_seq(&oa->o_oi), last);
 
        for (ostid_set_id(&oi, last); ostid_id(&oi) > end_id;
                          ostid_dec_id(&oi)) {
-               rc = ostid_to_fid(&info->fti_fid, &oi, 0);
+               rc = ostid_to_fid(fid, &oi, 0);
                if (rc != 0)
                        GOTO(out_put, rc);
-               rc = ofd_destroy_by_fid(env, ofd, &info->fti_fid, 1);
-               if (rc && rc != -ENOENT) /* this is pretty fatal... */
-                       CEMERG("%s: error destroying precreated id "DOSTID
-                              ": rc = %d\n", ofd_name(ofd), POSTID(&oi), rc);
+               rc = ofd_destroy_by_fid(env, ofd, fid, 1);
+               if (rc != 0 && rc != -ENOENT && rc != -ESTALE &&
+                   likely(rc != -EREMCHG && rc != -EINPROGRESS))
+                       /* this is pretty fatal... */
+                       CEMERG("%s: error destroying precreated id "
+                              DFID": rc = %d\n",
+                              ofd_name(ofd), PFID(fid), rc);
                if (!skip_orphan) {
                        ofd_seq_last_oid_set(oseq, ostid_id(&oi) - 1);
                        /* update last_id on disk periodically so that if we
@@ -1170,7 +1177,9 @@ static int ofd_orphans_destroy(const struct lu_env *env,
        }
        CDEBUG(D_HA, "%s: after destroy: set last_id to "DOSTID"\n",
               ofd_obd(ofd)->obd_name, POSTID(&oa->o_oi));
+done:
        if (!skip_orphan) {
+               ofd_seq_last_oid_set(oseq, ostid_id(&oi) - 1);
                rc = ofd_seq_last_oid_write(env, ofd, oseq);
        } else {
                /* don't reuse orphan object, return last used objid */
index b01f028..717be8c 100644 (file)
@@ -442,9 +442,6 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        int                      rc;
        ENTRY;
 
-       if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
-               RETURN(0);
-
        CLASSERT(LMA_OLD_SIZE >= sizeof(*lma));
        rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA,
                             info->oti_mdt_attrs_old, LMA_OLD_SIZE);
index 9b602de..dca5049 100644 (file)
@@ -70,7 +70,7 @@ lfsck_create() {
                test_mkdir ${tdir}
        EOF"
 
-       for ((j=1; j<${threads}; j++)); do
+       for ((j = 1; j < ${threads}; j++)); do
                ${ECHOCMD} "${LCTL} <<-EOF
                        cfg_device ${echodev}
                        test_mkdir ${tdir}${j}
@@ -138,7 +138,7 @@ test_0() {
                ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null ||
                error "Fail to reformat the MDS!"
 
-       for ((i=$MINCOUNT; i<=$MAXCOUNT; i=$((i * FACTOR)))); do
+       for ((i = $MINCOUNT; i <= $MAXCOUNT; i = $((i * FACTOR)))); do
                local nfiles=$((i - BCOUNT))
 
                echo "+++ start to create for ${i} files set at: $(date) +++"
@@ -180,7 +180,8 @@ test_1() {
                ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null ||
                error "Fail to reformat the MDS!"
 
-       for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do
+       for ((i = $MINCOUNT_REPAIR; i <= $MAXCOUNT_REPAIR;
+             i = $((i * FACTOR)))); do
                local nfiles=$((i - BCOUNT))
 
                echo "+++ start to create for ${i} files set at: $(date) +++"
@@ -224,7 +225,8 @@ run_test 1 "lfsck performance test (backup/restore) without load"
 test_2() {
        local i
 
-       for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do
+       for ((i = $MINCOUNT_REPAIR; i <= $MAXCOUNT_REPAIR;
+             i = $((i * FACTOR)))); do
                stopall
                do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local
                reformat_external_journal
@@ -278,7 +280,7 @@ test_3() {
                ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null ||
                error "Fail to reformat the MDS!"
 
-       for ((i=$inc_count; i<=$BASE_COUNT; i=$((i + inc_count)))); do
+       for ((i = $inc_count; i <= $BASE_COUNT; i = $((i + inc_count)))); do
                local nfiles=$((i - BCOUNT))
 
                echo "+++ start to create for ${i} files set at: $(date) +++"
@@ -309,7 +311,7 @@ test_3() {
        local inc_speed=$((FULL_SPEED * INCFACTOR / 100))
        local j
 
-       for ((j=$inc_speed; j<$FULL_SPEED; j=$((j + inc_speed)))); do
+       for ((j = $inc_speed; j < $FULL_SPEED; j = $((j + inc_speed)))); do
                start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null ||
                        error "Fail to start MDS!"
 
index 23139b0..d08d75a 100644 (file)
@@ -24,12 +24,17 @@ require_dsh_mds || exit 0
 MCREATE=${MCREATE:-mcreate}
 SAVED_MDSSIZE=${MDSSIZE}
 SAVED_OSTSIZE=${OSTSIZE}
+SAVED_OSTCOUNT=${OSTCOUNT}
 # use small MDS + OST size to speed formatting time
 # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
 MDSSIZE=100000
 OSTSIZE=100000
+# no need too much OSTs, to reduce the format/start/stop overhead
+[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
 
-check_and_setup_lustre
+# build up a clean test environment.
+formatall
+setupall
 
 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] &&
        skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre &&
@@ -57,50 +62,41 @@ lfsck_prep() {
        local nfiles=$2
        local igif=$3
 
-       echo "formatall"
-       formatall > /dev/null
-
-       echo "setupall"
-       setupall > /dev/null
+       check_mount_and_prep
 
+       echo "preparing... $nfiles * $ndirs files will be created $(date)."
        if [ ! -z $igif ]; then
                #define OBD_FAIL_FID_IGIF       0x1504
                do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504
        fi
 
-       echo "preparing... ${nfiles} * ${ndirs} files will be created."
-       mkdir -p $DIR/$tdir
-       cp $LUSTRE/tests/*.sh $DIR/
-       for ((i = 0; i < ${ndirs}; i++)); do
-               mkdir $DIR/$tdir/d${i}
-               touch $DIR/$tdir/f${i}
-               for ((j = 0; j < ${nfiles}; j++)); do
-                       touch $DIR/$tdir/d${i}/f${j}
-               done
-               mkdir $DIR/$tdir/e${i}
-       done
+       cp $LUSTRE/tests/*.sh $DIR/$tdir/
+       if [ $ndirs -gt 0 ]; then
+               createmany -d $DIR/$tdir/d $ndirs
+               createmany -m $DIR/$tdir/f $ndirs
+               if [ $nfiles -gt 0 ]; then
+                       for ((i = 0; i < $ndirs; i++)); do
+                               createmany -m $DIR/$tdir/d${i}/f $nfiles > \
+                                       /dev/null || error "createmany $nfiles"
+                       done
+               fi
+               createmany -d $DIR/$tdir/e $ndirs
+       fi
 
        if [ ! -z $igif ]; then
                touch $DIR/$tdir/dummy
                do_facet $SINGLEMDS $LCTL set_param fail_loc=0
        fi
 
-       echo "prepared."
-       cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
-       echo "stop $SINGLEMDS"
-       stop $SINGLEMDS > /dev/null || error "Fail to stop MDS!"
+       echo "prepared $(date)."
 }
 
 test_0() {
-       lfsck_prep 10 10
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
+       lfsck_prep 3 3
 
        #define OBD_FAIL_LFSCK_DELAY1           0x1600
-       do_facet $SINGLEMDS $LCTL set_param fail_val=3
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
-       $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
+       do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600
+       $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
 
        $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)"
 
@@ -120,12 +116,13 @@ test_0() {
        [ "$STATUS" == "scanning-phase1" ] ||
                error "(8) Expect 'scanning-phase1', but got '$STATUS'"
 
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(9) Expect 'completed', but got '$STATUS'"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(9) unexpected status"
+       }
 
        local repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
@@ -134,28 +131,24 @@ test_0() {
 
        local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
        $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!"
-       sleep 3
-
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(12) Expect 'completed', but got '$STATUS'"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(12) unexpected status"
+       }
 
        local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }')
        [ $((scanned1 + 1)) -eq $scanned2 ] ||
                error "(13) Expect success $((scanned1 + 1)), but got $scanned2"
 
        echo "stopall, should NOT crash LU-3649"
-       stopall > /dev/null
+       stopall || error "(14) Fail to stopall"
 }
 run_test 0 "Control LFSCK manually"
 
 test_1a() {
        lfsck_prep 1 1
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
-
-       mount_client $MOUNT || error "(2) Fail to start client!"
 
        #define OBD_FAIL_FID_INDIR      0x1501
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
@@ -163,12 +156,13 @@ test_1a() {
 
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0
        umount_client $MOUNT
-       $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
-
-       sleep 3
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(4) Expect 'completed', but got '$STATUS'"
+       $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(4) unexpected status"
+       }
 
        local repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
@@ -188,11 +182,6 @@ run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
 test_1b()
 {
        lfsck_prep 1 1
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
-
-       mount_client $MOUNT || error "(2) Fail to start client!"
 
        #define OBD_FAIL_FID_INLMA      0x1502
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
@@ -202,12 +191,13 @@ test_1b()
        umount_client $MOUNT
        #define OBD_FAIL_FID_NOLMA      0x1506
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
-       $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
-
-       sleep 3
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(4) Expect 'completed', but got '$STATUS'"
+       $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(4) unexpected status"
+       }
 
        local repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
@@ -227,11 +217,6 @@ run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
 
 test_2a() {
        lfsck_prep 1 1
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
-
-       mount_client $MOUNT || error "(2) Fail to start client!"
 
        #define OBD_FAIL_LFSCK_LINKEA_CRASH     0x1603
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
@@ -239,12 +224,13 @@ test_2a() {
 
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0
        umount_client $MOUNT
-       $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
-
-       sleep 3
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(4) Expect 'completed', but got '$STATUS'"
+       $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(4) unexpected status"
+       }
 
        local repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase1/ { print $2 }')
@@ -266,11 +252,6 @@ run_test 2a "LFSCK can find out and repair crashed linkEA entry"
 test_2b()
 {
        lfsck_prep 1 1
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
-
-       mount_client $MOUNT || error "(2) Fail to start client!"
 
        #define OBD_FAIL_LFSCK_LINKEA_MORE      0x1604
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
@@ -278,12 +259,13 @@ test_2b()
 
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0
        umount_client $MOUNT
-       $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
-
-       sleep 3
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(4) Expect 'completed', but got '$STATUS'"
+       $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(4) unexpected status"
+       }
 
        local repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase2/ { print $2 }')
@@ -305,11 +287,6 @@ run_test 2b "LFSCK can find out and remove invalid linkEA entry"
 test_2c()
 {
        lfsck_prep 1 1
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
-
-       mount_client $MOUNT || error "(2) Fail to start client!"
 
        #define OBD_FAIL_LFSCK_LINKEA_MORE2     0x1605
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605
@@ -317,12 +294,13 @@ test_2c()
 
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0
        umount_client $MOUNT
-       $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
-
-       sleep 3
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(4) Expect 'completed', but got '$STATUS'"
+       $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(4) unexpected status"
+       }
 
        local repaired=$($SHOW_NAMESPACE |
                         awk '/^updated_phase2/ { print $2 }')
@@ -344,35 +322,35 @@ run_test 2c "LFSCK can find out and remove repeated linkEA entry"
 test_4()
 {
        lfsck_prep 3 3
+       cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
+       stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
+
        mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!"
        echo "start $SINGLEMDS with disabling OI scrub"
        start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
                error "(2) Fail to start MDS!"
 
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(3) Expect 'init', but got '$STATUS'"
-
        #define OBD_FAIL_LFSCK_DELAY2           0x1601
-       do_facet $SINGLEMDS $LCTL set_param fail_val=1
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
-       $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
+       $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || {
+               $SHOW_NAMESPACE
+               error "(5) unexpected status"
+       }
 
-       sleep 5
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
        [ "$STATUS" == "scanning-phase1" ] ||
-               error "(5) Expect 'scanning-phase1', but got '$STATUS'"
-
-       local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
-       [ "$FLAGS" == "inconsistent" ] ||
-               error "(6) Expect 'inconsistent', but got '$FLAGS'"
+               error "(6) Expect 'scanning-phase1', but got '$STATUS'"
 
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(7) Expect 'completed', but got '$STATUS'"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(7) unexpected status"
+       }
 
        FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
        [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
@@ -398,35 +376,35 @@ run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
 test_5()
 {
        lfsck_prep 1 1 1
+       cleanup_mount $MOUNT || error "(0.1) Fail to stop client!"
+       stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!"
+
        mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!"
        echo "start $SINGLEMDS with disabling OI scrub"
        start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
                error "(2) Fail to start MDS!"
 
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(3) Expect 'init', but got '$STATUS'"
-
        #define OBD_FAIL_LFSCK_DELAY2           0x1601
-       do_facet $SINGLEMDS $LCTL set_param fail_val=1
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
-       $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
+       $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || {
+               $SHOW_NAMESPACE
+               error "(5) unexpected status"
+       }
 
-       sleep 5
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
        [ "$STATUS" == "scanning-phase1" ] ||
-               error "(5) Expect 'scanning-phase1', but got '$STATUS'"
-
-       local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
-       [ "$FLAGS" == "inconsistent,upgrade" ] ||
-               error "(6) Expect 'inconsistent,upgrade', but got '$FLAGS'"
+               error "(6) Expect 'scanning-phase1', but got '$STATUS'"
 
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(7) Expect 'completed', but got '$STATUS'"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(7) unexpected status"
+       }
 
        FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
        [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
@@ -450,18 +428,14 @@ test_5()
        [ "$dummyname" == "$DIR/$tdir/dummy" ] ||
                error "(13) Fail to generate linkEA: $dummyfid $dummyname"
 }
-run_test 5 "LFSCK can handle IFIG object upgrading"
+run_test 5 "LFSCK can handle IGIF object upgrading"
 
 test_6a() {
-       lfsck_prep 10 10
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
+       lfsck_prep 5 5
 
        #define OBD_FAIL_LFSCK_DELAY1           0x1600
-       do_facet $SINGLEMDS $LCTL set_param fail_val=1
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
-       $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
+       $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
 
        local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
        [ "$STATUS" == "scanning-phase1" ] ||
@@ -472,103 +446,111 @@ test_6a() {
        # Fail the LFSCK to guarantee there is at least one checkpoint
        #define OBD_FAIL_LFSCK_FATAL1           0x1608
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "failed" ] ||
-               error "(4) Expect 'failed', but got '$STATUS'"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "failed" 6 || {
+               $SHOW_NAMESPACE
+               error "(4) unexpected status"
+       }
 
-       local POSITION0=$($SHOW_NAMESPACE |
-                         awk '/^last_checkpoint_position/ { print $2 }' |
-                         tr -d ',')
+       local POS0=$($SHOW_NAMESPACE |
+                    awk '/^last_checkpoint_position/ { print $2 }' |
+                    tr -d ',')
 
        #define OBD_FAIL_LFSCK_DELAY1           0x1600
-       do_facet $SINGLEMDS $LCTL set_param fail_val=1
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600
        $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
 
        STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
        [ "$STATUS" == "scanning-phase1" ] ||
                error "(6) Expect 'scanning-phase1', but got '$STATUS'"
 
-       local POSITION1=$($SHOW_NAMESPACE |
-                         awk '/^latest_start_position/ { print $2 }' |
-                         tr -d ',')
-       [ $POSITION0 -lt $POSITION1 ] ||
-               error "(7) Expect larger than: $POSITION0, but got $POSITION1"
-
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(8) Expect 'completed', but got '$STATUS'"
+       local POS1=$($SHOW_NAMESPACE |
+                    awk '/^latest_start_position/ { print $2 }' |
+                    tr -d ',')
+       [ $POS0 -lt $POS1 ] ||
+               error "(7) Expect larger than: $POS0, but got $POS1"
+
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(8) unexpected status"
+       }
 }
 run_test 6a "LFSCK resumes from last checkpoint (1)"
 
 test_6b() {
-       lfsck_prep 10 10
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
+       lfsck_prep 5 5
 
        #define OBD_FAIL_LFSCK_DELAY2           0x1601
-       do_facet $SINGLEMDS $LCTL set_param fail_val=1
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
-       $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
+       $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
 
        local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
        [ "$STATUS" == "scanning-phase1" ] ||
                error "(3) Expect 'scanning-phase1', but got '$STATUS'"
 
-       # Sleep 3 sec to guarantee at least one object processed by LFSCK
-       sleep 3
+       # Sleep 5 sec to guarantee that we are in the directory scanning
+       sleep 5
        # Fail the LFSCK to guarantee there is at least one checkpoint
        #define OBD_FAIL_LFSCK_FATAL2           0x1609
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "failed" ] ||
-               error "(4) Expect 'failed', but got '$STATUS'"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "failed" 6 || {
+               $SHOW_NAMESPACE
+               error "(4) unexpected status"
+       }
+
+       local O_POS0=$($SHOW_NAMESPACE |
+                      awk '/^last_checkpoint_position/ { print $2 }' |
+                      tr -d ',')
 
-       local POSITION0=$($SHOW_NAMESPACE |
-                         awk '/^last_checkpoint_position/ { print $4 }')
+       local D_POS0=$($SHOW_NAMESPACE |
+                      awk '/^last_checkpoint_position/ { print $4 }')
 
        #define OBD_FAIL_LFSCK_DELAY2           0x1601
-       do_facet $SINGLEMDS $LCTL set_param fail_val=1
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
        $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!"
 
        STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
        [ "$STATUS" == "scanning-phase1" ] ||
                error "(6) Expect 'scanning-phase1', but got '$STATUS'"
 
-       local POSITION1=$($SHOW_NAMESPACE |
-                         awk '/^latest_start_position/ { print $4 }')
-       if [ $POSITION0 -gt $POSITION1 ]; then
-               [ $POSITION1 -eq 0 -a $POSITION0 -eq $((POSITION1 + 1)) ] ||
-               error "(7) Expect larger than: $POSITION0, but got $POSITION1"
+       local O_POS1=$($SHOW_NAMESPACE |
+                      awk '/^latest_start_position/ { print $2 }' |
+                      tr -d ',')
+       local D_POS1=$($SHOW_NAMESPACE |
+                      awk '/^latest_start_position/ { print $4 }')
+
+       if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then
+               [ $O_POS0 -lt $O_POS1 ] ||
+                       error "(7.1) $O_POS1 is not larger than $O_POS0"
+       else
+               [ $D_POS0 -lt $D_POS1 ] ||
+                       error "(7.2) $D_POS1 is not larger than $D_POS0"
        fi
 
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(8) Expect 'completed', but got '$STATUS'"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(8) unexpected status"
+       }
 }
 run_test 6b "LFSCK resumes from last checkpoint (2)"
 
 test_7a()
 {
-       lfsck_prep 10 10
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
+       lfsck_prep 5 5
+       umount_client $MOUNT
 
        #define OBD_FAIL_LFSCK_DELAY2           0x1601
-       do_facet $SINGLEMDS $LCTL set_param fail_val=1
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
-       $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!"
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601
+       $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!"
 
        local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
        [ "$STATUS" == "scanning-phase1" ] ||
@@ -587,23 +569,19 @@ test_7a()
        [ "$STATUS" == "scanning-phase1" ] ||
                error "(6) Expect 'scanning-phase1', but got '$STATUS'"
 
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(7) Expect 'completed', but got '$STATUS'"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(7) unexpected status"
+       }
 }
 run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)"
 
 test_7b()
 {
        lfsck_prep 2 2
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
-
-       mount_client $MOUNT || error "(2) Fail to start client!"
 
        #define OBD_FAIL_LFSCK_LINKEA_MORE      0x1604
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
@@ -612,14 +590,14 @@ test_7b()
        done
 
        #define OBD_FAIL_LFSCK_DELAY3           0x1602
-       do_facet $SINGLEMDS $LCTL set_param fail_val=1
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
-       $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
-
-       sleep 3
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "scanning-phase2" ] ||
-               error "(4) Expect 'scanning-phase2', but got '$STATUS'"
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602
+       $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
+               $SHOW_NAMESPACE
+               error "(4) unexpected status"
+       }
 
        echo "stop $SINGLEMDS"
        stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!"
@@ -632,28 +610,29 @@ test_7b()
        [ "$STATUS" == "scanning-phase2" ] ||
                error "(7) Expect 'scanning-phase2', but got '$STATUS'"
 
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(8) Expect 'completed', but got '$STATUS'"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(8) unexpected status"
+       }
 }
 run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)"
 
 test_8()
 {
+       echo "formatall"
+       formatall > /dev/null
+       echo "setupall"
+       setupall > /dev/null
+
        lfsck_prep 20 20
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
 
        local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
        [ "$STATUS" == "init" ] ||
                error "(2) Expect 'init', but got '$STATUS'"
 
-       mount_client $MOUNT || error "(3) Fail to start client!"
-
        #define OBD_FAIL_LFSCK_LINKEA_CRASH     0x1603
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
        mkdir $DIR/$tdir/crashed
@@ -664,9 +643,10 @@ test_8()
                touch $DIR/$tdir/dummy${i}
        done
 
+       umount_client $MOUNT || error "(3) Fail to stop client!"
+
        #define OBD_FAIL_LFSCK_DELAY2           0x1601
-       do_facet $SINGLEMDS $LCTL set_param fail_val=2
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
+       do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601
        $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
 
        STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
@@ -687,10 +667,12 @@ test_8()
 
        #define OBD_FAIL_LFSCK_FATAL2           0x1609
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609
-       sleep 3
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "failed" ] ||
-               error "(10) Expect 'failed', but got '$STATUS'"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "failed" 6 || {
+               $SHOW_NAMESPACE
+               error "(10) unexpected status"
+       }
 
        #define OBD_FAIL_LFSCK_DELAY1           0x1600
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600
@@ -741,29 +723,30 @@ test_8()
                error "(20) Expect 'paused', but got '$STATUS'"
 
        #define OBD_FAIL_LFSCK_DELAY3           0x1602
-       do_facet $SINGLEMDS $LCTL set_param fail_val=2
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
+       do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602
 
        $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!"
-       sleep 2
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "scanning-phase2" ] ||
-               error "(22) Expect 'scanning-phase2', but got '$STATUS'"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || {
+               $SHOW_NAMESPACE
+               error "(22) unexpected status"
+       }
 
        local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
        [ "$FLAGS" == "scanned-once,inconsistent" ] ||
                error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'"
 
-       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-       do_facet $SINGLEMDS $LCTL set_param fail_val=0
-       sleep 2
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(24) Expect 'completed', but got '$STATUS'"
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(24) unexpected status"
+       }
 
        FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
        [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'"
-
 }
 run_test 8 "LFSCK state machine"
 
@@ -774,17 +757,10 @@ test_9a() {
        fi
 
        lfsck_prep 70 70
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
-
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(2) Expect 'init', but got '$STATUS'"
 
        local BASE_SPEED1=100
        local RUN_TIME1=10
-       $START_NAMESPACE -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
+       $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!"
 
        sleep $RUN_TIME1
        STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
@@ -841,37 +817,28 @@ test_9b() {
        fi
 
        lfsck_prep 0 0
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
 
-       mount_client $MOUNT || error "(2) Fail to start client!"
-
-       echo "Another preparing... 50 * 50 files (with error) will be created."
+       echo "Preparing another 50 * 50 files (with error) at $(date)."
        #define OBD_FAIL_LFSCK_LINKEA_MORE      0x1604
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
+       createmany -d $DIR/$tdir/d 50
+       createmany -m $DIR/$tdir/f 50
        for ((i = 0; i < 50; i++)); do
-               mkdir -p $DIR/$tdir/d${i}
-               touch $DIR/$tdir/f${i}
-               for ((j = 0; j < 50; j++)); do
-                       touch $DIR/$tdir/d${i}/f${j}
-               done
+               createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null
        done
 
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(3) Expect 'init', but got '$STATUS'"
-
        #define OBD_FAIL_LFSCK_NO_DOUBLESCAN    0x160c
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c
-       $START_NAMESPACE || error "(4) Fail to start LFSCK!"
-
-       sleep 10
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "stopped" ] ||
-               error "(5) Expect 'stopped', but got '$STATUS'"
+       $START_NAMESPACE -r || error "(4) Fail to start LFSCK!"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "stopped" 10 || {
+               $SHOW_NAMESPACE
+               error "(5) unexpected status"
+       }
 
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+       echo "Prepared at $(date)."
 
        local BASE_SPEED1=50
        local RUN_TIME1=10
@@ -917,53 +884,47 @@ test_9b() {
 
        do_facet $SINGLEMDS \
                $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
-       sleep 5
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(11) Expect 'completed', but got '$STATUS'"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(11) unexpected status"
+       }
 }
 run_test 9b "LFSCK speed control (2)"
 
 test_10()
 {
        lfsck_prep 1 1
-       echo "start $SINGLEMDS"
-       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
-               error "(1) Fail to start MDS!"
-
-       mount_client $MOUNT || error "(2) Fail to start client!"
 
+       echo "Preparing more files with error at $(date)."
        #define OBD_FAIL_LFSCK_LINKEA_CRASH     0x1603
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603
+
        for ((i = 0; i < 1000; i = $((i+2)))); do
                mkdir -p $DIR/$tdir/d${i}
                touch $DIR/$tdir/f${i}
-               for ((j = 0; j < 5; j++)); do
-                       touch $DIR/$tdir/d${i}/f${j}
-               done
+               createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
        done
 
        #define OBD_FAIL_LFSCK_LINKEA_MORE      0x1604
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604
+
        for ((i = 1; i < 1000; i = $((i+2)))); do
                mkdir -p $DIR/$tdir/d${i}
                touch $DIR/$tdir/f${i}
-               for ((j = 0; j < 5; j++)); do
-                       touch $DIR/$tdir/d${i}/f${j}
-               done
+               createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null
        done
 
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+       echo "Prepared at $(date)."
+
        ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy
 
        umount_client $MOUNT
        mount_client $MOUNT || error "(3) Fail to start client!"
 
-       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(4) Expect 'init', but got '$STATUS'"
-
-       $START_NAMESPACE -s 100 || error "(5) Fail to start LFSCK!"
+       $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!"
 
        sleep 10
        STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
@@ -993,11 +954,12 @@ test_10()
 
        do_facet $SINGLEMDS \
                $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
-       umount_client $MOUNT
-       sleep 10
-       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(16) Expect 'completed', but got '$STATUS'"
+       wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 || {
+               $SHOW_NAMESPACE
+               error "(16) unexpected status"
+       }
 }
 run_test 10 "System is available during LFSCK scanning"
 
@@ -1006,6 +968,7 @@ $LCTL set_param debug=-lfsck > /dev/null || true
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}
+OSTCOUNT=${SAVED_OSTCOUNT}
 
 # cleanup the system at last
 formatall
index eae09fb..e9fdb2a 100644 (file)
@@ -21,13 +21,19 @@ require_dsh_mds || exit 0
 
 SAVED_MDSSIZE=${MDSSIZE}
 SAVED_OSTSIZE=${OSTSIZE}
+SAVED_OSTCOUNT=${OSTCOUNT}
 # use small MDS + OST size to speed formatting time
 # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size
 MDSSIZE=100000
 OSTSIZE=100000
+# there is no need for many OSTs; cap the count to reduce format/start/stop overhead
+[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4
 
 MOUNT_2=""
-check_and_setup_lustre
+
+# build up a clean test environment.
+formatall
+setupall
 
 [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
        skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
@@ -103,16 +109,13 @@ scrub_prep() {
        local nfiles=$1
        local n
 
-       echo "formatall"
-       formatall > /dev/null
-       echo "setupall"
-       setupall > /dev/null
+       check_mount_and_prep
 
-       echo "preparing..."
+       echo "preparing... $(date)"
        for n in $(seq $MDSCOUNT); do
                echo "creating $nfiles files on mds$n"
                if [ $n -eq 1 ]; then
-                       mkdir -p $DIR/$tdir/mds$n ||
+                       mkdir $DIR/$tdir/mds$n ||
                                error "Failed to create directory mds$n"
                else
                        $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
@@ -121,11 +124,11 @@ scrub_prep() {
                cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n ||
                        error "Failed to copy files to mds$n"
                if [[ $nfiles -gt 0 ]]; then
-                       createmany -o $DIR/$tdir/mds$n/$tfile $nfiles ||
-                               error "createmany failed on mds$n"
+                       createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \
+                               /dev/null || error "createmany failed on mds$n"
                fi
        done
-       echo "prepared."
+       echo "prepared $(date)."
        cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
        for n in $(seq $MDSCOUNT); do
                echo "stop mds$n"
@@ -158,17 +161,13 @@ scrub_stop_mds() {
 scrub_check_status() {
        local error_id=$1
        local expected=$2
-       local actual
        local n
 
        for n in $(seq $MDSCOUNT); do
-               actual=$(do_facet mds$n $LCTL get_param -n \
+               wait_update_facet mds$n "$LCTL get_param -n \
                        osd-ldiskfs.$(facet_svc mds$n).oi_scrub |
-                       awk '/^status/ { print $2 }')
-               if [ "$actual" != "$expected" ]; then
-                       error "($error_id) Expected '$expected' on mds$n, but" \
-                              "got '$actual'"
-               fi
+                       awk '/^status/ { print \\\$2 }'" "$expected" 6 ||
+                       error "($error_id) Expected '$expected' on mds$n"
        done
 }
 
@@ -288,21 +287,15 @@ test_1a() {
        start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
                error "(1) Fail to start MDS!"
 
-       local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(2) Expect 'init', but got '$STATUS'"
-
        local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
        [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'"
 
        mount_client $MOUNT || error "(4) Fail to start client!"
-
        #define OBD_FAIL_OSD_FID_MAPPING                        0x193
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193
        # update .lustre OI mapping
        touch $MOUNT/.lustre
        do_facet $SINGLEMDS $LCTL set_param fail_loc=0
-
        umount_client $MOUNT || error "(5) Fail to stop client!"
 
        echo "stop $SINGLEMDS"
@@ -312,10 +305,6 @@ test_1a() {
        start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
                error "(7) Fail to start MDS!"
 
-       local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(8) Expect 'init', but got '$STATUS'"
-
        local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }')
        [ "$FLAGS" == "inconsistent" ] ||
                error "(9) Expect 'inconsistent', but got '$FLAGS'"
@@ -327,7 +316,6 @@ test_1b() {
        scrub_remove_ois 1
        echo "start MDTs without disabling OI scrub"
        scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
-       sleep 3
        scrub_check_status 3 completed
        mount_client $MOUNT || error "(4) Fail to start client!"
        scrub_check_data 5
@@ -339,20 +327,15 @@ test_1c() {
 
        # OI files to be removed:
        # idx 0: oi.16.0
-       # idx 1: oi.16.1
        # idx 2: oi.16.{2,4,8,16,32}
        # idx 3: oi.16.{3,9,27}
-       # idx 5: oi.16.{5,25}
-       # idx 7: oi.16.{7,49}
-       for index in 0 1 2 3 5 7; do
+       for index in 0 2 3; do
                scrub_prep 0
                scrub_remove_ois 1 $index
-
                echo "start MDTs with OI scrub disabled"
                scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
                scrub_check_flags 3 recreated
                scrub_start 4
-               sleep 3
                scrub_check_status 5 completed
                scrub_check_flags 6 ""
        done
@@ -364,43 +347,45 @@ test_2() {
        scrub_backup_restore 1
        echo "starting MDTs without disabling OI scrub"
        scrub_start_mds 2 "$MOUNT_OPTS_SCRUB"
-       sleep 3
        scrub_check_status 3 completed
        mount_client $MOUNT || error "(4) Fail to start client!"
        scrub_check_data 5
 }
 run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case"
 
+# test_3 is obsolete; the same case is covered by test_5.
 test_3() {
+       formatall > /dev/null
+       setupall > /dev/null
+
        scrub_prep 0
        scrub_backup_restore 1
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-       sleep 3
        scrub_check_status 3 init
        scrub_check_flags 4 inconsistent
-       echo "stopall"
-       stopall > /dev/null
 }
-run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
+#run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified"
 
 test_4() {
        scrub_prep 0
        scrub_backup_restore 1
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-       scrub_check_status 3 init
        scrub_check_flags 4 inconsistent
        mount_client $MOUNT || error "(5) Fail to start client!"
        scrub_enable_auto
        scrub_check_data 6
-       sleep 3
        scrub_check_status 7 completed
+       scrub_check_flags 8 ""
 }
 run_test 4 "Trigger OI scrub automatically if inconsistent OI mapping was found"
 
 test_5() {
-       scrub_prep 1500
+       formatall > /dev/null
+       setupall > /dev/null
+
+       scrub_prep 1000
        scrub_backup_restore 1
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
@@ -409,73 +394,58 @@ test_5() {
        mount_client $MOUNT || error "(5) Fail to start client!"
        scrub_enable_auto
 
-       local n
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=3
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-       done
-       scrub_check_data 6
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=3 fail_loc=0x190
 
+       scrub_check_data 6
        umount_client $MOUNT || error "(7) Fail to stop client!"
-
        scrub_check_status 8 scanning
 
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_CRASH         0x191
-               do_facet mds$n $LCTL set_param fail_loc=0x191
-       done
+       #define OBD_FAIL_OSD_SCRUB_CRASH         0x191
+       do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
+
        sleep 4
        scrub_stop_mds 9
 
-       for n in $(seq $MDSCOUNT); do
-               do_facet mds$n $LCTL set_param fail_loc=0
-               do_facet mds$n $LCTL set_param fail_val=0
-       done
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_loc=0 fail_val=0
 
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
-
        scrub_check_status 11 crashed
-
        scrub_stop_mds 12
 
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=3
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-       done
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=3 fail_loc=0x190
+
        echo "starting MDTs without disabling OI scrub"
        scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
-
        scrub_check_status 14 scanning
 
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_FATAL         0x192
-               do_facet mds$n $LCTL set_param fail_loc=0x192
-       done
-       sleep 4
-       scrub_check_status 15 failed
+       #define OBD_FAIL_OSD_SCRUB_FATAL         0x192
+       do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
 
+       scrub_check_status 15 failed
        mount_client $MOUNT || error "(16) Fail to start client!"
 
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=3 fail_loc=0x190
+
+       local n
        for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=3
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-               stat $DIR/$tdir/mds$n/${tfile}1000 ||
-                       error "(17) Failed to stat mds$n/${tfile}1000"
+               stat $DIR/$tdir/mds$n/${tfile}800 ||
+                       error "(17) Failed to stat mds$n/${tfile}800"
        done
 
        scrub_check_status 18 scanning
 
-       for n in $(seq $MDSCOUNT); do
-               do_facet mds$n $LCTL set_param fail_loc=0
-               do_facet mds$n $LCTL set_param fail_val=0
-       done
-       sleep 5
-       scrub_check_status 19 completed
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_loc=0 fail_val=0
 
+       scrub_check_status 19 completed
        scrub_check_flags 20 ""
 }
 run_test 5 "OI scrub state machine"
@@ -485,45 +455,41 @@ test_6() {
        scrub_backup_restore 1
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-       scrub_check_status 3 init
        scrub_check_flags 4 inconsistent
        mount_client $MOUNT || error "(5) Fail to start client!"
        scrub_enable_auto
-       local n
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=3
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-       done
+
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=3 fail_loc=0x190
+
        scrub_check_data 6
 
        # Sleep 5 sec to guarantee at least one object processed by OI scrub
        sleep 5
        # Fail the OI scrub to guarantee there is at least one checkpoint
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_FATAL         0x192
-               do_facet mds$n $LCTL set_param fail_loc=0x192
-       done
-       sleep 4
+       #define OBD_FAIL_OSD_SCRUB_FATAL         0x192
+       do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192
+
        scrub_check_status 7 failed
 
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=3 fail_loc=0x190
+
+       local n
        for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=3
-               do_facet mds$n $LCTL set_param fail_loc=0x190
                # stat will re-trigger OI scrub
                stat $DIR/$tdir/mds$n/${tfile}800 ||
                        error "(8) Failed to stat mds$n/${tfile}800"
        done
 
        umount_client $MOUNT || error "(9) Fail to stop client!"
-
        scrub_check_status 10 scanning
 
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_CRASH         0x191
-               do_facet mds$n $LCTL set_param fail_loc=0x191
-       done
+       #define OBD_FAIL_OSD_SCRUB_CRASH         0x191
+       do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191
+
        sleep 4
        local -a position0
        for n in $(seq $MDSCOUNT); do
@@ -534,11 +500,10 @@ test_6() {
 
        scrub_stop_mds 11
 
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=3
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-       done
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=3 fail_loc=0x190
+
        echo "starting MDTs without disabling OI scrub"
        scrub_start_mds 12 "$MOUNT_OPTS_SCRUB"
 
@@ -554,13 +519,10 @@ test_6() {
                fi
        done
 
-       for n in $(seq $MDSCOUNT); do
-               do_facet mds$n $LCTL set_param fail_loc=0
-               do_facet mds$n $LCTL set_param fail_val=0
-       done
-       sleep 5
-       scrub_check_status 15 completed
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_loc=0 fail_val=0
 
+       scrub_check_status 15 completed
        scrub_check_flags 16 ""
 }
 run_test 6 "OI scrub resumes from last checkpoint"
@@ -568,39 +530,31 @@ run_test 6 "OI scrub resumes from last checkpoint"
 test_7() {
        scrub_prep 500
        scrub_backup_restore 1
-
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-       scrub_check_status 3 init
        scrub_check_flags 4 inconsistent
-
        mount_client $MOUNT || error "(5) Fail to start client!"
-
        scrub_enable_auto
-       local n
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=3
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-       done
+
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=3 fail_loc=0x190
+
        scrub_check_data 6
 
+       local n
        for n in $(seq $MDSCOUNT); do
                stat $DIR/$tdir/mds$n/${tfile}300 ||
                        error "(7) Failed to stat mds$n/${tfile}300!"
        done
 
        scrub_check_status 8 scanning
-
        scrub_check_flags 9 inconsistent,auto
 
-       for n in $(seq $MDSCOUNT); do
-               do_facet mds$n $LCTL set_param fail_loc=0
-               do_facet mds$n $LCTL set_param fail_val=0
-       done
-       sleep 5
-       scrub_check_status 10 completed
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_loc=0 fail_val=0
 
+       scrub_check_status 10 completed
        scrub_check_flags ""
 }
 run_test 7 "System is available during OI scrub scanning"
@@ -608,39 +562,25 @@ run_test 7 "System is available during OI scrub scanning"
 test_8() {
        scrub_prep 128
        scrub_backup_restore 1
-
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-
-       scrub_check_status 3 init
-
        scrub_check_flags 4 inconsistent
 
-       local n
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=1
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-       done
-       scrub_start 5
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=1 fail_loc=0x190
 
+       scrub_start 5
        scrub_check_status 6 scanning
-
        scrub_stop 7
-
        scrub_check_status 8 stopped
-
        scrub_start 9
-
        scrub_check_status 10 scanning
 
-       for n in $(seq $MDSCOUNT); do
-               do_facet mds$n $LCTL set_param fail_loc=0
-               do_facet mds$n $LCTL set_param fail_val=0
-       done
-       sleep 5
-       scrub_check_status 11 completed
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_loc=0 fail_val=0
 
+       scrub_check_status 11 completed
        scrub_check_flags 12 ""
 }
 run_test 8 "Control OI scrub manually"
@@ -651,14 +591,11 @@ test_9() {
                return 0
        fi
 
-       scrub_prep 8000
+       scrub_prep 6000
        scrub_backup_restore 1
 
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-
-       scrub_check_status 3 init
-
        scrub_check_flags 4 inconsistent
 
        local BASE_SPEED1=100
@@ -668,9 +605,7 @@ test_9() {
 
        sleep $RUN_TIME1
        scrub_check_status 6 completed
-
        scrub_check_flags 7 ""
-
        # OI scrub should run with limited speed under non-inconsistent case
        scrub_start 8 -s $BASE_SPEED1 -r
 
@@ -725,7 +660,7 @@ test_9() {
                do_facet mds$n $LCTL set_param -n \
                                mdd.$(facet_svc mds$n).lfsck_speed_limit 0
        done
-       sleep 6
+
        scrub_check_status 13 completed
 }
 run_test 9 "OI scrub speed control"
@@ -733,50 +668,32 @@ run_test 9 "OI scrub speed control"
 test_10a() {
        scrub_prep 0
        scrub_backup_restore 1
-
        echo "starting mds$n with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-
-       scrub_check_status 3 init
-
        scrub_check_flags 4 inconsistent
-
        mount_client $MOUNT || error "(5) Fail to start client!"
-
        scrub_enable_auto
-       local n
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=1
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-       done
-       scrub_check_data 6
 
-       scrub_check_status 7 scanning
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=1 fail_loc=0x190
 
+       scrub_check_data 6
+       scrub_check_status 7 scanning
        umount_client $MOUNT || error "(8) Fail to stop client!"
-
        scrub_stop_mds 9
-
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB"
-
        scrub_check_status 11 paused
-
        scrub_stop_mds 12
-
        echo "starting MDTs without disabling OI scrub"
        scrub_start_mds 13 "$MOUNT_OPTS_SCRUB"
-
        scrub_check_status 14 scanning
 
-       for n in $(seq $MDSCOUNT); do
-               do_facet mds$n $LCTL set_param fail_loc=0
-               do_facet mds$n $LCTL set_param fail_val=0
-       done
-       sleep 5
-       scrub_check_status 15 completed
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_loc=0 fail_val=0
 
+       scrub_check_status 15 completed
        scrub_check_flags 16 ""
 }
 run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
@@ -785,77 +702,49 @@ run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)"
 test_10b() {
        scrub_prep 0
        scrub_backup_restore 1
-
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB"
-
-       scrub_check_status 3 init
-
        scrub_check_flags 4 inconsistent
 
-       local n
-       for n in $(seq $MDSCOUNT); do
-               #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
-               do_facet mds$n $LCTL set_param fail_val=3
-               do_facet mds$n $LCTL set_param fail_loc=0x190
-       done
+       #define OBD_FAIL_OSD_SCRUB_DELAY         0x190
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_val=3 fail_loc=0x190
 
        scrub_start 5
-
        scrub_check_status 6 scanning
-
        scrub_stop_mds 7
-
        echo "starting MDTs with OI scrub disabled"
        scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB"
-
        scrub_check_status 9 paused
-
        scrub_stop_mds 10
-
        echo "starting MDTs without disabling OI scrub"
        scrub_start_mds 11 "$MOUNT_OPTS_SCRUB"
-
        scrub_check_status 12 scanning
 
-       for n in $(seq $MDSCOUNT); do
-               do_facet mds$n $LCTL set_param fail_loc=0
-               do_facet mds$n $LCTL set_param fail_val=0
-       done
-       sleep 5
-       scrub_check_status 13 completed
+       do_nodes $(comma_list $(mdts_nodes)) \
+               $LCTL set_param fail_loc=0 fail_val=0
 
+       scrub_check_status 13 completed
        scrub_check_flags 14 ""
 }
 #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)"
 
 test_11() {
-       echo "stopall"
-       stopall > /dev/null
-       echo "formatall"
-       formatall > /dev/null
-       echo "setupall"
-       setupall > /dev/null
-
        local CREATED=100
-       local tname=`date +%s`
-       rm -rf $MOUNT/$tname > /dev/null
-       mkdir -p $MOUNT/$tname || error "(0) Failed to create $MOUNT/$tname"
        local n
+
+       check_mount_and_prep
+
        for n in $(seq $MDSCOUNT); do
-               $LFS mkdir -i $((n - 1)) $MOUNT/$tname/mds$n ||
-                       error "(1) Fail to mkdir $MOUNT/$tname/mds$n"
+               $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n ||
+                       error "(1) Fail to mkdir $DIR/$tdir/mds$n"
 
-               createmany -o $MOUNT/$tname/mds$n/f $CREATED ||
-                       error "(2) Fail to create in $tname/mds$n"
+               createmany -o $DIR/$tdir/mds$n/f $CREATED ||
+                       error "(2) Fail to create under $tdir/mds$n"
        done
 
-       cleanup_mount $MOUNT
-       do_facet $SINGLEMDS $LCTL clear
-       start_full_debug_logging
        # reset OI scrub start point by force
        scrub_start 3 -r
-       sleep 3
        scrub_check_status 4 completed
 
        declare -a checked0
@@ -878,7 +767,6 @@ test_11() {
 
        # reset OI scrub start point by force
        scrub_start 6 -r
-       sleep 3
        scrub_check_status 7 completed
 
        # OI scrub should skip the new created object only once
@@ -890,129 +778,111 @@ test_11() {
                        error "(8) Expect 0 objects skipped on mds$n, but" \
                                "got $SKIPPED"
        done
-
-       stop_full_debug_logging
-       restore_mount $MOUNT || error "(9) Fail to start client!"
-       rm -rf $MOUNT/$tname > /dev/null
 }
 run_test 11 "OI scrub skips the new created objects only once"
 
 test_12() {
-       echo "stopall"
-       stopall > /dev/null
-       echo "formatall"
-       formatall > /dev/null
-       echo "setupall"
-       setupall > /dev/null
-
-       mkdir -p $DIR/$tdir
+       check_mount_and_prep
        $SETSTRIPE -c 1 -i 0 $DIR/$tdir
 
+       local count=$(precreated_ost_obj_count 0 0)
+
        #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY               0x195
        do_facet ost1 $LCTL set_param fail_loc=0x195
-       createmany -o $DIR/$tdir/f 1000
+       createmany -o $DIR/$tdir/f $((count + 32))
 
-       echo "stopall"
-       stopall > /dev/null
-       echo "setupall"
-       setupall > /dev/null
+       umount_client $MOUNT || error "(1) Fail to stop client!"
 
-       do_facet ost1 $LCTL set_param fail_loc=0
-       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(1) Expect 'init', but got '$STATUS'"
+       stop ost1 || error "(2) Fail to stop ost1"
 
-       ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail"
+       #define OBD_FAIL_OST_NODESTROY           0x233
+       do_facet ost1 $LCTL set_param fail_loc=0x233
 
-       sleep 3
-       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(3) Expect 'completed', but got '$STATUS'"
+       start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
+               error "(3) Fail to start ost1"
 
-       ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(4) ls should succeed"
+       mount_client $MOUNT || error "(4) Fail to start client!"
+
+       ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
+
+       $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
+
+       do_facet ost1 $LCTL set_param fail_loc=0
+       wait_update_facet ost1 "$LCTL get_param -n \
+               osd-ldiskfs.$(facet_svc ost1).oi_scrub |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 ||
+               error "(7) Expected '$expected' on ost1"
+
+       ls -ail $DIR/$tdir > /dev/null || {
+               $SHOW_SCRUB_ON_OST
+               error "(8) ls should succeed"
+       }
 }
 run_test 12 "OI scrub can rebuild invalid /O entries"
 
 test_13() {
-       echo "stopall"
-       stopall > /dev/null
-       echo "formatall"
-       formatall > /dev/null
-       echo "setupall"
-       setupall > /dev/null
-
-       mkdir -p $DIR/$tdir
+       check_mount_and_prep
        $SETSTRIPE -c 1 -i 0 $DIR/$tdir
 
+       local count=$(precreated_ost_obj_count 0 0)
+
        #define OBD_FAIL_OSD_COMPAT_NO_ENTRY            0x196
        do_facet ost1 $LCTL set_param fail_loc=0x196
-       createmany -o $DIR/$tdir/f 1000
+       createmany -o $DIR/$tdir/f $((count + 32))
        do_facet ost1 $LCTL set_param fail_loc=0
 
-       echo "stopall"
-       stopall > /dev/null
-       echo "setupall"
-       setupall > /dev/null
+       umount_client $MOUNT || error "(1) Fail to stop client!"
+
+       stop ost1 || error "(2) Fail to stop ost1"
+
+       start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB ||
+               error "(3) Fail to start ost1"
 
-       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(1) Expect 'init', but got '$STATUS'"
+       mount_client $MOUNT || error "(4) Fail to start client!"
+
+       ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail"
 
-       ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail"
+       $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!"
 
-       $START_SCRUB_ON_OST || error "(3) Fail to start OI scrub on OST!"
-       sleep 3
-       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "completed" ] ||
-               error "(4) Expect 'completed', but got '$STATUS'"
+       wait_update_facet ost1 "$LCTL get_param -n \
+               osd-ldiskfs.$(facet_svc ost1).oi_scrub |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 ||
+               error "(7) Expected '$expected' on ost1"
 
-       ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed"
+       ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed"
 }
 run_test 13 "OI scrub can rebuild missed /O entries"
 
 test_14() {
-       echo "stopall"
-       stopall > /dev/null
-       echo "formatall"
-       formatall > /dev/null
-       echo "setupall"
-       setupall > /dev/null
-
-       mkdir -p $DIR/$tdir
+       check_mount_and_prep
        $SETSTRIPE -c 1 -i 0 $DIR/$tdir
 
+       local count=$(precreated_ost_obj_count 0 0)
+
        #define OBD_FAIL_OSD_COMPAT_NO_ENTRY            0x196
        do_facet ost1 $LCTL set_param fail_loc=0x196
-       createmany -o $DIR/$tdir/f 64
+       createmany -o $DIR/$tdir/f $((count + 32))
        do_facet ost1 $LCTL set_param fail_loc=0
 
-       echo "stopall"
-       stopall > /dev/null
-       echo "setupall"
-       setupall > /dev/null
-
-       local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }')
-       [ "$STATUS" == "init" ] ||
-               error "(1) Expect 'init', but got '$STATUS'"
-
-       ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail"
+       umount_client $MOUNT || error "(1) Fail to stop client!"
 
-       echo "stopall"
-       stopall > /dev/null
+       stop ost1 || error "(2) Fail to stop ost1"
 
        echo "run e2fsck"
        run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" ||
                error "(3) Fail to run e2fsck error"
 
-       echo "setupall"
-       setupall > /dev/null
+       start ost1 $(ostdevname 1) $OST_MOUNT_OPTS ||
+               error "(4) Fail to start ost1"
+
+       mount_client $MOUNT || error "(5) Fail to start client!"
 
        local LF_REPAIRED=$($SHOW_SCRUB_ON_OST |
                            awk '/^lf_reparied/ { print $2 }')
        [ $LF_REPAIRED -gt 0 ] ||
-               error "(4) Some entry under /lost+found should be repaired"
+               error "(6) Some entry under /lost+found should be repaired"
 
-       ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed"
+       ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed"
 }
 run_test 14 "OI scrub can repair objects under lost+found"
 
@@ -1028,7 +898,6 @@ test_15() {
 
        # run under dryrun mode
        scrub_start 5 -n on
-       sleep 3
        scrub_check_status 6 completed
        scrub_check_flags 7 inconsistent
        scrub_check_params 8 dryrun
@@ -1036,7 +905,6 @@ test_15() {
 
        # run under dryrun mode again
        scrub_start 10 -n on
-       sleep 3
        scrub_check_status 11 completed
        scrub_check_flags 12 inconsistent
        scrub_check_params 13 dryrun
@@ -1044,7 +912,6 @@ test_15() {
 
        # run under normal mode
        scrub_start 15 -n off
-       sleep 3
        scrub_check_status 16 completed
        scrub_check_flags 17 ""
        scrub_check_params 18 ""
@@ -1052,7 +919,6 @@ test_15() {
 
        # run under normal mode again
        scrub_start 20 -n off
-       sleep 3
        scrub_check_status 21 completed
        scrub_check_flags 22 ""
        scrub_check_params 23 ""
@@ -1063,6 +929,7 @@ run_test 15 "Dryrun mode OI scrub"
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
 OSTSIZE=${SAVED_OSTSIZE}
+OSTCOUNT=${SAVED_OSTCOUNT}
 
 # cleanup the system at last
 formatall
index 1145118..4b68a25 100644 (file)
@@ -6637,3 +6637,27 @@ free_fd()
         [ $fd -lt $max_fd ] || error "finding free file descriptor failed"
         echo $fd
 }
+
+check_mount_and_prep()
+{
+       is_mounted $MOUNT || setupall
+
+       rm -rf $DIR/[df][0-9]* || error "Fail to cleanup the env!"
+       mkdir $DIR/$tdir || error "Fail to mkdir $DIR/$tdir."
+}
+
+# calculate how many precreated ost-objects are left (last_id - next_id + 1).
+precreated_ost_obj_count()
+{
+       local mdt_idx=$1
+       local ost_idx=$2
+       local mdt_name="MDT$(printf '%04x' $mdt_idx)"
+       local ost_name="OST$(printf '%04x' $ost_idx)"
+       local proc_path="${FSNAME}-${ost_name}-osc-${mdt_name}"
+       local last_id=$(do_facet mds$((mdt_idx + 1)) lctl get_param -n \
+                       osp.$proc_path.prealloc_last_id)
+       local next_id=$(do_facet mds$((mdt_idx + 1)) lctl get_param -n \
+                       osp.$proc_path.prealloc_next_id)
+
+       echo $((last_id - next_id + 1))
+}