From 2ab8b98ea5dafbce59043e5d8477e794197116a0 Mon Sep 17 00:00:00 2001 From: Emoly Liu Date: Fri, 19 Sep 2014 23:10:30 -0700 Subject: [PATCH] LU-5241 tests: speed up sanity-lfsck and sanity-scrub tests 1) drop unnecessary device reformats. 2) drop unnecessary system stop/re-start. 3) replace 'sleep' with wait_update_facet to avoid idle wait. 4) drop unnecessary "-p" option for some "mkdir" cases. 5) replace "touch" with "createmany -m". 6) other code style changes and cleanup. This patch is back-ported from the following one: Lustre-commit: 1dbba329174e6c7f7712f01fc4e44c44400fbc92 Lustre-change: http://review.whamcloud.com/9704 LU-4803 ofd: skip orphan cleanup when inject OI error When injecting an OI error to simulate some failure cases, the object may be mapped to an invalid local target (inode for ldiskfs), so skip the orphan cleanup in OSP to avoid an unexpected warning or destroy. Lustre-commit: 0bafbd7d8f652997d83b3cc2419894f48833f424 Lustre-change: http://review.whamcloud.com/9759 Test-Parameters: alwaysuploadlogs envdefinitions=SLOW=yes \ testlist=lfsck-performance,sanity-scrub,sanity-lfsck Signed-off-by: Fan Yong Signed-off-by: Emoly Liu Change-Id: Iacae0047a502e3baa778b86536220d70c717f6f5 Reviewed-on: http://review.whamcloud.com/10818 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/ofd/ofd_obd.c | 21 +- lustre/osd-ldiskfs/osd_handler.c | 3 - lustre/tests/lfsck-performance.sh | 14 +- lustre/tests/sanity-lfsck.sh | 567 ++++++++++++++++++-------------------- lustre/tests/sanity-scrub.sh | 479 ++++++++++++-------------------- lustre/tests/test-framework.sh | 24 ++ 7 files changed, 486 insertions(+), 623 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index eaed5a2..2bcff3b 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -317,6 +317,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, 
#define OBD_FAIL_OST_ENOINO 0x229 #define OBD_FAIL_OST_DQACQ_NET 0x230 #define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231 +#define OBD_FAIL_OST_NODESTROY 0x233 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index aa39083..f4844b7 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -1126,6 +1126,7 @@ static int ofd_orphans_destroy(const struct lu_env *env, struct obdo *oa) { struct ofd_thread_info *info = ofd_info(env); + struct lu_fid *fid = &info->fti_fid; obd_id last; int skip_orphan; int rc = 0; @@ -1141,24 +1142,30 @@ static int ofd_orphans_destroy(const struct lu_env *env, ofd_name(ofd), POSTID(&oa->o_oi)); RETURN(-EINVAL); } + last = ofd_seq_last_oid(oseq); LASSERT(exp != NULL); skip_orphan = !!(exp_connect_flags(exp) & OBD_CONNECT_SKIP_ORPHAN); - last = ofd_seq_last_oid(oseq); + if (OBD_FAIL_CHECK(OBD_FAIL_OST_NODESTROY)) + goto done; + LCONSOLE(D_INFO, "%s: deleting orphan objects from "DOSTID " to "DOSTID"\n", ofd_name(ofd), ostid_seq(&oa->o_oi), end_id + 1, ostid_seq(&oa->o_oi), last); for (ostid_set_id(&oi, last); ostid_id(&oi) > end_id; ostid_dec_id(&oi)) { - rc = ostid_to_fid(&info->fti_fid, &oi, 0); + rc = ostid_to_fid(fid, &oi, 0); if (rc != 0) GOTO(out_put, rc); - rc = ofd_destroy_by_fid(env, ofd, &info->fti_fid, 1); - if (rc && rc != -ENOENT) /* this is pretty fatal... */ - CEMERG("%s: error destroying precreated id "DOSTID - ": rc = %d\n", ofd_name(ofd), POSTID(&oi), rc); + rc = ofd_destroy_by_fid(env, ofd, fid, 1); + if (rc != 0 && rc != -ENOENT && rc != -ESTALE && + likely(rc != -EREMCHG && rc != -EINPROGRESS)) + /* this is pretty fatal... 
*/ + CEMERG("%s: error destroying precreated id " + DFID": rc = %d\n", + ofd_name(ofd), PFID(fid), rc); if (!skip_orphan) { ofd_seq_last_oid_set(oseq, ostid_id(&oi) - 1); /* update last_id on disk periodically so that if we @@ -1170,7 +1177,9 @@ static int ofd_orphans_destroy(const struct lu_env *env, } CDEBUG(D_HA, "%s: after destroy: set last_id to "DOSTID"\n", ofd_obd(ofd)->obd_name, POSTID(&oa->o_oi)); +done: if (!skip_orphan) { + ofd_seq_last_oid_set(oseq, ostid_id(&oi) - 1); rc = ofd_seq_last_oid_write(env, ofd, oseq); } else { /* don't reuse orphan object, return last used objid */ diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index b01f028..717be8c 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -442,9 +442,6 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) int rc; ENTRY; - if (OBD_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY)) - RETURN(0); - CLASSERT(LMA_OLD_SIZE >= sizeof(*lma)); rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA, info->oti_mdt_attrs_old, LMA_OLD_SIZE); diff --git a/lustre/tests/lfsck-performance.sh b/lustre/tests/lfsck-performance.sh index 9b602de..dca5049 100644 --- a/lustre/tests/lfsck-performance.sh +++ b/lustre/tests/lfsck-performance.sh @@ -70,7 +70,7 @@ lfsck_create() { test_mkdir ${tdir} EOF" - for ((j=1; j<${threads}; j++)); do + for ((j = 1; j < ${threads}; j++)); do ${ECHOCMD} "${LCTL} <<-EOF cfg_device ${echodev} test_mkdir ${tdir}${j} @@ -138,7 +138,7 @@ test_0() { ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || error "Fail to reformat the MDS!" - for ((i=$MINCOUNT; i<=$MAXCOUNT; i=$((i * FACTOR)))); do + for ((i = $MINCOUNT; i <= $MAXCOUNT; i = $((i * FACTOR)))); do local nfiles=$((i - BCOUNT)) echo "+++ start to create for ${i} files set at: $(date) +++" @@ -180,7 +180,8 @@ test_1() { ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || error "Fail to reformat the MDS!" 
- for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do + for ((i = $MINCOUNT_REPAIR; i <= $MAXCOUNT_REPAIR; + i = $((i * FACTOR)))); do local nfiles=$((i - BCOUNT)) echo "+++ start to create for ${i} files set at: $(date) +++" @@ -224,7 +225,8 @@ run_test 1 "lfsck performance test (backup/restore) without load" test_2() { local i - for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do + for ((i = $MINCOUNT_REPAIR; i <= $MAXCOUNT_REPAIR; + i = $((i * FACTOR)))); do stopall do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local reformat_external_journal @@ -278,7 +280,7 @@ test_3() { ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || error "Fail to reformat the MDS!" - for ((i=$inc_count; i<=$BASE_COUNT; i=$((i + inc_count)))); do + for ((i = $inc_count; i <= $BASE_COUNT; i = $((i + inc_count)))); do local nfiles=$((i - BCOUNT)) echo "+++ start to create for ${i} files set at: $(date) +++" @@ -309,7 +311,7 @@ test_3() { local inc_speed=$((FULL_SPEED * INCFACTOR / 100)) local j - for ((j=$inc_speed; j<$FULL_SPEED; j=$((j + inc_speed)))); do + for ((j = $inc_speed; j < $FULL_SPEED; j = $((j + inc_speed)))); do start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null || error "Fail to start MDS!" diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 23139b0..d08d75a 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -24,12 +24,17 @@ require_dsh_mds || exit 0 MCREATE=${MCREATE:-mcreate} SAVED_MDSSIZE=${MDSSIZE} SAVED_OSTSIZE=${OSTSIZE} +SAVED_OSTCOUNT=${OSTCOUNT} # use small MDS + OST size to speed formatting time # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size MDSSIZE=100000 OSTSIZE=100000 +# no need too much OSTs, to reduce the format/start/stop overhead +[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4 -check_and_setup_lustre +# build up a clean test environment. 
+formatall +setupall [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] && skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre && @@ -57,50 +62,41 @@ lfsck_prep() { local nfiles=$2 local igif=$3 - echo "formatall" - formatall > /dev/null - - echo "setupall" - setupall > /dev/null + check_mount_and_prep + echo "preparing... $nfiles * $ndirs files will be created $(date)." if [ ! -z $igif ]; then #define OBD_FAIL_FID_IGIF 0x1504 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504 fi - echo "preparing... ${nfiles} * ${ndirs} files will be created." - mkdir -p $DIR/$tdir - cp $LUSTRE/tests/*.sh $DIR/ - for ((i = 0; i < ${ndirs}; i++)); do - mkdir $DIR/$tdir/d${i} - touch $DIR/$tdir/f${i} - for ((j = 0; j < ${nfiles}; j++)); do - touch $DIR/$tdir/d${i}/f${j} - done - mkdir $DIR/$tdir/e${i} - done + cp $LUSTRE/tests/*.sh $DIR/$tdir/ + if [ $ndirs -gt 0 ]; then + createmany -d $DIR/$tdir/d $ndirs + createmany -m $DIR/$tdir/f $ndirs + if [ $nfiles -gt 0 ]; then + for ((i = 0; i < $ndirs; i++)); do + createmany -m $DIR/$tdir/d${i}/f $nfiles > \ + /dev/null || error "createmany $nfiles" + done + fi + createmany -d $DIR/$tdir/e $ndirs + fi if [ ! -z $igif ]; then touch $DIR/$tdir/dummy do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fi - echo "prepared." - cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!" - echo "stop $SINGLEMDS" - stop $SINGLEMDS > /dev/null || error "Fail to stop MDS!" + echo "prepared $(date)." } test_0() { - lfsck_prep 10 10 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + lfsck_prep 3 3 #define OBD_FAIL_LFSCK_DELAY1 0x1600 - do_facet $SINGLEMDS $LCTL set_param fail_val=3 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 - $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600 + $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!" 
$SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)" @@ -120,12 +116,13 @@ test_0() { [ "$STATUS" == "scanning-phase1" ] || error "(8) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(9) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(9) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -134,28 +131,24 @@ test_0() { local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }') $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!" - sleep 3 - - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(12) Expect 'completed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(12) unexpected status" + } local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }') [ $((scanned1 + 1)) -eq $scanned2 ] || error "(13) Expect success $((scanned1 + 1)), but got $scanned2" echo "stopall, should NOT crash LU-3649" - stopall > /dev/null + stopall || error "(14) Fail to stopall" } run_test 0 "Control LFSCK manually" test_1a() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" 
#define OBD_FAIL_FID_INDIR 0x1501 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501 @@ -163,12 +156,13 @@ test_1a() { do_facet $SINGLEMDS $LCTL set_param fail_loc=0 umount_client $MOUNT - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -188,11 +182,6 @@ run_test 1a "LFSCK can find out and repair crashed FID-in-dirent" test_1b() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_FID_INLMA 0x1502 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502 @@ -202,12 +191,13 @@ test_1b() umount_client $MOUNT #define OBD_FAIL_FID_NOLMA 0x1506 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506 - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" 
+ wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -227,11 +217,6 @@ run_test 1b "LFSCK can find out and repair missed FID-in-LMA" test_2a() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 @@ -239,12 +224,13 @@ test_2a() { do_facet $SINGLEMDS $LCTL set_param fail_loc=0 umount_client $MOUNT - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -266,11 +252,6 @@ run_test 2a "LFSCK can find out and repair crashed linkEA entry" test_2b() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 @@ -278,12 +259,13 @@ test_2b() do_facet $SINGLEMDS $LCTL set_param fail_loc=0 umount_client $MOUNT - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" 
- - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase2/ { print $2 }') @@ -305,11 +287,6 @@ run_test 2b "LFSCK can find out and remove invalid linkEA entry" test_2c() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605 @@ -317,12 +294,13 @@ test_2c() do_facet $SINGLEMDS $LCTL set_param fail_loc=0 umount_client $MOUNT - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase2/ { print $2 }') @@ -344,35 +322,35 @@ run_test 2c "LFSCK can find out and remove repeated linkEA entry" test_4() { lfsck_prep 3 3 + cleanup_mount $MOUNT || error "(0.1) Fail to stop client!" + stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!" + mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!" 
echo "start $SINGLEMDS with disabling OI scrub" start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(3) Expect 'init', but got '$STATUS'" - #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 - $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 + $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || { + $SHOW_NAMESPACE + error "(5) unexpected status" + } - sleep 5 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || - error "(5) Expect 'scanning-phase1', but got '$STATUS'" - - local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') - [ "$FLAGS" == "inconsistent" ] || - error "(6) Expect 'inconsistent', but got '$FLAGS'" + error "(6) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(7) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(7) unexpected status" + } FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'" @@ -398,35 +376,35 @@ run_test 4 "FID-in-dirent can be rebuilt after MDT file-level 
backup/restore" test_5() { lfsck_prep 1 1 1 + cleanup_mount $MOUNT || error "(0.1) Fail to stop client!" + stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!" + mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS with disabling OI scrub" start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(3) Expect 'init', but got '$STATUS'" - #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 - $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 + $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || { + $SHOW_NAMESPACE + error "(5) unexpected status" + } - sleep 5 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || - error "(5) Expect 'scanning-phase1', but got '$STATUS'" - - local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') - [ "$FLAGS" == "inconsistent,upgrade" ] || - error "(6) Expect 'inconsistent,upgrade', but got '$FLAGS'" + error "(6) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(7) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + 
$SHOW_NAMESPACE + error "(7) unexpected status" + } FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'" @@ -450,18 +428,14 @@ test_5() [ "$dummyname" == "$DIR/$tdir/dummy" ] || error "(13) Fail to generate linkEA: $dummyfid $dummyname" } -run_test 5 "LFSCK can handle IFIG object upgrading" +run_test 5 "LFSCK can handle IGIF object upgrading" test_6a() { - lfsck_prep 10 10 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + lfsck_prep 5 5 #define OBD_FAIL_LFSCK_DELAY1 0x1600 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 - $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600 + $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!" local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || @@ -472,103 +446,111 @@ test_6a() { # Fail the LFSCK to guarantee there is at least one checkpoint #define OBD_FAIL_LFSCK_FATAL1 0x1608 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "failed" ] || - error "(4) Expect 'failed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "failed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } - local POSITION0=$($SHOW_NAMESPACE | - awk '/^last_checkpoint_position/ { print $2 }' | - tr -d ',') + local POS0=$($SHOW_NAMESPACE | + awk '/^last_checkpoint_position/ { print $2 }' | + tr -d ',') #define OBD_FAIL_LFSCK_DELAY1 0x1600 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600 $START_NAMESPACE || error 
"(5) Fail to start LFSCK for namespace!" STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || error "(6) Expect 'scanning-phase1', but got '$STATUS'" - local POSITION1=$($SHOW_NAMESPACE | - awk '/^latest_start_position/ { print $2 }' | - tr -d ',') - [ $POSITION0 -lt $POSITION1 ] || - error "(7) Expect larger than: $POSITION0, but got $POSITION1" - - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(8) Expect 'completed', but got '$STATUS'" + local POS1=$($SHOW_NAMESPACE | + awk '/^latest_start_position/ { print $2 }' | + tr -d ',') + [ $POS0 -lt $POS1 ] || + error "(7) Expect larger than: $POS0, but got $POS1" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(8) unexpected status" + } } run_test 6a "LFSCK resumes from last checkpoint (1)" test_6b() { - lfsck_prep 10 10 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + lfsck_prep 5 5 #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 - $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 + $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!" 
local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || error "(3) Expect 'scanning-phase1', but got '$STATUS'" - # Sleep 3 sec to guarantee at least one object processed by LFSCK - sleep 3 + # Sleep 5 sec to guarantee that we are in the directory scanning + sleep 5 # Fail the LFSCK to guarantee there is at least one checkpoint #define OBD_FAIL_LFSCK_FATAL2 0x1609 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "failed" ] || - error "(4) Expect 'failed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "failed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } + + local O_POS0=$($SHOW_NAMESPACE | + awk '/^last_checkpoint_position/ { print $2 }' | + tr -d ',') - local POSITION0=$($SHOW_NAMESPACE | - awk '/^last_checkpoint_position/ { print $4 }') + local D_POS0=$($SHOW_NAMESPACE | + awk '/^last_checkpoint_position/ { print $4 }') #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!" 
STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || error "(6) Expect 'scanning-phase1', but got '$STATUS'" - local POSITION1=$($SHOW_NAMESPACE | - awk '/^latest_start_position/ { print $4 }') - if [ $POSITION0 -gt $POSITION1 ]; then - [ $POSITION1 -eq 0 -a $POSITION0 -eq $((POSITION1 + 1)) ] || - error "(7) Expect larger than: $POSITION0, but got $POSITION1" + local O_POS1=$($SHOW_NAMESPACE | + awk '/^latest_start_position/ { print $2 }' | + tr -d ',') + local D_POS1=$($SHOW_NAMESPACE | + awk '/^latest_start_position/ { print $4 }') + + if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then + [ $O_POS0 -lt $O_POS1 ] || + error "(7.1) $O_POS1 is not larger than $O_POS0" + else + [ $D_POS0 -lt $D_POS1 ] || + error "(7.2) $D_POS1 is not larger than $D_POS0" fi - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(8) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(8) unexpected status" + } } run_test 6b "LFSCK resumes from last checkpoint (2)" test_7a() { - lfsck_prep 10 10 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + lfsck_prep 5 5 + umount_client $MOUNT #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 - $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 + $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!" 
local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || @@ -587,23 +569,19 @@ test_7a() [ "$STATUS" == "scanning-phase1" ] || error "(6) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(7) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(7) unexpected status" + } } run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)" test_7b() { lfsck_prep 2 2 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 @@ -612,14 +590,14 @@ test_7b() done #define OBD_FAIL_LFSCK_DELAY3 0x1602 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602 - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "scanning-phase2" ] || - error "(4) Expect 'scanning-phase2', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602 + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } echo "stop $SINGLEMDS" stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!" 
@@ -632,28 +610,29 @@ test_7b() [ "$STATUS" == "scanning-phase2" ] || error "(7) Expect 'scanning-phase2', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(8) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(8) unexpected status" + } } run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)" test_8() { + echo "formatall" + formatall > /dev/null + echo "setupall" + setupall > /dev/null + lfsck_prep 20 20 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "init" ] || error "(2) Expect 'init', but got '$STATUS'" - mount_client $MOUNT || error "(3) Fail to start client!" - #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 mkdir $DIR/$tdir/crashed @@ -664,9 +643,10 @@ test_8() touch $DIR/$tdir/dummy${i} done + umount_client $MOUNT || error "(3) Fail to stop client!" + #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=2 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 + do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!" 
STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') @@ -687,10 +667,12 @@ test_8() #define OBD_FAIL_LFSCK_FATAL2 0x1609 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "failed" ] || - error "(10) Expect 'failed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "failed" 6 || { + $SHOW_NAMESPACE + error "(10) unexpected status" + } #define OBD_FAIL_LFSCK_DELAY1 0x1600 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 @@ -741,29 +723,30 @@ test_8() error "(20) Expect 'paused', but got '$STATUS'" #define OBD_FAIL_LFSCK_DELAY3 0x1602 - do_facet $SINGLEMDS $LCTL set_param fail_val=2 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602 + do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!" - sleep 2 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "scanning-phase2" ] || - error "(22) Expect 'scanning-phase2', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || { + $SHOW_NAMESPACE + error "(22) unexpected status" + } local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') [ "$FLAGS" == "scanned-once,inconsistent" ] || error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 2 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(24) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(24) 
unexpected status" + } FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'" - } run_test 8 "LFSCK state machine" @@ -774,17 +757,10 @@ test_9a() { fi lfsck_prep 70 70 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(2) Expect 'init', but got '$STATUS'" local BASE_SPEED1=100 local RUN_TIME1=10 - $START_NAMESPACE -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!" + $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!" sleep $RUN_TIME1 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') @@ -841,37 +817,28 @@ test_9b() { fi lfsck_prep 0 0 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - mount_client $MOUNT || error "(2) Fail to start client!" - - echo "Another preparing... 50 * 50 files (with error) will be created." + echo "Preparing another 50 * 50 files (with error) at $(date)." #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + createmany -d $DIR/$tdir/d 50 + createmany -m $DIR/$tdir/f 50 for ((i = 0; i < 50; i++)); do - mkdir -p $DIR/$tdir/d${i} - touch $DIR/$tdir/f${i} - for ((j = 0; j < 50; j++)); do - touch $DIR/$tdir/d${i}/f${j} - done + createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null done - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(3) Expect 'init', but got '$STATUS'" - #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c - $START_NAMESPACE || error "(4) Fail to start LFSCK!" 
- - sleep 10 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "stopped" ] || - error "(5) Expect 'stopped', but got '$STATUS'" + $START_NAMESPACE -r || error "(4) Fail to start LFSCK!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "stopped" 10 || { + $SHOW_NAMESPACE + error "(5) unexpected status" + } do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + echo "Prepared at $(date)." local BASE_SPEED1=50 local RUN_TIME1=10 @@ -917,53 +884,47 @@ test_9b() { do_facet $SINGLEMDS \ $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0 - sleep 5 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(11) Expect 'completed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(11) unexpected status" + } } run_test 9b "LFSCK speed control (2)" test_10() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" + echo "Preparing more files with error at $(date)." #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 + for ((i = 0; i < 1000; i = $((i+2)))); do mkdir -p $DIR/$tdir/d${i} touch $DIR/$tdir/f${i} - for ((j = 0; j < 5; j++)); do - touch $DIR/$tdir/d${i}/f${j} - done + createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null done #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + for ((i = 1; i < 1000; i = $((i+2)))); do mkdir -p $DIR/$tdir/d${i} touch $DIR/$tdir/f${i} - for ((j = 0; j < 5; j++)); do - touch $DIR/$tdir/d${i}/f${j} - done + createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null done do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + echo "Prepared at $(date)." 
+ ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy umount_client $MOUNT mount_client $MOUNT || error "(3) Fail to start client!" - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(4) Expect 'init', but got '$STATUS'" - - $START_NAMESPACE -s 100 || error "(5) Fail to start LFSCK!" + $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!" sleep 10 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') @@ -993,11 +954,12 @@ test_10() do_facet $SINGLEMDS \ $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0 - umount_client $MOUNT - sleep 10 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(16) Expect 'completed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(16) unexpected status" + } } run_test 10 "System is available during LFSCK scanning" @@ -1006,6 +968,7 @@ $LCTL set_param debug=-lfsck > /dev/null || true # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} +OSTCOUNT=${SAVED_OSTCOUNT} # cleanup the system at last formatall diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index eae09fb..e9fdb2a 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -21,13 +21,19 @@ require_dsh_mds || exit 0 SAVED_MDSSIZE=${MDSSIZE} SAVED_OSTSIZE=${OSTSIZE} +SAVED_OSTCOUNT=${OSTCOUNT} # use small MDS + OST size to speed formatting time # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size MDSSIZE=100000 OSTSIZE=100000 +# no need too much OSTs, to reduce the format/start/stop overhead +[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4 MOUNT_2="" -check_and_setup_lustre + +# build up a clean test environment. 
+formatall +setupall [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre && @@ -103,16 +109,13 @@ scrub_prep() { local nfiles=$1 local n - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null + check_mount_and_prep - echo "preparing..." + echo "preparing... $(date)" for n in $(seq $MDSCOUNT); do echo "creating $nfiles files on mds$n" if [ $n -eq 1 ]; then - mkdir -p $DIR/$tdir/mds$n || + mkdir $DIR/$tdir/mds$n || error "Failed to create directory mds$n" else $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n || @@ -121,11 +124,11 @@ scrub_prep() { cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n || error "Failed to copy files to mds$n" if [[ $nfiles -gt 0 ]]; then - createmany -o $DIR/$tdir/mds$n/$tfile $nfiles || - error "createmany failed on mds$n" + createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \ + /dev/null || error "createmany failed on mds$n" fi done - echo "prepared." + echo "prepared $(date)." cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!" for n in $(seq $MDSCOUNT); do echo "stop mds$n" @@ -158,17 +161,13 @@ scrub_stop_mds() { scrub_check_status() { local error_id=$1 local expected=$2 - local actual local n for n in $(seq $MDSCOUNT); do - actual=$(do_facet mds$n $LCTL get_param -n \ + wait_update_facet mds$n "$LCTL get_param -n \ osd-ldiskfs.$(facet_svc mds$n).oi_scrub | - awk '/^status/ { print $2 }') - if [ "$actual" != "$expected" ]; then - error "($error_id) Expected '$expected' on mds$n, but" \ - "got '$actual'" - fi + awk '/^status/ { print \\\$2 }'" "$expected" 6 || + error "($error_id) Expected '$expected' on mds$n" done } @@ -288,21 +287,15 @@ test_1a() { start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(1) Fail to start MDS!" 
- local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(2) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'" mount_client $MOUNT || error "(4) Fail to start client!" - #define OBD_FAIL_OSD_FID_MAPPING 0x193 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193 # update .lustre OI mapping touch $MOUNT/.lustre do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - umount_client $MOUNT || error "(5) Fail to stop client!" echo "stop $SINGLEMDS" @@ -312,10 +305,6 @@ test_1a() { start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(7) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(8) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent" ] || error "(9) Expect 'inconsistent', but got '$FLAGS'" @@ -327,7 +316,6 @@ test_1b() { scrub_remove_ois 1 echo "start MDTs without disabling OI scrub" scrub_start_mds 2 "$MOUNT_OPTS_SCRUB" - sleep 3 scrub_check_status 3 completed mount_client $MOUNT || error "(4) Fail to start client!" scrub_check_data 5 @@ -339,20 +327,15 @@ test_1c() { # OI files to be removed: # idx 0: oi.16.0 - # idx 1: oi.16.1 # idx 2: oi.16.{2,4,8,16,32} # idx 3: oi.16.{3,9,27} - # idx 5: oi.16.{5,25} - # idx 7: oi.16.{7,49} - for index in 0 1 2 3 5 7; do + for index in 0 2 3; do scrub_prep 0 scrub_remove_ois 1 $index - echo "start MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" scrub_check_flags 3 recreated scrub_start 4 - sleep 3 scrub_check_status 5 completed scrub_check_flags 6 "" done @@ -364,43 +347,45 @@ test_2() { scrub_backup_restore 1 echo "starting MDTs without disabling OI scrub" scrub_start_mds 2 "$MOUNT_OPTS_SCRUB" - sleep 3 scrub_check_status 3 completed mount_client $MOUNT || error "(4) Fail to start client!" 
scrub_check_data 5 } run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case" +# test_3 is obsolete, it will be covered by test_5. test_3() { + formatall > /dev/null + setupall > /dev/null + scrub_prep 0 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - sleep 3 scrub_check_status 3 init scrub_check_flags 4 inconsistent - echo "stopall" - stopall > /dev/null } -run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" +#run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" test_4() { scrub_prep 0 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto scrub_check_data 6 - sleep 3 scrub_check_status 7 completed + scrub_check_flags 8 "" } run_test 4 "Trigger OI scrub automatically if inconsistent OI mapping was found" test_5() { - scrub_prep 1500 + formatall > /dev/null + setupall > /dev/null + + scrub_prep 1000 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" @@ -409,73 +394,58 @@ test_5() { mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_check_data 6 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 umount_client $MOUNT || error "(7) Fail to stop client!" 
- scrub_check_status 8 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 - do_facet mds$n $LCTL set_param fail_loc=0x191 - done + #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191 + sleep 4 scrub_stop_mds 9 - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 echo "starting MDTs with OI scrub disabled" scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 11 crashed - scrub_stop_mds 12 - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + echo "starting MDTs without disabling OI scrub" scrub_start_mds 13 "$MOUNT_OPTS_SCRUB" - scrub_check_status 14 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 - do_facet mds$n $LCTL set_param fail_loc=0x192 - done - sleep 4 - scrub_check_status 15 failed + #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192 + scrub_check_status 15 failed mount_client $MOUNT || error "(16) Fail to start client!" 
+ #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + + local n for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - stat $DIR/$tdir/mds$n/${tfile}1000 || - error "(17) Failed to stat mds$n/${tfile}1000" + stat $DIR/$tdir/mds$n/${tfile}800 || + error "(17) Failed to stat mds$n/${tfile}800" done scrub_check_status 18 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 19 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 19 completed scrub_check_flags 20 "" } run_test 5 "OI scrub state machine" @@ -485,45 +455,41 @@ test_6() { scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent mount_client $MOUNT || error "(5) Fail to start client!" 
scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 # Sleep 5 sec to guarantee at least one object processed by OI scrub sleep 5 # Fail the OI scrub to guarantee there is at least one checkpoint - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 - do_facet mds$n $LCTL set_param fail_loc=0x192 - done - sleep 4 + #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192 + scrub_check_status 7 failed + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + + local n for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 # stat will re-trigger OI scrub stat $DIR/$tdir/mds$n/${tfile}800 || error "(8) Failed to stat mds$n/${tfile}800" done umount_client $MOUNT || error "(9) Fail to stop client!" 
- scrub_check_status 10 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 - do_facet mds$n $LCTL set_param fail_loc=0x191 - done + #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191 + sleep 4 local -a position0 for n in $(seq $MDSCOUNT); do @@ -534,11 +500,10 @@ test_6() { scrub_stop_mds 11 - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + echo "starting MDTs without disabling OI scrub" scrub_start_mds 12 "$MOUNT_OPTS_SCRUB" @@ -554,13 +519,10 @@ test_6() { fi done - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 15 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 15 completed scrub_check_flags 16 "" } run_test 6 "OI scrub resumes from last checkpoint" @@ -568,39 +530,31 @@ run_test 6 "OI scrub resumes from last checkpoint" test_7() { scrub_prep 500 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent - mount_client $MOUNT || error "(5) Fail to start client!" - scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 + local n for n in $(seq $MDSCOUNT); do stat $DIR/$tdir/mds$n/${tfile}300 || error "(7) Failed to stat mds$n/${tfile}300!" 
done scrub_check_status 8 scanning - scrub_check_flags 9 inconsistent,auto - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 10 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 10 completed scrub_check_flags "" } run_test 7 "System is available during OI scrub scanning" @@ -608,39 +562,25 @@ run_test 7 "System is available during OI scrub scanning" test_8() { scrub_prep 128 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=1 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_start 5 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=1 fail_loc=0x190 + scrub_start 5 scrub_check_status 6 scanning - scrub_stop 7 - scrub_check_status 8 stopped - scrub_start 9 - scrub_check_status 10 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 11 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 11 completed scrub_check_flags 12 "" } run_test 8 "Control OI scrub manually" @@ -651,14 +591,11 @@ test_9() { return 0 fi - scrub_prep 8000 + scrub_prep 6000 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent local BASE_SPEED1=100 @@ -668,9 +605,7 @@ test_9() { sleep $RUN_TIME1 scrub_check_status 6 completed - scrub_check_flags 7 "" - # OI scrub should run with limited speed under non-inconsistent case scrub_start 
8 -s $BASE_SPEED1 -r @@ -725,7 +660,7 @@ test_9() { do_facet mds$n $LCTL set_param -n \ mdd.$(facet_svc mds$n).lfsck_speed_limit 0 done - sleep 6 + scrub_check_status 13 completed } run_test 9 "OI scrub speed control" @@ -733,50 +668,32 @@ run_test 9 "OI scrub speed control" test_10a() { scrub_prep 0 scrub_backup_restore 1 - echo "starting mds$n with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - mount_client $MOUNT || error "(5) Fail to start client!" - scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=1 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_check_data 6 - scrub_check_status 7 scanning + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=1 fail_loc=0x190 + scrub_check_data 6 + scrub_check_status 7 scanning umount_client $MOUNT || error "(8) Fail to stop client!" 
- scrub_stop_mds 9 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 11 paused - scrub_stop_mds 12 - echo "starting MDTs without disabling OI scrub" scrub_start_mds 13 "$MOUNT_OPTS_SCRUB" - scrub_check_status 14 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 15 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 15 completed scrub_check_flags 16 "" } run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)" @@ -785,77 +702,49 @@ run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)" test_10b() { scrub_prep 0 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 scrub_start 5 - scrub_check_status 6 scanning - scrub_stop_mds 7 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 9 paused - scrub_stop_mds 10 - echo "starting MDTs without disabling OI scrub" scrub_start_mds 11 "$MOUNT_OPTS_SCRUB" - scrub_check_status 12 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 13 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 13 completed scrub_check_flags 14 "" } #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)" 
test_11() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - local CREATED=100 - local tname=`date +%s` - rm -rf $MOUNT/$tname > /dev/null - mkdir -p $MOUNT/$tname || error "(0) Failed to create $MOUNT/$tname" local n + + check_mount_and_prep + for n in $(seq $MDSCOUNT); do - $LFS mkdir -i $((n - 1)) $MOUNT/$tname/mds$n || - error "(1) Fail to mkdir $MOUNT/$tname/mds$n" + $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n || + error "(1) Fail to mkdir $DIR/$tdir/mds$n" - createmany -o $MOUNT/$tname/mds$n/f $CREATED || - error "(2) Fail to create in $tname/mds$n" + createmany -o $DIR/$tdir/mds$n/f $CREATED || + error "(2) Fail to create under $tdir/mds$n" done - cleanup_mount $MOUNT - do_facet $SINGLEMDS $LCTL clear - start_full_debug_logging # reset OI scrub start point by force scrub_start 3 -r - sleep 3 scrub_check_status 4 completed declare -a checked0 @@ -878,7 +767,6 @@ test_11() { # reset OI scrub start point by force scrub_start 6 -r - sleep 3 scrub_check_status 7 completed # OI scrub should skip the new created object only once @@ -890,129 +778,111 @@ test_11() { error "(8) Expect 0 objects skipped on mds$n, but" \ "got $SKIPPED" done - - stop_full_debug_logging - restore_mount $MOUNT || error "(9) Fail to start client!" - rm -rf $MOUNT/$tname > /dev/null } run_test 11 "OI scrub skips the new created objects only once" test_12() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195 do_facet ost1 $LCTL set_param fail_loc=0x195 - createmany -o $DIR/$tdir/f 1000 + createmany -o $DIR/$tdir/f $((count + 32)) - echo "stopall" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + umount_client $MOUNT || error "(1) Fail to stop client!" 
- do_facet ost1 $LCTL set_param fail_loc=0 - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" + stop ost1 || error "(2) Fail to stop ost1" - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + #define OBD_FAIL_OST_NODESTROY 0x233 + do_facet ost1 $LCTL set_param fail_loc=0x233 - sleep 3 - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(3) Expect 'completed', but got '$STATUS'" + start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB || + error "(3) Fail to start ost1" - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(4) ls should succeed" + mount_client $MOUNT || error "(4) Fail to start client!" + + ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail" + + $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!" + + do_facet ost1 $LCTL set_param fail_loc=0 + wait_update_facet ost1 "$LCTL get_param -n \ + osd-ldiskfs.$(facet_svc ost1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 6 || + error "(7) Expected 'completed' on ost1" + + ls -ail $DIR/$tdir > /dev/null || { + $SHOW_SCRUB_ON_OST + error "(8) ls should succeed" + } } run_test 12 "OI scrub can rebuild invalid /O entries" test_13() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 do_facet ost1 $LCTL set_param fail_loc=0x196 - createmany -o $DIR/$tdir/f 1000 + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + umount_client $MOUNT || error "(1) Fail to stop client!" 
+ + stop ost1 || error "(2) Fail to stop ost1" + + start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB || + error "(3) Fail to start ost1" - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" + mount_client $MOUNT || error "(4) Fail to start client!" + + ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail" - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!" - $START_SCRUB_ON_OST || error "(3) Fail to start OI scrub on OST!" - sleep 3 - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + wait_update_facet ost1 "$LCTL get_param -n \ + osd-ldiskfs.$(facet_svc ost1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 6 || + error "(7) Expected 'completed' on ost1" - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed" + ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed" } run_test 13 "OI scrub can rebuild missed /O entries" test_14() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 do_facet ost1 $LCTL set_param fail_loc=0x196 - createmany -o $DIR/$tdir/f 64 + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall" - stopall > /dev/null - echo "setupall" - setupall > /dev/null - - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" - - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + umount_client $MOUNT || error "(1) Fail to stop client!" 
- echo "stopall" - stopall > /dev/null + stop ost1 || error "(2) Fail to stop ost1" echo "run e2fsck" run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" || error "(3) Fail to run e2fsck error" - echo "setupall" - setupall > /dev/null + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || + error "(4) Fail to start ost1" + + mount_client $MOUNT || error "(5) Fail to start client!" local LF_REPAIRED=$($SHOW_SCRUB_ON_OST | awk '/^lf_reparied/ { print $2 }') [ $LF_REPAIRED -gt 0 ] || - error "(4) Some entry under /lost+found should be repaired" + error "(6) Some entry under /lost+found should be repaired" - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed" + ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed" } run_test 14 "OI scrub can repair objects under lost+found" @@ -1028,7 +898,6 @@ test_15() { # run under dryrun mode scrub_start 5 -n on - sleep 3 scrub_check_status 6 completed scrub_check_flags 7 inconsistent scrub_check_params 8 dryrun @@ -1036,7 +905,6 @@ test_15() { # run under dryrun mode again scrub_start 10 -n on - sleep 3 scrub_check_status 11 completed scrub_check_flags 12 inconsistent scrub_check_params 13 dryrun @@ -1044,7 +912,6 @@ test_15() { # run under normal mode scrub_start 15 -n off - sleep 3 scrub_check_status 16 completed scrub_check_flags 17 "" scrub_check_params 18 "" @@ -1052,7 +919,6 @@ test_15() { # run under normal mode again scrub_start 20 -n off - sleep 3 scrub_check_status 21 completed scrub_check_flags 22 "" scrub_check_params 23 "" @@ -1063,6 +929,7 @@ run_test 15 "Dryrun mode OI scrub" # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} +OSTCOUNT=${SAVED_OSTCOUNT} # cleanup the system at last formatall diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 1145118..4b68a25 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -6637,3 +6637,27 @@ free_fd() [ $fd -lt $max_fd ] || error "finding free file descriptor failed" echo $fd 
} + +check_mount_and_prep() +{ + is_mounted $MOUNT || setupall + + rm -rf $DIR/[df][0-9]* || error "Fail to cleanup the env!" + mkdir $DIR/$tdir || error "Fail to mkdir $DIR/$tdir." +} + +# calculate how many ost-objects are precreated but not yet used for the given 0-based MDT/OST index pair (MDS facets are 1-based, hence mdt_idx + 1). +precreated_ost_obj_count() +{ + local mdt_idx=$1 + local ost_idx=$2 + local mdt_name="MDT$(printf '%04x' $mdt_idx)" + local ost_name="OST$(printf '%04x' $ost_idx)" + local proc_path="${FSNAME}-${ost_name}-osc-${mdt_name}" + local last_id=$(do_facet mds$((mdt_idx + 1)) lctl get_param -n \ + osp.$proc_path.prealloc_last_id) + local next_id=$(do_facet mds$((mdt_idx + 1)) lctl get_param -n \ + osp.$proc_path.prealloc_next_id) + + echo $((last_id - next_id + 1)) +} -- 1.8.3.1