From 1dbba329174e6c7f7712f01fc4e44c44400fbc92 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sat, 29 Mar 2014 01:55:20 +0800 Subject: [PATCH] LU-4556 tests: speed up sanity-lfsck and sanity-scrub tests 1) drop unnecessary devices reformat. 2) drop unnecessary system stop/re-start. 3) replace 'sleep' with wait_update_facet to avoid idle wait. 4) drop unnecessary "-p" option for some "mkdir" cases. 5) replace "touch" with "createmany -m". 6) other code style changes and cleanup. Test-Parameters: envdefinitions=SLOW=yes testlist=lfsck-performance Signed-off-by: Fan Yong Change-Id: I047c63b8793fa843fabe1a69b332c42f2f523f68 Reviewed-on: http://review.whamcloud.com/9704 Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Jian Yu Reviewed-by: Oleg Drokin --- lustre/tests/lfsck-performance.sh | 74 +-- lustre/tests/sanity-lfsck.sh | 1132 ++++++++++++++++--------------------- lustre/tests/sanity-scrub.sh | 484 ++++++---------- lustre/tests/test-framework.sh | 24 + 4 files changed, 730 insertions(+), 984 deletions(-) diff --git a/lustre/tests/lfsck-performance.sh b/lustre/tests/lfsck-performance.sh index 17da00e..f3b9046 100644 --- a/lustre/tests/lfsck-performance.sh +++ b/lustre/tests/lfsck-performance.sh @@ -43,8 +43,6 @@ SHOW_NAMESPACE="${RLCTL} get_param -n mdd.${MDT_DEV}.lfsck_namespace" MNTOPTS_NOSCRUB="-o user_xattr,noscrub" remote_mds && ECHOCMD=${RCMD} || ECHOCMD="eval" -LFSCKDIR="$MOUNT/lfsck/" - if [ ${NTHREADS} -eq 0 ]; then CPUCORE=$(${RCMD} cat /proc/cpuinfo | grep "processor.*:" | wc -l) NTHREADS=$((CPUCORE * 2)) @@ -74,7 +72,7 @@ lfsck_create() { test_mkdir ${tdir} EOF" - for ((j=1; j<${threads}; j++)); do + for ((j = 1; j < ${threads}; j++)); do ${ECHOCMD} "${LCTL} <<-EOF cfg_device ${echodev} test_mkdir ${tdir}${j} @@ -142,7 +140,7 @@ test_0() { $(facet_fstype ${SINGLEMDS}) --reformat ${MDT_DEVNAME} \ $(mdsvdevname 1) >/dev/null || error "Fail to reformat the MDS!" 
- for ((i=$MINCOUNT; i<=$MAXCOUNT; i=$((i * FACTOR)))); do + for ((i = $MINCOUNT; i <= $MAXCOUNT; i = $((i * FACTOR)))); do local nfiles=$((i - BCOUNT)) echo "+++ start to create for ${i} files set at: $(date) +++" @@ -187,7 +185,8 @@ test_1() { $(facet_fstype ${SINGLEMDS}) --reformat ${MDT_DEVNAME} \ $(mdsvdevname 1) > /dev/null || error "Fail to reformat the MDS" - for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do + for ((i = $MINCOUNT_REPAIR; i <= $MAXCOUNT_REPAIR; + i = $((i * FACTOR)))); do local nfiles=$((i - BCOUNT)) echo "+++ start to create for ${i} files set at: $(date) +++" @@ -231,7 +230,8 @@ run_test 1 "lfsck namespace performance (backup/restore) without load" test_2() { local i - for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do + for ((i = $MINCOUNT_REPAIR; i <= $MAXCOUNT_REPAIR; + i = $((i * FACTOR)))); do stopall do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local reformat_external_journal @@ -281,7 +281,7 @@ test_3() { $(facet_fstype ${SINGLEMDS}) --reformat ${MDT_DEVNAME} \ $(mdsvdevname 1) > /dev/null || error "Fail to reformat the MDS" - for ((i=$inc_count; i<=$BASE_COUNT; i=$((i + inc_count)))); do + for ((i = $inc_count; i <= $BASE_COUNT; i = $((i + inc_count)))); do local nfiles=$((i - BCOUNT)) echo "+++ start to create for ${i} files set at: $(date) +++" @@ -312,7 +312,7 @@ test_3() { local inc_speed=$((FULL_SPEED * INCFACTOR / 100)) local j - for ((j=$inc_speed; j<$FULL_SPEED; j=$((j + inc_speed)))); do + for ((j = $inc_speed; j < $FULL_SPEED; j = $((j + inc_speed)))); do start ${SINGLEMDS} $MDT_DEVNAME $MNTOPTS_NOSCRUB > /dev/null || error "Fail to start MDS!" 
@@ -387,7 +387,7 @@ layout_gen_one() local idx1=$1 local idx2=$2 local mntpt="/mnt/lustre_lfsck_${idx1}_${idx2}" - local basedir="$mntpt/lfsck/$idx1/$idx2" + local basedir="$mntpt/$tdir/$idx1/$idx2" mkdir -p $mntpt || { error_noexit "(11) Fail to mkdir $mntpt" @@ -421,10 +421,10 @@ layout_gen_set() local cnt=$1 echo "##### Start generate test set for subdirs=$cnt at: $(date) #####" - for ((k=0; k<$MDSCOUNT; k++)); do + for ((k = 0; k < $MDSCOUNT; k++)); do $LFS mkdir -i ${k} $LFSCKDIR/${k} || return 10 - for ((l=1; l<=$cnt; l++)); do + for ((l = 1; l <= $cnt; l++)); do layout_gen_one ${k} ${l} & done done @@ -441,14 +441,15 @@ t4_test() echo "stopall" stopall > /dev/null || error "(1) Fail to stopall" + LFSCKDIR="$DIR/$tdir" MDSCOUNT=1 - for ((i=1; i<=$saved_ostcount; i=$((i * 2)))); do + for ((i = 1; i <= $saved_ostcount; i = $((i * 2)))); do OSTCOUNT=${i} echo "+++++ Start cycle ostcount=$OSTCOUNT at: $(date) +++++" echo - for ((j=$MINSUBDIR; j<=$MAXSUBDIR; j=$((j * FACTOR)))); do + for ((j = $MINSUBDIR; j <= $MAXSUBDIR; j = $((j * FACTOR)))); do echo "formatall" formatall > /dev/null || error "(2) Fail to formatall, subdirs=${j}" @@ -458,15 +459,15 @@ t4_test() error "(3) Fail to setupall, subdirs=${j}" mkdir $LFSCKDIR || - error "(4) Fail to mkdir $LFSCKDIR, subdirs=${j}" + error "(4) mkdir $LFSCKDIR, subdirs=${j}" $LFS setstripe -c ${OSTCOUNT} -i 0 $LFSCKDIR || - error "(5) Fail to setstripe on $LFSCKDIR, subdirs=${j}" + error "(5) setstripe on $LFSCKDIR, subdirs=${j}" local RC=0 layout_gen_set ${j} || RC=$? [ $RC -eq 0 ] || - error "(6) Fail to generate set $RC, subdirs=${j}" + error "(6) generate set $RC, subdirs=${j}" RC=0 layout_test_one || RC=$? 
@@ -483,9 +484,6 @@ t4_test() MDSCOUNT=$saved_mdscount OSTCOUNT=$saved_ostcount - - echo "formatall" - formatall > /dev/null || error "(9) Fail to stopall" } test_4a() { @@ -496,15 +494,9 @@ run_test 4a "Single MDS lfsck layout performance (routine case) without load" test_4b() { echo "Inject failure stub to simulate dangling reference" #define OBD_FAIL_LFSCK_DANGLING 0x1610 - for i in $(seq $OSTCOUNT); do - do_facet ost${i} $LCTL set_param fail_loc=0x1610 - done + do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x1610 t4_test - - for i in $(seq $OSTCOUNT); do - do_facet ost${i} $LCTL set_param fail_loc=0 - done } run_test 4b "Single MDS lfsck layout performance (repairing case) without load" @@ -515,13 +507,14 @@ t5_test() echo "stopall" stopall > /dev/null || error "(1) Fail to stopall" - for ((i=1; i<=$saved_mdscount; i++)); do + LFSCKDIR="$DIR/$tdir" + for ((i = 1; i <= $saved_mdscount; i++)); do MDSCOUNT=${i} echo "+++++ Start cycle mdscount=$MDSCOUNT at: $(date) +++++" echo - for ((j=$MINSUBDIR; j<=$MAXSUBDIR; j=$((j * FACTOR)))); do + for ((j = $MINSUBDIR; j <= $MAXSUBDIR; j = $((j * FACTOR)))); do echo "formatall" formatall > /dev/null || error "(2) Fail to formatall, subdirs=${j}" @@ -531,15 +524,15 @@ t5_test() error "(3) Fail to setupall, subdirs=${j}" mkdir $LFSCKDIR || - error "(4) Fail to mkdir $LFSCKDIR, subdirs=${j}" + error "(4) mkdir $LFSCKDIR, subdirs=${j}" $LFS setstripe -c ${OSTCOUNT} -i 0 $LFSCKDIR || - error "(5) Fail to setstripe on $LFSCKDIR, subdirs=${j}" + error "(5) setstripe on $LFSCKDIR, subdirs=${j}" local RC=0 layout_gen_set ${j} || RC=$? [ $RC -eq 0 ] || - error "(6) Fail to generate set $RC, subdirs=${j}" + error "(6) generate set $RC, subdirs=${j}" RC=0 layout_test_one || RC=$? 
@@ -555,9 +548,6 @@ t5_test() done MDSCOUNT=$saved_mdscount - - echo "formatall" - formatall > /dev/null || error "(9) Fail to stopall" } test_5a() { @@ -568,15 +558,9 @@ run_test 5a "lfsck layout performance (routine case) without load for DNE" test_5b() { echo "Inject failure stub to simulate dangling reference" #define OBD_FAIL_LFSCK_DANGLING 0x1610 - for i in $(seq $OSTCOUNT); do - do_facet ost${i} $LCTL set_param fail_loc=0x1610 - done + do_nodes $(comma_list $(osts_nodes)) $LCTL set_param fail_loc=0x1610 t5_test - - for i in $(seq $OSTCOUNT); do - do_facet ost${i} $LCTL set_param fail_loc=0 - done } run_test 5b "lfsck layout performance (repairing case) without load for DNE" @@ -620,6 +604,7 @@ test_6() { local saved_mdscount=$MDSCOUNT + LFSCKDIR="$DIR/$tdir" MDSCOUNT=1 echo "formatall" formatall > /dev/null || error "(2) Fail to formatall" @@ -658,8 +643,8 @@ test_6() { local nfiles=$((inc_count / 2)) lfsck_attach - for ((m=0, n=$INCFACTOR; n<100; - m=$((m + inc_count)), n=$((n + INCFACTOR)))); do + for ((m = 0, n = $INCFACTOR; n < 100; + m = $((m + inc_count)), n = $((n + INCFACTOR)))); do local sl=$((SPEED * n / 100)) $STOP_LFSCK > /dev/null 2>&1 @@ -702,9 +687,6 @@ test_6() { stopall > /dev/null || error "(14) Fail to stopall" MDSCOUNT=$saved_mdscount - - echo "formatall" - formatall > /dev/null || error "(15) Fail to stopall" } run_test 6 "lfsck layout impact on create performance" diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 756100c..96b2716 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -22,12 +22,17 @@ require_dsh_mds || exit 0 MCREATE=${MCREATE:-mcreate} SAVED_MDSSIZE=${MDSSIZE} SAVED_OSTSIZE=${OSTSIZE} +SAVED_OSTCOUNT=${OSTCOUNT} # use small MDS + OST size to speed formatting time # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size MDSSIZE=100000 OSTSIZE=100000 +# no need too much OSTs, to reduce the format/start/stop overhead +[ $OSTCOUNT -gt 4 ] && 
OSTCOUNT=4 -check_and_setup_lustre +# build up a clean test environment. +formatall +setupall [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.60) ]] && skip "Need MDS version at least 2.3.60" && check_and_cleanup_lustre && @@ -66,50 +71,41 @@ lfsck_prep() { local nfiles=$2 local igif=$3 - echo "formatall" - formatall > /dev/null - - echo "setupall" - setupall > /dev/null + check_mount_and_prep + echo "preparing... $nfiles * $ndirs files will be created $(date)." if [ ! -z $igif ]; then #define OBD_FAIL_FID_IGIF 0x1504 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1504 fi - echo "preparing... ${nfiles} * ${ndirs} files will be created." - mkdir -p $DIR/$tdir - cp $LUSTRE/tests/*.sh $DIR/ - for ((i = 0; i < ${ndirs}; i++)); do - mkdir $DIR/$tdir/d${i} - touch $DIR/$tdir/f${i} - for ((j = 0; j < ${nfiles}; j++)); do - touch $DIR/$tdir/d${i}/f${j} - done - mkdir $DIR/$tdir/e${i} - done + cp $LUSTRE/tests/*.sh $DIR/$tdir/ + if [ $ndirs -gt 0 ]; then + createmany -d $DIR/$tdir/d $ndirs + createmany -m $DIR/$tdir/f $ndirs + if [ $nfiles -gt 0 ]; then + for ((i = 0; i < $ndirs; i++)); do + createmany -m $DIR/$tdir/d${i}/f $nfiles > \ + /dev/null || error "createmany $nfiles" + done + fi + createmany -d $DIR/$tdir/e $ndirs + fi if [ ! -z $igif ]; then touch $DIR/$tdir/dummy do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fi - echo "prepared." - cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!" - echo "stop $SINGLEMDS" - stop $SINGLEMDS > /dev/null || error "Fail to stop MDS!" + echo "prepared $(date)." } test_0() { - lfsck_prep 10 10 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + lfsck_prep 3 3 #define OBD_FAIL_LFSCK_DELAY1 0x1600 - do_facet $SINGLEMDS $LCTL set_param fail_val=3 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 - $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!" 
+ do_facet $SINGLEMDS $LCTL set_param fail_val=3 fail_loc=0x1600 + $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!" $SHOW_NAMESPACE || error "Fail to monitor LFSCK (3)" @@ -129,12 +125,13 @@ test_0() { [ "$STATUS" == "scanning-phase1" ] || error "(8) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - wait_update_facet $SINGLEMDS \ - "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace | \ - awk '/^status/ { print \\\$2 }'" "completed" 20 || \ + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE error "(9) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -143,17 +140,19 @@ test_0() { local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }') $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!" - wait_update_facet $SINGLEMDS \ - "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace | \ - awk '/^status/ { print \\\$2 }'" "completed" 20 || \ + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE error "(12) unexpected status" + } local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }') [ $((scanned1 + 1)) -eq $scanned2 ] || error "(13) Expect success $((scanned1 + 1)), but got $scanned2" echo "stopall, should NOT crash LU-3649" - stopall > /dev/null + stopall || error "(14) Fail to stopall" } run_test 0 "Control LFSCK manually" @@ -162,11 +161,6 @@ test_1a() { skip "OI Scrub not implemented for ZFS" && return lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" 
#define OBD_FAIL_FID_INDIR 0x1501 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501 @@ -174,12 +168,13 @@ test_1a() { do_facet $SINGLEMDS $LCTL set_param fail_loc=0 umount_client $MOUNT - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -202,11 +197,6 @@ test_1b() skip "OI Scrub not implemented for ZFS" && return lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_FID_INLMA 0x1502 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502 @@ -216,12 +206,13 @@ test_1b() umount_client $MOUNT #define OBD_FAIL_FID_NOLMA 0x1506 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506 - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" 
+ wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -241,11 +232,6 @@ run_test 1b "LFSCK can find out and repair missed FID-in-LMA" test_2a() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 @@ -253,12 +239,13 @@ test_2a() { do_facet $SINGLEMDS $LCTL set_param fail_loc=0 umount_client $MOUNT - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -280,11 +267,6 @@ run_test 2a "LFSCK can find out and repair crashed linkEA entry" test_2b() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 @@ -292,12 +274,13 @@ test_2b() do_facet $SINGLEMDS $LCTL set_param fail_loc=0 umount_client $MOUNT - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" 
- - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase2/ { print $2 }') @@ -319,11 +302,6 @@ run_test 2b "LFSCK can find out and remove invalid linkEA entry" test_2c() { lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_LFSCK_LINKEA_MORE2 0x1605 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1605 @@ -331,12 +309,13 @@ test_2c() do_facet $SINGLEMDS $LCTL set_param fail_loc=0 umount_client $MOUNT - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase2/ { print $2 }') @@ -361,35 +340,35 @@ test_4() skip "OI Scrub not implemented for ZFS" && return lfsck_prep 3 3 + cleanup_mount $MOUNT || error "(0.1) Fail to stop client!" + stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!" + mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!" 
echo "start $SINGLEMDS with disabling OI scrub" start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(3) Expect 'init', but got '$STATUS'" - #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 - $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 + $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^flags/ { print \\\$2 }'" "inconsistent" 6 || { + $SHOW_NAMESPACE + error "(5) unexpected status" + } - sleep 5 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || - error "(5) Expect 'scanning-phase1', but got '$STATUS'" - - local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') - [ "$FLAGS" == "inconsistent" ] || - error "(6) Expect 'inconsistent', but got '$FLAGS'" + error "(6) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(7) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(7) unexpected status" + } FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'" @@ -404,7 +383,6 @@ test_4() #define OBD_FAIL_FID_LOOKUP 0x1505 do_facet $SINGLEMDS $LCTL 
set_param fail_loc=0x1505 ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent." - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 } run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore" @@ -415,35 +393,35 @@ test_5() skip "OI Scrub not implemented for ZFS" && return lfsck_prep 1 1 1 + cleanup_mount $MOUNT || error "(0.1) Fail to stop client!" + stop $SINGLEMDS > /dev/null || error "(0.2) Fail to stop MDS!" + mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS with disabling OI scrub" start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start MDS!" - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(3) Expect 'init', but got '$STATUS'" - #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 - $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 + $START_NAMESPACE -r || error "(4) Fail to start LFSCK for namespace!" 
+ wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^flags/ { print \\\$2 }'" "inconsistent,upgrade" 6 || { + $SHOW_NAMESPACE + error "(5) unexpected status" + } - sleep 5 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || - error "(5) Expect 'scanning-phase1', but got '$STATUS'" - - local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') - [ "$FLAGS" == "inconsistent,upgrade" ] || - error "(6) Expect 'inconsistent,upgrade', but got '$FLAGS'" + error "(6) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(7) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(7) unexpected status" + } FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'" @@ -467,18 +445,14 @@ test_5() [ "$dummyname" == "$DIR/$tdir/dummy" ] || error "(13) Fail to generate linkEA: $dummyfid $dummyname" } -run_test 5 "LFSCK can handle IFIG object upgrading" +run_test 5 "LFSCK can handle IGIF object upgrading" test_6a() { - lfsck_prep 10 10 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + lfsck_prep 5 5 #define OBD_FAIL_LFSCK_DELAY1 0x1600 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 - $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!" 
+ do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600 + $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!" local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || @@ -489,103 +463,111 @@ test_6a() { # Fail the LFSCK to guarantee there is at least one checkpoint #define OBD_FAIL_LFSCK_FATAL1 0x1608 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001608 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "failed" ] || - error "(4) Expect 'failed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "failed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } - local POSITION0=$($SHOW_NAMESPACE | - awk '/^last_checkpoint_position/ { print $2 }' | - tr -d ',') + local POS0=$($SHOW_NAMESPACE | + awk '/^last_checkpoint_position/ { print $2 }' | + tr -d ',') #define OBD_FAIL_LFSCK_DELAY1 0x1600 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1600 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!" 
STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || error "(6) Expect 'scanning-phase1', but got '$STATUS'" - local POSITION1=$($SHOW_NAMESPACE | - awk '/^latest_start_position/ { print $2 }' | - tr -d ',') - [ $POSITION0 -lt $POSITION1 ] || - error "(7) Expect larger than: $POSITION0, but got $POSITION1" + local POS1=$($SHOW_NAMESPACE | + awk '/^latest_start_position/ { print $2 }' | + tr -d ',') + [ $POS0 -lt $POS1 ] || + error "(7) Expect larger than: $POS0, but got $POS1" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(8) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(8) unexpected status" + } } run_test 6a "LFSCK resumes from last checkpoint (1)" test_6b() { - lfsck_prep 10 10 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + lfsck_prep 5 5 #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 - $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 + $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!" 
local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || error "(3) Expect 'scanning-phase1', but got '$STATUS'" - # Sleep 3 sec to guarantee at least one object processed by LFSCK - sleep 3 + # Sleep 5 sec to guarantee that we are in the directory scanning + sleep 5 # Fail the LFSCK to guarantee there is at least one checkpoint #define OBD_FAIL_LFSCK_FATAL2 0x1609 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "failed" ] || - error "(4) Expect 'failed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "failed" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } + + local O_POS0=$($SHOW_NAMESPACE | + awk '/^last_checkpoint_position/ { print $2 }' | + tr -d ',') - local POSITION0=$($SHOW_NAMESPACE | - awk '/^last_checkpoint_position/ { print $4 }') + local D_POS0=$($SHOW_NAMESPACE | + awk '/^last_checkpoint_position/ { print $4 }') #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 $START_NAMESPACE || error "(5) Fail to start LFSCK for namespace!" 
STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || error "(6) Expect 'scanning-phase1', but got '$STATUS'" - local POSITION1=$($SHOW_NAMESPACE | - awk '/^latest_start_position/ { print $4 }') - if [ $POSITION0 -gt $POSITION1 ]; then - [ $POSITION1 -eq 0 -a $POSITION0 -eq $((POSITION1 + 1)) ] || - error "(7) Expect larger than: $POSITION0, but got $POSITION1" + local O_POS1=$($SHOW_NAMESPACE | + awk '/^latest_start_position/ { print $2 }' | + tr -d ',') + local D_POS1=$($SHOW_NAMESPACE | + awk '/^latest_start_position/ { print $4 }') + + if [ "$D_POS0" == "N/A" -o "$D_POS1" == "N/A" ]; then + [ $O_POS0 -lt $O_POS1 ] || + error "(7.1) $O_POS1 is not larger than $O_POS0" + else + [ $D_POS0 -lt $D_POS1 ] || + error "(7.2) $D_POS1 is not larger than $D_POS0" fi - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(8) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(8) unexpected status" + } } run_test 6b "LFSCK resumes from last checkpoint (2)" test_7a() { - lfsck_prep 10 10 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" + lfsck_prep 5 5 + umount_client $MOUNT #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 - $START_NAMESPACE || error "(2) Fail to start LFSCK for namespace!" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1601 + $START_NAMESPACE -r || error "(2) Fail to start LFSCK for namespace!" 
local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "scanning-phase1" ] || @@ -604,23 +586,19 @@ test_7a() [ "$STATUS" == "scanning-phase1" ] || error "(6) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(7) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(7) unexpected status" + } } run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)" test_7b() { lfsck_prep 2 2 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 @@ -629,14 +607,14 @@ test_7b() done #define OBD_FAIL_LFSCK_DELAY3 0x1602 - do_facet $SINGLEMDS $LCTL set_param fail_val=1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602 - $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" - - sleep 3 - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "scanning-phase2" ] || - error "(4) Expect 'scanning-phase2', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_val=1 fail_loc=0x1602 + $START_NAMESPACE -r || error "(3) Fail to start LFSCK for namespace!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || { + $SHOW_NAMESPACE + error "(4) unexpected status" + } echo "stop $SINGLEMDS" stop $SINGLEMDS > /dev/null || error "(5) Fail to stop MDS!" 
@@ -649,28 +627,29 @@ test_7b() [ "$STATUS" == "scanning-phase2" ] || error "(7) Expect 'scanning-phase2', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(8) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(8) unexpected status" + } } run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)" test_8() { + echo "formatall" + formatall > /dev/null + echo "setupall" + setupall > /dev/null + lfsck_prep 20 20 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') [ "$STATUS" == "init" ] || error "(2) Expect 'init', but got '$STATUS'" - mount_client $MOUNT || error "(3) Fail to start client!" - #define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 mkdir $DIR/$tdir/crashed @@ -681,9 +660,10 @@ test_8() touch $DIR/$tdir/dummy${i} done + umount_client $MOUNT || error "(3) Fail to stop client!" + #define OBD_FAIL_LFSCK_DELAY2 0x1601 - do_facet $SINGLEMDS $LCTL set_param fail_val=2 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 + do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1601 $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!" 
STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') @@ -704,10 +684,12 @@ test_8() #define OBD_FAIL_LFSCK_FATAL2 0x1609 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x80001609 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "failed" ] || - error "(10) Expect 'failed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "failed" 6 || { + $SHOW_NAMESPACE + error "(10) unexpected status" + } #define OBD_FAIL_LFSCK_DELAY1 0x1600 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1600 @@ -758,29 +740,30 @@ test_8() error "(20) Expect 'paused', but got '$STATUS'" #define OBD_FAIL_LFSCK_DELAY3 0x1602 - do_facet $SINGLEMDS $LCTL set_param fail_val=2 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602 + do_facet $SINGLEMDS $LCTL set_param fail_val=2 fail_loc=0x1602 $START_NAMESPACE || error "(21) Fail to start LFSCK for namespace!" - sleep 2 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "scanning-phase2" ] || - error "(22) Expect 'scanning-phase2', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || { + $SHOW_NAMESPACE + error "(22) unexpected status" + } local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') [ "$FLAGS" == "scanned-once,inconsistent" ] || error "(23) Expect 'scanned-once,inconsistent',but got '$FLAGS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 2 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(24) Expect 'completed', but got '$STATUS'" + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(24) 
unexpected status" + } FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(25) Expect empty flags, but got '$FLAGS'" - } run_test 8 "LFSCK state machine" @@ -791,17 +774,10 @@ test_9a() { fi lfsck_prep 70 70 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(2) Expect 'init', but got '$STATUS'" local BASE_SPEED1=100 local RUN_TIME1=10 - $START_NAMESPACE -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!" + $START_NAMESPACE -r -s $BASE_SPEED1 || error "(3) Fail to start LFSCK!" sleep $RUN_TIME1 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') @@ -859,37 +835,28 @@ test_9b() { fi lfsck_prep 0 0 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - mount_client $MOUNT || error "(2) Fail to start client!" - - echo "Another preparing... 50 * 50 files (with error) will be created." + echo "Preparing another 50 * 50 files (with error) at $(date)." #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + createmany -d $DIR/$tdir/d 50 + createmany -m $DIR/$tdir/f 50 for ((i = 0; i < 50; i++)); do - mkdir -p $DIR/$tdir/d${i} - touch $DIR/$tdir/f${i} - for ((j = 0; j < 50; j++)); do - touch $DIR/$tdir/d${i}/f${j} - done + createmany -m $DIR/$tdir/d${i}/f 50 > /dev/null done - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(3) Expect 'init', but got '$STATUS'" - #define OBD_FAIL_LFSCK_NO_DOUBLESCAN 0x160c do_facet $SINGLEMDS $LCTL set_param fail_loc=0x160c - $START_NAMESPACE || error "(4) Fail to start LFSCK!" 
- - sleep 10 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "stopped" ] || - error "(5) Expect 'stopped', but got '$STATUS'" + $START_NAMESPACE -r || error "(4) Fail to start LFSCK!" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "stopped" 10 || { + $SHOW_NAMESPACE + error "(5) unexpected status" + } do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + echo "Prepared at $(date)." local BASE_SPEED1=50 local RUN_TIME1=10 @@ -935,10 +902,12 @@ test_9b() { do_facet $SINGLEMDS \ $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0 - sleep 5 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(11) Expect 'completed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(11) unexpected status" + } } run_test 9b "LFSCK speed control (2)" @@ -948,43 +917,35 @@ test_10() skip "lookup(..)/linkea on ZFS issue" && return lfsck_prep 1 1 - echo "start $SINGLEMDS" - start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || - error "(1) Fail to start MDS!" - - mount_client $MOUNT || error "(2) Fail to start client!" + echo "Preparing more files with error at $(date)." 
#define OBD_FAIL_LFSCK_LINKEA_CRASH 0x1603 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1603 + for ((i = 0; i < 1000; i = $((i+2)))); do mkdir -p $DIR/$tdir/d${i} touch $DIR/$tdir/f${i} - for ((j = 0; j < 5; j++)); do - touch $DIR/$tdir/d${i}/f${j} - done + createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null done #define OBD_FAIL_LFSCK_LINKEA_MORE 0x1604 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1604 + for ((i = 1; i < 1000; i = $((i+2)))); do mkdir -p $DIR/$tdir/d${i} touch $DIR/$tdir/f${i} - for ((j = 0; j < 5; j++)); do - touch $DIR/$tdir/d${i}/f${j} - done + createmany -m $DIR/$tdir/d${i}/f 5 > /dev/null done do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + echo "Prepared at $(date)." + ln $DIR/$tdir/f200 $DIR/$tdir/d200/dummy umount_client $MOUNT mount_client $MOUNT || error "(3) Fail to start client!" - local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(4) Expect 'init', but got '$STATUS'" - - $START_NAMESPACE -s 100 || error "(5) Fail to start LFSCK!" + $START_NAMESPACE -r -s 100 || error "(5) Fail to start LFSCK!" 
sleep 10 STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') @@ -1014,11 +975,12 @@ test_10() do_facet $SINGLEMDS \ $LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0 - umount_client $MOUNT - sleep 10 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(16) Expect 'completed', but got '$STATUS'" + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_NAMESPACE + error "(16) unexpected status" + } } run_test 10 "System is available during LFSCK scanning" @@ -1039,52 +1001,38 @@ ost_remove_lastid() { } test_11a() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir - createmany -o $DIR/$tdir/f 64 + createmany -o $DIR/$tdir/f 64 || error "(0) Fail to create 64 files." echo "stopall" stopall > /dev/null ost_remove_lastid 1 0 || error "(1) Fail to remove LAST_ID" - echo "start ost1" start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null || error "(2) Fail to start ost1" - local STATUS=$($SHOW_LAYOUT_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(3) Expect 'init', but got '$STATUS'" - #define OBD_FAIL_LFSCK_DELAY4 0x160e - do_facet ost1 $LCTL set_param fail_val=3 - do_facet ost1 $LCTL set_param fail_loc=0x160e + do_facet ost1 $LCTL set_param fail_val=3 fail_loc=0x160e echo "trigger LFSCK for layout on ost1 to rebuild the LAST_ID(s)" - $START_LAYOUT_ON_OST || error "(4) Fail to start LFSCK on OST!" + $START_LAYOUT_ON_OST -r || error "(4) Fail to start LFSCK on OST!" 
wait_update_facet ost1 "$LCTL get_param -n \ obdfilter.${OST_DEV}.lfsck_layout | awk '/^flags/ { print \\\$2 }'" "crashed_lastid" 60 || { $SHOW_LAYOUT_ON_OST - return 5 + error "(5) unexpected status" } - do_facet ost1 $LCTL set_param fail_val=0 - do_facet ost1 $LCTL set_param fail_loc=0 + do_facet ost1 $LCTL set_param fail_val=0 fail_loc=0 wait_update_facet ost1 "$LCTL get_param -n \ obdfilter.${OST_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 3 || { + awk '/^status/ { print \\\$2 }'" "completed" 6 || { $SHOW_LAYOUT_ON_OST - return 6 + error "(6) unexpected status" } echo "the LAST_ID(s) should have been rebuilt" @@ -1094,14 +1042,7 @@ test_11a() { run_test 11a "LFSCK can rebuild lost last_id" test_11b() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir echo "set fail_loc=0x160d to skip the updating LAST_ID on-disk" @@ -1113,20 +1054,14 @@ test_11b() { awk -F: '{ print $2 }') umount_client $MOUNT - echo "stop ost1" stop ost1 || error "(1) Fail to stop ost1" #define OBD_FAIL_OST_ENOSPC 0x215 do_facet ost1 $LCTL set_param fail_loc=0x215 - echo "start ost1" start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || error "(2) Fail to start ost1" - local STATUS=$($SHOW_LAYOUT_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(3) Expect 'init', but got '$STATUS'" - for ((i = 0; i < 60; i++)); do lastid2=$(do_facet ost1 "lctl get_param -n \ obdfilter.${ost1_svc}.last_id" | grep 0x100000000 | @@ -1140,19 +1075,17 @@ test_11b() { error "(4) expect lastid1 [ $lastid1 ] > lastid2 [ $lastid2 ]" echo "trigger LFSCK for layout on ost1 to rebuild the on-disk LAST_ID" - $START_LAYOUT_ON_OST || error "(5) Fail to start LFSCK on OST!" + $START_LAYOUT_ON_OST -r || error "(5) Fail to start LFSCK on OST!" 
wait_update_facet ost1 "$LCTL get_param -n \ obdfilter.${OST_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 3 || { + awk '/^status/ { print \\\$2 }'" "completed" 6 || { $SHOW_LAYOUT_ON_OST - return 6 + error "(6) unexpected status" } - echo "stop ost1" stop ost1 || error "(7) Fail to stop ost1" - echo "start ost1" start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || error "(8) Fail to start ost1" @@ -1165,6 +1098,7 @@ test_11b() { } do_facet ost1 $LCTL set_param fail_loc=0 + stopall || error "(10) Fail to stopall" } run_test 11b "LFSCK can rebuild crashed last_id" @@ -1172,30 +1106,16 @@ test_12() { [ $MDSCOUNT -lt 2 ] && skip "We need at least 2 MDSes for test_12" && exit 0 - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir - - echo "All the LFSCK targets should be in 'init' status." + check_mount_and_prep for k in $(seq $MDSCOUNT); do - local STATUS=$(do_facet mds${k} $LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_layout | - awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) MDS${k} Expect 'init', but got '$STATUS'" - $LFS mkdir -i $((k - 1)) $DIR/$tdir/${k} - createmany -o $DIR/$tdir/${k}/f 100 + createmany -o $DIR/$tdir/${k}/f 100 || + error "(0) Fail to create 100 files." done echo "Start namespace LFSCK on all targets by single command (-s 1)." do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \ - -s 1 || error "(2) Fail to start LFSCK on all devices!" + -s 1 -r || error "(2) Fail to start LFSCK on all devices!" echo "All the LFSCK targets should be in 'scanning-phase1' status." for k in $(seq $MDSCOUNT); do @@ -1233,7 +1153,7 @@ test_12() { echo "Start layout LFSCK on all targets by single command (-s 1)." do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \ - -s 1 || error "(8) Fail to start LFSCK on all devices!" + -s 1 -r || error "(8) Fail to start LFSCK on all devices!" 
echo "All the LFSCK targets should be in 'scanning-phase1' status." for k in $(seq $MDSCOUNT); do @@ -1289,14 +1209,7 @@ test_13() { echo "MDT-object FID." echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep echo "Inject failure stub to simulate bad lmm_oi" #define OBD_FAIL_LFSCK_BAD_LMMOI 0x160f @@ -1304,17 +1217,15 @@ test_13() { createmany -o $DIR/$tdir/f 32 do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - echo "stopall to cleanup object cache" - stopall > /dev/null - echo "setupall" - setupall > /dev/null - echo "Trigger layout LFSCK to find out the bad lmm_oi and fix them" - $START_LAYOUT || error "(1) Fail to start LFSCK for layout!" + $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!" wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2 + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_LAYOUT + error "(2) unexpected status" + } local repaired=$($SHOW_LAYOUT | awk '/^repaired_others/ { print $2 }') @@ -1329,40 +1240,38 @@ test_14() { echo "otherwise, the LFSCK should re-create the missed OST-object." 
echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS setstripe -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + echo "Inject failure stub to simulate dangling referenced MDT-object" #define OBD_FAIL_LFSCK_DANGLING 0x1610 do_facet ost1 $LCTL set_param fail_loc=0x1610 - createmany -o $DIR/$tdir/f 64 + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall to cleanup object cache" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + # exhaust other pre-created dangling cases + count=$(precreated_ost_obj_count 0 0) + createmany -o $DIR/$tdir/a $count || + error "(0) Fail to create $count files." echo "'ls' should fail because of dangling referenced MDT-object" ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail." echo "Trigger layout LFSCK to find out dangling reference" - $START_LAYOUT || error "(2) Fail to start LFSCK for layout!" + $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!" 
wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 6 || return 3 + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_LAYOUT + error "(3) unexpected status" + } local repaired=$($SHOW_LAYOUT | awk '/^repaired_dangling/ { print $2 }') - [ $repaired -eq 32 ] || + [ $repaired -ge 32 ] || error "(4) Fail to repair dangling reference: $repaired" echo "'ls' should fail because it will not repair dangling by default" @@ -1373,15 +1282,18 @@ test_14() { wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 6 || return 3 + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_LAYOUT + error "(7) unexpected status" + } - local repaired=$($SHOW_LAYOUT | + repaired=$($SHOW_LAYOUT | awk '/^repaired_dangling/ { print $2 }') - [ $repaired -eq 32 ] || - error "(7) Fail to repair dangling reference: $repaired" + [ $repaired -ge 32 ] || + error "(8) Fail to repair dangling reference: $repaired" echo "'ls' should success after layout LFSCK repairing" - ls -ail $DIR/$tdir > /dev/null || error "(8) ls should success." + ls -ail $DIR/$tdir > /dev/null || error "(9) ls should success." } run_test 14 "LFSCK can repair MDT-object with dangling reference" @@ -1392,14 +1304,7 @@ test_15a() { echo "the OST-object to back point to the right MDT-object." 
echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS setstripe -c 1 -i 0 $DIR/$tdir echo "Inject failure stub to make the OST-object to back point to" @@ -1409,21 +1314,17 @@ test_15a() { do_facet ost1 $LCTL set_param fail_loc=0x1611 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1 cancel_lru_locks osc - sync - sleep 2 do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall to cleanup object cache" - stopall > /dev/null - echo "setupall" - setupall > /dev/null - echo "Trigger layout LFSCK to find out unmatched pairs and fix them" - $START_LAYOUT || error "(1) Fail to start LFSCK for layout!" + $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!" wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2 + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_LAYOUT + error "(2) unexpected status" + } local repaired=$($SHOW_LAYOUT | awk '/^repaired_unmatched_pair/ { print $2 }') @@ -1440,16 +1341,9 @@ test_15b() { echo "MDT-object (the first one)." 
echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS setstripe -c 1 -i 0 $DIR/$tdir - dd if=/dev/zero of=$DIR/$tdir/guard bs=1K count=1 + dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1 cancel_lru_locks osc echo "Inject failure stub to make the OST-object to back point to" @@ -1459,21 +1353,17 @@ test_15b() { do_facet ost1 $LCTL set_param fail_loc=0x1612 dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1 cancel_lru_locks osc - sync - sleep 2 do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall to cleanup object cache" - stopall > /dev/null - echo "setupall" - setupall > /dev/null - echo "Trigger layout LFSCK to find out unmatched pairs and fix them" - $START_LAYOUT || error "(1) Fail to start LFSCK for layout!" + $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!" wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2 + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_LAYOUT + error "(2) unexpected status" + } local repaired=$($SHOW_LAYOUT | awk '/^repaired_unmatched_pair/ { print $2 }') @@ -1489,19 +1379,10 @@ test_16() { echo "MDT-object and update the OST-object's owner information." echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS setstripe -c 1 -i 0 $DIR/$tdir dd if=/dev/zero of=$DIR/$tdir/f0 bs=1M count=1 cancel_lru_locks osc - sync - sleep 2 echo "Inject failure stub to skip OST-object owner changing" #define OBD_FAIL_LFSCK_BAD_OWNER 0x1613 @@ -1512,11 +1393,14 @@ test_16() { echo "Trigger layout LFSCK to find out inconsistent OST-object owner" echo "and fix them" - $START_LAYOUT || error "(1) Fail to start LFSCK for layout!" 
+ $START_LAYOUT -r || error "(1) Fail to start LFSCK for layout!" wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 3 || return 2 + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_LAYOUT + error "(2) unexpected status" + } local repaired=$($SHOW_LAYOUT | awk '/^repaired_inconsistent_owner/ { print $2 }') @@ -1533,37 +1417,24 @@ test_17() { echo "MDT-objects." echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS setstripe -c 1 -i 0 $DIR/$tdir echo "Inject failure stub to make two MDT-objects to refernce" echo "the OST-object" - do_facet $SINGLEMDS $LCTL set_param fail_val=0 #define OBD_FAIL_LFSCK_MULTIPLE_REF 0x1614 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1614 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0x1614 dd if=/dev/zero of=$DIR/$tdir/guard bs=1M count=1 cancel_lru_locks osc - sync - sleep 2 - createmany -o $DIR/$tdir/f 1 > /dev/null 2>&1 + createmany -o $DIR/$tdir/f 1 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 - echo "stopall to cleanup object cache" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + cancel_lru_locks mdc + cancel_lru_locks osc echo "$DIR/$tdir/f0 and $DIR/$tdir/guard use the same OST-objects" local size=$(ls -l $DIR/$tdir/f0 | awk '{ print $5 }') @@ -1573,11 +1444,14 @@ test_17() { echo "Trigger layout LFSCK to find out multiple refenced MDT-objects" echo "and fix them" - $START_LAYOUT || error "(2) Fail to start LFSCK for layout!" + $START_LAYOUT -r || error "(2) Fail to start LFSCK for layout!" 
wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_layout | - awk '/^status/ { print \\\$2 }'" "completed" 3 || return 3 + awk '/^status/ { print \\\$2 }'" "completed" 6 || { + $SHOW_LAYOUT + error "(3) unexpected status" + } local repaired=$($SHOW_LAYOUT | awk '/^repaired_multiple_referenced/ { print $2 }') @@ -1594,69 +1468,66 @@ test_17() { run_test 17 "LFSCK can repair multiple references" test_18a() { - [ $MDSCOUNT -lt 2 ] && - skip "We need at least 2 MDSes for test_18a" && exit 0 - - [ $OSTCOUNT -lt 2 ] && - skip "We need at least 2 OSTs for test_18a" && exit 0 - echo "#####" echo "The target MDT-object is there, but related stripe information" echo "is lost or partly lost. The LFSCK should regenerate the missed" echo "layout EA entries." echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS mkdir -i 0 $DIR/$tdir/a1 - $LFS mkdir -i 1 $DIR/$tdir/a2 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1 - $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2 - dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }') $LFS path2fid $DIR/$tdir/a1/f1 $LFS getstripe $DIR/$tdir/a1/f1 - $LFS path2fid $DIR/$tdir/a2/f2 - $LFS getstripe $DIR/$tdir/a2/f2 - sync + + if [ $MDSCOUNT -ge 2 ]; then + $LFS mkdir -i 1 $DIR/$tdir/a2 + $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2 + dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2 + $LFS path2fid $DIR/$tdir/a2/f2 + $LFS getstripe $DIR/$tdir/a2/f2 + fi + cancel_lru_locks osc echo "Inject failure, to make the MDT-object lost its layout EA" #define OBD_FAIL_LFSCK_LOST_STRIPE 0x1615 do_facet mds1 $LCTL set_param fail_loc=0x1615 chown 1.1 $DIR/$tdir/a1/f1 - do_facet mds2 $LCTL set_param fail_loc=0x1615 - chown 1.1 $DIR/$tdir/a2/f2 + + if [ $MDSCOUNT -ge 2 ]; then + do_facet mds2 $LCTL set_param 
fail_loc=0x1615 + chown 1.1 $DIR/$tdir/a2/f2 + fi + sync sleep 2 + do_facet mds1 $LCTL set_param fail_loc=0 - do_facet mds2 $LCTL set_param fail_loc=0 + if [ $MDSCOUNT -ge 2 ]; then + do_facet mds2 $LCTL set_param fail_loc=0 + fi - echo "stopall to cleanup object cache" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + cancel_lru_locks mdc + cancel_lru_locks osc echo "The file size should be incorrect since layout EA is lost" local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }') [ "$cur_size" != "$saved_size" ] || error "(1) Expect incorrect file1 size" - cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }') - [ "$cur_size" != "$saved_size" ] || - error "(2) Expect incorrect file2 size" + if [ $MDSCOUNT -ge 2 ]; then + cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }') + [ "$cur_size" != "$saved_size" ] || + error "(2) Expect incorrect file2 size" + fi echo "Trigger layout LFSCK on all devices to find out orphan OST-object" - $START_LAYOUT -o || error "(3) Fail to start LFSCK for layout!" + $START_LAYOUT -r -o || error "(3) Fail to start LFSCK for layout!" for k in $(seq $MDSCOUNT); do # The LFSCK status query internal is 30 seconds. 
For the case @@ -1676,85 +1547,91 @@ test_18a() { error "(5) OST${k} Expect 'completed', but got '$cur_status'" done - for k in 1 2; do - local repaired=$(do_facet mds${k} $LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_layout | + local repaired=$(do_facet mds1 $LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_layout | + awk '/^repaired_orphan/ { print $2 }') + [ $repaired -eq 1 ] || + error "(6.1) Expect 1 fixed on mds1, but got: $repaired" + + if [ $MDSCOUNT -ge 2 ]; then + repaired=$(do_facet mds2 $LCTL get_param -n \ + mdd.$(facet_svc mds2).lfsck_layout | awk '/^repaired_orphan/ { print $2 }') - [ $repaired -eq ${k} ] || - error "(6) Expect ${k} fixed on mds${k}, but got: $repaired" - done + [ $repaired -eq 2 ] || + error "(6.2) Expect 2 fixed on mds2, but got: $repaired" + fi $LFS path2fid $DIR/$tdir/a1/f1 $LFS getstripe $DIR/$tdir/a1/f1 - $LFS path2fid $DIR/$tdir/a2/f2 - $LFS getstripe $DIR/$tdir/a2/f2 + + if [ $MDSCOUNT -ge 2 ]; then + $LFS path2fid $DIR/$tdir/a2/f2 + $LFS getstripe $DIR/$tdir/a2/f2 + fi echo "The file size should be correct after layout LFSCK scanning" cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }') [ "$cur_size" == "$saved_size" ] || error "(7) Expect file1 size $saved_size, but got $cur_size" - cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }') - [ "$cur_size" == "$saved_size" ] || + if [ $MDSCOUNT -ge 2 ]; then + cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }') + [ "$cur_size" == "$saved_size" ] || error "(8) Expect file2 size $saved_size, but got $cur_size" + fi } run_test 18a "Find out orphan OST-object and repair it (1)" test_18b() { - [ $MDSCOUNT -lt 2 ] && - skip "We need at least 2 MDSes for test_18b" && exit 0 - - [ $OSTCOUNT -lt 2 ] && - skip "We need at least 2 OSTs for test_18b" && exit 0 - echo "#####" echo "The target MDT-object is lost. The LFSCK should re-create the" echo "MDT-object under .lustre/lost+found/MDTxxxx. The admin should" echo "can move it back to normal namespace manually." 
echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS mkdir -i 0 $DIR/$tdir/a1 - $LFS mkdir -i 1 $DIR/$tdir/a2 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1 - $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2 - dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2 local saved_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }') local fid1=$($LFS path2fid $DIR/$tdir/a1/f1) echo ${fid1} $LFS getstripe $DIR/$tdir/a1/f1 - local fid2=$($LFS path2fid $DIR/$tdir/a2/f2) - echo ${fid2} - $LFS getstripe $DIR/$tdir/a2/f2 - sync + + if [ $MDSCOUNT -ge 2 ]; then + $LFS mkdir -i 1 $DIR/$tdir/a2 + $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2 + dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2 + fid2=$($LFS path2fid $DIR/$tdir/a2/f2) + echo ${fid2} + $LFS getstripe $DIR/$tdir/a2/f2 + fi + cancel_lru_locks osc echo "Inject failure, to simulate the case of missing the MDT-object" #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616 do_facet mds1 $LCTL set_param fail_loc=0x1616 rm -f $DIR/$tdir/a1/f1 - do_facet mds2 $LCTL set_param fail_loc=0x1616 - rm -f $DIR/$tdir/a2/f2 + + if [ $MDSCOUNT -ge 2 ]; then + do_facet mds2 $LCTL set_param fail_loc=0x1616 + rm -f $DIR/$tdir/a2/f2 + fi + sync sleep 2 + do_facet mds1 $LCTL set_param fail_loc=0 - do_facet mds2 $LCTL set_param fail_loc=0 + if [ $MDSCOUNT -ge 2 ]; then + do_facet mds2 $LCTL set_param fail_loc=0 + fi - echo "stopall to cleanup object cache" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + cancel_lru_locks mdc + cancel_lru_locks osc echo "Trigger layout LFSCK on all devices to find out orphan OST-object" - $START_LAYOUT -o || error "(1) Fail to start LFSCK for layout!" + $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!" for k in $(seq $MDSCOUNT); do # The LFSCK status query internal is 30 seconds. 
For the case @@ -1774,93 +1651,101 @@ test_18b() { error "(3) OST${k} Expect 'completed', but got '$cur_status'" done - for k in 1 2; do - local repaired=$(do_facet mds${k} $LCTL get_param -n \ - mdd.$(facet_svc mds${k}).lfsck_layout | - awk '/^repaired_orphan/ { print $2 }') - [ $repaired -eq ${k} ] || - error "(4) Expect ${k} fixed on mds${k}, but got: $repaired" - done + local repaired=$(do_facet mds1 $LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_layout | + awk '/^repaired_orphan/ { print $2 }') + [ $repaired -eq 1 ] || + error "(4.1) Expect 1 fixed on mds1, but got: $repaired" + + if [ $MDSCOUNT -ge 2 ]; then + repaired=$(do_facet mds2 $LCTL get_param -n \ + mdd.$(facet_svc mds2).lfsck_layout | + awk '/^repaired_orphan/ { print $2 }') + [ $repaired -eq 2 ] || + error "(4.2) Expect 2 fixed on mds2, but got: $repaired" + fi echo "Move the files from ./lustre/lost+found/MDTxxxx to namespace" mv $MOUNT/.lustre/lost+found/MDT0000/R-${fid1} $DIR/$tdir/a1/f1 || error "(5) Fail to move $MOUNT/.lustre/lost+found/MDT0000/R-${fid1}" - mv $MOUNT/.lustre/lost+found/MDT0001/R-${fid2} $DIR/$tdir/a2/f2 || - error "(6) Fail to move $MOUNT/.lustre/lost+found/MDT0001/R-${fid2}" + if [ $MDSCOUNT -ge 2 ]; then + local name=$MOUNT/.lustre/lost+found/MDT0001/R-${fid2} + mv $name $DIR/$tdir/a2/f2 || error "(6) Fail to move $name" + fi $LFS path2fid $DIR/$tdir/a1/f1 $LFS getstripe $DIR/$tdir/a1/f1 - $LFS path2fid $DIR/$tdir/a2/f2 - $LFS getstripe $DIR/$tdir/a2/f2 + + if [ $MDSCOUNT -ge 2 ]; then + $LFS path2fid $DIR/$tdir/a2/f2 + $LFS getstripe $DIR/$tdir/a2/f2 + fi echo "The file size should be correct after layout LFSCK scanning" local cur_size=$(ls -il $DIR/$tdir/a1/f1 | awk '{ print $6 }') [ "$cur_size" == "$saved_size" ] || error "(7) Expect file1 size $saved_size, but got $cur_size" - cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }') - [ "$cur_size" == "$saved_size" ] || + if [ $MDSCOUNT -ge 2 ]; then + cur_size=$(ls -il $DIR/$tdir/a2/f2 | awk '{ print $6 }') + [ 
"$cur_size" == "$saved_size" ] || error "(8) Expect file2 size $saved_size, but got $cur_size" + fi } run_test 18b "Find out orphan OST-object and repair it (2)" test_18c() { - [ $MDSCOUNT -lt 2 ] && - skip "We need at least 2 MDSes for test_18c" && exit 0 - - [ $OSTCOUNT -lt 2 ] && - skip "We need at least 2 OSTs for test_18c" && exit 0 - echo "#####" echo "The target MDT-object is lost, and the OST-object FID is missing." echo "The LFSCK should re-create the MDT-object with new FID under the " echo "directory .lustre/lost+found/MDTxxxx." echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS mkdir -i 0 $DIR/$tdir/a1 - $LFS mkdir -i 1 $DIR/$tdir/a2 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1 - $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2 echo "Inject failure, to simulate the case of missing parent FID" #define OBD_FAIL_LFSCK_NOPFID 0x1617 do_facet ost1 $LCTL set_param fail_loc=0x1617 - do_facet ost2 $LCTL set_param fail_loc=0x1617 dd if=/dev/zero of=$DIR/$tdir/a1/f1 bs=1M count=2 - dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2 $LFS getstripe $DIR/$tdir/a1/f1 - $LFS getstripe $DIR/$tdir/a2/f2 - sync + + if [ $MDSCOUNT -ge 2 ]; then + $LFS mkdir -i 1 $DIR/$tdir/a2 + $LFS setstripe -c 2 -i 1 -s 1M $DIR/$tdir/a2 + do_facet ost2 $LCTL set_param fail_loc=0x1617 + dd if=/dev/zero of=$DIR/$tdir/a2/f2 bs=1M count=2 + $LFS getstripe $DIR/$tdir/a2/f2 + fi + cancel_lru_locks osc echo "Inject failure, to simulate the case of missing the MDT-object" #define OBD_FAIL_LFSCK_LOST_MDTOBJ 0x1616 do_facet mds1 $LCTL set_param fail_loc=0x1616 rm -f $DIR/$tdir/a1/f1 - do_facet mds2 $LCTL set_param fail_loc=0x1616 - rm -f $DIR/$tdir/a2/f2 + + if [ $MDSCOUNT -ge 2 ]; then + do_facet mds2 $LCTL set_param fail_loc=0x1616 + rm -f $DIR/$tdir/a2/f2 + fi + sync sleep 2 + do_facet mds1 $LCTL set_param fail_loc=0 - do_facet mds2 $LCTL set_param fail_loc=0 + if 
[ $MDSCOUNT -ge 2 ]; then + do_facet mds2 $LCTL set_param fail_loc=0 + fi - echo "stopall to cleanup object cache" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + cancel_lru_locks mdc + cancel_lru_locks osc echo "Trigger layout LFSCK on all devices to find out orphan OST-object" - $START_LAYOUT -o || error "(1) Fail to start LFSCK for layout!" + $START_LAYOUT -r -o || error "(1) Fail to start LFSCK for layout!" for k in $(seq $MDSCOUNT); do # The LFSCK status query internal is 30 seconds. For the case @@ -1880,17 +1765,25 @@ test_18c() { error "(3) OST${k} Expect 'completed', but got '$cur_status'" done + if [ $MDSCOUNT -ge 2 ]; then + expected=3 + else + expected=1 + fi + local repaired=$(do_facet mds1 $LCTL get_param -n \ mdd.$(facet_svc mds1).lfsck_layout | awk '/^repaired_orphan/ { print $2 }') - [ $repaired -eq 3 ] || - error "(4) Expect 3 fixed on mds1, but got: $repaired" - - repaired=$(do_facet mds2 $LCTL get_param -n \ - mdd.$(facet_svc mds2).lfsck_layout | - awk '/^repaired_orphan/ { print $2 }') - [ $repaired -eq 0 ] || - error "(5) Expect 0 fixed on mds2, but got: $repaired" + [ $repaired -eq $expected ] || + error "(4) Expect $expected fixed on mds1, but got: $repaired" + + if [ $MDSCOUNT -ge 2 ]; then + repaired=$(do_facet mds2 $LCTL get_param -n \ + mdd.$(facet_svc mds2).lfsck_layout | + awk '/^repaired_orphan/ { print $2 }') + [ $repaired -eq 0 ] || + error "(5) Expect 0 fixed on mds2, but got: $repaired" + fi echo "There should be some stub under .lustre/lost+found/MDT0001/" ls -ail $MOUNT/.lustre/lost+found/MDT0001/N-* && @@ -1911,14 +1804,8 @@ test_18d() { echo "OST-object." 
echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir/a1 + check_mount_and_prep + mkdir $DIR/$tdir/a1 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1 echo "guard" > $DIR/$tdir/a1/f1 echo "foo" > $DIR/$tdir/a1/f2 @@ -1927,7 +1814,6 @@ test_18d() { $LFS getstripe $DIR/$tdir/a1/f1 $LFS path2fid $DIR/$tdir/a1/f2 $LFS getstripe $DIR/$tdir/a1/f2 - sync cancel_lru_locks osc echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2" @@ -1954,8 +1840,23 @@ test_18d() { [ "$cur_size" != "$saved_size" ] || error "(1) Expect incorrect file2 size" + #define OBD_FAIL_LFSCK_DELAY3 0x1602 + do_facet $SINGLEMDS $LCTL set_param fail_val=5 fail_loc=0x1602 + echo "Trigger layout LFSCK on all devices to find out orphan OST-object" - $START_LAYOUT -o -c || error "(2) Fail to start LFSCK for layout!" + $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!" + + wait_update_facet mds1 "$LCTL get_param -n \ + mdd.$(facet_svc mds1).lfsck_layout | + awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 || + error "(3.0) MDS1 is not the expected 'scanning-phase2'" + + # LU-3469: before osp_sync() is enabled, wait for a while to guarantee + # that former async repair operations have been executed on the OST(s). + sync + sleep 2 + + do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0 for k in $(seq $MDSCOUNT); do # The LFSCK status query internal is 30 seconds. For the case @@ -1986,10 +1887,6 @@ test_18d() { [ "$cur_size" == "$saved_size" ] || error "(6) Expect file2 size $saved_size, but got $cur_size" - echo "There should be some stub under .lustre/lost+found/MDT0000/" - ls -ail $MOUNT/.lustre/lost+found/MDT0000/ && - error "(7) .lustre/lost+found/MDT0000/ should be empty" - echo "The LFSCK should find back the original data." cat $DIR/$tdir/a1/f2 $LFS path2fid $DIR/$tdir/a1/f2 @@ -2006,14 +1903,8 @@ test_18e() { echo "old orphan OST-object." 
echo "#####" - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir/a1 + check_mount_and_prep + mkdir $DIR/$tdir/a1 $LFS setstripe -c 1 -i 0 -s 1M $DIR/$tdir/a1 echo "guard" > $DIR/$tdir/a1/f1 echo "foo" > $DIR/$tdir/a1/f2 @@ -2022,7 +1913,6 @@ test_18e() { $LFS getstripe $DIR/$tdir/a1/f1 $LFS path2fid $DIR/$tdir/a1/f2 $LFS getstripe $DIR/$tdir/a1/f2 - sync cancel_lru_locks osc echo "Inject failure to make $DIR/$tdir/a1/f1 and $DIR/$tdir/a1/f2" @@ -2050,11 +1940,10 @@ test_18e() { error "(1) Expect incorrect file2 size" #define OBD_FAIL_LFSCK_DELAY3 0x1602 - do_facet $SINGLEMDS $LCTL set_param fail_val=10 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602 + do_facet $SINGLEMDS $LCTL set_param fail_val=10 fail_loc=0x1602 echo "Trigger layout LFSCK on all devices to find out orphan OST-object" - $START_LAYOUT -o -c || error "(2) Fail to start LFSCK for layout!" + $START_LAYOUT -r -o -c || error "(2) Fail to start LFSCK for layout!" wait_update_facet mds1 "$LCTL get_param -n \ mdd.$(facet_svc mds1).lfsck_layout | @@ -2068,8 +1957,7 @@ test_18e() { echo "Write new data to f2 to modify the new created OST-object." echo "dummy" >> $DIR/$tdir/a1/f2 - do_facet $SINGLEMDS $LCTL set_param fail_val=0 - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0 for k in $(seq $MDSCOUNT); do # The LFSCK status query internal is 30 seconds. For the case @@ -2117,22 +2005,12 @@ test_18e() { run_test 18e "Find out orphan OST-object and repair it (5)" test_19a() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS setstripe -c 1 -i 0 $DIR/$tdir echo "foo" > $DIR/$tdir/a0 echo "guard" > $DIR/$tdir/a1 - cancel_lru_locks osc - umount_client $MOUNT || error "(1) Fail to stop client!" 
- mount_client $MOUNT || error "(2) Fail to start client!" echo "Inject failure, then client will offer wrong parent FID when read" do_facet ost1 $LCTL set_param -n \ @@ -2147,14 +2025,7 @@ test_19a() { run_test 19a "OST-object inconsistency self detect" test_19b() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $LFS setstripe -c 1 -i 0 $DIR/$tdir echo "Inject failure stub to make the OST-object to back point to" @@ -2164,8 +2035,6 @@ test_19b() { do_facet ost1 $LCTL set_param fail_loc=0x1611 echo "foo" > $DIR/$tdir/f0 cancel_lru_locks osc - sync - sleep 2 do_facet ost1 $LCTL set_param fail_loc=0 echo "Nothing should be fixed since self detect and repair is disabled" @@ -2195,6 +2064,7 @@ $LCTL set_param debug=-lfsck > /dev/null || true # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} +OSTCOUNT=${SAVED_OSTCOUNT} # cleanup the system at last formatall diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index cde6ca4..62f3779 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -21,13 +21,19 @@ require_dsh_mds || exit 0 SAVED_MDSSIZE=${MDSSIZE} SAVED_OSTSIZE=${OSTSIZE} +SAVED_OSTCOUNT=${OSTCOUNT} # use small MDS + OST size to speed formatting time # do not use too small MDSSIZE/OSTSIZE, which affect the default journal size MDSSIZE=100000 OSTSIZE=100000 +# no need too much OSTs, to reduce the format/start/stop overhead +[ $OSTCOUNT -gt 4 ] && OSTCOUNT=4 MOUNT_2="" -check_and_setup_lustre + +# build up a clean test environment. +formatall +setupall [ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre && @@ -99,16 +105,13 @@ scrub_prep() { local nfiles=$1 local n - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null + check_mount_and_prep - echo "preparing..." + echo "preparing... 
$(date)" for n in $(seq $MDSCOUNT); do echo "creating $nfiles files on mds$n" if [ $n -eq 1 ]; then - mkdir -p $DIR/$tdir/mds$n || + mkdir $DIR/$tdir/mds$n || error "Failed to create directory mds$n" else $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n || @@ -117,11 +120,11 @@ scrub_prep() { cp $LUSTRE/tests/*.sh $DIR/$tdir/mds$n || error "Failed to copy files to mds$n" if [[ $nfiles -gt 0 ]]; then - createmany -o $DIR/$tdir/mds$n/$tfile $nfiles || - error "createmany failed on mds$n" + createmany -m $DIR/$tdir/mds$n/$tfile $nfiles > \ + /dev/null || error "createmany failed on mds$n" fi done - echo "prepared." + echo "prepared $(date)." cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!" for n in $(seq $MDSCOUNT); do echo "stop mds$n" @@ -154,17 +157,13 @@ scrub_stop_mds() { scrub_check_status() { local error_id=$1 local expected=$2 - local actual local n for n in $(seq $MDSCOUNT); do - actual=$(do_facet mds$n $LCTL get_param -n \ + wait_update_facet mds$n "$LCTL get_param -n \ osd-ldiskfs.$(facet_svc mds$n).oi_scrub | - awk '/^status/ { print $2 }') - if [ "$actual" != "$expected" ]; then - error "($error_id) Expected '$expected' on mds$n, but" \ - "got '$actual'" - fi + awk '/^status/ { print \\\$2 }'" "$expected" 6 || + error "($error_id) Expected '$expected' on mds$n" done } @@ -284,21 +283,15 @@ test_1a() { start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(1) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(2) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ -z "$FLAGS" ] || error "(3) Expect empty flags, but got '$FLAGS'" mount_client $MOUNT || error "(4) Fail to start client!" 
- #define OBD_FAIL_OSD_FID_MAPPING 0x193 do_facet $SINGLEMDS $LCTL set_param fail_loc=0x193 # update .lustre OI mapping touch $MOUNT/.lustre do_facet $SINGLEMDS $LCTL set_param fail_loc=0 - umount_client $MOUNT || error "(5) Fail to stop client!" echo "stop $SINGLEMDS" @@ -308,10 +301,6 @@ test_1a() { start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || error "(7) Fail to start MDS!" - local STATUS=$($SHOW_SCRUB | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(8) Expect 'init', but got '$STATUS'" - local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "inconsistent" ] || error "(9) Expect 'inconsistent', but got '$FLAGS'" @@ -323,7 +312,6 @@ test_1b() { scrub_remove_ois 1 echo "start MDTs without disabling OI scrub" scrub_start_mds 2 "$MOUNT_OPTS_SCRUB" - sleep 3 scrub_check_status 3 completed mount_client $MOUNT || error "(4) Fail to start client!" scrub_check_data 5 @@ -335,20 +323,15 @@ test_1c() { # OI files to be removed: # idx 0: oi.16.0 - # idx 1: oi.16.1 # idx 2: oi.16.{2,4,8,16,32} # idx 3: oi.16.{3,9,27} - # idx 5: oi.16.{5,25} - # idx 7: oi.16.{7,49} - for index in 0 1 2 3 5 7; do + for index in 0 2 3; do scrub_prep 0 scrub_remove_ois 1 $index - echo "start MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" scrub_check_flags 3 recreated scrub_start 4 - sleep 3 scrub_check_status 5 completed scrub_check_flags 6 "" done @@ -360,43 +343,45 @@ test_2() { scrub_backup_restore 1 echo "starting MDTs without disabling OI scrub" scrub_start_mds 2 "$MOUNT_OPTS_SCRUB" - sleep 3 scrub_check_status 3 completed mount_client $MOUNT || error "(4) Fail to start client!" scrub_check_data 5 } run_test 2 "Trigger OI scrub when MDT mounts for backup/restore case" +# test_3 is obsolete, it will be covered by test_5. 
test_3() { + formatall > /dev/null + setupall > /dev/null + scrub_prep 0 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - sleep 3 scrub_check_status 3 init scrub_check_flags 4 inconsistent - echo "stopall" - stopall > /dev/null } -run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" +#run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" test_4() { scrub_prep 0 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto scrub_check_data 6 - sleep 3 scrub_check_status 7 completed + scrub_check_flags 8 "" } run_test 4 "Trigger OI scrub automatically if inconsistent OI mapping was found" test_5() { - scrub_prep 1500 + formatall > /dev/null + setupall > /dev/null + + scrub_prep 1000 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" @@ -405,73 +390,58 @@ test_5() { mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_check_data 6 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 umount_client $MOUNT || error "(7) Fail to stop client!" 
- scrub_check_status 8 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 - do_facet mds$n $LCTL set_param fail_loc=0x191 - done + #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191 + sleep 4 scrub_stop_mds 9 - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 echo "starting MDTs with OI scrub disabled" scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 11 crashed - scrub_stop_mds 12 - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + echo "starting MDTs without disabling OI scrub" scrub_start_mds 13 "$MOUNT_OPTS_SCRUB" - scrub_check_status 14 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 - do_facet mds$n $LCTL set_param fail_loc=0x192 - done - sleep 4 - scrub_check_status 15 failed + #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192 + scrub_check_status 15 failed mount_client $MOUNT || error "(16) Fail to start client!" 
+ #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + + local n for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - stat $DIR/$tdir/mds$n/${tfile}1000 || - error "(17) Failed to stat mds$n/${tfile}1000" + stat $DIR/$tdir/mds$n/${tfile}800 || + error "(17) Failed to stat mds$n/${tfile}800" done scrub_check_status 18 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 19 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 19 completed scrub_check_flags 20 "" } run_test 5 "OI scrub state machine" @@ -481,45 +451,41 @@ test_6() { scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent mount_client $MOUNT || error "(5) Fail to start client!" 
scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 # Sleep 5 sec to guarantee at least one object processed by OI scrub sleep 5 # Fail the OI scrub to guarantee there is at least one checkpoint - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 - do_facet mds$n $LCTL set_param fail_loc=0x192 - done - sleep 4 + #define OBD_FAIL_OSD_SCRUB_FATAL 0x192 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x192 + scrub_check_status 7 failed + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + + local n for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 # stat will re-trigger OI scrub stat $DIR/$tdir/mds$n/${tfile}800 || error "(8) Failed to stat mds$n/${tfile}800" done umount_client $MOUNT || error "(9) Fail to stop client!" 
- scrub_check_status 10 scanning - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 - do_facet mds$n $LCTL set_param fail_loc=0x191 - done + #define OBD_FAIL_OSD_SCRUB_CRASH 0x191 + do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x191 + sleep 4 local -a position0 for n in $(seq $MDSCOUNT); do @@ -530,11 +496,10 @@ test_6() { scrub_stop_mds 11 - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + echo "starting MDTs without disabling OI scrub" scrub_start_mds 12 "$MOUNT_OPTS_SCRUB" @@ -550,13 +515,10 @@ test_6() { fi done - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 15 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 15 completed scrub_check_flags 16 "" } run_test 6 "OI scrub resumes from last checkpoint" @@ -564,39 +526,31 @@ run_test 6 "OI scrub resumes from last checkpoint" test_7() { scrub_prep 500 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 3 init scrub_check_flags 4 inconsistent - mount_client $MOUNT || error "(5) Fail to start client!" - scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 + scrub_check_data 6 + local n for n in $(seq $MDSCOUNT); do stat $DIR/$tdir/mds$n/${tfile}300 || error "(7) Failed to stat mds$n/${tfile}300!" 
done scrub_check_status 8 scanning - scrub_check_flags 9 inconsistent,auto - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 10 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 10 completed scrub_check_flags "" } run_test 7 "System is available during OI scrub scanning" @@ -604,39 +558,25 @@ run_test 7 "System is available during OI scrub scanning" test_8() { scrub_prep 128 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=1 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_start 5 + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=1 fail_loc=0x190 + scrub_start 5 scrub_check_status 6 scanning - scrub_stop 7 - scrub_check_status 8 stopped - scrub_start 9 - scrub_check_status 10 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 11 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 11 completed scrub_check_flags 12 "" } run_test 8 "Control OI scrub manually" @@ -647,14 +587,11 @@ test_9() { return 0 fi - scrub_prep 8000 + scrub_prep 6000 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent local BASE_SPEED1=100 @@ -664,9 +601,7 @@ test_9() { sleep $RUN_TIME1 scrub_check_status 6 completed - scrub_check_flags 7 "" - # OI scrub should run with limited speed under non-inconsistent case scrub_start 
8 -s $BASE_SPEED1 -r @@ -721,7 +656,7 @@ test_9() { do_facet mds$n $LCTL set_param -n \ mdd.$(facet_svc mds$n).lfsck_speed_limit 0 done - sleep 6 + scrub_check_status 13 completed } run_test 9 "OI scrub speed control" @@ -729,50 +664,32 @@ run_test 9 "OI scrub speed control" test_10a() { scrub_prep 0 scrub_backup_restore 1 - echo "starting mds$n with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - mount_client $MOUNT || error "(5) Fail to start client!" - scrub_enable_auto - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=1 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done - scrub_check_data 6 - scrub_check_status 7 scanning + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=1 fail_loc=0x190 + scrub_check_data 6 + scrub_check_status 7 scanning umount_client $MOUNT || error "(8) Fail to stop client!" 
- scrub_stop_mds 9 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 10 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 11 paused - scrub_stop_mds 12 - echo "starting MDTs without disabling OI scrub" scrub_start_mds 13 "$MOUNT_OPTS_SCRUB" - scrub_check_status 14 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 15 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 15 completed scrub_check_flags 16 "" } run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)" @@ -781,79 +698,54 @@ run_test 10a "non-stopped OI scrub should auto restarts after MDS remount (1)" test_10b() { scrub_prep 0 scrub_backup_restore 1 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" - - scrub_check_status 3 init - scrub_check_flags 4 inconsistent - local n - for n in $(seq $MDSCOUNT); do - #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 - do_facet mds$n $LCTL set_param fail_val=3 - do_facet mds$n $LCTL set_param fail_loc=0x190 - done + #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_val=3 fail_loc=0x190 scrub_start 5 - scrub_check_status 6 scanning - scrub_stop_mds 7 - echo "starting MDTs with OI scrub disabled" scrub_start_mds 8 "$MOUNT_OPTS_NOSCRUB" - scrub_check_status 9 paused - scrub_stop_mds 10 - echo "starting MDTs without disabling OI scrub" scrub_start_mds 11 "$MOUNT_OPTS_SCRUB" - scrub_check_status 12 scanning - for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL set_param fail_loc=0 - do_facet mds$n $LCTL set_param fail_val=0 - done - sleep 5 - scrub_check_status 13 completed + do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL set_param fail_loc=0 fail_val=0 + scrub_check_status 13 completed scrub_check_flags 14 "" } #run_test 10b "non-stopped OI scrub should auto restarts after MDS remount (2)" 
test_11() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - local CREATED=100 - local tname=`date +%s` - rm -rf $MOUNT/$tname > /dev/null - mkdir -p $MOUNT/$tname || error "(0) Failed to create $MOUNT/$tname" local n + + check_mount_and_prep + for n in $(seq $MDSCOUNT); do - $LFS mkdir -i $((n - 1)) $MOUNT/$tname/mds$n || - error "(1) Fail to mkdir $MOUNT/$tname/mds$n" + $LFS mkdir -i $((n - 1)) $DIR/$tdir/mds$n || + error "(1) Fail to mkdir $DIR/$tdir/mds$n" - createmany -o $MOUNT/$tname/mds$n/f $CREATED || - error "(2) Fail to create in $tname/mds$n" + createmany -o $DIR/$tdir/mds$n/f $CREATED || + error "(2) Fail to create under $tdir/mds$n" done - cleanup_mount $MOUNT - do_facet $SINGLEMDS $LCTL clear - start_full_debug_logging # reset OI scrub start point by force scrub_start 3 -r - sleep 3 scrub_check_status 4 completed + declare -a checked0 + declare -a checked1 + # OI scrub should skip the new created objects for the first accessing # notice we're creating a new llog for every OST on every startup # new features can make this even less stable, so we only check @@ -865,147 +757,128 @@ test_11() { [ $SKIPPED -ge $MAXIMUM -o $SKIPPED -lt $MINIMUM ] && error "(5) Expect [ $MINIMUM , $MAXIMUM ) objects" \ "skipped on mds$n, but got $SKIPPED" + + checked0[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }') done # reset OI scrub start point by force - scrub_start -r - sleep 3 + scrub_start 6 -r scrub_check_status 7 completed # OI scrub should skip the new created object only once for n in $(seq $MDSCOUNT); do SKIPPED=$(scrub_status $n | awk '/^noscrub/ { print $2 }') - [ $SKIPPED -eq 0 ] || + checked1[$n]=$(scrub_status $n | awk '/^checked/ { print $2 }') + + [ ${checked0[$n]} -ne ${checked1[$n]} -o $SKIPPED -eq 0 ] || error "(8) Expect 0 objects skipped on mds$n, but" \ "got $SKIPPED" done - - stop_full_debug_logging - restore_mount $MOUNT || error "(9) Fail to start client!" 
- rm -rf $MOUNT/$tname > /dev/null } run_test 11 "OI scrub skips the new created objects only once" test_12() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_INVALID_ENTRY 0x195 do_facet ost1 $LCTL set_param fail_loc=0x195 - createmany -o $DIR/$tdir/f 1000 + createmany -o $DIR/$tdir/f $((count + 32)) + + umount_client $MOUNT || error "(1) Fail to stop client!" - echo "stopall" - stopall > /dev/null + stop ost1 || error "(2) Fail to stop ost1" #define OBD_FAIL_OST_NODESTROY 0x233 do_facet ost1 $LCTL set_param fail_loc=0x233 - echo "setupall" - setupall > /dev/null + start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB || + error "(3) Fail to start ost1" - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" + mount_client $MOUNT || error "(4) Fail to start client!" - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail" - sleep 3 - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(3) Expect 'completed', but got '$STATUS'" + $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!" 
do_facet ost1 $LCTL set_param fail_loc=0 - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(4) ls should succeed" + wait_update_facet ost1 "$LCTL get_param -n \ + osd-ldiskfs.$(facet_svc ost1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 6 || + error "(7) Expected '$expected' on ost1" + + ls -ail $DIR/$tdir > /dev/null || { + $SHOW_SCRUB_ON_OST + error "(8) ls should succeed" + } } run_test 12 "OI scrub can rebuild invalid /O entries" test_13() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 do_facet ost1 $LCTL set_param fail_loc=0x196 - createmany -o $DIR/$tdir/f 1000 + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall" - stopall > /dev/null - echo "setupall" - setupall > /dev/null + umount_client $MOUNT || error "(1) Fail to stop client!" + + stop ost1 || error "(2) Fail to stop ost1" + + start ost1 $(ostdevname 1) $MOUNT_OPTS_NOSCRUB || + error "(3) Fail to start ost1" - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" + mount_client $MOUNT || error "(4) Fail to start client!" + + ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail" - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + $START_SCRUB_ON_OST -r || error "(6) Fail to start OI scrub on OST!" - $START_SCRUB_ON_OST || error "(3) Fail to start OI scrub on OST!" 
- sleep 3 - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(4) Expect 'completed', but got '$STATUS'" + wait_update_facet ost1 "$LCTL get_param -n \ + osd-ldiskfs.$(facet_svc ost1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 6 || + error "(7) Expected '$expected' on ost1" - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed" + ls -ail $DIR/$tdir > /dev/null || error "(8) ls should succeed" } run_test 13 "OI scrub can rebuild missed /O entries" test_14() { - echo "stopall" - stopall > /dev/null - echo "formatall" - formatall > /dev/null - echo "setupall" - setupall > /dev/null - - mkdir -p $DIR/$tdir + check_mount_and_prep $SETSTRIPE -c 1 -i 0 $DIR/$tdir + local count=$(precreated_ost_obj_count 0 0) + #define OBD_FAIL_OSD_COMPAT_NO_ENTRY 0x196 do_facet ost1 $LCTL set_param fail_loc=0x196 - createmany -o $DIR/$tdir/f 64 + createmany -o $DIR/$tdir/f $((count + 32)) do_facet ost1 $LCTL set_param fail_loc=0 - echo "stopall" - stopall > /dev/null - echo "setupall" - setupall > /dev/null - - local STATUS=$($SHOW_SCRUB_ON_OST | awk '/^status/ { print $2 }') - [ "$STATUS" == "init" ] || - error "(1) Expect 'init', but got '$STATUS'" - - ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(2) ls should fail" + umount_client $MOUNT || error "(1) Fail to stop client!" - echo "stopall" - stopall > /dev/null + stop ost1 || error "(2) Fail to stop ost1" echo "run e2fsck" run_e2fsck $(facet_host ost1) $(ostdevname 1) "-y" || error "(3) Fail to run e2fsck error" - echo "setupall" - setupall > /dev/null + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS || + error "(4) Fail to start ost1" + + mount_client $MOUNT || error "(5) Fail to start client!" 
local LF_REPAIRED=$($SHOW_SCRUB_ON_OST | awk '/^lf_reparied/ { print $2 }') [ $LF_REPAIRED -gt 0 ] || - error "(4) Some entry under /lost+found should be repaired" + error "(6) Some entry under /lost+found should be repaired" - ls -ail $DIR/$tdir > /dev/null 2>&1 || error "(5) ls should succeed" + ls -ail $DIR/$tdir > /dev/null || error "(7) ls should succeed" } run_test 14 "OI scrub can repair objects under lost+found" @@ -1021,7 +894,6 @@ test_15() { # run under dryrun mode scrub_start 5 --dryrun - sleep 3 scrub_check_status 6 completed scrub_check_flags 7 inconsistent scrub_check_params 8 dryrun @@ -1029,7 +901,6 @@ test_15() { # run under dryrun mode again scrub_start 10 --dryrun - sleep 3 scrub_check_status 11 completed scrub_check_flags 12 inconsistent scrub_check_params 13 dryrun @@ -1041,7 +912,6 @@ test_15() { # work under Lustre-2.y (y >=6), the test scripts should be fixed as # "-noff" or "--dryrun=off". scrub_start 15 --dryrun=off - sleep 3 scrub_check_status 16 completed scrub_check_flags 17 "" scrub_check_params 18 "" @@ -1049,7 +919,6 @@ test_15() { # run under normal mode again scrub_start 20 --dryrun=off - sleep 3 scrub_check_status 21 completed scrub_check_flags 22 "" scrub_check_params 23 "" @@ -1060,6 +929,7 @@ run_test 15 "Dryrun mode OI scrub" # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} OSTSIZE=${SAVED_OSTSIZE} +OSTCOUNT=${SAVED_OSTCOUNT} # cleanup the system at last formatall diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 5b9cf9b..d6d1ee1 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -6783,3 +6783,27 @@ free_fd() [ $fd -lt $max_fd ] || error "finding free file descriptor failed" echo $fd } + +check_mount_and_prep() +{ + is_mounted $MOUNT || setupall + + rm -rf $DIR/[df][0-9]* || error "Fail to cleanup the env!" + mkdir $DIR/$tdir || error "Fail to mkdir $DIR/$tdir." +} + +# Calculate how many OST-objects have been precreated.
+precreated_ost_obj_count()	# print count of precreated OST objects
+{
+	local mdt_idx=$1	# zero-based MDT index, e.g. 0 for MDT0000
+	local ost_idx=$2	# zero-based OST index, e.g. 0 for OST0000
+	local mdt_name="MDT$(printf '%04x' $mdt_idx)"
+	local ost_name="OST$(printf '%04x' $ost_idx)"
+	local proc_path="${FSNAME}-${ost_name}-osc-${mdt_name}"
+	# MDS facets are 1-based: MDT index N is served by facet mds$((N + 1))
+	local last_id=$(do_facet mds$((mdt_idx + 1)) lctl get_param -n \
+			osp.$proc_path.prealloc_last_id)
+	local next_id=$(do_facet mds$((mdt_idx + 1)) lctl get_param -n \
+			osp.$proc_path.prealloc_next_id)
+	echo $((last_id - next_id + 1))
+}
-- 
1.8.3.1