From: Andreas Dilger Date: Fri, 4 Sep 2015 07:22:13 +0000 (-0600) Subject: LU-7301 tests: delete old lfsck tests X-Git-Tag: 2.7.65~51 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=5a3dfc2b5d90;p=fs%2Flustre-release.git LU-7301 tests: delete old lfsck tests The lfsck utility is no longer supported and the corresponding test scripts and functions can be removed. Instead of deleting the run_lfsck() routine and LFSCK_ALWAYS check to run after every test script, update it to run the new lfsck with all available checks on all targets. Signed-off-by: Andreas Dilger Change-Id: If7fb8f61e02e09ba346030a3d04d74b9ed3b0c4c Reviewed-on: http://review.whamcloud.com/16237 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Fan Yong Reviewed-by: Emoly Liu Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index e98bb84..eab54e7 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -14,7 +14,7 @@ noinst_DATA += disk2_4-zfs.tar.bz2 disk2_7-ldiskfs.tar.bz2 disk2_7-zfs.tar.bz2 noinst_SCRIPTS = leak_finder.pl llmount.sh llmountcleanup.sh functions.sh noinst_SCRIPTS += test-framework.sh runvmstat runiozone runtests sanity.sh noinst_SCRIPTS += rundbench acceptance-small.sh compile.sh conf-sanity.sh -noinst_SCRIPTS += insanity.sh lfsck.sh oos.sh oos2.sh dne_sanity.sh +noinst_SCRIPTS += insanity.sh oos.sh oos2.sh dne_sanity.sh noinst_SCRIPTS += recovery-small.sh replay-dual.sh sanity-quota.sh noinst_SCRIPTS += replay-ost-single.sh replay-single.sh run-llog.sh sanityn.sh noinst_SCRIPTS += large-scale.sh racer.sh replay-vbr.sh diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 0d131e1..08f468b 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -4924,8 +4924,8 @@ test_72() { #LU-2634 for num in $(seq $MDSCOUNT); do add mds${num} $(mkfs_opts mds$num $(mdsdevname $num)) \ - --reformat $(mdsdevname $num) $(mdsvdevname $num) || - error "add mds $num failed" + --reformat $(mdsdevname $num) $(mdsvdevname $num) || + error "add mds $num failed" do_facet mds${num} "$TUNE2FS -O extents $(mdsdevname $num)" || error "$TUNE2FS failed on mds${num}" done diff --git a/lustre/tests/lfsck.sh b/lustre/tests/lfsck.sh deleted file mode 100644 index b03834d..0000000 --- a/lustre/tests/lfsck.sh +++ /dev/null @@ -1,293 +0,0 @@ -#!/bin/bash -# test e2fsck and lfsck to detect and fix filesystem corruption -# -#set -vx -set -e - -[ "$1" = "-v" ] && shift && VERBOSE=echo || VERBOSE=: - -LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} -. $LUSTRE/tests/test-framework.sh -init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} - -NUMFILES=${NUMFILES:-10} -NUMDIRS=${NUMDIRS:-4} -OSTIDX=${OSTIDX:-0} # the OST index in LOV -OBJGRP=${OBJGRP:-0} # the OST object group - -[ ! -d "$SHARED_DIRECTORY" ] && - skip_env "SHARED_DIRECTORY should be accessible on all nodes" && - exit 0 -[[ $(facet_fstype $SINGLEMDS) != ldiskfs ]] && - skip "Only applicable to ldiskfs-based MDTs" && exit 0 -[[ $(facet_fstype OST) != ldiskfs ]] && - skip "Only applicable to ldiskfs-based OST" && exit 0 - -which getfattr &>/dev/null || { skip_env "could not find getfattr" && exit 0; } -which setfattr &>/dev/null || { skip_env "could not find setfattr" && exit 0; } - -MOUNT_2="" -check_and_setup_lustre - -assert_DIR - -SAMPLE_FILE=$TMP/$TESTSUITE.junk -dd if=/dev/urandom of=$SAMPLE_FILE bs=1M count=1 - -# Create some dirs and files on the filesystem. -create_files_sub() { - local test_dir=$1 - local num_dirs=$2 - local file_name=$3 - local first_num=$4 - local last_num=$5 - local d e f - - echo "creating files in $test_dir/d[$first_num..$last_num]:" - for d in $(seq -f $test_dir/d%g $first_num $last_num); do - mkdir -p $d || error "mkdir $d failed" - $VERBOSE "created $d $(lfs path2fid $d)" - for e in $(seq -f $d/d%g $num_dirs); do - mkdir -p $e || error "mkdir $$e failed" - $VERBOSE "created $e $(lfs path2fid $e)" - for f in $(seq -f $e/test%g $num_dirs); do - cp $file_name $f || - error "cp $file_name $f failed" - $VERBOSE "created $f $(lfs path2fid $f)" - done - done - done -} - -create_files() { - local test_dir=$1 - local num_dirs=$2 - local num_files=$3 - local f - - # create some files on the filesystem - local first_num=1 - local last_num=$num_dirs - create_files_sub $test_dir $num_dirs /etc/fstab $first_num $last_num - - # create files to be modified - echo "creating files $test_dir/testfile.[0..$((num_files * 3))]:" - for f in $(seq -f $test_dir/testfile.%g $((num_files * 3))); do - cp $SAMPLE_FILE $f || error "cp $SAMPLE_FILE $f failed" - $VERBOSE "created $f $(lfs path2fid $f)" - done - - # create some more files - first_num=$((num_dirs * 2 + 1)) - last_num=$((num_dirs * 2 + 3)) - create_files_sub $test_dir $num_dirs /etc/hosts $first_num $last_num - - # these should NOT be taken as duplicates - echo "linking files in $test_dir/d[$first_num..$last_num]:" - for f in $(seq -f $test_dir/d$last_num/linkfile.%g $num_files); do - cp /etc/hosts $f || error "cp /etc/hosts $f failed" - ln $f $f.link || error "ln $f $f.link failed" - $VERBOSE "linked $f to $f.link $(lfs path2fid $f)" - done -} - -# Get the objids for files on the OST (given the OST index and object group). -get_objects() { - local obdidx=$1 - shift - local seq=$1 - shift - local ost_files="$@" - local ost_objids - local objids - - for F in $ostfiles; do - objid=$($GETSTRIPE $F | - awk "{ if (\$1 == $obdidx && \$4 == $seq) print \$2 }") - $VERBOSE $GETSTRIPE -v $F | grep -v "lmm_seq|lmm_object_id" 1>&2 - ost_objids="$ost_objids $objid" - done - - echo $ost_objids -} - -# Get the OST target device (given the OST facet name and OST index). -get_ost_dev() { - local node=$1 - local obdidx=$2 - local ost_name - local ost_dev - - ost_name=$(ostname_from_index $obdidx) - ost_dev=$(get_osd_param $node $ost_name mntdev) - if [ $? -ne 0 ]; then - printf "unable to find OST%04x on $facet\n" $obdidx - return 1 - fi - - if [[ $ost_dev = *loop* ]]; then - ost_dev=$(do_node $node "losetup $ost_dev" | - sed -e "s/.*(//" -e "s/).*//") - fi - - echo $ost_dev -} - -# Get the file names to be duplicated or removed on the MDS. -get_files() { - local flavor=$1 - local test_dir=$2 - local num_files=$3 - local first last - local test_file - - case $flavor in - dup) - first=$((num_files + 1)) - last=$((num_files * 2)) - ;; - remove) - first=$((num_files * 2 + 1)) - last=$((num_files * 3)) - ;; - *) echo "get_files(): invalid flavor" && return 1 ;; - esac - - local files="" - local f - for f in $(seq -f testfile.%g $first $last); do - test_file=$test_dir/$f - $GETSTRIPE -v $test_file | - egrep -v "lmm_stripe|lmm_layout|lmm_magic" 1>&2 - files="$files $test_file" - done - files=$(echo $files | sed "s#$DIR/##g") - echo $files -} - -# Remove objects associated with files. -remove_objects() { - do_rpc_nodes $(facet_host $1) remove_ost_objects $@ -} - -# Remove files from MDS. -remove_files() { - do_rpc_nodes $(facet_host $1) remove_mdt_files $@ -} - -# Create EAs on files so objects are referenced from different files. -duplicate_files() { - do_rpc_nodes $(facet_host $1) duplicate_mdt_files $@ -} - -#********************************* Main Flow **********************************# - -init_logging - -# get the server target devices -get_svr_devs - -TESTDIR=$DIR/d0.$TESTSUITE -if is_empty_fs $MOUNT; then - # create test directory - mkdir -p $TESTDIR || error "mkdir $TESTDIR failed" - - # create some dirs and files on the filesystem - create_files $TESTDIR $NUMDIRS $NUMFILES - - # get objids for files in group $OBJGRP on the OST with index $OSTIDX - echo "objects to be removed, leaving dangling references:" - OST_REMOVE=$(get_objects $OSTIDX $OBJGRP \ - $(seq -f $TESTDIR/testfile.%g $NUMFILES)) - - # get the node name and target device for the OST with index $OSTIDX - OSTNODE=$(facet_active_host ost$((OSTIDX + 1))) - OSTDEV=$(get_ost_dev $OSTNODE $OSTIDX) || - error "get_ost_dev $OSTNODE $OSTIDX failed" - - # get the file names to be duplicated on the MDS - echo "files to be duplicated, leaving double-referenced objects:" - MDS_DUPE=$(get_files dup $TESTDIR $NUMFILES) || error "$MDS_DUPE" - # get the file names to be removed from the MDS - echo "files to be removed, leaving orphan objects:" - MDS_REMOVE=$(get_files remove $TESTDIR $NUMFILES) || error "$MDS_REMOVE" - - stopall -f || error "cleanupall failed" - - # remove objects associated with files in group $OBJGRP - # on the OST with index $OSTIDX - remove_objects ost$((OSTIDX + 1)) $OSTDEV $OBJGRP $OST_REMOVE || - error "removing objects failed" - - # remove files from MDS - remove_files $SINGLEMDS $MDTDEV $MDS_REMOVE || - error "removing files failed" - - # create EAs on files so objects are referenced from different files - duplicate_files $SINGLEMDS $MDTDEV $MDS_DUPE || - error "duplicating files failed" - FSCK_MAX_ERR=1 # file system errors corrected -else # is_empty_fs $MOUNT - FSCK_MAX_ERR=4 # file system errors left uncorrected - sync; sync; sleep 3 # make sure all data flush back -fi - -# Test 1a - check and repair the filesystem -# lfsck will return 1 if the filesystem had errors fixed -# run e2fsck to generate databases used for lfsck -generate_db - -# remount filesystem -ORIG_REFORMAT=$REFORMAT -REFORMAT="" -check_and_setup_lustre -REFORMAT=$ORIG_REFORMAT - -# run lfsck -rc=0 -run_lfsck || rc=$? -if [ $rc -eq 0 ]; then - echo "clean after the first check" -else - # remove the files in lost+found created by the first lfsck - # run, they could confuse the second run of lfsck. - rm -fr $DIR/lost+found/* - sync; sync; sleep 3 - - # run e2fsck again to generate databases used for lfsck - generate_db - - # run lfsck again - rc=0 - run_lfsck || rc=$? - if [ $rc -eq 0 ]; then - echo "clean after the second check" - else - # FIXME: If the first run of lfsck fixed some errors, - # the second run of lfsck will always return 1 (some - # errors fixed) but not 0 (fs clean), the reason of - # this unexpected behaviour is unkown yet. - # - # Actually, this issue exists from day one but was - # not detected before, because run_lfsck() always return - # 0 before. Let's supress this error and make the lfsck - # test pass for now, once we figure out the problem, - # following 'echo' should be replaced with 'error'. - # See LU-3180. - echo "lfsck test 2 - finished with rc=$rc" - fi -fi - -complete $SECONDS -# The test directory contains some files referencing to some object -# which could cause error when removing the directory. -RMCNT=0 -while [ -d $TESTDIR ]; do - RMCNT=$((RMCNT + 1)) - rm -fr $TESTDIR || echo "$RMCNT round: rm $TESTDIR failed" - [ $RMCNT -ge 10 ] && error "cleanup $TESTDIR failed $RMCNT times" - remount_client $MOUNT -done -check_and_cleanup_lustre -exit_status diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index a66f35f..8a689af 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -556,11 +556,7 @@ run_test 17l "Ensure lgetxattr's returned xattr size is consistent ========" test_17m() { local short_sym="0123456789" local WDIR=$DIR/${tdir}m - local mds_index - local devname - local cmd local i - local rc=0 remote_mds_nodsh && skip "remote MDS with nodsh" && return [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.2.0) ] && @@ -593,49 +589,42 @@ test_17m() { echo "recreate the 512 symlink files with a shorter string" for ((i = 0; i < 512; ++i)); do # rewrite the symlink file with a shorter string - ln -sf ${long_sym} $WDIR/long-$i - ln -sf ${short_sym} $WDIR/short-$i + ln -sf ${long_sym} $WDIR/long-$i || error "long_sym failed" + ln -sf ${short_sym} $WDIR/short-$i || error "short_sym failed" done - mds_index=$($LFS getstripe -M $WDIR) - mds_index=$((mds_index+1)) - devname=$(mdsdevname $mds_index) - cmd="$E2FSCK -fnvd $devname" + local mds_index=$(($($LFS getstripe -M $WDIR) + 1)) + local devname=$(mdsdevname $mds_index) - echo "stop and checking mds${mds_index}: $cmd" + echo "stop and checking mds${mds_index}:" # e2fsck should not return error stop mds${mds_index} - do_facet mds${mds_index} $cmd || rc=$? + run_e2fsck $(facet_active_host mds${mds_index}) $devname -n + rc=$? start mds${mds_index} $devname $MDS_MOUNT_OPTS || error "start failed" df $MOUNT > /dev/null 2>&1 - [ $rc -ne 0 ] && error "e2fsck should not report error upon "\ - "short/long symlink MDT: rc=$rc" - return $rc + [ $rc -eq 0 ] || + error "e2fsck detected error for short/long symlink: rc=$rc" } run_test 17m "run e2fsck against MDT which contains short/long symlink" check_fs_consistency_17n() { local mdt_index - local devname - local cmd local rc=0 # create/unlink in 17n only change 2 MDTs(MDT1/MDT2), # so it only check MDT1/MDT2 instead of all of MDTs. - for mdt_index in $(seq 1 2); do - devname=$(mdsdevname $mdt_index) - cmd="$E2FSCK -fnvd $devname" - - echo "stop and checking mds${mdt_index}: $cmd" + for mdt_index in 1 2; do + local devname=$(mdsdevname $mdt_index) # e2fsck should not return error stop mds${mdt_index} - do_facet mds${mdt_index} $cmd || rc=$? + run_e2fsck $(facet_active_host mds$mdt_index) $devname -n || + rc=$((rc + $?)) start mds${mdt_index} $devname $MDS_MOUNT_OPTS || - error "mount mds${mdt_index} failed" + error "mount mds$mdt_index failed" df $MOUNT > /dev/null 2>&1 - [ $rc -ne 0 ] && break done return $rc } diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 8902440..baaaae9 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -76,7 +76,6 @@ usage() { print_summary () { trap 0 [ -z "$DEFAULT_SUITES"] && return 0 - [ "$TESTSUITE" == "lfsck" ] && return 0 [ -n "$ONLY" ] && echo "WARNING: ONLY is set to $(echo $ONLY)" local details local form="%-13s %-17s %-9s %s %s\n" @@ -198,9 +197,8 @@ init_test_env() { fi fi - export LFSCK_BIN=${LFSCK_BIN:-lfsck} - export LFSCK_ALWAYS=${LFSCK_ALWAYS:-"no"} # check fs after each test suite - export FSCK_MAX_ERR=4 # File system errors left uncorrected + export LFSCK_ALWAYS=${LFSCK_ALWAYS:-"no"} # check fs after test suite + export FSCK_MAX_ERR=4 # File system errors left uncorrected export ZFS=${ZFS:-zfs} export ZPOOL=${ZPOOL:-zpool} @@ -2135,9 +2133,12 @@ wait_update () { } wait_update_facet() { + local verbose= + [ "$1" = "--verbose" ] && verbose="$1" && shift + local facet=$1 shift - wait_update $(facet_active_host $facet) "$@" + wait_update $verbose $(facet_active_host $facet) "$@" } sync_all_data() { @@ -4217,88 +4218,31 @@ check_shared_dir() { return 0 } -# Run e2fsck on MDT and OST(s) to generate databases used for lfsck. -generate_db() { - local i - local ostidx - local dev - local node - - [[ $(lustre_version_code $SINGLEMDS) -ne $(version_code 2.2.0) ]] || - { skip "Lustre 2.2.0 lacks the patch for LU-1255"; exit 0; } - - check_shared_dir $SHARED_DIRECTORY || - error "$SHARED_DIRECTORY isn't a shared directory" - - export MDSDB=$SHARED_DIRECTORY/mdsdb - export OSTDB=$SHARED_DIRECTORY/ostdb - - # DNE is not supported, so when running e2fsck on a DNE filesystem, - # we only pass master MDS parameters. - run_e2fsck $MDTNODE $MDTDEV "-n --mdsdb $MDSDB" - - i=0 - ostidx=0 - OSTDB_LIST="" - for node in $(osts_nodes); do - for dev in ${OSTDEVS[i]}; do - run_e2fsck $node $dev "-n --mdsdb $MDSDB --ostdb $OSTDB-$ostidx" - OSTDB_LIST="$OSTDB_LIST $OSTDB-$ostidx" - ostidx=$((ostidx + 1)) - done - i=$((i + 1)) - done -} - -# Run lfsck on server node if lfsck can't be found on client (LU-2571) -run_lfsck_remote() { - local cmd="$LFSCK_BIN -c -l --mdsdb $MDSDB --ostdb $OSTDB_LIST $MOUNT" - local client=$1 - local mounted=true - local rc=0 - - #Check if lustre is already mounted - do_rpc_nodes $client is_mounted $MOUNT || mounted=false - if ! $mounted; then - zconf_mount $client $MOUNT || - error "failed to mount Lustre on $client" - fi - #Run lfsck - echo $cmd - do_node $client $cmd || rc=$? - #Umount if necessary - if ! $mounted; then - zconf_umount $client $MOUNT || - error "failed to unmount Lustre on $client" - fi - - [ $rc -le $FSCK_MAX_ERR ] || - error "$cmd returned $rc, should be <= $FSCK_MAX_ERR" - echo "lfsck finished with rc=$rc" - - return $rc -} - run_lfsck() { - local facets="client $SINGLEMDS" - local found=false - local facet - local node - local rc=0 - - for facet in $facets; do - node=$(facet_active_host $facet) - if check_progs_installed $node $LFSCK_BIN; then - found=true - break - fi + do_nodes $(comma_list $(mdts_nodes) $(osts_nodes)) \ + $LCTL set_param printk=+lfsck + do_facet $SINGLEMDS "$LCTL lfsck_start -M $FSNAME-MDT0000 -r -A -t all" + + for k in $(seq $MDSCOUNT); do + # wait up to 10+1 minutes for LFSCK to complete + wait_update_facet --verbose mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_layout | + awk '/^status/ { print \\\$2 }'" "completed" 600 || + error "MDS${k} layout isn't the expected 'completed'" + wait_update_facet --verbose mds${k} "$LCTL get_param -n \ + mdd.$(facet_svc mds${k}).lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 60 || + error "MDS${k} namespace isn't the expected 'completed'" done - ! $found && error "None of \"$facets\" supports lfsck" - - run_lfsck_remote $node || rc=$? - - rm -rvf $MDSDB* $OSTDB* || true - return $rc + local rep_mdt=$(do_nodes $(comma_list $(mdts_nodes)) \ + $LCTL get_param -n mdd.$FSNAME-*.lfsck_* | + awk '/repaired/ { print $2 }' | calc_sum) + local rep_ost=$(do_nodes $(comma_list $(osts_nodes)) \ + $LCTL get_param -n obdfilter.$FSNAME-*.lfsck_* | + awk '/repaired/ { print $2 }' | calc_sum) + local repaired=$((rep_mdt + rep_ost)) + [ $repaired -eq 0 ] || + error "lfsck repaired $rep_mdt MDT and $rep_ost OST errors" } dump_file_contents() { @@ -4356,11 +4300,10 @@ log_zfs_info() { } check_and_cleanup_lustre() { - if [ "$LFSCK_ALWAYS" = "yes" -a "$TESTSUITE" != "lfsck" ]; then - get_svr_devs - generate_db - run_lfsck - fi + if [ "$LFSCK_ALWAYS" = "yes" -a "$TESTSUITE" != "sanity-lfsck" -a \ + "$TESTSUITE" != "sanity-scrub" ]; then + run_lfsck + fi if is_mounted $MOUNT; then [ -n "$DIR" ] && rm -rf $DIR/[Rdfs][0-9]* ||