From: Andreas Dilger Date: Wed, 1 May 2013 08:26:26 +0000 (-0600) Subject: LU-3180 tests: lfsck.sh improved messages X-Git-Tag: 2.4.51~54 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=3d1305b528f370b893a77078489a04806f78b555 LU-3180 tests: lfsck.sh improved messages Use skip_env() for the SHARED_DIRECTORY check, so that lfsck is not skipped for the normal test environment (VMs sharing a single host) or single-node tests. Add some more information about lfsck.sh corruption, for improved debugging in case of problems. Test-Parameters: testlist=lfsck Signed-off-by: Andreas Dilger Change-Id: I56d7a52b0f880587d988c842fb6ee0b14b699ca2 Reviewed-on: http://review.whamcloud.com/6206 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Jian Yu Reviewed-by: Emoly Liu Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh index 766875f..438c99f 100644 --- a/lustre/tests/cfg/local.sh +++ b/lustre/tests/cfg/local.sh @@ -124,4 +124,4 @@ MACHINEFILE_OPTION=${MACHINEFILE_OPTION:-"-machinefile"} # This is used by a small number of tests to share state between the client # running the tests, or in some cases between the servers (e.g. lfsck.sh). # It needs to be a non-lustre filesystem that is available on all the nodes. -SHARED_DIRECTORY=${SHARED_DIRECTORY:-""} # bug 17839 comment 65 +SHARED_DIRECTORY=${SHARED_DIRECTORY:-$TMP} # bug 17839 comment 65 diff --git a/lustre/tests/lfsck.sh b/lustre/tests/lfsck.sh index 88aa5bd..f726f11 100644 --- a/lustre/tests/lfsck.sh +++ b/lustre/tests/lfsck.sh @@ -1,12 +1,11 @@ #!/bin/bash -# -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*- -# vim:shiftwidth=4:softtabstop=4:tabstop=4: -# # test e2fsck and lfsck to detect and fix filesystem corruption # #set -vx set -e +[ "$1" = "-v" ] && shift && VERBOSE=echo || VERBOSE=: + LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} . $LUSTRE/tests/test-framework.sh init_test_env $@ @@ -17,9 +16,9 @@ NUMDIRS=${NUMDIRS:-4} OSTIDX=${OSTIDX:-0} # the OST index in LOV OBJGRP=${OBJGRP:-0} # the OST object group -[ -d "$SHARED_DIRECTORY" ] || \ - { skip "SHARED_DIRECTORY should be specified with a shared directory \ -which can be accessable on all of the nodes" && exit 0; } +[ ! -d "$SHARED_DIRECTORY" ] && + skip_env "SHARED_DIRECTORY should be accessible on all nodes" && + exit 0 [[ $(facet_fstype $SINGLEMDS) != ldiskfs ]] && skip "Only applicable to ldiskfs-based MDTs" && exit 0 [[ $(facet_fstype OST) != ldiskfs ]] && @@ -38,84 +37,79 @@ dd if=/dev/urandom of=$SAMPLE_FILE bs=1M count=1 # Create some dirs and files on the filesystem. create_files_sub() { - local test_dir=$1 - local num_dirs=$2 - local file_name=$3 - local first_num=$4 - local last_num=$5 - local d e f - - for d in $(seq -f d%g $first_num $last_num); do - echo "creating files in $test_dir/$d" - for e in $(seq -f d%g $num_dirs); do - mkdir -p $test_dir/$d/$e || error "mkdir $test_dir/$d/$e failed" - for f in $(seq -f test%g $num_dirs); do - cp $file_name $test_dir/$d/$e/$f || \ - error "cp $file_name $test_dir/$d/$e/$f failed" - done - done - done + local test_dir=$1 + local num_dirs=$2 + local file_name=$3 + local first_num=$4 + local last_num=$5 + local d e f + + echo "creating files in $test_dir/d[$first_num..$last_num]:" + for d in $(seq -f $test_dir/d%g $first_num $last_num); do + mkdir -p $d || error "mkdir $d failed" + $VERBOSE "created $d $(lfs path2fid $d)" + for e in $(seq -f $d/d%g $num_dirs); do + mkdir -p $e || error "mkdir $$e failed" + $VERBOSE "created $e $(lfs path2fid $e)" + for f in $(seq -f $e/test%g $num_dirs); do + cp $file_name $f || + error "cp $file_name $f failed" + $VERBOSE "created $f $(lfs path2fid $f)" + done + done + done } create_files() { - local test_dir=$1 - local num_dirs=$2 - local num_files=$3 - local f - - # create some files on the filesystem - local first_num=1 - local last_num=$num_dirs - create_files_sub $test_dir $num_dirs /etc/fstab $first_num $last_num - - # create files to be modified - for f in $(seq -f $test_dir/testfile.%g $((num_files * 3))); do - echo "creating $f" - cp $SAMPLE_FILE $f || error "cp $SAMPLE_FILE $f failed" - done - - # create some more files - first_num=$((num_dirs * 2 + 1)) - last_num=$((num_dirs * 2 + 3)) - create_files_sub $test_dir $num_dirs /etc/hosts $first_num $last_num - - # these should NOT be taken as duplicates - for f in $(seq -f $test_dir/d$last_num/linkfile.%g $num_files); do - echo "linking files in $test_dir/d$last_num" - cp /etc/hosts $f || error "cp /etc/hosts $f failed" - ln $f $f.link || error "ln $f $f.link failed" - done + local test_dir=$1 + local num_dirs=$2 + local num_files=$3 + local f + + # create some files on the filesystem + local first_num=1 + local last_num=$num_dirs + create_files_sub $test_dir $num_dirs /etc/fstab $first_num $last_num + + # create files to be modified + echo "creating files $test_dir/testfile.[0..$((num_files * 3))]:" + for f in $(seq -f $test_dir/testfile.%g $((num_files * 3))); do + cp $SAMPLE_FILE $f || error "cp $SAMPLE_FILE $f failed" + $VERBOSE "created $f $(lfs path2fid $f)" + done + + # create some more files + first_num=$((num_dirs * 2 + 1)) + last_num=$((num_dirs * 2 + 3)) + create_files_sub $test_dir $num_dirs /etc/hosts $first_num $last_num + + # these should NOT be taken as duplicates + echo "linking files in $test_dir/d[$first_num..$last_num]:" + for f in $(seq -f $test_dir/d$last_num/linkfile.%g $num_files); do + cp /etc/hosts $f || error "cp /etc/hosts $f failed" + ln $f $f.link || error "ln $f $f.link failed" + $VERBOSE "linked $f to $f.link $(lfs path2fid $f)" + done } # Get the objids for files on the OST (given the OST index and object group). get_objects() { - local obdidx=$1 - shift - local group=$1 - shift - local ost_files="$@" - local ost_objids - ost_objids=$($LFS getstripe $ost_files | \ - awk '{if ($1 == '$obdidx' && $4 == '$group') print $2 }') - echo $ost_objids -} - -# Get the OST nodet name (given the OST index). -get_ost_node() { - local obdidx=$1 - local ost_uuid - local ost_node - local node - - ost_uuid=$(ostuuid_from_index $obdidx) - - for node in $(osts_nodes); do - do_node $node "lctl get_param -n obdfilter.*.uuid" | grep -q $ost_uuid - [ ${PIPESTATUS[1]} -eq 0 ] && ost_node=$node && break - done - [ -z "$ost_node" ] && \ - echo "failed to find the OST with index $obdidx" && return 1 - echo $ost_node + local obdidx=$1 + shift + local seq=$1 + shift + local ost_files="$@" + local ost_objids + local objids + + for F in $ostfiles; do + objid=$($GETSTRIPE $F | + awk "{ if (\$1 == $obdidx && \$4 == $seq) print \$2 }") + $VERBOSE $GETSTRIPE -v $F | grep -v "lmm_seq|lmm_object_id" 1>&2 + ost_objids="$ost_objids $objid" + done + + echo $ost_objids } # Get the OST target device (given the OST facet name and OST index). @@ -133,8 +127,8 @@ get_ost_dev() { fi if [[ $ost_dev = *loop* ]]; then - ost_dev=$(do_node $node "losetup $ost_dev" | \ - sed -e "s/.*(//" -e "s/).*//") + ost_dev=$(do_node $node "losetup $ost_dev" | + sed -e "s/.*(//" -e "s/).*//") fi echo $ost_dev @@ -142,47 +136,49 @@ get_ost_dev() { # Get the file names to be duplicated or removed on the MDS. get_files() { - local flavor=$1 - local test_dir=$2 - local num_files=$3 - local first last - local test_file - - case $flavor in - dup) - first=$((num_files + 1)) - last=$((num_files * 2)) - ;; - remove) - first=$((num_files * 2 + 1)) - last=$((num_files * 3)) - ;; - *) echo "get_files(): invalid flavor" && return 1 ;; - esac - - local files="" - local f - for f in $(seq -f testfile.%g $first $last); do - test_file=$test_dir/$f - files="$files $test_file" - done - files=$(echo $files | sed "s#$DIR/##g") - echo $files + local flavor=$1 + local test_dir=$2 + local num_files=$3 + local first last + local test_file + + case $flavor in + dup) + first=$((num_files + 1)) + last=$((num_files * 2)) + ;; + remove) + first=$((num_files * 2 + 1)) + last=$((num_files * 3)) + ;; + *) echo "get_files(): invalid flavor" && return 1 ;; + esac + + local files="" + local f + for f in $(seq -f testfile.%g $first $last); do + test_file=$test_dir/$f + $GETSTRIPE -v $test_file | + egrep -v "lmm_stripe|lmm_layout|lmm_magic" 1>&2 + files="$files $test_file" + done + files=$(echo $files | sed "s#$DIR/##g") + echo $files } # Remove objects associated with files. remove_objects() { - do_rpc_nodes $1 remove_ost_objects $@ + do_rpc_nodes $(facet_host $1) remove_ost_objects $@ } # Remove files from MDS. remove_files() { - do_rpc_nodes $(facet_host $1) remove_mdt_files $@ + do_rpc_nodes $(facet_host $1) remove_mdt_files $@ } # Create EAs on files so objects are referenced from different files. duplicate_files() { - do_rpc_nodes $(facet_host $1) duplicate_mdt_files $@ + do_rpc_nodes $(facet_host $1) duplicate_mdt_files $@ } #********************************* Main Flow **********************************# @@ -194,42 +190,46 @@ get_svr_devs TESTDIR=$DIR/d0.$TESTSUITE if is_empty_fs $MOUNT; then - # create test directory - mkdir -p $TESTDIR || error "mkdir $TESTDIR failed" - - # create some dirs and files on the filesystem - create_files $TESTDIR $NUMDIRS $NUMFILES - - # get the objids for files in group $OBJGRP on the OST with index $OSTIDX - OST_REMOVE=$(get_objects $OSTIDX $OBJGRP \ - $(seq -f $TESTDIR/testfile.%g $NUMFILES)) - - # get the node name and target device for the OST with index $OSTIDX - OSTNODE=$(get_ost_node $OSTIDX) || error "get_ost_node by index $OSTIDX failed" - OSTDEV=$(get_ost_dev $OSTNODE $OSTIDX) || - error "get_ost_dev $OSTNODE $OSTIDX failed" - - # get the file names to be duplicated on the MDS - MDS_DUPE=$(get_files dup $TESTDIR $NUMFILES) || error "$MDS_DUPE" - # get the file names to be removed from the MDS - MDS_REMOVE=$(get_files remove $TESTDIR $NUMFILES) || error "$MDS_REMOVE" - - stopall -f || error "cleanupall failed" - - # remove objects associated with files in group $OBJGRP - # on the OST with index $OSTIDX - remove_objects $OSTNODE $OSTDEV $OBJGRP $OST_REMOVE || - error "removing objects failed" - - # remove files from MDS - remove_files $SINGLEMDS $MDTDEV $MDS_REMOVE || error "removing files failed" - - # create EAs on files so objects are referenced from different files - duplicate_files $SINGLEMDS $MDTDEV $MDS_DUPE || - error "duplicating files failed" - FSCK_MAX_ERR=1 # file system errors corrected + # create test directory + mkdir -p $TESTDIR || error "mkdir $TESTDIR failed" + + # create some dirs and files on the filesystem + create_files $TESTDIR $NUMDIRS $NUMFILES + + # get objids for files in group $OBJGRP on the OST with index $OSTIDX + echo "objects to be removed, leaving dangling references:" + OST_REMOVE=$(get_objects $OSTIDX $OBJGRP \ + $(seq -f $TESTDIR/testfile.%g $NUMFILES)) + + # get the node name and target device for the OST with index $OSTIDX + OSTNODE=$(facet_active_host ost$((OSTIDX + 1))) + OSTDEV=$(get_ost_dev $OSTNODE $OSTIDX) || + error "get_ost_dev $OSTNODE $OSTIDX failed" + + # get the file names to be duplicated on the MDS + echo "files to be duplicated, leaving double-referenced objects:" + MDS_DUPE=$(get_files dup $TESTDIR $NUMFILES) || error "$MDS_DUPE" + # get the file names to be removed from the MDS + echo "files to be removed, leaving orphan objects:" + MDS_REMOVE=$(get_files remove $TESTDIR $NUMFILES) || error "$MDS_REMOVE" + + stopall -f || error "cleanupall failed" + + # remove objects associated with files in group $OBJGRP + # on the OST with index $OSTIDX + remove_objects ost$((OSTIDX + 1)) $OSTDEV $OBJGRP $OST_REMOVE || + error "removing objects failed" + + # remove files from MDS + remove_files $SINGLEMDS $MDTDEV $MDS_REMOVE || + error "removing files failed" + + # create EAs on files so objects are referenced from different files + duplicate_files $SINGLEMDS $MDTDEV $MDS_DUPE || + error "duplicating files failed" + FSCK_MAX_ERR=1 # file system errors corrected else # is_empty_fs $MOUNT - FSCK_MAX_ERR=4 # file system errors left uncorrected + FSCK_MAX_ERR=4 # file system errors left uncorrected fi # Test 1a - check and repair the filesystem @@ -247,19 +247,19 @@ REFORMAT=$ORIG_REFORMAT rc=0 run_lfsck || rc=$? if [ $rc -eq 0 ]; then - echo "clean after the first check" + echo "clean after the first check" else - # run e2fsck again to generate databases used for lfsck - generate_db - - # run lfsck again - rc=0 - run_lfsck || rc=$? - if [ $rc -eq 0 ]; then - echo "clean after the second check" - else - error "lfsck test 2 - finished with rc=$rc" - fi + # run e2fsck again to generate databases used for lfsck + generate_db + + # run lfsck again + rc=0 + run_lfsck || rc=$? + if [ $rc -eq 0 ]; then + echo "clean after the second check" + else + error "lfsck test 2 - finished with rc=$rc" + fi fi complete $SECONDS diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index ff194e5..a305fa6 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -5938,19 +5938,18 @@ run_llverfs() #Remove objects from OST remove_ost_objects() { - shift - local ostdev=$1 - local group=$2 - shift 2 + local facet=$1 + local ostdev=$2 + local group=$3 + shift 3 local objids="$@" - local facet=ost$((OSTIDX + 1)) local mntpt=$(facet_mntpt $facet) local opts=$OST_MOUNT_OPTS local i local rc echo "removing objects from $ostdev on $facet: $objids" - if ! do_facet $facet test -b $ostdev; then + if ! test -b $ostdev; then opts=$(csa_add "$opts" -o loop) fi mount -t $(facet_fstype $facet) $opts $ostdev $mntpt ||