From 5735f4bc3d39977a028b2aa95a5985c39774c6c3 Mon Sep 17 00:00:00 2001 From: Vladimir Saveliev Date: Thu, 10 Jun 2010 03:53:28 +0400 Subject: [PATCH] b=13698 support for remote mds and oss in lfscktest.sh SHARED_DIRECTORY is to be specified in case of remote servers LFSCK_ALWAYS flag to check the filesystem from check_and_cleanup_lustre() it is "no" by default SKIP_LFSCK flag to allow e2fsck part of lfscktest.sh to be run without lfsck it is "yes" by default i=adliger i=grev i=jian.yu --- lustre/tests/acceptance-small.sh | 9 +- lustre/tests/lfscktest.sh | 483 +++++++++++++++++++++------------------ lustre/tests/test-framework.sh | 192 ++++++++++++++++ 3 files changed, 456 insertions(+), 228 deletions(-) diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 7e285b9..4204831 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -286,16 +286,9 @@ for NAME in $CONFIGS; do SANITYN="done" fi - [ "$LFSCK" != "no" ] && remote_mds && log "Remote MDS, skipping LFSCK test" && LFSCK=no && MSKIPPED=1 - [ "$LFSCK" != "no" ] && remote_ost && log "Remote OST, skipping LFSCK test" && LFSCK=no && OSKIPPED=1 if [ "$LFSCK" != "no" ]; then title lfsck - if [ -x /usr/sbin/lfsck ]; then - bash lfscktest.sh - else - log "$($E2FSCK -V)" - log "SKIP: $E2FSCK does not support lfsck" - fi + bash lfscktest.sh LFSCK="done" fi diff --git a/lustre/tests/lfscktest.sh b/lustre/tests/lfscktest.sh index 660783a..e699589 100755 --- a/lustre/tests/lfscktest.sh +++ b/lustre/tests/lfscktest.sh @@ -1,241 +1,284 @@ #!/bin/bash +# +# test e2fsck and lfsck to detect and fix filesystem corruption +# #set -vx set -e -TESTNAME="lfscktest" -TMP=${TMP:-/tmp} -MDSDB=${MDSDB:-$TMP/mdsdb} -OSTDB=${OSTDB:-$TMP/ostdb} -LOG=${LOG:-"$TMP/lfscktest.log"} -L2FSCK_PATH=${L2FSCK_PATH:-""} +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. 
${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} + NUMFILES=${NUMFILES:-10} NUMDIRS=${NUMDIRS:-4} -GETSTRIPE=${GETSTRIPE:-"lfs getstripe"} -GETFATTR=${GETFATTR:-getfattr} -SETFATTR=${SETFATTR:-setfattr} -MAX_ERR=1 +OSTIDX=${OSTIDX:-0} # the OST index in LOV +OBJGRP=${OBJGRP:-0} # the OST object group + +[ -d "$SHARED_DIRECTORY" ] || \ + { skip "SHARED_DIRECTORY should be specified with a shared directory \ +which can be accessable on all of the nodes" && exit 0; } + +which getfattr > /dev/null 2>&1 || { skip "could not find getfattr" && exit 0; } +which setfattr > /dev/null 2>&1 || { skip "could not find setfattr" && exit 0; } + +MOUNT_2="" +check_and_setup_lustre + +assert_DIR + +# Create some dirs and files on the filesystem. +create_files_sub() { + local test_dir=$1 + local num_dirs=$2 + local file_name=$3 + local first_num=$4 + local last_num=$5 + local d e f + + for d in $(seq -f d%g $first_num $last_num); do + echo "creating files in $test_dir/$d" + for e in $(seq -f d%g $num_dirs); do + mkdir -p $test_dir/$d/$e || error "mkdir $test_dir/$d/$e failed" + for f in $(seq -f test%g $num_dirs); do + cp $file_name $test_dir/$d/$e/$f || \ + error "cp $file_name $test_dir/$d/$e/$f failed" + done + done + done +} -export PATH=$LFSCK_PATH:`dirname $0`:`dirname $0`/../utils:$PATH +create_files() { + local test_dir=$1 + local num_dirs=$2 + local num_files=$3 + local f + + # create some files on the filesystem + local first_num=1 + local last_num=$num_dirs + create_files_sub $test_dir $num_dirs /etc/fstab $first_num $last_num + + # create files to be modified + for f in $(seq -f $test_dir/testfile.%g $((num_files * 3))); do + echo "creating $f" + cp /etc/termcap $f || error "cp /etc/termcap $f failed" + done + + # create some more files + first_num=$((num_dirs * 2 + 1)) + last_num=$((num_dirs * 2 + 3)) + create_files_sub $test_dir $num_dirs /etc/hosts $first_num $last_num + + # these should NOT be taken as duplicates + for f in $(seq -f $test_dir/d$last_num/linkfile.%g $num_files); 
do + echo "linking files in $test_dir/d$last_num" + cp /etc/hosts $f || error "cp /etc/hosts $f failed" + ln $f $f.link || error "ln $f $f.link failed" + done +} -[ -z "`which $GETFATTR`" ] && echo "$0: $GETFATTR not found" && exit 5 -[ -z "`which $SETFATTR`" ] && echo "$0: $SETFATTR not found" && exit 6 +# Get the objids for files on the OST (given the OST index and object group). +get_objects() { + local obdidx=$1 + shift + local group=$1 + shift + local ost_files="$@" + local ost_objids + ost_objids=$($LFS getstripe $ost_files | \ + awk '{if ($1 == '$obdidx' && $4 == '$group') print $2 }') + echo $ost_objids +} -LUSTRE=${LUSTRE:-`dirname $0`/..} -. $LUSTRE/tests/test-framework.sh -init_test_env $@ -. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} +# Get the OST node name (given the OST index). +get_ost_node() { + local obdidx=$1 + local ost_uuid + local ost_node + local node + + ost_uuid=$($LFS osts | grep "^$obdidx: " | cut -d' ' -f2 | head -n1) + + for node in $(osts_nodes); do + do_node $node "lctl get_param -n obdfilter.*.uuid" | grep -q $ost_uuid + [ ${PIPESTATUS[1]} -eq 0 ] && ost_node=$node && break + done + [ -z "$ost_node" ] && \ + echo "failed to find the OST with index $obdidx" && return 1 + echo $ost_node +} -remote_mds && skip "remote MDS" && exit 0 -remote_ost && skip "remote OST" && exit 0 +# Get the OST target device (given the OST node name and OST index). 
+get_ost_dev() { + local node=$1 + local obdidx=$2 + local ost_name + local ost_dev -# if nothing mounted, don't nuke MOUNT variable needed in llmount.sh -WAS_MOUNTED=$(mounted_lustre_filesystems | head -1) -if [ -z "$WAS_MOUNTED" ]; then - # This code doesn't handle multiple mounts well, so nuke MOUNT2 variable - MOUNT2="" sh llmount.sh - MOUNT=$(mounted_lustre_filesystems) - [ -z "$MOUNT" ] && echo "NAME=$NAME not mounted" && exit 2 -else - MOUNT=${WAS_MOUNTED} -fi + ost_name=$($LFS osts | grep "^$obdidx: " | cut -d' ' -f2 | \ + head -n1 | sed -e 's/_UUID$//') -DIR=$DIR/$TESTNAME -[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 3 + ost_dev=$(do_node $node "lctl get_param -n obdfilter.$ost_name.mntdev") + [ ${PIPESTATUS[0]} -ne 0 ] && \ + echo "failed to find the OST device with index $obdidx on $node" && \ + return 1 -if [ "$WAS_MOUNTED" ]; then - LFSCK_SETUP=no - MAX_ERR=4 # max expected error from e2fsck -fi + if [[ $ost_dev = *loop* ]]; then + ost_dev=$(do_node $node "losetup $ost_dev" | \ + sed -e "s/.*(//" -e "s/).*//") + fi -get_mnt_devs() { - DEVS=`lctl get_param -n $1.*.mntdev` - for DEV in $DEVS; do - case $DEV in - *loop*) losetup $DEV | sed -e "s/.*(//" -e "s/).*//" ;; - *) echo $DEV ;; - esac - done + echo $ost_dev } -if [ "$LFSCK_SETUP" != "no" ]; then - #Create test directory - # -- can't remove the mountpoint... 
- [ -z "$DIR" ] && rm -rf $DIR/* - mkdir -p $DIR - OSTCOUNT=`$LFS osts | grep -c "^[0-9]*: " || true` - - # Create some files on the filesystem - for d in `seq -f d%g $NUMDIRS`; do - echo "creating files in $DIR/$d" - for e in `seq -f d%g $NUMDIRS`; do - mkdir -p $DIR/$d/$e - for f in `seq -f test%g $NUMDIRS`; do - cp /etc/fstab $DIR/$d/$e/$f ||exit 5 - done - done - done - - # Create Files to be modified - for f in `seq -f $DIR/testfile.%g $((NUMFILES * 3))`; do - echo "creating $f" - cp /etc/termcap $f || exit 10 - done - - #Create some more files - for d in `seq -f d%g $((NUMDIRS * 2 + 1)) $((NUMDIRS * 2 + 3))`; do - echo "creating files in $DIR/$d" - for e in `seq -f d%g $NUMDIRS`; do - mkdir -p $DIR/$d/$e - for f in `seq -f test%g $NUMDIRS`; do - cp /etc/hosts $DIR/$d/$e/$f ||exit 15 - done - done - done - - # these should NOT be taken as duplicates - for f in `seq -f $DIR/$d/linkfile.%g $NUMFILES`; do - echo "linking files in $DIR/$d" - cp /etc/hosts $f - ln $f $f.link - done - - # Get objids for a file on the OST - OST_FILES=`seq -f $DIR/testfile.%g $NUMFILES` - OST_REMOVE=`$GETSTRIPE $OST_FILES | awk '$1 == 0 { print $2 }' | head -n $NUMFILES` - - export MDS_DUPE="" - for f in `seq -f testfile.%g $((NUMFILES + 1)) $((NUMFILES * 2))`; do - TEST_FILE=$DIR/$f - echo "DUPLICATING MDS file $TEST_FILE" - $GETSTRIPE -v $TEST_FILE >> $LOG || exit 20 - MDS_DUPE="$MDS_DUPE $TEST_FILE" - done - MDS_DUPE=`echo $MDS_DUPE | sed "s#$MOUNT/##g"` - - export MDS_REMOVE="" - for f in `seq -f testfile.%g $((NUMFILES * 2 + 1)) $((NUMFILES * 3))`; do - TEST_FILE=$DIR/$f - echo "REMOVING MDS file $TEST_FILE which has info:" - $GETSTRIPE -v $TEST_FILE >> $LOG || exit 30 - MDS_REMOVE="$MDS_REMOVE $TEST_FILE" - done - MDS_REMOVE=`echo $MDS_REMOVE | sed "s#$MOUNT/##g"` - - MDTDEVS=`get_mnt_devs mds` - OSTDEVS=`get_mnt_devs obdfilter` - OSTCOUNT=`echo $OSTDEVS | wc -w` - sh llmountcleanup.sh || exit 40 - - # Remove objects associated with files - echo "removing objects: `echo 
$OST_REMOVE`" - DEBUGTMP=`mktemp $TMP/debugfs.XXXXXXXXXX` - for i in $OST_REMOVE; do - echo "rm O/0/d$((i % 32))/$i" >> $DEBUGTMP - done - $DEBUGFS -w -f $DEBUGTMP `echo $OSTDEVS | cut -d' ' -f 1` - RET=$? - rm $DEBUGTMP - [ $RET -ne 0 ] && exit 50 - - SAVE_PWD=$PWD - [ "$FSTYPE" = "ldiskfs" ] && load_module ../ldiskfs/ldiskfs/ldiskfs - mount -t $FSTYPE -o loop $MDSDEV $MOUNT || exit 60 - do_umount() { - trap 0 - cd $SAVE_PWD - umount -f $MOUNT - } - trap do_umount EXIT - - #Remove files from mds - for f in $MDS_REMOVE; do - rm $MOUNT/ROOT/$f || exit 70 - done - - #Create EAs on files so objects are referenced from different files - ATTRTMP=`mktemp $TMP/setfattr.XXXXXXXXXX` - cd $MOUNT/ROOT || exit 78 - for f in $MDS_DUPE; do - touch $f.bad || exit 74 - getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $ATTRTMP - setfattr --restore $ATTRTMP || exit 80 - done - cd $SAVE_PWD - rm $ATTRTMP - - do_umount -else - MDTDEVS=`get_mnt_devs mds` - OSTDEVS=`get_mnt_devs obdfilter` - OSTCOUNT=`echo $OSTDEVS | wc -w` -fi # LFSCK_SETUP - -# Run e2fsck to get mds and ost info -# a return status of 1 indicates e2fsck successfuly fixed problems found -set +e - -echo "$E2FSCK -d -v -fn --mdsdb $MDSDB $MDSDEV" -df > /dev/null # update statfs data on disk -$E2FSCK -d -v -fn --mdsdb $MDSDB $MDSDEV -RET=$? -[ $RET -gt $MAX_ERR ] && echo "$E2FSCK returned $RET" && exit 90 || true - -export OSTDB_LIST="" -i=0 -for OSTDEV in $OSTDEVS; do - df > /dev/null # update statfs data on disk - $E2FSCK -d -v -fn --mdsdb $MDSDB --ostdb $OSTDB-$i $OSTDEV - RET=$? - [ $RET -gt $MAX_ERR ] && echo "$E2FSCK returned $RET" && exit 100 - OSTDB_LIST="$OSTDB_LIST $OSTDB-$i" - i=$((i + 1)) -done - -#Remount filesystem -[ "`mount | grep $MOUNT`" ] || setupall - -# need to turn off shell error detection to get proper error return +# Get the file names to be duplicated or removed on the MDS. 
+get_files() { + local flavor=$1 + local test_dir=$2 + local num_files=$3 + local first last + local test_file + + case $flavor in + dup) + first=$((num_files + 1)) + last=$((num_files * 2)) + ;; + remove) + first=$((num_files * 2 + 1)) + last=$((num_files * 3)) + ;; + *) echo "get_files(): invalid flavor" && return 1 ;; + esac + + local files="" + local f + for f in $(seq -f testfile.%g $first $last); do + test_file=$test_dir/$f + files="$files $test_file" + done + files=$(echo $files | sed "s#$DIR/##g") + echo $files +} + +# Remove objects associated with files (debugfs script is run on the OST node). +remove_objects() { + local node=$1 + shift + local ostdev=$1 + shift + local group=$1 + shift + local objids="$@" + local tmp + local i + local rc + + echo "removing objects from $ostdev on $node: $objids" + tmp=$(mktemp $SHARED_DIRECTORY/debugfs.XXXXXXXXXX) || return $? + for i in $objids; do + echo "rm O/$group/d$((i % 32))/$i" >> $tmp + done + + do_node $node "$DEBUGFS -w -f $tmp $ostdev" + rc=${PIPESTATUS[0]} + rm -f $tmp + + return $rc +} + +# Remove files from MDS. +remove_files() { + do_rpc_nodes $(facet_host $1) remove_mdt_files $@ +} + +# Create EAs on files so objects are referenced from different files. 
+duplicate_files() { + do_rpc_nodes $(facet_host $1) duplicate_mdt_files $@ +} + +#********************************* Main Flow **********************************# + +# get the server target devices +get_svr_devs + +if [ "$SKIP_LFSCK" = "no" ] && is_empty_fs $MOUNT; then + # create test directory + TESTDIR=$DIR/d0.$TESTSUITE + mkdir -p $TESTDIR || error "mkdir $TESTDIR failed" + + # create some dirs and files on the filesystem + create_files $TESTDIR $NUMDIRS $NUMFILES + + # get the objids for files in group $OBJGRP on the OST with index $OSTIDX + OST_REMOVE=$(get_objects $OSTIDX $OBJGRP \ + $(seq -f $TESTDIR/testfile.%g $NUMFILES)) + + # get the node name and target device for the OST with index $OSTIDX + OSTNODE=$(get_ost_node $OSTIDX) || error "get_ost_node by index $OSTIDX failed" + OSTDEV=$(get_ost_dev $OSTNODE $OSTIDX) || \ + error "get_ost_dev $OSTNODE $OSTIDX failed" + + # get the file names to be duplicated on the MDS + MDS_DUPE=$(get_files dup $TESTDIR $NUMFILES) || error "$MDS_DUPE" + # get the file names to be removed from the MDS + MDS_REMOVE=$(get_files remove $TESTDIR $NUMFILES) || error "$MDS_REMOVE" + + stopall -f || error "cleanupall failed" + + # remove objects associated with files in group $OBJGRP + # on the OST with index $OSTIDX + remove_objects $OSTNODE $OSTDEV $OBJGRP $OST_REMOVE || \ + error "removing objects failed" + + # remove files from MDS + remove_files mds $MDSDEV $MDS_REMOVE || error "removing files failed" + + # create EAs on files so objects are referenced from different files + duplicate_files mds $MDSDEV $MDS_DUPE || \ + error "duplicating files failed" + FSCK_MAX_ERR=1 # file system errors corrected +else # $SKIP_LFSCK = yes || !is_empty_fs $MOUNT + FSCK_MAX_ERR=4 # file system errors left uncorrected +fi + +# Test 1a - check and repair the filesystem # lfsck will return 1 if the filesystem had errors fixed -echo "LFSCK TEST 1" -echo "lfsck -c -l --mdsdb $MDSDB --ostdb $OSTDB_LIST $MOUNT" -echo y | lfsck -c -l --mdsdb $MDSDB 
--ostdb $OSTDB_LIST $MOUNT -RET=$? -[ $RET -eq 0 ] && echo "clean after first check" && exit 0 -echo "LFSCK TEST 1 - finished with rc=$RET" -[ $RET -gt $MAX_ERR ] && exit 110 || true - -# make sure everything gets to the backing store -sync; sleep 2; sync - -echo "LFSCK TEST 2" -echo "$E2FSCK -d -v -fn --mdsdb $MDSDB $MDSDEV" -df > /dev/null # update statfs data on disk -$E2FSCK -d -v -fn --mdsdb $MDSDB $MDSDEV -RET=$? -[ $RET -gt $MAX_ERR ] && echo "$E2FSCK returned $RET" && exit 123 || true - -i=0 -export OSTDB_LIST="" -for OSTDEV in $OSTDEVS; do - df > /dev/null # update statfs data on disk - $E2FSCK -d -v -fn --mdsdb $MDSDB --ostdb $OSTDB-$i $OSTDEV - RET=$? - [ $RET -gt $MAX_ERR ] && echo "$E2FSCK returned $RET" && exit 124 - OSTDB_LIST="$OSTDB_LIST $OSTDB-$i" - i=$((i + 1)) -done - -echo "LFSCK TEST 2" -echo "lfsck -c -l --mdsdb $MDSDB --ostdb $OSTDB_LIST $MOUNT" -lfsck -c -l --mdsdb $MDSDB --ostdb $OSTDB_LIST $MOUNT -RET=$? -echo "LFSCK TEST 2 - finished with rc=$RET" -[ $RET -ne 0 ] && exit 125 || true -if [ -z "$WAS_MOUNTED" ]; then - sh llmountcleanup.sh || exit 120 +# run e2fsck to generate databases used for lfsck +generate_db +if [ "$SKIP_LFSCK" != "no" ]; then + echo "skip lfsck" +else + # remount filesystem + REFORMAT="" + check_and_setup_lustre + + # run lfsck + rc=0 + run_lfsck || rc=$? + if [ $rc -eq 0 ]; then + echo "clean after the first check" + else + # run e2fsck again to generate databases used for lfsck + generate_db + + # run lfsck again + rc=0 + run_lfsck || rc=$? 
+ if [ $rc -eq 0 ]; then + echo "clean after the second check" + else + error "lfsck test 2 - finished with rc=$rc" + fi + fi fi -#Cleanup -rm -f $MDSDB $OSTDB-* || true +equals_msg $(basename $0): test complete, cleaning up + +LFSCK_ALWAYS=no +check_and_cleanup_lustre +[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && \ + grep -q FAIL $TESTSUITELOG && exit 1 || true echo "$0: completed" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 2e996b0..0654c53 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -99,6 +99,23 @@ init_test_env() { export E2LABEL=${E2LABEL:-e2label} export DUMPE2FS=${DUMPE2FS:-dumpe2fs} export E2FSCK=${E2FSCK:-e2fsck} + export LFSCK1=${LFSCK1:-lfsck} + export LFSCK_ALWAYS=${LFSCK_ALWAYS:-"no"} # check filesystem after each test suite + export SKIP_LFSCK=${SKIP_LFSCK:-"yes"} # bug 13698, change to "no" when fixed + export SHARED_DIRECTORY=${SHARED_DIRECTORY:-"/tmp"} + export FSCK_MAX_ERR=4 # File system errors left uncorrected + if [ "$SKIP_LFSCK" == "no" ]; then + if [ ! -x "$(which $LFSCK1 2> /dev/null)" ]; then + log "$($E2FSCK -V)" + error_exit "$E2FSCK does not support lfsck" + fi + + export MDSDB=${MDSDB:-$SHARED_DIRECTORY/mdsdb} + export OSTDB=${OSTDB:-$SHARED_DIRECTORY/ostdb} + export MDSDB_OPT="--mdsdb $MDSDB" + export OSTDB_OPT="--ostdb $OSTDB-\$ostidx" + fi + declare -a OSTDEVS #[ -d /r ] && export ROOT=${ROOT:-/r} export TMP=${TMP:-$ROOT/tmp} @@ -1783,6 +1800,20 @@ is_mounted () { echo $mounted' ' | grep -w -q $mntpt' ' } +is_empty_dir() { + [ $(find $1 -maxdepth 1 -print | wc -l) = 1 ] && return 0 + return 1 +} + +# empty lustre filesystem may have empty directories lost+found and .lustre +is_empty_fs() { + [ $(find $1 -maxdepth 1 -name lost+found -o -name .lustre -prune -o \ + -print | wc -l) = 1 ] || return 1 + [ ! -d $1/lost+found ] || is_empty_dir $1/lost+found || return 1 + [ ! 
-d $1/.lustre ] || is_empty_dir $1/.lustre || return 1 + return 0 +} + check_and_setup_lustre() { nfs_client_mode && return @@ -1873,7 +1904,119 @@ cleanup_and_setup_lustre() { check_and_setup_lustre } +# Get all of the server target devices from a given server node and type. +get_mnt_devs() { + local node=$1 + local type=$2 + local obd_type + local devs + local dev + + case $type in + mdt) obd_type="osd" ;; + ost) obd_type="obdfilter" ;; # needs to be fixed when OST also uses an OSD + *) echo "invalid server type" && return 1 ;; + esac + + devs=$(do_node $node "lctl get_param -n $obd_type.*.mntdev") + for dev in $devs; do + case $dev in + *loop*) do_node $node "losetup $dev" | \ + sed -e "s/.*(//" -e "s/).*//" ;; + *) echo $dev ;; + esac + done +} + +# Get all of the server target devices. +get_svr_devs() { + local i node + + # OST devices + i=0 + for node in $(osts_nodes); do + OSTDEVS[i]=$(get_mnt_devs $node ost) + i=$((i + 1)) + done +} + +# Run e2fsck on MDT or OST device; exit codes up to FSCK_MAX_ERR are expected. +run_e2fsck() { + local node=$1 + local target_dev=$2 + local ostidx=$3 + local ostdb_opt=$4 + + df > /dev/null # update statfs data on disk + local cmd="$E2FSCK -d -v -f -n $MDSDB_OPT $ostdb_opt $target_dev" + echo $cmd + local rc=0 + do_node $node $cmd || rc=$? + [ $rc -le $FSCK_MAX_ERR ] || \ + error "$cmd returned $rc, should be <= $FSCK_MAX_ERR" + return 0 +} + +# Run e2fsck on MDT and OST(s) to generate databases used for lfsck. 
+generate_db() { + local i node + local ostidx + local dev + local tmp_file + + tmp_file=$(mktemp -p $SHARED_DIRECTORY) || + error "fail to create file in $SHARED_DIRECTORY" + + # make sure everything gets to the backing store + local list=$(comma_list $CLIENTS $(facet_host mds) $(osts_nodes)) + do_nodes $list "sync; sleep 2; sync" + + do_nodes $list ls $tmp_file || \ + error "$SHARED_DIRECTORY is not a shared directory" + rm $tmp_file + + run_e2fsck $(facet_host mds) $MDSDEV + + i=0 + ostidx=0 + OSTDB_LIST="" + for node in $(osts_nodes); do + for dev in ${OSTDEVS[i]}; do + local ostdb_opt=`eval echo $OSTDB_OPT` + run_e2fsck $node $dev $ostidx "$ostdb_opt" + OSTDB_LIST="$OSTDB_LIST $OSTDB-$ostidx" + ostidx=$((ostidx + 1)) + done + i=$((i + 1)) + done +} + +run_lfsck() { + local cmd="$LFSCK1 -c -l --mdsdb $MDSDB --ostdb $OSTDB_LIST $MOUNT" + echo $cmd + eval $cmd + local rc=${PIPESTATUS[0]} + [ $rc -le $FSCK_MAX_ERR ] || \ + error "$cmd returned $rc, should be <= $FSCK_MAX_ERR" + echo "lfsck finished with rc=$rc" + + rm -rvf $MDSDB* $OSTDB* || true + + return $rc +} + check_and_cleanup_lustre() { + if [ "$LFSCK_ALWAYS" = "yes" ]; then + get_svr_devs + generate_db + if [ "$SKIP_LFSCK" == "no" ]; then + local rc=0 + run_lfsck || rc=$? + else + echo "skip lfsck" + fi + fi + if is_mounted $MOUNT; then [ -n "$DIR" ] && rm -rf $DIR/[Rdfs][0-9]* [ "$ENABLE_QUOTA" ] && restore_quota_type || true @@ -3144,3 +3287,52 @@ max_recovery_time () { echo $service_time } + +remove_mdt_files() { + local facet=$1 + local mdtdev=$2 + shift 2 + local files="$@" + local mntpt=${MOUNT%/*}/$facet + + echo "removing files from $mdtdev on $facet: $files" + mkdir -p $mntpt && mount -t $FSTYPE $MDS_MOUNT_OPTS $mdtdev $mntpt || return $? + local rc=0 f + for f in $files; do + rm $mntpt/ROOT/$f || { rc=$?; break; } + done + umount -f $mntpt || return $? 
+ return $rc +} + +duplicate_mdt_files() { + local facet=$1 + local mdtdev=$2 + shift 2 + local files="$@" + local mntpt=${MOUNT%/*}/$facet + + echo "duplicating files on $mdtdev on $facet: $files" + mkdir -p $mntpt || return $? + mount -t $FSTYPE $MDS_MOUNT_OPTS $mdtdev $mntpt || return $? + + do_umount() { + trap 0 + popd > /dev/null 2>&1 || true + rm -f $tmp + umount -f $mntpt + } + trap do_umount EXIT + + local rc=0 f + tmp=$(mktemp $TMP/setfattr.XXXXXXXXXX) + pushd $mntpt/ROOT > /dev/null || { rc=$?; do_umount; return $rc; } + for f in $files; do + touch $f.bad || { rc=$?; break; } + getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $tmp + rc=${PIPESTATUS[0]} + [ $rc -eq 0 ] || break + setfattr --restore $tmp || { rc=$?; break; } + done + do_umount && return $rc +} -- 1.8.3.1