From: Vladimir Saveliev Date: Tue, 1 Jun 2010 21:44:56 +0000 (+0400) Subject: b=13698 lfsck.sh support for remote mds and oss X-Git-Tag: v1_10_0_44~36 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=d44b89dce32f4de91a8ef6c07d2340c9ff75773b b=13698 lfsck.sh support for remote mds and oss SHARED_DIRECTORY is to be specified in case of remote servers LFSCK_ALWAYS flag to check the filesystem from check_and_cleanup_lustre() it is "no" by default i=adliger i=grev --- diff --git a/lustre/tests/lfsck.sh b/lustre/tests/lfsck.sh index 784d253..866c1e9 100644 --- a/lustre/tests/lfsck.sh +++ b/lustre/tests/lfsck.sh @@ -1,256 +1,286 @@ #!/bin/bash +# +# test e2fsck and lfsck to detect and fix filesystem corruption +# #set -vx set -e -TESTNAME="lfsck" -TMP=${TMP:-/tmp} -MDSDB=${MDSDB:-$TMP/mdsdb} -OSTDB=${OSTDB:-$TMP/ostdb} -LOG=${LOG:-"$TMP/lfsck.log"} -L2FSCK_PATH=${L2FSCK_PATH:-""} +LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)} +. $LUSTRE/tests/test-framework.sh +init_test_env $@ +. ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} + NUMFILES=${NUMFILES:-10} NUMDIRS=${NUMDIRS:-4} -LFIND=${LFIND:-"lfs find"} -GETFATTR=${GETFATTR:-getfattr} -SETFATTR=${SETFATTR:-setfattr} -MAX_ERR=1 +OSTIDX=${OSTIDX:-0} # the OST index in LOV +OBJGRP=${OBJGRP:-0} # the OST object group + +[ -d "$SHARED_DIRECTORY" ] || \ + { skip "SHARED_DIRECTORY should be specified with a shared directory \ +which can be accessable on all of the nodes" && exit 0; } + +which getfattr > /dev/null 2>&1 || { skip "could not find getfattr" && exit 0; } +which setfattr > /dev/null 2>&1 || { skip "could not find setfattr" && exit 0; } + +MOUNT_2="" +check_and_setup_lustre + +assert_DIR + +# Create some dirs and files on the filesystem. 
+create_files_sub() { + local test_dir=$1 + local num_dirs=$2 + local file_name=$3 + local first_num=$4 + local last_num=$5 + local d e f + + for d in $(seq -f d%g $first_num $last_num); do + echo "creating files in $test_dir/$d" + for e in $(seq -f d%g $num_dirs); do + mkdir -p $test_dir/$d/$e || error "mkdir $test_dir/$d/$e failed" + for f in $(seq -f test%g $num_dirs); do + cp $file_name $test_dir/$d/$e/$f || \ + error "cp $file_name $test_dir/$d/$e/$f failed" + done + done + done +} -export PATH=$LFSCK_PATH:`dirname $0`:`dirname $0`/../utils:$PATH +create_files() { + local test_dir=$1 + local num_dirs=$2 + local num_files=$3 + local f + + # create some files on the filesystem + local first_num=1 + local last_num=$num_dirs + create_files_sub $test_dir $num_dirs /etc/fstab $first_num $last_num + + # create files to be modified + for f in $(seq -f $test_dir/testfile.%g $((num_files * 3))); do + echo "creating $f" + cp /etc/termcap $f || error "cp /etc/termcap $f failed" + done + + # create some more files + first_num=$((num_dirs * 2 + 1)) + last_num=$((num_dirs * 2 + 3)) + create_files_sub $test_dir $num_dirs /etc/hosts $first_num $last_num + + # these should NOT be taken as duplicates + for f in $(seq -f $test_dir/d$last_num/linkfile.%g $num_files); do + echo "linking files in $test_dir/d$last_num" + cp /etc/hosts $f || error "cp /etc/hosts $f failed" + ln $f $f.link || error "ln $f $f.link failed" + done +} -[ -z "`which $GETFATTR`" ] && echo "$0: $GETFATTR not found" && exit 5 -[ -z "`which $SETFATTR`" ] && echo "$0: $SETFATTR not found" && exit 6 +# Get the objids for files on the OST (given the OST index and object group). +get_objects() { + local obdidx=$1 + shift + local group=$1 + shift + local ost_files="$@" + local ost_objids + ost_objids=$($LFS getstripe $ost_files | \ + awk '{if ($1 == '$obdidx' && $4 == '$group') print $2 }') + echo $ost_objids +} -LUSTRE=${LUSTRE:-`dirname $0`/..} -. $LUSTRE/tests/test-framework.sh -init_test_env $@ -. 
${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} -init_logging +# Get the OST nodet name (given the OST index). +get_ost_node() { + local obdidx=$1 + local ost_uuid + local ost_node + local node + + ost_uuid=$($LFS osts | grep "^$obdidx: " | cut -d' ' -f2 | head -n1) + + for node in $(osts_nodes); do + do_node $node "lctl get_param -n obdfilter.*.uuid" | grep -q $ost_uuid + [ ${PIPESTATUS[1]} -eq 0 ] && ost_node=$node && break + done + [ -z "$ost_node" ] && \ + echo "failed to find the OST with index $obdidx" && return 1 + echo $ost_node +} -require_dsh_mds || exit 0 -require_dsh_ost || exit 0 +# Get the OST target device (given the OST facet name and OST index). +get_ost_dev() { + local node=$1 + local obdidx=$2 + local ost_name + local ost_dev -SKIP_LFSCK=${SKIP_LFSCK:-"yes"} # bug 13698, change to "no" when fixed + ost_name=$($LFS osts | grep "^$obdidx: " | cut -d' ' -f2 | \ + head -n1 | sed -e 's/_UUID$//') -if [ "$SKIP_LFSCK" == "no" ]; then - if [ ! -x /usr/sbin/lfsck ]; then - log "$($E2FSCK -V)" - log "SKIP: $E2FSCK does not support lfsck" - exit 0 - fi + ost_dev=$(do_node $node "lctl get_param -n obdfilter.$ost_name.mntdev") + [ ${PIPESTATUS[0]} -ne 0 ] && \ + echo "failed to find the OST device with index $obdidx on $facet" && \ + return 1 - MDSDB_OPT="--mdsdb $MDSDB" - OSTDB_OPT="--ostdb $OSTDB-\$ostidx" -fi + if [[ $ost_dev = *loop* ]]; then + ost_dev=$(do_node $node "losetup $ost_dev" | \ + sed -e "s/.*(//" -e "s/).*//") + fi -# if nothing mounted, don't nuke MOUNT variable needed in llmount.sh -WAS_MOUNTED=$(mounted_lustre_filesystems | head -1) -if [ -z "$WAS_MOUNTED" ]; then - # This code doesn't handle multiple mounts well, so nuke MOUNT2 variable - MOUNT2="" MOUNT_2="" check_and_setup_lustre - MOUNT=$(mounted_lustre_filesystems) - [ -z "$MOUNT" ] && echo "NAME=$NAME not mounted" && exit 2 -else - MOUNT=${WAS_MOUNTED} -fi + echo $ost_dev +} -DIR=${DIR:-$MOUNT/$TESTNAME} -[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 3 +# Get the file 
names to be duplicated or removed on the MDS. +get_files() { + local flavor=$1 + local test_dir=$2 + local num_files=$3 + local first last + local test_file + + case $flavor in + dup) + first=$((num_files + 1)) + last=$((num_files * 2)) + ;; + remove) + first=$((num_files * 2 + 1)) + last=$((num_files * 3)) + ;; + *) echo "get_files(): invalid flavor" && return 1 ;; + esac + + local files="" + local f + for f in $(seq -f testfile.%g $first $last); do + test_file=$test_dir/$f + files="$files $test_file" + done + files=$(echo $files | sed "s#$DIR/##g") + echo $files +} -if [ "$WAS_MOUNTED" ]; then - LFSCK_SETUP=no - MAX_ERR=4 # max expected error from e2fsck -fi +# Remove objects associated with files. +remove_objects() { + local node=$1 + shift + local ostdev=$1 + shift + local group=$1 + shift + local objids="$@" + local tmp + local i + local rc + + echo "removing objects from $ostdev on $facet: $objids" + tmp=$(mktemp $SHARED_DIRECTORY/debugfs.XXXXXXXXXX) + for i in $objids; do + echo "rm O/$group/d$((i % 32))/$i" >> $tmp + done + + do_node $node "$DEBUGFS -w -f $tmp $ostdev" + rc=${PIPESTATUS[0]} + rm -f $tmp + + return $rc +} -get_mnt_devs() { - DEVS=`lctl get_param -n $1.*.mntdev` - for DEV in $DEVS; do - case $DEV in - *loop*) losetup $DEV | sed -e "s/.*(//" -e "s/).*//" ;; - *) echo $DEV ;; - esac - done +# Remove files from MDS. +remove_files() { + do_rpc_nodes $(facet_host $1) remove_mdt_files $@ } -MDSDEV=$(mdsdevname 1) - -if [ "$LFSCK_SETUP" != "no" -a "$SKIP_LFSCK" == "no" ]; then - #Create test directory - # -- can't remove the mountpoint... 
- [ -z "$DIR" ] && rm -rf $DIR/* - mkdir -p $DIR - OSTCOUNT=`$LFIND $MOUNT | grep -c "^[0-9]*: "` - - # Create some files on the filesystem - for d in `seq -f d%g $NUMDIRS`; do - echo "creating files in $DIR/$d" - for e in `seq -f d%g $NUMDIRS`; do - mkdir -p $DIR/$d/$e - for f in `seq -f test%g $NUMDIRS`; do - cp /etc/fstab $DIR/$d/$e/$f ||exit 5 - done - done - done +# Create EAs on files so objects are referenced from different files. +duplicate_files() { + do_rpc_nodes $(facet_host $1) duplicate_mdt_files $@ +} - # Create Files to be modified - for f in `seq -f $DIR/testfile.%g $((NUMFILES * 3))`; do - echo "creating $f" - cp /etc/termcap $f || exit 10 - done +#********************************* Main Flow **********************************# - #Create some more files - for d in `seq -f d%g $((NUMDIRS * 2 + 1)) $((NUMDIRS * 2 + 3))`; do - echo "creating files in $DIR/$d" - for e in `seq -f d%g $NUMDIRS`; do - mkdir -p $DIR/$d/$e - for f in `seq -f test%g $NUMDIRS`; do - cp /etc/hosts $DIR/$d/$e/$f ||exit 15 - done - done - done +init_logging - # these should NOT be taken as duplicates - for f in `seq -f $DIR/$d/linkfile.%g $NUMFILES`; do - echo "linking files in $DIR/$d" - cp /etc/hosts $f - ln $f $f.link - done +# get the server target devices +get_svr_devs - # Get objids for a file on the OST - OST_FILES=`seq -f $DIR/testfile.%g $NUMFILES` - OST_REMOVE=`$LFIND $OST_FILES | awk '$1 == 0 { print $2 }' | head -n $NUMFILES` +if [ "$SKIP_LFSCK" = "no" ] && is_empty_fs $MOUNT; then + # create test directory + TESTDIR=$DIR/d0.$TESTSUITE + mkdir -p $TESTDIR || error "mkdir $TESTDIR failed" - export MDS_DUPE="" - for f in `seq -f testfile.%g $((NUMFILES + 1)) $((NUMFILES * 2))`; do - TEST_FILE=$DIR/$f - echo "DUPLICATING MDS file $TEST_FILE" - $LFIND -v $TEST_FILE >> $LOG || exit 20 - MDS_DUPE="$MDS_DUPE $TEST_FILE" - done - MDS_DUPE=`echo $MDS_DUPE | sed "s#$MOUNT/##g"` - - export MDS_REMOVE="" - for f in `seq -f testfile.%g $((NUMFILES * 2 + 1)) $((NUMFILES * 3))`; do 
- TEST_FILE=$DIR/$f - echo "REMOVING MDS file $TEST_FILE which has info:" - $LFIND -v $TEST_FILE >> $LOG || exit 30 - MDS_REMOVE="$MDS_REMOVE $TEST_FILE" - done - MDS_REMOVE=`echo $MDS_REMOVE | sed "s#$MOUNT/##g"` - - # when the OST is also using an OSD this needs to be fixed - MDTDEVS=`get_mnt_devs osd` - OSTDEVS=`get_mnt_devs obdfilter` - OSTCOUNT=`echo $OSTDEVS | wc -w` - sh llmountcleanup.sh || exit 40 - - # Remove objects associated with files - echo "removing objects: `echo $OST_REMOVE`" - DEBUGTMP=`mktemp $TMP/debugfs.XXXXXXXXXX` - for i in $OST_REMOVE; do - echo "rm O/0/d$((i % 32))/$i" >> $DEBUGTMP - done - $DEBUGFS -w -f $DEBUGTMP `echo $OSTDEVS | cut -d' ' -f 1` - RET=$? - rm $DEBUGTMP - [ $RET -ne 0 ] && exit 50 - - SAVE_PWD=$PWD - mount -t $FSTYPE -o loop $MDSDEV $MOUNT || exit 60 - do_umount() { - trap 0 - cd $SAVE_PWD - umount -f $MOUNT - } - trap do_umount EXIT - - #Remove files from mds - for f in $MDS_REMOVE; do - rm $MOUNT/ROOT/$f || exit 70 - done + # create some dirs and files on the filesystem + create_files $TESTDIR $NUMDIRS $NUMFILES - #Create EAs on files so objects are referenced from different files - ATTRTMP=`mktemp $TMP/setfattr.XXXXXXXXXX` - cd $MOUNT/ROOT || exit 78 - for f in $MDS_DUPE; do - touch $f.bad || exit 74 - getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $ATTRTMP - setfattr --restore $ATTRTMP || exit 80 - done - cd $SAVE_PWD - rm $ATTRTMP + # get the objids for files in group $OBJGRP on the OST with index $OSTIDX + OST_REMOVE=$(get_objects $OSTIDX $OBJGRP \ + $(seq -f $TESTDIR/testfile.%g $NUMFILES)) - do_umount -else - # when the OST is also using an OSD this needs to be fixed - MDTDEVS=`get_mnt_devs osd` - OSTDEVS=`get_mnt_devs obdfilter` - OSTCOUNT=`echo $OSTDEVS | wc -w` -fi # LFSCK_SETUP - -echo "$E2FSCK -d -v -fn $MDSDB_OPT $MDSDEV" -df > /dev/null # update statfs data on disk -RET=0 -$E2FSCK -d -v -fn $MDSDB_OPT $MDSDEV || RET=$? 
-[ $RET -gt $MAX_ERR ] && echo "$E2FSCK returned $RET" && exit 90 || true - -export OSTDB_LIST="" -ostidx=0 -for OSTDEV in $OSTDEVS; do - df > /dev/null # update statfs data on disk - RET=0 - eval $E2FSCK -d -v -fn $MDSDB_OPT $OSTDB_OPT $OSTDEV || RET=$? - [ $RET -gt $MAX_ERR ] && echo "$E2FSCK returned $RET" && exit 100 - OSTDB_LIST="$OSTDB_LIST $OSTDB-$ostidx" - ostidx=$((ostidx + 1)) -done - -[ "$SKIP_LFSCK" != "no" ] && exit 0 - -#Remount filesystem -[ "`mount | grep $MOUNT`" ] || $SETUP - -# need to turn off shell error detection to get proper error return + # get the node name and target device for the OST with index $OSTIDX + OSTNODE=$(get_ost_node $OSTIDX) || error "get_ost_node by index $OSTIDX failed" + OSTDEV=$(get_ost_dev $OSTNODE $OSTIDX) || \ + error "get_ost_dev $OSTNODE $OSTIDX failed" + + # get the file names to be duplicated on the MDS + MDS_DUPE=$(get_files dup $TESTDIR $NUMFILES) || error "$MDS_DUPE" + # get the file names to be removed from the MDS + MDS_REMOVE=$(get_files remove $TESTDIR $NUMFILES) || error "$MDS_REMOVE" + + stopall -f || error "cleanupall failed" + + # remove objects associated with files in group $OBJGRP + # on the OST with index $OSTIDX + remove_objects $OSTNODE $OSTDEV $OBJGRP $OST_REMOVE || \ + error "removing objects failed" + + # remove files from MDS + remove_files $SINGLEMDS $MDTDEV $MDS_REMOVE || error "removing files failed" + + # create EAs on files so objects are referenced from different files + duplicate_files $SINGLEMDS $MDTDEV $MDS_DUPE || \ + error "duplicating files failed" + FSCK_MAX_ERR=1 # file system errors corrected +else # I_MOUNTED=no + FSCK_MAX_ERR=4 # file system errors left uncorrected +fi + +# Test 1a - check and repair the filesystem # lfsck will return 1 if the filesystem had errors fixed -echo "LFSCK TEST 1" -echo "lfsck -c -l $MDSDB_OPT --ostdb $OSTDB_LIST $MOUNT" -RET=0 -echo y | lfsck -c -l $MDSDB_OPT --ostdb $OSTDB_LIST $MOUNT || RET=$? 
-[ $RET -eq 0 ] && echo "clean after first check" && exit 0 -echo "LFSCK TEST 1 - finished with rc=$RET" -[ $RET -gt $MAX_ERR ] && exit 110 || true - -# make sure everything gets to the backing store -sync; sleep 2; sync - -echo "LFSCK TEST 2" -echo "$E2FSCK -d -v -fn $MDSDB_OPT $MDSDEV" -df > /dev/null # update statfs data on disk -RET=0 -$E2FSCK -d -v -fn $MDSDB_OPT $MDSDEV || RET=$? -[ $RET -gt $MAX_ERR ] && echo "$E2FSCK returned $RET" && exit 123 || true - -ostidx=0 -export OSTDB_LIST="" -for OSTDEV in $OSTDEVS; do - df > /dev/null # update statfs data on disk - RET=0 - eval $E2FSCK -d -v -fn $MDSDB_OPT $OSTDB_OPT $OSTDEV || RET=$? - [ $RET -gt $MAX_ERR ] && echo "$E2FSCK returned $RET" && exit 124 - OSTDB_LIST="$OSTDB_LIST $OSTDB-$ostidx" - ostidx=$((ostidx + 1)) -done - -echo "LFSCK TEST 2" -echo "lfsck -c -l $MDSDB_OPT --ostdb $OSTDB_LIST $MOUNT" -RET=0 -lfsck -c -l $MDSDB_OPT --ostdb $OSTDB_LIST $MOUNT || RET=$? -echo "LFSCK TEST 2 - finished with rc=$RET" -[ $RET -ne 0 ] && exit 125 || true -if [ -z "$WAS_MOUNTED" ]; then - sh llmountcleanup.sh || exit 120 +# run e2fsck to generate databases used for lfsck +generate_db +if [ "$SKIP_LFSCK" != "no" ]; then + echo "skip lfsck" +else + # remount filesystem + REFORMAT="" + check_and_setup_lustre + + # run lfsck + rc=0 + run_lfsck || rc=$? + if [ $rc -eq 0 ]; then + echo "clean after the first check" + else + # run e2fsck again to generate databases used for lfsck + generate_db + + # run lfsck again + rc=0 + run_lfsck || rc=$? 
+ if [ $rc -eq 0 ]; then + echo "clean after the second check" + else + error "lfsck test 2 - finished with rc=$rc" + fi + fi fi -#Cleanup -rm -f $MDSDB $OSTDB-* || true +equals_msg $(basename $0): test complete, cleaning up + +LFSCK_ALWAYS=no +check_and_cleanup_lustre +[ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && \ + grep -q FAIL $TESTSUITELOG && exit 1 || true echo "$0: completed" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 900f93f..a0b0dd8 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -111,7 +111,22 @@ init_test_env() { export E2LABEL=${E2LABEL:-e2label} export DUMPE2FS=${DUMPE2FS:-dumpe2fs} export E2FSCK=${E2FSCK:-e2fsck} + export LFSCK_BIN=${LFSCK_BIN:-lfsck} + export LFSCK_ALWAYS=${LFSCK_ALWAYS:-"no"} # check filesystem after each test suit + export SKIP_LFSCK=${SKIP_LFSCK:-"yes"} # bug 13698, change to "no" when fixed + export SHARED_DIRECTORY=${SHARED_DIRECTORY:-"/tmp"} + export FSCK_MAX_ERR=4 # File system errors left uncorrected + if [ "$SKIP_LFSCK" == "no" ]; then + if [ ! 
-x `which $LFSCK_BIN` ]; then + log "$($E2FSCK -V)" + error_exit "$E2FSCK does not support lfsck" + fi + export MDSDB=${MDSDB:-$SHARED_DIRECTORY/mdsdb} + export OSTDB=${OSTDB:-$SHARED_DIRECTORY/ostdb} + export MDSDB_OPT="--mdsdb $MDSDB" + export OSTDB_OPT="--ostdb $OSTDB-\$ostidx" + fi #[ -d /r ] && export ROOT=${ROOT:-/r} export TMP=${TMP:-$ROOT/tmp} export TESTSUITELOG=${TMP}/${TESTSUITE}.log @@ -261,7 +276,7 @@ load_module() { # Nothing in $MODOPTS_; try modprobe.conf set -- $(grep "^options\\s*\<${module}\>" $MODPROBECONF) # Get rid of "options $module" - (($# > 0)) && shift 2 + (($# > 0)) && shift 2 # Ensure we have accept=all for lnet if [ $(basename $module) = lnet ]; then @@ -2048,6 +2063,20 @@ is_mounted () { echo $mounted' ' | grep -w -q $mntpt' ' } +is_empty_dir() { + [ $(find $1 -maxdepth 1 -print | wc -l) = 1 ] && return 0 + return 1 +} + +# empty lustre filesystem may have empty directories lost+found and .lustre +is_empty_fs() { + [ $(find $1 -maxdepth 1 -name lost+found -o -name .lustre -prune -o \ + -print | wc -l) = 1 ] || return 1 + [ ! -d $1/lost+found ] || is_empty_dir $1/lost+found && return 0 + [ ! -d $1/.lustre ] || is_empty_dir $1/.lustre && return 0 + return 1 +} + check_and_setup_lustre() { nfs_client_mode && return @@ -2142,7 +2171,123 @@ cleanup_and_setup_lustre() { check_and_setup_lustre } +# Get all of the server target devices from a given server node and type. +get_mnt_devs() { + local node=$1 + local type=$2 + local obd_type + local devs + local dev + + case $type in + mdt) obd_type="osd" ;; + ost) obd_type="obdfilter" ;; # needs to be fixed when OST also uses an OSD + *) echo "invalid server type" && return 1 ;; + esac + + devs=$(do_node $node "lctl get_param -n $obd_type.*.mntdev") + for dev in $devs; do + case $dev in + *loop*) do_node $node "losetup $dev" | \ + sed -e "s/.*(//" -e "s/).*//" ;; + *) echo $dev ;; + esac + done +} + +# Get all of the server target devices. 
+get_svr_devs() {
+    local i node
+
+    # MDT device (stored in the global MDTDEV)
+    MDTDEV=$(get_mnt_devs $(mdts_nodes) mdt)
+
+    # OST devices: OSTDEVS[i] holds the device list of the i-th OST node
+    i=0
+    for node in $(osts_nodes); do
+        OSTDEVS[i]=$(get_mnt_devs $node ost)
+        i=$((i + 1))
+    done
+}
+
+# Run e2fsck on MDT or OST device; calls error() if it exits above FSCK_MAX_ERR.
+run_e2fsck() {
+    local node=$1
+    local target_dev=$2
+    local ostidx=$3
+    local ostdb_opt=$4
+
+    df > /dev/null      # update statfs data on disk
+    local cmd="$E2FSCK -d -v -f -n $MDSDB_OPT $ostdb_opt $target_dev"
+    echo $cmd
+    local rc=0
+    do_node $node $cmd || rc=$?  # keep a non-zero status from tripping set -e
+    [ $rc -le $FSCK_MAX_ERR ] || \
+        error "$cmd returned $rc, should be <= $FSCK_MAX_ERR"
+    return 0
+}
+
+# Run e2fsck on MDT and OST(s) to generate databases used for lfsck.
+# Reads MDTDEV and OSTDEVS (as set by get_svr_devs); sets the global
+# OSTDB_LIST to the list of per-OST database files.
+generate_db() {
+    local i ostidx dev node
+    local tmp_file
+
+    [ $MDSCOUNT -eq 1 ] || error "CMD is not supported"
+    tmp_file=$(mktemp -p $SHARED_DIRECTORY) ||
+        error "fail to create file in $SHARED_DIRECTORY"
+
+    # make sure everything gets to the backing store
+    local list=$(comma_list $CLIENTS $(facet_host $SINGLEMDS) $(osts_nodes))
+    do_nodes $list "sync; sleep 2; sync"
+    # every node in the list must be able to see the file created above
+    do_nodes $list ls $tmp_file || \
+        error "$SHARED_DIRECTORY is not a shared directory"
+    rm -f $tmp_file
+
+    run_e2fsck $(mdts_nodes) $MDTDEV
+
+    i=0
+    ostidx=0
+    OSTDB_LIST=""
+    for node in $(osts_nodes); do
+        for dev in ${OSTDEVS[i]}; do
+            local ostdb_opt=`eval echo $OSTDB_OPT`  # expands $ostidx kept literal in OSTDB_OPT
+            run_e2fsck $node $dev $ostidx "$ostdb_opt"
+            OSTDB_LIST="$OSTDB_LIST $OSTDB-$ostidx"
+            ostidx=$((ostidx + 1))
+        done
+        i=$((i + 1))
+    done
+}
+
+# Run lfsck on $MOUNT using the e2fsck databases; returns lfsck's exit status.
+run_lfsck() {
+    local cmd="$LFSCK_BIN -c -l --mdsdb $MDSDB --ostdb $OSTDB_LIST $MOUNT"
+    echo $cmd
+    local rc=0
+    eval $cmd || rc=$?
+    [ $rc -le $FSCK_MAX_ERR ] || \
+        error "$cmd returned $rc, should be <= $FSCK_MAX_ERR"
+    echo "lfsck finished with rc=$rc"
+    # ":?" aborts instead of expanding to a bare "*" if a db path is unset
+    rm -rvf ${MDSDB:?}* ${OSTDB:?}* || true
+    return $rc
+}
+
 check_and_cleanup_lustre() {
+    if [ "$LFSCK_ALWAYS" = "yes" ]; then
+        get_svr_devs
+        generate_db
+        if [ "$SKIP_LFSCK" == "no" ]; then
+            local rc=0
+            run_lfsck || rc=$?
+        else
+            echo "skip lfsck"
+        fi
+    fi
+
     if is_mounted $MOUNT; then
         [ -n "$DIR" ] && rm -rf $DIR/[Rdfs][0-9]*
         [ "$ENABLE_QUOTA" ] && restore_quota_type || true
@@ -3946,8 +4091,8 @@ run_llverdev()
     local dev=$1
     local devname=$(basename $1)
     local size=$(grep "$devname"$ /proc/partitions | awk '{print $3}')
-	# loop devices aren't in /proc/partitions
-	[ "x$size" == "x" ] && local size=$(ls -l $dev | awk '{print $5}')
+    # loop devices aren't in /proc/partitions
+    [ "x$size" == "x" ] && local size=$(ls -l $dev | awk '{print $5}')
 
     size=$(($size / 1024 / 1024)) # Gb
 
@@ -3958,3 +4103,69 @@ run_llverdev()
 
     llverdev --force $partial_arg $dev
 }
+
+# Mount the MDT device locally and delete the given files from it.
+# Arguments: $1 - MDS facet name, $2 - MDT device,
+#            $3... - file paths relative to the ROOT directory on the MDT.
+# A mkdir/mount failure is returned at once; otherwise returns the umount
+# status if umount fails, else the status of the first failed rm (0 if none).
+remove_mdt_files() {
+    local facet=$1
+    local mdtdev=$2
+    shift 2
+    local files="$@"
+    local mntpt=${MOUNT%/*}/$facet
+    local f
+    local rc=0
+
+    echo "removing files from $mdtdev on $facet: $files"
+    # create the mount point here too, as duplicate_mdt_files does
+    mkdir -p $mntpt || return $?
+    mount -t $FSTYPE $MDS_MOUNT_OPTS $mdtdev $mntpt || return $?
+    for f in $files; do
+        rm $mntpt/ROOT/$f || { rc=$?; break; }
+    done
+    umount -f $mntpt || return $?
+    return $rc
+}
+
+# Mount the MDT device locally and, for each given file, create "<file>.bad"
+# carrying a copy of the file's trusted.lov xattr, so that one set of objects
+# is referenced from two files.
+# Arguments: same as remove_mdt_files.
+# The temp file and the mount are cleaned up on every path. Returns the umount
+# status if umount fails, else the status of the first failed step (0 if none).
+duplicate_mdt_files() {
+    local facet=$1
+    local mdtdev=$2
+    shift 2
+    local files="$@"
+    local mntpt=${MOUNT%/*}/$facet
+    local tmp
+    local f
+    local rc=0
+
+    echo "duplicating files on $mdtdev on $facet: $files"
+    mkdir -p $mntpt || return $?
+    mount -t $FSTYPE $MDS_MOUNT_OPTS $mdtdev $mntpt || return $?
+
+    tmp=$(mktemp $TMP/setfattr.XXXXXXXXXX) || rc=$?
+    if [ $rc -eq 0 ]; then
+        # subshell: the directory change stays local and a failure cannot
+        # skip the cleanup below
+        (
+            cd $mntpt/ROOT || exit $?
+            for f in $files; do
+                touch $f.bad || exit $?
+                getfattr -n trusted.lov $f | sed "s#$f#&.bad#" > $tmp
+                rc=${PIPESTATUS[0]}
+                [ $rc -eq 0 ] || exit $rc
+                setfattr --restore $tmp || exit $?
+            done
+        ) || rc=$?
+        rm -f $tmp
+    fi
+
+    umount -f $mntpt || return $?
+    return $rc
+}