2 # -*- mode: Bash; tab-width: 4; indent-tabs-mode: t; -*-
3 # vim:shiftwidth=4:softtabstop=4:tabstop=4:
5 # test e2fsck and lfsck to detect and fix filesystem corruption
# Locate the Lustre tree: keep a caller-supplied LUSTRE, otherwise use the
# parent of the directory that contains this script.
10 LUSTRE=${LUSTRE:-$(cd $(dirname $0)/..; echo $PWD)}
# Source the shared test framework from that tree.
11 . $LUSTRE/tests/test-framework.sh
# Source the test configuration. The := form also stores the default path in
# CONFIG when the caller did not set it. NAME is not assigned in the visible
# lines, so it must already be set by the time this line runs.
13 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
# Tunables below can each be overridden from the environment.
15 NUMFILES=${NUMFILES:-10} # file count handed to create_files/get_files (default 10)
17 OSTIDX=${OSTIDX:-0} # the OST index in LOV (default 0)
18 OBJGRP=${OBJGRP:-0} # the OST object group (default 0)
# Prerequisite checks. Each one reports a skip and then exits with status 0
# when this environment cannot run the test. Because the calls are chained
# with &&, the exit is only reached when skip/skip_env returns success.
#
# 1) SHARED_DIRECTORY must name an existing directory.
20 [ -d "$SHARED_DIRECTORY" ] || \
21 { skip "SHARED_DIRECTORY should be specified with a shared directory \
22 which can be accessable on all of the nodes" && exit 0; }
# 2) facet_fstype must report ldiskfs for both the SINGLEMDS facet and OST.
23 [[ $(facet_fstype $SINGLEMDS) != ldiskfs ]] &&
24 skip "Only applicable to ldiskfs-based MDTs" && exit 0
25 [[ $(facet_fstype OST) != ldiskfs ]] &&
26 skip "Only applicable to ldiskfs-based OST" && exit 0
# 3) getfattr and setfattr must both be found by which.
28 which getfattr &>/dev/null || { skip_env "could not find getfattr" && exit 0; }
29 which setfattr &>/dev/null || { skip_env "could not find setfattr" && exit 0; }
# Fail the test when the program named by LFSCK_BIN is not an executable file.
# The closing fi of this conditional is not part of the visible lines.
# NOTE(review): when which prints nothing (program not found) the unquoted
# substitution disappears and the test becomes [ ! -x ], a two-argument test
# that is false because the string -x is non-empty. The error branch is then
# not taken for a missing binary. Confirm, and consider testing the result of
# command -v on a quoted LFSCK_BIN instead.
31 if [ ! -x `which $LFSCK_BIN` ]; then
33 error "e2fsprogs does not support lfsck"
# Run the framework setup step (check_and_setup_lustre is defined outside the
# visible lines).
37 check_and_setup_lustre
# Build a 1 MiB file of random data under TMP. It is the source that is later
# copied to every testfile.N in the test directory.
41 SAMPLE_FILE=$TMP/$TESTSUITE.junk
42 dd if=/dev/urandom of=$SAMPLE_FILE bs=1M count=1
44 # Create some dirs and files on the filesystem.
# Fragment: the function header, local declarations and the closing done
# lines are not visible. Presumably the body of create_files_sub, which is
# called elsewhere as:
#   create_files_sub test_dir num_dirs file_name first_num last_num
# Layout produced under test_dir:
#   d<first_num> .. d<last_num>      top-level directories
#     d1 .. d<num_dirs>              sub-directories of each
#       test1 .. test<num_dirs>      copies of file_name in each sub-directory
# Note that num_dirs bounds both the sub-directory count and the number of
# files per sub-directory. Any mkdir or cp failure is reported through error.
53 for d in $(seq -f d%g $first_num $last_num); do
54 echo "creating files in $test_dir/$d"
55 for e in $(seq -f d%g $num_dirs); do
56 mkdir -p $test_dir/$d/$e || error "mkdir $test_dir/$d/$e failed"
57 for f in $(seq -f test%g $num_dirs); do
58 cp $file_name $test_dir/$d/$e/$f || \
59 error "cp $file_name $test_dir/$d/$e/$f failed"
# Fragment: the function header, the remaining local declarations and the
# closing done lines are not visible. Presumably the body of create_files,
# which the main flow calls with a directory, a directory count and a file
# count.
71 # create some files on the filesystem
# First batch: copies of /etc/fstab in directories up to d<num_dirs>
# (first_num is assigned on a line that is not visible).
73 local last_num=$num_dirs
74 create_files_sub $test_dir $num_dirs /etc/fstab $first_num $last_num
76 # create files to be modified
# testfile.1 .. testfile.<3 * num_files>, each a copy of SAMPLE_FILE. The main
# flow takes its object ids from testfile.1 .. testfile.<NUMFILES>, and the
# get_files fragment selects from the second and third thirds of this range.
77 for f in $(seq -f $test_dir/testfile.%g $((num_files * 3))); do
79 cp $SAMPLE_FILE $f || error "cp $SAMPLE_FILE $f failed"
82 # create some more files
# Second batch: copies of /etc/hosts in the three directories
# d<2 * num_dirs + 1> .. d<2 * num_dirs + 3>.
83 first_num=$((num_dirs * 2 + 1))
84 last_num=$((num_dirs * 2 + 3))
85 create_files_sub $test_dir $num_dirs /etc/hosts $first_num $last_num
87 # these should NOT be taken as duplicates
# Hard links: each linkfile.N in the last directory gets a second name
# linkfile.N.link created with ln, so both names refer to one file.
88 for f in $(seq -f $test_dir/d$last_num/linkfile.%g $num_files); do
89 echo "linking files in $test_dir/d$last_num"
90 cp /etc/hosts $f || error "cp /etc/hosts $f failed"
91 ln $f $f.link || error "ln $f $f.link failed"
95 # Get the objids for files on the OST (given the OST index and object group).
# Fragment: the function header and the lines that set obdidx, group and
# ost_files are not visible. Presumably the body of get_objects, which the
# main flow calls with an OST index, an object group and a list of files.
# The awk filter keeps rows of the lfs getstripe output whose first field
# equals obdidx and whose fourth field equals group, and prints the second
# field of each (presumably the object id column -- confirm against the
# getstripe output format). The shell values are spliced directly into the awk
# program text by closing and reopening the single-quoted string.
103 ost_objids=$($LFS getstripe $ost_files | \
104 awk '{if ($1 == '$obdidx' && $4 == '$group') print $2 }')
108 # Get the OST node name (given the OST index).
# Fragment: the function header, local declarations, the closing done and the
# line that outputs the result are not visible. Presumably the body of
# get_ost_node, which the main flow calls with an OST index.
# Resolve the index to a UUID, then ask each node listed by osts_nodes for its
# obdfilter UUIDs until one of them matches.
115 ost_uuid=$(ostuuid_from_index $obdidx)
117 for node in $(osts_nodes); do
118 do_node $node "lctl get_param -n obdfilter.*.uuid" | grep -q $ost_uuid
# PIPESTATUS[1] is the exit status of grep, the second stage of the pipeline
# above: zero means this node reported the UUID, so remember it and stop.
119 [ ${PIPESTATUS[1]} -eq 0 ] && ost_node=$node && break
# No node matched: print a message on stdout and return failure.
121 [ -z "$ost_node" ] && \
122 echo "failed to find the OST with index $obdidx" && return 1
126 # Get the OST target device (given the OST facet name and OST index).
# Fragment: the function header, local declarations, the closing fi lines and
# the line that outputs the result are not visible. Presumably the body of
# get_ost_dev, which the main flow calls with a node name and an OST index.
133 ost_name=$(ostname_from_index $obdidx)
134 ost_dev=$(get_osd_param $node $ost_name mntdev)
# The status tested here is that of the command substitution on the previous
# line, that is, of get_osd_param.
135 if [ $? -ne 0 ]; then
# The index is printed as four hexadecimal digits. The facet variable is
# expanded inside the printf format string; it is not assigned in the visible
# lines.
136 printf "unable to find OST%04x on $facet\n" $obdidx
# When the device name contains loop, ask losetup on the node about it and
# keep only the text between the parentheses of its output (presumably the
# backing file path -- confirm against the losetup output format).
140 if [[ $ost_dev = *loop* ]]; then
141 ost_dev=$(do_node $node "losetup $ost_dev" | \
142 sed -e "s/.*(//" -e "s/).*//")
148 # Get the file names to be duplicated or removed on the MDS.
# Fragment of get_files (named in the error message below): the function
# header, the case statement with its arm labels, the closing done and the
# line that outputs the result are not visible. The main flow calls it with a
# flavor (dup or remove), a directory and a file count.
# The first range is testfile.<num_files + 1> .. testfile.<2 * num_files>.
158 first=$((num_files + 1))
159 last=$((num_files * 2))
# The second range is testfile.<2 * num_files + 1> .. testfile.<3 * num_files>.
# Which flavor selects which range cannot be told from the visible lines.
162 first=$((num_files * 2 + 1))
163 last=$((num_files * 3))
# Any other flavor: print a message on stdout and return failure. The main
# flow reuses that captured text as its error message.
165 *) echo "get_files(): invalid flavor" && return 1 ;;
# Build a space-separated list of the selected files under test_dir.
170 for f in $(seq -f testfile.%g $first $last); do
171 test_file=$test_dir/$f
172 files="$files $test_file"
# Remove every occurrence of the DIR prefix, leaving the paths relative to DIR.
174 files=$(echo $files | sed "s#$DIR/##g")
178 # Remove objects associated with files.
# Fragment: the function header, the lines that set ostdev, group and objids,
# the failure branch after mount, the loop header over the object ids and the
# final return are not visible. Presumably the body of remove_objects, which
# the main flow calls with a device, an object group and a list of object ids.
# Overall shape: mount the OST device, delete each object at
#   <mntpt>/O/<group>/d<id modulo 32>/<id>
# and unmount again. A failing rm stores its status in rc and stops the loop;
# a failing umount returns its own status.
# The facet name is built from the global OSTIDX plus one.
185 local facet=ost$((OSTIDX + 1))
186 local mntpt=$(facet_mntpt $facet)
187 local opts=$OST_MOUNT_OPTS
191 echo "removing objects from $ostdev on $facet: $objids"
# When the device is not a block device on the facet, add the loop option to
# the mount options.
192 if ! do_facet $facet test -b $ostdev; then
193 opts=$(csa_add "$opts" -o loop)
195 mount -t $(facet_fstype $facet) $opts $ostdev $mntpt ||
199 rm $mntpt/O/$group/d$((i % 32))/$i || { rc=$?; break; }
201 umount -f $mntpt || return $?
205 # Remove files from MDS.
# Fragment: the function header and closing brace are not visible. Presumably
# the body of remove_files, which the main flow calls with a facet, a device
# and a list of files.
# Runs remove_mdt_files through do_rpc_nodes on the host that facet_host
# reports for the first argument, forwarding every argument including that
# first one.
# NOTE(review): the argument list is expanded unquoted, so any argument that
# contains whitespace is split again before it is forwarded.
207 do_rpc_nodes $(facet_host $1) remove_mdt_files $@
210 # Create EAs on files so objects are referenced from different files.
# Fragment: the function header and closing brace are not visible. Presumably
# the body of duplicate_files, which the main flow calls with a facet, a
# device and a list of files.
# Runs duplicate_mdt_files through do_rpc_nodes on the host that facet_host
# reports for the first argument, forwarding every argument including that
# first one.
# NOTE(review): the argument list is expanded unquoted, so any argument that
# contains whitespace is split again before it is forwarded.
212 do_rpc_nodes $(facet_host $1) duplicate_mdt_files $@
215 #********************************* Main Flow **********************************#
# Overview: when is_empty_fs reports the mount point empty, populate it,
# record what to damage, stop the servers, then damage the stopped targets in
# three ways (delete OST objects, delete MDS files, make files share objects).
# Otherwise nothing is created or damaged and only FSCK_MAX_ERR is set.
# The closing fi of this conditional is not part of the visible lines.
219 # get the server target devices
# NOTE(review): the assignments this comment refers to are not visible here.
# MDTDEV, used further down, is presumably set by them -- confirm.
222 TESTDIR=$DIR/d0.$TESTSUITE
223 if is_empty_fs $MOUNT; then
224 # create test directory
225 mkdir -p $TESTDIR || error "mkdir $TESTDIR failed"
227 # create some dirs and files on the filesystem
# NOTE(review): NUMDIRS is not assigned in the visible lines.
228 create_files $TESTDIR $NUMDIRS $NUMFILES
230 # get the objids for files in group $OBJGRP on the OST with index $OSTIDX
# Only testfile.1 .. testfile.<NUMFILES> are passed in here.
231 OST_REMOVE=$(get_objects $OSTIDX $OBJGRP \
232 $(seq -f $TESTDIR/testfile.%g $NUMFILES))
234 # get the node name and target device for the OST with index $OSTIDX
235 OSTNODE=$(get_ost_node $OSTIDX) || error "get_ost_node by index $OSTIDX failed"
236 OSTDEV=$(get_ost_dev $OSTNODE $OSTIDX) ||
237 error "get_ost_dev $OSTNODE $OSTIDX failed"
239 # get the file names to be duplicated on the MDS
# On failure the text captured from get_files is reused as the error message.
240 MDS_DUPE=$(get_files dup $TESTDIR $NUMFILES) || error "$MDS_DUPE"
241 # get the file names to be removed from the MDS
242 MDS_REMOVE=$(get_files remove $TESTDIR $NUMFILES) || error "$MDS_REMOVE"
# Stop everything before the targets are modified directly.
# NOTE(review): the failure message names cleanupall although the command
# that ran is stopall.
244 stopall -f || error "cleanupall failed"
246 # remove objects associated with files in group $OBJGRP
247 # on the OST with index $OSTIDX
248 remove_objects $OSTDEV $OBJGRP $OST_REMOVE ||
249 error "removing objects failed"
251 # remove files from MDS
252 remove_files $SINGLEMDS $MDTDEV $MDS_REMOVE || error "removing files failed"
254 # create EAs on files so objects are referenced from different files
255 duplicate_files $SINGLEMDS $MDTDEV $MDS_DUPE ||
256 error "duplicating files failed"
# FSCK_MAX_ERR is only assigned here; the code that consumes it is not
# visible. Presumably it is the highest checker exit status that the later
# runs accept -- confirm.
257 FSCK_MAX_ERR=1 # file system errors corrected
258 else # is_empty_fs $MOUNT
259 FSCK_MAX_ERR=4 # file system errors left uncorrected
262 # Test 1a - check and repair the filesystem
263 # lfsck will return 1 if the filesystem had errors fixed
264 # run e2fsck to generate databases used for lfsck
# Fragment: the commands that run the checkers and assign rc, as well as the
# else and fi lines of the conditionals below, are not visible.
# REFORMAT is saved before check_and_setup_lustre runs and restored right
# after it. Presumably a line that is not visible changes REFORMAT in between
# so that the setup step does not reformat the damaged targets -- confirm.
268 ORIG_REFORMAT=$REFORMAT
270 check_and_setup_lustre
271 REFORMAT=$ORIG_REFORMAT
# rc equal to zero after the first run is reported as a clean filesystem.
276 if [ $rc -eq 0 ]; then
277 echo "clean after the first check"
279 # run e2fsck again to generate databases used for lfsck
# rc equal to zero after the second run is reported as clean. The error call
# below presumably sits in the else branch, which is not visible -- confirm.
285 if [ $rc -eq 0 ]; then
286 echo "clean after the second check"
288 error "lfsck test 2 - finished with rc=$rc"
293 # The test directory contains files that reference shared or missing
294 # objects, which can cause errors when the directory is removed.
# Retry loop: keep removing TESTDIR while it still exists, remounting the
# client after every attempt and giving up through error once RMCNT reaches
# 10. The lines that initialise and increment RMCNT and the closing done are
# not visible.
296 while [ -d $TESTDIR ]; do
# A failed rm is only logged, so the loop goes on to the next attempt.
298 rm -fr $TESTDIR || echo "$RMCNT round: rm $TESTDIR failed"
299 [ $RMCNT -ge 10 ] && error "cleanup $TESTDIR failed $RMCNT times"
300 remount_client $MOUNT
# Final framework teardown step.
302 check_and_cleanup_lustre