From: Andreas Dilger Date: Fri, 13 Nov 2015 05:35:17 +0000 (-0700) Subject: LU-7381 e2fsck: fix e2fsck -fD directory truncation X-Git-Tag: v1.42.13.wc6~17 X-Git-Url: https://git.whamcloud.com/tools/e2fsprogs.git/?a=commitdiff_plain;h=7bdcf88d5b805d6e81fbe6ba83ed5e556519339a;p=tools%2Fe2fsprogs.git LU-7381 e2fsck: fix e2fsck -fD directory truncation When an extent-mapped directory is compacted by "e2fsck -fD" and frees enough leaf blocks that it loses an extent tree index block, the old e2fsck_rehash_dir->ext2fs_block_iterate3->write_dir_block() code would not free the extent block, which would result in the extent tree becoming corrupted when it is written out. A later e2fsck run then reports the corruption as follows: Pass 1: Checking inodes, blocks, and sizes Inode 17825800, end of extent exceeds allowed value (logical block 710, physical block 570459684, len 1019) This results in the loss of a whole index block of directory leaf blocks and thousands or millions of files in lost+found. Fix e2fsck_rehash_dir() (via write_directory()) to call ext2fs_punch() to free the blocks at the end of the directory instead of trying to handle this itself while writing out the directory. That properly handles all of the cases of updating the extent tree as well as accounting for blocks that are released (both leaf blocks and index blocks). Add a test case for compacting the directory to be smaller than the index block that originally caused the corruption. 
e2fsprogs-commit: 19961cd0003564c63c33ec14e69dfec6d81a2238 Signed-off-by: Andreas Dilger Signed-off-by: Theodore Ts'o Change-Id: I2e075849423693ebf4468fd7b0f41d6b2f500c1e Reviewed-on: http://review.whamcloud.com/17153 Tested-by: Jenkins Tested-by: Maloo --- diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c index b544370..60967f6 100644 --- a/e2fsck/rehash.c +++ b/e2fsck/rehash.c @@ -52,10 +52,13 @@ #include "e2fsck.h" #include "problem.h" +#undef REHASH_DEBUG + struct fill_dir_struct { char *buf; struct ext2_inode *inode; errcode_t err; + ext2_ino_t ino; e2fsck_t ctx; struct hash_entry *harray; int max_array, num_array; @@ -639,8 +642,8 @@ static errcode_t calculate_tree(ext2_filsys fs, struct write_dir_struct { struct out_dir *outdir; errcode_t err; + ext2_ino_t ino; e2fsck_t ctx; - blk64_t cleared; }; /* @@ -657,28 +660,35 @@ static int write_dir_block(ext2_filsys fs, blk64_t blk; char *dir; - if (*block_nr == 0) +#ifdef REHASH_DEBUG + printf("%u: write_dir_block %lld:%lld", wd->ino, blockcnt, *block_nr); +#endif + if (*block_nr == 0) { +#ifdef REHASH_DEBUG + printf(" - skip\n"); +#endif return 0; + } + /* Don't free blocks at the end of the directory, they will be + * truncated by the caller. */ if (blockcnt >= wd->outdir->num) { - e2fsck_read_bitmaps(wd->ctx); - blk = *block_nr; - /* - * In theory, we only release blocks from the end of the - * directory file, so it's fine to clobber a whole cluster at - * once. 
- */ - if (blk % EXT2FS_CLUSTER_RATIO(fs) == 0) { - ext2fs_block_alloc_stats2(fs, blk, -1); - wd->cleared++; - } - *block_nr = 0; - return BLOCK_CHANGED; +#ifdef REHASH_DEBUG + printf(" - not freed\n"); +#endif + return 0; } - if (blockcnt < 0) + if (blockcnt < 0) { +#ifdef REHASH_DEBUG + printf(" - skip\n"); +#endif return 0; + } dir = wd->outdir->buf + (blockcnt * fs->blocksize); wd->err = ext2fs_write_dir_block3(fs, *block_nr, dir, 0); +#ifdef REHASH_DEBUG + printf(" - write (%d)\n", wd->err); +#endif if (wd->err) return BLOCK_ABORT; return 0; @@ -698,10 +708,10 @@ static errcode_t write_directory(e2fsck_t ctx, ext2_filsys fs, wd.outdir = outdir; wd.err = 0; + wd.ino = ino; wd.ctx = ctx; - wd.cleared = 0; - retval = ext2fs_block_iterate3(fs, ino, 0, 0, + retval = ext2fs_block_iterate3(fs, ino, 0, NULL, write_dir_block, &wd); if (retval) return retval; @@ -713,14 +723,17 @@ static errcode_t write_directory(e2fsck_t ctx, ext2_filsys fs, inode.i_flags &= ~EXT2_INDEX_FL; else inode.i_flags |= EXT2_INDEX_FL; - retval = ext2fs_inode_size_set(fs, &inode, - outdir->num * fs->blocksize); +#ifdef REHASH_DEBUG + printf("%u: set inode size to %u blocks = %u bytes\n", + ino, outdir->num, outdir->num * fs->blocksize); +#endif + retval = ext2fs_inode_size_set(fs, &inode, (ext2_off64_t)outdir->num * + fs->blocksize); if (retval) return retval; - ext2fs_iblk_sub_blocks(fs, &inode, wd.cleared); - e2fsck_write_inode(ctx, ino, &inode, "rehash_dir"); - return 0; + /* ext2fs_punch() also calls ext2fs_write_inode() */ + return ext2fs_punch(fs, ino, &inode, NULL, outdir->num, ~0ULL); } errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino) @@ -729,32 +742,25 @@ errcode_t e2fsck_rehash_dir(e2fsck_t ctx, ext2_ino_t ino) errcode_t retval; struct ext2_inode inode; char *dir_buf = 0; - struct fill_dir_struct fd; - struct out_dir outdir; + struct fill_dir_struct fd = { NULL }; + struct out_dir outdir = { 0 }; - outdir.max = outdir.num = 0; - outdir.buf = 0; - outdir.hashes = 0; 
e2fsck_read_inode(ctx, ino, &inode, "rehash_dir"); retval = ENOMEM; - fd.harray = 0; dir_buf = malloc(inode.i_size); if (!dir_buf) goto errout; fd.max_array = inode.i_size / 32; - fd.num_array = 0; fd.harray = malloc(fd.max_array * sizeof(struct hash_entry)); if (!fd.harray) goto errout; + fd.ino = ino; fd.ctx = ctx; fd.buf = dir_buf; fd.inode = &inode; - fd.err = 0; - fd.dir_size = 0; - fd.compress = 0; if (!(fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_DIR_INDEX) || (inode.i_size / fs->blocksize) < 2) fd.compress = 1; diff --git a/tests/f_extent_htree/expect.1 b/tests/f_extent_htree/expect.1 new file mode 100644 index 0000000..223ca69 --- /dev/null +++ b/tests/f_extent_htree/expect.1 @@ -0,0 +1,29 @@ +Pass 1: Checking inodes, blocks, and sizes +Pass 2: Checking directory structure +Pass 3: Checking directory connectivity +Pass 3A: Optimizing directories +Pass 4: Checking reference counts +Pass 5: Checking group summary information + +test_filesys: ***** FILE SYSTEM WAS MODIFIED ***** + + 352 inodes used (41.12%, out of 856) + 0 non-contiguous files (0.0%) + 1 non-contiguous directory (0.3%) + # of inodes with ind/dind/tind blocks: 0/0/0 + Extent depth histogram: 342/1 + 586 blocks used (68.94%, out of 850) + 0 bad blocks + 0 large files + + 340 regular files + 3 directories + 0 character device files + 0 block device files + 0 fifos + 0 links + 0 symbolic links (0 fast symbolic links) + 0 sockets +------------ + 343 files +Exit status is 1 diff --git a/tests/f_extent_htree/expect.2 b/tests/f_extent_htree/expect.2 new file mode 100644 index 0000000..860b491 --- /dev/null +++ b/tests/f_extent_htree/expect.2 @@ -0,0 +1,7 @@ +Pass 1: Checking inodes, blocks, and sizes +Pass 2: Checking directory structure +Pass 3: Checking directory connectivity +Pass 4: Checking reference counts +Pass 5: Checking group summary information +test_filesys: 352/856 files (0.3% non-contiguous), 586/850 blocks +Exit status is 0 diff --git a/tests/f_extent_htree/image.gz 
b/tests/f_extent_htree/image.gz new file mode 100644 index 0000000..284207e Binary files /dev/null and b/tests/f_extent_htree/image.gz differ diff --git a/tests/f_extent_htree/name b/tests/f_extent_htree/name new file mode 100644 index 0000000..fc3812d --- /dev/null +++ b/tests/f_extent_htree/name @@ -0,0 +1 @@ +htree extent compression diff --git a/tests/f_extent_htree/script b/tests/f_extent_htree/script new file mode 100644 index 0000000..60854c6 --- /dev/null +++ b/tests/f_extent_htree/script @@ -0,0 +1,69 @@ +#!/bin/bash + +FSCK_OPT="-fyvD" +. $cmd_dir/run_e2fsck + +exit $? +# This script depends on "mke2fs -d", which is only in master and not maint, +# to populate the file directory tree poorly (namely that there are no +# contiguous blocks in the directory leaf and the extent tree is large). + +# Once the "mke2fs -d" option is available on the "maint" branch, the +# above few lines should be deleted, along with the "image.gz" file. + +TMPDIR=${TMPDIR:-"/tmp"} +OUT=$test_name.log + +FSCK_OPT="-fyvD" +SKIP_GUNZIP="true" + +NAMELEN=250 +SRC=$TMPDIR/$test_name.tmp +SUB=subdir +BASE=$SRC/$SUB/$(yes | tr -d '\n' | dd bs=$NAMELEN count=1 2> /dev/null) +TMPFILE=${TMPFILE:-"$TMPDIR/image"} +BSIZE=1024 + +> $OUT +mkdir -p $SRC/$SUB +# calculate the number of files needed to create the directory extent tree +# deep enough to exceed the in-inode index and spill into an index block. +# +# dirents per block * extents per block * (index blocks > i_blocks) +NUM=$(((BSIZE / (NAMELEN + 8)) * (BSIZE / 12) * 2)) +# Create source files. Unfortunately hard links will be copied as links, +# and blocks with only NULs will be turned into holes. +if [ ! 
-f $BASE.1 ]; then + for N in $(seq $NUM); do + echo "foo" > $BASE.$N + done >> $OUT +fi + +# make filesystem with enough inodes and blocks to hold all the test files +> $TMPFILE +NUM=$((NUM * 5 / 3)) +echo "mke2fs -b $BSIZE -O dir_index,extent -d$SRC -N$NUM $TMPFILE $NUM" >> $OUT +$MKE2FS -b $BSIZE -O dir_index,extent -d$SRC -N$NUM $TMPFILE $NUM >> $OUT 2>&1 +rm -r $SRC + +# Run e2fsck to convert dir to htree before deleting the files, as mke2fs +# doesn't do this. Run second e2fsck to verify there is no corruption yet. +( + EXP1=$test_dir/expect.pre.1 + EXP2=$test_dir/expect.pre.2 + OUT1=$test_name.pre.1.log + OUT2=$test_name.pre.2.log + DESCRIPTION="$(cat $test_dir/name) setup" + . $cmd_dir/run_e2fsck +) + +# generate a list of filenames for debugfs to delete, one from each leaf block +DELETE_LIST=$TMPDIR/delete.$$ +$DEBUGFS -c -R "htree subdir" $TMPFILE 2>> $OUT | + grep -A2 "Reading directory block" | + awk '/yyyyy/ { print "rm '$SUB'/"$4 }' > $DELETE_LIST +$DEBUGFS -w -f $DELETE_LIST $TMPFILE >> $OUT 2>&1 +rm $DELETE_LIST +cp $TMPFILE $TMPFILE.sav + +. 
$cmd_dir/run_e2fsck diff --git a/tests/f_h_badnode/expect.1 b/tests/f_h_badnode/expect.1 index ce2adb3..95b1cee 100644 --- a/tests/f_h_badnode/expect.1 +++ b/tests/f_h_badnode/expect.1 @@ -14,5 +14,5 @@ Pass 4: Checking reference counts Pass 5: Checking group summary information test_filesys: ***** FILE SYSTEM WAS MODIFIED ***** -test_filesys: 47730/100192 files (0.0% non-contiguous), 13551/31745 blocks +test_filesys: 47730/100192 files (0.0% non-contiguous), 13550/31745 blocks Exit status is 1 diff --git a/tests/f_h_badnode/expect.2 b/tests/f_h_badnode/expect.2 index b9dadb7..65985d1 100644 --- a/tests/f_h_badnode/expect.2 +++ b/tests/f_h_badnode/expect.2 @@ -3,5 +3,5 @@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information -test_filesys: 47730/100192 files (0.0% non-contiguous), 13551/31745 blocks +test_filesys: 47730/100192 files (0.0% non-contiguous), 13550/31745 blocks Exit status is 0