From 611d341377607d69b053436fec6de016fe8258fd Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Mon, 22 Jan 2018 10:47:41 -0800 Subject: [PATCH] AOSP: libext2fs: add EXT2_FLAG_SHARE_DUP to de-duplicate data blocks When assigning physical address for new data block, search sha of existing blocks for match. If there's a match, reuse address of the matched block. Also set EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS in de-duped image. Signed-off-by: Theodore Ts'o Google-Bug-Id: 64109868 Change-Id: I8d2d22e9c301264413c1454c84d7bf6bb32ac5c0 From AOSP commit: 3d7abcc7843d6dfdfdafabf43f5e072cb7aaffbd --- contrib/android/e2fsdroid.c | 7 +++++-- lib/e2p/feature.c | 2 ++ lib/ext2fs/ext2_fs.h | 2 ++ lib/ext2fs/ext2fs.h | 11 +++++++++-- lib/ext2fs/fileio.c | 43 ++++++++++++++++++++++++++++++++++++++++--- lib/ext2fs/freefs.c | 4 ++++ lib/ext2fs/openfs.c | 16 ++++++++++++++++ 7 files changed, 78 insertions(+), 7 deletions(-) diff --git a/contrib/android/e2fsdroid.c b/contrib/android/e2fsdroid.c index c73b0be..2fe922d 100644 --- a/contrib/android/e2fsdroid.c +++ b/contrib/android/e2fsdroid.c @@ -32,7 +32,7 @@ static void usage(int ret) { fprintf(stderr, "%s [-B block_list] [-D basefs_out] [-T timestamp]\n" "\t[-C fs_config] [-S file_contexts] [-p product_out]\n" - "\t[-a mountpoint] [-d basefs_in] [-f src_dir] [-e] image\n", + "\t[-a mountpoint] [-d basefs_in] [-f src_dir] [-e] [-s] image\n", prog_name); exit(ret); } @@ -73,7 +73,7 @@ int main(int argc, char *argv[]) add_error_table(&et_ext2_error_table); - while ((c = getopt (argc, argv, "T:C:S:p:a:D:d:B:f:e")) != EOF) { + while ((c = getopt (argc, argv, "T:C:S:p:a:D:d:B:f:es")) != EOF) { switch (c) { case 'T': fixed_time = strtoul(optarg, &p, 0); @@ -119,6 +119,9 @@ int main(int argc, char *argv[]) case 'e': android_sparse_file = 0; break; + case 's': + flags |= EXT2_FLAG_SHARE_DUP; + break; default: usage(EXIT_FAILURE); } diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c index b7f6c1d..0fab9c7 100644 --- a/lib/e2p/feature.c +++ b/lib/e2p/feature.c @@ -72,6 +72,8 @@ static struct feature feature_list[] = { "read-only" }, { E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_PROJECT, "project"}, + { E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS, + "shared_blocks"}, { E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION, "compression" }, diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h index 7d62694..a851a02 100644 --- a/lib/ext2fs/ext2_fs.h +++ b/lib/ext2fs/ext2_fs.h @@ -812,6 +812,7 @@ struct ext2_super_block { #define EXT4_FEATURE_RO_COMPAT_REPLICA 0x0800 #define EXT4_FEATURE_RO_COMPAT_READONLY 0x1000 #define EXT4_FEATURE_RO_COMPAT_PROJECT 0x2000 /* Project quota */ +#define EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS 0x4000 #define EXT2_FEATURE_INCOMPAT_COMPRESSION 0x0001 @@ -904,6 +905,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, 4, METADATA_CSUM) EXT4_FEATURE_RO_COMPAT_FUNCS(replica, 4, REPLICA) EXT4_FEATURE_RO_COMPAT_FUNCS(readonly, 4, READONLY) EXT4_FEATURE_RO_COMPAT_FUNCS(project, 4, PROJECT) +EXT4_FEATURE_RO_COMPAT_FUNCS(shared_blocks, 4, SHARED_BLOCKS) EXT4_FEATURE_INCOMPAT_FUNCS(compression, 2, COMPRESSION) EXT4_FEATURE_INCOMPAT_FUNCS(filetype, 2, FILETYPE) diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h index 250fd17..eb6c092 100644 --- a/lib/ext2fs/ext2fs.h +++ b/lib/ext2fs/ext2fs.h @@ -94,6 +94,8 @@ typedef __u32 __bitwise ext2_dirhash_t; #include #endif +#include "hashmap.h" + /* * Portability help for Microsoft Visual C++ */ @@ -195,7 +197,8 @@ typedef struct ext2_file *ext2_file_t; #define EXT2_FLAG_DIRECT_IO 0x80000 #define EXT2_FLAG_SKIP_MMP 0x100000 #define EXT2_FLAG_IGNORE_CSUM_ERRORS 0x200000 -#define EXT2_FLAG_IGNORE_SB_ERRORS 0x400000 +#define EXT2_FLAG_SHARE_DUP 0x400000 +#define EXT2_FLAG_IGNORE_SB_ERRORS 0x800000 /* * Special flag in the ext2 inode i_flag field that means that this is @@ -296,6 +299,9 @@ struct struct_ext2_filsys { blk64_t len, blk64_t *pblk, blk64_t *plen); void (*block_alloc_stats_range)(ext2_filsys fs, blk64_t blk, blk_t num, int inuse); + + /* hashmap for SHA of data blocks */ + struct ext2fs_hashmap* block_sha_map; }; #if EXT2_FLAT_INCLUDES @@ -618,7 +624,8 @@ typedef struct ext2_icount *ext2_icount_t; EXT4_FEATURE_RO_COMPAT_QUOTA|\ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\ EXT4_FEATURE_RO_COMPAT_READONLY |\ - EXT4_FEATURE_RO_COMPAT_PROJECT) + EXT4_FEATURE_RO_COMPAT_PROJECT |\ + EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS) /* * These features are only allowed if EXT2_FLAG_SOFTSUPP_FEATURES is passed diff --git a/lib/ext2fs/fileio.c b/lib/ext2fs/fileio.c index 810a7fd..5bc02d0 100644 --- a/lib/ext2fs/fileio.c +++ b/lib/ext2fs/fileio.c @@ -32,6 +32,12 @@ struct ext2_file { char *buf; }; +struct block_entry { + blk64_t physblock; + unsigned char sha[EXT2FS_SHA512_LENGTH]; +}; +typedef struct block_entry *block_entry_t; + #define BMAP_BUFFER (file->buf + fs->blocksize) errcode_t ext2fs_file_open2(ext2_filsys fs, ext2_ino_t ino, @@ -389,6 +395,8 @@ errcode_t ext2fs_file_write(ext2_file_t file, const void *buf, errcode_t retval = 0; unsigned int start, c, count = 0; const char *ptr = (const char *) buf; + block_entry_t new_block = NULL, old_block = NULL; + int bmap_flags = 0; EXT2_CHECK_MAGIC(file, EXT2_ET_MAGIC_EXT2_FILE); fs = file->fs; @@ -424,22 +432,51 @@ errcode_t ext2fs_file_write(ext2_file_t file, const void *buf, if (retval) goto fail; + file->flags |= EXT2_FILE_BUF_DIRTY; + memcpy(file->buf+start, ptr, c); + /* * OK, the physical block hasn't been allocated yet. * Allocate it. */ if (!file->physblock) { + bmap_flags = (file->ino ? BMAP_ALLOC : 0); + if (fs->flags & EXT2_FLAG_SHARE_DUP) { + new_block = calloc(1, sizeof(*new_block)); + if (!new_block) { + retval = EXT2_ET_NO_MEMORY; + goto fail; + } + ext2fs_sha512((const unsigned char*)file->buf, + fs->blocksize, new_block->sha); + old_block = ext2fs_hashmap_lookup( + fs->block_sha_map, + new_block->sha, + sizeof(new_block->sha)); + } + + if (old_block) { + file->physblock = old_block->physblock; + bmap_flags |= BMAP_SET; + free(new_block); + new_block = NULL; + } + retval = ext2fs_bmap2(fs, file->ino, &file->inode, BMAP_BUFFER, - file->ino ? BMAP_ALLOC : 0, + bmap_flags, file->blockno, 0, &file->physblock); if (retval) goto fail; + + if (new_block) { + new_block->physblock = file->physblock; + ext2fs_hashmap_add(fs->block_sha_map, new_block, + new_block->sha, sizeof(new_block->sha)); + } } - file->flags |= EXT2_FILE_BUF_DIRTY; - memcpy(file->buf+start, ptr, c); file->pos += c; ptr += c; count += c; diff --git a/lib/ext2fs/freefs.c b/lib/ext2fs/freefs.c index ea9742e..68b8e9a 100644 --- a/lib/ext2fs/freefs.c +++ b/lib/ext2fs/freefs.c @@ -17,6 +17,7 @@ #include "ext2_fs.h" #include "ext2fsP.h" +#include "hashmap.h" void ext2fs_free(ext2_filsys fs) { @@ -59,6 +60,9 @@ void ext2fs_free(ext2_filsys fs) if (fs->mmp_cmp) ext2fs_free_mem(&fs->mmp_cmp); + if (fs->block_sha_map) + ext2fs_hashmap_free(fs->block_sha_map); + fs->magic = 0; ext2fs_zero_blocks2(NULL, 0, 0, NULL, NULL); diff --git a/lib/ext2fs/openfs.c b/lib/ext2fs/openfs.c index 902d69e..e159bb9 100644 --- a/lib/ext2fs/openfs.c +++ b/lib/ext2fs/openfs.c @@ -94,6 +94,12 @@ errcode_t ext2fs_open(const char *name, int flags, int superblock, manager, ret_fs); } +static void block_sha_map_free_entry(void *data) +{ + free(data); + return; +} + /* * Note: if superblock is non-zero, block-size must also be non-zero. * Superblock and block_size can be zero to use the default size. @@ -486,6 +492,16 @@ errcode_t ext2fs_open2(const char *name, const char *io_options, } } + if (fs->flags & EXT2_FLAG_SHARE_DUP) { + fs->block_sha_map = ext2fs_hashmap_create(ext2fs_djb2_hash, + block_sha_map_free_entry, 4096); + if (!fs->block_sha_map) { + retval = EXT2_ET_NO_MEMORY; + goto cleanup; + } + ext2fs_set_feature_shared_blocks(fs->super); + } + fs->flags &= ~EXT2_FLAG_NOFREE_ON_ERROR; *ret_fs = fs; -- 1.8.3.1