Whamcloud - gitweb
AOSP: libext2fs: add EXT2_FLAG_SHARE_DUP to de-duplicate data blocks
author Jin Qian <jinqian@google.com>
Mon, 22 Jan 2018 18:47:41 +0000 (10:47 -0800)
committer Theodore Ts'o <tytso@mit.edu>
Thu, 21 Jun 2018 13:43:09 +0000 (09:43 -0400)
When assigning a physical address for a new data block, search the
SHA-512 hashes of existing blocks for a match. If there is a match,
reuse the address of the matched block.

Also set EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS in the de-duplicated image.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Google-Bug-Id: 64109868
Change-Id: I8d2d22e9c301264413c1454c84d7bf6bb32ac5c0
From AOSP commit: 3d7abcc7843d6dfdfdafabf43f5e072cb7aaffbd

contrib/android/e2fsdroid.c
lib/e2p/feature.c
lib/ext2fs/ext2_fs.h
lib/ext2fs/ext2fs.h
lib/ext2fs/fileio.c
lib/ext2fs/freefs.c
lib/ext2fs/openfs.c

index c73b0be..2fe922d 100644 (file)
@@ -32,7 +32,7 @@ static void usage(int ret)
 {
        fprintf(stderr, "%s [-B block_list] [-D basefs_out] [-T timestamp]\n"
                        "\t[-C fs_config] [-S file_contexts] [-p product_out]\n"
-                       "\t[-a mountpoint] [-d basefs_in] [-f src_dir] [-e] image\n",
+                       "\t[-a mountpoint] [-d basefs_in] [-f src_dir] [-e] [-s] image\n",
                 prog_name);
        exit(ret);
 }
@@ -73,7 +73,7 @@ int main(int argc, char *argv[])
 
        add_error_table(&et_ext2_error_table);
 
-       while ((c = getopt (argc, argv, "T:C:S:p:a:D:d:B:f:e")) != EOF) {
+       while ((c = getopt (argc, argv, "T:C:S:p:a:D:d:B:f:es")) != EOF) {
                switch (c) {
                case 'T':
                        fixed_time = strtoul(optarg, &p, 0);
@@ -119,6 +119,9 @@ int main(int argc, char *argv[])
                case 'e':
                        android_sparse_file = 0;
                        break;
+               case 's':
+                       flags |= EXT2_FLAG_SHARE_DUP;
+                       break;
                default:
                        usage(EXIT_FAILURE);
                }
index b7f6c1d..0fab9c7 100644 (file)
@@ -72,6 +72,8 @@ static struct feature feature_list[] = {
                        "read-only" },
        {       E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_PROJECT,
                        "project"},
+       {       E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS,
+                       "shared_blocks"},
 
        {       E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION,
                        "compression" },
index 7d62694..a851a02 100644 (file)
@@ -812,6 +812,7 @@ struct ext2_super_block {
 #define EXT4_FEATURE_RO_COMPAT_REPLICA         0x0800
 #define EXT4_FEATURE_RO_COMPAT_READONLY                0x1000
 #define EXT4_FEATURE_RO_COMPAT_PROJECT         0x2000 /* Project quota */
+#define EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS   0x4000
 
 
 #define EXT2_FEATURE_INCOMPAT_COMPRESSION      0x0001
@@ -904,6 +905,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(metadata_csum, 4, METADATA_CSUM)
 EXT4_FEATURE_RO_COMPAT_FUNCS(replica,          4, REPLICA)
 EXT4_FEATURE_RO_COMPAT_FUNCS(readonly,         4, READONLY)
 EXT4_FEATURE_RO_COMPAT_FUNCS(project,          4, PROJECT)
+EXT4_FEATURE_RO_COMPAT_FUNCS(shared_blocks,    4, SHARED_BLOCKS)
 
 EXT4_FEATURE_INCOMPAT_FUNCS(compression,       2, COMPRESSION)
 EXT4_FEATURE_INCOMPAT_FUNCS(filetype,          2, FILETYPE)
index 250fd17..eb6c092 100644 (file)
@@ -94,6 +94,8 @@ typedef __u32 __bitwise               ext2_dirhash_t;
 #include <ext2fs/ext2_ext_attr.h>
 #endif
 
+#include "hashmap.h"
+
 /*
  * Portability help for Microsoft Visual C++
  */
@@ -195,7 +197,8 @@ typedef struct ext2_file *ext2_file_t;
 #define EXT2_FLAG_DIRECT_IO            0x80000
 #define EXT2_FLAG_SKIP_MMP             0x100000
 #define EXT2_FLAG_IGNORE_CSUM_ERRORS   0x200000
-#define EXT2_FLAG_IGNORE_SB_ERRORS     0x400000
+#define EXT2_FLAG_SHARE_DUP            0x400000
+#define EXT2_FLAG_IGNORE_SB_ERRORS     0x800000
 
 /*
  * Special flag in the ext2 inode i_flag field that means that this is
@@ -296,6 +299,9 @@ struct struct_ext2_filsys {
                               blk64_t len, blk64_t *pblk, blk64_t *plen);
        void (*block_alloc_stats_range)(ext2_filsys fs, blk64_t blk, blk_t num,
                                        int inuse);
+
+       /* hashmap for SHA of data blocks */
+       struct ext2fs_hashmap* block_sha_map;
 };
 
 #if EXT2_FLAT_INCLUDES
@@ -618,7 +624,8 @@ typedef struct ext2_icount *ext2_icount_t;
                                         EXT4_FEATURE_RO_COMPAT_QUOTA|\
                                         EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
                                         EXT4_FEATURE_RO_COMPAT_READONLY |\
-                                        EXT4_FEATURE_RO_COMPAT_PROJECT)
+                                        EXT4_FEATURE_RO_COMPAT_PROJECT |\
+                                        EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS)
 
 /*
  * These features are only allowed if EXT2_FLAG_SOFTSUPP_FEATURES is passed
index 810a7fd..5bc02d0 100644 (file)
@@ -32,6 +32,12 @@ struct ext2_file {
        char                    *buf;
 };
 
+struct block_entry { /* one record in fs->block_sha_map, used by ext2fs_file_write() for de-duplication */
+       blk64_t         physblock; /* physical block number recorded for this content */
+       unsigned char   sha[EXT2FS_SHA512_LENGTH]; /* ext2fs_sha512() digest of the block buffer; serves as the hashmap key */
+};
+typedef struct block_entry *block_entry_t; /* pointer alias used for the local entries in ext2fs_file_write() */
+
 #define BMAP_BUFFER (file->buf + fs->blocksize)
 
 errcode_t ext2fs_file_open2(ext2_filsys fs, ext2_ino_t ino,
@@ -389,6 +395,8 @@ errcode_t ext2fs_file_write(ext2_file_t file, const void *buf,
        errcode_t       retval = 0;
        unsigned int    start, c, count = 0;
        const char      *ptr = (const char *) buf;
+       block_entry_t   new_block = NULL, old_block = NULL;
+       int             bmap_flags = 0;
 
        EXT2_CHECK_MAGIC(file, EXT2_ET_MAGIC_EXT2_FILE);
        fs = file->fs;
@@ -424,22 +432,51 @@ errcode_t ext2fs_file_write(ext2_file_t file, const void *buf,
                if (retval)
                        goto fail;
 
+               file->flags |= EXT2_FILE_BUF_DIRTY;
+               memcpy(file->buf+start, ptr, c);
+
                /*
                 * OK, the physical block hasn't been allocated yet.
                 * Allocate it.
                 */
                if (!file->physblock) {
+                       bmap_flags = (file->ino ? BMAP_ALLOC : 0);
+                       if (fs->flags & EXT2_FLAG_SHARE_DUP) {
+                               new_block = calloc(1, sizeof(*new_block));
+                               if (!new_block) {
+                                       retval = EXT2_ET_NO_MEMORY;
+                                       goto fail;
+                               }
+                               ext2fs_sha512((const unsigned char*)file->buf,
+                                               fs->blocksize, new_block->sha);
+                               old_block = ext2fs_hashmap_lookup(
+                                                       fs->block_sha_map,
+                                                       new_block->sha,
+                                                       sizeof(new_block->sha));
+                       }
+
+                       if (old_block) {
+                               file->physblock = old_block->physblock;
+                               bmap_flags |= BMAP_SET;
+                               free(new_block);
+                               new_block = NULL;
+                       }
+
                        retval = ext2fs_bmap2(fs, file->ino, &file->inode,
                                              BMAP_BUFFER,
-                                             file->ino ? BMAP_ALLOC : 0,
+                                             bmap_flags,
                                              file->blockno, 0,
                                              &file->physblock);
                        if (retval)
                                goto fail;
+
+                       if (new_block) {
+                               new_block->physblock = file->physblock;
+                               ext2fs_hashmap_add(fs->block_sha_map, new_block,
+                                       new_block->sha, sizeof(new_block->sha));
+                       }
                }
 
-               file->flags |= EXT2_FILE_BUF_DIRTY;
-               memcpy(file->buf+start, ptr, c);
                file->pos += c;
                ptr += c;
                count += c;
index ea9742e..68b8e9a 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "ext2_fs.h"
 #include "ext2fsP.h"
+#include "hashmap.h"
 
 void ext2fs_free(ext2_filsys fs)
 {
@@ -59,6 +60,9 @@ void ext2fs_free(ext2_filsys fs)
        if (fs->mmp_cmp)
                ext2fs_free_mem(&fs->mmp_cmp);
 
+       if (fs->block_sha_map)
+               ext2fs_hashmap_free(fs->block_sha_map);
+
        fs->magic = 0;
 
        ext2fs_zero_blocks2(NULL, 0, 0, NULL, NULL);
index 902d69e..e159bb9 100644 (file)
@@ -94,6 +94,12 @@ errcode_t ext2fs_open(const char *name, int flags, int superblock,
                            manager, ret_fs);
 }
 
+static void block_sha_map_free_entry(void *data) /* free callback handed to ext2fs_hashmap_create() for fs->block_sha_map */
+{
+    free(data); /* data is released with plain free(); the entries added in ext2fs_file_write() come from calloc() */
+    return;
+}
+
 /*
  *  Note: if superblock is non-zero, block-size must also be non-zero.
  *     Superblock and block_size can be zero to use the default size.
@@ -486,6 +492,16 @@ errcode_t ext2fs_open2(const char *name, const char *io_options,
                }
        }
 
+       if (fs->flags & EXT2_FLAG_SHARE_DUP) {
+               fs->block_sha_map = ext2fs_hashmap_create(ext2fs_djb2_hash,
+                                       block_sha_map_free_entry, 4096);
+               if (!fs->block_sha_map) {
+                       retval = EXT2_ET_NO_MEMORY;
+                       goto cleanup;
+               }
+               ext2fs_set_feature_shared_blocks(fs->super);
+       }
+
        fs->flags &= ~EXT2_FLAG_NOFREE_ON_ERROR;
        *ret_fs = fs;