1 From 3d56b8d2c74cc3f375ce332b3ac3519e009d79ee Mon Sep 17 00:00:00 2001
2 From: Tao Ma <boyu.mt@taobao.com>
3 Date: Mon, 11 Jul 2011 00:03:38 -0400
4 Subject: ext4: Speed up FITRIM by recording flags in ext4_group_info
6 Patch-mainline: v3.1-rc1
8 In ext4, every time FITRIM is called, we iterate over all the
9 groups and trim them one by one. This wastes time if a group
10 has already been trimmed and has not changed since the last trim.
13 So this patch adds a new flag in ext4_group_info->bb_state to
14 indicate that the group has been trimmed; the flag is cleared
15 when blocks are freed (in release_blocks_on_commit). In addition,
16 s_last_trim_minblks is added to ext4_sb_info to record the last minlen
17 used to trim the volume, so that if the caller provides a smaller
18 one, we go ahead with the trim regardless of the bb_state.
20 A simple test with my intel x25m ssd:
22 /dev/sdb1 40G 21G 17G 56% /mnt/ext4
25 Run FITRIM with the following parameters:
27 range.len = UINT64_MAX;
28 range.minlen = 1048576;
31 [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
35 [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
39 [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
45 [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
49 [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
53 [root@boyu-tm linux-2.6]# time ./ftrim /mnt/ext4/a
58 A big improvement for the 2nd and 3rd runs.
60 Even after I deleted some big image files, it is still much
61 faster than iterating over the whole disk.
63 [root@boyu-tm test]# time ./ftrim /mnt/ext4/a
68 Upstream-Cc: Lukas Czerner <lczerner@redhat.com>
69 Upstream-Reviewed-by: Andreas Dilger <adilger.kernel@dilger.ca>
70 Upstream-Signed-off-by: Tao Ma <boyu.mt@taobao.com>
71 Upstream-Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
72 Signed-off-by: Jeff Mahoney <jeffm@suse.com>
74 fs/ext4/ext4.h | 13 ++++++++++++-
75 fs/ext4/mballoc.c | 20 ++++++++++++++++++++
76 2 files changed, 32 insertions(+), 1 deletion(-)
80 @@ -1215,6 +1215,9 @@ struct ext4_sb_info {
82 /* Kernel thread for multiple mount protection */
83 struct task_struct *s_mmp_tsk;
85 + /* record the last minlen when FITRIM is called. */
86 + atomic_t s_last_trim_minblks;
89 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
90 @@ -2071,11 +2074,19 @@ struct ext4_group_info {
91 * 5 free 8-block regions. */
94 -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
95 +#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
96 +#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1
98 #define EXT4_MB_GRP_NEED_INIT(grp) \
99 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
101 +#define EXT4_MB_GRP_WAS_TRIMMED(grp) \
102 + (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
103 +#define EXT4_MB_GRP_SET_TRIMMED(grp) \
104 + (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
105 +#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \
106 + (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
108 #define EXT4_MAX_CONTENTION 8
109 #define EXT4_CONTENTION_THRESHOLD 2
111 --- a/fs/ext4/mballoc.c
112 +++ b/fs/ext4/mballoc.c
113 @@ -2629,6 +2629,15 @@ static void release_blocks_on_commit(jou
114 rb_erase(&entry->node, &(db->bb_free_root));
115 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
118 + * Clear the trimmed flag for the group so that the next
119 + * ext4_trim_fs can trim it.
120 + * If the volume is mounted with -o discard, online discard
121 + * is supported and the free blocks will be trimmed online.
123 + if (!test_opt(sb, DISCARD))
124 + EXT4_MB_GRP_CLEAR_TRIMMED(db);
126 if (!db->bb_free_root.rb_node) {
127 /* No more items in the per group rb tree
128 * balance refcounts from ext4_mb_free_metadata()
129 @@ -4838,6 +4847,10 @@ ext4_trim_all_free(struct super_block *s
130 bitmap = e4b.bd_bitmap;
132 ext4_lock_group(sb, group);
133 + if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
134 + minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
137 start = (e4b.bd_info->bb_first_free > start) ?
138 e4b.bd_info->bb_first_free : start;
140 @@ -4868,6 +4881,10 @@ ext4_trim_all_free(struct super_block *s
141 if ((e4b.bd_info->bb_free - count) < minblocks)
146 + EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
148 ext4_unlock_group(sb, group);
149 ext4_mb_unload_buddy(&e4b);
151 @@ -4954,5 +4971,8 @@ int ext4_trim_fs(struct super_block *sb,
153 range->len = trimmed * sb->s_blocksize;
156 + atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);