2 fs/ext4/ext4.h | 7 +++++++
3 fs/ext4/mballoc.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 fs/ext4/mballoc.h | 3 +
5 fs/ext4/sysfs.c | 52 +++++++++++++++++++++
6 4 files changed, 195 insertions(+)
10 @@ -1536,6 +1536,9 @@ struct ext4_sb_info {
11 unsigned int s_mb_min_to_scan;
12 unsigned int s_mb_stats;
13 unsigned int s_mb_order2_reqs;
14 + ext4_fsblk_t s_mb_c1_blocks;
15 + ext4_fsblk_t s_mb_c2_blocks;
16 + ext4_fsblk_t s_mb_c3_blocks;
17 unsigned long *s_mb_prealloc_table;
18 unsigned int s_mb_group_prealloc;
19 unsigned int s_mb_max_inode_prealloc;
20 @@ -1555,6 +1558,9 @@ struct ext4_sb_info {
21 atomic_t s_bal_goals; /* goal hits */
22 atomic_t s_bal_breaks; /* too long searches */
23 atomic_t s_bal_2orders; /* 2^order hits */
24 + /* cX loop didn't find blocks */
25 + atomic64_t s_bal_cX_failed[3];
26 + atomic64_t s_bal_cX_skipped[3];
27 spinlock_t s_bal_lock;
28 unsigned long s_mb_buddies_generated;
29 unsigned long long s_mb_generation_time;
30 @@ -2846,6 +2852,7 @@ ext4_read_inode_bitmap(struct super_bloc
32 extern const struct proc_ops ext4_seq_prealloc_table_fops;
33 extern const struct seq_operations ext4_mb_seq_groups_ops;
34 +extern const struct proc_ops ext4_mb_seq_alloc_fops;
35 extern const struct proc_ops ext4_seq_mb_last_group_fops;
36 extern int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v);
37 extern long ext4_mb_stats;
38 --- a/fs/ext4/mballoc.c
39 +++ b/fs/ext4/mballoc.c
40 @@ -2308,6 +2308,20 @@ void ext4_mb_prefetch_fini(struct super_
44 +static u64 available_blocks_count(struct ext4_sb_info *sbi)
46 + ext4_fsblk_t resv_blocks;
48 + struct ext4_super_block *es = sbi->s_es;
50 + resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
51 + bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
52 + percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
54 + bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
55 + return bfree - (ext4_r_blocks_count(es) + resv_blocks);
58 static noinline_for_stack int
59 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
61 @@ -2318,6 +2332,7 @@ ext4_mb_regular_allocator(struct ext4_al
62 struct ext4_sb_info *sbi;
63 struct super_block *sb;
64 struct ext4_buddy e4b;
65 + ext4_fsblk_t avail_blocks;
69 @@ -2371,6 +2386,21 @@ ext4_mb_regular_allocator(struct ext4_al
71 /* Let's just scan groups to find more-less suitable blocks */
72 cr = ac->ac_2order ? 0 : 1;
74 + /* Choose what loop to pass based on disk fullness */
75 + avail_blocks = available_blocks_count(sbi);
77 + if (avail_blocks < sbi->s_mb_c3_blocks) {
79 + atomic64_inc(&sbi->s_bal_cX_skipped[2]);
80 + } else if (avail_blocks < sbi->s_mb_c2_blocks) {
82 + atomic64_inc(&sbi->s_bal_cX_skipped[1]);
83 + } else if (avail_blocks < sbi->s_mb_c1_blocks) {
85 + atomic64_inc(&sbi->s_bal_cX_skipped[0]);
89 * cr == 0 try to get exact allocation,
90 * cr == 3 try to get anything
91 @@ -2458,6 +2488,9 @@ repeat:
92 if (ac->ac_status != AC_STATUS_CONTINUE)
95 + /* Processed all groups and haven't found blocks */
97 + atomic64_inc(&sbi->s_bal_cX_failed[cr]);
100 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
101 @@ -2746,6 +2779,92 @@ const struct proc_ops ext4_seq_mb_last_g
102 .proc_write = ext4_mb_last_group_write,
105 +static int mb_seq_alloc_show(struct seq_file *seq, void *v)
107 + struct super_block *sb = seq->private;
108 + struct ext4_sb_info *sbi = EXT4_SB(sb);
110 + seq_printf(seq, "mballoc:\n");
111 + seq_printf(seq, "\tblocks: %u\n", atomic_read(&sbi->s_bal_allocated));
112 + seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
113 + seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
115 + seq_printf(seq, "\textents_scanned: %u\n",
116 + atomic_read(&sbi->s_bal_ex_scanned));
117 + seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
118 + seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
119 + seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
120 + seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));
122 + seq_printf(seq, "\tuseless_c1_loops: %llu\n",
123 + atomic64_read(&sbi->s_bal_cX_failed[0]));
124 + seq_printf(seq, "\tuseless_c2_loops: %llu\n",
125 + atomic64_read(&sbi->s_bal_cX_failed[1]));
126 + seq_printf(seq, "\tuseless_c3_loops: %llu\n",
127 + atomic64_read(&sbi->s_bal_cX_failed[2]));
128 + seq_printf(seq, "\tskipped_c1_loops: %llu\n",
129 + atomic64_read(&sbi->s_bal_cX_skipped[0]));
130 + seq_printf(seq, "\tskipped_c2_loops: %llu\n",
131 + atomic64_read(&sbi->s_bal_cX_skipped[1]));
132 + seq_printf(seq, "\tskipped_c3_loops: %llu\n",
133 + atomic64_read(&sbi->s_bal_cX_skipped[2]));
134 + seq_printf(seq, "\tbuddies_generated: %lu\n",
135 + sbi->s_mb_buddies_generated);
136 + seq_printf(seq, "\tbuddies_time_used: %llu\n", sbi->s_mb_generation_time);
137 + seq_printf(seq, "\tpreallocated: %u\n",
138 + atomic_read(&sbi->s_mb_preallocated));
139 + seq_printf(seq, "\tdiscarded: %u\n",
140 + atomic_read(&sbi->s_mb_discarded));
144 +static ssize_t mb_seq_alloc_write(struct file *file,
145 + const char __user *buf,
146 + size_t cnt, loff_t *pos)
148 + struct ext4_sb_info *sbi = EXT4_SB(PDE_DATA(file_inode(file)));
150 + atomic_set(&sbi->s_bal_allocated, 0);
151 + atomic_set(&sbi->s_bal_reqs, 0);
152 + atomic_set(&sbi->s_bal_success, 0);
154 + atomic_set(&sbi->s_bal_ex_scanned, 0);
155 + atomic_set(&sbi->s_bal_goals, 0);
156 + atomic_set(&sbi->s_bal_2orders, 0);
157 + atomic_set(&sbi->s_bal_breaks, 0);
158 + atomic_set(&sbi->s_mb_lost_chunks, 0);
160 + atomic64_set(&sbi->s_bal_cX_failed[0], 0);
161 + atomic64_set(&sbi->s_bal_cX_failed[1], 0);
162 + atomic64_set(&sbi->s_bal_cX_failed[2], 0);
164 + atomic64_set(&sbi->s_bal_cX_skipped[0], 0);
165 + atomic64_set(&sbi->s_bal_cX_skipped[1], 0);
166 + atomic64_set(&sbi->s_bal_cX_skipped[2], 0);
169 + sbi->s_mb_buddies_generated = 0;
170 + sbi->s_mb_generation_time = 0;
172 + atomic_set(&sbi->s_mb_preallocated, 0);
173 + atomic_set(&sbi->s_mb_discarded, 0);
178 +static int mb_seq_alloc_open(struct inode *inode, struct file *file)
180 + return single_open(file, mb_seq_alloc_show, PDE_DATA(inode));
183 +const struct proc_ops ext4_mb_seq_alloc_fops = {
184 + .proc_open = mb_seq_alloc_open,
185 + .proc_read = seq_read,
186 + .proc_lseek = seq_lseek,
187 + .proc_release = single_release,
188 + .proc_write = mb_seq_alloc_write,
191 int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v)
193 struct ext4_sb_info *sbi = EXT4_SB(m->private);
194 @@ -2992,6 +3111,7 @@ static int ext4_groupinfo_create_slab(si
198 +#define THRESHOLD_BLOCKS(ts) (ext4_blocks_count(sbi->s_es) / 100 * (ts))
199 int ext4_mb_init(struct super_block *sb)
201 struct ext4_sb_info *sbi = EXT4_SB(sb);
202 @@ -3046,6 +3166,9 @@ int ext4_mb_init(struct super_block *sb)
203 sbi->s_mb_stats = MB_DEFAULT_STATS;
204 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
205 sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
206 + sbi->s_mb_c1_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C1_THRESHOLD);
207 + sbi->s_mb_c2_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C2_THRESHOLD);
208 + sbi->s_mb_c3_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C3_THRESHOLD);
210 * The default group preallocation is 512, which for 4k block
211 * sizes translates to 2 megabytes. However for bigalloc file
212 @@ -3185,6 +3308,16 @@ int ext4_mb_release(struct super_block *
213 atomic_read(&sbi->s_bal_reqs),
214 atomic_read(&sbi->s_bal_success));
215 ext4_msg(sb, KERN_INFO,
216 + "mballoc: (%llu, %llu, %llu) useless c(0,1,2) loops",
217 + atomic64_read(&sbi->s_bal_cX_failed[0]),
218 + atomic64_read(&sbi->s_bal_cX_failed[1]),
219 + atomic64_read(&sbi->s_bal_cX_failed[2]));
220 + ext4_msg(sb, KERN_INFO,
221 + "mballoc: (%llu, %llu, %llu) skipped c(0,1,2) loops",
222 + atomic64_read(&sbi->s_bal_cX_skipped[0]),
223 + atomic64_read(&sbi->s_bal_cX_skipped[1]),
224 + atomic64_read(&sbi->s_bal_cX_skipped[2]));
225 + ext4_msg(sb, KERN_INFO,
226 "mballoc: %u extents scanned, %u goal hits, "
227 "%u 2^N hits, %u breaks, %u lost",
228 atomic_read(&sbi->s_bal_ex_scanned),
229 --- a/fs/ext4/mballoc.h
230 +++ b/fs/ext4/mballoc.h
232 * for which requests use 2^N search using buddies
234 #define MB_DEFAULT_ORDER2_REQS 8
235 +#define MB_DEFAULT_C1_THRESHOLD 25
236 +#define MB_DEFAULT_C2_THRESHOLD 15
237 +#define MB_DEFAULT_C3_THRESHOLD 5
240 * default group prealloc size 512 blocks
241 --- a/fs/ext4/sysfs.c
242 +++ b/fs/ext4/sysfs.c
246 attr_delayed_allocation_blocks,
247 + attr_mb_c1_threshold,
248 + attr_mb_c2_threshold,
249 + attr_mb_c3_threshold,
250 attr_session_write_kbytes,
251 attr_lifetime_write_kbytes,
252 attr_reserved_clusters,
253 @@ -140,6 +143,32 @@ static ssize_t journal_task_show(struct
254 task_pid_vnr(sbi->s_journal->j_task));
257 +#define THRESHOLD_PERCENT(ts) ((ts) * 100 / ext4_blocks_count(sbi->s_es))
259 +static int save_threshold_percent(struct ext4_sb_info *sbi, const char *buf,
260 + ext4_fsblk_t *blocks)
262 + unsigned long long val;
266 + ret = kstrtoull(skip_spaces(buf), 0, &val);
267 + if (ret || val > 100)
270 + *blocks = val * ext4_blocks_count(sbi->s_es) / 100;
274 +static ssize_t mb_threshold_store(struct ext4_sb_info *sbi,
275 + const char *buf, size_t count,
276 + ext4_fsblk_t *blocks)
278 + int ret = save_threshold_percent(sbi, buf, blocks);
280 + return ret ?: count;
283 #define EXT4_ATTR(_name,_mode,_id) \
284 static struct ext4_attr ext4_attr_##_name = { \
285 .attr = {.name = __stringify(_name), .mode = _mode }, \
286 @@ -208,6 +237,9 @@ EXT4_ATTR_FUNC(delayed_allocation_blocks
287 EXT4_ATTR_FUNC(session_write_kbytes, 0444);
288 EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
289 EXT4_ATTR_FUNC(reserved_clusters, 0644);
290 +EXT4_ATTR_FUNC(mb_c1_threshold, 0644);
291 +EXT4_ATTR_FUNC(mb_c2_threshold, 0644);
292 +EXT4_ATTR_FUNC(mb_c3_threshold, 0644);
294 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
295 ext4_sb_info, s_inode_readahead_blks);
296 @@ -261,6 +293,9 @@ static struct attribute *ext4_attrs[] =
297 ATTR_LIST(session_write_kbytes),
298 ATTR_LIST(lifetime_write_kbytes),
299 ATTR_LIST(reserved_clusters),
300 + ATTR_LIST(mb_c1_threshold),
301 + ATTR_LIST(mb_c2_threshold),
302 + ATTR_LIST(mb_c3_threshold),
303 ATTR_LIST(inode_readahead_blks),
304 ATTR_LIST(inode_goal),
305 ATTR_LIST(max_dir_size),
306 @@ -378,6 +413,15 @@ static ssize_t ext4_attr_show(struct kob
307 return snprintf(buf, PAGE_SIZE, "%llu\n",
309 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
310 + case attr_mb_c1_threshold:
311 + return scnprintf(buf, PAGE_SIZE, "%llu\n",
312 + THRESHOLD_PERCENT(sbi->s_mb_c1_blocks));
313 + case attr_mb_c2_threshold:
314 + return scnprintf(buf, PAGE_SIZE, "%llu\n",
315 + THRESHOLD_PERCENT(sbi->s_mb_c2_blocks));
316 + case attr_mb_c3_threshold:
317 + return scnprintf(buf, PAGE_SIZE, "%llu\n",
318 + THRESHOLD_PERCENT(sbi->s_mb_c3_blocks));
319 case attr_session_write_kbytes:
320 return session_write_kbytes_show(sbi, buf);
321 case attr_lifetime_write_kbytes:
322 @@ -479,6 +523,12 @@ static ssize_t ext4_attr_store(struct ko
323 return inode_readahead_blks_store(sbi, buf, len);
324 case attr_trigger_test_error:
325 return trigger_test_error(sbi, buf, len);
326 + case attr_mb_c1_threshold:
327 + return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c1_blocks);
328 + case attr_mb_c2_threshold:
329 + return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c2_blocks);
330 + case attr_mb_c3_threshold:
331 + return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c3_blocks);
335 @@ -541,6 +591,8 @@ int ext4_register_sysfs(struct super_blo
336 &ext4_seq_mb_last_group_fops, sb);
337 proc_create_single_data("mb_last_start", S_IRUGO, sbi->s_proc,
338 ext4_mb_seq_last_start_seq_show, sb);
339 + proc_create_data("mb_alloc_stats", S_IFREG | S_IRUGO | S_IWUSR,
340 + sbi->s_proc, &ext4_mb_seq_alloc_fops, sb);