1 Index: linux-stage/fs/ext4/ext4.h
2 ===================================================================
3 --- linux-stage.orig/fs/ext4/ext4.h
4 +++ linux-stage/fs/ext4/ext4.h
5 @@ -1494,6 +1494,9 @@ struct ext4_sb_info {
6 unsigned int s_mb_min_to_scan;
7 unsigned int s_mb_stats;
8 unsigned int s_mb_order2_reqs;
9 + ext4_fsblk_t s_mb_c1_blocks;
10 + ext4_fsblk_t s_mb_c2_blocks;
11 + ext4_fsblk_t s_mb_c3_blocks;
12 unsigned long *s_mb_prealloc_table;
13 unsigned int s_mb_group_prealloc;
14 unsigned int s_max_dir_size_kb;
15 @@ -1510,6 +1513,9 @@ struct ext4_sb_info {
16 atomic_t s_bal_goals; /* goal hits */
17 atomic_t s_bal_breaks; /* too long searches */
18 atomic_t s_bal_2orders; /* 2^order hits */
19 + /* cX loop didn't find blocks; indexed by cr, which runs 0..3 */
20 + atomic64_t s_bal_cX_failed[4];
21 + atomic64_t s_bal_cX_skipped[3]; /* cX loop was skipped */
22 spinlock_t s_bal_lock;
23 unsigned long s_mb_buddies_generated;
24 unsigned long long s_mb_generation_time;
25 @@ -2723,6 +2729,9 @@ ext4_read_inode_bitmap(struct super_bloc
27 extern const struct file_operations ext4_seq_prealloc_table_fops;
28 extern const struct seq_operations ext4_mb_seq_groups_ops;
29 +extern const struct file_operations ext4_mb_seq_alloc_fops;
30 +extern int save_threshold_percent(struct ext4_sb_info *sbi, const char *buf,
31 + ext4_fsblk_t *blocks);
32 extern const struct file_operations ext4_seq_mb_last_group_fops;
33 extern int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v);
34 extern long ext4_mb_stats;
35 Index: linux-stage/fs/ext4/mballoc.c
36 ===================================================================
37 --- linux-stage.orig/fs/ext4/mballoc.c
38 +++ linux-stage/fs/ext4/mballoc.c
39 @@ -2114,6 +2114,20 @@ static int ext4_mb_good_group(struct ext
43 +static u64 available_blocks_count(struct ext4_sb_info *sbi)
45 + ext4_fsblk_t resv_blocks;
47 + struct ext4_super_block *es = sbi->s_es;
49 + resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
50 + bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
51 + percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
53 + bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
54 + return bfree - (ext4_r_blocks_count(es) + resv_blocks);
57 static noinline_for_stack int
58 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
60 @@ -2123,6 +2137,7 @@ ext4_mb_regular_allocator(struct ext4_al
61 struct ext4_sb_info *sbi;
62 struct super_block *sb;
63 struct ext4_buddy e4b;
64 + ext4_fsblk_t avail_blocks;
68 @@ -2175,6 +2190,21 @@ ext4_mb_regular_allocator(struct ext4_al
70 /* Let's just scan groups to find more-less suitable blocks */
71 cr = ac->ac_2order ? 0 : 1;
73 + /* Choose which cr loops to skip based on how full the filesystem is */
74 + avail_blocks = available_blocks_count(sbi) ;
76 + if (avail_blocks < sbi->s_mb_c3_blocks) {
78 + atomic64_inc(&sbi->s_bal_cX_skipped[2]);
79 + } else if(avail_blocks < sbi->s_mb_c2_blocks) {
81 + atomic64_inc(&sbi->s_bal_cX_skipped[1]);
82 + } else if(avail_blocks < sbi->s_mb_c1_blocks) {
84 + atomic64_inc(&sbi->s_bal_cX_skipped[0]);
88 * cr == 0 try to get exact allocation,
89 * cr == 3 try to get anything
90 @@ -2240,6 +2270,9 @@ repeat:
91 if (ac->ac_status != AC_STATUS_CONTINUE)
94 + /* Processed all groups and haven't found blocks */
96 + atomic64_inc(&sbi->s_bal_cX_failed[cr]);
99 if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
100 @@ -2520,6 +2553,93 @@ const struct file_operations ext4_seq_mb
101 .write = ext4_mb_last_group_write,
104 +static int mb_seq_alloc_show(struct seq_file *seq, void *v)
106 + struct super_block *sb = seq->private;
107 + struct ext4_sb_info *sbi = EXT4_SB(sb);
109 + seq_printf(seq, "mballoc:\n");
110 + seq_printf(seq, "\tblocks: %u\n", atomic_read(&sbi->s_bal_allocated));
111 + seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
112 + seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
114 + seq_printf(seq, "\textents_scanned: %u\n",
115 + atomic_read(&sbi->s_bal_ex_scanned));
116 + seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
117 + seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
118 + seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
119 + seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));
121 + seq_printf(seq, "\tuseless_c1_loops: %llu\n",
122 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[0]));
123 + seq_printf(seq, "\tuseless_c2_loops: %llu\n",
124 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[1]));
125 + seq_printf(seq, "\tuseless_c3_loops: %llu\n",
126 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[2]));
127 + seq_printf(seq, "\tskipped_c1_loops: %llu\n",
128 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[0]));
129 + seq_printf(seq, "\tskipped_c2_loops: %llu\n",
130 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[1]));
131 + seq_printf(seq, "\tskipped_c3_loops: %llu\n",
132 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[2]));
133 + seq_printf(seq, "\tbuddies_generated: %lu\n",
134 + sbi->s_mb_buddies_generated);
135 + seq_printf(seq, "\tbuddies_time_used: %llu\n", sbi->s_mb_generation_time);
136 + seq_printf(seq, "\tpreallocated: %u\n",
137 + atomic_read(&sbi->s_mb_preallocated));
138 + seq_printf(seq, "\tdiscarded: %u\n",
139 + atomic_read(&sbi->s_mb_discarded));
143 +static ssize_t mb_seq_alloc_write(struct file *file,
144 + const char __user *buf,
145 + size_t cnt, loff_t *pos)
147 + struct ext4_sb_info *sbi = EXT4_SB(PDE_DATA(file_inode(file)));
149 + atomic_set(&sbi->s_bal_allocated, 0),
150 + atomic_set(&sbi->s_bal_reqs, 0),
151 + atomic_set(&sbi->s_bal_success, 0);
153 + atomic_set(&sbi->s_bal_ex_scanned, 0),
154 + atomic_set(&sbi->s_bal_goals, 0),
155 + atomic_set(&sbi->s_bal_2orders, 0),
156 + atomic_set(&sbi->s_bal_breaks, 0),
157 + atomic_set(&sbi->s_mb_lost_chunks, 0);
159 + atomic64_set(&sbi->s_bal_cX_failed[0], 0),
160 + atomic64_set(&sbi->s_bal_cX_failed[1], 0),
161 + atomic64_set(&sbi->s_bal_cX_failed[2], 0);
163 + atomic64_set(&sbi->s_bal_cX_skipped[0], 0),
164 + atomic64_set(&sbi->s_bal_cX_skipped[1], 0),
165 + atomic64_set(&sbi->s_bal_cX_skipped[2], 0);
168 + sbi->s_mb_buddies_generated = 0;
169 + sbi->s_mb_generation_time = 0;
171 + atomic_set(&sbi->s_mb_preallocated, 0),
172 + atomic_set(&sbi->s_mb_discarded, 0);
177 +static int mb_seq_alloc_open(struct inode *inode, struct file *file)
179 + return single_open(file, mb_seq_alloc_show, PDE_DATA(inode));
182 +const struct file_operations ext4_mb_seq_alloc_fops = {
183 + .owner = THIS_MODULE,
184 + .open = mb_seq_alloc_open,
186 + .llseek = seq_lseek,
187 + .release = single_release,
188 + .write = mb_seq_alloc_write,
191 int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v)
193 struct ext4_sb_info *sbi = EXT4_SB(m->private);
194 @@ -2759,6 +2879,8 @@ static int ext4_groupinfo_create_slab(si
198 +#define THRESHOLD_BLOCKS(sbi, percent) /* multiply first: keeps precision */ \
199 + (ext4_blocks_count((sbi)->s_es) * (percent) / 100)
200 int ext4_mb_init(struct super_block *sb)
202 struct ext4_sb_info *sbi = EXT4_SB(sb);
203 @@ -2812,6 +2934,15 @@ int ext4_mb_init(struct super_block *sb)
204 sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
205 sbi->s_mb_stats = MB_DEFAULT_STATS;
206 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
207 + if (!sbi->s_mb_c1_blocks)
208 + sbi->s_mb_c1_blocks =
209 + THRESHOLD_BLOCKS(sbi, MB_DEFAULT_C1_THRESHOLD);
210 + if (!sbi->s_mb_c2_blocks)
211 + sbi->s_mb_c2_blocks =
212 + THRESHOLD_BLOCKS(sbi, MB_DEFAULT_C2_THRESHOLD);
213 + if (!sbi->s_mb_c3_blocks)
214 + sbi->s_mb_c3_blocks =
215 + THRESHOLD_BLOCKS(sbi, MB_DEFAULT_C3_THRESHOLD);
217 * The default group preallocation is 512, which for 4k block
218 * sizes translates to 2 megabytes. However for bigalloc file
219 @@ -2951,6 +3082,16 @@ int ext4_mb_release(struct super_block *
220 atomic_read(&sbi->s_bal_reqs),
221 atomic_read(&sbi->s_bal_success));
222 ext4_msg(sb, KERN_INFO,
223 + "mballoc: (%llu, %llu, %llu) useless c(0,1,2) loops",
224 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[0]),
225 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[1]),
226 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[2]));
227 + ext4_msg(sb, KERN_INFO,
228 + "mballoc: (%llu, %llu, %llu) skipped c(0,1,2) loops",
229 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[0]),
230 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[1]),
231 + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[2]));
232 + ext4_msg(sb, KERN_INFO,
233 "mballoc: %u extents scanned, %u goal hits, "
234 "%u 2^N hits, %u breaks, %u lost",
235 atomic_read(&sbi->s_bal_ex_scanned),
236 Index: linux-stage/fs/ext4/mballoc.h
237 ===================================================================
238 --- linux-stage.orig/fs/ext4/mballoc.h
239 +++ linux-stage/fs/ext4/mballoc.h
240 @@ -72,6 +72,9 @@ do { \
241 * for which requests use 2^N search using buddies
243 #define MB_DEFAULT_ORDER2_REQS 8
244 +#define MB_DEFAULT_C1_THRESHOLD 25
245 +#define MB_DEFAULT_C2_THRESHOLD 15
246 +#define MB_DEFAULT_C3_THRESHOLD 5
249 * default group prealloc size 512 blocks
250 Index: linux-stage/fs/ext4/super.c
251 ===================================================================
252 --- linux-stage.orig/fs/ext4/super.c
253 +++ linux-stage/fs/ext4/super.c
254 @@ -1468,6 +1468,7 @@ enum {
255 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
256 Opt_inode_readahead_blks, Opt_journal_ioprio,
257 Opt_dioread_nolock, Opt_dioread_lock,
258 + Opt_mb_c1_threshold, Opt_mb_c2_threshold, Opt_mb_c3_threshold,
259 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
260 Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
262 @@ -1554,6 +1555,9 @@ static const match_table_t tokens = {
263 {Opt_init_itable, "init_itable"},
264 {Opt_noinit_itable, "noinit_itable"},
265 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
266 + {Opt_mb_c1_threshold, "mb_c1_threshold=%s"},
267 + {Opt_mb_c2_threshold, "mb_c2_threshold=%s"},
268 + {Opt_mb_c3_threshold, "mb_c3_threshold=%s"},
269 {Opt_test_dummy_encryption, "test_dummy_encryption"},
270 {Opt_nombcache, "nombcache"},
271 {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
272 @@ -1766,6 +1770,9 @@ static const struct mount_opts {
273 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
274 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
275 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
276 + {Opt_mb_c1_threshold, 0, MOPT_STRING},
277 + {Opt_mb_c2_threshold, 0, MOPT_STRING},
278 + {Opt_mb_c3_threshold, 0, MOPT_STRING},
279 {Opt_test_dummy_encryption, 0, MOPT_GTE0},
280 {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
282 @@ -1929,6 +1936,12 @@ static int handle_mount_opt(struct super
283 sbi->s_max_dir_size_kb = arg;
284 /* reset s_warning_dir_size and make it re-calculated */
285 sbi->s_warning_dir_size = 0;
286 + } else if (token == Opt_mb_c1_threshold) {
287 + save_threshold_percent(sbi, args[0].from, &sbi->s_mb_c1_blocks);
288 + } else if (token == Opt_mb_c2_threshold) {
289 + save_threshold_percent(sbi, args[0].from, &sbi->s_mb_c2_blocks);
290 + } else if (token == Opt_mb_c3_threshold) {
291 + save_threshold_percent(sbi, args[0].from, &sbi->s_mb_c3_blocks);
292 } else if (token == Opt_stripe) {
294 } else if (token == Opt_resuid) {
295 Index: linux-stage/fs/ext4/sysfs.c
296 ===================================================================
297 --- linux-stage.orig/fs/ext4/sysfs.c
298 +++ linux-stage/fs/ext4/sysfs.c
302 attr_delayed_allocation_blocks,
303 + attr_mb_c1_threshold,
304 + attr_mb_c2_threshold,
305 + attr_mb_c3_threshold,
306 attr_session_write_kbytes,
307 attr_lifetime_write_kbytes,
308 attr_reserved_clusters,
309 @@ -135,6 +138,32 @@ static ssize_t journal_task_show(struct
310 task_pid_vnr(sbi->s_journal->j_task));
313 +int save_threshold_percent(struct ext4_sb_info *sbi, const char *buf,
314 + ext4_fsblk_t *blocks)
316 + unsigned long long val;
320 + ret = kstrtoull(skip_spaces(buf), 0, &val);
321 + if (ret || val > 100)
324 + *blocks = val * ext4_blocks_count(sbi->s_es) / 100;
328 +#define THRESHOLD_PERCENT(sbi, blocks) (!(blocks) ? 0 : /* no 0 - 1 wrap */ \
329 + ((blocks) - 1) * 100 / ext4_blocks_count((sbi)->s_es) + 1)
330 +static ssize_t mb_threshold_store(struct ext4_sb_info *sbi,
331 + const char *buf, size_t count,
332 + ext4_fsblk_t *blocks)
334 + int ret = save_threshold_percent(sbi, buf, blocks);
336 + return ret ?: count;
339 #define EXT4_ATTR(_name,_mode,_id) \
340 static struct ext4_attr ext4_attr_##_name = { \
341 .attr = {.name = __stringify(_name), .mode = _mode }, \
342 @@ -178,6 +207,9 @@ EXT4_ATTR_FUNC(session_write_kbytes, 044
343 EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
344 EXT4_ATTR_FUNC(reserved_clusters, 0644);
345 EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);
346 +EXT4_ATTR_FUNC(mb_c1_threshold, 0644);
347 +EXT4_ATTR_FUNC(mb_c2_threshold, 0644);
348 +EXT4_ATTR_FUNC(mb_c3_threshold, 0644);
350 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
351 ext4_sb_info, s_inode_readahead_blks);
352 @@ -214,6 +246,9 @@ static struct attribute *ext4_attrs[] =
353 ATTR_LIST(lifetime_write_kbytes),
354 ATTR_LIST(reserved_clusters),
355 ATTR_LIST(sra_exceeded_retry_limit),
356 + ATTR_LIST(mb_c1_threshold),
357 + ATTR_LIST(mb_c2_threshold),
358 + ATTR_LIST(mb_c3_threshold),
359 ATTR_LIST(inode_readahead_blks),
360 ATTR_LIST(inode_goal),
361 ATTR_LIST(max_dir_size),
362 @@ -311,6 +346,15 @@ static ssize_t ext4_attr_show(struct kob
363 return snprintf(buf, PAGE_SIZE, "%llu\n",
365 percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
366 + case attr_mb_c1_threshold:
367 + return scnprintf(buf, PAGE_SIZE, "%llu\n",
368 + THRESHOLD_PERCENT(sbi, sbi->s_mb_c1_blocks));
369 + case attr_mb_c2_threshold:
370 + return scnprintf(buf, PAGE_SIZE, "%llu\n",
371 + THRESHOLD_PERCENT(sbi, sbi->s_mb_c2_blocks));
372 + case attr_mb_c3_threshold:
373 + return scnprintf(buf, PAGE_SIZE, "%llu\n",
374 + THRESHOLD_PERCENT(sbi, sbi->s_mb_c3_blocks));
375 case attr_session_write_kbytes:
376 return session_write_kbytes_show(sbi, buf);
377 case attr_lifetime_write_kbytes:
378 @@ -384,6 +428,12 @@ static ssize_t ext4_attr_store(struct ko
379 return inode_readahead_blks_store(sbi, buf, len);
380 case attr_trigger_test_error:
381 return trigger_test_error(sbi, buf, len);
382 + case attr_mb_c1_threshold:
383 + return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c1_blocks);
384 + case attr_mb_c2_threshold:
385 + return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c2_blocks);
386 + case attr_mb_c3_threshold:
387 + return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c3_blocks);
391 @@ -446,6 +496,8 @@ int ext4_register_sysfs(struct super_blo
392 &ext4_seq_mb_last_group_fops, sb);
393 proc_create_single_data("mb_last_start", S_IRUGO, sbi->s_proc,
394 ext4_mb_seq_last_start_seq_show, sb);
395 + proc_create_data("mb_alloc_stats", S_IFREG | S_IRUGO | S_IWUSR,
396 + sbi->s_proc, &ext4_mb_seq_alloc_fops, sb);