---
fs/ext4/ext4.h | 23 ++++++++++++++++++++++-
+ fs/ext4/ext4_jbd2.c | 1 +
fs/ext4/ialloc.c | 3 ++-
fs/ext4/inode.c | 15 +++++++++++++++
fs/ext4/namei.c | 9 ++++++---
fs/ext4/super.c | 10 ++--------
- 5 files changed, 47 insertions(+), 13 deletions(-)
+ 5 files changed, 48 insertions(+), 13 deletions(-)
---- a/fs/ext4/ext4.h
-+++ b/fs/ext4/ext4.h
-@@ -1759,6 +1759,8 @@ static inline bool ext4_verity_in_progre
+diff -ur a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+--- a/fs/ext4/ext4.h 2021-06-28 08:45:39.093954644 -0600
++++ b/fs/ext4/ext4.h 2021-06-28 08:46:06.913523572 -0600
+@@ -1764,6 +1764,8 @@
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
* Codes for operating systems
*/
-@@ -1990,7 +1992,21 @@ static inline bool ext4_has_unknown_ext#
+@@ -1995,7 +1997,21 @@
EXTN_FEATURE_FUNCS(2)
EXTN_FEATURE_FUNCS(3)
static inline bool ext4_has_compat_features(struct super_block *sb)
{
-@@ -3393,6 +3409,11 @@ struct ext4_extent;
+@@ -3399,6 +3415,11 @@
#define EXT_MAX_BLOCKS 0xffffffff
extern void ext4_ext_tree_init(handle_t *handle, struct inode *inode);
extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
---- a/fs/ext4/ialloc.c
-+++ b/fs/ext4/ialloc.c
-@@ -115,7 +115,7 @@ verified:
+diff -ur a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
+--- a/fs/ext4/ext4_jbd2.c 2021-06-28 08:45:38.905957595 -0600
++++ b/fs/ext4/ext4_jbd2.c 2021-06-28 08:49:18.306817373 -0600
+@@ -169,6 +169,7 @@
+ revoke_cred = max(0, revoke_cred - handle->h_revoke_credits);
+ return ext4_journal_extend(handle, extend_cred, revoke_cred);
+ }
++EXPORT_SYMBOL(__ext4_journal_ensure_credits);
+
+ static void ext4_journal_abort_handle(const char *caller, unsigned int line,
+ const char *err_fn,
+diff -ur a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
+--- a/fs/ext4/ialloc.c 2021-06-28 08:45:38.993956213 -0600
++++ b/fs/ext4/ialloc.c 2021-06-28 08:46:06.917523511 -0600
+@@ -115,7 +115,7 @@
*
* Return buffer_head of bitmap on success, or an ERR_PTR on error.
*/
ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc *desc;
-@@ -213,6 +213,7 @@ out:
+@@ -214,6 +214,7 @@
put_bh(bh);
return ERR_PTR(err);
}
/*
* NOTE! When we get the inode, we're the only people
---- a/fs/ext4/inode.c
-+++ b/fs/ext4/inode.c
-@@ -6065,3 +6065,18 @@ vm_fault_t ext4_filemap_fault(struct vm_
+diff -ur a/fs/ext4/inode.c b/fs/ext4/inode.c
+--- a/fs/ext4/inode.c 2021-06-28 08:45:39.069955021 -0600
++++ b/fs/ext4/inode.c 2021-06-28 08:46:06.921523449 -0600
+@@ -6088,3 +6088,18 @@
return ret;
}
+EXPORT_SYMBOL(__ext4_std_error);
+EXPORT_SYMBOL(ext4fs_dirhash);
+EXPORT_SYMBOL(ext4_get_inode_loc);
---- a/fs/ext4/namei.c
-+++ b/fs/ext4/namei.c
+diff -ur a/fs/ext4/namei.c b/fs/ext4/namei.c
+--- a/fs/ext4/namei.c 2021-06-28 08:45:39.093954644 -0600
++++ b/fs/ext4/namei.c 2021-06-28 08:46:06.921523449 -0600
@@ -50,7 +50,7 @@
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
struct inode *inode,
ext4_lblk_t *block)
{
-@@ -181,6 +181,7 @@ static struct buffer_head *__ext4_read_d
+@@ -181,6 +181,7 @@
}
return bh;
}
#ifndef assert
#define assert(test) J_ASSERT(test)
-@@ -2572,23 +2573,25 @@ EXPORT_SYMBOL(ext4_delete_entry);
+@@ -2584,23 +2585,25 @@
* for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set
* on regular files) and to avoid creating huge/slow non-HTREE directories.
*/
/*
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -347,7 +347,7 @@ static void __save_error_info(struct sup
+diff -ur a/fs/ext4/super.c b/fs/ext4/super.c
+--- a/fs/ext4/super.c 2021-06-28 08:45:38.909957532 -0600
++++ b/fs/ext4/super.c 2021-06-28 08:46:06.921523449 -0600
+@@ -348,7 +348,7 @@
return;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_update_tstamp(es, s_last_error_time);
es->s_last_error_line = cpu_to_le32(line);
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
-@@ -408,7 +408,7 @@ static void __save_error_info(struct sup
+@@ -409,7 +409,7 @@
if (!es->s_first_error_time) {
es->s_first_error_time = es->s_last_error_time;
es->s_first_error_time_hi = es->s_last_error_time_hi;
sizeof(es->s_first_error_func));
es->s_first_error_line = cpu_to_le32(line);
es->s_first_error_ino = es->s_last_error_ino;
-@@ -6315,16 +6315,12 @@ static int __init ext4_init_fs(void)
+@@ -6382,16 +6382,12 @@
err = init_inodecache();
if (err)
goto out1;
destroy_inodecache();
out1:
ext4_exit_mballoc();
-@@ -6347,8 +6343,6 @@ out7:
+@@ -6414,8 +6410,6 @@
static void __exit ext4_exit_fs(void)
{
ext4_destroy_lazyinit_thread();
--- /dev/null
+diff -ur a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+--- a/fs/ext4/ext4.h 2021-06-28 08:57:13.741381853 -0600
++++ b/fs/ext4/ext4.h 2021-06-28 08:58:52.432392498 -0600
+@@ -1519,6 +1519,9 @@
+ unsigned int s_mb_min_to_scan;
+ unsigned int s_mb_stats;
+ unsigned int s_mb_order2_reqs;
++ ext4_fsblk_t s_mb_c1_blocks;
++ ext4_fsblk_t s_mb_c2_blocks;
++ ext4_fsblk_t s_mb_c3_blocks;
+ unsigned long *s_mb_prealloc_table;
+ unsigned int s_mb_group_prealloc;
+ unsigned int s_mb_max_inode_prealloc;
+@@ -1536,6 +1539,9 @@
+ atomic_t s_bal_goals; /* goal hits */
+ atomic_t s_bal_breaks; /* too long searches */
+ atomic_t s_bal_2orders; /* 2^order hits */
++	/* cX loop didn't find blocks; failed[] is indexed by cr, which runs 0..3 */
++	atomic64_t s_bal_cX_failed[4];
++	atomic64_t s_bal_cX_skipped[3];
+ spinlock_t s_bal_lock;
+ unsigned long s_mb_buddies_generated;
+ unsigned long long s_mb_generation_time;
+@@ -2820,6 +2826,7 @@
+ /* mballoc.c */
+ extern const struct proc_ops ext4_seq_prealloc_table_fops;
+ extern const struct seq_operations ext4_mb_seq_groups_ops;
++extern const struct proc_ops ext4_mb_seq_alloc_fops;
+ extern const struct proc_ops ext4_seq_mb_last_group_fops;
+ extern int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v);
+ extern long ext4_mb_stats;
+diff -ur a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+--- a/fs/ext4/mballoc.c 2021-06-28 08:57:13.669382587 -0600
++++ b/fs/ext4/mballoc.c 2021-06-28 08:58:52.436392458 -0600
+@@ -2237,6 +2237,20 @@
+ return ret;
+ }
+
++/*
++ * Blocks left for allocation; 0, not a wrapped u64, if reserves exceed free.
++ */
++static u64 available_blocks_count(struct ext4_sb_info *sbi)
++{
++	u64 resv = ext4_r_blocks_count(sbi->s_es) +
++		   EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
++	s64 nfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
++		    percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
++	u64 bfree = EXT4_C2B(sbi, max_t(s64, nfree, 0));
++
++	return bfree > resv ? bfree - resv : 0;
++}
++
+ static noinline_for_stack int
+ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+ {
+@@ -2246,6 +2260,7 @@
+ struct ext4_sb_info *sbi;
+ struct super_block *sb;
+ struct ext4_buddy e4b;
++ ext4_fsblk_t avail_blocks;
+
+ sb = ac->ac_sb;
+ sbi = EXT4_SB(sb);
+@@ -2298,6 +2313,21 @@
+
+ /* Let's just scan groups to find more-less suitable blocks */
+ cr = ac->ac_2order ? 0 : 1;
++
++ /* Choose what loop to pass based on disk fullness */
++ avail_blocks = available_blocks_count(sbi) ;
++
++ if (avail_blocks < sbi->s_mb_c3_blocks) {
++ cr = 3;
++ atomic64_inc(&sbi->s_bal_cX_skipped[2]);
++ } else if(avail_blocks < sbi->s_mb_c2_blocks) {
++ cr = 2;
++ atomic64_inc(&sbi->s_bal_cX_skipped[1]);
++ } else if(avail_blocks < sbi->s_mb_c1_blocks) {
++ cr = 1;
++ atomic64_inc(&sbi->s_bal_cX_skipped[0]);
++ }
++
+ /*
+ * cr == 0 try to get exact allocation,
+ * cr == 3 try to get anything
+@@ -2361,6 +2391,9 @@
+ if (ac->ac_status != AC_STATUS_CONTINUE)
+ break;
+ }
++ /* Processed all groups and haven't found blocks */
++ if (i == ngroups)
++ atomic64_inc(&sbi->s_bal_cX_failed[cr]);
+ }
+
+ if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
+@@ -2643,6 +2676,92 @@
+ .proc_write = ext4_mb_last_group_write,
+ };
+
++/* Report this filesystem's mballoc statistics, one "name: value" per line. */
++static int mb_seq_alloc_show(struct seq_file *seq, void *v)
++{
++	struct ext4_sb_info *sbi = EXT4_SB(seq->private);
++
++	seq_puts(seq, "mballoc:\n");
++	seq_printf(seq, "\tblocks: %u\n", atomic_read(&sbi->s_bal_allocated));
++	seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
++	seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
++
++	seq_printf(seq, "\textents_scanned: %u\n",
++		   atomic_read(&sbi->s_bal_ex_scanned));
++	seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
++	seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
++	seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
++	seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));
++
++	seq_printf(seq, "\tuseless_c1_loops: %llu\n",
++		   atomic64_read(&sbi->s_bal_cX_failed[0]));
++	seq_printf(seq, "\tuseless_c2_loops: %llu\n",
++		   atomic64_read(&sbi->s_bal_cX_failed[1]));
++	seq_printf(seq, "\tuseless_c3_loops: %llu\n",
++		   atomic64_read(&sbi->s_bal_cX_failed[2]));
++	seq_printf(seq, "\tskipped_c1_loops: %llu\n",
++		   atomic64_read(&sbi->s_bal_cX_skipped[0]));
++	seq_printf(seq, "\tskipped_c2_loops: %llu\n",
++		   atomic64_read(&sbi->s_bal_cX_skipped[1]));
++	seq_printf(seq, "\tskipped_c3_loops: %llu\n",
++		   atomic64_read(&sbi->s_bal_cX_skipped[2]));
++	seq_printf(seq, "\tbuddies_generated: %lu\n",
++		   sbi->s_mb_buddies_generated);
++	seq_printf(seq, "\tbuddies_time_used: %llu\n", sbi->s_mb_generation_time);
++	seq_printf(seq, "\tpreallocated: %u\n",
++		   atomic_read(&sbi->s_mb_preallocated));
++	seq_printf(seq, "\tdiscarded: %u\n",
++		   atomic_read(&sbi->s_mb_discarded));
++	return 0;
++}
++
++/*
++ * Any write, whatever its content, zeroes the stats shown by this proc file.
++ */
++static ssize_t mb_seq_alloc_write(struct file *file,
++				  const char __user *buf,
++				  size_t cnt, loff_t *pos)
++{
++	struct ext4_sb_info *sbi = EXT4_SB(PDE_DATA(file_inode(file)));
++	unsigned int i;
++
++	atomic_set(&sbi->s_bal_allocated, 0);
++	atomic_set(&sbi->s_bal_reqs, 0);
++	atomic_set(&sbi->s_bal_success, 0);
++
++	atomic_set(&sbi->s_bal_ex_scanned, 0);
++	atomic_set(&sbi->s_bal_goals, 0);
++	atomic_set(&sbi->s_bal_2orders, 0);
++	atomic_set(&sbi->s_bal_breaks, 0);
++	atomic_set(&sbi->s_mb_lost_chunks, 0);
++
++	for (i = 0; i < ARRAY_SIZE(sbi->s_bal_cX_failed); i++)
++		atomic64_set(&sbi->s_bal_cX_failed[i], 0);
++	for (i = 0; i < ARRAY_SIZE(sbi->s_bal_cX_skipped); i++)
++		atomic64_set(&sbi->s_bal_cX_skipped[i], 0);
++
++	sbi->s_mb_buddies_generated = 0;
++	sbi->s_mb_generation_time = 0;
++
++	atomic_set(&sbi->s_mb_preallocated, 0);
++	atomic_set(&sbi->s_mb_discarded, 0);
++
++	return cnt;	/* the whole write is reported as consumed */
++}
++
++static int mb_seq_alloc_open(struct inode *inode, struct file *file)
++{
++	return single_open(file, mb_seq_alloc_show, PDE_DATA(inode));
++}
++
++const struct proc_ops ext4_mb_seq_alloc_fops = {
++	.proc_open = mb_seq_alloc_open,		/* binds mb_seq_alloc_show() */
++	.proc_read = seq_read,
++	.proc_lseek = seq_lseek,
++	.proc_release = single_release,		/* pairs with single_open() */
++	.proc_write = mb_seq_alloc_write,	/* resets the counters */
++};
++
+ int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(m->private);
+@@ -2869,6 +2988,7 @@
+ return 0;
+ }
+
++#define THRESHOLD_BLOCKS(ts) (ext4_blocks_count(sbi->s_es) / 100 * (ts))
+ int ext4_mb_init(struct super_block *sb)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+@@ -2923,6 +3043,9 @@
+ sbi->s_mb_stats = MB_DEFAULT_STATS;
+ sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
+ sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
++ sbi->s_mb_c1_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C1_THRESHOLD);
++ sbi->s_mb_c2_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C2_THRESHOLD);
++ sbi->s_mb_c3_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C3_THRESHOLD);
+ /*
+ * The default group preallocation is 512, which for 4k block
+ * sizes translates to 2 megabytes. However for bigalloc file
+@@ -3062,6 +3185,16 @@
+ atomic_read(&sbi->s_bal_reqs),
+ atomic_read(&sbi->s_bal_success));
+ ext4_msg(sb, KERN_INFO,
++ "mballoc: (%llu, %llu, %llu) useless c(0,1,2) loops",
++ atomic64_read(&sbi->s_bal_cX_failed[0]),
++ atomic64_read(&sbi->s_bal_cX_failed[1]),
++ atomic64_read(&sbi->s_bal_cX_failed[2]));
++ ext4_msg(sb, KERN_INFO,
++ "mballoc: (%llu, %llu, %llu) skipped c(0,1,2) loops",
++ atomic64_read(&sbi->s_bal_cX_skipped[0]),
++ atomic64_read(&sbi->s_bal_cX_skipped[1]),
++ atomic64_read(&sbi->s_bal_cX_skipped[2]));
++ ext4_msg(sb, KERN_INFO,
+ "mballoc: %u extents scanned, %u goal hits, "
+ "%u 2^N hits, %u breaks, %u lost",
+ atomic_read(&sbi->s_bal_ex_scanned),
+diff -ur a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
+--- a/fs/ext4/mballoc.h 2021-06-28 08:57:13.337385973 -0600
++++ b/fs/ext4/mballoc.h 2021-06-28 08:58:52.436392458 -0600
+@@ -68,6 +68,9 @@
+ * for which requests use 2^N search using buddies
+ */
+ #define MB_DEFAULT_ORDER2_REQS 8
++#define MB_DEFAULT_C1_THRESHOLD 25	/* % of fs blocks; below it start at cr 1 */
++#define MB_DEFAULT_C2_THRESHOLD 15	/* % of fs blocks; below it start at cr 2 */
++#define MB_DEFAULT_C3_THRESHOLD 5	/* % of fs blocks; below it start at cr 3 */
+
+ /*
+ * default group prealloc size 512 blocks
+diff -ur a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
+--- a/fs/ext4/sysfs.c 2021-06-28 08:57:13.669382587 -0600
++++ b/fs/ext4/sysfs.c 2021-06-28 09:00:20.199538001 -0600
+@@ -21,6 +21,9 @@
+ typedef enum {
+ attr_noop,
+ attr_delayed_allocation_blocks,
++ attr_mb_c1_threshold,
++ attr_mb_c2_threshold,
++ attr_mb_c3_threshold,
+ attr_session_write_kbytes,
+ attr_lifetime_write_kbytes,
+ attr_reserved_clusters,
+@@ -140,6 +143,32 @@
+ task_pid_vnr(sbi->s_journal->j_task));
+ }
+
++/* Blocks -> percent of all blocks; rounded up as the stored count is rounded down */
++#define THRESHOLD_PERCENT(ts) DIV_ROUND_UP((ts) * 100, ext4_blocks_count(sbi->s_es))
++
++/* Parse a 0..100 percentage from buf and store it in *blocks as a block count */
++static int save_threshold_percent(struct ext4_sb_info *sbi, const char *buf,
++				  ext4_fsblk_t *blocks)
++{
++	unsigned long long val;
++	int ret;
++
++	ret = kstrtoull(skip_spaces(buf), 0, &val);
++	if (ret || val > 100)
++		return -EINVAL;
++	*blocks = val * ext4_blocks_count(sbi->s_es) / 100;
++	return 0;
++}
++
++static ssize_t mb_threshold_store(struct ext4_sb_info *sbi,
++				  const char *buf, size_t count,
++				  ext4_fsblk_t *blocks)
++{
++	int err = save_threshold_percent(sbi, buf, blocks);
++
++	return err ? err : count;	/* error, else bytes consumed */
++}
++
+ #define EXT4_ATTR(_name,_mode,_id) \
+ static struct ext4_attr ext4_attr_##_name = { \
+ .attr = {.name = __stringify(_name), .mode = _mode }, \
+@@ -202,6 +231,9 @@
+ #define ATTR_LIST(name) &ext4_attr_##name.attr
+
+ EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444);
++EXT4_ATTR_FUNC(mb_c1_threshold, 0644);
++EXT4_ATTR_FUNC(mb_c2_threshold, 0644);
++EXT4_ATTR_FUNC(mb_c3_threshold, 0644);
+ EXT4_ATTR_FUNC(session_write_kbytes, 0444);
+ EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
+ EXT4_ATTR_FUNC(reserved_clusters, 0644);
+@@ -251,6 +283,9 @@
+
+ static struct attribute *ext4_attrs[] = {
+ ATTR_LIST(delayed_allocation_blocks),
++ ATTR_LIST(mb_c1_threshold),
++ ATTR_LIST(mb_c2_threshold),
++ ATTR_LIST(mb_c3_threshold),
+ ATTR_LIST(session_write_kbytes),
+ ATTR_LIST(lifetime_write_kbytes),
+ ATTR_LIST(reserved_clusters),
+@@ -367,6 +402,15 @@
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (s64) EXT4_C2B(sbi,
+ percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
++ case attr_mb_c1_threshold:
++ return scnprintf(buf, PAGE_SIZE, "%llu\n",
++ THRESHOLD_PERCENT(sbi->s_mb_c1_blocks));
++ case attr_mb_c2_threshold:
++ return scnprintf(buf, PAGE_SIZE, "%llu\n",
++ THRESHOLD_PERCENT(sbi->s_mb_c2_blocks));
++ case attr_mb_c3_threshold:
++ return scnprintf(buf, PAGE_SIZE, "%llu\n",
++ THRESHOLD_PERCENT(sbi->s_mb_c3_blocks));
+ case attr_session_write_kbytes:
+ return session_write_kbytes_show(sbi, buf);
+ case attr_lifetime_write_kbytes:
+@@ -468,6 +512,12 @@
+ return inode_readahead_blks_store(sbi, buf, len);
+ case attr_trigger_test_error:
+ return trigger_test_error(sbi, buf, len);
++ case attr_mb_c1_threshold:
++ return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c1_blocks);
++ case attr_mb_c2_threshold:
++ return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c2_blocks);
++ case attr_mb_c3_threshold:
++ return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c3_blocks);
+ }
+ return 0;
+ }
+@@ -530,6 +580,8 @@
+ &ext4_seq_mb_last_group_fops, sb);
+ proc_create_single_data("mb_last_start", S_IRUGO, sbi->s_proc,
+ ext4_mb_seq_last_start_seq_show, sb);
++ proc_create_data("mb_alloc_stats", S_IFREG | S_IRUGO | S_IWUSR,
++ sbi->s_proc, &ext4_mb_seq_alloc_fops, sb);
+ }
+ return 0;
+ }