])
], [test x$UBUNTU_KERNEL = xyes], [
BASEVER=$(echo $LINUXRELEASE | cut -d'-' -f1)
+ AS_VERSION_COMPARE([$BASEVER],[5.19.0],[
AS_VERSION_COMPARE([$BASEVER],[5.15.0],[
AS_VERSION_COMPARE([$BASEVER],[5.11.0],[
AS_VERSION_COMPARE([$BASEVER],[5.8.0],[
[LDISKFS_SERIES="5.11.0-40-ubuntu20.series"],
[LDISKFS_SERIES="5.11.0-40-ubuntu20.series"])],
[LDISKFS_SERIES="5.15.0-83-ubuntu20.series"],
- [LDISKFS_SERIES="5.15.0-83-ubuntu20.series"])
+ [LDISKFS_SERIES="5.15.0-83-ubuntu20.series"])],
+ [LDISKFS_SERIES="5.19.0-35-ubuntu.series"],
+ [LDISKFS_SERIES="5.19.0-35-ubuntu.series"])
], [test x$OPENEULER_KERNEL = xyes], [
case $OPENEULER_VERSION_NO in
2203.0) LDISKFS_SERIES="5.10.0-oe2203.series" ;;
--- /dev/null
+commit f2f28f1d09c0a00b3fc569422f881931d857fac9
+Author: Alex Zhuravlev <alex.zhuravlev@sun.com>
+AuthorDate: Tue Oct 28 17:59:09 2008 +0000
+Subject: ext4: detect on-disk corruption of block bitmap
+Detect on-disk corruption of block bitmap and better checking of
+preallocated blocks.
+Bugzilla-ID: b=16680
+Signed-off-by: Alex Zhuravlev <alex.zhuravlev@sun.com>
+Reviewed-by: Kalpak Shah <kalpak.shah@sun.com>
+Signed-off-by: Andreas Dilger <andreas.dilger@sun.com>
+---
+ fs/ext4/ext4.h | 1 +
+ fs/ext4/mballoc.c | 103 ++++++++++++++++++++++++++++++++++++++++------
+ fs/ext4/mballoc.h | 2 +-
+ 3 files changed, 93 insertions(+), 13 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index da13a1c..e84d4a7 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -3445,6 +3445,7 @@ struct ext4_group_info {
+ ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
+ ext4_group_t bb_group; /* Group number */
+ struct list_head bb_prealloc_list;
++ unsigned long bb_prealloc_nr;
+ #ifdef DOUBLE_CHECK
+ void *bb_bitmap;
+ #endif
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index e3bae5b..ae046a0 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -402,7 +402,7 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
+ "ext4_groupinfo_64k", "ext4_groupinfo_128k"
+ };
+
+-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
++static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+ ext4_group_t group);
+ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+ ext4_group_t group);
+@@ -1059,7 +1059,7 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
+ }
+
+ static noinline_for_stack
+-void ext4_mb_generate_buddy(struct super_block *sb,
++int ext4_mb_generate_buddy(struct super_block *sb,
+ void *buddy, void *bitmap, ext4_group_t group)
+ {
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+@@ -1103,6 +1103,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
+ grp->bb_free = free;
+ ext4_mark_group_bitmap_corrupted(sb, group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
++ return -EIO;
+ }
+ mb_set_largest_free_order(sb, grp);
+ mb_update_avg_fragment_size(sb, grp);
+@@ -1112,6 +1113,8 @@ void ext4_mb_generate_buddy(struct super_block *sb,
+ period = get_cycles() - period;
+ atomic_inc(&sbi->s_mb_buddies_generated);
+ atomic64_add(period, &sbi->s_mb_generation_time);
++
++ return 0;
+ }
+
+ /* The buddy information is attached the buddy cache inode
+@@ -1214,7 +1217,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+ }
+
+ first_block = page->index * blocks_per_page;
+- for (i = 0; i < blocks_per_page; i++) {
++ for (i = 0; i < blocks_per_page && err == 0; i++) {
+ group = (first_block + i) >> 1;
+ if (group >= ngroups)
+ break;
+@@ -1258,7 +1261,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+ ext4_lock_group(sb, group);
+ /* init the buddy */
+ memset(data, 0xff, blocksize);
+- ext4_mb_generate_buddy(sb, data, incore, group);
++ err = ext4_mb_generate_buddy(sb, data, incore, group);
+ ext4_unlock_group(sb, group);
+ incore = NULL;
+ } else {
+@@ -1273,7 +1276,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+ memcpy(data, bitmap, blocksize);
+
+ /* mark all preallocated blks used in in-core bitmap */
+- ext4_mb_generate_from_pa(sb, data, group);
++ err = ext4_mb_generate_from_pa(sb, data, group);
+ ext4_mb_generate_from_freelist(sb, data, group);
+ ext4_unlock_group(sb, group);
+
+@@ -1283,7 +1286,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+ incore = data;
+ }
+ }
+- SetPageUptodate(page);
++ if (likely(err == 0))
++ SetPageUptodate(page);
+
+ out:
+ if (bh) {
+@@ -2815,9 +2819,11 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
+ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
+ {
+ struct super_block *sb = pde_data(file_inode(seq->file));
++ struct ext4_group_desc *gdp;
+ ext4_group_t group = (ext4_group_t) ((unsigned long) v);
+ int i;
+ int err, buddy_loaded = 0;
++ int free = 0;
+ struct ext4_buddy e4b;
+ struct ext4_group_info *grinfo;
+ unsigned char blocksize_bits = min_t(unsigned char,
+@@ -2830,7 +2836,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
+
+ group--;
+ if (group == 0)
+- seq_puts(seq, "#group: free frags first ["
++ seq_puts(seq, "#group: bfree gfree frags first pa ["
+ " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
+ " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
+
+@@ -2848,13 +2854,19 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
+ buddy_loaded = 1;
+ }
+
++ gdp = ext4_get_group_desc(sb, group, NULL);
++ if (gdp != NULL)
++ free = ext4_free_group_clusters(sb, gdp);
++
+ memcpy(&sg, ext4_get_group_info(sb, group), i);
+
+ if (buddy_loaded)
+ ext4_mb_unload_buddy(&e4b);
+
+- seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
+- sg.info.bb_fragments, sg.info.bb_first_free);
++ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
++ (long unsigned int)group, sg.info.bb_free, free,
++ sg.info.bb_fragments, sg.info.bb_first_free,
++ sg.info.bb_prealloc_nr);
+ for (i = 0; i <= 13; i++)
+ seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
+ sg.info.bb_counters[i] : 0);
+@@ -4571,23 +4583,72 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
+ return;
+ }
+
++/*
++ * Check that the free clusters counted in the on-disk bitmap match the free
++ * count in the group descriptor.  Do this before preallocated blocks are
++ * applied to the bitmap so that on-disk corruption can be detected.  Returns
++ * 0 on success or -EIO on mismatch.  The caller must hold the group lock.
++ */
++int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
++				struct ext4_group_desc *gdp, int group)
++{
++	unsigned int max = EXT4_CLUSTERS_PER_GROUP(sb);
++	unsigned int i, first, free = 0;	/* 16 bits truncate at 65536+ clusters */
++	unsigned int free_in_gdp = ext4_free_group_clusters(sb, gdp);
++
++	if (free_in_gdp == 0 && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
++		return 0;
++
++	i = mb_find_next_zero_bit(bitmap, max, 0);
++
++	while (i < max) {
++		first = i;	/* start of a run of free (zero) bits */
++		i = mb_find_next_bit(bitmap, max, i);
++		if (i > max)
++			i = max;
++		free += i - first;
++		if (i < max)
++			i = mb_find_next_zero_bit(bitmap, max, i);
++	}
++
++	if (free != free_in_gdp) {
++		ext4_error(sb, "on-disk bitmap for group %d "
++			"corrupted: %u blocks free in bitmap, %u - in gd\n",
++			group, free, free_in_gdp);
++		return -EIO;
++	}
++	return 0;
++}
++
+ /*
+ * the function goes through all preallocation in this group and marks them
+ * used in in-core bitmap. buddy must be generated from this bitmap
+ * Need to be called with ext4 group lock held
+ */
+ static noinline_for_stack
+-void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
++int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+ ext4_group_t group)
+ {
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct ext4_prealloc_space *pa;
++ struct ext4_group_desc *gdp;
+ struct list_head *cur;
+ ext4_group_t groupnr;
+ ext4_grpblk_t start;
+ int preallocated = 0;
++ int skip = 0, count = 0;
++ int err;
+ int len;
+
++ gdp = ext4_get_group_desc(sb, group, NULL);
++ if (gdp == NULL)
++ return -EIO;
++
++ /* before applying preallocations, check bitmap consistency */
++ err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
++ if (err)
++ return err;
++
+ /* all form of preallocation discards first load group,
+ * so the only competing code is preallocation use.
+ * we don't need any locking here
+@@ -4603,13 +4664,23 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+ &groupnr, &start);
+ len = pa->pa_len;
+ spin_unlock(&pa->pa_lock);
+- if (unlikely(len == 0))
++ if (unlikely(len == 0)) {
++ skip++;
+ continue;
++ }
+ BUG_ON(groupnr != group);
+ mb_set_bits(bitmap, start, len);
+ preallocated += len;
++ count++;
++ }
++ if (count + skip != grp->bb_prealloc_nr) {
++ ext4_error(sb, "lost preallocations: "
++ "count %d, bb_prealloc_nr %lu, skip %d\n",
++ count, grp->bb_prealloc_nr, skip);
++ return -EIO;
+ }
+ mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
++ return 0;
+ }
+
+ static void ext4_mb_mark_pa_deleted(struct super_block *sb,
+@@ -4693,6 +4764,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
+ */
+ ext4_lock_group(sb, grp);
+ list_del(&pa->pa_group_list);
++ ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
+ ext4_unlock_group(sb, grp);
+
+ spin_lock(pa->pa_obj_lock);
+@@ -4784,6 +4856,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+ pa->pa_inode = ac->ac_inode;
+
+ list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
++ grp->bb_prealloc_nr++;
+
+ spin_lock(pa->pa_obj_lock);
+ list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
+@@ -4839,6 +4912,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
+ pa->pa_inode = NULL;
+
+ list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
++ grp->bb_prealloc_nr++;
+
+ /*
+ * We will later add the new pa to the right bucket
+@@ -5003,6 +5077,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
+
+ spin_unlock(&pa->pa_lock);
+
++ BUG_ON(grp->bb_prealloc_nr == 0);
++ grp->bb_prealloc_nr--;
+ list_del(&pa->pa_group_list);
+ list_add(&pa->u.pa_tmp_list, &list);
+ }
+@@ -5130,7 +5206,7 @@ repeat:
+ if (err) {
+ ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
+ err, group);
+- continue;
++ return;
+ }
+
+ bitmap_bh = ext4_read_block_bitmap(sb, group);
+@@ -5143,6 +5219,8 @@ repeat:
+ }
+
+ ext4_lock_group(sb, group);
++ BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
++ e4b.bd_info->bb_prealloc_nr--;
+ list_del(&pa->pa_group_list);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+ ext4_unlock_group(sb, group);
+@@ -5439,6 +5517,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
+ }
+ ext4_lock_group(sb, group);
+ list_del(&pa->pa_group_list);
++ ext4_get_group_info(sb, group)->bb_prealloc_nr--;
+ ext4_mb_release_group_pa(&e4b, pa);
+ ext4_unlock_group(sb, group);
+
+diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
+index dcda2a9..c7b753b 100644
+--- a/fs/ext4/mballoc.h
++++ b/fs/ext4/mballoc.h
+@@ -66,7 +66,7 @@
+ /*
+ * for which requests use 2^N search using buddies
+ */
+-#define MB_DEFAULT_ORDER2_REQS 2
++#define MB_DEFAULT_ORDER2_REQS 8
+
+ /*
+ * default group prealloc size 512 blocks
+--
+2.34.1
+
--- /dev/null
+commit d8d8fd9192a54c7b8caef8cca9b7a1eb5e5e3298
+Author: Alex Zhuravlev <alex.zhuravlev@sun.com>
+AuthorDate: Thu Oct 23 10:02:19 2008 +0000
+Subject: ext4: support for tunable preallocation window
+Add support for tunable preallocation window and new tunables
+for large/small requests.
+Bugzilla-ID: b=12800
+Signed-off-by: Alex Zhuravlev <alex.zhuravlev@sun.com>
+Reviewed-by: Kalpak Shah <kalpak@clusterfs.com>
+Reviewed-by: Andreas Dilger <andreas.dilger@sun.com>
+---
+ fs/ext4/ext4.h | 7 +-
+ fs/ext4/inode.c | 3 +
+ fs/ext4/mballoc.c | 220 +++++++++++++++++++++++++++++++++++-----------
+ fs/ext4/sysfs.c | 8 +-
+ 4 files changed, 182 insertions(+), 56 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 499f3a3..cda3fbd 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1302,6 +1302,8 @@ extern void mb_set_bits(void *bm, int cur, int len);
+ #define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
+ #define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */
+
++#define EXT4_MAX_PREALLOC_TABLE 64
++
+ /*
+ * Behaviour when detecting errors
+ */
+@@ -1608,11 +1610,13 @@ struct ext4_sb_info {
+ /* tunables */
+ unsigned long s_stripe;
+ unsigned int s_mb_max_linear_groups;
+- unsigned int s_mb_stream_request;
++ unsigned long s_mb_small_req;
++ unsigned long s_mb_large_req;
+ unsigned int s_mb_max_to_scan;
+ unsigned int s_mb_min_to_scan;
+ unsigned int s_mb_stats;
+ unsigned int s_mb_order2_reqs;
++ unsigned long *s_mb_prealloc_table;
+ unsigned int s_mb_group_prealloc;
+ unsigned int s_mb_max_inode_prealloc;
+ unsigned int s_max_dir_size_kb;
+@@ -2903,6 +2907,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
+ int len, int replay);
+
+ /* mballoc.c */
++extern const struct proc_ops ext4_seq_prealloc_table_fops;
+ extern const struct seq_operations ext4_mb_seq_groups_ops;
+ extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
+ extern long ext4_mb_stats;
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 3aa591c..8a5ce9d 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2750,6 +2750,9 @@ static int ext4_writepages(struct address_space *mapping,
+ PAGE_SIZE >> inode->i_blkbits);
+ }
+
++ if (wbc->nr_to_write < sbi->s_mb_small_req)
++ wbc->nr_to_write = sbi->s_mb_small_req;
++
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 9dad930..987e1d7 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3016,6 +3016,99 @@ const struct seq_operations ext4_mb_seq_structs_summary_ops = {
+ .show = ext4_mb_seq_structs_summary_show,
+ };
+
++/* Validate (and, when @update is set, store) a space-separated list of
++ * strictly ascending preallocation window sizes held in str[0..cnt).
++ * Returns 0, -EINVAL for a bad list, or -EOVERFLOW if it does not fit. */
++static int ext4_mb_check_and_update_prealloc(struct ext4_sb_info *sbi,
++				char *str, size_t cnt,
++				int update)
++{
++	unsigned long value, prev = 0;
++	char *cur, *next, *end;
++	int num = 0;
++
++	cur = str;
++	end = str + cnt;
++	while (cur < end) {
++		while ((cur < end) && (*cur == ' ')) cur++;
++		value = simple_strtol(cur, &next, 0);
++		if (cur == next && cur < end && *cur != '\n' && *cur != '\0')
++			return -EINVAL;	/* junk, not a trailing newline/NUL */
++		if (value == 0)
++			break;
++
++		cur = next;
++
++		if (value > (sbi->s_blocks_per_group - 1 - 1 - sbi->s_itb_per_group))
++			return -EINVAL;
++
++		/* values must be strictly ascending */
++		if (value <= prev)
++			return -EINVAL;
++
++		if (update)
++			sbi->s_mb_prealloc_table[num] = value;
++
++		prev = value;
++		num++;
++	}
++
++	if (num > EXT4_MAX_PREALLOC_TABLE - 1)
++		return -EOVERFLOW;
++
++	if (update)
++		sbi->s_mb_prealloc_table[num] = 0;
++
++	return 0;
++}
++
++/* Replace the prealloc table from user text; a dry run validates it first. */
++static ssize_t ext4_mb_prealloc_table_proc_write(struct file *file,
++					     const char __user *buf,
++					     size_t cnt, loff_t *pos)
++{
++	struct ext4_sb_info *sbi = EXT4_SB(pde_data(file_inode(file)));
++	char str[128];
++	int rc;
++
++	if (cnt >= sizeof(str))
++		return -EINVAL;
++	if (copy_from_user(str, buf, cnt))
++		return -EFAULT;
++	str[cnt] = '\0';	/* keep simple_strtol() inside the copied data */
++
++	rc = ext4_mb_check_and_update_prealloc(sbi, str, cnt, 0);
++	if (!rc)
++		rc = ext4_mb_check_and_update_prealloc(sbi, str, cnt, 1);
++	return rc ? rc : cnt;
++}
++
++/* Print the configured window sizes on one line, stopping at the 0 sentinel. */
++static int mb_prealloc_table_seq_show(struct seq_file *m, void *v)
++{
++	struct ext4_sb_info *sbi = EXT4_SB(m->private);
++	int i;
++
++	for (i = 0; i < EXT4_MAX_PREALLOC_TABLE &&
++			sbi->s_mb_prealloc_table[i] != 0; i++)
++		seq_printf(m, "%lu ", sbi->s_mb_prealloc_table[i]);
++	seq_printf(m, "\n");
++	return 0;
++}
++
++static int mb_prealloc_table_seq_open(struct inode *inode, struct file *file)
++{	/* pde_data(inode) becomes m->private for mb_prealloc_table_seq_show() */
++	return single_open(file, mb_prealloc_table_seq_show, pde_data(inode));
++}
++
++const struct proc_ops ext4_seq_prealloc_table_fops = {
++	.proc_open = mb_prealloc_table_seq_open,
++	.proc_read = seq_read,	/* reads go through the seq_file helpers */
++	.proc_lseek = seq_lseek,
++	.proc_release = single_release,	/* pairs with single_open() in the open handler */
++	.proc_write = ext4_mb_prealloc_table_proc_write,	/* replaces the table */
++};
++
+ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
+ {
+ int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+@@ -3329,7 +3422,7 @@ static void ext4_discard_work(struct work_struct *work)
+ int ext4_mb_init(struct super_block *sb)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+- unsigned i, j;
++ unsigned i, j, k, l;
+ unsigned offset, offset_incr;
+ unsigned max;
+ int ret;
+@@ -3417,7 +3510,6 @@ int ext4_mb_init(struct super_block *sb)
+ sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
+ sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
+ sbi->s_mb_stats = MB_DEFAULT_STATS;
+- sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
+ sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
+ sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
+ /*
+@@ -3442,9 +3534,29 @@ int ext4_mb_init(struct super_block *sb)
+ * RAID stripe size so that preallocations don't fragment
+ * the stripes.
+ */
+- if (sbi->s_stripe > 1) {
+- sbi->s_mb_group_prealloc = roundup(
+- sbi->s_mb_group_prealloc, sbi->s_stripe);
++
++ /* Allocate table once */
++ sbi->s_mb_prealloc_table = kzalloc(
++ EXT4_MAX_PREALLOC_TABLE * sizeof(unsigned long), GFP_NOFS);
++ if (sbi->s_mb_prealloc_table == NULL) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ if (sbi->s_stripe == 0) {
++ for (k = 0, l = 4; k <= 9; ++k, l *= 2)
++ sbi->s_mb_prealloc_table[k] = l;
++
++ sbi->s_mb_small_req = 256;
++ sbi->s_mb_large_req = 1024;
++ sbi->s_mb_group_prealloc = 512;
++ } else {
++ for (k = 0, l = sbi->s_stripe; k <= 2; ++k, l *= 2)
++ sbi->s_mb_prealloc_table[k] = l;
++
++ sbi->s_mb_small_req = sbi->s_stripe;
++ sbi->s_mb_large_req = sbi->s_stripe * 8;
++ sbi->s_mb_group_prealloc = sbi->s_stripe * 4;
+ }
+
+ sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
+@@ -3480,6 +3592,7 @@ out:
+ kfree(sbi->s_mb_avg_fragment_size_locks);
+ kfree(sbi->s_mb_largest_free_orders);
+ kfree(sbi->s_mb_largest_free_orders_locks);
++ kfree(sbi->s_mb_prealloc_table);
+ kfree(sbi->s_mb_offsets);
+ sbi->s_mb_offsets = NULL;
+ kfree(sbi->s_mb_maxs);
+@@ -3750,7 +3863,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ int err, len;
+
+ BUG_ON(ac->ac_status != AC_STATUS_FOUND);
+- BUG_ON(ac->ac_b_ex.fe_len <= 0);
+
+ sb = ac->ac_sb;
+ sbi = EXT4_SB(sb);
+@@ -3993,13 +4105,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ struct ext4_allocation_request *ar)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+- int bsbits, max;
++ int bsbits, i, wind;
+ ext4_lblk_t end;
+- loff_t size, start_off;
++ loff_t size;
+ loff_t orig_size __maybe_unused;
+ ext4_lblk_t start;
+ struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
+ struct ext4_prealloc_space *pa;
++ unsigned long value, last_non_zero;
+
+ /* do normalize only data requests, metadata requests
+ do not need preallocation */
+@@ -4028,51 +4141,46 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ size = size << bsbits;
+ if (size < i_size_read(ac->ac_inode))
+ size = i_size_read(ac->ac_inode);
+- orig_size = size;
++ size = (size + ac->ac_sb->s_blocksize - 1) >> bsbits;
++
++ start = wind = 0;
++ value = last_non_zero = 0;
++
++ /* let's choose preallocation window depending on file size */
++ for (i = 0; i < EXT4_MAX_PREALLOC_TABLE; i++) {
++ value = sbi->s_mb_prealloc_table[i];
++ if (value == 0)
++ break;
++ else
++ last_non_zero = value;
+
+- /* max size of free chunks */
+- max = 2 << bsbits;
+-
+-#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
+- (req <= (size) || max <= (chunk_size))
+-
+- /* first, try to predict filesize */
+- /* XXX: should this table be tunable? */
+- start_off = 0;
+- if (size <= 16 * 1024) {
+- size = 16 * 1024;
+- } else if (size <= 32 * 1024) {
+- size = 32 * 1024;
+- } else if (size <= 64 * 1024) {
+- size = 64 * 1024;
+- } else if (size <= 128 * 1024) {
+- size = 128 * 1024;
+- } else if (size <= 256 * 1024) {
+- size = 256 * 1024;
+- } else if (size <= 512 * 1024) {
+- size = 512 * 1024;
+- } else if (size <= 1024 * 1024) {
+- size = 1024 * 1024;
+- } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
+- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+- (21 - bsbits)) << 21;
+- size = 2 * 1024 * 1024;
+- } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
+- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+- (22 - bsbits)) << 22;
+- size = 4 * 1024 * 1024;
+- } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
+- (8<<20)>>bsbits, max, 8 * 1024)) {
+- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+- (23 - bsbits)) << 23;
+- size = 8 * 1024 * 1024;
++ if (size <= value) {
++ wind = value;
++ break;
++ }
++ }
++
++ if (wind == 0) {
++ if (last_non_zero != 0) {
++ __u64 tstart, tend;
++ /* file is quite large, we now preallocate with
++			 * the biggest configured window with regard to
++ * logical offset */
++ wind = last_non_zero;
++ tstart = ac->ac_o_ex.fe_logical;
++ do_div(tstart, wind);
++ start = tstart * wind;
++ tend = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len - 1;
++ do_div(tend, wind);
++ tend = tend * wind + wind;
++ size = tend - start;
++ }
+ } else {
+- start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
+- size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
+- ac->ac_o_ex.fe_len) << bsbits;
++ size = wind;
+ }
+- size = size >> bsbits;
+- start = start_off >> bsbits;
++
++
++ orig_size = size;
+
+ /*
+ * For tiny groups (smaller than 8MB) the chosen allocation
+@@ -4178,7 +4286,6 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ (unsigned long) ac->ac_o_ex.fe_logical);
+ BUG();
+ }
+- BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+
+ /* now prepare goal request */
+
+@@ -5177,8 +5284,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
+ inode_pa_eligible = false;
+
+ size = max(size, isize);
+- /* Don't use group allocation for large files */
+- if (size > sbi->s_mb_stream_request)
++ if ((ac->ac_o_ex.fe_len >= sbi->s_mb_small_req) ||
++ (size >= sbi->s_mb_large_req))
+ group_pa_eligible = false;
+
+ if (!group_pa_eligible) {
+@@ -5189,6 +5296,13 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
+ return;
+ }
+
++ /*
++ * request is so large that we don't care about
++	 * streaming - it outweighs any possible seek
++ */
++ if (ac->ac_o_ex.fe_len >= sbi->s_mb_large_req)
++ return;
++
+ BUG_ON(ac->ac_lg != NULL);
+ /*
+ * locality group prealloc space are per cpu. The reason for having
+diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
+index d233c24..9d2254f 100644
+--- a/fs/ext4/sysfs.c
++++ b/fs/ext4/sysfs.c
+@@ -212,7 +212,8 @@ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+ EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
+-EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
++EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req);
++EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req);
+ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
+ EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc);
+ EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
+@@ -262,7 +263,8 @@ static struct attribute *ext4_attrs[] = {
+ ATTR_LIST(mb_max_to_scan),
+ ATTR_LIST(mb_min_to_scan),
+ ATTR_LIST(mb_order2_req),
+- ATTR_LIST(mb_stream_req),
++ ATTR_LIST(mb_small_req),
++ ATTR_LIST(mb_large_req),
+ ATTR_LIST(mb_group_prealloc),
+ ATTR_LIST(mb_max_inode_prealloc),
+ ATTR_LIST(mb_max_linear_groups),
+@@ -543,6 +545,8 @@ int ext4_register_sysfs(struct super_block *sb)
+ ext4_fc_info_show, sb);
+ proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc,
+ &ext4_mb_seq_groups_ops, sb);
++ proc_create_data("prealloc_table", S_IRUGO, sbi->s_proc,
++ &ext4_seq_prealloc_table_fops, sb);
+ proc_create_single_data("mb_stats", 0444, sbi->s_proc,
+ ext4_seq_mb_stats_show, sb);
+ proc_create_seq_data("mb_structs_summary", 0444, sbi->s_proc,
+--
+2.34.1
+
--- /dev/null
+linux-5.16/ext4-inode-version.patch
+linux-5.18/ext4-lookup-dotdot.patch
+linux-5.14/ext4-print-inum-in-htree-warning.patch
+ubuntu22.04.3/ext4-prealloc.patch
+linux-5.16/ext4-osd-iop-common.patch
+linux-5.16/ext4-misc.patch
+ubuntu22.04.3/ext4-mballoc-extra-checks.patch
+sles15sp4/ext4-hash-indexed-dir-dotdot-update.patch
+linux-5.14/ext4-kill-dx-root.patch
+linux-5.18/ext4-mballoc-pa-free-mismatch.patch
+linux-6.0/ext4-data-in-dirent.patch
+rhel8/ext4-nocmtime.patch
+base/ext4-htree-lock.patch
+linux-6.0/ext4-pdirop.patch
+rhel9/ext4-max-dir-size.patch
+linux-5.16/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
+rhel9/ext4-give-warning-with-dir-htree-growing.patch
+ubuntu18/ext4-jcb-optimization.patch
+linux-5.10/ext4-attach-jinode-in-writepages.patch
+rhel8/ext4-dont-check-before-replay.patch
+rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch
+rhel7.6/ext4-export-orphan-add.patch
+linux-5.18/ext4-export-mb-stream-allocator-variables.patch
+ubuntu19/ext4-iget-with-flags.patch
+linux-5.14/export-ext4fs-dirhash-helper.patch
+linux-5.8/ext4-no-max-dir-size-limit-for-iam-objects.patch
+rhel9/ext4-dquot-commit-speedup.patch
+linux-5.14/ext4-ialloc-uid-gid-and-pass-owner-down.patch
+linux-5.14/ext4-projid-xattrs.patch
+rhel9.1/ext4-delayed-iput.patch
+rhel8/ext4-ext-merge.patch
+linux-5.14/ext4-xattr-disable-credits-check.patch
+rhel9.2/ext4-fiemap-kernel-data.patch
+rhel8/ext4-old_ea_inodes_handling_fix.patch
+ubuntu20.04.5/ext4-filename-encode.patch
+rhel9.1/ext4-enc-flag.patch
+rhel9.2/ext4-encdata.patch
+rhel9/ext4-add-periodic-superblock-update.patch