From 1534c43ccb034048d8ab0a22cb55635116eebe09 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Tue, 3 Sep 2024 23:42:43 +0800 Subject: [PATCH] LU-14438 ldiskfs: backport ldiskfs mballoc patches This contains following kernel patches: cfd732377221 ("ext4: add prefetching for block allocation bitmaps") 3d392b2676bf ("ext4: add prefetch_block_bitmaps mount option") dddcd2f9ebde ("ext4: optimize the implementation of ext4_mb_good_group()") 67d251860461 ("ext4: drop s_mb_bal_lock and convert protected fields to atomic") a6c75eaf1103 ("ext4: add mballoc stats proc file") 4b68f6df1059 ("ext4: add MB_NUM_ORDERS macro") 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning") 21175ca434c5 ("ext4: make prefetch_block_bitmaps default") 077d0c2c78df ("ext4: make mb_optimize_scan performance mount option work with extents") 3fa5d23e68a3 ("ext4: reflect mb_optimize_scan value in options file") 4fca50d440cc ("ext4: make mballoc try target group first even with mb_optimize_scan") 1940265ede66 ("ext4: avoid unnecessary spreading of allocations among groups") a9f2a2931d0e ("ext4: use locality group preallocation for small closed files") 83e80a6e3543 ("ext4: use buckets for cr 1 block scan instead of rbtree") 80fa46d6b9e7 ("ext4: limit the number of retries after discarding preallocations blocks") a078dff87013 ("ext4: fixup possible uninitialized variable access in ext4_mb_choose_next_group_cr1()") 820897258ad3 ("ext4: Refactor code related to freeing PAs") 5354b2af3406 ("ext4: allow ext4_get_group_info() to fail") 3c6296046c85 ("ext4: Don't skip prefetching BLOCK_UNINIT groups") 4f3d1e4533b0 ("ext4: Ensure ext4_mb_prefetch_fini() is called for all prefetched BGs") Signed-off-by: Bobi Jam Change-Id: I079dfb74bd743894934484803cedb683073e4d94 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51472 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Li Dongyang Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- .../rhel8.8/ext4-add-MB_NUM_ORDERS-macro.patch | 118 +++ .../rhel8.8/ext4-add-mballoc-stats-proc-file.patch | 333 ++++++++ ...t4-add-prefetch-block-bitmap-mount-option.patch | 278 +++++++ ...-prefetching-for-block-allocation-bitmaps.patch | 291 +++++++ .../ext4-allow-ext4_get_group_info-to-fail.patch | 449 +++++++++++ ...oiod-unnecessary-spreading-of-allocations.patch | 69 ++ ...l_lock-convert-protected-fields-to-atomic.patch | 102 +++ ...refetch_fini-called-for-all-prefetched-bg.patch | 85 ++ ...init-var-in-ext4_mb_choose_next_group_cr1.patch | 40 + .../ext4-improve-cr0-cr1-group-scanning.patch | 867 +++++++++++++++++++++ ...-of-retries-after-discard-prealloc-blocks.patch | 71 ++ ...mb_optimize_scan-performance-with-extents.patch | 118 +++ .../ext4-make-mballoc-try-target-group-first.patch | 86 ++ .../ext4-make-prefetch_block_bitmaps-default.patch | 87 +++ ...-not-skip-prefetching-BLOCK_UNINIT-groups.patch | 66 ++ .../ext4-optimize-the-ext4_mb_good_group.patch | 59 ++ .../ext4-refactor-code-related-to-freeing-pa.patch | 104 +++ ...reflect-mb_optimize_scan-value-in-options.patch | 35 + .../ext4-use-buckets-for-cr1-block-scan.patch | 440 +++++++++++ ...ality-group-preallocation-for-small-files.patch | 79 ++ .../ext4-allow-ext4_get_group_info-to-fail.patch | 433 ++++++++++ ...-not-skip-prefetching-BLOCK_UNINIT-groups.patch | 51 ++ .../series/ldiskfs-4.18-rhel8.10.series | 19 + .../series/ldiskfs-4.18-rhel8.8.series | 20 + .../series/ldiskfs-5.14-rhel9.4.series | 3 + .../series/ldiskfs-5.14-rhel9.5.series | 2 + 26 files changed, 4305 insertions(+) create mode 
100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-MB_NUM_ORDERS-macro.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-mballoc-stats-proc-file.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-prefetch-block-bitmap-mount-option.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-prefetching-for-block-allocation-bitmaps.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-allow-ext4_get_group_info-to-fail.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-avoiod-unnecessary-spreading-of-allocations.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-drop-s_mb_bal_lock-convert-protected-fields-to-atomic.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-ensure-ext4_mb_prefetch_fini-called-for-all-prefetched-bg.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-fixup-possible-uninit-var-in-ext4_mb_choose_next_group_cr1.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-improve-cr0-cr1-group-scanning.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-limit-number-of-retries-after-discard-prealloc-blocks.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-mb_optimize_scan-performance-with-extents.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-mballoc-try-target-group-first.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-prefetch_block_bitmaps-default.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-optimize-the-ext4_mb_good_group.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-refactor-code-related-to-freeing-pa.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-reflect-mb_optimize_scan-value-in-options.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-use-buckets-for-cr1-block-scan.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel8.8/ext4-use-locality-group-preallocation-for-small-files.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel9.4/ext4-allow-ext4_get_group_info-to-fail.patch create mode 100644 ldiskfs/kernel_patches/patches/rhel9.4/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-MB_NUM_ORDERS-macro.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-MB_NUM_ORDERS-macro.patch new file mode 100644 index 0000000..64868c9 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-MB_NUM_ORDERS-macro.patch @@ -0,0 +1,118 @@ +commit 4b68f6df105966f04f45f1eca0561b86f2b3551d +Author: Harshad Shirwadkar +AuthorDate: Thu Apr 1 10:21:26 2021 -0700 +Commit: Theodore Ts'o +CommitDate: Fri Apr 9 11:34:59 2021 -0400 + +ext4: add MB_NUM_ORDERS macro + +A few arrays in mballoc.c use the total number of valid orders as +their size. Currently, this value is set as "sb->s_blocksize_bits + +2". This makes code harder to read. So, instead add a new macro +MB_NUM_ORDERS(sb) to make the code more readable. 
+ +Signed-off-by: Harshad Shirwadkar +Reviewed-by: Andreas Dilger +Reviewed-by: Ritesh Harjani +Link: https://lore.kernel.org/r/20210401172129.189766-5-harshadshirwadkar@gmail.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -759,7 +759,7 @@ mb_set_largest_free_order(struct super_b + + grp->bb_largest_free_order = -1; /* uninit */ + +- bits = sb->s_blocksize_bits + 1; ++ bits = MB_NUM_ORDERS(sb) - 1; + for (i = bits; i >= 0; i--) { + if (grp->bb_counters[i] > 0) { + grp->bb_largest_free_order = i; +@@ -985,7 +985,7 @@ static int ext4_mb_init_cache(struct pag + grinfo->bb_fragments = 0; + memset(grinfo->bb_counters, 0, + sizeof(*grinfo->bb_counters) * +- (sb->s_blocksize_bits+2)); ++ (MB_NUM_ORDERS(sb))); + /* + * incore got set to the group block bitmap below + */ +@@ -1987,7 +1987,7 @@ void ext4_mb_simple_scan_group(struct ex + int max; + + BUG_ON(ac->ac_2order <= 0); +- for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) { ++ for (i = ac->ac_2order; i < MB_NUM_ORDERS(sb); i++) { + if (grp->bb_counters[i] == 0) + continue; + +@@ -2157,7 +2157,7 @@ static bool ext4_mb_good_group(struct ex + if (free < ac->ac_g_ex.fe_len) + return false; + +- if (ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ++ if (ac->ac_2order >= MB_NUM_ORDERS(ac->ac_sb)) + return true; + + if (grp->bb_largest_free_order < ac->ac_2order) +@@ -2368,13 +2368,13 @@ ext4_mb_regular_allocator(struct ext4_al + * We also support searching for power-of-two requests only for + * requests upto maximum buddy size we have constructed. + */ +- if (i >= sbi->s_mb_order2_reqs && i <= sb->s_blocksize_bits + 2) { ++ if (i >= sbi->s_mb_order2_reqs && i <= MB_NUM_ORDERS(sb)) { + /* + * This should tell if fe_len is exactly power of 2 + */ + if ((ac->ac_g_ex.fe_len & (~(1 << (i - 1)))) == 0) + ac->ac_2order = array_index_nospec(i - 1, +- sb->s_blocksize_bits + 2); ++ MB_NUM_ORDERS(sb)); + } + + /* if stream allocation is enabled, use global goal */ +@@ -3101,7 +3101,7 @@ int ext4_mb_init(struct super_block *sb) + unsigned max; + int ret; + +- i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); ++ i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_offsets); + + sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); + if (sbi->s_mb_offsets == NULL) { +@@ -3109,7 +3109,7 @@ int ext4_mb_init(struct super_block *sb) + goto out; + } + +- i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); ++ i = MB_NUM_ORDERS(sb) * sizeof(*sbi->s_mb_maxs); + sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); + if (sbi->s_mb_maxs == NULL) { + ret = -ENOMEM; +@@ -3135,7 +3135,7 @@ int ext4_mb_init(struct super_block *sb) + offset_incr = offset_incr >> 1; + max = max >> 1; + i++; +- } while (i <= sb->s_blocksize_bits + 1); ++ } while (i < MB_NUM_ORDERS(sb)); + + spin_lock_init(&sbi->s_md_lock); + sbi->s_mb_free_pending = 0; +Index: linux-stage/fs/ext4/mballoc.h +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.h ++++ linux-stage/fs/ext4/mballoc.h +@@ -82,6 +82,11 @@ + */ + #define MB_DEFAULT_MAX_INODE_PREALLOC 512 + ++/* ++ * Number of valid buddy orders ++ */ ++#define MB_NUM_ORDERS(sb) ((sb)->s_blocksize_bits + 2) ++ + struct ext4_free_data { + /* this links the free block information from sb_info */ + struct list_head efd_list; diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-mballoc-stats-proc-file.patch 
b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-mballoc-stats-proc-file.patch new file mode 100644 index 0000000..bc5492b --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-mballoc-stats-proc-file.patch @@ -0,0 +1,333 @@ +commit a6c75eaf11032f4a3d2b3ce2265a194ac6e4a7f0 +Author: Harshad Shirwadkar +AuthorDate: Thu Apr 1 10:21:25 2021 -0700 +Commit: Theodore Ts'o +CommitDate: Fri Apr 9 11:34:59 2021 -0400 + +ext4: add mballoc stats proc file + +Add new stats for measuring the performance of mballoc. This patch is +forked from Artem Blagodarenko's work that can be found here: + +https://github.com/lustre/lustre-release/blob/master/ldiskfs/kernel_patches/patches/rhel8/ext4-simple-blockalloc.patch + +This patch reorganizes the stats by cr level. This is how the output +looks like: + + mballoc: + reqs: 0 + success: 0 + groups_scanned: 0 + cr0_stats: + hits: 0 + groups_considered: 0 + useless_loops: 0 + bad_suggestions: 0 + cr1_stats: + hits: 0 + groups_considered: 0 + useless_loops: 0 + bad_suggestions: 0 + cr2_stats: + hits: 0 + groups_considered: 0 + useless_loops: 0 + cr3_stats: + hits: 0 + groups_considered: 0 + useless_loops: 0 + extents_scanned: 0 + goal_hits: 0 + 2^n_hits: 0 + breaks: 0 + lost: 0 + buddies_generated: 0/40 + buddies_time_used: 0 + preallocated: 0 + discarded: 0 + +Signed-off-by: Harshad Shirwadkar +Reviewed-by: Andreas Dilger +Reviewed-by: Ritesh Harjani +Link: https://lore.kernel.org/r/20210401172129.189766-4-harshadshirwadkar@gmail.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/ext4.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4.h ++++ linux-stage/fs/ext4/ext4.h +@@ -1516,11 +1516,13 @@ struct ext4_sb_info { + atomic_t s_bal_success; /* we found long enough chunks */ + atomic_t s_bal_allocated; /* in blocks */ + atomic_t s_bal_ex_scanned; /* total extents scanned */ ++ atomic_t s_bal_groups_scanned; /* number of groups scanned */ + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ +- /* cX loop didn't find blocks */ +- atomic64_t s_bal_cX_failed[4]; ++ atomic64_t s_bal_cX_groups_considered[4]; ++ atomic64_t s_bal_cX_hits[4]; ++ atomic64_t s_bal_cX_failed[4]; /* cX loop didn't find blocks */ + atomic64_t s_bal_cX_skipped[3]; + atomic_t s_mb_buddies_generated; /* number of buddies generated */ + atomic64_t s_mb_generation_time; +@@ -2702,6 +2704,7 @@ extern const struct file_operations ext4 + extern int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v); + extern long ext4_mb_stats; + extern long ext4_mb_max_to_scan; ++extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset); + extern int ext4_mb_init(struct super_block *); + extern int ext4_mb_release(struct super_block *); + extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -2191,10 +2191,13 @@ static int ext4_mb_good_group_nolock(str + { + struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); + struct super_block *sb = ac->ac_sb; ++ struct ext4_sb_info *sbi = EXT4_SB(sb); + bool should_lock = ac->ac_flags & EXT4_MB_STRICT_CHECK; + ext4_grpblk_t free; + int ret = 0; + ++ if (sbi->s_mb_stats) ++ atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]); + if (should_lock) + ext4_lock_group(sb, group); + free = 
grp->bb_free; +@@ -2488,7 +2491,7 @@ repeat: + break; + } + /* Processed all groups and haven't found blocks */ +- if (i == ngroups) ++ if (sbi->s_mb_stats && i == ngroups) + atomic64_inc(&sbi->s_bal_cX_failed[cr]); + } + +@@ -2517,6 +2520,9 @@ repeat: + goto repeat; + } + } ++ ++ if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND) ++ atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]); + out: + if (!err && ac->ac_status != AC_STATUS_FOUND && first_err) + err = first_err; +@@ -2623,6 +2629,67 @@ const struct seq_operations ext4_mb_seq_ + .show = ext4_mb_seq_groups_show, + }; + ++int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) ++{ ++ struct super_block *sb = (struct super_block *)seq->private; ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ ++ seq_puts(seq, "mballoc:\n"); ++ if (!sbi->s_mb_stats) { ++ seq_puts(seq, "\tmb stats collection turned off.\n"); ++ seq_puts(seq, "\tTo enable, please write \"1\" to sysfs file mb_stats.\n"); ++ return 0; ++ } ++ seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs)); ++ seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success)); ++ ++ seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned)); ++ ++ seq_puts(seq, "\tcr0_stats:\n"); ++ seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0])); ++ seq_printf(seq, "\t\tgroups_considered: %llu\n", ++ atomic64_read(&sbi->s_bal_cX_groups_considered[0])); ++ seq_printf(seq, "\t\tuseless_loops: %llu\n", ++ atomic64_read(&sbi->s_bal_cX_failed[0])); ++ ++ seq_puts(seq, "\tcr1_stats:\n"); ++ seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1])); ++ seq_printf(seq, "\t\tgroups_considered: %llu\n", ++ atomic64_read(&sbi->s_bal_cX_groups_considered[1])); ++ seq_printf(seq, "\t\tuseless_loops: %llu\n", ++ atomic64_read(&sbi->s_bal_cX_failed[1])); ++ ++ seq_puts(seq, "\tcr2_stats:\n"); ++ seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2])); ++ seq_printf(seq, "\t\tgroups_considered: %llu\n", ++ atomic64_read(&sbi->s_bal_cX_groups_considered[2])); ++ seq_printf(seq, "\t\tuseless_loops: %llu\n", ++ atomic64_read(&sbi->s_bal_cX_failed[2])); ++ ++ seq_puts(seq, "\tcr3_stats:\n"); ++ seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3])); ++ seq_printf(seq, "\t\tgroups_considered: %llu\n", ++ atomic64_read(&sbi->s_bal_cX_groups_considered[3])); ++ seq_printf(seq, "\t\tuseless_loops: %llu\n", ++ atomic64_read(&sbi->s_bal_cX_failed[3])); ++ seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned)); ++ seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); ++ seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); ++ seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks)); ++ seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks)); ++ ++ seq_printf(seq, "\tbuddies_generated: %u/%u\n", ++ atomic_read(&sbi->s_mb_buddies_generated), ++ ext4_get_groups_count(sb)); ++ seq_printf(seq, "\tbuddies_time_used: %llu\n", ++ atomic64_read(&sbi->s_mb_generation_time)); ++ seq_printf(seq, "\tpreallocated: %u\n", ++ atomic_read(&sbi->s_mb_preallocated)); ++ seq_printf(seq, "\tdiscarded: %u\n", ++ atomic_read(&sbi->s_mb_discarded)); ++ return 0; ++} ++ + static int ext4_mb_check_and_update_prealloc(struct ext4_sb_info *sbi, + char *str, size_t cnt, + int update) +@@ -2777,97 +2844,6 @@ const struct file_operations ext4_seq_mb + .write = ext4_mb_last_group_write, + }; + +-static int mb_seq_alloc_show(struct seq_file *seq, 
void *v) +-{ +- struct super_block *sb = seq->private; +- struct ext4_sb_info *sbi = EXT4_SB(sb); +- +- seq_printf(seq, "mballoc:\n"); +- seq_printf(seq, "\tblocks: %u\n", atomic_read(&sbi->s_bal_allocated)); +- seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs)); +- seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success)); +- +- seq_printf(seq, "\textents_scanned: %u\n", +- atomic_read(&sbi->s_bal_ex_scanned)); +- seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals)); +- seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders)); +- seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks)); +- seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks)); +- +- seq_printf(seq, "\tuseless_c0_loops: %llu\n", +- (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[0])); +- seq_printf(seq, "\tuseless_c1_loops: %llu\n", +- (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[1])); +- seq_printf(seq, "\tuseless_c2_loops: %llu\n", +- (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[2])); +- seq_printf(seq, "\tuseless_c3_loops: %llu\n", +- (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[3])); +- seq_printf(seq, "\tskipped_c0_loops: %llu\n", +- (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[0])); +- seq_printf(seq, "\tskipped_c1_loops: %llu\n", +- (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[1])); +- seq_printf(seq, "\tskipped_c2_loops: %llu\n", +- (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[2])); +- seq_printf(seq, "\tbuddies_generated: %u\n", +- atomic_read(&sbi->s_mb_buddies_generated)); +- seq_printf(seq, "\tbuddies_time_used: %llu\n", +- atomic64_read(&sbi->s_mb_generation_time)); +- seq_printf(seq, "\tpreallocated: %u\n", +- atomic_read(&sbi->s_mb_preallocated)); +- seq_printf(seq, "\tdiscarded: %u\n", +- atomic_read(&sbi->s_mb_discarded)); +- return 0; +-} +- +-static ssize_t mb_seq_alloc_write(struct file *file, +- const char __user *buf, +- size_t cnt, loff_t *pos) +-{ +- struct ext4_sb_info *sbi = EXT4_SB(PDE_DATA(file_inode(file))); +- +- atomic_set(&sbi->s_bal_allocated, 0), +- atomic_set(&sbi->s_bal_reqs, 0), +- atomic_set(&sbi->s_bal_success, 0); +- +- atomic_set(&sbi->s_bal_ex_scanned, 0), +- atomic_set(&sbi->s_bal_goals, 0), +- atomic_set(&sbi->s_bal_2orders, 0), +- atomic_set(&sbi->s_bal_breaks, 0), +- atomic_set(&sbi->s_mb_lost_chunks, 0); +- +- atomic64_set(&sbi->s_bal_cX_failed[0], 0), +- atomic64_set(&sbi->s_bal_cX_failed[1], 0), +- atomic64_set(&sbi->s_bal_cX_failed[2], 0); +- atomic64_set(&sbi->s_bal_cX_failed[3], 0); +- +- atomic64_set(&sbi->s_bal_cX_skipped[0], 0), +- atomic64_set(&sbi->s_bal_cX_skipped[1], 0), +- atomic64_set(&sbi->s_bal_cX_skipped[2], 0); +- +- +- atomic_set(&sbi->s_mb_buddies_generated, 0); +- atomic64_set(&sbi->s_mb_generation_time, 0); +- +- atomic_set(&sbi->s_mb_preallocated, 0), +- atomic_set(&sbi->s_mb_discarded, 0); +- +- return cnt; +-} +- +-static int mb_seq_alloc_open(struct inode *inode, struct file *file) +-{ +- return single_open(file, mb_seq_alloc_show, PDE_DATA(inode)); +-} +- +-const struct file_operations ext4_mb_seq_alloc_fops = { +- .owner = THIS_MODULE, +- .open = mb_seq_alloc_open, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = single_release, +- .write = mb_seq_alloc_write, +-}; +- + int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v) + { + struct ext4_sb_info *sbi = EXT4_SB(m->private); +@@ -3350,9 +3326,10 @@ int ext4_mb_release(struct super_block * + (unsigned long 
long)atomic64_read(&sbi->s_bal_cX_skipped[1]), + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[2])); + ext4_msg(sb, KERN_INFO, +- "mballoc: %u extents scanned, %u goal hits, " ++ "mballoc: %u extents scanned, %u groups scanned, %u goal hits, " + "%u 2^N hits, %u breaks, %u lost", + atomic_read(&sbi->s_bal_ex_scanned), ++ atomic_read(&sbi->s_bal_groups_scanned), + atomic_read(&sbi->s_bal_goals), + atomic_read(&sbi->s_bal_2orders), + atomic_read(&sbi->s_bal_breaks), +@@ -3871,12 +3848,13 @@ static void ext4_mb_collect_stats(struct + { + struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); + +- if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { ++ if (sbi->s_mb_stats && ac->ac_g_ex.fe_len >= 1) { + atomic_inc(&sbi->s_bal_reqs); + atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); + if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len) + atomic_inc(&sbi->s_bal_success); + atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); ++ atomic_add(ac->ac_groups_scanned, &sbi->s_bal_groups_scanned); + if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && + ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) + atomic_inc(&sbi->s_bal_goals); +Index: linux-stage/fs/ext4/sysfs.c +=================================================================== +--- linux-stage.orig/fs/ext4/sysfs.c ++++ linux-stage/fs/ext4/sysfs.c +@@ -477,14 +477,14 @@ int ext4_register_sysfs(struct super_blo + sb); + proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_groups_ops, sb); ++ proc_create_single_data("mb_stats", 0444, sbi->s_proc, ++ ext4_seq_mb_stats_show, sb); + proc_create_data("prealloc_table", S_IRUGO, sbi->s_proc, + &ext4_seq_prealloc_table_fops, sb); + proc_create_data("mb_last_group", S_IRUGO, sbi->s_proc, + &ext4_seq_mb_last_group_fops, sb); + proc_create_single_data("mb_last_start", S_IRUGO, sbi->s_proc, + ext4_mb_seq_last_start_seq_show, sb); +- proc_create_data("mb_alloc_stats", S_IFREG | S_IRUGO | S_IWUSR, +- sbi->s_proc, &ext4_mb_seq_alloc_fops, sb); + } + return 0; + } diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-prefetch-block-bitmap-mount-option.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-prefetch-block-bitmap-mount-option.patch new file mode 100644 index 0000000..7634b7b --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-prefetch-block-bitmap-mount-option.patch @@ -0,0 +1,278 @@ +commit 3d392b2676bf3199863a1e5efb2c087ad9d442a4 +Author: Theodore Ts'o +AuthorDate: Fri Jul 17 00:14:40 2020 -0400 +Commit: Theodore Ts'o +CommitDate: Fri Aug 7 14:12:35 2020 -0400 + +ext4: add prefetch_block_bitmaps mount option + +For file systems where we can afford to keep the buddy bitmaps cached, +we can speed up initial writes to large file systems by starting to +load the block allocation bitmaps as soon as the file system is +mounted. This won't work well for _super_ large file systems, or +memory constrained systems, so we only enable this when it is +requested via a mount option. 
+ +Addresses-Google-Bug: 159488342 +Signed-off-by: Theodore Ts'o +Reviewed-by: Andreas Dilger +--- +Index: linux-stage/fs/ext4/ext4.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4.h ++++ linux-stage/fs/ext4/ext4.h +@@ -1171,6 +1171,7 @@ struct ext4_inode_info { + #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ + #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ + #define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ ++#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000 + #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ + #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ + #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ +@@ -2402,9 +2403,15 @@ struct ext4_lazy_init { + struct mutex li_list_mtx; + }; + ++enum ext4_li_mode { ++ EXT4_LI_MODE_PREFETCH_BBITMAP, ++ EXT4_LI_MODE_ITABLE, ++}; ++ + struct ext4_li_request { + struct super_block *lr_super; +- struct ext4_sb_info *lr_sbi; ++ enum ext4_li_mode lr_mode; ++ ext4_group_t lr_first_not_zeroed; + ext4_group_t lr_next_group; + struct list_head lr_request; + unsigned long lr_next_sched; +@@ -2704,6 +2711,12 @@ extern int ext4_mb_reserve_blocks(struct + extern void ext4_discard_preallocations(struct inode *, unsigned int); + extern int __init ext4_init_mballoc(void); + extern void ext4_exit_mballoc(void); ++extern ext4_group_t ext4_mb_prefetch(struct super_block *sb, ++ ext4_group_t group, ++ unsigned int nr, int *cnt); ++extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, ++ unsigned int nr); ++ + extern void ext4_free_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags); +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -2249,9 +2249,8 @@ static u64 available_blocks_count(struct + * Start prefetching @nr block bitmaps starting at @group. + * Return the next group which needs to be prefetched. + */ +-static ext4_group_t +-ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, +- unsigned int nr, int *cnt) ++ext4_group_t ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, ++ unsigned int nr, int *cnt) + { + ext4_group_t ngroups = ext4_get_groups_count(sb); + struct buffer_head *bh; +@@ -2302,9 +2301,8 @@ ext4_mb_prefetch(struct super_block *sb, + * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator(). 
+ */ + +-static void +-ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, +- unsigned int nr) ++void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, ++ unsigned int nr) + { + while (nr-- > 0) { + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -1623,6 +1623,7 @@ enum { + Opt_mb_c1_threshold, Opt_mb_c2_threshold, Opt_mb_c3_threshold, + Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, + Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, ++ Opt_prefetch_block_bitmaps, + }; + + static const match_table_t tokens = { +@@ -1716,6 +1717,7 @@ static const match_table_t tokens = { + {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_nombcache, "nombcache"}, + {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ ++ {Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"}, + {Opt_removed, "check=none"}, /* mount option from ext2/3 */ + {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ + {Opt_removed, "reservation"}, /* mount option from ext2/3 */ +@@ -1938,6 +1940,8 @@ static const struct mount_opts { + {Opt_mb_c3_threshold, 0, MOPT_STRING}, + {Opt_test_dummy_encryption, 0, MOPT_GTE0}, + {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, ++ {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS, ++ MOPT_SET}, + {Opt_err, 0, 0} + }; + +@@ -3249,15 +3253,34 @@ static void print_daily_error_info(struc + static int ext4_run_li_request(struct ext4_li_request *elr) + { + struct ext4_group_desc *gdp = NULL; +- ext4_group_t group, ngroups; +- struct super_block *sb; ++ struct super_block *sb = elr->lr_super; ++ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; ++ ext4_group_t group = elr->lr_next_group; ++ unsigned int prefetch_ios = 0; + int ret = 0; + u64 start_time; + +- sb = elr->lr_super; +- ngroups = EXT4_SB(sb)->s_groups_count; ++ if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { ++ elr->lr_next_group = ext4_mb_prefetch(sb, group, ++ EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios); ++ if (prefetch_ios) ++ ext4_mb_prefetch_fini(sb, elr->lr_next_group, ++ prefetch_ios); ++ trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, ++ prefetch_ios); ++ if (group >= elr->lr_next_group) { ++ ret = 1; ++ if (elr->lr_first_not_zeroed != ngroups && ++ !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) { ++ elr->lr_next_group = elr->lr_first_not_zeroed; ++ elr->lr_mode = EXT4_LI_MODE_ITABLE; ++ ret = 0; ++ } ++ } ++ return ret; ++ } + +- for (group = elr->lr_next_group; group < ngroups; group++) { ++ for (; group < ngroups; group++) { + gdp = ext4_get_group_desc(sb, group, NULL); + if (!gdp) { + ret = 1; +@@ -3275,9 +3298,10 @@ static int ext4_run_li_request(struct ex + start_time = ktime_get_real_ns(); + ret = ext4_init_inode_table(sb, group, + elr->lr_timeout ? 
0 : 1); ++ trace_ext4_lazy_itable_init(sb, group); + if (elr->lr_timeout == 0) { + elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) * +- elr->lr_sbi->s_li_wait_mult); ++ EXT4_SB(elr->lr_super)->s_li_wait_mult); + } + elr->lr_next_sched = jiffies + elr->lr_timeout; + elr->lr_next_group = group + 1; +@@ -3291,15 +3315,11 @@ static int ext4_run_li_request(struct ex + */ + static void ext4_remove_li_request(struct ext4_li_request *elr) + { +- struct ext4_sb_info *sbi; +- + if (!elr) + return; + +- sbi = elr->lr_sbi; +- + list_del(&elr->lr_request); +- sbi->s_li_request = NULL; ++ EXT4_SB(elr->lr_super)->s_li_request = NULL; + kfree(elr); + } + +@@ -3508,7 +3528,6 @@ static int ext4_li_info_new(void) + static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, + ext4_group_t start) + { +- struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_li_request *elr; + + elr = kzalloc(sizeof(*elr), GFP_KERNEL); +@@ -3516,8 +3535,13 @@ static struct ext4_li_request *ext4_li_r + return NULL; + + elr->lr_super = sb; +- elr->lr_sbi = sbi; +- elr->lr_next_group = start; ++ elr->lr_first_not_zeroed = start; ++ if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) ++ elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP; ++ else { ++ elr->lr_mode = EXT4_LI_MODE_ITABLE; ++ elr->lr_next_group = start; ++ } + + /* + * Randomize first schedule time of the request to +@@ -3547,8 +3571,9 @@ int ext4_register_li_request(struct supe + goto out; + } + +- if (first_not_zeroed == ngroups || sb_rdonly(sb) || +- !test_opt(sb, INIT_INODE_TABLE)) ++ if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) && ++ (first_not_zeroed == ngroups || sb_rdonly(sb) || ++ !test_opt(sb, INIT_INODE_TABLE))) + goto out; + + elr = ext4_li_request_new(sb, first_not_zeroed); +Index: linux-stage/include/trace/events/ext4.h +=================================================================== +--- linux-stage.orig/include/trace/events/ext4.h ++++ linux-stage/include/trace/events/ext4.h +@@ -2712,6 +2712,50 @@ TRACE_EVENT(ext4_error, + __entry->function, __entry->line) + ); + ++TRACE_EVENT(ext4_prefetch_bitmaps, ++ TP_PROTO(struct super_block *sb, ext4_group_t group, ++ ext4_group_t next, unsigned int prefetch_ios), ++ ++ TP_ARGS(sb, group, next, prefetch_ios), ++ ++ TP_STRUCT__entry( ++ __field( dev_t, dev ) ++ __field( __u32, group ) ++ __field( __u32, next ) ++ __field( __u32, ios ) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = sb->s_dev; ++ __entry->group = group; ++ __entry->next = next; ++ __entry->ios = prefetch_ios; ++ ), ++ ++ TP_printk("dev %d,%d group %u next %u ios %u", ++ MAJOR(__entry->dev), MINOR(__entry->dev), ++ __entry->group, __entry->next, __entry->ios) ++); ++ ++TRACE_EVENT(ext4_lazy_itable_init, ++ TP_PROTO(struct super_block *sb, ext4_group_t group), ++ ++ TP_ARGS(sb, group), ++ ++ TP_STRUCT__entry( ++ __field( dev_t, dev ) ++ __field( __u32, group ) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev = sb->s_dev; ++ __entry->group = group; ++ ), ++ ++ TP_printk("dev %d,%d group %u", ++ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group) ++); ++ + #endif /* _TRACE_EXT4_H */ + + /* This part must be outside protection */ diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-prefetching-for-block-allocation-bitmaps.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-prefetching-for-block-allocation-bitmaps.patch new file mode 100644 index 0000000..9360a72 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-add-prefetching-for-block-allocation-bitmaps.patch @@ -0,0 +1,291 @@ +commit 
cfd73237722135807967f389bcbda558a60a30d6 +Author: Alex Zhuravlev +AuthorDate: Tue Apr 21 10:54:07 2020 +0300 +Commit: Theodore Ts'o +CommitDate: Thu Aug 6 01:44:48 2020 -0400 + +ext4: add prefetching for block allocation bitmaps + +This should significantly improve bitmap loading, especially for flex +groups as it tries to load all bitmaps within a flex.group instead of +one by one synchronously. + +Prefetching is done in 8 * flex_bg groups, so it should be 8 +read-ahead reads for a single allocating thread. At the end of +allocation the thread waits for read-ahead completion and initializes +buddy information so that read-aheads are not lost in case of memory +pressure. + +At cr=0 the number of prefetching IOs is limited per allocation +context to prevent a situation when mballoc loads thousands of bitmaps +looking for a perfect group and ignoring groups with good chunks. + +Together with the patch "ext4: limit scanning of uninitialized groups" +the mount time (which includes few tiny allocations) of a 1PB +filesystem is reduced significantly: + + 0% full 50%-full unpatched patched + mount time 33s 9279s 563s + +[ Restructured by tytso; removed the state flags in the allocation +context, so it can be used to lazily prefetch the allocation bitmaps +immediately after the file system is mounted. Skip prefetching +block groups which are uninitialized. Finally pass in the +REQ_RAHEAD flag to the block layer while prefetching. ] + +Signed-off-by: Alex Zhuravlev +Reviewed-by: Andreas Dilger +Signed-off-by: Theodore Ts'o + +--- +Index: linux-stage/fs/ext4/balloc.c +=================================================================== +--- linux-stage.orig/fs/ext4/balloc.c ++++ linux-stage/fs/ext4/balloc.c +@@ -498,7 +498,8 @@ ext4_read_block_bitmap_nowait(struct sup + trace_ext4_read_block_bitmap_load(sb, block_group); + bh->b_end_io = ext4_end_bitmap_read; + get_bh(bh); +- submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); ++ submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO | ++ (ignore_locked ? REQ_RAHEAD : 0), bh); + return bh; + verify: + err = ext4_validate_block_bitmap(sb, desc, block_group, bh); +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -2246,97 +2246,93 @@ static u64 available_blocks_count(struct + } + + /* +- * each allocation context (i.e. a thread doing allocation) has own +- * sliding prefetch window of @s_mb_prefetch size which starts at the +- * very first goal and moves ahead of scaning. +- * a side effect is that subsequent allocations will likely find +- * the bitmaps in cache or at least in-flight. ++ * Start prefetching @nr block bitmaps starting at @group. ++ * Return the next group which needs to be prefetched. 
+ */ +-static void +-ext4_mb_prefetch(struct ext4_allocation_context *ac, +- ext4_group_t start) ++static ext4_group_t ++ext4_mb_prefetch(struct super_block *sb, ext4_group_t group, ++ unsigned int nr, int *cnt) + { +- struct super_block *sb = ac->ac_sb; + ext4_group_t ngroups = ext4_get_groups_count(sb); +- struct ext4_sb_info *sbi = EXT4_SB(sb); +- struct ext4_group_info *grp; +- ext4_group_t group = start; + struct buffer_head *bh; +- int nr; +- +- /* limit prefetching at cr=0, otherwise mballoc can +- * spend a lot of time loading imperfect groups */ +- if (ac->ac_criteria < 2 && ac->ac_prefetch_ios >= sbi->s_mb_prefetch_limit) +- return; +- +- /* batch prefetching to get few READs in flight */ +- nr = ac->ac_prefetch - group; +- if (ac->ac_prefetch < group) +- /* wrapped to the first groups */ +- nr += ngroups; +- if (nr > 0) +- return; +- BUG_ON(nr < 0); ++ struct blk_plug plug; + +- nr = sbi->s_mb_prefetch; +- if (ext4_has_feature_flex_bg(sb)) { +- /* align to flex_bg to get more bitmas with a single IO */ +- nr = (group / sbi->s_mb_prefetch) * sbi->s_mb_prefetch; +- nr = nr + sbi->s_mb_prefetch - group; +- } ++ blk_start_plug(&plug); + while (nr-- > 0) { +- grp = ext4_get_group_info(sb, group); +- /* prevent expensive getblk() on groups w/ IO in progress */ +- if (EXT4_MB_GRP_TEST(grp) || EXT4_MB_GRP_TEST_AND_SET_READ(grp)) +- goto next; +- +- /* ignore empty groups - those will be skipped +- * during the scanning as well */ +- if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) { ++ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, ++ NULL); ++ struct ext4_group_info *grp = ext4_get_group_info(sb, group); ++ ++ /* ++ * Prefetch block groups with free blocks; but don't ++ * bother if it is marked uninitialized on disk, since ++ * it won't require I/O to read. Also only try to ++ * prefetch once, so we avoid getblk() call, which can ++ * be expensive. ++ */ ++ if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) && ++ EXT4_MB_GRP_NEED_INIT(grp) && ++ ext4_free_group_clusters(sb, gdp) > 0 && ++ !(ext4_has_group_desc_csum(sb) && ++ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { + bh = ext4_read_block_bitmap_nowait(sb, group, 1); + if (bh && !IS_ERR(bh)) { +- if (!buffer_uptodate(bh)) +- ac->ac_prefetch_ios++; ++ if (!buffer_uptodate(bh) && cnt) ++ (*cnt)++; + brelse(bh); + } + } +-next: + if (++group >= ngroups) + group = 0; + } +- ac->ac_prefetch = group; ++ blk_finish_plug(&plug); ++ return group; + } + ++/* ++ * Prefetching reads the block bitmap into the buffer cache; but we ++ * need to make sure that the buddy bitmap in the page cache has been ++ * initialized. Note that ext4_mb_init_group() will block if the I/O ++ * is not yet completed, or indeed if it was not initiated by ++ * ext4_mb_prefetch did not start the I/O. ++ * ++ * TODO: We should actually kick off the buddy bitmap setup in a work ++ * queue when the buffer I/O is completed, so that we don't block ++ * waiting for the block allocation bitmap read to finish when ++ * ext4_mb_prefetch_fini is called from ext4_mb_regular_allocator(). 
++ */ ++ + static void +-ext4_mb_prefetch_fini(struct ext4_allocation_context *ac) ++ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, ++ unsigned int nr) + { +- struct ext4_group_info *grp; +- ext4_group_t group; +- int nr, rc; +- +- /* initialize last window of prefetched groups */ +- nr = ac->ac_prefetch_ios; +- if (nr > EXT4_SB(ac->ac_sb)->s_mb_prefetch) +- nr = EXT4_SB(ac->ac_sb)->s_mb_prefetch; +- group = ac->ac_prefetch; + while (nr-- > 0) { +- grp = ext4_get_group_info(ac->ac_sb, group); +- if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) { +- rc = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS); +- if (rc) ++ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, ++ NULL); ++ struct ext4_group_info *grp = ext4_get_group_info(sb, group); ++ ++ if (!group) ++ group = ext4_get_groups_count(sb); ++ group--; ++ grp = ext4_get_group_info(sb, group); ++ ++ if (EXT4_MB_GRP_NEED_INIT(grp) && ++ ext4_free_group_clusters(sb, gdp) > 0 && ++ !(ext4_has_group_desc_csum(sb) && ++ (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { ++ if (ext4_mb_init_group(sb, group, GFP_NOFS)) + break; + } +- if (group-- == 0) +- group = ext4_get_groups_count(ac->ac_sb) - 1; + } + } + + static noinline_for_stack int + ext4_mb_regular_allocator(struct ext4_allocation_context *ac) + { +- ext4_group_t ngroups, group, i; ++ ext4_group_t prefetch_grp = 0, ngroups, group, i; + int cr = -1; + int err = 0, first_err = 0; ++ unsigned int nr = 0, prefetch_ios = 0; + struct ext4_sb_info *sbi; + struct super_block *sb; + struct ext4_buddy e4b; +@@ -2420,7 +2416,7 @@ repeat: + * from the goal value specified + */ + group = ac->ac_g_ex.fe_group; +- ac->ac_prefetch = group; ++ prefetch_grp = group; + + for (i = 0; i < ngroups; group++, i++) { + int ret = 0; +@@ -2432,7 +2428,28 @@ repeat: + if (group >= ngroups) + group = 0; + +- ext4_mb_prefetch(ac, group); ++ /* ++ * Batch reads of the block allocation bitmaps ++ * to get multiple READs in flight; limit ++ * prefetching at cr=0/1, otherwise mballoc can ++ * spend a lot of time loading imperfect groups ++ */ ++ if ((prefetch_grp == group) && ++ (cr > 1 || ++ prefetch_ios < sbi->s_mb_prefetch_limit)) { ++ unsigned int curr_ios = prefetch_ios; ++ ++ nr = sbi->s_mb_prefetch; ++ if (ext4_has_feature_flex_bg(sb)) { ++ nr = (group / sbi->s_mb_prefetch) * ++ sbi->s_mb_prefetch; ++ nr = nr + sbi->s_mb_prefetch - group; ++ } ++ prefetch_grp = ext4_mb_prefetch(sb, group, ++ nr, &prefetch_ios); ++ if (prefetch_ios == curr_ios) ++ nr = 0; ++ } + + /* This now checks without needing the buddy page */ + ret = ext4_mb_good_group_nolock(ac, group, cr); +@@ -2512,8 +2529,8 @@ out: + ac->ac_b_ex.fe_len, ac->ac_o_ex.fe_len, ac->ac_status, + ac->ac_flags, cr, err); + +- /* use prefetched bitmaps to init buddy so that read info is not lost */ +- ext4_mb_prefetch_fini(ac); ++ if (nr) ++ ext4_mb_prefetch_fini(sb, prefetch_grp, nr); + return err; + } + +@@ -3012,6 +3029,26 @@ static int ext4_mb_init_backend(struct s + goto err_freebuddy; + } + ++ if (ext4_has_feature_flex_bg(sb)) { ++ /* a single flex group is supposed to be read by a single IO */ ++ sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex; ++ sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ ++ } else { ++ sbi->s_mb_prefetch = 32; ++ } ++ if (sbi->s_mb_prefetch > ext4_get_groups_count(sb)) ++ sbi->s_mb_prefetch = ext4_get_groups_count(sb); ++ /* now many real IOs to prefetch within a single allocation at cr=0 ++ * given cr=0 is an CPU-related optimization we shouldn't try to ++ * load 
too many groups, at some point we should start to use what ++ * we've got in memory. ++ * with an average random access time 5ms, it'd take a second to get ++ * 200 groups (* N with flex_bg), so let's make this limit 4 ++ */ ++ sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4; ++ if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb)) ++ sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb); ++ + return 0; + + err_freebuddy: diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-allow-ext4_get_group_info-to-fail.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-allow-ext4_get_group_info-to-fail.patch new file mode 100644 index 0000000..f3fb31e --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-allow-ext4_get_group_info-to-fail.patch @@ -0,0 +1,449 @@ +commit 5354b2af34064a4579be8bc0e2f15a7b70f14b5f +Author: Theodore Ts'o +AuthorDate: Sat Apr 29 00:06:28 2023 -0400 +Commit: Theodore Ts'o +CommitDate: Sat May 13 18:02:46 2023 -0400 + +ext4: allow ext4_get_group_info() to fail + +Previously, ext4_get_group_info() would treat an invalid group number +as BUG(), since in theory it should never happen. However, if a +malicious attaker (or fuzzer) modifies the superblock via the block +device while it is the file system is mounted, it is possible for +s_first_data_block to get set to a very large number. In that case, +when calculating the block group of some block number (such as the +starting block of a preallocation region), could result in an +underflow and very large block group number. Then the BUG_ON check in +ext4_get_group_info() would fire, resutling in a denial of service +attack that can be triggered by root or someone with write access to +the block device. + +For a quality of implementation perspective, it's best that even if +the system administrator does something that they shouldn't, that it +will not trigger a BUG. So instead of BUG'ing, ext4_get_group_info() +will call ext4_error and return NULL. We also add fallback code in +all of the callers of ext4_get_group_info() that it might NULL. + +Also, since ext4_get_group_info() was already borderline to be an +inline function, un-inline it. The results in a next reduction of the +compiled text size of ext4 by roughly 2k. + +Cc: stable@kernel.org +Link: https://lore.kernel.org/r/20230430154311.579720-2-tytso@mit.edu +Reported-by: syzbot+e2efa3efc15a1c9e95c3@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?id=69b28112e098b070f639efb356393af3ffec4220 +Signed-off-by: Theodore Ts'o +Reviewed-by: Jan Kara +--- +Index: linux-stage/fs/ext4/balloc.c +=================================================================== +--- linux-stage.orig/fs/ext4/balloc.c ++++ linux-stage/fs/ext4/balloc.c +@@ -303,6 +303,22 @@ struct ext4_group_desc * ext4_get_group_ + return desc; + } + ++struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ++ ext4_group_t group) ++{ ++ struct ext4_group_info **grp_info; ++ long indexv, indexh; ++ ++ if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) { ++ ext4_error(sb, "invalid group %u", group); ++ return NULL; ++ } ++ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); ++ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); ++ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); ++ return grp_info[indexh]; ++} ++ + /* + * Return the block number which was discovered to be invalid, or 0 if + * the block bitmap is valid. 
+@@ -372,7 +388,7 @@ static int ext4_validate_block_bitmap(st + + if (buffer_verified(bh)) + return 0; +- if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) ++ if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; + + ext4_lock_group(sb, block_group); +Index: linux-stage/fs/ext4/ext4.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4.h ++++ linux-stage/fs/ext4/ext4.h +@@ -2554,6 +2554,8 @@ extern void ext4_check_blocks_bitmap(str + extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, + ext4_group_t block_group, + struct buffer_head ** bh); ++extern struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ++ ext4_group_t group); + extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); + + extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, +@@ -3129,19 +3131,6 @@ static inline void ext4_isize_set(struct + raw_inode->i_size_high = cpu_to_le32(i_size >> 32); + } + +-static inline +-struct ext4_group_info *ext4_get_group_info(struct super_block *sb, +- ext4_group_t group) +-{ +- struct ext4_group_info **grp_info; +- long indexv, indexh; +- BUG_ON(group >= EXT4_SB(sb)->s_groups_count); +- indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); +- indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); +- grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); +- return grp_info[indexh]; +-} +- + /* + * Reading s_groups_count requires using smp_rmb() afterwards. See + * the locking protocol documented in the comments of ext4_group_add() +Index: linux-stage/fs/ext4/ialloc.c +=================================================================== +--- linux-stage.orig/fs/ext4/ialloc.c ++++ linux-stage/fs/ext4/ialloc.c +@@ -87,7 +87,7 @@ static int ext4_validate_inode_bitmap(st + + if (buffer_verified(bh)) + return 0; +- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; + + ext4_lock_group(sb, block_group); +@@ -296,7 +296,7 @@ void ext4_free_inode(handle_t *handle, s + bitmap_bh = NULL; + goto error_return; + } +- if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { ++ if (!grp || unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { + fatal = -EFSCORRUPTED; + goto error_return; + } +@@ -916,13 +916,13 @@ got_group: + + grp = ext4_get_group_info(sb, group); + /* Skip groups with already-known suspicious inode tables */ +- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + goto next_group; + + brelse(inode_bitmap_bh); + inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); + /* Skip groups with suspicious inode tables */ +- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || ++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || + IS_ERR(inode_bitmap_bh)) { + inode_bitmap_bh = NULL; + goto next_group; +@@ -1047,6 +1047,10 @@ got: + int free; + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + ++ if (!grp) { ++ err = -EFSCORRUPTED; ++ goto out; ++ } + down_read(&grp->alloc_sem); /* protect vs itable lazyinit */ + ext4_lock_group(sb, group); /* while we modify the bg desc */ + free = EXT4_INODES_PER_GROUP(sb) - +@@ -1395,7 +1399,7 @@ int ext4_init_inode_table(struct super_b + } + + gdp = ext4_get_group_desc(sb, group, &group_desc_bh); +- if (!gdp) ++ if (!gdp || !grp) + goto out; + + /* +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -746,6 +746,8 
@@ static int __mb_check_buddy(struct ext4_ + MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); + + grp = ext4_get_group_info(sb, e4b->bd_group); ++ if (!grp) ++ return NULL; + list_for_each(cur, &grp->bb_prealloc_list) { + ext4_group_t groupnr; + struct ext4_prealloc_space *pa; +@@ -1060,10 +1062,10 @@ mb_set_largest_free_order(struct super_b + } + + static noinline_for_stack +-int ext4_mb_generate_buddy(struct super_block *sb, +- void *buddy, void *bitmap, ext4_group_t group) ++void ext4_mb_generate_buddy(struct super_block *sb, ++ void *buddy, void *bitmap, ext4_group_t group, ++ struct ext4_group_info *grp) + { +- struct ext4_group_info *grp = ext4_get_group_info(sb, group); + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); + ext4_grpblk_t i = 0; +@@ -1108,7 +1110,6 @@ int ext4_mb_generate_buddy(struct super_ + grp->bb_free = free; + ext4_mark_group_bitmap_corrupted(sb, group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); +- return -EIO; + } + mb_set_largest_free_order(sb, grp); + mb_update_avg_fragment_size(sb, grp); +@@ -1118,8 +1119,6 @@ int ext4_mb_generate_buddy(struct super_ + period = get_cycles() - period; + atomic_inc(&sbi->s_mb_buddies_generated); + atomic64_add(period, &sbi->s_mb_generation_time); +- +- return 0; + } + + static void mb_regenerate_buddy(struct ext4_buddy *e4b) +@@ -1137,7 +1136,7 @@ static void mb_regenerate_buddy(struct e + (e4b->bd_sb->s_blocksize_bits + 2)); + + ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy, +- e4b->bd_bitmap, e4b->bd_group); ++ e4b->bd_bitmap, e4b->bd_group, e4b->bd_info); + } + + /* The buddy information is attached the buddy cache inode +@@ -1209,6 +1208,8 @@ static int ext4_mb_init_cache(struct pag + break; + + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) ++ continue; + /* + * If page is uptodate then we came here after online resize + * which added some new uninitialized group info structs, so +@@ -1274,6 +1275,10 @@ static int ext4_mb_init_cache(struct pag + group, page->index, i * blocksize); + trace_ext4_mb_buddy_bitmap_load(sb, group); + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) { ++ err = -EFSCORRUPTED; ++ goto out; ++ } + grinfo->bb_fragments = 0; + memset(grinfo->bb_counters, 0, + sizeof(*grinfo->bb_counters) * +@@ -1284,7 +1289,7 @@ static int ext4_mb_init_cache(struct pag + ext4_lock_group(sb, group); + /* init the buddy */ + memset(data, 0xff, blocksize); +- err = ext4_mb_generate_buddy(sb, data, incore, group); ++ ext4_mb_generate_buddy(sb, data, incore, group, grinfo); + ext4_unlock_group(sb, group); + incore = NULL; + } else { +@@ -1399,6 +1404,9 @@ int ext4_mb_init_group(struct super_bloc + might_sleep(); + mb_debug(sb, "init group %u\n", group); + this_grp = ext4_get_group_info(sb, group); ++ if (!this_grp) ++ return -EFSCORRUPTED; ++ + /* + * This ensures that we don't reinit the buddy cache + * page which map to the group from which we are already +@@ -1473,6 +1481,8 @@ ext4_mb_load_buddy_gfp(struct super_bloc + + blocks_per_page = PAGE_SIZE / sb->s_blocksize; + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ return -EFSCORRUPTED; + + e4b->bd_blkbits = sb->s_blocksize_bits; + e4b->bd_info = grp; +@@ -2197,6 +2207,8 @@ int ext4_mb_find_by_goal(struct ext4_all + struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); + struct ext4_free_extent ex; + ++ if (!grp) ++ return -EFSCORRUPTED; + if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) + return 0; + if (grp->bb_free == 0) +@@ -2427,7 +2439,7 @@ static bool ext4_mb_good_group(struct ex 
+ + BUG_ON(cr < 0 || cr >= 4); + +- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) ++ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) || !grp) + return false; + + free = grp->bb_free; +@@ -2490,6 +2502,8 @@ static int ext4_mb_good_group_nolock(str + ext4_grpblk_t free; + int ret = 0; + ++ if (!grp) ++ return -EFSCORRUPTED; + if (sbi->s_mb_stats) + atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]); + if (should_lock) +@@ -2564,7 +2578,7 @@ ext4_group_t ext4_mb_prefetch(struct sup + * prefetch once, so we avoid getblk() call, which can + * be expensive. + */ +- if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) && ++ if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && + EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && +@@ -2609,7 +2623,7 @@ void ext4_mb_prefetch_fini(struct super_ + group--; + grp = ext4_get_group_info(sb, group); + +- if (EXT4_MB_GRP_NEED_INIT(grp) && ++ if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { +@@ -2883,6 +2897,8 @@ static int ext4_mb_seq_groups_show(struc + sizeof(struct ext4_group_info); + + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) ++ return 0; + /* Load the group info in memory only if not already loaded. */ + if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { + err = ext4_mb_load_buddy(sb, group, &e4b); +@@ -2897,7 +2913,7 @@ static int ext4_mb_seq_groups_show(struc + if (gdp != NULL) + free = ext4_free_group_clusters(sb, gdp); + +- memcpy(&sg, ext4_get_group_info(sb, group), i); ++ memcpy(&sg, grinfo, i); + + if (buddy_loaded) + ext4_mb_unload_buddy(&e4b); +@@ -3330,8 +3346,12 @@ static int ext4_mb_init_backend(struct s + + err_freebuddy: + cachep = get_groupinfo_cache(sb->s_blocksize_bits); +- while (i-- > 0) +- kmem_cache_free(cachep, ext4_get_group_info(sb, i)); ++ while (i-- > 0) { ++ struct ext4_group_info *grp = ext4_get_group_info(sb, i); ++ ++ if (grp) ++ kmem_cache_free(cachep, grp); ++ } + i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); +@@ -3634,6 +3654,8 @@ int ext4_mb_release(struct super_block * + for (i = 0; i < ngroups; i++) { + cond_resched(); + grinfo = ext4_get_group_info(sb, i); ++ if (!grinfo) ++ continue; + mb_group_bb_bitmap_free(grinfo); + ext4_lock_group(sb, i); + count = ext4_mb_cleanup_pa(grinfo); +@@ -4480,6 +4502,8 @@ static void ext4_mb_generate_from_freeli + struct ext4_free_data *entry; + + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ return; + n = rb_first(&(grp->bb_free_root)); + + while (n) { +@@ -4549,6 +4573,9 @@ int ext4_mb_generate_from_pa(struct supe + int err; + int len; + ++ if (!grp) ++ return -EIO; ++ + gdp = ext4_get_group_desc(sb, group, NULL); + if (gdp == NULL) + return -EIO; +@@ -4769,6 +4796,8 @@ ext4_mb_new_inode_pa(struct ext4_allocat + + ei = EXT4_I(ac->ac_inode); + grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); ++ if (!grp) ++ return; + + pa->pa_obj_lock = &ei->i_prealloc_lock; + pa->pa_inode = ac->ac_inode; +@@ -4825,6 +4854,8 @@ ext4_mb_new_group_pa(struct ext4_allocat + atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); + + grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); ++ if (!grp) ++ return; + lg = ac->ac_lg; + BUG_ON(lg == NULL); + +@@ -4953,6 +4984,8 @@ ext4_mb_discard_group_preallocations(str + int err; + int free = 0; + ++ if (!grp) ++ return 0; + mb_debug(sb, "discard preallocation for group %u\n", group); + 
if (list_empty(&grp->bb_prealloc_list)) + goto out_dbg; +@@ -5187,6 +5220,9 @@ static inline void ext4_mb_show_pa(struc + struct ext4_prealloc_space *pa; + ext4_grpblk_t start; + struct list_head *cur; ++ ++ if (!grp) ++ continue; + ext4_lock_group(sb, i); + list_for_each(cur, &grp->bb_prealloc_list) { + pa = list_entry(cur, struct ext4_prealloc_space, +@@ -5906,6 +5942,7 @@ void ext4_free_blocks(handle_t *handle, + struct buffer_head *bitmap_bh = NULL; + struct super_block *sb = inode->i_sb; + struct ext4_group_desc *gdp; ++ struct ext4_group_info *grp; + unsigned int overflow; + ext4_grpblk_t bit; + struct buffer_head *gd_bh; +@@ -5990,8 +6027,8 @@ do_more: + overflow = 0; + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); + +- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT( +- ext4_get_group_info(sb, block_group)))) ++ grp = ext4_get_group_info(sb, block_group); ++ if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) + return; + + /* +@@ -6537,6 +6574,8 @@ int ext4_trim_fs(struct super_block *sb, + + for (group = first_group; group <= last_group; group++) { + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ continue; + /* We only do this if the grp has never been initialized */ + if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { + ret = ext4_mb_init_group(sb, group, GFP_NOFS); +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -967,6 +967,8 @@ void ext4_mark_group_bitmap_corrupted(st + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); + int ret; + ++ if (!grp || !gdp) ++ return; + if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) { + ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, + &grp->bb_state); diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-avoiod-unnecessary-spreading-of-allocations.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-avoiod-unnecessary-spreading-of-allocations.patch new file mode 100644 index 0000000..9bb5e2f --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-avoiod-unnecessary-spreading-of-allocations.patch @@ -0,0 +1,69 @@ +commit 1940265ede6683f6317cba0d428ce6505eaca944 +Author: Jan Kara +AuthorDate: Thu Sep 8 11:21:25 2022 +0200 +Commit: Theodore Ts'o +CommitDate: Wed Sep 21 22:11:41 2022 -0400 + +ext4: avoid unnecessary spreading of allocations among groups + +mb_set_largest_free_order() updates lists containing groups with largest +chunk of free space of given order. The way it updates it leads to +always moving the group to the tail of the list. Thus allocations +looking for free space of given order effectively end up cycling through +all groups (and due to initialization in last to first order). This +spreads allocations among block groups which reduces performance for +rotating disks or low-end flash media. Change +mb_set_largest_free_order() to only update lists if the order of the +largest free chunk in the group changed. 
+ +Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning") +CC: stable@kernel.org +Reported-and-tested-by: Stefan Wahren +Tested-by: Ojaswin Mujoo +Reviewed-by: Ritesh Harjani (IBM) +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/ +Link: https://lore.kernel.org/r/20220908092136.11770-2-jack@suse.cz +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -1078,23 +1078,25 @@ mb_set_largest_free_order(struct super_b + struct ext4_sb_info *sbi = EXT4_SB(sb); + int i; + +- if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) { ++ for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) ++ if (grp->bb_counters[i] > 0) ++ break; ++ /* No need to move between order lists? */ ++ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || ++ i == grp->bb_largest_free_order) { ++ grp->bb_largest_free_order = i; ++ return; ++ } ++ ++ if (grp->bb_largest_free_order >= 0) { + write_lock(&sbi->s_mb_largest_free_orders_locks[ + grp->bb_largest_free_order]); + list_del_init(&grp->bb_largest_free_order_node); + write_unlock(&sbi->s_mb_largest_free_orders_locks[ + grp->bb_largest_free_order]); + } +- grp->bb_largest_free_order = -1; /* uninit */ +- +- for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) { +- if (grp->bb_counters[i] > 0) { +- grp->bb_largest_free_order = i; +- break; +- } +- } +- if (test_opt2(sb, MB_OPTIMIZE_SCAN) && +- grp->bb_largest_free_order >= 0 && grp->bb_free) { ++ grp->bb_largest_free_order = i; ++ if (grp->bb_largest_free_order >= 0 && grp->bb_free) { + write_lock(&sbi->s_mb_largest_free_orders_locks[ + grp->bb_largest_free_order]); + list_add_tail(&grp->bb_largest_free_order_node, diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-drop-s_mb_bal_lock-convert-protected-fields-to-atomic.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-drop-s_mb_bal_lock-convert-protected-fields-to-atomic.patch new file mode 100644 index 0000000..d029ee1 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-drop-s_mb_bal_lock-convert-protected-fields-to-atomic.patch @@ -0,0 +1,102 @@ +commit 67d25186046145748d5fe4c5019d832215e01c1e +Author: Harshad Shirwadkar +AuthorDate: Thu Apr 1 10:21:23 2021 -0700 +Commit: Theodore Ts'o +CommitDate: Fri Apr 9 11:34:58 2021 -0400 + +ext4: drop s_mb_bal_lock and convert protected fields to atomic + +s_mb_buddies_generated gets used later in this patch series to +determine if the cr 0 and cr 1 optimziations should be performed or +not. Currently, s_mb_buddies_generated is protected under a +spin_lock. In the allocation path, it is better if we don't depend on +the lock and instead read the value atomically. In order to do that, +we drop s_bal_lock altogether and we convert the only two protected +fields by it s_mb_buddies_generated and s_mb_generation_time to atomic +type. 
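+
+A userspace sketch of the same pattern (illustrative only, using C11 atomics in place of the kernel's atomic_t/atomic64_t): the two statistics are updated independently and the shared spinlock disappears from the buddy-generation path.
+
+        #include <stdatomic.h>
+        #include <stdio.h>
+
+        /* Previously: plain counters bumped under a single spinlock. */
+        static atomic_uint buddies_generated;
+        static atomic_ullong generation_time;
+
+        static void account_buddy_generated(unsigned long long cycles)
+        {
+                /* Lock-free: each counter is incremented atomically on its own. */
+                atomic_fetch_add(&buddies_generated, 1);
+                atomic_fetch_add(&generation_time, cycles);
+        }
+
+        int main(void)
+        {
+                account_buddy_generated(1234);
+                printf("%u buddies generated in %llu cycles\n",
+                       atomic_load(&buddies_generated),
+                       (unsigned long long)atomic_load(&generation_time));
+                return 0;
+        }
+
+Readers in the allocation path can likewise sample the counters with a plain atomic read instead of taking a lock, which is the point of the conversion.
+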
+ +Signed-off-by: Harshad Shirwadkar +Reviewed-by: Andreas Dilger +Reviewed-by: Ritesh Harjani +Link: https://lore.kernel.org/r/20210401172129.189766-2-harshadshirwadkar@gmail.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/ext4.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4.h ++++ linux-stage/fs/ext4/ext4.h +@@ -1522,9 +1522,8 @@ struct ext4_sb_info { + /* cX loop didn't find blocks */ + atomic64_t s_bal_cX_failed[4]; + atomic64_t s_bal_cX_skipped[3]; +- spinlock_t s_bal_lock; +- unsigned long s_mb_buddies_generated; +- unsigned long long s_mb_generation_time; ++ atomic_t s_mb_buddies_generated; /* number of buddies generated */ ++ atomic64_t s_mb_generation_time; + atomic_t s_mb_lost_chunks; + atomic_t s_mb_preallocated; + atomic_t s_mb_discarded; +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -824,10 +824,8 @@ int ext4_mb_generate_buddy(struct super_ + clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); + + period = get_cycles() - period; +- spin_lock(&sbi->s_bal_lock); +- sbi->s_mb_buddies_generated++; +- sbi->s_mb_generation_time += period; +- spin_unlock(&sbi->s_bal_lock); ++ atomic_inc(&sbi->s_mb_buddies_generated); ++ atomic64_add(period, &sbi->s_mb_generation_time); + + return 0; + } +@@ -2810,9 +2808,10 @@ static int mb_seq_alloc_show(struct seq_ + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[1])); + seq_printf(seq, "\tskipped_c2_loops: %llu\n", + (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[2])); +- seq_printf(seq, "\tbuddies_generated: %lu\n", +- sbi->s_mb_buddies_generated); +- seq_printf(seq, "\tbuddies_time_used: %llu\n", sbi->s_mb_generation_time); ++ seq_printf(seq, "\tbuddies_generated: %u\n", ++ atomic_read(&sbi->s_mb_buddies_generated)); ++ seq_printf(seq, "\tbuddies_time_used: %llu\n", ++ atomic64_read(&sbi->s_mb_generation_time)); + seq_printf(seq, "\tpreallocated: %u\n", + atomic_read(&sbi->s_mb_preallocated)); + seq_printf(seq, "\tdiscarded: %u\n", +@@ -2846,8 +2845,8 @@ static ssize_t mb_seq_alloc_write(struct + atomic64_set(&sbi->s_bal_cX_skipped[2], 0); + + +- sbi->s_mb_buddies_generated = 0; +- sbi->s_mb_generation_time = 0; ++ atomic_set(&sbi->s_mb_buddies_generated, 0); ++ atomic64_set(&sbi->s_mb_generation_time, 0); + + atomic_set(&sbi->s_mb_preallocated, 0), + atomic_set(&sbi->s_mb_discarded, 0); +@@ -3163,7 +3162,6 @@ int ext4_mb_init(struct super_block *sb) + } while (i <= sb->s_blocksize_bits + 1); + + spin_lock_init(&sbi->s_md_lock); +- spin_lock_init(&sbi->s_bal_lock); + sbi->s_mb_free_pending = 0; + INIT_LIST_HEAD(&sbi->s_freed_data_list); + +@@ -3360,9 +3358,9 @@ int ext4_mb_release(struct super_block * + atomic_read(&sbi->s_bal_breaks), + atomic_read(&sbi->s_mb_lost_chunks)); + ext4_msg(sb, KERN_INFO, +- "mballoc: %lu generated and it took %Lu", +- sbi->s_mb_buddies_generated, +- sbi->s_mb_generation_time); ++ "mballoc: %u generated and it took %Lu", ++ atomic_read(&sbi->s_mb_buddies_generated), ++ atomic64_read(&sbi->s_mb_generation_time)); + ext4_msg(sb, KERN_INFO, + "mballoc: %u preallocated, %u discarded", + atomic_read(&sbi->s_mb_preallocated), diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-ensure-ext4_mb_prefetch_fini-called-for-all-prefetched-bg.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-ensure-ext4_mb_prefetch_fini-called-for-all-prefetched-bg.patch new file mode 100644 index 
0000000..6f9604f --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-ensure-ext4_mb_prefetch_fini-called-for-all-prefetched-bg.patch @@ -0,0 +1,85 @@ +commit 4f3d1e4533b0982034f316ace85415d3bc57e3da +Author: Ojaswin Mujoo +AuthorDate: Tue May 30 18:03:47 2023 +0530 +Commit: Theodore Ts'o +CommitDate: Mon Jun 26 19:34:56 2023 -0400 + +ext4: Ensure ext4_mb_prefetch_fini() is called for all prefetched BGs + +Before this patch, the call stack in ext4_run_li_request is as follows: + + /* + * nr = no. of BGs we want to fetch (=s_mb_prefetch) + * prefetch_ios = no. of BGs not uptodate after + * ext4_read_block_bitmap_nowait() + */ + next_group = ext4_mb_prefetch(sb, group, nr, prefetch_ios); + ext4_mb_prefetch_fini(sb, next_group prefetch_ios); + +ext4_mb_prefetch_fini() will only try to initialize buddies for BGs in +range [next_group - prefetch_ios, next_group). This is incorrect since +sometimes (prefetch_ios < nr), which causes ext4_mb_prefetch_fini() to +incorrectly ignore some of the BGs that might need initialization. This +issue is more notable now with the previous patch enabling "fetching" of +BLOCK_UNINIT BGs which are marked buffer_uptodate by default. + +Fix this by passing nr to ext4_mb_prefetch_fini() instead of +prefetch_ios so that it considers the right range of groups. + +Similarly, make sure we don't pass nr=0 to ext4_mb_prefetch_fini() in +ext4_mb_regular_allocator() since we might have prefetched BLOCK_UNINIT +groups that would need buddy initialization. + +Signed-off-by: Ojaswin Mujoo +Reviewed-by: Ritesh Harjani (IBM) +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/05e648ae04ec5b754207032823e9c1de9a54f87a.1685449706.git.ojaswin@linux.ibm.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -2742,8 +2742,6 @@ repeat: + if ((prefetch_grp == group) && + (cr > 1 || + prefetch_ios < sbi->s_mb_prefetch_limit)) { +- unsigned int curr_ios = prefetch_ios; +- + nr = sbi->s_mb_prefetch; + if (ext4_has_feature_flex_bg(sb)) { + nr = (group / sbi->s_mb_prefetch) * +@@ -2752,8 +2750,6 @@ repeat: + } + prefetch_grp = ext4_mb_prefetch(sb, group, + nr, &prefetch_ios); +- if (prefetch_ios == curr_ios) +- nr = 0; + } + + /* This now checks without needing the buddy page */ +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -3284,16 +3284,13 @@ static int ext4_run_li_request(struct ex + ext4_group_t group = elr->lr_next_group; + unsigned int prefetch_ios = 0; + int ret = 0; ++ int nr = EXT4_SB(sb)->s_mb_prefetch; + u64 start_time; + + if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { +- elr->lr_next_group = ext4_mb_prefetch(sb, group, +- EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios); +- if (prefetch_ios) +- ext4_mb_prefetch_fini(sb, elr->lr_next_group, +- prefetch_ios); +- trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, +- prefetch_ios); ++ elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios); ++ ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr); ++ trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr); + if (group >= elr->lr_next_group) { + ret = 1; + if (elr->lr_first_not_zeroed != ngroups && diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-fixup-possible-uninit-var-in-ext4_mb_choose_next_group_cr1.patch 
b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-fixup-possible-uninit-var-in-ext4_mb_choose_next_group_cr1.patch new file mode 100644 index 0000000..db7f3bf --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-fixup-possible-uninit-var-in-ext4_mb_choose_next_group_cr1.patch @@ -0,0 +1,40 @@ +commit a078dff870136090b5779ca2831870a6c5539d36 +Author: Jan Kara +AuthorDate: Thu Sep 22 11:09:29 2022 +0200 +Commit: Theodore Ts'o +CommitDate: Mon Sep 26 13:21:05 2022 -0400 + +ext4: fixup possible uninitialized variable access in ext4_mb_choose_next_group_cr1() + +Variable 'grp' may be left uninitialized if there's no group with +suitable average fragment size (or larger). Fix the problem by +initializing it earlier. + +Link: https://lore.kernel.org/r/20220922091542.pkhedytey7wzp5fi@quack3 +Fixes: 83e80a6e3543 ("ext4: use buckets for cr 1 block scan instead of rbtree") +Cc: stable@kernel.org +Reported-by: Dan Carpenter +Signed-off-by: Jan Kara +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -911,7 +911,7 @@ static void ext4_mb_choose_next_group_cr + int *new_cr, ext4_group_t *group, ext4_group_t ngroups) + { + struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); +- struct ext4_group_info *grp, *iter; ++ struct ext4_group_info *grp = NULL, *iter; + int i; + + if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) { +@@ -928,7 +928,6 @@ static void ext4_mb_choose_next_group_cr + read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]); + continue; + } +- grp = NULL; + list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i], + bb_avg_fragment_size_node) { + if (sbi->s_mb_stats) diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-improve-cr0-cr1-group-scanning.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-improve-cr0-cr1-group-scanning.patch new file mode 100644 index 0000000..8c2f5ca --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-improve-cr0-cr1-group-scanning.patch @@ -0,0 +1,867 @@ +commit 196e402adf2e4cd66f101923409f1970ec5f1af3 +Author: Harshad Shirwadkar +AuthorDate: Thu Apr 1 10:21:27 2021 -0700 +Commit: Theodore Ts'o +CommitDate: Fri Apr 9 11:34:59 2021 -0400 + +ext4: improve cr 0 / cr 1 group scanning + +Instead of traversing through groups linearly, scan groups in specific +orders at cr 0 and cr 1. At cr 0, we want to find groups that have the +largest free order >= the order of the request. So, with this patch, +we maintain lists for each possible order and insert each group into a +list based on the largest free order in its buddy bitmap. During cr 0 +allocation, we traverse these lists in the increasing order of largest +free orders. This allows us to find a group with the best available cr +0 match in constant time. If nothing can be found, we fallback to cr 1 +immediately. + +At CR1, the story is slightly different. We want to traverse in the +order of increasing average fragment size. For CR1, we maintain a rb +tree of groupinfos which is sorted by average fragment size. Instead +of traversing linearly, at CR1, we traverse in the order of increasing +average fragment size, starting at the most optimal group. This brings +down cr 1 search complexity to log(num groups). + +For cr >= 2, we just perform the linear search as before. Also, in +case of lock contention, we intermittently fallback to linear search +even in CR 0 and CR 1 cases. 
This allows us to proceed during the +allocation path even in case of high contention. + +There is an opportunity to do optimization at CR2 too. That's because +at CR2 we only consider groups where bb_free counter (number of free +blocks) is greater than the request extent size. That's left as future +work. + +All the changes introduced in this patch are protected under a new +mount option "mb_optimize_scan". + +With this patchset, following experiment was performed: + +Created a highly fragmented disk of size 65TB. The disk had no +contiguous 2M regions. Following command was run consecutively for 3 +times: + +time dd if=/dev/urandom of=file bs=2M count=10 + +Here are the results with and without cr 0/1 optimizations introduced +in this patch: + +|---------+------------------------------+---------------------------| +| | Without CR 0/1 Optimizations | With CR 0/1 Optimizations | +|---------+------------------------------+---------------------------| +| 1st run | 5m1.871s | 2m47.642s | +| 2nd run | 2m28.390s | 0m0.611s | +| 3rd run | 2m26.530s | 0m1.255s | +|---------+------------------------------+---------------------------| + +Signed-off-by: Harshad Shirwadkar +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Reviewed-by: Andreas Dilger +Link: https://lore.kernel.org/r/20210401172129.189766-6-harshadshirwadkar@gmail.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/ext4.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4.h ++++ linux-stage/fs/ext4/ext4.h +@@ -151,6 +151,13 @@ enum SHIFT_DIRECTION { + #define EXT4_MB_USE_RESERVED 0x2000 + /* Do strict check for free blocks while retrying block allocation */ + #define EXT4_MB_STRICT_CHECK 0x4000 ++ ++/* Large fragment size list lookup succeeded at least once for cr = 0 */ ++#define EXT4_MB_CR0_OPTIMIZED 0x8000 ++/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */ ++#define EXT4_MB_CR1_OPTIMIZED 0x00010000 ++/* Perform linear traversal for one group */ ++#define EXT4_MB_SEARCH_NEXT_LINEAR 0x00020000 + #define EXT4_MB_VERY_DENSE 0x80000 + + struct ext4_allocation_request { +@@ -1199,6 +1206,8 @@ struct ext4_inode_info { + #define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */ + #define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */ + ++#define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group ++ scanning in mballoc */ + + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ + ~EXT4_MOUNT_##opt +@@ -1488,9 +1497,14 @@ struct ext4_sb_info { + unsigned int s_mb_free_pending; + struct list_head s_freed_data_list; /* List of blocks to be freed + after commit completed */ ++ struct rb_root s_mb_avg_fragment_size_root; ++ rwlock_t s_mb_rb_lock; ++ struct list_head *s_mb_largest_free_orders; ++ rwlock_t *s_mb_largest_free_orders_locks; + + /* tunables */ + unsigned long s_stripe; ++ unsigned int s_mb_max_linear_groups; + unsigned long s_mb_small_req; + unsigned long s_mb_large_req; + unsigned int s_mb_max_to_scan; +@@ -1520,6 +1534,8 @@ struct ext4_sb_info { + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ ++ atomic_t s_bal_cr0_bad_suggestions; ++ atomic_t s_bal_cr1_bad_suggestions; + atomic64_t s_bal_cX_groups_considered[4]; + atomic64_t s_bal_cX_hits[4]; + atomic64_t s_bal_cX_failed[4]; /* cX loop didn't find blocks */ +@@ -3206,12 +3222,15 @@ struct ext4_group_info { + ext4_grpblk_t bb_freed_since_trim; /* 
blocks freed since last trim */ + ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ + ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ ++ ext4_group_t bb_group; /* Group number */ + struct list_head bb_prealloc_list; + unsigned long bb_prealloc_nr; + #ifdef DOUBLE_CHECK + void *bb_bitmap; + #endif + struct rw_semaphore alloc_sem; ++ struct rb_node bb_avg_fragment_size_rb; ++ struct list_head bb_largest_free_order_node; + ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block + * regions, index is order. + * bb_counters[3] = 5 means +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -127,11 +127,50 @@ + * the smallest multiple of the stripe value (sbi->s_stripe) which is + * greater than the default mb_group_prealloc. + * ++ * If "mb_optimize_scan" mount option is set, we maintain in memory group info ++ * structures in two data structures: ++ * ++ * 1) Array of largest free order lists (sbi->s_mb_largest_free_orders) ++ * ++ * Locking: sbi->s_mb_largest_free_orders_locks(array of rw locks) ++ * ++ * This is an array of lists where the index in the array represents the ++ * largest free order in the buddy bitmap of the participating group infos of ++ * that list. So, there are exactly MB_NUM_ORDERS(sb) (which means total ++ * number of buddy bitmap orders possible) number of lists. Group-infos are ++ * placed in appropriate lists. ++ * ++ * 2) Average fragment size rb tree (sbi->s_mb_avg_fragment_size_root) ++ * ++ * Locking: sbi->s_mb_rb_lock (rwlock) ++ * ++ * This is a red black tree consisting of group infos and the tree is sorted ++ * by average fragment sizes (which is calculated as ext4_group_info->bb_free ++ * / ext4_group_info->bb_fragments). ++ * ++ * When "mb_optimize_scan" mount option is set, mballoc consults the above data ++ * structures to decide the order in which groups are to be traversed for ++ * fulfilling an allocation request. ++ * ++ * At CR = 0, we look for groups which have the largest_free_order >= the order ++ * of the request. We directly look at the largest free order list in the data ++ * structure (1) above where largest_free_order = order of the request. If that ++ * list is empty, we look at remaining list in the increasing order of ++ * largest_free_order. This allows us to perform CR = 0 lookup in O(1) time. ++ * ++ * At CR = 1, we only consider groups where average fragment size > request ++ * size. So, we lookup a group which has average fragment size just above or ++ * equal to request size using our rb tree (data structure 2) in O(log N) time. ++ * ++ * If "mb_optimize_scan" mount option is not set, mballoc traverses groups in ++ * linear order which requires O(N) search time for each CR 0 and CR 1 phase. ++ * + * The regular allocator (using the buddy cache) supports a few tunables. + * + * /sys/fs/ext4//mb_min_to_scan + * /sys/fs/ext4//mb_max_to_scan + * /sys/fs/ext4//mb_order2_req ++ * /sys/fs/ext4//mb_linear_limit + * + * The regular allocator uses buddy scan only if the request len is power of + * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The +@@ -149,6 +188,16 @@ + * can be used for allocation. ext4_mb_good_group explains how the groups are + * checked. + * ++ * When "mb_optimize_scan" is turned on, as mentioned above, the groups may not ++ * get traversed linearly. That may result in subsequent allocations being not ++ * close to each other. 
And so, the underlying device may get filled up in a ++ * non-linear fashion. While that may not matter on non-rotational devices, for ++ * rotational devices that may result in higher seek times. "mb_linear_limit" ++ * tells mballoc how many groups mballoc should search linearly before ++ * performing consulting above data structures for more efficient lookups. For ++ * non rotational devices, this value defaults to 0 and for rotational devices ++ * this is set to MB_DEFAULT_LINEAR_LIMIT. ++ * + * Both the prealloc space are getting populated as above. So for the first + * request we will hit the buddy cache which will result in this prealloc + * space getting filled. The prealloc space is then later used for the +@@ -299,6 +348,8 @@ + * - bitlock on a group (group) + * - object (inode/locality) (object) + * - per-pa lock (pa) ++ * - cr0 lists lock (cr0) ++ * - cr1 tree lock (cr1) + * + * Paths: + * - new pa +@@ -328,6 +379,9 @@ + * group + * object + * ++ * - allocation path (ext4_mb_regular_allocator) ++ * group ++ * cr0/cr1 + */ + static struct kmem_cache *ext4_pspace_cachep; + static struct kmem_cache *ext4_ac_cachep; +@@ -351,6 +405,8 @@ static void ext4_mb_generate_from_freeli + ext4_group_t group); + static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac); + ++static bool ext4_mb_good_group(struct ext4_allocation_context *ac, ++ ext4_group_t group, int cr); + /* + * The algorithm using this percpu seq counter goes below: + * 1. We sample the percpu discard_pa_seq counter before trying for block +@@ -747,6 +803,269 @@ static void ext4_mb_mark_free_simple(str + } + } + ++static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new, ++ int (*cmp)(struct rb_node *, struct rb_node *)) ++{ ++ struct rb_node **iter = &root->rb_node, *parent = NULL; ++ ++ while (*iter) { ++ parent = *iter; ++ if (cmp(new, *iter) > 0) ++ iter = &((*iter)->rb_left); ++ else ++ iter = &((*iter)->rb_right); ++ } ++ ++ rb_link_node(new, parent, iter); ++ rb_insert_color(new, root); ++} ++ ++static int ++ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2) ++{ ++ struct ext4_group_info *grp1 = rb_entry(rb1, ++ struct ext4_group_info, ++ bb_avg_fragment_size_rb); ++ struct ext4_group_info *grp2 = rb_entry(rb2, ++ struct ext4_group_info, ++ bb_avg_fragment_size_rb); ++ int num_frags_1, num_frags_2; ++ ++ num_frags_1 = grp1->bb_fragments ? ++ grp1->bb_free / grp1->bb_fragments : 0; ++ num_frags_2 = grp2->bb_fragments ? ++ grp2->bb_free / grp2->bb_fragments : 0; ++ ++ return (num_frags_2 - num_frags_1); ++} ++ ++/* ++ * Reinsert grpinfo into the avg_fragment_size tree with new average ++ * fragment size. ++ */ ++static void ++mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(sb); ++ ++ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) ++ return; ++ ++ write_lock(&sbi->s_mb_rb_lock); ++ if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) { ++ rb_erase(&grp->bb_avg_fragment_size_rb, ++ &sbi->s_mb_avg_fragment_size_root); ++ RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb); ++ } ++ ++ ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root, ++ &grp->bb_avg_fragment_size_rb, ++ ext4_mb_avg_fragment_size_cmp); ++ write_unlock(&sbi->s_mb_rb_lock); ++} ++ ++/* ++ * Choose next group by traversing largest_free_order lists. Updates *new_cr if ++ * cr level needs an update. 
++ */ ++static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac, ++ int *new_cr, ext4_group_t *group, ext4_group_t ngroups) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); ++ struct ext4_group_info *iter, *grp; ++ int i; ++ ++ if (ac->ac_status == AC_STATUS_FOUND) ++ return; ++ ++ if (unlikely(sbi->s_mb_stats && ac->ac_flags & EXT4_MB_CR0_OPTIMIZED)) ++ atomic_inc(&sbi->s_bal_cr0_bad_suggestions); ++ ++ grp = NULL; ++ for (i = ac->ac_2order; i < MB_NUM_ORDERS(ac->ac_sb); i++) { ++ if (list_empty(&sbi->s_mb_largest_free_orders[i])) ++ continue; ++ read_lock(&sbi->s_mb_largest_free_orders_locks[i]); ++ if (list_empty(&sbi->s_mb_largest_free_orders[i])) { ++ read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); ++ continue; ++ } ++ grp = NULL; ++ list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i], ++ bb_largest_free_order_node) { ++ if (sbi->s_mb_stats) ++ atomic64_inc(&sbi->s_bal_cX_groups_considered[0]); ++ if (likely(ext4_mb_good_group(ac, iter->bb_group, 0))) { ++ grp = iter; ++ break; ++ } ++ } ++ read_unlock(&sbi->s_mb_largest_free_orders_locks[i]); ++ if (grp) ++ break; ++ } ++ ++ if (!grp) { ++ /* Increment cr and search again */ ++ *new_cr = 1; ++ } else { ++ *group = grp->bb_group; ++ ac->ac_last_optimal_group = *group; ++ ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED; ++ } ++} ++ ++/* ++ * Choose next group by traversing average fragment size tree. Updates *new_cr ++ * if cr lvel needs an update. Sets EXT4_MB_SEARCH_NEXT_LINEAR to indicate that ++ * the linear search should continue for one iteration since there's lock ++ * contention on the rb tree lock. ++ */ ++static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac, ++ int *new_cr, ext4_group_t *group, ext4_group_t ngroups) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); ++ int avg_fragment_size, best_so_far; ++ struct rb_node *node, *found; ++ struct ext4_group_info *grp; ++ ++ /* ++ * If there is contention on the lock, instead of waiting for the lock ++ * to become available, just continue searching lineraly. We'll resume ++ * our rb tree search later starting at ac->ac_last_optimal_group. ++ */ ++ if (!read_trylock(&sbi->s_mb_rb_lock)) { ++ ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR; ++ return; ++ } ++ ++ if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) { ++ if (sbi->s_mb_stats) ++ atomic_inc(&sbi->s_bal_cr1_bad_suggestions); ++ /* We have found something at CR 1 in the past */ ++ grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group); ++ for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL; ++ found = rb_next(found)) { ++ grp = rb_entry(found, struct ext4_group_info, ++ bb_avg_fragment_size_rb); ++ if (sbi->s_mb_stats) ++ atomic64_inc(&sbi->s_bal_cX_groups_considered[1]); ++ if (likely(ext4_mb_good_group(ac, grp->bb_group, 1))) ++ break; ++ } ++ goto done; ++ } ++ ++ node = sbi->s_mb_avg_fragment_size_root.rb_node; ++ best_so_far = 0; ++ found = NULL; ++ ++ while (node) { ++ grp = rb_entry(node, struct ext4_group_info, ++ bb_avg_fragment_size_rb); ++ avg_fragment_size = 0; ++ if (ext4_mb_good_group(ac, grp->bb_group, 1)) { ++ avg_fragment_size = grp->bb_fragments ? 
++ grp->bb_free / grp->bb_fragments : 0; ++ if (!best_so_far || avg_fragment_size < best_so_far) { ++ best_so_far = avg_fragment_size; ++ found = node; ++ } ++ } ++ if (avg_fragment_size > ac->ac_g_ex.fe_len) ++ node = node->rb_right; ++ else ++ node = node->rb_left; ++ } ++ ++done: ++ if (found) { ++ grp = rb_entry(found, struct ext4_group_info, ++ bb_avg_fragment_size_rb); ++ *group = grp->bb_group; ++ ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED; ++ } else { ++ *new_cr = 2; ++ } ++ ++ read_unlock(&sbi->s_mb_rb_lock); ++ ac->ac_last_optimal_group = *group; ++} ++ ++static inline int should_optimize_scan(struct ext4_allocation_context *ac) ++{ ++ if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN))) ++ return 0; ++ if (ac->ac_criteria >= 2) ++ return 0; ++ if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * Return next linear group for allocation. If linear traversal should not be ++ * performed, this function just returns the same group ++ */ ++static int ++next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups) ++{ ++ if (!should_optimize_scan(ac)) ++ goto inc_and_return; ++ ++ if (ac->ac_groups_linear_remaining) { ++ ac->ac_groups_linear_remaining--; ++ goto inc_and_return; ++ } ++ ++ if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) { ++ ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR; ++ goto inc_and_return; ++ } ++ ++ return group; ++inc_and_return: ++ /* ++ * Artificially restricted ngroups for non-extent ++ * files makes group > ngroups possible on first loop. ++ */ ++ return group + 1 >= ngroups ? 0 : group + 1; ++} ++ ++/* ++ * ext4_mb_choose_next_group: choose next group for allocation. ++ * ++ * @ac Allocation Context ++ * @new_cr This is an output parameter. If the there is no good group ++ * available at current CR level, this field is updated to indicate ++ * the new cr level that should be used. ++ * @group This is an input / output parameter. As an input it indicates the ++ * next group that the allocator intends to use for allocation. As ++ * output, this field indicates the next group that should be used as ++ * determined by the optimization functions. ++ * @@ngroups Total number of groups ++ */ ++static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac, ++ int *new_cr, ext4_group_t *group, ext4_group_t ngroups) ++{ ++ *new_cr = ac->ac_criteria; ++ ++ if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) ++ return; ++ ++ if (*new_cr == 0) { ++ ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups); ++ } else if (*new_cr == 1) { ++ ext4_mb_choose_next_group_cr1(ac, new_cr, group, ngroups); ++ } else { ++ /* ++ * TODO: For CR=2, we can arrange groups in an rb tree sorted by ++ * bb_free. But until that happens, we should never come here. ++ */ ++ WARN_ON(1); ++ } ++} ++ + /* + * Cache the order of the largest free extent we have available in this block + * group. 
+@@ -754,18 +1073,33 @@ static void ext4_mb_mark_free_simple(str + static void + mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) + { ++ struct ext4_sb_info *sbi = EXT4_SB(sb); + int i; +- int bits; + ++ if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) { ++ write_lock(&sbi->s_mb_largest_free_orders_locks[ ++ grp->bb_largest_free_order]); ++ list_del_init(&grp->bb_largest_free_order_node); ++ write_unlock(&sbi->s_mb_largest_free_orders_locks[ ++ grp->bb_largest_free_order]); ++ } + grp->bb_largest_free_order = -1; /* uninit */ + +- bits = MB_NUM_ORDERS(sb) - 1; +- for (i = bits; i >= 0; i--) { ++ for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) { + if (grp->bb_counters[i] > 0) { + grp->bb_largest_free_order = i; + break; + } + } ++ if (test_opt2(sb, MB_OPTIMIZE_SCAN) && ++ grp->bb_largest_free_order >= 0 && grp->bb_free) { ++ write_lock(&sbi->s_mb_largest_free_orders_locks[ ++ grp->bb_largest_free_order]); ++ list_add_tail(&grp->bb_largest_free_order_node, ++ &sbi->s_mb_largest_free_orders[grp->bb_largest_free_order]); ++ write_unlock(&sbi->s_mb_largest_free_orders_locks[ ++ grp->bb_largest_free_order]); ++ } + } + + static noinline_for_stack +@@ -826,6 +1160,7 @@ int ext4_mb_generate_buddy(struct super_ + period = get_cycles() - period; + atomic_inc(&sbi->s_mb_buddies_generated); + atomic64_add(period, &sbi->s_mb_generation_time); ++ mb_update_avg_fragment_size(sb, grp); + + return 0; + } +@@ -1556,6 +1891,7 @@ static void mb_free_blocks(struct inode + + done: + mb_set_largest_free_order(sb, e4b->bd_info); ++ mb_update_avg_fragment_size(sb, e4b->bd_info); + mb_check_buddy(e4b); + } + +@@ -1693,6 +2029,7 @@ static int mb_mark_used(struct ext4_budd + } + mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); + ++ mb_update_avg_fragment_size(e4b->bd_sb, e4b->bd_info); + ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0); + mb_check_buddy(e4b); + +@@ -2415,17 +2752,21 @@ repeat: + * from the goal value specified + */ + group = ac->ac_g_ex.fe_group; ++ ac->ac_last_optimal_group = group; ++ ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups; + prefetch_grp = group; + +- for (i = 0; i < ngroups; group++, i++) { +- int ret = 0; ++ for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups), ++ i++) { ++ int ret = 0, new_cr; ++ + cond_resched(); +- /* +- * Artificially restricted ngroups for non-extent +- * files makes group > ngroups possible on first loop. 
+- */ +- if (group >= ngroups) +- group = 0; ++ ++ ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups); ++ if (new_cr != cr) { ++ cr = new_cr; ++ goto repeat; ++ } + + /* + * Batch reads of the block allocation bitmaps +@@ -2651,6 +2992,8 @@ int ext4_seq_mb_stats_show(struct seq_fi + atomic64_read(&sbi->s_bal_cX_groups_considered[0])); + seq_printf(seq, "\t\tuseless_loops: %llu\n", + atomic64_read(&sbi->s_bal_cX_failed[0])); ++ seq_printf(seq, "\t\tbad_suggestions: %u\n", ++ atomic_read(&sbi->s_bal_cr0_bad_suggestions)); + + seq_puts(seq, "\tcr1_stats:\n"); + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1])); +@@ -2658,6 +3001,8 @@ int ext4_seq_mb_stats_show(struct seq_fi + atomic64_read(&sbi->s_bal_cX_groups_considered[1])); + seq_printf(seq, "\t\tuseless_loops: %llu\n", + atomic64_read(&sbi->s_bal_cX_failed[1])); ++ seq_printf(seq, "\t\tbad_suggestions: %u\n", ++ atomic_read(&sbi->s_bal_cr1_bad_suggestions)); + + seq_puts(seq, "\tcr2_stats:\n"); + seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2])); +@@ -2945,8 +3290,11 @@ int ext4_mb_add_groupinfo(struct super_b + INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); + init_rwsem(&meta_group_info[i]->alloc_sem); + meta_group_info[i]->bb_free_root = RB_ROOT; ++ INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node); ++ RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb); + meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ + meta_group_info[i]->bb_freed_since_trim = 0; ++ meta_group_info[i]->bb_group = group; + + mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group); + return 0; +@@ -3137,6 +3485,27 @@ int ext4_mb_init(struct super_block *sb) + i++; + } while (i < MB_NUM_ORDERS(sb)); + ++ sbi->s_mb_avg_fragment_size_root = RB_ROOT; ++ sbi->s_mb_largest_free_orders = ++ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!sbi->s_mb_largest_free_orders) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ sbi->s_mb_largest_free_orders_locks = ++ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t), ++ GFP_KERNEL); ++ if (!sbi->s_mb_largest_free_orders_locks) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < MB_NUM_ORDERS(sb); i++) { ++ INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]); ++ rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]); ++ } ++ rwlock_init(&sbi->s_mb_rb_lock); ++ + spin_lock_init(&sbi->s_md_lock); + sbi->s_mb_free_pending = 0; + INIT_LIST_HEAD(&sbi->s_freed_data_list); +@@ -3236,6 +3605,10 @@ int ext4_mb_init(struct super_block *sb) + spin_lock_init(&lg->lg_prealloc_lock); + } + ++ if (blk_queue_nonrot(bdev_get_queue(sb->s_bdev))) ++ sbi->s_mb_max_linear_groups = 0; ++ else ++ sbi->s_mb_max_linear_groups = MB_DEFAULT_LINEAR_LIMIT; + /* init file for buddy data */ + ret = ext4_mb_init_backend(sb); + if (ret != 0) +@@ -3247,6 +3620,8 @@ out_free_locality_groups: + free_percpu(sbi->s_locality_groups); + sbi->s_locality_groups = NULL; + out: ++ kfree(sbi->s_mb_largest_free_orders); ++ kfree(sbi->s_mb_largest_free_orders_locks); + kfree(sbi->s_mb_prealloc_table); + kfree(sbi->s_mb_offsets); + sbi->s_mb_offsets = NULL; +@@ -3304,6 +3679,8 @@ int ext4_mb_release(struct super_block * + kvfree(group_info); + rcu_read_unlock(); + } ++ kfree(sbi->s_mb_largest_free_orders); ++ kfree(sbi->s_mb_largest_free_orders_locks); + kfree(sbi->s_mb_prealloc_table); + kfree(sbi->s_mb_offsets); + kfree(sbi->s_mb_maxs); +Index: linux-stage/fs/ext4/mballoc.h +=================================================================== +--- 
linux-stage.orig/fs/ext4/mballoc.h ++++ linux-stage/fs/ext4/mballoc.h +@@ -83,6 +83,18 @@ + #define MB_DEFAULT_MAX_INODE_PREALLOC 512 + + /* ++ * Number of groups to search linearly before performing group scanning ++ * optimization. ++ */ ++#define MB_DEFAULT_LINEAR_LIMIT 4 ++ ++/* ++ * Minimum number of groups that should be present in the file system to perform ++ * group scanning optimizations. ++ */ ++#define MB_DEFAULT_LINEAR_SCAN_THRESHOLD 16 ++ ++/* + * Number of valid buddy orders + */ + #define MB_NUM_ORDERS(sb) ((sb)->s_blocksize_bits + 2) +@@ -173,11 +185,14 @@ struct ext4_allocation_context { + /* copy of the best found extent taken before preallocation efforts */ + struct ext4_free_extent ac_f_ex; + ++ ext4_group_t ac_last_optimal_group; ++ __u32 ac_groups_considered; ++ __u32 ac_flags; /* allocation hints */ + __u16 ac_groups_scanned; ++ __u16 ac_groups_linear_remaining; + __u16 ac_found; + __u16 ac_tail; + __u16 ac_buddy; +- __u32 ac_flags; /* allocation hints */ + __u8 ac_status; + __u8 ac_criteria; + __u8 ac_2order; /* if request is to allocate 2^N blocks and +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -1623,7 +1623,7 @@ enum { + Opt_mb_c1_threshold, Opt_mb_c2_threshold, Opt_mb_c3_threshold, + Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, + Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, +- Opt_prefetch_block_bitmaps, ++ Opt_prefetch_block_bitmaps, Opt_mb_optimize_scan, + }; + + static const match_table_t tokens = { +@@ -1718,6 +1718,7 @@ static const match_table_t tokens = { + {Opt_nombcache, "nombcache"}, + {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ + {Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"}, ++ {Opt_mb_optimize_scan, "mb_optimize_scan=%d"}, + {Opt_removed, "check=none"}, /* mount option from ext2/3 */ + {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ + {Opt_removed, "reservation"}, /* mount option from ext2/3 */ +@@ -1750,6 +1751,8 @@ static ext4_fsblk_t get_sb_block(void ** + } + + #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) ++#define DEFAULT_MB_OPTIMIZE_SCAN (-1) ++ + static const char deprecated_msg[] = + "Mount option \"%s\" will be removed by %s\n" + "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; +@@ -1942,12 +1945,14 @@ static const struct mount_opts { + {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, + {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS, + MOPT_SET}, ++ {Opt_mb_optimize_scan, EXT4_MOUNT2_MB_OPTIMIZE_SCAN, MOPT_GTE0}, + {Opt_err, 0, 0} + }; + + static int handle_mount_opt(struct super_block *sb, char *opt, int token, + substring_t *args, unsigned long *journal_devnum, +- unsigned int *journal_ioprio, int is_remount) ++ unsigned int *journal_ioprio, int *mb_optimize_scan, ++ int is_remount) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); + const struct mount_opts *m; +@@ -2242,6 +2247,14 @@ static int handle_mount_opt(struct super + sbi->s_mount_opt |= m->mount_opt; + } else if (token == Opt_data_err_ignore) { + sbi->s_mount_opt &= ~m->mount_opt; ++ } else if (token == Opt_mb_optimize_scan) { ++ if (arg != 0 && arg != 1) { ++ ext4_msg(sb, KERN_WARNING, ++ "mb_optimize_scan should be set to 0 or 1."); ++ return -1; ++ } ++ if (mb_optimize_scan) ++ *mb_optimize_scan = arg; + } else { + if (!args->from) + arg = 1; +@@ -2264,6 +2277,7 @@ static int handle_mount_opt(struct super + 
static int parse_options(char *options, struct super_block *sb, + unsigned long *journal_devnum, + unsigned int *journal_ioprio, ++ int *mb_optimize_scan, + int is_remount) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); +@@ -2284,7 +2298,8 @@ static int parse_options(char *options, + args[0].to = args[0].from = NULL; + token = match_token(p, tokens, args); + if (handle_mount_opt(sb, p, token, args, journal_devnum, +- journal_ioprio, is_remount) < 0) ++ journal_ioprio, mb_optimize_scan, ++ is_remount) < 0) + return 0; + } + #ifdef CONFIG_QUOTA +@@ -3859,6 +3874,7 @@ static int ext4_fill_super(struct super_ + __u64 blocks_count; + int err = 0; + unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; ++ int mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; + ext4_group_t first_not_zeroed; + + if ((data && !orig_data) || !sbi) +@@ -4092,7 +4108,7 @@ static int ext4_fill_super(struct super_ + if (!s_mount_opts) + goto failed_mount; + if (!parse_options(s_mount_opts, sb, &journal_devnum, +- &journal_ioprio, 0)) { ++ &journal_ioprio, &mb_optimize_scan, 0)) { + ext4_msg(sb, KERN_WARNING, + "failed to parse options in superblock: %s", + s_mount_opts); +@@ -4101,7 +4117,7 @@ static int ext4_fill_super(struct super_ + } + sbi->s_def_mount_opt = sbi->s_mount_opt; + if (!parse_options((char *) data, sb, &journal_devnum, +- &journal_ioprio, 0)) ++ &journal_ioprio, &mb_optimize_scan, 0)) + goto failed_mount; + + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { +@@ -4763,6 +4779,14 @@ no_journal: + } + + ext4_ext_init(sb); ++ ++ if (mb_optimize_scan == 1) ++ set_opt2(sb, MB_OPTIMIZE_SCAN); ++ else if (mb_optimize_scan == 0) ++ clear_opt2(sb, MB_OPTIMIZE_SCAN); ++ else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) ++ set_opt2(sb, MB_OPTIMIZE_SCAN); ++ + err = ext4_mb_init(sb); + if (err) { + ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)", +@@ -5593,7 +5617,7 @@ static int ext4_remount(struct super_blo + vfs_flags = SB_LAZYTIME | SB_I_VERSION; + sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags); + +- if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { ++ if (!parse_options(data, sb, NULL, &journal_ioprio, NULL, 1)) { + err = -EINVAL; + goto restore_opts; + } +Index: linux-stage/fs/ext4/sysfs.c +=================================================================== +--- linux-stage.orig/fs/ext4/sysfs.c ++++ linux-stage/fs/ext4/sysfs.c +@@ -223,6 +223,7 @@ EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_s + EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req); + EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); + EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc); ++EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); + EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); + EXT4_RW_ATTR_SBI_UI(bg_trimmed_threshold, s_bg_trimmed_threshold); + EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); +@@ -263,6 +264,7 @@ static struct attribute *ext4_attrs[] = + ATTR_LIST(mb_large_req), + ATTR_LIST(mb_group_prealloc), + ATTR_LIST(mb_max_inode_prealloc), ++ ATTR_LIST(mb_max_linear_groups), + ATTR_LIST(max_writeback_mb_bump), + ATTR_LIST(extent_max_zeroout_kb), + ATTR_LIST(bg_trimmed_threshold), diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-limit-number-of-retries-after-discard-prealloc-blocks.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-limit-number-of-retries-after-discard-prealloc-blocks.patch new file mode 100644 index 0000000..a5e4d92 --- /dev/null +++ 
b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-limit-number-of-retries-after-discard-prealloc-blocks.patch @@ -0,0 +1,71 @@ +commit 80fa46d6b9e7b1527bfd2197d75431fd9c382161 +Author: Theodore Ts'o +AuthorDate: Thu Sep 1 18:03:14 2022 -0400 +Commit: Theodore Ts'o +CommitDate: Thu Sep 22 10:51:19 2022 -0400 + +ext4: limit the number of retries after discarding preallocations blocks + +This patch avoids threads live-locking for hours when a large number +threads are competing over the last few free extents as they blocks +getting added and removed from preallocation pools. From our bug +reporter: + + A reliable way for triggering this has multiple writers + continuously write() to files when the filesystem is full, while + small amounts of space are freed (e.g. by truncating a large file + -1MiB at a time). In the local filesystem, this can be done by + simply not checking the return code of write (0) and/or the error + (ENOSPACE) that is set. Over NFS with an async mount, even clients + with proper error checking will behave this way since the linux NFS + client implementation will not propagate the server errors [the + write syscalls immediately return success] until the file handle is + closed. This leads to a situation where NFS clients send a + continuous stream of WRITE rpcs which result in ERRNOSPACE -- but + since the client isn't seeing this, the stream of writes continues + at maximum network speed. + + When some space does appear, multiple writers will all attempt to + claim it for their current write. For NFS, we may see dozens to + hundreds of threads that do this. + + The real-world scenario of this is database backup tooling (in + particular, github.com/mdkent/percona-xtrabackup) which may write + large files (>1TiB) to NFS for safe keeping. Some temporary files + are written, rewound, and read back -- all before closing the file + handle (the temp file is actually unlinked, to trigger automatic + deletion on close/crash.) An application like this operating on an + async NFS mount will not see an error code until TiB have been + written/read. + + The lockup was observed when running this database backup on large + filesystems (64 TiB in this case) with a high number of block + groups and no free space. Fragmentation is generally not a factor + in this filesystem (~thousands of large files, mostly contiguous + except for the parts written while the filesystem is at capacity.) 
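+
+The shape of the fix as a standalone sketch (the two helpers are hypothetical stand-ins, not ext4 functions): cap the retry counter at three, matching the bound in the hunk below, instead of looping back after every preallocation discard.
+
+        #include <stdbool.h>
+        #include <stdio.h>
+
+        /* Stand-ins: the allocator keeps failing, and discarding preallocations
+         * always claims a retry might help, as in the ENOSPC livelock above. */
+        static bool try_allocate(void) { return false; }
+        static bool discard_preallocations_should_retry(void) { return true; }
+
+        static int allocate_with_bounded_retries(void)
+        {
+                int retries = 0;
+
+                while (!try_allocate()) {
+                        /* Bound the retries so competing writers cannot spin for
+                         * hours on preallocations being created and torn down. */
+                        if (++retries >= 3 || !discard_preallocations_should_retry())
+                                return -1;  /* give up and report the failure */
+                }
+                return 0;
+        }
+
+        int main(void)
+        {
+                printf("allocation result: %d\n", allocate_with_bounded_retries());
+                return 0;
+        }
+
+Hitting the cap surfaces ENOSPC to the caller rather than leaving the thread spinning in the allocator.
+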
+ +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -5623,6 +5623,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t + ext4_fsblk_t block = 0; + unsigned int inquota = 0; + unsigned int reserv_clstrs = 0; ++ int retries = 0; + u64 seq; + + might_sleep(); +@@ -5723,7 +5724,8 @@ repeat: + ar->len = ac->ac_b_ex.fe_len; + } + } else { +- if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) ++ if (++retries < 3 && ++ ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) + goto repeat; + /* + * If block allocation fails then the pa allocated above diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-mb_optimize_scan-performance-with-extents.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-mb_optimize_scan-performance-with-extents.patch new file mode 100644 index 0000000..b4e9869 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-mb_optimize_scan-performance-with-extents.patch @@ -0,0 +1,118 @@ +commit 077d0c2c78df6f7260cdd015a991327efa44d8ad +Author: Ojaswin Mujoo +AuthorDate: Tue Mar 8 15:22:01 2022 +0530 +Commit: Theodore Ts'o +CommitDate: Sat Mar 12 20:54:21 2022 -0500 + +ext4: make mb_optimize_scan performance mount option work with extents + +Currently mb_optimize_scan scan feature which improves filesystem +performance heavily (when FS is fragmented), seems to be not working +with files with extents (ext4 by default has files with extents). + +This patch fixes that and makes mb_optimize_scan feature work +for files with extents. + +Below are some performance numbers obtained when allocating a 10M and 100M +file with and w/o this patch on a filesytem with no 1M contiguous block. + + +=============== +Workload: dd if=/dev/urandom of=test conv=fsync bs=1M count=10/100 + +Time taken +===================================================== +no. 
Size without-patch with-patch Diff(%) +1 10M 0m8.401s 0m5.623s 33.06% +2 100M 1m40.465s 1m14.737s 25.6% + + +============= +w/o patch: + mballoc: + reqs: 17056 + success: 11407 + groups_scanned: 13643 + cr0_stats: + hits: 37 + groups_considered: 9472 + useless_loops: 36 + bad_suggestions: 0 + cr1_stats: + hits: 11418 + groups_considered: 908560 + useless_loops: 1894 + bad_suggestions: 0 + cr2_stats: + hits: 1873 + groups_considered: 6913 + useless_loops: 21 + cr3_stats: + hits: 21 + groups_considered: 5040 + useless_loops: 21 + extents_scanned: 417364 + goal_hits: 3707 + 2^n_hits: 37 + breaks: 1873 + lost: 0 + buddies_generated: 239/240 + buddies_time_used: 651080 + preallocated: 705 + discarded: 478 + +with patch: + mballoc: + reqs: 12768 + success: 11305 + groups_scanned: 12768 + cr0_stats: + hits: 1 + groups_considered: 18 + useless_loops: 0 + bad_suggestions: 0 + cr1_stats: + hits: 5829 + groups_considered: 50626 + useless_loops: 0 + bad_suggestions: 0 + cr2_stats: + hits: 6938 + groups_considered: 580363 + useless_loops: 0 + cr3_stats: + hits: 0 + groups_considered: 0 + useless_loops: 0 + extents_scanned: 309059 + goal_hits: 0 + 2^n_hits: 1 + breaks: 1463 + lost: 0 + buddies_generated: 239/240 + buddies_time_used: 791392 + preallocated: 673 + discarded: 446 + +Fixes: 196e402 (ext4: improve cr 0 / cr 1 group scanning) +Cc: stable@kernel.org +Reported-by: Geetika Moolchandani +Reported-by: Nageswara R Sastry +Suggested-by: Ritesh Harjani +Signed-off-by: Ojaswin Mujoo +Link: https://lore.kernel.org/r/fc9a48f7f8dcfc83891a8b21f6dd8cdf056ed810.1646732698.git.ojaswin@linux.ibm.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -998,7 +998,7 @@ static inline int should_optimize_scan(s + return 0; + if (ac->ac_criteria >= 2) + return 0; +- if (ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) ++ if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) + return 0; + return 1; + } diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-mballoc-try-target-group-first.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-mballoc-try-target-group-first.patch new file mode 100644 index 0000000..717c1f6 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-mballoc-try-target-group-first.patch @@ -0,0 +1,86 @@ +commit 4fca50d440cc5d4dc570ad5484cc0b70b381bc2a +Author: Jan Kara +AuthorDate: Thu Sep 8 11:21:24 2022 +0200 +Commit: Theodore Ts'o +CommitDate: Wed Sep 21 22:11:34 2022 -0400 + +ext4: make mballoc try target group first even with mb_optimize_scan + +One of the side-effects of mb_optimize_scan was that the optimized +functions to select next group to try were called even before we tried +the goal group. As a result we no longer allocate files close to +corresponding inodes as well as we don't try to expand currently +allocated extent in the same group. 
This results in reaim regression +with workfile.disk workload of upto 8% with many clients on my test +machine: + + baseline mb_optimize_scan +Hmean disk-1 2114.16 ( 0.00%) 2099.37 ( -0.70%) +Hmean disk-41 87794.43 ( 0.00%) 83787.47 * -4.56%* +Hmean disk-81 148170.73 ( 0.00%) 135527.05 * -8.53%* +Hmean disk-121 177506.11 ( 0.00%) 166284.93 * -6.32%* +Hmean disk-161 220951.51 ( 0.00%) 207563.39 * -6.06%* +Hmean disk-201 208722.74 ( 0.00%) 203235.59 ( -2.63%) +Hmean disk-241 222051.60 ( 0.00%) 217705.51 ( -1.96%) +Hmean disk-281 252244.17 ( 0.00%) 241132.72 * -4.41%* +Hmean disk-321 255844.84 ( 0.00%) 245412.84 * -4.08%* + +Also this is causing huge regression (time increased by a factor of 5 or +so) when untarring archive with lots of small files on some eMMC storage +cards. + +Fix the problem by making sure we try goal group first. + +Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning") +CC: stable@kernel.org +Reported-and-tested-by: Stefan Wahren +Tested-by: Ojaswin Mujoo +Reviewed-by: Ritesh Harjani (IBM) +Link: https://lore.kernel.org/all/20220727105123.ckwrhbilzrxqpt24@quack3/ +Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/ +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220908092136.11770-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -1050,8 +1050,10 @@ static void ext4_mb_choose_next_group(st + { + *new_cr = ac->ac_criteria; + +- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) ++ if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) { ++ *group = next_linear_group(ac, *group, ngroups); + return; ++ } + + if (*new_cr == 0) { + ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups); +@@ -2666,7 +2668,7 @@ static noinline_for_stack int + ext4_mb_regular_allocator(struct ext4_allocation_context *ac) + { + ext4_group_t prefetch_grp = 0, ngroups, group, i; +- int cr = -1; ++ int cr = -1, new_cr; + int err = 0, first_err = 0; + unsigned int nr = 0, prefetch_ios = 0; + struct ext4_sb_info *sbi; +@@ -2756,13 +2758,12 @@ repeat: + ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups; + prefetch_grp = group; + +- for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups), +- i++) { +- int ret = 0, new_cr; ++ for (i = 0, new_cr = cr; i < ngroups; i++, ++ ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) { ++ int ret = 0; + + cond_resched(); + +- ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups); + if (new_cr != cr) { + cr = new_cr; + goto repeat; diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-prefetch_block_bitmaps-default.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-prefetch_block_bitmaps-default.patch new file mode 100644 index 0000000..3972631 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-make-prefetch_block_bitmaps-default.patch @@ -0,0 +1,87 @@ +commit 21175ca434c5d49509b73cf473618b01b0b85437 +Author: Harshad Shirwadkar +AuthorDate: Thu Apr 1 10:21:29 2021 -0700 +Commit: Theodore Ts'o +CommitDate: Fri Apr 9 11:34:59 2021 -0400 + +ext4: make prefetch_block_bitmaps default + +Block bitmap prefetching is needed for these allocator optimization +data structures to get populated and provide better group scanning +order. So, turn it on bu default. 
prefetch_block_bitmaps mount option +is now marked as removed and a new option no_prefetch_block_bitmaps is +added to disable block bitmap prefetching. + +Signed-off-by: Harshad Shirwadkar +Link: https://lore.kernel.org/r/20210401172129.189766-8-harshadshirwadkar@gmail.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/ext4.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4.h ++++ linux-stage/fs/ext4/ext4.h +@@ -1178,7 +1178,7 @@ struct ext4_inode_info { + #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ + #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ + #define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ +-#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000 ++#define EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS 0x4000000 + #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ + #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ + #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -1623,7 +1623,7 @@ enum { + Opt_mb_c1_threshold, Opt_mb_c2_threshold, Opt_mb_c3_threshold, + Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, + Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, +- Opt_prefetch_block_bitmaps, Opt_mb_optimize_scan, ++ Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan, + }; + + static const match_table_t tokens = { +@@ -1717,7 +1717,8 @@ static const match_table_t tokens = { + {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_nombcache, "nombcache"}, + {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ +- {Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"}, ++ {Opt_removed, "prefetch_block_bitmaps"}, ++ {Opt_no_prefetch_block_bitmaps, "no_prefetch_block_bitmaps"}, + {Opt_mb_optimize_scan, "mb_optimize_scan=%d"}, + {Opt_removed, "check=none"}, /* mount option from ext2/3 */ + {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ +@@ -1943,7 +1944,7 @@ static const struct mount_opts { + {Opt_mb_c3_threshold, 0, MOPT_STRING}, + {Opt_test_dummy_encryption, 0, MOPT_GTE0}, + {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, +- {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS, ++ {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS, + MOPT_SET}, + {Opt_mb_optimize_scan, EXT4_MOUNT2_MB_OPTIMIZE_SCAN, MOPT_GTE0}, + {Opt_err, 0, 0} +@@ -3551,11 +3552,11 @@ static struct ext4_li_request *ext4_li_r + + elr->lr_super = sb; + elr->lr_first_not_zeroed = start; +- if (test_opt(sb, PREFETCH_BLOCK_BITMAPS)) +- elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP; +- else { ++ if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) { + elr->lr_mode = EXT4_LI_MODE_ITABLE; + elr->lr_next_group = start; ++ } else { ++ elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP; + } + + /* +@@ -3586,7 +3587,7 @@ int ext4_register_li_request(struct supe + goto out; + } + +- if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) && ++ if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) && + (first_not_zeroed == ngroups || sb_rdonly(sb) || + !test_opt(sb, INIT_INODE_TABLE))) + goto out; diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch new file mode 100644 index 
0000000..452a13b --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch @@ -0,0 +1,66 @@ +commit 3c6296046c85333bc52555a670a9093d9e2657bb +Author: Ojaswin Mujoo +AuthorDate: Tue May 30 18:03:46 2023 +0530 +Commit: Theodore Ts'o +CommitDate: Mon Jun 26 19:34:56 2023 -0400 + +ext4: Don't skip prefetching BLOCK_UNINIT groups + +Currently, ext4_mb_prefetch() and ext4_mb_prefetch_fini() skip +BLOCK_UNINIT groups since fetching their bitmaps doesn't need disk IO. +As a consequence, we end not initializing the buddy structures and CR0/1 +lists for these BGs, even though it can be done without any disk IO +overhead. Hence, don't skip such BGs during prefetch and prefetch_fini. + +This improves the accuracy of CR0/1 allocation as earlier, we could have +essentially empty BLOCK_UNINIT groups being ignored by CR0/1 due to their buddy +not being initialized, leading to slower CR2 allocations. With this patch CR0/1 +will be able to discover these groups as well, thus improving performance. + +Signed-off-by: Ojaswin Mujoo +Reviewed-by: Ritesh Harjani (IBM) +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/dc3130b8daf45ffe63d8a3c1edcf00eb8ba70e1f.1685449706.git.ojaswin@linux.ibm.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -2580,9 +2580,7 @@ ext4_group_t ext4_mb_prefetch(struct sup + */ + if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && + EXT4_MB_GRP_NEED_INIT(grp) && +- ext4_free_group_clusters(sb, gdp) > 0 && +- !(ext4_has_group_desc_csum(sb) && +- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { ++ ext4_free_group_clusters(sb, gdp) > 0 ) { + bh = ext4_read_block_bitmap_nowait(sb, group, 1); + if (bh && !IS_ERR(bh)) { + if (!buffer_uptodate(bh) && cnt) +@@ -2613,20 +2611,18 @@ ext4_group_t ext4_mb_prefetch(struct sup + void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group, + unsigned int nr) + { +- while (nr-- > 0) { +- struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, +- NULL); +- struct ext4_group_info *grp = ext4_get_group_info(sb, group); ++ struct ext4_group_desc *gdp; ++ struct ext4_group_info *grp; + ++ while (nr-- > 0) { + if (!group) + group = ext4_get_groups_count(sb); + group--; ++ gdp = ext4_get_group_desc(sb, group, NULL); + grp = ext4_get_group_info(sb, group); + + if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && +- ext4_free_group_clusters(sb, gdp) > 0 && +- !(ext4_has_group_desc_csum(sb) && +- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { ++ ext4_free_group_clusters(sb, gdp) > 0) { + if (ext4_mb_init_group(sb, group, GFP_NOFS)) + break; + } diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-optimize-the-ext4_mb_good_group.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-optimize-the-ext4_mb_good_group.patch new file mode 100644 index 0000000..bbee71d --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-optimize-the-ext4_mb_good_group.patch @@ -0,0 +1,59 @@ +commit dddcd2f9ebdeca9fbd36526e950bbcd0f7c1765f +Author: brookxu +AuthorDate: Fri Aug 7 22:01:39 2020 +0800 +Commit: Theodore Ts'o +CommitDate: Tue Aug 18 14:18:36 2020 -0400 + +ext4: optimize the implementation of ext4_mb_good_group() + +It might be better to adjust the code in two places: +1. Determine whether grp is currupt or not should be placed first. +2. 
(cr <= 2 && free < ac->ac_g_ex.fe_len) may belong to the crx
+ strategy, and it may be more appropriate to put it in the
+ subsequent switch statement block. For cr1, cr2, the conditions
+ in switch potentially realize the above judgment. For cr0, we
+ should add a (free < ac->ac_g_ex.fe_len) judgment, and then delete
+ ((free / fragments) >= ac->ac_g_ex.fe_len), because cr0 returns
+ true by default.
+
+Signed-off-by: Chunguang Xu
+Reviewed-by: Andreas Dilger
+Reviewed-by: Ritesh Harjani
+Link: https://lore.kernel.org/r/e20b2d8f-1154-adb7-3831-a9e11ba842e9@gmail.com
+Signed-off-by: Theodore Ts'o
+---
+
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -2135,13 +2135,11 @@ static bool ext4_mb_good_group(struct ex
+
+ BUG_ON(cr < 0 || cr >= 4);
+
+- free = grp->bb_free;
+- if (free == 0)
+- return false;
+- if (cr <= 2 && free < ac->ac_g_ex.fe_len)
++ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
+ return false;
+
+- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
++ free = grp->bb_free;
++ if (free == 0)
+ return false;
+
+ fragments = grp->bb_fragments;
+@@ -2158,8 +2156,10 @@ static bool ext4_mb_good_group(struct ex
+ ((group % flex_size) == 0))
+ return false;
+
+- if ((ac->ac_2order > ac->ac_sb->s_blocksize_bits+1) ||
+- (free / fragments) >= ac->ac_g_ex.fe_len)
++ if (free < ac->ac_g_ex.fe_len)
++ return false;
++
++ if (ac->ac_2order > ac->ac_sb->s_blocksize_bits+1)
+ return true;
+
+ if (grp->bb_largest_free_order < ac->ac_2order)
diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-refactor-code-related-to-freeing-pa.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-refactor-code-related-to-freeing-pa.patch
new file mode 100644
index 0000000..a078949
--- /dev/null
+++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-refactor-code-related-to-freeing-pa.patch
@@ -0,0 +1,104 @@
+commit 820897258ad342e78388ee9f5814fc485e79102a
+Author: Ojaswin Mujoo
+AuthorDate: Sat Mar 25 13:43:35 2023 +0530
+Commit: Theodore Ts'o
+CommitDate: Thu Apr 6 01:13:12 2023 -0400
+
+ext4: Refactor code related to freeing PAs
+
+This patch makes the following changes:
+
+* Rename ext4_mb_pa_free to ext4_mb_pa_put_free
+ to better reflect its purpose
+
+* Add new ext4_mb_pa_free() which only handles freeing
+
+* Refactor ext4_mb_pa_callback() to use ext4_mb_pa_free()
+
+There are no functional changes in this patch
+
+Signed-off-by: Ojaswin Mujoo
+Reviewed-by: Ritesh Harjani (IBM)
+Reviewed-by: Jan Kara
+Link: https://lore.kernel.org/r/b273bc9cbf5bd278f641fa5bc6c0cc9e6cb3330c.1679731817.git.ojaswin@linux.ibm.com
+Signed-off-by: Theodore Ts'o
+---
+Index: linux-stage/fs/ext4/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/mballoc.c
++++ linux-stage/fs/ext4/mballoc.c
+@@ -4612,16 +4612,22 @@ static void ext4_mb_mark_pa_deleted(stru
+ }
+ }
+
+-static void ext4_mb_pa_callback(struct rcu_head *head)
++static inline void ext4_mb_pa_free(struct ext4_prealloc_space *pa)
+ {
+- struct ext4_prealloc_space *pa;
+- pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+-
++ BUG_ON(!pa);
+ BUG_ON(atomic_read(&pa->pa_count));
+ BUG_ON(pa->pa_deleted == 0);
+ kmem_cache_free(ext4_pspace_cachep, pa);
+ }
+
++static void ext4_mb_pa_callback(struct rcu_head *head)
++{
++ struct ext4_prealloc_space *pa;
++
++ pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
++ ext4_mb_pa_free(pa);
++}
++
+ /*
+ * drops a reference to
preallocated space descriptor + * if this was the last reference and the space is consumed +@@ -5150,14 +5156,20 @@ static int ext4_mb_pa_alloc(struct ext4_ + return 0; + } + +-static void ext4_mb_pa_free(struct ext4_allocation_context *ac) ++static void ext4_mb_pa_put_free(struct ext4_allocation_context *ac) + { + struct ext4_prealloc_space *pa = ac->ac_pa; + + BUG_ON(!pa); + ac->ac_pa = NULL; + WARN_ON(!atomic_dec_and_test(&pa->pa_count)); +- kmem_cache_free(ext4_pspace_cachep, pa); ++ /* ++ * current function is only called due to an error or due to ++ * len of found blocks < len of requested blocks hence the PA has not ++ * been added to grp->bb_prealloc_list. So we don't need to lock it ++ */ ++ pa->pa_deleted = 1; ++ ext4_mb_pa_free(pa); + } + + #ifdef CONFIG_EXT4_DEBUG +@@ -5705,13 +5717,13 @@ repeat: + * So we have to free this pa here itself. + */ + if (*errp) { +- ext4_mb_pa_free(ac); ++ ext4_mb_pa_put_free(ac); + ext4_discard_allocated_blocks(ac); + goto errout; + } + if (ac->ac_status == AC_STATUS_FOUND && + ac->ac_o_ex.fe_len >= ac->ac_f_ex.fe_len) +- ext4_mb_pa_free(ac); ++ ext4_mb_pa_put_free(ac); + } + if (likely(ac->ac_status == AC_STATUS_FOUND)) { + *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); +@@ -5730,7 +5742,7 @@ repeat: + * If block allocation fails then the pa allocated above + * needs to be freed here itself. + */ +- ext4_mb_pa_free(ac); ++ ext4_mb_pa_put_free(ac); + *errp = -ENOSPC; + } + diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-reflect-mb_optimize_scan-value-in-options.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-reflect-mb_optimize_scan-value-in-options.patch new file mode 100644 index 0000000..dc5691a --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-reflect-mb_optimize_scan-value-in-options.patch @@ -0,0 +1,35 @@ +commit 3fa5d23e68a34dae9df2be168750dc5e03e0e40d +Author: Ojaswin Mujoo +AuthorDate: Mon Jul 4 11:16:03 2022 +0530 +Commit: Theodore Ts'o +CommitDate: Tue Aug 2 23:56:17 2022 -0400 + +ext4: reflect mb_optimize_scan value in options file + +Add support to display the mb_optimize_scan value in +/proc/fs/ext4//options file. The option is only +displayed when the value is non default. 
+ +Signed-off-by: Ojaswin Mujoo +Link: https://lore.kernel.org/r/20220704054603.21462-1-ojaswin@linux.ibm.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/super.c +=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -2488,6 +2488,14 @@ static int _ext4_show_options(struct seq + SEQ_OPTS_PUTS("dax=inode"); + } + ++ if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD && ++ !test_opt2(sb, MB_OPTIMIZE_SCAN)) { ++ SEQ_OPTS_PUTS("mb_optimize_scan=0"); ++ } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD && ++ test_opt2(sb, MB_OPTIMIZE_SCAN)) { ++ SEQ_OPTS_PUTS("mb_optimize_scan=1"); ++ } ++ + ext4_show_quota_options(seq, sb); + return 0; + } diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-use-buckets-for-cr1-block-scan.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-use-buckets-for-cr1-block-scan.patch new file mode 100644 index 0000000..9af065e --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-use-buckets-for-cr1-block-scan.patch @@ -0,0 +1,440 @@ +commit 83e80a6e3543f37f74c8e48a5f305b054b65ce2a +Author: Jan Kara +AuthorDate: Thu Sep 8 11:21:28 2022 +0200 +Commit: Theodore Ts'o +CommitDate: Wed Sep 21 22:12:03 2022 -0400 + +ext4: use buckets for cr 1 block scan instead of rbtree + +Using rbtree for sorting groups by average fragment size is relatively +expensive (needs rbtree update on every block freeing or allocation) and +leads to wide spreading of allocations because selection of block group +is very sentitive both to changes in free space and amount of blocks +allocated. Furthermore selecting group with the best matching average +fragment size is not necessary anyway, even more so because the +variability of fragment sizes within a group is likely large so average +is not telling much. We just need a group with large enough average +fragment size so that we have high probability of finding large enough +free extent and we don't want average fragment size to be too big so +that we are likely to find free extent only somewhat larger than what we +need. + +So instead of maintaing rbtree of groups sorted by fragment size keep +bins (lists) or groups where average fragment size is in the interval +[2^i, 2^(i+1)). This structure requires less updates on block allocation +/ freeing, generally avoids chaotic spreading of allocations into block +groups, and still is able to quickly (even faster that the rbtree) +provide a block group which is likely to have a suitably sized free +space extent. + +This patch reduces number of block groups used when untarring archive +with medium sized files (size somewhat above 64k which is default +mballoc limit for avoiding locality group preallocation) to about half +and thus improves write speeds for eMMC flash significantly. 
+ +Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning") +CC: stable@kernel.org +Reported-and-tested-by: Stefan Wahren +Tested-by: Ojaswin Mujoo +Signed-off-by: Jan Kara +Reviewed-by: Ritesh Harjani (IBM) +Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/ +Link: https://lore.kernel.org/r/20220908092136.11770-5-jack@suse.cz +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/ext4.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4.h ++++ linux-stage/fs/ext4/ext4.h +@@ -156,8 +156,6 @@ enum SHIFT_DIRECTION { + #define EXT4_MB_CR0_OPTIMIZED 0x8000 + /* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */ + #define EXT4_MB_CR1_OPTIMIZED 0x00010000 +-/* Perform linear traversal for one group */ +-#define EXT4_MB_SEARCH_NEXT_LINEAR 0x00020000 + #define EXT4_MB_VERY_DENSE 0x80000 + + struct ext4_allocation_request { +@@ -1497,8 +1495,8 @@ struct ext4_sb_info { + unsigned int s_mb_free_pending; + struct list_head s_freed_data_list; /* List of blocks to be freed + after commit completed */ +- struct rb_root s_mb_avg_fragment_size_root; +- rwlock_t s_mb_rb_lock; ++ struct list_head *s_mb_avg_fragment_size; ++ rwlock_t *s_mb_avg_fragment_size_locks; + struct list_head *s_mb_largest_free_orders; + rwlock_t *s_mb_largest_free_orders_locks; + +@@ -3221,6 +3219,8 @@ struct ext4_group_info { + ext4_grpblk_t bb_free; /* total free blocks */ + ext4_grpblk_t bb_freed_since_trim; /* blocks freed since last trim */ + ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ ++ int bb_avg_fragment_size_order; /* order of average ++ fragment in BG */ + ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ + ext4_group_t bb_group; /* Group number */ + struct list_head bb_prealloc_list; +@@ -3229,7 +3229,7 @@ struct ext4_group_info { + void *bb_bitmap; + #endif + struct rw_semaphore alloc_sem; +- struct rb_node bb_avg_fragment_size_rb; ++ struct list_head bb_avg_fragment_size_node; + struct list_head bb_largest_free_order_node; + ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block + * regions, index is order. +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -140,13 +140,15 @@ + * number of buddy bitmap orders possible) number of lists. Group-infos are + * placed in appropriate lists. + * +- * 2) Average fragment size rb tree (sbi->s_mb_avg_fragment_size_root) ++ * 2) Average fragment size lists (sbi->s_mb_avg_fragment_size) + * +- * Locking: sbi->s_mb_rb_lock (rwlock) ++ * Locking: sbi->s_mb_avg_fragment_size_locks(array of rw locks) + * +- * This is a red black tree consisting of group infos and the tree is sorted +- * by average fragment sizes (which is calculated as ext4_group_info->bb_free +- * / ext4_group_info->bb_fragments). ++ * This is an array of lists where in the i-th list there are groups with ++ * average fragment size >= 2^i and < 2^(i+1). The average fragment size ++ * is computed as ext4_group_info->bb_free / ext4_group_info->bb_fragments. ++ * Note that we don't bother with a special list for completely empty groups ++ * so we only have MB_NUM_ORDERS(sb) lists. 
+ * + * When "mb_optimize_scan" mount option is set, mballoc consults the above data + * structures to decide the order in which groups are to be traversed for +@@ -160,7 +162,8 @@ + * + * At CR = 1, we only consider groups where average fragment size > request + * size. So, we lookup a group which has average fragment size just above or +- * equal to request size using our rb tree (data structure 2) in O(log N) time. ++ * equal to request size using our average fragment size group lists (data ++ * structure 2) in O(1) time. + * + * If "mb_optimize_scan" mount option is not set, mballoc traverses groups in + * linear order which requires O(N) search time for each CR 0 and CR 1 phase. +@@ -803,65 +806,51 @@ static void ext4_mb_mark_free_simple(str + } + } + +-static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new, +- int (*cmp)(struct rb_node *, struct rb_node *)) ++static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len) + { +- struct rb_node **iter = &root->rb_node, *parent = NULL; ++ int order; + +- while (*iter) { +- parent = *iter; +- if (cmp(new, *iter) > 0) +- iter = &((*iter)->rb_left); +- else +- iter = &((*iter)->rb_right); +- } +- +- rb_link_node(new, parent, iter); +- rb_insert_color(new, root); +-} +- +-static int +-ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2) +-{ +- struct ext4_group_info *grp1 = rb_entry(rb1, +- struct ext4_group_info, +- bb_avg_fragment_size_rb); +- struct ext4_group_info *grp2 = rb_entry(rb2, +- struct ext4_group_info, +- bb_avg_fragment_size_rb); +- int num_frags_1, num_frags_2; +- +- num_frags_1 = grp1->bb_fragments ? +- grp1->bb_free / grp1->bb_fragments : 0; +- num_frags_2 = grp2->bb_fragments ? +- grp2->bb_free / grp2->bb_fragments : 0; +- +- return (num_frags_2 - num_frags_1); ++ /* ++ * We don't bother with a special lists groups with only 1 block free ++ * extents and for completely empty groups. ++ */ ++ order = fls(len) - 2; ++ if (order < 0) ++ return 0; ++ if (order == MB_NUM_ORDERS(sb)) ++ order--; ++ return order; + } + +-/* +- * Reinsert grpinfo into the avg_fragment_size tree with new average +- * fragment size. 
+- */ ++/* Move group to appropriate avg_fragment_size list */ + static void + mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); ++ int new_order; + + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + return; + +- write_lock(&sbi->s_mb_rb_lock); +- if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) { +- rb_erase(&grp->bb_avg_fragment_size_rb, +- &sbi->s_mb_avg_fragment_size_root); +- RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb); +- } ++ new_order = mb_avg_fragment_size_order(sb, ++ grp->bb_free / grp->bb_fragments); ++ if (new_order == grp->bb_avg_fragment_size_order) ++ return; + +- ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root, +- &grp->bb_avg_fragment_size_rb, +- ext4_mb_avg_fragment_size_cmp); +- write_unlock(&sbi->s_mb_rb_lock); ++ if (grp->bb_avg_fragment_size_order != -1) { ++ write_lock(&sbi->s_mb_avg_fragment_size_locks[ ++ grp->bb_avg_fragment_size_order]); ++ list_del(&grp->bb_avg_fragment_size_node); ++ write_unlock(&sbi->s_mb_avg_fragment_size_locks[ ++ grp->bb_avg_fragment_size_order]); ++ } ++ grp->bb_avg_fragment_size_order = new_order; ++ write_lock(&sbi->s_mb_avg_fragment_size_locks[ ++ grp->bb_avg_fragment_size_order]); ++ list_add_tail(&grp->bb_avg_fragment_size_node, ++ &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]); ++ write_unlock(&sbi->s_mb_avg_fragment_size_locks[ ++ grp->bb_avg_fragment_size_order]); + } + + /* +@@ -910,86 +899,56 @@ static void ext4_mb_choose_next_group_cr + *new_cr = 1; + } else { + *group = grp->bb_group; +- ac->ac_last_optimal_group = *group; + ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED; + } + } + + /* +- * Choose next group by traversing average fragment size tree. Updates *new_cr +- * if cr lvel needs an update. Sets EXT4_MB_SEARCH_NEXT_LINEAR to indicate that +- * the linear search should continue for one iteration since there's lock +- * contention on the rb tree lock. ++ * Choose next group by traversing average fragment size list of suitable ++ * order. Updates *new_cr if cr level needs an update. + */ + static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac, + int *new_cr, ext4_group_t *group, ext4_group_t ngroups) + { + struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); +- int avg_fragment_size, best_so_far; +- struct rb_node *node, *found; +- struct ext4_group_info *grp; +- +- /* +- * If there is contention on the lock, instead of waiting for the lock +- * to become available, just continue searching lineraly. We'll resume +- * our rb tree search later starting at ac->ac_last_optimal_group. 
+- */ +- if (!read_trylock(&sbi->s_mb_rb_lock)) { +- ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR; +- return; +- } ++ struct ext4_group_info *grp, *iter; ++ int i; + + if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) { + if (sbi->s_mb_stats) + atomic_inc(&sbi->s_bal_cr1_bad_suggestions); +- /* We have found something at CR 1 in the past */ +- grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group); +- for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL; +- found = rb_next(found)) { +- grp = rb_entry(found, struct ext4_group_info, +- bb_avg_fragment_size_rb); ++ } ++ ++ for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len); ++ i < MB_NUM_ORDERS(ac->ac_sb); i++) { ++ if (list_empty(&sbi->s_mb_avg_fragment_size[i])) ++ continue; ++ read_lock(&sbi->s_mb_avg_fragment_size_locks[i]); ++ if (list_empty(&sbi->s_mb_avg_fragment_size[i])) { ++ read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]); ++ continue; ++ } ++ grp = NULL; ++ list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i], ++ bb_avg_fragment_size_node) { + if (sbi->s_mb_stats) + atomic64_inc(&sbi->s_bal_cX_groups_considered[1]); +- if (likely(ext4_mb_good_group(ac, grp->bb_group, 1))) ++ if (likely(ext4_mb_good_group(ac, iter->bb_group, 1))) { ++ grp = iter; + break; +- } +- goto done; +- } +- +- node = sbi->s_mb_avg_fragment_size_root.rb_node; +- best_so_far = 0; +- found = NULL; +- +- while (node) { +- grp = rb_entry(node, struct ext4_group_info, +- bb_avg_fragment_size_rb); +- avg_fragment_size = 0; +- if (ext4_mb_good_group(ac, grp->bb_group, 1)) { +- avg_fragment_size = grp->bb_fragments ? +- grp->bb_free / grp->bb_fragments : 0; +- if (!best_so_far || avg_fragment_size < best_so_far) { +- best_so_far = avg_fragment_size; +- found = node; + } + } +- if (avg_fragment_size > ac->ac_g_ex.fe_len) +- node = node->rb_right; +- else +- node = node->rb_left; ++ read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]); ++ if (grp) ++ break; + } + +-done: +- if (found) { +- grp = rb_entry(found, struct ext4_group_info, +- bb_avg_fragment_size_rb); ++ if (grp) { + *group = grp->bb_group; + ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED; + } else { + *new_cr = 2; + } +- +- read_unlock(&sbi->s_mb_rb_lock); +- ac->ac_last_optimal_group = *group; + } + + static inline int should_optimize_scan(struct ext4_allocation_context *ac) +@@ -1018,11 +977,6 @@ next_linear_group(struct ext4_allocation + goto inc_and_return; + } + +- if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) { +- ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR; +- goto inc_and_return; +- } +- + return group; + inc_and_return: + /* +@@ -1158,13 +1112,13 @@ int ext4_mb_generate_buddy(struct super_ + return -EIO; + } + mb_set_largest_free_order(sb, grp); ++ mb_update_avg_fragment_size(sb, grp); + + clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); + + period = get_cycles() - period; + atomic_inc(&sbi->s_mb_buddies_generated); + atomic64_add(period, &sbi->s_mb_generation_time); +- mb_update_avg_fragment_size(sb, grp); + + return 0; + } +@@ -2756,7 +2710,6 @@ repeat: + * from the goal value specified + */ + group = ac->ac_g_ex.fe_group; +- ac->ac_last_optimal_group = group; + ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups; + prefetch_grp = group; + +@@ -3294,8 +3247,9 @@ int ext4_mb_add_groupinfo(struct super_b + init_rwsem(&meta_group_info[i]->alloc_sem); + meta_group_info[i]->bb_free_root = RB_ROOT; + INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node); +- RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb); ++ 
INIT_LIST_HEAD(&meta_group_info[i]->bb_avg_fragment_size_node); + meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ ++ meta_group_info[i]->bb_avg_fragment_size_order = -1; /* uninit */ + meta_group_info[i]->bb_freed_since_trim = 0; + meta_group_info[i]->bb_group = group; + +@@ -3488,7 +3442,24 @@ int ext4_mb_init(struct super_block *sb) + i++; + } while (i < MB_NUM_ORDERS(sb)); + +- sbi->s_mb_avg_fragment_size_root = RB_ROOT; ++ sbi->s_mb_avg_fragment_size = ++ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!sbi->s_mb_avg_fragment_size) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ sbi->s_mb_avg_fragment_size_locks = ++ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t), ++ GFP_KERNEL); ++ if (!sbi->s_mb_avg_fragment_size_locks) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < MB_NUM_ORDERS(sb); i++) { ++ INIT_LIST_HEAD(&sbi->s_mb_avg_fragment_size[i]); ++ rwlock_init(&sbi->s_mb_avg_fragment_size_locks[i]); ++ } + sbi->s_mb_largest_free_orders = + kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head), + GFP_KERNEL); +@@ -3507,7 +3478,6 @@ int ext4_mb_init(struct super_block *sb) + INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]); + rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]); + } +- rwlock_init(&sbi->s_mb_rb_lock); + + spin_lock_init(&sbi->s_md_lock); + sbi->s_mb_free_pending = 0; +@@ -3623,6 +3593,8 @@ out_free_locality_groups: + free_percpu(sbi->s_locality_groups); + sbi->s_locality_groups = NULL; + out: ++ kfree(sbi->s_mb_avg_fragment_size); ++ kfree(sbi->s_mb_avg_fragment_size_locks); + kfree(sbi->s_mb_largest_free_orders); + kfree(sbi->s_mb_largest_free_orders_locks); + kfree(sbi->s_mb_prealloc_table); +@@ -3682,6 +3654,8 @@ int ext4_mb_release(struct super_block * + kvfree(group_info); + rcu_read_unlock(); + } ++ kfree(sbi->s_mb_avg_fragment_size); ++ kfree(sbi->s_mb_avg_fragment_size_locks); + kfree(sbi->s_mb_largest_free_orders); + kfree(sbi->s_mb_largest_free_orders_locks); + kfree(sbi->s_mb_prealloc_table); +Index: linux-stage/fs/ext4/mballoc.h +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.h ++++ linux-stage/fs/ext4/mballoc.h +@@ -185,7 +185,6 @@ struct ext4_allocation_context { + /* copy of the best found extent taken before preallocation efforts */ + struct ext4_free_extent ac_f_ex; + +- ext4_group_t ac_last_optimal_group; + __u32 ac_groups_considered; + __u32 ac_flags; /* allocation hints */ + __u16 ac_groups_scanned; diff --git a/ldiskfs/kernel_patches/patches/rhel8.8/ext4-use-locality-group-preallocation-for-small-files.patch b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-use-locality-group-preallocation-for-small-files.patch new file mode 100644 index 0000000..d4bd0c6 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel8.8/ext4-use-locality-group-preallocation-for-small-files.patch @@ -0,0 +1,79 @@ +commit a9f2a2931d0e197ab28c6007966053fdababd53f +Author: Jan Kara +AuthorDate: Thu Sep 8 11:21:27 2022 +0200 +Commit: Theodore Ts'o +CommitDate: Wed Sep 21 22:12:00 2022 -0400 + +ext4: use locality group preallocation for small closed files + +Curently we don't use any preallocation when a file is already closed +when allocating blocks (from writeback code when converting delayed +allocation). However for small files, using locality group preallocation +is actually desirable as that is not specific to a particular file. 
+Rather it is a method to pack small files together to reduce +fragmentation and for that the fact the file is closed is actually even +stronger hint the file would benefit from packing. So change the logic +to allow locality group preallocation in this case. + +Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning") +CC: stable@kernel.org +Reported-and-tested-by: Stefan Wahren +Tested-by: Ojaswin Mujoo +Reviewed-by: Ritesh Harjani (IBM) +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/ +Link: https://lore.kernel.org/r/20220908092136.11770-4-jack@suse.cz +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -5273,6 +5273,7 @@ static void ext4_mb_group_or_file(struct + struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); + int bsbits = ac->ac_sb->s_blocksize_bits; + loff_t size, isize; ++ bool inode_pa_eligible, group_pa_eligible; + + if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) + return; +@@ -5280,26 +5281,27 @@ static void ext4_mb_group_or_file(struct + if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) + return; + ++ group_pa_eligible = sbi->s_mb_group_prealloc > 0; ++ inode_pa_eligible = true; + size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); + isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) + >> bsbits; + ++ /* No point in using inode preallocation for closed files */ + if ((size == isize) && !ext4_fs_is_busy(sbi) && +- !inode_is_open_for_write(ac->ac_inode)) { +- ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; +- return; +- } +- +- if (sbi->s_mb_group_prealloc <= 0) { +- ac->ac_flags |= EXT4_MB_STREAM_ALLOC; +- return; +- } ++ !inode_is_open_for_write(ac->ac_inode)) ++ inode_pa_eligible = false; + +- /* don't use group allocation for large files */ + size = max(size, isize); + if ((ac->ac_o_ex.fe_len >= sbi->s_mb_small_req) || +- (size >= sbi->s_mb_large_req)) { +- ac->ac_flags |= EXT4_MB_STREAM_ALLOC; ++ (size >= sbi->s_mb_large_req)) ++ group_pa_eligible = false; ++ ++ if (!group_pa_eligible) { ++ if (inode_pa_eligible) ++ ac->ac_flags |= EXT4_MB_STREAM_ALLOC; ++ else ++ ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; + return; + } + diff --git a/ldiskfs/kernel_patches/patches/rhel9.4/ext4-allow-ext4_get_group_info-to-fail.patch b/ldiskfs/kernel_patches/patches/rhel9.4/ext4-allow-ext4_get_group_info-to-fail.patch new file mode 100644 index 0000000..fa52298 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel9.4/ext4-allow-ext4_get_group_info-to-fail.patch @@ -0,0 +1,433 @@ +commit 5354b2af34064a4579be8bc0e2f15a7b70f14b5f +Author: Theodore Ts'o +AuthorDate: Sat Apr 29 00:06:28 2023 -0400 +Commit: Theodore Ts'o +CommitDate: Sat May 13 18:02:46 2023 -0400 + +ext4: allow ext4_get_group_info() to fail + +Previously, ext4_get_group_info() would treat an invalid group number +as BUG(), since in theory it should never happen. However, if a +malicious attaker (or fuzzer) modifies the superblock via the block +device while it is the file system is mounted, it is possible for +s_first_data_block to get set to a very large number. In that case, +when calculating the block group of some block number (such as the +starting block of a preallocation region), could result in an +underflow and very large block group number. 
Then the BUG_ON check in
+ext4_get_group_info() would fire, resulting in a denial of service
+attack that can be triggered by root or someone with write access to
+the block device.
+
+From a quality of implementation perspective, it's best that even if
+the system administrator does something that they shouldn't, it
+will not trigger a BUG. So instead of BUG'ing, ext4_get_group_info()
+will call ext4_error and return NULL. We also add fallback code in
+all of the callers of ext4_get_group_info() in case it returns NULL.
+
+Also, since ext4_get_group_info() was already borderline to be an
+inline function, un-inline it. This results in a net reduction of the
+compiled text size of ext4 by roughly 2k.
+
+Cc: stable@kernel.org
+Link: https://lore.kernel.org/r/20230430154311.579720-2-tytso@mit.edu
+Reported-by: syzbot+e2efa3efc15a1c9e95c3@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=69b28112e098b070f639efb356393af3ffec4220
+Signed-off-by: Theodore Ts'o
+Reviewed-by: Jan Kara
+---
+Index: linux-stage/fs/ext4/balloc.c
+===================================================================
+--- linux-stage.orig/fs/ext4/balloc.c
++++ linux-stage/fs/ext4/balloc.c
+@@ -321,6 +321,22 @@ static ext4_fsblk_t ext4_valid_block_bit
+ return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
+ }
+
++struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
++ ext4_group_t group)
++{
++ struct ext4_group_info **grp_info;
++ long indexv, indexh;
++
++ if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) {
++ ext4_error(sb, "invalid group %u", group);
++ return NULL;
++ }
++ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
++ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
++ grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
++ return grp_info[indexh];
++}
++
+ /*
+ * Return the block number which was discovered to be invalid, or 0 if
+ * the block bitmap is valid.
+@@ -395,7 +411,7 @@ static int ext4_validate_block_bitmap(st
+
+ if (buffer_verified(bh))
+ return 0;
+- if (EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
++ if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
+ return -EFSCORRUPTED;
+
+ ext4_lock_group(sb, block_group);
+Index: linux-stage/fs/ext4/ext4.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4.h
++++ linux-stage/fs/ext4/ext4.h
+@@ -2903,6 +2903,8 @@ extern void ext4_check_blocks_bitmap(str
+ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
+ ext4_group_t block_group,
+ struct buffer_head ** bh);
++extern struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
++ ext4_group_t group);
+ extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
+
+ extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
+@@ -3546,19 +3548,6 @@ static inline void ext4_isize_set(struct
+ raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
+ }
+
+-static inline
+-struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
+- ext4_group_t group)
+-{
+- struct ext4_group_info **grp_info;
+- long indexv, indexh;
+- BUG_ON(group >= EXT4_SB(sb)->s_groups_count);
+- indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
+- indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
+- grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
+- return grp_info[indexh];
+-}
+-
+ /*
+ * Reading s_groups_count requires using smp_rmb() afterwards.
See + * the locking protocol documented in the comments of ext4_group_add() +Index: linux-stage/fs/ext4/ialloc.c +=================================================================== +--- linux-stage.orig/fs/ext4/ialloc.c ++++ linux-stage/fs/ext4/ialloc.c +@@ -92,7 +92,7 @@ static int ext4_validate_inode_bitmap(st + + if (buffer_verified(bh)) + return 0; +- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; + + ext4_lock_group(sb, block_group); +@@ -295,7 +295,7 @@ void ext4_free_inode(handle_t *handle, s + } + if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { + grp = ext4_get_group_info(sb, block_group); +- if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { ++ if (!grp || unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { + fatal = -EFSCORRUPTED; + goto error_return; + } +@@ -1048,7 +1048,7 @@ got_group: + * Skip groups with already-known suspicious inode + * tables + */ +- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ++ if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + goto next_group; + } + +@@ -1185,6 +1185,10 @@ got: + + if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { + grp = ext4_get_group_info(sb, group); ++ if (!grp) { ++ err = -EFSCORRUPTED; ++ goto out; ++ } + down_read(&grp->alloc_sem); /* + * protect vs itable + * lazyinit +@@ -1538,7 +1542,7 @@ int ext4_init_inode_table(struct super_b + } + + gdp = ext4_get_group_desc(sb, group, &group_desc_bh); +- if (!gdp) ++ if (!gdp || !grp) + goto out; + + /* +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -745,6 +745,8 @@ static int __mb_check_buddy(struct ext4_ + MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); + + grp = ext4_get_group_info(sb, e4b->bd_group); ++ if (!grp) ++ return NULL; + list_for_each(cur, &grp->bb_prealloc_list) { + ext4_group_t groupnr; + struct ext4_prealloc_space *pa; +@@ -1059,10 +1061,10 @@ mb_set_largest_free_order(struct super_b + } + + static noinline_for_stack +-int ext4_mb_generate_buddy(struct super_block *sb, +- void *buddy, void *bitmap, ext4_group_t group) ++void ext4_mb_generate_buddy(struct super_block *sb, ++ void *buddy, void *bitmap, ext4_group_t group, ++ struct ext4_group_info *grp) + { +- struct ext4_group_info *grp = ext4_get_group_info(sb, group); + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); + ext4_grpblk_t i = 0; +@@ -1107,7 +1109,6 @@ int ext4_mb_generate_buddy(struct super_ + grp->bb_free = free; + ext4_mark_group_bitmap_corrupted(sb, group, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); +- return -EIO; + } + mb_set_largest_free_order(sb, grp); + mb_update_avg_fragment_size(sb, grp); +@@ -1117,8 +1118,6 @@ int ext4_mb_generate_buddy(struct super_ + period = get_cycles() - period; + atomic_inc(&sbi->s_mb_buddies_generated); + atomic64_add(period, &sbi->s_mb_generation_time); +- +- return 0; + } + + /* The buddy information is attached the buddy cache inode +@@ -1188,6 +1187,8 @@ static int ext4_mb_init_cache(struct pag + break; + + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) ++ continue; + /* + * If page is uptodate then we came here after online resize + * which added some new uninitialized group info structs, so +@@ -1253,6 +1254,10 @@ static int ext4_mb_init_cache(struct pag + group, page->index, i * blocksize); + trace_ext4_mb_buddy_bitmap_load(sb, group); + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) { ++ err = -EFSCORRUPTED; ++ goto out; ++ 
} + grinfo->bb_fragments = 0; + memset(grinfo->bb_counters, 0, + sizeof(*grinfo->bb_counters) * +@@ -1263,7 +1268,7 @@ static int ext4_mb_init_cache(struct pag + ext4_lock_group(sb, group); + /* init the buddy */ + memset(data, 0xff, blocksize); +- err = ext4_mb_generate_buddy(sb, data, incore, group); ++ ext4_mb_generate_buddy(sb, data, incore, group, grinfo); + ext4_unlock_group(sb, group); + incore = NULL; + } else { +@@ -1378,6 +1383,9 @@ int ext4_mb_init_group(struct super_bloc + might_sleep(); + mb_debug(sb, "init group %u\n", group); + this_grp = ext4_get_group_info(sb, group); ++ if (!this_grp) ++ return -EFSCORRUPTED; ++ + /* + * This ensures that we don't reinit the buddy cache + * page which map to the group from which we are already +@@ -1452,6 +1460,8 @@ ext4_mb_load_buddy_gfp(struct super_bloc + + blocks_per_page = PAGE_SIZE / sb->s_blocksize; + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ return -EFSCORRUPTED; + + e4b->bd_blkbits = sb->s_blocksize_bits; + e4b->bd_info = grp; +@@ -2182,6 +2192,8 @@ int ext4_mb_find_by_goal(struct ext4_all + struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); + struct ext4_free_extent ex; + ++ if (!grp) ++ return -EFSCORRUPTED; + if (!(ac->ac_flags & (EXT4_MB_HINT_TRY_GOAL | EXT4_MB_HINT_GOAL_ONLY))) + return 0; + if (grp->bb_free == 0) +@@ -2410,7 +2422,7 @@ static bool ext4_mb_good_group(struct ex + + BUG_ON(cr < 0 || cr >= 4); + +- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) ++ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) || !grp) + return false; + + free = grp->bb_free; +@@ -2479,6 +2491,8 @@ static int ext4_mb_good_group_nolock(str + ext4_grpblk_t free; + int ret = 0; + ++ if (!grp) ++ return -EFSCORRUPTED; + if (sbi->s_mb_stats) + atomic64_inc(&sbi->s_bal_cX_groups_considered[ac->ac_criteria]); + if (should_lock) { +@@ -2559,7 +2573,7 @@ ext4_group_t ext4_mb_prefetch(struct sup + * prefetch once, so we avoid getblk() call, which can + * be expensive. + */ +- if (!EXT4_MB_GRP_TEST_AND_SET_READ(grp) && ++ if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && + EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && +@@ -2603,7 +2617,7 @@ void ext4_mb_prefetch_fini(struct super_ + gdp = ext4_get_group_desc(sb, group, NULL); + grp = ext4_get_group_info(sb, group); + +- if (EXT4_MB_GRP_NEED_INIT(grp) && ++ if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && + ext4_free_group_clusters(sb, gdp) > 0 && + !(ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { +@@ -2865,6 +2879,8 @@ static int ext4_mb_seq_groups_show(struc + sizeof(struct ext4_group_info); + + grinfo = ext4_get_group_info(sb, group); ++ if (!grinfo) ++ return 0; + /* Load the group info in memory only if not already loaded. 
*/ + if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) { + err = ext4_mb_load_buddy(sb, group, &e4b); +@@ -2879,7 +2895,7 @@ static int ext4_mb_seq_groups_show(struc + if (gdp != NULL) + free = ext4_free_group_clusters(sb, gdp); + +- memcpy(&sg, ext4_get_group_info(sb, group), i); ++ memcpy(&sg, grinfo, i); + + if (buddy_loaded) + ext4_mb_unload_buddy(&e4b); +@@ -3394,8 +3410,12 @@ static int ext4_mb_init_backend(struct s + + err_freebuddy: + cachep = get_groupinfo_cache(sb->s_blocksize_bits); +- while (i-- > 0) +- kmem_cache_free(cachep, ext4_get_group_info(sb, i)); ++ while (i-- > 0) { ++ struct ext4_group_info *grp = ext4_get_group_info(sb, i); ++ ++ if (grp) ++ kmem_cache_free(cachep, grp); ++ } + i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); +@@ -3730,6 +3750,8 @@ int ext4_mb_release(struct super_block * + for (i = 0; i < ngroups; i++) { + cond_resched(); + grinfo = ext4_get_group_info(sb, i); ++ if (!grinfo) ++ continue; + mb_group_bb_bitmap_free(grinfo); + ext4_lock_group(sb, i); + count = ext4_mb_cleanup_pa(grinfo); +@@ -4934,6 +4956,8 @@ static void ext4_mb_generate_from_freeli + struct ext4_free_data *entry; + + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ return; + n = rb_first(&(grp->bb_free_root)); + + while (n) { +@@ -5003,6 +5027,9 @@ int ext4_mb_generate_from_pa(struct supe + int err; + int len; + ++ if (!grp) ++ return -EIO; ++ + gdp = ext4_get_group_desc(sb, group, NULL); + if (gdp == NULL) + return -EIO; +@@ -5256,6 +5283,8 @@ adjust_bex: + + ei = EXT4_I(ac->ac_inode); + grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); ++ if (!grp) ++ return; + + pa->pa_node_lock.inode_lock = &ei->i_prealloc_lock; + pa->pa_inode = ac->ac_inode; +@@ -5307,6 +5336,8 @@ ext4_mb_new_group_pa(struct ext4_allocat + atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); + + grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group); ++ if (!grp) ++ return; + lg = ac->ac_lg; + BUG_ON(lg == NULL); + +@@ -5435,6 +5466,8 @@ ext4_mb_discard_group_preallocations(str + int err; + int free = 0; + ++ if (!grp) ++ return 0; + mb_debug(sb, "discard preallocation for group %u\n", group); + if (list_empty(&grp->bb_prealloc_list)) + goto out_dbg; +@@ -5683,6 +5716,9 @@ static inline void ext4_mb_show_pa(struc + struct ext4_prealloc_space *pa; + ext4_grpblk_t start; + struct list_head *cur; ++ ++ if (!grp) ++ continue; + ext4_lock_group(sb, i); + list_for_each(cur, &grp->bb_prealloc_list) { + pa = list_entry(cur, struct ext4_prealloc_space, +@@ -6481,6 +6517,7 @@ static void ext4_mb_clear_bb(handle_t *h + struct buffer_head *bitmap_bh = NULL; + struct super_block *sb = inode->i_sb; + struct ext4_group_desc *gdp; ++ struct ext4_group_info *grp; + unsigned int overflow; + ext4_grpblk_t bit; + struct buffer_head *gd_bh; +@@ -6506,8 +6543,8 @@ do_more: + overflow = 0; + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); + +- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT( +- ext4_get_group_info(sb, block_group)))) ++ grp = ext4_get_group_info(sb, block_group); ++ if (unlikely(!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))) + return; + + /* +@@ -7169,6 +7206,8 @@ int ext4_trim_fs(struct super_block *sb, + + for (group = first_group; group <= last_group; group++) { + grp = ext4_get_group_info(sb, group); ++ if (!grp) ++ continue; + /* We only do this if the grp has never been initialized */ + if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { + ret = ext4_mb_init_group(sb, group, GFP_NOFS); +Index: linux-stage/fs/ext4/super.c 
+=================================================================== +--- linux-stage.orig/fs/ext4/super.c ++++ linux-stage/fs/ext4/super.c +@@ -1094,6 +1094,8 @@ void ext4_mark_group_bitmap_corrupted(st + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); + int ret; + ++ if (!grp || !gdp) ++ return; + if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) { + ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, + &grp->bb_state); diff --git a/ldiskfs/kernel_patches/patches/rhel9.4/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch b/ldiskfs/kernel_patches/patches/rhel9.4/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch new file mode 100644 index 0000000..5981ee3 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/rhel9.4/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch @@ -0,0 +1,51 @@ +commit 3c6296046c85333bc52555a670a9093d9e2657bb +Author: Ojaswin Mujoo +AuthorDate: Tue May 30 18:03:46 2023 +0530 +Commit: Theodore Ts'o +CommitDate: Mon Jun 26 19:34:56 2023 -0400 + +ext4: Don't skip prefetching BLOCK_UNINIT groups + +Currently, ext4_mb_prefetch() and ext4_mb_prefetch_fini() skip +BLOCK_UNINIT groups since fetching their bitmaps doesn't need disk IO. +As a consequence, we end not initializing the buddy structures and CR0/1 +lists for these BGs, even though it can be done without any disk IO +overhead. Hence, don't skip such BGs during prefetch and prefetch_fini. + +This improves the accuracy of CR0/1 allocation as earlier, we could have +essentially empty BLOCK_UNINIT groups being ignored by CR0/1 due to their buddy +not being initialized, leading to slower CR2 allocations. With this patch CR0/1 +will be able to discover these groups as well, thus improving performance. + +Signed-off-by: Ojaswin Mujoo +Reviewed-by: Ritesh Harjani (IBM) +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/dc3130b8daf45ffe63d8a3c1edcf00eb8ba70e1f.1685449706.git.ojaswin@linux.ibm.com +Signed-off-by: Theodore Ts'o +--- +Index: linux-stage/fs/ext4/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext4/mballoc.c ++++ linux-stage/fs/ext4/mballoc.c +@@ -2575,9 +2575,7 @@ ext4_group_t ext4_mb_prefetch(struct sup + */ + if (gdp && grp && !EXT4_MB_GRP_TEST_AND_SET_READ(grp) && + EXT4_MB_GRP_NEED_INIT(grp) && +- ext4_free_group_clusters(sb, gdp) > 0 && +- !(ext4_has_group_desc_csum(sb) && +- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { ++ ext4_free_group_clusters(sb, gdp) > 0 ) { + bh = ext4_read_block_bitmap_nowait(sb, group, true); + if (bh && !IS_ERR(bh)) { + if (!buffer_uptodate(bh) && cnt) +@@ -2618,9 +2616,7 @@ void ext4_mb_prefetch_fini(struct super_ + grp = ext4_get_group_info(sb, group); + + if (grp && gdp && EXT4_MB_GRP_NEED_INIT(grp) && +- ext4_free_group_clusters(sb, gdp) > 0 && +- !(ext4_has_group_desc_csum(sb) && +- (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))) { ++ ext4_free_group_clusters(sb, gdp) > 0) { + if (ext4_mb_init_group(sb, group, GFP_NOFS)) + break; + } diff --git a/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.10.series b/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.10.series index 948b74b..0c4cb3a 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.10.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.10.series @@ -42,3 +42,22 @@ rhel8/ext4-encdata.patch rhel8/ext4-race-in-ext4-destroy-inode.patch rhel8/ext4-mballoc-dense.patch rhel8/ext4-limit-per-inode-preallocation-list.patch +rhel8.8/ext4-add-prefetching-for-block-allocation-bitmaps.patch 
+rhel8.8/ext4-add-prefetch-block-bitmap-mount-option.patch +rhel8.8/ext4-optimize-the-ext4_mb_good_group.patch +rhel8.8/ext4-drop-s_mb_bal_lock-convert-protected-fields-to-atomic.patch +rhel8.8/ext4-add-mballoc-stats-proc-file.patch +rhel8.8/ext4-add-MB_NUM_ORDERS-macro.patch +rhel8.8/ext4-improve-cr0-cr1-group-scanning.patch +rhel8.8/ext4-make-prefetch_block_bitmaps-default.patch +rhel8.8/ext4-make-mb_optimize_scan-performance-with-extents.patch +rhel8.8/ext4-reflect-mb_optimize_scan-value-in-options.patch +rhel8.8/ext4-make-mballoc-try-target-group-first.patch +rhel8.8/ext4-avoiod-unnecessary-spreading-of-allocations.patch +rhel8.8/ext4-use-locality-group-preallocation-for-small-files.patch +rhel8.8/ext4-use-buckets-for-cr1-block-scan.patch +rhel8.8/ext4-fixup-possible-uninit-var-in-ext4_mb_choose_next_group_cr1.patch +rhel8.8/ext4-refactor-code-related-to-freeing-pa.patch +rhel8.8/ext4-allow-ext4_get_group_info-to-fail.patch +rhel8.8/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch +rhel8.8/ext4-ensure-ext4_mb_prefetch_fini-called-for-all-prefetched-bg.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.8.series b/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.8.series index 9fef225..de867e7 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.8.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-4.18-rhel8.8.series @@ -42,3 +42,23 @@ rhel8/ext4-encdata.patch rhel8/ext4-race-in-ext4-destroy-inode.patch rhel8/ext4-mballoc-dense.patch rhel8/ext4-limit-per-inode-preallocation-list.patch +rhel8.8/ext4-add-prefetching-for-block-allocation-bitmaps.patch +rhel8.8/ext4-add-prefetch-block-bitmap-mount-option.patch +rhel8.8/ext4-optimize-the-ext4_mb_good_group.patch +rhel8.8/ext4-drop-s_mb_bal_lock-convert-protected-fields-to-atomic.patch +rhel8.8/ext4-add-mballoc-stats-proc-file.patch +rhel8.8/ext4-add-MB_NUM_ORDERS-macro.patch +rhel8.8/ext4-improve-cr0-cr1-group-scanning.patch +rhel8.8/ext4-make-prefetch_block_bitmaps-default.patch +rhel8.8/ext4-make-mb_optimize_scan-performance-with-extents.patch +rhel8.8/ext4-reflect-mb_optimize_scan-value-in-options.patch +rhel8.8/ext4-make-mballoc-try-target-group-first.patch +rhel8.8/ext4-avoiod-unnecessary-spreading-of-allocations.patch +rhel8.8/ext4-use-locality-group-preallocation-for-small-files.patch +rhel8.8/ext4-use-buckets-for-cr1-block-scan.patch +rhel8.8/ext4-limit-number-of-retries-after-discard-prealloc-blocks.patch +rhel8.8/ext4-fixup-possible-uninit-var-in-ext4_mb_choose_next_group_cr1.patch +rhel8.8/ext4-refactor-code-related-to-freeing-pa.patch +rhel8.8/ext4-allow-ext4_get_group_info-to-fail.patch +rhel8.8/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch +rhel8.8/ext4-ensure-ext4_mb_prefetch_fini-called-for-all-prefetched-bg.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.4.series b/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.4.series index f8014c7..5fa1ef9 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.4.series @@ -38,3 +38,6 @@ rhel9.1/ext4-enc-flag.patch rhel9.2/ext4-encdata.patch rhel9/ext4-add-periodic-superblock-update.patch rhel9.4/ext4-add-IGET_NO_CHECKS-flag.patch +rhel9.4/ext4-allow-ext4_get_group_info-to-fail.patch +rhel9.4/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch +rhel8.8/ext4-ensure-ext4_mb_prefetch_fini-called-for-all-prefetched-bg.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.5.series b/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.5.series index 
42c95ac..cae7767 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.5.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-5.14-rhel9.5.series @@ -38,3 +38,5 @@ rhel9.1/ext4-enc-flag.patch rhel9.2/ext4-encdata.patch rhel9.5/ext4-add-periodic-superblock-update.patch rhel9.4/ext4-add-IGET_NO_CHECKS-flag.patch +rhel9.4/ext4-not-skip-prefetching-BLOCK_UNINIT-groups.patch +rhel8.8/ext4-ensure-ext4_mb_prefetch_fini-called-for-all-prefetched-bg.patch -- 1.8.3.1