From ff94a50b736306989d6eb11d8e34a45b006a2b3e Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Mon, 4 Mar 2024 15:11:32 -0700 Subject: [PATCH] LU-17606 ldiskfs: remove old el7.[678] patches/series Remove the old ldiskfs el7.[678] patch series files, and the resulting patch files that are no longer referenced by any patch series file: ./contrib/scripts/clearpatches.sh -d Test-Parameters: trivial Signed-off-by: Andreas Dilger Change-Id: I6727d88af1261c9b4090984ad8cab51a5dce7057 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54271 Tested-by: jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Jian Yu Reviewed-by: Shaun Tancheff Reviewed-by: Oleg Drokin --- ...-LRU-to-round-robin-in-extent-status-tree.patch | 570 ---------- ...p-flag-definitions-for-extent-status-tree.patch | 139 --- ...-fix-xattr-shifting-when-expanding-inodes.patch | 245 ----- ...xt4-introduce-aging-to-extent-status-tree.patch | 156 --- .../patches/rhel7.6/ext4-large-dir.patch | 356 ------ .../patches/rhel7.6/ext4-large-eas.patch | 1145 -------------------- ...number-of-scanned-extents-in-status-tree-.patch | 235 ---- .../patches/rhel7.6/ext4-mballoc-prefetch.patch | 261 ----- .../kernel_patches/patches/rhel7.6/ext4-misc.patch | 134 --- ...andling-of-list-of-shrinkable-inodes-into.patch | 147 --- .../patches/rhel7.6/ext4-pdirop.patch | 855 --------------- .../rhel7.6/ext4-projid-ignore-maxquotas.patch | 141 --- ...-extent-status-procfs-files-if-journal-lo.patch | 49 - ...extent-status-tree-shrinker-delay-statict.patch | 465 -------- .../patches/rhel7.7/ext4-pdirop.patch | 855 --------------- .../series/ldiskfs-3.10-rhel7.6.series | 56 - .../series/ldiskfs-3.10-rhel7.7.series | 56 - .../series/ldiskfs-3.10-rhel7.8.series | 49 - 18 files changed, 5914 deletions(-) delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-change-LRU-to-round-robin-in-extent-status-tree.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-cleanup-flag-definitions-for-extent-status-tree.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-fix-xattr-shifting-when-expanding-inodes.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-introduce-aging-to-extent-status-tree.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-large-dir.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-large-eas.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-limit-number-of-scanned-extents-in-status-tree-.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-misc.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-move-handling-of-list-of-shrinkable-inodes-into.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-pdirop.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-projid-ignore-maxquotas.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-remove-extent-status-procfs-files-if-journal-lo.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.6/ext4-track-extent-status-tree-shrinker-delay-statict.patch delete mode 100644 ldiskfs/kernel_patches/patches/rhel7.7/ext4-pdirop.patch delete mode 100644 ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series delete mode 100644 ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series delete mode 100644 ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.8.series diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-change-LRU-to-round-robin-in-extent-status-tree.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-change-LRU-to-round-robin-in-extent-status-tree.patch deleted file mode 100644 index bc04f8b..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-change-LRU-to-round-robin-in-extent-status-tree.patch +++ /dev/null @@ -1,570 +0,0 @@ -From 4fac310a77c918d6a235a55cb76cf2f9bb22de71 Mon Sep 17 00:00:00 2001 -From: Zheng Liu -Date: Tue, 25 Nov 2014 11:45:37 -0500 -Subject: [PATCH 3/7] ext4: change LRU to round-robin in extent status tree - shrinker - -In this commit we discard the lru algorithm for inodes with extent -status tree because it takes significant effort to maintain a lru list -in extent status tree shrinker and the shrinker can take a long time to -scan this lru list in order to reclaim some objects. - -We replace the lru ordering with a simple round-robin. After that we -never need to keep a lru list. That means that the list needn't be -sorted if the shrinker can not reclaim any objects in the first round. - -Cc: Andreas Dilger -Signed-off-by: Zheng Liu -Signed-off-by: Jan Kara -Signed-off-by: Theodore Ts'o ---- - fs/ext4/ext4.h | 10 +- - fs/ext4/extents.c | 4 +- - fs/ext4/extents_status.c | 221 +++++++++++++++++---------------------- - fs/ext4/extents_status.h | 7 +- - fs/ext4/inode.c | 4 +- - fs/ext4/ioctl.c | 4 +- - fs/ext4/super.c | 7 +- - 7 files changed, 112 insertions(+), 145 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index cc5ba587..0813afd6 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -1017,10 +1017,9 @@ struct ext4_inode_info { - /* extents status tree */ - struct ext4_es_tree i_es_tree; - rwlock_t i_es_lock; -- struct list_head i_es_lru; -+ struct list_head i_es_list; - unsigned int i_es_all_nr; /* protected by i_es_lock */ -- unsigned int i_es_lru_nr; /* protected by i_es_lock */ -- unsigned long i_touch_when; /* jiffies of last accessing */ -+ unsigned int i_es_shk_nr; /* protected by i_es_lock */ - - /* ialloc */ - ext4_group_t i_last_alloc_group; -@@ -1482,9 +1481,10 @@ struct ext4_sb_info { - - /* Reclaim extents from extent status tree */ - struct shrinker s_es_shrinker; -- struct list_head s_es_lru; -+ struct list_head s_es_list; -+ long s_es_nr_inode; - struct ext4_es_stats s_es_stats; -- spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; -+ spinlock_t s_es_lock ____cacheline_aligned_in_smp; - - /* Ratelimit ext4 messages. */ - struct ratelimit_state s_err_ratelimit_state; -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index f618d0ba..c012dc51 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4689,7 +4689,7 @@ out2: - - trace_ext4_ext_map_blocks_exit(inode, flags, map, - err ? err : allocated); -- ext4_es_lru_add(inode); -+ ext4_es_list_add(inode); - return err ? err : allocated; - } - -@@ -5263,7 +5263,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - error = ext4_fill_fiemap_extents(inode, start_blk, - len_blks, fieinfo); - } -- ext4_es_lru_add(inode); -+ ext4_es_list_add(inode); - return error; - } - -diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c -index 7dfed27b..382a7bf9 100644 ---- a/fs/ext4/extents_status.c -+++ b/fs/ext4/extents_status.c -@@ -149,8 +149,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t end); - static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, - int nr_to_scan); --static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, -- struct ext4_inode_info *locked_ei); -+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, -+ struct ext4_inode_info *locked_ei); - - int __init ext4_init_es(void) - { -@@ -298,6 +298,36 @@ out: - trace_ext4_es_find_delayed_extent_range_exit(inode, es); - } - -+void ext4_es_list_add(struct inode *inode) -+{ -+ struct ext4_inode_info *ei = EXT4_I(inode); -+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -+ -+ if (!list_empty(&ei->i_es_list)) -+ return; -+ -+ spin_lock(&sbi->s_es_lock); -+ if (list_empty(&ei->i_es_list)) { -+ list_add_tail(&ei->i_es_list, &sbi->s_es_list); -+ sbi->s_es_nr_inode++; -+ } -+ spin_unlock(&sbi->s_es_lock); -+} -+ -+void ext4_es_list_del(struct inode *inode) -+{ -+ struct ext4_inode_info *ei = EXT4_I(inode); -+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -+ -+ spin_lock(&sbi->s_es_lock); -+ if (!list_empty(&ei->i_es_list)) { -+ list_del_init(&ei->i_es_list); -+ sbi->s_es_nr_inode--; -+ WARN_ON_ONCE(sbi->s_es_nr_inode < 0); -+ } -+ spin_unlock(&sbi->s_es_lock); -+} -+ - static struct extent_status * - ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, - ext4_fsblk_t pblk) -@@ -314,9 +344,9 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, - * We don't count delayed extent because we never try to reclaim them - */ - if (!ext4_es_is_delayed(es)) { -- EXT4_I(inode)->i_es_lru_nr++; -+ EXT4_I(inode)->i_es_shk_nr++; - percpu_counter_inc(&EXT4_SB(inode->i_sb)-> -- s_es_stats.es_stats_lru_cnt); -+ s_es_stats.es_stats_shk_cnt); - } - - EXT4_I(inode)->i_es_all_nr++; -@@ -330,12 +360,12 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) - EXT4_I(inode)->i_es_all_nr--; - percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); - -- /* Decrease the lru counter when this es is not delayed */ -+ /* Decrease the shrink counter when this es is not delayed */ - if (!ext4_es_is_delayed(es)) { -- BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); -- EXT4_I(inode)->i_es_lru_nr--; -+ BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); -+ EXT4_I(inode)->i_es_shk_nr--; - percpu_counter_dec(&EXT4_SB(inode->i_sb)-> -- s_es_stats.es_stats_lru_cnt); -+ s_es_stats.es_stats_shk_cnt); - } - - kmem_cache_free(ext4_es_cachep, es); -@@ -693,8 +723,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, - goto error; - retry: - err = __es_insert_extent(inode, &newes); -- if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, -- EXT4_I(inode))) -+ if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), -+ 1, EXT4_I(inode))) - goto retry; - if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) - err = 0; -@@ -851,8 +881,8 @@ retry: - es->es_lblk = orig_es.es_lblk; - es->es_len = orig_es.es_len; - if ((err == -ENOMEM) && -- __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, -- EXT4_I(inode))) -+ __es_shrink(EXT4_SB(inode->i_sb), -+ 1, EXT4_I(inode))) - goto retry; - goto out; - } -@@ -924,6 +954,11 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - end = lblk + len - 1; - BUG_ON(end < lblk); - -+ /* -+ * ext4_clear_inode() depends on us taking i_es_lock unconditionally -+ * so that we are sure __es_shrink() is done with the inode before it -+ * is reclaimed. -+ */ - write_lock(&EXT4_I(inode)->i_es_lock); - err = __es_remove_extent(inode, lblk, end); - write_unlock(&EXT4_I(inode)->i_es_lock); -@@ -931,112 +966,77 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - return err; - } - --static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, -- struct list_head *b) --{ -- struct ext4_inode_info *eia, *eib; -- eia = list_entry(a, struct ext4_inode_info, i_es_lru); -- eib = list_entry(b, struct ext4_inode_info, i_es_lru); -- -- if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && -- !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) -- return 1; -- if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) && -- ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED)) -- return -1; -- if (eia->i_touch_when == eib->i_touch_when) -- return 0; -- if (time_after(eia->i_touch_when, eib->i_touch_when)) -- return 1; -- else -- return -1; --} -- --static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, -- struct ext4_inode_info *locked_ei) -+static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, -+ struct ext4_inode_info *locked_ei) - { - struct ext4_inode_info *ei; - struct ext4_es_stats *es_stats; -- struct list_head *cur, *tmp; -- LIST_HEAD(skipped); - ktime_t start_time; - u64 scan_time; -+ int nr_to_walk; - int ret, nr_shrunk = 0; -- int retried = 0, skip_precached = 1, nr_skipped = 0; -+ int retried = 0, nr_skipped = 0; - - es_stats = &sbi->s_es_stats; - start_time = ktime_get(); -- spin_lock(&sbi->s_es_lru_lock); - - retry: -- list_for_each_safe(cur, tmp, &sbi->s_es_lru) { -- /* -- * If we have already reclaimed all extents from extent -- * status tree, just stop the loop immediately. -- */ -- if (percpu_counter_read_positive( -- &es_stats->es_stats_lru_cnt) == 0) -- break; -- -- ei = list_entry(cur, struct ext4_inode_info, i_es_lru); -+ spin_lock(&sbi->s_es_lock); -+ nr_to_walk = sbi->s_es_nr_inode; -+ while (nr_to_walk-- > 0) { - -+ if (list_empty(&sbi->s_es_list)) { -+ spin_unlock(&sbi->s_es_lock); -+ goto out; -+ } -+ ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info, -+ i_es_list); -+ /* Move the inode to the tail */ -+ list_move(&ei->i_es_list, sbi->s_es_list.prev); - /* -- * Skip the inode that is newer than the last_sorted -- * time. Normally we try hard to avoid shrinking -- * precached inodes, but we will as a last resort. -+ * Normally we try hard to avoid shrinking precached inodes, -+ * but we will as a last resort. - */ -- if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || -- (skip_precached && ext4_test_inode_state(&ei->vfs_inode, -- EXT4_STATE_EXT_PRECACHED))) { -+ if (!retried && ext4_test_inode_state(&ei->vfs_inode, -+ EXT4_STATE_EXT_PRECACHED)) { - nr_skipped++; -- list_move_tail(cur, &skipped); - continue; - } - -- if (ei->i_es_lru_nr == 0 || ei == locked_ei || -- !write_trylock(&ei->i_es_lock)) -- continue; -+ if (ei == locked_ei || !write_trylock(&ei->i_es_lock)) { -+ nr_skipped++; -+ continue; -+ } -+ /* -+ * Now we hold i_es_lock which protects us from inode reclaim -+ * freeing inode under us -+ */ -+ spin_unlock(&sbi->s_es_lock); - - ret = __es_try_to_reclaim_extents(ei, nr_to_scan); -- if (ei->i_es_lru_nr == 0) -- list_del_init(&ei->i_es_lru); - write_unlock(&ei->i_es_lock); - - nr_shrunk += ret; - nr_to_scan -= ret; - if (nr_to_scan == 0) -- break; -+ goto out; -+ spin_lock(&sbi->s_es_lock); - } - -- /* Move the newer inodes into the tail of the LRU list. */ -- list_splice_tail(&skipped, &sbi->s_es_lru); -- INIT_LIST_HEAD(&skipped); -+ spin_unlock(&sbi->s_es_lock); - - /* - * If we skipped any inodes, and we weren't able to make any -- * forward progress, sort the list and try again. -+ * forward progress, try again to scan precached inodes. - */ - if ((nr_shrunk == 0) && nr_skipped && !retried) { - retried++; -- list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); -- es_stats->es_stats_last_sorted = jiffies; -- ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, -- i_es_lru); -- /* -- * If there are no non-precached inodes left on the -- * list, start releasing precached extents. -- */ -- if (ext4_test_inode_state(&ei->vfs_inode, -- EXT4_STATE_EXT_PRECACHED)) -- skip_precached = 0; - goto retry; - } - -- spin_unlock(&sbi->s_es_lru_lock); -- - if (locked_ei && nr_shrunk == 0) - nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); -- -+out: - scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); - if (likely(es_stats->es_stats_scan_time)) - es_stats->es_stats_scan_time = (scan_time + -@@ -1061,15 +1061,15 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) - int nr_to_scan = sc->nr_to_scan; - int ret, nr_shrunk; - -- ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); -+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); - trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); - - if (!nr_to_scan) - return ret; - -- nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); -+ nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL); - -- ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); -+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); - trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); - return ret; - } -@@ -1096,28 +1096,24 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) - return 0; - - /* here we just find an inode that has the max nr. of objects */ -- spin_lock(&sbi->s_es_lru_lock); -- list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { -+ spin_lock(&sbi->s_es_lock); -+ list_for_each_entry(ei, &sbi->s_es_list, i_es_list) { - inode_cnt++; - if (max && max->i_es_all_nr < ei->i_es_all_nr) - max = ei; - else if (!max) - max = ei; - } -- spin_unlock(&sbi->s_es_lru_lock); -+ spin_unlock(&sbi->s_es_lock); - - seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", - percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), -- percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); -+ percpu_counter_sum_positive(&es_stats->es_stats_shk_cnt)); - seq_printf(seq, " %lu/%lu cache hits/misses\n", - es_stats->es_stats_cache_hits, - es_stats->es_stats_cache_misses); -- if (es_stats->es_stats_last_sorted != 0) -- seq_printf(seq, " %u ms last sorted interval\n", -- jiffies_to_msecs(jiffies - -- es_stats->es_stats_last_sorted)); - if (inode_cnt) -- seq_printf(seq, " %d inodes on lru list\n", inode_cnt); -+ seq_printf(seq, " %d inodes on list\n", inode_cnt); - - seq_printf(seq, "average:\n %llu us scan time\n", - div_u64(es_stats->es_stats_scan_time, 1000)); -@@ -1126,7 +1122,7 @@ static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) - seq_printf(seq, - "maximum:\n %lu inode (%u objects, %u reclaimable)\n" - " %llu us max scan time\n", -- max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, -+ max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_shk_nr, - div_u64(es_stats->es_stats_max_scan_time, 1000)); - - return 0; -@@ -1175,9 +1171,9 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) - { - int err; - -- INIT_LIST_HEAD(&sbi->s_es_lru); -- spin_lock_init(&sbi->s_es_lru_lock); -- sbi->s_es_stats.es_stats_last_sorted = 0; -+ INIT_LIST_HEAD(&sbi->s_es_list); -+ sbi->s_es_nr_inode = 0; -+ spin_lock_init(&sbi->s_es_lock); - sbi->s_es_stats.es_stats_shrunk = 0; - sbi->s_es_stats.es_stats_cache_hits = 0; - sbi->s_es_stats.es_stats_cache_misses = 0; -@@ -1187,7 +1183,7 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) - 0, GFP_KERNEL); - if (err) - return err; -- err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, -+ err = percpu_counter_init(&sbi->s_es_stats.es_stats_shk_cnt, - 0, GFP_KERNEL); - if (err) - goto err; -@@ -1211,37 +1207,10 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) - if (sbi->s_proc) - remove_proc_entry("es_shrinker_info", sbi->s_proc); - percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); -- percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); -+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_shk_cnt); - unregister_shrinker(&sbi->s_es_shrinker); - } - --void ext4_es_lru_add(struct inode *inode) --{ -- struct ext4_inode_info *ei = EXT4_I(inode); -- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -- -- ei->i_touch_when = jiffies; -- -- if (!list_empty(&ei->i_es_lru)) -- return; -- -- spin_lock(&sbi->s_es_lru_lock); -- if (list_empty(&ei->i_es_lru)) -- list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); -- spin_unlock(&sbi->s_es_lru_lock); --} -- --void ext4_es_lru_del(struct inode *inode) --{ -- struct ext4_inode_info *ei = EXT4_I(inode); -- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -- -- spin_lock(&sbi->s_es_lru_lock); -- if (!list_empty(&ei->i_es_lru)) -- list_del_init(&ei->i_es_lru); -- spin_unlock(&sbi->s_es_lru_lock); --} -- - static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, - int nr_to_scan) - { -@@ -1253,7 +1222,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, - static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); - -- if (ei->i_es_lru_nr == 0) -+ if (ei->i_es_shk_nr == 0) - return 0; - - if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && -diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h -index efd5f970..0e6a33e8 100644 ---- a/fs/ext4/extents_status.h -+++ b/fs/ext4/extents_status.h -@@ -65,14 +65,13 @@ struct ext4_es_tree { - }; - - struct ext4_es_stats { -- unsigned long es_stats_last_sorted; - unsigned long es_stats_shrunk; - unsigned long es_stats_cache_hits; - unsigned long es_stats_cache_misses; - u64 es_stats_scan_time; - u64 es_stats_max_scan_time; - struct percpu_counter es_stats_all_cnt; -- struct percpu_counter es_stats_lru_cnt; -+ struct percpu_counter es_stats_shk_cnt; - }; - - extern int __init ext4_init_es(void); -@@ -151,7 +150,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, - - extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); - extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); --extern void ext4_es_lru_add(struct inode *inode); --extern void ext4_es_lru_del(struct inode *inode); -+extern void ext4_es_list_add(struct inode *inode); -+extern void ext4_es_list_del(struct inode *inode); - - #endif /* _EXT4_EXTENTS_STATUS_H */ -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 21db5952..f6a2764c 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -523,7 +523,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, - - /* Lookup extent status tree firstly */ - if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { -- ext4_es_lru_add(inode); -+ ext4_es_list_add(inode); - if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { - map->m_pblk = ext4_es_pblock(&es) + - map->m_lblk - es.es_lblk; -@@ -1519,7 +1519,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, - - /* Lookup extent status tree firstly */ - if (ext4_es_lookup_extent(inode, iblock, &es)) { -- ext4_es_lru_add(inode); -+ ext4_es_list_add(inode); - if (ext4_es_is_hole(&es)) { - retval = 0; - down_read(&EXT4_I(inode)->i_data_sem); -diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index 858cf709..122d517c 100644 ---- a/fs/ext4/ioctl.c -+++ b/fs/ext4/ioctl.c -@@ -80,8 +80,8 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) - memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); - ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); - ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); -- ext4_es_lru_del(inode1); -- ext4_es_lru_del(inode2); -+ ext4_es_list_del(inode1); -+ ext4_es_list_del(inode2); - - isize = i_size_read(inode1); - i_size_write(inode1, i_size_read(inode2)); -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 95a01d56..ea2a1026 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -942,10 +942,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) - spin_lock_init(&ei->i_prealloc_lock); - ext4_es_init_tree(&ei->i_es_tree); - rwlock_init(&ei->i_es_lock); -- INIT_LIST_HEAD(&ei->i_es_lru); -+ INIT_LIST_HEAD(&ei->i_es_list); - ei->i_es_all_nr = 0; -- ei->i_es_lru_nr = 0; -- ei->i_touch_when = 0; -+ ei->i_es_shk_nr = 0; - ei->i_reserved_data_blocks = 0; - ei->i_reserved_meta_blocks = 0; - ei->i_allocated_meta_blocks = 0; -@@ -1034,7 +1033,7 @@ void ext4_clear_inode(struct inode *inode) - dquot_drop(inode); - ext4_discard_preallocations(inode); - ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); -- ext4_es_lru_del(inode); -+ ext4_es_list_del(inode); - if (EXT4_I(inode)->jinode) { - jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), - EXT4_I(inode)->jinode); --- -2.24.1 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-cleanup-flag-definitions-for-extent-status-tree.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-cleanup-flag-definitions-for-extent-status-tree.patch deleted file mode 100644 index 4d28b12..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-cleanup-flag-definitions-for-extent-status-tree.patch +++ /dev/null @@ -1,139 +0,0 @@ -From dd5c7af957dd0b9b3b04ef8aacffd601b46bc26c Mon Sep 17 00:00:00 2001 -From: Jan Kara -Date: Tue, 25 Nov 2014 11:53:47 -0500 -Subject: [PATCH 6/7] ext4: cleanup flag definitions for extent status tree - -Currently flags for extent status tree are defined twice, once shifted -and once without a being shifted. Consolidate these definitions into one -place and make some computations automatic to make adding flags less -error prone. Compiler should be clever enough to figure out these are -constants and generate the same code. - -Signed-off-by: Jan Kara -Signed-off-by: Theodore Ts'o ---- - fs/ext4/extents_status.c | 2 ++ - fs/ext4/extents_status.h | 58 ++++++++++++++++++---------------------- - 2 files changed, 28 insertions(+), 32 deletions(-) - -diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c -index b78eec2a..a29708c0 100644 ---- a/fs/ext4/extents_status.c -+++ b/fs/ext4/extents_status.c -@@ -1170,6 +1170,8 @@ int ext4_es_register_shrinker(struct ext4_sb_info *sbi) - { - int err; - -+ /* Make sure we have enough bits for physical block number */ -+ BUILD_BUG_ON(ES_SHIFT < 48); - INIT_LIST_HEAD(&sbi->s_es_list); - sbi->s_es_nr_inode = 0; - spin_lock_init(&sbi->s_es_lock); -diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h -index b0b78b95..e86b1f34 100644 ---- a/fs/ext4/extents_status.h -+++ b/fs/ext4/extents_status.h -@@ -29,25 +29,21 @@ - /* - * These flags live in the high bits of extent_status.es_pblk - */ --#define ES_SHIFT 60 -- --#define EXTENT_STATUS_WRITTEN (1 << 3) --#define EXTENT_STATUS_UNWRITTEN (1 << 2) --#define EXTENT_STATUS_DELAYED (1 << 1) --#define EXTENT_STATUS_HOLE (1 << 0) -- --#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \ -- EXTENT_STATUS_UNWRITTEN | \ -- EXTENT_STATUS_DELAYED | \ -- EXTENT_STATUS_HOLE) -+enum { -+ ES_WRITTEN_B, -+ ES_UNWRITTEN_B, -+ ES_DELAYED_B, -+ ES_HOLE_B, -+ ES_FLAGS -+}; - --#define ES_WRITTEN (1ULL << 63) --#define ES_UNWRITTEN (1ULL << 62) --#define ES_DELAYED (1ULL << 61) --#define ES_HOLE (1ULL << 60) -+#define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS) -+#define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT) - --#define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \ -- ES_DELAYED | ES_HOLE) -+#define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B) -+#define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B) -+#define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B) -+#define EXTENT_STATUS_HOLE (1 << ES_HOLE_B) - - struct ext4_sb_info; - struct ext4_extent; -@@ -92,29 +88,29 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode, - extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, - struct extent_status *es); - -+static inline unsigned int ext4_es_status(struct extent_status *es) -+{ -+ return es->es_pblk >> ES_SHIFT; -+} -+ - static inline int ext4_es_is_written(struct extent_status *es) - { -- return (es->es_pblk & ES_WRITTEN) != 0; -+ return (ext4_es_status(es) & EXTENT_STATUS_WRITTEN) != 0; - } - - static inline int ext4_es_is_unwritten(struct extent_status *es) - { -- return (es->es_pblk & ES_UNWRITTEN) != 0; -+ return (ext4_es_status(es) & EXTENT_STATUS_UNWRITTEN) != 0; - } - - static inline int ext4_es_is_delayed(struct extent_status *es) - { -- return (es->es_pblk & ES_DELAYED) != 0; -+ return (ext4_es_status(es) & EXTENT_STATUS_DELAYED) != 0; - } - - static inline int ext4_es_is_hole(struct extent_status *es) - { -- return (es->es_pblk & ES_HOLE) != 0; --} -- --static inline unsigned int ext4_es_status(struct extent_status *es) --{ -- return es->es_pblk >> ES_SHIFT; -+ return (ext4_es_status(es) & EXTENT_STATUS_HOLE) != 0; - } - - static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) -@@ -134,18 +130,16 @@ static inline void ext4_es_store_pblock(struct extent_status *es, - static inline void ext4_es_store_status(struct extent_status *es, - unsigned int status) - { -- es->es_pblk = (((ext4_fsblk_t) -- (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | -- (es->es_pblk & ~ES_MASK)); -+ es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) | -+ (es->es_pblk & ~ES_MASK); - } - - static inline void ext4_es_store_pblock_status(struct extent_status *es, - ext4_fsblk_t pb, - unsigned int status) - { -- es->es_pblk = (((ext4_fsblk_t) -- (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | -- (pb & ~ES_MASK)); -+ es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) | -+ (pb & ~ES_MASK); - } - - extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); --- -2.24.1 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-fix-xattr-shifting-when-expanding-inodes.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-fix-xattr-shifting-when-expanding-inodes.patch deleted file mode 100644 index f0b155f..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-fix-xattr-shifting-when-expanding-inodes.patch +++ /dev/null @@ -1,245 +0,0 @@ -From e3014d14a81edde488d9a6758eea8afc41752d2d Mon Sep 17 00:00:00 2001 -From: Jan Kara -Date: Mon, 29 Aug 2016 15:38:11 -0400 -Subject: [PATCH] ext4: fixup free space calculations when expanding inodes - -Conditions checking whether there is enough free space in an xattr block -and when xattr is large enough to make enough space in the inode forgot -to account for the fact that inode need not be completely filled up with -xattrs. Thus we could move unnecessarily many xattrs out of inode or -even falsely claim there is not enough space to expand the inode. We -also forgot to update the amount of free space in xattr block when moving -more xattrs and thus could decide to move too big xattr resulting in -unexpected failure. - -Fix these problems by properly updating free space in the inode and -xattr block as we move xattrs. To simplify the math, avoid shifting -xattrs after removing each one xattr and instead just shift xattrs only -once there is enough free space in the inode. - -Signed-off-by: Jan Kara -Signed-off-by: Theodore Ts'o ---- - fs/ext4/xattr.c | 58 ++++++++++++++++++++++++--------------------------------- - 1 file changed, 24 insertions(+), 34 deletions(-) - -diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c -index 2eb935c..22d2ebc 100644 ---- a/fs/ext4/xattr.c -+++ b/fs/ext4/xattr.c -@@ -1350,7 +1350,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - struct ext4_xattr_ibody_find *is = NULL; - struct ext4_xattr_block_find *bs = NULL; - char *buffer = NULL, *b_entry_name = NULL; -- size_t min_offs, free; -+ size_t min_offs; -+ size_t ifree, bfree; - int total_ino; - void *base, *start, *end; - int error = 0, tried_min_extra_isize = 0; -@@ -1385,17 +1386,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - if (error) - goto cleanup; - -- free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); -- if (free >= isize_diff) { -- entry = IFIRST(header); -- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -- - new_extra_isize, (void *)raw_inode + -- EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, -- (void *)header, total_ino, -- inode->i_sb->s_blocksize); -- EXT4_I(inode)->i_extra_isize = new_extra_isize; -- goto out; -- } -+ ifree = ext4_xattr_free_space(last, &min_offs, base, &total_ino); -+ if (ifree >= isize_diff) -+ goto shift; - - /* - * Enough free space isn't available in the inode, check if -@@ -1416,8 +1409,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - first = BFIRST(bh); - end = bh->b_data + bh->b_size; - min_offs = end - base; -- free = ext4_xattr_free_space(first, &min_offs, base, NULL); -- if (free < isize_diff) { -+ bfree = ext4_xattr_free_space(first, &min_offs, base, NULL); -+ if (bfree + ifree < isize_diff) { - if (!tried_min_extra_isize && s_min_extra_isize) { - tried_min_extra_isize++; - new_extra_isize = s_min_extra_isize; -@@ -1428,10 +1421,10 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - goto cleanup; - } - } else { -- free = inode->i_sb->s_blocksize; -+ bfree = inode->i_sb->s_blocksize; - } - -- while (isize_diff > 0) { -+ while (isize_diff > ifree) { - size_t offs, size, entry_size; - struct ext4_xattr_entry *small_entry = NULL; - struct ext4_xattr_info i = { -@@ -1439,7 +1432,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - .value_len = 0, - }; - unsigned int total_size; /* EA entry size + value size */ -- unsigned int shift_bytes; /* No. of bytes to shift EAs by? */ - unsigned int min_total_size = ~0U; - - is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); -@@ -1461,8 +1453,9 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - total_size = - EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + - EXT4_XATTR_LEN(last->e_name_len); -- if (total_size <= free && total_size < min_total_size) { -- if (total_size < isize_diff) { -+ if (total_size <= bfree && -+ total_size < min_total_size) { -+ if (total_size + ifree < isize_diff) { - small_entry = last; - } else { - entry = last; -@@ -1491,6 +1484,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - offs = le16_to_cpu(entry->e_value_offs); - size = le32_to_cpu(entry->e_value_size); - entry_size = EXT4_XATTR_LEN(entry->e_name_len); -+ total_size = entry_size + EXT4_XATTR_SIZE(size); - i.name_index = entry->e_name_index, - buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS); - b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); -@@ -1518,21 +1512,8 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - if (error) - goto cleanup; - total_ino -= entry_size; -- -- entry = IFIRST(header); -- if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff) -- shift_bytes = isize_diff; -- else -- shift_bytes = entry_size + EXT4_XATTR_SIZE(size); -- /* Adjust the offsets and shift the remaining entries ahead */ -- ext4_xattr_shift_entries(entry, -shift_bytes, -- (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + -- EXT4_I(inode)->i_extra_isize + shift_bytes, -- (void *)header, total_ino, inode->i_sb->s_blocksize); -- -- isize_diff -= shift_bytes; -- EXT4_I(inode)->i_extra_isize += shift_bytes; -- header = IHDR(inode, raw_inode); -+ ifree += total_size; -+ bfree -= total_size; - - i.name = b_entry_name; - i.value = buffer; -@@ -1553,6 +1534,15 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - kfree(is); - kfree(bs); - } -+ -+shift: -+ /* Adjust the offsets and shift the remaining entries ahead */ -+ entry = IFIRST(header); -+ ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -+ - new_extra_isize, (void *)raw_inode + -+ EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, -+ (void *)header, total_ino, inode->i_sb->s_blocksize); -+ EXT4_I(inode)->i_extra_isize = new_extra_isize; - brelse(bh); - out: - ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); --- -2.9.3 - -From 94405713889d4a9d341b4ad92956e4e2ec8ec2c2 Mon Sep 17 00:00:00 2001 -From: Jan Kara -Date: Mon, 29 Aug 2016 15:41:11 -0400 -Subject: [PATCH] ext4: replace bogus assertion in ext4_xattr_shift_entries() - -We were checking whether computed offsets do not exceed end of block in -ext4_xattr_shift_entries(). However this does not make sense since we -always only decrease offsets. So replace that assertion with a check -whether we really decrease xattrs value offsets. - -Signed-off-by: Jan Kara -Signed-off-by: Theodore Ts'o ---- - fs/ext4/xattr.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - -diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c -index 1447860..82b025c 100644 ---- a/fs/ext4/xattr.c -+++ b/fs/ext4/xattr.c -@@ -1319,18 +1319,19 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, - */ - static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, - int value_offs_shift, void *to, -- void *from, size_t n, int blocksize) -+ void *from, size_t n) - { - struct ext4_xattr_entry *last = entry; - int new_offs; - -+ /* We always shift xattr headers further thus offsets get lower */ -+ BUG_ON(value_offs_shift > 0); -+ - /* Adjust the value offsets of the entries */ - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { - if (!last->e_value_inum && last->e_value_size) { - new_offs = le16_to_cpu(last->e_value_offs) + - value_offs_shift; -- BUG_ON(new_offs + le32_to_cpu(last->e_value_size) -- > blocksize); - last->e_value_offs = cpu_to_le16(new_offs); - } - } -@@ -1542,7 +1543,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, - ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - - new_extra_isize, (void *)raw_inode + - EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, -- (void *)header, total_ino, inode->i_sb->s_blocksize); -+ (void *)header, total_ino); - EXT4_I(inode)->i_extra_isize = new_extra_isize; - brelse(bh); - out: --- -2.9.3 - -From 887a9730614727c4fff7cb756711b190593fc1df Mon Sep 17 00:00:00 2001 -From: Konstantin Khlebnikov -Date: Sun, 21 May 2017 22:36:23 -0400 -Subject: [PATCH] ext4: keep existing extra fields when inode expands - -ext4_expand_extra_isize() should clear only space between old and new -size. - -Fixes: 6dd4ee7cab7e # v2.6.23 -Cc: stable@vger.kernel.org -Signed-off-by: Konstantin Khlebnikov -Signed-off-by: Theodore Ts'o ---- - fs/ext4/inode.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 1bd0bfa..7cd99de 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -5637,8 +5637,9 @@ static int ext4_expand_extra_isize(struct inode *inode, - /* No extended attributes present */ - if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || - header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { -- memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, -- new_extra_isize); -+ memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + -+ EXT4_I(inode)->i_extra_isize, 0, -+ new_extra_isize - EXT4_I(inode)->i_extra_isize); - EXT4_I(inode)->i_extra_isize = new_extra_isize; - return 0; - } --- -2.9.3 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-introduce-aging-to-extent-status-tree.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-introduce-aging-to-extent-status-tree.patch deleted file mode 100644 index ddbe7e7..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-introduce-aging-to-extent-status-tree.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 1da6da1563df986dd35080d7edcf59b739696c40 Mon Sep 17 00:00:00 2001 -From: Jan Kara -Date: Tue, 25 Nov 2014 11:55:24 -0500 -Subject: [PATCH 7/7] ext4: introduce aging to extent status tree - -Introduce a simple aging to extent status tree. Each extent has a -REFERENCED bit which gets set when the extent is used. Shrinker then -skips entries with referenced bit set and clears the bit. Thus -frequently used extents have higher chances of staying in memory. - -Signed-off-by: Jan Kara -Signed-off-by: Theodore Ts'o ---- - fs/ext4/extents_status.c | 22 +++++++++++++++++----- - fs/ext4/extents_status.h | 35 +++++++++++++++++++++++++++++++---- - 2 files changed, 48 insertions(+), 9 deletions(-) - -diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c -index a29708c0..0305f308 100644 ---- a/fs/ext4/extents_status.c -+++ b/fs/ext4/extents_status.c -@@ -382,7 +382,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) - static int ext4_es_can_be_merged(struct extent_status *es1, - struct extent_status *es2) - { -- if (ext4_es_status(es1) != ext4_es_status(es2)) -+ if (ext4_es_type(es1) != ext4_es_type(es2)) - return 0; - - if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) { -@@ -425,6 +425,8 @@ ext4_es_try_to_merge_left(struct inode *inode, struct extent_status *es) - es1 = rb_entry(node, struct extent_status, rb_node); - if (ext4_es_can_be_merged(es1, es)) { - es1->es_len += es->es_len; -+ if (ext4_es_is_referenced(es)) -+ ext4_es_set_referenced(es1); - rb_erase(&es->rb_node, &tree->root); - ext4_es_free_extent(inode, es); - es = es1; -@@ -447,6 +449,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es) - es1 = rb_entry(node, struct extent_status, rb_node); - if (ext4_es_can_be_merged(es, es1)) { - es->es_len += es1->es_len; -+ if (ext4_es_is_referenced(es1)) -+ ext4_es_set_referenced(es); - rb_erase(node, &tree->root); - ext4_es_free_extent(inode, es1); - } -@@ -823,6 +827,8 @@ out: - es->es_lblk = es1->es_lblk; - es->es_len = es1->es_len; - es->es_pblk = es1->es_pblk; -+ if (!ext4_es_is_referenced(es)) -+ ext4_es_set_referenced(es); - stats->es_stats_cache_hits++; - } else { - stats->es_stats_cache_misses++; -@@ -1243,11 +1249,17 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, - * We can't reclaim delayed extent from status tree because - * fiemap, bigallic, and seek_data/hole need to use it. - */ -- if (!ext4_es_is_delayed(es)) { -- rb_erase(&es->rb_node, &tree->root); -- ext4_es_free_extent(inode, es); -- (*nr_shrunk)++; -+ if (ext4_es_is_delayed(es)) -+ goto next; -+ if (ext4_es_is_referenced(es)) { -+ ext4_es_clear_referenced(es); -+ goto next; - } -+ -+ rb_erase(&es->rb_node, &tree->root); -+ ext4_es_free_extent(inode, es); -+ (*nr_shrunk)++; -+next: - if (!node) - goto out_wrap; - es = rb_entry(node, struct extent_status, rb_node); -diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h -index e86b1f34..691b5261 100644 ---- a/fs/ext4/extents_status.h -+++ b/fs/ext4/extents_status.h -@@ -34,6 +34,7 @@ enum { - ES_UNWRITTEN_B, - ES_DELAYED_B, - ES_HOLE_B, -+ ES_REFERENCED_B, - ES_FLAGS - }; - -@@ -44,6 +45,12 @@ enum { - #define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B) - #define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B) - #define EXTENT_STATUS_HOLE (1 << ES_HOLE_B) -+#define EXTENT_STATUS_REFERENCED (1 << ES_REFERENCED_B) -+ -+#define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \ -+ EXTENT_STATUS_UNWRITTEN | \ -+ EXTENT_STATUS_DELAYED | \ -+ EXTENT_STATUS_HOLE) << ES_SHIFT) - - struct ext4_sb_info; - struct ext4_extent; -@@ -93,24 +100,44 @@ static inline unsigned int ext4_es_status(struct extent_status *es) - return es->es_pblk >> ES_SHIFT; - } - -+static inline unsigned int ext4_es_type(struct extent_status *es) -+{ -+ return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT; -+} -+ - static inline int ext4_es_is_written(struct extent_status *es) - { -- return (ext4_es_status(es) & EXTENT_STATUS_WRITTEN) != 0; -+ return (ext4_es_type(es) & EXTENT_STATUS_WRITTEN) != 0; - } - - static inline int ext4_es_is_unwritten(struct extent_status *es) - { -- return (ext4_es_status(es) & EXTENT_STATUS_UNWRITTEN) != 0; -+ return (ext4_es_type(es) & EXTENT_STATUS_UNWRITTEN) != 0; - } - - static inline int ext4_es_is_delayed(struct extent_status *es) - { -- return (ext4_es_status(es) & EXTENT_STATUS_DELAYED) != 0; -+ return (ext4_es_type(es) & EXTENT_STATUS_DELAYED) != 0; - } - - static inline int ext4_es_is_hole(struct extent_status *es) - { -- return (ext4_es_status(es) & EXTENT_STATUS_HOLE) != 0; -+ return (ext4_es_type(es) & EXTENT_STATUS_HOLE) != 0; -+} -+ -+static inline void ext4_es_set_referenced(struct extent_status *es) -+{ -+ es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT; -+} -+ -+static inline void ext4_es_clear_referenced(struct extent_status *es) -+{ -+ es->es_pblk &= ~(((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT); -+} -+ -+static inline int ext4_es_is_referenced(struct extent_status *es) -+{ -+ return (ext4_es_status(es) & EXTENT_STATUS_REFERENCED) != 0; - } - - static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es) --- -2.24.1 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-large-dir.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-large-dir.patch deleted file mode 100644 index 6c3d5ce..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-large-dir.patch +++ /dev/null @@ -1,356 +0,0 @@ -This INCOMPAT_LARGEDIR feature allows larger directories -to be created in ldiskfs, both with directory sizes over -2GB and and a maximum htree depth of 3 instead of the -current limit of 2. These features are needed in order -to exceed the current limit of approximately 10M entries -in a single directory. - -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h -@@ -1585,7 +1585,8 @@ static inline void ext4_clear_state_flag - EXT4_FEATURE_INCOMPAT_EA_INODE| \ - EXT4_FEATURE_INCOMPAT_MMP | \ - EXT4_FEATURE_INCOMPAT_DIRDATA| \ -- EXT4_FEATURE_INCOMPAT_INLINE_DATA) -+ EXT4_FEATURE_INCOMPAT_INLINE_DATA| \ -+ EXT4_FEATURE_INCOMPAT_LARGEDIR) - #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ -@@ -1999,6 +2000,9 @@ struct mmpd_data { - # define NORET_TYPE /**/ - # define ATTRIB_NORET __attribute__((noreturn)) - # define NORET_AND noreturn, -+/* htree levels for ext4 */ -+#define EXT4_HTREE_LEVEL_COMPAT 2 -+#define EXT4_HTREE_LEVEL 3 - - struct ext4_xattr_ino_array { - unsigned int xia_count; /* # of used item in the array */ -@@ -2472,13 +2476,16 @@ static inline void ext4_r_blocks_count_s - es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); - } - --static inline loff_t ext4_isize(struct ext4_inode *raw_inode) -+static inline loff_t ext4_isize(struct super_block *sb, -+ struct ext4_inode *raw_inode) - { -- if (S_ISREG(le16_to_cpu(raw_inode->i_mode))) -+ if (S_ISREG(le16_to_cpu(raw_inode->i_mode)) || -+ (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) && -+ S_ISDIR(le16_to_cpu(raw_inode->i_mode)))) - return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | - le32_to_cpu(raw_inode->i_size_lo); -- else -- return (loff_t) le32_to_cpu(raw_inode->i_size_lo); -+ -+ return (loff_t)le32_to_cpu(raw_inode->i_size_lo); - } - - static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/namei.c -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c -@@ -513,7 +513,14 @@ struct dx_root_info * dx_get_dx_info(str - - static inline ext4_lblk_t dx_get_block(struct dx_entry *entry) - { -- return le32_to_cpu(entry->block) & 0x00ffffff; -+ return le32_to_cpu(entry->block) & 0x0fffffff; -+} -+ -+static inline int -+ext4_dir_htree_level(struct super_block *sb) -+{ -+ return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ? -+ EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT; - } - - static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) -@@ -681,7 +688,7 @@ dx_probe(const struct qstr *d_name, stru - struct dx_frame *frame = frame_in; - u32 hash; - -- frame->bh = NULL; -+ memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); - bh = ext4_read_dirblock(dir, 0, INDEX); - if (IS_ERR(bh)) { - *err = PTR_ERR(bh); -@@ -714,10 +721,15 @@ dx_probe(const struct qstr *d_name, stru - } - - indirect = info->indirect_levels; -- if (indirect > 1) { -- ext4_warning(dir->i_sb, -- "inode #%lu: unimplemented hash depth %u", -- dir->i_ino, info->indirect_levels); -+ if (indirect >= ext4_dir_htree_level(dir->i_sb)) { -+ ext4_warning(dir->i_sb, -+ "inode #%lu: comm %s: htree depth %#06x exceed max depth %u", -+ dir->i_ino, current->comm, indirect, -+ ext4_dir_htree_level(dir->i_sb)); -+ if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) { -+ ext4_warning(dir->i_sb, "Enable large directory " -+ "feature to access it"); -+ } - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; -@@ -812,13 +826,18 @@ fail: - static void dx_release (struct dx_frame *frames) - { - struct dx_root_info *info; -+ int i; -+ - if (frames[0].bh == NULL) - return; - - info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data); -- if (info->indirect_levels) -- brelse(frames[1].bh); -- brelse(frames[0].bh); -+ for (i = 0; i <= info->indirect_levels; i++) { -+ if (frames[i].bh == NULL) -+ break; -+ brelse(frames[i].bh); -+ frames[i].bh = NULL; -+ } - } - - /* -@@ -960,7 +979,7 @@ int ext4_htree_fill_tree(struct file *di - { - struct dx_hash_info hinfo; - struct ext4_dir_entry_2 *de; -- struct dx_frame frames[2], *frame; -+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; - struct inode *dir; - ext4_lblk_t block; - int count = 0; -@@ -1376,7 +1395,7 @@ static struct buffer_head * ext4_dx_find - { - struct super_block * sb = dir->i_sb; - struct dx_hash_info hinfo; -- struct dx_frame frames[2], *frame; -+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; - struct buffer_head *bh; - ext4_lblk_t block; - int retval; -@@ -1832,7 +1851,7 @@ static int make_indexed_dir(handle_t *ha - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - struct buffer_head *bh2; -- struct dx_frame frames[2], *frame; -+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; - struct dx_entry *entries; - struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de; - struct ext4_dir_entry_tail *t; -@@ -2117,15 +2136,18 @@ static int ext4_add_entry(handle_t *hand - static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode) - { -- struct dx_frame frames[2], *frame; -+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; - struct dx_entry *entries, *at; - struct dx_hash_info hinfo; - struct buffer_head *bh; - struct inode *dir = dentry->d_parent->d_inode; - struct super_block *sb = dir->i_sb; - struct ext4_dir_entry_2 *de; -+ int restart; - int err; - -+again: -+ restart = 0; - frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); - if (!frame) - return err; -@@ -2138,33 +2160,48 @@ static int ext4_dx_add_entry(handle_t *h - goto cleanup; - } - -- BUFFER_TRACE(bh, "get_write_access"); -- err = ext4_journal_get_write_access(handle, bh); -- if (err) -- goto journal_error; -- - err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (err != -ENOSPC) - goto cleanup; - -+ err = 0; - /* Block full, should compress but for now just split */ - dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", - dx_get_count(entries), dx_get_limit(entries))); - /* Need to split index? */ - if (dx_get_count(entries) == dx_get_limit(entries)) { - ext4_lblk_t newblock; -- unsigned icount = dx_get_count(entries); -- int levels = frame - frames; -+ int levels = frame - frames + 1; -+ unsigned icount; -+ int add_level = 1; - struct dx_entry *entries2; - struct dx_node *node2; - struct buffer_head *bh2; - -- if (levels && (dx_get_count(frames->entries) == -- dx_get_limit(frames->entries))) { -- ext4_warning(sb, "Directory index full!"); -+ while (frame > frames) { -+ if (dx_get_count((frame - 1)->entries) < -+ dx_get_limit((frame - 1)->entries)) { -+ add_level = 0; -+ break; -+ } -+ frame--; /* split higher index block */ -+ at = frame->at; -+ entries = frame->entries; -+ restart = 1; -+ } -+ if (add_level && levels == ext4_dir_htree_level(sb)) { -+ ext4_warning(sb, "inode %lu: comm %s: index %u: reach max htree level %u", -+ dir->i_ino, current->comm, levels, -+ ext4_dir_htree_level(sb)); -+ if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) { -+ ext4_warning(sb, "Large directory feature is" -+ "not enabled on this " -+ "filesystem"); -+ } - err = -ENOSPC; - goto cleanup; - } -+ icount = dx_get_count(entries); - bh2 = ext4_append(handle, dir, &newblock); - if (IS_ERR(bh2)) { - err = PTR_ERR(bh2); -@@ -2179,7 +2216,7 @@ static int ext4_dx_add_entry(handle_t *h - err = ext4_journal_get_write_access(handle, frame->bh); - if (err) - goto journal_error; -- if (levels) { -+ if (!add_level) { - unsigned icount1 = icount/2, icount2 = icount - icount1; - unsigned hash2 = dx_get_hash(entries + icount1); - dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", -@@ -2187,7 +2224,7 @@ static int ext4_dx_add_entry(handle_t *h - - BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ - err = ext4_journal_get_write_access(handle, -- frames[0].bh); -+ (frame - 1)->bh); - if (err) - goto journal_error; - -@@ -2203,19 +2240,27 @@ static int ext4_dx_add_entry(handle_t *h - frame->entries = entries = entries2; - swap(frame->bh, bh2); - } -- dx_insert_block(frames + 0, hash2, newblock); -- dxtrace(dx_show_index("node", frames[1].entries)); -+ dx_insert_block(frame - 1, hash2, newblock); -+ dxtrace(dx_show_index("node", frame->entries)); - dxtrace(dx_show_index("node", -- ((struct dx_node *) bh2->b_data)->entries)); -+ ((struct dx_node *)bh2->b_data)->entries)); - err = ext4_handle_dirty_dx_node(handle, dir, bh2); - if (err) - goto journal_error; - brelse (bh2); -+ err = ext4_handle_dirty_dx_node(handle, dir, -+ (frame - 1)->bh); -+ if (err) -+ goto journal_error; -+ if (restart) { -+ err = ext4_handle_dirty_dx_node(handle, dir, -+ frame->bh); -+ goto journal_error; -+ } - } else { - struct dx_root_info *info; -- dxtrace(printk(KERN_DEBUG -- "Creating second level index...\n")); -- memcpy((char *) entries2, (char *) entries, -+ -+ memcpy((char *)entries2, (char *)entries, - icount * sizeof(struct dx_entry)); - dx_set_limit(entries2, dx_node_limit(dir)); - -@@ -2224,22 +2267,17 @@ static int ext4_dx_add_entry(handle_t *h - dx_set_block(entries + 0, newblock); - info = dx_get_dx_info((struct ext4_dir_entry_2*) - frames[0].bh->b_data); -- info->indirect_levels = 1; -- -- /* Add new access path frame */ -- frame = frames + 1; -- frame->at = at = at - entries + entries2; -- frame->entries = entries = entries2; -- frame->bh = bh2; -- err = ext4_journal_get_write_access(handle, -- frame->bh); -+ info->indirect_levels += 1; -+ dxtrace(printk(KERN_DEBUG -+ "Creating %d level index...\n", -+ info->indirect_levels)); -+ err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); - if (err) - goto journal_error; -- } -- err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh); -- if (err) { -- ext4_std_error(inode->i_sb, err); -- goto cleanup; -+ err = ext4_handle_dirty_dx_node(handle, dir, bh2); -+ brelse(bh2); -+ restart = 1; -+ goto journal_error; - } - } - de = do_split(handle, dir, &bh, frame, &hinfo, &err); -@@ -2249,10 +2285,14 @@ static int ext4_dx_add_entry(handle_t *h - goto cleanup; - - journal_error: -- ext4_std_error(dir->i_sb, err); -+ ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */ - cleanup: - brelse(bh); - dx_release(frames); -+ /* @restart is true means htree-path has been changed, we need to -+ * repeat dx_probe() to find out valid htree-path */ -+ if (restart && err == 0) -+ goto again; - return err; - } - -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/inode.c -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c -@@ -4056,12 +4056,12 @@ struct inode *ext4_iget(struct super_blo - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) - ei->i_file_acl |= - ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; -- inode->i_size = ext4_isize(raw_inode); -+ inode->i_size = ext4_isize(sb, raw_inode); - if ((size = i_size_read(inode)) < 0) { - EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); - ret = -EFSCORRUPTED; - goto bad_inode; - } - ei->i_disksize = inode->i_size; - #ifdef CONFIG_QUOTA - ei->i_reserved_quota = 0; -@@ -4306,7 +4306,7 @@ static int ext4_do_update_inode(handle_t - raw_inode->i_file_acl_high = - cpu_to_le16(ei->i_file_acl >> 32); - raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); -- if (ei->i_disksize != ext4_isize(raw_inode)) { -+ if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) { - ext4_isize_set(raw_inode, ei->i_disksize); - need_datasync = 1; - } diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-large-eas.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-large-eas.patch deleted file mode 100644 index 633e73a..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-large-eas.patch +++ /dev/null @@ -1,1145 +0,0 @@ -Date: Fri, 8 Oct 2021 12:28:37 +0700 -Subject: [PATCH] This patch implements the large EA support in ext4. If the - size of an EA value is larger than the blocksize, then the EA value would not - be saved in the external EA block, instead it would be saved in an external - EA inode. So, the patch also helps support a larger number of EAs. - ---- - fs/ext4/ext4.h | 6 + - fs/ext4/extents.c | 3 +- - fs/ext4/ialloc.c | 1 - - fs/ext4/indirect.c | 3 +- - fs/ext4/inline.c | 2 +- - fs/ext4/inode.c | 45 +++- - fs/ext4/xattr.c | 607 ++++++++++++++++++++++++++++++++++++++++++--- - fs/ext4/xattr.h | 33 ++- - 8 files changed, 647 insertions(+), 53 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index ba54a96..e28bcdb 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -1617,6 +1617,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) - EXT4_FEATURE_INCOMPAT_EXTENTS| \ - EXT4_FEATURE_INCOMPAT_64BIT| \ - EXT4_FEATURE_INCOMPAT_FLEX_BG| \ -+ EXT4_FEATURE_INCOMPAT_EA_INODE| \ - EXT4_FEATURE_INCOMPAT_MMP | \ - EXT4_FEATURE_INCOMPAT_DIRDATA| \ - EXT4_FEATURE_INCOMPAT_INLINE_DATA) -@@ -2028,6 +2029,10 @@ struct mmpd_data { - # define ATTRIB_NORET __attribute__((noreturn)) - # define NORET_AND noreturn, - -+struct ext4_xattr_ino_array { -+ unsigned int xia_count; /* # of used item in the array */ -+ unsigned int xia_inodes[0]; -+}; - /* bitmap.c */ - extern unsigned int ext4_count_free(char *bitmap, unsigned numchars); - void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group, -@@ -2233,6 +2238,7 @@ extern void ext4_set_inode_flags(struct inode *); - extern void ext4_get_inode_flags(struct ext4_inode_info *); - extern int ext4_alloc_da_blocks(struct inode *inode); - extern void ext4_set_aops(struct inode *inode); -+extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk); - extern int ext4_writepage_trans_blocks(struct inode *); - extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); - extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index d8434f2..8263aa4 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -2461,7 +2461,8 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int extents) - - static inline int get_default_free_blocks_flags(struct inode *inode) - { -- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) -+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || -+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE)) - return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; - else if (ext4_should_journal_data(inode)) - return EXT4_FREE_BLOCKS_FORGET; -diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c -index a739f71..472bb41 100644 ---- a/fs/ext4/ialloc.c -+++ b/fs/ext4/ialloc.c -@@ -247,7 +247,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) - * as writing the quota to disk may need the lock as well. - */ - dquot_initialize(inode); -- ext4_xattr_delete_inode(handle, inode); - dquot_free_inode(inode); - dquot_drop(inode); - -diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c -index 68163c3..b7cf6e0 100644 ---- a/fs/ext4/indirect.c -+++ b/fs/ext4/indirect.c -@@ -959,7 +959,8 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, - int flags = EXT4_FREE_BLOCKS_VALIDATED; - int err; - -- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) -+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || -+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE)) - flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA; - else if (ext4_should_journal_data(inode)) - flags |= EXT4_FREE_BLOCKS_FORGET; -diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c -index 0e7433b..b1295be 100644 ---- a/fs/ext4/inline.c -+++ b/fs/ext4/inline.c -@@ -59,7 +59,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode, - - /* Compute min_offs. */ - for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { -- if (!entry->e_value_block && entry->e_value_size) { -+ if (!entry->e_value_inum && entry->e_value_size) { - size_t offs = le16_to_cpu(entry->e_value_offs); - if (offs < min_offs) - min_offs = offs; -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index 6c6ac63..2086792 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -136,8 +136,6 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset, - unsigned int length); - static int __ext4_journalled_writepage(struct page *page, unsigned int len); - static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); --static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, -- int pextents); - - /* - * Test whether an inode is a fast symlink. -@@ -186,6 +184,8 @@ void ext4_evict_inode(struct inode *inode) - { - handle_t *handle; - int err; -+ int extra_credits = 3; -+ struct ext4_xattr_ino_array *lea_ino_array = NULL; - - trace_ext4_evict_inode(inode); - -@@ -235,8 +235,8 @@ void ext4_evict_inode(struct inode *inode) - * protection against it - */ - sb_start_intwrite(inode->i_sb); -- handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, -- ext4_blocks_for_truncate(inode)+3); -+ -+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits); - if (IS_ERR(handle)) { - ext4_std_error(inode->i_sb, PTR_ERR(handle)); - /* -@@ -251,6 +251,32 @@ void ext4_evict_inode(struct inode *inode) - - if (IS_SYNC(inode)) - ext4_handle_sync(handle); -+ -+ /* Delete xattr inode before deleting the main inode. */ -+ err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array); -+ if (err) { -+ ext4_warning(inode->i_sb, -+ "couldn't delete inode's xattr (err %d)", err); -+ goto stop_handle; -+ } -+ -+ if (!IS_NOQUOTA(inode)) -+ extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb); -+ -+ if (!ext4_handle_has_enough_credits(handle, -+ ext4_blocks_for_truncate(inode) + extra_credits)) { -+ err = ext4_journal_extend(handle, -+ ext4_blocks_for_truncate(inode) + extra_credits); -+ if (err > 0) -+ err = ext4_journal_restart(handle, -+ ext4_blocks_for_truncate(inode) + extra_credits); -+ if (err != 0) { -+ ext4_warning(inode->i_sb, -+ "couldn't extend journal (err %d)", err); -+ goto stop_handle; -+ } -+ } -+ - inode->i_size = 0; - err = ext4_mark_inode_dirty(handle, inode); - if (err) { -@@ -267,10 +293,10 @@ void ext4_evict_inode(struct inode *inode) - * enough credits left in the handle to remove the inode from - * the orphan list and set the dtime field. - */ -- if (!ext4_handle_has_enough_credits(handle, 3)) { -- err = ext4_journal_extend(handle, 3); -+ if (!ext4_handle_has_enough_credits(handle, extra_credits)) { -+ err = ext4_journal_extend(handle, extra_credits); - if (err > 0) -- err = ext4_journal_restart(handle, 3); -+ err = ext4_journal_restart(handle, extra_credits); - if (err != 0) { - ext4_warning(inode->i_sb, - "couldn't extend journal (err %d)", err); -@@ -307,6 +333,9 @@ void ext4_evict_inode(struct inode *inode) - ext4_free_inode(handle, inode); - ext4_journal_stop(handle); - sb_end_intwrite(inode->i_sb); -+ -+ if (lea_ino_array != NULL) -+ ext4_xattr_inode_array_free(inode, lea_ino_array); - return; - no_delete: - ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ -@@ -5132,7 +5161,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks, - * - * Also account for superblock, inode, quota and xattr blocks - */ --static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, -+int ext4_meta_trans_blocks(struct inode *inode, int lblocks, - int pextents) - { - ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); -diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c -index c10e37f..88e8579 100644 ---- a/fs/ext4/xattr.c -+++ b/fs/ext4/xattr.c -@@ -201,6 +201,7 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end, - - while (!IS_LAST_ENTRY(entry)) { - if (entry->e_value_size != 0 && -+ entry->e_value_inum == 0 && - (value_start + le16_to_cpu(entry->e_value_offs) < - (void *)e + sizeof(__u32) || - value_start + le16_to_cpu(entry->e_value_offs) + -@@ -233,19 +234,26 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) - } - - static inline int --ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) -+ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size, -+ struct inode *inode) - { - size_t value_size = le32_to_cpu(entry->e_value_size); - -- if (entry->e_value_block != 0 || value_size > size || -+ if (!entry->e_value_inum && - le16_to_cpu(entry->e_value_offs) + value_size > size) - return -EIO; -+ if (entry->e_value_inum && -+ (le32_to_cpu(entry->e_value_inum) < EXT4_FIRST_INO(inode->i_sb) || -+ le32_to_cpu(entry->e_value_inum) > -+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_inodes_count))) -+ return -EIO; - return 0; - } - - static int - ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, -- const char *name, size_t size, int sorted) -+ const char *name, size_t size, int sorted, -+ struct inode *inode) - { - struct ext4_xattr_entry *entry; - size_t name_len; -@@ -265,11 +273,109 @@ ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, - break; - } - *pentry = entry; -- if (!cmp && ext4_xattr_check_entry(entry, size)) -+ if (!cmp && ext4_xattr_check_entry(entry, size, inode)) - return -EIO; - return cmp ? -ENODATA : 0; - } - -+/* -+ * Read the EA value from an inode. -+ */ -+static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t *size) -+{ -+ unsigned long block = 0; -+ struct buffer_head *bh = NULL; -+ int err, blocksize; -+ size_t csize, ret_size = 0; -+ -+ if (*size == 0) -+ return 0; -+ -+ blocksize = ea_inode->i_sb->s_blocksize; -+ -+ while (ret_size < *size) { -+ csize = (*size - ret_size) > blocksize ? blocksize : -+ *size - ret_size; -+ bh = ext4_bread(NULL, ea_inode, block, 0, &err); -+ if (!bh) { -+ *size = ret_size; -+ return err; -+ } -+ memcpy(buf, bh->b_data, csize); -+ brelse(bh); -+ -+ buf += csize; -+ block += 1; -+ ret_size += csize; -+ } -+ -+ *size = ret_size; -+ -+ return err; -+} -+ -+/* -+ * Fetch the xattr inode from disk. -+ * -+ * The xattr inode stores the parent inode number and generation so that -+ * the kernel and e2fsck can verify the xattr inode is valid upon access. -+ */ -+struct inode *ext4_xattr_inode_iget(struct inode *parent, -+ unsigned long ea_ino, int *err) -+{ -+ struct inode *ea_inode = NULL; -+ -+ ea_inode = ext4_iget(parent->i_sb, ea_ino); -+ if (IS_ERR(ea_inode) || is_bad_inode(ea_inode)) { -+ int rc = IS_ERR(ea_inode) ? PTR_ERR(ea_inode) : 0; -+ ext4_error(parent->i_sb, "error while reading EA inode %lu " -+ "/ %d %d", ea_ino, rc, is_bad_inode(ea_inode)); -+ *err = rc != 0 ? rc : -EIO; -+ return NULL; -+ } -+ -+ if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != parent->i_ino || -+ ea_inode->i_generation != parent->i_generation) { -+ ext4_error(parent->i_sb, "Backpointer from EA inode %lu " -+ "to parent invalid.", ea_ino); -+ *err = -EINVAL; -+ goto error; -+ } -+ -+ if (!(EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL)) { -+ ext4_error(parent->i_sb, "EA inode %lu does not have " -+ "EXT4_EA_INODE_FL flag set.\n", ea_ino); -+ *err = -EINVAL; -+ goto error; -+ } -+ -+ *err = 0; -+ return ea_inode; -+ -+error: -+ iput(ea_inode); -+ return NULL; -+} -+ -+/* -+ * Read the value from the EA inode. -+ */ -+static int ext4_xattr_inode_get(struct inode *inode, unsigned long ea_ino, -+ void *buffer, size_t *size) -+{ -+ struct inode *ea_inode = NULL; -+ int err; -+ -+ ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err); -+ if (err) -+ return err; -+ -+ err = ext4_xattr_inode_read(ea_inode, buffer, size); -+ iput(ea_inode); -+ -+ return err; -+} -+ - static int - ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) -@@ -301,7 +407,8 @@ bad_block: - } - ext4_xattr_cache_insert(bh); - entry = BFIRST(bh); -- error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); -+ error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1, -+ inode); - if (error == -EIO) - goto bad_block; - if (error) -@@ -311,8 +418,16 @@ bad_block: - error = -ERANGE; - if (size > buffer_size) - goto cleanup; -- memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -- size); -+ if (entry->e_value_inum) { -+ error = ext4_xattr_inode_get(inode, -+ le32_to_cpu(entry->e_value_inum), -+ buffer, &size); -+ if (error) -+ goto cleanup; -+ } else { -+ memcpy(buffer, bh->b_data + -+ le16_to_cpu(entry->e_value_offs), size); -+ } - } - error = size; - -@@ -346,7 +461,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, - if (error) - goto cleanup; - error = ext4_xattr_find_entry(&entry, name_index, name, -- end - (void *)entry, 0); -+ end - (void *)entry, 0, inode); - if (error) - goto cleanup; - size = le32_to_cpu(entry->e_value_size); -@@ -354,8 +469,16 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, - error = -ERANGE; - if (size > buffer_size) - goto cleanup; -- memcpy(buffer, (void *)IFIRST(header) + -- le16_to_cpu(entry->e_value_offs), size); -+ if (entry->e_value_inum) { -+ error = ext4_xattr_inode_get(inode, -+ le32_to_cpu(entry->e_value_inum), -+ buffer, &size); -+ if (error) -+ goto cleanup; -+ } else { -+ memcpy(buffer, (void *)IFIRST(header) + -+ le16_to_cpu(entry->e_value_offs), size); -+ } - } - error = size; - -@@ -600,7 +723,7 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, - size_t *min_offs, void *base, int *total) - { - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { -- if (!last->e_value_block && last->e_value_size) { -+ if (!last->e_value_inum && last->e_value_size) { - size_t offs = le16_to_cpu(last->e_value_offs); - if (offs < *min_offs) - *min_offs = offs; -@@ -611,11 +734,193 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, - return (*min_offs - ((void *)last - base) - sizeof(__u32)); - } - -+/* -+ * Write the value of the EA in an inode. -+ */ - static int --ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) -+ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode, -+ const void *buf, int bufsize) -+{ -+ struct buffer_head *bh = NULL; -+ unsigned long block = 0; -+ unsigned blocksize = ea_inode->i_sb->s_blocksize; -+ unsigned max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits; -+ int csize, wsize = 0; -+ int ret = 0; -+ int retries = 0; -+ -+retry: -+ while (ret >= 0 && ret < max_blocks) { -+ struct ext4_map_blocks map; -+ map.m_lblk = block += ret; -+ map.m_len = max_blocks -= ret; -+ -+ ret = ext4_map_blocks(handle, ea_inode, &map, -+ EXT4_GET_BLOCKS_CREATE); -+ if (ret <= 0) { -+ ext4_mark_inode_dirty(handle, ea_inode); -+ if (ret == -ENOSPC && -+ ext4_should_retry_alloc(ea_inode->i_sb, &retries)) { -+ ret = 0; -+ goto retry; -+ } -+ break; -+ } -+ } -+ -+ if (ret < 0) -+ return ret; -+ -+ block = 0; -+ while (wsize < bufsize) { -+ if (bh != NULL) -+ brelse(bh); -+ csize = (bufsize - wsize) > blocksize ? blocksize : -+ bufsize - wsize; -+ bh = ext4_getblk(handle, ea_inode, block, 0, &ret); -+ if (!bh) -+ goto out; -+ ret = ext4_journal_get_write_access(handle, bh); -+ if (ret) -+ goto out; -+ -+ memcpy(bh->b_data, buf, csize); -+ set_buffer_uptodate(bh); -+ ext4_handle_dirty_metadata(handle, ea_inode, bh); -+ -+ buf += csize; -+ wsize += csize; -+ block += 1; -+ } -+ -+ mutex_lock(&ea_inode->i_mutex); -+ i_size_write(ea_inode, wsize); -+ ext4_update_i_disksize(ea_inode, wsize); -+ mutex_unlock(&ea_inode->i_mutex); -+ -+ ext4_mark_inode_dirty(handle, ea_inode); -+ -+out: -+ brelse(bh); -+ -+ return ret; -+} -+ -+static void ext4_xattr_inode_set_ref(struct inode *ea_inode, __u64 ref_count) -+{ -+ ea_inode->i_ctime.tv_sec = (__u32)(ref_count >> 32); -+ ea_inode->i_version = (__u32)ref_count; -+} -+ -+static void ext4_xattr_inode_set_hash(struct inode *ea_inode, __u32 hash) -+{ -+ ea_inode->i_atime.tv_sec = hash; -+} -+ -+/* -+ * Create an inode to store the value of a large EA. -+ */ -+static struct inode * -+ext4_xattr_inode_create(handle_t *handle, struct inode *inode, __u32 hash) -+{ -+ struct inode *ea_inode = NULL; -+ -+ /* -+ * Let the next inode be the goal, so we try and allocate the EA inode -+ * in the same group, or nearby one. -+ */ -+ ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode, -+ S_IFREG|0600, NULL, inode->i_ino + 1, NULL); -+ -+ if (!IS_ERR(ea_inode)) { -+ ea_inode->i_op = &ext4_file_inode_operations; -+ ea_inode->i_fop = &ext4_file_operations.kabi_fops; -+ ext4_set_aops(ea_inode); -+ ea_inode->i_generation = inode->i_generation; -+ EXT4_I(ea_inode)->i_flags |= EXT4_EA_INODE_FL; -+ -+ /* -+ * A back-pointer from EA inode to parent inode will be useful -+ * for e2fsck. -+ */ -+ EXT4_XATTR_INODE_SET_PARENT(ea_inode, inode->i_ino); -+ unlock_new_inode(ea_inode); -+ -+ ext4_xattr_inode_set_ref(ea_inode, 1); -+ ext4_xattr_inode_set_hash(ea_inode, hash); -+ } -+ -+ return ea_inode; -+} -+ -+/* -+ * Unlink the inode storing the value of the EA. -+ */ -+int -+ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino) -+{ -+ struct inode *ea_inode = NULL; -+ int err; -+ -+ ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err); -+ if (err) -+ return err; -+ -+ clear_nlink(ea_inode); -+ iput(ea_inode); -+ -+ return 0; -+} -+ -+static __u32 -+ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size) -+{ -+ if (ext4_has_metadata_csum(sbi->s_sb)) -+ return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size); -+ return 0; -+} -+ -+/* -+ * Add value of the EA in an inode. -+ */ -+static int -+ext4_xattr_inode_set(handle_t *handle, struct inode *inode, unsigned long *ea_ino, -+ const void *value, size_t value_len) -+{ -+ struct inode *ea_inode = NULL; -+ __u32 hash; -+ int err; -+ -+ /* Create an inode for the EA value */ -+ hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len); -+ ea_inode = ext4_xattr_inode_create(handle, inode, hash); -+ if (IS_ERR(ea_inode)) -+ return -1; -+ -+ err = ext4_xattr_inode_write(handle, ea_inode, value, value_len); -+ if (err) -+ clear_nlink(ea_inode); -+ else -+ *ea_ino = ea_inode->i_ino; -+ -+ iput(ea_inode); -+ -+ return err; -+} -+ -+static int -+ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s, -+ handle_t *handle, struct inode *inode) - { - struct ext4_xattr_entry *last, *next; - size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); -+ int in_inode = i->in_inode; -+ -+ if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, -+ EXT4_FEATURE_INCOMPAT_EA_INODE) && -+ (EXT4_XATTR_SIZE(i->value_len) > -+ EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize))) -+ in_inode = 1; - - /* Compute min_offs and last. */ - last = s->first; -@@ -624,7 +929,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) - if ((void *)next >= s->end) { - return -EIO; - } -- if (!last->e_value_block && last->e_value_size) { -+ if (!last->e_value_inum && last->e_value_size) { - size_t offs = le16_to_cpu(last->e_value_offs); - if (offs < min_offs) - min_offs = offs; -@@ -632,15 +937,21 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) - } - free = min_offs - ((void *)last - s->base) - sizeof(__u32); - if (!s->not_found) { -- if (!s->here->e_value_block && s->here->e_value_size) { -+ if (!in_inode && -+ !s->here->e_value_inum && s->here->e_value_size) { - size_t size = le32_to_cpu(s->here->e_value_size); - free += EXT4_XATTR_SIZE(size); - } - free += EXT4_XATTR_LEN(name_len); - } - if (i->value) { -- if (free < EXT4_XATTR_LEN(name_len) + -- EXT4_XATTR_SIZE(i->value_len)) -+ size_t value_len = EXT4_XATTR_SIZE(i->value_len); -+ -+ if (in_inode) -+ value_len = 0; -+ -+ if (free < value_len || -+ free < EXT4_XATTR_LEN(name_len) + value_len) - return -ENOSPC; - } - -@@ -654,7 +965,8 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) - s->here->e_name_len = name_len; - memcpy(s->here->e_name, i->name, name_len); - } else { -- if (!s->here->e_value_block && s->here->e_value_size) { -+ if (!s->here->e_value_inum && s->here->e_value_size && -+ s->here->e_value_offs > 0) { - void *first_val = s->base + min_offs; - size_t offs = le16_to_cpu(s->here->e_value_offs); - void *val = s->base + offs; -@@ -688,13 +1000,18 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) - last = s->first; - while (!IS_LAST_ENTRY(last)) { - size_t o = le16_to_cpu(last->e_value_offs); -- if (!last->e_value_block && -+ if (!last->e_value_inum && - last->e_value_size && o < offs) - last->e_value_offs = - cpu_to_le16(o + size); - last = EXT4_XATTR_NEXT(last); - } - } -+ if (s->here->e_value_inum) { -+ ext4_xattr_inode_unlink(inode, -+ le32_to_cpu(s->here->e_value_inum)); -+ s->here->e_value_inum = 0; -+ } - if (!i->value) { - /* Remove the old name. */ - size_t size = EXT4_XATTR_LEN(name_len); -@@ -708,10 +1025,17 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) - if (i->value) { - /* Insert the new value. */ - s->here->e_value_size = cpu_to_le32(i->value_len); -- if (i->value_len) { -+ if (in_inode) { -+ unsigned long ea_ino = le32_to_cpu(s->here->e_value_inum); -+ ext4_xattr_inode_set(handle, inode, &ea_ino, i->value, -+ i->value_len); -+ s->here->e_value_inum = cpu_to_le32(ea_ino); -+ s->here->e_value_offs = 0; -+ } else if (i->value_len) { - size_t size = EXT4_XATTR_SIZE(i->value_len); - void *val = s->base + min_offs - size; - s->here->e_value_offs = cpu_to_le16(min_offs - size); -+ s->here->e_value_inum = 0; - if (i->value == EXT4_ZERO_XATTR_VALUE) { - memset(val, 0, size); - } else { -@@ -761,7 +1085,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, - bs->s.end = bs->bh->b_data + bs->bh->b_size; - bs->s.here = bs->s.first; - error = ext4_xattr_find_entry(&bs->s.here, i->name_index, -- i->name, bs->bh->b_size, 1); -+ i->name, bs->bh->b_size, 1, inode); - if (error && error != -ENODATA) - goto cleanup; - bs->s.not_found = error; -@@ -785,8 +1109,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, - - #define header(x) ((struct ext4_xattr_header *)(x)) - -- if (i->value && i->value_len > sb->s_blocksize) -- return -ENOSPC; - if (s->base) { - ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, - bs->bh->b_blocknr); -@@ -802,7 +1124,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, - ce = NULL; - } - ea_bdebug(bs->bh, "modifying in-place"); -- error = ext4_xattr_set_entry(i, s); -+ error = ext4_xattr_set_entry(i, s, handle, inode); - if (!error) { - if (!IS_LAST_ENTRY(s->first)) - ext4_xattr_rehash(header(s->base), -@@ -853,7 +1175,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, - s->end = s->base + sb->s_blocksize; - } - -- error = ext4_xattr_set_entry(i, s); -+ error = ext4_xattr_set_entry(i, s, handle, inode); - if (error == -EIO) - goto bad_block; - if (error) -@@ -997,7 +1319,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, - /* Find the named attribute. */ - error = ext4_xattr_find_entry(&is->s.here, i->name_index, - i->name, is->s.end - -- (void *)is->s.base, 0); -+ (void *)is->s.base, 0, inode); - if (error && error != -ENODATA) - return error; - is->s.not_found = error; -@@ -1015,7 +1337,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, - - if (EXT4_I(inode)->i_extra_isize == 0) - return -ENOSPC; -- error = ext4_xattr_set_entry(i, s); -+ error = ext4_xattr_set_entry(i, s, handle, inode); - if (error) - return error; - header = IHDR(inode, ext4_raw_inode(&is->iloc)); -@@ -1039,7 +1361,7 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, - - if (EXT4_I(inode)->i_extra_isize == 0) - return -ENOSPC; -- error = ext4_xattr_set_entry(i, s); -+ error = ext4_xattr_set_entry(i, s, handle, inode); - if (error) - return error; - header = IHDR(inode, ext4_raw_inode(&is->iloc)); -@@ -1075,7 +1397,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, - .name = name, - .value = value, - .value_len = value_len, -- -+ .in_inode = 0, - }; - struct ext4_xattr_ibody_find is = { - .s = { .not_found = -ENODATA, }, -@@ -1140,6 +1462,15 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, - goto cleanup; - } - error = ext4_xattr_block_set(handle, inode, &i, &bs); -+ if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, -+ EXT4_FEATURE_INCOMPAT_EA_INODE) && -+ error == -ENOSPC) { -+ /* xattr not fit to block, store at external -+ * inode */ -+ i.in_inode = 1; -+ error = ext4_xattr_ibody_set(handle, inode, -+ &i, &is); -+ } - if (error) - goto cleanup; - if (!is.s.not_found) { -@@ -1186,9 +1517,22 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, int flags) - { - handle_t *handle; -+ struct super_block *sb = inode->i_sb; - int error, retries = 0; - int credits = ext4_jbd2_credits_xattr(inode); - -+ if ((value_len >= EXT4_XATTR_MIN_LARGE_EA_SIZE(sb->s_blocksize)) && -+ EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EA_INODE)) { -+ int nrblocks = (value_len + sb->s_blocksize - 1) >> -+ sb->s_blocksize_bits; -+ -+ /* For new inode */ -+ credits += EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + 3; -+ -+ /* For data blocks of EA inode */ -+ credits += ext4_meta_trans_blocks(inode, nrblocks, 0); -+ } -+ - retry: - handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); - if (IS_ERR(handle)) { -@@ -1200,7 +1544,7 @@ retry: - value, value_len, flags); - error2 = ext4_journal_stop(handle); - if (error == -ENOSPC && -- ext4_should_retry_alloc(inode->i_sb, &retries)) -+ ext4_should_retry_alloc(sb, &retries)) - goto retry; - if (error == 0) - error = error2; -@@ -1222,7 +1566,7 @@ static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, - - /* Adjust the value offsets of the entries */ - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { -- if (!last->e_value_block && last->e_value_size) { -+ if (!last->e_value_inum && last->e_value_size) { - new_offs = le16_to_cpu(last->e_value_offs) + - value_offs_shift; - BUG_ON(new_offs + le32_to_cpu(last->e_value_size) -@@ -1469,21 +1813,142 @@ cleanup: - } - - -+#define EIA_INCR 16 /* must be 2^n */ -+#define EIA_MASK (EIA_INCR - 1) -+/* Add the large xattr @ino into @lea_ino_array for later deletion. -+ * If @lea_ino_array is new or full it will be grown and the old -+ * contents copied over. -+ */ -+static int -+ext4_expand_ino_array(struct ext4_xattr_ino_array **lea_ino_array, __u32 ino) -+{ -+ if (*lea_ino_array == NULL) { -+ /* -+ * Start with 15 inodes, so it fits into a power-of-two size. -+ * If *lea_ino_array is NULL, this is essentially offsetof() -+ */ -+ (*lea_ino_array) = -+ kmalloc(offsetof(struct ext4_xattr_ino_array, -+ xia_inodes[EIA_MASK]), -+ GFP_NOFS); -+ if (*lea_ino_array == NULL) -+ return -ENOMEM; -+ (*lea_ino_array)->xia_count = 0; -+ } else if (((*lea_ino_array)->xia_count & EIA_MASK) == EIA_MASK) { -+ /* expand the array once all 15 + n * 16 slots are full */ -+ struct ext4_xattr_ino_array *new_array = NULL; -+ int count = (*lea_ino_array)->xia_count; -+ -+ /* if new_array is NULL, this is essentially offsetof() */ -+ new_array = kmalloc( -+ offsetof(struct ext4_xattr_ino_array, -+ xia_inodes[count + EIA_INCR]), -+ GFP_NOFS); -+ if (new_array == NULL) -+ return -ENOMEM; -+ memcpy(new_array, *lea_ino_array, -+ offsetof(struct ext4_xattr_ino_array, -+ xia_inodes[count])); -+ kfree(*lea_ino_array); -+ *lea_ino_array = new_array; -+ } -+ (*lea_ino_array)->xia_inodes[(*lea_ino_array)->xia_count++] = ino; -+ return 0; -+} -+ -+/** -+ * Add xattr inode to orphan list -+ */ -+static int -+ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, -+ int credits, struct ext4_xattr_ino_array *lea_ino_array) -+{ -+ struct inode *ea_inode = NULL; -+ int idx = 0, error = 0; -+ -+ if (lea_ino_array == NULL) -+ return 0; -+ -+ for (; idx < lea_ino_array->xia_count; ++idx) { -+ if (!ext4_handle_has_enough_credits(handle, credits)) { -+ error = ext4_journal_extend(handle, credits); -+ if (error > 0) -+ error = ext4_journal_restart(handle, credits); -+ -+ if (error != 0) { -+ ext4_warning(inode->i_sb, -+ "couldn't extend journal " -+ "(err %d)", error); -+ return error; -+ } -+ } -+ ea_inode = ext4_xattr_inode_iget(inode, -+ lea_ino_array->xia_inodes[idx], &error); -+ if (error) -+ continue; -+ inode_lock(ea_inode); -+ ext4_orphan_add(handle, ea_inode); -+ inode_unlock(ea_inode); -+ /* the inode's i_count will be released by caller */ -+ } -+ -+ return 0; -+} - - /* - * ext4_xattr_delete_inode() - * -- * Free extended attribute resources associated with this inode. This -+ * Free extended attribute resources associated with this inode. Traverse -+ * all entries and unlink any xattr inodes associated with this inode. This - * is called immediately before an inode is freed. We have exclusive -- * access to the inode. -+ * access to the inode. If an orphan inode is deleted it will also delete any -+ * xattr block and all xattr inodes. They are checked by ext4_xattr_inode_iget() -+ * to ensure they belong to the parent inode and were not deleted already. - */ --void --ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) -+int -+ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, -+ struct ext4_xattr_ino_array **lea_ino_array) - { - struct buffer_head *bh = NULL; -+ struct ext4_xattr_ibody_header *header; -+ struct ext4_inode *raw_inode; -+ struct ext4_iloc iloc = { .bh = NULL }; -+ struct ext4_xattr_entry *entry; -+ int credits = 3, error = 0; - -- if (!EXT4_I(inode)->i_file_acl) -+ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) -+ goto delete_external_ea; -+ -+ error = ext4_get_inode_loc(inode, &iloc); -+ if (error) - goto cleanup; -+ raw_inode = ext4_raw_inode(&iloc); -+ header = IHDR(inode, raw_inode); -+ for (entry = IFIRST(header); !IS_LAST_ENTRY(entry); -+ entry = EXT4_XATTR_NEXT(entry)) { -+ if (!entry->e_value_inum) -+ continue; -+ if (ext4_expand_ino_array(lea_ino_array, -+ entry->e_value_inum) != 0) -+ goto cleanup; -+ -+ error = ext4_journal_get_write_access(handle, iloc.bh); -+ if (error) -+ goto cleanup; -+ entry->e_value_inum = 0; -+ entry->e_value_size = 0; -+ error = ext4_handle_dirty_metadata(handle, inode, iloc.bh); -+ if (error) -+ goto cleanup; -+ } -+ -+delete_external_ea: -+ if (!EXT4_I(inode)->i_file_acl) { -+ /* add xattr inode to orphan list */ -+ ext4_xattr_inode_orphan_add(handle, inode, credits, -+ *lea_ino_array); -+ goto cleanup; -+ } - bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); - if (!bh) { - EXT4_ERROR_INODE(inode, "block %llu read error", -@@ -1496,11 +1961,78 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) - EXT4_I(inode)->i_file_acl); - goto cleanup; - } -+ -+ for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT4_XATTR_NEXT(entry)) { -+ if (!entry->e_value_inum) -+ continue; -+ if (ext4_expand_ino_array(lea_ino_array, -+ entry->e_value_inum) != 0) -+ goto cleanup; -+ -+ error = ext4_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ entry->e_value_inum = 0; -+ entry->e_value_size = 0; -+ error = ext4_handle_dirty_metadata(handle, inode, bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* add xattr inode to orphan list */ -+ error = ext4_xattr_inode_orphan_add(handle, inode, credits, -+ *lea_ino_array); -+ if (error != 0) -+ goto cleanup; -+ -+ if (!IS_NOQUOTA(inode)) -+ credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb); -+ -+ if (!ext4_handle_has_enough_credits(handle, credits)) { -+ error = ext4_journal_extend(handle, credits); -+ if (error > 0) -+ error = ext4_journal_restart(handle, credits); -+ if (error != 0) { -+ ext4_warning(inode->i_sb, -+ "couldn't extend journal (err %d)", error); -+ goto cleanup; -+ } -+ } -+ - ext4_xattr_release_block(handle, inode, bh); - EXT4_I(inode)->i_file_acl = 0; - - cleanup: - brelse(bh); -+ brelse(iloc.bh); -+ -+ return error; -+} -+ -+void -+ext4_xattr_inode_array_free(struct inode *inode, -+ struct ext4_xattr_ino_array *lea_ino_array) -+{ -+ struct inode *ea_inode = NULL; -+ int idx = 0; -+ int err; -+ -+ if (lea_ino_array == NULL) -+ return; -+ -+ for (; idx < lea_ino_array->xia_count; ++idx) { -+ ea_inode = ext4_xattr_inode_iget(inode, -+ lea_ino_array->xia_inodes[idx], &err); -+ if (err) -+ continue; -+ /* for inode's i_count get from ext4_xattr_delete_inode */ -+ if (!list_empty(&EXT4_I(ea_inode)->i_orphan)) -+ iput(ea_inode); -+ clear_nlink(ea_inode); -+ iput(ea_inode); -+ } -+ kfree(lea_ino_array); - } - - /* -@@ -1570,10 +2102,9 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1, - entry1->e_name_index != entry2->e_name_index || - entry1->e_name_len != entry2->e_name_len || - entry1->e_value_size != entry2->e_value_size || -+ entry1->e_value_inum != entry2->e_value_inum || - memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) - return 1; -- if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -- return -EIO; - if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), - (char *)header2 + le16_to_cpu(entry2->e_value_offs), - le32_to_cpu(entry1->e_value_size))) -@@ -1657,7 +2188,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, - *name++; - } - -- if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ if (!entry->e_value_inum && entry->e_value_size) { - __le32 *value = (__le32 *)((char *)header + - le16_to_cpu(entry->e_value_offs)); - for (n = (le32_to_cpu(entry->e_value_size) + -diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h -index c767dbd..8312412 100644 ---- a/fs/ext4/xattr.h -+++ b/fs/ext4/xattr.h -@@ -42,7 +42,7 @@ struct ext4_xattr_entry { - __u8 e_name_len; /* length of name */ - __u8 e_name_index; /* attribute name index */ - __le16 e_value_offs; /* offset in disk block of value */ -- __le32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __le32 e_value_inum; /* inode in which the value is stored */ - __le32 e_value_size; /* size of attribute value */ - __le32 e_hash; /* hash value of name and value */ - char e_name[0]; /* attribute name */ -@@ -67,6 +67,26 @@ struct ext4_xattr_entry { - EXT4_I(inode)->i_extra_isize)) - #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) - -+/* -+ * Link EA inode back to parent one using i_mtime field. -+ * Extra integer type conversion added to ignore higher -+ * bits in i_mtime.tv_sec which might be set by ext4_get() -+ */ -+#define EXT4_XATTR_INODE_SET_PARENT(inode, inum) \ -+do { \ -+ (inode)->i_mtime.tv_sec = inum; \ -+} while(0) -+ -+#define EXT4_XATTR_INODE_GET_PARENT(inode) \ -+ ((__u32)(inode)->i_mtime.tv_sec) -+ -+/* -+ * The minimum size of EA value when you start storing it in an external inode -+ * size of block - size of header - size of 1 entry - 4 null bytes -+*/ -+#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \ -+ ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4) -+ - #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) - #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) - #define BFIRST(bh) ENTRY(BHDR(bh)+1) -@@ -75,10 +95,11 @@ struct ext4_xattr_entry { - #define EXT4_ZERO_XATTR_VALUE ((void *)-1) - - struct ext4_xattr_info { -- int name_index; - const char *name; - const void *value; - size_t value_len; -+ int name_index; -+ int in_inode; - }; - - struct ext4_xattr_search { -@@ -106,7 +127,13 @@ extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); - extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); - extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); - --extern void ext4_xattr_delete_inode(handle_t *, struct inode *); -+extern struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, -+ int *err); -+extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino); -+extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, -+ struct ext4_xattr_ino_array **array); -+extern void ext4_xattr_inode_array_free(struct inode *inode, -+ struct ext4_xattr_ino_array *array); - extern void ext4_xattr_put_super(struct super_block *); - - extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, --- -2.30.2 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-limit-number-of-scanned-extents-in-status-tree-.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-limit-number-of-scanned-extents-in-status-tree-.patch deleted file mode 100644 index 30d3306..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-limit-number-of-scanned-extents-in-status-tree-.patch +++ /dev/null @@ -1,235 +0,0 @@ -From b72242d714ac3968bbb25867718e731be217e87b Mon Sep 17 00:00:00 2001 -From: Jan Kara -Date: Tue, 25 Nov 2014 11:51:23 -0500 -Subject: [PATCH 5/7] ext4: limit number of scanned extents in status tree - shrinker - -Currently we scan extent status trees of inodes until we reclaim nr_to_scan -extents. This can however require a lot of scanning when there are lots -of delayed extents (as those cannot be reclaimed). - -Change shrinker to work as shrinkers are supposed to and *scan* only -nr_to_scan extents regardless of how many extents did we actually -reclaim. We however need to be careful and avoid scanning each status -tree from the beginning - that could lead to a situation where we would -not be able to reclaim anything at all when first nr_to_scan extents in -the tree are always unreclaimable. We remember with each inode offset -where we stopped scanning and continue from there when we next come -across the inode. - -Note that we also need to update places calling __es_shrink() manually -to pass reasonable nr_to_scan to have a chance of reclaiming anything and -not just 1. - -Signed-off-by: Jan Kara -Signed-off-by: Theodore Ts'o ---- - fs/ext4/ext4.h | 5 ++- - fs/ext4/extents_status.c | 91 ++++++++++++++++++++++++++-------------- - fs/ext4/super.c | 1 + - 3 files changed, 65 insertions(+), 32 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 0813afd6..2893a168 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -1020,6 +1020,9 @@ struct ext4_inode_info { - struct list_head i_es_list; - unsigned int i_es_all_nr; /* protected by i_es_lock */ - unsigned int i_es_shk_nr; /* protected by i_es_lock */ -+ ext4_lblk_t i_es_shrink_lblk; /* Offset where we start searching for -+ extents to shrink. Protected by -+ i_es_lock */ - - /* ialloc */ - ext4_group_t i_last_alloc_group; -@@ -1481,7 +1484,7 @@ struct ext4_sb_info { - - /* Reclaim extents from extent status tree */ - struct shrinker s_es_shrinker; -- struct list_head s_es_list; -+ struct list_head s_es_list; /* List of inodes with reclaimable extents */ - long s_es_nr_inode; - struct ext4_es_stats s_es_stats; - spinlock_t s_es_lock ____cacheline_aligned_in_smp; -diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c -index edd49793..b78eec2a 100644 ---- a/fs/ext4/extents_status.c -+++ b/fs/ext4/extents_status.c -@@ -147,8 +147,7 @@ static struct kmem_cache *ext4_es_cachep; - static int __es_insert_extent(struct inode *inode, struct extent_status *newes); - static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t end); --static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, -- int nr_to_scan); -+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); - static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, - struct ext4_inode_info *locked_ei); - -@@ -726,7 +725,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, - retry: - err = __es_insert_extent(inode, &newes); - if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), -- 1, EXT4_I(inode))) -+ 128, EXT4_I(inode))) - goto retry; - if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) - err = 0; -@@ -884,7 +883,7 @@ retry: - es->es_len = orig_es.es_len; - if ((err == -ENOMEM) && - __es_shrink(EXT4_SB(inode->i_sb), -- 1, EXT4_I(inode))) -+ 128, EXT4_I(inode))) - goto retry; - goto out; - } -@@ -976,7 +975,7 @@ static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, - ktime_t start_time; - u64 scan_time; - int nr_to_walk; -- int ret, nr_shrunk = 0; -+ int nr_shrunk = 0; - int retried = 0, nr_skipped = 0; - - es_stats = &sbi->s_es_stats; -@@ -994,7 +993,7 @@ retry: - ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info, - i_es_list); - /* Move the inode to the tail */ -- list_move(&ei->i_es_list, sbi->s_es_list.prev); -+ list_move_tail(&ei->i_es_list, &sbi->s_es_list); - /* - * Normally we try hard to avoid shrinking precached inodes, - * but we will as a last resort. -@@ -1015,12 +1014,10 @@ retry: - */ - spin_unlock(&sbi->s_es_lock); - -- ret = __es_try_to_reclaim_extents(ei, nr_to_scan); -+ nr_shrunk += es_reclaim_extents(ei, &nr_to_scan); - write_unlock(&ei->i_es_lock); - -- nr_shrunk += ret; -- nr_to_scan -= ret; -- if (nr_to_scan == 0) -+ if (nr_to_scan <= 0) - goto out; - spin_lock(&sbi->s_es_lock); - } -@@ -1037,7 +1034,7 @@ retry: - } - - if (locked_ei && nr_shrunk == 0) -- nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); -+ nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan); - out: - scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); - if (likely(es_stats->es_stats_scan_time)) -@@ -1213,27 +1210,32 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) - unregister_shrinker(&sbi->s_es_shrinker); - } - --static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, -- int nr_to_scan) -+/* -+ * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at -+ * most *nr_to_scan extents, update *nr_to_scan accordingly. -+ * -+ * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan. -+ * Increment *nr_shrunk by the number of reclaimed extents. Also update -+ * ei->i_es_shrink_lblk to where we should continue scanning. -+ */ -+static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, -+ int *nr_to_scan, int *nr_shrunk) - { - struct inode *inode = &ei->vfs_inode; - struct ext4_es_tree *tree = &ei->i_es_tree; -- struct rb_node *node; - struct extent_status *es; -- int nr_shrunk = 0; -- static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, -- DEFAULT_RATELIMIT_BURST); -- -- if (ei->i_es_shk_nr == 0) -- return 0; -- -- if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && -- __ratelimit(&_rs)) -- ext4_warning(inode->i_sb, "forced shrink of precached extents"); -+ struct rb_node *node; - -- node = rb_first(&tree->root); -- while (node != NULL) { -- es = rb_entry(node, struct extent_status, rb_node); -+ es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk); -+ if (!es) -+ goto out_wrap; -+ node = &es->rb_node; -+ while (*nr_to_scan > 0) { -+ if (es->es_lblk > end) { -+ ei->i_es_shrink_lblk = end + 1; -+ return 0; -+ } -+ (*nr_to_scan)--; - node = rb_next(&es->rb_node); - /* - * We can't reclaim delayed extent from status tree because -@@ -1242,11 +1244,38 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, - if (!ext4_es_is_delayed(es)) { - rb_erase(&es->rb_node, &tree->root); - ext4_es_free_extent(inode, es); -- nr_shrunk++; -- if (--nr_to_scan == 0) -- break; -+ (*nr_shrunk)++; - } -+ if (!node) -+ goto out_wrap; -+ es = rb_entry(node, struct extent_status, rb_node); - } -- tree->cache_es = NULL; -+ ei->i_es_shrink_lblk = es->es_lblk; -+ return 1; -+out_wrap: -+ ei->i_es_shrink_lblk = 0; -+ return 0; -+} -+ -+static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan) -+{ -+ struct inode *inode = &ei->vfs_inode; -+ int nr_shrunk = 0; -+ ext4_lblk_t start = ei->i_es_shrink_lblk; -+ static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, -+ DEFAULT_RATELIMIT_BURST); -+ -+ if (ei->i_es_shk_nr == 0) -+ return 0; -+ -+ if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) && -+ __ratelimit(&_rs)) -+ ext4_warning(inode->i_sb, "forced shrink of precached extents"); -+ -+ if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) && -+ start != 0) -+ es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk); -+ -+ ei->i_es_tree.cache_es = NULL; - return nr_shrunk; - } -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 8a81fa73..d9cd4ff9 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -945,6 +945,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) - INIT_LIST_HEAD(&ei->i_es_list); - ei->i_es_all_nr = 0; - ei->i_es_shk_nr = 0; -+ ei->i_es_shrink_lblk = 0; - ei->i_reserved_data_blocks = 0; - ei->i_reserved_meta_blocks = 0; - ei->i_allocated_meta_blocks = 0; --- -2.24.1 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch deleted file mode 100644 index 5e4cd06..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-mballoc-prefetch.patch +++ /dev/null @@ -1,261 +0,0 @@ ---- linux-4.18/fs/ext4/balloc.c 2019-11-28 14:55:26.506546036 +0300 -+++ linux-4.18/fs/ext4/balloc.c 2019-12-02 11:21:50.565975537 +0300 -@@ -404,7 +404,8 @@ verified: - * Return buffer_head on success or NULL in case of failure. - */ - struct buffer_head * --ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) -+ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group, -+ int ignore_locked) - { - struct ext4_group_desc *desc; - struct ext4_sb_info *sbi = EXT4_SB(sb); -@@ -435,6 +436,13 @@ ext4_read_block_bitmap_nowait(struct - if (bitmap_uptodate(bh)) - goto verify; - -+ if (ignore_locked && buffer_locked(bh)) { -+ /* buffer under IO already, do not wait -+ * if called for prefetching */ -+ put_bh(bh); -+ return NULL; -+ } -+ - lock_buffer(bh); - if (bitmap_uptodate(bh)) { - unlock_buffer(bh); -@@ -524,7 +532,7 @@ ext4_read_block_bitmap(struct super_b - struct buffer_head *bh; - int err; - -- bh = ext4_read_block_bitmap_nowait(sb, block_group); -+ bh = ext4_read_block_bitmap_nowait(sb, block_group, 0); - if (!bh) - return NULL; - err = ext4_wait_block_bitmap(sb, block_group, bh); ---- linux-4.18/fs/ext4/ext4.h 2019-11-28 14:55:26.470545343 +0300 -+++ linux-4.18/fs/ext4/ext4.h 2019-12-02 11:21:40.795779972 +0300 -@@ -1446,6 +1446,8 @@ struct ext4_sb_info { - /* where last allocation was done - for stream allocation */ - unsigned long s_mb_last_group; - unsigned long s_mb_last_start; -+ unsigned int s_mb_prefetch; -+ unsigned int s_mb_prefetch_limit; - - /* stats for buddy allocator */ - atomic_t s_bal_reqs; /* number of reqs with len > 1 */ -@@ -2401,7 +2403,8 @@ extern struct ext4_group_desc * ldisk - extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); - - extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb, -- ext4_group_t block_group); -+ ext4_group_t block_group, -+ int ignore_locked); - extern int ext4_wait_block_bitmap(struct super_block *sb, - ext4_group_t block_group, - struct buffer_head *bh); -@@ -3047,6 +3051,7 @@ struct ext4_group_info { - #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3 - #define EXT4_GROUP_INFO_IBITMAP_CORRUPT \ - (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT) -+#define EXT4_GROUP_INFO_BBITMAP_READ_BIT 4 - - #define EXT4_MB_GRP_NEED_INIT(grp) \ - (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) -@@ -3065,6 +3070,10 @@ struct ext4_group_info { - (set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) - #define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \ - (clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state))) -+#define EXT4_MB_GRP_TEST(grp) \ -+ (test_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state))) -+#define EXT4_MB_GRP_TEST_AND_SET_READ(grp) \ -+ (test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state))) - - #define EXT4_MAX_CONTENTION 8 - #define EXT4_CONTENTION_THRESHOLD 2 ---- linux-4.18/fs/ext4/mballoc.c 2019-11-28 14:55:26.500545920 +0300 -+++ linux-4.18/fs/ext4/mballoc.c 2019-12-02 11:21:46.656897291 +0300 -@@ -868,7 +868,7 @@ static int ext4_mb_init_cache(struct - bh[i] = NULL; - continue; - } -- if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) { -+ if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group, 0))) { - err = -ENOMEM; - goto out; - } -@@ -2104,6 +2112,92 @@ static int ext4_mb_good_group(struct - return 0; - } - -+/* -+ * each allocation context (i.e. a thread doing allocation) has own -+ * sliding prefetch window of @s_mb_prefetch size which starts at the -+ * very first goal and moves ahead of scaning. -+ * a side effect is that subsequent allocations will likely find -+ * the bitmaps in cache or at least in-flight. -+ */ -+static void -+ext4_mb_prefetch(struct ext4_allocation_context *ac, -+ ext4_group_t start) -+{ -+ struct super_block *sb = ac->ac_sb; -+ ext4_group_t ngroups = ext4_get_groups_count(sb); -+ struct ext4_sb_info *sbi = EXT4_SB(sb); -+ struct ext4_group_info *grp; -+ ext4_group_t group = start; -+ struct buffer_head *bh; -+ int nr; -+ -+ /* limit prefetching at cr=0, otherwise mballoc can -+ * spend a lot of time loading imperfect groups */ -+ if (ac->ac_criteria < 2 && ac->ac_prefetch_ios >= sbi->s_mb_prefetch_limit) -+ return; -+ -+ /* batch prefetching to get few READs in flight */ -+ nr = ac->ac_prefetch - group; -+ if (ac->ac_prefetch < group) -+ /* wrapped to the first groups */ -+ nr += ngroups; -+ if (nr > 0) -+ return; -+ BUG_ON(nr < 0); -+ -+ nr = sbi->s_mb_prefetch; -+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { -+ /* align to flex_bg to get more bitmas with a single IO */ -+ nr = (group / sbi->s_mb_prefetch) * sbi->s_mb_prefetch; -+ nr = nr + sbi->s_mb_prefetch - group; -+ } -+ while (nr-- > 0) { -+ grp = ext4_get_group_info(sb, group); -+ /* prevent expensive getblk() on groups w/ IO in progress */ -+ if (EXT4_MB_GRP_TEST(grp) || EXT4_MB_GRP_TEST_AND_SET_READ(grp)) -+ goto next; -+ -+ /* ignore empty groups - those will be skipped -+ * during the scanning as well */ -+ if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) { -+ bh = ext4_read_block_bitmap_nowait(sb, group, 1); -+ if (bh && !IS_ERR(bh)) { -+ if (!buffer_uptodate(bh)) -+ ac->ac_prefetch_ios++; -+ brelse(bh); -+ } -+ } -+next: -+ if (++group >= ngroups) -+ group = 0; -+ } -+ ac->ac_prefetch = group; -+} -+ -+static void -+ext4_mb_prefetch_fini(struct ext4_allocation_context *ac) -+{ -+ struct ext4_group_info *grp; -+ ext4_group_t group; -+ int nr, rc; -+ -+ /* initialize last window of prefetched groups */ -+ nr = ac->ac_prefetch_ios; -+ if (nr > EXT4_SB(ac->ac_sb)->s_mb_prefetch) -+ nr = EXT4_SB(ac->ac_sb)->s_mb_prefetch; -+ group = ac->ac_prefetch; -+ while (nr-- > 0) { -+ grp = ext4_get_group_info(ac->ac_sb, group); -+ if (grp->bb_free > 0 && EXT4_MB_GRP_NEED_INIT(grp)) { -+ rc = ext4_mb_init_group(ac->ac_sb, group); -+ if (rc) -+ break; -+ } -+ if (group-- == 0) -+ group = ext4_get_groups_count(ac->ac_sb) - 1; -+ } -+} -+ - static noinline_for_stack int - ext4_mb_regular_allocator(struct ext4_allocation_context *ac) - { -@@ -2176,6 +2264,7 @@ repeat: - * from the goal value specified - */ - group = ac->ac_g_ex.fe_group; -+ ac->ac_prefetch = group; - - for (i = 0; i < ngroups; group++, i++) { - int ret = 0; -@@ -2188,6 +2277,8 @@ repeat: - if (group >= ngroups) - group = 0; - -+ ext4_mb_prefetch(ac, group); -+ - /* This now checks without needing the buddy page */ - ret = ext4_mb_good_group(ac, group, cr); - if (ret <= 0) { -@@ -2260,6 +2351,8 @@ repeat: - } - } - out: -+ /* use prefetched bitmaps to init buddy so that read info is not lost */ -+ ext4_mb_prefetch_fini(ac); - return err; - } - -@@ -2832,6 +2925,24 @@ int ext4_mb_init(struct super_block * - sbi->s_mb_large_req = sbi->s_stripe * 8; - sbi->s_mb_group_prealloc = sbi->s_stripe * 4; - } -+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { -+ /* a single flex group is supposed to be read by a single IO */ -+ sbi->s_mb_prefetch = 1 << sbi->s_es->s_log_groups_per_flex; -+ sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ -+ } else { -+ sbi->s_mb_prefetch = 32; -+ } -+ if (sbi->s_mb_prefetch >= ext4_get_groups_count(sb)) -+ sbi->s_mb_prefetch = ext4_get_groups_count(sb); -+ /* now many real IOs to prefetch within a single allocation at cr=0 -+ * given cr=0 is an CPU-related optimization we shouldn't try to -+ * load too many groups, at some point we should start to use what -+ * we've got in memory. -+ * with an average random access time 5ms, it'd take a second to get -+ * 200 groups (* N with flex_bg), so let's make this limit 4 */ -+ sbi->s_mb_prefetch_limit = sbi->s_mb_prefetch * 4; -+ if (sbi->s_mb_prefetch_limit > ext4_get_groups_count(sb)) -+ sbi->s_mb_prefetch_limit = ext4_get_groups_count(sb); - - sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); - if (sbi->s_locality_groups == NULL) { ---- linux-4.18/fs/ext4/mballoc.h 2019-11-28 14:55:26.471545362 +0300 -+++ linux-4.18/fs/ext4/mballoc.h 2019-12-02 11:21:57.028104886 +0300 -@@ -177,6 +177,8 @@ struct ext4_allocation_context { - struct page *ac_buddy_page; - struct ext4_prealloc_space *ac_pa; - struct ext4_locality_group *ac_lg; -+ ext4_group_t ac_prefetch; -+ int ac_prefetch_ios; /* number of initialied prefetch IO */ - }; - - #define AC_STATUS_CONTINUE 1 ---- linux-4.18/fs/ext4/super.c 2019-11-28 14:55:26.502545959 +0300 -+++ linux-4.18/fs/ext4/super.c 2019-11-28 20:07:48.104558177 +0300 -@@ -190,6 +190,8 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_bur - EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req); - EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req); - EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); -+EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch); -+EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit); - EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); - EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); - EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); -@@ -223,6 +224,8 @@ static struct attribute *ext4_attrs[] - ATTR_LIST(errors_count), - ATTR_LIST(first_error_time), - ATTR_LIST(last_error_time), -+ ATTR_LIST(mb_prefetch), -+ ATTR_LIST(mb_prefetch_limit), - NULL, - }; - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-misc.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-misc.patch deleted file mode 100644 index 9179435..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-misc.patch +++ /dev/null @@ -1,134 +0,0 @@ -Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h -=================================================================== ---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/ext4.h -+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h -@@ -1427,6 +1427,8 @@ static inline void ext4_clear_state_flag - - #define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime - -+#define JOURNAL_START_HAS_3ARGS 1 -+ - /* - * Codes for operating systems - */ -@@ -2612,6 +2614,13 @@ struct ext4_extent; - - extern int ext4_ext_tree_init(handle_t *handle, struct inode *); - extern int ext4_ext_writepage_trans_blocks(struct inode *, int); -+extern struct buffer_head *ext4_read_inode_bitmap(struct super_block *sb, -+ ext4_group_t block_group); -+extern void ext4_inc_count(handle_t *handle, struct inode *inode); -+extern void ext4_dec_count(handle_t *handle, struct inode *inode); -+extern struct buffer_head *ext4_append(handle_t *handle, -+ struct inode *inode, -+ ext4_lblk_t *block); - extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); - int chunk); - extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, -Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/namei.c -=================================================================== ---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/namei.c -+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/namei.c -@@ -48,7 +48,7 @@ - #define NAMEI_RA_BLOCKS 4 - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - --static struct buffer_head *ext4_append(handle_t *handle, -+struct buffer_head *ext4_append(handle_t *handle, - struct inode *inode, - ext4_lblk_t *block) - { -@@ -155,6 +155,7 @@ static struct buffer_head *__ext4_read_d - } - return bh; - } -+EXPORT_SYMBOL(ext4_append); - - #ifndef assert - #define assert(test) J_ASSERT(test) -@@ -2210,7 +2211,7 @@ out: - * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2, - * since this indicates that nlinks count was previously 1. - */ --static void ext4_inc_count(handle_t *handle, struct inode *inode) -+void ext4_inc_count(handle_t *handle, struct inode *inode) - { - inc_nlink(inode); - if (is_dx(inode) && inode->i_nlink > 1) { -@@ -2222,16 +2223,18 @@ static void ext4_inc_count(handle_t *han - } - } - } -+EXPORT_SYMBOL(ext4_inc_count); - - /* - * If a directory had nlink == 1, then we should let it be 1. This indicates - * directory has >EXT4_LINK_MAX subdirs. - */ --static void ext4_dec_count(handle_t *handle, struct inode *inode) -+void ext4_dec_count(handle_t *handle, struct inode *inode) - { - if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) - drop_nlink(inode); - } -+EXPORT_SYMBOL(ext4_dec_count); - - - static int ext4_add_nondir(handle_t *handle, -Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ialloc.c -=================================================================== ---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/ialloc.c -+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ialloc.c -@@ -111,7 +111,7 @@ void ext4_end_bitmap_read(struct buffer_ - * - * Return buffer_head of bitmap on success or NULL. - */ --static struct buffer_head * -+struct buffer_head * - ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) - { - struct ext4_group_desc *desc; -@@ -191,6 +191,7 @@ verify: - set_buffer_verified(bh); - return bh; - } -+EXPORT_SYMBOL(ext4_read_inode_bitmap); - - /* - * NOTE! When we get the inode, we're the only people -Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/inode.c -=================================================================== ---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/inode.c -+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/inode.c -@@ -5281,3 +5281,19 @@ out: - sb_end_pagefault(inode->i_sb); - return ret; - } -+EXPORT_SYMBOL(ext4_map_blocks); -+EXPORT_SYMBOL(ext4_truncate); -+EXPORT_SYMBOL(ext4_iget); -+EXPORT_SYMBOL(ext4_bread); -+EXPORT_SYMBOL(ext4_itable_unused_count); -+EXPORT_SYMBOL(ext4_force_commit); -+EXPORT_SYMBOL(ext4_mark_inode_dirty); -+EXPORT_SYMBOL(ext4_get_group_desc); -+EXPORT_SYMBOL(__ext4_journal_get_write_access); -+EXPORT_SYMBOL(__ext4_journal_start_sb); -+EXPORT_SYMBOL(__ext4_journal_stop); -+EXPORT_SYMBOL(__ext4_handle_dirty_metadata); -+EXPORT_SYMBOL(__ext4_std_error); -+EXPORT_SYMBOL(ext4fs_dirhash); -+EXPORT_SYMBOL(ext4_get_inode_loc); -+EXPORT_SYMBOL(ext4_chunk_trans_blocks); -Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/mballoc.c -=================================================================== ---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/mballoc.c -+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/mballoc.c -@@ -5281,7 +5281,6 @@ out: - void *buddy, void *bitmap, ext4_group_t group) - { - struct ext4_group_info *grp = ext4_get_group_info(sb, group); -- struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); - ext4_grpblk_t i = 0; - ext4_grpblk_t first; diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-move-handling-of-list-of-shrinkable-inodes-into.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-move-handling-of-list-of-shrinkable-inodes-into.patch deleted file mode 100644 index 6418198..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-move-handling-of-list-of-shrinkable-inodes-into.patch +++ /dev/null @@ -1,147 +0,0 @@ -From 8d5847463404eb2d6b24f748d521d1930a432da9 Mon Sep 17 00:00:00 2001 -From: Jan Kara -Date: Tue, 25 Nov 2014 11:49:25 -0500 -Subject: [PATCH 4/7] ext4: move handling of list of shrinkable inodes into - extent status code - -Currently callers adding extents to extent status tree were responsible -for adding the inode to the list of inodes with freeable extents. This -is error prone and puts list handling in unnecessarily many places. - -Just add inode to the list automatically when the first non-delay extent -is added to the tree and remove inode from the list when the last -non-delay extent is removed. - -Signed-off-by: Jan Kara -Signed-off-by: Theodore Ts'o ---- - fs/ext4/extents.c | 2 -- - fs/ext4/extents_status.c | 10 ++++++---- - fs/ext4/extents_status.h | 2 -- - fs/ext4/inode.c | 2 -- - fs/ext4/ioctl.c | 2 -- - fs/ext4/super.c | 1 - - 6 files changed, 6 insertions(+), 13 deletions(-) - -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index c012dc51..d9d51a5b 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -4689,7 +4689,6 @@ out2: - - trace_ext4_ext_map_blocks_exit(inode, flags, map, - err ? err : allocated); -- ext4_es_list_add(inode); - return err ? err : allocated; - } - -@@ -5263,7 +5262,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - error = ext4_fill_fiemap_extents(inode, start_blk, - len_blks, fieinfo); - } -- ext4_es_list_add(inode); - return error; - } - -diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c -index 382a7bf9..edd49793 100644 ---- a/fs/ext4/extents_status.c -+++ b/fs/ext4/extents_status.c -@@ -298,7 +298,7 @@ out: - trace_ext4_es_find_delayed_extent_range_exit(inode, es); - } - --void ext4_es_list_add(struct inode *inode) -+static void ext4_es_list_add(struct inode *inode) - { - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -@@ -314,7 +314,7 @@ void ext4_es_list_add(struct inode *inode) - spin_unlock(&sbi->s_es_lock); - } - --void ext4_es_list_del(struct inode *inode) -+static void ext4_es_list_del(struct inode *inode) - { - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); -@@ -344,7 +344,8 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, - * We don't count delayed extent because we never try to reclaim them - */ - if (!ext4_es_is_delayed(es)) { -- EXT4_I(inode)->i_es_shk_nr++; -+ if (!EXT4_I(inode)->i_es_shk_nr++) -+ ext4_es_list_add(inode); - percpu_counter_inc(&EXT4_SB(inode->i_sb)-> - s_es_stats.es_stats_shk_cnt); - } -@@ -363,7 +364,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) - /* Decrease the shrink counter when this es is not delayed */ - if (!ext4_es_is_delayed(es)) { - BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); -- EXT4_I(inode)->i_es_shk_nr--; -+ if (!--EXT4_I(inode)->i_es_shk_nr) -+ ext4_es_list_del(inode); - percpu_counter_dec(&EXT4_SB(inode->i_sb)-> - s_es_stats.es_stats_shk_cnt); - } -diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h -index 0e6a33e8..b0b78b95 100644 ---- a/fs/ext4/extents_status.h -+++ b/fs/ext4/extents_status.h -@@ -150,7 +150,5 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, - - extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); - extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); --extern void ext4_es_list_add(struct inode *inode); --extern void ext4_es_list_del(struct inode *inode); - - #endif /* _EXT4_EXTENTS_STATUS_H */ -diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c -index f6a2764c..9bbdc9e5 100644 ---- a/fs/ext4/inode.c -+++ b/fs/ext4/inode.c -@@ -523,7 +523,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, - - /* Lookup extent status tree firstly */ - if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { -- ext4_es_list_add(inode); - if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { - map->m_pblk = ext4_es_pblock(&es) + - map->m_lblk - es.es_lblk; -@@ -1519,7 +1518,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, - - /* Lookup extent status tree firstly */ - if (ext4_es_lookup_extent(inode, iblock, &es)) { -- ext4_es_list_add(inode); - if (ext4_es_is_hole(&es)) { - retval = 0; - down_read(&EXT4_I(inode)->i_data_sem); -diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c -index 122d517c..6a6a9588 100644 ---- a/fs/ext4/ioctl.c -+++ b/fs/ext4/ioctl.c -@@ -80,8 +80,6 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) - memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); - ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); - ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); -- ext4_es_list_del(inode1); -- ext4_es_list_del(inode2); - - isize = i_size_read(inode1); - i_size_write(inode1, i_size_read(inode2)); -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index ea2a1026..8a81fa73 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -1033,7 +1033,6 @@ void ext4_clear_inode(struct inode *inode) - dquot_drop(inode); - ext4_discard_preallocations(inode); - ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); -- ext4_es_list_del(inode); - if (EXT4_I(inode)->jinode) { - jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), - EXT4_I(inode)->jinode); --- -2.24.1 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-pdirop.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-pdirop.patch deleted file mode 100644 index b23bcdc..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-pdirop.patch +++ /dev/null @@ -1,855 +0,0 @@ -Single directory performance is a critical for HPC workloads. In a -typical use case an application creates a separate output file for -each node and task in a job. As nodes and tasks increase, hundreds -of thousands of files may be created in a single directory within -a short window of time. -Today, both filename lookup and file system modifying operations -(such as create and unlink) are protected with a single lock for -an entire ldiskfs directory. PDO project will remove this -bottleneck by introducing a parallel locking mechanism for entire -ldiskfs directories. This work will enable multiple application -threads to simultaneously lookup, create and unlink in parallel. - -This patch contains: - - pdirops support for ldiskfs - - integrate with osd-ldiskfs - -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/Makefile -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile -@@ -6,6 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o - - ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ -+ htree_lock.o \ - ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ - mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ - xattr_trusted.o inline.o -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -821,6 +822,9 @@ struct ext4_inode_info { - __u32 i_dtime; - ext4_fsblk_t i_file_acl; - -+ /* following fields for parallel directory operations -bzzz */ -+ struct semaphore i_append_sem; -+ - /* - * i_block_group is the number of the block group which contains - * this file's inode. Constant across the lifetime of the inode, -@@ -1846,6 +1850,71 @@ struct dx_hash_info - */ - #define HASH_NB_ALWAYS 1 - -+/* assume name-hash is protected by upper layer */ -+#define EXT4_HTREE_LOCK_HASH 0 -+ -+enum ext4_pdo_lk_types { -+#if EXT4_HTREE_LOCK_HASH -+ EXT4_LK_HASH, -+#endif -+ EXT4_LK_DX, /* index block */ -+ EXT4_LK_DE, /* directory entry block */ -+ EXT4_LK_SPIN, /* spinlock */ -+ EXT4_LK_MAX, -+}; -+ -+/* read-only bit */ -+#define EXT4_LB_RO(b) (1 << (b)) -+/* read + write, high bits for writer */ -+#define EXT4_LB_RW(b) ((1 << (b)) | (1 << (EXT4_LK_MAX + (b)))) -+ -+enum ext4_pdo_lock_bits { -+ /* DX lock bits */ -+ EXT4_LB_DX_RO = EXT4_LB_RO(EXT4_LK_DX), -+ EXT4_LB_DX = EXT4_LB_RW(EXT4_LK_DX), -+ /* DE lock bits */ -+ EXT4_LB_DE_RO = EXT4_LB_RO(EXT4_LK_DE), -+ EXT4_LB_DE = EXT4_LB_RW(EXT4_LK_DE), -+ /* DX spinlock bits */ -+ EXT4_LB_SPIN_RO = EXT4_LB_RO(EXT4_LK_SPIN), -+ EXT4_LB_SPIN = EXT4_LB_RW(EXT4_LK_SPIN), -+ /* accurate searching */ -+ EXT4_LB_EXACT = EXT4_LB_RO(EXT4_LK_MAX << 1), -+}; -+ -+enum ext4_pdo_lock_opc { -+ /* external */ -+ EXT4_HLOCK_READDIR = (EXT4_LB_DE_RO | EXT4_LB_DX_RO), -+ EXT4_HLOCK_LOOKUP = (EXT4_LB_DE_RO | EXT4_LB_SPIN_RO | -+ EXT4_LB_EXACT), -+ EXT4_HLOCK_DEL = (EXT4_LB_DE | EXT4_LB_SPIN_RO | -+ EXT4_LB_EXACT), -+ EXT4_HLOCK_ADD = (EXT4_LB_DE | EXT4_LB_SPIN_RO), -+ -+ /* internal */ -+ EXT4_HLOCK_LOOKUP_SAFE = (EXT4_LB_DE_RO | EXT4_LB_DX_RO | -+ EXT4_LB_EXACT), -+ EXT4_HLOCK_DEL_SAFE = (EXT4_LB_DE | EXT4_LB_DX_RO | EXT4_LB_EXACT), -+ EXT4_HLOCK_SPLIT = (EXT4_LB_DE | EXT4_LB_DX | EXT4_LB_SPIN), -+}; -+ -+extern struct htree_lock_head *ext4_htree_lock_head_alloc(unsigned hbits); -+#define ext4_htree_lock_head_free(lhead) htree_lock_head_free(lhead) -+ -+extern struct htree_lock *ext4_htree_lock_alloc(void); -+#define ext4_htree_lock_free(lck) htree_lock_free(lck) -+ -+extern void ext4_htree_lock(struct htree_lock *lck, -+ struct htree_lock_head *lhead, -+ struct inode *dir, unsigned flags); -+#define ext4_htree_unlock(lck) htree_unlock(lck) -+ -+extern struct buffer_head *__ext4_find_entry(struct inode *dir, -+ const struct qstr *d_name, -+ struct ext4_dir_entry_2 **res_dir, -+ int *inlined, struct htree_lock *lck); -+extern int __ext4_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct htree_lock *lck); - - /* - * Describe an inode's exact location on disk and in memory -@@ -2088,9 +2157,17 @@ void ext4_insert_dentry(struct inode *in - const char *name, int namelen, void *data); - static inline void ext4_update_dx_flag(struct inode *inode) - { -+ /* Disable it for ldiskfs, because going from a DX directory to -+ * a non-DX directory while it is in use will completely break -+ * the htree-locking. -+ * If we really want to support this operation in the future, -+ * we need to exclusively lock the directory at here which will -+ * increase complexity of code */ -+#if 0 - if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX)) - ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); -+#endif - } - static unsigned char ext4_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/namei.c -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c -@@ -53,6 +53,7 @@ struct buffer_head *ext4_append(handle_t - ext4_lblk_t *block) - { - struct buffer_head *bh; -+ struct ext4_inode_info *ei = EXT4_I(inode); - int err = 0; - - if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && -@@ -60,15 +61,22 @@ struct buffer_head *ext4_append(handle_t - EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) - return ERR_PTR(-ENOSPC); - -+ /* with parallel dir operations all appends -+ * have to be serialized -bzzz */ -+ down(&ei->i_append_sem); -+ - *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - - bh = ext4_bread(handle, inode, *block, 1, &err); -- if (!bh) -+ if (!bh) { -+ up(&ei->i_append_sem); - return ERR_PTR(err); -+ } - inode->i_size += inode->i_sb->s_blocksize; - EXT4_I(inode)->i_disksize = inode->i_size; - BUFFER_TRACE(bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh); -+ up(&ei->i_append_sem); - if (err) { - brelse(bh); - ext4_std_error(inode->i_sb, err); -@@ -246,7 +254,7 @@ static struct dx_frame *dx_probe(const s - struct inode *dir, - struct dx_hash_info *hinfo, - struct dx_frame *frame, -- int *err); -+ struct htree_lock *lck, int *err); - static void dx_release(struct dx_frame *frames); - static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, - struct dx_hash_info *hinfo, struct dx_map_entry map[]); -@@ -259,13 +267,13 @@ static void dx_insert_block(struct dx_fr - static int ext4_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, -- __u32 *start_hash); -+ __u32 *start_hash, struct htree_lock *lck); - static struct buffer_head * ext4_dx_find_entry(struct inode *dir, - const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, -- int *err); -+ struct htree_lock *lck, int *err); - static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, -- struct inode *inode); -+ struct inode *inode, struct htree_lock *lck); - - /* checksumming functions */ - void initialize_dirent_tail(struct ext4_dir_entry_tail *t, -@@ -668,6 +676,227 @@ struct stats dx_show_entries(struct dx_h - } - #endif /* DX_DEBUG */ - -+/* private data for htree_lock */ -+struct ext4_dir_lock_data { -+ unsigned ld_flags; /* bits-map for lock types */ -+ unsigned ld_count; /* # entries of the last DX block */ -+ struct dx_entry ld_at_entry; /* copy of leaf dx_entry */ -+ struct dx_entry *ld_at; /* position of leaf dx_entry */ -+}; -+ -+#define ext4_htree_lock_data(l) ((struct ext4_dir_lock_data *)(l)->lk_private) -+#define ext4_find_entry(dir, name, dirent, inline) \ -+ __ext4_find_entry(dir, name, dirent, inline, NULL) -+#define ext4_add_entry(handle, dentry, inode) \ -+ __ext4_add_entry(handle, dentry, inode, NULL) -+ -+/* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */ -+#define EXT4_HTREE_NODE_CHANGED (0xcafeULL << 32) -+ -+static void ext4_htree_event_cb(void *target, void *event) -+{ -+ u64 *block = (u64 *)target; -+ -+ if (*block == dx_get_block((struct dx_entry *)event)) -+ *block = EXT4_HTREE_NODE_CHANGED; -+} -+ -+struct htree_lock_head *ext4_htree_lock_head_alloc(unsigned hbits) -+{ -+ struct htree_lock_head *lhead; -+ -+ lhead = htree_lock_head_alloc(EXT4_LK_MAX, hbits, 0); -+ if (lhead != NULL) { -+ htree_lock_event_attach(lhead, EXT4_LK_SPIN, HTREE_EVENT_WR, -+ ext4_htree_event_cb); -+ } -+ return lhead; -+} -+EXPORT_SYMBOL(ext4_htree_lock_head_alloc); -+ -+struct htree_lock *ext4_htree_lock_alloc(void) -+{ -+ return htree_lock_alloc(EXT4_LK_MAX, -+ sizeof(struct ext4_dir_lock_data)); -+} -+EXPORT_SYMBOL(ext4_htree_lock_alloc); -+ -+static htree_lock_mode_t ext4_htree_mode(unsigned flags) -+{ -+ switch (flags) { -+ default: /* 0 or unknown flags require EX lock */ -+ return HTREE_LOCK_EX; -+ case EXT4_HLOCK_READDIR: -+ return HTREE_LOCK_PR; -+ case EXT4_HLOCK_LOOKUP: -+ return HTREE_LOCK_CR; -+ case EXT4_HLOCK_DEL: -+ case EXT4_HLOCK_ADD: -+ return HTREE_LOCK_CW; -+ } -+} -+ -+/* return PR for read-only operations, otherwise return EX */ -+static inline htree_lock_mode_t ext4_htree_safe_mode(unsigned flags) -+{ -+ int writer = (flags & EXT4_LB_DE) == EXT4_LB_DE; -+ -+ /* 0 requires EX lock */ -+ return (flags == 0 || writer) ? HTREE_LOCK_EX : HTREE_LOCK_PR; -+} -+ -+static int ext4_htree_safe_locked(struct htree_lock *lck) -+{ -+ int writer; -+ -+ if (lck == NULL || lck->lk_mode == HTREE_LOCK_EX) -+ return 1; -+ -+ writer = (ext4_htree_lock_data(lck)->ld_flags & EXT4_LB_DE) == -+ EXT4_LB_DE; -+ if (writer) /* all readers & writers are excluded? */ -+ return lck->lk_mode == HTREE_LOCK_EX; -+ -+ /* all writers are excluded? */ -+ return lck->lk_mode == HTREE_LOCK_PR || -+ lck->lk_mode == HTREE_LOCK_PW || -+ lck->lk_mode == HTREE_LOCK_EX; -+} -+ -+/* relock htree_lock with EX mode if it's change operation, otherwise -+ * relock it with PR mode. It's noop if PDO is disabled. */ -+static void ext4_htree_safe_relock(struct htree_lock *lck) -+{ -+ if (!ext4_htree_safe_locked(lck)) { -+ unsigned flags = ext4_htree_lock_data(lck)->ld_flags; -+ -+ htree_change_lock(lck, ext4_htree_safe_mode(flags)); -+ } -+} -+ -+void ext4_htree_lock(struct htree_lock *lck, struct htree_lock_head *lhead, -+ struct inode *dir, unsigned flags) -+{ -+ htree_lock_mode_t mode = is_dx(dir) ? ext4_htree_mode(flags) : -+ ext4_htree_safe_mode(flags); -+ -+ ext4_htree_lock_data(lck)->ld_flags = flags; -+ htree_lock(lck, lhead, mode); -+ if (!is_dx(dir)) -+ ext4_htree_safe_relock(lck); /* make sure it's safe locked */ -+} -+EXPORT_SYMBOL(ext4_htree_lock); -+ -+static int ext4_htree_node_lock(struct htree_lock *lck, struct dx_entry *at, -+ unsigned lmask, int wait, void *ev) -+{ -+ u32 key = (at == NULL) ? 0 : dx_get_block(at); -+ u32 mode; -+ -+ /* NOOP if htree is well protected or caller doesn't require the lock */ -+ if (ext4_htree_safe_locked(lck) || -+ !(ext4_htree_lock_data(lck)->ld_flags & lmask)) -+ return 1; -+ -+ mode = (ext4_htree_lock_data(lck)->ld_flags & lmask) == lmask ? -+ HTREE_LOCK_PW : HTREE_LOCK_PR; -+ while (1) { -+ if (htree_node_lock_try(lck, mode, key, ffz(~lmask), wait, ev)) -+ return 1; -+ if (!(lmask & EXT4_LB_SPIN)) /* not a spinlock */ -+ return 0; -+ cpu_relax(); /* spin until granted */ -+ } -+} -+ -+static int ext4_htree_node_locked(struct htree_lock *lck, unsigned lmask) -+{ -+ return ext4_htree_safe_locked(lck) || -+ htree_node_is_granted(lck, ffz(~lmask)); -+} -+ -+static void ext4_htree_node_unlock(struct htree_lock *lck, -+ unsigned lmask, void *buf) -+{ -+ /* NB: it's safe to call mutiple times or even it's not locked */ -+ if (!ext4_htree_safe_locked(lck) && -+ htree_node_is_granted(lck, ffz(~lmask))) -+ htree_node_unlock(lck, ffz(~lmask), buf); -+} -+ -+#define ext4_htree_dx_lock(lck, key) \ -+ ext4_htree_node_lock(lck, key, EXT4_LB_DX, 1, NULL) -+#define ext4_htree_dx_lock_try(lck, key) \ -+ ext4_htree_node_lock(lck, key, EXT4_LB_DX, 0, NULL) -+#define ext4_htree_dx_unlock(lck) \ -+ ext4_htree_node_unlock(lck, EXT4_LB_DX, NULL) -+#define ext4_htree_dx_locked(lck) \ -+ ext4_htree_node_locked(lck, EXT4_LB_DX) -+ -+static void ext4_htree_dx_need_lock(struct htree_lock *lck) -+{ -+ struct ext4_dir_lock_data *ld; -+ -+ if (ext4_htree_safe_locked(lck)) -+ return; -+ -+ ld = ext4_htree_lock_data(lck); -+ switch (ld->ld_flags) { -+ default: -+ return; -+ case EXT4_HLOCK_LOOKUP: -+ ld->ld_flags = EXT4_HLOCK_LOOKUP_SAFE; -+ return; -+ case EXT4_HLOCK_DEL: -+ ld->ld_flags = EXT4_HLOCK_DEL_SAFE; -+ return; -+ case EXT4_HLOCK_ADD: -+ ld->ld_flags = EXT4_HLOCK_SPLIT; -+ return; -+ } -+} -+ -+#define ext4_htree_de_lock(lck, key) \ -+ ext4_htree_node_lock(lck, key, EXT4_LB_DE, 1, NULL) -+#define ext4_htree_de_unlock(lck) \ -+ ext4_htree_node_unlock(lck, EXT4_LB_DE, NULL) -+ -+#define ext4_htree_spin_lock(lck, key, event) \ -+ ext4_htree_node_lock(lck, key, EXT4_LB_SPIN, 0, event) -+#define ext4_htree_spin_unlock(lck) \ -+ ext4_htree_node_unlock(lck, EXT4_LB_SPIN, NULL) -+#define ext4_htree_spin_unlock_listen(lck, p) \ -+ ext4_htree_node_unlock(lck, EXT4_LB_SPIN, p) -+ -+static void ext4_htree_spin_stop_listen(struct htree_lock *lck) -+{ -+ if (!ext4_htree_safe_locked(lck) && -+ htree_node_is_listening(lck, ffz(~EXT4_LB_SPIN))) -+ htree_node_stop_listen(lck, ffz(~EXT4_LB_SPIN)); -+} -+ -+enum { -+ DX_HASH_COL_IGNORE, /* ignore collision while probing frames */ -+ DX_HASH_COL_YES, /* there is collision and it does matter */ -+ DX_HASH_COL_NO, /* there is no collision */ -+}; -+ -+static int dx_probe_hash_collision(struct htree_lock *lck, -+ struct dx_entry *entries, -+ struct dx_entry *at, u32 hash) -+{ -+ if (!(ext4_htree_lock_data(lck)->ld_flags & EXT4_LB_EXACT)) { -+ return DX_HASH_COL_IGNORE; /* don't care about collision */ -+ -+ } else if (at == entries + dx_get_count(entries) - 1) { -+ return DX_HASH_COL_IGNORE; /* not in any leaf of this DX */ -+ -+ } else { /* hash collision? */ -+ return ((dx_get_hash(at + 1) & ~1) == hash) ? -+ DX_HASH_COL_YES : DX_HASH_COL_NO; -+ } -+} -+ - /* - * Probe for a directory leaf block to search. - * -@@ -679,10 +908,11 @@ struct stats dx_show_entries(struct dx_h - */ - static struct dx_frame * - dx_probe(const struct qstr *d_name, struct inode *dir, -- struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, -+ struct htree_lock *lck, int *err) - { - unsigned count, indirect; -- struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_entry *at, *entries, *p, *q, *m, *dx = NULL; - struct dx_root_info *info; - struct buffer_head *bh; - struct dx_frame *frame = frame_in; -@@ -750,8 +980,15 @@ dx_probe(const struct qstr *d_name, stru - dxtrace(printk("Look up %x", hash)); - while (1) - { -+ if (indirect == 0) { /* the last index level */ -+ /* NB: ext4_htree_dx_lock() could be noop if -+ * DX-lock flag is not set for current operation */ -+ ext4_htree_dx_lock(lck, dx); -+ ext4_htree_spin_lock(lck, dx, NULL); -+ } - count = dx_get_count(entries); -- if (!count || count > dx_get_limit(entries)) { -+ if (count == 0 || count > dx_get_limit(entries)) { -+ ext4_htree_spin_unlock(lck); /* release spin */ - ext4_warning(dir->i_sb, - "dx entry: no count or count > limit"); - brelse(bh); -@@ -792,7 +1029,70 @@ dx_probe(const struct qstr *d_name, stru - frame->bh = bh; - frame->entries = entries; - frame->at = at; -- if (!indirect--) return frame; -+ -+ if (indirect == 0) { /* the last index level */ -+ struct ext4_dir_lock_data *ld; -+ u64 myblock; -+ -+ /* By default we only lock DE-block, however, we will -+ * also lock the last level DX-block if: -+ * a) there is hash collision -+ * we will set DX-lock flag (a few lines below) -+ * and redo to lock DX-block -+ * see detail in dx_probe_hash_collision() -+ * b) it's a retry from splitting -+ * we need to lock the last level DX-block so nobody -+ * else can split any leaf blocks under the same -+ * DX-block, see detail in ext4_dx_add_entry() -+ */ -+ if (ext4_htree_dx_locked(lck)) { -+ /* DX-block is locked, just lock DE-block -+ * and return */ -+ ext4_htree_spin_unlock(lck); -+ if (!ext4_htree_safe_locked(lck)) -+ ext4_htree_de_lock(lck, frame->at); -+ return frame; -+ } -+ /* it's pdirop and no DX lock */ -+ if (dx_probe_hash_collision(lck, entries, at, hash) == -+ DX_HASH_COL_YES) { -+ /* found hash collision, set DX-lock flag -+ * and retry to abtain DX-lock */ -+ ext4_htree_spin_unlock(lck); -+ ext4_htree_dx_need_lock(lck); -+ continue; -+ } -+ ld = ext4_htree_lock_data(lck); -+ /* because I don't lock DX, so @at can't be trusted -+ * after I release spinlock so I have to save it */ -+ ld->ld_at = at; -+ ld->ld_at_entry = *at; -+ ld->ld_count = dx_get_count(entries); -+ -+ frame->at = &ld->ld_at_entry; -+ myblock = dx_get_block(at); -+ -+ /* NB: ordering locking */ -+ ext4_htree_spin_unlock_listen(lck, &myblock); -+ /* other thread can split this DE-block because: -+ * a) I don't have lock for the DE-block yet -+ * b) I released spinlock on DX-block -+ * if it happened I can detect it by listening -+ * splitting event on this DE-block */ -+ ext4_htree_de_lock(lck, frame->at); -+ ext4_htree_spin_stop_listen(lck); -+ -+ if (myblock == EXT4_HTREE_NODE_CHANGED) { -+ /* someone split this DE-block before -+ * I locked it, I need to retry and lock -+ * valid DE-block */ -+ ext4_htree_de_unlock(lck); -+ continue; -+ } -+ return frame; -+ } -+ dx = at; -+ indirect--; - bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); - if (IS_ERR(bh)) { - *err = PTR_ERR(bh); -@@ -860,7 +1160,7 @@ static void dx_release (struct dx_frame - static int ext4_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, -- __u32 *start_hash) -+ __u32 *start_hash, struct htree_lock *lck) - { - struct dx_frame *p; - struct buffer_head *bh; -@@ -875,12 +1175,22 @@ static int ext4_htree_next_block(struct - * this loop, num_frames indicates the number of interior - * nodes need to be read. - */ -+ ext4_htree_de_unlock(lck); - while (1) { -- if (++(p->at) < p->entries + dx_get_count(p->entries)) -- break; -+ if (num_frames > 0 || ext4_htree_dx_locked(lck)) { -+ /* num_frames > 0 : -+ * DX block -+ * ext4_htree_dx_locked: -+ * frame->at is reliable pointer returned by dx_probe, -+ * otherwise dx_probe already knew no collision */ -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ } - if (p == frames) - return 0; - num_frames++; -+ if (num_frames == 1) -+ ext4_htree_dx_unlock(lck); - p--; - } - -@@ -903,6 +1213,13 @@ static int ext4_htree_next_block(struct - * block so no check is necessary - */ - while (num_frames--) { -+ if (num_frames == 0) { -+ /* it's not always necessary, we just don't want to -+ * detect hash collision again */ -+ ext4_htree_dx_need_lock(lck); -+ ext4_htree_dx_lock(lck, p->at); -+ } -+ - bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX); - if (IS_ERR(bh)) - return PTR_ERR(bh); -@@ -911,6 +1228,7 @@ static int ext4_htree_next_block(struct - p->bh = bh; - p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; - } -+ ext4_htree_de_lock(lck, p->at); - return 1; - } - -@@ -1013,10 +1331,10 @@ int ext4_htree_fill_tree(struct file *di - } - hinfo.hash = start_hash; - hinfo.minor_hash = 0; -- frame = dx_probe(NULL, dir, &hinfo, frames, &err); -+ /* assume it's PR locked */ -+ frame = dx_probe(NULL, dir, &hinfo, frames, NULL, &err); - if (!frame) - return err; -- - /* Add '.' and '..' from the htree header */ - if (!start_hash && !start_minor_hash) { - de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; -@@ -1043,7 +1361,7 @@ int ext4_htree_fill_tree(struct file *di - count += ret; - hashval = ~0; - ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS, -- frame, frames, &hashval); -+ frame, frames, &hashval, NULL); - *next_hash = hashval; - if (ret < 0) { - err = ret; -@@ -1236,10 +1554,10 @@ static int is_dx_internal_node(struct in - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ --static struct buffer_head * ext4_find_entry (struct inode *dir, -+struct buffer_head *__ext4_find_entry(struct inode *dir, - const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, -- int *inlined) -+ int *inlined, struct htree_lock *lck) - { - struct super_block *sb; - struct buffer_head *bh_use[NAMEI_RA_SIZE]; -@@ -1283,7 +1601,7 @@ static struct buffer_head * ext4_find_en - goto restart; - } - if (is_dx(dir)) { -- bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); -+ bh = ext4_dx_find_entry(dir, d_name, res_dir, lck, &err); - /* - * On success, or if the error was file not found, - * return. Otherwise, fall back to doing a search the -@@ -1297,6 +1615,7 @@ static struct buffer_head * ext4_find_en - return bh; - dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " - "falling back\n")); -+ ext4_htree_safe_relock(lck); - } - nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); - start = EXT4_I(dir)->i_dir_start_lookup; -@@ -1389,9 +1708,12 @@ cleanup_and_exit: - brelse(bh_use[ra_ptr]); - return ret; - } -+EXPORT_SYMBOL(__ext4_find_entry); - --static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, -- struct ext4_dir_entry_2 **res_dir, int *err) -+static struct buffer_head *ext4_dx_find_entry(struct inode *dir, -+ const struct qstr *d_name, -+ struct ext4_dir_entry_2 **res_dir, -+ struct htree_lock *lck, int *err) - { - struct super_block * sb = dir->i_sb; - struct dx_hash_info hinfo; -@@ -1400,7 +1722,7 @@ static struct buffer_head * ext4_dx_find - ext4_lblk_t block; - int retval; - -- if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) -+ if (!(frame = dx_probe(d_name, dir, &hinfo, frames, lck, err))) - return NULL; - do { - block = dx_get_block(frame->at); -@@ -1424,7 +1746,7 @@ static struct buffer_head * ext4_dx_find - - /* Check to see if we should continue to search */ - retval = ext4_htree_next_block(dir, hinfo.hash, frame, -- frames, NULL); -+ frames, NULL, lck); - if (retval < 0) { - ext4_warning(sb, - "error reading index page in directory #%lu", -@@ -1583,8 +1905,9 @@ static struct ext4_dir_entry_2* dx_pack_ - * Returns pointer to de in block into which the new entry will be inserted. - */ - static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -- struct buffer_head **bh,struct dx_frame *frame, -- struct dx_hash_info *hinfo, int *error) -+ struct buffer_head **bh, struct dx_frame *frames, -+ struct dx_frame *frame, struct dx_hash_info *hinfo, -+ struct htree_lock *lck, int *error) - { - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; -@@ -1647,7 +1970,14 @@ static struct ext4_dir_entry_2 *do_split - hash2, split, count-split)); - - /* Fancy dance to stay within two buffers */ -- de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); -+ if (hinfo->hash < hash2) { -+ de2 = dx_move_dirents(data1, data2, map + split, -+ count - split, blocksize); -+ } else { -+ /* make sure we will add entry to the same block which -+ * we have already locked */ -+ de2 = dx_move_dirents(data1, data2, map, split, blocksize); -+ } - de = dx_pack_dirents(data1, blocksize); - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - - (char *) de, -@@ -1666,13 +1996,21 @@ static struct ext4_dir_entry_2 *do_split - dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); - dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); - -- /* Which block gets the new entry? */ -- if (hinfo->hash >= hash2) -- { -- swap(*bh, bh2); -- de = de2; -+ ext4_htree_spin_lock(lck, frame > frames ? (frame - 1)->at : NULL, -+ frame->at); /* notify block is being split */ -+ if (hinfo->hash < hash2) { -+ dx_insert_block(frame, hash2 + continued, newblock); -+ -+ } else { -+ /* switch block number */ -+ dx_insert_block(frame, hash2 + continued, -+ dx_get_block(frame->at)); -+ dx_set_block(frame->at, newblock); -+ (frame->at)++; - } -- dx_insert_block(frame, hash2 + continued, newblock); -+ ext4_htree_spin_unlock(lck); -+ ext4_htree_dx_unlock(lck); -+ - err = ext4_handle_dirty_dirent_node(handle, dir, bh2); - if (err) - goto journal_error; -@@ -1945,7 +2283,7 @@ static int make_indexed_dir(handle_t *ha - if (retval) - goto out_frames; - -- de = do_split(handle,dir, &bh2, frame, &hinfo, &retval); -+ de = do_split(handle, dir, &bh2, frames, frame, &hinfo, NULL, &retval); - if (!de) { - goto out_frames; - } -@@ -2051,8 +2389,8 @@ out: - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ --static int ext4_add_entry(handle_t *handle, struct dentry *dentry, -- struct inode *inode) -+int __ext4_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct htree_lock *lck) - { - struct inode *dir = dentry->d_parent->d_inode; - struct buffer_head *bh = NULL; -@@ -2087,9 +2425,10 @@ static int ext4_add_entry(handle_t *hand - if (dentry->d_name.len == 2 && - memcmp(dentry->d_name.name, "..", 2) == 0) - return ext4_update_dotdot(handle, dentry, inode); -- retval = ext4_dx_add_entry(handle, dentry, inode); -+ retval = ext4_dx_add_entry(handle, dentry, inode, lck); - if (!retval || (retval != ERR_BAD_DX_DIR)) - goto out; -+ ext4_htree_safe_relock(lck); - ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); - dx_fallback++; - ext4_mark_inode_dirty(handle, dir); -@@ -2129,12 +2468,13 @@ static int ext4_add_entry(handle_t *hand - ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); - return retval; - } -+EXPORT_SYMBOL(__ext4_add_entry); - - /* - * Returns 0 for success, or a negative error value - */ - static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, -- struct inode *inode) -+ struct inode *inode, struct htree_lock *lck) - { - struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; - struct dx_entry *entries, *at; -@@ -2148,7 +2488,7 @@ static int ext4_dx_add_entry(handle_t *h - - again: - restart = 0; -- frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); -+ frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, lck, &err); - if (!frame) - return err; - entries = frame->entries; -@@ -2178,6 +2518,11 @@ again: - struct dx_node *node2; - struct buffer_head *bh2; - -+ if (!ext4_htree_safe_locked(lck)) { /* retry with EX lock */ -+ ext4_htree_safe_relock(lck); -+ restart = 1; -+ goto cleanup; -+ } - while (frame > frames) { - if (dx_get_count((frame - 1)->entries) < - dx_get_limit((frame - 1)->entries)) { -@@ -2277,16 +2622,43 @@ again: - restart = 1; - goto journal_error; - } -+ } else if (!ext4_htree_dx_locked(lck)) { -+ struct ext4_dir_lock_data *ld = ext4_htree_lock_data(lck); -+ -+ /* not well protected, require DX lock */ -+ ext4_htree_dx_need_lock(lck); -+ at = frame > frames ? (frame - 1)->at : NULL; -+ -+ /* NB: no risk of deadlock because it's just a try. -+ * -+ * NB: we check ld_count for twice, the first time before -+ * having DX lock, the second time after holding DX lock. -+ * -+ * NB: We never free blocks for directory so far, which -+ * means value returned by dx_get_count() should equal to -+ * ld->ld_count if nobody split any DE-block under @at, -+ * and ld->ld_at still points to valid dx_entry. */ -+ if ((ld->ld_count != dx_get_count(entries)) || -+ !ext4_htree_dx_lock_try(lck, at) || -+ (ld->ld_count != dx_get_count(entries))) { -+ restart = 1; -+ goto cleanup; -+ } -+ /* OK, I've got DX lock and nothing changed */ -+ frame->at = ld->ld_at; - } -- de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ de = do_split(handle, dir, &bh, frames, frame, &hinfo, lck, &err); - if (!de) - goto cleanup; -+ - err = add_dirent_to_buf(handle, dentry, inode, de, bh); - goto cleanup; - - journal_error: - ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */ - cleanup: -+ ext4_htree_dx_unlock(lck); -+ ext4_htree_de_unlock(lck); - brelse(bh); - dx_release(frames); - /* @restart is true means htree-path has been changed, we need to -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/super.c -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/super.c -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/super.c -@@ -875,6 +875,7 @@ static struct inode *ext4_alloc_inode(st - - ei->vfs_inode.i_version = 1; - spin_lock_init(&ei->i_raw_lock); -+ sema_init(&ei->i_append_sem, 1); - INIT_LIST_HEAD(&ei->i_prealloc_list); - spin_lock_init(&ei->i_prealloc_lock); - ext4_es_init_tree(&ei->i_es_tree); diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-projid-ignore-maxquotas.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-projid-ignore-maxquotas.patch deleted file mode 100644 index dfde66f..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-projid-ignore-maxquotas.patch +++ /dev/null @@ -1,141 +0,0 @@ -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index d00d779..9308f35 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -1179,6 +1179,9 @@ struct ext4_super_block { - #define EXT4_MF_MNTDIR_SAMPLED 0x0001 - #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ - -+/* Number of quota types we support */ -+#define EXT4_MAXQUOTAS 2 -+ - /* - * fourth extended-fs super-block data in memory - */ -@@ -1242,7 +1245,7 @@ struct ext4_sb_info { - u32 s_min_batch_time; - struct block_device *journal_bdev; - #ifdef CONFIG_QUOTA -- char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ -+ char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ - #endif - unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ -diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h -index 17c00ff..9c5b49f 100644 ---- a/fs/ext4/ext4_jbd2.h -+++ b/fs/ext4/ext4_jbd2.h -@@ -102,9 +102,9 @@ - #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 - #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 - #endif --#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) --#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) --#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) -+#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) -+#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) -+#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) - - static inline int ext4_jbd2_credits_xattr(struct inode *inode) - { -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 7f2c846..24ef7b2 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -820,7 +820,7 @@ static void ext4_put_super(struct super_block *sb) - percpu_counter_destroy(&sbi->s_extent_cache_cnt); - brelse(sbi->s_sbh); - #ifdef CONFIG_QUOTA -- for (i = 0; i < MAXQUOTAS; i++) -+ for (i = 0; i < EXT4_MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); - #endif - -@@ -2224,7 +2224,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, - /* Needed for iput() to work correctly and not trash data */ - sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ -- for (i = 0; i < MAXQUOTAS; i++) { -+ for (i = 0; i < EXT4_MAXQUOTAS; i++) { - if (EXT4_SB(sb)->s_qf_names[i]) { - int ret = ext4_quota_on_mount(sb, i); - if (ret < 0) -@@ -2280,7 +2280,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, - PLURAL(nr_truncates)); - #ifdef CONFIG_QUOTA - /* Turn quotas off */ -- for (i = 0; i < MAXQUOTAS; i++) { -+ for (i = 0; i < EXT4_MAXQUOTAS; i++) { - if (sb_dqopt(sb)->files[i]) - dquot_quota_off(sb, i); - } -@@ -4322,7 +4322,7 @@ failed_mount: - remove_proc_entry(sb->s_id, ext4_proc_root); - } - #ifdef CONFIG_QUOTA -- for (i = 0; i < MAXQUOTAS; i++) -+ for (i = 0; i < EXT4_MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); - #endif - ext4_blkdev_remove(sbi); -@@ -4858,7 +4858,7 @@ struct ext4_mount_options { - u32 s_min_batch_time, s_max_batch_time; - #ifdef CONFIG_QUOTA - int s_jquota_fmt; -- char *s_qf_names[MAXQUOTAS]; -+ char *s_qf_names[EXT4_MAXQUOTAS]; - #endif - }; - -@@ -4888,7 +4888,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) - old_opts.s_max_batch_time = sbi->s_max_batch_time; - #ifdef CONFIG_QUOTA - old_opts.s_jquota_fmt = sbi->s_jquota_fmt; -- for (i = 0; i < MAXQUOTAS; i++) -+ for (i = 0; i < EXT4_MAXQUOTAS; i++) - if (sbi->s_qf_names[i]) { - old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); -@@ -5062,7 +5062,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) - - #ifdef CONFIG_QUOTA - /* Release old quota file names */ -- for (i = 0; i < MAXQUOTAS; i++) -+ for (i = 0; i < EXT4_MAXQUOTAS; i++) - kfree(old_opts.s_qf_names[i]); - if (enable_quota) { - if (sb_any_quota_suspended(sb)) -@@ -5091,7 +5091,7 @@ restore_opts: - sbi->s_max_batch_time = old_opts.s_max_batch_time; - #ifdef CONFIG_QUOTA - sbi->s_jquota_fmt = old_opts.s_jquota_fmt; -- for (i = 0; i < MAXQUOTAS; i++) { -+ for (i = 0; i < EXT4_MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = old_opts.s_qf_names[i]; - } -@@ -5294,7 +5294,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, - { - int err; - struct inode *qf_inode; -- unsigned long qf_inums[MAXQUOTAS] = { -+ unsigned long qf_inums[EXT4_MAXQUOTAS] = { - le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), - le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) - }; -@@ -5322,13 +5322,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, - static int ext4_enable_quotas(struct super_block *sb) - { - int type, err = 0; -- unsigned long qf_inums[MAXQUOTAS] = { -+ unsigned long qf_inums[EXT4_MAXQUOTAS] = { - le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), - le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) - }; - - sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; -- for (type = 0; type < MAXQUOTAS; type++) { -+ for (type = 0; type < EXT4_MAXQUOTAS; type++) { - if (qf_inums[type]) { - err = ext4_quota_enable(sb, type, QFMT_VFS_V1, - DQUOT_USAGE_ENABLED); diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-remove-extent-status-procfs-files-if-journal-lo.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-remove-extent-status-procfs-files-if-journal-lo.patch deleted file mode 100644 index de226af..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-remove-extent-status-procfs-files-if-journal-lo.patch +++ /dev/null @@ -1,49 +0,0 @@ -From fabafc86567c2165c5b2165dcbf835edd6f81e72 Mon Sep 17 00:00:00 2001 -From: "Darrick J. Wong" -Date: Thu, 30 Oct 2014 10:53:16 -0400 -Subject: [PATCH 2/7] ext4: remove extent status procfs files if journal load - fails - -If we can't load the journal, remove the procfs files for the extent -status information file to avoid leaking resources. - -Signed-off-by: Darrick J. Wong -Signed-off-by: Theodore Ts'o -Cc: stable@vger.kernel.org ---- - fs/ext4/super.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index bcdb48cf..95a01d56 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -4326,7 +4326,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && - !(sb->s_flags & MS_RDONLY)) - if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) -- goto failed_mount3; -+ goto failed_mount3a; - - ext4_ext_init(sb); /* needed before using extent-mapped journal */ - -@@ -4338,7 +4338,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { - err = ext4_load_journal(sb, es, journal_devnum); - if (err) -- goto failed_mount3; -+ goto failed_mount3a; - } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && - EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { - ext4_msg(sb, KERN_ERR, "required journal recovery " -@@ -4635,6 +4635,7 @@ failed_mount_wq: - jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; - } -+failed_mount3a: - ext4_es_unregister_shrinker(sbi); - failed_mount3: - del_timer_sync(&sbi->s_err_report); --- -2.24.1 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-track-extent-status-tree-shrinker-delay-statict.patch b/ldiskfs/kernel_patches/patches/rhel7.6/ext4-track-extent-status-tree-shrinker-delay-statict.patch deleted file mode 100644 index c43d374..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.6/ext4-track-extent-status-tree-shrinker-delay-statict.patch +++ /dev/null @@ -1,465 +0,0 @@ -From f33e0fa5ab6cad962d3b88376f4611b9aba1d030 Mon Sep 17 00:00:00 2001 -From: Wang Shilong -Date: Thu, 27 Feb 2020 17:08:04 +0800 -Subject: [PATCH 1/7] ext4: track extent status tree shrinker delay statictics - -This commit adds some statictics in extent status tree shrinker. The -purpose to add these is that we want to collect more details when we -encounter a stall caused by extent status tree shrinker. Here we count -the following statictics: - stats: - the number of all objects on all extent status trees - the number of reclaimable objects on lru list - cache hits/misses - the last sorted interval - the number of inodes on lru list - average: - scan time for shrinking some objects - the number of shrunk objects - maximum: - the inode that has max nr. of objects on lru list - the maximum scan time for shrinking some objects - -The output looks like below: - $ cat /proc/fs/ext4/sda1/es_shrinker_info - stats: - 28228 objects - 6341 reclaimable objects - 5281/631 cache hits/misses - 586 ms last sorted interval - 250 inodes on lru list - average: - 153 us scan time - 128 shrunk objects - maximum: - 255 inode (255 objects, 198 reclaimable) - 125723 us max scan time - -If the lru list has never been sorted, the following line will not be -printed: - 586ms last sorted interval -If there is an empty lru list, the following lines also will not be -printed: - 250 inodes on lru list - ... - maximum: - 255 inode (255 objects, 198 reclaimable) - 0 us max scan time - -Meanwhile in this commit a new trace point is defined to print some -details in __ext4_es_shrink(). - -[Shilong remove trace point parts of this patch] - -Cc: Andreas Dilger -Cc: Jan Kara -Reviewed-by: Jan Kara -Signed-off-by: Zheng Liu -Signed-off-by: Theodore Ts'o ---- - fs/ext4/ext4.h | 4 +- - fs/ext4/extents_status.c | 179 +++++++++++++++++++++++++++++++++++++-- - fs/ext4/extents_status.h | 13 ++- - fs/ext4/super.c | 13 +-- - 4 files changed, 187 insertions(+), 22 deletions(-) - -diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h -index 763276e2..cc5ba587 100644 ---- a/fs/ext4/ext4.h -+++ b/fs/ext4/ext4.h -@@ -1018,6 +1018,7 @@ struct ext4_inode_info { - struct ext4_es_tree i_es_tree; - rwlock_t i_es_lock; - struct list_head i_es_lru; -+ unsigned int i_es_all_nr; /* protected by i_es_lock */ - unsigned int i_es_lru_nr; /* protected by i_es_lock */ - unsigned long i_touch_when; /* jiffies of last accessing */ - -@@ -1482,8 +1483,7 @@ struct ext4_sb_info { - /* Reclaim extents from extent status tree */ - struct shrinker s_es_shrinker; - struct list_head s_es_lru; -- unsigned long s_es_last_sorted; -- struct percpu_counter s_extent_cache_cnt; -+ struct ext4_es_stats s_es_stats; - spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; - - /* Ratelimit ext4 messages. */ -diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c -index 3ef7f932..7dfed27b 100644 ---- a/fs/ext4/extents_status.c -+++ b/fs/ext4/extents_status.c -@@ -11,6 +11,8 @@ - */ - #include - #include -+#include -+#include - #include "ext4.h" - #include "extents_status.h" - -@@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, - */ - if (!ext4_es_is_delayed(es)) { - EXT4_I(inode)->i_es_lru_nr++; -- percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); -+ percpu_counter_inc(&EXT4_SB(inode->i_sb)-> -+ s_es_stats.es_stats_lru_cnt); - } - -+ EXT4_I(inode)->i_es_all_nr++; -+ percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); -+ - return es; - } - - static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) - { -+ EXT4_I(inode)->i_es_all_nr--; -+ percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); -+ - /* Decrease the lru counter when this es is not delayed */ - if (!ext4_es_is_delayed(es)) { - BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); - EXT4_I(inode)->i_es_lru_nr--; -- percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); -+ percpu_counter_dec(&EXT4_SB(inode->i_sb)-> -+ s_es_stats.es_stats_lru_cnt); - } - - kmem_cache_free(ext4_es_cachep, es); -@@ -739,6 +749,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, - struct extent_status *es) - { - struct ext4_es_tree *tree; -+ struct ext4_es_stats *stats; - struct extent_status *es1 = NULL; - struct rb_node *node; - int found = 0; -@@ -775,11 +786,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, - } - - out: -+ stats = &EXT4_SB(inode->i_sb)->s_es_stats; - if (found) { - BUG_ON(!es1); - es->es_lblk = es1->es_lblk; - es->es_len = es1->es_len; - es->es_pblk = es1->es_pblk; -+ stats->es_stats_cache_hits++; -+ } else { -+ stats->es_stats_cache_misses++; - } - - read_unlock(&EXT4_I(inode)->i_es_lock); -@@ -941,11 +956,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, - struct ext4_inode_info *locked_ei) - { - struct ext4_inode_info *ei; -+ struct ext4_es_stats *es_stats; - struct list_head *cur, *tmp; - LIST_HEAD(skipped); -+ ktime_t start_time; -+ u64 scan_time; - int ret, nr_shrunk = 0; - int retried = 0, skip_precached = 1, nr_skipped = 0; - -+ es_stats = &sbi->s_es_stats; -+ start_time = ktime_get(); - spin_lock(&sbi->s_es_lru_lock); - - retry: -@@ -954,7 +974,8 @@ retry: - * If we have already reclaimed all extents from extent - * status tree, just stop the loop immediately. - */ -- if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) -+ if (percpu_counter_read_positive( -+ &es_stats->es_stats_lru_cnt) == 0) - break; - - ei = list_entry(cur, struct ext4_inode_info, i_es_lru); -@@ -964,7 +985,7 @@ retry: - * time. Normally we try hard to avoid shrinking - * precached inodes, but we will as a last resort. - */ -- if ((sbi->s_es_last_sorted < ei->i_touch_when) || -+ if ((es_stats->es_stats_last_sorted < ei->i_touch_when) || - (skip_precached && ext4_test_inode_state(&ei->vfs_inode, - EXT4_STATE_EXT_PRECACHED))) { - nr_skipped++; -@@ -998,7 +1019,7 @@ retry: - if ((nr_shrunk == 0) && nr_skipped && !retried) { - retried++; - list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); -- sbi->s_es_last_sorted = jiffies; -+ es_stats->es_stats_last_sorted = jiffies; - ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, - i_es_lru); - /* -@@ -1016,6 +1037,20 @@ retry: - if (locked_ei && nr_shrunk == 0) - nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); - -+ scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); -+ if (likely(es_stats->es_stats_scan_time)) -+ es_stats->es_stats_scan_time = (scan_time + -+ es_stats->es_stats_scan_time*3) / 4; -+ else -+ es_stats->es_stats_scan_time = scan_time; -+ if (scan_time > es_stats->es_stats_max_scan_time) -+ es_stats->es_stats_max_scan_time = scan_time; -+ if (likely(es_stats->es_stats_shrunk)) -+ es_stats->es_stats_shrunk = (nr_shrunk + -+ es_stats->es_stats_shrunk*3) / 4; -+ else -+ es_stats->es_stats_shrunk = nr_shrunk; -+ - return nr_shrunk; - } - -@@ -1026,7 +1061,7 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) - int nr_to_scan = sc->nr_to_scan; - int ret, nr_shrunk; - -- ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); -+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); - trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); - - if (!nr_to_scan) -@@ -1034,23 +1069,150 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) - - nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); - -- ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); -+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt); - trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); - return ret; - } - --void ext4_es_register_shrinker(struct ext4_sb_info *sbi) -+static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos) -+{ -+ return *pos ? NULL : SEQ_START_TOKEN; -+} -+ -+static void * -+ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return NULL; -+} -+ -+static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v) -+{ -+ struct ext4_sb_info *sbi = seq->private; -+ struct ext4_es_stats *es_stats = &sbi->s_es_stats; -+ struct ext4_inode_info *ei, *max = NULL; -+ unsigned int inode_cnt = 0; -+ -+ if (v != SEQ_START_TOKEN) -+ return 0; -+ -+ /* here we just find an inode that has the max nr. of objects */ -+ spin_lock(&sbi->s_es_lru_lock); -+ list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) { -+ inode_cnt++; -+ if (max && max->i_es_all_nr < ei->i_es_all_nr) -+ max = ei; -+ else if (!max) -+ max = ei; -+ } -+ spin_unlock(&sbi->s_es_lru_lock); -+ -+ seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n", -+ percpu_counter_sum_positive(&es_stats->es_stats_all_cnt), -+ percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt)); -+ seq_printf(seq, " %lu/%lu cache hits/misses\n", -+ es_stats->es_stats_cache_hits, -+ es_stats->es_stats_cache_misses); -+ if (es_stats->es_stats_last_sorted != 0) -+ seq_printf(seq, " %u ms last sorted interval\n", -+ jiffies_to_msecs(jiffies - -+ es_stats->es_stats_last_sorted)); -+ if (inode_cnt) -+ seq_printf(seq, " %d inodes on lru list\n", inode_cnt); -+ -+ seq_printf(seq, "average:\n %llu us scan time\n", -+ div_u64(es_stats->es_stats_scan_time, 1000)); -+ seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk); -+ if (inode_cnt) -+ seq_printf(seq, -+ "maximum:\n %lu inode (%u objects, %u reclaimable)\n" -+ " %llu us max scan time\n", -+ max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr, -+ div_u64(es_stats->es_stats_max_scan_time, 1000)); -+ -+ return 0; -+} -+ -+static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v) -+{ -+} -+ -+static const struct seq_operations ext4_es_seq_shrinker_info_ops = { -+ .start = ext4_es_seq_shrinker_info_start, -+ .next = ext4_es_seq_shrinker_info_next, -+ .stop = ext4_es_seq_shrinker_info_stop, -+ .show = ext4_es_seq_shrinker_info_show, -+}; -+ -+static int -+ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file) -+{ -+ int ret; -+ -+ ret = seq_open(file, &ext4_es_seq_shrinker_info_ops); -+ if (!ret) { -+ struct seq_file *m = file->private_data; -+ m->private = PDE_DATA(inode); -+ } -+ -+ return ret; -+} -+ -+static int -+ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file) - { -+ return seq_release(inode, file); -+} -+ -+static const struct file_operations ext4_es_seq_shrinker_info_fops = { -+ .owner = THIS_MODULE, -+ .open = ext4_es_seq_shrinker_info_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = ext4_es_seq_shrinker_info_release, -+}; -+ -+int ext4_es_register_shrinker(struct ext4_sb_info *sbi) -+{ -+ int err; -+ - INIT_LIST_HEAD(&sbi->s_es_lru); - spin_lock_init(&sbi->s_es_lru_lock); -- sbi->s_es_last_sorted = 0; -+ sbi->s_es_stats.es_stats_last_sorted = 0; -+ sbi->s_es_stats.es_stats_shrunk = 0; -+ sbi->s_es_stats.es_stats_cache_hits = 0; -+ sbi->s_es_stats.es_stats_cache_misses = 0; -+ sbi->s_es_stats.es_stats_scan_time = 0; -+ sbi->s_es_stats.es_stats_max_scan_time = 0; -+ err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, -+ 0, GFP_KERNEL); -+ if (err) -+ return err; -+ err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, -+ 0, GFP_KERNEL); -+ if (err) -+ goto err; - sbi->s_es_shrinker.shrink = ext4_es_shrink; - sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; - register_shrinker(&sbi->s_es_shrinker); -+ -+ if (sbi->s_proc) -+ proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc, -+ &ext4_es_seq_shrinker_info_fops, sbi); -+ -+ return 0; -+ -+err: -+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); -+ return err; - } - - void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) - { -+ if (sbi->s_proc) -+ remove_proc_entry("es_shrinker_info", sbi->s_proc); -+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt); -+ percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt); - unregister_shrinker(&sbi->s_es_shrinker); - } - -diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h -index f1b62a41..efd5f970 100644 ---- a/fs/ext4/extents_status.h -+++ b/fs/ext4/extents_status.h -@@ -64,6 +64,17 @@ struct ext4_es_tree { - struct extent_status *cache_es; /* recently accessed extent */ - }; - -+struct ext4_es_stats { -+ unsigned long es_stats_last_sorted; -+ unsigned long es_stats_shrunk; -+ unsigned long es_stats_cache_hits; -+ unsigned long es_stats_cache_misses; -+ u64 es_stats_scan_time; -+ u64 es_stats_max_scan_time; -+ struct percpu_counter es_stats_all_cnt; -+ struct percpu_counter es_stats_lru_cnt; -+}; -+ - extern int __init ext4_init_es(void); - extern void ext4_exit_es(void); - extern void ext4_es_init_tree(struct ext4_es_tree *tree); -@@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es, - (pb & ~ES_MASK)); - } - --extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); -+extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi); - extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); - extern void ext4_es_lru_add(struct inode *inode); - extern void ext4_es_lru_del(struct inode *inode); -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index 18fe358c..bcdb48cf 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -880,7 +880,6 @@ static void ext4_put_super(struct super_block *sb) - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); - percpu_counter_destroy(&sbi->s_dirtyclusters_counter); -- percpu_counter_destroy(&sbi->s_extent_cache_cnt); - #ifdef CONFIG_QUOTA - for (i = 0; i < EXT4_MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); -@@ -944,6 +943,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) - ext4_es_init_tree(&ei->i_es_tree); - rwlock_init(&ei->i_es_lock); - INIT_LIST_HEAD(&ei->i_es_lru); -+ ei->i_es_all_nr = 0; - ei->i_es_lru_nr = 0; - ei->i_touch_when = 0; - ei->i_reserved_data_blocks = 0; -@@ -4289,14 +4289,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) - sbi->s_err_report.function = print_daily_error_info; - sbi->s_err_report.data = (unsigned long) sb; - -- /* Register extent status tree shrinker */ -- ext4_es_register_shrinker(sbi); -- -- err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); -- if (err) { -- ext4_msg(sb, KERN_ERR, "insufficient memory"); -+ if (ext4_es_register_shrinker(sbi)) - goto failed_mount3; -- } - - sbi->s_stripe = ext4_get_stripe_size(sbi); - sbi->s_extent_max_zeroout_kb = 32; -@@ -4641,10 +4635,9 @@ failed_mount_wq: - jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; - } --failed_mount3: - ext4_es_unregister_shrinker(sbi); -+failed_mount3: - del_timer_sync(&sbi->s_err_report); -- percpu_counter_destroy(&sbi->s_extent_cache_cnt); - if (sbi->s_mmp_tsk) - kthread_stop(sbi->s_mmp_tsk); - failed_mount2: --- -2.24.1 - diff --git a/ldiskfs/kernel_patches/patches/rhel7.7/ext4-pdirop.patch b/ldiskfs/kernel_patches/patches/rhel7.7/ext4-pdirop.patch deleted file mode 100644 index f208ea0..0000000 --- a/ldiskfs/kernel_patches/patches/rhel7.7/ext4-pdirop.patch +++ /dev/null @@ -1,855 +0,0 @@ -Single directory performance is a critical for HPC workloads. In a -typical use case an application creates a separate output file for -each node and task in a job. As nodes and tasks increase, hundreds -of thousands of files may be created in a single directory within -a short window of time. -Today, both filename lookup and file system modifying operations -(such as create and unlink) are protected with a single lock for -an entire ldiskfs directory. PDO project will remove this -bottleneck by introducing a parallel locking mechanism for entire -ldiskfs directories. This work will enable multiple application -threads to simultaneously lookup, create and unlink in parallel. - -This patch contains: - - pdirops support for ldiskfs - - integrate with osd-ldiskfs - -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/Makefile -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile -@@ -6,6 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o - - ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ -+ htree_lock.o \ - ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ - mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ - xattr_trusted.o inline.o -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -821,6 +822,9 @@ struct ext4_inode_info { - __u32 i_dtime; - ext4_fsblk_t i_file_acl; - -+ /* following fields for parallel directory operations -bzzz */ -+ struct semaphore i_append_sem; -+ - /* - * i_block_group is the number of the block group which contains - * this file's inode. Constant across the lifetime of the inode, -@@ -1846,6 +1850,71 @@ struct dx_hash_info - */ - #define HASH_NB_ALWAYS 1 - -+/* assume name-hash is protected by upper layer */ -+#define EXT4_HTREE_LOCK_HASH 0 -+ -+enum ext4_pdo_lk_types { -+#if EXT4_HTREE_LOCK_HASH -+ EXT4_LK_HASH, -+#endif -+ EXT4_LK_DX, /* index block */ -+ EXT4_LK_DE, /* directory entry block */ -+ EXT4_LK_SPIN, /* spinlock */ -+ EXT4_LK_MAX, -+}; -+ -+/* read-only bit */ -+#define EXT4_LB_RO(b) (1 << (b)) -+/* read + write, high bits for writer */ -+#define EXT4_LB_RW(b) ((1 << (b)) | (1 << (EXT4_LK_MAX + (b)))) -+ -+enum ext4_pdo_lock_bits { -+ /* DX lock bits */ -+ EXT4_LB_DX_RO = EXT4_LB_RO(EXT4_LK_DX), -+ EXT4_LB_DX = EXT4_LB_RW(EXT4_LK_DX), -+ /* DE lock bits */ -+ EXT4_LB_DE_RO = EXT4_LB_RO(EXT4_LK_DE), -+ EXT4_LB_DE = EXT4_LB_RW(EXT4_LK_DE), -+ /* DX spinlock bits */ -+ EXT4_LB_SPIN_RO = EXT4_LB_RO(EXT4_LK_SPIN), -+ EXT4_LB_SPIN = EXT4_LB_RW(EXT4_LK_SPIN), -+ /* accurate searching */ -+ EXT4_LB_EXACT = EXT4_LB_RO(EXT4_LK_MAX << 1), -+}; -+ -+enum ext4_pdo_lock_opc { -+ /* external */ -+ EXT4_HLOCK_READDIR = (EXT4_LB_DE_RO | EXT4_LB_DX_RO), -+ EXT4_HLOCK_LOOKUP = (EXT4_LB_DE_RO | EXT4_LB_SPIN_RO | -+ EXT4_LB_EXACT), -+ EXT4_HLOCK_DEL = (EXT4_LB_DE | EXT4_LB_SPIN_RO | -+ EXT4_LB_EXACT), -+ EXT4_HLOCK_ADD = (EXT4_LB_DE | EXT4_LB_SPIN_RO), -+ -+ /* internal */ -+ EXT4_HLOCK_LOOKUP_SAFE = (EXT4_LB_DE_RO | EXT4_LB_DX_RO | -+ EXT4_LB_EXACT), -+ EXT4_HLOCK_DEL_SAFE = (EXT4_LB_DE | EXT4_LB_DX_RO | EXT4_LB_EXACT), -+ EXT4_HLOCK_SPLIT = (EXT4_LB_DE | EXT4_LB_DX | EXT4_LB_SPIN), -+}; -+ -+extern struct htree_lock_head *ext4_htree_lock_head_alloc(unsigned hbits); -+#define ext4_htree_lock_head_free(lhead) htree_lock_head_free(lhead) -+ -+extern struct htree_lock *ext4_htree_lock_alloc(void); -+#define ext4_htree_lock_free(lck) htree_lock_free(lck) -+ -+extern void ext4_htree_lock(struct htree_lock *lck, -+ struct htree_lock_head *lhead, -+ struct inode *dir, unsigned flags); -+#define ext4_htree_unlock(lck) htree_unlock(lck) -+ -+extern struct buffer_head *__ext4_find_entry(struct inode *dir, -+ const struct qstr *d_name, -+ struct ext4_dir_entry_2 **res_dir, -+ int *inlined, struct htree_lock *lck); -+extern int __ext4_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct htree_lock *lck); - - /* - * Describe an inode's exact location on disk and in memory -@@ -2088,9 +2157,17 @@ void ext4_insert_dentry(struct inode *in - const char *name, int namelen, void *data); - static inline void ext4_update_dx_flag(struct inode *inode) - { -+ /* Disable it for ldiskfs, because going from a DX directory to -+ * a non-DX directory while it is in use will completely break -+ * the htree-locking. -+ * If we really want to support this operation in the future, -+ * we need to exclusively lock the directory at here which will -+ * increase complexity of code */ -+#if 0 - if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX)) - ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); -+#endif - } - static unsigned char ext4_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -Index: linux-3.10.0-1062.el7.x86_64/fs/ext4/namei.c -=================================================================== ---- linux-3.10.0-1062.el7.x86_64.orig/fs/ext4/namei.c -+++ linux-3.10.0-1062.el7.x86_64/fs/ext4/namei.c -@@ -53,6 +53,7 @@ struct buffer_head *ext4_append(handle_t - ext4_lblk_t *block) - { - struct buffer_head *bh; -+ struct ext4_inode_info *ei = EXT4_I(inode); - int err = 0; - - if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && -@@ -60,15 +61,22 @@ struct buffer_head *ext4_append(handle_t - EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) - return ERR_PTR(-ENOSPC); - -+ /* with parallel dir operations all appends -+ * have to be serialized -bzzz */ -+ down(&ei->i_append_sem); -+ - *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - - bh = ext4_bread(handle, inode, *block, 1, &err); -- if (!bh) -+ if (!bh) { -+ up(&ei->i_append_sem); - return ERR_PTR(err); -+ } - inode->i_size += inode->i_sb->s_blocksize; - EXT4_I(inode)->i_disksize = inode->i_size; - BUFFER_TRACE(bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, bh); -+ up(&ei->i_append_sem); - if (err) { - brelse(bh); - ext4_std_error(inode->i_sb, err); -@@ -247,7 +255,7 @@ static struct dx_frame *dx_probe(const s - struct inode *dir, - struct dx_hash_info *hinfo, - struct dx_frame *frame, -- int *err); -+ struct htree_lock *lck, int *err); - static void dx_release(struct dx_frame *frames); - static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, - struct dx_hash_info *hinfo, struct dx_map_entry map[]); -@@ -260,13 +268,13 @@ static void dx_insert_block(struct dx_fr - static int ext4_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, -- __u32 *start_hash); -+ __u32 *start_hash, struct htree_lock *lck); - static struct buffer_head * ext4_dx_find_entry(struct inode *dir, - const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, -- int *err); -+ struct htree_lock *lck, int *err); - static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, -- struct inode *inode); -+ struct inode *inode, struct htree_lock *lck); - - /* checksumming functions */ - void initialize_dirent_tail(struct ext4_dir_entry_tail *t, -@@ -670,6 +678,227 @@ struct stats dx_show_entries(struct dx_h - } - #endif /* DX_DEBUG */ - -+/* private data for htree_lock */ -+struct ext4_dir_lock_data { -+ unsigned ld_flags; /* bits-map for lock types */ -+ unsigned ld_count; /* # entries of the last DX block */ -+ struct dx_entry ld_at_entry; /* copy of leaf dx_entry */ -+ struct dx_entry *ld_at; /* position of leaf dx_entry */ -+}; -+ -+#define ext4_htree_lock_data(l) ((struct ext4_dir_lock_data *)(l)->lk_private) -+#define ext4_find_entry(dir, name, dirent, inline) \ -+ __ext4_find_entry(dir, name, dirent, inline, NULL) -+#define ext4_add_entry(handle, dentry, inode) \ -+ __ext4_add_entry(handle, dentry, inode, NULL) -+ -+/* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */ -+#define EXT4_HTREE_NODE_CHANGED (0xcafeULL << 32) -+ -+static void ext4_htree_event_cb(void *target, void *event) -+{ -+ u64 *block = (u64 *)target; -+ -+ if (*block == dx_get_block((struct dx_entry *)event)) -+ *block = EXT4_HTREE_NODE_CHANGED; -+} -+ -+struct htree_lock_head *ext4_htree_lock_head_alloc(unsigned hbits) -+{ -+ struct htree_lock_head *lhead; -+ -+ lhead = htree_lock_head_alloc(EXT4_LK_MAX, hbits, 0); -+ if (lhead != NULL) { -+ htree_lock_event_attach(lhead, EXT4_LK_SPIN, HTREE_EVENT_WR, -+ ext4_htree_event_cb); -+ } -+ return lhead; -+} -+EXPORT_SYMBOL(ext4_htree_lock_head_alloc); -+ -+struct htree_lock *ext4_htree_lock_alloc(void) -+{ -+ return htree_lock_alloc(EXT4_LK_MAX, -+ sizeof(struct ext4_dir_lock_data)); -+} -+EXPORT_SYMBOL(ext4_htree_lock_alloc); -+ -+static htree_lock_mode_t ext4_htree_mode(unsigned flags) -+{ -+ switch (flags) { -+ default: /* 0 or unknown flags require EX lock */ -+ return HTREE_LOCK_EX; -+ case EXT4_HLOCK_READDIR: -+ return HTREE_LOCK_PR; -+ case EXT4_HLOCK_LOOKUP: -+ return HTREE_LOCK_CR; -+ case EXT4_HLOCK_DEL: -+ case EXT4_HLOCK_ADD: -+ return HTREE_LOCK_CW; -+ } -+} -+ -+/* return PR for read-only operations, otherwise return EX */ -+static inline htree_lock_mode_t ext4_htree_safe_mode(unsigned flags) -+{ -+ int writer = (flags & EXT4_LB_DE) == EXT4_LB_DE; -+ -+ /* 0 requires EX lock */ -+ return (flags == 0 || writer) ? HTREE_LOCK_EX : HTREE_LOCK_PR; -+} -+ -+static int ext4_htree_safe_locked(struct htree_lock *lck) -+{ -+ int writer; -+ -+ if (lck == NULL || lck->lk_mode == HTREE_LOCK_EX) -+ return 1; -+ -+ writer = (ext4_htree_lock_data(lck)->ld_flags & EXT4_LB_DE) == -+ EXT4_LB_DE; -+ if (writer) /* all readers & writers are excluded? */ -+ return lck->lk_mode == HTREE_LOCK_EX; -+ -+ /* all writers are excluded? */ -+ return lck->lk_mode == HTREE_LOCK_PR || -+ lck->lk_mode == HTREE_LOCK_PW || -+ lck->lk_mode == HTREE_LOCK_EX; -+} -+ -+/* relock htree_lock with EX mode if it's change operation, otherwise -+ * relock it with PR mode. It's noop if PDO is disabled. */ -+static void ext4_htree_safe_relock(struct htree_lock *lck) -+{ -+ if (!ext4_htree_safe_locked(lck)) { -+ unsigned flags = ext4_htree_lock_data(lck)->ld_flags; -+ -+ htree_change_lock(lck, ext4_htree_safe_mode(flags)); -+ } -+} -+ -+void ext4_htree_lock(struct htree_lock *lck, struct htree_lock_head *lhead, -+ struct inode *dir, unsigned flags) -+{ -+ htree_lock_mode_t mode = is_dx(dir) ? ext4_htree_mode(flags) : -+ ext4_htree_safe_mode(flags); -+ -+ ext4_htree_lock_data(lck)->ld_flags = flags; -+ htree_lock(lck, lhead, mode); -+ if (!is_dx(dir)) -+ ext4_htree_safe_relock(lck); /* make sure it's safe locked */ -+} -+EXPORT_SYMBOL(ext4_htree_lock); -+ -+static int ext4_htree_node_lock(struct htree_lock *lck, struct dx_entry *at, -+ unsigned lmask, int wait, void *ev) -+{ -+ u32 key = (at == NULL) ? 0 : dx_get_block(at); -+ u32 mode; -+ -+ /* NOOP if htree is well protected or caller doesn't require the lock */ -+ if (ext4_htree_safe_locked(lck) || -+ !(ext4_htree_lock_data(lck)->ld_flags & lmask)) -+ return 1; -+ -+ mode = (ext4_htree_lock_data(lck)->ld_flags & lmask) == lmask ? -+ HTREE_LOCK_PW : HTREE_LOCK_PR; -+ while (1) { -+ if (htree_node_lock_try(lck, mode, key, ffz(~lmask), wait, ev)) -+ return 1; -+ if (!(lmask & EXT4_LB_SPIN)) /* not a spinlock */ -+ return 0; -+ cpu_relax(); /* spin until granted */ -+ } -+} -+ -+static int ext4_htree_node_locked(struct htree_lock *lck, unsigned lmask) -+{ -+ return ext4_htree_safe_locked(lck) || -+ htree_node_is_granted(lck, ffz(~lmask)); -+} -+ -+static void ext4_htree_node_unlock(struct htree_lock *lck, -+ unsigned lmask, void *buf) -+{ -+ /* NB: it's safe to call mutiple times or even it's not locked */ -+ if (!ext4_htree_safe_locked(lck) && -+ htree_node_is_granted(lck, ffz(~lmask))) -+ htree_node_unlock(lck, ffz(~lmask), buf); -+} -+ -+#define ext4_htree_dx_lock(lck, key) \ -+ ext4_htree_node_lock(lck, key, EXT4_LB_DX, 1, NULL) -+#define ext4_htree_dx_lock_try(lck, key) \ -+ ext4_htree_node_lock(lck, key, EXT4_LB_DX, 0, NULL) -+#define ext4_htree_dx_unlock(lck) \ -+ ext4_htree_node_unlock(lck, EXT4_LB_DX, NULL) -+#define ext4_htree_dx_locked(lck) \ -+ ext4_htree_node_locked(lck, EXT4_LB_DX) -+ -+static void ext4_htree_dx_need_lock(struct htree_lock *lck) -+{ -+ struct ext4_dir_lock_data *ld; -+ -+ if (ext4_htree_safe_locked(lck)) -+ return; -+ -+ ld = ext4_htree_lock_data(lck); -+ switch (ld->ld_flags) { -+ default: -+ return; -+ case EXT4_HLOCK_LOOKUP: -+ ld->ld_flags = EXT4_HLOCK_LOOKUP_SAFE; -+ return; -+ case EXT4_HLOCK_DEL: -+ ld->ld_flags = EXT4_HLOCK_DEL_SAFE; -+ return; -+ case EXT4_HLOCK_ADD: -+ ld->ld_flags = EXT4_HLOCK_SPLIT; -+ return; -+ } -+} -+ -+#define ext4_htree_de_lock(lck, key) \ -+ ext4_htree_node_lock(lck, key, EXT4_LB_DE, 1, NULL) -+#define ext4_htree_de_unlock(lck) \ -+ ext4_htree_node_unlock(lck, EXT4_LB_DE, NULL) -+ -+#define ext4_htree_spin_lock(lck, key, event) \ -+ ext4_htree_node_lock(lck, key, EXT4_LB_SPIN, 0, event) -+#define ext4_htree_spin_unlock(lck) \ -+ ext4_htree_node_unlock(lck, EXT4_LB_SPIN, NULL) -+#define ext4_htree_spin_unlock_listen(lck, p) \ -+ ext4_htree_node_unlock(lck, EXT4_LB_SPIN, p) -+ -+static void ext4_htree_spin_stop_listen(struct htree_lock *lck) -+{ -+ if (!ext4_htree_safe_locked(lck) && -+ htree_node_is_listening(lck, ffz(~EXT4_LB_SPIN))) -+ htree_node_stop_listen(lck, ffz(~EXT4_LB_SPIN)); -+} -+ -+enum { -+ DX_HASH_COL_IGNORE, /* ignore collision while probing frames */ -+ DX_HASH_COL_YES, /* there is collision and it does matter */ -+ DX_HASH_COL_NO, /* there is no collision */ -+}; -+ -+static int dx_probe_hash_collision(struct htree_lock *lck, -+ struct dx_entry *entries, -+ struct dx_entry *at, u32 hash) -+{ -+ if (!(ext4_htree_lock_data(lck)->ld_flags & EXT4_LB_EXACT)) { -+ return DX_HASH_COL_IGNORE; /* don't care about collision */ -+ -+ } else if (at == entries + dx_get_count(entries) - 1) { -+ return DX_HASH_COL_IGNORE; /* not in any leaf of this DX */ -+ -+ } else { /* hash collision? */ -+ return ((dx_get_hash(at + 1) & ~1) == hash) ? -+ DX_HASH_COL_YES : DX_HASH_COL_NO; -+ } -+} -+ - /* - * Probe for a directory leaf block to search. - * -@@ -681,10 +910,11 @@ struct stats dx_show_entries(struct dx_h - */ - static struct dx_frame * - dx_probe(const struct qstr *d_name, struct inode *dir, -- struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, -+ struct htree_lock *lck, int *err) - { - unsigned count, indirect; -- struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_entry *at, *entries, *p, *q, *m, *dx = NULL; - struct dx_root_info *info; - struct buffer_head *bh; - struct dx_frame *frame = frame_in; -@@ -758,8 +988,15 @@ dx_probe(const struct qstr *d_name, stru - dxtrace(printk("Look up %x", hash)); - while (1) - { -+ if (indirect == 0) { /* the last index level */ -+ /* NB: ext4_htree_dx_lock() could be noop if -+ * DX-lock flag is not set for current operation */ -+ ext4_htree_dx_lock(lck, dx); -+ ext4_htree_spin_lock(lck, dx, NULL); -+ } - count = dx_get_count(entries); -- if (!count || count > dx_get_limit(entries)) { -+ if (count == 0 || count > dx_get_limit(entries)) { -+ ext4_htree_spin_unlock(lck); /* release spin */ - ext4_warning(dir->i_sb, - "dx entry: no count or count > limit"); - brelse(bh); -@@ -800,7 +1037,70 @@ dx_probe(const struct qstr *d_name, stru - frame->bh = bh; - frame->entries = entries; - frame->at = at; -- if (!indirect--) return frame; -+ -+ if (indirect == 0) { /* the last index level */ -+ struct ext4_dir_lock_data *ld; -+ u64 myblock; -+ -+ /* By default we only lock DE-block, however, we will -+ * also lock the last level DX-block if: -+ * a) there is hash collision -+ * we will set DX-lock flag (a few lines below) -+ * and redo to lock DX-block -+ * see detail in dx_probe_hash_collision() -+ * b) it's a retry from splitting -+ * we need to lock the last level DX-block so nobody -+ * else can split any leaf blocks under the same -+ * DX-block, see detail in ext4_dx_add_entry() -+ */ -+ if (ext4_htree_dx_locked(lck)) { -+ /* DX-block is locked, just lock DE-block -+ * and return */ -+ ext4_htree_spin_unlock(lck); -+ if (!ext4_htree_safe_locked(lck)) -+ ext4_htree_de_lock(lck, frame->at); -+ return frame; -+ } -+ /* it's pdirop and no DX lock */ -+ if (dx_probe_hash_collision(lck, entries, at, hash) == -+ DX_HASH_COL_YES) { -+ /* found hash collision, set DX-lock flag -+ * and retry to abtain DX-lock */ -+ ext4_htree_spin_unlock(lck); -+ ext4_htree_dx_need_lock(lck); -+ continue; -+ } -+ ld = ext4_htree_lock_data(lck); -+ /* because I don't lock DX, so @at can't be trusted -+ * after I release spinlock so I have to save it */ -+ ld->ld_at = at; -+ ld->ld_at_entry = *at; -+ ld->ld_count = dx_get_count(entries); -+ -+ frame->at = &ld->ld_at_entry; -+ myblock = dx_get_block(at); -+ -+ /* NB: ordering locking */ -+ ext4_htree_spin_unlock_listen(lck, &myblock); -+ /* other thread can split this DE-block because: -+ * a) I don't have lock for the DE-block yet -+ * b) I released spinlock on DX-block -+ * if it happened I can detect it by listening -+ * splitting event on this DE-block */ -+ ext4_htree_de_lock(lck, frame->at); -+ ext4_htree_spin_stop_listen(lck); -+ -+ if (myblock == EXT4_HTREE_NODE_CHANGED) { -+ /* someone split this DE-block before -+ * I locked it, I need to retry and lock -+ * valid DE-block */ -+ ext4_htree_de_unlock(lck); -+ continue; -+ } -+ return frame; -+ } -+ dx = at; -+ indirect--; - bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); - if (IS_ERR(bh)) { - *err = PTR_ERR(bh); -@@ -868,7 +1168,7 @@ static void dx_release (struct dx_frame - static int ext4_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, -- __u32 *start_hash) -+ __u32 *start_hash, struct htree_lock *lck) - { - struct dx_frame *p; - struct buffer_head *bh; -@@ -883,12 +1183,22 @@ static int ext4_htree_next_block(struct - * this loop, num_frames indicates the number of interior - * nodes need to be read. - */ -+ ext4_htree_de_unlock(lck); - while (1) { -- if (++(p->at) < p->entries + dx_get_count(p->entries)) -- break; -+ if (num_frames > 0 || ext4_htree_dx_locked(lck)) { -+ /* num_frames > 0 : -+ * DX block -+ * ext4_htree_dx_locked: -+ * frame->at is reliable pointer returned by dx_probe, -+ * otherwise dx_probe already knew no collision */ -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ } - if (p == frames) - return 0; - num_frames++; -+ if (num_frames == 1) -+ ext4_htree_dx_unlock(lck); - p--; - } - -@@ -911,6 +1221,13 @@ static int ext4_htree_next_block(struct - * block so no check is necessary - */ - while (num_frames--) { -+ if (num_frames == 0) { -+ /* it's not always necessary, we just don't want to -+ * detect hash collision again */ -+ ext4_htree_dx_need_lock(lck); -+ ext4_htree_dx_lock(lck, p->at); -+ } -+ - bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX); - if (IS_ERR(bh)) - return PTR_ERR(bh); -@@ -919,6 +1236,7 @@ static int ext4_htree_next_block(struct - p->bh = bh; - p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; - } -+ ext4_htree_de_lock(lck, p->at); - return 1; - } - -@@ -1021,10 +1339,10 @@ int ext4_htree_fill_tree(struct file *di - } - hinfo.hash = start_hash; - hinfo.minor_hash = 0; -- frame = dx_probe(NULL, dir, &hinfo, frames, &err); -+ /* assume it's PR locked */ -+ frame = dx_probe(NULL, dir, &hinfo, frames, NULL, &err); - if (!frame) - return err; -- - /* Add '.' and '..' from the htree header */ - if (!start_hash && !start_minor_hash) { - de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; -@@ -1051,7 +1369,7 @@ int ext4_htree_fill_tree(struct file *di - count += ret; - hashval = ~0; - ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS, -- frame, frames, &hashval); -+ frame, frames, &hashval, NULL); - *next_hash = hashval; - if (ret < 0) { - err = ret; -@@ -1244,10 +1562,10 @@ static int is_dx_internal_node(struct in - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ --static struct buffer_head * ext4_find_entry (struct inode *dir, -+struct buffer_head *__ext4_find_entry(struct inode *dir, - const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, -- int *inlined) -+ int *inlined, struct htree_lock *lck) - { - struct super_block *sb; - struct buffer_head *bh_use[NAMEI_RA_SIZE]; -@@ -1291,7 +1609,7 @@ static struct buffer_head * ext4_find_en - goto restart; - } - if (is_dx(dir)) { -- bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); -+ bh = ext4_dx_find_entry(dir, d_name, res_dir, lck, &err); - /* - * On success, or if the error was file not found, - * return. Otherwise, fall back to doing a search the -@@ -1305,6 +1623,7 @@ static struct buffer_head * ext4_find_en - return bh; - dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " - "falling back\n")); -+ ext4_htree_safe_relock(lck); - ret = NULL; - } - nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); -@@ -1402,9 +1721,12 @@ cleanup_and_exit: - brelse(bh_use[ra_ptr]); - return ret; - } -+EXPORT_SYMBOL(__ext4_find_entry); - --static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, -- struct ext4_dir_entry_2 **res_dir, int *err) -+static struct buffer_head *ext4_dx_find_entry(struct inode *dir, -+ const struct qstr *d_name, -+ struct ext4_dir_entry_2 **res_dir, -+ struct htree_lock *lck, int *err) - { - struct super_block * sb = dir->i_sb; - struct dx_hash_info hinfo; -@@ -1413,7 +1735,7 @@ static struct buffer_head * ext4_dx_find - ext4_lblk_t block; - int retval; - -- if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) -+ if (!(frame = dx_probe(d_name, dir, &hinfo, frames, lck, err))) - return NULL; - do { - block = dx_get_block(frame->at); -@@ -1437,7 +1759,7 @@ static struct buffer_head * ext4_dx_find - - /* Check to see if we should continue to search */ - retval = ext4_htree_next_block(dir, hinfo.hash, frame, -- frames, NULL); -+ frames, NULL, lck); - if (retval < 0) { - ext4_warning(sb, - "error reading index page in directory #%lu", -@@ -1597,8 +1919,9 @@ static struct ext4_dir_entry_2* dx_pack_ - * Returns pointer to de in block into which the new entry will be inserted. - */ - static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -- struct buffer_head **bh,struct dx_frame *frame, -- struct dx_hash_info *hinfo, int *error) -+ struct buffer_head **bh, struct dx_frame *frames, -+ struct dx_frame *frame, struct dx_hash_info *hinfo, -+ struct htree_lock *lck, int *error) - { - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; -@@ -1661,7 +1984,14 @@ static struct ext4_dir_entry_2 *do_split - hash2, split, count-split)); - - /* Fancy dance to stay within two buffers */ -- de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize); -+ if (hinfo->hash < hash2) { -+ de2 = dx_move_dirents(data1, data2, map + split, -+ count - split, blocksize); -+ } else { -+ /* make sure we will add entry to the same block which -+ * we have already locked */ -+ de2 = dx_move_dirents(data1, data2, map, split, blocksize); -+ } - de = dx_pack_dirents(data1, blocksize); - de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) - - (char *) de, -@@ -1680,13 +2010,21 @@ static struct ext4_dir_entry_2 *do_split - dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); - dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); - -- /* Which block gets the new entry? */ -- if (hinfo->hash >= hash2) -- { -- swap(*bh, bh2); -- de = de2; -+ ext4_htree_spin_lock(lck, frame > frames ? (frame - 1)->at : NULL, -+ frame->at); /* notify block is being split */ -+ if (hinfo->hash < hash2) { -+ dx_insert_block(frame, hash2 + continued, newblock); -+ -+ } else { -+ /* switch block number */ -+ dx_insert_block(frame, hash2 + continued, -+ dx_get_block(frame->at)); -+ dx_set_block(frame->at, newblock); -+ (frame->at)++; - } -- dx_insert_block(frame, hash2 + continued, newblock); -+ ext4_htree_spin_unlock(lck); -+ ext4_htree_dx_unlock(lck); -+ - err = ext4_handle_dirty_dirent_node(handle, dir, bh2); - if (err) - goto journal_error; -@@ -1965,7 +2303,7 @@ static int make_indexed_dir(handle_t *ha - if (retval) - goto out_frames; - -- de = do_split(handle,dir, &bh2, frame, &hinfo, &retval); -+ de = do_split(handle, dir, &bh2, frames, frame, &hinfo, NULL, &retval); - if (!de) { - goto out_frames; - } -@@ -2072,8 +2410,8 @@ out: - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ --static int ext4_add_entry(handle_t *handle, struct dentry *dentry, -- struct inode *inode) -+int __ext4_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct htree_lock *lck) - { - struct inode *dir = dentry->d_parent->d_inode; - struct buffer_head *bh = NULL; -@@ -2108,9 +2446,10 @@ static int ext4_add_entry(handle_t *hand - if (dentry->d_name.len == 2 && - memcmp(dentry->d_name.name, "..", 2) == 0) - return ext4_update_dotdot(handle, dentry, inode); -- retval = ext4_dx_add_entry(handle, dentry, inode); -+ retval = ext4_dx_add_entry(handle, dentry, inode, lck); - if (!retval || (retval != ERR_BAD_DX_DIR)) - goto out; -+ ext4_htree_safe_relock(lck); - ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); - dx_fallback++; - ext4_mark_inode_dirty(handle, dir); -@@ -2152,12 +2491,13 @@ out: - ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); - return retval; - } -+EXPORT_SYMBOL(__ext4_add_entry); - - /* - * Returns 0 for success, or a negative error value - */ - static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, -- struct inode *inode) -+ struct inode *inode, struct htree_lock *lck) - { - struct dx_frame frames[EXT4_HTREE_LEVEL], *frame; - struct dx_entry *entries, *at; -@@ -2171,7 +2511,7 @@ static int ext4_dx_add_entry(handle_t *h - - again: - restart = 0; -- frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); -+ frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, lck, &err); - if (!frame) - return err; - entries = frame->entries; -@@ -2201,6 +2541,11 @@ again: - struct dx_node *node2; - struct buffer_head *bh2; - -+ if (!ext4_htree_safe_locked(lck)) { /* retry with EX lock */ -+ ext4_htree_safe_relock(lck); -+ restart = 1; -+ goto cleanup; -+ } - while (frame > frames) { - if (dx_get_count((frame - 1)->entries) < - dx_get_limit((frame - 1)->entries)) { -@@ -2304,16 +2649,43 @@ again: - restart = 1; - goto journal_error; - } -+ } else if (!ext4_htree_dx_locked(lck)) { -+ struct ext4_dir_lock_data *ld = ext4_htree_lock_data(lck); -+ -+ /* not well protected, require DX lock */ -+ ext4_htree_dx_need_lock(lck); -+ at = frame > frames ? (frame - 1)->at : NULL; -+ -+ /* NB: no risk of deadlock because it's just a try. -+ * -+ * NB: we check ld_count for twice, the first time before -+ * having DX lock, the second time after holding DX lock. -+ * -+ * NB: We never free blocks for directory so far, which -+ * means value returned by dx_get_count() should equal to -+ * ld->ld_count if nobody split any DE-block under @at, -+ * and ld->ld_at still points to valid dx_entry. */ -+ if ((ld->ld_count != dx_get_count(entries)) || -+ !ext4_htree_dx_lock_try(lck, at) || -+ (ld->ld_count != dx_get_count(entries))) { -+ restart = 1; -+ goto cleanup; -+ } -+ /* OK, I've got DX lock and nothing changed */ -+ frame->at = ld->ld_at; - } -- de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ de = do_split(handle, dir, &bh, frames, frame, &hinfo, lck, &err); - if (!de) - goto cleanup; -+ - err = add_dirent_to_buf(handle, dentry, inode, de, bh); - goto cleanup; - - journal_error: - ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */ - cleanup: -+ ext4_htree_dx_unlock(lck); -+ ext4_htree_de_unlock(lck); - brelse(bh); - dx_release(frames); - /* @restart is true means htree-path has been changed, we need to -Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/super.c -=================================================================== ---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/super.c -+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/super.c -@@ -875,6 +875,7 @@ static struct inode *ext4_alloc_inode(st - - ei->vfs_inode.i_version = 1; - spin_lock_init(&ei->i_raw_lock); -+ sema_init(&ei->i_append_sem, 1); - INIT_LIST_HEAD(&ei->i_prealloc_list); - spin_lock_init(&ei->i_prealloc_lock); - ext4_es_init_tree(&ei->i_es_tree); diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series deleted file mode 100644 index 524f08b..0000000 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.6.series +++ /dev/null @@ -1,56 +0,0 @@ -rhel7.6/ext4-inode-version.patch -rhel7.6/ext4-lookup-dotdot.patch -rhel7.6/ext4-print-inum-in-htree-warning.patch -rhel7.6/ext4-prealloc.patch -rhel7.6/ext4-mballoc-extra-checks.patch -rhel7.6/ext4-misc.patch -rhel7.6/ext4-osd-iop-common.patch -rhel7.6/ext4-hash-indexed-dir-dotdot-update.patch -rhel7.6/ext4-kill-dx-root.patch -rhel7.6/ext4-mballoc-pa-free-mismatch.patch -rhel7.6/ext4-data-in-dirent.patch -rhel7.6/ext4-large-eas.patch -rhel7.6/ext4-disable-mb-cache.patch -rhel7.6/ext4-nocmtime.patch -rhel7.6/ext4-large-dir.patch -base/ext4-htree-lock.patch -rhel7.6/ext4-pdirop.patch -rhel7.6/ext4-max-dir-size.patch -rhel7.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch -rhel7.6/ext4-give-warning-with-dir-htree-growing.patch -rhel7.6/ext4-mmp-brelse.patch -rhel7.6/ext4-jcb-optimization.patch -rhel7.6/ext4_s_max_ext_tree_depth.patch -rhel7.6/ext4-projid-ignore-maxquotas.patch -rhel7.6/ext4-projid-feature-support.patch -rhel7.6/ext4-projid-quotas.patch -rhel7.6/ext4-projid-xfs-ioctls.patch -rhel7.6/ext4-fix-xattr-shifting-when-expanding-inodes.patch -rhel7.6/ext4-attach-jinode-in-writepages.patch -rhel7.6/ext4-dont-check-before-replay.patch -rhel7.6/ext4-cleanup-goto-next-group.patch -rhel7.6/ext4-reduce-lock-contention-in-__ext4_new_inode.patch -rhel7.6/ext4-preread-gd.patch -rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch -rhel7.6/ext4-export-orphan-add.patch -rhel7.6/ext4-mmp-dont-mark-bh-dirty.patch -rhel7.6/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch -rhel7.6/ext4-export-mb-stream-allocator-variables.patch -rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch -rhel7.6/ext4-simple-blockalloc.patch -rhel7.6/ext4-mballoc-skip-uninit-groups-cr0.patch -rhel7.6/ext4-mballoc-prefetch.patch -rhel7.6/ext4-track-extent-status-tree-shrinker-delay-statict.patch -rhel7.6/ext4-remove-extent-status-procfs-files-if-journal-lo.patch -rhel7.6/ext4-change-LRU-to-round-robin-in-extent-status-tree.patch -rhel7.6/ext4-move-handling-of-list-of-shrinkable-inodes-into.patch -rhel7.6/ext4-limit-number-of-scanned-extents-in-status-tree-.patch -rhel7.6/ext4-cleanup-flag-definitions-for-extent-status-tree.patch -rhel7.6/ext4-introduce-aging-to-extent-status-tree.patch -base/ext4-no-max-dir-size-limit-for-iam-objects.patch -rhel7.6/ext4-dquot-commit-speedup.patch -rhel7.7/ext4-ialloc-uid-gid-and-pass-owner-down.patch -rhel7.6/ext4-projid-xattrs.patch -rhel7.9/ext4-enc-flag.patch -rhel7.9/ext4-filename-encode.patch -rhel7.9/ext4-encdata.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series deleted file mode 100644 index 00e6a03..0000000 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.7.series +++ /dev/null @@ -1,56 +0,0 @@ -rhel7.6/ext4-inode-version.patch -rhel7.6/ext4-lookup-dotdot.patch -rhel7.6/ext4-print-inum-in-htree-warning.patch -rhel7.6/ext4-prealloc.patch -rhel7.6/ext4-mballoc-extra-checks.patch -rhel7.7/ext4-misc.patch -rhel7.6/ext4-osd-iop-common.patch -rhel7.6/ext4-hash-indexed-dir-dotdot-update.patch -rhel7.6/ext4-kill-dx-root.patch -rhel7.6/ext4-mballoc-pa-free-mismatch.patch -rhel7.6/ext4-data-in-dirent.patch -rhel7.7/ext4-large-eas.patch -rhel7.6/ext4-disable-mb-cache.patch -rhel7.6/ext4-nocmtime.patch -rhel7.7/ext4-large-dir.patch -base/ext4-htree-lock.patch -rhel7.7/ext4-pdirop.patch -rhel7.6/ext4-max-dir-size.patch -rhel7.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch -rhel7.6/ext4-give-warning-with-dir-htree-growing.patch -rhel7.6/ext4-mmp-brelse.patch -rhel7.6/ext4-jcb-optimization.patch -rhel7.6/ext4_s_max_ext_tree_depth.patch -rhel7.7/ext4-projid-ignore-maxquotas.patch -rhel7.6/ext4-projid-feature-support.patch -rhel7.6/ext4-projid-quotas.patch -rhel7.6/ext4-projid-xfs-ioctls.patch -rhel7.7/ext4-fix-xattr-shifting-when-expanding-inodes.patch -rhel7.6/ext4-attach-jinode-in-writepages.patch -rhel7.6/ext4-dont-check-before-replay.patch -rhel7.6/ext4-cleanup-goto-next-group.patch -rhel7.6/ext4-reduce-lock-contention-in-__ext4_new_inode.patch -rhel7.6/ext4-preread-gd.patch -rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch -rhel7.6/ext4-export-orphan-add.patch -rhel7.6/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch -rhel7.6/ext4-export-mb-stream-allocator-variables.patch -rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch -rhel7.7/ext4-fix-project-with-unpatched-kernel.patch -rhel7.6/ext4-simple-blockalloc.patch -rhel7.6/ext4-mballoc-skip-uninit-groups-cr0.patch -rhel7.7/ext4-mballoc-prefetch.patch -rhel7.6/ext4-track-extent-status-tree-shrinker-delay-statict.patch -rhel7.6/ext4-remove-extent-status-procfs-files-if-journal-lo.patch -rhel7.6/ext4-change-LRU-to-round-robin-in-extent-status-tree.patch -rhel7.6/ext4-move-handling-of-list-of-shrinkable-inodes-into.patch -rhel7.6/ext4-limit-number-of-scanned-extents-in-status-tree-.patch -rhel7.6/ext4-cleanup-flag-definitions-for-extent-status-tree.patch -rhel7.6/ext4-introduce-aging-to-extent-status-tree.patch -base/ext4-no-max-dir-size-limit-for-iam-objects.patch -rhel7.6/ext4-dquot-commit-speedup.patch -rhel7.7/ext4-ialloc-uid-gid-and-pass-owner-down.patch -rhel7.6/ext4-projid-xattrs.patch -rhel7.9/ext4-enc-flag.patch -rhel7.9/ext4-filename-encode.patch -rhel7.9/ext4-encdata.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.8.series b/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.8.series deleted file mode 100644 index 280e48c..0000000 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.10-rhel7.8.series +++ /dev/null @@ -1,49 +0,0 @@ -rhel7.6/ext4-inode-version.patch -rhel7.6/ext4-lookup-dotdot.patch -rhel7.6/ext4-print-inum-in-htree-warning.patch -rhel7.6/ext4-prealloc.patch -rhel7.6/ext4-mballoc-extra-checks.patch -rhel7.7/ext4-misc.patch -rhel7.6/ext4-osd-iop-common.patch -rhel7.6/ext4-hash-indexed-dir-dotdot-update.patch -rhel7.6/ext4-kill-dx-root.patch -rhel7.6/ext4-mballoc-pa-free-mismatch.patch -rhel7.6/ext4-data-in-dirent.patch -rhel7.7/ext4-large-eas.patch -rhel7.6/ext4-disable-mb-cache.patch -rhel7.6/ext4-nocmtime.patch -rhel7.7/ext4-large-dir.patch -base/ext4-htree-lock.patch -rhel7.7/ext4-pdirop.patch -rhel7.6/ext4-max-dir-size.patch -rhel7.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch -rhel7.6/ext4-give-warning-with-dir-htree-growing.patch -rhel7.6/ext4-mmp-brelse.patch -rhel7.6/ext4-jcb-optimization.patch -rhel7.6/ext4_s_max_ext_tree_depth.patch -rhel7.7/ext4-projid-ignore-maxquotas.patch -rhel7.6/ext4-projid-feature-support.patch -rhel7.6/ext4-projid-quotas.patch -rhel7.6/ext4-projid-xfs-ioctls.patch -rhel7.7/ext4-fix-xattr-shifting-when-expanding-inodes.patch -rhel7.6/ext4-attach-jinode-in-writepages.patch -rhel7.6/ext4-dont-check-before-replay.patch -rhel7.6/ext4-cleanup-goto-next-group.patch -rhel7.6/ext4-reduce-lock-contention-in-__ext4_new_inode.patch -rhel7.6/ext4-preread-gd.patch -rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch -rhel7.6/ext4-export-orphan-add.patch -rhel7.6/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch -rhel7.6/ext4-export-mb-stream-allocator-variables.patch -rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch -rhel7.7/ext4-fix-project-with-unpatched-kernel.patch -rhel7.6/ext4-simple-blockalloc.patch -rhel7.6/ext4-mballoc-skip-uninit-groups-cr0.patch -rhel7.7/ext4-mballoc-prefetch.patch -base/ext4-no-max-dir-size-limit-for-iam-objects.patch -rhel7.6/ext4-dquot-commit-speedup.patch -rhel7.7/ext4-ialloc-uid-gid-and-pass-owner-down.patch -rhel7.6/ext4-projid-xattrs.patch -rhel7.9/ext4-enc-flag.patch -rhel7.9/ext4-filename-encode.patch -rhel7.9/ext4-encdata.patch -- 1.8.3.1