--- /dev/null
+LU-10026 osd-ldiskfs: use preallocation for dense writes
+
+use inode's preallocation chunks as per-inode group preallocation:
+just grab the very first available blocks from the window.
+
+Test-Parameters: env=ONLY=1000,ONLY_REPEAT=11 testlist=sanity-compr
+Test-Parameters: env=ONLY=fsx,ONLY_REPEAT=11 testlist=sanity-compr
+Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
+Change-Id: I9d36701f569f4c6305bc46f3373bfc054fcd61a9
+Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50171
+Tested-by: jenkins <devops@whamcloud.com>
+Tested-by: Maloo <maloo@whamcloud.com>
+Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
+Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
+Reviewed-by: Oleg Drokin <green@whamcloud.com>
+---
+ fs/ext4/ext4.h | 3 +++
+ fs/ext4/extents.c | 2 ++
+ fs/ext4/mballoc.c | 41 ++++++++++++++++++++++++++++++++++++++++-
+ fs/ext4/mballoc.h | 4 +++-
+ 4 files changed, 48 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 8dd054d0..d3661a3f 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -141,6 +141,8 @@ enum SHIFT_DIRECTION {
+ #define EXT4_MB_USE_ROOT_BLOCKS 0x1000
+ /* Use blocks from reserved pool */
+ #define EXT4_MB_USE_RESERVED 0x2000
++/* Ldiskfs very dense writes */
++#define EXT4_MB_VERY_DENSE 0x80000
+
+ struct ext4_allocation_request {
+ /* target inode for block we're allocating */
+@@ -617,6 +619,7 @@ enum {
+ /* Caller will submit data before dropping transaction handle. This
+ * allows jbd2 to avoid submitting data before commit. */
+ #define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400
++#define EXT4_GET_BLOCKS_VERY_DENSE 0x08000
+
+ /*
+ * The bit position of these flags must not overlap with any of the
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 507285d7..99ab7339 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -4691,6 +4691,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
+ ar.flags = 0;
+ if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
+ ar.flags |= EXT4_MB_HINT_NOPREALLOC;
++ if (flags & EXT4_GET_BLOCKS_VERY_DENSE)
++ ar.flags |= EXT4_MB_VERY_DENSE;
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+ if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index 397854ca..d9ec558b 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3799,6 +3799,25 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
+ ext4_fsblk_t end;
+ int len;
+
++ if (ac->ac_flags & EXT4_MB_VERY_DENSE && !pa->pa_regular) {
++ unsigned int len = ac->ac_o_ex.fe_len;
++ if (len > pa->pa_free)
++ len = pa->pa_free;
++ ext4_get_group_no_and_offset(ac->ac_sb,
++ pa->pa_pstart,
++ &ac->ac_b_ex.fe_group,
++ &ac->ac_b_ex.fe_start);
++ ac->ac_b_ex.fe_len = len;
++ pa->pa_lstart += len;
++ pa->pa_pstart += len;
++ pa->pa_free -= len;
++ pa->pa_len -= len;
++ ac->ac_status = AC_STATUS_FOUND;
++ ac->ac_pa = pa;
++ return;
++ }
++
++ pa->pa_regular = 1;
+ /* found preallocated blocks, use them */
+ start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
+ end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
+@@ -3888,6 +3907,23 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
+ if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
+ return 0;
+
++ if (ac->ac_flags & EXT4_MB_VERY_DENSE) {
++ rcu_read_lock();
++ list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
++ spin_lock(&pa->pa_lock);
++ if (!pa->pa_deleted && pa->pa_free && !pa->pa_regular) {
++ atomic_inc(&pa->pa_count);
++ ext4_mb_use_inode_pa(ac, pa);
++ spin_unlock(&pa->pa_lock);
++ break;
++ }
++ spin_unlock(&pa->pa_lock);
++ }
++ rcu_read_unlock();
++ if (ac->ac_status == AC_STATUS_FOUND)
++ return true;
++ }
++
+ /* first, try per-file preallocation */
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
+@@ -4129,7 +4165,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
+ if (pa->pa_type == MB_GROUP_PA)
+ grp_blk--;
+
+- grp = ext4_get_group_number(sb, grp_blk);
++ grp = pa->pa_group;
+
+ /*
+ * possible race:
+@@ -4220,6 +4256,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+ pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
+ pa->pa_len = ac->ac_b_ex.fe_len;
+ pa->pa_free = pa->pa_len;
++ pa->pa_group = ac->ac_b_ex.fe_group;
++ pa->pa_regular = 0;
+ atomic_set(&pa->pa_count, 1);
+ spin_lock_init(&pa->pa_lock);
+ INIT_LIST_HEAD(&pa->pa_inode_list);
+@@ -4282,6 +4320,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
+ pa->pa_lstart = pa->pa_pstart;
+ pa->pa_len = ac->ac_b_ex.fe_len;
+ pa->pa_free = pa->pa_len;
++ pa->pa_group = ac->ac_b_ex.fe_group;
+ atomic_set(&pa->pa_count, 1);
+ spin_lock_init(&pa->pa_lock);
+ INIT_LIST_HEAD(&pa->pa_inode_list);
+diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
+index 608a7020..cabbc02f 100644
+--- a/fs/ext4/mballoc.h
++++ b/fs/ext4/mballoc.h
+@@ -114,6 +114,8 @@ struct ext4_prealloc_space {
+ ext4_lblk_t pa_lstart; /* log. block */
+ ext4_grpblk_t pa_len; /* len of preallocated chunk */
+ ext4_grpblk_t pa_free; /* how many blocks are free */
++ ext4_grpblk_t pa_group;
++ unsigned short pa_regular;
+ unsigned short pa_type; /* pa type. inode or group */
+ unsigned short pa_error;
+ spinlock_t *pa_obj_lock;
+@@ -170,7 +172,7 @@ struct ext4_allocation_context {
+ __u16 ac_found;
+ __u16 ac_tail;
+ __u16 ac_buddy;
+- __u16 ac_flags; /* allocation hints */
++ __u32 ac_flags; /* allocation hints */
+ __u8 ac_status;
+ __u8 ac_criteria;
+ __u8 ac_2order; /* if request is to allocate 2^N blocks and
+--
+2.34.1
+