-
commit 2db3b2b33ee796f4ea61316773452d936303ad27
Author: Pravin Shelar <Pravin.Shelar@sun.com>
AuthorDate: Sun Oct 4 18:13:14 2009 +0000
Signed-off-by: Andreas Dilger <andreas.dilger@sun.com>
---
fs/ext4/dir.c | 9 +-
- fs/ext4/ext4.h | 106 ++++++++++++++++--
+ fs/ext4/ext4.h | 106 ++++++++++++++-
fs/ext4/fast_commit.c | 2 +-
fs/ext4/inline.c | 8 +-
- fs/ext4/namei.c | 249 ++++++++++++++++++++++++++++++++----------
+ fs/ext4/namei.c | 295 ++++++++++++++++++++++++++++++++++++------
fs/ext4/super.c | 4 +-
- 6 files changed, 303 insertions(+), 75 deletions(-)
+ 6 files changed, 368 insertions(+), 56 deletions(-)
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
-index 74b172a..c6afabc 100644
+index 74b172a4..c6afabcb 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -466,12 +466,17 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
while (*p) {
parent = *p;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
-index 0791a8b..f1bc21d 100644
+index 645015d5..3b5b6df9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
-@@ -1165,6 +1165,7 @@ struct ext4_inode_info {
+@@ -1168,6 +1168,7 @@ struct ext4_inode_info {
__u32 i_csum_seed;
kprojid_t i_projid;
};
/*
-@@ -1186,6 +1187,7 @@ struct ext4_inode_info {
+@@ -1189,6 +1190,7 @@ struct ext4_inode_info {
* Mount flags set via mount options or defaults
*/
#define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
-@@ -2117,6 +2119,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
+@@ -2140,6 +2142,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold, CASEFOLD)
EXT4_FEATURE_INCOMPAT_FLEX_BG| \
EXT4_FEATURE_INCOMPAT_EA_INODE| \
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
EXT4_FEATURE_INCOMPAT_CASEFOLD | \
-@@ -2326,6 +2329,42 @@ struct ext4_dir_entry_tail {
+@@ -2349,6 +2352,42 @@ struct ext4_dir_entry_tail {
#define EXT4_FT_SYMLINK 7
#define EXT4_FT_MAX 8
#define EXT4_FT_DIR_CSUM 0xDE
-@@ -2337,6 +2376,17 @@ struct ext4_dir_entry_tail {
+@@ -2360,6 +2399,17 @@ struct ext4_dir_entry_tail {
#define EXT4_DIR_PAD 4
#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
#define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
* The rec_len is dependent on the type of directory. Directories that are
-@@ -2344,10 +2394,10 @@ struct ext4_dir_entry_tail {
+@@ -2367,10 +2417,10 @@ struct ext4_dir_entry_tail {
* ext4_extended_dir_entry_2. For all entries related to '.' or '..' you should
* pass NULL for dir, as those entries do not use the extra fields.
*/
if (dir && ext4_hash_in_dirent(dir))
rec_len += sizeof(struct ext4_dir_entry_hash);
-@@ -2821,11 +2871,13 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
+@@ -2848,11 +2898,13 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
struct buffer_head *bh,
void *buf, int buf_size,
struct ext4_filename *fname,
static inline void ext4_update_dx_flag(struct inode *inode)
{
if (!ext4_has_feature_dir_index(inode->i_sb) &&
-@@ -2841,10 +2893,17 @@ static const unsigned char ext4_filetype_table[] = {
+@@ -2868,10 +2920,17 @@ static const unsigned char ext4_filetype_table[] = {
static inline unsigned char get_dtype(struct super_block *sb, int filetype)
{
}
extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
void *buf, int buf_size);
-@@ -3048,7 +3107,8 @@ extern int ext4_ind_migrate(struct inode *inode);
+@@ -3075,7 +3134,8 @@ extern int ext4_ind_migrate(struct inode *inode);
/* namei.c */
extern int ext4_init_new_dir(handle_t *handle, struct inode *dir,
extern int ext4_dirblock_csum_verify(struct inode *inode,
struct buffer_head *bh);
extern int ext4_orphan_add(handle_t *, struct inode *);
-@@ -3059,6 +3119,8 @@ extern struct inode *ext4_create_inode(handle_t *handle,
+@@ -3086,6 +3146,8 @@ extern struct inode *ext4_create_inode(handle_t *handle,
extern int ext4_delete_entry(handle_t *handle, struct inode * dir,
struct ext4_dir_entry_2 *de_del,
struct buffer_head *bh);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
__u32 start_minor_hash, __u32 *next_hash);
extern int ext4_search_dir(struct buffer_head *bh,
-@@ -3862,6 +3924,36 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh)
+@@ -3892,6 +3954,36 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh)
return buffer_uptodate(bh);
}
#define EFSBADCRC EBADMSG /* Bad CRC detected */
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
-index 276d9e6..3be0f08 100644
+index 6d9eb627..cf62b91e 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
-@@ -1596,7 +1596,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
+@@ -1610,7 +1610,7 @@ static int ext4_fc_replay_create(struct super_block *sb,
jbd_debug(1, "Dir %d not found.", darg.ino);
goto out;
}
if (ret) {
ret = 0;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
-index 9626c31..ed31b5c 100644
+index a605204d..43cc7453 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
-@@ -1029,7 +1029,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
+@@ -1031,7 +1031,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_dir_entry_2 *de;
err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
if (err)
return err;
-@@ -1038,7 +1038,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
+@@ -1040,7 +1040,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
EXT4_JTR_NONE);
if (err)
return err;
ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
-@@ -1396,7 +1396,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
+@@ -1399,7 +1399,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
fake.name_len = 1;
strcpy(fake.name, ".");
fake.rec_len = ext4_rec_len_to_disk(
inline_size);
ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
de = &fake;
-@@ -1406,7 +1406,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
+@@ -1409,7 +1409,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
fake.name_len = 2;
strcpy(fake.name, "..");
fake.rec_len = ext4_rec_len_to_disk(
ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
de = &fake;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
-index 7f00dc3..51c950b 100644
+index a408e369..afac85df 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
-@@ -285,7 +285,8 @@ static unsigned dx_get_count(struct dx_entry *entries);
+@@ -290,7 +290,8 @@ static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
static unsigned dx_node_limit(struct inode *dir);
static struct dx_frame *dx_probe(struct ext4_filename *fname,
struct inode *dir,
-@@ -431,22 +432,23 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
+@@ -436,22 +437,23 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
{
struct ext4_dir_entry *dp;
struct dx_root_info *root;
if (offset)
*offset = count_offset;
-@@ -551,11 +553,12 @@ ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
+@@ -556,11 +558,12 @@ ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
*/
struct dx_root_info *dx_get_dx_info(struct ext4_dir_entry_2 *de)
{
return (struct dx_root_info *)de;
}
-@@ -600,11 +603,16 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
+@@ -605,11 +608,16 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}
if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
-@@ -722,7 +730,7 @@ static struct stats dx_show_leaf(struct inode *dir,
+@@ -729,7 +737,7 @@ static struct stats dx_show_leaf(struct inode *dir,
(unsigned) ((char *) de - base));
#endif
}
names++;
}
de = ext4_next_entry(de, size);
-@@ -872,11 +880,14 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
+@@ -885,11 +893,14 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
entries = (struct dx_entry *)(((char *)info) + info->info_length);
goto fail;
}
-@@ -1925,7 +1936,7 @@ dx_move_dirents(struct inode *dir, char *from, char *to,
+@@ -1945,7 +1956,7 @@ dx_move_dirents(struct inode *dir, char *from, char *to,
while (count--) {
struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
(from + (map->offs<<2));
memcpy (to, de, rec_len);
((struct ext4_dir_entry_2 *) to)->rec_len =
-@@ -1958,7 +1969,7 @@ static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
+@@ -1978,7 +1989,7 @@ static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
while ((char*)de < base + blocksize) {
next = ext4_next_entry(de, blocksize);
if (de->inode && de->name_len) {
if (de > to)
memmove(to, de, rec_len);
to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
-@@ -2101,10 +2112,11 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
+@@ -2121,10 +2132,11 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
struct buffer_head *bh,
void *buf, int buf_size,
struct ext4_filename *fname,
int nlen, rlen;
unsigned int offset = 0;
char *top;
-@@ -2117,7 +2135,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
+@@ -2137,7 +2149,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
return -EFSCORRUPTED;
if (ext4_match(dir, fname, de))
return -EEXIST;
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if ((de->inode ? rlen - nlen : rlen) >= reclen)
break;
-@@ -2135,12 +2174,13 @@ void ext4_insert_dentry(struct inode *dir,
+@@ -2155,12 +2167,13 @@ void ext4_insert_dentry(struct inode *dir,
struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if (de->inode) {
struct ext4_dir_entry_2 *de1 =
-@@ -2161,6 +2201,12 @@ void ext4_insert_dentry(struct inode *dir,
+@@ -2181,6 +2194,12 @@ void ext4_insert_dentry(struct inode *dir,
EXT4_DIRENT_HASHES(de)->minor_hash =
cpu_to_le32(hinfo->minor_hash);
}
}
/*
-@@ -2178,14 +2224,19 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
+@@ -2198,14 +2217,19 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
{
unsigned int blocksize = dir->i_sb->s_blocksize;
int csum_size = 0;
if (err)
return err;
}
-@@ -2198,7 +2249,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
+@@ -2218,7 +2242,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
}
/* By now the buffer is marked for journaling */
/*
* XXX shouldn't update any times until successful
-@@ -2307,7 +2361,8 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
+@@ -2335,7 +2359,8 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
entries = (void *)dx_info + sizeof(*dx_info);
dx_set_block(entries, 1);
dx_set_count(entries, 1);
/* Initialize as for dx_probe */
fname->hinfo.hash_version = dx_info->hash_version;
-@@ -2353,7 +2378,106 @@ out_frames:
+@@ -2386,7 +2411,106 @@ out_frames:
return retval;
}
static int ext4_update_dotdot(handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
-@@ -2362,6 +2486,8 @@ static int ext4_update_dotdot(handle_t *
+@@ -2395,6 +2519,8 @@ static int ext4_update_dotdot(handle_t *handle, struct dentry *dentry,
struct ext4_dir_entry_2 *dot_de, *dotdot_de;
unsigned int offset;
int retval = 0;
if (IS_ERR(handle))
return PTR_ERR(handle);
-@@ -2402,6 +2528,30 @@ static int ext4_update_dotdot(handle_t *
+@@ -2435,6 +2561,30 @@ static int ext4_update_dotdot(handle_t *handle, struct dentry *dentry,
dotdot_de->inode = cpu_to_le32(inode->i_ino);
ext4_mark_inode_dirty(handle, dir);
BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
if (is_dx(dir)) {
-@@ -2445,6 +2511,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+@@ -2472,6 +2622,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
ext4_lblk_t block, blocks;
int csum_size = 0;
if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
-@@ -3016,38 +3083,73 @@ err_unlock_inode:
+@@ -3044,38 +3195,73 @@ err_unlock_inode:
return err;
}
struct buffer_head *dir_block = NULL;
struct ext4_dir_entry_2 *de;
ext4_lblk_t block = 0;
-@@ -3071,7 +3173,11 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir,
+@@ -3099,7 +3285,11 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir,
if (IS_ERR(dir_block))
return PTR_ERR(dir_block);
de = (struct ext4_dir_entry_2 *)dir_block->b_data;
set_nlink(inode, 2);
if (csum_size)
ext4_initialize_dirent_tail(dir_block, blocksize);
-@@ -3086,6 +3192,29 @@ out:
+@@ -3114,6 +3304,29 @@ out:
return err;
}
static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
-@@ -3113,7 +3242,7 @@ retry:
+@@ -3141,7 +3354,7 @@ retry:
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
goto out_clear_inode;
err = ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index 4be1994..a2fcbf8 100644
+index 0af91da7..9b6ad3df 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
-@@ -1672,7 +1672,7 @@ enum {
- Opt_inlinecrypt,
+@@ -1666,7 +1666,7 @@ enum {
+ Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
+- Opt_inlinecrypt,
++ Opt_inlinecrypt, Opt_dirdata,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
-- Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
-+ Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_dirdata,
- Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
- Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
-@@ -1756,6 +1756,7 @@ static const match_table_t tokens = {
+ Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
+@@ -1753,6 +1753,7 @@ static const match_table_t tokens = {
{Opt_nolazytime, "nolazytime"},
{Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
{Opt_nodelalloc, "nodelalloc"},
{Opt_removed, "mblk_io_submit"},
{Opt_removed, "nomblk_io_submit"},
{Opt_block_validity, "block_validity"},
-@@ -2000,6 +2001,7 @@ static const struct mount_opts {
+@@ -1997,6 +1998,7 @@ static const struct mount_opts {
{Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
---
fs/ext4/Makefile | 1 +
fs/ext4/ext4.h | 78 ++++++++
- fs/ext4/namei.c | 465 ++++++++++++++++++++++++++++++++++++++++++-----
+ fs/ext4/namei.c | 467 ++++++++++++++++++++++++++++++++++++++++++-----
fs/ext4/super.c | 1 +
- 4 files changed, 504 insertions(+), 41 deletions(-)
+ 4 files changed, 505 insertions(+), 42 deletions(-)
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
-index 49e7af6..f7ced03 100644
+index 49e7af6c..f7ced03e 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
mmp.o move_extent.o namei.o page-io.o readpage.o resize.o \
super.o symlink.o sysfs.o xattr.o xattr_hurd.o xattr_trusted.o \
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
-index 54734be..fa5d5d6 100644
+index 3747c1cb..321e8683 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -28,6 +28,7 @@
#include <linux/sched/signal.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
-@@ -1020,6 +1021,9 @@ struct ext4_inode_info {
+@@ -1023,6 +1024,9 @@ struct ext4_inode_info {
__u32 i_dtime;
ext4_fsblk_t i_file_acl;
/*
* i_block_group is the number of the block group which contains
* this file's inode. Constant across the lifetime of the inode,
-@@ -2509,6 +2513,72 @@ struct dx_hash_info
+@@ -2526,6 +2530,72 @@ struct dx_hash_info
*/
#define HASH_NB_ALWAYS 1
struct ext4_filename {
const struct qstr *usr_fname;
struct fscrypt_str disk_name;
-@@ -2887,12 +2957,20 @@ void ext4_insert_dentry(struct inode *dir, struct inode *inode,
+@@ -2908,12 +2978,20 @@ void ext4_insert_dentry(struct inode *dir, struct inode *inode,
void *data);
static inline void ext4_update_dx_flag(struct inode *inode)
{
static const unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
-index 51c950b..1b8c80e 100644
+index 598212ed..f98f3cd3 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -56,6 +56,7 @@ struct buffer_head *ext4_append(handle_t *handle,
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
map.m_lblk = *block;
map.m_len = 1;
-@@ -73,21 +78,27 @@ struct buffer_head *ext4_append(handle_t *handle,
- * directory.
+@@ -74,15 +79,18 @@ struct buffer_head *ext4_append(handle_t *handle,
*/
err = ext4_map_blocks(NULL, inode, &map, 0);
-- if (err < 0)
-+ if (err < 0) {
-+ up(&ei->i_append_sem);
- return ERR_PTR(err);
-+ }
+ if (err < 0)
+- return ERR_PTR(err);
++ goto err_unlock;
if (err) {
-+ up(&ei->i_append_sem);
EXT4_ERROR_INODE(inode, "Logical block already allocated");
- return ERR_PTR(-EFSCORRUPTED);
+- return ERR_PTR(-EFSCORRUPTED);
++ err = -EFSCORRUPTED;
++ goto err_unlock;
}
bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
+ }
inode->i_size += inode->i_sb->s_blocksize;
EXT4_I(inode)->i_disksize = inode->i_size;
- BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+ err = ext4_mark_inode_dirty(handle, inode);
+@@ -93,11 +101,14 @@ struct buffer_head *ext4_append(handle_t *handle,
EXT4_JTR_NONE);
+ if (err)
+ goto out;
+ up(&ei->i_append_sem);
- if (err) {
- brelse(bh);
- ext4_std_error(inode->i_sb, err);
-@@ -291,7 +302,8 @@ static unsigned dx_node_limit(struct inode *dir);
+ return bh;
+
+ out:
+ brelse(bh);
+ ext4_std_error(inode->i_sb, err);
++err_unlock:
++ up(&ei->i_append_sem);
+ return ERR_PTR(err);
+ }
+
+@@ -296,7 +307,8 @@ static unsigned dx_node_limit(struct inode *dir);
static struct dx_frame *dx_probe(struct ext4_filename *fname,
struct inode *dir,
struct dx_hash_info *hinfo,
static void dx_release(struct dx_frame *frames);
static int dx_make_map(struct inode *dir, struct buffer_head *bh,
struct dx_hash_info *hinfo,
-@@ -307,12 +319,13 @@ static void dx_insert_block(struct dx_frame *frame,
+@@ -312,12 +324,13 @@ static void dx_insert_block(struct dx_frame *frame,
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame,
struct dx_frame *frames,
/* checksumming functions */
void ext4_initialize_dirent_tail(struct buffer_head *bh,
-@@ -797,6 +810,227 @@ static inline void htree_rep_invariant_check(struct dx_entry *at,
+@@ -804,6 +817,227 @@ static inline void htree_rep_invariant_check(struct dx_entry *at,
}
#endif /* DX_DEBUG */
/*
* Probe for a directory leaf block to search.
*
-@@ -808,10 +1042,11 @@ static inline void htree_rep_invariant_check(struct dx_entry *at,
+@@ -815,10 +1049,11 @@ static inline void htree_rep_invariant_check(struct dx_entry *at,
*/
static struct dx_frame *
dx_probe(struct ext4_filename *fname, struct inode *dir,
struct dx_root_info *info;
struct dx_frame *frame = frame_in;
struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
-@@ -895,8 +1130,16 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
+@@ -908,8 +1143,16 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
level = 0;
blocks[0] = 0;
while (1) {
ext4_warning_inode(dir,
"dx entry: count %u beyond limit %u",
count, dx_get_limit(entries));
-@@ -923,6 +1166,74 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
+@@ -936,6 +1179,74 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
frame->entries = entries;
frame->at = at;
block = dx_get_block(at);
for (i = 0; i <= level; i++) {
if (blocks[i] == block) {
-@@ -932,8 +1243,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
+@@ -945,8 +1256,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
goto fail;
}
}
blocks[level] = block;
frame++;
frame->bh = ext4_read_dirblock(dir, block, INDEX);
-@@ -1004,7 +1314,7 @@ static void dx_release(struct dx_frame *frames)
+@@ -1017,7 +1327,7 @@ static void dx_release(struct dx_frame *frames)
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame,
struct dx_frame *frames,
{
struct dx_frame *p;
struct buffer_head *bh;
-@@ -1019,12 +1329,22 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
+@@ -1032,12 +1342,22 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
* this loop, num_frames indicates the number of interior
* nodes need to be read.
*/
p--;
}
-@@ -1047,6 +1367,13 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
+@@ -1060,6 +1380,13 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
* block so no check is necessary
*/
while (num_frames--) {
bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
if (IS_ERR(bh))
return PTR_ERR(bh);
-@@ -1055,6 +1382,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
+@@ -1068,6 +1395,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
p->bh = bh;
p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
}
return 1;
}
-@@ -1216,10 +1544,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
+@@ -1234,10 +1562,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
hinfo.hash = start_hash;
hinfo.minor_hash = 0;
/* Add '.' and '..' from the htree header */
if (!start_hash && !start_minor_hash) {
de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
-@@ -1259,7 +1587,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
+@@ -1277,7 +1605,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
count += ret;
hashval = ~0;
ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
*next_hash = hashval;
if (ret < 0) {
err = ret;
-@@ -1579,7 +1907,7 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
+@@ -1600,7 +1928,7 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
static struct buffer_head *__ext4_find_entry(struct inode *dir,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir,
{
struct super_block *sb;
struct buffer_head *bh_use[NAMEI_RA_SIZE];
-@@ -1621,7 +1949,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
+@@ -1641,7 +1969,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
goto restart;
}
if (is_dx(dir)) {
/*
* On success, or if the error was file not found,
* return. Otherwise, fall back to doing a search the
-@@ -1631,6 +1959,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
+@@ -1651,6 +1979,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
goto cleanup_and_exit;
dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
"falling back\n"));
ret = NULL;
}
nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
-@@ -1721,10 +2050,10 @@ cleanup_and_exit:
+@@ -1741,10 +2070,10 @@ cleanup_and_exit:
return ret;
}
{
int err;
struct ext4_filename fname;
-@@ -1736,12 +2065,14 @@ static struct buffer_head *ext4_find_entry(struct inode *dir,
+@@ -1756,12 +2085,14 @@ static struct buffer_head *ext4_find_entry(struct inode *dir,
if (err)
return ERR_PTR(err);
static struct buffer_head *ext4_lookup_entry(struct inode *dir,
struct dentry *dentry,
struct ext4_dir_entry_2 **res_dir)
-@@ -1757,7 +2088,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
+@@ -1777,7 +2108,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
if (err)
return ERR_PTR(err);
ext4_fname_free_filename(&fname);
return bh;
-@@ -1765,7 +2096,8 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
+@@ -1785,7 +2116,8 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
struct ext4_filename *fname,
{
struct super_block * sb = dir->i_sb;
struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
-@@ -1776,7 +2108,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
+@@ -1796,7 +2128,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
#ifdef CONFIG_FS_ENCRYPTION
*res_dir = NULL;
#endif
if (IS_ERR(frame))
return (struct buffer_head *) frame;
do {
-@@ -1798,7 +2130,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
+@@ -1818,7 +2150,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
/* Check to see if we should continue to search */
retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
if (retval < 0) {
ext4_warning_inode(dir,
"error %d reading directory index block",
-@@ -1987,8 +2319,9 @@ static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
+@@ -2007,8 +2339,9 @@ static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
* Returns pointer to de in block into which the new entry will be inserted.
*/
static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
{
unsigned blocksize = dir->i_sb->s_blocksize;
unsigned continued;
-@@ -2065,8 +2398,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+@@ -2085,8 +2418,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
hash2, split, count-split));
/* Fancy dance to stay within two buffers */
de = dx_pack_dirents(dir, data1, blocksize);
de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
(char *) de,
-@@ -2084,12 +2423,21 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+@@ -2104,12 +2443,21 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
blocksize, 1));
err = ext4_handle_dirty_dirblock(handle, dir, bh2);
if (err)
goto journal_error;
-@@ -2388,7 +2736,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
+@@ -2391,7 +2739,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
if (retval)
goto out_frames;
if (IS_ERR(de)) {
retval = PTR_ERR(de);
goto out_frames;
-@@ -2497,8 +2845,8 @@ out:
+@@ -2608,8 +2956,8 @@ out:
* may not sleep between calling this and putting something into
* the entry, as someone else might have used it while you slept.
*/
{
struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh = NULL;
-@@ -2547,9 +2895,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+@@ -2659,9 +3007,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return ext4_update_dotdot(handle, dentry, inode);
if (is_dx(dir)) {
/* Can we just ignore htree data? */
if (ext4_has_metadata_csum(sb)) {
EXT4_ERROR_INODE(dir,
-@@ -2612,12 +2961,14 @@ out:
+@@ -2724,12 +3073,14 @@ out:
ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
return retval;
}
{
struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries, *at;
-@@ -2629,7 +2980,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
+@@ -2741,7 +3092,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
again:
restart = 0;
if (IS_ERR(frame))
return PTR_ERR(frame);
entries = frame->entries;
-@@ -2664,6 +3015,12 @@ again:
+@@ -2776,6 +3127,12 @@ again:
struct dx_node *node2;
struct buffer_head *bh2;
while (frame > frames) {
if (dx_get_count((frame - 1)->entries) <
dx_get_limit((frame - 1)->entries)) {
-@@ -2767,8 +3124,32 @@ again:
+@@ -2879,8 +3236,32 @@ again:
restart = 1;
goto journal_error;
}
if (IS_ERR(de)) {
err = PTR_ERR(de);
goto cleanup;
-@@ -2779,6 +3160,8 @@ again:
+@@ -2891,6 +3272,8 @@ again:
journal_error:
ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
cleanup:
dx_release(frames);
/* @restart is true means htree-path has been changed, we need to
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index a2fcbf8..82ea5f6 100644
+index e9a837a6..0d69b1d3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
-@@ -1291,6 +1291,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
-
+@@ -1288,6 +1288,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
inode_set_iversion(&ei->vfs_inode, 1);
+ ei->i_flags = 0;
spin_lock_init(&ei->i_raw_lock);
+ sema_init(&ei->i_append_sem, 1);
INIT_LIST_HEAD(&ei->i_prealloc_list);
--- /dev/null
+commit d8d8fd9192a54c7b8caef8cca9b7a1eb5e5e3298
+Author: Alex Zhuravlev <alex.zhuravlev@sun.com>
+AuthorDate: Thu Oct 23 10:02:19 2008 +0000
+Subject: ext4: support for tunable preallocation window
+
+Add support for tunable preallocation window and new tunables
+for large/small requests.
+
+Bugzilla-ID: b=12800
+Signed-off-by: Alex Zhuravlev <alex.zhuravlev@sun.com>
+Reviewed-by: Kalpak Shah <kalpak@clusterfs.com>
+Reviewed-by: Andreas Dilger <andreas.dilger@sun.com>
+---
+ fs/ext4/ext4.h | 7 +-
+ fs/ext4/inode.c | 3 +
+ fs/ext4/mballoc.c | 220 +++++++++++++++++++++++++++++++++++-----------
+ fs/ext4/sysfs.c | 8 +-
+ 4 files changed, 183 insertions(+), 55 deletions(-)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 2947e215..c57686b4 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1297,6 +1297,8 @@ extern void ext4_set_bits(void *bm, int cur, int len);
+ /* Metadata checksum algorithm codes */
+ #define EXT4_CRC32C_CHKSUM 1
+
++#define EXT4_MAX_PREALLOC_TABLE 64
++
+ /*
+ * Structure of the super block
+ */
+@@ -1555,11 +1557,13 @@ struct ext4_sb_info {
+ /* tunables */
+ unsigned long s_stripe;
+ unsigned int s_mb_max_linear_groups;
+- unsigned int s_mb_stream_request;
++ unsigned long s_mb_small_req;
++ unsigned long s_mb_large_req;
+ unsigned int s_mb_max_to_scan;
+ unsigned int s_mb_min_to_scan;
+ unsigned int s_mb_stats;
+ unsigned int s_mb_order2_reqs;
++ unsigned long *s_mb_prealloc_table;
+ unsigned int s_mb_group_prealloc;
+ unsigned int s_mb_max_inode_prealloc;
+ unsigned int s_max_dir_size_kb;
+@@ -2917,6 +2921,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
+ int len, int replay);
+
+ /* mballoc.c */
++extern const struct proc_ops ext4_seq_prealloc_table_fops;
+ extern const struct seq_operations ext4_mb_seq_groups_ops;
+ extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
+ extern long ext4_mb_stats;
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index e84b7181..f309de76 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2757,6 +2757,9 @@ static int ext4_writepages(struct address_space *mapping,
+ PAGE_SIZE >> inode->i_blkbits);
+ }
+
++ if (wbc->nr_to_write < sbi->s_mb_small_req)
++ wbc->nr_to_write = sbi->s_mb_small_req;
++
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
+
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index b014aa93..e1074fdb 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3074,6 +3074,99 @@ const struct seq_operations ext4_mb_seq_structs_summary_ops = {
+ .show = ext4_mb_seq_structs_summary_show,
+ };
+
++/*
++ * Parse the space-separated list in str[0..cnt): values must be strictly
++ * increasing and a zero (or unparsable) token ends the list.  With @update
++ * set the values are stored, zero-terminated, in sbi->s_mb_prealloc_table.
++ * simple_strtol() takes no length, so @str is expected to be NUL-terminated.
++ * Returns 0 on success, -EINVAL or -EOVERFLOW on a bad list.
++ */
++static int ext4_mb_check_and_update_prealloc(struct ext4_sb_info *sbi,
++				 char *str, size_t cnt, int update)
++{
++	unsigned long value, prev = 0;
++	char *cur = str, *end = str + cnt, *next;
++	int num = 0;
++
++	while (cur < end) {
++		while (cur < end && *cur == ' ')
++			cur++;
++		/* only trailing blanks were left: do not parse past the input */
++		if (cur == end)
++			break;
++		value = simple_strtol(cur, &next, 0);
++		if (value == 0)
++			break;
++		cur = next;
++
++		if (value > (sbi->s_blocks_per_group - 1 - 1 - sbi->s_itb_per_group))
++			return -EINVAL;
++
++		/* they should add values in order */
++		if (value <= prev)
++			return -EINVAL;
++
++		/* check before storing; the last slot is kept for the zero */
++		if (num >= EXT4_MAX_PREALLOC_TABLE - 1)
++			return -EOVERFLOW;
++		if (update)
++			sbi->s_mb_prealloc_table[num] = value;
++
++		prev = value;
++		num++;
++	}
++	if (update)
++		sbi->s_mb_prealloc_table[num] = 0;
++	return 0;
++}
++
++/* proc write handler: validate the whole list first, then install it. */
++static ssize_t ext4_mb_prealloc_table_proc_write(struct file *file,
++			const char __user *buf, size_t cnt, loff_t *pos)
++{
++	struct ext4_sb_info *sbi = EXT4_SB(PDE_DATA(file_inode(file)));
++	char str[128];
++	int rc;
++
++	if (cnt >= sizeof(str))
++		return -EINVAL;
++	if (copy_from_user(str, buf, cnt))
++		return -EFAULT;
++	str[cnt] = '\0';	/* simple_strtol() stops only at a non-digit */
++
++	/* dry run (update == 0) so a rejected list leaves the table untouched */
++	rc = ext4_mb_check_and_update_prealloc(sbi, str, cnt, 0);
++	if (!rc)
++		rc = ext4_mb_check_and_update_prealloc(sbi, str, cnt, 1);
++	return rc ? rc : cnt;
++}
++
++/* Print the table entries on one line; the first zero entry ends the list. */
++static int mb_prealloc_table_seq_show(struct seq_file *m, void *v)
++{
++	struct ext4_sb_info *sbi = EXT4_SB(m->private);
++	int i;
++
++	for (i = 0; i < EXT4_MAX_PREALLOC_TABLE &&
++			sbi->s_mb_prealloc_table[i] != 0; i++)
++		seq_printf(m, "%lu ", sbi->s_mb_prealloc_table[i]);
++	seq_printf(m, "\n");
++	return 0;
++}
++/* open: single_open() with the show routine above and PDE_DATA(inode). */
++static int mb_prealloc_table_seq_open(struct inode *inode, struct file *file)
++{
++	return single_open(file, mb_prealloc_table_seq_show, PDE_DATA(inode));
++}
++/* proc_ops for the prealloc table: seq_file reads plus the write handler. */
++const struct proc_ops ext4_seq_prealloc_table_fops = {
++	.proc_open	= mb_prealloc_table_seq_open,
++	.proc_read	= seq_read,
++	.proc_lseek	= seq_lseek,
++	.proc_release	= single_release,
++	.proc_write	= ext4_mb_prealloc_table_proc_write,
++};
++
+ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
+ {
+ int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+@@ -3335,7 +3428,7 @@ static int ext4_groupinfo_create_slab(size_t size)
+ int ext4_mb_init(struct super_block *sb)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+- unsigned i, j;
++ unsigned i, j, k, l;
+ unsigned offset, offset_incr;
+ unsigned max;
+ int ret;
+@@ -3404,7 +3497,6 @@ int ext4_mb_init(struct super_block *sb)
+ sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
+ sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
+ sbi->s_mb_stats = MB_DEFAULT_STATS;
+- sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
+ sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
+ sbi->s_mb_max_inode_prealloc = MB_DEFAULT_MAX_INODE_PREALLOC;
+ /*
+@@ -3429,9 +3521,29 @@ int ext4_mb_init(struct super_block *sb)
+ * RAID stripe size so that preallocations don't fragment
+ * the stripes.
+ */
+- if (sbi->s_stripe > 1) {
+- sbi->s_mb_group_prealloc = roundup(
+- sbi->s_mb_group_prealloc, sbi->s_stripe);
++
++ /* Allocate table once */
++ sbi->s_mb_prealloc_table = kzalloc(
++ EXT4_MAX_PREALLOC_TABLE * sizeof(unsigned long), GFP_NOFS);
++ if (sbi->s_mb_prealloc_table == NULL) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ if (sbi->s_stripe == 0) {
++ for (k = 0, l = 4; k <= 9; ++k, l *= 2)
++ sbi->s_mb_prealloc_table[k] = l;
++
++ sbi->s_mb_small_req = 256;
++ sbi->s_mb_large_req = 1024;
++ sbi->s_mb_group_prealloc = 512;
++ } else {
++ for (k = 0, l = sbi->s_stripe; k <= 2; ++k, l *= 2)
++ sbi->s_mb_prealloc_table[k] = l;
++
++ sbi->s_mb_small_req = sbi->s_stripe;
++ sbi->s_mb_large_req = sbi->s_stripe * 8;
++ sbi->s_mb_group_prealloc = sbi->s_stripe * 4;
+ }
+
+ sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
+@@ -3465,6 +3577,7 @@ out_free_locality_groups:
+ out:
+ kfree(sbi->s_mb_largest_free_orders);
+ kfree(sbi->s_mb_largest_free_orders_locks);
++ kfree(sbi->s_mb_prealloc_table);
+ kfree(sbi->s_mb_offsets);
+ sbi->s_mb_offsets = NULL;
+ kfree(sbi->s_mb_maxs);
+@@ -3523,6 +3636,7 @@ int ext4_mb_release(struct super_block *sb)
+ }
+ kfree(sbi->s_mb_largest_free_orders);
+ kfree(sbi->s_mb_largest_free_orders_locks);
++ kfree(sbi->s_mb_prealloc_table);
+ kfree(sbi->s_mb_offsets);
+ kfree(sbi->s_mb_maxs);
+ iput(sbi->s_buddy_cache);
+@@ -3737,7 +3851,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ int err, len;
+
+ BUG_ON(ac->ac_status != AC_STATUS_FOUND);
+- BUG_ON(ac->ac_b_ex.fe_len <= 0);
+
+ sb = ac->ac_sb;
+ sbi = EXT4_SB(sb);
+@@ -3973,13 +4086,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+ struct ext4_super_block *es = sbi->s_es;
+- int bsbits, max;
++ int bsbits, i, wind;
+ ext4_lblk_t end;
+- loff_t size, start_off;
++ loff_t size;
+ loff_t orig_size __maybe_unused;
+ ext4_lblk_t start;
+ struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
+ struct ext4_prealloc_space *pa;
++ unsigned long value, last_non_zero;
+
+ /* do normalize only data requests, metadata requests
+ do not need preallocation */
+@@ -4008,51 +4122,46 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ size = size << bsbits;
+ if (size < i_size_read(ac->ac_inode))
+ size = i_size_read(ac->ac_inode);
+- orig_size = size;
++ size = (size + ac->ac_sb->s_blocksize - 1) >> bsbits;
++
++ start = wind = 0;
++ value = last_non_zero = 0;
++
++ /* let's choose preallocation window depending on file size */
++ for (i = 0; i < EXT4_MAX_PREALLOC_TABLE; i++) {
++ value = sbi->s_mb_prealloc_table[i];
++ if (value == 0)
++ break;
++ else
++ last_non_zero = value;
+
+- /* max size of free chunks */
+- max = 2 << bsbits;
+-
+-#define NRL_CHECK_SIZE(req, size, max, chunk_size) \
+- (req <= (size) || max <= (chunk_size))
+-
+- /* first, try to predict filesize */
+- /* XXX: should this table be tunable? */
+- start_off = 0;
+- if (size <= 16 * 1024) {
+- size = 16 * 1024;
+- } else if (size <= 32 * 1024) {
+- size = 32 * 1024;
+- } else if (size <= 64 * 1024) {
+- size = 64 * 1024;
+- } else if (size <= 128 * 1024) {
+- size = 128 * 1024;
+- } else if (size <= 256 * 1024) {
+- size = 256 * 1024;
+- } else if (size <= 512 * 1024) {
+- size = 512 * 1024;
+- } else if (size <= 1024 * 1024) {
+- size = 1024 * 1024;
+- } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
+- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+- (21 - bsbits)) << 21;
+- size = 2 * 1024 * 1024;
+- } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
+- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+- (22 - bsbits)) << 22;
+- size = 4 * 1024 * 1024;
+- } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
+- (8<<20)>>bsbits, max, 8 * 1024)) {
+- start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
+- (23 - bsbits)) << 23;
+- size = 8 * 1024 * 1024;
++ if (size <= value) {
++ wind = value;
++ break;
++ }
++ }
++
++ if (wind == 0) {
++ if (last_non_zero != 0) {
++ __u64 tstart, tend;
++ /* file is quite large, we now preallocate with
++			 * the biggest configured window with regard to
++ * logical offset */
++ wind = last_non_zero;
++ tstart = ac->ac_o_ex.fe_logical;
++ do_div(tstart, wind);
++ start = tstart * wind;
++ tend = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len - 1;
++ do_div(tend, wind);
++ tend = tend * wind + wind;
++ size = tend - start;
++ }
+ } else {
+- start_off = (loff_t) ac->ac_o_ex.fe_logical << bsbits;
+- size = (loff_t) EXT4_C2B(EXT4_SB(ac->ac_sb),
+- ac->ac_o_ex.fe_len) << bsbits;
++ size = wind;
+ }
+- size = size >> bsbits;
+- start = start_off >> bsbits;
++
++
++ orig_size = size;
+
+ /*
+ * For tiny groups (smaller than 8MB) the chosen allocation
+@@ -4143,7 +4252,6 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+ (unsigned long) ac->ac_o_ex.fe_logical);
+ BUG();
+ }
+- BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+
+ /* now prepare goal request */
+
+@@ -5149,11 +5257,19 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
+
+ /* don't use group allocation for large files */
+ size = max(size, isize);
+- if (size > sbi->s_mb_stream_request) {
++ if ((ac->ac_o_ex.fe_len >= sbi->s_mb_small_req) ||
++ (size >= sbi->s_mb_large_req)) {
+ ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+ return;
+ }
+
++ /*
++ * request is so large that we don't care about
++	 * streaming - it outweighs any possible seek
++ */
++ if (ac->ac_o_ex.fe_len >= sbi->s_mb_large_req)
++ return;
++
+ BUG_ON(ac->ac_lg != NULL);
+ /*
+ * locality group prealloc space are per cpu. The reason for having
+diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
+index 2314f744..5fc52fc1 100644
+--- a/fs/ext4/sysfs.c
++++ b/fs/ext4/sysfs.c
+@@ -212,7 +212,8 @@ EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
+ EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
+ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
+-EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
++EXT4_RW_ATTR_SBI_UI(mb_small_req, s_mb_small_req);
++EXT4_RW_ATTR_SBI_UI(mb_large_req, s_mb_large_req);
+ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
+ EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc);
+ EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
+@@ -261,7 +262,8 @@ static struct attribute *ext4_attrs[] = {
+ ATTR_LIST(mb_max_to_scan),
+ ATTR_LIST(mb_min_to_scan),
+ ATTR_LIST(mb_order2_req),
+- ATTR_LIST(mb_stream_req),
++ ATTR_LIST(mb_small_req),
++ ATTR_LIST(mb_large_req),
+ ATTR_LIST(mb_group_prealloc),
+ ATTR_LIST(mb_max_inode_prealloc),
+ ATTR_LIST(mb_max_linear_groups),
+@@ -541,6 +543,8 @@ int ext4_register_sysfs(struct super_block *sb)
+ ext4_fc_info_show, sb);
+ proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc,
+ &ext4_mb_seq_groups_ops, sb);
++ proc_create_data("prealloc_table", S_IRUGO, sbi->s_proc,
++ &ext4_seq_prealloc_table_fops, sb);
+ proc_create_single_data("mb_stats", 0444, sbi->s_proc,
+ ext4_seq_mb_stats_show, sb);
+ proc_create_seq_data("mb_structs_summary", 0444, sbi->s_proc,
+--
+2.34.1
+
rhel8/ext4-inode-version.patch
linux-5.4/ext4-lookup-dotdot.patch
linux-5.14/ext4-print-inum-in-htree-warning.patch
-linux-5.14/ext4-prealloc.patch
+sles15sp4/ext4-prealloc.patch
ubuntu18/ext4-osd-iop-common.patch
sles15sp4/ext4-misc.patch
linux-5.14/ext4-mballoc-extra-checks.patch
ubuntu19/ext4-iget-with-flags.patch
linux-5.14/export-ext4fs-dirhash-helper.patch
linux-5.8/ext4-no-max-dir-size-limit-for-iam-objects.patch
+rhel9/ext4-dquot-commit-speedup.patch
linux-5.14/ext4-ialloc-uid-gid-and-pass-owner-down.patch
linux-5.14/ext4-projid-xattrs.patch
sles15sp4/ext4-delayed-iput.patch
linux-5.14/ext4-xattr-disable-credits-check.patch
linux-5.10/ext4-fiemap-kernel-data.patch
rhel8/ext4-old_ea_inodes_handling_fix.patch
-linux-5.14/ext4-enc-flag.patch
+ubuntu20.04.5/ext4-filename-encode.patch
+rhel9.1/ext4-enc-flag.patch
+rhel9.2/ext4-encdata.patch
rhel9/ext4-add-periodic-superblock-update.patch
rhel9.4/ext4-add-IGET_NO_CHECKS-flag.patch
rhel8/ext4-inode-version.patch
linux-5.4/ext4-lookup-dotdot.patch
linux-5.14/ext4-print-inum-in-htree-warning.patch
-linux-5.14/ext4-prealloc.patch
+sles15sp4/ext4-prealloc.patch
ubuntu18/ext4-osd-iop-common.patch
sles15sp4/ext4-misc.patch
linux-5.14/ext4-mballoc-extra-checks.patch
ubuntu19/ext4-iget-with-flags.patch
linux-5.14/export-ext4fs-dirhash-helper.patch
linux-5.8/ext4-no-max-dir-size-limit-for-iam-objects.patch
+rhel9/ext4-dquot-commit-speedup.patch
linux-5.14/ext4-ialloc-uid-gid-and-pass-owner-down.patch
linux-5.14/ext4-projid-xattrs.patch
sles15sp4/ext4-delayed-iput.patch
linux-5.14/ext4-xattr-disable-credits-check.patch
linux-5.10/ext4-fiemap-kernel-data.patch
rhel8/ext4-old_ea_inodes_handling_fix.patch
-linux-5.14/ext4-enc-flag.patch
+ubuntu20.04.5/ext4-filename-encode.patch
+rhel9.1/ext4-enc-flag.patch
+rhel9.2/ext4-encdata.patch
rhel9/ext4-add-periodic-superblock-update.patch
rhel9.4/ext4-add-IGET_NO_CHECKS-flag.patch