esac
], [test x$SUSE_KERNEL = xyes], [
AS_VERSION_COMPARE([$LINUXRELEASE],[5.3.18],[
- AS_VERSION_COMPARE([$LINUXRELEASE],[4.12.14],[
- AS_VERSION_COMPARE([$LINUXRELEASE],[4.4.82],[
- AS_VERSION_COMPARE([$LINUXRELEASE],[4.4.0],[
- AS_VERSION_COMPARE([$LINUXRELEASE],[3.12.0],[],
- [LDISKFS_SERIES="3.12-sles12.series"],[
- PLEV=$(grep PATCHLEVEL /etc/SuSE-release | sed -e 's/.*= *//')
- case $PLEV in # (
- 1) LDISKFS_SERIES="3.12-sles12sp1.series"
- ;; # (
- *) LDISKFS_SERIES="3.12-sles12.series"
- ;;
- esac
- ])],[LDISKFS_SERIES="4.4-sles12sp2.series"],
- [LDISKFS_SERIES="4.4-sles12sp2.series"]
- )], [LDISKFS_SERIES="4.4-sles12sp3.series"],
- [LDISKFS_SERIES="4.4-sles12sp3.series"]
- )], [], [
+ AS_VERSION_COMPARE([$LINUXRELEASE],[4.12.14],[], [], [
suse_conf=$LINUX_OBJ/include/generated/uapi/linux/suse_version.h
suse_vers=$(awk '[$]2 == "SUSE_VERSION" {print [$]3 }' $suse_conf)
suse_patchlevel=$(awk '[$]2 == "SUSE_PATCHLEVEL" {print [$]3 }' $suse_conf)
+++ /dev/null
-Index: linux-stage/fs/ext4/inode.c
-===================================================================
---- linux-stage.orig/fs/ext4/inode.c
-+++ linux-stage/fs/ext4/inode.c
-@@ -734,6 +734,9 @@ out_sem:
- !(flags & EXT4_GET_BLOCKS_ZERO) &&
- !IS_NOQUOTA(inode) &&
- ext4_should_order_data(inode)) {
-+ ret = ext4_inode_attach_jinode(inode);
-+ if (ret)
-+ return ret;
- ret = ext4_jbd2_file_inode(handle, inode);
- if (ret)
- return ret;
-@@ -2755,6 +2758,9 @@ static int ext4_writepages(struct addres
- mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT;
- }
-
-+ ret = ext4_inode_attach_jinode(inode);
-+ if (ret)
-+ goto out_writepages;
- mpd.inode = inode;
- mpd.wbc = wbc;
- ext4_io_submit_init(&mpd.io_submit, wbc);
-@@ -4116,6 +4122,7 @@ int ext4_inode_attach_jinode(struct inod
- jbd2_free_inode(jinode);
- return 0;
- }
-+EXPORT_SYMBOL(ext4_inode_attach_jinode);
-
- /*
- * ext4_truncate()
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -2632,6 +2632,7 @@ extern int ext4_group_add_blocks(handle_
- extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
-
- /* inode.c */
-+#define HAVE_LDISKFS_INFO_JINODE
- int ext4_inode_is_fast_symlink(struct inode *inode);
- struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
- struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
+++ /dev/null
-Since we could skip corrupt block groups, this patch
-use ext4_warning() intead of ext4_error() to make FS not
-emount RO in default, also fix a leftover from upstream
-commit 163a203ddb36c36d4a1c942
----
-diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
-index e069155..692b5e4 100644
---- a/fs/ext4/balloc.c
-+++ b/fs/ext4/balloc.c
-@@ -185,25 +185,17 @@ static int ext4_init_block_bitmap(struct super_block *sb,
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_fsblk_t start, tmp;
- int flex_bg = 0;
-- struct ext4_group_info *grp;
-
- J_ASSERT_BH(bh, buffer_locked(bh));
-
- /* If checksum is bad mark all blocks used to prevent allocation
- * essentially implementing a per-group read-only flag. */
- if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-- grp = ext4_get_group_info(sb, block_group);
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-- if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-- int count;
-- count = ext4_free_inodes_count(sb, gdp);
-- percpu_counter_sub(&sbi->s_freeinodes_counter,
-- count);
-- }
-- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT |
-+ EXT4_GROUP_INFO_IBITMAP_CORRUPT,
-+ "Checksum bad for group %u",
-+ block_group);
- return -EFSBADCRC;
- }
- memset(bh->b_data, 0, sb->s_blocksize);
-@@ -367,7 +359,6 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
- {
- ext4_fsblk_t blk;
- struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
-- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- if (buffer_verified(bh))
- return 0;
-@@ -377,22 +367,19 @@ static void ext4_validate_block_bitmap(struct super_block *sb,
- if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
- desc, bh))) {
- ext4_unlock_group(sb, block_group);
-- ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT,
-+ "bg %u: bad block bitmap checksum",
-+ block_group);
- return -EFSBADCRC;
- }
- blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
- if (unlikely(blk != 0)) {
- ext4_unlock_group(sb, block_group);
-- ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
-- block_group, blk);
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT,
-+ "bg %u: block %llu: invalid block bitmap",
-+ block_group, blk);
- return -EFSCORRUPTED;
- }
- set_buffer_verified(bh);
-@@ -445,8 +432,6 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
- ext4_unlock_group(sb, block_group);
- unlock_buffer(bh);
- if (err) {
-- ext4_error(sb, "Failed to init block bitmap for group "
-- "%u: %d", block_group, err);
- goto out;
- }
- goto verify;
-diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
-index 3c41773..63a63b6 100644
---- a/fs/ext4/ext4.h
-+++ b/fs/ext4/ext4.h
-@@ -91,6 +91,17 @@ typedef __u32 ext4_lblk_t;
- /* data type for block group number */
- typedef unsigned int ext4_group_t;
-
-+void __ext4_corrupted_block_group(struct super_block *sb,
-+ ext4_group_t group, unsigned int flags,
-+ const char *function, unsigned int line);
-+
-+#define ext4_corrupted_block_group(sb, group, flags, fmt, ...) \
-+ do { \
-+ __ext4_warning(sb, __func__, __LINE__, fmt, \
-+ ##__VA_ARGS__); \
-+ __ext4_corrupted_block_group(sb, group, flags, \
-+ __func__, __LINE__); \
-+ } while (0)
- /*
- * Flags used in mballoc's allocation_context flags field.
- *
-@@ -2673,7 +2684,11 @@ struct ext4_group_info {
- #define EXT4_GROUP_INFO_NEED_INIT_BIT 0
- #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1
- #define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2
-+#define EXT4_GROUP_INFO_BBITMAP_CORRUPT \
-+ (1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
- #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3
-+#define EXT4_GROUP_INFO_IBITMAP_CORRUPT \
-+ (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
-
- #define EXT4_MB_GRP_NEED_INIT(grp) \
- (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
-diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
-index fc65310..92bcc8d 100644
---- a/fs/ext4/ialloc.c
-+++ b/fs/ext4/ialloc.c
-@@ -70,25 +70,15 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp)
- {
-- struct ext4_group_info *grp;
-- struct ext4_sb_info *sbi = EXT4_SB(sb);
- J_ASSERT_BH(bh, buffer_locked(bh));
-
- /* If checksum is bad mark all blocks and inodes use to prevent
- * allocation, essentially implementing a per-group read-only flag. */
- if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-- grp = ext4_get_group_info(sb, block_group);
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-- if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-- int count;
-- count = ext4_free_inodes_count(sb, gdp);
-- percpu_counter_sub(&sbi->s_freeinodes_counter,
-- count);
-- }
-- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT |
-+ EXT4_GROUP_INFO_IBITMAP_CORRUPT,
-+ "Checksum bad for group %u", block_group);
- return -EFSBADCRC;
- }
-
-@@ -193,8 +180,6 @@ verify:
- ext4_unlock_group(sb, block_group);
- unlock_buffer(bh);
- if (err) {
-- ext4_error(sb, "Failed to init inode bitmap for group "
-- "%u: %d", block_group, err);
- goto out;
- }
- return bh;
-@@ -337,14 +318,9 @@ out:
- if (!fatal)
- fatal = err;
- } else {
-- ext4_error(sb, "bit already cleared for inode %lu", ino);
-- if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-- int count;
-- count = ext4_free_inodes_count(sb, gdp);
-- percpu_counter_sub(&sbi->s_freeinodes_counter,
-- count);
-- }
-- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_IBITMAP_CORRUPT,
-+ "bit already cleared for inode %lu", ino);
- }
-
- error_return:
-diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
-index 7282d07..e6805e6 100644
---- a/fs/ext4/mballoc.c
-+++ b/fs/ext4/mballoc.c
-@@ -752,10 +752,18 @@ int ext4_mb_generate_buddy(struct super_block *sb,
- if (free != grp->bb_free) {
- struct ext4_group_desc *gdp;
- gdp = ext4_get_group_desc(sb, group, NULL);
-- ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
-- "%u in gd, %lu pa's\n", (long unsigned int)group,
-- free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
-- grp->bb_prealloc_nr);
-+
-+ ext4_corrupted_block_group(sb, group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT,
-+ "group %lu: %u blocks in bitmap, %u in bb, %u in gd, %lu pa's block bitmap corrupt",
-+ (unsigned long int)group, free, grp->bb_free,
-+ ext4_free_group_clusters(sb, gdp),
-+ grp->bb_prealloc_nr);
-+ /*
-+ * If we intend to continue, we consider group descriptor
-+ * corrupt and update bb_free using bitmap value
-+ */
-+ grp->bb_free = free;
- return -EIO;
- }
- mb_set_largest_free_order(sb, grp);
-@@ -1101,7 +1109,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- int block;
- int pnum;
- int poff;
-- struct page *page;
-+ struct page *page = NULL;
- int ret;
- struct ext4_group_info *grp;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-@@ -1127,7 +1135,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- */
- ret = ext4_mb_init_group(sb, group);
- if (ret)
-- return ret;
-+ goto err;
- }
-
- /*
-@@ -1227,6 +1235,7 @@ err:
- page_cache_release(e4b->bd_buddy_page);
- e4b->bd_buddy = NULL;
- e4b->bd_bitmap = NULL;
-+ ext4_warning(sb, "Error loading buddy information for %u", group);
- return ret;
- }
-
-@@ -3599,9 +3608,11 @@ int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
- }
-
- if (free != free_in_gdp) {
-- ext4_error(sb, "on-disk bitmap for group %d"
-- "corrupted: %u blocks free in bitmap, %u - in gd\n",
-- group, free, free_in_gdp);
-+ ext4_corrupted_block_group(sb, group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT,
-+ "on-disk bitmap for group %d corrupted: %u blocks free in bitmap, %u - in gd\n",
-+ group, free,
-+ free_in_gdp);
- return -EIO;
- }
- return 0;
-@@ -3962,16 +3973,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
- /* "free < pa->pa_free" means we maybe double alloc the same blocks,
- * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
- if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
-- ext4_error(sb, "pa free mismatch: [pa %p] "
-- "[phy %lu] [logic %lu] [len %u] [free %u] "
-- "[error %u] [inode %lu] [freed %u]", pa,
-- (unsigned long)pa->pa_pstart,
-- (unsigned long)pa->pa_lstart,
-- (unsigned)pa->pa_len, (unsigned)pa->pa_free,
-- (unsigned)pa->pa_error, pa->pa_inode->i_ino,
-- free);
- ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
-- free, pa->pa_free);
-+ free, pa->pa_free);
- /*
- * pa is already deleted so we use the value obtained
- * from the bitmap and continue.
-@@ -4031,15 +4034,11 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
-- ext4_error(sb, "Error %d reading block bitmap for %u",
-- err, group);
- return 0;
- }
-
- err = ext4_mb_load_buddy(sb, group, &e4b);
- if (err) {
-- ext4_warning(sb, "Error %d loading buddy information for %u",
-- err, group);
- put_bh(bitmap_bh);
- return 0;
- }
-@@ -4198,17 +4198,12 @@ repeat:
-
- err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
- GFP_NOFS|__GFP_NOFAIL);
-- if (err) {
-- ext4_error(sb, "Error %d loading buddy information for %u",
-- err, group);
-+ if (err)
- return;
-- }
-
- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
-- ext4_error(sb, "Error %d reading block bitmap for %u",
-- err, group);
- ext4_mb_unload_buddy(&e4b);
- continue;
- }
-@@ -4467,11 +4462,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
-
- err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
- GFP_NOFS|__GFP_NOFAIL);
-- if (err) {
-- ext4_error(sb, "Error %d loading buddy information for %u",
-- err, group);
-+ if (err)
- continue;
-- }
- ext4_lock_group(sb, group);
- list_del(&pa->pa_group_list);
- ext4_get_group_info(sb, group)->bb_prealloc_nr--;
-@@ -4742,17 +4734,18 @@ errout:
- * been updated or not when fail case. So can
- * not revert pa_free back, just mark pa_error*/
- pa->pa_error++;
-- ext4_error(sb,
-- "Updating bitmap error: [err %d] "
-- "[pa %p] [phy %lu] [logic %lu] "
-- "[len %u] [free %u] [error %u] "
-- "[inode %lu]", *errp, pa,
-- (unsigned long)pa->pa_pstart,
-- (unsigned long)pa->pa_lstart,
-- (unsigned)pa->pa_len,
-- (unsigned)pa->pa_free,
-- (unsigned)pa->pa_error,
-- pa->pa_inode ? pa->pa_inode->i_ino : 0);
-+ ext4_corrupted_block_group(sb, 0, 0,
-+ "Updating bitmap error: [err %d] "
-+ "[pa %p] [phy %lu] [logic %lu] "
-+ "[len %u] [free %u] [error %u] "
-+ "[inode %lu]", *errp, pa,
-+ (unsigned long)pa->pa_pstart,
-+ (unsigned long)pa->pa_lstart,
-+ (unsigned)pa->pa_len,
-+ (unsigned)pa->pa_free,
-+ (unsigned)pa->pa_error,
-+ pa->pa_inode ?
-+ pa->pa_inode->i_ino : 0);
- }
- }
- ext4_mb_release_context(ac);
-@@ -5037,7 +5030,7 @@ do_more:
-
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
- if (err)
-- goto error_return;
-+ goto error_brelse;
-
- if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
- struct ext4_free_data *new_entry;
-@@ -5119,8 +5112,9 @@ do_more:
- goto do_more;
- }
- error_return:
-- brelse(bitmap_bh);
- ext4_std_error(sb, err);
-+error_brelse:
-+ brelse(bitmap_bh);
- return;
- }
-
-@@ -5216,7 +5210,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
-
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
- if (err)
-- goto error_return;
-+ goto error_brelse;
-
- /*
- * need to update group_info->bb_free and bitmap
-@@ -5253,8 +5247,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
- err = ret;
-
- error_return:
-- brelse(bitmap_bh);
- ext4_std_error(sb, err);
-+error_brelse:
-+ brelse(bitmap_bh);
- return err;
- }
-
-@@ -5329,11 +5324,9 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
- trace_ext4_trim_all_free(sb, group, start, max);
-
- ret = ext4_mb_load_buddy(sb, group, &e4b);
-- if (ret) {
-- ext4_warning(sb, "Error %d loading buddy information for %u",
-- ret, group);
-+ if (ret)
- return ret;
-- }
-+
- bitmap = e4b.bd_bitmap;
-
- ext4_lock_group(sb, group);
-diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index c625960..0de22f2 100644
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -633,6 +633,37 @@ void __ext4_warning(struct super_block *sb, const char *function,
- va_end(args);
- }
-
-+void __ext4_corrupted_block_group(struct super_block *sb, ext4_group_t group,
-+ unsigned int flags, const char *function,
-+ unsigned int line)
-+{
-+ struct ext4_sb_info *sbi = EXT4_SB(sb);
-+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-+
-+ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT &&
-+ !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
-+ percpu_counter_sub(&sbi->s_freeclusters_counter,
-+ grp->bb_free);
-+ set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
-+ &grp->bb_state);
-+ }
-+
-+ if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT &&
-+ !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-+ if (gdp) {
-+ int count;
-+
-+ count = ext4_free_inodes_count(sb, gdp);
-+ percpu_counter_sub(&sbi->s_freeinodes_counter,
-+ count);
-+ }
-+ set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
-+ &grp->bb_state);
-+ }
-+ save_error_info(sb, function, line);
-+}
-+
- void __ext4_grp_locked_error(const char *function, unsigned int line,
- struct super_block *sb, ext4_group_t grp,
- unsigned long ino, ext4_fsblk_t block,
+++ /dev/null
-this patch implements feature which allows ext4 fs users (e.g. Lustre)
-to store data in ext4 dirent.
-data is stored in ext4 dirent after file-name, this space is accounted
-in de->rec_len. flag EXT4_DIRENT_LUFID added to d_type if extra data
-is present.
-
-make use of dentry->d_fsdata to pass fid to ext4. so no
-changes in ext4_add_entry() interface required.
-
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/dir.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/dir.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/dir.c
-@@ -71,11 +71,11 @@ int __ext4_check_dir_entry(const char *f
- const int rlen = ext4_rec_len_from_disk(de->rec_len,
- dir->i_sb->s_blocksize);
-
-- if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
-+ if (unlikely(rlen < __EXT4_DIR_REC_LEN(1)))
- error_msg = "rec_len is smaller than minimal";
- else if (unlikely(rlen % 4 != 0))
- error_msg = "rec_len % 4 != 0";
-- else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
-+ else if (unlikely(rlen < EXT4_DIR_REC_LEN(de)))
- error_msg = "rec_len is too small for name_len";
- else if (unlikely(((char *) de - buf) + rlen > size))
- error_msg = "directory entry across range";
-@@ -208,7 +208,7 @@ revalidate:
- * failure will be detected in the
- * dirent test below. */
- if (ext4_rec_len_from_disk(de->rec_len,
-- sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
-+ sb->s_blocksize) < __EXT4_DIR_REC_LEN(1))
- break;
- i += ext4_rec_len_from_disk(de->rec_len,
- sb->s_blocksize);
-@@ -438,12 +438,17 @@ int ext4_htree_store_dirent(struct file
- struct fname *fname, *new_fn;
- struct dir_private_info *info;
- int len;
-+ int extra_data = 0;
-
- info = dir_file->private_data;
- p = &info->root.rb_node;
-
- /* Create and allocate the fname structure */
-- len = sizeof(struct fname) + ent_name->len + 1;
-+ if (dirent->file_type & EXT4_DIRENT_LUFID)
-+ extra_data = ext4_get_dirent_data_len(dirent);
-+
-+ len = sizeof(struct fname) + ent_name->len + extra_data + 1;
-+
- new_fn = kzalloc(len, GFP_KERNEL);
- if (!new_fn)
- return -ENOMEM;
-@@ -452,7 +457,7 @@ int ext4_htree_store_dirent(struct file
- new_fn->inode = le32_to_cpu(dirent->inode);
- new_fn->name_len = dirent->name_len;
- new_fn->file_type = dirent->file_type;
-- memcpy(new_fn->name, ent_name->name, ent_name->len);
-+ memcpy(new_fn->name, ent_name->name, ent_name->len + extra_data);
- new_fn->name[ent_name->len] = 0;
-
- while (*p) {
-@@ -652,7 +457,7 @@ int ext4_htree_store_dirent(struct file
- if (ldiskfs_check_dir_entry(dir, NULL, de, bh,
- buf, buf_size, offset))
- return -EFSCORRUPTED;
-- nlen = EXT4_DIR_REC_LEN(de->name_len);
-+ nlen = EXT4_DIR_REC_LEN(de);
- rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
- de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
- offset += rlen;
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-@@ -952,6 +952,7 @@ struct ext4_inode_info {
- #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
- #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
- #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-+#define EXT4_MOUNT_DIRDATA 0x40000 /* Data in directory entries*/
- #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
- #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
- #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
-@@ -1534,6 +1535,7 @@ static inline void ext4_clear_state_flag
- EXT4_FEATURE_INCOMPAT_64BIT| \
- EXT4_FEATURE_INCOMPAT_FLEX_BG| \
- EXT4_FEATURE_INCOMPAT_MMP | \
-+ EXT4_FEATURE_INCOMPAT_DIRDATA| \
- EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
- EXT4_FEATURE_INCOMPAT_ENCRYPT | \
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
-@@ -1640,6 +1642,43 @@ struct ext4_dir_entry_tail {
- #define EXT4_FT_SYMLINK 7
-
- #define EXT4_FT_MAX 8
-+#define EXT4_FT_MASK 0xf
-+
-+#if EXT4_FT_MAX > EXT4_FT_MASK
-+#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
-+#endif
-+
-+/*
-+ * d_type has 4 unused bits, so it can hold four types data. these different
-+ * type of data (e.g. lustre data, high 32 bits of 64-bit inode number) can be
-+ * stored, in flag order, after file-name in ext4 dirent.
-+*/
-+/*
-+ * this flag is added to d_type if ext4 dirent has extra data after
-+ * filename. this data length is variable and length is stored in first byte
-+ * of data. data start after filename NUL byte.
-+ * This is used by Lustre FS.
-+ */
-+#define EXT4_DIRENT_LUFID 0x10
-+
-+#define EXT4_LUFID_MAGIC 0xAD200907UL
-+struct ext4_dentry_param {
-+ __u32 edp_magic; /* EXT4_LUFID_MAGIC */
-+ char edp_len; /* size of edp_data in bytes */
-+ char edp_data[0]; /* packed array of data */
-+} __packed;
-+
-+static inline unsigned char *ext4_dentry_get_data(struct super_block *sb,
-+ struct ext4_dentry_param *p)
-+
-+{
-+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
-+ return NULL;
-+ if (p && p->edp_magic == EXT4_LUFID_MAGIC)
-+ return &p->edp_len;
-+ else
-+ return NULL;
-+}
-
- #define EXT4_FT_DIR_CSUM 0xDE
-
-@@ -1650,8 +1689,11 @@ struct ext4_dir_entry_tail {
- */
- #define EXT4_DIR_PAD 4
- #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
--#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
-+#define __EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
- ~EXT4_DIR_ROUND)
-+#define EXT4_DIR_REC_LEN(de) (__EXT4_DIR_REC_LEN((de)->name_len +\
-+ ext4_get_dirent_data_len(de)))
-+
- #define EXT4_MAX_REC_LEN ((1<<16)-1)
-
- /*
-@@ -1987,12 +2029,12 @@ extern int ext4_find_dest_de(struct inod
- struct buffer_head *bh,
- void *buf, int buf_size,
- struct ext4_filename *fname,
-- struct ext4_dir_entry_2 **dest_de);
-+ struct ext4_dir_entry_2 **dest_de, int *dlen);
- int ext4_insert_dentry(struct inode *dir,
- struct inode *inode,
- struct ext4_dir_entry_2 *de,
- int buf_size,
-- struct ext4_filename *fname);
-+ struct ext4_filename *fname, void *data);
- static inline void ext4_update_dx_flag(struct inode *inode)
- {
- if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
-@@ -2004,10 +2046,17 @@ static unsigned char ext4_filetype_table
-
- static inline unsigned char get_dtype(struct super_block *sb, int filetype)
- {
-+ int fl_index = filetype & EXT4_FT_MASK;
-+
-- if (!ext4_has_feature_filetype(sb) || filetype >= EXT4_FT_MAX)
-+ if (!ext4_has_feature_filetype(sb) || fl_index >= EXT4_FT_MAX)
- return DT_UNKNOWN;
-
-- return ext4_filetype_table[filetype];
-+ if (!test_opt(sb, DIRDATA))
-+ return ext4_filetype_table[fl_index];
-+
-+ return (ext4_filetype_table[fl_index]) |
-+ (filetype & EXT4_DIRENT_LUFID);
-+
- }
-
- /* fsync.c */
-@@ -2157,6 +2206,8 @@ extern struct buffer_head * ext4_find_en
- extern int ext4_delete_entry(handle_t *handle, struct inode * dir,
- struct ext4_dir_entry_2 *de_del,
- struct buffer_head *bh);
-+extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
-+ struct inode *inode, const void *, const void *);
- extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
- __u32 start_minor_hash, __u32 *next_hash);
- extern int ext4_search_dir(struct buffer_head *bh,
-@@ -2761,6 +2810,36 @@ extern struct mutex ext4__aio_mutex[EXT4
- extern int ext4_resize_begin(struct super_block *sb);
- extern void ext4_resize_end(struct super_block *sb);
-
-+/*
-+ * Compute the total directory entry data length.
-+ * This includes the filename and an implicit NUL terminator (always present),
-+ * and optional extensions. Each extension has a bit set in the high 4 bits of
-+ * de->file_type, and the extension length is the first byte in each entry.
-+ */
-+static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
-+{
-+ char *len = de->name + de->name_len + 1 /* NUL terminator */;
-+ int dlen = 0;
-+ __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
-+ struct ext4_dir_entry_tail *t = (struct ext4_dir_entry_tail *)de;
-+
-+ if (!t->det_reserved_zero1 &&
-+ le16_to_cpu(t->det_rec_len) ==
-+ sizeof(struct ext4_dir_entry_tail) &&
-+ !t->det_reserved_zero2 &&
-+ t->det_reserved_ft == EXT4_FT_DIR_CSUM)
-+ return 0;
-+
-+ while (extra_data_flags) {
-+ if (extra_data_flags & 1) {
-+ dlen += *len + (dlen == 0);
-+ len += *len;
-+ }
-+ extra_data_flags >>= 1;
-+ }
-+ return dlen;
-+}
-+
- #endif /* __KERNEL__ */
-
- #endif /* _EXT4_H */
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/namei.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/namei.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/namei.c
-@@ -239,7 +239,8 @@ static unsigned dx_get_count(struct dx_e
- static unsigned dx_get_limit(struct dx_entry *entries);
- static void dx_set_count(struct dx_entry *entries, unsigned value);
- static void dx_set_limit(struct dx_entry *entries, unsigned value);
--static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
-+static inline unsigned dx_root_limit(struct inode *dir,
-+ struct ext4_dir_entry_2 *dot_de, unsigned infosize);
- static unsigned dx_node_limit(struct inode *dir);
- static struct dx_frame *dx_probe(const struct qstr *d_name,
- struct inode *dir,
-@@ -379,22 +380,23 @@ static struct dx_countlimit *get_dx_coun
- {
- struct ext4_dir_entry *dp;
- struct dx_root_info *root;
-- int count_offset;
-+ int count_offset, dot_rec_len, dotdot_rec_len;
-
- if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
- count_offset = 8;
-- else if (le16_to_cpu(dirent->rec_len) == 12) {
-- dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
-+ else {
-+ dot_rec_len = le16_to_cpu(dirent->rec_len);
-+ dp = (struct ext4_dir_entry *)(((void *)dirent) + dot_rec_len);
- if (le16_to_cpu(dp->rec_len) !=
-- EXT4_BLOCK_SIZE(inode->i_sb) - 12)
-+ EXT4_BLOCK_SIZE(inode->i_sb) - dot_rec_len)
- return NULL;
-- root = (struct dx_root_info *)(((void *)dp + 12));
-+ dotdot_rec_len = EXT4_DIR_REC_LEN((struct ext4_dir_entry_2 *)dp);
-+ root = (struct dx_root_info *)(((void *)dp + dotdot_rec_len));
- if (root->reserved_zero ||
- root->info_length != sizeof(struct dx_root_info))
- return NULL;
-- count_offset = 32;
-- } else
-- return NULL;
-+ count_offset = 8 + dot_rec_len + dotdot_rec_len;
-+ }
-
- if (offset)
- *offset = count_offset;
-@@ -504,11 +505,12 @@ ext4_next_entry(struct ext4_dir_entry_2
- */
- struct dx_root_info *dx_get_dx_info(struct ext4_dir_entry_2 *de)
- {
-- /* get dotdot first */
-- de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
-+ BUG_ON(de->name_len != 1);
-+ /* get dotdot first */
-+ de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
-
-- /* dx root info is after dotdot entry */
-- de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
-+ /* dx root info is after dotdot entry */
-+ de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
-
- return (struct dx_root_info *)de;
- }
-@@ -553,10 +555,16 @@ static inline void dx_set_limit(struct d
- ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
- }
-
--static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
-+static inline unsigned dx_root_limit(struct inode *dir,
-+ struct ext4_dir_entry_2 *dot_de, unsigned infosize)
- {
-- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
-- EXT4_DIR_REC_LEN(2) - infosize;
-+ struct ext4_dir_entry_2 *dotdot_de;
-+ unsigned entry_space;
-+
-+ BUG_ON(dot_de->name_len != 1);
-+ dotdot_de = ext4_next_entry(dot_de, dir->i_sb->s_blocksize);
-+ entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(dot_de) -
-+ EXT4_DIR_REC_LEN(dotdot_de) - infosize;
-
- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
-@@ -566,7 +574,7 @@ static inline unsigned dx_root_limit(str
-
- static inline unsigned dx_node_limit(struct inode *dir)
- {
-- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
-+ unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0);
-
- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
-@@ -617,7 +625,7 @@ static struct stats dx_show_leaf(struct
- printk(":%x.%u ", h.hash,
- (unsigned) ((char *) de - base));
- }
-- space += EXT4_DIR_REC_LEN(de->name_len);
-+ space += EXT4_DIR_REC_LEN(de);
- names++;
- }
- de = ext4_next_entry(de, size);
-@@ -723,11 +731,14 @@ dx_probe(const struct qstr *d_name, stru
-
- entries = (struct dx_entry *) (((char *)info) + info->info_length);
-
-- if (dx_get_limit(entries) != dx_root_limit(dir,
-- info->info_length)) {
-+ if (dx_get_limit(entries) !=
-+ dx_root_limit(dir, (struct ext4_dir_entry_2 *)frame->bh->b_data,
-+ info->info_length)) {
- ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
- dx_get_limit(entries),
-- dx_root_limit(dir, info->info_length));
-+ dx_root_limit(dir,
-+ (struct ext4_dir_entry_2 *)frame->bh->b_data,
-+ info->info_length));
- goto fail;
- }
-
-@@ -916,7 +925,7 @@ static int htree_dirblock_to_tree(struct
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- top = (struct ext4_dir_entry_2 *) ((char *) de +
- dir->i_sb->s_blocksize -
-- EXT4_DIR_REC_LEN(0));
-+ __EXT4_DIR_REC_LEN(0));
- #ifdef CONFIG_EXT4_FS_ENCRYPTION
- /* Check if the directory is encrypted */
- if (ext4_encrypted_inode(dir)) {
-@@ -1508,7 +1517,7 @@ dx_move_dirents(char *from, char *to, st
- while (count--) {
- struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
- (from + (map->offs<<2));
-- rec_len = EXT4_DIR_REC_LEN(de->name_len);
-+ rec_len = EXT4_DIR_REC_LEN(de);
- memcpy (to, de, rec_len);
- ((struct ext4_dir_entry_2 *) to)->rec_len =
- ext4_rec_len_to_disk(rec_len, blocksize);
-@@ -1532,7 +1541,7 @@ static struct ext4_dir_entry_2* dx_pack_
- while ((char*)de < base + blocksize) {
- next = ext4_next_entry(de, blocksize);
- if (de->inode && de->name_len) {
-- rec_len = EXT4_DIR_REC_LEN(de->name_len);
-+ rec_len = EXT4_DIR_REC_LEN(de);
- if (de > to)
- memmove(to, de, rec_len);
- to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
-@@ -1664,15 +1673,17 @@ int ext4_find_dest_de(struct inode *dir,
- struct buffer_head *bh,
- void *buf, int buf_size,
- struct ext4_filename *fname,
-- struct ext4_dir_entry_2 **dest_de)
-+ struct ext4_dir_entry_2 **dest_de, int *dlen)
- {
- struct ext4_dir_entry_2 *de;
-- unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname));
-+ unsigned short reclen = __EXT4_DIR_REC_LEN(fname_len(fname)) +
-+ (dlen ? *dlen : 0);
- int nlen, rlen;
- unsigned int offset = 0;
- char *top;
- int res;
-
-+ dlen ? *dlen = 0 : 0; /* default set to 0 */
- de = (struct ext4_dir_entry_2 *)buf;
- top = buf + buf_size - reclen;
- while ((char *) de <= top) {
-@@ -1680,10 +1690,26 @@ int ext4_find_dest_de(struct inode *dir,
- res = -EEXIST;
- goto return_result;
- }
-- nlen = EXT4_DIR_REC_LEN(de->name_len);
-+ nlen = EXT4_DIR_REC_LEN(de);
- rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
- if ((de->inode ? rlen - nlen : rlen) >= reclen)
- break;
-+ /* Then for dotdot entries, check for the smaller space
-+ * required for just the entry, no FID */
-+ if (fname_len(fname) == 2 && memcmp(fname_name(fname), "..", 2) == 0) {
-+ if ((de->inode ? rlen - nlen : rlen) >=
-+ __EXT4_DIR_REC_LEN(fname_len(fname))) {
-+ /* set dlen=1 to indicate not
-+ * enough space store fid */
-+ dlen ? *dlen = 1 : 0;
-+ break;
-+ }
-+ /* The new ".." entry must be written over the
-+ * previous ".." entry, which is the first
-+ * entry traversed by this scan. If it doesn't
-+ * fit, something is badly wrong, so -EIO. */
-+ return -EIO;
-+ }
- de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
- offset += rlen;
- }
-@@ -1697,12 +1723,12 @@ int ext4_find_dest_de(struct inode *dir,
- void ext4_insert_dentry(struct inode *inode,
- struct ext4_dir_entry_2 *de,
- int buf_size,
-- struct ext4_filename *fname)
-+ struct ext4_filename *fname, void *data)
- {
-
- int nlen, rlen;
-
-- nlen = EXT4_DIR_REC_LEN(de->name_len);
-+ nlen = EXT4_DIR_REC_LEN(de);
- rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
- if (de->inode) {
- struct ext4_dir_entry_2 *de1 =
-@@ -1716,6 +1742,11 @@ void ext4_insert_dentry(struct inode *in
- ext4_set_de_type(inode->i_sb, de, inode->i_mode);
- de->name_len = fname_len(fname);
- memcpy(de->name, fname_name(fname), fname_len(fname));
-+ if (data) {
-+ de->name[fname_len(fname)] = 0;
-+ memcpy(&de->name[fname_len(fname) + 1], data, *(char *)data);
-+ de->file_type |= EXT4_DIRENT_LUFID;
-+ }
- return 0;
- }
- /*
-@@ -1734,18 +1765,23 @@ static int add_dirent_to_buf(handle_t *h
- static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
- struct inode *dir,
- struct inode *inode, struct ext4_dir_entry_2 *de,
-- struct buffer_head *bh)
-+ struct buffer_head *bh, struct dentry *dentry)
- {
- unsigned int blocksize = dir->i_sb->s_blocksize;
- int csum_size = 0;
-- int err;
-+ int err, dlen = 0;
-+ unsigned char *data;
-
-+ data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
-+ dentry->d_fsdata);
- if (ext4_has_metadata_csum(inode->i_sb))
- csum_size = sizeof(struct ext4_dir_entry_tail);
-
- if (!de) {
-+ if (data)
-+ dlen = (*data) + 1;
- err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
-- blocksize - csum_size, fname, &de);
-+ blocksize - csum_size, fname, &de, &dlen);
- if (err)
- return err;
- }
-@@ -1755,7 +1791,10 @@ static int add_dirent_to_buf(handle_t *h
-
- /* By now the buffer is marked for journaling. Due to crypto operations,
- * the following function call may fail */
-- err = ext4_insert_dentry(dir, inode, de, blocksize, fname);
-+ /* If writing the short form of "dotdot", don't add the data section */
-+ if (dlen == 1)
-+ data = NULL;
-+ err = ext4_insert_dentry(dir, inode, de, blocksize, fname, data);
- if (err < 0)
- return err;
-
-@@ -1866,7 +1905,8 @@ static int make_indexed_dir(handle_t *ha
-
- dx_set_block(entries, 1);
- dx_set_count(entries, 1);
-- dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
-+ dx_set_limit(entries, dx_root_limit(dir,
-+ dot_de, sizeof(*dx_info)));
-
- /* Initialize as for dx_probe */
- hinfo.hash_version = dx_info->hash_version;
-@@ -1876,14 +2476,14 @@ static int make_indexed_dir(handle_t *ha
- }
-
-- retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
-+ retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2, dentry);
- out_frames:
- /*
- * Even if the block split failed, we have to properly write
- * out all the changes we did so far. Otherwise we can end up
- * with corrupted filesystem.
- */
- if (retval)
- ext4_mark_inode_dirty(handle, dir);
- dx_release(frames);
- brelse(bh);
- return retval;
-@@ -1909,6 +1949,8 @@ static int ext4_update_dotdot(handle_t *
- struct buffer_head * dir_block;
- struct ext4_dir_entry_2 * de;
- int len, journal = 0, err = 0;
-+ int dlen = 0;
-+ char *data;
-
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-@@ -1924,19 +1966,24 @@ static int ext4_update_dotdot(handle_t *
- /* the first item must be "." */
- assert(de->name_len == 1 && de->name[0] == '.');
- len = le16_to_cpu(de->rec_len);
-- assert(len >= EXT4_DIR_REC_LEN(1));
-- if (len > EXT4_DIR_REC_LEN(1)) {
-+ assert(len >= __EXT4_DIR_REC_LEN(1));
-+ if (len > __EXT4_DIR_REC_LEN(1)) {
- BUFFER_TRACE(dir_block, "get_write_access");
- err = ext4_journal_get_write_access(handle, dir_block);
- if (err)
- goto out_journal;
-
- journal = 1;
-- de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
-+ de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
- }
-
-- len -= EXT4_DIR_REC_LEN(1);
-- assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
-+ len -= EXT4_DIR_REC_LEN(de);
-+ data = ext4_dentry_get_data(dir->i_sb,
-+ (struct ext4_dentry_param *)dentry->d_fsdata);
-+ if (data)
-+ dlen = *data + 1;
-+ assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen));
-+
- de = (struct ext4_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
- if (!journal) {
-@@ -1950,10 +1997,15 @@ static int ext4_update_dotdot(handle_t *
- if (len > 0)
- de->rec_len = cpu_to_le16(len);
- else
-- assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
-+ assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2));
- de->name_len = 2;
- strcpy(de->name, "..");
-- ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-+ if (data != NULL && ext4_get_dirent_data_len(de) >= dlen) {
-+ de->name[2] = 0;
-+ memcpy(&de->name[2 + 1], data, *data);
-+ ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-+ de->file_type |= EXT4_DIRENT_LUFID;
-+ }
-
- out_journal:
- if (journal) {
-@@ -2039,7 +2639,7 @@ int __ext4_add_entry(handle_t *handle
- goto out;
- }
- retval = add_dirent_to_buf(handle, &fname, dir, inode,
-- NULL, bh);
-+ NULL, bh, dentry);
- if (retval != -ENOSPC)
- goto out;
-
-@@ -2067,7 +2667,7 @@ int __ext4_add_entry(handle_t *handle
- initialize_dirent_tail(t, blocksize);
- }
-
-- retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
-+ retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh, dentry);
- out:
- ext4_fname_free_filename(&fname);
- brelse(bh);
-@@ -2156,7 +2756,7 @@ again:
- goto cleanup;
- }
-
-- err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
-+ err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh, dentry);
- if (err != -ENOSPC)
- goto cleanup;
-
-@@ -2213,7 +2913,7 @@ again:
- err = PTR_ERR(de);
- goto cleanup;
- }
-- err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
-+ err = add_dirent_to_buf(handle, fname, dir, inode, de, bh, dentry);
- goto cleanup;
-
- journal_error:
-@@ -2428,30 +2480,61 @@ retry:
- return err;
- }
-
-+struct tp_block {
-+ struct inode *inode;
-+ void *data1;
-+ void *data2;
-+};
-+
- struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
- struct ext4_dir_entry_2 *de,
- int blocksize, int csum_size,
- unsigned int parent_ino, int dotdot_real_len)
- {
-+ void *data1 = NULL, *data2 = NULL;
-+ int dot_reclen = 0;
-+
-+ if (dotdot_real_len == 10) {
-+ struct tp_block *tpb = (struct tp_block *)inode;
-+ data1 = tpb->data1;
-+ data2 = tpb->data2;
-+ inode = tpb->inode;
-+ dotdot_real_len = 0;
-+ }
- de->inode = cpu_to_le32(inode->i_ino);
- de->name_len = 1;
-- de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
-- blocksize);
- strcpy(de->name, ".");
- ext4_set_de_type(inode->i_sb, de, S_IFDIR);
-
-+ /* get packed fid data*/
-+ data1 = ext4_dentry_get_data(inode->i_sb,
-+ (struct ext4_dentry_param *) data1);
-+ if (data1) {
-+ de->name[1] = 0;
-+ memcpy(&de->name[2], data1, *(char *) data1);
-+ de->file_type |= EXT4_DIRENT_LUFID;
-+ }
-+ de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
-+ dot_reclen = cpu_to_le16(de->rec_len);
- de = ext4_next_entry(de, blocksize);
- de->inode = cpu_to_le32(parent_ino);
- de->name_len = 2;
-+ strcpy(de->name, "..");
-+ ext4_set_de_type(inode->i_sb, de, S_IFDIR);
-+ data2 = ext4_dentry_get_data(inode->i_sb,
-+ (struct ext4_dentry_param *) data2);
-+ if (data2) {
-+ de->name[2] = 0;
-+ memcpy(&de->name[3], data2, *(char *) data2);
-+ de->file_type |= EXT4_DIRENT_LUFID;
-+ }
- if (!dotdot_real_len)
- de->rec_len = ext4_rec_len_to_disk(blocksize -
-- (csum_size + EXT4_DIR_REC_LEN(1)),
-+ (csum_size + dot_reclen),
- blocksize);
- else
- de->rec_len = ext4_rec_len_to_disk(
-- EXT4_DIR_REC_LEN(de->name_len), blocksize);
-+ EXT4_DIR_REC_LEN(de), blocksize);
-- strcpy(de->name, "..");
-- ext4_set_de_type(inode->i_sb, de, S_IFDIR);
-
- return ext4_next_entry(de, blocksize);
- }
-@@ -2457,8 +2540,10 @@ struct ext4_dir_entry_2 *ext4_init_dot_d
- }
-
- static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
-- struct inode *inode)
-+ struct inode *inode,
-+ const void *data1, const void *data2)
- {
-+ struct tp_block param;
- struct buffer_head *dir_block = NULL;
- struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
-@@ -2488,7 +2573,11 @@ static int ext4_init_new_dir(handle_t *h
- if (err)
- goto out;
- de = (struct ext4_dir_entry_2 *)dir_block->b_data;
-- ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
-+ param.inode = inode;
-+ param.data1 = (void *)data1;
-+ param.data2 = (void *)data2;
-+ ext4_init_dot_dotdot((struct inode *)(¶m), de, blocksize,
-+ csum_size, dir->i_ino, 10);
- set_nlink(inode, 2);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
-@@ -2402,6 +2426,29 @@ out:
- return err;
- }
-
-+/* Initialize @inode as a subdirectory of @dir, and add the
-+ * "." and ".." entries into the first directory block. */
-+int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
-+ struct inode *inode,
-+ const void *data1, const void *data2)
-+{
-+ int rc;
-+
-+ if (IS_ERR(handle))
-+ return PTR_ERR(handle);
-+
-+ if (IS_DIRSYNC(dir))
-+ ext4_handle_sync(handle);
-+
-+ inode->i_op = &ext4_dir_inode_operations;
-+ inode->i_fop = &ext4_dir_operations;
-+ rc = ext4_init_new_dir(handle, dir, inode, data1, data2);
-+ if (!rc)
-+ rc = ext4_mark_inode_dirty(handle, inode);
-+ return rc;
-+}
-+EXPORT_SYMBOL(ext4_add_dot_dotdot);
-+
- static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
- {
- handle_t *handle;
-@@ -2546,7 +2636,7 @@ retry:
-
- inode->i_op = &ext4_dir_inode_operations;
- inode->i_fop = &ext4_dir_operations;
-- err = ext4_init_new_dir(handle, dir, inode);
-+ err = ext4_init_new_dir(handle, dir, inode, NULL, NULL);
- if (err)
- goto out_clear_inode;
- err = ext4_mark_inode_dirty(handle, inode);
-@@ -2598,7 +2688,7 @@ static int empty_dir(struct inode *inode
- }
-
- sb = inode->i_sb;
-- if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
-+ if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2)) {
- EXT4_ERROR_INODE(inode, "invalid size");
- return 1;
- }
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/inline.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/inline.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/inline.c
-@@ -988,7 +998,7 @@ static int ext4_add_dirent_to_inline(han
-
-
- err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
-- inline_size, fname, &de);
-+ inline_size, fname, &de, NULL);
- if (err)
- return err;
-
-@@ -998,7 +998,7 @@ static int ext4_add_dirent_to_inline(han
- err = ext4_journal_get_write_access(handle, iloc->bh);
- if (err)
- return err;
-- ext4_insert_dentry(dir, inode, de, inline_size, fname);
-+ ext4_insert_dentry(dir, inode, de, inline_size, fname, NULL);
-
- ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
-
-@@ -1078,7 +1078,7 @@ static int ext4_update_inline_dir(handle
- int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
- int new_size = get_max_inline_xattr_value_size(dir, iloc);
-
-- if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
-+ if (new_size - old_size <= __EXT4_DIR_REC_LEN(1))
- return -ENOSPC;
-
- ret = ext4_update_inline_data(handle, dir,
-@@ -1348,7 +1348,7 @@ int htree_inlinedir_to_tree(struct file
- fake.name_len = 1;
- strcpy(fake.name, ".");
- fake.rec_len = ext4_rec_len_to_disk(
-- EXT4_DIR_REC_LEN(fake.name_len),
-+ EXT4_DIR_REC_LEN(&fake),
- inline_size);
- ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
- de = &fake;
-@@ -1358,7 +1358,7 @@ int htree_inlinedir_to_tree(struct file
- fake.name_len = 2;
- strcpy(fake.name, "..");
- fake.rec_len = ext4_rec_len_to_disk(
-- EXT4_DIR_REC_LEN(fake.name_len),
-+ EXT4_DIR_REC_LEN(&fake),
- inline_size);
- ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
- de = &fake;
-@@ -1455,8 +1455,8 @@ int ext4_read_inline_dir(struct file *fi
- * So we will use extra_offset and extra_size to indicate them
- * during the inline dir iteration.
- */
-- dotdot_offset = EXT4_DIR_REC_LEN(1);
-- dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2);
-+ dotdot_offset = __EXT4_DIR_REC_LEN(1);
-+ dotdot_size = dotdot_offset + __EXT4_DIR_REC_LEN(2);
- extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
- extra_size = extra_offset + inline_size;
-
-@@ -1493,7 +1493,7 @@ revalidate:
- * failure will be detected in the
- * dirent test below. */
- if (ext4_rec_len_from_disk(de->rec_len, extra_size)
-- < EXT4_DIR_REC_LEN(1))
-+ < __EXT4_DIR_REC_LEN(1))
- break;
- i += ext4_rec_len_from_disk(de->rec_len,
- extra_size);
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/super.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/super.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/super.c
-@@ -1151,7 +1151,7 @@ enum {
- Opt_data_err_abort, Opt_data_err_ignore,
- Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
- Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
-- Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
-+ Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_dirdata,
- Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
- Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
-@@ -1216,6 +1216,7 @@ static const match_table_t tokens = {
- {Opt_stripe, "stripe=%u"},
- {Opt_delalloc, "delalloc"},
- {Opt_nodelalloc, "nodelalloc"},
-+ {Opt_dirdata, "dirdata"},
- {Opt_removed, "mblk_io_submit"},
- {Opt_removed, "nomblk_io_submit"},
- {Opt_block_validity, "block_validity"},
-@@ -1424,6 +1425,7 @@ static const struct mount_opts {
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
- {Opt_offusrjquota, 0, MOPT_Q},
-+ {Opt_dirdata, EXT4_MOUNT_DIRDATA, MOPT_SET},
- {Opt_offgrpjquota, 0, MOPT_Q},
- {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
- {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
+++ /dev/null
-mbcache provides absolutely no value for Lustre xattrs (because
-they are unique and cannot be shared between files) and as we can
-see it has a noticable overhead in some cases. In the past there
-was a CONFIG_MBCACHE option that would allow it to be disabled,
-but this was removed in newer kernels, so we will need to patch
-ldiskfs to fix this.
-
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-@@ -944,6 +944,7 @@ struct ext4_inode_info {
- /*
- * Mount flags set via mount options or defaults
- */
-+#define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Disable mbcache */
- #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
- #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
- #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/super.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/super.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/super.c
-@@ -1157,6 +1157,7 @@ enum {
- Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
- Opt_inode_readahead_blks, Opt_journal_ioprio,
- Opt_dioread_nolock, Opt_dioread_lock,
-+ Opt_no_mbcache,
- Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb,
- };
-@@ -1231,6 +1232,7 @@ static const match_table_t tokens = {
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_init_itable, "init_itable=%u"},
-+ {Opt_no_mbcache, "no_mbcache"},
- {Opt_init_itable, "init_itable"},
- {Opt_noinit_itable, "noinit_itable"},
- {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
-@@ -1390,6 +1392,7 @@ static const struct mount_opts {
- {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
- {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
- {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
-+ {Opt_no_mbcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
- {Opt_commit, 0, MOPT_GTE0},
- {Opt_max_batch_time, 0, MOPT_GTE0},
- {Opt_min_batch_time, 0, MOPT_GTE0},
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/xattr.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/xattr.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/xattr.c
-@@ -80,7 +80,7 @@
- # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
- #endif
-
--static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
-+static void _ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
- static struct buffer_head *ext4_xattr_cache_find(struct inode *,
- struct ext4_xattr_header *,
- struct mb_cache_entry **);
-@@ -401,7 +401,8 @@ bad_block:
- error = -EFSCORRUPTED;
- goto cleanup;
- }
-- ext4_xattr_cache_insert(ext4_mb_cache, bh);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ _ext4_xattr_cache_insert(ext4_mb_cache, bh);
- entry = BFIRST(bh);
- error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1,
- inode);
-@@ -565,7 +566,8 @@ ext4_xattr_block_list(struct dentry *den
- error = -EFSCORRUPTED;
- goto cleanup;
- }
-- ext4_xattr_cache_insert(ext4_mb_cache, bh);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ _ext4_xattr_cache_insert(ext4_mb_cache, bh);
- error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
-
- cleanup:
-@@ -678,7 +680,9 @@ ext4_xattr_release_block(handle_t *handl
- * This must happen under buffer lock for
- * ext4_xattr_block_set() to reliably detect freed block
- */
-- mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ mb_cache_entry_delete_block(ext4_mb_cache,
-+ hash, bh->b_blocknr);
- get_bh(bh);
- unlock_buffer(bh);
- ext4_free_blocks(handle, inode, bh, 0, 1,
-@@ -690,9 +694,10 @@ ext4_xattr_release_block(handle_t *handl
-
-
- if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
-- struct mb_cache_entry *ce;
-+ struct mb_cache_entry *ce = NULL;
-
-- ce = mb_cache_entry_get(ext4_mb_cache, hash,
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ ce = mb_cache_entry_get(ext4_mb_cache, hash,
- bh->b_blocknr);
- if (ce) {
- ce->e_reusable = 1;
-@@ -1107,7 +1112,8 @@ ext4_xattr_block_set(handle_t *handle, s
- * ext4_xattr_block_set() to reliably detect modified
- * block
- */
-- mb_cache_entry_delete_block(ext4_mb_cache, hash,
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ mb_cache_entry_delete_block(ext4_mb_cache, hash,
- bs->bh->b_blocknr);
- ea_bdebug(bs->bh, "modifying in-place");
- error = ext4_xattr_set_entry(i, s, handle, inode);
-@@ -1115,8 +1121,9 @@ ext4_xattr_block_set(handle_t *handle, s
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base),
- s->here);
-- ext4_xattr_cache_insert(ext4_mb_cache,
-- bs->bh);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ _ext4_xattr_cache_insert(ext4_mb_cache,
-+ bs->bh);
- }
- unlock_buffer(bs->bh);
- if (error == -EFSCORRUPTED)
-@@ -1277,7 +1284,8 @@ getblk_failed:
- memcpy(new_bh->b_data, s->base, new_bh->b_size);
- set_buffer_uptodate(new_bh);
- unlock_buffer(new_bh);
-- ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ _ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
- error = ext4_handle_dirty_xattr_block(handle,
- inode, new_bh);
- if (error)
-@@ -2068,7 +2076,7 @@ ext4_xattr_inode_array_free(struct inode
- * Returns 0, or a negative error number on failure.
- */
- static void
--ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
-+_ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
- {
- struct ext4_xattr_header *header = BHDR(bh);
- __u32 hash = le32_to_cpu(header->h_hash);
-@@ -2140,6 +2148,8 @@ ext4_xattr_cache_find(struct inode *inod
- struct mb_cache_entry *ce;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
-
-+ if (test_opt(inode->i_sb, NO_MBCACHE))
-+ return NULL;
- if (!header->h_hash)
- return NULL; /* never share */
- ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+++ /dev/null
-Index: linux-stage/fs/ext4/super.c
-When ldiskfs run in failover mode whith read-only disk.
-Part of allocation updates are lost and ldiskfs may fail
-while mounting this is due to inconsistent state of
-group-descriptor. Group-descriptor check is added after
-journal replay.
-===================================================================
---- linux-stage/fs/ext4/super.c 2016-11-06 15:15:30.892386878 +0530
-+++ linux-stage.orig.1/fs/ext4/super.c 2016-11-08 10:56:45.579892189 +0530
-@@ -3980,11 +3980,6 @@
- goto failed_mount2;
- }
- }
-- if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
-- ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-- ret = -EFSCORRUPTED;
-- goto failed_mount2;
-- }
-
- sbi->s_gdb_count = db_count;
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
-@@ -4104,6 +4100,13 @@
- sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
-
- no_journal:
-+
-+ if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
-+ ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-+ ret = -EFSCORRUPTED;
-+ goto failed_mount_wq;
-+ }
-+
- sbi->s_mb_cache = ext4_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+++ /dev/null
-From e3014d14a81edde488d9a6758eea8afc41752d2d Mon Sep 17 00:00:00 2001
-From: Jan Kara <jack@suse.cz>
-Date: Mon, 29 Aug 2016 15:38:11 -0400
-Subject: [PATCH] ext4: fixup free space calculations when expanding inodes
-
-Conditions checking whether there is enough free space in an xattr block
-and when xattr is large enough to make enough space in the inode forgot
-to account for the fact that inode need not be completely filled up with
-xattrs. Thus we could move unnecessarily many xattrs out of inode or
-even falsely claim there is not enough space to expand the inode. We
-also forgot to update the amount of free space in xattr block when moving
-more xattrs and thus could decide to move too big xattr resulting in
-unexpected failure.
-
-Fix these problems by properly updating free space in the inode and
-xattr block as we move xattrs. To simplify the math, avoid shifting
-xattrs after removing each one xattr and instead just shift xattrs only
-once there is enough free space in the inode.
-
-Signed-off-by: Jan Kara <jack@suse.cz>
-Signed-off-by: Theodore Ts'o <tytso@mit.edu>
----
- fs/ext4/xattr.c | 58 ++++++++++++++++++++++++---------------------------------
- 1 file changed, 24 insertions(+), 34 deletions(-)
-
-Index: linux-4.4.49-92.14_lustre-vanilla/fs/ext4/xattr.c
-===================================================================
---- linux-4.4.49-92.14_lustre-vanilla.orig/fs/ext4/xattr.c
-+++ linux-4.4.49-92.14_lustre-vanilla/fs/ext4/xattr.c
-@@ -1619,18 +1619,19 @@ retry:
- */
- static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
- int value_offs_shift, void *to,
-- void *from, size_t n, int blocksize)
-+ void *from, size_t n)
- {
- struct ext4_xattr_entry *last = entry;
- int new_offs;
-
-+ /* We always shift xattr headers further thus offsets get lower */
-+ BUG_ON(value_offs_shift > 0);
-+
- /* Adjust the value offsets of the entries */
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
- if (!last->e_value_inum && last->e_value_size) {
- new_offs = le16_to_cpu(last->e_value_offs) +
- value_offs_shift;
-- BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
-- > blocksize);
- last->e_value_offs = cpu_to_le16(new_offs);
- }
- }
-@@ -1651,7 +1652,8 @@ int ext4_expand_extra_isize_ea(struct in
- struct ext4_xattr_ibody_find *is = NULL;
- struct ext4_xattr_block_find *bs = NULL;
- char *buffer = NULL, *b_entry_name = NULL;
-- size_t min_offs, free;
-+ size_t min_offs;
-+ size_t ifree, bfree;
- int total_ino;
- void *base, *start, *end;
- int error = 0, tried_min_extra_isize = 0;
-@@ -1682,17 +1684,9 @@ retry:
- last = entry;
- total_ino = sizeof(struct ext4_xattr_ibody_header);
-
-- free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
-- if (free >= isize_diff) {
-- entry = IFIRST(header);
-- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
-- - new_extra_isize, (void *)raw_inode +
-- EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
-- (void *)header, total_ino,
-- inode->i_sb->s_blocksize);
-- EXT4_I(inode)->i_extra_isize = new_extra_isize;
-- goto out;
-- }
-+ ifree = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
-+ if (ifree >= isize_diff)
-+ goto shift;
-
- /*
- * Enough free space isn't available in the inode, check if
-@@ -1713,8 +1707,8 @@ retry:
- first = BFIRST(bh);
- end = bh->b_data + bh->b_size;
- min_offs = end - base;
-- free = ext4_xattr_free_space(first, &min_offs, base, NULL);
-- if (free < isize_diff) {
-+ bfree = ext4_xattr_free_space(first, &min_offs, base, NULL);
-+ if (bfree + ifree < isize_diff) {
- if (!tried_min_extra_isize && s_min_extra_isize) {
- tried_min_extra_isize++;
- new_extra_isize = s_min_extra_isize;
-@@ -1725,10 +1719,10 @@ retry:
- goto cleanup;
- }
- } else {
-- free = inode->i_sb->s_blocksize;
-+ bfree = inode->i_sb->s_blocksize;
- }
-
-- while (isize_diff > 0) {
-+ while (isize_diff > ifree) {
- size_t offs, size, entry_size;
- struct ext4_xattr_entry *small_entry = NULL;
- struct ext4_xattr_info i = {
-@@ -1736,7 +1730,6 @@ retry:
- .value_len = 0,
- };
- unsigned int total_size; /* EA entry size + value size */
-- unsigned int shift_bytes; /* No. of bytes to shift EAs by? */
- unsigned int min_total_size = ~0U;
-
- is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
-@@ -1758,8 +1751,9 @@ retry:
- total_size =
- EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
- EXT4_XATTR_LEN(last->e_name_len);
-- if (total_size <= free && total_size < min_total_size) {
-- if (total_size < isize_diff) {
-+ if (total_size <= bfree &&
-+ total_size < min_total_size) {
-+ if (total_size + ifree < isize_diff) {
- small_entry = last;
- } else {
- entry = last;
-@@ -1788,6 +1782,7 @@ retry:
- offs = le16_to_cpu(entry->e_value_offs);
- size = le32_to_cpu(entry->e_value_size);
- entry_size = EXT4_XATTR_LEN(entry->e_name_len);
-+ total_size = entry_size + EXT4_XATTR_SIZE(size);
- i.name_index = entry->e_name_index,
- buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
- b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
-@@ -1815,21 +1810,8 @@ retry:
- if (error)
- goto cleanup;
- total_ino -= entry_size;
--
-- entry = IFIRST(header);
-- if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
-- shift_bytes = isize_diff;
-- else
-- shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
-- /* Adjust the offsets and shift the remaining entries ahead */
-- ext4_xattr_shift_entries(entry, -shift_bytes,
-- (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
-- EXT4_I(inode)->i_extra_isize + shift_bytes,
-- (void *)header, total_ino, inode->i_sb->s_blocksize);
--
-- isize_diff -= shift_bytes;
-- EXT4_I(inode)->i_extra_isize += shift_bytes;
-- header = IHDR(inode, raw_inode);
-+ ifree += total_size;
-+ bfree -= total_size;
-
- i.name = b_entry_name;
- i.value = buffer;
-@@ -1850,6 +1832,15 @@ retry:
- kfree(is);
- kfree(bs);
- }
-+
-+shift:
-+ /* Adjust the offsets and shift the remaining entries ahead */
-+ entry = IFIRST(header);
-+ ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
-+ - new_extra_isize, (void *)raw_inode +
-+ EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
-+ (void *)header, total_ino);
-+ EXT4_I(inode)->i_extra_isize = new_extra_isize;
- brelse(bh);
- out:
- ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
+++ /dev/null
-diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
-index 3c41773..157438f 100644
---- a/fs/ext4/ext4.h
-+++ b/fs/ext4/ext4.h
-@@ -1286,6 +1286,7 @@ struct ext4_sb_info {
- unsigned long s_mb_prealloc_table_size;
- unsigned int s_mb_group_prealloc;
- unsigned int s_max_dir_size_kb;
-+ unsigned long s_warning_dir_size;
- /* where last allocation was done - for stream allocation */
- unsigned long s_mb_last_group;
- unsigned long s_mb_last_start;
-diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
-index 7e9e04a..094d54f 100644
---- a/fs/ext4/namei.c
-+++ b/fs/ext4/namei.c
-@@ -687,12 +687,20 @@ struct ext4_dir_lock_data {
- #define ext4_htree_lock_data(l) ((struct ext4_dir_lock_data *)(l)->lk_private)
- #define ext4_find_entry(dir, name, dirent, inline) \
- __ext4_find_entry(dir, name, dirent, inline, NULL)
--#define ext4_add_entry(handle, dentry, inode) \
-- __ext4_add_entry(handle, dentry, inode, NULL)
-
- /* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */
- #define EXT4_HTREE_NODE_CHANGED (0xcafeULL << 32)
-
-+inline int ext4_add_entry(handle_t *handle, struct dentry *dentry,
-+ struct inode *inode)
-+{
-+ int ret = __ext4_add_entry(handle, dentry, inode, NULL);
-+
-+ if (ret == -ENOBUFS)
-+ ret = 0;
-+ return ret;
-+}
-+
- static void ext4_htree_event_cb(void *target, void *event)
- {
- u64 *block = (u64 *)target;
-@@ -2479,6 +2487,54 @@ int __ext4_add_entry(handle_t *handle, struct dentry *dentry,
- }
- EXPORT_SYMBOL(__ext4_add_entry);
-
-+static unsigned long __ext4_max_dir_size(struct dx_frame *frames,
-+ struct dx_frame *frame, struct inode *dir)
-+{
-+ unsigned long max_dir_size;
-+
-+ if (EXT4_SB(dir->i_sb)->s_max_dir_size_kb) {
-+ max_dir_size = EXT4_SB(dir->i_sb)->s_max_dir_size_kb << 10;
-+ } else {
-+ max_dir_size = EXT4_BLOCK_SIZE(dir->i_sb);
-+ while (frame >= frames) {
-+ max_dir_size *= dx_get_limit(frame->entries);
-+ if (frame == frames)
-+ break;
-+ frame--;
-+ }
-+ /* use 75% of max dir size in average */
-+ max_dir_size = max_dir_size / 4 * 3;
-+ }
-+ return max_dir_size;
-+}
-+
-+/*
-+ * With hash tree growing, it is easy to hit ENOSPC, but it is hard
-+ * to predict when it will happen. let's give administrators warning
-+ * when reaching 3/5 and 2/3 of limit
-+ */
-+static inline bool dir_size_in_warning_range(struct dx_frame *frames,
-+ struct dx_frame *frame,
-+ struct inode *dir)
-+{
-+ unsigned long size1, size2;
-+ struct super_block *sb = dir->i_sb;
-+
-+ if (unlikely(!EXT4_SB(sb)->s_warning_dir_size))
-+ EXT4_SB(sb)->s_warning_dir_size =
-+ __ext4_max_dir_size(frames, frame, dir);
-+
-+ size1 = EXT4_SB(sb)->s_warning_dir_size / 16 * 10;
-+ size1 = size1 & ~(EXT4_BLOCK_SIZE(sb) - 1);
-+ size2 = EXT4_SB(sb)->s_warning_dir_size / 16 * 11;
-+ size2 = size2 & ~(EXT4_BLOCK_SIZE(sb) - 1);
-+ if (in_range(dir->i_size, size1, EXT4_BLOCK_SIZE(sb)) ||
-+ in_range(dir->i_size, size2, EXT4_BLOCK_SIZE(sb)))
-+ return true;
-+
-+ return false;
-+}
-+
- /*
- * Returns 0 for success, or a negative error value
- */
-@@ -2494,6 +2550,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
- struct ext4_dir_entry_2 *de;
- int restart;
- int err;
-+ bool ret_warn = false;
-
- again:
- restart = 0;
-@@ -2517,6 +2574,11 @@ again:
- /* Block full, should compress but for now just split */
- dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
- dx_get_count(entries), dx_get_limit(entries)));
-+
-+ if (frame - frames + 1 >= ext4_dir_htree_level(sb) ||
-+ EXT4_SB(sb)->s_warning_dir_size)
-+ ret_warn = dir_size_in_warning_range(frames, frame, dir);
-+
- /* Need to split index? */
- if (dx_get_count(entries) == dx_get_limit(entries)) {
- ext4_lblk_t newblock;
-@@ -2548,7 +2610,7 @@ again:
- dir->i_ino, current->comm, levels,
- ext4_dir_htree_level(sb));
- if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
-- ext4_warning(sb, "Large directory feature is"
-+ ext4_warning(sb, "Large directory feature is "
- "not enabled on this "
- "filesystem");
- }
-@@ -2674,6 +2736,8 @@ cleanup:
- * repeat dx_probe() to find out valid htree-path */
- if (restart && err == 0)
- goto again;
-+ if (err == 0 && ret_warn)
-+ err = -ENOBUFS;
- return err;
- }
-
-diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index c625960..1914379 100644
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -1546,6 +1546,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
- sbi->s_li_wait_mult = arg;
- } else if (token == Opt_max_dir_size_kb) {
- sbi->s_max_dir_size_kb = arg;
-+ /* reset s_warning_dir_size and make it re-calculated */
-+ sbi->s_warning_dir_size = 0;
- } else if (token == Opt_stripe) {
- sbi->s_stripe = arg;
- } else if (token == Opt_resuid) {
-diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
-index c625960..1914379 100644
---- a/fs/ext4/sysfs.c
-+++ b/fs/ext4/sysfs.c
-@@ -2657,6 +2659,7 @@ EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
- EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
- EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size_kb);
- EXT4_RW_ATTR_SBI_UI(max_dir_size_kb, s_max_dir_size_kb);
-+EXT4_RW_ATTR_SBI_UI(warning_dir_size, s_warning_dir_size);
- EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
- EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
- EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
-@@ -2686,6 +2689,7 @@ static struct attribute *ext4_attrs[] = {
- ATTR_LIST(inode_goal),
- ATTR_LIST(max_dir_size),
- ATTR_LIST(max_dir_size_kb),
-+ ATTR_LIST(warning_dir_size),
- ATTR_LIST(mb_stats),
- ATTR_LIST(mb_max_to_scan),
- ATTR_LIST(mb_min_to_scan),
---
-1.8.3.1
-
+++ /dev/null
-Index: linux-3.10.0-123.9.3.el7.x86_64/fs/ext4/namei.c
-===================================================================
---- linux-3.10.0-123.9.3.el7.x86_64.orig/fs/ext4/namei.c
-+++ linux-3.10.0-123.9.3.el7.x86_64/fs/ext4/namei.c
-@@ -1894,6 +1894,74 @@ static int make_indexed_dir(handle_t *ha
- return retval;
- }
-
-+/* update ".." for hash-indexed directory, split the item "." if necessary */
-+static int ext4_update_dotdot(handle_t *handle, struct dentry *dentry,
-+ struct inode *inode)
-+{
-+ struct inode *dir = dentry->d_parent->d_inode;
-+ struct buffer_head *dir_block;
-+ struct ext4_dir_entry_2 *de;
-+ int len, journal = 0, err = 0;
-+
-+ if (IS_ERR(handle))
-+ return PTR_ERR(handle);
-+
-+ if (IS_DIRSYNC(dir))
-+ handle->h_sync = 1;
-+
-+ dir_block = ext4_bread(handle, dir, 0, 0);
-+ if (IS_ERR(dir_block)) {
-+ err = PTR_ERR(dir_block);
-+ goto out;
-+ }
-+
-+ de = (struct ext4_dir_entry_2 *)dir_block->b_data;
-+ /* the first item must be "." */
-+ assert(de->name_len == 1 && de->name[0] == '.');
-+ len = le16_to_cpu(de->rec_len);
-+ assert(len >= EXT4_DIR_REC_LEN(1));
-+ if (len > EXT4_DIR_REC_LEN(1)) {
-+ BUFFER_TRACE(dir_block, "get_write_access");
-+ err = ext4_journal_get_write_access(handle, dir_block);
-+ if (err)
-+ goto out_journal;
-+
-+ journal = 1;
-+ de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
-+ }
-+
-+ len -= EXT4_DIR_REC_LEN(1);
-+ assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
-+ de = (struct ext4_dir_entry_2 *)
-+ ((char *) de + le16_to_cpu(de->rec_len));
-+ if (!journal) {
-+ BUFFER_TRACE(dir_block, "get_write_access");
-+ err = ext4_journal_get_write_access(handle, dir_block);
-+ if (err)
-+ goto out_journal;
-+ }
-+
-+ de->inode = cpu_to_le32(inode->i_ino);
-+ if (len > 0)
-+ de->rec_len = cpu_to_le16(len);
-+ else
-+ assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
-+ de->name_len = 2;
-+ strcpy(de->name, "..");
-+ ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-+
-+out_journal:
-+ if (journal) {
-+ BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
-+ err = ext4_handle_dirty_dirent_node(handle, dir, dir_block);
-+ ext4_mark_inode_dirty(handle, dir);
-+ }
-+ brelse(dir_block);
-+
-+out:
-+ return err;
-+}
-+
- /*
- * ext4_add_entry()
- *
-@@ -1938,6 +2004,9 @@ int ext4_add_entry(handle_t *handle, str
- }
-
- if (is_dx(dir)) {
-+ if (dentry->d_name.len == 2 &&
-+ memcmp(dentry->d_name.name, "..", 2) == 0)
-+ return ext4_update_dotdot(handle, dentry, inode);
- retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
- if (!retval || (retval != ERR_BAD_DX_DIR))
- return retval;
+++ /dev/null
-This INCOMPAT_LARGEDIR feature allows larger directories
-to be created in ldiskfs, both with directory sizes over
-2GB and and a maximum htree depth of 3 instead of the
-current limit of 2. These features are needed in order
-to exceed the current limit of approximately 10M entries
-in a single directory.
-
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
-@@ -1585,6 +1585,7 @@ static inline void ext4_clear_state_flag
- EXT4_FEATURE_INCOMPAT_MMP | \
- EXT4_FEATURE_INCOMPAT_DIRDATA| \
- EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
-+ EXT4_FEATURE_INCOMPAT_LARGEDIR | \
- EXT4_FEATURE_INCOMPAT_ENCRYPT | \
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
- #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-@@ -1999,6 +2000,9 @@ struct mmpd_data {
- # define NORET_TYPE /**/
- # define ATTRIB_NORET __attribute__((noreturn))
- # define NORET_AND noreturn,
-+/* htree levels for ext4 */
-+#define EXT4_HTREE_LEVEL_COMPAT 2
-+#define EXT4_HTREE_LEVEL 3
-
- struct ext4_xattr_ino_array {
- unsigned int xia_count; /* # of used item in the array */
-@@ -2472,13 +2476,16 @@ static inline void ext4_r_blocks_count_s
- es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
- }
-
--static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
-+static inline loff_t ext4_isize(struct super_block *sb,
-+ struct ext4_inode *raw_inode)
- {
-- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
-+ if (S_ISREG(le16_to_cpu(raw_inode->i_mode)) ||
-+ (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) &&
-+ S_ISDIR(le16_to_cpu(raw_inode->i_mode))))
- return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
- le32_to_cpu(raw_inode->i_size_lo);
-- else
-- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
-+
-+ return (loff_t)le32_to_cpu(raw_inode->i_size_lo);
- }
-
- static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/namei.c
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
-@@ -513,7 +513,14 @@ struct dx_root_info * dx_get_dx_info(str
-
- static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
- {
-- return le32_to_cpu(entry->block) & 0x00ffffff;
-+ return le32_to_cpu(entry->block) & 0x0fffffff;
-+}
-+
-+static inline int
-+ext4_dir_htree_level(struct super_block *sb)
-+{
-+ return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ?
-+ EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
- }
-
- static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
-@@ -681,6 +688,7 @@ dx_probe(const struct qstr *d_name, stru
- struct dx_frame *frame = frame_in;
- u32 hash;
-
-+ memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
- frame->bh = ext4_read_dirblock(dir, 0, INDEX);
- if (IS_ERR(frame->bh))
- return (struct dx_frame *) frame->bh;
-@@ -714,9 +721,13 @@ dx_probe(const struct qstr *d_name, stru
- }
-
- indirect = info->indirect_levels;
-- if (indirect > 1) {
-- ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
-- info->indirect_levels);
-+ if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
-+ ext4_warning_inode(dir, "htree depth: %#06x exceed max depth %u",
-+ indirect, ext4_dir_htree_level(dir->i_sb));
-+ if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
-+ ext4_warning(dir->i_sb, "Enable large directory "
-+ "feature to access it");
-+ }
- goto fail;
- }
-
-@@ -812,12 +826,20 @@ fail:
-
- static void dx_release (struct dx_frame *frames)
- {
-+ int i;
-+ struct dx_root_info *info;
-+
- if (frames[0].bh == NULL)
- return;
-
-- if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
-- brelse(frames[1].bh);
-- brelse(frames[0].bh);
-+ for (i = 0, info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data);
-+ i <= info->indirect_levels;
-+ i++) {
-+ if (frames[i].bh == NULL)
-+ break;
-+ brelse(frames[i].bh);
-+ frames[i].bh = NULL;
-+ }
- }
-
- /*
-@@ -960,7 +979,7 @@ int ext4_htree_fill_tree(struct file *di
- {
- struct dx_hash_info hinfo;
- struct ext4_dir_entry_2 *de;
-- struct dx_frame frames[2], *frame;
-+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- struct inode *dir;
- ext4_lblk_t block;
- int count = 0;
-@@ -1376,7 +1395,7 @@ static struct buffer_head * ext4_dx_find
- struct dx_hash_info hinfo;
- {
- struct super_block * sb = dir->i_sb;
-- struct dx_frame frames[2], *frame;
-+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- const struct qstr *d_name = fname->usr_fname;
- struct buffer_head *bh;
- ext4_lblk_t block;
-@@ -1832,7 +1851,7 @@ static int make_indexed_dir(handle_t *ha
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
- struct buffer_head *bh2;
-- struct dx_frame frames[2], *frame;
-+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- struct dx_entry *entries;
- struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
- struct ext4_dir_entry_tail *t;
-@@ -2117,14 +2136,17 @@ static int ext4_add_entry(handle_t *hand
- static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
- struct inode *inode)
- {
-- struct dx_frame frames[2], *frame;
-+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- struct dx_entry *entries, *at;
- struct buffer_head *bh;
- struct inode *dir = d_inode(dentry->d_parent);
- struct super_block *sb = dir->i_sb;
- struct ext4_dir_entry_2 *de;
-+ int restart;
- int err;
-
-+again:
-+ restart = 0;
- frame = dx_probe(fname, dir, NULL, frames);
- if (IS_ERR(frame))
- return PTR_ERR(frame);
-@@ -2138,33 +2160,48 @@ static int ext4_dx_add_entry(handle_t *h
- goto cleanup;
- }
-
-- BUFFER_TRACE(bh, "get_write_access");
-- err = ext4_journal_get_write_access(handle, bh);
-- if (err)
-- goto journal_error;
--
- err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh, dentry);
- if (err != -ENOSPC)
- goto cleanup;
-
-+ err = 0;
- /* Block full, should compress but for now just split */
- dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
- dx_get_count(entries), dx_get_limit(entries)));
- /* Need to split index? */
- if (dx_get_count(entries) == dx_get_limit(entries)) {
- ext4_lblk_t newblock;
-- unsigned icount = dx_get_count(entries);
-- int levels = frame - frames;
-+ int levels = frame - frames + 1;
-+ unsigned icount;
-+ int add_level = 1;
- struct dx_entry *entries2;
- struct dx_node *node2;
- struct buffer_head *bh2;
-
-- if (levels && (dx_get_count(frames->entries) ==
-- dx_get_limit(frames->entries))) {
-- ext4_warning_inode(dir, "Directory index full!");
-+ while (frame > frames) {
-+ if (dx_get_count((frame - 1)->entries) <
-+ dx_get_limit((frame - 1)->entries)) {
-+ add_level = 0;
-+ break;
-+ }
-+ frame--; /* split higher index block */
-+ at = frame->at;
-+ entries = frame->entries;
-+ restart = 1;
-+ }
-+ if (add_level && levels == ext4_dir_htree_level(sb)) {
-+ ext4_warning(sb, "inode %lu: comm %s: index %u: reach max htree level %u",
-+ dir->i_ino, current->comm, levels,
-+ ext4_dir_htree_level(sb));
-+ if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
-+ ext4_warning(sb, "Large directory feature is"
-+ "not enabled on this "
-+ "filesystem");
-+ }
- err = -ENOSPC;
- goto cleanup;
- }
-+ icount = dx_get_count(entries);
- bh2 = ext4_append(handle, dir, &newblock);
- if (IS_ERR(bh2)) {
- err = PTR_ERR(bh2);
-@@ -2179,7 +2216,7 @@ static int ext4_dx_add_entry(handle_t *h
- err = ext4_journal_get_write_access(handle, frame->bh);
- if (err)
- goto journal_error;
-- if (levels) {
-+ if (!add_level) {
- unsigned icount1 = icount/2, icount2 = icount - icount1;
- unsigned hash2 = dx_get_hash(entries + icount1);
- dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
-@@ -2187,7 +2224,7 @@ static int ext4_dx_add_entry(handle_t *h
-
- BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
- err = ext4_journal_get_write_access(handle,
-- frames[0].bh);
-+ (frame - 1)->bh);
- if (err)
- goto journal_error;
-
-@@ -2203,19 +2240,27 @@ static int ext4_dx_add_entry(handle_t *h
- frame->entries = entries = entries2;
- swap(frame->bh, bh2);
- }
-- dx_insert_block(frames + 0, hash2, newblock);
-- dxtrace(dx_show_index("node", frames[1].entries));
-+ dx_insert_block(frame - 1, hash2, newblock);
-+ dxtrace(dx_show_index("node", frame->entries));
- dxtrace(dx_show_index("node",
-- ((struct dx_node *) bh2->b_data)->entries));
-+ ((struct dx_node *)bh2->b_data)->entries));
- err = ext4_handle_dirty_dx_node(handle, dir, bh2);
- if (err)
- goto journal_error;
- brelse (bh2);
-+ err = ext4_handle_dirty_dx_node(handle, dir,
-+ (frame - 1)->bh);
-+ if (err)
-+ goto journal_error;
-+ if (restart) {
-+ err = ext4_handle_dirty_dx_node(handle, dir,
-+ frame->bh);
-+ goto journal_error;
-+ }
- } else {
- struct dx_root_info *info;
-- dxtrace(printk(KERN_DEBUG
-- "Creating second level index...\n"));
-- memcpy((char *) entries2, (char *) entries,
-+
-+ memcpy((char *)entries2, (char *)entries,
- icount * sizeof(struct dx_entry));
- dx_set_limit(entries2, dx_node_limit(dir));
-
-@@ -2224,22 +2267,17 @@ static int ext4_dx_add_entry(handle_t *h
- dx_set_block(entries + 0, newblock);
- info = dx_get_dx_info((struct ext4_dir_entry_2*)
- frames[0].bh->b_data);
-- info->indirect_levels = 1;
--
-- /* Add new access path frame */
-- frame = frames + 1;
-- frame->at = at = at - entries + entries2;
-- frame->entries = entries = entries2;
-- frame->bh = bh2;
-- err = ext4_journal_get_write_access(handle,
-- frame->bh);
-+ info->indirect_levels += 1;
-+ dxtrace(printk(KERN_DEBUG
-+ "Creating %d level index...\n",
-+ info->indirect_levels));
-+ err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
- if (err)
- goto journal_error;
-- }
-- err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
-- if (err) {
-- ext4_std_error(inode->i_sb, err);
-- goto cleanup;
-+ err = ext4_handle_dirty_dx_node(handle, dir, bh2);
-+ brelse(bh2);
-+ restart = 1;
-+ goto journal_error;
- }
- }
- de = do_split(handle, dir, &bh, frame, &fname->hinfo);
-@@ -2249,10 +2285,14 @@ static int ext4_dx_add_entry(handle_t *h
- goto cleanup;
-
- journal_error:
-- ext4_std_error(dir->i_sb, err);
-+ ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
- cleanup:
- brelse(bh);
- dx_release(frames);
-+ /* @restart is true means htree-path has been changed, we need to
-+ * repeat dx_probe() to find out valid htree-path */
-+ if (restart && err == 0)
-+ goto again;
- return err;
- }
-
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/inode.c
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c
-@@ -4056,12 +4056,12 @@ struct inode *ext4_iget(struct super_blo
- if (ext4_has_feature_64bit(sb))
- ei->i_file_acl |=
- ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
-- inode->i_size = ext4_isize(raw_inode);
-+ inode->i_size = ext4_isize(sb, raw_inode);
- if ((size = i_size_read(inode)) < 0) {
- EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
- ret = -EFSCORRUPTED;
- goto bad_inode;
- }
- ei->i_disksize = inode->i_size;
- #ifdef CONFIG_QUOTA
- ei->i_reserved_quota = 0;
-@@ -4306,7 +4306,7 @@ static int ext4_do_update_inode(handle_t
- raw_inode->i_file_acl_high =
- cpu_to_le16(ei->i_file_acl >> 32);
- raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
-- if (ei->i_disksize != ext4_isize(raw_inode)) {
-+ if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
- ext4_isize_set(raw_inode, ei->i_disksize);
- need_datasync = 1;
- }
+++ /dev/null
-This patch implements the large EA support in ext4. If the size of
-an EA value is larger than the blocksize, then the EA value would
-not be saved in the external EA block, instead it would be saved
-in an external EA inode. So, the patch also helps support a larger
-number of EAs.
-
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -1579,6 +1579,7 @@ static inline void ext4_clear_state_flag
- EXT4_FEATURE_INCOMPAT_EXTENTS| \
- EXT4_FEATURE_INCOMPAT_64BIT| \
- EXT4_FEATURE_INCOMPAT_FLEX_BG| \
-+ EXT4_FEATURE_INCOMPAT_EA_INODE| \
- EXT4_FEATURE_INCOMPAT_MMP | \
- EXT4_FEATURE_INCOMPAT_DIRDATA| \
- EXT4_FEATURE_INCOMPAT_INLINE_DATA)
-@@ -1990,6 +1997,10 @@ struct mmpd_data {
- # define ATTRIB_NORET __attribute__((noreturn))
- # define NORET_AND noreturn,
-
-+struct ext4_xattr_ino_array {
-+ unsigned int xia_count; /* # of used item in the array */
-+ unsigned int xia_inodes[0];
-+};
- /* bitmap.c */
- extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
- void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
-@@ -2194,6 +2205,7 @@ extern void ext4_set_inode_flags(struct
- extern void ext4_get_inode_flags(struct ext4_inode_info *);
- extern int ext4_alloc_da_blocks(struct inode *inode);
- extern void ext4_set_aops(struct inode *inode);
-+extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
- extern int ext4_writepage_trans_blocks(struct inode *);
- extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
- extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
-Index: linux-stage/fs/ext4/inode.c
-===================================================================
---- linux-stage.orig/fs/ext4/inode.c
-+++ linux-stage/fs/ext4/inode.c
-@@ -134,8 +134,6 @@ static void ext4_invalidatepage(struct p
- unsigned int length);
- static int __ext4_journalled_writepage(struct page *page, unsigned int len);
- static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
--static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
-- int pextents);
-
- /*
- * Test whether an inode is a fast symlink.
-@@ -184,6 +182,8 @@ void ext4_evict_inode(struct inode *inod
- {
- handle_t *handle;
- int err;
-+ int extra_credits = 3;
-+ struct ext4_xattr_ino_array *lea_ino_array = NULL;
-
- trace_ext4_evict_inode(inode);
-
-@@ -236,8 +236,8 @@ void ext4_evict_inode(struct inode *inod
- * protection against it
- */
- sb_start_intwrite(inode->i_sb);
-- handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
-- ext4_blocks_for_truncate(inode)+3);
-+
-+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
- if (IS_ERR(handle)) {
- ext4_std_error(inode->i_sb, PTR_ERR(handle));
- /*
-@@ -249,9 +249,36 @@ void ext4_evict_inode(struct inode *inod
- sb_end_intwrite(inode->i_sb);
- goto no_delete;
- }
--
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
-+
-+ /*
-+ * Delete xattr inode before deleting the main inode.
-+ */
-+ err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array);
-+ if (err) {
-+ ext4_warning(inode->i_sb,
-+ "couldn't delete inode's xattr (err %d)", err);
-+ goto stop_handle;
-+ }
-+
-+ if (!IS_NOQUOTA(inode))
-+ extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
-+
-+ if (!ext4_handle_has_enough_credits(handle,
-+ ext4_blocks_for_truncate(inode) + extra_credits)) {
-+ err = ext4_journal_extend(handle,
-+ ext4_blocks_for_truncate(inode) + extra_credits);
-+ if (err > 0)
-+ err = ext4_journal_restart(handle,
-+ ext4_blocks_for_truncate(inode) + extra_credits);
-+ if (err != 0) {
-+ ext4_warning(inode->i_sb,
-+ "couldn't extend journal (err %d)", err);
-+ goto stop_handle;
-+ }
-+ }
-+
- inode->i_size = 0;
- err = ext4_mark_inode_dirty(handle, inode);
- if (err) {
-@@ -269,10 +296,10 @@ void ext4_evict_inode(struct inode *inod
- * enough credits left in the handle to remove the inode from
- * the orphan list and set the dtime field.
- */
-- if (!ext4_handle_has_enough_credits(handle, 3)) {
-- err = ext4_journal_extend(handle, 3);
-+ if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
-+ err = ext4_journal_extend(handle, extra_credits);
- if (err > 0)
-- err = ext4_journal_restart(handle, 3);
-+ err = ext4_journal_restart(handle, extra_credits);
- if (err != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", err);
-@@ -306,8 +333,12 @@ void ext4_evict_inode(struct inode *inod
- ext4_clear_inode(inode);
- else
- ext4_free_inode(handle, inode);
-+
- ext4_journal_stop(handle);
- sb_end_intwrite(inode->i_sb);
-+
-+ if (lea_ino_array != NULL)
-+ ext4_xattr_inode_array_free(inode, lea_ino_array);
- return;
- no_delete:
- ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
-@@ -4681,7 +4712,7 @@ static int ext4_index_trans_blocks(struc
- *
- * Also account for superblock, inode, quota and xattr blocks
- */
--static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
-+int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
- int pextents)
- {
- ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
-Index: linux-stage/fs/ext4/xattr.c
-===================================================================
---- linux-stage.orig/fs/ext4/xattr.c
-+++ linux-stage/fs/ext4/xattr.c
-@@ -201,6 +201,7 @@ ext4_xattr_check_names(struct ext4_xattr
-
- while (!IS_LAST_ENTRY(entry)) {
- if (entry->e_value_size != 0 &&
-+ entry->e_value_inum == 0 &&
- (value_start + le16_to_cpu(entry->e_value_offs) <
- (void *)e + sizeof(__u32) ||
- value_start + le16_to_cpu(entry->e_value_offs) +
-@@ -233,19 +234,26 @@ ext4_xattr_check_block(struct inode *ino
- }
-
- static inline int
--ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
-+ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size,
-+ struct inode *inode)
- {
- size_t value_size = le32_to_cpu(entry->e_value_size);
-
-- if (entry->e_value_block != 0 || value_size > size ||
-+ if (!entry->e_value_inum &&
- le16_to_cpu(entry->e_value_offs) + value_size > size)
- return -EFSCORRUPTED;
-+ if (entry->e_value_inum &&
-+ (le32_to_cpu(entry->e_value_inum) < EXT4_FIRST_INO(inode->i_sb) ||
-+ le32_to_cpu(entry->e_value_inum) >
-+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_inodes_count)))
-+ return -EFSCORRUPTED;
- return 0;
- }
-
- static int
- ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
-- const char *name, size_t size, int sorted)
-+ const char *name, size_t size, int sorted,
-+ struct inode *inode)
- {
- struct ext4_xattr_entry *entry;
- size_t name_len;
-@@ -265,11 +273,104 @@ ext4_xattr_find_entry(struct ext4_xattr_
- break;
- }
- *pentry = entry;
-- if (!cmp && ext4_xattr_check_entry(entry, size))
-+ if (!cmp && ext4_xattr_check_entry(entry, size, inode))
- return -EFSCORRUPTED;
- return cmp ? -ENODATA : 0;
- }
-
-+/*
-+ * Read the EA value from an inode.
-+ */
-+static int
-+ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t *size)
-+{
-+ unsigned long block = 0;
-+ struct buffer_head *bh = NULL;
-+ int blocksize;
-+ size_t csize, ret_size = 0;
-+
-+ if (*size == 0)
-+ return 0;
-+
-+ blocksize = ea_inode->i_sb->s_blocksize;
-+
-+ while (ret_size < *size) {
-+ csize = (*size - ret_size) > blocksize ? blocksize :
-+ *size - ret_size;
-+ bh = ext4_bread(NULL, ea_inode, block, 0);
-+ if (IS_ERR(bh)) {
-+ *size = ret_size;
-+ return PTR_ERR(bh);
-+ }
-+ memcpy(buf, bh->b_data, csize);
-+ brelse(bh);
-+
-+ buf += csize;
-+ block += 1;
-+ ret_size += csize;
-+ }
-+
-+ *size = ret_size;
-+
-+ return 0;
-+}
-+
-+struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, int *err)
-+{
-+ struct inode *ea_inode = NULL;
-+
-+ ea_inode = ext4_iget(parent->i_sb, ea_ino);
-+ if (IS_ERR(ea_inode) || is_bad_inode(ea_inode)) {
-+ int rc = IS_ERR(ea_inode) ? PTR_ERR(ea_inode) : 0;
-+ ext4_error(parent->i_sb, "error while reading EA inode %lu "
-+ "/ %d %d", ea_ino, rc, is_bad_inode(ea_inode));
-+ *err = rc != 0 ? rc : -EIO;
-+ return NULL;
-+ }
-+
-+ if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != parent->i_ino ||
-+ ea_inode->i_generation != parent->i_generation) {
-+ ext4_error(parent->i_sb, "Backpointer from EA inode %lu "
-+ "to parent invalid.", ea_ino);
-+ *err = -EINVAL;
-+ goto error;
-+ }
-+
-+ if (!(EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL)) {
-+ ext4_error(parent->i_sb, "EA inode %lu does not have "
-+ "EXT4_EA_INODE_FL flag set.\n", ea_ino);
-+ *err = -EINVAL;
-+ goto error;
-+ }
-+
-+ *err = 0;
-+ return ea_inode;
-+
-+error:
-+ iput(ea_inode);
-+ return NULL;
-+}
-+
-+/*
-+ * Read the value from the EA inode.
-+ */
-+static int
-+ext4_xattr_inode_get(struct inode *inode, unsigned long ea_ino, void *buffer,
-+ size_t *size)
-+{
-+ struct inode *ea_inode = NULL;
-+ int err;
-+
-+ ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err);
-+ if (err)
-+ return err;
-+
-+ err = ext4_xattr_inode_read(ea_inode, buffer, size);
-+ iput(ea_inode);
-+
-+ return err;
-+}
-+
- static int
- ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
-@@ -301,7 +401,8 @@ bad_block:
- }
- ext4_xattr_cache_insert(bh);
- entry = BFIRST(bh);
-- error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
-+ error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1,
-+ inode);
- if (error == -EFSCORRUPTED)
- goto bad_block;
- if (error)
-@@ -311,8 +412,16 @@ bad_block:
- error = -ERANGE;
- if (size > buffer_size)
- goto cleanup;
-- memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
-- size);
-+ if (entry->e_value_inum) {
-+ error = ext4_xattr_inode_get(inode,
-+ le32_to_cpu(entry->e_value_inum),
-+ buffer, &size);
-+ if (error)
-+ goto cleanup;
-+ } else {
-+ memcpy(buffer, bh->b_data +
-+ le16_to_cpu(entry->e_value_offs), size);
-+ }
- }
- error = size;
-
-@@ -346,7 +455,7 @@ ext4_xattr_ibody_get(struct inode *inode
- if (error)
- goto cleanup;
- error = ext4_xattr_find_entry(&entry, name_index, name,
-- end - (void *)entry, 0);
-+ end - (void *)entry, 0, inode);
- if (error)
- goto cleanup;
- size = le32_to_cpu(entry->e_value_size);
-@@ -354,8 +463,16 @@ ext4_xattr_ibody_get(struct inode *inode
- error = -ERANGE;
- if (size > buffer_size)
- goto cleanup;
-- memcpy(buffer, (void *)IFIRST(header) +
-- le16_to_cpu(entry->e_value_offs), size);
-+ if (entry->e_value_inum) {
-+ error = ext4_xattr_inode_get(inode,
-+ le32_to_cpu(entry->e_value_inum),
-+ buffer, &size);
-+ if (error)
-+ goto cleanup;
-+ } else {
-+ memcpy(buffer, (void *)IFIRST(header) +
-+ le16_to_cpu(entry->e_value_offs), size);
-+ }
- }
- error = size;
-
-@@ -600,7 +717,7 @@ static size_t ext4_xattr_free_space(stru
- size_t *min_offs, void *base, int *total)
- {
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
-- if (!last->e_value_block && last->e_value_size) {
-+ if (!last->e_value_inum && last->e_value_size) {
- size_t offs = le16_to_cpu(last->e_value_offs);
- if (offs < *min_offs)
- *min_offs = offs;
-@@ -611,16 +728,200 @@ static size_t ext4_xattr_free_space(stru
- return (*min_offs - ((void *)last - base) - sizeof(__u32));
- }
-
-+/*
-+ * Write the value of the EA in an inode.
-+ */
- static int
--ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
-+ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
-+ const void *buf, int bufsize)
-+{
-+ struct buffer_head *bh = NULL;
-+ unsigned long block = 0;
-+ unsigned blocksize = ea_inode->i_sb->s_blocksize;
-+ unsigned max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
-+ int csize, wsize = 0;
-+ int ret = 0;
-+ int retries = 0;
-+
-+retry:
-+ while (ret >= 0 && ret < max_blocks) {
-+ struct ext4_map_blocks map;
-+ map.m_lblk = block += ret;
-+ map.m_len = max_blocks -= ret;
-+
-+ ret = ext4_map_blocks(handle, ea_inode, &map,
-+ EXT4_GET_BLOCKS_CREATE);
-+ if (ret <= 0) {
-+ ext4_mark_inode_dirty(handle, ea_inode);
-+ if (ret == -ENOSPC &&
-+ ext4_should_retry_alloc(ea_inode->i_sb, &retries)) {
-+ ret = 0;
-+ goto retry;
-+ }
-+ break;
-+ }
-+ }
-+
-+ if (ret < 0)
-+ return ret;
-+
-+ block = 0;
-+ while (wsize < bufsize) {
-+ if (bh != NULL)
-+ brelse(bh);
-+ csize = (bufsize - wsize) > blocksize ? blocksize :
-+ bufsize - wsize;
-+ bh = ext4_getblk(handle, ea_inode, block, 0);
-+ if (IS_ERR(bh)) {
-+ ret = PTR_ERR(bh);
-+ goto out;
-+ }
-+ ret = ext4_journal_get_write_access(handle, bh);
-+ if (ret)
-+ goto out;
-+
-+ memcpy(bh->b_data, buf, csize);
-+ set_buffer_uptodate(bh);
-+ ext4_handle_dirty_metadata(handle, ea_inode, bh);
-+
-+ buf += csize;
-+ wsize += csize;
-+ block += 1;
-+ }
-+
-+ mutex_lock(&ea_inode->i_mutex);
-+ i_size_write(ea_inode, wsize);
-+ ext4_update_i_disksize(ea_inode, wsize);
-+ mutex_unlock(&ea_inode->i_mutex);
-+
-+ ext4_mark_inode_dirty(handle, ea_inode);
-+
-+out:
-+ brelse(bh);
-+
-+ return ret;
-+}
-+
-+static void ext4_xattr_inode_set_ref(struct inode *ea_inode, __u64 ref_count)
-+{
-+ ea_inode->i_ctime.tv_sec = (__u32)(ref_count >> 32);
-+ ea_inode->i_version = (__u32)ref_count;
-+}
-+
-+static void ext4_xattr_inode_set_hash(struct inode *ea_inode, __u32 hash)
-+{
-+ ea_inode->i_atime.tv_sec = hash;
-+}
-+
-+/*
-+ * Create an inode to store the value of a large EA.
-+ */
-+static struct inode *
-+ext4_xattr_inode_create(handle_t *handle, struct inode *inode, __u32 hash)
-+{
-+ struct inode *ea_inode = NULL;
-+
-+ /*
-+ * Let the next inode be the goal, so we try and allocate the EA inode
-+ * in the same group, or nearby one.
-+ */
-+ ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
-+ S_IFREG|0600, NULL, inode->i_ino + 1, NULL);
-+
-+ if (!IS_ERR(ea_inode)) {
-+ ea_inode->i_op = &ext4_file_inode_operations;
-+ ea_inode->i_fop = &ext4_file_operations;
-+ ext4_set_aops(ea_inode);
-+ ea_inode->i_generation = inode->i_generation;
-+ EXT4_I(ea_inode)->i_flags |= EXT4_EA_INODE_FL;
-+
-+ /*
-+ * A back-pointer from EA inode to parent inode will be useful
-+ * for e2fsck.
-+ */
-+ EXT4_XATTR_INODE_SET_PARENT(ea_inode, inode->i_ino);
-+ unlock_new_inode(ea_inode);
-+
-+ ext4_xattr_inode_set_ref(ea_inode, 1);
-+ ext4_xattr_inode_set_hash(ea_inode, hash);
-+ }
-+
-+ return ea_inode;
-+}
-+
-+/*
-+ * Unlink the inode storing the value of the EA.
-+ */
-+int
-+ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino)
-+{
-+ struct inode *ea_inode = NULL;
-+ int err;
-+
-+ ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err);
-+ if (err)
-+ return err;
-+
-+ clear_nlink(ea_inode);
-+ iput(ea_inode);
-+
-+ return 0;
-+}
-+
-+static __u32
-+ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
-+{
-+ if (ext4_has_metadata_csum(sbi->s_sb))
-+ return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size);
-+ return 0;
-+}
-+
-+/*
-+ * Add value of the EA in an inode.
-+ */
-+static int
-+ext4_xattr_inode_set(handle_t *handle, struct inode *inode, unsigned long *ea_ino,
-+ const void *value, size_t value_len)
-+{
-+ struct inode *ea_inode = NULL;
-+ __u32 hash;
-+ int err;
-+
-+ /* Create an inode for the EA value */
-+ hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
-+ ea_inode = ext4_xattr_inode_create(handle, inode, hash);
-+ if (IS_ERR(ea_inode))
-+ return -1;
-+
-+ err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
-+ if (err)
-+ clear_nlink(ea_inode);
-+ else
-+ *ea_ino = ea_inode->i_ino;
-+
-+ iput(ea_inode);
-+
-+ return err;
-+}
-+
-+static int
-+ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s,
-+ handle_t *handle, struct inode *inode)
- {
- struct ext4_xattr_entry *last;
- size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
-+ int in_inode = i->in_inode;
-+
-+ if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
-+ EXT4_FEATURE_INCOMPAT_EA_INODE) &&
-+ (EXT4_XATTR_SIZE(i->value_len) >
-+ EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize)))
-+ in_inode = 1;
-
- /* Compute min_offs and last. */
- last = s->first;
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
-- if (!last->e_value_block && last->e_value_size) {
-+ if (!last->e_value_inum && last->e_value_size) {
- size_t offs = le16_to_cpu(last->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
-@@ -628,15 +927,20 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- }
- free = min_offs - ((void *)last - s->base) - sizeof(__u32);
- if (!s->not_found) {
-- if (!s->here->e_value_block && s->here->e_value_size) {
-+ if (!in_inode &&
-+ !s->here->e_value_inum && s->here->e_value_size) {
- size_t size = le32_to_cpu(s->here->e_value_size);
- free += EXT4_XATTR_SIZE(size);
- }
- free += EXT4_XATTR_LEN(name_len);
- }
- if (i->value) {
-- if (free < EXT4_XATTR_LEN(name_len) +
-- EXT4_XATTR_SIZE(i->value_len))
-+ size_t value_len = EXT4_XATTR_SIZE(i->value_len);
-+
-+ if (in_inode)
-+ value_len = 0;
-+
-+ if (free < EXT4_XATTR_LEN(name_len) + value_len)
- return -ENOSPC;
- }
-
-@@ -651,7 +955,8 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- s->here->e_name_len = name_len;
- memcpy(s->here->e_name, i->name, name_len);
- } else {
-- if (!s->here->e_value_block && s->here->e_value_size) {
-+ if (!s->here->e_value_inum && s->here->e_value_size &&
-+ s->here->e_value_offs > 0) {
- void *first_val = s->base + min_offs;
- size_t offs = le16_to_cpu(s->here->e_value_offs);
- void *val = s->base + offs;
-@@ -685,13 +990,18 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- last = s->first;
- while (!IS_LAST_ENTRY(last)) {
- size_t o = le16_to_cpu(last->e_value_offs);
-- if (!last->e_value_block &&
-+ if (!last->e_value_inum &&
- last->e_value_size && o < offs)
- last->e_value_offs =
- cpu_to_le16(o + size);
- last = EXT4_XATTR_NEXT(last);
- }
- }
-+ if (s->here->e_value_inum) {
-+ ext4_xattr_inode_unlink(inode,
-+ le32_to_cpu(s->here->e_value_inum));
-+ s->here->e_value_inum = 0;
-+ }
- if (!i->value) {
- /* Remove the old name. */
- size_t size = EXT4_XATTR_LEN(name_len);
-@@ -705,10 +1014,17 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- if (i->value) {
- /* Insert the new value. */
- s->here->e_value_size = cpu_to_le32(i->value_len);
-- if (i->value_len) {
-+ if (in_inode) {
-+ unsigned long ea_ino = le32_to_cpu(s->here->e_value_inum);
-+ ext4_xattr_inode_set(handle, inode, &ea_ino, i->value,
-+ i->value_len);
-+ s->here->e_value_inum = cpu_to_le32(ea_ino);
-+ s->here->e_value_offs = 0;
-+ } else if (i->value_len) {
- size_t size = EXT4_XATTR_SIZE(i->value_len);
- void *val = s->base + min_offs - size;
- s->here->e_value_offs = cpu_to_le16(min_offs - size);
-+ s->here->e_value_inum = 0;
- if (i->value == EXT4_ZERO_XATTR_VALUE) {
- memset(val, 0, size);
- } else {
-@@ -758,7 +1074,7 @@ ext4_xattr_block_find(struct inode *inod
- bs->s.end = bs->bh->b_data + bs->bh->b_size;
- bs->s.here = bs->s.first;
- error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
-- i->name, bs->bh->b_size, 1);
-+ i->name, bs->bh->b_size, 1, inode);
- if (error && error != -ENODATA)
- goto cleanup;
- bs->s.not_found = error;
-@@ -782,8 +1098,6 @@ ext4_xattr_block_set(handle_t *handle, s
-
- #define header(x) ((struct ext4_xattr_header *)(x))
-
-- if (i->value && i->value_len > sb->s_blocksize)
-- return -ENOSPC;
- if (s->base) {
- ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
- bs->bh->b_blocknr);
-@@ -799,7 +1113,7 @@ ext4_xattr_block_set(handle_t *handle, s
- ce = NULL;
- }
- ea_bdebug(bs->bh, "modifying in-place");
-- error = ext4_xattr_set_entry(i, s);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- if (!error) {
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base),
-@@ -850,7 +1164,7 @@ ext4_xattr_block_set(handle_t *handle, s
- s->end = s->base + sb->s_blocksize;
- }
-
-- error = ext4_xattr_set_entry(i, s);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- if (error == -EFSCORRUPTED)
- goto bad_block;
- if (error)
-@@ -1000,7 +1314,7 @@ int ext4_xattr_ibody_find(struct inode *
- /* Find the named attribute. */
- error = ext4_xattr_find_entry(&is->s.here, i->name_index,
- i->name, is->s.end -
-- (void *)is->s.base, 0);
-+ (void *)is->s.base, 0, inode);
- if (error && error != -ENODATA)
- return error;
- is->s.not_found = error;
-@@ -1018,7 +1332,7 @@ int ext4_xattr_ibody_inline_set(handle_t
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return -ENOSPC;
-- error = ext4_xattr_set_entry(i, s);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- if (error) {
- if (error == -ENOSPC &&
- ext4_has_inline_data(inode)) {
-@@ -1030,7 +1344,7 @@ int ext4_xattr_ibody_inline_set(handle_t
- error = ext4_xattr_ibody_find(inode, i, is);
- if (error)
- return error;
-- error = ext4_xattr_set_entry(i, s);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- }
- if (error)
- return error;
-@@ -1056,7 +1370,7 @@ static int ext4_xattr_ibody_set(handle_t
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return -ENOSPC;
-- error = ext4_xattr_set_entry(i, s);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- if (error)
- return error;
- header = IHDR(inode, ext4_raw_inode(&is->iloc));
-@@ -1092,7 +1406,7 @@ ext4_xattr_set_handle(handle_t *handle,
- .name = name,
- .value = value,
- .value_len = value_len,
--
-+ .in_inode = 0,
- };
- struct ext4_xattr_ibody_find is = {
- .s = { .not_found = -ENODATA, },
-@@ -1157,6 +1471,15 @@ ext4_xattr_set_handle(handle_t *handle,
- goto cleanup;
- }
- error = ext4_xattr_block_set(handle, inode, &i, &bs);
-+ if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
-+ EXT4_FEATURE_INCOMPAT_EA_INODE) &&
-+ error == -ENOSPC) {
-+ /* xattr not fit to block, store at external
-+ * inode */
-+ i.in_inode = 1;
-+ error = ext4_xattr_ibody_set(handle, inode,
-+ &i, &is);
-+ }
- if (error)
- goto cleanup;
- if (!is.s.not_found) {
-@@ -1203,9 +1526,22 @@ ext4_xattr_set(struct inode *inode, int
- const void *value, size_t value_len, int flags)
- {
- handle_t *handle;
-+ struct super_block *sb = inode->i_sb;
- int error, retries = 0;
- int credits = ext4_jbd2_credits_xattr(inode);
-
-+ if ((value_len >= EXT4_XATTR_MIN_LARGE_EA_SIZE(sb->s_blocksize)) &&
-+ EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EA_INODE)) {
-+ int nrblocks = (value_len + sb->s_blocksize - 1) >>
-+ sb->s_blocksize_bits;
-+
-+ /* For new inode */
-+ credits += EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + 3;
-+
-+ /* For data blocks of EA inode */
-+ credits += ext4_meta_trans_blocks(inode, nrblocks, 0);
-+ }
-+
- retry:
- handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
- if (IS_ERR(handle)) {
-@@ -1217,7 +1553,7 @@ retry:
- value, value_len, flags);
- error2 = ext4_journal_stop(handle);
- if (error == -ENOSPC &&
-- ext4_should_retry_alloc(inode->i_sb, &retries))
-+ ext4_should_retry_alloc(sb, &retries))
- goto retry;
- if (error == 0)
- error = error2;
-@@ -1239,7 +1575,7 @@ static void ext4_xattr_shift_entries(str
-
- /* Adjust the value offsets of the entries */
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
-- if (!last->e_value_block && last->e_value_size) {
-+ if (!last->e_value_inum && last->e_value_size) {
- new_offs = le16_to_cpu(last->e_value_offs) +
- value_offs_shift;
- BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
-@@ -1477,21 +1813,135 @@ cleanup:
- }
-
-
-+#define EIA_INCR 16 /* must be 2^n */
-+#define EIA_MASK (EIA_INCR - 1)
-+/* Add the large xattr @ino into @lea_ino_array for later deletion.
-+ * If @lea_ino_array is new or full it will be grown and the old
-+ * contents copied over.
-+ */
-+static int
-+ext4_expand_ino_array(struct ext4_xattr_ino_array **lea_ino_array, __u32 ino)
-+{
-+ if (*lea_ino_array == NULL) {
-+ /*
-+ * Start with 15 inodes, so it fits into a power-of-two size.
-+ * If *lea_ino_array is NULL, this is essentially offsetof()
-+ */
-+ (*lea_ino_array) =
-+ kmalloc(offsetof(struct ext4_xattr_ino_array,
-+ xia_inodes[EIA_MASK]),
-+ GFP_NOFS);
-+ if (*lea_ino_array == NULL)
-+ return -ENOMEM;
-+ (*lea_ino_array)->xia_count = 0;
-+ } else if (((*lea_ino_array)->xia_count & EIA_MASK) == EIA_MASK) {
-+ /* expand the array once all 15 + n * 16 slots are full */
-+ struct ext4_xattr_ino_array *new_array = NULL;
-+ int count = (*lea_ino_array)->xia_count;
-+
-+ /* if new_array is NULL, this is essentially offsetof() */
-+ new_array = kmalloc(
-+ offsetof(struct ext4_xattr_ino_array,
-+ xia_inodes[count + EIA_INCR]),
-+ GFP_NOFS);
-+ if (new_array == NULL)
-+ return -ENOMEM;
-+ memcpy(new_array, *lea_ino_array,
-+ offsetof(struct ext4_xattr_ino_array,
-+ xia_inodes[count]));
-+ kfree(*lea_ino_array);
-+ *lea_ino_array = new_array;
-+ }
-+ (*lea_ino_array)->xia_inodes[(*lea_ino_array)->xia_count++] = ino;
-+ return 0;
-+}
-+
-+/**
-+ * Add xattr inode to orphan list
-+ */
-+static int
-+ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode,
-+ int credits, struct ext4_xattr_ino_array *lea_ino_array)
-+{
-+ struct inode *ea_inode = NULL;
-+ int idx = 0, error = 0;
-+
-+ if (lea_ino_array == NULL)
-+ return 0;
-+
-+ for (; idx < lea_ino_array->xia_count; ++idx) {
-+ if (!ext4_handle_has_enough_credits(handle, credits)) {
-+ error = ext4_journal_extend(handle, credits);
-+ if (error > 0)
-+ error = ext4_journal_restart(handle, credits);
-+
-+ if (error != 0) {
-+ ext4_warning(inode->i_sb,
-+ "couldn't extend journal "
-+ "(err %d)", error);
-+ return error;
-+ }
-+ }
-+ ea_inode = ext4_xattr_inode_iget(inode,
-+ lea_ino_array->xia_inodes[idx], &error);
-+ if (error)
-+ continue;
-+ ext4_orphan_add(handle, ea_inode);
-+ /* the inode's i_count will be released by caller */
-+ }
-+
-+ return 0;
-+}
-
- /*
- * ext4_xattr_delete_inode()
- *
-- * Free extended attribute resources associated with this inode. This
-+ * Free extended attribute resources associated with this inode. Traverse
-+ * all entries and unlink any xattr inodes associated with this inode. This
- * is called immediately before an inode is freed. We have exclusive
-- * access to the inode.
-+ * access to the inode. If an orphan inode is deleted it will also delete any
-+ * xattr block and all xattr inodes. They are checked by ext4_xattr_inode_iget()
-+ * to ensure they belong to the parent inode and were not deleted already.
- */
--void
--ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
-+int
-+ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
-+ struct ext4_xattr_ino_array **lea_ino_array)
- {
- struct buffer_head *bh = NULL;
-+ struct ext4_xattr_ibody_header *header;
-+ struct ext4_inode *raw_inode;
-+ struct ext4_iloc iloc;
-+ struct ext4_xattr_entry *entry;
-+ int credits = 3, error = 0;
-
-- if (!EXT4_I(inode)->i_file_acl)
-+ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
-+ goto delete_external_ea;
-+
-+ error = ext4_get_inode_loc(inode, &iloc);
-+ if (error)
- goto cleanup;
-+ raw_inode = ext4_raw_inode(&iloc);
-+ header = IHDR(inode, raw_inode);
-+ for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
-+ entry = EXT4_XATTR_NEXT(entry)) {
-+ if (!entry->e_value_inum)
-+ continue;
-+ if (ext4_expand_ino_array(lea_ino_array,
-+ entry->e_value_inum) != 0) {
-+ brelse(iloc.bh);
-+ goto cleanup;
-+ }
-+ entry->e_value_inum = 0;
-+ }
-+ brelse(iloc.bh);
-+
-+delete_external_ea:
-+ if (!EXT4_I(inode)->i_file_acl) {
-+ /* add xattr inode to orphan list */
-+ ext4_xattr_inode_orphan_add(handle, inode, credits,
-+ *lea_ino_array);
-+ goto cleanup;
-+ }
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh) {
- EXT4_ERROR_INODE(inode, "block %llu read error",
-@@ -1504,11 +1954,69 @@ ext4_xattr_delete_inode(handle_t *handle
- EXT4_I(inode)->i_file_acl);
- goto cleanup;
- }
-+
-+ for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
-+ entry = EXT4_XATTR_NEXT(entry)) {
-+ if (!entry->e_value_inum)
-+ continue;
-+ if (ext4_expand_ino_array(lea_ino_array,
-+ entry->e_value_inum) != 0)
-+ goto cleanup;
-+ entry->e_value_inum = 0;
-+ }
-+
-+ /* add xattr inode to orphan list */
-+ error = ext4_xattr_inode_orphan_add(handle, inode, credits,
-+ *lea_ino_array);
-+ if (error != 0)
-+ goto cleanup;
-+
-+ if (!IS_NOQUOTA(inode))
-+ credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
-+
-+ if (!ext4_handle_has_enough_credits(handle, credits)) {
-+ error = ext4_journal_extend(handle, credits);
-+ if (error > 0)
-+ error = ext4_journal_restart(handle, credits);
-+ if (error != 0) {
-+ ext4_warning(inode->i_sb,
-+ "couldn't extend journal (err %d)", error);
-+ goto cleanup;
-+ }
-+ }
-+
- ext4_xattr_release_block(handle, inode, bh);
- EXT4_I(inode)->i_file_acl = 0;
-
- cleanup:
- brelse(bh);
-+
-+ return error;
-+}
-+
-+void
-+ext4_xattr_inode_array_free(struct inode *inode,
-+ struct ext4_xattr_ino_array *lea_ino_array)
-+{
-+ struct inode *ea_inode = NULL;
-+ int idx = 0;
-+ int err;
-+
-+ if (lea_ino_array == NULL)
-+ return;
-+
-+ for (; idx < lea_ino_array->xia_count; ++idx) {
-+ ea_inode = ext4_xattr_inode_iget(inode,
-+ lea_ino_array->xia_inodes[idx], &err);
-+ if (err)
-+ continue;
-+ /* for inode's i_count get from ext4_xattr_delete_inode */
-+ if (!list_empty(&EXT4_I(ea_inode)->i_orphan))
-+ iput(ea_inode);
-+ clear_nlink(ea_inode);
-+ iput(ea_inode);
-+ }
-+ kfree(lea_ino_array);
- }
-
- /*
-@@ -1578,10 +2086,9 @@ ext4_xattr_cmp(struct ext4_xattr_header
- entry1->e_name_index != entry2->e_name_index ||
- entry1->e_name_len != entry2->e_name_len ||
- entry1->e_value_size != entry2->e_value_size ||
-+ entry1->e_value_inum != entry2->e_value_inum ||
- memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
- return 1;
-- if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
-- return -EFSCORRUPTED;
- if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
- (char *)header2 + le16_to_cpu(entry2->e_value_offs),
- le32_to_cpu(entry1->e_value_size)))
-@@ -1665,7 +2172,7 @@ static inline void ext4_xattr_hash_entry
- *name++;
- }
-
-- if (entry->e_value_block == 0 && entry->e_value_size != 0) {
-+ if (!entry->e_value_inum && entry->e_value_size) {
- __le32 *value = (__le32 *)((char *)header +
- le16_to_cpu(entry->e_value_offs));
- for (n = (le32_to_cpu(entry->e_value_size) +
-Index: linux-stage/fs/ext4/xattr.h
-===================================================================
---- linux-stage.orig/fs/ext4/xattr.h
-+++ linux-stage/fs/ext4/xattr.h
-@@ -42,7 +42,7 @@ struct ext4_xattr_entry {
- __u8 e_name_len; /* length of name */
- __u8 e_name_index; /* attribute name index */
- __le16 e_value_offs; /* offset in disk block of value */
-- __le32 e_value_block; /* disk block attribute is stored on (n/i) */
-+ __le32 e_value_inum; /* inode in which the value is stored */
- __le32 e_value_size; /* size of attribute value */
- __le32 e_hash; /* hash value of name and value */
- char e_name[0]; /* attribute name */
-@@ -67,6 +67,26 @@ struct ext4_xattr_entry {
- EXT4_I(inode)->i_extra_isize))
- #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
-
-+/*
-+ * Link EA inode back to parent one using i_mtime field.
-+ * Extra integer type conversion added to ignore higher
-+ * bits in i_mtime.tv_sec which might be set by ext4_get()
-+ */
-+#define EXT4_XATTR_INODE_SET_PARENT(inode, inum) \
-+do { \
-+ (inode)->i_mtime.tv_sec = inum; \
-+} while(0)
-+
-+#define EXT4_XATTR_INODE_GET_PARENT(inode) \
-+((__u32)(inode)->i_mtime.tv_sec)
-+
-+/*
-+ * The minimum size of EA value when you start storing it in an external inode
-+ * size of block - size of header - size of 1 entry - 4 null bytes
-+*/
-+#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \
-+ ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
-+
- #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
- #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
- #define BFIRST(bh) ENTRY(BHDR(bh)+1)
-@@ -75,10 +84,11 @@ struct ext4_xattr_entry {
- #define EXT4_ZERO_XATTR_VALUE ((void *)-1)
-
- struct ext4_xattr_info {
-- int name_index;
- const char *name;
- const void *value;
- size_t value_len;
-+ int name_index;
-+ int in_inode;
- };
-
- struct ext4_xattr_search {
-@@ -106,7 +116,13 @@ extern int ext4_xattr_get(struct inode *
- extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
- extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
-
--extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-+extern struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
-+ int *err);
-+extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
-+extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
-+ struct ext4_xattr_ino_array **array);
-+extern void ext4_xattr_inode_array_free(struct inode *inode,
-+ struct ext4_xattr_ino_array *array);
-
- extern void ext4_xattr_put_super(struct super_block *);
- extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
-Index: linux-stage/fs/ext4/ialloc.c
-===================================================================
---- linux-stage.orig/fs/ext4/ialloc.c
-+++ linux-stage/fs/ext4/ialloc.c
-@@ -269,7 +269,6 @@ void ext4_free_inode(handle_t *handle, s
- * as writing the quota to disk may need the lock as well.
- */
- dquot_initialize(inode);
-- ext4_xattr_delete_inode(handle, inode);
- dquot_free_inode(inode);
- dquot_drop(inode);
-
-Index: linux-stage/fs/ext4/inline.c
-===================================================================
---- linux-stage.orig/fs/ext4/inline.c
-+++ linux-stage/fs/ext4/inline.c
-@@ -59,7 +59,7 @@ static int get_max_inline_xattr_value_si
-
- /* Compute min_offs. */
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
-- if (!entry->e_value_block && entry->e_value_size) {
-+ if (!entry->e_value_inum && entry->e_value_size) {
- size_t offs = le16_to_cpu(entry->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
+++ /dev/null
-Index: linux-3.10.0-123.el7.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-123.el7.x86_64/fs/ext4/ext4.h
-@@ -2391,6 +2391,7 @@ struct ext4_group_info {
- ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
- ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
- struct list_head bb_prealloc_list;
-+ unsigned long bb_prealloc_nr;
- #ifdef DOUBLE_CHECK
- void *bb_bitmap;
- #endif
-Index: linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.c
-===================================================================
---- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/mballoc.c
-+++ linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.c
-@@ -362,7 +362,7 @@ static const char *ext4_groupinfo_slab_n
- "ext4_groupinfo_64k", "ext4_groupinfo_128k"
- };
-
--static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-+static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group);
- static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
-@@ -718,7 +718,7 @@ mb_set_largest_free_order(struct super_b
- }
-
- static noinline_for_stack
--void ext4_mb_generate_buddy(struct super_block *sb,
-+int ext4_mb_generate_buddy(struct super_block *sb,
- void *buddy, void *bitmap, ext4_group_t group)
- {
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-@@ -750,19 +750,13 @@ void ext4_mb_generate_buddy(struct super
- grp->bb_fragments = fragments;
-
- if (free != grp->bb_free) {
-- ext4_grp_locked_error(sb, group, 0, 0,
-- "block bitmap and bg descriptor "
-- "inconsistent: %u vs %u free clusters",
-- free, grp->bb_free);
-- /*
-- * If we intend to continue, we consider group descriptor
-- * corrupt and update bb_free using bitmap value
-- */
-- grp->bb_free = free;
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ struct ext4_group_desc *gdp;
-+ gdp = ext4_get_group_desc(sb, group, NULL);
-+ ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
-+ "%u in gd, %lu pa's\n", (long unsigned int)group,
-+ free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
-+ grp->bb_prealloc_nr);
-+ return -EIO;
- }
- mb_set_largest_free_order(sb, grp);
-
-@@ -768,6 +767,8 @@ void ext4_mb_generate_buddy(struct super
- EXT4_SB(sb)->s_mb_buddies_generated++;
- EXT4_SB(sb)->s_mb_generation_time += period;
- spin_unlock(&EXT4_SB(sb)->s_bal_lock);
-+
-+ return 0;
- }
-
- static void mb_regenerate_buddy(struct ext4_buddy *e4b)
-@@ -883,7 +884,7 @@ static int ext4_mb_init_cache(struct pag
- }
-
- first_block = page->index * blocks_per_page;
-- for (i = 0; i < blocks_per_page; i++) {
-+ for (i = 0; i < blocks_per_page && err == 0; i++) {
- group = (first_block + i) >> 1;
- if (group >= ngroups)
- break;
-@@ -922,7 +923,7 @@ static int ext4_mb_init_cache(struct pag
- ext4_lock_group(sb, group);
- /* init the buddy */
- memset(data, 0xff, blocksize);
-- ext4_mb_generate_buddy(sb, data, incore, group);
-+ err = ext4_mb_generate_buddy(sb, data, incore, group);
- ext4_unlock_group(sb, group);
- incore = NULL;
- } else {
-@@ -937,7 +938,7 @@ static int ext4_mb_init_cache(struct pag
- memcpy(data, bitmap, blocksize);
-
- /* mark all preallocated blks used in in-core bitmap */
-- ext4_mb_generate_from_pa(sb, data, group);
-+ err = ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
- ext4_unlock_group(sb, group);
-
-@@ -947,7 +948,8 @@ static int ext4_mb_init_cache(struct pag
- incore = data;
- }
- }
-- SetPageUptodate(page);
-+ if (likely(err == 0))
-+ SetPageUptodate(page);
-
- out:
- if (bh) {
-@@ -2224,9 +2226,11 @@ static void *ext4_mb_seq_groups_next(str
- static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
- {
- struct super_block *sb = seq->private;
-+ struct ext4_group_desc *gdp;
- ext4_group_t group = (ext4_group_t) ((unsigned long) v);
- int i;
- int err, buddy_loaded = 0;
-+ int free = 0;
- struct ext4_buddy e4b;
- struct ext4_group_info *grinfo;
- struct sg {
-@@ -2236,7 +2240,7 @@ static int ext4_mb_seq_groups_show(struc
-
- group--;
- if (group == 0)
-- seq_puts(seq, "#group: free frags first ["
-+ seq_puts(seq, "#group: bfree gfree frags first pa ["
- " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
- " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]");
-
-@@ -2256,13 +2260,19 @@ static int ext4_mb_seq_groups_show(struc
- buddy_loaded = 1;
- }
-
-+ gdp = ext4_get_group_desc(sb, group, NULL);
-+ if (gdp != NULL)
-+ free = ext4_free_group_clusters(sb, gdp);
-+
- memcpy(&sg, ext4_get_group_info(sb, group), i);
-
- if (buddy_loaded)
- ext4_mb_unload_buddy(&e4b);
-
-- seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
-- sg.info.bb_fragments, sg.info.bb_first_free);
-+ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
-+ (long unsigned int)group, sg.info.bb_free, free,
-+ sg.info.bb_fragments, sg.info.bb_first_free,
-+ sg.info.bb_prealloc_nr);
- for (i = 0; i <= 13; i++)
- seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
- sg.info.bb_counters[i] : 0);
-@@ -3507,22 +3517,71 @@ static void ext4_mb_generate_from_freeli
- }
-
- /*
-+ * check free blocks in bitmap match free block in group descriptor
-+ * do this before taking preallocated blocks into account to be able
-+ * to detect on-disk corruptions. The group lock should be hold by the
-+ * caller.
-+ */
-+int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
-+ struct ext4_group_desc *gdp, int group)
-+{
-+ unsigned short max = EXT4_CLUSTERS_PER_GROUP(sb);
-+ unsigned short i, first, free = 0;
-+ unsigned short free_in_gdp = ext4_free_group_clusters(sb, gdp);
-+
-+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
-+ return 0;
-+
-+ i = mb_find_next_zero_bit(bitmap, max, 0);
-+
-+ while (i < max) {
-+ first = i;
-+ i = mb_find_next_bit(bitmap, max, i);
-+ if (i > max)
-+ i = max;
-+ free += i - first;
-+ if (i < max)
-+ i = mb_find_next_zero_bit(bitmap, max, i);
-+ }
-+
-+ if (free != free_in_gdp) {
-+ ext4_error(sb, "on-disk bitmap for group %d"
-+ "corrupted: %u blocks free in bitmap, %u - in gd\n",
-+ group, free, free_in_gdp);
-+ return -EIO;
-+ }
-+ return 0;
-+}
-+
-+/*
- * the function goes through all preallocation in this group and marks them
- * used in in-core bitmap. buddy must be generated from this bitmap
- * Need to be called with ext4 group lock held
- */
- static noinline_for_stack
--void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-+int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group)
- {
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- struct ext4_prealloc_space *pa;
-+ struct ext4_group_desc *gdp;
- struct list_head *cur;
- ext4_group_t groupnr;
- ext4_grpblk_t start;
- int preallocated = 0;
-+ int skip = 0, count = 0;
-+ int err;
- int len;
-
-+ gdp = ext4_get_group_desc(sb, group, NULL);
-+ if (gdp == NULL)
-+ return -EIO;
-+
-+ /* before applying preallocations, check bitmap consistency */
-+ err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
-+ if (err)
-+ return err;
-+
- /* all form of preallocation discards first load group,
- * so the only competing code is preallocation use.
- * we don't need any locking here
-@@ -3538,13 +3593,23 @@ void ext4_mb_generate_from_pa(struct sup
- &groupnr, &start);
- len = pa->pa_len;
- spin_unlock(&pa->pa_lock);
-- if (unlikely(len == 0))
-+ if (unlikely(len == 0)) {
-+ skip++;
- continue;
-+ }
- BUG_ON(groupnr != group);
- ext4_set_bits(bitmap, start, len);
- preallocated += len;
-+ count++;
-+ }
-+ if (count + skip != grp->bb_prealloc_nr) {
-+ ext4_error(sb, "lost preallocations: "
-+ "count %d, bb_prealloc_nr %lu, skip %d\n",
-+ count, grp->bb_prealloc_nr, skip);
-+ return -EIO;
- }
- mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
-+ return 0;
- }
-
- static void ext4_mb_pa_callback(struct rcu_head *head)
-@@ -3603,6 +3668,7 @@ static void ext4_mb_put_pa(struct ext4_a
- */
- ext4_lock_group(sb, grp);
- list_del(&pa->pa_group_list);
-+ ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
- ext4_unlock_group(sb, grp);
-
- spin_lock(pa->pa_obj_lock);
-@@ -3697,6 +3763,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
-
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-+ grp->bb_prealloc_nr++;
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-
- spin_lock(pa->pa_obj_lock);
-@@ -3758,6 +3825,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
-
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-+ grp->bb_prealloc_nr++;
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-
- /*
-@@ -3927,6 +3995,8 @@ repeat:
-
- spin_unlock(&pa->pa_lock);
-
-+ BUG_ON(grp->bb_prealloc_nr == 0);
-+ grp->bb_prealloc_nr--;
- list_del(&pa->pa_group_list);
- list_add(&pa->u.pa_tmp_list, &list);
- }
-@@ -4056,7 +4126,7 @@ repeat:
- if (err) {
- ext4_error(sb, "Error loading buddy information for %u",
- group);
-- continue;
-+ return;
- }
-
- bitmap_bh = ext4_read_block_bitmap(sb, group);
-@@ -4068,6 +4138,8 @@ repeat:
- }
-
- ext4_lock_group(sb, group);
-+ BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
-+ e4b.bd_info->bb_prealloc_nr--;
- list_del(&pa->pa_group_list);
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
- ext4_unlock_group(sb, group);
-@@ -4328,6 +4400,7 @@ ext4_mb_discard_lg_preallocations(struct
- }
- ext4_lock_group(sb, group);
- list_del(&pa->pa_group_list);
-+ ext4_get_group_info(sb, group)->bb_prealloc_nr--;
- ext4_mb_release_group_pa(&e4b, pa);
- ext4_unlock_group(sb, group);
-
-Index: linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.h
-===================================================================
---- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/mballoc.h
-+++ linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.h
-@@ -82,7 +82,7 @@ extern ushort ext4_mballoc_debug;
- /*
- * for which requests use 2^N search using buddies
- */
--#define MB_DEFAULT_ORDER2_REQS 2
-+#define MB_DEFAULT_ORDER2_REQS 8
-
- /*
- * default group prealloc size 512 blocks
+++ /dev/null
---- linux-stage.orig/fs/ext4/mmp.c 2015-10-06 08:21:12.013939184 +0530
-+++ linux-stage/fs/ext4/mmp.c 2015-10-06 08:20:35.242939292 +0530
-@@ -99,6 +99,8 @@ static int read_mmp_block(struct super_b
- return 0;
-
- warn_exit:
-+ brelse(*bh);
-+ *bh = NULL;
- ext4_warning(sb, "Error %d while reading MMP block %llu",
- ret, mmp_block);
- return ret;
-@@ -219,6 +221,7 @@ static int kmmpd(void *data)
- "The filesystem seems to have been"
- " multiply mounted.");
- ext4_error(sb, "abort");
-+ put_bh(bh_check);
- goto failed;
- }
- put_bh(bh_check);
+++ /dev/null
-Single directory performance is a critical for HPC workloads. In a
-typical use case an application creates a separate output file for
-each node and task in a job. As nodes and tasks increase, hundreds
-of thousands of files may be created in a single directory within
-a short window of time.
-Today, both filename lookup and file system modifying operations
-(such as create and unlink) are protected with a single lock for
-an entire ldiskfs directory. PDO project will remove this
-bottleneck by introducing a parallel locking mechanism for entire
-ldiskfs directories. This work will enable multiple application
-threads to simultaneously lookup, create and unlink in parallel.
-
-This patch contains:
- - pdirops support for ldiskfs
- - integrate with osd-ldiskfs
-
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/Makefile
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile
-@@ -6,6 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
-
- ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-+ htree_lock.o \
- ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
- mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
- xattr_trusted.o inline.o
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
-@@ -27,6 +27,7 @@
- #include <linux/mutex.h>
- #include <linux/timer.h>
- #include <linux/wait.h>
-+#include <linux/htree_lock.h>
- #include <linux/blockgroup_lock.h>
- #include <linux/percpu_counter.h>
- #include <linux/ratelimit.h>
-@@ -821,6 +822,9 @@ struct ext4_inode_info {
- __u32 i_dtime;
- ext4_fsblk_t i_file_acl;
-
-+ /* following fields for parallel directory operations -bzzz */
-+ struct semaphore i_append_sem;
-+
- /*
- * i_block_group is the number of the block group which contains
- * this file's inode. Constant across the lifetime of the inode,
-@@ -1846,6 +1850,71 @@ struct dx_hash_info
- */
- #define HASH_NB_ALWAYS 1
-
-+/* assume name-hash is protected by upper layer */
-+#define EXT4_HTREE_LOCK_HASH 0
-+
-+enum ext4_pdo_lk_types {
-+#if EXT4_HTREE_LOCK_HASH
-+ EXT4_LK_HASH,
-+#endif
-+ EXT4_LK_DX, /* index block */
-+ EXT4_LK_DE, /* directory entry block */
-+ EXT4_LK_SPIN, /* spinlock */
-+ EXT4_LK_MAX,
-+};
-+
-+/* read-only bit */
-+#define EXT4_LB_RO(b) (1 << (b))
-+/* read + write, high bits for writer */
-+#define EXT4_LB_RW(b) ((1 << (b)) | (1 << (EXT4_LK_MAX + (b))))
-+
-+enum ext4_pdo_lock_bits {
-+ /* DX lock bits */
-+ EXT4_LB_DX_RO = EXT4_LB_RO(EXT4_LK_DX),
-+ EXT4_LB_DX = EXT4_LB_RW(EXT4_LK_DX),
-+ /* DE lock bits */
-+ EXT4_LB_DE_RO = EXT4_LB_RO(EXT4_LK_DE),
-+ EXT4_LB_DE = EXT4_LB_RW(EXT4_LK_DE),
-+ /* DX spinlock bits */
-+ EXT4_LB_SPIN_RO = EXT4_LB_RO(EXT4_LK_SPIN),
-+ EXT4_LB_SPIN = EXT4_LB_RW(EXT4_LK_SPIN),
-+ /* accurate searching */
-+ EXT4_LB_EXACT = EXT4_LB_RO(EXT4_LK_MAX << 1),
-+};
-+
-+enum ext4_pdo_lock_opc {
-+ /* external */
-+ EXT4_HLOCK_READDIR = (EXT4_LB_DE_RO | EXT4_LB_DX_RO),
-+ EXT4_HLOCK_LOOKUP = (EXT4_LB_DE_RO | EXT4_LB_SPIN_RO |
-+ EXT4_LB_EXACT),
-+ EXT4_HLOCK_DEL = (EXT4_LB_DE | EXT4_LB_SPIN_RO |
-+ EXT4_LB_EXACT),
-+ EXT4_HLOCK_ADD = (EXT4_LB_DE | EXT4_LB_SPIN_RO),
-+
-+ /* internal */
-+ EXT4_HLOCK_LOOKUP_SAFE = (EXT4_LB_DE_RO | EXT4_LB_DX_RO |
-+ EXT4_LB_EXACT),
-+ EXT4_HLOCK_DEL_SAFE = (EXT4_LB_DE | EXT4_LB_DX_RO | EXT4_LB_EXACT),
-+ EXT4_HLOCK_SPLIT = (EXT4_LB_DE | EXT4_LB_DX | EXT4_LB_SPIN),
-+};
-+
-+extern struct htree_lock_head *ext4_htree_lock_head_alloc(unsigned hbits);
-+#define ext4_htree_lock_head_free(lhead) htree_lock_head_free(lhead)
-+
-+extern struct htree_lock *ext4_htree_lock_alloc(void);
-+#define ext4_htree_lock_free(lck) htree_lock_free(lck)
-+
-+extern void ext4_htree_lock(struct htree_lock *lck,
-+ struct htree_lock_head *lhead,
-+ struct inode *dir, unsigned flags);
-+#define ext4_htree_unlock(lck) htree_unlock(lck)
-+
-+extern struct buffer_head *__ext4_find_entry(struct inode *dir,
-+ const struct qstr *d_name,
-+ struct ext4_dir_entry_2 **res_dir,
-+ int *inlined, struct htree_lock *lck);
-+extern int __ext4_add_entry(handle_t *handle, struct dentry *dentry,
-+ struct inode *inode, struct htree_lock *lck);
-
- /*
- * Describe an inode's exact location on disk and in memory
-@@ -2088,8 +2157,16 @@ void ext4_insert_dentry(struct inode *in
- const char *name, int namelen, void *data);
- static inline void ext4_update_dx_flag(struct inode *inode)
- {
-+ /* Disable it for ldiskfs, because going from a DX directory to
-+ * a non-DX directory while it is in use will completely break
-+ * the htree-locking.
-+ * If we really want to support this operation in the future,
-+ * we need to exclusively lock the directory at here which will
-+ * increase complexity of code */
-+#if 0
- if (!ext4_has_feature_dir_index(inode->i_sb))
- ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
-+#endif
- }
- static unsigned char ext4_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/namei.c
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
-@@ -53,6 +53,7 @@ struct buffer_head *ext4_append(handle_t
- ext4_lblk_t *block)
- {
- struct buffer_head *bh;
-+ struct ext4_inode_info *ei = EXT4_I(inode);
- int err;
-
- if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
-@@ -60,15 +61,22 @@ struct buffer_head *ext4_append(handle_t
- EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
- return ERR_PTR(-ENOSPC);
-
-+ /* with parallel dir operations all appends
-+ * have to be serialized -bzzz */
-+ down(&ei->i_append_sem);
-+
- *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-
- bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
-- if (IS_ERR(bh))
-+ if (IS_ERR(bh)) {
-+ up(&ei->i_append_sem);
- return bh;
-+ }
- inode->i_size += inode->i_sb->s_blocksize;
- EXT4_I(inode)->i_disksize = inode->i_size;
- BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
-+ up(&ei->i_append_sem);
- if (err) {
- brelse(bh);
- ext4_std_error(inode->i_sb, err);
-@@ -246,7 +254,8 @@ static struct dx_frame *dx_probe(const s
-
- struct inode *dir,
- struct dx_hash_info *hinfo,
-- struct dx_frame *frame);
-+ struct dx_frame *frame,
-+ struct htree_lock *lck);
- static void dx_release(struct dx_frame *frames);
- static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
- struct dx_hash_info *hinfo, struct dx_map_entry map[]);
-@@ -259,12 +267,13 @@ static void dx_insert_block(struct dx_fr
- static int ext4_htree_next_block(struct inode *dir, __u32 hash,
- struct dx_frame *frame,
- struct dx_frame *frames,
-- __u32 *start_hash);
-+ __u32 *start_hash, struct htree_lock *lck);
- static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
- struct ext4_filename *fname,
-- struct ext4_dir_entry_2 **res_dir);
-+ struct ext4_dir_entry_2 **res_dir, struct htree_lock *lck);
- static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-- struct dentry *dentry, struct inode *inode);
-+ struct dentry *dentry, struct inode *inode,
-+ struct htree_lock *lck);
-
- /* checksumming functions */
- void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
-@@ -668,6 +676,227 @@ struct stats dx_show_entries(struct dx_h
- }
- #endif /* DX_DEBUG */
-
-+/* private data for htree_lock */
-+struct ext4_dir_lock_data {
-+ unsigned ld_flags; /* bits-map for lock types */
-+ unsigned ld_count; /* # entries of the last DX block */
-+ struct dx_entry ld_at_entry; /* copy of leaf dx_entry */
-+ struct dx_entry *ld_at; /* position of leaf dx_entry */
-+};
-+
-+#define ext4_htree_lock_data(l) ((struct ext4_dir_lock_data *)(l)->lk_private)
-+#define ext4_find_entry(dir, name, dirent, inline) \
-+ __ext4_find_entry(dir, name, dirent, inline, NULL)
-+#define ext4_add_entry(handle, dentry, inode) \
-+ __ext4_add_entry(handle, dentry, inode, NULL)
-+
-+/* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */
-+#define EXT4_HTREE_NODE_CHANGED (0xcafeULL << 32)
-+
-+static void ext4_htree_event_cb(void *target, void *event)
-+{
-+ u64 *block = (u64 *)target;
-+
-+ if (*block == dx_get_block((struct dx_entry *)event))
-+ *block = EXT4_HTREE_NODE_CHANGED;
-+}
-+
-+struct htree_lock_head *ext4_htree_lock_head_alloc(unsigned hbits)
-+{
-+ struct htree_lock_head *lhead;
-+
-+ lhead = htree_lock_head_alloc(EXT4_LK_MAX, hbits, 0);
-+ if (lhead != NULL) {
-+ htree_lock_event_attach(lhead, EXT4_LK_SPIN, HTREE_EVENT_WR,
-+ ext4_htree_event_cb);
-+ }
-+ return lhead;
-+}
-+EXPORT_SYMBOL(ext4_htree_lock_head_alloc);
-+
-+struct htree_lock *ext4_htree_lock_alloc(void)
-+{
-+ return htree_lock_alloc(EXT4_LK_MAX,
-+ sizeof(struct ext4_dir_lock_data));
-+}
-+EXPORT_SYMBOL(ext4_htree_lock_alloc);
-+
-+static htree_lock_mode_t ext4_htree_mode(unsigned flags)
-+{
-+ switch (flags) {
-+ default: /* 0 or unknown flags require EX lock */
-+ return HTREE_LOCK_EX;
-+ case EXT4_HLOCK_READDIR:
-+ return HTREE_LOCK_PR;
-+ case EXT4_HLOCK_LOOKUP:
-+ return HTREE_LOCK_CR;
-+ case EXT4_HLOCK_DEL:
-+ case EXT4_HLOCK_ADD:
-+ return HTREE_LOCK_CW;
-+ }
-+}
-+
-+/* return PR for read-only operations, otherwise return EX */
-+static inline htree_lock_mode_t ext4_htree_safe_mode(unsigned flags)
-+{
-+ int writer = (flags & EXT4_LB_DE) == EXT4_LB_DE;
-+
-+ /* 0 requires EX lock */
-+ return (flags == 0 || writer) ? HTREE_LOCK_EX : HTREE_LOCK_PR;
-+}
-+
-+static int ext4_htree_safe_locked(struct htree_lock *lck)
-+{
-+ int writer;
-+
-+ if (lck == NULL || lck->lk_mode == HTREE_LOCK_EX)
-+ return 1;
-+
-+ writer = (ext4_htree_lock_data(lck)->ld_flags & EXT4_LB_DE) ==
-+ EXT4_LB_DE;
-+ if (writer) /* all readers & writers are excluded? */
-+ return lck->lk_mode == HTREE_LOCK_EX;
-+
-+ /* all writers are excluded? */
-+ return lck->lk_mode == HTREE_LOCK_PR ||
-+ lck->lk_mode == HTREE_LOCK_PW ||
-+ lck->lk_mode == HTREE_LOCK_EX;
-+}
-+
-+/* relock htree_lock with EX mode if it's change operation, otherwise
-+ * relock it with PR mode. It's noop if PDO is disabled. */
-+static void ext4_htree_safe_relock(struct htree_lock *lck)
-+{
-+ if (!ext4_htree_safe_locked(lck)) {
-+ unsigned flags = ext4_htree_lock_data(lck)->ld_flags;
-+
-+ htree_change_lock(lck, ext4_htree_safe_mode(flags));
-+ }
-+}
-+
-+void ext4_htree_lock(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ struct inode *dir, unsigned flags)
-+{
-+ htree_lock_mode_t mode = is_dx(dir) ? ext4_htree_mode(flags) :
-+ ext4_htree_safe_mode(flags);
-+
-+ ext4_htree_lock_data(lck)->ld_flags = flags;
-+ htree_lock(lck, lhead, mode);
-+ if (!is_dx(dir))
-+ ext4_htree_safe_relock(lck); /* make sure it's safe locked */
-+}
-+EXPORT_SYMBOL(ext4_htree_lock);
-+
-+static int ext4_htree_node_lock(struct htree_lock *lck, struct dx_entry *at,
-+ unsigned lmask, int wait, void *ev)
-+{
-+ u32 key = (at == NULL) ? 0 : dx_get_block(at);
-+ u32 mode;
-+
-+ /* NOOP if htree is well protected or caller doesn't require the lock */
-+ if (ext4_htree_safe_locked(lck) ||
-+ !(ext4_htree_lock_data(lck)->ld_flags & lmask))
-+ return 1;
-+
-+ mode = (ext4_htree_lock_data(lck)->ld_flags & lmask) == lmask ?
-+ HTREE_LOCK_PW : HTREE_LOCK_PR;
-+ while (1) {
-+ if (htree_node_lock_try(lck, mode, key, ffz(~lmask), wait, ev))
-+ return 1;
-+ if (!(lmask & EXT4_LB_SPIN)) /* not a spinlock */
-+ return 0;
-+ cpu_relax(); /* spin until granted */
-+ }
-+}
-+
-+static int ext4_htree_node_locked(struct htree_lock *lck, unsigned lmask)
-+{
-+ return ext4_htree_safe_locked(lck) ||
-+ htree_node_is_granted(lck, ffz(~lmask));
-+}
-+
-+static void ext4_htree_node_unlock(struct htree_lock *lck,
-+ unsigned lmask, void *buf)
-+{
-+ /* NB: it's safe to call mutiple times or even it's not locked */
-+ if (!ext4_htree_safe_locked(lck) &&
-+ htree_node_is_granted(lck, ffz(~lmask)))
-+ htree_node_unlock(lck, ffz(~lmask), buf);
-+}
-+
-+#define ext4_htree_dx_lock(lck, key) \
-+ ext4_htree_node_lock(lck, key, EXT4_LB_DX, 1, NULL)
-+#define ext4_htree_dx_lock_try(lck, key) \
-+ ext4_htree_node_lock(lck, key, EXT4_LB_DX, 0, NULL)
-+#define ext4_htree_dx_unlock(lck) \
-+ ext4_htree_node_unlock(lck, EXT4_LB_DX, NULL)
-+#define ext4_htree_dx_locked(lck) \
-+ ext4_htree_node_locked(lck, EXT4_LB_DX)
-+
-+static void ext4_htree_dx_need_lock(struct htree_lock *lck)
-+{
-+ struct ext4_dir_lock_data *ld;
-+
-+ if (ext4_htree_safe_locked(lck))
-+ return;
-+
-+ ld = ext4_htree_lock_data(lck);
-+ switch (ld->ld_flags) {
-+ default:
-+ return;
-+ case EXT4_HLOCK_LOOKUP:
-+ ld->ld_flags = EXT4_HLOCK_LOOKUP_SAFE;
-+ return;
-+ case EXT4_HLOCK_DEL:
-+ ld->ld_flags = EXT4_HLOCK_DEL_SAFE;
-+ return;
-+ case EXT4_HLOCK_ADD:
-+ ld->ld_flags = EXT4_HLOCK_SPLIT;
-+ return;
-+ }
-+}
-+
-+#define ext4_htree_de_lock(lck, key) \
-+ ext4_htree_node_lock(lck, key, EXT4_LB_DE, 1, NULL)
-+#define ext4_htree_de_unlock(lck) \
-+ ext4_htree_node_unlock(lck, EXT4_LB_DE, NULL)
-+
-+#define ext4_htree_spin_lock(lck, key, event) \
-+ ext4_htree_node_lock(lck, key, EXT4_LB_SPIN, 0, event)
-+#define ext4_htree_spin_unlock(lck) \
-+ ext4_htree_node_unlock(lck, EXT4_LB_SPIN, NULL)
-+#define ext4_htree_spin_unlock_listen(lck, p) \
-+ ext4_htree_node_unlock(lck, EXT4_LB_SPIN, p)
-+
-+static void ext4_htree_spin_stop_listen(struct htree_lock *lck)
-+{
-+ if (!ext4_htree_safe_locked(lck) &&
-+ htree_node_is_listening(lck, ffz(~EXT4_LB_SPIN)))
-+ htree_node_stop_listen(lck, ffz(~EXT4_LB_SPIN));
-+}
-+
-+enum {
-+ DX_HASH_COL_IGNORE, /* ignore collision while probing frames */
-+ DX_HASH_COL_YES, /* there is collision and it does matter */
-+ DX_HASH_COL_NO, /* there is no collision */
-+};
-+
-+static int dx_probe_hash_collision(struct htree_lock *lck,
-+ struct dx_entry *entries,
-+ struct dx_entry *at, u32 hash)
-+{
-+ if (!(lck && ext4_htree_lock_data(lck)->ld_flags & EXT4_LB_EXACT)) {
-+ return DX_HASH_COL_IGNORE; /* don't care about collision */
-+
-+ } else if (at == entries + dx_get_count(entries) - 1) {
-+ return DX_HASH_COL_IGNORE; /* not in any leaf of this DX */
-+
-+ } else { /* hash collision? */
-+ return ((dx_get_hash(at + 1) & ~1) == hash) ?
-+ DX_HASH_COL_YES : DX_HASH_COL_NO;
-+ }
-+}
-+
- /*
- * Probe for a directory leaf block to search.
- *
-@@ -679,10 +908,11 @@ struct stats dx_show_entries(struct dx_h
- */
- static struct dx_frame *
- dx_probe(struct ext4_filename *fname, struct inode *dir,
-- struct dx_hash_info *hinfo, struct dx_frame *frame_in)
-+ struct dx_hash_info *hinfo, struct dx_frame *frame_in,
-+ struct htree_lock *lck)
- {
- unsigned count, indirect;
-- struct dx_entry *at, *entries, *p, *q, *m;
-+ struct dx_entry *at, *entries, *p, *q, *m, *dx = NULL;
- struct dx_root_info *info;
- struct dx_frame *frame = frame_in;
- struct buffer_head *bh;
-@@ -750,8 +980,15 @@ dx_probe(const struct qstr *d_name, stru
-
- dxtrace(printk("Look up %x", hash));
- while (1) {
-+ if (indirect == 0) { /* the last index level */
-+ /* NB: ext4_htree_dx_lock() could be noop if
-+ * DX-lock flag is not set for current operation */
-+ ext4_htree_dx_lock(lck, dx);
-+ ext4_htree_spin_lock(lck, dx, NULL);
-+ }
- count = dx_get_count(entries);
-- if (!count || count > dx_get_limit(entries)) {
-+ if (count == 0 || count > dx_get_limit(entries)) {
-+ ext4_htree_spin_unlock(lck); /* release spin */
- ext4_warning_inode(dir,
- "dx entry: no count or count > limit");
- brelse(bh);
-@@ -792,8 +1029,70 @@ dx_probe(const struct qstr *d_name, stru
- frame->bh = bh;
- frame->entries = entries;
- frame->at = at;
-- if (!indirect--)
-- return frame;
-+
-+ if (indirect == 0) { /* the last index level */
-+ struct ext4_dir_lock_data *ld;
-+ u64 myblock;
-+
-+ /* By default we only lock DE-block, however, we will
-+ * also lock the last level DX-block if:
-+ * a) there is hash collision
-+ * we will set DX-lock flag (a few lines below)
-+ * and redo to lock DX-block
-+ * see detail in dx_probe_hash_collision()
-+ * b) it's a retry from splitting
-+ * we need to lock the last level DX-block so nobody
-+ * else can split any leaf blocks under the same
-+ * DX-block, see detail in ext4_dx_add_entry()
-+ */
-+ if (ext4_htree_dx_locked(lck)) {
-+ /* DX-block is locked, just lock DE-block
-+ * and return */
-+ ext4_htree_spin_unlock(lck);
-+ if (!ext4_htree_safe_locked(lck))
-+ ext4_htree_de_lock(lck, frame->at);
-+ return frame;
-+ }
-+ /* it's pdirop and no DX lock */
-+ if (dx_probe_hash_collision(lck, entries, at, hash) ==
-+ DX_HASH_COL_YES) {
-+ /* found hash collision, set DX-lock flag
-+ * and retry to abtain DX-lock */
-+ ext4_htree_spin_unlock(lck);
-+ ext4_htree_dx_need_lock(lck);
-+ continue;
-+ }
-+ ld = ext4_htree_lock_data(lck);
-+ /* because I don't lock DX, so @at can't be trusted
-+ * after I release spinlock so I have to save it */
-+ ld->ld_at = at;
-+ ld->ld_at_entry = *at;
-+ ld->ld_count = dx_get_count(entries);
-+
-+ frame->at = &ld->ld_at_entry;
-+ myblock = dx_get_block(at);
-+
-+ /* NB: ordering locking */
-+ ext4_htree_spin_unlock_listen(lck, &myblock);
-+ /* other thread can split this DE-block because:
-+ * a) I don't have lock for the DE-block yet
-+ * b) I released spinlock on DX-block
-+ * if it happened I can detect it by listening
-+ * splitting event on this DE-block */
-+ ext4_htree_de_lock(lck, frame->at);
-+ ext4_htree_spin_stop_listen(lck);
-+
-+ if (myblock == EXT4_HTREE_NODE_CHANGED) {
-+ /* someone split this DE-block before
-+ * I locked it, I need to retry and lock
-+ * valid DE-block */
-+ ext4_htree_de_unlock(lck);
-+ continue;
-+ }
-+ return frame;
-+ }
-+ dx = at;
-+ indirect--;
- frame++;
- frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
- *err = PTR_ERR(bh);
-@@ -860,7 +1160,7 @@ static void dx_release (struct dx_frame
- static int ext4_htree_next_block(struct inode *dir, __u32 hash,
- struct dx_frame *frame,
- struct dx_frame *frames,
-- __u32 *start_hash)
-+ __u32 *start_hash, struct htree_lock *lck)
- {
- struct dx_frame *p;
- struct buffer_head *bh;
-@@ -875,12 +1175,22 @@ static int ext4_htree_next_block(struct
- * this loop, num_frames indicates the number of interior
- * nodes need to be read.
- */
-+ ext4_htree_de_unlock(lck);
- while (1) {
-- if (++(p->at) < p->entries + dx_get_count(p->entries))
-- break;
-+ if (num_frames > 0 || ext4_htree_dx_locked(lck)) {
-+ /* num_frames > 0 :
-+ * DX block
-+ * ext4_htree_dx_locked:
-+ * frame->at is reliable pointer returned by dx_probe,
-+ * otherwise dx_probe already knew no collision */
-+ if (++(p->at) < p->entries + dx_get_count(p->entries))
-+ break;
-+ }
- if (p == frames)
- return 0;
- num_frames++;
-+ if (num_frames == 1)
-+ ext4_htree_dx_unlock(lck);
- p--;
- }
-
-@@ -903,6 +1213,13 @@ static int ext4_htree_next_block(struct
- * block so no check is necessary
- */
- while (num_frames--) {
-+ if (num_frames == 0) {
-+ /* it's not always necessary, we just don't want to
-+ * detect hash collision again */
-+ ext4_htree_dx_need_lock(lck);
-+ ext4_htree_dx_lock(lck, p->at);
-+ }
-+
- bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
-@@ -911,6 +1228,7 @@ static int ext4_htree_next_block(struct
- p->bh = bh;
- p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
- }
-+ ext4_htree_de_lock(lck, p->at);
- return 1;
- }
-
-@@ -1013,10 +1331,10 @@ int ext4_htree_fill_tree(struct file *di
- }
- hinfo.hash = start_hash;
- hinfo.minor_hash = 0;
-- frame = dx_probe(NULL, dir, &hinfo, frames);
-+ /* assume it's PR locked */
-+ frame = dx_probe(NULL, dir, &hinfo, frames, NULL);
- if (IS_ERR(frame))
- return PTR_ERR(frame);
--
- /* Add '.' and '..' from the htree header */
- if (!start_hash && !start_minor_hash) {
- de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
-@@ -1043,7 +1361,7 @@ int ext4_htree_fill_tree(struct file *di
- count += ret;
- hashval = ~0;
- ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
-- frame, frames, &hashval);
-+ frame, frames, &hashval, NULL);
- *next_hash = hashval;
- if (ret < 0) {
- err = ret;
-@@ -1236,10 +1554,10 @@ static int is_dx_internal_node(struct in
- * The returned buffer_head has ->b_count elevated. The caller is expected
- * to brelse() it when appropriate.
- */
--static struct buffer_head * ext4_find_entry (struct inode *dir,
-+struct buffer_head *__ext4_find_entry(struct inode *dir,
- const struct qstr *d_name,
- struct ext4_dir_entry_2 **res_dir,
-- int *inlined)
-+ int *inlined, struct htree_lock *lck)
- {
- struct super_block *sb;
- struct buffer_head *bh_use[NAMEI_RA_SIZE];
-@@ -1283,7 +1601,7 @@ static struct buffer_head * ext4_find_en
- goto restart;
- }
- if (is_dx(dir)) {
-- ret = ext4_dx_find_entry(dir, &fname, res_dir);
-+ ret = ext4_dx_find_entry(dir, &fname, res_dir, lck);
- /*
- * On success, or if the error was file not found,
- * return. Otherwise, fall back to doing a search the
-@@ -1297,6 +1615,7 @@ static struct buffer_head * ext4_find_en
- return bh;
- dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
- "falling back\n"));
-+ ext4_htree_safe_relock(lck);
- }
- nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
- start = EXT4_I(dir)->i_dir_start_lookup;
-@@ -1389,10 +1708,12 @@ cleanup_and_exit:
- brelse(bh_use[ra_ptr]);
- return ret;
- }
-+EXPORT_SYMBOL(__ext4_find_entry);
-
- static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
- struct ext4_filename *fname,
-- struct ext4_dir_entry_2 **res_dir)
-+ struct ext4_dir_entry_2 **res_dir,
-+ struct htree_lock *lck)
- {
- struct super_block * sb = dir->i_sb;
- struct dx_hash_info hinfo;
-@@ -1400,7 +1722,7 @@ static struct buffer_head * ext4_dx_find
-
-
- #endif
-- frame = dx_probe(fname, dir, NULL, frames);
-+ frame = dx_probe(fname, dir, NULL, frames, lck);
- if (IS_ERR(frame))
- return NULL;
- do {
-@@ -1424,7 +1746,7 @@ static struct buffer_head * ext4_dx_find
-
- /* Check to see if we should continue to search */
- retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
-- frames, NULL);
-+ frames, NULL, lck);
- if (retval < 0) {
- ext4_warning(sb,
- "error reading index page in directory #%lu",
-@@ -1583,8 +1905,9 @@ static struct ext4_dir_entry_2* dx_pack_
- * Returns pointer to de in block into which the new entry will be inserted.
- */
- static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
-- struct buffer_head **bh,struct dx_frame *frame,
-- struct dx_hash_info *hinfo)
-+ struct buffer_head **bh, struct dx_frame *frames,
-+ struct dx_frame *frame, struct dx_hash_info *hinfo,
-+ struct htree_lock *lck)
- {
- unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
-@@ -1647,8 +1970,14 @@ static struct ext4_dir_entry_2 *do_split
- hash2, split, count-split));
-
- /* Fancy dance to stay within two buffers */
-- de2 = dx_move_dirents(data1, data2, map + split, count - split,
-- blocksize);
-+ if (hinfo->hash < hash2) {
-+ de2 = dx_move_dirents(data1, data2, map + split,
-+ count - split, blocksize);
-+ } else {
-+ /* make sure we will add entry to the same block which
-+ * we have already locked */
-+ de2 = dx_move_dirents(data1, data2, map, split, blocksize);
-+ }
- de = dx_pack_dirents(data1, blocksize);
- de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
- (char *) de,
-@@ -1666,12 +1996,21 @@ static struct ext4_dir_entry_2 *do_split
- dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
- dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
-
-- /* Which block gets the new entry? */
-- if (hinfo->hash >= hash2) {
-- swap(*bh, bh2);
-- de = de2;
-+ ext4_htree_spin_lock(lck, frame > frames ? (frame - 1)->at : NULL,
-+ frame->at); /* notify block is being split */
-+ if (hinfo->hash < hash2) {
-+ dx_insert_block(frame, hash2 + continued, newblock);
-+
-+ } else {
-+ /* switch block number */
-+ dx_insert_block(frame, hash2 + continued,
-+ dx_get_block(frame->at));
-+ dx_set_block(frame->at, newblock);
-+ (frame->at)++;
- }
-- dx_insert_block(frame, hash2 + continued, newblock);
-+ ext4_htree_spin_unlock(lck);
-+ ext4_htree_dx_unlock(lck);
-+
- err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
- if (err)
- goto journal_error;
-@@ -1945,9 +2283,9 @@ static int make_indexed_dir(handle_t *ha
- retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
- if (retval)
- goto out_frames;
-
-- de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
-+ de = do_split(handle, dir, &bh2, frames, frame, &fname->hinfo, NULL);
- if (IS_ERR(de)) {
- retval = PTR_ERR(de);
- goto out_frames;
- }
-@@ -2051,8 +2389,8 @@ out:
- * may not sleep between calling this and putting something into
- * the entry, as someone else might have used it while you slept.
- */
--static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
-- struct inode *inode)
-+int __ext4_add_entry(handle_t *handle, struct dentry *dentry,
-+ struct inode *inode, struct htree_lock *lck)
- {
- struct inode *dir = dentry->d_parent->d_inode;
- struct buffer_head *bh;
-@@ -2087,9 +2425,10 @@ static int ext4_add_entry(handle_t *hand
- if (dentry->d_name.len == 2 &&
- memcmp(dentry->d_name.name, "..", 2) == 0)
- return ext4_update_dotdot(handle, dentry, inode);
-- retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
-+ retval = ext4_dx_add_entry(handle, &fname, dentry, inode, lck);
- if (!retval || (retval != ERR_BAD_DX_DIR))
- goto out;
-+ ext4_htree_safe_relock(lck);
- ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
- dx_fallback++;
- ext4_mark_inode_dirty(handle, dir);
-@@ -2129,12 +2468,14 @@ static int ext4_add_entry(handle_t *hand
- ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
- return retval;
- }
-+EXPORT_SYMBOL(__ext4_add_entry);
-
- /*
- * Returns 0 for success, or a negative error value
- */
- static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-- struct dentry *dentry, struct inode *inode)
-+ struct dentry *dentry, struct inode *inode,
-+ struct htree_lock *lck)
- {
- struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- struct dx_entry *entries, *at;
-@@ -2148,7 +2488,7 @@ static int ext4_dx_add_entry(handle_t *h
-
- again:
- restart = 0;
-- frame = dx_probe(fname, dir, NULL, frames);
-+ frame = dx_probe(fname, dir, NULL, frames, lck);
- if (IS_ERR(frame))
- return PTR_ERR(frame);
- entries = frame->entries;
-@@ -2178,6 +2518,11 @@ again:
- struct dx_node *node2;
- struct buffer_head *bh2;
-
-+ if (!ext4_htree_safe_locked(lck)) { /* retry with EX lock */
-+ ext4_htree_safe_relock(lck);
-+ restart = 1;
-+ goto cleanup;
-+ }
- while (frame > frames) {
- if (dx_get_count((frame - 1)->entries) <
- dx_get_limit((frame - 1)->entries)) {
-@@ -2277,8 +2622,32 @@ again:
- restart = 1;
- goto journal_error;
- }
-+ } else if (!ext4_htree_dx_locked(lck)) {
-+ struct ext4_dir_lock_data *ld = ext4_htree_lock_data(lck);
-+
-+ /* not well protected, require DX lock */
-+ ext4_htree_dx_need_lock(lck);
-+ at = frame > frames ? (frame - 1)->at : NULL;
-+
-+ /* NB: no risk of deadlock because it's just a try.
-+ *
-+ * NB: we check ld_count for twice, the first time before
-+ * having DX lock, the second time after holding DX lock.
-+ *
-+ * NB: We never free blocks for directory so far, which
-+ * means value returned by dx_get_count() should equal to
-+ * ld->ld_count if nobody split any DE-block under @at,
-+ * and ld->ld_at still points to valid dx_entry. */
-+ if ((ld->ld_count != dx_get_count(entries)) ||
-+ !ext4_htree_dx_lock_try(lck, at) ||
-+ (ld->ld_count != dx_get_count(entries))) {
-+ restart = 1;
-+ goto cleanup;
-+ }
-+ /* OK, I've got DX lock and nothing changed */
-+ frame->at = ld->ld_at;
- }
-- de = do_split(handle, dir, &bh, frame, &fname->hinfo);
-+ de = do_split(handle, dir, &bh, frames, frame, &fname->hinfo, lck);
- if (IS_ERR(de)) {
- err = PTR_ERR(de);
- goto cleanup;
-@@ -2277,6 +2622,8 @@ again:
- journal_error:
- ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
- cleanup:
-+ ext4_htree_dx_unlock(lck);
-+ ext4_htree_de_unlock(lck);
- brelse(bh);
- dx_release(frames);
- /* @restart is true means htree-path has been changed, we need to
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/super.c
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/super.c
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/super.c
-@@ -875,6 +875,7 @@ static struct inode *ext4_alloc_inode(st
-
- ei->vfs_inode.i_version = 1;
- spin_lock_init(&ei->i_raw_lock);
-+ sema_init(&ei->i_append_sem, 1);
- INIT_LIST_HEAD(&ei->i_prealloc_list);
- spin_lock_init(&ei->i_prealloc_lock);
- ext4_es_init_tree(&ei->i_es_tree);
+++ /dev/null
-Fix ext4_ext_find_extent() to already pre-allocate ext4_ext_path[]
-array of the max depth instead of current depth.
-This will avoid racy cases of concurrent ext_depth() growth in
-current and unsafe implementation with ext4_ext_path[] array
-re-[sizing,allocation], even with more recent and related patches
-that will be integrated in more recent Kernels.
-
-Index: linux-2.6.32-504.el6.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-2.6.32-504.el6.x86_64.orig/fs/ext4/ext4.h
-+++ linux-2.6.32-504.el6.x86_64/fs/ext4/ext4.h
-@@ -1147,6 +1147,9 @@
- unsigned long s_ext_extents;
- #endif
-
-+ /* maximum possible extents tree depth, to be computed at mount time */
-+ unsigned int s_max_ext_tree_depth;
-+
- /* for buddy allocator */
- struct ext4_group_info ***s_group_info;
- struct inode *s_buddy_cache;
-Index: linux-2.6.32-504.el6.x86_64/fs/ext4/super.c
-===================================================================
---- linux-2.6.32-504.el6.x86_64.orig/fs/ext4/super.c
-+++ linux-2.6.32-504.el6.x86_64/fs/ext4/super.c
-@@ -4038,6 +4038,8 @@
- if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
- goto failed_mount3;
-
-+ ext4_ext_init(sb); /* needed before using extent-mapped journal */
-+
- /*
- * The first inode we look at is the journal inode. Don't try
- * root first: it may be modified in the journal!
-@@ -4200,7 +4202,6 @@
- goto failed_mount4a;
- }
-
-- ext4_ext_init(sb);
- err = ext4_mb_init(sb);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
-Index: linux-2.6.32-504.el6.x86_64/fs/ext4/extents.c
-===================================================================
---- linux-2.6.32-504.el6.x86_64.orig/fs/ext4/extents.c
-+++ linux-2.6.32-504.el6.x86_64/fs/ext4/extents.c
-@@ -699,8 +699,9 @@
-
- if (!path) {
- /* account possible depth increase */
-- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
-- GFP_NOFS);
-+ path = kzalloc(sizeof(struct ext4_ext_path) *
-+ (EXT4_SB(inode->i_sb)->s_max_ext_tree_depth + 1),
-+ GFP_NOFS);
- if (unlikely(!path))
- return ERR_PTR(-ENOMEM);
- alloc = 1;
-@@ -2664,8 +2662,9 @@
- path[k].p_block =
- le16_to_cpu(path[k].p_hdr->eh_entries)+1;
- } else {
-- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
-- GFP_NOFS);
-+ path = kzalloc(sizeof(struct ext4_ext_path) *
-+ (EXT4_SB(inode->i_sb)->s_max_ext_tree_depth + 1),
-+ GFP_NOFS);
- if (path == NULL) {
- ext4_journal_stop(handle);
- return -ENOMEM;
-@@ -3048,13 +3034,14 @@
- */
- void ext4_ext_init(struct super_block *sb)
- {
-+ ext4_fsblk_t maxblocks;
-+
- /*
- * possible initialization would be here
- */
-
- if (ext4_has_feature_extents(sb)) {
--#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
-- printk(KERN_INFO "EXT4-fs: file extents enabled"
-+ printk(KERN_INFO "EXT4-fs (%s): file extents enabled"
- #ifdef AGGRESSIVE_TEST
- ", aggressive tests"
- #endif
-@@ -3064,8 +3051,31 @@
- #ifdef EXTENTS_STATS
- ", stats"
- #endif
-- "\n");
--#endif
-+ , sb->s_id);
-+ EXT4_SB(sb)->s_max_ext_tree_depth = 1;
-+
-+ maxblocks = sb->s_maxbytes / sb->s_blocksize;
-+
-+ /* 1st/root level/node of extents tree stands in i_data and
-+ * entries stored in tree nodes can be of type ext4_extent
-+ * (leaf node) or ext4_extent_idx (internal node) */
-+ maxblocks /= (sizeof(((struct ext4_inode_info *)0x0)->i_data) -
-+ sizeof(struct ext4_extent_header)) /
-+ max(sizeof(struct ext4_extent),
-+ sizeof(struct ext4_extent_idx));
-+
-+ /* compute maximum extents tree depth for a fully populated
-+ * file of max size made of only minimal/1-block extents */
-+ while (maxblocks > 0) {
-+ maxblocks /= (sb->s_blocksize -
-+ sizeof(struct ext4_extent_header)) /
-+ max(sizeof(struct ext4_extent),
-+ sizeof(struct ext4_extent_idx));
-+ EXT4_SB(sb)->s_max_ext_tree_depth++;
-+ }
-+
-+ printk(", maximum tree depth=%u\n",
-+ EXT4_SB(sb)->s_max_ext_tree_depth);
- #ifdef EXTENTS_STATS
- spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
- EXT4_SB(sb)->s_ext_min = 1 << 30;
+++ /dev/null
-Since we could skip corrupt block groups, this patch
-use ext4_warning() intead of ext4_error() to make FS not
-emount RO in default, also fix a leftover from upstream
-commit 163a203ddb36c36d4a1c942
----
-diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
-index e069155..692b5e4 100644
---- a/fs/ext4/balloc.c
-+++ b/fs/ext4/balloc.c
-@@ -183,25 +183,17 @@ static int ext4_init_block_bitmap(struct
- unsigned int bit, bit_max;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_fsblk_t start, tmp;
-- struct ext4_group_info *grp;
-
- J_ASSERT_BH(bh, buffer_locked(bh));
-
- /* If checksum is bad mark all blocks used to prevent allocation
- * essentially implementing a per-group read-only flag. */
- if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-- grp = ext4_get_group_info(sb, block_group);
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-- if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-- int count;
-- count = ext4_free_inodes_count(sb, gdp);
-- percpu_counter_sub(&sbi->s_freeinodes_counter,
-- count);
-- }
-- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT |
-+ EXT4_GROUP_INFO_IBITMAP_CORRUPT,
-+ "Checksum bad for group %u",
-+ block_group);
- return -EFSBADCRC;
- }
- memset(bh->b_data, 0, sb->s_blocksize);
-@@ -370,7 +362,6 @@ static int ext4_validate_block_bitmap(st
- {
- ext4_fsblk_t blk;
- struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
-- struct ext4_sb_info *sbi = EXT4_SB(sb);
-
- if (buffer_verified(bh))
- return 0;
-@@ -381,22 +372,19 @@ static int ext4_validate_block_bitmap(st
- if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
- desc, bh))) {
- ext4_unlock_group(sb, block_group);
-- ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT,
-+ "bg %u: bad block bitmap checksum",
-+ block_group);
- return -EFSBADCRC;
- }
- blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
- if (unlikely(blk != 0)) {
- ext4_unlock_group(sb, block_group);
-- ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
-- block_group, blk);
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT,
-+ "bg %u: block %llu: invalid block bitmap",
-+ block_group, blk);
- return -EFSCORRUPTED;
- }
- set_buffer_verified(bh);
-@@ -467,8 +455,6 @@ ext4_read_block_bitmap_nowait(struct sup
- ext4_unlock_group(sb, block_group);
- unlock_buffer(bh);
- if (err) {
-- ext4_error(sb, "Failed to init block bitmap for group "
-- "%u: %d", block_group, err);
- goto out;
- }
- goto verify;
-diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
-index 3c41773..63a63b6 100644
---- a/fs/ext4/ext4.h
-+++ b/fs/ext4/ext4.h
-@@ -91,6 +91,17 @@ typedef __u32 ext4_lblk_t;
- /* data type for block group number */
- typedef unsigned int ext4_group_t;
-
-+void __ext4_corrupted_block_group(struct super_block *sb,
-+ ext4_group_t group, unsigned int flags,
-+ const char *function, unsigned int line);
-+
-+#define ext4_corrupted_block_group(sb, group, flags, fmt, ...) \
-+ do { \
-+ __ext4_warning(sb, __func__, __LINE__, fmt, \
-+ ##__VA_ARGS__); \
-+ __ext4_corrupted_block_group(sb, group, flags, \
-+ __func__, __LINE__); \
-+ } while (0)
- /*
- * Flags used in mballoc's allocation_context flags field.
- *
-@@ -2673,7 +2684,11 @@ struct ext4_group_info {
- #define EXT4_GROUP_INFO_NEED_INIT_BIT 0
- #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1
- #define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2
-+#define EXT4_GROUP_INFO_BBITMAP_CORRUPT \
-+ (1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
- #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3
-+#define EXT4_GROUP_INFO_IBITMAP_CORRUPT \
-+ (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
-
- #define EXT4_MB_GRP_NEED_INIT(grp) \
- (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
-diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
-index fc65310..92bcc8d 100644
---- a/fs/ext4/ialloc.c
-+++ b/fs/ext4/ialloc.c
-@@ -337,14 +318,9 @@ out:
- if (!fatal)
- fatal = err;
- } else {
-- ext4_error(sb, "bit already cleared for inode %lu", ino);
-- if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-- int count;
-- count = ext4_free_inodes_count(sb, gdp);
-- percpu_counter_sub(&sbi->s_freeinodes_counter,
-- count);
-- }
-- set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ ext4_corrupted_block_group(sb, block_group,
-+ EXT4_GROUP_INFO_IBITMAP_CORRUPT,
-+ "bit already cleared for inode %lu", ino);
- }
-
- error_return:
-diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
-index 7282d07..e6805e6 100644
---- a/fs/ext4/mballoc.c
-+++ b/fs/ext4/mballoc.c
-@@ -752,10 +752,18 @@ int ext4_mb_generate_buddy(struct super_block *sb,
- if (free != grp->bb_free) {
- struct ext4_group_desc *gdp;
- gdp = ext4_get_group_desc(sb, group, NULL);
-- ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
-- "%u in gd, %lu pa's\n", (long unsigned int)group,
-- free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
-- grp->bb_prealloc_nr);
-+
-+ ext4_corrupted_block_group(sb, group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT,
-+ "group %lu: %u blocks in bitmap, %u in bb, %u in gd, %lu pa's block bitmap corrupt",
-+ (unsigned long int)group, free, grp->bb_free,
-+ ext4_free_group_clusters(sb, gdp),
-+ grp->bb_prealloc_nr);
-+ /*
-+ * If we intend to continue, we consider group descriptor
-+ * corrupt and update bb_free using bitmap value
-+ */
-+ grp->bb_free = free;
- return -EIO;
- }
- mb_set_largest_free_order(sb, grp);
-@@ -1101,7 +1109,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- int block;
- int pnum;
- int poff;
-- struct page *page;
-+ struct page *page = NULL;
- int ret;
- struct ext4_group_info *grp;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
-@@ -1127,7 +1135,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- */
- ret = ext4_mb_init_group(sb, group);
- if (ret)
-- return ret;
-+ goto err;
- }
-
- /*
-@@ -1227,6 +1235,7 @@ err:
- page_cache_release(e4b->bd_buddy_page);
- e4b->bd_buddy = NULL;
- e4b->bd_bitmap = NULL;
-+ ext4_warning(sb, "Error loading buddy information for %u", group);
- return ret;
- }
-
-@@ -3599,9 +3608,11 @@ int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
- }
-
- if (free != free_in_gdp) {
-- ext4_error(sb, "on-disk bitmap for group %d"
-- "corrupted: %u blocks free in bitmap, %u - in gd\n",
-- group, free, free_in_gdp);
-+ ext4_corrupted_block_group(sb, group,
-+ EXT4_GROUP_INFO_BBITMAP_CORRUPT,
-+ "on-disk bitmap for group %d corrupted: %u blocks free in bitmap, %u - in gd\n",
-+ group, free,
-+ free_in_gdp);
- return -EIO;
- }
- return 0;
-@@ -3962,16 +3973,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
- /* "free < pa->pa_free" means we maybe double alloc the same blocks,
- * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
- if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
-- ext4_error(sb, "pa free mismatch: [pa %p] "
-- "[phy %lu] [logic %lu] [len %u] [free %u] "
-- "[error %u] [inode %lu] [freed %u]", pa,
-- (unsigned long)pa->pa_pstart,
-- (unsigned long)pa->pa_lstart,
-- (unsigned)pa->pa_len, (unsigned)pa->pa_free,
-- (unsigned)pa->pa_error, pa->pa_inode->i_ino,
-- free);
- ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
-- free, pa->pa_free);
-+ free, pa->pa_free);
- /*
- * pa is already deleted so we use the value obtained
- * from the bitmap and continue.
-@@ -4031,15 +4034,11 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
-- ext4_error(sb, "Error %d reading block bitmap for %u",
-- err, group);
- return 0;
- }
-
- err = ext4_mb_load_buddy(sb, group, &e4b);
- if (err) {
-- ext4_warning(sb, "Error %d loading buddy information for %u",
-- err, group);
- put_bh(bitmap_bh);
- return 0;
- }
-@@ -4198,17 +4198,12 @@ repeat:
-
- err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
- GFP_NOFS|__GFP_NOFAIL);
-- if (err) {
-- ext4_error(sb, "Error %d loading buddy information for %u",
-- err, group);
-+ if (err)
- return;
-- }
-
- bitmap_bh = ext4_read_block_bitmap(sb, group);
- if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
-- ext4_error(sb, "Error %d reading block bitmap for %u",
-- err, group);
- ext4_mb_unload_buddy(&e4b);
- continue;
- }
-@@ -4467,11 +4462,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
-
- err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
- GFP_NOFS|__GFP_NOFAIL);
-- if (err) {
-- ext4_error(sb, "Error %d loading buddy information for %u",
-- err, group);
-+ if (err)
- continue;
-- }
- ext4_lock_group(sb, group);
- list_del(&pa->pa_group_list);
- ext4_get_group_info(sb, group)->bb_prealloc_nr--;
-@@ -4742,17 +4734,18 @@ errout:
- * been updated or not when fail case. So can
- * not revert pa_free back, just mark pa_error*/
- pa->pa_error++;
-- ext4_error(sb,
-- "Updating bitmap error: [err %d] "
-- "[pa %p] [phy %lu] [logic %lu] "
-- "[len %u] [free %u] [error %u] "
-- "[inode %lu]", *errp, pa,
-- (unsigned long)pa->pa_pstart,
-- (unsigned long)pa->pa_lstart,
-- (unsigned)pa->pa_len,
-- (unsigned)pa->pa_free,
-- (unsigned)pa->pa_error,
-- pa->pa_inode ? pa->pa_inode->i_ino : 0);
-+ ext4_corrupted_block_group(sb, 0, 0,
-+ "Updating bitmap error: [err %d] "
-+ "[pa %p] [phy %lu] [logic %lu] "
-+ "[len %u] [free %u] [error %u] "
-+ "[inode %lu]", *errp, pa,
-+ (unsigned long)pa->pa_pstart,
-+ (unsigned long)pa->pa_lstart,
-+ (unsigned)pa->pa_len,
-+ (unsigned)pa->pa_free,
-+ (unsigned)pa->pa_error,
-+ pa->pa_inode ?
-+ pa->pa_inode->i_ino : 0);
- }
- }
- ext4_mb_release_context(ac);
-@@ -5037,7 +5030,7 @@ do_more:
-
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
- if (err)
-- goto error_return;
-+ goto error_brelse;
-
- if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
- struct ext4_free_data *new_entry;
-@@ -5119,8 +5112,9 @@ do_more:
- goto do_more;
- }
- error_return:
-- brelse(bitmap_bh);
- ext4_std_error(sb, err);
-+error_brelse:
-+ brelse(bitmap_bh);
- return;
- }
-
-@@ -5216,7 +5210,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
-
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
- if (err)
-- goto error_return;
-+ goto error_brelse;
-
- /*
- * need to update group_info->bb_free and bitmap
-@@ -5253,8 +5247,9 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
- err = ret;
-
- error_return:
-- brelse(bitmap_bh);
- ext4_std_error(sb, err);
-+error_brelse:
-+ brelse(bitmap_bh);
- return err;
- }
-
-@@ -5329,11 +5324,9 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
- trace_ext4_trim_all_free(sb, group, start, max);
-
- ret = ext4_mb_load_buddy(sb, group, &e4b);
-- if (ret) {
-- ext4_warning(sb, "Error %d loading buddy information for %u",
-- ret, group);
-+ if (ret)
- return ret;
-- }
-+
- bitmap = e4b.bd_bitmap;
-
- ext4_lock_group(sb, group);
-diff --git a/fs/ext4/super.c b/fs/ext4/super.c
-index c625960..0de22f2 100644
---- a/fs/ext4/super.c
-+++ b/fs/ext4/super.c
-@@ -633,6 +633,37 @@ void __ext4_warning(struct super_block *sb, const char *function,
- va_end(args);
- }
-
-+void __ext4_corrupted_block_group(struct super_block *sb, ext4_group_t group,
-+ unsigned int flags, const char *function,
-+ unsigned int line)
-+{
-+ struct ext4_sb_info *sbi = EXT4_SB(sb);
-+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-+
-+ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT &&
-+ !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
-+ percpu_counter_sub(&sbi->s_freeclusters_counter,
-+ grp->bb_free);
-+ set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
-+ &grp->bb_state);
-+ }
-+
-+ if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT &&
-+ !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-+ if (gdp) {
-+ int count;
-+
-+ count = ext4_free_inodes_count(sb, gdp);
-+ percpu_counter_sub(&sbi->s_freeinodes_counter,
-+ count);
-+ }
-+ set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
-+ &grp->bb_state);
-+ }
-+ save_error_info(sb, function, line);
-+}
-+
- void __ext4_grp_locked_error(const char *function, unsigned int line,
- struct super_block *sb, ext4_group_t grp,
- unsigned long ino, ext4_fsblk_t block,
+++ /dev/null
-This patch implements a feature that allows ext4 fs users (e.g. Lustre)
-to store data in an ext4 dirent.
-The data is stored in the ext4 dirent after the file name; this space is
-accounted for in de->rec_len. The flag EXT4_DIRENT_LUFID is added to d_type
-if extra data is present.
-
-dentry->d_fsdata is used to pass the fid to ext4, so no
-changes to the ext4_add_entry() interface are required.
-
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/dir.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/dir.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/dir.c
-@@ -71,11 +71,11 @@ int __ext4_check_dir_entry(const char *f
- const int rlen = ext4_rec_len_from_disk(de->rec_len,
- dir->i_sb->s_blocksize);
-
-- if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
-+ if (unlikely(rlen < __EXT4_DIR_REC_LEN(1)))
- error_msg = "rec_len is smaller than minimal";
- else if (unlikely(rlen % 4 != 0))
- error_msg = "rec_len % 4 != 0";
-- else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
-+ else if (unlikely(rlen < EXT4_DIR_REC_LEN(de)))
- error_msg = "rec_len is too small for name_len";
- else if (unlikely(((char *) de - buf) + rlen > size))
- error_msg = "directory entry across range";
-@@ -208,7 +208,7 @@ revalidate:
- * failure will be detected in the
- * dirent test below. */
- if (ext4_rec_len_from_disk(de->rec_len,
-- sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
-+ sb->s_blocksize) < __EXT4_DIR_REC_LEN(1))
- break;
- i += ext4_rec_len_from_disk(de->rec_len,
- sb->s_blocksize);
-@@ -438,12 +438,17 @@ int ext4_htree_store_dirent(struct file
- struct fname *fname, *new_fn;
- struct dir_private_info *info;
- int len;
-+ int extra_data = 0;
-
- info = dir_file->private_data;
- p = &info->root.rb_node;
-
- /* Create and allocate the fname structure */
-- len = sizeof(struct fname) + ent_name->len + 1;
-+ if (dirent->file_type & EXT4_DIRENT_LUFID)
-+ extra_data = ext4_get_dirent_data_len(dirent);
-+
-+ len = sizeof(struct fname) + ent_name->len + extra_data + 1;
-+
- new_fn = kzalloc(len, GFP_KERNEL);
- if (!new_fn)
- return -ENOMEM;
-@@ -452,7 +457,7 @@ int ext4_htree_store_dirent(struct file
- new_fn->inode = le32_to_cpu(dirent->inode);
- new_fn->name_len = dirent->name_len;
- new_fn->file_type = dirent->file_type;
-- memcpy(new_fn->name, ent_name->name, ent_name->len);
-+ memcpy(new_fn->name, ent_name->name, ent_name->len + extra_data);
- new_fn->name[ent_name->len] = 0;
-
- while (*p) {
-@@ -652,7 +457,7 @@ int ext4_htree_store_dirent(struct file
- if (ldiskfs_check_dir_entry(dir, NULL, de, bh,
- buf, buf_size, offset))
- return -EFSCORRUPTED;
-- nlen = EXT4_DIR_REC_LEN(de->name_len);
-+ nlen = EXT4_DIR_REC_LEN(de);
- rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
- de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
- offset += rlen;
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-@@ -952,6 +952,7 @@ struct ext4_inode_info {
- #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
- #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
- #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
-+#define EXT4_MOUNT_DIRDATA 0x40000 /* Data in directory entries*/
- #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
- #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
- #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
-@@ -1534,6 +1535,7 @@ static inline void ext4_clear_state_flag
- EXT4_FEATURE_INCOMPAT_64BIT| \
- EXT4_FEATURE_INCOMPAT_FLEX_BG| \
- EXT4_FEATURE_INCOMPAT_MMP | \
-+ EXT4_FEATURE_INCOMPAT_DIRDATA| \
- EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
- EXT4_FEATURE_INCOMPAT_ENCRYPT | \
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
-@@ -1640,6 +1642,43 @@ struct ext4_dir_entry_tail {
- #define EXT4_FT_SYMLINK 7
-
- #define EXT4_FT_MAX 8
-+#define EXT4_FT_MASK 0xf
-+
-+#if EXT4_FT_MAX > EXT4_FT_MASK
-+#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
-+#endif
-+
-+/*
-+ * d_type has 4 unused bits, so it can hold four types data. these different
-+ * type of data (e.g. lustre data, high 32 bits of 64-bit inode number) can be
-+ * stored, in flag order, after file-name in ext4 dirent.
-+*/
-+/*
-+ * this flag is added to d_type if ext4 dirent has extra data after
-+ * filename. this data length is variable and length is stored in first byte
-+ * of data. data start after filename NUL byte.
-+ * This is used by Lustre FS.
-+ */
-+#define EXT4_DIRENT_LUFID 0x10
-+
-+#define EXT4_LUFID_MAGIC 0xAD200907UL
-+struct ext4_dentry_param {
-+ __u32 edp_magic; /* EXT4_LUFID_MAGIC */
-+ char edp_len; /* size of edp_data in bytes */
-+ char edp_data[0]; /* packed array of data */
-+} __packed;
-+
-+static inline unsigned char *ext4_dentry_get_data(struct super_block *sb,
-+ struct ext4_dentry_param *p)
-+
-+{
-+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA))
-+ return NULL;
-+ if (p && p->edp_magic == EXT4_LUFID_MAGIC)
-+ return &p->edp_len;
-+ else
-+ return NULL;
-+}
-
- #define EXT4_FT_DIR_CSUM 0xDE
-
-@@ -1650,8 +1689,11 @@ struct ext4_dir_entry_tail {
- */
- #define EXT4_DIR_PAD 4
- #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
--#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
-+#define __EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
- ~EXT4_DIR_ROUND)
-+#define EXT4_DIR_REC_LEN(de) (__EXT4_DIR_REC_LEN((de)->name_len +\
-+ ext4_get_dirent_data_len(de)))
-+
- #define EXT4_MAX_REC_LEN ((1<<16)-1)
-
- /*
-@@ -1987,12 +2029,12 @@ extern int ext4_find_dest_de(struct inod
- struct buffer_head *bh,
- void *buf, int buf_size,
- struct ext4_filename *fname,
-- struct ext4_dir_entry_2 **dest_de);
-+ struct ext4_dir_entry_2 **dest_de, int *dlen);
- int ext4_insert_dentry(struct inode *dir,
- struct inode *inode,
- struct ext4_dir_entry_2 *de,
- int buf_size,
-- struct ext4_filename *fname);
-+ struct ext4_filename *fname, void *data);
- static inline void ext4_update_dx_flag(struct inode *inode)
- {
- if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
-@@ -2004,10 +2046,17 @@ static unsigned char ext4_filetype_table
-
- static inline unsigned char get_dtype(struct super_block *sb, int filetype)
- {
-+ int fl_index = filetype & EXT4_FT_MASK;
-+
-- if (!ext4_has_feature_filetype(sb) || filetype >= EXT4_FT_MAX)
-+ if (!ext4_has_feature_filetype(sb) || fl_index >= EXT4_FT_MAX)
- return DT_UNKNOWN;
-
-- return ext4_filetype_table[filetype];
-+ if (!test_opt(sb, DIRDATA))
-+ return ext4_filetype_table[fl_index];
-+
-+ return (ext4_filetype_table[fl_index]) |
-+ (filetype & EXT4_DIRENT_LUFID);
-+
- }
-
- /* fsync.c */
-@@ -2157,6 +2206,8 @@ extern struct buffer_head * ext4_find_en
- extern int ext4_delete_entry(handle_t *handle, struct inode * dir,
- struct ext4_dir_entry_2 *de_del,
- struct buffer_head *bh);
-+extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
-+ struct inode *inode, const void *, const void *);
- extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
- __u32 start_minor_hash, __u32 *next_hash);
- extern int ext4_search_dir(struct buffer_head *bh,
-@@ -2761,6 +2810,36 @@ extern struct mutex ext4__aio_mutex[EXT4
- extern int ext4_resize_begin(struct super_block *sb);
- extern void ext4_resize_end(struct super_block *sb);
-
-+/*
-+ * Compute the total directory entry data length.
-+ * This includes the filename and an implicit NUL terminator (always present),
-+ * and optional extensions. Each extension has a bit set in the high 4 bits of
-+ * de->file_type, and the extension length is the first byte in each entry.
-+ */
-+static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
-+{
-+ char *len = de->name + de->name_len + 1 /* NUL terminator */;
-+ int dlen = 0;
-+ __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
-+ struct ext4_dir_entry_tail *t = (struct ext4_dir_entry_tail *)de;
-+
-+ if (!t->det_reserved_zero1 &&
-+ le16_to_cpu(t->det_rec_len) ==
-+ sizeof(struct ext4_dir_entry_tail) &&
-+ !t->det_reserved_zero2 &&
-+ t->det_reserved_ft == EXT4_FT_DIR_CSUM)
-+ return 0;
-+
-+ while (extra_data_flags) {
-+ if (extra_data_flags & 1) {
-+ dlen += *len + (dlen == 0);
-+ len += *len;
-+ }
-+ extra_data_flags >>= 1;
-+ }
-+ return dlen;
-+}
-+
- #endif /* __KERNEL__ */
-
- #endif /* _EXT4_H */
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/namei.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/namei.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/namei.c
-@@ -241,7 +241,8 @@ static unsigned dx_get_count(struct dx_e
- static unsigned dx_get_limit(struct dx_entry *entries);
- static void dx_set_count(struct dx_entry *entries, unsigned value);
- static void dx_set_limit(struct dx_entry *entries, unsigned value);
--static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
-+static inline unsigned dx_root_limit(struct inode *dir,
-+ struct ext4_dir_entry_2 *dot_de, unsigned infosize);
- static unsigned dx_node_limit(struct inode *dir);
- static struct dx_frame *dx_probe(struct ext4_filename *fname,
- struct inode *dir,
-@@ -383,22 +384,23 @@ static struct dx_countlimit *get_dx_coun
- {
- struct ext4_dir_entry *dp;
- struct dx_root_info *root;
-- int count_offset;
-+ int count_offset, dot_rec_len, dotdot_rec_len;
-
- if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
- count_offset = 8;
-- else if (le16_to_cpu(dirent->rec_len) == 12) {
-- dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
-+ else {
-+ dot_rec_len = le16_to_cpu(dirent->rec_len);
-+ dp = (struct ext4_dir_entry *)(((void *)dirent) + dot_rec_len);
- if (le16_to_cpu(dp->rec_len) !=
-- EXT4_BLOCK_SIZE(inode->i_sb) - 12)
-+ EXT4_BLOCK_SIZE(inode->i_sb) - dot_rec_len)
- return NULL;
-- root = (struct dx_root_info *)(((void *)dp + 12));
-+ dotdot_rec_len = EXT4_DIR_REC_LEN((struct ext4_dir_entry_2 *)dp);
-+ root = (struct dx_root_info *)(((void *)dp + dotdot_rec_len));
- if (root->reserved_zero ||
- root->info_length != sizeof(struct dx_root_info))
- return NULL;
-- count_offset = 32;
-- } else
-- return NULL;
-+ count_offset = 8 + dot_rec_len + dotdot_rec_len;
-+ }
-
- if (offset)
- *offset = count_offset;
-@@ -504,11 +505,12 @@ ext4_next_entry(struct ext4_dir_entry_2
- */
- struct dx_root_info *dx_get_dx_info(struct ext4_dir_entry_2 *de)
- {
-+ BUG_ON(de->name_len != 1);
- /* get dotdot first */
-- de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1));
-+ de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
-
- /* dx root info is after dotdot entry */
-- de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2));
-+ de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de));
-
- return (struct dx_root_info *)de;
- }
-@@ -553,10 +555,16 @@ static inline void dx_set_limit(struct d
- ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
- }
-
--static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
-+static inline unsigned dx_root_limit(struct inode *dir,
-+ struct ext4_dir_entry_2 *dot_de, unsigned infosize)
- {
-- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
-- EXT4_DIR_REC_LEN(2) - infosize;
-+ struct ext4_dir_entry_2 *dotdot_de;
-+ unsigned entry_space;
-+
-+ BUG_ON(dot_de->name_len != 1);
-+ dotdot_de = ext4_next_entry(dot_de, dir->i_sb->s_blocksize);
-+ entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(dot_de) -
-+ EXT4_DIR_REC_LEN(dotdot_de) - infosize;
-
- if (ext4_has_metadata_csum(dir->i_sb))
- entry_space -= sizeof(struct dx_tail);
-@@ -565,7 +573,7 @@ static inline unsigned dx_root_limit(str
-
- static inline unsigned dx_node_limit(struct inode *dir)
- {
-- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
-+ unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0);
-
- if (ext4_has_metadata_csum(dir->i_sb))
- entry_space -= sizeof(struct dx_tail);
-@@ -674,7 +682,7 @@ static struct stats dx_show_leaf(struct
- (unsigned) ((char *) de - base));
- #endif
- }
-- space += EXT4_DIR_REC_LEN(de->name_len);
-+ space += EXT4_DIR_REC_LEN(de);
- names++;
- }
- de = ext4_next_entry(de, size);
-@@ -775,11 +783,14 @@ dx_probe(struct ext4_filename *fname, st
-
- entries = (struct dx_entry *)(((char *)info) + info->info_length);
-
-- if (dx_get_limit(entries) != dx_root_limit(dir,
-- info->info_length)) {
-+ if (dx_get_limit(entries) !=
-+ dx_root_limit(dir, (struct ext4_dir_entry_2 *)frame->bh->b_data,
-+ info->info_length)) {
- ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
- dx_get_limit(entries),
-- dx_root_limit(dir, info->info_length));
-+ dx_root_limit(dir,
-+ (struct ext4_dir_entry_2 *)frame->bh->b_data,
-+ info->info_length));
- goto fail;
- }
-
-@@ -963,7 +974,7 @@ static int htree_dirblock_to_tree(struct
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- top = (struct ext4_dir_entry_2 *) ((char *) de +
- dir->i_sb->s_blocksize -
-- EXT4_DIR_REC_LEN(0));
-+ __EXT4_DIR_REC_LEN(0));
- #ifdef CONFIG_EXT4_FS_ENCRYPTION
- /* Check if the directory is encrypted */
- if (ext4_encrypted_inode(dir)) {
-@@ -1688,7 +1699,7 @@ dx_move_dirents(char *from, char *to, st
- while (count--) {
- struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
- (from + (map->offs<<2));
-- rec_len = EXT4_DIR_REC_LEN(de->name_len);
-+ rec_len = EXT4_DIR_REC_LEN(de);
- memcpy (to, de, rec_len);
- ((struct ext4_dir_entry_2 *) to)->rec_len =
- ext4_rec_len_to_disk(rec_len, blocksize);
-@@ -1712,7 +1723,7 @@ static struct ext4_dir_entry_2* dx_pack_
- while ((char*)de < base + blocksize) {
- next = ext4_next_entry(de, blocksize);
- if (de->inode && de->name_len) {
-- rec_len = EXT4_DIR_REC_LEN(de->name_len);
-+ rec_len = EXT4_DIR_REC_LEN(de);
- if (de > to)
- memmove(to, de, rec_len);
- to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
-@@ -1843,15 +1854,17 @@ int ext4_find_dest_de(struct inode *dir,
- struct buffer_head *bh,
- void *buf, int buf_size,
- struct ext4_filename *fname,
-- struct ext4_dir_entry_2 **dest_de)
-+ struct ext4_dir_entry_2 **dest_de, int *dlen)
- {
- struct ext4_dir_entry_2 *de;
-- unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname));
-+ unsigned short reclen = __EXT4_DIR_REC_LEN(fname_len(fname)) +
-+ (dlen ? *dlen : 0);
- int nlen, rlen;
- unsigned int offset = 0;
- char *top;
- int res;
-
-+ dlen ? *dlen = 0 : 0; /* default set to 0 */
- de = (struct ext4_dir_entry_2 *)buf;
- top = buf + buf_size - reclen;
- while ((char *) de <= top) {
-@@ -1868,10 +1881,26 @@ int ext4_find_dest_de(struct inode *dir,
- res = -EEXIST;
- goto return_result;
- }
-- nlen = EXT4_DIR_REC_LEN(de->name_len);
-+ nlen = EXT4_DIR_REC_LEN(de);
- rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
- if ((de->inode ? rlen - nlen : rlen) >= reclen)
- break;
-+ /* Then for dotdot entries, check for the smaller space
-+ * required for just the entry, no FID */
-+ if (fname_len(fname) == 2 && memcmp(fname_name(fname), "..", 2) == 0) {
-+ if ((de->inode ? rlen - nlen : rlen) >=
-+ __EXT4_DIR_REC_LEN(fname_len(fname))) {
-+ /* set dlen=1 to indicate not
-+ * enough space store fid */
-+ dlen ? *dlen = 1 : 0;
-+ break;
-+ }
-+ /* The new ".." entry must be written over the
-+ * previous ".." entry, which is the first
-+ * entry traversed by this scan. If it doesn't
-+ * fit, something is badly wrong, so -EIO. */
-+ return -EIO;
-+ }
- de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
- offset += rlen;
- }
-@@ -1890,12 +1919,12 @@ int ext4_insert_dentry(struct inode *dir
- struct inode *inode,
- struct ext4_dir_entry_2 *de,
- int buf_size,
-- struct ext4_filename *fname)
-+ struct ext4_filename *fname, void *data)
- {
-
- int nlen, rlen;
-
-- nlen = EXT4_DIR_REC_LEN(de->name_len);
-+ nlen = EXT4_DIR_REC_LEN(de);
- rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
- if (de->inode) {
- struct ext4_dir_entry_2 *de1 =
-@@ -1909,6 +1938,11 @@ int ext4_insert_dentry(struct inode *dir
- ext4_set_de_type(inode->i_sb, de, inode->i_mode);
- de->name_len = fname_len(fname);
- memcpy(de->name, fname_name(fname), fname_len(fname));
-+ if (data) {
-+ de->name[fname_len(fname)] = 0;
-+ memcpy(&de->name[fname_len(fname) + 1], data, *(char *)data);
-+ de->file_type |= EXT4_DIRENT_LUFID;
-+ }
- return 0;
- }
-
-@@ -1923,18 +1957,23 @@ int ext4_insert_dentry(struct inode *dir
- static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
- struct inode *dir,
- struct inode *inode, struct ext4_dir_entry_2 *de,
-- struct buffer_head *bh)
-+ struct buffer_head *bh, struct dentry *dentry)
- {
- unsigned int blocksize = dir->i_sb->s_blocksize;
- int csum_size = 0;
-- int err;
-+ int err, dlen = 0;
-+ unsigned char *data;
-
-+ data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
-+ dentry->d_fsdata);
- if (ext4_has_metadata_csum(inode->i_sb))
- csum_size = sizeof(struct ext4_dir_entry_tail);
-
- if (!de) {
-+ if (data)
-+ dlen = (*data) + 1;
- err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
-- blocksize - csum_size, fname, &de);
-+ blocksize - csum_size, fname, &de, &dlen);
- if (err)
- return err;
- }
-@@ -1947,7 +1986,10 @@ static int add_dirent_to_buf(handle_t *h
-
- /* By now the buffer is marked for journaling. Due to crypto operations,
- * the following function call may fail */
-- err = ext4_insert_dentry(dir, inode, de, blocksize, fname);
-+ /* If writing the short form of "dotdot", don't add the data section */
-+ if (dlen == 1)
-+ data = NULL;
-+ err = ext4_insert_dentry(dir, inode, de, blocksize, fname, data);
- if (err < 0)
- return err;
-
-@@ -2059,7 +2101,8 @@ static int make_indexed_dir(handle_t *ha
-
- dx_set_block(entries, 1);
- dx_set_count(entries, 1);
-- dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
-+ dx_set_limit(entries, dx_root_limit(dir,
-+ dot_de, sizeof(*dx_info)));
-
- /* Initialize as for dx_probe */
- fname->hinfo.hash_version = dx_info->hash_version;
-@@ -2087,7 +2130,7 @@ static int make_indexed_dir(handle_t *ha
- goto out_frames;
- }
-
-- retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
-+ retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2, dentry);
- out_frames:
- /*
- * Even if the block split failed, we have to properly write
-@@ -2109,6 +2152,8 @@ static int ext4_update_dotdot(handle_t *
- struct buffer_head *dir_block;
- struct ext4_dir_entry_2 *de;
- int len, journal = 0, err = 0;
-+ int dlen = 0;
-+ char *data;
-
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-@@ -2126,19 +2171,24 @@ static int ext4_update_dotdot(handle_t *
- /* the first item must be "." */
- assert(de->name_len == 1 && de->name[0] == '.');
- len = le16_to_cpu(de->rec_len);
-- assert(len >= EXT4_DIR_REC_LEN(1));
-- if (len > EXT4_DIR_REC_LEN(1)) {
-+ assert(len >= __EXT4_DIR_REC_LEN(1));
-+ if (len > __EXT4_DIR_REC_LEN(1)) {
- BUFFER_TRACE(dir_block, "get_write_access");
- err = ext4_journal_get_write_access(handle, dir_block);
- if (err)
- goto out_journal;
-
- journal = 1;
-- de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1));
-+ de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
- }
-
-- len -= EXT4_DIR_REC_LEN(1);
-- assert(len == 0 || len >= EXT4_DIR_REC_LEN(2));
-+ len -= EXT4_DIR_REC_LEN(de);
-+ data = ext4_dentry_get_data(dir->i_sb,
-+ (struct ext4_dentry_param *)dentry->d_fsdata);
-+ if (data)
-+ dlen = *data + 1;
-+ assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen));
-+
- de = (struct ext4_dir_entry_2 *)
- ((char *) de + le16_to_cpu(de->rec_len));
- if (!journal) {
-@@ -2152,10 +2202,15 @@ static int ext4_update_dotdot(handle_t *
- if (len > 0)
- de->rec_len = cpu_to_le16(len);
- else
-- assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2));
-+ assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2));
- de->name_len = 2;
- strcpy(de->name, "..");
-- ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-+ if (data != NULL && ext4_get_dirent_data_len(de) >= dlen) {
-+ de->name[2] = 0;
-+ memcpy(&de->name[2 + 1], data, *data);
-+ ext4_set_de_type(dir->i_sb, de, S_IFDIR);
-+ de->file_type |= EXT4_DIRENT_LUFID;
-+ }
-
- out_journal:
- if (journal) {
-@@ -2237,7 +2292,7 @@ static int ext4_add_entry(handle_t *hand
- goto out;
- }
- retval = add_dirent_to_buf(handle, &fname, dir, inode,
-- NULL, bh);
-+ NULL, bh, dentry);
- if (retval != -ENOSPC)
- goto out;
-
-@@ -2265,7 +2320,7 @@ static int ext4_add_entry(handle_t *hand
- initialize_dirent_tail(t, blocksize);
- }
-
-- retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
-+ retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh, dentry);
- out:
- ext4_fname_free_filename(&fname);
- brelse(bh);
-@@ -2305,7 +2360,7 @@ static int ext4_dx_add_entry(handle_t *h
- if (err)
- goto journal_error;
-
-- err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
-+ err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh, dentry);
- if (err != -ENOSPC)
- goto cleanup;
-
-@@ -2409,7 +2464,7 @@ static int ext4_dx_add_entry(handle_t *h
- err = PTR_ERR(de);
- goto cleanup;
- }
-- err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
-+ err = add_dirent_to_buf(handle, fname, dir, inode, de, bh, dentry);
- goto cleanup;
-
- journal_error:
-@@ -2683,37 +2738,70 @@ err_unlock_inode:
- return err;
- }
-
-+struct tp_block {
-+ struct inode *inode;
-+ void *data1;
-+ void *data2;
-+};
-+
- struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
- struct ext4_dir_entry_2 *de,
- int blocksize, int csum_size,
- unsigned int parent_ino, int dotdot_real_len)
- {
-+ void *data1 = NULL, *data2 = NULL;
-+ int dot_reclen = 0;
-+
-+ if (dotdot_real_len == 10) {
-+ struct tp_block *tpb = (struct tp_block *)inode;
-+ data1 = tpb->data1;
-+ data2 = tpb->data2;
-+ inode = tpb->inode;
-+ dotdot_real_len = 0;
-+ }
- de->inode = cpu_to_le32(inode->i_ino);
- de->name_len = 1;
-- de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
-- blocksize);
- strcpy(de->name, ".");
- ext4_set_de_type(inode->i_sb, de, S_IFDIR);
-
-+ /* get packed fid data*/
-+ data1 = ext4_dentry_get_data(inode->i_sb,
-+ (struct ext4_dentry_param *) data1);
-+ if (data1) {
-+ de->name[1] = 0;
-+ memcpy(&de->name[2], data1, *(char *) data1);
-+ de->file_type |= EXT4_DIRENT_LUFID;
-+ }
-+ de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de));
-+ dot_reclen = cpu_to_le16(de->rec_len);
- de = ext4_next_entry(de, blocksize);
- de->inode = cpu_to_le32(parent_ino);
- de->name_len = 2;
-+ strcpy(de->name, "..");
-+ ext4_set_de_type(inode->i_sb, de, S_IFDIR);
-+ data2 = ext4_dentry_get_data(inode->i_sb,
-+ (struct ext4_dentry_param *) data2);
-+ if (data2) {
-+ de->name[2] = 0;
-+ memcpy(&de->name[3], data2, *(char *) data2);
-+ de->file_type |= EXT4_DIRENT_LUFID;
-+ }
- if (!dotdot_real_len)
- de->rec_len = ext4_rec_len_to_disk(blocksize -
-- (csum_size + EXT4_DIR_REC_LEN(1)),
-+ (csum_size + dot_reclen),
- blocksize);
- else
- de->rec_len = ext4_rec_len_to_disk(
-- EXT4_DIR_REC_LEN(de->name_len), blocksize);
-- strcpy(de->name, "..");
-- ext4_set_de_type(inode->i_sb, de, S_IFDIR);
-+ EXT4_DIR_REC_LEN(de), blocksize);
-
- return ext4_next_entry(de, blocksize);
- }
-
- static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
-- struct inode *inode)
-+ struct inode *inode,
-+ const void *data1, const void *data2)
- {
-+ struct tp_block param;
- struct buffer_head *dir_block = NULL;
- struct ext4_dir_entry_2 *de;
- struct ext4_dir_entry_tail *t;
-@@ -2738,7 +2826,11 @@ static int ext4_init_new_dir(handle_t *h
- if (IS_ERR(dir_block))
- return PTR_ERR(dir_block);
- de = (struct ext4_dir_entry_2 *)dir_block->b_data;
-- ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
-+ param.inode = inode;
-+ param.data1 = (void *)data1;
-+ param.data2 = (void *)data2;
-+ ext4_init_dot_dotdot((struct inode *)(¶m), de, blocksize,
-+ csum_size, dir->i_ino, 10);
- set_nlink(inode, 2);
- if (csum_size) {
- t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
-@@ -2755,6 +2847,29 @@ out:
- return err;
- }
-
-+/* Initialize @inode as a subdirectory of @dir, and add the
-+ * "." and ".." entries into the first directory block. */
-+int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir,
-+ struct inode *inode,
-+ const void *data1, const void *data2)
-+{
-+ int rc;
-+
-+ if (IS_ERR(handle))
-+ return PTR_ERR(handle);
-+
-+ if (IS_DIRSYNC(dir))
-+ ext4_handle_sync(handle);
-+
-+ inode->i_op = &ext4_dir_inode_operations;
-+ inode->i_fop = &ext4_dir_operations;
-+ rc = ext4_init_new_dir(handle, dir, inode, data1, data2);
-+ if (!rc)
-+ rc = ext4_mark_inode_dirty(handle, inode);
-+ return rc;
-+}
-+EXPORT_SYMBOL(ext4_add_dot_dotdot);
-+
- static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
- {
- handle_t *handle;
-@@ -2781,7 +2896,7 @@ retry:
-
- inode->i_op = &ext4_dir_inode_operations;
- inode->i_fop = &ext4_dir_operations;
-- err = ext4_init_new_dir(handle, dir, inode);
-+ err = ext4_init_new_dir(handle, dir, inode, NULL, NULL);
- if (err)
- goto out_clear_inode;
- err = ext4_mark_inode_dirty(handle, inode);
-@@ -2832,7 +2947,7 @@ int ext4_empty_dir(struct inode *inode)
- }
-
- sb = inode->i_sb;
-- if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
-+ if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2)) {
- EXT4_ERROR_INODE(inode, "invalid size");
- return 1;
- }
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/inline.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/inline.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/inline.c
-@@ -988,7 +998,7 @@ static int ext4_add_dirent_to_inline(han
-
-
- err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
-- inline_size, fname, &de);
-+ inline_size, fname, &de, NULL);
- if (err)
- return err;
-
-@@ -998,7 +998,7 @@ static int ext4_add_dirent_to_inline(han
- err = ext4_journal_get_write_access(handle, iloc->bh);
- if (err)
- return err;
-- ext4_insert_dentry(dir, inode, de, inline_size, fname);
-+ ext4_insert_dentry(dir, inode, de, inline_size, fname, NULL);
-
- ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
-
-@@ -1078,7 +1078,7 @@ static int ext4_update_inline_dir(handle
- int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
- int new_size = get_max_inline_xattr_value_size(dir, iloc);
-
-- if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
-+ if (new_size - old_size <= __EXT4_DIR_REC_LEN(1))
- return -ENOSPC;
-
- ret = ext4_update_inline_data(handle, dir,
-@@ -1348,7 +1348,7 @@ int htree_inlinedir_to_tree(struct file
- fake.name_len = 1;
- strcpy(fake.name, ".");
- fake.rec_len = ext4_rec_len_to_disk(
-- EXT4_DIR_REC_LEN(fake.name_len),
-+ EXT4_DIR_REC_LEN(&fake),
- inline_size);
- ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
- de = &fake;
-@@ -1358,7 +1358,7 @@ int htree_inlinedir_to_tree(struct file
- fake.name_len = 2;
- strcpy(fake.name, "..");
- fake.rec_len = ext4_rec_len_to_disk(
-- EXT4_DIR_REC_LEN(fake.name_len),
-+ EXT4_DIR_REC_LEN(&fake),
- inline_size);
- ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
- de = &fake;
-@@ -1455,8 +1455,8 @@ int ext4_read_inline_dir(struct file *fi
- * So we will use extra_offset and extra_size to indicate them
- * during the inline dir iteration.
- */
-- dotdot_offset = EXT4_DIR_REC_LEN(1);
-- dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2);
-+ dotdot_offset = __EXT4_DIR_REC_LEN(1);
-+ dotdot_size = dotdot_offset + __EXT4_DIR_REC_LEN(2);
- extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
- extra_size = extra_offset + inline_size;
-
-@@ -1493,7 +1493,7 @@ revalidate:
- * failure will be detected in the
- * dirent test below. */
- if (ext4_rec_len_from_disk(de->rec_len, extra_size)
-- < EXT4_DIR_REC_LEN(1))
-+ < __EXT4_DIR_REC_LEN(1))
- break;
- i += ext4_rec_len_from_disk(de->rec_len,
- extra_size);
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/super.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/super.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/super.c
-@@ -1151,7 +1151,7 @@ enum {
- Opt_data_err_abort, Opt_data_err_ignore,
- Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
- Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
-- Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
-+ Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_dirdata,
- Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
- Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
-@@ -1216,6 +1216,7 @@ static const match_table_t tokens = {
- {Opt_stripe, "stripe=%u"},
- {Opt_delalloc, "delalloc"},
- {Opt_nodelalloc, "nodelalloc"},
-+ {Opt_dirdata, "dirdata"},
- {Opt_removed, "mblk_io_submit"},
- {Opt_removed, "nomblk_io_submit"},
- {Opt_block_validity, "block_validity"},
-@@ -1424,6 +1425,7 @@ static const struct mount_opts {
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
- {Opt_offusrjquota, 0, MOPT_Q},
-+ {Opt_dirdata, EXT4_MOUNT_DIRDATA, MOPT_SET},
- {Opt_offgrpjquota, 0, MOPT_Q},
- {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
- {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
+++ /dev/null
-mbcache provides absolutely no value for Lustre xattrs (because
-they are unique and cannot be shared between files) and as we can
-see it has a noticeable overhead in some cases. In the past there
-was a CONFIG_MBCACHE option that would allow it to be disabled,
-but this was removed in newer kernels, so we will need to patch
-ldiskfs to fix this.
-
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-@@ -944,6 +944,7 @@ struct ext4_inode_info {
- /*
- * Mount flags set via mount options or defaults
- */
-+#define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Disable mbcache */
- #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
- #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
- #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/super.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/super.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/super.c
-@@ -1157,6 +1157,7 @@ enum {
- Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
- Opt_inode_readahead_blks, Opt_journal_ioprio,
- Opt_dioread_nolock, Opt_dioread_lock,
-+ Opt_no_mbcache,
- Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb,
- };
-@@ -1231,6 +1232,7 @@ static const match_table_t tokens = {
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_init_itable, "init_itable=%u"},
-+ {Opt_no_mbcache, "no_mbcache"},
- {Opt_init_itable, "init_itable"},
- {Opt_noinit_itable, "noinit_itable"},
- {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
-@@ -1390,6 +1392,7 @@ static const struct mount_opts {
- {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
- {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
- {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
-+ {Opt_no_mbcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
- {Opt_commit, 0, MOPT_GTE0},
- {Opt_max_batch_time, 0, MOPT_GTE0},
- {Opt_min_batch_time, 0, MOPT_GTE0},
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/xattr.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/xattr.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/xattr.c
-@@ -80,7 +80,7 @@
- # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
- #endif
-
--static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
-+static void _ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
- static struct buffer_head *ext4_xattr_cache_find(struct inode *,
- struct ext4_xattr_header *,
- struct mb_cache_entry **);
-@@ -401,7 +401,8 @@ bad_block:
- error = -EFSCORRUPTED;
- goto cleanup;
- }
-- ext4_xattr_cache_insert(ext4_mb_cache, bh);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ _ext4_xattr_cache_insert(ext4_mb_cache, bh);
- entry = BFIRST(bh);
- error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1,
- inode);
-@@ -565,7 +566,8 @@ ext4_xattr_block_list(struct dentry *den
- error = -EFSCORRUPTED;
- goto cleanup;
- }
-- ext4_xattr_cache_insert(ext4_mb_cache, bh);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ _ext4_xattr_cache_insert(ext4_mb_cache, bh);
- error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
-
- cleanup:
-@@ -678,7 +680,9 @@ ext4_xattr_release_block(handle_t *handl
- * This must happen under buffer lock for
- * ext4_xattr_block_set() to reliably detect freed block
- */
-- mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ mb_cache_entry_delete_block(ext4_mb_cache,
-+ hash, bh->b_blocknr);
- get_bh(bh);
- unlock_buffer(bh);
- ext4_free_blocks(handle, inode, bh, 0, 1,
-@@ -690,9 +694,10 @@ ext4_xattr_release_block(handle_t *handl
-
-
- if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
-- struct mb_cache_entry *ce;
-+ struct mb_cache_entry *ce = NULL;
-
-- ce = mb_cache_entry_get(ext4_mb_cache, hash,
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ ce = mb_cache_entry_get(ext4_mb_cache, hash,
- bh->b_blocknr);
- if (ce) {
- ce->e_reusable = 1;
-@@ -1107,7 +1112,8 @@ ext4_xattr_block_set(handle_t *handle, s
- * ext4_xattr_block_set() to reliably detect modified
- * block
- */
-- mb_cache_entry_delete_block(ext4_mb_cache, hash,
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ mb_cache_entry_delete_block(ext4_mb_cache, hash,
- bs->bh->b_blocknr);
- ea_bdebug(bs->bh, "modifying in-place");
- error = ext4_xattr_set_entry(i, s, handle, inode);
-@@ -1281,7 +1287,8 @@ inserted:
- } else if (bs->bh && s->base == bs->bh->b_data) {
- /* We were modifying this block in-place. */
- ea_bdebug(bs->bh, "keeping this block");
-- ext4_xattr_cache_insert(ext4_mb_cache, bs->bh);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ _ext4_xattr_cache_insert(ext4_mb_cache, bs->bh);
- new_bh = bs->bh;
- get_bh(new_bh);
- } else {
-@@ -1277,7 +1284,8 @@ getblk_failed:
- memcpy(new_bh->b_data, s->base, new_bh->b_size);
- set_buffer_uptodate(new_bh);
- unlock_buffer(new_bh);
-- ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
-+ if (!test_opt(inode->i_sb, NO_MBCACHE))
-+ _ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
- error = ext4_handle_dirty_xattr_block(handle,
- inode, new_bh);
- if (error)
-@@ -2068,7 +2076,7 @@ ext4_xattr_inode_array_free(struct inode
- * Returns 0, or a negative error number on failure.
- */
- static void
--ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
-+_ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
- {
- struct ext4_xattr_header *header = BHDR(bh);
- __u32 hash = le32_to_cpu(header->h_hash);
-@@ -2140,6 +2148,8 @@ ext4_xattr_cache_find(struct inode *inod
- struct mb_cache_entry *ce;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
-
-+ if (test_opt(inode->i_sb, NO_MBCACHE))
-+ return NULL;
- if (!header->h_hash)
- return NULL; /* never share */
- ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
+++ /dev/null
-Index: linux-stage/fs/ext4/super.c
-When ldiskfs runs in failover mode with a read-only disk,
-part of the allocation updates are lost and ldiskfs may fail
-while mounting; this is due to an inconsistent state of the
-group descriptors. The group-descriptor check is therefore
-performed after journal replay.
-===================================================================
---- linux-stage/fs/ext4/super.c 2016-11-06 15:15:30.892386878 +0530
-+++ linux-stage.orig.1/fs/ext4/super.c 2016-11-08 10:56:45.579892189 +0530
-@@ -3884,12 +3884,6 @@ static int ext4_fill_super(struct super_
- }
- }
- sbi->s_gdb_count = db_count;
-- if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
-- ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-- ret = -EFSCORRUPTED;
-- goto failed_mount2;
-- }
--
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
- spin_lock_init(&sbi->s_next_gen_lock);
-
-@@ -4020,6 +4014,13 @@ static int ext4_fill_super(struct super_
- sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
-
- no_journal:
-+
-+ if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
-+ ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-+ ret = -EFSCORRUPTED;
-+ goto failed_mount_wq;
-+ }
-+
- sbi->s_mb_cache = ext4_xattr_create_cache();
- if (!sbi->s_mb_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+++ /dev/null
-This INCOMPAT_LARGEDIR feature allows larger directories
-to be created in ldiskfs, both with directory sizes over
-2GB and a maximum htree depth of 3 instead of the
-current limit of 2. These features are needed in order
-to exceed the current limit of approximately 10M entries
-in a single directory.
-
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
-@@ -1585,6 +1585,7 @@ static inline void ext4_clear_state_flag
- EXT4_FEATURE_INCOMPAT_MMP | \
- EXT4_FEATURE_INCOMPAT_DIRDATA| \
- EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
-+ EXT4_FEATURE_INCOMPAT_LARGEDIR | \
- EXT4_FEATURE_INCOMPAT_ENCRYPT | \
- EXT4_FEATURE_INCOMPAT_CSUM_SEED)
- #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-@@ -1999,6 +2000,9 @@ struct mmpd_data {
- # define NORET_TYPE /**/
- # define ATTRIB_NORET __attribute__((noreturn))
- # define NORET_AND noreturn,
-+/* htree levels for ext4 */
-+#define EXT4_HTREE_LEVEL_COMPAT 2
-+#define EXT4_HTREE_LEVEL 3
-
- struct ext4_xattr_ino_array {
- unsigned int xia_count; /* # of used item in the array */
-@@ -2472,13 +2476,16 @@ static inline void ext4_r_blocks_count_s
- es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
- }
-
--static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
-+static inline loff_t ext4_isize(struct super_block *sb,
-+ struct ext4_inode *raw_inode)
- {
-- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
-+ if (S_ISREG(le16_to_cpu(raw_inode->i_mode)) ||
-+ (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) &&
-+ S_ISDIR(le16_to_cpu(raw_inode->i_mode))))
- return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
- le32_to_cpu(raw_inode->i_size_lo);
-- else
-- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
-+
-+ return (loff_t)le32_to_cpu(raw_inode->i_size_lo);
- }
-
- static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/namei.c
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
-@@ -513,7 +513,14 @@ struct dx_root_info * dx_get_dx_info(str
-
- static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
- {
-- return le32_to_cpu(entry->block) & 0x00ffffff;
-+ return le32_to_cpu(entry->block) & 0x0fffffff;
-+}
-+
-+static inline int
-+ext4_dir_htree_level(struct super_block *sb)
-+{
-+ return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_LARGEDIR) ?
-+ EXT4_HTREE_LEVEL : EXT4_HTREE_LEVEL_COMPAT;
- }
-
- static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
-@@ -681,6 +688,7 @@ dx_probe(const struct qstr *d_name, stru
- struct dx_frame *frame = frame_in;
- u32 hash;
-
-+ memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
- frame->bh = ext4_read_dirblock(dir, 0, INDEX);
- if (IS_ERR(frame->bh))
- return (struct dx_frame *) frame->bh;
-@@ -714,9 +721,13 @@ dx_probe(const struct qstr *d_name, stru
- }
-
- indirect = info->indirect_levels;
-- if (indirect > 1) {
-- ext4_warning_inode(dir, "Unimplemented hash depth: %#06x",
-- info->indirect_levels);
-+ if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
-+ ext4_warning_inode(dir, "htree depth: %#06x exceed max depth %u",
-+ indirect, ext4_dir_htree_level(dir->i_sb));
-+ if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
-+ ext4_warning(dir->i_sb, "Enable large directory "
-+ "feature to access it");
-+ }
- goto fail;
- }
-
-@@ -812,12 +826,20 @@ fail:
-
- static void dx_release (struct dx_frame *frames)
- {
-+ int i;
-+ struct dx_root_info *info;
-+
- if (frames[0].bh == NULL)
- return;
-
-- if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels)
-- brelse(frames[1].bh);
-- brelse(frames[0].bh);
-+ for (i = 0, info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data);
-+ i <= info->indirect_levels;
-+ i++) {
-+ if (frames[i].bh == NULL)
-+ break;
-+ brelse(frames[i].bh);
-+ frames[i].bh = NULL;
-+ }
- }
-
- /*
-@@ -960,7 +979,7 @@ int ext4_htree_fill_tree(struct file *di
- {
- struct dx_hash_info hinfo;
- struct ext4_dir_entry_2 *de;
-- struct dx_frame frames[2], *frame;
-+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- struct inode *dir;
- ext4_lblk_t block;
- int count = 0;
-@@ -1376,7 +1395,7 @@ static struct buffer_head * ext4_dx_find
- struct dx_hash_info hinfo;
- {
- struct super_block * sb = dir->i_sb;
-- struct dx_frame frames[2], *frame;
-+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- const struct qstr *d_name = fname->usr_fname;
- struct buffer_head *bh;
- ext4_lblk_t block;
-@@ -1832,7 +1851,7 @@ static int make_indexed_dir(handle_t *ha
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
- struct buffer_head *bh2;
-- struct dx_frame frames[2], *frame;
-+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- struct dx_entry *entries;
- struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
- struct ext4_dir_entry_tail *t;
-@@ -2117,14 +2136,17 @@ static int ext4_add_entry(handle_t *hand
- static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
- struct inode *inode)
- {
-- struct dx_frame frames[2], *frame;
-+ struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- struct dx_entry *entries, *at;
- struct buffer_head *bh;
- struct inode *dir = d_inode(dentry->d_parent);
- struct super_block *sb = dir->i_sb;
- struct ext4_dir_entry_2 *de;
-+ int restart;
- int err;
-
-+again:
-+ restart = 0;
- frame = dx_probe(fname, dir, NULL, frames);
- if (IS_ERR(frame))
- return PTR_ERR(frame);
-@@ -2138,33 +2160,48 @@ static int ext4_dx_add_entry(handle_t *h
- goto cleanup;
- }
-
-- BUFFER_TRACE(bh, "get_write_access");
-- err = ext4_journal_get_write_access(handle, bh);
-- if (err)
-- goto journal_error;
--
- err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh, dentry);
- if (err != -ENOSPC)
- goto cleanup;
-
-+ err = 0;
- /* Block full, should compress but for now just split */
- dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
- dx_get_count(entries), dx_get_limit(entries)));
- /* Need to split index? */
- if (dx_get_count(entries) == dx_get_limit(entries)) {
- ext4_lblk_t newblock;
-- unsigned icount = dx_get_count(entries);
-- int levels = frame - frames;
-+ int levels = frame - frames + 1;
-+ unsigned icount;
-+ int add_level = 1;
- struct dx_entry *entries2;
- struct dx_node *node2;
- struct buffer_head *bh2;
-
-- if (levels && (dx_get_count(frames->entries) ==
-- dx_get_limit(frames->entries))) {
-- ext4_warning_inode(dir, "Directory index full!");
-+ while (frame > frames) {
-+ if (dx_get_count((frame - 1)->entries) <
-+ dx_get_limit((frame - 1)->entries)) {
-+ add_level = 0;
-+ break;
-+ }
-+ frame--; /* split higher index block */
-+ at = frame->at;
-+ entries = frame->entries;
-+ restart = 1;
-+ }
-+ if (add_level && levels == ext4_dir_htree_level(sb)) {
-+ ext4_warning(sb, "inode %lu: comm %s: index %u: reach max htree level %u",
-+ dir->i_ino, current->comm, levels,
-+ ext4_dir_htree_level(sb));
-+ if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
-+ ext4_warning(sb, "Large directory feature is"
-+ "not enabled on this "
-+ "filesystem");
-+ }
- err = -ENOSPC;
- goto cleanup;
- }
-+ icount = dx_get_count(entries);
- bh2 = ext4_append(handle, dir, &newblock);
- if (IS_ERR(bh2)) {
- err = PTR_ERR(bh2);
-@@ -2179,7 +2216,7 @@ static int ext4_dx_add_entry(handle_t *h
- err = ext4_journal_get_write_access(handle, frame->bh);
- if (err)
- goto journal_error;
-- if (levels) {
-+ if (!add_level) {
- unsigned icount1 = icount/2, icount2 = icount - icount1;
- unsigned hash2 = dx_get_hash(entries + icount1);
- dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
-@@ -2187,7 +2224,7 @@ static int ext4_dx_add_entry(handle_t *h
-
- BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
- err = ext4_journal_get_write_access(handle,
-- frames[0].bh);
-+ (frame - 1)->bh);
- if (err)
- goto journal_error;
-
-@@ -2203,19 +2240,27 @@ static int ext4_dx_add_entry(handle_t *h
- frame->entries = entries = entries2;
- swap(frame->bh, bh2);
- }
-- dx_insert_block(frames + 0, hash2, newblock);
-- dxtrace(dx_show_index("node", frames[1].entries));
-+ dx_insert_block(frame - 1, hash2, newblock);
-+ dxtrace(dx_show_index("node", frame->entries));
- dxtrace(dx_show_index("node",
-- ((struct dx_node *) bh2->b_data)->entries));
-+ ((struct dx_node *)bh2->b_data)->entries));
- err = ext4_handle_dirty_dx_node(handle, dir, bh2);
- if (err)
- goto journal_error;
- brelse (bh2);
-+ err = ext4_handle_dirty_dx_node(handle, dir,
-+ (frame - 1)->bh);
-+ if (err)
-+ goto journal_error;
-+ if (restart) {
-+ err = ext4_handle_dirty_dx_node(handle, dir,
-+ frame->bh);
-+ goto journal_error;
-+ }
- } else {
- struct dx_root_info *info;
-- dxtrace(printk(KERN_DEBUG
-- "Creating second level index...\n"));
-- memcpy((char *) entries2, (char *) entries,
-+
-+ memcpy((char *)entries2, (char *)entries,
- icount * sizeof(struct dx_entry));
- dx_set_limit(entries2, dx_node_limit(dir));
-
-@@ -2224,22 +2267,17 @@ static int ext4_dx_add_entry(handle_t *h
- dx_set_block(entries + 0, newblock);
- info = dx_get_dx_info((struct ext4_dir_entry_2*)
- frames[0].bh->b_data);
-- info->indirect_levels = 1;
--
-- /* Add new access path frame */
-- frame = frames + 1;
-- frame->at = at = at - entries + entries2;
-- frame->entries = entries = entries2;
-- frame->bh = bh2;
-- err = ext4_journal_get_write_access(handle,
-- frame->bh);
-+ info->indirect_levels += 1;
-+ dxtrace(printk(KERN_DEBUG
-+ "Creating %d level index...\n",
-+ info->indirect_levels));
-+ err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
- if (err)
- goto journal_error;
-- }
-- err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
-- if (err) {
-- ext4_std_error(inode->i_sb, err);
-- goto cleanup;
-+ err = ext4_handle_dirty_dx_node(handle, dir, bh2);
-+ brelse(bh2);
-+ restart = 1;
-+ goto journal_error;
- }
- }
- de = do_split(handle, dir, &bh, frame, &fname->hinfo);
-@@ -2249,10 +2285,14 @@ static int ext4_dx_add_entry(handle_t *h
- goto cleanup;
-
- journal_error:
-- ext4_std_error(dir->i_sb, err);
-+ ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
- cleanup:
- brelse(bh);
- dx_release(frames);
-+ /* @restart is true means htree-path has been changed, we need to
-+ * repeat dx_probe() to find out valid htree-path */
-+ if (restart && err == 0)
-+ goto again;
- return err;
- }
-
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/inode.c
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/inode.c
-@@ -4617,7 +4617,7 @@ struct inode *ext4_iget(struct super_blo
- if (ext4_has_feature_64bit(sb))
- ei->i_file_acl |=
- ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
-- inode->i_size = ext4_isize(raw_inode);
-+ inode->i_size = ext4_isize(sb, raw_inode);
- if ((size = i_size_read(inode)) < 0) {
- EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
- ret = -EFSCORRUPTED;
-@@ -4940,7 +4940,7 @@ static int ext4_do_update_inode(handle_t
- raw_inode->i_file_acl_high =
- cpu_to_le16(ei->i_file_acl >> 32);
- raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
-- if (ei->i_disksize != ext4_isize(raw_inode)) {
-+ if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
- ext4_isize_set(raw_inode, ei->i_disksize);
- need_datasync = 1;
- }
+++ /dev/null
-This patch implements the large EA support in ext4. If the size of
-an EA value is larger than the blocksize, then the EA value is
-not saved in the external EA block; instead, it is saved
-in an external EA inode. So, the patch also helps support a larger
-number of EAs.
-
-Index: linux-stage/fs/ext4/ext4.h
-===================================================================
---- linux-stage.orig/fs/ext4/ext4.h
-+++ linux-stage/fs/ext4/ext4.h
-@@ -1579,6 +1579,7 @@ static inline void ext4_clear_state_flag
- EXT4_FEATURE_INCOMPAT_EXTENTS| \
- EXT4_FEATURE_INCOMPAT_64BIT| \
- EXT4_FEATURE_INCOMPAT_FLEX_BG| \
-+ EXT4_FEATURE_INCOMPAT_EA_INODE| \
- EXT4_FEATURE_INCOMPAT_MMP | \
- EXT4_FEATURE_INCOMPAT_DIRDATA| \
- EXT4_FEATURE_INCOMPAT_INLINE_DATA)
-@@ -1990,6 +1997,10 @@ struct mmpd_data {
- # define ATTRIB_NORET __attribute__((noreturn))
- # define NORET_AND noreturn,
-
-+struct ext4_xattr_ino_array {
-+ unsigned int xia_count; /* # of used item in the array */
-+ unsigned int xia_inodes[0];
-+};
- /* bitmap.c */
- extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
- void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
-@@ -2194,6 +2205,7 @@ extern void ext4_set_inode_flags(struct
- extern void ext4_get_inode_flags(struct ext4_inode_info *);
- extern int ext4_alloc_da_blocks(struct inode *inode);
- extern void ext4_set_aops(struct inode *inode);
-+extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
- extern int ext4_writepage_trans_blocks(struct inode *);
- extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
- extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
-Index: linux-stage/fs/ext4/inode.c
-===================================================================
---- linux-stage.orig/fs/ext4/inode.c
-+++ linux-stage/fs/ext4/inode.c
-@@ -134,8 +134,6 @@ static void ext4_invalidatepage(struct p
- unsigned int length);
- static int __ext4_journalled_writepage(struct page *page, unsigned int len);
- static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
--static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
-- int pextents);
-
- /*
- * Test whether an inode is a fast symlink.
-@@ -184,6 +182,8 @@ void ext4_evict_inode(struct inode *inod
- {
- handle_t *handle;
- int err;
-+ int extra_credits = 3;
-+ struct ext4_xattr_ino_array *lea_ino_array = NULL;
-
- trace_ext4_evict_inode(inode);
-
-@@ -236,8 +236,8 @@ void ext4_evict_inode(struct inode *inod
- * protection against it
- */
- sb_start_intwrite(inode->i_sb);
-- handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
-- ext4_blocks_for_truncate(inode)+3);
-+
-+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
- if (IS_ERR(handle)) {
- ext4_std_error(inode->i_sb, PTR_ERR(handle));
- /*
-@@ -249,9 +249,36 @@ void ext4_evict_inode(struct inode *inod
- sb_end_intwrite(inode->i_sb);
- goto no_delete;
- }
--
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
-+
-+ /*
-+ * Delete xattr inode before deleting the main inode.
-+ */
-+ err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array);
-+ if (err) {
-+ ext4_warning(inode->i_sb,
-+ "couldn't delete inode's xattr (err %d)", err);
-+ goto stop_handle;
-+ }
-+
-+ if (!IS_NOQUOTA(inode))
-+ extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
-+
-+ if (!ext4_handle_has_enough_credits(handle,
-+ ext4_blocks_for_truncate(inode) + extra_credits)) {
-+ err = ext4_journal_extend(handle,
-+ ext4_blocks_for_truncate(inode) + extra_credits);
-+ if (err > 0)
-+ err = ext4_journal_restart(handle,
-+ ext4_blocks_for_truncate(inode) + extra_credits);
-+ if (err != 0) {
-+ ext4_warning(inode->i_sb,
-+ "couldn't extend journal (err %d)", err);
-+ goto stop_handle;
-+ }
-+ }
-+
- inode->i_size = 0;
- err = ext4_mark_inode_dirty(handle, inode);
- if (err) {
-@@ -269,10 +296,10 @@ void ext4_evict_inode(struct inode *inod
- * enough credits left in the handle to remove the inode from
- * the orphan list and set the dtime field.
- */
-- if (!ext4_handle_has_enough_credits(handle, 3)) {
-- err = ext4_journal_extend(handle, 3);
-+ if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
-+ err = ext4_journal_extend(handle, extra_credits);
- if (err > 0)
-- err = ext4_journal_restart(handle, 3);
-+ err = ext4_journal_restart(handle, extra_credits);
- if (err != 0) {
- ext4_warning(inode->i_sb,
- "couldn't extend journal (err %d)", err);
-@@ -306,8 +333,12 @@ void ext4_evict_inode(struct inode *inod
- ext4_clear_inode(inode);
- else
- ext4_free_inode(handle, inode);
-+
- ext4_journal_stop(handle);
- sb_end_intwrite(inode->i_sb);
-+
-+ if (lea_ino_array != NULL)
-+ ext4_xattr_inode_array_free(inode, lea_ino_array);
- return;
- no_delete:
- ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
-@@ -4681,7 +4712,7 @@ static int ext4_index_trans_blocks(struc
- *
- * Also account for superblock, inode, quota and xattr blocks
- */
--static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
-+int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
- int pextents)
- {
- ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
-Index: linux-stage/fs/ext4/xattr.c
-===================================================================
---- linux-stage.orig/fs/ext4/xattr.c
-+++ linux-stage/fs/ext4/xattr.c
-@@ -204,6 +204,7 @@ ext4_xattr_check_names(struct ext4_xattr
-
- while (!IS_LAST_ENTRY(entry)) {
- if (entry->e_value_size != 0 &&
-+ entry->e_value_inum == 0 &&
- (value_start + le16_to_cpu(entry->e_value_offs) <
- (void *)e + sizeof(__u32) ||
- value_start + le16_to_cpu(entry->e_value_offs) +
-@@ -257,19 +258,26 @@ errout:
- __xattr_check_inode((inode), (header), (end), __func__, __LINE__)
-
- static inline int
--ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
-+ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size,
-+ struct inode *inode)
- {
- size_t value_size = le32_to_cpu(entry->e_value_size);
-
-- if (entry->e_value_block != 0 || value_size > size ||
-+ if (!entry->e_value_inum &&
- le16_to_cpu(entry->e_value_offs) + value_size > size)
- return -EFSCORRUPTED;
-+ if (entry->e_value_inum &&
-+ (le32_to_cpu(entry->e_value_inum) < EXT4_FIRST_INO(inode->i_sb) ||
-+ le32_to_cpu(entry->e_value_inum) >
-+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_inodes_count)))
-+ return -EFSCORRUPTED;
- return 0;
- }
-
- static int
- ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
-- const char *name, size_t size, int sorted)
-+ const char *name, size_t size, int sorted,
-+ struct inode *inode)
- {
- struct ext4_xattr_entry *entry;
- size_t name_len;
-@@ -289,11 +297,104 @@ ext4_xattr_find_entry(struct ext4_xattr_
- break;
- }
- *pentry = entry;
-- if (!cmp && ext4_xattr_check_entry(entry, size))
-+ if (!cmp && ext4_xattr_check_entry(entry, size, inode))
- return -EFSCORRUPTED;
- return cmp ? -ENODATA : 0;
- }
-
-+/*
-+ * Read the EA value from an inode.
-+ */
-+static int
-+ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t *size)
-+{
-+ unsigned long block = 0;
-+ struct buffer_head *bh = NULL;
-+ int blocksize;
-+ size_t csize, ret_size = 0;
-+
-+ if (*size == 0)
-+ return 0;
-+
-+ blocksize = ea_inode->i_sb->s_blocksize;
-+
-+ while (ret_size < *size) {
-+ csize = (*size - ret_size) > blocksize ? blocksize :
-+ *size - ret_size;
-+ bh = ext4_bread(NULL, ea_inode, block, 0);
-+ if (IS_ERR(bh)) {
-+ *size = ret_size;
-+ return PTR_ERR(bh);
-+ }
-+ memcpy(buf, bh->b_data, csize);
-+ brelse(bh);
-+
-+ buf += csize;
-+ block += 1;
-+ ret_size += csize;
-+ }
-+
-+ *size = ret_size;
-+
-+ return 0;
-+}
-+
-+struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, int *err)
-+{
-+ struct inode *ea_inode = NULL;
-+
-+ ea_inode = ext4_iget(parent->i_sb, ea_ino);
-+ if (IS_ERR(ea_inode) || is_bad_inode(ea_inode)) {
-+ int rc = IS_ERR(ea_inode) ? PTR_ERR(ea_inode) : 0;
-+ ext4_error(parent->i_sb, "error while reading EA inode %lu "
-+ "/ %d %d", ea_ino, rc, is_bad_inode(ea_inode));
-+ *err = rc != 0 ? rc : -EIO;
-+ return NULL;
-+ }
-+
-+ if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != parent->i_ino ||
-+ ea_inode->i_generation != parent->i_generation) {
-+ ext4_error(parent->i_sb, "Backpointer from EA inode %lu "
-+ "to parent invalid.", ea_ino);
-+ *err = -EINVAL;
-+ goto error;
-+ }
-+
-+ if (!(EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL)) {
-+ ext4_error(parent->i_sb, "EA inode %lu does not have "
-+ "EXT4_EA_INODE_FL flag set.\n", ea_ino);
-+ *err = -EINVAL;
-+ goto error;
-+ }
-+
-+ *err = 0;
-+ return ea_inode;
-+
-+error:
-+ iput(ea_inode);
-+ return NULL;
-+}
-+
-+/*
-+ * Read the value from the EA inode.
-+ */
-+static int
-+ext4_xattr_inode_get(struct inode *inode, unsigned long ea_ino, void *buffer,
-+ size_t *size)
-+{
-+ struct inode *ea_inode = NULL;
-+ int err;
-+
-+ ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err);
-+ if (err)
-+ return err;
-+
-+ err = ext4_xattr_inode_read(ea_inode, buffer, size);
-+ iput(ea_inode);
-+
-+ return err;
-+}
-+
- static int
- ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
- void *buffer, size_t buffer_size)
-@@ -326,7 +427,8 @@ bad_block:
- }
- ext4_xattr_cache_insert(ext4_mb_cache, bh);
- entry = BFIRST(bh);
-- error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
-+ error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1,
-+ inode);
- if (error == -EFSCORRUPTED)
- goto bad_block;
- if (error)
-@@ -336,8 +438,16 @@ bad_block:
- error = -ERANGE;
- if (size > buffer_size)
- goto cleanup;
-- memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
-- size);
-+ if (entry->e_value_inum) {
-+ error = ext4_xattr_inode_get(inode,
-+ le32_to_cpu(entry->e_value_inum),
-+ buffer, &size);
-+ if (error)
-+ goto cleanup;
-+ } else {
-+ memcpy(buffer, bh->b_data +
-+ le16_to_cpu(entry->e_value_offs), size);
-+ }
- }
- error = size;
-
-@@ -371,7 +481,7 @@ ext4_xattr_ibody_get(struct inode *inode
- if (error)
- goto cleanup;
- error = ext4_xattr_find_entry(&entry, name_index, name,
-- end - (void *)entry, 0);
-+ end - (void *)entry, 0, inode);
- if (error)
- goto cleanup;
- size = le32_to_cpu(entry->e_value_size);
-@@ -379,8 +489,16 @@ ext4_xattr_ibody_get(struct inode *inode
- error = -ERANGE;
- if (size > buffer_size)
- goto cleanup;
-- memcpy(buffer, (void *)IFIRST(header) +
-- le16_to_cpu(entry->e_value_offs), size);
-+ if (entry->e_value_inum) {
-+ error = ext4_xattr_inode_get(inode,
-+ le32_to_cpu(entry->e_value_inum),
-+ buffer, &size);
-+ if (error)
-+ goto cleanup;
-+ } else {
-+ memcpy(buffer, (void *)IFIRST(header) +
-+ le16_to_cpu(entry->e_value_offs), size);
-+ }
- }
- error = size;
-
-@@ -640,7 +758,7 @@ static size_t ext4_xattr_free_space(stru
- size_t *min_offs, void *base, int *total)
- {
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
-- if (!last->e_value_block && last->e_value_size) {
-+ if (!last->e_value_inum && last->e_value_size) {
- size_t offs = le16_to_cpu(last->e_value_offs);
- if (offs < *min_offs)
- *min_offs = offs;
-@@ -651,12 +769,195 @@ static size_t ext4_xattr_free_space(stru
- return (*min_offs - ((void *)last - base) - sizeof(__u32));
- }
-
-+/*
-+ * Write the value of the EA in an inode.
-+ */
-+static int
-+ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
-+ const void *buf, int bufsize)
-+{
-+ struct buffer_head *bh = NULL;
-+ unsigned long block = 0;
-+ unsigned blocksize = ea_inode->i_sb->s_blocksize;
-+ unsigned max_blocks = (bufsize + blocksize - 1) >> ea_inode->i_blkbits;
-+ int csize, wsize = 0;
-+ int ret = 0;
-+ int retries = 0;
-+
-+retry:
-+ while (ret >= 0 && ret < max_blocks) {
-+ struct ext4_map_blocks map;
-+ map.m_lblk = block += ret;
-+ map.m_len = max_blocks -= ret;
-+
-+ ret = ext4_map_blocks(handle, ea_inode, &map,
-+ EXT4_GET_BLOCKS_CREATE);
-+ if (ret <= 0) {
-+ ext4_mark_inode_dirty(handle, ea_inode);
-+ if (ret == -ENOSPC &&
-+ ext4_should_retry_alloc(ea_inode->i_sb, &retries)) {
-+ ret = 0;
-+ goto retry;
-+ }
-+ break;
-+ }
-+ }
-+
-+ if (ret < 0)
-+ return ret;
-+
-+ block = 0;
-+ while (wsize < bufsize) {
-+ if (bh != NULL)
-+ brelse(bh);
-+ csize = (bufsize - wsize) > blocksize ? blocksize :
-+ bufsize - wsize;
-+ bh = ext4_getblk(handle, ea_inode, block, 0);
-+ if (IS_ERR(bh)) {
-+ ret = PTR_ERR(bh);
-+ goto out;
-+ }
-+ ret = ext4_journal_get_write_access(handle, bh);
-+ if (ret)
-+ goto out;
-+
-+ memcpy(bh->b_data, buf, csize);
-+ set_buffer_uptodate(bh);
-+ ext4_handle_dirty_metadata(handle, ea_inode, bh);
-+
-+ buf += csize;
-+ wsize += csize;
-+ block += 1;
-+ }
-+
-+ mutex_lock(&ea_inode->i_mutex);
-+ i_size_write(ea_inode, wsize);
-+ ext4_update_i_disksize(ea_inode, wsize);
-+ mutex_unlock(&ea_inode->i_mutex);
-+
-+ ext4_mark_inode_dirty(handle, ea_inode);
-+
-+out:
-+ brelse(bh);
-+
-+ return ret;
-+}
-+
-+static void ext4_xattr_inode_set_ref(struct inode *ea_inode, __u64 ref_count)
-+{
-+ ea_inode->i_ctime.tv_sec = (__u32)(ref_count >> 32);
-+ ea_inode->i_version = (__u32)ref_count;
-+}
-+
-+static void ext4_xattr_inode_set_hash(struct inode *ea_inode, __u32 hash)
-+{
-+ ea_inode->i_atime.tv_sec = hash;
-+}
-+
-+/*
-+ * Create an inode to store the value of a large EA.
-+ */
-+static struct inode *
-+ext4_xattr_inode_create(handle_t *handle, struct inode *inode, __u32 hash)
-+{
-+ struct inode *ea_inode = NULL;
-+
-+ /*
-+ * Let the next inode be the goal, so we try and allocate the EA inode
-+ * in the same group, or nearby one.
-+ */
-+ ea_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
-+ S_IFREG|0600, NULL, inode->i_ino + 1, NULL);
-+
-+ if (!IS_ERR(ea_inode)) {
-+ ea_inode->i_op = &ext4_file_inode_operations;
-+ ea_inode->i_fop = &ext4_file_operations;
-+ ext4_set_aops(ea_inode);
-+ ea_inode->i_generation = inode->i_generation;
-+ EXT4_I(ea_inode)->i_flags |= EXT4_EA_INODE_FL;
-+
-+ /*
-+ * A back-pointer from EA inode to parent inode will be useful
-+ * for e2fsck.
-+ */
-+ EXT4_XATTR_INODE_SET_PARENT(ea_inode, inode->i_ino);
-+ unlock_new_inode(ea_inode);
-+
-+ ext4_xattr_inode_set_ref(ea_inode, 1);
-+ ext4_xattr_inode_set_hash(ea_inode, hash);
-+ }
-+
-+ return ea_inode;
-+}
-+
-+/*
-+ * Unlink the inode storing the value of the EA.
-+ */
-+int
-+ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino)
-+{
-+ struct inode *ea_inode = NULL;
-+ int err;
-+
-+ ea_inode = ext4_xattr_inode_iget(inode, ea_ino, &err);
-+ if (err)
-+ return err;
-+
-+ clear_nlink(ea_inode);
-+ iput(ea_inode);
-+
-+ return 0;
-+}
-+
-+static __u32
-+ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
-+{
-+ if (ext4_has_metadata_csum(sbi->s_sb))
-+ return ext4_chksum(sbi, sbi->s_csum_seed, buffer, size);
-+ return 0;
-+}
-+
-+/*
-+ * Add value of the EA in an inode.
-+ */
-+static int
-+ext4_xattr_inode_set(handle_t *handle, struct inode *inode, unsigned long *ea_ino,
-+ const void *value, size_t value_len)
-+{
-+ struct inode *ea_inode = NULL;
-+ __u32 hash;
-+ int err;
-+
-+ /* Create an inode for the EA value */
-+ hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
-+ ea_inode = ext4_xattr_inode_create(handle, inode, hash);
-+ if (IS_ERR(ea_inode))
-+ return -1;
-+
-+ err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
-+ if (err)
-+ clear_nlink(ea_inode);
-+ else
-+ *ea_ino = ea_inode->i_ino;
-+
-+ iput(ea_inode);
-+
-+ return err;
-+}
-+
- static int
- ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s,
-- struct inode *inode)
-+ handle_t *handle, struct inode *inode)
- {
- struct ext4_xattr_entry *last, *next;
- size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
-+ int in_inode = i->in_inode;
-+
-+ if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
-+ EXT4_FEATURE_INCOMPAT_EA_INODE) &&
-+ (EXT4_XATTR_SIZE(i->value_len) >
-+ EXT4_XATTR_MIN_LARGE_EA_SIZE(inode->i_sb->s_blocksize)))
-+ in_inode = 1;
-
- /* Compute min_offs and last. */
- last = s->first;
-@@ -666,7 +967,7 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- EXT4_ERROR_INODE(inode, "corrupted xattr entries");
- return -EFSCORRUPTED;
- }
-- if (!last->e_value_block && last->e_value_size) {
-+ if (!last->e_value_inum && last->e_value_size) {
- size_t offs = le16_to_cpu(last->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
-@@ -674,15 +975,20 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- }
- free = min_offs - ((void *)last - s->base) - sizeof(__u32);
- if (!s->not_found) {
-- if (!s->here->e_value_block && s->here->e_value_size) {
-+ if (!in_inode &&
-+ !s->here->e_value_inum && s->here->e_value_size) {
- size_t size = le32_to_cpu(s->here->e_value_size);
- free += EXT4_XATTR_SIZE(size);
- }
- free += EXT4_XATTR_LEN(name_len);
- }
- if (i->value) {
-- if (free < EXT4_XATTR_LEN(name_len) +
-- EXT4_XATTR_SIZE(i->value_len))
-+ size_t value_len = EXT4_XATTR_SIZE(i->value_len);
-+
-+ if (in_inode)
-+ value_len = 0;
-+
-+ if (free < EXT4_XATTR_LEN(name_len) + value_len)
- return -ENOSPC;
- }
-
-@@ -696,7 +1002,8 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- s->here->e_name_len = name_len;
- memcpy(s->here->e_name, i->name, name_len);
- } else {
-- if (!s->here->e_value_block && s->here->e_value_size) {
-+ if (!s->here->e_value_inum && s->here->e_value_size &&
-+ s->here->e_value_offs > 0) {
- void *first_val = s->base + min_offs;
- size_t offs = le16_to_cpu(s->here->e_value_offs);
- void *val = s->base + offs;
-@@ -730,13 +1037,18 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- last = s->first;
- while (!IS_LAST_ENTRY(last)) {
- size_t o = le16_to_cpu(last->e_value_offs);
-- if (!last->e_value_block &&
-+ if (!last->e_value_inum &&
- last->e_value_size && o < offs)
- last->e_value_offs =
- cpu_to_le16(o + size);
- last = EXT4_XATTR_NEXT(last);
- }
- }
-+ if (s->here->e_value_inum) {
-+ ext4_xattr_inode_unlink(inode,
-+ le32_to_cpu(s->here->e_value_inum));
-+ s->here->e_value_inum = 0;
-+ }
- if (!i->value) {
- /* Remove the old name. */
- size_t size = EXT4_XATTR_LEN(name_len);
-@@ -750,10 +1062,17 @@ ext4_xattr_set_entry(struct ext4_xattr_i
- if (i->value) {
- /* Insert the new value. */
- s->here->e_value_size = cpu_to_le32(i->value_len);
-- if (i->value_len) {
-+ if (in_inode) {
-+ unsigned long ea_ino = le32_to_cpu(s->here->e_value_inum);
-+ ext4_xattr_inode_set(handle, inode, &ea_ino, i->value,
-+ i->value_len);
-+ s->here->e_value_inum = cpu_to_le32(ea_ino);
-+ s->here->e_value_offs = 0;
-+ } else if (i->value_len) {
- size_t size = EXT4_XATTR_SIZE(i->value_len);
- void *val = s->base + min_offs - size;
- s->here->e_value_offs = cpu_to_le16(min_offs - size);
-+ s->here->e_value_inum = 0;
- if (i->value == EXT4_ZERO_XATTR_VALUE) {
- memset(val, 0, size);
- } else {
-@@ -803,7 +1122,7 @@ ext4_xattr_block_find(struct inode *inod
- bs->s.end = bs->bh->b_data + bs->bh->b_size;
- bs->s.here = bs->s.first;
- error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
-- i->name, bs->bh->b_size, 1);
-+ i->name, bs->bh->b_size, 1, inode);
- if (error && error != -ENODATA)
- goto cleanup;
- bs->s.not_found = error;
-@@ -828,8 +1147,6 @@ ext4_xattr_block_set(handle_t *handle, s
-
- #define header(x) ((struct ext4_xattr_header *)(x))
-
-- if (i->value && i->value_len > sb->s_blocksize)
-- return -ENOSPC;
- if (s->base) {
- BUFFER_TRACE(bs->bh, "get_write_access");
- error = ext4_journal_get_write_access(handle, bs->bh);
-@@ -848,7 +1165,7 @@ ext4_xattr_block_set(handle_t *handle, s
- mb_cache_entry_delete_block(ext4_mb_cache, hash,
- bs->bh->b_blocknr);
- ea_bdebug(bs->bh, "modifying in-place");
-- error = ext4_xattr_set_entry(i, s, inode);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- if (!error) {
- if (!IS_LAST_ENTRY(s->first))
- ext4_xattr_rehash(header(s->base),
-@@ -894,7 +1211,7 @@ ext4_xattr_block_set(handle_t *handle, s
- s->end = s->base + sb->s_blocksize;
- }
-
-- error = ext4_xattr_set_entry(i, s, inode);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- if (error == -EFSCORRUPTED)
- goto bad_block;
- if (error)
-@@ -1072,7 +1389,7 @@ int ext4_xattr_ibody_find(struct inode *
- /* Find the named attribute. */
- error = ext4_xattr_find_entry(&is->s.here, i->name_index,
- i->name, is->s.end -
-- (void *)is->s.base, 0);
-+ (void *)is->s.base, 0, inode);
- if (error && error != -ENODATA)
- return error;
- is->s.not_found = error;
-@@ -1090,7 +1407,7 @@ int ext4_xattr_ibody_inline_set(handle_t
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return -ENOSPC;
-- error = ext4_xattr_set_entry(i, s, inode);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- if (error)
- return error;
- header = IHDR(inode, ext4_raw_inode(&is->iloc));
-@@ -1114,7 +1431,7 @@ static int ext4_xattr_ibody_set(handle_t
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return -ENOSPC;
-- error = ext4_xattr_set_entry(i, s, inode);
-+ error = ext4_xattr_set_entry(i, s, handle, inode);
- if (error)
- return error;
- header = IHDR(inode, ext4_raw_inode(&is->iloc));
-@@ -1161,7 +1478,7 @@ ext4_xattr_set_handle(handle_t *handle,
- .name = name,
- .value = value,
- .value_len = value_len,
--
-+ .in_inode = 0,
- };
- struct ext4_xattr_ibody_find is = {
- .s = { .not_found = -ENODATA, },
-@@ -1231,6 +1548,15 @@ ext4_xattr_set_handle(handle_t *handle,
- goto cleanup;
- }
- error = ext4_xattr_block_set(handle, inode, &i, &bs);
-+ if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
-+ EXT4_FEATURE_INCOMPAT_EA_INODE) &&
-+ error == -ENOSPC) {
-+ /* xattr not fit to block, store at external
-+ * inode */
-+ i.in_inode = 1;
-+ error = ext4_xattr_ibody_set(handle, inode,
-+ &i, &is);
-+ }
- if (error)
- goto cleanup;
- if (!is.s.not_found) {
-@@ -1275,9 +1601,22 @@ ext4_xattr_set(struct inode *inode, int
- const void *value, size_t value_len, int flags)
- {
- handle_t *handle;
-+ struct super_block *sb = inode->i_sb;
- int error, retries = 0;
- int credits = ext4_jbd2_credits_xattr(inode);
-
-+ if ((value_len >= EXT4_XATTR_MIN_LARGE_EA_SIZE(sb->s_blocksize)) &&
-+ EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EA_INODE)) {
-+ int nrblocks = (value_len + sb->s_blocksize - 1) >>
-+ sb->s_blocksize_bits;
-+
-+ /* For new inode */
-+ credits += EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + 3;
-+
-+ /* For data blocks of EA inode */
-+ credits += ext4_meta_trans_blocks(inode, nrblocks, 0);
-+ }
-+
- retry:
- handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
- if (IS_ERR(handle)) {
-@@ -1289,7 +1628,7 @@ retry:
- value, value_len, flags);
- error2 = ext4_journal_stop(handle);
- if (error == -ENOSPC &&
-- ext4_should_retry_alloc(inode->i_sb, &retries))
-+ ext4_should_retry_alloc(sb, &retries))
- goto retry;
- if (error == 0)
- error = error2;
-@@ -1311,7 +1650,7 @@ static void ext4_xattr_shift_entries(str
-
- /* Adjust the value offsets of the entries */
- for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
-- if (!last->e_value_block && last->e_value_size) {
-+ if (!last->e_value_inum && last->e_value_size) {
- new_offs = le16_to_cpu(last->e_value_offs) +
- value_offs_shift;
- BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
-@@ -1565,21 +1904,135 @@ cleanup:
- }
-
-
-+#define EIA_INCR 16 /* must be 2^n */
-+#define EIA_MASK (EIA_INCR - 1)
-+/* Add the large xattr @ino into @lea_ino_array for later deletion.
-+ * If @lea_ino_array is new or full it will be grown and the old
-+ * contents copied over.
-+ */
-+static int
-+ext4_expand_ino_array(struct ext4_xattr_ino_array **lea_ino_array, __u32 ino)
-+{
-+ if (*lea_ino_array == NULL) {
-+ /*
-+ * Start with 15 inodes, so it fits into a power-of-two size.
-+ * If *lea_ino_array is NULL, this is essentially offsetof()
-+ */
-+ (*lea_ino_array) =
-+ kmalloc(offsetof(struct ext4_xattr_ino_array,
-+ xia_inodes[EIA_MASK]),
-+ GFP_NOFS);
-+ if (*lea_ino_array == NULL)
-+ return -ENOMEM;
-+ (*lea_ino_array)->xia_count = 0;
-+ } else if (((*lea_ino_array)->xia_count & EIA_MASK) == EIA_MASK) {
-+ /* expand the array once all 15 + n * 16 slots are full */
-+ struct ext4_xattr_ino_array *new_array = NULL;
-+ int count = (*lea_ino_array)->xia_count;
-+
-+ /* if new_array is NULL, this is essentially offsetof() */
-+ new_array = kmalloc(
-+ offsetof(struct ext4_xattr_ino_array,
-+ xia_inodes[count + EIA_INCR]),
-+ GFP_NOFS);
-+ if (new_array == NULL)
-+ return -ENOMEM;
-+ memcpy(new_array, *lea_ino_array,
-+ offsetof(struct ext4_xattr_ino_array,
-+ xia_inodes[count]));
-+ kfree(*lea_ino_array);
-+ *lea_ino_array = new_array;
-+ }
-+ (*lea_ino_array)->xia_inodes[(*lea_ino_array)->xia_count++] = ino;
-+ return 0;
-+}
-+
-+/**
-+ * Add xattr inode to orphan list
-+ */
-+static int
-+ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode,
-+ int credits, struct ext4_xattr_ino_array *lea_ino_array)
-+{
-+ struct inode *ea_inode = NULL;
-+ int idx = 0, error = 0;
-+
-+ if (lea_ino_array == NULL)
-+ return 0;
-+
-+ for (; idx < lea_ino_array->xia_count; ++idx) {
-+ if (!ext4_handle_has_enough_credits(handle, credits)) {
-+ error = ext4_journal_extend(handle, credits);
-+ if (error > 0)
-+ error = ext4_journal_restart(handle, credits);
-+
-+ if (error != 0) {
-+ ext4_warning(inode->i_sb,
-+ "couldn't extend journal "
-+ "(err %d)", error);
-+ return error;
-+ }
-+ }
-+ ea_inode = ext4_xattr_inode_iget(inode,
-+ lea_ino_array->xia_inodes[idx], &error);
-+ if (error)
-+ continue;
-+ ext4_orphan_add(handle, ea_inode);
-+ /* the inode's i_count will be released by caller */
-+ }
-+
-+ return 0;
-+}
-
- /*
- * ext4_xattr_delete_inode()
- *
-- * Free extended attribute resources associated with this inode. This
-+ * Free extended attribute resources associated with this inode. Traverse
-+ * all entries and unlink any xattr inodes associated with this inode. This
- * is called immediately before an inode is freed. We have exclusive
-- * access to the inode.
-+ * access to the inode. If an orphan inode is deleted it will also delete any
-+ * xattr block and all xattr inodes. They are checked by ext4_xattr_inode_iget()
-+ * to ensure they belong to the parent inode and were not deleted already.
- */
--void
--ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
-+int
-+ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
-+ struct ext4_xattr_ino_array **lea_ino_array)
- {
- struct buffer_head *bh = NULL;
-+ struct ext4_xattr_ibody_header *header;
-+ struct ext4_inode *raw_inode;
-+ struct ext4_iloc iloc;
-+ struct ext4_xattr_entry *entry;
-+ int credits = 3, error = 0;
-
-- if (!EXT4_I(inode)->i_file_acl)
-+ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
-+ goto delete_external_ea;
-+
-+ error = ext4_get_inode_loc(inode, &iloc);
-+ if (error)
- goto cleanup;
-+ raw_inode = ext4_raw_inode(&iloc);
-+ header = IHDR(inode, raw_inode);
-+ for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
-+ entry = EXT4_XATTR_NEXT(entry)) {
-+ if (!entry->e_value_inum)
-+ continue;
-+ if (ext4_expand_ino_array(lea_ino_array,
-+ entry->e_value_inum) != 0) {
-+ brelse(iloc.bh);
-+ goto cleanup;
-+ }
-+ entry->e_value_inum = 0;
-+ }
-+ brelse(iloc.bh);
-+
-+delete_external_ea:
-+ if (!EXT4_I(inode)->i_file_acl) {
-+ /* add xattr inode to orphan list */
-+ ext4_xattr_inode_orphan_add(handle, inode, credits,
-+ *lea_ino_array);
-+ goto cleanup;
-+ }
- bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
- if (!bh) {
- EXT4_ERROR_INODE(inode, "block %llu read error",
-@@ -1592,11 +2045,69 @@ ext4_xattr_delete_inode(handle_t *handle
- EXT4_I(inode)->i_file_acl);
- goto cleanup;
- }
-+
-+ for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
-+ entry = EXT4_XATTR_NEXT(entry)) {
-+ if (!entry->e_value_inum)
-+ continue;
-+ if (ext4_expand_ino_array(lea_ino_array,
-+ entry->e_value_inum) != 0)
-+ goto cleanup;
-+ entry->e_value_inum = 0;
-+ }
-+
-+ /* add xattr inode to orphan list */
-+ error = ext4_xattr_inode_orphan_add(handle, inode, credits,
-+ *lea_ino_array);
-+ if (error != 0)
-+ goto cleanup;
-+
-+ if (!IS_NOQUOTA(inode))
-+ credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
-+
-+ if (!ext4_handle_has_enough_credits(handle, credits)) {
-+ error = ext4_journal_extend(handle, credits);
-+ if (error > 0)
-+ error = ext4_journal_restart(handle, credits);
-+ if (error != 0) {
-+ ext4_warning(inode->i_sb,
-+ "couldn't extend journal (err %d)", error);
-+ goto cleanup;
-+ }
-+ }
-+
- ext4_xattr_release_block(handle, inode, bh);
- EXT4_I(inode)->i_file_acl = 0;
-
- cleanup:
- brelse(bh);
-+
-+ return error;
-+}
-+
-+void
-+ext4_xattr_inode_array_free(struct inode *inode,
-+ struct ext4_xattr_ino_array *lea_ino_array)
-+{
-+ struct inode *ea_inode = NULL;
-+ int idx = 0;
-+ int err;
-+
-+ if (lea_ino_array == NULL)
-+ return;
-+
-+ for (; idx < lea_ino_array->xia_count; ++idx) {
-+ ea_inode = ext4_xattr_inode_iget(inode,
-+ lea_ino_array->xia_inodes[idx], &err);
-+ if (err)
-+ continue;
-+ /* for inode's i_count get from ext4_xattr_delete_inode */
-+ if (!list_empty(&EXT4_I(ea_inode)->i_orphan))
-+ iput(ea_inode);
-+ clear_nlink(ea_inode);
-+ iput(ea_inode);
-+ }
-+ kfree(lea_ino_array);
- }
-
- /*
-@@ -1648,10 +2159,9 @@ ext4_xattr_cmp(struct ext4_xattr_header
- entry1->e_name_index != entry2->e_name_index ||
- entry1->e_name_len != entry2->e_name_len ||
- entry1->e_value_size != entry2->e_value_size ||
-+ entry1->e_value_inum != entry2->e_value_inum ||
- memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
- return 1;
-- if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
-- return -EFSCORRUPTED;
- if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
- (char *)header2 + le16_to_cpu(entry2->e_value_offs),
- le32_to_cpu(entry1->e_value_size)))
-@@ -1723,7 +2233,7 @@ static inline void ext4_xattr_hash_entry
- *name++;
- }
-
-- if (entry->e_value_block == 0 && entry->e_value_size != 0) {
-+ if (!entry->e_value_inum && entry->e_value_size) {
- __le32 *value = (__le32 *)((char *)header +
- le16_to_cpu(entry->e_value_offs));
- for (n = (le32_to_cpu(entry->e_value_size) +
-Index: linux-stage/fs/ext4/xattr.h
-===================================================================
---- linux-stage.orig/fs/ext4/xattr.h
-+++ linux-stage/fs/ext4/xattr.h
-@@ -42,7 +42,7 @@ struct ext4_xattr_entry {
- __u8 e_name_len; /* length of name */
- __u8 e_name_index; /* attribute name index */
- __le16 e_value_offs; /* offset in disk block of value */
-- __le32 e_value_block; /* disk block attribute is stored on (n/i) */
-+ __le32 e_value_inum; /* inode in which the value is stored */
- __le32 e_value_size; /* size of attribute value */
- __le32 e_hash; /* hash value of name and value */
- char e_name[0]; /* attribute name */
-@@ -67,6 +67,26 @@ struct ext4_xattr_entry {
- EXT4_I(inode)->i_extra_isize))
- #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
-
-+/*
-+ * Link EA inode back to parent one using i_mtime field.
-+ * Extra integer type conversion added to ignore higher
-+ * bits in i_mtime.tv_sec which might be set by ext4_get()
-+ */
-+#define EXT4_XATTR_INODE_SET_PARENT(inode, inum) \
-+do { \
-+ (inode)->i_mtime.tv_sec = inum; \
-+} while(0)
-+
-+#define EXT4_XATTR_INODE_GET_PARENT(inode) \
-+((__u32)(inode)->i_mtime.tv_sec)
-+
-+/*
-+ * The minimum size of EA value when you start storing it in an external inode
-+ * size of block - size of header - size of 1 entry - 4 null bytes
-+*/
-+#define EXT4_XATTR_MIN_LARGE_EA_SIZE(b) \
-+ ((b) - EXT4_XATTR_LEN(3) - sizeof(struct ext4_xattr_header) - 4)
-+
- #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
- #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
- #define BFIRST(bh) ENTRY(BHDR(bh)+1)
-@@ -75,10 +84,11 @@ struct ext4_xattr_entry {
- #define EXT4_ZERO_XATTR_VALUE ((void *)-1)
-
- struct ext4_xattr_info {
-- int name_index;
- const char *name;
- const void *value;
- size_t value_len;
-+ int name_index;
-+ int in_inode;
- };
-
- struct ext4_xattr_search {
-@@ -106,7 +116,13 @@ extern int ext4_xattr_get(struct inode *
- extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
- extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
-
--extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-+extern struct inode *ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
-+ int *err);
-+extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
-+extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
-+ struct ext4_xattr_ino_array **array);
-+extern void ext4_xattr_inode_array_free(struct inode *inode,
-+ struct ext4_xattr_ino_array *array);
-
- extern void ext4_xattr_put_super(struct super_block *);
- extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
-Index: linux-stage/fs/ext4/ialloc.c
-===================================================================
---- linux-stage.orig/fs/ext4/ialloc.c
-+++ linux-stage/fs/ext4/ialloc.c
-@@ -269,7 +269,6 @@ void ext4_free_inode(handle_t *handle, s
- * as writing the quota to disk may need the lock as well.
- */
- dquot_initialize(inode);
-- ext4_xattr_delete_inode(handle, inode);
- dquot_free_inode(inode);
- dquot_drop(inode);
-
-Index: linux-stage/fs/ext4/inline.c
-===================================================================
---- linux-stage.orig/fs/ext4/inline.c
-+++ linux-stage/fs/ext4/inline.c
-@@ -59,7 +59,7 @@ static int get_max_inline_xattr_value_si
-
- /* Compute min_offs. */
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
-- if (!entry->e_value_block && entry->e_value_size) {
-+ if (!entry->e_value_inum && entry->e_value_size) {
- size_t offs = le16_to_cpu(entry->e_value_offs);
- if (offs < min_offs)
- min_offs = offs;
+++ /dev/null
-Index: linux-3.10.0-123.el7.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-123.el7.x86_64/fs/ext4/ext4.h
-@@ -2391,6 +2391,7 @@ struct ext4_group_info {
- ext4_grpblk_t bb_fragments; /* nr of freespace fragments */
- ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */
- struct list_head bb_prealloc_list;
-+ unsigned long bb_prealloc_nr;
- #ifdef DOUBLE_CHECK
- void *bb_bitmap;
- #endif
-Index: linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.c
-===================================================================
---- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/mballoc.c
-+++ linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.c
-@@ -362,7 +362,7 @@ static const char *ext4_groupinfo_slab_n
- "ext4_groupinfo_64k", "ext4_groupinfo_128k"
- };
-
--static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-+static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group);
- static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
-@@ -718,7 +718,7 @@ mb_set_largest_free_order(struct super_b
- }
-
- static noinline_for_stack
--void ext4_mb_generate_buddy(struct super_block *sb,
-+int ext4_mb_generate_buddy(struct super_block *sb,
- void *buddy, void *bitmap, ext4_group_t group)
- {
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-@@ -750,19 +750,13 @@ void ext4_mb_generate_buddy(struct super
- grp->bb_fragments = fragments;
-
- if (free != grp->bb_free) {
-- ext4_grp_locked_error(sb, group, 0, 0,
-- "block bitmap and bg descriptor "
-- "inconsistent: %u vs %u free clusters",
-- free, grp->bb_free);
-- /*
-- * If we intend to continue, we consider group descriptor
-- * corrupt and update bb_free using bitmap value
-- */
-- grp->bb_free = free;
-- if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-- percpu_counter_sub(&sbi->s_freeclusters_counter,
-- grp->bb_free);
-- set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-+ struct ext4_group_desc *gdp;
-+ gdp = ext4_get_group_desc(sb, group, NULL);
-+ ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
-+ "%u in gd, %lu pa's\n", (long unsigned int)group,
-+ free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
-+ grp->bb_prealloc_nr);
-+ return -EIO;
- }
- mb_set_largest_free_order(sb, grp);
-
-@@ -768,6 +767,8 @@ void ext4_mb_generate_buddy(struct super
- EXT4_SB(sb)->s_mb_buddies_generated++;
- EXT4_SB(sb)->s_mb_generation_time += period;
- spin_unlock(&EXT4_SB(sb)->s_bal_lock);
-+
-+ return 0;
- }
-
- static void mb_regenerate_buddy(struct ext4_buddy *e4b)
-@@ -883,7 +884,7 @@ static int ext4_mb_init_cache(struct pag
- }
-
- first_block = page->index * blocks_per_page;
-- for (i = 0; i < blocks_per_page; i++) {
-+ for (i = 0; i < blocks_per_page && err == 0; i++) {
- group = (first_block + i) >> 1;
- if (group >= ngroups)
- break;
-@@ -922,7 +923,7 @@ static int ext4_mb_init_cache(struct pag
- ext4_lock_group(sb, group);
- /* init the buddy */
- memset(data, 0xff, blocksize);
-- ext4_mb_generate_buddy(sb, data, incore, group);
-+ err = ext4_mb_generate_buddy(sb, data, incore, group);
- ext4_unlock_group(sb, group);
- incore = NULL;
- } else {
-@@ -937,7 +938,7 @@ static int ext4_mb_init_cache(struct pag
- memcpy(data, bitmap, blocksize);
-
- /* mark all preallocated blks used in in-core bitmap */
-- ext4_mb_generate_from_pa(sb, data, group);
-+ err = ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
- ext4_unlock_group(sb, group);
-
-@@ -947,7 +948,8 @@ static int ext4_mb_init_cache(struct pag
- incore = data;
- }
- }
-- SetPageUptodate(page);
-+ if (likely(err == 0))
-+ SetPageUptodate(page);
-
- out:
- if (bh) {
-@@ -2224,9 +2226,11 @@ static void *ext4_mb_seq_groups_next(str
- static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
- {
- struct super_block *sb = seq->private;
-+ struct ext4_group_desc *gdp;
- ext4_group_t group = (ext4_group_t) ((unsigned long) v);
- int i;
- int err, buddy_loaded = 0;
-+ int free = 0;
- struct ext4_buddy e4b;
- struct ext4_group_info *grinfo;
- struct sg {
-@@ -2236,7 +2240,7 @@ static int ext4_mb_seq_groups_show(struc
-
- group--;
- if (group == 0)
-- seq_puts(seq, "#group: free frags first ["
-+ seq_puts(seq, "#group: bfree gfree frags first pa ["
- " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
- " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]");
-
-@@ -2256,13 +2260,19 @@ static int ext4_mb_seq_groups_show(struc
- buddy_loaded = 1;
- }
-
-+ gdp = ext4_get_group_desc(sb, group, NULL);
-+ if (gdp != NULL)
-+ free = ext4_free_group_clusters(sb, gdp);
-+
- memcpy(&sg, ext4_get_group_info(sb, group), i);
-
- if (buddy_loaded)
- ext4_mb_unload_buddy(&e4b);
-
-- seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
-- sg.info.bb_fragments, sg.info.bb_first_free);
-+ seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
-+ (long unsigned int)group, sg.info.bb_free, free,
-+ sg.info.bb_fragments, sg.info.bb_first_free,
-+ sg.info.bb_prealloc_nr);
- for (i = 0; i <= 13; i++)
- seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
- sg.info.bb_counters[i] : 0);
-@@ -3507,22 +3517,71 @@ static void ext4_mb_generate_from_freeli
- }
-
- /*
-+ * check free blocks in bitmap match free block in group descriptor
-+ * do this before taking preallocated blocks into account to be able
-+ * to detect on-disk corruptions. The group lock should be hold by the
-+ * caller.
-+ */
-+int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
-+ struct ext4_group_desc *gdp, int group)
-+{
-+ unsigned short max = EXT4_CLUSTERS_PER_GROUP(sb);
-+ unsigned short i, first, free = 0;
-+ unsigned short free_in_gdp = ext4_free_group_clusters(sb, gdp);
-+
-+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
-+ return 0;
-+
-+ i = mb_find_next_zero_bit(bitmap, max, 0);
-+
-+ while (i < max) {
-+ first = i;
-+ i = mb_find_next_bit(bitmap, max, i);
-+ if (i > max)
-+ i = max;
-+ free += i - first;
-+ if (i < max)
-+ i = mb_find_next_zero_bit(bitmap, max, i);
-+ }
-+
-+ if (free != free_in_gdp) {
-+ ext4_error(sb, "on-disk bitmap for group %d"
-+ "corrupted: %u blocks free in bitmap, %u - in gd\n",
-+ group, free, free_in_gdp);
-+ return -EIO;
-+ }
-+ return 0;
-+}
-+
-+/*
- * the function goes through all preallocation in this group and marks them
- * used in in-core bitmap. buddy must be generated from this bitmap
- * Need to be called with ext4 group lock held
- */
- static noinline_for_stack
--void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-+int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group)
- {
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
- struct ext4_prealloc_space *pa;
-+ struct ext4_group_desc *gdp;
- struct list_head *cur;
- ext4_group_t groupnr;
- ext4_grpblk_t start;
- int preallocated = 0;
-+ int skip = 0, count = 0;
-+ int err;
- int len;
-
-+ gdp = ext4_get_group_desc(sb, group, NULL);
-+ if (gdp == NULL)
-+ return -EIO;
-+
-+ /* before applying preallocations, check bitmap consistency */
-+ err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
-+ if (err)
-+ return err;
-+
- /* all form of preallocation discards first load group,
- * so the only competing code is preallocation use.
- * we don't need any locking here
-@@ -3538,13 +3593,23 @@ void ext4_mb_generate_from_pa(struct sup
- &groupnr, &start);
- len = pa->pa_len;
- spin_unlock(&pa->pa_lock);
-- if (unlikely(len == 0))
-+ if (unlikely(len == 0)) {
-+ skip++;
- continue;
-+ }
- BUG_ON(groupnr != group);
- ext4_set_bits(bitmap, start, len);
- preallocated += len;
-+ count++;
-+ }
-+ if (count + skip != grp->bb_prealloc_nr) {
-+ ext4_error(sb, "lost preallocations: "
-+ "count %d, bb_prealloc_nr %lu, skip %d\n",
-+ count, grp->bb_prealloc_nr, skip);
-+ return -EIO;
- }
- mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
-+ return 0;
- }
-
- static void ext4_mb_pa_callback(struct rcu_head *head)
-@@ -3603,6 +3668,7 @@ static void ext4_mb_put_pa(struct ext4_a
- */
- ext4_lock_group(sb, grp);
- list_del(&pa->pa_group_list);
-+ ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
- ext4_unlock_group(sb, grp);
-
- spin_lock(pa->pa_obj_lock);
-@@ -3697,6 +3763,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
-
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-+ grp->bb_prealloc_nr++;
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-
- spin_lock(pa->pa_obj_lock);
-@@ -3758,6 +3825,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
-
- ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
-+ grp->bb_prealloc_nr++;
- ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-
- /*
-@@ -3927,6 +3995,8 @@ repeat:
-
- spin_unlock(&pa->pa_lock);
-
-+ BUG_ON(grp->bb_prealloc_nr == 0);
-+ grp->bb_prealloc_nr--;
- list_del(&pa->pa_group_list);
- list_add(&pa->u.pa_tmp_list, &list);
- }
-@@ -4056,7 +4126,7 @@ repeat:
- if (err) {
- ext4_error(sb, "Error %d loading buddy information for %u",
- err, group);
-- continue;
-+ return;
- }
-
- bitmap_bh = ext4_read_block_bitmap(sb, group);
-@@ -4068,6 +4138,8 @@ repeat:
- }
-
- ext4_lock_group(sb, group);
-+ BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
-+ e4b.bd_info->bb_prealloc_nr--;
- list_del(&pa->pa_group_list);
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
- ext4_unlock_group(sb, group);
-@@ -4328,6 +4400,7 @@ ext4_mb_discard_lg_preallocations(struct
- }
- ext4_lock_group(sb, group);
- list_del(&pa->pa_group_list);
-+ ext4_get_group_info(sb, group)->bb_prealloc_nr--;
- ext4_mb_release_group_pa(&e4b, pa);
- ext4_unlock_group(sb, group);
-
-Index: linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.h
-===================================================================
---- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/mballoc.h
-+++ linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.h
-@@ -82,7 +82,7 @@ extern ushort ext4_mballoc_debug;
- /*
- * for which requests use 2^N search using buddies
- */
--#define MB_DEFAULT_ORDER2_REQS 2
-+#define MB_DEFAULT_ORDER2_REQS 8
-
- /*
- * default group prealloc size 512 blocks
+++ /dev/null
-Single directory performance is a critical for HPC workloads. In a
-typical use case an application creates a separate output file for
-each node and task in a job. As nodes and tasks increase, hundreds
-of thousands of files may be created in a single directory within
-a short window of time.
-Today, both filename lookup and file system modifying operations
-(such as create and unlink) are protected with a single lock for
-an entire ldiskfs directory. PDO project will remove this
-bottleneck by introducing a parallel locking mechanism for entire
-ldiskfs directories. This work will enable multiple application
-threads to simultaneously lookup, create and unlink in parallel.
-
-This patch contains:
- - pdirops support for ldiskfs
- - integrate with osd-ldiskfs
-
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/Makefile
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile
-@@ -6,6 +6,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
-
- ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-+ htree_lock.o \
- ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
- mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
- xattr_trusted.o inline.o
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/ext4.h
-@@ -27,6 +27,7 @@
- #include <linux/mutex.h>
- #include <linux/timer.h>
- #include <linux/wait.h>
-+#include <linux/htree_lock.h>
- #include <linux/blockgroup_lock.h>
- #include <linux/percpu_counter.h>
- #include <linux/ratelimit.h>
-@@ -821,6 +822,9 @@ struct ext4_inode_info {
- __u32 i_dtime;
- ext4_fsblk_t i_file_acl;
-
-+ /* following fields for parallel directory operations -bzzz */
-+ struct semaphore i_append_sem;
-+
- /*
- * i_block_group is the number of the block group which contains
- * this file's inode. Constant across the lifetime of the inode,
-@@ -1846,6 +1850,71 @@ struct dx_hash_info
- */
- #define HASH_NB_ALWAYS 1
-
-+/* assume name-hash is protected by upper layer */
-+#define EXT4_HTREE_LOCK_HASH 0
-+
-+enum ext4_pdo_lk_types {
-+#if EXT4_HTREE_LOCK_HASH
-+ EXT4_LK_HASH,
-+#endif
-+ EXT4_LK_DX, /* index block */
-+ EXT4_LK_DE, /* directory entry block */
-+ EXT4_LK_SPIN, /* spinlock */
-+ EXT4_LK_MAX,
-+};
-+
-+/* read-only bit */
-+#define EXT4_LB_RO(b) (1 << (b))
-+/* read + write, high bits for writer */
-+#define EXT4_LB_RW(b) ((1 << (b)) | (1 << (EXT4_LK_MAX + (b))))
-+
-+enum ext4_pdo_lock_bits {
-+ /* DX lock bits */
-+ EXT4_LB_DX_RO = EXT4_LB_RO(EXT4_LK_DX),
-+ EXT4_LB_DX = EXT4_LB_RW(EXT4_LK_DX),
-+ /* DE lock bits */
-+ EXT4_LB_DE_RO = EXT4_LB_RO(EXT4_LK_DE),
-+ EXT4_LB_DE = EXT4_LB_RW(EXT4_LK_DE),
-+ /* DX spinlock bits */
-+ EXT4_LB_SPIN_RO = EXT4_LB_RO(EXT4_LK_SPIN),
-+ EXT4_LB_SPIN = EXT4_LB_RW(EXT4_LK_SPIN),
-+ /* accurate searching */
-+ EXT4_LB_EXACT = EXT4_LB_RO(EXT4_LK_MAX << 1),
-+};
-+
-+enum ext4_pdo_lock_opc {
-+ /* external */
-+ EXT4_HLOCK_READDIR = (EXT4_LB_DE_RO | EXT4_LB_DX_RO),
-+ EXT4_HLOCK_LOOKUP = (EXT4_LB_DE_RO | EXT4_LB_SPIN_RO |
-+ EXT4_LB_EXACT),
-+ EXT4_HLOCK_DEL = (EXT4_LB_DE | EXT4_LB_SPIN_RO |
-+ EXT4_LB_EXACT),
-+ EXT4_HLOCK_ADD = (EXT4_LB_DE | EXT4_LB_SPIN_RO),
-+
-+ /* internal */
-+ EXT4_HLOCK_LOOKUP_SAFE = (EXT4_LB_DE_RO | EXT4_LB_DX_RO |
-+ EXT4_LB_EXACT),
-+ EXT4_HLOCK_DEL_SAFE = (EXT4_LB_DE | EXT4_LB_DX_RO | EXT4_LB_EXACT),
-+ EXT4_HLOCK_SPLIT = (EXT4_LB_DE | EXT4_LB_DX | EXT4_LB_SPIN),
-+};
-+
-+extern struct htree_lock_head *ext4_htree_lock_head_alloc(unsigned hbits);
-+#define ext4_htree_lock_head_free(lhead) htree_lock_head_free(lhead)
-+
-+extern struct htree_lock *ext4_htree_lock_alloc(void);
-+#define ext4_htree_lock_free(lck) htree_lock_free(lck)
-+
-+extern void ext4_htree_lock(struct htree_lock *lck,
-+ struct htree_lock_head *lhead,
-+ struct inode *dir, unsigned flags);
-+#define ext4_htree_unlock(lck) htree_unlock(lck)
-+
-+extern struct buffer_head *__ext4_find_entry(struct inode *dir,
-+ const struct qstr *d_name,
-+ struct ext4_dir_entry_2 **res_dir,
-+ int *inlined, struct htree_lock *lck);
-+extern int __ext4_add_entry(handle_t *handle, struct dentry *dentry,
-+ struct inode *inode, struct htree_lock *lck);
-
- /*
- * Describe an inode's exact location on disk and in memory
-@@ -2088,8 +2157,16 @@ void ext4_insert_dentry(struct inode *in
- const char *name, int namelen, void *data);
- static inline void ext4_update_dx_flag(struct inode *inode)
- {
-+ /* Disable it for ldiskfs, because going from a DX directory to
-+ * a non-DX directory while it is in use will completely break
-+ * the htree-locking.
-+ * If we really want to support this operation in the future,
-+ * we need to exclusively lock the directory at here which will
-+ * increase complexity of code */
-+#if 0
- if (!ext4_has_feature_dir_index(inode->i_sb))
- ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
-+#endif
- }
- static unsigned char ext4_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/namei.c
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/namei.c
-@@ -52,6 +52,7 @@ struct buffer_head *ext4_append(handle_t
- ext4_lblk_t *block)
- {
- struct buffer_head *bh;
-+ struct ext4_inode_info *ei = EXT4_I(inode);
- int err;
-
- if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
-@@ -59,15 +60,22 @@ struct buffer_head *ext4_append(handle_t
- EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
- return ERR_PTR(-ENOSPC);
-
-+ /* with parallel dir operations all appends
-+ * have to be serialized -bzzz */
-+ down(&ei->i_append_sem);
-+
- *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
-
- bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
-- if (IS_ERR(bh))
-+ if (IS_ERR(bh)) {
-+ up(&ei->i_append_sem);
- return bh;
-+ }
- inode->i_size += inode->i_sb->s_blocksize;
- EXT4_I(inode)->i_disksize = inode->i_size;
- BUFFER_TRACE(bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh);
-+ up(&ei->i_append_sem);
- if (err) {
- brelse(bh);
- ext4_std_error(inode->i_sb, err);
-@@ -247,7 +255,8 @@ static unsigned dx_node_limit(struct ino
- static struct dx_frame *dx_probe(struct ext4_filename *fname,
- struct inode *dir,
- struct dx_hash_info *hinfo,
-- struct dx_frame *frame);
-+ struct dx_frame *frame,
-+ struct htree_lock *lck);
- static void dx_release(struct dx_frame *frames);
- static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
- unsigned blocksize, struct dx_hash_info *hinfo,
-@@ -261,12 +270,13 @@ static void dx_insert_block(struct dx_fr
- static int ext4_htree_next_block(struct inode *dir, __u32 hash,
- struct dx_frame *frame,
- struct dx_frame *frames,
-- __u32 *start_hash);
-+ __u32 *start_hash, struct htree_lock *lck);
- static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
- struct ext4_filename *fname,
-- struct ext4_dir_entry_2 **res_dir);
-+ struct ext4_dir_entry_2 **res_dir, struct htree_lock *lck);
- static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-- struct dentry *dentry, struct inode *inode);
-+ struct dentry *dentry, struct inode *inode,
-+ struct htree_lock *lck);
-
- /* checksumming functions */
- void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
-@@ -733,6 +743,227 @@ struct stats dx_show_entries(struct dx_h
- }
- #endif /* DX_DEBUG */
-
-+/* private data for htree_lock */
-+struct ext4_dir_lock_data {
-+ unsigned ld_flags; /* bits-map for lock types */
-+ unsigned ld_count; /* # entries of the last DX block */
-+ struct dx_entry ld_at_entry; /* copy of leaf dx_entry */
-+ struct dx_entry *ld_at; /* position of leaf dx_entry */
-+};
-+
-+#define ext4_htree_lock_data(l) ((struct ext4_dir_lock_data *)(l)->lk_private)
-+#define ext4_find_entry(dir, name, dirent, inline) \
-+ __ext4_find_entry(dir, name, dirent, inline, NULL)
-+#define ext4_add_entry(handle, dentry, inode) \
-+ __ext4_add_entry(handle, dentry, inode, NULL)
-+
-+/* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */
-+#define EXT4_HTREE_NODE_CHANGED (0xcafeULL << 32)
-+
-+static void ext4_htree_event_cb(void *target, void *event)
-+{
-+ u64 *block = (u64 *)target;
-+
-+ if (*block == dx_get_block((struct dx_entry *)event))
-+ *block = EXT4_HTREE_NODE_CHANGED;
-+}
-+
-+struct htree_lock_head *ext4_htree_lock_head_alloc(unsigned hbits)
-+{
-+ struct htree_lock_head *lhead;
-+
-+ lhead = htree_lock_head_alloc(EXT4_LK_MAX, hbits, 0);
-+ if (lhead != NULL) {
-+ htree_lock_event_attach(lhead, EXT4_LK_SPIN, HTREE_EVENT_WR,
-+ ext4_htree_event_cb);
-+ }
-+ return lhead;
-+}
-+EXPORT_SYMBOL(ext4_htree_lock_head_alloc);
-+
-+struct htree_lock *ext4_htree_lock_alloc(void)
-+{
-+ return htree_lock_alloc(EXT4_LK_MAX,
-+ sizeof(struct ext4_dir_lock_data));
-+}
-+EXPORT_SYMBOL(ext4_htree_lock_alloc);
-+
-+static htree_lock_mode_t ext4_htree_mode(unsigned flags)
-+{
-+ switch (flags) {
-+ default: /* 0 or unknown flags require EX lock */
-+ return HTREE_LOCK_EX;
-+ case EXT4_HLOCK_READDIR:
-+ return HTREE_LOCK_PR;
-+ case EXT4_HLOCK_LOOKUP:
-+ return HTREE_LOCK_CR;
-+ case EXT4_HLOCK_DEL:
-+ case EXT4_HLOCK_ADD:
-+ return HTREE_LOCK_CW;
-+ }
-+}
-+
-+/* return PR for read-only operations, otherwise return EX */
-+static inline htree_lock_mode_t ext4_htree_safe_mode(unsigned flags)
-+{
-+ int writer = (flags & EXT4_LB_DE) == EXT4_LB_DE;
-+
-+ /* 0 requires EX lock */
-+ return (flags == 0 || writer) ? HTREE_LOCK_EX : HTREE_LOCK_PR;
-+}
-+
-+static int ext4_htree_safe_locked(struct htree_lock *lck)
-+{
-+ int writer;
-+
-+ if (lck == NULL || lck->lk_mode == HTREE_LOCK_EX)
-+ return 1;
-+
-+ writer = (ext4_htree_lock_data(lck)->ld_flags & EXT4_LB_DE) ==
-+ EXT4_LB_DE;
-+ if (writer) /* all readers & writers are excluded? */
-+ return lck->lk_mode == HTREE_LOCK_EX;
-+
-+ /* all writers are excluded? */
-+ return lck->lk_mode == HTREE_LOCK_PR ||
-+ lck->lk_mode == HTREE_LOCK_PW ||
-+ lck->lk_mode == HTREE_LOCK_EX;
-+}
-+
-+/* relock htree_lock with EX mode if it's change operation, otherwise
-+ * relock it with PR mode. It's noop if PDO is disabled. */
-+static void ext4_htree_safe_relock(struct htree_lock *lck)
-+{
-+ if (!ext4_htree_safe_locked(lck)) {
-+ unsigned flags = ext4_htree_lock_data(lck)->ld_flags;
-+
-+ htree_change_lock(lck, ext4_htree_safe_mode(flags));
-+ }
-+}
-+
-+void ext4_htree_lock(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ struct inode *dir, unsigned flags)
-+{
-+ htree_lock_mode_t mode = is_dx(dir) ? ext4_htree_mode(flags) :
-+ ext4_htree_safe_mode(flags);
-+
-+ ext4_htree_lock_data(lck)->ld_flags = flags;
-+ htree_lock(lck, lhead, mode);
-+ if (!is_dx(dir))
-+ ext4_htree_safe_relock(lck); /* make sure it's safe locked */
-+}
-+EXPORT_SYMBOL(ext4_htree_lock);
-+
-+static int ext4_htree_node_lock(struct htree_lock *lck, struct dx_entry *at,
-+ unsigned lmask, int wait, void *ev)
-+{
-+ u32 key = (at == NULL) ? 0 : dx_get_block(at);
-+ u32 mode;
-+
-+ /* NOOP if htree is well protected or caller doesn't require the lock */
-+ if (ext4_htree_safe_locked(lck) ||
-+ !(ext4_htree_lock_data(lck)->ld_flags & lmask))
-+ return 1;
-+
-+ mode = (ext4_htree_lock_data(lck)->ld_flags & lmask) == lmask ?
-+ HTREE_LOCK_PW : HTREE_LOCK_PR;
-+ while (1) {
-+ if (htree_node_lock_try(lck, mode, key, ffz(~lmask), wait, ev))
-+ return 1;
-+ if (!(lmask & EXT4_LB_SPIN)) /* not a spinlock */
-+ return 0;
-+ cpu_relax(); /* spin until granted */
-+ }
-+}
-+
-+static int ext4_htree_node_locked(struct htree_lock *lck, unsigned lmask)
-+{
-+ return ext4_htree_safe_locked(lck) ||
-+ htree_node_is_granted(lck, ffz(~lmask));
-+}
-+
-+static void ext4_htree_node_unlock(struct htree_lock *lck,
-+ unsigned lmask, void *buf)
-+{
-+ /* NB: it's safe to call mutiple times or even it's not locked */
-+ if (!ext4_htree_safe_locked(lck) &&
-+ htree_node_is_granted(lck, ffz(~lmask)))
-+ htree_node_unlock(lck, ffz(~lmask), buf);
-+}
-+
-+#define ext4_htree_dx_lock(lck, key) \
-+ ext4_htree_node_lock(lck, key, EXT4_LB_DX, 1, NULL)
-+#define ext4_htree_dx_lock_try(lck, key) \
-+ ext4_htree_node_lock(lck, key, EXT4_LB_DX, 0, NULL)
-+#define ext4_htree_dx_unlock(lck) \
-+ ext4_htree_node_unlock(lck, EXT4_LB_DX, NULL)
-+#define ext4_htree_dx_locked(lck) \
-+ ext4_htree_node_locked(lck, EXT4_LB_DX)
-+
-+static void ext4_htree_dx_need_lock(struct htree_lock *lck)
-+{
-+ struct ext4_dir_lock_data *ld;
-+
-+ if (ext4_htree_safe_locked(lck))
-+ return;
-+
-+ ld = ext4_htree_lock_data(lck);
-+ switch (ld->ld_flags) {
-+ default:
-+ return;
-+ case EXT4_HLOCK_LOOKUP:
-+ ld->ld_flags = EXT4_HLOCK_LOOKUP_SAFE;
-+ return;
-+ case EXT4_HLOCK_DEL:
-+ ld->ld_flags = EXT4_HLOCK_DEL_SAFE;
-+ return;
-+ case EXT4_HLOCK_ADD:
-+ ld->ld_flags = EXT4_HLOCK_SPLIT;
-+ return;
-+ }
-+}
-+
-+#define ext4_htree_de_lock(lck, key) \
-+ ext4_htree_node_lock(lck, key, EXT4_LB_DE, 1, NULL)
-+#define ext4_htree_de_unlock(lck) \
-+ ext4_htree_node_unlock(lck, EXT4_LB_DE, NULL)
-+
-+#define ext4_htree_spin_lock(lck, key, event) \
-+ ext4_htree_node_lock(lck, key, EXT4_LB_SPIN, 0, event)
-+#define ext4_htree_spin_unlock(lck) \
-+ ext4_htree_node_unlock(lck, EXT4_LB_SPIN, NULL)
-+#define ext4_htree_spin_unlock_listen(lck, p) \
-+ ext4_htree_node_unlock(lck, EXT4_LB_SPIN, p)
-+
-+static void ext4_htree_spin_stop_listen(struct htree_lock *lck)
-+{
-+ if (!ext4_htree_safe_locked(lck) &&
-+ htree_node_is_listening(lck, ffz(~EXT4_LB_SPIN)))
-+ htree_node_stop_listen(lck, ffz(~EXT4_LB_SPIN));
-+}
-+
-+enum {
-+ DX_HASH_COL_IGNORE, /* ignore collision while probing frames */
-+ DX_HASH_COL_YES, /* there is collision and it does matter */
-+ DX_HASH_COL_NO, /* there is no collision */
-+};
-+
-+static int dx_probe_hash_collision(struct htree_lock *lck,
-+ struct dx_entry *entries,
-+ struct dx_entry *at, u32 hash)
-+{
-+ if (!(lck && ext4_htree_lock_data(lck)->ld_flags & EXT4_LB_EXACT)) {
-+ return DX_HASH_COL_IGNORE; /* don't care about collision */
-+
-+ } else if (at == entries + dx_get_count(entries) - 1) {
-+ return DX_HASH_COL_IGNORE; /* not in any leaf of this DX */
-+
-+ } else { /* hash collision? */
-+ return ((dx_get_hash(at + 1) & ~1) == hash) ?
-+ DX_HASH_COL_YES : DX_HASH_COL_NO;
-+ }
-+}
-+
- /*
- * Probe for a directory leaf block to search.
- *
-@@ -744,10 +975,11 @@ struct stats dx_show_entries(struct dx_h
- */
- static struct dx_frame *
- dx_probe(struct ext4_filename *fname, struct inode *dir,
-- struct dx_hash_info *hinfo, struct dx_frame *frame_in)
-+ struct dx_hash_info *hinfo, struct dx_frame *frame_in,
-+ struct htree_lock *lck)
- {
- unsigned count, indirect;
-- struct dx_entry *at, *entries, *p, *q, *m;
-+ struct dx_entry *at, *entries, *p, *q, *m, *dx = NULL;
- struct dx_root_info *info;
- struct dx_frame *frame = frame_in;
- struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
-@@ -808,8 +1040,15 @@ dx_probe(struct ext4_filename *fname, st
-
- dxtrace(printk("Look up %x", hash));
- while (1) {
-+ if (indirect == 0) { /* the last index level */
-+ /* NB: ext4_htree_dx_lock() could be noop if
-+ * DX-lock flag is not set for current operation */
-+ ext4_htree_dx_lock(lck, dx);
-+ ext4_htree_spin_lock(lck, dx, NULL);
-+ }
- count = dx_get_count(entries);
-- if (!count || count > dx_get_limit(entries)) {
-+ if (count == 0 || count > dx_get_limit(entries)) {
-+ ext4_htree_spin_unlock(lck); /* release spin */
- ext4_warning_inode(dir,
- "dx entry: count %u beyond limit %u",
- count, dx_get_limit(entries));
-@@ -847,8 +1086,70 @@ dx_probe(struct ext4_filename *fname, st
- dx_get_block(at)));
- frame->entries = entries;
- frame->at = at;
-- if (!indirect--)
-+
-+ if (indirect == 0) { /* the last index level */
-+ struct ext4_dir_lock_data *ld;
-+ u64 myblock;
-+
-+ /* By default we only lock DE-block, however, we will
-+ * also lock the last level DX-block if:
-+ * a) there is hash collision
-+ * we will set DX-lock flag (a few lines below)
-+ * and redo to lock DX-block
-+ * see detail in dx_probe_hash_collision()
-+ * b) it's a retry from splitting
-+ * we need to lock the last level DX-block so nobody
-+ * else can split any leaf blocks under the same
-+ * DX-block, see detail in ext4_dx_add_entry()
-+ */
-+ if (ext4_htree_dx_locked(lck)) {
-+ /* DX-block is locked, just lock DE-block
-+ * and return */
-+ ext4_htree_spin_unlock(lck);
-+ if (!ext4_htree_safe_locked(lck))
-+ ext4_htree_de_lock(lck, frame->at);
-+ return frame;
-+ }
-+ /* it's pdirop and no DX lock */
-+ if (dx_probe_hash_collision(lck, entries, at, hash) ==
-+ DX_HASH_COL_YES) {
-+ /* found hash collision, set DX-lock flag
-+ * and retry to abtain DX-lock */
-+ ext4_htree_spin_unlock(lck);
-+ ext4_htree_dx_need_lock(lck);
-+ continue;
-+ }
-+ ld = ext4_htree_lock_data(lck);
-+ /* because I don't lock DX, so @at can't be trusted
-+ * after I release spinlock so I have to save it */
-+ ld->ld_at = at;
-+ ld->ld_at_entry = *at;
-+ ld->ld_count = dx_get_count(entries);
-+
-+ frame->at = &ld->ld_at_entry;
-+ myblock = dx_get_block(at);
-+
-+ /* NB: ordering locking */
-+ ext4_htree_spin_unlock_listen(lck, &myblock);
-+ /* other thread can split this DE-block because:
-+ * a) I don't have lock for the DE-block yet
-+ * b) I released spinlock on DX-block
-+ * if it happened I can detect it by listening
-+ * splitting event on this DE-block */
-+ ext4_htree_de_lock(lck, frame->at);
-+ ext4_htree_spin_stop_listen(lck);
-+
-+ if (myblock == EXT4_HTREE_NODE_CHANGED) {
-+ /* someone split this DE-block before
-+ * I locked it, I need to retry and lock
-+ * valid DE-block */
-+ ext4_htree_de_unlock(lck);
-+ continue;
-+ }
- return frame;
-+ }
-+ dx = at;
-+ indirect--;
- frame++;
- frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
- if (IS_ERR(frame->bh)) {
-@@ -915,7 +1216,7 @@ static void dx_release(struct dx_frame *
- static int ext4_htree_next_block(struct inode *dir, __u32 hash,
- struct dx_frame *frame,
- struct dx_frame *frames,
-- __u32 *start_hash)
-+ __u32 *start_hash, struct htree_lock *lck)
- {
- struct dx_frame *p;
- struct buffer_head *bh;
-@@ -930,12 +1231,22 @@ static int ext4_htree_next_block(struct
- * this loop, num_frames indicates the number of interior
- * nodes need to be read.
- */
-+ ext4_htree_de_unlock(lck);
- while (1) {
-- if (++(p->at) < p->entries + dx_get_count(p->entries))
-- break;
-+ if (num_frames > 0 || ext4_htree_dx_locked(lck)) {
-+ /* num_frames > 0 :
-+ * DX block
-+ * ext4_htree_dx_locked:
-+ * frame->at is reliable pointer returned by dx_probe,
-+ * otherwise dx_probe already knew no collision */
-+ if (++(p->at) < p->entries + dx_get_count(p->entries))
-+ break;
-+ }
- if (p == frames)
- return 0;
- num_frames++;
-+ if (num_frames == 1)
-+ ext4_htree_dx_unlock(lck);
- p--;
- }
-
-@@ -958,6 +1269,13 @@ static int ext4_htree_next_block(struct
- * block so no check is necessary
- */
- while (num_frames--) {
-+ if (num_frames == 0) {
-+ /* it's not always necessary, we just don't want to
-+ * detect hash collision again */
-+ ext4_htree_dx_need_lock(lck);
-+ ext4_htree_dx_lock(lck, p->at);
-+ }
-+
- bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
-@@ -966,6 +1284,7 @@ static int ext4_htree_next_block(struct
- p->bh = bh;
- p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
- }
-+ ext4_htree_de_lock(lck, p->at);
- return 1;
- }
-
-@@ -1110,10 +1429,10 @@ int ext4_htree_fill_tree(struct file *di
- }
- hinfo.hash = start_hash;
- hinfo.minor_hash = 0;
-- frame = dx_probe(NULL, dir, &hinfo, frames);
-+ /* assume it's PR locked */
-+ frame = dx_probe(NULL, dir, &hinfo, frames, NULL);
- if (IS_ERR(frame))
- return PTR_ERR(frame);
--
- /* Add '.' and '..' from the htree header */
- if (!start_hash && !start_minor_hash) {
- de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
-@@ -1148,7 +1467,7 @@ int ext4_htree_fill_tree(struct file *di
- count += ret;
- hashval = ~0;
- ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
-- frame, frames, &hashval);
-+ frame, frames, &hashval, NULL);
- *next_hash = hashval;
- if (ret < 0) {
- err = ret;
-@@ -1372,10 +1691,10 @@ static int is_dx_internal_node(struct in
- * The returned buffer_head has ->b_count elevated. The caller is expected
- * to brelse() it when appropriate.
- */
--static struct buffer_head * ext4_find_entry (struct inode *dir,
-+struct buffer_head *__ext4_find_entry(struct inode *dir,
- const struct qstr *d_name,
- struct ext4_dir_entry_2 **res_dir,
-- int *inlined)
-+ int *inlined, struct htree_lock *lck)
- {
- struct super_block *sb;
- struct buffer_head *bh_use[NAMEI_RA_SIZE];
-@@ -1423,7 +1742,7 @@ static struct buffer_head * ext4_find_en
- goto restart;
- }
- if (is_dx(dir)) {
-- ret = ext4_dx_find_entry(dir, &fname, res_dir);
-+ ret = ext4_dx_find_entry(dir, &fname, res_dir, lck);
- /*
- * On success, or if the error was file not found,
- * return. Otherwise, fall back to doing a search the
-@@ -1433,6 +1752,7 @@ static struct buffer_head * ext4_find_en
- goto cleanup_and_exit;
- dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
- "falling back\n"));
-+ ext4_htree_safe_relock(lck);
- ret = NULL;
- }
- nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
-@@ -1534,10 +1854,12 @@ cleanup_and_exit:
- ext4_fname_free_filename(&fname);
- return ret;
- }
-+EXPORT_SYMBOL(__ext4_find_entry);
-
- static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
- struct ext4_filename *fname,
-- struct ext4_dir_entry_2 **res_dir)
-+ struct ext4_dir_entry_2 **res_dir,
-+ struct htree_lock *lck)
- {
- struct super_block * sb = dir->i_sb;
- struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
-@@ -1549,7 +1871,7 @@ static struct buffer_head * ext4_dx_find
- #ifdef CONFIG_EXT4_FS_ENCRYPTION
- *res_dir = NULL;
- #endif
-- frame = dx_probe(fname, dir, NULL, frames);
-+ frame = dx_probe(fname, dir, NULL, frames, lck);
- if (IS_ERR(frame))
- return (struct buffer_head *) frame;
- do {
-@@ -1571,7 +1893,7 @@ static struct buffer_head * ext4_dx_find
-
- /* Check to see if we should continue to search */
- retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
-- frames, NULL);
-+ frames, NULL, lck);
- if (retval < 0) {
- ext4_warning_inode(dir,
- "error %d reading directory index block",
-@@ -1762,8 +2084,9 @@ static struct ext4_dir_entry_2* dx_pack_
- * Returns pointer to de in block into which the new entry will be inserted.
- */
- static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
-- struct buffer_head **bh,struct dx_frame *frame,
-- struct dx_hash_info *hinfo)
-+ struct buffer_head **bh, struct dx_frame *frames,
-+ struct dx_frame *frame, struct dx_hash_info *hinfo,
-+ struct htree_lock *lck)
- {
- unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
-@@ -1825,8 +2148,14 @@ static struct ext4_dir_entry_2 *do_split
- hash2, split, count-split));
-
- /* Fancy dance to stay within two buffers */
-- de2 = dx_move_dirents(data1, data2, map + split, count - split,
-- blocksize);
-+ if (hinfo->hash < hash2) {
-+ de2 = dx_move_dirents(data1, data2, map + split,
-+ count - split, blocksize);
-+ } else {
-+ /* make sure we will add entry to the same block which
-+ * we have already locked */
-+ de2 = dx_move_dirents(data1, data2, map, split, blocksize);
-+ }
- de = dx_pack_dirents(data1, blocksize);
- de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
- (char *) de,
-@@ -1847,12 +2176,21 @@ static struct ext4_dir_entry_2 *do_split
- dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
- blocksize, 1));
-
-- /* Which block gets the new entry? */
-- if (hinfo->hash >= hash2) {
-- swap(*bh, bh2);
-- de = de2;
-+ ext4_htree_spin_lock(lck, frame > frames ? (frame - 1)->at : NULL,
-+ frame->at); /* notify block is being split */
-+ if (hinfo->hash < hash2) {
-+ dx_insert_block(frame, hash2 + continued, newblock);
-+
-+ } else {
-+ /* switch block number */
-+ dx_insert_block(frame, hash2 + continued,
-+ dx_get_block(frame->at));
-+ dx_set_block(frame->at, newblock);
-+ (frame->at)++;
- }
-- dx_insert_block(frame, hash2 + continued, newblock);
-+ ext4_htree_spin_unlock(lck);
-+ ext4_htree_dx_unlock(lck);
-+
- err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
- if (err)
- goto journal_error;
-@@ -2145,7 +2483,7 @@ static int make_indexed_dir(handle_t *ha
- if (retval)
- goto out_frames;
-
-- de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
-+ de = do_split(handle, dir, &bh2, frames, frame, &fname->hinfo, NULL);
- if (IS_ERR(de)) {
- retval = PTR_ERR(de);
- goto out_frames;
-@@ -2255,8 +2593,8 @@ out:
- * may not sleep between calling this and putting something into
- * the entry, as someone else might have used it while you slept.
- */
--static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
-- struct inode *inode)
-+int __ext4_add_entry(handle_t *handle, struct dentry *dentry,
-+ struct inode *inode, struct htree_lock *lck)
- {
- struct inode *dir = d_inode(dentry->d_parent);
- struct buffer_head *bh = NULL;
-@@ -2297,9 +2635,10 @@ static int ext4_add_entry(handle_t *hand
- if (dentry->d_name.len == 2 &&
- memcmp(dentry->d_name.name, "..", 2) == 0)
- return ext4_update_dotdot(handle, dentry, inode);
-- retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
-+ retval = ext4_dx_add_entry(handle, &fname, dentry, inode, lck);
- if (!retval || (retval != ERR_BAD_DX_DIR))
- goto out;
-+ ext4_htree_safe_relock(lck);
- ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
- dx_fallback++;
- ext4_mark_inode_dirty(handle, dir);
-@@ -2349,12 +2688,14 @@ out:
- ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
- return retval;
- }
-+EXPORT_SYMBOL(__ext4_add_entry);
-
- /*
- * Returns 0 for success, or a negative error value
- */
- static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
-- struct dentry *dentry, struct inode *inode)
-+ struct dentry *dentry, struct inode *inode,
-+ struct htree_lock *lck)
- {
- struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
- struct dx_entry *entries, *at;
-@@ -2367,7 +2708,7 @@ static int ext4_dx_add_entry(handle_t *h
-
- again:
- restart = 0;
-- frame = dx_probe(fname, dir, NULL, frames);
-+ frame = dx_probe(fname, dir, NULL, frames, lck);
- if (IS_ERR(frame))
- return PTR_ERR(frame);
- entries = frame->entries;
-@@ -2397,6 +2738,11 @@ again:
- struct dx_node *node2;
- struct buffer_head *bh2;
-
-+ if (!ext4_htree_safe_locked(lck)) { /* retry with EX lock */
-+ ext4_htree_safe_relock(lck);
-+ restart = 1;
-+ goto cleanup;
-+ }
- while (frame > frames) {
- if (dx_get_count((frame - 1)->entries) <
- dx_get_limit((frame - 1)->entries)) {
-@@ -2496,8 +2842,32 @@ again:
- restart = 1;
- goto journal_error;
- }
-+ } else if (!ext4_htree_dx_locked(lck)) {
-+ struct ext4_dir_lock_data *ld = ext4_htree_lock_data(lck);
-+
-+ /* not well protected, require DX lock */
-+ ext4_htree_dx_need_lock(lck);
-+ at = frame > frames ? (frame - 1)->at : NULL;
-+
-+ /* NB: no risk of deadlock because it's just a try.
-+ *
-+ * NB: we check ld_count for twice, the first time before
-+ * having DX lock, the second time after holding DX lock.
-+ *
-+ * NB: We never free blocks for directory so far, which
-+ * means value returned by dx_get_count() should equal to
-+ * ld->ld_count if nobody split any DE-block under @at,
-+ * and ld->ld_at still points to valid dx_entry. */
-+ if ((ld->ld_count != dx_get_count(entries)) ||
-+ !ext4_htree_dx_lock_try(lck, at) ||
-+ (ld->ld_count != dx_get_count(entries))) {
-+ restart = 1;
-+ goto cleanup;
-+ }
-+ /* OK, I've got DX lock and nothing changed */
-+ frame->at = ld->ld_at;
- }
-- de = do_split(handle, dir, &bh, frame, &fname->hinfo);
-+ de = do_split(handle, dir, &bh, frames, frame, &fname->hinfo, lck);
- if (IS_ERR(de)) {
- err = PTR_ERR(de);
- goto cleanup;
-@@ -2508,6 +2878,8 @@ again:
- journal_error:
- ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
- cleanup:
-+ ext4_htree_dx_unlock(lck);
-+ ext4_htree_de_unlock(lck);
- brelse(bh);
- dx_release(frames);
- /* @restart is true means htree-path has been changed, we need to
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/super.c
-===================================================================
---- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/super.c
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/super.c
-@@ -875,6 +875,7 @@ static struct inode *ext4_alloc_inode(st
-
- ei->vfs_inode.i_version = 1;
- spin_lock_init(&ei->i_raw_lock);
-+ sema_init(&ei->i_append_sem, 1);
- INIT_LIST_HEAD(&ei->i_prealloc_list);
- spin_lock_init(&ei->i_prealloc_lock);
- ext4_es_init_tree(&ei->i_es_tree);
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/ext4.h
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ext4.h
-@@ -1427,6 +1427,8 @@ static inline void ext4_clear_state_flag
-
+---
+ fs/ext4/ext4.h | 23 ++++++++++++++++++++++-
+ fs/ext4/ialloc.c | 3 ++-
+ fs/ext4/inode.c | 15 +++++++++++++++
+ fs/ext4/namei.c | 9 ++++++---
+ fs/ext4/super.c | 10 ++--------
+ 5 files changed, 47 insertions(+), 13 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1630,6 +1630,8 @@ static inline void ext4_clear_state_flag
+ */
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
+#define JOURNAL_START_HAS_3ARGS 1
/*
* Codes for operating systems
*/
-@@ -1527,7 +1427,21 @@ static inline void ext4_clear_state_flag
-
+@@ -1842,7 +1844,21 @@ static inline bool ext4_has_unknown_ext#
+
EXTN_FEATURE_FUNCS(2)
EXTN_FEATURE_FUNCS(3)
-EXTN_FEATURE_FUNCS(4)
+ return ((EXT4_SB(sb)->s_es->s_feature_incompat &
+ cpu_to_le32(~EXT4_FEATURE_INCOMPAT_SUPP)) != 0);
+}
-
+
static inline bool ext4_has_compat_features(struct super_block *sb)
{
-@@ -2612,6 +2614,11 @@ struct ext4_extent;
+@@ -3133,6 +3149,11 @@ struct ext4_extent;
extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
+ struct inode *inode,
+ ext4_lblk_t *block);
extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
- int chunk);
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/namei.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/namei.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/namei.c
+ struct ext4_map_blocks *map, int flags);
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -120,7 +120,7 @@ verified:
+ *
+ * Return buffer_head of bitmap on success or NULL.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
+ {
+ struct ext4_group_desc *desc;
+@@ -213,6 +213,7 @@ out:
+ put_bh(bh);
+ return ERR_PTR(err);
+ }
++EXPORT_SYMBOL(ext4_read_inode_bitmap);
+
+ /*
+ * NOTE! When we get the inode, we're the only people
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -6253,3 +6253,18 @@ int ext4_get_next_extent(struct inode *i
+ result->es_len = 0;
+ return 0;
+ }
++EXPORT_SYMBOL(ext4_map_blocks);
++EXPORT_SYMBOL(ext4_truncate);
++EXPORT_SYMBOL(__ext4_iget);
++EXPORT_SYMBOL(ext4_bread);
++EXPORT_SYMBOL(ext4_itable_unused_count);
++EXPORT_SYMBOL(ext4_force_commit);
++EXPORT_SYMBOL(ext4_mark_inode_dirty);
++EXPORT_SYMBOL(ext4_get_group_desc);
++EXPORT_SYMBOL(__ext4_journal_get_write_access);
++EXPORT_SYMBOL(__ext4_journal_start_sb);
++EXPORT_SYMBOL(__ext4_journal_stop);
++EXPORT_SYMBOL(__ext4_handle_dirty_metadata);
++EXPORT_SYMBOL(__ext4_std_error);
++EXPORT_SYMBOL(ext4fs_dirhash);
++EXPORT_SYMBOL(ext4_get_inode_loc);
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
@@ -48,7 +48,7 @@
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
struct inode *inode,
ext4_lblk_t *block)
{
-@@ -155,6 +155,7 @@ static struct buffer_head *__ext4_read_d
+@@ -159,6 +159,7 @@ static struct buffer_head *__ext4_read_d
}
return bh;
}
#ifndef assert
#define assert(test) J_ASSERT(test)
-@@ -2210,7 +2211,7 @@ out:
+@@ -2392,7 +2393,7 @@ EXPORT_SYMBOL(ext4_delete_entry);
* DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
* since this indicates that nlinks count was previously 1.
*/
{
inc_nlink(inode);
if (is_dx(inode) && inode->i_nlink > 1) {
-@@ -2222,16 +2223,18 @@ static void ext4_inc_count(handle_t *han
+@@ -2403,16 +2404,18 @@ static void ext4_inc_count(handle_t *han
}
}
}
static int ext4_add_nondir(handle_t *handle,
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ialloc.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/ialloc.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/ialloc.c
-@@ -111,7 +111,7 @@ void ext4_end_bitmap_read(struct buffer_
- *
- * Return buffer_head of bitmap on success or NULL.
- */
--static struct buffer_head *
-+struct buffer_head *
- ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
- {
- struct ext4_group_desc *desc;
-@@ -191,6 +191,7 @@ verify:
- set_buffer_verified(bh);
- return bh;
- }
-+EXPORT_SYMBOL(ext4_read_inode_bitmap);
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -326,11 +326,11 @@ static void __save_error_info(struct sup
+ return;
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ es->s_last_error_time = cpu_to_le32(get_seconds());
+- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
++ strlcpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
+ es->s_last_error_line = cpu_to_le32(line);
+ if (!es->s_first_error_time) {
+ es->s_first_error_time = es->s_last_error_time;
+- strncpy(es->s_first_error_func, func,
++ strlcpy(es->s_first_error_func, func,
+ sizeof(es->s_first_error_func));
+ es->s_first_error_line = cpu_to_le32(line);
+ es->s_first_error_ino = es->s_last_error_ino;
+@@ -5899,16 +5899,12 @@ static int __init ext4_init_fs(void)
+ err = init_inodecache();
+ if (err)
+ goto out1;
+- register_as_ext3();
+- register_as_ext2();
+ err = register_filesystem(&ext4_fs_type);
+ if (err)
+ goto out;
- /*
- * NOTE! When we get the inode, we're the only people
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/inode.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/inode.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/inode.c
-@@ -5281,3 +5281,17 @@ out:
- sb_end_pagefault(inode->i_sb);
- return ret;
- }
-+EXPORT_SYMBOL(ext4_map_blocks);
-+EXPORT_SYMBOL(ext4_truncate);
-+EXPORT_SYMBOL(ext4_iget);
-+EXPORT_SYMBOL(ext4_bread);
-+EXPORT_SYMBOL(ext4_itable_unused_count);
-+EXPORT_SYMBOL(ext4_force_commit);
-+EXPORT_SYMBOL(ext4_mark_inode_dirty);
-+EXPORT_SYMBOL(ext4_get_group_desc);
-+EXPORT_SYMBOL(__ext4_journal_get_write_access);
-+EXPORT_SYMBOL(__ext4_journal_start_sb);
-+EXPORT_SYMBOL(__ext4_journal_stop);
-+EXPORT_SYMBOL(__ext4_handle_dirty_metadata);
-+EXPORT_SYMBOL(__ext4_std_error);
-+EXPORT_SYMBOL(ext4fs_dirhash);
-Index: linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/mballoc.c
-===================================================================
---- linux-3.10.0-123.13.2.el7.x86_64.orig/fs/ext4/mballoc.c
-+++ linux-3.10.0-123.13.2.el7.x86_64/fs/ext4/mballoc.c
-@@ -5281,7 +5281,6 @@ out:
- void *buddy, void *bitmap, ext4_group_t group)
+ return 0;
+ out:
+- unregister_as_ext2();
+- unregister_as_ext3();
+ destroy_inodecache();
+ out1:
+ ext4_exit_mballoc();
+@@ -5927,8 +5923,6 @@ out5:
+ static void __exit ext4_exit_fs(void)
{
- struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
- ext4_grpblk_t i = 0;
- ext4_grpblk_t first;
+ ext4_destroy_lazyinit_thread();
+- unregister_as_ext2();
+- unregister_as_ext3();
+ unregister_filesystem(&ext4_fs_type);
+ destroy_inodecache();
+ ext4_exit_mballoc();
}
+EXPORT_SYMBOL(ext4_map_blocks);
+EXPORT_SYMBOL(ext4_truncate);
-+EXPORT_SYMBOL(__ext4_iget);
++EXPORT_SYMBOL(ext4_iget);
+EXPORT_SYMBOL(ext4_bread);
+EXPORT_SYMBOL(ext4_itable_unused_count);
+EXPORT_SYMBOL(ext4_force_commit);
suse15/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
-sles12sp2/ext4-prealloc.patch
-sles12sp2/ext4-osd-iop-common.patch
-sles12sp2/ext4-misc.patch
-sles12sp3/ext4-mballoc-extra-checks.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-prealloc.patch
+suse15/ext4-osd-iop-common.patch
+suse15/ext4-misc.patch
+suse15/ext4-mballoc-extra-checks.patch
ubuntu18/ext4-hash-indexed-dir-dotdot-update.patch
-sles12sp2/ext4-kill-dx-root.patch
+suse15/ext4-kill-dx-root.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
linux-5.4/ext4-data-in-dirent.patch
suse15/ext4-large-dir.patch
rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch
rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
rhel7.6/ext4-export-orphan-add.patch
-sles12sp2/ext4-export-mb-stream-allocator-variables.patch
+suse15/ext4-export-mb-stream-allocator-variables.patch
suse15/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
-sles12sp2/ext4-prealloc.patch
-sles12sp2/ext4-osd-iop-common.patch
-sles12sp2/ext4-misc.patch
-sles12sp3/ext4-mballoc-extra-checks.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-prealloc.patch
+suse15/ext4-osd-iop-common.patch
+suse15/ext4-misc.patch
+suse15/ext4-mballoc-extra-checks.patch
ubuntu18/ext4-hash-indexed-dir-dotdot-update.patch
-sles12sp2/ext4-kill-dx-root.patch
+suse15/ext4-kill-dx-root.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
linux-5.4/ext4-data-in-dirent.patch
suse15/ext4-large-dir.patch
rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch
rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
rhel7.6/ext4-export-orphan-add.patch
-sles12sp2/ext4-export-mb-stream-allocator-variables.patch
+suse15/ext4-export-mb-stream-allocator-variables.patch
suse15/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
-sles12sp2/ext4-prealloc.patch
-sles12sp2/ext4-osd-iop-common.patch
-sles12sp2/ext4-misc.patch
-sles12sp3/ext4-mballoc-extra-checks.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-prealloc.patch
+suse15/ext4-osd-iop-common.patch
+suse15/ext4-misc.patch
+suse15/ext4-mballoc-extra-checks.patch
ubuntu18/ext4-hash-indexed-dir-dotdot-update.patch
-sles12sp2/ext4-kill-dx-root.patch
+suse15/ext4-kill-dx-root.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
linux-5.4/ext4-data-in-dirent.patch
suse15/ext4-large-dir.patch
rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch
rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
rhel7.6/ext4-export-orphan-add.patch
-sles12sp2/ext4-export-mb-stream-allocator-variables.patch
+suse15/ext4-export-mb-stream-allocator-variables.patch
suse15/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
-sles12sp2/ext4-prealloc.patch
-sles12sp2/ext4-osd-iop-common.patch
-suse15/ext4-misc.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-prealloc.patch
+suse15/ext4-osd-iop-common.patch
+sles15sp1/ext4-misc.patch
suse15/ext4-mballoc-extra-checks.patch
ubuntu18/ext4-hash-indexed-dir-dotdot-update.patch
-sles12sp2/ext4-kill-dx-root.patch
+suse15/ext4-kill-dx-root.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
linux-5.4/ext4-data-in-dirent.patch
suse15/ext4-large-dir.patch
rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch
rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
rhel7.6/ext4-export-orphan-add.patch
-sles12sp2/ext4-export-mb-stream-allocator-variables.patch
+suse15/ext4-export-mb-stream-allocator-variables.patch
-sles12sp2/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
-sles12sp2/ext4-prealloc.patch
+ubuntu18/ext4-inode-version.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-prealloc.patch
ubuntu18/ext4-osd-iop-common.patch
ubuntu18/ext4-misc.patch
ubuntu18/ext4-mballoc-extra-checks.patch
ubuntu18/ext4-nocmtime.patch
base/ext4-htree-lock.patch
ubuntu18/ext4-pdirop.patch
-sles12sp2/ext4-max-dir-size.patch
+ubuntu18/ext4-max-dir-size.patch
ubuntu18/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
ubuntu18/ext4-give-warning-with-dir-htree-growing.patch
ubuntu18/ext4-jcb-optimization.patch
-sles12sp2/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
-sles12sp2/ext4-prealloc.patch
+ubuntu18/ext4-inode-version.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-prealloc.patch
ubuntu18/ext4-osd-iop-common.patch
ubuntu18/ext4-misc.patch
ubuntu18/ext4-mballoc-extra-checks.patch
ubuntu18/ext4-nocmtime.patch
base/ext4-htree-lock.patch
ubuntu18/ext4-pdirop.patch
-sles12sp2/ext4-max-dir-size.patch
+ubuntu18/ext4-max-dir-size.patch
ubuntu18/ext4-corrupted-inode-block-bitmaps-handling-patches-001.patch
ubuntu18/ext4-give-warning-with-dir-htree-growing.patch
ubuntu18/ext4-jcb-optimization.patch
rhel7.6/ext4-export-orphan-add.patch
rhel7.6/ext4-mmp-dont-mark-bh-dirty.patch
ubuntu18/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
-sles12sp2/ext4-export-mb-stream-allocator-variables.patch
+suse15/ext4-export-mb-stream-allocator-variables.patch
rhel8.1/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
rhel8/ext4-prealloc.patch
ubuntu18/ext4-osd-iop-common.patch
rhel8.1/ext4-misc.patch
rhel8/ext4-nocmtime.patch
base/ext4-htree-lock.patch
rhel8/ext4-pdirop.patch
-sles12sp3/ext4-max-dir-size.patch
+rhel8/ext4-max-dir-size.patch
rhel8/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
ubuntu18/ext4-give-warning-with-dir-htree-growing.patch
ubuntu18/ext4-jcb-optimization.patch
rhel8/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
rhel8/ext4-prealloc.patch
ubuntu18/ext4-osd-iop-common.patch
rhel8/ext4-misc.patch
rhel8/ext4-nocmtime.patch
base/ext4-htree-lock.patch
rhel8/ext4-pdirop.patch
-sles12sp3/ext4-max-dir-size.patch
+rhel8/ext4-max-dir-size.patch
rhel8/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
ubuntu18/ext4-give-warning-with-dir-htree-growing.patch
ubuntu18/ext4-jcb-optimization.patch
+++ /dev/null
-sles12sp2/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
-sles12sp2/ext4-prealloc.patch
-sles12sp2/ext4-osd-iop-common.patch
-sles12sp2/ext4-misc.patch
-sles12sp3/ext4-mballoc-extra-checks.patch
-sles12sp2/ext4-hash-indexed-dir-dotdot-update.patch
-sles12sp2/ext4-kill-dx-root.patch
-rhel7.6/ext4-mballoc-pa-free-mismatch.patch
-sles12sp2/ext4-data-in-dirent.patch
-sles12sp2/ext4-large-eas.patch
-sles12sp2/ext4-disable-mb-cache.patch
-rhel7.6/ext4-nocmtime.patch
-sles12sp2/ext4-large-dir.patch
-base/ext4-htree-lock.patch
-sles12sp2/ext4-pdirop.patch
-sles12sp2/ext4-max-dir-size.patch
-sles12sp2/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
-sles12sp2/ext4-give-warning-with-dir-htree-growing.patch
-sles12sp2/ext4-mmp-brelse.patch
-rhel7.6/ext4-jcb-optimization.patch
-sles12sp2/ext4-attach-jinode-in-writepages.patch
-sles12sp2/ext4-dont-check-before-replay.patch
-sles12sp2/ext4-fix-xattr-shifting-when-expanding-inodes.patch
-rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch
-rhel7.6/ext4-export-orphan-add.patch
-rhel7.6/ext4-mmp-dont-mark-bh-dirty.patch
-rhel7.6/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
-sles12sp2/ext4-export-mb-stream-allocator-variables.patch
-rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
+++ /dev/null
-sles12sp2/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
-sles12sp2/ext4-prealloc.patch
-sles12sp2/ext4-osd-iop-common.patch
-sles12sp2/ext4-misc.patch
-sles12sp3/ext4-mballoc-extra-checks.patch
-sles12sp2/ext4-hash-indexed-dir-dotdot-update.patch
-sles12sp2/ext4-kill-dx-root.patch
-rhel7.6/ext4-mballoc-pa-free-mismatch.patch
-sles12sp3/ext4-data-in-dirent.patch
-sles12sp3/ext4-large-eas.patch
-sles12sp3/ext4-disable-mb-cache.patch
-rhel7.6/ext4-nocmtime.patch
-sles12sp3/ext4-large-dir.patch
-base/ext4-htree-lock.patch
-sles12sp3/ext4-pdirop.patch
-sles12sp3/ext4-max-dir-size.patch
-sles12sp3/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
-sles12sp2/ext4-give-warning-with-dir-htree-growing.patch
-sles12sp2/ext4-mmp-brelse.patch
-rhel7.6/ext4-jcb-optimization.patch
-sles12sp2/ext4-attach-jinode-in-writepages.patch
-sles12sp3/ext4-dont-check-before-replay.patch
-sles12sp2/ext4-fix-xattr-shifting-when-expanding-inodes.patch
-rhel7.6/ext4-use-GFP_NOFS-in-ext4_inode_attach_jinode.patch
-rhel7.6/ext4-export-orphan-add.patch
-rhel7.6/ext4-include-terminating-u32-in-size-of-xattr-entries-when-expanding-inodes.patch
-sles12sp2/ext4-export-mb-stream-allocator-variables.patch
-rhel7.6/ext4-optimize-ext4_find_delalloc_range-in-nodelalloc.patch
rhel8/ext4-inode-version.patch
-sles12sp2/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-lookup-dotdot.patch
+suse15/ext4-print-inum-in-htree-warning.patch
rhel8/ext4-prealloc.patch
ubuntu18/ext4-osd-iop-common.patch
ubuntu19/ext4-misc.patch
rhel8/ext4-nocmtime.patch
base/ext4-htree-lock.patch
rhel8/ext4-pdirop.patch
-sles12sp3/ext4-max-dir-size.patch
+rhel8/ext4-max-dir-size.patch
rhel8/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
ubuntu18/ext4-give-warning-with-dir-htree-growing.patch
ubuntu18/ext4-jcb-optimization.patch
rhel8/ext4-inode-version.patch
linux-5.4/ext4-lookup-dotdot.patch
-sles12sp2/ext4-print-inum-in-htree-warning.patch
+suse15/ext4-print-inum-in-htree-warning.patch
rhel8/ext4-prealloc.patch
ubuntu18/ext4-osd-iop-common.patch
ubuntu19/ext4-misc.patch
rhel8/ext4-nocmtime.patch
base/ext4-htree-lock.patch
linux-5.4/ext4-pdirop.patch
-sles12sp3/ext4-max-dir-size.patch
+rhel8/ext4-max-dir-size.patch
rhel8/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
linux-5.4/ext4-give-warning-with-dir-htree-growing.patch
ubuntu18/ext4-jcb-optimization.patch
lustre/kernel_patches/targets/3.10-rhel7.5.target
lustre/kernel_patches/targets/4.14-rhel7.5.target
lustre/kernel_patches/targets/4.14-rhel7.6.target
-lustre/kernel_patches/targets/3.0-sles11.target
-lustre/kernel_patches/targets/3.0-sles11sp3.target
-lustre/kernel_patches/targets/3.0-sles11sp4.target
-lustre/kernel_patches/targets/3.12-sles12.target
-lustre/kernel_patches/targets/4.4-sles12.target
-lustre/kernel_patches/targets/4.4-sles12sp3.target
lustre/kernel_patches/targets/4.12-sles12sp4.target
lustre/kernel_patches/targets/4.12-sles15sp1.target
lustre/kernel_patches/targets/3.x-fc18.target
+++ /dev/null
-Increase the buffer-head per-CPU LRU size to allow efficient
-filesystem operations that access many blocks for each transaction.
-For example, creating a file in a large ext4 directory with quota
-enabled will access multiple buffer heads and will overflow the LRU
-at the default 8-block LRU size:
-
-* parent directory inode table block (ctime, nlinks for subdirs)
-* new inode bitmap
-* inode table block
-* 2 quota blocks
-* directory leaf block (not reused, but pollutes one cache entry)
-* 2 levels htree blocks (only one is reused, other pollutes cache)
-* 2 levels indirect/index blocks (only one is reused)
-
-The buffer-head per-CPU LRU size is raised to 16, as it shows in
-metadata performance benchmarks up to 10% gain for create,
-4% for lookup and 7% for destroy.
-
-Signed-off-by: Liang Zhen <liang.zhen@intel.com>
-Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
-Signed-off-by: Sebastien Buisson <sebastien.buisson@bull.net>
----
- fs/buffer.c | 2 +-
- 1 files changed, 1 insertions(+), 1 deletions(-)
-
-diff --git a/fs/buffer.c b/fs/buffer.c
-index 6024877..a6468f2 100644
---- a/fs/buffer.c
-+++ b/fs/buffer.c
-@@ -1256,7 +1256,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh)
- * a local interrupt disable for that.
- */
-
--#define BH_LRU_SIZE 8
-+#define BH_LRU_SIZE 16
-
- struct bh_lru {
- struct buffer_head *bhs[BH_LRU_SIZE];
---
-1.7.1
-
+++ /dev/null
---- a/block/blk-settings.c 2013-02-06 12:40:44.000000000 -0500
-+++ b/block/blk-settings.c 2013-02-06 12:55:28.000000000 -0500
-@@ -19,6 +19,12 @@
-
- unsigned long blk_max_pfn;
-
-+int default_max_sectors = BLK_DEF_MAX_SECTORS;
-+module_param(default_max_sectors, int, 0);
-+
-+int default_max_segments = BLK_MAX_SEGMENTS;
-+module_param(default_max_segments, int, 0);
-+
- /**
- * blk_queue_prep_rq - set a prepare_request function for queue
- * @q: queue
-@@ -108,7 +114,7 @@
- */
- void blk_set_default_limits(struct queue_limits *lim)
- {
-- lim->max_segments = BLK_MAX_SEGMENTS;
-+ lim->max_segments = default_max_segments;
- lim->max_integrity_segments = 0;
- lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-@@ -255,7 +261,7 @@
-
- limits->max_hw_sectors = max_hw_sectors;
- limits->max_sectors = min_t(unsigned int, max_hw_sectors,
-- BLK_DEF_MAX_SECTORS);
-+ default_max_sectors);
- }
- EXPORT_SYMBOL(blk_limits_max_hw_sectors);
-
---- a/drivers/scsi/Kconfig 2013-02-07 09:25:49.000000000 -0500
-+++ b/drivers/scsi/Kconfig 2013-02-07 09:30:15.000000000 -0500
-@@ -245,6 +245,15 @@ config SCSI_SCAN_ASYNC
- there should be no noticeable performance impact as long as you have
- logging turned off.
-
-+config SCSI_MAX_SG_SEGMENTS
-+ int "Maximum SCSI scatter gather segment size"
-+ range 32 256
-+ default "128"
-+ depends on SCSI
-+ help
-+ Control the maximum limit for scatter gather buffers for the
-+ SCSI device.
-+
- config SCSI_SCAN_ASYNC
- bool "Asynchronous SCSI scanning"
- depends on SCSI
---- a/include/scsi/scsi.h 2013-02-07 09:55:02.000000000 -0500
-+++ b/include/scsi/scsi.h 2013-02-07 09:55:20.000000000 -0500
-@@ -20,7 +20,7 @@ struct scsi_cmnd;
- * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The
- * minimum value is 32
- */
--#define SCSI_MAX_SG_SEGMENTS 128
-+#define SCSI_MAX_SG_SEGMENTS CONFIG_SCSI_MAX_SG_SEGMENTS
-
- /*
- * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
---- a/drivers/scsi/isci/init.c 2013-02-08 10:13:00.000000000 -0500
-+++ b/drivers/scsi/isci/init.c 2013-02-08 10:15:04.000000000 -0500
-@@ -118,6 +118,10 @@ unsigned char phy_gen = 3;
- module_param(phy_gen, byte, 0);
- MODULE_PARM_DESC(phy_gen, "PHY generation (1: 1.5Gbps 2: 3.0Gbps 3: 6.0Gbps)");
-
-+u16 sg_table_size = SG_ALL;
-+module_param(sg_table_size, ushort, 0);
-+MODULE_PARM_DESC(sg_table_size, "Size in KB of scatter gather table");
-+
- unsigned char max_concurr_spinup = 1;
- module_param(max_concurr_spinup, byte, 0);
- MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup");
-@@ -155,7 +159,6 @@ static struct scsi_host_template isci_sh
- .can_queue = ISCI_CAN_QUEUE_VAL,
- .cmd_per_lun = 1,
- .this_id = -1,
-- .sg_tablesize = SG_ALL,
- .max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
- .eh_device_reset_handler = sas_eh_device_reset_handler,
-@@ -407,6 +410,7 @@ static struct isci_host *isci_host_alloc
- isci_host->pdev = pdev;
- isci_host->id = id;
-
-+ isci_sht.sg_tablesize = sg_table_size;
- shost = scsi_host_alloc(&isci_sht, sizeof(void *));
- if (!shost)
- return NULL;
---- a/drivers/message/fusion/Kconfig 2013-02-08 10:21:25.000000000 -0500
-+++ b/drivers/message/fusion/Kconfig 2013-02-08 10:22:37.000000000 -0500
-@@ -61,9 +61,9 @@
- LSISAS1078
-
- config FUSION_MAX_SGE
-- int "Maximum number of scatter gather entries for SAS and SPI (16 - 128)"
-- default "128"
-- range 16 128
-+ int "Maximum number of scatter gather entries for SAS and SPI (16 - 256)"
-+ default "256"
-+ range 16 256
- help
- This option allows you to specify the maximum number of scatter-
- gather entries per I/O. The driver default is 128, which matches
---- a/drivers/message/fusion/mptbase.h 2013-02-08 10:32:45.000000000 -0500
-+++ b/drivers/message/fusion/mptbase.h 2013-02-08 10:32:55.000000000 -0500
-@@ -168,8 +168,8 @@
- #ifdef CONFIG_FUSION_MAX_SGE
- #if CONFIG_FUSION_MAX_SGE < 16
- #define MPT_SCSI_SG_DEPTH 16
--#elif CONFIG_FUSION_MAX_SGE > 128
--#define MPT_SCSI_SG_DEPTH 128
-+#elif CONFIG_FUSION_MAX_SGE > 256
-+#define MPT_SCSI_SG_DEPTH 256
- #else
- #define MPT_SCSI_SG_DEPTH CONFIG_FUSION_MAX_SGE
- #endif
+++ /dev/null
-Index: linux-3.0.82-0.7.9/block/blk-settings.c
-===================================================================
---- linux-3.0.82-0.7.9.orig/block/blk-settings.c
-+++ linux-3.0.82-0.7.9/block/blk-settings.c
-@@ -19,6 +19,12 @@ EXPORT_SYMBOL(blk_max_low_pfn);
-
- unsigned long blk_max_pfn;
-
-+int default_max_sectors = BLK_DEF_MAX_SECTORS;
-+module_param(default_max_sectors, int, 0);
-+
-+int default_max_segments = BLK_MAX_SEGMENTS;
-+module_param(default_max_segments, int, 0);
-+
- /**
- * blk_queue_prep_rq - set a prepare_request function for queue
- * @q: queue
-@@ -108,7 +114,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
- */
- void blk_set_default_limits(struct queue_limits *lim)
- {
-- lim->max_segments = BLK_MAX_SEGMENTS;
-+ lim->max_segments = default_max_segments;
- lim->max_integrity_segments = 0;
- lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-@@ -255,7 +261,7 @@ void blk_limits_max_hw_sectors(struct qu
-
- limits->max_hw_sectors = max_hw_sectors;
- limits->max_sectors = min_t(unsigned int, max_hw_sectors,
-- BLK_DEF_MAX_SECTORS);
-+ default_max_sectors);
- }
- EXPORT_SYMBOL(blk_limits_max_hw_sectors);
-
-Index: linux-3.0.82-0.7.9/drivers/scsi/Kconfig
-===================================================================
---- linux-3.0.82-0.7.9.orig/drivers/scsi/Kconfig
-+++ linux-3.0.82-0.7.9/drivers/scsi/Kconfig
-@@ -245,6 +245,15 @@ config SCSI_LOGGING
- there should be no noticeable performance impact as long as you have
- logging turned off.
-
-+config SCSI_MAX_SG_SEGMENTS
-+ int "Maximum SCSI scatter gather segment size"
-+ range 32 256
-+ default "128"
-+ depends on SCSI
-+ help
-+ Control the maximum limit for scatter gather buffers for the
-+ SCSI device.
-+
- config SCSI_SCAN_ASYNC
- bool "Asynchronous SCSI scanning"
- depends on SCSI
-Index: linux-3.0.82-0.7.9/include/scsi/scsi.h
-===================================================================
---- linux-3.0.82-0.7.9.orig/include/scsi/scsi.h
-+++ linux-3.0.82-0.7.9/include/scsi/scsi.h
-@@ -20,7 +20,7 @@ struct scsi_cmnd;
- * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The
- * minimum value is 32
- */
--#define SCSI_MAX_SG_SEGMENTS 128
-+#define SCSI_MAX_SG_SEGMENTS CONFIG_SCSI_MAX_SG_SEGMENTS
-
- /*
- * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
-Index: linux-3.0.82-0.7.9/drivers/scsi/isci/init.c
-===================================================================
---- linux-3.0.82-0.7.9.orig/drivers/scsi/isci/init.c
-+++ linux-3.0.82-0.7.9/drivers/scsi/isci/init.c
-@@ -119,6 +119,10 @@ unsigned char phy_gen = SCIC_SDS_PARM_GE
- module_param(phy_gen, byte, 0);
- MODULE_PARM_DESC(phy_gen, "PHY generation (1: 1.5Gbps 2: 3.0Gbps 3: 6.0Gbps)");
-
-+u16 sg_table_size = SG_ALL;
-+module_param(sg_table_size, ushort, 0);
-+MODULE_PARM_DESC(sg_table_size, "Size in KB of scatter gather table");
-+
- unsigned char max_concurr_spinup;
- module_param(max_concurr_spinup, byte, 0);
- MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup");
-@@ -163,7 +167,6 @@ static struct scsi_host_template isci_sh
- .can_queue = ISCI_CAN_QUEUE_VAL,
- .cmd_per_lun = 1,
- .this_id = -1,
-- .sg_tablesize = SG_ALL,
- .max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
- .eh_abort_handler = sas_eh_abort_handler,
-@@ -574,6 +577,7 @@ static struct isci_host *isci_host_alloc
- INIT_LIST_HEAD(&idev->node);
- }
-
-+ isci_sht.sg_tablesize = sg_table_size;
- shost = scsi_host_alloc(&isci_sht, sizeof(void *));
- if (!shost)
- return NULL;
-Index: linux-3.0.82-0.7.9/drivers/message/fusion/Kconfig
-===================================================================
---- linux-3.0.82-0.7.9.orig/drivers/message/fusion/Kconfig
-+++ linux-3.0.82-0.7.9/drivers/message/fusion/Kconfig
-@@ -61,9 +61,9 @@ config FUSION_SAS
- LSISAS1078
-
- config FUSION_MAX_SGE
-- int "Maximum number of scatter gather entries for SAS and SPI (16 - 128)"
-- default "128"
-- range 16 128
-+ int "Maximum number of scatter gather entries for SAS and SPI (16 - 256)"
-+ default "256"
-+ range 16 256
- help
- This option allows you to specify the maximum number of scatter-
- gather entries per I/O. The driver default is 128, which matches
-Index: linux-3.0.82-0.7.9/drivers/message/fusion/mptbase.h
-===================================================================
---- linux-3.0.82-0.7.9.orig/drivers/message/fusion/mptbase.h
-+++ linux-3.0.82-0.7.9/drivers/message/fusion/mptbase.h
-@@ -168,8 +168,8 @@
- #ifdef CONFIG_FUSION_MAX_SGE
- #if CONFIG_FUSION_MAX_SGE < 16
- #define MPT_SCSI_SG_DEPTH 16
--#elif CONFIG_FUSION_MAX_SGE > 128
--#define MPT_SCSI_SG_DEPTH 128
-+#elif CONFIG_FUSION_MAX_SGE > 256
-+#define MPT_SCSI_SG_DEPTH 256
- #else
- #define MPT_SCSI_SG_DEPTH CONFIG_FUSION_MAX_SGE
- #endif
+++ /dev/null
---- a/block/blk-settings.c 2013-02-06 12:40:44.000000000 -0500
-+++ b/block/blk-settings.c 2013-02-06 12:55:28.000000000 -0500
-@@ -19,6 +19,12 @@
-
- unsigned long blk_max_pfn;
-
-+int default_max_sectors = BLK_DEF_MAX_SECTORS;
-+module_param(default_max_sectors, int, 0);
-+
-+int default_max_segments = BLK_MAX_SEGMENTS;
-+module_param(default_max_segments, int, 0);
-+
- /**
- * blk_queue_prep_rq - set a prepare_request function for queue
- * @q: queue
-@@ -108,7 +114,7 @@
- */
- void blk_set_default_limits(struct queue_limits *lim)
- {
-- lim->max_segments = BLK_MAX_SEGMENTS;
-+ lim->max_segments = default_max_segments;
- lim->max_integrity_segments = 0;
- lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-@@ -255,7 +261,7 @@
-
- limits->max_hw_sectors = max_hw_sectors;
- limits->max_sectors = min_t(unsigned int, max_hw_sectors,
-- BLK_DEF_MAX_SECTORS);
-+ default_max_sectors);
- }
- EXPORT_SYMBOL(blk_limits_max_hw_sectors);
-
---- a/drivers/scsi/Kconfig 2013-02-07 09:25:49.000000000 -0500
-+++ b/drivers/scsi/Kconfig 2013-02-07 09:30:15.000000000 -0500
-@@ -245,6 +245,15 @@ config SCSI_SCAN_ASYNC
- there should be no noticeable performance impact as long as you have
- logging turned off.
-
-+config SCSI_MAX_SG_SEGMENTS
-+ int "Maximum SCSI scatter gather segment size"
-+ range 32 256
-+ default "128"
-+ depends on SCSI
-+ help
-+ Control the maximum limit for scatter gather buffers for the
-+ SCSI device.
-+
- config SCSI_SCAN_ASYNC
- bool "Asynchronous SCSI scanning"
- depends on SCSI
---- a/include/scsi/scsi.h 2013-02-07 09:55:02.000000000 -0500
-+++ b/include/scsi/scsi.h 2013-02-07 09:55:20.000000000 -0500
-@@ -20,7 +20,7 @@ struct scsi_cmnd;
- * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The
- * minimum value is 32
- */
--#define SCSI_MAX_SG_SEGMENTS 128
-+#define SCSI_MAX_SG_SEGMENTS CONFIG_SCSI_MAX_SG_SEGMENTS
-
- /*
- * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
---- a/drivers/scsi/isci/init.c 2013-02-07 09:59:49.000000000 -0500
-+++ b/drivers/scsi/isci/init.c 2013-02-07 10:01:51.000000000 -0500
-@@ -119,6 +119,10 @@
- module_param(phy_gen, byte, 0);
- MODULE_PARM_DESC(phy_gen, "PHY generation (1: 1.5Gbps 2: 3.0Gbps 3: 6.0Gbps)");
-
-+u16 sg_table_size = SG_ALL;
-+module_param(sg_table_size, ushort, 0);
-+MODULE_PARM_DESC(sg_table_size, "Size in KB of scatter gather table");
-+
- unsigned char max_concurr_spinup;
- module_param(max_concurr_spinup, byte, 0);
- MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup");
-@@ -163,7 +167,6 @@
- .can_queue = ISCI_CAN_QUEUE_VAL,
- .cmd_per_lun = 1,
- .this_id = -1,
-- .sg_tablesize = SG_ALL,
- .max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
- .eh_abort_handler = sas_eh_abort_handler,
-@@ -573,6 +576,7 @@
-
- INIT_LIST_HEAD(&idev->node);
- }
-+ isci_sht.sg_tablesize = sg_table_size;
-
- shost = scsi_host_alloc(&isci_sht, sizeof(void *));
- if (!shost)
-Increase MAX_SGE for fusion mpt driver.
-
-Index: linux-2.6.32.i386/drivers/message/fusion/Kconfig
-===================================================================
---- linux-2.6.32.i386.orig/drivers/message/fusion/Kconfig 2009-12-03 09:21:21.000000000 +0530
-+++ linux-2.6.32.i386/drivers/message/fusion/Kconfig 2010-03-16 16:45:08.000000000 +0530
-@@ -61,9 +61,9 @@
- LSISAS1078
-
- config FUSION_MAX_SGE
-- int "Maximum number of scatter gather entries (16 - 128)"
-- default "128"
-- range 16 128
-+ int "Maximum number of scatter gather entries (16 - 256)"
-+ default "256"
-+ range 16 256
- help
- This option allows you to specify the maximum number of scatter-
- gather entries per I/O. The driver default is 128, which matches
-Index: linux-2.6.32.i386/drivers/message/fusion/mptbase.h
-===================================================================
---- linux-2.6.32.i386.orig/drivers/message/fusion/mptbase.h 2009-12-03 09:21:21.000000000 +0530
-+++ linux-2.6.32.i386/drivers/message/fusion/mptbase.h 2010-03-16 16:46:54.000000000 +0530
-@@ -165,10 +165,10 @@
- * Set the MAX_SGE value based on user input.
- */
- #ifdef CONFIG_FUSION_MAX_SGE
--#if CONFIG_FUSION_MAX_SGE < 16
-+#if CONFIG_FUSION_MAX_SGE < 16
- #define MPT_SCSI_SG_DEPTH 16
--#elif CONFIG_FUSION_MAX_SGE > 128
--#define MPT_SCSI_SG_DEPTH 128
-+#elif CONFIG_FUSION_MAX_SGE > 256
-+#define MPT_SCSI_SG_DEPTH 256
- #else
- #define MPT_SCSI_SG_DEPTH CONFIG_FUSION_MAX_SGE
- #endif
+++ /dev/null
-Index: linux-4.4.21-64/block/blk-settings.c
-===================================================================
---- linux-4.4.21-64.orig/block/blk-settings.c
-+++ linux-4.4.21-64/block/blk-settings.c
-@@ -19,6 +19,12 @@ EXPORT_SYMBOL(blk_max_low_pfn);
-
- unsigned long blk_max_pfn;
-
-+int default_max_sectors = BLK_DEF_MAX_SECTORS;
-+module_param(default_max_sectors, int, 0);
-+
-+int default_max_segments = BLK_MAX_SEGMENTS;
-+module_param(default_max_segments, int, 0);
-+
- /**
- * blk_queue_prep_rq - set a prepare_request function for queue
- * @q: queue
-@@ -86,7 +92,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
- */
- void blk_set_default_limits(struct queue_limits *lim)
- {
-- lim->max_segments = BLK_MAX_SEGMENTS;
-+ lim->max_segments = default_max_segments;
- lim->max_integrity_segments = 0;
- lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- lim->virt_boundary_mask = 0;
-@@ -247,7 +253,7 @@ void blk_queue_max_hw_sectors(struct req
-
- limits->max_hw_sectors = max_hw_sectors;
- max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
-- max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
-+ max_sectors = min_t(unsigned int, max_sectors, default_max_sectors);
- limits->max_sectors = max_sectors;
- }
- EXPORT_SYMBOL(blk_queue_max_hw_sectors);
-Index: linux-4.4.21-64/drivers/scsi/Kconfig
-===================================================================
---- linux-4.4.21-64.orig/drivers/scsi/Kconfig
-+++ linux-4.4.21-64/drivers/scsi/Kconfig
-@@ -235,6 +235,15 @@ config SCSI_LOGGING
- there should be no noticeable performance impact as long as you have
- logging turned off.
-
-+config SCSI_MAX_SG_SEGMENTS
-+ int "Maximum SCSI scatter gather segment size"
-+ range 32 256
-+ default "128"
-+ depends on SCSI
-+ help
-+ Control the maximum limit for scatter gather buffers for the
-+ SCSI device.
-+
- config SCSI_SCAN_ASYNC
- bool "Asynchronous SCSI scanning"
- depends on SCSI
-Index: linux-4.4.21-64/include/scsi/scsi.h
-===================================================================
---- linux-4.4.21-64.orig/include/scsi/scsi.h
-+++ linux-4.4.21-64/include/scsi/scsi.h
-@@ -24,7 +24,7 @@ enum scsi_timeouts {
- * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The
- * minimum value is 32
- */
--#define SCSI_MAX_SG_SEGMENTS 128
-+#define SCSI_MAX_SG_SEGMENTS CONFIG_SCSI_MAX_SG_SEGMENTS
-
- /*
- * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
-Index: linux-4.4.21-64/drivers/scsi/isci/init.c
-===================================================================
---- linux-4.4.21-64.orig/drivers/scsi/isci/init.c
-+++ linux-4.4.21-64/drivers/scsi/isci/init.c
-@@ -119,6 +119,10 @@ unsigned char phy_gen = SCIC_SDS_PARM_GE
- module_param(phy_gen, byte, 0);
- MODULE_PARM_DESC(phy_gen, "PHY generation (1: 1.5Gbps 2: 3.0Gbps 3: 6.0Gbps)");
-
-+u16 sg_table_size = SG_ALL;
-+module_param(sg_table_size, ushort, 0);
-+MODULE_PARM_DESC(sg_table_size, "Size in KB of scatter gather table");
-+
- unsigned char max_concurr_spinup;
- module_param(max_concurr_spinup, byte, 0);
- MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup");
-@@ -161,7 +165,6 @@ static struct scsi_host_template isci_sh
- .bios_param = sas_bios_param,
- .can_queue = ISCI_CAN_QUEUE_VAL,
- .this_id = -1,
-- .sg_tablesize = SG_ALL,
- .max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
- .eh_abort_handler = sas_eh_abort_handler,
-@@ -570,6 +573,7 @@ static struct isci_host *isci_host_alloc
-
- INIT_LIST_HEAD(&idev->node);
- }
-+ isci_sht.sg_tablesize = sg_table_size;
-
- shost = scsi_host_alloc(&isci_sht, sizeof(void *));
- if (!shost)
-Index: linux-4.4.21-64/drivers/message/fusion/Kconfig
-===================================================================
---- linux-4.4.21-64.orig/drivers/message/fusion/Kconfig
-+++ linux-4.4.21-64/drivers/message/fusion/Kconfig
-@@ -61,9 +61,9 @@ config FUSION_SAS
- LSISAS1078
-
- config FUSION_MAX_SGE
-- int "Maximum number of scatter gather entries (16 - 128)"
-- default "128"
-- range 16 128
-+ int "Maximum number of scatter gather entries (16 - 256)"
-+ default "256"
-+ range 16 256
- help
- This option allows you to specify the maximum number of scatter-
- gather entries per I/O. The driver default is 128, which matches
-Index: linux-4.4.21-64/drivers/message/fusion/mptbase.h
-===================================================================
---- linux-4.4.21-64.orig/drivers/message/fusion/mptbase.h
-+++ linux-4.4.21-64/drivers/message/fusion/mptbase.h
-@@ -166,10 +166,10 @@
- * Set the MAX_SGE value based on user input.
- */
- #ifdef CONFIG_FUSION_MAX_SGE
--#if CONFIG_FUSION_MAX_SGE < 16
-+#if CONFIG_FUSION_MAX_SGE < 16
- #define MPT_SCSI_SG_DEPTH 16
--#elif CONFIG_FUSION_MAX_SGE > 128
--#define MPT_SCSI_SG_DEPTH 128
-+#elif CONFIG_FUSION_MAX_SGE > 256
-+#define MPT_SCSI_SG_DEPTH 256
- #else
- #define MPT_SCSI_SG_DEPTH CONFIG_FUSION_MAX_SGE
- #endif
+++ /dev/null
-Index: linux-3.10.0-495.el7.x86_64/block/blk-settings.c
-===================================================================
---- linux-3.10.0-495.el7.x86_64.orig/block/blk-settings.c
-+++ linux-3.10.0-495.el7.x86_64/block/blk-settings.c
-@@ -19,6 +19,12 @@ EXPORT_SYMBOL(blk_max_low_pfn);
-
- unsigned long blk_max_pfn;
-
-+int default_max_sectors = BLK_DEF_MAX_SECTORS;
-+module_param(default_max_sectors, int, 0);
-+
-+int default_max_segments = BLK_MAX_SEGMENTS;
-+module_param(default_max_segments, int, 0);
-+
- /**
- * blk_queue_prep_rq - set a prepare_request function for queue
- * @q: queue
-@@ -108,7 +114,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
- */
- void blk_set_default_limits(struct queue_limits *lim)
- {
-- lim->max_segments = BLK_MAX_SEGMENTS;
-+ lim->max_segments = default_max_segments;
- lim->max_integrity_segments = 0;
- lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- if (lim->limits_aux)
-@@ -268,7 +274,7 @@ void blk_limits_max_hw_sectors(struct qu
-
- limits->max_hw_sectors = max_hw_sectors;
- max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
-- max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
-+ max_sectors = min_t(unsigned int, max_sectors, default_max_sectors);
- limits->max_sectors = max_sectors;
- }
- EXPORT_SYMBOL(blk_limits_max_hw_sectors);
-Index: linux-3.10.0-495.el7.x86_64/drivers/scsi/Kconfig
-===================================================================
---- linux-3.10.0-495.el7.x86_64.orig/drivers/scsi/Kconfig
-+++ linux-3.10.0-495.el7.x86_64/drivers/scsi/Kconfig
-@@ -246,6 +246,15 @@ config SCSI_LOGGING
- there should be no noticeable performance impact as long as you have
- logging turned off.
-
-+config SCSI_MAX_SG_SEGMENTS
-+ int "Maximum SCSI scatter gather segment size"
-+ range 32 256
-+ default "128"
-+ depends on SCSI
-+ help
-+ Control the maximum limit for scatter gather buffers for the
-+ SCSI device.
-+
- config SCSI_SCAN_ASYNC
- bool "Asynchronous SCSI scanning"
- depends on SCSI
-Index: linux-3.10.0-495.el7.x86_64/include/scsi/scsi.h
-===================================================================
---- linux-3.10.0-495.el7.x86_64.orig/include/scsi/scsi.h
-+++ linux-3.10.0-495.el7.x86_64/include/scsi/scsi.h
-@@ -25,7 +25,7 @@ enum scsi_timeouts {
- * to SG_MAX_SINGLE_ALLOC to pack correctly at the highest order. The
- * minimum value is 32
- */
--#define SCSI_MAX_SG_SEGMENTS 128
-+#define SCSI_MAX_SG_SEGMENTS CONFIG_SCSI_MAX_SG_SEGMENTS
-
- /*
- * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
-Index: linux-3.10.0-495.el7.x86_64/drivers/scsi/isci/init.c
-===================================================================
---- linux-3.10.0-495.el7.x86_64.orig/drivers/scsi/isci/init.c
-+++ linux-3.10.0-495.el7.x86_64/drivers/scsi/isci/init.c
-@@ -119,6 +119,10 @@ unsigned char phy_gen = SCIC_SDS_PARM_GE
- module_param(phy_gen, byte, 0);
- MODULE_PARM_DESC(phy_gen, "PHY generation (1: 1.5Gbps 2: 3.0Gbps 3: 6.0Gbps)");
-
-+u16 sg_table_size = SG_ALL;
-+module_param(sg_table_size, ushort, 0);
-+MODULE_PARM_DESC(sg_table_size, "Size in KB of scatter gather table");
-+
- unsigned char max_concurr_spinup;
- module_param(max_concurr_spinup, byte, 0);
- MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup");
-@@ -163,7 +167,6 @@ static struct scsi_host_template isci_sh
- .can_queue = ISCI_CAN_QUEUE_VAL,
- .cmd_per_lun = 1,
- .this_id = -1,
-- .sg_tablesize = SG_ALL,
- .max_sectors = SCSI_DEFAULT_MAX_SECTORS,
- .use_clustering = ENABLE_CLUSTERING,
- .eh_abort_handler = sas_eh_abort_handler,
-@@ -571,6 +574,7 @@ static struct isci_host *isci_host_alloc
-
- INIT_LIST_HEAD(&idev->node);
- }
-+ isci_sht.sg_tablesize = sg_table_size;
-
- shost = scsi_host_alloc(&isci_sht, sizeof(void *));
- if (!shost)
-Index: linux-3.10.0-495.el7.x86_64/drivers/message/fusion/Kconfig
-===================================================================
---- linux-3.10.0-495.el7.x86_64.orig/drivers/message/fusion/Kconfig
-+++ linux-3.10.0-495.el7.x86_64/drivers/message/fusion/Kconfig
-@@ -61,9 +61,9 @@ config FUSION_SAS
- LSISAS1078
-
- config FUSION_MAX_SGE
-- int "Maximum number of scatter gather entries (16 - 128)"
-- default "128"
-- range 16 128
-+ int "Maximum number of scatter gather entries (16 - 256)"
-+ default "256"
-+ range 16 256
- help
- This option allows you to specify the maximum number of scatter-
- gather entries per I/O. The driver default is 128, which matches
-Index: linux-3.10.0-495.el7.x86_64/drivers/message/fusion/mptbase.h
-===================================================================
---- linux-3.10.0-495.el7.x86_64.orig/drivers/message/fusion/mptbase.h
-+++ linux-3.10.0-495.el7.x86_64/drivers/message/fusion/mptbase.h
-@@ -166,10 +166,10 @@
- * Set the MAX_SGE value based on user input.
- */
- #ifdef CONFIG_FUSION_MAX_SGE
--#if CONFIG_FUSION_MAX_SGE < 16
-+#if CONFIG_FUSION_MAX_SGE < 16
- #define MPT_SCSI_SG_DEPTH 16
--#elif CONFIG_FUSION_MAX_SGE > 128
--#define MPT_SCSI_SG_DEPTH 128
-+#elif CONFIG_FUSION_MAX_SGE > 256
-+#define MPT_SCSI_SG_DEPTH 256
- #else
- #define MPT_SCSI_SG_DEPTH CONFIG_FUSION_MAX_SGE
- #endif
+++ /dev/null
-Index: linux-4.4.59-1/block/blk-settings.c
-===================================================================
---- linux-4.4.59-1.orig/block/blk-settings.c
-+++ linux-4.4.59-1/block/blk-settings.c
-@@ -20,6 +20,12 @@ EXPORT_SYMBOL(blk_max_low_pfn);
-
- unsigned long blk_max_pfn;
-
-+int default_max_sectors = BLK_DEF_MAX_SECTORS;
-+module_param(default_max_sectors, int, 0);
-+
-+int default_max_segments = BLK_MAX_SEGMENTS;
-+module_param(default_max_segments, int, 0);
-+
- /**
- * blk_queue_prep_rq - set a prepare_request function for queue
- * @q: queue
-@@ -87,7 +93,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
- */
- void blk_set_default_limits(struct queue_limits *lim)
- {
-- lim->max_segments = BLK_MAX_SEGMENTS;
-+ lim->max_segments = default_max_segments;
- lim->max_integrity_segments = 0;
- lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
- lim->virt_boundary_mask = 0;
-@@ -251,7 +257,7 @@ void blk_queue_max_hw_sectors(struct req
-
- limits->max_hw_sectors = max_hw_sectors;
- max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
-- max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
-+ max_sectors = min_t(unsigned int, max_sectors, default_max_sectors);
- limits->max_sectors = max_sectors;
- q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
- }
-Index: linux-4.4.59-1/drivers/scsi/Kconfig
-===================================================================
---- linux-4.4.59-1.orig/drivers/scsi/Kconfig
-+++ linux-4.4.59-1/drivers/scsi/Kconfig
-@@ -236,6 +236,24 @@ config SCSI_LOGGING
- there should be no noticeable performance impact as long as you have
- logging turned off.
-
-+config SCSI_MAX_SG_SEGMENTS
-+ int "Maximum SCSI scatter gather segment size"
-+ range 32 256
-+ default "128"
-+ depends on SCSI
-+ help
-+ Control the maximum limit for scatter gather buffers for the
-+ SCSI device.
-+
-+config SCSI_MAX_SG_SEGMENTS
-+ int "Maximum SCSI scatter gather segment size"
-+ range 32 256
-+ default "128"
-+ depends on SCSI
-+ help
-+ Control the maximum limit for scatter gather buffers for the
-+ SCSI device.
-+
- config SCSI_SCAN_ASYNC
- bool "Asynchronous SCSI scanning"
- depends on SCSI
-Index: linux-4.4.59-1/drivers/message/fusion/Kconfig
-===================================================================
---- linux-4.4.59-1.orig/drivers/message/fusion/Kconfig
-+++ linux-4.4.59-1/drivers/message/fusion/Kconfig
-@@ -61,9 +61,9 @@ config FUSION_SAS
- LSISAS1078
-
- config FUSION_MAX_SGE
-- int "Maximum number of scatter gather entries (16 - 128)"
-- default "128"
-- range 16 128
-+ int "Maximum number of scatter gather entries (16 - 256)"
-+ default "256"
-+ range 16 256
- help
- This option allows you to specify the maximum number of scatter-
- gather entries per I/O. The driver default is 128, which matches
-Index: linux-4.4.59-1/drivers/message/fusion/mptbase.h
-===================================================================
---- linux-4.4.59-1.orig/drivers/message/fusion/mptbase.h
-+++ linux-4.4.59-1/drivers/message/fusion/mptbase.h
-@@ -166,10 +166,10 @@
- * Set the MAX_SGE value based on user input.
- */
- #ifdef CONFIG_FUSION_MAX_SGE
--#if CONFIG_FUSION_MAX_SGE < 16
-+#if CONFIG_FUSION_MAX_SGE < 16
- #define MPT_SCSI_SG_DEPTH 16
--#elif CONFIG_FUSION_MAX_SGE > 128
--#define MPT_SCSI_SG_DEPTH 128
-+#elif CONFIG_FUSION_MAX_SGE > 256
-+#define MPT_SCSI_SG_DEPTH 256
- #else
- #define MPT_SCSI_SG_DEPTH CONFIG_FUSION_MAX_SGE
- #endif
+++ /dev/null
-jbd2: fix incorrect unlock on j_list_lock
-
-When 'jh->b_transaction == transaction' (asserted by below)
-
- J_ASSERT_JH(jh, (jh->b_transaction == transaction || ...
-
-'journal->j_list_lock' will be incorrectly unlocked, since
-the the lock is aquired only at the end of if / else-if
-statements (missing the else case).
-
-This bug has been introduced by an earlier change named
-"jbd2: minimize region locked by j_list_lock in journal_get_create_access()".
-
-Signed-off-by: Taesoo Kim <tsgatesv@gmail.com>
-
-Index: linux-3.10.0-327.36.1.el7/fs/jbd2/transaction.c
-===================================================================
---- linux-3.10.0-327.36.1.el7.orig/fs/jbd2/transaction.c
-+++ linux-3.10.0-327.36.1.el7/fs/jbd2/transaction.c
-@@ -1091,6 +1091,7 @@
- JBUFFER_TRACE(jh, "file as BJ_Reserved");
- spin_lock(&journal->j_list_lock);
- __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
-+ spin_unlock(&journal->j_list_lock);
- } else if (jh->b_transaction == journal->j_committing_transaction) {
- /* first access by this transaction */
- jh->b_modified = 0;
-@@ -1098,8 +1099,8 @@
- JBUFFER_TRACE(jh, "set next transaction");
- spin_lock(&journal->j_list_lock);
- jh->b_next_transaction = transaction;
-+ spin_unlock(&journal->j_list_lock);
- }
-- spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
-
- /*
+++ /dev/null
-patch get from linux vanilla kernel commit
-0ef54180e0187117062939202b96faf04c8673bc (v3.10-rc2)
-
-jbd2: drop checkpoint mutex when waiting in __jbd2_log_wait_for_space()
-
-While trying to debug an an issue under extreme I/O loading
-on preempt-rt kernels, the following backtrace was observed
-via SysRQ output:
-
-rm D ffff8802203afbc0 4600 4878 4748 0x00000000
-ffff8802217bfb78 0000000000000082 ffff88021fc2bb80 ffff88021fc2bb80
-ffff88021fc2bb80 ffff8802217bffd8 ffff8802217bffd8 ffff8802217bffd8
-ffff88021f1d4c80 ffff88021fc2bb80 ffff8802217bfb88 ffff88022437b000
-Call Trace:
-[<ffffffff8172dc34>] schedule+0x24/0x70
-[<ffffffff81225b5d>] jbd2_log_wait_commit+0xbd/0x140
-[<ffffffff81060390>] ? __init_waitqueue_head+0x50/0x50
-[<ffffffff81223635>] jbd2_log_do_checkpoint+0xf5/0x520
-[<ffffffff81223b09>] __jbd2_log_wait_for_space+0xa9/0x1f0
-[<ffffffff8121dc40>] start_this_handle.isra.10+0x2e0/0x530
-[<ffffffff81060390>] ? __init_waitqueue_head+0x50/0x50
-[<ffffffff8121e0a3>] jbd2__journal_start+0xc3/0x110
-[<ffffffff811de7ce>] ? ext4_rmdir+0x6e/0x230
-[<ffffffff8121e0fe>] jbd2_journal_start+0xe/0x10
-[<ffffffff811f308b>] ext4_journal_start_sb+0x5b/0x160
-[<ffffffff811de7ce>] ext4_rmdir+0x6e/0x230
-[<ffffffff811435c5>] vfs_rmdir+0xd5/0x140
-[<ffffffff8114370f>] do_rmdir+0xdf/0x120
-[<ffffffff8105c6b4>] ? task_work_run+0x44/0x80
-[<ffffffff81002889>] ? do_notify_resume+0x89/0x100
-[<ffffffff817361ae>] ? int_signal+0x12/0x17
-[<ffffffff81145d85>] sys_unlinkat+0x25/0x40
-[<ffffffff81735f22>] system_call_fastpath+0x16/0x1b
-
-What is interesting here, is that we call log_wait_commit, from
-within wait_for_space, but we are still holding the checkpoint_mutex
-as it surrounds mostly the whole of wait_for_space. And then, as we
-are waiting, journal_commit_transaction can run, and if the JBD2_FLUSHED
-bit is set, then we will also try to take the same checkpoint_mutex.
-
-It seems that we need to drop the checkpoint_mutex while sitting in
-jbd2_log_wait_commit, if we want to guarantee that progress can be made
-by jbd2_journal_commit_transaction(). There does not seem to be
-anything preempt-rt specific about this, other then perhaps increasing
-the odds of it happening.
-
-Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
-Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-
-Index: linux-2.6.32-358.18.1.el6-master/fs/jbd2/checkpoint.c
-===================================================================
---- linux-2.6.32-358.18.1.el6-master.orig/fs/jbd2/checkpoint.c
-+++ linux-2.6.32-358.18.1.el6-master/fs/jbd2/checkpoint.c
-@@ -156,7 +156,15 @@ void __jbd2_log_wait_for_space(journal_t
- /* We were able to recover space; yay! */
- ;
- } else if (tid) {
-+ /*
-+ * jbd2_journal_commit_transaction() may want
-+ * to take the checkpoint_mutex if JBD2_FLUSHED
-+ * is set. So we need to temporarily drop it.
-+ */
-+ mutex_unlock(&journal->j_checkpoint_mutex);
- jbd2_log_wait_commit(journal, tid);
-+ spin_lock(&journal->j_state_lock);
-+ continue;
- } else {
- printk(KERN_ERR "%s: needed %d blocks and "
- "only had %d space available\n",
+++ /dev/null
-commit 1ea06bec78a128adc995ca32bd906a6c9bb9cf91
-Author: Niu Yawei <yawei.niu@gmail.com>
-Date: Wed Jun 4 12:20:30 2014 +0800
-
- quota: avoid unnecessary dqget()/dqput() calls
-
- Avoid unnecessary dqget()/dqput() calls in __dquot_initialize(),
- that will introduce global lock contention otherwise.
-
- Signed-off-by: Lai Siyao <lai.siyao@intel.com>
- Signed-off-by: Niu Yawei <yawei.niu@intel.com>
- Signed-off-by: Jan Kara <jack@suse.cz>
-Index: linux-3.0.101-0.46_lustre.gbe21584/fs/quota/dquot.c
-===================================================================
---- linux-3.0.101-0.46_lustre.gbe21584.orig/fs/quota/dquot.c
-+++ linux-3.0.101-0.46_lustre.gbe21584/fs/quota/dquot.c
-@@ -1342,7 +1342,7 @@ static int dquot_active(const struct ino
- static void __dquot_initialize(struct inode *inode, int type)
- {
- unsigned int id = 0;
-- int cnt;
-+ int cnt, init_needed = 0;
- struct dquot *got[MAXQUOTAS];
- struct super_block *sb = inode->i_sb;
- qsize_t rsv;
-@@ -1357,6 +1357,15 @@ static void __dquot_initialize(struct in
- got[cnt] = NULL;
- if (type != -1 && cnt != type)
- continue;
-+ /*
-+ * The i_dquot should have been initialized in most cases,
-+ * we check it without locking here to avoid unnecessary
-+ * dqget()/dqput() calls.
-+ */
-+ if (inode->i_dquot[cnt])
-+ continue;
-+ init_needed = 1;
-+
- switch (cnt) {
- case USRQUOTA:
- id = inode->i_uid;
-@@ -1368,6 +1377,10 @@ static void __dquot_initialize(struct in
- got[cnt] = dqget(sb, id, cnt);
- }
-
-+ /* All required i_dquot has been initialized */
-+ if (!init_needed)
-+ return;
-+
- spin_lock(&inode->i_lock);
- if (IS_NOQUOTA(inode))
- goto out_err;
+++ /dev/null
-diff -urp linux-3.0.61-0.orig/fs/quota/dquot.c linux-3.0.61-0/fs/quota/dquot.c
---- linux-3.0.61-0.orig/fs/quota/dquot.c 2013-04-10 15:15:11.000000000 -0400
-+++ linux-3.0.61-0/fs/quota/dquot.c 2013-04-24 10:27:22.000000000 -0400
-@@ -83,26 +83,21 @@
- /*
- * There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats.
-- * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
-- * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
-- * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
-- * in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects
-- * modifications of quota state (on quotaon and quotaoff) and readers who care
-- * about latest values take it as well.
-+ * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures.
-+ * dq_state_lock protects modifications of quota state (on quotaon and quotaoff)
-+ * and readers who care about latest values take it as well.
- *
-- * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock,
-+ * The spinlock ordering is hence: i_lock > dq_data_lock > dq_list_lock,
- * dq_list_lock > dq_state_lock
- *
- * Note that some things (eg. sb pointer, type, id) doesn't change during
- * the life of the dquot structure and so needn't to be protected by a lock
- *
-- * Any operation working on dquots via inode pointers must hold dqptr_sem. If
-- * operation is just reading pointers from inode (or not using them at all) the
-- * read lock is enough. If pointers are altered function must hold write lock.
-+ * Any operation working on dquots via inode pointers must hold i_lock.
- * Special care needs to be taken about S_NOQUOTA inode flag (marking that
- * inode is a quota file). Functions adding pointers from inode to dquots have
-- * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
-- * have to do all pointer modifications before dropping dqptr_sem. This makes
-+ * to check this flag under i_lock and then (if S_NOQUOTA is not set) they
-+ * have to do all pointer modifications before dropping i_lock. This makes
- * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
- * then drops all pointers to dquots from an inode.
- *
-@@ -116,15 +111,8 @@
- * spinlock to internal buffers before writing.
- *
- * Lock ordering (including related VFS locks) is the following:
-- * i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock >
-- * dqio_mutex
-- * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
-- * dqptr_sem. But filesystem has to count with the fact that functions such as
-- * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
-- * from inside a transaction to keep filesystem consistency after a crash. Also
-- * filesystems usually want to do some IO on dquot from ->mark_dirty which is
-- * called with dqptr_sem held.
-- * i_mutex on quota files is special (it's below dqio_mutex)
-+ * i_mutex > dqonoff_sem > journal_lock > dquot->dq_lock > dqio_mutex
-+ * i_mutex on quota files is special (it's below dqio_mutex)
- */
-
- static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
-@@ -955,7 +943,6 @@ static inline int dqput_blocks(struct dq
- /*
- * Remove references to dquots from inode and add dquot to list for freeing
- * if we have the last reference to dquot
-- * We can't race with anybody because we hold dqptr_sem for writing...
- */
- static int remove_inode_dquot_ref(struct inode *inode, int type,
- struct list_head *tofree_head)
-@@ -1016,13 +1003,15 @@ static void remove_dquot_ref(struct supe
- * We have to scan also I_NEW inodes because they can already
- * have quota pointer initialized. Luckily, we need to touch
- * only quota pointers and these have separate locking
-- * (dqptr_sem).
-+ * (i_lock).
- */
-+ spin_lock(&inode->i_lock);
- if (!IS_NOQUOTA(inode)) {
- if (unlikely(inode_get_rsv_space(inode) > 0))
- reserved = 1;
- remove_inode_dquot_ref(inode, type, tofree_head);
- }
-+ spin_unlock(&inode->i_lock);
- }
- spin_unlock(&inode_sb_list_lock);
- #ifdef CONFIG_QUOTA_DEBUG
-@@ -1040,9 +1029,7 @@ static void drop_dquot_ref(struct super_
- LIST_HEAD(tofree_head);
-
- if (sb->dq_op) {
-- down_write(&sb_dqopt(sb)->dqptr_sem);
- remove_dquot_ref(sb, type, &tofree_head);
-- up_write(&sb_dqopt(sb)->dqptr_sem);
- put_dquot_list(&tofree_head);
- }
- }
-@@ -1349,9 +1336,6 @@ static int dquot_active(const struct ino
- /*
- * Initialize quota pointers in inode
- *
-- * We do things in a bit complicated way but by that we avoid calling
-- * dqget() and thus filesystem callbacks under dqptr_sem.
-- *
- * It is better to call this function outside of any transaction as it
- * might need a lot of space in journal for dquot structure allocation.
- */
-@@ -1384,7 +1368,7 @@ static void __dquot_initialize(struct in
- got[cnt] = dqget(sb, id, cnt);
- }
-
-- down_write(&sb_dqopt(sb)->dqptr_sem);
-+ spin_lock(&inode->i_lock);
- if (IS_NOQUOTA(inode))
- goto out_err;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-@@ -1404,12 +1388,16 @@ static void __dquot_initialize(struct in
- * did a write before quota was turned on
- */
- rsv = inode_get_rsv_space(inode);
-- if (unlikely(rsv))
-+ if (unlikely(rsv)) {
-+ spin_lock(&dq_data_lock);
- dquot_resv_space(inode->i_dquot[cnt], rsv);
-+ spin_unlock(&dq_data_lock);
-+ }
- }
- }
- out_err:
-- up_write(&sb_dqopt(sb)->dqptr_sem);
-+ spin_unlock(&inode->i_lock);
-+
- /* Drop unused references */
- dqput_all(got);
- }
-@@ -1428,12 +1416,12 @@ static void __dquot_drop(struct inode *i
- int cnt;
- struct dquot *put[MAXQUOTAS];
-
-- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_lock(&inode->i_lock);
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- put[cnt] = inode->i_dquot[cnt];
- inode->i_dquot[cnt] = NULL;
- }
-- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_unlock(&inode->i_lock);
- dqput_all(put);
- }
-
-@@ -1473,27 +1461,42 @@ static qsize_t *inode_reserved_space(str
- return inode->i_sb->dq_op->get_reserved_space(inode);
- }
-
-+static inline void __inode_add_rsv_space(struct inode *inode, qsize_t number)
-+{
-+ *inode_reserved_space(inode) += number;
-+}
-+
- void inode_add_rsv_space(struct inode *inode, qsize_t number)
- {
- spin_lock(&inode->i_lock);
-- *inode_reserved_space(inode) += number;
-+ __inode_add_rsv_space(inode, number);
- spin_unlock(&inode->i_lock);
- }
- EXPORT_SYMBOL(inode_add_rsv_space);
-
--void inode_claim_rsv_space(struct inode *inode, qsize_t number)
-+static inline void __inode_claim_rsv_space(struct inode *inode, qsize_t number)
- {
-- spin_lock(&inode->i_lock);
- *inode_reserved_space(inode) -= number;
- __inode_add_bytes(inode, number);
-+}
-+
-+void inode_claim_rsv_space(struct inode *inode, qsize_t number)
-+{
-+ spin_lock(&inode->i_lock);
-+ __inode_claim_rsv_space(inode, number);
- spin_unlock(&inode->i_lock);
- }
- EXPORT_SYMBOL(inode_claim_rsv_space);
-
-+static inline void __inode_sub_rsv_space(struct inode *inode, qsize_t number)
-+{
-+ *inode_reserved_space(inode) -= number;
-+}
-+
- void inode_sub_rsv_space(struct inode *inode, qsize_t number)
- {
- spin_lock(&inode->i_lock);
-- *inode_reserved_space(inode) -= number;
-+ __inode_sub_rsv_space(inode, number);
- spin_unlock(&inode->i_lock);
- }
- EXPORT_SYMBOL(inode_sub_rsv_space);
-@@ -1504,9 +1507,8 @@ static qsize_t inode_get_rsv_space(struc
-
- if (!inode->i_sb->dq_op->get_reserved_space)
- return 0;
-- spin_lock(&inode->i_lock);
-+
- ret = *inode_reserved_space(inode);
-- spin_unlock(&inode->i_lock);
- return ret;
- }
-
-@@ -1514,17 +1516,17 @@ static void inode_incr_space(struct inod
- int reserve)
- {
- if (reserve)
-- inode_add_rsv_space(inode, number);
-+ __inode_add_rsv_space(inode, number);
- else
-- inode_add_bytes(inode, number);
-+ __inode_add_bytes(inode, number);
- }
-
- static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
- {
- if (reserve)
-- inode_sub_rsv_space(inode, number);
-+ __inode_sub_rsv_space(inode, number);
- else
-- inode_sub_bytes(inode, number);
-+ __inode_sub_bytes(inode, number);
- }
-
- /*
-@@ -1547,6 +1549,7 @@ int __dquot_alloc_space(struct inode *in
- int warn = flags & DQUOT_SPACE_WARN;
- int reserve = flags & DQUOT_SPACE_RESERVE;
- int nofail = flags & DQUOT_SPACE_NOFAIL;
-+ struct dquot *dquot[MAXQUOTAS] = { NULL };
-
- /*
- * First test before acquiring mutex - solves deadlocks when we
-@@ -1557,38 +1560,41 @@ int __dquot_alloc_space(struct inode *in
- goto out;
- }
-
-- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_lock(&inode->i_lock);
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- warntype[cnt] = QUOTA_NL_NOWARN;
-
- spin_lock(&dq_data_lock);
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-- if (!inode->i_dquot[cnt])
-+ dquot[cnt] = inode->i_dquot[cnt];
-+ if (!dquot[cnt])
- continue;
-- ret = check_bdq(inode->i_dquot[cnt], number, !warn,
-- warntype+cnt);
-+ atomic_inc(&dquot[cnt]->dq_count);
-+ ret = check_bdq(dquot[cnt], number, !warn, warntype + cnt);
- if (ret && !nofail) {
- spin_unlock(&dq_data_lock);
-+ spin_unlock(&inode->i_lock);
- goto out_flush_warn;
- }
- }
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-- if (!inode->i_dquot[cnt])
-+ if (!dquot[cnt])
- continue;
- if (reserve)
-- dquot_resv_space(inode->i_dquot[cnt], number);
-+ dquot_resv_space(dquot[cnt], number);
- else
-- dquot_incr_space(inode->i_dquot[cnt], number);
-+ dquot_incr_space(dquot[cnt], number);
- }
- inode_incr_space(inode, number, reserve);
- spin_unlock(&dq_data_lock);
-+ spin_unlock(&inode->i_lock);
-
- if (reserve)
- goto out_flush_warn;
-- mark_all_dquot_dirty(inode->i_dquot);
-+ mark_all_dquot_dirty(dquot);
- out_flush_warn:
-- flush_warnings(inode->i_dquot, warntype);
-- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ flush_warnings(dquot, warntype);
-+ dqput_all(dquot);
- out:
- return ret;
- }
-@@ -1601,6 +1607,7 @@ int dquot_alloc_inode(const struct inode
- {
- int cnt, ret = 0;
- char warntype[MAXQUOTAS];
-+ struct dquot *dquot[MAXQUOTAS] = { NULL };
-
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
-@@ -1608,28 +1615,33 @@ int dquot_alloc_inode(const struct inode
- return 0;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- warntype[cnt] = QUOTA_NL_NOWARN;
-- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+
-+ spin_lock(&((struct inode *)inode)->i_lock);
- spin_lock(&dq_data_lock);
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-- if (!inode->i_dquot[cnt])
-+ dquot[cnt] = inode->i_dquot[cnt];
-+ if (!dquot[cnt])
- continue;
-- ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
-+ atomic_inc(&dquot[cnt]->dq_count);
-+ ret = check_idq(dquot[cnt], 1, warntype + cnt);
- if (ret)
- goto warn_put_all;
- }
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-- if (!inode->i_dquot[cnt])
-+ if (!dquot[cnt])
- continue;
-- dquot_incr_inodes(inode->i_dquot[cnt], 1);
-+ dquot_incr_inodes(dquot[cnt], 1);
- }
-
- warn_put_all:
- spin_unlock(&dq_data_lock);
-+ spin_unlock(&((struct inode *)inode)->i_lock);
-+
- if (ret == 0)
-- mark_all_dquot_dirty(inode->i_dquot);
-- flush_warnings(inode->i_dquot, warntype);
-- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ mark_all_dquot_dirty(dquot);
-+ flush_warnings(dquot, warntype);
-+ dqput_all(dquot);
- return ret;
- }
- EXPORT_SYMBOL(dquot_alloc_inode);
-@@ -1639,6 +1651,7 @@ EXPORT_SYMBOL(dquot_alloc_inode);
- */
- int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
- {
-+ struct dquot *dquot[MAXQUOTAS] = { NULL };
- int cnt;
-
- if (!dquot_active(inode)) {
-@@ -1646,19 +1659,23 @@ int dquot_claim_space_nodirty(struct ino
- return 0;
- }
-
-- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_lock(&inode->i_lock);
- spin_lock(&dq_data_lock);
- /* Claim reserved quotas to allocated quotas */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-- if (inode->i_dquot[cnt])
-- dquot_claim_reserved_space(inode->i_dquot[cnt],
-- number);
-+ dquot[cnt] = inode->i_dquot[cnt];
-+ if (dquot[cnt]) {
-+ atomic_inc(&dquot[cnt]->dq_count);
-+ dquot_claim_reserved_space(dquot[cnt], number);
-+ }
- }
- /* Update inode bytes */
-- inode_claim_rsv_space(inode, number);
-+ __inode_claim_rsv_space(inode, number);
- spin_unlock(&dq_data_lock);
-- mark_all_dquot_dirty(inode->i_dquot);
-- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_unlock(&inode->i_lock);
-+
-+ mark_all_dquot_dirty(dquot);
-+ dqput_all(dquot);
- return 0;
- }
- EXPORT_SYMBOL(dquot_claim_space_nodirty);
-@@ -1671,6 +1688,7 @@ void __dquot_free_space(struct inode *in
- unsigned int cnt;
- char warntype[MAXQUOTAS];
- int reserve = flags & DQUOT_SPACE_RESERVE;
-+ struct dquot *dquot[MAXQUOTAS] = { NULL };
-
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
-@@ -1679,26 +1697,29 @@ void __dquot_free_space(struct inode *in
- return;
- }
-
-- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_lock(&inode->i_lock);
- spin_lock(&dq_data_lock);
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-- if (!inode->i_dquot[cnt])
-+ dquot[cnt] = inode->i_dquot[cnt];
-+ if (!dquot[cnt])
- continue;
-- warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
-+ atomic_inc(&dquot[cnt]->dq_count);
-+ warntype[cnt] = info_bdq_free(dquot[cnt], number);
- if (reserve)
-- dquot_free_reserved_space(inode->i_dquot[cnt], number);
-+ dquot_free_reserved_space(dquot[cnt], number);
- else
-- dquot_decr_space(inode->i_dquot[cnt], number);
-+ dquot_decr_space(dquot[cnt], number);
- }
- inode_decr_space(inode, number, reserve);
- spin_unlock(&dq_data_lock);
-+ spin_unlock(&inode->i_lock);
-
- if (reserve)
- goto out_unlock;
-- mark_all_dquot_dirty(inode->i_dquot);
-+ mark_all_dquot_dirty(dquot);
- out_unlock:
-- flush_warnings(inode->i_dquot, warntype);
-- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ flush_warnings(dquot, warntype);
-+ dqput_all(dquot);
- }
- EXPORT_SYMBOL(__dquot_free_space);
-
-@@ -1707,26 +1728,31 @@ EXPORT_SYMBOL(__dquot_free_space);
- */
- void dquot_free_inode(const struct inode *inode)
- {
-- unsigned int cnt;
-+ struct dquot *dquot[MAXQUOTAS] = { NULL };
- char warntype[MAXQUOTAS];
-+ unsigned int cnt;
-
- /* First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex */
- if (!dquot_active(inode))
- return;
-
-- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_lock(&((struct inode *)inode)->i_lock);
- spin_lock(&dq_data_lock);
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-- if (!inode->i_dquot[cnt])
-+ dquot[cnt] = inode->i_dquot[cnt];
-+ if (!dquot[cnt])
- continue;
-- warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
-- dquot_decr_inodes(inode->i_dquot[cnt], 1);
-+ atomic_inc(&dquot[cnt]->dq_count);
-+ warntype[cnt] = info_idq_free(dquot[cnt], 1);
-+ dquot_decr_inodes(dquot[cnt], 1);
- }
- spin_unlock(&dq_data_lock);
-- mark_all_dquot_dirty(inode->i_dquot);
-- flush_warnings(inode->i_dquot, warntype);
-- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_unlock(&((struct inode *)inode)->i_lock);
-+
-+ mark_all_dquot_dirty(dquot);
-+ flush_warnings(dquot, warntype);
-+ dqput_all(dquot);
- }
- EXPORT_SYMBOL(dquot_free_inode);
-
-@@ -1757,13 +1783,13 @@ int __dquot_transfer(struct inode *inode
- /* Initialize the arrays */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- warntype_to[cnt] = QUOTA_NL_NOWARN;
-- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_lock(&inode->i_lock);
- if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
-- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_unlock(&inode->i_lock);
- return 0;
- }
- spin_lock(&dq_data_lock);
-- cur_space = inode_get_bytes(inode);
-+ cur_space = __inode_get_bytes(inode);
- rsv_space = inode_get_rsv_space(inode);
- space = cur_space + rsv_space;
- /* Build the transfer_from list and check the limits */
-@@ -1811,7 +1837,7 @@ int __dquot_transfer(struct inode *inode
- inode->i_dquot[cnt] = transfer_to[cnt];
- }
- spin_unlock(&dq_data_lock);
-- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_unlock(&inode->i_lock);
-
- mark_all_dquot_dirty(transfer_from);
- mark_all_dquot_dirty(transfer_to);
-@@ -1825,7 +1851,7 @@ int __dquot_transfer(struct inode *inode
- return 0;
- over_quota:
- spin_unlock(&dq_data_lock);
-- up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-+ spin_unlock(&inode->i_lock);
- flush_warnings(transfer_to, warntype_to);
- return ret;
- }
-diff -urp linux-3.0.61-0.orig/fs/quota/quota.c linux-3.0.61-0/fs/quota/quota.c
---- linux-3.0.61-0.orig/fs/quota/quota.c 2013-04-10 15:15:08.000000000 -0400
-+++ linux-3.0.61-0/fs/quota/quota.c 2013-04-24 10:27:22.000000000 -0400
-@@ -79,13 +79,13 @@ static int quota_getfmt(struct super_blo
- {
- __u32 fmt;
-
-- down_read(&sb_dqopt(sb)->dqptr_sem);
-+ mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!sb_has_quota_active(sb, type)) {
-- up_read(&sb_dqopt(sb)->dqptr_sem);
-+ mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- return -ESRCH;
- }
- fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
-- up_read(&sb_dqopt(sb)->dqptr_sem);
-+ mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- if (copy_to_user(addr, &fmt, sizeof(fmt)))
- return -EFAULT;
- return 0;
-diff -urp linux-3.0.61-0.orig/fs/stat.c linux-3.0.61-0/fs/stat.c
---- linux-3.0.61-0.orig/fs/stat.c 2013-04-10 15:15:08.000000000 -0400
-+++ linux-3.0.61-0/fs/stat.c 2013-04-24 10:27:22.000000000 -0400
-@@ -435,9 +435,8 @@ void inode_add_bytes(struct inode *inode
-
- EXPORT_SYMBOL(inode_add_bytes);
-
--void inode_sub_bytes(struct inode *inode, loff_t bytes)
-+void __inode_sub_bytes(struct inode *inode, loff_t bytes)
- {
-- spin_lock(&inode->i_lock);
- inode->i_blocks -= bytes >> 9;
- bytes &= 511;
- if (inode->i_bytes < bytes) {
-@@ -445,17 +444,28 @@ void inode_sub_bytes(struct inode *inode
- inode->i_bytes += 512;
- }
- inode->i_bytes -= bytes;
-+}
-+
-+void inode_sub_bytes(struct inode *inode, loff_t bytes)
-+{
-+ spin_lock(&inode->i_lock);
-+ __inode_sub_bytes(inode, bytes);
- spin_unlock(&inode->i_lock);
- }
-
- EXPORT_SYMBOL(inode_sub_bytes);
-
-+loff_t __inode_get_bytes(struct inode *inode)
-+{
-+ return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
-+}
-+
- loff_t inode_get_bytes(struct inode *inode)
- {
- loff_t ret;
-
- spin_lock(&inode->i_lock);
-- ret = (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
-+ ret = __inode_get_bytes(inode);
- spin_unlock(&inode->i_lock);
- return ret;
- }
-diff -urp linux-3.0.61-0.orig/fs/super.c linux-3.0.61-0/fs/super.c
---- linux-3.0.61-0.orig/fs/super.c 2013-04-10 15:15:08.000000000 -0400
-+++ linux-3.0.61-0/fs/super.c 2013-04-24 10:27:22.000000000 -0400
-@@ -108,7 +108,6 @@ static struct super_block *alloc_super(s
- lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
- mutex_init(&s->s_dquot.dqio_mutex);
- mutex_init(&s->s_dquot.dqonoff_mutex);
-- init_rwsem(&s->s_dquot.dqptr_sem);
- init_waitqueue_head(&s->s_wait_unfrozen);
- s->s_maxbytes = MAX_NON_LFS;
- s->s_op = &default_op;
-diff -urp linux-3.0.61-0.orig/include/linux/fs.h linux-3.0.61-0/include/linux/fs.h
---- linux-3.0.61-0.orig/include/linux/fs.h 2013-04-24 10:27:55.000000000 -0400
-+++ linux-3.0.61-0/include/linux/fs.h 2013-04-22 17:42:39.000000000 -0400
-@@ -2450,7 +2450,9 @@ extern void generic_fillattr(struct inod
- extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
- void __inode_add_bytes(struct inode *inode, loff_t bytes);
- void inode_add_bytes(struct inode *inode, loff_t bytes);
-+void __inode_sub_bytes(struct inode *inode, loff_t bytes);
- void inode_sub_bytes(struct inode *inode, loff_t bytes);
-+loff_t __inode_get_bytes(struct inode *inode);
- loff_t inode_get_bytes(struct inode *inode);
- void inode_set_bytes(struct inode *inode, loff_t bytes);
-
+++ /dev/null
---- linux-3.6.0-0.3.fc.el6.x86_64/drivers/md/raid5.c.orig 2012-11-21 08:51:15.312175089 -0500
-+++ linux-3.6.0-0.3.fc.el6.x86_64/drivers/md/raid5.c 2012-11-21 09:02:38.415174560 -0500
-@@ -2394,6 +2394,8 @@ static int add_stripe_bio(struct stripe_
- bi->bi_next = *bip;
- *bip = bi;
- raid5_inc_bi_active_stripes(bi);
-+ if ((bi->bi_rw & REQ_SYNC) && !forwrite)
-+ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
-
- if (forwrite) {
- /* check if page is covered */
-@@ -4217,6 +4222,9 @@ static void make_request(struct mddev *m
-
- bio_endio(bi, 0);
- }
-+
-+ if (bi->bi_rw & REQ_SYNC)
-+ md_wakeup_thread(mddev->thread);
- }
-
- static sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks);
+++ /dev/null
---- linux-3.10.0-685.el7.x86_64/drivers/md/raid5.c.orig 2017-06-28 14:06:00.627299582 -0700
-+++ linux-3.10.0-685.el7.x86_64/drivers/md/raid5.c 2017-06-28 14:08:01.564618793 -0700
-@@ -3090,6 +3090,8 @@ static int add_stripe_bio(struct stripe_
- bi->bi_next = *bip;
- *bip = bi;
- raid5_inc_bi_active_stripes(bi);
-+ if ((bi->bi_rw & REQ_SYNC) && !forwrite)
-+ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
-
- if (forwrite) {
- /* check if page is covered */
-@@ -5538,6 +5540,9 @@ static bool raid5_make_request(struct md
- bi, 0);
- bio_endio(bi, 0);
- }
-+
-+ if (bi->bi_rw & REQ_SYNC)
-+ md_wakeup_thread(mddev->thread);
- return true;
- }
-
+++ /dev/null
-Index: linux-3.10.0-799.el7.x86_64/drivers/md/raid5.c
-===================================================================
---- linux-3.10.0-799.el7.x86_64.orig/drivers/md/raid5.c
-+++ linux-3.10.0-799.el7.x86_64/drivers/md/raid5.c
-@@ -3096,7 +3096,9 @@ static int add_stripe_bio(struct stripe_
- bi->bi_next = *bip;
- *bip = bi;
- raid5_inc_bi_active_stripes(bi);
- md_write_inc(conf->mddev, bi);
-+ if ((bi->bi_rw & REQ_SYNC) && !forwrite)
-+ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
-
- if (forwrite) {
- /* check if page is covered */
-@@ -5548,6 +5550,9 @@ static void raid5_make_request(struct md
- bi, 0);
- bio_endio(bi, 0);
- }
-+
-+ if (bi->bi_rw & REQ_SYNC)
-+ md_wakeup_thread(mddev->thread);
- }
-
- static sector_t raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks);
+++ /dev/null
-Force MD devices to pass SYNC reads directly to the disk
-instead of handling from cache. This is needed for MMP
-on MD RAID devices, and in theory could be accepted in
-the upstream kernel. Not needed for DMU.
-
-Index: linux-3.0.35/drivers/md/raid5.c
-===================================================================
---- linux-3.0.35.orig/drivers/md/raid5.c 2012-06-22 06:09:49.000000000 -0400
-+++ linux-3.0.35/drivers/md/raid5.c 2013-01-25 10:11:10.076431000 -0500
-@@ -2169,6 +2169,9 @@ static int add_stripe_bio(struct stripe_
- bi->bi_next = *bip;
- *bip = bi;
- bi->bi_phys_segments++;
-+ /* force to read from disk. */
-+ if ((bi->bi_rw & REQ_SYNC) && !forwrite)
-+ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
- spin_unlock_irq(&conf->device_lock);
- spin_unlock(&sh->lock);
-
-@@ -4097,6 +4100,9 @@ static int make_request(mddev_t *mddev,
- bio_endio(bi, 0);
- }
-
-+ if (bi->bi_rw & REQ_SYNC)
-+ md_wakeup_thread(mddev->thread);
-+
- return 0;
- }
-
+++ /dev/null
-Index: linux-3.12.44-52.10/drivers/md/raid5.c
-===================================================================
---- linux-3.12.44-52.10.orig/drivers/md/raid5.c
-+++ linux-3.12.44-52.10/drivers/md/raid5.c
-@@ -2530,6 +2530,8 @@ static int add_stripe_bio(struct stripe_
- bi->bi_next = *bip;
- *bip = bi;
- raid5_inc_bi_active_stripes(bi);
-+ if ((bi->bi_rw & REQ_SYNC) && !forwrite)
-+ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
-
- if (forwrite) {
- /* check if page is covered */
-@@ -4416,6 +4418,9 @@ static void make_discard_request(struct
- md_write_end(mddev);
- bio_endio(bi, 0);
- }
-+
-+ if (bi->bi_rw & REQ_SYNC)
-+ md_wakeup_thread(mddev->thread);
- }
-
- static void make_request(struct mddev *mddev, struct bio * bi)
+++ /dev/null
-Index: linux-4.4.21-64/drivers/md/raid5.c
-===================================================================
---- linux-4.4.21-64.orig/drivers/md/raid5.c
-+++ linux-4.4.21-64/drivers/md/raid5.c
-@@ -2991,6 +2991,8 @@ static int add_stripe_bio(struct stripe_
- bi->bi_next = *bip;
- *bip = bi;
- raid5_inc_bi_active_stripes(bi);
-+ if ((bi->bi_rw & REQ_SYNC) && !forwrite)
-+ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
-
- if (forwrite) {
- /* check if page is covered */
-@@ -5136,6 +5138,9 @@ static void make_discard_request(struct
- md_write_end(mddev);
- bio_endio(bi);
- }
-+
-+ if (bi->bi_rw & REQ_SYNC)
-+ md_wakeup_thread(mddev->thread);
- }
-
- static void raid5_make_request(struct mddev *mddev, struct bio * bi)
+++ /dev/null
-Index: linux-4.4.59-1/drivers/md/raid5.c
-===================================================================
---- linux-4.4.59-1.orig/drivers/md/raid5.c
-+++ linux-4.4.59-1/drivers/md/raid5.c
-@@ -3041,6 +3041,8 @@ static int add_stripe_bio(struct stripe_
- bi->bi_next = *bip;
- *bip = bi;
- raid5_inc_bi_active_stripes(bi);
-+ if ((bi->bi_opf & REQ_SYNC) && !forwrite)
-+ clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); /* force to read from disk. */
-
- if (forwrite) {
- /* check if page is covered */
-@@ -5192,6 +5194,9 @@ static void make_discard_request(struct
- md_write_end(mddev);
- bio_endio(bi);
- }
-+
-+ if (bi->bi_opf & REQ_SYNC)
-+ md_wakeup_thread(mddev->thread);
- }
-
- static void raid5_make_request(struct mddev *mddev, struct bio * bi)
+++ /dev/null
-raid5-mmp-unplug-dev-sles11sp2.patch
-quota-replace-dqptr-sem-sles11sp2.patch
-quota-avoid-dqget-calls-sles11sp2.patch
-blkdev_tunables-3.0-sles11.patch
-bh_lru_size_increase.patch
-jbd2-log_wait_for_space-2.6-rhel6.patch
+++ /dev/null
-raid5-mmp-unplug-dev-sles11sp2.patch
-quota-replace-dqptr-sem-sles11sp2.patch
-quota-avoid-dqget-calls-sles11sp2.patch
-blkdev_tunables-3.0-sles11sp3.patch
-bh_lru_size_increase.patch
-jbd2-log_wait_for_space-2.6-rhel6.patch
+++ /dev/null
-raid5-mmp-unplug-dev-sles12.patch
-blkdev_tunables-3.7.patch
-bh_lru_size_increase.patch
+++ /dev/null
-raid5-mmp-unplug-dev-sles12sp2.patch
-blkdev_tunables-3.8-sles12.patch
+++ /dev/null
-raid5-mmp-unplug-dev-sles12sp3.patch
+++ /dev/null
-lnxmaj="3.0"
-lnxmin=".101"
-lnxrel="0.7.15"
-# use this when there is an "RPM fix" which means that the name of the
-# (source) RPM has been updated but the version of the kernel inside the
-# RPM is not also updated
-rpmfix=".1"
-
-# this is the delimeter that goes before the "smp" at the end of the version
-# defaults to empty
-FLAVOR_DELIMITER="-"
-
-KERNEL_SRPM=kernel-source-${lnxmaj}${lnxmin}-${lnxrel}${rpmfix}.src.rpm
-SERIES=3.0-sles11.series
-VERSION=$lnxmaj
-EXTRA_VERSION="${lnxmin#.}-${lnxrel}_lustre.@VERSION@"
-LUSTRE_VERSION=@VERSION@
-
-OFED_VERSION=inkernel
-
-BASE_ARCHS="i686 ppc x86_64 ia64 ppc64"
-BIGMEM_ARCHS=""
-BOOT_ARCHS=""
-JENSEN_ARCHS=""
-DEFAULT_ARCHS="i686 x86_64 ia64 ppc64"
-BIGSMP_ARCHS=""
-PSERIES64_ARCHS="ppc"
-UP_ARCHS=""
-SRC_ARCHS=""
-#RPMSMPTYPE="smp"
-
-for cc in gcc ; do
- if which $cc >/dev/null 2>/dev/null ; then
- export CC=$cc
- break
- fi
-done
+++ /dev/null
-lnxmaj="3.0"
-lnxmin=".101"
-lnxrel="0.47.71"
-# use this when there is an "RPM fix" which means that the name of the
-# (source) RPM has been updated but the version of the kernel inside the
-# RPM is not also updated
-rpmfix=".1"
-
-# this is the delimeter that goes before the "smp" at the end of the version
-# defaults to empty
-FLAVOR_DELIMITER="-"
-
-KERNEL_SRPM=kernel-source-${lnxmaj}${lnxmin}-${lnxrel}${rpmfix}.src.rpm
-SERIES=3.0-sles11sp3.series
-VERSION=$lnxmaj
-EXTRA_VERSION="${lnxmin#.}-${lnxrel}_lustre.@VERSION@"
-LUSTRE_VERSION=@VERSION@
-
-OFED_VERSION=inkernel
-
-BASE_ARCHS="i686 ppc x86_64 ia64 ppc64"
-BIGMEM_ARCHS=""
-BOOT_ARCHS=""
-JENSEN_ARCHS=""
-DEFAULT_ARCHS="i686 x86_64 ia64 ppc64"
-BIGSMP_ARCHS=""
-PSERIES64_ARCHS="ppc"
-UP_ARCHS=""
-SRC_ARCHS=""
-#RPMSMPTYPE="smp"
-
-for cc in gcc ; do
- if which $cc >/dev/null 2>/dev/null ; then
- export CC=$cc
- break
- fi
-done
+++ /dev/null
-lnxmaj="3.0"
-lnxmin=".101"
-lnxrel="107"
-# use this when there is an "RPM fix" which means that the name of the
-# (source) RPM has been updated but the version of the kernel inside the
-# RPM is not also updated
-rpmfix=".1"
-
-# this is the delimeter that goes before the "smp" at the end of the version
-# defaults to empty
-FLAVOR_DELIMITER="-"
-
-KERNEL_SRPM=kernel-source-${lnxmaj}${lnxmin}-${lnxrel}${rpmfix}.src.rpm
-SERIES=3.0-sles11sp3.series
-VERSION=$lnxmaj
-EXTRA_VERSION="${lnxmin#.}-${lnxrel}_lustre.@VERSION@"
-LUSTRE_VERSION=@VERSION@
-
-OFED_VERSION=inkernel
-
-BASE_ARCHS="i686 ppc x86_64 ia64 ppc64"
-BIGMEM_ARCHS=""
-BOOT_ARCHS=""
-JENSEN_ARCHS=""
-DEFAULT_ARCHS="i686 x86_64 ia64 ppc64"
-BIGSMP_ARCHS=""
-PSERIES64_ARCHS="ppc"
-UP_ARCHS=""
-SRC_ARCHS=""
-#RPMSMPTYPE="smp"
-
-for cc in gcc ; do
- if which $cc >/dev/null 2>/dev/null ; then
- export CC=$cc
- break
- fi
-done
+++ /dev/null
-lnxmaj="3.12"
-lnxmin=".74"
-lnxrel="60.64.40"
-# use this when there is an "RPM fix" which means that the name of the
-# (source) RPM has been updated but the version of the kernel inside the
-# RPM is not also updated
-rpmfix=".1"
-
-# this is the delimeter that goes before the "smp" at the end of the version
-# defaults to empty
-FLAVOR_DELIMITER="-"
-
-KERNEL_SRPM=kernel-source-${lnxmaj}${lnxmin}-${lnxrel}${rpmfix}.src.rpm
-SERIES=3.12-sles12.series
-LDISKFS_SERIES=3.12-sles12sp1.series
-VERSION=$lnxmaj
-EXTRA_VERSION="${lnxmin#.}-${lnxrel}_lustre.@VERSION@"
-LUSTRE_VERSION=@VERSION@
-
-OFED_VERSION=inkernel
-
-BASE_ARCHS="i686 ppc x86_64 ia64 ppc64"
-BIGMEM_ARCHS=""
-BOOT_ARCHS=""
-JENSEN_ARCHS=""
-DEFAULT_ARCHS="i686 x86_64 ia64 ppc64"
-BIGSMP_ARCHS=""
-PSERIES64_ARCHS="ppc"
-UP_ARCHS=""
-SRC_ARCHS=""
-#RPMSMPTYPE="smp"
-
-for cc in gcc ; do
- if which $cc >/dev/null 2>/dev/null ; then
- export CC=$cc
- break
- fi
-done
+++ /dev/null
-lnxmaj="4.4"
-lnxmin=".120"
-lnxrel="92.70"
-# use this when there is an "RPM fix" which means that the name of the
-# (source) RPM has been updated but the version of the kernel inside the
-# RPM is not also updated
-rpmfix=".1"
-
-# this is the delimeter that goes before the "smp" at the end of the version
-# defaults to empty
-FLAVOR_DELIMITER="-"
-
-KERNEL_SRPM=kernel-source-${lnxmaj}${lnxmin}-${lnxrel}${rpmfix}.src.rpm
-SERIES=4.4-sles12.series
-LDISKFS_SERIES=4.4-sles12sp2.series
-VERSION=$lnxmaj
-EXTRA_VERSION="${lnxmin#.}-${lnxrel}_lustre.@VERSION@"
-LUSTRE_VERSION=@VERSION@
-
-OFED_VERSION=inkernel
-
-BASE_ARCHS="i686 ppc x86_64 ia64 ppc64"
-BIGMEM_ARCHS=""
-BOOT_ARCHS=""
-JENSEN_ARCHS=""
-DEFAULT_ARCHS="i686 x86_64 ia64 ppc64"
-BIGSMP_ARCHS=""
-PSERIES64_ARCHS="ppc"
-UP_ARCHS=""
-SRC_ARCHS=""
-#RPMSMPTYPE="smp"
-
-for cc in gcc ; do
- if which $cc >/dev/null 2>/dev/null ; then
- export CC=$cc
- break
- fi
-done
+++ /dev/null
-lnxmaj="4.4"
-lnxmin=".180"
-lnxrel="94.100"
-# use this when there is an "RPM fix" which means that the name of the
-# (source) RPM has been updated but the version of the kernel inside the
-# RPM is not also updated
-rpmfix=".1"
-
-# this is the delimeter that goes before the "smp" at the end of the version
-# defaults to empty
-FLAVOR_DELIMITER="-"
-
-KERNEL_SRPM=kernel-source-${lnxmaj}${lnxmin}-${lnxrel}${rpmfix}.src.rpm
-SERIES=4.4-sles12sp3.series
-VERSION=$lnxmaj
-EXTRA_VERSION="${lnxmin#.}-${lnxrel}_lustre.@VERSION@"
-LUSTRE_VERSION=@VERSION@
-
-OFED_VERSION=inkernel
-
-BASE_ARCHS="i686 ppc x86_64 ia64 ppc64"
-BIGMEM_ARCHS=""
-BOOT_ARCHS=""
-JENSEN_ARCHS=""
-DEFAULT_ARCHS="i686 x86_64 ia64 ppc64"
-BIGSMP_ARCHS=""
-PSERIES64_ARCHS="ppc"
-UP_ARCHS=""
-SRC_ARCHS=""
-#RPMSMPTYPE="smp"
-
-for cc in gcc ; do
- if which $cc >/dev/null 2>/dev/null ; then
- export CC=$cc
- break
- fi
-done
Clients since 2.6.18 DO NOT need any patches.
PATCH SERIES FOR SERVER KERNELS:
-3.10-rhel7.5.series 3.10.0-862.14.4.el7 (RHEL 7.5)
-3.10-rhel7.6.series 3.10.0-957.27.2.el7 (RHEL 7.6)
+3.10-rhel7.6.series 3.10.0-957.27.2.el7 (RHEL 7.6)
3.10-rhel7.7.series 3.10.0-1062.18.1.el7 (RHEL 7.7)
-3.10-rhel7.8.series 3.10.0-1127.el7 (RHEL 7.8)
-4.18-rhel8.series 4.18.0-80.11.2.el8 (RHEL 8.0)
-4.18-rhel8.1.series 4.18.0-147.8.1.el8 (RHEL 8.1)
-3.0-sles11sp3.series 3.0.101-0.47.71 (SLES11 SP3)
-3.0-sles11sp3.series 3.0.101-107 (SLES11 SP4)
-3.12-sles12.series 3.12.74-60.64.40 (SLES12 SP1)
-4.4-sles12.series 4.4.120-92.70 (SLES12 SP2)
-4.4-sles12sp3.series 4.4.180-94.100 (SLES12 SP3)
-4.4-ubuntu14+16.series 4.4.0-85.108 (Ubuntu 14.04.5 LTS)
-4.4-ubuntu14+16.series 4.4.0-85.108 (Ubuntu 16.04)
+3.10-rhel7.8.series 3.10.0-1127.el7 (RHEL 7.8)
+4.18-rhel8.series 4.18.0-80.11.2.el8 (RHEL 8.0)
+4.18-rhel8.1.series 4.18.0-147.8.1.el8 (RHEL 8.1)
See lustre/ChangeLog for supported client kernel versions.