Whamcloud - gitweb
b14d5990c929dcf3e85a844dcd62803e93b4514e
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel6.6 / ext4_s_max_ext_tree_depth.patch
1 Fix ext4_ext_find_extent() to always pre-allocate the ext4_ext_path[]
2 array for the maximum possible tree depth instead of the current depth.
3 This avoids races with concurrent ext_depth() growth that exist in the
4 current, unsafe implementation, which resizes/re-allocates the
5 ext4_ext_path[] array; those races remain even with the more recent,
6 related patches that will be integrated in newer kernels.
7
8 Index: linux-stage/fs/ext4/ext4.h
9 ===================================================================
10 --- linux-stage.orig/fs/ext4/ext4.h     2016-07-15 10:55:51.000000000 +0300
11 +++ linux-stage/fs/ext4/ext4.h  2016-07-15 10:56:19.000000000 +0300
12 @@ -1153,6 +1153,9 @@ struct ext4_sb_info {
13         unsigned long s_ext_extents;
14  #endif
15  
16 +       /* maximum possible extents tree depth, to be computed at mount time */
17 +       unsigned int s_max_ext_tree_depth;
18 +
19         /* for buddy allocator */
20         struct ext4_group_info ***s_group_info;
21         struct inode *s_buddy_cache;
22 Index: linux-stage/fs/ext4/extents.c
23 ===================================================================
24 --- linux-stage.orig/fs/ext4/extents.c  2016-07-15 10:55:51.000000000 +0300
25 +++ linux-stage/fs/ext4/extents.c       2016-07-15 10:56:19.000000000 +0300
26 @@ -698,8 +698,9 @@ ext4_ext_find_extent(struct inode *inode
27  
28         /* account possible depth increase */
29         if (!path) {
30 -               path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
31 -                               GFP_NOFS);
32 +               path = kzalloc(sizeof(struct ext4_ext_path) *
33 +                              EXT4_SB(inode->i_sb)->s_max_ext_tree_depth,
34 +                              GFP_NOFS);
35                 if (!path)
36                         return ERR_PTR(-ENOMEM);
37                 alloc = 1;
38 @@ -1907,11 +1908,8 @@ static int ext4_fill_fiemap_extents(stru
39                 /* find extent for this block */
40                 down_read(&EXT4_I(inode)->i_data_sem);
41  
42 -               if (path && ext_depth(inode) != depth) {
43 -                       /* depth was changed. we have to realloc path */
44 -                       kfree(path);
45 -                       path = NULL;
46 -               }
47 +               /* path of max possible depth will be allocated during
48 +                * first pass, so its space can be re-used for each loop */
49  
50                 path = ext4_ext_find_extent(inode, block, path);
51                 if (IS_ERR(path)) {
52 @@ -2656,7 +2654,8 @@ again:
53                         path[k].p_block =
54                                 le16_to_cpu(path[k].p_hdr->eh_entries)+1;
55         } else {
56 -               path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1),
57 +               path = kzalloc(sizeof(struct ext4_ext_path) *
58 +                              EXT4_SB(inode->i_sb)->s_max_ext_tree_depth,
59                                GFP_NOFS);
60                 if (path == NULL) {
61                         ext4_journal_stop(handle);
62 @@ -2781,13 +2780,15 @@ out:
63   */
64  void ext4_ext_init(struct super_block *sb)
65  {
66 +       ext4_fsblk_t maxblocks;
67 +
68         /*
69          * possible initialization would be here
70          */
71  
72         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
73 -#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
74 -               printk(KERN_INFO "EXT4-fs: file extents enabled");
75 +               printk(KERN_INFO "EXT4-fs (%s): file extents enabled",
76 +                      sb->s_id);
77  #ifdef AGGRESSIVE_TEST
78                 printk(", aggressive tests");
79  #endif
80 @@ -2796,14 +2797,35 @@ void ext4_ext_init(struct super_block *s
81  #endif
82  #ifdef EXTENTS_STATS
83                 printk(", stats");
84 -#endif
85 -               printk("\n");
86 -#endif
87 -#ifdef EXTENTS_STATS
88                 spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
89                 EXT4_SB(sb)->s_ext_min = 1 << 30;
90                 EXT4_SB(sb)->s_ext_max = 0;
91  #endif
92 +               EXT4_SB(sb)->s_max_ext_tree_depth = 1;
93 +
94 +               maxblocks = sb->s_maxbytes / sb->s_blocksize;
95 +
96 +               /* 1st/root level/node of extents tree stands in i_data and
97 +                * entries stored in tree nodes can be of type ext4_extent
98 +                * (leaf node) or ext4_extent_idx (internal node) */
99 +               maxblocks /= (sizeof(((struct ext4_inode_info *)0x0)->i_data) -
100 +                             sizeof(struct ext4_extent_header)) /
101 +                            max(sizeof(struct ext4_extent),
102 +                                sizeof(struct ext4_extent_idx));
103 +
104 +               /* compute maximum extents tree depth for a fully populated
105 +                * file of max size made of only minimal/1-block extents */
106 +               while (maxblocks > 0) {
107 +                       maxblocks /= (sb->s_blocksize -
108 +                                     sizeof(struct ext4_extent_header)) /
109 +                                    max(sizeof(struct ext4_extent),
110 +                                        sizeof(struct ext4_extent_idx));
111 +                       EXT4_SB(sb)->s_max_ext_tree_depth++;
112 +               }
113 +
114 +               printk(", maximum tree depth=%u",
115 +                      EXT4_SB(sb)->s_max_ext_tree_depth);
116 +               printk("\n");
117         }
118  }
119  
120 Index: linux-stage/fs/ext4/super.c
121 ===================================================================
122 --- linux-stage.orig/fs/ext4/super.c    2016-07-15 10:55:51.000000000 +0300
123 +++ linux-stage/fs/ext4/super.c 2016-07-15 10:56:19.000000000 +0300
124 @@ -3529,6 +3529,8 @@ static int ext4_fill_super(struct super_
125                 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
126                         goto failed_mount3;
127  
128 +       ext4_ext_init(sb); /* needed before using extent-mapped journal */
129 +
130         /*
131          * The first inode we look at is the journal inode.  Don't try
132          * root first: it may be modified in the journal!
133 @@ -3722,7 +3724,6 @@ no_journal:
134                 goto failed_mount4a;
135         }
136  
137 -       ext4_ext_init(sb);
138         err = ext4_mb_init(sb, needs_recovery);
139         if (err) {
140                 ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)",