Whamcloud - gitweb
LU-12353 ldiskfs: speedup quota journalling
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel7.6 / ext4-limit-number-of-scanned-extents-in-status-tree-.patch
1 From b72242d714ac3968bbb25867718e731be217e87b Mon Sep 17 00:00:00 2001
2 From: Jan Kara <jack@suse.cz>
3 Date: Tue, 25 Nov 2014 11:51:23 -0500
4 Subject: [PATCH 5/7] ext4: limit number of scanned extents in status tree
5  shrinker
6
7 Currently we scan extent status trees of inodes until we reclaim nr_to_scan
8 extents. This can however require a lot of scanning when there are lots
9 of delayed extents (as those cannot be reclaimed).
10
11 Change shrinker to work as shrinkers are supposed to and *scan* only
12 nr_to_scan extents regardless of how many extents we actually
13 reclaim. We however need to be careful and avoid scanning each status
14 tree from the beginning - that could lead to a situation where we would
15 not be able to reclaim anything at all when first nr_to_scan extents in
16 the tree are always unreclaimable. For each inode we remember the offset
17 where we stopped scanning and continue from there when we next come
18 across the inode.
19
20 Note that we also need to update places calling __es_shrink() manually
21 to pass a reasonable nr_to_scan, rather than just 1, so that they have a
22 chance of reclaiming anything.
23
24 Signed-off-by: Jan Kara <jack@suse.cz>
25 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
26 ---
27  fs/ext4/ext4.h           |  5 ++-
28  fs/ext4/extents_status.c | 91 ++++++++++++++++++++++++++--------------
29  fs/ext4/super.c          |  1 +
30  3 files changed, 65 insertions(+), 32 deletions(-)
31
32 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
33 index 0813afd6..2893a168 100644
34 --- a/fs/ext4/ext4.h
35 +++ b/fs/ext4/ext4.h
36 @@ -1020,6 +1020,9 @@ struct ext4_inode_info {
37         struct list_head i_es_list;
38         unsigned int i_es_all_nr;       /* protected by i_es_lock */
39         unsigned int i_es_shk_nr;       /* protected by i_es_lock */
40 +       ext4_lblk_t i_es_shrink_lblk;   /* Offset where we start searching for
41 +                                          extents to shrink. Protected by
42 +                                          i_es_lock  */
43  
44         /* ialloc */
45         ext4_group_t    i_last_alloc_group;
46 @@ -1481,7 +1484,7 @@ struct ext4_sb_info {
47  
48         /* Reclaim extents from extent status tree */
49         struct shrinker s_es_shrinker;
50 -       struct list_head s_es_list;
51 +       struct list_head s_es_list;     /* List of inodes with reclaimable extents */
52         long s_es_nr_inode;
53         struct ext4_es_stats s_es_stats;
54         spinlock_t s_es_lock ____cacheline_aligned_in_smp;
55 diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
56 index edd49793..b78eec2a 100644
57 --- a/fs/ext4/extents_status.c
58 +++ b/fs/ext4/extents_status.c
59 @@ -147,8 +147,7 @@ static struct kmem_cache *ext4_es_cachep;
60  static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
61  static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
62                               ext4_lblk_t end);
63 -static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
64 -                                      int nr_to_scan);
65 +static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
66  static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
67                        struct ext4_inode_info *locked_ei);
68  
69 @@ -726,7 +725,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
70  retry:
71         err = __es_insert_extent(inode, &newes);
72         if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
73 -                                         1, EXT4_I(inode)))
74 +                                         128, EXT4_I(inode)))
75                 goto retry;
76         if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
77                 err = 0;
78 @@ -884,7 +883,7 @@ retry:
79                                 es->es_len = orig_es.es_len;
80                                 if ((err == -ENOMEM) &&
81                                     __es_shrink(EXT4_SB(inode->i_sb),
82 -                                                       1, EXT4_I(inode)))
83 +                                                       128, EXT4_I(inode)))
84                                         goto retry;
85                                 goto out;
86                         }
87 @@ -976,7 +975,7 @@ static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
88         ktime_t start_time;
89         u64 scan_time;
90         int nr_to_walk;
91 -       int ret, nr_shrunk = 0;
92 +       int nr_shrunk = 0;
93         int retried = 0, nr_skipped = 0;
94  
95         es_stats = &sbi->s_es_stats;
96 @@ -994,7 +993,7 @@ retry:
97                 ei = list_first_entry(&sbi->s_es_list, struct ext4_inode_info,
98                                       i_es_list);
99                 /* Move the inode to the tail */
100 -               list_move(&ei->i_es_list, sbi->s_es_list.prev);
101 +               list_move_tail(&ei->i_es_list, &sbi->s_es_list);
102                 /*
103                  * Normally we try hard to avoid shrinking precached inodes,
104                  * but we will as a last resort.
105 @@ -1015,12 +1014,10 @@ retry:
106                  */
107                 spin_unlock(&sbi->s_es_lock);
108  
109 -               ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
110 +               nr_shrunk += es_reclaim_extents(ei, &nr_to_scan);
111                 write_unlock(&ei->i_es_lock);
112  
113 -               nr_shrunk += ret;
114 -               nr_to_scan -= ret;
115 -               if (nr_to_scan == 0)
116 +               if (nr_to_scan <= 0)
117                         goto out;
118                 spin_lock(&sbi->s_es_lock);
119         }
120 @@ -1037,7 +1034,7 @@ retry:
121         }
122  
123         if (locked_ei && nr_shrunk == 0)
124 -               nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
125 +               nr_shrunk = es_reclaim_extents(locked_ei, &nr_to_scan);
126  out:
127         scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
128         if (likely(es_stats->es_stats_scan_time))
129 @@ -1213,27 +1210,32 @@ void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
130         unregister_shrinker(&sbi->s_es_shrinker);
131  }
132  
133 -static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
134 -                                      int nr_to_scan)
135 +/*
136 + * Shrink extents in given inode from ei->i_es_shrink_lblk till end. Scan at
137 + * most *nr_to_scan extents, update *nr_to_scan accordingly.
138 + *
139 + * Return 0 if we hit end of tree / interval, 1 if we exhausted nr_to_scan.
140 + * Increment *nr_shrunk by the number of reclaimed extents. Also update
141 + * ei->i_es_shrink_lblk to where we should continue scanning.
142 + */
143 +static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
144 +                                int *nr_to_scan, int *nr_shrunk)
145  {
146         struct inode *inode = &ei->vfs_inode;
147         struct ext4_es_tree *tree = &ei->i_es_tree;
148 -       struct rb_node *node;
149         struct extent_status *es;
150 -       int nr_shrunk = 0;
151 -       static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
152 -                                     DEFAULT_RATELIMIT_BURST);
153 -
154 -       if (ei->i_es_shk_nr == 0)
155 -               return 0;
156 -
157 -       if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
158 -           __ratelimit(&_rs))
159 -               ext4_warning(inode->i_sb, "forced shrink of precached extents");
160 +       struct rb_node *node;
161  
162 -       node = rb_first(&tree->root);
163 -       while (node != NULL) {
164 -               es = rb_entry(node, struct extent_status, rb_node);
165 +       es = __es_tree_search(&tree->root, ei->i_es_shrink_lblk);
166 +       if (!es)
167 +               goto out_wrap;
168 +       node = &es->rb_node;
169 +       while (*nr_to_scan > 0) {
170 +               if (es->es_lblk > end) {
171 +                       ei->i_es_shrink_lblk = end + 1;
172 +                       return 0;
173 +               }
174 +               (*nr_to_scan)--;
175                 node = rb_next(&es->rb_node);
176                 /*
177                  * We can't reclaim delayed extent from status tree because
178 @@ -1242,11 +1244,38 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
179                 if (!ext4_es_is_delayed(es)) {
180                         rb_erase(&es->rb_node, &tree->root);
181                         ext4_es_free_extent(inode, es);
182 -                       nr_shrunk++;
183 -                       if (--nr_to_scan == 0)
184 -                               break;
185 +                       (*nr_shrunk)++;
186                 }
187 +               if (!node)
188 +                       goto out_wrap;
189 +               es = rb_entry(node, struct extent_status, rb_node);
190         }
191 -       tree->cache_es = NULL;
192 +       ei->i_es_shrink_lblk = es->es_lblk;
193 +       return 1;
194 +out_wrap:
195 +       ei->i_es_shrink_lblk = 0;
196 +       return 0;
197 +}
198 +
199 +static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan)
200 +{
201 +       struct inode *inode = &ei->vfs_inode;
202 +       int nr_shrunk = 0;
203 +       ext4_lblk_t start = ei->i_es_shrink_lblk;
204 +       static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
205 +                                     DEFAULT_RATELIMIT_BURST);
206 +
207 +       if (ei->i_es_shk_nr == 0)
208 +               return 0;
209 +
210 +       if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
211 +           __ratelimit(&_rs))
212 +               ext4_warning(inode->i_sb, "forced shrink of precached extents");
213 +
214 +       if (!es_do_reclaim_extents(ei, EXT_MAX_BLOCKS, nr_to_scan, &nr_shrunk) &&
215 +           start != 0)
216 +               es_do_reclaim_extents(ei, start - 1, nr_to_scan, &nr_shrunk);
217 +
218 +       ei->i_es_tree.cache_es = NULL;
219         return nr_shrunk;
220  }
221 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
222 index 8a81fa73..d9cd4ff9 100644
223 --- a/fs/ext4/super.c
224 +++ b/fs/ext4/super.c
225 @@ -945,6 +945,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
226         INIT_LIST_HEAD(&ei->i_es_list);
227         ei->i_es_all_nr = 0;
228         ei->i_es_shk_nr = 0;
229 +       ei->i_es_shrink_lblk = 0;
230         ei->i_reserved_data_blocks = 0;
231         ei->i_reserved_meta_blocks = 0;
232         ei->i_allocated_meta_blocks = 0;
233 -- 
234 2.24.1
235