LU-14317 ldiskfs: ‘%llu’ mismatch with type ‘long int’ on arm64
fs/lustre-release.git: ldiskfs/kernel_patches/patches/rhel8/ext4-simple-blockalloc.patch
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3b9ec24..64dc5fd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1450,6 +1450,9 @@ struct ext4_sb_info {
        unsigned int s_mb_min_to_scan;
        unsigned int s_mb_stats;
        unsigned int s_mb_order2_reqs;
+       ext4_fsblk_t s_mb_c1_blocks;
+       ext4_fsblk_t s_mb_c2_blocks;
+       ext4_fsblk_t s_mb_c3_blocks;
        unsigned long *s_mb_prealloc_table;
        unsigned int s_mb_group_prealloc;
        unsigned int s_max_dir_size_kb;
@@ -1466,6 +1469,9 @@ struct ext4_sb_info {
        atomic_t s_bal_goals;   /* goal hits */
        atomic_t s_bal_breaks;  /* too long searches */
        atomic_t s_bal_2orders; /* 2^order hits */
+       /* cX loop didn't find blocks; indexed by criterion cr (0..3) */
+       atomic64_t s_bal_cX_failed[4];
+       atomic64_t s_bal_cX_skipped[3];
        spinlock_t s_bal_lock;
        unsigned long s_mb_buddies_generated;
        unsigned long long s_mb_generation_time;
@@ -2563,6 +2569,7 @@ extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
 /* mballoc.c */
 extern const struct file_operations ext4_seq_prealloc_table_fops;
 extern const struct seq_operations ext4_mb_seq_groups_ops;
+extern const struct file_operations ext4_mb_seq_alloc_fops;
 extern const struct file_operations ext4_seq_mb_last_group_fops;
 extern int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v);
 extern long ext4_mb_stats;
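
The new counters are atomic64_t so they cannot wrap on long-lived mounts. As the commit subject notes, atomic64_read() returns a plain long on arm64, so every %llu consumer in the hunks below casts the result explicitly. A minimal userspace model of the warning and the fix; atomic64_model and atomic64_read_model are illustrative stand-ins, not kernel API:

#include <stdio.h>

/* Userspace model of the arm64 format warning named in the commit
 * subject.  Illustrative code, not kernel API. */
typedef struct { long counter; } atomic64_model;

static long atomic64_read_model(const atomic64_model *v)
{
        return v->counter;      /* a long, which is 64-bit on arm64 (LP64) */
}

int main(void)
{
        atomic64_model failed = { 42 };

        /* printf("%llu\n", atomic64_read_model(&failed)) would warn:
         * "format '%llu' expects 'long long unsigned int', but argument
         * has type 'long int'" -- hence the explicit casts in
         * mb_seq_alloc_show() and ext4_mb_release() below. */
        printf("%llu\n", (unsigned long long)atomic64_read_model(&failed));
        return 0;
}
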
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 15c962f..7870406 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2104,6 +2104,20 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        return 0;
 }
 
+static u64 available_blocks_count(struct ext4_sb_info *sbi)
+{
+       ext4_fsblk_t resv_blocks;
+       u64 bfree;
+       struct ext4_super_block *es = sbi->s_es;
+
+       resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
+       bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
+                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
+
+       bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
+       return bfree - (ext4_r_blocks_count(es) + resv_blocks);
+}
+
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
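
available_blocks_count() above sums the free-cluster counter, subtracts clusters already reserved for delayed allocation, clamps at zero, converts clusters to blocks, and finally deducts the root and s_resv_clusters reservations. A self-contained userspace model of that arithmetic, with illustrative names and made-up counter values:

#include <stdint.h>
#include <stdio.h>

/* Userspace model of available_blocks_count(): free clusters minus
 * dirty (delalloc-reserved) clusters, clamped at zero, converted to
 * blocks, minus the root and internal reservations.  All names and
 * numbers are illustrative, not kernel API. */
static uint64_t available_blocks(int64_t free_clusters, int64_t dirty_clusters,
                                 uint64_t r_blocks, uint64_t resv_clusters,
                                 unsigned int cluster_bits)
{
        int64_t bfree = free_clusters - dirty_clusters;

        if (bfree < 0)                  /* max_t(s64, bfree, 0) */
                bfree = 0;
        /* EXT4_C2B(): a shift converts clusters to blocks */
        return ((uint64_t)bfree << cluster_bits) -
               (r_blocks + (resv_clusters << cluster_bits));
}

int main(void)
{
        /* bigalloc-style filesystem with 16 blocks per cluster */
        printf("%llu blocks available\n",
               (unsigned long long)available_blocks(50000, 1200, 8192, 64, 4));
        return 0;
}
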
@@ -2113,6 +2127,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
        struct ext4_sb_info *sbi;
        struct super_block *sb;
        struct ext4_buddy e4b;
+       ext4_fsblk_t avail_blocks;
 
        sb = ac->ac_sb;
        sbi = EXT4_SB(sb);
@@ -2165,6 +2180,21 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 
        /* Let's just scan groups to find more-less suitable blocks */
        cr = ac->ac_2order ? 0 : 1;
+
+       /* Choose the starting loop (criterion) based on disk fullness */
+       avail_blocks = available_blocks_count(sbi);
+
+       if (avail_blocks < sbi->s_mb_c3_blocks) {
+               cr = 3;
+               atomic64_inc(&sbi->s_bal_cX_skipped[2]);
+       } else if (avail_blocks < sbi->s_mb_c2_blocks) {
+               cr = 2;
+               atomic64_inc(&sbi->s_bal_cX_skipped[1]);
+       } else if (avail_blocks < sbi->s_mb_c1_blocks) {
+               cr = 1;
+               atomic64_inc(&sbi->s_bal_cX_skipped[0]);
+       }
+
        /*
         * cr == 0 try to get exact allocation,
         * cr == 3  try to get anything
@@ -2230,6 +2260,9 @@ repeat:
                        if (ac->ac_status != AC_STATUS_CONTINUE)
                                break;
                }
+               /* Processed all groups without finding blocks */
+               if (i == ngroups)
+                       atomic64_inc(&sbi->s_bal_cX_failed[cr]);
        }
 
        if (ac->ac_b_ex.fe_len > 0 && ac->ac_status != AC_STATUS_FOUND &&
@@ -2510,6 +2543,93 @@ const struct file_operations ext4_seq_mb_last_group_fops = {
        .write         = ext4_mb_last_group_write,
 };
 
+static int mb_seq_alloc_show(struct seq_file *seq, void *v)
+{
+       struct super_block *sb = seq->private;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       seq_printf(seq, "mballoc:\n");
+       seq_printf(seq, "\tblocks: %u\n", atomic_read(&sbi->s_bal_allocated));
+       seq_printf(seq, "\treqs: %u\n", atomic_read(&sbi->s_bal_reqs));
+       seq_printf(seq, "\tsuccess: %u\n", atomic_read(&sbi->s_bal_success));
+
+       seq_printf(seq, "\textents_scanned: %u\n",
+                  atomic_read(&sbi->s_bal_ex_scanned));
+       seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
+       seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
+       seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
+       seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));
+
+       seq_printf(seq, "\tuseless_c1_loops: %llu\n",
+                  (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[0]));
+       seq_printf(seq, "\tuseless_c2_loops: %llu\n",
+                  (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[1]));
+       seq_printf(seq, "\tuseless_c3_loops: %llu\n",
+                  (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[2]));
+       seq_printf(seq, "\tskipped_c1_loops: %llu\n",
+                  (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[0]));
+       seq_printf(seq, "\tskipped_c2_loops: %llu\n",
+                  (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[1]));
+       seq_printf(seq, "\tskipped_c3_loops: %llu\n",
+                  (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[2]));
+       seq_printf(seq, "\tbuddies_generated: %lu\n",
+                  sbi->s_mb_buddies_generated);
+       seq_printf(seq, "\tbuddies_time_used: %llu\n", sbi->s_mb_generation_time);
+       seq_printf(seq, "\tpreallocated: %u\n",
+                  atomic_read(&sbi->s_mb_preallocated));
+       seq_printf(seq, "\tdiscarded: %u\n",
+                  atomic_read(&sbi->s_mb_discarded));
+       return 0;
+}
+
+static ssize_t mb_seq_alloc_write(struct file *file,
+                             const char __user *buf,
+                             size_t cnt, loff_t *pos)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(PDE_DATA(file_inode(file)));
+
+       atomic_set(&sbi->s_bal_allocated, 0);
+       atomic_set(&sbi->s_bal_reqs, 0);
+       atomic_set(&sbi->s_bal_success, 0);
+
+       atomic_set(&sbi->s_bal_ex_scanned, 0);
+       atomic_set(&sbi->s_bal_goals, 0);
+       atomic_set(&sbi->s_bal_2orders, 0);
+       atomic_set(&sbi->s_bal_breaks, 0);
+       atomic_set(&sbi->s_mb_lost_chunks, 0);
+
+       atomic64_set(&sbi->s_bal_cX_failed[0], 0);
+       atomic64_set(&sbi->s_bal_cX_failed[1], 0);
+       atomic64_set(&sbi->s_bal_cX_failed[2], 0);
+       atomic64_set(&sbi->s_bal_cX_failed[3], 0);
+
+       atomic64_set(&sbi->s_bal_cX_skipped[0], 0);
+       atomic64_set(&sbi->s_bal_cX_skipped[1], 0);
+       atomic64_set(&sbi->s_bal_cX_skipped[2], 0);
+
+       sbi->s_mb_buddies_generated = 0;
+       sbi->s_mb_generation_time = 0;
+
+       atomic_set(&sbi->s_mb_preallocated, 0);
+       atomic_set(&sbi->s_mb_discarded, 0);
+
+       return cnt;
+}
+
+static int mb_seq_alloc_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, mb_seq_alloc_show, PDE_DATA(inode));
+}
+
+const struct file_operations ext4_mb_seq_alloc_fops = {
+       .owner          = THIS_MODULE,
+       .open           = mb_seq_alloc_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+       .write          = mb_seq_alloc_write,
+};
+
 int ext4_mb_seq_last_start_seq_show(struct seq_file *m, void *v)
 {
        struct ext4_sb_info *sbi = EXT4_SB(m->private);
@@ -2734,6 +2854,7 @@ static int ext4_groupinfo_create_slab(size_t size)
        return 0;
 }
 
+#define THRESHOLD_BLOCKS(ts) (ext4_blocks_count(sbi->s_es) / 100 * (ts))
 int ext4_mb_init(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2787,6 +2908,9 @@ int ext4_mb_init(struct super_block *sb)
        sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
        sbi->s_mb_stats = MB_DEFAULT_STATS;
        sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
+       sbi->s_mb_c1_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C1_THRESHOLD);
+       sbi->s_mb_c2_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C2_THRESHOLD);
+       sbi->s_mb_c3_blocks = THRESHOLD_BLOCKS(MB_DEFAULT_C3_THRESHOLD);
        /*
         * The default group preallocation is 512, which for 4k block
         * sizes translates to 2 megabytes.  However for bigalloc file
@@ -2922,6 +3046,16 @@ int ext4_mb_release(struct super_block *sb)
                                atomic_read(&sbi->s_bal_allocated),
                                atomic_read(&sbi->s_bal_reqs),
                                atomic_read(&sbi->s_bal_success));
+               ext4_msg(sb, KERN_INFO,
+                       "mballoc: (%llu, %llu, %llu) useless c(0,1,2) loops",
+                               (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[0]),
+                               (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[1]),
+                               (unsigned long long)atomic64_read(&sbi->s_bal_cX_failed[2]));
+               ext4_msg(sb, KERN_INFO,
+                       "mballoc: (%llu, %llu, %llu) skipped c(0,1,2) loops",
+                               (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[0]),
+                               (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[1]),
+                               (unsigned long long)atomic64_read(&sbi->s_bal_cX_skipped[2]));
                ext4_msg(sb, KERN_INFO,
                      "mballoc: %u extents scanned, %u goal hits, "
                                "%u 2^N hits, %u breaks, %u lost"
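
Taken together, these mballoc.c hunks start the group scan at a cheaper, less picky criterion the fuller the disk is: below the c1 threshold the exact-match pass (cr == 0) is skipped, below c2 the scan starts at cr == 2, and below c3 it goes straight to "take anything" (cr == 3), bumping the matching skip counter each time. A self-contained userspace sketch of that selection, assuming the default 25/15/5 percent thresholds; pick_criterion() is an illustrative stand-in, not the kernel function:

#include <stdint.h>
#include <stdio.h>

/* Userspace sketch of the starting-criterion selection added to
 * ext4_mb_regular_allocator().  Thresholds satisfy c3 < c2 < c1, so
 * the fullest case is tested first. */
static int pick_criterion(uint64_t avail, uint64_t c1, uint64_t c2,
                          uint64_t c3, int cr)
{
        if (avail < c3)
                cr = 3;         /* nearly full: accept anything */
        else if (avail < c2)
                cr = 2;
        else if (avail < c1)
                cr = 1;         /* skip the exact (cr == 0) pass */
        return cr;
}

int main(void)
{
        const uint64_t total = 1000000; /* blocks in a toy filesystem */
        /* Defaults from mballoc.h: 25%, 15%, 5% of the block count. */
        const uint64_t c1 = total / 100 * 25;
        const uint64_t c2 = total / 100 * 15;
        const uint64_t c3 = total / 100 * 5;
        const uint64_t tests[] = { 400000, 200000, 100000, 20000 };
        unsigned int i;

        for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
                printf("avail=%6llu -> start at cr=%d\n",
                       (unsigned long long)tests[i],
                       pick_criterion(tests[i], c1, c2, c3, 0));
        return 0;
}
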
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index e00c3b7..d02daaf 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -72,6 +72,9 @@ do {                                                                  \
  * for which requests use 2^N search using buddies
  */
 #define MB_DEFAULT_ORDER2_REQS         8
+#define MB_DEFAULT_C1_THRESHOLD                25
+#define MB_DEFAULT_C2_THRESHOLD                15
+#define MB_DEFAULT_C3_THRESHOLD                5
 
 /*
  * default group prealloc size 512 blocks
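
Note that THRESHOLD_BLOCKS() in mballoc.c divides before multiplying, which cannot overflow 64 bits on huge devices but truncates up to 99 blocks, while save_threshold_percent() in sysfs.c multiplies first. A small worked example of the two evaluation orders; the function names are illustrative, not part of ext4:

#include <stdint.h>
#include <stdio.h>

/* Worked example of the two percent -> blocks conversions in this
 * patch.  Illustrative userspace code only. */
static uint64_t threshold_blocks_init(uint64_t total, unsigned int pct)
{
        return total / 100 * pct;       /* THRESHOLD_BLOCKS(): divide first */
}

static uint64_t threshold_blocks_store(uint64_t total, unsigned int pct)
{
        return pct * total / 100;       /* save_threshold_percent(): multiply first */
}

int main(void)
{
        const uint64_t total = 268435455ULL;    /* not a multiple of 100 */

        /* Defaults: c1=25%, c3=5%.  The two evaluation orders differ by
         * at most pct blocks due to integer truncation. */
        printf("c1: init=%llu store=%llu\n",
               (unsigned long long)threshold_blocks_init(total, 25),
               (unsigned long long)threshold_blocks_store(total, 25));
        printf("c3: init=%llu store=%llu\n",
               (unsigned long long)threshold_blocks_init(total, 5),
               (unsigned long long)threshold_blocks_store(total, 5));
        return 0;
}
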
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 417b33a..f49821e 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -20,6 +20,9 @@
 typedef enum {
        attr_noop,
        attr_delayed_allocation_blocks,
+       attr_mb_c1_threshold,
+       attr_mb_c2_threshold,
+       attr_mb_c3_threshold,
        attr_session_write_kbytes,
        attr_lifetime_write_kbytes,
        attr_reserved_clusters,
@@ -134,6 +137,32 @@ static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
                        task_pid_vnr(sbi->s_journal->j_task));
 }
 
+#define THRESHOLD_PERCENT(ts) ((ts) * 100 / ext4_blocks_count(sbi->s_es))
+
+static int save_threshold_percent(struct ext4_sb_info *sbi, const char *buf,
+                                 ext4_fsblk_t *blocks)
+{
+       unsigned long long val;
+       int ret;
+
+       /* accept a percentage of total blocks in the range [0, 100] */
+       ret = kstrtoull(skip_spaces(buf), 0, &val);
+       if (ret || val > 100)
+               return -EINVAL;
+
+       *blocks = val * ext4_blocks_count(sbi->s_es) / 100;
+       return 0;
+}
+
+static ssize_t mb_threshold_store(struct ext4_sb_info *sbi,
+                                 const char *buf, size_t count,
+                                 ext4_fsblk_t *blocks)
+{
+       int ret = save_threshold_percent(sbi, buf, blocks);
+
+       return ret ?: count;
+}
+
 #define EXT4_ATTR(_name,_mode,_id)                                     \
 static struct ext4_attr ext4_attr_##_name = {                          \
        .attr = {.name = __stringify(_name), .mode = _mode },           \
@@ -176,6 +205,9 @@ EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444);
 EXT4_ATTR_FUNC(session_write_kbytes, 0444);
 EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
 EXT4_ATTR_FUNC(reserved_clusters, 0644);
+EXT4_ATTR_FUNC(mb_c1_threshold, 0644);
+EXT4_ATTR_FUNC(mb_c2_threshold, 0644);
+EXT4_ATTR_FUNC(mb_c3_threshold, 0644);
 
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
                 ext4_sb_info, s_inode_readahead_blks);
@@ -211,6 +243,9 @@ static struct attribute *ext4_attrs[] = {
        ATTR_LIST(session_write_kbytes),
        ATTR_LIST(lifetime_write_kbytes),
        ATTR_LIST(reserved_clusters),
+       ATTR_LIST(mb_c1_threshold),
+       ATTR_LIST(mb_c2_threshold),
+       ATTR_LIST(mb_c3_threshold),
        ATTR_LIST(inode_readahead_blks),
        ATTR_LIST(inode_goal),
        ATTR_LIST(max_dir_size),
@@ -294,6 +329,15 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
                return snprintf(buf, PAGE_SIZE, "%llu\n",
                                (s64) EXT4_C2B(sbi,
                       percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
+       case attr_mb_c1_threshold:
+               return scnprintf(buf, PAGE_SIZE, "%llu\n",
+                                THRESHOLD_PERCENT(sbi->s_mb_c1_blocks));
+       case attr_mb_c2_threshold:
+               return scnprintf(buf, PAGE_SIZE, "%llu\n",
+                                THRESHOLD_PERCENT(sbi->s_mb_c2_blocks));
+       case attr_mb_c3_threshold:
+               return scnprintf(buf, PAGE_SIZE, "%llu\n",
+                                THRESHOLD_PERCENT(sbi->s_mb_c3_blocks));
        case attr_session_write_kbytes:
                return session_write_kbytes_show(sbi, buf);
        case attr_lifetime_write_kbytes:
@@ -363,6 +407,12 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
                return inode_readahead_blks_store(sbi, buf, len);
        case attr_trigger_test_error:
                return trigger_test_error(sbi, buf, len);
+       case attr_mb_c1_threshold:
+               return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c1_blocks);
+       case attr_mb_c2_threshold:
+               return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c2_blocks);
+       case attr_mb_c3_threshold:
+               return mb_threshold_store(sbi, buf, len, &sbi->s_mb_c3_blocks);
        }
        return 0;
 }
@@ -425,6 +475,8 @@ int ext4_register_sysfs(struct super_block *sb)
                                &ext4_seq_mb_last_group_fops, sb);
                proc_create_single_data("mb_last_start", S_IRUGO, sbi->s_proc,
                                ext4_mb_seq_last_start_seq_show, sb);
+               proc_create_data("mb_alloc_stats", S_IFREG | S_IRUGO | S_IWUSR,
+                                sbi->s_proc, &ext4_mb_seq_alloc_fops, sb);
        }
        return 0;
 }
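
After the patch, each threshold is a writable percentage under /sys/fs/ext4/<dev>/, and the statistics live in /proc/fs/ext4/<dev>/mb_alloc_stats, where any write resets the counters (see mb_seq_alloc_write() above). A hedged userspace example; the sda1 device name is a placeholder for your filesystem:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Example consumer of the new knobs.  Paths assume a filesystem on
 * sda1; substitute your own device.  Minimal error handling. */
int main(void)
{
        const char *attr = "/sys/fs/ext4/sda1/mb_c1_threshold";
        const char *stats = "/proc/fs/ext4/sda1/mb_alloc_stats";
        char buf[512];
        ssize_t n;
        int fd;

        /* Read and update the c1 threshold (a percentage, 0-100;
         * save_threshold_percent() rejects anything larger). */
        fd = open(attr, O_RDWR);
        if (fd >= 0) {
                n = read(fd, buf, sizeof(buf) - 1);
                if (n > 0) {
                        buf[n] = '\0';
                        printf("mb_c1_threshold: %s", buf);  /* e.g. "25\n" */
                }
                lseek(fd, 0, SEEK_SET);
                if (write(fd, "30", 2) < 0)
                        perror("write threshold");
                close(fd);
        }

        /* Dump the allocator statistics; writing anything to this file
         * resets the counters. */
        fd = open(stats, O_RDONLY);
        if (fd >= 0) {
                while ((n = read(fd, buf, sizeof(buf))) > 0)
                        fwrite(buf, 1, (size_t)n, stdout);
                close(fd);
        }
        return 0;
}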