ldiskfs/kernel_patches/patches/rhel8/ext4-add-periodic-superblock-update.patch

   1 From 332f8696f462ae8affc0a44494423e4edd88bb29 Mon Sep 17 00:00:00 2001
   2 From: "Vitaliy Kuznetsov" <vk.en.mail@gmail.com>
   3 Date: Thu, 15 Jun 2023 11:17:14 +0300
   4 Subject: [PATCH] ext4: Add periodic superblock update check
   5
   6 This patch introduces a mechanism to periodically check and update
   7 the superblock within the ext4 file system. The main purpose of this
   8 patch is to keep the disk superblock up to date. The update will be
   9 performed if more than one hour has passed since the last update, and
  10 if more than 16MB of data have been written to disk.
  11
  12 The check runs in the ext4_journal_commit_callback function, which queues
  13 the update as work, ensuring that the superblock is written while the disk
  14 is active, rather than based on a timer that may trigger during disk idle
  15 periods.
  16
  17 Signed-off-by: Vitaliy Kuznetsov <vk.en.mail@gmail.com>
  18 ---
  19  fs/ext4/ext4.h  |   1 +
  20  fs/ext4/super.c | 186 +++++++++++++++++++++++++++++++++++++-----------
  21  2 files changed, 146 insertions(+), 41 deletions(-)
  22
  23 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
  24 index 7c7123f265c2..44f69f5c6931 100644
  25 --- a/fs/ext4/ext4.h
  26 +++ b/fs/ext4/ext4.h
  27 @@ -1446,6 +1446,7 @@ struct ext4_sb_info {
  28         /* for write statistics */
  29         unsigned long s_sectors_written_start;
  30         u64 s_kbytes_written;
  31 +       struct work_struct s_stats_work;
  32
  33         /* the size of zero-out chunk */
  34         unsigned int s_extent_max_zeroout_kb;
  35 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
  36 index 10c45b3b7910..9d6e8b48f775 100644
  37 --- a/fs/ext4/super.c
  38 +++ b/fs/ext4/super.c
  39 @@ -64,6 +64,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit;
  40  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  41                              unsigned long journal_devnum);
  42  static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  43 +static void ext4_update_super(struct super_block *sb, int sync);
  44  static int ext4_commit_super(struct super_block *sb, int sync);
  45  static void ext4_mark_recovery_complete(struct super_block *sb,
  46                                         struct ext4_super_block *es);
  47 @@ -469,6 +470,90 @@ static int block_device_ejected(struct super_block *sb)
  48         return bdi->dev == NULL;
  49  }
  50
  51 +/* Work handler: journal a refresh of the superblock statistics. */
  52 +static void flush_stashed_stats_work(struct work_struct *work)
  53 +{
  54 +       struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
  55 +                                               s_stats_work);
  56 +       journal_t *journal = sbi->s_journal;
  57 +       struct buffer_head *sbh = sbi->s_sbh;
  58 +       handle_t *handle;
  59 +
  60 +       /*
  61 +        * The superblock is written out through the journal to avoid
  62 +        * collisions with other journalled sb updates, so there is
  63 +        * nothing to do on a read-only fs or when there is no journal.
  64 +        */
  65 +       if (sb_rdonly(sbi->s_sb) || !journal)
  66 +               return;
  67 +
  68 +       handle = jbd2_journal_start(journal, 1);
  69 +       if (IS_ERR(handle))
  70 +               return;
  71 +
  72 +       if (jbd2_journal_get_write_access(handle, sbh)) {
  73 +               jbd2_journal_stop(handle);
  74 +               return;
  75 +       }
  76 +
  77 +       ext4_update_super(sbi->s_sb, 1);
  78 +       jbd2_journal_dirty_metadata(handle, sbh);
  79 +       jbd2_journal_stop(handle);
  80 +}
  81 +
  82 +#define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */
  83 +#define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */
  84 +
  85 +/*
  86 + * ext4_maybe_update_superblock() - queue a superblock refresh when due
  87 + * @sb: The superblock
  88 + *
  89 + * Limits how often the on-disk superblock is updated, to prevent
  90 + * excessive disk writes and unnecessary waking of the disk from sleep.
  91 + * Nothing happens on a read-only filesystem. Otherwise the refresh work
  92 + * is scheduled only when both of the following hold:
  93 + * 1. At least an hour has passed since the superblock write time, and
  94 + * 2. More than 16MB have been written beyond the total recorded in the
  95 + *    superblock.
  96 + */
  97 +static void ext4_maybe_update_superblock(struct super_block *sb)
  98 +{
  99 +       struct ext4_sb_info *sbi = EXT4_SB(sb);
 100 +       struct ext4_super_block *es = sbi->s_es;
 101 +       __u64 kbytes_total;
 102 +       __u64 kbytes_pending;
 103 +       __u64 wtime;
 104 +       time64_t now;
 105 +
 106 +       /* Never schedule an update for a read-only filesystem. */
 107 +       if (sb_rdonly(sb))
 108 +               return;
 109 +
 110 +       /* Common case: the last update is recent, nothing to do. */
 111 +       now = ktime_get_real_seconds();
 112 +       wtime = ext4_get_tstamp(es, s_wtime);
 113 +       if (likely(now - wtime < EXT4_SB_REFRESH_INTERVAL_SEC))
 114 +               return;
 115 +
 116 +       /* Saved total plus sectors written since the saved start (>> 1 = KB). */
 117 +       kbytes_total = sbi->s_kbytes_written;
 118 +       if (likely(sb->s_bdev->bd_part))
 119 +               kbytes_total +=
 120 +                       (part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]) -
 121 +                        sbi->s_sectors_written_start) >> 1;
 122 +
 123 +       /*
 124 +        * Kilobytes written but not yet reflected in the on-disk
 125 +        * statistics. Requiring more than 16MB keeps superblock commits
 126 +        * to at most one per 16MB when less was written in an hour.
 127 +        */
 128 +       kbytes_pending = kbytes_total - le64_to_cpu(es->s_kbytes_written);
 129 +       if (kbytes_pending <= EXT4_SB_REFRESH_INTERVAL_KB)
 130 +               return;
 131 +
 132 +       schedule_work(&sbi->s_stats_work);
 133 +}
 134 +
 135  static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 136  {
 137         struct super_block              *sb = journal->j_private;
 138 @@ -479,7 +564,7 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 139         BUG_ON(txn->t_state == T_FINISHED);
 140
 141         ext4_process_freed_data(sb, txn->t_tid);
 142 -
 143 +       ext4_maybe_update_superblock(sb);
 144         spin_lock(&sbi->s_md_lock);
 145         while (!list_empty(&txn->t_private_list)) {
 146                 jce = list_entry(txn->t_private_list.next,
 147 @@ -1023,6 +1108,7 @@ static void ext4_put_super(struct super_block *sb)
 148
 149         ext4_unregister_li_request(sb);
 150         ext4_quota_off_umount(sb);
 151 +       flush_work(&sbi->s_stats_work);
 152
 153         destroy_workqueue(sbi->rsv_conversion_wq);
 154
 155 @@ -4373,6 +4459,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 156         }
 157
 158         timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
 159 +       INIT_WORK(&sbi->s_stats_work, flush_stashed_stats_work);
 160
 161         /* Register extent status tree shrinker */
 162         if (ext4_es_register_shrinker(sbi))
 163 @@ -4985,6 +5072,53 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 164         return NULL;
 165  }
 166
 167 +/*
 168 + * Refresh write time, kbytes written, free block/inode counts and the
 169 + * checksum of the in-memory superblock; @sync locks sbh meanwhile.
 170 + */
 171 +static void ext4_update_super(struct super_block *sb, int sync)
 172 +{
 173 +       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 174 +       struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 175 +       if (sync)
 176 +               lock_buffer(sbh);
 177 +
 178 +       /*
 179 +        * Don't update the superblock write time on a read-only mount.
 180 +        * Otherwise, replaying the journal while mounting the root fs
 181 +        * read-only with a clock that ticks in localtime east of GMT
 182 +        * (Windows compatibility) would record a time in the future,
 183 +        * making e2fsck complain and force a full file system check.
 184 +        */
 185 +       if (!(sb->s_flags & SB_RDONLY))
 186 +               ext4_update_tstamp(es, s_wtime);
 187 +
 188 +       if (sb->s_bdev->bd_part)
 189 +               es->s_kbytes_written =
 190 +                       cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
 191 +                           ((part_stat_read(sb->s_bdev->bd_part,
 192 +                                            sectors[STAT_WRITE]) -
 193 +                             EXT4_SB(sb)->s_sectors_written_start) >> 1));
 194 +       else
 195 +               es->s_kbytes_written =
 196 +                       cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
 197 +
 198 +       if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
 199 +               ext4_free_blocks_count_set(es,
 200 +                       EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
 201 +                               &EXT4_SB(sb)->s_freeclusters_counter)));
 202 +
 203 +       if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
 204 +               es->s_free_inodes_count =
 205 +                       cpu_to_le32(percpu_counter_sum_positive(
 206 +                               &EXT4_SB(sb)->s_freeinodes_counter));
 207 +
 208 +       /* The fields above changed, so the checksum must be recomputed. */
 209 +       ext4_superblock_csum_set(sb);
 210 +       if (sync)
 211 +               unlock_buffer(sbh);
 212 +}
 213 +
 214  static int ext4_load_journal(struct super_block *sb,
 215                              struct ext4_super_block *es,
 216                              unsigned long journal_devnum)
 217 @@ -5095,7 +5229,6 @@ static int ext4_load_journal(struct super_block *sb,
 218
 219  static int ext4_commit_super(struct super_block *sb, int sync)
 220  {
 221 -       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 222         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 223         int error = 0;
 224
 225 @@ -5109,41 +5242,9 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 226          * device was hot-removed. Not much we can do but fail the I/O.
 227          */
 228         if (!buffer_mapped(sbh))
 229 -               return error;
 230 +               return -EIO;
 231
 232 -       /*
 233 -        * If the file system is mounted read-only, don't update the
 234 -        * superblock write time.  This avoids updating the superblock
 235 -        * write time when we are mounting the root file system
 236 -        * read/only but we need to replay the journal; at that point,
 237 -        * for people who are east of GMT and who make their clock
 238 -        * tick in localtime for Windows bug-for-bug compatibility,
 239 -        * the clock is set in the future, and this will cause e2fsck
 240 -        * to complain and force a full file system check.
 241 -        */
 242 -       if (!(sb->s_flags & SB_RDONLY))
 243 -               ext4_update_tstamp(es, s_wtime);
 244 -       if (sb->s_bdev->bd_part)
 245 -               es->s_kbytes_written =
 246 -                       cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
 247 -                           ((part_stat_read(sb->s_bdev->bd_part,
 248 -                                            sectors[STAT_WRITE]) -
 249 -                             EXT4_SB(sb)->s_sectors_written_start) >> 1));
 250 -       else
 251 -               es->s_kbytes_written =
 252 -                       cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
 253 -       if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
 254 -               ext4_free_blocks_count_set(es,
 255 -                       EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
 256 -                               &EXT4_SB(sb)->s_freeclusters_counter)));
 257 -       if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
 258 -               es->s_free_inodes_count =
 259 -                       cpu_to_le32(percpu_counter_sum_positive(
 260 -                               &EXT4_SB(sb)->s_freeinodes_counter));
 261 -       BUFFER_TRACE(sbh, "marking dirty");
 262 -       ext4_superblock_csum_set(sb);
 263 -       if (sync)
 264 -               lock_buffer(sbh);
 265 +       ext4_update_super(sb, sync);
 266         if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
 267                 /*
 268                  * Oh, dear.  A previous attempt to write the
 269 @@ -5153,19 +5254,19 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 270                  * be remapped.  Nothing we can do but to retry the
 271                  * write and hope for the best.
 272                  */
 273 -               ext4_msg(sb, KERN_ERR, "previous I/O error to "
 274 -                      "superblock detected");
 275 +               ext4_msg(sb, KERN_ERR,
 276 +                                "previous I/O error to superblock detected");
 277                 clear_buffer_write_io_error(sbh);
 278                 set_buffer_uptodate(sbh);
 279         }
 280 +       BUFFER_TRACE(sbh, "marking dirty");
 281         mark_buffer_dirty(sbh);
 282         if (sync) {
 283 -               unlock_buffer(sbh);
 284                 error = __sync_dirty_buffer(sbh,
 285                         REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
 286                 if (buffer_write_io_error(sbh)) {
 287 -                       ext4_msg(sb, KERN_ERR, "I/O error while writing "
 288 -                              "superblock");
 289 +                       ext4_msg(sb, KERN_ERR,
 290 +                                        "I/O error while writing superblock");
 291                         clear_buffer_write_io_error(sbh);
 292                         set_buffer_uptodate(sbh);
 293                 }
 294 @@ -5483,6 +5584,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 295                 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 296         }
 297
 298 +       /* Flush stats before changing fs state */
 299 +       flush_work(&sbi->s_stats_work);
 300 +
 301         if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
 302                 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
 303                         err = -EROFS;
 304 --