Whamcloud - gitweb
LU-14958 kernel: use rhashtable for revoke records in jbd2 30/50730/2
author Alex Zhuravlev <bzzz@whamcloud.com>
Mon, 24 Apr 2023 19:56:51 +0000 (12:56 -0700)
committer Oleg Drokin <green@whamcloud.com>
Sat, 29 Apr 2023 01:47:25 +0000 (01:47 +0000)
A resizable hashtable (rhashtable) should improve journal replay time when
the journal has millions of revoke records. Note that the
rhashtable is used during replay only, because removal with list_del()
is less expensive and is used a lot during regular processing.

before:
1048576 records - 95 seconds
2097152 records - 580 seconds

after:
1048576 records - 2 seconds
2097152 records - 3 seconds
4194304 records - 7 seconds

Lustre-change: https://review.whamcloud.com/45122
Lustre-commit: c3bb2b778d6b40a5cecb01993b55fcc107305b4a

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I9a9e3801223fa9e36cbf6d2ef5ddbad5dff3e19d
Reviewed-by: jsimmons <jsimmons@infradead.org>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50730
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
16 files changed:
lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel7.patch [new file with mode: 0644]
lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel8.4.patch [new file with mode: 0644]
lustre/kernel_patches/series/3.10-rhel7.6.series
lustre/kernel_patches/series/3.10-rhel7.7.series
lustre/kernel_patches/series/3.10-rhel7.8.series
lustre/kernel_patches/series/3.10-rhel7.9.series
lustre/kernel_patches/series/4.14-rhel7.5.series
lustre/kernel_patches/series/4.14-rhel7.6.series
lustre/kernel_patches/series/4.18-rhel8.1.series
lustre/kernel_patches/series/4.18-rhel8.2.series
lustre/kernel_patches/series/4.18-rhel8.3.series
lustre/kernel_patches/series/4.18-rhel8.4.series
lustre/kernel_patches/series/4.18-rhel8.5.series
lustre/kernel_patches/series/4.18-rhel8.6.series
lustre/kernel_patches/series/4.18-rhel8.7.series
lustre/kernel_patches/series/4.18-rhel8.series

diff --git a/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel7.patch b/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel7.patch
new file mode 100644 (file)
index 0000000..3b374b4
--- /dev/null
@@ -0,0 +1,179 @@
+Index: linux-3.10.0-1160.80.1.el7/fs/jbd2/recovery.c
+===================================================================
+--- linux-3.10.0-1160.80.1.el7.orig/fs/jbd2/recovery.c
++++ linux-3.10.0-1160.80.1.el7/fs/jbd2/recovery.c
+@@ -22,6 +22,7 @@
+ #include <linux/errno.h>
+ #include <linux/crc32.h>
+ #include <linux/blkdev.h>
++#include <linux/rhashtable.h>
+ #endif
+ /*
+@@ -255,6 +256,10 @@ int jbd2_journal_recover(journal_t *jour
+       memset(&info, 0, sizeof(info));
+       sb = journal->j_superblock;
++      err = jbd2_journal_init_recovery_revoke(journal);
++      if (err)
++              return err;
++
+       /*
+        * The journal superblock's s_start field (the current log head)
+        * is always zero if, and only if, the journal was cleanly
+Index: linux-3.10.0-1160.80.1.el7/fs/jbd2/revoke.c
+===================================================================
+--- linux-3.10.0-1160.80.1.el7.orig/fs/jbd2/revoke.c
++++ linux-3.10.0-1160.80.1.el7/fs/jbd2/revoke.c
+@@ -93,6 +93,7 @@
+ #include <linux/bio.h>
+ #include <linux/log2.h>
+ #include <linux/hash.h>
++#include <linux/rhashtable.h>
+ #endif
+ static struct kmem_cache *jbd2_revoke_record_cache;
+@@ -104,7 +105,10 @@ static struct kmem_cache *jbd2_revoke_ta
+ struct jbd2_revoke_record_s
+ {
+-      struct list_head  hash;
++      union {
++              struct list_head  hash;
++              struct rhash_head linkage;
++      };
+       tid_t             sequence;     /* Used for recovery only */
+       unsigned long long        blocknr;
+ };
+@@ -701,13 +705,21 @@ static void flush_descriptor(journal_t *
+  * single block.
+  */
++static const struct rhashtable_params revoke_rhashtable_params = { /* describes jbd2_revoke_record_s to rhashtable */
++      .key_len     = sizeof(unsigned long long), /* same type as the blocknr field */
++      .key_offset  = offsetof(struct jbd2_revoke_record_s, blocknr), /* records are keyed by blocknr */
++      .head_offset = offsetof(struct jbd2_revoke_record_s, linkage), /* rhash_head embedded in the record */
++};
++
+ int jbd2_journal_set_revoke(journal_t *journal,
+                      unsigned long long blocknr,
+                      tid_t sequence)
+ {
+-      struct jbd2_revoke_record_s *record;
++      struct jbd2_revoke_record_s *record, *old;
++      gfp_t gfp_mask = GFP_NOFS;
+-      record = find_revoke_record(journal, blocknr);
++      record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
++                              revoke_rhashtable_params);
+       if (record) {
+               /* If we have multiple occurrences, only record the
+                * latest sequence number in the hashed record */
+@@ -715,7 +727,24 @@ int jbd2_journal_set_revoke(journal_t *j
+                       record->sequence = sequence;
+               return 0;
+       }
+-      return insert_revoke_hash(journal, blocknr, sequence);
++
++      if (journal_oom_retry)
++              gfp_mask |= __GFP_NOFAIL;
++      record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
++      if (!record)
++              return -ENOMEM;
++
++      record->sequence = sequence;
++      record->blocknr = blocknr;
++      old = rhashtable_lookup_get_insert_fast(&journal->j_revoke_rhtable,
++                               &record->linkage, revoke_rhashtable_params);
++      if (IS_ERR(old)) {
++              kmem_cache_free(jbd2_revoke_record_cache, record);
++              return PTR_ERR(old);
++      }
++      BUG_ON(old != NULL);
++
++      return 0;
+ }
+ /*
+@@ -731,7 +760,8 @@ int jbd2_journal_test_revoke(journal_t *
+ {
+       struct jbd2_revoke_record_s *record;
+-      record = find_revoke_record(journal, blocknr);
++      record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
++                              revoke_rhashtable_params);
+       if (!record)
+               return 0;
+       if (tid_gt(sequence, record->sequence))
+@@ -739,6 +769,17 @@ int jbd2_journal_test_revoke(journal_t *
+       return 1;
+ }
++int jbd2_journal_init_recovery_revoke(journal_t *journal)
++{ /* Initialise journal->j_revoke_rhtable; called from jbd2_journal_recover(). */
++      return rhashtable_init(&journal->j_revoke_rhtable,
++                              &revoke_rhashtable_params); /* rhashtable_init() result is returned unchanged */
++}
++ 
++static void jbd2_revoke_record_free(void *ptr, void *arg)
++{ /* Callback passed to rhashtable_free_and_destroy() by jbd2_journal_clear_revoke(). */
++      kmem_cache_free(jbd2_revoke_record_cache, ptr); /* arg is not used */
++}
++ 
+ /*
+  * Finally, once recovery is over, we need to clear the revoke table so
+  * that it can be reused by the running filesystem.
+@@ -746,19 +787,6 @@ int jbd2_journal_test_revoke(journal_t *
+ void jbd2_journal_clear_revoke(journal_t *journal)
+ {
+-      int i;
+-      struct list_head *hash_list;
+-      struct jbd2_revoke_record_s *record;
+-      struct jbd2_revoke_table_s *revoke;
+-
+-      revoke = journal->j_revoke;
+-
+-      for (i = 0; i < revoke->hash_size; i++) {
+-              hash_list = &revoke->hash_table[i];
+-              while (!list_empty(hash_list)) {
+-                      record = (struct jbd2_revoke_record_s*) hash_list->next;
+-                      list_del(&record->hash);
+-                      kmem_cache_free(jbd2_revoke_record_cache, record);
+-              }
+-      }
++      rhashtable_free_and_destroy(&journal->j_revoke_rhtable,
++                                      jbd2_revoke_record_free, NULL);
+ }
+Index: linux-3.10.0-1160.80.1.el7/include/linux/jbd2.h
+===================================================================
+--- linux-3.10.0-1160.80.1.el7.orig/include/linux/jbd2.h
++++ linux-3.10.0-1160.80.1.el7/include/linux/jbd2.h
+@@ -31,6 +31,7 @@
+ #include <linux/timer.h>
+ #include <linux/slab.h>
+ #include <crypto/hash.h>
++#include <linux/rhashtable.h>
+ #endif
+ #define journal_oom_retry 1
+@@ -944,6 +945,11 @@ struct journal_s
+       struct jbd2_revoke_table_s *j_revoke_table[2];
+       /*
++       * rhashtable for revoke records during recovery
++       */
++      struct rhashtable       j_revoke_rhtable;
++
++      /*
+        * array of bhs for jbd2_journal_commit_transaction
+        */
+       struct buffer_head      **j_wbuf;
+@@ -1231,6 +1237,7 @@ extern void         jbd2_journal_write_revoke
+ /* Recovery revoke support */
+ extern int    jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
+ extern int    jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
++extern int    jbd2_journal_init_recovery_revoke(journal_t *);
+ extern void   jbd2_journal_clear_revoke(journal_t *);
+ extern void   jbd2_journal_switch_revoke_table(journal_t *journal);
+ extern void   jbd2_clear_buffer_revoked_flags(journal_t *journal);
diff --git a/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel8.4.patch b/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel8.4.patch
new file mode 100644 (file)
index 0000000..d574ac6
--- /dev/null
@@ -0,0 +1,171 @@
+Index: linux-4.18.0-425.3.1.el8/fs/jbd2/recovery.c
+===================================================================
+--- linux-4.18.0-425.3.1.el8.orig/fs/jbd2/recovery.c
++++ linux-4.18.0-425.3.1.el8/fs/jbd2/recovery.c
+@@ -19,6 +19,7 @@
+ #include <linux/errno.h>
+ #include <linux/crc32.h>
+ #include <linux/blkdev.h>
++#include <linux/rhashtable.h>
+ #endif
+ /*
+@@ -251,6 +252,10 @@ int jbd2_journal_recover(journal_t *jour
+       memset(&info, 0, sizeof(info));
+       sb = journal->j_superblock;
++      err = jbd2_journal_init_recovery_revoke(journal);
++      if (err)
++              return err;
++
+       /*
+        * The journal superblock's s_start field (the current log head)
+        * is always zero if, and only if, the journal was cleanly
+Index: linux-4.18.0-425.3.1.el8/fs/jbd2/revoke.c
+===================================================================
+--- linux-4.18.0-425.3.1.el8.orig/fs/jbd2/revoke.c
++++ linux-4.18.0-425.3.1.el8/fs/jbd2/revoke.c
+@@ -90,6 +90,7 @@
+ #include <linux/bio.h>
+ #include <linux/log2.h>
+ #include <linux/hash.h>
++#include <linux/rhashtable.h>
+ #endif
+ static struct kmem_cache *jbd2_revoke_record_cache;
+@@ -101,7 +102,10 @@ static struct kmem_cache *jbd2_revoke_ta
+ struct jbd2_revoke_record_s
+ {
+-      struct list_head  hash;
++      union {
++              struct list_head  hash;
++              struct rhash_head linkage;
++      };
+       tid_t             sequence;     /* Used for recovery only */
+       unsigned long long        blocknr;
+ };
+@@ -680,13 +684,21 @@ static void flush_descriptor(journal_t *
+  * single block.
+  */
++static const struct rhashtable_params revoke_rhashtable_params = { /* describes jbd2_revoke_record_s to rhashtable */
++      .key_len     = sizeof(unsigned long long), /* same type as the blocknr field */
++      .key_offset  = offsetof(struct jbd2_revoke_record_s, blocknr), /* records are keyed by blocknr */
++      .head_offset = offsetof(struct jbd2_revoke_record_s, linkage), /* rhash_head embedded in the record */
++};
++
+ int jbd2_journal_set_revoke(journal_t *journal,
+                      unsigned long long blocknr,
+                      tid_t sequence)
+ {
+-      struct jbd2_revoke_record_s *record;
++      struct jbd2_revoke_record_s *record, *old;
++      gfp_t gfp_mask = GFP_NOFS;
+-      record = find_revoke_record(journal, blocknr);
++      record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
++                              revoke_rhashtable_params);
+       if (record) {
+               /* If we have multiple occurrences, only record the
+                * latest sequence number in the hashed record */
+@@ -694,7 +706,24 @@ int jbd2_journal_set_revoke(journal_t *j
+                       record->sequence = sequence;
+               return 0;
+       }
+-      return insert_revoke_hash(journal, blocknr, sequence);
++
++      if (journal_oom_retry)
++              gfp_mask |= __GFP_NOFAIL;
++      record = kmem_cache_alloc(jbd2_revoke_record_cache, gfp_mask);
++      if (!record)
++              return -ENOMEM;
++
++      record->sequence = sequence;
++      record->blocknr = blocknr;
++      old = rhashtable_lookup_get_insert_fast(&journal->j_revoke_rhtable,
++                               &record->linkage, revoke_rhashtable_params);
++      if (IS_ERR(old)) {
++              kmem_cache_free(jbd2_revoke_record_cache, record);
++              return PTR_ERR(old);
++      }
++      BUG_ON(old != NULL);
++
++      return 0;
+ }
+ /*
+@@ -710,7 +739,8 @@ int jbd2_journal_test_revoke(journal_t *
+ {
+       struct jbd2_revoke_record_s *record;
+-      record = find_revoke_record(journal, blocknr);
++      record = rhashtable_lookup(&journal->j_revoke_rhtable, &blocknr,
++                              revoke_rhashtable_params);
+       if (!record)
+               return 0;
+       if (tid_gt(sequence, record->sequence))
+@@ -718,6 +748,17 @@ int jbd2_journal_test_revoke(journal_t *
+       return 1;
+ }
++int jbd2_journal_init_recovery_revoke(journal_t *journal)
++{ /* Initialise journal->j_revoke_rhtable; called from jbd2_journal_recover(). */
++      return rhashtable_init(&journal->j_revoke_rhtable,
++                              &revoke_rhashtable_params); /* rhashtable_init() result is returned unchanged */
++}
++ 
++static void jbd2_revoke_record_free(void *ptr, void *arg)
++{ /* Callback passed to rhashtable_free_and_destroy() by jbd2_journal_clear_revoke(). */
++      kmem_cache_free(jbd2_revoke_record_cache, ptr); /* arg is not used */
++}
++ 
+ /*
+  * Finally, once recovery is over, we need to clear the revoke table so
+  * that it can be reused by the running filesystem.
+@@ -725,19 +766,6 @@ int jbd2_journal_test_revoke(journal_t *
+ void jbd2_journal_clear_revoke(journal_t *journal)
+ {
+-      int i;
+-      struct list_head *hash_list;
+-      struct jbd2_revoke_record_s *record;
+-      struct jbd2_revoke_table_s *revoke;
+-
+-      revoke = journal->j_revoke;
+-
+-      for (i = 0; i < revoke->hash_size; i++) {
+-              hash_list = &revoke->hash_table[i];
+-              while (!list_empty(hash_list)) {
+-                      record = (struct jbd2_revoke_record_s*) hash_list->next;
+-                      list_del(&record->hash);
+-                      kmem_cache_free(jbd2_revoke_record_cache, record);
+-              }
+-      }
++      rhashtable_free_and_destroy(&journal->j_revoke_rhtable,
++                                      jbd2_revoke_record_free, NULL);
+ }
+Index: linux-4.18.0-425.3.1.el8/include/linux/jbd2.h
+===================================================================
+--- linux-4.18.0-425.3.1.el8.orig/include/linux/jbd2.h
++++ linux-4.18.0-425.3.1.el8/include/linux/jbd2.h
+@@ -1084,6 +1084,11 @@ struct journal_s
+       struct jbd2_revoke_table_s *j_revoke_table[2];
+       /**
++       * @j_revoke_rhtable:   rhashtable for revoke records during recovery
++       */
++      struct rhashtable       j_revoke_rhtable;
++
++      /**
+        * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction.
+        */
+       struct buffer_head      **j_wbuf;
+@@ -1508,6 +1513,7 @@ extern void         jbd2_journal_write_revoke
+ /* Recovery revoke support */
+ extern int    jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
+ extern int    jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
++extern int    jbd2_journal_init_recovery_revoke(journal_t *);
+ extern void   jbd2_journal_clear_revoke(journal_t *);
+ extern void   jbd2_journal_switch_revoke_table(journal_t *journal);
+ extern void   jbd2_clear_buffer_revoked_flags(journal_t *journal);
index e1db1d4..7d36af7 100644 (file)
@@ -3,6 +3,7 @@ blkdev_tunables-3.9.patch
 vfs-project-quotas-rhel7.patch
 fix-integrity-verify-rhel7.patch
 fix-sd-dif-complete-rhel7.patch
+jbd2-revoke-rhashtable-rhel7.patch
 block-integrity-allow-optional-integrity-functions-rhel7.patch
 block-pass-bio-into-integrity_processing_fn-rhel7.patch
 dm-fix-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch
index 7788a59..8797e3c 100644 (file)
@@ -3,6 +3,7 @@ blkdev_tunables-3.9.patch
 vfs-project-quotas-rhel7.patch
 fix-integrity-verify-rhel7.patch
 fix-sd-dif-complete-rhel7.patch
+jbd2-revoke-rhashtable-rhel7.patch
 block-integrity-allow-optional-integrity-functions-rhel7.patch
 block-pass-bio-into-integrity_processing_fn-rhel7.patch
 block-Ensure-we-only-enable-integrity-metadata-for-reads-and-writes-rhel7.patch
index 059dd33..4bf1d23 100644 (file)
@@ -1,6 +1,7 @@
 vfs-project-quotas-rhel7.patch
 fix-integrity-verify-rhel7.patch
 fix-sd-dif-complete-rhel7.patch
+jbd2-revoke-rhashtable-rhel7.patch
 block-integrity-allow-optional-integrity-functions-rhel7.patch
 block-pass-bio-into-integrity_processing_fn-rhel7.patch
 block-Ensure-we-only-enable-integrity-metadata-for-reads-and-writes-rhel7.patch
index f9577dc..7f952f0 100644 (file)
@@ -1,5 +1,6 @@
 vfs-project-quotas-rhel7.patch
 fix-integrity-verify-rhel7.patch
+jbd2-revoke-rhashtable-rhel7.patch
 block-integrity-allow-optional-integrity-functions-rhel7.patch
 block-pass-bio-into-integrity_processing_fn-rhel7.patch
 block-Ensure-we-only-enable-integrity-metadata-for-reads-and-writes-rhel7.patch
index 67fc309..9f9cce9 100644 (file)
@@ -1,3 +1,4 @@
 block-bio-integrity-Advance-seed-correctly-for-large.patch
 block-integrity-allow-optional-integrity-functions-rhel8.patch
 block-pass-bio-into-integrity_processing_fn-rhel8.patch
+jbd2-revoke-rhashtable-rhel8.4.patch
index 67fc309..9f9cce9 100644 (file)
@@ -1,3 +1,4 @@
 block-bio-integrity-Advance-seed-correctly-for-large.patch
 block-integrity-allow-optional-integrity-functions-rhel8.patch
 block-pass-bio-into-integrity_processing_fn-rhel8.patch
+jbd2-revoke-rhashtable-rhel8.4.patch
index 7c2908c..0719776 100644 (file)
@@ -1,3 +1,4 @@
 block-bio-integrity-Advance-seed-correctly-for-large.patch
 block-integrity-allow-optional-integrity-functions-rhel8.3.patch
 block-pass-bio-into-integrity_processing_fn-rhel8.patch
+jbd2-revoke-rhashtable-rhel8.4.patch
index 7c2908c..0719776 100644 (file)
@@ -1,3 +1,4 @@
 block-bio-integrity-Advance-seed-correctly-for-large.patch
 block-integrity-allow-optional-integrity-functions-rhel8.3.patch
 block-pass-bio-into-integrity_processing_fn-rhel8.patch
+jbd2-revoke-rhashtable-rhel8.4.patch
index 7c2908c..0719776 100644 (file)
@@ -1,3 +1,4 @@
 block-bio-integrity-Advance-seed-correctly-for-large.patch
 block-integrity-allow-optional-integrity-functions-rhel8.3.patch
 block-pass-bio-into-integrity_processing_fn-rhel8.patch
+jbd2-revoke-rhashtable-rhel8.4.patch
index 7c2908c..0719776 100644 (file)
@@ -1,3 +1,4 @@
 block-bio-integrity-Advance-seed-correctly-for-large.patch
 block-integrity-allow-optional-integrity-functions-rhel8.3.patch
 block-pass-bio-into-integrity_processing_fn-rhel8.patch
+jbd2-revoke-rhashtable-rhel8.4.patch
index ccf4b53..4b95dad 100644 (file)
@@ -1,2 +1,3 @@
 block-integrity-allow-optional-integrity-functions-rhel8.3.patch
 block-pass-bio-into-integrity_processing_fn-rhel8.patch
+jbd2-revoke-rhashtable-rhel8.4.patch
index 67fc309..9f9cce9 100644 (file)
@@ -1,3 +1,4 @@
 block-bio-integrity-Advance-seed-correctly-for-large.patch
 block-integrity-allow-optional-integrity-functions-rhel8.patch
 block-pass-bio-into-integrity_processing_fn-rhel8.patch
+jbd2-revoke-rhashtable-rhel8.4.patch