From cc2996585c489632fc8fb02ebf61d755a339361a Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Thu, 26 Aug 2021 19:52:27 +0300 Subject: [PATCH] LU-14958 kernel: use rhashtable for revoke records in jbd2 A resizable hash table should improve journal replay time when the journal contains millions of revoke records. before: 1048576 records - 95 seconds 2097152 records - 580 seconds after: 1048576 records - 2 seconds 2097152 records - 3 seconds 4194304 records - 7 seconds Signed-off-by: Alex Zhuravlev Change-Id: I9a9e3801223fa9e36cbf6d2ef5ddbad5dff3e19d --- .../patches/jbd2-revoke-rhashtable-rhel7.patch | 440 ++++++++++++++++++++ .../patches/jbd2-revoke-rhashtable-rhel8.4.patch | 456 +++++++++++++++++++++ lustre/kernel_patches/series/3.10-rhel7.6.series | 1 + lustre/kernel_patches/series/3.10-rhel7.7.series | 1 + lustre/kernel_patches/series/3.10-rhel7.8.series | 1 + lustre/kernel_patches/series/3.10-rhel7.9.series | 1 + lustre/kernel_patches/series/4.14-rhel7.5.series | 1 + lustre/kernel_patches/series/4.14-rhel7.6.series | 1 + lustre/kernel_patches/series/4.18-rhel8.1.series | 1 + lustre/kernel_patches/series/4.18-rhel8.2.series | 1 + lustre/kernel_patches/series/4.18-rhel8.3.series | 1 + lustre/kernel_patches/series/4.18-rhel8.4.series | 1 + lustre/kernel_patches/series/4.18-rhel8.5.series | 1 + lustre/kernel_patches/series/4.18-rhel8.series | 1 + 14 files changed, 908 insertions(+) create mode 100644 lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel7.patch create mode 100644 lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel8.4.patch diff --git a/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel7.patch b/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel7.patch new file mode 100644 index 0000000..b24901d --- /dev/null +++ b/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel7.patch @@ -0,0 +1,440 @@ +Index: linux-3.10.0-1160.21.1.el7/fs/jbd2/journal.c +=================================================================== +--- 
linux-3.10.0-1160.21.1.el7.orig/fs/jbd2/journal.c ++++ linux-3.10.0-1160.21.1.el7/fs/jbd2/journal.c +@@ -1121,7 +1121,7 @@ static journal_t * journal_init_common ( + journal->j_flags = JBD2_ABORT; + + /* Set up a default-sized revoke table for the new mount. */ +- err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); ++ err = jbd2_journal_init_revoke(journal); + if (err) { + kfree(journal); + return NULL; +Index: linux-3.10.0-1160.21.1.el7/fs/jbd2/revoke.c +=================================================================== +--- linux-3.10.0-1160.21.1.el7.orig/fs/jbd2/revoke.c ++++ linux-3.10.0-1160.21.1.el7/fs/jbd2/revoke.c +@@ -93,10 +93,10 @@ + #include + #include + #include ++#include + #endif + + static struct kmem_cache *jbd2_revoke_record_cache; +-static struct kmem_cache *jbd2_revoke_table_cache; + + /* Each revoke record represents one single revoked block. During + journal replay, this involves recording the transaction ID of the +@@ -104,23 +104,17 @@ static struct kmem_cache *jbd2_revoke_ta + + struct jbd2_revoke_record_s + { +- struct list_head hash; ++ struct rhash_head linkage; + tid_t sequence; /* Used for recovery only */ + unsigned long long blocknr; + }; + +- +-/* The revoke table is just a simple hash table of revoke records. */ +-struct jbd2_revoke_table_s +-{ +- /* It is conceivable that we might want a larger hash table +- * for recovery. Must be a power of two. 
*/ +- int hash_size; +- int hash_shift; +- struct list_head *hash_table; ++static const struct rhashtable_params revoke_rhashtable_params = { ++ .key_len = sizeof(unsigned long long), ++ .key_offset = offsetof(struct jbd2_revoke_record_s, blocknr), ++ .head_offset = offsetof(struct jbd2_revoke_record_s, linkage), + }; + +- + #ifdef __KERNEL__ + static void write_one_revoke_record(journal_t *, transaction_t *, + struct list_head *, +@@ -129,18 +123,10 @@ static void write_one_revoke_record(jour + static void flush_descriptor(journal_t *, struct buffer_head *, int, int); + #endif + +-/* Utility functions to maintain the revoke table */ +- +-static inline int hash(journal_t *journal, unsigned long long block) +-{ +- return hash_64(block, journal->j_revoke->hash_shift); +-} +- + static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, + tid_t seq) + { +- struct list_head *hash_list; +- struct jbd2_revoke_record_s *record; ++ struct jbd2_revoke_record_s *record, *old; + + repeat: + record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS); +@@ -149,10 +135,12 @@ repeat: + + record->sequence = seq; + record->blocknr = blocknr; +- hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; +- spin_lock(&journal->j_revoke_lock); +- list_add(&record->hash, hash_list); +- spin_unlock(&journal->j_revoke_lock); ++ old = rhashtable_lookup_get_insert_fast(journal->j_revoke, ++ &record->linkage, revoke_rhashtable_params); ++ if (old) { /* key already present, or ERR_PTR: not inserted */ ++ kmem_cache_free(jbd2_revoke_record_cache, record); ++ return IS_ERR(old) ? PTR_ERR(old) : 0; ++ } + return 0; + + oom: +@@ -168,22 +156,8 @@ oom: + static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, + unsigned long long blocknr) + { +- struct list_head *hash_list; +- struct jbd2_revoke_record_s *record; +- +- hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; +- +- spin_lock(&journal->j_revoke_lock); +- record = (struct jbd2_revoke_record_s *) hash_list->next; +- while 
(&(record->hash) != hash_list) { +- if (record->blocknr == blocknr) { +- spin_unlock(&journal->j_revoke_lock); +- return record; +- } +- record = (struct jbd2_revoke_record_s *) record->hash.next; +- } +- spin_unlock(&journal->j_revoke_lock); +- return NULL; ++ return rhashtable_lookup_fast(journal->j_revoke, &blocknr, ++ revoke_rhashtable_params); + } + + void jbd2_journal_destroy_revoke_caches(void) +@@ -192,99 +166,40 @@ void jbd2_journal_destroy_revoke_caches( + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; + } +- if (jbd2_revoke_table_cache) { +- kmem_cache_destroy(jbd2_revoke_table_cache); +- jbd2_revoke_table_cache = NULL; +- } + } + + int __init jbd2_journal_init_revoke_caches(void) + { + J_ASSERT(!jbd2_revoke_record_cache); +- J_ASSERT(!jbd2_revoke_table_cache); + + jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s, + SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY); + if (!jbd2_revoke_record_cache) +- goto record_cache_failure; +- +- jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, +- SLAB_TEMPORARY); +- if (!jbd2_revoke_table_cache) +- goto table_cache_failure; +- return 0; +-table_cache_failure: +- jbd2_journal_destroy_revoke_caches(); +-record_cache_failure: + return -ENOMEM; +-} +- +-static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) +-{ +- int shift = 0; +- int tmp = hash_size; +- struct jbd2_revoke_table_s *table; +- +- table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); +- if (!table) +- goto out; +- +- while((tmp >>= 1UL) != 0UL) +- shift++; +- +- table->hash_size = hash_size; +- table->hash_shift = shift; +- table->hash_table = +- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); +- if (!table->hash_table) { +- kmem_cache_free(jbd2_revoke_table_cache, table); +- table = NULL; +- goto out; +- } +- +- for (tmp = 0; tmp < hash_size; tmp++) +- INIT_LIST_HEAD(&table->hash_table[tmp]); +- +-out: +- return table; +-} +- +-static void 
jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) +-{ +- int i; +- struct list_head *hash_list; +- +- for (i = 0; i < table->hash_size; i++) { +- hash_list = &table->hash_table[i]; +- J_ASSERT(list_empty(hash_list)); +- } +- +- kfree(table->hash_table); +- kmem_cache_free(jbd2_revoke_table_cache, table); ++ return 0; + } + + /* Initialise the revoke table for a given journal to a given size. */ +-int jbd2_journal_init_revoke(journal_t *journal, int hash_size) ++int jbd2_journal_init_revoke(journal_t *journal) + { +- J_ASSERT(journal->j_revoke_table[0] == NULL); +- J_ASSERT(is_power_of_2(hash_size)); ++ int rc; + +- journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size); +- if (!journal->j_revoke_table[0]) ++ rc = rhashtable_init(&journal->j_revoke_table[0], &revoke_rhashtable_params); ++ if (rc) + goto fail0; + +- journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size); +- if (!journal->j_revoke_table[1]) ++ rc = rhashtable_init(&journal->j_revoke_table[1], &revoke_rhashtable_params); ++ if (rc) + goto fail1; + +- journal->j_revoke = journal->j_revoke_table[1]; ++ journal->j_revoke = &journal->j_revoke_table[1]; + + spin_lock_init(&journal->j_revoke_lock); + + return 0; + + fail1: +- jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); ++ rhashtable_destroy(&journal->j_revoke_table[0]); + fail0: + return -ENOMEM; + } +@@ -293,10 +208,8 @@ fail0: + void jbd2_journal_destroy_revoke(journal_t *journal) + { + journal->j_revoke = NULL; +- if (journal->j_revoke_table[0]) +- jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); +- if (journal->j_revoke_table[1]) +- jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]); ++ rhashtable_destroy(&journal->j_revoke_table[0]); ++ rhashtable_destroy(&journal->j_revoke_table[1]); + } + + +@@ -443,9 +356,8 @@ int jbd2_journal_cancel_revoke(handle_t + if (record) { + jbd_debug(4, "cancelled existing revoke on " + "blocknr %llu\n", (unsigned long 
long)bh->b_blocknr); +- spin_lock(&journal->j_revoke_lock); +- list_del(&record->hash); +- spin_unlock(&journal->j_revoke_lock); ++ rhashtable_remove_fast(journal->j_revoke, &record->linkage, ++ revoke_rhashtable_params); + kmem_cache_free(jbd2_revoke_record_cache, record); + did_revoke = 1; + } +@@ -480,27 +392,29 @@ int jbd2_journal_cancel_revoke(handle_t + */ + void jbd2_clear_buffer_revoked_flags(journal_t *journal) + { +- struct jbd2_revoke_table_s *revoke = journal->j_revoke; +- int i = 0; ++ struct rhashtable *revoke = journal->j_revoke; ++ struct jbd2_revoke_record_s *record; ++ struct rhashtable_iter iter; + +- for (i = 0; i < revoke->hash_size; i++) { +- struct list_head *hash_list; +- struct list_head *list_entry; +- hash_list = &revoke->hash_table[i]; +- +- list_for_each(list_entry, hash_list) { +- struct jbd2_revoke_record_s *record; +- struct buffer_head *bh; +- record = (struct jbd2_revoke_record_s *)list_entry; +- bh = __find_get_block(journal->j_fs_dev, +- record->blocknr, +- journal->j_blocksize); +- if (bh) { +- clear_buffer_revoked(bh); +- __brelse(bh); +- } ++ rhashtable_walk_enter(revoke, &iter); ++ rhashtable_walk_start(&iter); ++ while ((record = rhashtable_walk_next(&iter)) != NULL) { ++ struct buffer_head *bh; ++ ++ if (IS_ERR(record)) ++ continue; ++ rhashtable_walk_stop(&iter); ++ bh = __find_get_block(journal->j_fs_dev, ++ record->blocknr, ++ journal->j_blocksize); ++ if (bh) { ++ clear_buffer_revoked(bh); ++ __brelse(bh); + } +- } ++ rhashtable_walk_start(&iter); ++ } ++ rhashtable_walk_stop(&iter); ++ rhashtable_walk_exit(&iter); + } + + /* journal_switch_revoke table select j_revoke for next transaction +@@ -509,15 +423,12 @@ void jbd2_clear_buffer_revoked_flags(jou + */ + void jbd2_journal_switch_revoke_table(journal_t *journal) + { +- int i; +- +- if (journal->j_revoke == journal->j_revoke_table[0]) +- journal->j_revoke = journal->j_revoke_table[1]; ++ if (journal->j_revoke == &journal->j_revoke_table[0]) ++ journal->j_revoke = 
&journal->j_revoke_table[1]; + else +- journal->j_revoke = journal->j_revoke_table[0]; ++ journal->j_revoke = &journal->j_revoke_table[0]; + +- for (i = 0; i < journal->j_revoke->hash_size; i++) +- INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); ++ /* XXX: check rhashtable is empty? reinitialize it? */ + } + + /* +@@ -531,32 +442,37 @@ void jbd2_journal_write_revoke_records(j + { + struct buffer_head *descriptor; + struct jbd2_revoke_record_s *record; +- struct jbd2_revoke_table_s *revoke; +- struct list_head *hash_list; +- int i, offset, count; ++ struct rhashtable_iter iter; ++ struct rhashtable *revoke; ++ int offset, count; + + descriptor = NULL; + offset = 0; + count = 0; + + /* select revoke table for committing transaction */ +- revoke = journal->j_revoke == journal->j_revoke_table[0] ? +- journal->j_revoke_table[1] : journal->j_revoke_table[0]; +- +- for (i = 0; i < revoke->hash_size; i++) { +- hash_list = &revoke->hash_table[i]; ++ revoke = journal->j_revoke == &journal->j_revoke_table[0] ? 
++ &journal->j_revoke_table[1] : &journal->j_revoke_table[0]; + +- while (!list_empty(hash_list)) { +- record = (struct jbd2_revoke_record_s *) +- hash_list->next; ++ rhashtable_walk_enter(revoke, &iter); ++ rhashtable_walk_start(&iter); ++ while ((record = rhashtable_walk_next(&iter)) != NULL) { ++ if (IS_ERR(record)) ++ continue; ++ if (rhashtable_remove_fast(revoke, ++ &record->linkage, ++ revoke_rhashtable_params) == 0) { ++ rhashtable_walk_stop(&iter); + write_one_revoke_record(journal, transaction, log_bufs, + &descriptor, &offset, + record, write_op); ++ rhashtable_walk_start(&iter); + count++; +- list_del(&record->hash); + kmem_cache_free(jbd2_revoke_record_cache, record); + } + } ++ rhashtable_walk_stop(&iter); ++ rhashtable_walk_exit(&iter); + if (descriptor) + flush_descriptor(journal, descriptor, offset, write_op); + jbd_debug(1, "Wrote %d revoke records\n", count); +@@ -746,19 +662,23 @@ int jbd2_journal_test_revoke(journal_t * + + void jbd2_journal_clear_revoke(journal_t *journal) + { +- int i; +- struct list_head *hash_list; ++ struct rhashtable_iter iter; + struct jbd2_revoke_record_s *record; +- struct jbd2_revoke_table_s *revoke; ++ struct rhashtable *revoke; + + revoke = journal->j_revoke; + +- for (i = 0; i < revoke->hash_size; i++) { +- hash_list = &revoke->hash_table[i]; +- while (!list_empty(hash_list)) { +- record = (struct jbd2_revoke_record_s*) hash_list->next; +- list_del(&record->hash); ++ rhashtable_walk_enter(revoke, &iter); ++ rhashtable_walk_start(&iter); ++ while ((record = rhashtable_walk_next(&iter)) != NULL) { ++ if (IS_ERR(record)) ++ continue; ++ if (rhashtable_remove_fast(revoke, ++ &record->linkage, ++ revoke_rhashtable_params) == 0) { + kmem_cache_free(jbd2_revoke_record_cache, record); +- } +- } ++ } ++ } ++ rhashtable_walk_stop(&iter); ++ rhashtable_walk_exit(&iter); + } +Index: linux-3.10.0-1160.21.1.el7/include/linux/jbd2.h +=================================================================== +--- 
linux-3.10.0-1160.21.1.el7.orig/include/linux/jbd2.h ++++ linux-3.10.0-1160.21.1.el7/include/linux/jbd2.h +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + #endif + + #define journal_oom_retry 1 +@@ -940,8 +941,8 @@ struct journal_s + * current transaction. [j_revoke_lock] + */ + spinlock_t j_revoke_lock; +- struct jbd2_revoke_table_s *j_revoke; +- struct jbd2_revoke_table_s *j_revoke_table[2]; ++ struct rhashtable *j_revoke; ++ struct rhashtable j_revoke_table[2]; + + /* + * array of bhs for jbd2_journal_commit_transaction +@@ -1215,8 +1216,7 @@ static inline void jbd2_free_inode(struc + } + + /* Primary revoke support */ +-#define JOURNAL_REVOKE_DEFAULT_HASH 256 +-extern int jbd2_journal_init_revoke(journal_t *, int); ++extern int jbd2_journal_init_revoke(journal_t *); + extern void jbd2_journal_destroy_revoke_caches(void); + extern int jbd2_journal_init_revoke_caches(void); + diff --git a/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel8.4.patch b/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel8.4.patch new file mode 100644 index 0000000..68b5b27 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd2-revoke-rhashtable-rhel8.4.patch @@ -0,0 +1,456 @@ +Index: linux-4.18.0-305.19.1.el8_4/fs/jbd2/journal.c +=================================================================== +--- linux-4.18.0-305.19.1.el8_4.orig/fs/jbd2/journal.c ++++ linux-4.18.0-305.19.1.el8_4/fs/jbd2/journal.c +@@ -1158,7 +1158,7 @@ static journal_t *journal_init_common(st + journal->j_flags = JBD2_ABORT; + + /* Set up a default-sized revoke table for the new mount. 
*/ +- err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); ++ err = jbd2_journal_init_revoke(journal); + if (err) + goto err_cleanup; + +@@ -2700,8 +2700,6 @@ static int __init journal_init_caches(vo + + ret = jbd2_journal_init_revoke_record_cache(); + if (ret == 0) +- ret = jbd2_journal_init_revoke_table_cache(); +- if (ret == 0) + ret = jbd2_journal_init_journal_head_cache(); + if (ret == 0) + ret = jbd2_journal_init_handle_cache(); +@@ -2715,7 +2713,6 @@ static int __init journal_init_caches(vo + static void jbd2_journal_destroy_caches(void) + { + jbd2_journal_destroy_revoke_record_cache(); +- jbd2_journal_destroy_revoke_table_cache(); + jbd2_journal_destroy_journal_head_cache(); + jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_inode_cache(); +Index: linux-4.18.0-305.19.1.el8_4/fs/jbd2/revoke.c +=================================================================== +--- linux-4.18.0-305.19.1.el8_4.orig/fs/jbd2/revoke.c ++++ linux-4.18.0-305.19.1.el8_4/fs/jbd2/revoke.c +@@ -90,10 +90,10 @@ + #include + #include + #include ++#include + #endif + + static struct kmem_cache *jbd2_revoke_record_cache; +-static struct kmem_cache *jbd2_revoke_table_cache; + + /* Each revoke record represents one single revoked block. During + journal replay, this involves recording the transaction ID of the +@@ -101,23 +101,17 @@ static struct kmem_cache *jbd2_revoke_ta + + struct jbd2_revoke_record_s + { +- struct list_head hash; ++ struct rhash_head linkage; + tid_t sequence; /* Used for recovery only */ + unsigned long long blocknr; + }; + +- +-/* The revoke table is just a simple hash table of revoke records. */ +-struct jbd2_revoke_table_s +-{ +- /* It is conceivable that we might want a larger hash table +- * for recovery. Must be a power of two. 
*/ +- int hash_size; +- int hash_shift; +- struct list_head *hash_table; ++static const struct rhashtable_params revoke_rhashtable_params = { ++ .key_len = sizeof(unsigned long long), ++ .key_offset = offsetof(struct jbd2_revoke_record_s, blocknr), ++ .head_offset = offsetof(struct jbd2_revoke_record_s, linkage), + }; + +- + #ifdef __KERNEL__ + static void write_one_revoke_record(transaction_t *, + struct list_head *, +@@ -126,18 +120,10 @@ static void write_one_revoke_record(tran + static void flush_descriptor(journal_t *, struct buffer_head *, int); + #endif + +-/* Utility functions to maintain the revoke table */ +- +-static inline int hash(journal_t *journal, unsigned long long block) +-{ +- return hash_64(block, journal->j_revoke->hash_shift); +-} +- + static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, + tid_t seq) + { +- struct list_head *hash_list; +- struct jbd2_revoke_record_s *record; ++ struct jbd2_revoke_record_s *record, *old; + gfp_t gfp_mask = GFP_NOFS; + + if (journal_oom_retry) +@@ -148,10 +134,12 @@ static int insert_revoke_hash(journal_t + + record->sequence = seq; + record->blocknr = blocknr; +- hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; +- spin_lock(&journal->j_revoke_lock); +- list_add(&record->hash, hash_list); +- spin_unlock(&journal->j_revoke_lock); ++ old = rhashtable_lookup_get_insert_fast(journal->j_revoke, ++ &record->linkage, revoke_rhashtable_params); ++ if (old) { /* key already present, or ERR_PTR: not inserted */ ++ kmem_cache_free(jbd2_revoke_record_cache, record); ++ return IS_ERR(old) ? PTR_ERR(old) : 0; ++ } + return 0; + } + +@@ -160,22 +148,8 @@ static int insert_revoke_hash(journal_t + static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, + unsigned long long blocknr) + { +- struct list_head *hash_list; +- struct jbd2_revoke_record_s *record; +- +- hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; +- +- spin_lock(&journal->j_revoke_lock); +- record = (struct jbd2_revoke_record_s *) 
hash_list->next; +- while (&(record->hash) != hash_list) { +- if (record->blocknr == blocknr) { +- spin_unlock(&journal->j_revoke_lock); +- return record; +- } +- record = (struct jbd2_revoke_record_s *) record->hash.next; +- } +- spin_unlock(&journal->j_revoke_lock); +- return NULL; ++ return rhashtable_lookup_fast(journal->j_revoke, &blocknr, ++ revoke_rhashtable_params); + } + + void jbd2_journal_destroy_revoke_record_cache(void) +@@ -184,12 +158,6 @@ void jbd2_journal_destroy_revoke_record_ + jbd2_revoke_record_cache = NULL; + } + +-void jbd2_journal_destroy_revoke_table_cache(void) +-{ +- kmem_cache_destroy(jbd2_revoke_table_cache); +- jbd2_revoke_table_cache = NULL; +-} +- + int __init jbd2_journal_init_revoke_record_cache(void) + { + J_ASSERT(!jbd2_revoke_record_cache); +@@ -203,85 +171,27 @@ int __init jbd2_journal_init_revoke_reco + return 0; + } + +-int __init jbd2_journal_init_revoke_table_cache(void) +-{ +- J_ASSERT(!jbd2_revoke_table_cache); +- jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s, +- SLAB_TEMPORARY); +- if (!jbd2_revoke_table_cache) { +- pr_emerg("JBD2: failed to create revoke_table cache\n"); +- return -ENOMEM; +- } +- return 0; +-} +- +-static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size) +-{ +- int shift = 0; +- int tmp = hash_size; +- struct jbd2_revoke_table_s *table; +- +- table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); +- if (!table) +- goto out; +- +- while((tmp >>= 1UL) != 0UL) +- shift++; +- +- table->hash_size = hash_size; +- table->hash_shift = shift; +- table->hash_table = +- kmalloc_array(hash_size, sizeof(struct list_head), GFP_KERNEL); +- if (!table->hash_table) { +- kmem_cache_free(jbd2_revoke_table_cache, table); +- table = NULL; +- goto out; +- } +- +- for (tmp = 0; tmp < hash_size; tmp++) +- INIT_LIST_HEAD(&table->hash_table[tmp]); +- +-out: +- return table; +-} +- +-static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table) +-{ +- int i; +- struct 
list_head *hash_list; +- +- for (i = 0; i < table->hash_size; i++) { +- hash_list = &table->hash_table[i]; +- J_ASSERT(list_empty(hash_list)); +- } +- +- kfree(table->hash_table); +- kmem_cache_free(jbd2_revoke_table_cache, table); +-} +- + /* Initialise the revoke table for a given journal to a given size. */ +-int jbd2_journal_init_revoke(journal_t *journal, int hash_size) ++int jbd2_journal_init_revoke(journal_t *journal) + { +- J_ASSERT(journal->j_revoke_table[0] == NULL); +- J_ASSERT(is_power_of_2(hash_size)); ++ int rc; + +- journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size); +- if (!journal->j_revoke_table[0]) ++ rc = rhashtable_init(&journal->j_revoke_table[0], &revoke_rhashtable_params); ++ if (rc) + goto fail0; + +- journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size); +- if (!journal->j_revoke_table[1]) ++ rc = rhashtable_init(&journal->j_revoke_table[1], &revoke_rhashtable_params); ++ if (rc) + goto fail1; + +- journal->j_revoke = journal->j_revoke_table[1]; ++ journal->j_revoke = &journal->j_revoke_table[1]; + + spin_lock_init(&journal->j_revoke_lock); + + return 0; + + fail1: +- jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); +- journal->j_revoke_table[0] = NULL; ++ rhashtable_destroy(&journal->j_revoke_table[0]); + fail0: + return -ENOMEM; + } +@@ -290,10 +200,8 @@ fail0: + void jbd2_journal_destroy_revoke(journal_t *journal) + { + journal->j_revoke = NULL; +- if (journal->j_revoke_table[0]) +- jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); +- if (journal->j_revoke_table[1]) +- jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]); ++ rhashtable_destroy(&journal->j_revoke_table[0]); ++ rhashtable_destroy(&journal->j_revoke_table[1]); + } + + +@@ -446,9 +354,8 @@ int jbd2_journal_cancel_revoke(handle_t + if (record) { + jbd_debug(4, "cancelled existing revoke on " + "blocknr %llu\n", (unsigned long long)bh->b_blocknr); +- spin_lock(&journal->j_revoke_lock); +- 
list_del(&record->hash); +- spin_unlock(&journal->j_revoke_lock); ++ rhashtable_remove_fast(journal->j_revoke, &record->linkage, ++ revoke_rhashtable_params); + kmem_cache_free(jbd2_revoke_record_cache, record); + did_revoke = 1; + } +@@ -483,27 +390,29 @@ int jbd2_journal_cancel_revoke(handle_t + */ + void jbd2_clear_buffer_revoked_flags(journal_t *journal) + { +- struct jbd2_revoke_table_s *revoke = journal->j_revoke; +- int i = 0; ++ struct rhashtable *revoke = journal->j_revoke; ++ struct jbd2_revoke_record_s *record; ++ struct rhashtable_iter iter; + +- for (i = 0; i < revoke->hash_size; i++) { +- struct list_head *hash_list; +- struct list_head *list_entry; +- hash_list = &revoke->hash_table[i]; +- +- list_for_each(list_entry, hash_list) { +- struct jbd2_revoke_record_s *record; +- struct buffer_head *bh; +- record = (struct jbd2_revoke_record_s *)list_entry; +- bh = __find_get_block(journal->j_fs_dev, +- record->blocknr, +- journal->j_blocksize); +- if (bh) { +- clear_buffer_revoked(bh); +- __brelse(bh); +- } ++ rhashtable_walk_enter(revoke, &iter); ++ rhashtable_walk_start(&iter); ++ while ((record = rhashtable_walk_next(&iter)) != NULL) { ++ struct buffer_head *bh; ++ ++ if (IS_ERR(record)) ++ continue; ++ rhashtable_walk_stop(&iter); ++ bh = __find_get_block(journal->j_fs_dev, ++ record->blocknr, ++ journal->j_blocksize); ++ if (bh) { ++ clear_buffer_revoked(bh); ++ __brelse(bh); + } +- } ++ rhashtable_walk_start(&iter); ++ } ++ rhashtable_walk_stop(&iter); ++ rhashtable_walk_exit(&iter); + } + + /* journal_switch_revoke table select j_revoke for next transaction +@@ -512,15 +421,12 @@ void jbd2_clear_buffer_revoked_flags(jou + */ + void jbd2_journal_switch_revoke_table(journal_t *journal) + { +- int i; +- +- if (journal->j_revoke == journal->j_revoke_table[0]) +- journal->j_revoke = journal->j_revoke_table[1]; ++ if (journal->j_revoke == &journal->j_revoke_table[0]) ++ journal->j_revoke = &journal->j_revoke_table[1]; + else +- journal->j_revoke = 
journal->j_revoke_table[0]; ++ journal->j_revoke = &journal->j_revoke_table[0]; + +- for (i = 0; i < journal->j_revoke->hash_size; i++) +- INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); ++ /* XXX: check rhashtable is empty? reinitialize it? */ + } + + /* +@@ -533,31 +439,36 @@ void jbd2_journal_write_revoke_records(t + journal_t *journal = transaction->t_journal; + struct buffer_head *descriptor; + struct jbd2_revoke_record_s *record; +- struct jbd2_revoke_table_s *revoke; +- struct list_head *hash_list; +- int i, offset, count; ++ struct rhashtable_iter iter; ++ struct rhashtable *revoke; ++ int offset, count; + + descriptor = NULL; + offset = 0; + count = 0; + + /* select revoke table for committing transaction */ +- revoke = journal->j_revoke == journal->j_revoke_table[0] ? +- journal->j_revoke_table[1] : journal->j_revoke_table[0]; +- +- for (i = 0; i < revoke->hash_size; i++) { +- hash_list = &revoke->hash_table[i]; ++ revoke = journal->j_revoke == &journal->j_revoke_table[0] ? 
++ &journal->j_revoke_table[1] : &journal->j_revoke_table[0]; + +- while (!list_empty(hash_list)) { +- record = (struct jbd2_revoke_record_s *) +- hash_list->next; ++ rhashtable_walk_enter(revoke, &iter); ++ rhashtable_walk_start(&iter); ++ while ((record = rhashtable_walk_next(&iter)) != NULL) { ++ if (IS_ERR(record)) ++ continue; ++ if (rhashtable_remove_fast(revoke, ++ &record->linkage, ++ revoke_rhashtable_params) == 0) { ++ rhashtable_walk_stop(&iter); + write_one_revoke_record(transaction, log_bufs, + &descriptor, &offset, record); ++ rhashtable_walk_start(&iter); + count++; +- list_del(&record->hash); + kmem_cache_free(jbd2_revoke_record_cache, record); + } + } ++ rhashtable_walk_stop(&iter); ++ rhashtable_walk_exit(&iter); + if (descriptor) + flush_descriptor(journal, descriptor, offset); + jbd_debug(1, "Wrote %d revoke records\n", count); +@@ -725,19 +636,23 @@ int jbd2_journal_test_revoke(journal_t * + + void jbd2_journal_clear_revoke(journal_t *journal) + { +- int i; +- struct list_head *hash_list; ++ struct rhashtable_iter iter; + struct jbd2_revoke_record_s *record; +- struct jbd2_revoke_table_s *revoke; ++ struct rhashtable *revoke; + + revoke = journal->j_revoke; + +- for (i = 0; i < revoke->hash_size; i++) { +- hash_list = &revoke->hash_table[i]; +- while (!list_empty(hash_list)) { +- record = (struct jbd2_revoke_record_s*) hash_list->next; +- list_del(&record->hash); ++ rhashtable_walk_enter(revoke, &iter); ++ rhashtable_walk_start(&iter); ++ while ((record = rhashtable_walk_next(&iter)) != NULL) { ++ if (IS_ERR(record)) ++ continue; ++ if (rhashtable_remove_fast(revoke, ++ &record->linkage, ++ revoke_rhashtable_params) == 0) { + kmem_cache_free(jbd2_revoke_record_cache, record); +- } +- } ++ } ++ } ++ rhashtable_walk_stop(&iter); ++ rhashtable_walk_exit(&iter); + } +Index: linux-4.18.0-305.19.1.el8_4/include/linux/jbd2.h +=================================================================== +--- linux-4.18.0-305.19.1.el8_4.orig/include/linux/jbd2.h 
++++ linux-4.18.0-305.19.1.el8_4/include/linux/jbd2.h +@@ -31,4 +31,5 @@ + #include ++#include + #endif + + #define journal_oom_retry 1 +@@ -1075,12 +1076,12 @@ struct journal_s + * The revoke table - maintains the list of revoked blocks in the + * current transaction. + */ +- struct jbd2_revoke_table_s *j_revoke; ++ struct rhashtable *j_revoke; + + /** + * @j_revoke_table: Alternate revoke tables for j_revoke. + */ +- struct jbd2_revoke_table_s *j_revoke_table[2]; ++ struct rhashtable j_revoke_table[2]; + + /** + * @j_wbuf: Array of bhs for jbd2_journal_commit_transaction. +@@ -1491,8 +1492,7 @@ static inline void jbd2_free_inode(struc + } + + /* Primary revoke support */ +-#define JOURNAL_REVOKE_DEFAULT_HASH 256 +-extern int jbd2_journal_init_revoke(journal_t *, int); ++extern int jbd2_journal_init_revoke(journal_t *); + extern void jbd2_journal_destroy_revoke_record_cache(void); + extern void jbd2_journal_destroy_revoke_table_cache(void); + extern int __init jbd2_journal_init_revoke_record_cache(void); diff --git a/lustre/kernel_patches/series/3.10-rhel7.6.series b/lustre/kernel_patches/series/3.10-rhel7.6.series index e1db1d4..7d36af7 100644 --- a/lustre/kernel_patches/series/3.10-rhel7.6.series +++ b/lustre/kernel_patches/series/3.10-rhel7.6.series @@ -3,6 +3,7 @@ blkdev_tunables-3.9.patch vfs-project-quotas-rhel7.patch fix-integrity-verify-rhel7.patch fix-sd-dif-complete-rhel7.patch +jbd2-revoke-rhashtable-rhel7.patch block-integrity-allow-optional-integrity-functions-rhel7.patch block-pass-bio-into-integrity_processing_fn-rhel7.patch dm-fix-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch diff --git a/lustre/kernel_patches/series/3.10-rhel7.7.series b/lustre/kernel_patches/series/3.10-rhel7.7.series index 7788a59..8797e3c 100644 --- a/lustre/kernel_patches/series/3.10-rhel7.7.series +++ b/lustre/kernel_patches/series/3.10-rhel7.7.series @@ -3,6 +3,7 @@ blkdev_tunables-3.9.patch vfs-project-quotas-rhel7.patch fix-integrity-verify-rhel7.patch 
fix-sd-dif-complete-rhel7.patch +jbd2-revoke-rhashtable-rhel7.patch block-integrity-allow-optional-integrity-functions-rhel7.patch block-pass-bio-into-integrity_processing_fn-rhel7.patch block-Ensure-we-only-enable-integrity-metadata-for-reads-and-writes-rhel7.patch diff --git a/lustre/kernel_patches/series/3.10-rhel7.8.series b/lustre/kernel_patches/series/3.10-rhel7.8.series index 059dd33..4bf1d23 100644 --- a/lustre/kernel_patches/series/3.10-rhel7.8.series +++ b/lustre/kernel_patches/series/3.10-rhel7.8.series @@ -1,6 +1,7 @@ vfs-project-quotas-rhel7.patch fix-integrity-verify-rhel7.patch fix-sd-dif-complete-rhel7.patch +jbd2-revoke-rhashtable-rhel7.patch block-integrity-allow-optional-integrity-functions-rhel7.patch block-pass-bio-into-integrity_processing_fn-rhel7.patch block-Ensure-we-only-enable-integrity-metadata-for-reads-and-writes-rhel7.patch diff --git a/lustre/kernel_patches/series/3.10-rhel7.9.series b/lustre/kernel_patches/series/3.10-rhel7.9.series index f9577dc..7f952f0 100644 --- a/lustre/kernel_patches/series/3.10-rhel7.9.series +++ b/lustre/kernel_patches/series/3.10-rhel7.9.series @@ -1,5 +1,6 @@ vfs-project-quotas-rhel7.patch fix-integrity-verify-rhel7.patch +jbd2-revoke-rhashtable-rhel7.patch block-integrity-allow-optional-integrity-functions-rhel7.patch block-pass-bio-into-integrity_processing_fn-rhel7.patch block-Ensure-we-only-enable-integrity-metadata-for-reads-and-writes-rhel7.patch diff --git a/lustre/kernel_patches/series/4.14-rhel7.5.series b/lustre/kernel_patches/series/4.14-rhel7.5.series index e69de29..a4c11c0 100644 --- a/lustre/kernel_patches/series/4.14-rhel7.5.series +++ b/lustre/kernel_patches/series/4.14-rhel7.5.series @@ -0,0 +1 @@ +jbd2-revoke-rhashtable-rhel7.patch diff --git a/lustre/kernel_patches/series/4.14-rhel7.6.series b/lustre/kernel_patches/series/4.14-rhel7.6.series index e69de29..a4c11c0 100644 --- a/lustre/kernel_patches/series/4.14-rhel7.6.series +++ b/lustre/kernel_patches/series/4.14-rhel7.6.series @@ -0,0 
+1 @@ +jbd2-revoke-rhashtable-rhel7.patch diff --git a/lustre/kernel_patches/series/4.18-rhel8.1.series b/lustre/kernel_patches/series/4.18-rhel8.1.series index c74f6e8..f960dda 100644 --- a/lustre/kernel_patches/series/4.18-rhel8.1.series +++ b/lustre/kernel_patches/series/4.18-rhel8.1.series @@ -1,2 +1,3 @@ +jbd2-revoke-rhashtable-rhel8.4.patch block-integrity-allow-optional-integrity-functions-rhel8.patch block-pass-bio-into-integrity_processing_fn-rhel8.patch diff --git a/lustre/kernel_patches/series/4.18-rhel8.2.series b/lustre/kernel_patches/series/4.18-rhel8.2.series index c74f6e8..f960dda 100644 --- a/lustre/kernel_patches/series/4.18-rhel8.2.series +++ b/lustre/kernel_patches/series/4.18-rhel8.2.series @@ -1,2 +1,3 @@ +jbd2-revoke-rhashtable-rhel8.4.patch block-integrity-allow-optional-integrity-functions-rhel8.patch block-pass-bio-into-integrity_processing_fn-rhel8.patch diff --git a/lustre/kernel_patches/series/4.18-rhel8.3.series b/lustre/kernel_patches/series/4.18-rhel8.3.series index c74f6e8..f960dda 100644 --- a/lustre/kernel_patches/series/4.18-rhel8.3.series +++ b/lustre/kernel_patches/series/4.18-rhel8.3.series @@ -1,2 +1,3 @@ +jbd2-revoke-rhashtable-rhel8.4.patch block-integrity-allow-optional-integrity-functions-rhel8.patch block-pass-bio-into-integrity_processing_fn-rhel8.patch diff --git a/lustre/kernel_patches/series/4.18-rhel8.4.series b/lustre/kernel_patches/series/4.18-rhel8.4.series index c74f6e8..f960dda 100644 --- a/lustre/kernel_patches/series/4.18-rhel8.4.series +++ b/lustre/kernel_patches/series/4.18-rhel8.4.series @@ -1,2 +1,3 @@ +jbd2-revoke-rhashtable-rhel8.4.patch block-integrity-allow-optional-integrity-functions-rhel8.patch block-pass-bio-into-integrity_processing_fn-rhel8.patch diff --git a/lustre/kernel_patches/series/4.18-rhel8.5.series b/lustre/kernel_patches/series/4.18-rhel8.5.series index c74f6e8..f960dda 100644 --- a/lustre/kernel_patches/series/4.18-rhel8.5.series +++ b/lustre/kernel_patches/series/4.18-rhel8.5.series 
@@ -1,2 +1,3 @@ +jbd2-revoke-rhashtable-rhel8.4.patch block-integrity-allow-optional-integrity-functions-rhel8.patch block-pass-bio-into-integrity_processing_fn-rhel8.patch diff --git a/lustre/kernel_patches/series/4.18-rhel8.series b/lustre/kernel_patches/series/4.18-rhel8.series index c74f6e8..f960dda 100644 --- a/lustre/kernel_patches/series/4.18-rhel8.series +++ b/lustre/kernel_patches/series/4.18-rhel8.series @@ -1,2 +1,3 @@ +jbd2-revoke-rhashtable-rhel8.4.patch block-integrity-allow-optional-integrity-functions-rhel8.patch block-pass-bio-into-integrity_processing_fn-rhel8.patch -- 1.8.3.1