From 2083ffd1bc6c772972834b50e5aef2118c88658d Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Mon, 19 Mar 2018 01:20:24 +0000 Subject: [PATCH] Revert "LU-9796 kernel: improve metadata performaces for RHEL7" This reverts commit 17fe3c192e101ac due to suspected problems hit in some deployments. Change-Id: I8cb28b4c69f67583356a7e07cf94ba897ffeb6ee Signed-off-by: Andreas Dilger Reviewed-on: https://review.whamcloud.com/31683 Reviewed-by: Wang Shilong Tested-by: Jenkins Reviewed-by: Oleg Drokin Tested-by: Oleg Drokin --- ...null-committed-data-return-in-undo_access.patch | 113 -------------- ...edup-jbd2_journal_get_-write-undo-_access.patch | 167 --------------------- lustre/kernel_patches/series/3.10-rhel7.series | 2 - 3 files changed, 282 deletions(-) delete mode 100644 lustre/kernel_patches/patches/jbd2-fix-null-committed-data-return-in-undo_access.patch delete mode 100644 lustre/kernel_patches/patches/jbd2-speedup-jbd2_journal_get_-write-undo-_access.patch diff --git a/lustre/kernel_patches/patches/jbd2-fix-null-committed-data-return-in-undo_access.patch b/lustre/kernel_patches/patches/jbd2-fix-null-committed-data-return-in-undo_access.patch deleted file mode 100644 index 04713b3..0000000 --- a/lustre/kernel_patches/patches/jbd2-fix-null-committed-data-return-in-undo_access.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 087ffd4eae9929afd06f6a709861df3c3508492a Mon Sep 17 00:00:00 2001 -From: Junxiao Bi -Date: Fri, 4 Dec 2015 12:29:28 -0500 -Subject: [PATCH] jbd2: fix null committed data return in undo_access - -introduced jbd2_write_access_granted() to improve write|undo_access -speed, but missed to check the status of b_committed_data which caused -a kernel panic on ocfs2. - -[ 6538.405938] ------------[ cut here ]------------ -[ 6538.406686] kernel BUG at fs/ocfs2/suballoc.c:2400! -[ 6538.406686] invalid opcode: 0000 [#1] SMP -[ 6538.406686] Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront xen_netfront xen_fbfront parport_pc parport pcspkr i2c_piix4 acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix cirrus ttm drm_kms_helper drm fb_sys_fops sysimgblt sysfillrect i2c_core syscopyarea dm_mirror dm_region_hash dm_log dm_mod -[ 6538.406686] CPU: 1 PID: 16265 Comm: mmap_truncate Not tainted 4.3.0 #1 -[ 6538.406686] Hardware name: Xen HVM domU, BIOS 4.3.1OVM 05/14/2014 -[ 6538.406686] task: ffff88007c2bab00 ti: ffff880075b78000 task.ti: ffff880075b78000 -[ 6538.406686] RIP: 0010:[] [] ocfs2_block_group_clear_bits+0x23b/0x250 [ocfs2] -[ 6538.406686] RSP: 0018:ffff880075b7b7f8 EFLAGS: 00010246 -[ 6538.406686] RAX: ffff8800760c5b40 RBX: ffff88006c06a000 RCX: ffffffffa06e6df0 -[ 6538.406686] RDX: 0000000000000000 RSI: ffff88007a6f6ea0 RDI: ffff88007a760430 -[ 6538.406686] RBP: ffff880075b7b878 R08: 0000000000000002 R09: 0000000000000001 -[ 6538.406686] R10: ffffffffa06769be R11: 0000000000000000 R12: 0000000000000001 -[ 6538.406686] R13: ffffffffa06a1750 R14: 0000000000000001 R15: ffff88007a6f6ea0 -[ 6538.406686] FS: 00007f17fde30720(0000) GS:ffff88007f040000(0000) knlGS:0000000000000000 -[ 6538.406686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 -[ 6538.406686] CR2: 0000000000601730 CR3: 000000007aea0000 CR4: 00000000000406e0 -[ 6538.406686] Stack: -[ 6538.406686] ffff88007c2bb5b0 ffff880075b7b8e0 ffff88007a7604b0 ffff88006c640800 -[ 6538.406686] ffff88007a7604b0 ffff880075d77390 0000000075b7b878 ffffffffa06a309d -[ 6538.406686] ffff880075d752d8 ffff880075b7b990 ffff880075b7b898 0000000000000000 -[ 6538.406686] Call Trace: -[ 6538.406686] [] ? ocfs2_read_group_descriptor+0x6d/0xa0 [ocfs2] -[ 6538.406686] [] _ocfs2_free_suballoc_bits+0xe4/0x320 [ocfs2] -[ 6538.406686] [] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2] -[ 6538.406686] [] _ocfs2_free_clusters+0xee/0x210 [ocfs2] -[ 6538.406686] [] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2] -[ 6538.406686] [] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2] -[ 6538.406686] [] ? ocfs2_extend_trans+0x50/0x1a0 [ocfs2] -[ 6538.406686] [] ocfs2_free_clusters+0x15/0x20 [ocfs2] -[ 6538.406686] [] ocfs2_replay_truncate_records+0xfc/0x290 [ocfs2] -[ 6538.406686] [] ? ocfs2_start_trans+0xec/0x1d0 [ocfs2] -[ 6538.406686] [] __ocfs2_flush_truncate_log+0x140/0x2d0 [ocfs2] -[ 6538.406686] [] ? ocfs2_reserve_blocks_for_rec_trunc.clone.0+0x44/0x170 [ocfs2] -[ 6538.406686] [] ocfs2_remove_btree_range+0x374/0x630 [ocfs2] -[ 6538.406686] [] ? jbd2_journal_stop+0x25b/0x470 [jbd2] -[ 6538.406686] [] ocfs2_commit_truncate+0x305/0x670 [ocfs2] -[ 6538.406686] [] ? ocfs2_journal_access_eb+0x20/0x20 [ocfs2] -[ 6538.406686] [] ocfs2_truncate_file+0x297/0x380 [ocfs2] -[ 6538.406686] [] ? jbd2_journal_begin_ordered_truncate+0x64/0xc0 [jbd2] -[ 6538.406686] [] ocfs2_setattr+0x572/0x860 [ocfs2] -[ 6538.406686] [] ? current_fs_time+0x3f/0x50 -[ 6538.406686] [] notify_change+0x1d7/0x340 -[ 6538.406686] [] ? generic_getxattr+0x79/0x80 -[ 6538.406686] [] do_truncate+0x66/0x90 -[ 6538.406686] [] ? __audit_syscall_entry+0xb0/0x110 -[ 6538.406686] [] do_sys_ftruncate.clone.0+0xf3/0x120 -[ 6538.406686] [] SyS_ftruncate+0xe/0x10 -[ 6538.406686] [] entry_SYSCALL_64_fastpath+0x12/0x71 -[ 6538.406686] Code: 28 48 81 ee b0 04 00 00 48 8b 92 50 fb ff ff 48 8b 80 b0 03 00 00 48 39 90 88 00 00 00 0f 84 30 fe ff ff 0f 0b eb fe 0f 0b eb fe <0f> 0b 0f 1f 00 eb fb 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 -[ 6538.406686] RIP [] ocfs2_block_group_clear_bits+0x23b/0x250 [ocfs2] -[ 6538.406686] RSP -[ 6538.691128] ---[ end trace 31cd7011d6770d7e ]--- -[ 6538.694492] Kernel panic - not syncing: Fatal exception -[ 6538.695484] Kernel Offset: disabled - -Fixes: de92c8caf16c("jbd2: speedup jbd2_journal_get_[write|undo]_access()") -Cc: -Signed-off-by: Junxiao Bi -Signed-off-by: Theodore Ts'o ---- - -diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c -index 5fece02..cd61b7b 100644 ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -1031,7 +1031,8 @@ out: - } - - /* Fast check whether buffer is already attached to the required transaction */ --static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh) -+static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, -+ bool undo) - { - struct journal_head *jh; - bool ret = false; -@@ -1058,6 +1059,9 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh) - jh = READ_ONCE(bh->b_private); - if (!jh) - goto out; -+ /* For undo access buffer must have data copied */ -+ if (undo && !jh->b_committed_data) -+ goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) - goto out; -@@ -1095,7 +1099,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) - struct journal_head *jh; - int rc; - -- if (jbd2_write_access_granted(handle, bh)) -+ if (jbd2_write_access_granted(handle, bh, false)) - return 0; - - jh = jbd2_journal_add_journal_head(bh); -@@ -1232,7 +1236,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) - char *committed_data = NULL; - - JBUFFER_TRACE(jh, "entry"); -- if (jbd2_write_access_granted(handle, bh)) -+ if (jbd2_write_access_granted(handle, bh, true)) - return 0; - - jh = jbd2_journal_add_journal_head(bh); diff --git a/lustre/kernel_patches/patches/jbd2-speedup-jbd2_journal_get_-write-undo-_access.patch b/lustre/kernel_patches/patches/jbd2-speedup-jbd2_journal_get_-write-undo-_access.patch deleted file mode 100644 index 486583e..0000000 --- a/lustre/kernel_patches/patches/jbd2-speedup-jbd2_journal_get_-write-undo-_access.patch +++ /dev/null @@ -1,167 +0,0 @@ -commit de92c8caf16ca84926fa31b7a5590c0fb9c0d5ca -Author: Jan Kara -Date: Mon Jun 8 12:46:37 2015 -0400 - - jbd2: speedup jbd2_journal_get_[write|undo]_access() - - jbd2_journal_get_write_access() and jbd2_journal_get_create_access() are - frequently called for buffers that are already part of the running - transaction - most frequently it is the case for bitmaps, inode table - blocks, and superblock. Since in such cases we have nothing to do, it is - unfortunate we still grab reference to journal head, lock the bh, lock - bh_state only to find out there's nothing to do. - - Improving this is a bit subtle though since until we find out journal - head is attached to the running transaction, it can disappear from under - us because checkpointing / commit decided it's no longer needed. We deal - with this by protecting journal_head slab with RCU. We still have to be - careful about journal head being freed & reallocated within slab and - about exposing journal head in consistent state (in particular - b_modified and b_frozen_data must be in correct state before we allow - user to touch the buffer). - - Signed-off-by: Jan Kara - Signed-off-by: Theodore Ts'o -diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c -index e853493..e200e9f 100644 ---- a/fs/jbd2/journal.c -+++ b/fs/jbd2/journal.c -@@ -2342,7 +2342,7 @@ static int jbd2_journal_init_journal_head_cache(void) - jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", - sizeof(struct journal_head), - 0, /* offset */ -- SLAB_TEMPORARY, /* flags */ -+ SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU, - NULL); /* ctor */ - retval = 0; - if (!jbd2_journal_head_cache) { -diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c -index 3a1700a..5fece02 100644 ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -889,6 +889,12 @@ repeat: - if (jh->b_frozen_data) { - JBUFFER_TRACE(jh, "has frozen data"); - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); -+ /* -+ * Make sure all stores to jh (b_modified, b_frozen_data) are visible -+ * before attaching it to the running transaction. Paired with barrier -+ * in jbd2_write_access_granted() -+ */ -+ smp_wmb(); - jh->b_next_transaction = transaction; - goto done; - } -@@ -955,6 +961,12 @@ repeat: - frozen_buffer = NULL; - need_copy = 1; - } -+ /* -+ * Make sure all stores to jh (b_modified, b_frozen_data) are visible -+ * before attaching it to the running transaction. Paired with barrier -+ * in jbd2_write_access_granted() -+ */ -+ smp_wmb(); - jh->b_next_transaction = transaction; - } - -@@ -968,6 +980,12 @@ repeat: - JBUFFER_TRACE(jh, "no transaction"); - J_ASSERT_JH(jh, !jh->b_next_transaction); - JBUFFER_TRACE(jh, "file as BJ_Reserved"); -+ /* -+ * Make sure all stores to jh (b_modified, b_frozen_data) are -+ * visible before attaching it to the running transaction. -+ * Paired with barrier in jbd2_write_access_granted() -+ */ -+ smp_wmb(); - spin_lock(&journal->j_list_lock); - __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); - spin_unlock(&journal->j_list_lock); -@@ -1012,6 +1030,55 @@ out: - return error; - } - -+/* Fast check whether buffer is already attached to the required transaction */ -+static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh) -+{ -+ struct journal_head *jh; -+ bool ret = false; -+ -+ /* Dirty buffers require special handling... */ -+ if (buffer_dirty(bh)) -+ return false; -+ -+ /* -+ * RCU protects us from dereferencing freed pages. So the checks we do -+ * are guaranteed not to oops. However the jh slab object can get freed -+ * & reallocated while we work with it. So we have to be careful. When -+ * we see jh attached to the running transaction, we know it must stay -+ * so until the transaction is committed. Thus jh won't be freed and -+ * will be attached to the same bh while we run. However it can -+ * happen jh gets freed, reallocated, and attached to the transaction -+ * just after we get pointer to it from bh. So we have to be careful -+ * and recheck jh still belongs to our bh before we return success. -+ */ -+ rcu_read_lock(); -+ if (!buffer_jbd(bh)) -+ goto out; -+ /* This should be bh2jh() but that doesn't work with inline functions */ -+ jh = READ_ONCE(bh->b_private); -+ if (!jh) -+ goto out; -+ if (jh->b_transaction != handle->h_transaction && -+ jh->b_next_transaction != handle->h_transaction) -+ goto out; -+ /* -+ * There are two reasons for the barrier here: -+ * 1) Make sure to fetch b_bh after we did previous checks so that we -+ * detect when jh went through free, realloc, attach to transaction -+ * while we were checking. Paired with implicit barrier in that path. -+ * 2) So that access to bh done after jbd2_write_access_granted() -+ * doesn't get reordered and see inconsistent state of concurrent -+ * do_get_write_access(). -+ */ -+ smp_mb(); -+ if (unlikely(jh->b_bh != bh)) -+ goto out; -+ ret = true; -+out: -+ rcu_read_unlock(); -+ return ret; -+} -+ - /** - * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. - * @handle: transaction to add buffer modifications to -@@ -1025,9 +1092,13 @@ out: - - int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) - { -- struct journal_head *jh = jbd2_journal_add_journal_head(bh); -+ struct journal_head *jh; - int rc; - -+ if (jbd2_write_access_granted(handle, bh)) -+ return 0; -+ -+ jh = jbd2_journal_add_journal_head(bh); - /* We do not want to get caught playing with fields which the - * log thread also manipulates. Make sure that the buffer - * completes any outstanding IO before proceeding. */ -@@ -1157,11 +1228,14 @@ out: - int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) - { - int err; -- struct journal_head *jh = jbd2_journal_add_journal_head(bh); -+ struct journal_head *jh; - char *committed_data = NULL; - - JBUFFER_TRACE(jh, "entry"); -+ if (jbd2_write_access_granted(handle, bh)) -+ return 0; - -+ jh = jbd2_journal_add_journal_head(bh); - /* - * Do this first --- it can drop the journal lock, so we want to - * make sure that obtaining the committed_data is done diff --git a/lustre/kernel_patches/series/3.10-rhel7.series b/lustre/kernel_patches/series/3.10-rhel7.series index c229d2a..093f78d 100644 --- a/lustre/kernel_patches/series/3.10-rhel7.series +++ b/lustre/kernel_patches/series/3.10-rhel7.series @@ -2,5 +2,3 @@ raid5-mmp-unplug-dev-3.8.patch dev_read_only-3.7.patch blkdev_tunables-3.9.patch vfs-project-quotas-rhel7.patch -jbd2-speedup-jbd2_journal_get_-write-undo-_access.patch -jbd2-fix-null-committed-data-return-in-undo_access.patch -- 1.8.3.1