Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / jbd-check-for-unmapped-buffer.patch
diff --git a/lustre/kernel_patches/patches/jbd-check-for-unmapped-buffer.patch b/lustre/kernel_patches/patches/jbd-check-for-unmapped-buffer.patch
new file mode 100644 (file)
index 0000000..0127ef0
--- /dev/null
@@ -0,0 +1,91 @@
+Date: Mon, 23 Oct 2006 15:40:48 -0500
+From: Eric Sandeen <sandeen@redhat.com>
+Subject: [PATCH RHEL5] handle races w/ truncate in journal_dirty_data()
+
+This is for BZ 209647 <https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=209647>: ext3/jbd panic
+
+This patch is now in -mm.
+
+When running several fsx instances and other filesystem stress tests, we
+found cases where an unmapped buffer was still being sent to submit_bh by
+the ext3 dirty data journaling code.
+
+I saw this happen in two ways, both related to another thread doing a
+truncate which would unmap the buffer in question.
+
+Either we would get into journal_dirty_data with a bh which was already
+unmapped (although journal_dirty_data_fn had checked for this earlier, the
+state was not locked at that point), or it would get unmapped in the middle
+of journal_dirty_data when we dropped locks to call sync_dirty_buffer.
+
+By re-checking for mapped state after we've acquired the bh state lock, we
+should avoid these races.  If we find a buffer which is no longer mapped,
+we essentially ignore it, because journal_unmap_buffer has already decided
+that this buffer can go away.
+
+I've also added tracepoints in these two cases, and made a couple of other
+tracepoint changes that I found useful in debugging this.
+
+Signed-off-by: Eric Sandeen <esandeen@redhat.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+---
+
+ fs/jbd/transaction.c |   15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.18-1.2732.el5/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.18-1.2732.el5.orig/fs/jbd/transaction.c
++++ linux-2.6.18-1.2732.el5/fs/jbd/transaction.c
+@@ -967,6 +967,13 @@ int journal_dirty_data(handle_t *handle,
+        */
+       jbd_lock_bh_state(bh);
+       spin_lock(&journal->j_list_lock);
++
++      /* Now that we have bh_state locked, are we really still mapped? */
++      if (!buffer_mapped(bh)) {
++              JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
++              goto no_journal;
++      }
++
+       if (jh->b_transaction) {
+               JBUFFER_TRACE(jh, "has transaction");
+               if (jh->b_transaction != handle->h_transaction) {
+@@ -1028,6 +1035,11 @@ int journal_dirty_data(handle_t *handle,
+                               sync_dirty_buffer(bh);
+                               jbd_lock_bh_state(bh);
+                               spin_lock(&journal->j_list_lock);
++                              /* Since we dropped the lock... */
++                              if (!buffer_mapped(bh)) {
++                                      JBUFFER_TRACE(jh, "buffer got unmapped");
++                                      goto no_journal;
++                              }
+                               /* The buffer may become locked again at any
+                                  time if it is redirtied */
+                       }
+@@ -1823,6 +1835,7 @@ static int journal_unmap_buffer(journal_
+                       }
+               }
+       } else if (transaction == journal->j_committing_transaction) {
++              JBUFFER_TRACE(jh, "on committing transaction");
+               if (jh->b_jlist == BJ_Locked) {
+                       /*
+                        * The buffer is on the committing transaction's locked
+@@ -1837,7 +1850,6 @@ static int journal_unmap_buffer(journal_
+                * can remove it's next_transaction pointer from the
+                * running transaction if that is set, but nothing
+                * else. */
+-              JBUFFER_TRACE(jh, "on committing transaction");
+               set_buffer_freed(bh);
+               if (jh->b_next_transaction) {
+                       J_ASSERT(jh->b_next_transaction ==
+@@ -1857,6 +1869,7 @@ static int journal_unmap_buffer(journal_
+                * i_size already for this truncate so recovery will not
+                * expose the disk blocks we are discarding here.) */
+               J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
++              JBUFFER_TRACE(jh, "on running transaction");
+               may_free = __dispose_buffer(jh, transaction);
+       }
+