lustre/kernel_patches/patches/jbd2-speedup-jbd2_journal_get_-write-undo-_access.patch (from fs/lustre-release.git)
commit de92c8caf16ca84926fa31b7a5590c0fb9c0d5ca
Author: Jan Kara <jack@suse.cz>
Date:   Mon Jun 8 12:46:37 2015 -0400

    jbd2: speedup jbd2_journal_get_[write|undo]_access()

    jbd2_journal_get_write_access() and jbd2_journal_get_create_access() are
    frequently called for buffers that are already part of the running
    transaction - most frequently it is the case for bitmaps, inode table
    blocks, and superblock. Since in such cases we have nothing to do, it is
    unfortunate we still grab reference to journal head, lock the bh, lock
    bh_state only to find out there's nothing to do.

    Improving this is a bit subtle though since until we find out journal
    head is attached to the running transaction, it can disappear from under
    us because checkpointing / commit decided it's no longer needed. We deal
    with this by protecting journal_head slab with RCU. We still have to be
    careful about journal head being freed & reallocated within slab and
    about exposing journal head in consistent state (in particular
    b_modified and b_frozen_data must be in correct state before we allow
    user to touch the buffer).

    Signed-off-by: Jan Kara <jack@suse.cz>
    Signed-off-by: Theodore Ts'o <tytso@mit.edu>
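
[Editor's note, illustrative only; this text is not part of commit de92c8ca
and not part of the patch hunks below. The fast path added by this commit
relies on the "type-safe by RCU" slab pattern (SLAB_DESTROY_BY_RCU, later
renamed SLAB_TYPESAFE_BY_RCU): the journal_head memory stays valid for RCU
readers, but the object itself may be freed and reallocated, so its identity
must be rechecked after a full barrier. A minimal sketch of that pattern,
using hypothetical names (struct holder, struct obj, obj_cache) in place of
buffer_head, journal_head and jbd2_journal_head_cache:

    #include <linux/slab.h>
    #include <linux/rcupdate.h>
    #include <linux/compiler.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    struct obj;                      /* plays the role of journal_head */

    struct holder {                  /* plays the role of buffer_head  */
            struct obj *private;     /* like bh->b_private             */
    };

    struct obj {
            struct holder *backptr;  /* like jh->b_bh                  */
            void *owner;             /* like jh->b_transaction         */
    };

    static struct kmem_cache *obj_cache;

    static int obj_cache_init(void)
    {
            /*
             * SLAB_DESTROY_BY_RCU only guarantees the memory remains a
             * 'struct obj' while readers are inside rcu_read_lock(); it
             * does not delay individual kmem_cache_free() calls, hence
             * the revalidation in obj_attached_to() below.
             */
            obj_cache = kmem_cache_create("obj_cache", sizeof(struct obj),
                                          0, SLAB_DESTROY_BY_RCU, NULL);
            return obj_cache ? 0 : -ENOMEM;
    }

    /* Lockless check: is the obj hanging off 'h' attached to 'owner'? */
    static bool obj_attached_to(struct holder *h, void *owner)
    {
            struct obj *o;
            bool ret = false;

            rcu_read_lock();
            o = READ_ONCE(h->private);
            if (!o)
                    goto out;
            if (READ_ONCE(o->owner) != owner)
                    goto out;
            /*
             * 'o' may have been freed and reallocated for a different
             * holder since we fetched it. Recheck the back-pointer after
             * a full barrier, pairing with the barrier the writer issues
             * before publishing the object (the smp_wmb() calls added by
             * the hunks below).
             */
            smp_mb();
            if (unlikely(READ_ONCE(o->backptr) != h))
                    goto out;
            ret = true;
    out:
            rcu_read_unlock();
            return ret;
    }

The real jbd2_write_access_granted() added below follows this shape, with
buffer_jbd()/bh->b_private as the lookup and jh->b_bh as the revalidated
back-pointer.]
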
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e853493..e200e9f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2342,7 +2342,7 @@ static int jbd2_journal_init_journal_head_cache(void)
        jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
                                sizeof(struct journal_head),
                                0,              /* offset */
-                               SLAB_TEMPORARY, /* flags */
+                               SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU,
                                NULL);          /* ctor */
        retval = 0;
        if (!jbd2_journal_head_cache) {
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3a1700a..5fece02 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -889,6 +889,12 @@ repeat:
        if (jh->b_frozen_data) {
                JBUFFER_TRACE(jh, "has frozen data");
                J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+               /*
+                * Make sure all stores to jh (b_modified, b_frozen_data) are visible
+                * before attaching it to the running transaction. Paired with barrier
+                * in jbd2_write_access_granted()
+                */
+               smp_wmb();
                jh->b_next_transaction = transaction;
                goto done;
        }
@@ -955,6 +961,12 @@ repeat:
                        frozen_buffer = NULL;
                        need_copy = 1;
                }
+               /*
+                * Make sure all stores to jh (b_modified, b_frozen_data) are visible
+                * before attaching it to the running transaction. Paired with barrier
+                * in jbd2_write_access_granted()
+                */
+               smp_wmb();
                jh->b_next_transaction = transaction;
        }
 
@@ -968,6 +980,12 @@ repeat:
                JBUFFER_TRACE(jh, "no transaction");
                J_ASSERT_JH(jh, !jh->b_next_transaction);
                JBUFFER_TRACE(jh, "file as BJ_Reserved");
+               /*
+                * Make sure all stores to jh (b_modified, b_frozen_data) are
+                * visible before attaching it to the running transaction.
+                * Paired with barrier in jbd2_write_access_granted()
+                */
+               smp_wmb();
                spin_lock(&journal->j_list_lock);
                __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
                spin_unlock(&journal->j_list_lock);
@@ -1012,6 +1030,55 @@ out:
        return error;
 }
 
+/* Fast check whether buffer is already attached to the required transaction */
+static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
+{
+       struct journal_head *jh;
+       bool ret = false;
+
+       /* Dirty buffers require special handling... */
+       if (buffer_dirty(bh))
+               return false;
+
+       /*
+        * RCU protects us from dereferencing freed pages. So the checks we do
+        * are guaranteed not to oops. However the jh slab object can get freed
+        * & reallocated while we work with it. So we have to be careful. When
+        * we see jh attached to the running transaction, we know it must stay
+        * so until the transaction is committed. Thus jh won't be freed and
+        * will be attached to the same bh while we run.  However it can
+        * happen jh gets freed, reallocated, and attached to the transaction
+        * just after we get pointer to it from bh. So we have to be careful
+        * and recheck jh still belongs to our bh before we return success.
+        */
+       rcu_read_lock();
+       if (!buffer_jbd(bh))
+               goto out;
+       /* This should be bh2jh() but that doesn't work with inline functions */
+       jh = READ_ONCE(bh->b_private);
+       if (!jh)
+               goto out;
+       if (jh->b_transaction != handle->h_transaction &&
+           jh->b_next_transaction != handle->h_transaction)
+               goto out;
+       /*
+        * There are two reasons for the barrier here:
+        * 1) Make sure to fetch b_bh after we did previous checks so that we
+        * detect when jh went through free, realloc, attach to transaction
+        * while we were checking. Paired with implicit barrier in that path.
+        * 2) So that access to bh done after jbd2_write_access_granted()
+        * doesn't get reordered and see inconsistent state of concurrent
+        * do_get_write_access().
+        */
+       smp_mb();
+       if (unlikely(jh->b_bh != bh))
+               goto out;
+       ret = true;
+out:
+       rcu_read_unlock();
+       return ret;
+}
+
 /**
  * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
@@ -1025,9 +1092,13 @@ out:
 
 int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 {
-       struct journal_head *jh = jbd2_journal_add_journal_head(bh);
+       struct journal_head *jh;
        int rc;
 
+       if (jbd2_write_access_granted(handle, bh))
+               return 0;
+
+       jh = jbd2_journal_add_journal_head(bh);
        /* We do not want to get caught playing with fields which the
        * log thread also manipulates.  Make sure that the buffer
        * completes any outstanding IO before proceeding. */
@@ -1157,11 +1228,14 @@ out:
 int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 {
        int err;
-       struct journal_head *jh = jbd2_journal_add_journal_head(bh);
+       struct journal_head *jh;
        char *committed_data = NULL;
 
        JBUFFER_TRACE(jh, "entry");
+       if (jbd2_write_access_granted(handle, bh))
+               return 0;
 
+       jh = jbd2_journal_add_journal_head(bh);
        /*
        * Do this first --- it can drop the journal lock, so we want to
        * make sure that obtaining the committed_data is done