Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / jbd-checkpoint-on-commit.patch
1 Index: linux/fs/jbd/checkpoint.c
2 ===================================================================
3 --- linux.orig/fs/jbd/checkpoint.c
4 +++ linux/fs/jbd/checkpoint.c
5 @@ -170,6 +170,15 @@ static int __cleanup_transaction(journal
6                         spin_unlock(&journal->j_list_lock);
7                         jbd_unlock_bh_state(bh);
8                         log_start_commit(journal, tid);
9 +                       if (journal->j_task == current) {
10 +                               /*
11 +                                * Don't wait for commit if called from
12 +                                * kjournald to avoid deadlock.
13 +                                */
14 +                               ret = -EAGAIN;
15 +                               spin_lock(&journal->j_list_lock);
16 +                               break;
17 +                       }
18                         log_wait_commit(journal, tid);
19                         goto out_return_1;
20                 }
21 @@ -286,6 +295,7 @@ static int __flush_buffer(journal_t *jou
22  int log_do_checkpoint(journal_t *journal)
23  {
24         int result;
25 +       int result2;
26         int batch_count = 0;
27         struct buffer_head *bhs[NR_BATCH];
28  
29 @@ -300,6 +310,7 @@ int log_do_checkpoint(journal_t *journal
30         jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
31         if (result <= 0)
32                 return result;
33 +       result = 0;
34  
35         /*
36          * OK, we need to start writing disk blocks.  Try to free up a
37 @@ -314,7 +325,7 @@ int log_do_checkpoint(journal_t *journal
38                 transaction_t *transaction;
39                 struct journal_head *jh, *last_jh, *next_jh;
40                 int drop_count = 0;
41 -               int cleanup_ret, retry = 0;
42 +               int retry = 0;
43                 tid_t this_tid;
44  
45                 transaction = journal->j_checkpoint_transactions;
46 @@ -364,17 +375,20 @@ int log_do_checkpoint(journal_t *journal
47                  * finding anything to write to disk.  We had better be
48                  * able to make some progress or we are in trouble.
49                  */
50 -               cleanup_ret = __cleanup_transaction(journal, transaction);
51 -               J_ASSERT(drop_count != 0 || cleanup_ret != 0);
52 +               result = __cleanup_transaction(journal, transaction);
53 +               J_ASSERT(drop_count != 0 || result != 0);
54 +               if (result == -EAGAIN)
55 +                       break;
56 +               result = 0;
57                 if (journal->j_checkpoint_transactions != transaction)
58                         break;
59         }
60         spin_unlock(&journal->j_list_lock);
61 -       result = cleanup_journal_tail(journal);
62 -       if (result < 0)
63 -               return result;
64 +       result2 = cleanup_journal_tail(journal);
65 +       if (result == 0 && result2 < 0)
66 +               result = result2;
67  
68 -       return 0;
69 +       return result;
70  }
71  
72  /*
73 Index: linux/fs/jbd/journal.c
74 ===================================================================
75 --- linux.orig/fs/jbd/journal.c
76 +++ linux/fs/jbd/journal.c
77 @@ -130,6 +130,8 @@ int kjournald(void *arg)
78         journal_t *journal = (journal_t *) arg;
79         transaction_t *transaction;
80         struct timer_list timer;
81 +       int transaction_size;
82 +       int canlock;
83  
84         current_journal = journal;
85  
86 @@ -154,12 +156,15 @@ int kjournald(void *arg)
87          */
88         spin_lock(&journal->j_state_lock);
89  
90 +       transaction_size = 0;
91  loop:
92         jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
93                 journal->j_commit_sequence, journal->j_commit_request);
94  
95         if (journal->j_commit_sequence != journal->j_commit_request) {
96                 jbd_debug(1, "OK, requests differ\n");
97 +               transaction_size =
98 +                       journal->j_running_transaction->t_outstanding_credits;
99                 spin_unlock(&journal->j_state_lock);
100                 del_timer_sync(journal->j_commit_timer);
101                 journal_commit_transaction(journal);
102 @@ -168,6 +173,36 @@ loop:
103         }
104  
105         wake_up(&journal->j_wait_done_commit);
106 +
107 +       /*
108 +        * If the journal is low on free space, proactively do a checkpoint.
109 +        *
110 +        * Do this after the wake-up to reduce the waiters' latency.
111 +        */
112 +
113 +       /*
114 +        * Trylock to avoid deadlock with threads waiting for commit under
115 +        * journal->j_checkpoint_sem.
116 +        */
117 +       spin_unlock(&journal->j_state_lock);
118 +       canlock = !down_trylock(&journal->j_checkpoint_sem);
119 +       spin_lock(&journal->j_state_lock);
120 +
121 +       if (canlock) {
122 +               while (__log_space_left(journal) <
123 +                      journal->j_max_transaction_buffers +
124 +                      2 * transaction_size) {
125 +                       int result;
126 +
127 +                       spin_unlock(&journal->j_state_lock);
128 +                       result = log_do_checkpoint(journal);
129 +                       spin_lock(&journal->j_state_lock);
130 +                       if (result < 0)
131 +                               break;
132 +               }
133 +               up(&journal->j_checkpoint_sem);
134 +       }
135 +
136         if (current->flags & PF_FREEZE) {
137                 /*
138                  * The simpler the better. Flushing journal isn't a