Index: linux/fs/jbd/checkpoint.c =================================================================== --- linux.orig/fs/jbd/checkpoint.c +++ linux/fs/jbd/checkpoint.c @@ -170,6 +170,15 @@ static int __cleanup_transaction(journal spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); log_start_commit(journal, tid); + if (journal->j_task == current) { + /* + * Don't wait for commit if called from + * kjournald to avoid deadlock. + */ + ret = -EAGAIN; + spin_lock(&journal->j_list_lock); + break; + } log_wait_commit(journal, tid); goto out_return_1; } @@ -286,6 +295,7 @@ static int __flush_buffer(journal_t *jou int log_do_checkpoint(journal_t *journal) { int result; + int result2; int batch_count = 0; struct buffer_head *bhs[NR_BATCH]; @@ -300,6 +310,7 @@ int log_do_checkpoint(journal_t *journal jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; + result = 0; /* * OK, we need to start writing disk blocks. Try to free up a @@ -314,7 +325,7 @@ int log_do_checkpoint(journal_t *journal transaction_t *transaction; struct journal_head *jh, *last_jh, *next_jh; int drop_count = 0; - int cleanup_ret, retry = 0; + int retry = 0; tid_t this_tid; transaction = journal->j_checkpoint_transactions; @@ -364,17 +375,20 @@ int log_do_checkpoint(journal_t *journal * finding anything to write to disk. We had better be * able to make some progress or we are in trouble. */ - cleanup_ret = __cleanup_transaction(journal, transaction); - J_ASSERT(drop_count != 0 || cleanup_ret != 0); + result = __cleanup_transaction(journal, transaction); + J_ASSERT(drop_count != 0 || result != 0); + if (result == -EAGAIN) + break; + result = 0; if (journal->j_checkpoint_transactions != transaction) break; } spin_unlock(&journal->j_list_lock); - result = cleanup_journal_tail(journal); - if (result < 0) - return result; + result2 = cleanup_journal_tail(journal); + if (result == 0 && result2 < 0) + result = result2; - return 0; + return result; } /* Index: linux/fs/jbd/journal.c =================================================================== --- linux.orig/fs/jbd/journal.c +++ linux/fs/jbd/journal.c @@ -130,6 +130,8 @@ int kjournald(void *arg) journal_t *journal = (journal_t *) arg; transaction_t *transaction; struct timer_list timer; + int transaction_size; + int canlock; current_journal = journal; @@ -154,12 +156,15 @@ int kjournald(void *arg) */ spin_lock(&journal->j_state_lock); + transaction_size = 0; loop: jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", journal->j_commit_sequence, journal->j_commit_request); if (journal->j_commit_sequence != journal->j_commit_request) { jbd_debug(1, "OK, requests differ\n"); + transaction_size = + journal->j_running_transaction->t_outstanding_credits; spin_unlock(&journal->j_state_lock); del_timer_sync(journal->j_commit_timer); journal_commit_transaction(journal); @@ -168,6 +173,36 @@ loop: } wake_up(&journal->j_wait_done_commit); + + /* + * If journal is low on free space, do a checkpoint, pro-actively. + * + * Do this after wake-up to reduce waiters latency. + */ + + /* + * Trylock to avoid deadlock with threads waiting for commit under + * journal->j_checkpoint_sem. + */ + spin_unlock(&journal->j_state_lock); + canlock = !down_trylock(&journal->j_checkpoint_sem); + spin_lock(&journal->j_state_lock); + + if (canlock) { + while (__log_space_left(journal) < + journal->j_max_transaction_buffers + + 2 * transaction_size) { + int result; + + spin_unlock(&journal->j_state_lock); + result = log_do_checkpoint(journal); + spin_lock(&journal->j_state_lock); + if (result < 0) + break; + } + up(&journal->j_checkpoint_sem); + } + if (current->flags & PF_FREEZE) { /* * The simpler the better. Flushing journal isn't a