From bbdfa1431182a5b0d856695453e1031915913197 Mon Sep 17 00:00:00 2001 From: alex Date: Wed, 19 Jan 2005 16:40:55 +0000 Subject: [PATCH] b=5209 the patch contains: 1) journal_release_buffer() no longer causes journal overflow 2) journal_commit_thread() uses a persistent buffer instead of a local wbuf (which could also cause journal overflow) 3) an optimization in journal_forget() that avoids needless commits --- .../kernel_patches/patches/jbd-static-wbuf-2.6.7 | 281 +++++++++++++++++++++ lustre/kernel_patches/series/2.6-vanilla.series | 1 + 2 files changed, 282 insertions(+) create mode 100644 lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7 diff --git a/lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7 b/lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7 new file mode 100644 index 0000000..52ed921 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7 @@ -0,0 +1,281 @@ +Index: linux-2.6.7/include/linux/jbd.h +=================================================================== +--- linux-2.6.7.orig/include/linux/jbd.h 2004-08-26 17:12:16.000000000 +0400 ++++ linux-2.6.7/include/linux/jbd.h 2005-01-19 17:08:33.144512008 +0300 +@@ -826,6 +826,12 @@ + struct jbd_revoke_table_s *j_revoke_table[2]; + + /* ++ * array of bhs for journal_commit_transaction ++ */ ++ struct buffer_head **j_wbuf; ++ int j_wbufsize; ++ ++ /* + * An opaque pointer to fs-private information. 
ext3 puts its + * superblock pointer here + */ +Index: linux-2.6.7/include/linux/journal-head.h +=================================================================== +--- linux-2.6.7.orig/include/linux/journal-head.h 2003-06-24 18:05:26.000000000 +0400 ++++ linux-2.6.7/include/linux/journal-head.h 2005-01-19 14:09:59.000000000 +0300 +@@ -80,6 +80,11 @@ + * [j_list_lock] + */ + struct journal_head *b_cpnext, *b_cpprev; ++ ++ /* ++ * counter to track users of the buffer in current transaction ++ */ ++ int b_tcount; + }; + + #endif /* JOURNAL_HEAD_H_INCLUDED */ +Index: linux-2.6.7/fs/jbd/commit.c +=================================================================== +--- linux-2.6.7.orig/fs/jbd/commit.c 2004-08-26 17:12:40.000000000 +0400 ++++ linux-2.6.7/fs/jbd/commit.c 2005-01-19 17:28:32.965111552 +0300 +@@ -103,7 +103,7 @@ + { + transaction_t *commit_transaction; + struct journal_head *jh, *new_jh, *descriptor; +- struct buffer_head *wbuf[64]; ++ struct buffer_head **wbuf = journal->j_wbuf; + int bufs; + int flags; + int err; +@@ -271,7 +283,7 @@ + BUFFER_TRACE(bh, "start journal writeout"); + get_bh(bh); + wbuf[bufs++] = bh; +- if (bufs == ARRAY_SIZE(wbuf)) { ++ if (bufs == journal->j_wbufsize) { + jbd_debug(2, "submit %d writes\n", + bufs); + spin_unlock(&journal->j_list_lock); +@@ -488,7 +500,7 @@ + /* If there's no more to do, or if the descriptor is full, + let the IO rip! 
*/ + +- if (bufs == ARRAY_SIZE(wbuf) || ++ if (bufs == journal->j_wbufsize || + commit_transaction->t_buffers == NULL || + space_left < sizeof(journal_block_tag_t) + 16) { + +Index: linux-2.6.7/fs/jbd/journal.c +=================================================================== +--- linux-2.6.7.orig/fs/jbd/journal.c 2005-01-19 12:07:59.000000000 +0300 ++++ linux-2.6.7/fs/jbd/journal.c 2005-01-19 17:11:08.589880720 +0300 +@@ -687,6 +687,7 @@ + { + journal_t *journal = journal_init_common(); + struct buffer_head *bh; ++ int n; + + if (!journal) + return NULL; +@@ -702,6 +703,17 @@ + journal->j_sb_buffer = bh; + journal->j_superblock = (journal_superblock_t *)bh->b_data; + ++ /* journal descriptor can store up to n blocks -bzzz */ ++ n = journal->j_blocksize / sizeof(journal_block_tag_t); ++ journal->j_wbufsize = n; ++ journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); ++ if (!journal->j_wbuf) { ++ printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", ++ __FUNCTION__); ++ kfree(journal); ++ journal = NULL; ++ } ++ + return journal; + } + +@@ -717,7 +729,7 @@ + { + struct buffer_head *bh; + journal_t *journal = journal_init_common(); +- int err; ++ int err, n; + unsigned long blocknr; + + if (!journal) +@@ -734,6 +746,17 @@ + journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; + journal->j_blocksize = inode->i_sb->s_blocksize; + ++ /* journal descriptor can store up to n blocks -bzzz */ ++ n = journal->j_blocksize / sizeof(journal_block_tag_t); ++ journal->j_wbufsize = n; ++ journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); ++ if (!journal->j_wbuf) { ++ printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", ++ __FUNCTION__); ++ kfree(journal); ++ return NULL; ++ } ++ + err = journal_bmap(journal, 0, &blocknr); + /* If that failed, give up */ + if (err) { +@@ -1107,6 +1130,10 @@ + iput(journal->j_inode); + if (journal->j_revoke) + journal_destroy_revoke(journal); ++ if (journal->j_wbuf) { ++ 
kfree(journal->j_wbuf); ++ journal->j_wbuf = NULL; ++ } + kfree(journal); + } + +Index: linux-2.6.7/fs/jbd/transaction.c +=================================================================== +--- linux-2.6.7.orig/fs/jbd/transaction.c 2004-08-26 17:12:40.000000000 +0400 ++++ linux-2.6.7/fs/jbd/transaction.c 2005-01-19 17:23:30.058160408 +0300 +@@ -611,6 +611,10 @@ + handle->h_buffer_credits--; + if (credits) + (*credits)++; ++ ++ /* the block is becoming a member of the transaction -bzzz */ ++ jh->b_tcount = 0; ++ + goto done; + } + +@@ -694,6 +698,9 @@ + if (credits) + (*credits)++; + ++ /* the block is becoming a member of the transaction -bzzz */ ++ jh->b_tcount = 0; ++ + /* + * Finally, if the buffer is not journaled right now, we need to make + * sure it doesn't get written to disk before the caller actually +@@ -723,6 +730,11 @@ + memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); + kunmap_atomic(source, KM_USER0); + } ++ ++ /* track all references to the block to be able to recognize the ++ * situation when the buffer is not part of the transaction -bzzz */ ++ jh->b_tcount++; ++ + jbd_unlock_bh_state(bh); + + /* +@@ -822,11 +834,20 @@ + jh->b_transaction = transaction; + JBUFFER_TRACE(jh, "file as BJ_Reserved"); + __journal_file_buffer(jh, transaction, BJ_Reserved); ++ jh->b_tcount = 0; + } else if (jh->b_transaction == journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "set next transaction"); + jh->b_next_transaction = transaction; ++ jh->b_tcount = 0; + } + spin_unlock(&journal->j_list_lock); ++ ++ /* ++ * track all references to the block to be able to recognize ++ * the situation when the buffer is not part of the transaction -bzzz ++ */ ++ jh->b_tcount++; ++ + jbd_unlock_bh_state(bh); + + /* +@@ -1178,8 +1199,40 @@ + void + journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits) + { ++ journal_t *journal = handle->h_transaction->t_journal; ++ struct journal_head *jh = bh2jh(bh); ++ + BUFFER_TRACE(bh, "entry"); +- 
handle->h_buffer_credits += credits; ++ ++ /* return credit back to the handle if it was really spent */ ++ if (credits) ++ handle->h_buffer_credits++; ++ ++ jbd_lock_bh_state(bh); ++ J_ASSERT(jh->b_tcount > 0); ++ ++ jh->b_tcount--; ++ if (jh->b_tcount == 0) { ++ /* we can drop it from the transaction -bzzz */ ++ J_ASSERT(jh->b_transaction == handle->h_transaction || ++ jh->b_next_transaction == handle->h_transaction); ++ if (jh->b_transaction == handle->h_transaction) { ++ spin_lock(&journal->j_list_lock); ++ __journal_unfile_buffer(jh); ++ spin_unlock(&journal->j_list_lock); ++ } else if(jh->b_next_transaction) { ++ jh->b_next_transaction = NULL; ++ } ++ ++ /* ++ * this was last reference to the block from the current ++ * transaction and we'd like to return credit to the ++ * whole transaction -bzzz ++ */ ++ if (!credits) ++ handle->h_buffer_credits++; ++ } ++ jbd_unlock_bh_state(bh); + } + + /** +@@ -1204,6 +1257,7 @@ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + struct journal_head *jh; ++ int drop_reserve = 0; + + BUFFER_TRACE(bh, "entry"); + +@@ -1227,6 +1281,7 @@ + J_ASSERT_JH(jh, !jh->b_committed_data); + + __journal_unfile_buffer(jh); ++ drop_reserve = 1; + + /* + * We are no longer going to journal this buffer. 
+@@ -1249,7 +1304,7 @@ + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + __bforget(bh); +- return; ++ goto drop; + } + } + } else if (jh->b_transaction) { +@@ -1264,6 +1319,7 @@ + if (jh->b_next_transaction) { + J_ASSERT(jh->b_next_transaction == transaction); + jh->b_next_transaction = NULL; ++ drop_reserve = 1; + } + } + +@@ -1271,6 +1327,15 @@ + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + __brelse(bh); ++ ++drop: ++ if (drop_reserve) { ++ /* no need to reserve log space for this block -bzzz */ ++ spin_lock(&transaction->t_handle_lock); ++ transaction->t_outstanding_credits--; ++ spin_unlock(&transaction->t_handle_lock); ++ } ++ + return; + } + diff --git a/lustre/kernel_patches/series/2.6-vanilla.series b/lustre/kernel_patches/series/2.6-vanilla.series index fb5f213..61b9bdb 100644 --- a/lustre/kernel_patches/series/2.6-vanilla.series +++ b/lustre/kernel_patches/series/2.6-vanilla.series @@ -17,3 +17,4 @@ dcache-mds-num-2.6.7.patch dynamic-locks-2.6.7.patch vfs-pdirops-2.6.7.patch dcache-fid-2.6.7.patch +jbd-static-wbuf-2.6.7 -- 1.8.3.1