Whamcloud - gitweb
b=5209
authoralex <alex>
Wed, 19 Jan 2005 16:40:55 +0000 (16:40 +0000)
committeralex <alex>
Wed, 19 Jan 2005 16:40:55 +0000 (16:40 +0000)
The patch contains:
  1) journal_release_buffer() no longer causes journal overflow
  2) journal_commit_transaction() uses a persistent per-journal buffer
     instead of the local wbuf (which could also cause journal overflow)
  3) an optimization in journal_forget() that avoids needless commits

lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7 [new file with mode: 0644]
lustre/kernel_patches/series/2.6-vanilla.series

diff --git a/lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7 b/lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7
new file mode 100644 (file)
index 0000000..52ed921
--- /dev/null
@@ -0,0 +1,281 @@
+Index: linux-2.6.7/include/linux/jbd.h
+===================================================================
+--- linux-2.6.7.orig/include/linux/jbd.h       2004-08-26 17:12:16.000000000 +0400
++++ linux-2.6.7/include/linux/jbd.h    2005-01-19 17:08:33.144512008 +0300
+@@ -826,6 +826,12 @@
+       struct jbd_revoke_table_s *j_revoke_table[2];
+       /*
++       * array of bhs for journal_commit_transaction
++       */
++      struct buffer_head      **j_wbuf;
++      int                     j_wbufsize;
++
++      /*
+        * An opaque pointer to fs-private information.  ext3 puts its
+        * superblock pointer here
+        */
+Index: linux-2.6.7/include/linux/journal-head.h
+===================================================================
+--- linux-2.6.7.orig/include/linux/journal-head.h      2003-06-24 18:05:26.000000000 +0400
++++ linux-2.6.7/include/linux/journal-head.h   2005-01-19 14:09:59.000000000 +0300
+@@ -80,6 +80,11 @@
+        * [j_list_lock]
+        */
+       struct journal_head *b_cpnext, *b_cpprev;
++
++      /*
++       * counter to track users of the buffer in current transaction
++       */
++      int b_tcount;
+ };
+ #endif                /* JOURNAL_HEAD_H_INCLUDED */
+Index: linux-2.6.7/fs/jbd/commit.c
+===================================================================
+--- linux-2.6.7.orig/fs/jbd/commit.c   2004-08-26 17:12:40.000000000 +0400
++++ linux-2.6.7/fs/jbd/commit.c        2005-01-19 17:28:32.965111552 +0300
+@@ -103,7 +103,7 @@
+ {
+       transaction_t *commit_transaction;
+       struct journal_head *jh, *new_jh, *descriptor;
+-      struct buffer_head *wbuf[64];
++      struct buffer_head **wbuf = journal->j_wbuf;
+       int bufs;
+       int flags;
+       int err;
+@@ -271,7 +283,7 @@
+                               BUFFER_TRACE(bh, "start journal writeout");
+                               get_bh(bh);
+                               wbuf[bufs++] = bh;
+-                              if (bufs == ARRAY_SIZE(wbuf)) {
++                              if (bufs == journal->j_wbufsize) {
+                                       jbd_debug(2, "submit %d writes\n",
+                                                       bufs);
+                                       spin_unlock(&journal->j_list_lock);
+@@ -488,7 +500,7 @@
+               /* If there's no more to do, or if the descriptor is full,
+                  let the IO rip! */
+-              if (bufs == ARRAY_SIZE(wbuf) ||
++              if (bufs == journal->j_wbufsize ||
+                   commit_transaction->t_buffers == NULL ||
+                   space_left < sizeof(journal_block_tag_t) + 16) {
+Index: linux-2.6.7/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.7.orig/fs/jbd/journal.c  2005-01-19 12:07:59.000000000 +0300
++++ linux-2.6.7/fs/jbd/journal.c       2005-01-19 17:11:08.589880720 +0300
+@@ -687,6 +687,7 @@
+ {
+       journal_t *journal = journal_init_common();
+       struct buffer_head *bh;
++      int n;
+       if (!journal)
+               return NULL;
+@@ -702,6 +703,17 @@
+       journal->j_sb_buffer = bh;
+       journal->j_superblock = (journal_superblock_t *)bh->b_data;
++      /* journal descriptor can store up to n blocks -bzzz */
++      n = journal->j_blocksize / sizeof(journal_block_tag_t);
++      journal->j_wbufsize = n;
++      journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
++      if (!journal->j_wbuf) {
++              printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
++                      __FUNCTION__);
++              kfree(journal);
++              journal = NULL;
++      }
++      
+       return journal;
+ }
+  
+@@ -717,7 +729,7 @@
+ {
+       struct buffer_head *bh;
+       journal_t *journal = journal_init_common();
+-      int err;
++      int err, n;
+       unsigned long blocknr;
+       if (!journal)
+@@ -734,6 +746,17 @@
+       journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+       journal->j_blocksize = inode->i_sb->s_blocksize;
++      /* journal descriptor can store up to n blocks -bzzz */
++      n = journal->j_blocksize / sizeof(journal_block_tag_t);
++      journal->j_wbufsize = n;
++      journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
++      if (!journal->j_wbuf) {
++              printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
++                      __FUNCTION__);
++              kfree(journal);
++              return NULL;
++      }
++
+       err = journal_bmap(journal, 0, &blocknr);
+       /* If that failed, give up */
+       if (err) {
+@@ -1107,6 +1130,10 @@
+               iput(journal->j_inode);
+       if (journal->j_revoke)
+               journal_destroy_revoke(journal);
++      if (journal->j_wbuf) {
++              kfree(journal->j_wbuf);
++              journal->j_wbuf = NULL;
++      }
+       kfree(journal);
+ }
+Index: linux-2.6.7/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.7.orig/fs/jbd/transaction.c      2004-08-26 17:12:40.000000000 +0400
++++ linux-2.6.7/fs/jbd/transaction.c   2005-01-19 17:23:30.058160408 +0300
+@@ -611,6 +611,10 @@
+               handle->h_buffer_credits--;
+               if (credits)
+                       (*credits)++;
++
++              /* the block is becoming a member of the transaction -bzzz */
++              jh->b_tcount = 0;
++
+               goto done;
+       }
+@@ -694,6 +698,9 @@
+       if (credits)
+               (*credits)++;
++      /* the block is becoming a member of the transaction -bzzz */
++      jh->b_tcount = 0;
++
+       /*
+        * Finally, if the buffer is not journaled right now, we need to make
+        * sure it doesn't get written to disk before the caller actually
+@@ -723,6 +730,11 @@
+               memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
+               kunmap_atomic(source, KM_USER0);
+       }
++
++      /* track all references to the block to be able to recognize the
++       * situation when the buffer is not part of transaction -bzzz */
++      jh->b_tcount++;
++
+       jbd_unlock_bh_state(bh);
+       /*
+@@ -822,11 +834,20 @@
+               jh->b_transaction = transaction;
+               JBUFFER_TRACE(jh, "file as BJ_Reserved");
+               __journal_file_buffer(jh, transaction, BJ_Reserved);
++              jh->b_tcount = 0;
+       } else if (jh->b_transaction == journal->j_committing_transaction) {
+               JBUFFER_TRACE(jh, "set next transaction");
+               jh->b_next_transaction = transaction;
++              jh->b_tcount = 0;
+       }
+       spin_unlock(&journal->j_list_lock);
++
++      /*
++       * track all references to the block to be able to recognize
++       * the situation when the buffer is not part of transaction -bzzz
++       */
++      jh->b_tcount++;
++
+       jbd_unlock_bh_state(bh);
+       /*
+@@ -1178,8 +1199,40 @@
+ void
+ journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits)
+ {
++      journal_t *journal = handle->h_transaction->t_journal;
++      struct journal_head *jh = bh2jh(bh);
++
+       BUFFER_TRACE(bh, "entry");
+-      handle->h_buffer_credits += credits;
++
++      /* return credit back to the handle if it was really spent */
++      if (credits)
++              handle->h_buffer_credits++; 
++
++      jbd_lock_bh_state(bh);
++      J_ASSERT(jh->b_tcount > 0);
++
++      jh->b_tcount--;
++      if (jh->b_tcount == 0) {
++              /* we can drop it from the transaction -bzzz */
++              J_ASSERT(jh->b_transaction == handle->h_transaction ||
++                              jh->b_next_transaction == handle->h_transaction);
++              if (jh->b_transaction == handle->h_transaction) {
++                      spin_lock(&journal->j_list_lock);
++                      __journal_unfile_buffer(jh);
++                      spin_unlock(&journal->j_list_lock);
++              } else if(jh->b_next_transaction) {
++                      jh->b_next_transaction = NULL;
++              }
++
++              /* 
++               * this was the last reference to the block from the current
++               * transaction and we'd like to return credit to the
++               * whole transaction -bzzz
++               */
++              if (!credits)
++                      handle->h_buffer_credits++; 
++      }
++      jbd_unlock_bh_state(bh);
+ }
+ /** 
+@@ -1204,6 +1257,7 @@
+       transaction_t *transaction = handle->h_transaction;
+       journal_t *journal = transaction->t_journal;
+       struct journal_head *jh;
++      int drop_reserve = 0;
+       BUFFER_TRACE(bh, "entry");
+@@ -1227,6 +1281,7 @@
+               J_ASSERT_JH(jh, !jh->b_committed_data);
+               __journal_unfile_buffer(jh);
++              drop_reserve = 1;
+               /* 
+                * We are no longer going to journal this buffer.
+@@ -1249,7 +1304,7 @@
+                               spin_unlock(&journal->j_list_lock);
+                               jbd_unlock_bh_state(bh);
+                               __bforget(bh);
+-                              return;
++                              goto drop;
+                       }
+               }
+       } else if (jh->b_transaction) {
+@@ -1264,6 +1319,7 @@
+               if (jh->b_next_transaction) {
+                       J_ASSERT(jh->b_next_transaction == transaction);
+                       jh->b_next_transaction = NULL;
++                      drop_reserve = 1;
+               }
+       }
+@@ -1271,6 +1327,15 @@
+       spin_unlock(&journal->j_list_lock);
+       jbd_unlock_bh_state(bh);
+       __brelse(bh);
++
++drop:
++      if (drop_reserve) {
++              /* no need to reserve log space for this block -bzzz */
++              spin_lock(&transaction->t_handle_lock);
++              transaction->t_outstanding_credits--;
++              spin_unlock(&transaction->t_handle_lock);
++      }
++
+       return;
+ }
index fb5f213..61b9bdb 100644 (file)
@@ -17,3 +17,4 @@ dcache-mds-num-2.6.7.patch
 dynamic-locks-2.6.7.patch
 vfs-pdirops-2.6.7.patch
 dcache-fid-2.6.7.patch
 dynamic-locks-2.6.7.patch
 vfs-pdirops-2.6.7.patch
 dcache-fid-2.6.7.patch
+jbd-static-wbuf-2.6.7