+++ /dev/null
-Index: linux-2.6.18-128.1.6/fs/jbd/commit.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/commit.c 2009-06-02 23:24:00.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/commit.c 2009-06-02 23:26:07.000000000 -0600
-@@ -22,6 +22,7 @@
- #include <linux/mm.h>
- #include <linux/pagemap.h>
- #include <linux/smp_lock.h>
-+#include <linux/crc32.h>
-
-
- /*
-@@ -95,19 +96,23 @@
- return 1;
- }
-
--/* Done it all: now write the commit record. We should have
-+/*
-+ * Done it all: now submit the commit record. We should have
- * cleaned up our previous buffers by now, so if we are in abort
- * mode we can now just skip the rest of the journal write
- * entirely.
- *
- * Returns 1 if the journal needs to be aborted or 0 on success
- */
--static int journal_write_commit_record(journal_t *journal,
-- transaction_t *commit_transaction)
-+static int journal_submit_commit_record(journal_t *journal,
-+ transaction_t *commit_transaction,
-+ struct buffer_head **cbh,
-+ __u32 crc32_sum)
- {
- struct journal_head *descriptor;
-+ struct commit_header *tmp;
- struct buffer_head *bh;
-- int i, ret;
-+ int ret;
- int barrier_done = 0;
-
- if (is_journal_aborted(journal))
-@@ -119,21 +124,34 @@
-
- bh = jh2bh(descriptor);
-
-- /* AKPM: buglet - add `i' to tmp! */
-- for (i = 0; i < bh->b_size; i += 512) {
-- journal_header_t *tmp = (journal_header_t*)bh->b_data;
-- tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
-- tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
-- tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
-+ tmp = (struct commit_header *)bh->b_data;
-+ tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
-+ tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
-+ tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
-+
-+ if (JFS_HAS_COMPAT_FEATURE(journal,
-+ JFS_FEATURE_COMPAT_CHECKSUM)) {
-+ tmp->h_chksum_type = JFS_CRC32_CHKSUM;
-+ tmp->h_chksum_size = JFS_CRC32_CHKSUM_SIZE;
-+ tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
- }
-
-- JBUFFER_TRACE(descriptor, "write commit block");
-+ JBUFFER_TRACE(descriptor, "submit commit block");
-+ lock_buffer(bh);
-+
- set_buffer_dirty(bh);
-- if (journal->j_flags & JFS_BARRIER) {
-+ set_buffer_uptodate(bh);
-+ bh->b_end_io = journal_end_buffer_io_sync;
-+
-+ if (journal->j_flags & JFS_BARRIER &&
-+ !JFS_HAS_INCOMPAT_FEATURE(journal,
-+ JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
-+
- set_buffer_ordered(bh);
- barrier_done = 1;
- }
-- ret = sync_dirty_buffer(bh);
-+ ret = submit_bh(WRITE, bh);
-+
- /* is it possible for another commit to fail at roughly
- * the same time as this one? If so, we don't want to
- * trust the barrier flag in the super, but instead want
-@@ -154,12 +172,70 @@
- clear_buffer_ordered(bh);
- set_buffer_uptodate(bh);
- set_buffer_dirty(bh);
-- ret = sync_dirty_buffer(bh);
-+ ret = submit_bh(WRITE, bh);
- }
-- put_bh(bh); /* One for getblk() */
-- journal_put_journal_head(descriptor);
-+ *cbh = bh;
-+ return ret;
-+}
-+
-+/*
-+ * This function along with journal_submit_commit_record
-+ * allows to write the commit record asynchronously.
-+ */
-+static int journal_wait_on_commit_record(struct buffer_head *bh)
-+{
-+ int ret = 0;
-+
-+ clear_buffer_dirty(bh);
-+ wait_on_buffer(bh);
-+
-+ if (unlikely(!buffer_uptodate(bh)))
-+ ret = -EIO;
-+ put_bh(bh); /* One for getblk() */
-+ journal_put_journal_head(bh2jh(bh));
-+
-+ return ret;
-+}
-+
-+/*
-+ * Wait for all submitted IO to complete.
-+ */
-+static int journal_wait_on_locked_list(journal_t *journal,
-+ transaction_t *commit_transaction)
-+{
-+ int ret = 0;
-+ struct journal_head *jh;
-
-- return (ret == -EIO);
-+ while (commit_transaction->t_locked_list) {
-+ struct buffer_head *bh;
-+
-+ jh = commit_transaction->t_locked_list->b_tprev;
-+ bh = jh2bh(jh);
-+ get_bh(bh);
-+ if (buffer_locked(bh)) {
-+ spin_unlock(&journal->j_list_lock);
-+ wait_on_buffer(bh);
-+ if (unlikely(!buffer_uptodate(bh)))
-+ ret = -EIO;
-+ spin_lock(&journal->j_list_lock);
-+ }
-+ if (!inverted_lock(journal, bh)) {
-+ put_bh(bh);
-+ spin_lock(&journal->j_list_lock);
-+ continue;
-+ }
-+ if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
-+ __journal_unfile_buffer(jh);
-+ jbd_unlock_bh_state(bh);
-+ journal_remove_journal_head(bh);
-+ put_bh(bh);
-+ } else {
-+ jbd_unlock_bh_state(bh);
-+ }
-+ put_bh(bh);
-+ cond_resched_lock(&journal->j_list_lock);
-+ }
-+ return ret;
- }
-
- void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
-@@ -282,6 +358,20 @@
- return err;
- }
-
-+static inline __u32 jbd_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
-+{
-+ struct page *page = bh->b_page;
-+ char *addr;
-+ __u32 checksum;
-+
-+ addr = kmap_atomic(page, KM_USER0);
-+ checksum = crc32_be(crc32_sum,
-+ (void *)(addr + offset_in_page(bh->b_data)),
-+ bh->b_size);
-+ kunmap_atomic(addr, KM_USER0);
-+ return checksum;
-+}
-+
- /*
- * journal_commit_transaction
- *
-@@ -305,6 +395,8 @@
- int first_tag = 0;
- int tag_flag;
- int i;
-+ struct buffer_head *cbh = NULL; /* For transactional checksums */
-+ __u32 crc32_sum = ~0;
-
- /*
- * First job: lock down the current transaction and wait for
-@@ -431,39 +523,14 @@
- err = journal_submit_data_buffers(journal, commit_transaction);
-
- /*
-- * Wait for all previously submitted IO to complete.
-+ * Wait for all previously submitted IO to complete if commit
-+ * record is to be written synchronously.
- */
- spin_lock(&journal->j_list_lock);
-- while (commit_transaction->t_locked_list) {
-- struct buffer_head *bh;
--
-- jh = commit_transaction->t_locked_list->b_tprev;
-- bh = jh2bh(jh);
-- get_bh(bh);
-- if (buffer_locked(bh)) {
-- spin_unlock(&journal->j_list_lock);
-- wait_on_buffer(bh);
-- spin_lock(&journal->j_list_lock);
-- }
-- if (unlikely(!buffer_uptodate(bh)))
-- err = -EIO;
--
-- if (!inverted_lock(journal, bh)) {
-- put_bh(bh);
-- spin_lock(&journal->j_list_lock);
-- continue;
-- }
-- if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
-- __journal_unfile_buffer(jh);
-- jbd_unlock_bh_state(bh);
-- journal_remove_journal_head(bh);
-- put_bh(bh);
-- } else {
-- jbd_unlock_bh_state(bh);
-- }
-- release_data_buffer(bh);
-- cond_resched_lock(&journal->j_list_lock);
-- }
-+ if (!JFS_HAS_INCOMPAT_FEATURE(journal,
-+ JFS_FEATURE_INCOMPAT_ASYNC_COMMIT))
-+ err = journal_wait_on_locked_list(journal,
-+ commit_transaction);
- spin_unlock(&journal->j_list_lock);
-
- if (err)
-@@ -642,6 +709,16 @@
- start_journal_io:
- for (i = 0; i < bufs; i++) {
- struct buffer_head *bh = wbuf[i];
-+ /*
-+ * Compute checksum.
-+ */
-+ if (JFS_HAS_COMPAT_FEATURE(journal,
-+ JFS_FEATURE_COMPAT_CHECKSUM)) {
-+ crc32_sum =
-+ jbd_checksum_data(crc32_sum,
-+ bh);
-+ }
-+
- lock_buffer(bh);
- clear_buffer_dirty(bh);
- set_buffer_uptodate(bh);
-@@ -658,6 +735,23 @@
- }
- }
-
-+ /* Done it all: now write the commit record asynchronously. */
-+
-+ if (JFS_HAS_INCOMPAT_FEATURE(journal,
-+ JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
-+ err = journal_submit_commit_record(journal, commit_transaction,
-+ &cbh, crc32_sum);
-+ if (err)
-+ __journal_abort_hard(journal);
-+
-+ spin_lock(&journal->j_list_lock);
-+ err = journal_wait_on_locked_list(journal,
-+ commit_transaction);
-+ spin_unlock(&journal->j_list_lock);
-+ if (err)
-+ __journal_abort_hard(journal);
-+ }
-+
- /* Lo and behold: we have just managed to send a transaction to
- the log. Before we can commit it, wait for the IO so far to
- complete. Control buffers being written are on the
-@@ -759,9 +853,15 @@
- journal_abort(journal, err);
-
- jbd_debug(3, "JBD: commit phase 6\n");
--
-- if (journal_write_commit_record(journal, commit_transaction))
-- err = -EIO;
-+
-+ if (!JFS_HAS_INCOMPAT_FEATURE(journal,
-+ JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
-+ err = journal_submit_commit_record(journal, commit_transaction,
-+ &cbh, crc32_sum);
-+ if (err)
-+ __journal_abort_hard(journal);
-+ }
-+ err = journal_wait_on_commit_record(cbh);
-
- if (err)
- journal_abort(journal, err);
-Index: linux-2.6.18-128.1.6/fs/jbd/recovery.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/recovery.c 2009-04-14 21:05:39.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/recovery.c 2009-06-02 23:26:07.000000000 -0600
-@@ -21,6 +21,7 @@
- #include <linux/jbd.h>
- #include <linux/errno.h>
- #include <linux/slab.h>
-+#include <linux/crc32.h>
- #endif
-
- /*
-@@ -310,6 +311,38 @@
- return err;
- }
-
-+/*
-+ * calc_chksums calculates the checksums for the blocks described in the
-+ * descriptor block.
-+ */
-+static int calc_chksums(journal_t *journal, struct buffer_head *bh,
-+ unsigned long *next_log_block, __u32 *crc32_sum)
-+{
-+ int i, num_blks, err;
-+ unsigned long io_block;
-+ struct buffer_head *obh;
-+
-+ num_blks = count_tags(bh, journal->j_blocksize);
-+ /* Calculate checksum of the descriptor block. */
-+ *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
-+
-+ for (i = 0; i < num_blks; i++) {
-+ io_block = (*next_log_block)++;
-+ wrap(journal, *next_log_block);
-+ err = jread(&obh, journal, io_block);
-+ if (err) {
-+ printk(KERN_ERR "JBD: IO error %d recovering block "
-+ "%lu in log\n", err, io_block);
-+ return 1;
-+ } else {
-+ *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
-+ obh->b_size);
-+ }
-+ put_bh(obh);
-+ }
-+ return 0;
-+}
-+
- static int do_one_pass(journal_t *journal,
- struct recovery_info *info, enum passtype pass)
- {
-@@ -321,6 +354,7 @@
- struct buffer_head * bh;
- unsigned int sequence;
- int blocktype;
-+ __u32 crc32_sum = ~0; /* Transactional Checksums */
-
- /* Precompute the maximum metadata descriptors in a descriptor block */
- int MAX_BLOCKS_PER_DESC;
-@@ -412,9 +446,24 @@
- switch(blocktype) {
- case JFS_DESCRIPTOR_BLOCK:
- /* If it is a valid descriptor block, replay it
-- * in pass REPLAY; otherwise, just skip over the
-- * blocks it describes. */
-+ * in pass REPLAY; if journal_checksums enabled, then
-+ * calculate checksums in PASS_SCAN, otherwise,
-+ * just skip over the blocks it describes. */
- if (pass != PASS_REPLAY) {
-+ if (pass == PASS_SCAN &&
-+ JFS_HAS_COMPAT_FEATURE(journal,
-+ JFS_FEATURE_COMPAT_CHECKSUM) &&
-+ !info->end_transaction) {
-+ if (calc_chksums(journal, bh,
-+ &next_log_block,
-+ &crc32_sum)) {
-+ put_bh(bh);
-+ break;
-+ }
-+ put_bh(bh);
-+ continue;
-+ }
-+
- next_log_block +=
- count_tags(bh, journal->j_blocksize);
- wrap(journal, next_log_block);
-@@ -509,9 +558,97 @@
- continue;
-
- case JFS_COMMIT_BLOCK:
-- /* Found an expected commit block: not much to
-- * do other than move on to the next sequence
-+ /* How to differentiate between interrupted commit
-+ * and journal corruption ?
-+ *
-+ * {nth transaction}
-+ * Checksum Verification Failed
-+ * |
-+ * ____________________
-+ * | |
-+ * async_commit sync_commit
-+ * | |
-+ * | GO TO NEXT "Journal Corruption"
-+ * | TRANSACTION
-+ * |
-+ * {(n+1)th transanction}
-+ * |
-+ * _______|______________
-+ * | |
-+ * Commit block found Commit block not found
-+ * | |
-+ * "Journal Corruption" |
-+ * _____________|__________
-+ * | |
-+ * nth trans corrupt OR nth trans
-+ * and (n+1)th interrupted interrupted
-+ * before commit block
-+ * could reach the disk.
-+ * (Cannot find the difference in above
-+ * mentioned conditions. Hence assume
-+ * "Interrupted Commit".)
-+ */
-+
-+ /* Found an expected commit block: if checksums
-+ * are present verify them in PASS_SCAN; else not
-+ * much to do other than move on to the next sequence
- * number. */
-+ if (pass == PASS_SCAN &&
-+ JFS_HAS_COMPAT_FEATURE(journal,
-+ JFS_FEATURE_COMPAT_CHECKSUM)) {
-+ int chksum_err, chksum_seen;
-+ struct commit_header *cbh =
-+ (struct commit_header *)bh->b_data;
-+ unsigned found_chksum =
-+ be32_to_cpu(cbh->h_chksum[0]);
-+
-+ chksum_err = chksum_seen = 0;
-+
-+ if (info->end_transaction) {
-+ printk(KERN_ERR "JBD: Transaction %u "
-+ "found to be corrupt.\n",
-+ next_commit_ID - 1);
-+ brelse(bh);
-+ break;
-+ }
-+
-+ if (crc32_sum == found_chksum &&
-+ cbh->h_chksum_type == JFS_CRC32_CHKSUM &&
-+ cbh->h_chksum_size ==
-+ JFS_CRC32_CHKSUM_SIZE) {
-+ chksum_seen = 1;
-+ } else if (!(cbh->h_chksum_type == 0 &&
-+ cbh->h_chksum_size == 0 &&
-+ found_chksum == 0 &&
-+ !chksum_seen)) {
-+ /*
-+ * If fs is mounted using an old kernel and then
-+ * kernel with journal_chksum is used then we
-+ * get a situation where the journal flag has
-+ * checksum flag set but checksums are not
-+ * present i.e chksum = 0, in the individual
-+ * commit blocks.
-+ * Hence to avoid checksum failures, in this
-+ * situation, this extra check is added.
-+ */
-+ chksum_err = 1;
-+ }
-+
-+ if (chksum_err) {
-+ info->end_transaction = next_commit_ID;
-+
-+ if (!JFS_HAS_INCOMPAT_FEATURE(journal,
-+ JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)){
-+ printk(KERN_ERR
-+ "JBD: Transaction %u "
-+ "found to be corrupt.\n",
-+ next_commit_ID);
-+ brelse(bh);
-+ break;
-+ }
-+ }
-+ crc32_sum = ~0;
-+ }
- brelse(bh);
- next_commit_ID++;
- continue;
-@@ -547,9 +684,10 @@
- * transaction marks the end of the valid log.
- */
-
-- if (pass == PASS_SCAN)
-- info->end_transaction = next_commit_ID;
-- else {
-+ if (pass == PASS_SCAN) {
-+ if (!info->end_transaction)
-+ info->end_transaction = next_commit_ID;
-+ } else {
- /* It's really bad news if different passes end up at
- * different places (but possible due to IO errors). */
- if (info->end_transaction != next_commit_ID) {
-Index: linux-2.6.18-128.1.6/fs/jbd/journal.c
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/jbd/journal.c 2009-06-02 23:24:00.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/jbd/journal.c 2009-06-02 23:26:07.000000000 -0600
-@@ -67,6 +67,7 @@
- EXPORT_SYMBOL(journal_check_used_features);
- EXPORT_SYMBOL(journal_check_available_features);
- EXPORT_SYMBOL(journal_set_features);
-+EXPORT_SYMBOL(journal_clear_features);
- EXPORT_SYMBOL(journal_create);
- EXPORT_SYMBOL(journal_load);
- EXPORT_SYMBOL(journal_destroy);
-@@ -1583,6 +1584,33 @@
- return 1;
- }
-
-+/**
-+ * int journal_clear_features () - Clear a given journal feature in the superblock
-+ * @journal: Journal to act on.
-+ * @compat: bitmask of compatible features
-+ * @ro: bitmask of features that force read-only mount
-+ * @incompat: bitmask of incompatible features
-+ *
-+ * Clear a given journal feature as present on the
-+ * superblock. Returns true if the requested features could be reset.
-+ *
-+ */
-+int journal_clear_features (journal_t *journal, unsigned long compat,
-+ unsigned long ro, unsigned long incompat)
-+{
-+ journal_superblock_t *sb;
-+
-+ jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
-+ compat, ro, incompat);
-+
-+ sb = journal->j_superblock;
-+
-+ sb->s_feature_compat &= ~cpu_to_be32(compat);
-+ sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
-+ sb->s_feature_incompat &= ~cpu_to_be32(incompat);
-+
-+ return 1;
-+}
-
- /**
- * int journal_update_format () - Update on-disk journal structure.
-Index: linux-2.6.18-128.1.6/fs/Kconfig
-===================================================================
---- linux-2.6.18-128.1.6.orig/fs/Kconfig 2009-04-14 21:05:39.000000000 -0600
-+++ linux-2.6.18-128.1.6/fs/Kconfig 2009-06-02 23:26:07.000000000 -0600
-@@ -206,6 +206,7 @@
-
- config JBD
- tristate
-+ select CRC32
- help
- This is a generic journaling layer for block devices. It is
- currently used by the ext3 and OCFS2 file systems, but it could
-Index: linux-2.6.18-128.1.6/include/linux/jbd.h
-===================================================================
---- linux-2.6.18-128.1.6.orig/include/linux/jbd.h 2009-06-02 23:24:00.000000000 -0600
-+++ linux-2.6.18-128.1.6/include/linux/jbd.h 2009-06-02 23:26:07.000000000 -0600
-@@ -148,6 +148,29 @@
- __be32 h_sequence;
- } journal_header_t;
-
-+/*
-+ * Checksum types.
-+ */
-+#define JFS_CRC32_CHKSUM 1
-+#define JFS_MD5_CHKSUM 2
-+#define JFS_SHA1_CHKSUM 3
-+
-+#define JFS_CRC32_CHKSUM_SIZE 4
-+
-+#define JFS_CHECKSUM_BYTES (32 / sizeof(u32))
-+/*
-+ * Commit block header for storing transactional checksums:
-+ */
-+struct commit_header
-+{
-+ __be32 h_magic;
-+ __be32 h_blocktype;
-+ __be32 h_sequence;
-+ unsigned char h_chksum_type;
-+ unsigned char h_chksum_size;
-+ unsigned char h_padding[2];
-+ __be32 h_chksum[JFS_CHECKSUM_BYTES];
-+};
-
- /*
- * The block tag: used to describe a single buffer in the journal
-@@ -234,12 +257,16 @@
- ((j)->j_format_version >= 2 && \
- ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
-
--#define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001
-+#define JFS_FEATURE_COMPAT_CHECKSUM 0x00000001
-+
-+#define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001
-+#define JFS_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
-
- /* Features known to this kernel version: */
--#define JFS_KNOWN_COMPAT_FEATURES 0
-+#define JFS_KNOWN_COMPAT_FEATURES JFS_FEATURE_COMPAT_CHECKSUM
- #define JFS_KNOWN_ROCOMPAT_FEATURES 0
--#define JFS_KNOWN_INCOMPAT_FEATURES JFS_FEATURE_INCOMPAT_REVOKE
-+#define JFS_KNOWN_INCOMPAT_FEATURES (JFS_FEATURE_INCOMPAT_REVOKE | \
-+ JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)
-
- #ifdef __KERNEL__
-
-@@ -1053,6 +1080,8 @@
- (journal_t *, unsigned long, unsigned long, unsigned long);
- extern int journal_set_features
- (journal_t *, unsigned long, unsigned long, unsigned long);
-+extern int journal_clear_features
-+ (journal_t *, unsigned long, unsigned long, unsigned long);
- extern int journal_create (journal_t *);
- extern int journal_load (journal_t *journal);
- #ifndef __GENKSYMS__
-Index: linux-2.6.18-128.1.6/Documentation/filesystems/ext3.txt
-===================================================================
---- linux-2.6.18-128.1.6.orig/Documentation/filesystems/ext3.txt 2006-09-19 21:42:06.000000000 -0600
-+++ linux-2.6.18-128.1.6/Documentation/filesystems/ext3.txt 2009-06-02 23:26:07.000000000 -0600
-@@ -14,6 +14,16 @@
- When mounting an ext3 filesystem, the following option are accepted:
- (*) == default
-
-+journal_checksum Enable checksumming of the journal transactions.
-+ This will allow the recovery code in e2fsck and the
-+ kernel to detect corruption in the kernel. It is a
-+ compatible change and will be ignored by older kernels.
-+
-+journal_async_commit Commit block can be written to disk without waiting
-+ for descriptor blocks. If enabled older kernels cannot
-+ mount the device. This will enable 'journal_checksum'
-+ internally.
-+
- journal=update Update the ext3 file system's journal to the current
- format.
-