X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=e2fsck%2Frecovery.c;h=8e4057560f45a41432de8927a09eb05b856a54a4;hb=3fc1db6218f885b5f0a50084f4634f7b793d638f;hp=8ad13b8de03d0004692339bd1b210d2b0305ae20;hpb=53ef44c40a3e425d2c700d8fd77a6b655aa121fe;p=tools%2Fe2fsprogs.git diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c index 8ad13b8..8e40575 100644 --- a/e2fsck/recovery.c +++ b/e2fsck/recovery.c @@ -1,6 +1,6 @@ /* - * linux/fs/recovery.c - * + * linux/fs/jbd/recovery.c + * * Written by Stephen C. Tweedie , 1999 * * Copyright 1999-2000 Red Hat Software --- All Rights Reserved @@ -10,43 +10,43 @@ * option, any later version, incorporated herein by reference. * * Journal recovery routines for the generic filesystem journaling code; - * part of the ext2fs journaling system. + * part of the ext2fs journaling system. */ #ifndef __KERNEL__ #include "jfs_user.h" #else -#include +#include #include -#include +#include #include -#include -#include -#include +#include #endif /* * Maintain information about the progress of the recovery job, so that - * the different passes can carry information between them. + * the different passes can carry information between them. */ -struct recovery_info +struct recovery_info { - tid_t start_transaction; + tid_t start_transaction; tid_t end_transaction; - + int nr_replays; int nr_revokes; int nr_revoke_hits; }; enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; -static int do_one_pass(journal_t *, struct recovery_info *, enum passtype); -static int scan_revoke_records(journal_t *, struct buffer_head *, tid_t, struct recovery_info *); +static int do_one_pass(journal_t *journal, + struct recovery_info *info, enum passtype pass); +static int scan_revoke_records(journal_t *, struct buffer_head *, + tid_t, struct recovery_info *); #ifdef __KERNEL__ /* Release readahead buffers after use */ -static void brelse_array(struct buffer_head *b[], int n) +static void journal_brelse_array(struct buffer_head *b[], int n) { while (--n >= 0) brelse (b[n]); @@ -65,15 +65,16 @@ static void brelse_array(struct buffer_head *b[], int n) * fixed value. */ +#define MAXBUF 8 static int do_readahead(journal_t *journal, unsigned int start) { int err; - unsigned int max, nbufs, next, blocknr; + unsigned int max, nbufs, next; + unsigned long blocknr; struct buffer_head *bh; - -#define MAXBUF 8 + struct buffer_head * bufs[MAXBUF]; - + /* Do up to 128K of readahead */ max = start + (128 * 1024 / journal->j_blocksize); if (max > journal->j_maxlen) @@ -81,21 +82,19 @@ static int do_readahead(journal_t *journal, unsigned int start) /* Do the readahead itself. We'll submit MAXBUF buffer_heads at * a time to the block device IO layer. */ - + nbufs = 0; - + for (next = start; next < max; next++) { - blocknr = next; - if (journal->j_inode) - blocknr = bmap(journal->j_inode, next); - if (!blocknr) { - printk (KERN_ERR "JFS: bad block at offset %u\n", + err = journal_bmap(journal, next, &blocknr); + + if (err) { + printk (KERN_ERR "JBD: bad block at offset %u\n", next); - err = -EIO; goto failed; } - - bh = getblk(journal->j_dev, blocknr, journal->j_blocksize); + + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); if (!bh) { err = -ENOMEM; goto failed; @@ -105,7 +104,7 @@ static int do_readahead(journal_t *journal, unsigned int start) bufs[nbufs++] = bh; if (nbufs == MAXBUF) { ll_rw_block(READ, nbufs, bufs); - brelse_array(bufs, nbufs); + journal_brelse_array(bufs, nbufs); nbufs = 0; } } else @@ -115,10 +114,10 @@ static int do_readahead(journal_t *journal, unsigned int start) if (nbufs) ll_rw_block(READ, nbufs, bufs); err = 0; - -failed: - if (nbufs) - brelse_array(bufs, nbufs); + +failed: + if (nbufs) + journal_brelse_array(bufs, nbufs); return err; } @@ -129,30 +128,32 @@ failed: * Read a block from the journal */ -static int jread(struct buffer_head **bhp, journal_t *journal, +static int jread(struct buffer_head **bhp, journal_t *journal, unsigned int offset) { - unsigned int blocknr; + int err; + unsigned long blocknr; struct buffer_head *bh; *bhp = NULL; - J_ASSERT (offset < journal->j_maxlen); - - blocknr = offset; - if (journal->j_inode) - blocknr = bmap(journal->j_inode, offset); - - if (!blocknr) { - printk (KERN_ERR "JFS: bad block at offset %u\n", - offset); + if (offset >= journal->j_maxlen) { + printk(KERN_ERR "JBD: corrupted journal superblock\n"); return -EIO; } - - bh = getblk(journal->j_dev, blocknr, journal->j_blocksize); + + err = journal_bmap(journal, offset, &blocknr); + + if (err) { + printk (KERN_ERR "JBD: bad block at offset %u\n", + offset); + return err; + } + + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); if (!bh) return -ENOMEM; - + if (!buffer_uptodate(bh)) { /* If this is a brand new buffer, start readahead. Otherwise, we assume we are already reading it. */ @@ -160,14 +161,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal, do_readahead(journal, offset); wait_on_buffer(bh); } - + if (!buffer_uptodate(bh)) { - printk (KERN_ERR "JFS: Failed to read block at offset %u\n", + printk (KERN_ERR "JBD: Failed to read block at offset %u\n", offset); brelse(bh); return -EIO; } - + *bhp = bh; return 0; } @@ -177,26 +178,27 @@ static int jread(struct buffer_head **bhp, journal_t *journal, * Count the number of in-use tags in a journal descriptor block. */ -static int count_tags(struct buffer_head *bh, int size) +static int count_tags(journal_t *journal, struct buffer_head *bh) { char * tagp; journal_block_tag_t * tag; - int nr = 0; - + int nr = 0, size = journal->j_blocksize; + int tag_bytes = journal_tag_bytes(journal); + tagp = &bh->b_data[sizeof(journal_header_t)]; - - while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) { + + while ((tagp - bh->b_data + tag_bytes) <= size) { tag = (journal_block_tag_t *) tagp; - + nr++; - tagp += sizeof(journal_block_tag_t); - if (!(tag->t_flags & htonl(JFS_FLAG_SAME_UUID))) + tagp += tag_bytes; + if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID))) tagp += 16; - if (tag->t_flags & htonl(JFS_FLAG_LAST_TAG)) + if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG)) break; } - + return nr; } @@ -208,18 +210,18 @@ do { \ var -= ((journal)->j_last - (journal)->j_first); \ } while (0) -/* - * journal_recover +/** + * journal_recover - recovers a on-disk journal + * @journal: the journal to recover * * The primary function for recovering the log contents when mounting a - * journaled device. - * + * journaled device. + * * Recovery is done in three passes. In the first pass, we look for the * end of the log. In the second, we assemble the list of revoke * blocks. In the third and final pass, we replay any un-revoked blocks - * in the log. + * in the log. */ - int journal_recover(journal_t *journal) { int err; @@ -228,22 +230,20 @@ int journal_recover(journal_t *journal) struct recovery_info info; memset(&info, 0, sizeof(info)); - sb = journal->j_superblock; - /* + /* * The journal superblock's s_start field (the current log head) * is always zero if, and only if, the journal was cleanly - * unmounted. + * unmounted. */ if (!sb->s_start) { - jfs_debug(1, "No recovery required, last transaction %d\n", - ntohl(sb->s_sequence)); - journal->j_transaction_sequence = ntohl(sb->s_sequence) + 1; + jbd_debug(1, "No recovery required, last transaction %d\n", + be32_to_cpu(sb->s_sequence)); + journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; return 0; } - err = do_one_pass(journal, &info, PASS_SCAN); if (!err) @@ -251,60 +251,145 @@ int journal_recover(journal_t *journal) if (!err) err = do_one_pass(journal, &info, PASS_REPLAY); - jfs_debug(0, "JFS: recovery, exit status %d, " + jbd_debug(1, "JBD: recovery, exit status %d, " "recovered transactions %u to %u\n", err, info.start_transaction, info.end_transaction); - jfs_debug(0, "JFS: Replayed %d and revoked %d/%d blocks\n", + jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", info.nr_replays, info.nr_revoke_hits, info.nr_revokes); /* Restart the log at the next transaction ID, thus invalidating * any existing commit records in the log. */ journal->j_transaction_sequence = ++info.end_transaction; - + journal_clear_revoke(journal); - fsync_dev(journal->j_dev); + sync_blockdev(journal->j_fs_dev); + return err; +} + +/** + * journal_skip_recovery - Start journal and wipe exiting records + * @journal: journal to startup + * + * Locate any valid recovery information from the journal and set up the + * journal structures in memory to ignore it (presumably because the + * caller has evidence that it is out of date). + * This function does'nt appear to be exorted.. + * + * We perform one pass over the journal to allow us to tell the user how + * much recovery information is being erased, and to let us initialise + * the journal transaction sequence numbers to the next unused ID. + */ +int journal_skip_recovery(journal_t *journal) +{ + int err; + journal_superblock_t * sb; + + struct recovery_info info; + + memset (&info, 0, sizeof(info)); + sb = journal->j_superblock; + + err = do_one_pass(journal, &info, PASS_SCAN); + + if (err) { + printk(KERN_ERR "JBD: error %d scanning journal\n", err); + ++journal->j_transaction_sequence; + } else { +#ifdef CONFIG_JBD_DEBUG + int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); +#endif + jbd_debug(1, + "JBD: ignoring %d transaction%s from the journal.\n", + dropped, (dropped == 1) ? "" : "s"); + journal->j_transaction_sequence = ++info.end_transaction; + } + + journal->j_tail = 0; return err; } -static int do_one_pass(journal_t *journal, struct recovery_info *info, - enum passtype pass) +#if 0 +static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) +{ + unsigned long long block = be32_to_cpu(tag->t_blocknr); + if (tag_bytes > JBD_TAG_SIZE32) + block |= (__u64)be32_to_cpu(tag->t_blocknr_high) << 32; + return block; +} +#endif + +/* + * calc_chksums calculates the checksums for the blocks described in the + * descriptor block. + */ +static int calc_chksums(journal_t *journal, struct buffer_head *bh, + unsigned long *next_log_block, __u32 *crc32_sum) +{ + int i, num_blks, err; + unsigned long io_block; + struct buffer_head *obh; + + num_blks = count_tags(journal, bh); + /* Calculate checksum of the descriptor block. */ + *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); + + for (i = 0; i < num_blks; i++) { + io_block = (*next_log_block)++; + wrap(journal, *next_log_block); + err = jread(&obh, journal, io_block); + if (err) { + printk(KERN_ERR "JBD: IO error %d recovering block " + "%lu in log\n", err, io_block); + return 1; + } else { + *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, + obh->b_size); + } + brelse(obh); + } + return 0; +} + +static int do_one_pass(journal_t *journal, + struct recovery_info *info, enum passtype pass) { - unsigned int first_commit_ID, next_commit_ID; unsigned long next_log_block; int err, success = 0; journal_superblock_t * sb; - journal_header_t * tmp; + journal_header_t * tmp; struct buffer_head * bh; unsigned int sequence; int blocktype; - + int tag_bytes = journal_tag_bytes(journal); + __u32 crc32_sum = ~0; /* Transactional Checksums */ + /* Precompute the maximum metadata descriptors in a descriptor block */ int MAX_BLOCKS_PER_DESC; MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) - / sizeof(journal_block_tag_t)); + / tag_bytes); - /* + /* * First thing is to establish what we expect to find in the log * (in terms of transaction IDs), and where (in terms of log - * block offsets): query the superblock. + * block offsets): query the superblock. */ sb = journal->j_superblock; - next_commit_ID = ntohl(sb->s_sequence); - next_log_block = ntohl(sb->s_start); + next_commit_ID = be32_to_cpu(sb->s_sequence); + next_log_block = be32_to_cpu(sb->s_start); first_commit_ID = next_commit_ID; if (pass == PASS_SCAN) info->start_transaction = first_commit_ID; - - jfs_debug(1, "Starting recovery pass %d\n", pass); - + + jbd_debug(1, "Starting recovery pass %d\n", pass); + /* * Now we walk through the log, transaction by transaction, * making sure that each transaction has a commit block in the * expected place. Each complete transaction gets replayed back - * into the main filesystem. + * into the main filesystem. */ while (1) { @@ -313,64 +398,80 @@ static int do_one_pass(journal_t *journal, struct recovery_info *info, journal_block_tag_t * tag; struct buffer_head * obh; struct buffer_head * nbh; - + + cond_resched(); + /* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of * the log. */ - + if (pass != PASS_SCAN) if (tid_geq(next_commit_ID, info->end_transaction)) break; - - jfs_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", + + jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", next_commit_ID, next_log_block, journal->j_last); /* Skip over each chunk of the transaction looking * either the next descriptor block or the final commit * record. */ - - jfs_debug(3, "JFS: checking block %ld\n", next_log_block); + + jbd_debug(3, "JBD: checking block %ld\n", next_log_block); err = jread(&bh, journal, next_log_block); if (err) goto failed; - + next_log_block++; wrap(journal, next_log_block); - - /* What kind of buffer is it? - * + + /* What kind of buffer is it? + * * If it is a descriptor block, check that it has the * expected sequence number. Otherwise, we're all done * here. */ - tmp = (journal_header_t *) bh->b_data; - - if (tmp->h_magic != htonl(JFS_MAGIC_NUMBER)) { + tmp = (journal_header_t *)bh->b_data; + + if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) { brelse(bh); break; } - - blocktype = ntohl(tmp->h_blocktype); - sequence = ntohl(tmp->h_sequence); - jfs_debug(3, "Found magic %d, sequence %d\n", + + blocktype = be32_to_cpu(tmp->h_blocktype); + sequence = be32_to_cpu(tmp->h_sequence); + jbd_debug(3, "Found magic %d, sequence %d\n", blocktype, sequence); - + if (sequence != next_commit_ID) { brelse(bh); break; } - + /* OK, we have a valid descriptor block which matches * all of the sequence number checks. What are we going * to do with it? That depends on the pass... */ - + switch(blocktype) { case JFS_DESCRIPTOR_BLOCK: /* If it is a valid descriptor block, replay it - * in pass REPLAY; otherwise, just skip over the - * blocks it describes. */ + * in pass REPLAY; if journal_checksums enabled, then + * calculate checksums in PASS_SCAN, otherwise, + * just skip over the blocks it describes. */ if (pass != PASS_REPLAY) { - next_log_block += count_tags(bh, journal->j_blocksize); + if (pass == PASS_SCAN && + JFS_HAS_COMPAT_FEATURE(journal, + JFS_FEATURE_COMPAT_CHECKSUM) && + !info->end_transaction) { + if (calc_chksums(journal, bh, + &next_log_block, + &crc32_sum)) { + brelse(bh); + break; + } + brelse(bh); + continue; + } + next_log_block += count_tags(journal, bh); wrap(journal, next_log_block); brelse(bh); continue; @@ -381,13 +482,13 @@ static int do_one_pass(journal_t *journal, struct recovery_info *info, * getting done here! */ tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) + while ((tagp - bh->b_data + tag_bytes) <= journal->j_blocksize) { unsigned long io_block; - + tag = (journal_block_tag_t *) tagp; - flags = ntohl(tag->t_flags); - + flags = be32_to_cpu(tag->t_flags); + io_block = next_log_block++; wrap(journal, next_log_block); err = jread(&obh, journal, io_block); @@ -395,34 +496,35 @@ static int do_one_pass(journal_t *journal, struct recovery_info *info, /* Recover what we can, but * report failure at the end. */ success = err; - printk (KERN_ERR - "JFS: IO error %d recovering " + printk (KERN_ERR + "JBD: IO error %d recovering " "block %ld in log\n", err, io_block); } else { unsigned long blocknr; - + J_ASSERT(obh != NULL); - blocknr = ntohl(tag->t_blocknr); + blocknr = be32_to_cpu(tag->t_blocknr); /* If the block has been * revoked, then we're all done * here. */ if (journal_test_revoke - (journal, blocknr, + (journal, blocknr, next_commit_ID)) { brelse(obh); ++info->nr_revoke_hits; goto skip_write; } - + /* Find a buffer for the new * data being restored */ - nbh = getblk(journal->j_dev, blocknr, - journal->j_blocksize); + nbh = __getblk(journal->j_fs_dev, + blocknr, + journal->j_blocksize); if (nbh == NULL) { - printk(KERN_ERR - "JFS: Out of memory " + printk(KERN_ERR + "JBD: Out of memory " "during recovery.\n"); err = -ENOMEM; brelse(bh); @@ -430,35 +532,130 @@ static int do_one_pass(journal_t *journal, struct recovery_info *info, goto failed; } - memcpy(nbh->b_data, obh->b_data, - journal->j_blocksize); + lock_buffer(nbh); + memcpy(nbh->b_data, obh->b_data, + journal->j_blocksize); if (flags & JFS_FLAG_ESCAPE) { - * ((unsigned int *) bh->b_data) = htonl(JFS_MAGIC_NUMBER); + *((__be32 *)nbh->b_data) = + cpu_to_be32(JFS_MAGIC_NUMBER); } - - mark_buffer_dirty(nbh, 1); + + BUFFER_TRACE(nbh, "marking dirty"); + set_buffer_uptodate(nbh); + mark_buffer_dirty(nbh); + BUFFER_TRACE(nbh, "marking uptodate"); ++info->nr_replays; /* ll_rw_block(WRITE, 1, &nbh); */ + unlock_buffer(nbh); brelse(obh); brelse(nbh); } - + skip_write: - tagp += sizeof(journal_block_tag_t); + tagp += tag_bytes; if (!(flags & JFS_FLAG_SAME_UUID)) tagp += 16; if (flags & JFS_FLAG_LAST_TAG) break; } - + brelse(bh); continue; - + case JFS_COMMIT_BLOCK: - /* Found an expected commit block: not much to - * do other than move on to the next sequence + jbd_debug(3, "Commit block for #%u found\n", + next_commit_ID); + /* How to differentiate between interrupted commit + * and journal corruption ? + * + * {nth transaction} + * Checksum Verification Failed + * | + * ____________________ + * | | + * async_commit sync_commit + * | | + * | GO TO NEXT "Journal Corruption" + * | TRANSACTION + * | + * {(n+1)th transanction} + * | + * _______|______________ + * | | + * Commit block found Commit block not found + * | | + * "Journal Corruption" | + * _____________|_________ + * | | + * nth trans corrupt OR nth trans + * and (n+1)th interrupted interrupted + * before commit block + * could reach the disk. + * (Cannot find the difference in above + * mentioned conditions. Hence assume + * "Interrupted Commit".) + */ + + /* Found an expected commit block: if checksums + * are present verify them in PASS_SCAN; else not + * much to do other than move on to the next sequence * number. */ + if (pass == PASS_SCAN && + JFS_HAS_COMPAT_FEATURE(journal, + JFS_FEATURE_COMPAT_CHECKSUM)) { + int chksum_err, chksum_seen; + struct commit_header *cbh = + (struct commit_header *)bh->b_data; + unsigned found_chksum = + be32_to_cpu(cbh->h_chksum[0]); + + chksum_err = chksum_seen = 0; + + jbd_debug(3, "Checksums %x %x\n", + crc32_sum, found_chksum); + if (info->end_transaction) { + journal->j_failed_commit = + info->end_transaction; + brelse(bh); + break; + } + + if (crc32_sum == found_chksum && + cbh->h_chksum_type == JBD2_CRC32_CHKSUM && + cbh->h_chksum_size == + JBD2_CRC32_CHKSUM_SIZE) + chksum_seen = 1; + else if (!(cbh->h_chksum_type == 0 && + cbh->h_chksum_size == 0 && + found_chksum == 0 && + !chksum_seen)) + /* + * If fs is mounted using an old kernel and then + * kernel with journal_chksum is used then we + * get a situation where the journal flag has + * checksum flag set but checksums are not + * present i.e chksum = 0, in the individual + * commit blocks. + * Hence to avoid checksum failures, in this + * situation, this extra check is added. + */ + chksum_err = 1; + + if (chksum_err) { + info->end_transaction = next_commit_ID; + jbd_debug(1, "Checksum_err %x %x\n", + crc32_sum, found_chksum); + if (!JFS_HAS_INCOMPAT_FEATURE(journal, + JFS_FEATURE_INCOMPAT_ASYNC_COMMIT)){ + journal->j_failed_commit = + next_commit_ID; + brelse(bh); + break; + } + } + crc32_sum = ~0; + } brelse(bh); next_commit_ID++; continue; @@ -471,7 +668,7 @@ static int do_one_pass(journal_t *journal, struct recovery_info *info, continue; } - err = scan_revoke_records(journal, bh, + err = scan_revoke_records(journal, bh, next_commit_ID, info); brelse(bh); if (err) @@ -479,27 +676,29 @@ static int do_one_pass(journal_t *journal, struct recovery_info *info, continue; default: - jfs_debug(3, "Unrecognised magic %d, end of scan.\n", + jbd_debug(3, "Unrecognised magic %d, end of scan.\n", blocktype); + brelse(bh); goto done; } } done: - /* + /* * We broke out of the log scan loop: either we came to the * known end of the log or we found an unexpected block in the * log. If the latter happened, then we know that the "current" * transaction marks the end of the valid log. */ - - if (pass == PASS_SCAN) - info->end_transaction = next_commit_ID; - else { + + if (pass == PASS_SCAN) { + if (!info->end_transaction) + info->end_transaction = next_commit_ID; + } else { /* It's really bad news if different passes end up at * different places (but possible due to IO errors). */ if (info->end_transaction != next_commit_ID) { - printk (KERN_ERR "JFS: recovery pass %d ended at " + printk (KERN_ERR "JBD: recovery pass %d ended at " "transaction %u, expected %u\n", pass, next_commit_ID, info->end_transaction); if (!success) @@ -516,21 +715,21 @@ static int do_one_pass(journal_t *journal, struct recovery_info *info, /* Scan a revoke record, marking all blocks mentioned as revoked. */ -static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, +static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, tid_t sequence, struct recovery_info *info) { journal_revoke_header_t *header; int offset, max; - + header = (journal_revoke_header_t *) bh->b_data; offset = sizeof(journal_revoke_header_t); - max = ntohl(header->r_count); - + max = be32_to_cpu(header->r_count); + while (offset < max) { unsigned long blocknr; int err; - - blocknr = * ((unsigned int *) bh->b_data+offset); + + blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); offset += 4; err = journal_set_revoke(journal, blocknr, sequence); if (err)