Whamcloud - gitweb
Sync kernel's fix for potential double free in jbd2
[tools/e2fsprogs.git] / e2fsck / journal.c
1 /*
2  * journal.c --- code for handling the "ext3" journal
3  *
4  * Copyright (C) 2000 Andreas Dilger
5  * Copyright (C) 2000 Theodore Ts'o
6  *
7  * Parts of the code are based on fs/jfs/journal.c by Stephen C. Tweedie
8  * Copyright (C) 1999 Red Hat Software
9  *
10  * This file may be redistributed under the terms of the
11  * GNU General Public License version 2 or at your discretion
12  * any later version.
13  */
14
15 #include "config.h"
16 #ifdef HAVE_SYS_MOUNT_H
17 #include <sys/param.h>
18 #include <sys/mount.h>
19 #define MNT_FL (MS_MGC_VAL | MS_RDONLY)
20 #endif
21 #ifdef HAVE_SYS_STAT_H
22 #include <sys/stat.h>
23 #endif
24
25 #define E2FSCK_INCLUDE_INLINE_FUNCS
26 #include "jfs_user.h"
27 #include "problem.h"
28 #include "uuid/uuid.h"
29
30 #ifdef CONFIG_JBD_DEBUG         /* Enabled by configure --enable-jfs-debug */
31 static int bh_count = 0;
32 #endif
33
34 /*
35  * Define USE_INODE_IO to use the inode_io.c / fileio.c codepaths.
36  * This creates a larger static binary, and a smaller binary using
37  * shared libraries.  It's also probably slightly less CPU-efficient,
38  * which is why it's not on by default.  But, it's a good way of
39  * testing the functions in inode_io.c and fileio.c.
40  */
41 #undef USE_INODE_IO
42
43 /* Checksumming functions */
44 static int e2fsck_journal_verify_csum_type(journal_t *j,
45                                            journal_superblock_t *jsb)
46 {
47         if (!jbd2_journal_has_csum_v2or3(j))
48                 return 1;
49
50         return jsb->s_checksum_type == JBD2_CRC32C_CHKSUM;
51 }
52
53 static __u32 e2fsck_journal_sb_csum(journal_superblock_t *jsb)
54 {
55         __u32 crc, old_crc;
56
57         old_crc = jsb->s_checksum;
58         jsb->s_checksum = 0;
59         crc = ext2fs_crc32c_le(~0, (unsigned char *)jsb,
60                                sizeof(journal_superblock_t));
61         jsb->s_checksum = old_crc;
62
63         return crc;
64 }
65
66 static int e2fsck_journal_sb_csum_verify(journal_t *j,
67                                          journal_superblock_t *jsb)
68 {
69         __u32 provided, calculated;
70
71         if (!jbd2_journal_has_csum_v2or3(j))
72                 return 1;
73
74         provided = ext2fs_be32_to_cpu(jsb->s_checksum);
75         calculated = e2fsck_journal_sb_csum(jsb);
76
77         return provided == calculated;
78 }
79
80 static errcode_t e2fsck_journal_sb_csum_set(journal_t *j,
81                                             journal_superblock_t *jsb)
82 {
83         __u32 crc;
84
85         if (!jbd2_journal_has_csum_v2or3(j))
86                 return 0;
87
88         crc = e2fsck_journal_sb_csum(jsb);
89         jsb->s_checksum = ext2fs_cpu_to_be32(crc);
90         return 0;
91 }
92
93 /* Kernel compatibility functions for handling the journal.  These allow us
94  * to use the recovery.c file virtually unchanged from the kernel, so we
95  * don't have to do much to keep kernel and user recovery in sync.
96  */
97 int jbd2_journal_bmap(journal_t *journal, blk64_t block,
98                       unsigned long long *phys)
99 {
100 #ifdef USE_INODE_IO
101         *phys = block;
102         return 0;
103 #else
104         struct inode    *inode = journal->j_inode;
105         errcode_t       retval;
106         blk64_t         pblk;
107
108         if (!inode) {
109                 *phys = block;
110                 return 0;
111         }
112
113         retval= ext2fs_bmap2(inode->i_ctx->fs, inode->i_ino,
114                              &inode->i_ext2, NULL, 0, block, 0, &pblk);
115         *phys = pblk;
116         return -1 * ((int) retval);
117 #endif
118 }
119
120 struct buffer_head *getblk(kdev_t kdev, blk64_t blocknr, int blocksize)
121 {
122         struct buffer_head *bh;
123         int bufsize = sizeof(*bh) + kdev->k_ctx->fs->blocksize -
124                 sizeof(bh->b_data);
125
126         bh = e2fsck_allocate_memory(kdev->k_ctx, bufsize, "block buffer");
127         if (!bh)
128                 return NULL;
129
130 #ifdef CONFIG_JBD_DEBUG
131         if (journal_enable_debug >= 3)
132                 bh_count++;
133 #endif
134         jfs_debug(4, "getblk for block %llu (%d bytes)(total %d)\n",
135                   (unsigned long long) blocknr, blocksize, bh_count);
136
137         bh->b_ctx = kdev->k_ctx;
138         if (kdev->k_dev == K_DEV_FS)
139                 bh->b_io = kdev->k_ctx->fs->io;
140         else
141                 bh->b_io = kdev->k_ctx->journal_io;
142         bh->b_size = blocksize;
143         bh->b_blocknr = blocknr;
144
145         return bh;
146 }
147
148 int sync_blockdev(kdev_t kdev)
149 {
150         io_channel      io;
151
152         if (kdev->k_dev == K_DEV_FS)
153                 io = kdev->k_ctx->fs->io;
154         else
155                 io = kdev->k_ctx->journal_io;
156
157         return io_channel_flush(io) ? -EIO : 0;
158 }
159
160 void ll_rw_block(int rw, int op_flags, int nr, struct buffer_head *bhp[])
161 {
162         errcode_t retval;
163         struct buffer_head *bh;
164
165         for (; nr > 0; --nr) {
166                 bh = *bhp++;
167                 if (rw == REQ_OP_READ && !bh->b_uptodate) {
168                         jfs_debug(3, "reading block %llu/%p\n",
169                                   bh->b_blocknr, (void *) bh);
170                         retval = io_channel_read_blk64(bh->b_io,
171                                                      bh->b_blocknr,
172                                                      1, bh->b_data);
173                         if (retval) {
174                                 com_err(bh->b_ctx->device_name, retval,
175                                         "while reading block %llu\n",
176                                         bh->b_blocknr);
177                                 bh->b_err = (int) retval;
178                                 continue;
179                         }
180                         bh->b_uptodate = 1;
181                 } else if (rw == REQ_OP_WRITE && bh->b_dirty) {
182                         jfs_debug(3, "writing block %llu/%p\n",
183                                   bh->b_blocknr,
184                                   (void *) bh);
185                         retval = io_channel_write_blk64(bh->b_io,
186                                                       bh->b_blocknr,
187                                                       1, bh->b_data);
188                         if (retval) {
189                                 com_err(bh->b_ctx->device_name, retval,
190                                         "while writing block %llu\n",
191                                         bh->b_blocknr);
192                                 bh->b_err = (int) retval;
193                                 continue;
194                         }
195                         bh->b_dirty = 0;
196                         bh->b_uptodate = 1;
197                 } else {
198                         jfs_debug(3, "no-op %s for block %llu\n",
199                                   rw == REQ_OP_READ ? "read" : "write",
200                                   bh->b_blocknr);
201                 }
202         }
203 }
204
205 void mark_buffer_dirty(struct buffer_head *bh)
206 {
207         bh->b_dirty = 1;
208 }
209
210 static void mark_buffer_clean(struct buffer_head * bh)
211 {
212         bh->b_dirty = 0;
213 }
214
215 void brelse(struct buffer_head *bh)
216 {
217         if (bh->b_dirty)
218                 ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
219         jfs_debug(3, "freeing block %llu/%p (total %d)\n",
220                   bh->b_blocknr, (void *) bh, --bh_count);
221         ext2fs_free_mem(&bh);
222 }
223
224 int buffer_uptodate(struct buffer_head *bh)
225 {
226         return bh->b_uptodate;
227 }
228
229 void mark_buffer_uptodate(struct buffer_head *bh, int val)
230 {
231         bh->b_uptodate = val;
232 }
233
234 void wait_on_buffer(struct buffer_head *bh)
235 {
236         if (!bh->b_uptodate)
237                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
238 }
239
240
241 static void e2fsck_clear_recover(e2fsck_t ctx, int error)
242 {
243         ext2fs_clear_feature_journal_needs_recovery(ctx->fs->super);
244
245         /* if we had an error doing journal recovery, we need a full fsck */
246         if (error)
247                 ctx->fs->super->s_state &= ~EXT2_VALID_FS;
248         ext2fs_mark_super_dirty(ctx->fs);
249 }
250
251 /*
252  * This is a helper function to check the validity of the journal.
253  */
254 struct process_block_struct {
255         e2_blkcnt_t     last_block;
256 };
257
258 static int process_journal_block(ext2_filsys fs,
259                                  blk64_t        *block_nr,
260                                  e2_blkcnt_t blockcnt,
261                                  blk64_t ref_block EXT2FS_ATTR((unused)),
262                                  int ref_offset EXT2FS_ATTR((unused)),
263                                  void *priv_data)
264 {
265         struct process_block_struct *p;
266         blk64_t blk = *block_nr;
267
268         p = (struct process_block_struct *) priv_data;
269
270         if (!blk || blk < fs->super->s_first_data_block ||
271             blk >= ext2fs_blocks_count(fs->super))
272                 return BLOCK_ABORT;
273
274         if (blockcnt >= 0)
275                 p->last_block = blockcnt;
276         return 0;
277 }
278
279 static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal)
280 {
281         struct process_block_struct pb;
282         struct ext2_super_block *sb = ctx->fs->super;
283         struct ext2_super_block jsuper;
284         struct problem_context  pctx;
285         struct buffer_head      *bh;
286         struct inode            *j_inode = NULL;
287         struct kdev_s           *dev_fs = NULL, *dev_journal;
288         const char              *journal_name = 0;
289         journal_t               *journal = NULL;
290         errcode_t               retval = 0;
291         io_manager              io_ptr = 0;
292         unsigned long long      start = 0;
293         int                     ret;
294         int                     ext_journal = 0;
295         int                     tried_backup_jnl = 0;
296
297         clear_problem_context(&pctx);
298
299         journal = e2fsck_allocate_memory(ctx, sizeof(journal_t), "journal");
300         if (!journal) {
301                 return EXT2_ET_NO_MEMORY;
302         }
303
304         dev_fs = e2fsck_allocate_memory(ctx, 2*sizeof(struct kdev_s), "kdev");
305         if (!dev_fs) {
306                 retval = EXT2_ET_NO_MEMORY;
307                 goto errout;
308         }
309         dev_journal = dev_fs+1;
310
311         dev_fs->k_ctx = dev_journal->k_ctx = ctx;
312         dev_fs->k_dev = K_DEV_FS;
313         dev_journal->k_dev = K_DEV_JOURNAL;
314
315         journal->j_dev = dev_journal;
316         journal->j_fs_dev = dev_fs;
317         journal->j_inode = NULL;
318         journal->j_blocksize = ctx->fs->blocksize;
319
320         if (uuid_is_null(sb->s_journal_uuid)) {
321                 if (!sb->s_journal_inum) {
322                         retval = EXT2_ET_BAD_INODE_NUM;
323                         goto errout;
324                 }
325                 j_inode = e2fsck_allocate_memory(ctx, sizeof(*j_inode),
326                                                  "journal inode");
327                 if (!j_inode) {
328                         retval = EXT2_ET_NO_MEMORY;
329                         goto errout;
330                 }
331
332                 j_inode->i_ctx = ctx;
333                 j_inode->i_ino = sb->s_journal_inum;
334
335                 if ((retval = ext2fs_read_inode(ctx->fs,
336                                                 sb->s_journal_inum,
337                                                 &j_inode->i_ext2))) {
338                 try_backup_journal:
339                         if (sb->s_jnl_backup_type != EXT3_JNL_BACKUP_BLOCKS ||
340                             tried_backup_jnl)
341                                 goto errout;
342                         memset(&j_inode->i_ext2, 0, sizeof(struct ext2_inode));
343                         memcpy(&j_inode->i_ext2.i_block[0], sb->s_jnl_blocks,
344                                EXT2_N_BLOCKS*4);
345                         j_inode->i_ext2.i_size_high = sb->s_jnl_blocks[15];
346                         j_inode->i_ext2.i_size = sb->s_jnl_blocks[16];
347                         j_inode->i_ext2.i_links_count = 1;
348                         j_inode->i_ext2.i_mode = LINUX_S_IFREG | 0600;
349                         e2fsck_use_inode_shortcuts(ctx, 1);
350                         ctx->stashed_ino = j_inode->i_ino;
351                         ctx->stashed_inode = &j_inode->i_ext2;
352                         tried_backup_jnl++;
353                 }
354                 if (!j_inode->i_ext2.i_links_count ||
355                     !LINUX_S_ISREG(j_inode->i_ext2.i_mode)) {
356                         retval = EXT2_ET_NO_JOURNAL;
357                         goto try_backup_journal;
358                 }
359                 if (EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize <
360                     JBD2_MIN_JOURNAL_BLOCKS) {
361                         retval = EXT2_ET_JOURNAL_TOO_SMALL;
362                         goto try_backup_journal;
363                 }
364                 pb.last_block = -1;
365                 retval = ext2fs_block_iterate3(ctx->fs, j_inode->i_ino,
366                                                BLOCK_FLAG_HOLE, 0,
367                                                process_journal_block, &pb);
368                 if ((pb.last_block + 1) * ctx->fs->blocksize <
369                     (int) EXT2_I_SIZE(&j_inode->i_ext2)) {
370                         retval = EXT2_ET_JOURNAL_TOO_SMALL;
371                         goto try_backup_journal;
372                 }
373                 if (tried_backup_jnl && !(ctx->options & E2F_OPT_READONLY)) {
374                         retval = ext2fs_write_inode(ctx->fs, sb->s_journal_inum,
375                                                     &j_inode->i_ext2);
376                         if (retval)
377                                 goto errout;
378                 }
379
380                 journal->j_maxlen = EXT2_I_SIZE(&j_inode->i_ext2) /
381                         journal->j_blocksize;
382
383 #ifdef USE_INODE_IO
384                 retval = ext2fs_inode_io_intern2(ctx->fs, sb->s_journal_inum,
385                                                  &j_inode->i_ext2,
386                                                  &journal_name);
387                 if (retval)
388                         goto errout;
389
390                 io_ptr = inode_io_manager;
391 #else
392                 journal->j_inode = j_inode;
393                 ctx->journal_io = ctx->fs->io;
394                 if ((ret = jbd2_journal_bmap(journal, 0, &start)) != 0) {
395                         retval = (errcode_t) (-1 * ret);
396                         goto errout;
397                 }
398 #endif
399         } else {
400                 ext_journal = 1;
401                 if (!ctx->journal_name) {
402                         char uuid[37];
403
404                         uuid_unparse(sb->s_journal_uuid, uuid);
405                         ctx->journal_name = blkid_get_devname(ctx->blkid,
406                                                               "UUID", uuid);
407                         if (!ctx->journal_name)
408                                 ctx->journal_name = blkid_devno_to_devname(sb->s_journal_dev);
409                 }
410                 journal_name = ctx->journal_name;
411
412                 if (!journal_name) {
413                         fix_problem(ctx, PR_0_CANT_FIND_JOURNAL, &pctx);
414                         retval = EXT2_ET_LOAD_EXT_JOURNAL;
415                         goto errout;
416                 }
417
418                 jfs_debug(1, "Using journal file %s\n", journal_name);
419                 io_ptr = unix_io_manager;
420         }
421
422 #if 0
423         test_io_backing_manager = io_ptr;
424         io_ptr = test_io_manager;
425 #endif
426 #ifndef USE_INODE_IO
427         if (ext_journal)
428 #endif
429         {
430                 int flags = IO_FLAG_RW;
431                 if (!(ctx->mount_flags & EXT2_MF_ISROOT &&
432                       ctx->mount_flags & EXT2_MF_READONLY))
433                         flags |= IO_FLAG_EXCLUSIVE;
434                 if ((ctx->mount_flags & EXT2_MF_READONLY) &&
435                     (ctx->options & E2F_OPT_FORCE))
436                         flags &= ~IO_FLAG_EXCLUSIVE;
437
438
439                 retval = io_ptr->open(journal_name, flags,
440                                       &ctx->journal_io);
441         }
442         if (retval)
443                 goto errout;
444
445         io_channel_set_blksize(ctx->journal_io, ctx->fs->blocksize);
446
447         if (ext_journal) {
448                 blk64_t maxlen;
449
450                 start = ext2fs_journal_sb_start(ctx->fs->blocksize) - 1;
451                 bh = getblk(dev_journal, start, ctx->fs->blocksize);
452                 if (!bh) {
453                         retval = EXT2_ET_NO_MEMORY;
454                         goto errout;
455                 }
456                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
457                 if ((retval = bh->b_err) != 0) {
458                         brelse(bh);
459                         goto errout;
460                 }
461                 memcpy(&jsuper, start ? bh->b_data :  bh->b_data + SUPERBLOCK_OFFSET,
462                        sizeof(jsuper));
463 #ifdef WORDS_BIGENDIAN
464                 if (jsuper.s_magic == ext2fs_swab16(EXT2_SUPER_MAGIC))
465                         ext2fs_swap_super(&jsuper);
466 #endif
467                 if (jsuper.s_magic != EXT2_SUPER_MAGIC ||
468                     !ext2fs_has_feature_journal_dev(&jsuper)) {
469                         fix_problem(ctx, PR_0_EXT_JOURNAL_BAD_SUPER, &pctx);
470                         retval = EXT2_ET_LOAD_EXT_JOURNAL;
471                         brelse(bh);
472                         goto errout;
473                 }
474                 /* Make sure the journal UUID is correct */
475                 if (memcmp(jsuper.s_uuid, ctx->fs->super->s_journal_uuid,
476                            sizeof(jsuper.s_uuid))) {
477                         fix_problem(ctx, PR_0_JOURNAL_BAD_UUID, &pctx);
478                         retval = EXT2_ET_LOAD_EXT_JOURNAL;
479                         brelse(bh);
480                         goto errout;
481                 }
482
483                 /* Check the superblock checksum */
484                 if (ext2fs_has_feature_metadata_csum(&jsuper)) {
485                         struct struct_ext2_filsys fsx;
486                         struct ext2_super_block superx;
487                         void *p;
488
489                         p = start ? bh->b_data : bh->b_data + SUPERBLOCK_OFFSET;
490                         memcpy(&fsx, ctx->fs, sizeof(fsx));
491                         memcpy(&superx, ctx->fs->super, sizeof(superx));
492                         fsx.super = &superx;
493                         ext2fs_set_feature_metadata_csum(fsx.super);
494                         if (!ext2fs_superblock_csum_verify(&fsx, p) &&
495                             fix_problem(ctx, PR_0_EXT_JOURNAL_SUPER_CSUM_INVALID,
496                                         &pctx)) {
497                                 ext2fs_superblock_csum_set(&fsx, p);
498                                 mark_buffer_dirty(bh);
499                         }
500                 }
501                 brelse(bh);
502
503                 maxlen = ext2fs_blocks_count(&jsuper);
504                 journal->j_maxlen = (maxlen < 1ULL << 32) ? maxlen : (1ULL << 32) - 1;
505                 start++;
506         }
507
508         if (!(bh = getblk(dev_journal, start, journal->j_blocksize))) {
509                 retval = EXT2_ET_NO_MEMORY;
510                 goto errout;
511         }
512
513         journal->j_sb_buffer = bh;
514         journal->j_superblock = (journal_superblock_t *)bh->b_data;
515
516 #ifdef USE_INODE_IO
517         if (j_inode)
518                 ext2fs_free_mem(&j_inode);
519 #endif
520
521         *ret_journal = journal;
522         e2fsck_use_inode_shortcuts(ctx, 0);
523         return 0;
524
525 errout:
526         e2fsck_use_inode_shortcuts(ctx, 0);
527         if (dev_fs)
528                 ext2fs_free_mem(&dev_fs);
529         if (j_inode)
530                 ext2fs_free_mem(&j_inode);
531         if (journal)
532                 ext2fs_free_mem(&journal);
533         return retval;
534 }
535
536 static errcode_t e2fsck_journal_fix_bad_inode(e2fsck_t ctx,
537                                               struct problem_context *pctx)
538 {
539         struct ext2_super_block *sb = ctx->fs->super;
540         int recover = ext2fs_has_feature_journal_needs_recovery(ctx->fs->super);
541         int has_journal = ext2fs_has_feature_journal(ctx->fs->super);
542
543         if (has_journal || sb->s_journal_inum) {
544                 /* The journal inode is bogus, remove and force full fsck */
545                 pctx->ino = sb->s_journal_inum;
546                 if (fix_problem(ctx, PR_0_JOURNAL_BAD_INODE, pctx)) {
547                         if (has_journal && sb->s_journal_inum)
548                                 printf("*** journal has been deleted ***\n\n");
549                         ext2fs_clear_feature_journal(sb);
550                         sb->s_journal_inum = 0;
551                         memset(sb->s_jnl_blocks, 0, sizeof(sb->s_jnl_blocks));
552                         ctx->flags |= E2F_FLAG_JOURNAL_INODE;
553                         ctx->fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
554                         e2fsck_clear_recover(ctx, 1);
555                         return 0;
556                 }
557                 return EXT2_ET_CORRUPT_JOURNAL_SB;
558         } else if (recover) {
559                 if (fix_problem(ctx, PR_0_JOURNAL_RECOVER_SET, pctx)) {
560                         e2fsck_clear_recover(ctx, 1);
561                         return 0;
562                 }
563                 return EXT2_ET_UNSUPP_FEATURE;
564         }
565         return 0;
566 }
567
568 #define V1_SB_SIZE      0x0024
569 static void clear_v2_journal_fields(journal_t *journal)
570 {
571         e2fsck_t ctx = journal->j_dev->k_ctx;
572         struct problem_context pctx;
573
574         clear_problem_context(&pctx);
575
576         if (!fix_problem(ctx, PR_0_CLEAR_V2_JOURNAL, &pctx))
577                 return;
578
579         ctx->flags |= E2F_FLAG_PROBLEMS_FIXED;
580         memset(((char *) journal->j_superblock) + V1_SB_SIZE, 0,
581                ctx->fs->blocksize-V1_SB_SIZE);
582         mark_buffer_dirty(journal->j_sb_buffer);
583 }
584
585
586 static errcode_t e2fsck_journal_load(journal_t *journal)
587 {
588         e2fsck_t ctx = journal->j_dev->k_ctx;
589         journal_superblock_t *jsb;
590         struct buffer_head *jbh = journal->j_sb_buffer;
591         struct problem_context pctx;
592
593         clear_problem_context(&pctx);
594
595         ll_rw_block(REQ_OP_READ, 0, 1, &jbh);
596         if (jbh->b_err) {
597                 com_err(ctx->device_name, jbh->b_err, "%s",
598                         _("reading journal superblock\n"));
599                 return jbh->b_err;
600         }
601
602         jsb = journal->j_superblock;
603         /* If we don't even have JBD2_MAGIC, we probably have a wrong inode */
604         if (jsb->s_header.h_magic != htonl(JBD2_MAGIC_NUMBER))
605                 return e2fsck_journal_fix_bad_inode(ctx, &pctx);
606
607         switch (ntohl(jsb->s_header.h_blocktype)) {
608         case JBD2_SUPERBLOCK_V1:
609                 journal->j_format_version = 1;
610                 if (jsb->s_feature_compat ||
611                     jsb->s_feature_incompat ||
612                     jsb->s_feature_ro_compat ||
613                     jsb->s_nr_users)
614                         clear_v2_journal_fields(journal);
615                 break;
616
617         case JBD2_SUPERBLOCK_V2:
618                 journal->j_format_version = 2;
619                 if (ntohl(jsb->s_nr_users) > 1 &&
620                     uuid_is_null(ctx->fs->super->s_journal_uuid))
621                         clear_v2_journal_fields(journal);
622                 if (ntohl(jsb->s_nr_users) > 1) {
623                         fix_problem(ctx, PR_0_JOURNAL_UNSUPP_MULTIFS, &pctx);
624                         return EXT2_ET_JOURNAL_UNSUPP_VERSION;
625                 }
626                 break;
627
628         /*
629          * These should never appear in a journal super block, so if
630          * they do, the journal is badly corrupted.
631          */
632         case JBD2_DESCRIPTOR_BLOCK:
633         case JBD2_COMMIT_BLOCK:
634         case JBD2_REVOKE_BLOCK:
635                 return EXT2_ET_CORRUPT_JOURNAL_SB;
636
637         /* If we don't understand the superblock major type, but there
638          * is a magic number, then it is likely to be a new format we
639          * just don't understand, so leave it alone. */
640         default:
641                 return EXT2_ET_JOURNAL_UNSUPP_VERSION;
642         }
643
644         if (JBD2_HAS_INCOMPAT_FEATURE(journal, ~JBD2_KNOWN_INCOMPAT_FEATURES))
645                 return EXT2_ET_UNSUPP_FEATURE;
646
647         if (JBD2_HAS_RO_COMPAT_FEATURE(journal, ~JBD2_KNOWN_ROCOMPAT_FEATURES))
648                 return EXT2_ET_RO_UNSUPP_FEATURE;
649
650         /* Checksum v1-3 are mutually exclusive features. */
651         if (jbd2_has_feature_csum2(journal) && jbd2_has_feature_csum3(journal))
652                 return EXT2_ET_CORRUPT_JOURNAL_SB;
653
654         if (jbd2_journal_has_csum_v2or3(journal) &&
655             jbd2_has_feature_checksum(journal))
656                 return EXT2_ET_CORRUPT_JOURNAL_SB;
657
658         if (!e2fsck_journal_verify_csum_type(journal, jsb) ||
659             !e2fsck_journal_sb_csum_verify(journal, jsb))
660                 return EXT2_ET_CORRUPT_JOURNAL_SB;
661
662         if (jbd2_journal_has_csum_v2or3(journal))
663                 journal->j_csum_seed = jbd2_chksum(journal, ~0, jsb->s_uuid,
664                                                    sizeof(jsb->s_uuid));
665
666         /* We have now checked whether we know enough about the journal
667          * format to be able to proceed safely, so any other checks that
668          * fail we should attempt to recover from. */
669         if (jsb->s_blocksize != htonl(journal->j_blocksize)) {
670                 com_err(ctx->program_name, EXT2_ET_CORRUPT_JOURNAL_SB,
671                         _("%s: no valid journal superblock found\n"),
672                         ctx->device_name);
673                 return EXT2_ET_CORRUPT_JOURNAL_SB;
674         }
675
676         if (ntohl(jsb->s_maxlen) < journal->j_maxlen)
677                 journal->j_maxlen = ntohl(jsb->s_maxlen);
678         else if (ntohl(jsb->s_maxlen) > journal->j_maxlen) {
679                 com_err(ctx->program_name, EXT2_ET_CORRUPT_JOURNAL_SB,
680                         _("%s: journal too short\n"),
681                         ctx->device_name);
682                 return EXT2_ET_CORRUPT_JOURNAL_SB;
683         }
684
685         journal->j_tail_sequence = ntohl(jsb->s_sequence);
686         journal->j_transaction_sequence = journal->j_tail_sequence;
687         journal->j_tail = ntohl(jsb->s_start);
688         journal->j_first = ntohl(jsb->s_first);
689         journal->j_last = ntohl(jsb->s_maxlen);
690
691         return 0;
692 }
693
694 static void e2fsck_journal_reset_super(e2fsck_t ctx, journal_superblock_t *jsb,
695                                        journal_t *journal)
696 {
697         char *p;
698         union {
699                 uuid_t uuid;
700                 __u32 val[4];
701         } u;
702         __u32 new_seq = 0;
703         int i;
704
705         /* Leave a valid existing V1 superblock signature alone.
706          * Anything unrecognisable we overwrite with a new V2
707          * signature. */
708
709         if (jsb->s_header.h_magic != htonl(JBD2_MAGIC_NUMBER) ||
710             jsb->s_header.h_blocktype != htonl(JBD2_SUPERBLOCK_V1)) {
711                 jsb->s_header.h_magic = htonl(JBD2_MAGIC_NUMBER);
712                 jsb->s_header.h_blocktype = htonl(JBD2_SUPERBLOCK_V2);
713         }
714
715         /* Zero out everything else beyond the superblock header */
716
717         p = ((char *) jsb) + sizeof(journal_header_t);
718         memset (p, 0, ctx->fs->blocksize-sizeof(journal_header_t));
719
720         jsb->s_blocksize = htonl(ctx->fs->blocksize);
721         jsb->s_maxlen = htonl(journal->j_maxlen);
722         jsb->s_first = htonl(1);
723
724         /* Initialize the journal sequence number so that there is "no"
725          * chance we will find old "valid" transactions in the journal.
726          * This avoids the need to zero the whole journal (slow to do,
727          * and risky when we are just recovering the filesystem).
728          */
729         uuid_generate(u.uuid);
730         for (i = 0; i < 4; i ++)
731                 new_seq ^= u.val[i];
732         jsb->s_sequence = htonl(new_seq);
733         e2fsck_journal_sb_csum_set(journal, jsb);
734
735         mark_buffer_dirty(journal->j_sb_buffer);
736         ll_rw_block(REQ_OP_WRITE, 0, 1, &journal->j_sb_buffer);
737 }
738
739 static errcode_t e2fsck_journal_fix_corrupt_super(e2fsck_t ctx,
740                                                   journal_t *journal,
741                                                   struct problem_context *pctx)
742 {
743         struct ext2_super_block *sb = ctx->fs->super;
744         int recover = ext2fs_has_feature_journal_needs_recovery(ctx->fs->super);
745
746         if (ext2fs_has_feature_journal(sb)) {
747                 if (fix_problem(ctx, PR_0_JOURNAL_BAD_SUPER, pctx)) {
748                         e2fsck_journal_reset_super(ctx, journal->j_superblock,
749                                                    journal);
750                         journal->j_transaction_sequence = 1;
751                         e2fsck_clear_recover(ctx, recover);
752                         return 0;
753                 }
754                 return EXT2_ET_CORRUPT_JOURNAL_SB;
755         } else if (e2fsck_journal_fix_bad_inode(ctx, pctx))
756                 return EXT2_ET_CORRUPT_JOURNAL_SB;
757
758         return 0;
759 }
760
761 static void e2fsck_journal_release(e2fsck_t ctx, journal_t *journal,
762                                    int reset, int drop)
763 {
764         journal_superblock_t *jsb;
765
766         if (drop)
767                 mark_buffer_clean(journal->j_sb_buffer);
768         else if (!(ctx->options & E2F_OPT_READONLY)) {
769                 jsb = journal->j_superblock;
770                 jsb->s_sequence = htonl(journal->j_tail_sequence);
771                 if (reset)
772                         jsb->s_start = 0; /* this marks the journal as empty */
773                 e2fsck_journal_sb_csum_set(journal, jsb);
774                 mark_buffer_dirty(journal->j_sb_buffer);
775         }
776         brelse(journal->j_sb_buffer);
777
778         if (ctx->journal_io) {
779                 if (ctx->fs && ctx->fs->io != ctx->journal_io)
780                         io_channel_close(ctx->journal_io);
781                 ctx->journal_io = 0;
782         }
783
784 #ifndef USE_INODE_IO
785         if (journal->j_inode)
786                 ext2fs_free_mem(&journal->j_inode);
787 #endif
788         if (journal->j_fs_dev)
789                 ext2fs_free_mem(&journal->j_fs_dev);
790         ext2fs_free_mem(&journal);
791 }
792
793 /*
794  * This function makes sure that the superblock fields regarding the
795  * journal are consistent.
796  */
797 errcode_t e2fsck_check_ext3_journal(e2fsck_t ctx)
798 {
799         struct ext2_super_block *sb = ctx->fs->super;
800         journal_t *journal;
801         int recover = ext2fs_has_feature_journal_needs_recovery(ctx->fs->super);
802         struct problem_context pctx;
803         problem_t problem;
804         int reset = 0, force_fsck = 0;
805         errcode_t retval;
806
807         /* If we don't have any journal features, don't do anything more */
808         if (!ext2fs_has_feature_journal(sb) &&
809             !recover && sb->s_journal_inum == 0 && sb->s_journal_dev == 0 &&
810             uuid_is_null(sb->s_journal_uuid))
811                 return 0;
812
813         clear_problem_context(&pctx);
814         pctx.num = sb->s_journal_inum;
815
816         retval = e2fsck_get_journal(ctx, &journal);
817         if (retval) {
818                 if ((retval == EXT2_ET_BAD_INODE_NUM) ||
819                     (retval == EXT2_ET_BAD_BLOCK_NUM) ||
820                     (retval == EXT2_ET_JOURNAL_TOO_SMALL) ||
821                     (retval == EXT2_ET_NO_JOURNAL))
822                         return e2fsck_journal_fix_bad_inode(ctx, &pctx);
823                 return retval;
824         }
825
826         retval = e2fsck_journal_load(journal);
827         if (retval) {
828                 if ((retval == EXT2_ET_CORRUPT_JOURNAL_SB) ||
829                     ((retval == EXT2_ET_UNSUPP_FEATURE) &&
830                     (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_INCOMPAT,
831                                   &pctx))) ||
832                     ((retval == EXT2_ET_RO_UNSUPP_FEATURE) &&
833                     (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_ROCOMPAT,
834                                   &pctx))) ||
835                     ((retval == EXT2_ET_JOURNAL_UNSUPP_VERSION) &&
836                     (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_VERSION, &pctx))))
837                         retval = e2fsck_journal_fix_corrupt_super(ctx, journal,
838                                                                   &pctx);
839                 e2fsck_journal_release(ctx, journal, 0, 1);
840                 return retval;
841         }
842
843         /*
844          * We want to make the flags consistent here.  We will not leave with
845          * needs_recovery set but has_journal clear.  We can't get in a loop
846          * with -y, -n, or -p, only if a user isn't making up their mind.
847          */
848 no_has_journal:
849         if (!ext2fs_has_feature_journal(sb)) {
850                 recover = ext2fs_has_feature_journal_needs_recovery(sb);
851                 if (fix_problem(ctx, PR_0_JOURNAL_HAS_JOURNAL, &pctx)) {
852                         if (recover &&
853                             !fix_problem(ctx, PR_0_JOURNAL_RECOVER_SET, &pctx))
854                                 goto no_has_journal;
855                         /*
856                          * Need a full fsck if we are releasing a
857                          * journal stored on a reserved inode.
858                          */
859                         force_fsck = recover ||
860                                 (sb->s_journal_inum < EXT2_FIRST_INODE(sb));
861                         /* Clear all of the journal fields */
862                         sb->s_journal_inum = 0;
863                         sb->s_journal_dev = 0;
864                         memset(sb->s_journal_uuid, 0,
865                                sizeof(sb->s_journal_uuid));
866                         e2fsck_clear_recover(ctx, force_fsck);
867                 } else if (!(ctx->options & E2F_OPT_READONLY)) {
868                         ext2fs_set_feature_journal(sb);
869                         ctx->fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
870                         ext2fs_mark_super_dirty(ctx->fs);
871                 }
872         }
873
874         if (ext2fs_has_feature_journal(sb) &&
875             !ext2fs_has_feature_journal_needs_recovery(sb) &&
876             journal->j_superblock->s_start != 0) {
877                 /* Print status information */
878                 fix_problem(ctx, PR_0_JOURNAL_RECOVERY_CLEAR, &pctx);
879                 if (ctx->superblock)
880                         problem = PR_0_JOURNAL_RUN_DEFAULT;
881                 else
882                         problem = PR_0_JOURNAL_RUN;
883                 if (fix_problem(ctx, problem, &pctx)) {
884                         ctx->options |= E2F_OPT_FORCE;
885                         ext2fs_set_feature_journal_needs_recovery(sb);
886                         ext2fs_mark_super_dirty(ctx->fs);
887                 } else if (fix_problem(ctx,
888                                        PR_0_JOURNAL_RESET_JOURNAL, &pctx)) {
889                         reset = 1;
890                         sb->s_state &= ~EXT2_VALID_FS;
891                         ext2fs_mark_super_dirty(ctx->fs);
892                 }
893                 /*
894                  * If the user answers no to the above question, we
895                  * ignore the fact that journal apparently has data;
896                  * accidentally replaying over valid data would be far
897                  * worse than skipping a questionable recovery.
898                  *
899                  * XXX should we abort with a fatal error here?  What
900                  * will the ext3 kernel code do if a filesystem with
901                  * !NEEDS_RECOVERY but with a non-zero
902                  * journal->j_superblock->s_start is mounted?
903                  */
904         }
905
906         /*
907          * If we don't need to do replay the journal, check to see if
908          * the journal's errno is set; if so, we need to mark the file
909          * system as being corrupt and clear the journal's s_errno.
910          */
911         if (!ext2fs_has_feature_journal_needs_recovery(sb) &&
912             journal->j_superblock->s_errno) {
913                 ctx->fs->super->s_state |= EXT2_ERROR_FS;
914                 ext2fs_mark_super_dirty(ctx->fs);
915                 journal->j_superblock->s_errno = 0;
916                 e2fsck_journal_sb_csum_set(journal, journal->j_superblock);
917                 mark_buffer_dirty(journal->j_sb_buffer);
918         }
919
920         e2fsck_journal_release(ctx, journal, reset, 0);
921         return retval;
922 }
923
924 static errcode_t recover_ext3_journal(e2fsck_t ctx)
925 {
926         struct problem_context  pctx;
927         journal_t *journal;
928         errcode_t retval;
929
930         clear_problem_context(&pctx);
931
932         retval = jbd2_journal_init_revoke_record_cache();
933         if (retval)
934                 return retval;
935
936         retval = jbd2_journal_init_revoke_table_cache();
937         if (retval)
938                 return retval;
939
940         retval = e2fsck_get_journal(ctx, &journal);
941         if (retval)
942                 return retval;
943
944         retval = e2fsck_journal_load(journal);
945         if (retval)
946                 goto errout;
947
948         retval = jbd2_journal_init_revoke(journal, 1024);
949         if (retval)
950                 goto errout;
951
952         retval = -jbd2_journal_recover(journal);
953         if (retval)
954                 goto errout;
955
956         if (journal->j_failed_commit) {
957                 pctx.ino = journal->j_failed_commit;
958                 fix_problem(ctx, PR_0_JNL_TXN_CORRUPT, &pctx);
959                 journal->j_superblock->s_errno = -EINVAL;
960                 mark_buffer_dirty(journal->j_sb_buffer);
961         }
962
963         journal->j_tail_sequence = journal->j_transaction_sequence;
964
965 errout:
966         jbd2_journal_destroy_revoke(journal);
967         jbd2_journal_destroy_revoke_record_cache();
968         jbd2_journal_destroy_revoke_table_cache();
969         e2fsck_journal_release(ctx, journal, 1, 0);
970         return retval;
971 }
972
973 errcode_t e2fsck_run_ext3_journal(e2fsck_t ctx)
974 {
975         io_manager io_ptr = ctx->fs->io->manager;
976         int blocksize = ctx->fs->blocksize;
977         errcode_t       retval, recover_retval;
978         io_stats        stats = 0;
979         unsigned long long kbytes_written = 0;
980
981         printf(_("%s: recovering journal\n"), ctx->device_name);
982         if (ctx->options & E2F_OPT_READONLY) {
983                 printf(_("%s: won't do journal recovery while read-only\n"),
984                        ctx->device_name);
985                 return EXT2_ET_FILE_RO;
986         }
987
988         if (ctx->fs->flags & EXT2_FLAG_DIRTY)
989                 ext2fs_flush(ctx->fs);  /* Force out any modifications */
990
991         recover_retval = recover_ext3_journal(ctx);
992
993         /*
994          * Reload the filesystem context to get up-to-date data from disk
995          * because journal recovery will change the filesystem under us.
996          */
997         if (ctx->fs->super->s_kbytes_written &&
998             ctx->fs->io->manager->get_stats)
999                 ctx->fs->io->manager->get_stats(ctx->fs->io, &stats);
1000         if (stats && stats->bytes_written)
1001                 kbytes_written = stats->bytes_written >> 10;
1002
1003         ext2fs_mmp_stop(ctx->fs);
1004         ext2fs_free(ctx->fs);
1005         retval = ext2fs_open(ctx->filesystem_name, ctx->openfs_flags,
1006                              ctx->superblock, blocksize, io_ptr,
1007                              &ctx->fs);
1008         if (retval) {
1009                 com_err(ctx->program_name, retval,
1010                         _("while trying to re-open %s"),
1011                         ctx->device_name);
1012                 fatal_error(ctx, 0);
1013         }
1014         ctx->fs->priv_data = ctx;
1015         ctx->fs->now = ctx->now;
1016         ctx->fs->flags |= EXT2_FLAG_MASTER_SB_ONLY;
1017         ctx->fs->super->s_kbytes_written += kbytes_written;
1018
1019         /* Set the superblock flags */
1020         e2fsck_clear_recover(ctx, recover_retval != 0);
1021
1022         /*
1023          * Do one last sanity check, and propagate journal->s_errno to
1024          * the EXT2_ERROR_FS flag in the fs superblock if needed.
1025          */
1026         retval = e2fsck_check_ext3_journal(ctx);
1027         return retval ? retval : recover_retval;
1028 }
1029
1030 /*
1031  * This function will move the journal inode from a visible file in
1032  * the filesystem directory hierarchy to the reserved inode if necessary.
1033  */
1034 static const char * const journal_names[] = {
1035         ".journal", "journal", ".journal.dat", "journal.dat", 0 };
1036
1037 void e2fsck_move_ext3_journal(e2fsck_t ctx)
1038 {
1039         struct ext2_super_block *sb = ctx->fs->super;
1040         struct problem_context  pctx;
1041         struct ext2_inode       inode;
1042         ext2_filsys             fs = ctx->fs;
1043         ext2_ino_t              ino;
1044         errcode_t               retval;
1045         const char * const *    cpp;
1046         dgrp_t                  group;
1047         int                     mount_flags;
1048
1049         clear_problem_context(&pctx);
1050
1051         /*
1052          * If the filesystem is opened read-only, or there is no
1053          * journal, then do nothing.
1054          */
1055         if ((ctx->options & E2F_OPT_READONLY) ||
1056             (sb->s_journal_inum == 0) ||
1057             !ext2fs_has_feature_journal(sb))
1058                 return;
1059
1060         /*
1061          * Read in the journal inode
1062          */
1063         if (ext2fs_read_inode(fs, sb->s_journal_inum, &inode) != 0)
1064                 return;
1065
1066         /*
1067          * If it's necessary to backup the journal inode, do so.
1068          */
1069         if ((sb->s_jnl_backup_type == 0) ||
1070             ((sb->s_jnl_backup_type == EXT3_JNL_BACKUP_BLOCKS) &&
1071              memcmp(inode.i_block, sb->s_jnl_blocks, EXT2_N_BLOCKS*4))) {
1072                 if (fix_problem(ctx, PR_0_BACKUP_JNL, &pctx)) {
1073                         memcpy(sb->s_jnl_blocks, inode.i_block,
1074                                EXT2_N_BLOCKS*4);
1075                         sb->s_jnl_blocks[15] = inode.i_size_high;
1076                         sb->s_jnl_blocks[16] = inode.i_size;
1077                         sb->s_jnl_backup_type = EXT3_JNL_BACKUP_BLOCKS;
1078                         ext2fs_mark_super_dirty(fs);
1079                         fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
1080                 }
1081         }
1082
1083         /*
1084          * If the journal is already the hidden inode, then do nothing
1085          */
1086         if (sb->s_journal_inum == EXT2_JOURNAL_INO)
1087                 return;
1088
1089         /*
1090          * The journal inode had better have only one link and not be readable.
1091          */
1092         if (inode.i_links_count != 1)
1093                 return;
1094
1095         /*
1096          * If the filesystem is mounted, or we can't tell whether
1097          * or not it's mounted, do nothing.
1098          */
1099         retval = ext2fs_check_if_mounted(ctx->filesystem_name, &mount_flags);
1100         if (retval || (mount_flags & EXT2_MF_MOUNTED))
1101                 return;
1102
1103         /*
1104          * If we can't find the name of the journal inode, then do
1105          * nothing.
1106          */
1107         for (cpp = journal_names; *cpp; cpp++) {
1108                 retval = ext2fs_lookup(fs, EXT2_ROOT_INO, *cpp,
1109                                        strlen(*cpp), 0, &ino);
1110                 if ((retval == 0) && (ino == sb->s_journal_inum))
1111                         break;
1112         }
1113         if (*cpp == 0)
1114                 return;
1115
1116         /* We need the inode bitmap to be loaded */
1117         retval = ext2fs_read_bitmaps(fs);
1118         if (retval)
1119                 return;
1120
1121         pctx.str = *cpp;
1122         if (!fix_problem(ctx, PR_0_MOVE_JOURNAL, &pctx))
1123                 return;
1124
1125         /*
1126          * OK, we've done all the checks, let's actually move the
1127          * journal inode.  Errors at this point mean we need to force
1128          * an ext2 filesystem check.
1129          */
1130         if ((retval = ext2fs_unlink(fs, EXT2_ROOT_INO, *cpp, ino, 0)) != 0)
1131                 goto err_out;
1132         if ((retval = ext2fs_write_inode(fs, EXT2_JOURNAL_INO, &inode)) != 0)
1133                 goto err_out;
1134         sb->s_journal_inum = EXT2_JOURNAL_INO;
1135         ext2fs_mark_super_dirty(fs);
1136         fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
1137         inode.i_links_count = 0;
1138         inode.i_dtime = ctx->now;
1139         if ((retval = ext2fs_write_inode(fs, ino, &inode)) != 0)
1140                 goto err_out;
1141
1142         group = ext2fs_group_of_ino(fs, ino);
1143         ext2fs_unmark_inode_bitmap2(fs->inode_map, ino);
1144         ext2fs_mark_ib_dirty(fs);
1145         ext2fs_bg_free_inodes_count_set(fs, group, ext2fs_bg_free_inodes_count(fs, group) + 1);
1146         ext2fs_group_desc_csum_set(fs, group);
1147         fs->super->s_free_inodes_count++;
1148         return;
1149
1150 err_out:
1151         pctx.errcode = retval;
1152         fix_problem(ctx, PR_0_ERR_MOVE_JOURNAL, &pctx);
1153         fs->super->s_state &= ~EXT2_VALID_FS;
1154         ext2fs_mark_super_dirty(fs);
1155         return;
1156 }
1157
1158 /*
1159  * This function makes sure the superblock hint for the external
1160  * journal is correct.
1161  */
1162 int e2fsck_fix_ext3_journal_hint(e2fsck_t ctx)
1163 {
1164         struct ext2_super_block *sb = ctx->fs->super;
1165         struct problem_context pctx;
1166         char uuid[37], *journal_name;
1167         struct stat st;
1168
1169         if (!ext2fs_has_feature_journal(sb) ||
1170             uuid_is_null(sb->s_journal_uuid))
1171                 return 0;
1172
1173         uuid_unparse(sb->s_journal_uuid, uuid);
1174         journal_name = blkid_get_devname(ctx->blkid, "UUID", uuid);
1175         if (!journal_name)
1176                 return 0;
1177
1178         if (stat(journal_name, &st) < 0) {
1179                 free(journal_name);
1180                 return 0;
1181         }
1182
1183         if (st.st_rdev != sb->s_journal_dev) {
1184                 clear_problem_context(&pctx);
1185                 pctx.num = st.st_rdev;
1186                 if (fix_problem(ctx, PR_0_EXTERNAL_JOURNAL_HINT, &pctx)) {
1187                         sb->s_journal_dev = st.st_rdev;
1188                         ext2fs_mark_super_dirty(ctx->fs);
1189                 }
1190         }
1191
1192         free(journal_name);
1193         return 0;
1194 }