Whamcloud - gitweb
e2fsck: add fc replay for link, unlink, creat tags
[tools/e2fsprogs.git] / e2fsck / journal.c
1 /*
2  * journal.c --- code for handling the "ext3" journal
3  *
4  * Copyright (C) 2000 Andreas Dilger
5  * Copyright (C) 2000 Theodore Ts'o
6  *
7  * Parts of the code are based on fs/jfs/journal.c by Stephen C. Tweedie
8  * Copyright (C) 1999 Red Hat Software
9  *
10  * This file may be redistributed under the terms of the
11  * GNU General Public License version 2 or at your discretion
12  * any later version.
13  */
14
15 #include "config.h"
16 #ifdef HAVE_SYS_MOUNT_H
17 #include <sys/param.h>
18 #include <sys/mount.h>
19 #define MNT_FL (MS_MGC_VAL | MS_RDONLY)
20 #endif
21 #ifdef HAVE_SYS_STAT_H
22 #include <sys/stat.h>
23 #endif
24
25 #define E2FSCK_INCLUDE_INLINE_FUNCS
26 #include "jfs_user.h"
27 #include "problem.h"
28 #include "uuid/uuid.h"
29
30 #ifdef CONFIG_JBD_DEBUG         /* Enabled by configure --enable-jfs-debug */
31 static int bh_count = 0;
32 #endif
33
34 /*
35  * Define USE_INODE_IO to use the inode_io.c / fileio.c codepaths.
36  * This creates a larger static binary, and a smaller binary using
37  * shared libraries.  It's also probably slightly less CPU-efficient,
38  * which is why it's not on by default.  But, it's a good way of
39  * testing the functions in inode_io.c and fileio.c.
40  */
41 #undef USE_INODE_IO
42
43 /* Checksumming functions */
44 static int e2fsck_journal_verify_csum_type(journal_t *j,
45                                            journal_superblock_t *jsb)
46 {
47         if (!jbd2_journal_has_csum_v2or3(j))
48                 return 1;
49
50         return jsb->s_checksum_type == JBD2_CRC32C_CHKSUM;
51 }
52
53 static __u32 e2fsck_journal_sb_csum(journal_superblock_t *jsb)
54 {
55         __u32 crc, old_crc;
56
57         old_crc = jsb->s_checksum;
58         jsb->s_checksum = 0;
59         crc = ext2fs_crc32c_le(~0, (unsigned char *)jsb,
60                                sizeof(journal_superblock_t));
61         jsb->s_checksum = old_crc;
62
63         return crc;
64 }
65
66 static int e2fsck_journal_sb_csum_verify(journal_t *j,
67                                          journal_superblock_t *jsb)
68 {
69         __u32 provided, calculated;
70
71         if (!jbd2_journal_has_csum_v2or3(j))
72                 return 1;
73
74         provided = ext2fs_be32_to_cpu(jsb->s_checksum);
75         calculated = e2fsck_journal_sb_csum(jsb);
76
77         return provided == calculated;
78 }
79
80 static errcode_t e2fsck_journal_sb_csum_set(journal_t *j,
81                                             journal_superblock_t *jsb)
82 {
83         __u32 crc;
84
85         if (!jbd2_journal_has_csum_v2or3(j))
86                 return 0;
87
88         crc = e2fsck_journal_sb_csum(jsb);
89         jsb->s_checksum = ext2fs_cpu_to_be32(crc);
90         return 0;
91 }
92
93 /* Kernel compatibility functions for handling the journal.  These allow us
94  * to use the recovery.c file virtually unchanged from the kernel, so we
95  * don't have to do much to keep kernel and user recovery in sync.
96  */
97 int jbd2_journal_bmap(journal_t *journal, unsigned long block,
98                       unsigned long long *phys)
99 {
100 #ifdef USE_INODE_IO
101         *phys = block;
102         return 0;
103 #else
104         struct inode    *inode = journal->j_inode;
105         errcode_t       retval;
106         blk64_t         pblk;
107
108         if (!inode) {
109                 *phys = block;
110                 return 0;
111         }
112
113         retval= ext2fs_bmap2(inode->i_ctx->fs, inode->i_ino,
114                              &inode->i_ext2, NULL, 0, (blk64_t) block,
115                              0, &pblk);
116         *phys = pblk;
117         return -1 * ((int) retval);
118 #endif
119 }
120
121 struct buffer_head *getblk(kdev_t kdev, unsigned long long blocknr,
122                            int blocksize)
123 {
124         struct buffer_head *bh;
125         int bufsize = sizeof(*bh) + kdev->k_ctx->fs->blocksize -
126                 sizeof(bh->b_data);
127
128         bh = e2fsck_allocate_memory(kdev->k_ctx, bufsize, "block buffer");
129         if (!bh)
130                 return NULL;
131
132 #ifdef CONFIG_JBD_DEBUG
133         if (journal_enable_debug >= 3)
134                 bh_count++;
135 #endif
136         jfs_debug(4, "getblk for block %llu (%d bytes)(total %d)\n",
137                   blocknr, blocksize, bh_count);
138
139         bh->b_ctx = kdev->k_ctx;
140         if (kdev->k_dev == K_DEV_FS)
141                 bh->b_io = kdev->k_ctx->fs->io;
142         else
143                 bh->b_io = kdev->k_ctx->journal_io;
144         bh->b_size = blocksize;
145         bh->b_blocknr = blocknr;
146
147         return bh;
148 }
149
150 int sync_blockdev(kdev_t kdev)
151 {
152         io_channel      io;
153
154         if (kdev->k_dev == K_DEV_FS)
155                 io = kdev->k_ctx->fs->io;
156         else
157                 io = kdev->k_ctx->journal_io;
158
159         return io_channel_flush(io) ? -EIO : 0;
160 }
161
162 void ll_rw_block(int rw, int op_flags, int nr, struct buffer_head *bhp[])
163 {
164         errcode_t retval;
165         struct buffer_head *bh;
166
167         for (; nr > 0; --nr) {
168                 bh = *bhp++;
169                 if (rw == REQ_OP_READ && !bh->b_uptodate) {
170                         jfs_debug(3, "reading block %llu/%p\n",
171                                   bh->b_blocknr, (void *) bh);
172                         retval = io_channel_read_blk64(bh->b_io,
173                                                      bh->b_blocknr,
174                                                      1, bh->b_data);
175                         if (retval) {
176                                 com_err(bh->b_ctx->device_name, retval,
177                                         "while reading block %llu\n",
178                                         bh->b_blocknr);
179                                 bh->b_err = (int) retval;
180                                 continue;
181                         }
182                         bh->b_uptodate = 1;
183                 } else if (rw == REQ_OP_WRITE && bh->b_dirty) {
184                         jfs_debug(3, "writing block %llu/%p\n",
185                                   bh->b_blocknr,
186                                   (void *) bh);
187                         retval = io_channel_write_blk64(bh->b_io,
188                                                       bh->b_blocknr,
189                                                       1, bh->b_data);
190                         if (retval) {
191                                 com_err(bh->b_ctx->device_name, retval,
192                                         "while writing block %llu\n",
193                                         bh->b_blocknr);
194                                 bh->b_err = (int) retval;
195                                 continue;
196                         }
197                         bh->b_dirty = 0;
198                         bh->b_uptodate = 1;
199                 } else {
200                         jfs_debug(3, "no-op %s for block %llu\n",
201                                   rw == REQ_OP_READ ? "read" : "write",
202                                   bh->b_blocknr);
203                 }
204         }
205 }
206
207 void mark_buffer_dirty(struct buffer_head *bh)
208 {
209         bh->b_dirty = 1;
210 }
211
212 static void mark_buffer_clean(struct buffer_head * bh)
213 {
214         bh->b_dirty = 0;
215 }
216
217 void brelse(struct buffer_head *bh)
218 {
219         if (bh->b_dirty)
220                 ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
221         jfs_debug(3, "freeing block %llu/%p (total %d)\n",
222                   bh->b_blocknr, (void *) bh, --bh_count);
223         ext2fs_free_mem(&bh);
224 }
225
226 int buffer_uptodate(struct buffer_head *bh)
227 {
228         return bh->b_uptodate;
229 }
230
231 void mark_buffer_uptodate(struct buffer_head *bh, int val)
232 {
233         bh->b_uptodate = val;
234 }
235
236 void wait_on_buffer(struct buffer_head *bh)
237 {
238         if (!bh->b_uptodate)
239                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
240 }
241
242
243 static void e2fsck_clear_recover(e2fsck_t ctx, int error)
244 {
245         ext2fs_clear_feature_journal_needs_recovery(ctx->fs->super);
246
247         /* if we had an error doing journal recovery, we need a full fsck */
248         if (error)
249                 ctx->fs->super->s_state &= ~EXT2_VALID_FS;
250         ext2fs_mark_super_dirty(ctx->fs);
251 }
252
253 /*
254  * This is a helper function to check the validity of the journal.
255  */
256 struct process_block_struct {
257         e2_blkcnt_t     last_block;
258 };
259
260 static int process_journal_block(ext2_filsys fs,
261                                  blk64_t        *block_nr,
262                                  e2_blkcnt_t blockcnt,
263                                  blk64_t ref_block EXT2FS_ATTR((unused)),
264                                  int ref_offset EXT2FS_ATTR((unused)),
265                                  void *priv_data)
266 {
267         struct process_block_struct *p;
268         blk64_t blk = *block_nr;
269
270         p = (struct process_block_struct *) priv_data;
271
272         if (!blk || blk < fs->super->s_first_data_block ||
273             blk >= ext2fs_blocks_count(fs->super))
274                 return BLOCK_ABORT;
275
276         if (blockcnt >= 0)
277                 p->last_block = blockcnt;
278         return 0;
279 }
280
281 static int ext4_fc_replay_scan(journal_t *j, struct buffer_head *bh,
282                                 int off, tid_t expected_tid)
283 {
284         e2fsck_t ctx = j->j_fs_dev->k_ctx;
285         struct e2fsck_fc_replay_state *state;
286         int ret = JBD2_FC_REPLAY_CONTINUE;
287         struct ext4_fc_add_range *ext;
288         struct ext4_fc_tl *tl;
289         struct ext4_fc_tail *tail;
290         __u8 *start, *end;
291         struct ext4_fc_head *head;
292         struct ext2fs_extent ext2fs_ex;
293
294         state = &ctx->fc_replay_state;
295
296         start = (__u8 *)bh->b_data;
297         end = (__u8 *)bh->b_data + j->j_blocksize - 1;
298
299         jbd_debug(1, "Scan phase starting, expected %d", expected_tid);
300         if (state->fc_replay_expected_off == 0) {
301                 memset(state, 0, sizeof(*state));
302                 /* Check if we can stop early */
303                 if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
304                         != EXT4_FC_TAG_HEAD) {
305                         jbd_debug(1, "Ending early!, not a head tag");
306                         return 0;
307                 }
308         }
309
310         if (off != state->fc_replay_expected_off) {
311                 ret = -EFSCORRUPTED;
312                 goto out_err;
313         }
314
315         state->fc_replay_expected_off++;
316         fc_for_each_tl(start, end, tl) {
317                 jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
318                           tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
319                 switch (le16_to_cpu(tl->fc_tag)) {
320                 case EXT4_FC_TAG_ADD_RANGE:
321                         ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
322                         ret = ext2fs_decode_extent(&ext2fs_ex, (void *)&ext->fc_ex,
323                                                    sizeof(ext->fc_ex));
324                         if (ret)
325                                 ret = JBD2_FC_REPLAY_STOP;
326                         else
327                                 ret = JBD2_FC_REPLAY_CONTINUE;
328                 case EXT4_FC_TAG_DEL_RANGE:
329                 case EXT4_FC_TAG_LINK:
330                 case EXT4_FC_TAG_UNLINK:
331                 case EXT4_FC_TAG_CREAT:
332                 case EXT4_FC_TAG_INODE:
333                 case EXT4_FC_TAG_PAD:
334                         state->fc_cur_tag++;
335                         state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
336                                         sizeof(*tl) + ext4_fc_tag_len(tl));
337                         break;
338                 case EXT4_FC_TAG_TAIL:
339                         state->fc_cur_tag++;
340                         tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
341                         state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
342                                                 sizeof(*tl) +
343                                                 offsetof(struct ext4_fc_tail,
344                                                 fc_crc));
345                         jbd_debug(1, "tail tid %d, expected %d\n",
346                                         le32_to_cpu(tail->fc_tid),
347                                         expected_tid);
348                         if (le32_to_cpu(tail->fc_tid) == expected_tid &&
349                                 le32_to_cpu(tail->fc_crc) == state->fc_crc) {
350                                 state->fc_replay_num_tags = state->fc_cur_tag;
351                         } else {
352                                 ret = state->fc_replay_num_tags ?
353                                         JBD2_FC_REPLAY_STOP : -EFSBADCRC;
354                         }
355                         state->fc_crc = 0;
356                         break;
357                 case EXT4_FC_TAG_HEAD:
358                         head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
359                         if (le32_to_cpu(head->fc_features) &
360                                 ~EXT4_FC_SUPPORTED_FEATURES) {
361                                 ret = -EOPNOTSUPP;
362                                 break;
363                         }
364                         if (le32_to_cpu(head->fc_tid) != expected_tid) {
365                                 ret = -EINVAL;
366                                 break;
367                         }
368                         state->fc_cur_tag++;
369                         state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
370                                         sizeof(*tl) + ext4_fc_tag_len(tl));
371                         break;
372                 default:
373                         ret = state->fc_replay_num_tags ?
374                                 JBD2_FC_REPLAY_STOP : -ECANCELED;
375                 }
376                 if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
377                         break;
378         }
379
380 out_err:
381         return ret;
382 }
383
384 static int __errcode_to_errno(errcode_t err, const char *func, int line)
385 {
386         if (err == 0)
387                 return 0;
388         fprintf(stderr, "Error \"%s\" encountered in function %s at line %d\n",
389                 error_message(err), func, line);
390         if (err <= 256)
391                 return -err;
392         return -EFAULT;
393 }
394
395 #define errcode_to_errno(err)   __errcode_to_errno(err, __func__, __LINE__)
396
397 /* Helper struct for dentry replay routines */
struct dentry_info_args {
	int parent_ino;		/* inode number of the containing directory */
	int dname_len;		/* length of dname, excluding the terminator */
	int ino;		/* inode number the entry refers to */
	int inode_len;
	char *dname;		/* NUL-terminated entry name */
};
402
403 static inline void tl_to_darg(struct dentry_info_args *darg,
404                                 struct  ext4_fc_tl *tl)
405 {
406         struct ext4_fc_dentry_info *fcd;
407         int tag = le16_to_cpu(tl->fc_tag);
408
409         fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl);
410
411         darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino);
412         darg->ino = le32_to_cpu(fcd->fc_ino);
413         darg->dname = fcd->fc_dname;
414         darg->dname_len = ext4_fc_tag_len(tl) -
415                         sizeof(struct ext4_fc_dentry_info);
416         darg->dname = malloc(darg->dname_len + 1);
417         memcpy(darg->dname, fcd->fc_dname, darg->dname_len);
418         darg->dname[darg->dname_len] = 0;
419         jbd_debug(1, "%s: %s, ino %d, parent %d\n",
420                 tag == EXT4_FC_TAG_CREAT ? "create" :
421                 (tag == EXT4_FC_TAG_LINK ? "link" :
422                 (tag == EXT4_FC_TAG_UNLINK ? "unlink" : "error")),
423                 darg->dname, darg->ino, darg->parent_ino);
424 }
425
426 static int ext4_fc_handle_unlink(e2fsck_t ctx, struct ext4_fc_tl *tl)
427 {
428         struct ext2_inode inode;
429         struct dentry_info_args darg;
430         ext2_filsys fs = ctx->fs;
431         int ret;
432
433         tl_to_darg(&darg, tl);
434         ret = errcode_to_errno(
435                        ext2fs_unlink(ctx->fs, darg.parent_ino,
436                                      darg.dname, darg.ino, 0));
437         /* It's okay if the above call fails */
438         free(darg.dname);
439         return ret;
440 }
441
442 static int ext4_fc_handle_link_and_create(e2fsck_t ctx, struct ext4_fc_tl *tl)
443 {
444         struct dentry_info_args darg;
445         ext2_filsys fs = ctx->fs;
446         struct ext2_inode_large inode_large;
447         int ret, filetype, mode;
448
449         tl_to_darg(&darg, tl);
450         ret = errcode_to_errno(ext2fs_read_inode(fs, darg.ino,
451                                                  (struct ext2_inode *)&inode_large));
452         if (ret)
453                 goto out;
454
455         mode = inode_large.i_mode;
456
457         if (LINUX_S_ISREG(mode))
458                 filetype = EXT2_FT_REG_FILE;
459         else if (LINUX_S_ISDIR(mode))
460                 filetype = EXT2_FT_DIR;
461         else if (LINUX_S_ISCHR(mode))
462                 filetype = EXT2_FT_CHRDEV;
463         else if (LINUX_S_ISBLK(mode))
464                 filetype = EXT2_FT_BLKDEV;
465         else if (LINUX_S_ISLNK(mode))
466                 return EXT2_FT_SYMLINK;
467         else if (LINUX_S_ISFIFO(mode))
468                 filetype = EXT2_FT_FIFO;
469         else if (LINUX_S_ISSOCK(mode))
470                 filetype = EXT2_FT_SOCK;
471         else {
472                 ret = -EINVAL;
473                 goto out;
474         }
475
476         /*
477          * Forcefully unlink if the same name is present and ignore the error
478          * if any, since this dirent might not exist
479          */
480         ext2fs_unlink(fs, darg.parent_ino, darg.dname, darg.ino,
481                         EXT2FS_UNLINK_FORCE);
482
483         ret = errcode_to_errno(
484                        ext2fs_link(fs, darg.parent_ino, darg.dname, darg.ino,
485                                    filetype));
486 out:
487         free(darg.dname);
488         return ret;
489
490 }
491 /*
492  * Main recovery path entry point. This function returns JBD2_FC_REPLAY_CONTINUE
493  * to indicate that it is expecting more fast commit blocks. It returns
494  * JBD2_FC_REPLAY_STOP to indicate that replay is done.
495  */
496 static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
497                                 enum passtype pass, int off, tid_t expected_tid)
498 {
499         e2fsck_t ctx = journal->j_fs_dev->k_ctx;
500         struct e2fsck_fc_replay_state *state = &ctx->fc_replay_state;
501         int ret = JBD2_FC_REPLAY_CONTINUE;
502         struct ext4_fc_tl *tl;
503         __u8 *start, *end;
504
505         if (pass == PASS_SCAN) {
506                 state->fc_current_pass = PASS_SCAN;
507                 return ext4_fc_replay_scan(journal, bh, off, expected_tid);
508         }
509
510         if (state->fc_replay_num_tags == 0)
511                 goto replay_done;
512
513         if (state->fc_current_pass != pass) {
514                 /* Starting replay phase */
515                 state->fc_current_pass = pass;
516                 /* We will reset checksums */
517                 ctx->fs->flags |= EXT2_FLAG_IGNORE_CSUM_ERRORS;
518                 ret = ext2fs_read_bitmaps(ctx->fs);
519                 if (ret) {
520                         jbd_debug(1, "Error %d while reading bitmaps\n", ret);
521                         return ret;
522                 }
523                 state->fc_super_state = ctx->fs->super->s_state;
524                 /*
525                  * Mark the file system to indicate it contains errors. That's
526                  * because the updates performed by fast commit replay code are
527                  * not atomic and may result in incosistent file system if it
528                  * crashes before the replay is complete.
529                  */
530                 ctx->fs->super->s_state |= EXT2_ERROR_FS;
531                 ctx->fs->super->s_state |= EXT4_FC_REPLAY;
532                 ext2fs_mark_super_dirty(ctx->fs);
533                 ext2fs_flush(ctx->fs);
534         }
535
536         start = (__u8 *)bh->b_data;
537         end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
538
539         fc_for_each_tl(start, end, tl) {
540                 if (state->fc_replay_num_tags == 0)
541                         goto replay_done;
542                 jbd_debug(3, "Replay phase processing %s tag\n",
543                                 tag2str(le16_to_cpu(tl->fc_tag)));
544                 state->fc_replay_num_tags--;
545                 switch (le16_to_cpu(tl->fc_tag)) {
546                 case EXT4_FC_TAG_CREAT:
547                 case EXT4_FC_TAG_LINK:
548                         ret = ext4_fc_handle_link_and_create(ctx, tl);
549                         break;
550                 case EXT4_FC_TAG_UNLINK:
551                         ret = ext4_fc_handle_unlink(ctx, tl);
552                         break;
553                 case EXT4_FC_TAG_ADD_RANGE:
554                 case EXT4_FC_TAG_DEL_RANGE:
555                 case EXT4_FC_TAG_INODE:
556                 case EXT4_FC_TAG_TAIL:
557                 case EXT4_FC_TAG_PAD:
558                 case EXT4_FC_TAG_HEAD:
559                         break;
560                 default:
561                         ret = -ECANCELED;
562                         break;
563                 }
564                 if (ret < 0)
565                         break;
566                 ret = JBD2_FC_REPLAY_CONTINUE;
567         }
568         return ret;
569 replay_done:
570         jbd_debug(1, "End of fast commit replay\n");
571         if (state->fc_current_pass != pass)
572                 return JBD2_FC_REPLAY_STOP;
573
574         ext2fs_calculate_summary_stats(ctx->fs, 0 /* update bg also */);
575         ext2fs_write_block_bitmap(ctx->fs);
576         ext2fs_write_inode_bitmap(ctx->fs);
577         ext2fs_mark_super_dirty(ctx->fs);
578         ext2fs_set_gdt_csum(ctx->fs);
579         ctx->fs->super->s_state = state->fc_super_state;
580         ext2fs_flush(ctx->fs);
581
582         return JBD2_FC_REPLAY_STOP;
583 }
584
585 static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal)
586 {
587         struct process_block_struct pb;
588         struct ext2_super_block *sb = ctx->fs->super;
589         struct ext2_super_block jsuper;
590         struct problem_context  pctx;
591         struct buffer_head      *bh;
592         struct inode            *j_inode = NULL;
593         struct kdev_s           *dev_fs = NULL, *dev_journal;
594         const char              *journal_name = 0;
595         journal_t               *journal = NULL;
596         errcode_t               retval = 0;
597         io_manager              io_ptr = 0;
598         unsigned long long      start = 0;
599         int                     ret;
600         int                     ext_journal = 0;
601         int                     tried_backup_jnl = 0;
602
603         clear_problem_context(&pctx);
604
605         journal = e2fsck_allocate_memory(ctx, sizeof(journal_t), "journal");
606         if (!journal) {
607                 return EXT2_ET_NO_MEMORY;
608         }
609
610         dev_fs = e2fsck_allocate_memory(ctx, 2*sizeof(struct kdev_s), "kdev");
611         if (!dev_fs) {
612                 retval = EXT2_ET_NO_MEMORY;
613                 goto errout;
614         }
615         dev_journal = dev_fs+1;
616
617         dev_fs->k_ctx = dev_journal->k_ctx = ctx;
618         dev_fs->k_dev = K_DEV_FS;
619         dev_journal->k_dev = K_DEV_JOURNAL;
620
621         journal->j_dev = dev_journal;
622         journal->j_fs_dev = dev_fs;
623         journal->j_inode = NULL;
624         journal->j_blocksize = ctx->fs->blocksize;
625
626         if (uuid_is_null(sb->s_journal_uuid)) {
627                 if (!sb->s_journal_inum) {
628                         retval = EXT2_ET_BAD_INODE_NUM;
629                         goto errout;
630                 }
631                 j_inode = e2fsck_allocate_memory(ctx, sizeof(*j_inode),
632                                                  "journal inode");
633                 if (!j_inode) {
634                         retval = EXT2_ET_NO_MEMORY;
635                         goto errout;
636                 }
637
638                 j_inode->i_ctx = ctx;
639                 j_inode->i_ino = sb->s_journal_inum;
640
641                 if ((retval = ext2fs_read_inode(ctx->fs,
642                                                 sb->s_journal_inum,
643                                                 &j_inode->i_ext2))) {
644                 try_backup_journal:
645                         if (sb->s_jnl_backup_type != EXT3_JNL_BACKUP_BLOCKS ||
646                             tried_backup_jnl)
647                                 goto errout;
648                         memset(&j_inode->i_ext2, 0, sizeof(struct ext2_inode));
649                         memcpy(&j_inode->i_ext2.i_block[0], sb->s_jnl_blocks,
650                                EXT2_N_BLOCKS*4);
651                         j_inode->i_ext2.i_size_high = sb->s_jnl_blocks[15];
652                         j_inode->i_ext2.i_size = sb->s_jnl_blocks[16];
653                         j_inode->i_ext2.i_links_count = 1;
654                         j_inode->i_ext2.i_mode = LINUX_S_IFREG | 0600;
655                         e2fsck_use_inode_shortcuts(ctx, 1);
656                         ctx->stashed_ino = j_inode->i_ino;
657                         ctx->stashed_inode = &j_inode->i_ext2;
658                         tried_backup_jnl++;
659                 }
660                 if (!j_inode->i_ext2.i_links_count ||
661                     !LINUX_S_ISREG(j_inode->i_ext2.i_mode)) {
662                         retval = EXT2_ET_NO_JOURNAL;
663                         goto try_backup_journal;
664                 }
665                 if (EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize <
666                     JBD2_MIN_JOURNAL_BLOCKS) {
667                         retval = EXT2_ET_JOURNAL_TOO_SMALL;
668                         goto try_backup_journal;
669                 }
670                 pb.last_block = -1;
671                 retval = ext2fs_block_iterate3(ctx->fs, j_inode->i_ino,
672                                                BLOCK_FLAG_HOLE, 0,
673                                                process_journal_block, &pb);
674                 if ((pb.last_block + 1) * ctx->fs->blocksize <
675                     (int) EXT2_I_SIZE(&j_inode->i_ext2)) {
676                         retval = EXT2_ET_JOURNAL_TOO_SMALL;
677                         goto try_backup_journal;
678                 }
679                 if (tried_backup_jnl && !(ctx->options & E2F_OPT_READONLY)) {
680                         retval = ext2fs_write_inode(ctx->fs, sb->s_journal_inum,
681                                                     &j_inode->i_ext2);
682                         if (retval)
683                                 goto errout;
684                 }
685
686                 journal->j_total_len = EXT2_I_SIZE(&j_inode->i_ext2) /
687                         journal->j_blocksize;
688
689 #ifdef USE_INODE_IO
690                 retval = ext2fs_inode_io_intern2(ctx->fs, sb->s_journal_inum,
691                                                  &j_inode->i_ext2,
692                                                  &journal_name);
693                 if (retval)
694                         goto errout;
695
696                 io_ptr = inode_io_manager;
697 #else
698                 journal->j_inode = j_inode;
699                 ctx->journal_io = ctx->fs->io;
700                 if ((ret = jbd2_journal_bmap(journal, 0, &start)) != 0) {
701                         retval = (errcode_t) (-1 * ret);
702                         goto errout;
703                 }
704 #endif
705         } else {
706                 ext_journal = 1;
707                 if (!ctx->journal_name) {
708                         char uuid[37];
709
710                         uuid_unparse(sb->s_journal_uuid, uuid);
711                         ctx->journal_name = blkid_get_devname(ctx->blkid,
712                                                               "UUID", uuid);
713                         if (!ctx->journal_name)
714                                 ctx->journal_name = blkid_devno_to_devname(sb->s_journal_dev);
715                 }
716                 journal_name = ctx->journal_name;
717
718                 if (!journal_name) {
719                         fix_problem(ctx, PR_0_CANT_FIND_JOURNAL, &pctx);
720                         retval = EXT2_ET_LOAD_EXT_JOURNAL;
721                         goto errout;
722                 }
723
724                 jfs_debug(1, "Using journal file %s\n", journal_name);
725                 io_ptr = unix_io_manager;
726         }
727
728 #if 0
729         test_io_backing_manager = io_ptr;
730         io_ptr = test_io_manager;
731 #endif
732 #ifndef USE_INODE_IO
733         if (ext_journal)
734 #endif
735         {
736                 int flags = IO_FLAG_RW;
737                 if (!(ctx->mount_flags & EXT2_MF_ISROOT &&
738                       ctx->mount_flags & EXT2_MF_READONLY))
739                         flags |= IO_FLAG_EXCLUSIVE;
740                 if ((ctx->mount_flags & EXT2_MF_READONLY) &&
741                     (ctx->options & E2F_OPT_FORCE))
742                         flags &= ~IO_FLAG_EXCLUSIVE;
743
744
745                 retval = io_ptr->open(journal_name, flags,
746                                       &ctx->journal_io);
747         }
748         if (retval)
749                 goto errout;
750
751         io_channel_set_blksize(ctx->journal_io, ctx->fs->blocksize);
752
753         if (ext_journal) {
754                 blk64_t maxlen;
755
756                 start = ext2fs_journal_sb_start(ctx->fs->blocksize) - 1;
757                 bh = getblk(dev_journal, start, ctx->fs->blocksize);
758                 if (!bh) {
759                         retval = EXT2_ET_NO_MEMORY;
760                         goto errout;
761                 }
762                 ll_rw_block(REQ_OP_READ, 0, 1, &bh);
763                 if ((retval = bh->b_err) != 0) {
764                         brelse(bh);
765                         goto errout;
766                 }
767                 memcpy(&jsuper, start ? bh->b_data :  bh->b_data + SUPERBLOCK_OFFSET,
768                        sizeof(jsuper));
769 #ifdef WORDS_BIGENDIAN
770                 if (jsuper.s_magic == ext2fs_swab16(EXT2_SUPER_MAGIC))
771                         ext2fs_swap_super(&jsuper);
772 #endif
773                 if (jsuper.s_magic != EXT2_SUPER_MAGIC ||
774                     !ext2fs_has_feature_journal_dev(&jsuper)) {
775                         fix_problem(ctx, PR_0_EXT_JOURNAL_BAD_SUPER, &pctx);
776                         retval = EXT2_ET_LOAD_EXT_JOURNAL;
777                         brelse(bh);
778                         goto errout;
779                 }
780                 /* Make sure the journal UUID is correct */
781                 if (memcmp(jsuper.s_uuid, ctx->fs->super->s_journal_uuid,
782                            sizeof(jsuper.s_uuid))) {
783                         fix_problem(ctx, PR_0_JOURNAL_BAD_UUID, &pctx);
784                         retval = EXT2_ET_LOAD_EXT_JOURNAL;
785                         brelse(bh);
786                         goto errout;
787                 }
788
789                 /* Check the superblock checksum */
790                 if (ext2fs_has_feature_metadata_csum(&jsuper)) {
791                         struct struct_ext2_filsys fsx;
792                         struct ext2_super_block superx;
793                         void *p;
794
795                         p = start ? bh->b_data : bh->b_data + SUPERBLOCK_OFFSET;
796                         memcpy(&fsx, ctx->fs, sizeof(fsx));
797                         memcpy(&superx, ctx->fs->super, sizeof(superx));
798                         fsx.super = &superx;
799                         ext2fs_set_feature_metadata_csum(fsx.super);
800                         if (!ext2fs_superblock_csum_verify(&fsx, p) &&
801                             fix_problem(ctx, PR_0_EXT_JOURNAL_SUPER_CSUM_INVALID,
802                                         &pctx)) {
803                                 ext2fs_superblock_csum_set(&fsx, p);
804                                 mark_buffer_dirty(bh);
805                         }
806                 }
807                 brelse(bh);
808
809                 maxlen = ext2fs_blocks_count(&jsuper);
810                 journal->j_total_len = (maxlen < 1ULL << 32) ? maxlen : (1ULL << 32) - 1;
811                 start++;
812         }
813
814         if (!(bh = getblk(dev_journal, start, journal->j_blocksize))) {
815                 retval = EXT2_ET_NO_MEMORY;
816                 goto errout;
817         }
818
819         journal->j_sb_buffer = bh;
820         journal->j_superblock = (journal_superblock_t *)bh->b_data;
821         if (ext2fs_has_feature_fast_commit(ctx->fs->super))
822                 journal->j_fc_replay_callback = ext4_fc_replay;
823         else
824                 journal->j_fc_replay_callback = NULL;
825
826 #ifdef USE_INODE_IO
827         if (j_inode)
828                 ext2fs_free_mem(&j_inode);
829 #endif
830
831         *ret_journal = journal;
832         e2fsck_use_inode_shortcuts(ctx, 0);
833         return 0;
834
835 errout:
836         e2fsck_use_inode_shortcuts(ctx, 0);
837         if (dev_fs)
838                 ext2fs_free_mem(&dev_fs);
839         if (j_inode)
840                 ext2fs_free_mem(&j_inode);
841         if (journal)
842                 ext2fs_free_mem(&journal);
843         return retval;
844 }
845
846 static errcode_t e2fsck_journal_fix_bad_inode(e2fsck_t ctx,
847                                               struct problem_context *pctx)
848 {
849         struct ext2_super_block *sb = ctx->fs->super;
850         int recover = ext2fs_has_feature_journal_needs_recovery(ctx->fs->super);
851         int has_journal = ext2fs_has_feature_journal(ctx->fs->super);
852
853         if (has_journal || sb->s_journal_inum) {
854                 /* The journal inode is bogus, remove and force full fsck */
855                 pctx->ino = sb->s_journal_inum;
856                 if (fix_problem(ctx, PR_0_JOURNAL_BAD_INODE, pctx)) {
857                         if (has_journal && sb->s_journal_inum)
858                                 printf("*** journal has been deleted ***\n\n");
859                         ext2fs_clear_feature_journal(sb);
860                         sb->s_journal_inum = 0;
861                         memset(sb->s_jnl_blocks, 0, sizeof(sb->s_jnl_blocks));
862                         ctx->flags |= E2F_FLAG_JOURNAL_INODE;
863                         ctx->fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
864                         e2fsck_clear_recover(ctx, 1);
865                         return 0;
866                 }
867                 return EXT2_ET_CORRUPT_JOURNAL_SB;
868         } else if (recover) {
869                 if (fix_problem(ctx, PR_0_JOURNAL_RECOVER_SET, pctx)) {
870                         e2fsck_clear_recover(ctx, 1);
871                         return 0;
872                 }
873                 return EXT2_ET_UNSUPP_FEATURE;
874         }
875         return 0;
876 }
877
878 #define V1_SB_SIZE      0x0024
879 static void clear_v2_journal_fields(journal_t *journal)
880 {
881         e2fsck_t ctx = journal->j_dev->k_ctx;
882         struct problem_context pctx;
883
884         clear_problem_context(&pctx);
885
886         if (!fix_problem(ctx, PR_0_CLEAR_V2_JOURNAL, &pctx))
887                 return;
888
889         ctx->flags |= E2F_FLAG_PROBLEMS_FIXED;
890         memset(((char *) journal->j_superblock) + V1_SB_SIZE, 0,
891                ctx->fs->blocksize-V1_SB_SIZE);
892         mark_buffer_dirty(journal->j_sb_buffer);
893 }
894
895
896 static errcode_t e2fsck_journal_load(journal_t *journal)
897 {
898         e2fsck_t ctx = journal->j_dev->k_ctx;
899         journal_superblock_t *jsb;
900         struct buffer_head *jbh = journal->j_sb_buffer;
901         struct problem_context pctx;
902
903         clear_problem_context(&pctx);
904
905         ll_rw_block(REQ_OP_READ, 0, 1, &jbh);
906         if (jbh->b_err) {
907                 com_err(ctx->device_name, jbh->b_err, "%s",
908                         _("reading journal superblock\n"));
909                 return jbh->b_err;
910         }
911
912         jsb = journal->j_superblock;
913         /* If we don't even have JBD2_MAGIC, we probably have a wrong inode */
914         if (jsb->s_header.h_magic != htonl(JBD2_MAGIC_NUMBER))
915                 return e2fsck_journal_fix_bad_inode(ctx, &pctx);
916
917         switch (ntohl(jsb->s_header.h_blocktype)) {
918         case JBD2_SUPERBLOCK_V1:
919                 journal->j_format_version = 1;
920                 if (jsb->s_feature_compat ||
921                     jsb->s_feature_incompat ||
922                     jsb->s_feature_ro_compat ||
923                     jsb->s_nr_users)
924                         clear_v2_journal_fields(journal);
925                 break;
926
927         case JBD2_SUPERBLOCK_V2:
928                 journal->j_format_version = 2;
929                 if (ntohl(jsb->s_nr_users) > 1 &&
930                     uuid_is_null(ctx->fs->super->s_journal_uuid))
931                         clear_v2_journal_fields(journal);
932                 if (ntohl(jsb->s_nr_users) > 1) {
933                         fix_problem(ctx, PR_0_JOURNAL_UNSUPP_MULTIFS, &pctx);
934                         return EXT2_ET_JOURNAL_UNSUPP_VERSION;
935                 }
936                 break;
937
938         /*
939          * These should never appear in a journal super block, so if
940          * they do, the journal is badly corrupted.
941          */
942         case JBD2_DESCRIPTOR_BLOCK:
943         case JBD2_COMMIT_BLOCK:
944         case JBD2_REVOKE_BLOCK:
945                 return EXT2_ET_CORRUPT_JOURNAL_SB;
946
947         /* If we don't understand the superblock major type, but there
948          * is a magic number, then it is likely to be a new format we
949          * just don't understand, so leave it alone. */
950         default:
951                 return EXT2_ET_JOURNAL_UNSUPP_VERSION;
952         }
953
954         if (JBD2_HAS_INCOMPAT_FEATURE(journal, ~JBD2_KNOWN_INCOMPAT_FEATURES))
955                 return EXT2_ET_UNSUPP_FEATURE;
956
957         if (JBD2_HAS_RO_COMPAT_FEATURE(journal, ~JBD2_KNOWN_ROCOMPAT_FEATURES))
958                 return EXT2_ET_RO_UNSUPP_FEATURE;
959
960         /* Checksum v1-3 are mutually exclusive features. */
961         if (jbd2_has_feature_csum2(journal) && jbd2_has_feature_csum3(journal))
962                 return EXT2_ET_CORRUPT_JOURNAL_SB;
963
964         if (jbd2_journal_has_csum_v2or3(journal) &&
965             jbd2_has_feature_checksum(journal))
966                 return EXT2_ET_CORRUPT_JOURNAL_SB;
967
968         if (!e2fsck_journal_verify_csum_type(journal, jsb) ||
969             !e2fsck_journal_sb_csum_verify(journal, jsb))
970                 return EXT2_ET_CORRUPT_JOURNAL_SB;
971
972         if (jbd2_journal_has_csum_v2or3(journal))
973                 journal->j_csum_seed = jbd2_chksum(journal, ~0, jsb->s_uuid,
974                                                    sizeof(jsb->s_uuid));
975
976         /* We have now checked whether we know enough about the journal
977          * format to be able to proceed safely, so any other checks that
978          * fail we should attempt to recover from. */
979         if (jsb->s_blocksize != htonl(journal->j_blocksize)) {
980                 com_err(ctx->program_name, EXT2_ET_CORRUPT_JOURNAL_SB,
981                         _("%s: no valid journal superblock found\n"),
982                         ctx->device_name);
983                 return EXT2_ET_CORRUPT_JOURNAL_SB;
984         }
985
986         if (ntohl(jsb->s_maxlen) < journal->j_total_len)
987                 journal->j_total_len = ntohl(jsb->s_maxlen);
988         else if (ntohl(jsb->s_maxlen) > journal->j_total_len) {
989                 com_err(ctx->program_name, EXT2_ET_CORRUPT_JOURNAL_SB,
990                         _("%s: journal too short\n"),
991                         ctx->device_name);
992                 return EXT2_ET_CORRUPT_JOURNAL_SB;
993         }
994
995         journal->j_tail_sequence = ntohl(jsb->s_sequence);
996         journal->j_transaction_sequence = journal->j_tail_sequence;
997         journal->j_tail = ntohl(jsb->s_start);
998         journal->j_first = ntohl(jsb->s_first);
999         if (jbd2_has_feature_fast_commit(journal)) {
1000                 if (ntohl(jsb->s_maxlen) - jbd2_journal_get_num_fc_blks(jsb)
1001                         < JBD2_MIN_JOURNAL_BLOCKS) {
1002                         com_err(ctx->program_name, EXT2_ET_CORRUPT_JOURNAL_SB,
1003                                 _("%s: incorrect fast commit blocks\n"),
1004                                 ctx->device_name);
1005                         return EXT2_ET_CORRUPT_JOURNAL_SB;
1006                 }
1007                 journal->j_fc_last = ntohl(jsb->s_maxlen);
1008                 journal->j_last = journal->j_fc_last -
1009                                         jbd2_journal_get_num_fc_blks(jsb);
1010                 journal->j_fc_first = journal->j_last + 1;
1011         } else {
1012                 journal->j_last = ntohl(jsb->s_maxlen);
1013         }
1014
1015         return 0;
1016 }
1017
1018 static void e2fsck_journal_reset_super(e2fsck_t ctx, journal_superblock_t *jsb,
1019                                        journal_t *journal)
1020 {
1021         char *p;
1022         union {
1023                 uuid_t uuid;
1024                 __u32 val[4];
1025         } u;
1026         __u32 new_seq = 0;
1027         int i;
1028
1029         /* Leave a valid existing V1 superblock signature alone.
1030          * Anything unrecognisable we overwrite with a new V2
1031          * signature. */
1032
1033         if (jsb->s_header.h_magic != htonl(JBD2_MAGIC_NUMBER) ||
1034             jsb->s_header.h_blocktype != htonl(JBD2_SUPERBLOCK_V1)) {
1035                 jsb->s_header.h_magic = htonl(JBD2_MAGIC_NUMBER);
1036                 jsb->s_header.h_blocktype = htonl(JBD2_SUPERBLOCK_V2);
1037         }
1038
1039         /* Zero out everything else beyond the superblock header */
1040
1041         p = ((char *) jsb) + sizeof(journal_header_t);
1042         memset (p, 0, ctx->fs->blocksize-sizeof(journal_header_t));
1043
1044         jsb->s_blocksize = htonl(ctx->fs->blocksize);
1045         jsb->s_maxlen = htonl(journal->j_total_len);
1046         jsb->s_first = htonl(1);
1047
1048         /* Initialize the journal sequence number so that there is "no"
1049          * chance we will find old "valid" transactions in the journal.
1050          * This avoids the need to zero the whole journal (slow to do,
1051          * and risky when we are just recovering the filesystem).
1052          */
1053         uuid_generate(u.uuid);
1054         for (i = 0; i < 4; i ++)
1055                 new_seq ^= u.val[i];
1056         jsb->s_sequence = htonl(new_seq);
1057         e2fsck_journal_sb_csum_set(journal, jsb);
1058
1059         mark_buffer_dirty(journal->j_sb_buffer);
1060         ll_rw_block(REQ_OP_WRITE, 0, 1, &journal->j_sb_buffer);
1061 }
1062
1063 static errcode_t e2fsck_journal_fix_corrupt_super(e2fsck_t ctx,
1064                                                   journal_t *journal,
1065                                                   struct problem_context *pctx)
1066 {
1067         struct ext2_super_block *sb = ctx->fs->super;
1068         int recover = ext2fs_has_feature_journal_needs_recovery(ctx->fs->super);
1069
1070         if (ext2fs_has_feature_journal(sb)) {
1071                 if (fix_problem(ctx, PR_0_JOURNAL_BAD_SUPER, pctx)) {
1072                         e2fsck_journal_reset_super(ctx, journal->j_superblock,
1073                                                    journal);
1074                         journal->j_transaction_sequence = 1;
1075                         e2fsck_clear_recover(ctx, recover);
1076                         return 0;
1077                 }
1078                 return EXT2_ET_CORRUPT_JOURNAL_SB;
1079         } else if (e2fsck_journal_fix_bad_inode(ctx, pctx))
1080                 return EXT2_ET_CORRUPT_JOURNAL_SB;
1081
1082         return 0;
1083 }
1084
1085 static void e2fsck_journal_release(e2fsck_t ctx, journal_t *journal,
1086                                    int reset, int drop)
1087 {
1088         journal_superblock_t *jsb;
1089
1090         if (drop)
1091                 mark_buffer_clean(journal->j_sb_buffer);
1092         else if (!(ctx->options & E2F_OPT_READONLY)) {
1093                 jsb = journal->j_superblock;
1094                 jsb->s_sequence = htonl(journal->j_tail_sequence);
1095                 if (reset)
1096                         jsb->s_start = 0; /* this marks the journal as empty */
1097                 e2fsck_journal_sb_csum_set(journal, jsb);
1098                 mark_buffer_dirty(journal->j_sb_buffer);
1099         }
1100         brelse(journal->j_sb_buffer);
1101
1102         if (ctx->journal_io) {
1103                 if (ctx->fs && ctx->fs->io != ctx->journal_io)
1104                         io_channel_close(ctx->journal_io);
1105                 ctx->journal_io = 0;
1106         }
1107
1108 #ifndef USE_INODE_IO
1109         if (journal->j_inode)
1110                 ext2fs_free_mem(&journal->j_inode);
1111 #endif
1112         if (journal->j_fs_dev)
1113                 ext2fs_free_mem(&journal->j_fs_dev);
1114         ext2fs_free_mem(&journal);
1115 }
1116
1117 /*
1118  * This function makes sure that the superblock fields regarding the
1119  * journal are consistent.
1120  */
1121 errcode_t e2fsck_check_ext3_journal(e2fsck_t ctx)
1122 {
1123         struct ext2_super_block *sb = ctx->fs->super;
1124         journal_t *journal;
1125         int recover = ext2fs_has_feature_journal_needs_recovery(ctx->fs->super);
1126         struct problem_context pctx;
1127         problem_t problem;
1128         int reset = 0, force_fsck = 0;
1129         errcode_t retval;
1130
1131         /* If we don't have any journal features, don't do anything more */
1132         if (!ext2fs_has_feature_journal(sb) &&
1133             !recover && sb->s_journal_inum == 0 && sb->s_journal_dev == 0 &&
1134             uuid_is_null(sb->s_journal_uuid))
1135                 return 0;
1136
1137         clear_problem_context(&pctx);
1138         pctx.num = sb->s_journal_inum;
1139
1140         retval = e2fsck_get_journal(ctx, &journal);
1141         if (retval) {
1142                 if ((retval == EXT2_ET_BAD_INODE_NUM) ||
1143                     (retval == EXT2_ET_BAD_BLOCK_NUM) ||
1144                     (retval == EXT2_ET_JOURNAL_TOO_SMALL) ||
1145                     (retval == EXT2_ET_NO_JOURNAL))
1146                         return e2fsck_journal_fix_bad_inode(ctx, &pctx);
1147                 return retval;
1148         }
1149
1150         retval = e2fsck_journal_load(journal);
1151         if (retval) {
1152                 if ((retval == EXT2_ET_CORRUPT_JOURNAL_SB) ||
1153                     ((retval == EXT2_ET_UNSUPP_FEATURE) &&
1154                     (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_INCOMPAT,
1155                                   &pctx))) ||
1156                     ((retval == EXT2_ET_RO_UNSUPP_FEATURE) &&
1157                     (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_ROCOMPAT,
1158                                   &pctx))) ||
1159                     ((retval == EXT2_ET_JOURNAL_UNSUPP_VERSION) &&
1160                     (!fix_problem(ctx, PR_0_JOURNAL_UNSUPP_VERSION, &pctx))))
1161                         retval = e2fsck_journal_fix_corrupt_super(ctx, journal,
1162                                                                   &pctx);
1163                 e2fsck_journal_release(ctx, journal, 0, 1);
1164                 return retval;
1165         }
1166
1167         /*
1168          * We want to make the flags consistent here.  We will not leave with
1169          * needs_recovery set but has_journal clear.  We can't get in a loop
1170          * with -y, -n, or -p, only if a user isn't making up their mind.
1171          */
1172 no_has_journal:
1173         if (!ext2fs_has_feature_journal(sb)) {
1174                 recover = ext2fs_has_feature_journal_needs_recovery(sb);
1175                 if (fix_problem(ctx, PR_0_JOURNAL_HAS_JOURNAL, &pctx)) {
1176                         if (recover &&
1177                             !fix_problem(ctx, PR_0_JOURNAL_RECOVER_SET, &pctx))
1178                                 goto no_has_journal;
1179                         /*
1180                          * Need a full fsck if we are releasing a
1181                          * journal stored on a reserved inode.
1182                          */
1183                         force_fsck = recover ||
1184                                 (sb->s_journal_inum < EXT2_FIRST_INODE(sb));
1185                         /* Clear all of the journal fields */
1186                         sb->s_journal_inum = 0;
1187                         sb->s_journal_dev = 0;
1188                         memset(sb->s_journal_uuid, 0,
1189                                sizeof(sb->s_journal_uuid));
1190                         e2fsck_clear_recover(ctx, force_fsck);
1191                 } else if (!(ctx->options & E2F_OPT_READONLY)) {
1192                         ext2fs_set_feature_journal(sb);
1193                         ctx->fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
1194                         ext2fs_mark_super_dirty(ctx->fs);
1195                 }
1196         }
1197
1198         if (ext2fs_has_feature_journal(sb) &&
1199             !ext2fs_has_feature_journal_needs_recovery(sb) &&
1200             journal->j_superblock->s_start != 0) {
1201                 /* Print status information */
1202                 fix_problem(ctx, PR_0_JOURNAL_RECOVERY_CLEAR, &pctx);
1203                 if (ctx->superblock)
1204                         problem = PR_0_JOURNAL_RUN_DEFAULT;
1205                 else
1206                         problem = PR_0_JOURNAL_RUN;
1207                 if (fix_problem(ctx, problem, &pctx)) {
1208                         ctx->options |= E2F_OPT_FORCE;
1209                         ext2fs_set_feature_journal_needs_recovery(sb);
1210                         ext2fs_mark_super_dirty(ctx->fs);
1211                 } else if (fix_problem(ctx,
1212                                        PR_0_JOURNAL_RESET_JOURNAL, &pctx)) {
1213                         reset = 1;
1214                         sb->s_state &= ~EXT2_VALID_FS;
1215                         ext2fs_mark_super_dirty(ctx->fs);
1216                 }
1217                 /*
1218                  * If the user answers no to the above question, we
1219                  * ignore the fact that journal apparently has data;
1220                  * accidentally replaying over valid data would be far
1221                  * worse than skipping a questionable recovery.
1222                  *
1223                  * XXX should we abort with a fatal error here?  What
1224                  * will the ext3 kernel code do if a filesystem with
1225                  * !NEEDS_RECOVERY but with a non-zero
1226                  * journal->j_superblock->s_start is mounted?
1227                  */
1228         }
1229
1230         /*
1231          * If we don't need to do replay the journal, check to see if
1232          * the journal's errno is set; if so, we need to mark the file
1233          * system as being corrupt and clear the journal's s_errno.
1234          */
1235         if (!ext2fs_has_feature_journal_needs_recovery(sb) &&
1236             journal->j_superblock->s_errno) {
1237                 ctx->fs->super->s_state |= EXT2_ERROR_FS;
1238                 ext2fs_mark_super_dirty(ctx->fs);
1239                 journal->j_superblock->s_errno = 0;
1240                 e2fsck_journal_sb_csum_set(journal, journal->j_superblock);
1241                 mark_buffer_dirty(journal->j_sb_buffer);
1242         }
1243
1244         e2fsck_journal_release(ctx, journal, reset, 0);
1245         return retval;
1246 }
1247
1248 static errcode_t recover_ext3_journal(e2fsck_t ctx)
1249 {
1250         struct problem_context  pctx;
1251         journal_t *journal;
1252         errcode_t retval;
1253
1254         clear_problem_context(&pctx);
1255
1256         retval = jbd2_journal_init_revoke_record_cache();
1257         if (retval)
1258                 return retval;
1259
1260         retval = jbd2_journal_init_revoke_table_cache();
1261         if (retval)
1262                 return retval;
1263
1264         retval = e2fsck_get_journal(ctx, &journal);
1265         if (retval)
1266                 return retval;
1267
1268         retval = e2fsck_journal_load(journal);
1269         if (retval)
1270                 goto errout;
1271
1272         retval = jbd2_journal_init_revoke(journal, 1024);
1273         if (retval)
1274                 goto errout;
1275
1276         retval = -jbd2_journal_recover(journal);
1277         if (retval)
1278                 goto errout;
1279
1280         if (journal->j_failed_commit) {
1281                 pctx.ino = journal->j_failed_commit;
1282                 fix_problem(ctx, PR_0_JNL_TXN_CORRUPT, &pctx);
1283                 journal->j_superblock->s_errno = -EINVAL;
1284                 mark_buffer_dirty(journal->j_sb_buffer);
1285         }
1286
1287         journal->j_tail_sequence = journal->j_transaction_sequence;
1288
1289 errout:
1290         jbd2_journal_destroy_revoke(journal);
1291         jbd2_journal_destroy_revoke_record_cache();
1292         jbd2_journal_destroy_revoke_table_cache();
1293         e2fsck_journal_release(ctx, journal, 1, 0);
1294         return retval;
1295 }
1296
1297 errcode_t e2fsck_run_ext3_journal(e2fsck_t ctx)
1298 {
1299         io_manager io_ptr = ctx->fs->io->manager;
1300         int blocksize = ctx->fs->blocksize;
1301         errcode_t       retval, recover_retval;
1302         io_stats        stats = 0;
1303         unsigned long long kbytes_written = 0;
1304
1305         printf(_("%s: recovering journal\n"), ctx->device_name);
1306         if (ctx->options & E2F_OPT_READONLY) {
1307                 printf(_("%s: won't do journal recovery while read-only\n"),
1308                        ctx->device_name);
1309                 return EXT2_ET_FILE_RO;
1310         }
1311
1312         if (ctx->fs->flags & EXT2_FLAG_DIRTY)
1313                 ext2fs_flush(ctx->fs);  /* Force out any modifications */
1314
1315         recover_retval = recover_ext3_journal(ctx);
1316
1317         /*
1318          * Reload the filesystem context to get up-to-date data from disk
1319          * because journal recovery will change the filesystem under us.
1320          */
1321         if (ctx->fs->super->s_kbytes_written &&
1322             ctx->fs->io->manager->get_stats)
1323                 ctx->fs->io->manager->get_stats(ctx->fs->io, &stats);
1324         if (stats && stats->bytes_written)
1325                 kbytes_written = stats->bytes_written >> 10;
1326
1327         ext2fs_mmp_stop(ctx->fs);
1328         ext2fs_free(ctx->fs);
1329         retval = ext2fs_open(ctx->filesystem_name, ctx->openfs_flags,
1330                              ctx->superblock, blocksize, io_ptr,
1331                              &ctx->fs);
1332         if (retval) {
1333                 com_err(ctx->program_name, retval,
1334                         _("while trying to re-open %s"),
1335                         ctx->device_name);
1336                 fatal_error(ctx, 0);
1337         }
1338         ctx->fs->priv_data = ctx;
1339         ctx->fs->now = ctx->now;
1340         ctx->fs->flags |= EXT2_FLAG_MASTER_SB_ONLY;
1341         ctx->fs->super->s_kbytes_written += kbytes_written;
1342
1343         /* Set the superblock flags */
1344         e2fsck_clear_recover(ctx, recover_retval != 0);
1345
1346         /*
1347          * Do one last sanity check, and propagate journal->s_errno to
1348          * the EXT2_ERROR_FS flag in the fs superblock if needed.
1349          */
1350         retval = e2fsck_check_ext3_journal(ctx);
1351         return retval ? retval : recover_retval;
1352 }
1353
1354 /*
1355  * This function will move the journal inode from a visible file in
1356  * the filesystem directory hierarchy to the reserved inode if necessary.
1357  */
1358 static const char * const journal_names[] = {
1359         ".journal", "journal", ".journal.dat", "journal.dat", 0 };
1360
1361 void e2fsck_move_ext3_journal(e2fsck_t ctx)
1362 {
1363         struct ext2_super_block *sb = ctx->fs->super;
1364         struct problem_context  pctx;
1365         struct ext2_inode       inode;
1366         ext2_filsys             fs = ctx->fs;
1367         ext2_ino_t              ino;
1368         errcode_t               retval;
1369         const char * const *    cpp;
1370         dgrp_t                  group;
1371         int                     mount_flags;
1372
1373         clear_problem_context(&pctx);
1374
1375         /*
1376          * If the filesystem is opened read-only, or there is no
1377          * journal, then do nothing.
1378          */
1379         if ((ctx->options & E2F_OPT_READONLY) ||
1380             (sb->s_journal_inum == 0) ||
1381             !ext2fs_has_feature_journal(sb))
1382                 return;
1383
1384         /*
1385          * Read in the journal inode
1386          */
1387         if (ext2fs_read_inode(fs, sb->s_journal_inum, &inode) != 0)
1388                 return;
1389
1390         /*
1391          * If it's necessary to backup the journal inode, do so.
1392          */
1393         if ((sb->s_jnl_backup_type == 0) ||
1394             ((sb->s_jnl_backup_type == EXT3_JNL_BACKUP_BLOCKS) &&
1395              memcmp(inode.i_block, sb->s_jnl_blocks, EXT2_N_BLOCKS*4))) {
1396                 if (fix_problem(ctx, PR_0_BACKUP_JNL, &pctx)) {
1397                         memcpy(sb->s_jnl_blocks, inode.i_block,
1398                                EXT2_N_BLOCKS*4);
1399                         sb->s_jnl_blocks[15] = inode.i_size_high;
1400                         sb->s_jnl_blocks[16] = inode.i_size;
1401                         sb->s_jnl_backup_type = EXT3_JNL_BACKUP_BLOCKS;
1402                         ext2fs_mark_super_dirty(fs);
1403                         fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
1404                 }
1405         }
1406
1407         /*
1408          * If the journal is already the hidden inode, then do nothing
1409          */
1410         if (sb->s_journal_inum == EXT2_JOURNAL_INO)
1411                 return;
1412
1413         /*
1414          * The journal inode had better have only one link and not be readable.
1415          */
1416         if (inode.i_links_count != 1)
1417                 return;
1418
1419         /*
1420          * If the filesystem is mounted, or we can't tell whether
1421          * or not it's mounted, do nothing.
1422          */
1423         retval = ext2fs_check_if_mounted(ctx->filesystem_name, &mount_flags);
1424         if (retval || (mount_flags & EXT2_MF_MOUNTED))
1425                 return;
1426
1427         /*
1428          * If we can't find the name of the journal inode, then do
1429          * nothing.
1430          */
1431         for (cpp = journal_names; *cpp; cpp++) {
1432                 retval = ext2fs_lookup(fs, EXT2_ROOT_INO, *cpp,
1433                                        strlen(*cpp), 0, &ino);
1434                 if ((retval == 0) && (ino == sb->s_journal_inum))
1435                         break;
1436         }
1437         if (*cpp == 0)
1438                 return;
1439
1440         /* We need the inode bitmap to be loaded */
1441         retval = ext2fs_read_bitmaps(fs);
1442         if (retval)
1443                 return;
1444
1445         pctx.str = *cpp;
1446         if (!fix_problem(ctx, PR_0_MOVE_JOURNAL, &pctx))
1447                 return;
1448
1449         /*
1450          * OK, we've done all the checks, let's actually move the
1451          * journal inode.  Errors at this point mean we need to force
1452          * an ext2 filesystem check.
1453          */
1454         if ((retval = ext2fs_unlink(fs, EXT2_ROOT_INO, *cpp, ino, 0)) != 0)
1455                 goto err_out;
1456         if ((retval = ext2fs_write_inode(fs, EXT2_JOURNAL_INO, &inode)) != 0)
1457                 goto err_out;
1458         sb->s_journal_inum = EXT2_JOURNAL_INO;
1459         ext2fs_mark_super_dirty(fs);
1460         fs->flags &= ~EXT2_FLAG_MASTER_SB_ONLY;
1461         inode.i_links_count = 0;
1462         inode.i_dtime = ctx->now;
1463         if ((retval = ext2fs_write_inode(fs, ino, &inode)) != 0)
1464                 goto err_out;
1465
1466         group = ext2fs_group_of_ino(fs, ino);
1467         ext2fs_unmark_inode_bitmap2(fs->inode_map, ino);
1468         ext2fs_mark_ib_dirty(fs);
1469         ext2fs_bg_free_inodes_count_set(fs, group, ext2fs_bg_free_inodes_count(fs, group) + 1);
1470         ext2fs_group_desc_csum_set(fs, group);
1471         fs->super->s_free_inodes_count++;
1472         return;
1473
1474 err_out:
1475         pctx.errcode = retval;
1476         fix_problem(ctx, PR_0_ERR_MOVE_JOURNAL, &pctx);
1477         fs->super->s_state &= ~EXT2_VALID_FS;
1478         ext2fs_mark_super_dirty(fs);
1479         return;
1480 }
1481
1482 /*
1483  * This function makes sure the superblock hint for the external
1484  * journal is correct.
1485  */
1486 int e2fsck_fix_ext3_journal_hint(e2fsck_t ctx)
1487 {
1488         struct ext2_super_block *sb = ctx->fs->super;
1489         struct problem_context pctx;
1490         char uuid[37], *journal_name;
1491         struct stat st;
1492
1493         if (!ext2fs_has_feature_journal(sb) ||
1494             uuid_is_null(sb->s_journal_uuid))
1495                 return 0;
1496
1497         uuid_unparse(sb->s_journal_uuid, uuid);
1498         journal_name = blkid_get_devname(ctx->blkid, "UUID", uuid);
1499         if (!journal_name)
1500                 return 0;
1501
1502         if (stat(journal_name, &st) < 0) {
1503                 free(journal_name);
1504                 return 0;
1505         }
1506
1507         if (st.st_rdev != sb->s_journal_dev) {
1508                 clear_problem_context(&pctx);
1509                 pctx.num = st.st_rdev;
1510                 if (fix_problem(ctx, PR_0_EXTERNAL_JOURNAL_HINT, &pctx)) {
1511                         sb->s_journal_dev = st.st_rdev;
1512                         ext2fs_mark_super_dirty(ctx->fs);
1513                 }
1514         }
1515
1516         free(journal_name);
1517         return 0;
1518 }