Whamcloud - gitweb
libext2fs: readahead for meta_bg
[tools/e2fsprogs.git] / lib / ext2fs / openfs.c
1 /*
2  * openfs.c --- open an ext2 filesystem
3  *
4  * Copyright (C) 1993, 1994, 1995, 1996 Theodore Ts'o.
5  *
6  * %Begin-Header%
7  * This file may be redistributed under the terms of the GNU Library
8  * General Public License, version 2.
9  * %End-Header%
10  */
11
12 #include "config.h"
13 #include <stdio.h>
14 #include <string.h>
15 #if HAVE_UNISTD_H
16 #include <unistd.h>
17 #endif
18 #include <fcntl.h>
19 #include <time.h>
20 #if HAVE_SYS_STAT_H
21 #include <sys/stat.h>
22 #endif
23 #if HAVE_SYS_TYPES_H
24 #include <sys/types.h>
25 #endif
26 #ifdef HAVE_ERRNO_H
27 #include <errno.h>
28 #endif
29
30 #include "ext2_fs.h"
31
32
33 #include "ext2fs.h"
34 #include "e2image.h"
35
36 blk64_t ext2fs_descriptor_block_loc2(ext2_filsys fs, blk64_t group_block,
37                                      dgrp_t i)
38 {
39         int     bg;
40         int     has_super = 0, group_zero_adjust = 0;
41         blk64_t ret_blk;
42
43         /*
44          * On a bigalloc FS with 1K blocks, block 0 is reserved for non-ext4
45          * stuff, so adjust for that if we're being asked for group 0.
46          */
47         if (i == 0 && fs->blocksize == 1024 && EXT2FS_CLUSTER_RATIO(fs) > 1)
48                 group_zero_adjust = 1;
49
50         if (!ext2fs_has_feature_meta_bg(fs->super) ||
51             (i < fs->super->s_first_meta_bg))
52                 return group_block + i + 1 + group_zero_adjust;
53
54         bg = EXT2_DESC_PER_BLOCK(fs->super) * i;
55         if (ext2fs_bg_has_super(fs, bg))
56                 has_super = 1;
57         ret_blk = ext2fs_group_first_block2(fs, bg);
58         /*
59          * If group_block is not the normal value, we're trying to use
60          * the backup group descriptors and superblock --- so use the
61          * alternate location of the second block group in the
62          * metablock group.  Ideally we should be testing each bg
63          * descriptor block individually for correctness, but we don't
64          * have the infrastructure in place to do that.
65          */
66         if (group_block != fs->super->s_first_data_block &&
67             ((ret_blk + has_super + fs->super->s_blocks_per_group) <
68              ext2fs_blocks_count(fs->super))) {
69                 ret_blk += fs->super->s_blocks_per_group;
70
71                 /*
72                  * If we're going to jump forward a block group, make sure
73                  * that we adjust has_super to account for the next group's
74                  * backup superblock (or lack thereof).
75                  */
76                 if (ext2fs_bg_has_super(fs, bg + 1))
77                         has_super = 1;
78                 else
79                         has_super = 0;
80         }
81         return ret_blk + has_super + group_zero_adjust;
82 }
83
84 blk_t ext2fs_descriptor_block_loc(ext2_filsys fs, blk_t group_block, dgrp_t i)
85 {
86         return ext2fs_descriptor_block_loc2(fs, group_block, i);
87 }
88
89 errcode_t ext2fs_open(const char *name, int flags, int superblock,
90                       unsigned int block_size, io_manager manager,
91                       ext2_filsys *ret_fs)
92 {
93         return ext2fs_open2(name, 0, flags, superblock, block_size,
94                             manager, ret_fs);
95 }
96
97 /*
98  *  Note: if superblock is non-zero, block-size must also be non-zero.
99  *      Superblock and block_size can be zero to use the default size.
100  *
101  * Valid flags for ext2fs_open()
102  *
103  *      EXT2_FLAG_RW    - Open the filesystem for read/write.
104  *      EXT2_FLAG_FORCE - Open the filesystem even if some of the
105  *                              features aren't supported.
106  *      EXT2_FLAG_JOURNAL_DEV_OK - Open an ext3 journal device
107  *      EXT2_FLAG_SKIP_MMP - Open without multi-mount protection check.
108  *      EXT2_FLAG_64BITS - Allow 64-bit bitfields (needed for large
109  *                              filesystems)
110  */
111 errcode_t ext2fs_open2(const char *name, const char *io_options,
112                        int flags, int superblock,
113                        unsigned int block_size, io_manager manager,
114                        ext2_filsys *ret_fs)
115 {
116         ext2_filsys     fs;
117         errcode_t       retval;
118         unsigned long   i, first_meta_bg;
119         __u32           features;
120         unsigned int    blocks_per_group, io_flags;
121         blk64_t         group_block, blk;
122         char            *dest, *cp;
123         int             group_zero_adjust = 0;
124 #ifdef WORDS_BIGENDIAN
125         unsigned int    groups_per_block;
126         struct ext2_group_desc *gdp;
127         int             j;
128 #endif
129
130         EXT2_CHECK_MAGIC(manager, EXT2_ET_MAGIC_IO_MANAGER);
131
132         retval = ext2fs_get_mem(sizeof(struct struct_ext2_filsys), &fs);
133         if (retval)
134                 return retval;
135
136         memset(fs, 0, sizeof(struct struct_ext2_filsys));
137         fs->magic = EXT2_ET_MAGIC_EXT2FS_FILSYS;
138         fs->flags = flags;
139         /* don't overwrite sb backups unless flag is explicitly cleared */
140         fs->flags |= EXT2_FLAG_MASTER_SB_ONLY;
141         fs->umask = 022;
142         retval = ext2fs_get_mem(strlen(name)+1, &fs->device_name);
143         if (retval)
144                 goto cleanup;
145         strcpy(fs->device_name, name);
146         cp = strchr(fs->device_name, '?');
147         if (!io_options && cp) {
148                 *cp++ = 0;
149                 io_options = cp;
150         }
151
152         io_flags = 0;
153         if (flags & EXT2_FLAG_RW)
154                 io_flags |= IO_FLAG_RW;
155         if (flags & EXT2_FLAG_EXCLUSIVE)
156                 io_flags |= IO_FLAG_EXCLUSIVE;
157         if (flags & EXT2_FLAG_DIRECT_IO)
158                 io_flags |= IO_FLAG_DIRECT_IO;
159         retval = manager->open(fs->device_name, io_flags, &fs->io);
160         if (retval)
161                 goto cleanup;
162         if (io_options &&
163             (retval = io_channel_set_options(fs->io, io_options)))
164                 goto cleanup;
165         fs->image_io = fs->io;
166         fs->io->app_data = fs;
167         retval = io_channel_alloc_buf(fs->io, -SUPERBLOCK_SIZE, &fs->super);
168         if (retval)
169                 goto cleanup;
170         if (flags & EXT2_FLAG_IMAGE_FILE) {
171                 retval = ext2fs_get_mem(sizeof(struct ext2_image_hdr),
172                                         &fs->image_header);
173                 if (retval)
174                         goto cleanup;
175                 retval = io_channel_read_blk(fs->io, 0,
176                                              -(int)sizeof(struct ext2_image_hdr),
177                                              fs->image_header);
178                 if (retval)
179                         goto cleanup;
180                 if (fs->image_header->magic_number != EXT2_ET_MAGIC_E2IMAGE)
181                         return EXT2_ET_MAGIC_E2IMAGE;
182                 superblock = 1;
183                 block_size = fs->image_header->fs_blocksize;
184         }
185
186         /*
187          * If the user specifies a specific block # for the
188          * superblock, then he/she must also specify the block size!
189          * Otherwise, read the master superblock located at offset
190          * SUPERBLOCK_OFFSET from the start of the partition.
191          *
192          * Note: we only save a backup copy of the superblock if we
193          * are reading the superblock from the primary superblock location.
194          */
195         if (superblock) {
196                 if (!block_size) {
197                         retval = EXT2_ET_INVALID_ARGUMENT;
198                         goto cleanup;
199                 }
200                 io_channel_set_blksize(fs->io, block_size);
201                 group_block = superblock;
202                 fs->orig_super = 0;
203         } else {
204                 io_channel_set_blksize(fs->io, SUPERBLOCK_OFFSET);
205                 superblock = 1;
206                 group_block = 0;
207                 retval = ext2fs_get_mem(SUPERBLOCK_SIZE, &fs->orig_super);
208                 if (retval)
209                         goto cleanup;
210         }
211         retval = io_channel_read_blk(fs->io, superblock, -SUPERBLOCK_SIZE,
212                                      fs->super);
213         if (retval)
214                 goto cleanup;
215         if (fs->orig_super)
216                 memcpy(fs->orig_super, fs->super, SUPERBLOCK_SIZE);
217
218         if (!(fs->flags & EXT2_FLAG_IGNORE_CSUM_ERRORS)) {
219                 retval = 0;
220                 if (!ext2fs_verify_csum_type(fs, fs->super))
221                         retval = EXT2_ET_UNKNOWN_CSUM;
222                 if (!ext2fs_superblock_csum_verify(fs, fs->super))
223                         retval = EXT2_ET_SB_CSUM_INVALID;
224         }
225
226 #ifdef WORDS_BIGENDIAN
227         fs->flags |= EXT2_FLAG_SWAP_BYTES;
228         ext2fs_swap_super(fs->super);
229 #else
230         if (fs->flags & EXT2_FLAG_SWAP_BYTES) {
231                 retval = EXT2_ET_UNIMPLEMENTED;
232                 goto cleanup;
233         }
234 #endif
235
236         if (fs->super->s_magic != EXT2_SUPER_MAGIC)
237                 retval = EXT2_ET_BAD_MAGIC;
238         if (retval)
239                 goto cleanup;
240
241         if (fs->super->s_rev_level > EXT2_LIB_CURRENT_REV) {
242                 retval = EXT2_ET_REV_TOO_HIGH;
243                 goto cleanup;
244         }
245
246         /*
247          * Check for feature set incompatibility
248          */
249         if (!(flags & EXT2_FLAG_FORCE)) {
250                 features = fs->super->s_feature_incompat;
251 #ifdef EXT2_LIB_SOFTSUPP_INCOMPAT
252                 if (flags & EXT2_FLAG_SOFTSUPP_FEATURES)
253                         features &= ~EXT2_LIB_SOFTSUPP_INCOMPAT;
254 #endif
255                 if (features & ~EXT2_LIB_FEATURE_INCOMPAT_SUPP) {
256                         retval = EXT2_ET_UNSUPP_FEATURE;
257                         goto cleanup;
258                 }
259
260                 features = fs->super->s_feature_ro_compat;
261 #ifdef EXT2_LIB_SOFTSUPP_RO_COMPAT
262                 if (flags & EXT2_FLAG_SOFTSUPP_FEATURES)
263                         features &= ~EXT2_LIB_SOFTSUPP_RO_COMPAT;
264 #endif
265                 if ((flags & EXT2_FLAG_RW) &&
266                     (features & ~EXT2_LIB_FEATURE_RO_COMPAT_SUPP)) {
267                         retval = EXT2_ET_RO_UNSUPP_FEATURE;
268                         goto cleanup;
269                 }
270
271                 if (!(flags & EXT2_FLAG_JOURNAL_DEV_OK) &&
272                     ext2fs_has_feature_journal_dev(fs->super)) {
273                         retval = EXT2_ET_UNSUPP_FEATURE;
274                         goto cleanup;
275                 }
276         }
277
278         if ((fs->super->s_log_block_size + EXT2_MIN_BLOCK_LOG_SIZE) >
279             EXT2_MAX_BLOCK_LOG_SIZE) {
280                 retval = EXT2_ET_CORRUPT_SUPERBLOCK;
281                 goto cleanup;
282         }
283
284         /*
285          * bigalloc requires cluster-aware bitfield operations, which at the
286          * moment means we need EXT2_FLAG_64BITS.
287          */
288         if (ext2fs_has_feature_bigalloc(fs->super) &&
289             !(flags & EXT2_FLAG_64BITS)) {
290                 retval = EXT2_ET_CANT_USE_LEGACY_BITMAPS;
291                 goto cleanup;
292         }
293
294         if (!ext2fs_has_feature_bigalloc(fs->super) &&
295             (fs->super->s_log_block_size != fs->super->s_log_cluster_size)) {
296                 retval = EXT2_ET_CORRUPT_SUPERBLOCK;
297                 goto cleanup;
298         }
299         fs->fragsize = fs->blocksize = EXT2_BLOCK_SIZE(fs->super);
300         if (EXT2_INODE_SIZE(fs->super) < EXT2_GOOD_OLD_INODE_SIZE) {
301                 retval = EXT2_ET_CORRUPT_SUPERBLOCK;
302                 goto cleanup;
303         }
304
305         /* Enforce the block group descriptor size */
306         if (ext2fs_has_feature_64bit(fs->super)) {
307                 if (fs->super->s_desc_size < EXT2_MIN_DESC_SIZE_64BIT) {
308                         retval = EXT2_ET_BAD_DESC_SIZE;
309                         goto cleanup;
310                 }
311         } else {
312                 if (fs->super->s_desc_size &&
313                     fs->super->s_desc_size != EXT2_MIN_DESC_SIZE) {
314                         retval = EXT2_ET_BAD_DESC_SIZE;
315                         goto cleanup;
316                 }
317         }
318
319         fs->cluster_ratio_bits = fs->super->s_log_cluster_size -
320                 fs->super->s_log_block_size;
321         if (EXT2_BLOCKS_PER_GROUP(fs->super) !=
322             EXT2_CLUSTERS_PER_GROUP(fs->super) << fs->cluster_ratio_bits) {
323                 retval = EXT2_ET_CORRUPT_SUPERBLOCK;
324                 goto cleanup;
325         }
326         fs->inode_blocks_per_group = ((EXT2_INODES_PER_GROUP(fs->super) *
327                                        EXT2_INODE_SIZE(fs->super) +
328                                        EXT2_BLOCK_SIZE(fs->super) - 1) /
329                                       EXT2_BLOCK_SIZE(fs->super));
330         if (block_size) {
331                 if (block_size != fs->blocksize) {
332                         retval = EXT2_ET_UNEXPECTED_BLOCK_SIZE;
333                         goto cleanup;
334                 }
335         }
336         /*
337          * Set the blocksize to the filesystem's blocksize.
338          */
339         io_channel_set_blksize(fs->io, fs->blocksize);
340
341         /*
342          * If this is an external journal device, don't try to read
343          * the group descriptors, because they're not there.
344          */
345         if (ext2fs_has_feature_journal_dev(fs->super)) {
346                 fs->group_desc_count = 0;
347                 *ret_fs = fs;
348                 return 0;
349         }
350
351         if (EXT2_INODES_PER_GROUP(fs->super) == 0) {
352                 retval = EXT2_ET_CORRUPT_SUPERBLOCK;
353                 goto cleanup;
354         }
355         /* Precompute the FS UUID to seed other checksums */
356         ext2fs_init_csum_seed(fs);
357
358         /*
359          * Read group descriptors
360          */
361         blocks_per_group = EXT2_BLOCKS_PER_GROUP(fs->super);
362         if (blocks_per_group == 0 ||
363             blocks_per_group > EXT2_MAX_BLOCKS_PER_GROUP(fs->super) ||
364             fs->inode_blocks_per_group > EXT2_MAX_INODES_PER_GROUP(fs->super) ||
365            EXT2_DESC_PER_BLOCK(fs->super) == 0 ||
366            fs->super->s_first_data_block >= ext2fs_blocks_count(fs->super)) {
367                 retval = EXT2_ET_CORRUPT_SUPERBLOCK;
368                 goto cleanup;
369         }
370         fs->group_desc_count = ext2fs_div64_ceil(ext2fs_blocks_count(fs->super) -
371                                                  fs->super->s_first_data_block,
372                                                  blocks_per_group);
373         if (fs->group_desc_count * EXT2_INODES_PER_GROUP(fs->super) !=
374             fs->super->s_inodes_count) {
375                 retval = EXT2_ET_CORRUPT_SUPERBLOCK;
376                 goto cleanup;
377         }
378         fs->desc_blocks = ext2fs_div_ceil(fs->group_desc_count,
379                                           EXT2_DESC_PER_BLOCK(fs->super));
380         retval = ext2fs_get_array(fs->desc_blocks, fs->blocksize,
381                                 &fs->group_desc);
382         if (retval)
383                 goto cleanup;
384         if (!group_block)
385                 group_block = fs->super->s_first_data_block;
386         /*
387          * On a FS with a 1K blocksize, block 0 is reserved for bootloaders
388          * so we must increment block numbers to any group 0 items.
389          *
390          * However, we cannot touch group_block directly because in the meta_bg
391          * case, the ext2fs_descriptor_block_loc2() function will interpret
392          * group_block != s_first_data_block to mean that we want to access the
393          * backup group descriptors.  This is not what we want if the caller
394          * set superblock == 0 (i.e. auto-detect the superblock), which is
395          * what's going on here.
396          */
397         if (group_block == 0 && fs->blocksize == 1024)
398                 group_zero_adjust = 1;
399         dest = (char *) fs->group_desc;
400 #ifdef WORDS_BIGENDIAN
401         groups_per_block = EXT2_DESC_PER_BLOCK(fs->super);
402 #endif
403         if (ext2fs_has_feature_meta_bg(fs->super)) {
404                 first_meta_bg = fs->super->s_first_meta_bg;
405                 if (first_meta_bg > fs->desc_blocks)
406                         first_meta_bg = fs->desc_blocks;
407         } else
408                 first_meta_bg = fs->desc_blocks;
409         if (first_meta_bg) {
410                 retval = io_channel_read_blk(fs->io, group_block +
411                                              group_zero_adjust + 1,
412                                              first_meta_bg, dest);
413                 if (retval)
414                         goto cleanup;
415 #ifdef WORDS_BIGENDIAN
416                 gdp = (struct ext2_group_desc *) dest;
417                 for (j=0; j < groups_per_block*first_meta_bg; j++) {
418                         gdp = ext2fs_group_desc(fs, fs->group_desc, j);
419                         ext2fs_swap_group_desc2(fs, gdp);
420                 }
421 #endif
422                 dest += fs->blocksize*first_meta_bg;
423         }
424
425         for (i = first_meta_bg ; i < fs->desc_blocks; i++) {
426                 blk = ext2fs_descriptor_block_loc2(fs, group_block, i);
427                 io_channel_cache_readahead(fs->io, blk, 1);
428         }
429
430         for (i=first_meta_bg ; i < fs->desc_blocks; i++) {
431                 blk = ext2fs_descriptor_block_loc2(fs, group_block, i);
432                 retval = io_channel_read_blk64(fs->io, blk, 1, dest);
433                 if (retval)
434                         goto cleanup;
435 #ifdef WORDS_BIGENDIAN
436                 for (j=0; j < groups_per_block; j++) {
437                         gdp = ext2fs_group_desc(fs, fs->group_desc,
438                                                 i * groups_per_block + j);
439                         ext2fs_swap_group_desc2(fs, gdp);
440                 }
441 #endif
442                 dest += fs->blocksize;
443         }
444
445         fs->stride = fs->super->s_raid_stride;
446
447         /*
448          * If recovery is from backup superblock, Clear _UNININT flags &
449          * reset bg_itable_unused to zero
450          */
451         if (superblock > 1 && ext2fs_has_group_desc_csum(fs)) {
452                 dgrp_t group;
453
454                 for (group = 0; group < fs->group_desc_count; group++) {
455                         ext2fs_bg_flags_clear(fs, group, EXT2_BG_BLOCK_UNINIT);
456                         ext2fs_bg_flags_clear(fs, group, EXT2_BG_INODE_UNINIT);
457                         ext2fs_bg_itable_unused_set(fs, group, 0);
458                         /* The checksum will be reset later, but fix it here
459                          * anyway to avoid printing a lot of spurious errors. */
460                         ext2fs_group_desc_csum_set(fs, group);
461                 }
462                 if (fs->flags & EXT2_FLAG_RW)
463                         ext2fs_mark_super_dirty(fs);
464         }
465
466         if (ext2fs_has_feature_mmp(fs->super) &&
467             !(flags & EXT2_FLAG_SKIP_MMP) &&
468             (flags & (EXT2_FLAG_RW | EXT2_FLAG_EXCLUSIVE))) {
469                 retval = ext2fs_mmp_start(fs);
470                 if (retval) {
471                         fs->flags |= EXT2_FLAG_SKIP_MMP; /* just do cleanup */
472                         ext2fs_mmp_stop(fs);
473                         goto cleanup;
474                 }
475         }
476
477         fs->flags &= ~EXT2_FLAG_NOFREE_ON_ERROR;
478         *ret_fs = fs;
479
480         return 0;
481 cleanup:
482         if (!(flags & EXT2_FLAG_NOFREE_ON_ERROR)) {
483                 ext2fs_free(fs);
484                 fs = NULL;
485         }
486         *ret_fs = fs;
487         return retval;
488 }
489
490 /*
491  * Set/get the filesystem data I/O channel.
492  *
493  * These functions are only valid if EXT2_FLAG_IMAGE_FILE is true.
494  */
495 errcode_t ext2fs_get_data_io(ext2_filsys fs, io_channel *old_io)
496 {
497         if ((fs->flags & EXT2_FLAG_IMAGE_FILE) == 0)
498                 return EXT2_ET_NOT_IMAGE_FILE;
499         if (old_io) {
500                 *old_io = (fs->image_io == fs->io) ? 0 : fs->io;
501         }
502         return 0;
503 }
504
505 errcode_t ext2fs_set_data_io(ext2_filsys fs, io_channel new_io)
506 {
507         if ((fs->flags & EXT2_FLAG_IMAGE_FILE) == 0)
508                 return EXT2_ET_NOT_IMAGE_FILE;
509         fs->io = new_io ? new_io : fs->image_io;
510         return 0;
511 }
512
513 errcode_t ext2fs_rewrite_to_io(ext2_filsys fs, io_channel new_io)
514 {
515         errcode_t err;
516
517         if ((fs->flags & EXT2_FLAG_IMAGE_FILE) == 0)
518                 return EXT2_ET_NOT_IMAGE_FILE;
519         err = io_channel_set_blksize(new_io, fs->blocksize);
520         if (err)
521                 return err;
522         if ((new_io == fs->image_io) || (new_io == fs->io))
523                 return 0;
524         if ((fs->image_io != fs->io) &&
525             fs->image_io)
526                 io_channel_close(fs->image_io);
527         if (fs->io)
528                 io_channel_close(fs->io);
529         fs->io = fs->image_io = new_io;
530         fs->flags |= EXT2_FLAG_DIRTY | EXT2_FLAG_RW |
531                 EXT2_FLAG_BB_DIRTY | EXT2_FLAG_IB_DIRTY;
532         fs->flags &= ~EXT2_FLAG_IMAGE_FILE;
533         return 0;
534 }