2 * extents.c --- rebuild extent tree
4 * Copyright (C) 2014 Oracle.
7 * This file may be redistributed under the terms of the GNU Public
23 #define NUM_EXTENTS 341 /* about one ETB' worth of extents */
25 static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino);
27 /* Schedule an inode to have its extent tree rebuilt during pass 1E. */
28 errcode_t e2fsck_rebuild_extents_later(e2fsck_t ctx, ext2_ino_t ino)
32 if (!ext2fs_has_feature_extents(ctx->fs->super) ||
33 (ctx->options & E2F_OPT_NO) ||
34 (ino != EXT2_ROOT_INO && ino < ctx->fs->super->s_first_ino))
37 if (ctx->flags & E2F_FLAG_ALLOC_OK)
38 return e2fsck_rebuild_extents(ctx, ino);
40 if (!ctx->inodes_to_rebuild)
41 retval = e2fsck_allocate_inode_bitmap(ctx->fs,
42 _("extent rebuild inode map"),
45 &ctx->inodes_to_rebuild);
49 ext2fs_mark_inode_bitmap2(ctx->inodes_to_rebuild, ino);
53 /* Ask if an inode will have its extents rebuilt during pass 1E. */
54 int e2fsck_ino_will_be_rebuilt(e2fsck_t ctx, ext2_ino_t ino)
56 if (!ctx->inodes_to_rebuild)
58 return ext2fs_test_inode_bitmap2(ctx->inodes_to_rebuild, ino);
61 static errcode_t load_extents(e2fsck_t ctx, struct extent_list *list)
63 ext2_filsys fs = ctx->fs;
64 ext2_extent_handle_t handle;
65 struct ext2fs_extent extent;
68 retval = ext2fs_extent_open(fs, list->ino, &handle);
72 retval = ext2fs_extent_get(handle, EXT2_EXTENT_ROOT, &extent);
77 if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
80 /* Internal node; free it and we'll re-allocate it later */
81 if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF)) {
82 #if defined(DEBUG) || defined(DEBUG_FREE)
83 printf("ino=%d free=%llu bf=%llu\n", list->ino,
84 extent.e_pblk, list->blocks_freed + 1);
87 ext2fs_block_alloc_stats2(fs, extent.e_pblk, -1);
92 /* Can we attach it to the previous extent? */
94 struct ext2fs_extent *last = list->extents +
96 blk64_t end = last->e_len + extent.e_len;
98 if (last->e_pblk + last->e_len == extent.e_pblk &&
99 last->e_lblk + last->e_len == extent.e_lblk &&
100 (last->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ==
101 (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
102 end < (1ULL << 32)) {
103 last->e_len += extent.e_len;
105 printf("R: ino=%d len=%u\n", list->ino,
112 /* Do we need to expand? */
113 if (list->count == list->size) {
114 unsigned int new_size = (list->size + NUM_EXTENTS) *
115 sizeof(struct ext2fs_extent);
116 retval = ext2fs_resize_mem(0, new_size, &list->extents);
119 list->size += NUM_EXTENTS;
122 /* Add a new extent */
123 memcpy(list->extents + list->count, &extent, sizeof(extent));
125 printf("R: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino,
126 extent.e_pblk, extent.e_lblk, extent.e_len);
130 retval = ext2fs_extent_get(handle, EXT2_EXTENT_NEXT, &extent);
131 } while (retval == 0);
134 /* Ok if we run off the end */
135 if (retval == EXT2_ET_EXTENT_NO_NEXT)
137 ext2fs_extent_free(handle);
141 static int find_blocks(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt,
142 blk64_t ref_blk EXT2FS_ATTR((unused)),
143 int ref_offset EXT2FS_ATTR((unused)), void *priv_data)
145 struct extent_list *list = priv_data;
149 #if defined(DEBUG) || defined(DEBUG_FREE)
150 printf("ino=%d free=%llu bf=%llu\n", list->ino, *blocknr,
151 list->blocks_freed + 1);
153 list->blocks_freed++;
154 ext2fs_block_alloc_stats2(fs, *blocknr, -1);
158 /* Can we attach it to the previous extent? */
160 struct ext2fs_extent *last = list->extents +
162 blk64_t end = last->e_len + 1;
164 if (last->e_lblk + last->e_len == (__u64) blockcnt &&
165 last->e_pblk + last->e_len == *blocknr &&
166 end < (1ULL << 32)) {
169 printf("R: ino=%d len=%u\n", list->ino, last->e_len);
175 /* Do we need to expand? */
176 if (list->count == list->size) {
177 unsigned int new_size = (list->size + NUM_EXTENTS) *
178 sizeof(struct ext2fs_extent);
179 list->retval = ext2fs_resize_mem(0, new_size, &list->extents);
182 list->size += NUM_EXTENTS;
185 /* Add a new extent */
186 list->extents[list->count].e_pblk = *blocknr;
187 list->extents[list->count].e_lblk = blockcnt;
188 list->extents[list->count].e_len = 1;
189 list->extents[list->count].e_flags = 0;
191 printf("R: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino, *blocknr,
199 static errcode_t rewrite_extent_replay(e2fsck_t ctx, struct extent_list *list,
200 struct ext2_inode_large *inode)
203 ext2_extent_handle_t handle;
205 #if defined(DEBUG) || defined(DEBUG_SUMMARY)
206 unsigned int ext_written = 0;
208 struct ext2fs_extent *ex, extent;
209 blk64_t start_val, delta;
211 /* Reset extent tree */
212 inode->i_flags &= ~EXT4_EXTENTS_FL;
213 memset(inode->i_block, 0, sizeof(inode->i_block));
215 /* Make a note of freed blocks */
216 quota_data_sub(ctx->qctx, inode, list->ino,
217 list->blocks_freed * ctx->fs->blocksize);
218 retval = ext2fs_iblk_sub_blocks(ctx->fs, EXT2_INODE(inode),
223 /* Now stuff extents into the file */
224 retval = ext2fs_extent_open2(ctx->fs, list->ino, EXT2_INODE(inode),
229 start_val = ext2fs_get_stat_i_blocks(ctx->fs, EXT2_INODE(inode));
231 for (i = 0, ex = list->extents; i < list->count; i++, ex++) {
234 memcpy(&extent, ex, sizeof(struct ext2fs_extent));
235 extent.e_flags &= EXT2_EXTENT_FLAGS_UNINIT;
236 if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) {
237 if (extent.e_len > EXT_UNINIT_MAX_LEN) {
238 extent.e_len = EXT_UNINIT_MAX_LEN;
239 ex->e_pblk += EXT_UNINIT_MAX_LEN;
240 ex->e_lblk += EXT_UNINIT_MAX_LEN;
241 ex->e_len -= EXT_UNINIT_MAX_LEN;
246 if (extent.e_len > EXT_INIT_MAX_LEN) {
247 extent.e_len = EXT_INIT_MAX_LEN;
248 ex->e_pblk += EXT_INIT_MAX_LEN;
249 ex->e_lblk += EXT_INIT_MAX_LEN;
250 ex->e_len -= EXT_INIT_MAX_LEN;
257 printf("W: ino=%d pblk=%llu lblk=%llu len=%u\n", list->ino,
258 extent.e_pblk, extent.e_lblk, extent.e_len);
260 retval = ext2fs_extent_insert(handle, EXT2_EXTENT_INSERT_AFTER,
264 retval = ext2fs_extent_fix_parents(handle);
267 #if defined(DEBUG) || defined(DEBUG_SUMMARY)
272 delta = ext2fs_get_stat_i_blocks(ctx->fs, EXT2_INODE(inode)) -
275 quota_data_add(ctx->qctx, inode, list->ino, delta << 9);
277 #if defined(DEBUG) || defined(DEBUG_SUMMARY)
278 printf("rebuild: ino=%d extents=%d->%d\n", ino, list->ext_read,
281 e2fsck_write_inode(ctx, list->ino, EXT2_INODE(inode),
285 ext2fs_extent_free(handle);
289 errcode_t e2fsck_rewrite_extent_tree(e2fsck_t ctx, struct extent_list *list)
291 struct ext2_inode_large inode;
295 memset(&inode, 0, sizeof(inode));
296 err = ext2fs_read_inode_full(ctx->fs, list->ino, EXT2_INODE(&inode),
301 /* Skip deleted inodes and inline data files */
302 if (inode.i_flags & EXT4_INLINE_DATA_FL)
305 err = rewrite_extent_replay(ctx, list, &inode);
309 err = ext2fs_count_blocks(ctx->fs, list->ino, EXT2_INODE(&inode),
313 err = ext2fs_iblk_set(ctx->fs, EXT2_INODE(&inode), blk_count);
316 return ext2fs_write_inode_full(ctx->fs, list->ino, EXT2_INODE(&inode),
320 errcode_t e2fsck_read_extents(e2fsck_t ctx, struct extent_list *extents)
322 struct ext2_inode_large inode;
325 extents->extents = NULL;
327 extents->blocks_freed = 0;
328 extents->ext_read = 0;
329 extents->size = NUM_EXTENTS;
330 retval = ext2fs_get_array(NUM_EXTENTS, sizeof(struct ext2fs_extent),
335 retval = ext2fs_read_inode(ctx->fs, extents->ino, EXT2_INODE(&inode));
339 retval = load_extents(ctx, extents);
343 ext2fs_free_mem(&extents->extents);
349 static errcode_t rebuild_extent_tree(e2fsck_t ctx, struct extent_list *list,
352 struct ext2_inode_large inode;
356 list->blocks_freed = 0;
359 e2fsck_read_inode_full(ctx, ino, EXT2_INODE(&inode), sizeof(inode),
362 /* Skip deleted inodes and inline data files */
363 if (inode.i_links_count == 0 ||
364 inode.i_flags & EXT4_INLINE_DATA_FL)
367 /* Collect lblk->pblk mappings */
368 if (inode.i_flags & EXT4_EXTENTS_FL) {
369 retval = load_extents(ctx, list);
372 return rewrite_extent_replay(ctx, list, &inode);
375 retval = ext2fs_block_iterate3(ctx->fs, ino, BLOCK_FLAG_READ_ONLY, 0,
378 return retval || list->retval ||
379 rewrite_extent_replay(ctx, list, &inode);
382 /* Rebuild the extents immediately */
383 static errcode_t e2fsck_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino)
385 struct extent_list list = { 0 };
388 if (!ext2fs_has_feature_extents(ctx->fs->super) ||
389 (ctx->options & E2F_OPT_NO) ||
390 (ino != EXT2_ROOT_INO && ino < ctx->fs->super->s_first_ino))
393 e2fsck_read_bitmaps(ctx);
394 err = ext2fs_get_array(NUM_EXTENTS, sizeof(struct ext2fs_extent),
398 list.size = NUM_EXTENTS;
399 err = rebuild_extent_tree(ctx, &list, ino);
400 ext2fs_free_mem(&list.extents);
405 static void rebuild_extents(e2fsck_t ctx, const char *pass_name, int pr_header)
407 struct problem_context pctx;
408 #ifdef RESOURCE_TRACK
409 struct resource_track rtrack;
411 struct extent_list list = { 0 };
416 if (!ext2fs_has_feature_extents(ctx->fs->super) ||
417 !ext2fs_test_valid(ctx->fs) ||
418 ctx->invalid_bitmaps) {
419 if (ctx->inodes_to_rebuild)
420 ext2fs_free_inode_bitmap(ctx->inodes_to_rebuild);
421 ctx->inodes_to_rebuild = NULL;
424 if (ctx->inodes_to_rebuild == NULL)
427 init_resource_track(&rtrack, ctx->fs->io);
428 clear_problem_context(&pctx);
429 e2fsck_read_bitmaps(ctx);
431 list.size = NUM_EXTENTS;
432 retval = ext2fs_get_array(sizeof(struct ext2fs_extent),
433 list.size, &list.extents);
437 retval = ext2fs_find_first_set_inode_bitmap2(
438 ctx->inodes_to_rebuild, ino + 1,
439 ctx->fs->super->s_inodes_count, &ino);
444 fix_problem(ctx, pr_header, &pctx);
447 pctx.errcode = rebuild_extent_tree(ctx, &list, ino);
449 end_problem_latch(ctx, PR_LATCH_OPTIMIZE_EXT);
450 fix_problem(ctx, PR_1E_OPTIMIZE_EXT_ERR, &pctx);
452 if (ctx->progress && !ctx->progress_fd)
453 e2fsck_simple_progress(ctx, "Rebuilding extents",
454 100.0 * (float) ino /
455 (float) ctx->fs->super->s_inodes_count,
458 end_problem_latch(ctx, PR_LATCH_OPTIMIZE_EXT);
460 ext2fs_free_inode_bitmap(ctx->inodes_to_rebuild);
461 ctx->inodes_to_rebuild = NULL;
462 ext2fs_free_mem(&list.extents);
464 print_resource_track(ctx, pass_name, &rtrack, ctx->fs->io);
467 /* Scan a file to see if we should rebuild its extent tree */
468 errcode_t e2fsck_check_rebuild_extents(e2fsck_t ctx, ext2_ino_t ino,
469 struct ext2_inode *inode,
470 struct problem_context *pctx)
472 struct extent_tree_info eti;
473 struct ext2_extent_info info, top_info;
474 struct ext2fs_extent extent;
475 ext2_extent_handle_t ehandle;
476 ext2_filsys fs = ctx->fs;
479 /* block map file and we want extent conversion */
480 if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
481 !(inode->i_flags & EXT4_INLINE_DATA_FL) &&
482 (ctx->options & E2F_OPT_CONVERT_BMAP)) {
483 return e2fsck_rebuild_extents_later(ctx, ino);
486 if (!(inode->i_flags & EXT4_EXTENTS_FL))
488 memset(&eti, 0, sizeof(eti));
491 /* Otherwise, go scan the extent tree... */
492 retval = ext2fs_extent_open2(fs, ino, inode, &ehandle);
496 retval = ext2fs_extent_get_info(ehandle, &top_info);
500 /* Check maximum extent depth */
502 pctx->blk = top_info.max_depth;
503 pctx->blk2 = ext2fs_max_extent_depth(ehandle);
504 if (pctx->blk2 < pctx->blk &&
505 fix_problem(ctx, PR_1_EXTENT_BAD_MAX_DEPTH, pctx))
506 eti.force_rebuild = 1;
508 /* Can we collect extent tree level stats? */
509 pctx->blk = MAX_EXTENT_DEPTH_COUNT;
510 if (pctx->blk2 > pctx->blk)
511 fix_problem(ctx, PR_1E_MAX_EXTENT_TREE_DEPTH, pctx);
513 /* We need to fix tree depth problems, but the scan isn't a fix. */
514 if (ctx->options & E2F_OPT_FIXES_ONLY)
517 retval = ext2fs_extent_get(ehandle, EXT2_EXTENT_ROOT, &extent);
522 retval = ext2fs_extent_get_info(ehandle, &info);
527 * If this is the first extent in an extent block that we
528 * haven't visited, collect stats on the block.
530 if (info.curr_entry == 1 &&
531 !(extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT) &&
532 !eti.force_rebuild &&
533 info.curr_level < MAX_EXTENT_DEPTH_COUNT) {
534 struct extent_tree_level *etl;
536 etl = eti.ext_info + info.curr_level;
537 etl->num_extents += info.num_entries;
538 etl->max_extents += info.max_entries;
540 * Implementation wart: Splitting extent blocks when
541 * appending will leave the old block with one free
542 * entry. Therefore unless the node is totally full,
543 * pretend that a non-root extent block can hold one
544 * fewer entry than it actually does, so that we don't
545 * repeatedly rebuild the extent tree.
547 if (info.curr_level &&
548 info.num_entries < info.max_entries)
552 /* Skip to the end of a block of leaf nodes */
553 if (extent.e_flags & EXT2_EXTENT_FLAGS_LEAF) {
554 retval = ext2fs_extent_get(ehandle,
555 EXT2_EXTENT_LAST_SIB,
561 retval = ext2fs_extent_get(ehandle, EXT2_EXTENT_NEXT, &extent);
562 } while (retval == 0);
564 ext2fs_extent_free(ehandle);
565 return e2fsck_should_rebuild_extents(ctx, pctx, &eti, &top_info);
568 /* Having scanned a file's extent tree, decide if we should rebuild it */
569 errcode_t e2fsck_should_rebuild_extents(e2fsck_t ctx,
570 struct problem_context *pctx,
571 struct extent_tree_info *eti,
572 struct ext2_extent_info *info)
574 struct extent_tree_level *ei;
576 unsigned int extents_per_block;
578 if (eti->force_rebuild)
581 if (ctx->options & E2F_OPT_NOOPT_EXTENTS)
584 extents_per_block = (ctx->fs->blocksize -
585 sizeof(struct ext3_extent_header)) /
586 sizeof(struct ext3_extent);
588 /* If the extent tree is too deep, then rebuild it. */
589 if (info->max_depth > MAX_EXTENT_DEPTH_COUNT-1) {
590 pctx->blk = info->max_depth;
591 op = PR_1E_CAN_COLLAPSE_EXTENT_TREE;
595 * If we can consolidate a level or shorten the tree, schedule the
596 * extent tree to be rebuilt.
598 for (i = 0, ei = eti->ext_info; i < info->max_depth + 1; i++, ei++) {
599 if (ei->max_extents - ei->num_extents > extents_per_block) {
601 op = PR_1E_CAN_NARROW_EXTENT_TREE;
604 for (j = 0; j < i; j++) {
605 if (ei->num_extents < eti->ext_info[j].max_extents) {
607 op = PR_1E_CAN_COLLAPSE_EXTENT_TREE;
615 if (eti->force_rebuild || fix_problem(ctx, op, pctx))
616 return e2fsck_rebuild_extents_later(ctx, eti->ino);
620 void e2fsck_pass1e(e2fsck_t ctx)
622 rebuild_extents(ctx, "Pass 1E", PR_1E_PASS_HEADER);