Whamcloud - gitweb
libext2fs: fix block-mapped file punch
[tools/e2fsprogs.git] / lib / ext2fs / punch.c
1 /*
2  * punch.c --- deallocate blocks allocated to an inode
3  *
4  * Copyright (C) 2010 Theodore Ts'o.
5  *
6  * %Begin-Header%
7  * This file may be redistributed under the terms of the GNU Library
8  * General Public License, version 2.
9  * %End-Header%
10  */
11
12 #include "config.h"
13 #include <stdio.h>
14 #include <string.h>
15 #if HAVE_UNISTD_H
16 #include <unistd.h>
17 #endif
18 #include <errno.h>
19
20 #include "ext2_fs.h"
21 #include "ext2fs.h"
22
23 #undef PUNCH_DEBUG
24
25 /*
26  * This function returns 1 if the specified block is all zeros
27  */
28 static int check_zero_block(char *buf, int blocksize)
29 {
30         char    *cp = buf;
31         int     left = blocksize;
32
33         while (left > 0) {
34                 if (*cp++)
35                         return 0;
36                 left--;
37         }
38         return 1;
39 }
40
41 /*
42  * This clever recursive function handles i_blocks[] as well as
43  * indirect, double indirect, and triple indirect blocks.  It iterates
44  * over the entries in the i_blocks array or indirect blocks, and for
45  * each one, will recursively handle any indirect blocks and then
46  * frees and deallocates the blocks.
47  */
48 static errcode_t ind_punch(ext2_filsys fs, struct ext2_inode *inode,
49                            char *block_buf, blk_t *p, int level,
50                            blk64_t start, blk64_t count, int max)
51 {
52         errcode_t       retval;
53         blk_t           b;
54         int             i;
55         blk64_t         offset, incr;
56         int             freed = 0;
57
58 #ifdef PUNCH_DEBUG
59         printf("Entering ind_punch, level %d, start %llu, count %llu, "
60                "max %d\n", level, start, count, max);
61 #endif
62         incr = 1ULL << ((EXT2_BLOCK_SIZE_BITS(fs->super) - 2) * level);
63         for (i = 0, offset = 0; i < max; i++, p++, offset += incr) {
64                 if (offset >= start + count)
65                         break;
66                 if (*p == 0 || (offset+incr) <= start)
67                         continue;
68                 b = *p;
69                 if (level > 0) {
70                         blk_t start2;
71 #ifdef PUNCH_DEBUG
72                         printf("Reading indirect block %u\n", b);
73 #endif
74                         retval = ext2fs_read_ind_block(fs, b, block_buf);
75                         if (retval)
76                                 return retval;
77                         start2 = (start > offset) ? start - offset : 0;
78                         retval = ind_punch(fs, inode, block_buf + fs->blocksize,
79                                            (blk_t *) block_buf, level - 1,
80                                            start2, count - offset,
81                                            fs->blocksize >> 2);
82                         if (retval)
83                                 return retval;
84                         retval = ext2fs_write_ind_block(fs, b, block_buf);
85                         if (retval)
86                                 return retval;
87                         if (!check_zero_block(block_buf, fs->blocksize))
88                                 continue;
89                 }
90 #ifdef PUNCH_DEBUG
91                 printf("Freeing block %u (offset %llu)\n", b, offset);
92 #endif
93                 ext2fs_block_alloc_stats(fs, b, -1);
94                 *p = 0;
95                 freed++;
96         }
97 #ifdef PUNCH_DEBUG
98         printf("Freed %d blocks\n", freed);
99 #endif
100         return ext2fs_iblk_sub_blocks(fs, inode, freed);
101 }
102
103 #define BLK_T_MAX ((blk_t)~0ULL)
104 static errcode_t ext2fs_punch_ind(ext2_filsys fs, struct ext2_inode *inode,
105                                   char *block_buf, blk64_t start, blk64_t end)
106 {
107         errcode_t               retval;
108         char                    *buf = 0;
109         int                     level;
110         int                     num = EXT2_NDIR_BLOCKS;
111         blk_t                   *bp = inode->i_block;
112         blk_t                   addr_per_block;
113         blk64_t                 max = EXT2_NDIR_BLOCKS;
114         blk_t                   count;
115
116         /* Check start/end don't overflow the 2^32-1 indirect block limit */
117         if (start > BLK_T_MAX)
118                 return 0;
119         if (end >= BLK_T_MAX || end - start + 1 >= BLK_T_MAX)
120                 count = BLK_T_MAX - start;
121         else
122                 count = end - start + 1;
123
124         if (!block_buf) {
125                 retval = ext2fs_get_array(3, fs->blocksize, &buf);
126                 if (retval)
127                         return retval;
128                 block_buf = buf;
129         }
130
131         addr_per_block = (blk_t)fs->blocksize >> 2;
132
133         for (level = 0; level < 4; level++, max *= (blk64_t)addr_per_block) {
134 #ifdef PUNCH_DEBUG
135                 printf("Main loop level %d, start %llu count %u "
136                        "max %llu num %d\n", level, start, count, max, num);
137 #endif
138                 if (start < max) {
139                         retval = ind_punch(fs, inode, block_buf, bp, level,
140                                            start, count, num);
141                         if (retval)
142                                 goto errout;
143                         if (count > max)
144                                 count -= max - start;
145                         else
146                                 break;
147                         start = 0;
148                 } else
149                         start -= max;
150                 bp += num;
151                 if (level == 0) {
152                         num = 1;
153                         max = 1;
154                 }
155         }
156         retval = 0;
157 errout:
158         if (buf)
159                 ext2fs_free_mem(&buf);
160         return retval;
161 }
162 #undef BLK_T_MAX
163
164 #ifdef PUNCH_DEBUG
165
166 #define dbg_printf(f, a...)  printf(f, ## a)
167
168 static void dbg_print_extent(char *desc, struct ext2fs_extent *extent)
169 {
170         if (desc)
171                 printf("%s: ", desc);
172         printf("extent: lblk %llu--%llu, len %u, pblk %llu, flags: ",
173                extent->e_lblk, extent->e_lblk + extent->e_len - 1,
174                extent->e_len, extent->e_pblk);
175         if (extent->e_flags & EXT2_EXTENT_FLAGS_LEAF)
176                 fputs("LEAF ", stdout);
177         if (extent->e_flags & EXT2_EXTENT_FLAGS_UNINIT)
178                 fputs("UNINIT ", stdout);
179         if (extent->e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
180                 fputs("2ND_VISIT ", stdout);
181         if (!extent->e_flags)
182                 fputs("(none)", stdout);
183         fputc('\n', stdout);
184
185 }
186 #else
187 #define dbg_print_extent(desc, ex)      do { } while (0)
188 #define dbg_printf(f, a...)             do { } while (0)
189 #endif
190
191 /* Free a range of blocks, respecting cluster boundaries */
192 static errcode_t punch_extent_blocks(ext2_filsys fs, ext2_ino_t ino,
193                                      struct ext2_inode *inode,
194                                      blk64_t lfree_start, blk64_t free_start,
195                                      __u32 free_count, int *freed)
196 {
197         blk64_t         pblk;
198         int             freed_now = 0;
199         __u32           cluster_freed;
200         errcode_t       retval = 0;
201
202         /* No bigalloc?  Just free each block. */
203         if (EXT2FS_CLUSTER_RATIO(fs) == 1) {
204                 *freed += free_count;
205                 while (free_count-- > 0)
206                         ext2fs_block_alloc_stats2(fs, free_start++, -1);
207                 return retval;
208         }
209
210         /*
211          * Try to free up to the next cluster boundary.  We assume that all
212          * blocks in a logical cluster map to blocks from the same physical
213          * cluster, and that the offsets within the [pl]clusters match.
214          */
215         if (free_start & EXT2FS_CLUSTER_MASK(fs)) {
216                 retval = ext2fs_map_cluster_block(fs, ino, inode,
217                                                   lfree_start, &pblk);
218                 if (retval)
219                         goto errout;
220                 if (!pblk) {
221                         ext2fs_block_alloc_stats2(fs, free_start, -1);
222                         freed_now++;
223                 }
224                 cluster_freed = EXT2FS_CLUSTER_RATIO(fs) -
225                         (free_start & EXT2FS_CLUSTER_MASK(fs));
226                 if (cluster_freed > free_count)
227                         cluster_freed = free_count;
228                 free_count -= cluster_freed;
229                 free_start += cluster_freed;
230                 lfree_start += cluster_freed;
231         }
232
233         /* Free whole clusters from the middle of the range. */
234         while (free_count > 0 && free_count >= EXT2FS_CLUSTER_RATIO(fs)) {
235                 ext2fs_block_alloc_stats2(fs, free_start, -1);
236                 freed_now++;
237                 cluster_freed = EXT2FS_CLUSTER_RATIO(fs);
238                 free_count -= cluster_freed;
239                 free_start += cluster_freed;
240                 lfree_start += cluster_freed;
241         }
242
243         /* Try to free the last cluster. */
244         if (free_count > 0) {
245                 retval = ext2fs_map_cluster_block(fs, ino, inode,
246                                                   lfree_start, &pblk);
247                 if (retval)
248                         goto errout;
249                 if (!pblk) {
250                         ext2fs_block_alloc_stats2(fs, free_start, -1);
251                         freed_now++;
252                 }
253         }
254
255 errout:
256         *freed += freed_now;
257         return retval;
258 }
259
260 static errcode_t ext2fs_punch_extent(ext2_filsys fs, ext2_ino_t ino,
261                                      struct ext2_inode *inode,
262                                      blk64_t start, blk64_t end)
263 {
264         ext2_extent_handle_t    handle = 0;
265         struct ext2fs_extent    extent;
266         errcode_t               retval;
267         blk64_t                 free_start, next, lfree_start;
268         __u32                   free_count, newlen;
269         int                     freed = 0;
270         int                     op;
271
272         retval = ext2fs_extent_open2(fs, ino, inode, &handle);
273         if (retval)
274                 return retval;
275         /*
276          * Find the extent closest to the start of the punch range.  We don't
277          * check the return value because _goto() sets the current node to the
278          * next-lowest extent if 'start' is in a hole, and doesn't set a
279          * current node if there was a real error reading the extent tree.
280          * In that case, _get() will error out.
281          *
282          * Note: If _get() returns 'no current node', that simply means that
283          * there aren't any blocks mapped past this point in the file, so we're
284          * done.
285          */
286         ext2fs_extent_goto(handle, start);
287         retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT, &extent);
288         if (retval == EXT2_ET_NO_CURRENT_NODE) {
289                 retval = 0;
290                 goto errout;
291         } else if (retval)
292                 goto errout;
293         while (1) {
294                 op = EXT2_EXTENT_NEXT_LEAF;
295                 dbg_print_extent("main loop", &extent);
296                 next = extent.e_lblk + extent.e_len;
297                 dbg_printf("start %llu, end %llu, next %llu\n",
298                            (unsigned long long) start,
299                            (unsigned long long) end,
300                            (unsigned long long) next);
301                 if (start <= extent.e_lblk) {
302                         /*
303                          * Have we iterated past the end of the punch region?
304                          * If so, we can stop.
305                          */
306                         if (end < extent.e_lblk)
307                                 break;
308                         dbg_printf("Case #%d\n", 1);
309                         /* Start of deleted region before extent; 
310                            adjust beginning of extent */
311                         free_start = extent.e_pblk;
312                         lfree_start = extent.e_lblk;
313                         if (next > end)
314                                 free_count = end - extent.e_lblk + 1;
315                         else
316                                 free_count = extent.e_len;
317                         extent.e_len -= free_count;
318                         extent.e_lblk += free_count;
319                         extent.e_pblk += free_count;
320                 } else if (end >= next-1) {
321                         /*
322                          * Is the punch region beyond this extent?  This can
323                          * happen if start is already inside a hole.  Try to
324                          * advance to the next extent if this is the case.
325                          */
326                         if (start >= next)
327                                 goto next_extent;
328                         /* End of deleted region after extent;
329                            adjust end of extent */
330                         dbg_printf("Case #%d\n", 2);
331                         newlen = start - extent.e_lblk;
332                         free_start = extent.e_pblk + newlen;
333                         lfree_start = extent.e_lblk + newlen;
334                         free_count = extent.e_len - newlen;
335                         extent.e_len = newlen;
336                 } else {
337                         struct ext2fs_extent    newex;
338
339                         dbg_printf("Case #%d\n", 3);
340                         /* The hard case; we need to split the extent */
341                         newex.e_pblk = extent.e_pblk +
342                                 (end + 1 - extent.e_lblk);
343                         newex.e_lblk = end + 1;
344                         newex.e_len = next - end - 1;
345                         newex.e_flags = extent.e_flags;
346
347                         extent.e_len = start - extent.e_lblk;
348                         free_start = extent.e_pblk + extent.e_len;
349                         lfree_start = extent.e_lblk + extent.e_len;
350                         free_count = end - start + 1;
351
352                         dbg_print_extent("inserting", &newex);
353                         retval = ext2fs_extent_insert(handle,
354                                         EXT2_EXTENT_INSERT_AFTER, &newex);
355                         if (retval)
356                                 goto errout;
357                         retval = ext2fs_extent_fix_parents(handle);
358                         if (retval)
359                                 goto errout;
360                         /*
361                          * Now pointing at inserted extent; so go back.
362                          *
363                          * We cannot use EXT2_EXTENT_PREV to go back; note the
364                          * subtlety in the comment for fix_parents().
365                          */
366                         retval = ext2fs_extent_goto(handle, extent.e_lblk);
367                         if (retval)
368                                 goto errout;
369                 } 
370                 if (extent.e_len) {
371                         dbg_print_extent("replacing", &extent);
372                         retval = ext2fs_extent_replace(handle, 0, &extent);
373                         if (retval)
374                                 goto errout;
375                         retval = ext2fs_extent_fix_parents(handle);
376                 } else {
377                         struct ext2fs_extent    newex;
378                         blk64_t                 old_lblk, next_lblk;
379                         dbg_printf("deleting current extent%s\n", "");
380
381                         /*
382                          * Save the location of the next leaf, then slip
383                          * back to the current extent.
384                          */
385                         retval = ext2fs_extent_get(handle, EXT2_EXTENT_CURRENT,
386                                                    &newex);
387                         if (retval)
388                                 goto errout;
389                         old_lblk = newex.e_lblk;
390
391                         retval = ext2fs_extent_get(handle,
392                                                    EXT2_EXTENT_NEXT_LEAF,
393                                                    &newex);
394                         if (retval == EXT2_ET_EXTENT_NO_NEXT)
395                                 next_lblk = old_lblk;
396                         else if (retval)
397                                 goto errout;
398                         else
399                                 next_lblk = newex.e_lblk;
400
401                         retval = ext2fs_extent_goto(handle, old_lblk);
402                         if (retval)
403                                 goto errout;
404
405                         /* Now delete the extent. */
406                         retval = ext2fs_extent_delete(handle, 0);
407                         if (retval)
408                                 goto errout;
409
410                         retval = ext2fs_extent_fix_parents(handle);
411                         if (retval && retval != EXT2_ET_NO_CURRENT_NODE)
412                                 goto errout;
413                         retval = 0;
414
415                         /* Jump forward to the next extent. */
416                         ext2fs_extent_goto(handle, next_lblk);
417                         op = EXT2_EXTENT_CURRENT;
418                 }
419                 if (retval)
420                         goto errout;
421                 dbg_printf("Free start %llu, free count = %u\n",
422                        free_start, free_count);
423                 retval = punch_extent_blocks(fs, ino, inode, lfree_start,
424                                              free_start, free_count, &freed);
425                 if (retval)
426                         goto errout;
427         next_extent:
428                 retval = ext2fs_extent_get(handle, op,
429                                            &extent);
430                 if (retval == EXT2_ET_EXTENT_NO_NEXT ||
431                     retval == EXT2_ET_NO_CURRENT_NODE)
432                         break;
433                 if (retval)
434                         goto errout;
435         }
436         dbg_printf("Freed %d blocks\n", freed);
437         retval = ext2fs_iblk_sub_blocks(fs, inode, freed);
438 errout:
439         ext2fs_extent_free(handle);
440         return retval;
441 }
442
443 /*
444  * Deallocate all logical _blocks_ starting at start to end, inclusive.
445  * If end is ~0ULL, then this is effectively truncate.
446  */
447 errcode_t ext2fs_punch(ext2_filsys fs, ext2_ino_t ino,
448                        struct ext2_inode *inode,
449                        char *block_buf, blk64_t start,
450                        blk64_t end)
451 {
452         errcode_t               retval;
453         struct ext2_inode       inode_buf;
454
455         if (start > end)
456                 return EINVAL;
457
458         /* Read inode structure if necessary */
459         if (!inode) {
460                 retval = ext2fs_read_inode(fs, ino, &inode_buf);
461                 if (retval)
462                         return retval;
463                 inode = &inode_buf;
464         }
465         if (inode->i_flags & EXT4_EXTENTS_FL)
466                 retval = ext2fs_punch_extent(fs, ino, inode, start, end);
467         else
468                 retval = ext2fs_punch_ind(fs, inode, block_buf, start, end);
469         if (retval)
470                 return retval;
471
472 #ifdef PUNCH_DEBUG
473         printf("%u: write inode size now %u blocks %u\n",
474                 ino, inode->i_size, inode->i_blocks);
475 #endif
476         return ext2fs_write_inode(fs, ino, inode);
477 }