Whamcloud - gitweb
filefrag: Lustre changes to filefrag FIEMAP handling
[tools/e2fsprogs.git] / misc / filefrag.c
1 /*
2  * filefrag.c -- report if a particular file is fragmented
3  *
4  * Copyright 2003 by Theodore Ts'o.
5  *
6  * %Begin-Header%
7  * This file may be redistributed under the terms of the GNU Public
8  * License.
9  * %End-Header%
10  */
11
12 #include "config.h"
13 #ifndef __linux__
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <unistd.h>
17
/*
 * Stub entry point compiled only when __linux__ is not defined:
 * report that the tool is Linux-only and terminate with a failure status.
 */
int main(void)
{
        static const char unsupported[] =
                "This program is only supported on Linux!\n";

        fputs(unsupported, stderr);
        return EXIT_FAILURE;
}
22 #else
23 #ifndef _LARGEFILE_SOURCE
24 #define _LARGEFILE_SOURCE
25 #endif
26 #ifndef _LARGEFILE64_SOURCE
27 #define _LARGEFILE64_SOURCE
28 #endif
29
30
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <string.h>
35 #include <time.h>
36 #include <fcntl.h>
37 #include <errno.h>
38 #ifdef HAVE_GETOPT_H
39 #include <getopt.h>
40 #else
41 extern char *optarg;
42 extern int optind;
43 #endif
44 #include <sys/types.h>
45 #include <sys/stat.h>
46 #include <sys/vfs.h>
47 #include <sys/ioctl.h>
48 #ifdef HAVE_LINUX_FD_H
49 #include <linux/fd.h>
50 #endif
51 #include <ext2fs/ext2fs.h>
52 #include <ext2fs/ext2_types.h>
53 #include <ext2fs/fiemap.h>
54
/*
 * Run-time options.  They are set while parsing the command line in main()
 * (frag_report() may additionally force device_offset/blocksize for Lustre)
 * and are read by the reporting functions below.
 */
int verbose = 0;        /* -v (also raised by -e): print per-extent detail */
int blocksize = 0;      /* -b/-k: block size used for output, 0 = not given */
int sync_file = 0;      /* -s: fsync file before getting the mapping */
int xattr_map = 0;      /* -x: get xattr mapping */
int force_bmap = 0;     /* -B: force use of FIBMAP instead of FIEMAP */
int force_extent = 0;   /* -e: print output in extent format always */
int device_offset = 0;  /* -l: extents report device-relative offsets */

/* Minimum column widths of the extent table; frag_report() may widen them */
int logical_width = 8;
int physical_width = 10;

/* Row formats of the extent table; -X makes ext_fmt point at hex_fmt */
const char *ext_fmt = "%4d: %*llu..%*llu: %*llu..%*llu: %6llu: %s\n";
const char *hex_fmt = "%4d: %*llx..%*llx: %*llx..%*llx: %6llx: %s\n";
66
/* The FIEMAP request flags this program may pass in fm_flags */
#define FILEFRAG_FIEMAP_FLAGS_COMPAT \
        (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR | FIEMAP_FLAG_DEVICE_ORDER)

/* Block-mapping ioctl numbers, defined locally */
#define FIBMAP          _IO(0x00, 1)    /* bmap access */
#define FIGETBSZ        _IO(0x00, 2)    /* get the block size used for bmap */

/* statfs f_type value that frag_report() treats as a Lustre filesystem */
#define LUSTRE_SUPER_MAGIC 0x0BD00BD0

/* Inode-flag query used to tell extent-mapped from block-mapped files */
#define EXT4_EXTENTS_FL                 0x00080000 /* Inode uses extents */
#define EXT3_IOC_GETFLAGS               _IOR('f', 1, long)
77
/*
 * Return floor(log2(arg)) for a positive arg, i.e. the position of its
 * highest set bit (1 -> 0, 2 -> 1, 1024 -> 10).
 *
 * Zero and negative values yield 0.  The explicit guard matters: with an
 * arithmetic right shift a negative value settles at -1 and never becomes
 * zero, so shifting until zero would loop forever.
 */
static int int_log2(int arg)
{
        int     l = 0;

        if (arg <= 0)
                return 0;

        for (arg >>= 1; arg != 0; arg >>= 1)
                l++;

        return l;
}
89
/*
 * Return floor(log10(arg)), i.e. the number of decimal digits of arg
 * minus one.  An arg of 0 gives 0.
 */
static int int_log10(unsigned long long arg)
{
        int     extra_digits;

        /* Every division by ten that leaves something behind is one digit */
        for (extra_digits = 0, arg /= 10; arg != 0; arg /= 10)
                extra_digits++;

        return extra_digits;
}
101
/*
 * Divide a by b, rounding the quotient up.  A zero dividend returns 0
 * without performing the division; b is not checked.
 */
static unsigned int div_ceil(unsigned int a, unsigned int b)
{
        /* (a - 1) / b + 1 rounds up without overflowing on a + b - 1 */
        return a ? (a - 1) / b + 1 : 0;
}
108
109 static int get_bmap(int fd, unsigned long block, unsigned long *phy_blk)
110 {
111         int     ret;
112         unsigned int b;
113
114         b = block;
115         ret = ioctl(fd, FIBMAP, &b); /* FIBMAP takes pointer to integer */
116         if (ret < 0)
117                 return -errno;
118         *phy_blk = b;
119
120         return ret;
121 }
122
123 static void print_extent_header(void)
124 {
125         printf(" ext: %*s %*s length: %*s flags:\n",
126                logical_width * 2 + 3,
127                device_offset ? "device_logical:" : "logical_offset:",
128                physical_width * 2 + 3, "physical_offset:",
129                device_offset ? 5 : physical_width + 1,
130                device_offset ? " dev:" : "expected:");
131 }
132
133 static void print_flag(__u32 *flags, __u32 mask, char *buf, const char *name)
134 {
135         if ((*flags & mask) == 0)
136                 return;
137
138         strcat(buf, name);
139         *flags &= ~mask;
140 }
141
142 static void print_extent_info(struct fiemap_extent *fm_extent, int cur_ex,
143                               unsigned long long expected, int blk_shift,
144                               ext2fs_struct_stat *st)
145 {
146         unsigned long long physical_blk;
147         unsigned long long logical_blk;
148         unsigned long long ext_len;
149         unsigned long long ext_blks;
150         __u32 fe_flags, mask;
151         char flags[256] = "";
152
153         /* For inline data all offsets should be in bytes, not blocks */
154         if (fm_extent->fe_flags & FIEMAP_EXTENT_DATA_INLINE)
155                 blk_shift = 0;
156
157         ext_len = fm_extent->fe_length >> blk_shift;
158         ext_blks = (fm_extent->fe_length - 1) >> blk_shift;
159         logical_blk = fm_extent->fe_logical >> blk_shift;
160         if (fm_extent->fe_flags & FIEMAP_EXTENT_UNKNOWN) {
161                 physical_blk = 0;
162         } else {
163                 physical_blk = fm_extent->fe_physical >> blk_shift;
164         }
165
166         if (device_offset)
167                 sprintf(flags, "%04x: ", fm_extent->fe_device);
168         else if (expected)
169                 sprintf(flags, ext_fmt == hex_fmt ? "%*llx:" : "%*llu: ",
170                         physical_width, expected >> blk_shift);
171
172         fe_flags = fm_extent->fe_flags;
173         print_flag(&fe_flags, FIEMAP_EXTENT_LAST, flags, "last,");
174         print_flag(&fe_flags, FIEMAP_EXTENT_UNKNOWN, flags, "unknown_loc,");
175         print_flag(&fe_flags, FIEMAP_EXTENT_DELALLOC, flags, "delalloc,");
176         print_flag(&fe_flags, FIEMAP_EXTENT_ENCODED, flags, "encoded,");
177         print_flag(&fe_flags, FIEMAP_EXTENT_DATA_ENCRYPTED, flags,"encrypted,");
178         print_flag(&fe_flags, FIEMAP_EXTENT_NOT_ALIGNED, flags, "not_aligned,");
179         print_flag(&fe_flags, FIEMAP_EXTENT_DATA_INLINE, flags, "inline,");
180         print_flag(&fe_flags, FIEMAP_EXTENT_DATA_TAIL, flags, "tail_packed,");
181         print_flag(&fe_flags, FIEMAP_EXTENT_UNWRITTEN, flags, "unwritten,");
182         print_flag(&fe_flags, FIEMAP_EXTENT_MERGED, flags, "merged,");
183         print_flag(&fe_flags, FIEMAP_EXTENT_SHARED, flags, "shared,");
184         print_flag(&fe_flags, FIEMAP_EXTENT_NET, flags, "net,");
185
186         /* print any unknown flags as hex values */
187         for (mask = 1; fe_flags != 0 && mask != 0; mask <<= 1) {
188                 char hex[sizeof(mask) * 2 + 4]; /* 2 chars/byte + 0x, + NUL */
189
190                 if ((fe_flags & mask) == 0)
191                         continue;
192                 sprintf(hex, "%#04x,", mask);
193                 print_flag(&fe_flags, mask, flags, hex);
194         }
195
196         if (fm_extent->fe_logical + fm_extent->fe_length >=
197             (unsigned long long) st->st_size)
198                 strcat(flags, "eof,");
199
200         /* Remove trailing comma, if any */
201         if (flags[0] != '\0')
202                 flags[strnlen(flags, sizeof(flags)) - 1] = '\0';
203
204         printf(ext_fmt, cur_ex, logical_width, logical_blk,
205                logical_width, logical_blk + ext_blks,
206                physical_width, physical_blk,
207                physical_width, physical_blk + ext_blks,
208                ext_len, flags);
209 }
210
211 static int filefrag_fiemap(int fd, int blk_shift, int *num_extents,
212                            ext2fs_struct_stat *st)
213 {
214         __u64 buf[2048];        /* __u64 for proper field alignment */
215         struct fiemap *fiemap = (struct fiemap *)buf;
216         struct fiemap_extent *fm_ext = &fiemap->fm_extents[0];
217         struct fiemap_extent fm_last;
218         int count = (sizeof(buf) - sizeof(*fiemap)) /
219                         sizeof(struct fiemap_extent);
220         unsigned long long expected = 0;
221         unsigned long long expected_dense = 0;
222         unsigned long flags = 0;
223         unsigned int i;
224         int fiemap_header_printed = 0;
225         int tot_extents = 0, n = 0;
226         int previous_device = 0;
227         int last = 0;
228         int rc;
229
230         memset(fiemap, 0, sizeof(struct fiemap));
231         memset(&fm_last, 0, sizeof(fm_last));
232
233         if (sync_file)
234                 flags |= FIEMAP_FLAG_SYNC;
235
236         if (xattr_map)
237                 flags |= FIEMAP_FLAG_XATTR;
238
239         if (device_offset) {
240                 flags |= FIEMAP_FLAG_DEVICE_ORDER;
241                 memset(fm_ext, 0, sizeof(struct fiemap_extent));
242         }
243
244 retry_wo_device_order:
245         do {
246                 fiemap->fm_length = ~0ULL;
247                 fiemap->fm_flags = flags;
248                 fiemap->fm_extent_count = count;
249                 rc = ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap);
250                 if (rc < 0) {
251                         static int fiemap_incompat_printed;
252
253                         rc = -errno;
254                         if (rc == -EBADR && !fiemap_incompat_printed) {
255                                 fprintf(stderr, "FIEMAP failed with unknown "
256                                                 "flags %x\n",
257                                        fiemap->fm_flags);
258                                 fiemap_incompat_printed = 1;
259                         } else if (rc == EBADR && (fiemap->fm_flags &
260                                                    FIEMAP_FLAG_DEVICE_ORDER)) {
261                                 flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
262                                 goto retry_wo_device_order;
263                         }
264                         return rc;
265                 }
266
267                 /* If 0 extents are returned, then more ioctls are not needed */
268                 if (fiemap->fm_mapped_extents == 0)
269                         break;
270
271                 if (verbose && !fiemap_header_printed) {
272                         print_extent_header();
273                         fiemap_header_printed = 1;
274                 }
275
276                 for (i = 0; i < fiemap->fm_mapped_extents; i++) {
277                         if (previous_device != fm_ext[i].fe_device)
278                                 previous_device = fm_ext[i].fe_device;
279
280                         expected_dense = fm_last.fe_physical +
281                                          fm_last.fe_length;
282                         expected = fm_last.fe_physical +
283                                    fm_ext[i].fe_logical - fm_last.fe_logical;
284                         if (fm_ext[i].fe_logical != 0 &&
285                             fm_ext[i].fe_physical != expected &&
286                             fm_ext[i].fe_physical != expected_dense) {
287                                 tot_extents++;
288                         } else {
289                                 expected = 0;
290                                 if (!tot_extents)
291                                         tot_extents = 1;
292                         }
293                         if (verbose)
294                                 print_extent_info(&fm_ext[i], n, expected,
295                                                   blk_shift, st);
296                         if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST)
297                                 last = 1;
298                         fm_last = fm_ext[i];
299                         n++;
300                 }
301
302                 /* For DEVICE_ORDER mappings, if EXTENT_LAST not yet found then
303                  * fm_start needs to be the same as it was for earlier ioctl.
304                  * The first extent is used to pass the end offset and device
305                  * of the last FIEMAP call.  Otherwise, we ask for extents
306                  * starting from where the last mapping ended. */
307                 if (flags & FIEMAP_FLAG_DEVICE_ORDER) {
308                         fm_ext[0].fe_logical =  fm_ext[i - 1].fe_logical +
309                                                 fm_ext[i - 1].fe_length;
310                         fm_ext[0].fe_device =   fm_ext[i - 1].fe_device;
311                         fiemap->fm_start =      0;
312                 } else {
313                         fiemap->fm_start =      fm_ext[i - 1].fe_logical +
314                                                 fm_ext[i - 1].fe_length;
315                 }
316         } while (last == 0);
317
318         *num_extents = tot_extents;
319
320         return 0;
321 }
322
323 #define EXT2_DIRECT     12
324
325 static int filefrag_fibmap(int fd, int blk_shift, int *num_extents,
326                            ext2fs_struct_stat *st,
327                            unsigned long numblocks, int is_ext2)
328 {
329         struct fiemap_extent    fm_ext, fm_last;
330         unsigned long           i, last_block;
331         unsigned long long      logical, expected = 0;
332                                 /* Blocks per indirect block */
333         const long              bpib = st->st_blksize / 4;
334         int                     count;
335
336         memset(&fm_ext, 0, sizeof(fm_ext));
337         memset(&fm_last, 0, sizeof(fm_last));
338         if (force_extent) {
339                 fm_ext.fe_device = st->st_dev;
340                 fm_ext.fe_flags = FIEMAP_EXTENT_MERGED;
341         }
342
343         if (sync_file && fsync(fd) != 0)
344                 return -errno;
345
346         for (i = 0, logical = 0, *num_extents = 0, count = last_block = 0;
347              i < numblocks;
348              i++, logical += st->st_blksize) {
349                 unsigned long block = 0;
350                 int rc;
351
352                 if (is_ext2 && last_block) {
353                         if (((i - EXT2_DIRECT) % bpib) == 0)
354                                 last_block++;
355                         if (((i - EXT2_DIRECT - bpib) % (bpib * bpib)) == 0)
356                                 last_block++;
357                         if (((i - EXT2_DIRECT - bpib - bpib * bpib) %
358                              (((unsigned long long)bpib) * bpib * bpib)) == 0)
359                                 last_block++;
360                 }
361                 rc = get_bmap(fd, i, &block);
362                 if (rc < 0)
363                         return rc;
364                 if (block == 0)
365                         continue;
366
367                 if (*num_extents == 0 || block != last_block + 1 ||
368                     fm_ext.fe_logical + fm_ext.fe_length != logical) {
369                         /*
370                          * This is the start of a new extent; figure out where
371                          * we expected it to be and report the extent.
372                          */
373                         if (*num_extents != 0 && fm_last.fe_length) {
374                                 expected = fm_last.fe_physical +
375                                         (fm_ext.fe_logical - fm_last.fe_logical);
376                                 if (expected == fm_ext.fe_physical)
377                                         expected = 0;
378                         }
379                         if (force_extent && *num_extents == 0)
380                                 print_extent_header();
381                         if (force_extent && *num_extents != 0) {
382                                 print_extent_info(&fm_ext, *num_extents - 1,
383                                                   expected, blk_shift, st);
384                         }
385                         if (verbose && expected != 0) {
386                                 printf("Discontinuity: Block %llu is at %llu "
387                                        "(was %llu)\n",
388                                         fm_ext.fe_logical / st->st_blksize,
389                                         fm_ext.fe_physical / st->st_blksize,
390                                         expected / st->st_blksize);
391                         }
392                         /* create the new extent */
393                         fm_last = fm_ext;
394                         (*num_extents)++;
395                         fm_ext.fe_physical = block * st->st_blksize;
396                         fm_ext.fe_logical = logical;
397                         fm_ext.fe_length = 0;
398                 }
399                 fm_ext.fe_length += st->st_blksize;
400                 last_block = block;
401         }
402         if (force_extent && *num_extents != 0) {
403                 if (fm_last.fe_length) {
404                         expected = fm_last.fe_physical +
405                                    (fm_ext.fe_logical - fm_last.fe_logical);
406                         if (expected == fm_ext.fe_physical)
407                                 expected = 0;
408                 }
409                 print_extent_info(&fm_ext, *num_extents - 1, expected,
410                                   blk_shift, st);
411         }
412
413         return count;
414 }
415
416 static int frag_report(const char *filename)
417 {
418         static struct statfs fsinfo;
419         static unsigned int blksize;
420         ext2fs_struct_stat st;
421         int             blk_shift;
422         long            fd;
423         unsigned long long      numblocks;
424         int             data_blocks_per_cyl = 1;
425         int             num_extents = 1, expected = ~0;
426         int             is_ext2 = 0;
427         static dev_t    last_device;
428         int             width;
429         int             rc = 0;
430
431 #if defined(HAVE_OPEN64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
432         fd = open64(filename, O_RDONLY);
433 #else
434         fd = open(filename, O_RDONLY);
435 #endif
436         if (fd < 0) {
437                 rc = -errno;
438                 perror("open");
439                 return rc;
440         }
441
442 #if defined(HAVE_FSTAT64) && !defined(__OSX_AVAILABLE_BUT_DEPRECATED)
443         if (fstat64(fd, &st) < 0) {
444 #else
445         if (fstat(fd, &st) < 0) {
446 #endif
447                 rc = -errno;
448                 perror("stat");
449                 goto out_close;
450         }
451
452         if (last_device != st.st_dev) {
453                 if (fstatfs(fd, &fsinfo) < 0) {
454                         rc = -errno;
455                         perror("fstatfs");
456                         goto out_close;
457                 }
458                 if (ioctl(fd, FIGETBSZ, &blksize) < 0)
459                         blksize = fsinfo.f_bsize;
460                 if (verbose)
461                         printf("Filesystem type is: %lx\n",
462                                (unsigned long)fsinfo.f_type);
463         }
464         st.st_blksize = blksize;
465         if (fsinfo.f_type == 0xef51 || fsinfo.f_type == 0xef52 ||
466             fsinfo.f_type == 0xef53) {
467                 unsigned int    flags;
468
469                 if (ioctl(fd, EXT3_IOC_GETFLAGS, &flags) == 0 &&
470                     !(flags & EXT4_EXTENTS_FL))
471                         is_ext2 = 1;
472         }
473
474         /* Check if filesystem is Lustre.  Always print in extent format
475          * with 1kB blocks, using the device-relative logical offsets. */
476         if (fsinfo.f_type == LUSTRE_SUPER_MAGIC) {
477                 device_offset = 1;
478                 blocksize = blocksize ?: 1024;
479         }
480
481         if (is_ext2) {
482                 long cylgroups = div_ceil(fsinfo.f_blocks, blksize * 8);
483
484                 if (verbose && last_device != st.st_dev)
485                         printf("Filesystem cylinder groups approximately %ld\n",
486                                cylgroups);
487
488                 data_blocks_per_cyl = blksize * 8 -
489                                         (fsinfo.f_files / 8 / cylgroups) - 3;
490         }
491         last_device = st.st_dev;
492
493         width = int_log10(fsinfo.f_blocks);
494         if (width > physical_width)
495                 physical_width = width;
496
497         numblocks = (st.st_size + blksize - 1) / blksize;
498         if (blocksize != 0)
499                 blk_shift = int_log2(blocksize);
500         else
501                 blk_shift = int_log2(blksize);
502
503         width = int_log10(numblocks);
504         if (width > logical_width)
505                 logical_width = width;
506         if (verbose)
507                 printf("File size of %s is %llu (%llu block%s of %d bytes)\n",
508                        filename, (unsigned long long)st.st_size,
509                        numblocks * blksize >> blk_shift,
510                        numblocks == 1 ? "" : "s", 1 << blk_shift);
511
512         if (!force_bmap) {
513                 rc = filefrag_fiemap(fd, blk_shift, &num_extents, &st);
514                 expected = 0;
515         }
516
517         if (force_bmap || rc < 0) { /* FIEMAP failed, try FIBMAP instead */
518                 expected = filefrag_fibmap(fd, blk_shift, &num_extents,
519                                            &st, numblocks, is_ext2);
520                 if (expected < 0) {
521                         if (expected == -EINVAL || expected == -ENOTTY) {
522                                 fprintf(stderr, "%s: FIBMAP unsupported\n",
523                                         filename);
524                         } else if (expected == -EPERM) {
525                                 fprintf(stderr,
526                                         "%s: FIBMAP requires root privileges\n",
527                                         filename);
528                         } else {
529                                 fprintf(stderr, "%s: FIBMAP error: %s",
530                                         filename, strerror(expected));
531                         }
532                         rc = expected;
533                         goto out_close;
534                 } else {
535                         rc = 0;
536                 }
537                 expected = expected / data_blocks_per_cyl + 1;
538         }
539
540         if (num_extents == 1)
541                 printf("%s: 1 extent found", filename);
542         else
543                 printf("%s: %d extents found", filename, num_extents);
544         /* count, and thus expected, only set for indirect FIBMAP'd files */
545         if (is_ext2 && expected && expected < num_extents)
546                 printf(", perfection would be %d extent%s\n", expected,
547                         (expected > 1) ? "s" : "");
548         else
549                 fputc('\n', stdout);
550 out_close:
551         close(fd);
552
553         return rc;
554 }
555
/*
 * Print the command synopsis for `progname` on stderr and terminate the
 * process with exit status 1.  Does not return.
 */
static void usage(const char *progname)
{
        fprintf(stderr,
                "Usage: %s [-b{blocksize}] [-BeksvxX] file ...\n", progname);

        exit(1);
}
562
563 int main(int argc, char**argv)
564 {
565         char **cpp;
566         int rc = 0, c;
567
568         while ((c = getopt(argc, argv, "Bb::eklsvxX")) != EOF) {
569                 switch (c) {
570                 case 'B':
571                         force_bmap++;
572                         force_extent = 0;
573                         break;
574                 case 'b':
575                         if (optarg) {
576                                 char *end;
577                                 blocksize = strtoul(optarg, &end, 0);
578                                 if (end) {
579 #if __GNUC_PREREQ (7, 0)
580 #pragma GCC diagnostic push
581 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
582 #endif
583                                         switch (end[0]) {
584                                         case 'g':
585                                         case 'G':
586                                                 blocksize *= 1024;
587                                                 /* fall through */
588                                         case 'm':
589                                         case 'M':
590                                                 blocksize *= 1024;
591                                                 /* fall through */
592                                         case 'k':
593                                         case 'K':
594                                                 blocksize *= 1024;
595                                                 break;
596                                         default:
597                                                 break;
598                                         }
599 #if __GNUC_PREREQ (7, 0)
600 #pragma GCC diagnostic pop
601 #endif
602                                 }
603                         } else { /* Allow -b without argument for compat. Remove
604                                   * this eventually so "-b {blocksize}" works */
605                                 fprintf(stderr, "%s: -b needs a blocksize "
606                                         "option, assuming 1024-byte blocks.\n",
607                                         argv[0]);
608                                 blocksize = 1024;
609                         }
610                         break;
611                 case 'e':
612                         force_extent++;
613                         if (!verbose)
614                                 verbose++;
615                         break;
616                 case 'k':
617                         blocksize = 1024;
618                         break;
619                 case 'l':
620                         device_offset++;
621                         break;
622                 case 's':
623                         sync_file++;
624                         break;
625                 case 'v':
626                         verbose++;
627                         break;
628                 case 'x':
629                         xattr_map++;
630                         break;
631                 case 'X':
632                         ext_fmt = hex_fmt;
633                         break;
634                 default:
635                         usage(argv[0]);
636                         break;
637                 }
638         }
639
640         if (optind == argc)
641                 usage(argv[0]);
642
643         for (cpp = argv + optind; *cpp != NULL; cpp++) {
644                 int rc2 = frag_report(*cpp);
645
646                 if (rc2 < 0 && rc == 0)
647                         rc = rc2;
648         }
649
650         return -rc;
651 }
652 #endif