Add support for multiple-device filesystems by defining a new
fe_device field in the fiemap_extent structure. This allows
printing the filesystem-relative or Linux block device number
associated with each extent of a file. If a single filesystem
extent is mirrored to multiple block devices, the fe_device
field can be used to disambiguate the multiple copies.
If the "-l" (device-logical) option is given to filefrag, then
all extents for a particular device of a file are returned
before returning extents for the next device. This makes it
easier to see if extent allocation within a single device is
contiguous, instead of returning all of the blocks of a file
interleaved in file-logical-offset order.
Lustre uses the high 16 bits of fiemap_extent:fe_device to record
the absolute stripe number being processed, so that the next FIEMAP
call continues from that stripe. This allows files that have
multiple objects on the same device to work properly (FLR, PFL).
Older versions of filefrag will print the stripe number together
with the OST index.
Change-Id: Ifb40cc159ddc61d2296c494f0e899ac11bf88b60
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
__u64 fe_length; /* length in bytes for this extent */
__u64 fe_reserved64[2];
__u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
__u64 fe_length; /* length in bytes for this extent */
__u64 fe_reserved64[2];
__u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
+ __u32 fe_device; /* device number (fs-specific if FIEMAP_EXTENT_NET)
+ * only the low 16 bits hold the device number */
+ __u32 fe_reserved[2];
#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
#define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */
#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
#define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */
+#define FIEMAP_FLAG_DEVICE_ORDER 0x40000000 /* return device ordered mapping */
#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
#define EXT4_FIEMAP_EXTENT_HOLE 0x08000000 /* Entry in extent status
cache for a hole*/
#define EXT4_FIEMAP_EXTENT_HOLE 0x08000000 /* Entry in extent status
cache for a hole*/
+/* Network filesystem flags - use a high bit, don't conflict with upstream */
+#define FIEMAP_EXTENT_NET 0x80000000 /* Data stored remotely. */
#endif /* _LINUX_FIEMAP_H */
#endif /* _LINUX_FIEMAP_H */
Pre-load the ext4 extent status cache for the file. This is not
supported on all kernels, and is only supported on ext4 file systems.
.TP
Pre-load the ext4 extent status cache for the file. This is not
supported on all kernels, and is only supported on ext4 file systems.
.TP
+.B \-l
+Extents are displayed in device-logical offset order: all extents of
+the file on one device are listed before the extents on the next device.
+.TP
.B \-s
Sync the file before requesting the mapping.
.TP
.B \-s
Sync the file before requesting the mapping.
.TP
.TP
.B \-X
Display extent block numbers in hexadecimal format.
.TP
.B \-X
Display extent block numbers in hexadecimal format.
+.TP
+.B \-V
+Print the version number of the program and library. If given twice, also
+print the FIEMAP flags that are understood by the current version.
.SH AUTHOR
.B filefrag
was written by Theodore Ts'o <tytso@mit.edu>.
.SH AUTHOR
.B filefrag
was written by Theodore Ts'o <tytso@mit.edu>.
int force_bmap; /* force use of FIBMAP instead of FIEMAP */
int force_extent; /* print output in extent format always */
int use_extent_cache; /* Use extent cache */
int force_bmap; /* force use of FIBMAP instead of FIEMAP */
int force_extent; /* print output in extent format always */
int use_extent_cache; /* Use extent cache */
+int device_offset; /* extents report device-relative offsets */
int logical_width = 8;
int physical_width = 10;
const char *ext_fmt = "%4d: %*llu..%*llu: %*llu..%*llu: %6llu: %s\n";
const char *hex_fmt = "%4d: %*llx..%*llx: %*llx..%*llx: %6llx: %s\n";
int logical_width = 8;
int physical_width = 10;
const char *ext_fmt = "%4d: %*llu..%*llu: %*llu..%*llu: %6llu: %s\n";
const char *hex_fmt = "%4d: %*llx..%*llx: %*llx..%*llx: %6llx: %s\n";
-#define FILEFRAG_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
+#define FILEFRAG_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR |\
+ FIEMAP_FLAG_DEVICE_ORDER)
#define FIBMAP _IO(0x00, 1) /* bmap access */
#define FIGETBSZ _IO(0x00, 2) /* get the block size used for bmap */
#define FIBMAP _IO(0x00, 1) /* bmap access */
#define FIGETBSZ _IO(0x00, 2) /* get the block size used for bmap */
{
printf(" ext: %*s %*s length: %*s flags:\n",
logical_width * 2 + 3,
{
printf(" ext: %*s %*s length: %*s flags:\n",
logical_width * 2 + 3,
+ device_offset ? "device_logical:" : "logical_offset:",
physical_width * 2 + 3, "physical_offset:",
physical_width * 2 + 3, "physical_offset:",
- physical_width + 1,
- "expected:");
+ device_offset ? 5 : physical_width + 1,
+ device_offset ? " dev:" : "expected:");
}
static void print_flag(__u32 *flags, __u32 mask, char *buf, const char *name)
}
static void print_flag(__u32 *flags, __u32 mask, char *buf, const char *name)
print_flag(&fe_flags, FIEMAP_EXTENT_SHARED, flags, "shared,");
print_flag(&fe_flags, EXT4_FIEMAP_EXTENT_HOLE, flags, "hole,");
print_flag(&fe_flags, FIEMAP_EXTENT_SHARED, flags, "shared,");
print_flag(&fe_flags, EXT4_FIEMAP_EXTENT_HOLE, flags, "hole,");
+ print_flag(&fe_flags, FIEMAP_EXTENT_NET, flags, "net,");
+
if (!print_unknown)
goto out;
if (!print_unknown)
goto out;
physical_blk = fm_extent->fe_physical >> blk_shift;
}
physical_blk = fm_extent->fe_physical >> blk_shift;
}
+ if (device_offset)
+ sprintf(flags, "%04x: ", fm_extent->fe_device & 0xffff);
+ else if (expected &&
!(fm_extent->fe_flags & FIEMAP_EXTENT_UNKNOWN) &&
!(fm_extent->fe_flags & EXT4_FIEMAP_EXTENT_HOLE))
sprintf(flags, ext_fmt == hex_fmt ? "%*llx: " : "%*llu: ",
physical_width, expected >> blk_shift);
!(fm_extent->fe_flags & FIEMAP_EXTENT_UNKNOWN) &&
!(fm_extent->fe_flags & EXT4_FIEMAP_EXTENT_HOLE))
sprintf(flags, ext_fmt == hex_fmt ? "%*llx: " : "%*llu: ",
physical_width, expected >> blk_shift);
- else
- sprintf(flags, "%.*s ", physical_width, " ");
print_flags(fm_extent->fe_flags, flags, sizeof(flags), 1);
print_flags(fm_extent->fe_flags, flags, sizeof(flags), 1);
unsigned long cmd = FS_IOC_FIEMAP;
int fiemap_header_printed = 0;
int tot_extents = 0, n = 0;
unsigned long cmd = FS_IOC_FIEMAP;
int fiemap_header_printed = 0;
int tot_extents = 0, n = 0;
+ int previous_device = -1;
if (use_extent_cache)
cmd = EXT4_IOC_GET_ES_CACHE;
if (use_extent_cache)
cmd = EXT4_IOC_GET_ES_CACHE;
+ if (device_offset) {
+ flags |= FIEMAP_FLAG_DEVICE_ORDER;
+ memset(fm_ext, 0, sizeof(struct fiemap_extent));
+ }
+
+retry_wo_device_order:
do {
fiemap->fm_length = ~0ULL;
fiemap->fm_flags = flags;
do {
fiemap->fm_length = ~0ULL;
fiemap->fm_flags = flags;
"flags %x\n",
fiemap->fm_flags);
fiemap_incompat_printed = 1;
"flags %x\n",
fiemap->fm_flags);
fiemap_incompat_printed = 1;
+ } else if (rc == EBADR && (fiemap->fm_flags &
+ FIEMAP_FLAG_DEVICE_ORDER)) {
+ flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
+ goto retry_wo_device_order;
fm_last.fe_length;
expected = fm_last.fe_physical +
fm_ext[i].fe_logical - fm_last.fe_logical;
fm_last.fe_length;
expected = fm_last.fe_physical +
fm_ext[i].fe_logical - fm_last.fe_logical;
- if (fm_ext[i].fe_logical != 0 &&
- fm_ext[i].fe_physical != expected &&
- fm_ext[i].fe_physical != expected_dense) {
+ if ((fm_ext[i].fe_logical != 0 &&
+ fm_ext[i].fe_physical != expected &&
+ fm_ext[i].fe_physical != expected_dense) ||
+ ((fm_ext[i].fe_device & 0xffff) != previous_device)) {
tot_extents++;
} else {
expected = 0;
tot_extents++;
} else {
expected = 0;
last = 1;
fm_last = fm_ext[i];
n++;
last = 1;
fm_last = fm_ext[i];
n++;
+ previous_device = fm_ext[i].fe_device & 0xffff;
- fiemap->fm_start = (fm_ext[i - 1].fe_logical +
- fm_ext[i - 1].fe_length);
+ /* For DEVICE_ORDER mappings, if EXTENT_LAST has not yet been found,
+ * fm_start must stay the same as it was for the earlier ioctl.
+ * The first extent is used to pass the end offset and device
+ * of the last FIEMAP call. Otherwise, we ask for extents
+ * starting from where the last mapping ended. */
+ if (flags & FIEMAP_FLAG_DEVICE_ORDER) {
+ fm_ext[0].fe_logical = fm_ext[i - 1].fe_logical +
+ fm_ext[i - 1].fe_length;
+ fm_ext[0].fe_device = fm_ext[i - 1].fe_device;
+ fiemap->fm_start = 0;
+ } else {
+ fiemap->fm_start = fm_ext[i - 1].fe_logical +
+ fm_ext[i - 1].fe_length;
+ }
} while (last == 0);
*num_extents = tot_extents;
} while (last == 0);
*num_extents = tot_extents;
memset(&fm_ext, 0, sizeof(fm_ext));
memset(&fm_last, 0, sizeof(fm_last));
if (force_extent) {
memset(&fm_ext, 0, sizeof(fm_ext));
memset(&fm_last, 0, sizeof(fm_last));
if (force_extent) {
+ fm_ext.fe_device = st->st_dev;
fm_ext.fe_flags = FIEMAP_EXTENT_MERGED;
}
fm_ext.fe_flags = FIEMAP_EXTENT_MERGED;
}
+ /* Check if filesystem is Lustre. Always print in extent format
+ * with 1kB blocks, using the device-relative logical offsets. */
+ if (fsinfo.f_type == LUSTRE_SUPER_MAGIC) {
+ device_offset = 1;
+ blocksize = blocksize ?: 1024;
+ }
+
if (is_ext2) {
long cylgroups = div_ceil(fsinfo.f_blocks, blksize * 8);
if (is_ext2) {
long cylgroups = div_ceil(fsinfo.f_blocks, blksize * 8);
int rc = 0, c;
int version = 0;
int rc = 0, c;
int version = 0;
- while ((c = getopt(argc, argv, "Bb::eEkPsvVxX")) != EOF) {
+ while ((c = getopt(argc, argv, "Bb::eEkPlsvxXV")) != EOF) {
switch (c) {
case 'B':
force_bmap++;
switch (c) {
case 'B':
force_bmap++;
break;
case 'b':
if (optarg) {
break;
case 'b':
if (optarg) {
case 'P':
precache_file++;
break;
case 'P':
precache_file++;
break;
+ case 'l':
+ device_offset++;
+ break;
case 's':
sync_file++;
break;
case 's':
sync_file++;
break;