+A large part of this code is from the generic VFS code in fs/ioctl.c in the
+upstream kernel.
+
+Index: linux-2.6.18.i386/fs/ext4/ioctl.c
+===================================================================
+--- linux-2.6.18.i386.orig/fs/ext4/ioctl.c
++++ linux-2.6.18.i386/fs/ext4/ioctl.c
+@@ -17,6 +17,162 @@
+ #include "ext4_jbd2.h"
+ #include "ext4.h"
+
++#include "fiemap.h"
++
++/* So that the fiemap access checks can't overflow on 32 bit machines. */
++#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent))
++
++/**
++ * fiemap_fill_next_extent - Fiemap helper function
++ * @fieinfo:	Fiemap context passed into ->fiemap
++ * @logical:	Extent logical start offset, in bytes
++ * @phys:	Extent physical start offset, in bytes
++ * @len:	Extent length, in bytes
++ * @flags:	FIEMAP_EXTENT flags that describe this extent
++ * @dev:	Device on which this extent resides
++ *
++ * Called from the file system ->fiemap callback. Builds one extent
++ * record from the arguments and copies it to user memory. On
++ * success, the extent count in @fieinfo is incremented.
++ *
++ * Returns 0 on success, -EFAULT if the copy to user memory fails, 1 if
++ * the user array is now full or the extent carries FIEMAP_EXTENT_LAST.
++ */
++/* Input flags that imply one of the derived flags set below. */
++#define SET_UNKNOWN_FLAGS	(FIEMAP_EXTENT_DELALLOC)
++#define SET_NO_DIRECT_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED | FIEMAP_EXTENT_NET)
++#define SET_NO_UNMOUNTED_IO_FLAGS	(FIEMAP_EXTENT_DATA_ENCRYPTED)
++#define SET_NOT_ALIGNED_FLAGS	(FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
++int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
++			    u64 phys, u64 len, u32 flags, dev_t dev)
++{
++	struct fiemap_extent rec = { 0 };
++	struct fiemap_extent *slot;
++	int is_last = (flags & FIEMAP_EXTENT_LAST) ? 1 : 0;
++	u32 implied = 0;
++
++	/* a zero-sized user array means: only count the extents */
++	if (!fieinfo->fi_extents_max) {
++		fieinfo->fi_extents_mapped++;
++		return is_last;
++	}
++
++	if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
++		return 1;
++
++	if (flags & SET_UNKNOWN_FLAGS)
++		implied |= FIEMAP_EXTENT_UNKNOWN;
++	if (flags & SET_NO_DIRECT_FLAGS)
++		implied |= FIEMAP_EXTENT_NO_DIRECT;
++	if (flags & SET_NOT_ALIGNED_FLAGS)
++		implied |= FIEMAP_EXTENT_NOT_ALIGNED;
++	if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
++		implied |= FIEMAP_EXTENT_ENCODED;
++
++	rec.fe_logical = logical;
++	rec.fe_physical = phys;
++	rec.fe_length = len;
++	rec.fe_flags = flags | implied;
++	rec.fe_device = new_encode_dev(dev);
++
++	slot = fieinfo->fi_extents_start + fieinfo->fi_extents_mapped;
++	if (copy_to_user(slot, &rec, sizeof(rec)))
++		return -EFAULT;
++
++	if (++fieinfo->fi_extents_mapped == fieinfo->fi_extents_max)
++		return 1;
++	return is_last;
++}
++
++/*
++ * Check a fiemap byte range against sb->s_maxbytes and store in *new_len
++ * the length trimmed to that limit; -EINVAL if empty, -EFBIG if past it.
++ */
++static int fiemap_check_ranges(struct super_block *sb,
++			       u64 start, u64 len, u64 *new_len)
++{
++	u64 limit = sb->s_maxbytes;
++
++	*new_len = len;
++	if (!len)
++		return -EINVAL;
++	if (start > limit)
++		return -EFBIG;
++
++	/* clamp when the range would run past the limit */
++	if (len > limit || limit - len < start)
++		*new_len = limit - start;
++	return 0;
++}
++
++/*
++ * fiemap_check_flags - check validity of requested flags for fiemap
++ * @fieinfo:	Fiemap context passed into ->fiemap
++ * @fs_flags:	Set of fiemap flags that the file system understands
++ *
++ * Called from file system ->fiemap callback. A request flag is accepted
++ * only if it is both in FIEMAP_FLAGS_COMPAT and in @fs_flags. If the
++ * user supplied anything else, those unsupported bits replace the flags
++ * stored in @fieinfo and -EBADR is returned, which lets ioctl_fiemap()
++ * hand them back to userspace. For this reason, a return code of -EBADR
++ * should be preserved by callers.
++ *
++ * Returns 0 on success, -EBADR on bad flags.
++ */
++int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags)
++{
++	u32 supported = FIEMAP_FLAGS_COMPAT & fs_flags;
++	u32 rejected = fieinfo->fi_flags & ~supported;
++
++	if (!rejected)
++		return 0;
++
++	/* tell the caller which bits were refused */
++	fieinfo->fi_flags = rejected;
++	return -EBADR;
++}
++
++/*
++ * FIEMAP ioctl: validate the user's request, let ext4_fiemap() fill the
++ * extent array that follows it, then copy the updated header back.
++ */
++int ioctl_fiemap(struct inode *inode, struct file *filp, unsigned long arg)
++{
++	struct super_block *sb = inode->i_sb;
++	struct fiemap_extent_info fieinfo = {0, };
++	struct fiemap req;
++	u64 len;
++	int rc;
++
++	if (copy_from_user(&req, (struct fiemap __user *) arg, sizeof(req)))
++		return -EFAULT;
++	if (req.fm_extent_count > FIEMAP_MAX_EXTENTS)
++		return -EINVAL;
++
++	rc = fiemap_check_ranges(sb, req.fm_start, req.fm_length, &len);
++	if (rc)
++		return rc;
++
++	fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(req));
++	fieinfo.fi_extents_max = req.fm_extent_count;
++	fieinfo.fi_flags = req.fm_flags;
++
++	if (req.fm_extent_count != 0 &&
++	    !access_ok(VERIFY_WRITE, (void *)arg,
++		       offsetof(struct fiemap, fm_extents[req.fm_extent_count])))
++		return -EFAULT;
++
++	if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC)
++		filemap_write_and_wait(inode->i_mapping);
++
++	rc = ext4_fiemap(inode, &fieinfo, req.fm_start, len);
++	req.fm_flags = fieinfo.fi_flags;
++	req.fm_mapped_extents = fieinfo.fi_extents_mapped;
++	if (copy_to_user((char *)arg, &req, sizeof(req)))
++		rc = -EFAULT;
++	return rc;
++}
++
+ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+ {
+ struct inode *inode = filp->f_dentry->d_inode;
+@@ -257,6 +413,10 @@ flags_err:
+ case EXT4_IOC_MIGRATE:
+ return ext4_ext_migrate(inode, filp, cmd, arg);
+
++ case EXT4_IOC_FIEMAP: {
++ return ioctl_fiemap(inode, filp, arg);
++ }
++
+ default:
+ return -ENOTTY;
+ }
+Index: linux-2.6.18.i386/fs/ext4/ext4.h
+===================================================================
+--- linux-2.6.18.i386.orig/fs/ext4/ext4.h
++++ linux-2.6.18.i386/fs/ext4/ext4.h
+@@ -300,6 +300,7 @@ struct ext4_new_group_data {
+ #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
+ #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
+ #define EXT4_IOC_MIGRATE _IO('f', 7)
++#define EXT4_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
+
+ /*
+ * ioctl commands in 32 bit emulation
+@@ -317,6 +318,8 @@ struct ext4_new_group_data {
+ #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
+ #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
+
++/* FIEMAP flags supported by ext4 */
++#define EXT4_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC)
+
+ /*
+ * Mount options
+@@ -1115,6 +1118,9 @@ extern int ext4_page_mkwrite(struct vm_a
+ /* ioctl.c */
+ extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
+ extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
++struct fiemap_extent_info;
++extern int ext4_fiemap(struct inode *, struct fiemap_extent_info *, __u64,
++ __u64);
+
+ /* migrate.c */
+ extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
+Index: linux-2.6.18.i386/fs/ext4/ext4_extents.h
+===================================================================
+--- linux-2.6.18.i386.orig/fs/ext4/ext4_extents.h
++++ linux-2.6.18.i386/fs/ext4/ext4_extents.h
+@@ -128,6 +128,22 @@ struct ext4_ext_path {
+ #define EXT_MAX_BLOCK 0xffffffff
+
+ /*
++ * Callback type invoked by ext4_ext_walk_space() for each extent or gap.
++ * negative retcode - error, ends the walk and is returned to the caller
++ * positive retcode - signal for ext4_ext_walk_space(), see EXT_* below
++ * callback must leave a valid range in the ext4_ext_cache it was passed
++ */
++typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
++ struct ext4_ext_cache *,
++ struct ext4_extent *, void *);
++
++#define HAVE_EXT_PREPARE_CB_EXTENT
++
++#define EXT_CONTINUE 0 /* go on with the range after the reported one */
++#define EXT_BREAK 1 /* stop the walk; the walker then returns 0 */
++#define EXT_REPEAT 2 /* look the same block up again */
++
++/*
+ * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
+ * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
+ * MSB of ee_len field in the extent datastructure to signify if this
+@@ -223,6 +239,8 @@ extern int ext4_ext_try_to_merge(struct
+ struct ext4_extent *);
+ extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
+ extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
++extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
++ ext_prepare_callback, void *);
+ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
+ struct ext4_ext_path *);
+ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
+Index: linux-2.6.18.i386/fs/ext4/extents.c
+===================================================================
+--- linux-2.6.18.i386.orig/fs/ext4/extents.c
++++ linux-2.6.18.i386/fs/ext4/extents.c
+@@ -44,7 +44,7 @@
+ #include <asm/uaccess.h>
+ #include "ext4_jbd2.h"
+ #include "ext4_extents.h"
+-
++#include "fiemap.h"
+
+ /*
+ * ext_pblock:
+@@ -1597,6 +1597,113 @@ cleanup:
+ return err;
+ }
+
++int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
++ ext4_lblk_t num, ext_prepare_callback func,
++ void *cbdata)
++{
++ struct ext4_ext_path *path = NULL;
++ struct ext4_ext_cache cbex;
++ struct ext4_extent *ex;
++ ext4_lblk_t next, start = 0, end = 0;
++ ext4_lblk_t last = block + num;
++ int depth, exists, err = 0;
++ /* Call func for each extent or gap found in [block, block + num). */
++ BUG_ON(func == NULL);
++ BUG_ON(inode == NULL);
++
++ while (block < last && block != EXT_MAX_BLOCK) {
++ num = last - block; /* blocks still to be walked */
++ /* find extent for this block */
++ path = ext4_ext_find_extent(inode, block, path);
++ if (IS_ERR(path)) {
++ err = PTR_ERR(path);
++ path = NULL;
++ break;
++ }
++
++ depth = ext_depth(inode);
++ BUG_ON(path[depth].p_hdr == NULL);
++ ex = path[depth].p_ext;
++ next = ext4_ext_next_allocated_block(path);
++
++ exists = 0;
++ if (!ex) {
++ /* there is no extent yet, so the whole
++ * requested range is a gap */
++ start = block;
++ end = block + num;
++ } else if (le32_to_cpu(ex->ee_block) > block) {
++ /* gap in front of the found extent */
++ start = block;
++ end = le32_to_cpu(ex->ee_block);
++ if (block + num < end)
++ end = block + num;
++ } else if (block >= le32_to_cpu(ex->ee_block)
++ + ext4_ext_get_actual_len(ex)) {
++ /* gap behind the found extent, bounded by the next allocated block */
++ start = block;
++ end = block + num;
++ if (end >= next)
++ end = next;
++ } else if (block >= le32_to_cpu(ex->ee_block)) {
++ /*
++ * some part of requested space is covered
++ * by found extent
++ */
++ start = block;
++ end = le32_to_cpu(ex->ee_block)
++ + ext4_ext_get_actual_len(ex);
++ if (block + num < end)
++ end = block + num;
++ exists = 1;
++ } else {
++ BUG();
++ }
++ BUG_ON(end <= start);
++
++ if (!exists) {
++ cbex.ec_block = start;
++ cbex.ec_len = end - start;
++ cbex.ec_start = 0;
++ cbex.ec_type = EXT4_EXT_CACHE_GAP;
++ } else { /* report the extent as a whole, not just [start, end) */
++ cbex.ec_block = le32_to_cpu(ex->ee_block);
++ cbex.ec_len = ext4_ext_get_actual_len(ex);
++ cbex.ec_start = ext_pblock(ex);
++ cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
++ }
++
++ BUG_ON(cbex.ec_len == 0);
++ err = func(inode, path, &cbex, ex, cbdata);
++ ext4_ext_drop_refs(path);
++
++ if (err < 0)
++ break;
++
++ if (err == EXT_REPEAT)
++ continue; /* block is unchanged: same lookup again */
++ else if (err == EXT_BREAK) {
++ err = 0; /* a requested stop is not an error */
++ break;
++ }
++
++ if (ext_depth(inode) != depth) {
++ /* depth was changed. we have to realloc path */
++ kfree(path);
++ path = NULL;
++ }
++
++ block = cbex.ec_block + cbex.ec_len; /* resume after the reported range */
++ }
++
++ if (path) {
++ ext4_ext_drop_refs(path);
++ kfree(path);
++ }
++
++ return err;
++}
++
+ static void
+ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
+ __u32 len, ext4_fsblk_t start, int type)
+@@ -2953,3 +3060,100 @@ retry:
+ return ret > 0 ? ret2 : ret;
+ }
+ #endif
++
++/*
++ * Callback function called for each extent to gather FIEMAP information.
++ * Returns EXT_CONTINUE to keep walking, EXT_BREAK once the last extent was
++ * reported or the user array is full, or the -errno of a failed copy-out.
++ */
++int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
++		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
++		       void *data)
++{
++	struct fiemap_extent_info *fieinfo = data;
++	unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
++	__u64 logical, physical, length;
++	__u32 flags = 0;
++	int error;
++
++	logical = (__u64)newex->ec_block << blksize_bits;
++
++	if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
++		pgoff_t offset;
++		struct page *page;
++		struct buffer_head *bh = NULL;
++
++		offset = logical >> PAGE_SHIFT;
++		page = find_get_page(inode->i_mapping, offset);
++		if (!page)
++			return EXT_CONTINUE;
++
++		/* A gap is reported only if its first buffer is delayed. */
++		if (page_has_buffers(page))
++			bh = page_buffers(page);
++		if (bh && buffer_delay(bh))
++			flags |= FIEMAP_EXTENT_DELALLOC;
++
++		/* Drop the find_get_page() reference on every exit path,
++		 * including the one taken when the page has no buffers. */
++		page_cache_release(page);
++		if (!(flags & FIEMAP_EXTENT_DELALLOC))
++			return EXT_CONTINUE;
++	}
++
++	physical = (__u64)newex->ec_start << blksize_bits;
++	length = (__u64)newex->ec_len << blksize_bits;
++
++	if (ex && ext4_ext_is_uninitialized(ex))
++		flags |= FIEMAP_EXTENT_UNWRITTEN;
++
++	/*
++	 * If this extent reaches EXT_MAX_BLOCK, it must be last.  Compare in
++	 * blocks: logical and length are byte counts, EXT_MAX_BLOCK is not.
++	 *
++	 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
++	 * this indicates no more allocated blocks.
++	 *
++	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
++	 */
++	if ((__u64)newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK ||
++	    ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
++		flags |= FIEMAP_EXTENT_LAST;
++
++	error = fiemap_fill_next_extent(fieinfo, logical, physical,
++					length, flags, inode->i_sb->s_dev);
++	if (error < 0)
++		return error;
++	if (error == 1)
++		return EXT_BREAK;
++
++	return EXT_CONTINUE;
++}
++
++/* Report extents backing bytes [start, start + len); returns 0 or -errno. */
++int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
++		__u64 start, __u64 len)
++{
++	ext4_fsblk_t start_blk, last_blk;
++	int error;
++
++	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
++		return -EOPNOTSUPP;
++	if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS_COMPAT))
++		return -EBADR;
++
++	/* Walk from the block holding the first byte to the one holding the
++	 * last, so that an unaligned start cannot drop the final block. */
++	start_blk = start >> inode->i_sb->s_blocksize_bits;
++	last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
++	if (last_blk >= EXT_MAX_BLOCK)
++		last_blk = EXT_MAX_BLOCK - 1;
++	if (len == 0 || start_blk > last_blk)
++		return 0;
++
++	down_write(&EXT4_I(inode)->i_data_sem);
++	error = ext4_ext_walk_space(inode, start_blk, last_blk - start_blk + 1,
++				    ext4_ext_fiemap_cb, fieinfo);
++	up_write(&EXT4_I(inode)->i_data_sem);
++	return error;
++}
+Index: linux-2.6.18.i386/fs/ext4/fiemap.h
+===================================================================
+--- /dev/null
++++ linux-2.6.18.i386/fs/ext4/fiemap.h
+@@ -0,0 +1,85 @@
++/*
++ * FIEMAP ioctl infrastructure.
++ *
++ * Copyright 2008 Sun Microsystems, Inc
++ *
++ * Author: Kalpak Shah <kalpak.shah@sun.com>
++ * Andreas Dilger <adilger@sun.com>
++ */
++
++#ifndef _LINUX_EXT4_FIEMAP_H
++#define _LINUX_EXT4_FIEMAP_H
++
++struct fiemap_extent { /* one extent record copied to the user's array */
++ __u64 fe_logical; /* logical offset in bytes for the start of
++ * the extent from the beginning of the file */
++ __u64 fe_physical; /* physical offset in bytes for the start
++ * of the extent from the beginning of the disk */
++ __u64 fe_length; /* length in bytes for this extent */
++ __u64 fe_reserved64[2]; /* left zero by fiemap_fill_next_extent() */
++ __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
++ __u32 fe_device; /* device number, as encoded by new_encode_dev() */
++ __u32 fe_reserved[2]; /* left zero by fiemap_fill_next_extent() */
++};
++
++struct fiemap { /* request header exchanged with userspace by the ioctl */
++ __u64 fm_start; /* logical offset (inclusive) at
++ * which to start mapping (in) */
++ __u64 fm_length; /* logical length of mapping which
++ * userspace wants, in bytes (in) */
++ __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
++ __u32 fm_mapped_extents;/* number of extents that were mapped (out) */
++ __u32 fm_extent_count; /* size of fm_extents array; 0 only counts (in) */
++ __u32 fm_reserved; /* copied back to userspace unchanged */
++ struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
++};
++
++/*
++ * FIEMAP helper definition: per-request state handed to the fs callback.
++ */
++struct fiemap_extent_info {
++ unsigned int fi_flags; /* Flags from user; the refused ones on -EBADR */
++ unsigned int fi_extents_mapped; /* Number of mapped extents */
++ unsigned int fi_extents_max; /* Size of fiemap_extent array */
++ struct fiemap_extent *fi_extents_start; /* Start of the user's fiemap_extent array */
++};
++
++int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
++int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
++			    u64 phys, u64 len, u32 flags, dev_t dev);
++
++#define FIEMAP_MAX_OFFSET (~0ULL)
++
++#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
++#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
++
++/* Flags accepted in a request; ldiskfs itself only supports FLAG_SYNC */
++#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
++
++#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */
++#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */
++#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending.
++ * Sets EXTENT_UNKNOWN. */
++#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read
++ * while fs is unmounted */
++#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs.
++ * Sets EXTENT_NO_DIRECT. */
++#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be
++ * block aligned. */
++#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata.
++ * Sets EXTENT_NOT_ALIGNED.*/
++#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block.
++ * Sets EXTENT_NOT_ALIGNED.*/
++#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but
++ * no data (i.e. zero). */
++#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively
++ * support extents. Result
++ * merged for efficiency. */
++
++/* Lustre specific flags - use a high bit, don't conflict with upstream flag */
++#define FIEMAP_EXTENT_NO_DIRECT 0x40000000 /* Data mapping undefined */
++#define FIEMAP_EXTENT_NET 0x80000000 /* Data stored remotely.
++ * Sets NO_DIRECT flag */
++
++#endif /* _LINUX_EXT4_FIEMAP_H */
++