/*
 * Patch summary: FIEMAP ioctl support for ext3 on linux-2.6.18.  This text
 * sits ahead of the first "Index:" header and is not part of any hunk.
 *
 * fs/ext3/ioctl.c
 *	ext3_ioctl() hands EXT3_IOC_FIEMAP to ext3_fiemap().
 * include/linux/ext3_fs.h
 *	Defines EXT3_IOC_FIEMAP as _IOWR('f', 10, struct fiemap), moves the
 *	EXT3_IOC_GETRSVSZ/EXT3_IOC_SETRSVSZ defines up beside the other 'f'
 *	ioctls, and declares ext3_fiemap().
 * include/linux/ext3_extents.h
 *	ext_prepare_callback gains a struct ext3_extent * argument, with
 *	HAVE_EXT_PREPARE_CB_EXTENT defined alongside it; adds
 *	EXT_INIT_MAX_LEN, EXT_UNINIT_MAX_LEN and ext3_ext_is_uninitialized().
 * fs/ext3/extents.c
 *	ext3_ext_walk_space() passes the extent to its callback; adds
 *	struct fiemap_internal, ext3_ext_fiemap_cb() and ext3_fiemap().
 * fs/ext3/fiemap.h
 *	New file: struct fiemap, struct fiemap_extent, FIEMAP_* flags.
 *
 * NOTE(review): the "#include" context lines in the ioctl.c and extents.c
 * hunks carry no header names and the original line breaks are gone, so
 * this copy presumably will not apply as-is - restore it from the original
 * patch before use.
 */
Index: linux-2.6.18/fs/ext3/ioctl.c =================================================================== --- linux-2.6.18.orig/fs/ext3/ioctl.c +++ linux-2.6.18/fs/ext3/ioctl.c @@ -15,6 +15,7 @@ #include #include #include +#include "fiemap.h" int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg) @@ -272,6 +272,9 @@ flags_err: return err; } + case EXT3_IOC_FIEMAP: { + return ext3_fiemap(inode, filp, cmd, arg); + } default: Index: linux-2.6.18/include/linux/ext3_fs.h =================================================================== --- linux-2.6.18.orig/include/linux/ext3_fs.h +++ linux-2.6.18/include/linux/ext3_fs.h @@ -249,7 +249,6 @@ struct ext3_new_group_data { __u32 free_blocks_count; }; - /* * ioctl commands */ @@ -257,15 +256,16 @@ struct ext3_new_group_data { #define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS #define EXT3_IOC_GETVERSION _IOR('f', 3, long) #define EXT3_IOC_SETVERSION _IOW('f', 4, long) -#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) +#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) #define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input) +#define EXT3_IOC_FIEMAP _IOWR('f', 10, struct fiemap) #define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION #define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION #ifdef CONFIG_JBD_DEBUG #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif -#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) -#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) /* * ioctl commands in 32 bit emulation @@ -1117,6 +1117,8 @@ ext3_get_blocks_wrap(handle_t *handle, s bh->b_size = (ret << inode->i_blkbits); return ret; } +extern int ext3_fiemap(struct inode *, struct file *, unsigned int, + unsigned long); #endif /* __KERNEL__ */ Index: linux-2.6.18/include/linux/ext3_extents.h =================================================================== --- linux-2.6.18.orig/include/linux/ext3_extents.h
+++ linux-2.6.18/include/linux/ext3_extents.h @@ -142,8 +142,9 @@ struct ext3_ext_path { * callback must return valid extent (passed or newly created) */ typedef int (*ext_prepare_callback)(struct inode *, struct ext3_ext_path *, - struct ext3_ext_cache *, - void *); + struct ext3_ext_cache *, + struct ext3_extent *, void *); +#define HAVE_EXT_PREPARE_CB_EXTENT #define EXT_CONTINUE 0 #define EXT_BREAK 1 @@ -152,6 +152,26 @@ typedef int (*ext_prepare_callback)(stru #define EXT_MAX_BLOCK 0xffffffff +/* + * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an + * initialized extent. This is 2^15 and not (2^16 - 1), since we use the + * MSB of ee_len field in the extent datastructure to signify if this + * particular extent is an initialized extent or an uninitialized (i.e. + * preallocated). + * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an + * uninitialized extent. + * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an + * uninitialized one. In other words, if MSB of ee_len is set, it is an + * uninitialized extent with only one special scenario when ee_len = 0x8000. + * In this case we can not have an uninitialized extent of zero length and + * thus we make it as a special case of initialized extent with 0x8000 length. + * This way we get better extent-to-group alignment for initialized extents. + * Hence, the maximum number of blocks we can have in an *initialized* + * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
+ */ +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) + #define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ #define EXT_HDR_GEN_BITS 24 #define EXT_HDR_GEN_MASK ((1 << EXT_HDR_GEN_BITS) - 1) @@ -219,6 +239,13 @@ ext3_ext_invalidate_cache(struct inode * EXT3_I(inode)->i_cached_extent.ec_type = EXT3_EXT_CACHE_NO; } +static inline int ext3_ext_is_uninitialized(struct ext3_extent *ext) +{ + /* Extent with ee_len of 0x8000 is treated as an initialized extent */ + return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); +} + + extern int ext3_ext_search_left(struct inode *, struct ext3_ext_path *, unsigned long *, unsigned long *); extern int ext3_ext_search_right(struct inode *, struct ext3_ext_path *, unsigned long *, unsigned long *); extern int ext3_extent_tree_init(handle_t *, struct inode *); Index: linux-2.6.18/fs/ext3/extents.c =================================================================== --- linux-2.6.18.orig/fs/ext3/extents.c +++ linux-2.6.18/fs/ext3/extents.c @@ -42,7 +42,7 @@ #include #include #include - +#include "fiemap.h" static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) { @@ -1477,7 +1477,7 @@ int ext3_ext_walk_space(struct inode *in } BUG_ON(cbex.ec_len == 0); - err = func(inode, path, &cbex, cbdata); + err = func(inode, path, &cbex, ex, cbdata); ext3_ext_drop_refs(path); if (err < 0) @@ -2289,6 +2289,184 @@ int ext3_ext_writepage_trans_blocks(stru return needed; }
+/* State shared between ext3_fiemap() and its extent-walk callback. */
+struct fiemap_internal {
+	struct fiemap *fiemap_s;	/* kernel copy of the request header */
+	struct fiemap_extent fm_extent;	/* scratch copy of the current extent */
+	__u64 tot_mapping_len;		/* bytes mapped; as wide as fm_length */
+	char __user *cur_ext_ptr;	/* next free slot in the user array */
+	int current_extent;		/* extents counted so far */
+	int err;			/* error deferred by the callback */
+};
+
+/*
+ * Callback function called for each extent to gather fiemap information.
+ *
+ * Returns EXT_CONTINUE to keep walking or EXT_BREAK to stop.  A failed copy
+ * to user space is reported through fiemap_internal.err, not the return
+ * value.
+ */
+int ext3_ext_fiemap_cb(struct inode *inode, struct ext3_ext_path *path,
+		       struct ext3_ext_cache *newex, struct ext3_extent *ex,
+		       void *data)
+{
+	struct fiemap_internal *fiemap_i = data;
+	struct fiemap *fiemap_s = fiemap_i->fiemap_s;
+	struct fiemap_extent *fm_extent = &fiemap_i->fm_extent;
+	unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
+	/* Widen before adding so the block sum cannot wrap. */
+	u64 ext_end = ((u64)newex->ec_block + newex->ec_len) << blksize_bits;
+	int is_gap = (newex->ec_type == EXT3_EXT_CACHE_GAP);
+	/* NOTE(review): ex is presumably NULL for an empty tree; guard it. */
+	int uninit = (ex != NULL && ext3_ext_is_uninitialized(ex));
+
+	/*
+	 * ext3_ext_walk_space returns a hole for extents that have not been
+	 * allocated yet; a hole reaching i_size ends the walk.
+	 */
+	if (is_gap && !uninit && ext_end >= inode->i_size)
+		return EXT_BREAK;
+
+	/*
+	 * We only need to return number of extents.
+	 */
+	if (fiemap_s->fm_flags & FIEMAP_FLAG_NUM_EXTENTS)
+		goto count_extents;
+
+	/* No room left in the user-supplied extent array. */
+	if (fiemap_i->current_extent >= fiemap_s->fm_extent_count)
+		return EXT_BREAK;
+
+	memset(fm_extent, 0, sizeof(*fm_extent));
+	fm_extent->fe_offset = (__u64)newex->ec_start << blksize_bits;
+	fm_extent->fe_length = (__u64)newex->ec_len << blksize_bits;
+	fiemap_i->tot_mapping_len += fm_extent->fe_length;
+
+	if (is_gap)
+		fm_extent->fe_flags |= FIEMAP_EXTENT_HOLE;
+
+	/*
+	 * NOTE(review): FIEMAP_EXTENT_UNWRITTEN ("space allocated, but no
+	 * data") looks like the closer match for an uninitialized extent;
+	 * the reported flags are left unchanged for existing callers.
+	 */
+	if (uninit)
+		fm_extent->fe_flags |= (FIEMAP_EXTENT_DELALLOC |
+					FIEMAP_EXTENT_UNMAPPED);
+
+	/*
+	 * Mark this fiemap_extent as FIEMAP_EXTENT_EOF if it reaches or
+	 * passes the end of file.
+	 */
+	if (ext_end >= inode->i_size)
+		fm_extent->fe_flags |= FIEMAP_EXTENT_EOF;
+
+	if (copy_to_user(fiemap_i->cur_ext_ptr, fm_extent,
+			 sizeof(*fm_extent))) {
+		fiemap_i->err = -EFAULT;
+		return EXT_BREAK;
+	}
+	fiemap_i->cur_ext_ptr += sizeof(*fm_extent);
+
+count_extents:
+	fiemap_i->current_extent++;
+
+	/*
+	 * Stop once this extent reaches the end of the requested range
+	 * (fm_start + fm_length), but still return the last extent whole.
+	 * Subtracting fm_start, rather than adding it to fm_length, keeps a
+	 * huge fm_length from wrapping.
+	 */
+	if (ext_end >= fiemap_s->fm_start &&
+	    ext_end - fiemap_s->fm_start >= fiemap_s->fm_length)
+		return EXT_BREAK;
+
+	return EXT_CONTINUE;
+}
+
+/*
+ * EXT3_IOC_FIEMAP handler: report the extent mapping of an extent-mapped
+ * inode.  @arg is the user address of a struct fiemap, followed by room
+ * for fm_extent_count struct fiemap_extent entries.  On success
+ * fm_extent_count and fm_length are rewritten with the number of extents
+ * counted and the number of bytes mapped.  Returns 0 or a negative errno.
+ */
+int ext3_fiemap(struct inode *inode, struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	struct fiemap *fiemap_s;
+	struct fiemap_internal fiemap_i;
+	struct fiemap_extent *last_extent;
+	ext3_fsblk_t start_blk;
+	int err;
+
+	if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
+		return -EOPNOTSUPP;
+
+	fiemap_s = kmalloc(sizeof(*fiemap_s), GFP_KERNEL);
+	if (fiemap_s == NULL)
+		return -ENOMEM;
+
+	/* From here on every exit goes through "out" to free fiemap_s. */
+	err = -EFAULT;
+	if (copy_from_user(fiemap_s, (struct fiemap __user *)arg,
+			   sizeof(*fiemap_s)))
+		goto out;
+
+	err = -EOPNOTSUPP;
+	if (fiemap_s->fm_flags & FIEMAP_FLAG_INCOMPAT)
+		goto out;
+
+	if (fiemap_s->fm_flags & FIEMAP_FLAG_SYNC)
+		ext3_sync_file(filp, filp->f_dentry, 1);
+
+	/* Round down: the block that holds fm_start is part of the range. */
+	start_blk = fiemap_s->fm_start >> inode->i_sb->s_blocksize_bits;
+	fiemap_i.fiemap_s = fiemap_s;
+	fiemap_i.tot_mapping_len = 0;
+	fiemap_i.cur_ext_ptr = (char __user *)(arg + sizeof(*fiemap_s));
+	fiemap_i.current_extent = 0;
+	fiemap_i.err = 0;
+
+	/*
+	 * Walk the extent tree gathering extent information
+	 */
+	mutex_lock(&EXT3_I(inode)->truncate_mutex);
+	err = ext3_ext_walk_space(inode, start_blk, EXT_MAX_BLOCK - start_blk,
+				  ext3_ext_fiemap_cb, &fiemap_i);
+	mutex_unlock(&EXT3_I(inode)->truncate_mutex);
+	/* The callback reports a failed user copy only through ->err. */
+	if (!err)
+		err = fiemap_i.err;
+	if (err)
+		goto out;
+
+	fiemap_s->fm_extent_count = fiemap_i.current_extent;
+	fiemap_s->fm_length = fiemap_i.tot_mapping_len;
+
+	/*
+	 * The last extent has already been copied out, so tag the kernel copy
+	 * with FIEMAP_EXTENT_LAST and write it over the user slot it filled.
+	 */
+	if (fiemap_i.current_extent != 0 &&
+	    !(fiemap_s->fm_flags & FIEMAP_FLAG_NUM_EXTENTS)) {
+		last_extent = &fiemap_i.fm_extent;
+		last_extent->fe_flags |= FIEMAP_EXTENT_LAST;
+		if (copy_to_user(fiemap_i.cur_ext_ptr - sizeof(*last_extent),
+				 last_extent, sizeof(*last_extent))) {
+			err = -EFAULT;
+			goto out;
+		}
+	}
+
+	/* copy_to_user() returns the bytes left uncopied, not an errno. */
+	if (copy_to_user((void __user *)arg, fiemap_s, sizeof(*fiemap_s)))
+		err = -EFAULT;
+out:
+	kfree(fiemap_s);
+	return err;
+}
+
 EXPORT_SYMBOL(ext3_mark_inode_dirty);
 EXPORT_SYMBOL(ext3_ext_invalidate_cache);
 EXPORT_SYMBOL(ext3_ext_insert_extent);
Index: linux-2.6.18/fs/ext3/fiemap.h
===================================================================
--- /dev/null
+++ linux-2.6.18/fs/ext3/fiemap.h
@@ -0,0 +1,49 @@
+/*
+ * linux/fs/ext3/fiemap.h
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc
+ *
+ * Author: Kalpak Shah
+ */
+
+#ifndef _LINUX_EXT3_FIEMAP_H
+#define _LINUX_EXT3_FIEMAP_H
+
+struct fiemap_extent {	/* one extent record, as copied out to user space */
+	__u64 fe_offset; /* offset in bytes for the start of the extent */
+	__u64 fe_length; /* length in bytes for the extent */
+	__u32 fe_flags;  /* returned FIEMAP_EXTENT_* flags for the extent */
+	__u32 fe_lun;	 /* logical device number for extent (starting at 0) */
+};
+
+/*
+ * fiemap is not ext3-specific and should be moved into fs.h eventually.
+ */
+
+struct fiemap {		/* argument header of the EXT3_IOC_FIEMAP ioctl */
+	__u64 fm_start;	 /* logical starting byte offset (in/out) */
+	__u64 fm_length; /* logical length of map (in/out) */
+	__u32 fm_flags;	 /* FIEMAP_FLAG_* flags for request (in/out) */
+	__u32 fm_extent_count; /* number of extents in fm_extents (in/out) */
+	__u64 fm_unused;
+	struct fiemap_extent fm_extents[0]; /* extent array follows the header */
+};
+
+#define FIEMAP_FLAG_SYNC	0x00000001 /* sync file data before map */
+#define FIEMAP_FLAG_HSM_READ	0x00000002 /* get data from HSM before map */
+#define FIEMAP_FLAG_NUM_EXTENTS	0x00000004 /* return only number of extents */
+#define FIEMAP_FLAG_INCOMPAT	0xff000000 /* error for unknown flags in here */
+
+#define FIEMAP_EXTENT_HOLE	0x00000001 /* has no data or space allocation */
+#define FIEMAP_EXTENT_UNWRITTEN	0x00000002 /* space allocated, but no data */
+#define FIEMAP_EXTENT_UNMAPPED	0x00000004 /* has data but no space allocation */
+#define FIEMAP_EXTENT_ERROR	0x00000008 /* mapping error, errno in "fe_start" (no such field; fe_offset? TODO confirm) */
+#define FIEMAP_EXTENT_NO_DIRECT	0x00000010 /* cannot access data directly */
+#define FIEMAP_EXTENT_LAST	0x00000020 /* last extent in the file */
+#define FIEMAP_EXTENT_DELALLOC	0x00000040 /* has data but not yet written,
+					    * must have EXTENT_UNKNOWN set (undefined here; presumably UNMAPPED - TODO confirm) */
+#define FIEMAP_EXTENT_SECONDARY	0x00000080 /* data (also) in secondary storage,
+					    * not in primary if EXTENT_UNKNOWN (undefined here; presumably UNMAPPED - TODO confirm) */
+#define FIEMAP_EXTENT_EOF	0x00000100 /* extent ends at or beyond EOF (i_size) */
+
+#endif /* _LINUX_EXT3_FIEMAP_H */