--- fs/ext4/dir.c | 26 +++++++++--- fs/ext4/ext4.h | 70 ++++++++++++++++++++++++++++++++- fs/ext4/namei.c | 117 ++++++++++++++++++++++++++++++++++++++++---------------- 3 files changed, 170 insertions(+), 43 deletions(-) --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -53,11 +53,18 @@ const struct file_operations ext4_dir_op static unsigned char get_dtype(struct super_block *sb, int filetype) { + int fl_index = filetype & EXT4_FT_MASK; /* table index with the extra-data flag bits masked off */ + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || - (filetype >= EXT4_FT_MAX)) + (fl_index >= EXT4_FT_MAX)) return DT_UNKNOWN; - return (ext4_filetype_table[filetype]); + if (!test_opt(sb, DIRDATA)) + return (ext4_filetype_table[fl_index]); + + return (ext4_filetype_table[fl_index]) | + (filetype & EXT4_DIRENT_LUFID); /* with DIRDATA set, the LUFID flag is passed through in the result */ + } /* @@ -75,11 +82,11 @@ int __ext4_check_dir_entry(const char *f const int rlen = ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize); - if (unlikely(rlen < EXT4_DIR_REC_LEN(1))) + if (unlikely(rlen < __EXT4_DIR_REC_LEN(1))) error_msg = "rec_len is smaller than minimal"; else if (unlikely(rlen % 4 != 0)) error_msg = "rec_len % 4 != 0"; - else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len))) + else if (unlikely(rlen < EXT4_DIR_REC_LEN(de))) error_msg = "rec_len is too small for name_len"; else if (unlikely(((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)) @@ -196,7 +203,7 @@ revalidate: * failure will be detected in the * dirent test below.
*/ if (ext4_rec_len_from_disk(de->rec_len, - sb->s_blocksize) < EXT4_DIR_REC_LEN(1)) + sb->s_blocksize) < __EXT4_DIR_REC_LEN(1)) break; i += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); @@ -359,12 +366,17 @@ int ext4_htree_store_dirent(struct file struct fname *fname, *new_fn; struct dir_private_info *info; int len; + int extra_data = 0; /* bytes stored after the name (NUL + dirdata), 0 if none */ info = dir_file->private_data; p = &info->root.rb_node; /* Create and allocate the fname structure */ - len = sizeof(struct fname) + dirent->name_len + 1; + if (dirent->file_type & EXT4_DIRENT_LUFID) + extra_data = ext4_get_dirent_data_len(dirent); + + len = sizeof(struct fname) + dirent->name_len + extra_data + 1; /* + 1: room for the NUL written below */ + new_fn = kzalloc(len, GFP_KERNEL); if (!new_fn) return -ENOMEM; @@ -373,7 +385,7 @@ int ext4_htree_store_dirent(struct file new_fn->inode = le32_to_cpu(dirent->inode); new_fn->name_len = dirent->name_len; new_fn->file_type = dirent->file_type; - memcpy(new_fn->name, dirent->name, dirent->name_len); + memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data); /* copies only the name unless dirdata follows it */ new_fn->name[dirent->name_len] = 0; while (*p) { --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -902,6 +902,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT4_MOUNT_DIRDATA 0x00200 /* Data in directory entries */ #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ @@ -1414,7 +1414,9 @@ static inline void ext4_clear_state_flag EXT4_FEATURE_INCOMPAT_EXTENTS| \ EXT4_FEATURE_INCOMPAT_64BIT| \ EXT4_FEATURE_INCOMPAT_FLEX_BG| \ - EXT4_FEATURE_INCOMPAT_MMP) + EXT4_FEATURE_INCOMPAT_MMP| \ + EXT4_FEATURE_INCOMPAT_DIRDATA) + #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ @@ -1501,6 +1503,43 @@ struct ext4_dir_entry_2 { #define EXT4_FT_SYMLINK 7 #define EXT4_FT_MAX 8 +#define EXT4_FT_MASK 0xf + +#if EXT4_FT_MAX > EXT4_FT_MASK +#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK" +#endif + +/* + * d_type has 4 unused bits, so it can hold four types of data. These + * types of data (e.g. lustre data, high 32 bits of 64-bit inode number) can be + * stored, in flag order, after the file name in an ext4 dirent. + */ +/* + * This flag is added to d_type if the ext4 dirent has extra data after the + * filename. The data length is variable and is stored in the first byte + * of the data. The data starts after the filename NUL byte. + * This is used by Lustre FS. + */ +#define EXT4_DIRENT_LUFID 0x10 + +#define EXT4_LUFID_MAGIC 0xAD200907UL +struct ext4_dentry_param { + __u32 edp_magic; /* EXT4_LUFID_MAGIC */ + char edp_len; /* size of edp_data in bytes */ + char edp_data[0]; /* packed array of data */ +} __attribute__((packed)); + +static inline unsigned char *ext4_dentry_get_data(struct super_block *sb, + struct ext4_dentry_param* p) + +{ + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_DIRDATA)) + return NULL; + if (p && p->edp_magic == EXT4_LUFID_MAGIC) + return &p->edp_len; /* callers read this first byte as the data length */ + else + return NULL; +} /* * EXT4_DIR_PAD defines the directory entries boundaries @@ -1509,8 +1548,11 @@ struct ext4_dir_entry_2 { */ #define EXT4_DIR_PAD 4 #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) -#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ +#define __EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ ~EXT4_DIR_ROUND) +#define EXT4_DIR_REC_LEN(de) (__EXT4_DIR_REC_LEN((de)->name_len +\ + ext4_get_dirent_data_len(de))) + #define EXT4_MAX_REC_LEN ((1<<16)-1) /* @@ -1908,7 +1950,7 @@ extern struct buffer_head * ext4_find_en struct ext4_dir_entry_2 ** res_dir); #define ll_ext4_find_entry(inode, dentry, res_dir) ext4_find_entry(inode, &(dentry)->d_name, res_dir)
extern int ext4_add_dot_dotdot(handle_t *handle, struct inode *dir, - struct inode *inode); + struct inode *inode, const void *, const void *); extern struct buffer_head *ext4_append(handle_t *handle, struct inode *inode, ext4_lblk_t *block, int *err); @@ -2308,6 +2350,28 @@ static inline void set_bitmap_uptodate(s extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; +/* + * Compute the length of the data stored after the filename in a dirent. + * The result is 0 when no extension flag is set; otherwise it counts the NUL + * terminator plus every extension. Each extension has a bit set in the high 4 + * bits of de->file_type, and its first byte is its (unsigned) length. + */ +static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de) +{ + __u8 *len = (__u8 *)de->name + de->name_len + 1 /* NUL terminator */; + int dlen = 0; + __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4; + + while (extra_data_flags) { + if (extra_data_flags & 1) { + dlen += *len + (dlen == 0); /* the NUL is counted once, with the first extension */ + len += *len; + } + extra_data_flags >>= 1; + } + return dlen; +} + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -170,7 +170,8 @@ static unsigned dx_get_count(struct dx_e static unsigned dx_get_limit(struct dx_entry *entries); static void dx_set_count(struct dx_entry *entries, unsigned value); static void dx_set_limit(struct dx_entry *entries, unsigned value); -static unsigned dx_root_limit(struct inode *dir, unsigned infosize); +static inline unsigned dx_root_limit(__u32 blocksize, + struct ext4_dir_entry_2 *dot_de, unsigned infosize); static unsigned dx_node_limit(struct inode *dir); static struct dx_frame *dx_probe(const struct qstr *d_name, struct inode *dir, @@ -213,11 +214,12 @@ ext4_next_entry(struct ext4_dir_entry_2 */ struct dx_root_info * dx_get_dx_info(struct ext4_dir_entry_2 *de) { - /* get dotdot first */ - de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(1)); +
BUG_ON(de->name_len != 1); /* the entry passed in must have a 1-byte name (the "." entry) */ + /* get dotdot first */ + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de)); - /* dx root info is after dotdot entry */ - de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(2)); + /* dx root info is after dotdot entry */ + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_REC_LEN(de)); return (struct dx_root_info *) de; } @@ -262,16 +264,23 @@ static inline void dx_set_limit(struct d ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); } -static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) +static inline unsigned dx_root_limit(__u32 blocksize, + struct ext4_dir_entry_2 *dot_de, unsigned infosize) { - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - - EXT4_DIR_REC_LEN(2) - infosize; + struct ext4_dir_entry_2 *dotdot_de; + unsigned entry_space; + + BUG_ON(dot_de->name_len != 1); + dotdot_de = ext4_next_entry(dot_de, blocksize); + entry_space = blocksize - EXT4_DIR_REC_LEN(dot_de) - + EXT4_DIR_REC_LEN(dotdot_de) - infosize; /* both record lengths include any data stored after the names */ + return entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit(struct inode *dir) { - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); + unsigned entry_space = dir->i_sb->s_blocksize - __EXT4_DIR_REC_LEN(0); return entry_space / sizeof(struct dx_entry); } @@ -318,7 +327,7 @@ static struct stats dx_show_leaf(struct printk(":%x.%u ", h.hash, ((char *) de - base)); } - space += EXT4_DIR_REC_LEN(de->name_len); + space += EXT4_DIR_REC_LEN(de); names++; } de = ext4_next_entry(de, size); @@ -420,7 +429,8 @@ dx_probe(const struct qstr *d_name, stru entries = (struct dx_entry *) (((char *)info) + info->info_length); - if (dx_get_limit(entries) != dx_root_limit(dir, + if (dx_get_limit(entries) != dx_root_limit(dir->i_sb->s_blocksize, + (struct ext4_dir_entry_2*)bh->b_data, info->info_length)) { ext4_warning(dir->i_sb, "dx entry: limit != root limit"); brelse(bh); @@ -609,7 +619,7 @@ static int
htree_dirblock_to_tree(struct de = (struct ext4_dir_entry_2 *) bh->b_data; top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - - EXT4_DIR_REC_LEN(0)); + __EXT4_DIR_REC_LEN(0)); for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { if (ext4_check_dir_entry(dir, NULL, de, bh, (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) @@ -1172,7 +1182,7 @@ dx_move_dirents(char *from, char *to, st while (count--) { struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + (map->offs<<2)); - rec_len = EXT4_DIR_REC_LEN(de->name_len); + rec_len = EXT4_DIR_REC_LEN(de); memcpy (to, de, rec_len); ((struct ext4_dir_entry_2 *) to)->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); @@ -1196,7 +1206,7 @@ static struct ext4_dir_entry_2* dx_pack_ while ((char*)de < base + blocksize) { next = ext4_next_entry(de, blocksize); if (de->inode && de->name_len) { - rec_len = EXT4_DIR_REC_LEN(de->name_len); + rec_len = EXT4_DIR_REC_LEN(de); if (de > to) memmove(to, de, rec_len); to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); @@ -1326,10 +1336,16 @@ static int add_dirent_to_buf(handle_t *h unsigned int offset = 0; unsigned int blocksize = dir->i_sb->s_blocksize; unsigned short reclen; - int nlen, rlen, err; + int nlen, rlen, err, dlen = 0; + unsigned char *data; char *top; - reclen = EXT4_DIR_REC_LEN(namelen); + data = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *) + dentry->d_fsdata); + if (data) + dlen = (*data) + 1; /* first byte of data is its length; + 1 for the NUL after the name */ + + reclen = __EXT4_DIR_REC_LEN(namelen + dlen); if (!de) { de = (struct ext4_dir_entry_2 *)bh->b_data; top = bh->b_data + blocksize - reclen; @@ -1338,7 +1354,7 @@ static int add_dirent_to_buf(handle_t *h return -EIO; if (ext4_match(namelen, name, de)) return -EEXIST; - nlen = EXT4_DIR_REC_LEN(de->name_len); + nlen = EXT4_DIR_REC_LEN(de); rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); if ((de->inode?
rlen - nlen: rlen) >= reclen) break; @@ -1356,7 +1372,7 @@ static int add_dirent_to_buf(handle_t *h } /* By now the buffer is marked for journaling */ - nlen = EXT4_DIR_REC_LEN(de->name_len); + nlen = EXT4_DIR_REC_LEN(de); rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); if (de->inode) { struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); @@ -1372,6 +1388,12 @@ static int add_dirent_to_buf(handle_t *h de->inode = 0; de->name_len = namelen; memcpy(de->name, name, namelen); + if (data) { + de->name[namelen] = 0; + memcpy(&de->name[namelen + 1], data, *data); /* unsigned length byte, the same value reclen was sized with */ + de->file_type |= EXT4_DIRENT_LUFID; + } + /* * XXX shouldn't update any times until successful * completion of syscall, but too many callers depend @@ -1468,7 +1490,8 @@ static int make_indexed_dir(handle_t *ha dx_set_block(entries, 1); dx_set_count(entries, 1); - dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info))); + dx_set_limit(entries, dx_root_limit(dir->i_sb->s_blocksize, + dot_de, sizeof(*dx_info))); /* Initialize as for dx_probe */ hinfo.hash_version = dx_info->hash_version; @@ -1511,6 +1534,8 @@ static int ext4_update_dotdot(handle_t * struct buffer_head * dir_block; struct ext4_dir_entry_2 * de; int len, journal = 0, err = 0; + int dlen = 0; + unsigned char *data; /* matches ext4_dentry_get_data(); *data is used as a length */ if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1526,19 +1551,24 @@ static int ext4_update_dotdot(handle_t * /* the first item must be "."
*/ assert(de->name_len == 1 && de->name[0] == '.'); len = le16_to_cpu(de->rec_len); - assert(len >= EXT4_DIR_REC_LEN(1)); - if (len > EXT4_DIR_REC_LEN(1)) { + assert(len >= __EXT4_DIR_REC_LEN(1)); + if (len > __EXT4_DIR_REC_LEN(1)) { BUFFER_TRACE(dir_block, "get_write_access"); err = ext4_journal_get_write_access(handle, dir_block); if (err) goto out_journal; journal = 1; - de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(1)); + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de)); } - len -= EXT4_DIR_REC_LEN(1); - assert(len == 0 || len >= EXT4_DIR_REC_LEN(2)); + len -= EXT4_DIR_REC_LEN(de); + data = ext4_dentry_get_data(dir->i_sb, + (struct ext4_dentry_param *) dentry->d_fsdata); + if (data) + dlen = *data + 1; /* first byte of data is its length; + 1 for the NUL after ".." */ + assert(len == 0 || len >= __EXT4_DIR_REC_LEN(2 + dlen)); + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); if (!journal) { @@ -1552,10 +1582,15 @@ static int ext4_update_dotdot(handle_t * if (len > 0) de->rec_len = cpu_to_le16(len); else - assert(le16_to_cpu(de->rec_len) >= EXT4_DIR_REC_LEN(2)); + assert(le16_to_cpu(de->rec_len) >= __EXT4_DIR_REC_LEN(2)); de->name_len = 2; strcpy (de->name, ".."); - ext4_set_de_type(dir->i_sb, de, S_IFDIR); + if (data != NULL && ext4_get_dirent_data_len(de) >= dlen) { /* length is derived from the flags already in de->file_type */ + de->name[2] = 0; + memcpy(&de->name[2 + 1], data, *data); + ext4_set_de_type(dir->i_sb, de, S_IFDIR); /* NOTE(review): the unconditional call is removed, so the type is set only on this path - confirm intended */ + de->file_type |= EXT4_DIRENT_LUFID; + } out_journal: if (journal) { @@ -1994,12 +2029,13 @@ retry: /* Initialize @inode as a subdirectory of @dir, and add the * "." and ".." entries into the first directory block.
*/ int ext4_add_dot_dotdot(handle_t *handle, struct inode * dir, - struct inode *inode) + struct inode *inode, + const void *data1, const void *data2) { struct buffer_head *dir_block; struct ext4_dir_entry_2 *de; unsigned int blocksize = dir->i_sb->s_blocksize; - int err = 0; + int err = 0, dot_reclen; if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2020,17 +2056,32 @@ int ext4_add_dot_dotdot(handle_t *handle de = (struct ext4_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; - de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len), - blocksize); strcpy(de->name, "."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); + /* get packed fid data */ + data1 = ext4_dentry_get_data(dir->i_sb, + (struct ext4_dentry_param *) data1); + if (data1) { + de->name[1] = 0; + memcpy(&de->name[2], data1, *(const unsigned char *) data1); /* length byte is unsigned */ + de->file_type |= EXT4_DIRENT_LUFID; + } + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de)); + dot_reclen = le16_to_cpu(de->rec_len); /* CPU-order length of ".", used to size ".." */ de = ext4_next_entry(de, blocksize); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1), + de->rec_len = ext4_rec_len_to_disk(blocksize - dot_reclen, blocksize); de->name_len = 2; strcpy(de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); + data2 = ext4_dentry_get_data(dir->i_sb, + (struct ext4_dentry_param *) data2); + if (data2) { + de->name[2] = 0; + memcpy(&de->name[3], data2, *(const unsigned char *) data2); /* length byte is unsigned */ + de->file_type |= EXT4_DIRENT_LUFID; + } inode->i_nlink = 2; BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, inode, dir_block); @@ -2070,7 +2121,7 @@ retry: if (IS_ERR(inode)) goto out_stop; - err = ext4_add_dot_dotdot(handle, dir, inode); + err = ext4_add_dot_dotdot(handle, dir, inode, NULL, NULL); if (err) goto out_clear_inode; err = ext4_add_entry(handle, dentry, inode); @@ -2108,7 +2159,7 @@ static int empty_dir(struct inode *inode int err = 0; sb = inode->i_sb; - if
(inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || + if (inode->i_size < __EXT4_DIR_REC_LEN(1) + __EXT4_DIR_REC_LEN(2) || !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { if (err) EXT4_ERROR_INODE(inode, --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1363,7 +1363,7 @@ enum { Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, Opt_dirdata, Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, @@ -1427,6 +1427,7 @@ static const match_table_t tokens = { {Opt_iopen, "iopen"}, {Opt_noiopen, "noiopen"}, {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_dirdata, "dirdata"}, /* mount option string mapped to Opt_dirdata */ {Opt_barrier, "barrier=%u"}, {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, @@ -1840,6 +1841,9 @@ set_qf_format: case Opt_noiopen: case Opt_iopen_nopriv: break; + case Opt_dirdata: + set_opt(sb, DIRDATA); /* the option get_dtype() checks with test_opt(sb, DIRDATA) */ + break; case Opt_ignore: break; case Opt_resize: