this patch implements feature which allows ext3 fs users (e.g. Lustre) to store data in ext3 dirent. data is stored in ext3 dirent after file-name, this space is accounted in de->rec_len. flag EXT3_DIRENT_LUFID added to d_type if extra data is present. make use of dentry->d_fsdata to pass fid to ext3. so no changes in ext3_add_entry() interface required. Index: b/fs/ext3/namei.c =================================================================== --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -174,7 +174,8 @@ static unsigned dx_get_count (struct dx_ static unsigned dx_get_limit (struct dx_entry *entries); static void dx_set_count (struct dx_entry *entries, unsigned value); static void dx_set_limit (struct dx_entry *entries, unsigned value); -static unsigned dx_root_limit (struct inode *dir, unsigned infosize); +static unsigned dx_root_limit (__u32 blocksize, struct ext3_dir_entry_2*, + unsigned infosize); static unsigned dx_node_limit (struct inode *dir); static struct dx_frame *dx_probe(struct dentry *dentry, struct inode *dir, @@ -204,11 +205,13 @@ static int ext3_dx_add_entry(handle_t *h */ struct dx_root_info * dx_get_dx_info(struct ext3_dir_entry_2 *de) { + BUG_ON(de->name_len != 1); + /* get dotdot first */ de = (struct ext3_dir_entry_2 *)((char *)de + le16_to_cpu(de->rec_len)); /* dx root info is after dotdot entry */ - de = (struct ext3_dir_entry_2 *)((char *)de + EXT3_DIR_REC_LEN(2)); + de = (struct ext3_dir_entry_2 *)((char *)de + EXT3_DIR_REC_LEN(de)); return (struct dx_root_info *) de; } @@ -253,16 +256,24 @@ static inline void dx_set_limit (struct ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); } -static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) +static inline unsigned dx_root_limit (__u32 blocksize, + struct ext3_dir_entry_2 * dot_de, unsigned infosize) { - unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - - EXT3_DIR_REC_LEN(2) - infosize; - return 0? 20: entry_space / sizeof(struct dx_entry); + struct ext3_dir_entry_2 * dotdot_de; + unsigned entry_space; + + BUG_ON(dot_de->name_len != 1); + dotdot_de = (struct ext3_dir_entry_2 *)((char *)dot_de + + le16_to_cpu(dot_de->rec_len)); + entry_space = blocksize - EXT3_DIR_REC_LEN(dot_de) - + EXT3_DIR_REC_LEN(dotdot_de) - infosize; + + return entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit (struct inode *dir) { - unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); + unsigned entry_space = dir->i_sb->s_blocksize - __EXT3_DIR_REC_LEN(0); return 0? 22: entry_space / sizeof(struct dx_entry); } @@ -309,7 +320,7 @@ static struct stats dx_show_leaf(struct printk(":%x.%u ", h.hash, ((char *) de - base)); } - space += EXT3_DIR_REC_LEN(de->name_len); + space += EXT3_DIR_REC_LEN(de); names++; } de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); @@ -411,7 +422,9 @@ dx_probe(struct dentry *dentry, struct i entries = (struct dx_entry *) (((char *)info) + info->info_length); - if (dx_get_limit(entries) != dx_root_limit(dir, info->info_length)) { + if (dx_get_limit(entries) != dx_root_limit(dir->i_sb->s_blocksize, + (struct ext3_dir_entry_2*)bh->b_data, + info->info_length)) { ext3_warning(dir->i_sb, __FUNCTION__, "dx entry: limit != root limit"); brelse(bh); @@ -468,14 +481,17 @@ dx_probe(struct dentry *dentry, struct i if (!indirect--) return frame; if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; - at = entries = ((struct dx_node *) bh->b_data)->entries; + entries = ((struct dx_node *) bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { ext3_warning(dir->i_sb, __FUNCTION__, - "dx entry: limit != node limit"); + "block %u(%lu): limit %u != node limit %u", + dx_get_block(at), (long)bh->b_blocknr, + dx_get_limit(entries), dx_node_limit(dir)); brelse(bh); *err = ERR_BAD_DX_DIR; goto fail2; } + at = entries; frame++; frame->bh = NULL; } @@ -608,7 +624,7 @@ static int htree_dirblock_to_tree(struct de = (struct ext3_dir_entry_2 *) bh->b_data; top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - - EXT3_DIR_REC_LEN(0)); + __EXT3_DIR_REC_LEN(0)); for (; de < top; de = ext3_next_entry(de)) { if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, (block<i_sb)) @@ -1001,7 +1015,7 @@ static struct buffer_head * ext3_dx_find goto errout; de = (struct ext3_dir_entry_2 *) bh->b_data; top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - - EXT3_DIR_REC_LEN(0)); + __EXT3_DIR_REC_LEN(0)); for (; de < top; de = ext3_next_entry(de)) if (ext3_match (namelen, name, de)) { if (!ext3_check_dir_entry("ext3_find_entry", @@ -1183,7 +1197,7 @@ dx_move_dirents(char *from, char *to, st while (count--) { struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); - rec_len = EXT3_DIR_REC_LEN(de->name_len); + rec_len = EXT3_DIR_REC_LEN(de); memcpy (to, de, rec_len); ((struct ext3_dir_entry_2 *) to)->rec_len = cpu_to_le16(rec_len); @@ -1208,7 +1222,7 @@ static struct ext3_dir_entry_2* dx_pack_ next = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); if (de->inode && de->name_len) { - rec_len = EXT3_DIR_REC_LEN(de->name_len); + rec_len = EXT3_DIR_REC_LEN(de); if (de > to) memmove(to, de, rec_len); to->rec_len = cpu_to_le16(rec_len); @@ -1334,12 +1348,18 @@ static int add_dirent_to_buf(handle_t *h struct inode *dir = dentry->d_parent->d_inode; const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; + unsigned char *data; unsigned long offset = 0; unsigned short reclen; - int nlen, rlen, err; + int nlen, rlen, err, dlen = 0; char *top; - reclen = EXT3_DIR_REC_LEN(namelen); + data = ext3_dentry_get_data(inode->i_sb, (struct ext3_dentry_param *) + dentry->d_fsdata); + + if (data) + dlen = *data + 1; + reclen = __EXT3_DIR_REC_LEN(namelen + dlen); if (!de) { de = (struct ext3_dir_entry_2 *)bh->b_data; top = bh->b_data + dir->i_sb->s_blocksize - reclen; @@ -1353,7 +1373,7 @@ static int add_dirent_to_buf(handle_t *h brelse (bh); return -EEXIST; } - nlen = EXT3_DIR_REC_LEN(de->name_len); + nlen = EXT3_DIR_REC_LEN(de); rlen = le16_to_cpu(de->rec_len); if ((de->inode? rlen - nlen: rlen) >= reclen) break; @@ -1372,7 +1392,7 @@ static int add_dirent_to_buf(handle_t *h } /* By now the buffer is marked for journaling */ - nlen = EXT3_DIR_REC_LEN(de->name_len); + nlen = EXT3_DIR_REC_LEN(de); rlen = le16_to_cpu(de->rec_len); if (de->inode) { struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); @@ -1388,6 +1408,11 @@ static int add_dirent_to_buf(handle_t *h de->inode = 0; de->name_len = namelen; memcpy (de->name, name, namelen); + if (data) { + de->name[namelen] = 0; + memcpy(&de->name[namelen + 1], data, *(char*) data); + de->file_type |= EXT3_DIRENT_LUFID; + } /* * XXX shouldn't update any times until successful * completion of syscall, but too many callers depend @@ -1474,7 +1499,8 @@ static int make_indexed_dir(handle_t *ha entries = (void *)dx_info + sizeof(*dx_info); dx_set_block (entries, 1); dx_set_count (entries, 1); - dx_set_limit (entries, dx_root_limit(dir, sizeof(*dx_info))); + dx_set_limit (entries, dx_root_limit(dir->i_sb->s_blocksize, + dot_de, sizeof(*dx_info))); /* Initialize as for dx_probe */ hinfo.hash_version = dx_info->hash_version; @@ -1502,6 +1528,8 @@ static int ext3_update_dotdot(handle_t * struct buffer_head * dir_block; struct ext3_dir_entry_2 * de; int len, journal = 0, err = 0; + int dlen = 0; + char *data; if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1517,19 +1545,23 @@ static int ext3_update_dotdot(handle_t * /* the first item must be "." */ assert(de->name_len == 1 && de->name[0] == '.'); len = le16_to_cpu(de->rec_len); - assert(len >= EXT3_DIR_REC_LEN(1)); - if (len > EXT3_DIR_REC_LEN(1)) { + assert(len >= __EXT3_DIR_REC_LEN(1)); + if (len > __EXT3_DIR_REC_LEN(1)) { BUFFER_TRACE(dir_block, "get_write_access"); err = ext3_journal_get_write_access(handle, dir_block); if (err) goto out_journal; journal = 1; - de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(1)); + de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de)); } - len -= EXT3_DIR_REC_LEN(1); - assert(len == 0 || len >= EXT3_DIR_REC_LEN(2)); + len -= EXT3_DIR_REC_LEN(de); + data = ext3_dentry_get_data(dir->i_sb, + (struct ext3_dentry_param *) dentry->d_fsdata); + if (data) + dlen = *data + 1; + assert(len == 0 || len >= __EXT3_DIR_REC_LEN(2 + dlen)); de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); if (!journal) { @@ -1543,11 +1575,15 @@ static int ext3_update_dotdot(handle_t * if (len > 0) de->rec_len = cpu_to_le16(len); else - assert(le16_to_cpu(de->rec_len) >= EXT3_DIR_REC_LEN(2)); + assert(le16_to_cpu(de->rec_len) >= __EXT3_DIR_REC_LEN(2)); de->name_len = 2; strcpy (de->name, ".."); ext3_set_de_type(dir->i_sb, de, S_IFDIR); - + if (data) { + de->name[2] = 0; + memcpy(&de->name[2 + 1], data, dlen); + de->file_type |= EXT3_DIRENT_LUFID; + } out_journal: if (journal) { BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); @@ -1958,12 +1994,12 @@ retry: /* Initialize @inode as a subdirectory of @dir, and add the * "." and ".." entries into the first directory block. */ -int ext3_add_dot_dotdot(handle_t *handle, struct inode * dir, - struct inode *inode) +int ext3_add_dot_dotdot(handle_t *handle, struct inode * dir, struct inode *inode, + const void *data1, const void *data2) { struct buffer_head * dir_block; struct ext3_dir_entry_2 * de; - int err = 0; + int err = 0, dot_reclen; if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1983,16 +2019,36 @@ int ext3_add_dot_dotdot(handle_t *handle de = (struct ext3_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; - de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len)); - strcpy (de->name, "."); ext3_set_de_type(dir->i_sb, de, S_IFDIR); + strcpy (de->name, "."); + + /* get packed fid data*/ + data1 = ext3_dentry_get_data(dir->i_sb, + (struct ext3_dentry_param *) data1); + if (data1) { + de->name[1] = 0; + memcpy(&de->name[2], data1, *(char*) data1); + de->file_type |= EXT3_DIRENT_LUFID; + } + de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de)); + dot_reclen = cpu_to_le16(de->rec_len); + de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1)); + de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-dot_reclen); de->name_len = 2; strcpy (de->name, ".."); ext3_set_de_type(dir->i_sb, de, S_IFDIR); + + /* get packed fid data*/ + data2 = ext3_dentry_get_data(dir->i_sb, + (struct ext3_dentry_param *) data2); + if (data2) { + de->name[2] = 0; + memcpy(&de->name[3], data2, *(char*) data2); + de->file_type |= EXT3_DIRENT_LUFID; + } inode->i_nlink = 2; BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); ext3_journal_dirty_metadata(handle, dir_block); @@ -2029,7 +2085,7 @@ retry: if (IS_ERR(inode)) goto out_stop; - err = ext3_add_dot_dotdot(handle, dir, inode); + err = ext3_add_dot_dotdot(handle, dir, inode, NULL, NULL); if (err) { inode->i_nlink = 0; ext3_mark_inode_dirty(handle, inode); @@ -2067,7 +2123,7 @@ static int empty_dir (struct inode * ino int err = 0; sb = inode->i_sb; - if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || + if (inode->i_size < __EXT3_DIR_REC_LEN(1) + __EXT3_DIR_REC_LEN(2) || !(bh = ext3_bread (NULL, inode, 0, 0, &err))) { if (err) ext3_error(inode->i_sb, __FUNCTION__, Index: b/include/linux/ext3_fs.h =================================================================== --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -717,13 +717,16 @@ static inline int ext3_valid_inum(struct #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 #define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ #define EXT3_FEATURE_INCOMPAT_MMP 0x0100 +#define EXT3_FEATURE_INCOMPAT_DIRDATA 0x1000 #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ EXT3_FEATURE_INCOMPAT_RECOVER| \ EXT3_FEATURE_INCOMPAT_META_BG| \ EXT3_FEATURE_INCOMPAT_EXTENTS| \ - EXT3_FEATURE_INCOMPAT_MMP) + EXT3_FEATURE_INCOMPAT_MMP| \ + EXT3_FEATURE_INCOMPAT_DIRDATA) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ @@ -790,7 +793,44 @@ struct ext3_dir_entry_2 { #define EXT3_FT_SYMLINK 7 #define EXT3_FT_MAX 8 +#define EXT3_FT_MASK 0xf + +#if EXT3_FT_MAX > EXT3_FT_MASK +#error "conflicting EXT3_FT_MAX and EXT3_FT_MASK" +#endif + +#define EXT3_LUFID_MAGIC 0xAD200907UL +struct ext3_dentry_param { + __u32 edp_magic; /* EXT3_LUFID_MAGIC */ + char edp_len; /* size of edp_data in bytes */ + char edp_data[0]; /* packed array of data */ +} __attribute__((packed)); + +static inline unsigned char *ext3_dentry_get_data(struct super_block *sb, + struct ext3_dentry_param* p) + +{ + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_DIRDATA)) + return NULL; + if (p && p->edp_magic == EXT3_LUFID_MAGIC) + return &p->edp_len; + else + return NULL; +} + +/* + * d_type has 4 unused bits, so it can hold four types data. these different + * type of data (e.g. lustre data, high 32 bits of 64-bit inode number) can be + * stored, in flag order, after file-name in ext3 dirent. +*/ +/* + * this flag is added to d_type if ext3 dirent has extra data after + * filename. this data length is variable and length is stored in first byte + * of data. data start after filename NUL byte. + * This is used by Lustre FS. + */ +#define EXT3_DIRENT_LUFID 0x10 /* * EXT3_DIR_PAD defines the directory entries boundaries * @@ -798,8 +838,12 @@ struct ext3_dir_entry_2 { */ #define EXT3_DIR_PAD 4 #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) -#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ +#define __EXT3_DIR_REC_LEN(len) (((len) + 8 + EXT3_DIR_ROUND) & \ ~EXT3_DIR_ROUND) + +#define EXT3_DIR_REC_LEN(de) (__EXT3_DIR_REC_LEN(de->name_len +\ + ext3_get_dirent_data_len(de))) + /* * Hash Tree Directory indexing * (c) Daniel Phillips, 2001 @@ -1125,7 +1169,8 @@ extern struct buffer_head * ext3_find_en struct ext3_dir_entry_2 ** res_dir); extern int ext3_add_dot_dotdot(handle_t *handle, struct inode *dir, - struct inode *inode); + struct inode *inode, const void *data1, + const void *data2); extern struct inode_operations ext3_dir_inode_operations; extern struct inode_operations ext3_special_inode_operations; extern struct buffer_head *ext3_append(handle_t *handle, struct inode *inode, @@ -1159,6 +1204,29 @@ ext3_get_blocks_wrap(handle_t *handle, s return ret; } +/* + * Compute the total directory entry data length. + * This includes the filename and an implicit NUL terminator (always present), + * and optional extensions. Each extension has a bit set in the high 4 bits of + * de->file_type, and the extension length is the first byte in each entry. + */ + +static inline int ext3_get_dirent_data_len(struct ext3_dir_entry_2 *de) +{ + char *len = de->name + de->name_len + 1 /* NUL terminator */; + int dlen = 0; + __u8 extra_data_flags = (de->file_type & ~EXT3_FT_MASK) >> 4; + + while (extra_data_flags) { + if (extra_data_flags & 1) { + dlen += *len + (dlen == 0); + len += *len; + } + extra_data_flags >>= 1; + } + return dlen; +} + #endif /* __KERNEL__ */ /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */ Index: b/fs/ext3/dir.c =================================================================== --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -53,11 +53,17 @@ const struct file_operations ext3_dir_op static unsigned char get_dtype(struct super_block *sb, int filetype) { + int fl_index = filetype & EXT3_FT_MASK; + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || - (filetype >= EXT3_FT_MAX)) + (fl_index >= EXT3_FT_MAX)) return DT_UNKNOWN; - return (ext3_filetype_table[filetype]); + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_DIRDATA)) + return (ext3_filetype_table[fl_index]); + + return (ext3_filetype_table[fl_index]) | + (filetype & EXT3_DIRENT_LUFID); } @@ -69,11 +75,11 @@ int ext3_check_dir_entry (const char * f const char * error_msg = NULL; const int rlen = le16_to_cpu(de->rec_len); - if (rlen < EXT3_DIR_REC_LEN(1)) + if (rlen < __EXT3_DIR_REC_LEN(1)) error_msg = "rec_len is smaller than minimal"; else if (rlen % 4 != 0) error_msg = "rec_len % 4 != 0"; - else if (rlen < EXT3_DIR_REC_LEN(de->name_len)) + else if (rlen < EXT3_DIR_REC_LEN(de)) error_msg = "rec_len is too small for name_len"; else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) error_msg = "directory entry across blocks"; @@ -177,7 +183,7 @@ revalidate: * failure will be detected in the * dirent test below. */ if (le16_to_cpu(de->rec_len) < - EXT3_DIR_REC_LEN(1)) + __EXT3_DIR_REC_LEN(1)) break; i += le16_to_cpu(de->rec_len); } @@ -210,7 +216,6 @@ revalidate: * during the copy operation. */ unsigned long version = filp->f_version; - error = filldir(dirent, de->name, de->name_len, filp->f_pos, @@ -342,13 +347,17 @@ int ext3_htree_store_dirent(struct file struct rb_node **p, *parent = NULL; struct fname * fname, *new_fn; struct dir_private_info *info; + int extra_data = 1; int len; info = (struct dir_private_info *) dir_file->private_data; p = &info->root.rb_node; /* Create and allocate the fname structure */ - len = sizeof(struct fname) + dirent->name_len + 1; + if (dirent->file_type & EXT3_DIRENT_LUFID) + extra_data = ext3_get_dirent_data_len(dirent); + + len = sizeof(struct fname) + dirent->name_len + extra_data; new_fn = kmalloc(len, GFP_KERNEL); if (!new_fn) return -ENOMEM; @@ -358,7 +367,7 @@ int ext3_htree_store_dirent(struct file new_fn->inode = le32_to_cpu(dirent->inode); new_fn->name_len = dirent->name_len; new_fn->file_type = dirent->file_type; - memcpy(new_fn->name, dirent->name, dirent->name_len); + memcpy(new_fn->name, dirent->name, dirent->name_len + extra_data); new_fn->name[dirent->name_len] = 0; while (*p) {